initial_commit

author: root <root@artemis.panaceas.org> 2015-12-25 04:40:36 +0000
committer: root <root@artemis.panaceas.org> 2015-12-25 04:40:36 +0000
commit: 849369d6c66d3054688672f97d31fceb8e8230fb (patch)
tree: 6135abc790ca67dedbe07c39806591e70eda81ce /arch/ia64
download: linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.gz
linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.bz2
linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.zip
478 files changed, 122195 insertions, 0 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
new file mode 100644
index 00000000..38280ef4
--- /dev/null
+++ b/arch/ia64/Kconfig
@@ -0,0 +1,686 @@
+source "init/Kconfig"
+
+source "kernel/Kconfig.freezer"
+
+menu "Processor type and features"
+
+config IA64
+	bool
+	select PCI if (!IA64_HP_SIM)
+	select ACPI if (!IA64_HP_SIM)
+	select PM if (!IA64_HP_SIM)
+	select ARCH_SUPPORTS_MSI
+	select HAVE_UNSTABLE_SCHED_CLOCK
+	select HAVE_IDE
+	select HAVE_OPROFILE
+	select HAVE_KPROBES
+	select HAVE_KRETPROBES
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
+	select HAVE_FUNCTION_TRACER
+	select HAVE_DMA_ATTRS
+	select HAVE_KVM
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_DMA_API_DEBUG
+	select HAVE_GENERIC_HARDIRQS
+	select GENERIC_IRQ_PROBE
+	select GENERIC_PENDING_IRQ if SMP
+	select IRQ_PER_CPU
+	select GENERIC_IRQ_SHOW
+	default y
+	help
+	  The Itanium Processor Family is Intel's 64-bit successor to
+	  the 32-bit X86 line.  The IA-64 Linux project has a home
+	  page at <http://www.linuxia64.org/> and a mailing list at
+	  <linux-ia64@vger.kernel.org>.
+
+config 64BIT
+	bool
+	select ATA_NONSTANDARD if ATA
+	default y
+
+config ZONE_DMA
+	def_bool y
+	depends on !IA64_SGI_SN2
+
+config QUICKLIST
+	bool
+	default y
+
+config MMU
+	bool
+	default y
+
+config ARCH_DMA_ADDR_T_64BIT
+	def_bool y
+
+config NEED_DMA_MAP_STATE
+	def_bool y
+
+config NEED_SG_DMA_LENGTH
+	def_bool y
+
+config SWIOTLB
+       bool
+
+config STACKTRACE_SUPPORT
+	def_bool y
+
+config GENERIC_LOCKBREAK
+	def_bool n
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+	default y
+
+config HUGETLB_PAGE_SIZE_VARIABLE
+	bool
+	depends on HUGETLB_PAGE
+	default y
+
+config GENERIC_CALIBRATE_DELAY
+	bool
+	default y
+
+config GENERIC_TIME_VSYSCALL
+	bool
+	default y
+
+config HAVE_SETUP_PER_CPU_AREA
+	def_bool y
+
+config DMI
+	bool
+	default y
+
+config EFI
+	bool
+	default y
+
+config GENERIC_IOMAP
+	bool
+	default y
+
+config SCHED_OMIT_FRAME_POINTER
+	bool
+	default y
+
+config IA64_UNCACHED_ALLOCATOR
+	bool
+	select GENERIC_ALLOCATOR
+
+config ARCH_USES_PG_UNCACHED
+	def_bool y
+	depends on IA64_UNCACHED_ALLOCATOR
+
+config AUDIT_ARCH
+	bool
+	default y
+
+menuconfig PARAVIRT_GUEST
+	bool "Paravirtualized guest support"
+	help
+	  Say Y here to get to see options related to running Linux under
+	  various hypervisors.  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if PARAVIRT_GUEST
+
+config PARAVIRT
+	bool "Enable paravirtualization code"
+	depends on PARAVIRT_GUEST
+	default y
+	bool
+	default y
+	help
+	  This changes the kernel so it can modify itself when it is run
+	  under a hypervisor, potentially improving performance significantly
+	  over full virtualization.  However, when run without a hypervisor
+	  the kernel is theoretically slower and slightly larger.
+
+
+source "arch/ia64/xen/Kconfig"
+
+endif
+
+choice
+	prompt "System type"
+	default IA64_GENERIC
+
+config IA64_GENERIC
+	bool "generic"
+	select NUMA
+	select ACPI_NUMA
+	select SWIOTLB
+	select PCI_MSI
+	select DMAR
+	help
+	  This selects the system type of your hardware.  A "generic" kernel
+	  will run on any supported IA-64 system.  However, if you configure
+	  a kernel for your specific system, it will be faster and smaller.
+
+	  generic		For any supported IA-64 system
+	  DIG-compliant		For DIG ("Developer's Interface Guide") compliant systems
+	  DIG+Intel+IOMMU	For DIG systems with Intel IOMMU
+	  HP-zx1/sx1000		For HP systems
+	  HP-zx1/sx1000+swiotlb	For HP systems with (broken) DMA-constrained devices.
+	  SGI-SN2		For SGI Altix systems
+	  SGI-UV		For SGI UV systems
+	  Ski-simulator		For the HP simulator <http://www.hpl.hp.com/research/linux/ski/>
+	  Xen-domU		For xen domU system
+
+	  If you don't know what to do, choose "generic".
+
+config IA64_DIG
+	bool "DIG-compliant"
+	select SWIOTLB
+
+config IA64_DIG_VTD
+	bool "DIG+Intel+IOMMU"
+	select DMAR
+	select PCI_MSI
+
+config IA64_HP_ZX1
+	bool "HP-zx1/sx1000"
+	help
+	  Build a kernel that runs on HP zx1 and sx1000 systems.  This adds
+	  support for the HP I/O MMU.
+
+config IA64_HP_ZX1_SWIOTLB
+	bool "HP-zx1/sx1000 with software I/O TLB"
+	select SWIOTLB
+	help
+	  Build a kernel that runs on HP zx1 and sx1000 systems even when they
+	  have broken PCI devices which cannot DMA to full 32 bits.  Apart
+	  from support for the HP I/O MMU, this includes support for the software
+	  I/O TLB, which allows supporting the broken devices at the expense of
+	  wasting some kernel memory (about 2MB by default).
+
+config IA64_SGI_SN2
+	bool "SGI-SN2"
+	select NUMA
+	select ACPI_NUMA
+	help
+	  Selecting this option will optimize the kernel for use on sn2 based
+	  systems, but the resulting kernel binary will not run on other
+	  types of ia64 systems.  If you have an SGI Altix system, it's safe
+	  to select this option.  If in doubt, select ia64 generic support
+	  instead.
+
+config IA64_SGI_UV
+	bool "SGI-UV"
+	select NUMA
+	select ACPI_NUMA
+	select SWIOTLB
+	help
+	  Selecting this option will optimize the kernel for use on UV based
+	  systems, but the resulting kernel binary will not run on other
+	  types of ia64 systems.  If you have an SGI UV system, it's safe
+	  to select this option.  If in doubt, select ia64 generic support
+	  instead.
+
+config IA64_HP_SIM
+	bool "Ski-simulator"
+	select SWIOTLB
+
+config IA64_XEN_GUEST
+	bool "Xen guest"
+	select SWIOTLB
+	depends on XEN
+	help
+	  Build a kernel that runs on Xen guest domain. At this moment only
+	  16KB page size in supported.
+
+endchoice
+
+choice
+	prompt "Processor type"
+	default ITANIUM
+
+config ITANIUM
+	bool "Itanium"
+	help
+	  Select your IA-64 processor type.  The default is Itanium.
+	  This choice is safe for all IA-64 systems, but may not perform
+	  optimally on systems with, say, Itanium 2 or newer processors.
+
+config MCKINLEY
+	bool "Itanium 2"
+	help
+	  Select this to configure for an Itanium 2 (McKinley) processor.
+
+endchoice
+
+choice
+	prompt "Kernel page size"
+	default IA64_PAGE_SIZE_16KB
+
+config IA64_PAGE_SIZE_4KB
+	bool "4KB"
+	help
+	  This lets you select the page size of the kernel.  For best IA-64
+	  performance, a page size of 8KB or 16KB is recommended.  For best
+	  IA-32 compatibility, a page size of 4KB should be selected (the vast
+	  majority of IA-32 binaries work perfectly fine with a larger page
+	  size).  For Itanium 2 or newer systems, a page size of 64KB can also
+	  be selected.
+
+	  4KB                For best IA-32 compatibility
+	  8KB                For best IA-64 performance
+	  16KB               For best IA-64 performance
+	  64KB               Requires Itanium 2 or newer processor.
+
+	  If you don't know what to do, choose 16KB.
+
+config IA64_PAGE_SIZE_8KB
+	bool "8KB"
+
+config IA64_PAGE_SIZE_16KB
+	bool "16KB"
+
+config IA64_PAGE_SIZE_64KB
+	depends on !ITANIUM
+	bool "64KB"
+
+endchoice
+
+choice
+	prompt "Page Table Levels"
+	default PGTABLE_3
+
+config PGTABLE_3
+	bool "3 Levels"
+
+config PGTABLE_4
+	depends on !IA64_PAGE_SIZE_64KB
+	bool "4 Levels"
+
+endchoice
+
+if IA64_HP_SIM
+config HZ
+	default 32
+endif
+
+if !IA64_HP_SIM
+source kernel/Kconfig.hz
+endif
+
+config IA64_BRL_EMU
+	bool
+	depends on ITANIUM
+	default y
+
+# align cache-sensitive data to 128 bytes
+config IA64_L1_CACHE_SHIFT
+	int
+	default "7" if MCKINLEY
+	default "6" if ITANIUM
+
+config IA64_CYCLONE
+	bool "Cyclone (EXA) Time Source support"
+	help
+	  Say Y here to enable support for IBM EXA Cyclone time source.
+	  If you're unsure, answer N.
+
+config IOSAPIC
+	bool
+	depends on !IA64_HP_SIM
+	default y
+
+config FORCE_MAX_ZONEORDER
+	int "MAX_ORDER (11 - 17)"  if !HUGETLB_PAGE
+	range 11 17  if !HUGETLB_PAGE
+	default "17" if HUGETLB_PAGE
+	default "11"
+
+config VIRT_CPU_ACCOUNTING
+	bool "Deterministic task and CPU time accounting"
+	default n
+	help
+	  Select this option to enable more accurate task and CPU time
+	  accounting.  This is done by reading a CPU counter on each
+	  kernel entry and exit and on transitions within the kernel
+	  between system, softirq and hardirq state, so there is a
+	  small performance impact.
+	  If in doubt, say N here.
+
+config SMP
+	bool "Symmetric multi-processing support"
+	select USE_GENERIC_SMP_HELPERS
+	help
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, say N.  If you have a system with more
+	  than one CPU, say Y.
+
+	  If you say N here, the kernel will run on single and multiprocessor
+	  systems, but will use only one CPU of a multiprocessor system.  If
+	  you say Y here, the kernel will run on many, but not all,
+	  single processor systems.  On a single processor system, the kernel
+	  will run faster if you say N here.
+
+	  See also the SMP-HOWTO available at
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  If you don't know what to do here, say N.
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-4096)"
+	range 2 4096
+	depends on SMP
+	default "4096"
+	help
+	  You should set this to the number of CPUs in your system, but
+	  keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but
+	  only use 2 CPUs on a >2 CPU system.  Setting this to a value larger
+	  than 64 will cause the use of a CPU mask array, causing a small
+	  performance hit.
+
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+	depends on SMP && EXPERIMENTAL
+	select HOTPLUG
+	default n
+	---help---
+	  Say Y here to experiment with turning CPUs off and on.  CPUs
+	  can be controlled through /sys/devices/system/cpu/cpu#.
+	  Say N if you want to disable CPU hotplug.
+
+config ARCH_ENABLE_MEMORY_HOTPLUG
+	def_bool y
+
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+
+config SCHED_SMT
+	bool "SMT scheduler support"
+	depends on SMP
+	help
+	  Improves the CPU scheduler's decision making when dealing with
+	  Intel IA64 chips with MultiThreading at a cost of slightly increased
+	  overhead in some places. If unsure say N here.
+
+config PERMIT_BSP_REMOVE
+	bool "Support removal of Bootstrap Processor"
+	depends on HOTPLUG_CPU
+	default n
+	---help---
+	Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU
+	support. 
+
+config FORCE_CPEI_RETARGET
+	bool "Force assumption that CPEI can be re-targeted"
+	depends on PERMIT_BSP_REMOVE
+	default n
+	---help---
+	Say Y if you need to force the assumption that CPEI can be re-targeted to
+	any cpu in the system. This hint is available via ACPI 3.0 specifications.
+	Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP.
+	This option it useful to enable this feature on older BIOS's as well.
+	You can also enable this by using boot command line option force_cpei=1.
+
+source "kernel/Kconfig.preempt"
+
+source "mm/Kconfig"
+
+config ARCH_SELECT_MEMORY_MODEL
+	def_bool y
+
+config ARCH_DISCONTIGMEM_ENABLE
+	def_bool y
+	help
+	  Say Y to support efficient handling of discontiguous physical memory,
+	  for architectures which are either NUMA (Non-Uniform Memory Access)
+	  or have huge holes in the physical address space for other reasons.
+ 	  See <file:Documentation/vm/numa> for more.
+
+config ARCH_FLATMEM_ENABLE
+	def_bool y
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	depends on ARCH_DISCONTIGMEM_ENABLE
+	select SPARSEMEM_VMEMMAP_ENABLE
+
+config ARCH_DISCONTIGMEM_DEFAULT
+	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB)
+	depends on ARCH_DISCONTIGMEM_ENABLE
+
+config NUMA
+	bool "NUMA support"
+	depends on !IA64_HP_SIM && !FLATMEM
+	default y if IA64_SGI_SN2
+	select ACPI_NUMA if ACPI
+	help
+	  Say Y to compile the kernel to support NUMA (Non-Uniform Memory
+	  Access).  This option is for configuring high-end multiprocessor
+	  server systems.  If in doubt, say N.
+
+config NODES_SHIFT
+	int "Max num nodes shift(3-10)"
+	range 3 10
+	default "10"
+	depends on NEED_MULTIPLE_NODES
+	help
+	  This option specifies the maximum number of nodes in your SSI system.
+	  MAX_NUMNODES will be 2^(This value).
+	  If in doubt, use the default.
+
+config ARCH_POPULATES_NODE_MAP
+	def_bool y
+
+# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
+# VIRTUAL_MEM_MAP has been retained for historical reasons.
+config VIRTUAL_MEM_MAP
+	bool "Virtual mem map"
+	depends on !SPARSEMEM
+	default y if !IA64_HP_SIM
+	help
+	  Say Y to compile the kernel with support for a virtual mem map.
+	  This code also only takes effect if a memory hole of greater than
+	  1 Gb is found during boot.  You must turn this option on if you
+	  require the DISCONTIGMEM option for your machine. If you are
+	  unsure, say Y.
+
+config HOLES_IN_ZONE
+	bool
+	default y if VIRTUAL_MEM_MAP
+
+config HAVE_ARCH_EARLY_PFN_TO_NID
+	def_bool NUMA && SPARSEMEM
+
+config HAVE_ARCH_NODEDATA_EXTENSION
+	def_bool y
+	depends on NUMA
+
+config USE_PERCPU_NUMA_NODE_ID
+	def_bool y
+	depends on NUMA
+
+config HAVE_MEMORYLESS_NODES
+	def_bool NUMA
+
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+	depends on PROC_KCORE
+
+config IA64_MCA_RECOVERY
+	tristate "MCA recovery from errors other than TLB."
+
+config PERFMON
+	bool "Performance monitor support"
+	help
+	  Selects whether support for the IA-64 performance monitor hardware
+	  is included in the kernel.  This makes some kernel data-structures a
+	  little bigger and slows down execution a bit, but it is generally
+	  a good idea to turn this on.  If you're unsure, say Y.
+
+config IA64_PALINFO
+	tristate "/proc/pal support"
+	help
+	  If you say Y here, you are able to get PAL (Processor Abstraction
+	  Layer) information in /proc/pal.  This contains useful information
+	  about the processors in your systems, such as cache and TLB sizes
+	  and the PAL firmware version in use.
+
+	  To use this option, you have to ensure that the "/proc file system
+	  support" (CONFIG_PROC_FS) is enabled, too.
+
+config IA64_MC_ERR_INJECT
+	tristate "MC error injection support"
+	help
+	  Adds support for MC error injection. If enabled, the kernel 
+	  will provide a sysfs interface for user applications to
+	  call MC error injection PAL procedures to inject various errors.
+	  This is a useful tool for MCA testing.
+
+	  If you're unsure, do not select this option.
+
+config SGI_SN
+	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
+
+config IA64_ESI
+	bool "ESI (Extensible SAL Interface) support"
+	help
+	  If you say Y here, support is built into the kernel to
+	  make ESI calls.  ESI calls are used to support vendor-specific
+	  firmware extensions, such as the ability to inject memory-errors
+	  for test-purposes.  If you're unsure, say N.
+
+config IA64_HP_AML_NFW
+	bool "Support ACPI AML calls to native firmware"
+	help
+	  This driver installs a global ACPI Operation Region handler for
+	  region 0xA1.  AML methods can use this OpRegion to call arbitrary
+	  native firmware functions.  The driver installs the OpRegion
+	  handler if there is an HPQ5001 device or if the user supplies
+	  the "force" module parameter, e.g., with the "aml_nfw.force"
+	  kernel command line option.
+
+source "drivers/sn/Kconfig"
+
+config KEXEC
+	bool "kexec system call (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+	help
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similarity to the exec system call.
+
+	  It is an ongoing process to be certain the hardware in a machine
+	  is properly shutdown, so do not be surprised if this code does not
+	  initially work for you.  It may help to enable device hotplugging
+	  support.  As of this writing the exact hardware interface is
+	  strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+	  bool "kernel crash dumps"
+	  depends on IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+	  help
+	    Generate crash dump after being started by kexec.
+
+source "drivers/firmware/Kconfig"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+menu "Power management and ACPI options"
+
+source "kernel/power/Kconfig"
+
+source "drivers/acpi/Kconfig"
+
+if PM
+
+source "arch/ia64/kernel/cpufreq/Kconfig"
+
+endif
+
+endmenu
+
+if !IA64_HP_SIM
+
+menu "Bus options (PCI, PCMCIA)"
+
+config PCI
+	bool "PCI support"
+	help
+	  Real IA-64 machines all have PCI/PCI-X/PCI Express busses.  Say Y
+	  here unless you are using a simulator without PCI support.
+
+config PCI_DOMAINS
+	def_bool PCI
+
+config PCI_SYSCALL
+	def_bool PCI
+
+source "drivers/pci/pcie/Kconfig"
+
+source "drivers/pci/Kconfig"
+
+source "drivers/pci/hotplug/Kconfig"
+
+source "drivers/pcmcia/Kconfig"
+
+config DMAR
+        bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
+        depends on IA64_GENERIC && ACPI && EXPERIMENTAL
+	help
+	  DMA remapping (DMAR) devices support enables independent address
+	  translations for Direct Memory Access (DMA) from devices.
+	  These DMA remapping devices are reported via ACPI tables
+	  and include PCI device scope covered by these DMA
+	  remapping devices.
+
+config DMAR_DEFAULT_ON
+	def_bool y
+	prompt "Enable DMA Remapping Devices by default"
+	depends on DMAR
+	help
+	  Selecting this option will enable a DMAR device at boot time if
+	  one is found. If this option is not selected, DMAR support can
+	  be enabled by passing intel_iommu=on to the kernel. It is
+	  recommended you say N here while the DMAR code remains
+	  experimental.
+
+endmenu
+
+endif
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "arch/ia64/hp/sim/Kconfig"
+
+config MSPEC
+	tristate "Memory special operations driver"
+	depends on IA64
+	select IA64_UNCACHED_ALLOCATOR
+	help
+	  If you have an ia64 and you want to enable memory special
+	  operations support (formerly known as fetchop), say Y here,
+	  otherwise say N.
+
+source "fs/Kconfig"
+
+source "arch/ia64/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "arch/ia64/kvm/Kconfig"
+
+source "lib/Kconfig"
+
+config IOMMU_HELPER
+	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
+
+config IOMMU_API
+	def_bool (DMAR)
diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug
new file mode 100644
index 00000000..de9d507b
--- /dev/null
+++ b/arch/ia64/Kconfig.debug
@@ -0,0 +1,64 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+choice
+	prompt "Physical memory granularity"
+	default IA64_GRANULE_64MB
+
+config IA64_GRANULE_16MB
+	bool "16MB"
+	help
+	  IA-64 identity-mapped regions use a large page size called "granules".
+
+	  Select "16MB" for a small granule size.
+	  Select "64MB" for a large granule size.  This is the current default.
+
+config IA64_GRANULE_64MB
+	bool "64MB"
+	depends on !(IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_SGI_SN2)
+
+endchoice
+
+config IA64_PRINT_HAZARDS
+	bool "Print possible IA-64 dependency violations to console"
+	depends on DEBUG_KERNEL
+	help
+	  Selecting this option prints more information for Illegal Dependency
+	  Faults, that is, for Read-after-Write (RAW), Write-after-Write (WAW),
+	  or Write-after-Read (WAR) violations.  This option is ignored if you
+	  are compiling for an Itanium A step processor
+	  (CONFIG_ITANIUM_ASTEP_SPECIFIC).  If you're unsure, select Y.
+
+config DISABLE_VHPT
+	bool "Disable VHPT"
+	depends on DEBUG_KERNEL
+	help
+	  The Virtual Hash Page Table (VHPT) enhances virtual address
+	  translation performance.  Normally you want the VHPT active but you
+	  can select this option to disable the VHPT for debugging.  If you're
+	  unsure, answer N.
+
+config IA64_DEBUG_CMPXCHG
+	bool "Turn on compare-and-exchange bug checking (slow!)"
+	depends on DEBUG_KERNEL
+	help
+	  Selecting this option turns on bug checking for the IA-64
+	  compare-and-exchange instructions.  This is slow!  Itaniums
+	  from step B3 or later don't have this problem. If you're unsure,
+	  select N.
+
+config IA64_DEBUG_IRQ
+	bool "Turn on irq debug checks (slow!)"
+	depends on DEBUG_KERNEL
+	help
+	  Selecting this option turns on bug checking for the IA-64 irq_save
+	  and restore instructions.  It's useful for tracking down spinlock
+	  problems, but slow!  If you're unsure, select N.
+
+config SYSVIPC_COMPAT
+	bool
+	depends on COMPAT && SYSVIPC
+	default y
+
+endmenu
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
new file mode 100644
index 00000000..be7bfa12
--- /dev/null
+++ b/arch/ia64/Makefile
@@ -0,0 +1,105 @@
+#
+# ia64/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1998-2004 by David Mosberger-Tang <davidm@hpl.hp.com>
+#
+
+KBUILD_DEFCONFIG := generic_defconfig
+
+NM := $(CROSS_COMPILE)nm -B
+READELF := $(CROSS_COMPILE)readelf
+
+export AWK
+
+CHECKFLAGS	+= -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
+
+OBJCOPYFLAGS	:= --strip-all
+LDFLAGS_vmlinux	:= -static
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds
+KBUILD_AFLAGS_KERNEL := -mconstant-gp
+EXTRA		:=
+
+cflags-y	:= -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
+		   -falign-functions=32 -frename-registers -fno-optimize-sibling-calls
+KBUILD_CFLAGS_KERNEL := -mconstant-gp
+
+GAS_STATUS	= $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
+KBUILD_CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)")
+
+ifeq ($(GAS_STATUS),buggy)
+$(error Sorry, you need a newer version of the assember, one that is built from	\
+	a source-tree that post-dates 18-Dec-2002.  You can find a pre-compiled	\
+	static binary of such an assembler at:					\
+										\
+		ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
+endif
+
+KBUILD_CFLAGS += $(cflags-y)
+head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
+
+libs-y				+= arch/ia64/lib/
+core-y				+= arch/ia64/kernel/ arch/ia64/mm/
+core-$(CONFIG_IA64_DIG) 	+= arch/ia64/dig/
+core-$(CONFIG_IA64_DIG_VTD) 	+= arch/ia64/dig/
+core-$(CONFIG_IA64_GENERIC) 	+= arch/ia64/dig/
+core-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/dig/
+core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
+core-$(CONFIG_IA64_XEN_GUEST)	+= arch/ia64/dig/
+core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
+core-$(CONFIG_IA64_SGI_UV)	+= arch/ia64/uv/
+core-$(CONFIG_KVM) 		+= arch/ia64/kvm/
+core-$(CONFIG_XEN)		+= arch/ia64/xen/
+
+drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
+drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/
+drivers-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/hp/common/ arch/ia64/hp/zx1/
+drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
+drivers-$(CONFIG_IA64_GENERIC)	+= arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/ arch/ia64/uv/
+drivers-$(CONFIG_OPROFILE)	+= arch/ia64/oprofile/
+
+boot := arch/ia64/hp/sim/boot
+
+PHONY += boot compressed check
+
+all: compressed unwcheck
+
+compressed: vmlinux.gz
+
+vmlinuz: vmlinux.gz
+
+vmlinux.gz: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+unwcheck: vmlinux
+	-$(Q)READELF=$(READELF) python $(srctree)/arch/ia64/scripts/unwcheck.py $<
+
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
+
+CLEAN_FILES += vmlinux.gz bootloader
+
+boot:	lib/lib.a vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+install: vmlinux.gz
+	sh $(srctree)/arch/ia64/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)"
+
+define archhelp
+  echo '* compressed	- Build compressed kernel image'
+  echo '  install	- Install compressed kernel image'
+  echo '  boot		- Build vmlinux and bootloader for Ski simulator'
+  echo '* unwcheck	- Check vmlinux for invalid unwind info'
+endef
+
+archprepare: make_nr_irqs_h FORCE
+PHONY += make_nr_irqs_h FORCE
+
+make_nr_irqs_h: FORCE
+	$(Q)$(MAKE) $(build)=arch/ia64/kernel include/generated/nr-irqs.h
diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig
new file mode 100644
index 00000000..cf5993f0
--- /dev/null
+++ b/arch/ia64/configs/bigsur_defconfig
@@ -0,0 +1,117 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_IA64_DIG=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_PREEMPT=y
+# CONFIG_VIRTUAL_MEM_MAP is not set
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_EFI_VARS=y
+CONFIG_BINFMT_MISC=m
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=m
+CONFIG_IDE=m
+CONFIG_BLK_DEV_IDECD=m
+CONFIG_BLK_DEV_GENERIC=m
+CONFIG_BLK_DEV_PIIX=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_QLOGIC_1280=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+CONFIG_NET_PCI=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_AGP=m
+CONFIG_AGP_I460=m
+CONFIG_DRM=m
+CONFIG_DRM_R128=m
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_CS4281=m
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=m
+CONFIG_USB_DEVICEFS=y
+CONFIG_USB_MON=m
+CONFIG_USB_UHCI_HCD=m
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+CONFIG_USB_STORAGE=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_UDF_FS=m
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SGI_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=m
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
new file mode 100644
index 00000000..1d7bca0a
--- /dev/null
+++ b/arch/ia64/configs/generic_defconfig
@@ -0,0 +1,236 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_CGROUPS=y
+CONFIG_CPUSETS=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_MCKINLEY=y
+CONFIG_IA64_PAGE_SIZE_64KB=y
+CONFIG_IA64_CYCLONE=y
+CONFIG_SMP=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_IA64_MCA_RECOVERY=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
+CONFIG_EFI_VARS=y
+CONFIG_BINFMT_MISC=m
+CONFIG_ACPI_PROCFS=y
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_DOCK=y
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_CONTAINER=m
+CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI_ACPI=m
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_CONNECTOR=y
+# CONFIG_PNP_DEBUG_MESSAGES is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SGI_IOC4=y
+CONFIG_SGI_XP=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_BLK_DEV_SGIIOC4=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_QLOGIC_1280=y
+CONFIG_ATA=y
+CONFIG_ATA_PIIX=y
+CONFIG_SATA_VITESSE=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_NET_ETHERNET=y
+CONFIG_NET_TULIP=y
+CONFIG_TULIP=m
+CONFIG_NET_PCI=y
+CONFIG_E100=m
+CONFIG_E1000=y
+CONFIG_IGB=y
+CONFIG_TIGON3=y
+CONFIG_NETCONSOLE=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_GAMEPORT=m
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SGI_SNSC=y
+CONFIG_SGI_TIOCX=y
+CONFIG_SGI_MBCS=m
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=6
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_SGI_L1_CONSOLE=y
+CONFIG_SERIAL_SGI_IOC4=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+CONFIG_RAW_DRIVER=m
+CONFIG_HPET=y
+CONFIG_AGP=m
+CONFIG_AGP_I460=m
+CONFIG_AGP_HP_ZX1=m
+CONFIG_AGP_SGI_TIOCA=m
+CONFIG_DRM=m
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_SIS=m
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_VERBOSE_PRINTK=y
+CONFIG_SND_DUMMY=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+CONFIG_SND_SERIAL_U16550=m
+CONFIG_SND_MPU401=m
+CONFIG_SND_CS4281=m
+CONFIG_SND_CS46XX=m
+CONFIG_SND_EMU10K1=m
+CONFIG_SND_FM801=m
+CONFIG_HID_GYRATION=m
+CONFIG_HID_PANTHERLORD=m
+CONFIG_HID_PETALYNX=m
+CONFIG_HID_SAMSUNG=m
+CONFIG_HID_SONY=m
+CONFIG_HID_SUNPLUS=m
+CONFIG_USB=m
+CONFIG_USB_DEVICEFS=y
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_UHCI_HCD=m
+CONFIG_USB_STORAGE=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_MSPEC=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_UDF_FS=m
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V4=y
+CONFIG_SMB_FS=m
+CONFIG_SMB_NLS_DEFAULT=y
+CONFIG_CIFS=m
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SGI_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRC_T10DIF=y
+CONFIG_MISC_DEVICES=y
diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig
new file mode 100644
index 00000000..b11fa880
--- /dev/null
+++ b/arch/ia64/configs/gensparse_defconfig
@@ -0,0 +1,211 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MCKINLEY=y
+CONFIG_IA64_CYCLONE=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=512
+CONFIG_HOTPLUG_CPU=y
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_IA64_MCA_RECOVERY=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_SGI_IOC3=y
+CONFIG_EFI_VARS=y
+CONFIG_BINFMT_MISC=m
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_CONTAINER=m
+CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI_ACPI=m
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SGI_IOC4=y
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_BLK_DEV_SGIIOC4=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_QLOGIC_1280=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
+CONFIG_FUSION_FC=m
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_NET_ETHERNET=y
+CONFIG_NET_TULIP=y
+CONFIG_TULIP=m
+CONFIG_NET_PCI=y
+CONFIG_E100=m
+CONFIG_E1000=y
+CONFIG_TIGON3=y
+CONFIG_NETCONSOLE=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_GAMEPORT=m
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SGI_SNSC=y
+CONFIG_SGI_TIOCX=y
+CONFIG_SGI_MBCS=m
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=6
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_SGI_L1_CONSOLE=y
+CONFIG_SERIAL_SGI_IOC4=y
+CONFIG_SERIAL_SGI_IOC3=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+CONFIG_RAW_DRIVER=m
+CONFIG_HPET=y
+CONFIG_AGP=m
+CONFIG_AGP_I460=m
+CONFIG_AGP_HP_ZX1=m
+CONFIG_AGP_SGI_TIOCA=m
+CONFIG_DRM=m
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_SIS=m
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_VERBOSE_PRINTK=y
+CONFIG_SND_DUMMY=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+CONFIG_SND_SERIAL_U16550=m
+CONFIG_SND_MPU401=m
+CONFIG_SND_CS4281=m
+CONFIG_SND_CS46XX=m
+CONFIG_SND_EMU10K1=m
+CONFIG_SND_FM801=m
+CONFIG_USB=m
+CONFIG_USB_DEVICEFS=y
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_UHCI_HCD=m
+CONFIG_USB_STORAGE=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_UDF_FS=m
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V4=y
+CONFIG_SMB_FS=m
+CONFIG_SMB_NLS_DEFAULT=y
+CONFIG_CIFS=m
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SGI_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_MISC_DEVICES=y
diff --git a/arch/ia64/configs/sim_defconfig b/arch/ia64/configs/sim_defconfig
new file mode 100644
index 00000000..b4548a3e
--- /dev/null
+++ b/arch/ia64/configs/sim_defconfig
@@ -0,0 +1,56 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_SYSVIPC=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=16
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_IA64_HP_SIM=y
+CONFIG_MCKINLEY=y
+CONFIG_IA64_PAGE_SIZE_64KB=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=64
+CONFIG_PREEMPT=y
+CONFIG_IA64_PALINFO=m
+CONFIG_EFI_VARS=y
+CONFIG_BINFMT_MISC=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_EFI_RTC=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_HP_SIMETH=y
+CONFIG_HP_SIMSERIAL=y
+CONFIG_HP_SIMSERIAL_CONSOLE=y
+CONFIG_HP_SIMSCSI=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_PROC_KCORE=y
+CONFIG_HUGETLBFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_EFI_PARTITION=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_INFO=y
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
new file mode 100644
index 00000000..137a453d
--- /dev/null
+++ b/arch/ia64/configs/tiger_defconfig
@@ -0,0 +1,186 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_IA64_DIG=y
+CONFIG_MCKINLEY=y
+CONFIG_IA64_PAGE_SIZE_64KB=y
+CONFIG_IA64_CYCLONE=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PERMIT_BSP_REMOVE=y
+CONFIG_FORCE_CPEI_RETARGET=y
+CONFIG_IA64_MCA_RECOVERY=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_KEXEC=y
+CONFIG_EFI_VARS=y
+CONFIG_BINFMT_MISC=m
+CONFIG_ACPI_PROCFS=y
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_CONTAINER=m
+CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI_ACPI=m
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_QLOGIC_1280=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
+CONFIG_FUSION_FC=y
+CONFIG_FUSION_CTL=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_NET_ETHERNET=y
+CONFIG_NET_TULIP=y
+CONFIG_TULIP=m
+CONFIG_NET_PCI=y
+CONFIG_E100=m
+CONFIG_E1000=y
+CONFIG_TIGON3=y
+CONFIG_NETCONSOLE=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_GAMEPORT=m
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=6
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+CONFIG_RAW_DRIVER=m
+CONFIG_HPET=y
+CONFIG_AGP=m
+CONFIG_AGP_I460=m
+CONFIG_DRM=m
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_SIS=m
+CONFIG_USB=y
+CONFIG_USB_DEVICEFS=y
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_UDF_FS=m
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V4=y
+CONFIG_SMB_FS=m
+CONFIG_SMB_NLS_DEFAULT=y
+CONFIG_CIFS=m
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SGI_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_IA64_GRANULE_16MB=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MD5=y
diff --git a/arch/ia64/configs/xen_domu_defconfig b/arch/ia64/configs/xen_domu_defconfig
new file mode 100644
index 00000000..2bf76e41
--- /dev/null
+++ b/arch/ia64/configs/xen_domu_defconfig
@@ -0,0 +1,198 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARAVIRT_GUEST=y
+CONFIG_IA64_XEN_GUEST=y
+CONFIG_MCKINLEY=y
+CONFIG_IA64_CYCLONE=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PERMIT_BSP_REMOVE=y
+CONFIG_FORCE_CPEI_RETARGET=y
+CONFIG_IA64_MCA_RECOVERY=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_KEXEC=y
+CONFIG_EFI_VARS=y
+CONFIG_BINFMT_MISC=m
+CONFIG_ACPI_PROCFS=y
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_CONTAINER=m
+CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI_ACPI=m
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_LRO is not set
+# CONFIG_IPV6 is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_QLOGIC_1280=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
+CONFIG_FUSION_FC=y
+CONFIG_FUSION_CTL=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_NET_ETHERNET=y
+CONFIG_NET_TULIP=y
+CONFIG_TULIP=m
+CONFIG_NET_PCI=y
+CONFIG_E100=m
+CONFIG_E1000=y
+CONFIG_TIGON3=y
+CONFIG_NETCONSOLE=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_GAMEPORT=m
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=6
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+CONFIG_RAW_DRIVER=m
+CONFIG_HPET=y
+CONFIG_AGP=m
+CONFIG_DRM=m
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_SIS=m
+CONFIG_HID_GYRATION=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_USB=y
+CONFIG_USB_DEVICEFS=y
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_UDF_FS=m
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V4=y
+CONFIG_SMB_FS=m
+CONFIG_SMB_NLS_DEFAULT=y
+CONFIG_CIFS=m
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SGI_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+CONFIG_IA64_GRANULE_16MB=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig
new file mode 100644
index 00000000..1d42827f
--- /dev/null
+++ b/arch/ia64/configs/zx1_defconfig
@@ -0,0 +1,160 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_SYSVIPC=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+CONFIG_IA64_HP_ZX1=y
+CONFIG_MCKINLEY=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_HOTPLUG_CPU=y
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_IA64_MCA_RECOVERY=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_CRASH_DUMP=y
+CONFIG_EFI_VARS=y
+CONFIG_BINFMT_MISC=y
+CONFIG_ACPI_PROCFS=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_ACPI=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_CHR_DEV_OSST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_QLOGIC_1280=y
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
+CONFIG_FUSION_FC=y
+CONFIG_FUSION_CTL=m
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+CONFIG_NET_ETHERNET=y
+CONFIG_NET_TULIP=y
+CONFIG_TULIP=y
+CONFIG_TULIP_MWI=y
+CONFIG_TULIP_MMIO=y
+CONFIG_TULIP_NAPI=y
+CONFIG_TULIP_NAPI_HW_MITIGATION=y
+CONFIG_NET_PCI=y
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_TIGON3=y
+CONFIG_INPUT_JOYDEV=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=8
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_AGP=y
+CONFIG_AGP_HP_ZX1=y
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_FB_RADEON=y
+CONFIG_FB_RADEON_DEBUG=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_MIXER_OSS=y
+CONFIG_SND_PCM_OSS=y
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_FM801=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT3_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_UDF_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_EFI_PARTITION=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=y
+CONFIG_NLS_CODEPAGE_775=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_CODEPAGE_852=y
+CONFIG_NLS_CODEPAGE_855=y
+CONFIG_NLS_CODEPAGE_857=y
+CONFIG_NLS_CODEPAGE_860=y
+CONFIG_NLS_CODEPAGE_861=y
+CONFIG_NLS_CODEPAGE_862=y
+CONFIG_NLS_CODEPAGE_863=y
+CONFIG_NLS_CODEPAGE_864=y
+CONFIG_NLS_CODEPAGE_865=y
+CONFIG_NLS_CODEPAGE_866=y
+CONFIG_NLS_CODEPAGE_869=y
+CONFIG_NLS_CODEPAGE_936=y
+CONFIG_NLS_CODEPAGE_950=y
+CONFIG_NLS_CODEPAGE_932=y
+CONFIG_NLS_CODEPAGE_949=y
+CONFIG_NLS_CODEPAGE_874=y
+CONFIG_NLS_ISO8859_8=y
+CONFIG_NLS_CODEPAGE_1251=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=y
+CONFIG_NLS_ISO8859_3=y
+CONFIG_NLS_ISO8859_4=y
+CONFIG_NLS_ISO8859_5=y
+CONFIG_NLS_ISO8859_6=y
+CONFIG_NLS_ISO8859_7=y
+CONFIG_NLS_ISO8859_9=y
+CONFIG_NLS_ISO8859_13=y
+CONFIG_NLS_ISO8859_14=y
+CONFIG_NLS_ISO8859_15=y
+CONFIG_NLS_KOI8_R=y
+CONFIG_NLS_KOI8_U=y
+CONFIG_NLS_UTF8=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_IA64_PRINT_HAZARDS=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile
new file mode 100644
index 00000000..2f7caddf
--- /dev/null
+++ b/arch/ia64/dig/Makefile
@@ -0,0 +1,14 @@
+#
+# ia64/platform/dig/Makefile
+#
+# Copyright (C) 1999 Silicon Graphics, Inc.
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+#
+
+obj-y := setup.o
+ifeq ($(CONFIG_DMAR), y)
+obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o
+else
+obj-$(CONFIG_IA64_GENERIC) += machvec.o
+endif
+
diff --git a/arch/ia64/dig/machvec.c b/arch/ia64/dig/machvec.c
new file mode 100644
index 00000000..0c55bdaf
--- /dev/null
+++ b/arch/ia64/dig/machvec.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME		dig
+#define MACHVEC_PLATFORM_HEADER		<asm/machvec_dig.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/dig/machvec_vtd.c b/arch/ia64/dig/machvec_vtd.c
new file mode 100644
index 00000000..7cd3eb47
--- /dev/null
+++ b/arch/ia64/dig/machvec_vtd.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME		dig_vtd
+#define MACHVEC_PLATFORM_HEADER		<asm/machvec_dig_vtd.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c
new file mode 100644
index 00000000..9196b330
--- /dev/null
+++ b/arch/ia64/dig/setup.c
@@ -0,0 +1,70 @@
+/*
+ * Platform dependent support for DIG64 platforms.
+ *
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999, 2001 Hewlett-Packard Co
+ * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/kdev_t.h>
+#include <linux/string.h>
+#include <linux/screen_info.h>
+#include <linux/console.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+#include <linux/root_dev.h>
+
+#include <asm/io.h>
+#include <asm/machvec.h>
+#include <asm/system.h>
+
+void __init
+dig_setup (char **cmdline_p)
+{
+	unsigned int orig_x, orig_y, num_cols, num_rows, font_height;
+
+	/*
+	 * Default to /dev/sda2.  This assumes that the EFI partition
+	 * is physical disk 1 partition 1 and the Linux root disk is
+	 * physical disk 1 partition 2.
+	 */
+	ROOT_DEV = Root_SDA2;		/* default to second partition on first drive */
+
+#ifdef CONFIG_SMP
+	init_smp_config();
+#endif
+
+	memset(&screen_info, 0, sizeof(screen_info));
+
+	if (!ia64_boot_param->console_info.num_rows
+	    || !ia64_boot_param->console_info.num_cols)
+	{
+		printk(KERN_WARNING "dig_setup: warning: invalid screen-info, guessing 80x25\n");
+		orig_x = 0;
+		orig_y = 0;
+		num_cols = 80;
+		num_rows = 25;
+		font_height = 16;
+	} else {
+		orig_x = ia64_boot_param->console_info.orig_x;
+		orig_y = ia64_boot_param->console_info.orig_y;
+		num_cols = ia64_boot_param->console_info.num_cols;
+		num_rows = ia64_boot_param->console_info.num_rows;
+		font_height = 400 / num_rows;
+	}
+
+	screen_info.orig_x = orig_x;
+	screen_info.orig_y = orig_y;
+	screen_info.orig_video_cols  = num_cols;
+	screen_info.orig_video_lines = num_rows;
+	screen_info.orig_video_points = font_height;
+	screen_info.orig_video_mode = 3;	/* XXX fake */
+	screen_info.orig_video_isVGA = 1;	/* XXX fake */
+	screen_info.orig_video_ega_bx = 3;	/* XXX fake */
+}
diff --git a/arch/ia64/hp/common/Makefile b/arch/ia64/hp/common/Makefile
new file mode 100644
index 00000000..9e179dd0
--- /dev/null
+++ b/arch/ia64/hp/common/Makefile
@@ -0,0 +1,11 @@
+#
+# ia64/platform/hp/common/Makefile
+#
+# Copyright (C) 2002 Hewlett Packard
+# Copyright (C) Alex Williamson (alex_williamson@hp.com)
+#
+
+obj-y := sba_iommu.o
+obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += hwsw_iommu.o
+obj-$(CONFIG_IA64_GENERIC) += hwsw_iommu.o
+obj-$(CONFIG_IA64_HP_AML_NFW) += aml_nfw.o
diff --git a/arch/ia64/hp/common/aml_nfw.c b/arch/ia64/hp/common/aml_nfw.c
new file mode 100644
index 00000000..22078486
--- /dev/null
+++ b/arch/ia64/hp/common/aml_nfw.c
@@ -0,0 +1,236 @@
+/*
+ * OpRegion handler to allow AML to call native firmware
+ *
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This driver implements HP Open Source Review Board proposal 1842,
+ * which was approved on 9/20/2006.
+ *
+ * For technical documentation, see the HP SPPA Firmware EAS, Appendix F.
+ *
+ * ACPI does not define a mechanism for AML methods to call native firmware
+ * interfaces such as PAL or SAL.  This OpRegion handler adds such a mechanism.
+ * After the handler is installed, an AML method can call native firmware by
+ * storing the arguments and firmware entry point to specific offsets in the
+ * OpRegion.  When AML reads the "return value" offset from the OpRegion, this
+ * handler loads up the arguments, makes the firmware call, and returns the
+ * result.
+ */
+
+#include <linux/module.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <asm/sal.h>
+
+MODULE_AUTHOR("Bjorn Helgaas <bjorn.helgaas@hp.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ACPI opregion handler for native firmware calls");
+
+static int force_register;
+module_param_named(force, force_register, bool, 0);
+MODULE_PARM_DESC(force, "Install opregion handler even without HPQ5001 device");
+
+#define AML_NFW_SPACE		0xA1
+
+struct ia64_pdesc {
+	void *ip;
+	void *gp;
+};
+
+/*
+ * N.B.  The layout of this structure is defined in the HP SPPA FW EAS, and
+ *	 the member offsets are embedded in AML methods.
+ */
+struct ia64_nfw_context {
+	u64 arg[8];
+	struct ia64_sal_retval ret;
+	u64 ip;
+	u64 gp;
+	u64 pad[2];
+};
+
+static void *virt_map(u64 address)
+{
+	if (address & (1UL << 63))
+		return (void *) (__IA64_UNCACHED_OFFSET | address);
+
+	return __va(address);
+}
+
+static void aml_nfw_execute(struct ia64_nfw_context *c)
+{
+	struct ia64_pdesc virt_entry;
+	ia64_sal_handler entry;
+
+	virt_entry.ip = virt_map(c->ip);
+	virt_entry.gp = virt_map(c->gp);
+
+	entry = (ia64_sal_handler) &virt_entry;
+
+	IA64_FW_CALL(entry, c->ret,
+		     c->arg[0], c->arg[1], c->arg[2], c->arg[3],
+		     c->arg[4], c->arg[5], c->arg[6], c->arg[7]);
+}
+
+static void aml_nfw_read_arg(u8 *offset, u32 bit_width, u64 *value)
+{
+	switch (bit_width) {
+	case 8:
+		*value = *(u8 *)offset;
+		break;
+	case 16:
+		*value = *(u16 *)offset;
+		break;
+	case 32:
+		*value = *(u32 *)offset;
+		break;
+	case 64:
+		*value = *(u64 *)offset;
+		break;
+	}
+}
+
+static void aml_nfw_write_arg(u8 *offset, u32 bit_width, u64 *value)
+{
+	switch (bit_width) {
+	case 8:
+		*(u8 *) offset = *value;
+		break;
+	case 16:
+		*(u16 *) offset = *value;
+		break;
+	case 32:
+		*(u32 *) offset = *value;
+		break;
+	case 64:
+		*(u64 *) offset = *value;
+		break;
+	}
+}
+
+static acpi_status aml_nfw_handler(u32 function, acpi_physical_address address,
+	u32 bit_width, u64 *value, void *handler_context,
+	void *region_context)
+{
+	struct ia64_nfw_context *context = handler_context;
+	u8 *offset = (u8 *) context + address;
+
+	if (bit_width !=  8 && bit_width != 16 &&
+	    bit_width != 32 && bit_width != 64)
+		return AE_BAD_PARAMETER;
+
+	if (address + (bit_width >> 3) > sizeof(struct ia64_nfw_context))
+		return AE_BAD_PARAMETER;
+
+	switch (function) {
+	case ACPI_READ:
+		if (address == offsetof(struct ia64_nfw_context, ret))
+			aml_nfw_execute(context);
+		aml_nfw_read_arg(offset, bit_width, value);
+		break;
+	case ACPI_WRITE:
+		aml_nfw_write_arg(offset, bit_width, value);
+		break;
+	}
+
+	return AE_OK;
+}
+
+static struct ia64_nfw_context global_context;
+static int global_handler_registered;
+
+static int aml_nfw_add_global_handler(void)
+{
+	acpi_status status;
+
+	if (global_handler_registered)
+		return 0;
+
+	status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT,
+		AML_NFW_SPACE, aml_nfw_handler, NULL, &global_context);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	global_handler_registered = 1;
+	printk(KERN_INFO "Global 0x%02X opregion handler registered\n",
+		AML_NFW_SPACE);
+	return 0;
+}
+
+static int aml_nfw_remove_global_handler(void)
+{
+	acpi_status status;
+
+	if (!global_handler_registered)
+		return 0;
+
+	status = acpi_remove_address_space_handler(ACPI_ROOT_OBJECT,
+		AML_NFW_SPACE, aml_nfw_handler);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	global_handler_registered = 0;
+	printk(KERN_INFO "Global 0x%02X opregion handler removed\n",
+		AML_NFW_SPACE);
+	return 0;
+}
+
+static int aml_nfw_add(struct acpi_device *device)
+{
+	/*
+	 * We would normally allocate a new context structure and install
+	 * the address space handler for the specific device we found.
+	 * But the HP-UX implementation shares a single global context
+	 * and always puts the handler at the root, so we'll do the same.
+	 */
+	return aml_nfw_add_global_handler();
+}
+
+static int aml_nfw_remove(struct acpi_device *device, int type)
+{
+	return aml_nfw_remove_global_handler();
+}
+
+static const struct acpi_device_id aml_nfw_ids[] = {
+	{"HPQ5001", 0},
+	{"", 0}
+};
+
+static struct acpi_driver acpi_aml_nfw_driver = {
+	.name = "native firmware",
+	.ids = aml_nfw_ids,
+	.ops = {
+		.add = aml_nfw_add,
+		.remove = aml_nfw_remove,
+		},
+};
+
+static int __init aml_nfw_init(void)
+{
+	int result;
+
+	if (force_register)
+		aml_nfw_add_global_handler();
+
+	result = acpi_bus_register_driver(&acpi_aml_nfw_driver);
+	if (result < 0) {
+		aml_nfw_remove_global_handler();
+		return result;
+	}
+
+	return 0;
+}
+
+static void __exit aml_nfw_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_aml_nfw_driver);
+	aml_nfw_remove_global_handler();
+}
+
+module_init(aml_nfw_init);
+module_exit(aml_nfw_exit);
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
new file mode 100644
index 00000000..e4a80d82
--- /dev/null
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2004 Hewlett-Packard Development Company, L.P.
+ *   Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * This is a pseudo I/O MMU which dispatches to the hardware I/O MMU
+ * whenever possible.  We assume that the hardware I/O MMU requires
+ * full 32-bit addressability, as is the case, e.g., for HP zx1-based
+ * systems (there, the I/O MMU window is mapped at 3-4GB).  If a
+ * device doesn't provide full 32-bit addressability, we fall back on
+ * the sw I/O TLB.  This is good enough to let us support broken
+ * hardware such as soundcards which have a DMA engine that can
+ * address only 28 bits.
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/swiotlb.h>
+#include <asm/machvec.h>
+
+extern struct dma_map_ops sba_dma_ops, swiotlb_dma_ops;
+
+/* swiotlb declarations & definitions: */
+extern int swiotlb_late_init_with_default_size (size_t size);
+
+/*
+ * Note: we need to make the determination of whether or not to use
+ * the sw I/O TLB based purely on the device structure.  Anything else
+ * would be unreliable or would be too intrusive.
+ */
+static inline int use_swiotlb(struct device *dev)
+{
+	return dev && dev->dma_mask &&
+		!sba_dma_ops.dma_supported(dev, *dev->dma_mask);
+}
+
+struct dma_map_ops *hwsw_dma_get_ops(struct device *dev)
+{
+	if (use_swiotlb(dev))
+		return &swiotlb_dma_ops;
+	return &sba_dma_ops;
+}
+EXPORT_SYMBOL(hwsw_dma_get_ops);
+
+void __init
+hwsw_init (void)
+{
+	/* default to a smallish 2MB sw I/O TLB */
+	if (swiotlb_late_init_with_default_size (2 * (1<<20)) != 0) {
+#ifdef CONFIG_IA64_GENERIC
+		/* Better to have normal DMA than panic */
+		printk(KERN_WARNING "%s: Failed to initialize software I/O TLB,"
+		       " reverting to hpzx1 platform vector\n", __func__);
+		machvec_init("hpzx1");
+#else
+		panic("Unable to initialize software I/O TLB services");
+#endif
+	}
+}
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
new file mode 100644
index 00000000..80241fe0
--- /dev/null
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -0,0 +1,2233 @@
+/*
+**  IA64 System Bus Adapter (SBA) I/O MMU manager
+**
+**	(c) Copyright 2002-2005 Alex Williamson
+**	(c) Copyright 2002-2003 Grant Grundler
+**	(c) Copyright 2002-2005 Hewlett-Packard Company
+**
+**	Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
+**	Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
+**
+**	This program is free software; you can redistribute it and/or modify
+**	it under the terms of the GNU General Public License as published by
+**      the Free Software Foundation; either version 2 of the License, or
+**      (at your option) any later version.
+**
+**
+** This module initializes the IOC (I/O Controller) found on HP
+** McKinley machines and their successors.
+**
+*/
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/nodemask.h>
+#include <linux/bitops.h>         /* hweight64() */
+#include <linux/crash_dump.h>
+#include <linux/iommu-helper.h>
+#include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
+
+#include <asm/delay.h>		/* ia64_get_itc() */
+#include <asm/io.h>
+#include <asm/page.h>		/* PAGE_OFFSET */
+#include <asm/dma.h>
+#include <asm/system.h>		/* wmb() */
+
+#include <asm/acpi-ext.h>
+
+extern int swiotlb_late_init_with_default_size (size_t size);
+
+#define PFX "IOC: "
+
+/*
+** Enabling timing search of the pdir resource map.  Output in /proc.
+** Disabled by default to optimize performance.
+*/
+#undef PDIR_SEARCH_TIMING
+
+/*
+** This option allows cards capable of 64bit DMA to bypass the IOMMU.  If
+** not defined, all DMA will be 32bit and go through the TLB.
+** There's potentially a conflict in the bio merge code with us
+** advertising an iommu, but then bypassing it.  Since I/O MMU bypassing
+** appears to give more performance than bio-level virtual merging, we'll
+** do the former for now.  NOTE: BYPASS_SG also needs to be undef'd to
+** completely restrict DMA to the IOMMU.
+*/
+#define ALLOW_IOV_BYPASS
+
+/*
+** This option specifically allows/disallows bypassing scatterlists with
+** multiple entries.  Coalescing these entries can allow better DMA streaming
+** and in some cases shows better performance than entirely bypassing the
+** IOMMU.  Performance increase on the order of 1-2% sequential output/input
+** using bonnie++ on a RAID0 MD device (sym2 & mpt).
+*/
+#undef ALLOW_IOV_BYPASS_SG
+
+/*
+** If a device prefetches beyond the end of a valid pdir entry, it will cause
+** a hard failure, ie. MCA.  Version 3.0 and later of the zx1 LBA should
+** disconnect on 4k boundaries and prevent such issues.  If the device is
+** particularly aggressive, this option will keep the entire pdir valid such
+** that prefetching will hit a valid address.  This could severely impact
+** error containment, and is therefore off by default.  The page that is
+** used for spill-over is poisoned, so that should help debugging somewhat.
+*/
+#undef FULL_VALID_PDIR
+
+#define ENABLE_MARK_CLEAN
+
+/*
+** The number of debug flags is a clue - this code is fragile.  NOTE: since
+** tightening the use of res_lock the resource bitmap and actual pdir are no
+** longer guaranteed to stay in sync.  The sanity checking code isn't going to
+** like that.
+*/
+#undef DEBUG_SBA_INIT
+#undef DEBUG_SBA_RUN
+#undef DEBUG_SBA_RUN_SG
+#undef DEBUG_SBA_RESOURCE
+#undef ASSERT_PDIR_SANITY
+#undef DEBUG_LARGE_SG_ENTRIES
+#undef DEBUG_BYPASS
+
+#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY)
+#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive
+#endif
+
+#define SBA_INLINE	__inline__
+/* #define SBA_INLINE */
+
+#ifdef DEBUG_SBA_INIT
+#define DBG_INIT(x...)	printk(x)
+#else
+#define DBG_INIT(x...)
+#endif
+
+#ifdef DEBUG_SBA_RUN
+#define DBG_RUN(x...)	printk(x)
+#else
+#define DBG_RUN(x...)
+#endif
+
+#ifdef DEBUG_SBA_RUN_SG
+#define DBG_RUN_SG(x...)	printk(x)
+#else
+#define DBG_RUN_SG(x...)
+#endif
+
+
+#ifdef DEBUG_SBA_RESOURCE
+#define DBG_RES(x...)	printk(x)
+#else
+#define DBG_RES(x...)
+#endif
+
+#ifdef DEBUG_BYPASS
+#define DBG_BYPASS(x...)	printk(x)
+#else
+#define DBG_BYPASS(x...)
+#endif
+
+#ifdef ASSERT_PDIR_SANITY
+#define ASSERT(expr) \
+        if(!(expr)) { \
+                printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
+                panic(#expr); \
+        }
+#else
+#define ASSERT(expr)
+#endif
+
+/*
+** The number of pdir entries to "free" before issuing
+** a read to PCOM register to flush out PCOM writes.
+** Interacts with allocation granularity (ie 4 or 8 entries
+** allocated and free'd/purged at a time might make this
+** less interesting).
+*/
+#define DELAYED_RESOURCE_CNT	64
+
+#define PCI_DEVICE_ID_HP_SX2000_IOC	0x12ec
+
+#define ZX1_IOC_ID	((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
+#define ZX2_IOC_ID	((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP)
+#define REO_IOC_ID	((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)
+#define SX1000_IOC_ID	((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP)
+#define SX2000_IOC_ID	((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP)
+
+#define ZX1_IOC_OFFSET	0x1000	/* ACPI reports SBA, we want IOC */
+
+#define IOC_FUNC_ID	0x000
+#define IOC_FCLASS	0x008	/* function class, bist, header, rev... */
+#define IOC_IBASE	0x300	/* IO TLB */
+#define IOC_IMASK	0x308
+#define IOC_PCOM	0x310
+#define IOC_TCNFG	0x318
+#define IOC_PDIR_BASE	0x320
+
+#define IOC_ROPE0_CFG	0x500
+#define   IOC_ROPE_AO	  0x10	/* Allow "Relaxed Ordering" */
+
+
+/* AGP GART driver looks for this */
+#define ZX1_SBA_IOMMU_COOKIE	0x0000badbadc0ffeeUL
+
+/*
+** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register)
+**
+** Some IOCs (sx1000) can run at the above pages sizes, but are
+** really only supported using the IOC at a 4k page size.
+**
+** iovp_size could only be greater than PAGE_SIZE if we are
+** confident the drivers really only touch the next physical
+** page iff that driver instance owns it.
+*/
+static unsigned long iovp_size;
+static unsigned long iovp_shift;
+static unsigned long iovp_mask;
+
+struct ioc {
+	void __iomem	*ioc_hpa;	/* I/O MMU base address */
+	char		*res_map;	/* resource map, bit == pdir entry */
+	u64		*pdir_base;	/* physical base address */
+	unsigned long	ibase;		/* pdir IOV Space base */
+	unsigned long	imask;		/* pdir IOV Space mask */
+
+	unsigned long	*res_hint;	/* next avail IOVP - circular search */
+	unsigned long	dma_mask;
+	spinlock_t	res_lock;	/* protects the resource bitmap, but must be held when */
+					/* clearing pdir to prevent races with allocations. */
+	unsigned int	res_bitshift;	/* from the RIGHT! */
+	unsigned int	res_size;	/* size of resource map in bytes */
+#ifdef CONFIG_NUMA
+	unsigned int	node;		/* node where this IOC lives */
+#endif
+#if DELAYED_RESOURCE_CNT > 0
+	spinlock_t	saved_lock;	/* may want to try to get this on a separate cacheline */
+					/* than res_lock for bigger systems. */
+	int		saved_cnt;
+	struct sba_dma_pair {
+		dma_addr_t	iova;
+		size_t		size;
+	} saved[DELAYED_RESOURCE_CNT];
+#endif
+
+#ifdef PDIR_SEARCH_TIMING
+#define SBA_SEARCH_SAMPLE	0x100
+	unsigned long avg_search[SBA_SEARCH_SAMPLE];
+	unsigned long avg_idx;	/* current index into avg_search */
+#endif
+
+	/* Stuff we don't need in performance path */
+	struct ioc	*next;		/* list of IOC's in system */
+	acpi_handle	handle;		/* for multiple IOC's */
+	const char 	*name;
+	unsigned int	func_id;
+	unsigned int	rev;		/* HW revision of chip */
+	u32		iov_size;
+	unsigned int	pdir_size;	/* in bytes, determined by IOV Space size */
+	struct pci_dev	*sac_only_dev;
+};
+
+static struct ioc *ioc_list;
+static int reserve_sba_gart = 1;
+
+static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
+static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);
+
+#define sba_sg_address(sg)	sg_virt((sg))
+
+#ifdef FULL_VALID_PDIR
+static u64 prefetch_spill_page;
+#endif
+
+#ifdef CONFIG_PCI
+# define GET_IOC(dev)	(((dev)->bus == &pci_bus_type)						\
+			 ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL)
+#else
+# define GET_IOC(dev)	NULL
+#endif
+
+/*
+** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
+** (or rather not merge) DMAs into manageable chunks.
+** On parisc, this is more of the software/tuning constraint
+** rather than the HW. I/O MMU allocation algorithms can be
+** faster with smaller sizes (to some degree).
+*/
+#define DMA_CHUNK_SIZE  (BITS_PER_LONG*iovp_size)
+
+#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1))
+
+/************************************
+** SBA register read and write support
+**
+** BE WARNED: register writes are posted.
+**  (ie follow writes which must reach HW with a read)
+**
+*/
+#define READ_REG(addr)       __raw_readq(addr)
+#define WRITE_REG(val, addr) __raw_writeq(val, addr)
+
+#ifdef DEBUG_SBA_INIT
+
+/**
+ * sba_dump_tlb - debugging only - print IOMMU operating parameters
+ * @hpa: base address of the IOMMU
+ *
+ * Print the size/location of the IO MMU PDIR.
+ */
+static void
+sba_dump_tlb(char *hpa)
+{
+	DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
+	DBG_INIT("IOC_IBASE    : %016lx\n", READ_REG(hpa+IOC_IBASE));
+	DBG_INIT("IOC_IMASK    : %016lx\n", READ_REG(hpa+IOC_IMASK));
+	DBG_INIT("IOC_TCNFG    : %016lx\n", READ_REG(hpa+IOC_TCNFG));
+	DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
+	DBG_INIT("\n");
+}
+#endif
+
+
+#ifdef ASSERT_PDIR_SANITY
+
+/**
+ * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @msg: text to print ont the output line.
+ * @pide: pdir index.
+ *
+ * Print one entry of the IO MMU PDIR in human readable form.
+ */
+static void
+sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
+{
+	/* start printing from lowest pde in rval */
+	u64 *ptr = &ioc->pdir_base[pide  & ~(BITS_PER_LONG - 1)];
+	unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)];
+	uint rcnt;
+
+	printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n",
+		 msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);
+
+	rcnt = 0;
+	while (rcnt < BITS_PER_LONG) {
+		printk(KERN_DEBUG "%s %2d %p %016Lx\n",
+		       (rcnt == (pide & (BITS_PER_LONG - 1)))
+		       ? "    -->" : "       ",
+		       rcnt, ptr, (unsigned long long) *ptr );
+		rcnt++;
+		ptr++;
+	}
+	printk(KERN_DEBUG "%s", msg);
+}
+
+
+/**
+ * sba_check_pdir - debugging only - consistency checker
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @msg: text to print ont the output line.
+ *
+ * Verify the resource map and pdir state is consistent
+ */
+static int
+sba_check_pdir(struct ioc *ioc, char *msg)
+{
+	u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
+	u64 *rptr = (u64 *) ioc->res_map;	/* resource map ptr */
+	u64 *pptr = ioc->pdir_base;	/* pdir ptr */
+	uint pide = 0;
+
+	while (rptr < rptr_end) {
+		u64 rval;
+		int rcnt; /* number of bits we might check */
+
+		rval = *rptr;
+		rcnt = 64;
+
+		while (rcnt) {
+			/* Get last byte and highest bit from that */
+			u32 pde = ((u32)((*pptr >> (63)) & 0x1));
+			if ((rval & 0x1) ^ pde)
+			{
+				/*
+				** BUMMER!  -- res_map != pdir --
+				** Dump rval and matching pdir entries
+				*/
+				sba_dump_pdir_entry(ioc, msg, pide);
+				return(1);
+			}
+			rcnt--;
+			rval >>= 1;	/* try the next bit */
+			pptr++;
+			pide++;
+		}
+		rptr++;	/* look at next word of res_map */
+	}
+	/* It'd be nice if we always got here :^) */
+	return 0;
+}
+
+
+/**
+ * sba_dump_sg - debugging only - print Scatter-Gather list
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @startsg: head of the SG list
+ * @nents: number of entries in SG list
+ *
+ * print the SG list so we can verify it's correct by hand.
+ */
+static void
+sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
+{
+	while (nents-- > 0) {
+		printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
+		       startsg->dma_address, startsg->dma_length,
+		       sba_sg_address(startsg));
+		startsg = sg_next(startsg);
+	}
+}
+
+static void
+sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
+{
+	struct scatterlist *the_sg = startsg;
+	int the_nents = nents;
+
+	while (the_nents-- > 0) {
+		if (sba_sg_address(the_sg) == 0x0UL)
+			sba_dump_sg(NULL, startsg, nents);
+		the_sg = sg_next(the_sg);
+	}
+}
+
+#endif /* ASSERT_PDIR_SANITY */
+
+
+
+
+/**************************************************************
+*
+*   I/O Pdir Resource Management
+*
+*   Bits set in the resource map are in use.
+*   Each bit can represent a number of pages.
+*   LSbs represent lower addresses (IOVA's).
+*
+***************************************************************/
+#define PAGES_PER_RANGE 1	/* could increase this to 4 or 8 if needed */
+
+/* Convert from IOVP to IOVA and vice versa. */
+#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset))
+#define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase))
+
+#define PDIR_ENTRY_SIZE	sizeof(u64)
+
+#define PDIR_INDEX(iovp)   ((iovp)>>iovp_shift)
+
+#define RESMAP_MASK(n)    ~(~0UL << (n))
+#define RESMAP_IDX_MASK   (sizeof(unsigned long) - 1)
+
+
+/**
+ * For most cases the normal get_order is sufficient, however it limits us
+ * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity.
+ * It only incurs about 1 clock cycle to use this one with the static variable
+ * and makes the code more intuitive.
+ */
+static SBA_INLINE int
+get_iovp_order (unsigned long size)
+{
+	long double d = size - 1;
+	long order;
+
+	order = ia64_getf_exp(d);
+	order = order - iovp_shift - 0xffff + 1;
+	if (order < 0)
+		order = 0;
+	return order;
+}
+
+static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr,
+				 unsigned int bitshiftcnt)
+{
+	return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3)
+		+ bitshiftcnt;
+}
+
+/**
+ * sba_search_bitmap - find free space in IO PDIR resource bitmap
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @bits_wanted: number of entries we need.
+ * @use_hint: use res_hint to indicate where to start looking
+ *
+ * Find consecutive free bits in resource bitmap.
+ * Each bit represents one entry in the IO Pdir.
+ * Cool perf optimization: search for log2(size) bits at a time.
+ */
+static SBA_INLINE unsigned long
+sba_search_bitmap(struct ioc *ioc, struct device *dev,
+		  unsigned long bits_wanted, int use_hint)
+{
+	unsigned long *res_ptr;
+	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
+	unsigned long flags, pide = ~0UL, tpide;
+	unsigned long boundary_size;
+	unsigned long shift;
+	int ret;
+
+	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
+	ASSERT(res_ptr < res_end);
+
+	boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1;
+	boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift;
+
+	BUG_ON(ioc->ibase & ~iovp_mask);
+	shift = ioc->ibase >> iovp_shift;
+
+	spin_lock_irqsave(&ioc->res_lock, flags);
+
+	/* Allow caller to force a search through the entire resource space */
+	if (likely(use_hint)) {
+		res_ptr = ioc->res_hint;
+	} else {
+		res_ptr = (ulong *)ioc->res_map;
+		ioc->res_bitshift = 0;
+	}
+
+	/*
+	 * N.B.  REO/Grande defect AR2305 can cause TLB fetch timeouts
+	 * if a TLB entry is purged while in use.  sba_mark_invalid()
+	 * purges IOTLB entries in power-of-two sizes, so we also
+	 * allocate IOVA space in power-of-two sizes.
+	 */
+	bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);
+
+	if (likely(bits_wanted == 1)) {
+		unsigned int bitshiftcnt;
+		for(; res_ptr < res_end ; res_ptr++) {
+			if (likely(*res_ptr != ~0UL)) {
+				bitshiftcnt = ffz(*res_ptr);
+				*res_ptr |= (1UL << bitshiftcnt);
+				pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
+				ioc->res_bitshift = bitshiftcnt + bits_wanted;
+				goto found_it;
+			}
+		}
+		goto not_found;
+
+	}
+	
+	if (likely(bits_wanted <= BITS_PER_LONG/2)) {
+		/*
+		** Search the resource bit map on well-aligned values.
+		** "o" is the alignment.
+		** We need the alignment to invalidate I/O TLB using
+		** SBA HW features in the unmap path.
+		*/
+		unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift);
+		uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
+		unsigned long mask, base_mask;
+
+		base_mask = RESMAP_MASK(bits_wanted);
+		mask = base_mask << bitshiftcnt;
+
+		DBG_RES("%s() o %ld %p", __func__, o, res_ptr);
+		for(; res_ptr < res_end ; res_ptr++)
+		{ 
+			DBG_RES("    %p %lx %lx\n", res_ptr, mask, *res_ptr);
+			ASSERT(0 != mask);
+			for (; mask ; mask <<= o, bitshiftcnt += o) {
+				tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
+				ret = iommu_is_span_boundary(tpide, bits_wanted,
+							     shift,
+							     boundary_size);
+				if ((0 == ((*res_ptr) & mask)) && !ret) {
+					*res_ptr |= mask;     /* mark resources busy! */
+					pide = tpide;
+					ioc->res_bitshift = bitshiftcnt + bits_wanted;
+					goto found_it;
+				}
+			}
+
+			bitshiftcnt = 0;
+			mask = base_mask;
+
+		}
+
+	} else {
+		int qwords, bits, i;
+		unsigned long *end;
+
+		qwords = bits_wanted >> 6; /* /64 */
+		bits = bits_wanted - (qwords * BITS_PER_LONG);
+
+		end = res_end - qwords;
+
+		for (; res_ptr < end; res_ptr++) {
+			tpide = ptr_to_pide(ioc, res_ptr, 0);
+			ret = iommu_is_span_boundary(tpide, bits_wanted,
+						     shift, boundary_size);
+			if (ret)
+				goto next_ptr;
+			for (i = 0 ; i < qwords ; i++) {
+				if (res_ptr[i] != 0)
+					goto next_ptr;
+			}
+			if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits))
+				continue;
+
+			/* Found it, mark it */
+			for (i = 0 ; i < qwords ; i++)
+				res_ptr[i] = ~0UL;
+			res_ptr[i] |= RESMAP_MASK(bits);
+
+			pide = tpide;
+			res_ptr += qwords;
+			ioc->res_bitshift = bits;
+			goto found_it;
+next_ptr:
+			;
+		}
+	}
+
+not_found:
+	prefetch(ioc->res_map);
+	ioc->res_hint = (unsigned long *) ioc->res_map;
+	ioc->res_bitshift = 0;
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+	return (pide);
+
+found_it:
+	ioc->res_hint = res_ptr;
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+	return (pide);
+}
+
+
+/**
+ * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @size: number of bytes to create a mapping for
+ *
+ * Given a size, find consecutive unmarked and then mark those bits in the
+ * resource bit map.
+ */
+static int
+sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
+{
+	unsigned int pages_needed = size >> iovp_shift;
+#ifdef PDIR_SEARCH_TIMING
+	unsigned long itc_start;
+#endif
+	unsigned long pide;
+
+	ASSERT(pages_needed);
+	ASSERT(0 == (size & ~iovp_mask));
+
+#ifdef PDIR_SEARCH_TIMING
+	itc_start = ia64_get_itc();
+#endif
+	/*
+	** "seek and ye shall find"...praying never hurts either...
+	*/
+	pide = sba_search_bitmap(ioc, dev, pages_needed, 1);
+	if (unlikely(pide >= (ioc->res_size << 3))) {
+		pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
+		if (unlikely(pide >= (ioc->res_size << 3))) {
+#if DELAYED_RESOURCE_CNT > 0
+			unsigned long flags;
+
+			/*
+			** With delayed resource freeing, we can give this one more shot.  We're
+			** getting close to being in trouble here, so do what we can to make this
+			** one count.
+			*/
+			spin_lock_irqsave(&ioc->saved_lock, flags);
+			if (ioc->saved_cnt > 0) {
+				struct sba_dma_pair *d;
+				int cnt = ioc->saved_cnt;
+
+				d = &(ioc->saved[ioc->saved_cnt - 1]);
+
+				spin_lock(&ioc->res_lock);
+				while (cnt--) {
+					sba_mark_invalid(ioc, d->iova, d->size);
+					sba_free_range(ioc, d->iova, d->size);
+					d--;
+				}
+				ioc->saved_cnt = 0;
+				READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+				spin_unlock(&ioc->res_lock);
+			}
+			spin_unlock_irqrestore(&ioc->saved_lock, flags);
+
+			pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
+			if (unlikely(pide >= (ioc->res_size << 3))) {
+				printk(KERN_WARNING "%s: I/O MMU @ %p is"
+				       "out of mapping resources, %u %u %lx\n",
+				       __func__, ioc->ioc_hpa, ioc->res_size,
+				       pages_needed, dma_get_seg_boundary(dev));
+				return -1;
+			}
+#else
+			printk(KERN_WARNING "%s: I/O MMU @ %p is"
+			       "out of mapping resources, %u %u %lx\n",
+			       __func__, ioc->ioc_hpa, ioc->res_size,
+			       pages_needed, dma_get_seg_boundary(dev));
+			return -1;
+#endif
+		}
+	}
+
+#ifdef PDIR_SEARCH_TIMING
+	ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed;
+	ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1;
+#endif
+
+	prefetchw(&(ioc->pdir_base[pide]));
+
+#ifdef ASSERT_PDIR_SANITY
+	/* verify the first enable bit is clear */
+	if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) {
+		sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide);
+	}
+#endif
+
+	DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
+		__func__, size, pages_needed, pide,
+		(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
+		ioc->res_bitshift );
+
+	return (pide);
+}
+
+
+/**
+ * sba_free_range - unmark bits in IO PDIR resource bitmap
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @iova: IO virtual address which was previously allocated.
+ * @size: number of bytes to create a mapping for
+ *
+ * clear bits in the ioc's resource map
+ */
+static SBA_INLINE void
+sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
+{
+	unsigned long iovp = SBA_IOVP(ioc, iova);
+	unsigned int pide = PDIR_INDEX(iovp);
+	unsigned int ridx = pide >> 3;	/* convert bit to byte address */
+	unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]);
+	int bits_not_wanted = size >> iovp_shift;
+	unsigned long m;
+
+	/* Round up to power-of-two size: see AR2305 note above */
+	bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift);
+	for (; bits_not_wanted > 0 ; res_ptr++) {
+		
+		if (unlikely(bits_not_wanted > BITS_PER_LONG)) {
+
+			/* these mappings start 64bit aligned */
+			*res_ptr = 0UL;
+			bits_not_wanted -= BITS_PER_LONG;
+			pide += BITS_PER_LONG;
+
+		} else {
+
+			/* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */
+			m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));
+			bits_not_wanted = 0;
+
+			DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __func__, (uint) iova, size,
+			        bits_not_wanted, m, pide, res_ptr, *res_ptr);
+
+			ASSERT(m != 0);
+			ASSERT(bits_not_wanted);
+			ASSERT((*res_ptr & m) == m); /* verify same bits are set */
+			*res_ptr &= ~m;
+		}
+	}
+}
+
+
+/**************************************************************
+*
+*   "Dynamic DMA Mapping" support (aka "Coherent I/O")
+*
+***************************************************************/
+
+/**
+ * sba_io_pdir_entry - fill in one IO PDIR entry
+ * @pdir_ptr:  pointer to IO PDIR entry
+ * @vba: Virtual CPU address of buffer to map
+ *
+ * SBA Mapping Routine
+ *
+ * Given a virtual address (vba, arg1) sba_io_pdir_entry()
+ * loads the I/O PDIR entry pointed to by pdir_ptr (arg0).
+ * Each IO Pdir entry consists of 8 bytes as shown below
+ * (LSB == bit 0):
+ *
+ *  63                    40                                 11    7        0
+ * +-+---------------------+----------------------------------+----+--------+
+ * |V|        U            |            PPN[39:12]            | U  |   FF   |
+ * +-+---------------------+----------------------------------+----+--------+
+ *
+ *  V  == Valid Bit
+ *  U  == Unused
+ * PPN == Physical Page Number
+ *
+ * The physical address fields are filled with the results of virt_to_phys()
+ * on the vba.
+ */
+
+#if 1
+#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = ((vba & ~0xE000000000000FFFULL)	\
+						      | 0x8000000000000000ULL)
+#else
+void SBA_INLINE
+sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
+{
+	*pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL);
+}
+#endif
+
+#ifdef ENABLE_MARK_CLEAN
+/**
+ * Since DMA is i-cache coherent, any (complete) pages that were written via
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
+ * flush them when they get mapped into an executable vm-area.
+ */
+static void
+mark_clean (void *addr, size_t size)
+{
+	unsigned long pg_addr, end;
+
+	pg_addr = PAGE_ALIGN((unsigned long) addr);
+	end = (unsigned long) addr + size;
+	while (pg_addr + PAGE_SIZE <= end) {
+		struct page *page = virt_to_page((void *)pg_addr);
+		set_bit(PG_arch_1, &page->flags);
+		pg_addr += PAGE_SIZE;
+	}
+}
+#endif
+
+/**
+ * sba_mark_invalid - invalidate one or more IO PDIR entries
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @iova:  IO Virtual Address mapped earlier
+ * @byte_cnt:  number of bytes this mapping covers.
+ *
+ * Marking the IO PDIR entry(ies) as Invalid and invalidate
+ * corresponding IO TLB entry. The PCOM (Purge Command Register)
+ * is to purge stale entries in the IO TLB when unmapping entries.
+ *
+ * The PCOM register supports purging of multiple pages, with a minium
+ * of 1 page and a maximum of 2GB. Hardware requires the address be
+ * aligned to the size of the range being purged. The size of the range
+ * must be a power of 2. The "Cool perf optimization" in the
+ * allocation routine helps keep that true.
+ */
+static SBA_INLINE void
+sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
+{
+	u32 iovp = (u32) SBA_IOVP(ioc,iova);
+
+	int off = PDIR_INDEX(iovp);
+
+	/* Must be non-zero and rounded up */
+	ASSERT(byte_cnt > 0);
+	ASSERT(0 == (byte_cnt & ~iovp_mask));
+
+#ifdef ASSERT_PDIR_SANITY
+	/* Assert first pdir entry is set */
+	if (!(ioc->pdir_base[off] >> 60)) {
+		sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp));
+	}
+#endif
+
+	if (byte_cnt <= iovp_size)
+	{
+		ASSERT(off < ioc->pdir_size);
+
+		iovp |= iovp_shift;     /* set "size" field for PCOM */
+
+#ifndef FULL_VALID_PDIR
+		/*
+		** clear I/O PDIR entry "valid" bit
+		** Do NOT clear the rest - save it for debugging.
+		** We should only clear bits that have previously
+		** been enabled.
+		*/
+		ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
+#else
+		/*
+  		** If we want to maintain the PDIR as valid, put in
+		** the spill page so devices prefetching won't
+		** cause a hard fail.
+		*/
+		ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
+#endif
+	} else {
+		u32 t = get_iovp_order(byte_cnt) + iovp_shift;
+
+		iovp |= t;
+		ASSERT(t <= 31);   /* 2GB! Max value of "size" field */
+
+		do {
+			/* verify this pdir entry is enabled */
+			ASSERT(ioc->pdir_base[off]  >> 63);
+#ifndef FULL_VALID_PDIR
+			/* clear I/O Pdir entry "valid" bit first */
+			ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
+#else
+			ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
+#endif
+			off++;
+			byte_cnt -= iovp_size;
+		} while (byte_cnt > 0);
+	}
+
+	WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM);
+}
+
+/**
+ * sba_map_single_attrs - map one buffer and return IOVA for DMA
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @addr:  driver buffer to map.
+ * @size:  number of bytes to map in driver buffer.
+ * @dir:  R/W or both.
+ * @attrs: optional dma attributes
+ *
+ * See Documentation/PCI/PCI-DMA-mapping.txt
+ */
+static dma_addr_t sba_map_page(struct device *dev, struct page *page,
+			       unsigned long poff, size_t size,
+			       enum dma_data_direction dir,
+			       struct dma_attrs *attrs)
+{
+	struct ioc *ioc;
+	void *addr = page_address(page) + poff;
+	dma_addr_t iovp;
+	dma_addr_t offset;
+	u64 *pdir_start;
+	int pide;
+#ifdef ASSERT_PDIR_SANITY
+	unsigned long flags;
+#endif
+#ifdef ALLOW_IOV_BYPASS
+	unsigned long pci_addr = virt_to_phys(addr);
+#endif
+
+#ifdef ALLOW_IOV_BYPASS
+	ASSERT(to_pci_dev(dev)->dma_mask);
+	/*
+ 	** Check if the PCI device can DMA to ptr... if so, just return ptr
+ 	*/
+	if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) {
+		/*
+ 		** Device is bit capable of DMA'ing to the buffer...
+		** just return the PCI address of ptr
+ 		*/
+		DBG_BYPASS("sba_map_single_attrs() bypass mask/addr: "
+			   "0x%lx/0x%lx\n",
+		           to_pci_dev(dev)->dma_mask, pci_addr);
+		return pci_addr;
+	}
+#endif
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+	prefetch(ioc->res_hint);
+
+	ASSERT(size > 0);
+	ASSERT(size <= DMA_CHUNK_SIZE);
+
+	/* save offset bits */
+	offset = ((dma_addr_t) (long) addr) & ~iovp_mask;
+
+	/* round up to nearest iovp_size */
+	size = (size + offset + ~iovp_mask) & iovp_mask;
+
+#ifdef ASSERT_PDIR_SANITY
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	if (sba_check_pdir(ioc,"Check before sba_map_single_attrs()"))
+		panic("Sanity check failed");
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+	pide = sba_alloc_range(ioc, dev, size);
+	if (pide < 0)
+		return 0;
+
+	iovp = (dma_addr_t) pide << iovp_shift;
+
+	DBG_RUN("%s() 0x%p -> 0x%lx\n", __func__, addr, (long) iovp | offset);
+
+	pdir_start = &(ioc->pdir_base[pide]);
+
+	while (size > 0) {
+		ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */
+		sba_io_pdir_entry(pdir_start, (unsigned long) addr);
+
+		DBG_RUN("     pdir 0x%p %lx\n", pdir_start, *pdir_start);
+
+		addr += iovp_size;
+		size -= iovp_size;
+		pdir_start++;
+	}
+	/* force pdir update */
+	wmb();
+
+	/* form complete address */
+#ifdef ASSERT_PDIR_SANITY
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	sba_check_pdir(ioc,"Check after sba_map_single_attrs()");
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+	return SBA_IOVA(ioc, iovp, offset);
+}
+
+static dma_addr_t sba_map_single_attrs(struct device *dev, void *addr,
+				       size_t size, enum dma_data_direction dir,
+				       struct dma_attrs *attrs)
+{
+	return sba_map_page(dev, virt_to_page(addr),
+			    (unsigned long)addr & ~PAGE_MASK, size, dir, attrs);
+}
+
+#ifdef ENABLE_MARK_CLEAN
+static SBA_INLINE void
+sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
+{
+	u32	iovp = (u32) SBA_IOVP(ioc,iova);
+	int	off = PDIR_INDEX(iovp);
+	void	*addr;
+
+	if (size <= iovp_size) {
+		addr = phys_to_virt(ioc->pdir_base[off] &
+		                    ~0xE000000000000FFFULL);
+		mark_clean(addr, size);
+	} else {
+		do {
+			addr = phys_to_virt(ioc->pdir_base[off] &
+			                    ~0xE000000000000FFFULL);
+			mark_clean(addr, min(size, iovp_size));
+			off++;
+			size -= iovp_size;
+		} while (size > 0);
+	}
+}
+#endif
+
+/**
+ * sba_unmap_single_attrs - unmap one IOVA and free resources
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @iova:  IOVA of driver buffer previously mapped.
+ * @size:  number of bytes mapped in driver buffer.
+ * @dir:  R/W or both.
+ * @attrs: optional dma attributes
+ *
+ * See Documentation/PCI/PCI-DMA-mapping.txt
+ */
+static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
+			   enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	struct ioc *ioc;
+#if DELAYED_RESOURCE_CNT > 0
+	struct sba_dma_pair *d;
+#endif
+	unsigned long flags;
+	dma_addr_t offset;
+
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+#ifdef ALLOW_IOV_BYPASS
+	if (likely((iova & ioc->imask) != ioc->ibase)) {
+		/*
+		** Address does not fall w/in IOVA, must be bypassing
+		*/
+		DBG_BYPASS("sba_unmap_single_attrs() bypass addr: 0x%lx\n",
+			   iova);
+
+#ifdef ENABLE_MARK_CLEAN
+		if (dir == DMA_FROM_DEVICE) {
+			mark_clean(phys_to_virt(iova), size);
+		}
+#endif
+		return;
+	}
+#endif
+	offset = iova & ~iovp_mask;
+
+	DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long) iova, size);
+
+	iova ^= offset;        /* clear offset bits */
+	size += offset;
+	size = ROUNDUP(size, iovp_size);
+
+#ifdef ENABLE_MARK_CLEAN
+	if (dir == DMA_FROM_DEVICE)
+		sba_mark_clean(ioc, iova, size);
+#endif
+
+#if DELAYED_RESOURCE_CNT > 0
+	spin_lock_irqsave(&ioc->saved_lock, flags);
+	d = &(ioc->saved[ioc->saved_cnt]);
+	d->iova = iova;
+	d->size = size;
+	if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) {
+		int cnt = ioc->saved_cnt;
+		spin_lock(&ioc->res_lock);
+		while (cnt--) {
+			sba_mark_invalid(ioc, d->iova, d->size);
+			sba_free_range(ioc, d->iova, d->size);
+			d--;
+		}
+		ioc->saved_cnt = 0;
+		READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+		spin_unlock(&ioc->res_lock);
+	}
+	spin_unlock_irqrestore(&ioc->saved_lock, flags);
+#else /* DELAYED_RESOURCE_CNT == 0 */
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	sba_mark_invalid(ioc, iova, size);
+	sba_free_range(ioc, iova, size);
+	READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif /* DELAYED_RESOURCE_CNT == 0 */
+}
+
+void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
+			    enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	sba_unmap_page(dev, iova, size, dir, attrs);
+}
+
+/**
+ * sba_alloc_coherent - allocate/map shared mem for DMA
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @size:  number of bytes mapped in driver buffer.
+ * @dma_handle:  IOVA of new buffer.
+ *
+ * See Documentation/PCI/PCI-DMA-mapping.txt
+ */
+static void *
+sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
+{
+	struct ioc *ioc;
+	void *addr;
+
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+#ifdef CONFIG_NUMA
+	{
+		struct page *page;
+		page = alloc_pages_exact_node(ioc->node == MAX_NUMNODES ?
+		                        numa_node_id() : ioc->node, flags,
+		                        get_order(size));
+
+		if (unlikely(!page))
+			return NULL;
+
+		addr = page_address(page);
+	}
+#else
+	addr = (void *) __get_free_pages(flags, get_order(size));
+#endif
+	if (unlikely(!addr))
+		return NULL;
+
+	memset(addr, 0, size);
+	*dma_handle = virt_to_phys(addr);
+
+#ifdef ALLOW_IOV_BYPASS
+	ASSERT(dev->coherent_dma_mask);
+	/*
+ 	** Check if the PCI device can DMA to ptr... if so, just return ptr
+ 	*/
+	if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) {
+		DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n",
+		           dev->coherent_dma_mask, *dma_handle);
+
+		return addr;
+	}
+#endif
+
+	/*
+	 * If device can't bypass or bypass is disabled, pass the 32bit fake
+	 * device to map single to get an iova mapping.
+	 */
+	*dma_handle = sba_map_single_attrs(&ioc->sac_only_dev->dev, addr,
+					   size, 0, NULL);
+
+	return addr;
+}
+
+
+/**
+ * sba_free_coherent - free/unmap shared mem for DMA
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @size:  number of bytes mapped in driver buffer.
+ * @vaddr:  virtual address IOVA of "consistent" buffer.
+ * @dma_handler:  IO virtual address of "consistent" buffer.
+ *
+ * See Documentation/PCI/PCI-DMA-mapping.txt
+ */
+static void sba_free_coherent (struct device *dev, size_t size, void *vaddr,
+			       dma_addr_t dma_handle)
+{
+	sba_unmap_single_attrs(dev, dma_handle, size, 0, NULL);
+	free_pages((unsigned long) vaddr, get_order(size));
+}
+
+
+/*
+** Since 0 is a valid pdir_base index value, can't use that
+** to determine if a value is valid or not. Use a flag to indicate
+** the SG list entry contains a valid pdir index.
+*/
+#define PIDE_FLAG 0x1UL
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+int dump_run_sg = 0;
+#endif
+
+
+/**
+ * sba_fill_pdir - write allocated SG entries into IO PDIR
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @startsg:  list of IOVA/size pairs
+ * @nents: number of entries in startsg list
+ *
+ * Take preprocessed SG list and write corresponding entries
+ * in the IO PDIR.
+ */
+
+static SBA_INLINE int
+sba_fill_pdir(
+	struct ioc *ioc,
+	struct scatterlist *startsg,
+	int nents)
+{
+	struct scatterlist *dma_sg = startsg;	/* pointer to current DMA */
+	int n_mappings = 0;
+	u64 *pdirp = NULL;
+	unsigned long dma_offset = 0;
+
+	while (nents-- > 0) {
+		int     cnt = startsg->dma_length;
+		startsg->dma_length = 0;
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+		if (dump_run_sg)
+			printk(" %2d : %08lx/%05x %p\n",
+				nents, startsg->dma_address, cnt,
+				sba_sg_address(startsg));
+#else
+		DBG_RUN_SG(" %d : %08lx/%05x %p\n",
+				nents, startsg->dma_address, cnt,
+				sba_sg_address(startsg));
+#endif
+		/*
+		** Look for the start of a new DMA stream
+		*/
+		if (startsg->dma_address & PIDE_FLAG) {
+			u32 pide = startsg->dma_address & ~PIDE_FLAG;
+			dma_offset = (unsigned long) pide & ~iovp_mask;
+			startsg->dma_address = 0;
+			if (n_mappings)
+				dma_sg = sg_next(dma_sg);
+			dma_sg->dma_address = pide | ioc->ibase;
+			pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
+			n_mappings++;
+		}
+
+		/*
+		** Look for a VCONTIG chunk
+		*/
+		if (cnt) {
+			unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
+			ASSERT(pdirp);
+
+			/* Since multiple Vcontig blocks could make up
+			** one DMA stream, *add* cnt to dma_len.
+			*/
+			dma_sg->dma_length += cnt;
+			cnt += dma_offset;
+			dma_offset=0;	/* only want offset on first chunk */
+			cnt = ROUNDUP(cnt, iovp_size);
+			do {
+				sba_io_pdir_entry(pdirp, vaddr);
+				vaddr += iovp_size;
+				cnt -= iovp_size;
+				pdirp++;
+			} while (cnt > 0);
+		}
+		startsg = sg_next(startsg);
+	}
+	/* force pdir update */
+	wmb();
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+	dump_run_sg = 0;
+#endif
+	return(n_mappings);
+}
+
+
+/*
+** Two address ranges are DMA contiguous *iff* "end of prev" and
+** "start of next" are both on an IOV page boundary.
+**
+** (shift left is a quick trick to mask off upper bits)
+*/
+#define DMA_CONTIG(__X, __Y) \
+	(((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL)
+
+
+/**
+ * sba_coalesce_chunks - preprocess the SG list
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @startsg:  list of IOVA/size pairs
+ * @nents: number of entries in startsg list
+ *
+ * First pass is to walk the SG list and determine where the breaks are
+ * in the DMA stream. Allocates PDIR entries but does not fill them.
+ * Returns the number of DMA chunks.
+ *
+ * Doing the fill separate from the coalescing/allocation keeps the
+ * code simpler. Future enhancement could make one pass through
+ * the sglist do both.
+ */
+static SBA_INLINE int
+sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
+	struct scatterlist *startsg,
+	int nents)
+{
+	struct scatterlist *vcontig_sg;    /* VCONTIG chunk head */
+	unsigned long vcontig_len;         /* len of VCONTIG chunk */
+	unsigned long vcontig_end;
+	struct scatterlist *dma_sg;        /* next DMA stream head */
+	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
+	int n_mappings = 0;
+	unsigned int max_seg_size = dma_get_max_seg_size(dev);
+	int idx;
+
+	while (nents > 0) {
+		unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
+
+		/*
+		** Prepare for first/next DMA stream
+		*/
+		dma_sg = vcontig_sg = startsg;
+		dma_len = vcontig_len = vcontig_end = startsg->length;
+		vcontig_end +=  vaddr;
+		dma_offset = vaddr & ~iovp_mask;
+
+		/* PARANOID: clear entries */
+		startsg->dma_address = startsg->dma_length = 0;
+
+		/*
+		** This loop terminates one iteration "early" since
+		** it's always looking one "ahead".
+		*/
+		while (--nents > 0) {
+			unsigned long vaddr;	/* tmp */
+
+			startsg = sg_next(startsg);
+
+			/* PARANOID */
+			startsg->dma_address = startsg->dma_length = 0;
+
+			/* catch brokenness in SCSI layer */
+			ASSERT(startsg->length <= DMA_CHUNK_SIZE);
+
+			/*
+			** First make sure current dma stream won't
+			** exceed DMA_CHUNK_SIZE if we coalesce the
+			** next entry.
+			*/
+			if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask)
+			    > DMA_CHUNK_SIZE)
+				break;
+
+			if (dma_len + startsg->length > max_seg_size)
+				break;
+
+			/*
+			** Then look for virtually contiguous blocks.
+			**
+			** append the next transaction?
+			*/
+			vaddr = (unsigned long) sba_sg_address(startsg);
+			if  (vcontig_end == vaddr)
+			{
+				vcontig_len += startsg->length;
+				vcontig_end += startsg->length;
+				dma_len     += startsg->length;
+				continue;
+			}
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+			dump_run_sg = (vcontig_len > iovp_size);
+#endif
+
+			/*
+			** Not virtually contiguous.
+			** Terminate prev chunk.
+			** Start a new chunk.
+			**
+			** Once we start a new VCONTIG chunk, dma_offset
+			** can't change. And we need the offset from the first
+			** chunk - not the last one. Ergo Successive chunks
+			** must start on page boundaries and dove tail
+			** with it's predecessor.
+			*/
+			vcontig_sg->dma_length = vcontig_len;
+
+			vcontig_sg = startsg;
+			vcontig_len = startsg->length;
+
+			/*
+			** 3) do the entries end/start on page boundaries?
+			**    Don't update vcontig_end until we've checked.
+			*/
+			if (DMA_CONTIG(vcontig_end, vaddr))
+			{
+				vcontig_end = vcontig_len + vaddr;
+				dma_len += vcontig_len;
+				continue;
+			} else {
+				break;
+			}
+		}
+
+		/*
+		** End of DMA Stream
+		** Terminate last VCONTIG block.
+		** Allocate space for DMA stream.
+		*/
+		vcontig_sg->dma_length = vcontig_len;
+		dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
+		ASSERT(dma_len <= DMA_CHUNK_SIZE);
+		idx = sba_alloc_range(ioc, dev, dma_len);
+		if (idx < 0) {
+			dma_sg->dma_length = 0;
+			return -1;
+		}
+		dma_sg->dma_address = (dma_addr_t)(PIDE_FLAG | (idx << iovp_shift)
+						   | dma_offset);
+		n_mappings++;
+	}
+
+	return n_mappings;
+}
+
+static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
+			       int nents, enum dma_data_direction dir,
+			       struct dma_attrs *attrs);
+/**
+ * sba_map_sg - map Scatter/Gather list
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @sglist:  array of buffer/length pairs
+ * @nents:  number of entries in list
+ * @dir:  R/W or both.
+ * @attrs: optional dma attributes
+ *
+ * See Documentation/PCI/PCI-DMA-mapping.txt
+ */
+static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
+			    int nents, enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
+{
+	struct ioc *ioc;
+	int coalesced, filled = 0;
+#ifdef ASSERT_PDIR_SANITY
+	unsigned long flags;
+#endif
+#ifdef ALLOW_IOV_BYPASS_SG
+	struct scatterlist *sg;
+#endif
+
+	DBG_RUN_SG("%s() START %d entries\n", __func__, nents);
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+#ifdef ALLOW_IOV_BYPASS_SG
+	ASSERT(to_pci_dev(dev)->dma_mask);
+	if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
+		for_each_sg(sglist, sg, nents, filled) {
+			sg->dma_length = sg->length;
+			sg->dma_address = virt_to_phys(sba_sg_address(sg));
+		}
+		return filled;
+	}
+#endif
+	/* Fast path single entry scatterlists. */
+	if (nents == 1) {
+		sglist->dma_length = sglist->length;
+		sglist->dma_address = sba_map_single_attrs(dev, sba_sg_address(sglist), sglist->length, dir, attrs);
+		return 1;
+	}
+
+#ifdef ASSERT_PDIR_SANITY
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	if (sba_check_pdir(ioc,"Check before sba_map_sg_attrs()"))
+	{
+		sba_dump_sg(ioc, sglist, nents);
+		panic("Check before sba_map_sg_attrs()");
+	}
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+	prefetch(ioc->res_hint);
+
+	/*
+	** First coalesce the chunks and allocate I/O pdir space
+	**
+	** If this is one DMA stream, we can properly map using the
+	** correct virtual address associated with each DMA page.
+	** w/o this association, we wouldn't have coherent DMA!
+	** Access to the virtual address is what forces a two pass algorithm.
+	*/
+	coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents);
+	if (coalesced < 0) {
+		sba_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
+		return 0;
+	}
+
+	/*
+	** Program the I/O Pdir
+	**
+	** map the virtual addresses to the I/O Pdir
+	** o dma_address will contain the pdir index
+	** o dma_len will contain the number of bytes to map
+	** o address contains the virtual address.
+	*/
+	filled = sba_fill_pdir(ioc, sglist, nents);
+
+#ifdef ASSERT_PDIR_SANITY
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	if (sba_check_pdir(ioc,"Check after sba_map_sg_attrs()"))
+	{
+		sba_dump_sg(ioc, sglist, nents);
+		panic("Check after sba_map_sg_attrs()\n");
+	}
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+	ASSERT(coalesced == filled);
+	DBG_RUN_SG("%s() DONE %d mappings\n", __func__, filled);
+
+	return filled;
+}
+
+/**
+ * sba_unmap_sg_attrs - unmap Scatter/Gather list
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @sglist:  array of buffer/length pairs
+ * @nents:  number of entries in list
+ * @dir:  R/W or both.
+ * @attrs: optional dma attributes
+ *
+ * See Documentation/PCI/PCI-DMA-mapping.txt
+ */
+static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
+			       int nents, enum dma_data_direction dir,
+			       struct dma_attrs *attrs)
+{
+#ifdef ASSERT_PDIR_SANITY
+	struct ioc *ioc;
+	unsigned long flags;
+#endif
+
+	DBG_RUN_SG("%s() START %d entries,  %p,%x\n",
+		   __func__, nents, sba_sg_address(sglist), sglist->length);
+
+#ifdef ASSERT_PDIR_SANITY
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	sba_check_pdir(ioc,"Check before sba_unmap_sg_attrs()");
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+	while (nents && sglist->dma_length) {
+
+		sba_unmap_single_attrs(dev, sglist->dma_address,
+				       sglist->dma_length, dir, attrs);
+		sglist = sg_next(sglist);
+		nents--;
+	}
+
+	DBG_RUN_SG("%s() DONE (nents %d)\n", __func__,  nents);
+
+#ifdef ASSERT_PDIR_SANITY
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	sba_check_pdir(ioc,"Check after sba_unmap_sg_attrs()");
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+}
+
+/**************************************************************
+*
+*   Initialization and claim
+*
+***************************************************************/
+
+static void __init
+ioc_iova_init(struct ioc *ioc)
+{
+	int tcnfg;
+	int agp_found = 0;
+	struct pci_dev *device = NULL;
+#ifdef FULL_VALID_PDIR
+	unsigned long index;
+#endif
+
+	/*
+	** Firmware programs the base and size of a "safe IOVA space"
+	** (one that doesn't overlap memory or LMMIO space) in the
+	** IBASE and IMASK registers.
+	*/
+	ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL;
+	ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL;
+
+	ioc->iov_size = ~ioc->imask + 1;
+
+	DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n",
+		__func__, ioc->ioc_hpa, ioc->ibase, ioc->imask,
+		ioc->iov_size >> 20);
+
+	switch (iovp_size) {
+		case  4*1024: tcnfg = 0; break;
+		case  8*1024: tcnfg = 1; break;
+		case 16*1024: tcnfg = 2; break;
+		case 64*1024: tcnfg = 3; break;
+		default:
+			panic(PFX "Unsupported IOTLB page size %ldK",
+				iovp_size >> 10);
+			break;
+	}
+	WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG);
+
+	ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE;
+	ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL,
+						   get_order(ioc->pdir_size));
+	if (!ioc->pdir_base)
+		panic(PFX "Couldn't allocate I/O Page Table\n");
+
+	memset(ioc->pdir_base, 0, ioc->pdir_size);
+
+	DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __func__,
+		iovp_size >> 10, ioc->pdir_base, ioc->pdir_size);
+
+	ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base);
+	WRITE_REG(virt_to_phys(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE);
+
+	/*
+	** If an AGP device is present, only use half of the IOV space
+	** for PCI DMA.  Unfortunately we can't know ahead of time
+	** whether GART support will actually be used, for now we
+	** can just key on an AGP device found in the system.
+	** We program the next pdir index after we stop w/ a key for
+	** the GART code to handshake on.
+	*/
+	for_each_pci_dev(device)	
+		agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP);
+
+	if (agp_found && reserve_sba_gart) {
+		printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n",
+		      ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2);
+		ioc->pdir_size /= 2;
+		((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE;
+	}
+#ifdef FULL_VALID_PDIR
+	/*
+  	** Check to see if the spill page has been allocated, we don't need more than
+	** one across multiple SBAs.
+	*/
+	if (!prefetch_spill_page) {
+		char *spill_poison = "SBAIOMMU POISON";
+		int poison_size = 16;
+		void *poison_addr, *addr;
+
+		addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size));
+		if (!addr)
+			panic(PFX "Couldn't allocate PDIR spill page\n");
+
+		poison_addr = addr;
+		for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size)
+			memcpy(poison_addr, spill_poison, poison_size);
+
+		prefetch_spill_page = virt_to_phys(addr);
+
+		DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __func__, prefetch_spill_page);
+	}
+	/*
+  	** Set all the PDIR entries valid w/ the spill page as the target
+	*/
+	for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++)
+		((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page);
+#endif
+
+	/* Clear I/O TLB of any possible entries */
+	WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
+	READ_REG(ioc->ioc_hpa + IOC_PCOM);
+
+	/* Enable IOVA translation */
+	WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE);
+	READ_REG(ioc->ioc_hpa + IOC_IBASE);
+}
+
+static void __init
+ioc_resource_init(struct ioc *ioc)
+{
+	spin_lock_init(&ioc->res_lock);
+#if DELAYED_RESOURCE_CNT > 0
+	spin_lock_init(&ioc->saved_lock);
+#endif
+
+	/* resource map size dictated by pdir_size */
+	ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */
+	ioc->res_size >>= 3;  /* convert bit count to byte count */
+	DBG_INIT("%s() res_size 0x%x\n", __func__, ioc->res_size);
+
+	ioc->res_map = (char *) __get_free_pages(GFP_KERNEL,
+						 get_order(ioc->res_size));
+	if (!ioc->res_map)
+		panic(PFX "Couldn't allocate resource map\n");
+
+	memset(ioc->res_map, 0, ioc->res_size);
+	/* next available IOVP - circular search */
+	ioc->res_hint = (unsigned long *) ioc->res_map;
+
+#ifdef ASSERT_PDIR_SANITY
+	/* Mark first bit busy - ie no IOVA 0 */
+	ioc->res_map[0] = 0x1;
+	ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE;
+#endif
+#ifdef FULL_VALID_PDIR
+	/* Mark the last resource used so we don't prefetch beyond IOVA space */
+	ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */
+	ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF
+							      | prefetch_spill_page);
+#endif
+
+	DBG_INIT("%s() res_map %x %p\n", __func__,
+		 ioc->res_size, (void *) ioc->res_map);
+}
+
+static void __init
+ioc_sac_init(struct ioc *ioc)
+{
+	struct pci_dev *sac = NULL;
+	struct pci_controller *controller = NULL;
+
+	/*
+	 * pci_alloc_coherent() must return a DMA address which is
+	 * SAC (single address cycle) addressable, so allocate a
+	 * pseudo-device to enforce that.
+	 */
+	sac = kzalloc(sizeof(*sac), GFP_KERNEL);
+	if (!sac)
+		panic(PFX "Couldn't allocate struct pci_dev");
+
+	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
+	if (!controller)
+		panic(PFX "Couldn't allocate struct pci_controller");
+
+	controller->iommu = ioc;
+	sac->sysdata = controller;
+	sac->dma_mask = 0xFFFFFFFFUL;
+#ifdef CONFIG_PCI
+	sac->dev.bus = &pci_bus_type;
+#endif
+	ioc->sac_only_dev = sac;
+}
+
+static void __init
+ioc_zx1_init(struct ioc *ioc)
+{
+	unsigned long rope_config;
+	unsigned int i;
+
+	if (ioc->rev < 0x20)
+		panic(PFX "IOC 2.0 or later required for IOMMU support\n");
+
+	/* 38 bit memory controller + extra bit for range displaced by MMIO */
+	ioc->dma_mask = (0x1UL << 39) - 1;
+
+	/*
+	** Clear ROPE(N)_CONFIG AO bit.
+	** Disables "NT Ordering" (~= !"Relaxed Ordering")
+	** Overrides bit 1 in DMA Hint Sets.
+	** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701.
+	*/
+	for (i=0; i<(8*8); i+=8) {
+		rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i);
+		rope_config &= ~IOC_ROPE_AO;
+		WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i);
+	}
+}
+
+typedef void (initfunc)(struct ioc *);
+
+struct ioc_iommu {
+	u32 func_id;
+	char *name;
+	initfunc *init;
+};
+
+static struct ioc_iommu ioc_iommu_info[] __initdata = {
+	{ ZX1_IOC_ID, "zx1", ioc_zx1_init },
+	{ ZX2_IOC_ID, "zx2", NULL },
+	{ SX1000_IOC_ID, "sx1000", NULL },
+	{ SX2000_IOC_ID, "sx2000", NULL },
+};
+
+static struct ioc * __init
+ioc_init(unsigned long hpa, void *handle)
+{
+	struct ioc *ioc;
+	struct ioc_iommu *info;
+
+	ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
+	if (!ioc)
+		return NULL;
+
+	ioc->next = ioc_list;
+	ioc_list = ioc;
+
+	ioc->handle = handle;
+	ioc->ioc_hpa = ioremap(hpa, 0x1000);
+
+	ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID);
+	ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL;
+	ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL;	/* conservative */
+
+	for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) {
+		if (ioc->func_id == info->func_id) {
+			ioc->name = info->name;
+			if (info->init)
+				(info->init)(ioc);
+		}
+	}
+
+	iovp_size = (1 << iovp_shift);
+	iovp_mask = ~(iovp_size - 1);
+
+	DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __func__,
+		PAGE_SIZE >> 10, iovp_size >> 10);
+
+	if (!ioc->name) {
+		ioc->name = kmalloc(24, GFP_KERNEL);
+		if (ioc->name)
+			sprintf((char *) ioc->name, "Unknown (%04x:%04x)",
+				ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF);
+		else
+			ioc->name = "Unknown";
+	}
+
+	ioc_iova_init(ioc);
+	ioc_resource_init(ioc);
+	ioc_sac_init(ioc);
+
+	if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask)
+		ia64_max_iommu_merge_mask = ~iovp_mask;
+
+	printk(KERN_INFO PFX
+		"%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
+		ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
+		hpa, ioc->iov_size >> 20, ioc->ibase);
+
+	return ioc;
+}
+
+
+
+/**************************************************************************
+**
+**   SBA initialization code (HW and SW)
+**
+**   o identify SBA chip itself
+**   o FIXME: initialize DMA hints for reasonable defaults
+**
+**************************************************************************/
+
+#ifdef CONFIG_PROC_FS
+static void *
+ioc_start(struct seq_file *s, loff_t *pos)
+{
+	struct ioc *ioc;
+	loff_t n = *pos;
+
+	for (ioc = ioc_list; ioc; ioc = ioc->next)
+		if (!n--)
+			return ioc;
+
+	return NULL;
+}
+
+static void *
+ioc_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct ioc *ioc = v;
+
+	++*pos;
+	return ioc->next;
+}
+
+static void
+ioc_stop(struct seq_file *s, void *v)
+{
+}
+
+static int
+ioc_show(struct seq_file *s, void *v)
+{
+	struct ioc *ioc = v;
+	unsigned long *res_ptr = (unsigned long *)ioc->res_map;
+	int i, used = 0;
+
+	seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
+		ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
+#ifdef CONFIG_NUMA
+	if (ioc->node != MAX_NUMNODES)
+		seq_printf(s, "NUMA node       : %d\n", ioc->node);
+#endif
+	seq_printf(s, "IOVA size       : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
+	seq_printf(s, "IOVA page size  : %ld kb\n", iovp_size/1024);
+
+	for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr)
+		used += hweight64(*res_ptr);
+
+	seq_printf(s, "PDIR size       : %d entries\n", ioc->pdir_size >> 3);
+	seq_printf(s, "PDIR used       : %d entries\n", used);
+
+#ifdef PDIR_SEARCH_TIMING
+	{
+		unsigned long i = 0, avg = 0, min, max;
+		min = max = ioc->avg_search[0];
+		for (i = 0; i < SBA_SEARCH_SAMPLE; i++) {
+			avg += ioc->avg_search[i];
+			if (ioc->avg_search[i] > max) max = ioc->avg_search[i];
+			if (ioc->avg_search[i] < min) min = ioc->avg_search[i];
+		}
+		avg /= SBA_SEARCH_SAMPLE;
+		seq_printf(s, "Bitmap search   : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n",
+		           min, avg, max);
+	}
+#endif
+#ifndef ALLOW_IOV_BYPASS
+	 seq_printf(s, "IOVA bypass disabled\n");
+#endif
+	return 0;
+}
+
+static const struct seq_operations ioc_seq_ops = {
+	.start = ioc_start,
+	.next  = ioc_next,
+	.stop  = ioc_stop,
+	.show  = ioc_show
+};
+
+static int
+ioc_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ioc_seq_ops);
+}
+
+static const struct file_operations ioc_fops = {
+	.open    = ioc_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+static void __init
+ioc_proc_init(void)
+{
+	struct proc_dir_entry *dir;
+
+	dir = proc_mkdir("bus/mckinley", NULL);
+	if (!dir)
+		return;
+
+	proc_create(ioc_list->name, 0, dir, &ioc_fops);
+}
+#endif
+
+static void
+sba_connect_bus(struct pci_bus *bus)
+{
+	acpi_handle handle, parent;
+	acpi_status status;
+	struct ioc *ioc;
+
+	if (!PCI_CONTROLLER(bus))
+		panic(PFX "no sysdata on bus %d!\n", bus->number);
+
+	if (PCI_CONTROLLER(bus)->iommu)
+		return;
+
+	handle = PCI_CONTROLLER(bus)->acpi_handle;
+	if (!handle)
+		return;
+
+	/*
+	 * The IOC scope encloses PCI root bridges in the ACPI
+	 * namespace, so work our way out until we find an IOC we
+	 * claimed previously.
+	 */
+	do {
+		for (ioc = ioc_list; ioc; ioc = ioc->next)
+			if (ioc->handle == handle) {
+				PCI_CONTROLLER(bus)->iommu = ioc;
+				return;
+			}
+
+		status = acpi_get_parent(handle, &parent);
+		handle = parent;
+	} while (ACPI_SUCCESS(status));
+
+	printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
+}
+
+#ifdef CONFIG_NUMA
+static void __init
+sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
+{
+	unsigned int node;
+	int pxm;
+
+	ioc->node = MAX_NUMNODES;
+
+	pxm = acpi_get_pxm(handle);
+
+	if (pxm < 0)
+		return;
+
+	node = pxm_to_node(pxm);
+
+	if (node >= MAX_NUMNODES || !node_online(node))
+		return;
+
+	ioc->node = node;
+	return;
+}
+#else
+#define sba_map_ioc_to_node(ioc, handle)
+#endif
+
+static int __init
+acpi_sba_ioc_add(struct acpi_device *device)
+{
+	struct ioc *ioc;
+	acpi_status status;
+	u64 hpa, length;
+	struct acpi_device_info *adi;
+
+	status = hp_acpi_csr_space(device->handle, &hpa, &length);
+	if (ACPI_FAILURE(status))
+		return 1;
+
+	status = acpi_get_object_info(device->handle, &adi);
+	if (ACPI_FAILURE(status))
+		return 1;
+
+	/*
+	 * For HWP0001, only SBA appears in ACPI namespace.  It encloses the PCI
+	 * root bridges, and its CSR space includes the IOC function.
+	 */
+	if (strncmp("HWP0001", adi->hardware_id.string, 7) == 0) {
+		hpa += ZX1_IOC_OFFSET;
+		/* zx1 based systems default to kernel page size iommu pages */
+		if (!iovp_shift)
+			iovp_shift = min(PAGE_SHIFT, 16);
+	}
+	kfree(adi);
+
+	/*
+	 * default anything not caught above or specified on cmdline to 4k
+	 * iommu page size
+	 */
+	if (!iovp_shift)
+		iovp_shift = 12;
+
+	ioc = ioc_init(hpa, device->handle);
+	if (!ioc)
+		return 1;
+
+	/* setup NUMA node association */
+	sba_map_ioc_to_node(ioc, device->handle);
+	return 0;
+}
+
+static const struct acpi_device_id hp_ioc_iommu_device_ids[] = {
+	{"HWP0001", 0},
+	{"HWP0004", 0},
+	{"", 0},
+};
+static struct acpi_driver acpi_sba_ioc_driver = {
+	.name		= "IOC IOMMU Driver",
+	.ids		= hp_ioc_iommu_device_ids,
+	.ops		= {
+		.add	= acpi_sba_ioc_add,
+	},
+};
+
+extern struct dma_map_ops swiotlb_dma_ops;
+
+static int __init
+sba_init(void)
+{
+	if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
+		return 0;
+
+#if defined(CONFIG_IA64_GENERIC)
+	/* If we are booting a kdump kernel, the sba_iommu will
+	 * cause devices that were not shutdown properly to MCA
+	 * as soon as they are turned back on.  Our only option for
+	 * a successful kdump kernel boot is to use the swiotlb.
+	 */
+	if (is_kdump_kernel()) {
+		dma_ops = &swiotlb_dma_ops;
+		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
+			panic("Unable to initialize software I/O TLB:"
+				  " Try machvec=dig boot option");
+		machvec_init("dig");
+		return 0;
+	}
+#endif
+
+	acpi_bus_register_driver(&acpi_sba_ioc_driver);
+	if (!ioc_list) {
+#ifdef CONFIG_IA64_GENERIC
+		/*
+		 * If we didn't find something sba_iommu can claim, we
+		 * need to setup the swiotlb and switch to the dig machvec.
+		 */
+		dma_ops = &swiotlb_dma_ops;
+		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
+			panic("Unable to find SBA IOMMU or initialize "
+			      "software I/O TLB: Try machvec=dig boot option");
+		machvec_init("dig");
+#else
+		panic("Unable to find SBA IOMMU: Try a generic or DIG kernel");
+#endif
+		return 0;
+	}
+
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB)
+	/*
+	 * hpzx1_swiotlb needs to have a fairly small swiotlb bounce
+	 * buffer setup to support devices with smaller DMA masks than
+	 * sba_iommu can handle.
+	 */
+	if (ia64_platform_is("hpzx1_swiotlb")) {
+		extern void hwsw_init(void);
+
+		hwsw_init();
+	}
+#endif
+
+#ifdef CONFIG_PCI
+	{
+		struct pci_bus *b = NULL;
+		while ((b = pci_find_next_bus(b)) != NULL)
+			sba_connect_bus(b);
+	}
+#endif
+
+#ifdef CONFIG_PROC_FS
+	ioc_proc_init();
+#endif
+	return 0;
+}
+
+subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */
+
+static int __init
+nosbagart(char *str)
+{
+	reserve_sba_gart = 0;
+	return 1;
+}
+
+static int sba_dma_supported (struct device *dev, u64 mask)
+{
+	/* make sure it's at least 32bit capable */
+	return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL);
+}
+
+static int sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+__setup("nosbagart", nosbagart);
+
+static int __init
+sba_page_override(char *str)
+{
+	unsigned long page_size;
+
+	page_size = memparse(str, &str);
+	switch (page_size) {
+		case 4096:
+		case 8192:
+		case 16384:
+		case 65536:
+			iovp_shift = ffs(page_size) - 1;
+			break;
+		default:
+			printk("%s: unknown/unsupported iommu page size %ld\n",
+			       __func__, page_size);
+	}
+
+	return 1;
+}
+
+__setup("sbapagesize=",sba_page_override);
+
+struct dma_map_ops sba_dma_ops = {
+	.alloc_coherent		= sba_alloc_coherent,
+	.free_coherent		= sba_free_coherent,
+	.map_page		= sba_map_page,
+	.unmap_page		= sba_unmap_page,
+	.map_sg			= sba_map_sg_attrs,
+	.unmap_sg		= sba_unmap_sg_attrs,
+	.sync_single_for_cpu	= machvec_dma_sync_single,
+	.sync_sg_for_cpu	= machvec_dma_sync_sg,
+	.sync_single_for_device	= machvec_dma_sync_single,
+	.sync_sg_for_device	= machvec_dma_sync_sg,
+	.dma_supported		= sba_dma_supported,
+	.mapping_error		= sba_dma_mapping_error,
+};
+
+void sba_dma_init(void)
+{
+	dma_ops = &sba_dma_ops;
+}
diff --git a/arch/ia64/hp/sim/Kconfig b/arch/ia64/hp/sim/Kconfig
new file mode 100644
index 00000000..8d513a8c
--- /dev/null
+++ b/arch/ia64/hp/sim/Kconfig
@@ -0,0 +1,21 @@
+
+menu "HP Simulator drivers"
+	depends on IA64_HP_SIM || IA64_GENERIC
+
+config HP_SIMETH
+	bool "Simulated Ethernet "
+	depends on NET
+
+config HP_SIMSERIAL
+	bool "Simulated serial driver support"
+
+config HP_SIMSERIAL_CONSOLE
+	bool "Console for HP simulator"
+	depends on HP_SIMSERIAL
+
+config HP_SIMSCSI
+	bool "Simulated SCSI disk"
+	depends on SCSI=y
+
+endmenu
+
diff --git a/arch/ia64/hp/sim/Makefile b/arch/ia64/hp/sim/Makefile
new file mode 100644
index 00000000..d10da479
--- /dev/null
+++ b/arch/ia64/hp/sim/Makefile
@@ -0,0 +1,16 @@
+#
+# ia64/platform/hp/sim/Makefile
+#
+# Copyright (C) 2002 Hewlett-Packard Co.
+#	David Mosberger-Tang <davidm@hpl.hp.com>
+# Copyright (C) 1999 Silicon Graphics, Inc.
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+#
+
+obj-y := hpsim_irq.o hpsim_setup.o hpsim.o
+obj-$(CONFIG_IA64_GENERIC) += hpsim_machvec.o
+
+obj-$(CONFIG_HP_SIMETH)	+= simeth.o
+obj-$(CONFIG_HP_SIMSERIAL) += simserial.o
+obj-$(CONFIG_HP_SIMSERIAL_CONSOLE) += hpsim_console.o
+obj-$(CONFIG_HP_SIMSCSI) += simscsi.o
diff --git a/arch/ia64/hp/sim/boot/Makefile b/arch/ia64/hp/sim/boot/Makefile
new file mode 100644
index 00000000..2e805e0c
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/Makefile
@@ -0,0 +1,37 @@
+#
+# ia64/boot/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1998, 2003 by David Mosberger-Tang <davidm@hpl.hp.com>
+#
+
+targets-$(CONFIG_IA64_HP_SIM)  += bootloader
+targets := vmlinux.bin vmlinux.gz $(targets-y)
+
+quiet_cmd_cptotop = LN      $@
+      cmd_cptotop = ln -f $< $@
+
+vmlinux.gz: $(obj)/vmlinux.gz $(addprefix $(obj)/,$(targets-y))
+	$(call cmd,cptotop)
+	@echo '  Kernel: $@ is ready'
+
+boot: bootloader
+
+bootloader: $(obj)/bootloader
+	$(call cmd,cptotop)
+
+$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+
+LDFLAGS_bootloader = -static -T
+
+$(obj)/bootloader: $(src)/bootloader.lds $(obj)/bootloader.o $(obj)/boot_head.o $(obj)/fw-emu.o \
+                   lib/lib.a arch/ia64/lib/built-in.o arch/ia64/lib/lib.a FORCE
+	$(call if_changed,ld)
diff --git a/arch/ia64/hp/sim/boot/boot_head.S b/arch/ia64/hp/sim/boot/boot_head.S
new file mode 100644
index 00000000..88085654
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/boot_head.S
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/pal.h>
+
+	.bss
+	.align 16
+stack_mem:
+	.skip 16834
+
+	.text
+
+/* This needs to be defined because lib/string.c:strlcat() calls it in case of error... */
+GLOBAL_ENTRY(printk)
+	break 0
+END(printk)
+
+GLOBAL_ENTRY(_start)
+	.prologue
+	.save rp, r0
+	.body
+	movl gp = __gp
+	movl sp = stack_mem+16384-16
+	bsw.1
+	br.call.sptk.many rp=start_bootloader
+0:	nop 0		  /* dummy nop to make unwinding work */
+END(_start)
+
+/*
+ * Set a break point on this function so that symbols are available to set breakpoints in
+ * the kernel being debugged.
+ */
+GLOBAL_ENTRY(debug_break)
+	br.ret.sptk.many b0
+END(debug_break)
+
+GLOBAL_ENTRY(ssc)
+	.regstk 5,0,0,0
+	mov r15=in4
+	break 0x80001
+	br.ret.sptk.many b0
+END(ssc)
+
+GLOBAL_ENTRY(jmp_to_kernel)
+	.regstk 2,0,0,0
+	mov r28=in0
+	mov b7=in1
+	br.sptk.few b7
+END(jmp_to_kernel)
+
+/*
+ * r28 contains the index of the PAL function
+ * r29--31 the args
+ * Return values in ret0--3 (r8--11)
+ */
+GLOBAL_ENTRY(pal_emulator_static)
+	mov r8=-1
+	mov r9=256
+	;;
+	cmp.gtu p6,p7=r9,r28		/* r28 <= 255? */
+(p6)	br.cond.sptk.few static
+	;;
+	mov r9=512
+	;;
+	cmp.gtu p6,p7=r9,r28
+(p6)	br.cond.sptk.few stacked
+	;;
+static:	cmp.eq p6,p7=PAL_PTCE_INFO,r28
+(p7)	br.cond.sptk.few 1f
+	;;
+	mov r8=0			/* status = 0 */
+	movl r9=0x100000000		/* tc.base */
+	movl r10=0x0000000200000003	/* count[0], count[1] */
+	movl r11=0x1000000000002000	/* stride[0], stride[1] */
+	br.cond.sptk.few rp
+1:	cmp.eq p6,p7=PAL_FREQ_RATIOS,r28
+(p7)	br.cond.sptk.few 1f
+	mov r8=0			/* status = 0 */
+	movl r9 =0x100000064		/* proc_ratio (1/100) */
+	movl r10=0x100000100		/* bus_ratio<<32 (1/256) */
+	movl r11=0x100000064		/* itc_ratio<<32 (1/100) */
+	;;
+1:	cmp.eq p6,p7=PAL_RSE_INFO,r28
+(p7)	br.cond.sptk.few 1f
+	mov r8=0			/* status = 0 */
+	mov r9=96			/* num phys stacked */
+	mov r10=0			/* hints */
+	mov r11=0
+	br.cond.sptk.few rp
+1:	cmp.eq p6,p7=PAL_CACHE_FLUSH,r28		/* PAL_CACHE_FLUSH */
+(p7)	br.cond.sptk.few 1f
+	mov r9=ar.lc
+	movl r8=524288			/* flush 512k million cache lines (16MB) */
+	;;
+	mov ar.lc=r8
+	movl r8=0xe000000000000000
+	;;
+.loop:	fc r8
+	add r8=32,r8
+	br.cloop.sptk.few .loop
+	sync.i
+	;;
+	srlz.i
+	;;
+	mov ar.lc=r9
+	mov r8=r0
+	;;
+1:	cmp.eq p6,p7=PAL_PERF_MON_INFO,r28
+(p7)	br.cond.sptk.few 1f
+	mov r8=0			/* status = 0 */
+	movl r9 =0x08122f04		/* generic=4 width=47 retired=8 cycles=18 */
+	mov r10=0			/* reserved */
+	mov r11=0			/* reserved */
+	mov r16=0xffff			/* implemented PMC */
+	mov r17=0x3ffff			/* implemented PMD */
+	add r18=8,r29			/* second index */
+	;;
+	st8 [r29]=r16,16		/* store implemented PMC */
+	st8 [r18]=r0,16			/* clear remaining bits  */
+	;;
+	st8 [r29]=r0,16			/* clear remaining bits  */
+	st8 [r18]=r0,16			/* clear remaining bits  */
+	;;
+	st8 [r29]=r17,16		/* store implemented PMD */
+	st8 [r18]=r0,16			/* clear remaining bits  */
+	mov r16=0xf0			/* cycles count capable PMC */
+	;;
+	st8 [r29]=r0,16			/* clear remaining bits  */
+	st8 [r18]=r0,16			/* clear remaining bits  */
+	mov r17=0xf0			/* retired bundles capable PMC */
+	;;
+	st8 [r29]=r16,16		/* store cycles capable */
+	st8 [r18]=r0,16			/* clear remaining bits  */
+	;;
+	st8 [r29]=r0,16			/* clear remaining bits  */
+	st8 [r18]=r0,16			/* clear remaining bits  */
+	;;
+	st8 [r29]=r17,16		/* store retired bundle capable */
+	st8 [r18]=r0,16			/* clear remaining bits  */
+	;;
+	st8 [r29]=r0,16			/* clear remaining bits  */
+	st8 [r18]=r0,16			/* clear remaining bits  */
+	;;
+1:	cmp.eq p6,p7=PAL_VM_SUMMARY,r28
+(p7)	br.cond.sptk.few 1f
+	mov	r8=0			/* status = 0  */
+	movl	r9=0x2044040020F1865	/* num_tc_levels=2, num_unique_tcs=4 */
+					/* max_itr_entry=64, max_dtr_entry=64 */
+					/* hash_tag_id=2, max_pkr=15 */
+					/* key_size=24, phys_add_size=50, vw=1 */
+	movl	r10=0x183C		/* rid_size=24, impl_va_msb=60 */
+	;;
+1:	cmp.eq p6,p7=PAL_MEM_ATTRIB,r28
+(p7)	br.cond.sptk.few 1f
+	mov	r8=0			/* status = 0 */
+	mov	r9=0x80|0x01		/* NatPage|WB */
+	;;
+1:	br.cond.sptk.few rp
+stacked:
+	br.ret.sptk.few rp
+END(pal_emulator_static)
diff --git a/arch/ia64/hp/sim/boot/bootloader.c b/arch/ia64/hp/sim/boot/bootloader.c
new file mode 100644
index 00000000..c5e9baaf
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/bootloader.c
@@ -0,0 +1,175 @@
+/*
+ * arch/ia64/hp/sim/boot/bootloader.c
+ *
+ * Loads an ELF kernel.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 01/07/99 S.Eranian modified to pass command line arguments to kernel
+ */
+struct task_struct;	/* forward declaration for elf.h */
+
+#include <linux/elf.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <asm/elf.h>
+#include <asm/intrinsics.h>
+#include <asm/pal.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+
+#include "ssc.h"
+
+struct disk_req {
+	unsigned long addr;
+	unsigned len;
+};
+
+struct disk_stat {
+	int fd;
+	unsigned count;
+};
+
+extern void jmp_to_kernel (unsigned long bp, unsigned long e_entry);
+extern struct ia64_boot_param *sys_fw_init (const char *args, int arglen);
+extern void debug_break (void);
+
+static void
+cons_write (const char *buf)
+{
+	unsigned long ch;
+
+	while ((ch = *buf++) != '\0') {
+		ssc(ch, 0, 0, 0, SSC_PUTCHAR);
+		if (ch == '\n')
+		  ssc('\r', 0, 0, 0, SSC_PUTCHAR);
+	}
+}
+
+#define MAX_ARGS 32
+
+void
+start_bootloader (void)
+{
+	static char mem[4096];
+	static char buffer[1024];
+	unsigned long off;
+	int fd, i;
+	struct disk_req req;
+	struct disk_stat stat;
+	struct elfhdr *elf;
+	struct elf_phdr *elf_phdr;	/* program header */
+	unsigned long e_entry, e_phoff, e_phnum;
+	register struct ia64_boot_param *bp;
+	char *kpath, *args;
+	long arglen = 0;
+
+	ssc(0, 0, 0, 0, SSC_CONSOLE_INIT);
+
+	/*
+	 * S.Eranian: extract the commandline argument from the simulator
+	 *
+	 * The expected format is as follows:
+         *
+	 *	kernelname args...
+	 *
+	 * Both are optional but you can't have the second one without the first.
+	 */
+	arglen = ssc((long) buffer, 0, 0, 0, SSC_GET_ARGS);
+
+	kpath = "vmlinux";
+	args = buffer;
+	if (arglen > 0) {
+		kpath = buffer;
+		while (*args != ' ' && *args != '\0')
+			++args, --arglen;
+		if (*args == ' ')
+			*args++ = '\0', --arglen;
+	}
+
+	if (arglen <= 0) {
+		args = "";
+		arglen = 1;
+	}
+
+	fd = ssc((long) kpath, 1, 0, 0, SSC_OPEN);
+
+	if (fd < 0) {
+		cons_write(kpath);
+		cons_write(": file not found, reboot now\n");
+		for(;;);
+	}
+	stat.fd = fd;
+	off = 0;
+
+	req.len = sizeof(mem);
+	req.addr = (long) mem;
+	ssc(fd, 1, (long) &req, off, SSC_READ);
+	ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+
+	elf = (struct elfhdr *) mem;
+	if (elf->e_ident[0] == 0x7f && strncmp(elf->e_ident + 1, "ELF", 3) != 0) {
+		cons_write("not an ELF file\n");
+		return;
+	}
+	if (elf->e_type != ET_EXEC) {
+		cons_write("not an ELF executable\n");
+		return;
+	}
+	if (!elf_check_arch(elf)) {
+		cons_write("kernel not for this processor\n");
+		return;
+	}
+
+	e_entry = elf->e_entry;
+	e_phnum = elf->e_phnum;
+	e_phoff = elf->e_phoff;
+
+	cons_write("loading ");
+	cons_write(kpath);
+	cons_write("...\n");
+
+	for (i = 0; i < e_phnum; ++i) {
+		req.len = sizeof(*elf_phdr);
+		req.addr = (long) mem;
+		ssc(fd, 1, (long) &req, e_phoff, SSC_READ);
+		ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+		if (stat.count != sizeof(*elf_phdr)) {
+			cons_write("failed to read phdr\n");
+			return;
+		}
+		e_phoff += sizeof(*elf_phdr);
+
+		elf_phdr = (struct elf_phdr *) mem;
+
+		if (elf_phdr->p_type != PT_LOAD)
+			continue;
+
+		req.len = elf_phdr->p_filesz;
+		req.addr = __pa(elf_phdr->p_paddr);
+		ssc(fd, 1, (long) &req, elf_phdr->p_offset, SSC_READ);
+		ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+		memset((char *)__pa(elf_phdr->p_paddr) + elf_phdr->p_filesz, 0,
+		       elf_phdr->p_memsz - elf_phdr->p_filesz);
+	}
+	ssc(fd, 0, 0, 0, SSC_CLOSE);
+
+	cons_write("starting kernel...\n");
+
+	/* fake an I/O base address: */
+	ia64_setreg(_IA64_REG_AR_KR0, 0xffffc000000UL);
+
+	bp = sys_fw_init(args, arglen);
+
+	ssc(0, (long) kpath, 0, 0, SSC_LOAD_SYMBOLS);
+
+	debug_break();
+	jmp_to_kernel((unsigned long) bp, e_entry);
+
+	cons_write("kernel returned!\n");
+	ssc(-1, 0, 0, 0, SSC_EXIT);
+}
diff --git a/arch/ia64/hp/sim/boot/bootloader.lds b/arch/ia64/hp/sim/boot/bootloader.lds
new file mode 100644
index 00000000..3977f25a
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/bootloader.lds
@@ -0,0 +1,66 @@
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(_start)
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+  . = 0x100000;
+
+  _text = .;
+  .text : { *(__ivt_section) *(.text) }
+  _etext = .;
+
+  /* Global data */
+  _data = .;
+  .rodata : { *(.rodata) *(.rodata.*) }
+  .data    : { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS }
+  __gp = ALIGN (8) + 0x200000;
+  .got           : { *(.got.plt) *(.got) }
+  /* We want the small data sections together, so single-instruction offsets
+     can access them all, and initialized data all before uninitialized, so
+     we can shorten the on-disk segment size.  */
+  .sdata     : { *(.sdata) }
+  _edata  =  .;
+
+  __bss_start = .;
+  .sbss      : { *(.sbss) *(.scommon) }
+  .bss       : { *(.bss) *(COMMON) }
+  . = ALIGN(64 / 8);
+  __bss_stop = .;
+  _end = . ;
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+  /* DWARF debug sections.
+     Symbols in the DWARF debugging sections are relative to the beginning
+     of the section so we begin them at 0.  */
+  /* DWARF 1 */
+  .debug          0 : { *(.debug) }
+  .line           0 : { *(.line) }
+  /* GNU DWARF 1 extensions */
+  .debug_srcinfo  0 : { *(.debug_srcinfo) }
+  .debug_sfnames  0 : { *(.debug_sfnames) }
+  /* DWARF 1.1 and DWARF 2 */
+  .debug_aranges  0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+  /* DWARF 2 */
+  .debug_info     0 : { *(.debug_info) }
+  .debug_abbrev   0 : { *(.debug_abbrev) }
+  .debug_line     0 : { *(.debug_line) }
+  .debug_frame    0 : { *(.debug_frame) }
+  .debug_str      0 : { *(.debug_str) }
+  .debug_loc      0 : { *(.debug_loc) }
+  .debug_macinfo  0 : { *(.debug_macinfo) }
+  /* SGI/MIPS DWARF 2 extensions */
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames  0 : { *(.debug_varnames) }
+  /* These must appear regardless of  .  */
+}
diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c
new file mode 100644
index 00000000..bf6d9d8c
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/fw-emu.c
@@ -0,0 +1,381 @@
+/*
+ * PAL & SAL emulation.
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#ifdef CONFIG_PCI
+# include <linux/pci.h>
+#endif
+
+#include <linux/efi.h>
+#include <asm/io.h>
+#include <asm/pal.h>
+#include <asm/sal.h>
+
+#include "ssc.h"
+
+#define MB	(1024*1024UL)
+
+#define SIMPLE_MEMMAP	1
+
+#if SIMPLE_MEMMAP
+# define NUM_MEM_DESCS	4
+#else
+# define NUM_MEM_DESCS	16
+#endif
+
+static char fw_mem[(  sizeof(struct ia64_boot_param)
+		    + sizeof(efi_system_table_t)
+		    + sizeof(efi_runtime_services_t)
+		    + 1*sizeof(efi_config_table_t)
+		    + sizeof(struct ia64_sal_systab)
+		    + sizeof(struct ia64_sal_desc_entry_point)
+		    + NUM_MEM_DESCS*(sizeof(efi_memory_desc_t))
+		    + 1024)] __attribute__ ((aligned (8)));
+
+#define SECS_PER_HOUR   (60 * 60)
+#define SECS_PER_DAY    (SECS_PER_HOUR * 24)
+
+/* Compute the `struct tm' representation of *T,
+   offset OFFSET seconds east of UTC,
+   and store year, yday, mon, mday, wday, hour, min, sec into *TP.
+   Return nonzero if successful.  */
+int
+offtime (unsigned long t, efi_time_t *tp)
+{
+	const unsigned short int __mon_yday[2][13] =
+	{
+		/* Normal years.  */
+		{ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+		/* Leap years.  */
+		{ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+	};
+	long int days, rem, y;
+	const unsigned short int *ip;
+
+	days = t / SECS_PER_DAY;
+	rem = t % SECS_PER_DAY;
+	while (rem < 0) {
+		rem += SECS_PER_DAY;
+		--days;
+	}
+	while (rem >= SECS_PER_DAY) {
+		rem -= SECS_PER_DAY;
+		++days;
+	}
+	tp->hour = rem / SECS_PER_HOUR;
+	rem %= SECS_PER_HOUR;
+	tp->minute = rem / 60;
+	tp->second = rem % 60;
+	/* January 1, 1970 was a Thursday.  */
+	y = 1970;
+
+#	define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
+#	define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
+#	define __isleap(year) \
+	  ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
+
+	while (days < 0 || days >= (__isleap (y) ? 366 : 365)) {
+		/* Guess a corrected year, assuming 365 days per year.  */
+		long int yg = y + days / 365 - (days % 365 < 0);
+
+		/* Adjust DAYS and Y to match the guessed year.  */
+		days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1)
+			 - LEAPS_THRU_END_OF (y - 1));
+		y = yg;
+	}
+	tp->year = y;
+	ip = __mon_yday[__isleap(y)];
+	for (y = 11; days < (long int) ip[y]; --y)
+		continue;
+	days -= ip[y];
+	tp->month = y + 1;
+	tp->day = days + 1;
+	return 1;
+}
+
+extern void pal_emulator_static (void);
+
+/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */
+
+#define BUILD_CMD(addr)		((0x80000000 | (addr)) & ~3)
+
+#define REG_OFFSET(addr)	(0x00000000000000FF & (addr))
+#define DEVICE_FUNCTION(addr)	(0x000000000000FF00 & (addr))
+#define BUS_NUMBER(addr)	(0x0000000000FF0000 & (addr))
+
+static efi_status_t
+fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc)
+{
+#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
+	struct {
+		int tv_sec;	/* must be 32bits to work */
+		int tv_usec;
+	} tv32bits;
+
+	ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD);
+
+	memset(tm, 0, sizeof(*tm));
+	offtime(tv32bits.tv_sec, tm);
+
+	if (tc)
+		memset(tc, 0, sizeof(*tc));
+#else
+#	error Not implemented yet...
+#endif
+	return EFI_SUCCESS;
+}
+
+static void
+efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data)
+{
+#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
+	ssc(status, 0, 0, 0, SSC_EXIT);
+#else
+#	error Not implemented yet...
+#endif
+}
+
+static efi_status_t
+efi_unimplemented (void)
+{
+	return EFI_UNSUPPORTED;
+}
+
+static struct sal_ret_values
+sal_emulator (long index, unsigned long in1, unsigned long in2,
+	      unsigned long in3, unsigned long in4, unsigned long in5,
+	      unsigned long in6, unsigned long in7)
+{
+	long r9  = 0;
+	long r10 = 0;
+	long r11 = 0;
+	long status;
+
+	/*
+	 * Don't do a "switch" here since that gives us code that
+	 * isn't self-relocatable.
+	 */
+	status = 0;
+	if (index == SAL_FREQ_BASE) {
+		switch (in1) {
+		      case SAL_FREQ_BASE_PLATFORM:
+			r9 = 200000000;
+			break;
+
+		      case SAL_FREQ_BASE_INTERVAL_TIMER:
+			/*
+			 * Is this supposed to be the cr.itc frequency
+			 * or something platform specific?  The SAL
+			 * doc ain't exactly clear on this...
+			 */
+			r9 = 700000000;
+			break;
+
+		      case SAL_FREQ_BASE_REALTIME_CLOCK:
+			r9 = 1;
+			break;
+
+		      default:
+			status = -1;
+			break;
+		}
+	} else if (index == SAL_SET_VECTORS) {
+		;
+	} else if (index == SAL_GET_STATE_INFO) {
+		;
+	} else if (index == SAL_GET_STATE_INFO_SIZE) {
+		;
+	} else if (index == SAL_CLEAR_STATE_INFO) {
+		;
+	} else if (index == SAL_MC_RENDEZ) {
+		;
+	} else if (index == SAL_MC_SET_PARAMS) {
+		;
+	} else if (index == SAL_CACHE_FLUSH) {
+		;
+	} else if (index == SAL_CACHE_INIT) {
+		;
+#ifdef CONFIG_PCI
+	} else if (index == SAL_PCI_CONFIG_READ) {
+		/*
+		 * in1 contains the PCI configuration address and in2
+		 * the size of the read.  The value that is read is
+		 * returned via the general register r9.
+		 */
+                outl(BUILD_CMD(in1), 0xCF8);
+                if (in2 == 1)                           /* Reading byte  */
+                        r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3)));
+                else if (in2 == 2)                      /* Reading word  */
+                        r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2)));
+                else                                    /* Reading dword */
+                        r9 = inl(0xCFC);
+                status = PCIBIOS_SUCCESSFUL;
+	} else if (index == SAL_PCI_CONFIG_WRITE) {
+	      	/*
+		 * in1 contains the PCI configuration address, in2 the
+		 * size of the write, and in3 the actual value to be
+		 * written out.
+		 */
+                outl(BUILD_CMD(in1), 0xCF8);
+                if (in2 == 1)                           /* Writing byte  */
+                        outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3)));
+                else if (in2 == 2)                      /* Writing word  */
+                        outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2)));
+                else                                    /* Writing dword */
+                        outl(in3, 0xCFC);
+                status = PCIBIOS_SUCCESSFUL;
+#endif /* CONFIG_PCI */
+	} else if (index == SAL_UPDATE_PAL) {
+		;
+	} else {
+		status = -1;
+	}
+	return ((struct sal_ret_values) {status, r9, r10, r11});
+}
+
+struct ia64_boot_param *
+sys_fw_init (const char *args, int arglen)
+{
+	efi_system_table_t *efi_systab;
+	efi_runtime_services_t *efi_runtime;
+	efi_config_table_t *efi_tables;
+	struct ia64_sal_systab *sal_systab;
+	efi_memory_desc_t *efi_memmap, *md;
+	unsigned long *pal_desc, *sal_desc;
+	struct ia64_sal_desc_entry_point *sal_ed;
+	struct ia64_boot_param *bp;
+	unsigned char checksum = 0;
+	char *cp, *cmd_line;
+	int i = 0;
+#	define MAKE_MD(typ, attr, start, end)		\
+	do {						\
+		md = efi_memmap + i++;			\
+		md->type = typ;				\
+		md->pad = 0;				\
+		md->phys_addr = start;			\
+		md->virt_addr = 0;			\
+		md->num_pages = (end - start) >> 12;	\
+		md->attribute = attr;			\
+	} while (0)
+
+	memset(fw_mem, 0, sizeof(fw_mem));
+
+	pal_desc = (unsigned long *) &pal_emulator_static;
+	sal_desc = (unsigned long *) &sal_emulator;
+
+	cp = fw_mem;
+	efi_systab  = (void *) cp; cp += sizeof(*efi_systab);
+	efi_runtime = (void *) cp; cp += sizeof(*efi_runtime);
+	efi_tables  = (void *) cp; cp += sizeof(*efi_tables);
+	sal_systab  = (void *) cp; cp += sizeof(*sal_systab);
+	sal_ed      = (void *) cp; cp += sizeof(*sal_ed);
+	efi_memmap  = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap);
+	bp	    = (void *) cp; cp += sizeof(*bp);
+	cmd_line    = (void *) cp;
+
+	if (args) {
+		if (arglen >= 1024)
+			arglen = 1023;
+		memcpy(cmd_line, args, arglen);
+	} else {
+		arglen = 0;
+	}
+	cmd_line[arglen] = '\0';
+
+	memset(efi_systab, 0, sizeof(*efi_systab));
+	efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
+	efi_systab->hdr.revision  = ((1 << 16) | 00);
+	efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
+	efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0");
+	efi_systab->fw_revision = 1;
+	efi_systab->runtime = (void *) __pa(efi_runtime);
+	efi_systab->nr_tables = 1;
+	efi_systab->tables = __pa(efi_tables);
+
+	efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
+	efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
+	efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
+	efi_runtime->get_time = __pa(&fw_efi_get_time);
+	efi_runtime->set_time = __pa(&efi_unimplemented);
+	efi_runtime->get_wakeup_time = __pa(&efi_unimplemented);
+	efi_runtime->set_wakeup_time = __pa(&efi_unimplemented);
+	efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented);
+	efi_runtime->get_variable = __pa(&efi_unimplemented);
+	efi_runtime->get_next_variable = __pa(&efi_unimplemented);
+	efi_runtime->set_variable = __pa(&efi_unimplemented);
+	efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented);
+	efi_runtime->reset_system = __pa(&efi_reset_system);
+
+	efi_tables->guid = SAL_SYSTEM_TABLE_GUID;
+	efi_tables->table = __pa(sal_systab);
+
+	/* fill in the SAL system table: */
+	memcpy(sal_systab->signature, "SST_", 4);
+	sal_systab->size = sizeof(*sal_systab);
+	sal_systab->sal_rev_minor = 1;
+	sal_systab->sal_rev_major = 0;
+	sal_systab->entry_count = 1;
+
+#ifdef CONFIG_IA64_GENERIC
+        strcpy(sal_systab->oem_id, "Generic");
+        strcpy(sal_systab->product_id, "IA-64 system");
+#endif
+
+#ifdef CONFIG_IA64_HP_SIM
+	strcpy(sal_systab->oem_id, "Hewlett-Packard");
+	strcpy(sal_systab->product_id, "HP-simulator");
+#endif
+
+	/* fill in an entry point: */
+	sal_ed->type = SAL_DESC_ENTRY_POINT;
+	sal_ed->pal_proc = __pa(pal_desc[0]);
+	sal_ed->sal_proc = __pa(sal_desc[0]);
+	sal_ed->gp = __pa(sal_desc[1]);
+
+	for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp)
+		checksum += *cp;
+
+	sal_systab->checksum = -checksum;
+
+#if SIMPLE_MEMMAP
+	/* simulate free memory at physical address zero */
+	MAKE_MD(EFI_BOOT_SERVICES_DATA,		EFI_MEMORY_WB,    0*MB,    1*MB);
+	MAKE_MD(EFI_PAL_CODE,			EFI_MEMORY_WB,    1*MB,    2*MB);
+	MAKE_MD(EFI_CONVENTIONAL_MEMORY,	EFI_MEMORY_WB,    2*MB,  130*MB);
+	MAKE_MD(EFI_CONVENTIONAL_MEMORY,	EFI_MEMORY_WB, 4096*MB, 4128*MB);
+#else
+	MAKE_MD( 4,		   0x9, 0x0000000000000000, 0x0000000000001000);
+	MAKE_MD( 7,		   0x9, 0x0000000000001000, 0x000000000008a000);
+	MAKE_MD( 4,		   0x9, 0x000000000008a000, 0x00000000000a0000);
+	MAKE_MD( 5, 0x8000000000000009, 0x00000000000c0000, 0x0000000000100000);
+	MAKE_MD( 7,		   0x9, 0x0000000000100000, 0x0000000004400000);
+	MAKE_MD( 2,		   0x9, 0x0000000004400000, 0x0000000004be5000);
+	MAKE_MD( 7,		   0x9, 0x0000000004be5000, 0x000000007f77e000);
+	MAKE_MD( 6, 0x8000000000000009, 0x000000007f77e000, 0x000000007fb94000);
+	MAKE_MD( 6, 0x8000000000000009, 0x000000007fb94000, 0x000000007fb95000);
+	MAKE_MD( 6, 0x8000000000000009, 0x000000007fb95000, 0x000000007fc00000);
+	MAKE_MD(13, 0x8000000000000009, 0x000000007fc00000, 0x000000007fc3a000);
+	MAKE_MD( 7,		   0x9, 0x000000007fc3a000, 0x000000007fea0000);
+	MAKE_MD( 5, 0x8000000000000009, 0x000000007fea0000, 0x000000007fea8000);
+	MAKE_MD( 7,		   0x9, 0x000000007fea8000, 0x000000007feab000);
+	MAKE_MD( 5, 0x8000000000000009, 0x000000007feab000, 0x000000007ffff000);
+	MAKE_MD( 7,		   0x9, 0x00000000ff400000, 0x0000000104000000);
+#endif
+
+	bp->efi_systab = __pa(&fw_mem);
+	bp->efi_memmap = __pa(efi_memmap);
+	bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t);
+	bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
+	bp->efi_memdesc_version = 1;
+	bp->command_line = __pa(cmd_line);
+	bp->console_info.num_cols = 80;
+	bp->console_info.num_rows = 25;
+	bp->console_info.orig_x = 0;
+	bp->console_info.orig_y = 24;
+	bp->fpswa = 0;
+
+	return bp;
+}
diff --git a/arch/ia64/hp/sim/boot/ssc.h b/arch/ia64/hp/sim/boot/ssc.h
new file mode 100644
index 00000000..3b94c03e
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/ssc.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ */
+#ifndef ssc_h
+#define ssc_h
+
+/* Simulator system calls: */
+
+#define SSC_CONSOLE_INIT		20
+#define SSC_GETCHAR			21
+#define SSC_PUTCHAR			31
+#define SSC_OPEN			50
+#define SSC_CLOSE			51
+#define SSC_READ			52
+#define SSC_WRITE			53
+#define SSC_GET_COMPLETION		54
+#define SSC_WAIT_COMPLETION		55
+#define SSC_CONNECT_INTERRUPT		58
+#define SSC_GENERATE_INTERRUPT		59
+#define SSC_SET_PERIODIC_INTERRUPT	60
+#define SSC_GET_RTC			65
+#define SSC_EXIT			66
+#define SSC_LOAD_SYMBOLS		69
+#define SSC_GET_TOD			74
+
+#define SSC_GET_ARGS			75
+
+/*
+ * Simulator system call.
+ */
+extern long ssc (long arg0, long arg1, long arg2, long arg3, int nr);
+
+#endif /* ssc_h */
diff --git a/arch/ia64/hp/sim/hpsim.S b/arch/ia64/hp/sim/hpsim.S
new file mode 100644
index 00000000..ff16e8a8
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim.S
@@ -0,0 +1,10 @@
+#include <asm/asmmacro.h>
+
+/*
+ * Simulator system call.
+ */
+GLOBAL_ENTRY(ia64_ssc)
+	mov r15=r36
+	break 0x80001
+	br.ret.sptk.many rp
+END(ia64_ssc)
diff --git a/arch/ia64/hp/sim/hpsim_console.c b/arch/ia64/hp/sim/hpsim_console.c
new file mode 100644
index 00000000..01663bc4
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_console.c
@@ -0,0 +1,76 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/tty.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/pal.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+#include <asm/hpsim.h>
+
+#include "hpsim_ssc.h"
+
+static int simcons_init (struct console *, char *);
+static void simcons_write (struct console *, const char *, unsigned);
+static struct tty_driver *simcons_console_device (struct console *, int *);
+
+static struct console hpsim_cons = {
+	.name =		"simcons",
+	.write =	simcons_write,
+	.device =	simcons_console_device,
+	.setup =	simcons_init,
+	.flags =	CON_PRINTBUFFER,
+	.index =	-1,
+};
+
+static int
+simcons_init (struct console *cons, char *options)
+{
+	return 0;
+}
+
+static void
+simcons_write (struct console *cons, const char *buf, unsigned count)
+{
+	unsigned long ch;
+
+	while (count-- > 0) {
+		ch = *buf++;
+		ia64_ssc(ch, 0, 0, 0, SSC_PUTCHAR);
+		if (ch == '\n')
+		  ia64_ssc('\r', 0, 0, 0, SSC_PUTCHAR);
+	}
+}
+
+static struct tty_driver *simcons_console_device (struct console *c, int *index)
+{
+	*index = c->index;
+	return hp_simserial_driver;
+}
+
+int simcons_register(void)
+{
+	if (!ia64_platform_is("hpsim"))
+		return 1;
+
+	if (hpsim_cons.flags & CON_ENABLED)
+		return 1;
+
+	register_console(&hpsim_cons);
+	return 0;
+}
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
new file mode 100644
index 00000000..4bd9a632
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -0,0 +1,51 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/irq.h>
+
+static unsigned int
+hpsim_irq_startup(struct irq_data *data)
+{
+	return 0;
+}
+
+static void
+hpsim_irq_noop(struct irq_data *data)
+{
+}
+
+static int
+hpsim_set_affinity_noop(struct irq_data *d, const struct cpumask *b, bool f)
+{
+	return 0;
+}
+
+static struct irq_chip irq_type_hp_sim = {
+	.name =			"hpsim",
+	.irq_startup =		hpsim_irq_startup,
+	.irq_shutdown =		hpsim_irq_noop,
+	.irq_enable =		hpsim_irq_noop,
+	.irq_disable =		hpsim_irq_noop,
+	.irq_ack =		hpsim_irq_noop,
+	.irq_set_affinity =	hpsim_set_affinity_noop,
+};
+
+void __init
+hpsim_irq_init (void)
+{
+	int i;
+
+	for_each_active_irq(i) {
+		struct irq_chip *chip = irq_get_chip(i);
+
+		if (chip == &no_irq_chip)
+			irq_set_chip(i, &irq_type_hp_sim);
+	}
+}
diff --git a/arch/ia64/hp/sim/hpsim_machvec.c b/arch/ia64/hp/sim/hpsim_machvec.c
new file mode 100644
index 00000000..c2141935
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_machvec.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME		hpsim
+#define MACHVEC_PLATFORM_HEADER		<asm/machvec_hpsim.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/hp/sim/hpsim_setup.c b/arch/ia64/hp/sim/hpsim_setup.c
new file mode 100644
index 00000000..f629e903
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_setup.c
@@ -0,0 +1,46 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#include <linux/console.h>
+#include <linux/init.h>
+#include <linux/kdev_t.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/param.h>
+#include <linux/root_dev.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/pal.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+#include <asm/hpsim.h>
+
+#include "hpsim_ssc.h"
+
+void
+ia64_ssc_connect_irq (long intr, long irq)
+{
+	ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT);
+}
+
+void
+ia64_ctl_trace (long on)
+{
+	ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE);
+}
+
+void __init
+hpsim_setup (char **cmdline_p)
+{
+	ROOT_DEV = Root_SDA1;		/* default to first SCSI drive */
+
+	simcons_register();
+}
diff --git a/arch/ia64/hp/sim/hpsim_ssc.h b/arch/ia64/hp/sim/hpsim_ssc.h
new file mode 100644
index 00000000..bfa39062
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_ssc.h
@@ -0,0 +1,36 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#ifndef _IA64_PLATFORM_HPSIM_SSC_H
+#define _IA64_PLATFORM_HPSIM_SSC_H
+
+/* Simulator system calls: */
+
+#define SSC_CONSOLE_INIT		20
+#define SSC_GETCHAR			21
+#define SSC_PUTCHAR			31
+#define SSC_CONNECT_INTERRUPT		58
+#define SSC_GENERATE_INTERRUPT		59
+#define SSC_SET_PERIODIC_INTERRUPT	60
+#define SSC_GET_RTC			65
+#define SSC_EXIT			66
+#define SSC_LOAD_SYMBOLS		69
+#define SSC_GET_TOD			74
+#define SSC_CTL_TRACE			76
+
+#define SSC_NETDEV_PROBE		100
+#define SSC_NETDEV_SEND			101
+#define SSC_NETDEV_RECV			102
+#define SSC_NETDEV_ATTACH		103
+#define SSC_NETDEV_DETACH		104
+
+/*
+ * Simulator system call.
+ */
+extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
+
+#endif /* _IA64_PLATFORM_HPSIM_SSC_H */
diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
new file mode 100644
index 00000000..7e81966c
--- /dev/null
+++ b/arch/ia64/hp/sim/simeth.c
@@ -0,0 +1,527 @@
+/*
+ * Simulated Ethernet Driver
+ *
+ * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/in.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/notifier.h>
+#include <linux/bitops.h>
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/hpsim.h>
+
+#include "hpsim_ssc.h"
+
+#define SIMETH_RECV_MAX	10
+
+/*
+ * Maximum possible received frame for Ethernet.
+ * We preallocate an sk_buff of that size to avoid costly
+ * memcpy for temporary buffer into sk_buff. We do basically
+ * what's done in other drivers, like eepro with a ring.
+ * The difference is, of course, that we don't have real DMA !!!
+ */
+#define SIMETH_FRAME_SIZE	ETH_FRAME_LEN
+
+
+#define NETWORK_INTR			8
+
+struct simeth_local {
+	struct net_device_stats stats;
+	int 			simfd;	 /* descriptor in the simulator */
+};
+
+static int simeth_probe1(void);
+static int simeth_open(struct net_device *dev);
+static int simeth_close(struct net_device *dev);
+static int simeth_tx(struct sk_buff *skb, struct net_device *dev);
+static int simeth_rx(struct net_device *dev);
+static struct net_device_stats *simeth_get_stats(struct net_device *dev);
+static irqreturn_t simeth_interrupt(int irq, void *dev_id);
+static void set_multicast_list(struct net_device *dev);
+static int simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr);
+
+static char *simeth_version="0.3";
+
+/*
+ * This variable is used to establish a mapping between the Linux/ia64 kernel
+ * and the host linux kernel.
+ *
+ * As of today, we support only one card, even though most of the code
+ * is ready for many more. The mapping is then:
+ *	linux/ia64 -> linux/x86
+ * 	   eth0    -> eth1
+ *
+ * In the future, we some string operations, we could easily support up
+ * to 10 cards (0-9).
+ *
+ * The default mapping can be changed on the kernel command line by
+ * specifying simeth=ethX (or whatever string you want).
+ */
+static char *simeth_device="eth0";	 /* default host interface to use */
+
+
+
+static volatile unsigned int card_count; /* how many cards "found" so far */
+static int simeth_debug;		/* set to 1 to get debug information */
+
+/*
+ * Used to catch IFF_UP & IFF_DOWN events
+ */
+static struct notifier_block simeth_dev_notifier = {
+	simeth_device_event,
+	NULL
+};
+
+
+/*
+ * Function used when using a kernel command line option.
+ *
+ * Format: simeth=interface_name (like eth0)
+ */
+static int __init
+simeth_setup(char *str)
+{
+	simeth_device = str;
+	return 1;
+}
+
+__setup("simeth=", simeth_setup);
+
+/*
+ * Function used to probe for simeth devices when not installed
+ * as a loadable module
+ */
+
+int __init
+simeth_probe (void)
+{
+	int r;
+
+	printk(KERN_INFO "simeth: v%s\n", simeth_version);
+
+	r = simeth_probe1();
+
+	if (r == 0) register_netdevice_notifier(&simeth_dev_notifier);
+
+	return r;
+}
+
+static inline int
+netdev_probe(char *name, unsigned char *ether)
+{
+	return ia64_ssc(__pa(name), __pa(ether), 0,0, SSC_NETDEV_PROBE);
+}
+
+
+static inline int
+netdev_connect(int irq)
+{
+	/* XXX Fix me
+	 * this does not support multiple cards
+	 * also no return value
+	 */
+	ia64_ssc_connect_irq(NETWORK_INTR, irq);
+	return 0;
+}
+
+static inline int
+netdev_attach(int fd, int irq, unsigned int ipaddr)
+{
+	/* this puts the host interface in the right mode (start interrupting) */
+	return ia64_ssc(fd, ipaddr, 0,0, SSC_NETDEV_ATTACH);
+}
+
+
+static inline int
+netdev_detach(int fd)
+{
+	/*
+	 * inactivate the host interface (don't interrupt anymore) */
+	return ia64_ssc(fd, 0,0,0, SSC_NETDEV_DETACH);
+}
+
+static inline int
+netdev_send(int fd, unsigned char *buf, unsigned int len)
+{
+	return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_SEND);
+}
+
+static inline int
+netdev_read(int fd, unsigned char *buf, unsigned int len)
+{
+	return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_RECV);
+}
+
+static const struct net_device_ops simeth_netdev_ops = {
+	.ndo_open		= simeth_open,
+	.ndo_stop		= simeth_close,
+	.ndo_start_xmit		= simeth_tx,
+	.ndo_get_stats		= simeth_get_stats,
+	.ndo_set_multicast_list	= set_multicast_list, /* not yet used */
+
+};
+
+/*
+ * Function shared with module code, so cannot be in init section
+ *
+ * So far this function "detects" only one card (test_&_set) but could
+ * be extended easily.
+ *
+ * Return:
+ * 	- -ENODEV is no device found
+ *	- -ENOMEM is no more memory
+ *	- 0 otherwise
+ */
+static int
+simeth_probe1(void)
+{
+	unsigned char mac_addr[ETH_ALEN];
+	struct simeth_local *local;
+	struct net_device *dev;
+	int fd, i, err, rc;
+
+	/*
+	 * XXX Fix me
+	 * let's support just one card for now
+	 */
+	if (test_and_set_bit(0, &card_count))
+		return -ENODEV;
+
+	/*
+	 * check with the simulator for the device
+	 */
+	fd = netdev_probe(simeth_device, mac_addr);
+	if (fd == -1)
+		return -ENODEV;
+
+	dev = alloc_etherdev(sizeof(struct simeth_local));
+	if (!dev)
+		return -ENOMEM;
+
+	memcpy(dev->dev_addr, mac_addr, sizeof(mac_addr));
+
+	local = netdev_priv(dev);
+	local->simfd = fd; /* keep track of underlying file descriptor */
+
+	dev->netdev_ops = &simeth_netdev_ops;
+
+	err = register_netdev(dev);
+	if (err) {
+		free_netdev(dev);
+		return err;
+	}
+
+	if ((rc = assign_irq_vector(AUTO_ASSIGN)) < 0)
+		panic("%s: out of interrupt vectors!\n", __func__);
+	dev->irq = rc;
+
+	/*
+	 * attach the interrupt in the simulator, this does enable interrupts
+	 * until a netdev_attach() is called
+	 */
+	netdev_connect(dev->irq);
+
+	printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr",
+	       dev->name, simeth_device, local->simfd);
+	for(i = 0; i < ETH_ALEN; i++) {
+		printk(" %2.2x", dev->dev_addr[i]);
+	}
+	printk(", IRQ %d\n", dev->irq);
+
+	return 0;
+}
+
+/*
+ * actually binds the device to an interrupt vector
+ */
+static int
+simeth_open(struct net_device *dev)
+{
+	if (request_irq(dev->irq, simeth_interrupt, 0, "simeth", dev)) {
+		printk(KERN_WARNING "simeth: unable to get IRQ %d.\n", dev->irq);
+		return -EAGAIN;
+	}
+
+	netif_start_queue(dev);
+
+	return 0;
+}
+
+/* copied from lapbether.c */
+static __inline__ int dev_is_ethdev(struct net_device *dev)
+{
+       return ( dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5));
+}
+
+
+/*
+ * Handler for IFF_UP or IFF_DOWN
+ *
+ * The reason for that is that we don't want to be interrupted when the
+ * interface is down. There is no way to unconnect in the simualtor. Instead
+ * we use this function to shutdown packet processing in the frame filter
+ * in the simulator. Thus no interrupts are generated
+ *
+ *
+ * That's also the place where we pass the IP address of this device to the
+ * simulator so that that we can start filtering packets for it
+ *
+ * There may be a better way of doing this, but I don't know which yet.
+ */
+static int
+simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct simeth_local *local;
+	struct in_device *in_dev;
+	struct in_ifaddr **ifap = NULL;
+	struct in_ifaddr *ifa = NULL;
+	int r;
+
+
+	if ( ! dev ) {
+		printk(KERN_WARNING "simeth_device_event dev=0\n");
+		return NOTIFY_DONE;
+	}
+
+	if (dev_net(dev) != &init_net)
+		return NOTIFY_DONE;
+
+	if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE;
+
+	/*
+	 * Check whether or not it's for an ethernet device
+	 *
+	 * XXX Fixme: This works only as long as we support one
+	 * type of ethernet device.
+	 */
+	if ( !dev_is_ethdev(dev) ) return NOTIFY_DONE;
+
+	if ((in_dev=dev->ip_ptr) != NULL) {
+		for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next)
+			if (strcmp(dev->name, ifa->ifa_label) == 0) break;
+	}
+	if ( ifa == NULL ) {
+		printk(KERN_ERR "simeth_open: can't find device %s's ifa\n", dev->name);
+		return NOTIFY_DONE;
+	}
+
+	printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n",
+	       dev->name, ntohl(ifa->ifa_local));
+
+	/*
+	 * XXX Fix me
+	 * if the device was up, and we're simply reconfiguring it, not sure
+	 * we get DOWN then UP.
+	 */
+
+	local = netdev_priv(dev);
+	/* now do it for real */
+	r = event == NETDEV_UP ?
+		netdev_attach(local->simfd, dev->irq, ntohl(ifa->ifa_local)):
+		netdev_detach(local->simfd);
+
+	printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n",
+	       event == NETDEV_UP ? "attach":"detach", r);
+
+	return NOTIFY_DONE;
+}
+
+static int
+simeth_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+
+	free_irq(dev->irq, dev);
+
+	return 0;
+}
+
+/*
+ * Only used for debug
+ */
+static void
+frame_print(unsigned char *from, unsigned char *frame, int len)
+{
+	int i;
+
+	printk("%s: (%d) %02x", from, len, frame[0] & 0xff);
+	for(i=1; i < 6; i++ ) {
+		printk(":%02x", frame[i] &0xff);
+	}
+	printk(" %2x", frame[6] &0xff);
+	for(i=7; i < 12; i++ ) {
+		printk(":%02x", frame[i] &0xff);
+	}
+	printk(" [%02x%02x]\n", frame[12], frame[13]);
+
+	for(i=14; i < len; i++ ) {
+		printk("%02x ", frame[i] &0xff);
+		if ( (i%10)==0) printk("\n");
+	}
+	printk("\n");
+}
+
+
+/*
+ * Function used to transmit of frame, very last one on the path before
+ * going to the simulator.
+ */
+static int
+simeth_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	struct simeth_local *local = netdev_priv(dev);
+
+#if 0
+	/* ensure we have at least ETH_ZLEN bytes (min frame size) */
+	unsigned int length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN;
+	/* Where do the extra padding bytes comes from inthe skbuff ? */
+#else
+	/* the real driver in the host system is going to take care of that
+	 * or maybe it's the NIC itself.
+	 */
+	unsigned int length = skb->len;
+#endif
+
+	local->stats.tx_bytes += skb->len;
+	local->stats.tx_packets++;
+
+
+	if (simeth_debug > 5) frame_print("simeth_tx", skb->data, length);
+
+	netdev_send(local->simfd, skb->data, length);
+
+	/*
+	 * we are synchronous on write, so we don't simulate a
+	 * trasnmit complete interrupt, thus we don't need to arm a tx
+	 */
+
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static inline struct sk_buff *
+make_new_skb(struct net_device *dev)
+{
+	struct sk_buff *nskb;
+
+	/*
+	 * The +2 is used to make sure that the IP header is nicely
+	 * aligned (on 4byte boundary I assume 14+2=16)
+	 */
+	nskb = dev_alloc_skb(SIMETH_FRAME_SIZE + 2);
+	if ( nskb == NULL ) {
+		printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
+		return NULL;
+	}
+
+	skb_reserve(nskb, 2);	/* Align IP on 16 byte boundaries */
+
+	skb_put(nskb,SIMETH_FRAME_SIZE);
+
+	return nskb;
+}
+
+/*
+ * called from interrupt handler to process a received frame
+ */
+static int
+simeth_rx(struct net_device *dev)
+{
+	struct simeth_local	*local;
+	struct sk_buff		*skb;
+	int			len;
+	int			rcv_count = SIMETH_RECV_MAX;
+
+	local = netdev_priv(dev);
+	/*
+	 * the loop concept has been borrowed from other drivers
+	 * looks to me like it's a throttling thing to avoid pushing to many
+	 * packets at one time into the stack. Making sure we can process them
+	 * upstream and make forward progress overall
+	 */
+	do {
+		if ( (skb=make_new_skb(dev)) == NULL ) {
+			printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
+			local->stats.rx_dropped++;
+			return 0;
+		}
+		/*
+		 * Read only one frame at a time
+		 */
+		len = netdev_read(local->simfd, skb->data, SIMETH_FRAME_SIZE);
+		if ( len == 0 ) {
+			if ( simeth_debug > 0 ) printk(KERN_WARNING "%s: count=%d netdev_read=0\n",
+						       dev->name, SIMETH_RECV_MAX-rcv_count);
+			break;
+		}
+#if 0
+		/*
+		 * XXX Fix me
+		 * Should really do a csum+copy here
+		 */
+		skb_copy_to_linear_data(skb, frame, len);
+#endif
+		skb->protocol = eth_type_trans(skb, dev);
+
+		if ( simeth_debug > 6 ) frame_print("simeth_rx", skb->data, len);
+
+		/*
+		 * push the packet up & trigger software interrupt
+		 */
+		netif_rx(skb);
+
+		local->stats.rx_packets++;
+		local->stats.rx_bytes += len;
+
+	} while ( --rcv_count );
+
+	return len; /* 0 = nothing left to read, otherwise, we can try again */
+}
+
+/*
+ * Interrupt handler (Yes, we can do it too !!!)
+ */
+static irqreturn_t
+simeth_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+
+	/*
+	 * very simple loop because we get interrupts only when receiving
+	 */
+	while (simeth_rx(dev));
+	return IRQ_HANDLED;
+}
+
+static struct net_device_stats *
+simeth_get_stats(struct net_device *dev)
+{
+	struct simeth_local *local = netdev_priv(dev);
+
+	return &local->stats;
+}
+
+/* fake multicast ability */
+static void
+set_multicast_list(struct net_device *dev)
+{
+	printk(KERN_WARNING "%s: set_multicast_list called\n", dev->name);
+}
+
+__initcall(simeth_probe);
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
new file mode 100644
index 00000000..331de723
--- /dev/null
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -0,0 +1,380 @@
+/*
+ * Simulated SCSI driver.
+ *
+ * Copyright (C) 1999, 2001-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 02/01/15 David Mosberger	Updated for v2.5.1
+ * 99/12/18 David Mosberger	Added support for READ10/WRITE10 needed by linux v2.3.33
+ */
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <asm/irq.h>
+#include "hpsim_ssc.h"
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+
+#define DEBUG_SIMSCSI	0
+
+#define SIMSCSI_REQ_QUEUE_LEN	64
+#define DEFAULT_SIMSCSI_ROOT	"/var/ski-disks/sd"
+
+/* Simulator system calls: */
+
+#define SSC_OPEN			50
+#define SSC_CLOSE			51
+#define SSC_READ			52
+#define SSC_WRITE			53
+#define SSC_GET_COMPLETION		54
+#define SSC_WAIT_COMPLETION		55
+
+#define SSC_WRITE_ACCESS		2
+#define SSC_READ_ACCESS			1
+
+#if DEBUG_SIMSCSI
+  int simscsi_debug;
+# define DBG	simscsi_debug
+#else
+# define DBG	0
+#endif
+
+static struct Scsi_Host *host;
+
+static void simscsi_interrupt (unsigned long val);
+static DECLARE_TASKLET(simscsi_tasklet, simscsi_interrupt, 0);
+
+struct disk_req {
+	unsigned long addr;
+	unsigned len;
+};
+
+struct disk_stat {
+	int fd;
+	unsigned count;
+};
+
+static int desc[16] = {
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+};
+
+static struct queue_entry {
+	struct scsi_cmnd *sc;
+} queue[SIMSCSI_REQ_QUEUE_LEN];
+
+static int rd, wr;
+static atomic_t num_reqs = ATOMIC_INIT(0);
+
+/* base name for default disks */
+static char *simscsi_root = DEFAULT_SIMSCSI_ROOT;
+
+#define MAX_ROOT_LEN	128
+
+/*
+ * used to setup a new base for disk images
+ * to use /foo/bar/disk[a-z] as disk images
+ * you have to specify simscsi=/foo/bar/disk on the command line
+ */
+static int __init
+simscsi_setup (char *s)
+{
+	/* XXX Fix me we may need to strcpy() ? */
+	if (strlen(s) > MAX_ROOT_LEN) {
+		printk(KERN_ERR "simscsi_setup: prefix too long---using default %s\n",
+		       simscsi_root);
+	}
+	simscsi_root = s;
+	return 1;
+}
+
+__setup("simscsi=", simscsi_setup);
+
+static void
+simscsi_interrupt (unsigned long val)
+{
+	struct scsi_cmnd *sc;
+
+	while ((sc = queue[rd].sc) != NULL) {
+		atomic_dec(&num_reqs);
+		queue[rd].sc = NULL;
+		if (DBG)
+			printk("simscsi_interrupt: done with %ld\n", sc->serial_number);
+		(*sc->scsi_done)(sc);
+		rd = (rd + 1) % SIMSCSI_REQ_QUEUE_LEN;
+	}
+}
+
+static int
+simscsi_biosparam (struct scsi_device *sdev, struct block_device *n,
+		sector_t capacity, int ip[])
+{
+	ip[0] = 64;		/* heads */
+	ip[1] = 32;		/* sectors */
+	ip[2] = capacity >> 11;	/* cylinders */
+	return 0;
+}
+
+static void
+simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset)
+{
+	int i;
+	struct scatterlist *sl;
+	struct disk_stat stat;
+	struct disk_req req;
+
+	stat.fd = desc[sc->device->id];
+
+	scsi_for_each_sg(sc, sl, scsi_sg_count(sc), i) {
+		req.addr = __pa(sg_virt(sl));
+		req.len  = sl->length;
+		if (DBG)
+			printk("simscsi_sg_%s @ %lx (off %lx) use_sg=%d len=%d\n",
+			       mode == SSC_READ ? "read":"write", req.addr, offset,
+			       scsi_sg_count(sc) - i, sl->length);
+		ia64_ssc(stat.fd, 1, __pa(&req), offset, mode);
+		ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
+
+		/* should not happen in our case */
+		if (stat.count != req.len) {
+			sc->result = DID_ERROR << 16;
+			return;
+		}
+		offset +=  sl->length;
+	}
+	sc->result = GOOD;
+}
+
+/*
+ * function handling both READ_6/WRITE_6 (non-scatter/gather mode)
+ * commands.
+ * Added 02/26/99 S.Eranian
+ */
+static void
+simscsi_readwrite6 (struct scsi_cmnd *sc, int mode)
+{
+	unsigned long offset;
+
+	offset = (((sc->cmnd[1] & 0x1f) << 16) | (sc->cmnd[2] << 8) | sc->cmnd[3])*512;
+	simscsi_sg_readwrite(sc, mode, offset);
+}
+
+static size_t
+simscsi_get_disk_size (int fd)
+{
+	struct disk_stat stat;
+	size_t bit, sectors = 0;
+	struct disk_req req;
+	char buf[512];
+
+	/*
+	 * This is a bit kludgey: the simulator doesn't provide a
+	 * direct way of determining the disk size, so we do a binary
+	 * search, assuming a maximum disk size of 128GB.
+	 */
+	for (bit = (128UL << 30)/512; bit != 0; bit >>= 1) {
+		req.addr = __pa(&buf);
+		req.len = sizeof(buf);
+		ia64_ssc(fd, 1, __pa(&req), ((sectors | bit) - 1)*512, SSC_READ);
+		stat.fd = fd;
+		ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
+		if (stat.count == sizeof(buf))
+			sectors |= bit;
+	}
+	return sectors - 1;	/* return last valid sector number */
+}
+
+static void
+simscsi_readwrite10 (struct scsi_cmnd *sc, int mode)
+{
+	unsigned long offset;
+
+	offset = (((unsigned long)sc->cmnd[2] << 24) 
+		| ((unsigned long)sc->cmnd[3] << 16)
+		| ((unsigned long)sc->cmnd[4] <<  8) 
+		| ((unsigned long)sc->cmnd[5] <<  0))*512UL;
+	simscsi_sg_readwrite(sc, mode, offset);
+}
+
+static int
+simscsi_queuecommand_lck (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
+{
+	unsigned int target_id = sc->device->id;
+	char fname[MAX_ROOT_LEN+16];
+	size_t disk_size;
+	char *buf;
+	char localbuf[36];
+#if DEBUG_SIMSCSI
+	register long sp asm ("sp");
+
+	if (DBG)
+		printk("simscsi_queuecommand: target=%d,cmnd=%u,sc=%lu,sp=%lx,done=%p\n",
+		       target_id, sc->cmnd[0], sc->serial_number, sp, done);
+#endif
+
+	sc->result = DID_BAD_TARGET << 16;
+	sc->scsi_done = done;
+	if (target_id <= 15 && sc->device->lun == 0) {
+		switch (sc->cmnd[0]) {
+		      case INQUIRY:
+			if (scsi_bufflen(sc) < 35) {
+				break;
+			}
+			sprintf (fname, "%s%c", simscsi_root, 'a' + target_id);
+			desc[target_id] = ia64_ssc(__pa(fname), SSC_READ_ACCESS|SSC_WRITE_ACCESS,
+						   0, 0, SSC_OPEN);
+			if (desc[target_id] < 0) {
+				/* disk doesn't exist... */
+				break;
+			}
+			buf = localbuf;
+			buf[0] = 0;	/* magnetic disk */
+			buf[1] = 0;	/* not a removable medium */
+			buf[2] = 2;	/* SCSI-2 compliant device */
+			buf[3] = 2;	/* SCSI-2 response data format */
+			buf[4] = 31;	/* additional length (bytes) */
+			buf[5] = 0;	/* reserved */
+			buf[6] = 0;	/* reserved */
+			buf[7] = 0;	/* various flags */
+			memcpy(buf + 8, "HP      SIMULATED DISK  0.00",  28);
+			scsi_sg_copy_from_buffer(sc, buf, 36);
+			sc->result = GOOD;
+			break;
+
+		      case TEST_UNIT_READY:
+			sc->result = GOOD;
+			break;
+
+		      case READ_6:
+			if (desc[target_id] < 0 )
+				break;
+			simscsi_readwrite6(sc, SSC_READ);
+			break;
+
+		      case READ_10:
+			if (desc[target_id] < 0 )
+				break;
+			simscsi_readwrite10(sc, SSC_READ);
+			break;
+
+		      case WRITE_6:
+			if (desc[target_id] < 0)
+				break;
+			simscsi_readwrite6(sc, SSC_WRITE);
+			break;
+
+		      case WRITE_10:
+			if (desc[target_id] < 0)
+				break;
+			simscsi_readwrite10(sc, SSC_WRITE);
+			break;
+
+		      case READ_CAPACITY:
+			if (desc[target_id] < 0 || scsi_bufflen(sc) < 8) {
+				break;
+			}
+			buf = localbuf;
+			disk_size = simscsi_get_disk_size(desc[target_id]);
+
+			buf[0] = (disk_size >> 24) & 0xff;
+			buf[1] = (disk_size >> 16) & 0xff;
+			buf[2] = (disk_size >>  8) & 0xff;
+			buf[3] = (disk_size >>  0) & 0xff;
+			/* set block size of 512 bytes: */
+			buf[4] = 0;
+			buf[5] = 0;
+			buf[6] = 2;
+			buf[7] = 0;
+			scsi_sg_copy_from_buffer(sc, buf, 8);
+			sc->result = GOOD;
+			break;
+
+		      case MODE_SENSE:
+		      case MODE_SENSE_10:
+			/* sd.c uses this to determine whether disk does write-caching. */
+			scsi_sg_copy_from_buffer(sc, (char *)empty_zero_page,
+						 PAGE_SIZE);
+			sc->result = GOOD;
+			break;
+
+		      case START_STOP:
+			printk(KERN_ERR "START_STOP\n");
+			break;
+
+		      default:
+			panic("simscsi: unknown SCSI command %u\n", sc->cmnd[0]);
+		}
+	}
+	if (sc->result == DID_BAD_TARGET) {
+		sc->result |= DRIVER_SENSE << 24;
+		sc->sense_buffer[0] = 0x70;
+		sc->sense_buffer[2] = 0x00;
+	}
+	if (atomic_read(&num_reqs) >= SIMSCSI_REQ_QUEUE_LEN) {
+		panic("Attempt to queue command while command is pending!!");
+	}
+	atomic_inc(&num_reqs);
+	queue[wr].sc = sc;
+	wr = (wr + 1) % SIMSCSI_REQ_QUEUE_LEN;
+
+	tasklet_schedule(&simscsi_tasklet);
+	return 0;
+}
+
+static DEF_SCSI_QCMD(simscsi_queuecommand)
+
+static int
+simscsi_host_reset (struct scsi_cmnd *sc)
+{
+	printk(KERN_ERR "simscsi_host_reset: not implemented\n");
+	return 0;
+}
+
+static struct scsi_host_template driver_template = {
+	.name			= "simulated SCSI host adapter",
+	.proc_name		= "simscsi",
+	.queuecommand		= simscsi_queuecommand,
+	.eh_host_reset_handler	= simscsi_host_reset,
+	.bios_param		= simscsi_biosparam,
+	.can_queue		= SIMSCSI_REQ_QUEUE_LEN,
+	.this_id		= -1,
+	.sg_tablesize		= SG_ALL,
+	.max_sectors		= 1024,
+	.cmd_per_lun		= SIMSCSI_REQ_QUEUE_LEN,
+	.use_clustering		= DISABLE_CLUSTERING,
+};
+
+static int __init
+simscsi_init(void)
+{
+	int error;
+
+	host = scsi_host_alloc(&driver_template, 0);
+	if (!host)
+		return -ENOMEM;
+
+	error = scsi_add_host(host, NULL);
+	if (error)
+		goto free_host;
+	scsi_scan_host(host);
+	return 0;
+
+ free_host:
+	scsi_host_put(host);
+	return error;
+}
+
+static void __exit
+simscsi_exit(void)
+{
+	scsi_remove_host(host);
+	scsi_host_put(host);
+}
+
+module_init(simscsi_init);
+module_exit(simscsi_exit);
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
new file mode 100644
index 00000000..bff0824c
--- /dev/null
+++ b/arch/ia64/hp/sim/simserial.c
@@ -0,0 +1,973 @@
+/*
+ * Simulated Serial Driver (fake serial)
+ *
+ * This driver is mostly used for bringup purposes and will go away.
+ * It has a strong dependency on the system console. All outputs
+ * are rerouted to the same facility as the one used by printk which, in our
+ * case means sys_sim.c console (goes via the simulator). The code hereafter
+ * is completely leveraged from the serial.c driver.
+ *
+ * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 02/04/00 D. Mosberger	Merged in serial.c bug fixes in rs_close().
+ * 02/25/00 D. Mosberger	Synced up with 2.3.99pre-5 version of serial.c.
+ * 07/30/02 D. Mosberger	Replace sti()/cli() with explicit spinlocks & local irq masking
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/major.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/capability.h>
+#include <linux/console.h>
+#include <linux/module.h>
+#include <linux/serial.h>
+#include <linux/serialP.h>
+#include <linux/sysrq.h>
+
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include <asm/uaccess.h>
+
+#undef SIMSERIAL_DEBUG	/* define this to get some debug information */
+
+#define KEYBOARD_INTR	3	/* must match with simulator! */
+
+#define NR_PORTS	1	/* only one port for now */
+
+#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? IRQF_SHARED : IRQF_DISABLED)
+
+#define SSC_GETCHAR	21
+
+extern long ia64_ssc (long, long, long, long, int);
+extern void ia64_ssc_connect_irq (long intr, long irq);
+
+static char *serial_name = "SimSerial driver";
+static char *serial_version = "0.6";
+
+/*
+ * This has been extracted from asm/serial.h. We need one eventually but
+ * I don't know exactly what we're going to put in it so just fake one
+ * for now.
+ */
+#define BASE_BAUD ( 1843200 / 16 )
+
+#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
+
+/*
+ * Most of the values here are meaningless to this particular driver.
+ * However some values must be preserved for the code (leveraged from serial.c
+ * to work correctly).
+ * port must not be 0
+ * type must not be UNKNOWN
+ * So I picked arbitrary (guess from where?) values instead
+ */
+static struct serial_state rs_table[NR_PORTS]={
+  /* UART CLK   PORT IRQ     FLAGS        */
+  { 0, BASE_BAUD, 0x3F8, 0, STD_COM_FLAGS,0,PORT_16550 }  /* ttyS0 */
+};
+
+/*
+ * Just for the fun of it !
+ */
+static struct serial_uart_config uart_config[] = {
+	{ "unknown", 1, 0 },
+	{ "8250", 1, 0 },
+	{ "16450", 1, 0 },
+	{ "16550", 1, 0 },
+	{ "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO },
+	{ "cirrus", 1, 0 },
+	{ "ST16650", 1, UART_CLEAR_FIFO | UART_STARTECH },
+	{ "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO |
+		  UART_STARTECH },
+	{ "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO},
+	{ NULL, 0}
+};
+
+struct tty_driver *hp_simserial_driver;
+
+static struct async_struct *IRQ_ports[NR_IRQS];
+
+static struct console *console;
+
+static unsigned char *tmp_buf;
+
+extern struct console *console_drivers; /* from kernel/printk.c */
+
+/*
+ * ------------------------------------------------------------
+ * rs_stop() and rs_start()
+ *
+ * This routines are called before setting or resetting tty->stopped.
+ * They enable or disable transmitter interrupts, as necessary.
+ * ------------------------------------------------------------
+ */
+static void rs_stop(struct tty_struct *tty)
+{
+#ifdef SIMSERIAL_DEBUG
+	printk("rs_stop: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
+		tty->stopped, tty->hw_stopped, tty->flow_stopped);
+#endif
+
+}
+
+static void rs_start(struct tty_struct *tty)
+{
+#ifdef SIMSERIAL_DEBUG
+	printk("rs_start: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
+		tty->stopped, tty->hw_stopped, tty->flow_stopped);
+#endif
+}
+
+static  void receive_chars(struct tty_struct *tty)
+{
+	unsigned char ch;
+	static unsigned char seen_esc = 0;
+
+	while ( (ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR)) ) {
+		if ( ch == 27 && seen_esc == 0 ) {
+			seen_esc = 1;
+			continue;
+		} else {
+			if ( seen_esc==1 && ch == 'O' ) {
+				seen_esc = 2;
+				continue;
+			} else if ( seen_esc == 2 ) {
+				if ( ch == 'P' ) /* F1 */
+					show_state();
+#ifdef CONFIG_MAGIC_SYSRQ
+				if ( ch == 'S' ) { /* F4 */
+					do
+						ch = ia64_ssc(0, 0, 0, 0,
+							      SSC_GETCHAR);
+					while (!ch);
+					handle_sysrq(ch);
+				}
+#endif
+				seen_esc = 0;
+				continue;
+			}
+		}
+		seen_esc = 0;
+
+		if (tty_insert_flip_char(tty, ch, TTY_NORMAL) == 0)
+			break;
+	}
+	tty_flip_buffer_push(tty);
+}
+
+/*
+ * This is the serial driver's interrupt routine for a single port
+ */
+static irqreturn_t rs_interrupt_single(int irq, void *dev_id)
+{
+	struct async_struct * info;
+
+	/*
+	 * I don't know exactly why they don't use the dev_id opaque data
+	 * pointer instead of this extra lookup table
+	 */
+	info = IRQ_ports[irq];
+	if (!info || !info->tty) {
+		printk(KERN_INFO "simrs_interrupt_single: info|tty=0 info=%p problem\n", info);
+		return IRQ_NONE;
+	}
+	/*
+	 * pretty simple in our case, because we only get interrupts
+	 * on inbound traffic
+	 */
+	receive_chars(info->tty);
+	return IRQ_HANDLED;
+}
+
+/*
+ * -------------------------------------------------------------------
+ * Here ends the serial interrupt routines.
+ * -------------------------------------------------------------------
+ */
+
+static void do_softint(struct work_struct *private_)
+{
+	printk(KERN_ERR "simserial: do_softint called\n");
+}
+
+static int rs_put_char(struct tty_struct *tty, unsigned char ch)
+{
+	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	unsigned long flags;
+
+	if (!tty || !info->xmit.buf)
+		return 0;
+
+	local_irq_save(flags);
+	if (CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) == 0) {
+		local_irq_restore(flags);
+		return 0;
+	}
+	info->xmit.buf[info->xmit.head] = ch;
+	info->xmit.head = (info->xmit.head + 1) & (SERIAL_XMIT_SIZE-1);
+	local_irq_restore(flags);
+	return 1;
+}
+
+static void transmit_chars(struct async_struct *info, int *intr_done)
+{
+	int count;
+	unsigned long flags;
+
+
+	local_irq_save(flags);
+
+	if (info->x_char) {
+		char c = info->x_char;
+
+		console->write(console, &c, 1);
+
+		info->state->icount.tx++;
+		info->x_char = 0;
+
+		goto out;
+	}
+
+	if (info->xmit.head == info->xmit.tail || info->tty->stopped || info->tty->hw_stopped) {
+#ifdef SIMSERIAL_DEBUG
+		printk("transmit_chars: head=%d, tail=%d, stopped=%d\n",
+		       info->xmit.head, info->xmit.tail, info->tty->stopped);
+#endif
+		goto out;
+	}
+	/*
+	 * We removed the loop and try to do it in to chunks. We need
+	 * 2 operations maximum because it's a ring buffer.
+	 *
+	 * First from current to tail if possible.
+	 * Then from the beginning of the buffer until necessary
+	 */
+
+	count = min(CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE),
+		    SERIAL_XMIT_SIZE - info->xmit.tail);
+	console->write(console, info->xmit.buf+info->xmit.tail, count);
+
+	info->xmit.tail = (info->xmit.tail+count) & (SERIAL_XMIT_SIZE-1);
+
+	/*
+	 * We have more at the beginning of the buffer
+	 */
+	count = CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
+	if (count) {
+		console->write(console, info->xmit.buf, count);
+		info->xmit.tail += count;
+	}
+out:
+	local_irq_restore(flags);
+}
+
+static void rs_flush_chars(struct tty_struct *tty)
+{
+	struct async_struct *info = (struct async_struct *)tty->driver_data;
+
+	if (info->xmit.head == info->xmit.tail || tty->stopped || tty->hw_stopped ||
+	    !info->xmit.buf)
+		return;
+
+	transmit_chars(info, NULL);
+}
+
+
+static int rs_write(struct tty_struct * tty,
+		    const unsigned char *buf, int count)
+{
+	int	c, ret = 0;
+	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	unsigned long flags;
+
+	if (!tty || !info->xmit.buf || !tmp_buf) return 0;
+
+	local_irq_save(flags);
+	while (1) {
+		c = CIRC_SPACE_TO_END(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
+		if (count < c)
+			c = count;
+		if (c <= 0) {
+			break;
+		}
+		memcpy(info->xmit.buf + info->xmit.head, buf, c);
+		info->xmit.head = ((info->xmit.head + c) &
+				   (SERIAL_XMIT_SIZE-1));
+		buf += c;
+		count -= c;
+		ret += c;
+	}
+	local_irq_restore(flags);
+	/*
+	 * Hey, we transmit directly from here in our case
+	 */
+	if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE)
+	    && !tty->stopped && !tty->hw_stopped) {
+		transmit_chars(info, NULL);
+	}
+	return ret;
+}
+
+static int rs_write_room(struct tty_struct *tty)
+{
+	struct async_struct *info = (struct async_struct *)tty->driver_data;
+
+	return CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
+}
+
+static int rs_chars_in_buffer(struct tty_struct *tty)
+{
+	struct async_struct *info = (struct async_struct *)tty->driver_data;
+
+	return CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
+}
+
+static void rs_flush_buffer(struct tty_struct *tty)
+{
+	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	info->xmit.head = info->xmit.tail = 0;
+	local_irq_restore(flags);
+
+	tty_wakeup(tty);
+}
+
+/*
+ * This function is used to send a high-priority XON/XOFF character to
+ * the device
+ */
+static void rs_send_xchar(struct tty_struct *tty, char ch)
+{
+	struct async_struct *info = (struct async_struct *)tty->driver_data;
+
+	info->x_char = ch;
+	if (ch) {
+		/*
+		 * I guess we could call console->write() directly but
+		 * let's do that for now.
+		 */
+		transmit_chars(info, NULL);
+	}
+}
+
+/*
+ * ------------------------------------------------------------
+ * rs_throttle()
+ *
+ * This routine is called by the upper-layer tty layer to signal that
+ * incoming characters should be throttled.
+ * ------------------------------------------------------------
+ */
+static void rs_throttle(struct tty_struct * tty)
+{
+	if (I_IXOFF(tty)) rs_send_xchar(tty, STOP_CHAR(tty));
+
+	printk(KERN_INFO "simrs_throttle called\n");
+}
+
+static void rs_unthrottle(struct tty_struct * tty)
+{
+	struct async_struct *info = (struct async_struct *)tty->driver_data;
+
+	if (I_IXOFF(tty)) {
+		if (info->x_char)
+			info->x_char = 0;
+		else
+			rs_send_xchar(tty, START_CHAR(tty));
+	}
+	printk(KERN_INFO "simrs_unthrottle called\n");
+}
+
+
+static int rs_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
+{
+	if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) &&
+	    (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) &&
+	    (cmd != TIOCMIWAIT)) {
+		if (tty->flags & (1 << TTY_IO_ERROR))
+		    return -EIO;
+	}
+
+	switch (cmd) {
+		case TIOCGSERIAL:
+			printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n");
+			return 0;
+		case TIOCSSERIAL:
+			printk(KERN_INFO "simrs_ioctl TIOCSSERIAL called\n");
+			return 0;
+		case TIOCSERCONFIG:
+			printk(KERN_INFO "rs_ioctl: TIOCSERCONFIG called\n");
+			return -EINVAL;
+
+		case TIOCSERGETLSR: /* Get line status register */
+			printk(KERN_INFO "rs_ioctl: TIOCSERGETLSR called\n");
+			return  -EINVAL;
+
+		case TIOCSERGSTRUCT:
+			printk(KERN_INFO "rs_ioctl: TIOCSERGSTRUCT called\n");
+#if 0
+			if (copy_to_user((struct async_struct *) arg,
+					 info, sizeof(struct async_struct)))
+				return -EFAULT;
+#endif
+			return 0;
+
+		/*
+		 * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change
+		 * - mask passed in arg for lines of interest
+		 *   (use |'ed TIOCM_RNG/DSR/CD/CTS for masking)
+		 * Caller should use TIOCGICOUNT to see which one it was
+		 */
+		case TIOCMIWAIT:
+			printk(KERN_INFO "rs_ioctl: TIOCMIWAIT: called\n");
+			return 0;
+		case TIOCSERGWILD:
+		case TIOCSERSWILD:
+			/* "setserial -W" is called in Debian boot */
+			printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n");
+			return 0;
+
+		default:
+			return -ENOIOCTLCMD;
+		}
+	return 0;
+}
+
+#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
+
+static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
+{
+	/* Handle turning off CRTSCTS */
+	if ((old_termios->c_cflag & CRTSCTS) &&
+	    !(tty->termios->c_cflag & CRTSCTS)) {
+		tty->hw_stopped = 0;
+		rs_start(tty);
+	}
+}
+/*
+ * This routine will shutdown a serial port; interrupts are disabled, and
+ * DTR is dropped if the hangup on close termio flag is on.
+ */
+static void shutdown(struct async_struct * info)
+{
+	unsigned long	flags;
+	struct serial_state *state;
+	int		retval;
+
+	if (!(info->flags & ASYNC_INITIALIZED)) return;
+
+	state = info->state;
+
+#ifdef SIMSERIAL_DEBUG
+	printk("Shutting down serial port %d (irq %d)....", info->line,
+	       state->irq);
+#endif
+
+	local_irq_save(flags);
+	{
+		/*
+		 * First unlink the serial port from the IRQ chain...
+		 */
+		if (info->next_port)
+			info->next_port->prev_port = info->prev_port;
+		if (info->prev_port)
+			info->prev_port->next_port = info->next_port;
+		else
+			IRQ_ports[state->irq] = info->next_port;
+
+		/*
+		 * Free the IRQ, if necessary
+		 */
+		if (state->irq && (!IRQ_ports[state->irq] ||
+				   !IRQ_ports[state->irq]->next_port)) {
+			if (IRQ_ports[state->irq]) {
+				free_irq(state->irq, NULL);
+				retval = request_irq(state->irq, rs_interrupt_single,
+						     IRQ_T(info), "serial", NULL);
+
+				if (retval)
+					printk(KERN_ERR "serial shutdown: request_irq: error %d"
+					       "  Couldn't reacquire IRQ.\n", retval);
+			} else
+				free_irq(state->irq, NULL);
+		}
+
+		if (info->xmit.buf) {
+			free_page((unsigned long) info->xmit.buf);
+			info->xmit.buf = NULL;
+		}
+
+		if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
+
+		info->flags &= ~ASYNC_INITIALIZED;
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * ------------------------------------------------------------
+ * rs_close()
+ *
+ * This routine is called when the serial port gets closed.  First, we
+ * wait for the last remaining data to be sent.  Then, we unlink its
+ * async structure from the interrupt chain if necessary, and we free
+ * that IRQ if nothing is left in the chain.
+ * ------------------------------------------------------------
+ */
+static void rs_close(struct tty_struct *tty, struct file * filp)
+{
+	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct serial_state *state;
+	unsigned long flags;
+
+	if (!info ) return;
+
+	state = info->state;
+
+	local_irq_save(flags);
+	if (tty_hung_up_p(filp)) {
+#ifdef SIMSERIAL_DEBUG
+		printk("rs_close: hung_up\n");
+#endif
+		local_irq_restore(flags);
+		return;
+	}
+#ifdef SIMSERIAL_DEBUG
+	printk("rs_close ttys%d, count = %d\n", info->line, state->count);
+#endif
+	if ((tty->count == 1) && (state->count != 1)) {
+		/*
+		 * Uh, oh.  tty->count is 1, which means that the tty
+		 * structure will be freed.  state->count should always
+		 * be one in these conditions.  If it's greater than
+		 * one, we've got real problems, since it means the
+		 * serial port won't be shutdown.
+		 */
+		printk(KERN_ERR "rs_close: bad serial port count; tty->count is 1, "
+		       "state->count is %d\n", state->count);
+		state->count = 1;
+	}
+	if (--state->count < 0) {
+		printk(KERN_ERR "rs_close: bad serial port count for ttys%d: %d\n",
+		       info->line, state->count);
+		state->count = 0;
+	}
+	if (state->count) {
+		local_irq_restore(flags);
+		return;
+	}
+	info->flags |= ASYNC_CLOSING;
+	local_irq_restore(flags);
+
+	/*
+	 * Now we wait for the transmit buffer to clear; and we notify
+	 * the line discipline to only process XON/XOFF characters.
+	 */
+	shutdown(info);
+	rs_flush_buffer(tty);
+	tty_ldisc_flush(tty);
+	info->event = 0;
+	info->tty = NULL;
+	if (info->blocked_open) {
+		if (info->close_delay)
+			schedule_timeout_interruptible(info->close_delay);
+		wake_up_interruptible(&info->open_wait);
+	}
+	info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
+	wake_up_interruptible(&info->close_wait);
+}
+
+/*
+ * rs_wait_until_sent() --- wait until the transmitter is empty
+ */
+static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+}
+
+
+/*
+ * rs_hangup() --- called by tty_hangup() when a hangup is signaled.
+ */
+static void rs_hangup(struct tty_struct *tty)
+{
+	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct serial_state *state = info->state;
+
+#ifdef SIMSERIAL_DEBUG
+	printk("rs_hangup: called\n");
+#endif
+
+	state = info->state;
+
+	rs_flush_buffer(tty);
+	if (info->flags & ASYNC_CLOSING)
+		return;
+	shutdown(info);
+
+	info->event = 0;
+	state->count = 0;
+	info->flags &= ~ASYNC_NORMAL_ACTIVE;
+	info->tty = NULL;
+	wake_up_interruptible(&info->open_wait);
+}
+
+
+static int get_async_struct(int line, struct async_struct **ret_info)
+{
+	struct async_struct *info;
+	struct serial_state *sstate;
+
+	sstate = rs_table + line;
+	sstate->count++;
+	if (sstate->info) {
+		*ret_info = sstate->info;
+		return 0;
+	}
+	info = kzalloc(sizeof(struct async_struct), GFP_KERNEL);
+	if (!info) {
+		sstate->count--;
+		return -ENOMEM;
+	}
+	init_waitqueue_head(&info->open_wait);
+	init_waitqueue_head(&info->close_wait);
+	init_waitqueue_head(&info->delta_msr_wait);
+	info->magic = SERIAL_MAGIC;
+	info->port = sstate->port;
+	info->flags = sstate->flags;
+	info->xmit_fifo_size = sstate->xmit_fifo_size;
+	info->line = line;
+	INIT_WORK(&info->work, do_softint);
+	info->state = sstate;
+	if (sstate->info) {
+		kfree(info);
+		*ret_info = sstate->info;
+		return 0;
+	}
+	*ret_info = sstate->info = info;
+	return 0;
+}
+
+static int
+startup(struct async_struct *info)
+{
+	unsigned long flags;
+	int	retval=0;
+	irq_handler_t handler;
+	struct serial_state *state= info->state;
+	unsigned long page;
+
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	local_irq_save(flags);
+
+	if (info->flags & ASYNC_INITIALIZED) {
+		free_page(page);
+		goto errout;
+	}
+
+	if (!state->port || !state->type) {
+		if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
+		free_page(page);
+		goto errout;
+	}
+	if (info->xmit.buf)
+		free_page(page);
+	else
+		info->xmit.buf = (unsigned char *) page;
+
+#ifdef SIMSERIAL_DEBUG
+	printk("startup: ttys%d (irq %d)...", info->line, state->irq);
+#endif
+
+	/*
+	 * Allocate the IRQ if necessary
+	 */
+	if (state->irq && (!IRQ_ports[state->irq] ||
+			  !IRQ_ports[state->irq]->next_port)) {
+		if (IRQ_ports[state->irq]) {
+			retval = -EBUSY;
+			goto errout;
+		} else
+			handler = rs_interrupt_single;
+
+		retval = request_irq(state->irq, handler, IRQ_T(info), "simserial", NULL);
+		if (retval) {
+			if (capable(CAP_SYS_ADMIN)) {
+				if (info->tty)
+					set_bit(TTY_IO_ERROR,
+						&info->tty->flags);
+				retval = 0;
+			}
+			goto errout;
+		}
+	}
+
+	/*
+	 * Insert serial port into IRQ chain.
+	 */
+	info->prev_port = NULL;
+	info->next_port = IRQ_ports[state->irq];
+	if (info->next_port)
+		info->next_port->prev_port = info;
+	IRQ_ports[state->irq] = info;
+
+	if (info->tty) clear_bit(TTY_IO_ERROR, &info->tty->flags);
+
+	info->xmit.head = info->xmit.tail = 0;
+
+#if 0
+	/*
+	 * Set up serial timers...
+	 */
+	timer_table[RS_TIMER].expires = jiffies + 2*HZ/100;
+	timer_active |= 1 << RS_TIMER;
+#endif
+
+	/*
+	 * Set up the tty->alt_speed kludge
+	 */
+	if (info->tty) {
+		if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
+			info->tty->alt_speed = 57600;
+		if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
+			info->tty->alt_speed = 115200;
+		if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
+			info->tty->alt_speed = 230400;
+		if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
+			info->tty->alt_speed = 460800;
+	}
+
+	info->flags |= ASYNC_INITIALIZED;
+	local_irq_restore(flags);
+	return 0;
+
+errout:
+	local_irq_restore(flags);
+	return retval;
+}
+
+
+/*
+ * This routine is called whenever a serial port is opened.  It
+ * enables interrupts for a serial port, linking in its async structure into
+ * the IRQ chain.   It also performs the serial-specific
+ * initialization for the tty structure.
+ */
+static int rs_open(struct tty_struct *tty, struct file * filp)
+{
+	struct async_struct	*info;
+	int			retval, line;
+	unsigned long		page;
+
+	line = tty->index;
+	if ((line < 0) || (line >= NR_PORTS))
+		return -ENODEV;
+	retval = get_async_struct(line, &info);
+	if (retval)
+		return retval;
+	tty->driver_data = info;
+	info->tty = tty;
+
+#ifdef SIMSERIAL_DEBUG
+	printk("rs_open %s, count = %d\n", tty->name, info->state->count);
+#endif
+	info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+
+	if (!tmp_buf) {
+		page = get_zeroed_page(GFP_KERNEL);
+		if (!page)
+			return -ENOMEM;
+		if (tmp_buf)
+			free_page(page);
+		else
+			tmp_buf = (unsigned char *) page;
+	}
+
+	/*
+	 * If the port is the middle of closing, bail out now
+	 */
+	if (tty_hung_up_p(filp) ||
+	    (info->flags & ASYNC_CLOSING)) {
+		if (info->flags & ASYNC_CLOSING)
+			interruptible_sleep_on(&info->close_wait);
+#ifdef SERIAL_DO_RESTART
+		return ((info->flags & ASYNC_HUP_NOTIFY) ?
+			-EAGAIN : -ERESTARTSYS);
+#else
+		return -EAGAIN;
+#endif
+	}
+
+	/*
+	 * Start up serial port
+	 */
+	retval = startup(info);
+	if (retval) {
+		return retval;
+	}
+
+	/*
+	 * figure out which console to use (should be one already)
+	 */
+	console = console_drivers;
+	while (console) {
+		if ((console->flags & CON_ENABLED) && console->write) break;
+		console = console->next;
+	}
+
+#ifdef SIMSERIAL_DEBUG
+	printk("rs_open ttys%d successful\n", info->line);
+#endif
+	return 0;
+}
+
+/*
+ * /proc fs routines....
+ */
+
+static inline void line_info(struct seq_file *m, struct serial_state *state)
+{
+	seq_printf(m, "%d: uart:%s port:%lX irq:%d\n",
+		       state->line, uart_config[state->type].name,
+		       state->port, state->irq);
+}
+
+static int rs_proc_show(struct seq_file *m, void *v)
+{
+	int i;
+
+	seq_printf(m, "simserinfo:1.0 driver:%s\n", serial_version);
+	for (i = 0; i < NR_PORTS; i++)
+		line_info(m, &rs_table[i]);
+	return 0;
+}
+
+static int rs_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rs_proc_show, NULL);
+}
+
+static const struct file_operations rs_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rs_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * ---------------------------------------------------------------------
+ * rs_init() and friends
+ *
+ * rs_init() is called at boot-time to initialize the serial driver.
+ * ---------------------------------------------------------------------
+ */
+
+/*
+ * This routine prints out the appropriate serial driver version
+ * number, and identifies which options were configured into this
+ * driver.
+ */
+static inline void show_serial_version(void)
+{
+	printk(KERN_INFO "%s version %s with", serial_name, serial_version);
+	printk(KERN_INFO " no serial options enabled\n");
+}
+
+static const struct tty_operations hp_ops = {
+	.open = rs_open,
+	.close = rs_close,
+	.write = rs_write,
+	.put_char = rs_put_char,
+	.flush_chars = rs_flush_chars,
+	.write_room = rs_write_room,
+	.chars_in_buffer = rs_chars_in_buffer,
+	.flush_buffer = rs_flush_buffer,
+	.ioctl = rs_ioctl,
+	.throttle = rs_throttle,
+	.unthrottle = rs_unthrottle,
+	.send_xchar = rs_send_xchar,
+	.set_termios = rs_set_termios,
+	.stop = rs_stop,
+	.start = rs_start,
+	.hangup = rs_hangup,
+	.wait_until_sent = rs_wait_until_sent,
+	.proc_fops = &rs_proc_fops,
+};
+
+/*
+ * The serial driver boot-time initialization code!
+ */
+static int __init
+simrs_init (void)
+{
+	int			i, rc;
+	struct serial_state	*state;
+
+	if (!ia64_platform_is("hpsim"))
+		return -ENODEV;
+
+	hp_simserial_driver = alloc_tty_driver(1);
+	if (!hp_simserial_driver)
+		return -ENOMEM;
+
+	show_serial_version();
+
+	/* Initialize the tty_driver structure */
+
+	hp_simserial_driver->owner = THIS_MODULE;
+	hp_simserial_driver->driver_name = "simserial";
+	hp_simserial_driver->name = "ttyS";
+	hp_simserial_driver->major = TTY_MAJOR;
+	hp_simserial_driver->minor_start = 64;
+	hp_simserial_driver->type = TTY_DRIVER_TYPE_SERIAL;
+	hp_simserial_driver->subtype = SERIAL_TYPE_NORMAL;
+	hp_simserial_driver->init_termios = tty_std_termios;
+	hp_simserial_driver->init_termios.c_cflag =
+		B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+	hp_simserial_driver->flags = TTY_DRIVER_REAL_RAW;
+	tty_set_operations(hp_simserial_driver, &hp_ops);
+
+	/*
+	 * Let's have a little bit of fun !
+	 */
+	for (i = 0, state = rs_table; i < NR_PORTS; i++,state++) {
+
+		if (state->type == PORT_UNKNOWN) continue;
+
+		if (!state->irq) {
+			if ((rc = assign_irq_vector(AUTO_ASSIGN)) < 0)
+				panic("%s: out of interrupt vectors!\n",
+				      __func__);
+			state->irq = rc;
+			ia64_ssc_connect_irq(KEYBOARD_INTR, state->irq);
+		}
+
+		printk(KERN_INFO "ttyS%d at 0x%04lx (irq = %d) is a %s\n",
+		       state->line,
+		       state->port, state->irq,
+		       uart_config[state->type].name);
+	}
+
+	if (tty_register_driver(hp_simserial_driver))
+		panic("Couldn't register simserial driver\n");
+
+	return 0;
+}
+
+#ifndef MODULE
+__initcall(simrs_init);
+#endif
diff --git a/arch/ia64/hp/zx1/Makefile b/arch/ia64/hp/zx1/Makefile
new file mode 100644
index 00000000..61e87872
--- /dev/null
+++ b/arch/ia64/hp/zx1/Makefile
@@ -0,0 +1,8 @@
+#
+# ia64/hp/zx1/Makefile
+#
+# Copyright (C) 2002 Hewlett Packard
+# Copyright (C) Alex Williamson (alex_williamson@hp.com)
+#
+
+obj-$(CONFIG_IA64_GENERIC) += hpzx1_machvec.o hpzx1_swiotlb_machvec.o
diff --git a/arch/ia64/hp/zx1/hpzx1_machvec.c b/arch/ia64/hp/zx1/hpzx1_machvec.c
new file mode 100644
index 00000000..32518b0f
--- /dev/null
+++ b/arch/ia64/hp/zx1/hpzx1_machvec.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME		hpzx1
+#define MACHVEC_PLATFORM_HEADER		<asm/machvec_hpzx1.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c b/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c
new file mode 100644
index 00000000..4392a96b
--- /dev/null
+++ b/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME		hpzx1_swiotlb
+#define MACHVEC_PLATFORM_HEADER		<asm/machvec_hpzx1_swiotlb.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
new file mode 100644
index 00000000..241d1c53
--- /dev/null
+++ b/arch/ia64/include/asm/Kbuild
@@ -0,0 +1,14 @@
+include include/asm-generic/Kbuild.asm
+
+header-y += break.h
+header-y += fpu.h
+header-y += gcc_intrin.h
+header-y += ia64regs.h
+header-y += intel_intrin.h
+header-y += intrinsics.h
+header-y += perfmon.h
+header-y += perfmon_default_smpl.h
+header-y += ptrace_offsets.h
+header-y += rse.h
+header-y += ucontext.h
+header-y += ustack.h
diff --git a/arch/ia64/include/asm/acpi-ext.h b/arch/ia64/include/asm/acpi-ext.h
new file mode 100644
index 00000000..7f8362b3
--- /dev/null
+++ b/arch/ia64/include/asm/acpi-ext.h
@@ -0,0 +1,20 @@
+/*
+ * (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P.
+ *	Alex Williamson <alex.williamson@hp.com>
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Vendor specific extensions to ACPI.
+ */
+
+#ifndef _ASM_IA64_ACPI_EXT_H
+#define _ASM_IA64_ACPI_EXT_H
+
+#include <linux/types.h>
+
+extern acpi_status hp_acpi_csr_space (acpi_handle, u64 *base, u64 *length);
+
+#endif /* _ASM_IA64_ACPI_EXT_H */
diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
new file mode 100644
index 00000000..a06dfb13
--- /dev/null
+++ b/arch/ia64/include/asm/acpi.h
@@ -0,0 +1,199 @@
+/*
+ *  Copyright (C) 1999 VA Linux Systems
+ *  Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ *  Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com>
+ *  Copyright (C) 2001,2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#ifndef _ASM_ACPI_H
+#define _ASM_ACPI_H
+
+#ifdef __KERNEL__
+
+#include <acpi/pdc_intel.h>
+
+#include <linux/init.h>
+#include <linux/numa.h>
+#include <asm/system.h>
+#include <asm/numa.h>
+
+#define COMPILER_DEPENDENT_INT64	long
+#define COMPILER_DEPENDENT_UINT64	unsigned long
+
+/*
+ * Calling conventions:
+ *
+ * ACPI_SYSTEM_XFACE        - Interfaces to host OS (handlers, threads)
+ * ACPI_EXTERNAL_XFACE      - External ACPI interfaces
+ * ACPI_INTERNAL_XFACE      - Internal ACPI interfaces
+ * ACPI_INTERNAL_VAR_XFACE  - Internal variable-parameter list interfaces
+ */
+#define ACPI_SYSTEM_XFACE
+#define ACPI_EXTERNAL_XFACE
+#define ACPI_INTERNAL_XFACE
+#define ACPI_INTERNAL_VAR_XFACE
+
+/* Asm macros */
+
+#define ACPI_ASM_MACROS
+#define BREAKPOINT3
+#define ACPI_DISABLE_IRQS() local_irq_disable()
+#define ACPI_ENABLE_IRQS()  local_irq_enable()
+#define ACPI_FLUSH_CPU_CACHE()
+
+static inline int
+ia64_acpi_acquire_global_lock (unsigned int *lock)
+{
+	unsigned int old, new, val;
+	do {
+		old = *lock;
+		new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1));
+		val = ia64_cmpxchg4_acq(lock, new, old);
+	} while (unlikely (val != old));
+	return (new < 3) ? -1 : 0;
+}
+
+static inline int
+ia64_acpi_release_global_lock (unsigned int *lock)
+{
+	unsigned int old, new, val;
+	do {
+		old = *lock;
+		new = old & ~0x3;
+		val = ia64_cmpxchg4_acq(lock, new, old);
+	} while (unlikely (val != old));
+	return old & 0x1;
+}
+
+#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq)				\
+	((Acq) = ia64_acpi_acquire_global_lock(&facs->global_lock))
+
+#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq)				\
+	((Acq) = ia64_acpi_release_global_lock(&facs->global_lock))
+
+#ifdef	CONFIG_ACPI
+#define acpi_disabled 0	/* ACPI always enabled on IA64 */
+#define acpi_noirq 0	/* ACPI always enabled on IA64 */
+#define acpi_pci_disabled 0 /* ACPI PCI always enabled on IA64 */
+#define acpi_strict 1	/* no ACPI spec workarounds on IA64 */
+#endif
+#define acpi_processor_cstate_check(x) (x) /* no idle limits on IA64 :) */
+static inline void disable_acpi(void) { }
+static inline void pci_acpi_crs_quirks(void) { }
+
+#ifdef CONFIG_IA64_GENERIC
+const char *acpi_get_sysname (void);
+#else
+static inline const char *acpi_get_sysname (void)
+{
+# if defined (CONFIG_IA64_HP_SIM)
+	return "hpsim";
+# elif defined (CONFIG_IA64_HP_ZX1)
+	return "hpzx1";
+# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
+	return "hpzx1_swiotlb";
+# elif defined (CONFIG_IA64_SGI_SN2)
+	return "sn2";
+# elif defined (CONFIG_IA64_SGI_UV)
+	return "uv";
+# elif defined (CONFIG_IA64_DIG)
+	return "dig";
+# elif defined (CONFIG_IA64_XEN_GUEST)
+	return "xen";
+# elif defined(CONFIG_IA64_DIG_VTD)
+	return "dig_vtd";
+# else
+#	error Unknown platform.  Fix acpi.c.
+# endif
+}
+#endif
+int acpi_request_vector (u32 int_type);
+int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
+
+/* Low-level suspend routine. */
+extern int acpi_suspend_lowlevel(void);
+
+extern unsigned long acpi_wakeup_address;
+
+/*
+ * Record the cpei override flag and current logical cpu. This is
+ * useful for CPU removal.
+ */
+extern unsigned int can_cpei_retarget(void);
+extern unsigned int is_cpu_cpei_target(unsigned int cpu);
+extern void set_cpei_target_cpu(unsigned int cpu);
+extern unsigned int get_cpei_target_cpu(void);
+extern void prefill_possible_map(void);
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+extern int additional_cpus;
+#else
+#define additional_cpus 0
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+#if MAX_NUMNODES > 256
+#define MAX_PXM_DOMAINS MAX_NUMNODES
+#else
+#define MAX_PXM_DOMAINS (256)
+#endif
+extern int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
+extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
+#endif
+
+static inline bool arch_has_acpi_pdc(void) { return true; }
+static inline void arch_acpi_set_pdc_bits(u32 *buf)
+{
+	buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
+}
+
+#define acpi_unlazy_tlb(x)
+
+#ifdef CONFIG_ACPI_NUMA
+extern cpumask_t early_cpu_possible_map;
+#define for_each_possible_early_cpu(cpu)  \
+	for_each_cpu_mask((cpu), early_cpu_possible_map)
+
+static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus)
+{
+	int low_cpu, high_cpu;
+	int cpu;
+	int next_nid = 0;
+
+	low_cpu = cpus_weight(early_cpu_possible_map);
+
+	high_cpu = max(low_cpu, min_cpus);
+	high_cpu = min(high_cpu + reserve_cpus, NR_CPUS);
+
+	for (cpu = low_cpu; cpu < high_cpu; cpu++) {
+		cpu_set(cpu, early_cpu_possible_map);
+		if (node_cpuid[cpu].nid == NUMA_NO_NODE) {
+			node_cpuid[cpu].nid = next_nid;
+			next_nid++;
+			if (next_nid >= num_online_nodes())
+				next_nid = 0;
+		}
+	}
+}
+#endif /* CONFIG_ACPI_NUMA */
+
+#endif /*__KERNEL__*/
+
+#endif /*_ASM_ACPI_H*/
diff --git a/arch/ia64/include/asm/agp.h b/arch/ia64/include/asm/agp.h
new file mode 100644
index 00000000..01d09c40
--- /dev/null
+++ b/arch/ia64/include/asm/agp.h
@@ -0,0 +1,26 @@
+#ifndef _ASM_IA64_AGP_H
+#define _ASM_IA64_AGP_H
+
+/*
+ * IA-64 specific AGP definitions.
+ *
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * To avoid memory-attribute aliasing issues, we require that the AGPGART engine operate
+ * in coherent mode, which lets us map the AGP memory as normal (write-back) memory
+ * (unlike x86, where it gets mapped "write-coalescing").
+ */
+#define map_page_into_agp(page)		/* nothing */
+#define unmap_page_from_agp(page)	/* nothing */
+#define flush_agp_cache()		mb()
+
+/* GATT allocation. Returns/accepts GATT kernel virtual address. */
+#define alloc_gatt_pages(order)		\
+	((char *)__get_free_pages(GFP_KERNEL, (order)))
+#define free_gatt_pages(table, order)	\
+	free_pages((unsigned long)(table), (order))
+
+#endif /* _ASM_IA64_AGP_H */
diff --git a/arch/ia64/include/asm/asm-offsets.h b/arch/ia64/include/asm/asm-offsets.h
new file mode 100644
index 00000000..d370ee36
--- /dev/null
+++ b/arch/ia64/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/ia64/include/asm/asmmacro.h b/arch/ia64/include/asm/asmmacro.h
new file mode 100644
index 00000000..3ab6d75a
--- /dev/null
+++ b/arch/ia64/include/asm/asmmacro.h
@@ -0,0 +1,135 @@
+#ifndef _ASM_IA64_ASMMACRO_H
+#define _ASM_IA64_ASMMACRO_H
+
+/*
+ * Copyright (C) 2000-2001, 2003-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#define ENTRY(name)				\
+	.align 32;				\
+	.proc name;				\
+name:
+
+#define ENTRY_MIN_ALIGN(name)			\
+	.align 16;				\
+	.proc name;				\
+name:
+
+#define GLOBAL_ENTRY(name)			\
+	.global name;				\
+	ENTRY(name)
+
+#define END(name)				\
+	.endp name
+
+/*
+ * Helper macros to make unwind directives more readable:
+ */
+
+/* prologue_gr: */
+#define ASM_UNW_PRLG_RP			0x8
+#define ASM_UNW_PRLG_PFS		0x4
+#define ASM_UNW_PRLG_PSP		0x2
+#define ASM_UNW_PRLG_PR			0x1
+#define ASM_UNW_PRLG_GRSAVE(ninputs)	(32+(ninputs))
+
+/*
+ * Helper macros for accessing user memory.
+ *
+ * When adding any new .section/.previous entries here, make sure to
+ * also add it to the DISCARD section in arch/ia64/kernel/gate.lds.S or
+ * unpleasant things will happen.
+ */
+
+	.section "__ex_table", "a"		// declare section & section attributes
+	.previous
+
+# define EX(y,x...)				\
+	.xdata4 "__ex_table", 99f-., y-.;	\
+  [99:]	x
+# define EXCLR(y,x...)				\
+	.xdata4 "__ex_table", 99f-., y-.+4;	\
+  [99:]	x
+
+/*
+ * Tag MCA recoverable instruction ranges.
+ */
+
+	.section "__mca_table", "a"		// declare section & section attributes
+	.previous
+
+# define MCA_RECOVER_RANGE(y)			\
+	.xdata4 "__mca_table", y-., 99f-.;	\
+  [99:]
+
+/*
+ * Mark instructions that need a load of a virtual address patched to be
+ * a load of a physical address.  We use this either in critical performance
+ * path (ivt.S - TLB miss processing) or in places where it might not be
+ * safe to use a "tpa" instruction (mca_asm.S - error recovery).
+ */
+	.section ".data..patch.vtop", "a"	// declare section & section attributes
+	.previous
+
+#define	LOAD_PHYSICAL(pr, reg, obj)		\
+[1:](pr)movl reg = obj;				\
+	.xdata4 ".data..patch.vtop", 1b-.
+
+/*
+ * For now, we always put in the McKinley E9 workaround.  On CPUs that don't need it,
+ * we'll patch out the work-around bundles with NOPs, so their impact is minimal.
+ */
+#define DO_MCKINLEY_E9_WORKAROUND
+
+#ifdef DO_MCKINLEY_E9_WORKAROUND
+	.section ".data..patch.mckinley_e9", "a"
+	.previous
+/* workaround for Itanium 2 Errata 9: */
+# define FSYS_RETURN					\
+	.xdata4 ".data..patch.mckinley_e9", 1f-.;	\
+1:{ .mib;						\
+	nop.m 0;					\
+	mov r16=ar.pfs;					\
+	br.call.sptk.many b7=2f;;			\
+  };							\
+2:{ .mib;						\
+	nop.m 0;					\
+	mov ar.pfs=r16;					\
+	br.ret.sptk.many b6;;				\
+  }
+#else
+# define FSYS_RETURN	br.ret.sptk.many b6
+#endif
+
+/*
+ * If physical stack register size is different from DEF_NUM_STACK_REG,
+ * dynamically patch the kernel for correct size.
+ */
+	.section ".data..patch.phys_stack_reg", "a"
+	.previous
+#define LOAD_PHYS_STACK_REG_SIZE(reg)			\
+[1:]	adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0;	\
+	.xdata4 ".data..patch.phys_stack_reg", 1b-.
+
+/*
+ * Up until early 2004, use of .align within a function caused bad unwind info.
+ * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing
+ * otherwise.
+ */
+#ifdef HAVE_WORKING_TEXT_ALIGN
+# define TEXT_ALIGN(n)	.align n
+#else
+# define TEXT_ALIGN(n)
+#endif
+
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+# define dv_serialize_data		.serialize.data
+# define dv_serialize_instruction	.serialize.instruction
+#else
+# define dv_serialize_data
+# define dv_serialize_instruction
+#endif
+
+#endif /* _ASM_IA64_ASMMACRO_H */
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
new file mode 100644
index 00000000..44688143
--- /dev/null
+++ b/arch/ia64/include/asm/atomic.h
@@ -0,0 +1,220 @@
+#ifndef _ASM_IA64_ATOMIC_H
+#define _ASM_IA64_ATOMIC_H
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ *
+ * NOTE: don't mess with the types below!  The "unsigned long" and
+ * "int" types were carefully placed so as to ensure proper operation
+ * of the macros.
+ *
+ * Copyright (C) 1998, 1999, 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/types.h>
+
+#include <asm/intrinsics.h>
+#include <asm/system.h>
+
+
+#define ATOMIC_INIT(i)		((atomic_t) { (i) })
+#define ATOMIC64_INIT(i)	((atomic64_t) { (i) })
+
+#define atomic_read(v)		(*(volatile int *)&(v)->counter)
+#define atomic64_read(v)	(*(volatile long *)&(v)->counter)
+
+#define atomic_set(v,i)		(((v)->counter) = (i))
+#define atomic64_set(v,i)	(((v)->counter) = (i))
+
+static __inline__ int
+ia64_atomic_add (int i, atomic_t *v)
+{
+	__s32 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic_read(v);
+		new = old + i;
+	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old);
+	return new;
+}
+
+static __inline__ long
+ia64_atomic64_add (__s64 i, atomic64_t *v)
+{
+	__s64 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic64_read(v);
+		new = old + i;
+	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old);
+	return new;
+}
+
+static __inline__ int
+ia64_atomic_sub (int i, atomic_t *v)
+{
+	__s32 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic_read(v);
+		new = old - i;
+	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old);
+	return new;
+}
+
+static __inline__ long
+ia64_atomic64_sub (__s64 i, atomic64_t *v)
+{
+	__s64 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic64_read(v);
+		new = old - i;
+	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old);
+	return new;
+}
+
+#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+#define atomic64_cmpxchg(v, old, new) \
+	(cmpxchg(&((v)->counter), old, new))
+#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+
+static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int c, old;
+	c = atomic_read(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		old = atomic_cmpxchg((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != (u);
+}
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+	long c, old;
+	c = atomic64_read(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		old = atomic64_cmpxchg((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != (u);
+}
+
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
+#define atomic_add_return(i,v)						\
+({									\
+	int __ia64_aar_i = (i);						\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
+	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
+	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
+	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
+		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
+		: ia64_atomic_add(__ia64_aar_i, v);			\
+})
+
+#define atomic64_add_return(i,v)					\
+({									\
+	long __ia64_aar_i = (i);					\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
+	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
+	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
+	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
+		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
+		: ia64_atomic64_add(__ia64_aar_i, v);			\
+})
+
+/*
+ * Atomically add I to V and return TRUE if the resulting value is
+ * negative.
+ */
+static __inline__ int
+atomic_add_negative (int i, atomic_t *v)
+{
+	return atomic_add_return(i, v) < 0;
+}
+
+static __inline__ long
+atomic64_add_negative (__s64 i, atomic64_t *v)
+{
+	return atomic64_add_return(i, v) < 0;
+}
+
+#define atomic_sub_return(i,v)						\
+({									\
+	int __ia64_asr_i = (i);						\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
+	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
+	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
+	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
+		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
+		: ia64_atomic_sub(__ia64_asr_i, v);			\
+})
+
+#define atomic64_sub_return(i,v)					\
+({									\
+	long __ia64_asr_i = (i);					\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
+	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
+	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
+	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
+		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
+		: ia64_atomic64_sub(__ia64_asr_i, v);			\
+})
+
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
+
+#define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
+#define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
+#define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) == 0)
+#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i), (v)) == 0)
+#define atomic64_dec_and_test(v)	(atomic64_sub_return(1, (v)) == 0)
+#define atomic64_inc_and_test(v)	(atomic64_add_return(1, (v)) == 0)
+
+#define atomic_add(i,v)			atomic_add_return((i), (v))
+#define atomic_sub(i,v)			atomic_sub_return((i), (v))
+#define atomic_inc(v)			atomic_add(1, (v))
+#define atomic_dec(v)			atomic_sub(1, (v))
+
+#define atomic64_add(i,v)		atomic64_add_return((i), (v))
+#define atomic64_sub(i,v)		atomic64_sub_return((i), (v))
+#define atomic64_inc(v)			atomic64_add(1, (v))
+#define atomic64_dec(v)			atomic64_sub(1, (v))
+
+/* Atomic operations are already serializing */
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+#include <asm-generic/atomic-long.h>
+#endif /* _ASM_IA64_ATOMIC_H */
diff --git a/arch/ia64/include/asm/auxvec.h b/arch/ia64/include/asm/auxvec.h
new file mode 100644
index 00000000..23cebe56
--- /dev/null
+++ b/arch/ia64/include/asm/auxvec.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_IA64_AUXVEC_H
+#define _ASM_IA64_AUXVEC_H
+
+/*
+ * Architecture-neutral AT_ values are in the range 0-17.  Leave some room for more of
+ * them, start the architecture-specific ones at 32.
+ */
+#define AT_SYSINFO	32
+#define AT_SYSINFO_EHDR	33
+
+#endif /* _ASM_IA64_AUXVEC_H */
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
new file mode 100644
index 00000000..b76f7e00
--- /dev/null
+++ b/arch/ia64/include/asm/bitops.h
@@ -0,0 +1,468 @@
+#ifndef _ASM_IA64_BITOPS_H
+#define _ASM_IA64_BITOPS_H
+
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64
+ * O(1) scheduler patch
+ */
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/intrinsics.h>
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ *
+ * The address must be (at least) "long" aligned.
+ * Note that there are driver (e.g., eepro100) which use these operations to
+ * operate on hw-defined data-structures, so we can't easily change these
+ * operations to force a bigger alignment.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+static __inline__ void
+set_bit (int nr, volatile void *addr)
+{
+	__u32 bit, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	bit = 1 << (nr & 31);
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old | bit;
+	} while (cmpxchg_acq(m, old, new) != old);
+}
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void
+__set_bit (int nr, volatile void *addr)
+{
+	*((__u32 *) addr + (nr >> 5)) |= (1 << (nr & 31));
+}
+
+/*
+ * clear_bit() has "acquire" semantics.
+ */
+#define smp_mb__before_clear_bit()	smp_mb()
+#define smp_mb__after_clear_bit()	do { /* skip */; } while (0)
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void
+clear_bit (int nr, volatile void *addr)
+{
+	__u32 mask, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	mask = ~(1 << (nr & 31));
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old & mask;
+	} while (cmpxchg_acq(m, old, new) != old);
+}
+
+/**
+ * clear_bit_unlock - Clears a bit in memory with release
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit_unlock() is atomic and may not be reordered.  It does
+ * contain a memory barrier suitable for unlock type operations.
+ */
+static __inline__ void
+clear_bit_unlock (int nr, volatile void *addr)
+{
+	__u32 mask, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	mask = ~(1 << (nr & 31));
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old & mask;
+	} while (cmpxchg_rel(m, old, new) != old);
+}
+
+/**
+ * __clear_bit_unlock - Non-atomically clears a bit in memory with release
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * Similarly to clear_bit_unlock, the implementation uses a store
+ * with release semantics. See also arch_spin_unlock().
+ */
+static __inline__ void
+__clear_bit_unlock(int nr, void *addr)
+{
+	__u32 * const m = (__u32 *) addr + (nr >> 5);
+	__u32 const new = *m & ~(1 << (nr & 31));
+
+	ia64_st4_rel_nta(m, new);
+}
+
+/**
+ * __clear_bit - Clears a bit in memory (non-atomic version)
+ * @nr: the bit to clear
+ * @addr: the address to start counting from
+ *
+ * Unlike clear_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void
+__clear_bit (int nr, volatile void *addr)
+{
+	*((__u32 *) addr + (nr >> 5)) &= ~(1 << (nr & 31));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to toggle
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void
+change_bit (int nr, volatile void *addr)
+{
+	__u32 bit, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	bit = (1 << (nr & 31));
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old ^ bit;
+	} while (cmpxchg_acq(m, old, new) != old);
+}
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to toggle
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void
+__change_bit (int nr, volatile void *addr)
+{
+	*((__u32 *) addr + (nr >> 5)) ^= (1 << (nr & 31));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies the acquisition side of the memory barrier.
+ */
+static __inline__ int
+test_and_set_bit (int nr, volatile void *addr)
+{
+	__u32 bit, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	bit = 1 << (nr & 31);
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old | bit;
+	} while (cmpxchg_acq(m, old, new) != old);
+	return (old & bit) != 0;
+}
+
+/**
+ * test_and_set_bit_lock - Set a bit and return its old value for lock
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This is the same as test_and_set_bit on ia64
+ */
+#define test_and_set_bit_lock test_and_set_bit
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.  
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static __inline__ int
+__test_and_set_bit (int nr, volatile void *addr)
+{
+	__u32 *p = (__u32 *) addr + (nr >> 5);
+	__u32 m = 1 << (nr & 31);
+	int oldbitset = (*p & m) != 0;
+
+	*p |= m;
+	return oldbitset;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies the acquisition side of the memory barrier.
+ */
+static __inline__ int
+test_and_clear_bit (int nr, volatile void *addr)
+{
+	__u32 mask, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	mask = ~(1 << (nr & 31));
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old & mask;
+	} while (cmpxchg_acq(m, old, new) != old);
+	return (old & ~mask) != 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.  
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static __inline__ int
+__test_and_clear_bit(int nr, volatile void * addr)
+{
+	__u32 *p = (__u32 *) addr + (nr >> 5);
+	__u32 m = 1 << (nr & 31);
+	int oldbitset = (*p & m) != 0;
+
+	*p &= ~m;
+	return oldbitset;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies the acquisition side of the memory barrier.
+ */
+static __inline__ int
+test_and_change_bit (int nr, volatile void *addr)
+{
+	__u32 bit, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	bit = (1 << (nr & 31));
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old ^ bit;
+	} while (cmpxchg_acq(m, old, new) != old);
+	return (old & bit) != 0;
+}
+
+/**
+ * __test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ */
+static __inline__ int
+__test_and_change_bit (int nr, void *addr)
+{
+	__u32 old, bit = (1 << (nr & 31));
+	__u32 *m = (__u32 *) addr + (nr >> 5);
+
+	old = *m;
+	*m = old ^ bit;
+	return (old & bit) != 0;
+}
+
+static __inline__ int
+test_bit (int nr, const volatile void *addr)
+{
+	return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31));
+}
+
+/**
+ * ffz - find the first zero bit in a long word
+ * @x: The long word to find the bit in
+ *
+ * Returns the bit-number (0..63) of the first (least significant) zero bit.
+ * Undefined if no zero exists, so code should check against ~0UL first...
+ */
+static inline unsigned long
+ffz (unsigned long x)
+{
+	unsigned long result;
+
+	result = ia64_popcnt(x & (~x - 1));
+	return result;
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @x: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static __inline__ unsigned long
+__ffs (unsigned long x)
+{
+	unsigned long result;
+
+	result = ia64_popcnt((x-1) & ~x);
+	return result;
+}
+
+#ifdef __KERNEL__
+
+/*
+ * Return bit number of last (most-significant) bit set.  Undefined
+ * for x==0.  Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3).
+ */
+static inline unsigned long
+ia64_fls (unsigned long x)
+{
+	long double d = x;
+	long exp;
+
+	exp = ia64_getf_exp(d);
+	return exp - 0xffff;
+}
+
+/*
+ * Find the last (most significant) bit set.  Returns 0 for x==0 and
+ * bits are numbered from 1..32 (e.g., fls(9) == 4).
+ */
+static inline int
+fls (int t)
+{
+	unsigned long x = t & 0xffffffffu;
+
+	if (!x)
+		return 0;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	return ia64_popcnt(x);
+}
+
+/*
+ * Find the last (most significant) bit set.  Undefined for x==0.
+ * Bits are numbered from 0..63 (e.g., __fls(9) == 3).
+ */
+static inline unsigned long
+__fls (unsigned long x)
+{
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	x |= x >> 32;
+	return ia64_popcnt(x) - 1;
+}
+
+#include <asm-generic/bitops/fls64.h>
+
+/*
+ * ffs: find first bit set. This is defined the same way as the libc and
+ * compiler builtin ffs routines, therefore differs in spirit from the above
+ * ffz (man ffs): it operates on "int" values only and the result value is the
+ * bit number + 1.  ffs(0) is defined to return zero.
+ */
+#define ffs(x)	__builtin_ffs(x)
+
+/*
+ * hweightN: returns the hamming weight (i.e. the number
+ * of bits set) of a N-bit word
+ */
+static __inline__ unsigned long __arch_hweight64(unsigned long x)
+{
+	unsigned long result;
+	result = ia64_popcnt(x);
+	return result;
+}
+
+#define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful))
+#define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful))
+#define __arch_hweight8(x)  ((unsigned int) __arch_hweight64((x) & 0xfful))
+
+#include <asm-generic/bitops/const_hweight.h>
+
+#endif /* __KERNEL__ */
+
+#include <asm-generic/bitops/find.h>
+
+#ifdef __KERNEL__
+
+#include <asm-generic/bitops/le.h>
+
+#define ext2_set_bit_atomic(l,n,a)	test_and_set_bit(n,a)
+#define ext2_clear_bit_atomic(l,n,a)	test_and_clear_bit(n,a)
+
+#include <asm-generic/bitops/sched.h>
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_BITOPS_H */
diff --git a/arch/ia64/include/asm/bitsperlong.h b/arch/ia64/include/asm/bitsperlong.h
new file mode 100644
index 00000000..ec4db3c9
--- /dev/null
+++ b/arch/ia64/include/asm/bitsperlong.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_IA64_BITSPERLONG_H
+#define __ASM_IA64_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_IA64_BITSPERLONG_H */
diff --git a/arch/ia64/include/asm/break.h b/arch/ia64/include/asm/break.h
new file mode 100644
index 00000000..e90c40ec
--- /dev/null
+++ b/arch/ia64/include/asm/break.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_IA64_BREAK_H
+#define _ASM_IA64_BREAK_H
+
+/*
+ * IA-64 Linux break numbers.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * OS-specific debug break numbers:
+ */
+#define __IA64_BREAK_KDB		0x80100
+#define __IA64_BREAK_KPROBE		0x81000 /* .. 0x81fff */
+#define __IA64_BREAK_JPROBE		0x82000
+
+/*
+ * OS-specific break numbers:
+ */
+#define __IA64_BREAK_SYSCALL		0x100000
+
+/*
+ * Xen specific break numbers:
+ */
+#define __IA64_XEN_HYPERCALL		0x1000
+/* [__IA64_XEN_HYPERPRIVOP_START, __IA64_XEN_HYPERPRIVOP_MAX] is used
+   for xen hyperprivops */
+#define __IA64_XEN_HYPERPRIVOP_START	0x1
+#define __IA64_XEN_HYPERPRIVOP_MAX	0x1a
+
+#endif /* _ASM_IA64_BREAK_H */
diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h
new file mode 100644
index 00000000..823616b5
--- /dev/null
+++ b/arch/ia64/include/asm/bug.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_IA64_BUG_H
+#define _ASM_IA64_BUG_H
+
+#ifdef CONFIG_BUG
+#define ia64_abort()	__builtin_trap()
+#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0)
+
+/* should this BUG be made generic? */
+#define HAVE_ARCH_BUG
+#endif
+
+#include <asm-generic/bug.h>
+
+#endif
diff --git a/arch/ia64/include/asm/bugs.h b/arch/ia64/include/asm/bugs.h
new file mode 100644
index 00000000..433523e3
--- /dev/null
+++ b/arch/ia64/include/asm/bugs.h
@@ -0,0 +1,19 @@
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ *	void check_bugs(void);
+ *
+ * Based on <asm-alpha/bugs.h>.
+ *
+ * Modified 1998, 1999, 2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>,  Hewlett-Packard Co.
+ */
+#ifndef _ASM_IA64_BUGS_H
+#define _ASM_IA64_BUGS_H
+
+#include <asm/processor.h>
+
+extern void check_bugs (void);
+
+#endif /* _ASM_IA64_BUGS_H */
diff --git a/arch/ia64/include/asm/byteorder.h b/arch/ia64/include/asm/byteorder.h
new file mode 100644
index 00000000..a8dd7355
--- /dev/null
+++ b/arch/ia64/include/asm/byteorder.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_IA64_BYTEORDER_H
+#define _ASM_IA64_BYTEORDER_H
+
+#include <linux/byteorder/little_endian.h>
+
+#endif /* _ASM_IA64_BYTEORDER_H */
diff --git a/arch/ia64/include/asm/cache.h b/arch/ia64/include/asm/cache.h
new file mode 100644
index 00000000..988254a7
--- /dev/null
+++ b/arch/ia64/include/asm/cache.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_IA64_CACHE_H
+#define _ASM_IA64_CACHE_H
+
+
+/*
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/* Bytes per L1 (data) cache line.  */
+#define L1_CACHE_SHIFT		CONFIG_IA64_L1_CACHE_SHIFT
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+
+#ifdef CONFIG_SMP
+# define SMP_CACHE_SHIFT	L1_CACHE_SHIFT
+# define SMP_CACHE_BYTES	L1_CACHE_BYTES
+#else
+  /*
+   * The "aligned" directive can only _increase_ alignment, so this is
+   * safe and provides an easy way to avoid wasting space on a
+   * uni-processor:
+   */
+# define SMP_CACHE_SHIFT	3
+# define SMP_CACHE_BYTES	(1 << 3)
+#endif
+
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+#endif /* _ASM_IA64_CACHE_H */
diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h
new file mode 100644
index 00000000..429eefc9
--- /dev/null
+++ b/arch/ia64/include/asm/cacheflush.h
@@ -0,0 +1,54 @@
+#ifndef _ASM_IA64_CACHEFLUSH_H
+#define _ASM_IA64_CACHEFLUSH_H
+
+/*
+ * Copyright (C) 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/page-flags.h>
+#include <linux/bitops.h>
+
+#include <asm/page.h>
+
+/*
+ * Cache flushing routines.  This is the kind of stuff that can be very expensive, so try
+ * to avoid them whenever possible.
+ */
+
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define flush_icache_page(vma,page)		do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+#define flush_dcache_page(page)			\
+do {						\
+	clear_bit(PG_arch_1, &(page)->flags);	\
+} while (0)
+
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+
+extern void flush_icache_range (unsigned long start, unsigned long end);
+extern void clflush_cache_range(void *addr, int size);
+
+
+#define flush_icache_user_range(vma, page, user_addr, len)					\
+do {												\
+	unsigned long _addr = (unsigned long) page_address(page) + ((user_addr) & ~PAGE_MASK);	\
+	flush_icache_range(_addr, _addr + (len));						\
+} while (0)
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+do { memcpy(dst, src, len); \
+     flush_icache_user_range(vma, page, vaddr, len); \
+} while (0)
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
+
+#endif /* _ASM_IA64_CACHEFLUSH_H */
diff --git a/arch/ia64/include/asm/checksum.h b/arch/ia64/include/asm/checksum.h
new file mode 100644
index 00000000..97af1550
--- /dev/null
+++ b/arch/ia64/include/asm/checksum.h
@@ -0,0 +1,79 @@
+#ifndef _ASM_IA64_CHECKSUM_H
+#define _ASM_IA64_CHECKSUM_H
+
+/*
+ * Modified 1998, 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * Computes the checksum of the TCP/UDP pseudo-header returns a 16-bit
+ * checksum, already complemented
+ */
+extern __sum16 csum_tcpudp_magic (__be32 saddr, __be32 daddr,
+					     unsigned short len,
+					     unsigned short proto,
+					     __wsum sum);
+
+extern __wsum csum_tcpudp_nofold (__be32 saddr, __be32 daddr,
+					unsigned short len,
+					unsigned short proto,
+					__wsum sum);
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ * Same as csum_partial, but copies from src while it checksums.
+ *
+ * Here it is even more important to align src and dst on a 32-bit (or
+ * even better 64-bit) boundary.
+ */
+extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+						 int len, __wsum sum,
+						 int *errp);
+
+extern __wsum csum_partial_copy_nocheck(const void *src, void *dst,
+					       int len, __wsum sum);
+
+/*
+ * This routine is used for miscellaneous IP-like checksums, mainly in
+ * icmp.c
+ */
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+/*
+ * Fold a partial checksum without adding pseudo headers.
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+	u32 sum = (__force u32)csum;
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (__force __sum16)~sum;
+}
+
+#define _HAVE_ARCH_IPV6_CSUM	1
+struct in6_addr;
+extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+	const struct in6_addr *daddr, __u32 len, unsigned short proto,
+	__wsum csum);
+
+#endif /* _ASM_IA64_CHECKSUM_H */
diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h
new file mode 100644
index 00000000..fcca30b9
--- /dev/null
+++ b/arch/ia64/include/asm/cpu.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_IA64_CPU_H_
+#define _ASM_IA64_CPU_H_
+
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/topology.h>
+#include <linux/percpu.h>
+
+struct ia64_cpu {
+	struct cpu cpu;
+};
+
+DECLARE_PER_CPU(struct ia64_cpu, cpu_devices);
+
+DECLARE_PER_CPU(int, cpu_state);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int arch_register_cpu(int num);
+extern void arch_unregister_cpu(int);
+#endif
+
+#endif /* _ASM_IA64_CPU_H_ */
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
new file mode 100644
index 00000000..6073b187
--- /dev/null
+++ b/arch/ia64/include/asm/cputime.h
@@ -0,0 +1,110 @@
+/*
+ * Definitions for measuring cputime on ia64 machines.
+ *
+ * Based on <asm-powerpc/cputime.h>.
+ *
+ * Copyright (C) 2007 FUJITSU LIMITED
+ * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
+ * Otherwise we measure cpu time in jiffies using the generic definitions.
+ */
+
+#ifndef __IA64_CPUTIME_H
+#define __IA64_CPUTIME_H
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#include <asm-generic/cputime.h>
+#else
+
+#include <linux/time.h>
+#include <linux/jiffies.h>
+#include <asm/processor.h>
+
+typedef u64 cputime_t;
+typedef u64 cputime64_t;
+
+#define cputime_zero			((cputime_t)0)
+#define cputime_one_jiffy		jiffies_to_cputime(1)
+#define cputime_max			((~((cputime_t)0) >> 1) - 1)
+#define cputime_add(__a, __b)		((__a) +  (__b))
+#define cputime_sub(__a, __b)		((__a) -  (__b))
+#define cputime_div(__a, __n)		((__a) /  (__n))
+#define cputime_halve(__a)		((__a) >> 1)
+#define cputime_eq(__a, __b)		((__a) == (__b))
+#define cputime_gt(__a, __b)		((__a) >  (__b))
+#define cputime_ge(__a, __b)		((__a) >= (__b))
+#define cputime_lt(__a, __b)		((__a) <  (__b))
+#define cputime_le(__a, __b)		((__a) <= (__b))
+
+#define cputime64_zero			((cputime64_t)0)
+#define cputime64_add(__a, __b)		((__a) + (__b))
+#define cputime64_sub(__a, __b)		((__a) - (__b))
+#define cputime_to_cputime64(__ct)	(__ct)
+
+/*
+ * Convert cputime <-> jiffies (HZ)
+ */
+#define cputime_to_jiffies(__ct)	((__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies_to_cputime(__jif)	((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct)	((__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies64_to_cputime64(__jif)	((__jif) * (NSEC_PER_SEC / HZ))
+
+/*
+ * Convert cputime <-> microseconds
+ */
+#define cputime_to_usecs(__ct)		((__ct) / NSEC_PER_USEC)
+#define usecs_to_cputime(__usecs)	((__usecs) * NSEC_PER_USEC)
+
+/*
+ * Convert cputime <-> seconds
+ */
+#define cputime_to_secs(__ct)		((__ct) / NSEC_PER_SEC)
+#define secs_to_cputime(__secs)		((__secs) * NSEC_PER_SEC)
+
+/*
+ * Convert cputime <-> timespec (nsec)
+ */
+static inline cputime_t timespec_to_cputime(const struct timespec *val)
+{
+	cputime_t ret = val->tv_sec * NSEC_PER_SEC;
+	return (ret + val->tv_nsec);
+}
+static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
+{
+	val->tv_sec  = ct / NSEC_PER_SEC;
+	val->tv_nsec = ct % NSEC_PER_SEC;
+}
+
+/*
+ * Convert cputime <-> timeval (msec)
+ */
+static inline cputime_t timeval_to_cputime(struct timeval *val)
+{
+	cputime_t ret = val->tv_sec * NSEC_PER_SEC;
+	return (ret + val->tv_usec * NSEC_PER_USEC);
+}
+static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
+{
+	val->tv_sec = ct / NSEC_PER_SEC;
+	val->tv_usec = (ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+}
+
+/*
+ * Convert cputime <-> clock (USER_HZ)
+ */
+#define cputime_to_clock_t(__ct)	((__ct) / (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x)		((__x) * (NSEC_PER_SEC / USER_HZ))
+
+/*
+ * Convert cputime64 to clock.
+ */
+#define cputime64_to_clock_t(__ct)      cputime_to_clock_t((cputime_t)__ct)
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* __IA64_CPUTIME_H */
diff --git a/arch/ia64/include/asm/current.h b/arch/ia64/include/asm/current.h
new file mode 100644
index 00000000..c659f90f
--- /dev/null
+++ b/arch/ia64/include/asm/current.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_IA64_CURRENT_H
+#define _ASM_IA64_CURRENT_H
+
+/*
+ * Modified 1998-2000
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+#include <asm/intrinsics.h>
+
+/*
+ * In kernel mode, thread pointer (r13) is used to point to the current task
+ * structure.
+ */
+#define current	((struct task_struct *) ia64_getreg(_IA64_REG_TP))
+
+#endif /* _ASM_IA64_CURRENT_H */
diff --git a/arch/ia64/include/asm/cyclone.h b/arch/ia64/include/asm/cyclone.h
new file mode 100644
index 00000000..88f6500e
--- /dev/null
+++ b/arch/ia64/include/asm/cyclone.h
@@ -0,0 +1,15 @@
+#ifndef ASM_IA64_CYCLONE_H
+#define ASM_IA64_CYCLONE_H
+
+#ifdef	CONFIG_IA64_CYCLONE
+extern int use_cyclone;
+extern void __init cyclone_setup(void);
+#else	/* CONFIG_IA64_CYCLONE */
+#define use_cyclone 0
+static inline void cyclone_setup(void)
+{
+	printk(KERN_ERR "Cyclone Counter: System not configured"
+					" w/ CONFIG_IA64_CYCLONE.\n");
+}
+#endif	/* CONFIG_IA64_CYCLONE */
+#endif	/* !ASM_IA64_CYCLONE_H */
diff --git a/arch/ia64/include/asm/delay.h b/arch/ia64/include/asm/delay.h
new file mode 100644
index 00000000..a30a62f2
--- /dev/null
+++ b/arch/ia64/include/asm/delay.h
@@ -0,0 +1,88 @@
+#ifndef _ASM_IA64_DELAY_H
+#define _ASM_IA64_DELAY_H
+
+/*
+ * Delay routines using a pre-computed "cycles/usec" value.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/compiler.h>
+
+#include <asm/intrinsics.h>
+#include <asm/processor.h>
+
+static __inline__ void
+ia64_set_itm (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_CR_ITM, val);
+	ia64_srlz_d();
+}
+
+static __inline__ unsigned long
+ia64_get_itm (void)
+{
+	unsigned long result;
+
+	result = ia64_getreg(_IA64_REG_CR_ITM);
+	ia64_srlz_d();
+	return result;
+}
+
+static __inline__ void
+ia64_set_itv (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_CR_ITV, val);
+	ia64_srlz_d();
+}
+
+static __inline__ unsigned long
+ia64_get_itv (void)
+{
+	return ia64_getreg(_IA64_REG_CR_ITV);
+}
+
+static __inline__ void
+ia64_set_itc (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_AR_ITC, val);
+	ia64_srlz_d();
+}
+
+static __inline__ unsigned long
+ia64_get_itc (void)
+{
+	unsigned long result;
+
+	result = ia64_getreg(_IA64_REG_AR_ITC);
+	ia64_barrier();
+#ifdef CONFIG_ITANIUM
+	while (unlikely((__s32) result == -1)) {
+		result = ia64_getreg(_IA64_REG_AR_ITC);
+		ia64_barrier();
+	}
+#endif
+	return result;
+}
+
+extern void ia64_delay_loop (unsigned long loops);
+
+static __inline__ void
+__delay (unsigned long loops)
+{
+	if (unlikely(loops < 1))
+		return;
+
+	ia64_delay_loop (loops - 1);
+}
+
+extern void udelay (unsigned long usecs);
+
+#endif /* _ASM_IA64_DELAY_H */
diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h
new file mode 100644
index 00000000..d66d446b
--- /dev/null
+++ b/arch/ia64/include/asm/device.h
@@ -0,0 +1,21 @@
+/*
+ * Arch specific extensions to struct device
+ *
+ * This file is released under the GPLv2
+ */
+#ifndef _ASM_IA64_DEVICE_H
+#define _ASM_IA64_DEVICE_H
+
+struct dev_archdata {
+#ifdef CONFIG_ACPI
+	void	*acpi_handle;
+#endif
+#ifdef CONFIG_DMAR
+	void *iommu; /* hook for IOMMU specific extension */
+#endif
+};
+
+struct pdev_archdata {
+};
+
+#endif /* _ASM_IA64_DEVICE_H */
diff --git a/arch/ia64/include/asm/div64.h b/arch/ia64/include/asm/div64.h
new file mode 100644
index 00000000..6cd978ce
--- /dev/null
+++ b/arch/ia64/include/asm/div64.h
@@ -0,0 +1 @@
+#include <asm-generic/div64.h>
diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
new file mode 100644
index 00000000..4336d080
--- /dev/null
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -0,0 +1,102 @@
+#ifndef _ASM_IA64_DMA_MAPPING_H
+#define _ASM_IA64_DMA_MAPPING_H
+
+/*
+ * Copyright (C) 2003-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <asm/machvec.h>
+#include <linux/scatterlist.h>
+#include <asm/swiotlb.h>
+#include <linux/dma-debug.h>
+
+#define ARCH_HAS_DMA_GET_REQUIRED_MASK
+
+#define DMA_ERROR_CODE 0
+
+extern struct dma_map_ops *dma_ops;
+extern struct ia64_machine_vector ia64_mv;
+extern void set_iommu_machvec(void);
+
+extern void machvec_dma_sync_single(struct device *, dma_addr_t, size_t,
+				    enum dma_data_direction);
+extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int,
+				enum dma_data_direction);
+
+static inline void *dma_alloc_coherent(struct device *dev, size_t size,
+				       dma_addr_t *daddr, gfp_t gfp)
+{
+	struct dma_map_ops *ops = platform_dma_get_ops(dev);
+	void *caddr;
+
+	caddr = ops->alloc_coherent(dev, size, daddr, gfp);
+	debug_dma_alloc_coherent(dev, size, *daddr, caddr);
+	return caddr;
+}
+
+static inline void dma_free_coherent(struct device *dev, size_t size,
+				     void *caddr, dma_addr_t daddr)
+{
+	struct dma_map_ops *ops = platform_dma_get_ops(dev);
+	debug_dma_free_coherent(dev, size, caddr, daddr);
+	ops->free_coherent(dev, size, caddr, daddr);
+}
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+#define get_dma_ops(dev) platform_dma_get_ops(dev)
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr)
+{
+	struct dma_map_ops *ops = platform_dma_get_ops(dev);
+	return ops->mapping_error(dev, daddr);
+}
+
+static inline int dma_supported(struct device *dev, u64 mask)
+{
+	struct dma_map_ops *ops = platform_dma_get_ops(dev);
+	return ops->dma_supported(dev, mask);
+}
+
+static inline int
+dma_set_mask (struct device *dev, u64 mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;
+	*dev->dma_mask = mask;
+	return 0;
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+	if (!dev->dma_mask)
+		return 0;
+
+	return addr + size - 1 <= *dev->dma_mask;
+}
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return paddr;
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return daddr;
+}
+
+static inline void
+dma_cache_sync (struct device *dev, void *vaddr, size_t size,
+	enum dma_data_direction dir)
+{
+	/*
+	 * IA-64 is cache-coherent, so this is mostly a no-op.  However, we do need to
+	 * ensure that dma_cache_sync() enforces order, hence the mb().
+	 */
+	mb();
+}
+
+#endif /* _ASM_IA64_DMA_MAPPING_H */
diff --git a/arch/ia64/include/asm/dma.h b/arch/ia64/include/asm/dma.h
new file mode 100644
index 00000000..4d97f60f
--- /dev/null
+++ b/arch/ia64/include/asm/dma.h
@@ -0,0 +1,24 @@
+#ifndef _ASM_IA64_DMA_H
+#define _ASM_IA64_DMA_H
+
+/*
+ * Copyright (C) 1998-2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#include <asm/io.h>		/* need byte IO */
+
+extern unsigned long MAX_DMA_ADDRESS;
+
+#ifdef CONFIG_PCI
+  extern int isa_dma_bridge_buggy;
+#else
+# define isa_dma_bridge_buggy 	(0)
+#endif
+
+#define free_dma(x)
+
+void dma_mark_clean(void *addr, size_t size);
+
+#endif /* _ASM_IA64_DMA_H */
diff --git a/arch/ia64/include/asm/dmi.h b/arch/ia64/include/asm/dmi.h
new file mode 100644
index 00000000..1ed4c8fe
--- /dev/null
+++ b/arch/ia64/include/asm/dmi.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_DMI_H
+#define _ASM_DMI_H 1
+
+#include <linux/slab.h>
+#include <asm/io.h>
+
+/* Use normal IO mappings for DMI */
+#define dmi_ioremap ioremap
+#define dmi_iounmap(x,l) iounmap(x)
+#define dmi_alloc(l) kmalloc(l, GFP_ATOMIC)
+
+#endif
diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h
new file mode 100644
index 00000000..b5298eb0
--- /dev/null
+++ b/arch/ia64/include/asm/elf.h
@@ -0,0 +1,237 @@
+#ifndef _ASM_IA64_ELF_H
+#define _ASM_IA64_ELF_H
+
+/*
+ * ELF-specific definitions.
+ *
+ * Copyright (C) 1998-1999, 2002-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#include <asm/fpu.h>
+#include <asm/page.h>
+#include <asm/auxvec.h>
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == EM_IA_64)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS64
+#define ELF_DATA	ELFDATA2LSB
+#define ELF_ARCH	EM_IA_64
+
+#define CORE_DUMP_USE_REGSET
+
+/* Least-significant four bits of ELF header's e_flags are OS-specific.  The bits are
+   interpreted as follows by Linux: */
+#define EF_IA_64_LINUX_EXECUTABLE_STACK	0x1	/* is stack (& heap) executable by default? */
+
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
+
+/*
+ * This is the location that an ET_DYN program is loaded if exec'ed.
+ * Typical use of this is to invoke "./ld.so someprog" to test out a
+ * new version of the loader.  We need to make sure that it is out of
+ * the way of the program that it will "exec", and that there is
+ * sufficient room for the brk.
+ */
+#define ELF_ET_DYN_BASE		(TASK_UNMAPPED_BASE + 0x800000000UL)
+
+#define PT_IA_64_UNWIND		0x70000001
+
+/* IA-64 relocations: */
+#define R_IA64_NONE		0x00	/* none */
+#define R_IA64_IMM14		0x21	/* symbol + addend, add imm14 */
+#define R_IA64_IMM22		0x22	/* symbol + addend, add imm22 */
+#define R_IA64_IMM64		0x23	/* symbol + addend, mov imm64 */
+#define R_IA64_DIR32MSB		0x24	/* symbol + addend, data4 MSB */
+#define R_IA64_DIR32LSB		0x25	/* symbol + addend, data4 LSB */
+#define R_IA64_DIR64MSB		0x26	/* symbol + addend, data8 MSB */
+#define R_IA64_DIR64LSB		0x27	/* symbol + addend, data8 LSB */
+#define R_IA64_GPREL22		0x2a	/* @gprel(sym+add), add imm22 */
+#define R_IA64_GPREL64I		0x2b	/* @gprel(sym+add), mov imm64 */
+#define R_IA64_GPREL32MSB	0x2c	/* @gprel(sym+add), data4 MSB */
+#define R_IA64_GPREL32LSB	0x2d	/* @gprel(sym+add), data4 LSB */
+#define R_IA64_GPREL64MSB	0x2e	/* @gprel(sym+add), data8 MSB */
+#define R_IA64_GPREL64LSB	0x2f	/* @gprel(sym+add), data8 LSB */
+#define R_IA64_LTOFF22		0x32	/* @ltoff(sym+add), add imm22 */
+#define R_IA64_LTOFF64I		0x33	/* @ltoff(sym+add), mov imm64 */
+#define R_IA64_PLTOFF22		0x3a	/* @pltoff(sym+add), add imm22 */
+#define R_IA64_PLTOFF64I	0x3b	/* @pltoff(sym+add), mov imm64 */
+#define R_IA64_PLTOFF64MSB	0x3e	/* @pltoff(sym+add), data8 MSB */
+#define R_IA64_PLTOFF64LSB	0x3f	/* @pltoff(sym+add), data8 LSB */
+#define R_IA64_FPTR64I		0x43	/* @fptr(sym+add), mov imm64 */
+#define R_IA64_FPTR32MSB	0x44	/* @fptr(sym+add), data4 MSB */
+#define R_IA64_FPTR32LSB	0x45	/* @fptr(sym+add), data4 LSB */
+#define R_IA64_FPTR64MSB	0x46	/* @fptr(sym+add), data8 MSB */
+#define R_IA64_FPTR64LSB	0x47	/* @fptr(sym+add), data8 LSB */
+#define R_IA64_PCREL60B		0x48	/* @pcrel(sym+add), brl */
+#define R_IA64_PCREL21B		0x49	/* @pcrel(sym+add), ptb, call */
+#define R_IA64_PCREL21M		0x4a	/* @pcrel(sym+add), chk.s */
+#define R_IA64_PCREL21F		0x4b	/* @pcrel(sym+add), fchkf */
+#define R_IA64_PCREL32MSB	0x4c	/* @pcrel(sym+add), data4 MSB */
+#define R_IA64_PCREL32LSB	0x4d	/* @pcrel(sym+add), data4 LSB */
+#define R_IA64_PCREL64MSB	0x4e	/* @pcrel(sym+add), data8 MSB */
+#define R_IA64_PCREL64LSB	0x4f	/* @pcrel(sym+add), data8 LSB */
+#define R_IA64_LTOFF_FPTR22	0x52	/* @ltoff(@fptr(s+a)), imm22 */
+#define R_IA64_LTOFF_FPTR64I	0x53	/* @ltoff(@fptr(s+a)), imm64 */
+#define R_IA64_LTOFF_FPTR32MSB	0x54	/* @ltoff(@fptr(s+a)), 4 MSB */
+#define R_IA64_LTOFF_FPTR32LSB	0x55	/* @ltoff(@fptr(s+a)), 4 LSB */
+#define R_IA64_LTOFF_FPTR64MSB	0x56	/* @ltoff(@fptr(s+a)), 8 MSB */
+#define R_IA64_LTOFF_FPTR64LSB	0x57	/* @ltoff(@fptr(s+a)), 8 LSB */
+#define R_IA64_SEGREL32MSB	0x5c	/* @segrel(sym+add), data4 MSB */
+#define R_IA64_SEGREL32LSB	0x5d	/* @segrel(sym+add), data4 LSB */
+#define R_IA64_SEGREL64MSB	0x5e	/* @segrel(sym+add), data8 MSB */
+#define R_IA64_SEGREL64LSB	0x5f	/* @segrel(sym+add), data8 LSB */
+#define R_IA64_SECREL32MSB	0x64	/* @secrel(sym+add), data4 MSB */
+#define R_IA64_SECREL32LSB	0x65	/* @secrel(sym+add), data4 LSB */
+#define R_IA64_SECREL64MSB	0x66	/* @secrel(sym+add), data8 MSB */
+#define R_IA64_SECREL64LSB	0x67	/* @secrel(sym+add), data8 LSB */
+#define R_IA64_REL32MSB		0x6c	/* data 4 + REL */
+#define R_IA64_REL32LSB		0x6d	/* data 4 + REL */
+#define R_IA64_REL64MSB		0x6e	/* data 8 + REL */
+#define R_IA64_REL64LSB		0x6f	/* data 8 + REL */
+#define R_IA64_LTV32MSB		0x74	/* symbol + addend, data4 MSB */
+#define R_IA64_LTV32LSB		0x75	/* symbol + addend, data4 LSB */
+#define R_IA64_LTV64MSB		0x76	/* symbol + addend, data8 MSB */
+#define R_IA64_LTV64LSB		0x77	/* symbol + addend, data8 LSB */
+#define R_IA64_PCREL21BI	0x79	/* @pcrel(sym+add), ptb, call */
+#define R_IA64_PCREL22		0x7a	/* @pcrel(sym+add), imm22 */
+#define R_IA64_PCREL64I		0x7b	/* @pcrel(sym+add), imm64 */
+#define R_IA64_IPLTMSB		0x80	/* dynamic reloc, imported PLT, MSB */
+#define R_IA64_IPLTLSB		0x81	/* dynamic reloc, imported PLT, LSB */
+#define R_IA64_COPY		0x84	/* dynamic reloc, data copy */
+#define R_IA64_SUB		0x85	/* -symbol + addend, add imm22 */
+#define R_IA64_LTOFF22X		0x86	/* LTOFF22, relaxable.  */
+#define R_IA64_LDXMOV		0x87	/* Use of LTOFF22X.  */
+#define R_IA64_TPREL14		0x91	/* @tprel(sym+add), add imm14 */
+#define R_IA64_TPREL22		0x92	/* @tprel(sym+add), add imm22 */
+#define R_IA64_TPREL64I		0x93	/* @tprel(sym+add), add imm64 */
+#define R_IA64_TPREL64MSB	0x96	/* @tprel(sym+add), data8 MSB */
+#define R_IA64_TPREL64LSB	0x97	/* @tprel(sym+add), data8 LSB */
+#define R_IA64_LTOFF_TPREL22	0x9a	/* @ltoff(@tprel(s+a)), add imm22 */
+#define R_IA64_DTPMOD64MSB	0xa6	/* @dtpmod(sym+add), data8 MSB */
+#define R_IA64_DTPMOD64LSB	0xa7	/* @dtpmod(sym+add), data8 LSB */
+#define R_IA64_LTOFF_DTPMOD22	0xaa	/* @ltoff(@dtpmod(s+a)), imm22 */
+#define R_IA64_DTPREL14		0xb1	/* @dtprel(sym+add), imm14 */
+#define R_IA64_DTPREL22		0xb2	/* @dtprel(sym+add), imm22 */
+#define R_IA64_DTPREL64I	0xb3	/* @dtprel(sym+add), imm64 */
+#define R_IA64_DTPREL32MSB	0xb4	/* @dtprel(sym+add), data4 MSB */
+#define R_IA64_DTPREL32LSB	0xb5	/* @dtprel(sym+add), data4 LSB */
+#define R_IA64_DTPREL64MSB	0xb6	/* @dtprel(sym+add), data8 MSB */
+#define R_IA64_DTPREL64LSB	0xb7	/* @dtprel(sym+add), data8 LSB */
+#define R_IA64_LTOFF_DTPREL22	0xba	/* @ltoff(@dtprel(s+a)), imm22 */
+
+/* IA-64 specific section flags: */
+#define SHF_IA_64_SHORT		0x10000000	/* section near gp */
+
+/*
+ * We use (abuse?) this macro to insert the (empty) vm_area that is
+ * used to map the register backing store.  I don't see any better
+ * place to do this, but we should discuss this with Linus once we can
+ * talk to him...
+ */
+extern void ia64_init_addr_space (void);
+#define ELF_PLAT_INIT(_r, load_addr)	ia64_init_addr_space()
+
+/* ELF register definitions.  This is needed for core dump support.  */
+
+/*
+ * elf_gregset_t contains the application-level state in the following order:
+ *	r0-r31
+ *	NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+ *	predicate registers (p0-p63)
+ *	b0-b7
+ *	ip cfm psr
+ *	ar.rsc ar.bsp ar.bspstore ar.rnat
+ *	ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+ */
+#define ELF_NGREG	128	/* we really need just 72 but let's leave some headroom... */
+#define ELF_NFPREG	128	/* f0 and f1 could be omitted, but so what... */
+
+/* elf_gregset_t register offsets */
+#define ELF_GR_0_OFFSET     0
+#define ELF_NAT_OFFSET     (32 * sizeof(elf_greg_t))
+#define ELF_PR_OFFSET      (33 * sizeof(elf_greg_t))
+#define ELF_BR_0_OFFSET    (34 * sizeof(elf_greg_t))
+#define ELF_CR_IIP_OFFSET  (42 * sizeof(elf_greg_t))
+#define ELF_CFM_OFFSET     (43 * sizeof(elf_greg_t))
+#define ELF_CR_IPSR_OFFSET (44 * sizeof(elf_greg_t))
+#define ELF_GR_OFFSET(i)   (ELF_GR_0_OFFSET + i * sizeof(elf_greg_t))
+#define ELF_BR_OFFSET(i)   (ELF_BR_0_OFFSET + i * sizeof(elf_greg_t))
+#define ELF_AR_RSC_OFFSET  (45 * sizeof(elf_greg_t))
+#define ELF_AR_BSP_OFFSET  (46 * sizeof(elf_greg_t))
+#define ELF_AR_BSPSTORE_OFFSET (47 * sizeof(elf_greg_t))
+#define ELF_AR_RNAT_OFFSET (48 * sizeof(elf_greg_t))
+#define ELF_AR_CCV_OFFSET  (49 * sizeof(elf_greg_t))
+#define ELF_AR_UNAT_OFFSET (50 * sizeof(elf_greg_t))
+#define ELF_AR_FPSR_OFFSET (51 * sizeof(elf_greg_t))
+#define ELF_AR_PFS_OFFSET  (52 * sizeof(elf_greg_t))
+#define ELF_AR_LC_OFFSET   (53 * sizeof(elf_greg_t))
+#define ELF_AR_EC_OFFSET   (54 * sizeof(elf_greg_t))
+#define ELF_AR_CSD_OFFSET  (55 * sizeof(elf_greg_t))
+#define ELF_AR_SSD_OFFSET  (56 * sizeof(elf_greg_t))
+#define ELF_AR_END_OFFSET  (57 * sizeof(elf_greg_t))
+
+typedef unsigned long elf_fpxregset_t;
+
+typedef unsigned long elf_greg_t;
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct ia64_fpreg elf_fpreg_t;
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+
+
+
+struct pt_regs;	/* forward declaration... */
+extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst);
+#define ELF_CORE_COPY_REGS(_dest,_regs)	ia64_elf_core_copy_regs(_regs, _dest);
+
+/* This macro yields a bitmask that programs can use to figure out
+   what instruction set this CPU supports.  */
+#define ELF_HWCAP 	0
+
+/* This macro yields a string that ld.so will use to load
+   implementation specific libraries for optimization.  Not terribly
+   relevant until we have real hardware to play with... */
+#define ELF_PLATFORM	NULL
+
+#define SET_PERSONALITY(ex)	\
+	set_personality((current->personality & ~PER_MASK) | PER_LINUX)
+
+#define elf_read_implies_exec(ex, executable_stack)					\
+	((executable_stack!=EXSTACK_DISABLE_X) && ((ex).e_flags & EF_IA_64_LINUX_EXECUTABLE_STACK) != 0)
+
+struct task_struct;
+
+#define GATE_EHDR	((const struct elfhdr *) GATE_ADDR)
+
+/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
+#define ARCH_DLINFO								\
+do {										\
+	extern char __kernel_syscall_via_epc[];					\
+	NEW_AUX_ENT(AT_SYSINFO, (unsigned long) __kernel_syscall_via_epc);	\
+	NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR);		\
+} while (0)
+
+/*
+ * format for entries in the Global Offset Table
+ */
+struct got_entry {
+	uint64_t val;
+};
+
+/*
+ * Layout of the Function Descriptor
+ */
+struct fdesc {
+	uint64_t ip;
+	uint64_t gp;
+};
+
+#endif /* _ASM_IA64_ELF_H */
diff --git a/arch/ia64/include/asm/emergency-restart.h b/arch/ia64/include/asm/emergency-restart.h
new file mode 100644
index 00000000..108d8c48
--- /dev/null
+++ b/arch/ia64/include/asm/emergency-restart.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_EMERGENCY_RESTART_H
+#define _ASM_EMERGENCY_RESTART_H
+
+#include <asm-generic/emergency-restart.h>
+
+#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/ia64/include/asm/errno.h b/arch/ia64/include/asm/errno.h
new file mode 100644
index 00000000..4c82b503
--- /dev/null
+++ b/arch/ia64/include/asm/errno.h
@@ -0,0 +1 @@
+#include <asm-generic/errno.h>
diff --git a/arch/ia64/include/asm/esi.h b/arch/ia64/include/asm/esi.h
new file mode 100644
index 00000000..40991c6b
--- /dev/null
+++ b/arch/ia64/include/asm/esi.h
@@ -0,0 +1,29 @@
+/*
+ * ESI service calls.
+ *
+ * Copyright (c) Copyright 2005-2006 Hewlett-Packard Development Company, L.P.
+ * 	Alex Williamson <alex.williamson@hp.com>
+ */
+#ifndef esi_h
+#define esi_h
+
+#include <linux/efi.h>
+
+#define ESI_QUERY			0x00000001
+#define ESI_OPEN_HANDLE			0x02000000
+#define ESI_CLOSE_HANDLE		0x02000001
+
+enum esi_proc_type {
+	ESI_PROC_SERIALIZED,	/* calls need to be serialized */
+	ESI_PROC_MP_SAFE,	/* MP-safe, but not reentrant */
+	ESI_PROC_REENTRANT	/* MP-safe and reentrant */
+};
+
+extern struct ia64_sal_retval esi_call_phys (void *, u64 *);
+extern int ia64_esi_call(efi_guid_t, struct ia64_sal_retval *,
+			 enum esi_proc_type,
+			 u64, u64, u64, u64, u64, u64, u64, u64);
+extern int ia64_esi_call_phys(efi_guid_t, struct ia64_sal_retval *, u64, u64,
+                              u64, u64, u64, u64, u64, u64);
+
+#endif /* esi_h */
diff --git a/arch/ia64/include/asm/fb.h b/arch/ia64/include/asm/fb.h
new file mode 100644
index 00000000..89a397ce
--- /dev/null
+++ b/arch/ia64/include/asm/fb.h
@@ -0,0 +1,23 @@
+#ifndef _ASM_FB_H_
+#define _ASM_FB_H_
+
+#include <linux/fb.h>
+#include <linux/fs.h>
+#include <linux/efi.h>
+#include <asm/page.h>
+
+static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+				unsigned long off)
+{
+	if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start))
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	else
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+}
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+	return 0;
+}
+
+#endif /* _ASM_FB_H_ */
diff --git a/arch/ia64/include/asm/fcntl.h b/arch/ia64/include/asm/fcntl.h
new file mode 100644
index 00000000..1dd275dc
--- /dev/null
+++ b/arch/ia64/include/asm/fcntl.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_IA64_FCNTL_H
+#define _ASM_IA64_FCNTL_H
+/*
+ * Modified 1998-2000
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co.
+ */
+
+#define force_o_largefile()	\
+		(personality(current->personality) != PER_LINUX32)
+
+#include <asm-generic/fcntl.h>
+
+#endif /* _ASM_IA64_FCNTL_H */
diff --git a/arch/ia64/include/asm/fpswa.h b/arch/ia64/include/asm/fpswa.h
new file mode 100644
index 00000000..62edfcea
--- /dev/null
+++ b/arch/ia64/include/asm/fpswa.h
@@ -0,0 +1,73 @@
+#ifndef _ASM_IA64_FPSWA_H
+#define _ASM_IA64_FPSWA_H
+
+/*
+ * Floating-point Software Assist
+ *
+ * Copyright (C) 1999 Intel Corporation.
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Goutham Rao <goutham.rao@intel.com>
+ */
+
+typedef struct {
+	/* 4 * 128 bits */
+	unsigned long fp_lp[4*2];
+} fp_state_low_preserved_t;
+
+typedef struct {
+	/* 10 * 128 bits */
+	unsigned long fp_lv[10 * 2];
+} fp_state_low_volatile_t;
+
+typedef	struct {
+	/* 16 * 128 bits */
+	unsigned long fp_hp[16 * 2];
+} fp_state_high_preserved_t;
+
+typedef struct {
+	/* 96 * 128 bits */
+	unsigned long fp_hv[96 * 2];
+} fp_state_high_volatile_t;
+
+/**
+ * floating point state to be passed to the FP emulation library by
+ * the trap/fault handler
+ */
+typedef struct {
+	unsigned long			bitmask_low64;
+	unsigned long			bitmask_high64;
+	fp_state_low_preserved_t	*fp_state_low_preserved;
+	fp_state_low_volatile_t		*fp_state_low_volatile;
+	fp_state_high_preserved_t	*fp_state_high_preserved;
+	fp_state_high_volatile_t	*fp_state_high_volatile;
+} fp_state_t;
+
+typedef struct {
+	unsigned long status;
+	unsigned long err0;
+	unsigned long err1;
+	unsigned long err2;
+} fpswa_ret_t;
+
+/**
+ * function header for the Floating Point software assist
+ * library. This function is invoked by the Floating point software
+ * assist trap/fault handler.
+ */
+typedef fpswa_ret_t (*efi_fpswa_t) (unsigned long trap_type, void *bundle, unsigned long *ipsr,
+				    unsigned long *fsr, unsigned long *isr, unsigned long *preds,
+				    unsigned long *ifs, fp_state_t *fp_state);
+
+/**
+ * This is the FPSWA library interface as defined by EFI.  We need to pass a 
+ * pointer to the interface itself on a call to the assist library
+ */
+typedef struct {
+	unsigned int	 revision;
+	unsigned int	 reserved;
+	efi_fpswa_t	 fpswa;
+} fpswa_interface_t;
+
+extern fpswa_interface_t *fpswa_interface;
+
+#endif /* _ASM_IA64_FPSWA_H */
diff --git a/arch/ia64/include/asm/fpu.h b/arch/ia64/include/asm/fpu.h
new file mode 100644
index 00000000..b6395ad1
--- /dev/null
+++ b/arch/ia64/include/asm/fpu.h
@@ -0,0 +1,66 @@
+#ifndef _ASM_IA64_FPU_H
+#define _ASM_IA64_FPU_H
+
+/*
+ * Copyright (C) 1998, 1999, 2002, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/types.h>
+
+/* floating point status register: */
+#define FPSR_TRAP_VD	(1 << 0)	/* invalid op trap disabled */
+#define FPSR_TRAP_DD	(1 << 1)	/* denormal trap disabled */
+#define FPSR_TRAP_ZD	(1 << 2)	/* zero-divide trap disabled */
+#define FPSR_TRAP_OD	(1 << 3)	/* overflow trap disabled */
+#define FPSR_TRAP_UD	(1 << 4)	/* underflow trap disabled */
+#define FPSR_TRAP_ID	(1 << 5)	/* inexact trap disabled */
+#define FPSR_S0(x)	((x) <<  6)
+#define FPSR_S1(x)	((x) << 19)
+#define FPSR_S2(x)	(__IA64_UL(x) << 32)
+#define FPSR_S3(x)	(__IA64_UL(x) << 45)
+
+/* floating-point status field controls: */
+#define FPSF_FTZ	(1 << 0)		/* flush-to-zero */
+#define FPSF_WRE	(1 << 1)		/* widest-range exponent */
+#define FPSF_PC(x)	(((x) & 0x3) << 2)	/* precision control */
+#define FPSF_RC(x)	(((x) & 0x3) << 4)	/* rounding control */
+#define FPSF_TD		(1 << 6)		/* trap disabled */
+
+/* floating-point status field flags: */
+#define FPSF_V		(1 <<  7)		/* invalid operation flag */
+#define FPSF_D		(1 <<  8)		/* denormal/unnormal operand flag */
+#define FPSF_Z		(1 <<  9)		/* zero divide (IEEE) flag */
+#define FPSF_O		(1 << 10)		/* overflow (IEEE) flag */
+#define FPSF_U		(1 << 11)		/* underflow (IEEE) flag */
+#define FPSF_I		(1 << 12)		/* inexact (IEEE) flag) */
+
+/* floating-point rounding control: */
+#define FPRC_NEAREST	0x0
+#define FPRC_NEGINF	0x1
+#define FPRC_POSINF	0x2
+#define FPRC_TRUNC	0x3
+
+#define FPSF_DEFAULT	(FPSF_PC (0x3) | FPSF_RC (FPRC_NEAREST))
+
+/* This default value is the same as HP-UX uses.  Don't change it
+   without a very good reason.  */
+#define FPSR_DEFAULT	(FPSR_TRAP_VD | FPSR_TRAP_DD | FPSR_TRAP_ZD	\
+			 | FPSR_TRAP_OD | FPSR_TRAP_UD | FPSR_TRAP_ID	\
+			 | FPSR_S0 (FPSF_DEFAULT)			\
+			 | FPSR_S1 (FPSF_DEFAULT | FPSF_TD | FPSF_WRE)	\
+			 | FPSR_S2 (FPSF_DEFAULT | FPSF_TD)		\
+			 | FPSR_S3 (FPSF_DEFAULT | FPSF_TD))
+
+# ifndef __ASSEMBLY__
+
+struct ia64_fpreg {
+	union {
+		unsigned long bits[2];
+		long double __dummy;	/* force 16-byte alignment */
+	} u;
+};
+
+# endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_FPU_H */
diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h
new file mode 100644
index 00000000..fbd1a247
--- /dev/null
+++ b/arch/ia64/include/asm/ftrace.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_IA64_FTRACE_H
+#define _ASM_IA64_FTRACE_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_INSN_SIZE        32 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
+#define mcount _mcount
+
+/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */
+#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
+#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/* second bundle, insn 2 */
+	return addr - 0x12;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_IA64_FTRACE_H */
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
new file mode 100644
index 00000000..21ab3764
--- /dev/null
+++ b/arch/ia64/include/asm/futex.h
@@ -0,0 +1,128 @@
+#ifndef _ASM_FUTEX_H
+#define _ASM_FUTEX_H
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+#include <asm/system.h>
+
+#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
+do {									\
+	register unsigned long r8 __asm ("r8") = 0;			\
+	__asm__ __volatile__(						\
+		"	mf;;					\n"	\
+		"[1:] "	insn ";;				\n"	\
+		"	.xdata4 \"__ex_table\", 1b-., 2f-.	\n"	\
+		"[2:]"							\
+		: "+r" (r8), "=r" (oldval)				\
+		: "r" (uaddr), "r" (oparg)				\
+		: "memory");						\
+	ret = r8;							\
+} while (0)
+
+#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
+do {									\
+	register unsigned long r8 __asm ("r8") = 0;			\
+	int val, newval;						\
+	do {								\
+		__asm__ __volatile__(					\
+			"	mf;;				  \n"	\
+			"[1:]	ld4 %3=[%4];;			  \n"	\
+			"	mov %2=%3			  \n"	\
+				insn	";;			  \n"	\
+			"	mov ar.ccv=%2;;			  \n"	\
+			"[2:]	cmpxchg4.acq %1=[%4],%3,ar.ccv;;  \n"	\
+			"	.xdata4 \"__ex_table\", 1b-., 3f-.\n"	\
+			"	.xdata4 \"__ex_table\", 2b-., 3f-.\n"	\
+			"[3:]"						\
+			: "+r" (r8), "=r" (val), "=&r" (oldval),	\
+			   "=&r" (newval)				\
+			: "r" (uaddr), "r" (oparg)			\
+			: "memory");					\
+		if (unlikely (r8))					\
+			break;						\
+	} while (unlikely (val != oldval));				\
+	ret = r8;							\
+} while (0)
+
+static inline int
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, ret;
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	pagefault_disable();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op1("xchg4 %1=[%2],%3", ret, oldval, uaddr,
+				   oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op2("add %3=%3,%5", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op2("or %3=%3,%5", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op2("and %3=%3,%5", ret, oldval, uaddr,
+				   ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op2("xor %3=%3,%5", ret, oldval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	pagefault_enable();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	{
+		register unsigned long r8 __asm ("r8");
+		unsigned long prev;
+		__asm__ __volatile__(
+			"	mf;;					\n"
+			"	mov %0=r0				\n"
+			"	mov ar.ccv=%4;;				\n"
+			"[1:]	cmpxchg4.acq %1=[%2],%3,ar.ccv		\n"
+			"	.xdata4 \"__ex_table\", 1b-., 2f-.	\n"
+			"[2:]"
+			: "=r" (r8), "=r" (prev)
+			: "r" (uaddr), "r" (newval),
+			  "rO" ((long) (unsigned) oldval)
+			: "memory");
+		*uval = prev;
+		return r8;
+	}
+}
+
+#endif /* _ASM_FUTEX_H */
diff --git a/arch/ia64/include/asm/gcc_intrin.h b/arch/ia64/include/asm/gcc_intrin.h
new file mode 100644
index 00000000..21ddee54
--- /dev/null
+++ b/arch/ia64/include/asm/gcc_intrin.h
@@ -0,0 +1,621 @@
+#ifndef _ASM_IA64_GCC_INTRIN_H
+#define _ASM_IA64_GCC_INTRIN_H
+/*
+ *
+ * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+/* define this macro to get some asm stmts included in 'c' files */
+#define ASM_SUPPORTED
+
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define ia64_barrier()	asm volatile ("":::"memory")
+
+#define ia64_stop()	asm volatile (";;"::)
+
+#define ia64_invala_gr(regnum)	asm volatile ("invala.e r%0" :: "i"(regnum))
+
+#define ia64_invala_fr(regnum)	asm volatile ("invala.e f%0" :: "i"(regnum))
+
+#define ia64_flushrs() asm volatile ("flushrs;;":::"memory")
+
+#define ia64_loadrs() asm volatile ("loadrs;;":::"memory")
+
+extern void ia64_bad_param_for_setreg (void);
+extern void ia64_bad_param_for_getreg (void);
+
+#ifdef __KERNEL__
+register unsigned long ia64_r13 asm ("r13") __used;
+#endif
+
+#define ia64_native_setreg(regnum, val)						\
+({										\
+	switch (regnum) {							\
+	    case _IA64_REG_PSR_L:						\
+		    asm volatile ("mov psr.l=%0" :: "r"(val) : "memory");	\
+		    break;							\
+	    case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC:				\
+		    asm volatile ("mov ar%0=%1" ::				\
+		    			  "i" (regnum - _IA64_REG_AR_KR0),	\
+					  "r"(val): "memory");			\
+		    break;							\
+	    case _IA64_REG_CR_DCR ... _IA64_REG_CR_LRR1:			\
+		    asm volatile ("mov cr%0=%1" ::				\
+				          "i" (regnum - _IA64_REG_CR_DCR),	\
+					  "r"(val): "memory" );			\
+		    break;							\
+	    case _IA64_REG_SP:							\
+		    asm volatile ("mov r12=%0" ::				\
+			    		  "r"(val): "memory");			\
+		    break;							\
+	    case _IA64_REG_GP:							\
+		    asm volatile ("mov gp=%0" :: "r"(val) : "memory");		\
+		break;								\
+	    default:								\
+		    ia64_bad_param_for_setreg();				\
+		    break;							\
+	}									\
+})
+
+#define ia64_native_getreg(regnum)						\
+({										\
+	__u64 ia64_intri_res;							\
+										\
+	switch (regnum) {							\
+	case _IA64_REG_GP:							\
+		asm volatile ("mov %0=gp" : "=r"(ia64_intri_res));		\
+		break;								\
+	case _IA64_REG_IP:							\
+		asm volatile ("mov %0=ip" : "=r"(ia64_intri_res));		\
+		break;								\
+	case _IA64_REG_PSR:							\
+		asm volatile ("mov %0=psr" : "=r"(ia64_intri_res));		\
+		break;								\
+	case _IA64_REG_TP:	/* for current() */				\
+		ia64_intri_res = ia64_r13;					\
+		break;								\
+	case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC:				\
+		asm volatile ("mov %0=ar%1" : "=r" (ia64_intri_res)		\
+				      : "i"(regnum - _IA64_REG_AR_KR0));	\
+		break;								\
+	case _IA64_REG_CR_DCR ... _IA64_REG_CR_LRR1:				\
+		asm volatile ("mov %0=cr%1" : "=r" (ia64_intri_res)		\
+				      : "i" (regnum - _IA64_REG_CR_DCR));	\
+		break;								\
+	case _IA64_REG_SP:							\
+		asm volatile ("mov %0=sp" : "=r" (ia64_intri_res));		\
+		break;								\
+	default:								\
+		ia64_bad_param_for_getreg();					\
+		break;								\
+	}									\
+	ia64_intri_res;								\
+})
+
+#define ia64_hint_pause 0
+
+#define ia64_hint(mode)						\
+({								\
+	switch (mode) {						\
+	case ia64_hint_pause:					\
+		asm volatile ("hint @pause" ::: "memory");	\
+		break;						\
+	}							\
+})
+
+
+/* Integer values for mux1 instruction */
+#define ia64_mux1_brcst 0
+#define ia64_mux1_mix   8
+#define ia64_mux1_shuf  9
+#define ia64_mux1_alt  10
+#define ia64_mux1_rev  11
+
+#define ia64_mux1(x, mode)							\
+({										\
+	__u64 ia64_intri_res;							\
+										\
+	switch (mode) {								\
+	case ia64_mux1_brcst:							\
+		asm ("mux1 %0=%1,@brcst" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_mix:							\
+		asm ("mux1 %0=%1,@mix" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_shuf:							\
+		asm ("mux1 %0=%1,@shuf" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_alt:							\
+		asm ("mux1 %0=%1,@alt" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_rev:							\
+		asm ("mux1 %0=%1,@rev" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	}									\
+	ia64_intri_res;								\
+})
+
+#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# define ia64_popcnt(x)		__builtin_popcountl(x)
+#else
+# define ia64_popcnt(x)						\
+  ({								\
+	__u64 ia64_intri_res;					\
+	asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x));	\
+								\
+	ia64_intri_res;						\
+  })
+#endif
+
+#define ia64_getf_exp(x)					\
+({								\
+	long ia64_intri_res;					\
+								\
+	asm ("getf.exp %0=%1" : "=r"(ia64_intri_res) : "f"(x));	\
+								\
+	ia64_intri_res;						\
+})
+
+#define ia64_shrp(a, b, count)								\
+({											\
+	__u64 ia64_intri_res;								\
+	asm ("shrp %0=%1,%2,%3" : "=r"(ia64_intri_res) : "r"(a), "r"(b), "i"(count));	\
+	ia64_intri_res;									\
+})
+
+#define ia64_ldfs(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldfs %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldfd(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldfd %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldfe(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldfe %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldf8(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldf8 %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldf_fill(regnum, x)				\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldf.fill %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_st4_rel_nta(m, val)					\
+({									\
+	asm volatile ("st4.rel.nta [%0] = %1\n\t" :: "r"(m), "r"(val));	\
+})
+
+#define ia64_stfs(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stfs [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stfd(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stfd [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stfe(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stfe [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stf8(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stf8 [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stf_spill(x, regnum)						\
+({										\
+	register double __f__ asm ("f"#regnum);					\
+	asm volatile ("stf.spill [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_fetchadd4_acq(p, inc)						\
+({										\
+										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd4.acq %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_fetchadd4_rel(p, inc)						\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd4.rel %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_fetchadd8_acq(p, inc)						\
+({										\
+										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd8.acq %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_fetchadd8_rel(p, inc)						\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd8.rel %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_xchg1(ptr,x)							\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("xchg1 %0=[%1],%2"					\
+		      : "=r" (ia64_intri_res) : "r" (ptr), "r" (x) : "memory");	\
+	ia64_intri_res;								\
+})
+
+#define ia64_xchg2(ptr,x)						\
+({									\
+	__u64 ia64_intri_res;						\
+	asm volatile ("xchg2 %0=[%1],%2" : "=r" (ia64_intri_res)	\
+		      : "r" (ptr), "r" (x) : "memory");			\
+	ia64_intri_res;							\
+})
+
+#define ia64_xchg4(ptr,x)						\
+({									\
+	__u64 ia64_intri_res;						\
+	asm volatile ("xchg4 %0=[%1],%2" : "=r" (ia64_intri_res)	\
+		      : "r" (ptr), "r" (x) : "memory");			\
+	ia64_intri_res;							\
+})
+
+#define ia64_xchg8(ptr,x)						\
+({									\
+	__u64 ia64_intri_res;						\
+	asm volatile ("xchg8 %0=[%1],%2" : "=r" (ia64_intri_res)	\
+		      : "r" (ptr), "r" (x) : "memory");			\
+	ia64_intri_res;							\
+})
+
+#define ia64_cmpxchg1_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg1.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg1_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg1.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg2_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg2.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg2_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+											\
+	asm volatile ("cmpxchg2.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg4_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg4_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg4.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg8_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg8_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+											\
+	asm volatile ("cmpxchg8.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_mf()	asm volatile ("mf" ::: "memory")
+#define ia64_mfa()	asm volatile ("mf.a" ::: "memory")
+
+#define ia64_invala() asm volatile ("invala" ::: "memory")
+
+#define ia64_native_thash(addr)							\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("thash %0=%1" : "=r"(ia64_intri_res) : "r" (addr));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_srlz_i()	asm volatile (";; srlz.i ;;" ::: "memory")
+#define ia64_srlz_d()	asm volatile (";; srlz.d" ::: "memory");
+
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+# define ia64_dv_serialize_data()		asm volatile (".serialize.data");
+# define ia64_dv_serialize_instruction()	asm volatile (".serialize.instruction");
+#else
+# define ia64_dv_serialize_data()
+# define ia64_dv_serialize_instruction()
+#endif
+
+#define ia64_nop(x)	asm volatile ("nop %0"::"i"(x));
+
+#define ia64_itci(addr)	asm volatile ("itc.i %0;;" :: "r"(addr) : "memory")
+
+#define ia64_itcd(addr)	asm volatile ("itc.d %0;;" :: "r"(addr) : "memory")
+
+
+#define ia64_itri(trnum, addr) asm volatile ("itr.i itr[%0]=%1"				\
+					     :: "r"(trnum), "r"(addr) : "memory")
+
+#define ia64_itrd(trnum, addr) asm volatile ("itr.d dtr[%0]=%1"				\
+					     :: "r"(trnum), "r"(addr) : "memory")
+
+#define ia64_tpa(addr)								\
+({										\
+	unsigned long ia64_pa;							\
+	asm volatile ("tpa %0 = %1" : "=r"(ia64_pa) : "r"(addr) : "memory");	\
+	ia64_pa;								\
+})
+
+#define __ia64_set_dbr(index, val)						\
+	asm volatile ("mov dbr[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define ia64_set_ibr(index, val)						\
+	asm volatile ("mov ibr[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define ia64_set_pkr(index, val)						\
+	asm volatile ("mov pkr[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define ia64_set_pmc(index, val)						\
+	asm volatile ("mov pmc[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define ia64_set_pmd(index, val)						\
+	asm volatile ("mov pmd[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define ia64_native_set_rr(index, val)							\
+	asm volatile ("mov rr[%0]=%1" :: "r"(index), "r"(val) : "memory");
+
+#define ia64_native_get_cpuid(index)							\
+({											\
+	unsigned long ia64_intri_res;							\
+	asm volatile ("mov %0=cpuid[%r1]" : "=r"(ia64_intri_res) : "rO"(index));	\
+	ia64_intri_res;									\
+})
+
+#define __ia64_get_dbr(index)							\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=dbr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_get_ibr(index)							\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=ibr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_get_pkr(index)							\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=pkr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_get_pmc(index)							\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=pmc[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+
+#define ia64_native_get_pmd(index)						\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=pmd[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_native_get_rr(index)						\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=rr[%1]" : "=r"(ia64_intri_res) : "r" (index));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_native_fc(addr)	asm volatile ("fc %0" :: "r"(addr) : "memory")
+
+
+#define ia64_sync_i()	asm volatile (";; sync.i" ::: "memory")
+
+#define ia64_native_ssm(mask)	asm volatile ("ssm %0":: "i"((mask)) : "memory")
+#define ia64_native_rsm(mask)	asm volatile ("rsm %0":: "i"((mask)) : "memory")
+#define ia64_sum(mask)	asm volatile ("sum %0":: "i"((mask)) : "memory")
+#define ia64_rum(mask)	asm volatile ("rum %0":: "i"((mask)) : "memory")
+
+#define ia64_ptce(addr)	asm volatile ("ptc.e %0" :: "r"(addr))
+
+#define ia64_native_ptcga(addr, size)						\
+do {										\
+	asm volatile ("ptc.ga %0,%1" :: "r"(addr), "r"(size) : "memory");	\
+	ia64_dv_serialize_data();						\
+} while (0)
+
+#define ia64_ptcl(addr, size)							\
+do {										\
+	asm volatile ("ptc.l %0,%1" :: "r"(addr), "r"(size) : "memory");	\
+	ia64_dv_serialize_data();						\
+} while (0)
+
+#define ia64_ptri(addr, size)						\
+	asm volatile ("ptr.i %0,%1" :: "r"(addr), "r"(size) : "memory")
+
+#define ia64_ptrd(addr, size)						\
+	asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory")
+
+#define ia64_ttag(addr)							\
+({									  \
+	__u64 ia64_intri_res;						   \
+	asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr));   \
+	ia64_intri_res;							 \
+})
+
+
+/* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */
+
+#define ia64_lfhint_none   0
+#define ia64_lfhint_nt1    1
+#define ia64_lfhint_nt2    2
+#define ia64_lfhint_nta    3
+
+#define ia64_lfetch(lfhint, y)					\
+({								\
+        switch (lfhint) {					\
+        case ia64_lfhint_none:					\
+                asm volatile ("lfetch [%0]" : : "r"(y));	\
+                break;						\
+        case ia64_lfhint_nt1:					\
+                asm volatile ("lfetch.nt1 [%0]" : : "r"(y));	\
+                break;						\
+        case ia64_lfhint_nt2:					\
+                asm volatile ("lfetch.nt2 [%0]" : : "r"(y));	\
+                break;						\
+        case ia64_lfhint_nta:					\
+                asm volatile ("lfetch.nta [%0]" : : "r"(y));	\
+                break;						\
+        }							\
+})
+
+#define ia64_lfetch_excl(lfhint, y)					\
+({									\
+        switch (lfhint) {						\
+        case ia64_lfhint_none:						\
+                asm volatile ("lfetch.excl [%0]" :: "r"(y));		\
+                break;							\
+        case ia64_lfhint_nt1:						\
+                asm volatile ("lfetch.excl.nt1 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt2:						\
+                asm volatile ("lfetch.excl.nt2 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nta:						\
+                asm volatile ("lfetch.excl.nta [%0]" :: "r"(y));	\
+                break;							\
+        }								\
+})
+
+#define ia64_lfetch_fault(lfhint, y)					\
+({									\
+        switch (lfhint) {						\
+        case ia64_lfhint_none:						\
+                asm volatile ("lfetch.fault [%0]" : : "r"(y));		\
+                break;							\
+        case ia64_lfhint_nt1:						\
+                asm volatile ("lfetch.fault.nt1 [%0]" : : "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt2:						\
+                asm volatile ("lfetch.fault.nt2 [%0]" : : "r"(y));	\
+                break;							\
+        case ia64_lfhint_nta:						\
+                asm volatile ("lfetch.fault.nta [%0]" : : "r"(y));	\
+                break;							\
+        }								\
+})
+
+#define ia64_lfetch_fault_excl(lfhint, y)				\
+({									\
+        switch (lfhint) {						\
+        case ia64_lfhint_none:						\
+                asm volatile ("lfetch.fault.excl [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt1:						\
+                asm volatile ("lfetch.fault.excl.nt1 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt2:						\
+                asm volatile ("lfetch.fault.excl.nt2 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nta:						\
+                asm volatile ("lfetch.fault.excl.nta [%0]" :: "r"(y));	\
+                break;							\
+        }								\
+})
+
+#define ia64_native_intrin_local_irq_restore(x)			\
+do {								\
+	asm volatile (";;   cmp.ne p6,p7=%0,r0;;"		\
+		      "(p6) ssm psr.i;"				\
+		      "(p7) rsm psr.i;;"			\
+		      "(p6) srlz.d"				\
+		      :: "r"((x)) : "p6", "p7", "memory");	\
+} while (0)
+
+#endif /* _ASM_IA64_GCC_INTRIN_H */
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
new file mode 100644
index 00000000..8fb7d33a
--- /dev/null
+++ b/arch/ia64/include/asm/hardirq.h
@@ -0,0 +1,26 @@
+#ifndef _ASM_IA64_HARDIRQ_H
+#define _ASM_IA64_HARDIRQ_H
+
+/*
+ * Modified 1998-2002, 2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * No irq_cpustat_t for IA-64.  The data is held in the per-CPU data structure.
+ */
+
+#define __ARCH_IRQ_STAT	1
+
+#define local_softirq_pending()		(local_cpu_data->softirq_pending)
+
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+#include <asm/processor.h>
+
+extern void __iomem *ipi_base_addr;
+
+void ack_bad_irq(unsigned int irq);
+
+#endif /* _ASM_IA64_HARDIRQ_H */
diff --git a/arch/ia64/include/asm/hpsim.h b/arch/ia64/include/asm/hpsim.h
new file mode 100644
index 00000000..892ab198
--- /dev/null
+++ b/arch/ia64/include/asm/hpsim.h
@@ -0,0 +1,16 @@
+#ifndef _ASMIA64_HPSIM_H
+#define _ASMIA64_HPSIM_H
+
+#ifndef CONFIG_HP_SIMSERIAL_CONSOLE
+static inline int simcons_register(void) { return 1; }
+#else
+int simcons_register(void);
+#endif
+
+struct tty_driver;
+extern struct tty_driver *hp_simserial_driver;
+
+void ia64_ssc_connect_irq(long intr, long irq);
+void ia64_ctl_trace(long on);
+
+#endif
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
new file mode 100644
index 00000000..da55c637
--- /dev/null
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -0,0 +1,80 @@
+#ifndef _ASM_IA64_HUGETLB_H
+#define _ASM_IA64_HUGETLB_H
+
+#include <asm/page.h>
+
+
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
+			    unsigned long end, unsigned long floor,
+			    unsigned long ceiling);
+
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len);
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr,
+					 unsigned long len)
+{
+	return (REGION_NUMBER(addr) == RGN_HPAGE ||
+		REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE);
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+#endif /* _ASM_IA64_HUGETLB_H */
diff --git a/arch/ia64/include/asm/hw_irq.h b/arch/ia64/include/asm/hw_irq.h
new file mode 100644
index 00000000..a681d02c
--- /dev/null
+++ b/arch/ia64/include/asm/hw_irq.h
@@ -0,0 +1,195 @@
+#ifndef _ASM_IA64_HW_IRQ_H
+#define _ASM_IA64_HW_IRQ_H
+
+/*
+ * Copyright (C) 2001-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/profile.h>
+
+#include <asm/machvec.h>
+#include <asm/ptrace.h>
+#include <asm/smp.h>
+
+#ifndef CONFIG_PARAVIRT
+typedef u8 ia64_vector;
+#else
+typedef u16 ia64_vector;
+#endif
+
+/*
+ * 0 special
+ *
+ * 1,3-14 are reserved from firmware
+ *
+ * 16-255 (vectored external interrupts) are available
+ *
+ * 15 spurious interrupt (see IVR)
+ *
+ * 16 lowest priority, 255 highest priority
+ *
+ * 15 classes of 16 interrupts each.
+ */
+#define IA64_MIN_VECTORED_IRQ		 16
+#define IA64_MAX_VECTORED_IRQ		255
+#define IA64_NUM_VECTORS		256
+
+#define AUTO_ASSIGN			-1
+
+#define IA64_SPURIOUS_INT_VECTOR	0x0f
+
+/*
+ * Vectors 0x10-0x1f are used for low priority interrupts, e.g. CMCI.
+ */
+#define IA64_CPEP_VECTOR		0x1c	/* corrected platform error polling vector */
+#define IA64_CMCP_VECTOR		0x1d	/* corrected machine-check polling vector */
+#define IA64_CPE_VECTOR			0x1e	/* corrected platform error interrupt vector */
+#define IA64_CMC_VECTOR			0x1f	/* corrected machine-check interrupt vector */
+/*
+ * Vectors 0x20-0x2f are reserved for legacy ISA IRQs.
+ * Use vectors 0x30-0xe7 as the default device vector range for ia64.
+ * Platforms may choose to reduce this range in platform_irq_setup, but the
+ * platform range must fall within
+ *	[IA64_DEF_FIRST_DEVICE_VECTOR..IA64_DEF_LAST_DEVICE_VECTOR]
+ */
+extern int ia64_first_device_vector;
+extern int ia64_last_device_vector;
+
+#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined (CONFIG_IA64_DIG))
+/* Reserve the lower priority vector than device vectors for "move IRQ" IPI */
+#define IA64_IRQ_MOVE_VECTOR		0x30	/* "move IRQ" IPI */
+#define IA64_DEF_FIRST_DEVICE_VECTOR	0x31
+#else
+#define IA64_DEF_FIRST_DEVICE_VECTOR	0x30
+#endif
+#define IA64_DEF_LAST_DEVICE_VECTOR	0xe7
+#define IA64_FIRST_DEVICE_VECTOR	ia64_first_device_vector
+#define IA64_LAST_DEVICE_VECTOR		ia64_last_device_vector
+#define IA64_MAX_DEVICE_VECTORS		(IA64_DEF_LAST_DEVICE_VECTOR - IA64_DEF_FIRST_DEVICE_VECTOR + 1)
+#define IA64_NUM_DEVICE_VECTORS		(IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1)
+
+#define IA64_MCA_RENDEZ_VECTOR		0xe8	/* MCA rendez interrupt */
+#define IA64_PERFMON_VECTOR		0xee	/* performance monitor interrupt vector */
+#define IA64_TIMER_VECTOR		0xef	/* use highest-prio group 15 interrupt for timer */
+#define	IA64_MCA_WAKEUP_VECTOR		0xf0	/* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */
+#define IA64_IPI_LOCAL_TLB_FLUSH	0xfc	/* SMP flush local TLB */
+#define IA64_IPI_RESCHEDULE		0xfd	/* SMP reschedule */
+#define IA64_IPI_VECTOR			0xfe	/* inter-processor interrupt vector */
+
+/* Used for encoding redirected irqs */
+
+#define IA64_IRQ_REDIRECTED		(1 << 31)
+
+/* IA64 inter-cpu interrupt related definitions */
+
+#define IA64_IPI_DEFAULT_BASE_ADDR	0xfee00000
+
+/* Delivery modes for inter-cpu interrupts */
+enum {
+        IA64_IPI_DM_INT =       0x0,    /* pend an external interrupt */
+        IA64_IPI_DM_PMI =       0x2,    /* pend a PMI */
+        IA64_IPI_DM_NMI =       0x4,    /* pend an NMI (vector 2) */
+        IA64_IPI_DM_INIT =      0x5,    /* pend an INIT interrupt */
+        IA64_IPI_DM_EXTINT =    0x7,    /* pend an 8259-compatible interrupt. */
+};
+
+extern __u8 isa_irq_to_vector_map[16];
+#define isa_irq_to_vector(x)	isa_irq_to_vector_map[(x)]
+
+struct irq_cfg {
+	ia64_vector vector;
+	cpumask_t domain;
+	cpumask_t old_domain;
+	unsigned move_cleanup_count;
+	u8 move_in_progress : 1;
+};
+extern spinlock_t vector_lock;
+extern struct irq_cfg irq_cfg[NR_IRQS];
+#define irq_to_domain(x)	irq_cfg[(x)].domain
+DECLARE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq);
+
+extern struct irq_chip irq_type_ia64_lsapic;	/* CPU-internal interrupt controller */
+
+#ifdef CONFIG_PARAVIRT_GUEST
+#include <asm/paravirt.h>
+#else
+#define ia64_register_ipi	ia64_native_register_ipi
+#define assign_irq_vector	ia64_native_assign_irq_vector
+#define free_irq_vector		ia64_native_free_irq_vector
+#define register_percpu_irq	ia64_native_register_percpu_irq
+#define ia64_resend_irq		ia64_native_resend_irq
+#endif
+
+extern void ia64_native_register_ipi(void);
+extern int bind_irq_vector(int irq, int vector, cpumask_t domain);
+extern int ia64_native_assign_irq_vector (int irq);	/* allocate a free vector */
+extern void ia64_native_free_irq_vector (int vector);
+extern int reserve_irq_vector (int vector);
+extern void __setup_vector_irq(int cpu);
+extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect);
+extern void ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action);
+extern int check_irq_used (int irq);
+extern void destroy_and_reserve_irq (unsigned int irq);
+
+#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG))
+extern int irq_prepare_move(int irq, int cpu);
+extern void irq_complete_move(unsigned int irq);
+#else
+static inline int irq_prepare_move(int irq, int cpu) { return 0; }
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+
+static inline void ia64_native_resend_irq(unsigned int vector)
+{
+	platform_send_ipi(smp_processor_id(), vector, IA64_IPI_DM_INT, 0);
+}
+
+/*
+ * Default implementations for the irq-descriptor API:
+ */
+#ifndef CONFIG_IA64_GENERIC
+static inline ia64_vector __ia64_irq_to_vector(int irq)
+{
+	return irq_cfg[irq].vector;
+}
+
+static inline unsigned int
+__ia64_local_vector_to_irq (ia64_vector vec)
+{
+	return __get_cpu_var(vector_irq)[vec];
+}
+#endif
+
+/*
+ * Next follows the irq descriptor interface.  On IA-64, each CPU supports 256 interrupt
+ * vectors.  On smaller systems, there is a one-to-one correspondence between interrupt
+ * vectors and the Linux irq numbers.  However, larger systems may have multiple interrupt
+ * domains meaning that the translation from vector number to irq number depends on the
+ * interrupt domain that a CPU belongs to.  This API abstracts such platform-dependent
+ * differences and provides a uniform means to translate between vector and irq numbers
+ * and to obtain the irq descriptor for a given irq number.
+ */
+
+/* Extract the IA-64 vector that corresponds to IRQ.  */
+static inline ia64_vector
+irq_to_vector (int irq)
+{
+	return platform_irq_to_vector(irq);
+}
+
+/*
+ * Convert the local IA-64 vector to the corresponding irq number.  This translation is
+ * done in the context of the interrupt domain that the currently executing CPU belongs
+ * to.
+ */
+static inline unsigned int
+local_vector_to_irq (ia64_vector vec)
+{
+	return platform_local_vector_to_irq(vec);
+}
+
+#endif /* _ASM_IA64_HW_IRQ_H */
diff --git a/arch/ia64/include/asm/ia64regs.h b/arch/ia64/include/asm/ia64regs.h
new file mode 100644
index 00000000..1757f1c1
--- /dev/null
+++ b/arch/ia64/include/asm/ia64regs.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2002,2003 Intel Corp.
+ *      Jun Nakajima <jun.nakajima@intel.com>
+ *      Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+
+#ifndef _ASM_IA64_IA64REGS_H
+#define _ASM_IA64_IA64REGS_H
+
+/*
+ * Register Names for getreg() and setreg().
+ *
+ * The "magic" numbers happen to match the values used by the Intel compiler's
+ * getreg()/setreg() intrinsics.
+ */
+
+/* Special Registers */
+
+#define _IA64_REG_IP		1016	/* getreg only */
+#define _IA64_REG_PSR		1019
+#define _IA64_REG_PSR_L		1019
+
+/* General Integer Registers */
+
+#define _IA64_REG_GP		1025	/* R1 */
+#define _IA64_REG_R8		1032	/* R8 */
+#define _IA64_REG_R9		1033	/* R9 */
+#define _IA64_REG_SP		1036	/* R12 */
+#define _IA64_REG_TP		1037	/* R13 */
+
+/* Application Registers */
+
+#define _IA64_REG_AR_KR0	3072
+#define _IA64_REG_AR_KR1	3073
+#define _IA64_REG_AR_KR2	3074
+#define _IA64_REG_AR_KR3	3075
+#define _IA64_REG_AR_KR4	3076
+#define _IA64_REG_AR_KR5	3077
+#define _IA64_REG_AR_KR6	3078
+#define _IA64_REG_AR_KR7	3079
+#define _IA64_REG_AR_RSC	3088
+#define _IA64_REG_AR_BSP	3089
+#define _IA64_REG_AR_BSPSTORE	3090
+#define _IA64_REG_AR_RNAT	3091
+#define _IA64_REG_AR_FCR	3093
+#define _IA64_REG_AR_EFLAG	3096
+#define _IA64_REG_AR_CSD	3097
+#define _IA64_REG_AR_SSD	3098
+#define _IA64_REG_AR_CFLAG	3099
+#define _IA64_REG_AR_FSR	3100
+#define _IA64_REG_AR_FIR	3101
+#define _IA64_REG_AR_FDR	3102
+#define _IA64_REG_AR_CCV	3104
+#define _IA64_REG_AR_UNAT	3108
+#define _IA64_REG_AR_FPSR	3112
+#define _IA64_REG_AR_ITC	3116
+#define _IA64_REG_AR_PFS	3136
+#define _IA64_REG_AR_LC		3137
+#define _IA64_REG_AR_EC		3138
+
+/* Control Registers */
+
+#define _IA64_REG_CR_DCR	4096
+#define _IA64_REG_CR_ITM	4097
+#define _IA64_REG_CR_IVA	4098
+#define _IA64_REG_CR_PTA	4104
+#define _IA64_REG_CR_IPSR	4112
+#define _IA64_REG_CR_ISR	4113
+#define _IA64_REG_CR_IIP	4115
+#define _IA64_REG_CR_IFA	4116
+#define _IA64_REG_CR_ITIR	4117
+#define _IA64_REG_CR_IIPA	4118
+#define _IA64_REG_CR_IFS	4119
+#define _IA64_REG_CR_IIM	4120
+#define _IA64_REG_CR_IHA	4121
+#define _IA64_REG_CR_LID	4160
+#define _IA64_REG_CR_IVR	4161	/* getreg only */
+#define _IA64_REG_CR_TPR	4162
+#define _IA64_REG_CR_EOI	4163
+#define _IA64_REG_CR_IRR0	4164	/* getreg only */
+#define _IA64_REG_CR_IRR1	4165	/* getreg only */
+#define _IA64_REG_CR_IRR2	4166	/* getreg only */
+#define _IA64_REG_CR_IRR3	4167	/* getreg only */
+#define _IA64_REG_CR_ITV	4168
+#define _IA64_REG_CR_PMV	4169
+#define _IA64_REG_CR_CMCV	4170
+#define _IA64_REG_CR_LRR0	4176
+#define _IA64_REG_CR_LRR1	4177
+
+/* Indirect Registers for getindreg() and setindreg() */
+
+#define _IA64_REG_INDR_CPUID	9000	/* getindreg only */
+#define _IA64_REG_INDR_DBR	9001
+#define _IA64_REG_INDR_IBR	9002
+#define _IA64_REG_INDR_PKR	9003
+#define _IA64_REG_INDR_PMC	9004
+#define _IA64_REG_INDR_PMD	9005
+#define _IA64_REG_INDR_RR	9006
+
+#endif /* _ASM_IA64_IA64REGS_H */
diff --git a/arch/ia64/include/asm/idle.h b/arch/ia64/include/asm/idle.h
new file mode 100644
index 00000000..b7685015
--- /dev/null
+++ b/arch/ia64/include/asm/idle.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_IA64_IDLE_H
+#define _ASM_IA64_IDLE_H
+
+static inline void enter_idle(void) { }
+static inline void exit_idle(void) { }
+
+#endif /* _ASM_IA64_IDLE_H */
diff --git a/arch/ia64/include/asm/intel_intrin.h b/arch/ia64/include/asm/intel_intrin.h
new file mode 100644
index 00000000..53cec577
--- /dev/null
+++ b/arch/ia64/include/asm/intel_intrin.h
@@ -0,0 +1,161 @@
+#ifndef _ASM_IA64_INTEL_INTRIN_H
+#define _ASM_IA64_INTEL_INTRIN_H
+/*
+ * Intel Compiler Intrinsics
+ *
+ * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
+ * Copyright (C) 2005,2006 Hongjiu Lu <hongjiu.lu@intel.com>
+ *
+ */
+#include <ia64intrin.h>
+
+#define ia64_barrier()		__memory_barrier()
+
+#define ia64_stop()	/* Nothing: As of now stop bit is generated for each
+		 	 * intrinsic
+		 	 */
+
+#define ia64_native_getreg	__getReg
+#define ia64_native_setreg	__setReg
+
+#define ia64_hint		__hint
+#define ia64_hint_pause		__hint_pause
+
+#define ia64_mux1_brcst		_m64_mux1_brcst
+#define ia64_mux1_mix		_m64_mux1_mix
+#define ia64_mux1_shuf		_m64_mux1_shuf
+#define ia64_mux1_alt		_m64_mux1_alt
+#define ia64_mux1_rev		_m64_mux1_rev
+
+#define ia64_mux1(x,v)		_m_to_int64(_m64_mux1(_m_from_int64(x), (v)))
+#define ia64_popcnt		_m64_popcnt
+#define ia64_getf_exp		__getf_exp
+#define ia64_shrp		_m64_shrp
+
+#define ia64_tpa		__tpa
+#define ia64_invala		__invala
+#define ia64_invala_gr		__invala_gr
+#define ia64_invala_fr		__invala_fr
+#define ia64_nop		__nop
+#define ia64_sum		__sum
+#define ia64_native_ssm		__ssm
+#define ia64_rum		__rum
+#define ia64_native_rsm		__rsm
+#define ia64_native_fc 		__fc
+
+#define ia64_ldfs		__ldfs
+#define ia64_ldfd		__ldfd
+#define ia64_ldfe		__ldfe
+#define ia64_ldf8		__ldf8
+#define ia64_ldf_fill		__ldf_fill
+
+#define ia64_stfs		__stfs
+#define ia64_stfd		__stfd
+#define ia64_stfe		__stfe
+#define ia64_stf8		__stf8
+#define ia64_stf_spill		__stf_spill
+
+#define ia64_mf			__mf
+#define ia64_mfa		__mfa
+
+#define ia64_fetchadd4_acq	__fetchadd4_acq
+#define ia64_fetchadd4_rel	__fetchadd4_rel
+#define ia64_fetchadd8_acq	__fetchadd8_acq
+#define ia64_fetchadd8_rel	__fetchadd8_rel
+
+#define ia64_xchg1		_InterlockedExchange8
+#define ia64_xchg2		_InterlockedExchange16
+#define ia64_xchg4		_InterlockedExchange
+#define ia64_xchg8		_InterlockedExchange64
+
+#define ia64_cmpxchg1_rel	_InterlockedCompareExchange8_rel
+#define ia64_cmpxchg1_acq	_InterlockedCompareExchange8_acq
+#define ia64_cmpxchg2_rel	_InterlockedCompareExchange16_rel
+#define ia64_cmpxchg2_acq	_InterlockedCompareExchange16_acq
+#define ia64_cmpxchg4_rel	_InterlockedCompareExchange_rel
+#define ia64_cmpxchg4_acq	_InterlockedCompareExchange_acq
+#define ia64_cmpxchg8_rel	_InterlockedCompareExchange64_rel
+#define ia64_cmpxchg8_acq	_InterlockedCompareExchange64_acq
+
+#define __ia64_set_dbr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_DBR, index, val)
+#define ia64_set_ibr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_IBR, index, val)
+#define ia64_set_pkr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_PKR, index, val)
+#define ia64_set_pmc(index, val)	\
+		__setIndReg(_IA64_REG_INDR_PMC, index, val)
+#define ia64_set_pmd(index, val)	\
+		__setIndReg(_IA64_REG_INDR_PMD, index, val)
+#define ia64_native_set_rr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_RR, index, val)
+
+#define ia64_native_get_cpuid(index)	\
+		__getIndReg(_IA64_REG_INDR_CPUID, index)
+#define __ia64_get_dbr(index)		__getIndReg(_IA64_REG_INDR_DBR, index)
+#define ia64_get_ibr(index)		__getIndReg(_IA64_REG_INDR_IBR, index)
+#define ia64_get_pkr(index)		__getIndReg(_IA64_REG_INDR_PKR, index)
+#define ia64_get_pmc(index)		__getIndReg(_IA64_REG_INDR_PMC, index)
+#define ia64_native_get_pmd(index)	__getIndReg(_IA64_REG_INDR_PMD, index)
+#define ia64_native_get_rr(index)	__getIndReg(_IA64_REG_INDR_RR, index)
+
+#define ia64_srlz_d		__dsrlz
+#define ia64_srlz_i		__isrlz
+
+#define ia64_dv_serialize_data()
+#define ia64_dv_serialize_instruction()
+
+#define ia64_st1_rel		__st1_rel
+#define ia64_st2_rel		__st2_rel
+#define ia64_st4_rel		__st4_rel
+#define ia64_st8_rel		__st8_rel
+
+/* FIXME: need st4.rel.nta intrinsic */
+#define ia64_st4_rel_nta	__st4_rel
+
+#define ia64_ld1_acq		__ld1_acq
+#define ia64_ld2_acq		__ld2_acq
+#define ia64_ld4_acq		__ld4_acq
+#define ia64_ld8_acq		__ld8_acq
+
+#define ia64_sync_i		__synci
+#define ia64_native_thash	__thash
+#define ia64_native_ttag	__ttag
+#define ia64_itcd		__itcd
+#define ia64_itci		__itci
+#define ia64_itrd		__itrd
+#define ia64_itri		__itri
+#define ia64_ptce		__ptce
+#define ia64_ptcl		__ptcl
+#define ia64_native_ptcg	__ptcg
+#define ia64_native_ptcga	__ptcga
+#define ia64_ptri		__ptri
+#define ia64_ptrd		__ptrd
+#define ia64_dep_mi		_m64_dep_mi
+
+/* Values for lfhint in __lfetch and __lfetch_fault */
+
+#define ia64_lfhint_none	__lfhint_none
+#define ia64_lfhint_nt1		__lfhint_nt1
+#define ia64_lfhint_nt2		__lfhint_nt2
+#define ia64_lfhint_nta		__lfhint_nta
+
+#define ia64_lfetch		__lfetch
+#define ia64_lfetch_excl	__lfetch_excl
+#define ia64_lfetch_fault	__lfetch_fault
+#define ia64_lfetch_fault_excl	__lfetch_fault_excl
+
+#define ia64_native_intrin_local_irq_restore(x)		\
+do {							\
+	if ((x) != 0) {					\
+		ia64_native_ssm(IA64_PSR_I);		\
+		ia64_srlz_d();				\
+	} else {					\
+		ia64_native_rsm(IA64_PSR_I);		\
+	}						\
+} while (0)
+
+#define __builtin_trap()	__break(0);
+
+#endif /* _ASM_IA64_INTEL_INTRIN_H */
diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h
new file mode 100644
index 00000000..111ed522
--- /dev/null
+++ b/arch/ia64/include/asm/intrinsics.h
@@ -0,0 +1,246 @@
+#ifndef _ASM_IA64_INTRINSICS_H
+#define _ASM_IA64_INTRINSICS_H
+
+/*
+ * Compiler-dependent intrinsics.
+ *
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+/* include compiler specific intrinsics */
+#include <asm/ia64regs.h>
+#ifdef __INTEL_COMPILER
+# include <asm/intel_intrin.h>
+#else
+# include <asm/gcc_intrin.h>
+#endif
+
+#define ia64_native_get_psr_i()	(ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I)
+
+#define ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4)	\
+do {									\
+	ia64_native_set_rr(0x0000000000000000UL, (val0));		\
+	ia64_native_set_rr(0x2000000000000000UL, (val1));		\
+	ia64_native_set_rr(0x4000000000000000UL, (val2));		\
+	ia64_native_set_rr(0x6000000000000000UL, (val3));		\
+	ia64_native_set_rr(0x8000000000000000UL, (val4));		\
+} while (0)
+
+/*
+ * Force an unresolved reference if someone tries to use
+ * ia64_fetch_and_add() with a bad value.
+ */
+extern unsigned long __bad_size_for_ia64_fetch_and_add (void);
+extern unsigned long __bad_increment_for_ia64_fetch_and_add (void);
+
+#define IA64_FETCHADD(tmp,v,n,sz,sem)						\
+({										\
+	switch (sz) {								\
+	      case 4:								\
+	        tmp = ia64_fetchadd4_##sem((unsigned int *) v, n);		\
+		break;								\
+										\
+	      case 8:								\
+	        tmp = ia64_fetchadd8_##sem((unsigned long *) v, n);		\
+		break;								\
+										\
+	      default:								\
+		__bad_size_for_ia64_fetch_and_add();				\
+	}									\
+})
+
+#define ia64_fetchadd(i,v,sem)								\
+({											\
+	__u64 _tmp;									\
+	volatile __typeof__(*(v)) *_v = (v);						\
+	/* Can't use a switch () here: gcc isn't always smart enough for that... */	\
+	if ((i) == -16)									\
+		IA64_FETCHADD(_tmp, _v, -16, sizeof(*(v)), sem);			\
+	else if ((i) == -8)								\
+		IA64_FETCHADD(_tmp, _v, -8, sizeof(*(v)), sem);				\
+	else if ((i) == -4)								\
+		IA64_FETCHADD(_tmp, _v, -4, sizeof(*(v)), sem);				\
+	else if ((i) == -1)								\
+		IA64_FETCHADD(_tmp, _v, -1, sizeof(*(v)), sem);				\
+	else if ((i) == 1)								\
+		IA64_FETCHADD(_tmp, _v, 1, sizeof(*(v)), sem);				\
+	else if ((i) == 4)								\
+		IA64_FETCHADD(_tmp, _v, 4, sizeof(*(v)), sem);				\
+	else if ((i) == 8)								\
+		IA64_FETCHADD(_tmp, _v, 8, sizeof(*(v)), sem);				\
+	else if ((i) == 16)								\
+		IA64_FETCHADD(_tmp, _v, 16, sizeof(*(v)), sem);				\
+	else										\
+		_tmp = __bad_increment_for_ia64_fetch_and_add();			\
+	(__typeof__(*(v))) (_tmp);	/* return old value */				\
+})
+
+#define ia64_fetch_and_add(i,v)	(ia64_fetchadd(i, v, rel) + (i)) /* return new value */
+
+/*
+ * This function doesn't exist, so you'll get a linker error if
+ * something tries to do an invalid xchg().
+ */
+extern void ia64_xchg_called_with_bad_pointer (void);
+
+#define __xchg(x,ptr,size)						\
+({									\
+	unsigned long __xchg_result;					\
+									\
+	switch (size) {							\
+	      case 1:							\
+		__xchg_result = ia64_xchg1((__u8 *)ptr, x);		\
+		break;							\
+									\
+	      case 2:							\
+		__xchg_result = ia64_xchg2((__u16 *)ptr, x);		\
+		break;							\
+									\
+	      case 4:							\
+		__xchg_result = ia64_xchg4((__u32 *)ptr, x);		\
+		break;							\
+									\
+	      case 8:							\
+		__xchg_result = ia64_xchg8((__u64 *)ptr, x);		\
+		break;							\
+	      default:							\
+		ia64_xchg_called_with_bad_pointer();			\
+	}								\
+	__xchg_result;							\
+})
+
+#define xchg(ptr,x)							     \
+  ((__typeof__(*(ptr))) __xchg ((unsigned long) (x), (ptr), sizeof(*(ptr))))
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+/*
+ * This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid cmpxchg().
+ */
+extern long ia64_cmpxchg_called_with_bad_pointer (void);
+
+#define ia64_cmpxchg(sem,ptr,old,new,size)						\
+({											\
+	__u64 _o_, _r_;									\
+											\
+	switch (size) {									\
+	      case 1: _o_ = (__u8 ) (long) (old); break;				\
+	      case 2: _o_ = (__u16) (long) (old); break;				\
+	      case 4: _o_ = (__u32) (long) (old); break;				\
+	      case 8: _o_ = (__u64) (long) (old); break;				\
+	      default: break;								\
+	}										\
+	switch (size) {									\
+	      case 1:									\
+	      	_r_ = ia64_cmpxchg1_##sem((__u8 *) ptr, new, _o_);			\
+		break;									\
+											\
+	      case 2:									\
+	       _r_ = ia64_cmpxchg2_##sem((__u16 *) ptr, new, _o_);			\
+		break;									\
+											\
+	      case 4:									\
+	      	_r_ = ia64_cmpxchg4_##sem((__u32 *) ptr, new, _o_);			\
+		break;									\
+											\
+	      case 8:									\
+		_r_ = ia64_cmpxchg8_##sem((__u64 *) ptr, new, _o_);			\
+		break;									\
+											\
+	      default:									\
+		_r_ = ia64_cmpxchg_called_with_bad_pointer();				\
+		break;									\
+	}										\
+	(__typeof__(old)) _r_;								\
+})
+
+#define cmpxchg_acq(ptr, o, n)	\
+	ia64_cmpxchg(acq, (ptr), (o), (n), sizeof(*(ptr)))
+#define cmpxchg_rel(ptr, o, n)	\
+	ia64_cmpxchg(rel, (ptr), (o), (n), sizeof(*(ptr)))
+
+/* for compatibility with other platforms: */
+#define cmpxchg(ptr, o, n)	cmpxchg_acq((ptr), (o), (n))
+#define cmpxchg64(ptr, o, n)	cmpxchg_acq((ptr), (o), (n))
+
+#define cmpxchg_local		cmpxchg
+#define cmpxchg64_local		cmpxchg64
+
+#ifdef CONFIG_IA64_DEBUG_CMPXCHG
+# define CMPXCHG_BUGCHECK_DECL	int _cmpxchg_bugcheck_count = 128;
+# define CMPXCHG_BUGCHECK(v)							\
+  do {										\
+	if (_cmpxchg_bugcheck_count-- <= 0) {					\
+		void *ip;							\
+		extern int printk(const char *fmt, ...);			\
+		ip = (void *) ia64_getreg(_IA64_REG_IP);			\
+		printk("CMPXCHG_BUGCHECK: stuck at %p on word %p\n", ip, (v));	\
+		break;								\
+	}									\
+  } while (0)
+#else /* !CONFIG_IA64_DEBUG_CMPXCHG */
+# define CMPXCHG_BUGCHECK_DECL
+# define CMPXCHG_BUGCHECK(v)
+#endif /* !CONFIG_IA64_DEBUG_CMPXCHG */
+
+#endif
+
+#ifdef __KERNEL__
+#include <asm/paravirt_privop.h>
+#endif
+
+#ifndef __ASSEMBLY__
+#if defined(CONFIG_PARAVIRT) && defined(__KERNEL__)
+#ifdef ASM_SUPPORTED
+# define IA64_INTRINSIC_API(name)	paravirt_ ## name
+#else
+# define IA64_INTRINSIC_API(name)	pv_cpu_ops.name
+#endif
+#define IA64_INTRINSIC_MACRO(name)	paravirt_ ## name
+#else
+#define IA64_INTRINSIC_API(name)	ia64_native_ ## name
+#define IA64_INTRINSIC_MACRO(name)	ia64_native_ ## name
+#endif
+
+/************************************************/
+/* Instructions paravirtualized for correctness */
+/************************************************/
+/* fc, thash, get_cpuid, get_pmd, get_eflags, set_eflags */
+/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
+ * is not currently used (though it may be in a long-format VHPT system!)
+ */
+#define ia64_fc				IA64_INTRINSIC_API(fc)
+#define ia64_thash			IA64_INTRINSIC_API(thash)
+#define ia64_get_cpuid			IA64_INTRINSIC_API(get_cpuid)
+#define ia64_get_pmd			IA64_INTRINSIC_API(get_pmd)
+
+
+/************************************************/
+/* Instructions paravirtualized for performance */
+/************************************************/
+#define ia64_ssm			IA64_INTRINSIC_MACRO(ssm)
+#define ia64_rsm			IA64_INTRINSIC_MACRO(rsm)
+#define ia64_getreg			IA64_INTRINSIC_MACRO(getreg)
+#define ia64_setreg			IA64_INTRINSIC_API(setreg)
+#define ia64_set_rr			IA64_INTRINSIC_API(set_rr)
+#define ia64_get_rr			IA64_INTRINSIC_API(get_rr)
+#define ia64_ptcga			IA64_INTRINSIC_API(ptcga)
+#define ia64_get_psr_i			IA64_INTRINSIC_API(get_psr_i)
+#define ia64_intrin_local_irq_restore	\
+	IA64_INTRINSIC_API(intrin_local_irq_restore)
+#define ia64_set_rr0_to_rr4		IA64_INTRINSIC_API(set_rr0_to_rr4)
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_INTRINSICS_H */
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
new file mode 100644
index 00000000..e5a6c353
--- /dev/null
+++ b/arch/ia64/include/asm/io.h
@@ -0,0 +1,444 @@
+#ifndef _ASM_IA64_IO_H
+#define _ASM_IA64_IO_H
+
+/*
+ * This file contains the definitions for the emulated IO instructions
+ * inb/inw/inl/outb/outw/outl and the "string versions" of the same
+ * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
+ * versions of the single-IO instructions (inb_p/inw_p/..).
+ *
+ * This file is not meant to be obfuscating: it's just complicated to
+ * (a) handle it all in a way that makes gcc able to optimize it as
+ * well as possible and (b) trying to avoid writing the same thing
+ * over and over again with slight variations and possibly making a
+ * mistake somewhere.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+
+#include <asm/unaligned.h>
+
+/* We don't use IO slowdowns on the ia64, but.. */
+#define __SLOW_DOWN_IO	do { } while (0)
+#define SLOW_DOWN_IO	do { } while (0)
+
+#define __IA64_UNCACHED_OFFSET	RGN_BASE(RGN_UNCACHED)
+
+/*
+ * The legacy I/O space defined by the ia64 architecture supports only 65536 ports, but
+ * large machines may have multiple other I/O spaces so we can't place any a priori limit
+ * on IO_SPACE_LIMIT.  These additional spaces are described in ACPI.
+ */
+#define IO_SPACE_LIMIT		0xffffffffffffffffUL
+
+#define MAX_IO_SPACES_BITS		8
+#define MAX_IO_SPACES			(1UL << MAX_IO_SPACES_BITS)
+#define IO_SPACE_BITS			24
+#define IO_SPACE_SIZE			(1UL << IO_SPACE_BITS)
+
+#define IO_SPACE_NR(port)		((port) >> IO_SPACE_BITS)
+#define IO_SPACE_BASE(space)		((space) << IO_SPACE_BITS)
+#define IO_SPACE_PORT(port)		((port) & (IO_SPACE_SIZE - 1))
+
+#define IO_SPACE_SPARSE_ENCODING(p)	((((p) >> 2) << 12) | ((p) & 0xfff))
+
+struct io_space {
+	unsigned long mmio_base;	/* base in MMIO space */
+	int sparse;
+};
+
+extern struct io_space io_space[];
+extern unsigned int num_io_spaces;
+
+# ifdef __KERNEL__
+
+/*
+ * All MMIO iomem cookies are in region 6; anything less is a PIO cookie:
+ *	0xCxxxxxxxxxxxxxxx	MMIO cookie (return from ioremap)
+ *	0x000000001SPPPPPP	PIO cookie (S=space number, P..P=port)
+ *
+ * ioread/writeX() uses the leading 1 in PIO cookies (PIO_OFFSET) to catch
+ * code that uses bare port numbers without the prerequisite pci_iomap().
+ */
+#define PIO_OFFSET		(1UL << (MAX_IO_SPACES_BITS + IO_SPACE_BITS))
+#define PIO_MASK		(PIO_OFFSET - 1)
+#define PIO_RESERVED		__IA64_UNCACHED_OFFSET
+#define HAVE_ARCH_PIO_SIZE
+
+#include <asm/intrinsics.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/system.h>
+#include <asm-generic/iomap.h>
+
+/*
+ * Change virtual addresses to physical addresses and vv.
+ */
+static inline unsigned long
+virt_to_phys (volatile void *address)
+{
+	return (unsigned long) address - PAGE_OFFSET;
+}
+
+static inline void*
+phys_to_virt (unsigned long address)
+{
+	return (void *) (address + PAGE_OFFSET);
+}
+
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern u64 kern_mem_attribute (unsigned long phys_addr, unsigned long size);
+extern int valid_phys_addr_range (unsigned long addr, size_t count); /* efi.c */
+extern int valid_mmap_phys_addr_range (unsigned long pfn, size_t count);
+
+/*
+ * The following two macros are deprecated and scheduled for removal.
+ * Please use the PCI-DMA interface defined in <asm/pci.h> instead.
+ */
+#define bus_to_virt	phys_to_virt
+#define virt_to_bus	virt_to_phys
+#define page_to_bus	page_to_phys
+
+# endif /* KERNEL */
+
+/*
+ * Memory fence w/accept.  This should never be used in code that is
+ * not IA-64 specific.
+ */
+#define __ia64_mf_a()	ia64_mfa()
+
+/**
+ * ___ia64_mmiowb - I/O write barrier
+ *
+ * Ensure ordering of I/O space writes.  This will make sure that writes
+ * following the barrier will arrive after all previous writes.  For most
+ * ia64 platforms, this is a simple 'mf.a' instruction.
+ *
+ * See Documentation/DocBook/deviceiobook.tmpl for more information.
+ */
+static inline void ___ia64_mmiowb(void)
+{
+	ia64_mfa();
+}
+
+static inline void*
+__ia64_mk_io_addr (unsigned long port)
+{
+	struct io_space *space;
+	unsigned long offset;
+
+	space = &io_space[IO_SPACE_NR(port)];
+	port = IO_SPACE_PORT(port);
+	if (space->sparse)
+		offset = IO_SPACE_SPARSE_ENCODING(port);
+	else
+		offset = port;
+
+	return (void *) (space->mmio_base | offset);
+}
+
+#define __ia64_inb	___ia64_inb
+#define __ia64_inw	___ia64_inw
+#define __ia64_inl	___ia64_inl
+#define __ia64_outb	___ia64_outb
+#define __ia64_outw	___ia64_outw
+#define __ia64_outl	___ia64_outl
+#define __ia64_readb	___ia64_readb
+#define __ia64_readw	___ia64_readw
+#define __ia64_readl	___ia64_readl
+#define __ia64_readq	___ia64_readq
+#define __ia64_readb_relaxed	___ia64_readb
+#define __ia64_readw_relaxed	___ia64_readw
+#define __ia64_readl_relaxed	___ia64_readl
+#define __ia64_readq_relaxed	___ia64_readq
+#define __ia64_writeb	___ia64_writeb
+#define __ia64_writew	___ia64_writew
+#define __ia64_writel	___ia64_writel
+#define __ia64_writeq	___ia64_writeq
+#define __ia64_mmiowb	___ia64_mmiowb
+
+/*
+ * For the in/out routines, we need to do "mf.a" _after_ doing the I/O access to ensure
+ * that the access has completed before executing other I/O accesses.  Since we're doing
+ * the accesses through an uncachable (UC) translation, the CPU will execute them in
+ * program order.  However, we still need to tell the compiler not to shuffle them around
+ * during optimization, which is why we use "volatile" pointers.
+ */
+
+static inline unsigned int
+___ia64_inb (unsigned long port)
+{
+	volatile unsigned char *addr = __ia64_mk_io_addr(port);
+	unsigned char ret;
+
+	ret = *addr;
+	__ia64_mf_a();
+	return ret;
+}
+
+static inline unsigned int
+___ia64_inw (unsigned long port)
+{
+	volatile unsigned short *addr = __ia64_mk_io_addr(port);
+	unsigned short ret;
+
+	ret = *addr;
+	__ia64_mf_a();
+	return ret;
+}
+
+static inline unsigned int
+___ia64_inl (unsigned long port)
+{
+	volatile unsigned int *addr = __ia64_mk_io_addr(port);
+	unsigned int ret;
+
+	ret = *addr;
+	__ia64_mf_a();
+	return ret;
+}
+
+static inline void
+___ia64_outb (unsigned char val, unsigned long port)
+{
+	volatile unsigned char *addr = __ia64_mk_io_addr(port);
+
+	*addr = val;
+	__ia64_mf_a();
+}
+
+static inline void
+___ia64_outw (unsigned short val, unsigned long port)
+{
+	volatile unsigned short *addr = __ia64_mk_io_addr(port);
+
+	*addr = val;
+	__ia64_mf_a();
+}
+
+static inline void
+___ia64_outl (unsigned int val, unsigned long port)
+{
+	volatile unsigned int *addr = __ia64_mk_io_addr(port);
+
+	*addr = val;
+	__ia64_mf_a();
+}
+
+static inline void
+__insb (unsigned long port, void *dst, unsigned long count)
+{
+	unsigned char *dp = dst;
+
+	while (count--)
+		*dp++ = platform_inb(port);
+}
+
+static inline void
+__insw (unsigned long port, void *dst, unsigned long count)
+{
+	unsigned short *dp = dst;
+
+	while (count--)
+		put_unaligned(platform_inw(port), dp++);
+}
+
+static inline void
+__insl (unsigned long port, void *dst, unsigned long count)
+{
+	unsigned int *dp = dst;
+
+	while (count--)
+		put_unaligned(platform_inl(port), dp++);
+}
+
+static inline void
+__outsb (unsigned long port, const void *src, unsigned long count)
+{
+	const unsigned char *sp = src;
+
+	while (count--)
+		platform_outb(*sp++, port);
+}
+
+static inline void
+__outsw (unsigned long port, const void *src, unsigned long count)
+{
+	const unsigned short *sp = src;
+
+	while (count--)
+		platform_outw(get_unaligned(sp++), port);
+}
+
+static inline void
+__outsl (unsigned long port, const void *src, unsigned long count)
+{
+	const unsigned int *sp = src;
+
+	while (count--)
+		platform_outl(get_unaligned(sp++), port);
+}
+
+/*
+ * Unfortunately, some platforms are broken and do not follow the IA-64 architecture
+ * specification regarding legacy I/O support.  Thus, we have to make these operations
+ * platform dependent...
+ */
+#define __inb		platform_inb
+#define __inw		platform_inw
+#define __inl		platform_inl
+#define __outb		platform_outb
+#define __outw		platform_outw
+#define __outl		platform_outl
+#define __mmiowb	platform_mmiowb
+
+#define inb(p)		__inb(p)
+#define inw(p)		__inw(p)
+#define inl(p)		__inl(p)
+#define insb(p,d,c)	__insb(p,d,c)
+#define insw(p,d,c)	__insw(p,d,c)
+#define insl(p,d,c)	__insl(p,d,c)
+#define outb(v,p)	__outb(v,p)
+#define outw(v,p)	__outw(v,p)
+#define outl(v,p)	__outl(v,p)
+#define outsb(p,s,c)	__outsb(p,s,c)
+#define outsw(p,s,c)	__outsw(p,s,c)
+#define outsl(p,s,c)	__outsl(p,s,c)
+#define mmiowb()	__mmiowb()
+
+/*
+ * The address passed to these functions are ioremap()ped already.
+ *
+ * We need these to be machine vectors since some platforms don't provide
+ * DMA coherence via PIO reads (PCI drivers and the spec imply that this is
+ * a good idea).  Writes are ok though for all existing ia64 platforms (and
+ * hopefully it'll stay that way).
+ */
+static inline unsigned char
+___ia64_readb (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned char __force *)addr;
+}
+
+static inline unsigned short
+___ia64_readw (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned short __force *)addr;
+}
+
+static inline unsigned int
+___ia64_readl (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned int __force *) addr;
+}
+
+static inline unsigned long
+___ia64_readq (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned long __force *) addr;
+}
+
+static inline void
+__writeb (unsigned char val, volatile void __iomem *addr)
+{
+	*(volatile unsigned char __force *) addr = val;
+}
+
+static inline void
+__writew (unsigned short val, volatile void __iomem *addr)
+{
+	*(volatile unsigned short __force *) addr = val;
+}
+
+static inline void
+__writel (unsigned int val, volatile void __iomem *addr)
+{
+	*(volatile unsigned int __force *) addr = val;
+}
+
+static inline void
+__writeq (unsigned long val, volatile void __iomem *addr)
+{
+	*(volatile unsigned long __force *) addr = val;
+}
+
+#define __readb		platform_readb
+#define __readw		platform_readw
+#define __readl		platform_readl
+#define __readq		platform_readq
+#define __readb_relaxed	platform_readb_relaxed
+#define __readw_relaxed	platform_readw_relaxed
+#define __readl_relaxed	platform_readl_relaxed
+#define __readq_relaxed	platform_readq_relaxed
+
+#define readb(a)	__readb((a))
+#define readw(a)	__readw((a))
+#define readl(a)	__readl((a))
+#define readq(a)	__readq((a))
+#define readb_relaxed(a)	__readb_relaxed((a))
+#define readw_relaxed(a)	__readw_relaxed((a))
+#define readl_relaxed(a)	__readl_relaxed((a))
+#define readq_relaxed(a)	__readq_relaxed((a))
+#define __raw_readb	readb
+#define __raw_readw	readw
+#define __raw_readl	readl
+#define __raw_readq	readq
+#define __raw_readb_relaxed	readb_relaxed
+#define __raw_readw_relaxed	readw_relaxed
+#define __raw_readl_relaxed	readl_relaxed
+#define __raw_readq_relaxed	readq_relaxed
+#define writeb(v,a)	__writeb((v), (a))
+#define writew(v,a)	__writew((v), (a))
+#define writel(v,a)	__writel((v), (a))
+#define writeq(v,a)	__writeq((v), (a))
+#define __raw_writeb	writeb
+#define __raw_writew	writew
+#define __raw_writel	writel
+#define __raw_writeq	writeq
+
+#ifndef inb_p
+# define inb_p		inb
+#endif
+#ifndef inw_p
+# define inw_p		inw
+#endif
+#ifndef inl_p
+# define inl_p		inl
+#endif
+
+#ifndef outb_p
+# define outb_p		outb
+#endif
+#ifndef outw_p
+# define outw_p		outw
+#endif
+#ifndef outl_p
+# define outl_p		outl
+#endif
+
+# ifdef __KERNEL__
+
+extern void __iomem * ioremap(unsigned long offset, unsigned long size);
+extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
+extern void iounmap (volatile void __iomem *addr);
+extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size);
+extern void early_iounmap (volatile void __iomem *addr, unsigned long size);
+static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned long size)
+{
+	return ioremap(phys_addr, size);
+}
+
+
+/*
+ * String version of IO memory access ops:
+ */
+extern void memcpy_fromio(void *dst, const volatile void __iomem *src, long n);
+extern void memcpy_toio(volatile void __iomem *dst, const void *src, long n);
+extern void memset_io(volatile void __iomem *s, int c, long n);
+
+# endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_IO_H */
diff --git a/arch/ia64/include/asm/ioctl.h b/arch/ia64/include/asm/ioctl.h
new file mode 100644
index 00000000..b279fe06
--- /dev/null
+++ b/arch/ia64/include/asm/ioctl.h
@@ -0,0 +1 @@
+#include <asm-generic/ioctl.h>
diff --git a/arch/ia64/include/asm/ioctls.h b/arch/ia64/include/asm/ioctls.h
new file mode 100644
index 00000000..f3aab551
--- /dev/null
+++ b/arch/ia64/include/asm/ioctls.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_IA64_IOCTLS_H
+#define _ASM_IA64_IOCTLS_H
+
+#include <asm-generic/ioctls.h>
+
+#endif /* _ASM_IA64_IOCTLS_H */
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
new file mode 100644
index 00000000..745e095f
--- /dev/null
+++ b/arch/ia64/include/asm/iommu.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_IA64_IOMMU_H
+#define _ASM_IA64_IOMMU_H 1
+
+#define cpu_has_x2apic 0
+/* 10 seconds */
+#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
+
+extern void pci_iommu_shutdown(void);
+extern void no_iommu_init(void);
+extern int force_iommu, no_iommu;
+extern int iommu_detected;
+#ifdef	CONFIG_DMAR
+extern int iommu_pass_through;
+#else
+#define iommu_pass_through	(0)
+#endif
+extern void iommu_dma_init(void);
+extern void machvec_init(const char *name);
+
+#endif
diff --git a/arch/ia64/include/asm/iommu_table.h b/arch/ia64/include/asm/iommu_table.h
new file mode 100644
index 00000000..92c8d36a
--- /dev/null
+++ b/arch/ia64/include/asm/iommu_table.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_IA64_IOMMU_TABLE_H
+#define _ASM_IA64_IOMMU_TABLE_H
+
+#define IOMMU_INIT_POST(_detect)
+
+#endif /* _ASM_IA64_IOMMU_TABLE_H */
diff --git a/arch/ia64/include/asm/iosapic.h b/arch/ia64/include/asm/iosapic.h
new file mode 100644
index 00000000..b9c102e1
--- /dev/null
+++ b/arch/ia64/include/asm/iosapic.h
@@ -0,0 +1,126 @@
+#ifndef __ASM_IA64_IOSAPIC_H
+#define __ASM_IA64_IOSAPIC_H
+
+#define	IOSAPIC_REG_SELECT	0x0
+#define	IOSAPIC_WINDOW		0x10
+#define	IOSAPIC_EOI		0x40
+
+#define	IOSAPIC_VERSION		0x1
+
+/*
+ * Redirection table entry
+ */
+#define	IOSAPIC_RTE_LOW(i)	(0x10+i*2)
+#define	IOSAPIC_RTE_HIGH(i)	(0x11+i*2)
+
+#define	IOSAPIC_DEST_SHIFT		16
+
+/*
+ * Delivery mode
+ */
+#define	IOSAPIC_DELIVERY_SHIFT		8
+#define	IOSAPIC_FIXED			0x0
+#define	IOSAPIC_LOWEST_PRIORITY	0x1
+#define	IOSAPIC_PMI			0x2
+#define	IOSAPIC_NMI			0x4
+#define	IOSAPIC_INIT			0x5
+#define	IOSAPIC_EXTINT			0x7
+
+/*
+ * Interrupt polarity
+ */
+#define	IOSAPIC_POLARITY_SHIFT		13
+#define	IOSAPIC_POL_HIGH		0
+#define	IOSAPIC_POL_LOW		1
+
+/*
+ * Trigger mode
+ */
+#define	IOSAPIC_TRIGGER_SHIFT		15
+#define	IOSAPIC_EDGE			0
+#define	IOSAPIC_LEVEL			1
+
+/*
+ * Mask bit
+ */
+
+#define	IOSAPIC_MASK_SHIFT		16
+#define	IOSAPIC_MASK			(1<<IOSAPIC_MASK_SHIFT)
+
+#define IOSAPIC_VECTOR_MASK		0xffffff00
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_IOSAPIC
+
+#define NR_IOSAPICS			256
+
+#ifdef CONFIG_PARAVIRT_GUEST
+#include <asm/paravirt.h>
+#else
+#define iosapic_pcat_compat_init	ia64_native_iosapic_pcat_compat_init
+#define __iosapic_read			__ia64_native_iosapic_read
+#define __iosapic_write			__ia64_native_iosapic_write
+#define iosapic_get_irq_chip		ia64_native_iosapic_get_irq_chip
+#endif
+
+extern void __init ia64_native_iosapic_pcat_compat_init(void);
+extern struct irq_chip *ia64_native_iosapic_get_irq_chip(unsigned long trigger);
+
+static inline unsigned int
+__ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+	writel(reg, iosapic + IOSAPIC_REG_SELECT);
+	return readl(iosapic + IOSAPIC_WINDOW);
+}
+
+static inline void
+__ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+	writel(reg, iosapic + IOSAPIC_REG_SELECT);
+	writel(val, iosapic + IOSAPIC_WINDOW);
+}
+
+static inline void iosapic_eoi(char __iomem *iosapic, u32 vector)
+{
+	writel(vector, iosapic + IOSAPIC_EOI);
+}
+
+extern void __init iosapic_system_init (int pcat_compat);
+extern int __devinit iosapic_init (unsigned long address,
+				    unsigned int gsi_base);
+#ifdef CONFIG_HOTPLUG
+extern int iosapic_remove (unsigned int gsi_base);
+#else
+#define iosapic_remove(gsi_base)				(-EINVAL)
+#endif /* CONFIG_HOTPLUG */
+extern int gsi_to_irq (unsigned int gsi);
+extern int iosapic_register_intr (unsigned int gsi, unsigned long polarity,
+				  unsigned long trigger);
+extern void iosapic_unregister_intr (unsigned int irq);
+extern void __devinit iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
+				      unsigned long polarity,
+				      unsigned long trigger);
+extern int __init iosapic_register_platform_intr (u32 int_type,
+					   unsigned int gsi,
+					   int pmi_vector,
+					   u16 eid, u16 id,
+					   unsigned long polarity,
+					   unsigned long trigger);
+
+#ifdef CONFIG_NUMA
+extern void __devinit map_iosapic_to_node (unsigned int, int);
+#endif
+#else
+#define iosapic_system_init(pcat_compat)			do { } while (0)
+#define iosapic_init(address,gsi_base)				(-EINVAL)
+#define iosapic_remove(gsi_base)				(-ENODEV)
+#define iosapic_register_intr(gsi,polarity,trigger)		(gsi)
+#define iosapic_unregister_intr(irq)				do { } while (0)
+#define iosapic_override_isa_irq(isa_irq,gsi,polarity,trigger)	do { } while (0)
+#define iosapic_register_platform_intr(type,gsi,pmi,eid,id, \
+	polarity,trigger)					(gsi)
+#endif
+
+# endif /* !__ASSEMBLY__ */
+#endif /* __ASM_IA64_IOSAPIC_H */
diff --git a/arch/ia64/include/asm/ipcbuf.h b/arch/ia64/include/asm/ipcbuf.h
new file mode 100644
index 00000000..079899ae
--- /dev/null
+++ b/arch/ia64/include/asm/ipcbuf.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_IA64_IPCBUF_H
+#define _ASM_IA64_IPCBUF_H
+
+/*
+ * The ipc64_perm structure for IA-64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit seq
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct ipc64_perm
+{
+	__kernel_key_t	key;
+	__kernel_uid_t	uid;
+	__kernel_gid_t	gid;
+	__kernel_uid_t	cuid;
+	__kernel_gid_t	cgid;
+	__kernel_mode_t	mode;
+	unsigned short	seq;
+	unsigned short	__pad1;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+};
+
+#endif /* _ASM_IA64_IPCBUF_H */
diff --git a/arch/ia64/include/asm/irq.h b/arch/ia64/include/asm/irq.h
new file mode 100644
index 00000000..91b920fd
--- /dev/null
+++ b/arch/ia64/include/asm/irq.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_IA64_IRQ_H
+#define _ASM_IA64_IRQ_H
+
+/*
+ * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 11/24/98	S.Eranian 	updated TIMER_IRQ and irq_canonicalize
+ * 01/20/99	S.Eranian	added keyboard interrupt
+ * 02/29/00     D.Mosberger	moved most things into hw_irq.h
+ */
+
+#include <linux/types.h>
+#include <linux/cpumask.h>
+#include <generated/nr-irqs.h>
+
+static __inline__ int
+irq_canonicalize (int irq)
+{
+	/*
+	 * We do the legacy thing here of pretending that irqs < 16
+	 * are 8259 irqs.  This really shouldn't be necessary at all,
+	 * but we keep it here as serial.c still uses it...
+	 */
+	return ((irq == 2) ? 9 : irq);
+}
+
+extern void set_irq_affinity_info (unsigned int irq, int dest, int redir);
+bool is_affinity_mask_valid(const struct cpumask *cpumask);
+
+#define is_affinity_mask_valid is_affinity_mask_valid
+
+#endif /* _ASM_IA64_IRQ_H */
diff --git a/arch/ia64/include/asm/irq_regs.h b/arch/ia64/include/asm/irq_regs.h
new file mode 100644
index 00000000..3dd9c0b7
--- /dev/null
+++ b/arch/ia64/include/asm/irq_regs.h
@@ -0,0 +1 @@
+#include <asm-generic/irq_regs.h>
diff --git a/arch/ia64/include/asm/irqflags.h b/arch/ia64/include/asm/irqflags.h
new file mode 100644
index 00000000..f82d6be2
--- /dev/null
+++ b/arch/ia64/include/asm/irqflags.h
@@ -0,0 +1,94 @@
+/*
+ * IRQ flags defines.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+
+#ifndef _ASM_IA64_IRQFLAGS_H
+#define _ASM_IA64_IRQFLAGS_H
+
+#ifdef CONFIG_IA64_DEBUG_IRQ
+extern unsigned long last_cli_ip;
+static inline void arch_maybe_save_ip(unsigned long flags)
+{
+	if (flags & IA64_PSR_I)
+		last_cli_ip = ia64_getreg(_IA64_REG_IP);
+}
+#else
+#define arch_maybe_save_ip(flags) do {} while (0)
+#endif
+
+/*
+ * - clearing psr.i is implicitly serialized (visible by next insn)
+ * - setting psr.i requires data serialization
+ * - we need a stop-bit before reading PSR because we sometimes
+ *   write a floating-point register right before reading the PSR
+ *   and that writes to PSR.mfl
+ */
+
+static inline unsigned long arch_local_save_flags(void)
+{
+	ia64_stop();
+#ifdef CONFIG_PARAVIRT
+	return ia64_get_psr_i();
+#else
+	return ia64_getreg(_IA64_REG_PSR);
+#endif
+}
+
+static inline unsigned long arch_local_irq_save(void)
+{
+	unsigned long flags = arch_local_save_flags();
+
+	ia64_stop();
+	ia64_rsm(IA64_PSR_I);
+	arch_maybe_save_ip(flags);
+	return flags;
+}
+
+static inline void arch_local_irq_disable(void)
+{
+#ifdef CONFIG_IA64_DEBUG_IRQ
+	arch_local_irq_save();
+#else
+	ia64_stop();
+	ia64_rsm(IA64_PSR_I);
+#endif
+}
+
+static inline void arch_local_irq_enable(void)
+{
+	ia64_stop();
+	ia64_ssm(IA64_PSR_I);
+	ia64_srlz_d();
+}
+
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+#ifdef CONFIG_IA64_DEBUG_IRQ
+	unsigned long old_psr = arch_local_save_flags();
+#endif
+	ia64_intrin_local_irq_restore(flags & IA64_PSR_I);
+	arch_maybe_save_ip(old_psr & ~flags);
+}
+
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
+{
+	return (flags & IA64_PSR_I) == 0;
+}
+
+static inline bool arch_irqs_disabled(void)
+{
+	return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+static inline void arch_safe_halt(void)
+{
+	ia64_pal_halt_light();	/* PAL_HALT_LIGHT */
+}
+
+
+#endif /* _ASM_IA64_IRQFLAGS_H */
diff --git a/arch/ia64/include/asm/kdebug.h b/arch/ia64/include/asm/kdebug.h
new file mode 100644
index 00000000..d11a6985
--- /dev/null
+++ b/arch/ia64/include/asm/kdebug.h
@@ -0,0 +1,57 @@
+#ifndef _IA64_KDEBUG_H
+#define _IA64_KDEBUG_H 1
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-Apr     Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ *              <anil.s.keshavamurthy@intel.com> adopted from
+ *              include/asm-x86_64/kdebug.h
+ *
+ * 2005-Oct	Keith Owens <kaos@sgi.com>.  Expand notify_die to cover more
+ *		events.
+ */
+
+enum die_val {
+	DIE_BREAK = 1,
+	DIE_FAULT,
+	DIE_OOPS,
+	DIE_MACHINE_HALT,
+	DIE_MACHINE_RESTART,
+	DIE_MCA_MONARCH_ENTER,
+	DIE_MCA_MONARCH_PROCESS,
+	DIE_MCA_MONARCH_LEAVE,
+	DIE_MCA_SLAVE_ENTER,
+	DIE_MCA_SLAVE_PROCESS,
+	DIE_MCA_SLAVE_LEAVE,
+	DIE_MCA_RENDZVOUS_ENTER,
+	DIE_MCA_RENDZVOUS_PROCESS,
+	DIE_MCA_RENDZVOUS_LEAVE,
+	DIE_MCA_NEW_TIMEOUT,
+	DIE_INIT_ENTER,
+	DIE_INIT_MONARCH_ENTER,
+	DIE_INIT_MONARCH_PROCESS,
+	DIE_INIT_MONARCH_LEAVE,
+	DIE_INIT_SLAVE_ENTER,
+	DIE_INIT_SLAVE_PROCESS,
+	DIE_INIT_SLAVE_LEAVE,
+	DIE_KDEBUG_ENTER,
+	DIE_KDEBUG_LEAVE,
+	DIE_KDUMP_ENTER,
+	DIE_KDUMP_LEAVE,
+};
+
+#endif
diff --git a/arch/ia64/include/asm/kexec.h b/arch/ia64/include/asm/kexec.h
new file mode 100644
index 00000000..e1d58f81
--- /dev/null
+++ b/arch/ia64/include/asm/kexec.h
@@ -0,0 +1,44 @@
+#ifndef _ASM_IA64_KEXEC_H
+#define _ASM_IA64_KEXEC_H
+
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_PAGE_SIZE (8192 + 8192 + 4096)
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_IA_64
+
+#define kexec_flush_icache_page(page) do { \
+                unsigned long page_addr = (unsigned long)page_address(page); \
+                flush_icache_range(page_addr, page_addr + PAGE_SIZE); \
+        } while(0)
+
+extern struct kimage *ia64_kimage;
+extern const unsigned int relocate_new_kernel_size;
+extern void relocate_new_kernel(unsigned long, unsigned long,
+		struct ia64_boot_param *, unsigned long);
+static inline void
+crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+}
+extern struct resource efi_memmap_res;
+extern struct resource boot_param_res;
+extern void kdump_smp_send_stop(void);
+extern void kdump_smp_send_init(void);
+extern void kexec_disable_iosapic(void);
+extern void crash_save_this_cpu(void);
+struct rsvd_region;
+extern unsigned long kdump_find_rsvd_region(unsigned long size,
+		struct rsvd_region *rsvd_regions, int n);
+extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg);
+extern int kdump_status[];
+extern atomic_t kdump_cpu_freezed;
+extern atomic_t kdump_in_progress;
+
+#endif /* _ASM_IA64_KEXEC_H */
diff --git a/arch/ia64/include/asm/kmap_types.h b/arch/ia64/include/asm/kmap_types.h
new file mode 100644
index 00000000..05d5f999
--- /dev/null
+++ b/arch/ia64/include/asm/kmap_types.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_IA64_KMAP_TYPES_H
+#define _ASM_IA64_KMAP_TYPES_H
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+#define  __WITH_KM_FENCE
+#endif
+
+#include <asm-generic/kmap_types.h>
+
+#undef __WITH_KM_FENCE
+
+#endif /* _ASM_IA64_KMAP_TYPES_H */
diff --git a/arch/ia64/include/asm/kprobes.h b/arch/ia64/include/asm/kprobes.h
new file mode 100644
index 00000000..d5505d6f
--- /dev/null
+++ b/arch/ia64/include/asm/kprobes.h
@@ -0,0 +1,127 @@
+#ifndef _ASM_KPROBES_H
+#define _ASM_KPROBES_H
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-Apr     Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ *              <anil.s.keshavamurthy@intel.com> adapted from i386
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <asm/break.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE   2	/* last half is for kprobe-booster */
+#define BREAK_INST	(long)(__IA64_BREAK_KPROBE << 6)
+#define NOP_M_INST	(long)(1<<27)
+#define BRL_INST(i1, i2) ((long)((0xcL << 37) |	/* brl */ \
+				(0x1L << 12) |	/* many */ \
+				(((i1) & 1) << 36) | ((i2) << 13))) /* imm */
+
+typedef union cmp_inst {
+	struct {
+	unsigned long long qp : 6;
+	unsigned long long p1 : 6;
+	unsigned long long c  : 1;
+	unsigned long long r2 : 7;
+	unsigned long long r3 : 7;
+	unsigned long long p2 : 6;
+	unsigned long long ta : 1;
+	unsigned long long x2 : 2;
+	unsigned long long tb : 1;
+	unsigned long long opcode : 4;
+	unsigned long long reserved : 23;
+	}f;
+	unsigned long long l;
+} cmp_inst_t;
+
+struct kprobe;
+
+typedef struct _bundle {
+	struct {
+		unsigned long long template : 5;
+		unsigned long long slot0 : 41;
+		unsigned long long slot1_p0 : 64-46;
+	} quad0;
+	struct {
+		unsigned long long slot1_p1 : 41 - (64-46);
+		unsigned long long slot2 : 41;
+	} quad1;
+} __attribute__((__aligned__(16)))  bundle_t;
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+};
+
+#define	MAX_PARAM_RSE_SIZE	(0x60+0x60/0x3f)
+/* per-cpu kprobe control block */
+#define ARCH_PREV_KPROBE_SZ 2
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	struct pt_regs jprobe_saved_regs;
+	unsigned long jprobes_saved_stacked_regs[MAX_PARAM_RSE_SIZE];
+	unsigned long *bsp;
+	unsigned long cfm;
+	atomic_t prev_kprobe_index;
+	struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ];
+};
+
+#define kretprobe_blacklist_size 0
+
+#define SLOT0_OPCODE_SHIFT	(37)
+#define SLOT1_p1_OPCODE_SHIFT	(37 - (64-46))
+#define SLOT2_OPCODE_SHIFT 	(37)
+
+#define INDIRECT_CALL_OPCODE		(1)
+#define IP_RELATIVE_CALL_OPCODE		(5)
+#define IP_RELATIVE_BRANCH_OPCODE	(4)
+#define IP_RELATIVE_PREDICT_OPCODE	(7)
+#define LONG_BRANCH_OPCODE		(0xC)
+#define LONG_CALL_OPCODE		(0xD)
+#define flush_insn_slot(p)		do { } while (0)
+
+typedef struct kprobe_opcode {
+	bundle_t bundle;
+} kprobe_opcode_t;
+
+/* Architecture specific copy of original instruction*/
+struct arch_specific_insn {
+	/* copy of the instruction to be emulated */
+	kprobe_opcode_t *insn;
+ #define INST_FLAG_FIX_RELATIVE_IP_ADDR		1
+ #define INST_FLAG_FIX_BRANCH_REG		2
+ #define INST_FLAG_BREAK_INST			4
+ #define INST_FLAG_BOOSTABLE			8
+ 	unsigned long inst_flag;
+ 	unsigned short target_br_reg;
+	unsigned short slot;
+};
+
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+				    unsigned long val, void *data);
+
+extern void invalidate_stacked_regs(void);
+extern void flush_register_stack(void);
+extern void arch_remove_kprobe(struct kprobe *p);
+
+#endif				/* _ASM_KPROBES_H */
diff --git a/arch/ia64/include/asm/kregs.h b/arch/ia64/include/asm/kregs.h
new file mode 100644
index 00000000..39e65f66
--- /dev/null
+++ b/arch/ia64/include/asm/kregs.h
@@ -0,0 +1,165 @@
+#ifndef _ASM_IA64_KREGS_H
+#define _ASM_IA64_KREGS_H
+
+/*
+ * Copyright (C) 2001-2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+/*
+ * This file defines the kernel register usage convention used by Linux/ia64.
+ */
+
+/*
+ * Kernel registers:
+ */
+#define IA64_KR_IO_BASE		0	/* ar.k0: legacy I/O base address */
+#define IA64_KR_TSSD		1	/* ar.k1: IVE uses this as the TSSD */
+#define IA64_KR_PER_CPU_DATA	3	/* ar.k3: physical per-CPU base */
+#define IA64_KR_CURRENT_STACK	4	/* ar.k4: what's mapped in IA64_TR_CURRENT_STACK */
+#define IA64_KR_FPU_OWNER	5	/* ar.k5: fpu-owner (UP only, at the moment) */
+#define IA64_KR_CURRENT		6	/* ar.k6: "current" task pointer */
+#define IA64_KR_PT_BASE		7	/* ar.k7: page table base address (physical) */
+
+#define _IA64_KR_PASTE(x,y)	x##y
+#define _IA64_KR_PREFIX(n)	_IA64_KR_PASTE(ar.k, n)
+#define IA64_KR(n)		_IA64_KR_PREFIX(IA64_KR_##n)
+
+/*
+ * Translation registers:
+ */
+#define IA64_TR_KERNEL		0	/* itr0, dtr0: maps kernel image (code & data) */
+#define IA64_TR_PALCODE		1	/* itr1: maps PALcode as required by EFI */
+#define IA64_TR_CURRENT_STACK	1	/* dtr1: maps kernel's memory- & register-stacks */
+
+#define IA64_TR_ALLOC_BASE	2 	/* itr&dtr: Base of dynamic TR resource*/
+#define IA64_TR_ALLOC_MAX	64 	/* Max number for dynamic use*/
+
+/* Processor status register bits: */
+#define IA64_PSR_BE_BIT		1
+#define IA64_PSR_UP_BIT		2
+#define IA64_PSR_AC_BIT		3
+#define IA64_PSR_MFL_BIT	4
+#define IA64_PSR_MFH_BIT	5
+#define IA64_PSR_IC_BIT		13
+#define IA64_PSR_I_BIT		14
+#define IA64_PSR_PK_BIT		15
+#define IA64_PSR_DT_BIT		17
+#define IA64_PSR_DFL_BIT	18
+#define IA64_PSR_DFH_BIT	19
+#define IA64_PSR_SP_BIT		20
+#define IA64_PSR_PP_BIT		21
+#define IA64_PSR_DI_BIT		22
+#define IA64_PSR_SI_BIT		23
+#define IA64_PSR_DB_BIT		24
+#define IA64_PSR_LP_BIT		25
+#define IA64_PSR_TB_BIT		26
+#define IA64_PSR_RT_BIT		27
+/* The following are not affected by save_flags()/restore_flags(): */
+#define IA64_PSR_CPL0_BIT	32
+#define IA64_PSR_CPL1_BIT	33
+#define IA64_PSR_IS_BIT		34
+#define IA64_PSR_MC_BIT		35
+#define IA64_PSR_IT_BIT		36
+#define IA64_PSR_ID_BIT		37
+#define IA64_PSR_DA_BIT		38
+#define IA64_PSR_DD_BIT		39
+#define IA64_PSR_SS_BIT		40
+#define IA64_PSR_RI_BIT		41
+#define IA64_PSR_ED_BIT		43
+#define IA64_PSR_BN_BIT		44
+#define IA64_PSR_IA_BIT		45
+
+/* A mask of PSR bits that we generally don't want to inherit across a clone2() or an
+   execve().  Only list flags here that need to be cleared/set for BOTH clone2() and
+   execve().  */
+#define IA64_PSR_BITS_TO_CLEAR	(IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_DB | IA64_PSR_LP | \
+				 IA64_PSR_TB  | IA64_PSR_ID  | IA64_PSR_DA | IA64_PSR_DD | \
+				 IA64_PSR_SS  | IA64_PSR_ED  | IA64_PSR_IA)
+#define IA64_PSR_BITS_TO_SET	(IA64_PSR_DFH | IA64_PSR_SP)
+
+#define IA64_PSR_BE	(__IA64_UL(1) << IA64_PSR_BE_BIT)
+#define IA64_PSR_UP	(__IA64_UL(1) << IA64_PSR_UP_BIT)
+#define IA64_PSR_AC	(__IA64_UL(1) << IA64_PSR_AC_BIT)
+#define IA64_PSR_MFL	(__IA64_UL(1) << IA64_PSR_MFL_BIT)
+#define IA64_PSR_MFH	(__IA64_UL(1) << IA64_PSR_MFH_BIT)
+#define IA64_PSR_IC	(__IA64_UL(1) << IA64_PSR_IC_BIT)
+#define IA64_PSR_I	(__IA64_UL(1) << IA64_PSR_I_BIT)
+#define IA64_PSR_PK	(__IA64_UL(1) << IA64_PSR_PK_BIT)
+#define IA64_PSR_DT	(__IA64_UL(1) << IA64_PSR_DT_BIT)
+#define IA64_PSR_DFL	(__IA64_UL(1) << IA64_PSR_DFL_BIT)
+#define IA64_PSR_DFH	(__IA64_UL(1) << IA64_PSR_DFH_BIT)
+#define IA64_PSR_SP	(__IA64_UL(1) << IA64_PSR_SP_BIT)
+#define IA64_PSR_PP	(__IA64_UL(1) << IA64_PSR_PP_BIT)
+#define IA64_PSR_DI	(__IA64_UL(1) << IA64_PSR_DI_BIT)
+#define IA64_PSR_SI	(__IA64_UL(1) << IA64_PSR_SI_BIT)
+#define IA64_PSR_DB	(__IA64_UL(1) << IA64_PSR_DB_BIT)
+#define IA64_PSR_LP	(__IA64_UL(1) << IA64_PSR_LP_BIT)
+#define IA64_PSR_TB	(__IA64_UL(1) << IA64_PSR_TB_BIT)
+#define IA64_PSR_RT	(__IA64_UL(1) << IA64_PSR_RT_BIT)
+/* The following are not affected by save_flags()/restore_flags(): */
+#define IA64_PSR_CPL	(__IA64_UL(3) << IA64_PSR_CPL0_BIT)
+#define IA64_PSR_IS	(__IA64_UL(1) << IA64_PSR_IS_BIT)
+#define IA64_PSR_MC	(__IA64_UL(1) << IA64_PSR_MC_BIT)
+#define IA64_PSR_IT	(__IA64_UL(1) << IA64_PSR_IT_BIT)
+#define IA64_PSR_ID	(__IA64_UL(1) << IA64_PSR_ID_BIT)
+#define IA64_PSR_DA	(__IA64_UL(1) << IA64_PSR_DA_BIT)
+#define IA64_PSR_DD	(__IA64_UL(1) << IA64_PSR_DD_BIT)
+#define IA64_PSR_SS	(__IA64_UL(1) << IA64_PSR_SS_BIT)
+#define IA64_PSR_RI	(__IA64_UL(3) << IA64_PSR_RI_BIT)
+#define IA64_PSR_ED	(__IA64_UL(1) << IA64_PSR_ED_BIT)
+#define IA64_PSR_BN	(__IA64_UL(1) << IA64_PSR_BN_BIT)
+#define IA64_PSR_IA	(__IA64_UL(1) << IA64_PSR_IA_BIT)
+
+/* User mask bits: */
+#define IA64_PSR_UM	(IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH)
+
+/* Default Control Register */
+#define IA64_DCR_PP_BIT		 0	/* privileged performance monitor default */
+#define IA64_DCR_BE_BIT		 1	/* big-endian default */
+#define IA64_DCR_LC_BIT		 2	/* ia32 lock-check enable */
+#define IA64_DCR_DM_BIT		 8	/* defer TLB miss faults */
+#define IA64_DCR_DP_BIT		 9	/* defer page-not-present faults */
+#define IA64_DCR_DK_BIT		10	/* defer key miss faults */
+#define IA64_DCR_DX_BIT		11	/* defer key permission faults */
+#define IA64_DCR_DR_BIT		12	/* defer access right faults */
+#define IA64_DCR_DA_BIT		13	/* defer access bit faults */
+#define IA64_DCR_DD_BIT		14	/* defer debug faults */
+
+#define IA64_DCR_PP	(__IA64_UL(1) << IA64_DCR_PP_BIT)
+#define IA64_DCR_BE	(__IA64_UL(1) << IA64_DCR_BE_BIT)
+#define IA64_DCR_LC	(__IA64_UL(1) << IA64_DCR_LC_BIT)
+#define IA64_DCR_DM	(__IA64_UL(1) << IA64_DCR_DM_BIT)
+#define IA64_DCR_DP	(__IA64_UL(1) << IA64_DCR_DP_BIT)
+#define IA64_DCR_DK	(__IA64_UL(1) << IA64_DCR_DK_BIT)
+#define IA64_DCR_DX	(__IA64_UL(1) << IA64_DCR_DX_BIT)
+#define IA64_DCR_DR	(__IA64_UL(1) << IA64_DCR_DR_BIT)
+#define IA64_DCR_DA	(__IA64_UL(1) << IA64_DCR_DA_BIT)
+#define IA64_DCR_DD	(__IA64_UL(1) << IA64_DCR_DD_BIT)
+
+/* Interrupt Status Register */
+#define IA64_ISR_X_BIT		32	/* execute access */
+#define IA64_ISR_W_BIT		33	/* write access */
+#define IA64_ISR_R_BIT		34	/* read access */
+#define IA64_ISR_NA_BIT		35	/* non-access */
+#define IA64_ISR_SP_BIT		36	/* speculative load exception */
+#define IA64_ISR_RS_BIT		37	/* mandatory register-stack exception */
+#define IA64_ISR_IR_BIT		38	/* invalid register frame exception */
+#define IA64_ISR_CODE_MASK	0xf
+
+#define IA64_ISR_X	(__IA64_UL(1) << IA64_ISR_X_BIT)
+#define IA64_ISR_W	(__IA64_UL(1) << IA64_ISR_W_BIT)
+#define IA64_ISR_R	(__IA64_UL(1) << IA64_ISR_R_BIT)
+#define IA64_ISR_NA	(__IA64_UL(1) << IA64_ISR_NA_BIT)
+#define IA64_ISR_SP	(__IA64_UL(1) << IA64_ISR_SP_BIT)
+#define IA64_ISR_RS	(__IA64_UL(1) << IA64_ISR_RS_BIT)
+#define IA64_ISR_IR	(__IA64_UL(1) << IA64_ISR_IR_BIT)
+
+/* ISR code field for non-access instructions */
+#define IA64_ISR_CODE_TPA	0
+#define IA64_ISR_CODE_FC	1
+#define IA64_ISR_CODE_PROBE	2
+#define IA64_ISR_CODE_TAK	3
+#define IA64_ISR_CODE_LFETCH	4
+#define IA64_ISR_CODE_PROBEF	5
+
+#endif /* _ASM_IA64_kREGS_H */
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
new file mode 100644
index 00000000..bc90c75a
--- /dev/null
+++ b/arch/ia64/include/asm/kvm.h
@@ -0,0 +1,264 @@
+#ifndef __ASM_IA64_KVM_H
+#define __ASM_IA64_KVM_H
+
+/*
+ * kvm structure definitions  for ia64
+ *
+ * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_DEVICE_ASSIGNMENT
+
+/* Architectural interrupt line count. */
+#define KVM_NR_INTERRUPTS 256
+
+#define KVM_IOAPIC_NUM_PINS  48
+
+struct kvm_ioapic_state {
+	__u64 base_address;
+	__u32 ioregsel;
+	__u32 id;
+	__u32 irr;
+	__u32 pad;
+	union {
+		__u64 bits;
+		struct {
+			__u8 vector;
+			__u8 delivery_mode:3;
+			__u8 dest_mode:1;
+			__u8 delivery_status:1;
+			__u8 polarity:1;
+			__u8 remote_irr:1;
+			__u8 trig_mode:1;
+			__u8 mask:1;
+			__u8 reserve:7;
+			__u8 reserved[4];
+			__u8 dest_id;
+		} fields;
+	} redirtbl[KVM_IOAPIC_NUM_PINS];
+};
+
+#define KVM_IRQCHIP_PIC_MASTER   0
+#define KVM_IRQCHIP_PIC_SLAVE    1
+#define KVM_IRQCHIP_IOAPIC       2
+#define KVM_NR_IRQCHIPS          3
+
+#define KVM_CONTEXT_SIZE	8*1024
+
+struct kvm_fpreg {
+	union {
+		unsigned long bits[2];
+		long double __dummy;	/* force 16-byte alignment */
+	} u;
+};
+
+union context {
+	/* 8K size */
+	char	dummy[KVM_CONTEXT_SIZE];
+	struct {
+		unsigned long       psr;
+		unsigned long       pr;
+		unsigned long       caller_unat;
+		unsigned long       pad;
+		unsigned long       gr[32];
+		unsigned long       ar[128];
+		unsigned long       br[8];
+		unsigned long       cr[128];
+		unsigned long       rr[8];
+		unsigned long       ibr[8];
+		unsigned long       dbr[8];
+		unsigned long       pkr[8];
+		struct kvm_fpreg   fr[128];
+	};
+};
+
+struct thash_data {
+	union {
+		struct {
+			unsigned long p    :  1; /* 0 */
+			unsigned long rv1  :  1; /* 1 */
+			unsigned long ma   :  3; /* 2-4 */
+			unsigned long a    :  1; /* 5 */
+			unsigned long d    :  1; /* 6 */
+			unsigned long pl   :  2; /* 7-8 */
+			unsigned long ar   :  3; /* 9-11 */
+			unsigned long ppn  : 38; /* 12-49 */
+			unsigned long rv2  :  2; /* 50-51 */
+			unsigned long ed   :  1; /* 52 */
+			unsigned long ig1  : 11; /* 53-63 */
+		};
+		struct {
+			unsigned long __rv1 : 53;     /* 0-52 */
+			unsigned long contiguous : 1; /*53 */
+			unsigned long tc : 1;         /* 54 TR or TC */
+			unsigned long cl : 1;
+			/* 55 I side or D side cache line */
+			unsigned long len  :  4;      /* 56-59 */
+			unsigned long io  : 1;	/* 60 entry is for io or not */
+			unsigned long nomap : 1;
+			/* 61 entry cann't be inserted into machine TLB.*/
+			unsigned long checked : 1;
+			/* 62 for VTLB/VHPT sanity check */
+			unsigned long invalid : 1;
+			/* 63 invalid entry */
+		};
+		unsigned long page_flags;
+	};                  /* same for VHPT and TLB */
+
+	union {
+		struct {
+			unsigned long rv3  :  2;
+			unsigned long ps   :  6;
+			unsigned long key  : 24;
+			unsigned long rv4  : 32;
+		};
+		unsigned long itir;
+	};
+	union {
+		struct {
+			unsigned long ig2  :  12;
+			unsigned long vpn  :  49;
+			unsigned long vrn  :   3;
+		};
+		unsigned long ifa;
+		unsigned long vadr;
+		struct {
+			unsigned long tag  :  63;
+			unsigned long ti   :  1;
+		};
+		unsigned long etag;
+	};
+	union {
+		struct thash_data *next;
+		unsigned long rid;
+		unsigned long gpaddr;
+	};
+};
+
+#define	NITRS	8
+#define NDTRS	8
+
+struct saved_vpd {
+	unsigned long  vhpi;
+	unsigned long  vgr[16];
+	unsigned long  vbgr[16];
+	unsigned long  vnat;
+	unsigned long  vbnat;
+	unsigned long  vcpuid[5];
+	unsigned long  vpsr;
+	unsigned long  vpr;
+	union {
+		unsigned long  vcr[128];
+		struct {
+			unsigned long dcr;
+			unsigned long itm;
+			unsigned long iva;
+			unsigned long rsv1[5];
+			unsigned long pta;
+			unsigned long rsv2[7];
+			unsigned long ipsr;
+			unsigned long isr;
+			unsigned long rsv3;
+			unsigned long iip;
+			unsigned long ifa;
+			unsigned long itir;
+			unsigned long iipa;
+			unsigned long ifs;
+			unsigned long iim;
+			unsigned long iha;
+			unsigned long rsv4[38];
+			unsigned long lid;
+			unsigned long ivr;
+			unsigned long tpr;
+			unsigned long eoi;
+			unsigned long irr[4];
+			unsigned long itv;
+			unsigned long pmv;
+			unsigned long cmcv;
+			unsigned long rsv5[5];
+			unsigned long lrr0;
+			unsigned long lrr1;
+			unsigned long rsv6[46];
+		};
+	};
+};
+
+struct kvm_regs {
+	struct saved_vpd vpd;
+	/*Arch-regs*/
+	int mp_state;
+	unsigned long vmm_rr;
+	/* TR and TC.  */
+	struct thash_data itrs[NITRS];
+	struct thash_data dtrs[NDTRS];
+	/* Bit is set if there is a tr/tc for the region.  */
+	unsigned char itr_regions;
+	unsigned char dtr_regions;
+	unsigned char tc_regions;
+
+	char irq_check;
+	unsigned long saved_itc;
+	unsigned long itc_check;
+	unsigned long timer_check;
+	unsigned long timer_pending;
+	unsigned long last_itc;
+
+	unsigned long vrr[8];
+	unsigned long ibr[8];
+	unsigned long dbr[8];
+	unsigned long insvc[4];		/* Interrupt in service.  */
+	unsigned long xtp;
+
+	unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
+	unsigned long metaphysical_rr4;	/* from kvm_arch (so is pinned) */
+	unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
+	unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
+	unsigned long fp_psr;       /*used for lazy float register */
+	unsigned long saved_gp;
+	/*for phycial  emulation */
+
+	union context saved_guest;
+
+	unsigned long reserved[64];	/* for future use */
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+#define KVM_IA64_VCPU_STACK_SHIFT	16
+#define KVM_IA64_VCPU_STACK_SIZE	(1UL << KVM_IA64_VCPU_STACK_SHIFT)
+
+struct kvm_ia64_vcpu_stack {
+	unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
+};
+
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
+#endif
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
new file mode 100644
index 00000000..2689ee54
--- /dev/null
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -0,0 +1,599 @@
+/*
+ * kvm_host.h: used for kvm module, and hold ia64-specific sections.
+ *
+ * Copyright (C) 2007, Intel Corporation.
+ *
+ * Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_KVM_HOST_H
+#define __ASM_KVM_HOST_H
+
+#define KVM_MEMORY_SLOTS 32
+/* memory slots that does not exposed to userspace */
+#define KVM_PRIVATE_MEM_SLOTS 4
+
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+/* define exit reasons from vmm to kvm*/
+#define EXIT_REASON_VM_PANIC		0
+#define EXIT_REASON_MMIO_INSTRUCTION	1
+#define EXIT_REASON_PAL_CALL		2
+#define EXIT_REASON_SAL_CALL		3
+#define EXIT_REASON_SWITCH_RR6		4
+#define EXIT_REASON_VM_DESTROY		5
+#define EXIT_REASON_EXTERNAL_INTERRUPT	6
+#define EXIT_REASON_IPI			7
+#define EXIT_REASON_PTC_G		8
+#define EXIT_REASON_DEBUG		20
+
+/*Define vmm address space and vm data space.*/
+#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
+#define KVM_VMM_SHIFT 24
+#define KVM_VMM_BASE 0xD000000000000000
+#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
+
+/*
+ * Define vm_buffer, used by PAL Services, base address.
+ * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
+ */
+#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
+#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
+
+/*
+ * kvm guest's data area looks as follow:
+ *
+ *            +----------------------+	-------	KVM_VM_DATA_SIZE
+ *	      |	    vcpu[n]'s data   |	 |     ___________________KVM_STK_OFFSET
+ *     	      |			     |	 |    /			  |
+ *     	      |	       ..........    |	 |   /vcpu's struct&stack |
+ *     	      |	       ..........    |	 |  /---------------------|---- 0
+ *	      |	    vcpu[5]'s data   |	 | /	   vpd		  |
+ *	      |	    vcpu[4]'s data   |	 |/-----------------------|
+ *	      |	    vcpu[3]'s data   |	 /	   vtlb		  |
+ *	      |	    vcpu[2]'s data   |	/|------------------------|
+ *	      |	    vcpu[1]'s data   |/  |	   vhpt		  |
+ *	      |	    vcpu[0]'s data   |____________________________|
+ *            +----------------------+	 |
+ *	      |	   memory dirty log  |	 |
+ *            +----------------------+	 |
+ *	      |	   vm's data struct  |	 |
+ *            +----------------------+	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |	  vm's p2m table  |	 |
+ *	      |			     |	 |
+ *            |			     |	 |
+ *	      |			     |	 |  |
+ * vm's data->|			     |   |  |
+ *	      +----------------------+ ------- 0
+ * To support large memory, needs to increase the size of p2m.
+ * To support more vcpus, needs to ensure it has enough space to
+ * hold vcpus' data.
+ */
+
+#define KVM_VM_DATA_SHIFT	26
+#define KVM_VM_DATA_SIZE	(__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
+#define KVM_VM_DATA_BASE	(KVM_VMM_BASE + KVM_VM_DATA_SIZE)
+
+#define KVM_P2M_BASE		KVM_VM_DATA_BASE
+#define KVM_P2M_SIZE		(__IA64_UL_CONST(24) << 20)
+
+#define VHPT_SHIFT		16
+#define VHPT_SIZE		(__IA64_UL_CONST(1) << VHPT_SHIFT)
+#define VHPT_NUM_ENTRIES	(__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
+
+#define VTLB_SHIFT		16
+#define VTLB_SIZE		(__IA64_UL_CONST(1) << VTLB_SHIFT)
+#define VTLB_NUM_ENTRIES	(1UL << (VHPT_SHIFT-5))
+
+#define VPD_SHIFT		16
+#define VPD_SIZE		(__IA64_UL_CONST(1) << VPD_SHIFT)
+
+#define VCPU_STRUCT_SHIFT	16
+#define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
+
+/*
+ * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
+ */
+#define KVM_STK_SHIFT		16
+#define KVM_STK_OFFSET		(__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
+
+#define KVM_VM_STRUCT_SHIFT	19
+#define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
+
+#define KVM_MEM_DIRY_LOG_SHIFT	19
+#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+/*Define the max vcpus and memory for Guests.*/
+#define KVM_MAX_VCPUS	(KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
+			KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
+#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
+
+#define VMM_LOG_LEN 256
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+#include <linux/kvm_types.h>
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/page.h>
+
+struct kvm_vcpu_data {
+	char vcpu_vhpt[VHPT_SIZE];
+	char vcpu_vtlb[VTLB_SIZE];
+	char vcpu_vpd[VPD_SIZE];
+	char vcpu_struct[VCPU_STRUCT_SIZE];
+};
+
+struct kvm_vm_data {
+	char kvm_p2m[KVM_P2M_SIZE];
+	char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
+	char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
+	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
+};
+
+#define VCPU_BASE(n)	(KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, vcpu_data[n]))
+#define KVM_VM_BASE	(KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, kvm_vm_struct))
+#define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
+
+#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
+#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
+#define VPD_BASE(n)  (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
+#define VCPU_STRUCT_BASE(n)	(VCPU_BASE(n) + \
+				offsetof(struct kvm_vcpu_data, vcpu_struct))
+
+/*IO section definitions*/
+#define IOREQ_READ      1
+#define IOREQ_WRITE     0
+
+#define STATE_IOREQ_NONE        0
+#define STATE_IOREQ_READY       1
+#define STATE_IOREQ_INPROCESS   2
+#define STATE_IORESP_READY      3
+
+/*Guest Physical address layout.*/
+#define GPFN_MEM        (0UL << 60) /* Guest pfn is normal mem */
+#define GPFN_FRAME_BUFFER   (1UL << 60) /* VGA framebuffer */
+#define GPFN_LOW_MMIO       (2UL << 60) /* Low MMIO range */
+#define GPFN_PIB        (3UL << 60) /* PIB base */
+#define GPFN_IOSAPIC        (4UL << 60) /* IOSAPIC base */
+#define GPFN_LEGACY_IO      (5UL << 60) /* Legacy I/O base */
+#define GPFN_GFW        (6UL << 60) /* Guest Firmware */
+#define GPFN_PHYS_MMIO      (7UL << 60) /* Directed MMIO Range */
+
+#define GPFN_IO_MASK        (7UL << 60) /* Guest pfn is I/O type */
+#define GPFN_INV_MASK       (1UL << 63) /* Guest pfn is invalid */
+#define INVALID_MFN       (~0UL)
+#define MEM_G   (1UL << 30)
+#define MEM_M   (1UL << 20)
+#define MMIO_START       (3 * MEM_G)
+#define MMIO_SIZE        (512 * MEM_M)
+#define VGA_IO_START     0xA0000UL
+#define VGA_IO_SIZE      0x20000
+#define LEGACY_IO_START  (MMIO_START + MMIO_SIZE)
+#define LEGACY_IO_SIZE   (64 * MEM_M)
+#define IO_SAPIC_START   0xfec00000UL
+#define IO_SAPIC_SIZE    0x100000
+#define PIB_START 0xfee00000UL
+#define PIB_SIZE 0x200000
+#define GFW_START        (4 * MEM_G - 16 * MEM_M)
+#define GFW_SIZE         (16 * MEM_M)
+
+/*Deliver mode, defined for ioapic.c*/
+#define dest_Fixed IOSAPIC_FIXED
+#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY
+
+#define NMI_VECTOR      		2
+#define ExtINT_VECTOR       		0
+#define NULL_VECTOR     		(-1)
+#define IA64_SPURIOUS_INT_VECTOR    	0x0f
+
+#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24)
+
+/*
+ *Delivery mode
+ */
+#define SAPIC_DELIV_SHIFT      8
+#define SAPIC_FIXED            0x0
+#define SAPIC_LOWEST_PRIORITY  0x1
+#define SAPIC_PMI              0x2
+#define SAPIC_NMI              0x4
+#define SAPIC_INIT             0x5
+#define SAPIC_EXTINT           0x7
+
+/*
+ * vcpu->requests bit members for arch
+ */
+#define KVM_REQ_PTC_G		32
+#define KVM_REQ_RESUME		33
+
+#define KVM_HPAGE_GFN_SHIFT(x)	0
+#define KVM_NR_PAGE_SIZES	1
+#define KVM_PAGES_PER_HPAGE(x)	1
+
+struct kvm;
+struct kvm_vcpu;
+
+struct kvm_mmio_req {
+	uint64_t addr;          /*  physical address		*/
+	uint64_t size;          /*  size in bytes		*/
+	uint64_t data;          /*  data (or paddr of data)     */
+	uint8_t state:4;
+	uint8_t dir:1;          /*  1=read, 0=write             */
+};
+
+/*Pal data struct */
+struct kvm_pal_call{
+	/*In area*/
+	uint64_t gr28;
+	uint64_t gr29;
+	uint64_t gr30;
+	uint64_t gr31;
+	/*Out area*/
+	struct ia64_pal_retval ret;
+};
+
+/* Sal data structure */
+struct kvm_sal_call{
+	/*In area*/
+	uint64_t in0;
+	uint64_t in1;
+	uint64_t in2;
+	uint64_t in3;
+	uint64_t in4;
+	uint64_t in5;
+	uint64_t in6;
+	uint64_t in7;
+	struct sal_ret_values ret;
+};
+
+/*Guest change rr6*/
+struct kvm_switch_rr6 {
+	uint64_t old_rr;
+	uint64_t new_rr;
+};
+
+union ia64_ipi_a{
+	unsigned long val;
+	struct {
+		unsigned long rv  : 3;
+		unsigned long ir  : 1;
+		unsigned long eid : 8;
+		unsigned long id  : 8;
+		unsigned long ib_base : 44;
+	};
+};
+
+union ia64_ipi_d {
+	unsigned long val;
+	struct {
+		unsigned long vector : 8;
+		unsigned long dm  : 3;
+		unsigned long ig  : 53;
+	};
+};
+
+/*ipi check exit data*/
+struct kvm_ipi_data{
+	union ia64_ipi_a addr;
+	union ia64_ipi_d data;
+};
+
+/*global purge data*/
+struct kvm_ptc_g {
+	unsigned long vaddr;
+	unsigned long rr;
+	unsigned long ps;
+	struct kvm_vcpu *vcpu;
+};
+
+/*Exit control data */
+struct exit_ctl_data{
+	uint32_t exit_reason;
+	uint32_t vm_status;
+	union {
+		struct kvm_mmio_req	ioreq;
+		struct kvm_pal_call	pal_data;
+		struct kvm_sal_call	sal_data;
+		struct kvm_switch_rr6	rr_data;
+		struct kvm_ipi_data	ipi_data;
+		struct kvm_ptc_g	ptc_g_data;
+	} u;
+};
+
+union pte_flags {
+	unsigned long val;
+	struct {
+		unsigned long p    :  1; /*0      */
+		unsigned long      :  1; /* 1     */
+		unsigned long ma   :  3; /* 2-4   */
+		unsigned long a    :  1; /* 5     */
+		unsigned long d    :  1; /* 6     */
+		unsigned long pl   :  2; /* 7-8   */
+		unsigned long ar   :  3; /* 9-11  */
+		unsigned long ppn  : 38; /* 12-49 */
+		unsigned long      :  2; /* 50-51 */
+		unsigned long ed   :  1; /* 52    */
+	};
+};
+
+union ia64_pta {
+	unsigned long val;
+	struct {
+		unsigned long ve : 1;
+		unsigned long reserved0 : 1;
+		unsigned long size : 6;
+		unsigned long vf : 1;
+		unsigned long reserved1 : 6;
+		unsigned long base : 49;
+	};
+};
+
+struct thash_cb {
+	/* THASH base information */
+	struct thash_data	*hash; /* hash table pointer */
+	union ia64_pta		pta;
+	int           num;
+};
+
+struct kvm_vcpu_stat {
+};
+
+struct kvm_vcpu_arch {
+	int launched;
+	int last_exit;
+	int last_run_cpu;
+	int vmm_tr_slot;
+	int vm_tr_slot;
+	int sn_rtc_tr_slot;
+
+#define KVM_MP_STATE_RUNNABLE          0
+#define KVM_MP_STATE_UNINITIALIZED     1
+#define KVM_MP_STATE_INIT_RECEIVED     2
+#define KVM_MP_STATE_HALTED            3
+	int mp_state;
+
+#define MAX_PTC_G_NUM			3
+	int ptc_g_count;
+	struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM];
+
+	/*halt timer to wake up sleepy vcpus*/
+	struct hrtimer hlt_timer;
+	long ht_active;
+
+	struct kvm_lapic *apic;    /* kernel irqchip context */
+	struct vpd *vpd;
+
+	/* Exit data for vmm_transition*/
+	struct exit_ctl_data exit_data;
+
+	cpumask_t cache_coherent_map;
+
+	unsigned long vmm_rr;
+	unsigned long host_rr6;
+	unsigned long psbits[8];
+	unsigned long cr_iipa;
+	unsigned long cr_isr;
+	unsigned long vsa_base;
+	unsigned long dirty_log_lock_pa;
+	unsigned long __gp;
+	/* TR and TC.  */
+	struct thash_data itrs[NITRS];
+	struct thash_data dtrs[NDTRS];
+	/* Bit is set if there is a tr/tc for the region.  */
+	unsigned char itr_regions;
+	unsigned char dtr_regions;
+	unsigned char tc_regions;
+	/* purge all */
+	unsigned long ptce_base;
+	unsigned long ptce_count[2];
+	unsigned long ptce_stride[2];
+	/* itc/itm */
+	unsigned long last_itc;
+	long itc_offset;
+	unsigned long itc_check;
+	unsigned long timer_check;
+	unsigned int timer_pending;
+	unsigned int timer_fired;
+
+	unsigned long vrr[8];
+	unsigned long ibr[8];
+	unsigned long dbr[8];
+	unsigned long insvc[4];		/* Interrupt in service.  */
+	unsigned long xtp;
+
+	unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
+	unsigned long metaphysical_rr4;	/* from kvm_arch (so is pinned) */
+	unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
+	unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
+	unsigned long fp_psr;       /*used for lazy float register */
+	unsigned long saved_gp;
+	/*for phycial  emulation */
+	int mode_flags;
+	struct thash_cb vtlb;
+	struct thash_cb vhpt;
+	char irq_check;
+	char irq_new_pending;
+
+	unsigned long opcode;
+	unsigned long cause;
+	char log_buf[VMM_LOG_LEN];
+	union context host;
+	union context guest;
+};
+
+struct kvm_vm_stat {
+	u64 remote_tlb_flush;
+};
+
+struct kvm_sal_data {
+	unsigned long boot_ip;
+	unsigned long boot_gp;
+};
+
+struct kvm_arch {
+	spinlock_t dirty_log_lock;
+
+	unsigned long	vm_base;
+	unsigned long	metaphysical_rr0;
+	unsigned long	metaphysical_rr4;
+	unsigned long	vmm_init_rr;
+
+	int		is_sn2;
+
+	struct kvm_ioapic *vioapic;
+	struct kvm_vm_stat stat;
+	struct kvm_sal_data rdv_sal_data;
+
+	struct list_head assigned_dev_head;
+	struct iommu_domain *iommu_domain;
+	int iommu_flags;
+
+	unsigned long irq_sources_bitmap;
+	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
+};
+
+union cpuid3_t {
+	u64 value;
+	struct {
+		u64 number : 8;
+		u64 revision : 8;
+		u64 model : 8;
+		u64 family : 8;
+		u64 archrev : 8;
+		u64 rv : 24;
+	};
+};
+
+struct kvm_pt_regs {
+	/* The following registers are saved by SAVE_MIN: */
+	unsigned long b6;  /* scratch */
+	unsigned long b7;  /* scratch */
+
+	unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */
+	unsigned long ar_ssd; /* reserved for future use (scratch) */
+
+	unsigned long r8;  /* scratch (return value register 0) */
+	unsigned long r9;  /* scratch (return value register 1) */
+	unsigned long r10; /* scratch (return value register 2) */
+	unsigned long r11; /* scratch (return value register 3) */
+
+	unsigned long cr_ipsr; /* interrupted task's psr */
+	unsigned long cr_iip;  /* interrupted task's instruction pointer */
+	unsigned long cr_ifs;  /* interrupted task's function state */
+
+	unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
+	unsigned long ar_pfs;  /* prev function state  */
+	unsigned long ar_rsc;  /* RSE configuration */
+	/* The following two are valid only if cr_ipsr.cpl > 0: */
+	unsigned long ar_rnat;  /* RSE NaT */
+	unsigned long ar_bspstore; /* RSE bspstore */
+
+	unsigned long pr;  /* 64 predicate registers (1 bit each) */
+	unsigned long b0;  /* return pointer (bp) */
+	unsigned long loadrs;  /* size of dirty partition << 16 */
+
+	unsigned long r1;  /* the gp pointer */
+	unsigned long r12; /* interrupted task's memory stack pointer */
+	unsigned long r13; /* thread pointer */
+
+	unsigned long ar_fpsr;  /* floating point status (preserved) */
+	unsigned long r15;  /* scratch */
+
+	/* The remaining registers are NOT saved for system calls.  */
+	unsigned long r14;  /* scratch */
+	unsigned long r2;  /* scratch */
+	unsigned long r3;  /* scratch */
+	unsigned long r16;  /* scratch */
+	unsigned long r17;  /* scratch */
+	unsigned long r18;  /* scratch */
+	unsigned long r19;  /* scratch */
+	unsigned long r20;  /* scratch */
+	unsigned long r21;  /* scratch */
+	unsigned long r22;  /* scratch */
+	unsigned long r23;  /* scratch */
+	unsigned long r24;  /* scratch */
+	unsigned long r25;  /* scratch */
+	unsigned long r26;  /* scratch */
+	unsigned long r27;  /* scratch */
+	unsigned long r28;  /* scratch */
+	unsigned long r29;  /* scratch */
+	unsigned long r30;  /* scratch */
+	unsigned long r31;  /* scratch */
+	unsigned long ar_ccv;  /* compare/exchange value (scratch) */
+
+	/*
+	 * Floating point registers that the kernel considers scratch:
+	 */
+	struct ia64_fpreg f6;  /* scratch */
+	struct ia64_fpreg f7;  /* scratch */
+	struct ia64_fpreg f8;  /* scratch */
+	struct ia64_fpreg f9;  /* scratch */
+	struct ia64_fpreg f10;  /* scratch */
+	struct ia64_fpreg f11;  /* scratch */
+
+	unsigned long r4;  /* preserved */
+	unsigned long r5;  /* preserved */
+	unsigned long r6;  /* preserved */
+	unsigned long r7;  /* preserved */
+	unsigned long eml_unat;    /* used for emulating instruction */
+	unsigned long pad0;     /* alignment pad */
+};
+
+static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
+{
+	return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
+}
+
+typedef int kvm_vmm_entry(void);
+typedef void kvm_tramp_entry(union context *host, union context *guest);
+
+struct kvm_vmm_info{
+	struct module	*module;
+	kvm_vmm_entry 	*vmm_entry;
+	kvm_tramp_entry *tramp_entry;
+	unsigned long 	vmm_ivt;
+	unsigned long	patch_mov_ar;
+	unsigned long	patch_mov_ar_sn2;
+};
+
+int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
+int kvm_emulate_halt(struct kvm_vcpu *vcpu);
+int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
+void kvm_sal_emul(struct kvm_vcpu *vcpu);
+
+#define __KVM_HAVE_ARCH_VM_ALLOC 1
+struct kvm *kvm_arch_alloc_vm(void);
+void kvm_arch_free_vm(struct kvm *kvm);
+
+#endif /* __ASSEMBLY__*/
+
+#endif
diff --git a/arch/ia64/include/asm/kvm_para.h b/arch/ia64/include/asm/kvm_para.h
new file mode 100644
index 00000000..1588aee7
--- /dev/null
+++ b/arch/ia64/include/asm/kvm_para.h
@@ -0,0 +1,31 @@
+#ifndef __IA64_KVM_PARA_H
+#define __IA64_KVM_PARA_H
+
+/*
+ * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifdef __KERNEL__
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return 0;
+}
+
+#endif
+
+#endif
diff --git a/arch/ia64/include/asm/libata-portmap.h b/arch/ia64/include/asm/libata-portmap.h
new file mode 100644
index 00000000..0e00c9a9
--- /dev/null
+++ b/arch/ia64/include/asm/libata-portmap.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_IA64_LIBATA_PORTMAP_H
+#define __ASM_IA64_LIBATA_PORTMAP_H
+
+#define ATA_PRIMARY_CMD		0x1F0
+#define ATA_PRIMARY_CTL		0x3F6
+#define ATA_PRIMARY_IRQ(dev)	isa_irq_to_vector(14)
+
+#define ATA_SECONDARY_CMD	0x170
+#define ATA_SECONDARY_CTL	0x376
+#define ATA_SECONDARY_IRQ(dev)	isa_irq_to_vector(15)
+
+#endif
diff --git a/arch/ia64/include/asm/linkage.h b/arch/ia64/include/asm/linkage.h
new file mode 100644
index 00000000..ef22a45c
--- /dev/null
+++ b/arch/ia64/include/asm/linkage.h
@@ -0,0 +1,14 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#ifndef __ASSEMBLY__
+
+#define asmlinkage CPP_ASMLINKAGE __attribute__((syscall_linkage))
+
+#else
+
+#include <asm/asmmacro.h>
+
+#endif
+
+#endif
diff --git a/arch/ia64/include/asm/local.h b/arch/ia64/include/asm/local.h
new file mode 100644
index 00000000..c11c530f
--- /dev/null
+++ b/arch/ia64/include/asm/local.h
@@ -0,0 +1 @@
+#include <asm-generic/local.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644
index 00000000..36c93b5c
--- /dev/null
+++ b/arch/ia64/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
new file mode 100644
index 00000000..367d299d
--- /dev/null
+++ b/arch/ia64/include/asm/machvec.h
@@ -0,0 +1,369 @@
+/*
+ * Machine vector for IA-64.
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Srinivasa Thirumalachar <sprasad@engr.sgi.com>
+ * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
+ * Copyright (C) 1999-2001, 2003-2004 Hewlett-Packard Co.
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#ifndef _ASM_IA64_MACHVEC_H
+#define _ASM_IA64_MACHVEC_H
+
+#include <linux/types.h>
+
+/* forward declarations: */
+struct device;
+struct pt_regs;
+struct scatterlist;
+struct page;
+struct mm_struct;
+struct pci_bus;
+struct task_struct;
+struct pci_dev;
+struct msi_desc;
+struct dma_attrs;
+
+typedef void ia64_mv_setup_t (char **);
+typedef void ia64_mv_cpu_init_t (void);
+typedef void ia64_mv_irq_init_t (void);
+typedef void ia64_mv_send_ipi_t (int, int, int, int);
+typedef void ia64_mv_timer_interrupt_t (int, void *);
+typedef void ia64_mv_global_tlb_purge_t (struct mm_struct *, unsigned long, unsigned long, unsigned long);
+typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *);
+typedef u8 ia64_mv_irq_to_vector (int);
+typedef unsigned int ia64_mv_local_vector_to_irq (u8);
+typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *);
+typedef int ia64_mv_pci_legacy_read_t (struct pci_bus *, u16 port, u32 *val,
+				       u8 size);
+typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
+					u8 size);
+typedef void ia64_mv_migrate_t(struct task_struct * task);
+typedef void ia64_mv_pci_fixup_bus_t (struct pci_bus *);
+typedef void ia64_mv_kernel_launch_event_t(void);
+
+/* DMA-mapping interface: */
+typedef void ia64_mv_dma_init (void);
+typedef u64 ia64_mv_dma_get_required_mask (struct device *);
+typedef struct dma_map_ops *ia64_mv_dma_get_ops(struct device *);
+
+/*
+ * WARNING: The legacy I/O space is _architected_.  Platforms are
+ * expected to follow this architected model (see Section 10.7 in the
+ * IA-64 Architecture Software Developer's Manual).  Unfortunately,
+ * some broken machines do not follow that model, which is why we have
+ * to make the inX/outX operations part of the machine vector.
+ * Platform designers should follow the architected model whenever
+ * possible.
+ */
+typedef unsigned int ia64_mv_inb_t (unsigned long);
+typedef unsigned int ia64_mv_inw_t (unsigned long);
+typedef unsigned int ia64_mv_inl_t (unsigned long);
+typedef void ia64_mv_outb_t (unsigned char, unsigned long);
+typedef void ia64_mv_outw_t (unsigned short, unsigned long);
+typedef void ia64_mv_outl_t (unsigned int, unsigned long);
+typedef void ia64_mv_mmiowb_t (void);
+typedef unsigned char ia64_mv_readb_t (const volatile void __iomem *);
+typedef unsigned short ia64_mv_readw_t (const volatile void __iomem *);
+typedef unsigned int ia64_mv_readl_t (const volatile void __iomem *);
+typedef unsigned long ia64_mv_readq_t (const volatile void __iomem *);
+typedef unsigned char ia64_mv_readb_relaxed_t (const volatile void __iomem *);
+typedef unsigned short ia64_mv_readw_relaxed_t (const volatile void __iomem *);
+typedef unsigned int ia64_mv_readl_relaxed_t (const volatile void __iomem *);
+typedef unsigned long ia64_mv_readq_relaxed_t (const volatile void __iomem *);
+
+typedef int ia64_mv_setup_msi_irq_t (struct pci_dev *pdev, struct msi_desc *);
+typedef void ia64_mv_teardown_msi_irq_t (unsigned int irq);
+
+static inline void
+machvec_noop (void)
+{
+}
+
+static inline void
+machvec_noop_mm (struct mm_struct *mm)
+{
+}
+
+static inline void
+machvec_noop_task (struct task_struct *task)
+{
+}
+
+static inline void
+machvec_noop_bus (struct pci_bus *bus)
+{
+}
+
+extern void machvec_setup (char **);
+extern void machvec_timer_interrupt (int, void *);
+extern void machvec_tlb_migrate_finish (struct mm_struct *);
+
+# if defined (CONFIG_IA64_HP_SIM)
+#  include <asm/machvec_hpsim.h>
+# elif defined (CONFIG_IA64_DIG)
+#  include <asm/machvec_dig.h>
+# elif defined(CONFIG_IA64_DIG_VTD)
+#  include <asm/machvec_dig_vtd.h>
+# elif defined (CONFIG_IA64_HP_ZX1)
+#  include <asm/machvec_hpzx1.h>
+# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
+#  include <asm/machvec_hpzx1_swiotlb.h>
+# elif defined (CONFIG_IA64_SGI_SN2)
+#  include <asm/machvec_sn2.h>
+# elif defined (CONFIG_IA64_SGI_UV)
+#  include <asm/machvec_uv.h>
+# elif defined (CONFIG_IA64_XEN_GUEST)
+#  include <asm/machvec_xen.h>
+# elif defined (CONFIG_IA64_GENERIC)
+
+# ifdef MACHVEC_PLATFORM_HEADER
+#  include MACHVEC_PLATFORM_HEADER
+# else
+#  define platform_name		ia64_mv.name
+#  define platform_setup	ia64_mv.setup
+#  define platform_cpu_init	ia64_mv.cpu_init
+#  define platform_irq_init	ia64_mv.irq_init
+#  define platform_send_ipi	ia64_mv.send_ipi
+#  define platform_timer_interrupt	ia64_mv.timer_interrupt
+#  define platform_global_tlb_purge	ia64_mv.global_tlb_purge
+#  define platform_tlb_migrate_finish	ia64_mv.tlb_migrate_finish
+#  define platform_dma_init		ia64_mv.dma_init
+#  define platform_dma_get_required_mask ia64_mv.dma_get_required_mask
+#  define platform_dma_get_ops		ia64_mv.dma_get_ops
+#  define platform_irq_to_vector	ia64_mv.irq_to_vector
+#  define platform_local_vector_to_irq	ia64_mv.local_vector_to_irq
+#  define platform_pci_get_legacy_mem	ia64_mv.pci_get_legacy_mem
+#  define platform_pci_legacy_read	ia64_mv.pci_legacy_read
+#  define platform_pci_legacy_write	ia64_mv.pci_legacy_write
+#  define platform_inb		ia64_mv.inb
+#  define platform_inw		ia64_mv.inw
+#  define platform_inl		ia64_mv.inl
+#  define platform_outb		ia64_mv.outb
+#  define platform_outw		ia64_mv.outw
+#  define platform_outl		ia64_mv.outl
+#  define platform_mmiowb	ia64_mv.mmiowb
+#  define platform_readb        ia64_mv.readb
+#  define platform_readw        ia64_mv.readw
+#  define platform_readl        ia64_mv.readl
+#  define platform_readq        ia64_mv.readq
+#  define platform_readb_relaxed        ia64_mv.readb_relaxed
+#  define platform_readw_relaxed        ia64_mv.readw_relaxed
+#  define platform_readl_relaxed        ia64_mv.readl_relaxed
+#  define platform_readq_relaxed        ia64_mv.readq_relaxed
+#  define platform_migrate		ia64_mv.migrate
+#  define platform_setup_msi_irq	ia64_mv.setup_msi_irq
+#  define platform_teardown_msi_irq	ia64_mv.teardown_msi_irq
+#  define platform_pci_fixup_bus	ia64_mv.pci_fixup_bus
+#  define platform_kernel_launch_event	ia64_mv.kernel_launch_event
+# endif
+
+/* __attribute__((__aligned__(16))) is required to make size of the
+ * structure multiple of 16 bytes.
+ * This will fillup the holes created because of section 3.3.1 in
+ * Software Conventions guide.
+ */
+struct ia64_machine_vector {
+	const char *name;
+	ia64_mv_setup_t *setup;
+	ia64_mv_cpu_init_t *cpu_init;
+	ia64_mv_irq_init_t *irq_init;
+	ia64_mv_send_ipi_t *send_ipi;
+	ia64_mv_timer_interrupt_t *timer_interrupt;
+	ia64_mv_global_tlb_purge_t *global_tlb_purge;
+	ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish;
+	ia64_mv_dma_init *dma_init;
+	ia64_mv_dma_get_required_mask *dma_get_required_mask;
+	ia64_mv_dma_get_ops *dma_get_ops;
+	ia64_mv_irq_to_vector *irq_to_vector;
+	ia64_mv_local_vector_to_irq *local_vector_to_irq;
+	ia64_mv_pci_get_legacy_mem_t *pci_get_legacy_mem;
+	ia64_mv_pci_legacy_read_t *pci_legacy_read;
+	ia64_mv_pci_legacy_write_t *pci_legacy_write;
+	ia64_mv_inb_t *inb;
+	ia64_mv_inw_t *inw;
+	ia64_mv_inl_t *inl;
+	ia64_mv_outb_t *outb;
+	ia64_mv_outw_t *outw;
+	ia64_mv_outl_t *outl;
+	ia64_mv_mmiowb_t *mmiowb;
+	ia64_mv_readb_t *readb;
+	ia64_mv_readw_t *readw;
+	ia64_mv_readl_t *readl;
+	ia64_mv_readq_t *readq;
+	ia64_mv_readb_relaxed_t *readb_relaxed;
+	ia64_mv_readw_relaxed_t *readw_relaxed;
+	ia64_mv_readl_relaxed_t *readl_relaxed;
+	ia64_mv_readq_relaxed_t *readq_relaxed;
+	ia64_mv_migrate_t *migrate;
+	ia64_mv_setup_msi_irq_t *setup_msi_irq;
+	ia64_mv_teardown_msi_irq_t *teardown_msi_irq;
+	ia64_mv_pci_fixup_bus_t *pci_fixup_bus;
+	ia64_mv_kernel_launch_event_t *kernel_launch_event;
+} __attribute__((__aligned__(16))); /* align attrib? see above comment */
+
+#define MACHVEC_INIT(name)			\
+{						\
+	#name,					\
+	platform_setup,				\
+	platform_cpu_init,			\
+	platform_irq_init,			\
+	platform_send_ipi,			\
+	platform_timer_interrupt,		\
+	platform_global_tlb_purge,		\
+	platform_tlb_migrate_finish,		\
+	platform_dma_init,			\
+	platform_dma_get_required_mask,		\
+	platform_dma_get_ops,			\
+	platform_irq_to_vector,			\
+	platform_local_vector_to_irq,		\
+	platform_pci_get_legacy_mem,		\
+	platform_pci_legacy_read,		\
+	platform_pci_legacy_write,		\
+	platform_inb,				\
+	platform_inw,				\
+	platform_inl,				\
+	platform_outb,				\
+	platform_outw,				\
+	platform_outl,				\
+	platform_mmiowb,			\
+	platform_readb,				\
+	platform_readw,				\
+	platform_readl,				\
+	platform_readq,				\
+	platform_readb_relaxed,			\
+	platform_readw_relaxed,			\
+	platform_readl_relaxed,			\
+	platform_readq_relaxed,			\
+	platform_migrate,			\
+	platform_setup_msi_irq,			\
+	platform_teardown_msi_irq,		\
+	platform_pci_fixup_bus,			\
+	platform_kernel_launch_event            \
+}
+
+extern struct ia64_machine_vector ia64_mv;
+extern void machvec_init (const char *name);
+extern void machvec_init_from_cmdline(const char *cmdline);
+
+# else
+#  error Unknown configuration.  Update arch/ia64/include/asm/machvec.h.
+# endif /* CONFIG_IA64_GENERIC */
+
+extern void swiotlb_dma_init(void);
+extern struct dma_map_ops *dma_get_ops(struct device *);
+
+/*
+ * Define default versions so we can extend machvec for new platforms without having
+ * to update the machvec files for all existing platforms.
+ */
+#ifndef platform_setup
+# define platform_setup			machvec_setup
+#endif
+#ifndef platform_cpu_init
+# define platform_cpu_init		machvec_noop
+#endif
+#ifndef platform_irq_init
+# define platform_irq_init		machvec_noop
+#endif
+
+#ifndef platform_send_ipi
+# define platform_send_ipi		ia64_send_ipi	/* default to architected version */
+#endif
+#ifndef platform_timer_interrupt
+# define platform_timer_interrupt 	machvec_timer_interrupt
+#endif
+#ifndef platform_global_tlb_purge
+# define platform_global_tlb_purge	ia64_global_tlb_purge /* default to architected version */
+#endif
+#ifndef platform_tlb_migrate_finish
+# define platform_tlb_migrate_finish	machvec_noop_mm
+#endif
+#ifndef platform_kernel_launch_event
+# define platform_kernel_launch_event	machvec_noop
+#endif
+#ifndef platform_dma_init
+# define platform_dma_init		swiotlb_dma_init
+#endif
+#ifndef platform_dma_get_ops
+# define platform_dma_get_ops		dma_get_ops
+#endif
+#ifndef platform_dma_get_required_mask
+# define  platform_dma_get_required_mask	ia64_dma_get_required_mask
+#endif
+#ifndef platform_irq_to_vector
+# define platform_irq_to_vector		__ia64_irq_to_vector
+#endif
+#ifndef platform_local_vector_to_irq
+# define platform_local_vector_to_irq	__ia64_local_vector_to_irq
+#endif
+#ifndef platform_pci_get_legacy_mem
+# define platform_pci_get_legacy_mem	ia64_pci_get_legacy_mem
+#endif
+#ifndef platform_pci_legacy_read
+# define platform_pci_legacy_read	ia64_pci_legacy_read
+extern int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size);
+#endif
+#ifndef platform_pci_legacy_write
+# define platform_pci_legacy_write	ia64_pci_legacy_write
+extern int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size);
+#endif
+#ifndef platform_inb
+# define platform_inb		__ia64_inb
+#endif
+#ifndef platform_inw
+# define platform_inw		__ia64_inw
+#endif
+#ifndef platform_inl
+# define platform_inl		__ia64_inl
+#endif
+#ifndef platform_outb
+# define platform_outb		__ia64_outb
+#endif
+#ifndef platform_outw
+# define platform_outw		__ia64_outw
+#endif
+#ifndef platform_outl
+# define platform_outl		__ia64_outl
+#endif
+#ifndef platform_mmiowb
+# define platform_mmiowb	__ia64_mmiowb
+#endif
+#ifndef platform_readb
+# define platform_readb		__ia64_readb
+#endif
+#ifndef platform_readw
+# define platform_readw		__ia64_readw
+#endif
+#ifndef platform_readl
+# define platform_readl		__ia64_readl
+#endif
+#ifndef platform_readq
+# define platform_readq		__ia64_readq
+#endif
+#ifndef platform_readb_relaxed
+# define platform_readb_relaxed	__ia64_readb_relaxed
+#endif
+#ifndef platform_readw_relaxed
+# define platform_readw_relaxed	__ia64_readw_relaxed
+#endif
+#ifndef platform_readl_relaxed
+# define platform_readl_relaxed	__ia64_readl_relaxed
+#endif
+#ifndef platform_readq_relaxed
+# define platform_readq_relaxed	__ia64_readq_relaxed
+#endif
+#ifndef platform_migrate
+# define platform_migrate machvec_noop_task
+#endif
+#ifndef platform_setup_msi_irq
+# define platform_setup_msi_irq		((ia64_mv_setup_msi_irq_t*)NULL)
+#endif
+#ifndef platform_teardown_msi_irq
+# define platform_teardown_msi_irq	((ia64_mv_teardown_msi_irq_t*)NULL)
+#endif
+#ifndef platform_pci_fixup_bus
+# define platform_pci_fixup_bus	machvec_noop_bus
+#endif
+
+#endif /* _ASM_IA64_MACHVEC_H */
diff --git a/arch/ia64/include/asm/machvec_dig.h b/arch/ia64/include/asm/machvec_dig.h
new file mode 100644
index 00000000..8a0752f4
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_dig.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_IA64_MACHVEC_DIG_h
+#define _ASM_IA64_MACHVEC_DIG_h
+
+extern ia64_mv_setup_t dig_setup;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define platform_name		"dig"
+#define platform_setup		dig_setup
+
+#endif /* _ASM_IA64_MACHVEC_DIG_h */
diff --git a/arch/ia64/include/asm/machvec_dig_vtd.h b/arch/ia64/include/asm/machvec_dig_vtd.h
new file mode 100644
index 00000000..6ab1de5c
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_dig_vtd.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_IA64_MACHVEC_DIG_VTD_h
+#define _ASM_IA64_MACHVEC_DIG_VTD_h
+
+extern ia64_mv_setup_t			dig_setup;
+extern ia64_mv_dma_init			pci_iommu_alloc;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define platform_name				"dig_vtd"
+#define platform_setup				dig_setup
+#define platform_dma_init			pci_iommu_alloc
+
+#endif /* _ASM_IA64_MACHVEC_DIG_VTD_h */
diff --git a/arch/ia64/include/asm/machvec_hpsim.h b/arch/ia64/include/asm/machvec_hpsim.h
new file mode 100644
index 00000000..cf72fc87
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_hpsim.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_IA64_MACHVEC_HPSIM_h
+#define _ASM_IA64_MACHVEC_HPSIM_h
+
+extern ia64_mv_setup_t hpsim_setup;
+extern ia64_mv_irq_init_t hpsim_irq_init;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define platform_name		"hpsim"
+#define platform_setup		hpsim_setup
+#define platform_irq_init	hpsim_irq_init
+
+#endif /* _ASM_IA64_MACHVEC_HPSIM_h */
diff --git a/arch/ia64/include/asm/machvec_hpzx1.h b/arch/ia64/include/asm/machvec_hpzx1.h
new file mode 100644
index 00000000..3bd83d78
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_hpzx1.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_IA64_MACHVEC_HPZX1_h
+#define _ASM_IA64_MACHVEC_HPZX1_h
+
+extern ia64_mv_setup_t			dig_setup;
+extern ia64_mv_dma_init			sba_dma_init;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define platform_name				"hpzx1"
+#define platform_setup				dig_setup
+#define platform_dma_init			sba_dma_init
+
+#endif /* _ASM_IA64_MACHVEC_HPZX1_h */
diff --git a/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h b/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h
new file mode 100644
index 00000000..1091ac39
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h
@@ -0,0 +1,19 @@
+#ifndef _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h
+#define _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h
+
+extern ia64_mv_setup_t				dig_setup;
+extern ia64_mv_dma_get_ops			hwsw_dma_get_ops;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define platform_name				"hpzx1_swiotlb"
+#define platform_setup				dig_setup
+#define platform_dma_init			machvec_noop
+#define platform_dma_get_ops			hwsw_dma_get_ops
+
+#endif /* _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h */
diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h
new file mode 100644
index 00000000..37a46984
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_init.h
@@ -0,0 +1,35 @@
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+
+extern ia64_mv_send_ipi_t ia64_send_ipi;
+extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge;
+extern ia64_mv_dma_get_required_mask ia64_dma_get_required_mask;
+extern ia64_mv_irq_to_vector __ia64_irq_to_vector;
+extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq;
+extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem;
+extern ia64_mv_pci_legacy_read_t ia64_pci_legacy_read;
+extern ia64_mv_pci_legacy_write_t ia64_pci_legacy_write;
+
+extern ia64_mv_inb_t __ia64_inb;
+extern ia64_mv_inw_t __ia64_inw;
+extern ia64_mv_inl_t __ia64_inl;
+extern ia64_mv_outb_t __ia64_outb;
+extern ia64_mv_outw_t __ia64_outw;
+extern ia64_mv_outl_t __ia64_outl;
+extern ia64_mv_mmiowb_t __ia64_mmiowb;
+extern ia64_mv_readb_t __ia64_readb;
+extern ia64_mv_readw_t __ia64_readw;
+extern ia64_mv_readl_t __ia64_readl;
+extern ia64_mv_readq_t __ia64_readq;
+extern ia64_mv_readb_t __ia64_readb_relaxed;
+extern ia64_mv_readw_t __ia64_readw_relaxed;
+extern ia64_mv_readl_t __ia64_readl_relaxed;
+extern ia64_mv_readq_t __ia64_readq_relaxed;
+
+#define MACHVEC_HELPER(name)									\
+ struct ia64_machine_vector machvec_##name __attribute__ ((unused, __section__ (".machvec")))	\
+	= MACHVEC_INIT(name);
+
+#define MACHVEC_DEFINE(name)	MACHVEC_HELPER(name)
+
+MACHVEC_DEFINE(MACHVEC_PLATFORM_NAME)
diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h
new file mode 100644
index 00000000..f061a30a
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_sn2.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2002-2003,2006 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it 
+ * under the terms of version 2 of the GNU General Public License 
+ * as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * 
+ * Further, this software is distributed without any warranty that it is 
+ * free of the rightful claim of any third person regarding infringement 
+ * or the like.  Any license provided herein, whether implied or 
+ * otherwise, applies only to this software file.  Patent licenses, if 
+ * any, provided herein do not apply to combinations of this program with 
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public 
+ * License along with this program; if not, write the Free Software 
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/NoticeExplan
+ */
+
+#ifndef _ASM_IA64_MACHVEC_SN2_H
+#define _ASM_IA64_MACHVEC_SN2_H
+
+extern ia64_mv_setup_t sn_setup;
+extern ia64_mv_cpu_init_t sn_cpu_init;
+extern ia64_mv_irq_init_t sn_irq_init;
+extern ia64_mv_send_ipi_t sn2_send_IPI;
+extern ia64_mv_timer_interrupt_t sn_timer_interrupt;
+extern ia64_mv_global_tlb_purge_t sn2_global_tlb_purge;
+extern ia64_mv_tlb_migrate_finish_t	sn_tlb_migrate_finish;
+extern ia64_mv_irq_to_vector sn_irq_to_vector;
+extern ia64_mv_local_vector_to_irq sn_local_vector_to_irq;
+extern ia64_mv_pci_get_legacy_mem_t sn_pci_get_legacy_mem;
+extern ia64_mv_pci_legacy_read_t sn_pci_legacy_read;
+extern ia64_mv_pci_legacy_write_t sn_pci_legacy_write;
+extern ia64_mv_inb_t __sn_inb;
+extern ia64_mv_inw_t __sn_inw;
+extern ia64_mv_inl_t __sn_inl;
+extern ia64_mv_outb_t __sn_outb;
+extern ia64_mv_outw_t __sn_outw;
+extern ia64_mv_outl_t __sn_outl;
+extern ia64_mv_mmiowb_t __sn_mmiowb;
+extern ia64_mv_readb_t __sn_readb;
+extern ia64_mv_readw_t __sn_readw;
+extern ia64_mv_readl_t __sn_readl;
+extern ia64_mv_readq_t __sn_readq;
+extern ia64_mv_readb_t __sn_readb_relaxed;
+extern ia64_mv_readw_t __sn_readw_relaxed;
+extern ia64_mv_readl_t __sn_readl_relaxed;
+extern ia64_mv_readq_t __sn_readq_relaxed;
+extern ia64_mv_dma_get_required_mask	sn_dma_get_required_mask;
+extern ia64_mv_dma_init			sn_dma_init;
+extern ia64_mv_migrate_t		sn_migrate;
+extern ia64_mv_kernel_launch_event_t	sn_kernel_launch_event;
+extern ia64_mv_setup_msi_irq_t		sn_setup_msi_irq;
+extern ia64_mv_teardown_msi_irq_t	sn_teardown_msi_irq;
+extern ia64_mv_pci_fixup_bus_t		sn_pci_fixup_bus;
+
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define platform_name			"sn2"
+#define platform_setup			sn_setup
+#define platform_cpu_init		sn_cpu_init
+#define platform_irq_init		sn_irq_init
+#define platform_send_ipi		sn2_send_IPI
+#define platform_timer_interrupt	sn_timer_interrupt
+#define platform_global_tlb_purge       sn2_global_tlb_purge
+#define platform_tlb_migrate_finish	sn_tlb_migrate_finish
+#define platform_pci_fixup		sn_pci_fixup
+#define platform_inb			__sn_inb
+#define platform_inw			__sn_inw
+#define platform_inl			__sn_inl
+#define platform_outb			__sn_outb
+#define platform_outw			__sn_outw
+#define platform_outl			__sn_outl
+#define platform_mmiowb			__sn_mmiowb
+#define platform_readb			__sn_readb
+#define platform_readw			__sn_readw
+#define platform_readl			__sn_readl
+#define platform_readq			__sn_readq
+#define platform_readb_relaxed		__sn_readb_relaxed
+#define platform_readw_relaxed		__sn_readw_relaxed
+#define platform_readl_relaxed		__sn_readl_relaxed
+#define platform_readq_relaxed		__sn_readq_relaxed
+#define platform_irq_to_vector		sn_irq_to_vector
+#define platform_local_vector_to_irq	sn_local_vector_to_irq
+#define platform_pci_get_legacy_mem	sn_pci_get_legacy_mem
+#define platform_pci_legacy_read	sn_pci_legacy_read
+#define platform_pci_legacy_write	sn_pci_legacy_write
+#define platform_dma_get_required_mask	sn_dma_get_required_mask
+#define platform_dma_init		sn_dma_init
+#define platform_migrate		sn_migrate
+#define platform_kernel_launch_event    sn_kernel_launch_event
+#ifdef CONFIG_PCI_MSI
+#define platform_setup_msi_irq		sn_setup_msi_irq
+#define platform_teardown_msi_irq	sn_teardown_msi_irq
+#else
+#define platform_setup_msi_irq		((ia64_mv_setup_msi_irq_t*)NULL)
+#define platform_teardown_msi_irq	((ia64_mv_teardown_msi_irq_t*)NULL)
+#endif
+#define platform_pci_fixup_bus		sn_pci_fixup_bus
+
+#include <asm/sn/io.h>
+
+#endif /* _ASM_IA64_MACHVEC_SN2_H */
diff --git a/arch/ia64/include/asm/machvec_uv.h b/arch/ia64/include/asm/machvec_uv.h
new file mode 100644
index 00000000..2931447f
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_uv.h
@@ -0,0 +1,26 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV Core Functions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_MACHVEC_UV_H
+#define _ASM_IA64_MACHVEC_UV_H
+
+extern ia64_mv_setup_t uv_setup;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define platform_name			"uv"
+#define platform_setup			uv_setup
+
+#endif /* _ASM_IA64_MACHVEC_UV_H */
diff --git a/arch/ia64/include/asm/machvec_xen.h b/arch/ia64/include/asm/machvec_xen.h
new file mode 100644
index 00000000..55f92280
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_xen.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_IA64_MACHVEC_XEN_h
+#define _ASM_IA64_MACHVEC_XEN_h
+
+extern ia64_mv_setup_t			dig_setup;
+extern ia64_mv_cpu_init_t		xen_cpu_init;
+extern ia64_mv_irq_init_t		xen_irq_init;
+extern ia64_mv_send_ipi_t		xen_platform_send_ipi;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define platform_name				"xen"
+#define platform_setup				dig_setup
+#define platform_cpu_init			xen_cpu_init
+#define platform_irq_init			xen_irq_init
+#define platform_send_ipi			xen_platform_send_ipi
+
+#endif /* _ASM_IA64_MACHVEC_XEN_h */
diff --git a/arch/ia64/include/asm/mc146818rtc.h b/arch/ia64/include/asm/mc146818rtc.h
new file mode 100644
index 00000000..407787a2
--- /dev/null
+++ b/arch/ia64/include/asm/mc146818rtc.h
@@ -0,0 +1,10 @@
+#ifndef _ASM_IA64_MC146818RTC_H
+#define _ASM_IA64_MC146818RTC_H
+
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+
+/* empty include file to satisfy the include in genrtc.c */
+
+#endif /* _ASM_IA64_MC146818RTC_H */
diff --git a/arch/ia64/include/asm/mca.h b/arch/ia64/include/asm/mca.h
new file mode 100644
index 00000000..43f96ab1
--- /dev/null
+++ b/arch/ia64/include/asm/mca.h
@@ -0,0 +1,186 @@
+/*
+ * File:	mca.h
+ * Purpose:	Machine check handling specific defines
+ *
+ * Copyright (C) 1999, 2004 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
+ * Copyright (C) Srinivasa Thirumalachar <sprasad@engr.sgi.com>
+ * Copyright (C) Russ Anderson <rja@sgi.com>
+ */
+
+#ifndef _ASM_IA64_MCA_H
+#define _ASM_IA64_MCA_H
+
+#if !defined(__ASSEMBLY__)
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+
+#include <asm/param.h>
+#include <asm/sal.h>
+#include <asm/processor.h>
+#include <asm/mca_asm.h>
+
+#define IA64_MCA_RENDEZ_TIMEOUT		(20 * 1000)	/* value in milliseconds - 20 seconds */
+
+typedef struct ia64_fptr {
+	unsigned long fp;
+	unsigned long gp;
+} ia64_fptr_t;
+
+typedef union cmcv_reg_u {
+	u64	cmcv_regval;
+	struct	{
+		u64	cmcr_vector		: 8;
+		u64	cmcr_reserved1		: 4;
+		u64	cmcr_ignored1		: 1;
+		u64	cmcr_reserved2		: 3;
+		u64	cmcr_mask		: 1;
+		u64	cmcr_ignored2		: 47;
+	} cmcv_reg_s;
+
+} cmcv_reg_t;
+
+#define cmcv_mask		cmcv_reg_s.cmcr_mask
+#define cmcv_vector		cmcv_reg_s.cmcr_vector
+
+enum {
+	IA64_MCA_RENDEZ_CHECKIN_NOTDONE	=	0x0,
+	IA64_MCA_RENDEZ_CHECKIN_DONE	=	0x1,
+	IA64_MCA_RENDEZ_CHECKIN_INIT	=	0x2,
+	IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA	=	0x3,
+};
+
+/* Information maintained by the MC infrastructure */
+typedef struct ia64_mc_info_s {
+	u64		imi_mca_handler;
+	size_t		imi_mca_handler_size;
+	u64		imi_monarch_init_handler;
+	size_t		imi_monarch_init_handler_size;
+	u64		imi_slave_init_handler;
+	size_t		imi_slave_init_handler_size;
+	u8		imi_rendez_checkin[NR_CPUS];
+
+} ia64_mc_info_t;
+
+/* Handover state from SAL to OS and vice versa, for both MCA and INIT events.
+ * Besides the handover state, it also contains some saved registers from the
+ * time of the event.
+ * Note: mca_asm.S depends on the precise layout of this structure.
+ */
+
+struct ia64_sal_os_state {
+
+	/* SAL to OS */
+	unsigned long		os_gp;			/* GP of the os registered with the SAL, physical */
+	unsigned long		pal_proc;		/* PAL_PROC entry point, physical */
+	unsigned long		sal_proc;		/* SAL_PROC entry point, physical */
+	unsigned long		rv_rc;			/* MCA - Rendezvous state, INIT - reason code */
+	unsigned long		proc_state_param;	/* from R18 */
+	unsigned long		monarch;		/* 1 for a monarch event, 0 for a slave */
+
+	/* common */
+	unsigned long		sal_ra;			/* Return address in SAL, physical */
+	unsigned long		sal_gp;			/* GP of the SAL - physical */
+	pal_min_state_area_t	*pal_min_state;		/* from R17.  physical in asm, virtual in C */
+	/* Previous values of IA64_KR(CURRENT) and IA64_KR(CURRENT_STACK).
+	 * Note: if the MCA/INIT recovery code wants to resume to a new context
+	 * then it must change these values to reflect the new kernel stack.
+	 */
+	unsigned long		prev_IA64_KR_CURRENT;	/* previous value of IA64_KR(CURRENT) */
+	unsigned long		prev_IA64_KR_CURRENT_STACK;
+	struct task_struct	*prev_task;		/* previous task, NULL if it is not useful */
+	/* Some interrupt registers are not saved in minstate, pt_regs or
+	 * switch_stack.  Because MCA/INIT can occur when interrupts are
+	 * disabled, we need to save the additional interrupt registers over
+	 * MCA/INIT and resume.
+	 */
+	unsigned long		isr;
+	unsigned long		ifa;
+	unsigned long		itir;
+	unsigned long		iipa;
+	unsigned long		iim;
+	unsigned long		iha;
+
+	/* OS to SAL */
+	unsigned long		os_status;		/* OS status to SAL, enum below */
+	unsigned long		context;		/* 0 if return to same context
+							   1 if return to new context */
+
+	/* I-resources */
+	unsigned long		iip;
+	unsigned long		ipsr;
+	unsigned long		ifs;
+};
+
+enum {
+	IA64_MCA_CORRECTED	=	0x0,	/* Error has been corrected by OS_MCA */
+	IA64_MCA_WARM_BOOT	=	-1,	/* Warm boot of the system need from SAL */
+	IA64_MCA_COLD_BOOT	=	-2,	/* Cold boot of the system need from SAL */
+	IA64_MCA_HALT		=	-3	/* System to be halted by SAL */
+};
+
+enum {
+	IA64_INIT_RESUME	=	0x0,	/* Resume after return from INIT */
+	IA64_INIT_WARM_BOOT	=	-1,	/* Warm boot of the system need from SAL */
+};
+
+enum {
+	IA64_MCA_SAME_CONTEXT	=	0x0,	/* SAL to return to same context */
+	IA64_MCA_NEW_CONTEXT	=	-1	/* SAL to return to new context */
+};
+
+/* Per-CPU MCA state that is too big for normal per-CPU variables.  */
+
+struct ia64_mca_cpu {
+	u64 mca_stack[KERNEL_STACK_SIZE/8];
+	u64 init_stack[KERNEL_STACK_SIZE/8];
+};
+
+/* Array of physical addresses of each CPU's MCA area.  */
+extern unsigned long __per_cpu_mca[NR_CPUS];
+
+extern int cpe_vector;
+extern int ia64_cpe_irq;
+extern void ia64_mca_init(void);
+extern void ia64_mca_cpu_init(void *);
+extern void ia64_os_mca_dispatch(void);
+extern void ia64_os_mca_dispatch_end(void);
+extern void ia64_mca_ucmc_handler(struct pt_regs *, struct ia64_sal_os_state *);
+extern void ia64_init_handler(struct pt_regs *,
+			      struct switch_stack *,
+			      struct ia64_sal_os_state *);
+extern void ia64_os_init_on_kdump(void);
+extern void ia64_monarch_init_handler(void);
+extern void ia64_slave_init_handler(void);
+extern void ia64_mca_cmc_vector_setup(void);
+extern int  ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *));
+extern void ia64_unreg_MCA_extension(void);
+extern unsigned long ia64_get_rnat(unsigned long *);
+extern void ia64_set_psr_mc(void);
+extern void ia64_mca_printk(const char * fmt, ...)
+	 __attribute__ ((format (printf, 1, 2)));
+
+struct ia64_mca_notify_die {
+	struct ia64_sal_os_state *sos;
+	int *monarch_cpu;
+	int *data;
+};
+
+DECLARE_PER_CPU(u64, ia64_mca_pal_base);
+
+#else	/* __ASSEMBLY__ */
+
+#define IA64_MCA_CORRECTED	0x0	/* Error has been corrected by OS_MCA */
+#define IA64_MCA_WARM_BOOT	-1	/* Warm boot of the system need from SAL */
+#define IA64_MCA_COLD_BOOT	-2	/* Cold boot of the system need from SAL */
+#define IA64_MCA_HALT		-3	/* System to be halted by SAL */
+
+#define IA64_INIT_RESUME	0x0	/* Resume after return from INIT */
+#define IA64_INIT_WARM_BOOT	-1	/* Warm boot of the system need from SAL */
+
+#define IA64_MCA_SAME_CONTEXT	0x0	/* SAL to return to same context */
+#define IA64_MCA_NEW_CONTEXT	-1	/* SAL to return to new context */
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_IA64_MCA_H */
diff --git a/arch/ia64/include/asm/mca_asm.h b/arch/ia64/include/asm/mca_asm.h
new file mode 100644
index 00000000..dd2a5b13
--- /dev/null
+++ b/arch/ia64/include/asm/mca_asm.h
@@ -0,0 +1,242 @@
+/*
+ * File:	mca_asm.h
+ * Purpose:	Machine check handling specific defines
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
+ * Copyright (C) Srinivasa Thirumalachar <sprasad@engr.sgi.com>
+ * Copyright (C) 2000 Hewlett-Packard Co.
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2002 Intel Corp.
+ * Copyright (C) 2002 Jenna Hall <jenna.s.hall@intel.com>
+ * Copyright (C) 2005 Silicon Graphics, Inc
+ * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
+ */
+#ifndef _ASM_IA64_MCA_ASM_H
+#define _ASM_IA64_MCA_ASM_H
+
+#define PSR_IC		13
+#define PSR_I		14
+#define	PSR_DT		17
+#define PSR_RT		27
+#define PSR_MC		35
+#define PSR_IT		36
+#define PSR_BN		44
+
+/*
+ * This macro converts a instruction virtual address to a physical address
+ * Right now for simulation purposes the virtual addresses are
+ * direct mapped to physical addresses.
+ *	1. Lop off bits 61 thru 63 in the virtual address
+ */
+#define INST_VA_TO_PA(addr)							\
+	dep	addr	= 0, addr, 61, 3
+/*
+ * This macro converts a data virtual address to a physical address
+ * Right now for simulation purposes the virtual addresses are
+ * direct mapped to physical addresses.
+ *	1. Lop off bits 61 thru 63 in the virtual address
+ */
+#define DATA_VA_TO_PA(addr)							\
+	tpa	addr	= addr
+/*
+ * This macro converts a data physical address to a virtual address
+ * Right now for simulation purposes the virtual addresses are
+ * direct mapped to physical addresses.
+ *	1. Put 0x7 in bits 61 thru 63.
+ */
+#define DATA_PA_TO_VA(addr,temp)							\
+	mov	temp	= 0x7	;;							\
+	dep	addr	= temp, addr, 61, 3
+
+#define GET_THIS_PADDR(reg, var)		\
+	mov	reg = IA64_KR(PER_CPU_DATA);;	\
+        addl	reg = THIS_CPU(var), reg
+
+/*
+ * This macro jumps to the instruction at the given virtual address
+ * and starts execution in physical mode with all the address
+ * translations turned off.
+ *	1.	Save the current psr
+ *	2.	Make sure that all the upper 32 bits are off
+ *
+ *	3.	Clear the interrupt enable and interrupt state collection bits
+ *		in the psr before updating the ipsr and iip.
+ *
+ *	4.	Turn off the instruction, data and rse translation bits of the psr
+ *		and store the new value into ipsr
+ *		Also make sure that the interrupts are disabled.
+ *		Ensure that we are in little endian mode.
+ *		[psr.{rt, it, dt, i, be} = 0]
+ *
+ *	5.	Get the physical address corresponding to the virtual address
+ *		of the next instruction bundle and put it in iip.
+ *		(Using magic numbers 24 and 40 in the deposint instruction since
+ *		 the IA64_SDK code directly maps to lower 24bits as physical address
+ *		 from a virtual address).
+ *
+ *	6.	Do an rfi to move the values from ipsr to psr and iip to ip.
+ */
+#define  PHYSICAL_MODE_ENTER(temp1, temp2, start_addr, old_psr)				\
+	mov	old_psr = psr;								\
+	;;										\
+	dep	old_psr = 0, old_psr, 32, 32;						\
+											\
+	mov	ar.rsc = 0 ;								\
+	;;										\
+	srlz.d;										\
+	mov	temp2 = ar.bspstore;							\
+	;;										\
+	DATA_VA_TO_PA(temp2);								\
+	;;										\
+	mov	temp1 = ar.rnat;							\
+	;;										\
+	mov	ar.bspstore = temp2;							\
+	;;										\
+	mov	ar.rnat = temp1;							\
+	mov	temp1 = psr;								\
+	mov	temp2 = psr;								\
+	;;										\
+											\
+	dep	temp2 = 0, temp2, PSR_IC, 2;						\
+	;;										\
+	mov	psr.l = temp2;								\
+	;;										\
+	srlz.d;										\
+	dep	temp1 = 0, temp1, 32, 32;						\
+	;;										\
+	dep	temp1 = 0, temp1, PSR_IT, 1;						\
+	;;										\
+	dep	temp1 = 0, temp1, PSR_DT, 1;						\
+	;;										\
+	dep	temp1 = 0, temp1, PSR_RT, 1;						\
+	;;										\
+	dep	temp1 = 0, temp1, PSR_I, 1;						\
+	;;										\
+	dep	temp1 = 0, temp1, PSR_IC, 1;						\
+	;;										\
+	dep	temp1 = -1, temp1, PSR_MC, 1;						\
+	;;										\
+	mov	cr.ipsr = temp1;							\
+	;;										\
+	LOAD_PHYSICAL(p0, temp2, start_addr);						\
+	;;										\
+	mov	cr.iip = temp2;								\
+	mov	cr.ifs = r0;								\
+	DATA_VA_TO_PA(sp);								\
+	DATA_VA_TO_PA(gp);								\
+	;;										\
+	srlz.i;										\
+	;;										\
+	nop	1;									\
+	nop	2;									\
+	nop	1;									\
+	nop	2;									\
+	rfi;										\
+	;;
+
+/*
+ * This macro jumps to the instruction at the given virtual address
+ * and starts execution in virtual mode with all the address
+ * translations turned on.
+ *	1.	Get the old saved psr
+ *
+ *	2.	Clear the interrupt state collection bit in the current psr.
+ *
+ *	3.	Set the instruction translation bit back in the old psr
+ *		Note we have to do this since we are right now saving only the
+ *		lower 32-bits of old psr.(Also the old psr has the data and
+ *		rse translation bits on)
+ *
+ *	4.	Set ipsr to this old_psr with "it" bit set and "bn" = 1.
+ *
+ *	5.	Reset the current thread pointer (r13).
+ *
+ *	6.	Set iip to the virtual address of the next instruction bundle.
+ *
+ *	7.	Do an rfi to move ipsr to psr and iip to ip.
+ */
+
+#define VIRTUAL_MODE_ENTER(temp1, temp2, start_addr, old_psr)	\
+	mov	temp2 = psr;					\
+	;;							\
+	mov	old_psr = temp2;				\
+	;;							\
+	dep	temp2 = 0, temp2, PSR_IC, 2;			\
+	;;							\
+	mov	psr.l = temp2;					\
+	mov	ar.rsc = 0;					\
+	;;							\
+	srlz.d;							\
+	mov	r13 = ar.k6;					\
+	mov	temp2 = ar.bspstore;				\
+	;;							\
+	DATA_PA_TO_VA(temp2,temp1);				\
+	;;							\
+	mov	temp1 = ar.rnat;				\
+	;;							\
+	mov	ar.bspstore = temp2;				\
+	;;							\
+	mov	ar.rnat = temp1;				\
+	;;							\
+	mov	temp1 = old_psr;				\
+	;;							\
+	mov	temp2 = 1;					\
+	;;							\
+	dep	temp1 = temp2, temp1, PSR_IC, 1;		\
+	;;							\
+	dep	temp1 = temp2, temp1, PSR_IT, 1;		\
+	;;							\
+	dep	temp1 = temp2, temp1, PSR_DT, 1;		\
+	;;							\
+	dep	temp1 = temp2, temp1, PSR_RT, 1;		\
+	;;							\
+	dep	temp1 = temp2, temp1, PSR_BN, 1;		\
+	;;							\
+								\
+	mov     cr.ipsr = temp1;				\
+	movl	temp2 = start_addr;				\
+	;;							\
+	mov	cr.iip = temp2;					\
+	movl	gp = __gp					\
+	;;							\
+	DATA_PA_TO_VA(sp, temp1);				\
+	srlz.i;							\
+	;;							\
+	nop	1;						\
+	nop	2;						\
+	nop	1;						\
+	rfi							\
+	;;
+
+/*
+ * The MCA and INIT stacks in struct ia64_mca_cpu look like normal kernel
+ * stacks, except that the SAL/OS state and a switch_stack are stored near the
+ * top of the MCA/INIT stack.  To support concurrent entry to MCA or INIT, as
+ * well as MCA over INIT, each event needs its own SAL/OS state.  All entries
+ * are 16 byte aligned.
+ *
+ *      +---------------------------+
+ *      |          pt_regs          |
+ *      +---------------------------+
+ *      |        switch_stack       |
+ *      +---------------------------+
+ *      |        SAL/OS state       |
+ *      +---------------------------+
+ *      |    16 byte scratch area   |
+ *      +---------------------------+ <-------- SP at start of C MCA handler
+ *      |           .....           |
+ *      +---------------------------+
+ *      | RBS for MCA/INIT handler  |
+ *      +---------------------------+
+ *      | struct task for MCA/INIT  |
+ *      +---------------------------+ <-------- Bottom of MCA/INIT stack
+ */
+
+#define ALIGN16(x)			((x)&~15)
+#define MCA_PT_REGS_OFFSET		ALIGN16(KERNEL_STACK_SIZE-IA64_PT_REGS_SIZE)
+#define MCA_SWITCH_STACK_OFFSET		ALIGN16(MCA_PT_REGS_OFFSET-IA64_SWITCH_STACK_SIZE)
+#define MCA_SOS_OFFSET			ALIGN16(MCA_SWITCH_STACK_OFFSET-IA64_SAL_OS_STATE_SIZE)
+#define MCA_SP_OFFSET			ALIGN16(MCA_SOS_OFFSET-16)
+
+#endif /* _ASM_IA64_MCA_ASM_H */
diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h
new file mode 100644
index 00000000..61c7b175
--- /dev/null
+++ b/arch/ia64/include/asm/meminit.h
@@ -0,0 +1,75 @@
+#ifndef meminit_h
+#define meminit_h
+
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+
+/*
+ * Entries defined so far:
+ * 	- boot param structure itself
+ * 	- memory map
+ * 	- initrd (optional)
+ * 	- command line string
+ * 	- kernel code & data
+ * 	- crash dumping code reserved region
+ * 	- Kernel memory map built from EFI memory map
+ * 	- ELF core header
+ *	- xen start info if CONFIG_XEN
+ *
+ * More could be added if necessary
+ */
+#define IA64_MAX_RSVD_REGIONS 9
+
+struct rsvd_region {
+	u64 start;	/* virtual address of beginning of element */
+	u64 end;	/* virtual address of end of element + 1 */
+};
+
+extern struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
+extern int num_rsvd_regions;
+
+extern void find_memory (void);
+extern void reserve_memory (void);
+extern void find_initrd (void);
+extern int filter_rsvd_memory (u64 start, u64 end, void *arg);
+extern int filter_memory (u64 start, u64 end, void *arg);
+extern unsigned long efi_memmap_init(u64 *s, u64 *e);
+extern int find_max_min_low_pfn (u64, u64, void *);
+
+extern unsigned long vmcore_find_descriptor_size(unsigned long address);
+extern int reserve_elfcorehdr(u64 *start, u64 *end);
+
+/*
+ * For rounding an address to the next IA64_GRANULE_SIZE or order
+ */
+#define GRANULEROUNDDOWN(n)	((n) & ~(IA64_GRANULE_SIZE-1))
+#define GRANULEROUNDUP(n)	(((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1))
+
+#ifdef CONFIG_NUMA
+  extern void call_pernode_memory (unsigned long start, unsigned long len, void *func);
+#else
+# define call_pernode_memory(start, len, func)	(*func)(start, len, 0)
+#endif
+
+#define IGNORE_PFN0	1	/* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
+
+extern int register_active_ranges(u64 start, u64 len, int nid);
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+# define LARGE_GAP	0x40000000 /* Use virtual mem map if hole is > than this */
+  extern unsigned long VMALLOC_END;
+  extern struct page *vmem_map;
+  extern int find_largest_hole(u64 start, u64 end, void *arg);
+  extern int create_mem_map_page_table(u64 start, u64 end, void *arg);
+  extern int vmemmap_find_next_valid_pfn(int, int);
+#else
+static inline int vmemmap_find_next_valid_pfn(int node, int i)
+{
+	return i + 1;
+}
+#endif
+#endif /* meminit_h */
diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h
new file mode 100644
index 00000000..4459028e
--- /dev/null
+++ b/arch/ia64/include/asm/mman.h
@@ -0,0 +1,23 @@
+#ifndef _ASM_IA64_MMAN_H
+#define _ASM_IA64_MMAN_H
+
+/*
+ * Based on <asm-i386/mman.h>.
+ *
+ * Modified 1998-2000, 2002
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+#include <asm-generic/mman.h>
+
+#define MAP_GROWSUP	0x0200		/* register stack-like segment */
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#define arch_mmap_check	ia64_mmap_check
+int ia64_mmap_check(unsigned long addr, unsigned long len,
+		unsigned long flags);
+#endif
+#endif
+
+#endif /* _ASM_IA64_MMAN_H */
diff --git a/arch/ia64/include/asm/mmu.h b/arch/ia64/include/asm/mmu.h
new file mode 100644
index 00000000..611432ba
--- /dev/null
+++ b/arch/ia64/include/asm/mmu.h
@@ -0,0 +1,13 @@
+#ifndef __MMU_H
+#define __MMU_H
+
+/*
+ * Type for a context number.  We declare it volatile to ensure proper
+ * ordering when it's accessed outside of spinlock'd critical sections
+ * (e.g., as done in activate_mm() and init_new_context()).
+ */
+typedef volatile unsigned long mm_context_t;
+
+typedef unsigned long nv_mm_context_t;
+
+#endif
diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h
new file mode 100644
index 00000000..7f2a4566
--- /dev/null
+++ b/arch/ia64/include/asm/mmu_context.h
@@ -0,0 +1,198 @@
+#ifndef _ASM_IA64_MMU_CONTEXT_H
+#define _ASM_IA64_MMU_CONTEXT_H
+
+/*
+ * Copyright (C) 1998-2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * Routines to manage the allocation of task context numbers.  Task context
+ * numbers are used to reduce or eliminate the need to perform TLB flushes
+ * due to context switches.  Context numbers are implemented using ia-64
+ * region ids.  Since the IA-64 TLB does not consider the region number when
+ * performing a TLB lookup, we need to assign a unique region id to each
+ * region in a process.  We use the least significant three bits in aregion
+ * id for this purpose.
+ */
+
+#define IA64_REGION_ID_KERNEL	0 /* the kernel's region id (tlb.c depends on this being 0) */
+
+#define ia64_rid(ctx,addr)	(((ctx) << 3) | (addr >> 61))
+
+# include <asm/page.h>
+# ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#include <asm/processor.h>
+#include <asm-generic/mm_hooks.h>
+
+struct ia64_ctx {
+	spinlock_t lock;
+	unsigned int next;	/* next context number to use */
+	unsigned int limit;     /* available free range */
+	unsigned int max_ctx;   /* max. context value supported by all CPUs */
+				/* call wrap_mmu_context when next >= max */
+	unsigned long *bitmap;  /* bitmap size is max_ctx+1 */
+	unsigned long *flushmap;/* pending rid to be flushed */
+};
+
+extern struct ia64_ctx ia64_ctx;
+DECLARE_PER_CPU(u8, ia64_need_tlb_flush);
+
+extern void mmu_context_init (void);
+extern void wrap_mmu_context (struct mm_struct *mm);
+
+static inline void
+enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+/*
+ * When the context counter wraps around all TLBs need to be flushed because
+ * an old context number might have been reused. This is signalled by the
+ * ia64_need_tlb_flush per-CPU variable, which is checked in the routine
+ * below. Called by activate_mm(). <efocht@ess.nec.de>
+ */
+static inline void
+delayed_tlb_flush (void)
+{
+	extern void local_flush_tlb_all (void);
+	unsigned long flags;
+
+	if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
+		spin_lock_irqsave(&ia64_ctx.lock, flags);
+		if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
+			local_flush_tlb_all();
+			__ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
+		}
+		spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+	}
+}
+
+static inline nv_mm_context_t
+get_mmu_context (struct mm_struct *mm)
+{
+	unsigned long flags;
+	nv_mm_context_t context = mm->context;
+
+	if (likely(context))
+		goto out;
+
+	spin_lock_irqsave(&ia64_ctx.lock, flags);
+	/* re-check, now that we've got the lock: */
+	context = mm->context;
+	if (context == 0) {
+		cpumask_clear(mm_cpumask(mm));
+		if (ia64_ctx.next >= ia64_ctx.limit) {
+			ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+					ia64_ctx.max_ctx, ia64_ctx.next);
+			ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+					ia64_ctx.max_ctx, ia64_ctx.next);
+			if (ia64_ctx.next >= ia64_ctx.max_ctx)
+				wrap_mmu_context(mm);
+		}
+		mm->context = context = ia64_ctx.next++;
+		__set_bit(context, ia64_ctx.bitmap);
+	}
+	spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+out:
+	/*
+	 * Ensure we're not starting to use "context" before any old
+	 * uses of it are gone from our TLB.
+	 */
+	delayed_tlb_flush();
+
+	return context;
+}
+
+/*
+ * Initialize context number to some sane value.  MM is guaranteed to be a
+ * brand-new address-space, so no TLB flushing is needed, ever.
+ */
+static inline int
+init_new_context (struct task_struct *p, struct mm_struct *mm)
+{
+	mm->context = 0;
+	return 0;
+}
+
+static inline void
+destroy_context (struct mm_struct *mm)
+{
+	/* Nothing to do.  */
+}
+
+static inline void
+reload_context (nv_mm_context_t context)
+{
+	unsigned long rid;
+	unsigned long rid_incr = 0;
+	unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4;
+
+	old_rr4 = ia64_get_rr(RGN_BASE(RGN_HPAGE));
+	rid = context << 3;	/* make space for encoding the region number */
+	rid_incr = 1 << 8;
+
+	/* encode the region id, preferred page size, and VHPT enable bit: */
+	rr0 = (rid << 8) | (PAGE_SHIFT << 2) | 1;
+	rr1 = rr0 + 1*rid_incr;
+	rr2 = rr0 + 2*rid_incr;
+	rr3 = rr0 + 3*rid_incr;
+	rr4 = rr0 + 4*rid_incr;
+#ifdef  CONFIG_HUGETLB_PAGE
+	rr4 = (rr4 & (~(0xfcUL))) | (old_rr4 & 0xfc);
+
+#  if RGN_HPAGE != 4
+#    error "reload_context assumes RGN_HPAGE is 4"
+#  endif
+#endif
+
+	ia64_set_rr0_to_rr4(rr0, rr1, rr2, rr3, rr4);
+	ia64_srlz_i();			/* srlz.i implies srlz.d */
+}
+
+/*
+ * Must be called with preemption off
+ */
+static inline void
+activate_context (struct mm_struct *mm)
+{
+	nv_mm_context_t context;
+
+	do {
+		context = get_mmu_context(mm);
+		if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
+			cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+		reload_context(context);
+		/*
+		 * in the unlikely event of a TLB-flush by another thread,
+		 * redo the load.
+		 */
+	} while (unlikely(context != mm->context));
+}
+
+#define deactivate_mm(tsk,mm)	do { } while (0)
+
+/*
+ * Switch from address space PREV to address space NEXT.
+ */
+static inline void
+activate_mm (struct mm_struct *prev, struct mm_struct *next)
+{
+	/*
+	 * We may get interrupts here, but that's OK because interrupt
+	 * handlers cannot touch user-space.
+	 */
+	ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));
+	activate_context(next);
+}
+
+#define switch_mm(prev_mm,next_mm,next_task)	activate_mm(prev_mm, next_mm)
+
+# endif /* ! __ASSEMBLY__ */
+#endif /* _ASM_IA64_MMU_CONTEXT_H */
diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h
new file mode 100644
index 00000000..e0de6170
--- /dev/null
+++ b/arch/ia64/include/asm/mmzone.h
@@ -0,0 +1,42 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000,2003 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2002 NEC Corp.
+ * Copyright (c) 2002 Erich Focht <efocht@ess.nec.de>
+ * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
+ */
+#ifndef _ASM_IA64_MMZONE_H
+#define _ASM_IA64_MMZONE_H
+
+#include <linux/numa.h>
+#include <asm/page.h>
+#include <asm/meminit.h>
+
+#ifdef CONFIG_NUMA
+
+static inline int pfn_to_nid(unsigned long pfn)
+{
+	extern int paddr_to_nid(unsigned long);
+	int nid = paddr_to_nid(pfn << PAGE_SHIFT);
+	if (nid < 0)
+		return 0;
+	else
+		return nid;
+}
+
+#ifdef CONFIG_IA64_DIG /* DIG systems are small */
+# define MAX_PHYSNODE_ID	8
+# define NR_NODE_MEMBLKS	(MAX_NUMNODES * 8)
+#else /* sn2 is the biggest case, so we use that if !DIG */
+# define MAX_PHYSNODE_ID	2048
+# define NR_NODE_MEMBLKS	(MAX_NUMNODES * 4)
+#endif
+
+#else /* CONFIG_NUMA */
+# define NR_NODE_MEMBLKS	(MAX_NUMNODES * 4)
+#endif /* CONFIG_NUMA */
+
+#endif /* _ASM_IA64_MMZONE_H */
diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h
new file mode 100644
index 00000000..908eaef4
--- /dev/null
+++ b/arch/ia64/include/asm/module.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_IA64_MODULE_H
+#define _ASM_IA64_MODULE_H
+
+/*
+ * IA-64-specific support for kernel module loader.
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+struct elf64_shdr;			/* forward declration */
+
+struct mod_arch_specific {
+	struct elf64_shdr *core_plt;	/* core PLT section */
+	struct elf64_shdr *init_plt;	/* init PLT section */
+	struct elf64_shdr *got;		/* global offset table */
+	struct elf64_shdr *opd;		/* official procedure descriptors */
+	struct elf64_shdr *unwind;	/* unwind-table section */
+#ifdef CONFIG_PARAVIRT
+	struct elf64_shdr *paravirt_bundles;
+					/* paravirt_alt_bundle_patch table */
+	struct elf64_shdr *paravirt_insts;
+					/* paravirt_alt_inst_patch table */
+#endif
+	unsigned long gp;		/* global-pointer for module */
+
+	void *core_unw_table;		/* core unwind-table cookie returned by unwinder */
+	void *init_unw_table;		/* init unwind-table cookie returned by unwinder */
+	unsigned int next_got_entry;	/* index of next available got entry */
+};
+
+#define Elf_Shdr	Elf64_Shdr
+#define Elf_Sym		Elf64_Sym
+#define Elf_Ehdr	Elf64_Ehdr
+
+#define MODULE_PROC_FAMILY	"ia64"
+#define MODULE_ARCH_VERMAGIC	MODULE_PROC_FAMILY \
+	"gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__)
+
+#define ARCH_SHF_SMALL	SHF_IA_64_SHORT
+
+#endif /* _ASM_IA64_MODULE_H */
diff --git a/arch/ia64/include/asm/msgbuf.h b/arch/ia64/include/asm/msgbuf.h
new file mode 100644
index 00000000..6c64c0d2
--- /dev/null
+++ b/arch/ia64/include/asm/msgbuf.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_IA64_MSGBUF_H
+#define _ASM_IA64_MSGBUF_H
+
+/*
+ * The msqid64_ds structure for IA-64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+	__kernel_time_t msg_stime;	/* last msgsnd time */
+	__kernel_time_t msg_rtime;	/* last msgrcv time */
+	__kernel_time_t msg_ctime;	/* last change time */
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused1;
+	unsigned long  __unused2;
+};
+
+#endif /* _ASM_IA64_MSGBUF_H */
diff --git a/arch/ia64/include/asm/msidef.h b/arch/ia64/include/asm/msidef.h
new file mode 100644
index 00000000..592c1047
--- /dev/null
+++ b/arch/ia64/include/asm/msidef.h
@@ -0,0 +1,42 @@
+#ifndef _IA64_MSI_DEF_H
+#define _IA64_MSI_DEF_H
+
+/*
+ * Shifts for APIC-based data
+ */
+
+#define     MSI_DATA_VECTOR_SHIFT	0
+#define	    MSI_DATA_VECTOR(v)		(((u8)v) << MSI_DATA_VECTOR_SHIFT)
+#define     MSI_DATA_VECTOR_MASK	0xffffff00
+
+#define     MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define     MSI_DATA_DELIVERY_FIXED	(0 << MSI_DATA_DELIVERY_MODE_SHIFT)
+#define     MSI_DATA_DELIVERY_LOWPRI	(1 << MSI_DATA_DELIVERY_MODE_SHIFT)
+
+#define     MSI_DATA_LEVEL_SHIFT	14
+#define     MSI_DATA_LEVEL_DEASSERT	(0 << MSI_DATA_LEVEL_SHIFT)
+#define     MSI_DATA_LEVEL_ASSERT	(1 << MSI_DATA_LEVEL_SHIFT)
+
+#define     MSI_DATA_TRIGGER_SHIFT	15
+#define     MSI_DATA_TRIGGER_EDGE	(0 << MSI_DATA_TRIGGER_SHIFT)
+#define     MSI_DATA_TRIGGER_LEVEL	(1 << MSI_DATA_TRIGGER_SHIFT)
+
+/*
+ * Shift/mask fields for APIC-based bus address
+ */
+
+#define     MSI_ADDR_DEST_ID_SHIFT	4
+#define     MSI_ADDR_HEADER		0xfee00000
+
+#define     MSI_ADDR_DEST_ID_MASK	0xfff0000f
+#define     MSI_ADDR_DEST_ID_CPU(cpu)	((cpu) << MSI_ADDR_DEST_ID_SHIFT)
+
+#define     MSI_ADDR_DEST_MODE_SHIFT	2
+#define     MSI_ADDR_DEST_MODE_PHYS	(0 << MSI_ADDR_DEST_MODE_SHIFT)
+#define	    MSI_ADDR_DEST_MODE_LOGIC	(1 << MSI_ADDR_DEST_MODE_SHIFT)
+
+#define     MSI_ADDR_REDIRECTION_SHIFT	3
+#define     MSI_ADDR_REDIRECTION_CPU	(0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define     MSI_ADDR_REDIRECTION_LOWPRI	(1 << MSI_ADDR_REDIRECTION_SHIFT)
+
+#endif/* _IA64_MSI_DEF_H */
diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
new file mode 100644
index 00000000..bed73a64
--- /dev/null
+++ b/arch/ia64/include/asm/mutex.h
@@ -0,0 +1,92 @@
+/*
+ * ia64 implementation of the mutex fastpath.
+ *
+ * Copyright (C) 2006 Ken Chen <kenneth.w.chen@intel.com>
+ *
+ */
+
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+
+/**
+ *  __mutex_fastpath_lock - try to take the lock by moving the count
+ *                          from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function MUST leave the value lower than
+ * 1 even when the "1" assertion wasn't true.
+ */
+static inline void
+__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
+{
+	if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
+		fail_fn(count);
+}
+
+/**
+ *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
+ *                                 from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
+ * or anything the slow path function returns.
+ */
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
+		return fail_fn(count);
+	return 0;
+}
+
+/**
+ *  __mutex_fastpath_unlock - try to promote the count from 0 to 1
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 0
+ *
+ * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
+ * In the failure case, this function is allowed to either set the value to
+ * 1, or to set it to a value lower than 1.
+ *
+ * If the implementation sets it to a value of lower than 1, then the
+ * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
+ * to return 0 otherwise.
+ */
+static inline void
+__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
+{
+	int ret = ia64_fetchadd4_rel(count, 1);
+	if (unlikely(ret < 0))
+		fail_fn(count);
+}
+
+#define __mutex_slowpath_needs_to_unlock()		1
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: fallback function
+ *
+ * Change the count from 1 to a value lower than 1, and return 0 (failure)
+ * if it wasn't 1 originally, or return 1 (success) otherwise. This function
+ * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
+ * Additionally, if the value was < 0 originally, this function must not leave
+ * it to 0 on failure.
+ *
+ * If the architecture has no effective trylock variant, it should call the
+ * <fail_fn> spinlock-based trylock variant unconditionally.
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	if (cmpxchg_acq(count, 1, 0) == 1)
+		return 1;
+	return 0;
+}
+
+#endif
diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h
new file mode 100644
index 00000000..d2d46efb
--- /dev/null
+++ b/arch/ia64/include/asm/native/inst.h
@@ -0,0 +1,194 @@
+/******************************************************************************
+ * arch/ia64/include/asm/native/inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define DO_SAVE_MIN		IA64_NATIVE_DO_SAVE_MIN
+
+#define __paravirt_switch_to			ia64_native_switch_to
+#define __paravirt_leave_syscall		ia64_native_leave_syscall
+#define __paravirt_work_processed_syscall	ia64_native_work_processed_syscall
+#define __paravirt_leave_kernel			ia64_native_leave_kernel
+#define __paravirt_pending_syscall_end		ia64_work_pending_syscall_end
+#define __paravirt_work_processed_syscall_target \
+						ia64_work_processed_syscall
+
+#define paravirt_fsyscall_table			ia64_native_fsyscall_table
+#define paravirt_fsys_bubble_down		ia64_native_fsys_bubble_down
+
+#ifdef CONFIG_PARAVIRT_GUEST_ASM_CLOBBER_CHECK
+# define PARAVIRT_POISON	0xdeadbeefbaadf00d
+# define CLOBBER(clob)				\
+	;;					\
+	movl clob = PARAVIRT_POISON;		\
+	;;
+# define CLOBBER_PRED(pred_clob)		\
+	;;					\
+	cmp.eq pred_clob, p0 = r0, r0		\
+	;;
+#else
+# define CLOBBER(clob)			/* nothing */
+# define CLOBBER_PRED(pred_clob)	/* nothing */
+#endif
+
+#define MOV_FROM_IFA(reg)	\
+	mov reg = cr.ifa
+
+#define MOV_FROM_ITIR(reg)	\
+	mov reg = cr.itir
+
+#define MOV_FROM_ISR(reg)	\
+	mov reg = cr.isr
+
+#define MOV_FROM_IHA(reg)	\
+	mov reg = cr.iha
+
+#define MOV_FROM_IPSR(pred, reg)	\
+(pred)	mov reg = cr.ipsr
+
+#define MOV_FROM_IIM(reg)	\
+	mov reg = cr.iim
+
+#define MOV_FROM_IIP(reg)	\
+	mov reg = cr.iip
+
+#define MOV_FROM_IVR(reg, clob)	\
+	mov reg = cr.ivr	\
+	CLOBBER(clob)
+
+#define MOV_FROM_PSR(pred, reg, clob)	\
+(pred)	mov reg = psr			\
+	CLOBBER(clob)
+
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)	\
+(pred)	mov reg = ar.itc				\
+	CLOBBER(clob)					\
+	CLOBBER_PRED(pred_clob)
+
+#define MOV_TO_IFA(reg, clob)	\
+	mov cr.ifa = reg	\
+	CLOBBER(clob)
+
+#define MOV_TO_ITIR(pred, reg, clob)	\
+(pred)	mov cr.itir = reg		\
+	CLOBBER(clob)
+
+#define MOV_TO_IHA(pred, reg, clob)	\
+(pred)	mov cr.iha = reg		\
+	CLOBBER(clob)
+
+#define MOV_TO_IPSR(pred, reg, clob)		\
+(pred)	mov cr.ipsr = reg			\
+	CLOBBER(clob)
+
+#define MOV_TO_IFS(pred, reg, clob)	\
+(pred)	mov cr.ifs = reg		\
+	CLOBBER(clob)
+
+#define MOV_TO_IIP(reg, clob)	\
+	mov cr.iip = reg	\
+	CLOBBER(clob)
+
+#define MOV_TO_KR(kr, reg, clob0, clob1)	\
+	mov IA64_KR(kr) = reg			\
+	CLOBBER(clob0)				\
+	CLOBBER(clob1)
+
+#define ITC_I(pred, reg, clob)	\
+(pred)	itc.i reg		\
+	CLOBBER(clob)
+
+#define ITC_D(pred, reg, clob)	\
+(pred)	itc.d reg		\
+	CLOBBER(clob)
+
+#define ITC_I_AND_D(pred_i, pred_d, reg, clob)	\
+(pred_i) itc.i reg;				\
+(pred_d) itc.d reg				\
+	CLOBBER(clob)
+
+#define THASH(pred, reg0, reg1, clob)		\
+(pred)	thash reg0 = reg1			\
+	CLOBBER(clob)
+
+#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1)		\
+	ssm psr.ic | PSR_DEFAULT_BITS					\
+	CLOBBER(clob0)							\
+	CLOBBER(clob1)							\
+	;;								\
+	srlz.i /* guarantee that interruption collectin is on */	\
+	;;
+
+#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1)	\
+	ssm psr.ic				\
+	CLOBBER(clob0)				\
+	CLOBBER(clob1)				\
+	;;					\
+	srlz.d
+
+#define RSM_PSR_IC(clob)	\
+	rsm psr.ic		\
+	CLOBBER(clob)
+
+#define SSM_PSR_I(pred, pred_clob, clob)	\
+(pred)	ssm psr.i				\
+	CLOBBER(clob)				\
+	CLOBBER_PRED(pred_clob)
+
+#define RSM_PSR_I(pred, clob0, clob1)	\
+(pred)	rsm psr.i			\
+	CLOBBER(clob0)			\
+	CLOBBER(clob1)
+
+#define RSM_PSR_I_IC(clob0, clob1, clob2)	\
+	rsm psr.i | psr.ic			\
+	CLOBBER(clob0)				\
+	CLOBBER(clob1)				\
+	CLOBBER(clob2)
+
+#define RSM_PSR_DT		\
+	rsm psr.dt
+
+#define RSM_PSR_BE_I(clob0, clob1)	\
+	rsm psr.be | psr.i		\
+	CLOBBER(clob0)			\
+	CLOBBER(clob1)
+
+#define SSM_PSR_DT_AND_SRLZ_I	\
+	ssm psr.dt		\
+	;;			\
+	srlz.i
+
+#define BSW_0(clob0, clob1, clob2)	\
+	bsw.0				\
+	CLOBBER(clob0)			\
+	CLOBBER(clob1)			\
+	CLOBBER(clob2)
+
+#define BSW_1(clob0, clob1)	\
+	bsw.1			\
+	CLOBBER(clob0)		\
+	CLOBBER(clob1)
+
+#define COVER	\
+	cover
+
+#define RFI	\
+	rfi
diff --git a/arch/ia64/include/asm/native/irq.h b/arch/ia64/include/asm/native/irq.h
new file mode 100644
index 00000000..887a228e
--- /dev/null
+++ b/arch/ia64/include/asm/native/irq.h
@@ -0,0 +1,33 @@
+/******************************************************************************
+ * arch/ia64/include/asm/native/irq.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _ASM_IA64_NATIVE_IRQ_H
+#define _ASM_IA64_NATIVE_IRQ_H
+
+#define NR_VECTORS	256
+
+#if (NR_VECTORS + 32 * NR_CPUS) < 1024
+#define IA64_NATIVE_NR_IRQS (NR_VECTORS + 32 * NR_CPUS)
+#else
+#define IA64_NATIVE_NR_IRQS 1024
+#endif
+
+#endif /* _ASM_IA64_NATIVE_IRQ_H */
diff --git a/arch/ia64/include/asm/native/patchlist.h b/arch/ia64/include/asm/native/patchlist.h
new file mode 100644
index 00000000..be16ca93
--- /dev/null
+++ b/arch/ia64/include/asm/native/patchlist.h
@@ -0,0 +1,38 @@
+/******************************************************************************
+ * arch/ia64/include/asm/native/inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define __paravirt_start_gate_fsyscall_patchlist		\
+	__ia64_native_start_gate_fsyscall_patchlist
+#define __paravirt_end_gate_fsyscall_patchlist			\
+	__ia64_native_end_gate_fsyscall_patchlist
+#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist	\
+	__ia64_native_start_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist	\
+	__ia64_native_end_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_start_gate_vtop_patchlist			\
+	__ia64_native_start_gate_vtop_patchlist
+#define __paravirt_end_gate_vtop_patchlist			\
+	__ia64_native_end_gate_vtop_patchlist
+#define __paravirt_start_gate_mckinley_e9_patchlist		\
+	__ia64_native_start_gate_mckinley_e9_patchlist
+#define __paravirt_end_gate_mckinley_e9_patchlist		\
+	__ia64_native_end_gate_mckinley_e9_patchlist
diff --git a/arch/ia64/include/asm/native/pvchk_inst.h b/arch/ia64/include/asm/native/pvchk_inst.h
new file mode 100644
index 00000000..8d72962e
--- /dev/null
+++ b/arch/ia64/include/asm/native/pvchk_inst.h
@@ -0,0 +1,271 @@
+#ifndef _ASM_NATIVE_PVCHK_INST_H
+#define _ASM_NATIVE_PVCHK_INST_H
+
+/******************************************************************************
+ * arch/ia64/include/asm/native/pvchk_inst.h
+ * Checker for paravirtualizations of privileged operations.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *      Dan Magenheimer <dan.magenheimer@hp.com>
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/**********************************************
+ * Instructions paravirtualized for correctness
+ **********************************************/
+
+/* "fc" and "thash" are privilege-sensitive instructions, meaning they
+ *  may have different semantics depending on whether they are executed
+ *  at PL0 vs PL!=0.  When paravirtualized, these instructions mustn't
+ *  be allowed to execute directly, lest incorrect semantics result.
+ */
+
+#define fc	.error "fc should not be used directly."
+#define thash	.error "thash should not be used directly."
+
+/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
+ * is not currently used (though it may be in a long-format VHPT system!)
+ * and the semantics of cover only change if psr.ic is off which is very
+ * rare (and currently non-existent outside of assembly code
+ */
+#define ttag	.error "ttag should not be used directly."
+#define cover	.error "cover should not be used directly."
+
+/* There are also privilege-sensitive registers.  These registers are
+ * readable at any privilege level but only writable at PL0.
+ */
+#define cpuid	.error "cpuid should not be used directly."
+#define pmd	.error "pmd should not be used directly."
+
+/*
+ * mov ar.eflag =
+ * mov = ar.eflag
+ */
+
+/**********************************************
+ * Instructions paravirtualized for performance
+ **********************************************/
+/*
+ * Those instructions include '.' which can't be handled by cpp.
+ * or can't be handled by cpp easily.
+ * They are handled by sed instead of cpp.
+ */
+
+/* for .S
+ * itc.i
+ * itc.d
+ *
+ * bsw.0
+ * bsw.1
+ *
+ * ssm psr.ic | PSR_DEFAULT_BITS
+ * ssm psr.ic
+ * rsm psr.ic
+ * ssm psr.i
+ * rsm psr.i
+ * rsm psr.i | psr.ic
+ * rsm psr.dt
+ * ssm psr.dt
+ *
+ * mov = cr.ifa
+ * mov = cr.itir
+ * mov = cr.isr
+ * mov = cr.iha
+ * mov = cr.ipsr
+ * mov = cr.iim
+ * mov = cr.iip
+ * mov = cr.ivr
+ * mov = psr
+ *
+ * mov cr.ifa =
+ * mov cr.itir =
+ * mov cr.iha =
+ * mov cr.ipsr =
+ * mov cr.ifs =
+ * mov cr.iip =
+ * mov cr.kr =
+ */
+
+/* for intrinsics
+ * ssm psr.i
+ * rsm psr.i
+ * mov = psr
+ * mov = ivr
+ * mov = tpr
+ * mov cr.itm =
+ * mov eoi =
+ * mov rr[] =
+ * mov = rr[]
+ * mov = kr
+ * mov kr =
+ * ptc.ga
+ */
+
+/*************************************************************
+ * define paravirtualized instrcution macros as nop to ingore.
+ * and check whether arguments are appropriate.
+ *************************************************************/
+
+/* check whether reg is a regular register */
+.macro is_rreg_in reg
+	.ifc "\reg", "r0"
+		nop 0
+		.exitm
+	.endif
+	;;
+	mov \reg = r0
+	;;
+.endm
+#define IS_RREG_IN(reg)	is_rreg_in reg ;
+
+#define IS_RREG_OUT(reg)			\
+	;;					\
+	mov reg = r0				\
+	;;
+
+#define IS_RREG_CLOB(reg)	IS_RREG_OUT(reg)
+
+/* check whether pred is a predicate register */
+#define IS_PRED_IN(pred)			\
+	;;					\
+	(pred)	nop 0				\
+	;;
+
+#define IS_PRED_OUT(pred)			\
+	;;					\
+	cmp.eq pred, p0 = r0, r0		\
+	;;
+
+#define IS_PRED_CLOB(pred)	IS_PRED_OUT(pred)
+
+
+#define DO_SAVE_MIN(__COVER, SAVE_IFS, EXTRA, WORKAROUND)	\
+	nop 0
+#define MOV_FROM_IFA(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_ITIR(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_ISR(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IHA(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IPSR(pred, reg)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IIM(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IIP(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IVR(reg, clob)			\
+	IS_RREG_OUT(reg)			\
+	IS_RREG_CLOB(clob)
+#define MOV_FROM_PSR(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_OUT(reg)			\
+	IS_RREG_CLOB(clob)
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)	\
+	IS_PRED_IN(pred)				\
+	IS_PRED_CLOB(pred_clob)				\
+	IS_RREG_OUT(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IFA(reg, clob)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_ITIR(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IHA(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IPSR(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IFS(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IIP(reg, clob)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_KR(kr, reg, clob0, clob1)	\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define ITC_I(pred, reg, clob)			\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define ITC_D(pred, reg, clob)			\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define ITC_I_AND_D(pred_i, pred_d, reg, clob)	\
+	IS_PRED_IN(pred_i)			\
+	IS_PRED_IN(pred_d)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define THASH(pred, reg0, reg1, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_OUT(reg0)			\
+	IS_RREG_IN(reg1)			\
+	IS_RREG_CLOB(clob)
+#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1)	\
+	IS_RREG_CLOB(clob0)					\
+	IS_RREG_CLOB(clob1)
+#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1)	\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define RSM_PSR_IC(clob)			\
+	IS_RREG_CLOB(clob)
+#define SSM_PSR_I(pred, pred_clob, clob)	\
+	IS_PRED_IN(pred)			\
+	IS_PRED_CLOB(pred_clob)			\
+	IS_RREG_CLOB(clob)
+#define RSM_PSR_I(pred, clob0, clob1)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define RSM_PSR_I_IC(clob0, clob1, clob2)	\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)			\
+	IS_RREG_CLOB(clob2)
+#define RSM_PSR_DT				\
+	nop 0
+#define RSM_PSR_BE_I(clob0, clob1)		\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define SSM_PSR_DT_AND_SRLZ_I			\
+	nop 0
+#define BSW_0(clob0, clob1, clob2)		\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)			\
+	IS_RREG_CLOB(clob2)
+#define BSW_1(clob0, clob1)			\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define COVER					\
+	nop 0
+#define RFI					\
+	br.ret.sptk.many rp /* defining nop causes dependency error */
+
+#endif /* _ASM_NATIVE_PVCHK_INST_H */
diff --git a/arch/ia64/include/asm/nodedata.h b/arch/ia64/include/asm/nodedata.h
new file mode 100644
index 00000000..2fb337b0
--- /dev/null
+++ b/arch/ia64/include/asm/nodedata.h
@@ -0,0 +1,63 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2002 NEC Corp.
+ * Copyright (c) 2002 Erich Focht <efocht@ess.nec.de>
+ * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
+ */
+#ifndef _ASM_IA64_NODEDATA_H
+#define _ASM_IA64_NODEDATA_H
+
+#include <linux/numa.h>
+
+#include <asm/percpu.h>
+#include <asm/mmzone.h>
+
+#ifdef CONFIG_NUMA
+
+/*
+ * Node Data. One of these structures is located on each node of a NUMA system.
+ */
+
+struct pglist_data;
+struct ia64_node_data {
+	short			active_cpu_count;
+	short			node;
+	struct pglist_data	*pg_data_ptrs[MAX_NUMNODES];
+};
+
+
+/*
+ * Return a pointer to the node_data structure for the executing cpu.
+ */
+#define local_node_data		(local_cpu_data->node_data)
+
+/*
+ * Given a node id, return a pointer to the pg_data_t for the node.
+ *
+ * NODE_DATA 	- should be used in all code not related to system
+ *		  initialization. It uses pernode data structures to minimize
+ *		  offnode memory references. However, these structure are not 
+ *		  present during boot. This macro can be used once cpu_init
+ *		  completes.
+ */
+#define NODE_DATA(nid)		(local_node_data->pg_data_ptrs[nid])
+
+/*
+ * LOCAL_DATA_ADDR - This is to calculate the address of other node's
+ *		     "local_node_data" at hot-plug phase. The local_node_data
+ *		     is pointed by per_cpu_page. Kernel usually use it for
+ *		     just executing cpu. However, when new node is hot-added,
+ *		     the addresses of local data for other nodes are necessary
+ *		     to update all of them.
+ */
+#define LOCAL_DATA_ADDR(pgdat)  			\
+	((struct ia64_node_data *)((u64)(pgdat) + 	\
+				   L1_CACHE_ALIGN(sizeof(struct pglist_data))))
+
+#endif /* CONFIG_NUMA */
+
+#endif /* _ASM_IA64_NODEDATA_H */
diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h
new file mode 100644
index 00000000..6a8a27cf
--- /dev/null
+++ b/arch/ia64/include/asm/numa.h
@@ -0,0 +1,80 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file contains NUMA specific prototypes and definitions.
+ *
+ * 2002/08/05 Erich Focht <efocht@ess.nec.de>
+ *
+ */
+#ifndef _ASM_IA64_NUMA_H
+#define _ASM_IA64_NUMA_H
+
+
+#ifdef CONFIG_NUMA
+
+#include <linux/cache.h>
+#include <linux/cpumask.h>
+#include <linux/numa.h>
+#include <linux/smp.h>
+#include <linux/threads.h>
+
+#include <asm/mmzone.h>
+
+extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+extern pg_data_t *pgdat_list[MAX_NUMNODES];
+
+/* Stuff below this line could be architecture independent */
+
+extern int num_node_memblks;		/* total number of memory chunks */
+
+/*
+ * List of node memory chunks. Filled when parsing SRAT table to
+ * obtain information about memory nodes.
+*/
+
+struct node_memblk_s {
+	unsigned long start_paddr;
+	unsigned long size;
+	int nid;		/* which logical node contains this chunk? */
+	int bank;		/* which mem bank on this node */
+};
+
+struct node_cpuid_s {
+	u16	phys_id;	/* id << 8 | eid */
+	int	nid;		/* logical node containing this CPU */
+};
+
+extern struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
+extern struct node_cpuid_s node_cpuid[NR_CPUS];
+
+/*
+ * ACPI 2.0 SLIT (System Locality Information Table)
+ * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
+ *
+ * This is a matrix with "distances" between nodes, they should be
+ * proportional to the memory access latency ratios.
+ */
+
+extern u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
+#define node_distance(from,to) (numa_slit[(from) * num_online_nodes() + (to)])
+
+extern int paddr_to_nid(unsigned long paddr);
+
+#define local_nodeid (cpu_to_node_map[smp_processor_id()])
+
+extern void map_cpu_to_node(int cpu, int nid);
+extern void unmap_cpu_from_node(int cpu, int nid);
+
+
+#else /* !CONFIG_NUMA */
+#define map_cpu_to_node(cpu, nid)	do{}while(0)
+#define unmap_cpu_from_node(cpu, nid)	do{}while(0)
+
+#define paddr_to_nid(addr)	0
+
+#endif /* CONFIG_NUMA */
+
+#endif /* _ASM_IA64_NUMA_H */
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
new file mode 100644
index 00000000..961a16f4
--- /dev/null
+++ b/arch/ia64/include/asm/page.h
@@ -0,0 +1,224 @@
+#ifndef _ASM_IA64_PAGE_H
+#define _ASM_IA64_PAGE_H
+/*
+ * Pagetable related stuff.
+ *
+ * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/intrinsics.h>
+#include <asm/types.h>
+
+/*
+ * The top three bits of an IA64 address are its Region Number.
+ * Different regions are assigned to different purposes.
+ */
+#define RGN_SHIFT	(61)
+#define RGN_BASE(r)	(__IA64_UL_CONST(r)<<RGN_SHIFT)
+#define RGN_BITS	(RGN_BASE(-1))
+
+#define RGN_KERNEL	7	/* Identity mapped region */
+#define RGN_UNCACHED    6	/* Identity mapped I/O region */
+#define RGN_GATE	5	/* Gate page, Kernel text, etc */
+#define RGN_HPAGE	4	/* For Huge TLB pages */
+
+/*
+ * PAGE_SHIFT determines the actual kernel page size.
+ */
+#if defined(CONFIG_IA64_PAGE_SIZE_4KB)
+# define PAGE_SHIFT	12
+#elif defined(CONFIG_IA64_PAGE_SIZE_8KB)
+# define PAGE_SHIFT	13
+#elif defined(CONFIG_IA64_PAGE_SIZE_16KB)
+# define PAGE_SHIFT	14
+#elif defined(CONFIG_IA64_PAGE_SIZE_64KB)
+# define PAGE_SHIFT	16
+#else
+# error Unsupported page size!
+#endif
+
+#define PAGE_SIZE		(__IA64_UL_CONST(1) << PAGE_SHIFT)
+#define PAGE_MASK		(~(PAGE_SIZE - 1))
+
+#define PERCPU_PAGE_SHIFT	18	/* log2() of max. size of per-CPU area */
+#define PERCPU_PAGE_SIZE	(__IA64_UL_CONST(1) << PERCPU_PAGE_SHIFT)
+
+
+#ifdef CONFIG_HUGETLB_PAGE
+# define HPAGE_REGION_BASE	RGN_BASE(RGN_HPAGE)
+# define HPAGE_SHIFT		hpage_shift
+# define HPAGE_SHIFT_DEFAULT	28	/* check ia64 SDM for architecture supported size */
+# define HPAGE_SIZE		(__IA64_UL_CONST(1) << HPAGE_SHIFT)
+# define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+
+# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#ifdef __ASSEMBLY__
+# define __pa(x)		((x) - PAGE_OFFSET)
+# define __va(x)		((x) + PAGE_OFFSET)
+#else /* !__ASSEMBLY */
+#  define STRICT_MM_TYPECHECKS
+
+extern void clear_page (void *page);
+extern void copy_page (void *to, void *from);
+
+/*
+ * clear_user_page() and copy_user_page() can't be inline functions because
+ * flush_dcache_page() can't be defined until later...
+ */
+#define clear_user_page(addr, vaddr, page)	\
+do {						\
+	clear_page(addr);			\
+	flush_dcache_page(page);		\
+} while (0)
+
+#define copy_user_page(to, from, vaddr, page)	\
+do {						\
+	copy_page((to), (from));		\
+	flush_dcache_page(page);		\
+} while (0)
+
+
+#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr)		\
+({									\
+	struct page *page = alloc_page_vma(				\
+		GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr);	\
+	if (page)							\
+ 		flush_dcache_page(page);				\
+	page;								\
+})
+
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+extern int ia64_pfn_valid (unsigned long pfn);
+#else
+# define ia64_pfn_valid(pfn) 1
+#endif
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+extern struct page *vmem_map;
+#ifdef CONFIG_DISCONTIGMEM
+# define page_to_pfn(page)	((unsigned long) (page - vmem_map))
+# define pfn_to_page(pfn)	(vmem_map + (pfn))
+#else
+# include <asm-generic/memory_model.h>
+#endif
+#else
+# include <asm-generic/memory_model.h>
+#endif
+
+#ifdef CONFIG_FLATMEM
+# define pfn_valid(pfn)		(((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
+#elif defined(CONFIG_DISCONTIGMEM)
+extern unsigned long min_low_pfn;
+extern unsigned long max_low_pfn;
+# define pfn_valid(pfn)		(((pfn) >= min_low_pfn) && ((pfn) < max_low_pfn) && ia64_pfn_valid(pfn))
+#endif
+
+#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
+#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+
+typedef union ia64_va {
+	struct {
+		unsigned long off : 61;		/* intra-region offset */
+		unsigned long reg :  3;		/* region number */
+	} f;
+	unsigned long l;
+	void *p;
+} ia64_va;
+
+/*
+ * Note: These macros depend on the fact that PAGE_OFFSET has all
+ * region bits set to 1 and all other bits set to zero.  They are
+ * expressed in this way to ensure they result in a single "dep"
+ * instruction.
+ */
+#define __pa(x)		({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;})
+#define __va(x)		({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
+
+#define REGION_NUMBER(x)	({ia64_va _v; _v.l = (long) (x); _v.f.reg;})
+#define REGION_OFFSET(x)	({ia64_va _v; _v.l = (long) (x); _v.f.off;})
+
+#ifdef CONFIG_HUGETLB_PAGE
+# define htlbpage_to_page(x)	(((unsigned long) REGION_NUMBER(x) << 61)			\
+				 | (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT)))
+# define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+extern unsigned int hpage_shift;
+#endif
+
+static __inline__ int
+get_order (unsigned long size)
+{
+	long double d = size - 1;
+	long order;
+
+	order = ia64_getf_exp(d);
+	order = order - PAGE_SHIFT - 0xffff + 1;
+	if (order < 0)
+		order = 0;
+	return order;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#ifdef STRICT_MM_TYPECHECKS
+  /*
+   * These are used to make use of C type-checking..
+   */
+  typedef struct { unsigned long pte; } pte_t;
+  typedef struct { unsigned long pmd; } pmd_t;
+#ifdef CONFIG_PGTABLE_4
+  typedef struct { unsigned long pud; } pud_t;
+#endif
+  typedef struct { unsigned long pgd; } pgd_t;
+  typedef struct { unsigned long pgprot; } pgprot_t;
+  typedef struct page *pgtable_t;
+
+# define pte_val(x)	((x).pte)
+# define pmd_val(x)	((x).pmd)
+#ifdef CONFIG_PGTABLE_4
+# define pud_val(x)	((x).pud)
+#endif
+# define pgd_val(x)	((x).pgd)
+# define pgprot_val(x)	((x).pgprot)
+
+# define __pte(x)	((pte_t) { (x) } )
+# define __pmd(x)	((pmd_t) { (x) } )
+# define __pgprot(x)	((pgprot_t) { (x) } )
+
+#else /* !STRICT_MM_TYPECHECKS */
+  /*
+   * .. while these make it easier on the compiler
+   */
+# ifndef __ASSEMBLY__
+    typedef unsigned long pte_t;
+    typedef unsigned long pmd_t;
+    typedef unsigned long pgd_t;
+    typedef unsigned long pgprot_t;
+    typedef struct page *pgtable_t;
+# endif
+
+# define pte_val(x)	(x)
+# define pmd_val(x)	(x)
+# define pgd_val(x)	(x)
+# define pgprot_val(x)	(x)
+
+# define __pte(x)	(x)
+# define __pgd(x)	(x)
+# define __pgprot(x)	(x)
+#endif /* !STRICT_MM_TYPECHECKS */
+
+#define PAGE_OFFSET			RGN_BASE(RGN_KERNEL)
+
+#define VM_DATA_DEFAULT_FLAGS		(VM_READ | VM_WRITE |					\
+					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |		\
+					 (((current->personality & READ_IMPLIES_EXEC) != 0)	\
+					  ? VM_EXEC : 0))
+
+#endif /* _ASM_IA64_PAGE_H */
diff --git a/arch/ia64/include/asm/pal.h b/arch/ia64/include/asm/pal.h
new file mode 100644
index 00000000..2e69284d
--- /dev/null
+++ b/arch/ia64/include/asm/pal.h
@@ -0,0 +1,1825 @@
+#ifndef _ASM_IA64_PAL_H
+#define _ASM_IA64_PAL_H
+
+/*
+ * Processor Abstraction Layer definitions.
+ *
+ * This is based on Intel IA-64 Architecture Software Developer's Manual rev 1.0
+ * chapter 11 IA-64 Processor Abstraction Layer
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Srinivasa Prasad Thirumalachar <sprasad@sprasad.engr.sgi.com>
+ * Copyright (C) 2008 Silicon Graphics, Inc. (SGI)
+ *
+ * 99/10/01	davidm	Make sure we pass zero for reserved parameters.
+ * 00/03/07	davidm	Updated pal_cache_flush() to be in sync with PAL v2.6.
+ * 00/03/23     cfleck  Modified processor min-state save area to match updated PAL & SAL info
+ * 00/05/24     eranian Updated to latest PAL spec, fix structures bugs, added
+ * 00/05/25	eranian Support for stack calls, and static physical calls
+ * 00/06/18	eranian Support for stacked physical calls
+ * 06/10/26	rja	Support for Intel Itanium Architecture Software Developer's
+ *			Manual Rev 2.2 (Jan 2006)
+ */
+
+/*
+ * Note that some of these calls use a static-register only calling
+ * convention which has nothing to do with the regular calling
+ * convention.
+ */
+#define PAL_CACHE_FLUSH		1	/* flush i/d cache */
+#define PAL_CACHE_INFO		2	/* get detailed i/d cache info */
+#define PAL_CACHE_INIT		3	/* initialize i/d cache */
+#define PAL_CACHE_SUMMARY	4	/* get summary of cache hierarchy */
+#define PAL_MEM_ATTRIB		5	/* list supported memory attributes */
+#define PAL_PTCE_INFO		6	/* purge TLB info */
+#define PAL_VM_INFO		7	/* return supported virtual memory features */
+#define PAL_VM_SUMMARY		8	/* return summary on supported vm features */
+#define PAL_BUS_GET_FEATURES	9	/* return processor bus interface features settings */
+#define PAL_BUS_SET_FEATURES	10	/* set processor bus features */
+#define PAL_DEBUG_INFO		11	/* get number of debug registers */
+#define PAL_FIXED_ADDR		12	/* get fixed component of processors's directed address */
+#define PAL_FREQ_BASE		13	/* base frequency of the platform */
+#define PAL_FREQ_RATIOS		14	/* ratio of processor, bus and ITC frequency */
+#define PAL_PERF_MON_INFO	15	/* return performance monitor info */
+#define PAL_PLATFORM_ADDR	16	/* set processor interrupt block and IO port space addr */
+#define PAL_PROC_GET_FEATURES	17	/* get configurable processor features & settings */
+#define PAL_PROC_SET_FEATURES	18	/* enable/disable configurable processor features */
+#define PAL_RSE_INFO		19	/* return rse information */
+#define PAL_VERSION		20	/* return version of PAL code */
+#define PAL_MC_CLEAR_LOG	21	/* clear all processor log info */
+#define PAL_MC_DRAIN		22	/* drain operations which could result in an MCA */
+#define PAL_MC_EXPECTED		23	/* set/reset expected MCA indicator */
+#define PAL_MC_DYNAMIC_STATE	24	/* get processor dynamic state */
+#define PAL_MC_ERROR_INFO	25	/* get processor MCA info and static state */
+#define PAL_MC_RESUME		26	/* Return to interrupted process */
+#define PAL_MC_REGISTER_MEM	27	/* Register memory for PAL to use during MCAs and inits */
+#define PAL_HALT		28	/* enter the low power HALT state */
+#define PAL_HALT_LIGHT		29	/* enter the low power light halt state*/
+#define PAL_COPY_INFO		30	/* returns info needed to relocate PAL */
+#define PAL_CACHE_LINE_INIT	31	/* init tags & data of cache line */
+#define PAL_PMI_ENTRYPOINT	32	/* register PMI memory entry points with the processor */
+#define PAL_ENTER_IA_32_ENV	33	/* enter IA-32 system environment */
+#define PAL_VM_PAGE_SIZE	34	/* return vm TC and page walker page sizes */
+
+#define PAL_MEM_FOR_TEST	37	/* get amount of memory needed for late processor test */
+#define PAL_CACHE_PROT_INFO	38	/* get i/d cache protection info */
+#define PAL_REGISTER_INFO	39	/* return AR and CR register information*/
+#define PAL_SHUTDOWN		40	/* enter processor shutdown state */
+#define PAL_PREFETCH_VISIBILITY	41	/* Make Processor Prefetches Visible */
+#define PAL_LOGICAL_TO_PHYSICAL 42	/* returns information on logical to physical processor mapping */
+#define PAL_CACHE_SHARED_INFO	43	/* returns information on caches shared by logical processor */
+#define PAL_GET_HW_POLICY	48	/* Get current hardware resource sharing policy */
+#define PAL_SET_HW_POLICY	49	/* Set current hardware resource sharing policy */
+#define PAL_VP_INFO		50	/* Information about virtual processor features */
+#define PAL_MC_HW_TRACKING	51	/* Hardware tracking status */
+
+#define PAL_COPY_PAL		256	/* relocate PAL procedures and PAL PMI */
+#define PAL_HALT_INFO		257	/* return the low power capabilities of processor */
+#define PAL_TEST_PROC		258	/* perform late processor self-test */
+#define PAL_CACHE_READ		259	/* read tag & data of cacheline for diagnostic testing */
+#define PAL_CACHE_WRITE		260	/* write tag & data of cacheline for diagnostic testing */
+#define PAL_VM_TR_READ		261	/* read contents of translation register */
+#define PAL_GET_PSTATE		262	/* get the current P-state */
+#define PAL_SET_PSTATE		263	/* set the P-state */
+#define PAL_BRAND_INFO		274	/* Processor branding information */
+
+#define PAL_GET_PSTATE_TYPE_LASTSET	0
+#define PAL_GET_PSTATE_TYPE_AVGANDRESET	1
+#define PAL_GET_PSTATE_TYPE_AVGNORESET	2
+#define PAL_GET_PSTATE_TYPE_INSTANT	3
+
+#define PAL_MC_ERROR_INJECT	276	/* Injects processor error or returns injection capabilities */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/fpu.h>
+
+/*
+ * Data types needed to pass information into PAL procedures and
+ * interpret information returned by them.
+ */
+
+/* Return status from the PAL procedure */
+typedef s64				pal_status_t;
+
+#define PAL_STATUS_SUCCESS		0	/* No error */
+#define PAL_STATUS_UNIMPLEMENTED	(-1)	/* Unimplemented procedure */
+#define PAL_STATUS_EINVAL		(-2)	/* Invalid argument */
+#define PAL_STATUS_ERROR		(-3)	/* Error */
+#define PAL_STATUS_CACHE_INIT_FAIL	(-4)	/* Could not initialize the
+						 * specified level and type of
+						 * cache without sideeffects
+						 * and "restrict" was 1
+						 */
+#define PAL_STATUS_REQUIRES_MEMORY	(-9)	/* Call requires PAL memory buffer */
+
+/* Processor cache level in the hierarchy */
+typedef u64				pal_cache_level_t;
+#define PAL_CACHE_LEVEL_L0		0	/* L0 */
+#define PAL_CACHE_LEVEL_L1		1	/* L1 */
+#define PAL_CACHE_LEVEL_L2		2	/* L2 */
+
+
+/* Processor cache type at a particular level in the hierarchy */
+
+typedef u64				pal_cache_type_t;
+#define PAL_CACHE_TYPE_INSTRUCTION	1	/* Instruction cache */
+#define PAL_CACHE_TYPE_DATA		2	/* Data or unified cache */
+#define PAL_CACHE_TYPE_INSTRUCTION_DATA	3	/* Both Data & Instruction */
+
+
+#define PAL_CACHE_FLUSH_INVALIDATE	1	/* Invalidate clean lines */
+#define PAL_CACHE_FLUSH_CHK_INTRS	2	/* check for interrupts/mc while flushing */
+
+/* Processor cache line size in bytes  */
+typedef int				pal_cache_line_size_t;
+
+/* Processor cache line state */
+typedef u64				pal_cache_line_state_t;
+#define PAL_CACHE_LINE_STATE_INVALID	0	/* Invalid */
+#define PAL_CACHE_LINE_STATE_SHARED	1	/* Shared */
+#define PAL_CACHE_LINE_STATE_EXCLUSIVE	2	/* Exclusive */
+#define PAL_CACHE_LINE_STATE_MODIFIED	3	/* Modified */
+
+typedef struct pal_freq_ratio {
+	u32 den, num;		/* numerator & denominator */
+} itc_ratio, proc_ratio;
+
+typedef	union  pal_cache_config_info_1_s {
+	struct {
+		u64		u		: 1,	/* 0 Unified cache ? */
+				at		: 2,	/* 2-1 Cache mem attr*/
+				reserved	: 5,	/* 7-3 Reserved */
+				associativity	: 8,	/* 16-8 Associativity*/
+				line_size	: 8,	/* 23-17 Line size */
+				stride		: 8,	/* 31-24 Stride */
+				store_latency	: 8,	/*39-32 Store latency*/
+				load_latency	: 8,	/* 47-40 Load latency*/
+				store_hints	: 8,	/* 55-48 Store hints*/
+				load_hints	: 8;	/* 63-56 Load hints */
+	} pcci1_bits;
+	u64			pcci1_data;
+} pal_cache_config_info_1_t;
+
+typedef	union  pal_cache_config_info_2_s {
+	struct {
+		u32		cache_size;		/*cache size in bytes*/
+
+
+		u32		alias_boundary	: 8,	/* 39-32 aliased addr
+							 * separation for max
+							 * performance.
+							 */
+				tag_ls_bit	: 8,	/* 47-40 LSb of addr*/
+				tag_ms_bit	: 8,	/* 55-48 MSb of addr*/
+				reserved	: 8;	/* 63-56 Reserved */
+	} pcci2_bits;
+	u64			pcci2_data;
+} pal_cache_config_info_2_t;
+
+
+typedef struct pal_cache_config_info_s {
+	pal_status_t			pcci_status;
+	pal_cache_config_info_1_t	pcci_info_1;
+	pal_cache_config_info_2_t	pcci_info_2;
+	u64				pcci_reserved;
+} pal_cache_config_info_t;
+
+#define pcci_ld_hints		pcci_info_1.pcci1_bits.load_hints
+#define pcci_st_hints		pcci_info_1.pcci1_bits.store_hints
+#define pcci_ld_latency		pcci_info_1.pcci1_bits.load_latency
+#define pcci_st_latency		pcci_info_1.pcci1_bits.store_latency
+#define pcci_stride		pcci_info_1.pcci1_bits.stride
+#define pcci_line_size		pcci_info_1.pcci1_bits.line_size
+#define pcci_assoc		pcci_info_1.pcci1_bits.associativity
+#define pcci_cache_attr		pcci_info_1.pcci1_bits.at
+#define pcci_unified		pcci_info_1.pcci1_bits.u
+#define pcci_tag_msb		pcci_info_2.pcci2_bits.tag_ms_bit
+#define pcci_tag_lsb		pcci_info_2.pcci2_bits.tag_ls_bit
+#define pcci_alias_boundary	pcci_info_2.pcci2_bits.alias_boundary
+#define pcci_cache_size		pcci_info_2.pcci2_bits.cache_size
+
+
+
+/* Possible values for cache attributes */
+
+#define PAL_CACHE_ATTR_WT		0	/* Write through cache */
+#define PAL_CACHE_ATTR_WB		1	/* Write back cache */
+#define PAL_CACHE_ATTR_WT_OR_WB		2	/* Either write thru or write
+						 * back depending on TLB
+						 * memory attributes
+						 */
+
+
+/* Possible values for cache hints */
+
+#define PAL_CACHE_HINT_TEMP_1		0	/* Temporal level 1 */
+#define PAL_CACHE_HINT_NTEMP_1		1	/* Non-temporal level 1 */
+#define PAL_CACHE_HINT_NTEMP_ALL	3	/* Non-temporal all levels */
+
+/* Processor cache protection  information */
+typedef union pal_cache_protection_element_u {
+	u32			pcpi_data;
+	struct {
+		u32		data_bits	: 8, /* # data bits covered by
+						      * each unit of protection
+						      */
+
+				tagprot_lsb	: 6, /* Least -do- */
+				tagprot_msb	: 6, /* Most Sig. tag address
+						      * bit that this
+						      * protection covers.
+						      */
+				prot_bits	: 6, /* # of protection bits */
+				method		: 4, /* Protection method */
+				t_d		: 2; /* Indicates which part
+						      * of the cache this
+						      * protection encoding
+						      * applies.
+						      */
+	} pcp_info;
+} pal_cache_protection_element_t;
+
+#define pcpi_cache_prot_part	pcp_info.t_d
+#define pcpi_prot_method	pcp_info.method
+#define pcpi_prot_bits		pcp_info.prot_bits
+#define pcpi_tagprot_msb	pcp_info.tagprot_msb
+#define pcpi_tagprot_lsb	pcp_info.tagprot_lsb
+#define pcpi_data_bits		pcp_info.data_bits
+
+/* Processor cache part encodings */
+#define PAL_CACHE_PROT_PART_DATA	0	/* Data protection  */
+#define PAL_CACHE_PROT_PART_TAG		1	/* Tag  protection */
+#define PAL_CACHE_PROT_PART_TAG_DATA	2	/* Tag+data protection (tag is
+						 * more significant )
+						 */
+#define PAL_CACHE_PROT_PART_DATA_TAG	3	/* Data+tag protection (data is
+						 * more significant )
+						 */
+#define PAL_CACHE_PROT_PART_MAX		6
+
+
+typedef struct pal_cache_protection_info_s {
+	pal_status_t			pcpi_status;
+	pal_cache_protection_element_t	pcp_info[PAL_CACHE_PROT_PART_MAX];
+} pal_cache_protection_info_t;
+
+
+/* Processor cache protection method encodings */
+#define PAL_CACHE_PROT_METHOD_NONE		0	/* No protection */
+#define PAL_CACHE_PROT_METHOD_ODD_PARITY	1	/* Odd parity */
+#define PAL_CACHE_PROT_METHOD_EVEN_PARITY	2	/* Even parity */
+#define PAL_CACHE_PROT_METHOD_ECC		3	/* ECC protection */
+
+
+/* Processor cache line identification in the hierarchy */
+typedef union pal_cache_line_id_u {
+	u64			pclid_data;
+	struct {
+		u64		cache_type	: 8,	/* 7-0 cache type */
+				level		: 8,	/* 15-8 level of the
+							 * cache in the
+							 * hierarchy.
+							 */
+				way		: 8,	/* 23-16 way in the set
+							 */
+				part		: 8,	/* 31-24 part of the
+							 * cache
+							 */
+				reserved	: 32;	/* 63-32 is reserved*/
+	} pclid_info_read;
+	struct {
+		u64		cache_type	: 8,	/* 7-0 cache type */
+				level		: 8,	/* 15-8 level of the
+							 * cache in the
+							 * hierarchy.
+							 */
+				way		: 8,	/* 23-16 way in the set
+							 */
+				part		: 8,	/* 31-24 part of the
+							 * cache
+							 */
+				mesi		: 8,	/* 39-32 cache line
+							 * state
+							 */
+				start		: 8,	/* 47-40 lsb of data to
+							 * invert
+							 */
+				length		: 8,	/* 55-48 #bits to
+							 * invert
+							 */
+				trigger		: 8;	/* 63-56 Trigger error
+							 * by doing a load
+							 * after the write
+							 */
+
+	} pclid_info_write;
+} pal_cache_line_id_u_t;
+
+#define pclid_read_part		pclid_info_read.part
+#define pclid_read_way		pclid_info_read.way
+#define pclid_read_level	pclid_info_read.level
+#define pclid_read_cache_type	pclid_info_read.cache_type
+
+#define pclid_write_trigger	pclid_info_write.trigger
+#define pclid_write_length	pclid_info_write.length
+#define pclid_write_start	pclid_info_write.start
+#define pclid_write_mesi	pclid_info_write.mesi
+#define pclid_write_part	pclid_info_write.part
+#define pclid_write_way		pclid_info_write.way
+#define pclid_write_level	pclid_info_write.level
+#define pclid_write_cache_type	pclid_info_write.cache_type
+
+/* Processor cache line part encodings */
+#define PAL_CACHE_LINE_ID_PART_DATA		0	/* Data */
+#define PAL_CACHE_LINE_ID_PART_TAG		1	/* Tag */
+#define PAL_CACHE_LINE_ID_PART_DATA_PROT	2	/* Data protection */
+#define PAL_CACHE_LINE_ID_PART_TAG_PROT		3	/* Tag protection */
+#define PAL_CACHE_LINE_ID_PART_DATA_TAG_PROT	4	/* Data+tag
+							 * protection
+							 */
+typedef struct pal_cache_line_info_s {
+	pal_status_t		pcli_status;		/* Return status of the read cache line
+							 * info call.
+							 */
+	u64			pcli_data;		/* 64-bit data, tag, protection bits .. */
+	u64			pcli_data_len;		/* data length in bits */
+	pal_cache_line_state_t	pcli_cache_line_state;	/* mesi state */
+
+} pal_cache_line_info_t;
+
+
+/* Machine Check related crap */
+
+/* Pending event status bits  */
+typedef u64					pal_mc_pending_events_t;
+
+#define PAL_MC_PENDING_MCA			(1 << 0)
+#define PAL_MC_PENDING_INIT			(1 << 1)
+
+/* Error information type */
+typedef u64					pal_mc_info_index_t;
+
+#define PAL_MC_INFO_PROCESSOR			0	/* Processor */
+#define PAL_MC_INFO_CACHE_CHECK			1	/* Cache check */
+#define PAL_MC_INFO_TLB_CHECK			2	/* Tlb check */
+#define PAL_MC_INFO_BUS_CHECK			3	/* Bus check */
+#define PAL_MC_INFO_REQ_ADDR			4	/* Requestor address */
+#define PAL_MC_INFO_RESP_ADDR			5	/* Responder address */
+#define PAL_MC_INFO_TARGET_ADDR			6	/* Target address */
+#define PAL_MC_INFO_IMPL_DEP			7	/* Implementation
+							 * dependent
+							 */
+
+#define PAL_TLB_CHECK_OP_PURGE			8
+
+typedef struct pal_process_state_info_s {
+	u64		reserved1	: 2,
+			rz		: 1,	/* PAL_CHECK processor
+						 * rendezvous
+						 * successful.
+						 */
+
+			ra		: 1,	/* PAL_CHECK attempted
+						 * a rendezvous.
+						 */
+			me		: 1,	/* Distinct multiple
+						 * errors occurred
+						 */
+
+			mn		: 1,	/* Min. state save
+						 * area has been
+						 * registered with PAL
+						 */
+
+			sy		: 1,	/* Storage integrity
+						 * synched
+						 */
+
+
+			co		: 1,	/* Continuable */
+			ci		: 1,	/* MC isolated */
+			us		: 1,	/* Uncontained storage
+						 * damage.
+						 */
+
+
+			hd		: 1,	/* Non-essential hw
+						 * lost (no loss of
+						 * functionality)
+						 * causing the
+						 * processor to run in
+						 * degraded mode.
+						 */
+
+			tl		: 1,	/* 1 => MC occurred
+						 * after an instr was
+						 * executed but before
+						 * the trap that
+						 * resulted from instr
+						 * execution was
+						 * generated.
+						 * (Trap Lost )
+						 */
+			mi		: 1,	/* More information available
+						 * call PAL_MC_ERROR_INFO
+						 */
+			pi		: 1,	/* Precise instruction pointer */
+			pm		: 1,	/* Precise min-state save area */
+
+			dy		: 1,	/* Processor dynamic
+						 * state valid
+						 */
+
+
+			in		: 1,	/* 0 = MC, 1 = INIT */
+			rs		: 1,	/* RSE valid */
+			cm		: 1,	/* MC corrected */
+			ex		: 1,	/* MC is expected */
+			cr		: 1,	/* Control regs valid*/
+			pc		: 1,	/* Perf cntrs valid */
+			dr		: 1,	/* Debug regs valid */
+			tr		: 1,	/* Translation regs
+						 * valid
+						 */
+			rr		: 1,	/* Region regs valid */
+			ar		: 1,	/* App regs valid */
+			br		: 1,	/* Branch regs valid */
+			pr		: 1,	/* Predicate registers
+						 * valid
+						 */
+
+			fp		: 1,	/* fp registers valid*/
+			b1		: 1,	/* Preserved bank one
+						 * general registers
+						 * are valid
+						 */
+			b0		: 1,	/* Preserved bank zero
+						 * general registers
+						 * are valid
+						 */
+			gr		: 1,	/* General registers
+						 * are valid
+						 * (excl. banked regs)
+						 */
+			dsize		: 16,	/* size of dynamic
+						 * state returned
+						 * by the processor
+						 */
+
+			se		: 1,	/* Shared error.  MCA in a
+						   shared structure */
+			reserved2	: 10,
+			cc		: 1,	/* Cache check */
+			tc		: 1,	/* TLB check */
+			bc		: 1,	/* Bus check */
+			rc		: 1,	/* Register file check */
+			uc		: 1;	/* Uarch check */
+
+} pal_processor_state_info_t;
+
+typedef struct pal_cache_check_info_s {
+	u64		op		: 4,	/* Type of cache
+						 * operation that
+						 * caused the machine
+						 * check.
+						 */
+			level		: 2,	/* Cache level */
+			reserved1	: 2,
+			dl		: 1,	/* Failure in data part
+						 * of cache line
+						 */
+			tl		: 1,	/* Failure in tag part
+						 * of cache line
+						 */
+			dc		: 1,	/* Failure in dcache */
+			ic		: 1,	/* Failure in icache */
+			mesi		: 3,	/* Cache line state */
+			mv		: 1,	/* mesi valid */
+			way		: 5,	/* Way in which the
+						 * error occurred
+						 */
+			wiv		: 1,	/* Way field valid */
+			reserved2	: 1,
+			dp		: 1,	/* Data poisoned on MBE */
+			reserved3	: 6,
+			hlth		: 2,	/* Health indicator */
+
+			index		: 20,	/* Cache line index */
+			reserved4	: 2,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_cache_check_info_t;
+
+typedef struct pal_tlb_check_info_s {
+
+	u64		tr_slot		: 8,	/* Slot# of TR where
+						 * error occurred
+						 */
+			trv		: 1,	/* tr_slot field is valid */
+			reserved1	: 1,
+			level		: 2,	/* TLB level where failure occurred */
+			reserved2	: 4,
+			dtr		: 1,	/* Fail in data TR */
+			itr		: 1,	/* Fail in inst TR */
+			dtc		: 1,	/* Fail in data TC */
+			itc		: 1,	/* Fail in inst. TC */
+			op		: 4,	/* Cache operation */
+			reserved3	: 6,
+			hlth		: 2,	/* Health indicator */
+			reserved4	: 22,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_tlb_check_info_t;
+
+typedef struct pal_bus_check_info_s {
+	u64		size		: 5,	/* Xaction size */
+			ib		: 1,	/* Internal bus error */
+			eb		: 1,	/* External bus error */
+			cc		: 1,	/* Error occurred
+						 * during cache-cache
+						 * transfer.
+						 */
+			type		: 8,	/* Bus xaction type*/
+			sev		: 5,	/* Bus error severity*/
+			hier		: 2,	/* Bus hierarchy level */
+			dp		: 1,	/* Data poisoned on MBE */
+			bsi		: 8,	/* Bus error status
+						 * info
+						 */
+			reserved2	: 22,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_bus_check_info_t;
+
+typedef struct pal_reg_file_check_info_s {
+	u64		id		: 4,	/* Register file identifier */
+			op		: 4,	/* Type of register
+						 * operation that
+						 * caused the machine
+						 * check.
+						 */
+			reg_num		: 7,	/* Register number */
+			rnv		: 1,	/* reg_num valid */
+			reserved2	: 38,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			reserved3	: 3,
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_reg_file_check_info_t;
+
+typedef struct pal_uarch_check_info_s {
+	u64		sid		: 5,	/* Structure identification */
+			level		: 3,	/* Level of failure */
+			array_id	: 4,	/* Array identification */
+			op		: 4,	/* Type of
+						 * operation that
+						 * caused the machine
+						 * check.
+						 */
+			way		: 6,	/* Way of structure */
+			wv		: 1,	/* way valid */
+			xv		: 1,	/* index valid */
+			reserved1	: 6,
+			hlth		: 2,	/* Health indicator */
+			index		: 8,	/* Index or set of the uarch
+						 * structure that failed.
+						 */
+			reserved2	: 24,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_uarch_check_info_t;
+
+typedef union pal_mc_error_info_u {
+	u64				pmei_data;
+	pal_processor_state_info_t	pme_processor;
+	pal_cache_check_info_t		pme_cache;
+	pal_tlb_check_info_t		pme_tlb;
+	pal_bus_check_info_t		pme_bus;
+	pal_reg_file_check_info_t	pme_reg_file;
+	pal_uarch_check_info_t		pme_uarch;
+} pal_mc_error_info_t;
+
+#define pmci_proc_unknown_check			pme_processor.uc
+#define pmci_proc_bus_check			pme_processor.bc
+#define pmci_proc_tlb_check			pme_processor.tc
+#define pmci_proc_cache_check			pme_processor.cc
+#define pmci_proc_dynamic_state_size		pme_processor.dsize
+#define pmci_proc_gpr_valid			pme_processor.gr
+#define pmci_proc_preserved_bank0_gpr_valid	pme_processor.b0
+#define pmci_proc_preserved_bank1_gpr_valid	pme_processor.b1
+#define pmci_proc_fp_valid			pme_processor.fp
+#define pmci_proc_predicate_regs_valid		pme_processor.pr
+#define pmci_proc_branch_regs_valid		pme_processor.br
+#define pmci_proc_app_regs_valid		pme_processor.ar
+#define pmci_proc_region_regs_valid		pme_processor.rr
+#define pmci_proc_translation_regs_valid	pme_processor.tr
+#define pmci_proc_debug_regs_valid		pme_processor.dr
+#define pmci_proc_perf_counters_valid		pme_processor.pc
+#define pmci_proc_control_regs_valid		pme_processor.cr
+#define pmci_proc_machine_check_expected	pme_processor.ex
+#define pmci_proc_machine_check_corrected	pme_processor.cm
+#define pmci_proc_rse_valid			pme_processor.rs
+#define pmci_proc_machine_check_or_init		pme_processor.in
+#define pmci_proc_dynamic_state_valid		pme_processor.dy
+#define pmci_proc_operation			pme_processor.op
+#define pmci_proc_trap_lost			pme_processor.tl
+#define pmci_proc_hardware_damage		pme_processor.hd
+#define pmci_proc_uncontained_storage_damage	pme_processor.us
+#define pmci_proc_machine_check_isolated	pme_processor.ci
+#define pmci_proc_continuable			pme_processor.co
+#define pmci_proc_storage_intergrity_synced	pme_processor.sy
+#define pmci_proc_min_state_save_area_regd	pme_processor.mn
+#define	pmci_proc_distinct_multiple_errors	pme_processor.me
+#define pmci_proc_pal_attempted_rendezvous	pme_processor.ra
+#define pmci_proc_pal_rendezvous_complete	pme_processor.rz
+
+
+#define pmci_cache_level			pme_cache.level
+#define pmci_cache_line_state			pme_cache.mesi
+#define pmci_cache_line_state_valid		pme_cache.mv
+#define pmci_cache_line_index			pme_cache.index
+#define pmci_cache_instr_cache_fail		pme_cache.ic
+#define pmci_cache_data_cache_fail		pme_cache.dc
+#define pmci_cache_line_tag_fail		pme_cache.tl
+#define pmci_cache_line_data_fail		pme_cache.dl
+#define pmci_cache_operation			pme_cache.op
+#define pmci_cache_way_valid			pme_cache.wv
+#define pmci_cache_target_address_valid		pme_cache.tv
+#define pmci_cache_way				pme_cache.way
+#define pmci_cache_mc				pme_cache.mc
+
+#define pmci_tlb_instr_translation_cache_fail	pme_tlb.itc
+#define pmci_tlb_data_translation_cache_fail	pme_tlb.dtc
+#define pmci_tlb_instr_translation_reg_fail	pme_tlb.itr
+#define pmci_tlb_data_translation_reg_fail	pme_tlb.dtr
+#define pmci_tlb_translation_reg_slot		pme_tlb.tr_slot
+#define pmci_tlb_mc				pme_tlb.mc
+
+#define pmci_bus_status_info			pme_bus.bsi
+#define pmci_bus_req_address_valid		pme_bus.rq
+#define pmci_bus_resp_address_valid		pme_bus.rp
+#define pmci_bus_target_address_valid		pme_bus.tv
+#define pmci_bus_error_severity			pme_bus.sev
+#define pmci_bus_transaction_type		pme_bus.type
+#define pmci_bus_cache_cache_transfer		pme_bus.cc
+#define pmci_bus_transaction_size		pme_bus.size
+#define pmci_bus_internal_error			pme_bus.ib
+#define pmci_bus_external_error			pme_bus.eb
+#define pmci_bus_mc				pme_bus.mc
+
+/*
+ * NOTE: this min_state_save area struct only includes the 1KB
+ * architectural state save area.  The other 3 KB is scratch space
+ * for PAL.
+ */
+
+typedef struct pal_min_state_area_s {
+	u64	pmsa_nat_bits;		/* nat bits for saved GRs  */
+	u64	pmsa_gr[15];		/* GR1	- GR15		   */
+	u64	pmsa_bank0_gr[16];	/* GR16 - GR31		   */
+	u64	pmsa_bank1_gr[16];	/* GR16 - GR31		   */
+	u64	pmsa_pr;		/* predicate registers	   */
+	u64	pmsa_br0;		/* branch register 0	   */
+	u64	pmsa_rsc;		/* ar.rsc		   */
+	u64	pmsa_iip;		/* cr.iip		   */
+	u64	pmsa_ipsr;		/* cr.ipsr		   */
+	u64	pmsa_ifs;		/* cr.ifs		   */
+	u64	pmsa_xip;		/* previous iip		   */
+	u64	pmsa_xpsr;		/* previous psr		   */
+	u64	pmsa_xfs;		/* previous ifs		   */
+	u64	pmsa_br1;		/* branch register 1	   */
+	u64	pmsa_reserved[70];	/* pal_min_state_area should total to 1KB */
+} pal_min_state_area_t;
+
+
+struct ia64_pal_retval {
+	/*
+	 * A zero status value indicates call completed without error.
+	 * A negative status value indicates reason of call failure.
+	 * A positive status value indicates success but an
+	 * informational value should be printed (e.g., "reboot for
+	 * change to take effect").
+	 */
+	s64 status;
+	u64 v0;
+	u64 v1;
+	u64 v2;
+};
+
+/*
+ * Note: Currently unused PAL arguments are generally labeled
+ * "reserved" so the value specified in the PAL documentation
+ * (generally 0) MUST be passed.  Reserved parameters are not optional
+ * parameters.
+ */
+extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64);
+extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64);
+extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64);
+extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64);
+extern void ia64_save_scratch_fpregs (struct ia64_fpreg *);
+extern void ia64_load_scratch_fpregs (struct ia64_fpreg *);
+
+#define PAL_CALL(iprv,a0,a1,a2,a3) do {			\
+	struct ia64_fpreg fr[6];			\
+	ia64_save_scratch_fpregs(fr);			\
+	iprv = ia64_pal_call_static(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);			\
+} while (0)
+
+#define PAL_CALL_STK(iprv,a0,a1,a2,a3) do {		\
+	struct ia64_fpreg fr[6];			\
+	ia64_save_scratch_fpregs(fr);			\
+	iprv = ia64_pal_call_stacked(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);			\
+} while (0)
+
+#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) do {			\
+	struct ia64_fpreg fr[6];				\
+	ia64_save_scratch_fpregs(fr);				\
+	iprv = ia64_pal_call_phys_static(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);				\
+} while (0)
+
+#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) do {		\
+	struct ia64_fpreg fr[6];				\
+	ia64_save_scratch_fpregs(fr);				\
+	iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);				\
+} while (0)
+
+typedef int (*ia64_pal_handler) (u64, ...);
+extern ia64_pal_handler ia64_pal;
+extern void ia64_pal_handler_init (void *);
+
+extern ia64_pal_handler ia64_pal;
+
+extern pal_cache_config_info_t		l0d_cache_config_info;
+extern pal_cache_config_info_t		l0i_cache_config_info;
+extern pal_cache_config_info_t		l1_cache_config_info;
+extern pal_cache_config_info_t		l2_cache_config_info;
+
+extern pal_cache_protection_info_t	l0d_cache_protection_info;
+extern pal_cache_protection_info_t	l0i_cache_protection_info;
+extern pal_cache_protection_info_t	l1_cache_protection_info;
+extern pal_cache_protection_info_t	l2_cache_protection_info;
+
+extern pal_cache_config_info_t		pal_cache_config_info_get(pal_cache_level_t,
+								  pal_cache_type_t);
+
+extern pal_cache_protection_info_t	pal_cache_protection_info_get(pal_cache_level_t,
+								      pal_cache_type_t);
+
+
+extern void				pal_error(int);
+
+
+/* Useful wrappers for the current list of pal procedures */
+
+typedef union pal_bus_features_u {
+	u64	pal_bus_features_val;
+	struct {
+		u64	pbf_reserved1				:	29;
+		u64	pbf_req_bus_parking			:	1;
+		u64	pbf_bus_lock_mask			:	1;
+		u64	pbf_enable_half_xfer_rate		:	1;
+		u64	pbf_reserved2				:	20;
+		u64	pbf_enable_shared_line_replace		:	1;
+		u64	pbf_enable_exclusive_line_replace	:	1;
+		u64	pbf_disable_xaction_queueing		:	1;
+		u64	pbf_disable_resp_err_check		:	1;
+		u64	pbf_disable_berr_check			:	1;
+		u64	pbf_disable_bus_req_internal_err_signal	:	1;
+		u64	pbf_disable_bus_req_berr_signal		:	1;
+		u64	pbf_disable_bus_init_event_check	:	1;
+		u64	pbf_disable_bus_init_event_signal	:	1;
+		u64	pbf_disable_bus_addr_err_check		:	1;
+		u64	pbf_disable_bus_addr_err_signal		:	1;
+		u64	pbf_disable_bus_data_err_check		:	1;
+	} pal_bus_features_s;
+} pal_bus_features_u_t;
+
+extern void pal_bus_features_print (u64);
+
+/* Provide information about configurable processor bus features */
+static inline s64
+ia64_pal_bus_get_features (pal_bus_features_u_t *features_avail,
+			   pal_bus_features_u_t *features_status,
+			   pal_bus_features_u_t *features_control)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_BUS_GET_FEATURES, 0, 0, 0);
+	if (features_avail)
+		features_avail->pal_bus_features_val = iprv.v0;
+	if (features_status)
+		features_status->pal_bus_features_val = iprv.v1;
+	if (features_control)
+		features_control->pal_bus_features_val = iprv.v2;
+	return iprv.status;
+}
+
+/* Enables/disables specific processor bus features */
+static inline s64
+ia64_pal_bus_set_features (pal_bus_features_u_t feature_select)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_BUS_SET_FEATURES, feature_select.pal_bus_features_val, 0, 0);
+	return iprv.status;
+}
+
+/* Get detailed cache information */
+static inline s64
+ia64_pal_cache_config_info (u64 cache_level, u64 cache_type, pal_cache_config_info_t *conf)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_CACHE_INFO, cache_level, cache_type, 0);
+
+	if (iprv.status == 0) {
+		conf->pcci_status                 = iprv.status;
+		conf->pcci_info_1.pcci1_data      = iprv.v0;
+		conf->pcci_info_2.pcci2_data      = iprv.v1;
+		conf->pcci_reserved               = iprv.v2;
+	}
+	return iprv.status;
+
+}
+
+/* Get detailed cche protection information */
+static inline s64
+ia64_pal_cache_prot_info (u64 cache_level, u64 cache_type, pal_cache_protection_info_t *prot)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_CACHE_PROT_INFO, cache_level, cache_type, 0);
+
+	if (iprv.status == 0) {
+		prot->pcpi_status           = iprv.status;
+		prot->pcp_info[0].pcpi_data = iprv.v0 & 0xffffffff;
+		prot->pcp_info[1].pcpi_data = iprv.v0 >> 32;
+		prot->pcp_info[2].pcpi_data = iprv.v1 & 0xffffffff;
+		prot->pcp_info[3].pcpi_data = iprv.v1 >> 32;
+		prot->pcp_info[4].pcpi_data = iprv.v2 & 0xffffffff;
+		prot->pcp_info[5].pcpi_data = iprv.v2 >> 32;
+	}
+	return iprv.status;
+}
+
+/*
+ * Flush the processor instruction or data caches.  *PROGRESS must be
+ * initialized to zero before calling this for the first time..
+ */
+static inline s64
+ia64_pal_cache_flush (u64 cache_type, u64 invalidate, u64 *progress, u64 *vector)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress);
+	if (vector)
+		*vector = iprv.v0;
+	*progress = iprv.v1;
+	return iprv.status;
+}
+
+
+/* Initialize the processor controlled caches */
+static inline s64
+ia64_pal_cache_init (u64 level, u64 cache_type, u64 rest)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_INIT, level, cache_type, rest);
+	return iprv.status;
+}
+
+/* Initialize the tags and data of a data or unified cache line of
+ * processor controlled cache to known values without the availability
+ * of backing memory.
+ */
+static inline s64
+ia64_pal_cache_line_init (u64 physical_addr, u64 data_value)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_LINE_INIT, physical_addr, data_value, 0);
+	return iprv.status;
+}
+
+
+/* Read the data and tag of a processor controlled cache line for diags */
+static inline s64
+ia64_pal_cache_read (pal_cache_line_id_u_t line_id, u64 physical_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS_STK(iprv, PAL_CACHE_READ, line_id.pclid_data,
+				physical_addr, 0);
+	return iprv.status;
+}
+
+/* Return summary information about the hierarchy of caches controlled by the processor */
+static inline long ia64_pal_cache_summary(unsigned long *cache_levels,
+						unsigned long *unique_caches)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_SUMMARY, 0, 0, 0);
+	if (cache_levels)
+		*cache_levels = iprv.v0;
+	if (unique_caches)
+		*unique_caches = iprv.v1;
+	return iprv.status;
+}
+
+/* Write the data and tag of a processor-controlled cache line for diags */
+static inline s64
+ia64_pal_cache_write (pal_cache_line_id_u_t line_id, u64 physical_addr, u64 data)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS_STK(iprv, PAL_CACHE_WRITE, line_id.pclid_data,
+				physical_addr, data);
+	return iprv.status;
+}
+
+
+/* Return the parameters needed to copy relocatable PAL procedures from ROM to memory */
+static inline s64
+ia64_pal_copy_info (u64 copy_type, u64 num_procs, u64 num_iopics,
+		    u64 *buffer_size, u64 *buffer_align)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_COPY_INFO, copy_type, num_procs, num_iopics);
+	if (buffer_size)
+		*buffer_size = iprv.v0;
+	if (buffer_align)
+		*buffer_align = iprv.v1;
+	return iprv.status;
+}
+
+/* Copy relocatable PAL procedures from ROM to memory */
+static inline s64
+ia64_pal_copy_pal (u64 target_addr, u64 alloc_size, u64 processor, u64 *pal_proc_offset)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_COPY_PAL, target_addr, alloc_size, processor);
+	if (pal_proc_offset)
+		*pal_proc_offset = iprv.v0;
+	return iprv.status;
+}
+
+/* Return the number of instruction and data debug register pairs */
+static inline long ia64_pal_debug_info(unsigned long *inst_regs,
+						unsigned long *data_regs)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_DEBUG_INFO, 0, 0, 0);
+	if (inst_regs)
+		*inst_regs = iprv.v0;
+	if (data_regs)
+		*data_regs = iprv.v1;
+
+	return iprv.status;
+}
+
+#ifdef TBD
+/* Switch from IA64-system environment to IA-32 system environment */
+static inline s64
+ia64_pal_enter_ia32_env (ia32_env1, ia32_env2, ia32_env3)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_ENTER_IA_32_ENV, ia32_env1, ia32_env2, ia32_env3);
+	return iprv.status;
+}
+#endif
+
+/* Get unique geographical address of this processor on its bus */
+static inline s64
+ia64_pal_fixed_addr (u64 *global_unique_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FIXED_ADDR, 0, 0, 0);
+	if (global_unique_addr)
+		*global_unique_addr = iprv.v0;
+	return iprv.status;
+}
+
+/* Get base frequency of the platform if generated by the processor */
+static inline long ia64_pal_freq_base(unsigned long *platform_base_freq)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FREQ_BASE, 0, 0, 0);
+	if (platform_base_freq)
+		*platform_base_freq = iprv.v0;
+	return iprv.status;
+}
+
+/*
+ * Get the ratios for processor frequency, bus frequency and interval timer to
+ * to base frequency of the platform
+ */
+static inline s64
+ia64_pal_freq_ratios (struct pal_freq_ratio *proc_ratio, struct pal_freq_ratio *bus_ratio,
+		      struct pal_freq_ratio *itc_ratio)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FREQ_RATIOS, 0, 0, 0);
+	if (proc_ratio)
+		*(u64 *)proc_ratio = iprv.v0;
+	if (bus_ratio)
+		*(u64 *)bus_ratio = iprv.v1;
+	if (itc_ratio)
+		*(u64 *)itc_ratio = iprv.v2;
+	return iprv.status;
+}
+
+/*
+ * Get the current hardware resource sharing policy of the processor
+ */
+static inline s64
+ia64_pal_get_hw_policy (u64 proc_num, u64 *cur_policy, u64 *num_impacted,
+			u64 *la)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_GET_HW_POLICY, proc_num, 0, 0);
+	if (cur_policy)
+		*cur_policy = iprv.v0;
+	if (num_impacted)
+		*num_impacted = iprv.v1;
+	if (la)
+		*la = iprv.v2;
+	return iprv.status;
+}
+
+/* Make the processor enter HALT or one of the implementation dependent low
+ * power states where prefetching and execution are suspended and cache and
+ * TLB coherency is not maintained.
+ */
+static inline s64
+ia64_pal_halt (u64 halt_state)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_HALT, halt_state, 0, 0);
+	return iprv.status;
+}
+
+typedef union pal_power_mgmt_info_u {
+	u64			ppmi_data;
+	struct {
+	       u64		exit_latency		: 16,
+				entry_latency		: 16,
+				power_consumption	: 28,
+				im			: 1,
+				co			: 1,
+				reserved		: 2;
+	} pal_power_mgmt_info_s;
+} pal_power_mgmt_info_u_t;
+
+/* Return information about processor's optional power management capabilities. */
+static inline s64
+ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_HALT_INFO, (unsigned long) power_buf, 0, 0);
+	return iprv.status;
+}
+
+/* Get the current P-state information */
+static inline s64
+ia64_pal_get_pstate (u64 *pstate_index, unsigned long type)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_GET_PSTATE, type, 0, 0);
+	*pstate_index = iprv.v0;
+	return iprv.status;
+}
+
+/* Set the P-state */
+static inline s64
+ia64_pal_set_pstate (u64 pstate_index)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_SET_PSTATE, pstate_index, 0, 0);
+	return iprv.status;
+}
+
+/* Processor branding information*/
+static inline s64
+ia64_pal_get_brand_info (char *brand_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_BRAND_INFO, 0, (u64)brand_info, 0);
+	return iprv.status;
+}
+
+/* Cause the processor to enter LIGHT HALT state, where prefetching and execution are
+ * suspended, but cache and TLB coherency is maintained.
+ */
+static inline s64
+ia64_pal_halt_light (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_HALT_LIGHT, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Clear all the processor error logging   registers and reset the indicator that allows
+ * the error logging registers to be written. This procedure also checks the pending
+ * machine check bit and pending INIT bit and reports their states.
+ */
+static inline s64
+ia64_pal_mc_clear_log (u64 *pending_vector)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_CLEAR_LOG, 0, 0, 0);
+	if (pending_vector)
+		*pending_vector = iprv.v0;
+	return iprv.status;
+}
+
+/* Ensure that all outstanding transactions in a processor are completed or that any
+ * MCA due to thes outstanding transaction is taken.
+ */
+static inline s64
+ia64_pal_mc_drain (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_DRAIN, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Return the machine check dynamic processor state */
+static inline s64
+ia64_pal_mc_dynamic_state (u64 info_type, u64 dy_buffer, u64 *size)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, info_type, dy_buffer, 0);
+	if (size)
+		*size = iprv.v0;
+	return iprv.status;
+}
+
+/* Return processor machine check information */
+static inline s64
+ia64_pal_mc_error_info (u64 info_index, u64 type_index, u64 *size, u64 *error_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_ERROR_INFO, info_index, type_index, 0);
+	if (size)
+		*size = iprv.v0;
+	if (error_info)
+		*error_info = iprv.v1;
+	return iprv.status;
+}
+
+/* Injects the requested processor error or returns info on
+ * supported injection capabilities for current processor implementation
+ */
+static inline s64
+ia64_pal_mc_error_inject_phys (u64 err_type_info, u64 err_struct_info,
+			u64 err_data_buffer, u64 *capabilities, u64 *resources)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info,
+			  err_struct_info, err_data_buffer);
+	if (capabilities)
+		*capabilities= iprv.v0;
+	if (resources)
+		*resources= iprv.v1;
+	return iprv.status;
+}
+
+static inline s64
+ia64_pal_mc_error_inject_virt (u64 err_type_info, u64 err_struct_info,
+			u64 err_data_buffer, u64 *capabilities, u64 *resources)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info,
+			  err_struct_info, err_data_buffer);
+	if (capabilities)
+		*capabilities= iprv.v0;
+	if (resources)
+		*resources= iprv.v1;
+	return iprv.status;
+}
+
+/* Inform PALE_CHECK whether a machine check is expected so that PALE_CHECK willnot
+ * attempt to correct any expected machine checks.
+ */
+static inline s64
+ia64_pal_mc_expected (u64 expected, u64 *previous)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_EXPECTED, expected, 0, 0);
+	if (previous)
+		*previous = iprv.v0;
+	return iprv.status;
+}
+
+typedef union pal_hw_tracking_u {
+	u64			pht_data;
+	struct {
+		u64		itc	:4,	/* Instruction cache tracking */
+				dct	:4,	/* Date cache tracking */
+				itt	:4,	/* Instruction TLB tracking */
+				ddt	:4,	/* Data TLB tracking */
+				reserved:48;
+	} pal_hw_tracking_s;
+} pal_hw_tracking_u_t;
+
+/*
+ * Hardware tracking status.
+ */
+static inline s64
+ia64_pal_mc_hw_tracking (u64 *status)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_HW_TRACKING, 0, 0, 0);
+	if (status)
+		*status = iprv.v0;
+	return iprv.status;
+}
+
+/* Register a platform dependent location with PAL to which it can save
+ * minimal processor state in the event of a machine check or initialization
+ * event.
+ */
+static inline s64
+ia64_pal_mc_register_mem (u64 physical_addr, u64 size, u64 *req_size)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, size, 0);
+	if (req_size)
+		*req_size = iprv.v0;
+	return iprv.status;
+}
+
+/* Restore minimal architectural processor state, set CMC interrupt if necessary
+ * and resume execution
+ */
+static inline s64
+ia64_pal_mc_resume (u64 set_cmci, u64 save_ptr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_RESUME, set_cmci, save_ptr, 0);
+	return iprv.status;
+}
+
+/* Return the memory attributes implemented by the processor */
+static inline s64
+ia64_pal_mem_attrib (u64 *mem_attrib)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MEM_ATTRIB, 0, 0, 0);
+	if (mem_attrib)
+		*mem_attrib = iprv.v0 & 0xff;
+	return iprv.status;
+}
+
+/* Return the amount of memory needed for second phase of processor
+ * self-test and the required alignment of memory.
+ */
+static inline s64
+ia64_pal_mem_for_test (u64 *bytes_needed, u64 *alignment)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MEM_FOR_TEST, 0, 0, 0);
+	if (bytes_needed)
+		*bytes_needed = iprv.v0;
+	if (alignment)
+		*alignment = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_perf_mon_info_u {
+	u64			  ppmi_data;
+	struct {
+	       u64		generic		: 8,
+				width		: 8,
+				cycles		: 8,
+				retired		: 8,
+				reserved	: 32;
+	} pal_perf_mon_info_s;
+} pal_perf_mon_info_u_t;
+
+/* Return the performance monitor information about what can be counted
+ * and how to configure the monitors to count the desired events.
+ */
+static inline s64
+ia64_pal_perf_mon_info (u64 *pm_buffer, pal_perf_mon_info_u_t *pm_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PERF_MON_INFO, (unsigned long) pm_buffer, 0, 0);
+	if (pm_info)
+		pm_info->ppmi_data = iprv.v0;
+	return iprv.status;
+}
+
+/* Specifies the physical address of the processor interrupt block
+ * and I/O port space.
+ */
+static inline s64
+ia64_pal_platform_addr (u64 type, u64 physical_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PLATFORM_ADDR, type, physical_addr, 0);
+	return iprv.status;
+}
+
+/* Set the SAL PMI entrypoint in memory */
+static inline s64
+ia64_pal_pmi_entrypoint (u64 sal_pmi_entry_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PMI_ENTRYPOINT, sal_pmi_entry_addr, 0, 0);
+	return iprv.status;
+}
+
+struct pal_features_s;
+/* Provide information about configurable processor features */
+static inline s64
+ia64_pal_proc_get_features (u64 *features_avail,
+			    u64 *features_status,
+			    u64 *features_control,
+			    u64 features_set)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, features_set, 0);
+	if (iprv.status == 0) {
+		*features_avail   = iprv.v0;
+		*features_status  = iprv.v1;
+		*features_control = iprv.v2;
+	}
+	return iprv.status;
+}
+
+/* Enable/disable processor dependent features */
+static inline s64
+ia64_pal_proc_set_features (u64 feature_select)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, feature_select, 0, 0);
+	return iprv.status;
+}
+
+/*
+ * Put everything in a struct so we avoid the global offset table whenever
+ * possible.
+ */
+typedef struct ia64_ptce_info_s {
+	unsigned long	base;
+	u32		count[2];
+	u32		stride[2];
+} ia64_ptce_info_t;
+
+/* Return the information required for the architected loop used to purge
+ * (initialize) the entire TC
+ */
+static inline s64
+ia64_get_ptce (ia64_ptce_info_t *ptce)
+{
+	struct ia64_pal_retval iprv;
+
+	if (!ptce)
+		return -1;
+
+	PAL_CALL(iprv, PAL_PTCE_INFO, 0, 0, 0);
+	if (iprv.status == 0) {
+		ptce->base = iprv.v0;
+		ptce->count[0] = iprv.v1 >> 32;
+		ptce->count[1] = iprv.v1 & 0xffffffff;
+		ptce->stride[0] = iprv.v2 >> 32;
+		ptce->stride[1] = iprv.v2 & 0xffffffff;
+	}
+	return iprv.status;
+}
+
+/* Return info about implemented application and control registers. */
+static inline s64
+ia64_pal_register_info (u64 info_request, u64 *reg_info_1, u64 *reg_info_2)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_REGISTER_INFO, info_request, 0, 0);
+	if (reg_info_1)
+		*reg_info_1 = iprv.v0;
+	if (reg_info_2)
+		*reg_info_2 = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_hints_u {
+	unsigned long		ph_data;
+	struct {
+	       unsigned long	si		: 1,
+				li		: 1,
+				reserved	: 62;
+	} pal_hints_s;
+} pal_hints_u_t;
+
+/* Return information about the register stack and RSE for this processor
+ * implementation.
+ */
+static inline long ia64_pal_rse_info(unsigned long *num_phys_stacked,
+							pal_hints_u_t *hints)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_RSE_INFO, 0, 0, 0);
+	if (num_phys_stacked)
+		*num_phys_stacked = iprv.v0;
+	if (hints)
+		hints->ph_data = iprv.v1;
+	return iprv.status;
+}
+
+/*
+ * Set the current hardware resource sharing policy of the processor
+ */
+static inline s64
+ia64_pal_set_hw_policy (u64 policy)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_SET_HW_POLICY, policy, 0, 0);
+	return iprv.status;
+}
+
+/* Cause the processor to enter	SHUTDOWN state, where prefetching and execution are
+ * suspended, but cause cache and TLB coherency to be maintained.
+ * This is usually called in IA-32 mode.
+ */
+static inline s64
+ia64_pal_shutdown (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_SHUTDOWN, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Perform the second phase of processor self-test. */
+static inline s64
+ia64_pal_test_proc (u64 test_addr, u64 test_size, u64 attributes, u64 *self_test_state)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_TEST_PROC, test_addr, test_size, attributes);
+	if (self_test_state)
+		*self_test_state = iprv.v0;
+	return iprv.status;
+}
+
+typedef union  pal_version_u {
+	u64	pal_version_val;
+	struct {
+		u64	pv_pal_b_rev		:	8;
+		u64	pv_pal_b_model		:	8;
+		u64	pv_reserved1		:	8;
+		u64	pv_pal_vendor		:	8;
+		u64	pv_pal_a_rev		:	8;
+		u64	pv_pal_a_model		:	8;
+		u64	pv_reserved2		:	16;
+	} pal_version_s;
+} pal_version_u_t;
+
+
+/*
+ * Return PAL version information.  While the documentation states that
+ * PAL_VERSION can be called in either physical or virtual mode, some
+ * implementations only allow physical calls.  We don't call it very often,
+ * so the overhead isn't worth eliminating.
+ */
+static inline s64
+ia64_pal_version (pal_version_u_t *pal_min_version, pal_version_u_t *pal_cur_version)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_VERSION, 0, 0, 0);
+	if (pal_min_version)
+		pal_min_version->pal_version_val = iprv.v0;
+
+	if (pal_cur_version)
+		pal_cur_version->pal_version_val = iprv.v1;
+
+	return iprv.status;
+}
+
+typedef union pal_tc_info_u {
+	u64			pti_val;
+	struct {
+	       u64		num_sets	:	8,
+				associativity	:	8,
+				num_entries	:	16,
+				pf		:	1,
+				unified		:	1,
+				reduce_tr	:	1,
+				reserved	:	29;
+	} pal_tc_info_s;
+} pal_tc_info_u_t;
+
+#define tc_reduce_tr		pal_tc_info_s.reduce_tr
+#define tc_unified		pal_tc_info_s.unified
+#define tc_pf			pal_tc_info_s.pf
+#define tc_num_entries		pal_tc_info_s.num_entries
+#define tc_associativity	pal_tc_info_s.associativity
+#define tc_num_sets		pal_tc_info_s.num_sets
+
+
+/* Return information about the virtual memory characteristics of the processor
+ * implementation.
+ */
+static inline s64
+ia64_pal_vm_info (u64 tc_level, u64 tc_type,  pal_tc_info_u_t *tc_info, u64 *tc_pages)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VM_INFO, tc_level, tc_type, 0);
+	if (tc_info)
+		tc_info->pti_val = iprv.v0;
+	if (tc_pages)
+		*tc_pages = iprv.v1;
+	return iprv.status;
+}
+
+/* Get page size information about the virtual memory characteristics of the processor
+ * implementation.
+ */
+static inline s64 ia64_pal_vm_page_size(u64 *tr_pages, u64 *vw_pages)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VM_PAGE_SIZE, 0, 0, 0);
+	if (tr_pages)
+		*tr_pages = iprv.v0;
+	if (vw_pages)
+		*vw_pages = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_vm_info_1_u {
+	u64			pvi1_val;
+	struct {
+		u64		vw		: 1,
+				phys_add_size	: 7,
+				key_size	: 8,
+				max_pkr		: 8,
+				hash_tag_id	: 8,
+				max_dtr_entry	: 8,
+				max_itr_entry	: 8,
+				max_unique_tcs	: 8,
+				num_tc_levels	: 8;
+	} pal_vm_info_1_s;
+} pal_vm_info_1_u_t;
+
+#define PAL_MAX_PURGES		0xFFFF		/* all ones is means unlimited */
+
+typedef union pal_vm_info_2_u {
+	u64			pvi2_val;
+	struct {
+		u64		impl_va_msb	: 8,
+				rid_size	: 8,
+				max_purges	: 16,
+				reserved	: 32;
+	} pal_vm_info_2_s;
+} pal_vm_info_2_u_t;
+
+/* Get summary information about the virtual memory characteristics of the processor
+ * implementation.
+ */
+static inline s64
+ia64_pal_vm_summary (pal_vm_info_1_u_t *vm_info_1, pal_vm_info_2_u_t *vm_info_2)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VM_SUMMARY, 0, 0, 0);
+	if (vm_info_1)
+		vm_info_1->pvi1_val = iprv.v0;
+	if (vm_info_2)
+		vm_info_2->pvi2_val = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_vp_info_u {
+	u64			pvi_val;
+	struct {
+		u64		index:		48,	/* virtual feature set info */
+				vmm_id:		16;	/* feature set id */
+	} pal_vp_info_s;
+} pal_vp_info_u_t;
+
+/*
+ * Returns information about virtual processor features
+ */
+static inline s64
+ia64_pal_vp_info (u64 feature_set, u64 vp_buffer, u64 *vp_info, u64 *vmm_id)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VP_INFO, feature_set, vp_buffer, 0);
+	if (vp_info)
+		*vp_info = iprv.v0;
+	if (vmm_id)
+		*vmm_id = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_itr_valid_u {
+	u64			piv_val;
+	struct {
+	       u64		access_rights_valid	: 1,
+				priv_level_valid	: 1,
+				dirty_bit_valid		: 1,
+				mem_attr_valid		: 1,
+				reserved		: 60;
+	} pal_tr_valid_s;
+} pal_tr_valid_u_t;
+
+/* Read a translation register */
+static inline s64
+ia64_pal_tr_read (u64 reg_num, u64 tr_type, u64 *tr_buffer, pal_tr_valid_u_t *tr_valid)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS_STK(iprv, PAL_VM_TR_READ, reg_num, tr_type,(u64)ia64_tpa(tr_buffer));
+	if (tr_valid)
+		tr_valid->piv_val = iprv.v0;
+	return iprv.status;
+}
+
+/*
+ * PAL_PREFETCH_VISIBILITY transaction types
+ */
+#define PAL_VISIBILITY_VIRTUAL		0
+#define PAL_VISIBILITY_PHYSICAL		1
+
+/*
+ * PAL_PREFETCH_VISIBILITY return codes
+ */
+#define PAL_VISIBILITY_OK		1
+#define PAL_VISIBILITY_OK_REMOTE_NEEDED	0
+#define PAL_VISIBILITY_INVAL_ARG	-2
+#define PAL_VISIBILITY_ERROR		-3
+
+static inline s64
+ia64_pal_prefetch_visibility (s64 trans_type)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PREFETCH_VISIBILITY, trans_type, 0, 0);
+	return iprv.status;
+}
+
+/* data structure for getting information on logical to physical mappings */
+typedef union pal_log_overview_u {
+	struct {
+		u64	num_log		:16,	/* Total number of logical
+						 * processors on this die
+						 */
+			tpc		:8,	/* Threads per core */
+			reserved3	:8,	/* Reserved */
+			cpp		:8,	/* Cores per processor */
+			reserved2	:8,	/* Reserved */
+			ppid		:8,	/* Physical processor ID */
+			reserved1	:8;	/* Reserved */
+	} overview_bits;
+	u64 overview_data;
+} pal_log_overview_t;
+
+typedef union pal_proc_n_log_info1_u{
+	struct {
+		u64	tid		:16,	/* Thread id */
+			reserved2	:16,	/* Reserved */
+			cid		:16,	/* Core id */
+			reserved1	:16;	/* Reserved */
+	} ppli1_bits;
+	u64	ppli1_data;
+} pal_proc_n_log_info1_t;
+
+typedef union pal_proc_n_log_info2_u {
+	struct {
+		u64	la		:16,	/* Logical address */
+			reserved	:48;	/* Reserved */
+	} ppli2_bits;
+	u64	ppli2_data;
+} pal_proc_n_log_info2_t;
+
+typedef struct pal_logical_to_physical_s
+{
+	pal_log_overview_t overview;
+	pal_proc_n_log_info1_t ppli1;
+	pal_proc_n_log_info2_t ppli2;
+} pal_logical_to_physical_t;
+
+#define overview_num_log	overview.overview_bits.num_log
+#define overview_tpc		overview.overview_bits.tpc
+#define overview_cpp		overview.overview_bits.cpp
+#define overview_ppid		overview.overview_bits.ppid
+#define log1_tid		ppli1.ppli1_bits.tid
+#define log1_cid		ppli1.ppli1_bits.cid
+#define log2_la			ppli2.ppli2_bits.la
+
+/* Get information on logical to physical processor mappings. */
+static inline s64
+ia64_pal_logical_to_phys(u64 proc_number, pal_logical_to_physical_t *mapping)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_LOGICAL_TO_PHYSICAL, proc_number, 0, 0);
+
+	if (iprv.status == PAL_STATUS_SUCCESS)
+	{
+		mapping->overview.overview_data = iprv.v0;
+		mapping->ppli1.ppli1_data = iprv.v1;
+		mapping->ppli2.ppli2_data = iprv.v2;
+	}
+
+	return iprv.status;
+}
+
+typedef struct pal_cache_shared_info_s
+{
+	u64 num_shared;
+	pal_proc_n_log_info1_t ppli1;
+	pal_proc_n_log_info2_t ppli2;
+} pal_cache_shared_info_t;
+
+/* Get information on logical to physical processor mappings. */
+static inline s64
+ia64_pal_cache_shared_info(u64 level,
+		u64 type,
+		u64 proc_number,
+		pal_cache_shared_info_t *info)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_CACHE_SHARED_INFO, level, type, proc_number);
+
+	if (iprv.status == PAL_STATUS_SUCCESS) {
+		info->num_shared = iprv.v0;
+		info->ppli1.ppli1_data = iprv.v1;
+		info->ppli2.ppli2_data = iprv.v2;
+	}
+
+	return iprv.status;
+}
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_PAL_H */
diff --git a/arch/ia64/include/asm/param.h b/arch/ia64/include/asm/param.h
new file mode 100644
index 00000000..0964c32c
--- /dev/null
+++ b/arch/ia64/include/asm/param.h
@@ -0,0 +1,33 @@
+#ifndef _ASM_IA64_PARAM_H
+#define _ASM_IA64_PARAM_H
+
+/*
+ * Fundamental kernel parameters.
+ *
+ * Based on <asm-i386/param.h>.
+ *
+ * Modified 1998, 1999, 2002-2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+#define EXEC_PAGESIZE	65536
+
+#ifndef NOGROUP
+# define NOGROUP	(-1)
+#endif
+
+#define MAXHOSTNAMELEN	64	/* max length of hostname */
+
+#ifdef __KERNEL__
+# define HZ		CONFIG_HZ
+# define USER_HZ	HZ
+# define CLOCKS_PER_SEC	HZ	/* frequency at which times() counts */
+#else
+   /*
+    * Technically, this is wrong, but some old apps still refer to it.  The proper way to
+    * get the HZ value is via sysconf(_SC_CLK_TCK).
+    */
+# define HZ 1024
+#endif
+
+#endif /* _ASM_IA64_PARAM_H */
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
new file mode 100644
index 00000000..2eb0a981
--- /dev/null
+++ b/arch/ia64/include/asm/paravirt.h
@@ -0,0 +1,318 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+
+#ifndef __ASM_PARAVIRT_H
+#define __ASM_PARAVIRT_H
+
+#ifndef __ASSEMBLY__
+/******************************************************************************
+ * fsys related addresses
+ */
+struct pv_fsys_data {
+	unsigned long *fsyscall_table;
+	void *fsys_bubble_down;
+};
+
+extern struct pv_fsys_data pv_fsys_data;
+
+unsigned long *paravirt_get_fsyscall_table(void);
+char *paravirt_get_fsys_bubble_down(void);
+
+/******************************************************************************
+ * patchlist addresses for gate page
+ */
+enum pv_gate_patchlist {
+	PV_GATE_START_FSYSCALL,
+	PV_GATE_END_FSYSCALL,
+
+	PV_GATE_START_BRL_FSYS_BUBBLE_DOWN,
+	PV_GATE_END_BRL_FSYS_BUBBLE_DOWN,
+
+	PV_GATE_START_VTOP,
+	PV_GATE_END_VTOP,
+
+	PV_GATE_START_MCKINLEY_E9,
+	PV_GATE_END_MCKINLEY_E9,
+};
+
+struct pv_patchdata {
+	unsigned long start_fsyscall_patchlist;
+	unsigned long end_fsyscall_patchlist;
+	unsigned long start_brl_fsys_bubble_down_patchlist;
+	unsigned long end_brl_fsys_bubble_down_patchlist;
+	unsigned long start_vtop_patchlist;
+	unsigned long end_vtop_patchlist;
+	unsigned long start_mckinley_e9_patchlist;
+	unsigned long end_mckinley_e9_patchlist;
+
+	void *gate_section;
+};
+
+extern struct pv_patchdata pv_patchdata;
+
+unsigned long paravirt_get_gate_patchlist(enum pv_gate_patchlist type);
+void *paravirt_get_gate_section(void);
+#endif
+
+#ifdef CONFIG_PARAVIRT_GUEST
+
+#define PARAVIRT_HYPERVISOR_TYPE_DEFAULT	0
+#define PARAVIRT_HYPERVISOR_TYPE_XEN		1
+
+#ifndef __ASSEMBLY__
+
+#include <asm/hw_irq.h>
+#include <asm/meminit.h>
+
+/******************************************************************************
+ * general info
+ */
+struct pv_info {
+	unsigned int kernel_rpl;
+	int paravirt_enabled;
+	const char *name;
+};
+
+extern struct pv_info pv_info;
+
+static inline int paravirt_enabled(void)
+{
+	return pv_info.paravirt_enabled;
+}
+
+static inline unsigned int get_kernel_rpl(void)
+{
+	return pv_info.kernel_rpl;
+}
+
+/******************************************************************************
+ * initialization hooks.
+ */
+struct rsvd_region;
+
+struct pv_init_ops {
+	void (*banner)(void);
+
+	int (*reserve_memory)(struct rsvd_region *region);
+
+	void (*arch_setup_early)(void);
+	void (*arch_setup_console)(char **cmdline_p);
+	int (*arch_setup_nomca)(void);
+
+	void (*post_smp_prepare_boot_cpu)(void);
+
+#ifdef ASM_SUPPORTED
+	unsigned long (*patch_bundle)(void *sbundle, void *ebundle,
+				      unsigned long type);
+	unsigned long (*patch_inst)(unsigned long stag, unsigned long etag,
+				    unsigned long type);
+#endif
+	void (*patch_branch)(unsigned long tag, unsigned long type);
+};
+
+extern struct pv_init_ops pv_init_ops;
+
+static inline void paravirt_banner(void)
+{
+	if (pv_init_ops.banner)
+		pv_init_ops.banner();
+}
+
+static inline int paravirt_reserve_memory(struct rsvd_region *region)
+{
+	if (pv_init_ops.reserve_memory)
+		return pv_init_ops.reserve_memory(region);
+	return 0;
+}
+
+static inline void paravirt_arch_setup_early(void)
+{
+	if (pv_init_ops.arch_setup_early)
+		pv_init_ops.arch_setup_early();
+}
+
+static inline void paravirt_arch_setup_console(char **cmdline_p)
+{
+	if (pv_init_ops.arch_setup_console)
+		pv_init_ops.arch_setup_console(cmdline_p);
+}
+
+static inline int paravirt_arch_setup_nomca(void)
+{
+	if (pv_init_ops.arch_setup_nomca)
+		return pv_init_ops.arch_setup_nomca();
+	return 0;
+}
+
+static inline void paravirt_post_smp_prepare_boot_cpu(void)
+{
+	if (pv_init_ops.post_smp_prepare_boot_cpu)
+		pv_init_ops.post_smp_prepare_boot_cpu();
+}
+
+/******************************************************************************
+ * replacement of iosapic operations.
+ */
+
+struct pv_iosapic_ops {
+	void (*pcat_compat_init)(void);
+
+	struct irq_chip *(*__get_irq_chip)(unsigned long trigger);
+
+	unsigned int (*__read)(char __iomem *iosapic, unsigned int reg);
+	void (*__write)(char __iomem *iosapic, unsigned int reg, u32 val);
+};
+
+extern struct pv_iosapic_ops pv_iosapic_ops;
+
+static inline void
+iosapic_pcat_compat_init(void)
+{
+	if (pv_iosapic_ops.pcat_compat_init)
+		pv_iosapic_ops.pcat_compat_init();
+}
+
+static inline struct irq_chip*
+iosapic_get_irq_chip(unsigned long trigger)
+{
+	return pv_iosapic_ops.__get_irq_chip(trigger);
+}
+
+static inline unsigned int
+__iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+	return pv_iosapic_ops.__read(iosapic, reg);
+}
+
+static inline void
+__iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+	return pv_iosapic_ops.__write(iosapic, reg, val);
+}
+
+/******************************************************************************
+ * replacement of irq operations.
+ */
+
+struct pv_irq_ops {
+	void (*register_ipi)(void);
+
+	int (*assign_irq_vector)(int irq);
+	void (*free_irq_vector)(int vector);
+
+	void (*register_percpu_irq)(ia64_vector vec,
+				    struct irqaction *action);
+
+	void (*resend_irq)(unsigned int vector);
+};
+
+extern struct pv_irq_ops pv_irq_ops;
+
+static inline void
+ia64_register_ipi(void)
+{
+	pv_irq_ops.register_ipi();
+}
+
+static inline int
+assign_irq_vector(int irq)
+{
+	return pv_irq_ops.assign_irq_vector(irq);
+}
+
+static inline void
+free_irq_vector(int vector)
+{
+	return pv_irq_ops.free_irq_vector(vector);
+}
+
+static inline void
+register_percpu_irq(ia64_vector vec, struct irqaction *action)
+{
+	pv_irq_ops.register_percpu_irq(vec, action);
+}
+
+static inline void
+ia64_resend_irq(unsigned int vector)
+{
+	pv_irq_ops.resend_irq(vector);
+}
+
+/******************************************************************************
+ * replacement of time operations.
+ */
+
+extern struct itc_jitter_data_t itc_jitter_data;
+extern volatile int time_keeper_id;
+
+struct pv_time_ops {
+	void (*init_missing_ticks_accounting)(int cpu);
+	int (*do_steal_accounting)(unsigned long *new_itm);
+
+	void (*clocksource_resume)(void);
+
+	unsigned long long (*sched_clock)(void);
+};
+
+extern struct pv_time_ops pv_time_ops;
+
+static inline void
+paravirt_init_missing_ticks_accounting(int cpu)
+{
+	if (pv_time_ops.init_missing_ticks_accounting)
+		pv_time_ops.init_missing_ticks_accounting(cpu);
+}
+
+static inline int
+paravirt_do_steal_accounting(unsigned long *new_itm)
+{
+	return pv_time_ops.do_steal_accounting(new_itm);
+}
+
+static inline unsigned long long paravirt_sched_clock(void)
+{
+	return pv_time_ops.sched_clock();
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#else
+/* fallback for native case */
+
+#ifndef __ASSEMBLY__
+
+#define paravirt_banner()				do { } while (0)
+#define paravirt_reserve_memory(region)			0
+
+#define paravirt_arch_setup_early()			do { } while (0)
+#define paravirt_arch_setup_console(cmdline_p)		do { } while (0)
+#define paravirt_arch_setup_nomca()			0
+#define paravirt_post_smp_prepare_boot_cpu()		do { } while (0)
+
+#define paravirt_init_missing_ticks_accounting(cpu)	do { } while (0)
+#define paravirt_do_steal_accounting(new_itm)		0
+
+#endif /* __ASSEMBLY__ */
+
+
+#endif /* CONFIG_PARAVIRT_GUEST */
+
+#endif /* __ASM_PARAVIRT_H */
diff --git a/arch/ia64/include/asm/paravirt_patch.h b/arch/ia64/include/asm/paravirt_patch.h
new file mode 100644
index 00000000..128ff5db
--- /dev/null
+++ b/arch/ia64/include/asm/paravirt_patch.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __ASM_PARAVIRT_PATCH_H
+#define __ASM_PARAVIRT_PATCH_H
+
+#ifdef __ASSEMBLY__
+
+	.section .paravirt_branches, "a"
+	.previous
+#define PARAVIRT_PATCH_SITE_BR(type)		\
+	{					\
+	[1:] ;					\
+	br.cond.sptk.many 2f ;			\
+	nop.b 0 ;				\
+	nop.b 0;; ;				\
+	} ;					\
+	2:					\
+	.xdata8 ".paravirt_branches", 1b, type
+
+#else
+
+#include <linux/stringify.h>
+#include <asm/intrinsics.h>
+
+/* for binary patch */
+struct paravirt_patch_site_bundle {
+	void		*sbundle;
+	void		*ebundle;
+	unsigned long	type;
+};
+
+/* label means the beginning of new bundle */
+#define paravirt_alt_bundle(instr, privop)				\
+	"\t998:\n"							\
+	"\t" instr "\n"							\
+	"\t999:\n"							\
+	"\t.pushsection .paravirt_bundles, \"a\"\n"			\
+	"\t.popsection\n"						\
+	"\t.xdata8 \".paravirt_bundles\", 998b, 999b, "			\
+	__stringify(privop) "\n"
+
+
+struct paravirt_patch_bundle_elem {
+	const void	*sbundle;
+	const void	*ebundle;
+	unsigned long	type;
+};
+
+
+struct paravirt_patch_site_inst {
+	unsigned long	stag;
+	unsigned long	etag;
+	unsigned long	type;
+};
+
+#define paravirt_alt_inst(instr, privop)				\
+	"\t[998:]\n"							\
+	"\t" instr "\n"							\
+	"\t[999:]\n"							\
+	"\t.pushsection .paravirt_insts, \"a\"\n"			\
+	"\t.popsection\n"						\
+	"\t.xdata8 \".paravirt_insts\", 998b, 999b, "			\
+	__stringify(privop) "\n"
+
+struct paravirt_patch_site_branch {
+	unsigned long	tag;
+	unsigned long	type;
+};
+
+struct paravirt_patch_branch_target {
+	const void	*entry;
+	unsigned long	type;
+};
+
+void
+__paravirt_patch_apply_branch(
+	unsigned long tag, unsigned long type,
+	const struct paravirt_patch_branch_target *entries,
+	unsigned int nr_entries);
+
+void
+paravirt_patch_reloc_br(unsigned long tag, const void *target);
+
+void
+paravirt_patch_reloc_brl(unsigned long tag, const void *target);
+
+
+#if defined(ASM_SUPPORTED) && defined(CONFIG_PARAVIRT)
+unsigned long
+ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type);
+
+unsigned long
+__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
+			      const struct paravirt_patch_bundle_elem *elems,
+			      unsigned long nelems,
+			      const struct paravirt_patch_bundle_elem **found);
+
+void
+paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
+			    const struct paravirt_patch_site_bundle *end);
+
+void
+paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
+			  const struct paravirt_patch_site_inst *end);
+
+void paravirt_patch_apply(void);
+#else
+#define paravirt_patch_apply_bundle(start, end)	do { } while (0)
+#define paravirt_patch_apply_inst(start, end)	do { } while (0)
+#define paravirt_patch_apply()			do { } while (0)
+#endif
+
+#endif /* !__ASSEMBLEY__ */
+
+#endif /* __ASM_PARAVIRT_PATCH_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/arch/ia64/include/asm/paravirt_privop.h b/arch/ia64/include/asm/paravirt_privop.h
new file mode 100644
index 00000000..8f6cb11c
--- /dev/null
+++ b/arch/ia64/include/asm/paravirt_privop.h
@@ -0,0 +1,479 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _ASM_IA64_PARAVIRT_PRIVOP_H
+#define _ASM_IA64_PARAVIRT_PRIVOP_H
+
+#ifdef CONFIG_PARAVIRT
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/kregs.h> /* for IA64_PSR_I */
+
+/******************************************************************************
+ * replacement of intrinsics operations.
+ */
+
+struct pv_cpu_ops {
+	void (*fc)(void *addr);
+	unsigned long (*thash)(unsigned long addr);
+	unsigned long (*get_cpuid)(int index);
+	unsigned long (*get_pmd)(int index);
+	unsigned long (*getreg)(int reg);
+	void (*setreg)(int reg, unsigned long val);
+	void (*ptcga)(unsigned long addr, unsigned long size);
+	unsigned long (*get_rr)(unsigned long index);
+	void (*set_rr)(unsigned long index, unsigned long val);
+	void (*set_rr0_to_rr4)(unsigned long val0, unsigned long val1,
+			       unsigned long val2, unsigned long val3,
+			       unsigned long val4);
+	void (*ssm_i)(void);
+	void (*rsm_i)(void);
+	unsigned long (*get_psr_i)(void);
+	void (*intrin_local_irq_restore)(unsigned long flags);
+};
+
+extern struct pv_cpu_ops pv_cpu_ops;
+
+extern void ia64_native_setreg_func(int regnum, unsigned long val);
+extern unsigned long ia64_native_getreg_func(int regnum);
+
+/************************************************/
+/* Instructions paravirtualized for performance */
+/************************************************/
+
+#ifndef ASM_SUPPORTED
+#define paravirt_ssm_i()	pv_cpu_ops.ssm_i()
+#define paravirt_rsm_i()	pv_cpu_ops.rsm_i()
+#define __paravirt_getreg()	pv_cpu_ops.getreg()
+#endif
+
+/* mask for ia64_native_ssm/rsm() must be constant.("i" constraing).
+ * static inline function doesn't satisfy it. */
+#define paravirt_ssm(mask)			\
+	do {					\
+		if ((mask) == IA64_PSR_I)	\
+			paravirt_ssm_i();	\
+		else				\
+			ia64_native_ssm(mask);	\
+	} while (0)
+
+#define paravirt_rsm(mask)			\
+	do {					\
+		if ((mask) == IA64_PSR_I)	\
+			paravirt_rsm_i();	\
+		else				\
+			ia64_native_rsm(mask);	\
+	} while (0)
+
+/* returned ip value should be the one in the caller,
+ * not in __paravirt_getreg() */
+#define paravirt_getreg(reg)					\
+	({							\
+		unsigned long res;				\
+		if ((reg) == _IA64_REG_IP)			\
+			res = ia64_native_getreg(_IA64_REG_IP); \
+		else						\
+			res = __paravirt_getreg(reg);		\
+		res;						\
+	})
+
+/******************************************************************************
+ * replacement of hand written assembly codes.
+ */
+struct pv_cpu_asm_switch {
+	unsigned long switch_to;
+	unsigned long leave_syscall;
+	unsigned long work_processed_syscall;
+	unsigned long leave_kernel;
+};
+void paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch);
+
+#endif /* __ASSEMBLY__ */
+
+#define IA64_PARAVIRT_ASM_FUNC(name)	paravirt_ ## name
+
+#else
+
+/* fallback for native case */
+#define IA64_PARAVIRT_ASM_FUNC(name)	ia64_native_ ## name
+
+#endif /* CONFIG_PARAVIRT */
+
+#if defined(CONFIG_PARAVIRT) && defined(ASM_SUPPORTED)
+#define paravirt_dv_serialize_data()	ia64_dv_serialize_data()
+#else
+#define paravirt_dv_serialize_data()	/* nothing */
+#endif
+
+/* these routines utilize privilege-sensitive or performance-sensitive
+ * privileged instructions so the code must be replaced with
+ * paravirtualized versions */
+#define ia64_switch_to			IA64_PARAVIRT_ASM_FUNC(switch_to)
+#define ia64_leave_syscall		IA64_PARAVIRT_ASM_FUNC(leave_syscall)
+#define ia64_work_processed_syscall	\
+	IA64_PARAVIRT_ASM_FUNC(work_processed_syscall)
+#define ia64_leave_kernel		IA64_PARAVIRT_ASM_FUNC(leave_kernel)
+
+
+#if defined(CONFIG_PARAVIRT)
+/******************************************************************************
+ * binary patching infrastructure
+ */
+#define PARAVIRT_PATCH_TYPE_FC				1
+#define PARAVIRT_PATCH_TYPE_THASH			2
+#define PARAVIRT_PATCH_TYPE_GET_CPUID			3
+#define PARAVIRT_PATCH_TYPE_GET_PMD			4
+#define PARAVIRT_PATCH_TYPE_PTCGA			5
+#define PARAVIRT_PATCH_TYPE_GET_RR			6
+#define PARAVIRT_PATCH_TYPE_SET_RR			7
+#define PARAVIRT_PATCH_TYPE_SET_RR0_TO_RR4		8
+#define PARAVIRT_PATCH_TYPE_SSM_I			9
+#define PARAVIRT_PATCH_TYPE_RSM_I			10
+#define PARAVIRT_PATCH_TYPE_GET_PSR_I			11
+#define PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE	12
+
+/* PARAVIRT_PATY_TYPE_[GS]ETREG + _IA64_REG_xxx */
+#define PARAVIRT_PATCH_TYPE_GETREG			0x10000000
+#define PARAVIRT_PATCH_TYPE_SETREG			0x20000000
+
+/*
+ * struct task_struct* (*ia64_switch_to)(void* next_task);
+ * void *ia64_leave_syscall;
+ * void *ia64_work_processed_syscall
+ * void *ia64_leave_kernel;
+ */
+
+#define PARAVIRT_PATCH_TYPE_BR_START			0x30000000
+#define PARAVIRT_PATCH_TYPE_BR_SWITCH_TO		\
+	(PARAVIRT_PATCH_TYPE_BR_START + 0)
+#define PARAVIRT_PATCH_TYPE_BR_LEAVE_SYSCALL		\
+	(PARAVIRT_PATCH_TYPE_BR_START + 1)
+#define PARAVIRT_PATCH_TYPE_BR_WORK_PROCESSED_SYSCALL	\
+	(PARAVIRT_PATCH_TYPE_BR_START + 2)
+#define PARAVIRT_PATCH_TYPE_BR_LEAVE_KERNEL		\
+	(PARAVIRT_PATCH_TYPE_BR_START + 3)
+
+#ifdef ASM_SUPPORTED
+#include <asm/paravirt_patch.h>
+
+/*
+ * pv_cpu_ops calling stub.
+ * normal function call convension can't be written by gcc
+ * inline assembly.
+ *
+ * from the caller's point of view,
+ * the following registers will be clobbered.
+ * r2, r3
+ * r8-r15
+ * r16, r17
+ * b6, b7
+ * p6-p15
+ * ar.ccv
+ *
+ * from the callee's point of view ,
+ * the following registers can be used.
+ * r2, r3: scratch
+ * r8: scratch, input argument0 and return value
+ * r0-r15: scratch, input argument1-5
+ * b6: return pointer
+ * b7: scratch
+ * p6-p15: scratch
+ * ar.ccv: scratch
+ *
+ * other registers must not be changed. especially
+ * b0: rp: preserved. gcc ignores b0 in clobbered register.
+ * r16: saved gp
+ */
+/* 5 bundles */
+#define __PARAVIRT_BR							\
+	";;\n"								\
+	"{ .mlx\n"							\
+	"nop 0\n"							\
+	"movl r2 = %[op_addr]\n"/* get function pointer address */	\
+	";;\n"								\
+	"}\n"								\
+	"1:\n"								\
+	"{ .mii\n"							\
+	"ld8 r2 = [r2]\n"	/* load function descriptor address */	\
+	"mov r17 = ip\n"	/* get ip to calc return address */	\
+	"mov r16 = gp\n"	/* save gp */				\
+	";;\n"								\
+	"}\n"								\
+	"{ .mii\n"							\
+	"ld8 r3 = [r2], 8\n"	/* load entry address */		\
+	"adds r17 =  1f - 1b, r17\n"	/* calculate return address */	\
+	";;\n"								\
+	"mov b7 = r3\n"		/* set entry address */			\
+	"}\n"								\
+	"{ .mib\n"							\
+	"ld8 gp = [r2]\n"	/* load gp value */			\
+	"mov b6 = r17\n"	/* set return address */		\
+	"br.cond.sptk.few b7\n"	/* intrinsics are very short isns */	\
+	"}\n"								\
+	"1:\n"								\
+	"{ .mii\n"							\
+	"mov gp = r16\n"	/* restore gp value */			\
+	"nop 0\n"							\
+	"nop 0\n"							\
+	";;\n"								\
+	"}\n"
+
+#define PARAVIRT_OP(op)				\
+	[op_addr] "i"(&pv_cpu_ops.op)
+
+#define PARAVIRT_TYPE(type)			\
+	PARAVIRT_PATCH_TYPE_ ## type
+
+#define PARAVIRT_REG_CLOBBERS0					\
+	"r2", "r3", /*"r8",*/ "r9", "r10", "r11", "r14",	\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS1					\
+	"r2","r3", /*"r8",*/ "r9", "r10", "r11", "r14",		\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS2					\
+	"r2", "r3", /*"r8", "r9",*/ "r10", "r11", "r14",	\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS5					\
+	"r2", "r3", /*"r8", "r9", "r10", "r11", "r14",*/	\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_BR_CLOBBERS			\
+	"b6", "b7"
+
+#define PARAVIRT_PR_CLOBBERS						\
+	"p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"
+
+#define PARAVIRT_AR_CLOBBERS			\
+	"ar.ccv"
+
+#define PARAVIRT_CLOBBERS0			\
+		PARAVIRT_REG_CLOBBERS0,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_CLOBBERS1			\
+		PARAVIRT_REG_CLOBBERS1,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_CLOBBERS2			\
+		PARAVIRT_REG_CLOBBERS2,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_CLOBBERS5			\
+		PARAVIRT_REG_CLOBBERS5,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_BR0(op, type)					\
+	register unsigned long ia64_clobber asm ("r8");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      :	"=r"(ia64_clobber)			\
+		      : PARAVIRT_OP(op)				\
+		      : PARAVIRT_CLOBBERS0)
+
+#define PARAVIRT_BR0_RET(op, type)				\
+	register unsigned long ia64_intri_res asm ("r8");	\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      : "=r"(ia64_intri_res)			\
+		      : PARAVIRT_OP(op)				\
+		      : PARAVIRT_CLOBBERS0)
+
+#define PARAVIRT_BR1(op, type, arg1)				\
+	register unsigned long __##arg1 asm ("r8") = arg1;	\
+	register unsigned long ia64_clobber asm ("r8");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      :	"=r"(ia64_clobber)			\
+		      : PARAVIRT_OP(op), "0"(__##arg1)		\
+		      : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR1_RET(op, type, arg1)			\
+	register unsigned long ia64_intri_res asm ("r8");	\
+	register unsigned long __##arg1 asm ("r8") = arg1;	\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      : "=r"(ia64_intri_res)			\
+		      : PARAVIRT_OP(op), "0"(__##arg1)		\
+		      : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR1_VOID(op, type, arg1)			\
+	register void *__##arg1 asm ("r8") = arg1;		\
+	register unsigned long ia64_clobber asm ("r8");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      :	"=r"(ia64_clobber)			\
+		      : PARAVIRT_OP(op), "0"(__##arg1)		\
+		      : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR2(op, type, arg1, arg2)				\
+	register unsigned long __##arg1 asm ("r8") = arg1;		\
+	register unsigned long __##arg2 asm ("r9") = arg2;		\
+	register unsigned long ia64_clobber1 asm ("r8");		\
+	register unsigned long ia64_clobber2 asm ("r9");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,		\
+					  PARAVIRT_TYPE(type))		\
+		      : "=r"(ia64_clobber1), "=r"(ia64_clobber2)	\
+		      : PARAVIRT_OP(op), "0"(__##arg1), "1"(__##arg2)	\
+		      : PARAVIRT_CLOBBERS2)
+
+
+#define PARAVIRT_DEFINE_CPU_OP0(op, type)		\
+	static inline void				\
+	paravirt_ ## op (void)				\
+	{						\
+		PARAVIRT_BR0(op, type);			\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP0_RET(op, type)		\
+	static inline unsigned long			\
+	paravirt_ ## op (void)				\
+	{						\
+		PARAVIRT_BR0_RET(op, type);		\
+		return ia64_intri_res;			\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP1_VOID(op, type)		\
+	static inline void				\
+	paravirt_ ## op (void *arg1)			\
+	{						\
+		PARAVIRT_BR1_VOID(op, type, arg1);	\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP1(op, type)		\
+	static inline void				\
+	paravirt_ ## op (unsigned long arg1)		\
+	{						\
+		PARAVIRT_BR1(op, type, arg1);		\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP1_RET(op, type)		\
+	static inline unsigned long			\
+	paravirt_ ## op (unsigned long arg1)		\
+	{						\
+		PARAVIRT_BR1_RET(op, type, arg1);	\
+		return ia64_intri_res;			\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP2(op, type)		\
+	static inline void				\
+	paravirt_ ## op (unsigned long arg1,		\
+			 unsigned long arg2)		\
+	{						\
+		PARAVIRT_BR2(op, type, arg1, arg2);	\
+	}
+
+
+PARAVIRT_DEFINE_CPU_OP1_VOID(fc, FC);
+PARAVIRT_DEFINE_CPU_OP1_RET(thash, THASH)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_cpuid, GET_CPUID)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_pmd, GET_PMD)
+PARAVIRT_DEFINE_CPU_OP2(ptcga, PTCGA)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_rr, GET_RR)
+PARAVIRT_DEFINE_CPU_OP2(set_rr, SET_RR)
+PARAVIRT_DEFINE_CPU_OP0(ssm_i, SSM_I)
+PARAVIRT_DEFINE_CPU_OP0(rsm_i, RSM_I)
+PARAVIRT_DEFINE_CPU_OP0_RET(get_psr_i, GET_PSR_I)
+PARAVIRT_DEFINE_CPU_OP1(intrin_local_irq_restore, INTRIN_LOCAL_IRQ_RESTORE)
+
+static inline void
+paravirt_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
+			unsigned long val2, unsigned long val3,
+			unsigned long val4)
+{
+	register unsigned long __val0 asm ("r8") = val0;
+	register unsigned long __val1 asm ("r9") = val1;
+	register unsigned long __val2 asm ("r10") = val2;
+	register unsigned long __val3 asm ("r11") = val3;
+	register unsigned long __val4 asm ("r14") = val4;
+
+	register unsigned long ia64_clobber0 asm ("r8");
+	register unsigned long ia64_clobber1 asm ("r9");
+	register unsigned long ia64_clobber2 asm ("r10");
+	register unsigned long ia64_clobber3 asm ("r11");
+	register unsigned long ia64_clobber4 asm ("r14");
+
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,
+					  PARAVIRT_TYPE(SET_RR0_TO_RR4))
+		      : "=r"(ia64_clobber0),
+			"=r"(ia64_clobber1),
+			"=r"(ia64_clobber2),
+			"=r"(ia64_clobber3),
+			"=r"(ia64_clobber4)
+		      : PARAVIRT_OP(set_rr0_to_rr4),
+			"0"(__val0), "1"(__val1), "2"(__val2),
+			"3"(__val3), "4"(__val4)
+		      : PARAVIRT_CLOBBERS5);
+}
+
+/* unsigned long paravirt_getreg(int reg) */
+#define __paravirt_getreg(reg)						\
+	({								\
+		register unsigned long ia64_intri_res asm ("r8");	\
+		register unsigned long __reg asm ("r8") = (reg);	\
+									\
+		asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+						  PARAVIRT_TYPE(GETREG) \
+						  + (reg))		\
+			      : "=r"(ia64_intri_res)			\
+			      : PARAVIRT_OP(getreg), "0"(__reg)		\
+			      : PARAVIRT_CLOBBERS1);			\
+									\
+		ia64_intri_res;						\
+	})
+
+/* void paravirt_setreg(int reg, unsigned long val) */
+#define paravirt_setreg(reg, val)					\
+	do {								\
+		register unsigned long __val asm ("r8") = val;		\
+		register unsigned long __reg asm ("r9") = reg;		\
+		register unsigned long ia64_clobber1 asm ("r8");	\
+		register unsigned long ia64_clobber2 asm ("r9");	\
+									\
+		asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+						  PARAVIRT_TYPE(SETREG) \
+						  + (reg))		\
+			      : "=r"(ia64_clobber1),			\
+				"=r"(ia64_clobber2)			\
+			      : PARAVIRT_OP(setreg),			\
+				"1"(__reg), "0"(__val)			\
+			      : PARAVIRT_CLOBBERS2);			\
+	} while (0)
+
+#endif /* ASM_SUPPORTED */
+#endif /* CONFIG_PARAVIRT && ASM_SUPPOTED */
+
+#endif /* _ASM_IA64_PARAVIRT_PRIVOP_H */
diff --git a/arch/ia64/include/asm/parport.h b/arch/ia64/include/asm/parport.h
new file mode 100644
index 00000000..67e16adf
--- /dev/null
+++ b/arch/ia64/include/asm/parport.h
@@ -0,0 +1,20 @@
+/*
+ * parport.h: platform-specific PC-style parport initialisation
+ *
+ * Copyright (C) 1999, 2000  Tim Waugh <tim@cyberelk.demon.co.uk>
+ *
+ * This file should only be included by drivers/parport/parport_pc.c.
+ */
+
+#ifndef _ASM_IA64_PARPORT_H
+#define _ASM_IA64_PARPORT_H 1
+
+static int __devinit parport_pc_find_isa_ports (int autoirq, int autodma);
+
+static int __devinit
+parport_pc_find_nonpci_ports (int autoirq, int autodma)
+{
+	return parport_pc_find_isa_ports(autoirq, autodma);
+}
+
+#endif /* _ASM_IA64_PARPORT_H */
diff --git a/arch/ia64/include/asm/patch.h b/arch/ia64/include/asm/patch.h
new file mode 100644
index 00000000..295fe6ab
--- /dev/null
+++ b/arch/ia64/include/asm/patch.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_IA64_PATCH_H
+#define _ASM_IA64_PATCH_H
+
+/*
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * There are a number of reasons for patching instructions.  Rather than duplicating code
+ * all over the place, we put the common stuff here.  Reasons for patching: in-kernel
+ * module-loader, virtual-to-physical patch-list, McKinley Errata 9 workaround, and gate
+ * shared library.  Undoubtedly, some of these reasons will disappear and others will
+ * be added over time.
+ */
+#include <linux/elf.h>
+#include <linux/types.h>
+
+extern void ia64_patch (u64 insn_addr, u64 mask, u64 val);	/* patch any insn slot */
+extern void ia64_patch_imm64 (u64 insn_addr, u64 val);		/* patch "movl" w/abs. value*/
+extern void ia64_patch_imm60 (u64 insn_addr, u64 val);		/* patch "brl" w/ip-rel value */
+
+extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end);
+extern void ia64_patch_vtop (unsigned long start, unsigned long end);
+extern void ia64_patch_phys_stack_reg(unsigned long val);
+extern void ia64_patch_rse (unsigned long start, unsigned long end);
+extern void ia64_patch_gate (void);
+
+#endif /* _ASM_IA64_PATCH_H */
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
new file mode 100644
index 00000000..73b5f785
--- /dev/null
+++ b/arch/ia64/include/asm/pci.h
@@ -0,0 +1,145 @@
+#ifndef _ASM_IA64_PCI_H
+#define _ASM_IA64_PCI_H
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+#include <asm/scatterlist.h>
+#include <asm/hw_irq.h>
+
+/*
+ * Can be used to override the logic in pci_scan_bus for skipping already-configured bus
+ * numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the
+ * loader.
+ */
+#define pcibios_assign_all_busses()     0
+
+#define PCIBIOS_MIN_IO		0x1000
+#define PCIBIOS_MIN_MEM		0x10000000
+
+void pcibios_config_init(void);
+
+struct pci_dev;
+
+/*
+ * PCI_DMA_BUS_IS_PHYS should be set to 1 if there is _necessarily_ a direct
+ * correspondence between device bus addresses and CPU physical addresses.
+ * Platforms with a hardware I/O MMU _must_ turn this off to suppress the
+ * bounce buffer handling code in the block and network device layers.
+ * Platforms with separate bus address spaces _must_ turn this off and provide
+ * a device DMA mapping implementation that takes care of the necessary
+ * address translation.
+ *
+ * For now, the ia64 platforms which may have separate/multiple bus address
+ * spaces all have I/O MMUs which support the merging of physically
+ * discontiguous buffers, so we can use that as the sole factor to determine
+ * the setting of PCI_DMA_BUS_IS_PHYS.
+ */
+extern unsigned long ia64_max_iommu_merge_mask;
+#define PCI_DMA_BUS_IS_PHYS	(ia64_max_iommu_merge_mask == ~0UL)
+
+static inline void
+pcibios_set_master (struct pci_dev *dev)
+{
+	/* No special bus mastering setup handling */
+}
+
+static inline void
+pcibios_penalize_isa_irq (int irq, int active)
+{
+	/* We don't do dynamic PCI IRQ allocation */
+}
+
+#include <asm-generic/pci-dma-compat.h>
+
+#ifdef CONFIG_PCI
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	unsigned long cacheline_size;
+	u8 byte;
+
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+	if (byte == 0)
+		cacheline_size = 1024;
+	else
+		cacheline_size = (int) byte * 4;
+
+	*strat = PCI_DMA_BURST_MULTIPLE;
+	*strategy_parameter = cacheline_size;
+}
+#endif
+
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
+				enum pci_mmap_state mmap_state, int write_combine);
+#define HAVE_PCI_LEGACY
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
+
+#define pci_get_legacy_mem platform_pci_get_legacy_mem
+#define pci_legacy_read platform_pci_legacy_read
+#define pci_legacy_write platform_pci_legacy_write
+
+struct pci_window {
+	struct resource resource;
+	u64 offset;
+};
+
+struct pci_controller {
+	void *acpi_handle;
+	void *iommu;
+	int segment;
+	int node;		/* nearest node with memory or -1 for global allocation */
+
+	unsigned int windows;
+	struct pci_window *window;
+
+	void *platform_data;
+};
+
+#define PCI_CONTROLLER(busdev) ((struct pci_controller *) busdev->sysdata)
+#define pci_domain_nr(busdev)    (PCI_CONTROLLER(busdev)->segment)
+
+extern struct pci_ops pci_root_ops;
+
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+	return (pci_domain_nr(bus) != 0);
+}
+
+extern void pcibios_resource_to_bus(struct pci_dev *dev,
+		struct pci_bus_region *region, struct resource *res);
+
+extern void pcibios_bus_to_resource(struct pci_dev *dev,
+		struct resource *res, struct pci_bus_region *region);
+
+static inline struct resource *
+pcibios_select_root(struct pci_dev *pdev, struct resource *res)
+{
+	struct resource *root = NULL;
+
+	if (res->flags & IORESOURCE_IO)
+		root = &ioport_resource;
+	if (res->flags & IORESOURCE_MEM)
+		root = &iomem_resource;
+
+	return root;
+}
+
+#define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+	return channel ? isa_irq_to_vector(15) : isa_irq_to_vector(14);
+}
+
+#ifdef CONFIG_DMAR
+extern void pci_iommu_alloc(void);
+#endif
+#endif /* _ASM_IA64_PCI_H */
diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
new file mode 100644
index 00000000..14aa1c58
--- /dev/null
+++ b/arch/ia64/include/asm/percpu.h
@@ -0,0 +1,54 @@
+#ifndef _ASM_IA64_PERCPU_H
+#define _ASM_IA64_PERCPU_H
+
+/*
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE
+
+#ifdef __ASSEMBLY__
+# define THIS_CPU(var)	(var)  /* use this to mark accesses to per-CPU variables... */
+#else /* !__ASSEMBLY__ */
+
+
+#include <linux/threads.h>
+
+#ifdef CONFIG_SMP
+
+#ifdef HAVE_MODEL_SMALL_ATTRIBUTE
+# define PER_CPU_ATTRIBUTES	__attribute__((__model__ (__small__)))
+#endif
+
+#define __my_cpu_offset	__ia64_per_cpu_var(local_per_cpu_offset)
+
+extern void *per_cpu_init(void);
+
+#else /* ! SMP */
+
+#define per_cpu_init()				(__phys_per_cpu_start)
+
+#endif	/* SMP */
+
+#define PER_CPU_BASE_SECTION ".data..percpu"
+
+/*
+ * Be extremely careful when taking the address of this variable!  Due to virtual
+ * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
+ * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
+ * more efficient.
+ */
+#define __ia64_per_cpu_var(var) (*({					\
+	__verify_pcpu_ptr(&(var));					\
+	((typeof(var) __kernel __force *)&(var));			\
+}))
+
+#include <asm-generic/percpu.h>
+
+/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
+DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_PERCPU_H */
diff --git a/arch/ia64/include/asm/perfmon.h b/arch/ia64/include/asm/perfmon.h
new file mode 100644
index 00000000..d551183f
--- /dev/null
+++ b/arch/ia64/include/asm/perfmon.h
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2001-2003 Hewlett-Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+#ifndef _ASM_IA64_PERFMON_H
+#define _ASM_IA64_PERFMON_H
+
+/*
+ * perfmon commands supported on all CPU models
+ */
+#define PFM_WRITE_PMCS		0x01
+#define PFM_WRITE_PMDS		0x02
+#define PFM_READ_PMDS		0x03
+#define PFM_STOP		0x04
+#define PFM_START		0x05
+#define PFM_ENABLE		0x06 /* obsolete */
+#define PFM_DISABLE		0x07 /* obsolete */
+#define PFM_CREATE_CONTEXT	0x08
+#define PFM_DESTROY_CONTEXT	0x09 /* obsolete use close() */
+#define PFM_RESTART		0x0a
+#define PFM_PROTECT_CONTEXT	0x0b /* obsolete */
+#define PFM_GET_FEATURES	0x0c
+#define PFM_DEBUG		0x0d
+#define PFM_UNPROTECT_CONTEXT	0x0e /* obsolete */
+#define PFM_GET_PMC_RESET_VAL	0x0f
+#define PFM_LOAD_CONTEXT	0x10
+#define PFM_UNLOAD_CONTEXT	0x11
+
+/*
+ * PMU model specific commands (may not be supported on all PMU models)
+ */
+#define PFM_WRITE_IBRS		0x20
+#define PFM_WRITE_DBRS		0x21
+
+/*
+ * context flags
+ */
+#define PFM_FL_NOTIFY_BLOCK    	 0x01	/* block task on user level notifications */
+#define PFM_FL_SYSTEM_WIDE	 0x02	/* create a system wide context */
+#define PFM_FL_OVFL_NO_MSG	 0x80   /* do not post overflow/end messages for notification */
+
+/*
+ * event set flags
+ */
+#define PFM_SETFL_EXCL_IDLE      0x01   /* exclude idle task (syswide only) XXX: DO NOT USE YET */
+
+/*
+ * PMC flags
+ */
+#define PFM_REGFL_OVFL_NOTIFY	0x1	/* send notification on overflow */
+#define PFM_REGFL_RANDOM	0x2	/* randomize sampling interval   */
+
+/*
+ * PMD/PMC/IBR/DBR return flags (ignored on input)
+ *
+ * Those flags are used on output and must be checked in case EAGAIN is returned
+ * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure.
+ */
+#define PFM_REG_RETFL_NOTAVAIL	(1UL<<31) /* set if register is implemented but not available */
+#define PFM_REG_RETFL_EINVAL	(1UL<<30) /* set if register entry is invalid */
+#define PFM_REG_RETFL_MASK	(PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL)
+
+#define PFM_REG_HAS_ERROR(flag)	(((flag) & PFM_REG_RETFL_MASK) != 0)
+
+typedef unsigned char pfm_uuid_t[16];	/* custom sampling buffer identifier type */
+
+/*
+ * Request structure used to define a context
+ */
+typedef struct {
+	pfm_uuid_t     ctx_smpl_buf_id;	 /* which buffer format to use (if needed) */
+	unsigned long  ctx_flags;	 /* noblock/block */
+	unsigned short ctx_nextra_sets;	 /* number of extra event sets (you always get 1) */
+	unsigned short ctx_reserved1;	 /* for future use */
+	int	       ctx_fd;		 /* return arg: unique identification for context */
+	void	       *ctx_smpl_vaddr;	 /* return arg: virtual address of sampling buffer, is used */
+	unsigned long  ctx_reserved2[11];/* for future use */
+} pfarg_context_t;
+
+/*
+ * Request structure used to write/read a PMC or PMD
+ */
+typedef struct {
+	unsigned int	reg_num;	   /* which register */
+	unsigned short	reg_set;	   /* event set for this register */
+	unsigned short	reg_reserved1;	   /* for future use */
+
+	unsigned long	reg_value;	   /* initial pmc/pmd value */
+	unsigned long	reg_flags;	   /* input: pmc/pmd flags, return: reg error */
+
+	unsigned long	reg_long_reset;	   /* reset after buffer overflow notification */
+	unsigned long	reg_short_reset;   /* reset after counter overflow */
+
+	unsigned long	reg_reset_pmds[4]; /* which other counters to reset on overflow */
+	unsigned long	reg_random_seed;   /* seed value when randomization is used */
+	unsigned long	reg_random_mask;   /* bitmask used to limit random value */
+	unsigned long   reg_last_reset_val;/* return: PMD last reset value */
+
+	unsigned long	reg_smpl_pmds[4];  /* which pmds are accessed when PMC overflows */
+	unsigned long	reg_smpl_eventid;  /* opaque sampling event identifier */
+
+	unsigned long   reg_reserved2[3];   /* for future use */
+} pfarg_reg_t;
+
+typedef struct {
+	unsigned int	dbreg_num;		/* which debug register */
+	unsigned short	dbreg_set;		/* event set for this register */
+	unsigned short	dbreg_reserved1;	/* for future use */
+	unsigned long	dbreg_value;		/* value for debug register */
+	unsigned long	dbreg_flags;		/* return: dbreg error */
+	unsigned long	dbreg_reserved2[1];	/* for future use */
+} pfarg_dbreg_t;
+
+typedef struct {
+	unsigned int	ft_version;	/* perfmon: major [16-31], minor [0-15] */
+	unsigned int	ft_reserved;	/* reserved for future use */
+	unsigned long	reserved[4];	/* for future use */
+} pfarg_features_t;
+
+typedef struct {
+	pid_t		load_pid;	   /* process to load the context into */
+	unsigned short	load_set;	   /* first event set to load */
+	unsigned short	load_reserved1;	   /* for future use */
+	unsigned long	load_reserved2[3]; /* for future use */
+} pfarg_load_t;
+
+typedef struct {
+	int		msg_type;		/* generic message header */
+	int		msg_ctx_fd;		/* generic message header */
+	unsigned long	msg_ovfl_pmds[4];	/* which PMDs overflowed */
+	unsigned short  msg_active_set;		/* active set at the time of overflow */
+	unsigned short  msg_reserved1;		/* for future use */
+	unsigned int    msg_reserved2;		/* for future use */
+	unsigned long	msg_tstamp;		/* for perf tuning/debug */
+} pfm_ovfl_msg_t;
+
+typedef struct {
+	int		msg_type;		/* generic message header */
+	int		msg_ctx_fd;		/* generic message header */
+	unsigned long	msg_tstamp;		/* for perf tuning */
+} pfm_end_msg_t;
+
+typedef struct {
+	int		msg_type;		/* type of the message */
+	int		msg_ctx_fd;		/* unique identifier for the context */
+	unsigned long	msg_tstamp;		/* for perf tuning */
+} pfm_gen_msg_t;
+
+#define PFM_MSG_OVFL	1	/* an overflow happened */
+#define PFM_MSG_END	2	/* task to which context was attached ended */
+
+typedef union {
+	pfm_ovfl_msg_t	pfm_ovfl_msg;
+	pfm_end_msg_t	pfm_end_msg;
+	pfm_gen_msg_t	pfm_gen_msg;
+} pfm_msg_t;
+
+/*
+ * Define the version numbers for both perfmon as a whole and the sampling buffer format.
+ */
+#define PFM_VERSION_MAJ		 2U
+#define PFM_VERSION_MIN		 0U
+#define PFM_VERSION		 (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
+#define PFM_VERSION_MAJOR(x)	 (((x)>>16) & 0xffff)
+#define PFM_VERSION_MINOR(x)	 ((x) & 0xffff)
+
+
+/*
+ * miscellaneous architected definitions
+ */
+#define PMU_FIRST_COUNTER	4	/* first counting monitor (PMC/PMD) */
+#define PMU_MAX_PMCS		256	/* maximum architected number of PMC registers */
+#define PMU_MAX_PMDS		256	/* maximum architected number of PMD registers */
+
+#ifdef __KERNEL__
+
+extern long perfmonctl(int fd, int cmd, void *arg, int narg);
+
+typedef struct {
+	void (*handler)(int irq, void *arg, struct pt_regs *regs);
+} pfm_intr_handler_desc_t;
+
+extern void pfm_save_regs (struct task_struct *);
+extern void pfm_load_regs (struct task_struct *);
+
+extern void pfm_exit_thread(struct task_struct *);
+extern int  pfm_use_debug_registers(struct task_struct *);
+extern int  pfm_release_debug_registers(struct task_struct *);
+extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin);
+extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs);
+extern void pfm_init_percpu(void);
+extern void pfm_handle_work(void);
+extern int  pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
+extern int  pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
+
+
+
+/*
+ * Reset PMD register flags
+ */
+#define PFM_PMD_SHORT_RESET	0
+#define PFM_PMD_LONG_RESET	1
+
+typedef union {
+	unsigned int val;
+	struct {
+		unsigned int notify_user:1;	/* notify user program of overflow */
+		unsigned int reset_ovfl_pmds:1;	/* reset overflowed PMDs */
+		unsigned int block_task:1;	/* block monitored task on kernel exit */
+		unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */
+		unsigned int reserved:28;	/* for future use */
+	} bits;
+} pfm_ovfl_ctrl_t;
+
+typedef struct {
+	unsigned char	ovfl_pmd;			/* index of overflowed PMD  */
+	unsigned char   ovfl_notify;			/* =1 if monitor requested overflow notification */
+	unsigned short  active_set;			/* event set active at the time of the overflow */
+	pfm_ovfl_ctrl_t ovfl_ctrl;			/* return: perfmon controls to set by handler */
+
+	unsigned long   pmd_last_reset;			/* last reset value of of the PMD */
+	unsigned long	smpl_pmds[4];			/* bitmask of other PMD of interest on overflow */
+	unsigned long   smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */
+	unsigned long   pmd_value;			/* current 64-bit value of the PMD */
+	unsigned long	pmd_eventid;			/* eventid associated with PMD */
+} pfm_ovfl_arg_t;
+
+
+typedef struct {
+	char		*fmt_name;
+	pfm_uuid_t	fmt_uuid;
+	size_t		fmt_arg_size;
+	unsigned long	fmt_flags;
+
+	int		(*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg);
+	int		(*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size);
+	int 		(*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg);
+	int		(*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp);
+	int		(*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
+	int		(*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
+	int		(*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs);
+
+	struct list_head fmt_list;
+} pfm_buffer_fmt_t;
+
+extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt);
+extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid);
+
+/*
+ * perfmon interface exported to modules
+ */
+extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
+
+/*
+ * describe the content of the local_cpu_date->pfm_syst_info field
+ */
+#define PFM_CPUINFO_SYST_WIDE	0x1	/* if set a system wide session exists */
+#define PFM_CPUINFO_DCR_PP	0x2	/* if set the system wide session has started */
+#define PFM_CPUINFO_EXCL_IDLE	0x4	/* the system wide session excludes the idle task */
+
+/*
+ * sysctl control structure. visible to sampling formats
+ */
+typedef struct {
+	int	debug;		/* turn on/off debugging via syslog */
+	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
+	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
+	int	expert_mode;	/* turn on/off value checking */
+} pfm_sysctl_t;
+extern pfm_sysctl_t pfm_sysctl;
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_PERFMON_H */
diff --git a/arch/ia64/include/asm/perfmon_default_smpl.h b/arch/ia64/include/asm/perfmon_default_smpl.h
new file mode 100644
index 00000000..a2d560c6
--- /dev/null
+++ b/arch/ia64/include/asm/perfmon_default_smpl.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * This file implements the default sampling buffer format
+ * for Linux/ia64 perfmon subsystem.
+ */
+#ifndef __PERFMON_DEFAULT_SMPL_H__
+#define __PERFMON_DEFAULT_SMPL_H__ 1
+
+#define PFM_DEFAULT_SMPL_UUID { \
+		0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82, 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
+
+/*
+ * format specific parameters (passed at context creation)
+ */
+typedef struct {
+	unsigned long buf_size;		/* size of the buffer in bytes */
+	unsigned int  flags;		/* buffer specific flags */
+	unsigned int  res1;		/* for future use */
+	unsigned long reserved[2];	/* for future use */
+} pfm_default_smpl_arg_t;
+
+/*
+ * combined context+format specific structure. Can be passed
+ * to PFM_CONTEXT_CREATE
+ */
+typedef struct {
+	pfarg_context_t		ctx_arg;
+	pfm_default_smpl_arg_t	buf_arg;
+} pfm_default_smpl_ctx_arg_t;
+
+/*
+ * This header is at the beginning of the sampling buffer returned to the user.
+ * It is directly followed by the first record.
+ */
+typedef struct {
+	unsigned long	hdr_count;		/* how many valid entries */
+	unsigned long	hdr_cur_offs;		/* current offset from top of buffer */
+	unsigned long	hdr_reserved2;		/* reserved for future use */
+
+	unsigned long	hdr_overflows;		/* how many times the buffer overflowed */
+	unsigned long   hdr_buf_size;		/* how many bytes in the buffer */
+
+	unsigned int	hdr_version;		/* contains perfmon version (smpl format diffs) */
+	unsigned int	hdr_reserved1;		/* for future use */
+	unsigned long	hdr_reserved[10];	/* for future use */
+} pfm_default_smpl_hdr_t;
+
+/*
+ * Entry header in the sampling buffer.  The header is directly followed
+ * with the values of the PMD registers of interest saved in increasing 
+ * index order: PMD4, PMD5, and so on. How many PMDs are present depends 
+ * on how the session was programmed.
+ *
+ * In the case where multiple counters overflow at the same time, multiple
+ * entries are written consecutively.
+ *
+ * last_reset_value member indicates the initial value of the overflowed PMD. 
+ */
+typedef struct {
+        int             pid;                    /* thread id (for NPTL, this is gettid()) */
+        unsigned char   reserved1[3];           /* reserved for future use */
+        unsigned char   ovfl_pmd;               /* index of overflowed PMD */
+
+        unsigned long   last_reset_val;         /* initial value of overflowed PMD */
+        unsigned long   ip;                     /* where did the overflow interrupt happened  */
+        unsigned long   tstamp;                 /* ar.itc when entering perfmon intr. handler */
+
+        unsigned short  cpu;                    /* cpu on which the overflow occurred */
+        unsigned short  set;                    /* event set active when overflow occurred   */
+        int    		tgid;              	/* thread group id (for NPTL, this is getpid()) */
+} pfm_default_smpl_entry_t;
+
+#define PFM_DEFAULT_MAX_PMDS		64 /* how many pmds supported by data structures (sizeof(unsigned long) */
+#define PFM_DEFAULT_MAX_ENTRY_SIZE	(sizeof(pfm_default_smpl_entry_t)+(sizeof(unsigned long)*PFM_DEFAULT_MAX_PMDS))
+#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE	(sizeof(pfm_default_smpl_hdr_t)+PFM_DEFAULT_MAX_ENTRY_SIZE)
+
+#define PFM_DEFAULT_SMPL_VERSION_MAJ	2U
+#define PFM_DEFAULT_SMPL_VERSION_MIN	0U
+#define PFM_DEFAULT_SMPL_VERSION	(((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|(PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff))
+
+#endif /* __PERFMON_DEFAULT_SMPL_H__ */
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
new file mode 100644
index 00000000..96a8d927
--- /dev/null
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -0,0 +1,122 @@
+#ifndef _ASM_IA64_PGALLOC_H
+#define _ASM_IA64_PGALLOC_H
+
+/*
+ * This file contains the functions and defines necessary to allocate
+ * page tables.
+ *
+ * This hopefully works with any (fixed) ia-64 page-size, as defined
+ * in <asm/page.h> (currently 8192).
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2000, Goutham Rao <goutham.rao@intel.com>
+ */
+
+
+#include <linux/compiler.h>
+#include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/threads.h>
+#include <linux/quicklist.h>
+
+#include <asm/mmu_context.h>
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return quicklist_alloc(0, GFP_KERNEL, NULL);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	quicklist_free(0, NULL, pgd);
+}
+
+#ifdef CONFIG_PGTABLE_4
+static inline void
+pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
+{
+	pgd_val(*pgd_entry) = __pa(pud);
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return quicklist_alloc(0, GFP_KERNEL, NULL);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	quicklist_free(0, NULL, pud);
+}
+#define __pud_free_tlb(tlb, pud, address)	pud_free((tlb)->mm, pud)
+#endif /* CONFIG_PGTABLE_4 */
+
+static inline void
+pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
+{
+	pud_val(*pud_entry) = __pa(pmd);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return quicklist_alloc(0, GFP_KERNEL, NULL);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	quicklist_free(0, NULL, pmd);
+}
+
+#define __pmd_free_tlb(tlb, pmd, address)	pmd_free((tlb)->mm, pmd)
+
+static inline void
+pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte)
+{
+	pmd_val(*pmd_entry) = page_to_phys(pte);
+}
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+static inline void
+pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
+{
+	pmd_val(*pmd_entry) = __pa(pte);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	struct page *page;
+	void *pg;
+
+	pg = quicklist_alloc(0, GFP_KERNEL, NULL);
+	if (!pg)
+		return NULL;
+	page = virt_to_page(pg);
+	pgtable_page_ctor(page);
+	return page;
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long addr)
+{
+	return quicklist_alloc(0, GFP_KERNEL, NULL);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+	pgtable_page_dtor(pte);
+	quicklist_free_page(0, NULL, pte);
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	quicklist_free(0, NULL, pte);
+}
+
+static inline void check_pgt_cache(void)
+{
+	quicklist_trim(0, NULL, 25, 16);
+}
+
+#define __pte_free_tlb(tlb, pte, address)	pte_free((tlb)->mm, pte)
+
+#endif				/* _ASM_IA64_PGALLOC_H */
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
new file mode 100644
index 00000000..1a97af31
--- /dev/null
+++ b/arch/ia64/include/asm/pgtable.h
@@ -0,0 +1,613 @@
+#ifndef _ASM_IA64_PGTABLE_H
+#define _ASM_IA64_PGTABLE_H
+
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the IA-64 page table tree.
+ *
+ * This hopefully works with any (fixed) IA-64 page-size, as defined
+ * in <asm/page.h>.
+ *
+ * Copyright (C) 1998-2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#include <asm/mman.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/types.h>
+
+#define IA64_MAX_PHYS_BITS	50	/* max. number of physical address bits (architected) */
+
+/*
+ * First, define the various bits in a PTE.  Note that the PTE format
+ * matches the VHPT short format, the firt doubleword of the VHPD long
+ * format, and the first doubleword of the TLB insertion format.
+ */
+#define _PAGE_P_BIT		0
+#define _PAGE_A_BIT		5
+#define _PAGE_D_BIT		6
+
+#define _PAGE_P			(1 << _PAGE_P_BIT)	/* page present bit */
+#define _PAGE_MA_WB		(0x0 <<  2)	/* write back memory attribute */
+#define _PAGE_MA_UC		(0x4 <<  2)	/* uncacheable memory attribute */
+#define _PAGE_MA_UCE		(0x5 <<  2)	/* UC exported attribute */
+#define _PAGE_MA_WC		(0x6 <<  2)	/* write coalescing memory attribute */
+#define _PAGE_MA_NAT		(0x7 <<  2)	/* not-a-thing attribute */
+#define _PAGE_MA_MASK		(0x7 <<  2)
+#define _PAGE_PL_0		(0 <<  7)	/* privilege level 0 (kernel) */
+#define _PAGE_PL_1		(1 <<  7)	/* privilege level 1 (unused) */
+#define _PAGE_PL_2		(2 <<  7)	/* privilege level 2 (unused) */
+#define _PAGE_PL_3		(3 <<  7)	/* privilege level 3 (user) */
+#define _PAGE_PL_MASK		(3 <<  7)
+#define _PAGE_AR_R		(0 <<  9)	/* read only */
+#define _PAGE_AR_RX		(1 <<  9)	/* read & execute */
+#define _PAGE_AR_RW		(2 <<  9)	/* read & write */
+#define _PAGE_AR_RWX		(3 <<  9)	/* read, write & execute */
+#define _PAGE_AR_R_RW		(4 <<  9)	/* read / read & write */
+#define _PAGE_AR_RX_RWX		(5 <<  9)	/* read & exec / read, write & exec */
+#define _PAGE_AR_RWX_RW		(6 <<  9)	/* read, write & exec / read & write */
+#define _PAGE_AR_X_RX		(7 <<  9)	/* exec & promote / read & exec */
+#define _PAGE_AR_MASK		(7 <<  9)
+#define _PAGE_AR_SHIFT		9
+#define _PAGE_A			(1 << _PAGE_A_BIT)	/* page accessed bit */
+#define _PAGE_D			(1 << _PAGE_D_BIT)	/* page dirty bit */
+#define _PAGE_PPN_MASK		(((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL)
+#define _PAGE_ED		(__IA64_UL(1) << 52)	/* exception deferral */
+#define _PAGE_PROTNONE		(__IA64_UL(1) << 63)
+
+/* Valid only for a PTE with the present bit cleared: */
+#define _PAGE_FILE		(1 << 1)		/* see swap & file pte remarks below */
+
+#define _PFN_MASK		_PAGE_PPN_MASK
+/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */
+#define _PAGE_CHG_MASK	(_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED)
+
+#define _PAGE_SIZE_4K	12
+#define _PAGE_SIZE_8K	13
+#define _PAGE_SIZE_16K	14
+#define _PAGE_SIZE_64K	16
+#define _PAGE_SIZE_256K	18
+#define _PAGE_SIZE_1M	20
+#define _PAGE_SIZE_4M	22
+#define _PAGE_SIZE_16M	24
+#define _PAGE_SIZE_64M	26
+#define _PAGE_SIZE_256M	28
+#define _PAGE_SIZE_1G	30
+#define _PAGE_SIZE_4G	32
+
+#define __ACCESS_BITS		_PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB
+#define __DIRTY_BITS_NO_ED	_PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB
+#define __DIRTY_BITS		_PAGE_ED | __DIRTY_BITS_NO_ED
+
+/*
+ * How many pointers will a page table level hold expressed in shift
+ */
+#define PTRS_PER_PTD_SHIFT	(PAGE_SHIFT-3)
+
+/*
+ * Definitions for fourth level:
+ */
+#define PTRS_PER_PTE	(__IA64_UL(1) << (PTRS_PER_PTD_SHIFT))
+
+/*
+ * Definitions for third level:
+ *
+ * PMD_SHIFT determines the size of the area a third-level page table
+ * can map.
+ */
+#define PMD_SHIFT	(PAGE_SHIFT + (PTRS_PER_PTD_SHIFT))
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+#define PTRS_PER_PMD	(1UL << (PTRS_PER_PTD_SHIFT))
+
+#ifdef CONFIG_PGTABLE_4
+/*
+ * Definitions for second level:
+ *
+ * PUD_SHIFT determines the size of the area a second-level page table
+ * can map.
+ */
+#define PUD_SHIFT	(PMD_SHIFT + (PTRS_PER_PTD_SHIFT))
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+#define PTRS_PER_PUD	(1UL << (PTRS_PER_PTD_SHIFT))
+#endif
+
+/*
+ * Definitions for first level:
+ *
+ * PGDIR_SHIFT determines what a first-level page table entry can map.
+ */
+#ifdef CONFIG_PGTABLE_4
+#define PGDIR_SHIFT		(PUD_SHIFT + (PTRS_PER_PTD_SHIFT))
+#else
+#define PGDIR_SHIFT		(PMD_SHIFT + (PTRS_PER_PTD_SHIFT))
+#endif
+#define PGDIR_SIZE		(__IA64_UL(1) << PGDIR_SHIFT)
+#define PGDIR_MASK		(~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD_SHIFT	PTRS_PER_PTD_SHIFT
+#define PTRS_PER_PGD		(1UL << PTRS_PER_PGD_SHIFT)
+#define USER_PTRS_PER_PGD	(5*PTRS_PER_PGD/8)	/* regions 0-4 are user regions */
+#define FIRST_USER_ADDRESS	0
+
+/*
+ * All the normal masks have the "page accessed" bits on, as any time
+ * they are used, the page is accessed. They are cleared only by the
+ * page-out routines.
+ */
+#define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_A)
+#define PAGE_SHARED	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW)
+#define PAGE_READONLY	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+#define PAGE_COPY	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+#define PAGE_COPY_EXEC	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+#define PAGE_GATE	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
+#define PAGE_KERNEL	__pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX)
+#define PAGE_KERNELRX	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
+#define PAGE_KERNEL_UC	__pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX | \
+				 _PAGE_MA_UC)
+
+# ifndef __ASSEMBLY__
+
+#include <linux/sched.h>	/* for mm_struct */
+#include <linux/bitops.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+
+/*
+ * Next come the mappings that determine how mmap() protection bits
+ * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented.  The
+ * _P version gets used for a private shared memory segment, the _S
+ * version gets used for a shared memory segment with MAP_SHARED on.
+ * In a private shared memory segment, we do a copy-on-write if a task
+ * attempts to write to the page.
+ */
+	/* xwr */
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_READONLY	/* write to priv pg -> copy & make writable */
+#define __P011	PAGE_READONLY	/* ditto */
+#define __P100	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX)
+#define __P101	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+#define __P110	PAGE_COPY_EXEC
+#define __P111	PAGE_COPY_EXEC
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED	/* we don't have (and don't need) write-only */
+#define __S011	PAGE_SHARED
+#define __S100	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX)
+#define __S101	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+#define __S110	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
+#define __S111	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
+
+#define pgd_ERROR(e)	printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
+#ifdef CONFIG_PGTABLE_4
+#define pud_ERROR(e)	printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+#endif
+#define pmd_ERROR(e)	printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pte_ERROR(e)	printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
+
+
+/*
+ * Some definitions to translate between mem_map, PTEs, and page addresses:
+ */
+
+
+/* Quick test to see if ADDR is a (potentially) valid physical address. */
+static inline long
+ia64_phys_addr_valid (unsigned long addr)
+{
+	return (addr & (local_cpu_data->unimpl_pa_mask)) == 0;
+}
+
+/*
+ * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
+ * memory.  For the return value to be meaningful, ADDR must be >=
+ * PAGE_OFFSET.  This operation can be relatively expensive (e.g.,
+ * require a hash-, or multi-level tree-lookup or something of that
+ * sort) but it guarantees to return TRUE only if accessing the page
+ * at that address does not cause an error.  Note that there may be
+ * addresses for which kern_addr_valid() returns FALSE even though an
+ * access would not cause an error (e.g., this is typically true for
+ * memory mapped I/O regions.
+ *
+ * XXX Need to implement this for IA-64.
+ */
+#define kern_addr_valid(addr)	(1)
+
+
+/*
+ * Now come the defines and routines to manage and access the three-level
+ * page table.
+ */
+
+
+#define VMALLOC_START		(RGN_BASE(RGN_GATE) + 0x200000000UL)
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+# define VMALLOC_END_INIT	(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9)))
+extern unsigned long VMALLOC_END;
+#else
+#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+/* SPARSEMEM_VMEMMAP uses half of vmalloc... */
+# define VMALLOC_END		(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 10)))
+# define vmemmap		((struct page *)VMALLOC_END)
+#else
+# define VMALLOC_END		(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9)))
+#endif
+#endif
+
+/* fs/proc/kcore.c */
+#define	kc_vaddr_to_offset(v) ((v) - RGN_BASE(RGN_GATE))
+#define	kc_offset_to_vaddr(o) ((o) + RGN_BASE(RGN_GATE))
+
+#define RGN_MAP_SHIFT (PGDIR_SHIFT + PTRS_PER_PGD_SHIFT - 3)
+#define RGN_MAP_LIMIT	((1UL << RGN_MAP_SHIFT) - PAGE_SIZE)	/* per region addr limit */
+
+/*
+ * Conversion functions: convert page frame number (pfn) and a protection value to a page
+ * table entry (pte).
+ */
+#define pfn_pte(pfn, pgprot) \
+({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; })
+
+/* Extract pfn from pte.  */
+#define pte_pfn(_pte)		((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT)
+
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+/* This takes a physical page address that is used by the remapping functions */
+#define mk_pte_phys(physpage, pgprot) \
+({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; })
+
+#define pte_modify(_pte, newprot) \
+	(__pte((pte_val(_pte) & ~_PAGE_CHG_MASK) | (pgprot_val(newprot) & _PAGE_CHG_MASK)))
+
+#define pte_none(pte) 			(!pte_val(pte))
+#define pte_present(pte)		(pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE))
+#define pte_clear(mm,addr,pte)		(pte_val(*(pte)) = 0UL)
+/* pte_page() returns the "struct page *" corresponding to the PTE: */
+#define pte_page(pte)			virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET))
+
+#define pmd_none(pmd)			(!pmd_val(pmd))
+#define pmd_bad(pmd)			(!ia64_phys_addr_valid(pmd_val(pmd)))
+#define pmd_present(pmd)		(pmd_val(pmd) != 0UL)
+#define pmd_clear(pmdp)			(pmd_val(*(pmdp)) = 0UL)
+#define pmd_page_vaddr(pmd)		((unsigned long) __va(pmd_val(pmd) & _PFN_MASK))
+#define pmd_page(pmd)			virt_to_page((pmd_val(pmd) + PAGE_OFFSET))
+
+#define pud_none(pud)			(!pud_val(pud))
+#define pud_bad(pud)			(!ia64_phys_addr_valid(pud_val(pud)))
+#define pud_present(pud)		(pud_val(pud) != 0UL)
+#define pud_clear(pudp)			(pud_val(*(pudp)) = 0UL)
+#define pud_page_vaddr(pud)		((unsigned long) __va(pud_val(pud) & _PFN_MASK))
+#define pud_page(pud)			virt_to_page((pud_val(pud) + PAGE_OFFSET))
+
+#ifdef CONFIG_PGTABLE_4
+#define pgd_none(pgd)			(!pgd_val(pgd))
+#define pgd_bad(pgd)			(!ia64_phys_addr_valid(pgd_val(pgd)))
+#define pgd_present(pgd)		(pgd_val(pgd) != 0UL)
+#define pgd_clear(pgdp)			(pgd_val(*(pgdp)) = 0UL)
+#define pgd_page_vaddr(pgd)		((unsigned long) __va(pgd_val(pgd) & _PFN_MASK))
+#define pgd_page(pgd)			virt_to_page((pgd_val(pgd) + PAGE_OFFSET))
+#endif
+
+/*
+ * The following have defined behavior only work if pte_present() is true.
+ */
+#define pte_write(pte)	((unsigned) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) - 2) <= 4)
+#define pte_exec(pte)		((pte_val(pte) & _PAGE_AR_RX) != 0)
+#define pte_dirty(pte)		((pte_val(pte) & _PAGE_D) != 0)
+#define pte_young(pte)		((pte_val(pte) & _PAGE_A) != 0)
+#define pte_file(pte)		((pte_val(pte) & _PAGE_FILE) != 0)
+#define pte_special(pte)	0
+
+/*
+ * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the
+ * access rights:
+ */
+#define pte_wrprotect(pte)	(__pte(pte_val(pte) & ~_PAGE_AR_RW))
+#define pte_mkwrite(pte)	(__pte(pte_val(pte) | _PAGE_AR_RW))
+#define pte_mkold(pte)		(__pte(pte_val(pte) & ~_PAGE_A))
+#define pte_mkyoung(pte)	(__pte(pte_val(pte) | _PAGE_A))
+#define pte_mkclean(pte)	(__pte(pte_val(pte) & ~_PAGE_D))
+#define pte_mkdirty(pte)	(__pte(pte_val(pte) | _PAGE_D))
+#define pte_mkhuge(pte)		(__pte(pte_val(pte)))
+#define pte_mkspecial(pte)	(pte)
+
+/*
+ * Because ia64's Icache and Dcache is not coherent (on a cpu), we need to
+ * sync icache and dcache when we insert *new* executable page.
+ *  __ia64_sync_icache_dcache() check Pg_arch_1 bit and flush icache
+ * if necessary.
+ *
+ *  set_pte() is also called by the kernel, but we can expect that the kernel
+ *  flushes icache explicitly if necessary.
+ */
+#define pte_present_exec_user(pte)\
+	((pte_val(pte) & (_PAGE_P | _PAGE_PL_MASK | _PAGE_AR_RX)) == \
+		(_PAGE_P | _PAGE_PL_3 | _PAGE_AR_RX))
+
+extern void __ia64_sync_icache_dcache(pte_t pteval);
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+	/* page is present && page is user  && page is executable
+	 * && (page swapin or new page or page migraton
+	 *	|| copy_on_write with page copying.)
+	 */
+	if (pte_present_exec_user(pteval) &&
+	    (!pte_present(*ptep) ||
+		pte_pfn(*ptep) != pte_pfn(pteval)))
+		/* load_module() calles flush_icache_range() explicitly*/
+		__ia64_sync_icache_dcache(pteval);
+	*ptep = pteval;
+}
+
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+/*
+ * Make page protection values cacheable, uncacheable, or write-
+ * combining.  Note that "protection" is really a misnomer here as the
+ * protection value contains the memory attribute bits, dirty bits, and
+ * various other bits as well.
+ */
+#define pgprot_cacheable(prot)		__pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WB)
+#define pgprot_noncached(prot)		__pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC)
+#define pgprot_writecombine(prot)	__pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC)
+
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+
+static inline unsigned long
+pgd_index (unsigned long address)
+{
+	unsigned long region = address >> 61;
+	unsigned long l1index = (address >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1);
+
+	return (region << (PAGE_SHIFT - 6)) | l1index;
+}
+
+/* The offset in the 1-level directory is given by the 3 region bits
+   (61..63) and the level-1 bits.  */
+static inline pgd_t*
+pgd_offset (const struct mm_struct *mm, unsigned long address)
+{
+	return mm->pgd + pgd_index(address);
+}
+
+/* In the kernel's mapped region we completely ignore the region number
+   (since we know it's in region number 5). */
+#define pgd_offset_k(addr) \
+	(init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)))
+
+/* Look up a pgd entry in the gate area.  On IA-64, the gate-area
+   resides in the kernel-mapped segment, hence we use pgd_offset_k()
+   here.  */
+#define pgd_offset_gate(mm, addr)	pgd_offset_k(addr)
+
+#ifdef CONFIG_PGTABLE_4
+/* Find an entry in the second-level page table.. */
+#define pud_offset(dir,addr) \
+	((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+#endif
+
+/* Find an entry in the third-level page table.. */
+#define pmd_offset(dir,addr) \
+	((pmd_t *) pud_page_vaddr(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+
+/*
+ * Find an entry in the third-level page table.  This looks more complicated than it
+ * should be because some platforms place page tables in high memory.
+ */
+#define pte_index(addr)	 	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset_kernel(dir,addr)	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
+#define pte_offset_map(dir,addr)	pte_offset_kernel(dir, addr)
+#define pte_unmap(pte)			do { } while (0)
+
+/* atomic versions of the some PTE manipulations: */
+
+static inline int
+ptep_test_and_clear_young (struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	if (!pte_young(*ptep))
+		return 0;
+	return test_and_clear_bit(_PAGE_A_BIT, ptep);
+#else
+	pte_t pte = *ptep;
+	if (!pte_young(pte))
+		return 0;
+	set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
+	return 1;
+#endif
+}
+
+static inline pte_t
+ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	return __pte(xchg((long *) ptep, 0));
+#else
+	pte_t pte = *ptep;
+	pte_clear(mm, addr, ptep);
+	return pte;
+#endif
+}
+
+static inline void
+ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	unsigned long new, old;
+
+	do {
+		old = pte_val(*ptep);
+		new = pte_val(pte_wrprotect(__pte (old)));
+	} while (cmpxchg((unsigned long *) ptep, old, new) != old);
+#else
+	pte_t old_pte = *ptep;
+	set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
+#endif
+}
+
+static inline int
+pte_same (pte_t a, pte_t b)
+{
+	return pte_val(a) == pte_val(b);
+}
+
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern void paging_init (void);
+
+/*
+ * Note: The macros below rely on the fact that MAX_SWAPFILES_SHIFT <= number of
+ *	 bits in the swap-type field of the swap pte.  It would be nice to
+ *	 enforce that, but we can't easily include <linux/swap.h> here.
+ *	 (Of course, better still would be to define MAX_SWAPFILES_SHIFT here...).
+ *
+ * Format of swap pte:
+ *	bit   0   : present bit (must be zero)
+ *	bit   1   : _PAGE_FILE (must be zero)
+ *	bits  2- 8: swap-type
+ *	bits  9-62: swap offset
+ *	bit  63   : _PAGE_PROTNONE bit
+ *
+ * Format of file pte:
+ *	bit   0   : present bit (must be zero)
+ *	bit   1   : _PAGE_FILE (must be one)
+ *	bits  2-62: file_offset/PAGE_SIZE
+ *	bit  63   : _PAGE_PROTNONE bit
+ */
+#define __swp_type(entry)		(((entry).val >> 2) & 0x7f)
+#define __swp_offset(entry)		(((entry).val << 1) >> 10)
+#define __swp_entry(type,offset)	((swp_entry_t) { ((type) << 2) | ((long) (offset) << 9) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+
+#define PTE_FILE_MAX_BITS		61
+#define pte_to_pgoff(pte)		((pte_val(pte) << 1) >> 3)
+#define pgoff_to_pte(off)		((pte_t) { ((off) << 2) | _PAGE_FILE })
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
+		remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+extern struct page *zero_page_memmap_ptr;
+#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr)
+
+/* We provide our own get_unmapped_area to cope with VA holes for userland */
+#define HAVE_ARCH_UNMAPPED_AREA
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HUGETLB_PGDIR_SHIFT	(HPAGE_SHIFT + 2*(PAGE_SHIFT-3))
+#define HUGETLB_PGDIR_SIZE	(__IA64_UL(1) << HUGETLB_PGDIR_SHIFT)
+#define HUGETLB_PGDIR_MASK	(~(HUGETLB_PGDIR_SIZE-1))
+#endif
+
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+/*
+ * Update PTEP with ENTRY, which is guaranteed to be a less
+ * restrictive PTE.  That is, ENTRY may have the ACCESSED, DIRTY, and
+ * WRITABLE bits turned on, when the value at PTEP did not.  The
+ * WRITABLE bit may only be turned if SAFELY_WRITABLE is TRUE.
+ *
+ * SAFELY_WRITABLE is TRUE if we can update the value at PTEP without
+ * having to worry about races.  On SMP machines, there are only two
+ * cases where this is true:
+ *
+ *	(1) *PTEP has the PRESENT bit turned OFF
+ *	(2) ENTRY has the DIRTY bit turned ON
+ *
+ * On ia64, we could implement this routine with a cmpxchg()-loop
+ * which ORs in the _PAGE_A/_PAGE_D bit if they're set in ENTRY.
+ * However, like on x86, we can get a more streamlined version by
+ * observing that it is OK to drop ACCESSED bit updates when
+ * SAFELY_WRITABLE is FALSE.  Besides being rare, all that would do is
+ * result in an extra Access-bit fault, which would then turn on the
+ * ACCESSED bit in the low-level fault handler (iaccess_bit or
+ * daccess_bit in ivt.S).
+ */
+#ifdef CONFIG_SMP
+# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \
+({									\
+	int __changed = !pte_same(*(__ptep), __entry);			\
+	if (__changed && __safely_writable) {				\
+		set_pte(__ptep, __entry);				\
+		flush_tlb_page(__vma, __addr);				\
+	}								\
+	__changed;							\
+})
+#else
+# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \
+({									\
+	int __changed = !pte_same(*(__ptep), __entry);			\
+	if (__changed) {						\
+		set_pte_at((__vma)->vm_mm, (__addr), __ptep, __entry);	\
+		flush_tlb_page(__vma, __addr);				\
+	}								\
+	__changed;							\
+})
+#endif
+
+#  ifdef CONFIG_VIRTUAL_MEM_MAP
+  /* arch mem_map init routine is needed due to holes in a virtual mem_map */
+#   define __HAVE_ARCH_MEMMAP_INIT
+    extern void memmap_init (unsigned long size, int nid, unsigned long zone,
+			     unsigned long start_pfn);
+#  endif /* CONFIG_VIRTUAL_MEM_MAP */
+# endif /* !__ASSEMBLY__ */
+
+/*
+ * Identity-mapped regions use a large page size.  We'll call such large pages
+ * "granules".  If you can think of a better name that's unambiguous, let me
+ * know...
+ */
+#if defined(CONFIG_IA64_GRANULE_64MB)
+# define IA64_GRANULE_SHIFT	_PAGE_SIZE_64M
+#elif defined(CONFIG_IA64_GRANULE_16MB)
+# define IA64_GRANULE_SHIFT	_PAGE_SIZE_16M
+#endif
+#define IA64_GRANULE_SIZE	(1 << IA64_GRANULE_SHIFT)
+/*
+ * log2() of the page size we use to map the kernel image (IA64_TR_KERNEL):
+ */
+#define KERNEL_TR_PAGE_SHIFT	_PAGE_SIZE_64M
+#define KERNEL_TR_PAGE_SIZE	(1 << KERNEL_TR_PAGE_SHIFT)
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()	do { } while (0)
+
+/* These tell get_user_pages() that the first gate page is accessible from user-level.  */
+#define FIXADDR_USER_START	GATE_ADDR
+#ifdef HAVE_BUGGY_SEGREL
+# define FIXADDR_USER_END	(GATE_ADDR + 2*PAGE_SIZE)
+#else
+# define FIXADDR_USER_END	(GATE_ADDR + 2*PERCPU_PAGE_SIZE)
+#endif
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define __HAVE_ARCH_PTE_SAME
+#define __HAVE_ARCH_PGD_OFFSET_GATE
+
+
+#ifndef CONFIG_PGTABLE_4
+#include <asm-generic/pgtable-nopud.h>
+#endif
+#include <asm-generic/pgtable.h>
+
+#endif /* _ASM_IA64_PGTABLE_H */
diff --git a/arch/ia64/include/asm/poll.h b/arch/ia64/include/asm/poll.h
new file mode 100644
index 00000000..c98509d3
--- /dev/null
+++ b/arch/ia64/include/asm/poll.h
@@ -0,0 +1 @@
+#include <asm-generic/poll.h>
diff --git a/arch/ia64/include/asm/posix_types.h b/arch/ia64/include/asm/posix_types.h
new file mode 100644
index 00000000..17885567
--- /dev/null
+++ b/arch/ia64/include/asm/posix_types.h
@@ -0,0 +1,126 @@
+#ifndef _ASM_IA64_POSIX_TYPES_H
+#define _ASM_IA64_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ *
+ * Based on <asm-alpha/posix_types.h>.
+ *
+ * Modified 1998-2000, 2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+typedef unsigned long	__kernel_ino_t;
+typedef unsigned int	__kernel_mode_t;
+typedef unsigned int	__kernel_nlink_t;
+typedef long		__kernel_off_t;
+typedef long long	__kernel_loff_t;
+typedef int		__kernel_pid_t;
+typedef int		__kernel_ipc_pid_t;
+typedef unsigned int	__kernel_uid_t;
+typedef unsigned int	__kernel_gid_t;
+typedef unsigned long	__kernel_size_t;
+typedef long		__kernel_ssize_t;
+typedef long		__kernel_ptrdiff_t;
+typedef long		__kernel_time_t;
+typedef long		__kernel_suseconds_t;
+typedef long		__kernel_clock_t;
+typedef int		__kernel_timer_t;
+typedef int		__kernel_clockid_t;
+typedef int		__kernel_daddr_t;
+typedef char *		__kernel_caddr_t;
+typedef unsigned long	__kernel_sigset_t;	/* at least 32 bits */
+typedef unsigned short	__kernel_uid16_t;
+typedef unsigned short	__kernel_gid16_t;
+
+typedef struct {
+	int	val[2];
+} __kernel_fsid_t;
+
+typedef __kernel_uid_t __kernel_old_uid_t;
+typedef __kernel_gid_t __kernel_old_gid_t;
+typedef __kernel_uid_t __kernel_uid32_t;
+typedef __kernel_gid_t __kernel_gid32_t;
+
+typedef unsigned int	__kernel_old_dev_t;
+
+# ifdef __KERNEL__
+
+#  ifndef __GNUC__
+
+#define	__FD_SET(d, set)	((set)->fds_bits[__FDELT(d)] |= __FDMASK(d))
+#define	__FD_CLR(d, set)	((set)->fds_bits[__FDELT(d)] &= ~__FDMASK(d))
+#define	__FD_ISSET(d, set)	(((set)->fds_bits[__FDELT(d)] & __FDMASK(d)) != 0)
+#define	__FD_ZERO(set)	\
+  ((void) memset ((void *) (set), 0, sizeof (__kernel_fd_set)))
+
+#  else /* !__GNUC__ */
+
+/* With GNU C, use inline functions instead so args are evaluated only once: */
+
+#undef __FD_SET
+static __inline__ void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+	unsigned long _tmp = fd / __NFDBITS;
+	unsigned long _rem = fd % __NFDBITS;
+	fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
+}
+
+#undef __FD_CLR
+static __inline__ void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+	unsigned long _tmp = fd / __NFDBITS;
+	unsigned long _rem = fd % __NFDBITS;
+	fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
+}
+
+#undef __FD_ISSET
+static __inline__ int __FD_ISSET(unsigned long fd, const __kernel_fd_set *p)
+{ 
+	unsigned long _tmp = fd / __NFDBITS;
+	unsigned long _rem = fd % __NFDBITS;
+	return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant case (8 ints,
+ * for a 256-bit fd_set)
+ */
+#undef __FD_ZERO
+static __inline__ void __FD_ZERO(__kernel_fd_set *p)
+{
+	unsigned long *tmp = p->fds_bits;
+	int i;
+
+	if (__builtin_constant_p(__FDSET_LONGS)) {
+		switch (__FDSET_LONGS) {
+		      case 16:
+			tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+			tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+			tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
+			tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
+			return;
+
+		      case 8:
+			tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+			tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+			return;
+
+		      case 4:
+			tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+			return;
+		}
+	}
+	i = __FDSET_LONGS;
+	while (i) {
+		i--;
+		*tmp = 0;
+		tmp++;
+	}
+}
+
+#  endif /* !__GNUC__ */
+# endif /* __KERNEL__ */
+#endif /* _ASM_IA64_POSIX_TYPES_H */
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
new file mode 100644
index 00000000..03afe797
--- /dev/null
+++ b/arch/ia64/include/asm/processor.h
@@ -0,0 +1,726 @@
+#ifndef _ASM_IA64_PROCESSOR_H
+#define _ASM_IA64_PROCESSOR_H
+
+/*
+ * Copyright (C) 1998-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ *
+ * 11/24/98	S.Eranian	added ia64_set_iva()
+ * 12/03/99	D. Mosberger	implement thread_saved_pc() via kernel unwind API
+ * 06/16/00	A. Mallick	added csd/ssd/tssd for ia32 support
+ */
+
+
+#include <asm/intrinsics.h>
+#include <asm/kregs.h>
+#include <asm/ptrace.h>
+#include <asm/ustack.h>
+
+#define IA64_NUM_PHYS_STACK_REG	96
+#define IA64_NUM_DBG_REGS	8
+
+#define DEFAULT_MAP_BASE	__IA64_UL_CONST(0x2000000000000000)
+#define DEFAULT_TASK_SIZE	__IA64_UL_CONST(0xa000000000000000)
+
+/*
+ * TASK_SIZE really is a mis-named.  It really is the maximum user
+ * space address (plus one).  On IA-64, there are five regions of 2TB
+ * each (assuming 8KB page size), for a total of 8TB of user virtual
+ * address space.
+ */
+#define TASK_SIZE_OF(tsk)	((tsk)->thread.task_size)
+#define TASK_SIZE       	TASK_SIZE_OF(current)
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE	(current->thread.map_base)
+
+#define IA64_THREAD_FPH_VALID	(__IA64_UL(1) << 0)	/* floating-point high state valid? */
+#define IA64_THREAD_DBG_VALID	(__IA64_UL(1) << 1)	/* debug registers valid? */
+#define IA64_THREAD_PM_VALID	(__IA64_UL(1) << 2)	/* performance registers valid? */
+#define IA64_THREAD_UAC_NOPRINT	(__IA64_UL(1) << 3)	/* don't log unaligned accesses */
+#define IA64_THREAD_UAC_SIGBUS	(__IA64_UL(1) << 4)	/* generate SIGBUS on unaligned acc. */
+#define IA64_THREAD_MIGRATION	(__IA64_UL(1) << 5)	/* require migration
+							   sync at ctx sw */
+#define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6)	/* don't log any fpswa faults */
+#define IA64_THREAD_FPEMU_SIGFPE  (__IA64_UL(1) << 7)	/* send a SIGFPE for fpswa faults */
+
+#define IA64_THREAD_UAC_SHIFT	3
+#define IA64_THREAD_UAC_MASK	(IA64_THREAD_UAC_NOPRINT | IA64_THREAD_UAC_SIGBUS)
+#define IA64_THREAD_FPEMU_SHIFT	6
+#define IA64_THREAD_FPEMU_MASK	(IA64_THREAD_FPEMU_NOPRINT | IA64_THREAD_FPEMU_SIGFPE)
+
+
+/*
+ * This shift should be large enough to be able to represent 1000000000/itc_freq with good
+ * accuracy while being small enough to fit 10*1000000000<<IA64_NSEC_PER_CYC_SHIFT in 64 bits
+ * (this will give enough slack to represent 10 seconds worth of time as a scaled number).
+ */
+#define IA64_NSEC_PER_CYC_SHIFT	30
+
+#ifndef __ASSEMBLY__
+
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/threads.h>
+#include <linux/types.h>
+
+#include <asm/fpu.h>
+#include <asm/page.h>
+#include <asm/percpu.h>
+#include <asm/rse.h>
+#include <asm/unwind.h>
+#include <asm/atomic.h>
+#ifdef CONFIG_NUMA
+#include <asm/nodedata.h>
+#endif
+
+/* like above but expressed as bitfields for more efficient access: */
+struct ia64_psr {
+	__u64 reserved0 : 1;
+	__u64 be : 1;
+	__u64 up : 1;
+	__u64 ac : 1;
+	__u64 mfl : 1;
+	__u64 mfh : 1;
+	__u64 reserved1 : 7;
+	__u64 ic : 1;
+	__u64 i : 1;
+	__u64 pk : 1;
+	__u64 reserved2 : 1;
+	__u64 dt : 1;
+	__u64 dfl : 1;
+	__u64 dfh : 1;
+	__u64 sp : 1;
+	__u64 pp : 1;
+	__u64 di : 1;
+	__u64 si : 1;
+	__u64 db : 1;
+	__u64 lp : 1;
+	__u64 tb : 1;
+	__u64 rt : 1;
+	__u64 reserved3 : 4;
+	__u64 cpl : 2;
+	__u64 is : 1;
+	__u64 mc : 1;
+	__u64 it : 1;
+	__u64 id : 1;
+	__u64 da : 1;
+	__u64 dd : 1;
+	__u64 ss : 1;
+	__u64 ri : 2;
+	__u64 ed : 1;
+	__u64 bn : 1;
+	__u64 reserved4 : 19;
+};
+
+union ia64_isr {
+	__u64  val;
+	struct {
+		__u64 code : 16;
+		__u64 vector : 8;
+		__u64 reserved1 : 8;
+		__u64 x : 1;
+		__u64 w : 1;
+		__u64 r : 1;
+		__u64 na : 1;
+		__u64 sp : 1;
+		__u64 rs : 1;
+		__u64 ir : 1;
+		__u64 ni : 1;
+		__u64 so : 1;
+		__u64 ei : 2;
+		__u64 ed : 1;
+		__u64 reserved2 : 20;
+	};
+};
+
+union ia64_lid {
+	__u64 val;
+	struct {
+		__u64  rv  : 16;
+		__u64  eid : 8;
+		__u64  id  : 8;
+		__u64  ig  : 32;
+	};
+};
+
+union ia64_tpr {
+	__u64 val;
+	struct {
+		__u64 ig0 : 4;
+		__u64 mic : 4;
+		__u64 rsv : 8;
+		__u64 mmi : 1;
+		__u64 ig1 : 47;
+	};
+};
+
+union ia64_itir {
+	__u64 val;
+	struct {
+		__u64 rv3  :  2; /* 0-1 */
+		__u64 ps   :  6; /* 2-7 */
+		__u64 key  : 24; /* 8-31 */
+		__u64 rv4  : 32; /* 32-63 */
+	};
+};
+
+union  ia64_rr {
+	__u64 val;
+	struct {
+		__u64  ve	:  1;  /* enable hw walker */
+		__u64  reserved0:  1;  /* reserved */
+		__u64  ps	:  6;  /* log page size */
+		__u64  rid	: 24;  /* region id */
+		__u64  reserved1: 32;  /* reserved */
+	};
+};
+
+/*
+ * CPU type, hardware bug flags, and per-CPU state.  Frequently used
+ * state comes earlier:
+ */
+struct cpuinfo_ia64 {
+	unsigned int softirq_pending;
+	unsigned long itm_delta;	/* # of clock cycles between clock ticks */
+	unsigned long itm_next;		/* interval timer mask value to use for next clock tick */
+	unsigned long nsec_per_cyc;	/* (1000000000<<IA64_NSEC_PER_CYC_SHIFT)/itc_freq */
+	unsigned long unimpl_va_mask;	/* mask of unimplemented virtual address bits (from PAL) */
+	unsigned long unimpl_pa_mask;	/* mask of unimplemented physical address bits (from PAL) */
+	unsigned long itc_freq;		/* frequency of ITC counter */
+	unsigned long proc_freq;	/* frequency of processor */
+	unsigned long cyc_per_usec;	/* itc_freq/1000000 */
+	unsigned long ptce_base;
+	unsigned int ptce_count[2];
+	unsigned int ptce_stride[2];
+	struct task_struct *ksoftirqd;	/* kernel softirq daemon for this CPU */
+
+#ifdef CONFIG_SMP
+	unsigned long loops_per_jiffy;
+	int cpu;
+	unsigned int socket_id;	/* physical processor socket id */
+	unsigned short core_id;	/* core id */
+	unsigned short thread_id; /* thread id */
+	unsigned short num_log;	/* Total number of logical processors on
+				 * this socket that were successfully booted */
+	unsigned char cores_per_socket;	/* Cores per processor socket */
+	unsigned char threads_per_core;	/* Threads per core */
+#endif
+
+	/* CPUID-derived information: */
+	unsigned long ppn;
+	unsigned long features;
+	unsigned char number;
+	unsigned char revision;
+	unsigned char model;
+	unsigned char family;
+	unsigned char archrev;
+	char vendor[16];
+	char *model_name;
+
+#ifdef CONFIG_NUMA
+	struct ia64_node_data *node_data;
+#endif
+};
+
+DECLARE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info);
+
+/*
+ * The "local" data variable.  It refers to the per-CPU data of the currently executing
+ * CPU, much like "current" points to the per-task data of the currently executing task.
+ * Do not use the address of local_cpu_data, since it will be different from
+ * cpu_data(smp_processor_id())!
+ */
+#define local_cpu_data		(&__ia64_per_cpu_var(ia64_cpu_info))
+#define cpu_data(cpu)		(&per_cpu(ia64_cpu_info, cpu))
+
+extern void print_cpu_info (struct cpuinfo_ia64 *);
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+#define SET_UNALIGN_CTL(task,value)								\
+({												\
+	(task)->thread.flags = (((task)->thread.flags & ~IA64_THREAD_UAC_MASK)			\
+				| (((value) << IA64_THREAD_UAC_SHIFT) & IA64_THREAD_UAC_MASK));	\
+	0;											\
+})
+#define GET_UNALIGN_CTL(task,addr)								\
+({												\
+	put_user(((task)->thread.flags & IA64_THREAD_UAC_MASK) >> IA64_THREAD_UAC_SHIFT,	\
+		 (int __user *) (addr));							\
+})
+
+#define SET_FPEMU_CTL(task,value)								\
+({												\
+	(task)->thread.flags = (((task)->thread.flags & ~IA64_THREAD_FPEMU_MASK)		\
+			  | (((value) << IA64_THREAD_FPEMU_SHIFT) & IA64_THREAD_FPEMU_MASK));	\
+	0;											\
+})
+#define GET_FPEMU_CTL(task,addr)								\
+({												\
+	put_user(((task)->thread.flags & IA64_THREAD_FPEMU_MASK) >> IA64_THREAD_FPEMU_SHIFT,	\
+		 (int __user *) (addr));							\
+})
+
+struct thread_struct {
+	__u32 flags;			/* various thread flags (see IA64_THREAD_*) */
+	/* writing on_ustack is performance-critical, so it's worth spending 8 bits on it... */
+	__u8 on_ustack;			/* executing on user-stacks? */
+	__u8 pad[3];
+	__u64 ksp;			/* kernel stack pointer */
+	__u64 map_base;			/* base address for get_unmapped_area() */
+	__u64 task_size;		/* limit for task size */
+	__u64 rbs_bot;			/* the base address for the RBS */
+	int last_fph_cpu;		/* CPU that may hold the contents of f32-f127 */
+
+#ifdef CONFIG_PERFMON
+	void *pfm_context;		     /* pointer to detailed PMU context */
+	unsigned long pfm_needs_checking;    /* when >0, pending perfmon work on kernel exit */
+# define INIT_THREAD_PM		.pfm_context =		NULL,     \
+				.pfm_needs_checking =	0UL,
+#else
+# define INIT_THREAD_PM
+#endif
+	unsigned long dbr[IA64_NUM_DBG_REGS];
+	unsigned long ibr[IA64_NUM_DBG_REGS];
+	struct ia64_fpreg fph[96];	/* saved/loaded on demand */
+};
+
+#define INIT_THREAD {						\
+	.flags =	0,					\
+	.on_ustack =	0,					\
+	.ksp =		0,					\
+	.map_base =	DEFAULT_MAP_BASE,			\
+	.rbs_bot =	STACK_TOP - DEFAULT_USER_STACK_SIZE,	\
+	.task_size =	DEFAULT_TASK_SIZE,			\
+	.last_fph_cpu =  -1,					\
+	INIT_THREAD_PM						\
+	.dbr =		{0, },					\
+	.ibr =		{0, },					\
+	.fph =		{{{{0}}}, }				\
+}
+
+#define start_thread(regs,new_ip,new_sp) do {							\
+	set_fs(USER_DS);									\
+	regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL))		\
+			 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS));		\
+	regs->cr_iip = new_ip;									\
+	regs->ar_rsc = 0xf;		/* eager mode, privilege level 3 */			\
+	regs->ar_rnat = 0;									\
+	regs->ar_bspstore = current->thread.rbs_bot;						\
+	regs->ar_fpsr = FPSR_DEFAULT;								\
+	regs->loadrs = 0;									\
+	regs->r8 = get_dumpable(current->mm);	/* set "don't zap registers" flag */		\
+	regs->r12 = new_sp - 16;	/* allocate 16 byte scratch area */			\
+	if (unlikely(!get_dumpable(current->mm))) {							\
+		/*										\
+		 * Zap scratch regs to avoid leaking bits between processes with different	\
+		 * uid/privileges.								\
+		 */										\
+		regs->ar_pfs = 0; regs->b0 = 0; regs->pr = 0;					\
+		regs->r1 = 0; regs->r9  = 0; regs->r11 = 0; regs->r13 = 0; regs->r15 = 0;	\
+	}											\
+} while (0)
+
+/* Forward declarations, a strange C thing... */
+struct mm_struct;
+struct task_struct;
+
+/*
+ * Free all resources held by a thread. This is called after the
+ * parent of DEAD_TASK has collected the exit status of the task via
+ * wait().
+ */
+#define release_thread(dead_task)
+
+/* Prepare to copy thread state - unlazy all lazy status */
+#define prepare_to_copy(tsk)	do { } while (0)
+
+/*
+ * This is the mechanism for creating a new kernel thread.
+ *
+ * NOTE 1: Only a kernel-only process (ie the swapper or direct
+ * descendants who haven't done an "execve()") should use this: it
+ * will work within a system call from a "real" process, but the
+ * process memory space will not be free'd until both the parent and
+ * the child have exited.
+ *
+ * NOTE 2: This MUST NOT be an inlined function.  Otherwise, we get
+ * into trouble in init/main.c when the child thread returns to
+ * do_basic_setup() and the timing is such that free_initmem() has
+ * been called already.
+ */
+extern pid_t kernel_thread (int (*fn)(void *), void *arg, unsigned long flags);
+
+/* Get wait channel for task P.  */
+extern unsigned long get_wchan (struct task_struct *p);
+
+/* Return instruction pointer of blocked task TSK.  */
+#define KSTK_EIP(tsk)					\
+  ({							\
+	struct pt_regs *_regs = task_pt_regs(tsk);	\
+	_regs->cr_iip + ia64_psr(_regs)->ri;		\
+  })
+
+/* Return stack pointer of blocked task TSK.  */
+#define KSTK_ESP(tsk)  ((tsk)->thread.ksp)
+
+extern void ia64_getreg_unknown_kr (void);
+extern void ia64_setreg_unknown_kr (void);
+
+#define ia64_get_kr(regnum)					\
+({								\
+	unsigned long r = 0;					\
+								\
+	switch (regnum) {					\
+	    case 0: r = ia64_getreg(_IA64_REG_AR_KR0); break;	\
+	    case 1: r = ia64_getreg(_IA64_REG_AR_KR1); break;	\
+	    case 2: r = ia64_getreg(_IA64_REG_AR_KR2); break;	\
+	    case 3: r = ia64_getreg(_IA64_REG_AR_KR3); break;	\
+	    case 4: r = ia64_getreg(_IA64_REG_AR_KR4); break;	\
+	    case 5: r = ia64_getreg(_IA64_REG_AR_KR5); break;	\
+	    case 6: r = ia64_getreg(_IA64_REG_AR_KR6); break;	\
+	    case 7: r = ia64_getreg(_IA64_REG_AR_KR7); break;	\
+	    default: ia64_getreg_unknown_kr(); break;		\
+	}							\
+	r;							\
+})
+
+#define ia64_set_kr(regnum, r) 					\
+({								\
+	switch (regnum) {					\
+	    case 0: ia64_setreg(_IA64_REG_AR_KR0, r); break;	\
+	    case 1: ia64_setreg(_IA64_REG_AR_KR1, r); break;	\
+	    case 2: ia64_setreg(_IA64_REG_AR_KR2, r); break;	\
+	    case 3: ia64_setreg(_IA64_REG_AR_KR3, r); break;	\
+	    case 4: ia64_setreg(_IA64_REG_AR_KR4, r); break;	\
+	    case 5: ia64_setreg(_IA64_REG_AR_KR5, r); break;	\
+	    case 6: ia64_setreg(_IA64_REG_AR_KR6, r); break;	\
+	    case 7: ia64_setreg(_IA64_REG_AR_KR7, r); break;	\
+	    default: ia64_setreg_unknown_kr(); break;		\
+	}							\
+})
+
+/*
+ * The following three macros can't be inline functions because we don't have struct
+ * task_struct at this point.
+ */
+
+/*
+ * Return TRUE if task T owns the fph partition of the CPU we're running on.
+ * Must be called from code that has preemption disabled.
+ */
+#define ia64_is_local_fpu_owner(t)								\
+({												\
+	struct task_struct *__ia64_islfo_task = (t);						\
+	(__ia64_islfo_task->thread.last_fph_cpu == smp_processor_id()				\
+	 && __ia64_islfo_task == (struct task_struct *) ia64_get_kr(IA64_KR_FPU_OWNER));	\
+})
+
+/*
+ * Mark task T as owning the fph partition of the CPU we're running on.
+ * Must be called from code that has preemption disabled.
+ */
+#define ia64_set_local_fpu_owner(t) do {						\
+	struct task_struct *__ia64_slfo_task = (t);					\
+	__ia64_slfo_task->thread.last_fph_cpu = smp_processor_id();			\
+	ia64_set_kr(IA64_KR_FPU_OWNER, (unsigned long) __ia64_slfo_task);		\
+} while (0)
+
+/* Mark the fph partition of task T as being invalid on all CPUs.  */
+#define ia64_drop_fpu(t)	((t)->thread.last_fph_cpu = -1)
+
+extern void __ia64_init_fpu (void);
+extern void __ia64_save_fpu (struct ia64_fpreg *fph);
+extern void __ia64_load_fpu (struct ia64_fpreg *fph);
+extern void ia64_save_debug_regs (unsigned long *save_area);
+extern void ia64_load_debug_regs (unsigned long *save_area);
+
+#define ia64_fph_enable()	do { ia64_rsm(IA64_PSR_DFH); ia64_srlz_d(); } while (0)
+#define ia64_fph_disable()	do { ia64_ssm(IA64_PSR_DFH); ia64_srlz_d(); } while (0)
+
+/* load fp 0.0 into fph */
+static inline void
+ia64_init_fpu (void) {
+	ia64_fph_enable();
+	__ia64_init_fpu();
+	ia64_fph_disable();
+}
+
+/* save f32-f127 at FPH */
+static inline void
+ia64_save_fpu (struct ia64_fpreg *fph) {
+	ia64_fph_enable();
+	__ia64_save_fpu(fph);
+	ia64_fph_disable();
+}
+
+/* load f32-f127 from FPH */
+static inline void
+ia64_load_fpu (struct ia64_fpreg *fph) {
+	ia64_fph_enable();
+	__ia64_load_fpu(fph);
+	ia64_fph_disable();
+}
+
+static inline __u64
+ia64_clear_ic (void)
+{
+	__u64 psr;
+	psr = ia64_getreg(_IA64_REG_PSR);
+	ia64_stop();
+	ia64_rsm(IA64_PSR_I | IA64_PSR_IC);
+	ia64_srlz_i();
+	return psr;
+}
+
+/*
+ * Restore the psr.
+ */
+static inline void
+ia64_set_psr (__u64 psr)
+{
+	ia64_stop();
+	ia64_setreg(_IA64_REG_PSR_L, psr);
+	ia64_srlz_i();
+}
+
+/*
+ * Insert a translation into an instruction and/or data translation
+ * register.
+ */
+static inline void
+ia64_itr (__u64 target_mask, __u64 tr_num,
+	  __u64 vmaddr, __u64 pte,
+	  __u64 log_page_size)
+{
+	ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2));
+	ia64_setreg(_IA64_REG_CR_IFA, vmaddr);
+	ia64_stop();
+	if (target_mask & 0x1)
+		ia64_itri(tr_num, pte);
+	if (target_mask & 0x2)
+		ia64_itrd(tr_num, pte);
+}
+
+/*
+ * Insert a translation into the instruction and/or data translation
+ * cache.
+ */
+static inline void
+ia64_itc (__u64 target_mask, __u64 vmaddr, __u64 pte,
+	  __u64 log_page_size)
+{
+	ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2));
+	ia64_setreg(_IA64_REG_CR_IFA, vmaddr);
+	ia64_stop();
+	/* as per EAS2.6, itc must be the last instruction in an instruction group */
+	if (target_mask & 0x1)
+		ia64_itci(pte);
+	if (target_mask & 0x2)
+		ia64_itcd(pte);
+}
+
+/*
+ * Purge a range of addresses from instruction and/or data translation
+ * register(s).
+ */
+static inline void
+ia64_ptr (__u64 target_mask, __u64 vmaddr, __u64 log_size)
+{
+	if (target_mask & 0x1)
+		ia64_ptri(vmaddr, (log_size << 2));
+	if (target_mask & 0x2)
+		ia64_ptrd(vmaddr, (log_size << 2));
+}
+
+/* Set the interrupt vector address.  The address must be suitably aligned (32KB).  */
+static inline void
+ia64_set_iva (void *ivt_addr)
+{
+	ia64_setreg(_IA64_REG_CR_IVA, (__u64) ivt_addr);
+	ia64_srlz_i();
+}
+
+/* Set the page table address and control bits.  */
+static inline void
+ia64_set_pta (__u64 pta)
+{
+	/* Note: srlz.i implies srlz.d */
+	ia64_setreg(_IA64_REG_CR_PTA, pta);
+	ia64_srlz_i();
+}
+
+static inline void
+ia64_eoi (void)
+{
+	ia64_setreg(_IA64_REG_CR_EOI, 0);
+	ia64_srlz_d();
+}
+
+#define cpu_relax()	ia64_hint(ia64_hint_pause)
+
+static inline int
+ia64_get_irr(unsigned int vector)
+{
+	unsigned int reg = vector / 64;
+	unsigned int bit = vector % 64;
+	u64 irr;
+
+	switch (reg) {
+	case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break;
+	case 1: irr = ia64_getreg(_IA64_REG_CR_IRR1); break;
+	case 2: irr = ia64_getreg(_IA64_REG_CR_IRR2); break;
+	case 3: irr = ia64_getreg(_IA64_REG_CR_IRR3); break;
+	}
+
+	return test_bit(bit, &irr);
+}
+
+static inline void
+ia64_set_lrr0 (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_CR_LRR0, val);
+	ia64_srlz_d();
+}
+
+static inline void
+ia64_set_lrr1 (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_CR_LRR1, val);
+	ia64_srlz_d();
+}
+
+
+/*
+ * Given the address to which a spill occurred, return the unat bit
+ * number that corresponds to this address.
+ */
+static inline __u64
+ia64_unat_pos (void *spill_addr)
+{
+	return ((__u64) spill_addr >> 3) & 0x3f;
+}
+
+/*
+ * Set the NaT bit of an integer register which was spilled at address
+ * SPILL_ADDR.  UNAT is the mask to be updated.
+ */
+static inline void
+ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
+{
+	__u64 bit = ia64_unat_pos(spill_addr);
+	__u64 mask = 1UL << bit;
+
+	*unat = (*unat & ~mask) | (nat << bit);
+}
+
+/*
+ * Return saved PC of a blocked thread.
+ * Note that the only way T can block is through a call to schedule() -> switch_to().
+ */
+static inline unsigned long
+thread_saved_pc (struct task_struct *t)
+{
+	struct unw_frame_info info;
+	unsigned long ip;
+
+	unw_init_from_blocked_task(&info, t);
+	if (unw_unwind(&info) < 0)
+		return 0;
+	unw_get_ip(&info, &ip);
+	return ip;
+}
+
+/*
+ * Get the current instruction/program counter value.
+ */
+#define current_text_addr() \
+	({ void *_pc; _pc = (void *)ia64_getreg(_IA64_REG_IP); _pc; })
+
+static inline __u64
+ia64_get_ivr (void)
+{
+	__u64 r;
+	ia64_srlz_d();
+	r = ia64_getreg(_IA64_REG_CR_IVR);
+	ia64_srlz_d();
+	return r;
+}
+
+static inline void
+ia64_set_dbr (__u64 regnum, __u64 value)
+{
+	__ia64_set_dbr(regnum, value);
+#ifdef CONFIG_ITANIUM
+	ia64_srlz_d();
+#endif
+}
+
+static inline __u64
+ia64_get_dbr (__u64 regnum)
+{
+	__u64 retval;
+
+	retval = __ia64_get_dbr(regnum);
+#ifdef CONFIG_ITANIUM
+	ia64_srlz_d();
+#endif
+	return retval;
+}
+
+static inline __u64
+ia64_rotr (__u64 w, __u64 n)
+{
+	return (w >> n) | (w << (64 - n));
+}
+
+#define ia64_rotl(w,n)	ia64_rotr((w), (64) - (n))
+
+/*
+ * Take a mapped kernel address and return the equivalent address
+ * in the region 7 identity mapped virtual area.
+ */
+static inline void *
+ia64_imva (void *addr)
+{
+	void *result;
+	result = (void *) ia64_tpa(addr);
+	return __va(result);
+}
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+#define PREFETCH_STRIDE			L1_CACHE_BYTES
+
+static inline void
+prefetch (const void *x)
+{
+	 ia64_lfetch(ia64_lfhint_none, x);
+}
+
+static inline void
+prefetchw (const void *x)
+{
+	ia64_lfetch_excl(ia64_lfhint_none, x);
+}
+
+#define spin_lock_prefetch(x)	prefetchw(x)
+
+extern unsigned long boot_option_idle_override;
+
+enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT,
+			 IDLE_NOMWAIT, IDLE_POLL};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_PROCESSOR_H */
diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
new file mode 100644
index 00000000..7ae9c3f1
--- /dev/null
+++ b/arch/ia64/include/asm/ptrace.h
@@ -0,0 +1,366 @@
+#ifndef _ASM_IA64_PTRACE_H
+#define _ASM_IA64_PTRACE_H
+
+/*
+ * Copyright (C) 1998-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2003 Intel Co
+ *	Suresh Siddha <suresh.b.siddha@intel.com>
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Arun Sharma <arun.sharma@intel.com>
+ *
+ * 12/07/98	S. Eranian	added pt_regs & switch_stack
+ * 12/21/98	D. Mosberger	updated to match latest code
+ *  6/17/99	D. Mosberger	added second unat member to "struct switch_stack"
+ *
+ */
+/*
+ * When a user process is blocked, its state looks as follows:
+ *
+ *            +----------------------+	-------	IA64_STK_OFFSET
+ *     	      |			     |	 ^
+ *            | struct pt_regs       |	 |
+ *	      |			     |	 |
+ *            +----------------------+	 |
+ *	      |			     |	 |
+ *     	      |	   memory stack	     |	 |
+ *	      |	(growing downwards)  |	 |
+ *	      //.....................//	 |
+ *					 |
+ *	      //.....................//	 |
+ *	      |			     |	 |
+ *            +----------------------+	 |
+ *            | struct switch_stack  |	 |
+ *	      |			     |	 |
+ *	      +----------------------+	 |
+ *	      |			     |	 |
+ *	      //.....................//	 |
+ *					 |
+ *	      //.....................//	 |
+ *	      |			     |	 |
+ *	      |	 register stack	     |	 |
+ *	      |	(growing upwards)    |	 |
+ *            |			     |	 |
+ *	      +----------------------+	 |  ---	IA64_RBS_OFFSET
+ *            |  struct thread_info  |	 |  ^
+ *	      +----------------------+	 |  |
+ *	      |			     |	 |  |
+ *            |  struct task_struct  |	 |  |
+ * current -> |			     |   |  |
+ *	      +----------------------+ -------
+ *
+ * Note that ar.ec is not saved explicitly in pt_reg or switch_stack.
+ * This is because ar.ec is saved as part of ar.pfs.
+ */
+
+
+#include <asm/fpu.h>
+
+#ifdef __KERNEL__
+#ifndef ASM_OFFSETS_C
+#include <asm/asm-offsets.h>
+#endif
+
+/*
+ * Base-2 logarithm of number of pages to allocate per task structure
+ * (including register backing store and memory stack):
+ */
+#if defined(CONFIG_IA64_PAGE_SIZE_4KB)
+# define KERNEL_STACK_SIZE_ORDER		3
+#elif defined(CONFIG_IA64_PAGE_SIZE_8KB)
+# define KERNEL_STACK_SIZE_ORDER		2
+#elif defined(CONFIG_IA64_PAGE_SIZE_16KB)
+# define KERNEL_STACK_SIZE_ORDER		1
+#else
+# define KERNEL_STACK_SIZE_ORDER		0
+#endif
+
+#define IA64_RBS_OFFSET			((IA64_TASK_SIZE + IA64_THREAD_INFO_SIZE + 31) & ~31)
+#define IA64_STK_OFFSET			((1 << KERNEL_STACK_SIZE_ORDER)*PAGE_SIZE)
+
+#define KERNEL_STACK_SIZE		IA64_STK_OFFSET
+
+#endif /* __KERNEL__ */
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This struct defines the way the registers are saved on system
+ * calls.
+ *
+ * We don't save all floating point register because the kernel
+ * is compiled to use only a very small subset, so the other are
+ * untouched.
+ *
+ * THIS STRUCTURE MUST BE A MULTIPLE 16-BYTE IN SIZE
+ * (because the memory stack pointer MUST ALWAYS be aligned this way)
+ *
+ */
+struct pt_regs {
+	/* The following registers are saved by SAVE_MIN: */
+	unsigned long b6;		/* scratch */
+	unsigned long b7;		/* scratch */
+
+	unsigned long ar_csd;           /* used by cmp8xchg16 (scratch) */
+	unsigned long ar_ssd;           /* reserved for future use (scratch) */
+
+	unsigned long r8;		/* scratch (return value register 0) */
+	unsigned long r9;		/* scratch (return value register 1) */
+	unsigned long r10;		/* scratch (return value register 2) */
+	unsigned long r11;		/* scratch (return value register 3) */
+
+	unsigned long cr_ipsr;		/* interrupted task's psr */
+	unsigned long cr_iip;		/* interrupted task's instruction pointer */
+	/*
+	 * interrupted task's function state; if bit 63 is cleared, it
+	 * contains syscall's ar.pfs.pfm:
+	 */
+	unsigned long cr_ifs;
+
+	unsigned long ar_unat;		/* interrupted task's NaT register (preserved) */
+	unsigned long ar_pfs;		/* prev function state  */
+	unsigned long ar_rsc;		/* RSE configuration */
+	/* The following two are valid only if cr_ipsr.cpl > 0 || ti->flags & _TIF_MCA_INIT */
+	unsigned long ar_rnat;		/* RSE NaT */
+	unsigned long ar_bspstore;	/* RSE bspstore */
+
+	unsigned long pr;		/* 64 predicate registers (1 bit each) */
+	unsigned long b0;		/* return pointer (bp) */
+	unsigned long loadrs;		/* size of dirty partition << 16 */
+
+	unsigned long r1;		/* the gp pointer */
+	unsigned long r12;		/* interrupted task's memory stack pointer */
+	unsigned long r13;		/* thread pointer */
+
+	unsigned long ar_fpsr;		/* floating point status (preserved) */
+	unsigned long r15;		/* scratch */
+
+	/* The remaining registers are NOT saved for system calls.  */
+
+	unsigned long r14;		/* scratch */
+	unsigned long r2;		/* scratch */
+	unsigned long r3;		/* scratch */
+
+	/* The following registers are saved by SAVE_REST: */
+	unsigned long r16;		/* scratch */
+	unsigned long r17;		/* scratch */
+	unsigned long r18;		/* scratch */
+	unsigned long r19;		/* scratch */
+	unsigned long r20;		/* scratch */
+	unsigned long r21;		/* scratch */
+	unsigned long r22;		/* scratch */
+	unsigned long r23;		/* scratch */
+	unsigned long r24;		/* scratch */
+	unsigned long r25;		/* scratch */
+	unsigned long r26;		/* scratch */
+	unsigned long r27;		/* scratch */
+	unsigned long r28;		/* scratch */
+	unsigned long r29;		/* scratch */
+	unsigned long r30;		/* scratch */
+	unsigned long r31;		/* scratch */
+
+	unsigned long ar_ccv;		/* compare/exchange value (scratch) */
+
+	/*
+	 * Floating point registers that the kernel considers scratch:
+	 */
+	struct ia64_fpreg f6;		/* scratch */
+	struct ia64_fpreg f7;		/* scratch */
+	struct ia64_fpreg f8;		/* scratch */
+	struct ia64_fpreg f9;		/* scratch */
+	struct ia64_fpreg f10;		/* scratch */
+	struct ia64_fpreg f11;		/* scratch */
+};
+
+/*
+ * This structure contains the addition registers that need to
+ * preserved across a context switch.  This generally consists of
+ * "preserved" registers.
+ */
+struct switch_stack {
+	unsigned long caller_unat;	/* user NaT collection register (preserved) */
+	unsigned long ar_fpsr;		/* floating-point status register */
+
+	struct ia64_fpreg f2;		/* preserved */
+	struct ia64_fpreg f3;		/* preserved */
+	struct ia64_fpreg f4;		/* preserved */
+	struct ia64_fpreg f5;		/* preserved */
+
+	struct ia64_fpreg f12;		/* scratch, but untouched by kernel */
+	struct ia64_fpreg f13;		/* scratch, but untouched by kernel */
+	struct ia64_fpreg f14;		/* scratch, but untouched by kernel */
+	struct ia64_fpreg f15;		/* scratch, but untouched by kernel */
+	struct ia64_fpreg f16;		/* preserved */
+	struct ia64_fpreg f17;		/* preserved */
+	struct ia64_fpreg f18;		/* preserved */
+	struct ia64_fpreg f19;		/* preserved */
+	struct ia64_fpreg f20;		/* preserved */
+	struct ia64_fpreg f21;		/* preserved */
+	struct ia64_fpreg f22;		/* preserved */
+	struct ia64_fpreg f23;		/* preserved */
+	struct ia64_fpreg f24;		/* preserved */
+	struct ia64_fpreg f25;		/* preserved */
+	struct ia64_fpreg f26;		/* preserved */
+	struct ia64_fpreg f27;		/* preserved */
+	struct ia64_fpreg f28;		/* preserved */
+	struct ia64_fpreg f29;		/* preserved */
+	struct ia64_fpreg f30;		/* preserved */
+	struct ia64_fpreg f31;		/* preserved */
+
+	unsigned long r4;		/* preserved */
+	unsigned long r5;		/* preserved */
+	unsigned long r6;		/* preserved */
+	unsigned long r7;		/* preserved */
+
+	unsigned long b0;		/* so we can force a direct return in copy_thread */
+	unsigned long b1;
+	unsigned long b2;
+	unsigned long b3;
+	unsigned long b4;
+	unsigned long b5;
+
+	unsigned long ar_pfs;		/* previous function state */
+	unsigned long ar_lc;		/* loop counter (preserved) */
+	unsigned long ar_unat;		/* NaT bits for r4-r7 */
+	unsigned long ar_rnat;		/* RSE NaT collection register */
+	unsigned long ar_bspstore;	/* RSE dirty base (preserved) */
+	unsigned long pr;		/* 64 predicate registers (1 bit each) */
+};
+
+#ifdef __KERNEL__
+
+#include <asm/current.h>
+#include <asm/page.h>
+
+/*
+ * We use the ia64_psr(regs)->ri to determine which of the three
+ * instructions in bundle (16 bytes) took the sample. Generate
+ * the canonical representation by adding to instruction pointer.
+ */
+# define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
+
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+	/* FIXME: should this be bspstore + nr_dirty regs? */
+	return regs->ar_bspstore;
+}
+
+#define regs_return_value(regs) ((regs)->r8)
+
+/* Conserve space in histogram by encoding slot bits in address
+ * bits 2 and 3 rather than bits 0 and 1.
+ */
+#define profile_pc(regs)						\
+({									\
+	unsigned long __ip = instruction_pointer(regs);			\
+	(__ip & ~3UL) + ((__ip & 3UL) << 2);				\
+})
+
+  /* given a pointer to a task_struct, return the user's pt_regs */
+# define task_pt_regs(t)		(((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1)
+# define ia64_psr(regs)			((struct ia64_psr *) &(regs)->cr_ipsr)
+# define user_mode(regs)		(((struct ia64_psr *) &(regs)->cr_ipsr)->cpl != 0)
+# define user_stack(task,regs)	((long) regs - (long) task == IA64_STK_OFFSET - sizeof(*regs))
+# define fsys_mode(task,regs)					\
+  ({								\
+	  struct task_struct *_task = (task);			\
+	  struct pt_regs *_regs = (regs);			\
+	  !user_mode(_regs) && user_stack(_task, _regs);	\
+  })
+
+  /*
+   * System call handlers that, upon successful completion, need to return a negative value
+   * should call force_successful_syscall_return() right before returning.  On architectures
+   * where the syscall convention provides for a separate error flag (e.g., alpha, ia64,
+   * ppc{,64}, sparc{,64}, possibly others), this macro can be used to ensure that the error
+   * flag will not get set.  On architectures which do not support a separate error flag,
+   * the macro is a no-op and the spurious error condition needs to be filtered out by some
+   * other means (e.g., in user-level, by passing an extra argument to the syscall handler,
+   * or something along those lines).
+   *
+   * On ia64, we can clear the user's pt_regs->r8 to force a successful syscall.
+   */
+# define force_successful_syscall_return()	(task_pt_regs(current)->r8 = 0)
+
+  struct task_struct;			/* forward decl */
+  struct unw_frame_info;		/* forward decl */
+
+  extern void show_regs (struct pt_regs *);
+  extern void ia64_do_show_stack (struct unw_frame_info *, void *);
+  extern unsigned long ia64_get_user_rbs_end (struct task_struct *, struct pt_regs *,
+					      unsigned long *);
+  extern long ia64_peek (struct task_struct *, struct switch_stack *, unsigned long,
+			 unsigned long, long *);
+  extern long ia64_poke (struct task_struct *, struct switch_stack *, unsigned long,
+			 unsigned long, long);
+  extern void ia64_flush_fph (struct task_struct *);
+  extern void ia64_sync_fph (struct task_struct *);
+  extern void ia64_sync_krbs(void);
+  extern long ia64_sync_user_rbs (struct task_struct *, struct switch_stack *,
+				  unsigned long, unsigned long);
+
+  /* get nat bits for scratch registers such that bit N==1 iff scratch register rN is a NaT */
+  extern unsigned long ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat);
+  /* put nat bits for scratch registers such that scratch register rN is a NaT iff bit N==1 */
+  extern unsigned long ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat);
+
+  extern void ia64_increment_ip (struct pt_regs *pt);
+  extern void ia64_decrement_ip (struct pt_regs *pt);
+
+  extern void ia64_ptrace_stop(void);
+  #define arch_ptrace_stop(code, info) \
+	ia64_ptrace_stop()
+  #define arch_ptrace_stop_needed(code, info) \
+	(!test_thread_flag(TIF_RESTORE_RSE))
+
+  extern void ptrace_attach_sync_user_rbs (struct task_struct *);
+  #define arch_ptrace_attach(child) \
+	ptrace_attach_sync_user_rbs(child)
+
+  #define arch_has_single_step()  (1)
+  #define arch_has_block_step()   (1)
+
+#endif /* !__KERNEL__ */
+
+/* pt_all_user_regs is used for PTRACE_GETREGS PTRACE_SETREGS */
+struct pt_all_user_regs {
+	unsigned long nat;
+	unsigned long cr_iip;
+	unsigned long cfm;
+	unsigned long cr_ipsr;
+	unsigned long pr;
+
+	unsigned long gr[32];
+	unsigned long br[8];
+	unsigned long ar[128];
+	struct ia64_fpreg fr[128];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/* indices to application-registers array in pt_all_user_regs */
+#define PT_AUR_RSC	16
+#define PT_AUR_BSP	17
+#define PT_AUR_BSPSTORE	18
+#define PT_AUR_RNAT	19
+#define PT_AUR_CCV	32
+#define PT_AUR_UNAT	36
+#define PT_AUR_FPSR	40
+#define PT_AUR_PFS	64
+#define PT_AUR_LC	65
+#define PT_AUR_EC	66
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the number assigned in <linux/ptrace.h>.
+ */
+#define PTRACE_SINGLEBLOCK	12	/* resume execution until next branch */
+#define PTRACE_OLD_GETSIGINFO	13	/* (replaced by PTRACE_GETSIGINFO in <linux/ptrace.h>)  */
+#define PTRACE_OLD_SETSIGINFO	14	/* (replaced by PTRACE_SETSIGINFO in <linux/ptrace.h>)  */
+#define PTRACE_GETREGS		18	/* get all registers (pt_all_user_regs) in one shot */
+#define PTRACE_SETREGS		19	/* set all registers (pt_all_user_regs) in one shot */
+
+#define PTRACE_OLDSETOPTIONS	21
+
+#endif /* _ASM_IA64_PTRACE_H */
diff --git a/arch/ia64/include/asm/ptrace_offsets.h b/arch/ia64/include/asm/ptrace_offsets.h
new file mode 100644
index 00000000..b712773c
--- /dev/null
+++ b/arch/ia64/include/asm/ptrace_offsets.h
@@ -0,0 +1,268 @@
+#ifndef _ASM_IA64_PTRACE_OFFSETS_H
+#define _ASM_IA64_PTRACE_OFFSETS_H
+
+/*
+ * Copyright (C) 1999, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+/*
+ * The "uarea" that can be accessed via PEEKUSER and POKEUSER is a
+ * virtual structure that would have the following definition:
+ *
+ *	struct uarea {
+ *		struct ia64_fpreg fph[96];		// f32-f127
+ *		unsigned long nat_bits;
+ *		unsigned long empty1;
+ *		struct ia64_fpreg f2;			// f2-f5
+ *			:
+ *		struct ia64_fpreg f5;
+ *		struct ia64_fpreg f10;			// f10-f31
+ *			:
+ *		struct ia64_fpreg f31;
+ *		unsigned long r4;			// r4-r7
+ *			:
+ *		unsigned long r7;
+ *		unsigned long b1;			// b1-b5
+ *			:
+ *		unsigned long b5;
+ *		unsigned long ar_ec;
+ *		unsigned long ar_lc;
+ *		unsigned long empty2[5];
+ *		unsigned long cr_ipsr;
+ *		unsigned long cr_iip;
+ *		unsigned long cfm;
+ *		unsigned long ar_unat;
+ *		unsigned long ar_pfs;
+ *		unsigned long ar_rsc;
+ *		unsigned long ar_rnat;
+ *		unsigned long ar_bspstore;
+ *		unsigned long pr;
+ *		unsigned long b6;
+ *		unsigned long ar_bsp;
+ *		unsigned long r1;
+ *		unsigned long r2;
+ *		unsigned long r3;
+ *		unsigned long r12;
+ *		unsigned long r13;
+ *		unsigned long r14;
+ *		unsigned long r15;
+ *		unsigned long r8;
+ *		unsigned long r9;
+ *		unsigned long r10;
+ *		unsigned long r11;
+ *		unsigned long r16;
+ *			:
+ *		unsigned long r31;
+ *		unsigned long ar_ccv;
+ *		unsigned long ar_fpsr;
+ *		unsigned long b0;
+ *		unsigned long b7;
+ *		unsigned long f6;
+ *		unsigned long f7;
+ *		unsigned long f8;
+ *		unsigned long f9;
+ *		unsigned long ar_csd;
+ *		unsigned long ar_ssd;
+ *		unsigned long rsvd1[710];
+ *		unsigned long dbr[8];
+ *		unsigned long rsvd2[504];
+ *		unsigned long ibr[8];
+ *		unsigned long rsvd3[504];
+ *		unsigned long pmd[4];
+ *	}
+ */
+
+/* fph: */
+#define PT_F32			0x0000
+#define PT_F33			0x0010
+#define PT_F34			0x0020
+#define PT_F35			0x0030
+#define PT_F36			0x0040
+#define PT_F37			0x0050
+#define PT_F38			0x0060
+#define PT_F39			0x0070
+#define PT_F40			0x0080
+#define PT_F41			0x0090
+#define PT_F42			0x00a0
+#define PT_F43			0x00b0
+#define PT_F44			0x00c0
+#define PT_F45			0x00d0
+#define PT_F46			0x00e0
+#define PT_F47			0x00f0
+#define PT_F48			0x0100
+#define PT_F49			0x0110
+#define PT_F50			0x0120
+#define PT_F51			0x0130
+#define PT_F52			0x0140
+#define PT_F53			0x0150
+#define PT_F54			0x0160
+#define PT_F55			0x0170
+#define PT_F56			0x0180
+#define PT_F57			0x0190
+#define PT_F58			0x01a0
+#define PT_F59			0x01b0
+#define PT_F60			0x01c0
+#define PT_F61			0x01d0
+#define PT_F62			0x01e0
+#define PT_F63			0x01f0
+#define PT_F64			0x0200
+#define PT_F65			0x0210
+#define PT_F66			0x0220
+#define PT_F67			0x0230
+#define PT_F68			0x0240
+#define PT_F69			0x0250
+#define PT_F70			0x0260
+#define PT_F71			0x0270
+#define PT_F72			0x0280
+#define PT_F73			0x0290
+#define PT_F74			0x02a0
+#define PT_F75			0x02b0
+#define PT_F76			0x02c0
+#define PT_F77			0x02d0
+#define PT_F78			0x02e0
+#define PT_F79			0x02f0
+#define PT_F80			0x0300
+#define PT_F81			0x0310
+#define PT_F82			0x0320
+#define PT_F83			0x0330
+#define PT_F84			0x0340
+#define PT_F85			0x0350
+#define PT_F86			0x0360
+#define PT_F87			0x0370
+#define PT_F88			0x0380
+#define PT_F89			0x0390
+#define PT_F90			0x03a0
+#define PT_F91			0x03b0
+#define PT_F92			0x03c0
+#define PT_F93			0x03d0
+#define PT_F94			0x03e0
+#define PT_F95			0x03f0
+#define PT_F96			0x0400
+#define PT_F97			0x0410
+#define PT_F98			0x0420
+#define PT_F99			0x0430
+#define PT_F100			0x0440
+#define PT_F101			0x0450
+#define PT_F102			0x0460
+#define PT_F103			0x0470
+#define PT_F104			0x0480
+#define PT_F105			0x0490
+#define PT_F106			0x04a0
+#define PT_F107			0x04b0
+#define PT_F108			0x04c0
+#define PT_F109			0x04d0
+#define PT_F110			0x04e0
+#define PT_F111			0x04f0
+#define PT_F112			0x0500
+#define PT_F113			0x0510
+#define PT_F114			0x0520
+#define PT_F115			0x0530
+#define PT_F116			0x0540
+#define PT_F117			0x0550
+#define PT_F118			0x0560
+#define PT_F119			0x0570
+#define PT_F120			0x0580
+#define PT_F121			0x0590
+#define PT_F122			0x05a0
+#define PT_F123			0x05b0
+#define PT_F124			0x05c0
+#define PT_F125			0x05d0
+#define PT_F126			0x05e0
+#define PT_F127			0x05f0
+
+#define PT_NAT_BITS		0x0600
+
+#define PT_F2			0x0610
+#define PT_F3			0x0620
+#define PT_F4			0x0630
+#define PT_F5			0x0640
+#define PT_F10			0x0650
+#define PT_F11			0x0660
+#define PT_F12			0x0670
+#define PT_F13			0x0680
+#define PT_F14			0x0690
+#define PT_F15			0x06a0
+#define PT_F16			0x06b0
+#define PT_F17			0x06c0
+#define PT_F18			0x06d0
+#define PT_F19			0x06e0
+#define PT_F20			0x06f0
+#define PT_F21			0x0700
+#define PT_F22			0x0710
+#define PT_F23			0x0720
+#define PT_F24			0x0730
+#define PT_F25			0x0740
+#define PT_F26			0x0750
+#define PT_F27			0x0760
+#define PT_F28			0x0770
+#define PT_F29			0x0780
+#define PT_F30			0x0790
+#define PT_F31			0x07a0
+#define PT_R4			0x07b0
+#define PT_R5			0x07b8
+#define PT_R6			0x07c0
+#define PT_R7			0x07c8
+
+#define PT_B1			0x07d8
+#define PT_B2			0x07e0
+#define PT_B3			0x07e8
+#define PT_B4			0x07f0
+#define PT_B5			0x07f8
+
+#define PT_AR_EC		0x0800
+#define PT_AR_LC		0x0808
+
+#define PT_CR_IPSR		0x0830
+#define PT_CR_IIP		0x0838
+#define PT_CFM			0x0840
+#define PT_AR_UNAT		0x0848
+#define PT_AR_PFS		0x0850
+#define PT_AR_RSC		0x0858
+#define PT_AR_RNAT		0x0860
+#define PT_AR_BSPSTORE		0x0868
+#define PT_PR			0x0870
+#define PT_B6			0x0878
+#define PT_AR_BSP		0x0880	/* note: this points to the *end* of the backing store! */
+#define PT_R1			0x0888
+#define PT_R2			0x0890
+#define PT_R3			0x0898
+#define PT_R12			0x08a0
+#define PT_R13			0x08a8
+#define PT_R14			0x08b0
+#define PT_R15			0x08b8
+#define PT_R8 			0x08c0
+#define PT_R9			0x08c8
+#define PT_R10			0x08d0
+#define PT_R11			0x08d8
+#define PT_R16			0x08e0
+#define PT_R17			0x08e8
+#define PT_R18			0x08f0
+#define PT_R19			0x08f8
+#define PT_R20			0x0900
+#define PT_R21			0x0908
+#define PT_R22			0x0910
+#define PT_R23			0x0918
+#define PT_R24			0x0920
+#define PT_R25			0x0928
+#define PT_R26			0x0930
+#define PT_R27			0x0938
+#define PT_R28			0x0940
+#define PT_R29			0x0948
+#define PT_R30			0x0950
+#define PT_R31			0x0958
+#define PT_AR_CCV		0x0960
+#define PT_AR_FPSR		0x0968
+#define PT_B0			0x0970
+#define PT_B7			0x0978
+#define PT_F6			0x0980
+#define PT_F7			0x0990
+#define PT_F8			0x09a0
+#define PT_F9			0x09b0
+#define PT_AR_CSD		0x09c0
+#define PT_AR_SSD		0x09c8
+
+#define PT_DBR			0x2000	/* data breakpoint registers */
+#define PT_IBR			0x3000	/* instruction breakpoint registers */
+#define PT_PMD			0x4000	/* performance monitoring counters */
+
+#endif /* _ASM_IA64_PTRACE_OFFSETS_H */
diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h
new file mode 100644
index 00000000..44ef9ef8
--- /dev/null
+++ b/arch/ia64/include/asm/pvclock-abi.h
@@ -0,0 +1,48 @@
+/*
+ * same structure to x86's
+ * Hopefully asm-x86/pvclock-abi.h would be moved to somewhere more generic.
+ * For now, define same duplicated definitions.
+ */
+
+#ifndef _ASM_IA64__PVCLOCK_ABI_H
+#define _ASM_IA64__PVCLOCK_ABI_H
+#ifndef __ASSEMBLY__
+
+/*
+ * These structs MUST NOT be changed.
+ * They are the ABI between hypervisor and guest OS.
+ * Both Xen and KVM are using this.
+ *
+ * pvclock_vcpu_time_info holds the system time and the tsc timestamp
+ * of the last update. So the guest can use the tsc delta to get a
+ * more precise system time.  There is one per virtual cpu.
+ *
+ * pvclock_wall_clock references the point in time when the system
+ * time was zero (usually boot time), thus the guest calculates the
+ * current wall clock by adding the system time.
+ *
+ * Protocol for the "version" fields is: hypervisor raises it (making
+ * it uneven) before it starts updating the fields and raises it again
+ * (making it even) when it is done.  Thus the guest can make sure the
+ * time values it got are consistent by checking the version before
+ * and after reading them.
+ */
+
+struct pvclock_vcpu_time_info {
+	u32   version;
+	u32   pad0;
+	u64   tsc_timestamp;
+	u64   system_time;
+	u32   tsc_to_system_mul;
+	s8    tsc_shift;
+	u8    pad[3];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+	u32   version;
+	u32   sec;
+	u32   nsec;
+} __attribute__((__packed__));
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_IA64__PVCLOCK_ABI_H */
diff --git a/arch/ia64/include/asm/resource.h b/arch/ia64/include/asm/resource.h
new file mode 100644
index 00000000..ba2272a8
--- /dev/null
+++ b/arch/ia64/include/asm/resource.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_IA64_RESOURCE_H
+#define _ASM_IA64_RESOURCE_H
+
+#include <asm/ustack.h>
+#include <asm-generic/resource.h>
+
+#endif /* _ASM_IA64_RESOURCE_H */
diff --git a/arch/ia64/include/asm/rse.h b/arch/ia64/include/asm/rse.h
new file mode 100644
index 00000000..02830a3b
--- /dev/null
+++ b/arch/ia64/include/asm/rse.h
@@ -0,0 +1,66 @@
+#ifndef _ASM_IA64_RSE_H
+#define _ASM_IA64_RSE_H
+
+/*
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Register stack engine related helper functions.  This file may be
+ * used in applications, so be careful about the name-space and give
+ * some consideration to non-GNU C compilers (though __inline__ is
+ * fine).
+ */
+
+static __inline__ unsigned long
+ia64_rse_slot_num (unsigned long *addr)
+{
+	return (((unsigned long) addr) >> 3) & 0x3f;
+}
+
+/*
+ * Return TRUE if ADDR is the address of an RNAT slot.
+ */
+static __inline__ unsigned long
+ia64_rse_is_rnat_slot (unsigned long *addr)
+{
+	return ia64_rse_slot_num(addr) == 0x3f;
+}
+
+/*
+ * Returns the address of the RNAT slot that covers the slot at
+ * address SLOT_ADDR.
+ */
+static __inline__ unsigned long *
+ia64_rse_rnat_addr (unsigned long *slot_addr)
+{
+	return (unsigned long *) ((unsigned long) slot_addr | (0x3f << 3));
+}
+
+/*
+ * Calculate the number of registers in the dirty partition starting at BSPSTORE and
+ * ending at BSP.  This isn't simply (BSP-BSPSTORE)/8 because every 64th slot stores
+ * ar.rnat.
+ */
+static __inline__ unsigned long
+ia64_rse_num_regs (unsigned long *bspstore, unsigned long *bsp)
+{
+	unsigned long slots = (bsp - bspstore);
+
+	return slots - (ia64_rse_slot_num(bspstore) + slots)/0x40;
+}
+
+/*
+ * The inverse of the above: given bspstore and the number of
+ * registers, calculate ar.bsp.
+ */
+static __inline__ unsigned long *
+ia64_rse_skip_regs (unsigned long *addr, long num_regs)
+{
+	long delta = ia64_rse_slot_num(addr) + num_regs;
+
+	if (num_regs < 0)
+		delta -= 0x3e;
+	return addr + num_regs + delta/0x3f;
+}
+
+#endif /* _ASM_IA64_RSE_H */
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
new file mode 100644
index 00000000..3027e751
--- /dev/null
+++ b/arch/ia64/include/asm/rwsem.h
@@ -0,0 +1,145 @@
+/*
+ * R/W semaphores for ia64
+ *
+ * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
+ * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 2005 Christoph Lameter <clameter@sgi.com>
+ *
+ * Based on asm-i386/rwsem.h and other architecture implementation.
+ *
+ * The MSW of the count is the negated number of active writers and
+ * waiting lockers, and the LSW is the total number of active locks.
+ *
+ * The lock count is initialized to 0 (no active and no waiting lockers).
+ *
+ * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for
+ * the case of an uncontended lock. Readers increment by 1 and see a positive
+ * value when uncontended, negative if there are writers (and maybe) readers
+ * waiting (in which case it goes to sleep).
+ */
+
+#ifndef _ASM_IA64_RWSEM_H
+#define _ASM_IA64_RWSEM_H
+
+#ifndef _LINUX_RWSEM_H
+#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
+#endif
+
+#include <asm/intrinsics.h>
+
+#define RWSEM_UNLOCKED_VALUE		__IA64_UL_CONST(0x0000000000000000)
+#define RWSEM_ACTIVE_BIAS		(1L)
+#define RWSEM_ACTIVE_MASK		(0xffffffffL)
+#define RWSEM_WAITING_BIAS		(-0x100000000L)
+#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+
+/*
+ * lock for reading
+ */
+static inline void
+__down_read (struct rw_semaphore *sem)
+{
+	long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1);
+
+	if (result < 0)
+		rwsem_down_read_failed(sem);
+}
+
+/*
+ * lock for writing
+ */
+static inline void
+__down_write (struct rw_semaphore *sem)
+{
+	long old, new;
+
+	do {
+		old = sem->count;
+		new = old + RWSEM_ACTIVE_WRITE_BIAS;
+	} while (cmpxchg_acq(&sem->count, old, new) != old);
+
+	if (old != 0)
+		rwsem_down_write_failed(sem);
+}
+
+/*
+ * unlock after reading
+ */
+static inline void
+__up_read (struct rw_semaphore *sem)
+{
+	long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1);
+
+	if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
+		rwsem_wake(sem);
+}
+
+/*
+ * unlock after writing
+ */
+static inline void
+__up_write (struct rw_semaphore *sem)
+{
+	long old, new;
+
+	do {
+		old = sem->count;
+		new = old - RWSEM_ACTIVE_WRITE_BIAS;
+	} while (cmpxchg_rel(&sem->count, old, new) != old);
+
+	if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
+		rwsem_wake(sem);
+}
+
+/*
+ * trylock for reading -- returns 1 if successful, 0 if contention
+ */
+static inline int
+__down_read_trylock (struct rw_semaphore *sem)
+{
+	long tmp;
+	while ((tmp = sem->count) >= 0) {
+		if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * trylock for writing -- returns 1 if successful, 0 if contention
+ */
+static inline int
+__down_write_trylock (struct rw_semaphore *sem)
+{
+	long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE,
+			      RWSEM_ACTIVE_WRITE_BIAS);
+	return tmp == RWSEM_UNLOCKED_VALUE;
+}
+
+/*
+ * downgrade write lock to read lock
+ */
+static inline void
+__downgrade_write (struct rw_semaphore *sem)
+{
+	long old, new;
+
+	do {
+		old = sem->count;
+		new = old - RWSEM_WAITING_BIAS;
+	} while (cmpxchg_rel(&sem->count, old, new) != old);
+
+	if (old < 0)
+		rwsem_downgrade_wake(sem);
+}
+
+/*
+ * Implement atomic add functionality.  These used to be "inline" functions, but GCC v3.1
+ * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd.
+ */
+#define rwsem_atomic_add(delta, sem)	atomic64_add(delta, (atomic64_t *)(&(sem)->count))
+#define rwsem_atomic_update(delta, sem)	atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
+
+#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/include/asm/sal.h b/arch/ia64/include/asm/sal.h
new file mode 100644
index 00000000..d19ddba4
--- /dev/null
+++ b/arch/ia64/include/asm/sal.h
@@ -0,0 +1,918 @@
+#ifndef _ASM_IA64_SAL_H
+#define _ASM_IA64_SAL_H
+
+/*
+ * System Abstraction Layer definitions.
+ *
+ * This is based on version 2.5 of the manual "IA-64 System
+ * Abstraction Layer".
+ *
+ * Copyright (C) 2001 Intel
+ * Copyright (C) 2002 Jenna Hall <jenna.s.hall@intel.com>
+ * Copyright (C) 2001 Fred Lewis <frederick.v.lewis@intel.com>
+ * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Srinivasa Prasad Thirumalachar <sprasad@sprasad.engr.sgi.com>
+ *
+ * 02/01/04 J. Hall Updated Error Record Structures to conform to July 2001
+ *		    revision of the SAL spec.
+ * 01/01/03 fvlewis Updated Error Record Structures to conform with Nov. 2000
+ *                  revision of the SAL spec.
+ * 99/09/29 davidm	Updated for SAL 2.6.
+ * 00/03/29 cfleck      Updated SAL Error Logging info for processor (SAL 2.6)
+ *                      (plus examples of platform error info structures from smariset @ Intel)
+ */
+
+#define IA64_SAL_PLATFORM_FEATURE_BUS_LOCK_BIT		0
+#define IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT_BIT	1
+#define IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT_BIT	2
+#define IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT	 	3
+
+#define IA64_SAL_PLATFORM_FEATURE_BUS_LOCK	  (1<<IA64_SAL_PLATFORM_FEATURE_BUS_LOCK_BIT)
+#define IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT (1<<IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT_BIT)
+#define IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT (1<<IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT_BIT)
+#define IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT	  (1<<IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bcd.h>
+#include <linux/spinlock.h>
+#include <linux/efi.h>
+
+#include <asm/pal.h>
+#include <asm/system.h>
+#include <asm/fpu.h>
+
+extern spinlock_t sal_lock;
+
+/* SAL spec _requires_ eight args for each call. */
+#define __IA64_FW_CALL(entry,result,a0,a1,a2,a3,a4,a5,a6,a7)	\
+	result = (*entry)(a0,a1,a2,a3,a4,a5,a6,a7)
+
+# define IA64_FW_CALL(entry,result,args...) do {		\
+	unsigned long __ia64_sc_flags;				\
+	struct ia64_fpreg __ia64_sc_fr[6];			\
+	ia64_save_scratch_fpregs(__ia64_sc_fr);			\
+	spin_lock_irqsave(&sal_lock, __ia64_sc_flags);		\
+	__IA64_FW_CALL(entry, result, args);			\
+	spin_unlock_irqrestore(&sal_lock, __ia64_sc_flags);	\
+	ia64_load_scratch_fpregs(__ia64_sc_fr);			\
+} while (0)
+
+# define SAL_CALL(result,args...)			\
+	IA64_FW_CALL(ia64_sal, result, args);
+
+# define SAL_CALL_NOLOCK(result,args...) do {		\
+	unsigned long __ia64_scn_flags;			\
+	struct ia64_fpreg __ia64_scn_fr[6];		\
+	ia64_save_scratch_fpregs(__ia64_scn_fr);	\
+	local_irq_save(__ia64_scn_flags);		\
+	__IA64_FW_CALL(ia64_sal, result, args);		\
+	local_irq_restore(__ia64_scn_flags);		\
+	ia64_load_scratch_fpregs(__ia64_scn_fr);	\
+} while (0)
+
+# define SAL_CALL_REENTRANT(result,args...) do {	\
+	struct ia64_fpreg __ia64_scs_fr[6];		\
+	ia64_save_scratch_fpregs(__ia64_scs_fr);	\
+	preempt_disable();				\
+	__IA64_FW_CALL(ia64_sal, result, args);		\
+	preempt_enable();				\
+	ia64_load_scratch_fpregs(__ia64_scs_fr);	\
+} while (0)
+
+#define SAL_SET_VECTORS			0x01000000
+#define SAL_GET_STATE_INFO		0x01000001
+#define SAL_GET_STATE_INFO_SIZE		0x01000002
+#define SAL_CLEAR_STATE_INFO		0x01000003
+#define SAL_MC_RENDEZ			0x01000004
+#define SAL_MC_SET_PARAMS		0x01000005
+#define SAL_REGISTER_PHYSICAL_ADDR	0x01000006
+
+#define SAL_CACHE_FLUSH			0x01000008
+#define SAL_CACHE_INIT			0x01000009
+#define SAL_PCI_CONFIG_READ		0x01000010
+#define SAL_PCI_CONFIG_WRITE		0x01000011
+#define SAL_FREQ_BASE			0x01000012
+#define SAL_PHYSICAL_ID_INFO		0x01000013
+
+#define SAL_UPDATE_PAL			0x01000020
+
+struct ia64_sal_retval {
+	/*
+	 * A zero status value indicates call completed without error.
+	 * A negative status value indicates reason of call failure.
+	 * A positive status value indicates success but an
+	 * informational value should be printed (e.g., "reboot for
+	 * change to take effect").
+	 */
+	long status;
+	unsigned long v0;
+	unsigned long v1;
+	unsigned long v2;
+};
+
+typedef struct ia64_sal_retval (*ia64_sal_handler) (u64, ...);
+
+enum {
+	SAL_FREQ_BASE_PLATFORM = 0,
+	SAL_FREQ_BASE_INTERVAL_TIMER = 1,
+	SAL_FREQ_BASE_REALTIME_CLOCK = 2
+};
+
+/*
+ * The SAL system table is followed by a variable number of variable
+ * length descriptors.  The structure of these descriptors follows
+ * below.
+ * The defininition follows SAL specs from July 2000
+ */
+struct ia64_sal_systab {
+	u8 signature[4];	/* should be "SST_" */
+	u32 size;		/* size of this table in bytes */
+	u8 sal_rev_minor;
+	u8 sal_rev_major;
+	u16 entry_count;	/* # of entries in variable portion */
+	u8 checksum;
+	u8 reserved1[7];
+	u8 sal_a_rev_minor;
+	u8 sal_a_rev_major;
+	u8 sal_b_rev_minor;
+	u8 sal_b_rev_major;
+	/* oem_id & product_id: terminating NUL is missing if string is exactly 32 bytes long. */
+	u8 oem_id[32];
+	u8 product_id[32];	/* ASCII product id  */
+	u8 reserved2[8];
+};
+
+enum sal_systab_entry_type {
+	SAL_DESC_ENTRY_POINT = 0,
+	SAL_DESC_MEMORY = 1,
+	SAL_DESC_PLATFORM_FEATURE = 2,
+	SAL_DESC_TR = 3,
+	SAL_DESC_PTC = 4,
+	SAL_DESC_AP_WAKEUP = 5
+};
+
+/*
+ * Entry type:	Size:
+ *	0	48
+ *	1	32
+ *	2	16
+ *	3	32
+ *	4	16
+ *	5	16
+ */
+#define SAL_DESC_SIZE(type)	"\060\040\020\040\020\020"[(unsigned) type]
+
+typedef struct ia64_sal_desc_entry_point {
+	u8 type;
+	u8 reserved1[7];
+	u64 pal_proc;
+	u64 sal_proc;
+	u64 gp;
+	u8 reserved2[16];
+}ia64_sal_desc_entry_point_t;
+
+typedef struct ia64_sal_desc_memory {
+	u8 type;
+	u8 used_by_sal;	/* needs to be mapped for SAL? */
+	u8 mem_attr;		/* current memory attribute setting */
+	u8 access_rights;	/* access rights set up by SAL */
+	u8 mem_attr_mask;	/* mask of supported memory attributes */
+	u8 reserved1;
+	u8 mem_type;		/* memory type */
+	u8 mem_usage;		/* memory usage */
+	u64 addr;		/* physical address of memory */
+	u32 length;	/* length (multiple of 4KB pages) */
+	u32 reserved2;
+	u8 oem_reserved[8];
+} ia64_sal_desc_memory_t;
+
+typedef struct ia64_sal_desc_platform_feature {
+	u8 type;
+	u8 feature_mask;
+	u8 reserved1[14];
+} ia64_sal_desc_platform_feature_t;
+
+typedef struct ia64_sal_desc_tr {
+	u8 type;
+	u8 tr_type;		/* 0 == instruction, 1 == data */
+	u8 regnum;		/* translation register number */
+	u8 reserved1[5];
+	u64 addr;		/* virtual address of area covered */
+	u64 page_size;		/* encoded page size */
+	u8 reserved2[8];
+} ia64_sal_desc_tr_t;
+
+typedef struct ia64_sal_desc_ptc {
+	u8 type;
+	u8 reserved1[3];
+	u32 num_domains;	/* # of coherence domains */
+	u64 domain_info;	/* physical address of domain info table */
+} ia64_sal_desc_ptc_t;
+
+typedef struct ia64_sal_ptc_domain_info {
+	u64 proc_count;		/* number of processors in domain */
+	u64 proc_list;		/* physical address of LID array */
+} ia64_sal_ptc_domain_info_t;
+
+typedef struct ia64_sal_ptc_domain_proc_entry {
+	u64 id  : 8;		/* id of processor */
+	u64 eid : 8;		/* eid of processor */
+} ia64_sal_ptc_domain_proc_entry_t;
+
+
+#define IA64_SAL_AP_EXTERNAL_INT 0
+
+typedef struct ia64_sal_desc_ap_wakeup {
+	u8 type;
+	u8 mechanism;		/* 0 == external interrupt */
+	u8 reserved1[6];
+	u64 vector;		/* interrupt vector in range 0x10-0xff */
+} ia64_sal_desc_ap_wakeup_t ;
+
+extern ia64_sal_handler ia64_sal;
+extern struct ia64_sal_desc_ptc *ia64_ptc_domain_info;
+
+extern unsigned short sal_revision;	/* supported SAL spec revision */
+extern unsigned short sal_version;	/* SAL version; OEM dependent */
+#define SAL_VERSION_CODE(major, minor) ((bin2bcd(major) << 8) | bin2bcd(minor))
+
+extern const char *ia64_sal_strerror (long status);
+extern void ia64_sal_init (struct ia64_sal_systab *sal_systab);
+
+/* SAL information type encodings */
+enum {
+	SAL_INFO_TYPE_MCA  = 0,		/* Machine check abort information */
+        SAL_INFO_TYPE_INIT = 1,		/* Init information */
+        SAL_INFO_TYPE_CMC  = 2,		/* Corrected machine check information */
+        SAL_INFO_TYPE_CPE  = 3		/* Corrected platform error information */
+};
+
+/* Encodings for machine check parameter types */
+enum {
+	SAL_MC_PARAM_RENDEZ_INT    = 1,	/* Rendezvous interrupt */
+	SAL_MC_PARAM_RENDEZ_WAKEUP = 2,	/* Wakeup */
+	SAL_MC_PARAM_CPE_INT	   = 3	/* Corrected Platform Error Int */
+};
+
+/* Encodings for rendezvous mechanisms */
+enum {
+	SAL_MC_PARAM_MECHANISM_INT = 1,	/* Use interrupt */
+	SAL_MC_PARAM_MECHANISM_MEM = 2	/* Use memory synchronization variable*/
+};
+
+/* Encodings for vectors which can be registered by the OS with SAL */
+enum {
+	SAL_VECTOR_OS_MCA	  = 0,
+	SAL_VECTOR_OS_INIT	  = 1,
+	SAL_VECTOR_OS_BOOT_RENDEZ = 2
+};
+
+/* Encodings for mca_opt parameter sent to SAL_MC_SET_PARAMS */
+#define	SAL_MC_PARAM_RZ_ALWAYS		0x1
+#define	SAL_MC_PARAM_BINIT_ESCALATE	0x10
+
+/*
+ * Definition of the SAL Error Log from the SAL spec
+ */
+
+/* SAL Error Record Section GUID Definitions */
+#define SAL_PROC_DEV_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf1, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_MEM_DEV_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf2, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_SEL_DEV_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf3, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_PCI_BUS_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf4, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf5, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_PCI_COMP_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf6, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_SPECIFIC_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf7, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_HOST_CTLR_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf8, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_BUS_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf9, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID \
+    EFI_GUID(0x6cb0a200, 0x893a, 0x11da, 0x96, 0xd2, 0x0, 0x10, 0x83, 0xff, \
+		0xca, 0x4d)
+
+#define MAX_CACHE_ERRORS	6
+#define MAX_TLB_ERRORS		6
+#define MAX_BUS_ERRORS		1
+
+/* Definition of version  according to SAL spec for logging purposes */
+typedef struct sal_log_revision {
+	u8 minor;		/* BCD (0..99) */
+	u8 major;		/* BCD (0..99) */
+} sal_log_revision_t;
+
+/* Definition of timestamp according to SAL spec for logging purposes */
+typedef struct sal_log_timestamp {
+	u8 slh_second;		/* Second (0..59) */
+	u8 slh_minute;		/* Minute (0..59) */
+	u8 slh_hour;		/* Hour (0..23) */
+	u8 slh_reserved;
+	u8 slh_day;		/* Day (1..31) */
+	u8 slh_month;		/* Month (1..12) */
+	u8 slh_year;		/* Year (00..99) */
+	u8 slh_century;		/* Century (19, 20, 21, ...) */
+} sal_log_timestamp_t;
+
+/* Definition of log record  header structures */
+typedef struct sal_log_record_header {
+	u64 id;				/* Unique monotonically increasing ID */
+	sal_log_revision_t revision;	/* Major and Minor revision of header */
+	u8 severity;			/* Error Severity */
+	u8 validation_bits;		/* 0: platform_guid, 1: !timestamp */
+	u32 len;			/* Length of this error log in bytes */
+	sal_log_timestamp_t timestamp;	/* Timestamp */
+	efi_guid_t platform_guid;	/* Unique OEM Platform ID */
+} sal_log_record_header_t;
+
+#define sal_log_severity_recoverable	0
+#define sal_log_severity_fatal		1
+#define sal_log_severity_corrected	2
+
+/*
+ * Error Recovery Info (ERI) bit decode.  From SAL Spec section B.2.2 Table B-3
+ * Error Section Error_Recovery_Info Field Definition.
+ */
+#define ERI_NOT_VALID		0x0	/* Error Recovery Field is not valid */
+#define ERI_NOT_ACCESSIBLE	0x30	/* Resource not accessible */
+#define ERI_CONTAINMENT_WARN	0x22	/* Corrupt data propagated */
+#define ERI_UNCORRECTED_ERROR	0x20	/* Uncorrected error */
+#define ERI_COMPONENT_RESET	0x24	/* Component must be reset */
+#define ERI_CORR_ERROR_LOG	0x21	/* Corrected error, needs logging */
+#define ERI_CORR_ERROR_THRESH	0x29	/* Corrected error threshold exceeded */
+
+/* Definition of log section header structures */
+typedef struct sal_log_sec_header {
+    efi_guid_t guid;			/* Unique Section ID */
+    sal_log_revision_t revision;	/* Major and Minor revision of Section */
+    u8 error_recovery_info;		/* Platform error recovery status */
+    u8 reserved;
+    u32 len;				/* Section length */
+} sal_log_section_hdr_t;
+
+typedef struct sal_log_mod_error_info {
+	struct {
+		u64 check_info              : 1,
+		    requestor_identifier    : 1,
+		    responder_identifier    : 1,
+		    target_identifier       : 1,
+		    precise_ip              : 1,
+		    reserved                : 59;
+	} valid;
+	u64 check_info;
+	u64 requestor_identifier;
+	u64 responder_identifier;
+	u64 target_identifier;
+	u64 precise_ip;
+} sal_log_mod_error_info_t;
+
+typedef struct sal_processor_static_info {
+	struct {
+		u64 minstate        : 1,
+		    br              : 1,
+		    cr              : 1,
+		    ar              : 1,
+		    rr              : 1,
+		    fr              : 1,
+		    reserved        : 58;
+	} valid;
+	pal_min_state_area_t min_state_area;
+	u64 br[8];
+	u64 cr[128];
+	u64 ar[128];
+	u64 rr[8];
+	struct ia64_fpreg __attribute__ ((packed)) fr[128];
+} sal_processor_static_info_t;
+
+struct sal_cpuid_info {
+	u64 regs[5];
+	u64 reserved;
+};
+
+typedef struct sal_log_processor_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 proc_error_map      : 1,
+		    proc_state_param    : 1,
+		    proc_cr_lid         : 1,
+		    psi_static_struct   : 1,
+		    num_cache_check     : 4,
+		    num_tlb_check       : 4,
+		    num_bus_check       : 4,
+		    num_reg_file_check  : 4,
+		    num_ms_check        : 4,
+		    cpuid_info          : 1,
+		    reserved1           : 39;
+	} valid;
+	u64 proc_error_map;
+	u64 proc_state_parameter;
+	u64 proc_cr_lid;
+	/*
+	 * The rest of this structure consists of variable-length arrays, which can't be
+	 * expressed in C.
+	 */
+	sal_log_mod_error_info_t info[0];
+	/*
+	 * This is what the rest looked like if C supported variable-length arrays:
+	 *
+	 * sal_log_mod_error_info_t cache_check_info[.valid.num_cache_check];
+	 * sal_log_mod_error_info_t tlb_check_info[.valid.num_tlb_check];
+	 * sal_log_mod_error_info_t bus_check_info[.valid.num_bus_check];
+	 * sal_log_mod_error_info_t reg_file_check_info[.valid.num_reg_file_check];
+	 * sal_log_mod_error_info_t ms_check_info[.valid.num_ms_check];
+	 * struct sal_cpuid_info cpuid_info;
+	 * sal_processor_static_info_t processor_static_info;
+	 */
+} sal_log_processor_info_t;
+
+/* Given a sal_log_processor_info_t pointer, return a pointer to the processor_static_info: */
+#define SAL_LPI_PSI_INFO(l)									\
+({	sal_log_processor_info_t *_l = (l);							\
+	((sal_processor_static_info_t *)							\
+	 ((char *) _l->info + ((_l->valid.num_cache_check + _l->valid.num_tlb_check		\
+				+ _l->valid.num_bus_check + _l->valid.num_reg_file_check	\
+				+ _l->valid.num_ms_check) * sizeof(sal_log_mod_error_info_t)	\
+			       + sizeof(struct sal_cpuid_info))));				\
+})
+
+/* platform error log structures */
+
+typedef struct sal_log_mem_dev_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 error_status    : 1,
+		    physical_addr   : 1,
+		    addr_mask       : 1,
+		    node            : 1,
+		    card            : 1,
+		    module          : 1,
+		    bank            : 1,
+		    device          : 1,
+		    row             : 1,
+		    column          : 1,
+		    bit_position    : 1,
+		    requestor_id    : 1,
+		    responder_id    : 1,
+		    target_id       : 1,
+		    bus_spec_data   : 1,
+		    oem_id          : 1,
+		    oem_data        : 1,
+		    reserved        : 47;
+	} valid;
+	u64 error_status;
+	u64 physical_addr;
+	u64 addr_mask;
+	u16 node;
+	u16 card;
+	u16 module;
+	u16 bank;
+	u16 device;
+	u16 row;
+	u16 column;
+	u16 bit_position;
+	u64 requestor_id;
+	u64 responder_id;
+	u64 target_id;
+	u64 bus_spec_data;
+	u8 oem_id[16];
+	u8 oem_data[1];			/* Variable length data */
+} sal_log_mem_dev_err_info_t;
+
+typedef struct sal_log_sel_dev_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 record_id       : 1,
+		    record_type     : 1,
+		    generator_id    : 1,
+		    evm_rev         : 1,
+		    sensor_type     : 1,
+		    sensor_num      : 1,
+		    event_dir       : 1,
+		    event_data1     : 1,
+		    event_data2     : 1,
+		    event_data3     : 1,
+		    reserved        : 54;
+	} valid;
+	u16 record_id;
+	u8 record_type;
+	u8 timestamp[4];
+	u16 generator_id;
+	u8 evm_rev;
+	u8 sensor_type;
+	u8 sensor_num;
+	u8 event_dir;
+	u8 event_data1;
+	u8 event_data2;
+	u8 event_data3;
+} sal_log_sel_dev_err_info_t;
+
+typedef struct sal_log_pci_bus_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 err_status      : 1,
+		    err_type        : 1,
+		    bus_id          : 1,
+		    bus_address     : 1,
+		    bus_data        : 1,
+		    bus_cmd         : 1,
+		    requestor_id    : 1,
+		    responder_id    : 1,
+		    target_id       : 1,
+		    oem_data        : 1,
+		    reserved        : 54;
+	} valid;
+	u64 err_status;
+	u16 err_type;
+	u16 bus_id;
+	u32 reserved;
+	u64 bus_address;
+	u64 bus_data;
+	u64 bus_cmd;
+	u64 requestor_id;
+	u64 responder_id;
+	u64 target_id;
+	u8 oem_data[1];			/* Variable length data */
+} sal_log_pci_bus_err_info_t;
+
+typedef struct sal_log_smbios_dev_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 event_type      : 1,
+		    length          : 1,
+		    time_stamp      : 1,
+		    data            : 1,
+		    reserved1       : 60;
+	} valid;
+	u8 event_type;
+	u8 length;
+	u8 time_stamp[6];
+	u8 data[1];			/* data of variable length, length == slsmb_length */
+} sal_log_smbios_dev_err_info_t;
+
+typedef struct sal_log_pci_comp_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 err_status      : 1,
+		    comp_info       : 1,
+		    num_mem_regs    : 1,
+		    num_io_regs     : 1,
+		    reg_data_pairs  : 1,
+		    oem_data        : 1,
+		    reserved        : 58;
+	} valid;
+	u64 err_status;
+	struct {
+		u16 vendor_id;
+		u16 device_id;
+		u8 class_code[3];
+		u8 func_num;
+		u8 dev_num;
+		u8 bus_num;
+		u8 seg_num;
+		u8 reserved[5];
+	} comp_info;
+	u32 num_mem_regs;
+	u32 num_io_regs;
+	u64 reg_data_pairs[1];
+	/*
+	 * array of address/data register pairs is num_mem_regs + num_io_regs elements
+	 * long.  Each array element consists of a u64 address followed by a u64 data
+	 * value.  The oem_data array immediately follows the reg_data_pairs array
+	 */
+	u8 oem_data[1];			/* Variable length data */
+} sal_log_pci_comp_err_info_t;
+
+typedef struct sal_log_plat_specific_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 err_status      : 1,
+		    guid            : 1,
+		    oem_data        : 1,
+		    reserved        : 61;
+	} valid;
+	u64 err_status;
+	efi_guid_t guid;
+	u8 oem_data[1];			/* platform specific variable length data */
+} sal_log_plat_specific_err_info_t;
+
+typedef struct sal_log_host_ctlr_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 err_status      : 1,
+		    requestor_id    : 1,
+		    responder_id    : 1,
+		    target_id       : 1,
+		    bus_spec_data   : 1,
+		    oem_data        : 1,
+		    reserved        : 58;
+	} valid;
+	u64 err_status;
+	u64 requestor_id;
+	u64 responder_id;
+	u64 target_id;
+	u64 bus_spec_data;
+	u8 oem_data[1];			/* Variable length OEM data */
+} sal_log_host_ctlr_err_info_t;
+
+typedef struct sal_log_plat_bus_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 err_status      : 1,
+		    requestor_id    : 1,
+		    responder_id    : 1,
+		    target_id       : 1,
+		    bus_spec_data   : 1,
+		    oem_data        : 1,
+		    reserved        : 58;
+	} valid;
+	u64 err_status;
+	u64 requestor_id;
+	u64 responder_id;
+	u64 target_id;
+	u64 bus_spec_data;
+	u8 oem_data[1];			/* Variable length OEM data */
+} sal_log_plat_bus_err_info_t;
+
+/* Overall platform error section structure */
+typedef union sal_log_platform_err_info {
+	sal_log_mem_dev_err_info_t mem_dev_err;
+	sal_log_sel_dev_err_info_t sel_dev_err;
+	sal_log_pci_bus_err_info_t pci_bus_err;
+	sal_log_smbios_dev_err_info_t smbios_dev_err;
+	sal_log_pci_comp_err_info_t pci_comp_err;
+	sal_log_plat_specific_err_info_t plat_specific_err;
+	sal_log_host_ctlr_err_info_t host_ctlr_err;
+	sal_log_plat_bus_err_info_t plat_bus_err;
+} sal_log_platform_err_info_t;
+
+/* SAL log over-all, multi-section error record structure (processor+platform) */
+typedef struct err_rec {
+	sal_log_record_header_t sal_elog_header;
+	sal_log_processor_info_t proc_err;
+	sal_log_platform_err_info_t plat_err;
+	u8 oem_data_pad[1024];
+} ia64_err_rec_t;
+
+/*
+ * Now define a couple of inline functions for improved type checking
+ * and convenience.
+ */
+
+extern s64 ia64_sal_cache_flush (u64 cache_type);
+extern void __init check_sal_cache_flush (void);
+
+/* Initialize all the processor and platform level instruction and data caches */
+static inline s64
+ia64_sal_cache_init (void)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_CACHE_INIT, 0, 0, 0, 0, 0, 0, 0);
+	return isrv.status;
+}
+
+/*
+ * Clear the processor and platform information logged by SAL with respect to the machine
+ * state at the time of MCA's, INITs, CMCs, or CPEs.
+ */
+static inline s64
+ia64_sal_clear_state_info (u64 sal_info_type)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SAL_CLEAR_STATE_INFO, sal_info_type, 0,
+	              0, 0, 0, 0, 0);
+	return isrv.status;
+}
+
+
+/* Get the processor and platform information logged by SAL with respect to the machine
+ * state at the time of the MCAs, INITs, CMCs, or CPEs.
+ */
+static inline u64
+ia64_sal_get_state_info (u64 sal_info_type, u64 *sal_info)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SAL_GET_STATE_INFO, sal_info_type, 0,
+	              sal_info, 0, 0, 0, 0);
+	if (isrv.status)
+		return 0;
+
+	return isrv.v0;
+}
+
+/*
+ * Get the maximum size of the information logged by SAL with respect to the machine state
+ * at the time of MCAs, INITs, CMCs, or CPEs.
+ */
+static inline u64
+ia64_sal_get_state_info_size (u64 sal_info_type)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SAL_GET_STATE_INFO_SIZE, sal_info_type, 0,
+	              0, 0, 0, 0, 0);
+	if (isrv.status)
+		return 0;
+	return isrv.v0;
+}
+
+/*
+ * Causes the processor to go into a spin loop within SAL where SAL awaits a wakeup from
+ * the monarch processor.  Must not lock, because it will not return on any cpu until the
+ * monarch processor sends a wake up.
+ */
+static inline s64
+ia64_sal_mc_rendez (void)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_NOLOCK(isrv, SAL_MC_RENDEZ, 0, 0, 0, 0, 0, 0, 0);
+	return isrv.status;
+}
+
+/*
+ * Allow the OS to specify the interrupt number to be used by SAL to interrupt OS during
+ * the machine check rendezvous sequence as well as the mechanism to wake up the
+ * non-monarch processor at the end of machine check processing.
+ * Returns the complete ia64_sal_retval because some calls return more than just a status
+ * value.
+ */
+static inline struct ia64_sal_retval
+ia64_sal_mc_set_params (u64 param_type, u64 i_or_m, u64 i_or_m_val, u64 timeout, u64 rz_always)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_MC_SET_PARAMS, param_type, i_or_m, i_or_m_val,
+		 timeout, rz_always, 0, 0);
+	return isrv;
+}
+
+/* Read from PCI configuration space */
+static inline s64
+ia64_sal_pci_config_read (u64 pci_config_addr, int type, u64 size, u64 *value)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_PCI_CONFIG_READ, pci_config_addr, size, type, 0, 0, 0, 0);
+	if (value)
+		*value = isrv.v0;
+	return isrv.status;
+}
+
+/* Write to PCI configuration space */
+static inline s64
+ia64_sal_pci_config_write (u64 pci_config_addr, int type, u64 size, u64 value)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_PCI_CONFIG_WRITE, pci_config_addr, size, value,
+	         type, 0, 0, 0);
+	return isrv.status;
+}
+
+/*
+ * Register physical addresses of locations needed by SAL when SAL procedures are invoked
+ * in virtual mode.
+ */
+static inline s64
+ia64_sal_register_physical_addr (u64 phys_entry, u64 phys_addr)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_REGISTER_PHYSICAL_ADDR, phys_entry, phys_addr,
+	         0, 0, 0, 0, 0);
+	return isrv.status;
+}
+
+/*
+ * Register software dependent code locations within SAL. These locations are handlers or
+ * entry points where SAL will pass control for the specified event. These event handlers
+ * are for the bott rendezvous, MCAs and INIT scenarios.
+ */
+static inline s64
+ia64_sal_set_vectors (u64 vector_type,
+		      u64 handler_addr1, u64 gp1, u64 handler_len1,
+		      u64 handler_addr2, u64 gp2, u64 handler_len2)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_SET_VECTORS, vector_type,
+			handler_addr1, gp1, handler_len1,
+			handler_addr2, gp2, handler_len2);
+
+	return isrv.status;
+}
+
+/* Update the contents of PAL block in the non-volatile storage device */
+static inline s64
+ia64_sal_update_pal (u64 param_buf, u64 scratch_buf, u64 scratch_buf_size,
+		     u64 *error_code, u64 *scratch_buf_size_needed)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_UPDATE_PAL, param_buf, scratch_buf, scratch_buf_size,
+	         0, 0, 0, 0);
+	if (error_code)
+		*error_code = isrv.v0;
+	if (scratch_buf_size_needed)
+		*scratch_buf_size_needed = isrv.v1;
+	return isrv.status;
+}
+
+/* Get physical processor die mapping in the platform. */
+static inline s64
+ia64_sal_physical_id_info(u16 *splid)
+{
+	struct ia64_sal_retval isrv;
+
+	if (sal_revision < SAL_VERSION_CODE(3,2))
+		return -1;
+
+	SAL_CALL(isrv, SAL_PHYSICAL_ID_INFO, 0, 0, 0, 0, 0, 0, 0);
+	if (splid)
+		*splid = isrv.v0;
+	return isrv.status;
+}
+
+extern unsigned long sal_platform_features;
+
+extern int (*salinfo_platform_oemdata)(const u8 *, u8 **, u64 *);
+
+struct sal_ret_values {
+	long r8; long r9; long r10; long r11;
+};
+
+#define IA64_SAL_OEMFUNC_MIN		0x02000000
+#define IA64_SAL_OEMFUNC_MAX		0x03ffffff
+
+extern int ia64_sal_oemcall(struct ia64_sal_retval *, u64, u64, u64, u64, u64,
+			    u64, u64, u64);
+extern int ia64_sal_oemcall_nolock(struct ia64_sal_retval *, u64, u64, u64,
+				   u64, u64, u64, u64, u64);
+extern int ia64_sal_oemcall_reentrant(struct ia64_sal_retval *, u64, u64, u64,
+				      u64, u64, u64, u64, u64);
+extern long
+ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second,
+		    unsigned long *drift_info);
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * System Abstraction Layer Specification
+ * Section 3.2.5.1: OS_BOOT_RENDEZ to SAL return State.
+ * Note: region regs are stored first in head.S _start. Hence they must
+ * stay up front.
+ */
+struct sal_to_os_boot {
+	u64 rr[8];		/* Region Registers */
+	u64 br[6];		/* br0:
+				 * return addr into SAL boot rendez routine */
+	u64 gr1;		/* SAL:GP */
+	u64 gr12;		/* SAL:SP */
+	u64 gr13;		/* SAL: Task Pointer */
+	u64 fpsr;
+	u64 pfs;
+	u64 rnat;
+	u64 unat;
+	u64 bspstore;
+	u64 dcr;		/* Default Control Register */
+	u64 iva;
+	u64 pta;
+	u64 itv;
+	u64 pmv;
+	u64 cmcv;
+	u64 lrr[2];
+	u64 gr[4];
+	u64 pr;			/* Predicate registers */
+	u64 lc;			/* Loop Count */
+	struct ia64_fpreg fp[20];
+};
+
+/*
+ * Global array allocated for NR_CPUS at boot time
+ */
+extern struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS];
+
+extern void ia64_jump_to_sal(struct sal_to_os_boot *);
+#endif
+
+extern void ia64_sal_handler_init(void *entry_point, void *gpval);
+
+#define PALO_MAX_TLB_PURGES	0xFFFF
+#define PALO_SIG	"PALO"
+
+struct palo_table {
+	u8  signature[4];	/* Should be "PALO" */
+	u32 length;
+	u8  minor_revision;
+	u8  major_revision;
+	u8  checksum;
+	u8  reserved1[5];
+	u16 max_tlb_purges;
+	u8  reserved2[6];
+};
+
+#define NPTCG_FROM_PAL			0
+#define NPTCG_FROM_PALO			1
+#define NPTCG_FROM_KERNEL_PARAMETER	2
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_SAL_H */
diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h
new file mode 100644
index 00000000..08fd93bf
--- /dev/null
+++ b/arch/ia64/include/asm/scatterlist.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_IA64_SCATTERLIST_H
+#define _ASM_IA64_SCATTERLIST_H
+
+#include <asm-generic/scatterlist.h>
+#define ARCH_HAS_SG_CHAIN
+
+#endif /* _ASM_IA64_SCATTERLIST_H */
diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h
new file mode 100644
index 00000000..1a873b36
--- /dev/null
+++ b/arch/ia64/include/asm/sections.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_IA64_SECTIONS_H
+#define _ASM_IA64_SECTIONS_H
+
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/elf.h>
+#include <linux/uaccess.h>
+#include <asm-generic/sections.h>
+
+extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
+#ifdef	CONFIG_SMP
+extern char __cpu0_per_cpu[];
+#endif
+extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
+extern char __start___rse_patchlist[], __end___rse_patchlist[];
+extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
+extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[];
+extern char __start_gate_section[];
+extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[];
+extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[];
+extern char __start_gate_fsyscall_patchlist[], __end_gate_fsyscall_patchlist[];
+extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_bubble_down_patchlist[];
+extern char __start_unwind[], __end_unwind[];
+extern char __start_ivt_text[], __end_ivt_text[];
+
+#undef dereference_function_descriptor
+static inline void *dereference_function_descriptor(void *ptr)
+{
+	struct fdesc *desc = ptr;
+	void *p;
+
+	if (!probe_kernel_address(&desc->ip, p))
+		ptr = p;
+	return ptr;
+}
+
+
+#endif /* _ASM_IA64_SECTIONS_H */
+
diff --git a/arch/ia64/include/asm/segment.h b/arch/ia64/include/asm/segment.h
new file mode 100644
index 00000000..b89e2b3d
--- /dev/null
+++ b/arch/ia64/include/asm/segment.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_IA64_SEGMENT_H
+#define _ASM_IA64_SEGMENT_H
+
+/* Only here because we have some old header files that expect it.. */
+
+#endif /* _ASM_IA64_SEGMENT_H */
diff --git a/arch/ia64/include/asm/sembuf.h b/arch/ia64/include/asm/sembuf.h
new file mode 100644
index 00000000..1340fbc0
--- /dev/null
+++ b/arch/ia64/include/asm/sembuf.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_IA64_SEMBUF_H
+#define _ASM_IA64_SEMBUF_H
+
+/*
+ * The semid64_ds structure for IA-64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct semid64_ds {
+	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
+	__kernel_time_t	sem_otime;		/* last semop time */
+	__kernel_time_t	sem_ctime;		/* last change time */
+	unsigned long	sem_nsems;		/* no. of semaphores in array */
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+};
+
+#endif /* _ASM_IA64_SEMBUF_H */
diff --git a/arch/ia64/include/asm/serial.h b/arch/ia64/include/asm/serial.h
new file mode 100644
index 00000000..068be115
--- /dev/null
+++ b/arch/ia64/include/asm/serial.h
@@ -0,0 +1,17 @@
+/*
+ * Derived from the i386 version.
+ */
+
+/*
+ * This assumes you have a 1.8432 MHz clock for your UART.
+ *
+ * It'd be nice if someone built a serial card with a 24.576 MHz
+ * clock, since the 16550A is capable of handling a top speed of 1.5
+ * megabits/second; but this requires the faster clock.
+ */
+#define BASE_BAUD ( 1843200 / 16 )
+
+/*
+ * All legacy serial ports should be enumerated via ACPI namespace, so
+ * we need not list them here.
+ */
diff --git a/arch/ia64/include/asm/setup.h b/arch/ia64/include/asm/setup.h
new file mode 100644
index 00000000..4399a443
--- /dev/null
+++ b/arch/ia64/include/asm/setup.h
@@ -0,0 +1,6 @@
+#ifndef __IA64_SETUP_H
+#define __IA64_SETUP_H
+
+#define COMMAND_LINE_SIZE	2048
+
+#endif
diff --git a/arch/ia64/include/asm/shmbuf.h b/arch/ia64/include/asm/shmbuf.h
new file mode 100644
index 00000000..585002a7
--- /dev/null
+++ b/arch/ia64/include/asm/shmbuf.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_IA64_SHMBUF_H
+#define _ASM_IA64_SHMBUF_H
+
+/*
+ * The shmid64_ds structure for IA-64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+	size_t			shm_segsz;	/* size of segment (bytes) */
+	__kernel_time_t		shm_atime;	/* last attach time */
+	__kernel_time_t		shm_dtime;	/* last detach time */
+	__kernel_time_t		shm_ctime;	/* last change time */
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+};
+
+struct shminfo64 {
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* _ASM_IA64_SHMBUF_H */
diff --git a/arch/ia64/include/asm/shmparam.h b/arch/ia64/include/asm/shmparam.h
new file mode 100644
index 00000000..d07508dc
--- /dev/null
+++ b/arch/ia64/include/asm/shmparam.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_IA64_SHMPARAM_H
+#define _ASM_IA64_SHMPARAM_H
+
+/*
+ * SHMLBA controls minimum alignment at which shared memory segments
+ * get attached.  The IA-64 architecture says that there may be a
+ * performance degradation when there are virtual aliases within 1MB.
+ * To reduce the chance of this, we set SHMLBA to 1MB. --davidm 00/12/20
+ */
+#define	SHMLBA	(1024*1024)
+
+#endif /* _ASM_IA64_SHMPARAM_H */
diff --git a/arch/ia64/include/asm/sigcontext.h b/arch/ia64/include/asm/sigcontext.h
new file mode 100644
index 00000000..57ff777b
--- /dev/null
+++ b/arch/ia64/include/asm/sigcontext.h
@@ -0,0 +1,70 @@
+#ifndef _ASM_IA64_SIGCONTEXT_H
+#define _ASM_IA64_SIGCONTEXT_H
+
+/*
+ * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/fpu.h>
+
+#define IA64_SC_FLAG_ONSTACK_BIT		0	/* is handler running on signal stack? */
+#define IA64_SC_FLAG_IN_SYSCALL_BIT		1	/* did signal interrupt a syscall? */
+#define IA64_SC_FLAG_FPH_VALID_BIT		2	/* is state in f[32]-f[127] valid? */
+
+#define IA64_SC_FLAG_ONSTACK		(1 << IA64_SC_FLAG_ONSTACK_BIT)
+#define IA64_SC_FLAG_IN_SYSCALL		(1 << IA64_SC_FLAG_IN_SYSCALL_BIT)
+#define IA64_SC_FLAG_FPH_VALID		(1 << IA64_SC_FLAG_FPH_VALID_BIT)
+
+# ifndef __ASSEMBLY__
+
+/*
+ * Note on handling of register backing store: sc_ar_bsp contains the address that would
+ * be found in ar.bsp after executing a "cover" instruction the context in which the
+ * signal was raised.  If signal delivery required switching to an alternate signal stack
+ * (sc_rbs_base is not NULL), the "dirty" partition (as it would exist after executing the
+ * imaginary "cover" instruction) is backed by the *alternate* signal stack, not the
+ * original one.  In this case, sc_rbs_base contains the base address of the new register
+ * backing store.  The number of registers in the dirty partition can be calculated as:
+ *
+ *   ndirty = ia64_rse_num_regs(sc_rbs_base, sc_rbs_base + (sc_loadrs >> 16))
+ *
+ */
+
+struct sigcontext {
+	unsigned long		sc_flags;	/* see manifest constants above */
+	unsigned long		sc_nat;		/* bit i == 1 iff scratch reg gr[i] is a NaT */
+	stack_t			sc_stack;	/* previously active stack */
+
+	unsigned long		sc_ip;		/* instruction pointer */
+	unsigned long		sc_cfm;		/* current frame marker */
+	unsigned long		sc_um;		/* user mask bits */
+	unsigned long		sc_ar_rsc;	/* register stack configuration register */
+	unsigned long		sc_ar_bsp;	/* backing store pointer */
+	unsigned long		sc_ar_rnat;	/* RSE NaT collection register */
+	unsigned long		sc_ar_ccv;	/* compare and exchange compare value register */
+	unsigned long		sc_ar_unat;	/* ar.unat of interrupted context */
+	unsigned long		sc_ar_fpsr;	/* floating-point status register */
+	unsigned long		sc_ar_pfs;	/* previous function state */
+	unsigned long		sc_ar_lc;	/* loop count register */
+	unsigned long		sc_pr;		/* predicate registers */
+	unsigned long		sc_br[8];	/* branch registers */
+	/* Note: sc_gr[0] is used as the "uc_link" member of ucontext_t */
+	unsigned long		sc_gr[32];	/* general registers (static partition) */
+	struct ia64_fpreg	sc_fr[128];	/* floating-point registers */
+
+	unsigned long		sc_rbs_base;	/* NULL or new base of sighandler's rbs */
+	unsigned long		sc_loadrs;	/* see description above */
+
+	unsigned long		sc_ar25;	/* cmp8xchg16 uses this */
+	unsigned long		sc_ar26;	/* rsvd for scratch use */
+	unsigned long		sc_rsvd[12];	/* reserved for future use */
+	/*
+	 * The mask must come last so we can increase _NSIG_WORDS
+	 * without breaking binary compatibility.
+	 */
+	sigset_t		sc_mask;	/* signal mask to restore after handler returns */
+};
+
+# endif /* __ASSEMBLY__ */
+#endif /* _ASM_IA64_SIGCONTEXT_H */
diff --git a/arch/ia64/include/asm/siginfo.h b/arch/ia64/include/asm/siginfo.h
new file mode 100644
index 00000000..c8fcaa2a
--- /dev/null
+++ b/arch/ia64/include/asm/siginfo.h
@@ -0,0 +1,135 @@
+#ifndef _ASM_IA64_SIGINFO_H
+#define _ASM_IA64_SIGINFO_H
+
+/*
+ * Based on <asm-i386/siginfo.h>.
+ *
+ * Modified 1998-2002
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+#define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
+
+#define HAVE_ARCH_SIGINFO_T
+#define HAVE_ARCH_COPY_SIGINFO
+#define HAVE_ARCH_COPY_SIGINFO_TO_USER
+
+#include <asm-generic/siginfo.h>
+
+typedef struct siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+	int __pad0;
+
+	union {
+		int _pad[SI_PAD_SIZE];
+
+		/* kill() */
+		struct {
+			pid_t _pid;		/* sender's pid */
+			uid_t _uid;		/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			timer_t _tid;		/* timer id */
+			int _overrun;		/* overrun count */
+			char _pad[sizeof(__ARCH_SI_UID_T) - sizeof(int)];
+			sigval_t _sigval;	/* must overlay ._rt._sigval! */
+			int _sys_private;	/* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			pid_t _pid;		/* sender's pid */
+			uid_t _uid;		/* sender's uid */
+			sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			pid_t _pid;		/* which child */
+			uid_t _uid;		/* sender's uid */
+			int _status;		/* exit code */
+			clock_t _utime;
+			clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			void __user *_addr;	/* faulting insn/memory ref. */
+			int _imm;		/* immediate value for "break" */
+			unsigned int _flags;	/* see below */
+			unsigned long _isr;	/* isr */
+			short _addr_lsb;	/* lsb of faulting address */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			long _band;	/* POLL_IN, POLL_OUT, POLL_MSG (XPG requires a "long") */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} siginfo_t;
+
+#define si_imm		_sifields._sigfault._imm	/* as per UNIX SysV ABI spec */
+#define si_flags	_sifields._sigfault._flags
+/*
+ * si_isr is valid for SIGILL, SIGFPE, SIGSEGV, SIGBUS, and SIGTRAP provided that
+ * si_code is non-zero and __ISR_VALID is set in si_flags.
+ */
+#define si_isr		_sifields._sigfault._isr
+
+/*
+ * Flag values for si_flags:
+ */
+#define __ISR_VALID_BIT	0
+#define __ISR_VALID	(1 << __ISR_VALID_BIT)
+
+/*
+ * SIGILL si_codes
+ */
+#define ILL_BADIADDR	(__SI_FAULT|9)	/* unimplemented instruction address */
+#define __ILL_BREAK	(__SI_FAULT|10)	/* illegal break */
+#define __ILL_BNDMOD	(__SI_FAULT|11)	/* bundle-update (modification) in progress */
+#undef NSIGILL
+#define NSIGILL		11
+
+/*
+ * SIGFPE si_codes
+ */
+#define __FPE_DECOVF	(__SI_FAULT|9)	/* decimal overflow */
+#define __FPE_DECDIV	(__SI_FAULT|10)	/* decimal division by zero */
+#define __FPE_DECERR	(__SI_FAULT|11)	/* packed decimal error */
+#define __FPE_INVASC	(__SI_FAULT|12)	/* invalid ASCII digit */
+#define __FPE_INVDEC	(__SI_FAULT|13)	/* invalid decimal digit */
+#undef NSIGFPE
+#define NSIGFPE		13
+
+/*
+ * SIGSEGV si_codes
+ */
+#define __SEGV_PSTKOVF	(__SI_FAULT|3)	/* paragraph stack overflow */
+#undef NSIGSEGV
+#define NSIGSEGV	3
+
+#undef NSIGTRAP
+#define NSIGTRAP	4
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+
+static inline void
+copy_siginfo (siginfo_t *to, siginfo_t *from)
+{
+	if (from->si_code < 0)
+		memcpy(to, from, sizeof(siginfo_t));
+	else
+		/* _sigchld is currently the largest know union member */
+		memcpy(to, from, 4*sizeof(int) + sizeof(from->_sifields._sigchld));
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_SIGINFO_H */
diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h
new file mode 100644
index 00000000..b166248d
--- /dev/null
+++ b/arch/ia64/include/asm/signal.h
@@ -0,0 +1,160 @@
+#ifndef _ASM_IA64_SIGNAL_H
+#define _ASM_IA64_SIGNAL_H
+
+/*
+ * Modified 1998-2001, 2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ *
+ * Unfortunately, this file is being included by bits/signal.h in
+ * glibc-2.x.  Hence the #ifdef __KERNEL__ ugliness.
+ */
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGBUS		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGUSR1		10
+#define SIGSEGV		11
+#define SIGUSR2		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGSTKFLT	16
+#define SIGCHLD		17
+#define SIGCONT		18
+#define SIGSTOP		19
+#define SIGTSTP		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGURG		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGIO		29
+#define SIGPOLL		SIGIO
+/*
+#define SIGLOST		29
+*/
+#define SIGPWR		30
+#define SIGSYS		31
+/* signal 31 is no longer "unused", but the SIGUNUSED macro remains for backwards compatibility */
+#define	SIGUNUSED	31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	32
+#define SIGRTMAX	_NSIG
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP	0x00000001
+#define SA_NOCLDWAIT	0x00000002
+#define SA_SIGINFO	0x00000004
+#define SA_ONSTACK	0x08000000
+#define SA_RESTART	0x10000000
+#define SA_NODEFER	0x40000000
+#define SA_RESETHAND	0x80000000
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+#define SA_RESTORER	0x04000000
+
+/*
+ * sigaltstack controls
+ */
+#define SS_ONSTACK	1
+#define SS_DISABLE	2
+
+/*
+ * The minimum stack size needs to be fairly large because we want to
+ * be sure that an app compiled for today's CPUs will continue to run
+ * on all future CPU models.  The CPU model matters because the signal
+ * frame needs to have space for the complete machine state, including
+ * all physical stacked registers.  The number of physical stacked
+ * registers is CPU model dependent, but given that the width of
+ * ar.rsc.loadrs is 14 bits, we can assume that they'll never take up
+ * more than 16KB of space.
+ */
+#if 1
+  /*
+   * This is a stupid typo: the value was _meant_ to be 131072 (0x20000), but I typed it
+   * in wrong. ;-(  To preserve backwards compatibility, we leave the kernel at the
+   * incorrect value and fix libc only.
+   */
+# define MINSIGSTKSZ	131027	/* min. stack size for sigaltstack() */
+#else
+# define MINSIGSTKSZ	131072	/* min. stack size for sigaltstack() */
+#endif
+#define SIGSTKSZ	262144	/* default stack size for sigaltstack() */
+
+#ifdef __KERNEL__
+
+#define _NSIG		64
+#define _NSIG_BPW	64
+#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
+
+#endif /* __KERNEL__ */
+
+#include <asm-generic/signal-defs.h>
+
+# ifndef __ASSEMBLY__
+
+#  include <linux/types.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+typedef struct sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+
+typedef unsigned long old_sigset_t;
+
+typedef struct {
+	unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+struct sigaction {
+	__sighandler_t sa_handler;
+	unsigned long sa_flags;
+	sigset_t sa_mask;		/* mask last for extensibility */
+};
+
+struct k_sigaction {
+	struct sigaction sa;
+};
+
+#  include <asm/sigcontext.h>
+
+#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+
+#endif /* __KERNEL__ */
+
+# endif /* !__ASSEMBLY__ */
+#endif /* _ASM_IA64_SIGNAL_H */
diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
new file mode 100644
index 00000000..0b3b3997
--- /dev/null
+++ b/arch/ia64/include/asm/smp.h
@@ -0,0 +1,137 @@
+/*
+ * SMP Support
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P.
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ */
+#ifndef _ASM_IA64_SMP_H
+#define _ASM_IA64_SMP_H
+
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/bitops.h>
+#include <linux/irqreturn.h>
+
+#include <asm/io.h>
+#include <asm/param.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+
+static inline unsigned int
+ia64_get_lid (void)
+{
+	union {
+		struct {
+			unsigned long reserved : 16;
+			unsigned long eid : 8;
+			unsigned long id : 8;
+			unsigned long ignored : 32;
+		} f;
+		unsigned long bits;
+	} lid;
+
+	lid.bits = ia64_getreg(_IA64_REG_CR_LID);
+	return lid.f.id << 8 | lid.f.eid;
+}
+
+#define hard_smp_processor_id()		ia64_get_lid()
+
+#ifdef CONFIG_SMP
+
+#define XTP_OFFSET		0x1e0008
+
+#define SMP_IRQ_REDIRECTION	(1 << 0)
+#define SMP_IPI_REDIRECTION	(1 << 1)
+
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+extern struct smp_boot_data {
+	int cpu_count;
+	int cpu_phys_id[NR_CPUS];
+} smp_boot_data __initdata;
+
+extern char no_int_routing __devinitdata;
+
+extern cpumask_t cpu_core_map[NR_CPUS];
+DECLARE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
+extern int smp_num_siblings;
+extern void __iomem *ipi_base_addr;
+extern unsigned char smp_int_redirect;
+
+extern volatile int ia64_cpu_to_sapicid[];
+#define cpu_physical_id(i)	ia64_cpu_to_sapicid[i]
+
+extern unsigned long ap_wakeup_vector;
+
+/*
+ * Function to map hard smp processor id to logical id.  Slow, so don't use this in
+ * performance-critical code.
+ */
+static inline int
+cpu_logical_id (int cpuid)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; ++i)
+		if (cpu_physical_id(i) == cpuid)
+			break;
+	return i;
+}
+
+/*
+ * XTP control functions:
+ *	min_xtp   : route all interrupts to this CPU
+ *	normal_xtp: nominal XTP value
+ *	max_xtp   : never deliver interrupts to this CPU.
+ */
+
+static inline void
+min_xtp (void)
+{
+	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
+		writeb(0x00, ipi_base_addr + XTP_OFFSET); /* XTP to min */
+}
+
+static inline void
+normal_xtp (void)
+{
+	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
+		writeb(0x08, ipi_base_addr + XTP_OFFSET); /* XTP normal */
+}
+
+static inline void
+max_xtp (void)
+{
+	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
+		writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */
+}
+
+/* Upping and downing of CPUs */
+extern int __cpu_disable (void);
+extern void __cpu_die (unsigned int cpu);
+extern void cpu_die (void) __attribute__ ((noreturn));
+extern void __init smp_build_cpu_map(void);
+
+extern void __init init_smp_config (void);
+extern void smp_do_timer (struct pt_regs *regs);
+
+extern irqreturn_t handle_IPI(int irq, void *dev_id);
+extern void smp_send_reschedule (int cpu);
+extern void identify_siblings (struct cpuinfo_ia64 *);
+extern int is_multithreading_enabled(void);
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+#else /* CONFIG_SMP */
+
+#define cpu_logical_id(i)		0
+#define cpu_physical_id(i)		ia64_get_lid()
+
+#endif /* CONFIG_SMP */
+#endif /* _ASM_IA64_SMP_H */
diff --git a/arch/ia64/include/asm/sn/acpi.h b/arch/ia64/include/asm/sn/acpi.h
new file mode 100644
index 00000000..fd480db2
--- /dev/null
+++ b/arch/ia64/include/asm/sn/acpi.h
@@ -0,0 +1,15 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_ACPI_H
+#define _ASM_IA64_SN_ACPI_H
+
+extern int sn_acpi_rev;
+#define SN_ACPI_BASE_SUPPORT()   (sn_acpi_rev >= 0x20101)
+
+#endif /* _ASM_IA64_SN_ACPI_H */
diff --git a/arch/ia64/include/asm/sn/addrs.h b/arch/ia64/include/asm/sn/addrs.h
new file mode 100644
index 00000000..e715c794
--- /dev/null
+++ b/arch/ia64/include/asm/sn/addrs.h
@@ -0,0 +1,299 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1992-1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_ADDRS_H
+#define _ASM_IA64_SN_ADDRS_H
+
+#include <asm/percpu.h>
+#include <asm/sn/types.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/pda.h>
+
+/*
+ *  Memory/SHUB Address Format:
+ *  +-+---------+--+--------------+
+ *  |0|  NASID  |AS| NodeOffset   |
+ *  +-+---------+--+--------------+
+ *
+ *  NASID: (low NASID bit is 0) Memory and SHUB MMRs
+ *   AS: 2-bit Address Space Identifier. Used only if low NASID bit is 0
+ *     00: Local Resources and MMR space
+ *           Top bit of NodeOffset
+ *               0: Local resources space
+ *                  node id:
+ *                        0: IA64/NT compatibility space
+ *                        2: Local MMR Space
+ *                        4: Local memory, regardless of local node id
+ *               1: Global MMR space
+ *     01: GET space.
+ *     10: AMO space.
+ *     11: Cacheable memory space.
+ *
+ *   NodeOffset: byte offset
+ *
+ *
+ *  TIO address format:
+ *  +-+----------+--+--------------+
+ *  |0|  NASID   |AS| Nodeoffset   |
+ *  +-+----------+--+--------------+
+ *
+ *  NASID: (low NASID bit is 1) TIO
+ *   AS: 2-bit Chiplet Identifier
+ *     00: TIO LB (Indicates TIO MMR access.)
+ *     01: TIO ICE (indicates coretalk space access.)
+ * 
+ *   NodeOffset: top bit must be set.
+ *
+ *
+ * Note that in both of the above address formats, the low
+ * NASID bit indicates if the reference is to the SHUB or TIO MMRs.
+ */
+
+
+/*
+ * Define basic shift & mask constants for manipulating NASIDs and AS values.
+ */
+#define NASID_BITMASK		(sn_hub_info->nasid_bitmask)
+#define NASID_SHIFT		(sn_hub_info->nasid_shift)
+#define AS_SHIFT		(sn_hub_info->as_shift)
+#define AS_BITMASK		0x3UL
+
+#define NASID_MASK              ((u64)NASID_BITMASK << NASID_SHIFT)
+#define AS_MASK			((u64)AS_BITMASK << AS_SHIFT)
+
+
+/*
+ * AS values. These are the same on both SHUB1 & SHUB2.
+ */
+#define AS_GET_VAL		1UL
+#define AS_AMO_VAL		2UL
+#define AS_CAC_VAL		3UL
+#define AS_GET_SPACE		(AS_GET_VAL << AS_SHIFT)
+#define AS_AMO_SPACE		(AS_AMO_VAL << AS_SHIFT)
+#define AS_CAC_SPACE		(AS_CAC_VAL << AS_SHIFT)
+
+
+/* 
+ * Virtual Mode Local & Global MMR space.  
+ */
+#define SH1_LOCAL_MMR_OFFSET	0x8000000000UL
+#define SH2_LOCAL_MMR_OFFSET	0x0200000000UL
+#define LOCAL_MMR_OFFSET	(is_shub2() ? SH2_LOCAL_MMR_OFFSET : SH1_LOCAL_MMR_OFFSET)
+#define LOCAL_MMR_SPACE		(__IA64_UNCACHED_OFFSET | LOCAL_MMR_OFFSET)
+#define LOCAL_PHYS_MMR_SPACE	(RGN_BASE(RGN_HPAGE) | LOCAL_MMR_OFFSET)
+
+#define SH1_GLOBAL_MMR_OFFSET	0x0800000000UL
+#define SH2_GLOBAL_MMR_OFFSET	0x0300000000UL
+#define GLOBAL_MMR_OFFSET	(is_shub2() ? SH2_GLOBAL_MMR_OFFSET : SH1_GLOBAL_MMR_OFFSET)
+#define GLOBAL_MMR_SPACE	(__IA64_UNCACHED_OFFSET | GLOBAL_MMR_OFFSET)
+
+/*
+ * Physical mode addresses
+ */
+#define GLOBAL_PHYS_MMR_SPACE	(RGN_BASE(RGN_HPAGE) | GLOBAL_MMR_OFFSET)
+
+
+/*
+ * Clear region & AS bits.
+ */
+#define TO_PHYS_MASK		(~(RGN_BITS | AS_MASK))
+
+
+/*
+ * Misc NASID manipulation.
+ */
+#define NASID_SPACE(n)		((u64)(n) << NASID_SHIFT)
+#define REMOTE_ADDR(n,a)	(NASID_SPACE(n) | (a))
+#define NODE_OFFSET(x)		((x) & (NODE_ADDRSPACE_SIZE - 1))
+#define NODE_ADDRSPACE_SIZE     (1UL << AS_SHIFT)
+#define NASID_GET(x)		(int) (((u64) (x) >> NASID_SHIFT) & NASID_BITMASK)
+#define LOCAL_MMR_ADDR(a)	(LOCAL_MMR_SPACE | (a))
+#define GLOBAL_MMR_ADDR(n,a)	(GLOBAL_MMR_SPACE | REMOTE_ADDR(n,a))
+#define GLOBAL_MMR_PHYS_ADDR(n,a) (GLOBAL_PHYS_MMR_SPACE | REMOTE_ADDR(n,a))
+#define GLOBAL_CAC_ADDR(n,a)	(CAC_BASE | REMOTE_ADDR(n,a))
+#define CHANGE_NASID(n,x)	((void *)(((u64)(x) & ~NASID_MASK) | NASID_SPACE(n)))
+#define IS_TIO_NASID(n)		((n) & 1)
+
+
+/* non-II mmr's start at top of big window space (4G) */
+#define BWIN_TOP		0x0000000100000000UL
+
+/*
+ * general address defines
+ */
+#define CAC_BASE		(PAGE_OFFSET | AS_CAC_SPACE)
+#define AMO_BASE		(__IA64_UNCACHED_OFFSET | AS_AMO_SPACE)
+#define AMO_PHYS_BASE		(RGN_BASE(RGN_HPAGE) | AS_AMO_SPACE)
+#define GET_BASE		(PAGE_OFFSET | AS_GET_SPACE)
+
+/*
+ * Convert Memory addresses between various addressing modes.
+ */
+#define TO_PHYS(x)		(TO_PHYS_MASK & (x))
+#define TO_CAC(x)		(CAC_BASE     | TO_PHYS(x))
+#ifdef CONFIG_SGI_SN
+#define TO_AMO(x)		(AMO_BASE     | TO_PHYS(x))
+#define TO_GET(x)		(GET_BASE     | TO_PHYS(x))
+#else
+#define TO_AMO(x)		({ BUG(); x; })
+#define TO_GET(x)		({ BUG(); x; })
+#endif
+
+/*
+ * Covert from processor physical address to II/TIO physical address:
+ *	II - squeeze out the AS bits
+ *	TIO- requires a chiplet id in bits 38-39.  For DMA to memory,
+ *           the chiplet id is zero.  If we implement TIO-TIO dma, we might need
+ *           to insert a chiplet id into this macro.  However, it is our belief
+ *           right now that this chiplet id will be ICE, which is also zero.
+ */
+#define SH1_TIO_PHYS_TO_DMA(x) 						\
+	((((u64)(NASID_GET(x))) << 40) | NODE_OFFSET(x))
+
+#define SH2_NETWORK_BANK_OFFSET(x) 					\
+        ((u64)(x) & ((1UL << (sn_hub_info->nasid_shift - 4)) -1))
+
+#define SH2_NETWORK_BANK_SELECT(x) 					\
+        ((((u64)(x) & (0x3UL << (sn_hub_info->nasid_shift - 4)))	\
+        	>> (sn_hub_info->nasid_shift - 4)) << 36)
+
+#define SH2_NETWORK_ADDRESS(x) 						\
+	(SH2_NETWORK_BANK_OFFSET(x) | SH2_NETWORK_BANK_SELECT(x))
+
+#define SH2_TIO_PHYS_TO_DMA(x) 						\
+        (((u64)(NASID_GET(x)) << 40) | 	SH2_NETWORK_ADDRESS(x))
+
+#define PHYS_TO_TIODMA(x)						\
+	(is_shub1() ? SH1_TIO_PHYS_TO_DMA(x) : SH2_TIO_PHYS_TO_DMA(x))
+
+#define PHYS_TO_DMA(x)							\
+	((((u64)(x) & NASID_MASK) >> 2) | NODE_OFFSET(x))
+
+
+/*
+ * Macros to test for address type.
+ */
+#define IS_AMO_ADDRESS(x)	(((u64)(x) & (RGN_BITS | AS_MASK)) == AMO_BASE)
+#define IS_AMO_PHYS_ADDRESS(x)	(((u64)(x) & (RGN_BITS | AS_MASK)) == AMO_PHYS_BASE)
+
+
+/*
+ * The following definitions pertain to the IO special address
+ * space.  They define the location of the big and little windows
+ * of any given node.
+ */
+#define BWIN_SIZE_BITS			29	/* big window size: 512M */
+#define TIO_BWIN_SIZE_BITS		30	/* big window size: 1G */
+#define NODE_SWIN_BASE(n, w)		((w == 0) ? NODE_BWIN_BASE((n), SWIN0_BIGWIN) \
+		: RAW_NODE_SWIN_BASE(n, w))
+#define TIO_SWIN_BASE(n, w) 		(TIO_IO_BASE(n) + \
+					    ((u64) (w) << TIO_SWIN_SIZE_BITS))
+#define NODE_IO_BASE(n)			(GLOBAL_MMR_SPACE | NASID_SPACE(n))
+#define TIO_IO_BASE(n)                  (__IA64_UNCACHED_OFFSET | NASID_SPACE(n))
+#define BWIN_SIZE			(1UL << BWIN_SIZE_BITS)
+#define NODE_BWIN_BASE0(n)		(NODE_IO_BASE(n) + BWIN_SIZE)
+#define NODE_BWIN_BASE(n, w)		(NODE_BWIN_BASE0(n) + ((u64) (w) << BWIN_SIZE_BITS))
+#define RAW_NODE_SWIN_BASE(n, w)	(NODE_IO_BASE(n) + ((u64) (w) << SWIN_SIZE_BITS))
+#define BWIN_WIDGET_MASK		0x7
+#define BWIN_WINDOWNUM(x)		(((x) >> BWIN_SIZE_BITS) & BWIN_WIDGET_MASK)
+#define SH1_IS_BIG_WINDOW_ADDR(x)	((x) & BWIN_TOP)
+
+#define TIO_BWIN_WINDOW_SELECT_MASK	0x7
+#define TIO_BWIN_WINDOWNUM(x)		(((x) >> TIO_BWIN_SIZE_BITS) & TIO_BWIN_WINDOW_SELECT_MASK)
+
+#define TIO_HWIN_SHIFT_BITS		33
+#define TIO_HWIN(x)			(NODE_OFFSET(x) >> TIO_HWIN_SHIFT_BITS)
+
+/*
+ * The following definitions pertain to the IO special address
+ * space.  They define the location of the big and little windows
+ * of any given node.
+ */
+
+#define SWIN_SIZE_BITS			24
+#define	SWIN_WIDGET_MASK		0xF
+
+#define TIO_SWIN_SIZE_BITS		28
+#define TIO_SWIN_SIZE			(1UL << TIO_SWIN_SIZE_BITS)
+#define TIO_SWIN_WIDGET_MASK		0x3
+
+/*
+ * Convert smallwindow address to xtalk address.
+ *
+ * 'addr' can be physical or virtual address, but will be converted
+ * to Xtalk address in the range 0 -> SWINZ_SIZEMASK
+ */
+#define	SWIN_WIDGETNUM(x)		(((x)  >> SWIN_SIZE_BITS) & SWIN_WIDGET_MASK)
+#define TIO_SWIN_WIDGETNUM(x)		(((x)  >> TIO_SWIN_SIZE_BITS) & TIO_SWIN_WIDGET_MASK)
+
+
+/*
+ * The following macros produce the correct base virtual address for
+ * the hub registers. The REMOTE_HUB_* macro produce
+ * the address for the specified hub's registers.  The intent is
+ * that the appropriate PI, MD, NI, or II register would be substituted
+ * for x.
+ *
+ *   WARNING:
+ *	When certain Hub chip workaround are defined, it's not sufficient
+ *	to dereference the *_HUB_ADDR() macros.  You should instead use
+ *	HUB_L() and HUB_S() if you must deal with pointers to hub registers.
+ *	Otherwise, the recommended approach is to use *_HUB_L() and *_HUB_S().
+ *	They're always safe.
+ */
+/* Shub1 TIO & MMR addressing macros */
+#define SH1_TIO_IOSPACE_ADDR(n,x)					\
+	GLOBAL_MMR_ADDR(n,x)
+
+#define SH1_REMOTE_BWIN_MMR(n,x)					\
+	GLOBAL_MMR_ADDR(n,x)
+
+#define SH1_REMOTE_SWIN_MMR(n,x)					\
+	(NODE_SWIN_BASE(n,1) + 0x800000UL + (x))
+
+#define SH1_REMOTE_MMR(n,x)						\
+	(SH1_IS_BIG_WINDOW_ADDR(x) ? SH1_REMOTE_BWIN_MMR(n,x) :		\
+	 	SH1_REMOTE_SWIN_MMR(n,x))
+
+/* Shub1 TIO & MMR addressing macros */
+#define SH2_TIO_IOSPACE_ADDR(n,x)					\
+	((__IA64_UNCACHED_OFFSET | REMOTE_ADDR(n,x) | 1UL << (NASID_SHIFT - 2)))
+
+#define SH2_REMOTE_MMR(n,x)						\
+	GLOBAL_MMR_ADDR(n,x)
+
+
+/* TIO & MMR addressing macros that work on both shub1 & shub2 */
+#define TIO_IOSPACE_ADDR(n,x)						\
+	((u64 *)(is_shub1() ? SH1_TIO_IOSPACE_ADDR(n,x) :		\
+		 SH2_TIO_IOSPACE_ADDR(n,x)))
+
+#define SH_REMOTE_MMR(n,x)						\
+	(is_shub1() ? SH1_REMOTE_MMR(n,x) : SH2_REMOTE_MMR(n,x))
+
+#define REMOTE_HUB_ADDR(n,x)						\
+	(IS_TIO_NASID(n) ?  ((volatile u64*)TIO_IOSPACE_ADDR(n,x)) :	\
+	 ((volatile u64*)SH_REMOTE_MMR(n,x)))
+
+
+#define HUB_L(x)			(*((volatile typeof(*x) *)x))
+#define	HUB_S(x,d)			(*((volatile typeof(*x) *)x) = (d))
+
+#define REMOTE_HUB_L(n, a)		HUB_L(REMOTE_HUB_ADDR((n), (a)))
+#define REMOTE_HUB_S(n, a, d)		HUB_S(REMOTE_HUB_ADDR((n), (a)), (d))
+
+/*
+ * Coretalk address breakdown
+ */
+#define CTALK_NASID_SHFT		40
+#define CTALK_NASID_MASK		(0x3FFFULL << CTALK_NASID_SHFT)
+#define CTALK_CID_SHFT			38
+#define CTALK_CID_MASK			(0x3ULL << CTALK_CID_SHFT)
+#define CTALK_NODE_OFFSET		0x3FFFFFFFFF
+
+#endif /* _ASM_IA64_SN_ADDRS_H */
diff --git a/arch/ia64/include/asm/sn/arch.h b/arch/ia64/include/asm/sn/arch.h
new file mode 100644
index 00000000..7caa1f44
--- /dev/null
+++ b/arch/ia64/include/asm/sn/arch.h
@@ -0,0 +1,86 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI specific setup.
+ *
+ * Copyright (C) 1995-1997,1999,2001-2005 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (C) 1999 Ralf Baechle (ralf@gnu.org)
+ */
+#ifndef _ASM_IA64_SN_ARCH_H
+#define _ASM_IA64_SN_ARCH_H
+
+#include <linux/numa.h>
+#include <asm/types.h>
+#include <asm/percpu.h>
+#include <asm/sn/types.h>
+#include <asm/sn/sn_cpuid.h>
+
+/*
+ * This is the maximum number of NUMALINK nodes that can be part of a single
+ * SSI kernel. This number includes C-brick, M-bricks, and TIOs. Nodes in
+ * remote partitions are NOT included in this number.
+ * The number of compact nodes cannot exceed size of a coherency domain.
+ * The purpose of this define is to specify a node count that includes
+ * all C/M/TIO nodes in an SSI system.
+ *
+ * SGI system can currently support up to 256 C/M nodes plus additional TIO nodes.
+ *
+ * 	Note: ACPI20 has an architectural limit of 256 nodes. When we upgrade
+ * 	to ACPI3.0, this limit will be removed. The notion of "compact nodes"
+ * 	should be deleted and TIOs should be included in MAX_NUMNODES.
+ */
+#define MAX_TIO_NODES		MAX_NUMNODES
+#define MAX_COMPACT_NODES	(MAX_NUMNODES + MAX_TIO_NODES)
+
+/*
+ * Maximum number of nodes in all partitions and in all coherency domains.
+ * This is the total number of nodes accessible in the numalink fabric. It
+ * includes all C & M bricks, plus all TIOs.
+ *
+ * This value is also the value of the maximum number of NASIDs in the numalink
+ * fabric.
+ */
+#define MAX_NUMALINK_NODES	16384
+
+/*
+ * The following defines attributes of the HUB chip. These attributes are
+ * frequently referenced. They are kept in the per-cpu data areas of each cpu.
+ * They are kept together in a struct to minimize cache misses.
+ */
+struct sn_hub_info_s {
+	u8 shub2;
+	u8 nasid_shift;
+	u8 as_shift;
+	u8 shub_1_1_found;
+	u16 nasid_bitmask;
+};
+DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
+#define sn_hub_info 	(&__get_cpu_var(__sn_hub_info))
+#define is_shub2()	(sn_hub_info->shub2)
+#define is_shub1()	(sn_hub_info->shub2 == 0)
+
+/*
+ * Use this macro to test if shub 1.1 wars should be enabled
+ */
+#define enable_shub_wars_1_1()	(sn_hub_info->shub_1_1_found)
+
+
+/*
+ * Compact node ID to nasid mappings kept in the per-cpu data areas of each
+ * cpu.
+ */
+DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
+#define sn_cnodeid_to_nasid	(&__get_cpu_var(__sn_cnodeid_to_nasid[0]))
+
+
+extern u8 sn_partition_id;
+extern u8 sn_system_size;
+extern u8 sn_sharing_domain_size;
+extern u8 sn_region_size;
+
+extern void sn_flush_all_caches(long addr, long bytes);
+extern bool sn_cpu_disable_allowed(int cpu);
+
+#endif /* _ASM_IA64_SN_ARCH_H */
diff --git a/arch/ia64/include/asm/sn/bte.h b/arch/ia64/include/asm/sn/bte.h
new file mode 100644
index 00000000..cc6c4dbf
--- /dev/null
+++ b/arch/ia64/include/asm/sn/bte.h
@@ -0,0 +1,234 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+
+#ifndef _ASM_IA64_SN_BTE_H
+#define _ASM_IA64_SN_BTE_H
+
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/types.h>
+#include <asm/sn/shub_mmr.h>
+
+#define IBCT_NOTIFY             (0x1UL << 4)
+#define IBCT_ZFIL_MODE          (0x1UL << 0)
+
+/* #define BTE_DEBUG */
+/* #define BTE_DEBUG_VERBOSE */
+
+#ifdef BTE_DEBUG
+#  define BTE_PRINTK(x) printk x	/* Terse */
+#  ifdef BTE_DEBUG_VERBOSE
+#    define BTE_PRINTKV(x) printk x	/* Verbose */
+#  else
+#    define BTE_PRINTKV(x)
+#  endif /* BTE_DEBUG_VERBOSE */
+#else
+#  define BTE_PRINTK(x)
+#  define BTE_PRINTKV(x)
+#endif	/* BTE_DEBUG */
+
+
+/* BTE status register only supports 16 bits for length field */
+#define BTE_LEN_BITS (16)
+#define BTE_LEN_MASK ((1 << BTE_LEN_BITS) - 1)
+#define BTE_MAX_XFER (BTE_LEN_MASK << L1_CACHE_SHIFT)
+
+
+/* Define hardware */
+#define BTES_PER_NODE (is_shub2() ? 4 : 2)
+#define MAX_BTES_PER_NODE 4
+
+#define BTE2OFF_CTRL	0
+#define BTE2OFF_SRC	(SH2_BT_ENG_SRC_ADDR_0 - SH2_BT_ENG_CSR_0)
+#define BTE2OFF_DEST	(SH2_BT_ENG_DEST_ADDR_0 - SH2_BT_ENG_CSR_0)
+#define BTE2OFF_NOTIFY	(SH2_BT_ENG_NOTIF_ADDR_0 - SH2_BT_ENG_CSR_0)
+
+#define BTE_BASE_ADDR(interface) 				\
+    (is_shub2() ? (interface == 0) ? SH2_BT_ENG_CSR_0 :		\
+		  (interface == 1) ? SH2_BT_ENG_CSR_1 :		\
+		  (interface == 2) ? SH2_BT_ENG_CSR_2 :		\
+		  		     SH2_BT_ENG_CSR_3 		\
+		: (interface == 0) ? IIO_IBLS0 : IIO_IBLS1)
+
+#define BTE_SOURCE_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_SRC/8) 			\
+		: base + (BTEOFF_SRC/8))
+
+#define BTE_DEST_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_DEST/8) 			\
+		: base + (BTEOFF_DEST/8))
+
+#define BTE_CTRL_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_CTRL/8) 			\
+		: base + (BTEOFF_CTRL/8))
+
+#define BTE_NOTIF_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_NOTIFY/8) 			\
+		: base + (BTEOFF_NOTIFY/8))
+
+/* Define hardware modes */
+#define BTE_NOTIFY IBCT_NOTIFY
+#define BTE_NORMAL BTE_NOTIFY
+#define BTE_ZERO_FILL (BTE_NOTIFY | IBCT_ZFIL_MODE)
+/* Use a reserved bit to let the caller specify a wait for any BTE */
+#define BTE_WACQUIRE 0x4000
+/* Use the BTE on the node with the destination memory */
+#define BTE_USE_DEST (BTE_WACQUIRE << 1)
+/* Use any available BTE interface on any node for the transfer */
+#define BTE_USE_ANY (BTE_USE_DEST << 1)
+/* macro to force the IBCT0 value valid */
+#define BTE_VALID_MODE(x) ((x) & (IBCT_NOTIFY | IBCT_ZFIL_MODE))
+
+#define BTE_ACTIVE		(IBLS_BUSY | IBLS_ERROR)
+#define BTE_WORD_AVAILABLE	(IBLS_BUSY << 1)
+#define BTE_WORD_BUSY		(~BTE_WORD_AVAILABLE)
+
+/*
+ * Some macros to simplify reading.
+ * Start with macros to locate the BTE control registers.
+ */
+#define BTE_LNSTAT_LOAD(_bte)						\
+			HUB_L(_bte->bte_base_addr)
+#define BTE_LNSTAT_STORE(_bte, _x)					\
+			HUB_S(_bte->bte_base_addr, (_x))
+#define BTE_SRC_STORE(_bte, _x)						\
+({									\
+		u64 __addr = ((_x) & ~AS_MASK);				\
+		if (is_shub2()) 					\
+			__addr = SH2_TIO_PHYS_TO_DMA(__addr);		\
+		HUB_S(_bte->bte_source_addr, __addr);			\
+})
+#define BTE_DEST_STORE(_bte, _x)					\
+({									\
+		u64 __addr = ((_x) & ~AS_MASK);				\
+		if (is_shub2()) 					\
+			__addr = SH2_TIO_PHYS_TO_DMA(__addr);		\
+		HUB_S(_bte->bte_destination_addr, __addr);		\
+})
+#define BTE_CTRL_STORE(_bte, _x)					\
+			HUB_S(_bte->bte_control_addr, (_x))
+#define BTE_NOTIF_STORE(_bte, _x)					\
+({									\
+		u64 __addr = ia64_tpa((_x) & ~AS_MASK);			\
+		if (is_shub2()) 					\
+			__addr = SH2_TIO_PHYS_TO_DMA(__addr);		\
+		HUB_S(_bte->bte_notify_addr, __addr);			\
+})
+
+#define BTE_START_TRANSFER(_bte, _len, _mode)				\
+	is_shub2() ? BTE_CTRL_STORE(_bte, IBLS_BUSY | (_mode << 24) | _len) \
+		: BTE_LNSTAT_STORE(_bte, _len);				\
+		  BTE_CTRL_STORE(_bte, _mode)
+
+/* Possible results from bte_copy and bte_unaligned_copy */
+/* The following error codes map into the BTE hardware codes
+ * IIO_ICRB_ECODE_* (in shubio.h). The hardware uses
+ * an error code of 0 (IIO_ICRB_ECODE_DERR), but we want zero
+ * to mean BTE_SUCCESS, so add one (BTEFAIL_OFFSET) to the error
+ * codes to give the following error codes.
+ */
+#define BTEFAIL_OFFSET	1
+
+typedef enum {
+	BTE_SUCCESS,		/* 0 is success */
+	BTEFAIL_DIR,		/* Directory error due to IIO access*/
+	BTEFAIL_POISON,		/* poison error on IO access (write to poison page) */
+	BTEFAIL_WERR,		/* Write error (ie WINV to a Read only line) */
+	BTEFAIL_ACCESS,		/* access error (protection violation) */
+	BTEFAIL_PWERR,		/* Partial Write Error */
+	BTEFAIL_PRERR,		/* Partial Read Error */
+	BTEFAIL_TOUT,		/* CRB Time out */
+	BTEFAIL_XTERR,		/* Incoming xtalk pkt had error bit */
+	BTEFAIL_NOTAVAIL,	/* BTE not available */
+} bte_result_t;
+
+#define BTEFAIL_SH2_RESP_SHORT	0x1	/* bit 000001 */
+#define BTEFAIL_SH2_RESP_LONG	0x2	/* bit 000010 */
+#define BTEFAIL_SH2_RESP_DSP	0x4	/* bit 000100 */
+#define BTEFAIL_SH2_RESP_ACCESS	0x8	/* bit 001000 */
+#define BTEFAIL_SH2_CRB_TO	0x10	/* bit 010000 */
+#define BTEFAIL_SH2_NACK_LIMIT	0x20	/* bit 100000 */
+#define BTEFAIL_SH2_ALL		0x3F	/* bit 111111 */
+
+#define	BTE_ERR_BITS	0x3FUL
+#define	BTE_ERR_SHIFT	36
+#define BTE_ERR_MASK	(BTE_ERR_BITS << BTE_ERR_SHIFT)
+
+#define BTE_ERROR_RETRY(value)						\
+	(is_shub2() ? (value != BTEFAIL_SH2_CRB_TO)			\
+		: (value != BTEFAIL_TOUT))
+
+/*
+ * On shub1 BTE_ERR_MASK will always be false, so no need for is_shub2()
+ */
+#define BTE_SHUB2_ERROR(_status)					\
+	((_status & BTE_ERR_MASK) 					\
+	   ? (((_status >> BTE_ERR_SHIFT) & BTE_ERR_BITS) | IBLS_ERROR) \
+	   : _status)
+
+#define BTE_GET_ERROR_STATUS(_status)					\
+	(BTE_SHUB2_ERROR(_status) & ~IBLS_ERROR)
+
+#define BTE_VALID_SH2_ERROR(value)					\
+	((value >= BTEFAIL_SH2_RESP_SHORT) && (value <= BTEFAIL_SH2_ALL))
+
+/*
+ * Structure defining a bte.  An instance of this
+ * structure is created in the nodepda for each
+ * bte on that node (as defined by BTES_PER_NODE)
+ * This structure contains everything necessary
+ * to work with a BTE.
+ */
+struct bteinfo_s {
+	volatile u64 notify ____cacheline_aligned;
+	u64 *bte_base_addr ____cacheline_aligned;
+	u64 *bte_source_addr;
+	u64 *bte_destination_addr;
+	u64 *bte_control_addr;
+	u64 *bte_notify_addr;
+	spinlock_t spinlock;
+	cnodeid_t bte_cnode;	/* cnode                            */
+	int bte_error_count;	/* Number of errors encountered     */
+	int bte_num;		/* 0 --> BTE0, 1 --> BTE1           */
+	int cleanup_active;	/* Interface is locked for cleanup  */
+	volatile bte_result_t bh_error;	/* error while processing   */
+	volatile u64 *most_rcnt_na;
+	struct bteinfo_s *btes_to_try[MAX_BTES_PER_NODE];
+};
+
+
+/*
+ * Function prototypes (functions defined in bte.c, used elsewhere)
+ */
+extern bte_result_t bte_copy(u64, u64, u64, u64, void *);
+extern bte_result_t bte_unaligned_copy(u64, u64, u64, u64);
+extern void bte_error_handler(unsigned long);
+
+#define bte_zero(dest, len, mode, notification) \
+	bte_copy(0, dest, len, ((mode) | BTE_ZERO_FILL), notification)
+
+/*
+ * The following is the preferred way of calling bte_unaligned_copy
+ * If the copy is fully cache line aligned, then bte_copy is
+ * used instead.  Since bte_copy is inlined, this saves a call
+ * stack.  NOTE: bte_copy is called synchronously and does block
+ * until the transfer is complete.  In order to get the asynch
+ * version of bte_copy, you must perform this check yourself.
+ */
+#define BTE_UNALIGNED_COPY(src, dest, len, mode)			\
+	(((len & (L1_CACHE_BYTES - 1)) ||				\
+	  (src & (L1_CACHE_BYTES - 1)) ||				\
+	  (dest & (L1_CACHE_BYTES - 1))) ?				\
+	 bte_unaligned_copy(src, dest, len, mode) :			\
+	 bte_copy(src, dest, len, mode, NULL))
+
+
+#endif	/* _ASM_IA64_SN_BTE_H */
diff --git a/arch/ia64/include/asm/sn/clksupport.h b/arch/ia64/include/asm/sn/clksupport.h
new file mode 100644
index 00000000..d340c365
--- /dev/null
+++ b/arch/ia64/include/asm/sn/clksupport.h
@@ -0,0 +1,28 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * This file contains definitions for accessing a platform supported high resolution
+ * clock. The clock is monitonically increasing and can be accessed from any node
+ * in the system. The clock is synchronized across nodes - all nodes see the
+ * same value.
+ * 
+ *	RTC_COUNTER_ADDR - contains the address of the counter 
+ *
+ */
+
+#ifndef _ASM_IA64_SN_CLKSUPPORT_H
+#define _ASM_IA64_SN_CLKSUPPORT_H
+
+extern unsigned long sn_rtc_cycles_per_second;
+
+#define RTC_COUNTER_ADDR	((long *)LOCAL_MMR_ADDR(SH_RTC))
+
+#define rtc_time()		(*RTC_COUNTER_ADDR)
+
+#endif /* _ASM_IA64_SN_CLKSUPPORT_H */
diff --git a/arch/ia64/include/asm/sn/geo.h b/arch/ia64/include/asm/sn/geo.h
new file mode 100644
index 00000000..f083c943
--- /dev/null
+++ b/arch/ia64/include/asm/sn/geo.h
@@ -0,0 +1,132 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_GEO_H
+#define _ASM_IA64_SN_GEO_H
+
+/* The geoid_t implementation below is based loosely on the pcfg_t
+   implementation in sys/SN/promcfg.h. */
+
+/* Type declaractions */
+
+/* Size of a geoid_t structure (must be before decl. of geoid_u) */
+#define GEOID_SIZE	8	/* Would 16 be better?  The size can
+				   be different on different platforms. */
+
+#define MAX_SLOTS	0xf	/* slots per module */
+#define MAX_SLABS	0xf	/* slabs per slot */
+
+typedef unsigned char	geo_type_t;
+
+/* Fields common to all substructures */
+typedef struct geo_common_s {
+    moduleid_t	module;		/* The module (box) this h/w lives in */
+    geo_type_t	type;		/* What type of h/w is named by this geoid_t */
+    slabid_t	slab:4;		/* slab (ASIC), 0 .. 15 within slot */
+    slotid_t	slot:4;		/* slot (Blade), 0 .. 15 within module */
+} geo_common_t;
+
+/* Additional fields for particular types of hardware */
+typedef struct geo_node_s {
+    geo_common_t	common;		/* No additional fields needed */
+} geo_node_t;
+
+typedef struct geo_rtr_s {
+    geo_common_t	common;		/* No additional fields needed */
+} geo_rtr_t;
+
+typedef struct geo_iocntl_s {
+    geo_common_t	common;		/* No additional fields needed */
+} geo_iocntl_t;
+
+typedef struct geo_pcicard_s {
+    geo_iocntl_t	common;
+    char		bus;	/* Bus/widget number */
+    char		slot;	/* PCI slot number */
+} geo_pcicard_t;
+
+/* Subcomponents of a node */
+typedef struct geo_cpu_s {
+    geo_node_t	node;
+    char	slice;		/* Which CPU on the node */
+} geo_cpu_t;
+
+typedef struct geo_mem_s {
+    geo_node_t	node;
+    char	membus;		/* The memory bus on the node */
+    char	memslot;	/* The memory slot on the bus */
+} geo_mem_t;
+
+
+typedef union geoid_u {
+    geo_common_t	common;
+    geo_node_t		node;
+    geo_iocntl_t	iocntl;
+    geo_pcicard_t	pcicard;
+    geo_rtr_t		rtr;
+    geo_cpu_t		cpu;
+    geo_mem_t		mem;
+    char		padsize[GEOID_SIZE];
+} geoid_t;
+
+
+/* Preprocessor macros */
+
+#define GEO_MAX_LEN	48	/* max. formatted length, plus some pad:
+				   module/001c07/slab/5/node/memory/2/slot/4 */
+
+/* Values for geo_type_t */
+#define GEO_TYPE_INVALID	0
+#define GEO_TYPE_MODULE		1
+#define GEO_TYPE_NODE		2
+#define GEO_TYPE_RTR		3
+#define GEO_TYPE_IOCNTL		4
+#define GEO_TYPE_IOCARD		5
+#define GEO_TYPE_CPU		6
+#define GEO_TYPE_MEM		7
+#define GEO_TYPE_MAX		(GEO_TYPE_MEM+1)
+
+/* Parameter for hwcfg_format_geoid_compt() */
+#define GEO_COMPT_MODULE	1
+#define GEO_COMPT_SLAB		2
+#define GEO_COMPT_IOBUS		3
+#define GEO_COMPT_IOSLOT	4
+#define GEO_COMPT_CPU		5
+#define GEO_COMPT_MEMBUS	6
+#define GEO_COMPT_MEMSLOT	7
+
+#define GEO_INVALID_STR		"<invalid>"
+
+#define INVALID_NASID           ((nasid_t)-1)
+#define INVALID_CNODEID         ((cnodeid_t)-1)
+#define INVALID_PNODEID         ((pnodeid_t)-1)
+#define INVALID_SLAB            (slabid_t)-1
+#define INVALID_SLOT            (slotid_t)-1
+#define INVALID_MODULE          ((moduleid_t)-1)
+
+static inline slabid_t geo_slab(geoid_t g)
+{
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_SLAB : g.common.slab;
+}
+
+static inline slotid_t geo_slot(geoid_t g)
+{
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_SLOT : g.common.slot;
+}
+
+static inline moduleid_t geo_module(geoid_t g)
+{
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_MODULE : g.common.module;
+}
+
+extern geoid_t cnodeid_get_geoid(cnodeid_t cnode);
+
+#endif /* _ASM_IA64_SN_GEO_H */
diff --git a/arch/ia64/include/asm/sn/intr.h b/arch/ia64/include/asm/sn/intr.h
new file mode 100644
index 00000000..e0487aa9
--- /dev/null
+++ b/arch/ia64/include/asm/sn/intr.h
@@ -0,0 +1,68 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_INTR_H
+#define _ASM_IA64_SN_INTR_H
+
+#include <linux/rcupdate.h>
+#include <asm/sn/types.h>
+
+#define SGI_UART_VECTOR		0xe9
+
+/* Reserved IRQs : Note, not to exceed IA64_SN2_FIRST_DEVICE_VECTOR */
+#define SGI_XPC_ACTIVATE	0x30
+#define SGI_II_ERROR		0x31
+#define SGI_XBOW_ERROR		0x32
+#define SGI_PCIASIC_ERROR	0x33
+#define SGI_ACPI_SCI_INT	0x34
+#define SGI_TIOCA_ERROR		0x35
+#define SGI_TIO_ERROR		0x36
+#define SGI_TIOCX_ERROR		0x37
+#define SGI_MMTIMER_VECTOR	0x38
+#define SGI_XPC_NOTIFY		0xe7
+
+#define IA64_SN2_FIRST_DEVICE_VECTOR	0x3c
+#define IA64_SN2_LAST_DEVICE_VECTOR	0xe6
+
+#define SN2_IRQ_RESERVED	0x1
+#define SN2_IRQ_CONNECTED	0x2
+#define SN2_IRQ_SHARED		0x4
+
+// The SN PROM irq struct
+struct sn_irq_info {
+	struct sn_irq_info *irq_next;	/* deprecated DO NOT USE     */
+	short		irq_nasid;	/* Nasid IRQ is assigned to  */
+	int		irq_slice;	/* slice IRQ is assigned to  */
+	int		irq_cpuid;	/* kernel logical cpuid	     */
+	int		irq_irq;	/* the IRQ number */
+	int		irq_int_bit;	/* Bridge interrupt pin */
+					/* <0 means MSI */
+	u64	irq_xtalkaddr;	/* xtalkaddr IRQ is sent to  */
+	int		irq_bridge_type;/* pciio asic type (pciio.h) */
+	void	       *irq_bridge;	/* bridge generating irq     */
+	void	       *irq_pciioinfo;	/* associated pciio_info_t   */
+	int		irq_last_intr;	/* For Shub lb lost intr WAR */
+	int		irq_cookie;	/* unique cookie 	     */
+	int		irq_flags;	/* flags */
+	int		irq_share_cnt;	/* num devices sharing IRQ   */
+	struct list_head	list;	/* list of sn_irq_info structs */
+	struct rcu_head		rcu;	/* rcu callback list */
+};
+
+extern void sn_send_IPI_phys(int, long, int, int);
+extern u64 sn_intr_alloc(nasid_t, int,
+			      struct sn_irq_info *,
+			      int, nasid_t, int);
+extern void sn_intr_free(nasid_t, int, struct sn_irq_info *);
+extern struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *, nasid_t, int);
+extern void sn_set_err_irq_affinity(unsigned int);
+extern struct list_head **sn_irq_lh;
+
+#define CPU_VECTOR_TO_IRQ(cpuid,vector) (vector)
+
+#endif /* _ASM_IA64_SN_INTR_H */
diff --git a/arch/ia64/include/asm/sn/io.h b/arch/ia64/include/asm/sn/io.h
new file mode 100644
index 00000000..41c73a73
--- /dev/null
+++ b/arch/ia64/include/asm/sn/io.h
@@ -0,0 +1,274 @@
+/* 
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_SN_IO_H
+#define _ASM_SN_IO_H
+#include <linux/compiler.h>
+#include <asm/intrinsics.h>
+
+extern void * sn_io_addr(unsigned long port) __attribute_const__; /* Forward definition */
+extern void __sn_mmiowb(void); /* Forward definition */
+
+extern int num_cnodes;
+
+#define __sn_mf_a()   ia64_mfa()
+
+extern void sn_dma_flush(unsigned long);
+
+#define __sn_inb ___sn_inb
+#define __sn_inw ___sn_inw
+#define __sn_inl ___sn_inl
+#define __sn_outb ___sn_outb
+#define __sn_outw ___sn_outw
+#define __sn_outl ___sn_outl
+#define __sn_readb ___sn_readb
+#define __sn_readw ___sn_readw
+#define __sn_readl ___sn_readl
+#define __sn_readq ___sn_readq
+#define __sn_readb_relaxed ___sn_readb_relaxed
+#define __sn_readw_relaxed ___sn_readw_relaxed
+#define __sn_readl_relaxed ___sn_readl_relaxed
+#define __sn_readq_relaxed ___sn_readq_relaxed
+
+/*
+ * Convenience macros for setting/clearing bits using the above accessors
+ */
+
+#define __sn_setq_relaxed(addr, val) \
+	writeq((__sn_readq_relaxed(addr) | (val)), (addr))
+#define __sn_clrq_relaxed(addr, val) \
+	writeq((__sn_readq_relaxed(addr) & ~(val)), (addr))
+
+/*
+ * The following routines are SN Platform specific, called when
+ * a reference is made to inX/outX set macros.  SN Platform
+ * inX set of macros ensures that Posted DMA writes on the
+ * Bridge is flushed.
+ *
+ * The routines should be self explainatory.
+ */
+
+static inline unsigned int
+___sn_inb (unsigned long port)
+{
+	volatile unsigned char *addr;
+	unsigned char ret = -1;
+
+	if ((addr = sn_io_addr(port))) {
+		ret = *addr;
+		__sn_mf_a();
+		sn_dma_flush((unsigned long)addr);
+	}
+	return ret;
+}
+
+static inline unsigned int
+___sn_inw (unsigned long port)
+{
+	volatile unsigned short *addr;
+	unsigned short ret = -1;
+
+	if ((addr = sn_io_addr(port))) {
+		ret = *addr;
+		__sn_mf_a();
+		sn_dma_flush((unsigned long)addr);
+	}
+	return ret;
+}
+
+static inline unsigned int
+___sn_inl (unsigned long port)
+{
+	volatile unsigned int *addr;
+	unsigned int ret = -1;
+
+	if ((addr = sn_io_addr(port))) {
+		ret = *addr;
+		__sn_mf_a();
+		sn_dma_flush((unsigned long)addr);
+	}
+	return ret;
+}
+
+static inline void
+___sn_outb (unsigned char val, unsigned long port)
+{
+	volatile unsigned char *addr;
+
+	if ((addr = sn_io_addr(port))) {
+		*addr = val;
+		__sn_mmiowb();
+	}
+}
+
+static inline void
+___sn_outw (unsigned short val, unsigned long port)
+{
+	volatile unsigned short *addr;
+
+	if ((addr = sn_io_addr(port))) {
+		*addr = val;
+		__sn_mmiowb();
+	}
+}
+
+static inline void
+___sn_outl (unsigned int val, unsigned long port)
+{
+	volatile unsigned int *addr;
+
+	if ((addr = sn_io_addr(port))) {
+		*addr = val;
+		__sn_mmiowb();
+	}
+}
+
+/*
+ * The following routines are SN Platform specific, called when 
+ * a reference is made to readX/writeX set macros.  SN Platform 
+ * readX set of macros ensures that Posted DMA writes on the 
+ * Bridge is flushed.
+ * 
+ * The routines should be self explainatory.
+ */
+
+static inline unsigned char
+___sn_readb (const volatile void __iomem *addr)
+{
+	unsigned char val;
+
+	val = *(volatile unsigned char __force *)addr;
+	__sn_mf_a();
+	sn_dma_flush((unsigned long)addr);
+        return val;
+}
+
+static inline unsigned short
+___sn_readw (const volatile void __iomem *addr)
+{
+	unsigned short val;
+
+	val = *(volatile unsigned short __force *)addr;
+	__sn_mf_a();
+	sn_dma_flush((unsigned long)addr);
+        return val;
+}
+
+static inline unsigned int
+___sn_readl (const volatile void __iomem *addr)
+{
+	unsigned int val;
+
+	val = *(volatile unsigned int __force *)addr;
+	__sn_mf_a();
+	sn_dma_flush((unsigned long)addr);
+        return val;
+}
+
+static inline unsigned long
+___sn_readq (const volatile void __iomem *addr)
+{
+	unsigned long val;
+
+	val = *(volatile unsigned long __force *)addr;
+	__sn_mf_a();
+	sn_dma_flush((unsigned long)addr);
+        return val;
+}
+
+/*
+ * For generic and SN2 kernels, we have a set of fast access
+ * PIO macros.	These macros are provided on SN Platform
+ * because the normal inX and readX macros perform an
+ * additional task of flushing Post DMA request on the Bridge.
+ *
+ * These routines should be self explainatory.
+ */
+
+static inline unsigned int
+sn_inb_fast (unsigned long port)
+{
+	volatile unsigned char *addr = (unsigned char *)port;
+	unsigned char ret;
+
+	ret = *addr;
+	__sn_mf_a();
+	return ret;
+}
+
+static inline unsigned int
+sn_inw_fast (unsigned long port)
+{
+	volatile unsigned short *addr = (unsigned short *)port;
+	unsigned short ret;
+
+	ret = *addr;
+	__sn_mf_a();
+	return ret;
+}
+
+static inline unsigned int
+sn_inl_fast (unsigned long port)
+{
+	volatile unsigned int *addr = (unsigned int *)port;
+	unsigned int ret;
+
+	ret = *addr;
+	__sn_mf_a();
+	return ret;
+}
+
+static inline unsigned char
+___sn_readb_relaxed (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned char __force *)addr;
+}
+
+static inline unsigned short
+___sn_readw_relaxed (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned short __force *)addr;
+}
+
+static inline unsigned int
+___sn_readl_relaxed (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned int __force *) addr;
+}
+
+static inline unsigned long
+___sn_readq_relaxed (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned long __force *) addr;
+}
+
+struct pci_dev;
+
+static inline int
+sn_pci_set_vchan(struct pci_dev *pci_dev, unsigned long *addr, int vchan)
+{
+
+	if (vchan > 1) {
+		return -1;
+	}
+
+	if (!(*addr >> 32))	/* Using a mask here would be cleaner */
+		return 0;	/* but this generates better code */
+
+	if (vchan == 1) {
+		/* Set Bit 57 */
+		*addr |= (1UL << 57);
+	} else {
+		/* Clear Bit 57 */
+		*addr &= ~(1UL << 57);
+	}
+
+	return 0;
+}
+
+#endif	/* _ASM_SN_IO_H */
diff --git a/arch/ia64/include/asm/sn/ioc3.h b/arch/ia64/include/asm/sn/ioc3.h
new file mode 100644
index 00000000..95ed6cc8
--- /dev/null
+++ b/arch/ia64/include/asm/sn/ioc3.h
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2005 Silicon Graphics, Inc.
+ */
+#ifndef IA64_SN_IOC3_H
+#define IA64_SN_IOC3_H
+
+/* serial port register map */
+struct ioc3_serialregs {
+	uint32_t sscr;
+	uint32_t stpir;
+	uint32_t stcir;
+	uint32_t srpir;
+	uint32_t srcir;
+	uint32_t srtr;
+	uint32_t shadow;
+};
+
+/* SUPERIO uart register map */
+struct ioc3_uartregs {
+	char iu_lcr;
+	union {
+		char iir;	/* read only */
+		char fcr;	/* write only */
+	} u3;
+	union {
+		char ier;	/* DLAB == 0 */
+		char dlm;	/* DLAB == 1 */
+	} u2;
+	union {
+		char rbr;	/* read only, DLAB == 0 */
+		char thr;	/* write only, DLAB == 0 */
+		char dll;	/* DLAB == 1 */
+	} u1;
+	char iu_scr;
+	char iu_msr;
+	char iu_lsr;
+	char iu_mcr;
+};
+
+#define iu_rbr u1.rbr
+#define iu_thr u1.thr
+#define iu_dll u1.dll
+#define iu_ier u2.ier
+#define iu_dlm u2.dlm
+#define iu_iir u3.iir
+#define iu_fcr u3.fcr
+
+struct ioc3_sioregs {
+	char fill[0x170];
+	struct ioc3_uartregs uartb;
+	struct ioc3_uartregs uarta;
+};
+
+/* PCI IO/mem space register map */
+struct ioc3 {
+	uint32_t pci_id;
+	uint32_t pci_scr;
+	uint32_t pci_rev;
+	uint32_t pci_lat;
+	uint32_t pci_addr;
+	uint32_t pci_err_addr_l;
+	uint32_t pci_err_addr_h;
+
+	uint32_t sio_ir;
+	/* these registers are read-only for general kernel code. To
+	 * modify them use the functions in ioc3.c
+	 */
+	uint32_t sio_ies;
+	uint32_t sio_iec;
+	uint32_t sio_cr;
+	uint32_t int_out;
+	uint32_t mcr;
+	uint32_t gpcr_s;
+	uint32_t gpcr_c;
+	uint32_t gpdr;
+	uint32_t gppr[9];
+	char fill[0x4c];
+
+	/* serial port registers */
+	uint32_t sbbr_h;
+	uint32_t sbbr_l;
+
+	struct ioc3_serialregs port_a;
+	struct ioc3_serialregs port_b;
+	char fill1[0x1ff10];
+	/* superio registers */
+	struct ioc3_sioregs sregs;
+};
+
+/* These don't exist on the ioc3 serial card... */
+#define eier	fill1[8]
+#define eisr	fill1[4]
+
+#define PCI_LAT			0xc	/* Latency Timer */
+#define PCI_SCR_DROP_MODE_EN	0x00008000 /* drop pios on parity err */
+#define UARTA_BASE		0x178
+#define UARTB_BASE		0x170
+
+
+/* bitmasks for serial RX status byte */
+#define RXSB_OVERRUN		0x01	/* char(s) lost */
+#define RXSB_PAR_ERR		0x02	/* parity error */
+#define RXSB_FRAME_ERR		0x04	/* framing error */
+#define RXSB_BREAK		0x08	/* break character */
+#define RXSB_CTS		0x10	/* state of CTS */
+#define RXSB_DCD		0x20	/* state of DCD */
+#define RXSB_MODEM_VALID	0x40	/* DCD, CTS and OVERRUN are valid */
+#define RXSB_DATA_VALID		0x80	/* FRAME_ERR PAR_ERR & BREAK valid */
+
+/* bitmasks for serial TX control byte */
+#define TXCB_INT_WHEN_DONE	0x20	/* interrupt after this byte is sent */
+#define TXCB_INVALID		0x00	/* byte is invalid */
+#define TXCB_VALID		0x40	/* byte is valid */
+#define TXCB_MCR		0x80	/* data<7:0> to modem cntrl register */
+#define TXCB_DELAY		0xc0	/* delay data<7:0> mSec */
+
+/* bitmasks for SBBR_L */
+#define SBBR_L_SIZE		0x00000001	/* 0 1KB rings, 1 4KB rings */
+
+/* bitmasks for SSCR_<A:B> */
+#define SSCR_RX_THRESHOLD	0x000001ff	/* hiwater mark */
+#define SSCR_TX_TIMER_BUSY	0x00010000	/* TX timer in progress */
+#define SSCR_HFC_EN		0x00020000	/* h/w flow cntrl enabled */
+#define SSCR_RX_RING_DCD	0x00040000	/* postRX record on delta-DCD */
+#define SSCR_RX_RING_CTS	0x00080000	/* postRX record on delta-CTS */
+#define SSCR_HIGH_SPD		0x00100000	/* 4X speed */
+#define SSCR_DIAG		0x00200000	/* bypass clock divider */
+#define SSCR_RX_DRAIN		0x08000000	/* drain RX buffer to memory */
+#define SSCR_DMA_EN		0x10000000	/* enable ring buffer DMA */
+#define SSCR_DMA_PAUSE		0x20000000	/* pause DMA */
+#define SSCR_PAUSE_STATE	0x40000000	/* set when PAUSE takes effect*/
+#define SSCR_RESET		0x80000000	/* reset DMA channels */
+
+/* all producer/comsumer pointers are the same bitfield */
+#define PROD_CONS_PTR_4K	0x00000ff8	/* for 4K buffers */
+#define PROD_CONS_PTR_1K	0x000003f8	/* for 1K buffers */
+#define PROD_CONS_PTR_OFF	3
+
+/* bitmasks for SRCIR_<A:B> */
+#define SRCIR_ARM		0x80000000	/* arm RX timer */
+
+/* bitmasks for SHADOW_<A:B> */
+#define SHADOW_DR		0x00000001	/* data ready */
+#define SHADOW_OE		0x00000002	/* overrun error */
+#define SHADOW_PE		0x00000004	/* parity error */
+#define SHADOW_FE		0x00000008	/* framing error */
+#define SHADOW_BI		0x00000010	/* break interrupt */
+#define SHADOW_THRE		0x00000020	/* transmit holding reg empty */
+#define SHADOW_TEMT		0x00000040	/* transmit shift reg empty */
+#define SHADOW_RFCE		0x00000080	/* char in RX fifo has error */
+#define SHADOW_DCTS		0x00010000	/* delta clear to send */
+#define SHADOW_DDCD		0x00080000	/* delta data carrier detect */
+#define SHADOW_CTS		0x00100000	/* clear to send */
+#define SHADOW_DCD		0x00800000	/* data carrier detect */
+#define SHADOW_DTR		0x01000000	/* data terminal ready */
+#define SHADOW_RTS		0x02000000	/* request to send */
+#define SHADOW_OUT1		0x04000000	/* 16550 OUT1 bit */
+#define SHADOW_OUT2		0x08000000	/* 16550 OUT2 bit */
+#define SHADOW_LOOP		0x10000000	/* loopback enabled */
+
+/* bitmasks for SRTR_<A:B> */
+#define SRTR_CNT		0x00000fff	/* reload value for RX timer */
+#define SRTR_CNT_VAL		0x0fff0000	/* current value of RX timer */
+#define SRTR_CNT_VAL_SHIFT	16
+#define SRTR_HZ			16000		/* SRTR clock frequency */
+
+/* bitmasks for SIO_IR, SIO_IEC and SIO_IES  */
+#define SIO_IR_SA_TX_MT		0x00000001	/* Serial port A TX empty */
+#define SIO_IR_SA_RX_FULL	0x00000002	/* port A RX buf full */
+#define SIO_IR_SA_RX_HIGH	0x00000004	/* port A RX hiwat */
+#define SIO_IR_SA_RX_TIMER	0x00000008	/* port A RX timeout */
+#define SIO_IR_SA_DELTA_DCD	0x00000010	/* port A delta DCD */
+#define SIO_IR_SA_DELTA_CTS	0x00000020	/* port A delta CTS */
+#define SIO_IR_SA_INT		0x00000040	/* port A pass-thru intr */
+#define SIO_IR_SA_TX_EXPLICIT	0x00000080	/* port A explicit TX thru */
+#define SIO_IR_SA_MEMERR	0x00000100	/* port A PCI error */
+#define SIO_IR_SB_TX_MT		0x00000200
+#define SIO_IR_SB_RX_FULL	0x00000400
+#define SIO_IR_SB_RX_HIGH	0x00000800
+#define SIO_IR_SB_RX_TIMER	0x00001000
+#define SIO_IR_SB_DELTA_DCD	0x00002000
+#define SIO_IR_SB_DELTA_CTS	0x00004000
+#define SIO_IR_SB_INT		0x00008000
+#define SIO_IR_SB_TX_EXPLICIT	0x00010000
+#define SIO_IR_SB_MEMERR	0x00020000
+#define SIO_IR_PP_INT		0x00040000	/* P port pass-thru intr */
+#define SIO_IR_PP_INTA		0x00080000	/* PP context A thru */
+#define SIO_IR_PP_INTB		0x00100000	/* PP context B thru */
+#define SIO_IR_PP_MEMERR	0x00200000	/* PP PCI error */
+#define SIO_IR_KBD_INT		0x00400000	/* kbd/mouse intr */
+#define SIO_IR_RT_INT		0x08000000	/* RT output pulse */
+#define SIO_IR_GEN_INT1		0x10000000	/* RT input pulse */
+#define SIO_IR_GEN_INT_SHIFT	28
+
+/* per device interrupt masks */
+#define SIO_IR_SA		(SIO_IR_SA_TX_MT | \
+				 SIO_IR_SA_RX_FULL | \
+				 SIO_IR_SA_RX_HIGH | \
+				 SIO_IR_SA_RX_TIMER | \
+				 SIO_IR_SA_DELTA_DCD | \
+				 SIO_IR_SA_DELTA_CTS | \
+				 SIO_IR_SA_INT | \
+				 SIO_IR_SA_TX_EXPLICIT | \
+				 SIO_IR_SA_MEMERR)
+
+#define SIO_IR_SB		(SIO_IR_SB_TX_MT | \
+				 SIO_IR_SB_RX_FULL | \
+				 SIO_IR_SB_RX_HIGH | \
+				 SIO_IR_SB_RX_TIMER | \
+				 SIO_IR_SB_DELTA_DCD | \
+				 SIO_IR_SB_DELTA_CTS | \
+				 SIO_IR_SB_INT | \
+				 SIO_IR_SB_TX_EXPLICIT | \
+				 SIO_IR_SB_MEMERR)
+
+#define SIO_IR_PP		(SIO_IR_PP_INT | SIO_IR_PP_INTA | \
+				 SIO_IR_PP_INTB | SIO_IR_PP_MEMERR)
+#define SIO_IR_RT		(SIO_IR_RT_INT | SIO_IR_GEN_INT1)
+
+/* bitmasks for SIO_CR */
+#define SIO_CR_CMD_PULSE_SHIFT 15
+#define SIO_CR_SER_A_BASE_SHIFT 1
+#define SIO_CR_SER_B_BASE_SHIFT 8
+#define SIO_CR_ARB_DIAG		0x00380000	/* cur !enet PCI requet (ro) */
+#define SIO_CR_ARB_DIAG_TXA	0x00000000
+#define SIO_CR_ARB_DIAG_RXA	0x00080000
+#define SIO_CR_ARB_DIAG_TXB	0x00100000
+#define SIO_CR_ARB_DIAG_RXB	0x00180000
+#define SIO_CR_ARB_DIAG_PP	0x00200000
+#define SIO_CR_ARB_DIAG_IDLE	0x00400000	/* 0 -> active request (ro) */
+
+/* defs for some of the generic I/O pins */
+#define GPCR_PHY_RESET		0x20	/* pin is output to PHY reset */
+#define GPCR_UARTB_MODESEL	0x40	/* pin is output to port B mode sel */
+#define GPCR_UARTA_MODESEL	0x80	/* pin is output to port A mode sel */
+
+#define GPPR_PHY_RESET_PIN	5	/* GIO pin controlling phy reset */
+#define GPPR_UARTB_MODESEL_PIN	6	/* GIO pin cntrling uartb modeselect */
+#define GPPR_UARTA_MODESEL_PIN	7	/* GIO pin cntrling uarta modeselect */
+
+#endif /* IA64_SN_IOC3_H */
diff --git a/arch/ia64/include/asm/sn/klconfig.h b/arch/ia64/include/asm/sn/klconfig.h
new file mode 100644
index 00000000..bcbf209d
--- /dev/null
+++ b/arch/ia64/include/asm/sn/klconfig.h
@@ -0,0 +1,246 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Derived from IRIX <sys/SN/klconfig.h>.
+ *
+ * Copyright (C) 1992-1997,1999,2001-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (C) 1999 by Ralf Baechle
+ */
+#ifndef _ASM_IA64_SN_KLCONFIG_H
+#define _ASM_IA64_SN_KLCONFIG_H
+
+/*
+ * The KLCONFIG structures store info about the various BOARDs found
+ * during Hardware Discovery. In addition, it stores info about the
+ * components found on the BOARDs.
+ */
+
+typedef s32 klconf_off_t;
+
+
+/* Functions/macros needed to use this structure */
+
+typedef struct kl_config_hdr {
+	char		pad[20];
+	klconf_off_t	ch_board_info;	/* the link list of boards */
+	char		pad0[88];
+} kl_config_hdr_t;
+
+
+#define NODE_OFFSET_TO_LBOARD(nasid,off)        (lboard_t*)(GLOBAL_CAC_ADDR((nasid), (off)))
+
+/*
+ * The KLCONFIG area is organized as a LINKED LIST of BOARDs. A BOARD
+ * can be either 'LOCAL' or 'REMOTE'. LOCAL means it is attached to 
+ * the LOCAL/current NODE. REMOTE means it is attached to a different
+ * node.(TBD - Need a way to treat ROUTER boards.)
+ *
+ * There are 2 different structures to represent these boards -
+ * lboard - Local board, rboard - remote board. These 2 structures
+ * can be arbitrarily mixed in the LINKED LIST of BOARDs. (Refer
+ * Figure below). The first byte of the rboard or lboard structure
+ * is used to find out its type - no unions are used.
+ * If it is a lboard, then the config info of this board will be found
+ * on the local node. (LOCAL NODE BASE + offset value gives pointer to 
+ * the structure.
+ * If it is a rboard, the local structure contains the node number
+ * and the offset of the beginning of the LINKED LIST on the remote node.
+ * The details of the hardware on a remote node can be built locally,
+ * if required, by reading the LINKED LIST on the remote node and 
+ * ignoring all the rboards on that node.
+ *
+ * The local node uses the REMOTE NODE NUMBER + OFFSET to point to the 
+ * First board info on the remote node. The remote node list is 
+ * traversed as the local list, using the REMOTE BASE ADDRESS and not
+ * the local base address and ignoring all rboard values.
+ *
+ * 
+ KLCONFIG
+
+ +------------+      +------------+      +------------+      +------------+
+ |  lboard    |  +-->|   lboard   |  +-->|   rboard   |  +-->|   lboard   |
+ +------------+  |   +------------+  |   +------------+  |   +------------+
+ | board info |  |   | board info |  |   |errinfo,bptr|  |   | board info |
+ +------------+  |   +------------+  |   +------------+  |   +------------+
+ | offset     |--+   |  offset    |--+   |  offset    |--+   |offset=NULL |
+ +------------+      +------------+      +------------+      +------------+
+
+
+ +------------+
+ | board info |
+ +------------+       +--------------------------------+
+ | compt 1    |------>| type, rev, diaginfo, size ...  |  (CPU)
+ +------------+       +--------------------------------+
+ | compt 2    |--+
+ +------------+  |    +--------------------------------+
+ |  ...       |  +--->| type, rev, diaginfo, size ...  |  (MEM_BANK)
+ +------------+       +--------------------------------+
+ | errinfo    |--+
+ +------------+  |    +--------------------------------+
+                 +--->|r/l brd errinfo,compt err flags |
+                      +--------------------------------+
+
+ *
+ * Each BOARD consists of COMPONENTs and the BOARD structure has 
+ * pointers (offsets) to its COMPONENT structure.
+ * The COMPONENT structure has version info, size and speed info, revision,
+ * error info and the NIC info. This structure can accommodate any
+ * BOARD with arbitrary COMPONENT composition.
+ *
+ * The ERRORINFO part of each BOARD has error information
+ * that describes errors about the BOARD itself. It also has flags to
+ * indicate the COMPONENT(s) on the board that have errors. The error 
+ * information specific to the COMPONENT is present in the respective 
+ * COMPONENT structure.
+ *
+ * The ERRORINFO structure is also treated like a COMPONENT, ie. the 
+ * BOARD has pointers(offset) to the ERRORINFO structure. The rboard
+ * structure also has a pointer to the ERRORINFO structure. This is 
+ * the place to store ERRORINFO about a REMOTE NODE, if the HUB on
+ * that NODE is not working or if the REMOTE MEMORY is BAD. In cases where 
+ * only the CPU of the REMOTE NODE is disabled, the ERRORINFO pointer can
+ * be a NODE NUMBER, REMOTE OFFSET combination, pointing to error info 
+ * which is present on the REMOTE NODE.(TBD)
+ * REMOTE ERRINFO can be stored on any of the nearest nodes 
+ * or on all the nearest nodes.(TBD)
+ * Like BOARD structures, REMOTE ERRINFO structures can be built locally
+ * using the rboard errinfo pointer.
+ *
+ * In order to get useful information from this Data organization, a set of
+ * interface routines are provided (TBD). The important thing to remember while
+ * manipulating the structures, is that, the NODE number information should
+ * be used. If the NODE is non-zero (remote) then each offset should
+ * be added to the REMOTE BASE ADDR else it should be added to the LOCAL BASE ADDR. 
+ * This includes offsets for BOARDS, COMPONENTS and ERRORINFO.
+ * 
+ * Note that these structures do not provide much info about connectivity.
+ * That info will be part of HWGRAPH, which is an extension of the cfg_t
+ * data structure. (ref IP27prom/cfg.h) It has to be extended to include
+ * the IO part of the Network(TBD).
+ *
+ * The data structures below define the above concepts.
+ */
+
+
+/*
+ * BOARD classes
+ */
+
+#define KLCLASS_MASK	0xf0   
+#define KLCLASS_NONE	0x00
+#define KLCLASS_NODE	0x10             /* CPU, Memory and HUB board */
+#define KLCLASS_CPU	KLCLASS_NODE	
+#define KLCLASS_IO	0x20             /* BaseIO, 4 ch SCSI, ethernet, FDDI 
+					    and the non-graphics widget boards */
+#define KLCLASS_ROUTER	0x30             /* Router board */
+#define KLCLASS_MIDPLANE 0x40            /* We need to treat this as a board
+                                            so that we can record error info */
+#define KLCLASS_IOBRICK	0x70		/* IP35 iobrick */
+#define KLCLASS_MAX	8		/* Bump this if a new CLASS is added */
+
+#define KLCLASS(_x) ((_x) & KLCLASS_MASK)
+
+
+/*
+ * board types
+ */
+
+#define KLTYPE_MASK	0x0f
+#define KLTYPE(_x)      ((_x) & KLTYPE_MASK)
+
+#define KLTYPE_SNIA	(KLCLASS_CPU | 0x1)
+#define KLTYPE_TIO	(KLCLASS_CPU | 0x2)
+
+#define KLTYPE_ROUTER     (KLCLASS_ROUTER | 0x1)
+#define KLTYPE_META_ROUTER (KLCLASS_ROUTER | 0x3)
+#define KLTYPE_REPEATER_ROUTER (KLCLASS_ROUTER | 0x4)
+
+#define KLTYPE_IOBRICK_XBOW	(KLCLASS_MIDPLANE | 0x2)
+
+#define KLTYPE_IOBRICK		(KLCLASS_IOBRICK | 0x0)
+#define KLTYPE_NBRICK		(KLCLASS_IOBRICK | 0x4)
+#define KLTYPE_PXBRICK		(KLCLASS_IOBRICK | 0x6)
+#define KLTYPE_IXBRICK		(KLCLASS_IOBRICK | 0x7)
+#define KLTYPE_CGBRICK		(KLCLASS_IOBRICK | 0x8)
+#define KLTYPE_OPUSBRICK	(KLCLASS_IOBRICK | 0x9)
+#define KLTYPE_SABRICK          (KLCLASS_IOBRICK | 0xa)
+#define KLTYPE_IABRICK		(KLCLASS_IOBRICK | 0xb)
+#define KLTYPE_PABRICK          (KLCLASS_IOBRICK | 0xc)
+#define KLTYPE_GABRICK		(KLCLASS_IOBRICK | 0xd)
+
+
+/* 
+ * board structures
+ */
+
+#define MAX_COMPTS_PER_BRD 24
+
+typedef struct lboard_s {
+	klconf_off_t 	brd_next_any;     /* Next BOARD */
+	unsigned char 	struct_type;      /* type of structure, local or remote */
+	unsigned char 	brd_type;         /* type+class */
+	unsigned char 	brd_sversion;     /* version of this structure */
+        unsigned char 	brd_brevision;    /* board revision */
+        unsigned char 	brd_promver;      /* board prom version, if any */
+ 	unsigned char 	brd_flags;        /* Enabled, Disabled etc */
+	unsigned char 	brd_slot;         /* slot number */
+	unsigned short	brd_debugsw;      /* Debug switches */
+	geoid_t		brd_geoid;	  /* geo id */
+	partid_t 	brd_partition;    /* Partition number */
+        unsigned short 	brd_diagval;      /* diagnostic value */
+        unsigned short 	brd_diagparm;     /* diagnostic parameter */
+        unsigned char 	brd_inventory;    /* inventory history */
+        unsigned char 	brd_numcompts;    /* Number of components */
+        nic_t         	brd_nic;          /* Number in CAN */
+	nasid_t		brd_nasid;        /* passed parameter */
+	klconf_off_t 	brd_compts[MAX_COMPTS_PER_BRD]; /* pointers to COMPONENTS */
+	klconf_off_t 	brd_errinfo;      /* Board's error information */
+	struct lboard_s *brd_parent;	  /* Logical parent for this brd */
+	char            pad0[4];
+	unsigned char	brd_confidence;	  /* confidence that the board is bad */
+	nasid_t		brd_owner;        /* who owns this board */
+	unsigned char 	brd_nic_flags;    /* To handle 8 more NICs */
+	char		pad1[24];	  /* future expansion */
+	char		brd_name[32];
+	nasid_t		brd_next_same_host; /* host of next brd w/same nasid */
+	klconf_off_t	brd_next_same;    /* Next BOARD with same nasid */
+} lboard_t;
+
+/*
+ * Generic info structure. This stores common info about a 
+ * component.
+ */
+ 
+typedef struct klinfo_s {                  /* Generic info */
+        unsigned char   struct_type;       /* type of this structure */
+        unsigned char   struct_version;    /* version of this structure */
+        unsigned char   flags;            /* Enabled, disabled etc */
+        unsigned char   revision;         /* component revision */
+        unsigned short  diagval;          /* result of diagnostics */
+        unsigned short  diagparm;         /* diagnostic parameter */
+        unsigned char   inventory;        /* previous inventory status */
+        unsigned short  partid;		   /* widget part number */
+	nic_t 		nic;              /* MUst be aligned properly */
+        unsigned char   physid;           /* physical id of component */
+        unsigned int    virtid;           /* virtual id as seen by system */
+	unsigned char	widid;	          /* Widget id - if applicable */
+	nasid_t		nasid;            /* node number - from parent */
+	char		pad1;		  /* pad out structure. */
+	char		pad2;		  /* pad out structure. */
+	void		*data;
+        klconf_off_t	errinfo;          /* component specific errors */
+        unsigned short  pad3;             /* pci fields have moved over to */
+        unsigned short  pad4;             /* klbri_t */
+} klinfo_t ;
+
+
+static inline lboard_t *find_lboard_next(lboard_t * brd)
+{
+	if (brd && brd->brd_next_any)
+		return NODE_OFFSET_TO_LBOARD(NASID_GET(brd), brd->brd_next_any);
+        return NULL;
+}
+
+#endif /* _ASM_IA64_SN_KLCONFIG_H */
diff --git a/arch/ia64/include/asm/sn/l1.h b/arch/ia64/include/asm/sn/l1.h
new file mode 100644
index 00000000..344bf44b
--- /dev/null
+++ b/arch/ia64/include/asm/sn/l1.h
@@ -0,0 +1,51 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#ifndef _ASM_IA64_SN_L1_H
+#define _ASM_IA64_SN_L1_H
+
+/* brick type response codes */
+#define L1_BRICKTYPE_PX         0x23            /* # */
+#define L1_BRICKTYPE_PE         0x25            /* % */
+#define L1_BRICKTYPE_N_p0       0x26            /* & */
+#define L1_BRICKTYPE_IP45       0x34            /* 4 */
+#define L1_BRICKTYPE_IP41       0x35            /* 5 */
+#define L1_BRICKTYPE_TWISTER    0x36            /* 6 */ /* IP53 & ROUTER */
+#define L1_BRICKTYPE_IX         0x3d            /* = */
+#define L1_BRICKTYPE_IP34       0x61            /* a */
+#define L1_BRICKTYPE_GA		0x62            /* b */
+#define L1_BRICKTYPE_C          0x63            /* c */
+#define L1_BRICKTYPE_OPUS_TIO	0x66		/* f */
+#define L1_BRICKTYPE_I          0x69            /* i */
+#define L1_BRICKTYPE_N          0x6e            /* n */
+#define L1_BRICKTYPE_OPUS       0x6f		/* o */
+#define L1_BRICKTYPE_P          0x70            /* p */
+#define L1_BRICKTYPE_R          0x72            /* r */
+#define L1_BRICKTYPE_CHI_CG     0x76            /* v */
+#define L1_BRICKTYPE_X          0x78            /* x */
+#define L1_BRICKTYPE_X2         0x79            /* y */
+#define L1_BRICKTYPE_SA		0x5e            /* ^ */
+#define L1_BRICKTYPE_PA		0x6a            /* j */
+#define L1_BRICKTYPE_IA		0x6b            /* k */
+#define L1_BRICKTYPE_ATHENA	0x2b            /* + */
+#define L1_BRICKTYPE_DAYTONA	0x7a            /* z */
+#define L1_BRICKTYPE_1932	0x2c		/* . */
+#define L1_BRICKTYPE_191010	0x2e		/* , */
+
+/* board type response codes */
+#define L1_BOARDTYPE_IP69       0x0100          /* CA */
+#define L1_BOARDTYPE_IP63       0x0200          /* CB */
+#define L1_BOARDTYPE_BASEIO     0x0300          /* IB */
+#define L1_BOARDTYPE_PCIE2SLOT  0x0400          /* IC */
+#define L1_BOARDTYPE_PCIX3SLOT  0x0500          /* ID */
+#define L1_BOARDTYPE_PCIXPCIE4SLOT 0x0600       /* IE */
+#define L1_BOARDTYPE_ABACUS     0x0700          /* AB */
+#define L1_BOARDTYPE_DAYTONA    0x0800          /* AD */
+#define L1_BOARDTYPE_INVAL      (-1)            /* invalid brick type */
+
+#endif /* _ASM_IA64_SN_L1_H */
diff --git a/arch/ia64/include/asm/sn/leds.h b/arch/ia64/include/asm/sn/leds.h
new file mode 100644
index 00000000..66cf8c4d
--- /dev/null
+++ b/arch/ia64/include/asm/sn/leds.h
@@ -0,0 +1,33 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_LEDS_H
+#define _ASM_IA64_SN_LEDS_H
+
+#include <asm/sn/addrs.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/shub_mmr.h>
+
+#define LED0		(LOCAL_MMR_ADDR(SH_REAL_JUNK_BUS_LED0))
+#define LED_CPU_SHIFT	16
+
+#define LED_CPU_HEARTBEAT	0x01
+#define LED_CPU_ACTIVITY	0x02
+#define LED_ALWAYS_SET		0x00
+
+/*
+ * Basic macros for flashing the LEDS on an SGI SN.
+ */
+
+static __inline__ void
+set_led_bits(u8 value, u8 mask)
+{
+	pda->led_state = (pda->led_state & ~mask) | (value & mask);
+	*pda->led_address = (short) pda->led_state;
+}
+
+#endif /* _ASM_IA64_SN_LEDS_H */
+
diff --git a/arch/ia64/include/asm/sn/module.h b/arch/ia64/include/asm/sn/module.h
new file mode 100644
index 00000000..734e980e
--- /dev/null
+++ b/arch/ia64/include/asm/sn/module.h
@@ -0,0 +1,127 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_MODULE_H
+#define _ASM_IA64_SN_MODULE_H
+
+/* parameter for format_module_id() */
+#define MODULE_FORMAT_BRIEF	1
+#define MODULE_FORMAT_LONG	2
+#define MODULE_FORMAT_LCD	3
+
+/*
+ *	Module id format
+ *
+ *	31-16	Rack ID (encoded class, group, number - 16-bit unsigned int)
+ *	 15-8	Brick type (8-bit ascii character)
+ *	  7-0	Bay (brick position in rack (0-63) - 8-bit unsigned int)
+ *
+ */
+
+/*
+ * Macros for getting the brick type
+ */
+#define MODULE_BTYPE_MASK	0xff00
+#define MODULE_BTYPE_SHFT	8
+#define MODULE_GET_BTYPE(_m)	(((_m) & MODULE_BTYPE_MASK) >> MODULE_BTYPE_SHFT)
+#define MODULE_BT_TO_CHAR(_b)	((char)(_b))
+#define MODULE_GET_BTCHAR(_m)	(MODULE_BT_TO_CHAR(MODULE_GET_BTYPE(_m)))
+
+/*
+ * Macros for getting the rack ID.
+ */
+#define MODULE_RACK_MASK	0xffff0000
+#define MODULE_RACK_SHFT	16
+#define MODULE_GET_RACK(_m)	(((_m) & MODULE_RACK_MASK) >> MODULE_RACK_SHFT)
+
+/*
+ * Macros for getting the brick position
+ */
+#define MODULE_BPOS_MASK	0x00ff
+#define MODULE_BPOS_SHFT	0
+#define MODULE_GET_BPOS(_m)	(((_m) & MODULE_BPOS_MASK) >> MODULE_BPOS_SHFT)
+
+/*
+ * Macros for encoding and decoding rack IDs
+ * A rack number consists of three parts:
+ *   class (0==CPU/mixed, 1==I/O), group, number
+ *
+ * Rack number is stored just as it is displayed on the screen:
+ * a 3-decimal-digit number.
+ */
+#define RACK_CLASS_DVDR         100
+#define RACK_GROUP_DVDR         10
+#define RACK_NUM_DVDR           1
+
+#define RACK_CREATE_RACKID(_c, _g, _n)  ((_c) * RACK_CLASS_DVDR +       \
+        (_g) * RACK_GROUP_DVDR + (_n) * RACK_NUM_DVDR)
+
+#define RACK_GET_CLASS(_r)              ((_r) / RACK_CLASS_DVDR)
+#define RACK_GET_GROUP(_r)              (((_r) - RACK_GET_CLASS(_r) *   \
+            RACK_CLASS_DVDR) / RACK_GROUP_DVDR)
+#define RACK_GET_NUM(_r)                (((_r) - RACK_GET_CLASS(_r) *   \
+            RACK_CLASS_DVDR - RACK_GET_GROUP(_r) *      \
+            RACK_GROUP_DVDR) / RACK_NUM_DVDR)
+
+/*
+ * Macros for encoding and decoding rack IDs
+ * A rack number consists of three parts:
+ *   class      1 bit, 0==CPU/mixed, 1==I/O
+ *   group      2 bits for CPU/mixed, 3 bits for I/O
+ *   number     3 bits for CPU/mixed, 2 bits for I/O (1 based)
+ */
+#define RACK_GROUP_BITS(_r)     (RACK_GET_CLASS(_r) ? 3 : 2)
+#define RACK_NUM_BITS(_r)       (RACK_GET_CLASS(_r) ? 2 : 3)
+
+#define RACK_CLASS_MASK(_r)     0x20
+#define RACK_CLASS_SHFT(_r)     5
+#define RACK_ADD_CLASS(_r, _c)  \
+        ((_r) |= (_c) << RACK_CLASS_SHFT(_r) & RACK_CLASS_MASK(_r))
+
+#define RACK_GROUP_SHFT(_r)     RACK_NUM_BITS(_r)
+#define RACK_GROUP_MASK(_r)     \
+        ( (((unsigned)1<<RACK_GROUP_BITS(_r)) - 1) << RACK_GROUP_SHFT(_r) )
+#define RACK_ADD_GROUP(_r, _g)  \
+        ((_r) |= (_g) << RACK_GROUP_SHFT(_r) & RACK_GROUP_MASK(_r))
+
+#define RACK_NUM_SHFT(_r)       0
+#define RACK_NUM_MASK(_r)       \
+        ( (((unsigned)1<<RACK_NUM_BITS(_r)) - 1) << RACK_NUM_SHFT(_r) )
+#define RACK_ADD_NUM(_r, _n)    \
+        ((_r) |= ((_n) - 1) << RACK_NUM_SHFT(_r) & RACK_NUM_MASK(_r))
+
+
+/*
+ * Brick type definitions
+ */
+#define MAX_BRICK_TYPES         256 /* brick type is stored as uchar */
+
+extern char brick_types[];
+
+#define MODULE_CBRICK           0
+#define MODULE_RBRICK           1
+#define MODULE_IBRICK           2
+#define MODULE_KBRICK           3
+#define MODULE_XBRICK           4
+#define MODULE_DBRICK           5
+#define MODULE_PBRICK           6
+#define MODULE_NBRICK           7
+#define MODULE_PEBRICK          8
+#define MODULE_PXBRICK          9
+#define MODULE_IXBRICK          10
+#define MODULE_CGBRICK		11
+#define MODULE_OPUSBRICK        12
+#define MODULE_SABRICK		13	/* TIO BringUp Brick */
+#define MODULE_IABRICK		14
+#define MODULE_PABRICK		15
+#define MODULE_GABRICK		16
+#define MODULE_OPUS_TIO		17	/* OPUS TIO Riser */
+
+extern char brick_types[];
+extern void format_module_id(char *, moduleid_t, int);
+
+#endif /* _ASM_IA64_SN_MODULE_H */
diff --git a/arch/ia64/include/asm/sn/mspec.h b/arch/ia64/include/asm/sn/mspec.h
new file mode 100644
index 00000000..c1d3c50c
--- /dev/null
+++ b/arch/ia64/include/asm/sn/mspec.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2001-2008 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_MSPEC_H
+#define _ASM_IA64_SN_MSPEC_H
+
+#define FETCHOP_VAR_SIZE 64 /* 64 byte per fetchop variable */
+
+#define FETCHOP_LOAD		0
+#define FETCHOP_INCREMENT	8
+#define FETCHOP_DECREMENT	16
+#define FETCHOP_CLEAR		24
+
+#define FETCHOP_STORE		0
+#define FETCHOP_AND		24
+#define FETCHOP_OR		32
+
+#define FETCHOP_CLEAR_CACHE	56
+
+#define FETCHOP_LOAD_OP(addr, op) ( \
+         *(volatile long *)((char*) (addr) + (op)))
+
+#define FETCHOP_STORE_OP(addr, op, x) ( \
+         *(volatile long *)((char*) (addr) + (op)) = (long) (x))
+
+#ifdef __KERNEL__
+
+/*
+ * Each Atomic Memory Operation (amo, formerly known as fetchop)
+ * variable is 64 bytes long.  The first 8 bytes are used.  The
+ * remaining 56 bytes are unaddressable due to the operation taking
+ * that portion of the address.
+ *
+ * NOTE: The amo structure _MUST_ be placed in either the first or second
+ * half of the cache line.  The cache line _MUST NOT_ be used for anything
+ * other than additional amo entries.  This is because there are two
+ * addresses which reference the same physical cache line.  One will
+ * be a cached entry with the memory type bits all set.  This address
+ * may be loaded into processor cache.  The amo will be referenced
+ * uncached via the memory special memory type.  If any portion of the
+ * cached cache-line is modified, when that line is flushed, it will
+ * overwrite the uncached value in physical memory and lead to
+ * inconsistency.
+ */
+struct amo {
+        u64 variable;
+        u64 unused[7];
+};
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_SN_MSPEC_H */
diff --git a/arch/ia64/include/asm/sn/nodepda.h b/arch/ia64/include/asm/sn/nodepda.h
new file mode 100644
index 00000000..ee118b90
--- /dev/null
+++ b/arch/ia64/include/asm/sn/nodepda.h
@@ -0,0 +1,82 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_NODEPDA_H
+#define _ASM_IA64_SN_NODEPDA_H
+
+
+#include <asm/irq.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/bte.h>
+
+/*
+ * NUMA Node-Specific Data structures are defined in this file.
+ * In particular, this is the location of the node PDA.
+ * A pointer to the right node PDA is saved in each CPU PDA.
+ */
+
+/*
+ * Node-specific data structure.
+ *
+ * One of these structures is allocated on each node of a NUMA system.
+ *
+ * This structure provides a convenient way of keeping together 
+ * all per-node data structures. 
+ */
+struct phys_cpuid {
+	short			nasid;
+	char			subnode;
+	char			slice;
+};
+
+struct nodepda_s {
+	void 		*pdinfo;	/* Platform-dependent per-node info */
+
+	/*
+	 * The BTEs on this node are shared by the local cpus
+	 */
+	struct bteinfo_s	bte_if[MAX_BTES_PER_NODE];	/* Virtual Interface */
+	struct timer_list	bte_recovery_timer;
+	spinlock_t		bte_recovery_lock;
+
+	/* 
+	 * Array of pointers to the nodepdas for each node.
+	 */
+	struct nodepda_s	*pernode_pdaindr[MAX_COMPACT_NODES]; 
+
+	/*
+	 * Array of physical cpu identifiers. Indexed by cpuid.
+	 */
+	struct phys_cpuid	phys_cpuid[NR_CPUS];
+	spinlock_t		ptc_lock ____cacheline_aligned_in_smp;
+};
+
+typedef struct nodepda_s nodepda_t;
+
+/*
+ * Access Functions for node PDA.
+ * Since there is one nodepda for each node, we need a convenient mechanism
+ * to access these nodepdas without cluttering code with #ifdefs.
+ * The next set of definitions provides this.
+ * Routines are expected to use 
+ *
+ *	sn_nodepda   - to access node PDA for the node on which code is running
+ *	NODEPDA(cnodeid)   - to access node PDA for cnodeid
+ */
+
+DECLARE_PER_CPU(struct nodepda_s *, __sn_nodepda);
+#define sn_nodepda		(__get_cpu_var(__sn_nodepda))
+#define	NODEPDA(cnodeid)	(sn_nodepda->pernode_pdaindr[cnodeid])
+
+/*
+ * Check if given a compact node id the corresponding node has all the
+ * cpus disabled. 
+ */
+#define is_headless_node(cnodeid)	(nr_cpus_node(cnodeid) == 0)
+
+#endif /* _ASM_IA64_SN_NODEPDA_H */
diff --git a/arch/ia64/include/asm/sn/pcibr_provider.h b/arch/ia64/include/asm/sn/pcibr_provider.h
new file mode 100644
index 00000000..da205b7c
--- /dev/null
+++ b/arch/ia64/include/asm/sn/pcibr_provider.h
@@ -0,0 +1,150 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992-1997,2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
+#define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
+
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibus_provider_defs.h>
+
+/* Workarounds */
+#define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */
+
+#define BUSTYPE_MASK                    0x1
+
+/* Macros given a pcibus structure */
+#define IS_PCIX(ps)     ((ps)->pbi_bridge_mode & BUSTYPE_MASK)
+#define IS_PCI_BRIDGE_ASIC(asic) (asic == PCIIO_ASIC_TYPE_PIC || \
+                asic == PCIIO_ASIC_TYPE_TIOCP)
+#define IS_PIC_SOFT(ps)     (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC)
+#define IS_TIOCP_SOFT(ps)   (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_TIOCP)
+
+
+/*
+ * The different PCI Bridge types supported on the SGI Altix platforms
+ */
+#define PCIBR_BRIDGETYPE_UNKNOWN       -1
+#define PCIBR_BRIDGETYPE_PIC            2
+#define PCIBR_BRIDGETYPE_TIOCP          3
+
+/*
+ * Bridge 64bit Direct Map Attributes
+ */
+#define PCI64_ATTR_PREF                 (1ull << 59)
+#define PCI64_ATTR_PREC                 (1ull << 58)
+#define PCI64_ATTR_VIRTUAL              (1ull << 57)
+#define PCI64_ATTR_BAR                  (1ull << 56)
+#define PCI64_ATTR_SWAP                 (1ull << 55)
+#define PCI64_ATTR_VIRTUAL1             (1ull << 54)
+
+#define PCI32_LOCAL_BASE                0
+#define PCI32_MAPPED_BASE               0x40000000
+#define PCI32_DIRECT_BASE               0x80000000
+
+#define IS_PCI32_MAPPED(x)              ((u64)(x) < PCI32_DIRECT_BASE && \
+                                         (u64)(x) >= PCI32_MAPPED_BASE)
+#define IS_PCI32_DIRECT(x)              ((u64)(x) >= PCI32_MAPPED_BASE)
+
+
+/*
+ * Bridge PMU Address Transaltion Entry Attibutes
+ */
+#define PCI32_ATE_V                     (0x1 << 0)
+#define PCI32_ATE_CO                    (0x1 << 1)	/* PIC ASIC ONLY */
+#define PCI32_ATE_PIO                   (0x1 << 1)	/* TIOCP ASIC ONLY */
+#define PCI32_ATE_MSI                   (0x1 << 2)
+#define PCI32_ATE_PREF                  (0x1 << 3)
+#define PCI32_ATE_BAR                   (0x1 << 4)
+#define PCI32_ATE_ADDR_SHFT             12
+
+#define MINIMAL_ATES_REQUIRED(addr, size) \
+	(IOPG(IOPGOFF(addr) + (size) - 1) == IOPG((size) - 1))
+
+#define MINIMAL_ATE_FLAG(addr, size) \
+	(MINIMAL_ATES_REQUIRED((u64)addr, size) ? 1 : 0)
+
+/* bit 29 of the pci address is the SWAP bit */
+#define ATE_SWAPSHIFT                   29
+#define ATE_SWAP_ON(x)                  ((x) |= (1 << ATE_SWAPSHIFT))
+#define ATE_SWAP_OFF(x)                 ((x) &= ~(1 << ATE_SWAPSHIFT))
+
+/*
+ * I/O page size
+ */
+#if PAGE_SIZE < 16384
+#define IOPFNSHIFT                      12      /* 4K per mapped page */
+#else
+#define IOPFNSHIFT                      14      /* 16K per mapped page */
+#endif
+
+#define IOPGSIZE                        (1 << IOPFNSHIFT)
+#define IOPG(x)                         ((x) >> IOPFNSHIFT)
+#define IOPGOFF(x)                      ((x) & (IOPGSIZE-1))
+
+#define PCIBR_DEV_SWAP_DIR              (1ull << 19)
+#define PCIBR_CTRL_PAGE_SIZE            (0x1 << 21)
+
+/*
+ * PMU resources.
+ */
+struct ate_resource{
+	u64 *ate;
+	u64 num_ate;
+	u64 lowest_free_index;
+};
+
+struct pcibus_info {
+	struct pcibus_bussoft	pbi_buscommon;   /* common header */
+	u32                pbi_moduleid;
+	short                   pbi_bridge_type;
+	short                   pbi_bridge_mode;
+
+	struct ate_resource     pbi_int_ate_resource;
+	u64                pbi_int_ate_size;
+
+	u64                pbi_dir_xbase;
+	char                    pbi_hub_xid;
+
+	u64                pbi_devreg[8];
+
+	u32		pbi_valid_devices;
+	u32		pbi_enabled_devices;
+
+	spinlock_t              pbi_lock;
+};
+
+extern int  pcibr_init_provider(void);
+extern void *pcibr_bus_fixup(struct pcibus_bussoft *, struct pci_controller *);
+extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t, int type);
+extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t, int type);
+extern void pcibr_dma_unmap(struct pci_dev *, dma_addr_t, int);
+
+/*
+ * prototypes for the bridge asic register access routines in pcibr_reg.c
+ */
+extern void             pcireg_control_bit_clr(struct pcibus_info *, u64);
+extern void             pcireg_control_bit_set(struct pcibus_info *, u64);
+extern u64         pcireg_tflush_get(struct pcibus_info *);
+extern u64         pcireg_intr_status_get(struct pcibus_info *);
+extern void             pcireg_intr_enable_bit_clr(struct pcibus_info *, u64);
+extern void             pcireg_intr_enable_bit_set(struct pcibus_info *, u64);
+extern void             pcireg_intr_addr_addr_set(struct pcibus_info *, int, u64);
+extern void             pcireg_force_intr_set(struct pcibus_info *, int);
+extern u64         pcireg_wrb_flush_get(struct pcibus_info *, int);
+extern void             pcireg_int_ate_set(struct pcibus_info *, int, u64);
+extern u64 __iomem *	pcireg_int_ate_addr(struct pcibus_info *, int);
+extern void 		pcibr_force_interrupt(struct sn_irq_info *sn_irq_info);
+extern void 		pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info);
+extern int 		pcibr_ate_alloc(struct pcibus_info *, int);
+extern void 		pcibr_ate_free(struct pcibus_info *, int);
+extern void 		ate_write(struct pcibus_info *, int, int, u64);
+extern int sal_pcibr_slot_enable(struct pcibus_info *soft, int device,
+				 void *resp, char **ssdt);
+extern int sal_pcibr_slot_disable(struct pcibus_info *soft, int device,
+				  int action, void *resp);
+extern u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus);
+#endif
diff --git a/arch/ia64/include/asm/sn/pcibus_provider_defs.h b/arch/ia64/include/asm/sn/pcibus_provider_defs.h
new file mode 100644
index 00000000..8f7c83d0
--- /dev/null
+++ b/arch/ia64/include/asm/sn/pcibus_provider_defs.h
@@ -0,0 +1,68 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
+#define _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
+
+/*
+ * SN pci asic types.  Do not ever renumber these or reuse values.  The
+ * values must agree with what prom thinks they are.
+ */
+
+#define PCIIO_ASIC_TYPE_UNKNOWN	0
+#define PCIIO_ASIC_TYPE_PPB	1
+#define PCIIO_ASIC_TYPE_PIC	2
+#define PCIIO_ASIC_TYPE_TIOCP	3
+#define PCIIO_ASIC_TYPE_TIOCA	4
+#define PCIIO_ASIC_TYPE_TIOCE	5
+
+#define PCIIO_ASIC_MAX_TYPES	6
+
+/*
+ * Common pciio bus provider data.  There should be one of these as the
+ * first field in any pciio based provider soft structure (e.g. pcibr_soft
+ * tioca_soft, etc).
+ */
+
+struct pcibus_bussoft {
+	u32		bs_asic_type;	/* chipset type */
+	u32		bs_xid;		/* xwidget id */
+	u32		bs_persist_busnum; /* Persistent Bus Number */
+	u32		bs_persist_segment; /* Segment Number */
+	u64		bs_legacy_io;	/* legacy io pio addr */
+	u64		bs_legacy_mem;	/* legacy mem pio addr */
+	u64		bs_base;	/* widget base */
+	struct xwidget_info	*bs_xwidget_info;
+};
+
+struct pci_controller;
+/*
+ * SN pci bus indirection
+ */
+
+struct sn_pcibus_provider {
+	dma_addr_t	(*dma_map)(struct pci_dev *, unsigned long, size_t, int flags);
+	dma_addr_t	(*dma_map_consistent)(struct pci_dev *, unsigned long, size_t, int flags);
+	void		(*dma_unmap)(struct pci_dev *, dma_addr_t, int);
+	void *		(*bus_fixup)(struct pcibus_bussoft *, struct pci_controller *);
+ 	void		(*force_interrupt)(struct sn_irq_info *);
+ 	void		(*target_interrupt)(struct sn_irq_info *);
+};
+
+/*
+ * Flags used by the map interfaces
+ * bits 3:0 specifies format of passed in address
+ * bit  4   specifies that address is to be used for MSI
+ */
+
+#define SN_DMA_ADDRTYPE(x)	((x) & 0xf)
+#define     SN_DMA_ADDR_PHYS	1	/* address is an xio address. */
+#define     SN_DMA_ADDR_XIO	2	/* address is phys memory */
+#define SN_DMA_MSI		0x10	/* Bus address is to be used for MSI */
+
+extern struct sn_pcibus_provider *sn_pci_provider[];
+#endif				/* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */
diff --git a/arch/ia64/include/asm/sn/pcidev.h b/arch/ia64/include/asm/sn/pcidev.h
new file mode 100644
index 00000000..1c2382ce
--- /dev/null
+++ b/arch/ia64/include/asm/sn/pcidev.h
@@ -0,0 +1,85 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIDEV_H
+#define _ASM_IA64_SN_PCI_PCIDEV_H
+
+#include <linux/pci.h>
+
+/*
+ * In ia64, pci_dev->sysdata must be a *pci_controller. To provide access to
+ * the pcidev_info structs for all devices under a controller, we keep a
+ * list of pcidev_info under pci_controller->platform_data.
+ */
+struct sn_platform_data {
+	void *provider_soft;
+	struct list_head pcidev_info;
+};
+
+#define SN_PLATFORM_DATA(busdev) \
+	((struct sn_platform_data *)(PCI_CONTROLLER(busdev)->platform_data))
+
+#define SN_PCIDEV_INFO(dev)	sn_pcidev_info_get(dev)
+
+/*
+ * Given a pci_bus, return the sn pcibus_bussoft struct.  Note that
+ * this only works for root busses, not for busses represented by PPB's.
+ */
+
+#define SN_PCIBUS_BUSSOFT(pci_bus) \
+	((struct pcibus_bussoft *)(SN_PLATFORM_DATA(pci_bus)->provider_soft))
+
+#define SN_PCIBUS_BUSSOFT_INFO(pci_bus) \
+	((struct pcibus_info *)(SN_PLATFORM_DATA(pci_bus)->provider_soft))
+/*
+ * Given a struct pci_dev, return the sn pcibus_bussoft struct.  Note
+ * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due
+ * due to possible PPB's in the path.
+ */
+
+#define SN_PCIDEV_BUSSOFT(pci_dev) \
+	(SN_PCIDEV_INFO(pci_dev)->pdi_host_pcidev_info->pdi_pcibus_info)
+
+#define SN_PCIDEV_BUSPROVIDER(pci_dev) \
+	(SN_PCIDEV_INFO(pci_dev)->pdi_provider)
+
+#define PCIIO_BUS_NONE	255      /* bus 255 reserved */
+#define PCIIO_SLOT_NONE 255
+#define PCIIO_FUNC_NONE 255
+#define PCIIO_VENDOR_ID_NONE	(-1)
+
+struct pcidev_info {
+	u64		pdi_pio_mapped_addr[7]; /* 6 BARs PLUS 1 ROM */
+	u64		pdi_slot_host_handle;	/* Bus and devfn Host pci_dev */
+
+	struct pcibus_bussoft	*pdi_pcibus_info;	/* Kernel common bus soft */
+	struct pcidev_info	*pdi_host_pcidev_info;	/* Kernel Host pci_dev */
+	struct pci_dev		*pdi_linux_pcidev;	/* Kernel pci_dev */
+
+	struct sn_irq_info	*pdi_sn_irq_info;
+	struct sn_pcibus_provider *pdi_provider;	/* sn pci ops */
+	struct pci_dev 		*host_pci_dev;		/* host bus link */
+	struct list_head	pdi_list;		/* List of pcidev_info */
+};
+
+extern void sn_irq_fixup(struct pci_dev *pci_dev,
+			 struct sn_irq_info *sn_irq_info);
+extern void sn_irq_unfixup(struct pci_dev *pci_dev);
+extern struct pcidev_info * sn_pcidev_info_get(struct pci_dev *);
+extern void sn_bus_fixup(struct pci_bus *);
+extern void sn_acpi_bus_fixup(struct pci_bus *);
+extern void sn_common_bus_fixup(struct pci_bus *, struct pcibus_bussoft *);
+extern void sn_bus_store_sysdata(struct pci_dev *dev);
+extern void sn_bus_free_sysdata(void);
+extern void sn_generate_path(struct pci_bus *pci_bus, char *address);
+extern void sn_io_slot_fixup(struct pci_dev *);
+extern void sn_acpi_slot_fixup(struct pci_dev *);
+extern void sn_pci_fixup_slot(struct pci_dev *dev, struct pcidev_info *,
+			      struct sn_irq_info *);
+extern void sn_pci_unfixup_slot(struct pci_dev *dev);
+extern void sn_irq_lh_init(void);
+#endif				/* _ASM_IA64_SN_PCI_PCIDEV_H */
diff --git a/arch/ia64/include/asm/sn/pda.h b/arch/ia64/include/asm/sn/pda.h
new file mode 100644
index 00000000..1c5108d4
--- /dev/null
+++ b/arch/ia64/include/asm/sn/pda.h
@@ -0,0 +1,69 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PDA_H
+#define _ASM_IA64_SN_PDA_H
+
+#include <linux/cache.h>
+#include <asm/percpu.h>
+#include <asm/system.h>
+
+
+/*
+ * CPU-specific data structure.
+ *
+ * One of these structures is allocated for each cpu of a NUMA system.
+ *
+ * This structure provides a convenient way of keeping together 
+ * all SN per-cpu data structures. 
+ */
+
+typedef struct pda_s {
+
+	/*
+	 * Support for SN LEDs
+	 */
+	volatile short	*led_address;
+	u8		led_state;
+	u8		hb_state;	/* supports blinking heartbeat leds */
+	unsigned int	hb_count;
+
+	unsigned int	idle_flag;
+	
+	volatile unsigned long *bedrock_rev_id;
+	volatile unsigned long *pio_write_status_addr;
+	unsigned long pio_write_status_val;
+	volatile unsigned long *pio_shub_war_cam_addr;
+
+	unsigned long	sn_in_service_ivecs[4];
+	int		sn_lb_int_war_ticks;
+	int		sn_last_irq;
+	int		sn_first_irq;
+} pda_t;
+
+
+#define CACHE_ALIGN(x)	(((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+/*
+ * PDA
+ * Per-cpu private data area for each cpu. The PDA is located immediately after
+ * the IA64 cpu_data area. A full page is allocated for the cp_data area for each
+ * cpu but only a small amout of the page is actually used. We put the SNIA PDA
+ * in the same page as the cpu_data area. Note that there is a check in the setup
+ * code to verify that we don't overflow the page.
+ *
+ * Seems like we should should cache-line align the pda so that any changes in the
+ * size of the cpu_data area don't change cache layout. Should we align to 32, 64, 128
+ * or 512 boundary. Each has merits. For now, pick 128 but should be revisited later.
+ */
+DECLARE_PER_CPU(struct pda_s, pda_percpu);
+
+#define pda		(&__ia64_per_cpu_var(pda_percpu))
+
+#define pdacpu(cpu)	(&per_cpu(pda_percpu, cpu))
+
+#endif /* _ASM_IA64_SN_PDA_H */
diff --git a/arch/ia64/include/asm/sn/pic.h b/arch/ia64/include/asm/sn/pic.h
new file mode 100644
index 00000000..5f9da5fd
--- /dev/null
+++ b/arch/ia64/include/asm/sn/pic.h
@@ -0,0 +1,261 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PIC_H
+#define _ASM_IA64_SN_PCI_PIC_H
+
+/*
+ * PIC AS DEVICE ZERO
+ * ------------------
+ *
+ * PIC handles PCI/X busses.  PCI/X requires that the 'bridge' (i.e. PIC)
+ * be designated as 'device 0'.   That is a departure from earlier SGI
+ * PCI bridges.  Because of that we use config space 1 to access the
+ * config space of the first actual PCI device on the bus.
+ * Here's what the PIC manual says:
+ *
+ *     The current PCI-X bus specification now defines that the parent
+ *     hosts bus bridge (PIC for example) must be device 0 on bus 0. PIC
+ *     reduced the total number of devices from 8 to 4 and removed the
+ *     device registers and windows, now only supporting devices 0,1,2, and
+ *     3. PIC did leave all 8 configuration space windows. The reason was
+ *     there was nothing to gain by removing them. Here in lies the problem.
+ *     The device numbering we do using 0 through 3 is unrelated to the device
+ *     numbering which PCI-X requires in configuration space. In the past we
+ *     correlated Configs pace and our device space 0 <-> 0, 1 <-> 1, etc.
+ *     PCI-X requires we start a 1, not 0 and currently the PX brick
+ *     does associate our:
+ *
+ *         device 0 with configuration space window 1,
+ *         device 1 with configuration space window 2,
+ *         device 2 with configuration space window 3,
+ *         device 3 with configuration space window 4.
+ *
+ * The net effect is that all config space access are off-by-one with
+ * relation to other per-slot accesses on the PIC.
+ * Here is a table that shows some of that:
+ *
+ *                               Internal Slot#
+ *           |
+ *           |     0         1        2         3
+ * ----------|---------------------------------------
+ * config    |  0x21000   0x22000  0x23000   0x24000
+ *           |
+ * even rrb  |  0[0]      n/a      1[0]      n/a	[] == implied even/odd
+ *           |
+ * odd rrb   |  n/a       0[1]     n/a       1[1]
+ *           |
+ * int dev   |  00       01        10        11
+ *           |
+ * ext slot# |  1        2         3         4
+ * ----------|---------------------------------------
+ */
+
+#define PIC_ATE_TARGETID_SHFT           8
+#define PIC_HOST_INTR_ADDR              0x0000FFFFFFFFFFFFUL
+#define PIC_PCI64_ATTR_TARG_SHFT        60
+
+
+/*****************************************************************************
+ *********************** PIC MMR structure mapping ***************************
+ *****************************************************************************/
+
+/* NOTE: PIC WAR. PV#854697.  PIC does not allow writes just to [31:0]
+ * of a 64-bit register.  When writing PIC registers, always write the
+ * entire 64 bits.
+ */
+
+struct pic {
+
+    /* 0x000000-0x00FFFF -- Local Registers */
+
+    /* 0x000000-0x000057 -- Standard Widget Configuration */
+    u64		p_wid_id;			/* 0x000000 */
+    u64		p_wid_stat;			/* 0x000008 */
+    u64		p_wid_err_upper;		/* 0x000010 */
+    u64		p_wid_err_lower;		/* 0x000018 */
+    #define p_wid_err p_wid_err_lower
+    u64		p_wid_control;			/* 0x000020 */
+    u64		p_wid_req_timeout;		/* 0x000028 */
+    u64		p_wid_int_upper;		/* 0x000030 */
+    u64		p_wid_int_lower;		/* 0x000038 */
+    #define p_wid_int p_wid_int_lower
+    u64		p_wid_err_cmdword;		/* 0x000040 */
+    u64		p_wid_llp;			/* 0x000048 */
+    u64		p_wid_tflush;			/* 0x000050 */
+
+    /* 0x000058-0x00007F -- Bridge-specific Widget Configuration */
+    u64		p_wid_aux_err;			/* 0x000058 */
+    u64		p_wid_resp_upper;		/* 0x000060 */
+    u64		p_wid_resp_lower;		/* 0x000068 */
+    #define p_wid_resp p_wid_resp_lower
+    u64		p_wid_tst_pin_ctrl;		/* 0x000070 */
+    u64		p_wid_addr_lkerr;		/* 0x000078 */
+
+    /* 0x000080-0x00008F -- PMU & MAP */
+    u64		p_dir_map;			/* 0x000080 */
+    u64		_pad_000088;			/* 0x000088 */
+
+    /* 0x000090-0x00009F -- SSRAM */
+    u64		p_map_fault;			/* 0x000090 */
+    u64		_pad_000098;			/* 0x000098 */
+
+    /* 0x0000A0-0x0000AF -- Arbitration */
+    u64		p_arb;				/* 0x0000A0 */
+    u64		_pad_0000A8;			/* 0x0000A8 */
+
+    /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */
+    u64		p_ate_parity_err;		/* 0x0000B0 */
+    u64		_pad_0000B8;			/* 0x0000B8 */
+
+    /* 0x0000C0-0x0000FF -- PCI/GIO */
+    u64		p_bus_timeout;			/* 0x0000C0 */
+    u64		p_pci_cfg;			/* 0x0000C8 */
+    u64		p_pci_err_upper;		/* 0x0000D0 */
+    u64		p_pci_err_lower;		/* 0x0000D8 */
+    #define p_pci_err p_pci_err_lower
+    u64		_pad_0000E0[4];			/* 0x0000{E0..F8} */
+
+    /* 0x000100-0x0001FF -- Interrupt */
+    u64		p_int_status;			/* 0x000100 */
+    u64		p_int_enable;			/* 0x000108 */
+    u64		p_int_rst_stat;			/* 0x000110 */
+    u64		p_int_mode;			/* 0x000118 */
+    u64		p_int_device;			/* 0x000120 */
+    u64		p_int_host_err;			/* 0x000128 */
+    u64		p_int_addr[8];			/* 0x0001{30,,,68} */
+    u64		p_err_int_view;			/* 0x000170 */
+    u64		p_mult_int;			/* 0x000178 */
+    u64		p_force_always[8];		/* 0x0001{80,,,B8} */
+    u64		p_force_pin[8];			/* 0x0001{C0,,,F8} */
+
+    /* 0x000200-0x000298 -- Device */
+    u64		p_device[4];			/* 0x0002{00,,,18} */
+    u64		_pad_000220[4];			/* 0x0002{20,,,38} */
+    u64		p_wr_req_buf[4];		/* 0x0002{40,,,58} */
+    u64		_pad_000260[4];			/* 0x0002{60,,,78} */
+    u64		p_rrb_map[2];			/* 0x0002{80,,,88} */
+    #define p_even_resp p_rrb_map[0]			/* 0x000280 */
+    #define p_odd_resp  p_rrb_map[1]			/* 0x000288 */
+    u64		p_resp_status;			/* 0x000290 */
+    u64		p_resp_clear;			/* 0x000298 */
+
+    u64		_pad_0002A0[12];		/* 0x0002{A0..F8} */
+
+    /* 0x000300-0x0003F8 -- Buffer Address Match Registers */
+    struct {
+	u64	upper;				/* 0x0003{00,,,F0} */
+	u64	lower;				/* 0x0003{08,,,F8} */
+    } p_buf_addr_match[16];
+
+    /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */
+    struct {
+	u64	flush_w_touch;			/* 0x000{400,,,5C0} */
+	u64	flush_wo_touch;			/* 0x000{408,,,5C8} */
+	u64	inflight;			/* 0x000{410,,,5D0} */
+	u64	prefetch;			/* 0x000{418,,,5D8} */
+	u64	total_pci_retry;		/* 0x000{420,,,5E0} */
+	u64	max_pci_retry;			/* 0x000{428,,,5E8} */
+	u64	max_latency;			/* 0x000{430,,,5F0} */
+	u64	clear_all;			/* 0x000{438,,,5F8} */
+    } p_buf_count[8];
+
+
+    /* 0x000600-0x0009FF -- PCI/X registers */
+    u64		p_pcix_bus_err_addr;		/* 0x000600 */
+    u64		p_pcix_bus_err_attr;		/* 0x000608 */
+    u64		p_pcix_bus_err_data;		/* 0x000610 */
+    u64		p_pcix_pio_split_addr;		/* 0x000618 */
+    u64		p_pcix_pio_split_attr;		/* 0x000620 */
+    u64		p_pcix_dma_req_err_attr;	/* 0x000628 */
+    u64		p_pcix_dma_req_err_addr;	/* 0x000630 */
+    u64		p_pcix_timeout;			/* 0x000638 */
+
+    u64		_pad_000640[120];		/* 0x000{640,,,9F8} */
+
+    /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */
+    struct {
+	u64	p_buf_addr;			/* 0x000{A00,,,AF0} */
+	u64	p_buf_attr;			/* 0X000{A08,,,AF8} */
+    } p_pcix_read_buf_64[16];
+
+    struct {
+	u64	p_buf_addr;			/* 0x000{B00,,,BE0} */
+	u64	p_buf_attr;			/* 0x000{B08,,,BE8} */
+	u64	p_buf_valid;			/* 0x000{B10,,,BF0} */
+	u64	__pad1;				/* 0x000{B18,,,BF8} */
+    } p_pcix_write_buf_64[8];
+
+    /* End of Local Registers -- Start of Address Map space */
+
+    char		_pad_000c00[0x010000 - 0x000c00];
+
+    /* 0x010000-0x011fff -- Internal ATE RAM (Auto Parity Generation) */
+    u64		p_int_ate_ram[1024];		/* 0x010000-0x011fff */
+
+    /* 0x012000-0x013fff -- Internal ATE RAM (Manual Parity Generation) */
+    u64		p_int_ate_ram_mp[1024];		/* 0x012000-0x013fff */
+
+    char		_pad_014000[0x18000 - 0x014000];
+
+    /* 0x18000-0x197F8 -- PIC Write Request Ram */
+    u64		p_wr_req_lower[256];		/* 0x18000 - 0x187F8 */
+    u64		p_wr_req_upper[256];		/* 0x18800 - 0x18FF8 */
+    u64		p_wr_req_parity[256];		/* 0x19000 - 0x197F8 */
+
+    char		_pad_019800[0x20000 - 0x019800];
+
+    /* 0x020000-0x027FFF -- PCI Device Configuration Spaces */
+    union {
+	u8		c[0x1000 / 1];			/* 0x02{0000,,,7FFF} */
+	u16	s[0x1000 / 2];			/* 0x02{0000,,,7FFF} */
+	u32	l[0x1000 / 4];			/* 0x02{0000,,,7FFF} */
+	u64	d[0x1000 / 8];			/* 0x02{0000,,,7FFF} */
+	union {
+	    u8	c[0x100 / 1];
+	    u16	s[0x100 / 2];
+	    u32	l[0x100 / 4];
+	    u64	d[0x100 / 8];
+	} f[8];
+    } p_type0_cfg_dev[8];				/* 0x02{0000,,,7FFF} */
+
+    /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */
+    union {
+	u8		c[0x1000 / 1];			/* 0x028000-0x029000 */
+	u16	s[0x1000 / 2];			/* 0x028000-0x029000 */
+	u32	l[0x1000 / 4];			/* 0x028000-0x029000 */
+	u64	d[0x1000 / 8];			/* 0x028000-0x029000 */
+	union {
+	    u8	c[0x100 / 1];
+	    u16	s[0x100 / 2];
+	    u32	l[0x100 / 4];
+	    u64	d[0x100 / 8];
+	} f[8];
+    } p_type1_cfg;					/* 0x028000-0x029000 */
+
+    char		_pad_029000[0x030000-0x029000];
+
+    /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */
+    union {
+	u8		c[8 / 1];
+	u16	s[8 / 2];
+	u32	l[8 / 4];
+	u64	d[8 / 8];
+    } p_pci_iack;					/* 0x030000-0x030007 */
+
+    char		_pad_030007[0x040000-0x030008];
+
+    /* 0x040000-0x030007 -- PCIX Special Cycle */
+    union {
+	u8		c[8 / 1];
+	u16	s[8 / 2];
+	u32	l[8 / 4];
+	u64	d[8 / 8];
+    } p_pcix_cycle;					/* 0x040000-0x040007 */
+};
+
+#endif                          /* _ASM_IA64_SN_PCI_PIC_H */
diff --git a/arch/ia64/include/asm/sn/rw_mmr.h b/arch/ia64/include/asm/sn/rw_mmr.h
new file mode 100644
index 00000000..2d78f4c5
--- /dev/null
+++ b/arch/ia64/include/asm/sn/rw_mmr.h
@@ -0,0 +1,28 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2002-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+#ifndef _ASM_IA64_SN_RW_MMR_H
+#define _ASM_IA64_SN_RW_MMR_H
+
+
+/*
+ * This file that access MMRs via uncached physical addresses.
+ * 	pio_phys_read_mmr  - read an MMR
+ * 	pio_phys_write_mmr - write an MMR
+ * 	pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0
+ *		Second MMR will be skipped if address is NULL
+ *
+ * Addresses passed to these routines should be uncached physical addresses
+ * ie., 0x80000....
+ */
+
+
+extern long pio_phys_read_mmr(volatile long *mmr); 
+extern void pio_phys_write_mmr(volatile long *mmr, long val);
+extern void pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2); 
+
+#endif /* _ASM_IA64_SN_RW_MMR_H */
diff --git a/arch/ia64/include/asm/sn/shub_mmr.h b/arch/ia64/include/asm/sn/shub_mmr.h
new file mode 100644
index 00000000..a84d870f
--- /dev/null
+++ b/arch/ia64/include/asm/sn/shub_mmr.h
@@ -0,0 +1,502 @@
+/*
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_SHUB_MMR_H
+#define _ASM_IA64_SN_SHUB_MMR_H
+
+/* ==================================================================== */
+/*                        Register "SH_IPI_INT"                         */
+/*               SHub Inter-Processor Interrupt Registers               */
+/* ==================================================================== */
+#define SH1_IPI_INT			__IA64_UL_CONST(0x0000000110000380)
+#define SH2_IPI_INT			__IA64_UL_CONST(0x0000000010000380)
+
+/*   SH_IPI_INT_TYPE                                                    */
+/*   Description:  Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT       */
+#define SH_IPI_INT_TYPE_SHFT				0
+#define SH_IPI_INT_TYPE_MASK		__IA64_UL_CONST(0x0000000000000007)
+
+/*   SH_IPI_INT_AGT                                                     */
+/*   Description:  Agent, must be 0 for SHub                            */
+#define SH_IPI_INT_AGT_SHFT				3
+#define SH_IPI_INT_AGT_MASK		__IA64_UL_CONST(0x0000000000000008)
+
+/*   SH_IPI_INT_PID                                                     */
+/*   Description:  Processor ID, same setting as on targeted McKinley  */
+#define SH_IPI_INT_PID_SHFT                      	4
+#define SH_IPI_INT_PID_MASK		__IA64_UL_CONST(0x00000000000ffff0)
+
+/*   SH_IPI_INT_BASE                                                    */
+/*   Description:  Optional interrupt vector area, 2MB aligned          */
+#define SH_IPI_INT_BASE_SHFT				21
+#define SH_IPI_INT_BASE_MASK 		__IA64_UL_CONST(0x0003ffffffe00000)
+
+/*   SH_IPI_INT_IDX                                                     */
+/*   Description:  Targeted McKinley interrupt vector                   */
+#define SH_IPI_INT_IDX_SHFT				52
+#define SH_IPI_INT_IDX_MASK		__IA64_UL_CONST(0x0ff0000000000000)
+
+/*   SH_IPI_INT_SEND                                                    */
+/*   Description:  Send Interrupt Message to PI, This generates a puls  */
+#define SH_IPI_INT_SEND_SHFT				63
+#define SH_IPI_INT_SEND_MASK		__IA64_UL_CONST(0x8000000000000000)
+
+/* ==================================================================== */
+/*                     Register "SH_EVENT_OCCURRED"                     */
+/*                    SHub Interrupt Event Occurred                     */
+/* ==================================================================== */
+#define SH1_EVENT_OCCURRED		__IA64_UL_CONST(0x0000000110010000)
+#define SH1_EVENT_OCCURRED_ALIAS	__IA64_UL_CONST(0x0000000110010008)
+#define SH2_EVENT_OCCURRED		__IA64_UL_CONST(0x0000000010010000)
+#define SH2_EVENT_OCCURRED_ALIAS 	__IA64_UL_CONST(0x0000000010010008)
+
+/* ==================================================================== */
+/*                     Register "SH_PI_CAM_CONTROL"                     */
+/*                      CRB CAM MMR Access Control                      */
+/* ==================================================================== */
+#define SH1_PI_CAM_CONTROL		__IA64_UL_CONST(0x0000000120050300)
+
+/* ==================================================================== */
+/*                        Register "SH_SHUB_ID"                         */
+/*                            SHub ID Number                            */
+/* ==================================================================== */
+#define SH1_SHUB_ID			__IA64_UL_CONST(0x0000000110060580)
+#define SH1_SHUB_ID_REVISION_SHFT			28
+#define SH1_SHUB_ID_REVISION_MASK	__IA64_UL_CONST(0x00000000f0000000)
+
+/* ==================================================================== */
+/*                          Register "SH_RTC"                           */
+/*                           Real-time Clock                            */
+/* ==================================================================== */
+#define SH1_RTC				__IA64_UL_CONST(0x00000001101c0000)
+#define SH2_RTC				__IA64_UL_CONST(0x00000002101c0000)
+#define SH_RTC_MASK			__IA64_UL_CONST(0x007fffffffffffff)
+
+/* ==================================================================== */
+/*                   Register "SH_PIO_WRITE_STATUS_0|1"                 */
+/*                      PIO Write Status for CPU 0 & 1                  */
+/* ==================================================================== */
+#define SH1_PIO_WRITE_STATUS_0		__IA64_UL_CONST(0x0000000120070200)
+#define SH1_PIO_WRITE_STATUS_1		__IA64_UL_CONST(0x0000000120070280)
+#define SH2_PIO_WRITE_STATUS_0		__IA64_UL_CONST(0x0000000020070200)
+#define SH2_PIO_WRITE_STATUS_1		__IA64_UL_CONST(0x0000000020070280)
+#define SH2_PIO_WRITE_STATUS_2		__IA64_UL_CONST(0x0000000020070300)
+#define SH2_PIO_WRITE_STATUS_3		__IA64_UL_CONST(0x0000000020070380)
+
+/*   SH_PIO_WRITE_STATUS_0_WRITE_DEADLOCK                               */
+/*   Description:  Deadlock response detected                           */
+#define SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT		1
+#define SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK \
+					__IA64_UL_CONST(0x0000000000000002)
+
+/*   SH_PIO_WRITE_STATUS_0_PENDING_WRITE_COUNT                          */
+/*   Description:  Count of currently pending PIO writes                */
+#define SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_SHFT	56
+#define SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK \
+					__IA64_UL_CONST(0x3f00000000000000)
+
+/* ==================================================================== */
+/*                Register "SH_PIO_WRITE_STATUS_0_ALIAS"                */
+/* ==================================================================== */
+#define SH1_PIO_WRITE_STATUS_0_ALIAS	__IA64_UL_CONST(0x0000000120070208)
+#define SH2_PIO_WRITE_STATUS_0_ALIAS	__IA64_UL_CONST(0x0000000020070208)
+
+/* ==================================================================== */
+/*                     Register "SH_EVENT_OCCURRED"                     */
+/*                    SHub Interrupt Event Occurred                     */
+/* ==================================================================== */
+/*   SH_EVENT_OCCURRED_UART_INT                                         */
+/*   Description:  Pending Junk Bus UART Interrupt                      */
+#define SH_EVENT_OCCURRED_UART_INT_SHFT			20
+#define SH_EVENT_OCCURRED_UART_INT_MASK	__IA64_UL_CONST(0x0000000000100000)
+
+/*   SH_EVENT_OCCURRED_IPI_INT                                          */
+/*   Description:  Pending IPI Interrupt                                */
+#define SH_EVENT_OCCURRED_IPI_INT_SHFT			28
+#define SH_EVENT_OCCURRED_IPI_INT_MASK	__IA64_UL_CONST(0x0000000010000000)
+
+/*   SH_EVENT_OCCURRED_II_INT0                                          */
+/*   Description:  Pending II 0 Interrupt                               */
+#define SH_EVENT_OCCURRED_II_INT0_SHFT			29
+#define SH_EVENT_OCCURRED_II_INT0_MASK	__IA64_UL_CONST(0x0000000020000000)
+
+/*   SH_EVENT_OCCURRED_II_INT1                                          */
+/*   Description:  Pending II 1 Interrupt                               */
+#define SH_EVENT_OCCURRED_II_INT1_SHFT			30
+#define SH_EVENT_OCCURRED_II_INT1_MASK	__IA64_UL_CONST(0x0000000040000000)
+
+/*   SH2_EVENT_OCCURRED_EXTIO_INT2                                      */
+/*   Description:  Pending SHUB 2 EXT IO INT2                           */
+#define SH2_EVENT_OCCURRED_EXTIO_INT2_SHFT		33
+#define SH2_EVENT_OCCURRED_EXTIO_INT2_MASK __IA64_UL_CONST(0x0000000200000000)
+
+/*   SH2_EVENT_OCCURRED_EXTIO_INT3                                      */
+/*   Description:  Pending SHUB 2 EXT IO INT3                           */
+#define SH2_EVENT_OCCURRED_EXTIO_INT3_SHFT		34
+#define SH2_EVENT_OCCURRED_EXTIO_INT3_MASK __IA64_UL_CONST(0x0000000400000000)
+
+#define SH_ALL_INT_MASK \
+	(SH_EVENT_OCCURRED_UART_INT_MASK | SH_EVENT_OCCURRED_IPI_INT_MASK | \
+	 SH_EVENT_OCCURRED_II_INT0_MASK | SH_EVENT_OCCURRED_II_INT1_MASK | \
+	 SH_EVENT_OCCURRED_II_INT1_MASK | SH2_EVENT_OCCURRED_EXTIO_INT2_MASK | \
+	 SH2_EVENT_OCCURRED_EXTIO_INT3_MASK)
+
+
+/* ==================================================================== */
+/*                         LEDS                                         */
+/* ==================================================================== */
+#define SH1_REAL_JUNK_BUS_LED0			0x7fed00000UL
+#define SH1_REAL_JUNK_BUS_LED1			0x7fed10000UL
+#define SH1_REAL_JUNK_BUS_LED2			0x7fed20000UL
+#define SH1_REAL_JUNK_BUS_LED3			0x7fed30000UL
+
+#define SH2_REAL_JUNK_BUS_LED0			0xf0000000UL
+#define SH2_REAL_JUNK_BUS_LED1			0xf0010000UL
+#define SH2_REAL_JUNK_BUS_LED2			0xf0020000UL
+#define SH2_REAL_JUNK_BUS_LED3			0xf0030000UL
+
+/* ==================================================================== */
+/*                         Register "SH1_PTC_0"                         */
+/*       Puge Translation Cache Message Configuration Information       */
+/* ==================================================================== */
+#define SH1_PTC_0			__IA64_UL_CONST(0x00000001101a0000)
+
+/*   SH1_PTC_0_A                                                        */
+/*   Description:  Type                                                 */
+#define SH1_PTC_0_A_SHFT				0
+
+/*   SH1_PTC_0_PS                                                       */
+/*   Description:  Page Size                                            */
+#define SH1_PTC_0_PS_SHFT				2
+
+/*   SH1_PTC_0_RID                                                      */
+/*   Description:  Region ID                                            */
+#define SH1_PTC_0_RID_SHFT				8
+
+/*   SH1_PTC_0_START                                                    */
+/*   Description:  Start                                                */
+#define SH1_PTC_0_START_SHFT				63
+
+/* ==================================================================== */
+/*                         Register "SH1_PTC_1"                         */
+/*       Puge Translation Cache Message Configuration Information       */
+/* ==================================================================== */
+#define SH1_PTC_1			__IA64_UL_CONST(0x00000001101a0080)
+
+/*   SH1_PTC_1_START                                                    */
+/*   Description:  PTC_1 Start                                          */
+#define SH1_PTC_1_START_SHFT				63
+
+/* ==================================================================== */
+/*                         Register "SH2_PTC"                           */
+/*       Puge Translation Cache Message Configuration Information       */
+/* ==================================================================== */
+#define SH2_PTC				__IA64_UL_CONST(0x0000000170000000)
+
+/*   SH2_PTC_A                                                          */
+/*   Description:  Type                                                 */
+#define SH2_PTC_A_SHFT					0
+
+/*   SH2_PTC_PS                                                         */
+/*   Description:  Page Size                                            */
+#define SH2_PTC_PS_SHFT					2
+
+/*   SH2_PTC_RID                                                      */
+/*   Description:  Region ID                                            */
+#define SH2_PTC_RID_SHFT				4
+
+/*   SH2_PTC_START                                                      */
+/*   Description:  Start                                                */
+#define SH2_PTC_START_SHFT				63
+
+/*   SH2_PTC_ADDR_RID                                                   */
+/*   Description:  Region ID                                            */
+#define SH2_PTC_ADDR_SHFT				4
+#define SH2_PTC_ADDR_MASK		__IA64_UL_CONST(0x1ffffffffffff000)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC1_INT_CONFIG"                     */
+/*                SHub RTC 1 Interrupt Config Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC1_INT_CONFIG		__IA64_UL_CONST(0x0000000110001480)
+#define SH2_RTC1_INT_CONFIG		__IA64_UL_CONST(0x0000000010001480)
+#define SH_RTC1_INT_CONFIG_MASK		__IA64_UL_CONST(0x0ff3ffffffefffff)
+#define SH_RTC1_INT_CONFIG_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC1_INT_CONFIG_TYPE                                            */
+/*   Description:  Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT       */
+#define SH_RTC1_INT_CONFIG_TYPE_SHFT			0
+#define SH_RTC1_INT_CONFIG_TYPE_MASK	__IA64_UL_CONST(0x0000000000000007)
+
+/*   SH_RTC1_INT_CONFIG_AGT                                             */
+/*   Description:  Agent, must be 0 for SHub                            */
+#define SH_RTC1_INT_CONFIG_AGT_SHFT			3
+#define SH_RTC1_INT_CONFIG_AGT_MASK	__IA64_UL_CONST(0x0000000000000008)
+
+/*   SH_RTC1_INT_CONFIG_PID                                             */
+/*   Description:  Processor ID, same setting as on targeted McKinley  */
+#define SH_RTC1_INT_CONFIG_PID_SHFT			4
+#define SH_RTC1_INT_CONFIG_PID_MASK	__IA64_UL_CONST(0x00000000000ffff0)
+
+/*   SH_RTC1_INT_CONFIG_BASE                                            */
+/*   Description:  Optional interrupt vector area, 2MB aligned          */
+#define SH_RTC1_INT_CONFIG_BASE_SHFT			21
+#define SH_RTC1_INT_CONFIG_BASE_MASK	__IA64_UL_CONST(0x0003ffffffe00000)
+
+/*   SH_RTC1_INT_CONFIG_IDX                                             */
+/*   Description:  Targeted McKinley interrupt vector                   */
+#define SH_RTC1_INT_CONFIG_IDX_SHFT			52
+#define SH_RTC1_INT_CONFIG_IDX_MASK	__IA64_UL_CONST(0x0ff0000000000000)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC1_INT_ENABLE"                     */
+/*                SHub RTC 1 Interrupt Enable Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC1_INT_ENABLE		__IA64_UL_CONST(0x0000000110001500)
+#define SH2_RTC1_INT_ENABLE		__IA64_UL_CONST(0x0000000010001500)
+#define SH_RTC1_INT_ENABLE_MASK		__IA64_UL_CONST(0x0000000000000001)
+#define SH_RTC1_INT_ENABLE_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC1_INT_ENABLE_RTC1_ENABLE                                     */
+/*   Description:  Enable RTC 1 Interrupt                               */
+#define SH_RTC1_INT_ENABLE_RTC1_ENABLE_SHFT		0
+#define SH_RTC1_INT_ENABLE_RTC1_ENABLE_MASK \
+					__IA64_UL_CONST(0x0000000000000001)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC2_INT_CONFIG"                     */
+/*                SHub RTC 2 Interrupt Config Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC2_INT_CONFIG		__IA64_UL_CONST(0x0000000110001580)
+#define SH2_RTC2_INT_CONFIG		__IA64_UL_CONST(0x0000000010001580)
+#define SH_RTC2_INT_CONFIG_MASK		__IA64_UL_CONST(0x0ff3ffffffefffff)
+#define SH_RTC2_INT_CONFIG_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC2_INT_CONFIG_TYPE                                            */
+/*   Description:  Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT       */
+#define SH_RTC2_INT_CONFIG_TYPE_SHFT			0
+#define SH_RTC2_INT_CONFIG_TYPE_MASK	__IA64_UL_CONST(0x0000000000000007)
+
+/*   SH_RTC2_INT_CONFIG_AGT                                             */
+/*   Description:  Agent, must be 0 for SHub                            */
+#define SH_RTC2_INT_CONFIG_AGT_SHFT			3
+#define SH_RTC2_INT_CONFIG_AGT_MASK	__IA64_UL_CONST(0x0000000000000008)
+
+/*   SH_RTC2_INT_CONFIG_PID                                             */
+/*   Description:  Processor ID, same setting as on targeted McKinley  */
+#define SH_RTC2_INT_CONFIG_PID_SHFT			4
+#define SH_RTC2_INT_CONFIG_PID_MASK	__IA64_UL_CONST(0x00000000000ffff0)
+
+/*   SH_RTC2_INT_CONFIG_BASE                                            */
+/*   Description:  Optional interrupt vector area, 2MB aligned          */
+#define SH_RTC2_INT_CONFIG_BASE_SHFT			21
+#define SH_RTC2_INT_CONFIG_BASE_MASK	__IA64_UL_CONST(0x0003ffffffe00000)
+
+/*   SH_RTC2_INT_CONFIG_IDX                                             */
+/*   Description:  Targeted McKinley interrupt vector                   */
+#define SH_RTC2_INT_CONFIG_IDX_SHFT			52
+#define SH_RTC2_INT_CONFIG_IDX_MASK	__IA64_UL_CONST(0x0ff0000000000000)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC2_INT_ENABLE"                     */
+/*                SHub RTC 2 Interrupt Enable Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC2_INT_ENABLE		__IA64_UL_CONST(0x0000000110001600)
+#define SH2_RTC2_INT_ENABLE		__IA64_UL_CONST(0x0000000010001600)
+#define SH_RTC2_INT_ENABLE_MASK		__IA64_UL_CONST(0x0000000000000001)
+#define SH_RTC2_INT_ENABLE_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC2_INT_ENABLE_RTC2_ENABLE                                     */
+/*   Description:  Enable RTC 2 Interrupt                               */
+#define SH_RTC2_INT_ENABLE_RTC2_ENABLE_SHFT		0
+#define SH_RTC2_INT_ENABLE_RTC2_ENABLE_MASK \
+					__IA64_UL_CONST(0x0000000000000001)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC3_INT_CONFIG"                     */
+/*                SHub RTC 3 Interrupt Config Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC3_INT_CONFIG		__IA64_UL_CONST(0x0000000110001680)
+#define SH2_RTC3_INT_CONFIG		__IA64_UL_CONST(0x0000000010001680)
+#define SH_RTC3_INT_CONFIG_MASK		__IA64_UL_CONST(0x0ff3ffffffefffff)
+#define SH_RTC3_INT_CONFIG_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC3_INT_CONFIG_TYPE                                            */
+/*   Description:  Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT       */
+#define SH_RTC3_INT_CONFIG_TYPE_SHFT			0
+#define SH_RTC3_INT_CONFIG_TYPE_MASK	__IA64_UL_CONST(0x0000000000000007)
+
+/*   SH_RTC3_INT_CONFIG_AGT                                             */
+/*   Description:  Agent, must be 0 for SHub                            */
+#define SH_RTC3_INT_CONFIG_AGT_SHFT			3
+#define SH_RTC3_INT_CONFIG_AGT_MASK	__IA64_UL_CONST(0x0000000000000008)
+
+/*   SH_RTC3_INT_CONFIG_PID                                             */
+/*   Description:  Processor ID, same setting as on targeted McKinley  */
+#define SH_RTC3_INT_CONFIG_PID_SHFT			4
+#define SH_RTC3_INT_CONFIG_PID_MASK	__IA64_UL_CONST(0x00000000000ffff0)
+
+/*   SH_RTC3_INT_CONFIG_BASE                                            */
+/*   Description:  Optional interrupt vector area, 2MB aligned          */
+#define SH_RTC3_INT_CONFIG_BASE_SHFT			21
+#define SH_RTC3_INT_CONFIG_BASE_MASK	__IA64_UL_CONST(0x0003ffffffe00000)
+
+/*   SH_RTC3_INT_CONFIG_IDX                                             */
+/*   Description:  Targeted McKinley interrupt vector                   */
+#define SH_RTC3_INT_CONFIG_IDX_SHFT			52
+#define SH_RTC3_INT_CONFIG_IDX_MASK	__IA64_UL_CONST(0x0ff0000000000000)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC3_INT_ENABLE"                     */
+/*                SHub RTC 3 Interrupt Enable Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC3_INT_ENABLE		__IA64_UL_CONST(0x0000000110001700)
+#define SH2_RTC3_INT_ENABLE		__IA64_UL_CONST(0x0000000010001700)
+#define SH_RTC3_INT_ENABLE_MASK		__IA64_UL_CONST(0x0000000000000001)
+#define SH_RTC3_INT_ENABLE_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC3_INT_ENABLE_RTC3_ENABLE                                     */
+/*   Description:  Enable RTC 3 Interrupt                               */
+#define SH_RTC3_INT_ENABLE_RTC3_ENABLE_SHFT		0
+#define SH_RTC3_INT_ENABLE_RTC3_ENABLE_MASK \
+					__IA64_UL_CONST(0x0000000000000001)
+
+/*   SH_EVENT_OCCURRED_RTC1_INT                                         */
+/*   Description:  Pending RTC 1 Interrupt                              */
+#define SH_EVENT_OCCURRED_RTC1_INT_SHFT			24
+#define SH_EVENT_OCCURRED_RTC1_INT_MASK	__IA64_UL_CONST(0x0000000001000000)
+
+/*   SH_EVENT_OCCURRED_RTC2_INT                                         */
+/*   Description:  Pending RTC 2 Interrupt                              */
+#define SH_EVENT_OCCURRED_RTC2_INT_SHFT			25
+#define SH_EVENT_OCCURRED_RTC2_INT_MASK	__IA64_UL_CONST(0x0000000002000000)
+
+/*   SH_EVENT_OCCURRED_RTC3_INT                                         */
+/*   Description:  Pending RTC 3 Interrupt                              */
+#define SH_EVENT_OCCURRED_RTC3_INT_SHFT			26
+#define SH_EVENT_OCCURRED_RTC3_INT_MASK	__IA64_UL_CONST(0x0000000004000000)
+
+/* ==================================================================== */
+/*                       Register "SH_IPI_ACCESS"                       */
+/*                 CPU interrupt Access Permission Bits                 */
+/* ==================================================================== */
+
+#define SH1_IPI_ACCESS			__IA64_UL_CONST(0x0000000110060480)
+#define SH2_IPI_ACCESS0			__IA64_UL_CONST(0x0000000010060c00)
+#define SH2_IPI_ACCESS1			__IA64_UL_CONST(0x0000000010060c80)
+#define SH2_IPI_ACCESS2			__IA64_UL_CONST(0x0000000010060d00)
+#define SH2_IPI_ACCESS3			__IA64_UL_CONST(0x0000000010060d80)
+
+/* ==================================================================== */
+/*                        Register "SH_INT_CMPB"                        */
+/*                  RTC Compare Value for Processor B                   */
+/* ==================================================================== */
+
+#define SH1_INT_CMPB			__IA64_UL_CONST(0x00000001101b0080)
+#define SH2_INT_CMPB			__IA64_UL_CONST(0x00000000101b0080)
+#define SH_INT_CMPB_MASK		__IA64_UL_CONST(0x007fffffffffffff)
+#define SH_INT_CMPB_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_INT_CMPB_REAL_TIME_CMPB                                         */
+/*   Description:  Real Time Clock Compare                              */
+#define SH_INT_CMPB_REAL_TIME_CMPB_SHFT			0
+#define SH_INT_CMPB_REAL_TIME_CMPB_MASK	__IA64_UL_CONST(0x007fffffffffffff)
+
+/* ==================================================================== */
+/*                        Register "SH_INT_CMPC"                        */
+/*                  RTC Compare Value for Processor C                   */
+/* ==================================================================== */
+
+#define SH1_INT_CMPC			__IA64_UL_CONST(0x00000001101b0100)
+#define SH2_INT_CMPC			__IA64_UL_CONST(0x00000000101b0100)
+#define SH_INT_CMPC_MASK		__IA64_UL_CONST(0x007fffffffffffff)
+#define SH_INT_CMPC_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_INT_CMPC_REAL_TIME_CMPC                                         */
+/*   Description:  Real Time Clock Compare                              */
+#define SH_INT_CMPC_REAL_TIME_CMPC_SHFT			0
+#define SH_INT_CMPC_REAL_TIME_CMPC_MASK	__IA64_UL_CONST(0x007fffffffffffff)
+
+/* ==================================================================== */
+/*                        Register "SH_INT_CMPD"                        */
+/*                  RTC Compare Value for Processor D                   */
+/* ==================================================================== */
+
+#define SH1_INT_CMPD			__IA64_UL_CONST(0x00000001101b0180)
+#define SH2_INT_CMPD			__IA64_UL_CONST(0x00000000101b0180)
+#define SH_INT_CMPD_MASK		__IA64_UL_CONST(0x007fffffffffffff)
+#define SH_INT_CMPD_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_INT_CMPD_REAL_TIME_CMPD                                         */
+/*   Description:  Real Time Clock Compare                              */
+#define SH_INT_CMPD_REAL_TIME_CMPD_SHFT			0
+#define SH_INT_CMPD_REAL_TIME_CMPD_MASK	__IA64_UL_CONST(0x007fffffffffffff)
+
+/* ==================================================================== */
+/*                Register "SH_MD_DQLP_MMR_DIR_PRIVEC0"                 */
+/*                      privilege vector for acc=0                      */
+/* ==================================================================== */
+#define SH1_MD_DQLP_MMR_DIR_PRIVEC0	__IA64_UL_CONST(0x0000000100030300)
+
+/* ==================================================================== */
+/*                Register "SH_MD_DQRP_MMR_DIR_PRIVEC0"                 */
+/*                      privilege vector for acc=0                      */
+/* ==================================================================== */
+#define SH1_MD_DQRP_MMR_DIR_PRIVEC0	__IA64_UL_CONST(0x0000000100050300)
+
+/* ==================================================================== */
+/* Some MMRs are functionally identical (or close enough) on both SHUB1 */
+/* and SHUB2 that it makes sense to define a geberic name for the MMR.  */
+/* It is acceptable to use (for example) SH_IPI_INT to reference the    */
+/* the IPI MMR. The value of SH_IPI_INT is determined at runtime based  */
+/* on the type of the SHUB. Do not use these #defines in performance    */
+/* critical code  or loops - there is a small performance penalty.      */
+/* ==================================================================== */
+#define shubmmr(a,b) 		(is_shub2() ? a##2_##b : a##1_##b)
+
+#define SH_REAL_JUNK_BUS_LED0	shubmmr(SH, REAL_JUNK_BUS_LED0)
+#define SH_IPI_INT		shubmmr(SH, IPI_INT)
+#define SH_EVENT_OCCURRED	shubmmr(SH, EVENT_OCCURRED)
+#define SH_EVENT_OCCURRED_ALIAS	shubmmr(SH, EVENT_OCCURRED_ALIAS)
+#define SH_RTC			shubmmr(SH, RTC)
+#define SH_RTC1_INT_CONFIG	shubmmr(SH, RTC1_INT_CONFIG)
+#define SH_RTC1_INT_ENABLE	shubmmr(SH, RTC1_INT_ENABLE)
+#define SH_RTC2_INT_CONFIG	shubmmr(SH, RTC2_INT_CONFIG)
+#define SH_RTC2_INT_ENABLE	shubmmr(SH, RTC2_INT_ENABLE)
+#define SH_RTC3_INT_CONFIG	shubmmr(SH, RTC3_INT_CONFIG)
+#define SH_RTC3_INT_ENABLE	shubmmr(SH, RTC3_INT_ENABLE)
+#define SH_INT_CMPB		shubmmr(SH, INT_CMPB)
+#define SH_INT_CMPC		shubmmr(SH, INT_CMPC)
+#define SH_INT_CMPD		shubmmr(SH, INT_CMPD)
+
+/* ========================================================================== */
+/*                        Register "SH2_BT_ENG_CSR_0"                         */
+/*                    Engine 0 Control and Status Register                    */
+/* ========================================================================== */
+
+#define SH2_BT_ENG_CSR_0		__IA64_UL_CONST(0x0000000030040000)
+#define SH2_BT_ENG_SRC_ADDR_0		__IA64_UL_CONST(0x0000000030040080)
+#define SH2_BT_ENG_DEST_ADDR_0		__IA64_UL_CONST(0x0000000030040100)
+#define SH2_BT_ENG_NOTIF_ADDR_0		__IA64_UL_CONST(0x0000000030040180)
+
+/* ========================================================================== */
+/*                       BTE interfaces 1-3                                   */
+/* ========================================================================== */
+
+#define SH2_BT_ENG_CSR_1		__IA64_UL_CONST(0x0000000030050000)
+#define SH2_BT_ENG_CSR_2		__IA64_UL_CONST(0x0000000030060000)
+#define SH2_BT_ENG_CSR_3		__IA64_UL_CONST(0x0000000030070000)
+
+#endif /* _ASM_IA64_SN_SHUB_MMR_H */
diff --git a/arch/ia64/include/asm/sn/shubio.h b/arch/ia64/include/asm/sn/shubio.h
new file mode 100644
index 00000000..ecb8a494
--- /dev/null
+++ b/arch/ia64/include/asm/sn/shubio.h
@@ -0,0 +1,3358 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_SHUBIO_H
+#define _ASM_IA64_SN_SHUBIO_H
+
+#define HUB_WIDGET_ID_MAX	0xf
+#define IIO_NUM_ITTES		7
+#define HUB_NUM_BIG_WINDOW	(IIO_NUM_ITTES - 1)
+
+#define		IIO_WID			0x00400000	/* Crosstalk Widget Identification */
+							/* This register is also accessible from
+							 * Crosstalk at address 0x0.  */
+#define		IIO_WSTAT		0x00400008	/* Crosstalk Widget Status */
+#define		IIO_WCR			0x00400020	/* Crosstalk Widget Control Register */
+#define		IIO_ILAPR		0x00400100	/* IO Local Access Protection Register */
+#define		IIO_ILAPO		0x00400108	/* IO Local Access Protection Override */
+#define		IIO_IOWA		0x00400110	/* IO Outbound Widget Access */
+#define		IIO_IIWA		0x00400118	/* IO Inbound Widget Access */
+#define		IIO_IIDEM		0x00400120	/* IO Inbound Device Error Mask */
+#define		IIO_ILCSR		0x00400128	/* IO LLP Control and Status Register */
+#define		IIO_ILLR		0x00400130	/* IO LLP Log Register    */
+#define		IIO_IIDSR		0x00400138	/* IO Interrupt Destination */
+
+#define		IIO_IGFX0		0x00400140	/* IO Graphics Node-Widget Map 0 */
+#define		IIO_IGFX1		0x00400148	/* IO Graphics Node-Widget Map 1 */
+
+#define		IIO_ISCR0		0x00400150	/* IO Scratch Register 0 */
+#define		IIO_ISCR1		0x00400158	/* IO Scratch Register 1 */
+
+#define		IIO_ITTE1		0x00400160	/* IO Translation Table Entry 1 */
+#define		IIO_ITTE2		0x00400168	/* IO Translation Table Entry 2 */
+#define		IIO_ITTE3		0x00400170	/* IO Translation Table Entry 3 */
+#define		IIO_ITTE4		0x00400178	/* IO Translation Table Entry 4 */
+#define		IIO_ITTE5		0x00400180	/* IO Translation Table Entry 5 */
+#define		IIO_ITTE6		0x00400188	/* IO Translation Table Entry 6 */
+#define		IIO_ITTE7		0x00400190	/* IO Translation Table Entry 7 */
+
+#define		IIO_IPRB0		0x00400198	/* IO PRB Entry 0   */
+#define		IIO_IPRB8		0x004001A0	/* IO PRB Entry 8   */
+#define		IIO_IPRB9		0x004001A8	/* IO PRB Entry 9   */
+#define		IIO_IPRBA		0x004001B0	/* IO PRB Entry A   */
+#define		IIO_IPRBB		0x004001B8	/* IO PRB Entry B   */
+#define		IIO_IPRBC		0x004001C0	/* IO PRB Entry C   */
+#define		IIO_IPRBD		0x004001C8	/* IO PRB Entry D   */
+#define		IIO_IPRBE		0x004001D0	/* IO PRB Entry E   */
+#define		IIO_IPRBF		0x004001D8	/* IO PRB Entry F   */
+
+#define		IIO_IXCC		0x004001E0	/* IO Crosstalk Credit Count Timeout */
+#define		IIO_IMEM		0x004001E8	/* IO Miscellaneous Error Mask */
+#define		IIO_IXTT		0x004001F0	/* IO Crosstalk Timeout Threshold */
+#define		IIO_IECLR		0x004001F8	/* IO Error Clear Register */
+#define		IIO_IBCR		0x00400200	/* IO BTE Control Register */
+
+#define		IIO_IXSM		0x00400208	/* IO Crosstalk Spurious Message */
+#define		IIO_IXSS		0x00400210	/* IO Crosstalk Spurious Sideband */
+
+#define		IIO_ILCT		0x00400218	/* IO LLP Channel Test    */
+
+#define		IIO_IIEPH1 		0x00400220	/* IO Incoming Error Packet Header, Part 1 */
+#define		IIO_IIEPH2 		0x00400228	/* IO Incoming Error Packet Header, Part 2 */
+
+#define		IIO_ISLAPR 		0x00400230	/* IO SXB Local Access Protection Regster */
+#define		IIO_ISLAPO 		0x00400238	/* IO SXB Local Access Protection Override */
+
+#define		IIO_IWI			0x00400240	/* IO Wrapper Interrupt Register */
+#define		IIO_IWEL		0x00400248	/* IO Wrapper Error Log Register */
+#define		IIO_IWC			0x00400250	/* IO Wrapper Control Register */
+#define		IIO_IWS			0x00400258	/* IO Wrapper Status Register */
+#define		IIO_IWEIM		0x00400260	/* IO Wrapper Error Interrupt Masking Register */
+
+#define		IIO_IPCA		0x00400300	/* IO PRB Counter Adjust */
+
+#define		IIO_IPRTE0_A		0x00400308	/* IO PIO Read Address Table Entry 0, Part A */
+#define		IIO_IPRTE1_A		0x00400310	/* IO PIO Read Address Table Entry 1, Part A */
+#define		IIO_IPRTE2_A		0x00400318	/* IO PIO Read Address Table Entry 2, Part A */
+#define		IIO_IPRTE3_A		0x00400320	/* IO PIO Read Address Table Entry 3, Part A */
+#define		IIO_IPRTE4_A		0x00400328	/* IO PIO Read Address Table Entry 4, Part A */
+#define		IIO_IPRTE5_A		0x00400330	/* IO PIO Read Address Table Entry 5, Part A */
+#define		IIO_IPRTE6_A		0x00400338	/* IO PIO Read Address Table Entry 6, Part A */
+#define		IIO_IPRTE7_A		0x00400340	/* IO PIO Read Address Table Entry 7, Part A */
+
+#define		IIO_IPRTE0_B		0x00400348	/* IO PIO Read Address Table Entry 0, Part B */
+#define		IIO_IPRTE1_B		0x00400350	/* IO PIO Read Address Table Entry 1, Part B */
+#define		IIO_IPRTE2_B		0x00400358	/* IO PIO Read Address Table Entry 2, Part B */
+#define		IIO_IPRTE3_B		0x00400360	/* IO PIO Read Address Table Entry 3, Part B */
+#define		IIO_IPRTE4_B		0x00400368	/* IO PIO Read Address Table Entry 4, Part B */
+#define		IIO_IPRTE5_B		0x00400370	/* IO PIO Read Address Table Entry 5, Part B */
+#define		IIO_IPRTE6_B		0x00400378	/* IO PIO Read Address Table Entry 6, Part B */
+#define		IIO_IPRTE7_B		0x00400380	/* IO PIO Read Address Table Entry 7, Part B */
+
+#define		IIO_IPDR		0x00400388	/* IO PIO Deallocation Register */
+#define		IIO_ICDR		0x00400390	/* IO CRB Entry Deallocation Register */
+#define		IIO_IFDR		0x00400398	/* IO IOQ FIFO Depth Register */
+#define		IIO_IIAP		0x004003A0	/* IO IIQ Arbitration Parameters */
+#define		IIO_ICMR		0x004003A8	/* IO CRB Management Register */
+#define		IIO_ICCR		0x004003B0	/* IO CRB Control Register */
+#define		IIO_ICTO		0x004003B8	/* IO CRB Timeout   */
+#define		IIO_ICTP		0x004003C0	/* IO CRB Timeout Prescalar */
+
+#define		IIO_ICRB0_A		0x00400400	/* IO CRB Entry 0_A */
+#define		IIO_ICRB0_B		0x00400408	/* IO CRB Entry 0_B */
+#define		IIO_ICRB0_C		0x00400410	/* IO CRB Entry 0_C */
+#define		IIO_ICRB0_D		0x00400418	/* IO CRB Entry 0_D */
+#define		IIO_ICRB0_E		0x00400420	/* IO CRB Entry 0_E */
+
+#define		IIO_ICRB1_A		0x00400430	/* IO CRB Entry 1_A */
+#define		IIO_ICRB1_B		0x00400438	/* IO CRB Entry 1_B */
+#define		IIO_ICRB1_C		0x00400440	/* IO CRB Entry 1_C */
+#define		IIO_ICRB1_D		0x00400448	/* IO CRB Entry 1_D */
+#define		IIO_ICRB1_E		0x00400450	/* IO CRB Entry 1_E */
+
+#define		IIO_ICRB2_A		0x00400460	/* IO CRB Entry 2_A */
+#define		IIO_ICRB2_B		0x00400468	/* IO CRB Entry 2_B */
+#define		IIO_ICRB2_C		0x00400470	/* IO CRB Entry 2_C */
+#define		IIO_ICRB2_D		0x00400478	/* IO CRB Entry 2_D */
+#define		IIO_ICRB2_E		0x00400480	/* IO CRB Entry 2_E */
+
+#define		IIO_ICRB3_A		0x00400490	/* IO CRB Entry 3_A */
+#define		IIO_ICRB3_B		0x00400498	/* IO CRB Entry 3_B */
+#define		IIO_ICRB3_C		0x004004a0	/* IO CRB Entry 3_C */
+#define		IIO_ICRB3_D		0x004004a8	/* IO CRB Entry 3_D */
+#define		IIO_ICRB3_E		0x004004b0	/* IO CRB Entry 3_E */
+
+#define		IIO_ICRB4_A		0x004004c0	/* IO CRB Entry 4_A */
+#define		IIO_ICRB4_B		0x004004c8	/* IO CRB Entry 4_B */
+#define		IIO_ICRB4_C		0x004004d0	/* IO CRB Entry 4_C */
+#define		IIO_ICRB4_D		0x004004d8	/* IO CRB Entry 4_D */
+#define		IIO_ICRB4_E		0x004004e0	/* IO CRB Entry 4_E */
+
+#define		IIO_ICRB5_A		0x004004f0	/* IO CRB Entry 5_A */
+#define		IIO_ICRB5_B		0x004004f8	/* IO CRB Entry 5_B */
+#define		IIO_ICRB5_C		0x00400500	/* IO CRB Entry 5_C */
+#define		IIO_ICRB5_D		0x00400508	/* IO CRB Entry 5_D */
+#define		IIO_ICRB5_E		0x00400510	/* IO CRB Entry 5_E */
+
+#define		IIO_ICRB6_A		0x00400520	/* IO CRB Entry 6_A */
+#define		IIO_ICRB6_B		0x00400528	/* IO CRB Entry 6_B */
+#define		IIO_ICRB6_C		0x00400530	/* IO CRB Entry 6_C */
+#define		IIO_ICRB6_D		0x00400538	/* IO CRB Entry 6_D */
+#define		IIO_ICRB6_E		0x00400540	/* IO CRB Entry 6_E */
+
+#define		IIO_ICRB7_A		0x00400550	/* IO CRB Entry 7_A */
+#define		IIO_ICRB7_B		0x00400558	/* IO CRB Entry 7_B */
+#define		IIO_ICRB7_C		0x00400560	/* IO CRB Entry 7_C */
+#define		IIO_ICRB7_D		0x00400568	/* IO CRB Entry 7_D */
+#define		IIO_ICRB7_E		0x00400570	/* IO CRB Entry 7_E */
+
+#define		IIO_ICRB8_A		0x00400580	/* IO CRB Entry 8_A */
+#define		IIO_ICRB8_B		0x00400588	/* IO CRB Entry 8_B */
+#define		IIO_ICRB8_C		0x00400590	/* IO CRB Entry 8_C */
+#define		IIO_ICRB8_D		0x00400598	/* IO CRB Entry 8_D */
+#define		IIO_ICRB8_E		0x004005a0	/* IO CRB Entry 8_E */
+
+#define		IIO_ICRB9_A		0x004005b0	/* IO CRB Entry 9_A */
+#define		IIO_ICRB9_B		0x004005b8	/* IO CRB Entry 9_B */
+#define		IIO_ICRB9_C		0x004005c0	/* IO CRB Entry 9_C */
+#define		IIO_ICRB9_D		0x004005c8	/* IO CRB Entry 9_D */
+#define		IIO_ICRB9_E		0x004005d0	/* IO CRB Entry 9_E */
+
+#define		IIO_ICRBA_A		0x004005e0	/* IO CRB Entry A_A */
+#define		IIO_ICRBA_B		0x004005e8	/* IO CRB Entry A_B */
+#define		IIO_ICRBA_C		0x004005f0	/* IO CRB Entry A_C */
+#define		IIO_ICRBA_D		0x004005f8	/* IO CRB Entry A_D */
+#define		IIO_ICRBA_E		0x00400600	/* IO CRB Entry A_E */
+
+#define		IIO_ICRBB_A		0x00400610	/* IO CRB Entry B_A */
+#define		IIO_ICRBB_B		0x00400618	/* IO CRB Entry B_B */
+#define		IIO_ICRBB_C		0x00400620	/* IO CRB Entry B_C */
+#define		IIO_ICRBB_D		0x00400628	/* IO CRB Entry B_D */
+#define		IIO_ICRBB_E		0x00400630	/* IO CRB Entry B_E */
+
+#define		IIO_ICRBC_A		0x00400640	/* IO CRB Entry C_A */
+#define		IIO_ICRBC_B		0x00400648	/* IO CRB Entry C_B */
+#define		IIO_ICRBC_C		0x00400650	/* IO CRB Entry C_C */
+#define		IIO_ICRBC_D		0x00400658	/* IO CRB Entry C_D */
+#define		IIO_ICRBC_E		0x00400660	/* IO CRB Entry C_E */
+
+#define		IIO_ICRBD_A		0x00400670	/* IO CRB Entry D_A */
+#define		IIO_ICRBD_B		0x00400678	/* IO CRB Entry D_B */
+#define		IIO_ICRBD_C		0x00400680	/* IO CRB Entry D_C */
+#define		IIO_ICRBD_D		0x00400688	/* IO CRB Entry D_D */
+#define		IIO_ICRBD_E		0x00400690	/* IO CRB Entry D_E */
+
+#define		IIO_ICRBE_A		0x004006a0	/* IO CRB Entry E_A */
+#define		IIO_ICRBE_B		0x004006a8	/* IO CRB Entry E_B */
+#define		IIO_ICRBE_C		0x004006b0	/* IO CRB Entry E_C */
+#define		IIO_ICRBE_D		0x004006b8	/* IO CRB Entry E_D */
+#define		IIO_ICRBE_E		0x004006c0	/* IO CRB Entry E_E */
+
+#define		IIO_ICSML		0x00400700	/* IO CRB Spurious Message Low */
+#define		IIO_ICSMM		0x00400708	/* IO CRB Spurious Message Middle */
+#define		IIO_ICSMH		0x00400710	/* IO CRB Spurious Message High */
+
+#define		IIO_IDBSS		0x00400718	/* IO Debug Submenu Select */
+
+#define		IIO_IBLS0		0x00410000	/* IO BTE Length Status 0 */
+#define		IIO_IBSA0		0x00410008	/* IO BTE Source Address 0 */
+#define		IIO_IBDA0		0x00410010	/* IO BTE Destination Address 0 */
+#define		IIO_IBCT0		0x00410018	/* IO BTE Control Terminate 0 */
+#define		IIO_IBNA0		0x00410020	/* IO BTE Notification Address 0 */
+#define		IIO_IBIA0		0x00410028	/* IO BTE Interrupt Address 0 */
+#define		IIO_IBLS1		0x00420000	/* IO BTE Length Status 1 */
+#define		IIO_IBSA1		0x00420008	/* IO BTE Source Address 1 */
+#define		IIO_IBDA1		0x00420010	/* IO BTE Destination Address 1 */
+#define		IIO_IBCT1		0x00420018	/* IO BTE Control Terminate 1 */
+#define		IIO_IBNA1		0x00420020	/* IO BTE Notification Address 1 */
+#define		IIO_IBIA1		0x00420028	/* IO BTE Interrupt Address 1 */
+
+#define		IIO_IPCR		0x00430000	/* IO Performance Control */
+#define		IIO_IPPR		0x00430008	/* IO Performance Profiling */
+
+/************************************************************************
+ *									*
+ * Description:  This register echoes some information from the         *
+ * LB_REV_ID register. It is available through Crosstalk as described   *
+ * above. The REV_NUM and MFG_NUM fields receive their values from      *
+ * the REVISION and MANUFACTURER fields in the LB_REV_ID register.      *
+ * The PART_NUM field's value is the Crosstalk device ID number that    *
+ * Steve Miller assigned to the SHub chip.                              *
+ *									*
+ ************************************************************************/
+
+typedef union ii_wid_u {
+	u64 ii_wid_regval;
+	struct {
+		u64 w_rsvd_1:1;
+		u64 w_mfg_num:11;
+		u64 w_part_num:16;
+		u64 w_rev_num:4;
+		u64 w_rsvd:32;
+	} ii_wid_fld_s;
+} ii_wid_u_t;
+
+/************************************************************************
+ *									*
+ *  The fields in this register are set upon detection of an error      *
+ * and cleared by various mechanisms, as explained in the               *
+ * description.                                                         *
+ *									*
+ ************************************************************************/
+
+typedef union ii_wstat_u {
+	u64 ii_wstat_regval;
+	struct {
+		u64 w_pending:4;
+		u64 w_xt_crd_to:1;
+		u64 w_xt_tail_to:1;
+		u64 w_rsvd_3:3;
+		u64 w_tx_mx_rty:1;
+		u64 w_rsvd_2:6;
+		u64 w_llp_tx_cnt:8;
+		u64 w_rsvd_1:8;
+		u64 w_crazy:1;
+		u64 w_rsvd:31;
+	} ii_wstat_fld_s;
+} ii_wstat_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This is a read-write enabled register. It controls     *
+ * various aspects of the Crosstalk flow control.                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_wcr_u {
+	u64 ii_wcr_regval;
+	struct {
+		u64 w_wid:4;
+		u64 w_tag:1;
+		u64 w_rsvd_1:8;
+		u64 w_dst_crd:3;
+		u64 w_f_bad_pkt:1;
+		u64 w_dir_con:1;
+		u64 w_e_thresh:5;
+		u64 w_rsvd:41;
+	} ii_wcr_fld_s;
+} ii_wcr_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register's value is a bit vector that guards      *
+ * access to local registers within the II as well as to external       *
+ * Crosstalk widgets. Each bit in the register corresponds to a         *
+ * particular region in the system; a region consists of one, two or    *
+ * four nodes (depending on the value of the REGION_SIZE field in the   *
+ * LB_REV_ID register, which is documented in Section 8.3.1.1). The     *
+ * protection provided by this register applies to PIO read             *
+ * operations as well as PIO write operations. The II will perform a    *
+ * PIO read or write request only if the bit for the requestor's        *
+ * region is set; otherwise, the II will not perform the requested      *
+ * operation and will return an error response. When a PIO read or      *
+ * write request targets an external Crosstalk widget, then not only    *
+ * must the bit for the requestor's region be set in the ILAPR, but     *
+ * also the target widget's bit in the IOWA register must be set in     *
+ * order for the II to perform the requested operation; otherwise,      *
+ * the II will return an error response. Hence, the protection          *
+ * provided by the IOWA register supplements the protection provided    *
+ * by the ILAPR for requests that target external Crosstalk widgets.    *
+ * This register itself can be accessed only by the nodes whose         *
+ * region ID bits are enabled in this same register. It can also be     *
+ * accessed through the IAlias space by the local processors.           *
+ * The reset value of this register allows access by all nodes.         *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ilapr_u {
+	u64 ii_ilapr_regval;
+	struct {
+		u64 i_region:64;
+	} ii_ilapr_fld_s;
+} ii_ilapr_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  A write to this register of the 64-bit value           *
+ * "SGIrules" in ASCII, will cause the bit in the ILAPR register        *
+ * corresponding to the region of the requestor to be set (allow        *
+ * access). A write of any other value will be ignored. Access          *
+ * protection for this register is "SGIrules".                          *
+ * This register can also be accessed through the IAlias space.         *
+ * However, this access will not change the access permissions in the   *
+ * ILAPR.                                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ilapo_u {
+	u64 ii_ilapo_regval;
+	struct {
+		u64 i_io_ovrride:64;
+	} ii_ilapo_fld_s;
+} ii_ilapo_u_t;
+
+/************************************************************************
+ *									*
+ *  This register qualifies all the PIO and Graphics writes launched    *
+ * from the SHUB towards a widget.                                      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iowa_u {
+	u64 ii_iowa_regval;
+	struct {
+		u64 i_w0_oac:1;
+		u64 i_rsvd_1:7;
+		u64 i_wx_oac:8;
+		u64 i_rsvd:48;
+	} ii_iowa_fld_s;
+} ii_iowa_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register qualifies all the requests launched      *
+ * from a widget towards the Shub. This register is intended to be      *
+ * used by software in case of misbehaving widgets.                     *
+ *									*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iiwa_u {
+	u64 ii_iiwa_regval;
+	struct {
+		u64 i_w0_iac:1;
+		u64 i_rsvd_1:7;
+		u64 i_wx_iac:8;
+		u64 i_rsvd:48;
+	} ii_iiwa_fld_s;
+} ii_iiwa_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register qualifies all the operations launched    *
+ * from a widget towards the SHub. It allows individual access          *
+ * control for up to 8 devices per widget. A device refers to           *
+ * individual DMA master hosted by a widget.                            *
+ * The bits in each field of this register are cleared by the Shub      *
+ * upon detection of an error which requires the device to be           *
+ * disabled. These fields assume that 0=TNUM=7 (i.e., Bridge-centric    *
+ * Crosstalk). Whether or not a device has access rights to this        *
+ * Shub is determined by an AND of the device enable bit in the         *
+ * appropriate field of this register and the corresponding bit in      *
+ * the Wx_IAC field (for the widget which this device belongs to).      *
+ * The bits in this field are set by writing a 1 to them. Incoming      *
+ * replies from Crosstalk are not subject to this access control        *
+ * mechanism.                                                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iidem_u {
+	u64 ii_iidem_regval;
+	struct {
+		u64 i_w8_dxs:8;
+		u64 i_w9_dxs:8;
+		u64 i_wa_dxs:8;
+		u64 i_wb_dxs:8;
+		u64 i_wc_dxs:8;
+		u64 i_wd_dxs:8;
+		u64 i_we_dxs:8;
+		u64 i_wf_dxs:8;
+	} ii_iidem_fld_s;
+} ii_iidem_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the various programmable fields necessary    *
+ * for controlling and observing the LLP signals.                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ilcsr_u {
+	u64 ii_ilcsr_regval;
+	struct {
+		u64 i_nullto:6;
+		u64 i_rsvd_4:2;
+		u64 i_wrmrst:1;
+		u64 i_rsvd_3:1;
+		u64 i_llp_en:1;
+		u64 i_bm8:1;
+		u64 i_llp_stat:2;
+		u64 i_remote_power:1;
+		u64 i_rsvd_2:1;
+		u64 i_maxrtry:10;
+		u64 i_d_avail_sel:2;
+		u64 i_rsvd_1:4;
+		u64 i_maxbrst:10;
+		u64 i_rsvd:22;
+
+	} ii_ilcsr_fld_s;
+} ii_ilcsr_u_t;
+
+/************************************************************************
+ *									*
+ *  This is simply a status registers that monitors the LLP error       *
+ * rate.								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_illr_u {
+	u64 ii_illr_regval;
+	struct {
+		u64 i_sn_cnt:16;
+		u64 i_cb_cnt:16;
+		u64 i_rsvd:32;
+	} ii_illr_fld_s;
+} ii_illr_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  All II-detected non-BTE error interrupts are           *
+ * specified via this register.                                         *
+ * NOTE: The PI interrupt register address is hardcoded in the II. If   *
+ * PI_ID==0, then the II sends an interrupt request (Duplonet PWRI      *
+ * packet) to address offset 0x0180_0090 within the local register      *
+ * address space of PI0 on the node specified by the NODE field. If     *
+ * PI_ID==1, then the II sends the interrupt request to address         *
+ * offset 0x01A0_0090 within the local register address space of PI1    *
+ * on the node specified by the NODE field.                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iidsr_u {
+	u64 ii_iidsr_regval;
+	struct {
+		u64 i_level:8;
+		u64 i_pi_id:1;
+		u64 i_node:11;
+		u64 i_rsvd_3:4;
+		u64 i_enable:1;
+		u64 i_rsvd_2:3;
+		u64 i_int_sent:2;
+		u64 i_rsvd_1:2;
+		u64 i_pi0_forward_int:1;
+		u64 i_pi1_forward_int:1;
+		u64 i_rsvd:30;
+	} ii_iidsr_fld_s;
+} ii_iidsr_u_t;
+
+/************************************************************************
+ *									*
+ *  There are two instances of this register. This register is used     *
+ * for matching up the incoming responses from the graphics widget to   *
+ * the processor that initiated the graphics operation. The             *
+ * write-responses are converted to graphics credits and returned to    *
+ * the processor so that the processor interface can manage the flow    *
+ * control.                                                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_igfx0_u {
+	u64 ii_igfx0_regval;
+	struct {
+		u64 i_w_num:4;
+		u64 i_pi_id:1;
+		u64 i_n_num:12;
+		u64 i_p_num:1;
+		u64 i_rsvd:46;
+	} ii_igfx0_fld_s;
+} ii_igfx0_u_t;
+
+/************************************************************************
+ *									*
+ *  There are two instances of this register. This register is used     *
+ * for matching up the incoming responses from the graphics widget to   *
+ * the processor that initiated the graphics operation. The             *
+ * write-responses are converted to graphics credits and returned to    *
+ * the processor so that the processor interface can manage the flow    *
+ * control.                                                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_igfx1_u {
+	u64 ii_igfx1_regval;
+	struct {
+		u64 i_w_num:4;
+		u64 i_pi_id:1;
+		u64 i_n_num:12;
+		u64 i_p_num:1;
+		u64 i_rsvd:46;
+	} ii_igfx1_fld_s;
+} ii_igfx1_u_t;
+
+/************************************************************************
+ *									*
+ *  There are two instances of this registers. These registers are      *
+ * used as scratch registers for software use.                          *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iscr0_u {
+	u64 ii_iscr0_regval;
+	struct {
+		u64 i_scratch:64;
+	} ii_iscr0_fld_s;
+} ii_iscr0_u_t;
+
+/************************************************************************
+ *									*
+ *  There are two instances of this registers. These registers are      *
+ * used as scratch registers for software use.                          *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iscr1_u {
+	u64 ii_iscr1_regval;
+	struct {
+		u64 i_scratch:64;
+	} ii_iscr1_fld_s;
+} ii_iscr1_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a Shub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the SHub is thus the lower 16 GBytes per widget       * 
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the Shub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte1_u {
+	u64 ii_itte1_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte1_fld_s;
+} ii_itte1_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a Shub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the Shub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the Shub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte2_u {
+	u64 ii_itte2_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte2_fld_s;
+} ii_itte2_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a Shub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the Shub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the SHub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte3_u {
+	u64 ii_itte3_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte3_fld_s;
+} ii_itte3_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a SHub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the SHub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the SHub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte4_u {
+	u64 ii_itte4_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte4_fld_s;
+} ii_itte4_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a SHub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the Shub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the Shub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte5_u {
+	u64 ii_itte5_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte5_fld_s;
+} ii_itte5_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a Shub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the Shub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the Shub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte6_u {
+	u64 ii_itte6_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte6_fld_s;
+} ii_itte6_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a Shub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the Shub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the SHub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte7_u {
+	u64 ii_itte7_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte7_fld_s;
+} ii_itte7_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprb0_u {
+	u64 ii_iprb0_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprb0_fld_s;
+} ii_iprb0_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprb8_u {
+	u64 ii_iprb8_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprb8_fld_s;
+} ii_iprb8_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprb9_u {
+	u64 ii_iprb9_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprb9_fld_s;
+} ii_iprb9_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.        *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ *									*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprba_u {
+	u64 ii_iprba_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprba_fld_s;
+} ii_iprba_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprbb_u {
+	u64 ii_iprbb_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprbb_fld_s;
+} ii_iprbb_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprbc_u {
+	u64 ii_iprbc_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprbc_fld_s;
+} ii_iprbc_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprbd_u {
+	u64 ii_iprbd_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprbd_fld_s;
+} ii_iprbd_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprbe_u {
+	u64 ii_iprbe_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprbe_fld_s;
+} ii_iprbe_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of Shub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprbf_u {
+	u64 ii_iprbf_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprbe_fld_s;
+} ii_iprbf_u_t;
+
+/************************************************************************
+ *									*
+ *  This register specifies the timeout value to use for monitoring     *
+ * Crosstalk credits which are used outbound to Crosstalk. An           *
+ * internal counter called the Crosstalk Credit Timeout Counter         *
+ * increments every 128 II clocks. The counter starts counting          *
+ * anytime the credit count drops below a threshold, and resets to      *
+ * zero (stops counting) anytime the credit count is at or above the    *
+ * threshold. The threshold is 1 credit in direct connect mode and 2    *
+ * in Crossbow connect mode. When the internal Crosstalk Credit         *
+ * Timeout Counter reaches the value programmed in this register, a     *
+ * Crosstalk Credit Timeout has occurred. The internal counter is not   *
+ * readable from software, and stops counting at its maximum value,     *
+ * so it cannot cause more than one interrupt.                          *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ixcc_u {
+	u64 ii_ixcc_regval;
+	struct {
+		u64 i_time_out:26;
+		u64 i_rsvd:38;
+	} ii_ixcc_fld_s;
+} ii_ixcc_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register qualifies all the PIO and DMA            *
+ * operations launched from widget 0 towards the SHub. In               *
+ * addition, it also qualifies accesses by the BTE streams.             *
+ * The bits in each field of this register are cleared by the SHub      *
+ * upon detection of an error which requires widget 0 or the BTE        *
+ * streams to be terminated. Whether or not widget x has access         *
+ * rights to this SHub is determined by an AND of the device            *
+ * enable bit in the appropriate field of this register and bit 0 in    *
+ * the Wx_IAC field. The bits in this field are set by writing a 1 to   *
+ * them. Incoming replies from Crosstalk are not subject to this        *
+ * access control mechanism.                                            *
+ *									*
+ ************************************************************************/
+
+typedef union ii_imem_u {
+	u64 ii_imem_regval;
+	struct {
+		u64 i_w0_esd:1;
+		u64 i_rsvd_3:3;
+		u64 i_b0_esd:1;
+		u64 i_rsvd_2:3;
+		u64 i_b1_esd:1;
+		u64 i_rsvd_1:3;
+		u64 i_clr_precise:1;
+		u64 i_rsvd:51;
+	} ii_imem_fld_s;
+} ii_imem_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register specifies the timeout value to use for   *
+ * monitoring Crosstalk tail flits coming into the Shub in the          *
+ * TAIL_TO field. An internal counter associated with this register     *
+ * is incremented every 128 II internal clocks (7 bits). The counter    *
+ * starts counting anytime a header micropacket is received and stops   *
+ * counting (and resets to zero) any time a micropacket with a Tail     *
+ * bit is received. Once the counter reaches the threshold value        *
+ * programmed in this register, it generates an interrupt to the        *
+ * processor that is programmed into the IIDSR. The counter saturates   *
+ * (does not roll over) at its maximum value, so it cannot cause        *
+ * another interrupt until after it is cleared.                         *
+ * The register also contains the Read Response Timeout values. The     *
+ * Prescalar is 23 bits, and counts II clocks. An internal counter      *
+ * increments on every II clock and when it reaches the value in the    *
+ * Prescalar field, all IPRTE registers with their valid bits set       *
+ * have their Read Response timers bumped. Whenever any of them match   *
+ * the value in the RRSP_TO field, a Read Response Timeout has          *
+ * occurred, and error handling occurs as described in the Error        *
+ * Handling section of this document.                                   *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ixtt_u {
+	u64 ii_ixtt_regval;
+	struct {
+		u64 i_tail_to:26;
+		u64 i_rsvd_1:6;
+		u64 i_rrsp_ps:23;
+		u64 i_rrsp_to:5;
+		u64 i_rsvd:4;
+	} ii_ixtt_fld_s;
+} ii_ixtt_u_t;
+
+/************************************************************************
+ *									*
+ *  Writing a 1 to the fields of this register clears the appropriate   *
+ * error bits in other areas of SHub. Note that when the                *
+ * E_PRB_x bits are used to clear error bits in PRB registers,          *
+ * SPUR_RD and SPUR_WR may persist, because they require additional     *
+ * action to clear them. See the IPRBx and IXSS Register                *
+ * specifications.                                                      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ieclr_u {
+	u64 ii_ieclr_regval;
+	struct {
+		u64 i_e_prb_0:1;
+		u64 i_rsvd:7;
+		u64 i_e_prb_8:1;
+		u64 i_e_prb_9:1;
+		u64 i_e_prb_a:1;
+		u64 i_e_prb_b:1;
+		u64 i_e_prb_c:1;
+		u64 i_e_prb_d:1;
+		u64 i_e_prb_e:1;
+		u64 i_e_prb_f:1;
+		u64 i_e_crazy:1;
+		u64 i_e_bte_0:1;
+		u64 i_e_bte_1:1;
+		u64 i_reserved_1:10;
+		u64 i_spur_rd_hdr:1;
+		u64 i_cam_intr_to:1;
+		u64 i_cam_overflow:1;
+		u64 i_cam_read_miss:1;
+		u64 i_ioq_rep_underflow:1;
+		u64 i_ioq_req_underflow:1;
+		u64 i_ioq_rep_overflow:1;
+		u64 i_ioq_req_overflow:1;
+		u64 i_iiq_rep_overflow:1;
+		u64 i_iiq_req_overflow:1;
+		u64 i_ii_xn_rep_cred_overflow:1;
+		u64 i_ii_xn_req_cred_overflow:1;
+		u64 i_ii_xn_invalid_cmd:1;
+		u64 i_xn_ii_invalid_cmd:1;
+		u64 i_reserved_2:21;
+	} ii_ieclr_fld_s;
+} ii_ieclr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register controls both BTEs. SOFT_RESET is intended for        *
+ * recovery after an error. COUNT controls the total number of CRBs     *
+ * that both BTEs (combined) can use, which affects total BTE           *
+ * bandwidth.                                                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibcr_u {
+	u64 ii_ibcr_regval;
+	struct {
+		u64 i_count:4;
+		u64 i_rsvd_1:4;
+		u64 i_soft_reset:1;
+		u64 i_rsvd:55;
+	} ii_ibcr_fld_s;
+} ii_ibcr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the header of a spurious read response       *
+ * received from Crosstalk. A spurious read response is defined as a    *
+ * read response received by II from a widget for which (1) the SIDN    *
+ * has a value between 1 and 7, inclusive (II never sends requests to   *
+ * these widgets (2) there is no valid IPRTE register which             *
+ * corresponds to the TNUM, or (3) the widget indicated in SIDN is      *
+ * not the same as the widget recorded in the IPRTE register            *
+ * referenced by the TNUM. If this condition is true, and if the        *
+ * IXSS[VALID] bit is clear, then the header of the spurious read       *
+ * response is capture in IXSM and IXSS, and IXSS[VALID] is set. The    *
+ * errant header is thereby captured, and no further spurious read      *
+ * respones are captured until IXSS[VALID] is cleared by setting the    *
+ * appropriate bit in IECLR. Every time a spurious read response is     *
+ * detected, the SPUR_RD bit of the PRB corresponding to the incoming   *
+ * message's SIDN field is set. This always happens, regarless of       *
+ * whether a header is captured. The programmer should check            *
+ * IXSM[SIDN] to determine which widget sent the spurious response,     *
+ * because there may be more than one SPUR_RD bit set in the PRB        *
+ * registers. The widget indicated by IXSM[SIDN] was the first          *
+ * spurious read response to be received since the last time            *
+ * IXSS[VALID] was clear. The SPUR_RD bit of the corresponding PRB      *
+ * will be set. Any SPUR_RD bits in any other PRB registers indicate    *
+ * spurious messages from other widets which were detected after the    *
+ * header was captured..                                                *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ixsm_u {
+	u64 ii_ixsm_regval;
+	struct {
+		u64 i_byte_en:32;
+		u64 i_reserved:1;
+		u64 i_tag:3;
+		u64 i_alt_pactyp:4;
+		u64 i_bo:1;
+		u64 i_error:1;
+		u64 i_vbpm:1;
+		u64 i_gbr:1;
+		u64 i_ds:2;
+		u64 i_ct:1;
+		u64 i_tnum:5;
+		u64 i_pactyp:4;
+		u64 i_sidn:4;
+		u64 i_didn:4;
+	} ii_ixsm_fld_s;
+} ii_ixsm_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the sideband bits of a spurious read         *
+ * response received from Crosstalk.                                    *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ixss_u {
+	u64 ii_ixss_regval;
+	struct {
+		u64 i_sideband:8;
+		u64 i_rsvd:55;
+		u64 i_valid:1;
+	} ii_ixss_fld_s;
+} ii_ixss_u_t;
+
+/************************************************************************
+ *									*
+ *  This register enables software to access the II LLP's test port.    *
+ * Refer to the LLP 2.5 documentation for an explanation of the test    *
+ * port. Software can write to this register to program the values      *
+ * for the control fields (TestErrCapture, TestClear, TestFlit,         *
+ * TestMask and TestSeed). Similarly, software can read from this       *
+ * register to obtain the values of the test port's status outputs      *
+ * (TestCBerr, TestValid and TestData).                                 *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ilct_u {
+	u64 ii_ilct_regval;
+	struct {
+		u64 i_test_seed:20;
+		u64 i_test_mask:8;
+		u64 i_test_data:20;
+		u64 i_test_valid:1;
+		u64 i_test_cberr:1;
+		u64 i_test_flit:3;
+		u64 i_test_clear:1;
+		u64 i_test_err_capture:1;
+		u64 i_rsvd:9;
+	} ii_ilct_fld_s;
+} ii_ilct_u_t;
+
+/************************************************************************
+ *									*
+ *  If the II detects an illegal incoming Duplonet packet (request or   *
+ * reply) when VALID==0 in the IIEPH1 register, then it saves the       *
+ * contents of the packet's header flit in the IIEPH1 and IIEPH2        *
+ * registers, sets the VALID bit in IIEPH1, clears the OVERRUN bit,     *
+ * and assigns a value to the ERR_TYPE field which indicates the        *
+ * specific nature of the error. The II recognizes four different       *
+ * types of errors: short request packets (ERR_TYPE==2), short reply    *
+ * packets (ERR_TYPE==3), long request packets (ERR_TYPE==4) and long   *
+ * reply packets (ERR_TYPE==5). The encodings for these types of        *
+ * errors were chosen to be consistent with the same types of errors    *
+ * indicated by the ERR_TYPE field in the LB_ERROR_HDR1 register (in    *
+ * the LB unit). If the II detects an illegal incoming Duplonet         *
+ * packet when VALID==1 in the IIEPH1 register, then it merely sets     *
+ * the OVERRUN bit to indicate that a subsequent error has happened,    *
+ * and does nothing further.                                            *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iieph1_u {
+	u64 ii_iieph1_regval;
+	struct {
+		u64 i_command:7;
+		u64 i_rsvd_5:1;
+		u64 i_suppl:14;
+		u64 i_rsvd_4:1;
+		u64 i_source:14;
+		u64 i_rsvd_3:1;
+		u64 i_err_type:4;
+		u64 i_rsvd_2:4;
+		u64 i_overrun:1;
+		u64 i_rsvd_1:3;
+		u64 i_valid:1;
+		u64 i_rsvd:13;
+	} ii_iieph1_fld_s;
+} ii_iieph1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register holds the Address field from the header flit of an    *
+ * incoming erroneous Duplonet packet, along with the tail bit which    *
+ * accompanied this header flit. This register is essentially an        *
+ * extension of IIEPH1. Two registers were necessary because the 64     *
+ * bits available in only a single register were insufficient to        *
+ * capture the entire header flit of an erroneous packet.               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iieph2_u {
+	u64 ii_iieph2_regval;
+	struct {
+		u64 i_rsvd_0:3;
+		u64 i_address:47;
+		u64 i_rsvd_1:10;
+		u64 i_tail:1;
+		u64 i_rsvd:3;
+	} ii_iieph2_fld_s;
+} ii_iieph2_u_t;
+
+/******************************/
+
+/************************************************************************
+ *									*
+ *  This register's value is a bit vector that guards access from SXBs  *
+ * to local registers within the II as well as to external Crosstalk    *
+ * widgets								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_islapr_u {
+	u64 ii_islapr_regval;
+	struct {
+		u64 i_region:64;
+	} ii_islapr_fld_s;
+} ii_islapr_u_t;
+
+/************************************************************************
+ *									*
+ *  A write to this register of the 56-bit value "Pup+Bun" will cause	*
+ * the bit in the ISLAPR register corresponding to the region of the	*
+ * requestor to be set (access allowed).				(
+ *									*
+ ************************************************************************/
+
+typedef union ii_islapo_u {
+	u64 ii_islapo_regval;
+	struct {
+		u64 i_io_sbx_ovrride:56;
+		u64 i_rsvd:8;
+	} ii_islapo_fld_s;
+} ii_islapo_u_t;
+
+/************************************************************************
+ *									*
+ *  Determines how long the wrapper will wait aftr an interrupt is	*
+ * initially issued from the II before it times out the outstanding	*
+ * interrupt and drops it from the interrupt queue.			* 
+ *									*
+ ************************************************************************/
+
+typedef union ii_iwi_u {
+	u64 ii_iwi_regval;
+	struct {
+		u64 i_prescale:24;
+		u64 i_rsvd:8;
+		u64 i_timeout:8;
+		u64 i_rsvd1:8;
+		u64 i_intrpt_retry_period:8;
+		u64 i_rsvd2:8;
+	} ii_iwi_fld_s;
+} ii_iwi_u_t;
+
+/************************************************************************
+ *									*
+ *  Log errors which have occurred in the II wrapper. The errors are	*
+ * cleared by writing to the IECLR register.				* 
+ *									*
+ ************************************************************************/
+
+typedef union ii_iwel_u {
+	u64 ii_iwel_regval;
+	struct {
+		u64 i_intr_timed_out:1;
+		u64 i_rsvd:7;
+		u64 i_cam_overflow:1;
+		u64 i_cam_read_miss:1;
+		u64 i_rsvd1:2;
+		u64 i_ioq_rep_underflow:1;
+		u64 i_ioq_req_underflow:1;
+		u64 i_ioq_rep_overflow:1;
+		u64 i_ioq_req_overflow:1;
+		u64 i_iiq_rep_overflow:1;
+		u64 i_iiq_req_overflow:1;
+		u64 i_rsvd2:6;
+		u64 i_ii_xn_rep_cred_over_under:1;
+		u64 i_ii_xn_req_cred_over_under:1;
+		u64 i_rsvd3:6;
+		u64 i_ii_xn_invalid_cmd:1;
+		u64 i_xn_ii_invalid_cmd:1;
+		u64 i_rsvd4:30;
+	} ii_iwel_fld_s;
+} ii_iwel_u_t;
+
+/************************************************************************
+ *									*
+ *  Controls the II wrapper.						* 
+ *									*
+ ************************************************************************/
+
+typedef union ii_iwc_u {
+	u64 ii_iwc_regval;
+	struct {
+		u64 i_dma_byte_swap:1;
+		u64 i_rsvd:3;
+		u64 i_cam_read_lines_reset:1;
+		u64 i_rsvd1:3;
+		u64 i_ii_xn_cred_over_under_log:1;
+		u64 i_rsvd2:19;
+		u64 i_xn_rep_iq_depth:5;
+		u64 i_rsvd3:3;
+		u64 i_xn_req_iq_depth:5;
+		u64 i_rsvd4:3;
+		u64 i_iiq_depth:6;
+		u64 i_rsvd5:12;
+		u64 i_force_rep_cred:1;
+		u64 i_force_req_cred:1;
+	} ii_iwc_fld_s;
+} ii_iwc_u_t;
+
+/************************************************************************
+ *									*
+ *  Status in the II wrapper.						* 
+ *									*
+ ************************************************************************/
+
+typedef union ii_iws_u {
+	u64 ii_iws_regval;
+	struct {
+		u64 i_xn_rep_iq_credits:5;
+		u64 i_rsvd:3;
+		u64 i_xn_req_iq_credits:5;
+		u64 i_rsvd1:51;
+	} ii_iws_fld_s;
+} ii_iws_u_t;
+
+/************************************************************************
+ *									*
+ *  Masks errors in the IWEL register.					*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iweim_u {
+	u64 ii_iweim_regval;
+	struct {
+		u64 i_intr_timed_out:1;
+		u64 i_rsvd:7;
+		u64 i_cam_overflow:1;
+		u64 i_cam_read_miss:1;
+		u64 i_rsvd1:2;
+		u64 i_ioq_rep_underflow:1;
+		u64 i_ioq_req_underflow:1;
+		u64 i_ioq_rep_overflow:1;
+		u64 i_ioq_req_overflow:1;
+		u64 i_iiq_rep_overflow:1;
+		u64 i_iiq_req_overflow:1;
+		u64 i_rsvd2:6;
+		u64 i_ii_xn_rep_cred_overflow:1;
+		u64 i_ii_xn_req_cred_overflow:1;
+		u64 i_rsvd3:6;
+		u64 i_ii_xn_invalid_cmd:1;
+		u64 i_xn_ii_invalid_cmd:1;
+		u64 i_rsvd4:30;
+	} ii_iweim_fld_s;
+} ii_iweim_u_t;
+
+/************************************************************************
+ *									*
+ *  A write to this register causes a particular field in the           *
+ * corresponding widget's PRB entry to be adjusted up or down by 1.     *
+ * This counter should be used when recovering from error and reset     *
+ * conditions. Note that software would be capable of causing           *
+ * inadvertent overflow or underflow of these counters.                 *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ipca_u {
+	u64 ii_ipca_regval;
+	struct {
+		u64 i_wid:4;
+		u64 i_adjust:1;
+		u64 i_rsvd_1:3;
+		u64 i_field:2;
+		u64 i_rsvd:54;
+	} ii_ipca_fld_s;
+} ii_ipca_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte0a_u {
+	u64 ii_iprte0a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte0a_fld_s;
+} ii_iprte0a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte1a_u {
+	u64 ii_iprte1a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte1a_fld_s;
+} ii_iprte1a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte2a_u {
+	u64 ii_iprte2a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte2a_fld_s;
+} ii_iprte2a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte3a_u {
+	u64 ii_iprte3a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte3a_fld_s;
+} ii_iprte3a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte4a_u {
+	u64 ii_iprte4a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte4a_fld_s;
+} ii_iprte4a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte5a_u {
+	u64 ii_iprte5a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte5a_fld_s;
+} ii_iprte5a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte6a_u {
+	u64 ii_iprte6a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte6a_fld_s;
+} ii_iprte6a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte7a_u {
+	u64 ii_iprte7a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprtea7_fld_s;
+} ii_iprte7a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte0b_u {
+	u64 ii_iprte0b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte0b_fld_s;
+} ii_iprte0b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte1b_u {
+	u64 ii_iprte1b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte1b_fld_s;
+} ii_iprte1b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte2b_u {
+	u64 ii_iprte2b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte2b_fld_s;
+} ii_iprte2b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte3b_u {
+	u64 ii_iprte3b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte3b_fld_s;
+} ii_iprte3b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte4b_u {
+	u64 ii_iprte4b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte4b_fld_s;
+} ii_iprte4b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte5b_u {
+	u64 ii_iprte5b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte5b_fld_s;
+} ii_iprte5b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte6b_u {
+	u64 ii_iprte6b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+
+	} ii_iprte6b_fld_s;
+} ii_iprte6b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte7b_u {
+	u64 ii_iprte7b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte7b_fld_s;
+} ii_iprte7b_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  SHub II contains a feature which did not exist in      *
+ * the Hub which automatically cleans up after a Read Response          *
+ * timeout, including deallocation of the IPRTE and recovery of IBuf    *
+ * space. The inclusion of this register in SHub is for backward        *
+ * compatibility                                                        *
+ * A write to this register causes an entry from the table of           *
+ * outstanding PIO Read Requests to be freed and returned to the        *
+ * stack of free entries. This register is used in handling the         *
+ * timeout errors that result in a PIO Reply never returning from       *
+ * Crosstalk.                                                           *
+ * Note that this register does not affect the contents of the IPRTE    *
+ * registers. The Valid bits in those registers have to be              *
+ * specifically turned off by software.                                 *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ipdr_u {
+	u64 ii_ipdr_regval;
+	struct {
+		u64 i_te:3;
+		u64 i_rsvd_1:1;
+		u64 i_pnd:1;
+		u64 i_init_rpcnt:1;
+		u64 i_rsvd:58;
+	} ii_ipdr_fld_s;
+} ii_ipdr_u_t;
+
+/************************************************************************
+ *									*
+ *  A write to this register causes a CRB entry to be returned to the   *
+ * queue of free CRBs. The entry should have previously been cleared    *
+ * (mark bit) via backdoor access to the pertinent CRB entry. This      *
+ * register is used in the last step of handling the errors that are    *
+ * captured and marked in CRB entries.  Briefly: 1) first error for     *
+ * DMA write from a particular device, and first error for a            *
+ * particular BTE stream, lead to a marked CRB entry, and processor     *
+ * interrupt, 2) software reads the error information captured in the   *
+ * CRB entry, and presumably takes some corrective action, 3)           *
+ * software clears the mark bit, and finally 4) software writes to      *
+ * the ICDR register to return the CRB entry to the list of free CRB    *
+ * entries.                                                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icdr_u {
+	u64 ii_icdr_regval;
+	struct {
+		u64 i_crb_num:4;
+		u64 i_pnd:1;
+		u64 i_rsvd:59;
+	} ii_icdr_fld_s;
+} ii_icdr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register provides debug access to two FIFOs inside of II.      *
+ * Both IOQ_MAX* fields of this register contain the instantaneous      *
+ * depth (in units of the number of available entries) of the           *
+ * associated IOQ FIFO.  A read of this register will return the        *
+ * number of free entries on each FIFO at the time of the read.  So     *
+ * when a FIFO is idle, the associated field contains the maximum       *
+ * depth of the FIFO.  This register is writable for debug reasons      *
+ * and is intended to be written with the maximum desired FIFO depth    *
+ * while the FIFO is idle. Software must assure that II is idle when    *
+ * this register is written. If there are any active entries in any     *
+ * of these FIFOs when this register is written, the results are        *
+ * undefined.                                                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ifdr_u {
+	u64 ii_ifdr_regval;
+	struct {
+		u64 i_ioq_max_rq:7;
+		u64 i_set_ioq_rq:1;
+		u64 i_ioq_max_rp:7;
+		u64 i_set_ioq_rp:1;
+		u64 i_rsvd:48;
+	} ii_ifdr_fld_s;
+} ii_ifdr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register allows the II to become sluggish in removing          *
+ * messages from its inbound queue (IIQ). This will cause messages to   *
+ * back up in either virtual channel. Disabling the "molasses" mode     *
+ * subsequently allows the II to be tested under stress. In the         *
+ * sluggish ("Molasses") mode, the localized effects of congestion      *
+ * can be observed.                                                     *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iiap_u {
+	u64 ii_iiap_regval;
+	struct {
+		u64 i_rq_mls:6;
+		u64 i_rsvd_1:2;
+		u64 i_rp_mls:6;
+		u64 i_rsvd:50;
+	} ii_iiap_fld_s;
+} ii_iiap_u_t;
+
+/************************************************************************
+ *									*
+ *  This register allows several parameters of CRB operation to be      *
+ * set. Note that writing to this register can have catastrophic side   *
+ * effects, if the CRB is not quiescent, i.e. if the CRB is             *
+ * processing protocol messages when the write occurs.                  *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icmr_u {
+	u64 ii_icmr_regval;
+	struct {
+		u64 i_sp_msg:1;
+		u64 i_rd_hdr:1;
+		u64 i_rsvd_4:2;
+		u64 i_c_cnt:4;
+		u64 i_rsvd_3:4;
+		u64 i_clr_rqpd:1;
+		u64 i_clr_rppd:1;
+		u64 i_rsvd_2:2;
+		u64 i_fc_cnt:4;
+		u64 i_crb_vld:15;
+		u64 i_crb_mark:15;
+		u64 i_rsvd_1:2;
+		u64 i_precise:1;
+		u64 i_rsvd:11;
+	} ii_icmr_fld_s;
+} ii_icmr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register allows control of the table portion of the CRB        *
+ * logic via software. Control operations from this register have       *
+ * priority over all incoming Crosstalk or BTE requests.                *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iccr_u {
+	u64 ii_iccr_regval;
+	struct {
+		u64 i_crb_num:4;
+		u64 i_rsvd_1:4;
+		u64 i_cmd:8;
+		u64 i_pending:1;
+		u64 i_rsvd:47;
+	} ii_iccr_fld_s;
+} ii_iccr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register allows the maximum timeout value to be programmed.    *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icto_u {
+	u64 ii_icto_regval;
+	struct {
+		u64 i_timeout:8;
+		u64 i_rsvd:56;
+	} ii_icto_fld_s;
+} ii_icto_u_t;
+
+/************************************************************************
+ *									*
+ *  This register allows the timeout prescalar to be programmed. An     *
+ * internal counter is associated with this register. When the          *
+ * internal counter reaches the value of the PRESCALE field, the        *
+ * timer registers in all valid CRBs are incremented (CRBx_D[TIMEOUT]   *
+ * field). The internal counter resets to zero, and then continues      *
+ * counting.                                                            *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ictp_u {
+	u64 ii_ictp_regval;
+	struct {
+		u64 i_prescale:24;
+		u64 i_rsvd:40;
+	} ii_ictp_fld_s;
+} ii_ictp_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 15 CRB Entries (ICRB0 to ICRBE) that are     *
+ * used for Crosstalk operations (both cacheline and partial            *
+ * operations) or BTE/IO. Because the CRB entries are very wide, five   *
+ * registers (_A to _E) are required to read and write each entry.      *
+ * The CRB Entry registers can be conceptualized as rows and columns    *
+ * (illustrated in the table above). Each row contains the 4            *
+ * registers required for a single CRB Entry. The first doubleword      *
+ * (column) for each entry is labeled A, and the second doubleword      *
+ * (higher address) is labeled B, the third doubleword is labeled C,    *
+ * the fourth doubleword is labeled D and the fifth doubleword is       *
+ * labeled E. All CRB entries have their addresses on a quarter         *
+ * cacheline aligned boundary.                   *
+ * Upon reset, only the following fields are initialized: valid         *
+ * (VLD), priority count, timeout, timeout valid, and context valid.    *
+ * All other bits should be cleared by software before use (after       *
+ * recovering any potential error state from before the reset).         *
+ * The following four tables summarize the format for the four          *
+ * registers that are used for each ICRB# Entry.                        *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icrb0_a_u {
+	u64 ii_icrb0_a_regval;
+	struct {
+		u64 ia_iow:1;
+		u64 ia_vld:1;
+		u64 ia_addr:47;
+		u64 ia_tnum:5;
+		u64 ia_sidn:4;
+		u64 ia_rsvd:6;
+	} ii_icrb0_a_fld_s;
+} ii_icrb0_a_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 15 CRB Entries (ICRB0 to ICRBE) that are     *
+ * used for Crosstalk operations (both cacheline and partial            *
+ * operations) or BTE/IO. Because the CRB entries are very wide, five   *
+ * registers (_A to _E) are required to read and write each entry.      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icrb0_b_u {
+	u64 ii_icrb0_b_regval;
+	struct {
+		u64 ib_xt_err:1;
+		u64 ib_mark:1;
+		u64 ib_ln_uce:1;
+		u64 ib_errcode:3;
+		u64 ib_error:1;
+		u64 ib_stall__bte_1:1;
+		u64 ib_stall__bte_0:1;
+		u64 ib_stall__intr:1;
+		u64 ib_stall_ib:1;
+		u64 ib_intvn:1;
+		u64 ib_wb:1;
+		u64 ib_hold:1;
+		u64 ib_ack:1;
+		u64 ib_resp:1;
+		u64 ib_ack_cnt:11;
+		u64 ib_rsvd:7;
+		u64 ib_exc:5;
+		u64 ib_init:3;
+		u64 ib_imsg:8;
+		u64 ib_imsgtype:2;
+		u64 ib_use_old:1;
+		u64 ib_rsvd_1:11;
+	} ii_icrb0_b_fld_s;
+} ii_icrb0_b_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 15 CRB Entries (ICRB0 to ICRBE) that are     *
+ * used for Crosstalk operations (both cacheline and partial            *
+ * operations) or BTE/IO. Because the CRB entries are very wide, five   *
+ * registers (_A to _E) are required to read and write each entry.      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icrb0_c_u {
+	u64 ii_icrb0_c_regval;
+	struct {
+		u64 ic_source:15;
+		u64 ic_size:2;
+		u64 ic_ct:1;
+		u64 ic_bte_num:1;
+		u64 ic_gbr:1;
+		u64 ic_resprqd:1;
+		u64 ic_bo:1;
+		u64 ic_suppl:15;
+		u64 ic_rsvd:27;
+	} ii_icrb0_c_fld_s;
+} ii_icrb0_c_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 15 CRB Entries (ICRB0 to ICRBE) that are     *
+ * used for Crosstalk operations (both cacheline and partial            *
+ * operations) or BTE/IO. Because the CRB entries are very wide, five   *
+ * registers (_A to _E) are required to read and write each entry.      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icrb0_d_u {
+	u64 ii_icrb0_d_regval;
+	struct {
+		u64 id_pa_be:43;
+		u64 id_bte_op:1;
+		u64 id_pr_psc:4;
+		u64 id_pr_cnt:4;
+		u64 id_sleep:1;
+		u64 id_rsvd:11;
+	} ii_icrb0_d_fld_s;
+} ii_icrb0_d_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 15 CRB Entries (ICRB0 to ICRBE) that are     *
+ * used for Crosstalk operations (both cacheline and partial            *
+ * operations) or BTE/IO. Because the CRB entries are very wide, five   *
+ * registers (_A to _E) are required to read and write each entry.      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icrb0_e_u {
+	u64 ii_icrb0_e_regval;
+	struct {
+		u64 ie_timeout:8;
+		u64 ie_context:15;
+		u64 ie_rsvd:1;
+		u64 ie_tvld:1;
+		u64 ie_cvld:1;
+		u64 ie_rsvd_0:38;
+	} ii_icrb0_e_fld_s;
+} ii_icrb0_e_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the lower 64 bits of the header of the       *
+ * spurious message captured by II. Valid when the SP_MSG bit in ICMR   *
+ * register is set.                                                     *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icsml_u {
+	u64 ii_icsml_regval;
+	struct {
+		u64 i_tt_addr:47;
+		u64 i_newsuppl_ex:14;
+		u64 i_reserved:2;
+		u64 i_overflow:1;
+	} ii_icsml_fld_s;
+} ii_icsml_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the middle 64 bits of the header of the      *
+ * spurious message captured by II. Valid when the SP_MSG bit in ICMR   *
+ * register is set.                                                     *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icsmm_u {
+	u64 ii_icsmm_regval;
+	struct {
+		u64 i_tt_ack_cnt:11;
+		u64 i_reserved:53;
+	} ii_icsmm_fld_s;
+} ii_icsmm_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the microscopic state, all the inputs to     *
+ * the protocol table, captured with the spurious message. Valid when   *
+ * the SP_MSG bit in the ICMR register is set.                          *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icsmh_u {
+	u64 ii_icsmh_regval;
+	struct {
+		u64 i_tt_vld:1;
+		u64 i_xerr:1;
+		u64 i_ft_cwact_o:1;
+		u64 i_ft_wact_o:1;
+		u64 i_ft_active_o:1;
+		u64 i_sync:1;
+		u64 i_mnusg:1;
+		u64 i_mnusz:1;
+		u64 i_plusz:1;
+		u64 i_plusg:1;
+		u64 i_tt_exc:5;
+		u64 i_tt_wb:1;
+		u64 i_tt_hold:1;
+		u64 i_tt_ack:1;
+		u64 i_tt_resp:1;
+		u64 i_tt_intvn:1;
+		u64 i_g_stall_bte1:1;
+		u64 i_g_stall_bte0:1;
+		u64 i_g_stall_il:1;
+		u64 i_g_stall_ib:1;
+		u64 i_tt_imsg:8;
+		u64 i_tt_imsgtype:2;
+		u64 i_tt_use_old:1;
+		u64 i_tt_respreqd:1;
+		u64 i_tt_bte_num:1;
+		u64 i_cbn:1;
+		u64 i_match:1;
+		u64 i_rpcnt_lt_34:1;
+		u64 i_rpcnt_ge_34:1;
+		u64 i_rpcnt_lt_18:1;
+		u64 i_rpcnt_ge_18:1;
+		u64 i_rpcnt_lt_2:1;
+		u64 i_rpcnt_ge_2:1;
+		u64 i_rqcnt_lt_18:1;
+		u64 i_rqcnt_ge_18:1;
+		u64 i_rqcnt_lt_2:1;
+		u64 i_rqcnt_ge_2:1;
+		u64 i_tt_device:7;
+		u64 i_tt_init:3;
+		u64 i_reserved:5;
+	} ii_icsmh_fld_s;
+} ii_icsmh_u_t;
+
+/************************************************************************
+ *									*
+ *  The Shub DEBUG unit provides a 3-bit selection signal to the        *
+ * II core and a 3-bit selection signal to the fsbclk domain in the II  *
+ * wrapper.                                                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_idbss_u {
+	u64 ii_idbss_regval;
+	struct {
+		u64 i_iioclk_core_submenu:3;
+		u64 i_rsvd:5;
+		u64 i_fsbclk_wrapper_submenu:3;
+		u64 i_rsvd_1:5;
+		u64 i_iioclk_menu:5;
+		u64 i_rsvd_2:43;
+	} ii_idbss_fld_s;
+} ii_idbss_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register is used to set up the length for a       *
+ * transfer and then to monitor the progress of that transfer. This     *
+ * register needs to be initialized before a transfer is started. A     *
+ * legitimate write to this register will set the Busy bit, clear the   *
+ * Error bit, and initialize the length to the value desired.           *
+ * While the transfer is in progress, hardware will decrement the       *
+ * length field with each successful block that is copied. Once the     *
+ * transfer completes, hardware will clear the Busy bit. The length     *
+ * field will also contain the number of cache lines left to be         *
+ * transferred.                                                         *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibls0_u {
+	u64 ii_ibls0_regval;
+	struct {
+		u64 i_length:16;
+		u64 i_error:1;
+		u64 i_rsvd_1:3;
+		u64 i_busy:1;
+		u64 i_rsvd:43;
+	} ii_ibls0_fld_s;
+} ii_ibls0_u_t;
+
+/************************************************************************
+ *									*
+ *  This register should be loaded before a transfer is started. The    *
+ * address to be loaded in bits 39:0 is the 40-bit TRex+ physical       *
+ * address as described in Section 1.3, Figure2 and Figure3. Since      *
+ * the bottom 7 bits of the address are always taken to be zero, BTE    *
+ * transfers are always cacheline-aligned.                              *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibsa0_u {
+	u64 ii_ibsa0_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:42;
+		u64 i_rsvd:15;
+	} ii_ibsa0_fld_s;
+} ii_ibsa0_u_t;
+
+/************************************************************************
+ *									*
+ *  This register should be loaded before a transfer is started. The    *
+ * address to be loaded in bits 39:0 is the 40-bit TRex+ physical       *
+ * address as described in Section 1.3, Figure2 and Figure3. Since      *
+ * the bottom 7 bits of the address are always taken to be zero, BTE    *
+ * transfers are always cacheline-aligned.                              *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibda0_u {
+	u64 ii_ibda0_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:42;
+		u64 i_rsvd:15;
+	} ii_ibda0_fld_s;
+} ii_ibda0_u_t;
+
+/************************************************************************
+ *									*
+ *  Writing to this register sets up the attributes of the transfer     *
+ * and initiates the transfer operation. Reading this register has      *
+ * the side effect of terminating any transfer in progress. Note:       *
+ * stopping a transfer midstream could have an adverse impact on the    *
+ * other BTE. If a BTE stream has to be stopped (due to error           *
+ * handling for example), both BTE streams should be stopped and        *
+ * their transfers discarded.                                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibct0_u {
+	u64 ii_ibct0_regval;
+	struct {
+		u64 i_zerofill:1;
+		u64 i_rsvd_2:3;
+		u64 i_notify:1;
+		u64 i_rsvd_1:3;
+		u64 i_poison:1;
+		u64 i_rsvd:55;
+	} ii_ibct0_fld_s;
+} ii_ibct0_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the address to which the WINV is sent.       *
+ * This address has to be cache line aligned.                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibna0_u {
+	u64 ii_ibna0_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:42;
+		u64 i_rsvd:15;
+	} ii_ibna0_fld_s;
+} ii_ibna0_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the programmable level as well as the node   *
+ * ID and PI unit of the processor to which the interrupt will be       *
+ * sent.								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibia0_u {
+	u64 ii_ibia0_regval;
+	struct {
+		u64 i_rsvd_2:1;
+		u64 i_node_id:11;
+		u64 i_rsvd_1:4;
+		u64 i_level:7;
+		u64 i_rsvd:41;
+	} ii_ibia0_fld_s;
+} ii_ibia0_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register is used to set up the length for a       *
+ * transfer and then to monitor the progress of that transfer. This     *
+ * register needs to be initialized before a transfer is started. A     *
+ * legitimate write to this register will set the Busy bit, clear the   *
+ * Error bit, and initialize the length to the value desired.           *
+ * While the transfer is in progress, hardware will decrement the       *
+ * length field with each successful block that is copied. Once the     *
+ * transfer completes, hardware will clear the Busy bit. The length     *
+ * field will also contain the number of cache lines left to be         *
+ * transferred.                                                         *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibls1_u {
+	u64 ii_ibls1_regval;
+	struct {
+		u64 i_length:16;
+		u64 i_error:1;
+		u64 i_rsvd_1:3;
+		u64 i_busy:1;
+		u64 i_rsvd:43;
+	} ii_ibls1_fld_s;
+} ii_ibls1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register should be loaded before a transfer is started. The    *
+ * address to be loaded in bits 39:0 is the 40-bit TRex+ physical       *
+ * address as described in Section 1.3, Figure2 and Figure3. Since      *
+ * the bottom 7 bits of the address are always taken to be zero, BTE    *
+ * transfers are always cacheline-aligned.                              *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibsa1_u {
+	u64 ii_ibsa1_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:33;
+		u64 i_rsvd:24;
+	} ii_ibsa1_fld_s;
+} ii_ibsa1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register should be loaded before a transfer is started. The    *
+ * address to be loaded in bits 39:0 is the 40-bit TRex+ physical       *
+ * address as described in Section 1.3, Figure2 and Figure3. Since      *
+ * the bottom 7 bits of the address are always taken to be zero, BTE    *
+ * transfers are always cacheline-aligned.                              *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibda1_u {
+	u64 ii_ibda1_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:33;
+		u64 i_rsvd:24;
+	} ii_ibda1_fld_s;
+} ii_ibda1_u_t;
+
+/************************************************************************
+ *									*
+ *  Writing to this register sets up the attributes of the transfer     *
+ * and initiates the transfer operation. Reading this register has      *
+ * the side effect of terminating any transfer in progress. Note:       *
+ * stopping a transfer midstream could have an adverse impact on the    *
+ * other BTE. If a BTE stream has to be stopped (due to error           *
+ * handling for example), both BTE streams should be stopped and        *
+ * their transfers discarded.                                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibct1_u {
+	u64 ii_ibct1_regval;
+	struct {
+		u64 i_zerofill:1;
+		u64 i_rsvd_2:3;
+		u64 i_notify:1;
+		u64 i_rsvd_1:3;
+		u64 i_poison:1;
+		u64 i_rsvd:55;
+	} ii_ibct1_fld_s;
+} ii_ibct1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the address to which the WINV is sent.       *
+ * This address has to be cache line aligned.                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibna1_u {
+	u64 ii_ibna1_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:33;
+		u64 i_rsvd:24;
+	} ii_ibna1_fld_s;
+} ii_ibna1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the programmable level as well as the node   *
+ * ID and PI unit of the processor to which the interrupt will be       *
+ * sent.								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibia1_u {
+	u64 ii_ibia1_regval;
+	struct {
+		u64 i_pi_id:1;
+		u64 i_node_id:8;
+		u64 i_rsvd_1:7;
+		u64 i_level:7;
+		u64 i_rsvd:41;
+	} ii_ibia1_fld_s;
+} ii_ibia1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register defines the resources that feed information into      *
+ * the two performance counters located in the IO Performance           *
+ * Profiling Register. There are 17 different quantities that can be    *
+ * measured. Given these 17 different options, the two performance      *
+ * counters have 15 of them in common; menu selections 0 through 0xE    *
+ * are identical for each performance counter. As for the other two     *
+ * options, one is available from one performance counter and the       *
+ * other is available from the other performance counter. Hence, the    *
+ * II supports all 17*16=272 possible combinations of quantities to     *
+ * measure.                                                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ipcr_u {
+	u64 ii_ipcr_regval;
+	struct {
+		u64 i_ippr0_c:4;
+		u64 i_ippr1_c:4;
+		u64 i_icct:8;
+		u64 i_rsvd:48;
+	} ii_ipcr_fld_s;
+} ii_ipcr_u_t;
+
+/************************************************************************
+ *									*
+ *									*
+ *									*
+ ************************************************************************/
+
+typedef union ii_ippr_u {
+	u64 ii_ippr_regval;
+	struct {
+		u64 i_ippr0:32;
+		u64 i_ippr1:32;
+	} ii_ippr_fld_s;
+} ii_ippr_u_t;
+
+/************************************************************************
+ *									*
+ * The following defines which were not formed into structures are	*
+ * probably identical to another register, and the name of the		*
+ * register is provided against each of these registers. This		*
+ * information needs to be checked carefully				*
+ *									*
+ *		IIO_ICRB1_A		IIO_ICRB0_A			*
+ *		IIO_ICRB1_B		IIO_ICRB0_B			*
+ *		IIO_ICRB1_C		IIO_ICRB0_C			*
+ *		IIO_ICRB1_D		IIO_ICRB0_D			*
+ *		IIO_ICRB1_E		IIO_ICRB0_E			*
+ *		IIO_ICRB2_A		IIO_ICRB0_A			*
+ *		IIO_ICRB2_B		IIO_ICRB0_B			*
+ *		IIO_ICRB2_C		IIO_ICRB0_C			*
+ *		IIO_ICRB2_D		IIO_ICRB0_D			*
+ *		IIO_ICRB2_E		IIO_ICRB0_E			*
+ *		IIO_ICRB3_A		IIO_ICRB0_A			*
+ *		IIO_ICRB3_B		IIO_ICRB0_B			*
+ *		IIO_ICRB3_C		IIO_ICRB0_C			*
+ *		IIO_ICRB3_D		IIO_ICRB0_D			*
+ *		IIO_ICRB3_E		IIO_ICRB0_E			*
+ *		IIO_ICRB4_A		IIO_ICRB0_A			*
+ *		IIO_ICRB4_B		IIO_ICRB0_B			*
+ *		IIO_ICRB4_C		IIO_ICRB0_C			*
+ *		IIO_ICRB4_D		IIO_ICRB0_D			*
+ *		IIO_ICRB4_E		IIO_ICRB0_E			*
+ *		IIO_ICRB5_A		IIO_ICRB0_A			*
+ *		IIO_ICRB5_B		IIO_ICRB0_B			*
+ *		IIO_ICRB5_C		IIO_ICRB0_C			*
+ *		IIO_ICRB5_D		IIO_ICRB0_D			*
+ *		IIO_ICRB5_E		IIO_ICRB0_E			*
+ *		IIO_ICRB6_A		IIO_ICRB0_A			*
+ *		IIO_ICRB6_B		IIO_ICRB0_B			*
+ *		IIO_ICRB6_C		IIO_ICRB0_C			*
+ *		IIO_ICRB6_D		IIO_ICRB0_D			*
+ *		IIO_ICRB6_E		IIO_ICRB0_E			*
+ *		IIO_ICRB7_A		IIO_ICRB0_A			*
+ *		IIO_ICRB7_B		IIO_ICRB0_B			*
+ *		IIO_ICRB7_C		IIO_ICRB0_C			*
+ *		IIO_ICRB7_D		IIO_ICRB0_D			*
+ *		IIO_ICRB7_E		IIO_ICRB0_E			*
+ *		IIO_ICRB8_A		IIO_ICRB0_A			*
+ *		IIO_ICRB8_B		IIO_ICRB0_B			*
+ *		IIO_ICRB8_C		IIO_ICRB0_C			*
+ *		IIO_ICRB8_D		IIO_ICRB0_D			*
+ *		IIO_ICRB8_E		IIO_ICRB0_E			*
+ *		IIO_ICRB9_A		IIO_ICRB0_A			*
+ *		IIO_ICRB9_B		IIO_ICRB0_B			*
+ *		IIO_ICRB9_C		IIO_ICRB0_C			*
+ *		IIO_ICRB9_D		IIO_ICRB0_D			*
+ *		IIO_ICRB9_E		IIO_ICRB0_E			*
+ *		IIO_ICRBA_A		IIO_ICRB0_A			*
+ *		IIO_ICRBA_B		IIO_ICRB0_B			*
+ *		IIO_ICRBA_C		IIO_ICRB0_C			*
+ *		IIO_ICRBA_D		IIO_ICRB0_D			*
+ *		IIO_ICRBA_E		IIO_ICRB0_E			*
+ *		IIO_ICRBB_A		IIO_ICRB0_A			*
+ *		IIO_ICRBB_B		IIO_ICRB0_B			*
+ *		IIO_ICRBB_C		IIO_ICRB0_C			*
+ *		IIO_ICRBB_D		IIO_ICRB0_D			*
+ *		IIO_ICRBB_E		IIO_ICRB0_E			*
+ *		IIO_ICRBC_A		IIO_ICRB0_A			*
+ *		IIO_ICRBC_B		IIO_ICRB0_B			*
+ *		IIO_ICRBC_C		IIO_ICRB0_C			*
+ *		IIO_ICRBC_D		IIO_ICRB0_D			*
+ *		IIO_ICRBC_E		IIO_ICRB0_E			*
+ *		IIO_ICRBD_A		IIO_ICRB0_A			*
+ *		IIO_ICRBD_B		IIO_ICRB0_B			*
+ *		IIO_ICRBD_C		IIO_ICRB0_C			*
+ *		IIO_ICRBD_D		IIO_ICRB0_D			*
+ *		IIO_ICRBD_E		IIO_ICRB0_E			*
+ *		IIO_ICRBE_A		IIO_ICRB0_A			*
+ *		IIO_ICRBE_B		IIO_ICRB0_B			*
+ *		IIO_ICRBE_C		IIO_ICRB0_C			*
+ *		IIO_ICRBE_D		IIO_ICRB0_D			*
+ *		IIO_ICRBE_E		IIO_ICRB0_E			*
+ *									*
+ ************************************************************************/
+
+/*
+ * Slightly friendlier names for some common registers.
+ */
+#define IIO_WIDGET              IIO_WID		/* Widget identification */
+#define IIO_WIDGET_STAT         IIO_WSTAT	/* Widget status register */
+#define IIO_WIDGET_CTRL         IIO_WCR		/* Widget control register */
+#define IIO_PROTECT             IIO_ILAPR	/* IO interface protection */
+#define IIO_PROTECT_OVRRD       IIO_ILAPO	/* IO protect override */
+#define IIO_OUTWIDGET_ACCESS    IIO_IOWA	/* Outbound widget access */
+#define IIO_INWIDGET_ACCESS     IIO_IIWA	/* Inbound widget access */
+#define IIO_INDEV_ERR_MASK      IIO_IIDEM	/* Inbound device error mask */
+#define IIO_LLP_CSR             IIO_ILCSR	/* LLP control and status */
+#define IIO_LLP_LOG             IIO_ILLR	/* LLP log */
+#define IIO_XTALKCC_TOUT        IIO_IXCC	/* Xtalk credit count timeout */
+#define IIO_XTALKTT_TOUT        IIO_IXTT	/* Xtalk tail timeout */
+#define IIO_IO_ERR_CLR          IIO_IECLR	/* IO error clear */
+#define IIO_IGFX_0 		IIO_IGFX0
+#define IIO_IGFX_1 		IIO_IGFX1
+#define IIO_IBCT_0		IIO_IBCT0
+#define IIO_IBCT_1		IIO_IBCT1
+#define IIO_IBLS_0		IIO_IBLS0
+#define IIO_IBLS_1		IIO_IBLS1
+#define IIO_IBSA_0		IIO_IBSA0
+#define IIO_IBSA_1		IIO_IBSA1
+#define IIO_IBDA_0		IIO_IBDA0
+#define IIO_IBDA_1		IIO_IBDA1
+#define IIO_IBNA_0		IIO_IBNA0
+#define IIO_IBNA_1		IIO_IBNA1
+#define IIO_IBIA_0		IIO_IBIA0
+#define IIO_IBIA_1		IIO_IBIA1
+#define IIO_IOPRB_0		IIO_IPRB0
+
+#define IIO_PRTE_A(_x)		(IIO_IPRTE0_A + (8 * (_x)))
+#define IIO_PRTE_B(_x)		(IIO_IPRTE0_B + (8 * (_x)))
+#define IIO_NUM_PRTES		8	/* Total number of PRB table entries */
+#define IIO_WIDPRTE_A(x)	IIO_PRTE_A(((x) - 8))	/* widget ID to its PRTE num */
+#define IIO_WIDPRTE_B(x)	IIO_PRTE_B(((x) - 8))	/* widget ID to its PRTE num */
+
+#define IIO_NUM_IPRBS 		9
+
+#define IIO_LLP_CSR_IS_UP		0x00002000
+#define IIO_LLP_CSR_LLP_STAT_MASK       0x00003000
+#define IIO_LLP_CSR_LLP_STAT_SHFT       12
+
+#define IIO_LLP_CB_MAX  0xffff	/* in ILLR CB_CNT, Max Check Bit errors */
+#define IIO_LLP_SN_MAX  0xffff	/* in ILLR SN_CNT, Max Sequence Number errors */
+
+/* key to IIO_PROTECT_OVRRD */
+#define IIO_PROTECT_OVRRD_KEY   0x53474972756c6573ull	/* "SGIrules" */
+
+/* BTE register names */
+#define IIO_BTE_STAT_0          IIO_IBLS_0	/* Also BTE length/status 0 */
+#define IIO_BTE_SRC_0           IIO_IBSA_0	/* Also BTE source address  0 */
+#define IIO_BTE_DEST_0          IIO_IBDA_0	/* Also BTE dest. address 0 */
+#define IIO_BTE_CTRL_0          IIO_IBCT_0	/* Also BTE control/terminate 0 */
+#define IIO_BTE_NOTIFY_0        IIO_IBNA_0	/* Also BTE notification 0 */
+#define IIO_BTE_INT_0           IIO_IBIA_0	/* Also BTE interrupt 0 */
+#define IIO_BTE_OFF_0           0	/* Base offset from BTE 0 regs. */
+#define IIO_BTE_OFF_1   	(IIO_IBLS_1 - IIO_IBLS_0)	/* Offset from base to BTE 1 */
+
+/* BTE register offsets from base */
+#define BTEOFF_STAT             0
+#define BTEOFF_SRC      	(IIO_BTE_SRC_0 - IIO_BTE_STAT_0)
+#define BTEOFF_DEST     	(IIO_BTE_DEST_0 - IIO_BTE_STAT_0)
+#define BTEOFF_CTRL     	(IIO_BTE_CTRL_0 - IIO_BTE_STAT_0)
+#define BTEOFF_NOTIFY   	(IIO_BTE_NOTIFY_0 - IIO_BTE_STAT_0)
+#define BTEOFF_INT      	(IIO_BTE_INT_0 - IIO_BTE_STAT_0)
+
+/* names used in shub diags */
+#define IIO_BASE_BTE0   IIO_IBLS_0
+#define IIO_BASE_BTE1   IIO_IBLS_1
+
+/*
+ * Macro which takes the widget number, and returns the
+ * IO PRB address of that widget.
+ * value _x is expected to be a widget number in the range
+ * 0, 8 - 0xF
+ */
+#define IIO_IOPRB(_x)	(IIO_IOPRB_0 + ( ( (_x) < HUB_WIDGET_ID_MIN ? \
+                	(_x) : \
+                	(_x) - (HUB_WIDGET_ID_MIN-1)) << 3) )
+
+/* GFX Flow Control Node/Widget Register */
+#define IIO_IGFX_W_NUM_BITS	4	/* size of widget num field */
+#define IIO_IGFX_W_NUM_MASK	((1<<IIO_IGFX_W_NUM_BITS)-1)
+#define IIO_IGFX_W_NUM_SHIFT	0
+#define IIO_IGFX_PI_NUM_BITS	1	/* size of PI num field */
+#define IIO_IGFX_PI_NUM_MASK	((1<<IIO_IGFX_PI_NUM_BITS)-1)
+#define IIO_IGFX_PI_NUM_SHIFT	4
+#define IIO_IGFX_N_NUM_BITS	8	/* size of node num field */
+#define IIO_IGFX_N_NUM_MASK	((1<<IIO_IGFX_N_NUM_BITS)-1)
+#define IIO_IGFX_N_NUM_SHIFT	5
+#define IIO_IGFX_P_NUM_BITS	1	/* size of processor num field */
+#define IIO_IGFX_P_NUM_MASK	((1<<IIO_IGFX_P_NUM_BITS)-1)
+#define IIO_IGFX_P_NUM_SHIFT	16
+#define IIO_IGFX_INIT(widget, pi, node, cpu)				(\
+	(((widget) & IIO_IGFX_W_NUM_MASK) << IIO_IGFX_W_NUM_SHIFT) |	 \
+	(((pi)     & IIO_IGFX_PI_NUM_MASK)<< IIO_IGFX_PI_NUM_SHIFT)|	 \
+	(((node)   & IIO_IGFX_N_NUM_MASK) << IIO_IGFX_N_NUM_SHIFT) |	 \
+	(((cpu)    & IIO_IGFX_P_NUM_MASK) << IIO_IGFX_P_NUM_SHIFT))
+
+/* Scratch registers (all bits available) */
+#define IIO_SCRATCH_REG0        IIO_ISCR0
+#define IIO_SCRATCH_REG1        IIO_ISCR1
+#define IIO_SCRATCH_MASK        0xffffffffffffffffUL
+
+#define IIO_SCRATCH_BIT0_0      0x0000000000000001UL
+#define IIO_SCRATCH_BIT0_1      0x0000000000000002UL
+#define IIO_SCRATCH_BIT0_2      0x0000000000000004UL
+#define IIO_SCRATCH_BIT0_3      0x0000000000000008UL
+#define IIO_SCRATCH_BIT0_4      0x0000000000000010UL
+#define IIO_SCRATCH_BIT0_5      0x0000000000000020UL
+#define IIO_SCRATCH_BIT0_6      0x0000000000000040UL
+#define IIO_SCRATCH_BIT0_7      0x0000000000000080UL
+#define IIO_SCRATCH_BIT0_8      0x0000000000000100UL
+#define IIO_SCRATCH_BIT0_9      0x0000000000000200UL
+#define IIO_SCRATCH_BIT0_A      0x0000000000000400UL
+
+#define IIO_SCRATCH_BIT1_0      0x0000000000000001UL
+#define IIO_SCRATCH_BIT1_1      0x0000000000000002UL
+/* IO Translation Table Entries */
+#define IIO_NUM_ITTES   7	/* ITTEs numbered 0..6 */
+					/* Hw manuals number them 1..7! */
+/*
+ * IIO_IMEM Register fields.
+ */
+#define IIO_IMEM_W0ESD  0x1UL	/* Widget 0 shut down due to error */
+#define IIO_IMEM_B0ESD	(1UL << 4)	/* BTE 0 shut down due to error */
+#define IIO_IMEM_B1ESD	(1UL << 8)	/* BTE 1 Shut down due to error */
+
+/*
+ * As a permanent workaround for a bug in the PI side of the shub, we've
+ * redefined big window 7 as small window 0.
+ XXX does this still apply for SN1??
+ */
+#define HUB_NUM_BIG_WINDOW	(IIO_NUM_ITTES - 1)
+
+/*
+ * Use the top big window as a surrogate for the first small window
+ */
+#define SWIN0_BIGWIN            HUB_NUM_BIG_WINDOW
+
+#define ILCSR_WARM_RESET        0x100
+
+/*
+ * CRB manipulation macros
+ *	The CRB macros are slightly complicated, since there are up to
+ *	four registers associated with each CRB entry.
+ */
+#define IIO_NUM_CRBS            15	/* Number of CRBs */
+#define IIO_NUM_PC_CRBS         4	/* Number of partial cache CRBs */
+#define IIO_ICRB_OFFSET         8
+#define IIO_ICRB_0              IIO_ICRB0_A
+#define IIO_ICRB_ADDR_SHFT	2	/* Shift to get proper address */
+/* XXX - This is now tuneable:
+        #define IIO_FIRST_PC_ENTRY 12
+ */
+
+#define IIO_ICRB_A(_x)	((u64)(IIO_ICRB_0 + (6 * IIO_ICRB_OFFSET * (_x))))
+#define IIO_ICRB_B(_x)	((u64)((char *)IIO_ICRB_A(_x) + 1*IIO_ICRB_OFFSET))
+#define IIO_ICRB_C(_x)	((u64)((char *)IIO_ICRB_A(_x) + 2*IIO_ICRB_OFFSET))
+#define IIO_ICRB_D(_x)	((u64)((char *)IIO_ICRB_A(_x) + 3*IIO_ICRB_OFFSET))
+#define IIO_ICRB_E(_x)	((u64)((char *)IIO_ICRB_A(_x) + 4*IIO_ICRB_OFFSET))
+
+#define TNUM_TO_WIDGET_DEV(_tnum)	(_tnum & 0x7)
+
+/*
+ * values for "ecode" field
+ */
+#define IIO_ICRB_ECODE_DERR     0	/* Directory error due to IIO access */
+#define IIO_ICRB_ECODE_PERR     1	/* Poison error on IO access */
+#define IIO_ICRB_ECODE_WERR     2	/* Write error by IIO access
+					 * e.g. WINV to a Read only line. */
+#define IIO_ICRB_ECODE_AERR     3	/* Access error caused by IIO access */
+#define IIO_ICRB_ECODE_PWERR    4	/* Error on partial write */
+#define IIO_ICRB_ECODE_PRERR    5	/* Error on partial read  */
+#define IIO_ICRB_ECODE_TOUT     6	/* CRB timeout before deallocating */
+#define IIO_ICRB_ECODE_XTERR    7	/* Incoming xtalk pkt had error bit */
+
+/*
+ * Values for field imsgtype
+ */
+#define IIO_ICRB_IMSGT_XTALK    0	/* Incoming Meessage from Xtalk */
+#define IIO_ICRB_IMSGT_BTE      1	/* Incoming message from BTE    */
+#define IIO_ICRB_IMSGT_SN1NET   2	/* Incoming message from SN1 net */
+#define IIO_ICRB_IMSGT_CRB      3	/* Incoming message from CRB ???  */
+
+/*
+ * values for field initiator.
+ */
+#define IIO_ICRB_INIT_XTALK     0	/* Message originated in xtalk  */
+#define IIO_ICRB_INIT_BTE0      0x1	/* Message originated in BTE 0  */
+#define IIO_ICRB_INIT_SN1NET    0x2	/* Message originated in SN1net */
+#define IIO_ICRB_INIT_CRB       0x3	/* Message originated in CRB ?  */
+#define IIO_ICRB_INIT_BTE1      0x5	/* MEssage originated in BTE 1  */
+
+/*
+ * Number of credits Hub widget has while sending req/response to
+ * xbow.
+ * Value of 3 is required by Xbow 1.1
+ * We may be able to increase this to 4 with Xbow 1.2.
+ */
+#define		   HUBII_XBOW_CREDIT       3
+#define		   HUBII_XBOW_REV2_CREDIT  4
+
+/*
+ * Number of credits that xtalk devices should use when communicating
+ * with a SHub (depth of SHub's queue).
+ */
+#define HUB_CREDIT 4
+
+/*
+ * Some IIO_PRB fields
+ */
+#define IIO_PRB_MULTI_ERR	(1LL << 63)
+#define IIO_PRB_SPUR_RD		(1LL << 51)
+#define IIO_PRB_SPUR_WR		(1LL << 50)
+#define IIO_PRB_RD_TO		(1LL << 49)
+#define IIO_PRB_ERROR		(1LL << 48)
+
+/*************************************************************************
+
+ Some of the IIO field masks and shifts are defined here.
+ This is in order to maintain compatibility in SN0 and SN1 code
+ 
+**************************************************************************/
+
+/*
+ * ICMR register fields
+ * (Note: the IIO_ICMR_P_CNT and IIO_ICMR_PC_VLD from Hub are not
+ * present in SHub)
+ */
+
+#define IIO_ICMR_CRB_VLD_SHFT   20
+#define IIO_ICMR_CRB_VLD_MASK	(0x7fffUL << IIO_ICMR_CRB_VLD_SHFT)
+
+#define IIO_ICMR_FC_CNT_SHFT    16
+#define IIO_ICMR_FC_CNT_MASK	(0xf << IIO_ICMR_FC_CNT_SHFT)
+
+#define IIO_ICMR_C_CNT_SHFT     4
+#define IIO_ICMR_C_CNT_MASK	(0xf << IIO_ICMR_C_CNT_SHFT)
+
+#define IIO_ICMR_PRECISE	(1UL << 52)
+#define IIO_ICMR_CLR_RPPD	(1UL << 13)
+#define IIO_ICMR_CLR_RQPD	(1UL << 12)
+
+/*
+ * IIO PIO Deallocation register field masks : (IIO_IPDR)
+ XXX present but not needed in bedrock?  See the manual.
+ */
+#define IIO_IPDR_PND    	(1 << 4)
+
+/*
+ * IIO CRB deallocation register field masks: (IIO_ICDR)
+ */
+#define IIO_ICDR_PND    	(1 << 4)
+
+/* 
+ * IO BTE Length/Status (IIO_IBLS) register bit field definitions
+ */
+#define IBLS_BUSY		(0x1UL << 20)
+#define IBLS_ERROR_SHFT		16
+#define IBLS_ERROR		(0x1UL << IBLS_ERROR_SHFT)
+#define IBLS_LENGTH_MASK	0xffff
+
+/*
+ * IO BTE Control/Terminate register (IBCT) register bit field definitions
+ */
+#define IBCT_POISON		(0x1UL << 8)
+#define IBCT_NOTIFY		(0x1UL << 4)
+#define IBCT_ZFIL_MODE		(0x1UL << 0)
+
+/*
+ * IIO Incoming Error Packet Header (IIO_IIEPH1/IIO_IIEPH2)
+ */
+#define IIEPH1_VALID		(1UL << 44)
+#define IIEPH1_OVERRUN		(1UL << 40)
+#define IIEPH1_ERR_TYPE_SHFT	32
+#define IIEPH1_ERR_TYPE_MASK	0xf
+#define IIEPH1_SOURCE_SHFT	20
+#define IIEPH1_SOURCE_MASK	11
+#define IIEPH1_SUPPL_SHFT	8
+#define IIEPH1_SUPPL_MASK	11
+#define IIEPH1_CMD_SHFT		0
+#define IIEPH1_CMD_MASK		7
+
+#define IIEPH2_TAIL		(1UL << 40)
+#define IIEPH2_ADDRESS_SHFT	0
+#define IIEPH2_ADDRESS_MASK	38
+
+#define IIEPH1_ERR_SHORT_REQ	2
+#define IIEPH1_ERR_SHORT_REPLY	3
+#define IIEPH1_ERR_LONG_REQ	4
+#define IIEPH1_ERR_LONG_REPLY	5
+
+/*
+ * IO Error Clear register bit field definitions
+ */
+#define IECLR_PI1_FWD_INT	(1UL << 31)	/* clear PI1_FORWARD_INT in iidsr */
+#define IECLR_PI0_FWD_INT	(1UL << 30)	/* clear PI0_FORWARD_INT in iidsr */
+#define IECLR_SPUR_RD_HDR	(1UL << 29)	/* clear valid bit in ixss reg */
+#define IECLR_BTE1		(1UL << 18)	/* clear bte error 1 */
+#define IECLR_BTE0		(1UL << 17)	/* clear bte error 0 */
+#define IECLR_CRAZY		(1UL << 16)	/* clear crazy bit in wstat reg */
+#define IECLR_PRB_F		(1UL << 15)	/* clear err bit in PRB_F reg */
+#define IECLR_PRB_E		(1UL << 14)	/* clear err bit in PRB_E reg */
+#define IECLR_PRB_D		(1UL << 13)	/* clear err bit in PRB_D reg */
+#define IECLR_PRB_C		(1UL << 12)	/* clear err bit in PRB_C reg */
+#define IECLR_PRB_B		(1UL << 11)	/* clear err bit in PRB_B reg */
+#define IECLR_PRB_A		(1UL << 10)	/* clear err bit in PRB_A reg */
+#define IECLR_PRB_9		(1UL << 9)	/* clear err bit in PRB_9 reg */
+#define IECLR_PRB_8		(1UL << 8)	/* clear err bit in PRB_8 reg */
+#define IECLR_PRB_0		(1UL << 0)	/* clear err bit in PRB_0 reg */
+
+/*
+ * IIO CRB control register Fields: IIO_ICCR 
+ */
+#define	IIO_ICCR_PENDING	0x10000
+#define	IIO_ICCR_CMD_MASK	0xFF
+#define	IIO_ICCR_CMD_SHFT	7
+#define	IIO_ICCR_CMD_NOP	0x0	/* No Op */
+#define	IIO_ICCR_CMD_WAKE	0x100	/* Reactivate CRB entry and process */
+#define	IIO_ICCR_CMD_TIMEOUT	0x200	/* Make CRB timeout & mark invalid */
+#define	IIO_ICCR_CMD_EJECT	0x400	/* Contents of entry written to memory
+					 * via a WB
+					 */
+#define	IIO_ICCR_CMD_FLUSH	0x800
+
+/*
+ *
+ * CRB Register description.
+ *
+ * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING
+ * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING
+ * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING
+ * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING
+ * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING
+ *
+ * Many of the fields in CRB are status bits used by hardware
+ * for implementation of the protocol. It's very dangerous to
+ * mess around with the CRB registers.
+ *
+ * It's OK to read the CRB registers and try to make sense out of the
+ * fields in CRB.
+ *
+ * Updating CRB requires all activities in Hub IIO to be quiesced.
+ * otherwise, a write to CRB could corrupt other CRB entries.
+ * CRBs are here only as a back door peek to shub IIO's status.
+ * Quiescing implies  no dmas no PIOs
+ * either directly from the cpu or from sn0net.
+ * this is not something that can be done easily. So, AVOID updating
+ * CRBs.
+ */
+
+/*
+ * Easy access macros for CRBs, all 5 registers (A-E)
+ */
+typedef ii_icrb0_a_u_t icrba_t;
+#define a_sidn		ii_icrb0_a_fld_s.ia_sidn
+#define a_tnum		ii_icrb0_a_fld_s.ia_tnum
+#define a_addr          ii_icrb0_a_fld_s.ia_addr
+#define a_valid         ii_icrb0_a_fld_s.ia_vld
+#define a_iow           ii_icrb0_a_fld_s.ia_iow
+#define a_regvalue	ii_icrb0_a_regval
+
+typedef ii_icrb0_b_u_t icrbb_t;
+#define b_use_old       ii_icrb0_b_fld_s.ib_use_old
+#define b_imsgtype      ii_icrb0_b_fld_s.ib_imsgtype
+#define b_imsg          ii_icrb0_b_fld_s.ib_imsg
+#define b_initiator     ii_icrb0_b_fld_s.ib_init
+#define b_exc           ii_icrb0_b_fld_s.ib_exc
+#define b_ackcnt        ii_icrb0_b_fld_s.ib_ack_cnt
+#define b_resp          ii_icrb0_b_fld_s.ib_resp
+#define b_ack           ii_icrb0_b_fld_s.ib_ack
+#define b_hold          ii_icrb0_b_fld_s.ib_hold
+#define b_wb            ii_icrb0_b_fld_s.ib_wb
+#define b_intvn         ii_icrb0_b_fld_s.ib_intvn
+#define b_stall_ib      ii_icrb0_b_fld_s.ib_stall_ib
+#define b_stall_int     ii_icrb0_b_fld_s.ib_stall__intr
+#define b_stall_bte_0   ii_icrb0_b_fld_s.ib_stall__bte_0
+#define b_stall_bte_1   ii_icrb0_b_fld_s.ib_stall__bte_1
+#define b_error         ii_icrb0_b_fld_s.ib_error
+#define b_ecode         ii_icrb0_b_fld_s.ib_errcode
+#define b_lnetuce       ii_icrb0_b_fld_s.ib_ln_uce
+#define b_mark          ii_icrb0_b_fld_s.ib_mark
+#define b_xerr          ii_icrb0_b_fld_s.ib_xt_err
+#define b_regvalue	ii_icrb0_b_regval
+
+typedef ii_icrb0_c_u_t icrbc_t;
+#define c_suppl         ii_icrb0_c_fld_s.ic_suppl
+#define c_barrop        ii_icrb0_c_fld_s.ic_bo
+#define c_doresp        ii_icrb0_c_fld_s.ic_resprqd
+#define c_gbr           ii_icrb0_c_fld_s.ic_gbr
+#define c_btenum        ii_icrb0_c_fld_s.ic_bte_num
+#define c_cohtrans      ii_icrb0_c_fld_s.ic_ct
+#define c_xtsize        ii_icrb0_c_fld_s.ic_size
+#define c_source        ii_icrb0_c_fld_s.ic_source
+#define c_regvalue	ii_icrb0_c_regval
+
+typedef ii_icrb0_d_u_t icrbd_t;
+#define d_sleep         ii_icrb0_d_fld_s.id_sleep
+#define d_pricnt        ii_icrb0_d_fld_s.id_pr_cnt
+#define d_pripsc        ii_icrb0_d_fld_s.id_pr_psc
+#define d_bteop         ii_icrb0_d_fld_s.id_bte_op
+#define d_bteaddr       ii_icrb0_d_fld_s.id_pa_be	/* ic_pa_be fld has 2 names */
+#define d_benable       ii_icrb0_d_fld_s.id_pa_be	/* ic_pa_be fld has 2 names */
+#define d_regvalue	ii_icrb0_d_regval
+
+typedef ii_icrb0_e_u_t icrbe_t;
+#define icrbe_ctxtvld   ii_icrb0_e_fld_s.ie_cvld
+#define icrbe_toutvld   ii_icrb0_e_fld_s.ie_tvld
+#define icrbe_context   ii_icrb0_e_fld_s.ie_context
+#define icrbe_timeout   ii_icrb0_e_fld_s.ie_timeout
+#define e_regvalue	ii_icrb0_e_regval
+
+/* Number of widgets supported by shub */
+#define HUB_NUM_WIDGET          9
+#define HUB_WIDGET_ID_MIN       0x8
+#define HUB_WIDGET_ID_MAX       0xf
+
+#define HUB_WIDGET_PART_NUM     0xc120
+#define MAX_HUBS_PER_XBOW       2
+
+/* A few more #defines for backwards compatibility */
+#define iprb_t          ii_iprb0_u_t
+#define iprb_regval     ii_iprb0_regval
+#define iprb_mult_err	ii_iprb0_fld_s.i_mult_err
+#define iprb_spur_rd	ii_iprb0_fld_s.i_spur_rd
+#define iprb_spur_wr	ii_iprb0_fld_s.i_spur_wr
+#define iprb_rd_to	ii_iprb0_fld_s.i_rd_to
+#define iprb_ovflow     ii_iprb0_fld_s.i_of_cnt
+#define iprb_error      ii_iprb0_fld_s.i_error
+#define iprb_ff         ii_iprb0_fld_s.i_f
+#define iprb_mode       ii_iprb0_fld_s.i_m
+#define iprb_bnakctr    ii_iprb0_fld_s.i_nb
+#define iprb_anakctr    ii_iprb0_fld_s.i_na
+#define iprb_xtalkctr   ii_iprb0_fld_s.i_c
+
+#define LNK_STAT_WORKING        0x2		/* LLP is working */
+
+#define IIO_WSTAT_ECRAZY	(1ULL << 32)	/* Hub gone crazy */
+#define IIO_WSTAT_TXRETRY	(1ULL << 9)	/* Hub Tx Retry timeout */
+#define IIO_WSTAT_TXRETRY_MASK  0x7F		/* should be 0xFF?? */
+#define IIO_WSTAT_TXRETRY_SHFT  16
+#define IIO_WSTAT_TXRETRY_CNT(w)	(((w) >> IIO_WSTAT_TXRETRY_SHFT) & \
+                          		IIO_WSTAT_TXRETRY_MASK)
+
+/* Number of II perf. counters we can multiplex at once */
+
+#define IO_PERF_SETS	32
+
+/* Bit for the widget in inbound access register */
+#define IIO_IIWA_WIDGET(_w)	((u64)(1ULL << _w))
+/* Bit for the widget in outbound access register */
+#define IIO_IOWA_WIDGET(_w)	((u64)(1ULL << _w))
+
+/* NOTE: The following define assumes that we are going to get
+ * widget numbers from 8 thru F and the device numbers within
+ * widget from 0 thru 7.
+ */
+#define IIO_IIDEM_WIDGETDEV_MASK(w, d)	((u64)(1ULL << (8 * ((w) - 8) + (d))))
+
+/* IO Interrupt Destination Register */
+#define IIO_IIDSR_SENT_SHIFT    28
+#define IIO_IIDSR_SENT_MASK     0x30000000
+#define IIO_IIDSR_ENB_SHIFT     24
+#define IIO_IIDSR_ENB_MASK      0x01000000
+#define IIO_IIDSR_NODE_SHIFT    9
+#define IIO_IIDSR_NODE_MASK     0x000ff700
+#define IIO_IIDSR_PI_ID_SHIFT   8
+#define IIO_IIDSR_PI_ID_MASK    0x00000100
+#define IIO_IIDSR_LVL_SHIFT     0
+#define IIO_IIDSR_LVL_MASK      0x000000ff
+
+/* Xtalk timeout threshold register (IIO_IXTT) */
+#define IXTT_RRSP_TO_SHFT	55	/* read response timeout */
+#define IXTT_RRSP_TO_MASK	(0x1FULL << IXTT_RRSP_TO_SHFT)
+#define IXTT_RRSP_PS_SHFT	32	/* read responsed TO prescalar */
+#define IXTT_RRSP_PS_MASK	(0x7FFFFFULL << IXTT_RRSP_PS_SHFT)
+#define IXTT_TAIL_TO_SHFT	0	/* tail timeout counter threshold */
+#define IXTT_TAIL_TO_MASK	(0x3FFFFFFULL << IXTT_TAIL_TO_SHFT)
+
+/*
+ * The IO LLP control status register and widget control register
+ */
+
+typedef union hubii_wcr_u {
+	u64 wcr_reg_value;
+	struct {
+		u64 wcr_widget_id:4,	/* LLP crossbar credit */
+		 wcr_tag_mode:1,	/* Tag mode */
+		 wcr_rsvd1:8,	/* Reserved */
+		 wcr_xbar_crd:3,	/* LLP crossbar credit */
+		 wcr_f_bad_pkt:1,	/* Force bad llp pkt enable */
+		 wcr_dir_con:1,	/* widget direct connect */
+		 wcr_e_thresh:5,	/* elasticity threshold */
+		 wcr_rsvd:41;	/* unused */
+	} wcr_fields_s;
+} hubii_wcr_t;
+
+#define iwcr_dir_con    wcr_fields_s.wcr_dir_con
+
+/* The structures below are defined to extract and modify the ii
+performance registers */
+
+/* io_perf_sel allows the caller to specify what tests will be
+   performed */
+
+typedef union io_perf_sel {
+	u64 perf_sel_reg;
+	struct {
+		u64 perf_ippr0:4, perf_ippr1:4, perf_icct:8, perf_rsvd:48;
+	} perf_sel_bits;
+} io_perf_sel_t;
+
+/* io_perf_cnt is to extract the count from the shub registers. Due to
+   hardware problems there is only one counter, not two. */
+
+typedef union io_perf_cnt {
+	u64 perf_cnt;
+	struct {
+		u64 perf_cnt:20, perf_rsvd2:12, perf_rsvd1:32;
+	} perf_cnt_bits;
+
+} io_perf_cnt_t;
+
+typedef union iprte_a {
+	u64 entry;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_addr:38;
+		u64 i_init:3;
+		u64 i_source:8;
+		u64 i_rsvd:2;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} iprte_fields;
+} iprte_a_t;
+
+#endif				/* _ASM_IA64_SN_SHUBIO_H */
diff --git a/arch/ia64/include/asm/sn/simulator.h b/arch/ia64/include/asm/sn/simulator.h
new file mode 100644
index 00000000..c2611f6c
--- /dev/null
+++ b/arch/ia64/include/asm/sn/simulator.h
@@ -0,0 +1,25 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_SIMULATOR_H
+#define _ASM_IA64_SN_SIMULATOR_H
+
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_SGI_UV)
+#define SNMAGIC 0xaeeeeeee8badbeefL
+#define IS_MEDUSA()			({long sn; asm("mov %0=cpuid[%1]" : "=r"(sn) : "r"(2)); sn == SNMAGIC;})
+
+#define SIMULATOR_SLEEP()		asm("nop.i 0x8beef")
+#define IS_RUNNING_ON_SIMULATOR()	(sn_prom_type)
+#define IS_RUNNING_ON_FAKE_PROM()	(sn_prom_type == 2)
+extern int sn_prom_type;		/* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
+#else
+#define IS_MEDUSA()			0
+#define SIMULATOR_SLEEP()
+#define IS_RUNNING_ON_SIMULATOR()	0
+#endif
+
+#endif /* _ASM_IA64_SN_SIMULATOR_H */
diff --git a/arch/ia64/include/asm/sn/sn2/sn_hwperf.h b/arch/ia64/include/asm/sn/sn2/sn_hwperf.h
new file mode 100644
index 00000000..e61ebac3
--- /dev/null
+++ b/arch/ia64/include/asm/sn/sn2/sn_hwperf.h
@@ -0,0 +1,242 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ *
+ * Data types used by the SN_SAL_HWPERF_OP SAL call for monitoring
+ * SGI Altix node and router hardware
+ *
+ * Mark Goodwin <markgw@sgi.com> Mon Aug 30 12:23:46 EST 2004
+ */
+
+#ifndef SN_HWPERF_H
+#define SN_HWPERF_H
+
+/*
+ * object structure. SN_HWPERF_ENUM_OBJECTS and SN_HWPERF_GET_CPU_INFO
+ * return an array of these. Do not change this without also
+ * changing the corresponding SAL code.
+ */
+#define SN_HWPERF_MAXSTRING		128
+struct sn_hwperf_object_info {
+	u32 id;
+	union {
+		struct {
+			u64 this_part:1;
+			u64 is_shared:1;
+		} fields;
+		struct {
+			u64 flags;
+			u64 reserved;
+		} b;
+	} f;
+	char name[SN_HWPERF_MAXSTRING];
+	char location[SN_HWPERF_MAXSTRING];
+	u32 ports;
+};
+
+#define sn_hwp_this_part	f.fields.this_part
+#define sn_hwp_is_shared	f.fields.is_shared
+#define sn_hwp_flags		f.b.flags
+
+/* macros for object classification */
+#define SN_HWPERF_IS_NODE(x)		((x) && strstr((x)->name, "SHub"))
+#define SN_HWPERF_IS_NODE_SHUB2(x)	((x) && strstr((x)->name, "SHub 2."))
+#define SN_HWPERF_IS_IONODE(x)		((x) && strstr((x)->name, "TIO"))
+#define SN_HWPERF_IS_NL3ROUTER(x)	((x) && strstr((x)->name, "NL3Router"))
+#define SN_HWPERF_IS_NL4ROUTER(x)	((x) && strstr((x)->name, "NL4Router"))
+#define SN_HWPERF_IS_OLDROUTER(x)	((x) && strstr((x)->name, "Router"))
+#define SN_HWPERF_IS_ROUTER(x)		(SN_HWPERF_IS_NL3ROUTER(x) || 		\
+					 	SN_HWPERF_IS_NL4ROUTER(x) || 	\
+					 	SN_HWPERF_IS_OLDROUTER(x))
+#define SN_HWPERF_FOREIGN(x)		((x) && !(x)->sn_hwp_this_part && !(x)->sn_hwp_is_shared)
+#define SN_HWPERF_SAME_OBJTYPE(x,y)	((SN_HWPERF_IS_NODE(x) && SN_HWPERF_IS_NODE(y)) ||\
+					(SN_HWPERF_IS_IONODE(x) && SN_HWPERF_IS_IONODE(y)) ||\
+					(SN_HWPERF_IS_ROUTER(x) && SN_HWPERF_IS_ROUTER(y)))
+
+/* numa port structure, SN_HWPERF_ENUM_PORTS returns an array of these */
+struct sn_hwperf_port_info {
+	u32 port;
+	u32 conn_id;
+	u32 conn_port;
+};
+
+/* for HWPERF_{GET,SET}_MMRS */
+struct sn_hwperf_data {
+	u64 addr;
+	u64 data;
+};
+
+/* user ioctl() argument, see below */
+struct sn_hwperf_ioctl_args {
+        u64 arg;		/* argument, usually an object id */
+        u64 sz;                 /* size of transfer */
+        void *ptr;              /* pointer to source/target */
+        u32 v0;			/* second return value */
+};
+
+/*
+ * For SN_HWPERF_{GET,SET}_MMRS and SN_HWPERF_OBJECT_DISTANCE,
+ * sn_hwperf_ioctl_args.arg can be used to specify a CPU on which
+ * to call SAL, and whether to use an interprocessor interrupt
+ * or task migration in order to do so. If the CPU specified is
+ * SN_HWPERF_ARG_ANY_CPU, then the current CPU will be used.
+ */
+#define SN_HWPERF_ARG_ANY_CPU		0x7fffffffUL
+#define SN_HWPERF_ARG_CPU_MASK		0x7fffffff00000000ULL
+#define SN_HWPERF_ARG_USE_IPI_MASK	0x8000000000000000ULL
+#define SN_HWPERF_ARG_OBJID_MASK	0x00000000ffffffffULL
+
+/* 
+ * ioctl requests on the "sn_hwperf" misc device that call SAL.
+ */
+#define SN_HWPERF_OP_MEM_COPYIN		0x1000
+#define SN_HWPERF_OP_MEM_COPYOUT	0x2000
+#define SN_HWPERF_OP_MASK		0x0fff
+
+/*
+ * Determine mem requirement.
+ * arg	don't care
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define	SN_HWPERF_GET_HEAPSIZE		1
+
+/*
+ * Install mem for SAL drvr
+ * arg	don't care
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to buffer for scratch area
+ */
+#define SN_HWPERF_INSTALL_HEAP		2
+
+/*
+ * Determine number of objects
+ * arg	don't care
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_OBJECT_COUNT		(10|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Determine object "distance", relative to a cpu. This operation can
+ * execute on a designated logical cpu number, using either an IPI or
+ * via task migration. If the cpu number is SN_HWPERF_ANY_CPU, then
+ * the current CPU is used. See the SN_HWPERF_ARG_* macros above.
+ *
+ * arg	bitmap of IPI flag, cpu number and object id
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_OBJECT_DISTANCE	(11|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Enumerate objects. Special case if sz == 8, returns the required
+ * buffer size.
+ * arg	don't care
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_object_info
+ */
+#define SN_HWPERF_ENUM_OBJECTS		(12|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Enumerate NumaLink ports for an object. Special case if sz == 8,
+ * returns the required buffer size.
+ * arg	object id
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_port_info
+ */
+#define SN_HWPERF_ENUM_PORTS		(13|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * SET/GET memory mapped registers. These operations can execute
+ * on a designated logical cpu number, using either an IPI or via
+ * task migration. If the cpu number is SN_HWPERF_ANY_CPU, then
+ * the current CPU is used. See the SN_HWPERF_ARG_* macros above.
+ *
+ * arg	bitmap of ipi flag, cpu number and object id
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_data
+ */
+#define SN_HWPERF_SET_MMRS		(14|SN_HWPERF_OP_MEM_COPYIN)
+#define SN_HWPERF_GET_MMRS		(15|SN_HWPERF_OP_MEM_COPYOUT| \
+					    SN_HWPERF_OP_MEM_COPYIN)
+/*
+ * Lock a shared object
+ * arg	object id
+ * sz	don't care
+ * p	don't care
+ */
+#define SN_HWPERF_ACQUIRE		16
+
+/*
+ * Unlock a shared object
+ * arg	object id
+ * sz	don't care
+ * p	don't care
+ */
+#define SN_HWPERF_RELEASE		17
+
+/*
+ * Break a lock on a shared object
+ * arg	object id
+ * sz	don't care
+ * p	don't care
+ */
+#define SN_HWPERF_FORCE_RELEASE		18
+
+/*
+ * ioctl requests on "sn_hwperf" that do not call SAL
+ */
+
+/*
+ * get cpu info as an array of hwperf_object_info_t. 
+ * id is logical CPU number, name is description, location
+ * is geoid (e.g. 001c04#1c). Special case if sz == 8,
+ * returns the required buffer size.
+ *
+ * arg	don't care
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_object_info
+ */
+#define SN_HWPERF_GET_CPU_INFO		(100|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Given an object id, return it's node number (aka cnode).
+ * arg	object id
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_GET_OBJ_NODE		(101|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Given a node number (cnode), return it's nasid.
+ * arg	ordinal node number (aka cnodeid)
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_GET_NODE_NASID	(102|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Given a node id, determine the id of the nearest node with CPUs
+ * and the id of the nearest node that has memory. The argument
+ * node would normally be a "headless" node, e.g. an "IO node".
+ * Return 0 on success.
+ */
+extern int sn_hwperf_get_nearest_node(cnodeid_t node,
+	cnodeid_t *near_mem, cnodeid_t *near_cpu);
+
+/* return codes */
+#define SN_HWPERF_OP_OK			0
+#define SN_HWPERF_OP_NOMEM		1
+#define SN_HWPERF_OP_NO_PERM		2
+#define SN_HWPERF_OP_IO_ERROR		3
+#define SN_HWPERF_OP_BUSY		4
+#define SN_HWPERF_OP_RECONFIGURE	253
+#define SN_HWPERF_OP_INVAL		254
+
+int sn_topology_open(struct inode *inode, struct file *file);
+int sn_topology_release(struct inode *inode, struct file *file);
+#endif				/* SN_HWPERF_H */
diff --git a/arch/ia64/include/asm/sn/sn_cpuid.h b/arch/ia64/include/asm/sn/sn_cpuid.h
new file mode 100644
index 00000000..a676dd9a
--- /dev/null
+++ b/arch/ia64/include/asm/sn/sn_cpuid.h
@@ -0,0 +1,132 @@
+/* 
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+#ifndef _ASM_IA64_SN_SN_CPUID_H
+#define _ASM_IA64_SN_SN_CPUID_H
+
+#include <linux/smp.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/pda.h>
+#include <asm/intrinsics.h>
+
+
+/*
+ * Functions for converting between cpuids, nodeids and NASIDs.
+ * 
+ * These are for SGI platforms only.
+ *
+ */
+
+
+
+
+/*
+ *  Definitions of terms (these definitions are for IA64 ONLY. Other architectures
+ *  use cpuid/cpunum quite defferently):
+ *
+ *	   CPUID - a number in range of 0..NR_CPUS-1 that uniquely identifies
+ *		the cpu. The value cpuid has no significance on IA64 other than
+ *		the boot cpu is 0.
+ *			smp_processor_id() returns the cpuid of the current cpu.
+ *
+ * 	   CPU_PHYSICAL_ID (also known as HARD_PROCESSOR_ID)
+ *		This is the same as 31:24 of the processor LID register
+ *			hard_smp_processor_id()- cpu_physical_id of current processor
+ *			cpu_physical_id(cpuid) - convert a <cpuid> to a <physical_cpuid>
+ *			cpu_logical_id(phy_id) - convert a <physical_cpuid> to a <cpuid> 
+ *				* not real efficient - don't use in perf critical code
+ *
+ *         SLICE - a number in the range of 0 - 3 (typically) that represents the
+ *		cpu number on a brick.
+ *
+ *	   SUBNODE - (almost obsolete) the number of the FSB that a cpu is
+ *		connected to. This is also the same as the PI number. Usually 0 or 1.
+ *
+ *	NOTE!!!: the value of the bits in the cpu physical id (SAPICid or LID) of a cpu has no 
+ *	significance. The SAPIC id (LID) is a 16-bit cookie that has meaning only to the PROM.
+ *
+ *
+ * The macros convert between cpu physical ids & slice/nasid/cnodeid.
+ * These terms are described below:
+ *
+ *
+ * Brick
+ *          -----   -----           -----   -----       CPU
+ *          | 0 |   | 1 |           | 0 |   | 1 |       SLICE
+ *          -----   -----           -----   -----
+ *            |       |               |       |
+ *            |       |               |       |
+ *          0 |       | 2           0 |       | 2       FSB SLOT
+ *             -------                 -------  
+ *                |                       |
+ *                |                       |
+ *                |                       |
+ *             ------------      -------------
+ *             |          |      |           |
+ *             |    SHUB  |      |   SHUB    |        NASID   (0..MAX_NASIDS)
+ *             |          |----- |           |        CNODEID (0..num_compact_nodes-1)
+ *             |          |      |           |
+ *             |          |      |           |
+ *             ------------      -------------
+ *                   |                 |
+ *                           
+ *
+ */
+
+#define get_node_number(addr)			NASID_GET(addr)
+
+/*
+ * NOTE: on non-MP systems, only cpuid 0 exists
+ */
+
+extern short physical_node_map[];	/* indexed by nasid to get cnode */
+
+/*
+ * Macros for retrieving info about current cpu
+ */
+#define get_nasid()	(sn_nodepda->phys_cpuid[smp_processor_id()].nasid)
+#define get_subnode()	(sn_nodepda->phys_cpuid[smp_processor_id()].subnode)
+#define get_slice()	(sn_nodepda->phys_cpuid[smp_processor_id()].slice)
+#define get_cnode()	(sn_nodepda->phys_cpuid[smp_processor_id()].cnode)
+#define get_sapicid()	((ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff)
+
+/*
+ * Macros for retrieving info about an arbitrary cpu
+ *	cpuid - logical cpu id
+ */
+#define cpuid_to_nasid(cpuid)		(sn_nodepda->phys_cpuid[cpuid].nasid)
+#define cpuid_to_subnode(cpuid)		(sn_nodepda->phys_cpuid[cpuid].subnode)
+#define cpuid_to_slice(cpuid)		(sn_nodepda->phys_cpuid[cpuid].slice)
+
+
+/*
+ * Dont use the following in performance critical code. They require scans
+ * of potentially large tables.
+ */
+extern int nasid_slice_to_cpuid(int, int);
+
+/*
+ * cnodeid_to_nasid - convert a cnodeid to a NASID
+ */
+#define cnodeid_to_nasid(cnodeid)	(sn_cnodeid_to_nasid[cnodeid])
+ 
+/*
+ * nasid_to_cnodeid - convert a NASID to a cnodeid
+ */
+#define nasid_to_cnodeid(nasid)		(physical_node_map[nasid])
+
+/*
+ * partition_coherence_id - get the coherence ID of the current partition
+ */
+extern u8 sn_coherency_id;
+#define partition_coherence_id()	(sn_coherency_id)
+
+#endif /* _ASM_IA64_SN_SN_CPUID_H */
+
diff --git a/arch/ia64/include/asm/sn/sn_feature_sets.h b/arch/ia64/include/asm/sn/sn_feature_sets.h
new file mode 100644
index 00000000..8e83ac11
--- /dev/null
+++ b/arch/ia64/include/asm/sn/sn_feature_sets.h
@@ -0,0 +1,58 @@
+#ifndef _ASM_IA64_SN_FEATURE_SETS_H
+#define _ASM_IA64_SN_FEATURE_SETS_H
+
+/*
+ * SN PROM Features
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005-2006 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+
+/* --------------------- PROM Features -----------------------------*/
+extern int sn_prom_feature_available(int id);
+
+#define MAX_PROM_FEATURE_SETS			2
+
+/*
+ * The following defines features that may or may not be supported by the
+ * current PROM. The OS uses sn_prom_feature_available(feature) to test for
+ * the presence of a PROM feature. Down rev (old) PROMs will always test
+ * "false" for new features.
+ *
+ * Use:
+ * 		if (sn_prom_feature_available(PRF_XXX))
+ * 			...
+ */
+
+#define PRF_PAL_CACHE_FLUSH_SAFE	0
+#define PRF_DEVICE_FLUSH_LIST		1
+#define PRF_HOTPLUG_SUPPORT		2
+#define PRF_CPU_DISABLE_SUPPORT		3
+
+/* --------------------- OS Features -------------------------------*/
+
+/*
+ * The following defines OS features that are optionally present in
+ * the operating system.
+ * During boot, PROM is notified of these features via a series of calls:
+ *
+ * 		ia64_sn_set_os_feature(feature1);
+ *
+ * Once enabled, a feature cannot be disabled.
+ *
+ * By default, features are disabled unless explicitly enabled.
+ *
+ * These defines must be kept in sync with the corresponding
+ * PROM definitions in feature_sets.h.
+ */
+#define  OSF_MCA_SLV_TO_OS_INIT_SLV	0
+#define  OSF_FEAT_LOG_SBES		1
+#define  OSF_ACPI_ENABLE		2
+#define  OSF_PCISEGMENT_ENABLE		3
+
+
+#endif /* _ASM_IA64_SN_FEATURE_SETS_H */
diff --git a/arch/ia64/include/asm/sn/sn_sal.h b/arch/ia64/include/asm/sn/sn_sal.h
new file mode 100644
index 00000000..1f5ff470
--- /dev/null
+++ b/arch/ia64/include/asm/sn/sn_sal.h
@@ -0,0 +1,1233 @@
+#ifndef _ASM_IA64_SN_SN_SAL_H
+#define _ASM_IA64_SN_SN_SAL_H
+
+/*
+ * System Abstraction Layer definitions for IA64
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+
+#include <asm/sal.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/shub_mmr.h>
+
+// SGI Specific Calls
+#define  SN_SAL_POD_MODE                           0x02000001
+#define  SN_SAL_SYSTEM_RESET                       0x02000002
+#define  SN_SAL_PROBE                              0x02000003
+#define  SN_SAL_GET_MASTER_NASID                   0x02000004
+#define	 SN_SAL_GET_KLCONFIG_ADDR		   0x02000005
+#define  SN_SAL_LOG_CE				   0x02000006
+#define  SN_SAL_REGISTER_CE			   0x02000007
+#define  SN_SAL_GET_PARTITION_ADDR		   0x02000009
+#define  SN_SAL_XP_ADDR_REGION			   0x0200000f
+#define  SN_SAL_NO_FAULT_ZONE_VIRTUAL		   0x02000010
+#define  SN_SAL_NO_FAULT_ZONE_PHYSICAL		   0x02000011
+#define  SN_SAL_PRINT_ERROR			   0x02000012
+#define  SN_SAL_REGISTER_PMI_HANDLER		   0x02000014
+#define  SN_SAL_SET_ERROR_HANDLING_FEATURES	   0x0200001a	// reentrant
+#define  SN_SAL_GET_FIT_COMPT			   0x0200001b	// reentrant
+#define  SN_SAL_GET_SAPIC_INFO                     0x0200001d
+#define  SN_SAL_GET_SN_INFO                        0x0200001e
+#define  SN_SAL_CONSOLE_PUTC                       0x02000021
+#define  SN_SAL_CONSOLE_GETC                       0x02000022
+#define  SN_SAL_CONSOLE_PUTS                       0x02000023
+#define  SN_SAL_CONSOLE_GETS                       0x02000024
+#define  SN_SAL_CONSOLE_GETS_TIMEOUT               0x02000025
+#define  SN_SAL_CONSOLE_POLL                       0x02000026
+#define  SN_SAL_CONSOLE_INTR                       0x02000027
+#define  SN_SAL_CONSOLE_PUTB			   0x02000028
+#define  SN_SAL_CONSOLE_XMIT_CHARS		   0x0200002a
+#define  SN_SAL_CONSOLE_READC			   0x0200002b
+#define  SN_SAL_SYSCTL_OP			   0x02000030
+#define  SN_SAL_SYSCTL_MODID_GET	           0x02000031
+#define  SN_SAL_SYSCTL_GET                         0x02000032
+#define  SN_SAL_SYSCTL_IOBRICK_MODULE_GET          0x02000033
+#define  SN_SAL_SYSCTL_IO_PORTSPEED_GET            0x02000035
+#define  SN_SAL_SYSCTL_SLAB_GET                    0x02000036
+#define  SN_SAL_BUS_CONFIG		   	   0x02000037
+#define  SN_SAL_SYS_SERIAL_GET			   0x02000038
+#define  SN_SAL_PARTITION_SERIAL_GET		   0x02000039
+#define  SN_SAL_SYSCTL_PARTITION_GET               0x0200003a
+#define  SN_SAL_SYSTEM_POWER_DOWN		   0x0200003b
+#define  SN_SAL_GET_MASTER_BASEIO_NASID		   0x0200003c
+#define  SN_SAL_COHERENCE                          0x0200003d
+#define  SN_SAL_MEMPROTECT                         0x0200003e
+#define  SN_SAL_SYSCTL_FRU_CAPTURE		   0x0200003f
+
+#define  SN_SAL_SYSCTL_IOBRICK_PCI_OP		   0x02000042	// reentrant
+#define	 SN_SAL_IROUTER_OP			   0x02000043
+#define  SN_SAL_SYSCTL_EVENT                       0x02000044
+#define  SN_SAL_IOIF_INTERRUPT			   0x0200004a
+#define  SN_SAL_HWPERF_OP			   0x02000050   // lock
+#define  SN_SAL_IOIF_ERROR_INTERRUPT		   0x02000051
+#define  SN_SAL_IOIF_PCI_SAFE			   0x02000052
+#define  SN_SAL_IOIF_SLOT_ENABLE		   0x02000053
+#define  SN_SAL_IOIF_SLOT_DISABLE		   0x02000054
+#define  SN_SAL_IOIF_GET_HUBDEV_INFO		   0x02000055
+#define  SN_SAL_IOIF_GET_PCIBUS_INFO		   0x02000056
+#define  SN_SAL_IOIF_GET_PCIDEV_INFO		   0x02000057
+#define  SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST	   0x02000058	// deprecated
+#define  SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST	   0x0200005a
+
+#define SN_SAL_IOIF_INIT			   0x0200005f
+#define SN_SAL_HUB_ERROR_INTERRUPT		   0x02000060
+#define SN_SAL_BTE_RECOVER			   0x02000061
+#define SN_SAL_RESERVED_DO_NOT_USE		   0x02000062
+#define SN_SAL_IOIF_GET_PCI_TOPOLOGY		   0x02000064
+
+#define  SN_SAL_GET_PROM_FEATURE_SET		   0x02000065
+#define  SN_SAL_SET_OS_FEATURE_SET		   0x02000066
+#define  SN_SAL_INJECT_ERROR			   0x02000067
+#define  SN_SAL_SET_CPU_NUMBER			   0x02000068
+
+#define  SN_SAL_KERNEL_LAUNCH_EVENT		   0x02000069
+#define  SN_SAL_WATCHLIST_ALLOC			   0x02000070
+#define  SN_SAL_WATCHLIST_FREE			   0x02000071
+
+/*
+ * Service-specific constants
+ */
+
+/* Console interrupt manipulation */
+	/* action codes */
+#define SAL_CONSOLE_INTR_OFF    0       /* turn the interrupt off */
+#define SAL_CONSOLE_INTR_ON     1       /* turn the interrupt on */
+#define SAL_CONSOLE_INTR_STATUS 2	/* retrieve the interrupt status */
+	/* interrupt specification & status return codes */
+#define SAL_CONSOLE_INTR_XMIT	1	/* output interrupt */
+#define SAL_CONSOLE_INTR_RECV	2	/* input interrupt */
+
+/* interrupt handling */
+#define SAL_INTR_ALLOC		1
+#define SAL_INTR_FREE		2
+#define SAL_INTR_REDIRECT	3
+
+/*
+ * operations available on the generic SN_SAL_SYSCTL_OP
+ * runtime service
+ */
+#define SAL_SYSCTL_OP_IOBOARD		0x0001  /*  retrieve board type */
+#define SAL_SYSCTL_OP_TIO_JLCK_RST      0x0002  /* issue TIO clock reset */
+
+/*
+ * IRouter (i.e. generalized system controller) operations
+ */
+#define SAL_IROUTER_OPEN	0	/* open a subchannel */
+#define SAL_IROUTER_CLOSE	1	/* close a subchannel */
+#define SAL_IROUTER_SEND	2	/* send part of an IRouter packet */
+#define SAL_IROUTER_RECV	3	/* receive part of an IRouter packet */
+#define SAL_IROUTER_INTR_STATUS	4	/* check the interrupt status for
+					 * an open subchannel
+					 */
+#define SAL_IROUTER_INTR_ON	5	/* enable an interrupt */
+#define SAL_IROUTER_INTR_OFF	6	/* disable an interrupt */
+#define SAL_IROUTER_INIT	7	/* initialize IRouter driver */
+
+/* IRouter interrupt mask bits */
+#define SAL_IROUTER_INTR_XMIT	SAL_CONSOLE_INTR_XMIT
+#define SAL_IROUTER_INTR_RECV	SAL_CONSOLE_INTR_RECV
+
+/*
+ * Error Handling Features
+ */
+#define SAL_ERR_FEAT_MCA_SLV_TO_OS_INIT_SLV	0x1	// obsolete
+#define SAL_ERR_FEAT_LOG_SBES			0x2	// obsolete
+#define SAL_ERR_FEAT_MFR_OVERRIDE		0x4
+#define SAL_ERR_FEAT_SBE_THRESHOLD		0xffff0000
+
+/*
+ * SAL Error Codes
+ */
+#define SALRET_MORE_PASSES	1
+#define SALRET_OK		0
+#define SALRET_NOT_IMPLEMENTED	(-1)
+#define SALRET_INVALID_ARG	(-2)
+#define SALRET_ERROR		(-3)
+
+#define SN_SAL_FAKE_PROM			   0x02009999
+
+/**
+  * sn_sal_revision - get the SGI SAL revision number
+  *
+  * The SGI PROM stores its version in the sal_[ab]_rev_(major|minor).
+  * This routine simply extracts the major and minor values and
+  * presents them in a u32 format.
+  *
+  * For example, version 4.05 would be represented at 0x0405.
+  */
+static inline u32
+sn_sal_rev(void)
+{
+	struct ia64_sal_systab *systab = __va(efi.sal_systab);
+
+	return (u32)(systab->sal_b_rev_major << 8 | systab->sal_b_rev_minor);
+}
+
+/*
+ * Returns the master console nasid, if the call fails, return an illegal
+ * value.
+ */
+static inline u64
+ia64_sn_get_console_nasid(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL(ret_stuff, SN_SAL_GET_MASTER_NASID, 0, 0, 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status < 0)
+		return ret_stuff.status;
+
+	/* Master console nasid is in 'v0' */
+	return ret_stuff.v0;
+}
+
+/*
+ * Returns the master baseio nasid, if the call fails, return an illegal
+ * value.
+ */
+static inline u64
+ia64_sn_get_master_baseio_nasid(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL(ret_stuff, SN_SAL_GET_MASTER_BASEIO_NASID, 0, 0, 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status < 0)
+		return ret_stuff.status;
+
+	/* Master baseio nasid is in 'v0' */
+	return ret_stuff.v0;
+}
+
+static inline void *
+ia64_sn_get_klconfig_addr(nasid_t nasid)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL(ret_stuff, SN_SAL_GET_KLCONFIG_ADDR, (u64)nasid, 0, 0, 0, 0, 0, 0);
+	return ret_stuff.v0 ? __va(ret_stuff.v0) : NULL;
+}
+
+/*
+ * Returns the next console character.
+ */
+static inline u64
+ia64_sn_console_getc(int *ch)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_GETC, 0, 0, 0, 0, 0, 0, 0);
+
+	/* character is in 'v0' */
+	*ch = (int)ret_stuff.v0;
+
+	return ret_stuff.status;
+}
+
+/*
+ * Read a character from the SAL console device, after a previous interrupt
+ * or poll operation has given us to know that a character is available
+ * to be read.
+ */
+static inline u64
+ia64_sn_console_readc(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_READC, 0, 0, 0, 0, 0, 0, 0);
+
+	/* character is in 'v0' */
+	return ret_stuff.v0;
+}
+
+/*
+ * Sends the given character to the console.
+ */
+static inline u64
+ia64_sn_console_putc(char ch)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_PUTC, (u64)ch, 0, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Sends the given buffer to the console.
+ */
+static inline u64
+ia64_sn_console_putb(const char *buf, int len)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0; 
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_PUTB, (u64)buf, (u64)len, 0, 0, 0, 0, 0);
+
+	if ( ret_stuff.status == 0 ) {
+		return ret_stuff.v0;
+	}
+	return (u64)0;
+}
+
+/*
+ * Print a platform error record
+ */
+static inline u64
+ia64_sn_plat_specific_err_print(int (*hook)(const char*, ...), char *rec)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_REENTRANT(ret_stuff, SN_SAL_PRINT_ERROR, (u64)hook, (u64)rec, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Check for Platform errors
+ */
+static inline u64
+ia64_sn_plat_cpei_handler(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_LOG_CE, 0, 0, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Set Error Handling Features	(Obsolete)
+ */
+static inline u64
+ia64_sn_plat_set_error_handling_features(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_REENTRANT(ret_stuff, SN_SAL_SET_ERROR_HANDLING_FEATURES,
+		SAL_ERR_FEAT_LOG_SBES,
+		0, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Checks for console input.
+ */
+static inline u64
+ia64_sn_console_check(int *result)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_POLL, 0, 0, 0, 0, 0, 0, 0);
+
+	/* result is in 'v0' */
+	*result = (int)ret_stuff.v0;
+
+	return ret_stuff.status;
+}
+
+/*
+ * Checks console interrupt status
+ */
+static inline u64
+ia64_sn_console_intr_status(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_INTR, 
+		 0, SAL_CONSOLE_INTR_STATUS,
+		 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status == 0) {
+	    return ret_stuff.v0;
+	}
+	
+	return 0;
+}
+
+/*
+ * Enable an interrupt on the SAL console device.
+ */
+static inline void
+ia64_sn_console_intr_enable(u64 intr)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_INTR, 
+		 intr, SAL_CONSOLE_INTR_ON,
+		 0, 0, 0, 0, 0);
+}
+
+/*
+ * Disable an interrupt on the SAL console device.
+ */
+static inline void
+ia64_sn_console_intr_disable(u64 intr)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_INTR, 
+		 intr, SAL_CONSOLE_INTR_OFF,
+		 0, 0, 0, 0, 0);
+}
+
+/*
+ * Sends a character buffer to the console asynchronously.
+ */
+static inline u64
+ia64_sn_console_xmit_chars(char *buf, int len)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_XMIT_CHARS,
+		 (u64)buf, (u64)len,
+		 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status == 0) {
+	    return ret_stuff.v0;
+	}
+
+	return 0;
+}
+
+/*
+ * Returns the iobrick module Id
+ */
+static inline u64
+ia64_sn_sysctl_iobrick_module_get(nasid_t nasid, int *result)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_SYSCTL_IOBRICK_MODULE_GET, nasid, 0, 0, 0, 0, 0, 0);
+
+	/* result is in 'v0' */
+	*result = (int)ret_stuff.v0;
+
+	return ret_stuff.status;
+}
+
+/**
+ * ia64_sn_pod_mode - call the SN_SAL_POD_MODE function
+ *
+ * SN_SAL_POD_MODE actually takes an argument, but it's always
+ * 0 when we call it from the kernel, so we don't have to expose
+ * it to the caller.
+ */
+static inline u64
+ia64_sn_pod_mode(void)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SN_SAL_POD_MODE, 0, 0, 0, 0, 0, 0, 0);
+	if (isrv.status)
+		return 0;
+	return isrv.v0;
+}
+
+/**
+ * ia64_sn_probe_mem - read from memory safely
+ * @addr: address to probe
+ * @size: number bytes to read (1,2,4,8)
+ * @data_ptr: address to store value read by probe (-1 returned if probe fails)
+ *
+ * Call into the SAL to do a memory read.  If the read generates a machine
+ * check, this routine will recover gracefully and return -1 to the caller.
+ * @addr is usually a kernel virtual address in uncached space (i.e. the
+ * address starts with 0xc), but if called in physical mode, @addr should
+ * be a physical address.
+ *
+ * Return values:
+ *  0 - probe successful
+ *  1 - probe failed (generated MCA)
+ *  2 - Bad arg
+ * <0 - PAL error
+ */
+static inline u64
+ia64_sn_probe_mem(long addr, long size, void *data_ptr)
+{
+	struct ia64_sal_retval isrv;
+
+	SAL_CALL(isrv, SN_SAL_PROBE, addr, size, 0, 0, 0, 0, 0);
+
+	if (data_ptr) {
+		switch (size) {
+		case 1:
+			*((u8*)data_ptr) = (u8)isrv.v0;
+			break;
+		case 2:
+			*((u16*)data_ptr) = (u16)isrv.v0;
+			break;
+		case 4:
+			*((u32*)data_ptr) = (u32)isrv.v0;
+			break;
+		case 8:
+			*((u64*)data_ptr) = (u64)isrv.v0;
+			break;
+		default:
+			isrv.status = 2;
+		}
+	}
+	return isrv.status;
+}
+
+/*
+ * Retrieve the system serial number as an ASCII string.
+ */
+static inline u64
+ia64_sn_sys_serial_get(char *buf)
+{
+	struct ia64_sal_retval ret_stuff;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_SYS_SERIAL_GET, buf, 0, 0, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+extern char sn_system_serial_number_string[];
+extern u64 sn_partition_serial_number;
+
+static inline char *
+sn_system_serial_number(void) {
+	if (sn_system_serial_number_string[0]) {
+		return(sn_system_serial_number_string);
+	} else {
+		ia64_sn_sys_serial_get(sn_system_serial_number_string);
+		return(sn_system_serial_number_string);
+	}
+}
+	
+
+/*
+ * Returns a unique id number for this system and partition (suitable for
+ * use with license managers), based in part on the system serial number.
+ */
+static inline u64
+ia64_sn_partition_serial_get(void)
+{
+	struct ia64_sal_retval ret_stuff;
+	ia64_sal_oemcall_reentrant(&ret_stuff, SN_SAL_PARTITION_SERIAL_GET, 0,
+				   0, 0, 0, 0, 0, 0);
+	if (ret_stuff.status != 0)
+	    return 0;
+	return ret_stuff.v0;
+}
+
+static inline u64
+sn_partition_serial_number_val(void) {
+	if (unlikely(sn_partition_serial_number == 0)) {
+		sn_partition_serial_number = ia64_sn_partition_serial_get();
+	}
+	return sn_partition_serial_number;
+}
+
+/*
+ * Returns the partition id of the nasid passed in as an argument,
+ * or INVALID_PARTID if the partition id cannot be retrieved.
+ */
+static inline partid_t
+ia64_sn_sysctl_partition_get(nasid_t nasid)
+{
+	struct ia64_sal_retval ret_stuff;
+	SAL_CALL(ret_stuff, SN_SAL_SYSCTL_PARTITION_GET, nasid,
+		0, 0, 0, 0, 0, 0);
+	if (ret_stuff.status != 0)
+	    return -1;
+	return ((partid_t)ret_stuff.v0);
+}
+
+/*
+ * Returns the physical address of the partition's reserved page through
+ * an iterative number of calls.
+ *
+ * On first call, 'cookie' and 'len' should be set to 0, and 'addr'
+ * set to the nasid of the partition whose reserved page's address is
+ * being sought.
+ * On subsequent calls, pass the values, that were passed back on the
+ * previous call.
+ *
+ * While the return status equals SALRET_MORE_PASSES, keep calling
+ * this function after first copying 'len' bytes starting at 'addr'
+ * into 'buf'. Once the return status equals SALRET_OK, 'addr' will
+ * be the physical address of the partition's reserved page. If the
+ * return status equals neither of these, an error as occurred.
+ */
+static inline s64
+sn_partition_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
+{
+	struct ia64_sal_retval rv;
+	ia64_sal_oemcall_reentrant(&rv, SN_SAL_GET_PARTITION_ADDR, *cookie,
+				   *addr, buf, *len, 0, 0, 0);
+	*cookie = rv.v0;
+	*addr = rv.v1;
+	*len = rv.v2;
+	return rv.status;
+}
+
+/*
+ * Register or unregister a physical address range being referenced across
+ * a partition boundary for which certain SAL errors should be scanned for,
+ * cleaned up and ignored.  This is of value for kernel partitioning code only.
+ * Values for the operation argument:
+ *	1 = register this address range with SAL
+ *	0 = unregister this address range with SAL
+ * 
+ * SAL maintains a reference count on an address range in case it is registered
+ * multiple times.
+ * 
+ * On success, returns the reference count of the address range after the SAL
+ * call has performed the current registration/unregistration.  Returns a
+ * negative value if an error occurred.
+ */
+static inline int
+sn_register_xp_addr_region(u64 paddr, u64 len, int operation)
+{
+	struct ia64_sal_retval ret_stuff;
+	ia64_sal_oemcall(&ret_stuff, SN_SAL_XP_ADDR_REGION, paddr, len,
+			 (u64)operation, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * Register or unregister an instruction range for which SAL errors should
+ * be ignored.  If an error occurs while in the registered range, SAL jumps
+ * to return_addr after ignoring the error.  Values for the operation argument:
+ *	1 = register this instruction range with SAL
+ *	0 = unregister this instruction range with SAL
+ *
+ * Returns 0 on success, or a negative value if an error occurred.
+ */
+static inline int
+sn_register_nofault_code(u64 start_addr, u64 end_addr, u64 return_addr,
+			 int virtual, int operation)
+{
+	struct ia64_sal_retval ret_stuff;
+	u64 call;
+	if (virtual) {
+		call = SN_SAL_NO_FAULT_ZONE_VIRTUAL;
+	} else {
+		call = SN_SAL_NO_FAULT_ZONE_PHYSICAL;
+	}
+	ia64_sal_oemcall(&ret_stuff, call, start_addr, end_addr, return_addr,
+			 (u64)1, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * Register or unregister a function to handle a PMI received by a CPU.
+ * Before calling the registered handler, SAL sets r1 to the value that
+ * was passed in as the global_pointer.
+ *
+ * If the handler pointer is NULL, then the currently registered handler
+ * will be unregistered.
+ *
+ * Returns 0 on success, or a negative value if an error occurred.
+ */
+static inline int
+sn_register_pmi_handler(u64 handler, u64 global_pointer)
+{
+	struct ia64_sal_retval ret_stuff;
+	ia64_sal_oemcall(&ret_stuff, SN_SAL_REGISTER_PMI_HANDLER, handler,
+			 global_pointer, 0, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * Change or query the coherence domain for this partition. Each cpu-based
+ * nasid is represented by a bit in an array of 64-bit words:
+ *      0 = not in this partition's coherency domain
+ *      1 = in this partition's coherency domain
+ *
+ * It is not possible for the local system's nasids to be removed from
+ * the coherency domain.  Purpose of the domain arguments:
+ *      new_domain = set the coherence domain to the given nasids
+ *      old_domain = return the current coherence domain
+ *
+ * Returns 0 on success, or a negative value if an error occurred.
+ */
+static inline int
+sn_change_coherence(u64 *new_domain, u64 *old_domain)
+{
+	struct ia64_sal_retval ret_stuff;
+	ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_COHERENCE, (u64)new_domain,
+				(u64)old_domain, 0, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * Change memory access protections for a physical address range.
+ * nasid_array is not used on Altix, but may be in future architectures.
+ * Available memory protection access classes are defined after the function.
+ */
+static inline int
+sn_change_memprotect(u64 paddr, u64 len, u64 perms, u64 *nasid_array)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_MEMPROTECT, paddr, len,
+				(u64)nasid_array, perms, 0, 0, 0);
+	return ret_stuff.status;
+}
+#define SN_MEMPROT_ACCESS_CLASS_0		0x14a080
+#define SN_MEMPROT_ACCESS_CLASS_1		0x2520c2
+#define SN_MEMPROT_ACCESS_CLASS_2		0x14a1ca
+#define SN_MEMPROT_ACCESS_CLASS_3		0x14a290
+#define SN_MEMPROT_ACCESS_CLASS_6		0x084080
+#define SN_MEMPROT_ACCESS_CLASS_7		0x021080
+
+/*
+ * Turns off system power.
+ */
+static inline void
+ia64_sn_power_down(void)
+{
+	struct ia64_sal_retval ret_stuff;
+	SAL_CALL(ret_stuff, SN_SAL_SYSTEM_POWER_DOWN, 0, 0, 0, 0, 0, 0, 0);
+	while(1)
+		cpu_relax();
+	/* never returns */
+}
+
+/**
+ * ia64_sn_fru_capture - tell the system controller to capture hw state
+ *
+ * This routine will call the SAL which will tell the system controller(s)
+ * to capture hw mmr information from each SHub in the system.
+ */
+static inline u64
+ia64_sn_fru_capture(void)
+{
+        struct ia64_sal_retval isrv;
+        SAL_CALL(isrv, SN_SAL_SYSCTL_FRU_CAPTURE, 0, 0, 0, 0, 0, 0, 0);
+        if (isrv.status)
+                return 0;
+        return isrv.v0;
+}
+
+/*
+ * Performs an operation on a PCI bus or slot -- power up, power down
+ * or reset.
+ */
+static inline u64
+ia64_sn_sysctl_iobrick_pci_op(nasid_t n, u64 connection_type, 
+			      u64 bus, char slot, 
+			      u64 action)
+{
+	struct ia64_sal_retval rv = {0, 0, 0, 0};
+
+	SAL_CALL_NOLOCK(rv, SN_SAL_SYSCTL_IOBRICK_PCI_OP, connection_type, n, action,
+		 bus, (u64) slot, 0, 0);
+	if (rv.status)
+	    	return rv.v0;
+	return 0;
+}
+
+
+/*
+ * Open a subchannel for sending arbitrary data to the system
+ * controller network via the system controller device associated with
+ * 'nasid'.  Return the subchannel number or a negative error code.
+ */
+static inline int
+ia64_sn_irtr_open(nasid_t nasid)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_OPEN, nasid,
+			   0, 0, 0, 0, 0);
+	return (int) rv.v0;
+}
+
+/*
+ * Close system controller subchannel 'subch' previously opened on 'nasid'.
+ */
+static inline int
+ia64_sn_irtr_close(nasid_t nasid, int subch)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_CLOSE,
+			   (u64) nasid, (u64) subch, 0, 0, 0, 0);
+	return (int) rv.status;
+}
+
+/*
+ * Read data from system controller associated with 'nasid' on
+ * subchannel 'subch'.  The buffer to be filled is pointed to by
+ * 'buf', and its capacity is in the integer pointed to by 'len'.  The
+ * referent of 'len' is set to the number of bytes read by the SAL
+ * call.  The return value is either SALRET_OK (for bytes read) or
+ * SALRET_ERROR (for error or "no data available").
+ */
+static inline int
+ia64_sn_irtr_recv(nasid_t nasid, int subch, char *buf, int *len)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_RECV,
+			   (u64) nasid, (u64) subch, (u64) buf, (u64) len,
+			   0, 0);
+	return (int) rv.status;
+}
+
+/*
+ * Write data to the system controller network via the system
+ * controller associated with 'nasid' on suchannel 'subch'.  The
+ * buffer to be written out is pointed to by 'buf', and 'len' is the
+ * number of bytes to be written.  The return value is either the
+ * number of bytes written (which could be zero) or a negative error
+ * code.
+ */
+static inline int
+ia64_sn_irtr_send(nasid_t nasid, int subch, char *buf, int len)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_SEND,
+			   (u64) nasid, (u64) subch, (u64) buf, (u64) len,
+			   0, 0);
+	return (int) rv.v0;
+}
+
+/*
+ * Check whether any interrupts are pending for the system controller
+ * associated with 'nasid' and its subchannel 'subch'.  The return
+ * value is a mask of pending interrupts (SAL_IROUTER_INTR_XMIT and/or
+ * SAL_IROUTER_INTR_RECV).
+ */
+static inline int
+ia64_sn_irtr_intr(nasid_t nasid, int subch)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INTR_STATUS,
+			   (u64) nasid, (u64) subch, 0, 0, 0, 0);
+	return (int) rv.v0;
+}
+
+/*
+ * Enable the interrupt indicated by the intr parameter (either
+ * SAL_IROUTER_INTR_XMIT or SAL_IROUTER_INTR_RECV).
+ */
+static inline int
+ia64_sn_irtr_intr_enable(nasid_t nasid, int subch, u64 intr)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INTR_ON,
+			   (u64) nasid, (u64) subch, intr, 0, 0, 0);
+	return (int) rv.v0;
+}
+
+/*
+ * Disable the interrupt indicated by the intr parameter (either
+ * SAL_IROUTER_INTR_XMIT or SAL_IROUTER_INTR_RECV).
+ */
+static inline int
+ia64_sn_irtr_intr_disable(nasid_t nasid, int subch, u64 intr)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INTR_OFF,
+			   (u64) nasid, (u64) subch, intr, 0, 0, 0);
+	return (int) rv.v0;
+}
+
+/*
+ * Set up a node as the point of contact for system controller
+ * environmental event delivery.
+ */
+static inline int
+ia64_sn_sysctl_event_init(nasid_t nasid)
+{
+        struct ia64_sal_retval rv;
+        SAL_CALL_REENTRANT(rv, SN_SAL_SYSCTL_EVENT, (u64) nasid,
+			   0, 0, 0, 0, 0, 0);
+        return (int) rv.v0;
+}
+
+/*
+ * Ask the system controller on the specified nasid to reset
+ * the CX corelet clock.  Only valid on TIO nodes.
+ */
+static inline int
+ia64_sn_sysctl_tio_clock_reset(nasid_t nasid)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_SYSCTL_OP, SAL_SYSCTL_OP_TIO_JLCK_RST,
+			nasid, 0, 0, 0, 0, 0);
+	if (rv.status != 0)
+		return (int)rv.status;
+	if (rv.v0 != 0)
+		return (int)rv.v0;
+
+	return 0;
+}
+
+/*
+ * Get the associated ioboard type for a given nasid.
+ */
+static inline long
+ia64_sn_sysctl_ioboard_get(nasid_t nasid, u16 *ioboard)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SN_SAL_SYSCTL_OP, SAL_SYSCTL_OP_IOBOARD,
+			   nasid, 0, 0, 0, 0, 0);
+	if (isrv.v0 != 0) {
+		*ioboard = isrv.v0;
+		return isrv.status;
+	}
+	if (isrv.v1 != 0) {
+		*ioboard = isrv.v1;
+		return isrv.status;
+	}
+
+	return isrv.status;
+}
+
+/**
+ * ia64_sn_get_fit_compt - read a FIT entry from the PROM header
+ * @nasid: NASID of node to read
+ * @index: FIT entry index to be retrieved (0..n)
+ * @fitentry: 16 byte buffer where FIT entry will be stored.
+ * @banbuf: optional buffer for retrieving banner
+ * @banlen: length of banner buffer
+ *
+ * Access to the physical PROM chips needs to be serialized since reads and
+ * writes can't occur at the same time, so we need to call into the SAL when
+ * we want to look at the FIT entries on the chips.
+ *
+ * Returns:
+ *	%SALRET_OK if ok
+ *	%SALRET_INVALID_ARG if index too big
+ *	%SALRET_NOT_IMPLEMENTED if running on older PROM
+ *	??? if nasid invalid OR banner buffer not large enough
+ */
+static inline int
+ia64_sn_get_fit_compt(u64 nasid, u64 index, void *fitentry, void *banbuf,
+		      u64 banlen)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_GET_FIT_COMPT, nasid, index, fitentry,
+			banbuf, banlen, 0, 0);
+	return (int) rv.status;
+}
+
+/*
+ * Initialize the SAL components of the system controller
+ * communication driver; specifically pass in a sizable buffer that
+ * can be used for allocation of subchannel queues as new subchannels
+ * are opened.  "buf" points to the buffer, and "len" specifies its
+ * length.
+ */
+static inline int
+ia64_sn_irtr_init(nasid_t nasid, void *buf, int len)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INIT,
+			   (u64) nasid, (u64) buf, (u64) len, 0, 0, 0);
+	return (int) rv.status;
+}
+
+/*
+ * Returns the nasid, subnode & slice corresponding to a SAPIC ID
+ *
+ *  In:
+ *	arg0 - SN_SAL_GET_SAPIC_INFO
+ *	arg1 - sapicid (lid >> 16) 
+ *  Out:
+ *	v0 - nasid
+ *	v1 - subnode
+ *	v2 - slice
+ */
+static inline u64
+ia64_sn_get_sapic_info(int sapicid, int *nasid, int *subnode, int *slice)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_GET_SAPIC_INFO, sapicid, 0, 0, 0, 0, 0, 0);
+
+/***** BEGIN HACK - temp til old proms no longer supported ********/
+	if (ret_stuff.status == SALRET_NOT_IMPLEMENTED) {
+		if (nasid) *nasid = sapicid & 0xfff;
+		if (subnode) *subnode = (sapicid >> 13) & 1;
+		if (slice) *slice = (sapicid >> 12) & 3;
+		return 0;
+	}
+/***** END HACK *******/
+
+	if (ret_stuff.status < 0)
+		return ret_stuff.status;
+
+	if (nasid) *nasid = (int) ret_stuff.v0;
+	if (subnode) *subnode = (int) ret_stuff.v1;
+	if (slice) *slice = (int) ret_stuff.v2;
+	return 0;
+}
+ 
+/*
+ * Returns information about the HUB/SHUB.
+ *  In:
+ *	arg0 - SN_SAL_GET_SN_INFO
+ * 	arg1 - 0 (other values reserved for future use)
+ *  Out:
+ *	v0 
+ *		[7:0]   - shub type (0=shub1, 1=shub2)
+ *		[15:8]  - Log2 max number of nodes in entire system (includes
+ *			  C-bricks, I-bricks, etc)
+ *		[23:16] - Log2 of nodes per sharing domain			 
+ * 		[31:24] - partition ID
+ * 		[39:32] - coherency_id
+ * 		[47:40] - regionsize
+ *	v1 
+ *		[15:0]  - nasid mask (ex., 0x7ff for 11 bit nasid)
+ *	 	[23:15] - bit position of low nasid bit
+ */
+static inline u64
+ia64_sn_get_sn_info(int fc, u8 *shubtype, u16 *nasid_bitmask, u8 *nasid_shift, 
+		u8 *systemsize, u8 *sharing_domain_size, u8 *partid, u8 *coher, u8 *reg)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_GET_SN_INFO, fc, 0, 0, 0, 0, 0, 0);
+
+/***** BEGIN HACK - temp til old proms no longer supported ********/
+	if (ret_stuff.status == SALRET_NOT_IMPLEMENTED) {
+		int nasid = get_sapicid() & 0xfff;
+#define SH_SHUB_ID_NODES_PER_BIT_MASK 0x001f000000000000UL
+#define SH_SHUB_ID_NODES_PER_BIT_SHFT 48
+		if (shubtype) *shubtype = 0;
+		if (nasid_bitmask) *nasid_bitmask = 0x7ff;
+		if (nasid_shift) *nasid_shift = 38;
+		if (systemsize) *systemsize = 10;
+		if (sharing_domain_size) *sharing_domain_size = 8;
+		if (partid) *partid = ia64_sn_sysctl_partition_get(nasid);
+		if (coher) *coher = nasid >> 9;
+		if (reg) *reg = (HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_SHUB_ID)) & SH_SHUB_ID_NODES_PER_BIT_MASK) >>
+			SH_SHUB_ID_NODES_PER_BIT_SHFT;
+		return 0;
+	}
+/***** END HACK *******/
+
+	if (ret_stuff.status < 0)
+		return ret_stuff.status;
+
+	if (shubtype) *shubtype = ret_stuff.v0 & 0xff;
+	if (systemsize) *systemsize = (ret_stuff.v0 >> 8) & 0xff;
+	if (sharing_domain_size) *sharing_domain_size = (ret_stuff.v0 >> 16) & 0xff;
+	if (partid) *partid = (ret_stuff.v0 >> 24) & 0xff;
+	if (coher) *coher = (ret_stuff.v0 >> 32) & 0xff;
+	if (reg) *reg = (ret_stuff.v0 >> 40) & 0xff;
+	if (nasid_bitmask) *nasid_bitmask = (ret_stuff.v1 & 0xffff);
+	if (nasid_shift) *nasid_shift = (ret_stuff.v1 >> 16) & 0xff;
+	return 0;
+}
+ 
+/*
+ * This is the access point to the Altix PROM hardware performance
+ * and status monitoring interface. For info on using this, see
+ * arch/ia64/include/asm/sn/sn2/sn_hwperf.h
+ */
+static inline int
+ia64_sn_hwperf_op(nasid_t nasid, u64 opcode, u64 a0, u64 a1, u64 a2,
+                  u64 a3, u64 a4, int *v0)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_HWPERF_OP, (u64)nasid,
+		opcode, a0, a1, a2, a3, a4);
+	if (v0)
+		*v0 = (int) rv.v0;
+	return (int) rv.status;
+}
+
+static inline int
+ia64_sn_ioif_get_pci_topology(u64 buf, u64 len)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_IOIF_GET_PCI_TOPOLOGY, buf, len, 0, 0, 0, 0, 0);
+	return (int) rv.status;
+}
+
+/*
+ * BTE error recovery is implemented in SAL
+ */
+static inline int
+ia64_sn_bte_recovery(nasid_t nasid)
+{
+	struct ia64_sal_retval rv;
+
+	rv.status = 0;
+	SAL_CALL_NOLOCK(rv, SN_SAL_BTE_RECOVER, (u64)nasid, 0, 0, 0, 0, 0, 0);
+	if (rv.status == SALRET_NOT_IMPLEMENTED)
+		return 0;
+	return (int) rv.status;
+}
+
+static inline int
+ia64_sn_is_fake_prom(void)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_FAKE_PROM, 0, 0, 0, 0, 0, 0, 0);
+	return (rv.status == 0);
+}
+
+static inline int
+ia64_sn_get_prom_feature_set(int set, unsigned long *feature_set)
+{
+	struct ia64_sal_retval rv;
+
+	SAL_CALL_NOLOCK(rv, SN_SAL_GET_PROM_FEATURE_SET, set, 0, 0, 0, 0, 0, 0);
+	if (rv.status != 0)
+		return rv.status;
+	*feature_set = rv.v0;
+	return 0;
+}
+
+static inline int
+ia64_sn_set_os_feature(int feature)
+{
+	struct ia64_sal_retval rv;
+
+	SAL_CALL_NOLOCK(rv, SN_SAL_SET_OS_FEATURE_SET, feature, 0, 0, 0, 0, 0, 0);
+	return rv.status;
+}
+
+static inline int
+sn_inject_error(u64 paddr, u64 *data, u64 *ecc)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_INJECT_ERROR, paddr, (u64)data,
+				(u64)ecc, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+static inline int
+ia64_sn_set_cpu_number(int cpu)
+{
+	struct ia64_sal_retval rv;
+
+	SAL_CALL_NOLOCK(rv, SN_SAL_SET_CPU_NUMBER, cpu, 0, 0, 0, 0, 0, 0);
+	return rv.status;
+}
+static inline int
+ia64_sn_kernel_launch_event(void)
+{
+ 	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_KERNEL_LAUNCH_EVENT, 0, 0, 0, 0, 0, 0, 0);
+	return rv.status;
+}
+
+union sn_watchlist_u {
+	u64     val;
+	struct {
+		u64	blade	: 16,
+			size	: 32,
+			filler	: 16;
+	};
+};
+
+static inline int
+sn_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size,
+				unsigned long *intr_mmr_offset)
+{
+	struct ia64_sal_retval rv;
+	unsigned long addr;
+	union sn_watchlist_u size_blade;
+	int watchlist;
+
+	addr = (unsigned long)mq;
+	size_blade.size = mq_size;
+	size_blade.blade = blade;
+
+	/*
+	 * bios returns watchlist number or negative error number.
+	 */
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_ALLOC, addr,
+			size_blade.val, (u64)intr_mmr_offset,
+			(u64)&watchlist, 0, 0, 0);
+	if (rv.status < 0)
+		return rv.status;
+
+	return watchlist;
+}
+
+static inline int
+sn_mq_watchlist_free(int blade, int watchlist_num)
+{
+	struct ia64_sal_retval rv;
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_FREE, blade,
+			watchlist_num, 0, 0, 0, 0, 0);
+	return rv.status;
+}
+#endif /* _ASM_IA64_SN_SN_SAL_H */
diff --git a/arch/ia64/include/asm/sn/tioca.h b/arch/ia64/include/asm/sn/tioca.h
new file mode 100644
index 00000000..666222d7
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tioca.h
@@ -0,0 +1,596 @@
+#ifndef _ASM_IA64_SN_TIO_TIOCA_H
+#define _ASM_IA64_SN_TIO_TIOCA_H
+
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+#define TIOCA_PART_NUM	0xE020
+#define TIOCA_MFGR_NUM	0x24
+#define TIOCA_REV_A	0x1
+
+/*
+ * Register layout for TIO:CA.  See below for bitmasks for each register.
+ */
+
+struct tioca {
+	u64	ca_id;				/* 0x000000 */
+	u64	ca_control1;			/* 0x000008 */
+	u64	ca_control2;			/* 0x000010 */
+	u64	ca_status1;			/* 0x000018 */
+	u64	ca_status2;			/* 0x000020 */
+	u64	ca_gart_aperature;		/* 0x000028 */
+	u64	ca_gfx_detach;			/* 0x000030 */
+	u64	ca_inta_dest_addr;		/* 0x000038 */
+	u64	ca_intb_dest_addr;		/* 0x000040 */
+	u64	ca_err_int_dest_addr;		/* 0x000048 */
+	u64	ca_int_status;			/* 0x000050 */
+	u64	ca_int_status_alias;		/* 0x000058 */
+	u64	ca_mult_error;			/* 0x000060 */
+	u64	ca_mult_error_alias;		/* 0x000068 */
+	u64	ca_first_error;			/* 0x000070 */
+	u64	ca_int_mask;			/* 0x000078 */
+	u64	ca_crm_pkterr_type;		/* 0x000080 */
+	u64	ca_crm_pkterr_type_alias;	/* 0x000088 */
+	u64	ca_crm_ct_error_detail_1;	/* 0x000090 */
+	u64	ca_crm_ct_error_detail_2;	/* 0x000098 */
+	u64	ca_crm_tnumto;			/* 0x0000A0 */
+	u64	ca_gart_err;			/* 0x0000A8 */
+	u64	ca_pcierr_type;			/* 0x0000B0 */
+	u64	ca_pcierr_addr;			/* 0x0000B8 */
+
+	u64	ca_pad_0000C0[3];		/* 0x0000{C0..D0} */
+
+	u64	ca_pci_rd_buf_flush;		/* 0x0000D8 */
+	u64	ca_pci_dma_addr_extn;		/* 0x0000E0 */
+	u64	ca_agp_dma_addr_extn;		/* 0x0000E8 */
+	u64	ca_force_inta;			/* 0x0000F0 */
+	u64	ca_force_intb;			/* 0x0000F8 */
+	u64	ca_debug_vector_sel;		/* 0x000100 */
+	u64	ca_debug_mux_core_sel;		/* 0x000108 */
+	u64	ca_debug_mux_pci_sel;		/* 0x000110 */
+	u64	ca_debug_domain_sel;		/* 0x000118 */
+
+	u64	ca_pad_000120[28];		/* 0x0001{20..F8} */
+
+	u64	ca_gart_ptr_table;		/* 0x200 */
+	u64	ca_gart_tlb_addr[8];		/* 0x2{08..40} */
+};
+
+/*
+ * Mask/shift definitions for TIO:CA registers.  The convention here is
+ * to mainly use the names as they appear in the "TIO AEGIS Programmers'
+ * Reference" with a CA_ prefix added.  Some exceptions were made to fix
+ * duplicate field names or to generalize fields that are common to
+ * different registers (ca_debug_mux_core_sel and ca_debug_mux_pci_sel for
+ * example).
+ *
+ * Fields consisting of a single bit have a single #define have a single
+ * macro declaration to mask the bit.  Fields consisting of multiple bits
+ * have two declarations: one to mask the proper bits in a register, and 
+ * a second with the suffix "_SHFT" to identify how far the mask needs to
+ * be shifted right to get its base value.
+ */
+
+/* ==== ca_control1 */
+#define CA_SYS_BIG_END			(1ull << 0)
+#define CA_DMA_AGP_SWAP			(1ull << 1)
+#define CA_DMA_PCI_SWAP			(1ull << 2)
+#define CA_PIO_IO_SWAP			(1ull << 3)
+#define CA_PIO_MEM_SWAP			(1ull << 4)
+#define CA_GFX_WR_SWAP			(1ull << 5)
+#define CA_AGP_FW_ENABLE		(1ull << 6)
+#define CA_AGP_CAL_CYCLE		(0x7ull << 7)
+#define CA_AGP_CAL_CYCLE_SHFT		7
+#define CA_AGP_CAL_PRSCL_BYP		(1ull << 10)
+#define CA_AGP_INIT_CAL_ENB		(1ull << 11)
+#define CA_INJ_ADDR_PERR		(1ull << 12)
+#define CA_INJ_DATA_PERR		(1ull << 13)
+	/* bits 15:14 unused */
+#define CA_PCIM_IO_NBE_AD		(0x7ull << 16)
+#define CA_PCIM_IO_NBE_AD_SHFT		16
+#define CA_PCIM_FAST_BTB_ENB		(1ull << 19)
+	/* bits 23:20 unused */
+#define CA_PIO_ADDR_OFFSET		(0xffull << 24)
+#define CA_PIO_ADDR_OFFSET_SHFT		24
+	/* bits 35:32 unused */
+#define CA_AGPDMA_OP_COMBDELAY		(0x1full << 36)
+#define CA_AGPDMA_OP_COMBDELAY_SHFT	36
+	/* bit 41 unused */
+#define CA_AGPDMA_OP_ENB_COMBDELAY	(1ull << 42)
+#define	CA_PCI_INT_LPCNT		(0xffull << 44)
+#define CA_PCI_INT_LPCNT_SHFT		44
+	/* bits 63:52 unused */
+
+/* ==== ca_control2 */
+#define CA_AGP_LATENCY_TO		(0xffull << 0)
+#define CA_AGP_LATENCY_TO_SHFT		0
+#define CA_PCI_LATENCY_TO		(0xffull << 8)
+#define CA_PCI_LATENCY_TO_SHFT		8
+#define CA_PCI_MAX_RETRY		(0x3ffull << 16)
+#define CA_PCI_MAX_RETRY_SHFT		16
+	/* bits 27:26 unused */
+#define CA_RT_INT_EN			(0x3ull << 28)
+#define CA_RT_INT_EN_SHFT			28
+#define CA_MSI_INT_ENB			(1ull << 30)
+#define CA_PCI_ARB_ERR_ENB		(1ull << 31)
+#define CA_GART_MEM_PARAM		(0x3ull << 32)
+#define CA_GART_MEM_PARAM_SHFT		32
+#define CA_GART_RD_PREFETCH_ENB		(1ull << 34)
+#define CA_GART_WR_PREFETCH_ENB		(1ull << 35)
+#define CA_GART_FLUSH_TLB		(1ull << 36)
+	/* bits 39:37 unused */
+#define CA_CRM_TNUMTO_PERIOD		(0x1fffull << 40)
+#define CA_CRM_TNUMTO_PERIOD_SHFT	40
+	/* bits 55:53 unused */
+#define CA_CRM_TNUMTO_ENB		(1ull << 56)
+#define CA_CRM_PRESCALER_BYP		(1ull << 57)
+	/* bits 59:58 unused */
+#define CA_CRM_MAX_CREDIT		(0x7ull << 60)
+#define CA_CRM_MAX_CREDIT_SHFT		60
+	/* bit 63 unused */
+
+/* ==== ca_status1 */
+#define CA_CORELET_ID			(0x3ull << 0)
+#define CA_CORELET_ID_SHFT		0
+#define CA_INTA_N			(1ull << 2)
+#define CA_INTB_N			(1ull << 3)
+#define CA_CRM_CREDIT_AVAIL		(0x7ull << 4)
+#define CA_CRM_CREDIT_AVAIL_SHFT	4
+	/* bit 7 unused */
+#define CA_CRM_SPACE_AVAIL		(0x7full << 8)
+#define CA_CRM_SPACE_AVAIL_SHFT		8
+	/* bit 15 unused */
+#define CA_GART_TLB_VAL			(0xffull << 16)
+#define CA_GART_TLB_VAL_SHFT		16
+	/* bits 63:24 unused */
+
+/* ==== ca_status2 */
+#define CA_GFX_CREDIT_AVAIL		(0xffull << 0)
+#define CA_GFX_CREDIT_AVAIL_SHFT	0
+#define CA_GFX_OPQ_AVAIL		(0xffull << 8)
+#define CA_GFX_OPQ_AVAIL_SHFT		8
+#define CA_GFX_WRBUFF_AVAIL		(0xffull << 16)
+#define CA_GFX_WRBUFF_AVAIL_SHFT	16
+#define CA_ADMA_OPQ_AVAIL		(0xffull << 24)
+#define CA_ADMA_OPQ_AVAIL_SHFT		24
+#define CA_ADMA_WRBUFF_AVAIL		(0xffull << 32)
+#define CA_ADMA_WRBUFF_AVAIL_SHFT	32
+#define CA_ADMA_RDBUFF_AVAIL		(0x7full << 40)
+#define CA_ADMA_RDBUFF_AVAIL_SHFT	40
+#define CA_PCI_PIO_OP_STAT		(1ull << 47)
+#define CA_PDMA_OPQ_AVAIL		(0xfull << 48)
+#define CA_PDMA_OPQ_AVAIL_SHFT		48
+#define CA_PDMA_WRBUFF_AVAIL		(0xfull << 52)
+#define CA_PDMA_WRBUFF_AVAIL_SHFT	52
+#define CA_PDMA_RDBUFF_AVAIL		(0x3ull << 56)
+#define CA_PDMA_RDBUFF_AVAIL_SHFT	56
+	/* bits 63:58 unused */
+
+/* ==== ca_gart_aperature */
+#define CA_GART_AP_ENB_AGP		(1ull << 0)
+#define CA_GART_PAGE_SIZE		(1ull << 1)
+#define CA_GART_AP_ENB_PCI		(1ull << 2)
+	/* bits 11:3 unused */
+#define CA_GART_AP_SIZE			(0x3ffull << 12)
+#define CA_GART_AP_SIZE_SHFT		12
+#define CA_GART_AP_BASE			(0x3ffffffffffull << 22)
+#define CA_GART_AP_BASE_SHFT		22
+
+/* ==== ca_inta_dest_addr
+   ==== ca_intb_dest_addr 
+   ==== ca_err_int_dest_addr */
+	/* bits 2:0 unused */
+#define CA_INT_DEST_ADDR		(0x7ffffffffffffull << 3)
+#define CA_INT_DEST_ADDR_SHFT		3
+	/* bits 55:54 unused */
+#define CA_INT_DEST_VECT		(0xffull << 56)
+#define CA_INT_DEST_VECT_SHFT		56
+
+/* ==== ca_int_status */
+/* ==== ca_int_status_alias */
+/* ==== ca_mult_error */
+/* ==== ca_mult_error_alias */
+/* ==== ca_first_error */
+/* ==== ca_int_mask */
+#define CA_PCI_ERR			(1ull << 0)
+	/* bits 3:1 unused */
+#define CA_GART_FETCH_ERR		(1ull << 4)
+#define CA_GFX_WR_OVFLW			(1ull << 5)
+#define CA_PIO_REQ_OVFLW		(1ull << 6)
+#define CA_CRM_PKTERR			(1ull << 7)
+#define CA_CRM_DVERR			(1ull << 8)
+#define CA_TNUMTO			(1ull << 9)
+#define CA_CXM_RSP_CRED_OVFLW		(1ull << 10)
+#define CA_CXM_REQ_CRED_OVFLW		(1ull << 11)
+#define CA_PIO_INVALID_ADDR		(1ull << 12)
+#define CA_PCI_ARB_TO			(1ull << 13)
+#define CA_AGP_REQ_OFLOW		(1ull << 14)
+#define CA_SBA_TYPE1_ERR		(1ull << 15)
+	/* bit 16 unused */
+#define CA_INTA				(1ull << 17)
+#define CA_INTB				(1ull << 18)
+#define CA_MULT_INTA			(1ull << 19)
+#define CA_MULT_INTB			(1ull << 20)
+#define CA_GFX_CREDIT_OVFLW		(1ull << 21)
+	/* bits 63:22 unused */
+
+/* ==== ca_crm_pkterr_type */
+/* ==== ca_crm_pkterr_type_alias */
+#define CA_CRM_PKTERR_SBERR_HDR		(1ull << 0)
+#define CA_CRM_PKTERR_DIDN		(1ull << 1)
+#define CA_CRM_PKTERR_PACTYPE		(1ull << 2)
+#define CA_CRM_PKTERR_INV_TNUM		(1ull << 3)
+#define CA_CRM_PKTERR_ADDR_RNG		(1ull << 4)
+#define CA_CRM_PKTERR_ADDR_ALGN		(1ull << 5)
+#define CA_CRM_PKTERR_HDR_PARAM		(1ull << 6)
+#define CA_CRM_PKTERR_CW_ERR		(1ull << 7)
+#define CA_CRM_PKTERR_SBERR_NH		(1ull << 8)
+#define CA_CRM_PKTERR_EARLY_TERM	(1ull << 9)
+#define CA_CRM_PKTERR_EARLY_TAIL	(1ull << 10)
+#define CA_CRM_PKTERR_MSSNG_TAIL	(1ull << 11)
+#define CA_CRM_PKTERR_MSSNG_HDR		(1ull << 12)
+	/* bits 15:13 unused */
+#define CA_FIRST_CRM_PKTERR_SBERR_HDR	(1ull << 16)
+#define CA_FIRST_CRM_PKTERR_DIDN	(1ull << 17)
+#define CA_FIRST_CRM_PKTERR_PACTYPE	(1ull << 18)
+#define CA_FIRST_CRM_PKTERR_INV_TNUM	(1ull << 19)
+#define CA_FIRST_CRM_PKTERR_ADDR_RNG	(1ull << 20)
+#define CA_FIRST_CRM_PKTERR_ADDR_ALGN	(1ull << 21)
+#define CA_FIRST_CRM_PKTERR_HDR_PARAM	(1ull << 22)
+#define CA_FIRST_CRM_PKTERR_CW_ERR	(1ull << 23)
+#define CA_FIRST_CRM_PKTERR_SBERR_NH	(1ull << 24)
+#define CA_FIRST_CRM_PKTERR_EARLY_TERM	(1ull << 25)
+#define CA_FIRST_CRM_PKTERR_EARLY_TAIL	(1ull << 26)
+#define CA_FIRST_CRM_PKTERR_MSSNG_TAIL	(1ull << 27)
+#define CA_FIRST_CRM_PKTERR_MSSNG_HDR	(1ull << 28)
+	/* bits 63:29 unused */
+
+/* ==== ca_crm_ct_error_detail_1 */
+#define CA_PKT_TYPE			(0xfull << 0)
+#define CA_PKT_TYPE_SHFT		0
+#define CA_SRC_ID			(0x3ull << 4)
+#define CA_SRC_ID_SHFT			4
+#define CA_DATA_SZ			(0x3ull << 6)
+#define CA_DATA_SZ_SHFT			6
+#define CA_TNUM				(0xffull << 8)
+#define CA_TNUM_SHFT			8
+#define CA_DW_DATA_EN			(0xffull << 16)
+#define CA_DW_DATA_EN_SHFT		16
+#define CA_GFX_CRED			(0xffull << 24)
+#define CA_GFX_CRED_SHFT		24
+#define CA_MEM_RD_PARAM			(0x3ull << 32)
+#define CA_MEM_RD_PARAM_SHFT		32
+#define CA_PIO_OP			(1ull << 34)
+#define CA_CW_ERR			(1ull << 35)
+	/* bits 62:36 unused */
+#define CA_VALID			(1ull << 63)
+
+/* ==== ca_crm_ct_error_detail_2 */
+	/* bits 2:0 unused */
+#define CA_PKT_ADDR			(0x1fffffffffffffull << 3)
+#define CA_PKT_ADDR_SHFT		3
+	/* bits 63:56 unused */
+
+/* ==== ca_crm_tnumto */
+#define CA_CRM_TNUMTO_VAL		(0xffull << 0)
+#define CA_CRM_TNUMTO_VAL_SHFT		0
+#define CA_CRM_TNUMTO_WR		(1ull << 8)
+	/* bits 63:9 unused */
+
+/* ==== ca_gart_err */
+#define CA_GART_ERR_SOURCE		(0x3ull << 0)
+#define CA_GART_ERR_SOURCE_SHFT		0
+	/* bits 3:2 unused */
+#define CA_GART_ERR_ADDR		(0xfffffffffull << 4)
+#define CA_GART_ERR_ADDR_SHFT		4
+	/* bits 63:40 unused */
+
+/* ==== ca_pcierr_type */
+#define CA_PCIERR_DATA			(0xffffffffull << 0)
+#define CA_PCIERR_DATA_SHFT		0
+#define CA_PCIERR_ENB			(0xfull << 32)
+#define CA_PCIERR_ENB_SHFT		32
+#define CA_PCIERR_CMD			(0xfull << 36)
+#define CA_PCIERR_CMD_SHFT		36
+#define CA_PCIERR_A64			(1ull << 40)
+#define CA_PCIERR_SLV_SERR		(1ull << 41)
+#define CA_PCIERR_SLV_WR_PERR		(1ull << 42)
+#define CA_PCIERR_SLV_RD_PERR		(1ull << 43)
+#define CA_PCIERR_MST_SERR		(1ull << 44)
+#define CA_PCIERR_MST_WR_PERR		(1ull << 45)
+#define CA_PCIERR_MST_RD_PERR		(1ull << 46)
+#define CA_PCIERR_MST_MABT		(1ull << 47)
+#define CA_PCIERR_MST_TABT		(1ull << 48)
+#define CA_PCIERR_MST_RETRY_TOUT	(1ull << 49)
+
+#define CA_PCIERR_TYPES \
+	(CA_PCIERR_A64|CA_PCIERR_SLV_SERR| \
+	 CA_PCIERR_SLV_WR_PERR|CA_PCIERR_SLV_RD_PERR| \
+	 CA_PCIERR_MST_SERR|CA_PCIERR_MST_WR_PERR|CA_PCIERR_MST_RD_PERR| \
+	 CA_PCIERR_MST_MABT|CA_PCIERR_MST_TABT|CA_PCIERR_MST_RETRY_TOUT)
+
+	/* bits 63:50 unused */
+
+/* ==== ca_pci_dma_addr_extn */
+#define CA_UPPER_NODE_OFFSET		(0x3full << 0)
+#define CA_UPPER_NODE_OFFSET_SHFT	0
+	/* bits 7:6 unused */
+#define CA_CHIPLET_ID			(0x3ull << 8)
+#define CA_CHIPLET_ID_SHFT		8
+	/* bits 11:10 unused */
+#define CA_PCI_DMA_NODE_ID		(0xffffull << 12)
+#define CA_PCI_DMA_NODE_ID_SHFT		12
+	/* bits 27:26 unused */
+#define CA_PCI_DMA_PIO_MEM_TYPE		(1ull << 28)
+	/* bits 63:29 unused */
+
+
+/* ==== ca_agp_dma_addr_extn */
+	/* bits 19:0 unused */
+#define CA_AGP_DMA_NODE_ID		(0xffffull << 20)
+#define CA_AGP_DMA_NODE_ID_SHFT		20
+	/* bits 27:26 unused */
+#define CA_AGP_DMA_PIO_MEM_TYPE		(1ull << 28)
+	/* bits 63:29 unused */
+
+/* ==== ca_debug_vector_sel */
+#define CA_DEBUG_MN_VSEL		(0xfull << 0)
+#define CA_DEBUG_MN_VSEL_SHFT		0
+#define CA_DEBUG_PP_VSEL		(0xfull << 4)
+#define CA_DEBUG_PP_VSEL_SHFT		4
+#define CA_DEBUG_GW_VSEL		(0xfull << 8)
+#define CA_DEBUG_GW_VSEL_SHFT		8
+#define CA_DEBUG_GT_VSEL		(0xfull << 12)
+#define CA_DEBUG_GT_VSEL_SHFT		12
+#define CA_DEBUG_PD_VSEL		(0xfull << 16)
+#define CA_DEBUG_PD_VSEL_SHFT		16
+#define CA_DEBUG_AD_VSEL		(0xfull << 20)
+#define CA_DEBUG_AD_VSEL_SHFT		20
+#define CA_DEBUG_CX_VSEL		(0xfull << 24)
+#define CA_DEBUG_CX_VSEL_SHFT		24
+#define CA_DEBUG_CR_VSEL		(0xfull << 28)
+#define CA_DEBUG_CR_VSEL_SHFT		28
+#define CA_DEBUG_BA_VSEL		(0xfull << 32)
+#define CA_DEBUG_BA_VSEL_SHFT		32
+#define CA_DEBUG_PE_VSEL		(0xfull << 36)
+#define CA_DEBUG_PE_VSEL_SHFT		36
+#define CA_DEBUG_BO_VSEL		(0xfull << 40)
+#define CA_DEBUG_BO_VSEL_SHFT		40
+#define CA_DEBUG_BI_VSEL		(0xfull << 44)
+#define CA_DEBUG_BI_VSEL_SHFT		44
+#define CA_DEBUG_AS_VSEL		(0xfull << 48)
+#define CA_DEBUG_AS_VSEL_SHFT		48
+#define CA_DEBUG_PS_VSEL		(0xfull << 52)
+#define CA_DEBUG_PS_VSEL_SHFT		52
+#define CA_DEBUG_PM_VSEL		(0xfull << 56)
+#define CA_DEBUG_PM_VSEL_SHFT		56
+	/* bits 63:60 unused */
+
+/* ==== ca_debug_mux_core_sel */
+/* ==== ca_debug_mux_pci_sel */
+#define CA_DEBUG_MSEL0			(0x7ull << 0)
+#define CA_DEBUG_MSEL0_SHFT		0
+	/* bit 3 unused */
+#define CA_DEBUG_NSEL0			(0x7ull << 4)
+#define CA_DEBUG_NSEL0_SHFT		4
+	/* bit 7 unused */
+#define CA_DEBUG_MSEL1			(0x7ull << 8)
+#define CA_DEBUG_MSEL1_SHFT		8
+	/* bit 11 unused */
+#define CA_DEBUG_NSEL1			(0x7ull << 12)
+#define CA_DEBUG_NSEL1_SHFT		12
+	/* bit 15 unused */
+#define CA_DEBUG_MSEL2			(0x7ull << 16)
+#define CA_DEBUG_MSEL2_SHFT		16
+	/* bit 19 unused */
+#define CA_DEBUG_NSEL2			(0x7ull << 20)
+#define CA_DEBUG_NSEL2_SHFT		20
+	/* bit 23 unused */
+#define CA_DEBUG_MSEL3			(0x7ull << 24)
+#define CA_DEBUG_MSEL3_SHFT		24
+	/* bit 27 unused */
+#define CA_DEBUG_NSEL3			(0x7ull << 28)
+#define CA_DEBUG_NSEL3_SHFT		28
+	/* bit 31 unused */
+#define CA_DEBUG_MSEL4			(0x7ull << 32)
+#define CA_DEBUG_MSEL4_SHFT		32
+	/* bit 35 unused */
+#define CA_DEBUG_NSEL4			(0x7ull << 36)
+#define CA_DEBUG_NSEL4_SHFT		36
+	/* bit 39 unused */
+#define CA_DEBUG_MSEL5			(0x7ull << 40)
+#define CA_DEBUG_MSEL5_SHFT		40
+	/* bit 43 unused */
+#define CA_DEBUG_NSEL5			(0x7ull << 44)
+#define CA_DEBUG_NSEL5_SHFT		44
+	/* bit 47 unused */
+#define CA_DEBUG_MSEL6			(0x7ull << 48)
+#define CA_DEBUG_MSEL6_SHFT		48
+	/* bit 51 unused */
+#define CA_DEBUG_NSEL6			(0x7ull << 52)
+#define CA_DEBUG_NSEL6_SHFT		52
+	/* bit 55 unused */
+#define CA_DEBUG_MSEL7			(0x7ull << 56)
+#define CA_DEBUG_MSEL7_SHFT		56
+	/* bit 59 unused */
+#define CA_DEBUG_NSEL7			(0x7ull << 60)
+#define CA_DEBUG_NSEL7_SHFT		60
+	/* bit 63 unused */
+
+
+/* ==== ca_debug_domain_sel */
+#define CA_DEBUG_DOMAIN_L		(1ull << 0)
+#define CA_DEBUG_DOMAIN_H		(1ull << 1)
+	/* bits 63:2 unused */
+
+/* ==== ca_gart_ptr_table */
+#define CA_GART_PTR_VAL			(1ull << 0)
+	/* bits 11:1 unused */
+#define CA_GART_PTR_ADDR		(0xfffffffffffull << 12)
+#define CA_GART_PTR_ADDR_SHFT		12
+	/* bits 63:56 unused */
+
+/* ==== ca_gart_tlb_addr[0-7] */
+#define CA_GART_TLB_ADDR		(0xffffffffffffffull << 0)
+#define CA_GART_TLB_ADDR_SHFT		0
+	/* bits 62:56 unused */
+#define CA_GART_TLB_ENTRY_VAL		(1ull << 63)
+
+/*
+ * PIO address space ranges for TIO:CA
+ */
+
+/* CA internal registers */
+#define CA_PIO_ADMIN			0x00000000
+#define CA_PIO_ADMIN_LEN		0x00010000
+
+/* GFX Write Buffer - Diagnostics */
+#define CA_PIO_GFX			0x00010000
+#define CA_PIO_GFX_LEN			0x00010000
+
+/* AGP DMA Write Buffer - Diagnostics */
+#define CA_PIO_AGP_DMAWRITE		0x00020000
+#define CA_PIO_AGP_DMAWRITE_LEN		0x00010000
+
+/* AGP DMA READ Buffer - Diagnostics */
+#define CA_PIO_AGP_DMAREAD		0x00030000
+#define CA_PIO_AGP_DMAREAD_LEN		0x00010000
+
+/* PCI Config Type 0 */
+#define CA_PIO_PCI_TYPE0_CONFIG		0x01000000
+#define CA_PIO_PCI_TYPE0_CONFIG_LEN	0x01000000
+
+/* PCI Config Type 1 */
+#define CA_PIO_PCI_TYPE1_CONFIG		0x02000000
+#define CA_PIO_PCI_TYPE1_CONFIG_LEN	0x01000000
+
+/* PCI I/O Cycles - mapped to PCI Address 0x00000000-0x04ffffff */
+#define CA_PIO_PCI_IO			0x03000000
+#define CA_PIO_PCI_IO_LEN		0x05000000
+
+/* PCI MEM Cycles - mapped to PCI with CA_PIO_ADDR_OFFSET of ca_control1 */
+/*	use Fast Write if enabled and coretalk packet type is a GFX request */
+#define CA_PIO_PCI_MEM_OFFSET		0x08000000
+#define CA_PIO_PCI_MEM_OFFSET_LEN	0x08000000
+
+/* PCI MEM Cycles - mapped to PCI Address 0x00000000-0xbfffffff */
+/*	use Fast Write if enabled and coretalk packet type is a GFX request */
+#define CA_PIO_PCI_MEM			0x40000000
+#define CA_PIO_PCI_MEM_LEN		0xc0000000
+
+/*
+ * DMA space
+ *
+ * The CA aperature (ie. bus address range) mapped by the GART is segmented into
+ * two parts.  The lower portion of the aperature is used for mapping 32 bit
+ * PCI addresses which are managed by the dma interfaces in this file.  The
+ * upper poprtion of the aperature is used for mapping 48 bit AGP addresses.
+ * The AGP portion of the aperature is managed by the agpgart_be.c driver
+ * in drivers/linux/agp.  There are ca-specific hooks in that driver to
+ * manipulate the gart, but management of the AGP portion of the aperature
+ * is the responsibility of that driver.
+ *
+ * CA allows three main types of DMA mapping:
+ *
+ * PCI 64-bit	Managed by this driver
+ * PCI 32-bit 	Managed by this driver
+ * AGP 48-bit	Managed by hooks in the /dev/agpgart driver
+ *
+ * All of the above can optionally be remapped through the GART.  The following
+ * table lists the combinations of addressing types and GART remapping that
+ * is currently supported by the driver (h/w supports all, s/w limits this):
+ *
+ *		PCI64		PCI32		AGP48
+ * GART		no		yes		yes
+ * Direct	yes		yes		no
+ *
+ * GART remapping of PCI64 is not done because there is no need to.  The
+ * 64 bit PCI address holds all of the information necessary to target any
+ * memory in the system.
+ *
+ * AGP48 is always mapped through the GART.  Management of the AGP48 portion
+ * of the aperature is the responsibility of code in the agpgart_be driver.
+ *
+ * The non-64 bit bus address space will currently be partitioned like this:
+ *
+ *	0xffff_ffff_ffff	+--------
+ *				| AGP48 direct
+ *				| Space managed by this driver
+ *	CA_AGP_DIRECT_BASE	+--------
+ *				| AGP GART mapped (gfx aperature)
+ *				| Space managed by /dev/agpgart driver
+ *				| This range is exposed to the agpgart
+ * 				| driver as the "graphics aperature"
+ *	CA_AGP_MAPPED_BASE	+-----
+ *				| PCI GART mapped
+ *				| Space managed by this driver		
+ *	CA_PCI32_MAPPED_BASE	+----
+ *				| PCI32 direct
+ *				| Space managed by this driver
+ *	0xC000_0000		+--------
+ *	(CA_PCI32_DIRECT_BASE)
+ *
+ * The bus address range CA_PCI32_MAPPED_BASE through CA_AGP_DIRECT_BASE
+ * is what we call the CA aperature.  Addresses falling in this range will
+ * be remapped using the GART.
+ *
+ * The bus address range CA_AGP_MAPPED_BASE through CA_AGP_DIRECT_BASE
+ * is what we call the graphics aperature.  This is a subset of the CA
+ * aperature and is under the control of the agpgart_be driver.
+ *
+ * CA_PCI32_MAPPED_BASE, CA_AGP_MAPPED_BASE, and CA_AGP_DIRECT_BASE are
+ * somewhat arbitrary values.  The known constraints on choosing these is:
+ *
+ * 1)  CA_AGP_DIRECT_BASE-CA_PCI32_MAPPED_BASE+1 (the CA aperature size)
+ *     must be one of the values supported by the ca_gart_aperature register.
+ *     Currently valid values are: 4MB through 4096MB in powers of 2 increments
+ *
+ * 2)  CA_AGP_DIRECT_BASE-CA_AGP_MAPPED_BASE+1 (the gfx aperature size)
+ *     must be in MB units since that's what the agpgart driver assumes.
+ */
+
+/*
+ * Define Bus DMA ranges.  These are configurable (see constraints above)
+ * and will probably need tuning based on experience.
+ */
+
+
+/*
+ * 11/24/03
+ * CA has an addressing glitch w.r.t. PCI direct 32 bit DMA that makes it
+ * generally unusable.  The problem is that for PCI direct 32 
+ * DMA's, all 32 bits of the bus address are used to form the lower 32 bits
+ * of the coretalk address, and coretalk bits 38:32 come from a register.
+ * Since only PCI bus addresses 0xC0000000-0xFFFFFFFF (1GB) are available
+ * for DMA (the rest is allocated to PIO), host node addresses need to be
+ * such that their lower 32 bits fall in the 0xC0000000-0xffffffff range
+ * as well.  So there can be no PCI32 direct DMA below 3GB!!  For this
+ * reason we set the CA_PCI32_DIRECT_SIZE to 0 which essentially makes
+ * tioca_dma_direct32() a noop but preserves the code flow should this issue
+ * be fixed in a respin.
+ *
+ * For now, all PCI32 DMA's must be mapped through the GART.
+ */
+
+#define CA_PCI32_DIRECT_BASE	0xC0000000UL	/* BASE not configurable */
+#define CA_PCI32_DIRECT_SIZE	0x00000000UL	/* 0 MB */
+
+#define CA_PCI32_MAPPED_BASE	0xC0000000UL
+#define CA_PCI32_MAPPED_SIZE	0x40000000UL	/* 2GB */
+
+#define CA_AGP_MAPPED_BASE	0x80000000UL
+#define CA_AGP_MAPPED_SIZE	0x40000000UL	/* 2GB */
+
+#define CA_AGP_DIRECT_BASE	0x40000000UL	/* 2GB */
+#define CA_AGP_DIRECT_SIZE	0x40000000UL
+
+#define CA_APERATURE_BASE	(CA_AGP_MAPPED_BASE)
+#define CA_APERATURE_SIZE	(CA_AGP_MAPPED_SIZE+CA_PCI32_MAPPED_SIZE)
+
+#endif  /* _ASM_IA64_SN_TIO_TIOCA_H */
diff --git a/arch/ia64/include/asm/sn/tioca_provider.h b/arch/ia64/include/asm/sn/tioca_provider.h
new file mode 100644
index 00000000..9a820ac6
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tioca_provider.h
@@ -0,0 +1,207 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H
+#define _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H
+
+#include <asm/sn/tioca.h>
+
+/*
+ * WAR enables
+ * Defines for individual WARs. Each is a bitmask of applicable
+ * part revision numbers. (1 << 1) == rev A, (1 << 2) == rev B,
+ * (3 << 1) == (rev A or rev B), etc
+ */
+
+#define TIOCA_WAR_ENABLED(pv, tioca_common) \
+	((1 << tioca_common->ca_rev) & pv)
+
+  /* TIO:ICE:FRZ:Freezer loses a PIO data ucred on PIO RD RSP with CW error */
+#define PV907908 (1 << 1)
+  /* ATI config space problems after BIOS execution starts */
+#define PV908234 (1 << 1)
+  /* CA:AGPDMA write request data mismatch with ABC1CL merge */
+#define PV895469 (1 << 1)
+  /* TIO:CA TLB invalidate of written GART entries possibly not occurring in CA*/
+#define PV910244 (1 << 1)
+
+struct tioca_dmamap{
+	struct list_head	cad_list;	/* headed by ca_list */
+
+	dma_addr_t		cad_dma_addr;	/* Linux dma handle */
+	uint			cad_gart_entry; /* start entry in ca_gart_pagemap */
+	uint			cad_gart_size;	/* #entries for this map */
+};
+
+/*
+ * Kernel only fields.  Prom may look at this stuff for debugging only.
+ * Access this structure through the ca_kernel_private ptr.
+ */
+
+struct tioca_common ;
+
+struct tioca_kernel {
+	struct tioca_common	*ca_common;	/* tioca this belongs to */
+	struct list_head	ca_list;	/* list of all ca's */
+	struct list_head	ca_dmamaps;
+	spinlock_t		ca_lock;	/* Kernel lock */
+	cnodeid_t		ca_closest_node;
+	struct list_head	*ca_devices;	/* bus->devices */
+
+	/*
+	 * General GART stuff
+	 */
+	u64	ca_ap_size;		/* size of aperature in bytes */
+	u32	ca_gart_entries;	/* # u64 entries in gart */
+	u32	ca_ap_pagesize; 	/* aperature page size in bytes */
+	u64	ca_ap_bus_base; 	/* bus address of CA aperature */
+	u64	ca_gart_size;		/* gart size in bytes */
+	u64	*ca_gart;		/* gart table vaddr */
+	u64	ca_gart_coretalk_addr;	/* gart coretalk addr */
+	u8		ca_gart_iscoherent;	/* used in tioca_tlbflush */
+
+	/* PCI GART convenience values */
+	u64	ca_pciap_base;		/* pci aperature bus base address */
+	u64	ca_pciap_size;		/* pci aperature size (bytes) */
+	u64	ca_pcigart_base;	/* gfx GART bus base address */
+	u64	*ca_pcigart;		/* gfx GART vm address */
+	u32	ca_pcigart_entries;
+	u32	ca_pcigart_start;	/* PCI start index in ca_gart */
+	void		*ca_pcigart_pagemap;
+
+	/* AGP GART convenience values */
+	u64	ca_gfxap_base;		/* gfx aperature bus base address */
+	u64	ca_gfxap_size;		/* gfx aperature size (bytes) */
+	u64	ca_gfxgart_base;	/* gfx GART bus base address */
+	u64	*ca_gfxgart;		/* gfx GART vm address */
+	u32	ca_gfxgart_entries;
+	u32	ca_gfxgart_start;	/* agpgart start index in ca_gart */
+};
+
+/*
+ * Common tioca info shared between kernel and prom
+ *
+ * DO NOT CHANGE THIS STRUCT WITHOUT MAKING CORRESPONDING CHANGES
+ * TO THE PROM VERSION.
+ */
+
+struct tioca_common {
+	struct pcibus_bussoft	ca_common;	/* common pciio header */
+
+	u32		ca_rev;
+	u32		ca_closest_nasid;
+
+	u64		ca_prom_private;
+	u64		ca_kernel_private;
+};
+
+/**
+ * tioca_paddr_to_gart - Convert an SGI coretalk address to a CA GART entry
+ * @paddr: page address to convert
+ *
+ * Convert a system [coretalk] address to a GART entry.  GART entries are
+ * formed using the following:
+ *
+ *     data = ( (1<<63) |  ( (REMAP_NODE_ID << 40) | (MD_CHIPLET_ID << 38) | 
+ * (REMAP_SYS_ADDR) ) >> 12 )
+ *
+ * DATA written to 1 GART TABLE Entry in system memory is remapped system
+ * addr for 1 page 
+ *
+ * The data is for coretalk address format right shifted 12 bits with a
+ * valid bit.
+ *
+ *	GART_TABLE_ENTRY [ 25:0 ]  -- REMAP_SYS_ADDRESS[37:12].
+ *	GART_TABLE_ENTRY [ 27:26 ] -- SHUB MD chiplet id.
+ *	GART_TABLE_ENTRY [ 41:28 ] -- REMAP_NODE_ID.
+ *	GART_TABLE_ENTRY [ 63 ]    -- Valid Bit 
+ */
+static inline u64
+tioca_paddr_to_gart(unsigned long paddr)
+{
+	/*
+	 * We are assuming right now that paddr already has the correct
+	 * format since the address from xtalk_dmaXXX should already have
+	 * NODE_ID, CHIPLET_ID, and SYS_ADDR in the correct locations.
+	 */
+
+	return ((paddr) >> 12) | (1UL << 63);
+}
+
+/**
+ * tioca_physpage_to_gart - Map a host physical page for SGI CA based DMA
+ * @page_addr: system page address to map
+ */
+
+static inline unsigned long
+tioca_physpage_to_gart(u64 page_addr)
+{
+	u64 coretalk_addr;
+
+	coretalk_addr = PHYS_TO_TIODMA(page_addr);
+	if (!coretalk_addr) {
+		return 0;
+	}
+
+	return tioca_paddr_to_gart(coretalk_addr);
+}
+
+/**
+ * tioca_tlbflush - invalidate cached SGI CA GART TLB entries
+ * @tioca_kernel: CA context 
+ *
+ * Invalidate tlb entries for a given CA GART.  Main complexity is to account
+ * for revA bug.
+ */
+static inline void
+tioca_tlbflush(struct tioca_kernel *tioca_kernel)
+{
+	volatile u64 tmp;
+	volatile struct tioca __iomem *ca_base;
+	struct tioca_common *tioca_common;
+
+	tioca_common = tioca_kernel->ca_common;
+	ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base;
+
+	/*
+	 * Explicit flushes not needed if GART is in cached mode
+	 */
+	if (tioca_kernel->ca_gart_iscoherent) {
+		if (TIOCA_WAR_ENABLED(PV910244, tioca_common)) {
+			/*
+			 * PV910244:  RevA CA needs explicit flushes.
+			 * Need to put GART into uncached mode before
+			 * flushing otherwise the explicit flush is ignored.
+			 *
+			 * Alternate WAR would be to leave GART cached and
+			 * touch every CL aligned GART entry.
+			 */
+
+			__sn_clrq_relaxed(&ca_base->ca_control2, CA_GART_MEM_PARAM);
+			__sn_setq_relaxed(&ca_base->ca_control2, CA_GART_FLUSH_TLB);
+			__sn_setq_relaxed(&ca_base->ca_control2,
+			    (0x2ull << CA_GART_MEM_PARAM_SHFT));
+			tmp = __sn_readq_relaxed(&ca_base->ca_control2);
+		}
+
+		return;
+	}
+
+	/*
+	 * Gart in uncached mode ... need an explicit flush.
+	 */
+
+	__sn_setq_relaxed(&ca_base->ca_control2, CA_GART_FLUSH_TLB);
+	tmp = __sn_readq_relaxed(&ca_base->ca_control2);
+}
+
+extern u32	tioca_gart_found;
+extern struct list_head tioca_list;
+extern int tioca_init_provider(void);
+extern void tioca_fastwrite_enable(struct tioca_kernel *tioca_kern);
+#endif /* _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H */
diff --git a/arch/ia64/include/asm/sn/tioce.h b/arch/ia64/include/asm/sn/tioce.h
new file mode 100644
index 00000000..893468e1
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tioce.h
@@ -0,0 +1,760 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef __ASM_IA64_SN_TIOCE_H__
+#define __ASM_IA64_SN_TIOCE_H__
+
+/* CE ASIC part & mfgr information  */
+#define TIOCE_PART_NUM			0xCE00
+#define TIOCE_SRC_ID			0x01
+#define TIOCE_REV_A			0x1
+
+/* CE Virtual PPB Vendor/Device IDs */
+#define CE_VIRT_PPB_VENDOR_ID		0x10a9
+#define CE_VIRT_PPB_DEVICE_ID		0x4002
+
+/* CE Host Bridge Vendor/Device IDs */
+#define CE_HOST_BRIDGE_VENDOR_ID	0x10a9
+#define CE_HOST_BRIDGE_DEVICE_ID	0x4001
+
+
+#define TIOCE_NUM_M40_ATES		4096
+#define TIOCE_NUM_M3240_ATES		2048
+#define TIOCE_NUM_PORTS			2
+
+/*
+ * Register layout for TIOCE.  MMR offsets are shown at the far right of the
+ * structure definition.
+ */
+typedef volatile struct tioce {
+	/*
+	 * ADMIN : Administration Registers
+	 */
+	u64	ce_adm_id;				/* 0x000000 */
+	u64	ce_pad_000008;				/* 0x000008 */
+	u64	ce_adm_dyn_credit_status;		/* 0x000010 */
+	u64	ce_adm_last_credit_status;		/* 0x000018 */
+	u64	ce_adm_credit_limit;			/* 0x000020 */
+	u64	ce_adm_force_credit;			/* 0x000028 */
+	u64	ce_adm_control;				/* 0x000030 */
+	u64	ce_adm_mmr_chn_timeout;			/* 0x000038 */
+	u64	ce_adm_ssp_ure_timeout;			/* 0x000040 */
+	u64	ce_adm_ssp_dre_timeout;			/* 0x000048 */
+	u64	ce_adm_ssp_debug_sel;			/* 0x000050 */
+	u64	ce_adm_int_status;			/* 0x000058 */
+	u64	ce_adm_int_status_alias;		/* 0x000060 */
+	u64	ce_adm_int_mask;			/* 0x000068 */
+	u64	ce_adm_int_pending;			/* 0x000070 */
+	u64	ce_adm_force_int;			/* 0x000078 */
+	u64	ce_adm_ure_ups_buf_barrier_flush;	/* 0x000080 */
+	u64	ce_adm_int_dest[15];	    /* 0x000088 -- 0x0000F8 */
+	u64	ce_adm_error_summary;			/* 0x000100 */
+	u64	ce_adm_error_summary_alias;		/* 0x000108 */
+	u64	ce_adm_error_mask;			/* 0x000110 */
+	u64	ce_adm_first_error;			/* 0x000118 */
+	u64	ce_adm_error_overflow;			/* 0x000120 */
+	u64	ce_adm_error_overflow_alias;		/* 0x000128 */
+	u64	ce_pad_000130[2];	    /* 0x000130 -- 0x000138 */
+	u64	ce_adm_tnum_error;			/* 0x000140 */
+	u64	ce_adm_mmr_err_detail;			/* 0x000148 */
+	u64	ce_adm_msg_sram_perr_detail;		/* 0x000150 */
+	u64	ce_adm_bap_sram_perr_detail;		/* 0x000158 */
+	u64	ce_adm_ce_sram_perr_detail;		/* 0x000160 */
+	u64	ce_adm_ce_credit_oflow_detail;		/* 0x000168 */
+	u64	ce_adm_tx_link_idle_max_timer;		/* 0x000170 */
+	u64	ce_adm_pcie_debug_sel;			/* 0x000178 */
+	u64	ce_pad_000180[16];	    /* 0x000180 -- 0x0001F8 */
+
+	u64	ce_adm_pcie_debug_sel_top;		/* 0x000200 */
+	u64	ce_adm_pcie_debug_lat_sel_lo_top;	/* 0x000208 */
+	u64	ce_adm_pcie_debug_lat_sel_hi_top;	/* 0x000210 */
+	u64	ce_adm_pcie_debug_trig_sel_top;		/* 0x000218 */
+	u64	ce_adm_pcie_debug_trig_lat_sel_lo_top;	/* 0x000220 */
+	u64	ce_adm_pcie_debug_trig_lat_sel_hi_top;	/* 0x000228 */
+	u64	ce_adm_pcie_trig_compare_top;		/* 0x000230 */
+	u64	ce_adm_pcie_trig_compare_en_top;	/* 0x000238 */
+	u64	ce_adm_ssp_debug_sel_top;		/* 0x000240 */
+	u64	ce_adm_ssp_debug_lat_sel_lo_top;	/* 0x000248 */
+	u64	ce_adm_ssp_debug_lat_sel_hi_top;	/* 0x000250 */
+	u64	ce_adm_ssp_debug_trig_sel_top;		/* 0x000258 */
+	u64	ce_adm_ssp_debug_trig_lat_sel_lo_top;	/* 0x000260 */
+	u64	ce_adm_ssp_debug_trig_lat_sel_hi_top;	/* 0x000268 */
+	u64	ce_adm_ssp_trig_compare_top;		/* 0x000270 */
+	u64	ce_adm_ssp_trig_compare_en_top;		/* 0x000278 */
+	u64	ce_pad_000280[48];	    /* 0x000280 -- 0x0003F8 */
+
+	u64	ce_adm_bap_ctrl;			/* 0x000400 */
+	u64	ce_pad_000408[127];	    /* 0x000408 -- 0x0007F8 */
+
+	u64	ce_msg_buf_data63_0[35];    /* 0x000800 -- 0x000918 */
+	u64	ce_pad_000920[29];	    /* 0x000920 -- 0x0009F8 */
+
+	u64	ce_msg_buf_data127_64[35];  /* 0x000A00 -- 0x000B18 */
+	u64	ce_pad_000B20[29];	    /* 0x000B20 -- 0x000BF8 */
+
+	u64	ce_msg_buf_parity[35];	    /* 0x000C00 -- 0x000D18 */
+	u64	ce_pad_000D20[29];	    /* 0x000D20 -- 0x000DF8 */
+
+	u64	ce_pad_000E00[576];	    /* 0x000E00 -- 0x001FF8 */
+
+	/*
+	 * LSI : LSI's PCI Express Link Registers (Link#1 and Link#2)
+	 * Link#1 MMRs at start at 0x002000, Link#2 MMRs at 0x003000
+	 * NOTE: the comment offsets at far right: let 'z' = {2 or 3}
+	 */
+	#define ce_lsi(link_num)	ce_lsi[link_num-1]
+	struct ce_lsi_reg {
+		u64	ce_lsi_lpu_id;			/* 0x00z000 */
+		u64	ce_lsi_rst;			/* 0x00z008 */
+		u64	ce_lsi_dbg_stat;		/* 0x00z010 */
+		u64	ce_lsi_dbg_cfg;			/* 0x00z018 */
+		u64	ce_lsi_ltssm_ctrl;		/* 0x00z020 */
+		u64	ce_lsi_lk_stat;			/* 0x00z028 */
+		u64	ce_pad_00z030[2];   /* 0x00z030 -- 0x00z038 */
+		u64	ce_lsi_int_and_stat;		/* 0x00z040 */
+		u64	ce_lsi_int_mask;		/* 0x00z048 */
+		u64	ce_pad_00z050[22];  /* 0x00z050 -- 0x00z0F8 */
+		u64	ce_lsi_lk_perf_cnt_sel;		/* 0x00z100 */
+		u64	ce_pad_00z108;			/* 0x00z108 */
+		u64	ce_lsi_lk_perf_cnt_ctrl;	/* 0x00z110 */
+		u64	ce_pad_00z118;			/* 0x00z118 */
+		u64	ce_lsi_lk_perf_cnt1;		/* 0x00z120 */
+		u64	ce_lsi_lk_perf_cnt1_test;	/* 0x00z128 */
+		u64	ce_lsi_lk_perf_cnt2;		/* 0x00z130 */
+		u64	ce_lsi_lk_perf_cnt2_test;	/* 0x00z138 */
+		u64	ce_pad_00z140[24];  /* 0x00z140 -- 0x00z1F8 */
+		u64	ce_lsi_lk_lyr_cfg;		/* 0x00z200 */
+		u64	ce_lsi_lk_lyr_status;		/* 0x00z208 */
+		u64	ce_lsi_lk_lyr_int_stat;		/* 0x00z210 */
+		u64	ce_lsi_lk_ly_int_stat_test;	/* 0x00z218 */
+		u64	ce_lsi_lk_ly_int_stat_mask;	/* 0x00z220 */
+		u64	ce_pad_00z228[3];   /* 0x00z228 -- 0x00z238 */
+		u64	ce_lsi_fc_upd_ctl;		/* 0x00z240 */
+		u64	ce_pad_00z248[3];   /* 0x00z248 -- 0x00z258 */
+		u64	ce_lsi_flw_ctl_upd_to_timer;	/* 0x00z260 */
+		u64	ce_lsi_flw_ctl_upd_timer0;	/* 0x00z268 */
+		u64	ce_lsi_flw_ctl_upd_timer1;	/* 0x00z270 */
+		u64	ce_pad_00z278[49];  /* 0x00z278 -- 0x00z3F8 */
+		u64	ce_lsi_freq_nak_lat_thrsh;	/* 0x00z400 */
+		u64	ce_lsi_ack_nak_lat_tmr;		/* 0x00z408 */
+		u64	ce_lsi_rply_tmr_thr;		/* 0x00z410 */
+		u64	ce_lsi_rply_tmr;		/* 0x00z418 */
+		u64	ce_lsi_rply_num_stat;		/* 0x00z420 */
+		u64	ce_lsi_rty_buf_max_addr;	/* 0x00z428 */
+		u64	ce_lsi_rty_fifo_ptr;		/* 0x00z430 */
+		u64	ce_lsi_rty_fifo_rd_wr_ptr;	/* 0x00z438 */
+		u64	ce_lsi_rty_fifo_cred;		/* 0x00z440 */
+		u64	ce_lsi_seq_cnt;			/* 0x00z448 */
+		u64	ce_lsi_ack_sent_seq_num;	/* 0x00z450 */
+		u64	ce_lsi_seq_cnt_fifo_max_addr;	/* 0x00z458 */
+		u64	ce_lsi_seq_cnt_fifo_ptr;	/* 0x00z460 */
+		u64	ce_lsi_seq_cnt_rd_wr_ptr;	/* 0x00z468 */
+		u64	ce_lsi_tx_lk_ts_ctl;		/* 0x00z470 */
+		u64	ce_pad_00z478;			/* 0x00z478 */
+		u64	ce_lsi_mem_addr_ctl;		/* 0x00z480 */
+		u64	ce_lsi_mem_d_ld0;		/* 0x00z488 */
+		u64	ce_lsi_mem_d_ld1;		/* 0x00z490 */
+		u64	ce_lsi_mem_d_ld2;		/* 0x00z498 */
+		u64	ce_lsi_mem_d_ld3;		/* 0x00z4A0 */
+		u64	ce_lsi_mem_d_ld4;		/* 0x00z4A8 */
+		u64	ce_pad_00z4B0[2];   /* 0x00z4B0 -- 0x00z4B8 */
+		u64	ce_lsi_rty_d_cnt;		/* 0x00z4C0 */
+		u64	ce_lsi_seq_buf_cnt;		/* 0x00z4C8 */
+		u64	ce_lsi_seq_buf_bt_d;		/* 0x00z4D0 */
+		u64	ce_pad_00z4D8;			/* 0x00z4D8 */
+		u64	ce_lsi_ack_lat_thr;		/* 0x00z4E0 */
+		u64	ce_pad_00z4E8[3];   /* 0x00z4E8 -- 0x00z4F8 */
+		u64	ce_lsi_nxt_rcv_seq_1_cntr;	/* 0x00z500 */
+		u64	ce_lsi_unsp_dllp_rcvd;		/* 0x00z508 */
+		u64	ce_lsi_rcv_lk_ts_ctl;		/* 0x00z510 */
+		u64	ce_pad_00z518[29];  /* 0x00z518 -- 0x00z5F8 */
+		u64	ce_lsi_phy_lyr_cfg;		/* 0x00z600 */
+		u64	ce_pad_00z608;			/* 0x00z608 */
+		u64	ce_lsi_phy_lyr_int_stat;	/* 0x00z610 */
+		u64	ce_lsi_phy_lyr_int_stat_test;	/* 0x00z618 */
+		u64	ce_lsi_phy_lyr_int_mask;	/* 0x00z620 */
+		u64	ce_pad_00z628[11];  /* 0x00z628 -- 0x00z678 */
+		u64	ce_lsi_rcv_phy_cfg;		/* 0x00z680 */
+		u64	ce_lsi_rcv_phy_stat1;		/* 0x00z688 */
+		u64	ce_lsi_rcv_phy_stat2;		/* 0x00z690 */
+		u64	ce_lsi_rcv_phy_stat3;		/* 0x00z698 */
+		u64	ce_lsi_rcv_phy_int_stat;	/* 0x00z6A0 */
+		u64	ce_lsi_rcv_phy_int_stat_test;	/* 0x00z6A8 */
+		u64	ce_lsi_rcv_phy_int_mask;	/* 0x00z6B0 */
+		u64	ce_pad_00z6B8[9];   /* 0x00z6B8 -- 0x00z6F8 */
+		u64	ce_lsi_tx_phy_cfg;		/* 0x00z700 */
+		u64	ce_lsi_tx_phy_stat;		/* 0x00z708 */
+		u64	ce_lsi_tx_phy_int_stat;		/* 0x00z710 */
+		u64	ce_lsi_tx_phy_int_stat_test;	/* 0x00z718 */
+		u64	ce_lsi_tx_phy_int_mask;		/* 0x00z720 */
+		u64	ce_lsi_tx_phy_stat2;		/* 0x00z728 */
+		u64	ce_pad_00z730[10];  /* 0x00z730 -- 0x00z77F */
+		u64	ce_lsi_ltssm_cfg1;		/* 0x00z780 */
+		u64	ce_lsi_ltssm_cfg2;		/* 0x00z788 */
+		u64	ce_lsi_ltssm_cfg3;		/* 0x00z790 */
+		u64	ce_lsi_ltssm_cfg4;		/* 0x00z798 */
+		u64	ce_lsi_ltssm_cfg5;		/* 0x00z7A0 */
+		u64	ce_lsi_ltssm_stat1;		/* 0x00z7A8 */
+		u64	ce_lsi_ltssm_stat2;		/* 0x00z7B0 */
+		u64	ce_lsi_ltssm_int_stat;		/* 0x00z7B8 */
+		u64	ce_lsi_ltssm_int_stat_test;	/* 0x00z7C0 */
+		u64	ce_lsi_ltssm_int_mask;		/* 0x00z7C8 */
+		u64	ce_lsi_ltssm_stat_wr_en;	/* 0x00z7D0 */
+		u64	ce_pad_00z7D8[5];   /* 0x00z7D8 -- 0x00z7F8 */
+		u64	ce_lsi_gb_cfg1;			/* 0x00z800 */
+		u64	ce_lsi_gb_cfg2;			/* 0x00z808 */
+		u64	ce_lsi_gb_cfg3;			/* 0x00z810 */
+		u64	ce_lsi_gb_cfg4;			/* 0x00z818 */
+		u64	ce_lsi_gb_stat;			/* 0x00z820 */
+		u64	ce_lsi_gb_int_stat;		/* 0x00z828 */
+		u64	ce_lsi_gb_int_stat_test;	/* 0x00z830 */
+		u64	ce_lsi_gb_int_mask;		/* 0x00z838 */
+		u64	ce_lsi_gb_pwr_dn1;		/* 0x00z840 */
+		u64	ce_lsi_gb_pwr_dn2;		/* 0x00z848 */
+		u64	ce_pad_00z850[246]; /* 0x00z850 -- 0x00zFF8 */
+	} ce_lsi[2];
+
+	u64	ce_pad_004000[10];	    /* 0x004000 -- 0x004048 */
+
+	/*
+	 * CRM: Coretalk Receive Module Registers
+	 */
+	u64	ce_crm_debug_mux;			/* 0x004050 */
+	u64	ce_pad_004058;				/* 0x004058 */
+	u64	ce_crm_ssp_err_cmd_wrd;			/* 0x004060 */
+	u64	ce_crm_ssp_err_addr;			/* 0x004068 */
+	u64	ce_crm_ssp_err_syn;			/* 0x004070 */
+
+	u64	ce_pad_004078[499];	    /* 0x004078 -- 0x005008 */
+
+	/*
+         * CXM: Coretalk Xmit Module Registers
+         */
+	u64	ce_cxm_dyn_credit_status;		/* 0x005010 */
+	u64	ce_cxm_last_credit_status;		/* 0x005018 */
+	u64	ce_cxm_credit_limit;			/* 0x005020 */
+	u64	ce_cxm_force_credit;			/* 0x005028 */
+	u64	ce_cxm_disable_bypass;			/* 0x005030 */
+	u64	ce_pad_005038[3];	    /* 0x005038 -- 0x005048 */
+	u64	ce_cxm_debug_mux;			/* 0x005050 */
+
+        u64        ce_pad_005058[501];         /* 0x005058 -- 0x005FF8 */
+
+	/*
+	 * DTL: Downstream Transaction Layer Regs (Link#1 and Link#2)
+	 * DTL: Link#1 MMRs at start at 0x006000, Link#2 MMRs at 0x008000
+	 * DTL: the comment offsets at far right: let 'y' = {6 or 8}
+	 *
+	 * UTL: Downstream Transaction Layer Regs (Link#1 and Link#2)
+	 * UTL: Link#1 MMRs at start at 0x007000, Link#2 MMRs at 0x009000
+	 * UTL: the comment offsets at far right: let 'z' = {7 or 9}
+	 */
+	#define ce_dtl(link_num)	ce_dtl_utl[link_num-1]
+	#define ce_utl(link_num)	ce_dtl_utl[link_num-1]
+	struct ce_dtl_utl_reg {
+		/* DTL */
+		u64	ce_dtl_dtdr_credit_limit;	/* 0x00y000 */
+		u64	ce_dtl_dtdr_credit_force;	/* 0x00y008 */
+		u64	ce_dtl_dyn_credit_status;	/* 0x00y010 */
+		u64	ce_dtl_dtl_last_credit_stat;	/* 0x00y018 */
+		u64	ce_dtl_dtl_ctrl;		/* 0x00y020 */
+		u64	ce_pad_00y028[5];   /* 0x00y028 -- 0x00y048 */
+		u64	ce_dtl_debug_sel;		/* 0x00y050 */
+		u64	ce_pad_00y058[501]; /* 0x00y058 -- 0x00yFF8 */
+
+		/* UTL */
+		u64	ce_utl_utl_ctrl;		/* 0x00z000 */
+		u64	ce_utl_debug_sel;		/* 0x00z008 */
+		u64	ce_pad_00z010[510]; /* 0x00z010 -- 0x00zFF8 */
+	} ce_dtl_utl[2];
+
+	u64	ce_pad_00A000[514];	    /* 0x00A000 -- 0x00B008 */
+
+	/*
+	 * URE: Upstream Request Engine
+         */
+	u64	ce_ure_dyn_credit_status;		/* 0x00B010 */
+	u64	ce_ure_last_credit_status;		/* 0x00B018 */
+	u64	ce_ure_credit_limit;			/* 0x00B020 */
+	u64	ce_pad_00B028;				/* 0x00B028 */
+	u64	ce_ure_control;				/* 0x00B030 */
+	u64	ce_ure_status;				/* 0x00B038 */
+	u64	ce_pad_00B040[2];	    /* 0x00B040 -- 0x00B048 */
+	u64	ce_ure_debug_sel;			/* 0x00B050 */
+	u64	ce_ure_pcie_debug_sel;			/* 0x00B058 */
+	u64	ce_ure_ssp_err_cmd_wrd;			/* 0x00B060 */
+	u64	ce_ure_ssp_err_addr;			/* 0x00B068 */
+	u64	ce_ure_page_map;			/* 0x00B070 */
+	u64	ce_ure_dir_map[TIOCE_NUM_PORTS];	/* 0x00B078 */
+	u64	ce_ure_pipe_sel1;			/* 0x00B088 */
+	u64	ce_ure_pipe_mask1;			/* 0x00B090 */
+	u64	ce_ure_pipe_sel2;			/* 0x00B098 */
+	u64	ce_ure_pipe_mask2;			/* 0x00B0A0 */
+	u64	ce_ure_pcie1_credits_sent;		/* 0x00B0A8 */
+	u64	ce_ure_pcie1_credits_used;		/* 0x00B0B0 */
+	u64	ce_ure_pcie1_credit_limit;		/* 0x00B0B8 */
+	u64	ce_ure_pcie2_credits_sent;		/* 0x00B0C0 */
+	u64	ce_ure_pcie2_credits_used;		/* 0x00B0C8 */
+	u64	ce_ure_pcie2_credit_limit;		/* 0x00B0D0 */
+	u64	ce_ure_pcie_force_credit;		/* 0x00B0D8 */
+	u64	ce_ure_rd_tnum_val;			/* 0x00B0E0 */
+	u64	ce_ure_rd_tnum_rsp_rcvd;		/* 0x00B0E8 */
+	u64	ce_ure_rd_tnum_esent_timer;		/* 0x00B0F0 */
+	u64	ce_ure_rd_tnum_error;			/* 0x00B0F8 */
+	u64	ce_ure_rd_tnum_first_cl;		/* 0x00B100 */
+	u64	ce_ure_rd_tnum_link_buf;		/* 0x00B108 */
+	u64	ce_ure_wr_tnum_val;			/* 0x00B110 */
+	u64	ce_ure_sram_err_addr0;			/* 0x00B118 */
+	u64	ce_ure_sram_err_addr1;			/* 0x00B120 */
+	u64	ce_ure_sram_err_addr2;			/* 0x00B128 */
+	u64	ce_ure_sram_rd_addr0;			/* 0x00B130 */
+	u64	ce_ure_sram_rd_addr1;			/* 0x00B138 */
+	u64	ce_ure_sram_rd_addr2;			/* 0x00B140 */
+	u64	ce_ure_sram_wr_addr0;			/* 0x00B148 */
+	u64	ce_ure_sram_wr_addr1;			/* 0x00B150 */
+	u64	ce_ure_sram_wr_addr2;			/* 0x00B158 */
+	u64	ce_ure_buf_flush10;			/* 0x00B160 */
+	u64	ce_ure_buf_flush11;			/* 0x00B168 */
+	u64	ce_ure_buf_flush12;			/* 0x00B170 */
+	u64	ce_ure_buf_flush13;			/* 0x00B178 */
+	u64	ce_ure_buf_flush20;			/* 0x00B180 */
+	u64	ce_ure_buf_flush21;			/* 0x00B188 */
+	u64	ce_ure_buf_flush22;			/* 0x00B190 */
+	u64	ce_ure_buf_flush23;			/* 0x00B198 */
+	u64	ce_ure_pcie_control1;			/* 0x00B1A0 */
+	u64	ce_ure_pcie_control2;			/* 0x00B1A8 */
+
+	u64	ce_pad_00B1B0[458];	    /* 0x00B1B0 -- 0x00BFF8 */
+
+	/* Upstream Data Buffer, Port1 */
+	struct ce_ure_maint_ups_dat1_data {
+		u64	data63_0[512];	    /* 0x00C000 -- 0x00CFF8 */
+		u64	data127_64[512];    /* 0x00D000 -- 0x00DFF8 */
+		u64	parity[512];	    /* 0x00E000 -- 0x00EFF8 */
+	} ce_ure_maint_ups_dat1;
+
+	/* Upstream Header Buffer, Port1 */
+	struct ce_ure_maint_ups_hdr1_data {
+		u64	data63_0[512];	    /* 0x00F000 -- 0x00FFF8 */
+		u64	data127_64[512];    /* 0x010000 -- 0x010FF8 */
+		u64	parity[512];	    /* 0x011000 -- 0x011FF8 */
+	} ce_ure_maint_ups_hdr1;
+
+	/* Upstream Data Buffer, Port2 */
+	struct ce_ure_maint_ups_dat2_data {
+		u64	data63_0[512];	    /* 0x012000 -- 0x012FF8 */
+		u64	data127_64[512];    /* 0x013000 -- 0x013FF8 */
+		u64	parity[512];	    /* 0x014000 -- 0x014FF8 */
+	} ce_ure_maint_ups_dat2;
+
+	/* Upstream Header Buffer, Port2 */
+	struct ce_ure_maint_ups_hdr2_data {
+		u64	data63_0[512];	    /* 0x015000 -- 0x015FF8 */
+		u64	data127_64[512];    /* 0x016000 -- 0x016FF8 */
+		u64	parity[512];	    /* 0x017000 -- 0x017FF8 */
+	} ce_ure_maint_ups_hdr2;
+
+	/* Downstream Data Buffer */
+	struct ce_ure_maint_dns_dat_data {
+		u64	data63_0[512];	    /* 0x018000 -- 0x018FF8 */
+		u64	data127_64[512];    /* 0x019000 -- 0x019FF8 */
+		u64	parity[512];	    /* 0x01A000 -- 0x01AFF8 */
+	} ce_ure_maint_dns_dat;
+
+	/* Downstream Header Buffer */
+	struct	ce_ure_maint_dns_hdr_data {
+		u64	data31_0[64];	    /* 0x01B000 -- 0x01B1F8 */
+		u64	data95_32[64];	    /* 0x01B200 -- 0x01B3F8 */
+		u64	parity[64];	    /* 0x01B400 -- 0x01B5F8 */
+	} ce_ure_maint_dns_hdr;
+
+	/* RCI Buffer Data */
+	struct	ce_ure_maint_rci_data {
+		u64	data41_0[64];	    /* 0x01B600 -- 0x01B7F8 */
+		u64	data69_42[64];	    /* 0x01B800 -- 0x01B9F8 */
+	} ce_ure_maint_rci;
+
+	/* Response Queue */
+	u64	ce_ure_maint_rspq[64];	    /* 0x01BA00 -- 0x01BBF8 */
+
+	u64	ce_pad_01C000[4224];	    /* 0x01BC00 -- 0x023FF8 */
+
+	/* Admin Build-a-Packet Buffer */
+	struct	ce_adm_maint_bap_buf_data {
+		u64	data63_0[258];	    /* 0x024000 -- 0x024808 */
+		u64	data127_64[258];    /* 0x024810 -- 0x025018 */
+		u64	parity[258];	    /* 0x025020 -- 0x025828 */
+	} ce_adm_maint_bap_buf;
+
+	u64	ce_pad_025830[5370];	    /* 0x025830 -- 0x02FFF8 */
+
+	/* URE: 40bit PMU ATE Buffer */		    /* 0x030000 -- 0x037FF8 */
+	u64	ce_ure_ate40[TIOCE_NUM_M40_ATES];
+
+	/* URE: 32/40bit PMU ATE Buffer */	    /* 0x038000 -- 0x03BFF8 */
+	u64	ce_ure_ate3240[TIOCE_NUM_M3240_ATES];
+
+	u64	ce_pad_03C000[2050];	    /* 0x03C000 -- 0x040008 */
+
+	/*
+	 * DRE: Down Stream Request Engine
+         */
+	u64	ce_dre_dyn_credit_status1;		/* 0x040010 */
+	u64	ce_dre_dyn_credit_status2;		/* 0x040018 */
+	u64	ce_dre_last_credit_status1;		/* 0x040020 */
+	u64	ce_dre_last_credit_status2;		/* 0x040028 */
+	u64	ce_dre_credit_limit1;			/* 0x040030 */
+	u64	ce_dre_credit_limit2;			/* 0x040038 */
+	u64	ce_dre_force_credit1;			/* 0x040040 */
+	u64	ce_dre_force_credit2;			/* 0x040048 */
+	u64	ce_dre_debug_mux1;			/* 0x040050 */
+	u64	ce_dre_debug_mux2;			/* 0x040058 */
+	u64	ce_dre_ssp_err_cmd_wrd;			/* 0x040060 */
+	u64	ce_dre_ssp_err_addr;			/* 0x040068 */
+	u64	ce_dre_comp_err_cmd_wrd;		/* 0x040070 */
+	u64	ce_dre_comp_err_addr;			/* 0x040078 */
+	u64	ce_dre_req_status;			/* 0x040080 */
+	u64	ce_dre_config1;				/* 0x040088 */
+	u64	ce_dre_config2;				/* 0x040090 */
+	u64	ce_dre_config_req_status;		/* 0x040098 */
+	u64	ce_pad_0400A0[12];	    /* 0x0400A0 -- 0x0400F8 */
+	u64	ce_dre_dyn_fifo;			/* 0x040100 */
+	u64	ce_pad_040108[3];	    /* 0x040108 -- 0x040118 */
+	u64	ce_dre_last_fifo;			/* 0x040120 */
+
+	u64	ce_pad_040128[27];	    /* 0x040128 -- 0x0401F8 */
+
+	/* DRE Downstream Head Queue */
+	struct	ce_dre_maint_ds_head_queue {
+		u64	data63_0[32];	    /* 0x040200 -- 0x0402F8 */
+		u64	data127_64[32];	    /* 0x040300 -- 0x0403F8 */
+		u64	parity[32];	    /* 0x040400 -- 0x0404F8 */
+	} ce_dre_maint_ds_head_q;
+
+	u64	ce_pad_040500[352];	    /* 0x040500 -- 0x040FF8 */
+
+	/* DRE Downstream Data Queue */
+	struct	ce_dre_maint_ds_data_queue {
+		u64	data63_0[256];	    /* 0x041000 -- 0x0417F8 */
+		u64	ce_pad_041800[256]; /* 0x041800 -- 0x041FF8 */
+		u64	data127_64[256];    /* 0x042000 -- 0x0427F8 */
+		u64	ce_pad_042800[256]; /* 0x042800 -- 0x042FF8 */
+		u64	parity[256];	    /* 0x043000 -- 0x0437F8 */
+		u64	ce_pad_043800[256]; /* 0x043800 -- 0x043FF8 */
+	} ce_dre_maint_ds_data_q;
+
+	/* DRE URE Upstream Response Queue */
+	struct	ce_dre_maint_ure_us_rsp_queue {
+		u64	data63_0[8];	    /* 0x044000 -- 0x044038 */
+		u64	ce_pad_044040[24];  /* 0x044040 -- 0x0440F8 */
+		u64	data127_64[8];      /* 0x044100 -- 0x044138 */
+		u64	ce_pad_044140[24];  /* 0x044140 -- 0x0441F8 */
+		u64	parity[8];	    /* 0x044200 -- 0x044238 */
+		u64	ce_pad_044240[24];  /* 0x044240 -- 0x0442F8 */
+	} ce_dre_maint_ure_us_rsp_q;
+
+	u64 	ce_dre_maint_us_wrt_rsp[32];/* 0x044300 -- 0x0443F8 */
+
+	u64	ce_end_of_struct;			/* 0x044400 */
+} tioce_t;
+
+/* ce_lsiX_gb_cfg1 register bit masks & shifts */
+#define CE_LSI_GB_CFG1_RXL0S_THS_SHFT	0
+#define CE_LSI_GB_CFG1_RXL0S_THS_MASK	(0xffULL << 0)
+#define CE_LSI_GB_CFG1_RXL0S_SMP_SHFT	8
+#define CE_LSI_GB_CFG1_RXL0S_SMP_MASK	(0xfULL << 8);
+#define CE_LSI_GB_CFG1_RXL0S_ADJ_SHFT	12
+#define CE_LSI_GB_CFG1_RXL0S_ADJ_MASK	(0x7ULL << 12)
+#define CE_LSI_GB_CFG1_RXL0S_FLT_SHFT	15
+#define CE_LSI_GB_CFG1_RXL0S_FLT_MASK	(0x1ULL << 15)
+#define CE_LSI_GB_CFG1_LPBK_SEL_SHFT	16
+#define CE_LSI_GB_CFG1_LPBK_SEL_MASK	(0x3ULL << 16)
+#define CE_LSI_GB_CFG1_LPBK_EN_SHFT	18
+#define CE_LSI_GB_CFG1_LPBK_EN_MASK	(0x1ULL << 18)
+#define CE_LSI_GB_CFG1_RVRS_LB_SHFT	19
+#define CE_LSI_GB_CFG1_RVRS_LB_MASK	(0x1ULL << 19)
+#define CE_LSI_GB_CFG1_RVRS_CLK_SHFT	20
+#define CE_LSI_GB_CFG1_RVRS_CLK_MASK	(0x3ULL << 20)
+#define CE_LSI_GB_CFG1_SLF_TS_SHFT	24
+#define CE_LSI_GB_CFG1_SLF_TS_MASK	(0xfULL << 24)
+
+/* ce_adm_int_mask/ce_adm_int_status register bit defines */
+#define CE_ADM_INT_CE_ERROR_SHFT		0
+#define CE_ADM_INT_LSI1_IP_ERROR_SHFT		1
+#define CE_ADM_INT_LSI2_IP_ERROR_SHFT		2
+#define CE_ADM_INT_PCIE_ERROR_SHFT		3
+#define CE_ADM_INT_PORT1_HOTPLUG_EVENT_SHFT	4
+#define CE_ADM_INT_PORT2_HOTPLUG_EVENT_SHFT	5
+#define CE_ADM_INT_PCIE_PORT1_DEV_A_SHFT	6
+#define CE_ADM_INT_PCIE_PORT1_DEV_B_SHFT	7
+#define CE_ADM_INT_PCIE_PORT1_DEV_C_SHFT	8
+#define CE_ADM_INT_PCIE_PORT1_DEV_D_SHFT	9
+#define CE_ADM_INT_PCIE_PORT2_DEV_A_SHFT	10
+#define CE_ADM_INT_PCIE_PORT2_DEV_B_SHFT	11
+#define CE_ADM_INT_PCIE_PORT2_DEV_C_SHFT	12
+#define CE_ADM_INT_PCIE_PORT2_DEV_D_SHFT	13
+#define CE_ADM_INT_PCIE_MSG_SHFT		14 /*see int_dest_14*/
+#define CE_ADM_INT_PCIE_MSG_SLOT_0_SHFT		14
+#define CE_ADM_INT_PCIE_MSG_SLOT_1_SHFT		15
+#define CE_ADM_INT_PCIE_MSG_SLOT_2_SHFT		16
+#define CE_ADM_INT_PCIE_MSG_SLOT_3_SHFT		17
+#define CE_ADM_INT_PORT1_PM_PME_MSG_SHFT	22
+#define CE_ADM_INT_PORT2_PM_PME_MSG_SHFT	23
+
+/* ce_adm_force_int register bit defines */
+#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_A_SHFT	0
+#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_B_SHFT	1
+#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_C_SHFT	2
+#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_D_SHFT	3
+#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_A_SHFT	4
+#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_B_SHFT	5
+#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_C_SHFT	6
+#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_D_SHFT	7
+#define CE_ADM_FORCE_INT_ALWAYS_SHFT		8
+
+/* ce_adm_int_dest register bit masks & shifts */
+#define INTR_VECTOR_SHFT			56
+
+/* ce_adm_error_mask and ce_adm_error_summary register bit masks */
+#define CE_ADM_ERR_CRM_SSP_REQ_INVALID			(0x1ULL <<  0)
+#define CE_ADM_ERR_SSP_REQ_HEADER			(0x1ULL <<  1)
+#define CE_ADM_ERR_SSP_RSP_HEADER			(0x1ULL <<  2)
+#define CE_ADM_ERR_SSP_PROTOCOL_ERROR			(0x1ULL <<  3)
+#define CE_ADM_ERR_SSP_SBE				(0x1ULL <<  4)
+#define CE_ADM_ERR_SSP_MBE				(0x1ULL <<  5)
+#define CE_ADM_ERR_CXM_CREDIT_OFLOW			(0x1ULL <<  6)
+#define CE_ADM_ERR_DRE_SSP_REQ_INVAL			(0x1ULL <<  7)
+#define CE_ADM_ERR_SSP_REQ_LONG				(0x1ULL <<  8)
+#define CE_ADM_ERR_SSP_REQ_OFLOW			(0x1ULL <<  9)
+#define CE_ADM_ERR_SSP_REQ_SHORT			(0x1ULL << 10)
+#define CE_ADM_ERR_SSP_REQ_SIDEBAND			(0x1ULL << 11)
+#define CE_ADM_ERR_SSP_REQ_ADDR_ERR			(0x1ULL << 12)
+#define CE_ADM_ERR_SSP_REQ_BAD_BE			(0x1ULL << 13)
+#define CE_ADM_ERR_PCIE_COMPL_TIMEOUT			(0x1ULL << 14)
+#define CE_ADM_ERR_PCIE_UNEXP_COMPL			(0x1ULL << 15)
+#define CE_ADM_ERR_PCIE_ERR_COMPL			(0x1ULL << 16)
+#define CE_ADM_ERR_DRE_CREDIT_OFLOW			(0x1ULL << 17)
+#define CE_ADM_ERR_DRE_SRAM_PE				(0x1ULL << 18)
+#define CE_ADM_ERR_SSP_RSP_INVALID			(0x1ULL << 19)
+#define CE_ADM_ERR_SSP_RSP_LONG				(0x1ULL << 20)
+#define CE_ADM_ERR_SSP_RSP_SHORT			(0x1ULL << 21)
+#define CE_ADM_ERR_SSP_RSP_SIDEBAND			(0x1ULL << 22)
+#define CE_ADM_ERR_URE_SSP_RSP_UNEXP			(0x1ULL << 23)
+#define CE_ADM_ERR_URE_SSP_WR_REQ_TIMEOUT		(0x1ULL << 24)
+#define CE_ADM_ERR_URE_SSP_RD_REQ_TIMEOUT		(0x1ULL << 25)
+#define CE_ADM_ERR_URE_ATE3240_PAGE_FAULT		(0x1ULL << 26)
+#define CE_ADM_ERR_URE_ATE40_PAGE_FAULT			(0x1ULL << 27)
+#define CE_ADM_ERR_URE_CREDIT_OFLOW			(0x1ULL << 28)
+#define CE_ADM_ERR_URE_SRAM_PE				(0x1ULL << 29)
+#define CE_ADM_ERR_ADM_SSP_RSP_UNEXP			(0x1ULL << 30)
+#define CE_ADM_ERR_ADM_SSP_REQ_TIMEOUT			(0x1ULL << 31)
+#define CE_ADM_ERR_MMR_ACCESS_ERROR			(0x1ULL << 32)
+#define CE_ADM_ERR_MMR_ADDR_ERROR			(0x1ULL << 33)
+#define CE_ADM_ERR_ADM_CREDIT_OFLOW			(0x1ULL << 34)
+#define CE_ADM_ERR_ADM_SRAM_PE				(0x1ULL << 35)
+#define CE_ADM_ERR_DTL1_MIN_PDATA_CREDIT_ERR		(0x1ULL << 36)
+#define CE_ADM_ERR_DTL1_INF_COMPL_CRED_UPDT_ERR		(0x1ULL << 37)
+#define CE_ADM_ERR_DTL1_INF_POSTED_CRED_UPDT_ERR	(0x1ULL << 38)
+#define CE_ADM_ERR_DTL1_INF_NPOSTED_CRED_UPDT_ERR	(0x1ULL << 39)
+#define CE_ADM_ERR_DTL1_COMP_HD_CRED_MAX_ERR		(0x1ULL << 40)
+#define CE_ADM_ERR_DTL1_COMP_D_CRED_MAX_ERR		(0x1ULL << 41)
+#define CE_ADM_ERR_DTL1_NPOSTED_HD_CRED_MAX_ERR		(0x1ULL << 42)
+#define CE_ADM_ERR_DTL1_NPOSTED_D_CRED_MAX_ERR		(0x1ULL << 43)
+#define CE_ADM_ERR_DTL1_POSTED_HD_CRED_MAX_ERR		(0x1ULL << 44)
+#define CE_ADM_ERR_DTL1_POSTED_D_CRED_MAX_ERR		(0x1ULL << 45)
+#define CE_ADM_ERR_DTL2_MIN_PDATA_CREDIT_ERR		(0x1ULL << 46)
+#define CE_ADM_ERR_DTL2_INF_COMPL_CRED_UPDT_ERR		(0x1ULL << 47)
+#define CE_ADM_ERR_DTL2_INF_POSTED_CRED_UPDT_ERR	(0x1ULL << 48)
+#define CE_ADM_ERR_DTL2_INF_NPOSTED_CRED_UPDT_ERR	(0x1ULL << 49)
+#define CE_ADM_ERR_DTL2_COMP_HD_CRED_MAX_ERR		(0x1ULL << 50)
+#define CE_ADM_ERR_DTL2_COMP_D_CRED_MAX_ERR		(0x1ULL << 51)
+#define CE_ADM_ERR_DTL2_NPOSTED_HD_CRED_MAX_ERR		(0x1ULL << 52)
+#define CE_ADM_ERR_DTL2_NPOSTED_D_CRED_MAX_ERR		(0x1ULL << 53)
+#define CE_ADM_ERR_DTL2_POSTED_HD_CRED_MAX_ERR		(0x1ULL << 54)
+#define CE_ADM_ERR_DTL2_POSTED_D_CRED_MAX_ERR		(0x1ULL << 55)
+#define CE_ADM_ERR_PORT1_PCIE_COR_ERR			(0x1ULL << 56)
+#define CE_ADM_ERR_PORT1_PCIE_NFAT_ERR			(0x1ULL << 57)
+#define CE_ADM_ERR_PORT1_PCIE_FAT_ERR			(0x1ULL << 58)
+#define CE_ADM_ERR_PORT2_PCIE_COR_ERR			(0x1ULL << 59)
+#define CE_ADM_ERR_PORT2_PCIE_NFAT_ERR			(0x1ULL << 60)
+#define CE_ADM_ERR_PORT2_PCIE_FAT_ERR			(0x1ULL << 61)
+
+/* ce_adm_ure_ups_buf_barrier_flush register bit masks and shifts */
+#define FLUSH_SEL_PORT1_PIPE0_SHFT	0
+#define FLUSH_SEL_PORT1_PIPE1_SHFT	4
+#define FLUSH_SEL_PORT1_PIPE2_SHFT	8
+#define FLUSH_SEL_PORT1_PIPE3_SHFT	12
+#define FLUSH_SEL_PORT2_PIPE0_SHFT	16
+#define FLUSH_SEL_PORT2_PIPE1_SHFT	20
+#define FLUSH_SEL_PORT2_PIPE2_SHFT	24
+#define FLUSH_SEL_PORT2_PIPE3_SHFT	28
+
+/* ce_dre_config1 register bit masks and shifts */
+#define CE_DRE_RO_ENABLE		(0x1ULL << 0)
+#define CE_DRE_DYN_RO_ENABLE		(0x1ULL << 1)
+#define CE_DRE_SUP_CONFIG_COMP_ERROR	(0x1ULL << 2)
+#define CE_DRE_SUP_IO_COMP_ERROR	(0x1ULL << 3)
+#define CE_DRE_ADDR_MODE_SHFT		4
+
+/* ce_dre_config_req_status register bit masks */
+#define CE_DRE_LAST_CONFIG_COMPLETION	(0x7ULL << 0)
+#define CE_DRE_DOWNSTREAM_CONFIG_ERROR	(0x1ULL << 3)
+#define CE_DRE_CONFIG_COMPLETION_VALID	(0x1ULL << 4)
+#define CE_DRE_CONFIG_REQUEST_ACTIVE	(0x1ULL << 5)
+
+/* ce_ure_control register bit masks & shifts */
+#define CE_URE_RD_MRG_ENABLE		(0x1ULL << 0)
+#define CE_URE_WRT_MRG_ENABLE1		(0x1ULL << 4)
+#define CE_URE_WRT_MRG_ENABLE2		(0x1ULL << 5)
+#define CE_URE_WRT_MRG_TIMER_SHFT	12
+#define CE_URE_WRT_MRG_TIMER_MASK	(0x7FFULL << CE_URE_WRT_MRG_TIMER_SHFT)
+#define CE_URE_WRT_MRG_TIMER(x)		(((u64)(x) << \
+					  CE_URE_WRT_MRG_TIMER_SHFT) & \
+					 CE_URE_WRT_MRG_TIMER_MASK)
+#define CE_URE_RSPQ_BYPASS_DISABLE	(0x1ULL << 24)
+#define CE_URE_UPS_DAT1_PAR_DISABLE	(0x1ULL << 32)
+#define CE_URE_UPS_HDR1_PAR_DISABLE	(0x1ULL << 33)
+#define CE_URE_UPS_DAT2_PAR_DISABLE	(0x1ULL << 34)
+#define CE_URE_UPS_HDR2_PAR_DISABLE	(0x1ULL << 35)
+#define CE_URE_ATE_PAR_DISABLE		(0x1ULL << 36)
+#define CE_URE_RCI_PAR_DISABLE		(0x1ULL << 37)
+#define CE_URE_RSPQ_PAR_DISABLE		(0x1ULL << 38)
+#define CE_URE_DNS_DAT_PAR_DISABLE	(0x1ULL << 39)
+#define CE_URE_DNS_HDR_PAR_DISABLE	(0x1ULL << 40)
+#define CE_URE_MALFORM_DISABLE		(0x1ULL << 44)
+#define CE_URE_UNSUP_DISABLE		(0x1ULL << 45)
+
+/* ce_ure_page_map register bit masks & shifts */
+#define CE_URE_ATE3240_ENABLE		(0x1ULL << 0)
+#define CE_URE_ATE40_ENABLE 		(0x1ULL << 1)
+#define CE_URE_PAGESIZE_SHFT		4
+#define CE_URE_PAGESIZE_MASK		(0x7ULL << CE_URE_PAGESIZE_SHFT)
+#define CE_URE_4K_PAGESIZE		(0x0ULL << CE_URE_PAGESIZE_SHFT)
+#define CE_URE_16K_PAGESIZE		(0x1ULL << CE_URE_PAGESIZE_SHFT)
+#define CE_URE_64K_PAGESIZE		(0x2ULL << CE_URE_PAGESIZE_SHFT)
+#define CE_URE_128K_PAGESIZE		(0x3ULL << CE_URE_PAGESIZE_SHFT)
+#define CE_URE_256K_PAGESIZE		(0x4ULL << CE_URE_PAGESIZE_SHFT)
+
+/* ce_ure_pipe_sel register bit masks & shifts */
+#define PKT_TRAFIC_SHRT			16
+#define BUS_SRC_ID_SHFT			8
+#define DEV_SRC_ID_SHFT			3
+#define FNC_SRC_ID_SHFT			0
+#define CE_URE_TC_MASK			(0x07ULL << PKT_TRAFIC_SHRT)
+#define CE_URE_BUS_MASK			(0xFFULL << BUS_SRC_ID_SHFT)
+#define CE_URE_DEV_MASK			(0x1FULL << DEV_SRC_ID_SHFT)
+#define CE_URE_FNC_MASK			(0x07ULL << FNC_SRC_ID_SHFT)
+#define CE_URE_PIPE_BUS(b)		(((u64)(b) << BUS_SRC_ID_SHFT) & \
+					 CE_URE_BUS_MASK)
+#define CE_URE_PIPE_DEV(d)		(((u64)(d) << DEV_SRC_ID_SHFT) & \
+					 CE_URE_DEV_MASK)
+#define CE_URE_PIPE_FNC(f)		(((u64)(f) << FNC_SRC_ID_SHFT) & \
+					 CE_URE_FNC_MASK)
+
+#define CE_URE_SEL1_SHFT		0
+#define CE_URE_SEL2_SHFT		20
+#define CE_URE_SEL3_SHFT		40
+#define CE_URE_SEL1_MASK		(0x7FFFFULL << CE_URE_SEL1_SHFT)
+#define CE_URE_SEL2_MASK		(0x7FFFFULL << CE_URE_SEL2_SHFT)
+#define CE_URE_SEL3_MASK		(0x7FFFFULL << CE_URE_SEL3_SHFT)
+
+
+/* ce_ure_pipe_mask register bit masks & shifts */
+#define CE_URE_MASK1_SHFT		0
+#define CE_URE_MASK2_SHFT		20
+#define CE_URE_MASK3_SHFT		40
+#define CE_URE_MASK1_MASK		(0x7FFFFULL << CE_URE_MASK1_SHFT)
+#define CE_URE_MASK2_MASK		(0x7FFFFULL << CE_URE_MASK2_SHFT)
+#define CE_URE_MASK3_MASK		(0x7FFFFULL << CE_URE_MASK3_SHFT)
+
+
+/* ce_ure_pcie_control1 register bit masks & shifts */
+#define CE_URE_SI			(0x1ULL << 0)
+#define CE_URE_ELAL_SHFT		4
+#define CE_URE_ELAL_MASK		(0x7ULL << CE_URE_ELAL_SHFT)
+#define CE_URE_ELAL_SET(n)		(((u64)(n) << CE_URE_ELAL_SHFT) & \
+					 CE_URE_ELAL_MASK)
+#define CE_URE_ELAL1_SHFT		8
+#define CE_URE_ELAL1_MASK		(0x7ULL << CE_URE_ELAL1_SHFT)
+#define CE_URE_ELAL1_SET(n)		(((u64)(n) << CE_URE_ELAL1_SHFT) & \
+					 CE_URE_ELAL1_MASK)
+#define CE_URE_SCC			(0x1ULL << 12)
+#define CE_URE_PN1_SHFT			16
+#define CE_URE_PN1_MASK			(0xFFULL << CE_URE_PN1_SHFT)
+#define CE_URE_PN2_SHFT			24
+#define CE_URE_PN2_MASK			(0xFFULL << CE_URE_PN2_SHFT)
+#define CE_URE_PN1_SET(n)		(((u64)(n) << CE_URE_PN1_SHFT) & \
+					 CE_URE_PN1_MASK)
+#define CE_URE_PN2_SET(n)		(((u64)(n) << CE_URE_PN2_SHFT) & \
+					 CE_URE_PN2_MASK)
+
+/* ce_ure_pcie_control2 register bit masks & shifts */
+#define CE_URE_ABP			(0x1ULL << 0)
+#define CE_URE_PCP			(0x1ULL << 1)
+#define CE_URE_MSP			(0x1ULL << 2)
+#define CE_URE_AIP			(0x1ULL << 3)
+#define CE_URE_PIP			(0x1ULL << 4)
+#define CE_URE_HPS			(0x1ULL << 5)
+#define CE_URE_HPC			(0x1ULL << 6)
+#define CE_URE_SPLV_SHFT		7
+#define CE_URE_SPLV_MASK		(0xFFULL << CE_URE_SPLV_SHFT)
+#define CE_URE_SPLV_SET(n)		(((u64)(n) << CE_URE_SPLV_SHFT) & \
+					 CE_URE_SPLV_MASK)
+#define CE_URE_SPLS_SHFT		15
+#define CE_URE_SPLS_MASK		(0x3ULL << CE_URE_SPLS_SHFT)
+#define CE_URE_SPLS_SET(n)		(((u64)(n) << CE_URE_SPLS_SHFT) & \
+					 CE_URE_SPLS_MASK)
+#define CE_URE_PSN1_SHFT		19
+#define CE_URE_PSN1_MASK		(0x1FFFULL << CE_URE_PSN1_SHFT)
+#define CE_URE_PSN2_SHFT		32
+#define CE_URE_PSN2_MASK		(0x1FFFULL << CE_URE_PSN2_SHFT)
+#define CE_URE_PSN1_SET(n)		(((u64)(n) << CE_URE_PSN1_SHFT) & \
+					 CE_URE_PSN1_MASK)
+#define CE_URE_PSN2_SET(n)		(((u64)(n) << CE_URE_PSN2_SHFT) & \
+					 CE_URE_PSN2_MASK)
+
+/*
+ * PIO address space ranges for CE
+ */
+
+/* Local CE Registers Space */
+#define CE_PIO_MMR			0x00000000
+#define CE_PIO_MMR_LEN			0x04000000
+
+/* PCI Compatible Config Space */
+#define CE_PIO_CONFIG_SPACE		0x04000000
+#define CE_PIO_CONFIG_SPACE_LEN		0x04000000
+
+/* PCI I/O Space Alias */
+#define CE_PIO_IO_SPACE_ALIAS		0x08000000
+#define CE_PIO_IO_SPACE_ALIAS_LEN	0x08000000
+
+/* PCI Enhanced Config Space */
+#define CE_PIO_E_CONFIG_SPACE		0x10000000
+#define CE_PIO_E_CONFIG_SPACE_LEN	0x10000000
+
+/* PCI I/O Space */
+#define CE_PIO_IO_SPACE			0x100000000
+#define CE_PIO_IO_SPACE_LEN		0x100000000
+
+/* PCI MEM Space */
+#define CE_PIO_MEM_SPACE		0x200000000
+#define CE_PIO_MEM_SPACE_LEN		TIO_HWIN_SIZE
+
+
+/*
+ * CE PCI Enhanced Config Space shifts & masks
+ */
+#define CE_E_CONFIG_BUS_SHFT		20
+#define CE_E_CONFIG_BUS_MASK		(0xFF << CE_E_CONFIG_BUS_SHFT)
+#define CE_E_CONFIG_DEVICE_SHFT		15
+#define CE_E_CONFIG_DEVICE_MASK		(0x1F << CE_E_CONFIG_DEVICE_SHFT)
+#define CE_E_CONFIG_FUNC_SHFT		12
+#define CE_E_CONFIG_FUNC_MASK		(0x7  << CE_E_CONFIG_FUNC_SHFT)
+
+#endif /* __ASM_IA64_SN_TIOCE_H__ */
diff --git a/arch/ia64/include/asm/sn/tioce_provider.h b/arch/ia64/include/asm/sn/tioce_provider.h
new file mode 100644
index 00000000..32c32f30
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tioce_provider.h
@@ -0,0 +1,63 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_CE_PROVIDER_H
+#define _ASM_IA64_SN_CE_PROVIDER_H
+
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/tioce.h>
+
+/*
+ * Common TIOCE structure shared between the prom and kernel
+ *
+ * DO NOT CHANGE THIS STRUCT WITHOUT MAKING CORRESPONDING CHANGES TO THE
+ * PROM VERSION.
+ */
+struct tioce_common {
+	struct pcibus_bussoft	ce_pcibus;	/* common pciio header */
+
+	u32		ce_rev;
+	u64		ce_kernel_private;
+	u64		ce_prom_private;
+};
+
+struct tioce_kernel {
+	struct tioce_common	*ce_common;
+	spinlock_t		ce_lock;
+	struct list_head	ce_dmamap_list;
+
+	u64		ce_ate40_shadow[TIOCE_NUM_M40_ATES];
+	u64		ce_ate3240_shadow[TIOCE_NUM_M3240_ATES];
+	u32		ce_ate3240_pagesize;
+
+	u8			ce_port1_secondary;
+
+	/* per-port resources */
+	struct {
+		int 		dirmap_refcnt;
+		u64	dirmap_shadow;
+	} ce_port[TIOCE_NUM_PORTS];
+};
+
+struct tioce_dmamap {
+	struct list_head	ce_dmamap_list;	/* headed by tioce_kernel */
+	u32		refcnt;
+
+	u64		nbytes;		/* # bytes mapped */
+
+	u64		ct_start;	/* coretalk start address */
+	u64		pci_start;	/* bus start address */
+
+	u64		__iomem *ate_hw;/* hw ptr of first ate in map */
+	u64		*ate_shadow;	/* shadow ptr of firat ate */
+	u16		ate_count;	/* # ate's in the map */
+};
+
+extern int tioce_init_provider(void);
+
+#endif  /* __ASM_IA64_SN_CE_PROVIDER_H */
diff --git a/arch/ia64/include/asm/sn/tiocp.h b/arch/ia64/include/asm/sn/tiocp.h
new file mode 100644
index 00000000..e8ad0bb5
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tiocp.h
@@ -0,0 +1,257 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_TIOCP_H
+#define _ASM_IA64_SN_PCI_TIOCP_H
+
+#define TIOCP_HOST_INTR_ADDR            0x003FFFFFFFFFFFFFUL
+#define TIOCP_PCI64_CMDTYPE_MEM         (0x1ull << 60)
+#define TIOCP_PCI64_CMDTYPE_MSI         (0x3ull << 60)
+
+
+/*****************************************************************************
+ *********************** TIOCP MMR structure mapping ***************************
+ *****************************************************************************/
+
+struct tiocp{
+
+    /* 0x000000-0x00FFFF -- Local Registers */
+
+    /* 0x000000-0x000057 -- (Legacy Widget Space) Configuration */
+    u64		cp_id;				/* 0x000000 */
+    u64		cp_stat;			/* 0x000008 */
+    u64		cp_err_upper;			/* 0x000010 */
+    u64		cp_err_lower;			/* 0x000018 */
+    #define cp_err cp_err_lower
+    u64		cp_control;			/* 0x000020 */
+    u64		cp_req_timeout;			/* 0x000028 */
+    u64		cp_intr_upper;			/* 0x000030 */
+    u64		cp_intr_lower;			/* 0x000038 */
+    #define cp_intr cp_intr_lower
+    u64		cp_err_cmdword;			/* 0x000040 */
+    u64		_pad_000048;			/* 0x000048 */
+    u64		cp_tflush;			/* 0x000050 */
+
+    /* 0x000058-0x00007F -- Bridge-specific Configuration */
+    u64		cp_aux_err;			/* 0x000058 */
+    u64		cp_resp_upper;			/* 0x000060 */
+    u64		cp_resp_lower;			/* 0x000068 */
+    #define cp_resp cp_resp_lower
+    u64		cp_tst_pin_ctrl;		/* 0x000070 */
+    u64		cp_addr_lkerr;			/* 0x000078 */
+
+    /* 0x000080-0x00008F -- PMU & MAP */
+    u64		cp_dir_map;			/* 0x000080 */
+    u64		_pad_000088;			/* 0x000088 */
+
+    /* 0x000090-0x00009F -- SSRAM */
+    u64		cp_map_fault;			/* 0x000090 */
+    u64		_pad_000098;			/* 0x000098 */
+
+    /* 0x0000A0-0x0000AF -- Arbitration */
+    u64		cp_arb;				/* 0x0000A0 */
+    u64		_pad_0000A8;			/* 0x0000A8 */
+
+    /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */
+    u64		cp_ate_parity_err;		/* 0x0000B0 */
+    u64		_pad_0000B8;			/* 0x0000B8 */
+
+    /* 0x0000C0-0x0000FF -- PCI/GIO */
+    u64		cp_bus_timeout;			/* 0x0000C0 */
+    u64		cp_pci_cfg;			/* 0x0000C8 */
+    u64		cp_pci_err_upper;		/* 0x0000D0 */
+    u64		cp_pci_err_lower;		/* 0x0000D8 */
+    #define cp_pci_err cp_pci_err_lower
+    u64		_pad_0000E0[4];			/* 0x0000{E0..F8} */
+
+    /* 0x000100-0x0001FF -- Interrupt */
+    u64		cp_int_status;			/* 0x000100 */
+    u64		cp_int_enable;			/* 0x000108 */
+    u64		cp_int_rst_stat;		/* 0x000110 */
+    u64		cp_int_mode;			/* 0x000118 */
+    u64		cp_int_device;			/* 0x000120 */
+    u64		cp_int_host_err;		/* 0x000128 */
+    u64		cp_int_addr[8];			/* 0x0001{30,,,68} */
+    u64		cp_err_int_view;		/* 0x000170 */
+    u64		cp_mult_int;			/* 0x000178 */
+    u64		cp_force_always[8];		/* 0x0001{80,,,B8} */
+    u64		cp_force_pin[8];		/* 0x0001{C0,,,F8} */
+
+    /* 0x000200-0x000298 -- Device */
+    u64		cp_device[4];			/* 0x0002{00,,,18} */
+    u64		_pad_000220[4];			/* 0x0002{20,,,38} */
+    u64		cp_wr_req_buf[4];		/* 0x0002{40,,,58} */
+    u64		_pad_000260[4];			/* 0x0002{60,,,78} */
+    u64		cp_rrb_map[2];			/* 0x0002{80,,,88} */
+    #define cp_even_resp cp_rrb_map[0]			/* 0x000280 */
+    #define cp_odd_resp  cp_rrb_map[1]			/* 0x000288 */
+    u64		cp_resp_status;			/* 0x000290 */
+    u64		cp_resp_clear;			/* 0x000298 */
+
+    u64		_pad_0002A0[12];		/* 0x0002{A0..F8} */
+
+    /* 0x000300-0x0003F8 -- Buffer Address Match Registers */
+    struct {
+	u64	upper;				/* 0x0003{00,,,F0} */
+	u64	lower;				/* 0x0003{08,,,F8} */
+    } cp_buf_addr_match[16];
+
+    /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */
+    struct {
+	u64	flush_w_touch;			/* 0x000{400,,,5C0} */
+	u64	flush_wo_touch;			/* 0x000{408,,,5C8} */
+	u64	inflight;			/* 0x000{410,,,5D0} */
+	u64	prefetch;			/* 0x000{418,,,5D8} */
+	u64	total_pci_retry;		/* 0x000{420,,,5E0} */
+	u64	max_pci_retry;			/* 0x000{428,,,5E8} */
+	u64	max_latency;			/* 0x000{430,,,5F0} */
+	u64	clear_all;			/* 0x000{438,,,5F8} */
+    } cp_buf_count[8];
+
+
+    /* 0x000600-0x0009FF -- PCI/X registers */
+    u64		cp_pcix_bus_err_addr;		/* 0x000600 */
+    u64		cp_pcix_bus_err_attr;		/* 0x000608 */
+    u64		cp_pcix_bus_err_data;		/* 0x000610 */
+    u64		cp_pcix_pio_split_addr;		/* 0x000618 */
+    u64		cp_pcix_pio_split_attr;		/* 0x000620 */
+    u64		cp_pcix_dma_req_err_attr;	/* 0x000628 */
+    u64		cp_pcix_dma_req_err_addr;	/* 0x000630 */
+    u64		cp_pcix_timeout;		/* 0x000638 */
+
+    u64		_pad_000640[24];		/* 0x000{640,,,6F8} */
+
+    /* 0x000700-0x000737 -- Debug Registers */
+    u64		cp_ct_debug_ctl;		/* 0x000700 */
+    u64		cp_br_debug_ctl;		/* 0x000708 */
+    u64		cp_mux3_debug_ctl;		/* 0x000710 */
+    u64		cp_mux4_debug_ctl;		/* 0x000718 */
+    u64		cp_mux5_debug_ctl;		/* 0x000720 */
+    u64		cp_mux6_debug_ctl;		/* 0x000728 */
+    u64		cp_mux7_debug_ctl;		/* 0x000730 */
+
+    u64		_pad_000738[89];		/* 0x000{738,,,9F8} */
+
+    /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */
+    struct {
+	u64	cp_buf_addr;			/* 0x000{A00,,,AF0} */
+	u64	cp_buf_attr;			/* 0X000{A08,,,AF8} */
+    } cp_pcix_read_buf_64[16];
+
+    struct {
+	u64	cp_buf_addr;			/* 0x000{B00,,,BE0} */
+	u64	cp_buf_attr;			/* 0x000{B08,,,BE8} */
+	u64	cp_buf_valid;			/* 0x000{B10,,,BF0} */
+	u64	__pad1;				/* 0x000{B18,,,BF8} */
+    } cp_pcix_write_buf_64[8];
+
+    /* End of Local Registers -- Start of Address Map space */
+
+    char	_pad_000c00[0x010000 - 0x000c00];
+
+    /* 0x010000-0x011FF8 -- Internal ATE RAM (Auto Parity Generation) */
+    u64		cp_int_ate_ram[1024];		/* 0x010000-0x011FF8 */
+
+    char	_pad_012000[0x14000 - 0x012000];
+
+    /* 0x014000-0x015FF8 -- Internal ATE RAM (Manual Parity Generation) */
+    u64		cp_int_ate_ram_mp[1024];	/* 0x014000-0x015FF8 */
+
+    char	_pad_016000[0x18000 - 0x016000];
+
+    /* 0x18000-0x197F8 -- TIOCP Write Request Ram */
+    u64		cp_wr_req_lower[256];		/* 0x18000 - 0x187F8 */
+    u64		cp_wr_req_upper[256];		/* 0x18800 - 0x18FF8 */
+    u64		cp_wr_req_parity[256];		/* 0x19000 - 0x197F8 */
+
+    char	_pad_019800[0x1C000 - 0x019800];
+
+    /* 0x1C000-0x1EFF8 -- TIOCP Read Response Ram */
+    u64		cp_rd_resp_lower[512];		/* 0x1C000 - 0x1CFF8 */
+    u64		cp_rd_resp_upper[512];		/* 0x1D000 - 0x1DFF8 */
+    u64		cp_rd_resp_parity[512];		/* 0x1E000 - 0x1EFF8 */
+
+    char	_pad_01F000[0x20000 - 0x01F000];
+
+    /* 0x020000-0x021FFF -- Host Device (CP) Configuration Space (not used)  */
+    char	_pad_020000[0x021000 - 0x20000];
+
+    /* 0x021000-0x027FFF -- PCI Device Configuration Spaces */
+    union {
+	u8	c[0x1000 / 1];			/* 0x02{0000,,,7FFF} */
+	u16	s[0x1000 / 2];			/* 0x02{0000,,,7FFF} */
+	u32	l[0x1000 / 4];			/* 0x02{0000,,,7FFF} */
+	u64	d[0x1000 / 8];			/* 0x02{0000,,,7FFF} */
+	union {
+	    u8	c[0x100 / 1];
+	    u16	s[0x100 / 2];
+	    u32	l[0x100 / 4];
+	    u64	d[0x100 / 8];
+	} f[8];
+    } cp_type0_cfg_dev[7];				/* 0x02{1000,,,7FFF} */
+
+    /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */
+    union {
+	u8	c[0x1000 / 1];			/* 0x028000-0x029000 */
+	u16	s[0x1000 / 2];			/* 0x028000-0x029000 */
+	u32	l[0x1000 / 4];			/* 0x028000-0x029000 */
+	u64	d[0x1000 / 8];			/* 0x028000-0x029000 */
+	union {
+	    u8	c[0x100 / 1];
+	    u16	s[0x100 / 2];
+	    u32	l[0x100 / 4];
+	    u64	d[0x100 / 8];
+	} f[8];
+    } cp_type1_cfg;					/* 0x028000-0x029000 */
+
+    char		_pad_029000[0x030000-0x029000];
+
+    /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */
+    union {
+	u8	c[8 / 1];
+	u16	s[8 / 2];
+	u32	l[8 / 4];
+	u64	d[8 / 8];
+    } cp_pci_iack;					/* 0x030000-0x030007 */
+
+    char		_pad_030007[0x040000-0x030008];
+
+    /* 0x040000-0x040007 -- PCIX Special Cycle */
+    union {
+	u8	c[8 / 1];
+	u16	s[8 / 2];
+	u32	l[8 / 4];
+	u64	d[8 / 8];
+    } cp_pcix_cycle;					/* 0x040000-0x040007 */
+
+    char		_pad_040007[0x200000-0x040008];
+
+    /* 0x200000-0x7FFFFF -- PCI/GIO Device Spaces */
+    union {
+	u8	c[0x100000 / 1];
+	u16	s[0x100000 / 2];
+	u32	l[0x100000 / 4];
+	u64	d[0x100000 / 8];
+    } cp_devio_raw[6];					/* 0x200000-0x7FFFFF */
+
+    #define cp_devio(n)  cp_devio_raw[((n)<2)?(n*2):(n+2)]
+
+    char		_pad_800000[0xA00000-0x800000];
+
+    /* 0xA00000-0xBFFFFF -- PCI/GIO Device Spaces w/flush  */
+    union {
+	u8	c[0x100000 / 1];
+	u16	s[0x100000 / 2];
+	u32	l[0x100000 / 4];
+	u64	d[0x100000 / 8];
+    } cp_devio_raw_flush[6];				/* 0xA00000-0xBFFFFF */
+
+    #define cp_devio_flush(n)  cp_devio_raw_flush[((n)<2)?(n*2):(n+2)]
+
+};
+
+#endif 	/* _ASM_IA64_SN_PCI_TIOCP_H */
diff --git a/arch/ia64/include/asm/sn/tiocx.h b/arch/ia64/include/asm/sn/tiocx.h
new file mode 100644
index 00000000..d2972849
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tiocx.h
@@ -0,0 +1,72 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_TIO_TIOCX_H
+#define _ASM_IA64_SN_TIO_TIOCX_H
+
+#ifdef __KERNEL__
+
+struct cx_id_s {
+	unsigned int part_num;
+	unsigned int mfg_num;
+	int nasid;
+};
+
+struct cx_dev {
+	struct cx_id_s cx_id;
+	int bt;				/* board/blade type */
+	void *soft;			/* driver specific */
+	struct hubdev_info *hubdev;
+	struct device dev;
+	struct cx_drv *driver;
+};
+
+struct cx_device_id {
+	unsigned int part_num;
+	unsigned int mfg_num;
+};
+
+struct cx_drv {
+	char *name;
+	const struct cx_device_id *id_table;
+	struct device_driver driver;
+	int (*probe) (struct cx_dev * dev, const struct cx_device_id * id);
+	int (*remove) (struct cx_dev * dev);
+};
+
+/* create DMA address by stripping AS bits */
+#define TIOCX_DMA_ADDR(a) (u64)((u64)(a) & 0xffffcfffffffffUL)
+
+#define TIOCX_TO_TIOCX_DMA_ADDR(a) (u64)(((u64)(a) & 0xfffffffff) |  \
+                                  ((((u64)(a)) & 0xffffc000000000UL) <<2))
+
+#define TIO_CE_ASIC_PARTNUM 0xce00
+#define TIOCX_CORELET 3
+
+/* These are taken from tio_mmr_as.h */
+#define TIO_ICE_FRZ_CFG               TIO_MMR_ADDR_MOD(0x00000000b0008100UL)
+#define TIO_ICE_PMI_TX_CFG            TIO_MMR_ADDR_MOD(0x00000000b000b100UL)
+#define TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3 TIO_MMR_ADDR_MOD(0x00000000b000be18UL)
+#define TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK 0x000000000000000fUL
+
+#define to_cx_dev(n) container_of(n, struct cx_dev, dev)
+#define to_cx_driver(drv) container_of(drv, struct cx_drv, driver)
+
+extern struct sn_irq_info *tiocx_irq_alloc(nasid_t, int, int, nasid_t, int);
+extern void tiocx_irq_free(struct sn_irq_info *);
+extern int cx_device_unregister(struct cx_dev *);
+extern int cx_device_register(nasid_t, int, int, struct hubdev_info *, int);
+extern int cx_driver_unregister(struct cx_drv *);
+extern int cx_driver_register(struct cx_drv *);
+extern u64 tiocx_dma_addr(u64 addr);
+extern u64 tiocx_swin_base(int nasid);
+extern void tiocx_mmr_store(int nasid, u64 offset, u64 value);
+extern u64 tiocx_mmr_load(int nasid, u64 offset);
+
+#endif				//  __KERNEL__
+#endif				// _ASM_IA64_SN_TIO_TIOCX__
diff --git a/arch/ia64/include/asm/sn/types.h b/arch/ia64/include/asm/sn/types.h
new file mode 100644
index 00000000..8e04ee21
--- /dev/null
+++ b/arch/ia64/include/asm/sn/types.h
@@ -0,0 +1,26 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999,2001-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (C) 1999 by Ralf Baechle
+ */
+#ifndef _ASM_IA64_SN_TYPES_H
+#define _ASM_IA64_SN_TYPES_H
+
+#include <linux/types.h>
+
+typedef unsigned long 	cpuid_t;
+typedef signed short	nasid_t;	/* node id in numa-as-id space */
+typedef signed char	partid_t;	/* partition ID type */
+typedef unsigned int    moduleid_t;     /* user-visible module number type */
+typedef unsigned int    cmoduleid_t;    /* kernel compact module id type */
+typedef unsigned char	slotid_t;	/* slot (blade) within module */
+typedef unsigned char	slabid_t;	/* slab (asic) within slot */
+typedef u64 nic_t;
+typedef unsigned long iopaddr_t;
+typedef unsigned long paddr_t;
+typedef short cnodeid_t;
+
+#endif /* _ASM_IA64_SN_TYPES_H */
diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h
new file mode 100644
index 00000000..51427eaa
--- /dev/null
+++ b/arch/ia64/include/asm/socket.h
@@ -0,0 +1,74 @@
+#ifndef _ASM_IA64_SOCKET_H
+#define _ASM_IA64_SOCKET_H
+
+/*
+ * Socket related defines.
+ *
+ * Based on <asm-i386/socket.h>.
+ *
+ * Modified 1998-2000
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	1
+
+#define SO_DEBUG	1
+#define SO_REUSEADDR	2
+#define SO_TYPE		3
+#define SO_ERROR	4
+#define SO_DONTROUTE	5
+#define SO_BROADCAST	6
+#define SO_SNDBUF	7
+#define SO_RCVBUF	8
+#define SO_SNDBUFFORCE	32
+#define SO_RCVBUFFORCE	33
+#define SO_KEEPALIVE	9
+#define SO_OOBINLINE	10
+#define SO_NO_CHECK	11
+#define SO_PRIORITY	12
+#define SO_LINGER	13
+#define SO_BSDCOMPAT	14
+/* To add :#define SO_REUSEPORT 15 */
+#define SO_PASSCRED	16
+#define SO_PEERCRED	17
+#define SO_RCVLOWAT	18
+#define SO_SNDLOWAT	19
+#define SO_RCVTIMEO	20
+#define SO_SNDTIMEO	21
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
+#define SO_SECURITY_ENCRYPTION_NETWORK		24
+
+#define SO_BINDTODEVICE		25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER	26
+#define SO_DETACH_FILTER	27
+
+#define SO_PEERNAME		28
+#define SO_TIMESTAMP		29
+#define SCM_TIMESTAMP		SO_TIMESTAMP
+
+#define SO_ACCEPTCONN		30
+
+#define SO_PEERSEC             31
+#define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
+
+#define SO_MARK			36
+
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
+#define SO_PROTOCOL		38
+#define SO_DOMAIN		39
+
+#define SO_RXQ_OVFL             40
+
+#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/ia64/include/asm/sockios.h b/arch/ia64/include/asm/sockios.h
new file mode 100644
index 00000000..15c92468
--- /dev/null
+++ b/arch/ia64/include/asm/sockios.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_IA64_SOCKIOS_H
+#define _ASM_IA64_SOCKIOS_H
+
+/*
+ * Socket-level I/O control calls.
+ *
+ * Based on <asm-i386/sockios.h>.
+ *
+ * Modified 1998, 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+#define FIOSETOWN 	0x8901
+#define SIOCSPGRP	0x8902
+#define FIOGETOWN	0x8903
+#define SIOCGPGRP	0x8904
+#define SIOCATMARK	0x8905
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
+
+#endif /* _ASM_IA64_SOCKIOS_H */
diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h
new file mode 100644
index 00000000..67a7c40e
--- /dev/null
+++ b/arch/ia64/include/asm/sparsemem.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_IA64_SPARSEMEM_H
+#define _ASM_IA64_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+/*
+ * SECTION_SIZE_BITS            2^N: how big each section will be
+ * MAX_PHYSMEM_BITS             2^N: how much memory we can have in that space
+ */
+
+#define SECTION_SIZE_BITS	(30)
+#define MAX_PHYSMEM_BITS	(50)
+#ifdef CONFIG_FORCE_MAX_ZONEORDER
+#if ((CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS)
+#undef SECTION_SIZE_BITS
+#define SECTION_SIZE_BITS (CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT)
+#endif
+#endif
+
+#endif /* CONFIG_SPARSEMEM */
+#endif /* _ASM_IA64_SPARSEMEM_H */
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
new file mode 100644
index 00000000..1a91c912
--- /dev/null
+++ b/arch/ia64/include/asm/spinlock.h
@@ -0,0 +1,292 @@
+#ifndef _ASM_IA64_SPINLOCK_H
+#define _ASM_IA64_SPINLOCK_H
+
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ *
+ * This file is used for SMP configurations only.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include <asm/atomic.h>
+#include <asm/intrinsics.h>
+#include <asm/system.h>
+
+#define arch_spin_lock_init(x)			((x)->lock = 0)
+
+/*
+ * Ticket locks are conceptually two parts, one indicating the current head of
+ * the queue, and the other indicating the current tail. The lock is acquired
+ * by atomically noting the tail and incrementing it by one (thus adding
+ * ourself to the queue and noting our position), then waiting until the head
+ * becomes equal to the the initial value of the tail.
+ * The pad bits in the middle are used to prevent the next_ticket number
+ * overflowing into the now_serving number.
+ *
+ *   31             17  16    15  14                    0
+ *  +----------------------------------------------------+
+ *  |  now_serving     | padding |   next_ticket         |
+ *  +----------------------------------------------------+
+ */
+
+#define TICKET_SHIFT	17
+#define TICKET_BITS	15
+#define	TICKET_MASK	((1 << TICKET_BITS) - 1)
+
+static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
+{
+	int	*p = (int *)&lock->lock, ticket, serve;
+
+	ticket = ia64_fetchadd(1, p, acq);
+
+	if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
+		return;
+
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory");
+
+		if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
+			return;
+		cpu_relax();
+	}
+}
+
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
+{
+	int tmp = ACCESS_ONCE(lock->lock);
+
+	if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK))
+		return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp;
+	return 0;
+}
+
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
+{
+	unsigned short	*p = (unsigned short *)&lock->lock + 1, tmp;
+
+	asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p));
+	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
+}
+
+static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	int	*p = (int *)&lock->lock, ticket;
+
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory");
+		if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
+			return;
+		cpu_relax();
+	}
+}
+
+static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
+{
+	long tmp = ACCESS_ONCE(lock->lock);
+
+	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK);
+}
+
+static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
+{
+	long tmp = ACCESS_ONCE(lock->lock);
+
+	return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	return __ticket_spin_is_locked(lock);
+}
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+	return __ticket_spin_is_contended(lock);
+}
+#define arch_spin_is_contended	arch_spin_is_contended
+
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	__ticket_spin_lock(lock);
+}
+
+static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	return __ticket_spin_trylock(lock);
+}
+
+static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	__ticket_spin_unlock(lock);
+}
+
+static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
+						  unsigned long flags)
+{
+	arch_spin_lock(lock);
+}
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	__ticket_spin_unlock_wait(lock);
+}
+
+#define arch_read_can_lock(rw)		(*(volatile int *)(rw) >= 0)
+#define arch_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
+
+#ifdef ASM_SUPPORTED
+
+static __always_inline void
+arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
+{
+	__asm__ __volatile__ (
+		"tbit.nz p6, p0 = %1,%2\n"
+		"br.few 3f\n"
+		"1:\n"
+		"fetchadd4.rel r2 = [%0], -1;;\n"
+		"(p6) ssm psr.i\n"
+		"2:\n"
+		"hint @pause\n"
+		"ld4 r2 = [%0];;\n"
+		"cmp4.lt p7,p0 = r2, r0\n"
+		"(p7) br.cond.spnt.few 2b\n"
+		"(p6) rsm psr.i\n"
+		";;\n"
+		"3:\n"
+		"fetchadd4.acq r2 = [%0], 1;;\n"
+		"cmp4.lt p7,p0 = r2, r0\n"
+		"(p7) br.cond.spnt.few 1b\n"
+		: : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT)
+		: "p6", "p7", "r2", "memory");
+}
+
+#define arch_read_lock(lock) arch_read_lock_flags(lock, 0)
+
+#else /* !ASM_SUPPORTED */
+
+#define arch_read_lock_flags(rw, flags) arch_read_lock(rw)
+
+#define arch_read_lock(rw)								\
+do {											\
+	arch_rwlock_t *__read_lock_ptr = (rw);						\
+											\
+	while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) {		\
+		ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);			\
+		while (*(volatile int *)__read_lock_ptr < 0)				\
+			cpu_relax();							\
+	}										\
+} while (0)
+
+#endif /* !ASM_SUPPORTED */
+
+#define arch_read_unlock(rw)					\
+do {								\
+	arch_rwlock_t *__read_lock_ptr = (rw);			\
+	ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);	\
+} while (0)
+
+#ifdef ASM_SUPPORTED
+
+static __always_inline void
+arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
+{
+	__asm__ __volatile__ (
+		"tbit.nz p6, p0 = %1, %2\n"
+		"mov ar.ccv = r0\n"
+		"dep r29 = -1, r0, 31, 1\n"
+		"br.few 3f;;\n"
+		"1:\n"
+		"(p6) ssm psr.i\n"
+		"2:\n"
+		"hint @pause\n"
+		"ld4 r2 = [%0];;\n"
+		"cmp4.eq p0,p7 = r0, r2\n"
+		"(p7) br.cond.spnt.few 2b\n"
+		"(p6) rsm psr.i\n"
+		";;\n"
+		"3:\n"
+		"cmpxchg4.acq r2 = [%0], r29, ar.ccv;;\n"
+		"cmp4.eq p0,p7 = r0, r2\n"
+		"(p7) br.cond.spnt.few 1b;;\n"
+		: : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT)
+		: "ar.ccv", "p6", "p7", "r2", "r29", "memory");
+}
+
+#define arch_write_lock(rw) arch_write_lock_flags(rw, 0)
+
+#define arch_write_trylock(rw)							\
+({										\
+	register long result;							\
+										\
+	__asm__ __volatile__ (							\
+		"mov ar.ccv = r0\n"						\
+		"dep r29 = -1, r0, 31, 1;;\n"					\
+		"cmpxchg4.acq %0 = [%1], r29, ar.ccv\n"				\
+		: "=r"(result) : "r"(rw) : "ar.ccv", "r29", "memory");		\
+	(result == 0);								\
+})
+
+static inline void arch_write_unlock(arch_rwlock_t *x)
+{
+	u8 *y = (u8 *)x;
+	barrier();
+	asm volatile ("st1.rel.nta [%0] = r0\n\t" :: "r"(y+3) : "memory" );
+}
+
+#else /* !ASM_SUPPORTED */
+
+#define arch_write_lock_flags(l, flags) arch_write_lock(l)
+
+#define arch_write_lock(l)								\
+({											\
+	__u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1);			\
+	__u32 *ia64_write_lock_ptr = (__u32 *) (l);					\
+	do {										\
+		while (*ia64_write_lock_ptr)						\
+			ia64_barrier();							\
+		ia64_val = ia64_cmpxchg4_acq(ia64_write_lock_ptr, ia64_set_val, 0);	\
+	} while (ia64_val);								\
+})
+
+#define arch_write_trylock(rw)						\
+({									\
+	__u64 ia64_val;							\
+	__u64 ia64_set_val = ia64_dep_mi(-1, 0, 31,1);			\
+	ia64_val = ia64_cmpxchg4_acq((__u32 *)(rw), ia64_set_val, 0);	\
+	(ia64_val == 0);						\
+})
+
+static inline void arch_write_unlock(arch_rwlock_t *x)
+{
+	barrier();
+	x->write_lock = 0;
+}
+
+#endif /* !ASM_SUPPORTED */
+
+static inline int arch_read_trylock(arch_rwlock_t *x)
+{
+	union {
+		arch_rwlock_t lock;
+		__u32 word;
+	} old, new;
+	old.lock = new.lock = *x;
+	old.lock.write_lock = new.lock.write_lock = 0;
+	++new.lock.read_counter;
+	return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word;
+}
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /*  _ASM_IA64_SPINLOCK_H */
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
new file mode 100644
index 00000000..e2b42a52
--- /dev/null
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -0,0 +1,21 @@
+#ifndef _ASM_IA64_SPINLOCK_TYPES_H
+#define _ASM_IA64_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+	volatile unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+
+typedef struct {
+	volatile unsigned int read_counter	: 31;
+	volatile unsigned int write_lock	:  1;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0, 0 }
+
+#endif
diff --git a/arch/ia64/include/asm/stat.h b/arch/ia64/include/asm/stat.h
new file mode 100644
index 00000000..367bb90c
--- /dev/null
+++ b/arch/ia64/include/asm/stat.h
@@ -0,0 +1,51 @@
+#ifndef _ASM_IA64_STAT_H
+#define _ASM_IA64_STAT_H
+
+/*
+ * Modified 1998, 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+struct stat {
+	unsigned long	st_dev;
+	unsigned long	st_ino;
+	unsigned long	st_nlink;
+	unsigned int	st_mode;
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+	unsigned int	__pad0;
+	unsigned long	st_rdev;
+	unsigned long	st_size;
+	unsigned long	st_atime;
+	unsigned long	st_atime_nsec;
+	unsigned long	st_mtime;
+	unsigned long	st_mtime_nsec;
+	unsigned long	st_ctime;
+	unsigned long	st_ctime_nsec;
+	unsigned long	st_blksize;
+	long		st_blocks;
+	unsigned long	__unused[3];
+};
+
+#define STAT_HAVE_NSEC 1
+
+struct ia64_oldstat {
+	unsigned int	st_dev;
+	unsigned int	st_ino;
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+	unsigned int	st_rdev;
+	unsigned int	__pad1;
+	unsigned long	st_size;
+	unsigned long	st_atime;
+	unsigned long	st_mtime;
+	unsigned long	st_ctime;
+	unsigned int	st_blksize;
+	int		st_blocks;
+	unsigned int	__unused1;
+	unsigned int	__unused2;
+};
+
+#endif /* _ASM_IA64_STAT_H */
diff --git a/arch/ia64/include/asm/statfs.h b/arch/ia64/include/asm/statfs.h
new file mode 100644
index 00000000..1e589669
--- /dev/null
+++ b/arch/ia64/include/asm/statfs.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_IA64_STATFS_H
+#define _ASM_IA64_STATFS_H
+
+/*
+ * Based on <asm-i386/statfs.h>.
+ *
+ * Modified 1998, 1999, 2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+/*
+ * We need compat_statfs64 to be packed, because the i386 ABI won't
+ * add padding at the end to bring it to a multiple of 8 bytes, but
+ * the IA64 ABI will.
+ */
+#define ARCH_PACK_COMPAT_STATFS64 __attribute__((packed,aligned(4)))
+
+#include <asm-generic/statfs.h>
+
+#endif /* _ASM_IA64_STATFS_H */
diff --git a/arch/ia64/include/asm/string.h b/arch/ia64/include/asm/string.h
new file mode 100644
index 00000000..85fd65c5
--- /dev/null
+++ b/arch/ia64/include/asm/string.h
@@ -0,0 +1,21 @@
+#ifndef _ASM_IA64_STRING_H
+#define _ASM_IA64_STRING_H
+
+/*
+ * Here is where we want to put optimized versions of the string
+ * routines.
+ *
+ * Copyright (C) 1998-2000, 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#define __HAVE_ARCH_STRLEN	1 /* see arch/ia64/lib/strlen.S */
+#define __HAVE_ARCH_MEMSET	1 /* see arch/ia64/lib/memset.S */
+#define __HAVE_ARCH_MEMCPY	1 /* see arch/ia64/lib/memcpy.S */
+
+extern __kernel_size_t strlen (const char *);
+extern void *memcpy (void *, const void *, __kernel_size_t);
+extern void *memset (void *, int, __kernel_size_t);
+
+#endif /* _ASM_IA64_STRING_H */
diff --git a/arch/ia64/include/asm/swab.h b/arch/ia64/include/asm/swab.h
new file mode 100644
index 00000000..c89a8cb5
--- /dev/null
+++ b/arch/ia64/include/asm/swab.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_IA64_SWAB_H
+#define _ASM_IA64_SWAB_H
+
+/*
+ * Modified 1998, 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co.
+ */
+
+#include <linux/types.h>
+#include <asm/intrinsics.h>
+#include <linux/compiler.h>
+
+static __inline__ __attribute_const__ __u64 __arch_swab64(__u64 x)
+{
+	__u64 result;
+
+	result = ia64_mux1(x, ia64_mux1_rev);
+	return result;
+}
+#define __arch_swab64 __arch_swab64
+
+static __inline__ __attribute_const__ __u32 __arch_swab32(__u32 x)
+{
+	return __arch_swab64(x) >> 32;
+}
+#define __arch_swab32 __arch_swab32
+
+static __inline__ __attribute_const__ __u16 __arch_swab16(__u16 x)
+{
+	return __arch_swab64(x) >> 48;
+}
+#define __arch_swab16 __arch_swab16
+
+#endif /* _ASM_IA64_SWAB_H */
diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h
new file mode 100644
index 00000000..f0acde68
--- /dev/null
+++ b/arch/ia64/include/asm/swiotlb.h
@@ -0,0 +1,17 @@
+#ifndef ASM_IA64__SWIOTLB_H
+#define ASM_IA64__SWIOTLB_H
+
+#include <linux/dma-mapping.h>
+#include <linux/swiotlb.h>
+
+#ifdef CONFIG_SWIOTLB
+extern int swiotlb;
+extern void pci_swiotlb_init(void);
+#else
+#define swiotlb 0
+static inline void pci_swiotlb_init(void)
+{
+}
+#endif
+
+#endif /* ASM_IA64__SWIOTLB_H */
diff --git a/arch/ia64/include/asm/sync_bitops.h b/arch/ia64/include/asm/sync_bitops.h
new file mode 100644
index 00000000..593c12ee
--- /dev/null
+++ b/arch/ia64/include/asm/sync_bitops.h
@@ -0,0 +1,51 @@
+#ifndef _ASM_IA64_SYNC_BITOPS_H
+#define _ASM_IA64_SYNC_BITOPS_H
+
+/*
+ * Copyright (C) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *
+ * Based on synch_bitops.h which Dan Magenhaimer wrote.
+ *
+ * bit operations which provide guaranteed strong synchronisation
+ * when communicating with Xen or other guest OSes running on other CPUs.
+ */
+
+static inline void sync_set_bit(int nr, volatile void *addr)
+{
+	set_bit(nr, addr);
+}
+
+static inline void sync_clear_bit(int nr, volatile void *addr)
+{
+	clear_bit(nr, addr);
+}
+
+static inline void sync_change_bit(int nr, volatile void *addr)
+{
+	change_bit(nr, addr);
+}
+
+static inline int sync_test_and_set_bit(int nr, volatile void *addr)
+{
+	return test_and_set_bit(nr, addr);
+}
+
+static inline int sync_test_and_clear_bit(int nr, volatile void *addr)
+{
+	return test_and_clear_bit(nr, addr);
+}
+
+static inline int sync_test_and_change_bit(int nr, volatile void *addr)
+{
+	return test_and_change_bit(nr, addr);
+}
+
+static inline int sync_test_bit(int nr, const volatile void *addr)
+{
+	return test_bit(nr, addr);
+}
+
+#define sync_cmpxchg(ptr, old, new)				\
+	((__typeof__(*(ptr)))cmpxchg_acq((ptr), (old), (new)))
+
+#endif /* _ASM_IA64_SYNC_BITOPS_H */
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
new file mode 100644
index 00000000..a7ff1c6a
--- /dev/null
+++ b/arch/ia64/include/asm/syscall.h
@@ -0,0 +1,82 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Intel Corp.  Shaohua Li <shaohua.li@intel.com>
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H	1
+
+#include <linux/sched.h>
+#include <linux/err.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	if ((long)regs->cr_ifs < 0) /* Not a syscall */
+		return -1;
+
+	return regs->r15;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/* do nothing */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	return regs->r10 == -1 ? regs->r8:0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->r8;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (error) {
+		/* error < 0, but ia64 uses > 0 return value */
+		regs->r8 = -error;
+		regs->r10 = -1;
+	} else {
+		regs->r8 = val;
+		regs->r10 = 0;
+	}
+}
+
+extern void ia64_syscall_get_set_arguments(struct task_struct *task,
+	struct pt_regs *regs, unsigned int i, unsigned int n,
+	unsigned long *args, int rw);
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+
+	ia64_syscall_get_set_arguments(task, regs, i, n, args, 0);
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+
+	ia64_syscall_get_set_arguments(task, regs, i, n, args, 1);
+}
+#endif	/* _ASM_SYSCALL_H */
diff --git a/arch/ia64/include/asm/system.h b/arch/ia64/include/asm/system.h
new file mode 100644
index 00000000..6cca3070
--- /dev/null
+++ b/arch/ia64/include/asm/system.h
@@ -0,0 +1,203 @@
+#ifndef _ASM_IA64_SYSTEM_H
+#define _ASM_IA64_SYSTEM_H
+
+/*
+ * System defines. Note that this is included both from .c and .S
+ * files, so it does only defines, not any C code.  This is based
+ * on information published in the Processor Abstraction Layer
+ * and the System Abstraction Layer manual.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pal.h>
+#include <asm/percpu.h>
+
+#define GATE_ADDR		RGN_BASE(RGN_GATE)
+
+/*
+ * 0xa000000000000000+2*PERCPU_PAGE_SIZE
+ * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page)
+ */
+#define KERNEL_START		 (GATE_ADDR+__IA64_UL_CONST(0x100000000))
+#define PERCPU_ADDR		(-PERCPU_PAGE_SIZE)
+#define LOAD_OFFSET		(KERNEL_START - KERNEL_TR_PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */
+
+struct pci_vector_struct {
+	__u16 segment;	/* PCI Segment number */
+	__u16 bus;	/* PCI Bus number */
+	__u32 pci_id;	/* ACPI split 16 bits device, 16 bits function (see section 6.1.1) */
+	__u8 pin;	/* PCI PIN (0 = A, 1 = B, 2 = C, 3 = D) */
+	__u32 irq;	/* IRQ assigned */
+};
+
+extern struct ia64_boot_param {
+	__u64 command_line;		/* physical address of command line arguments */
+	__u64 efi_systab;		/* physical address of EFI system table */
+	__u64 efi_memmap;		/* physical address of EFI memory map */
+	__u64 efi_memmap_size;		/* size of EFI memory map */
+	__u64 efi_memdesc_size;		/* size of an EFI memory map descriptor */
+	__u32 efi_memdesc_version;	/* memory descriptor version */
+	struct {
+		__u16 num_cols;	/* number of columns on console output device */
+		__u16 num_rows;	/* number of rows on console output device */
+		__u16 orig_x;	/* cursor's x position */
+		__u16 orig_y;	/* cursor's y position */
+	} console_info;
+	__u64 fpswa;		/* physical address of the fpswa interface */
+	__u64 initrd_start;
+	__u64 initrd_size;
+} *ia64_boot_param;
+
+/*
+ * Macros to force memory ordering.  In these descriptions, "previous"
+ * and "subsequent" refer to program order; "visible" means that all
+ * architecturally visible effects of a memory access have occurred
+ * (at a minimum, this means the memory has been read or written).
+ *
+ *   wmb():	Guarantees that all preceding stores to memory-
+ *		like regions are visible before any subsequent
+ *		stores and that all following stores will be
+ *		visible only after all previous stores.
+ *   rmb():	Like wmb(), but for reads.
+ *   mb():	wmb()/rmb() combo, i.e., all previous memory
+ *		accesses are visible before all subsequent
+ *		accesses and vice versa.  This is also known as
+ *		a "fence."
+ *
+ * Note: "mb()" and its variants cannot be used as a fence to order
+ * accesses to memory mapped I/O registers.  For that, mf.a needs to
+ * be used.  However, we don't want to always use mf.a because (a)
+ * it's (presumably) much slower than mf and (b) mf.a is supported for
+ * sequential memory pages only.
+ */
+#define mb()	ia64_mf()
+#define rmb()	mb()
+#define wmb()	mb()
+#define read_barrier_depends()	do { } while(0)
+
+#ifdef CONFIG_SMP
+# define smp_mb()	mb()
+# define smp_rmb()	rmb()
+# define smp_wmb()	wmb()
+# define smp_read_barrier_depends()	read_barrier_depends()
+#else
+# define smp_mb()	barrier()
+# define smp_rmb()	barrier()
+# define smp_wmb()	barrier()
+# define smp_read_barrier_depends()	do { } while(0)
+#endif
+
+/*
+ * XXX check on this ---I suspect what Linus really wants here is
+ * acquire vs release semantics but we can't discuss this stuff with
+ * Linus just yet.  Grrr...
+ */
+#define set_mb(var, value)	do { (var) = (value); mb(); } while (0)
+
+/*
+ * The group barrier in front of the rsm & ssm are necessary to ensure
+ * that none of the previous instructions in the same group are
+ * affected by the rsm/ssm.
+ */
+
+#ifdef __KERNEL__
+
+/*
+ * Context switch from one thread to another.  If the two threads have
+ * different address spaces, schedule() has already taken care of
+ * switching to the new address space by calling switch_mm().
+ *
+ * Disabling access to the fph partition and the debug-register
+ * context switch MUST be done before calling ia64_switch_to() since a
+ * newly created thread returns directly to
+ * ia64_ret_from_syscall_clear_r8.
+ */
+extern struct task_struct *ia64_switch_to (void *next_task);
+
+struct task_struct;
+
+extern void ia64_save_extra (struct task_struct *task);
+extern void ia64_load_extra (struct task_struct *task);
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next);
+# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n)
+#else
+# define IA64_ACCOUNT_ON_SWITCH(p,n)
+#endif
+
+#ifdef CONFIG_PERFMON
+  DECLARE_PER_CPU(unsigned long, pfm_syst_info);
+# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
+#else
+# define PERFMON_IS_SYSWIDE() (0)
+#endif
+
+#define IA64_HAS_EXTRA_STATE(t)							\
+	((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)	\
+	 || PERFMON_IS_SYSWIDE())
+
+#define __switch_to(prev,next,last) do {							 \
+	IA64_ACCOUNT_ON_SWITCH(prev, next);							 \
+	if (IA64_HAS_EXTRA_STATE(prev))								 \
+		ia64_save_extra(prev);								 \
+	if (IA64_HAS_EXTRA_STATE(next))								 \
+		ia64_load_extra(next);								 \
+	ia64_psr(task_pt_regs(next))->dfh = !ia64_is_local_fpu_owner(next);			 \
+	(last) = ia64_switch_to((next));							 \
+} while (0)
+
+#ifdef CONFIG_SMP
+/*
+ * In the SMP case, we save the fph state when context-switching away from a thread that
+ * modified fph.  This way, when the thread gets scheduled on another CPU, the CPU can
+ * pick up the state from task->thread.fph, avoiding the complication of having to fetch
+ * the latest fph state from another CPU.  In other words: eager save, lazy restore.
+ */
+# define switch_to(prev,next,last) do {						\
+	if (ia64_psr(task_pt_regs(prev))->mfh && ia64_is_local_fpu_owner(prev)) {				\
+		ia64_psr(task_pt_regs(prev))->mfh = 0;			\
+		(prev)->thread.flags |= IA64_THREAD_FPH_VALID;			\
+		__ia64_save_fpu((prev)->thread.fph);				\
+	}									\
+	__switch_to(prev, next, last);						\
+	/* "next" in old context is "current" in new context */			\
+	if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) &&	       \
+		     (task_cpu(current) !=				       \
+		      		      task_thread_info(current)->last_cpu))) { \
+		platform_migrate(current);				       \
+		task_thread_info(current)->last_cpu = task_cpu(current);       \
+	}								       \
+} while (0)
+#else
+# define switch_to(prev,next,last)	__switch_to(prev, next, last)
+#endif
+
+#define __ARCH_WANT_UNLOCKED_CTXSW
+#define ARCH_HAS_PREFETCH_SWITCH_STACK
+#define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
+
+void cpu_idle_wait(void);
+
+#define arch_align_stack(x) (x)
+
+void default_idle(void);
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_SYSTEM_H */
diff --git a/arch/ia64/include/asm/termbits.h b/arch/ia64/include/asm/termbits.h
new file mode 100644
index 00000000..c009b94e
--- /dev/null
+++ b/arch/ia64/include/asm/termbits.h
@@ -0,0 +1,208 @@
+#ifndef _ASM_IA64_TERMBITS_H
+#define _ASM_IA64_TERMBITS_H
+
+/*
+ * Based on <asm-i386/termbits.h>.
+ *
+ * Modified 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ *
+ * 99/01/28	Added new baudrates
+ */
+
+#include <linux/posix_types.h>
+
+typedef unsigned char	cc_t;
+typedef unsigned int	speed_t;
+typedef unsigned int	tcflag_t;
+
+#define NCCS 19
+struct termios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+};
+
+struct termios2 {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+struct ktermios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+/* c_cc characters */
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+/* c_iflag bits */
+#define IGNBRK	0000001
+#define BRKINT	0000002
+#define IGNPAR	0000004
+#define PARMRK	0000010
+#define INPCK	0000020
+#define ISTRIP	0000040
+#define INLCR	0000100
+#define IGNCR	0000200
+#define ICRNL	0000400
+#define IUCLC	0001000
+#define IXON	0002000
+#define IXANY	0004000
+#define IXOFF	0010000
+#define IMAXBEL	0020000
+#define IUTF8	0040000
+
+/* c_oflag bits */
+#define OPOST	0000001
+#define OLCUC	0000002
+#define ONLCR	0000004
+#define OCRNL	0000010
+#define ONOCR	0000020
+#define ONLRET	0000040
+#define OFILL	0000100
+#define OFDEL	0000200
+#define NLDLY	0000400
+#define   NL0	0000000
+#define   NL1	0000400
+#define CRDLY	0003000
+#define   CR0	0000000
+#define   CR1	0001000
+#define   CR2	0002000
+#define   CR3	0003000
+#define TABDLY	0014000
+#define   TAB0	0000000
+#define   TAB1	0004000
+#define   TAB2	0010000
+#define   TAB3	0014000
+#define   XTABS	0014000
+#define BSDLY	0020000
+#define   BS0	0000000
+#define   BS1	0020000
+#define VTDLY	0040000
+#define   VT0	0000000
+#define   VT1	0040000
+#define FFDLY	0100000
+#define   FF0	0000000
+#define   FF1	0100000
+
+/* c_cflag bit meaning */
+#define CBAUD	0010017
+#define  B0	0000000		/* hang up */
+#define  B50	0000001
+#define  B75	0000002
+#define  B110	0000003
+#define  B134	0000004
+#define  B150	0000005
+#define  B200	0000006
+#define  B300	0000007
+#define  B600	0000010
+#define  B1200	0000011
+#define  B1800	0000012
+#define  B2400	0000013
+#define  B4800	0000014
+#define  B9600	0000015
+#define  B19200	0000016
+#define  B38400	0000017
+#define EXTA B19200
+#define EXTB B38400
+#define CSIZE	0000060
+#define   CS5	0000000
+#define   CS6	0000020
+#define   CS7	0000040
+#define   CS8	0000060
+#define CSTOPB	0000100
+#define CREAD	0000200
+#define PARENB	0000400
+#define PARODD	0001000
+#define HUPCL	0002000
+#define CLOCAL	0004000
+#define CBAUDEX 0010000
+#define    BOTHER 0010000
+#define    B57600 0010001
+#define   B115200 0010002
+#define   B230400 0010003
+#define   B460800 0010004
+#define   B500000 0010005
+#define   B576000 0010006
+#define   B921600 0010007
+#define  B1000000 0010010
+#define  B1152000 0010011
+#define  B1500000 0010012
+#define  B2000000 0010013
+#define  B2500000 0010014
+#define  B3000000 0010015
+#define  B3500000 0010016
+#define  B4000000 0010017
+#define CIBAUD	  002003600000		/* input baud rate */
+#define CMSPAR	  010000000000		/* mark or space (stick) parity */
+#define CRTSCTS	  020000000000		/* flow control */
+
+#define IBSHIFT	16		/* Shift from CBAUD to CIBAUD */
+
+/* c_lflag bits */
+#define ISIG	0000001
+#define ICANON	0000002
+#define XCASE	0000004
+#define ECHO	0000010
+#define ECHOE	0000020
+#define ECHOK	0000040
+#define ECHONL	0000100
+#define NOFLSH	0000200
+#define TOSTOP	0000400
+#define ECHOCTL	0001000
+#define ECHOPRT	0002000
+#define ECHOKE	0004000
+#define FLUSHO	0010000
+#define PENDIN	0040000
+#define IEXTEN	0100000
+#define EXTPROC	0200000
+
+/* tcflow() and TCXONC use these */
+#define	TCOOFF		0
+#define	TCOON		1
+#define	TCIOFF		2
+#define	TCION		3
+
+/* tcflush() and TCFLSH use these */
+#define	TCIFLUSH	0
+#define	TCOFLUSH	1
+#define	TCIOFLUSH	2
+
+/* tcsetattr uses these */
+#define	TCSANOW		0
+#define	TCSADRAIN	1
+#define	TCSAFLUSH	2
+
+#endif /* _ASM_IA64_TERMBITS_H */
diff --git a/arch/ia64/include/asm/termios.h b/arch/ia64/include/asm/termios.h
new file mode 100644
index 00000000..689d218c
--- /dev/null
+++ b/arch/ia64/include/asm/termios.h
@@ -0,0 +1,97 @@
+#ifndef _ASM_IA64_TERMIOS_H
+#define _ASM_IA64_TERMIOS_H
+
+/*
+ * Modified 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ *
+ * 99/01/28	Added N_IRDA and N_SMSBLOCK
+ */
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+# ifdef __KERNEL__
+
+/*	intr=^C		quit=^\		erase=del	kill=^U
+	eof=^D		vtime=\0	vmin=\1		sxtc=\0
+	start=^Q	stop=^S		susp=^Z		eol=\0
+	reprint=^R	discard=^U	werase=^W	lnext=^V
+	eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+#define SET_LOW_TERMIOS_BITS(termios, termio, x) {	\
+	unsigned short __tmp;				\
+	get_user(__tmp,&(termio)->x);			\
+	*(unsigned short *) &(termios)->x = __tmp;	\
+}
+
+#define user_termio_to_kernel_termios(termios, termio)		\
+({								\
+	SET_LOW_TERMIOS_BITS(termios, termio, c_iflag);		\
+	SET_LOW_TERMIOS_BITS(termios, termio, c_oflag);		\
+	SET_LOW_TERMIOS_BITS(termios, termio, c_cflag);		\
+	SET_LOW_TERMIOS_BITS(termios, termio, c_lflag);		\
+	copy_from_user((termios)->c_cc, (termio)->c_cc, NCC);	\
+})
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ */
+#define kernel_termios_to_user_termio(termio, termios)		\
+({								\
+	put_user((termios)->c_iflag, &(termio)->c_iflag);	\
+	put_user((termios)->c_oflag, &(termio)->c_oflag);	\
+	put_user((termios)->c_cflag, &(termio)->c_cflag);	\
+	put_user((termios)->c_lflag, &(termio)->c_lflag);	\
+	put_user((termios)->c_line,  &(termio)->c_line);	\
+	copy_to_user((termio)->c_cc, (termios)->c_cc, NCC);	\
+})
+
+#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
+#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
+#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
+#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
+
+# endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_TERMIOS_H */
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
new file mode 100644
index 00000000..ff0cc84e
--- /dev/null
+++ b/arch/ia64/include/asm/thread_info.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#ifndef _ASM_IA64_THREAD_INFO_H
+#define _ASM_IA64_THREAD_INFO_H
+
+#ifndef ASM_OFFSETS_C
+#include <asm/asm-offsets.h>
+#endif
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+
+#define PREEMPT_ACTIVE_BIT 30
+#define PREEMPT_ACTIVE	(1 << PREEMPT_ACTIVE_BIT)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * On IA-64, we want to keep the task structure and kernel stack together, so they can be
+ * mapped by a single TLB entry and so they can be addressed by the "current" pointer
+ * without having to do pointer masking.
+ */
+struct thread_info {
+	struct task_struct *task;	/* XXX not really needed, except for dup_task_struct() */
+	struct exec_domain *exec_domain;/* execution domain */
+	__u32 flags;			/* thread_info flags (see TIF_*) */
+	__u32 cpu;			/* current CPU */
+	__u32 last_cpu;			/* Last CPU thread ran on */
+	__u32 status;			/* Thread synchronous flags */
+	mm_segment_t addr_limit;	/* user-level address space limit */
+	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
+	struct restart_block restart_block;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	__u64 ac_stamp;
+	__u64 ac_leave;
+	__u64 ac_stime;
+	__u64 ac_utime;
+#endif
+};
+
+#define THREAD_SIZE			KERNEL_STACK_SIZE
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.addr_limit	= KERNEL_DS,		\
+	.preempt_count	= INIT_PREEMPT_COUNT,	\
+	.restart_block = {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+}
+
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
+#ifndef ASM_OFFSETS_C
+/* how to get the thread information struct from C */
+#define current_thread_info()	((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
+#define alloc_thread_info_node(tsk, node)	\
+		((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
+#define task_thread_info(tsk)	((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
+#else
+#define current_thread_info()	((struct thread_info *) 0)
+#define alloc_thread_info_node(tsk, node)	((struct thread_info *) 0)
+#define task_thread_info(tsk)	((struct thread_info *) 0)
+#endif
+#define free_thread_info(ti)	/* nothing */
+#define task_stack_page(tsk)	((void *)(tsk))
+
+#define __HAVE_THREAD_FUNCTIONS
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#define setup_thread_stack(p, org)			\
+	*task_thread_info(p) = *task_thread_info(org);	\
+	task_thread_info(p)->ac_stime = 0;		\
+	task_thread_info(p)->ac_utime = 0;		\
+	task_thread_info(p)->task = (p);
+#else
+#define setup_thread_stack(p, org) \
+	*task_thread_info(p) = *task_thread_info(org); \
+	task_thread_info(p)->task = (p);
+#endif
+#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET)
+
+#define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+#define alloc_task_struct_node(node)						\
+({										\
+	struct page *page = alloc_pages_node(node, GFP_KERNEL | __GFP_COMP,	\
+					     KERNEL_STACK_SIZE_ORDER);		\
+	struct task_struct *ret = page ? page_address(page) : NULL;		\
+										\
+	ret;									\
+})
+#define free_task_struct(tsk)	free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER)
+
+#endif /* !__ASSEMBLY */
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to access
+ * - pending work-to-be-done flags are in least-significant 16 bits, other flags
+ *   in top 16 bits
+ */
+#define TIF_SIGPENDING		0	/* signal pending */
+#define TIF_NEED_RESCHED	1	/* rescheduling necessary */
+#define TIF_SYSCALL_TRACE	2	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	3	/* syscall auditing active */
+#define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
+#define TIF_NOTIFY_RESUME	6	/* resumption notification requested */
+#define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_MEMDIE		17	/* is terminating due to OOM killer */
+#define TIF_MCA_INIT		18	/* this task is processing MCA or INIT */
+#define TIF_DB_DISABLED		19	/* debug trap disabled for fsyscall */
+#define TIF_FREEZE		20	/* is freezing for suspend */
+#define TIF_RESTORE_RSE		21	/* user RBS is newer than kernel RBS */
+
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+#define _TIF_SYSCALL_TRACEAUDIT	(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
+#define _TIF_MCA_INIT		(1 << TIF_MCA_INIT)
+#define _TIF_DB_DISABLED	(1 << TIF_DB_DISABLED)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
+#define _TIF_RESTORE_RSE	(1 << TIF_RESTORE_RSE)
+
+/* "work to do on user-return" bits */
+#define TIF_ALLWORK_MASK	(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\
+				 _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE)
+/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
+#define TIF_WORK_MASK		(TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
+
+#define TS_POLLING		1 	/* true if in idle loop and not sleeping */
+#define TS_RESTORE_SIGMASK	2	/* restore signal mask in do_signal() */
+
+#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
+
+#ifndef __ASSEMBLY__
+#define HAVE_SET_RESTORE_SIGMASK	1
+static inline void set_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	ti->status |= TS_RESTORE_SIGMASK;
+	set_bit(TIF_SIGPENDING, &ti->flags);
+}
+#endif	/* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_THREAD_INFO_H */
diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h
new file mode 100644
index 00000000..86c7db86
--- /dev/null
+++ b/arch/ia64/include/asm/timex.h
@@ -0,0 +1,45 @@
+#ifndef _ASM_IA64_TIMEX_H
+#define _ASM_IA64_TIMEX_H
+
+/*
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+/*
+ * 2001/01/18 davidm	Removed CLOCK_TICK_RATE.  It makes no sense on IA-64.
+ *			Also removed cacheflush_time as it's entirely unused.
+ */
+
+#include <asm/intrinsics.h>
+#include <asm/processor.h>
+
+typedef unsigned long cycles_t;
+
+extern void (*ia64_udelay)(unsigned long usecs);
+
+/*
+ * For performance reasons, we don't want to define CLOCK_TICK_TRATE as
+ * local_cpu_data->itc_rate.  Fortunately, we don't have to, either: according to George
+ * Anzinger, 1/CLOCK_TICK_RATE is taken as the resolution of the timer clock.  The time
+ * calculation assumes that you will use enough of these so that your tick size <= 1/HZ.
+ * If the calculation shows that your CLOCK_TICK_RATE can not supply exactly 1/HZ ticks,
+ * the actual value is calculated and used to update the wall clock each jiffie.  Setting
+ * the CLOCK_TICK_RATE to x*HZ insures that the calculation will find no errors.  Hence we
+ * pick a multiple of HZ which gives us a (totally virtual) CLOCK_TICK_RATE of about
+ * 100MHz.
+ */
+#define CLOCK_TICK_RATE		(HZ * 100000UL)
+
+static inline cycles_t
+get_cycles (void)
+{
+	cycles_t ret;
+
+	ret = ia64_getreg(_IA64_REG_AR_ITC);
+	return ret;
+}
+
+extern void ia64_cpu_local_tick (void);
+extern unsigned long long ia64_native_sched_clock (void);
+
+#endif /* _ASM_IA64_TIMEX_H */
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
new file mode 100644
index 00000000..c3ffe3e5
--- /dev/null
+++ b/arch/ia64/include/asm/tlb.h
@@ -0,0 +1,283 @@
+#ifndef _ASM_IA64_TLB_H
+#define _ASM_IA64_TLB_H
+/*
+ * Based on <asm-generic/tlb.h>.
+ *
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+/*
+ * Removing a translation from a page table (including TLB-shootdown) is a four-step
+ * procedure:
+ *
+ *	(1) Flush (virtual) caches --- ensures virtual memory is coherent with kernel memory
+ *	    (this is a no-op on ia64).
+ *	(2) Clear the relevant portions of the page-table
+ *	(3) Flush the TLBs --- ensures that stale content is gone from CPU TLBs
+ *	(4) Release the pages that were freed up in step (2).
+ *
+ * Note that the ordering of these steps is crucial to avoid races on MP machines.
+ *
+ * The Linux kernel defines several platform-specific hooks for TLB-shootdown.  When
+ * unmapping a portion of the virtual address space, these hooks are called according to
+ * the following template:
+ *
+ *	tlb <- tlb_gather_mmu(mm, full_mm_flush);	// start unmap for address space MM
+ *	{
+ *	  for each vma that needs a shootdown do {
+ *	    tlb_start_vma(tlb, vma);
+ *	      for each page-table-entry PTE that needs to be removed do {
+ *		tlb_remove_tlb_entry(tlb, pte, address);
+ *		if (pte refers to a normal page) {
+ *		  tlb_remove_page(tlb, page);
+ *		}
+ *	      }
+ *	    tlb_end_vma(tlb, vma);
+ *	  }
+ *	}
+ *	tlb_finish_mmu(tlb, start, end);	// finish unmap for address space MM
+ */
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/machvec.h>
+
+#ifdef CONFIG_SMP
+# define tlb_fast_mode(tlb)	((tlb)->nr == ~0U)
+#else
+# define tlb_fast_mode(tlb)	(1)
+#endif
+
+/*
+ * If we can't allocate a page to make a big batch of page pointers
+ * to work on, then just handle a few from the on-stack structure.
+ */
+#define	IA64_GATHER_BUNDLE	8
+
+struct mmu_gather {
+	struct mm_struct	*mm;
+	unsigned int		nr;		/* == ~0U => fast mode */
+	unsigned int		max;
+	unsigned char		fullmm;		/* non-zero means full mm flush */
+	unsigned char		need_flush;	/* really unmapped some PTEs? */
+	unsigned long		start_addr;
+	unsigned long		end_addr;
+	struct page		**pages;
+	struct page		*local[IA64_GATHER_BUNDLE];
+};
+
+struct ia64_tr_entry {
+	u64 ifa;
+	u64 itir;
+	u64 pte;
+	u64 rr;
+}; /*Record for tr entry!*/
+
+extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
+extern void ia64_ptr_entry(u64 target_mask, int slot);
+
+extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
+
+/*
+ region register macros
+*/
+#define RR_TO_VE(val)   (((val) >> 0) & 0x0000000000000001)
+#define RR_VE(val)	(((val) & 0x0000000000000001) << 0)
+#define RR_VE_MASK	0x0000000000000001L
+#define RR_VE_SHIFT	0
+#define RR_TO_PS(val)	(((val) >> 2) & 0x000000000000003f)
+#define RR_PS(val)	(((val) & 0x000000000000003f) << 2)
+#define RR_PS_MASK	0x00000000000000fcL
+#define RR_PS_SHIFT	2
+#define RR_RID_MASK	0x00000000ffffff00L
+#define RR_TO_RID(val) 	((val >> 8) & 0xffffff)
+
+/*
+ * Flush the TLB for address range START to END and, if not in fast mode, release the
+ * freed pages that where gathered up to this point.
+ */
+static inline void
+ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	unsigned int nr;
+
+	if (!tlb->need_flush)
+		return;
+	tlb->need_flush = 0;
+
+	if (tlb->fullmm) {
+		/*
+		 * Tearing down the entire address space.  This happens both as a result
+		 * of exit() and execve().  The latter case necessitates the call to
+		 * flush_tlb_mm() here.
+		 */
+		flush_tlb_mm(tlb->mm);
+	} else if (unlikely (end - start >= 1024*1024*1024*1024UL
+			     || REGION_NUMBER(start) != REGION_NUMBER(end - 1)))
+	{
+		/*
+		 * If we flush more than a tera-byte or across regions, we're probably
+		 * better off just flushing the entire TLB(s).  This should be very rare
+		 * and is not worth optimizing for.
+		 */
+		flush_tlb_all();
+	} else {
+		/*
+		 * XXX fix me: flush_tlb_range() should take an mm pointer instead of a
+		 * vma pointer.
+		 */
+		struct vm_area_struct vma;
+
+		vma.vm_mm = tlb->mm;
+		/* flush the address range from the tlb: */
+		flush_tlb_range(&vma, start, end);
+		/* now flush the virt. page-table area mapping the address range: */
+		flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
+	}
+
+	/* lastly, release the freed pages */
+	nr = tlb->nr;
+	if (!tlb_fast_mode(tlb)) {
+		unsigned long i;
+		tlb->nr = 0;
+		tlb->start_addr = ~0UL;
+		for (i = 0; i < nr; ++i)
+			free_page_and_swap_cache(tlb->pages[i]);
+	}
+}
+
+static inline void __tlb_alloc_page(struct mmu_gather *tlb)
+{
+	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+
+	if (addr) {
+		tlb->pages = (void *)addr;
+		tlb->max = PAGE_SIZE / sizeof(void *);
+	}
+}
+
+
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+{
+	tlb->mm = mm;
+	tlb->max = ARRAY_SIZE(tlb->local);
+	tlb->pages = tlb->local;
+	/*
+	 * Use fast mode if only 1 CPU is online.
+	 *
+	 * It would be tempting to turn on fast-mode for full_mm_flush as well.  But this
+	 * doesn't work because of speculative accesses and software prefetching: the page
+	 * table of "mm" may (and usually is) the currently active page table and even
+	 * though the kernel won't do any user-space accesses during the TLB shoot down, a
+	 * compiler might use speculation or lfetch.fault on what happens to be a valid
+	 * user-space address.  This in turn could trigger a TLB miss fault (or a VHPT
+	 * walk) and re-insert a TLB entry we just removed.  Slow mode avoids such
+	 * problems.  (We could make fast-mode work by switching the current task to a
+	 * different "mm" during the shootdown.) --davidm 08/02/2002
+	 */
+	tlb->nr = (num_online_cpus() == 1) ? ~0U : 0;
+	tlb->fullmm = full_mm_flush;
+	tlb->start_addr = ~0UL;
+}
+
+/*
+ * Called at the end of the shootdown operation to free up any resources that were
+ * collected.
+ */
+static inline void
+tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	/*
+	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
+	 * tlb->end_addr.
+	 */
+	ia64_tlb_flush_mmu(tlb, start, end);
+
+	/* keep the page table cache within bounds */
+	check_pgt_cache();
+
+	if (tlb->pages != tlb->local)
+		free_pages((unsigned long)tlb->pages, 0);
+}
+
+/*
+ * Logically, this routine frees PAGE.  On MP machines, the actual freeing of the page
+ * must be delayed until after the TLB has been flushed (see comments at the beginning of
+ * this file).
+ */
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	tlb->need_flush = 1;
+
+	if (tlb_fast_mode(tlb)) {
+		free_page_and_swap_cache(page);
+		return 1; /* avoid calling tlb_flush_mmu */
+	}
+
+	if (!tlb->nr && tlb->pages == tlb->local)
+		__tlb_alloc_page(tlb);
+
+	tlb->pages[tlb->nr++] = page;
+	VM_BUG_ON(tlb->nr > tlb->max);
+
+	return tlb->max - tlb->nr;
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	if (!__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
+}
+
+/*
+ * Remove TLB entry for PTE mapped at virtual address ADDRESS.  This is called for any
+ * PTE, not just those pointing to (normal) physical memory.
+ */
+static inline void
+__tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
+{
+	if (tlb->start_addr == ~0UL)
+		tlb->start_addr = address;
+	tlb->end_addr = address + PAGE_SIZE;
+}
+
+#define tlb_migrate_finish(mm)	platform_tlb_migrate_finish(mm)
+
+#define tlb_start_vma(tlb, vma)			do { } while (0)
+#define tlb_end_vma(tlb, vma)			do { } while (0)
+
+#define tlb_remove_tlb_entry(tlb, ptep, addr)		\
+do {							\
+	tlb->need_flush = 1;				\
+	__tlb_remove_tlb_entry(tlb, ptep, addr);	\
+} while (0)
+
+#define pte_free_tlb(tlb, ptep, address)		\
+do {							\
+	tlb->need_flush = 1;				\
+	__pte_free_tlb(tlb, ptep, address);		\
+} while (0)
+
+#define pmd_free_tlb(tlb, ptep, address)		\
+do {							\
+	tlb->need_flush = 1;				\
+	__pmd_free_tlb(tlb, ptep, address);		\
+} while (0)
+
+#define pud_free_tlb(tlb, pudp, address)		\
+do {							\
+	tlb->need_flush = 1;				\
+	__pud_free_tlb(tlb, pudp, address);		\
+} while (0)
+
+#endif /* _ASM_IA64_TLB_H */
diff --git a/arch/ia64/include/asm/tlbflush.h b/arch/ia64/include/asm/tlbflush.h
new file mode 100644
index 00000000..3be25dfe
--- /dev/null
+++ b/arch/ia64/include/asm/tlbflush.h
@@ -0,0 +1,102 @@
+#ifndef _ASM_IA64_TLBFLUSH_H
+#define _ASM_IA64_TLBFLUSH_H
+
+/*
+ * Copyright (C) 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#include <linux/mm.h>
+
+#include <asm/intrinsics.h>
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+
+/*
+ * Now for some TLB flushing routines.  This is the kind of stuff that
+ * can be very expensive, so try to avoid them whenever possible.
+ */
+extern void setup_ptcg_sem(int max_purges, int from_palo);
+
+/*
+ * Flush everything (kernel mapping may also have changed due to
+ * vmalloc/vfree).
+ */
+extern void local_flush_tlb_all (void);
+
+#ifdef CONFIG_SMP
+  extern void smp_flush_tlb_all (void);
+  extern void smp_flush_tlb_mm (struct mm_struct *mm);
+  extern void smp_flush_tlb_cpumask (cpumask_t xcpumask);
+# define flush_tlb_all()	smp_flush_tlb_all()
+#else
+# define flush_tlb_all()	local_flush_tlb_all()
+# define smp_flush_tlb_cpumask(m) local_flush_tlb_all()
+#endif
+
+static inline void
+local_finish_flush_tlb_mm (struct mm_struct *mm)
+{
+	if (mm == current->active_mm)
+		activate_context(mm);
+}
+
+/*
+ * Flush a specified user mapping.  This is called, e.g., as a result of fork() and
+ * exit().  fork() ends up here because the copy-on-write mechanism needs to write-protect
+ * the PTEs of the parent task.
+ */
+static inline void
+flush_tlb_mm (struct mm_struct *mm)
+{
+	if (!mm)
+		return;
+
+	set_bit(mm->context, ia64_ctx.flushmap);
+	mm->context = 0;
+
+	if (atomic_read(&mm->mm_users) == 0)
+		return;		/* happens as a result of exit_mmap() */
+
+#ifdef CONFIG_SMP
+	smp_flush_tlb_mm(mm);
+#else
+	local_finish_flush_tlb_mm(mm);
+#endif
+}
+
+extern void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end);
+
+/*
+ * Page-granular tlb flush.
+ */
+static inline void
+flush_tlb_page (struct vm_area_struct *vma, unsigned long addr)
+{
+#ifdef CONFIG_SMP
+	flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + PAGE_SIZE);
+#else
+	if (vma->vm_mm == current->active_mm)
+		ia64_ptcl(addr, (PAGE_SHIFT << 2));
+	else
+		vma->vm_mm->context = 0;
+#endif
+}
+
+/*
+ * Flush the local TLB. Invoked from another cpu using an IPI.
+ */
+#ifdef CONFIG_SMP
+void smp_local_flush_tlb(void);
+#else
+#define smp_local_flush_tlb()
+#endif
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	flush_tlb_all();	/* XXX fix me */
+}
+
+#endif /* _ASM_IA64_TLBFLUSH_H */
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
new file mode 100644
index 00000000..09f64675
--- /dev/null
+++ b/arch/ia64/include/asm/topology.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2002, Erich Focht, NEC
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef _ASM_IA64_TOPOLOGY_H
+#define _ASM_IA64_TOPOLOGY_H
+
+#include <asm/acpi.h>
+#include <asm/numa.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_NUMA
+
+/* Nodes w/o CPUs are preferred for memory allocations, see build_zonelists */
+#define PENALTY_FOR_NODE_WITH_CPUS 255
+
+/*
+ * Distance above which we begin to use zone reclaim
+ */
+#define RECLAIM_DISTANCE 15
+
+/*
+ * Returns a bitmask of CPUs on Node 'node'.
+ */
+#define cpumask_of_node(node) ((node) == -1 ?				\
+			       cpu_all_mask :				\
+			       &node_to_cpu_mask[node])
+
+/*
+ * Returns the number of the node containing Node 'nid'.
+ * Not implemented here. Multi-level hierarchies detected with
+ * the help of node_distance().
+ */
+#define parent_node(nid) (nid)
+
+/*
+ * Determines the node for a given pci bus
+ */
+#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node
+
+void build_cpu_to_node_map(void);
+
+#define SD_CPU_INIT (struct sched_domain) {		\
+	.parent			= NULL,			\
+	.child			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 1,			\
+	.max_interval		= 4,			\
+	.busy_factor		= 64,			\
+	.imbalance_pct		= 125,			\
+	.cache_nice_tries	= 2,			\
+	.busy_idx		= 2,			\
+	.idle_idx		= 1,			\
+	.newidle_idx		= 0,			\
+	.wake_idx		= 0,			\
+	.forkexec_idx		= 0,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_EXEC	\
+				| SD_BALANCE_FORK	\
+				| SD_WAKE_AFFINE,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+
+/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.parent			= NULL,			\
+	.child			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 8,			\
+	.max_interval		= 8*(min(num_online_cpus(), 32U)), \
+	.busy_factor		= 64,			\
+	.imbalance_pct		= 125,			\
+	.cache_nice_tries	= 2,			\
+	.busy_idx		= 3,			\
+	.idle_idx		= 2,			\
+	.newidle_idx		= 0,			\
+	.wake_idx		= 0,			\
+	.forkexec_idx		= 0,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_EXEC	\
+				| SD_BALANCE_FORK	\
+				| SD_SERIALIZE,		\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 64,			\
+	.nr_balance_failed	= 0,			\
+}
+
+#endif /* CONFIG_NUMA */
+
+#ifdef CONFIG_SMP
+#define topology_physical_package_id(cpu)	(cpu_data(cpu)->socket_id)
+#define topology_core_id(cpu)			(cpu_data(cpu)->core_id)
+#define topology_core_cpumask(cpu)		(&cpu_core_map[cpu])
+#define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
+#define smt_capable() 				(smp_num_siblings > 1)
+#endif
+
+extern void arch_fix_phys_package_id(int num, u32 slot);
+
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
+				 cpu_all_mask :				\
+				 cpumask_of_node(pcibus_to_node(bus)))
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_IA64_TOPOLOGY_H */
diff --git a/arch/ia64/include/asm/types.h b/arch/ia64/include/asm/types.h
new file mode 100644
index 00000000..82b3939d
--- /dev/null
+++ b/arch/ia64/include/asm/types.h
@@ -0,0 +1,46 @@
+#ifndef _ASM_IA64_TYPES_H
+#define _ASM_IA64_TYPES_H
+
+/*
+ * This file is never included by application software unless explicitly
+ * requested (e.g., via linux/types.h) in which case the application is
+ * Linux specific so (user-) name space pollution is not a major issue.
+ * However, for interoperability, libraries still need to be careful to
+ * avoid naming clashes.
+ *
+ * Based on <asm-alpha/types.h>.
+ *
+ * Modified 1998-2000, 2002
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+#ifdef __KERNEL__
+#include <asm-generic/int-ll64.h>
+#else
+#include <asm-generic/int-l64.h>
+#endif
+
+#ifdef __ASSEMBLY__
+# define __IA64_UL(x)		(x)
+# define __IA64_UL_CONST(x)	x
+
+#else
+# define __IA64_UL(x)		((unsigned long)(x))
+# define __IA64_UL_CONST(x)	x##UL
+
+typedef unsigned int umode_t;
+
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+# ifdef __KERNEL__
+
+struct fnptr {
+	unsigned long ip;
+	unsigned long gp;
+};
+
+# endif /* __KERNEL__ */
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_TYPES_H */
diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
new file mode 100644
index 00000000..449c8c0f
--- /dev/null
+++ b/arch/ia64/include/asm/uaccess.h
@@ -0,0 +1,401 @@
+#ifndef _ASM_IA64_UACCESS_H
+#define _ASM_IA64_UACCESS_H
+
+/*
+ * This file defines various macros to transfer memory areas across
+ * the user/kernel boundary.  This needs to be done carefully because
+ * this code is executed in kernel mode and uses user-specified
+ * addresses.  Thus, we need to be careful not to let the user to
+ * trick us into accessing kernel memory that would normally be
+ * inaccessible.  This code is also fairly performance sensitive,
+ * so we want to spend as little time doing safety checks as
+ * possible.
+ *
+ * To make matters a bit more interesting, these macros sometimes also
+ * called from within the kernel itself, in which case the address
+ * validity check must be skipped.  The get_fs() macro tells us what
+ * to do: if get_fs()==USER_DS, checking is performed, if
+ * get_fs()==KERNEL_DS, checking is bypassed.
+ *
+ * Note that even if the memory area specified by the user is in a
+ * valid address range, it is still possible that we'll get a page
+ * fault while accessing it.  This is handled by filling out an
+ * exception handler fixup entry for each instruction that has the
+ * potential to fault.  When such a fault occurs, the page fault
+ * handler checks to see whether the faulting instruction has a fixup
+ * associated and, if so, sets r8 to -EFAULT and clears r9 to 0 and
+ * then resumes execution at the continuation point.
+ *
+ * Based on <asm-alpha/uaccess.h>.
+ *
+ * Copyright (C) 1998, 1999, 2001-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/page-flags.h>
+#include <linux/mm.h>
+
+#include <asm/intrinsics.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+
+/*
+ * For historical reasons, the following macros are grossly misnamed:
+ */
+#define KERNEL_DS	((mm_segment_t) { ~0UL })		/* cf. access_ok() */
+#define USER_DS		((mm_segment_t) { TASK_SIZE-1 })	/* cf. access_ok() */
+
+#define VERIFY_READ	0
+#define VERIFY_WRITE	1
+
+#define get_ds()  (KERNEL_DS)
+#define get_fs()  (current_thread_info()->addr_limit)
+#define set_fs(x) (current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b)	((a).seg == (b).seg)
+
+/*
+ * When accessing user memory, we need to make sure the entire area really is in
+ * user-level space.  In order to do this efficiently, we make sure that the page at
+ * address TASK_SIZE is never valid.  We also need to make sure that the address doesn't
+ * point inside the virtually mapped linear page table.
+ */
+#define __access_ok(addr, size, segment)						\
+({											\
+	__chk_user_ptr(addr);								\
+	(likely((unsigned long) (addr) <= (segment).seg)				\
+	 && ((segment).seg == KERNEL_DS.seg						\
+	     || likely(REGION_OFFSET((unsigned long) (addr)) < RGN_MAP_LIMIT)));	\
+})
+#define access_ok(type, addr, size)	__access_ok((addr), (size), get_fs())
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * Careful to not
+ * (a) re-use the arguments for side effects (sizeof/typeof is ok)
+ * (b) require any knowledge of processes at this stage
+ */
+#define put_user(x, ptr)	__put_user_check((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr)), get_fs())
+#define get_user(x, ptr)	__get_user_check((x), (ptr), sizeof(*(ptr)), get_fs())
+
+/*
+ * The "__xxx" versions do not do address space checking, useful when
+ * doing multiple accesses to the same area (the programmer has to do the
+ * checks by hand with "access_ok()")
+ */
+#define __put_user(x, ptr)	__put_user_nocheck((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr)))
+#define __get_user(x, ptr)	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+
+extern long __put_user_unaligned_unknown (void);
+
+#define __put_user_unaligned(x, ptr)								\
+({												\
+	long __ret;										\
+	switch (sizeof(*(ptr))) {								\
+		case 1: __ret = __put_user((x), (ptr)); break;					\
+		case 2: __ret = (__put_user((x), (u8 __user *)(ptr)))				\
+			| (__put_user((x) >> 8, ((u8 __user *)(ptr) + 1))); break;		\
+		case 4: __ret = (__put_user((x), (u16 __user *)(ptr)))				\
+			| (__put_user((x) >> 16, ((u16 __user *)(ptr) + 1))); break;		\
+		case 8: __ret = (__put_user((x), (u32 __user *)(ptr)))				\
+			| (__put_user((x) >> 32, ((u32 __user *)(ptr) + 1))); break;		\
+		default: __ret = __put_user_unaligned_unknown();				\
+	}											\
+	__ret;											\
+})
+
+extern long __get_user_unaligned_unknown (void);
+
+#define __get_user_unaligned(x, ptr)								\
+({												\
+	long __ret;										\
+	switch (sizeof(*(ptr))) {								\
+		case 1: __ret = __get_user((x), (ptr)); break;					\
+		case 2: __ret = (__get_user((x), (u8 __user *)(ptr)))				\
+			| (__get_user((x) >> 8, ((u8 __user *)(ptr) + 1))); break;		\
+		case 4: __ret = (__get_user((x), (u16 __user *)(ptr)))				\
+			| (__get_user((x) >> 16, ((u16 __user *)(ptr) + 1))); break;		\
+		case 8: __ret = (__get_user((x), (u32 __user *)(ptr)))				\
+			| (__get_user((x) >> 32, ((u32 __user *)(ptr) + 1))); break;		\
+		default: __ret = __get_user_unaligned_unknown();				\
+	}											\
+	__ret;											\
+})
+
+#ifdef ASM_SUPPORTED
+  struct __large_struct { unsigned long buf[100]; };
+# define __m(x) (*(struct __large_struct __user *)(x))
+
+/* We need to declare the __ex_table section before we can use it in .xdata.  */
+asm (".section \"__ex_table\", \"a\"\n\t.previous");
+
+# define __get_user_size(val, addr, n, err)							\
+do {												\
+	register long __gu_r8 asm ("r8") = 0;							\
+	register long __gu_r9 asm ("r9");							\
+	asm ("\n[1:]\tld"#n" %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n"	\
+	     "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n"						\
+	     "[1:]"										\
+	     : "=r"(__gu_r9), "=r"(__gu_r8) : "m"(__m(addr)), "1"(__gu_r8));			\
+	(err) = __gu_r8;									\
+	(val) = __gu_r9;									\
+} while (0)
+
+/*
+ * The "__put_user_size()" macro tells gcc it reads from memory instead of writing it.  This
+ * is because they do not write to any memory gcc knows about, so there are no aliasing
+ * issues.
+ */
+# define __put_user_size(val, addr, n, err)							\
+do {												\
+	register long __pu_r8 asm ("r8") = 0;							\
+	asm volatile ("\n[1:]\tst"#n" %1=%r2%P1\t// %0 gets overwritten by exception handler\n"	\
+		      "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n"					\
+		      "[1:]"									\
+		      : "=r"(__pu_r8) : "m"(__m(addr)), "rO"(val), "0"(__pu_r8));		\
+	(err) = __pu_r8;									\
+} while (0)
+
+#else /* !ASM_SUPPORTED */
+# define RELOC_TYPE	2	/* ip-rel */
+# define __get_user_size(val, addr, n, err)				\
+do {									\
+	__ld_user("__ex_table", (unsigned long) addr, n, RELOC_TYPE);	\
+	(err) = ia64_getreg(_IA64_REG_R8);				\
+	(val) = ia64_getreg(_IA64_REG_R9);				\
+} while (0)
+# define __put_user_size(val, addr, n, err)							\
+do {												\
+	__st_user("__ex_table", (unsigned long) addr, n, RELOC_TYPE, (unsigned long) (val));	\
+	(err) = ia64_getreg(_IA64_REG_R8);							\
+} while (0)
+#endif /* !ASM_SUPPORTED */
+
+extern void __get_user_unknown (void);
+
+/*
+ * Evaluating arguments X, PTR, SIZE, and SEGMENT may involve subroutine-calls, which
+ * could clobber r8 and r9 (among others).  Thus, be careful not to evaluate it while
+ * using r8/r9.
+ */
+#define __do_get_user(check, x, ptr, size, segment)					\
+({											\
+	const __typeof__(*(ptr)) __user *__gu_ptr = (ptr);				\
+	__typeof__ (size) __gu_size = (size);						\
+	long __gu_err = -EFAULT;							\
+	unsigned long __gu_val = 0;							\
+	if (!check || __access_ok(__gu_ptr, size, segment))				\
+		switch (__gu_size) {							\
+		      case 1: __get_user_size(__gu_val, __gu_ptr, 1, __gu_err); break;	\
+		      case 2: __get_user_size(__gu_val, __gu_ptr, 2, __gu_err); break;	\
+		      case 4: __get_user_size(__gu_val, __gu_ptr, 4, __gu_err); break;	\
+		      case 8: __get_user_size(__gu_val, __gu_ptr, 8, __gu_err); break;	\
+		      default: __get_user_unknown(); break;				\
+		}									\
+	(x) = (__typeof__(*(__gu_ptr))) __gu_val;					\
+	__gu_err;									\
+})
+
+#define __get_user_nocheck(x, ptr, size)	__do_get_user(0, x, ptr, size, KERNEL_DS)
+#define __get_user_check(x, ptr, size, segment)	__do_get_user(1, x, ptr, size, segment)
+
+extern void __put_user_unknown (void);
+
+/*
+ * Evaluating arguments X, PTR, SIZE, and SEGMENT may involve subroutine-calls, which
+ * could clobber r8 (among others).  Thus, be careful not to evaluate them while using r8.
+ */
+#define __do_put_user(check, x, ptr, size, segment)					\
+({											\
+	__typeof__ (x) __pu_x = (x);							\
+	__typeof__ (*(ptr)) __user *__pu_ptr = (ptr);					\
+	__typeof__ (size) __pu_size = (size);						\
+	long __pu_err = -EFAULT;							\
+											\
+	if (!check || __access_ok(__pu_ptr, __pu_size, segment))			\
+		switch (__pu_size) {							\
+		      case 1: __put_user_size(__pu_x, __pu_ptr, 1, __pu_err); break;	\
+		      case 2: __put_user_size(__pu_x, __pu_ptr, 2, __pu_err); break;	\
+		      case 4: __put_user_size(__pu_x, __pu_ptr, 4, __pu_err); break;	\
+		      case 8: __put_user_size(__pu_x, __pu_ptr, 8, __pu_err); break;	\
+		      default: __put_user_unknown(); break;				\
+		}									\
+	__pu_err;									\
+})
+
+#define __put_user_nocheck(x, ptr, size)	__do_put_user(0, x, ptr, size, KERNEL_DS)
+#define __put_user_check(x, ptr, size, segment)	__do_put_user(1, x, ptr, size, segment)
+
+/*
+ * Complex access routines
+ */
+extern unsigned long __must_check __copy_user (void __user *to, const void __user *from,
+					       unsigned long count);
+
+static inline unsigned long
+__copy_to_user (void __user *to, const void *from, unsigned long count)
+{
+	return __copy_user(to, (__force void __user *) from, count);
+}
+
+static inline unsigned long
+__copy_from_user (void *to, const void __user *from, unsigned long count)
+{
+	return __copy_user((__force void __user *) to, from, count);
+}
+
+#define __copy_to_user_inatomic		__copy_to_user
+#define __copy_from_user_inatomic	__copy_from_user
+#define copy_to_user(to, from, n)							\
+({											\
+	void __user *__cu_to = (to);							\
+	const void *__cu_from = (from);							\
+	long __cu_len = (n);								\
+											\
+	if (__access_ok(__cu_to, __cu_len, get_fs()))					\
+		__cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len);	\
+	__cu_len;									\
+})
+
+#define copy_from_user(to, from, n)							\
+({											\
+	void *__cu_to = (to);								\
+	const void __user *__cu_from = (from);						\
+	long __cu_len = (n);								\
+											\
+	__chk_user_ptr(__cu_from);							\
+	if (__access_ok(__cu_from, __cu_len, get_fs()))					\
+		__cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len);	\
+	__cu_len;									\
+})
+
+#define __copy_in_user(to, from, size)	__copy_user((to), (from), (size))
+
+static inline unsigned long
+copy_in_user (void __user *to, const void __user *from, unsigned long n)
+{
+	if (likely(access_ok(VERIFY_READ, from, n) && access_ok(VERIFY_WRITE, to, n)))
+		n = __copy_user(to, from, n);
+	return n;
+}
+
+extern unsigned long __do_clear_user (void __user *, unsigned long);
+
+#define __clear_user(to, n)		__do_clear_user(to, n)
+
+#define clear_user(to, n)					\
+({								\
+	unsigned long __cu_len = (n);				\
+	if (__access_ok(to, __cu_len, get_fs()))		\
+		__cu_len = __do_clear_user(to, __cu_len);	\
+	__cu_len;						\
+})
+
+
+/*
+ * Returns: -EFAULT if exception before terminator, N if the entire buffer filled, else
+ * strlen.
+ */
+extern long __must_check __strncpy_from_user (char *to, const char __user *from, long to_len);
+
+#define strncpy_from_user(to, from, n)					\
+({									\
+	const char __user * __sfu_from = (from);			\
+	long __sfu_ret = -EFAULT;					\
+	if (__access_ok(__sfu_from, 0, get_fs()))			\
+		__sfu_ret = __strncpy_from_user((to), __sfu_from, (n));	\
+	__sfu_ret;							\
+})
+
+/* Returns: 0 if bad, string length+1 (memory size) of string if ok */
+extern unsigned long __strlen_user (const char __user *);
+
+#define strlen_user(str)				\
+({							\
+	const char __user *__su_str = (str);		\
+	unsigned long __su_ret = 0;			\
+	if (__access_ok(__su_str, 0, get_fs()))		\
+		__su_ret = __strlen_user(__su_str);	\
+	__su_ret;					\
+})
+
+/*
+ * Returns: 0 if exception before NUL or reaching the supplied limit
+ * (N), a value greater than N if the limit would be exceeded, else
+ * strlen.
+ */
+extern unsigned long __strnlen_user (const char __user *, long);
+
+#define strnlen_user(str, len)					\
+({								\
+	const char __user *__su_str = (str);			\
+	unsigned long __su_ret = 0;				\
+	if (__access_ok(__su_str, 0, get_fs()))			\
+		__su_ret = __strnlen_user(__su_str, len);	\
+	__su_ret;						\
+})
+
+/* Generic code can't deal with the location-relative format that we use for compactness.  */
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
+
+struct exception_table_entry {
+	int addr;	/* location-relative address of insn this fixup is for */
+	int cont;	/* location-relative continuation addr.; if bit 2 is set, r9 is set to 0 */
+};
+
+extern void ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e);
+extern const struct exception_table_entry *search_exception_tables (unsigned long addr);
+
+static inline int
+ia64_done_with_exception (struct pt_regs *regs)
+{
+	const struct exception_table_entry *e;
+	e = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
+	if (e) {
+		ia64_handle_exception(regs, e);
+		return 1;
+	}
+	return 0;
+}
+
+#define ARCH_HAS_TRANSLATE_MEM_PTR	1
+static __inline__ char *
+xlate_dev_mem_ptr (unsigned long p)
+{
+	struct page *page;
+	char * ptr;
+
+	page = pfn_to_page(p >> PAGE_SHIFT);
+	if (PageUncached(page))
+		ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+	else
+		ptr = __va(p);
+
+	return ptr;
+}
+
+/*
+ * Convert a virtual cached kernel memory pointer to an uncached pointer
+ */
+static __inline__ char *
+xlate_dev_kmem_ptr (char * p)
+{
+	struct page *page;
+	char * ptr;
+
+	page = virt_to_page((unsigned long)p);
+	if (PageUncached(page))
+		ptr = (char *)__pa(p) + __IA64_UNCACHED_OFFSET;
+	else
+		ptr = p;
+
+	return ptr;
+}
+
+#endif /* _ASM_IA64_UACCESS_H */
diff --git a/arch/ia64/include/asm/ucontext.h b/arch/ia64/include/asm/ucontext.h
new file mode 100644
index 00000000..bf573dc8
--- /dev/null
+++ b/arch/ia64/include/asm/ucontext.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_IA64_UCONTEXT_H
+#define _ASM_IA64_UCONTEXT_H
+
+struct ucontext {
+	struct sigcontext uc_mcontext;
+};
+
+#define uc_link		uc_mcontext.sc_gr[0]	/* wrong type; nobody cares */
+#define uc_sigmask	uc_mcontext.sc_sigmask
+#define uc_stack	uc_mcontext.sc_stack
+
+#endif /* _ASM_IA64_UCONTEXT_H */
diff --git a/arch/ia64/include/asm/unaligned.h b/arch/ia64/include/asm/unaligned.h
new file mode 100644
index 00000000..7bddc7f5
--- /dev/null
+++ b/arch/ia64/include/asm/unaligned.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_IA64_UNALIGNED_H
+#define _ASM_IA64_UNALIGNED_H
+
+#include <linux/unaligned/le_struct.h>
+#include <linux/unaligned/be_byteshift.h>
+#include <linux/unaligned/generic.h>
+
+#define get_unaligned	__get_unaligned_le
+#define put_unaligned	__put_unaligned_le
+
+#endif /* _ASM_IA64_UNALIGNED_H */
diff --git a/arch/ia64/include/asm/uncached.h b/arch/ia64/include/asm/uncached.h
new file mode 100644
index 00000000..13d7e65c
--- /dev/null
+++ b/arch/ia64/include/asm/uncached.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2001-2008 Silicon Graphics, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * Prototypes for the uncached page allocator
+ */
+
+extern unsigned long uncached_alloc_page(int starting_nid, int n_pages);
+extern void uncached_free_page(unsigned long uc_addr, int n_pages);
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
new file mode 100644
index 00000000..d8de1825
--- /dev/null
+++ b/arch/ia64/include/asm/unistd.h
@@ -0,0 +1,384 @@
+#ifndef _ASM_IA64_UNISTD_H
+#define _ASM_IA64_UNISTD_H
+
+/*
+ * IA-64 Linux syscall numbers and inline-functions.
+ *
+ * Copyright (C) 1998-2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/break.h>
+
+#define __BREAK_SYSCALL			__IA64_BREAK_SYSCALL
+
+#define __NR_ni_syscall			1024
+#define __NR_exit			1025
+#define __NR_read			1026
+#define __NR_write			1027
+#define __NR_open			1028
+#define __NR_close			1029
+#define __NR_creat			1030
+#define __NR_link			1031
+#define __NR_unlink			1032
+#define __NR_execve			1033
+#define __NR_chdir			1034
+#define __NR_fchdir			1035
+#define __NR_utimes			1036
+#define __NR_mknod			1037
+#define __NR_chmod			1038
+#define __NR_chown			1039
+#define __NR_lseek			1040
+#define __NR_getpid			1041
+#define __NR_getppid			1042
+#define __NR_mount			1043
+#define __NR_umount			1044
+#define __NR_setuid			1045
+#define __NR_getuid			1046
+#define __NR_geteuid			1047
+#define __NR_ptrace			1048
+#define __NR_access			1049
+#define __NR_sync			1050
+#define __NR_fsync			1051
+#define __NR_fdatasync			1052
+#define __NR_kill			1053
+#define __NR_rename			1054
+#define __NR_mkdir			1055
+#define __NR_rmdir			1056
+#define __NR_dup			1057
+#define __NR_pipe			1058
+#define __NR_times			1059
+#define __NR_brk			1060
+#define __NR_setgid			1061
+#define __NR_getgid			1062
+#define __NR_getegid			1063
+#define __NR_acct			1064
+#define __NR_ioctl			1065
+#define __NR_fcntl			1066
+#define __NR_umask			1067
+#define __NR_chroot			1068
+#define __NR_ustat			1069
+#define __NR_dup2			1070
+#define __NR_setreuid			1071
+#define __NR_setregid			1072
+#define __NR_getresuid			1073
+#define __NR_setresuid			1074
+#define __NR_getresgid			1075
+#define __NR_setresgid			1076
+#define __NR_getgroups			1077
+#define __NR_setgroups			1078
+#define __NR_getpgid			1079
+#define __NR_setpgid			1080
+#define __NR_setsid			1081
+#define __NR_getsid			1082
+#define __NR_sethostname		1083
+#define __NR_setrlimit			1084
+#define __NR_getrlimit			1085
+#define __NR_getrusage			1086
+#define __NR_gettimeofday		1087
+#define __NR_settimeofday		1088
+#define __NR_select			1089
+#define __NR_poll			1090
+#define __NR_symlink			1091
+#define __NR_readlink			1092
+#define __NR_uselib			1093
+#define __NR_swapon			1094
+#define __NR_swapoff			1095
+#define __NR_reboot			1096
+#define __NR_truncate			1097
+#define __NR_ftruncate			1098
+#define __NR_fchmod			1099
+#define __NR_fchown			1100
+#define __NR_getpriority		1101
+#define __NR_setpriority		1102
+#define __NR_statfs			1103
+#define __NR_fstatfs			1104
+#define __NR_gettid			1105
+#define __NR_semget			1106
+#define __NR_semop			1107
+#define __NR_semctl			1108
+#define __NR_msgget			1109
+#define __NR_msgsnd			1110
+#define __NR_msgrcv			1111
+#define __NR_msgctl			1112
+#define __NR_shmget			1113
+#define __NR_shmat			1114
+#define __NR_shmdt			1115
+#define __NR_shmctl			1116
+/* also known as klogctl() in GNU libc: */
+#define __NR_syslog			1117
+#define __NR_setitimer			1118
+#define __NR_getitimer			1119
+/* 1120 was __NR_old_stat */
+/* 1121 was __NR_old_lstat */
+/* 1122 was __NR_old_fstat */
+#define __NR_vhangup			1123
+#define __NR_lchown			1124
+#define __NR_remap_file_pages		1125
+#define __NR_wait4			1126
+#define __NR_sysinfo			1127
+#define __NR_clone			1128
+#define __NR_setdomainname		1129
+#define __NR_uname			1130
+#define __NR_adjtimex			1131
+/* 1132 was __NR_create_module */
+#define __NR_init_module		1133
+#define __NR_delete_module		1134
+/* 1135 was __NR_get_kernel_syms */
+/* 1136 was __NR_query_module */
+#define __NR_quotactl			1137
+#define __NR_bdflush			1138
+#define __NR_sysfs			1139
+#define __NR_personality		1140
+#define __NR_afs_syscall		1141
+#define __NR_setfsuid			1142
+#define __NR_setfsgid			1143
+#define __NR_getdents			1144
+#define __NR_flock			1145
+#define __NR_readv			1146
+#define __NR_writev			1147
+#define __NR_pread64			1148
+#define __NR_pwrite64			1149
+#define __NR__sysctl			1150
+#define __NR_mmap			1151
+#define __NR_munmap			1152
+#define __NR_mlock			1153
+#define __NR_mlockall			1154
+#define __NR_mprotect			1155
+#define __NR_mremap			1156
+#define __NR_msync			1157
+#define __NR_munlock			1158
+#define __NR_munlockall			1159
+#define __NR_sched_getparam		1160
+#define __NR_sched_setparam		1161
+#define __NR_sched_getscheduler		1162
+#define __NR_sched_setscheduler		1163
+#define __NR_sched_yield		1164
+#define __NR_sched_get_priority_max	1165
+#define __NR_sched_get_priority_min	1166
+#define __NR_sched_rr_get_interval	1167
+#define __NR_nanosleep			1168
+#define __NR_nfsservctl			1169
+#define __NR_prctl			1170
+/* 1171 is reserved for backwards compatibility with old __NR_getpagesize */
+#define __NR_mmap2			1172
+#define __NR_pciconfig_read		1173
+#define __NR_pciconfig_write		1174
+#define __NR_perfmonctl			1175
+#define __NR_sigaltstack		1176
+#define __NR_rt_sigaction		1177
+#define __NR_rt_sigpending		1178
+#define __NR_rt_sigprocmask		1179
+#define __NR_rt_sigqueueinfo		1180
+#define __NR_rt_sigreturn		1181
+#define __NR_rt_sigsuspend		1182
+#define __NR_rt_sigtimedwait		1183
+#define __NR_getcwd			1184
+#define __NR_capget			1185
+#define __NR_capset			1186
+#define __NR_sendfile			1187
+#define __NR_getpmsg			1188
+#define __NR_putpmsg			1189
+#define __NR_socket			1190
+#define __NR_bind			1191
+#define __NR_connect			1192
+#define __NR_listen			1193
+#define __NR_accept			1194
+#define __NR_getsockname		1195
+#define __NR_getpeername		1196
+#define __NR_socketpair			1197
+#define __NR_send			1198
+#define __NR_sendto			1199
+#define __NR_recv			1200
+#define __NR_recvfrom			1201
+#define __NR_shutdown			1202
+#define __NR_setsockopt			1203
+#define __NR_getsockopt			1204
+#define __NR_sendmsg			1205
+#define __NR_recvmsg			1206
+#define __NR_pivot_root			1207
+#define __NR_mincore			1208
+#define __NR_madvise			1209
+#define __NR_stat			1210
+#define __NR_lstat			1211
+#define __NR_fstat			1212
+#define __NR_clone2			1213
+#define __NR_getdents64			1214
+#define __NR_getunwind			1215
+#define __NR_readahead			1216
+#define __NR_setxattr			1217
+#define __NR_lsetxattr			1218
+#define __NR_fsetxattr			1219
+#define __NR_getxattr			1220
+#define __NR_lgetxattr			1221
+#define __NR_fgetxattr			1222
+#define __NR_listxattr			1223
+#define __NR_llistxattr			1224
+#define __NR_flistxattr			1225
+#define __NR_removexattr		1226
+#define __NR_lremovexattr		1227
+#define __NR_fremovexattr		1228
+#define __NR_tkill			1229
+#define __NR_futex			1230
+#define __NR_sched_setaffinity		1231
+#define __NR_sched_getaffinity		1232
+#define __NR_set_tid_address		1233
+#define __NR_fadvise64			1234
+#define __NR_tgkill			1235
+#define __NR_exit_group			1236
+#define __NR_lookup_dcookie		1237
+#define __NR_io_setup			1238
+#define __NR_io_destroy			1239
+#define __NR_io_getevents		1240
+#define __NR_io_submit			1241
+#define __NR_io_cancel			1242
+#define __NR_epoll_create		1243
+#define __NR_epoll_ctl			1244
+#define __NR_epoll_wait			1245
+#define __NR_restart_syscall		1246
+#define __NR_semtimedop			1247
+#define __NR_timer_create		1248
+#define __NR_timer_settime		1249
+#define __NR_timer_gettime		1250
+#define __NR_timer_getoverrun		1251
+#define __NR_timer_delete		1252
+#define __NR_clock_settime		1253
+#define __NR_clock_gettime		1254
+#define __NR_clock_getres		1255
+#define __NR_clock_nanosleep		1256
+#define __NR_fstatfs64			1257
+#define __NR_statfs64			1258
+#define __NR_mbind			1259
+#define __NR_get_mempolicy		1260
+#define __NR_set_mempolicy		1261
+#define __NR_mq_open			1262
+#define __NR_mq_unlink			1263
+#define __NR_mq_timedsend		1264
+#define __NR_mq_timedreceive		1265
+#define __NR_mq_notify			1266
+#define __NR_mq_getsetattr		1267
+#define __NR_kexec_load			1268
+#define __NR_vserver			1269
+#define __NR_waitid			1270
+#define __NR_add_key			1271
+#define __NR_request_key		1272
+#define __NR_keyctl			1273
+#define __NR_ioprio_set			1274
+#define __NR_ioprio_get			1275
+#define __NR_move_pages			1276
+#define __NR_inotify_init		1277
+#define __NR_inotify_add_watch		1278
+#define __NR_inotify_rm_watch		1279
+#define __NR_migrate_pages		1280
+#define __NR_openat			1281
+#define __NR_mkdirat			1282
+#define __NR_mknodat			1283
+#define __NR_fchownat			1284
+#define __NR_futimesat			1285
+#define __NR_newfstatat			1286
+#define __NR_unlinkat			1287
+#define __NR_renameat			1288
+#define __NR_linkat			1289
+#define __NR_symlinkat			1290
+#define __NR_readlinkat			1291
+#define __NR_fchmodat			1292
+#define __NR_faccessat			1293
+#define __NR_pselect6			1294
+#define __NR_ppoll			1295
+#define __NR_unshare			1296
+#define __NR_splice			1297
+#define __NR_set_robust_list		1298
+#define __NR_get_robust_list		1299
+#define __NR_sync_file_range		1300
+#define __NR_tee			1301
+#define __NR_vmsplice			1302
+#define __NR_fallocate			1303
+#define __NR_getcpu			1304
+#define __NR_epoll_pwait		1305
+#define __NR_utimensat			1306
+#define __NR_signalfd			1307
+#define __NR_timerfd			1308
+#define __NR_eventfd			1309
+#define __NR_timerfd_create		1310
+#define __NR_timerfd_settime		1311
+#define __NR_timerfd_gettime		1312
+#define __NR_signalfd4			1313
+#define __NR_eventfd2			1314
+#define __NR_epoll_create1		1315
+#define __NR_dup3			1316
+#define __NR_pipe2			1317
+#define __NR_inotify_init1		1318
+#define __NR_preadv			1319
+#define __NR_pwritev			1320
+#define __NR_rt_tgsigqueueinfo		1321
+#define __NR_recvmmsg			1322
+#define __NR_fanotify_init		1323
+#define __NR_fanotify_mark		1324
+#define __NR_prlimit64			1325
+#define __NR_name_to_handle_at		1326
+#define __NR_open_by_handle_at  	1327
+#define __NR_clock_adjtime		1328
+#define __NR_syncfs			1329
+#define __NR_setns			1330
+#define __NR_sendmmsg			1331
+#define __NR_accept4			1334
+
+#ifdef __KERNEL__
+
+
+#define NR_syscalls			311 /* length of syscall table */
+
+/*
+ * The following defines stop scripts/checksyscalls.sh from complaining about
+ * unimplemented system calls.  Glibc provides for each of these by using
+ * more modern equivalent system calls.
+ */
+#define __IGNORE_fork		/* clone() */
+#define __IGNORE_time		/* gettimeofday() */
+#define __IGNORE_alarm		/* setitimer(ITIMER_REAL, ... */
+#define __IGNORE_pause		/* rt_sigprocmask(), rt_sigsuspend() */
+#define __IGNORE_utime		/* utimes() */
+#define __IGNORE_getpgrp	/* getpgid() */
+#define __IGNORE_vfork		/* clone() */
+#define __IGNORE_umount2	/* umount() */
+
+#define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
+
+#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
+
+#include <linux/types.h>
+#include <linux/linkage.h>
+#include <linux/compiler.h>
+
+extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);
+
+asmlinkage unsigned long sys_mmap(
+				unsigned long addr, unsigned long len,
+				int prot, int flags,
+				int fd, long off);
+asmlinkage unsigned long sys_mmap2(
+				unsigned long addr, unsigned long len,
+				int prot, int flags,
+				int fd, long pgoff);
+struct pt_regs;
+struct sigaction;
+asmlinkage long sys_ia64_pipe(void);
+asmlinkage long sys_rt_sigaction(int sig,
+				 const struct sigaction __user *act,
+				 struct sigaction __user *oact,
+				 size_t sigsetsize);
+
+/*
+ * "Conditional" syscalls
+ *
+ * Note, this macro can only be used in the file which defines sys_ni_syscall, i.e., in
+ * kernel/sys_ni.c.  This version causes warnings because the declaration isn't a
+ * proper prototype, but we can't use __typeof__ either, because not all cond_syscall()
+ * declarations have prototypes at the moment.
+ */
+#define cond_syscall(x) asmlinkage long x (void) __attribute__((weak,alias("sys_ni_syscall")))
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_IA64_UNISTD_H */
diff --git a/arch/ia64/include/asm/unwind.h b/arch/ia64/include/asm/unwind.h
new file mode 100644
index 00000000..1af3875f
--- /dev/null
+++ b/arch/ia64/include/asm/unwind.h
@@ -0,0 +1,233 @@
+#ifndef _ASM_IA64_UNWIND_H
+#define _ASM_IA64_UNWIND_H
+
+/*
+ * Copyright (C) 1999-2000, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * A simple API for unwinding kernel stacks.  This is used for
+ * debugging and error reporting purposes.  The kernel doesn't need
+ * full-blown stack unwinding with all the bells and whitles, so there
+ * is not much point in implementing the full IA-64 unwind API (though
+ * it would of course be possible to implement the kernel API on top
+ * of it).
+ */
+
+struct task_struct;	/* forward declaration */
+struct switch_stack;	/* forward declaration */
+
+enum unw_application_register {
+	UNW_AR_BSP,
+	UNW_AR_BSPSTORE,
+	UNW_AR_PFS,
+	UNW_AR_RNAT,
+	UNW_AR_UNAT,
+	UNW_AR_LC,
+	UNW_AR_EC,
+	UNW_AR_FPSR,
+	UNW_AR_RSC,
+	UNW_AR_CCV,
+	UNW_AR_CSD,
+	UNW_AR_SSD
+};
+
+/*
+ * The following declarations are private to the unwind
+ * implementation:
+ */
+
+struct unw_stack {
+	unsigned long limit;
+	unsigned long top;
+};
+
+#define UNW_FLAG_INTERRUPT_FRAME	(1UL << 0)
+
+/*
+ * No user of this module should every access this structure directly
+ * as it is subject to change.  It is declared here solely so we can
+ * use automatic variables.
+ */
+struct unw_frame_info {
+	struct unw_stack regstk;
+	struct unw_stack memstk;
+	unsigned int flags;
+	short hint;
+	short prev_script;
+
+	/* current frame info: */
+	unsigned long bsp;		/* backing store pointer value */
+	unsigned long sp;		/* stack pointer value */
+	unsigned long psp;		/* previous sp value */
+	unsigned long ip;		/* instruction pointer value */
+	unsigned long pr;		/* current predicate values */
+	unsigned long *cfm_loc;		/* cfm save location (or NULL) */
+	unsigned long pt;		/* struct pt_regs location */
+
+	struct task_struct *task;
+	struct switch_stack *sw;
+
+	/* preserved state: */
+	unsigned long *bsp_loc;		/* previous bsp save location */
+	unsigned long *bspstore_loc;
+	unsigned long *pfs_loc;
+	unsigned long *rnat_loc;
+	unsigned long *rp_loc;
+	unsigned long *pri_unat_loc;
+	unsigned long *unat_loc;
+	unsigned long *pr_loc;
+	unsigned long *lc_loc;
+	unsigned long *fpsr_loc;
+	struct unw_ireg {
+		unsigned long *loc;
+		struct unw_ireg_nat {
+			unsigned long type : 3;		/* enum unw_nat_type */
+			signed long off : 61;		/* NaT word is at loc+nat.off */
+		} nat;
+	} r4, r5, r6, r7;
+	unsigned long *b1_loc, *b2_loc, *b3_loc, *b4_loc, *b5_loc;
+	struct ia64_fpreg *f2_loc, *f3_loc, *f4_loc, *f5_loc, *fr_loc[16];
+};
+
+/*
+ * The official API follows below:
+ */
+
+struct unw_table_entry {
+	u64 start_offset;
+	u64 end_offset;
+	u64 info_offset;
+};
+
+/*
+ * Initialize unwind support.
+ */
+extern void unw_init (void);
+
+extern void *unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp,
+				   const void *table_start, const void *table_end);
+
+extern void unw_remove_unwind_table (void *handle);
+
+/*
+ * Prepare to unwind blocked task t.
+ */
+extern void unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t);
+
+extern void unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t,
+				 struct switch_stack *sw);
+
+/*
+ * Prepare to unwind the currently running thread.
+ */
+extern void unw_init_running (void (*callback)(struct unw_frame_info *info, void *arg), void *arg);
+
+/*
+ * Unwind to previous to frame.  Returns 0 if successful, negative
+ * number in case of an error.
+ */
+extern int unw_unwind (struct unw_frame_info *info);
+
+/*
+ * Unwind until the return pointer is in user-land (or until an error
+ * occurs).  Returns 0 if successful, negative number in case of
+ * error.
+ */
+extern int unw_unwind_to_user (struct unw_frame_info *info);
+
+#define unw_is_intr_frame(info)	(((info)->flags & UNW_FLAG_INTERRUPT_FRAME) != 0)
+
+static inline int
+unw_get_ip (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = (info)->ip;
+	return 0;
+}
+
+static inline int
+unw_get_sp (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = (info)->sp;
+	return 0;
+}
+
+static inline int
+unw_get_psp (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = (info)->psp;
+	return 0;
+}
+
+static inline int
+unw_get_bsp (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = (info)->bsp;
+	return 0;
+}
+
+static inline int
+unw_get_cfm (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = *(info)->cfm_loc;
+	return 0;
+}
+
+static inline int
+unw_set_cfm (struct unw_frame_info *info, unsigned long val)
+{
+	*(info)->cfm_loc = val;
+	return 0;
+}
+
+static inline int
+unw_get_rp (struct unw_frame_info *info, unsigned long *val)
+{
+	if (!info->rp_loc)
+		return -1;
+	*val = *info->rp_loc;
+	return 0;
+}
+
+extern int unw_access_gr (struct unw_frame_info *, int, unsigned long *, char *, int);
+extern int unw_access_br (struct unw_frame_info *, int, unsigned long *, int);
+extern int unw_access_fr (struct unw_frame_info *, int, struct ia64_fpreg *, int);
+extern int unw_access_ar (struct unw_frame_info *, int, unsigned long *, int);
+extern int unw_access_pr (struct unw_frame_info *, unsigned long *, int);
+
+static inline int
+unw_set_gr (struct unw_frame_info *i, int n, unsigned long v, char nat)
+{
+	return unw_access_gr(i, n, &v, &nat, 1);
+}
+
+static inline int
+unw_set_br (struct unw_frame_info *i, int n, unsigned long v)
+{
+	return unw_access_br(i, n, &v, 1);
+}
+
+static inline int
+unw_set_fr (struct unw_frame_info *i, int n, struct ia64_fpreg v)
+{
+	return unw_access_fr(i, n, &v, 1);
+}
+
+static inline int
+unw_set_ar (struct unw_frame_info *i, int n, unsigned long v)
+{
+	return unw_access_ar(i, n, &v, 1);
+}
+
+static inline int
+unw_set_pr (struct unw_frame_info *i, unsigned long v)
+{
+	return unw_access_pr(i, &v, 1);
+}
+
+#define unw_get_gr(i,n,v,nat)	unw_access_gr(i,n,v,nat,0)
+#define unw_get_br(i,n,v)	unw_access_br(i,n,v,0)
+#define unw_get_fr(i,n,v)	unw_access_fr(i,n,v,0)
+#define unw_get_ar(i,n,v)	unw_access_ar(i,n,v,0)
+#define unw_get_pr(i,v)		unw_access_pr(i,v,0)
+
+#endif /* _ASM_UNWIND_H */
diff --git a/arch/ia64/include/asm/user.h b/arch/ia64/include/asm/user.h
new file mode 100644
index 00000000..8b982111
--- /dev/null
+++ b/arch/ia64/include/asm/user.h
@@ -0,0 +1,58 @@
+#ifndef _ASM_IA64_USER_H
+#define _ASM_IA64_USER_H
+
+/*
+ * Core file format: The core file is written in such a way that gdb
+ * can understand it and provide useful information to the user (under
+ * linux we use the `trad-core' bfd).  The file contents are as
+ * follows:
+ *
+ *  upage: 1 page consisting of a user struct that tells gdb
+ *	what is present in the file.  Directly after this is a
+ *	copy of the task_struct, which is currently not used by gdb,
+ *	but it may come in handy at some point.  All of the registers
+ *	are stored as part of the upage.  The upage should always be
+ *	only one page long.
+ *  data: The data segment follows next.  We use current->end_text to
+ *	current->brk to pick up all of the user variables, plus any memory
+ *	that may have been sbrk'ed.  No attempt is made to determine if a
+ *	page is demand-zero or if a page is totally unused, we just cover
+ *	the entire range.  All of the addresses are rounded in such a way
+ *	that an integral number of pages is written.
+ *  stack: We need the stack information in order to get a meaningful
+ *	backtrace.  We need to write the data from usp to
+ *	current->start_stack, so we round each of these in order to be able
+ *	to write an integer number of pages.
+ *
+ * Modified 1998, 1999, 2001
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+#include <linux/ptrace.h>
+#include <linux/types.h>
+
+#include <asm/page.h>
+
+#define EF_SIZE		3072	/* XXX fix me */
+
+struct user {
+	unsigned long	regs[EF_SIZE/8+32];	/* integer and fp regs */
+	size_t		u_tsize;		/* text size (pages) */
+	size_t		u_dsize;		/* data size (pages) */
+	size_t		u_ssize;		/* stack size (pages) */
+	unsigned long	start_code;		/* text starting address */
+	unsigned long	start_data;		/* data starting address */
+	unsigned long	start_stack;		/* stack starting address */
+	long int	signal;			/* signal causing core dump */
+	unsigned long	u_ar0;			/* help gdb find registers */
+	unsigned long	magic;			/* identifies a core file */
+	char		u_comm[32];		/* user command name */
+};
+
+#define NBPG			PAGE_SIZE
+#define UPAGES			1
+#define HOST_TEXT_START_ADDR	(u.start_code)
+#define HOST_DATA_START_ADDR	(u.start_data)
+#define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG)
+
+#endif /* _ASM_IA64_USER_H */
diff --git a/arch/ia64/include/asm/ustack.h b/arch/ia64/include/asm/ustack.h
new file mode 100644
index 00000000..504167c3
--- /dev/null
+++ b/arch/ia64/include/asm/ustack.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_IA64_USTACK_H
+#define _ASM_IA64_USTACK_H
+
+/*
+ * Constants for the user stack size
+ */
+
+#ifdef __KERNEL__
+#include <asm/page.h>
+
+/* The absolute hard limit for stack size is 1/2 of the mappable space in the region */
+#define MAX_USER_STACK_SIZE	(RGN_MAP_LIMIT/2)
+#define STACK_TOP		(0x6000000000000000UL + RGN_MAP_LIMIT)
+#define STACK_TOP_MAX		STACK_TOP
+#endif
+
+/* Make a default stack size of 2GiB */
+#define DEFAULT_USER_STACK_SIZE	(1UL << 31)
+
+#endif /* _ASM_IA64_USTACK_H */
diff --git a/arch/ia64/include/asm/uv/uv.h b/arch/ia64/include/asm/uv/uv.h
new file mode 100644
index 00000000..61b5bdfd
--- /dev/null
+++ b/arch/ia64/include/asm/uv/uv.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_IA64_UV_UV_H
+#define _ASM_IA64_UV_UV_H
+
+#include <asm/system.h>
+#include <asm/sn/simulator.h>
+
+static inline int is_uv_system(void)
+{
+	/* temporary support for running on hardware simulator */
+	return IS_MEDUSA() || ia64_platform_is("uv");
+}
+
+#endif	/* _ASM_IA64_UV_UV_H */
diff --git a/arch/ia64/include/asm/uv/uv_hub.h b/arch/ia64/include/asm/uv/uv_hub.h
new file mode 100644
index 00000000..53e9dfac
--- /dev/null
+++ b/arch/ia64/include/asm/uv/uv_hub.h
@@ -0,0 +1,315 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV architectural definitions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef __ASM_IA64_UV_HUB_H__
+#define __ASM_IA64_UV_HUB_H__
+
+#include <linux/numa.h>
+#include <linux/percpu.h>
+#include <asm/types.h>
+#include <asm/percpu.h>
+
+
+/*
+ * Addressing Terminology
+ *
+ *	M       - The low M bits of a physical address represent the offset
+ *		  into the blade local memory. RAM memory on a blade is physically
+ *		  contiguous (although various IO spaces may punch holes in
+ *		  it)..
+ *
+ * 	N	- Number of bits in the node portion of a socket physical
+ * 		  address.
+ *
+ * 	NASID   - network ID of a router, Mbrick or Cbrick. Nasid values of
+ * 	 	  routers always have low bit of 1, C/MBricks have low bit
+ * 		  equal to 0. Most addressing macros that target UV hub chips
+ * 		  right shift the NASID by 1 to exclude the always-zero bit.
+ * 		  NASIDs contain up to 15 bits.
+ *
+ *	GNODE   - NASID right shifted by 1 bit. Most mmrs contain gnodes instead
+ *		  of nasids.
+ *
+ * 	PNODE   - the low N bits of the GNODE. The PNODE is the most useful variant
+ * 		  of the nasid for socket usage.
+ *
+ *
+ *  NumaLink Global Physical Address Format:
+ *  +--------------------------------+---------------------+
+ *  |00..000|      GNODE             |      NodeOffset     |
+ *  +--------------------------------+---------------------+
+ *          |<-------53 - M bits --->|<--------M bits ----->
+ *
+ *	M - number of node offset bits (35 .. 40)
+ *
+ *
+ *  Memory/UV-HUB Processor Socket Address Format:
+ *  +----------------+---------------+---------------------+
+ *  |00..000000000000|   PNODE       |      NodeOffset     |
+ *  +----------------+---------------+---------------------+
+ *                   <--- N bits --->|<--------M bits ----->
+ *
+ *	M - number of node offset bits (35 .. 40)
+ *	N - number of PNODE bits (0 .. 10)
+ *
+ *		Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64).
+ *		The actual values are configuration dependent and are set at
+ *		boot time. M & N values are set by the hardware/BIOS at boot.
+ */
+
+
+/*
+ * Maximum number of bricks in all partitions and in all coherency domains.
+ * This is the total number of bricks accessible in the numalink fabric. It
+ * includes all C & M bricks. Routers are NOT included.
+ *
+ * This value is also the value of the maximum number of non-router NASIDs
+ * in the numalink fabric.
+ *
+ * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused.
+ */
+#define UV_MAX_NUMALINK_BLADES	16384
+
+/*
+ * Maximum number of C/Mbricks within a software SSI (hardware may support
+ * more).
+ */
+#define UV_MAX_SSI_BLADES	1
+
+/*
+ * The largest possible NASID of a C or M brick (+ 2)
+ */
+#define UV_MAX_NASID_VALUE	(UV_MAX_NUMALINK_NODES * 2)
+
+/*
+ * The following defines attributes of the HUB chip. These attributes are
+ * frequently referenced and are kept in the per-cpu data areas of each cpu.
+ * They are kept together in a struct to minimize cache misses.
+ */
+struct uv_hub_info_s {
+	unsigned long	global_mmr_base;
+	unsigned long	gpa_mask;
+	unsigned long	gnode_upper;
+	unsigned long	lowmem_remap_top;
+	unsigned long	lowmem_remap_base;
+	unsigned short	pnode;
+	unsigned short	pnode_mask;
+	unsigned short	coherency_domain_number;
+	unsigned short	numa_blade_id;
+	unsigned char	blade_processor_id;
+	unsigned char	m_val;
+	unsigned char	n_val;
+};
+DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+#define uv_hub_info 		(&__get_cpu_var(__uv_hub_info))
+#define uv_cpu_hub_info(cpu)	(&per_cpu(__uv_hub_info, cpu))
+
+/*
+ * Local & Global MMR space macros.
+ * 	Note: macros are intended to be used ONLY by inline functions
+ * 	in this file - not by other kernel code.
+ * 		n -  NASID (full 15-bit global nasid)
+ * 		g -  GNODE (full 15-bit global nasid, right shifted 1)
+ * 		p -  PNODE (local part of nsids, right shifted 1)
+ */
+#define UV_NASID_TO_PNODE(n)		(((n) >> 1) & uv_hub_info->pnode_mask)
+#define UV_PNODE_TO_NASID(p)		(((p) << 1) | uv_hub_info->gnode_upper)
+
+#define UV_LOCAL_MMR_BASE		0xf4000000UL
+#define UV_GLOBAL_MMR32_BASE		0xf8000000UL
+#define UV_GLOBAL_MMR64_BASE		(uv_hub_info->global_mmr_base)
+
+#define UV_GLOBAL_MMR32_PNODE_SHIFT	15
+#define UV_GLOBAL_MMR64_PNODE_SHIFT	26
+
+#define UV_GLOBAL_MMR32_PNODE_BITS(p)	((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
+
+#define UV_GLOBAL_MMR64_PNODE_BITS(p)					\
+	((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+
+/*
+ * Macros for converting between kernel virtual addresses, socket local physical
+ * addresses, and UV global physical addresses.
+ * 	Note: use the standard __pa() & __va() macros for converting
+ * 	      between socket virtual and socket physical addresses.
+ */
+
+/* socket phys RAM --> UV global physical address */
+static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
+{
+	if (paddr < uv_hub_info->lowmem_remap_top)
+		paddr += uv_hub_info->lowmem_remap_base;
+	return paddr | uv_hub_info->gnode_upper;
+}
+
+
+/* socket virtual --> UV global physical address */
+static inline unsigned long uv_gpa(void *v)
+{
+	return __pa(v) | uv_hub_info->gnode_upper;
+}
+
+/* socket virtual --> UV global physical address */
+static inline void *uv_vgpa(void *v)
+{
+	return (void *)uv_gpa(v);
+}
+
+/* UV global physical address --> socket virtual */
+static inline void *uv_va(unsigned long gpa)
+{
+	return __va(gpa & uv_hub_info->gpa_mask);
+}
+
+/* pnode, offset --> socket virtual */
+static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
+{
+	return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset);
+}
+
+
+/*
+ * Access global MMRs using the low memory MMR32 space. This region supports
+ * faster MMR access but not all MMRs are accessible in this space.
+ */
+static inline unsigned long *uv_global_mmr32_address(int pnode,
+				unsigned long offset)
+{
+	return __va(UV_GLOBAL_MMR32_BASE |
+		       UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset);
+}
+
+static inline void uv_write_global_mmr32(int pnode, unsigned long offset,
+				 unsigned long val)
+{
+	*uv_global_mmr32_address(pnode, offset) = val;
+}
+
+static inline unsigned long uv_read_global_mmr32(int pnode,
+						 unsigned long offset)
+{
+	return *uv_global_mmr32_address(pnode, offset);
+}
+
+/*
+ * Access Global MMR space using the MMR space located at the top of physical
+ * memory.
+ */
+static inline unsigned long *uv_global_mmr64_address(int pnode,
+				unsigned long offset)
+{
+	return __va(UV_GLOBAL_MMR64_BASE |
+		    UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset);
+}
+
+static inline void uv_write_global_mmr64(int pnode, unsigned long offset,
+				unsigned long val)
+{
+	*uv_global_mmr64_address(pnode, offset) = val;
+}
+
+static inline unsigned long uv_read_global_mmr64(int pnode,
+						 unsigned long offset)
+{
+	return *uv_global_mmr64_address(pnode, offset);
+}
+
+/*
+ * Access hub local MMRs. Faster than using global space but only local MMRs
+ * are accessible.
+ */
+static inline unsigned long *uv_local_mmr_address(unsigned long offset)
+{
+	return __va(UV_LOCAL_MMR_BASE | offset);
+}
+
+static inline unsigned long uv_read_local_mmr(unsigned long offset)
+{
+	return *uv_local_mmr_address(offset);
+}
+
+static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
+{
+	*uv_local_mmr_address(offset) = val;
+}
+
+/*
+ * Structures and definitions for converting between cpu, node, pnode, and blade
+ * numbers.
+ */
+
+/* Blade-local cpu number of current cpu. Numbered 0 .. <# cpus on the blade> */
+static inline int uv_blade_processor_id(void)
+{
+	return smp_processor_id();
+}
+
+/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */
+static inline int uv_numa_blade_id(void)
+{
+	return 0;
+}
+
+/* Convert a cpu number to the the UV blade number */
+static inline int uv_cpu_to_blade_id(int cpu)
+{
+	return 0;
+}
+
+/* Convert linux node number to the UV blade number */
+static inline int uv_node_to_blade_id(int nid)
+{
+	return 0;
+}
+
+/* Convert a blade id to the PNODE of the blade */
+static inline int uv_blade_to_pnode(int bid)
+{
+	return 0;
+}
+
+/* Determine the number of possible cpus on a blade */
+static inline int uv_blade_nr_possible_cpus(int bid)
+{
+	return num_possible_cpus();
+}
+
+/* Determine the number of online cpus on a blade */
+static inline int uv_blade_nr_online_cpus(int bid)
+{
+	return num_online_cpus();
+}
+
+/* Convert a cpu id to the PNODE of the blade containing the cpu */
+static inline int uv_cpu_to_pnode(int cpu)
+{
+	return 0;
+}
+
+/* Convert a linux node number to the PNODE of the blade */
+static inline int uv_node_to_pnode(int nid)
+{
+	return 0;
+}
+
+/* Maximum possible number of blades */
+static inline int uv_num_possible_blades(void)
+{
+	return 1;
+}
+
+static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
+{
+	/* not currently needed on ia64 */
+}
+
+
+#endif /* __ASM_IA64_UV_HUB__ */
+
diff --git a/arch/ia64/include/asm/uv/uv_mmrs.h b/arch/ia64/include/asm/uv/uv_mmrs.h
new file mode 100644
index 00000000..fe0b8f05
--- /dev/null
+++ b/arch/ia64/include/asm/uv/uv_mmrs.h
@@ -0,0 +1,825 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV MMR definitions
+ *
+ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_UV_UV_MMRS_H
+#define _ASM_IA64_UV_UV_MMRS_H
+
+#define UV_MMR_ENABLE		(1UL << 63)
+
+/* ========================================================================= */
+/*                           UVH_BAU_DATA_CONFIG                             */
+/* ========================================================================= */
+#define UVH_BAU_DATA_CONFIG 0x61680UL
+#define UVH_BAU_DATA_CONFIG_32 0x0438
+
+#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
+#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
+#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11
+#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12
+#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_BAU_DATA_CONFIG_P_SHFT 13
+#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_BAU_DATA_CONFIG_T_SHFT 15
+#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_BAU_DATA_CONFIG_M_SHFT 16
+#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32
+#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_bau_data_config_u {
+    unsigned long	v;
+    struct uvh_bau_data_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                           UVH_EVENT_OCCURRED0                             */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED0 0x70000UL
+#define UVH_EVENT_OCCURRED0_32 0x005e8
+
+#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
+#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
+#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1
+#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL
+#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2
+#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL
+#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3
+#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL
+#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4
+#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL
+#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5
+#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL
+#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6
+#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL
+#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7
+#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL
+#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8
+#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL
+#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9
+#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL
+#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10
+#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL
+#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
+#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
+#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12
+#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL
+#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13
+#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL
+#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14
+#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL
+#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15
+#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL
+#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16
+#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL
+#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17
+#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL
+#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18
+#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL
+#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19
+#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL
+#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20
+#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL
+#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21
+#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL
+#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22
+#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL
+#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39
+#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL
+#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40
+#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL
+#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41
+#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL
+#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42
+#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL
+#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43
+#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL
+#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44
+#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL
+#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45
+#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47
+#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48
+#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49
+#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL
+#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50
+#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51
+#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52
+#define UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53
+#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54
+#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL
+#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55
+#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
+#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56
+#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
+union uvh_event_occurred0_u {
+    unsigned long	v;
+    struct uvh_event_occurred0_s {
+	unsigned long	lb_hcerr             :  1;  /* RW, W1C */
+	unsigned long	gr0_hcerr            :  1;  /* RW, W1C */
+	unsigned long	gr1_hcerr            :  1;  /* RW, W1C */
+	unsigned long	lh_hcerr             :  1;  /* RW, W1C */
+	unsigned long	rh_hcerr             :  1;  /* RW, W1C */
+	unsigned long	xn_hcerr             :  1;  /* RW, W1C */
+	unsigned long	si_hcerr             :  1;  /* RW, W1C */
+	unsigned long	lb_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	gr0_aoerr0           :  1;  /* RW, W1C */
+	unsigned long	gr1_aoerr0           :  1;  /* RW, W1C */
+	unsigned long	lh_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	rh_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	xn_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	si_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	lb_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	gr0_aoerr1           :  1;  /* RW, W1C */
+	unsigned long	gr1_aoerr1           :  1;  /* RW, W1C */
+	unsigned long	lh_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	rh_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	xn_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	si_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	rh_vpi_int           :  1;  /* RW, W1C */
+	unsigned long	system_shutdown_int  :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_0         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_1         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_2         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_3         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_4         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_5         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_6         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_7         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_8         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_9         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_10        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_11        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_12        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_13        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_14        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_15        :  1;  /* RW, W1C */
+	unsigned long	l1_nmi_int           :  1;  /* RW, W1C */
+	unsigned long	stop_clock           :  1;  /* RW, W1C */
+	unsigned long	asic_to_l1           :  1;  /* RW, W1C */
+	unsigned long	l1_to_asic           :  1;  /* RW, W1C */
+	unsigned long	ltc_int              :  1;  /* RW, W1C */
+	unsigned long	la_seq_trigger       :  1;  /* RW, W1C */
+	unsigned long	ipi_int              :  1;  /* RW, W1C */
+	unsigned long	extio_int0           :  1;  /* RW, W1C */
+	unsigned long	extio_int1           :  1;  /* RW, W1C */
+	unsigned long	extio_int2           :  1;  /* RW, W1C */
+	unsigned long	extio_int3           :  1;  /* RW, W1C */
+	unsigned long	profile_int          :  1;  /* RW, W1C */
+	unsigned long	rtc0                 :  1;  /* RW, W1C */
+	unsigned long	rtc1                 :  1;  /* RW, W1C */
+	unsigned long	rtc2                 :  1;  /* RW, W1C */
+	unsigned long	rtc3                 :  1;  /* RW, W1C */
+	unsigned long	bau_data             :  1;  /* RW, W1C */
+	unsigned long	power_management_req :  1;  /* RW, W1C */
+	unsigned long	rsvd_57_63           :  7;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                        UVH_EVENT_OCCURRED0_ALIAS                          */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL
+#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0
+
+/* ========================================================================= */
+/*                         UVH_GR0_TLB_INT0_CONFIG                           */
+/* ========================================================================= */
+#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
+
+#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13
+#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15
+#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16
+#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr0_tlb_int0_config_u {
+    unsigned long	v;
+    struct uvh_gr0_tlb_int0_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                         UVH_GR0_TLB_INT1_CONFIG                           */
+/* ========================================================================= */
+#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
+
+#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
+#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
+#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12
+#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13
+#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15
+#define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16
+#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr0_tlb_int1_config_u {
+    unsigned long	v;
+    struct uvh_gr0_tlb_int1_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                         UVH_GR1_TLB_INT0_CONFIG                           */
+/* ========================================================================= */
+#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL
+
+#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13
+#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15
+#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16
+#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr1_tlb_int0_config_u {
+    unsigned long	v;
+    struct uvh_gr1_tlb_int0_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                         UVH_GR1_TLB_INT1_CONFIG                           */
+/* ========================================================================= */
+#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
+
+#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
+#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
+#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
+#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13
+#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15
+#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16
+#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr1_tlb_int1_config_u {
+    unsigned long	v;
+    struct uvh_gr1_tlb_int1_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                               UVH_INT_CMPB                                */
+/* ========================================================================= */
+#define UVH_INT_CMPB 0x22080UL
+
+#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0
+#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL
+
+union uvh_int_cmpb_u {
+    unsigned long	v;
+    struct uvh_int_cmpb_s {
+	unsigned long	real_time_cmpb : 56;  /* RW */
+	unsigned long	rsvd_56_63     :  8;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                               UVH_INT_CMPC                                */
+/* ========================================================================= */
+#define UVH_INT_CMPC 0x22100UL
+
+#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0
+#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL
+
+union uvh_int_cmpc_u {
+    unsigned long	v;
+    struct uvh_int_cmpc_s {
+	unsigned long	real_time_cmpc : 56;  /* RW */
+	unsigned long	rsvd_56_63     :  8;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                               UVH_INT_CMPD                                */
+/* ========================================================================= */
+#define UVH_INT_CMPD 0x22180UL
+
+#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0
+#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL
+
+union uvh_int_cmpd_u {
+    unsigned long	v;
+    struct uvh_int_cmpd_s {
+	unsigned long	real_time_cmpd : 56;  /* RW */
+	unsigned long	rsvd_56_63     :  8;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                               UVH_NODE_ID                                 */
+/* ========================================================================= */
+#define UVH_NODE_ID 0x0UL
+
+#define UVH_NODE_ID_FORCE1_SHFT 0
+#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UVH_NODE_ID_MANUFACTURER_SHFT 1
+#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UVH_NODE_ID_PART_NUMBER_SHFT 12
+#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UVH_NODE_ID_REVISION_SHFT 28
+#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+#define UVH_NODE_ID_NODE_ID_SHFT 32
+#define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UVH_NODE_ID_NODES_PER_BIT_SHFT 48
+#define UVH_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL
+#define UVH_NODE_ID_NI_PORT_SHFT 56
+#define UVH_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL
+
+union uvh_node_id_u {
+    unsigned long	v;
+    struct uvh_node_id_s {
+	unsigned long	force1        :  1;  /* RO */
+	unsigned long	manufacturer  : 11;  /* RO */
+	unsigned long	part_number   : 16;  /* RO */
+	unsigned long	revision      :  4;  /* RO */
+	unsigned long	node_id       : 15;  /* RW */
+	unsigned long	rsvd_47       :  1;  /*    */
+	unsigned long	nodes_per_bit :  7;  /* RW */
+	unsigned long	rsvd_55       :  1;  /*    */
+	unsigned long	ni_port       :  4;  /* RO */
+	unsigned long	rsvd_60_63    :  4;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR                  */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_alias210_redirect_config_0_mmr_s {
+	unsigned long	rsvd_0_23 : 24;  /*    */
+	unsigned long	dest_base : 22;  /* RW */
+	unsigned long	rsvd_46_63: 18;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR                  */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_alias210_redirect_config_1_mmr_s {
+	unsigned long	rsvd_0_23 : 24;  /*    */
+	unsigned long	dest_base : 22;  /* RW */
+	unsigned long	rsvd_46_63: 18;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR                  */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_alias210_redirect_config_2_mmr_s {
+	unsigned long	rsvd_0_23 : 24;  /*    */
+	unsigned long	dest_base : 22;  /* RW */
+	unsigned long	rsvd_46_63: 18;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                    UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR                      */
+/* ========================================================================= */
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_gru_overlay_config_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_gru_overlay_config_mmr_s {
+	unsigned long	rsvd_0_27: 28;  /*    */
+	unsigned long	base   : 18;  /* RW */
+	unsigned long	rsvd_46_47:  2;  /*    */
+	unsigned long	gr4    :  1;  /* RW */
+	unsigned long	rsvd_49_51:  3;  /*    */
+	unsigned long	n_gru  :  4;  /* RW */
+	unsigned long	rsvd_56_62:  7;  /*    */
+	unsigned long	enable :  1;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                    UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR                      */
+/* ========================================================================= */
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_mmr_overlay_config_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_mmr_overlay_config_mmr_s {
+	unsigned long	rsvd_0_25: 26;  /*    */
+	unsigned long	base     : 20;  /* RW */
+	unsigned long	dual_hub :  1;  /* RW */
+	unsigned long	rsvd_47_62: 16;  /*    */
+	unsigned long	enable   :  1;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                                 UVH_RTC                                   */
+/* ========================================================================= */
+#define UVH_RTC 0x340000UL
+
+#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0
+#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL
+
+union uvh_rtc_u {
+    unsigned long	v;
+    struct uvh_rtc_s {
+	unsigned long	real_time_clock : 56;  /* RW */
+	unsigned long	rsvd_56_63      :  8;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                           UVH_RTC1_INT_CONFIG                             */
+/* ========================================================================= */
+#define UVH_RTC1_INT_CONFIG 0x615c0UL
+
+#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0
+#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
+#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11
+#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12
+#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC1_INT_CONFIG_P_SHFT 13
+#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC1_INT_CONFIG_T_SHFT 15
+#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC1_INT_CONFIG_M_SHFT 16
+#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32
+#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_rtc1_int_config_u {
+    unsigned long	v;
+    struct uvh_rtc1_int_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                           UVH_RTC2_INT_CONFIG                             */
+/* ========================================================================= */
+#define UVH_RTC2_INT_CONFIG 0x61600UL
+
+#define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0
+#define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC2_INT_CONFIG_DM_SHFT 8
+#define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11
+#define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12
+#define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC2_INT_CONFIG_P_SHFT 13
+#define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC2_INT_CONFIG_T_SHFT 15
+#define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC2_INT_CONFIG_M_SHFT 16
+#define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32
+#define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_rtc2_int_config_u {
+    unsigned long	v;
+    struct uvh_rtc2_int_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                           UVH_RTC3_INT_CONFIG                             */
+/* ========================================================================= */
+#define UVH_RTC3_INT_CONFIG 0x61640UL
+
+#define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0
+#define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC3_INT_CONFIG_DM_SHFT 8
+#define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11
+#define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12
+#define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC3_INT_CONFIG_P_SHFT 13
+#define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC3_INT_CONFIG_T_SHFT 15
+#define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC3_INT_CONFIG_M_SHFT 16
+#define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32
+#define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_rtc3_int_config_u {
+    unsigned long	v;
+    struct uvh_rtc3_int_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                            UVH_RTC_INC_RATIO                              */
+/* ========================================================================= */
+#define UVH_RTC_INC_RATIO 0x350000UL
+
+#define UVH_RTC_INC_RATIO_FRACTION_SHFT 0
+#define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL
+#define UVH_RTC_INC_RATIO_RATIO_SHFT 20
+#define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL
+
+union uvh_rtc_inc_ratio_u {
+    unsigned long	v;
+    struct uvh_rtc_inc_ratio_s {
+	unsigned long	fraction : 20;  /* RW */
+	unsigned long	ratio    :  3;  /* RW */
+	unsigned long	rsvd_23_63: 41;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                          UVH_SI_ADDR_MAP_CONFIG                           */
+/* ========================================================================= */
+#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL
+
+#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_SHFT 0
+#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL
+#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_SHFT 8
+#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_MASK 0x0000000000000f00UL
+
+union uvh_si_addr_map_config_u {
+    unsigned long	v;
+    struct uvh_si_addr_map_config_s {
+	unsigned long	m_skt :  6;  /* RW */
+	unsigned long	rsvd_6_7:  2;  /*    */
+	unsigned long	n_skt :  4;  /* RW */
+	unsigned long	rsvd_12_63: 52;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                       UVH_SI_ALIAS0_OVERLAY_CONFIG                        */
+/* ========================================================================= */
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL
+
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_si_alias0_overlay_config_u {
+    unsigned long	v;
+    struct uvh_si_alias0_overlay_config_s {
+	unsigned long	rsvd_0_23: 24;  /*    */
+	unsigned long	base    :  8;  /* RW */
+	unsigned long	rsvd_32_47: 16;  /*    */
+	unsigned long	m_alias :  5;  /* RW */
+	unsigned long	rsvd_53_62: 10;  /*    */
+	unsigned long	enable  :  1;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                       UVH_SI_ALIAS1_OVERLAY_CONFIG                        */
+/* ========================================================================= */
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL
+
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_si_alias1_overlay_config_u {
+    unsigned long	v;
+    struct uvh_si_alias1_overlay_config_s {
+	unsigned long	rsvd_0_23: 24;  /*    */
+	unsigned long	base    :  8;  /* RW */
+	unsigned long	rsvd_32_47: 16;  /*    */
+	unsigned long	m_alias :  5;  /* RW */
+	unsigned long	rsvd_53_62: 10;  /*    */
+	unsigned long	enable  :  1;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                       UVH_SI_ALIAS2_OVERLAY_CONFIG                        */
+/* ========================================================================= */
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL
+
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_si_alias2_overlay_config_u {
+    unsigned long	v;
+    struct uvh_si_alias2_overlay_config_s {
+	unsigned long	rsvd_0_23: 24;  /*    */
+	unsigned long	base    :  8;  /* RW */
+	unsigned long	rsvd_32_47: 16;  /*    */
+	unsigned long	m_alias :  5;  /* RW */
+	unsigned long	rsvd_53_62: 10;  /*    */
+	unsigned long	enable  :  1;  /* RW */
+    } s;
+};
+
+
+#endif /* _ASM_IA64_UV_UV_MMRS_H */
diff --git a/arch/ia64/include/asm/vga.h b/arch/ia64/include/asm/vga.h
new file mode 100644
index 00000000..02184ecd
--- /dev/null
+++ b/arch/ia64/include/asm/vga.h
@@ -0,0 +1,25 @@
+/*
+ *	Access to VGA videoram
+ *
+ *	(c) 1998 Martin Mares <mj@ucw.cz>
+ *	(c) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ *	(c) 1999 Don Dugger <don.dugger@intel.com>
+ */
+
+#ifndef __ASM_IA64_VGA_H_
+#define __ASM_IA64_VGA_H_
+
+/*
+ * On the PC, we can just recalculate addresses and then access the
+ * videoram directly without any black magic.
+ */
+
+extern unsigned long vga_console_iobase;
+extern unsigned long vga_console_membase;
+
+#define VGA_MAP_MEM(x,s)	((unsigned long) ioremap_nocache(vga_console_membase + (x), s))
+
+#define vga_readb(x)	(*(x))
+#define vga_writeb(x,y)	(*(y) = (x))
+
+#endif /* __ASM_IA64_VGA_H_ */
diff --git a/arch/ia64/include/asm/xen/events.h b/arch/ia64/include/asm/xen/events.h
new file mode 100644
index 00000000..baa74c82
--- /dev/null
+++ b/arch/ia64/include/asm/xen/events.h
@@ -0,0 +1,41 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/events.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#ifndef _ASM_IA64_XEN_EVENTS_H
+#define _ASM_IA64_XEN_EVENTS_H
+
+enum ipi_vector {
+	XEN_RESCHEDULE_VECTOR,
+	XEN_IPI_VECTOR,
+	XEN_CMCP_VECTOR,
+	XEN_CPEP_VECTOR,
+
+	XEN_NR_IPIS,
+};
+
+static inline int xen_irqs_disabled(struct pt_regs *regs)
+{
+	return !(ia64_psr(regs)->i);
+}
+
+#define irq_ctx_init(cpu)	do { } while (0)
+
+#endif /* _ASM_IA64_XEN_EVENTS_H */
diff --git a/arch/ia64/include/asm/xen/grant_table.h b/arch/ia64/include/asm/xen/grant_table.h
new file mode 100644
index 00000000..2b1fae0e
--- /dev/null
+++ b/arch/ia64/include/asm/xen/grant_table.h
@@ -0,0 +1,29 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/grant_table.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _ASM_IA64_XEN_GRANT_TABLE_H
+#define _ASM_IA64_XEN_GRANT_TABLE_H
+
+struct vm_struct *xen_alloc_vm_area(unsigned long size);
+void xen_free_vm_area(struct vm_struct *area);
+
+#endif /* _ASM_IA64_XEN_GRANT_TABLE_H */
diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
new file mode 100644
index 00000000..ed28bcd5
--- /dev/null
+++ b/arch/ia64/include/asm/xen/hypercall.h
@@ -0,0 +1,265 @@
+/******************************************************************************
+ * hypercall.h
+ *
+ * Linux-specific hypervisor handling.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _ASM_IA64_XEN_HYPERCALL_H
+#define _ASM_IA64_XEN_HYPERCALL_H
+
+#include <xen/interface/xen.h>
+#include <xen/interface/physdev.h>
+#include <xen/interface/sched.h>
+#include <asm/xen/xcom_hcall.h>
+struct xencomm_handle;
+extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
+				 unsigned long a3, unsigned long a4,
+				 unsigned long a5, unsigned long cmd);
+
+/*
+ * Assembler stubs for hyper-calls.
+ */
+
+#define _hypercall0(type, name)					\
+({								\
+	long __res;						\
+	__res = __hypercall(0, 0, 0, 0, 0, __HYPERVISOR_##name);\
+	(type)__res;						\
+})
+
+#define _hypercall1(type, name, a1)				\
+({								\
+	long __res;						\
+	__res = __hypercall((unsigned long)a1,			\
+			     0, 0, 0, 0, __HYPERVISOR_##name);	\
+	(type)__res;						\
+})
+
+#define _hypercall2(type, name, a1, a2)				\
+({								\
+	long __res;						\
+	__res = __hypercall((unsigned long)a1,			\
+			    (unsigned long)a2,			\
+			    0, 0, 0, __HYPERVISOR_##name);	\
+	(type)__res;						\
+})
+
+#define _hypercall3(type, name, a1, a2, a3)			\
+({								\
+	long __res;						\
+	__res = __hypercall((unsigned long)a1,			\
+			    (unsigned long)a2,			\
+			    (unsigned long)a3,			\
+			    0, 0, __HYPERVISOR_##name);		\
+	(type)__res;						\
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4)			\
+({								\
+	long __res;						\
+	__res = __hypercall((unsigned long)a1,			\
+			    (unsigned long)a2,			\
+			    (unsigned long)a3,			\
+			    (unsigned long)a4,			\
+			    0, __HYPERVISOR_##name);		\
+	(type)__res;						\
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5)		\
+({								\
+	long __res;						\
+	__res = __hypercall((unsigned long)a1,			\
+			    (unsigned long)a2,			\
+			    (unsigned long)a3,			\
+			    (unsigned long)a4,			\
+			    (unsigned long)a5,			\
+			    __HYPERVISOR_##name);		\
+	(type)__res;						\
+})
+
+
+static inline int
+xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, sched_op, cmd, arg);
+}
+
+static inline long
+HYPERVISOR_set_timer_op(u64 timeout)
+{
+	unsigned long timeout_hi = (unsigned long)(timeout >> 32);
+	unsigned long timeout_lo = (unsigned long)timeout;
+	return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
+}
+
+static inline int
+xencomm_arch_hypercall_multicall(struct xencomm_handle *call_list,
+				 int nr_calls)
+{
+	return _hypercall2(int, multicall, call_list, nr_calls);
+}
+
+static inline int
+xencomm_arch_hypercall_memory_op(unsigned int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, memory_op, cmd, arg);
+}
+
+static inline int
+xencomm_arch_hypercall_event_channel_op(int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, event_channel_op, cmd, arg);
+}
+
+static inline int
+xencomm_arch_hypercall_xen_version(int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, xen_version, cmd, arg);
+}
+
+static inline int
+xencomm_arch_hypercall_console_io(int cmd, int count,
+				  struct xencomm_handle *str)
+{
+	return _hypercall3(int, console_io, cmd, count, str);
+}
+
+static inline int
+xencomm_arch_hypercall_physdev_op(int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, physdev_op, cmd, arg);
+}
+
+static inline int
+xencomm_arch_hypercall_grant_table_op(unsigned int cmd,
+				      struct xencomm_handle *uop,
+				      unsigned int count)
+{
+	return _hypercall3(int, grant_table_op, cmd, uop, count);
+}
+
+int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count);
+
+extern int xencomm_arch_hypercall_suspend(struct xencomm_handle *arg);
+
+static inline int
+xencomm_arch_hypercall_callback_op(int cmd, struct xencomm_handle *arg)
+{
+	return _hypercall2(int, callback_op, cmd, arg);
+}
+
+static inline long
+xencomm_arch_hypercall_vcpu_op(int cmd, int cpu, void *arg)
+{
+	return _hypercall3(long, vcpu_op, cmd, cpu, arg);
+}
+
+static inline int
+HYPERVISOR_physdev_op(int cmd, void *arg)
+{
+	switch (cmd) {
+	case PHYSDEVOP_eoi:
+		return _hypercall1(int, ia64_fast_eoi,
+				   ((struct physdev_eoi *)arg)->irq);
+	default:
+		return xencomm_hypercall_physdev_op(cmd, arg);
+	}
+}
+
+static inline long
+xencomm_arch_hypercall_opt_feature(struct xencomm_handle *arg)
+{
+	return _hypercall1(long, opt_feature, arg);
+}
+
+/* for balloon driver */
+#define HYPERVISOR_update_va_mapping(va, new_val, flags) (0)
+
+/* Use xencomm to do hypercalls.  */
+#define HYPERVISOR_sched_op xencomm_hypercall_sched_op
+#define HYPERVISOR_event_channel_op xencomm_hypercall_event_channel_op
+#define HYPERVISOR_callback_op xencomm_hypercall_callback_op
+#define HYPERVISOR_multicall xencomm_hypercall_multicall
+#define HYPERVISOR_xen_version xencomm_hypercall_xen_version
+#define HYPERVISOR_console_io xencomm_hypercall_console_io
+#define HYPERVISOR_memory_op xencomm_hypercall_memory_op
+#define HYPERVISOR_suspend xencomm_hypercall_suspend
+#define HYPERVISOR_vcpu_op xencomm_hypercall_vcpu_op
+#define HYPERVISOR_opt_feature xencomm_hypercall_opt_feature
+
+/* to compile gnttab_copy_grant_page() in drivers/xen/core/gnttab.c */
+#define HYPERVISOR_mmu_update(req, count, success_count, domid) ({ BUG(); 0; })
+
+static inline int
+HYPERVISOR_shutdown(
+	unsigned int reason)
+{
+	struct sched_shutdown sched_shutdown = {
+		.reason = reason
+	};
+
+	int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
+
+	return rc;
+}
+
+/* for netfront.c, netback.c */
+#define MULTI_UVMFLAGS_INDEX 0 /* XXX any value */
+
+static inline void
+MULTI_update_va_mapping(
+	struct multicall_entry *mcl, unsigned long va,
+	pte_t new_val, unsigned long flags)
+{
+	mcl->op = __HYPERVISOR_update_va_mapping;
+	mcl->result = 0;
+}
+
+static inline void
+MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd,
+	void *uop, unsigned int count)
+{
+	mcl->op = __HYPERVISOR_grant_table_op;
+	mcl->args[0] = cmd;
+	mcl->args[1] = (unsigned long)uop;
+	mcl->args[2] = count;
+}
+
+static inline void
+MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
+		 int count, int *success_count, domid_t domid)
+{
+	mcl->op = __HYPERVISOR_mmu_update;
+	mcl->args[0] = (unsigned long)req;
+	mcl->args[1] = count;
+	mcl->args[2] = (unsigned long)success_count;
+	mcl->args[3] = domid;
+}
+
+#endif /* _ASM_IA64_XEN_HYPERCALL_H */
diff --git a/arch/ia64/include/asm/xen/hypervisor.h b/arch/ia64/include/asm/xen/hypervisor.h
new file mode 100644
index 00000000..67455c2e
--- /dev/null
+++ b/arch/ia64/include/asm/xen/hypervisor.h
@@ -0,0 +1,61 @@
+/******************************************************************************
+ * hypervisor.h
+ *
+ * Linux-specific hypervisor handling.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _ASM_IA64_XEN_HYPERVISOR_H
+#define _ASM_IA64_XEN_HYPERVISOR_H
+
+#include <linux/err.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/version.h>	/* to compile feature.c */
+#include <xen/features.h>		/* to comiple xen-netfront.c */
+#include <xen/xen.h>
+#include <asm/xen/hypercall.h>
+
+#ifdef CONFIG_XEN
+extern struct shared_info *HYPERVISOR_shared_info;
+extern struct start_info *xen_start_info;
+
+void __init xen_setup_vcpu_info_placement(void);
+void force_evtchn_callback(void);
+
+/* for drivers/xen/balloon/balloon.c */
+#ifdef CONFIG_XEN_SCRUB_PAGES
+#define scrub_pages(_p, _n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
+#else
+#define scrub_pages(_p, _n) ((void)0)
+#endif
+
+/* For setup_arch() in arch/ia64/kernel/setup.c */
+void xen_ia64_enable_opt_feature(void);
+#endif
+
+#endif /* _ASM_IA64_XEN_HYPERVISOR_H */
diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h
new file mode 100644
index 00000000..c53a4761
--- /dev/null
+++ b/arch/ia64/include/asm/xen/inst.h
@@ -0,0 +1,486 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <asm/xen/privop.h>
+
+#define ia64_ivt				xen_ivt
+#define DO_SAVE_MIN				XEN_DO_SAVE_MIN
+
+#define __paravirt_switch_to			xen_switch_to
+#define __paravirt_leave_syscall		xen_leave_syscall
+#define __paravirt_work_processed_syscall	xen_work_processed_syscall
+#define __paravirt_leave_kernel			xen_leave_kernel
+#define __paravirt_pending_syscall_end		xen_work_pending_syscall_end
+#define __paravirt_work_processed_syscall_target \
+						xen_work_processed_syscall
+
+#define paravirt_fsyscall_table			xen_fsyscall_table
+#define paravirt_fsys_bubble_down		xen_fsys_bubble_down
+
+#define MOV_FROM_IFA(reg)	\
+	movl reg = XSI_IFA;	\
+	;;			\
+	ld8 reg = [reg]
+
+#define MOV_FROM_ITIR(reg)	\
+	movl reg = XSI_ITIR;	\
+	;;			\
+	ld8 reg = [reg]
+
+#define MOV_FROM_ISR(reg)	\
+	movl reg = XSI_ISR;	\
+	;;			\
+	ld8 reg = [reg]
+
+#define MOV_FROM_IHA(reg)	\
+	movl reg = XSI_IHA;	\
+	;;			\
+	ld8 reg = [reg]
+
+#define MOV_FROM_IPSR(pred, reg)	\
+(pred)	movl reg = XSI_IPSR;		\
+	;;				\
+(pred)	ld8 reg = [reg]
+
+#define MOV_FROM_IIM(reg)	\
+	movl reg = XSI_IIM;	\
+	;;			\
+	ld8 reg = [reg]
+
+#define MOV_FROM_IIP(reg)	\
+	movl reg = XSI_IIP;	\
+	;;			\
+	ld8 reg = [reg]
+
+.macro __MOV_FROM_IVR reg, clob
+	.ifc "\reg", "r8"
+		XEN_HYPER_GET_IVR
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		XEN_HYPER_GET_IVR
+		;;
+		mov \reg = r8
+		.exitm
+	.endif
+
+	mov \clob = r8
+	;;
+	XEN_HYPER_GET_IVR
+	;;
+	mov \reg = r8
+	;;
+	mov r8 = \clob
+.endm
+#define MOV_FROM_IVR(reg, clob)	__MOV_FROM_IVR reg, clob
+
+.macro __MOV_FROM_PSR pred, reg, clob
+	.ifc "\reg", "r8"
+		(\pred)	XEN_HYPER_GET_PSR;
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		(\pred)	XEN_HYPER_GET_PSR
+		;;
+		(\pred)	mov \reg = r8
+		.exitm
+	.endif
+
+	(\pred)	mov \clob = r8
+	(\pred)	XEN_HYPER_GET_PSR
+	;;
+	(\pred)	mov \reg = r8
+	(\pred)	mov r8 = \clob
+.endm
+#define MOV_FROM_PSR(pred, reg, clob)	__MOV_FROM_PSR pred, reg, clob
+
+/* assuming ar.itc is read with interrupt disabled. */
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)		\
+(pred)	movl clob = XSI_ITC_OFFSET;				\
+	;;							\
+(pred)	ld8 clob = [clob];					\
+(pred)	mov reg = ar.itc;					\
+	;;							\
+(pred)	add reg = reg, clob;					\
+	;;							\
+(pred)	movl clob = XSI_ITC_LAST;				\
+	;;							\
+(pred)	ld8 clob = [clob];					\
+	;;							\
+(pred)	cmp.geu.unc pred_clob, p0 = clob, reg;			\
+	;;							\
+(pred_clob)	add reg = 1, clob;				\
+	;;							\
+(pred)	movl clob = XSI_ITC_LAST;				\
+	;;							\
+(pred)	st8 [clob] = reg
+
+
+#define MOV_TO_IFA(reg, clob)	\
+	movl clob = XSI_IFA;	\
+	;;			\
+	st8 [clob] = reg	\
+
+#define MOV_TO_ITIR(pred, reg, clob)	\
+(pred)	movl clob = XSI_ITIR;		\
+	;;				\
+(pred)	st8 [clob] = reg
+
+#define MOV_TO_IHA(pred, reg, clob)	\
+(pred)	movl clob = XSI_IHA;		\
+	;;				\
+(pred)	st8 [clob] = reg
+
+#define MOV_TO_IPSR(pred, reg, clob)	\
+(pred)	movl clob = XSI_IPSR;		\
+	;;				\
+(pred)	st8 [clob] = reg;		\
+	;;
+
+#define MOV_TO_IFS(pred, reg, clob)	\
+(pred)	movl clob = XSI_IFS;		\
+	;;				\
+(pred)	st8 [clob] = reg;		\
+	;;
+
+#define MOV_TO_IIP(reg, clob)	\
+	movl clob = XSI_IIP;	\
+	;;			\
+	st8 [clob] = reg
+
+.macro ____MOV_TO_KR kr, reg, clob0, clob1
+	.ifc "\clob0", "r9"
+		.error "clob0 \clob0 must not be r9"
+	.endif
+	.ifc "\clob1", "r8"
+		.error "clob1 \clob1 must not be r8"
+	.endif
+
+	.ifnc "\reg", "r9"
+		.ifnc "\clob1", "r9"
+			mov \clob1 = r9
+		.endif
+		mov r9 = \reg
+	.endif
+	.ifnc "\clob0", "r8"
+		mov \clob0 = r8
+	.endif
+	mov r8 = \kr
+	;;
+	XEN_HYPER_SET_KR
+
+	.ifnc "\reg", "r9"
+		.ifnc "\clob1", "r9"
+			mov r9 = \clob1
+		.endif
+	.endif
+	.ifnc "\clob0", "r8"
+		mov r8 = \clob0
+	.endif
+.endm
+
+.macro __MOV_TO_KR kr, reg, clob0, clob1
+	.ifc "\clob0", "r9"
+		____MOV_TO_KR \kr, \reg, \clob1, \clob0
+		.exitm
+	.endif
+	.ifc "\clob1", "r8"
+		____MOV_TO_KR \kr, \reg, \clob1, \clob0
+		.exitm
+	.endif
+
+	____MOV_TO_KR \kr, \reg, \clob0, \clob1
+.endm
+
+#define MOV_TO_KR(kr, reg, clob0, clob1) \
+	__MOV_TO_KR IA64_KR_ ## kr, reg, clob0, clob1
+
+
+.macro __ITC_I pred, reg, clob
+	.ifc "\reg", "r8"
+		(\pred)	XEN_HYPER_ITC_I
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		(\pred)	mov r8 = \reg
+		;;
+		(\pred)	XEN_HYPER_ITC_I
+		.exitm
+	.endif
+
+	(\pred)	mov \clob = r8
+	(\pred)	mov r8 = \reg
+	;;
+	(\pred)	XEN_HYPER_ITC_I
+	;;
+	(\pred)	mov r8 = \clob
+	;;
+.endm
+#define ITC_I(pred, reg, clob)	__ITC_I pred, reg, clob
+
+.macro __ITC_D pred, reg, clob
+	.ifc "\reg", "r8"
+		(\pred)	XEN_HYPER_ITC_D
+		;;
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		(\pred)	mov r8 = \reg
+		;;
+		(\pred)	XEN_HYPER_ITC_D
+		;;
+		.exitm
+	.endif
+
+	(\pred)	mov \clob = r8
+	(\pred)	mov r8 = \reg
+	;;
+	(\pred)	XEN_HYPER_ITC_D
+	;;
+	(\pred)	mov r8 = \clob
+	;;
+.endm
+#define ITC_D(pred, reg, clob)	__ITC_D pred, reg, clob
+
+.macro __ITC_I_AND_D pred_i, pred_d, reg, clob
+	.ifc "\reg", "r8"
+		(\pred_i)XEN_HYPER_ITC_I
+		;;
+		(\pred_d)XEN_HYPER_ITC_D
+		;;
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		mov r8 = \reg
+		;;
+		(\pred_i)XEN_HYPER_ITC_I
+		;;
+		(\pred_d)XEN_HYPER_ITC_D
+		;;
+		.exitm
+	.endif
+
+	mov \clob = r8
+	mov r8 = \reg
+	;;
+	(\pred_i)XEN_HYPER_ITC_I
+	;;
+	(\pred_d)XEN_HYPER_ITC_D
+	;;
+	mov r8 = \clob
+	;;
+.endm
+#define ITC_I_AND_D(pred_i, pred_d, reg, clob) \
+	__ITC_I_AND_D pred_i, pred_d, reg, clob
+
+.macro __THASH pred, reg0, reg1, clob
+	.ifc "\reg0", "r8"
+		(\pred)	mov r8 = \reg1
+		(\pred)	XEN_HYPER_THASH
+		.exitm
+	.endc
+	.ifc "\reg1", "r8"
+		(\pred)	XEN_HYPER_THASH
+		;;
+		(\pred)	mov \reg0 = r8
+		;;
+		.exitm
+	.endif
+	.ifc "\clob", "r8"
+		(\pred)	mov r8 = \reg1
+		(\pred)	XEN_HYPER_THASH
+		;;
+		(\pred)	mov \reg0 = r8
+		;;
+		.exitm
+	.endif
+
+	(\pred)	mov \clob = r8
+	(\pred)	mov r8 = \reg1
+	(\pred)	XEN_HYPER_THASH
+	;;
+	(\pred)	mov \reg0 = r8
+	(\pred)	mov r8 = \clob
+	;;
+.endm
+#define THASH(pred, reg0, reg1, clob) __THASH pred, reg0, reg1, clob
+
+#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1)	\
+	mov clob0 = 1;						\
+	movl clob1 = XSI_PSR_IC;				\
+	;;							\
+	st4 [clob1] = clob0					\
+	;;
+
+#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1)	\
+	;;					\
+	srlz.d;					\
+	mov clob1 = 1;				\
+	movl clob0 = XSI_PSR_IC;		\
+	;;					\
+	st4 [clob0] = clob1
+
+#define RSM_PSR_IC(clob)	\
+	movl clob = XSI_PSR_IC;	\
+	;;			\
+	st4 [clob] = r0;	\
+	;;
+
+/* pred will be clobbered */
+#define MASK_TO_PEND_OFS    (-1)
+#define SSM_PSR_I(pred, pred_clob, clob)				\
+(pred)	movl clob = XSI_PSR_I_ADDR					\
+	;;								\
+(pred)	ld8 clob = [clob]						\
+	;;								\
+	/* if (pred) vpsr.i = 1 */					\
+	/* if (pred) (vcpu->vcpu_info->evtchn_upcall_mask)=0 */		\
+(pred)	st1 [clob] = r0, MASK_TO_PEND_OFS				\
+	;;								\
+	/* if (vcpu->vcpu_info->evtchn_upcall_pending) */		\
+(pred)	ld1 clob = [clob]						\
+	;;								\
+(pred)	cmp.ne.unc pred_clob, p0 = clob, r0				\
+	;;								\
+(pred_clob)XEN_HYPER_SSM_I	/* do areal ssm psr.i */
+
+#define RSM_PSR_I(pred, clob0, clob1)	\
+	movl clob0 = XSI_PSR_I_ADDR;	\
+	mov clob1 = 1;			\
+	;;				\
+	ld8 clob0 = [clob0];		\
+	;;				\
+(pred)	st1 [clob0] = clob1
+
+#define RSM_PSR_I_IC(clob0, clob1, clob2)		\
+	movl clob0 = XSI_PSR_I_ADDR;			\
+	movl clob1 = XSI_PSR_IC;			\
+	;;						\
+	ld8 clob0 = [clob0];				\
+	mov clob2 = 1;					\
+	;;						\
+	/* note: clears both vpsr.i and vpsr.ic! */	\
+	st1 [clob0] = clob2;				\
+	st4 [clob1] = r0;				\
+	;;
+
+#define RSM_PSR_DT		\
+	XEN_HYPER_RSM_PSR_DT
+
+#define RSM_PSR_BE_I(clob0, clob1)	\
+	RSM_PSR_I(p0, clob0, clob1);	\
+	rum psr.be
+
+#define SSM_PSR_DT_AND_SRLZ_I	\
+	XEN_HYPER_SSM_PSR_DT
+
+#define BSW_0(clob0, clob1, clob2)			\
+	;;						\
+	/* r16-r31 all now hold bank1 values */		\
+	mov clob2 = ar.unat;				\
+	movl clob0 = XSI_BANK1_R16;			\
+	movl clob1 = XSI_BANK1_R16 + 8;			\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r16, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r17, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r18, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r19, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r20, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r21, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r22, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r23, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r24, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r25, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r26, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r27, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r28, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r29, 16;		\
+	;;						\
+.mem.offset 0, 0; st8.spill [clob0] = r30, 16;		\
+.mem.offset 8, 0; st8.spill [clob1] = r31, 16;		\
+	;;						\
+	mov clob1 = ar.unat;				\
+	movl clob0 = XSI_B1NAT;				\
+	;;						\
+	st8 [clob0] = clob1;				\
+	mov ar.unat = clob2;				\
+	movl clob0 = XSI_BANKNUM;			\
+	;;						\
+	st4 [clob0] = r0
+
+
+	/* FIXME: THIS CODE IS NOT NaT SAFE! */
+#define XEN_BSW_1(clob)			\
+	mov clob = ar.unat;		\
+	movl r30 = XSI_B1NAT;		\
+	;;				\
+	ld8 r30 = [r30];		\
+	mov r31 = 1;			\
+	;;				\
+	mov ar.unat = r30;		\
+	movl r30 = XSI_BANKNUM;		\
+	;;				\
+	st4 [r30] = r31;		\
+	movl r30 = XSI_BANK1_R16;	\
+	movl r31 = XSI_BANK1_R16+8;	\
+	;;				\
+	ld8.fill r16 = [r30], 16;	\
+	ld8.fill r17 = [r31], 16;	\
+	;;				\
+	ld8.fill r18 = [r30], 16;	\
+	ld8.fill r19 = [r31], 16;	\
+	;;				\
+	ld8.fill r20 = [r30], 16;	\
+	ld8.fill r21 = [r31], 16;	\
+	;;				\
+	ld8.fill r22 = [r30], 16;	\
+	ld8.fill r23 = [r31], 16;	\
+	;;				\
+	ld8.fill r24 = [r30], 16;	\
+	ld8.fill r25 = [r31], 16;	\
+	;;				\
+	ld8.fill r26 = [r30], 16;	\
+	ld8.fill r27 = [r31], 16;	\
+	;;				\
+	ld8.fill r28 = [r30], 16;	\
+	ld8.fill r29 = [r31], 16;	\
+	;;				\
+	ld8.fill r30 = [r30];		\
+	ld8.fill r31 = [r31];		\
+	;;				\
+	mov ar.unat = clob
+
+#define BSW_1(clob0, clob1)	XEN_BSW_1(clob1)
+
+
+#define COVER	\
+	XEN_HYPER_COVER
+
+#define RFI			\
+	XEN_HYPER_RFI;		\
+	dv_serialize_data
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
new file mode 100644
index 00000000..e951e740
--- /dev/null
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -0,0 +1,355 @@
+/******************************************************************************
+ * arch-ia64/hypervisor-if.h
+ *
+ * Guest OS interface to IA64 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright by those who contributed. (in alphabetical order)
+ *
+ * Anthony Xu <anthony.xu@intel.com>
+ * Eddie Dong <eddie.dong@intel.com>
+ * Fred Yang <fred.yang@intel.com>
+ * Kevin Tian <kevin.tian@intel.com>
+ * Alex Williamson <alex.williamson@hp.com>
+ * Chris Wright <chrisw@sous-sol.org>
+ * Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
+ * Dietmar Hahn <dietmar.hahn@fujitsu-siemens.com>
+ * Hollis Blanchard <hollisb@us.ibm.com>
+ * Isaku Yamahata <yamahata@valinux.co.jp>
+ * Jan Beulich <jbeulich@novell.com>
+ * John Levon <john.levon@sun.com>
+ * Kazuhiro Suzuki <kaz@jp.fujitsu.com>
+ * Keir Fraser <keir.fraser@citrix.com>
+ * Kouya Shimura <kouya@jp.fujitsu.com>
+ * Masaki Kanno <kanno.masaki@jp.fujitsu.com>
+ * Matt Chapman <matthewc@hp.com>
+ * Matthew Chapman <matthewc@hp.com>
+ * Samuel Thibault <samuel.thibault@eu.citrix.com>
+ * Tomonari Horikoshi <t.horikoshi@jp.fujitsu.com>
+ * Tristan Gingold <tgingold@free.fr>
+ * Tsunehisa Doi <Doi.Tsunehisa@jp.fujitsu.com>
+ * Yutaka Ezaki <yutaka.ezaki@jp.fujitsu.com>
+ * Zhang Xin <xing.z.zhang@intel.com>
+ * Zhang xiantao <xiantao.zhang@intel.com>
+ * dan.magenheimer@hp.com
+ * ian.pratt@cl.cam.ac.uk
+ * michael.fetterman@cl.cam.ac.uk
+ */
+
+#ifndef _ASM_IA64_XEN_INTERFACE_H
+#define _ASM_IA64_XEN_INTERFACE_H
+
+#define __DEFINE_GUEST_HANDLE(name, type)	\
+	typedef struct { type *p; } __guest_handle_ ## name
+
+#define DEFINE_GUEST_HANDLE_STRUCT(name)	\
+	__DEFINE_GUEST_HANDLE(name, struct name)
+#define DEFINE_GUEST_HANDLE(name)	__DEFINE_GUEST_HANDLE(name, name)
+#define GUEST_HANDLE(name)		__guest_handle_ ## name
+#define GUEST_HANDLE_64(name)		GUEST_HANDLE(name)
+#define set_xen_guest_handle(hnd, val)	do { (hnd).p = val; } while (0)
+
+#ifndef __ASSEMBLY__
+/* Guest handles for primitive C types. */
+__DEFINE_GUEST_HANDLE(uchar, unsigned char);
+__DEFINE_GUEST_HANDLE(uint, unsigned int);
+__DEFINE_GUEST_HANDLE(ulong, unsigned long);
+__DEFINE_GUEST_HANDLE(u64, unsigned long);
+DEFINE_GUEST_HANDLE(char);
+DEFINE_GUEST_HANDLE(int);
+DEFINE_GUEST_HANDLE(long);
+DEFINE_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_GUEST_HANDLE(xen_pfn_t);
+#define PRI_xen_pfn	"lx"
+#endif
+
+/* Arch specific VIRQs definition */
+#define VIRQ_ITC	VIRQ_ARCH_0	/* V. Virtual itc timer */
+#define VIRQ_MCA_CMC	VIRQ_ARCH_1	/* MCA cmc interrupt */
+#define VIRQ_MCA_CPE	VIRQ_ARCH_2	/* MCA cpe interrupt */
+
+/* Maximum number of virtual CPUs in multi-processor guests. */
+/* keep sizeof(struct shared_page) <= PAGE_SIZE.
+ * this is checked in arch/ia64/xen/hypervisor.c. */
+#define MAX_VIRT_CPUS	64
+
+#ifndef __ASSEMBLY__
+
+#define INVALID_MFN	(~0UL)
+
+union vac {
+	unsigned long value;
+	struct {
+		int a_int:1;
+		int a_from_int_cr:1;
+		int a_to_int_cr:1;
+		int a_from_psr:1;
+		int a_from_cpuid:1;
+		int a_cover:1;
+		int a_bsw:1;
+		long reserved:57;
+	};
+};
+
+union vdc {
+	unsigned long value;
+	struct {
+		int d_vmsw:1;
+		int d_extint:1;
+		int d_ibr_dbr:1;
+		int d_pmc:1;
+		int d_to_pmd:1;
+		int d_itm:1;
+		long reserved:58;
+	};
+};
+
+struct mapped_regs {
+	union vac vac;
+	union vdc vdc;
+	unsigned long virt_env_vaddr;
+	unsigned long reserved1[29];
+	unsigned long vhpi;
+	unsigned long reserved2[95];
+	union {
+		unsigned long vgr[16];
+		unsigned long bank1_regs[16];	/* bank1 regs (r16-r31)
+						   when bank0 active */
+	};
+	union {
+		unsigned long vbgr[16];
+		unsigned long bank0_regs[16];	/* bank0 regs (r16-r31)
+						   when bank1 active */
+	};
+	unsigned long vnat;
+	unsigned long vbnat;
+	unsigned long vcpuid[5];
+	unsigned long reserved3[11];
+	unsigned long vpsr;
+	unsigned long vpr;
+	unsigned long reserved4[76];
+	union {
+		unsigned long vcr[128];
+		struct {
+			unsigned long dcr;	/* CR0 */
+			unsigned long itm;
+			unsigned long iva;
+			unsigned long rsv1[5];
+			unsigned long pta;	/* CR8 */
+			unsigned long rsv2[7];
+			unsigned long ipsr;	/* CR16 */
+			unsigned long isr;
+			unsigned long rsv3;
+			unsigned long iip;
+			unsigned long ifa;
+			unsigned long itir;
+			unsigned long iipa;
+			unsigned long ifs;
+			unsigned long iim;	/* CR24 */
+			unsigned long iha;
+			unsigned long rsv4[38];
+			unsigned long lid;	/* CR64 */
+			unsigned long ivr;
+			unsigned long tpr;
+			unsigned long eoi;
+			unsigned long irr[4];
+			unsigned long itv;	/* CR72 */
+			unsigned long pmv;
+			unsigned long cmcv;
+			unsigned long rsv5[5];
+			unsigned long lrr0;	/* CR80 */
+			unsigned long lrr1;
+			unsigned long rsv6[46];
+		};
+	};
+	union {
+		unsigned long reserved5[128];
+		struct {
+			unsigned long precover_ifs;
+			unsigned long unat;	/* not sure if this is needed
+						   until NaT arch is done */
+			int interrupt_collection_enabled; /* virtual psr.ic */
+
+			/* virtual interrupt deliverable flag is
+			 * evtchn_upcall_mask in shared info area now.
+			 * interrupt_mask_addr is the address
+			 * of evtchn_upcall_mask for current vcpu
+			 */
+			unsigned char *interrupt_mask_addr;
+			int pending_interruption;
+			unsigned char vpsr_pp;
+			unsigned char vpsr_dfh;
+			unsigned char hpsr_dfh;
+			unsigned char hpsr_mfh;
+			unsigned long reserved5_1[4];
+			int metaphysical_mode;	/* 1 = use metaphys mapping
+						   0 = use virtual */
+			int banknum;		/* 0 or 1, which virtual
+						   register bank is active */
+			unsigned long rrs[8];	/* region registers */
+			unsigned long krs[8];	/* kernel registers */
+			unsigned long tmp[16];	/* temp registers
+						   (e.g. for hyperprivops) */
+
+			/* itc paravirtualization
+			 * vAR.ITC = mAR.ITC + itc_offset
+			 * itc_last is one which was lastly passed to
+			 * the guest OS in order to prevent it from
+			 * going backwords.
+			 */
+			unsigned long itc_offset;
+			unsigned long itc_last;
+		};
+	};
+};
+
+struct arch_vcpu_info {
+	/* nothing */
+};
+
+/*
+ * This structure is used for magic page in domain pseudo physical address
+ * space and the result of XENMEM_machine_memory_map.
+ * As the XENMEM_machine_memory_map result,
+ * xen_memory_map::nr_entries indicates the size in bytes
+ * including struct xen_ia64_memmap_info. Not the number of entries.
+ */
+struct xen_ia64_memmap_info {
+	uint64_t efi_memmap_size;	/* size of EFI memory map */
+	uint64_t efi_memdesc_size;	/* size of an EFI memory map
+					 * descriptor */
+	uint32_t efi_memdesc_version;	/* memory descriptor version */
+	void *memdesc[0];		/* array of efi_memory_desc_t */
+};
+
+struct arch_shared_info {
+	/* PFN of the start_info page.	*/
+	unsigned long start_info_pfn;
+
+	/* Interrupt vector for event channel.	*/
+	int evtchn_vector;
+
+	/* PFN of memmap_info page */
+	unsigned int memmap_info_num_pages;	/* currently only = 1 case is
+						   supported. */
+	unsigned long memmap_info_pfn;
+
+	uint64_t pad[31];
+};
+
+struct xen_callback {
+	unsigned long ip;
+};
+typedef struct xen_callback xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/* Size of the shared_info area (this is not related to page size).  */
+#define XSI_SHIFT			14
+#define XSI_SIZE			(1 << XSI_SHIFT)
+/* Log size of mapped_regs area (64 KB - only 4KB is used).  */
+#define XMAPPEDREGS_SHIFT		12
+#define XMAPPEDREGS_SIZE		(1 << XMAPPEDREGS_SHIFT)
+/* Offset of XASI (Xen arch shared info) wrt XSI_BASE.	*/
+#define XMAPPEDREGS_OFS			XSI_SIZE
+
+/* Hyperprivops.  */
+#define HYPERPRIVOP_START		0x1
+#define HYPERPRIVOP_RFI			(HYPERPRIVOP_START + 0x0)
+#define HYPERPRIVOP_RSM_DT		(HYPERPRIVOP_START + 0x1)
+#define HYPERPRIVOP_SSM_DT		(HYPERPRIVOP_START + 0x2)
+#define HYPERPRIVOP_COVER		(HYPERPRIVOP_START + 0x3)
+#define HYPERPRIVOP_ITC_D		(HYPERPRIVOP_START + 0x4)
+#define HYPERPRIVOP_ITC_I		(HYPERPRIVOP_START + 0x5)
+#define HYPERPRIVOP_SSM_I		(HYPERPRIVOP_START + 0x6)
+#define HYPERPRIVOP_GET_IVR		(HYPERPRIVOP_START + 0x7)
+#define HYPERPRIVOP_GET_TPR		(HYPERPRIVOP_START + 0x8)
+#define HYPERPRIVOP_SET_TPR		(HYPERPRIVOP_START + 0x9)
+#define HYPERPRIVOP_EOI			(HYPERPRIVOP_START + 0xa)
+#define HYPERPRIVOP_SET_ITM		(HYPERPRIVOP_START + 0xb)
+#define HYPERPRIVOP_THASH		(HYPERPRIVOP_START + 0xc)
+#define HYPERPRIVOP_PTC_GA		(HYPERPRIVOP_START + 0xd)
+#define HYPERPRIVOP_ITR_D		(HYPERPRIVOP_START + 0xe)
+#define HYPERPRIVOP_GET_RR		(HYPERPRIVOP_START + 0xf)
+#define HYPERPRIVOP_SET_RR		(HYPERPRIVOP_START + 0x10)
+#define HYPERPRIVOP_SET_KR		(HYPERPRIVOP_START + 0x11)
+#define HYPERPRIVOP_FC			(HYPERPRIVOP_START + 0x12)
+#define HYPERPRIVOP_GET_CPUID		(HYPERPRIVOP_START + 0x13)
+#define HYPERPRIVOP_GET_PMD		(HYPERPRIVOP_START + 0x14)
+#define HYPERPRIVOP_GET_EFLAG		(HYPERPRIVOP_START + 0x15)
+#define HYPERPRIVOP_SET_EFLAG		(HYPERPRIVOP_START + 0x16)
+#define HYPERPRIVOP_RSM_BE		(HYPERPRIVOP_START + 0x17)
+#define HYPERPRIVOP_GET_PSR		(HYPERPRIVOP_START + 0x18)
+#define HYPERPRIVOP_SET_RR0_TO_RR4	(HYPERPRIVOP_START + 0x19)
+#define HYPERPRIVOP_MAX			(0x1a)
+
+/* Fast and light hypercalls.  */
+#define __HYPERVISOR_ia64_fast_eoi	__HYPERVISOR_arch_1
+
+/* Xencomm macros.  */
+#define XENCOMM_INLINE_MASK		0xf800000000000000UL
+#define XENCOMM_INLINE_FLAG		0x8000000000000000UL
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Optimization features.
+ * The hypervisor may do some special optimizations for guests. This hypercall
+ * can be used to switch on/of these special optimizations.
+ */
+#define __HYPERVISOR_opt_feature	0x700UL
+
+#define XEN_IA64_OPTF_OFF		0x0
+#define XEN_IA64_OPTF_ON		0x1
+
+/*
+ * If this feature is switched on, the hypervisor inserts the
+ * tlb entries without calling the guests traphandler.
+ * This is useful in guests using region 7 for identity mapping
+ * like the linux kernel does.
+ */
+#define XEN_IA64_OPTF_IDENT_MAP_REG7	1
+
+/* Identity mapping of region 4 addresses in HVM. */
+#define XEN_IA64_OPTF_IDENT_MAP_REG4	2
+
+/* Identity mapping of region 5 addresses in HVM. */
+#define XEN_IA64_OPTF_IDENT_MAP_REG5	3
+
+#define XEN_IA64_OPTF_IDENT_MAP_NOT_SET	 (0)
+
+struct xen_ia64_opt_feature {
+	unsigned long cmd;	/* Which feature */
+	unsigned char on;	/* Switch feature on/off */
+	union {
+		struct {
+			/* The page protection bit mask of the pte.
+			 * This will be or'ed with the pte. */
+			unsigned long pgprot;
+			unsigned long key;	/* A protection key for itir.*/
+		};
+	};
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_XEN_INTERFACE_H */
diff --git a/arch/ia64/include/asm/xen/irq.h b/arch/ia64/include/asm/xen/irq.h
new file mode 100644
index 00000000..a9045098
--- /dev/null
+++ b/arch/ia64/include/asm/xen/irq.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/irq.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _ASM_IA64_XEN_IRQ_H
+#define _ASM_IA64_XEN_IRQ_H
+
+/*
+ * The flat IRQ space is divided into two regions:
+ *  1. A one-to-one mapping of real physical IRQs. This space is only used
+ *     if we have physical device-access privilege. This region is at the
+ *     start of the IRQ space so that existing device drivers do not need
+ *     to be modified to translate physical IRQ numbers into our IRQ space.
+ *  3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
+ *     are bound using the provided bind/unbind functions.
+ */
+
+#define XEN_PIRQ_BASE		0
+#define XEN_NR_PIRQS		256
+
+#define XEN_DYNIRQ_BASE		(XEN_PIRQ_BASE + XEN_NR_PIRQS)
+#define XEN_NR_DYNIRQS		(NR_CPUS * 8)
+
+#define XEN_NR_IRQS		(XEN_NR_PIRQS + XEN_NR_DYNIRQS)
+
+#endif /* _ASM_IA64_XEN_IRQ_H */
diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h
new file mode 100644
index 00000000..c57fa910
--- /dev/null
+++ b/arch/ia64/include/asm/xen/minstate.h
@@ -0,0 +1,143 @@
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define XEN_ACCOUNT_GET_STAMP		\
+	MOV_FROM_ITC(pUStk, p6, r20, r2);
+#else
+#define XEN_ACCOUNT_GET_STAMP
+#endif
+
+/*
+ * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ *	psr.ic: off
+ *	r31:	contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ *	psr.ic: off
+ *	 r2 = points to &pt_regs.r16
+ *	 r8 = contents of ar.ccv
+ *	 r9 = contents of ar.csd
+ *	r10 = contents of ar.ssd
+ *	r11 = FPSR_DEFAULT
+ *	r12 = kernel sp (kernel virtual address)
+ *	r13 = points to current task_struct (kernel virtual address)
+ *	p15 = TRUE if psr.i is set in cr.ipsr
+ *	predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ *		preserved
+ * CONFIG_XEN note: p6/p7 are not preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro.  This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+#define XEN_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND)					\
+	mov r16=IA64_KR(CURRENT);	/* M */							\
+	mov r27=ar.rsc;			/* M */							\
+	mov r20=r1;			/* A */							\
+	mov r25=ar.unat;		/* M */							\
+	MOV_FROM_IPSR(p0,r29);		/* M */							\
+	MOV_FROM_IIP(r28);		/* M */							\
+	mov r21=ar.fpsr;		/* M */							\
+	mov r26=ar.pfs;			/* I */							\
+	__COVER;			/* B;; (or nothing) */					\
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
+	;;											\
+	ld1 r17=[r16];				/* load current->thread.on_ustack flag */	\
+	st1 [r16]=r0;				/* clear current->thread.on_ustack flag */	\
+	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16						\
+	/* switch from user to kernel RBS: */							\
+	;;											\
+	invala;				/* M */							\
+	/* SAVE_IFS;*/ /* see xen special handling below */					\
+	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
+	;;											\
+(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
+	;;											\
+(pUStk)	mov.m r24=ar.rnat;									\
+(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
+(pKStk) mov r1=sp;					/* get sp  */				\
+	;;											\
+(pUStk) lfetch.fault.excl.nt1 [r22];								\
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
+(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
+	;;											\
+(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
+	;;											\
+(pUStk)	mov r18=ar.bsp;										\
+(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
+	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
+	adds r16=PT(CR_IPSR),r1;								\
+	;;											\
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;						\
+	st8 [r16]=r29;		/* save cr.ipsr */						\
+	;;											\
+	lfetch.fault.excl.nt1 [r17];								\
+	tbit.nz p15,p0=r29,IA64_PSR_I_BIT;							\
+	mov r29=b0										\
+	;;											\
+	WORKAROUND;										\
+	adds r16=PT(R8),r1;	/* initialize first base pointer */				\
+	adds r17=PT(R9),r1;	/* initialize second base pointer */				\
+(pKStk)	mov r18=r0;		/* make sure r18 isn't NaT */					\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r8,16;								\
+.mem.offset 8,0; st8.spill [r17]=r9,16;								\
+        ;;											\
+.mem.offset 0,0; st8.spill [r16]=r10,24;							\
+	movl r8=XSI_PRECOVER_IFS;								\
+.mem.offset 8,0; st8.spill [r17]=r11,24;							\
+        ;;											\
+	/* xen special handling for possibly lazy cover */					\
+	/* SAVE_MIN case in dispatch_ia32_handler: mov r30=r0 */				\
+	ld8 r30=[r8];										\
+(pUStk)	sub r18=r18,r22;	/* r18=RSE.ndirty*8 */						\
+	st8 [r16]=r28,16;	/* save cr.iip */						\
+	;;											\
+	st8 [r17]=r30,16;	/* save cr.ifs */						\
+	mov r8=ar.ccv;										\
+	mov r9=ar.csd;										\
+	mov r10=ar.ssd;										\
+	movl r11=FPSR_DEFAULT;   /* L-unit */							\
+	;;											\
+	st8 [r16]=r25,16;	/* save ar.unat */						\
+	st8 [r17]=r26,16;	/* save ar.pfs */						\
+	shl r18=r18,16;		/* compute ar.rsc to be used for "loadrs" */			\
+	;;											\
+	st8 [r16]=r27,16;	/* save ar.rsc */						\
+(pUStk)	st8 [r17]=r24,16;	/* save ar.rnat */						\
+(pKStk)	adds r17=16,r17;	/* skip over ar_rnat field */					\
+	;;			/* avoid RAW on r16 & r17 */					\
+(pUStk)	st8 [r16]=r23,16;	/* save ar.bspstore */						\
+	st8 [r17]=r31,16;	/* save predicates */						\
+(pKStk)	adds r16=16,r16;	/* skip over ar_bspstore field */				\
+	;;											\
+	st8 [r16]=r29,16;	/* save b0 */							\
+	st8 [r17]=r18,16;	/* save ar.rsc value for "loadrs" */				\
+	cmp.eq pNonSys,pSys=r0,r0	/* initialize pSys=0, pNonSys=1 */			\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r20,16;	/* save original r1 */				\
+.mem.offset 8,0; st8.spill [r17]=r12,16;							\
+	adds r12=-16,r1;	/* switch to kernel memory stack (with 16 bytes of scratch) */	\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r13,16;							\
+.mem.offset 8,0; st8.spill [r17]=r21,16;	/* save ar.fpsr */				\
+	mov r13=IA64_KR(CURRENT);	/* establish `current' */				\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r15,16;							\
+.mem.offset 8,0; st8.spill [r17]=r14,16;							\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r2,16;								\
+.mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	XEN_ACCOUNT_GET_STAMP									\
+	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
+	;;											\
+	EXTRA;											\
+	movl r1=__gp;		/* establish kernel global pointer */				\
+	;;											\
+	ACCOUNT_SYS_ENTER									\
+	BSW_1(r3,r14);	/* switch back to bank 1 (must be last in insn group) */		\
+	;;
diff --git a/arch/ia64/include/asm/xen/page.h b/arch/ia64/include/asm/xen/page.h
new file mode 100644
index 00000000..03441a78
--- /dev/null
+++ b/arch/ia64/include/asm/xen/page.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/page.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _ASM_IA64_XEN_PAGE_H
+#define _ASM_IA64_XEN_PAGE_H
+
+#define INVALID_P2M_ENTRY	(~0UL)
+
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+	return mfn;
+}
+
+static inline unsigned long pfn_to_mfn(unsigned long pfn)
+{
+	return pfn;
+}
+
+#define phys_to_machine_mapping_valid(_x)	(1)
+
+static inline void *mfn_to_virt(unsigned long mfn)
+{
+	return __va(mfn << PAGE_SHIFT);
+}
+
+static inline unsigned long virt_to_mfn(void *virt)
+{
+	return __pa(virt) >> PAGE_SHIFT;
+}
+
+/* for tpmfront.c */
+static inline unsigned long virt_to_machine(void *virt)
+{
+	return __pa(virt);
+}
+
+static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	/* nothing */
+}
+
+#define pte_mfn(_x)	pte_pfn(_x)
+#define mfn_pte(_x, _y)	__pte_ma(0)		/* unmodified use */
+#define __pte_ma(_x)	((pte_t) {(_x)})        /* unmodified use */
+
+#endif /* _ASM_IA64_XEN_PAGE_H */
diff --git a/arch/ia64/include/asm/xen/patchlist.h b/arch/ia64/include/asm/xen/patchlist.h
new file mode 100644
index 00000000..eae944e8
--- /dev/null
+++ b/arch/ia64/include/asm/xen/patchlist.h
@@ -0,0 +1,38 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/patchlist.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define __paravirt_start_gate_fsyscall_patchlist		\
+	__xen_start_gate_fsyscall_patchlist
+#define __paravirt_end_gate_fsyscall_patchlist			\
+	__xen_end_gate_fsyscall_patchlist
+#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist	\
+	__xen_start_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist	\
+	__xen_end_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_start_gate_vtop_patchlist			\
+	__xen_start_gate_vtop_patchlist
+#define __paravirt_end_gate_vtop_patchlist			\
+	__xen_end_gate_vtop_patchlist
+#define __paravirt_start_gate_mckinley_e9_patchlist		\
+	__xen_start_gate_mckinley_e9_patchlist
+#define __paravirt_end_gate_mckinley_e9_patchlist		\
+	__xen_end_gate_mckinley_e9_patchlist
diff --git a/arch/ia64/include/asm/xen/privop.h b/arch/ia64/include/asm/xen/privop.h
new file mode 100644
index 00000000..fb4ec5e0
--- /dev/null
+++ b/arch/ia64/include/asm/xen/privop.h
@@ -0,0 +1,135 @@
+#ifndef _ASM_IA64_XEN_PRIVOP_H
+#define _ASM_IA64_XEN_PRIVOP_H
+
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@hp.com>
+ *
+ * Paravirtualizations of privileged operations for Xen/ia64
+ *
+ *
+ * inline privop and paravirt_alt support
+ * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ */
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>		/* arch-ia64.h requires uint64_t */
+#endif
+#include <asm/xen/interface.h>
+
+/* At 1 MB, before per-cpu space but still addressable using addl instead
+   of movl. */
+#define XSI_BASE			0xfffffffffff00000
+
+/* Address of mapped regs.  */
+#define XMAPPEDREGS_BASE		(XSI_BASE + XSI_SIZE)
+
+#ifdef __ASSEMBLY__
+#define XEN_HYPER_RFI			break HYPERPRIVOP_RFI
+#define XEN_HYPER_RSM_PSR_DT		break HYPERPRIVOP_RSM_DT
+#define XEN_HYPER_SSM_PSR_DT		break HYPERPRIVOP_SSM_DT
+#define XEN_HYPER_COVER			break HYPERPRIVOP_COVER
+#define XEN_HYPER_ITC_D			break HYPERPRIVOP_ITC_D
+#define XEN_HYPER_ITC_I			break HYPERPRIVOP_ITC_I
+#define XEN_HYPER_SSM_I			break HYPERPRIVOP_SSM_I
+#define XEN_HYPER_GET_IVR		break HYPERPRIVOP_GET_IVR
+#define XEN_HYPER_THASH			break HYPERPRIVOP_THASH
+#define XEN_HYPER_ITR_D			break HYPERPRIVOP_ITR_D
+#define XEN_HYPER_SET_KR		break HYPERPRIVOP_SET_KR
+#define XEN_HYPER_GET_PSR		break HYPERPRIVOP_GET_PSR
+#define XEN_HYPER_SET_RR0_TO_RR4	break HYPERPRIVOP_SET_RR0_TO_RR4
+
+#define XSI_IFS				(XSI_BASE + XSI_IFS_OFS)
+#define XSI_PRECOVER_IFS		(XSI_BASE + XSI_PRECOVER_IFS_OFS)
+#define XSI_IFA				(XSI_BASE + XSI_IFA_OFS)
+#define XSI_ISR				(XSI_BASE + XSI_ISR_OFS)
+#define XSI_IIM				(XSI_BASE + XSI_IIM_OFS)
+#define XSI_ITIR			(XSI_BASE + XSI_ITIR_OFS)
+#define XSI_PSR_I_ADDR			(XSI_BASE + XSI_PSR_I_ADDR_OFS)
+#define XSI_PSR_IC			(XSI_BASE + XSI_PSR_IC_OFS)
+#define XSI_IPSR			(XSI_BASE + XSI_IPSR_OFS)
+#define XSI_IIP				(XSI_BASE + XSI_IIP_OFS)
+#define XSI_B1NAT			(XSI_BASE + XSI_B1NATS_OFS)
+#define XSI_BANK1_R16			(XSI_BASE + XSI_BANK1_R16_OFS)
+#define XSI_BANKNUM			(XSI_BASE + XSI_BANKNUM_OFS)
+#define XSI_IHA				(XSI_BASE + XSI_IHA_OFS)
+#define XSI_ITC_OFFSET			(XSI_BASE + XSI_ITC_OFFSET_OFS)
+#define XSI_ITC_LAST			(XSI_BASE + XSI_ITC_LAST_OFS)
+#endif
+
+#ifndef __ASSEMBLY__
+
+/************************************************/
+/* Instructions paravirtualized for correctness */
+/************************************************/
+
+/* "fc" and "thash" are privilege-sensitive instructions, meaning they
+ *  may have different semantics depending on whether they are executed
+ *  at PL0 vs PL!=0.  When paravirtualized, these instructions mustn't
+ *  be allowed to execute directly, lest incorrect semantics result. */
+extern void xen_fc(void *addr);
+extern unsigned long xen_thash(unsigned long addr);
+
+/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
+ * is not currently used (though it may be in a long-format VHPT system!)
+ * and the semantics of cover only change if psr.ic is off which is very
+ * rare (and currently non-existent outside of assembly code */
+
+/* There are also privilege-sensitive registers.  These registers are
+ * readable at any privilege level but only writable at PL0. */
+extern unsigned long xen_get_cpuid(int index);
+extern unsigned long xen_get_pmd(int index);
+
+#ifndef ASM_SUPPORTED
+extern unsigned long xen_get_eflag(void);	/* see xen_ia64_getreg */
+extern void xen_set_eflag(unsigned long);	/* see xen_ia64_setreg */
+#endif
+
+/************************************************/
+/* Instructions paravirtualized for performance */
+/************************************************/
+
+/* Xen uses memory-mapped virtual privileged registers for access to many
+ * performance-sensitive privileged registers.  Some, like the processor
+ * status register (psr), are broken up into multiple memory locations.
+ * Others, like "pend", are abstractions based on privileged registers.
+ * "Pend" is guaranteed to be set if reading cr.ivr would return a
+ * (non-spurious) interrupt. */
+#define XEN_MAPPEDREGS ((struct mapped_regs *)XMAPPEDREGS_BASE)
+
+#define XSI_PSR_I			\
+	(*XEN_MAPPEDREGS->interrupt_mask_addr)
+#define xen_get_virtual_psr_i()		\
+	(!XSI_PSR_I)
+#define xen_set_virtual_psr_i(_val)	\
+	({ XSI_PSR_I = (uint8_t)(_val) ? 0 : 1; })
+#define xen_set_virtual_psr_ic(_val)	\
+	({ XEN_MAPPEDREGS->interrupt_collection_enabled = _val ? 1 : 0; })
+#define xen_get_virtual_pend()		\
+	(*(((uint8_t *)XEN_MAPPEDREGS->interrupt_mask_addr) - 1))
+
+#ifndef ASM_SUPPORTED
+/* Although all privileged operations can be left to trap and will
+ * be properly handled by Xen, some are frequent enough that we use
+ * hyperprivops for performance. */
+extern unsigned long xen_get_psr(void);
+extern unsigned long xen_get_ivr(void);
+extern unsigned long xen_get_tpr(void);
+extern void xen_hyper_ssm_i(void);
+extern void xen_set_itm(unsigned long);
+extern void xen_set_tpr(unsigned long);
+extern void xen_eoi(unsigned long);
+extern unsigned long xen_get_rr(unsigned long index);
+extern void xen_set_rr(unsigned long index, unsigned long val);
+extern void xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
+			       unsigned long val2, unsigned long val3,
+			       unsigned long val4);
+extern void xen_set_kr(unsigned long index, unsigned long val);
+extern void xen_ptcga(unsigned long addr, unsigned long size);
+#endif /* !ASM_SUPPORTED */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_XEN_PRIVOP_H */
diff --git a/arch/ia64/include/asm/xen/xcom_hcall.h b/arch/ia64/include/asm/xen/xcom_hcall.h
new file mode 100644
index 00000000..20b2950c
--- /dev/null
+++ b/arch/ia64/include/asm/xen/xcom_hcall.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2006 Tristan Gingold <tristan.gingold@bull.net>, Bull SAS
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef _ASM_IA64_XEN_XCOM_HCALL_H
+#define _ASM_IA64_XEN_XCOM_HCALL_H
+
+/* These function creates inline or mini descriptor for the parameters and
+   calls the corresponding xencomm_arch_hypercall_X.
+   Architectures should defines HYPERVISOR_xxx as xencomm_hypercall_xxx unless
+   they want to use their own wrapper.  */
+extern int xencomm_hypercall_console_io(int cmd, int count, char *str);
+
+extern int xencomm_hypercall_event_channel_op(int cmd, void *op);
+
+extern int xencomm_hypercall_xen_version(int cmd, void *arg);
+
+extern int xencomm_hypercall_physdev_op(int cmd, void *op);
+
+extern int xencomm_hypercall_grant_table_op(unsigned int cmd, void *op,
+					    unsigned int count);
+
+extern int xencomm_hypercall_sched_op(int cmd, void *arg);
+
+extern int xencomm_hypercall_multicall(void *call_list, int nr_calls);
+
+extern int xencomm_hypercall_callback_op(int cmd, void *arg);
+
+extern int xencomm_hypercall_memory_op(unsigned int cmd, void *arg);
+
+extern int xencomm_hypercall_suspend(unsigned long srec);
+
+extern long xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg);
+
+extern long xencomm_hypercall_opt_feature(void *arg);
+
+#endif /* _ASM_IA64_XEN_XCOM_HCALL_H */
diff --git a/arch/ia64/include/asm/xen/xencomm.h b/arch/ia64/include/asm/xen/xencomm.h
new file mode 100644
index 00000000..cded677b
--- /dev/null
+++ b/arch/ia64/include/asm/xen/xencomm.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef _ASM_IA64_XEN_XENCOMM_H
+#define _ASM_IA64_XEN_XENCOMM_H
+
+#include <xen/xencomm.h>
+#include <asm/pgtable.h>
+
+/* Must be called before any hypercall.  */
+extern void xencomm_initialize(void);
+extern int xencomm_is_initialized(void);
+
+/* Check if virtual contiguity means physical contiguity
+ * where the passed address is a pointer value in virtual address.
+ * On ia64, identity mapping area in region 7 or the piece of region 5
+ * that is mapped by itr[IA64_TR_KERNEL]/dtr[IA64_TR_KERNEL]
+ */
+static inline int xencomm_is_phys_contiguous(unsigned long addr)
+{
+	return (PAGE_OFFSET <= addr &&
+		addr < (PAGE_OFFSET + (1UL << IA64_MAX_PHYS_BITS))) ||
+		(KERNEL_START <= addr &&
+		 addr < KERNEL_START + KERNEL_TR_PAGE_SIZE);
+}
+
+#endif /* _ASM_IA64_XEN_XENCOMM_H */
diff --git a/arch/ia64/include/asm/xor.h b/arch/ia64/include/asm/xor.h
new file mode 100644
index 00000000..a349e23d
--- /dev/null
+++ b/arch/ia64/include/asm/xor.h
@@ -0,0 +1,31 @@
+/*
+ * Optimized RAID-5 checksumming functions for IA-64.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *);
+extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *,
+		       unsigned long *);
+extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *,
+		       unsigned long *, unsigned long *);
+extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
+		       unsigned long *, unsigned long *, unsigned long *);
+
+static struct xor_block_template xor_block_ia64 = {
+	.name =	"ia64",
+	.do_2 =	xor_ia64_2,
+	.do_3 =	xor_ia64_3,
+	.do_4 =	xor_ia64_4,
+	.do_5 =	xor_ia64_5,
+};
+
+#define XOR_TRY_TEMPLATES	xor_speed(&xor_block_ia64)
diff --git a/arch/ia64/install.sh b/arch/ia64/install.sh
new file mode 100644
index 00000000..0e932f5d
--- /dev/null
+++ b/arch/ia64/install.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+#
+# arch/ia64/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for ia64 architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+
+# User may have a custom install script
+
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+
+# Default install - same as make zlilo
+
+if [ -f $4/vmlinuz ]; then
+	mv $4/vmlinuz $4/vmlinuz.old
+fi
+
+if [ -f $4/System.map ]; then
+	mv $4/System.map $4/System.old
+fi
+
+cat $2 > $4/vmlinuz
+cp $3 $4/System.map
+
+test -x /usr/sbin/elilo && /usr/sbin/elilo
diff --git a/arch/ia64/kernel/.gitignore b/arch/ia64/kernel/.gitignore
new file mode 100644
index 00000000..21cb0da5
--- /dev/null
+++ b/arch/ia64/kernel/.gitignore
@@ -0,0 +1,2 @@
+gate.lds
+vmlinux.lds
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
new file mode 100644
index 00000000..395c2f21
--- /dev/null
+++ b/arch/ia64/kernel/Makefile
@@ -0,0 +1,114 @@
+#
+# Makefile for the linux kernel.
+#
+
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
+extra-y	:= head.o init_task.o vmlinux.lds
+
+obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
+	 irq_lsapic.o ivt.o machvec.o pal.o paravirt_patchlist.o patch.o process.o perfmon.o ptrace.o sal.o		\
+	 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+	 unwind.o mca.o mca_asm.o topology.o dma-mapping.o
+
+obj-$(CONFIG_ACPI)		+= acpi.o acpi-ext.o
+obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o
+
+obj-$(CONFIG_IA64_PALINFO)	+= palinfo.o
+obj-$(CONFIG_IOSAPIC)		+= iosapic.o
+obj-$(CONFIG_MODULES)		+= module.o
+obj-$(CONFIG_SMP)		+= smp.o smpboot.o
+obj-$(CONFIG_NUMA)		+= numa.o
+obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
+obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
+obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
+obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
+obj-$(CONFIG_AUDIT)		+= audit.o
+obj-$(CONFIG_PCI_MSI)		+= msi_ia64.o
+mca_recovery-y			+= mca_drv.o mca_drv_asm.o
+obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
+
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirtentry.o \
+				   paravirt_patch.o
+
+obj-$(CONFIG_IA64_ESI)		+= esi.o
+ifneq ($(CONFIG_IA64_ESI),)
+obj-y				+= esi_stub.o	# must be in kernel proper
+endif
+obj-$(CONFIG_DMAR)		+= pci-dma.o
+obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
+
+obj-$(CONFIG_BINFMT_ELF)	+= elfcore.o
+
+# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
+CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
+
+# The gate DSO image is built using a special linker script.
+include $(srctree)/arch/ia64/kernel/Makefile.gate
+# tell compiled for native
+CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE
+
+# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
+define sed-y
+	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
+endef
+quiet_cmd_nr_irqs = GEN     $@
+define cmd_nr_irqs
+	(set -e; \
+	 echo "#ifndef __ASM_NR_IRQS_H__"; \
+	 echo "#define __ASM_NR_IRQS_H__"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Kbuild"; \
+	 echo " *"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-y) $<; \
+	 echo ""; \
+	 echo "#endif" ) > $@
+endef
+
+# We use internal kbuild rules to avoid the "is up to date" message from make
+arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c
+	$(Q)mkdir -p $(dir $@)
+	$(call if_changed_dep,cc_s_c)
+
+include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
+	$(Q)mkdir -p $(dir $@)
+	$(call cmd,nr_irqs)
+
+#
+# native ivt.S, entry.S and fsys.S
+#
+ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o
+define paravirtualized_native
+AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
+AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK
+extra-y += pvchk-$(1)
+endef
+$(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj))))
+
+#
+# Checker for paravirtualizations of privileged operations.
+#
+quiet_cmd_pv_check_sed = PVCHK   $@
+define cmd_pv_check_sed
+	sed -f $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed $< > $@
+endef
+
+$(obj)/pvchk-sed-%.s: $(src)/%.S $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed FORCE
+	$(call if_changed_dep,as_s_S)
+$(obj)/pvchk-%.s: $(obj)/pvchk-sed-%.s FORCE
+	$(call if_changed,pv_check_sed)
+$(obj)/pvchk-%.o: $(obj)/pvchk-%.s FORCE
+	$(call if_changed,as_o_S)
+.PRECIOUS: $(obj)/pvchk-sed-%.s $(obj)/pvchk-%.s $(obj)/pvchk-%.o
diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate
new file mode 100644
index 00000000..ceeffc50
--- /dev/null
+++ b/arch/ia64/kernel/Makefile.gate
@@ -0,0 +1,27 @@
+# The gate DSO image is built using a special linker script.
+
+targets += gate.so gate-syms.o
+
+extra-y += gate.so gate-syms.o gate.lds gate.o
+
+CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+
+quiet_cmd_gate = GATE $@
+      cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
+		     $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
+	$(call if_changed,gate)
+
+$(obj)/built-in.o: $(obj)/gate-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+
+GATECFLAGS_gate-syms.o = -r
+$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
+	$(call if_changed,gate)
+
+# gate-data.o contains the gate DSO image as data in section .data..gate.
+# We must build gate.so before we can assemble it.
+# Note: kbuild does not track this dependency due to usage of .incbin
+$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c
new file mode 100644
index 00000000..8b9318d3
--- /dev/null
+++ b/arch/ia64/kernel/acpi-ext.c
@@ -0,0 +1,104 @@
+/*
+ * (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P.
+ *	Alex Williamson <alex.williamson@hp.com>
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+
+#include <asm/acpi-ext.h>
+
+/*
+ * Device CSRs that do not appear in PCI config space should be described
+ * via ACPI.  This would normally be done with Address Space Descriptors
+ * marked as "consumer-only," but old versions of Windows and Linux ignore
+ * the producer/consumer flag, so HP invented a vendor-defined resource to
+ * describe the location and size of CSR space.
+ */
+
+struct acpi_vendor_uuid hp_ccsr_uuid = {
+	.subtype = 2,
+	.data = { 0xf9, 0xad, 0xe9, 0x69, 0x4f, 0x92, 0x5f, 0xab, 0xf6, 0x4a,
+	    0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad },
+};
+
+static acpi_status hp_ccsr_locate(acpi_handle obj, u64 *base, u64 *length)
+{
+	acpi_status status;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_resource *resource;
+	struct acpi_resource_vendor_typed *vendor;
+
+	status = acpi_get_vendor_resource(obj, METHOD_NAME__CRS, &hp_ccsr_uuid,
+		&buffer);
+
+	resource = buffer.pointer;
+	vendor = &resource->data.vendor_typed;
+
+	if (ACPI_FAILURE(status) || vendor->byte_length < 16) {
+		status = AE_NOT_FOUND;
+		goto exit;
+	}
+
+	memcpy(base, vendor->byte_data, sizeof(*base));
+	memcpy(length, vendor->byte_data + 8, sizeof(*length));
+
+  exit:
+	kfree(buffer.pointer);
+	return status;
+}
+
+struct csr_space {
+	u64	base;
+	u64	length;
+};
+
+static acpi_status find_csr_space(struct acpi_resource *resource, void *data)
+{
+	struct csr_space *space = data;
+	struct acpi_resource_address64 addr;
+	acpi_status status;
+
+	status = acpi_resource_to_address64(resource, &addr);
+	if (ACPI_SUCCESS(status) &&
+	    addr.resource_type == ACPI_MEMORY_RANGE &&
+	    addr.address_length &&
+	    addr.producer_consumer == ACPI_CONSUMER) {
+		space->base = addr.minimum;
+		space->length = addr.address_length;
+		return AE_CTRL_TERMINATE;
+	}
+	return AE_OK;		/* keep looking */
+}
+
+static acpi_status hp_crs_locate(acpi_handle obj, u64 *base, u64 *length)
+{
+	struct csr_space space = { 0, 0 };
+
+	acpi_walk_resources(obj, METHOD_NAME__CRS, find_csr_space, &space);
+	if (!space.length)
+		return AE_NOT_FOUND;
+
+	*base = space.base;
+	*length = space.length;
+	return AE_OK;
+}
+
+acpi_status hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length)
+{
+	acpi_status status;
+
+	status = hp_ccsr_locate(obj, csr_base, csr_length);
+	if (ACPI_SUCCESS(status))
+		return status;
+
+	return hp_crs_locate(obj, csr_base, csr_length);
+}
+EXPORT_SYMBOL(hp_acpi_csr_space);
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
new file mode 100644
index 00000000..f19de9f7
--- /dev/null
+++ b/arch/ia64/kernel/acpi.c
@@ -0,0 +1,1048 @@
+/*
+ *  acpi.c - Architecture-Specific Low-Level ACPI Support
+ *
+ *  Copyright (C) 1999 VA Linux Systems
+ *  Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
+ *  Copyright (C) 2000, 2002-2003 Hewlett-Packard Co.
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *  Copyright (C) 2000 Intel Corp.
+ *  Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com>
+ *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com>
+ *  Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
+ *  Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
+ *  Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/mmzone.h>
+#include <linux/nodemask.h>
+#include <linux/slab.h>
+#include <acpi/processor.h>
+#include <asm/io.h>
+#include <asm/iosapic.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/system.h>
+#include <asm/numa.h>
+#include <asm/sal.h>
+#include <asm/cyclone.h>
+#include <asm/xen/hypervisor.h>
+
+#define BAD_MADT_ENTRY(entry, end) (                                        \
+		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
+		((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
+
+#define PREFIX			"ACPI: "
+
+u32 acpi_rsdt_forced;
+unsigned int acpi_cpei_override;
+unsigned int acpi_cpei_phys_cpuid;
+
+unsigned long acpi_wakeup_address = 0;
+
+#ifdef CONFIG_IA64_GENERIC
+static unsigned long __init acpi_find_rsdp(void)
+{
+	unsigned long rsdp_phys = 0;
+
+	if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+		rsdp_phys = efi.acpi20;
+	else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
+		printk(KERN_WARNING PREFIX
+		       "v1.0/r0.71 tables no longer supported\n");
+	return rsdp_phys;
+}
+
+const char __init *
+acpi_get_sysname(void)
+{
+	unsigned long rsdp_phys;
+	struct acpi_table_rsdp *rsdp;
+	struct acpi_table_xsdt *xsdt;
+	struct acpi_table_header *hdr;
+#ifdef CONFIG_DMAR
+	u64 i, nentries;
+#endif
+
+	rsdp_phys = acpi_find_rsdp();
+	if (!rsdp_phys) {
+		printk(KERN_ERR
+		       "ACPI 2.0 RSDP not found, default to \"dig\"\n");
+		return "dig";
+	}
+
+	rsdp = (struct acpi_table_rsdp *)__va(rsdp_phys);
+	if (strncmp(rsdp->signature, ACPI_SIG_RSDP, sizeof(ACPI_SIG_RSDP) - 1)) {
+		printk(KERN_ERR
+		       "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
+		return "dig";
+	}
+
+	xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_physical_address);
+	hdr = &xsdt->header;
+	if (strncmp(hdr->signature, ACPI_SIG_XSDT, sizeof(ACPI_SIG_XSDT) - 1)) {
+		printk(KERN_ERR
+		       "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
+		return "dig";
+	}
+
+	if (!strcmp(hdr->oem_id, "HP")) {
+		return "hpzx1";
+	} else if (!strcmp(hdr->oem_id, "SGI")) {
+		if (!strcmp(hdr->oem_table_id + 4, "UV"))
+			return "uv";
+		else
+			return "sn2";
+	} else if (xen_pv_domain() && !strcmp(hdr->oem_id, "XEN")) {
+		return "xen";
+	}
+
+#ifdef CONFIG_DMAR
+	/* Look for Intel IOMMU */
+	nentries = (hdr->length - sizeof(*hdr)) /
+			 sizeof(xsdt->table_offset_entry[0]);
+	for (i = 0; i < nentries; i++) {
+		hdr = __va(xsdt->table_offset_entry[i]);
+		if (strncmp(hdr->signature, ACPI_SIG_DMAR,
+			sizeof(ACPI_SIG_DMAR) - 1) == 0)
+			return "dig_vtd";
+	}
+#endif
+
+	return "dig";
+}
+#endif /* CONFIG_IA64_GENERIC */
+
+#define ACPI_MAX_PLATFORM_INTERRUPTS	256
+
+/* Array to record platform interrupt vectors for generic interrupt routing. */
+int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
+	[0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
+};
+
+enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
+
+/*
+ * Interrupt routing API for device drivers.  Provides interrupt vector for
+ * a generic platform event.  Currently only CPEI is implemented.
+ */
+int acpi_request_vector(u32 int_type)
+{
+	int vector = -1;
+
+	if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) {
+		/* corrected platform error interrupt */
+		vector = platform_intr_list[int_type];
+	} else
+		printk(KERN_ERR
+		       "acpi_request_vector(): invalid interrupt type\n");
+	return vector;
+}
+
+char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
+{
+	return __va(phys_addr);
+}
+
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+}
+
+/* --------------------------------------------------------------------------
+                            Boot-time Table Parsing
+   -------------------------------------------------------------------------- */
+
+static int available_cpus __initdata;
+struct acpi_table_madt *acpi_madt __initdata;
+static u8 has_8259;
+
+static int __init
+acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
+			  const unsigned long end)
+{
+	struct acpi_madt_local_apic_override *lapic;
+
+	lapic = (struct acpi_madt_local_apic_override *)header;
+
+	if (BAD_MADT_ENTRY(lapic, end))
+		return -EINVAL;
+
+	if (lapic->address) {
+		iounmap(ipi_base_addr);
+		ipi_base_addr = ioremap(lapic->address, 0);
+	}
+	return 0;
+}
+
+static int __init
+acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end)
+{
+	struct acpi_madt_local_sapic *lsapic;
+
+	lsapic = (struct acpi_madt_local_sapic *)header;
+
+	/*Skip BAD_MADT_ENTRY check, as lsapic size could vary */
+
+	if (lsapic->lapic_flags & ACPI_MADT_ENABLED) {
+#ifdef CONFIG_SMP
+		smp_boot_data.cpu_phys_id[available_cpus] =
+		    (lsapic->id << 8) | lsapic->eid;
+#endif
+		++available_cpus;
+	}
+
+	total_cpus++;
+	return 0;
+}
+
+static int __init
+acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end)
+{
+	struct acpi_madt_local_apic_nmi *lacpi_nmi;
+
+	lacpi_nmi = (struct acpi_madt_local_apic_nmi *)header;
+
+	if (BAD_MADT_ENTRY(lacpi_nmi, end))
+		return -EINVAL;
+
+	/* TBD: Support lapic_nmi entries */
+	return 0;
+}
+
+static int __init
+acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end)
+{
+	struct acpi_madt_io_sapic *iosapic;
+
+	iosapic = (struct acpi_madt_io_sapic *)header;
+
+	if (BAD_MADT_ENTRY(iosapic, end))
+		return -EINVAL;
+
+	return iosapic_init(iosapic->address, iosapic->global_irq_base);
+}
+
+static unsigned int __initdata acpi_madt_rev;
+
+static int __init
+acpi_parse_plat_int_src(struct acpi_subtable_header * header,
+			const unsigned long end)
+{
+	struct acpi_madt_interrupt_source *plintsrc;
+	int vector;
+
+	plintsrc = (struct acpi_madt_interrupt_source *)header;
+
+	if (BAD_MADT_ENTRY(plintsrc, end))
+		return -EINVAL;
+
+	/*
+	 * Get vector assignment for this interrupt, set attributes,
+	 * and program the IOSAPIC routing table.
+	 */
+	vector = iosapic_register_platform_intr(plintsrc->type,
+						plintsrc->global_irq,
+						plintsrc->io_sapic_vector,
+						plintsrc->eid,
+						plintsrc->id,
+						((plintsrc->inti_flags & ACPI_MADT_POLARITY_MASK) ==
+						 ACPI_MADT_POLARITY_ACTIVE_HIGH) ?
+						IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+						((plintsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) ==
+						 ACPI_MADT_TRIGGER_EDGE) ?
+						IOSAPIC_EDGE : IOSAPIC_LEVEL);
+
+	platform_intr_list[plintsrc->type] = vector;
+	if (acpi_madt_rev > 1) {
+		acpi_cpei_override = plintsrc->flags & ACPI_MADT_CPEI_OVERRIDE;
+	}
+
+	/*
+	 * Save the physical id, so we can check when its being removed
+	 */
+	acpi_cpei_phys_cpuid = ((plintsrc->id << 8) | (plintsrc->eid)) & 0xffff;
+
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+unsigned int can_cpei_retarget(void)
+{
+	extern int cpe_vector;
+	extern unsigned int force_cpei_retarget;
+
+	/*
+	 * Only if CPEI is supported and the override flag
+	 * is present, otherwise return that its re-targettable
+	 * if we are in polling mode.
+	 */
+	if (cpe_vector > 0) {
+		if (acpi_cpei_override || force_cpei_retarget)
+			return 1;
+		else
+			return 0;
+	}
+	return 1;
+}
+
+unsigned int is_cpu_cpei_target(unsigned int cpu)
+{
+	unsigned int logical_id;
+
+	logical_id = cpu_logical_id(acpi_cpei_phys_cpuid);
+
+	if (logical_id == cpu)
+		return 1;
+	else
+		return 0;
+}
+
+void set_cpei_target_cpu(unsigned int cpu)
+{
+	acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
+}
+#endif
+
+unsigned int get_cpei_target_cpu(void)
+{
+	return acpi_cpei_phys_cpuid;
+}
+
+static int __init
+acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
+		       const unsigned long end)
+{
+	struct acpi_madt_interrupt_override *p;
+
+	p = (struct acpi_madt_interrupt_override *)header;
+
+	if (BAD_MADT_ENTRY(p, end))
+		return -EINVAL;
+
+	iosapic_override_isa_irq(p->source_irq, p->global_irq,
+				 ((p->inti_flags & ACPI_MADT_POLARITY_MASK) ==
+				  ACPI_MADT_POLARITY_ACTIVE_HIGH) ?
+				 IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+				 ((p->inti_flags & ACPI_MADT_TRIGGER_MASK) ==
+				 ACPI_MADT_TRIGGER_EDGE) ?
+				 IOSAPIC_EDGE : IOSAPIC_LEVEL);
+	return 0;
+}
+
+static int __init
+acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end)
+{
+	struct acpi_madt_nmi_source *nmi_src;
+
+	nmi_src = (struct acpi_madt_nmi_source *)header;
+
+	if (BAD_MADT_ENTRY(nmi_src, end))
+		return -EINVAL;
+
+	/* TBD: Support nimsrc entries */
+	return 0;
+}
+
+static void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERMOW", 6))) {
+
+		/*
+		 * Unfortunately ITC_DRIFT is not yet part of the
+		 * official SAL spec, so the ITC_DRIFT bit is not
+		 * set by the BIOS on this hardware.
+		 */
+		sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT;
+
+		cyclone_setup();
+	}
+}
+
+static int __init acpi_parse_madt(struct acpi_table_header *table)
+{
+	if (!table)
+		return -EINVAL;
+
+	acpi_madt = (struct acpi_table_madt *)table;
+
+	acpi_madt_rev = acpi_madt->header.revision;
+
+	/* remember the value for reference after free_initmem() */
+#ifdef CONFIG_ITANIUM
+	has_8259 = 1;		/* Firmware on old Itanium systems is broken */
+#else
+	has_8259 = acpi_madt->flags & ACPI_MADT_PCAT_COMPAT;
+#endif
+	iosapic_system_init(has_8259);
+
+	/* Get base address of IPI Message Block */
+
+	if (acpi_madt->address)
+		ipi_base_addr = ioremap(acpi_madt->address, 0);
+
+	printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr);
+
+	acpi_madt_oem_check(acpi_madt->header.oem_id,
+			    acpi_madt->header.oem_table_id);
+
+	return 0;
+}
+
+#ifdef CONFIG_ACPI_NUMA
+
+#undef SLIT_DEBUG
+
+#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
+
+static int __initdata srat_num_cpus;	/* number of cpus */
+static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
+#define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
+#define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
+static struct acpi_table_slit __initdata *slit_table;
+cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
+
+static int __init
+get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
+{
+	int pxm;
+
+	pxm = pa->proximity_domain_lo;
+	if (ia64_platform_is("sn2") || acpi_srat_revision >= 2)
+		pxm += pa->proximity_domain_hi[0] << 8;
+	return pxm;
+}
+
+static int __init
+get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
+{
+	int pxm;
+
+	pxm = ma->proximity_domain;
+	if (!ia64_platform_is("sn2") && acpi_srat_revision <= 1)
+		pxm &= 0xff;
+
+	return pxm;
+}
+
+/*
+ * ACPI 2.0 SLIT (System Locality Information Table)
+ * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
+ */
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
+{
+	u32 len;
+
+	len = sizeof(struct acpi_table_header) + 8
+	    + slit->locality_count * slit->locality_count;
+	if (slit->header.length != len) {
+		printk(KERN_ERR
+		       "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
+		       len, slit->header.length);
+		return;
+	}
+	slit_table = slit;
+}
+
+void __init
+acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
+{
+	int pxm;
+
+	if (!(pa->flags & ACPI_SRAT_CPU_ENABLED))
+		return;
+
+	if (srat_num_cpus >= ARRAY_SIZE(node_cpuid)) {
+		printk_once(KERN_WARNING
+			    "node_cpuid[%ld] is too small, may not be able to use all cpus\n",
+			    ARRAY_SIZE(node_cpuid));
+		return;
+	}
+	pxm = get_processor_proximity_domain(pa);
+
+	/* record this node in proximity bitmap */
+	pxm_bit_set(pxm);
+
+	node_cpuid[srat_num_cpus].phys_id =
+	    (pa->apic_id << 8) | (pa->local_sapic_eid);
+	/* nid should be overridden as logical node id later */
+	node_cpuid[srat_num_cpus].nid = pxm;
+	cpu_set(srat_num_cpus, early_cpu_possible_map);
+	srat_num_cpus++;
+}
+
+void __init
+acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
+{
+	unsigned long paddr, size;
+	int pxm;
+	struct node_memblk_s *p, *q, *pend;
+
+	pxm = get_memory_proximity_domain(ma);
+
+	/* fill node memory chunk structure */
+	paddr = ma->base_address;
+	size = ma->length;
+
+	/* Ignore disabled entries */
+	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
+		return;
+
+	/* record this node in proximity bitmap */
+	pxm_bit_set(pxm);
+
+	/* Insertion sort based on base address */
+	pend = &node_memblk[num_node_memblks];
+	for (p = &node_memblk[0]; p < pend; p++) {
+		if (paddr < p->start_paddr)
+			break;
+	}
+	if (p < pend) {
+		for (q = pend - 1; q >= p; q--)
+			*(q + 1) = *q;
+	}
+	p->start_paddr = paddr;
+	p->size = size;
+	p->nid = pxm;
+	num_node_memblks++;
+}
+
+void __init acpi_numa_arch_fixup(void)
+{
+	int i, j, node_from, node_to;
+
+	/* If there's no SRAT, fix the phys_id and mark node 0 online */
+	if (srat_num_cpus == 0) {
+		node_set_online(0);
+		node_cpuid[0].phys_id = hard_smp_processor_id();
+		return;
+	}
+
+	/*
+	 * MCD - This can probably be dropped now.  No need for pxm ID to node ID
+	 * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES.
+	 */
+	nodes_clear(node_online_map);
+	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
+		if (pxm_bit_test(i)) {
+			int nid = acpi_map_pxm_to_node(i);
+			node_set_online(nid);
+		}
+	}
+
+	/* set logical node id in memory chunk structure */
+	for (i = 0; i < num_node_memblks; i++)
+		node_memblk[i].nid = pxm_to_node(node_memblk[i].nid);
+
+	/* assign memory bank numbers for each chunk on each node */
+	for_each_online_node(i) {
+		int bank;
+
+		bank = 0;
+		for (j = 0; j < num_node_memblks; j++)
+			if (node_memblk[j].nid == i)
+				node_memblk[j].bank = bank++;
+	}
+
+	/* set logical node id in cpu structure */
+	for_each_possible_early_cpu(i)
+		node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
+
+	printk(KERN_INFO "Number of logical nodes in system = %d\n",
+	       num_online_nodes());
+	printk(KERN_INFO "Number of memory chunks in system = %d\n",
+	       num_node_memblks);
+
+	if (!slit_table) {
+		for (i = 0; i < MAX_NUMNODES; i++)
+			for (j = 0; j < MAX_NUMNODES; j++)
+				node_distance(i, j) = i == j ? LOCAL_DISTANCE :
+							REMOTE_DISTANCE;
+		return;
+	}
+
+	memset(numa_slit, -1, sizeof(numa_slit));
+	for (i = 0; i < slit_table->locality_count; i++) {
+		if (!pxm_bit_test(i))
+			continue;
+		node_from = pxm_to_node(i);
+		for (j = 0; j < slit_table->locality_count; j++) {
+			if (!pxm_bit_test(j))
+				continue;
+			node_to = pxm_to_node(j);
+			node_distance(node_from, node_to) =
+			    slit_table->entry[i * slit_table->locality_count + j];
+		}
+	}
+
+#ifdef SLIT_DEBUG
+	printk("ACPI 2.0 SLIT locality table:\n");
+	for_each_online_node(i) {
+		for_each_online_node(j)
+		    printk("%03d ", node_distance(i, j));
+		printk("\n");
+	}
+#endif
+}
+#endif				/* CONFIG_ACPI_NUMA */
+
+/*
+ * success: return IRQ number (>=0)
+ * failure: return < 0
+ */
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
+{
+	if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
+		return gsi;
+
+	if (has_8259 && gsi < 16)
+		return isa_irq_to_vector(gsi);
+
+	return iosapic_register_intr(gsi,
+				     (polarity ==
+				      ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH :
+				     IOSAPIC_POL_LOW,
+				     (triggering ==
+				      ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE :
+				     IOSAPIC_LEVEL);
+}
+
+void acpi_unregister_gsi(u32 gsi)
+{
+	if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
+		return;
+
+	if (has_8259 && gsi < 16)
+		return;
+
+	iosapic_unregister_intr(gsi);
+}
+
+static int __init acpi_parse_fadt(struct acpi_table_header *table)
+{
+	struct acpi_table_header *fadt_header;
+	struct acpi_table_fadt *fadt;
+
+	if (!table)
+		return -EINVAL;
+
+	fadt_header = (struct acpi_table_header *)table;
+	if (fadt_header->revision != 3)
+		return -ENODEV;	/* Only deal with ACPI 2.0 FADT */
+
+	fadt = (struct acpi_table_fadt *)fadt_header;
+
+	acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE,
+				 ACPI_ACTIVE_LOW);
+	return 0;
+}
+
+int __init early_acpi_boot_init(void)
+{
+	int ret;
+
+	/*
+	 * do a partial walk of MADT to determine how many CPUs
+	 * we have including offline CPUs
+	 */
+	if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
+		printk(KERN_ERR PREFIX "Can't find MADT\n");
+		return 0;
+	}
+
+	ret = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
+		acpi_parse_lsapic, NR_CPUS);
+	if (ret < 1)
+		printk(KERN_ERR PREFIX
+		       "Error parsing MADT - no LAPIC entries\n");
+
+#ifdef CONFIG_SMP
+	if (available_cpus == 0) {
+		printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
+		printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
+		smp_boot_data.cpu_phys_id[available_cpus] =
+		    hard_smp_processor_id();
+		available_cpus = 1;	/* We've got at least one of these, no? */
+	}
+	smp_boot_data.cpu_count = available_cpus;
+#endif
+	/* Make boot-up look pretty */
+	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
+	       total_cpus);
+
+	return 0;
+}
+
+int __init acpi_boot_init(void)
+{
+
+	/*
+	 * MADT
+	 * ----
+	 * Parse the Multiple APIC Description Table (MADT), if exists.
+	 * Note that this table provides platform SMP configuration
+	 * information -- the successor to MPS tables.
+	 */
+
+	if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
+		printk(KERN_ERR PREFIX "Can't find MADT\n");
+		goto skip_madt;
+	}
+
+	/* Local APIC */
+
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, acpi_parse_lapic_addr_ovr, 0) < 0)
+		printk(KERN_ERR PREFIX
+		       "Error parsing LAPIC address override entry\n");
+
+	if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0)
+	    < 0)
+		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+
+	/* I/O APIC */
+
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_TYPE_IO_SAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) {
+		if (!ia64_platform_is("sn2"))
+			printk(KERN_ERR PREFIX
+			       "Error parsing MADT - no IOSAPIC entries\n");
+	}
+
+	/* System-Level Interrupt Routing */
+
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_TYPE_INTERRUPT_SOURCE, acpi_parse_plat_int_src,
+	     ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
+		printk(KERN_ERR PREFIX
+		       "Error parsing platform interrupt source entry\n");
+
+	if (acpi_table_parse_madt
+	    (ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, 0) < 0)
+		printk(KERN_ERR PREFIX
+		       "Error parsing interrupt source overrides entry\n");
+
+	if (acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, 0) < 0)
+		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+      skip_madt:
+
+	/*
+	 * FADT says whether a legacy keyboard controller is present.
+	 * The FADT also contains an SCI_INT line, by which the system
+	 * gets interrupts such as power and sleep buttons.  If it's not
+	 * on a Legacy interrupt, it needs to be setup.
+	 */
+	if (acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt))
+		printk(KERN_ERR PREFIX "Can't find FADT\n");
+
+#ifdef CONFIG_ACPI_NUMA
+#ifdef CONFIG_SMP
+	if (srat_num_cpus == 0) {
+		int cpu, i = 1;
+		for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
+			if (smp_boot_data.cpu_phys_id[cpu] !=
+			    hard_smp_processor_id())
+				node_cpuid[i++].phys_id =
+				    smp_boot_data.cpu_phys_id[cpu];
+	}
+#endif
+	build_cpu_to_node_map();
+#endif
+	return 0;
+}
+
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+{
+	int tmp;
+
+	if (has_8259 && gsi < 16)
+		*irq = isa_irq_to_vector(gsi);
+	else {
+		tmp = gsi_to_irq(gsi);
+		if (tmp == -1)
+			return -1;
+		*irq = tmp;
+	}
+	return 0;
+}
+
+int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
+{
+	if (isa_irq >= 16)
+		return -1;
+	*gsi = isa_irq;
+	return 0;
+}
+
+/*
+ *  ACPI based hotplug CPU support
+ */
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+static __cpuinit
+int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+{
+#ifdef CONFIG_ACPI_NUMA
+	int pxm_id;
+	int nid;
+
+	pxm_id = acpi_get_pxm(handle);
+	/*
+	 * We don't have cpu-only-node hotadd. But if the system equips
+	 * SRAT table, pxm is already found and node is ready.
+  	 * So, just pxm_to_nid(pxm) is OK.
+	 * This code here is for the system which doesn't have full SRAT
+  	 * table for possible cpus.
+	 */
+	nid = acpi_map_pxm_to_node(pxm_id);
+	node_cpuid[cpu].phys_id = physid;
+	node_cpuid[cpu].nid = nid;
+#endif
+	return (0);
+}
+
+int additional_cpus __initdata = -1;
+
+static __init int setup_additional_cpus(char *s)
+{
+	if (s)
+		additional_cpus = simple_strtol(s, NULL, 0);
+
+	return 0;
+}
+
+early_param("additional_cpus", setup_additional_cpus);
+
+/*
+ * cpu_possible_map should be static, it cannot change as CPUs
+ * are onlined, or offlined. The reason is per-cpu data-structures
+ * are allocated by some modules at init time, and dont expect to
+ * do this dynamically on cpu arrival/departure.
+ * cpu_present_map on the other hand can change dynamically.
+ * In case when cpu_hotplug is not compiled, then we resort to current
+ * behaviour, which is cpu_possible == cpu_present.
+ * - Ashok Raj
+ *
+ * Three ways to find out the number of additional hotplug CPUs:
+ * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
+ * - The user can overwrite it with additional_cpus=NUM
+ * - Otherwise don't reserve additional CPUs.
+ */
+__init void prefill_possible_map(void)
+{
+	int i;
+	int possible, disabled_cpus;
+
+	disabled_cpus = total_cpus - available_cpus;
+
+ 	if (additional_cpus == -1) {
+ 		if (disabled_cpus > 0)
+			additional_cpus = disabled_cpus;
+ 		else
+			additional_cpus = 0;
+ 	}
+
+	possible = available_cpus + additional_cpus;
+
+	if (possible > nr_cpu_ids)
+		possible = nr_cpu_ids;
+
+	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
+		possible, max((possible - available_cpus), 0));
+
+	for (i = 0; i < possible; i++)
+		set_cpu_possible(i, true);
+}
+
+static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *obj;
+	struct acpi_madt_local_sapic *lsapic;
+	cpumask_t tmp_map;
+	int cpu, physid;
+
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
+		return -EINVAL;
+
+	if (!buffer.length || !buffer.pointer)
+		return -EINVAL;
+
+	obj = buffer.pointer;
+	if (obj->type != ACPI_TYPE_BUFFER)
+	{
+		kfree(buffer.pointer);
+		return -EINVAL;
+	}
+
+	lsapic = (struct acpi_madt_local_sapic *)obj->buffer.pointer;
+
+	if ((lsapic->header.type != ACPI_MADT_TYPE_LOCAL_SAPIC) ||
+	    (!(lsapic->lapic_flags & ACPI_MADT_ENABLED))) {
+		kfree(buffer.pointer);
+		return -EINVAL;
+	}
+
+	physid = ((lsapic->id << 8) | (lsapic->eid));
+
+	kfree(buffer.pointer);
+	buffer.length = ACPI_ALLOCATE_BUFFER;
+	buffer.pointer = NULL;
+
+	cpumask_complement(&tmp_map, cpu_present_mask);
+	cpu = cpumask_first(&tmp_map);
+	if (cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	acpi_map_cpu2node(handle, cpu, physid);
+
+	cpu_set(cpu, cpu_present_map);
+	ia64_cpu_to_sapicid[cpu] = physid;
+
+	acpi_processor_set_pdc(handle);
+
+	*pcpu = cpu;
+	return (0);
+}
+
+/* wrapper to silence section mismatch warning */
+int __ref acpi_map_lsapic(acpi_handle handle, int *pcpu)
+{
+	return _acpi_map_lsapic(handle, pcpu);
+}
+EXPORT_SYMBOL(acpi_map_lsapic);
+
+int acpi_unmap_lsapic(int cpu)
+{
+	ia64_cpu_to_sapicid[cpu] = -1;
+	cpu_clear(cpu, cpu_present_map);
+
+#ifdef CONFIG_ACPI_NUMA
+	/* NUMA specific cleanup's */
+#endif
+
+	return (0);
+}
+
+EXPORT_SYMBOL(acpi_unmap_lsapic);
+#endif				/* CONFIG_ACPI_HOTPLUG_CPU */
+
+#ifdef CONFIG_ACPI_NUMA
+static acpi_status __devinit
+acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *obj;
+	struct acpi_madt_io_sapic *iosapic;
+	unsigned int gsi_base;
+	int pxm, node;
+
+	/* Only care about objects w/ a method that returns the MADT */
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
+		return AE_OK;
+
+	if (!buffer.length || !buffer.pointer)
+		return AE_OK;
+
+	obj = buffer.pointer;
+	if (obj->type != ACPI_TYPE_BUFFER ||
+	    obj->buffer.length < sizeof(*iosapic)) {
+		kfree(buffer.pointer);
+		return AE_OK;
+	}
+
+	iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer;
+
+	if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) {
+		kfree(buffer.pointer);
+		return AE_OK;
+	}
+
+	gsi_base = iosapic->global_irq_base;
+
+	kfree(buffer.pointer);
+
+	/*
+	 * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell
+	 * us which node to associate this with.
+	 */
+	pxm = acpi_get_pxm(handle);
+	if (pxm < 0)
+		return AE_OK;
+
+	node = pxm_to_node(pxm);
+
+	if (node >= MAX_NUMNODES || !node_online(node) ||
+	    cpumask_empty(cpumask_of_node(node)))
+		return AE_OK;
+
+	/* We know a gsi to node mapping! */
+	map_iosapic_to_node(gsi_base, node);
+	return AE_OK;
+}
+
+static int __init
+acpi_map_iosapics (void)
+{
+	acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL);
+	return 0;
+}
+
+fs_initcall(acpi_map_iosapics);
+#endif				/* CONFIG_ACPI_NUMA */
+
+int __ref acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+	int err;
+
+	if ((err = iosapic_init(phys_addr, gsi_base)))
+		return err;
+
+#ifdef CONFIG_ACPI_NUMA
+	acpi_map_iosapic(handle, 0, NULL, NULL);
+#endif				/* CONFIG_ACPI_NUMA */
+
+	return 0;
+}
+
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
+{
+	return iosapic_remove(gsi_base);
+}
+
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
+/*
+ * acpi_suspend_lowlevel() - save kernel state and suspend.
+ *
+ * TBD when when IA64 starts to support suspend...
+ */
+int acpi_suspend_lowlevel(void) { return 0; }
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
new file mode 100644
index 00000000..af565016
--- /dev/null
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -0,0 +1,322 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#define ASM_OFFSETS_C 1
+
+#include <linux/sched.h>
+#include <linux/pid.h>
+#include <linux/clocksource.h>
+#include <linux/kbuild.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/siginfo.h>
+#include <asm/sigcontext.h>
+#include <asm/mca.h>
+
+#include <asm/xen/interface.h>
+#include <asm/xen/hypervisor.h>
+
+#include "../kernel/sigframe.h"
+#include "../kernel/fsyscall_gtod_data.h"
+
+void foo(void)
+{
+	DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
+	DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info));
+	DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs));
+	DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack));
+	DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo));
+	DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64));
+	DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
+	DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
+
+	BUILD_BUG_ON(sizeof(struct upid) != 32);
+	DEFINE(IA64_UPID_SHIFT, 5);
+
+	BLANK();
+
+	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
+	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
+	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+	DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
+#endif
+
+	BLANK();
+
+	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
+	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
+	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+	DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+	DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
+	DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
+	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
+	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
+	DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
+	DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
+	DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
+	DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
+	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
+	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
+
+	BLANK();
+
+	DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
+
+	BLANK();
+
+	DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
+							     group_stop_count));
+	DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
+
+	BLANK();
+
+	DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6));
+	DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7));
+	DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd));
+	DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd));
+	DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8));
+	DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9));
+	DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10));
+	DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11));
+	DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr));
+	DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip));
+	DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs));
+	DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat));
+	DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs));
+	DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc));
+	DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat));
+
+	DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore));
+	DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr));
+	DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0));
+	DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs));
+	DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1));
+	DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12));
+	DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13));
+	DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr));
+	DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15));
+	DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14));
+	DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
+	DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
+	DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16));
+	DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17));
+	DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18));
+	DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19));
+	DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20));
+	DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21));
+	DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22));
+	DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23));
+	DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24));
+	DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25));
+	DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26));
+	DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27));
+	DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28));
+	DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29));
+	DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30));
+	DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31));
+	DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv));
+	DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6));
+	DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7));
+	DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8));
+	DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9));
+	DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10));
+	DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11));
+
+	BLANK();
+
+	DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat));
+	DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr));
+	DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2));
+	DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3));
+	DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4));
+	DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5));
+	DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12));
+	DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13));
+	DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14));
+	DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15));
+	DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16));
+	DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17));
+	DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18));
+	DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19));
+	DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20));
+	DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21));
+	DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22));
+	DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23));
+	DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24));
+	DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25));
+	DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26));
+	DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27));
+	DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28));
+	DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29));
+	DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30));
+	DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31));
+	DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4));
+	DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5));
+	DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6));
+	DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7));
+	DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0));
+	DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1));
+	DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2));
+	DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3));
+	DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4));
+	DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5));
+	DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs));
+	DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc));
+	DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat));
+	DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat));
+	DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore));
+	DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr));
+
+	BLANK();
+
+	DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip));
+	DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp));
+	DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr));
+	DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat));
+	DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat));
+	DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0]));
+	DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm));
+	DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags));
+	DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6]));
+	DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr));
+	DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12]));
+	DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base));
+	DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs));
+
+	BLANK();
+
+	DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal));
+
+	BLANK();
+
+	DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0));
+	DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1));
+	DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2));
+	DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler));
+	DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc));
+	BLANK();
+    /* for assembly files which can't include sched.h: */
+	DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
+	DEFINE(IA64_CLONE_VM, CLONE_VM);
+
+	BLANK();
+	DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET,
+	       offsetof (struct cpuinfo_ia64, nsec_per_cyc));
+	DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET,
+	       offsetof (struct cpuinfo_ia64, ptce_base));
+	DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET,
+	       offsetof (struct cpuinfo_ia64, ptce_count));
+	DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET,
+	       offsetof (struct cpuinfo_ia64, ptce_stride));
+	BLANK();
+	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET,
+	       offsetof (struct timespec, tv_nsec));
+
+	DEFINE(CLONE_SETTLS_BIT, 19);
+#if CLONE_SETTLS != (1<<19)
+# error "CLONE_SETTLS_BIT incorrect, please fix"
+#endif
+
+	BLANK();
+	DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET,
+	       offsetof (struct ia64_mca_cpu, mca_stack));
+	DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
+	       offsetof (struct ia64_mca_cpu, init_stack));
+	BLANK();
+	DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET,
+	       offsetof (struct ia64_sal_os_state, os_gp));
+	DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET,
+	       offsetof (struct ia64_sal_os_state, proc_state_param));
+	DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET,
+	       offsetof (struct ia64_sal_os_state, sal_ra));
+	DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET,
+	       offsetof (struct ia64_sal_os_state, sal_gp));
+	DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
+	       offsetof (struct ia64_sal_os_state, pal_min_state));
+	DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET,
+	       offsetof (struct ia64_sal_os_state, os_status));
+	DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET,
+	       offsetof (struct ia64_sal_os_state, context));
+	DEFINE(IA64_SAL_OS_STATE_SIZE,
+	       sizeof (struct ia64_sal_os_state));
+	BLANK();
+
+	DEFINE(IA64_PMSA_GR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_gr));
+	DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_bank1_gr));
+	DEFINE(IA64_PMSA_PR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_pr));
+	DEFINE(IA64_PMSA_BR0_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_br0));
+	DEFINE(IA64_PMSA_RSC_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_rsc));
+	DEFINE(IA64_PMSA_IIP_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_iip));
+	DEFINE(IA64_PMSA_IPSR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_ipsr));
+	DEFINE(IA64_PMSA_IFS_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_ifs));
+	DEFINE(IA64_PMSA_XIP_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_xip));
+	BLANK();
+
+	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
+	DEFINE(IA64_GTOD_LOCK_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, lock));
+	DEFINE(IA64_GTOD_WALL_TIME_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, wall_time));
+	DEFINE(IA64_GTOD_MONO_TIME_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, monotonic_time));
+	DEFINE(IA64_CLKSRC_MASK_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_mask));
+	DEFINE(IA64_CLKSRC_MULT_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_mult));
+	DEFINE(IA64_CLKSRC_SHIFT_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_shift));
+	DEFINE(IA64_CLKSRC_MMIO_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_fsys_mmio));
+	DEFINE(IA64_CLKSRC_CYCLE_LAST_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_cycle_last));
+	DEFINE(IA64_ITC_JITTER_OFFSET,
+		offsetof (struct itc_jitter_data_t, itc_jitter));
+	DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
+		offsetof (struct itc_jitter_data_t, itc_lastcycle));
+
+#ifdef CONFIG_XEN
+	BLANK();
+
+	DEFINE(XEN_NATIVE_ASM, XEN_NATIVE);
+	DEFINE(XEN_PV_DOMAIN_ASM, XEN_PV_DOMAIN);
+
+#define DEFINE_MAPPED_REG_OFS(sym, field) \
+	DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(struct mapped_regs, field)))
+
+	DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr);
+	DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr);
+	DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip);
+	DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs);
+	DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs);
+	DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr);
+	DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa);
+	DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa);
+	DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim);
+	DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha);
+	DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir);
+	DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
+	DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
+	DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
+	DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
+	DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
+	DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);
+	DEFINE_MAPPED_REG_OFS(XSI_ITC_OFFSET_OFS, itc_offset);
+	DEFINE_MAPPED_REG_OFS(XSI_ITC_LAST_OFS, itc_last);
+#endif /* CONFIG_XEN */
+}
diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c
new file mode 100644
index 00000000..96a9d18f
--- /dev/null
+++ b/arch/ia64/kernel/audit.c
@@ -0,0 +1,60 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+	return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+	switch(syscall) {
+	case __NR_open:
+		return 2;
+	case __NR_openat:
+		return 3;
+	case __NR_execve:
+		return 5;
+	default:
+		return 0;
+	}
+}
+
+static int __init audit_classes_init(void)
+{
+	audit_register_class(AUDIT_CLASS_WRITE, write_class);
+	audit_register_class(AUDIT_CLASS_READ, read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+	return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c
new file mode 100644
index 00000000..0b286ca1
--- /dev/null
+++ b/arch/ia64/kernel/brl_emu.c
@@ -0,0 +1,234 @@
+/*
+ *  Emulation of the "brl" instruction for IA64 processors that
+ *  don't support it in hardware.
+ *  Author: Stephan Zeisset, Intel Corp. <Stephan.Zeisset@intel.com>
+ *
+ *    02/22/02	D. Mosberger	Clear si_flgs, si_isr, and si_imm to avoid
+ *				leaking kernel bits.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5;
+
+struct illegal_op_return {
+	unsigned long fkt, arg1, arg2, arg3;
+};
+
+/*
+ *  The unimplemented bits of a virtual address must be set
+ *  to the value of the most significant implemented bit.
+ *  unimpl_va_mask includes all unimplemented bits and
+ *  the most significant implemented bit, so the result
+ *  of an and operation with the mask must be all 0's
+ *  or all 1's for the address to be valid.
+ */
+#define unimplemented_virtual_address(va) (						\
+	((va) & local_cpu_data->unimpl_va_mask) != 0 &&					\
+	((va) & local_cpu_data->unimpl_va_mask) != local_cpu_data->unimpl_va_mask	\
+)
+
+/*
+ *  The unimplemented bits of a physical address must be 0.
+ *  unimpl_pa_mask includes all unimplemented bits, so the result
+ *  of an and operation with the mask must be all 0's for the
+ *  address to be valid.
+ */
+#define unimplemented_physical_address(pa) (		\
+	((pa) & local_cpu_data->unimpl_pa_mask) != 0	\
+)
+
+/*
+ *  Handle an illegal operation fault that was caused by an
+ *  unimplemented "brl" instruction.
+ *  If we are not successful (e.g because the illegal operation
+ *  wasn't caused by a "brl" after all), we return -1.
+ *  If we are successful, we return either 0 or the address
+ *  of a "fixup" function for manipulating preserved register
+ *  state.
+ */
+
+struct illegal_op_return
+ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec)
+{
+	unsigned long bundle[2];
+	unsigned long opcode, btype, qp, offset, cpl;
+	unsigned long next_ip;
+	struct siginfo siginfo;
+	struct illegal_op_return rv;
+	long tmp_taken, unimplemented_address;
+
+	rv.fkt = (unsigned long) -1;
+
+	/*
+	 *  Decode the instruction bundle.
+	 */
+
+	if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle)))
+		return rv;
+
+	next_ip = (unsigned long) regs->cr_iip + 16;
+
+	/* "brl" must be in slot 2. */
+	if (ia64_psr(regs)->ri != 1) return rv;
+
+	/* Must be "mlx" template */
+	if ((bundle[0] & 0x1e) != 0x4) return rv;
+
+	opcode = (bundle[1] >> 60);
+	btype = ((bundle[1] >> 29) & 0x7);
+	qp = ((bundle[1] >> 23) & 0x3f);
+	offset = ((bundle[1] & 0x0800000000000000L) << 4)
+		| ((bundle[1] & 0x00fffff000000000L) >> 32)
+		| ((bundle[1] & 0x00000000007fffffL) << 40)
+		| ((bundle[0] & 0xffff000000000000L) >> 24);
+
+	tmp_taken = regs->pr & (1L << qp);
+
+	switch(opcode) {
+
+		case 0xC:
+			/*
+			 *  Long Branch.
+			 */
+			if (btype != 0) return rv;
+			rv.fkt = 0;
+			if (!(tmp_taken)) {
+				/*
+				 *  Qualifying predicate is 0.
+				 *  Skip instruction.
+				 */
+				regs->cr_iip = next_ip;
+				ia64_psr(regs)->ri = 0;
+				return rv;
+			}
+			break;
+
+		case 0xD:
+			/*
+			 *  Long Call.
+			 */
+			rv.fkt = 0;
+			if (!(tmp_taken)) {
+				/*
+				 *  Qualifying predicate is 0.
+				 *  Skip instruction.
+				 */
+				regs->cr_iip = next_ip;
+				ia64_psr(regs)->ri = 0;
+				return rv;
+			}
+
+			/*
+			 *  BR[btype] = IP+16
+			 */
+			switch(btype) {
+				case 0:
+					regs->b0 = next_ip;
+					break;
+				case 1:
+					rv.fkt = (unsigned long) &ia64_set_b1;
+					break;
+				case 2:
+					rv.fkt = (unsigned long) &ia64_set_b2;
+					break;
+				case 3:
+					rv.fkt = (unsigned long) &ia64_set_b3;
+					break;
+				case 4:
+					rv.fkt = (unsigned long) &ia64_set_b4;
+					break;
+				case 5:
+					rv.fkt = (unsigned long) &ia64_set_b5;
+					break;
+				case 6:
+					regs->b6 = next_ip;
+					break;
+				case 7:
+					regs->b7 = next_ip;
+					break;
+			}
+			rv.arg1 = next_ip;
+
+			/*
+			 *  AR[PFS].pfm = CFM
+			 *  AR[PFS].pec = AR[EC]
+			 *  AR[PFS].ppl = PSR.cpl
+			 */
+			cpl = ia64_psr(regs)->cpl;
+			regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff)
+					| (ar_ec << 52) | (cpl << 62));
+
+			/*
+			 *  CFM.sof -= CFM.sol
+			 *  CFM.sol = 0
+			 *  CFM.sor = 0
+			 *  CFM.rrb.gr = 0
+			 *  CFM.rrb.fr = 0
+			 *  CFM.rrb.pr = 0
+			 */
+			regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f)
+					- ((regs->cr_ifs >> 7) & 0x7f));
+
+			break;
+
+		default:
+			/*
+			 *  Unknown opcode.
+			 */
+			return rv;
+
+	}
+
+	regs->cr_iip += offset;
+	ia64_psr(regs)->ri = 0;
+
+	if (ia64_psr(regs)->it == 0)
+		unimplemented_address = unimplemented_physical_address(regs->cr_iip);
+	else
+		unimplemented_address = unimplemented_virtual_address(regs->cr_iip);
+
+	if (unimplemented_address) {
+		/*
+		 *  The target address contains unimplemented bits.
+		 */
+		printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n");
+		siginfo.si_signo = SIGILL;
+		siginfo.si_errno = 0;
+		siginfo.si_flags = 0;
+		siginfo.si_isr = 0;
+		siginfo.si_imm = 0;
+		siginfo.si_code = ILL_BADIADDR;
+		force_sig_info(SIGILL, &siginfo, current);
+	} else if (ia64_psr(regs)->tb) {
+		/*
+		 *  Branch Tracing is enabled.
+		 *  Force a taken branch signal.
+		 */
+		siginfo.si_signo = SIGTRAP;
+		siginfo.si_errno = 0;
+		siginfo.si_code = TRAP_BRANCH;
+		siginfo.si_flags = 0;
+		siginfo.si_isr = 0;
+		siginfo.si_addr = 0;
+		siginfo.si_imm = 0;
+		force_sig_info(SIGTRAP, &siginfo, current);
+	} else if (ia64_psr(regs)->ss) {
+		/*
+		 *  Single Step is enabled.
+		 *  Force a trace signal.
+		 */
+		siginfo.si_signo = SIGTRAP;
+		siginfo.si_errno = 0;
+		siginfo.si_code = TRAP_TRACE;
+		siginfo.si_flags = 0;
+		siginfo.si_isr = 0;
+		siginfo.si_addr = 0;
+		siginfo.si_imm = 0;
+		force_sig_info(SIGTRAP, &siginfo, current);
+	}
+	return rv;
+}
diff --git a/arch/ia64/kernel/cpufreq/Kconfig b/arch/ia64/kernel/cpufreq/Kconfig
new file mode 100644
index 00000000..2d9d5279
--- /dev/null
+++ b/arch/ia64/kernel/cpufreq/Kconfig
@@ -0,0 +1,29 @@
+
+#
+# CPU Frequency scaling
+#
+
+menu "CPU Frequency scaling"
+
+source "drivers/cpufreq/Kconfig"
+
+if CPU_FREQ
+
+comment "CPUFreq processor drivers"
+
+config IA64_ACPI_CPUFREQ
+	tristate "ACPI Processor P-States driver"
+	select CPU_FREQ_TABLE
+	depends on ACPI_PROCESSOR
+	help
+	This driver adds a CPUFreq driver which utilizes the ACPI
+	Processor Performance States.
+
+	For details, take a look at <file:Documentation/cpu-freq/>.
+
+	If in doubt, say N.
+
+endif   # CPU_FREQ
+
+endmenu
+
diff --git a/arch/ia64/kernel/cpufreq/Makefile b/arch/ia64/kernel/cpufreq/Makefile
new file mode 100644
index 00000000..4838f2a5
--- /dev/null
+++ b/arch/ia64/kernel/cpufreq/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_IA64_ACPI_CPUFREQ)		+= acpi-cpufreq.o
+
diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
new file mode 100644
index 00000000..f09b1742
--- /dev/null
+++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
@@ -0,0 +1,437 @@
+/*
+ * arch/ia64/kernel/cpufreq/acpi-cpufreq.c
+ * This file provides the ACPI based P-state support. This
+ * module works with generic cpufreq infrastructure. Most of
+ * the code is based on i386 version
+ * (arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c)
+ *
+ * Copyright (C) 2005 Intel Corp
+ *      Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pal.h>
+
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+
+MODULE_AUTHOR("Venkatesh Pallipadi");
+MODULE_DESCRIPTION("ACPI Processor P-States Driver");
+MODULE_LICENSE("GPL");
+
+
+struct cpufreq_acpi_io {
+	struct acpi_processor_performance	acpi_data;
+	struct cpufreq_frequency_table		*freq_table;
+	unsigned int				resume;
+};
+
+static struct cpufreq_acpi_io	*acpi_io_data[NR_CPUS];
+
+static struct cpufreq_driver acpi_cpufreq_driver;
+
+
+static int
+processor_set_pstate (
+	u32	value)
+{
+	s64 retval;
+
+	pr_debug("processor_set_pstate\n");
+
+	retval = ia64_pal_set_pstate((u64)value);
+
+	if (retval) {
+		pr_debug("Failed to set freq to 0x%x, with error 0x%lx\n",
+		        value, retval);
+		return -ENODEV;
+	}
+	return (int)retval;
+}
+
+
+static int
+processor_get_pstate (
+	u32	*value)
+{
+	u64	pstate_index = 0;
+	s64 	retval;
+
+	pr_debug("processor_get_pstate\n");
+
+	retval = ia64_pal_get_pstate(&pstate_index,
+	                             PAL_GET_PSTATE_TYPE_INSTANT);
+	*value = (u32) pstate_index;
+
+	if (retval)
+		pr_debug("Failed to get current freq with "
+			"error 0x%lx, idx 0x%x\n", retval, *value);
+
+	return (int)retval;
+}
+
+
+/* To be used only after data->acpi_data is initialized */
+static unsigned
+extract_clock (
+	struct cpufreq_acpi_io *data,
+	unsigned value,
+	unsigned int cpu)
+{
+	unsigned long i;
+
+	pr_debug("extract_clock\n");
+
+	for (i = 0; i < data->acpi_data.state_count; i++) {
+		if (value == data->acpi_data.states[i].status)
+			return data->acpi_data.states[i].core_frequency;
+	}
+	return data->acpi_data.states[i-1].core_frequency;
+}
+
+
+static unsigned int
+processor_get_freq (
+	struct cpufreq_acpi_io	*data,
+	unsigned int		cpu)
+{
+	int			ret = 0;
+	u32			value = 0;
+	cpumask_t		saved_mask;
+	unsigned long 		clock_freq;
+
+	pr_debug("processor_get_freq\n");
+
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed_ptr(current, cpumask_of(cpu));
+	if (smp_processor_id() != cpu)
+		goto migrate_end;
+
+	/* processor_get_pstate gets the instantaneous frequency */
+	ret = processor_get_pstate(&value);
+
+	if (ret) {
+		set_cpus_allowed_ptr(current, &saved_mask);
+		printk(KERN_WARNING "get performance failed with error %d\n",
+		       ret);
+		ret = 0;
+		goto migrate_end;
+	}
+	clock_freq = extract_clock(data, value, cpu);
+	ret = (clock_freq*1000);
+
+migrate_end:
+	set_cpus_allowed_ptr(current, &saved_mask);
+	return ret;
+}
+
+
+static int
+processor_set_freq (
+	struct cpufreq_acpi_io	*data,
+	unsigned int		cpu,
+	int			state)
+{
+	int			ret = 0;
+	u32			value = 0;
+	struct cpufreq_freqs    cpufreq_freqs;
+	cpumask_t		saved_mask;
+	int			retval;
+
+	pr_debug("processor_set_freq\n");
+
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed_ptr(current, cpumask_of(cpu));
+	if (smp_processor_id() != cpu) {
+		retval = -EAGAIN;
+		goto migrate_end;
+	}
+
+	if (state == data->acpi_data.state) {
+		if (unlikely(data->resume)) {
+			pr_debug("Called after resume, resetting to P%d\n", state);
+			data->resume = 0;
+		} else {
+			pr_debug("Already at target state (P%d)\n", state);
+			retval = 0;
+			goto migrate_end;
+		}
+	}
+
+	pr_debug("Transitioning from P%d to P%d\n",
+		data->acpi_data.state, state);
+
+	/* cpufreq frequency struct */
+	cpufreq_freqs.cpu = cpu;
+	cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency;
+	cpufreq_freqs.new = data->freq_table[state].frequency;
+
+	/* notify cpufreq */
+	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
+
+	/*
+	 * First we write the target state's 'control' value to the
+	 * control_register.
+	 */
+
+	value = (u32) data->acpi_data.states[state].control;
+
+	pr_debug("Transitioning to state: 0x%08x\n", value);
+
+	ret = processor_set_pstate(value);
+	if (ret) {
+		unsigned int tmp = cpufreq_freqs.new;
+		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
+		cpufreq_freqs.new = cpufreq_freqs.old;
+		cpufreq_freqs.old = tmp;
+		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
+		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
+		printk(KERN_WARNING "Transition failed with error %d\n", ret);
+		retval = -ENODEV;
+		goto migrate_end;
+	}
+
+	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
+
+	data->acpi_data.state = state;
+
+	retval = 0;
+
+migrate_end:
+	set_cpus_allowed_ptr(current, &saved_mask);
+	return (retval);
+}
+
+
+static unsigned int
+acpi_cpufreq_get (
+	unsigned int		cpu)
+{
+	struct cpufreq_acpi_io *data = acpi_io_data[cpu];
+
+	pr_debug("acpi_cpufreq_get\n");
+
+	return processor_get_freq(data, cpu);
+}
+
+
+static int
+acpi_cpufreq_target (
+	struct cpufreq_policy   *policy,
+	unsigned int target_freq,
+	unsigned int relation)
+{
+	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+	unsigned int next_state = 0;
+	unsigned int result = 0;
+
+	pr_debug("acpi_cpufreq_setpolicy\n");
+
+	result = cpufreq_frequency_table_target(policy,
+			data->freq_table, target_freq, relation, &next_state);
+	if (result)
+		return (result);
+
+	result = processor_set_freq(data, policy->cpu, next_state);
+
+	return (result);
+}
+
+
+static int
+acpi_cpufreq_verify (
+	struct cpufreq_policy   *policy)
+{
+	unsigned int result = 0;
+	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+
+	pr_debug("acpi_cpufreq_verify\n");
+
+	result = cpufreq_frequency_table_verify(policy,
+			data->freq_table);
+
+	return (result);
+}
+
+
+static int
+acpi_cpufreq_cpu_init (
+	struct cpufreq_policy   *policy)
+{
+	unsigned int		i;
+	unsigned int		cpu = policy->cpu;
+	struct cpufreq_acpi_io	*data;
+	unsigned int		result = 0;
+
+	pr_debug("acpi_cpufreq_cpu_init\n");
+
+	data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
+	if (!data)
+		return (-ENOMEM);
+
+	acpi_io_data[cpu] = data;
+
+	result = acpi_processor_register_performance(&data->acpi_data, cpu);
+
+	if (result)
+		goto err_free;
+
+	/* capability check */
+	if (data->acpi_data.state_count <= 1) {
+		pr_debug("No P-States\n");
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	if ((data->acpi_data.control_register.space_id !=
+					ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+	    (data->acpi_data.status_register.space_id !=
+					ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+		pr_debug("Unsupported address space [%d, %d]\n",
+			(u32) (data->acpi_data.control_register.space_id),
+			(u32) (data->acpi_data.status_register.space_id));
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	/* alloc freq_table */
+	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
+	                           (data->acpi_data.state_count + 1),
+	                           GFP_KERNEL);
+	if (!data->freq_table) {
+		result = -ENOMEM;
+		goto err_unreg;
+	}
+
+	/* detect transition latency */
+	policy->cpuinfo.transition_latency = 0;
+	for (i=0; i<data->acpi_data.state_count; i++) {
+		if ((data->acpi_data.states[i].transition_latency * 1000) >
+		    policy->cpuinfo.transition_latency) {
+			policy->cpuinfo.transition_latency =
+			    data->acpi_data.states[i].transition_latency * 1000;
+		}
+	}
+	policy->cur = processor_get_freq(data, policy->cpu);
+
+	/* table init */
+	for (i = 0; i <= data->acpi_data.state_count; i++)
+	{
+		data->freq_table[i].index = i;
+		if (i < data->acpi_data.state_count) {
+			data->freq_table[i].frequency =
+			      data->acpi_data.states[i].core_frequency * 1000;
+		} else {
+			data->freq_table[i].frequency = CPUFREQ_TABLE_END;
+		}
+	}
+
+	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
+	if (result) {
+		goto err_freqfree;
+	}
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management "
+	       "activated.\n", cpu);
+
+	for (i = 0; i < data->acpi_data.state_count; i++)
+		pr_debug("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
+			(i == data->acpi_data.state?'*':' '), i,
+			(u32) data->acpi_data.states[i].core_frequency,
+			(u32) data->acpi_data.states[i].power,
+			(u32) data->acpi_data.states[i].transition_latency,
+			(u32) data->acpi_data.states[i].bus_master_latency,
+			(u32) data->acpi_data.states[i].status,
+			(u32) data->acpi_data.states[i].control);
+
+	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
+
+	/* the first call to ->target() should result in us actually
+	 * writing something to the appropriate registers. */
+	data->resume = 1;
+
+	return (result);
+
+ err_freqfree:
+	kfree(data->freq_table);
+ err_unreg:
+	acpi_processor_unregister_performance(&data->acpi_data, cpu);
+ err_free:
+	kfree(data);
+	acpi_io_data[cpu] = NULL;
+
+	return (result);
+}
+
+
+static int
+acpi_cpufreq_cpu_exit (
+	struct cpufreq_policy   *policy)
+{
+	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+
+	pr_debug("acpi_cpufreq_cpu_exit\n");
+
+	if (data) {
+		cpufreq_frequency_table_put_attr(policy->cpu);
+		acpi_io_data[policy->cpu] = NULL;
+		acpi_processor_unregister_performance(&data->acpi_data,
+		                                      policy->cpu);
+		kfree(data);
+	}
+
+	return (0);
+}
+
+
+static struct freq_attr* acpi_cpufreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+
+static struct cpufreq_driver acpi_cpufreq_driver = {
+	.verify 	= acpi_cpufreq_verify,
+	.target 	= acpi_cpufreq_target,
+	.get 		= acpi_cpufreq_get,
+	.init		= acpi_cpufreq_cpu_init,
+	.exit		= acpi_cpufreq_cpu_exit,
+	.name		= "acpi-cpufreq",
+	.owner		= THIS_MODULE,
+	.attr           = acpi_cpufreq_attr,
+};
+
+
+static int __init
+acpi_cpufreq_init (void)
+{
+	pr_debug("acpi_cpufreq_init\n");
+
+ 	return cpufreq_register_driver(&acpi_cpufreq_driver);
+}
+
+
+static void __exit
+acpi_cpufreq_exit (void)
+{
+	pr_debug("acpi_cpufreq_exit\n");
+
+	cpufreq_unregister_driver(&acpi_cpufreq_driver);
+	return;
+}
+
+
+late_initcall(acpi_cpufreq_init);
+module_exit(acpi_cpufreq_exit);
+
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
new file mode 100644
index 00000000..b942f403
--- /dev/null
+++ b/arch/ia64/kernel/crash.c
@@ -0,0 +1,286 @@
+/*
+ * arch/ia64/kernel/crash.c
+ *
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Intel Corp	Zou Nan hai <nanhai.zou@intel.com>
+ *
+ */
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/kexec.h>
+#include <linux/elfcore.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+#include <linux/kdebug.h>
+
+#include <asm/mca.h>
+
+int kdump_status[NR_CPUS];
+static atomic_t kdump_cpu_frozen;
+atomic_t kdump_in_progress;
+static int kdump_freeze_monarch;
+static int kdump_on_init = 1;
+static int kdump_on_fatal_mca = 1;
+
+static inline Elf64_Word
+*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
+		size_t data_len)
+{
+	struct elf_note *note = (struct elf_note *)buf;
+	note->n_namesz = strlen(name) + 1;
+	note->n_descsz = data_len;
+	note->n_type   = type;
+	buf += (sizeof(*note) + 3)/4;
+	memcpy(buf, name, note->n_namesz);
+	buf += (note->n_namesz + 3)/4;
+	memcpy(buf, data, data_len);
+	buf += (data_len + 3)/4;
+	return buf;
+}
+
+static void
+final_note(void *buf)
+{
+	memset(buf, 0, sizeof(struct elf_note));
+}
+
+extern void ia64_dump_cpu_regs(void *);
+
+static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
+
+void
+crash_save_this_cpu(void)
+{
+	void *buf;
+	unsigned long cfm, sof, sol;
+
+	int cpu = smp_processor_id();
+	struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
+
+	elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
+	memset(prstatus, 0, sizeof(*prstatus));
+	prstatus->pr_pid = current->pid;
+
+	ia64_dump_cpu_regs(dst);
+	cfm = dst[43];
+	sol = (cfm >> 7) & 0x7f;
+	sof = cfm & 0x7f;
+	dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+			sof - sol);
+
+	buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+	if (!buf)
+		return;
+	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus,
+			sizeof(*prstatus));
+	final_note(buf);
+}
+
+#ifdef CONFIG_SMP
+static int
+kdump_wait_cpu_freeze(void)
+{
+	int cpu_num = num_online_cpus() - 1;
+	int timeout = 1000;
+	while(timeout-- > 0) {
+		if (atomic_read(&kdump_cpu_frozen) == cpu_num)
+			return 0;
+		udelay(1000);
+	}
+	return 1;
+}
+#endif
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+	/* This function is only called after the system
+	 * has paniced or is otherwise in a critical state.
+	 * The minimum amount of code to allow a kexec'd kernel
+	 * to run successfully needs to happen here.
+	 *
+	 * In practice this means shooting down the other cpus in
+	 * an SMP system.
+	 */
+	kexec_disable_iosapic();
+#ifdef CONFIG_SMP
+	/*
+	 * If kdump_on_init is set and an INIT is asserted here, kdump will
+	 * be started again via INIT monarch.
+	 */
+	local_irq_disable();
+	ia64_set_psr_mc();	/* mask MCA/INIT */
+	if (atomic_inc_return(&kdump_in_progress) != 1)
+		unw_init_running(kdump_cpu_freeze, NULL);
+
+	/*
+	 * Now this cpu is ready for kdump.
+	 * Stop all others by IPI or INIT.  They could receive INIT from
+	 * outside and might be INIT monarch, but only thing they have to
+	 * do is falling into kdump_cpu_freeze().
+	 *
+	 * If an INIT is asserted here:
+	 * - All receivers might be slaves, since some of cpus could already
+	 *   be frozen and INIT might be masked on monarch.  In this case,
+	 *   all slaves will be frozen soon since kdump_in_progress will let
+	 *   them into DIE_INIT_SLAVE_LEAVE.
+	 * - One might be a monarch, but INIT rendezvous will fail since
+	 *   at least this cpu already have INIT masked so it never join
+	 *   to the rendezvous.  In this case, all slaves and monarch will
+	 *   be frozen soon with no wait since the INIT rendezvous is skipped
+	 *   by kdump_in_progress.
+	 */
+	kdump_smp_send_stop();
+	/* not all cpu response to IPI, send INIT to freeze them */
+	if (kdump_wait_cpu_freeze()) {
+		kdump_smp_send_init();
+		/* wait again, don't go ahead if possible */
+		kdump_wait_cpu_freeze();
+	}
+#endif
+}
+
+static void
+machine_kdump_on_init(void)
+{
+	crash_save_vmcoreinfo();
+	local_irq_disable();
+	kexec_disable_iosapic();
+	machine_kexec(ia64_kimage);
+}
+
+void
+kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
+{
+	int cpuid;
+
+	local_irq_disable();
+	cpuid = smp_processor_id();
+	crash_save_this_cpu();
+	current->thread.ksp = (__u64)info->sw - 16;
+
+	ia64_set_psr_mc();	/* mask MCA/INIT and stop reentrance */
+
+	atomic_inc(&kdump_cpu_frozen);
+	kdump_status[cpuid] = 1;
+	mb();
+	for (;;)
+		cpu_relax();
+}
+
+static int
+kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
+{
+	struct ia64_mca_notify_die *nd;
+	struct die_args *args = data;
+
+	if (atomic_read(&kdump_in_progress)) {
+		switch (val) {
+		case DIE_INIT_MONARCH_LEAVE:
+			if (!kdump_freeze_monarch)
+				break;
+			/* fall through */
+		case DIE_INIT_SLAVE_LEAVE:
+		case DIE_INIT_MONARCH_ENTER:
+		case DIE_MCA_RENDZVOUS_LEAVE:
+			unw_init_running(kdump_cpu_freeze, NULL);
+			break;
+		}
+	}
+
+	if (!kdump_on_init && !kdump_on_fatal_mca)
+		return NOTIFY_DONE;
+
+	if (!ia64_kimage) {
+		if (val == DIE_INIT_MONARCH_LEAVE)
+			ia64_mca_printk(KERN_NOTICE
+					"%s: kdump not configured\n",
+					__func__);
+		return NOTIFY_DONE;
+	}
+
+	if (val != DIE_INIT_MONARCH_LEAVE &&
+	    val != DIE_INIT_MONARCH_PROCESS &&
+	    val != DIE_MCA_MONARCH_LEAVE)
+		return NOTIFY_DONE;
+
+	nd = (struct ia64_mca_notify_die *)args->err;
+
+	switch (val) {
+	case DIE_INIT_MONARCH_PROCESS:
+		/* Reason code 1 means machine check rendezvous*/
+		if (kdump_on_init && (nd->sos->rv_rc != 1)) {
+			if (atomic_inc_return(&kdump_in_progress) != 1)
+				kdump_freeze_monarch = 1;
+		}
+		break;
+	case DIE_INIT_MONARCH_LEAVE:
+		/* Reason code 1 means machine check rendezvous*/
+		if (kdump_on_init && (nd->sos->rv_rc != 1))
+			machine_kdump_on_init();
+		break;
+	case DIE_MCA_MONARCH_LEAVE:
+		/* *(nd->data) indicate if MCA is recoverable */
+		if (kdump_on_fatal_mca && !(*(nd->data))) {
+			if (atomic_inc_return(&kdump_in_progress) == 1)
+				machine_kdump_on_init();
+			/* We got fatal MCA while kdump!? No way!! */
+		}
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+#ifdef CONFIG_SYSCTL
+static ctl_table kdump_ctl_table[] = {
+	{
+		.procname = "kdump_on_init",
+		.data = &kdump_on_init,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "kdump_on_fatal_mca",
+		.data = &kdump_on_fatal_mca,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{ }
+};
+
+static ctl_table sys_table[] = {
+	{
+	  .procname = "kernel",
+	  .mode = 0555,
+	  .child = kdump_ctl_table,
+	},
+	{ }
+};
+#endif
+
+static int
+machine_crash_setup(void)
+{
+	/* be notified before default_monarch_init_process */
+	static struct notifier_block kdump_init_notifier_nb = {
+		.notifier_call = kdump_init_notifier,
+		.priority = 1,
+	};
+	int ret;
+	if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
+		return ret;
+#ifdef CONFIG_SYSCTL
+	register_sysctl_table(sys_table);
+#endif
+	return 0;
+}
+
+__initcall(machine_crash_setup);
+
diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c
new file mode 100644
index 00000000..c8c92986
--- /dev/null
+++ b/arch/ia64/kernel/crash_dump.c
@@ -0,0 +1,50 @@
+/*
+ *	kernel/crash_dump.c - Memory preserving reboot related code.
+ *
+ *	Created by: Simon Horman <horms@verge.net.au>
+ *	Original code moved from kernel/crash.c
+ *	Original code comment copied from the i386 version of this file
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/uaccess.h>
+
+/**
+ * copy_oldmem_page - copy one page from "oldmem"
+ * @pfn: page frame number to be copied
+ * @buf: target memory address for the copy; this can be in kernel address
+ *	space or user address space (see @userbuf)
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page (based on pfn) to begin the copy
+ * @userbuf: if set, @buf is in user address space, use copy_to_user(),
+ *	otherwise @buf is in kernel address space, use memcpy().
+ *
+ * Copy a page from "oldmem". For this page, there is no pte mapped
+ * in the current kernel. We stitch up a pte, similar to kmap_atomic.
+ *
+ * Calling copy_to_user() in atomic context is not desirable. Hence first
+ * copying the data to a pre-allocated kernel page and then copying to user
+ * space in non-atomic context.
+ */
+ssize_t
+copy_oldmem_page(unsigned long pfn, char *buf,
+		size_t csize, unsigned long offset, int userbuf)
+{
+	void  *vaddr;
+
+	if (!csize)
+		return 0;
+	vaddr = __va(pfn<<PAGE_SHIFT);
+	if (userbuf) {
+		if (copy_to_user(buf, (vaddr + offset), csize)) {
+			return -EFAULT;
+		}
+	} else
+		memcpy(buf, (vaddr + offset), csize);
+	return csize;
+}
+
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
new file mode 100644
index 00000000..f64097b5
--- /dev/null
+++ b/arch/ia64/kernel/cyclone.c
@@ -0,0 +1,124 @@
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/timex.h>
+#include <linux/clocksource.h>
+#include <asm/io.h>
+
+/* IBM Summit (EXA) Cyclone counter code*/
+#define CYCLONE_CBAR_ADDR 0xFEB00CD0
+#define CYCLONE_PMCC_OFFSET 0x51A0
+#define CYCLONE_MPMC_OFFSET 0x51D0
+#define CYCLONE_MPCS_OFFSET 0x51A8
+#define CYCLONE_TIMER_FREQ 100000000
+
+int use_cyclone;
+void __init cyclone_setup(void)
+{
+	use_cyclone = 1;
+}
+
+static void __iomem *cyclone_mc;
+
+static cycle_t read_cyclone(struct clocksource *cs)
+{
+	return (cycle_t)readq((void __iomem *)cyclone_mc);
+}
+
+static struct clocksource clocksource_cyclone = {
+        .name           = "cyclone",
+        .rating         = 300,
+        .read           = read_cyclone,
+        .mask           = (1LL << 40) - 1,
+        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+int __init init_cyclone_clock(void)
+{
+	u64 __iomem *reg;
+	u64 base;	/* saved cyclone base address */
+	u64 offset;	/* offset from pageaddr to cyclone_timer register */
+	int i;
+	u32 __iomem *cyclone_timer;	/* Cyclone MPMC0 register */
+
+	if (!use_cyclone)
+		return 0;
+
+	printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
+
+	/* find base address */
+	offset = (CYCLONE_CBAR_ADDR);
+	reg = ioremap_nocache(offset, sizeof(u64));
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
+				" register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+	base = readq(reg);
+	iounmap(reg);
+	if(!base){
+		printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
+				" value.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+
+	/* setup PMCC */
+	offset = (base + CYCLONE_PMCC_OFFSET);
+	reg = ioremap_nocache(offset, sizeof(u64));
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid PMCC"
+				" register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+	writel(0x00000001,reg);
+	iounmap(reg);
+
+	/* setup MPCS */
+	offset = (base + CYCLONE_MPCS_OFFSET);
+	reg = ioremap_nocache(offset, sizeof(u64));
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid MPCS"
+				" register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+	writel(0x00000001,reg);
+	iounmap(reg);
+
+	/* map in cyclone_timer */
+	offset = (base + CYCLONE_MPMC_OFFSET);
+	cyclone_timer = ioremap_nocache(offset, sizeof(u32));
+	if(!cyclone_timer){
+		printk(KERN_ERR "Summit chipset: Could not find valid MPMC"
+				" register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+
+	/*quick test to make sure its ticking*/
+	for(i=0; i<3; i++){
+		u32 old = readl(cyclone_timer);
+		int stall = 100;
+		while(stall--) barrier();
+		if(readl(cyclone_timer) == old){
+			printk(KERN_ERR "Summit chipset: Counter not counting!"
+					" DISABLED\n");
+			iounmap(cyclone_timer);
+			cyclone_timer = NULL;
+			use_cyclone = 0;
+			return -ENODEV;
+		}
+	}
+	/* initialize last tick */
+	cyclone_mc = cyclone_timer;
+	clocksource_cyclone.fsys_mmio = cyclone_timer;
+	clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ);
+
+	return 0;
+}
+
+__initcall(init_cyclone_clock);
diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c
new file mode 100644
index 00000000..f2c1600d
--- /dev/null
+++ b/arch/ia64/kernel/dma-mapping.c
@@ -0,0 +1,23 @@
+#include <linux/dma-mapping.h>
+
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly;
+
+struct dma_map_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+static int __init dma_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+
+	return 0;
+}
+fs_initcall(dma_init);
+
+struct dma_map_ops *dma_get_ops(struct device *dev)
+{
+	return dma_ops;
+}
+EXPORT_SYMBOL(dma_get_ops);
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
new file mode 100644
index 00000000..6fc03aff
--- /dev/null
+++ b/arch/ia64/kernel/efi.c
@@ -0,0 +1,1369 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 0.9
+ * April 30, 1999
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2003 Hewlett-Packard Co.
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * All EFI Runtime Services are not implemented yet as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version.  --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls.  --davidm
+ *
+ * Goutham Rao: <goutham.rao@intel.com>
+ *	Skip non-WB memory and ignore empty memory ranges.
+ */
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/crash_dump.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/efi.h>
+#include <linux/kexec.h>
+#include <linux/mm.h>
+
+#include <asm/io.h>
+#include <asm/kregs.h>
+#include <asm/meminit.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/mca.h>
+#include <asm/tlbflush.h>
+
+#define EFI_DEBUG	0
+
+extern efi_status_t efi_call_phys (void *, ...);
+
+struct efi efi;
+EXPORT_SYMBOL(efi);
+static efi_runtime_services_t *runtime;
+static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
+
+#define efi_call_virt(f, args...)	(*(f))(args)
+
+#define STUB_GET_TIME(prefix, adjust_arg)				       \
+static efi_status_t							       \
+prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc)			       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_time_cap_t *atc = NULL;					       \
+	efi_status_t ret;						       \
+									       \
+	if (tc)								       \
+		atc = adjust_arg(tc);					       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time),    \
+				adjust_arg(tm), atc);			       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
+}
+
+#define STUB_SET_TIME(prefix, adjust_arg)				       \
+static efi_status_t							       \
+prefix##_set_time (efi_time_t *tm)					       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_status_t ret;						       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time),    \
+				adjust_arg(tm));			       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
+}
+
+#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg)			       \
+static efi_status_t							       \
+prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending,	       \
+			  efi_time_t *tm)				       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_status_t ret;						       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix(					       \
+		(efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time),      \
+		adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm));     \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
+}
+
+#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg)			       \
+static efi_status_t							       \
+prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm)		       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_time_t *atm = NULL;						       \
+	efi_status_t ret;						       \
+									       \
+	if (tm)								       \
+		atm = adjust_arg(tm);					       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix(					       \
+		(efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time),      \
+		enabled, atm);						       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
+}
+
+#define STUB_GET_VARIABLE(prefix, adjust_arg)				       \
+static efi_status_t							       \
+prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr,      \
+		       unsigned long *data_size, void *data)		       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	u32 *aattr = NULL;						       \
+	efi_status_t ret;						       \
+									       \
+	if (attr)							       \
+		aattr = adjust_arg(attr);				       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix(					       \
+		(efi_get_variable_t *) __va(runtime->get_variable),	       \
+		adjust_arg(name), adjust_arg(vendor), aattr,		       \
+		adjust_arg(data_size), adjust_arg(data));		       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
+}
+
+#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg)			       \
+static efi_status_t							       \
+prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name,      \
+			    efi_guid_t *vendor)				       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_status_t ret;						       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix(					       \
+		(efi_get_next_variable_t *) __va(runtime->get_next_variable),  \
+		adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor));  \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
+}
+
+#define STUB_SET_VARIABLE(prefix, adjust_arg)				       \
+static efi_status_t							       \
+prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor,		       \
+		       unsigned long attr, unsigned long data_size,	       \
+		       void *data)					       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_status_t ret;						       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix(					       \
+		(efi_set_variable_t *) __va(runtime->set_variable),	       \
+		adjust_arg(name), adjust_arg(vendor), attr, data_size,	       \
+		adjust_arg(data));					       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
+}
+
+#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg)		       \
+static efi_status_t							       \
+prefix##_get_next_high_mono_count (u32 *count)				       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_status_t ret;						       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix((efi_get_next_high_mono_count_t *)	       \
+				__va(runtime->get_next_high_mono_count),       \
+				adjust_arg(count));			       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
+}
+
+#define STUB_RESET_SYSTEM(prefix, adjust_arg)				       \
+static void								       \
+prefix##_reset_system (int reset_type, efi_status_t status,		       \
+		       unsigned long data_size, efi_char16_t *data)	       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_char16_t *adata = NULL;					       \
+									       \
+	if (data)							       \
+		adata = adjust_arg(data);				       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	efi_call_##prefix(						       \
+		(efi_reset_system_t *) __va(runtime->reset_system),	       \
+		reset_type, status, data_size, adata);			       \
+	/* should not return, but just in case... */			       \
+	ia64_load_scratch_fpregs(fr);					       \
+}
+
+#define phys_ptr(arg)	((__typeof__(arg)) ia64_tpa(arg))
+
+STUB_GET_TIME(phys, phys_ptr)
+STUB_SET_TIME(phys, phys_ptr)
+STUB_GET_WAKEUP_TIME(phys, phys_ptr)
+STUB_SET_WAKEUP_TIME(phys, phys_ptr)
+STUB_GET_VARIABLE(phys, phys_ptr)
+STUB_GET_NEXT_VARIABLE(phys, phys_ptr)
+STUB_SET_VARIABLE(phys, phys_ptr)
+STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr)
+STUB_RESET_SYSTEM(phys, phys_ptr)
+
+#define id(arg)	arg
+
+STUB_GET_TIME(virt, id)
+STUB_SET_TIME(virt, id)
+STUB_GET_WAKEUP_TIME(virt, id)
+STUB_SET_WAKEUP_TIME(virt, id)
+STUB_GET_VARIABLE(virt, id)
+STUB_GET_NEXT_VARIABLE(virt, id)
+STUB_SET_VARIABLE(virt, id)
+STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id)
+STUB_RESET_SYSTEM(virt, id)
+
+void
+efi_gettimeofday (struct timespec *ts)
+{
+	efi_time_t tm;
+
+	if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS) {
+		memset(ts, 0, sizeof(*ts));
+		return;
+	}
+
+	ts->tv_sec = mktime(tm.year, tm.month, tm.day,
+			    tm.hour, tm.minute, tm.second);
+	ts->tv_nsec = tm.nanosecond;
+}
+
+static int
+is_memory_available (efi_memory_desc_t *md)
+{
+	if (!(md->attribute & EFI_MEMORY_WB))
+		return 0;
+
+	switch (md->type) {
+	      case EFI_LOADER_CODE:
+	      case EFI_LOADER_DATA:
+	      case EFI_BOOT_SERVICES_CODE:
+	      case EFI_BOOT_SERVICES_DATA:
+	      case EFI_CONVENTIONAL_MEMORY:
+		return 1;
+	}
+	return 0;
+}
+
+typedef struct kern_memdesc {
+	u64 attribute;
+	u64 start;
+	u64 num_pages;
+} kern_memdesc_t;
+
+static kern_memdesc_t *kern_memmap;
+
+#define efi_md_size(md)	(md->num_pages << EFI_PAGE_SHIFT)
+
+static inline u64
+kmd_end(kern_memdesc_t *kmd)
+{
+	return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
+}
+
+static inline u64
+efi_md_end(efi_memory_desc_t *md)
+{
+	return (md->phys_addr + efi_md_size(md));
+}
+
+static inline int
+efi_wb(efi_memory_desc_t *md)
+{
+	return (md->attribute & EFI_MEMORY_WB);
+}
+
+static inline int
+efi_uc(efi_memory_desc_t *md)
+{
+	return (md->attribute & EFI_MEMORY_UC);
+}
+
+static void
+walk (efi_freemem_callback_t callback, void *arg, u64 attr)
+{
+	kern_memdesc_t *k;
+	u64 start, end, voff;
+
+	voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET;
+	for (k = kern_memmap; k->start != ~0UL; k++) {
+		if (k->attribute != attr)
+			continue;
+		start = PAGE_ALIGN(k->start);
+		end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK;
+		if (start < end)
+			if ((*callback)(start + voff, end + voff, arg) < 0)
+				return;
+	}
+}
+
+/*
+ * Walk the EFI memory map and call CALLBACK once for each EFI memory
+ * descriptor that has memory that is available for OS use.
+ */
+void
+efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
+{
+	walk(callback, arg, EFI_MEMORY_WB);
+}
+
+/*
+ * Walk the EFI memory map and call CALLBACK once for each EFI memory
+ * descriptor that has memory that is available for uncached allocator.
+ */
+void
+efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
+{
+	walk(callback, arg, EFI_MEMORY_UC);
+}
+
+/*
+ * Look for the PAL_CODE region reported by EFI and map it using an
+ * ITR to enable safe PAL calls in virtual mode.  See IA-64 Processor
+ * Abstraction Layer chapter 11 in ADAG
+ */
+void *
+efi_get_pal_addr (void)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+	int pal_code_count = 0;
+	u64 vaddr, mask;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (md->type != EFI_PAL_CODE)
+			continue;
+
+		if (++pal_code_count > 1) {
+			printk(KERN_ERR "Too many EFI Pal Code memory ranges, "
+			       "dropped @ %llx\n", md->phys_addr);
+			continue;
+		}
+		/*
+		 * The only ITLB entry in region 7 that is used is the one
+		 * installed by __start().  That entry covers a 64MB range.
+		 */
+		mask  = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1);
+		vaddr = PAGE_OFFSET + md->phys_addr;
+
+		/*
+		 * We must check that the PAL mapping won't overlap with the
+		 * kernel mapping.
+		 *
+		 * PAL code is guaranteed to be aligned on a power of 2 between
+		 * 4k and 256KB and that only one ITR is needed to map it. This
+		 * implies that the PAL code is always aligned on its size,
+		 * i.e., the closest matching page size supported by the TLB.
+		 * Therefore PAL code is guaranteed never to cross a 64MB unless
+		 * it is bigger than 64MB (very unlikely!).  So for now the
+		 * following test is enough to determine whether or not we need
+		 * a dedicated ITR for the PAL code.
+		 */
+		if ((vaddr & mask) == (KERNEL_START & mask)) {
+			printk(KERN_INFO "%s: no need to install ITR for PAL code\n",
+			       __func__);
+			continue;
+		}
+
+		if (efi_md_size(md) > IA64_GRANULE_SIZE)
+			panic("Whoa!  PAL code size bigger than a granule!");
+
+#if EFI_DEBUG
+		mask  = ~((1 << IA64_GRANULE_SHIFT) - 1);
+
+		printk(KERN_INFO "CPU %d: mapping PAL code "
+                       "[0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
+                       smp_processor_id(), md->phys_addr,
+                       md->phys_addr + efi_md_size(md),
+                       vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
+#endif
+		return __va(md->phys_addr);
+	}
+	printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n",
+	       __func__);
+	return NULL;
+}
+
+
+static u8 __init palo_checksum(u8 *buffer, u32 length)
+{
+	u8 sum = 0;
+	u8 *end = buffer + length;
+
+	while (buffer < end)
+		sum = (u8) (sum + *(buffer++));
+
+	return sum;
+}
+
+/*
+ * Parse and handle PALO table which is published at:
+ * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
+ */
+static void __init handle_palo(unsigned long palo_phys)
+{
+	struct palo_table *palo = __va(palo_phys);
+	u8  checksum;
+
+	if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
+		printk(KERN_INFO "PALO signature incorrect.\n");
+		return;
+	}
+
+	checksum = palo_checksum((u8 *)palo, palo->length);
+	if (checksum) {
+		printk(KERN_INFO "PALO checksum incorrect.\n");
+		return;
+	}
+
+	setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
+}
+
+void
+efi_map_pal_code (void)
+{
+	void *pal_vaddr = efi_get_pal_addr ();
+	u64 psr;
+
+	if (!pal_vaddr)
+		return;
+
+	/*
+	 * Cannot write to CRx with PSR.ic=1
+	 */
+	psr = ia64_clear_ic();
+	ia64_itr(0x1, IA64_TR_PALCODE,
+		 GRANULEROUNDDOWN((unsigned long) pal_vaddr),
+		 pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
+		 IA64_GRANULE_SHIFT);
+	paravirt_dv_serialize_data();
+	ia64_set_psr(psr);		/* restore psr */
+}
+
+void __init
+efi_init (void)
+{
+	void *efi_map_start, *efi_map_end;
+	efi_config_table_t *config_tables;
+	efi_char16_t *c16;
+	u64 efi_desc_size;
+	char *cp, vendor[100] = "unknown";
+	int i;
+	unsigned long palo_phys;
+
+	/*
+	 * It's too early to be able to use the standard kernel command line
+	 * support...
+	 */
+	for (cp = boot_command_line; *cp; ) {
+		if (memcmp(cp, "mem=", 4) == 0) {
+			mem_limit = memparse(cp + 4, &cp);
+		} else if (memcmp(cp, "max_addr=", 9) == 0) {
+			max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
+		} else if (memcmp(cp, "min_addr=", 9) == 0) {
+			min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
+		} else {
+			while (*cp != ' ' && *cp)
+				++cp;
+			while (*cp == ' ')
+				++cp;
+		}
+	}
+	if (min_addr != 0UL)
+		printk(KERN_INFO "Ignoring memory below %lluMB\n",
+		       min_addr >> 20);
+	if (max_addr != ~0UL)
+		printk(KERN_INFO "Ignoring memory above %lluMB\n",
+		       max_addr >> 20);
+
+	efi.systab = __va(ia64_boot_param->efi_systab);
+
+	/*
+	 * Verify the EFI Table
+	 */
+	if (efi.systab == NULL)
+		panic("Whoa! Can't find EFI system table.\n");
+	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		panic("Whoa! EFI system table signature incorrect\n");
+	if ((efi.systab->hdr.revision >> 16) == 0)
+		printk(KERN_WARNING "Warning: EFI system table version "
+		       "%d.%02d, expected 1.00 or greater\n",
+		       efi.systab->hdr.revision >> 16,
+		       efi.systab->hdr.revision & 0xffff);
+
+	config_tables = __va(efi.systab->tables);
+
+	/* Show what we know for posterity */
+	c16 = __va(efi.systab->fw_vendor);
+	if (c16) {
+		for (i = 0;i < (int) sizeof(vendor) - 1 && *c16; ++i)
+			vendor[i] = *c16++;
+		vendor[i] = '\0';
+	}
+
+	printk(KERN_INFO "EFI v%u.%.02u by %s:",
+	       efi.systab->hdr.revision >> 16,
+	       efi.systab->hdr.revision & 0xffff, vendor);
+
+	efi.mps        = EFI_INVALID_TABLE_ADDR;
+	efi.acpi       = EFI_INVALID_TABLE_ADDR;
+	efi.acpi20     = EFI_INVALID_TABLE_ADDR;
+	efi.smbios     = EFI_INVALID_TABLE_ADDR;
+	efi.sal_systab = EFI_INVALID_TABLE_ADDR;
+	efi.boot_info  = EFI_INVALID_TABLE_ADDR;
+	efi.hcdp       = EFI_INVALID_TABLE_ADDR;
+	efi.uga        = EFI_INVALID_TABLE_ADDR;
+
+	palo_phys      = EFI_INVALID_TABLE_ADDR;
+
+	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
+		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
+			efi.mps = config_tables[i].table;
+			printk(" MPS=0x%lx", config_tables[i].table);
+		} else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
+			efi.acpi20 = config_tables[i].table;
+			printk(" ACPI 2.0=0x%lx", config_tables[i].table);
+		} else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
+			efi.acpi = config_tables[i].table;
+			printk(" ACPI=0x%lx", config_tables[i].table);
+		} else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
+			efi.smbios = config_tables[i].table;
+			printk(" SMBIOS=0x%lx", config_tables[i].table);
+		} else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
+			efi.sal_systab = config_tables[i].table;
+			printk(" SALsystab=0x%lx", config_tables[i].table);
+		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
+			efi.hcdp = config_tables[i].table;
+			printk(" HCDP=0x%lx", config_tables[i].table);
+		} else if (efi_guidcmp(config_tables[i].guid,
+			 PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
+			palo_phys = config_tables[i].table;
+			printk(" PALO=0x%lx", config_tables[i].table);
+		}
+	}
+	printk("\n");
+
+	if (palo_phys != EFI_INVALID_TABLE_ADDR)
+		handle_palo(palo_phys);
+
+	runtime = __va(efi.systab->runtime);
+	efi.get_time = phys_get_time;
+	efi.set_time = phys_set_time;
+	efi.get_wakeup_time = phys_get_wakeup_time;
+	efi.set_wakeup_time = phys_set_wakeup_time;
+	efi.get_variable = phys_get_variable;
+	efi.get_next_variable = phys_get_next_variable;
+	efi.set_variable = phys_set_variable;
+	efi.get_next_high_mono_count = phys_get_next_high_mono_count;
+	efi.reset_system = phys_reset_system;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+#if EFI_DEBUG
+	/* print EFI memory map: */
+	{
+		efi_memory_desc_t *md;
+		void *p;
+
+		for (i = 0, p = efi_map_start; p < efi_map_end;
+		     ++i, p += efi_desc_size)
+		{
+			const char *unit;
+			unsigned long size;
+
+			md = p;
+			size = md->num_pages << EFI_PAGE_SHIFT;
+
+			if ((size >> 40) > 0) {
+				size >>= 40;
+				unit = "TB";
+			} else if ((size >> 30) > 0) {
+				size >>= 30;
+				unit = "GB";
+			} else if ((size >> 20) > 0) {
+				size >>= 20;
+				unit = "MB";
+			} else {
+				size >>= 10;
+				unit = "KB";
+			}
+
+			printk("mem%02d: type=%2u, attr=0x%016lx, "
+			       "range=[0x%016lx-0x%016lx) (%4lu%s)\n",
+			       i, md->type, md->attribute, md->phys_addr,
+			       md->phys_addr + efi_md_size(md), size, unit);
+		}
+	}
+#endif
+
+	efi_map_pal_code();
+	efi_enter_virtual_mode();
+}
+
+void
+efi_enter_virtual_mode (void)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	efi_status_t status;
+	u64 efi_desc_size;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (md->attribute & EFI_MEMORY_RUNTIME) {
+			/*
+			 * Some descriptors have multiple bits set, so the
+			 * order of the tests is relevant.
+			 */
+			if (md->attribute & EFI_MEMORY_WB) {
+				md->virt_addr = (u64) __va(md->phys_addr);
+			} else if (md->attribute & EFI_MEMORY_UC) {
+				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+			} else if (md->attribute & EFI_MEMORY_WC) {
+#if 0
+				md->virt_addr = ia64_remap(md->phys_addr,
+							   (_PAGE_A |
+							    _PAGE_P |
+							    _PAGE_D |
+							    _PAGE_MA_WC |
+							    _PAGE_PL_0 |
+							    _PAGE_AR_RW));
+#else
+				printk(KERN_INFO "EFI_MEMORY_WC mapping\n");
+				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+#endif
+			} else if (md->attribute & EFI_MEMORY_WT) {
+#if 0
+				md->virt_addr = ia64_remap(md->phys_addr,
+							   (_PAGE_A |
+							    _PAGE_P |
+							    _PAGE_D |
+							    _PAGE_MA_WT |
+							    _PAGE_PL_0 |
+							    _PAGE_AR_RW));
+#else
+				printk(KERN_INFO "EFI_MEMORY_WT mapping\n");
+				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+#endif
+			}
+		}
+	}
+
+	status = efi_call_phys(__va(runtime->set_virtual_address_map),
+			       ia64_boot_param->efi_memmap_size,
+			       efi_desc_size,
+			       ia64_boot_param->efi_memdesc_version,
+			       ia64_boot_param->efi_memmap);
+	if (status != EFI_SUCCESS) {
+		printk(KERN_WARNING "warning: unable to switch EFI into "
+		       "virtual mode (status=%lu)\n", status);
+		return;
+	}
+
+	/*
+	 * Now that EFI is in virtual mode, we call the EFI functions more
+	 * efficiently:
+	 */
+	efi.get_time = virt_get_time;
+	efi.set_time = virt_set_time;
+	efi.get_wakeup_time = virt_get_wakeup_time;
+	efi.set_wakeup_time = virt_set_wakeup_time;
+	efi.get_variable = virt_get_variable;
+	efi.get_next_variable = virt_get_next_variable;
+	efi.set_variable = virt_set_variable;
+	efi.get_next_high_mono_count = virt_get_next_high_mono_count;
+	efi.reset_system = virt_reset_system;
+}
+
+/*
+ * Walk the EFI memory map looking for the I/O port range.  There can only be
+ * one entry of this type, other I/O port ranges should be described via ACPI.
+ */
+u64
+efi_get_iobase (void)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
+			if (md->attribute & EFI_MEMORY_UC)
+				return md->phys_addr;
+		}
+	}
+	return 0;
+}
+
+static struct kern_memdesc *
+kern_memory_descriptor (unsigned long phys_addr)
+{
+	struct kern_memdesc *md;
+
+	for (md = kern_memmap; md->start != ~0UL; md++) {
+		if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT))
+			 return md;
+	}
+	return NULL;
+}
+
+static efi_memory_desc_t *
+efi_memory_descriptor (unsigned long phys_addr)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+
+		if (phys_addr - md->phys_addr < efi_md_size(md))
+			 return md;
+	}
+	return NULL;
+}
+
+static int
+efi_memmap_intersects (unsigned long phys_addr, unsigned long size)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+	unsigned long end;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	end = phys_addr + size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (md->phys_addr < end && efi_md_end(md) > phys_addr)
+			return 1;
+	}
+	return 0;
+}
+
+u32
+efi_mem_type (unsigned long phys_addr)
+{
+	efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+
+	if (md)
+		return md->type;
+	return 0;
+}
+
+u64
+efi_mem_attributes (unsigned long phys_addr)
+{
+	efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+
+	if (md)
+		return md->attribute;
+	return 0;
+}
+EXPORT_SYMBOL(efi_mem_attributes);
+
+u64
+efi_mem_attribute (unsigned long phys_addr, unsigned long size)
+{
+	unsigned long end = phys_addr + size;
+	efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+	u64 attr;
+
+	if (!md)
+		return 0;
+
+	/*
+	 * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells
+	 * the kernel that firmware needs this region mapped.
+	 */
+	attr = md->attribute & ~EFI_MEMORY_RUNTIME;
+	do {
+		unsigned long md_end = efi_md_end(md);
+
+		if (end <= md_end)
+			return attr;
+
+		md = efi_memory_descriptor(md_end);
+		if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr)
+			return 0;
+	} while (md);
+	return 0;	/* never reached */
+}
+
+u64
+kern_mem_attribute (unsigned long phys_addr, unsigned long size)
+{
+	unsigned long end = phys_addr + size;
+	struct kern_memdesc *md;
+	u64 attr;
+
+	/*
+	 * This is a hack for ioremap calls before we set up kern_memmap.
+	 * Maybe we should do efi_memmap_init() earlier instead.
+	 */
+	if (!kern_memmap) {
+		attr = efi_mem_attribute(phys_addr, size);
+		if (attr & EFI_MEMORY_WB)
+			return EFI_MEMORY_WB;
+		return 0;
+	}
+
+	md = kern_memory_descriptor(phys_addr);
+	if (!md)
+		return 0;
+
+	attr = md->attribute;
+	do {
+		unsigned long md_end = kmd_end(md);
+
+		if (end <= md_end)
+			return attr;
+
+		md = kern_memory_descriptor(md_end);
+		if (!md || md->attribute != attr)
+			return 0;
+	} while (md);
+	return 0;	/* never reached */
+}
+EXPORT_SYMBOL(kern_mem_attribute);
+
+int
+valid_phys_addr_range (unsigned long phys_addr, unsigned long size)
+{
+	u64 attr;
+
+	/*
+	 * /dev/mem reads and writes use copy_to_user(), which implicitly
+	 * uses a granule-sized kernel identity mapping.  It's really
+	 * only safe to do this for regions in kern_memmap.  For more
+	 * details, see Documentation/ia64/aliasing.txt.
+	 */
+	attr = kern_mem_attribute(phys_addr, size);
+	if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
+		return 1;
+	return 0;
+}
+
+int
+valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size)
+{
+	unsigned long phys_addr = pfn << PAGE_SHIFT;
+	u64 attr;
+
+	attr = efi_mem_attribute(phys_addr, size);
+
+	/*
+	 * /dev/mem mmap uses normal user pages, so we don't need the entire
+	 * granule, but the entire region we're mapping must support the same
+	 * attribute.
+	 */
+	if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
+		return 1;
+
+	/*
+	 * Intel firmware doesn't tell us about all the MMIO regions, so
+	 * in general we have to allow mmap requests.  But if EFI *does*
+	 * tell us about anything inside this region, we should deny it.
+	 * The user can always map a smaller region to avoid the overlap.
+	 */
+	if (efi_memmap_intersects(phys_addr, size))
+		return 0;
+
+	return 1;
+}
+
+pgprot_t
+phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size,
+		     pgprot_t vma_prot)
+{
+	unsigned long phys_addr = pfn << PAGE_SHIFT;
+	u64 attr;
+
+	/*
+	 * For /dev/mem mmap, we use user mappings, but if the region is
+	 * in kern_memmap (and hence may be covered by a kernel mapping),
+	 * we must use the same attribute as the kernel mapping.
+	 */
+	attr = kern_mem_attribute(phys_addr, size);
+	if (attr & EFI_MEMORY_WB)
+		return pgprot_cacheable(vma_prot);
+	else if (attr & EFI_MEMORY_UC)
+		return pgprot_noncached(vma_prot);
+
+	/*
+	 * Some chipsets don't support UC access to memory.  If
+	 * WB is supported, we prefer that.
+	 */
+	if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
+		return pgprot_cacheable(vma_prot);
+
+	return pgprot_noncached(vma_prot);
+}
+
+int __init
+efi_uart_console_only(void)
+{
+	efi_status_t status;
+	char *s, name[] = "ConOut";
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	efi_char16_t *utf16, name_utf16[32];
+	unsigned char data[1024];
+	unsigned long size = sizeof(data);
+	struct efi_generic_dev_path *hdr, *end_addr;
+	int uart = 0;
+
+	/* Convert to UTF-16 */
+	utf16 = name_utf16;
+	s = name;
+	while (*s)
+		*utf16++ = *s++ & 0x7f;
+	*utf16 = 0;
+
+	status = efi.get_variable(name_utf16, &guid, NULL, &size, data);
+	if (status != EFI_SUCCESS) {
+		printk(KERN_ERR "No EFI %s variable?\n", name);
+		return 0;
+	}
+
+	hdr = (struct efi_generic_dev_path *) data;
+	end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size);
+	while (hdr < end_addr) {
+		if (hdr->type == EFI_DEV_MSG &&
+		    hdr->sub_type == EFI_DEV_MSG_UART)
+			uart = 1;
+		else if (hdr->type == EFI_DEV_END_PATH ||
+			  hdr->type == EFI_DEV_END_PATH2) {
+			if (!uart)
+				return 0;
+			if (hdr->sub_type == EFI_DEV_END_ENTIRE)
+				return 1;
+			uart = 0;
+		}
+		hdr = (struct efi_generic_dev_path *)((u8 *) hdr + hdr->length);
+	}
+	printk(KERN_ERR "Malformed %s value\n", name);
+	return 0;
+}
+
+/*
+ * Look for the first granule aligned memory descriptor memory
+ * that is big enough to hold EFI memory map. Make sure this
+ * descriptor is atleast granule sized so it does not get trimmed
+ */
+struct kern_memdesc *
+find_memmap_space (void)
+{
+	u64	contig_low=0, contig_high=0;
+	u64	as = 0, ae;
+	void *efi_map_start, *efi_map_end, *p, *q;
+	efi_memory_desc_t *md, *pmd = NULL, *check_md;
+	u64	space_needed, efi_desc_size;
+	unsigned long total_mem = 0;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	/*
+	 * Worst case: we need 3 kernel descriptors for each efi descriptor
+	 * (if every entry has a WB part in the middle, and UC head and tail),
+	 * plus one for the end marker.
+	 */
+	space_needed = sizeof(kern_memdesc_t) *
+		(3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1);
+
+	for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+		md = p;
+		if (!efi_wb(md)) {
+			continue;
+		}
+		if (pmd == NULL || !efi_wb(pmd) ||
+		    efi_md_end(pmd) != md->phys_addr) {
+			contig_low = GRANULEROUNDUP(md->phys_addr);
+			contig_high = efi_md_end(md);
+			for (q = p + efi_desc_size; q < efi_map_end;
+			     q += efi_desc_size) {
+				check_md = q;
+				if (!efi_wb(check_md))
+					break;
+				if (contig_high != check_md->phys_addr)
+					break;
+				contig_high = efi_md_end(check_md);
+			}
+			contig_high = GRANULEROUNDDOWN(contig_high);
+		}
+		if (!is_memory_available(md) || md->type == EFI_LOADER_DATA)
+			continue;
+
+		/* Round ends inward to granule boundaries */
+		as = max(contig_low, md->phys_addr);
+		ae = min(contig_high, efi_md_end(md));
+
+		/* keep within max_addr= and min_addr= command line arg */
+		as = max(as, min_addr);
+		ae = min(ae, max_addr);
+		if (ae <= as)
+			continue;
+
+		/* avoid going over mem= command line arg */
+		if (total_mem + (ae - as) > mem_limit)
+			ae -= total_mem + (ae - as) - mem_limit;
+
+		if (ae <= as)
+			continue;
+
+		if (ae - as > space_needed)
+			break;
+	}
+	if (p >= efi_map_end)
+		panic("Can't allocate space for kernel memory descriptors");
+
+	return __va(as);
+}
+
+/*
+ * Walk the EFI memory map and gather all memory available for kernel
+ * to use.  We can allocate partial granules only if the unavailable
+ * parts exist, and are WB.
+ */
+unsigned long
+efi_memmap_init(u64 *s, u64 *e)
+{
+	struct kern_memdesc *k, *prev = NULL;
+	u64	contig_low=0, contig_high=0;
+	u64	as, ae, lim;
+	void *efi_map_start, *efi_map_end, *p, *q;
+	efi_memory_desc_t *md, *pmd = NULL, *check_md;
+	u64	efi_desc_size;
+	unsigned long total_mem = 0;
+
+	k = kern_memmap = find_memmap_space();
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+		md = p;
+		if (!efi_wb(md)) {
+			if (efi_uc(md) &&
+			    (md->type == EFI_CONVENTIONAL_MEMORY ||
+			     md->type == EFI_BOOT_SERVICES_DATA)) {
+				k->attribute = EFI_MEMORY_UC;
+				k->start = md->phys_addr;
+				k->num_pages = md->num_pages;
+				k++;
+			}
+			continue;
+		}
+		if (pmd == NULL || !efi_wb(pmd) ||
+		    efi_md_end(pmd) != md->phys_addr) {
+			contig_low = GRANULEROUNDUP(md->phys_addr);
+			contig_high = efi_md_end(md);
+			for (q = p + efi_desc_size; q < efi_map_end;
+			     q += efi_desc_size) {
+				check_md = q;
+				if (!efi_wb(check_md))
+					break;
+				if (contig_high != check_md->phys_addr)
+					break;
+				contig_high = efi_md_end(check_md);
+			}
+			contig_high = GRANULEROUNDDOWN(contig_high);
+		}
+		if (!is_memory_available(md))
+			continue;
+
+#ifdef CONFIG_CRASH_DUMP
+		/* saved_max_pfn should ignore max_addr= command line arg */
+		if (saved_max_pfn < (efi_md_end(md) >> PAGE_SHIFT))
+			saved_max_pfn = (efi_md_end(md) >> PAGE_SHIFT);
+#endif
+		/*
+		 * Round ends inward to granule boundaries
+		 * Give trimmings to uncached allocator
+		 */
+		if (md->phys_addr < contig_low) {
+			lim = min(efi_md_end(md), contig_low);
+			if (efi_uc(md)) {
+				if (k > kern_memmap &&
+				    (k-1)->attribute == EFI_MEMORY_UC &&
+				    kmd_end(k-1) == md->phys_addr) {
+					(k-1)->num_pages +=
+						(lim - md->phys_addr)
+						>> EFI_PAGE_SHIFT;
+				} else {
+					k->attribute = EFI_MEMORY_UC;
+					k->start = md->phys_addr;
+					k->num_pages = (lim - md->phys_addr)
+						>> EFI_PAGE_SHIFT;
+					k++;
+				}
+			}
+			as = contig_low;
+		} else
+			as = md->phys_addr;
+
+		if (efi_md_end(md) > contig_high) {
+			lim = max(md->phys_addr, contig_high);
+			if (efi_uc(md)) {
+				if (lim == md->phys_addr && k > kern_memmap &&
+				    (k-1)->attribute == EFI_MEMORY_UC &&
+				    kmd_end(k-1) == md->phys_addr) {
+					(k-1)->num_pages += md->num_pages;
+				} else {
+					k->attribute = EFI_MEMORY_UC;
+					k->start = lim;
+					k->num_pages = (efi_md_end(md) - lim)
+						>> EFI_PAGE_SHIFT;
+					k++;
+				}
+			}
+			ae = contig_high;
+		} else
+			ae = efi_md_end(md);
+
+		/* keep within max_addr= and min_addr= command line arg */
+		as = max(as, min_addr);
+		ae = min(ae, max_addr);
+		if (ae <= as)
+			continue;
+
+		/* avoid going over mem= command line arg */
+		if (total_mem + (ae - as) > mem_limit)
+			ae -= total_mem + (ae - as) - mem_limit;
+
+		if (ae <= as)
+			continue;
+		if (prev && kmd_end(prev) == md->phys_addr) {
+			prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT;
+			total_mem += ae - as;
+			continue;
+		}
+		k->attribute = EFI_MEMORY_WB;
+		k->start = as;
+		k->num_pages = (ae - as) >> EFI_PAGE_SHIFT;
+		total_mem += ae - as;
+		prev = k++;
+	}
+	k->start = ~0L; /* end-marker */
+
+	/* reserve the memory we are using for kern_memmap */
+	*s = (u64)kern_memmap;
+	*e = (u64)++k;
+
+	return total_mem;
+}
+
+void
+efi_initialize_iomem_resources(struct resource *code_resource,
+			       struct resource *data_resource,
+			       struct resource *bss_resource)
+{
+	struct resource *res;
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+	char *name;
+	unsigned long flags;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	res = NULL;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+
+		if (md->num_pages == 0) /* should not happen */
+			continue;
+
+		flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		switch (md->type) {
+
+			case EFI_MEMORY_MAPPED_IO:
+			case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+				continue;
+
+			case EFI_LOADER_CODE:
+			case EFI_LOADER_DATA:
+			case EFI_BOOT_SERVICES_DATA:
+			case EFI_BOOT_SERVICES_CODE:
+			case EFI_CONVENTIONAL_MEMORY:
+				if (md->attribute & EFI_MEMORY_WP) {
+					name = "System ROM";
+					flags |= IORESOURCE_READONLY;
+				} else if (md->attribute == EFI_MEMORY_UC)
+					name = "Uncached RAM";
+				else
+					name = "System RAM";
+				break;
+
+			case EFI_ACPI_MEMORY_NVS:
+				name = "ACPI Non-volatile Storage";
+				break;
+
+			case EFI_UNUSABLE_MEMORY:
+				name = "reserved";
+				flags |= IORESOURCE_DISABLED;
+				break;
+
+			case EFI_RESERVED_TYPE:
+			case EFI_RUNTIME_SERVICES_CODE:
+			case EFI_RUNTIME_SERVICES_DATA:
+			case EFI_ACPI_RECLAIM_MEMORY:
+			default:
+				name = "reserved";
+				break;
+		}
+
+		if ((res = kzalloc(sizeof(struct resource),
+				   GFP_KERNEL)) == NULL) {
+			printk(KERN_ERR
+			       "failed to allocate resource for iomem\n");
+			return;
+		}
+
+		res->name = name;
+		res->start = md->phys_addr;
+		res->end = md->phys_addr + efi_md_size(md) - 1;
+		res->flags = flags;
+
+		if (insert_resource(&iomem_resource, res) < 0)
+			kfree(res);
+		else {
+			/*
+			 * We don't know which region contains
+			 * kernel data so we try it repeatedly and
+			 * let the resource manager test it.
+			 */
+			insert_resource(res, code_resource);
+			insert_resource(res, data_resource);
+			insert_resource(res, bss_resource);
+#ifdef CONFIG_KEXEC
+                        insert_resource(res, &efi_memmap_res);
+                        insert_resource(res, &boot_param_res);
+			if (crashk_res.end > crashk_res.start)
+				insert_resource(res, &crashk_res);
+#endif
+		}
+	}
+}
+
+#ifdef CONFIG_KEXEC
+/* find a block of memory aligned to 64M exclude reserved regions
+   rsvd_regions are sorted
+ */
+unsigned long __init
+kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n)
+{
+	int i;
+	u64 start, end;
+	u64 alignment = 1UL << _PAGE_SIZE_64M;
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (!efi_wb(md))
+			continue;
+		start = ALIGN(md->phys_addr, alignment);
+		end = efi_md_end(md);
+		for (i = 0; i < n; i++) {
+			if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
+				if (__pa(r[i].start) > start + size)
+					return start;
+				start = ALIGN(__pa(r[i].end), alignment);
+				if (i < n-1 &&
+				    __pa(r[i+1].start) < start + size)
+					continue;
+				else
+					break;
+			}
+		}
+		if (end > start + size)
+			return start;
+	}
+
+	printk(KERN_WARNING
+	       "Cannot reserve 0x%lx byte of memory for crashdump\n", size);
+	return ~0UL;
+}
+#endif
+
+#ifdef CONFIG_CRASH_DUMP
+/* locate the size find a the descriptor at a certain address */
+unsigned long __init
+vmcore_find_descriptor_size (unsigned long address)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+	unsigned long ret = 0;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (efi_wb(md) && md->type == EFI_LOADER_DATA
+		    && md->phys_addr == address) {
+			ret = efi_md_size(md);
+			break;
+		}
+	}
+
+	if (ret == 0)
+		printk(KERN_WARNING "Cannot locate EFI vmcore descriptor\n");
+
+	return ret;
+}
+#endif
diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S
new file mode 100644
index 00000000..a56e161d
--- /dev/null
+++ b/arch/ia64/kernel/efi_stub.S
@@ -0,0 +1,86 @@
+/*
+ * EFI call stub.
+ *
+ * Copyright (C) 1999-2001 Hewlett-Packard Co
+ *	David Mosberger <davidm@hpl.hp.com>
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off.  We need this because we can't call SetVirtualMap() until
+ * the kernel has booted far enough to allow allocation of struct vma_struct
+ * entries (which we would need to map stuff with memory attributes other
+ * than uncached or writeback...).  Since the GetTime() service gets called
+ * earlier than that, we need to be able to make physical mode EFI calls from
+ * the kernel.
+ */
+
+/*
+ * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
+ * Abstraction Layer Specification", revision 2.6e).  Note that
+ * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
+ * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
+ * (the br.ia instruction fails unless psr.dfl and psr.dfh are
+ * cleared).  Fortunately, SAL promises not to touch the floating
+ * point regs, so at least we don't have to save f2-f127.
+ */
+#define PSR_BITS_TO_CLEAR						\
+	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |		\
+	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	\
+	 IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PSR_BITS_TO_SET							\
+	(IA64_PSR_BN)
+
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+
+/*
+ * Inputs:
+ *	in0 = address of function descriptor of EFI routine to call
+ *	in1..in7 = arguments to routine
+ *
+ * Outputs:
+ *	r8 = EFI_STATUS returned by called function
+ */
+
+GLOBAL_ENTRY(efi_call_phys)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc loc1=ar.pfs,8,7,7,0
+	ld8 r2=[in0],8			// load EFI function's entry point
+	mov loc0=rp
+	.body
+	;;
+	mov loc2=gp			// save global pointer
+	mov loc4=ar.rsc			// save RSE configuration
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	;;
+	ld8 gp=[in0]			// load EFI function's global pointer
+	movl r16=PSR_BITS_TO_CLEAR
+	mov loc3=psr			// save processor status word
+	movl r17=PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17
+	mov b6=r2
+	;;
+	andcm r16=loc3,r16	// get psr with IT, DT, and RT bits cleared
+	br.call.sptk.many rp=ia64_switch_mode_phys
+.ret0:	mov out4=in5
+	mov out0=in1
+	mov out1=in2
+	mov out2=in3
+	mov out3=in4
+	mov out5=in6
+	mov out6=in7
+	mov loc5=r19
+	mov loc6=r20
+	br.call.sptk.many rp=b6		// call the EFI function
+.ret1:	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3
+	mov r19=loc5
+	mov r20=loc6
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+.ret2:	mov ar.rsc=loc4			// restore RSE configuration
+	mov ar.pfs=loc1
+	mov rp=loc0
+	mov gp=loc2
+	br.ret.sptk.many rp
+END(efi_call_phys)
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
new file mode 100644
index 00000000..bac1639b
--- /dev/null
+++ b/arch/ia64/kernel/elfcore.c
@@ -0,0 +1,80 @@
+#include <linux/elf.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/elf.h>
+
+
+Elf64_Half elf_core_extra_phdrs(void)
+{
+	return GATE_EHDR->e_phnum;
+}
+
+int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
+			       unsigned long limit)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+	Elf64_Off ofs = 0;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		struct elf_phdr phdr = gate_phdrs[i];
+
+		if (phdr.p_type == PT_LOAD) {
+			phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz);
+			phdr.p_filesz = phdr.p_memsz;
+			if (ofs == 0) {
+				ofs = phdr.p_offset = offset;
+				offset += phdr.p_filesz;
+			} else {
+				phdr.p_offset = ofs;
+			}
+		} else {
+			phdr.p_offset += ofs;
+		}
+		phdr.p_paddr = 0; /* match other core phdrs */
+		*size += sizeof(phdr);
+		if (*size > limit || !dump_write(file, &phdr, sizeof(phdr)))
+			return 0;
+	}
+	return 1;
+}
+
+int elf_core_write_extra_data(struct file *file, size_t *size,
+			      unsigned long limit)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		if (gate_phdrs[i].p_type == PT_LOAD) {
+			void *addr = (void *)gate_phdrs[i].p_vaddr;
+			size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz);
+
+			*size += memsz;
+			if (*size > limit || !dump_write(file, addr, memsz))
+				return 0;
+			break;
+		}
+	}
+	return 1;
+}
+
+size_t elf_core_extra_data_size(void)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+	size_t size = 0;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		if (gate_phdrs[i].p_type == PT_LOAD) {
+			size += PAGE_ALIGN(gate_phdrs[i].p_memsz);
+			break;
+		}
+	}
+	return size;
+}
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
new file mode 100644
index 00000000..df477f8c
--- /dev/null
+++ b/arch/ia64/kernel/entry.S
@@ -0,0 +1,1785 @@
+/*
+ * arch/ia64/kernel/entry.S
+ *
+ * Kernel entry points.
+ *
+ * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999, 2002-2003
+ *	Asit Mallick <Asit.K.Mallick@intel.com>
+ * 	Don Dugger <Don.Dugger@intel.com>
+ *	Suresh Siddha <suresh.b.siddha@intel.com>
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+/*
+ * ia64_switch_to now places correct virtual mapping in in TR2 for
+ * kernel stack. This allows us to handle interrupts without changing
+ * to physical mode.
+ *
+ * Jonathan Nicklin	<nicklin@missioncriticallinux.com>
+ * Patrick O'Rourke	<orourke@missioncriticallinux.com>
+ * 11/07/2000
+ */
+/*
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *                    pv_ops.
+ */
+/*
+ * Global (preserved) predicate usage on syscall entry/exit path:
+ *
+ *	pKStk:		See entry.h.
+ *	pUStk:		See entry.h.
+ *	pSys:		See entry.h.
+ *	pNonSys:	!pSys
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/kregs.h>
+#include <asm/asm-offsets.h>
+#include <asm/pgtable.h>
+#include <asm/percpu.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/ftrace.h>
+
+#include "minstate.h"
+
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
+	/*
+	 * execve() is special because in case of success, we need to
+	 * setup a null register window frame.
+	 */
+ENTRY(ia64_execve)
+	/*
+	 * Allocate 8 input registers since ptrace() may clobber them
+	 */
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc loc1=ar.pfs,8,2,4,0
+	mov loc0=rp
+	.body
+	mov out0=in0			// filename
+	;;				// stop bit between alloc and call
+	mov out1=in1			// argv
+	mov out2=in2			// envp
+	add out3=16,sp			// regs
+	br.call.sptk.many rp=sys_execve
+.ret0:
+	cmp4.ge p6,p7=r8,r0
+	mov ar.pfs=loc1			// restore ar.pfs
+	sxt4 r8=r8			// return 64-bit result
+	;;
+	stf.spill [sp]=f0
+(p6)	cmp.ne pKStk,pUStk=r0,r0	// a successful execve() lands us in user-mode...
+	mov rp=loc0
+(p6)	mov ar.pfs=r0			// clear ar.pfs on success
+(p7)	br.ret.sptk.many rp
+
+	/*
+	 * In theory, we'd have to zap this state only to prevent leaking of
+	 * security sensitive state (e.g., if current->mm->dumpable is zero).  However,
+	 * this executes in less than 20 cycles even on Itanium, so it's not worth
+	 * optimizing for...).
+	 */
+	mov ar.unat=0; 		mov ar.lc=0
+	mov r4=0;		mov f2=f0;		mov b1=r0
+	mov r5=0;		mov f3=f0;		mov b2=r0
+	mov r6=0;		mov f4=f0;		mov b3=r0
+	mov r7=0;		mov f5=f0;		mov b4=r0
+	ldf.fill f12=[sp];	mov f13=f0;		mov b5=r0
+	ldf.fill f14=[sp];	ldf.fill f15=[sp];	mov f16=f0
+	ldf.fill f17=[sp];	ldf.fill f18=[sp];	mov f19=f0
+	ldf.fill f20=[sp];	ldf.fill f21=[sp];	mov f22=f0
+	ldf.fill f23=[sp];	ldf.fill f24=[sp];	mov f25=f0
+	ldf.fill f26=[sp];	ldf.fill f27=[sp];	mov f28=f0
+	ldf.fill f29=[sp];	ldf.fill f30=[sp];	mov f31=f0
+	br.ret.sptk.many rp
+END(ia64_execve)
+
+/*
+ * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr,
+ *	      u64 tls)
+ */
+GLOBAL_ENTRY(sys_clone2)
+	/*
+	 * Allocate 8 input registers since ptrace() may clobber them
+	 */
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc r16=ar.pfs,8,2,6,0
+	DO_SAVE_SWITCH_STACK
+	adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
+	mov loc0=rp
+	mov loc1=r16				// save ar.pfs across do_fork
+	.body
+	mov out1=in1
+	mov out3=in2
+	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
+	mov out4=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	;;
+(p6)	st8 [r2]=in5				// store TLS in r16 for copy_thread()
+	mov out5=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
+	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out0=in0				// out0 = clone_flags
+	br.call.sptk.many rp=do_fork
+.ret1:	.restore sp
+	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
+	mov ar.pfs=loc1
+	mov rp=loc0
+	br.ret.sptk.many rp
+END(sys_clone2)
+
+/*
+ * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls)
+ *	Deprecated.  Use sys_clone2() instead.
+ */
+GLOBAL_ENTRY(sys_clone)
+	/*
+	 * Allocate 8 input registers since ptrace() may clobber them
+	 */
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc r16=ar.pfs,8,2,6,0
+	DO_SAVE_SWITCH_STACK
+	adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
+	mov loc0=rp
+	mov loc1=r16				// save ar.pfs across do_fork
+	.body
+	mov out1=in1
+	mov out3=16				// stacksize (compensates for 16-byte scratch area)
+	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
+	mov out4=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	;;
+(p6)	st8 [r2]=in4				// store TLS in r13 (tp)
+	mov out5=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
+	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out0=in0				// out0 = clone_flags
+	br.call.sptk.many rp=do_fork
+.ret2:	.restore sp
+	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
+	mov ar.pfs=loc1
+	mov rp=loc0
+	br.ret.sptk.many rp
+END(sys_clone)
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
+
+/*
+ * prev_task <- ia64_switch_to(struct task_struct *next)
+ *	With Ingo's new scheduler, interrupts are disabled when this routine gets
+ *	called.  The code starting at .map relies on this.  The rest of the code
+ *	doesn't care about the interrupt masking status.
+ */
+GLOBAL_ENTRY(__paravirt_switch_to)
+	.prologue
+	alloc r16=ar.pfs,1,0,0,0
+	DO_SAVE_SWITCH_STACK
+	.body
+
+	adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
+	movl r25=init_task
+	mov r27=IA64_KR(CURRENT_STACK)
+	adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
+	dep r20=0,in0,61,3		// physical address of "next"
+	;;
+	st8 [r22]=sp			// save kernel stack pointer of old task
+	shr.u r26=r20,IA64_GRANULE_SHIFT
+	cmp.eq p7,p6=r25,in0
+	;;
+	/*
+	 * If we've already mapped this task's page, we can skip doing it again.
+	 */
+(p6)	cmp.eq p7,p6=r26,r27
+(p6)	br.cond.dpnt .map
+	;;
+.done:
+	ld8 sp=[r21]			// load kernel stack pointer of new task
+	MOV_TO_KR(CURRENT, in0, r8, r9)		// update "current" application register
+	mov r8=r13			// return pointer to previously running task
+	mov r13=in0			// set "current" pointer
+	;;
+	DO_LOAD_SWITCH_STACK
+
+#ifdef CONFIG_SMP
+	sync.i				// ensure "fc"s done by this CPU are visible on other CPUs
+#endif
+	br.ret.sptk.many rp		// boogie on out in new context
+
+.map:
+	RSM_PSR_IC(r25)			// interrupts (psr.i) are already disabled here
+	movl r25=PAGE_KERNEL
+	;;
+	srlz.d
+	or r23=r25,r20			// construct PA | page properties
+	mov r25=IA64_GRANULE_SHIFT<<2
+	;;
+	MOV_TO_ITIR(p0, r25, r8)
+	MOV_TO_IFA(in0, r8)		// VA of next task...
+	;;
+	mov r25=IA64_TR_CURRENT_STACK
+	MOV_TO_KR(CURRENT_STACK, r26, r8, r9)	// remember last page we mapped...
+	;;
+	itr.d dtr[r25]=r23		// wire in new mapping...
+	SSM_PSR_IC_AND_SRLZ_D(r8, r9)	// reenable the psr.ic bit
+	br.cond.sptk .done
+END(__paravirt_switch_to)
+
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
+/*
+ * Note that interrupts are enabled during save_switch_stack and load_switch_stack.  This
+ * means that we may get an interrupt with "sp" pointing to the new kernel stack while
+ * ar.bspstore is still pointing to the old kernel backing store area.  Since ar.rsc,
+ * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a
+ * problem.  Also, we don't need to specify unwind information for preserved registers
+ * that are not modified in save_switch_stack as the right unwind information is already
+ * specified at the call-site of save_switch_stack.
+ */
+
+/*
+ * save_switch_stack:
+ *	- r16 holds ar.pfs
+ *	- b7 holds address to return to
+ *	- rp (b0) holds return address to save
+ */
+GLOBAL_ENTRY(save_switch_stack)
+	.prologue
+	.altrp b7
+	flushrs			// flush dirty regs to backing store (must be first in insn group)
+	.save @priunat,r17
+	mov r17=ar.unat		// preserve caller's
+	.body
+#ifdef CONFIG_ITANIUM
+	adds r2=16+128,sp
+	adds r3=16+64,sp
+	adds r14=SW(R4)+16,sp
+	;;
+	st8.spill [r14]=r4,16		// spill r4
+	lfetch.fault.excl.nt1 [r3],128
+	;;
+	lfetch.fault.excl.nt1 [r2],128
+	lfetch.fault.excl.nt1 [r3],128
+	;;
+	lfetch.fault.excl [r2]
+	lfetch.fault.excl [r3]
+	adds r15=SW(R5)+16,sp
+#else
+	add r2=16+3*128,sp
+	add r3=16,sp
+	add r14=SW(R4)+16,sp
+	;;
+	st8.spill [r14]=r4,SW(R6)-SW(R4)	// spill r4 and prefetch offset 0x1c0
+	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x010
+	;;
+	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x090
+	lfetch.fault.excl.nt1 [r2],128	//		prefetch offset 0x190
+	;;
+	lfetch.fault.excl.nt1 [r3]	//		prefetch offset 0x110
+	lfetch.fault.excl.nt1 [r2]	//		prefetch offset 0x210
+	adds r15=SW(R5)+16,sp
+#endif
+	;;
+	st8.spill [r15]=r5,SW(R7)-SW(R5)	// spill r5
+	mov.m ar.rsc=0			// put RSE in mode: enforced lazy, little endian, pl 0
+	add r2=SW(F2)+16,sp		// r2 = &sw->f2
+	;;
+	st8.spill [r14]=r6,SW(B0)-SW(R6)	// spill r6
+	mov.m r18=ar.fpsr		// preserve fpsr
+	add r3=SW(F3)+16,sp		// r3 = &sw->f3
+	;;
+	stf.spill [r2]=f2,32
+	mov.m r19=ar.rnat
+	mov r21=b0
+
+	stf.spill [r3]=f3,32
+	st8.spill [r15]=r7,SW(B2)-SW(R7)	// spill r7
+	mov r22=b1
+	;;
+	// since we're done with the spills, read and save ar.unat:
+	mov.m r29=ar.unat
+	mov.m r20=ar.bspstore
+	mov r23=b2
+	stf.spill [r2]=f4,32
+	stf.spill [r3]=f5,32
+	mov r24=b3
+	;;
+	st8 [r14]=r21,SW(B1)-SW(B0)		// save b0
+	st8 [r15]=r23,SW(B3)-SW(B2)		// save b2
+	mov r25=b4
+	mov r26=b5
+	;;
+	st8 [r14]=r22,SW(B4)-SW(B1)		// save b1
+	st8 [r15]=r24,SW(AR_PFS)-SW(B3)		// save b3
+	mov r21=ar.lc		// I-unit
+	stf.spill [r2]=f12,32
+	stf.spill [r3]=f13,32
+	;;
+	st8 [r14]=r25,SW(B5)-SW(B4)		// save b4
+	st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS)	// save ar.pfs
+	stf.spill [r2]=f14,32
+	stf.spill [r3]=f15,32
+	;;
+	st8 [r14]=r26				// save b5
+	st8 [r15]=r21				// save ar.lc
+	stf.spill [r2]=f16,32
+	stf.spill [r3]=f17,32
+	;;
+	stf.spill [r2]=f18,32
+	stf.spill [r3]=f19,32
+	;;
+	stf.spill [r2]=f20,32
+	stf.spill [r3]=f21,32
+	;;
+	stf.spill [r2]=f22,32
+	stf.spill [r3]=f23,32
+	;;
+	stf.spill [r2]=f24,32
+	stf.spill [r3]=f25,32
+	;;
+	stf.spill [r2]=f26,32
+	stf.spill [r3]=f27,32
+	;;
+	stf.spill [r2]=f28,32
+	stf.spill [r3]=f29,32
+	;;
+	stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30)
+	stf.spill [r3]=f31,SW(PR)-SW(F31)
+	add r14=SW(CALLER_UNAT)+16,sp
+	;;
+	st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT)	// save ar.unat
+	st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat
+	mov r21=pr
+	;;
+	st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat
+	st8 [r3]=r21				// save predicate registers
+	;;
+	st8 [r2]=r20				// save ar.bspstore
+	st8 [r14]=r18				// save fpsr
+	mov ar.rsc=3		// put RSE back into eager mode, pl 0
+	br.cond.sptk.many b7
+END(save_switch_stack)
+
+/*
+ * load_switch_stack:
+ *	- "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK)
+ *	- b7 holds address to return to
+ *	- must not touch r8-r11
+ */
+GLOBAL_ENTRY(load_switch_stack)
+	.prologue
+	.altrp b7
+
+	.body
+	lfetch.fault.nt1 [sp]
+	adds r2=SW(AR_BSPSTORE)+16,sp
+	adds r3=SW(AR_UNAT)+16,sp
+	mov ar.rsc=0						// put RSE into enforced lazy mode
+	adds r14=SW(CALLER_UNAT)+16,sp
+	adds r15=SW(AR_FPSR)+16,sp
+	;;
+	ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE))	// bspstore
+	ld8 r29=[r3],(SW(B1)-SW(AR_UNAT))	// unat
+	;;
+	ld8 r21=[r2],16		// restore b0
+	ld8 r22=[r3],16		// restore b1
+	;;
+	ld8 r23=[r2],16		// restore b2
+	ld8 r24=[r3],16		// restore b3
+	;;
+	ld8 r25=[r2],16		// restore b4
+	ld8 r26=[r3],16		// restore b5
+	;;
+	ld8 r16=[r2],(SW(PR)-SW(AR_PFS))	// ar.pfs
+	ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC))	// ar.lc
+	;;
+	ld8 r28=[r2]		// restore pr
+	ld8 r30=[r3]		// restore rnat
+	;;
+	ld8 r18=[r14],16	// restore caller's unat
+	ld8 r19=[r15],24	// restore fpsr
+	;;
+	ldf.fill f2=[r14],32
+	ldf.fill f3=[r15],32
+	;;
+	ldf.fill f4=[r14],32
+	ldf.fill f5=[r15],32
+	;;
+	ldf.fill f12=[r14],32
+	ldf.fill f13=[r15],32
+	;;
+	ldf.fill f14=[r14],32
+	ldf.fill f15=[r15],32
+	;;
+	ldf.fill f16=[r14],32
+	ldf.fill f17=[r15],32
+	;;
+	ldf.fill f18=[r14],32
+	ldf.fill f19=[r15],32
+	mov b0=r21
+	;;
+	ldf.fill f20=[r14],32
+	ldf.fill f21=[r15],32
+	mov b1=r22
+	;;
+	ldf.fill f22=[r14],32
+	ldf.fill f23=[r15],32
+	mov b2=r23
+	;;
+	mov ar.bspstore=r27
+	mov ar.unat=r29		// establish unat holding the NaT bits for r4-r7
+	mov b3=r24
+	;;
+	ldf.fill f24=[r14],32
+	ldf.fill f25=[r15],32
+	mov b4=r25
+	;;
+	ldf.fill f26=[r14],32
+	ldf.fill f27=[r15],32
+	mov b5=r26
+	;;
+	ldf.fill f28=[r14],32
+	ldf.fill f29=[r15],32
+	mov ar.pfs=r16
+	;;
+	ldf.fill f30=[r14],32
+	ldf.fill f31=[r15],24
+	mov ar.lc=r17
+	;;
+	ld8.fill r4=[r14],16
+	ld8.fill r5=[r15],16
+	mov pr=r28,-1
+	;;
+	ld8.fill r6=[r14],16
+	ld8.fill r7=[r15],16
+
+	mov ar.unat=r18				// restore caller's unat
+	mov ar.rnat=r30				// must restore after bspstore but before rsc!
+	mov ar.fpsr=r19				// restore fpsr
+	mov ar.rsc=3				// put RSE back into eager mode, pl 0
+	br.cond.sptk.many b7
+END(load_switch_stack)
+
+GLOBAL_ENTRY(prefetch_stack)
+	add r14 = -IA64_SWITCH_STACK_SIZE, sp
+	add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0
+	;;
+	ld8 r16 = [r15]				// load next's stack pointer
+	lfetch.fault.excl [r14], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault.excl [r14], 128
+	lfetch.fault [r16], 128
+	;;
+	lfetch.fault [r16], 128
+	br.ret.sptk.many rp
+END(prefetch_stack)
+
+GLOBAL_ENTRY(kernel_execve)
+	rum psr.ac
+	mov r15=__NR_execve			// put syscall number in place
+	break __BREAK_SYSCALL
+	br.ret.sptk.many rp
+END(kernel_execve)
+
+GLOBAL_ENTRY(clone)
+	mov r15=__NR_clone			// put syscall number in place
+	break __BREAK_SYSCALL
+	br.ret.sptk.many rp
+END(clone)
+
+	/*
+	 * Invoke a system call, but do some tracing before and after the call.
+	 * We MUST preserve the current register frame throughout this routine
+	 * because some system calls (such as ia64_execve) directly
+	 * manipulate ar.pfs.
+	 */
+GLOBAL_ENTRY(ia64_trace_syscall)
+	PT_REGS_UNWIND_INFO(0)
+	/*
+	 * We need to preserve the scratch registers f6-f11 in case the system
+	 * call is sigreturn.
+	 */
+	adds r16=PT(F6)+16,sp
+	adds r17=PT(F7)+16,sp
+	;;
+ 	stf.spill [r16]=f6,32
+ 	stf.spill [r17]=f7,32
+	;;
+ 	stf.spill [r16]=f8,32
+ 	stf.spill [r17]=f9,32
+	;;
+ 	stf.spill [r16]=f10
+ 	stf.spill [r17]=f11
+	br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
+	cmp.lt p6,p0=r8,r0			// check tracehook
+	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
+	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
+	mov r10=0
+(p6)	br.cond.sptk strace_error		// syscall failed ->
+	adds r16=PT(F6)+16,sp
+	adds r17=PT(F7)+16,sp
+	;;
+	ldf.fill f6=[r16],32
+	ldf.fill f7=[r17],32
+	;;
+	ldf.fill f8=[r16],32
+	ldf.fill f9=[r17],32
+	;;
+	ldf.fill f10=[r16]
+	ldf.fill f11=[r17]
+	// the syscall number may have changed, so re-load it and re-calculate the
+	// syscall entry-point:
+	adds r15=PT(R15)+16,sp			// r15 = &pt_regs.r15 (syscall #)
+	;;
+	ld8 r15=[r15]
+	mov r3=NR_syscalls - 1
+	;;
+	adds r15=-1024,r15
+	movl r16=sys_call_table
+	;;
+	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
+	cmp.leu p6,p7=r15,r3
+	;;
+(p6)	ld8 r20=[r20]				// load address of syscall entry point
+(p7)	movl r20=sys_ni_syscall
+	;;
+	mov b6=r20
+	br.call.sptk.many rp=b6			// do the syscall
+.strace_check_retval:
+	cmp.lt p6,p0=r8,r0			// syscall failed?
+	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
+	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
+	mov r10=0
+(p6)	br.cond.sptk strace_error		// syscall failed ->
+	;;					// avoid RAW on r10
+.strace_save_retval:
+.mem.offset 0,0; st8.spill [r2]=r8		// store return value in slot for r8
+.mem.offset 8,0; st8.spill [r3]=r10		// clear error indication in slot for r10
+	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
+.ret3:
+(pUStk)	cmp.eq.unc p6,p0=r0,r0			// p6 <- pUStk
+(pUStk)	rsm psr.i				// disable interrupts
+	br.cond.sptk ia64_work_pending_syscall_end
+
+strace_error:
+	ld8 r3=[r2]				// load pt_regs.r8
+	sub r9=0,r8				// negate return value to get errno value
+	;;
+	cmp.ne p6,p0=r3,r0			// is pt_regs.r8!=0?
+	adds r3=16,r2				// r3=&pt_regs.r10
+	;;
+(p6)	mov r10=-1
+(p6)	mov r8=r9
+	br.cond.sptk .strace_save_retval
+END(ia64_trace_syscall)
+
+	/*
+	 * When traced and returning from sigreturn, we invoke syscall_trace but then
+	 * go straight to ia64_leave_kernel rather than ia64_leave_syscall.
+	 */
+GLOBAL_ENTRY(ia64_strace_leave_kernel)
+	PT_REGS_UNWIND_INFO(0)
+{	/*
+	 * Some versions of gas generate bad unwind info if the first instruction of a
+	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
+	 */
+	nop.m 0
+	nop.i 0
+	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
+}
+.ret4:	br.cond.sptk ia64_leave_kernel
+END(ia64_strace_leave_kernel)
+
+GLOBAL_ENTRY(ia64_ret_from_clone)
+	PT_REGS_UNWIND_INFO(0)
+{	/*
+	 * Some versions of gas generate bad unwind info if the first instruction of a
+	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
+	 */
+	nop.m 0
+	nop.i 0
+	/*
+	 * We need to call schedule_tail() to complete the scheduling process.
+	 * Called by ia64_switch_to() after do_fork()->copy_thread().  r8 contains the
+	 * address of the previously executing task.
+	 */
+	br.call.sptk.many rp=ia64_invoke_schedule_tail
+}
+.ret8:
+	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+	ld4 r2=[r2]
+	;;
+	mov r8=0
+	and r2=_TIF_SYSCALL_TRACEAUDIT,r2
+	;;
+	cmp.ne p6,p0=r2,r0
+(p6)	br.cond.spnt .strace_check_retval
+	;;					// added stop bits to prevent r8 dependency
+END(ia64_ret_from_clone)
+	// fall through
+GLOBAL_ENTRY(ia64_ret_from_syscall)
+	PT_REGS_UNWIND_INFO(0)
+	cmp.ge p6,p7=r8,r0			// syscall executed successfully?
+	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
+	mov r10=r0				// clear error indication in r10
+(p7)	br.cond.spnt handle_syscall_error	// handle potential syscall failure
+#ifdef CONFIG_PARAVIRT
+	;;
+	br.cond.sptk.few ia64_leave_syscall
+	;;
+#endif /* CONFIG_PARAVIRT */
+END(ia64_ret_from_syscall)
+#ifndef CONFIG_PARAVIRT
+	// fall through
+#endif
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
+
+/*
+ * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
+ *	need to switch to bank 0 and doesn't restore the scratch registers.
+ *	To avoid leaking kernel bits, the scratch registers are set to
+ *	the following known-to-be-safe values:
+ *
+ *		  r1: restored (global pointer)
+ *		  r2: cleared
+ *		  r3: 1 (when returning to user-level)
+ *	      r8-r11: restored (syscall return value(s))
+ *		 r12: restored (user-level stack pointer)
+ *		 r13: restored (user-level thread pointer)
+ *		 r14: set to __kernel_syscall_via_epc
+ *		 r15: restored (syscall #)
+ *	     r16-r17: cleared
+ *		 r18: user-level b6
+ *		 r19: cleared
+ *		 r20: user-level ar.fpsr
+ *		 r21: user-level b0
+ *		 r22: cleared
+ *		 r23: user-level ar.bspstore
+ *		 r24: user-level ar.rnat
+ *		 r25: user-level ar.unat
+ *		 r26: user-level ar.pfs
+ *		 r27: user-level ar.rsc
+ *		 r28: user-level ip
+ *		 r29: user-level psr
+ *		 r30: user-level cfm
+ *		 r31: user-level pr
+ *	      f6-f11: cleared
+ *		  pr: restored (user-level pr)
+ *		  b0: restored (user-level rp)
+ *	          b6: restored
+ *		  b7: set to __kernel_syscall_via_epc
+ *	     ar.unat: restored (user-level ar.unat)
+ *	      ar.pfs: restored (user-level ar.pfs)
+ *	      ar.rsc: restored (user-level ar.rsc)
+ *	     ar.rnat: restored (user-level ar.rnat)
+ *	 ar.bspstore: restored (user-level ar.bspstore)
+ *	     ar.fpsr: restored (user-level ar.fpsr)
+ *	      ar.ccv: cleared
+ *	      ar.csd: cleared
+ *	      ar.ssd: cleared
+ */
+GLOBAL_ENTRY(__paravirt_leave_syscall)
+	PT_REGS_UNWIND_INFO(0)
+	/*
+	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
+	 * user- or fsys-mode, hence we disable interrupts early on.
+	 *
+	 * p6 controls whether current_thread_info()->flags needs to be check for
+	 * extra work.  We always check for extra work when returning to user-level.
+	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+	 * is 0.  After extra work processing has been completed, execution
+	 * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check
+	 * needs to be redone.
+	 */
+#ifdef CONFIG_PREEMPT
+	RSM_PSR_I(p0, r2, r18)			// disable interrupts
+	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
+(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+	;;
+	.pred.rel.mutex pUStk,pKStk
+(pKStk) ld4 r21=[r20]			// r21 <- preempt_count
+(pUStk)	mov r21=0			// r21 <- 0
+	;;
+	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
+#else /* !CONFIG_PREEMPT */
+	RSM_PSR_I(pUStk, r2, r18)
+	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
+(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
+#endif
+.global __paravirt_work_processed_syscall;
+__paravirt_work_processed_syscall:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	adds r2=PT(LOADRS)+16,r12
+	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
+	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
+	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
+	adds r3=PT(AR_BSPSTORE)+16,r12		// deferred
+	;;
+#else
+	adds r2=PT(LOADRS)+16,r12
+	adds r3=PT(AR_BSPSTORE)+16,r12
+	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
+	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
+	nop.i 0
+	;;
+#endif
+	mov r16=ar.bsp				// M2  get existing backing store pointer
+	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
+(p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
+	;;
+	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
+(p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
+(p6)	br.cond.spnt .work_pending_syscall
+	;;
+	// start restoring the state saved on the kernel stack (struct pt_regs):
+	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
+	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
+(pNonSys) break 0		//      bug check: we shouldn't be here if pNonSys is TRUE!
+	;;
+	invala			// M0|1 invalidate ALAT
+	RSM_PSR_I_IC(r28, r29, r30)	// M2   turn off interrupts and interruption collection
+	cmp.eq p9,p0=r0,r0	// A    set p9 to indicate that we should restore cr.ifs
+
+	ld8 r29=[r2],16		// M0|1 load cr.ipsr
+	ld8 r28=[r3],16		// M0|1 load cr.iip
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r30=[r2],16		// M0|1 load cr.ifs
+	ld8 r25=[r3],16		// M0|1 load ar.unat
+(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+#else
+	mov r22=r0		// A    clear r22
+	;;
+	ld8 r30=[r2],16		// M0|1 load cr.ifs
+	ld8 r25=[r3],16		// M0|1 load ar.unat
+(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+#endif
+	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
+	MOV_FROM_PSR(pKStk, r22, r21)	// M2   read PSR now that interrupts are disabled
+	nop 0
+	;;
+	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
+	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// M0|1 load ar.rsc
+	mov f6=f0			// F    clear f6
+	;;
+	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// M0|1 load ar.rnat (may be garbage)
+	ld8 r31=[r3],PT(R1)-PT(PR)		// M0|1 load predicates
+	mov f7=f0				// F    clear f7
+	;;
+	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// M0|1 load ar.fpsr
+	ld8.fill r1=[r3],16			// M0|1 load r1
+(pUStk) mov r17=1				// A
+	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) st1 [r15]=r17				// M2|3
+#else
+(pUStk) st1 [r14]=r17				// M2|3
+#endif
+	ld8.fill r13=[r3],16			// M0|1
+	mov f8=f0				// F    clear f8
+	;;
+	ld8.fill r12=[r2]			// M0|1 restore r12 (sp)
+	ld8.fill r15=[r3]			// M0|1 restore r15
+	mov b6=r18				// I0   restore b6
+
+	LOAD_PHYS_STACK_REG_SIZE(r17)
+	mov f9=f0					// F    clear f9
+(pKStk) br.cond.dpnt.many skip_rbs_switch		// B
+
+	srlz.d				// M0   ensure interruption collection is off (for cover)
+	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
+	COVER				// B    add current frame into dirty partition & set cr.ifs
+	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov r19=ar.bsp			// M2   get new backing store pointer
+	st8 [r14]=r22			// M	save time at leave
+	mov f10=f0			// F    clear f10
+
+	mov r22=r0			// A	clear r22
+	movl r14=__kernel_syscall_via_epc // X
+	;;
+#else
+	mov r19=ar.bsp			// M2   get new backing store pointer
+	mov f10=f0			// F    clear f10
+
+	nop.m 0
+	movl r14=__kernel_syscall_via_epc // X
+	;;
+#endif
+	mov.m ar.csd=r0			// M2   clear ar.csd
+	mov.m ar.ccv=r0			// M2   clear ar.ccv
+	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
+
+	mov.m ar.ssd=r0			// M2   clear ar.ssd
+	mov f11=f0			// F    clear f11
+	br.cond.sptk.many rbs_switch	// B
+END(__paravirt_leave_syscall)
+
+GLOBAL_ENTRY(__paravirt_leave_kernel)
+	PT_REGS_UNWIND_INFO(0)
+	/*
+	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
+	 * user- or fsys-mode, hence we disable interrupts early on.
+	 *
+	 * p6 controls whether current_thread_info()->flags needs to be check for
+	 * extra work.  We always check for extra work when returning to user-level.
+	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+	 * is 0.  After extra work processing has been completed, execution
+	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
+	 * needs to be redone.
+	 */
+#ifdef CONFIG_PREEMPT
+	RSM_PSR_I(p0, r17, r31)			// disable interrupts
+	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
+(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+	;;
+	.pred.rel.mutex pUStk,pKStk
+(pKStk)	ld4 r21=[r20]			// r21 <- preempt_count
+(pUStk)	mov r21=0			// r21 <- 0
+	;;
+	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
+#else
+	RSM_PSR_I(pUStk, r17, r31)
+	cmp.eq p0,pLvSys=r0,r0		// pLvSys=0: leave from kernel
+(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
+#endif
+.work_processed_kernel:
+	adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r17]				// load current_thread_info()->flags
+	adds r21=PT(PR)+16,r12
+	;;
+
+	lfetch [r21],PT(CR_IPSR)-PT(PR)
+	adds r2=PT(B6)+16,r12
+	adds r3=PT(R16)+16,r12
+	;;
+	lfetch [r21]
+	ld8 r28=[r2],8		// load b6
+	adds r29=PT(R24)+16,r12
+
+	ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
+	adds r30=PT(AR_CCV)+16,r12
+(p6)	and r19=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
+	;;
+	ld8.fill r24=[r29]
+	ld8 r15=[r30]		// load ar.ccv
+(p6)	cmp4.ne.unc p6,p0=r19, r0		// any special work pending?
+	;;
+	ld8 r29=[r2],16		// load b7
+	ld8 r30=[r3],16		// load ar.csd
+(p6)	br.cond.spnt .work_pending
+	;;
+	ld8 r31=[r2],16		// load ar.ssd
+	ld8.fill r8=[r3],16
+	;;
+	ld8.fill r9=[r2],16
+	ld8.fill r10=[r3],PT(R17)-PT(R10)
+	;;
+	ld8.fill r11=[r2],PT(R18)-PT(R11)
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	mov ar.csd=r30
+	mov ar.ssd=r31
+	;;
+	RSM_PSR_I_IC(r23, r22, r25)	// initiate turning off of interrupt and interruption collection
+	invala			// invalidate ALAT
+	;;
+	ld8.fill r22=[r2],24
+	ld8.fill r23=[r3],24
+	mov b6=r28
+	;;
+	ld8.fill r25=[r2],16
+	ld8.fill r26=[r3],16
+	mov b7=r29
+	;;
+	ld8.fill r27=[r2],16
+	ld8.fill r28=[r3],16
+	;;
+	ld8.fill r29=[r2],16
+	ld8.fill r30=[r3],24
+	;;
+	ld8.fill r31=[r2],PT(F9)-PT(R31)
+	adds r3=PT(F10)-PT(F6),r3
+	;;
+	ldf.fill f9=[r2],PT(F6)-PT(F9)
+	ldf.fill f10=[r3],PT(F8)-PT(F10)
+	;;
+	ldf.fill f6=[r2],PT(F7)-PT(F6)
+	;;
+	ldf.fill f7=[r2],PT(F11)-PT(F7)
+	ldf.fill f8=[r3],32
+	;;
+	srlz.d	// ensure that inter. collection is off (VHPT is don't care, since text is pinned)
+	mov ar.ccv=r15
+	;;
+	ldf.fill f11=[r2]
+	BSW_0(r2, r3, r15)	// switch back to bank 0 (no stop bit required beforehand...)
+	;;
+(pUStk)	mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
+	adds r16=PT(CR_IPSR)+16,r12
+	adds r17=PT(CR_IIP)+16,r12
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	.pred.rel.mutex pUStk,pKStk
+	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
+	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
+	nop.i 0
+	;;
+#else
+	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
+	nop.i 0
+	nop.i 0
+	;;
+#endif
+	ld8 r29=[r16],16	// load cr.ipsr
+	ld8 r28=[r17],16	// load cr.iip
+	;;
+	ld8 r30=[r16],16	// load cr.ifs
+	ld8 r25=[r17],16	// load ar.unat
+	;;
+	ld8 r26=[r16],16	// load ar.pfs
+	ld8 r27=[r17],16	// load ar.rsc
+	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
+	;;
+	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
+	ld8 r23=[r17],16	// load ar.bspstore (may be garbage)
+	;;
+	ld8 r31=[r16],16	// load predicates
+	ld8 r21=[r17],16	// load b0
+	;;
+	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16	// load r1
+	;;
+	ld8.fill r12=[r16],16
+	ld8.fill r13=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
+#else
+(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+#endif
+	;;
+	ld8 r20=[r16],16	// ar.fpsr
+	ld8.fill r15=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
+#endif
+	;;
+	ld8.fill r14=[r16],16
+	ld8.fill r2=[r17]
+(pUStk)	mov r17=1
+	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
+	//  mib  :  mov add br        ->  mib  : ld8 add br
+	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
+	//
+	//  no one require bsp in r16 if (pKStk) branch is selected.
+(pUStk)	st8 [r3]=r22		// save time at leave
+(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
+	;;
+	ld8.fill r3=[r16]	// deferred
+	LOAD_PHYS_STACK_REG_SIZE(r17)
+(pKStk)	br.cond.dpnt skip_rbs_switch
+	mov r16=ar.bsp		// get existing backing store pointer
+#else
+	ld8.fill r3=[r16]
+(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
+	;;
+	mov r16=ar.bsp		// get existing backing store pointer
+	LOAD_PHYS_STACK_REG_SIZE(r17)
+(pKStk)	br.cond.dpnt skip_rbs_switch
+#endif
+
+	/*
+	 * Restore user backing store.
+	 *
+	 * NOTE: alloc, loadrs, and cover can't be predicated.
+	 */
+(pNonSys) br.cond.dpnt dont_preserve_current_frame
+	COVER				// add current frame into dirty partition and set cr.ifs
+	;;
+	mov r19=ar.bsp			// get new backing store pointer
+rbs_switch:
+	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
+	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
+	;;
+	sub r19=r19,r16			// calculate total byte size of dirty partition
+	add r18=64,r18			// don't force in0-in7 into memory...
+	;;
+	shl r19=r19,16			// shift size of dirty partition into loadrs position
+	;;
+dont_preserve_current_frame:
+	/*
+	 * To prevent leaking bits between the kernel and user-space,
+	 * we must clear the stacked registers in the "invalid" partition here.
+	 * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
+	 * 5 registers/cycle on McKinley).
+	 */
+#	define pRecurse	p6
+#	define pReturn	p7
+#ifdef CONFIG_ITANIUM
+#	define Nregs	10
+#else
+#	define Nregs	14
+#endif
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
+	sub r17=r17,r18			// r17 = (physStackedSize + 8) - dirtySize
+	;;
+	mov ar.rsc=r19			// load ar.rsc to be used for "loadrs"
+	shladd in0=loc1,3,r17
+	mov in1=0
+	;;
+	TEXT_ALIGN(32)
+rse_clear_invalid:
+#ifdef CONFIG_ITANIUM
+	// cycle 0
+ { .mii
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
+	add out0=-Nregs*8,in0
+}{ .mfb
+	add out1=1,in1			// increment recursion count
+	nop.f 0
+	nop.b 0				// can't do br.call here because of alloc (WAW on CFM)
+	;;
+}{ .mfi	// cycle 1
+	mov loc1=0
+	nop.f 0
+	mov loc2=0
+}{ .mib
+	mov loc3=0
+	mov loc4=0
+(pRecurse) br.call.sptk.many b0=rse_clear_invalid
+
+}{ .mfi	// cycle 2
+	mov loc5=0
+	nop.f 0
+	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
+}{ .mib
+	mov loc6=0
+	mov loc7=0
+(pReturn) br.ret.sptk.many b0
+}
+#else /* !CONFIG_ITANIUM */
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
+	add out0=-Nregs*8,in0
+	add out1=1,in1			// increment recursion count
+	mov loc1=0
+	mov loc2=0
+	;;
+	mov loc3=0
+	mov loc4=0
+	mov loc5=0
+	mov loc6=0
+	mov loc7=0
+(pRecurse) br.call.dptk.few b0=rse_clear_invalid
+	;;
+	mov loc8=0
+	mov loc9=0
+	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
+	mov loc10=0
+	mov loc11=0
+(pReturn) br.ret.dptk.many b0
+#endif /* !CONFIG_ITANIUM */
+#	undef pRecurse
+#	undef pReturn
+	;;
+	alloc r17=ar.pfs,0,0,0,0	// drop current register frame
+	;;
+	loadrs
+	;;
+skip_rbs_switch:
+	mov ar.unat=r25		// M2
+(pKStk)	extr.u r22=r22,21,1	// I0 extract current value of psr.pp from r22
+(pLvSys)mov r19=r0		// A  clear r19 for leave_syscall, no-op otherwise
+	;;
+(pUStk)	mov ar.bspstore=r23	// M2
+(pKStk)	dep r29=r22,r29,21,1	// I0 update ipsr.pp with psr.pp
+(pLvSys)mov r16=r0		// A  clear r16 for leave_syscall, no-op otherwise
+	;;
+	MOV_TO_IPSR(p0, r29, r25)	// M2
+	mov ar.pfs=r26		// I0
+(pLvSys)mov r17=r0		// A  clear r17 for leave_syscall, no-op otherwise
+
+	MOV_TO_IFS(p9, r30, r25)// M2
+	mov b0=r21		// I0
+(pLvSys)mov r18=r0		// A  clear r18 for leave_syscall, no-op otherwise
+
+	mov ar.fpsr=r20		// M2
+	MOV_TO_IIP(r28, r25)	// M2
+	nop 0
+	;;
+(pUStk)	mov ar.rnat=r24		// M2 must happen with RSE in lazy mode
+	nop 0
+(pLvSys)mov r2=r0
+
+	mov ar.rsc=r27		// M2
+	mov pr=r31,-1		// I0
+	RFI			// B
+
+	/*
+	 * On entry:
+	 *	r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
+	 *	r31 = current->thread_info->flags
+	 * On exit:
+	 *	p6 = TRUE if work-pending-check needs to be redone
+	 *
+	 * Interrupts are disabled on entry, reenabled depend on work, and
+	 * disabled on exit.
+	 */
+.work_pending_syscall:
+	add r2=-8,r2
+	add r3=-8,r3
+	;;
+	st8 [r2]=r8
+	st8 [r3]=r10
+.work_pending:
+	tbit.z p6,p0=r31,TIF_NEED_RESCHED	// is resched not needed?
+(p6)	br.cond.sptk.few .notify
+#ifdef CONFIG_PREEMPT
+(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
+	;;
+(pKStk) st4 [r20]=r21
+#endif
+	SSM_PSR_I(p0, p6, r2)	// enable interrupts
+	br.call.spnt.many rp=schedule
+.ret9:	cmp.eq p6,p0=r0,r0	// p6 <- 1 (re-check)
+	RSM_PSR_I(p0, r2, r20)	// disable interrupts
+	;;
+#ifdef CONFIG_PREEMPT
+(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+	;;
+(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
+#endif
+(pLvSys)br.cond.sptk.few  __paravirt_pending_syscall_end
+	br.cond.sptk.many .work_processed_kernel
+
+.notify:
+(pUStk)	br.call.spnt.many rp=notify_resume_user
+.ret10:	cmp.ne p6,p0=r0,r0	// p6 <- 0 (don't re-check)
+(pLvSys)br.cond.sptk.few  __paravirt_pending_syscall_end
+	br.cond.sptk.many .work_processed_kernel
+
+.global __paravirt_pending_syscall_end;
+__paravirt_pending_syscall_end:
+	adds r2=PT(R8)+16,r12
+	adds r3=PT(R10)+16,r12
+	;;
+	ld8 r8=[r2]
+	ld8 r10=[r3]
+	br.cond.sptk.many __paravirt_work_processed_syscall_target
+END(__paravirt_leave_kernel)
+
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
+ENTRY(handle_syscall_error)
+	/*
+	 * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
+	 * lead us to mistake a negative return value as a failed syscall.  Those syscall
+	 * must deposit a non-zero value in pt_regs.r8 to indicate an error.  If
+	 * pt_regs.r8 is zero, we assume that the call completed successfully.
+	 */
+	PT_REGS_UNWIND_INFO(0)
+	ld8 r3=[r2]		// load pt_regs.r8
+	;;
+	cmp.eq p6,p7=r3,r0	// is pt_regs.r8==0?
+	;;
+(p7)	mov r10=-1
+(p7)	sub r8=0,r8		// negate return value to get errno
+	br.cond.sptk ia64_leave_syscall
+END(handle_syscall_error)
+
+	/*
+	 * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
+	 * in case a system call gets restarted.
+	 */
+GLOBAL_ENTRY(ia64_invoke_schedule_tail)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc loc1=ar.pfs,8,2,1,0
+	mov loc0=rp
+	mov out0=r8				// Address of previous task
+	;;
+	br.call.sptk.many rp=schedule_tail
+.ret11:	mov ar.pfs=loc1
+	mov rp=loc0
+	br.ret.sptk.many rp
+END(ia64_invoke_schedule_tail)
+
+	/*
+	 * Setup stack and call do_notify_resume_user(), keeping interrupts
+	 * disabled.
+	 *
+	 * Note that pSys and pNonSys need to be set up by the caller.
+	 * We declare 8 input registers so the system call args get preserved,
+	 * in case we need to restart a system call.
+	 */
+GLOBAL_ENTRY(notify_resume_user)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
+	mov r9=ar.unat
+	mov loc0=rp				// save return address
+	mov out0=0				// there is no "oldset"
+	adds out1=8,sp				// out1=&sigscratch->ar_pfs
+(pSys)	mov out2=1				// out2==1 => we're in a syscall
+	;;
+(pNonSys) mov out2=0				// out2==0 => not a syscall
+	.fframe 16
+	.spillsp ar.unat, 16
+	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
+	st8 [out1]=loc1,-8			// save ar.pfs, out1=&sigscratch
+	.body
+	br.call.sptk.many rp=do_notify_resume_user
+.ret15:	.restore sp
+	adds sp=16,sp				// pop scratch stack space
+	;;
+	ld8 r9=[sp]				// load new unat from sigscratch->scratch_unat
+	mov rp=loc0
+	;;
+	mov ar.unat=r9
+	mov ar.pfs=loc1
+	br.ret.sptk.many rp
+END(notify_resume_user)
+
+ENTRY(sys_rt_sigreturn)
+	PT_REGS_UNWIND_INFO(0)
+	/*
+	 * Allocate 8 input registers since ptrace() may clobber them
+	 */
+	alloc r2=ar.pfs,8,0,1,0
+	.prologue
+	PT_REGS_SAVES(16)
+	adds sp=-16,sp
+	.body
+	cmp.eq pNonSys,pSys=r0,r0		// sigreturn isn't a normal syscall...
+	;;
+	/*
+	 * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined
+	 * syscall-entry path does not save them we save them here instead.  Note: we
+	 * don't need to save any other registers that are not saved by the stream-lined
+	 * syscall path, because restore_sigcontext() restores them.
+	 */
+	adds r16=PT(F6)+32,sp
+	adds r17=PT(F7)+32,sp
+	;;
+ 	stf.spill [r16]=f6,32
+ 	stf.spill [r17]=f7,32
+	;;
+ 	stf.spill [r16]=f8,32
+ 	stf.spill [r17]=f9,32
+	;;
+ 	stf.spill [r16]=f10
+ 	stf.spill [r17]=f11
+	adds out0=16,sp				// out0 = &sigscratch
+	br.call.sptk.many rp=ia64_rt_sigreturn
+.ret19:	.restore sp,0
+	adds sp=16,sp
+	;;
+	ld8 r9=[sp]				// load new ar.unat
+	mov.sptk b7=r8,ia64_native_leave_kernel
+	;;
+	mov ar.unat=r9
+	br.many b7
+END(sys_rt_sigreturn)
+
+GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
+	.prologue
+	/*
+	 * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
+	 */
+	mov r16=r0
+	DO_SAVE_SWITCH_STACK
+	br.call.sptk.many rp=ia64_handle_unaligned	// stack frame setup in ivt
+.ret21:	.body
+	DO_LOAD_SWITCH_STACK
+	br.cond.sptk.many rp				// goes to ia64_leave_kernel
+END(ia64_prepare_handle_unaligned)
+
+	//
+	// unw_init_running(void (*callback)(info, arg), void *arg)
+	//
+#	define EXTRA_FRAME_SIZE	((UNW_FRAME_INFO_SIZE+15)&~15)
+
+GLOBAL_ENTRY(unw_init_running)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+	alloc loc1=ar.pfs,2,3,3,0
+	;;
+	ld8 loc2=[in0],8
+	mov loc0=rp
+	mov r16=loc1
+	DO_SAVE_SWITCH_STACK
+	.body
+
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+	.fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE
+	SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE)
+	adds sp=-EXTRA_FRAME_SIZE,sp
+	.body
+	;;
+	adds out0=16,sp				// &info
+	mov out1=r13				// current
+	adds out2=16+EXTRA_FRAME_SIZE,sp	// &switch_stack
+	br.call.sptk.many rp=unw_init_frame_info
+1:	adds out0=16,sp				// &info
+	mov b6=loc2
+	mov loc2=gp				// save gp across indirect function call
+	;;
+	ld8 gp=[in0]
+	mov out1=in1				// arg
+	br.call.sptk.many rp=b6			// invoke the callback function
+1:	mov gp=loc2				// restore gp
+
+	// For now, we don't allow changing registers from within
+	// unw_init_running; if we ever want to allow that, we'd
+	// have to do a load_switch_stack here:
+	.restore sp
+	adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp
+
+	mov ar.pfs=loc1
+	mov rp=loc0
+	br.ret.sptk.many rp
+END(unw_init_running)
+
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+GLOBAL_ENTRY(_mcount)
+	br ftrace_stub
+END(_mcount)
+
+.here:
+	br.ret.sptk.many b0
+
+GLOBAL_ENTRY(ftrace_caller)
+	alloc out0 = ar.pfs, 8, 0, 4, 0
+	mov out3 = r0
+	;;
+	mov out2 = b0
+	add r3 = 0x20, r3
+	mov out1 = r1;
+	br.call.sptk.many b0 = ftrace_patch_gp
+	//this might be called from module, so we must patch gp
+ftrace_patch_gp:
+	movl gp=__gp
+	mov b0 = r3
+	;;
+.global ftrace_call;
+ftrace_call:
+{
+	.mlx
+	nop.m 0x0
+	movl r3 = .here;;
+}
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(ftrace_caller)
+
+#else
+GLOBAL_ENTRY(_mcount)
+	movl r2 = ftrace_stub
+	movl r3 = ftrace_trace_function;;
+	ld8 r3 = [r3];;
+	ld8 r3 = [r3];;
+	cmp.eq p7,p0 = r2, r3
+(p7)	br.sptk.many ftrace_stub
+	;;
+
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(_mcount)
+#endif
+
+GLOBAL_ENTRY(ftrace_stub)
+	mov r3 = b0
+	movl r2 = _mcount_ret_helper
+	;;
+	mov b6 = r2
+	mov b7 = r3
+	br.ret.sptk.many b6
+
+_mcount_ret_helper:
+	mov b0 = r42
+	mov r1 = r41
+	mov ar.pfs = r40
+	br b7
+END(ftrace_stub)
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+	.rodata
+	.align 8
+	.globl sys_call_table
+sys_call_table:
+	data8 sys_ni_syscall		//  This must be sys_ni_syscall!  See ivt.S.
+	data8 sys_exit				// 1025
+	data8 sys_read
+	data8 sys_write
+	data8 sys_open
+	data8 sys_close
+	data8 sys_creat				// 1030
+	data8 sys_link
+	data8 sys_unlink
+	data8 ia64_execve
+	data8 sys_chdir
+	data8 sys_fchdir			// 1035
+	data8 sys_utimes
+	data8 sys_mknod
+	data8 sys_chmod
+	data8 sys_chown
+	data8 sys_lseek				// 1040
+	data8 sys_getpid
+	data8 sys_getppid
+	data8 sys_mount
+	data8 sys_umount
+	data8 sys_setuid			// 1045
+	data8 sys_getuid
+	data8 sys_geteuid
+	data8 sys_ptrace
+	data8 sys_access
+	data8 sys_sync				// 1050
+	data8 sys_fsync
+	data8 sys_fdatasync
+	data8 sys_kill
+	data8 sys_rename
+	data8 sys_mkdir				// 1055
+	data8 sys_rmdir
+	data8 sys_dup
+	data8 sys_ia64_pipe
+	data8 sys_times
+	data8 ia64_brk				// 1060
+	data8 sys_setgid
+	data8 sys_getgid
+	data8 sys_getegid
+	data8 sys_acct
+	data8 sys_ioctl				// 1065
+	data8 sys_fcntl
+	data8 sys_umask
+	data8 sys_chroot
+	data8 sys_ustat
+	data8 sys_dup2				// 1070
+	data8 sys_setreuid
+	data8 sys_setregid
+	data8 sys_getresuid
+	data8 sys_setresuid
+	data8 sys_getresgid			// 1075
+	data8 sys_setresgid
+	data8 sys_getgroups
+	data8 sys_setgroups
+	data8 sys_getpgid
+	data8 sys_setpgid			// 1080
+	data8 sys_setsid
+	data8 sys_getsid
+	data8 sys_sethostname
+	data8 sys_setrlimit
+	data8 sys_getrlimit			// 1085
+	data8 sys_getrusage
+	data8 sys_gettimeofday
+	data8 sys_settimeofday
+	data8 sys_select
+	data8 sys_poll				// 1090
+	data8 sys_symlink
+	data8 sys_readlink
+	data8 sys_uselib
+	data8 sys_swapon
+	data8 sys_swapoff			// 1095
+	data8 sys_reboot
+	data8 sys_truncate
+	data8 sys_ftruncate
+	data8 sys_fchmod
+	data8 sys_fchown			// 1100
+	data8 ia64_getpriority
+	data8 sys_setpriority
+	data8 sys_statfs
+	data8 sys_fstatfs
+	data8 sys_gettid			// 1105
+	data8 sys_semget
+	data8 sys_semop
+	data8 sys_semctl
+	data8 sys_msgget
+	data8 sys_msgsnd			// 1110
+	data8 sys_msgrcv
+	data8 sys_msgctl
+	data8 sys_shmget
+	data8 sys_shmat
+	data8 sys_shmdt				// 1115
+	data8 sys_shmctl
+	data8 sys_syslog
+	data8 sys_setitimer
+	data8 sys_getitimer
+	data8 sys_ni_syscall			// 1120		/* was: ia64_oldstat */
+	data8 sys_ni_syscall					/* was: ia64_oldlstat */
+	data8 sys_ni_syscall					/* was: ia64_oldfstat */
+	data8 sys_vhangup
+	data8 sys_lchown
+	data8 sys_remap_file_pages		// 1125
+	data8 sys_wait4
+	data8 sys_sysinfo
+	data8 sys_clone
+	data8 sys_setdomainname
+	data8 sys_newuname			// 1130
+	data8 sys_adjtimex
+	data8 sys_ni_syscall					/* was: ia64_create_module */
+	data8 sys_init_module
+	data8 sys_delete_module
+	data8 sys_ni_syscall			// 1135		/* was: sys_get_kernel_syms */
+	data8 sys_ni_syscall					/* was: sys_query_module */
+	data8 sys_quotactl
+	data8 sys_bdflush
+	data8 sys_sysfs
+	data8 sys_personality			// 1140
+	data8 sys_ni_syscall		// sys_afs_syscall
+	data8 sys_setfsuid
+	data8 sys_setfsgid
+	data8 sys_getdents
+	data8 sys_flock				// 1145
+	data8 sys_readv
+	data8 sys_writev
+	data8 sys_pread64
+	data8 sys_pwrite64
+	data8 sys_sysctl			// 1150
+	data8 sys_mmap
+	data8 sys_munmap
+	data8 sys_mlock
+	data8 sys_mlockall
+	data8 sys_mprotect			// 1155
+	data8 ia64_mremap
+	data8 sys_msync
+	data8 sys_munlock
+	data8 sys_munlockall
+	data8 sys_sched_getparam		// 1160
+	data8 sys_sched_setparam
+	data8 sys_sched_getscheduler
+	data8 sys_sched_setscheduler
+	data8 sys_sched_yield
+	data8 sys_sched_get_priority_max	// 1165
+	data8 sys_sched_get_priority_min
+	data8 sys_sched_rr_get_interval
+	data8 sys_nanosleep
+	data8 sys_nfsservctl
+	data8 sys_prctl				// 1170
+	data8 sys_getpagesize
+	data8 sys_mmap2
+	data8 sys_pciconfig_read
+	data8 sys_pciconfig_write
+	data8 sys_perfmonctl			// 1175
+	data8 sys_sigaltstack
+	data8 sys_rt_sigaction
+	data8 sys_rt_sigpending
+	data8 sys_rt_sigprocmask
+	data8 sys_rt_sigqueueinfo		// 1180
+	data8 sys_rt_sigreturn
+	data8 sys_rt_sigsuspend
+	data8 sys_rt_sigtimedwait
+	data8 sys_getcwd
+	data8 sys_capget			// 1185
+	data8 sys_capset
+	data8 sys_sendfile64
+	data8 sys_ni_syscall		// sys_getpmsg (STREAMS)
+	data8 sys_ni_syscall		// sys_putpmsg (STREAMS)
+	data8 sys_socket			// 1190
+	data8 sys_bind
+	data8 sys_connect
+	data8 sys_listen
+	data8 sys_accept
+	data8 sys_getsockname			// 1195
+	data8 sys_getpeername
+	data8 sys_socketpair
+	data8 sys_send
+	data8 sys_sendto
+	data8 sys_recv				// 1200
+	data8 sys_recvfrom
+	data8 sys_shutdown
+	data8 sys_setsockopt
+	data8 sys_getsockopt
+	data8 sys_sendmsg			// 1205
+	data8 sys_recvmsg
+	data8 sys_pivot_root
+	data8 sys_mincore
+	data8 sys_madvise
+	data8 sys_newstat			// 1210
+	data8 sys_newlstat
+	data8 sys_newfstat
+	data8 sys_clone2
+	data8 sys_getdents64
+	data8 sys_getunwind			// 1215
+	data8 sys_readahead
+	data8 sys_setxattr
+	data8 sys_lsetxattr
+	data8 sys_fsetxattr
+	data8 sys_getxattr			// 1220
+	data8 sys_lgetxattr
+	data8 sys_fgetxattr
+	data8 sys_listxattr
+	data8 sys_llistxattr
+	data8 sys_flistxattr			// 1225
+	data8 sys_removexattr
+	data8 sys_lremovexattr
+	data8 sys_fremovexattr
+	data8 sys_tkill
+	data8 sys_futex				// 1230
+	data8 sys_sched_setaffinity
+	data8 sys_sched_getaffinity
+	data8 sys_set_tid_address
+	data8 sys_fadvise64_64
+	data8 sys_tgkill 			// 1235
+	data8 sys_exit_group
+	data8 sys_lookup_dcookie
+	data8 sys_io_setup
+	data8 sys_io_destroy
+	data8 sys_io_getevents			// 1240
+	data8 sys_io_submit
+	data8 sys_io_cancel
+	data8 sys_epoll_create
+	data8 sys_epoll_ctl
+	data8 sys_epoll_wait			// 1245
+	data8 sys_restart_syscall
+	data8 sys_semtimedop
+	data8 sys_timer_create
+	data8 sys_timer_settime
+	data8 sys_timer_gettime			// 1250
+	data8 sys_timer_getoverrun
+	data8 sys_timer_delete
+	data8 sys_clock_settime
+	data8 sys_clock_gettime
+	data8 sys_clock_getres			// 1255
+	data8 sys_clock_nanosleep
+	data8 sys_fstatfs64
+	data8 sys_statfs64
+	data8 sys_mbind
+	data8 sys_get_mempolicy			// 1260
+	data8 sys_set_mempolicy
+	data8 sys_mq_open
+	data8 sys_mq_unlink
+	data8 sys_mq_timedsend
+	data8 sys_mq_timedreceive		// 1265
+	data8 sys_mq_notify
+	data8 sys_mq_getsetattr
+	data8 sys_kexec_load
+	data8 sys_ni_syscall			// reserved for vserver
+	data8 sys_waitid			// 1270
+	data8 sys_add_key
+	data8 sys_request_key
+	data8 sys_keyctl
+	data8 sys_ioprio_set
+	data8 sys_ioprio_get			// 1275
+	data8 sys_move_pages
+	data8 sys_inotify_init
+	data8 sys_inotify_add_watch
+	data8 sys_inotify_rm_watch
+	data8 sys_migrate_pages			// 1280
+	data8 sys_openat
+	data8 sys_mkdirat
+	data8 sys_mknodat
+	data8 sys_fchownat
+	data8 sys_futimesat			// 1285
+	data8 sys_newfstatat
+	data8 sys_unlinkat
+	data8 sys_renameat
+	data8 sys_linkat
+	data8 sys_symlinkat			// 1290
+	data8 sys_readlinkat
+	data8 sys_fchmodat
+	data8 sys_faccessat
+	data8 sys_pselect6
+	data8 sys_ppoll				// 1295
+	data8 sys_unshare
+	data8 sys_splice
+	data8 sys_set_robust_list
+	data8 sys_get_robust_list
+	data8 sys_sync_file_range		// 1300
+	data8 sys_tee
+	data8 sys_vmsplice
+	data8 sys_fallocate
+	data8 sys_getcpu
+	data8 sys_epoll_pwait			// 1305
+	data8 sys_utimensat
+	data8 sys_signalfd
+	data8 sys_ni_syscall
+	data8 sys_eventfd
+	data8 sys_timerfd_create		// 1310
+	data8 sys_timerfd_settime
+	data8 sys_timerfd_gettime
+	data8 sys_signalfd4
+	data8 sys_eventfd2
+	data8 sys_epoll_create1			// 1315
+	data8 sys_dup3
+	data8 sys_pipe2
+	data8 sys_inotify_init1
+	data8 sys_preadv
+	data8 sys_pwritev			// 1320
+	data8 sys_rt_tgsigqueueinfo
+	data8 sys_recvmmsg
+	data8 sys_fanotify_init
+	data8 sys_fanotify_mark
+	data8 sys_prlimit64			// 1325
+	data8 sys_name_to_handle_at
+	data8 sys_open_by_handle_at
+	data8 sys_clock_adjtime
+	data8 sys_syncfs
+	data8 sys_setns				// 1330
+	data8 sys_sendmmsg
+	data8 sys_ni_syscall	/* process_vm_readv */
+	data8 sys_ni_syscall	/* process_vm_writev */
+	data8 sys_accept4
+
+	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h
new file mode 100644
index 00000000..b83edac0
--- /dev/null
+++ b/arch/ia64/kernel/entry.h
@@ -0,0 +1,82 @@
+
+/*
+ * Preserved registers that are shared between code in ivt.S and
+ * entry.S.  Be careful not to step on these!
+ */
+#define PRED_LEAVE_SYSCALL	1 /* TRUE iff leave from syscall */
+#define PRED_KERNEL_STACK	2 /* returning to kernel-stacks? */
+#define PRED_USER_STACK		3 /* returning to user-stacks? */
+#define PRED_SYSCALL		4 /* inside a system call? */
+#define PRED_NON_SYSCALL	5 /* complement of PRED_SYSCALL */
+
+#ifdef __ASSEMBLY__
+# define PASTE2(x,y)	x##y
+# define PASTE(x,y)	PASTE2(x,y)
+
+# define pLvSys		PASTE(p,PRED_LEAVE_SYSCALL)
+# define pKStk		PASTE(p,PRED_KERNEL_STACK)
+# define pUStk		PASTE(p,PRED_USER_STACK)
+# define pSys		PASTE(p,PRED_SYSCALL)
+# define pNonSys	PASTE(p,PRED_NON_SYSCALL)
+#endif
+
+#define PT(f)		(IA64_PT_REGS_##f##_OFFSET)
+#define SW(f)		(IA64_SWITCH_STACK_##f##_OFFSET)
+#define SOS(f)		(IA64_SAL_OS_STATE_##f##_OFFSET)
+
+#define PT_REGS_SAVES(off)			\
+	.unwabi 3, 'i';				\
+	.fframe IA64_PT_REGS_SIZE+16+(off);	\
+	.spillsp rp, PT(CR_IIP)+16+(off);	\
+	.spillsp ar.pfs, PT(CR_IFS)+16+(off);	\
+	.spillsp ar.unat, PT(AR_UNAT)+16+(off);	\
+	.spillsp ar.fpsr, PT(AR_FPSR)+16+(off);	\
+	.spillsp pr, PT(PR)+16+(off);
+
+#define PT_REGS_UNWIND_INFO(off)		\
+	.prologue;				\
+	PT_REGS_SAVES(off);			\
+	.body
+
+#define SWITCH_STACK_SAVES(off)							\
+	.savesp ar.unat,SW(CALLER_UNAT)+16+(off);				\
+	.savesp ar.fpsr,SW(AR_FPSR)+16+(off);					\
+	.spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off);		\
+	.spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off);		\
+	.spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off);		\
+	.spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off);		\
+	.spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off);		\
+	.spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off);		\
+	.spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off);		\
+	.spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off);		\
+	.spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off);		\
+	.spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off);		\
+	.spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off);		\
+	.spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off);		\
+	.spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off);		\
+	.spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off);		\
+	.spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off);		\
+	.spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off);	\
+	.spillsp @priunat,SW(AR_UNAT)+16+(off);					\
+	.spillsp ar.rnat,SW(AR_RNAT)+16+(off);					\
+	.spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off);				\
+	.spillsp pr,SW(PR)+16+(off)
+
+#define DO_SAVE_SWITCH_STACK			\
+	movl r28=1f;				\
+	;;					\
+	.fframe IA64_SWITCH_STACK_SIZE;		\
+	adds sp=-IA64_SWITCH_STACK_SIZE,sp;	\
+	mov.ret.sptk b7=r28,1f;			\
+	SWITCH_STACK_SAVES(0);			\
+	br.cond.sptk.many save_switch_stack;	\
+1:
+
+#define DO_LOAD_SWITCH_STACK			\
+	movl r28=1f;				\
+	;;					\
+	invala;					\
+	mov.ret.sptk b7=r28,1f;			\
+	br.cond.sptk.many load_switch_stack;	\
+1:	.restore sp;				\
+	adds sp=IA64_SWITCH_STACK_SIZE,sp
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
new file mode 100644
index 00000000..c539c689
--- /dev/null
+++ b/arch/ia64/kernel/err_inject.c
@@ -0,0 +1,303 @@
+/*
+ * err_inject.c -
+ *	1.) Inject errors to a processor.
+ *	2.) Query error injection capabilities.
+ * This driver along with user space code can be acting as an error
+ * injection tool.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Written by: Fenghua Yu <fenghua.yu@intel.com>, Intel Corporation
+ * Copyright (C) 2006, Intel Corp.  All rights reserved.
+ *
+ */
+#include <linux/sysdev.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#define ERR_INJ_DEBUG
+
+#define ERR_DATA_BUFFER_SIZE 3 		// Three 8-byte;
+
+#define define_one_ro(name) 						\
+static SYSDEV_ATTR(name, 0444, show_##name, NULL)
+
+#define define_one_rw(name) 						\
+static SYSDEV_ATTR(name, 0644, show_##name, store_##name)
+
+static u64 call_start[NR_CPUS];
+static u64 phys_addr[NR_CPUS];
+static u64 err_type_info[NR_CPUS];
+static u64 err_struct_info[NR_CPUS];
+static struct {
+	u64 data1;
+	u64 data2;
+	u64 data3;
+} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS];
+static s64 status[NR_CPUS];
+static u64 capabilities[NR_CPUS];
+static u64 resources[NR_CPUS];
+
+#define show(name) 							\
+static ssize_t 								\
+show_##name(struct sys_device *dev, struct sysdev_attribute *attr,	\
+		char *buf)						\
+{									\
+	u32 cpu=dev->id;						\
+	return sprintf(buf, "%lx\n", name[cpu]);			\
+}
+
+#define store(name)							\
+static ssize_t 								\
+store_##name(struct sys_device *dev, struct sysdev_attribute *attr,	\
+					const char *buf, size_t size)	\
+{									\
+	unsigned int cpu=dev->id;					\
+	name[cpu] = simple_strtoull(buf, NULL, 16);			\
+	return size;							\
+}
+
+show(call_start)
+
+/* It's user's responsibility to call the PAL procedure on a specific
+ * processor. The cpu number in driver is only used for storing data.
+ */
+static ssize_t
+store_call_start(struct sys_device *dev, struct sysdev_attribute *attr,
+		const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	unsigned long call_start = simple_strtoull(buf, NULL, 16);
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu);
+	printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]);
+	printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]);
+	printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n",
+			  err_data_buffer[cpu].data1,
+			  err_data_buffer[cpu].data2,
+			  err_data_buffer[cpu].data3);
+#endif
+	switch (call_start) {
+	    case 0: /* Do nothing. */
+		break;
+	    case 1: /* Call pal_mc_error_inject in physical mode. */
+		status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu],
+					err_struct_info[cpu],
+					ia64_tpa(&err_data_buffer[cpu]),
+					&capabilities[cpu],
+			 		&resources[cpu]);
+		break;
+	    case 2: /* Call pal_mc_error_inject in virtual mode. */
+		status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu],
+					err_struct_info[cpu],
+					ia64_tpa(&err_data_buffer[cpu]),
+					&capabilities[cpu],
+			 		&resources[cpu]);
+		break;
+	    default:
+		status[cpu] = -EINVAL;
+		break;
+	}
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]);
+	printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]);
+	printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);
+#endif
+	return size;
+}
+
+show(err_type_info)
+store(err_type_info)
+
+static ssize_t
+show_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr,
+			char *buf)
+{
+	unsigned int cpu=dev->id;
+	return sprintf(buf, "%lx\n", phys_addr[cpu]);
+}
+
+static ssize_t
+store_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr,
+			const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	u64 virt_addr=simple_strtoull(buf, NULL, 16);
+	int ret;
+
+        ret = get_user_pages(current, current->mm, virt_addr,
+                        1, VM_READ, 0, NULL, NULL);
+	if (ret<=0) {
+#ifdef ERR_INJ_DEBUG
+		printk("Virtual address %lx is not existing.\n",virt_addr);
+#endif
+		return -EINVAL;
+	}
+
+	phys_addr[cpu] = ia64_tpa(virt_addr);
+	return size;
+}
+
+show(err_struct_info)
+store(err_struct_info)
+
+static ssize_t
+show_err_data_buffer(struct sys_device *dev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	unsigned int cpu=dev->id;
+
+	return sprintf(buf, "%lx, %lx, %lx\n",
+			err_data_buffer[cpu].data1,
+			err_data_buffer[cpu].data2,
+			err_data_buffer[cpu].data3);
+}
+
+static ssize_t
+store_err_data_buffer(struct sys_device *dev,
+			struct sysdev_attribute *attr,
+			const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	int ret;
+
+#ifdef ERR_INJ_DEBUG
+	printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n",
+		 err_data_buffer[cpu].data1,
+		 err_data_buffer[cpu].data2,
+		 err_data_buffer[cpu].data3,
+		 cpu);
+#endif
+	ret=sscanf(buf, "%lx, %lx, %lx",
+			&err_data_buffer[cpu].data1,
+			&err_data_buffer[cpu].data2,
+			&err_data_buffer[cpu].data3);
+	if (ret!=ERR_DATA_BUFFER_SIZE)
+		return -EINVAL;
+
+	return size;
+}
+
+show(status)
+show(capabilities)
+show(resources)
+
+define_one_rw(call_start);
+define_one_rw(err_type_info);
+define_one_rw(err_struct_info);
+define_one_rw(err_data_buffer);
+define_one_rw(virtual_to_phys);
+define_one_ro(status);
+define_one_ro(capabilities);
+define_one_ro(resources);
+
+static struct attribute *default_attrs[] = {
+	&attr_call_start.attr,
+	&attr_virtual_to_phys.attr,
+	&attr_err_type_info.attr,
+	&attr_err_struct_info.attr,
+	&attr_err_data_buffer.attr,
+	&attr_status.attr,
+	&attr_capabilities.attr,
+	&attr_resources.attr,
+	NULL
+};
+
+static struct attribute_group err_inject_attr_group = {
+	.attrs = default_attrs,
+	.name = "err_inject"
+};
+/* Add/Remove err_inject interface for CPU device */
+static int __cpuinit err_inject_add_dev(struct sys_device * sys_dev)
+{
+	return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group);
+}
+
+static int __cpuinit err_inject_remove_dev(struct sys_device * sys_dev)
+{
+	sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
+	return 0;
+}
+static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct sys_device *sys_dev;
+
+	sys_dev = get_cpu_sysdev(cpu);
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		err_inject_add_dev(sys_dev);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		err_inject_remove_dev(sys_dev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata err_inject_cpu_notifier =
+{
+	.notifier_call = err_inject_cpu_callback,
+};
+
+static int __init
+err_inject_init(void)
+{
+	int i;
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_INFO "Enter error injection driver.\n");
+#endif
+	for_each_online_cpu(i) {
+		err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE,
+				(void *)(long)i);
+	}
+
+	register_hotcpu_notifier(&err_inject_cpu_notifier);
+
+	return 0;
+}
+
+static void __exit
+err_inject_exit(void)
+{
+	int i;
+	struct sys_device *sys_dev;
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_INFO "Exit error injection driver.\n");
+#endif
+	for_each_online_cpu(i) {
+		sys_dev = get_cpu_sysdev(i);
+		sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
+	}
+	unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_AUTHOR("Fenghua Yu <fenghua.yu@intel.com>");
+MODULE_DESCRIPTION("MC error injection kernel sysfs interface");
+MODULE_LICENSE("GPL");
diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c
new file mode 100644
index 00000000..b0911112
--- /dev/null
+++ b/arch/ia64/kernel/esi.c
@@ -0,0 +1,205 @@
+/*
+ * Extensible SAL Interface (ESI) support routines.
+ *
+ * Copyright (C) 2006 Hewlett-Packard Co
+ * 	Alex Williamson <alex.williamson@hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/esi.h>
+#include <asm/sal.h>
+
+MODULE_AUTHOR("Alex Williamson <alex.williamson@hp.com>");
+MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support");
+MODULE_LICENSE("GPL");
+
+#define MODULE_NAME	"esi"
+
+#define ESI_TABLE_GUID					\
+    EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3,		\
+	     0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4)
+
+enum esi_systab_entry_type {
+	ESI_DESC_ENTRY_POINT = 0
+};
+
+/*
+ * Entry type:	Size:
+ *	0	48
+ */
+#define ESI_DESC_SIZE(type)	"\060"[(unsigned) (type)]
+
+typedef struct ia64_esi_desc_entry_point {
+	u8 type;
+	u8 reserved1[15];
+	u64 esi_proc;
+	u64 gp;
+	efi_guid_t guid;
+} ia64_esi_desc_entry_point_t;
+
+struct pdesc {
+	void *addr;
+	void *gp;
+};
+
+static struct ia64_sal_systab *esi_systab;
+
+static int __init esi_init (void)
+{
+	efi_config_table_t *config_tables;
+	struct ia64_sal_systab *systab;
+	unsigned long esi = 0;
+	char *p;
+	int i;
+
+	config_tables = __va(efi.systab->tables);
+
+	for (i = 0; i < (int) efi.systab->nr_tables; ++i) {
+		if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) {
+			esi = config_tables[i].table;
+			break;
+		}
+	}
+
+	if (!esi)
+		return -ENODEV;
+
+	systab = __va(esi);
+
+	if (strncmp(systab->signature, "ESIT", 4) != 0) {
+		printk(KERN_ERR "bad signature in ESI system table!");
+		return -ENODEV;
+	}
+
+	p = (char *) (systab + 1);
+	for (i = 0; i < systab->entry_count; i++) {
+		/*
+		 * The first byte of each entry type contains the type
+		 * descriptor.
+		 */
+		switch (*p) {
+		      case ESI_DESC_ENTRY_POINT:
+			break;
+		      default:
+			printk(KERN_WARNING "Unknown table type %d found in "
+			       "ESI table, ignoring rest of table\n", *p);
+			return -ENODEV;
+		}
+
+		p += ESI_DESC_SIZE(*p);
+	}
+
+	esi_systab = systab;
+	return 0;
+}
+
+
+int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+		   enum esi_proc_type proc_type, u64 func,
+		   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
+		   u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags = 0;
+	int i;
+	char *p;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = __va(esi->esi_proc);
+				pdesc.gp = __va(esi->gp);
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				ia64_save_scratch_fpregs(fr);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_lock_irqsave(&sal_lock, flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_save(flags);
+				else
+					preempt_disable();
+				*isrvp = (*esi_proc)(func, arg1, arg2, arg3,
+						     arg4, arg5, arg6, arg7);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_unlock_irqrestore(&sal_lock,
+							       flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_restore(flags);
+				else
+					preempt_enable();
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call);
+
+int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+			u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4,
+			u64 arg5, u64 arg6, u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags;
+	u64 esi_params[8];
+	char *p;
+	int i;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = (void *)esi->esi_proc;
+				pdesc.gp = (void *)esi->gp;
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				esi_params[0] = func;
+				esi_params[1] = arg1;
+				esi_params[2] = arg2;
+				esi_params[3] = arg3;
+				esi_params[4] = arg4;
+				esi_params[5] = arg5;
+				esi_params[6] = arg6;
+				esi_params[7] = arg7;
+				ia64_save_scratch_fpregs(fr);
+				spin_lock_irqsave(&sal_lock, flags);
+				*isrvp = esi_call_phys(esi_proc, esi_params);
+				spin_unlock_irqrestore(&sal_lock, flags);
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call_phys);
+
+static void __exit esi_exit (void)
+{
+}
+
+module_init(esi_init);
+module_exit(esi_exit);	/* makes module removable... */
diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S
new file mode 100644
index 00000000..6b3d6c1f
--- /dev/null
+++ b/arch/ia64/kernel/esi_stub.S
@@ -0,0 +1,96 @@
+/*
+ * ESI call stub.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Alex Williamson <alex.williamson@hp.com>
+ *
+ * Based on EFI call stub by David Mosberger.  The stub is virtually
+ * identical to the one for EFI phys-mode calls, except that ESI
+ * calls may have up to 8 arguments, so they get passed to this routine
+ * through memory.
+ *
+ * This stub allows us to make ESI calls in physical mode with interrupts
+ * turned off.  ESI calls may not support calling from virtual mode.
+ *
+ * Google for "Extensible SAL specification" for a document describing the
+ * ESI standard.
+ */
+
+/*
+ * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
+ * Abstraction Layer Specification", revision 2.6e).  Note that
+ * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
+ * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
+ * (the br.ia instruction fails unless psr.dfl and psr.dfh are
+ * cleared).  Fortunately, SAL promises not to touch the floating
+ * point regs, so at least we don't have to save f2-f127.
+ */
+#define PSR_BITS_TO_CLEAR						\
+	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |		\
+	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	\
+	 IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PSR_BITS_TO_SET							\
+	(IA64_PSR_BN)
+
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+
+/*
+ * Inputs:
+ *	in0 = address of function descriptor of ESI routine to call
+ *	in1 = address of array of ESI parameters
+ *
+ * Outputs:
+ *	r8 = result returned by called function
+ */
+GLOBAL_ENTRY(esi_call_phys)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+	alloc loc1=ar.pfs,2,7,8,0
+	ld8 r2=[in0],8			// load ESI function's entry point
+	mov loc0=rp
+	.body
+	;;
+	ld8 out0=[in1],8		// ESI params loaded from array
+	;;				// passing all as inputs doesn't work
+	ld8 out1=[in1],8
+	;;
+	ld8 out2=[in1],8
+	;;
+	ld8 out3=[in1],8
+	;;
+	ld8 out4=[in1],8
+	;;
+	ld8 out5=[in1],8
+	;;
+	ld8 out6=[in1],8
+	;;
+	ld8 out7=[in1]
+	mov loc2=gp			// save global pointer
+	mov loc4=ar.rsc			// save RSE configuration
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	;;
+	ld8 gp=[in0]			// load ESI function's global pointer
+	movl r16=PSR_BITS_TO_CLEAR
+	mov loc3=psr			// save processor status word
+	movl r17=PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17
+	mov b6=r2
+	;;
+	andcm r16=loc3,r16	// get psr with IT, DT, and RT bits cleared
+	br.call.sptk.many rp=ia64_switch_mode_phys
+.ret0:	mov loc5=r19			// old ar.bsp
+	mov loc6=r20			// old sp
+	br.call.sptk.many rp=b6		// call the ESI function
+.ret1:	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3			// save virtual mode psr
+	mov r19=loc5			// save virtual mode bspstore
+	mov r20=loc6			// save virtual mode sp
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+.ret2:	mov ar.rsc=loc4			// restore RSE configuration
+	mov ar.pfs=loc1
+	mov rp=loc0
+	mov gp=loc2
+	br.ret.sptk.many rp
+END(esi_call_phys)
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
new file mode 100644
index 00000000..331d42bd
--- /dev/null
+++ b/arch/ia64/kernel/fsys.S
@@ -0,0 +1,1049 @@
+/*
+ * This file contains the light-weight system call handlers (fsyscall-handlers).
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ * 	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 25-Sep-03 davidm	Implement fsys_rt_sigprocmask().
+ * 18-Feb-03 louisk	Implement fsys_gettimeofday().
+ * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
+ *			probably broke it along the way... ;-)
+ * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
+ *                      it capable of using memory based clocks without falling back to C code.
+ * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
+ *
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+#include <asm/percpu.h>
+#include <asm/thread_info.h>
+#include <asm/sal.h>
+#include <asm/signal.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+#include "entry.h"
+#include "paravirt_inst.h"
+
+/*
+ * See Documentation/ia64/fsys.txt for details on fsyscalls.
+ *
+ * On entry to an fsyscall handler:
+ *   r10	= 0 (i.e., defaults to "successful syscall return")
+ *   r11	= saved ar.pfs (a user-level value)
+ *   r15	= system call number
+ *   r16	= "current" task pointer (in normal kernel-mode, this is in r13)
+ *   r32-r39	= system call arguments
+ *   b6		= return address (a user-level value)
+ *   ar.pfs	= previous frame-state (a user-level value)
+ *   PSR.be	= cleared to zero (i.e., little-endian byte order is in effect)
+ *   all other registers may contain values passed in from user-mode
+ *
+ * On return from an fsyscall handler:
+ *   r11	= saved ar.pfs (as passed into the fsyscall handler)
+ *   r15	= system call number (as passed into the fsyscall handler)
+ *   r32-r39	= system call arguments (as passed into the fsyscall handler)
+ *   b6		= return address (as passed into the fsyscall handler)
+ *   ar.pfs	= previous frame-state (as passed into the fsyscall handler)
+ */
+
+ENTRY(fsys_ni_syscall)
+	.prologue
+	.altrp b6
+	.body
+	mov r8=ENOSYS
+	mov r10=-1
+	FSYS_RETURN
+END(fsys_ni_syscall)
+
+ENTRY(fsys_getpid)
+	.prologue
+	.altrp b6
+	.body
+	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+	;;
+	ld8 r17=[r17]				// r17 = current->group_leader
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+	;;
+	ld4 r9=[r9]
+	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
+	;;
+	and r9=TIF_ALLWORK_MASK,r9
+	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
+	;;
+	add r8=IA64_PID_LEVEL_OFFSET,r17
+	;;
+	ld4 r8=[r8]				// r8 = pid->level
+	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
+	;;
+	shl r8=r8,IA64_UPID_SHIFT
+	;;
+	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
+	;;
+	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
+	;;
+	mov r17=0
+	;;
+	cmp.ne p8,p0=0,r9
+(p8)	br.spnt.many fsys_fallback_syscall
+	FSYS_RETURN
+END(fsys_getpid)
+
+ENTRY(fsys_getppid)
+	.prologue
+	.altrp b6
+	.body
+	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+	;;
+	ld8 r17=[r17]				// r17 = current->group_leader
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+	;;
+
+	ld4 r9=[r9]
+	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
+	;;
+	and r9=TIF_ALLWORK_MASK,r9
+
+1:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
+	;;
+	cmp.ne p8,p0=0,r9
+	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
+	;;
+
+	/*
+	 * The .acq is needed to ensure that the read of tgid has returned its data before
+	 * we re-check "real_parent".
+	 */
+	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
+#ifdef CONFIG_SMP
+	/*
+	 * Re-read current->group_leader->real_parent.
+	 */
+	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+	cmp.ne p6,p0=r18,r19			// did real_parent change?
+	mov r19=0			// i must not leak kernel bits...
+(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
+	;;
+	mov r17=0			// i must not leak kernel bits...
+	mov r18=0			// i must not leak kernel bits...
+#else
+	mov r17=0			// i must not leak kernel bits...
+	mov r18=0			// i must not leak kernel bits...
+	mov r19=0			// i must not leak kernel bits...
+#endif
+	FSYS_RETURN
+END(fsys_getppid)
+
+ENTRY(fsys_set_tid_address)
+	.prologue
+	.altrp b6
+	.body
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
+	;;
+	ld4 r9=[r9]
+	tnat.z p6,p7=r32		// check argument register for being NaT
+	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
+	;;
+	and r9=TIF_ALLWORK_MASK,r9
+	add r8=IA64_PID_LEVEL_OFFSET,r17
+	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
+	;;
+	ld4 r8=[r8]				// r8 = pid->level
+	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
+	;;
+	shl r8=r8,IA64_UPID_SHIFT
+	;;
+	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
+	;;
+	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
+	;;
+	cmp.ne p8,p0=0,r9
+	mov r17=-1
+	;;
+(p6)	st8 [r18]=r32
+(p7)	st8 [r18]=r17
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+	mov r17=0			// i must not leak kernel bits...
+	mov r18=0			// i must not leak kernel bits...
+	FSYS_RETURN
+END(fsys_set_tid_address)
+
+#if IA64_GTOD_LOCK_OFFSET !=0
+#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
+#endif
+#if IA64_ITC_JITTER_OFFSET !=0
+#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
+#endif
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_DIVIDE_BY_1000 0x4000
+#define CLOCK_ADD_MONOTONIC 0x8000
+
+ENTRY(fsys_gettimeofday)
+	.prologue
+	.altrp b6
+	.body
+	mov r31 = r32
+	tnat.nz p6,p0 = r33		// guard against NaT argument
+(p6)    br.cond.spnt.few .fail_einval
+	mov r30 = CLOCK_DIVIDE_BY_1000
+	;;
+.gettime:
+	// Register map
+	// Incoming r31 = pointer to address where to place result
+	//          r30 = flags determining how time is processed
+	// r2,r3 = temp r4-r7 preserved
+	// r8 = result nanoseconds
+	// r9 = result seconds
+	// r10 = temporary storage for clock difference
+	// r11 = preserved: saved ar.pfs
+	// r12 = preserved: memory stack
+	// r13 = preserved: thread pointer
+	// r14 = address of mask / mask value
+	// r15 = preserved: system call number
+	// r16 = preserved: current task pointer
+	// r17 = (not used)
+	// r18 = (not used)
+	// r19 = address of itc_lastcycle
+	// r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
+	// r21 = address of mmio_ptr
+	// r22 = address of wall_time or monotonic_time
+	// r23 = address of shift / value
+	// r24 = address mult factor / cycle_last value
+	// r25 = itc_lastcycle value
+	// r26 = address clocksource cycle_last
+	// r27 = (not used)
+	// r28 = sequence number at the beginning of critcal section
+	// r29 = address of itc_jitter
+	// r30 = time processing flags / memory address
+	// r31 = pointer to result
+	// Predicates
+	// p6,p7 short term use
+	// p8 = timesource ar.itc
+	// p9 = timesource mmio64
+	// p10 = timesource mmio32 - not used
+	// p11 = timesource not to be handled by asm code
+	// p12 = memory time source ( = p9 | p10) - not used
+	// p13 = do cmpxchg with itc_lastcycle
+	// p14 = Divide by 1000
+	// p15 = Add monotonic
+	//
+	// Note that instructions are optimized for McKinley. McKinley can
+	// process two bundles simultaneously and therefore we continuously
+	// try to feed the CPU two bundles and then a stop.
+
+	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+	tnat.nz p6,p0 = r31		// guard against Nat argument
+(p6)	br.cond.spnt.few .fail_einval
+	movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
+	;;
+	ld4 r2 = [r2]			// process work pending flags
+	movl r29 = itc_jitter_data	// itc_jitter
+	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
+	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
+	mov pr = r30,0xc000	// Set predicates according to function
+	;;
+	and r2 = TIF_ALLWORK_MASK,r2
+	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
+	;;
+	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
+	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
+(p6)	br.cond.spnt.many fsys_fallback_syscall
+	;;
+	// Begin critical section
+.time_redo:
+	ld4.acq r28 = [r20]	// gtod_lock.sequence, Must take first
+	;;
+	and r28 = ~1,r28	// And make sequence even to force retry if odd
+	;;
+	ld8 r30 = [r21]		// clocksource->mmio_ptr
+	add r24 = IA64_CLKSRC_MULT_OFFSET,r20
+	ld4 r2 = [r29]		// itc_jitter value
+	add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
+	add r14 = IA64_CLKSRC_MASK_OFFSET,r20
+	;;
+	ld4 r3 = [r24]		// clocksource mult value
+	ld8 r14 = [r14]         // clocksource mask value
+	cmp.eq p8,p9 = 0,r30	// use cpu timer if no mmio_ptr
+	;;
+	setf.sig f7 = r3	// Setup for mult scaling of counter
+(p8)	cmp.ne p13,p0 = r2,r0	// need itc_jitter compensation, set p13
+	ld4 r23 = [r23]		// clocksource shift value
+	ld8 r24 = [r26]		// get clksrc_cycle_last value
+(p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
+	;;
+	.pred.rel.mutex p8,p9
+	MOV_FROM_ITC(p8, p6, r2, r10)	// CPU_TIMER. 36 clocks latency!!!
+(p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
+(p13)	ld8 r25 = [r19]		// get itc_lastcycle value
+	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
+	;;
+	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
+(p13)	sub r3 = r25,r2		// Diff needed before comparison (thanks davidm)
+	;;
+(p13)	cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
+	sub r10 = r2,r24	// current_cycle - last_cycle
+	;;
+(p6)	sub r10 = r25,r24	// time we got was less than last_cycle
+(p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
+	;;
+(p7)	cmpxchg8.rel r3 = [r19],r2,ar.ccv
+	;;
+(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful
+	;;
+(p7)	sub r10 = r3,r24	// then use new last_cycle instead
+	;;
+	and r10 = r10,r14	// Apply mask
+	;;
+	setf.sig f8 = r10
+	nop.i 123
+	;;
+	// fault check takes 5 cycles and we have spare time
+EX(.fail_efault, probe.w.fault r31, 3)
+	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
+	;;
+	getf.sig r2 = f8
+	mf
+	;;
+	ld4 r10 = [r20]		// gtod_lock.sequence
+	shr.u r2 = r2,r23	// shift by factor
+	;;
+	add r8 = r8,r2		// Add xtime.nsecs
+	cmp4.ne p7,p0 = r28,r10
+(p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
+	// End critical section.
+	// Now r8=tv->tv_nsec and r9=tv->tv_sec
+	mov r10 = r0
+	movl r2 = 1000000000
+	add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
+(p14)	movl r3 = 2361183241434822607	// Prep for / 1000 hack
+	;;
+.time_normalize:
+	mov r21 = r8
+	cmp.ge p6,p0 = r8,r2
+(p14)	shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
+	;;
+(p14)	setf.sig f8 = r20
+(p6)	sub r8 = r8,r2
+(p6)	add r9 = 1,r9		// two nops before the branch.
+(p14)	setf.sig f7 = r3	// Chances for repeats are 1 in 10000 for gettod
+(p6)	br.cond.dpnt.few .time_normalize
+	;;
+	// Divided by 8 though shift. Now divide by 125
+	// The compiler was able to do that with a multiply
+	// and a shift and we do the same
+EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
+(p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
+	;;
+(p14)	getf.sig r2 = f8
+	;;
+	mov r8 = r0
+(p14)	shr.u r21 = r2, 4
+	;;
+EX(.fail_efault, st8 [r31] = r9)
+EX(.fail_efault, st8 [r23] = r21)
+	FSYS_RETURN
+.fail_einval:
+	mov r8 = EINVAL
+	mov r10 = -1
+	FSYS_RETURN
+.fail_efault:
+	mov r8 = EFAULT
+	mov r10 = -1
+	FSYS_RETURN
+END(fsys_gettimeofday)
+
+ENTRY(fsys_clock_gettime)
+	.prologue
+	.altrp b6
+	.body
+	cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
+	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
+(p6)	br.spnt.few fsys_fallback_syscall
+	mov r31 = r33
+	shl r30 = r32,15
+	br.many .gettime
+END(fsys_clock_gettime)
+
+/*
+ * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
+ */
+#if _NSIG_WORDS != 1
+# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
+#endif
+ENTRY(fsys_rt_sigprocmask)
+	.prologue
+	.altrp b6
+	.body
+
+	add r2=IA64_TASK_BLOCKED_OFFSET,r16
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+	cmp4.ltu p6,p0=SIG_SETMASK,r32
+
+	cmp.ne p15,p0=r0,r34			// oset != NULL?
+	tnat.nz p8,p0=r34
+	add r31=IA64_TASK_SIGHAND_OFFSET,r16
+	;;
+	ld8 r3=[r2]				// read/prefetch current->blocked
+	ld4 r9=[r9]
+	tnat.nz.or p6,p0=r35
+
+	cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
+	tnat.nz.or p6,p0=r32
+(p6)	br.spnt.few .fail_einval		// fail with EINVAL
+	;;
+#ifdef CONFIG_SMP
+	ld8 r31=[r31]				// r31 <- current->sighand
+#endif
+	and r9=TIF_ALLWORK_MASK,r9
+	tnat.nz.or p8,p0=r33
+	;;
+	cmp.ne p7,p0=0,r9
+	cmp.eq p6,p0=r0,r33			// set == NULL?
+	add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31	// r31 <- current->sighand->siglock
+(p8)	br.spnt.few .fail_efault		// fail with EFAULT
+(p7)	br.spnt.many fsys_fallback_syscall	// got pending kernel work...
+(p6)	br.dpnt.many .store_mask		// -> short-circuit to just reading the signal mask
+
+	/* Argh, we actually have to do some work and _update_ the signal mask: */
+
+EX(.fail_efault, probe.r.fault r33, 3)		// verify user has read-access to *set
+EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
+	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
+	;;
+
+	RSM_PSR_I(p0, r18, r19)			// mask interrupt delivery
+	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP
+	mov r8=EINVAL			// default to EINVAL
+
+#ifdef CONFIG_SMP
+	// __ticket_spin_trylock(r31)
+	ld4 r17=[r31]
+	;;
+	mov.m ar.ccv=r17
+	extr.u r9=r17,17,15
+	adds r19=1,r17
+	extr.u r18=r17,0,15
+	;;
+	cmp.eq p6,p7=r9,r18
+	;;
+(p6)	cmpxchg4.acq r9=[r31],r19,ar.ccv
+(p6)	dep.z r20=r19,1,15		// next serving ticket for unlock
+(p7)	br.cond.spnt.many .lock_contention
+	;;
+	cmp4.eq p0,p7=r9,r17
+	adds r31=2,r31
+(p7)	br.cond.spnt.many .lock_contention
+	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
+	;;
+#else
+	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
+#endif
+	add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
+	add r19=IA64_TASK_SIGNAL_OFFSET,r16
+	cmp4.eq p6,p0=SIG_BLOCK,r32
+	;;
+	ld8 r19=[r19]			// r19 <- current->signal
+	cmp4.eq p7,p0=SIG_UNBLOCK,r32
+	cmp4.eq p8,p0=SIG_SETMASK,r32
+	;;
+	ld8 r18=[r18]			// r18 <- current->pending.signal
+	.pred.rel.mutex p6,p7,p8
+(p6)	or r14=r3,r14			// SIG_BLOCK
+(p7)	andcm r14=r3,r14		// SIG_UNBLOCK
+
+(p8)	mov r14=r14			// SIG_SETMASK
+(p6)	mov r8=0			// clear error code
+	// recalc_sigpending()
+	add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
+
+	add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
+	;;
+	ld4 r17=[r17]		// r17 <- current->signal->group_stop_count
+(p7)	mov r8=0		// clear error code
+
+	ld8 r19=[r19]		// r19 <- current->signal->shared_pending
+	;;
+	cmp4.gt p6,p7=r17,r0	// p6/p7 <- (current->signal->group_stop_count > 0)?
+(p8)	mov r8=0		// clear error code
+
+	or r18=r18,r19		// r18 <- current->pending | current->signal->shared_pending
+	;;
+	// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
+	andcm r18=r18,r14
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+	;;
+
+(p7)	cmp.ne.or.andcm p6,p7=r18,r0		// p6/p7 <- signal pending
+	mov r19=0					// i must not leak kernel bits...
+(p6)	br.cond.dpnt.many .sig_pending
+	;;
+
+1:	ld4 r17=[r9]				// r17 <- current->thread_info->flags
+	;;
+	mov ar.ccv=r17
+	and r18=~_TIF_SIGPENDING,r17		// r18 <- r17 & ~(1 << TIF_SIGPENDING)
+	;;
+
+	st8 [r2]=r14				// update current->blocked with new mask
+	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
+	;;
+	cmp.ne p6,p0=r17,r8			// update failed?
+(p6)	br.cond.spnt.few 1b			// yes -> retry
+
+#ifdef CONFIG_SMP
+	// __ticket_spin_unlock(r31)
+	st2.rel [r31]=r20
+	mov r20=0					// i must not leak kernel bits...
+#endif
+	SSM_PSR_I(p0, p9, r31)
+	;;
+
+	srlz.d					// ensure psr.i is set again
+	mov r18=0					// i must not leak kernel bits...
+
+.store_mask:
+EX(.fail_efault, (p15) probe.w.fault r34, 3)	// verify user has write-access to *oset
+EX(.fail_efault, (p15) st8 [r34]=r3)
+	mov r2=0					// i must not leak kernel bits...
+	mov r3=0					// i must not leak kernel bits...
+	mov r8=0				// return 0
+	mov r9=0					// i must not leak kernel bits...
+	mov r14=0					// i must not leak kernel bits...
+	mov r17=0					// i must not leak kernel bits...
+	mov r31=0					// i must not leak kernel bits...
+	FSYS_RETURN
+
+.sig_pending:
+#ifdef CONFIG_SMP
+	// __ticket_spin_unlock(r31)
+	st2.rel [r31]=r20			// release the lock
+#endif
+	SSM_PSR_I(p0, p9, r17)
+	;;
+	srlz.d
+	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall
+
+#ifdef CONFIG_SMP
+.lock_contention:
+	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
+	SSM_PSR_I(p0, p9, r17)
+	;;
+	srlz.d
+	br.sptk.many fsys_fallback_syscall
+#endif
+END(fsys_rt_sigprocmask)
+
+/*
+ * fsys_getcpu doesn't use the third parameter in this implementation. It reads
+ * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
+ */
+ENTRY(fsys_getcpu)
+	.prologue
+	.altrp b6
+	.body
+	;;
+	add r2=TI_FLAGS+IA64_TASK_SIZE,r16
+	tnat.nz p6,p0 = r32			// guard against NaT argument
+	add r3=TI_CPU+IA64_TASK_SIZE,r16
+	;;
+	ld4 r3=[r3]				// M r3 = thread_info->cpu
+	ld4 r2=[r2]				// M r2 = thread_info->flags
+(p6)    br.cond.spnt.few .fail_einval		// B
+	;;
+	tnat.nz p7,p0 = r33			// I guard against NaT argument
+(p7)    br.cond.spnt.few .fail_einval		// B
+#ifdef CONFIG_NUMA
+	movl r17=cpu_to_node_map
+	;;
+EX(.fail_efault, probe.w.fault r32, 3)		// M This takes 5 cycles
+EX(.fail_efault, probe.w.fault r33, 3)		// M This takes 5 cycles
+	shladd r18=r3,1,r17
+	;;
+	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
+	and r2 = TIF_ALLWORK_MASK,r2
+	;;
+	cmp.ne p8,p0=0,r2
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+	;;
+EX(.fail_efault, st4 [r32] = r3)
+EX(.fail_efault, st2 [r33] = r20)
+	mov r8=0
+	;;
+#else
+EX(.fail_efault, probe.w.fault r32, 3)		// M This takes 5 cycles
+EX(.fail_efault, probe.w.fault r33, 3)		// M This takes 5 cycles
+	and r2 = TIF_ALLWORK_MASK,r2
+	;;
+	cmp.ne p8,p0=0,r2
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+EX(.fail_efault, st4 [r32] = r3)
+EX(.fail_efault, st2 [r33] = r0)
+	mov r8=0
+	;;
+#endif
+	FSYS_RETURN
+END(fsys_getcpu)
+
+ENTRY(fsys_fallback_syscall)
+	.prologue
+	.altrp b6
+	.body
+	/*
+	 * We only get here from light-weight syscall handlers.  Thus, we already
+	 * know that r15 contains a valid syscall number.  No need to re-check.
+	 */
+	adds r17=-1024,r15
+	movl r14=sys_call_table
+	;;
+	RSM_PSR_I(p0, r26, r27)
+	shladd r18=r17,3,r14
+	;;
+	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
+	MOV_FROM_PSR(p0, r29, r26)		// read psr (12 cyc load latency)
+	mov r27=ar.rsc
+	mov r21=ar.fpsr
+	mov r26=ar.pfs
+END(fsys_fallback_syscall)
+	/* FALL THROUGH */
+GLOBAL_ENTRY(paravirt_fsys_bubble_down)
+	.prologue
+	.altrp b6
+	.body
+	/*
+	 * We get here for syscalls that don't have a lightweight
+	 * handler.  For those, we need to bubble down into the kernel
+	 * and that requires setting up a minimal pt_regs structure,
+	 * and initializing the CPU state more or less as if an
+	 * interruption had occurred.  To make syscall-restarts work,
+	 * we setup pt_regs such that cr_iip points to the second
+	 * instruction in syscall_via_break.  Decrementing the IP
+	 * hence will restart the syscall via break and not
+	 * decrementing IP will return us to the caller, as usual.
+	 * Note that we preserve the value of psr.pp rather than
+	 * initializing it from dcr.pp.  This makes it possible to
+	 * distinguish fsyscall execution from other privileged
+	 * execution.
+	 *
+	 * On entry:
+	 *	- normal fsyscall handler register usage, except
+	 *	  that we also have:
+	 *	- r18: address of syscall entry point
+	 *	- r21: ar.fpsr
+	 *	- r26: ar.pfs
+	 *	- r27: ar.rsc
+	 *	- r29: psr
+	 *
+	 * We used to clear some PSR bits here but that requires slow
+	 * serialization.  Fortuntely, that isn't really necessary.
+	 * The rationale is as follows: we used to clear bits
+	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
+	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
+	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
+	 * However,
+	 *
+	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
+	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
+	 * PSR.I  : already turned off by the time paravirt_fsys_bubble_down gets
+	 *	    invoked
+	 * PSR.DFL: always 0 (kernel never turns it on)
+	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
+	 *	    initiative
+	 * PSR.DI : always 0 (kernel never turns it on)
+	 * PSR.SI : always 0 (kernel never turns it on)
+	 * PSR.DB : don't care --- kernel never enables kernel-level
+	 *	    breakpoints
+	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
+	 *          __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
+	 *          will trigger a taken branch; the taken-trap-handler then
+	 *          converts the syscall into a break-based system-call.
+	 */
+	/*
+	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
+	 * The rest we have to synthesize.
+	 */
+#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
+					 | (0x1 << IA64_PSR_RI_BIT)	\
+					 | IA64_PSR_BN | IA64_PSR_I)
+
+	invala					// M0|1
+	movl r14=ia64_ret_from_syscall		// X
+
+	nop.m 0
+	movl r28=__kernel_syscall_via_break	// X	create cr.iip
+	;;
+
+	mov r2=r16				// A    get task addr to addl-addressable register
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
+	mov r31=pr				// I0   save pr (2 cyc)
+	;;
+	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
+	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
+	;;
+	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
+	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
+	nop.i 0
+	;;
+	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
+#else
+	nop.m 0
+#endif
+	nop.i 0
+	;;
+	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
+	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
+	nop.i 0
+	;;
+	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
+	movl r8=PSR_ONE_BITS			// X
+	;;
+	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
+	mov r19=b6				// I0   save b6 (2 cyc)
+	mov r20=r1				// A    save caller's gp in r20
+	;;
+	or r29=r8,r29				// A    construct cr.ipsr value to save
+	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
+	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
+
+	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
+	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
+	br.call.sptk.many b7=ia64_syscall_setup	// B
+	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	// mov.m r30=ar.itc is called in advance
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r30,r19				// elapsed time in user mode
+	;;
+	add r20=r20,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r20				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+#endif
+	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
+	mov rp=r14				// I0   set the real return addr
+	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
+	;;
+	SSM_PSR_I(p0, p6, r22)			// M2   we're on kernel stacks now, reenable irqs
+	cmp.eq p8,p0=r3,r0			// A
+(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
+
+	nop.m 0
+(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
+	br.cond.spnt ia64_trace_syscall		// B
+END(paravirt_fsys_bubble_down)
+
+	.rodata
+	.align 8
+	.globl paravirt_fsyscall_table
+
+	data8 paravirt_fsys_bubble_down
+paravirt_fsyscall_table:
+	data8 fsys_ni_syscall
+	data8 0				// exit			// 1025
+	data8 0				// read
+	data8 0				// write
+	data8 0				// open
+	data8 0				// close
+	data8 0				// creat		// 1030
+	data8 0				// link
+	data8 0				// unlink
+	data8 0				// execve
+	data8 0				// chdir
+	data8 0				// fchdir		// 1035
+	data8 0				// utimes
+	data8 0				// mknod
+	data8 0				// chmod
+	data8 0				// chown
+	data8 0				// lseek		// 1040
+	data8 fsys_getpid		// getpid
+	data8 fsys_getppid		// getppid
+	data8 0				// mount
+	data8 0				// umount
+	data8 0				// setuid		// 1045
+	data8 0				// getuid
+	data8 0				// geteuid
+	data8 0				// ptrace
+	data8 0				// access
+	data8 0				// sync			// 1050
+	data8 0				// fsync
+	data8 0				// fdatasync
+	data8 0				// kill
+	data8 0				// rename
+	data8 0				// mkdir		// 1055
+	data8 0				// rmdir
+	data8 0				// dup
+	data8 0				// pipe
+	data8 0				// times
+	data8 0				// brk			// 1060
+	data8 0				// setgid
+	data8 0				// getgid
+	data8 0				// getegid
+	data8 0				// acct
+	data8 0				// ioctl		// 1065
+	data8 0				// fcntl
+	data8 0				// umask
+	data8 0				// chroot
+	data8 0				// ustat
+	data8 0				// dup2			// 1070
+	data8 0				// setreuid
+	data8 0				// setregid
+	data8 0				// getresuid
+	data8 0				// setresuid
+	data8 0				// getresgid		// 1075
+	data8 0				// setresgid
+	data8 0				// getgroups
+	data8 0				// setgroups
+	data8 0				// getpgid
+	data8 0				// setpgid		// 1080
+	data8 0				// setsid
+	data8 0				// getsid
+	data8 0				// sethostname
+	data8 0				// setrlimit
+	data8 0				// getrlimit		// 1085
+	data8 0				// getrusage
+	data8 fsys_gettimeofday		// gettimeofday
+	data8 0				// settimeofday
+	data8 0				// select
+	data8 0				// poll			// 1090
+	data8 0				// symlink
+	data8 0				// readlink
+	data8 0				// uselib
+	data8 0				// swapon
+	data8 0				// swapoff		// 1095
+	data8 0				// reboot
+	data8 0				// truncate
+	data8 0				// ftruncate
+	data8 0				// fchmod
+	data8 0				// fchown		// 1100
+	data8 0				// getpriority
+	data8 0				// setpriority
+	data8 0				// statfs
+	data8 0				// fstatfs
+	data8 0				// gettid		// 1105
+	data8 0				// semget
+	data8 0				// semop
+	data8 0				// semctl
+	data8 0				// msgget
+	data8 0				// msgsnd		// 1110
+	data8 0				// msgrcv
+	data8 0				// msgctl
+	data8 0				// shmget
+	data8 0				// shmat
+	data8 0				// shmdt		// 1115
+	data8 0				// shmctl
+	data8 0				// syslog
+	data8 0				// setitimer
+	data8 0				// getitimer
+	data8 0					 		// 1120
+	data8 0
+	data8 0
+	data8 0				// vhangup
+	data8 0				// lchown
+	data8 0				// remap_file_pages	// 1125
+	data8 0				// wait4
+	data8 0				// sysinfo
+	data8 0				// clone
+	data8 0				// setdomainname
+	data8 0				// newuname		// 1130
+	data8 0				// adjtimex
+	data8 0
+	data8 0				// init_module
+	data8 0				// delete_module
+	data8 0							// 1135
+	data8 0
+	data8 0				// quotactl
+	data8 0				// bdflush
+	data8 0				// sysfs
+	data8 0				// personality		// 1140
+	data8 0				// afs_syscall
+	data8 0				// setfsuid
+	data8 0				// setfsgid
+	data8 0				// getdents
+	data8 0				// flock		// 1145
+	data8 0				// readv
+	data8 0				// writev
+	data8 0				// pread64
+	data8 0				// pwrite64
+	data8 0				// sysctl		// 1150
+	data8 0				// mmap
+	data8 0				// munmap
+	data8 0				// mlock
+	data8 0				// mlockall
+	data8 0				// mprotect		// 1155
+	data8 0				// mremap
+	data8 0				// msync
+	data8 0				// munlock
+	data8 0				// munlockall
+	data8 0				// sched_getparam	// 1160
+	data8 0				// sched_setparam
+	data8 0				// sched_getscheduler
+	data8 0				// sched_setscheduler
+	data8 0				// sched_yield
+	data8 0				// sched_get_priority_max	// 1165
+	data8 0				// sched_get_priority_min
+	data8 0				// sched_rr_get_interval
+	data8 0				// nanosleep
+	data8 0				// nfsservctl
+	data8 0				// prctl		// 1170
+	data8 0				// getpagesize
+	data8 0				// mmap2
+	data8 0				// pciconfig_read
+	data8 0				// pciconfig_write
+	data8 0				// perfmonctl		// 1175
+	data8 0				// sigaltstack
+	data8 0				// rt_sigaction
+	data8 0				// rt_sigpending
+	data8 fsys_rt_sigprocmask	// rt_sigprocmask
+	data8 0				// rt_sigqueueinfo	// 1180
+	data8 0				// rt_sigreturn
+	data8 0				// rt_sigsuspend
+	data8 0				// rt_sigtimedwait
+	data8 0				// getcwd
+	data8 0				// capget		// 1185
+	data8 0				// capset
+	data8 0				// sendfile
+	data8 0
+	data8 0
+	data8 0				// socket		// 1190
+	data8 0				// bind
+	data8 0				// connect
+	data8 0				// listen
+	data8 0				// accept
+	data8 0				// getsockname		// 1195
+	data8 0				// getpeername
+	data8 0				// socketpair
+	data8 0				// send
+	data8 0				// sendto
+	data8 0				// recv			// 1200
+	data8 0				// recvfrom
+	data8 0				// shutdown
+	data8 0				// setsockopt
+	data8 0				// getsockopt
+	data8 0				// sendmsg		// 1205
+	data8 0				// recvmsg
+	data8 0				// pivot_root
+	data8 0				// mincore
+	data8 0				// madvise
+	data8 0				// newstat		// 1210
+	data8 0				// newlstat
+	data8 0				// newfstat
+	data8 0				// clone2
+	data8 0				// getdents64
+	data8 0				// getunwind		// 1215
+	data8 0				// readahead
+	data8 0				// setxattr
+	data8 0				// lsetxattr
+	data8 0				// fsetxattr
+	data8 0				// getxattr		// 1220
+	data8 0				// lgetxattr
+	data8 0				// fgetxattr
+	data8 0				// listxattr
+	data8 0				// llistxattr
+	data8 0				// flistxattr		// 1225
+	data8 0				// removexattr
+	data8 0				// lremovexattr
+	data8 0				// fremovexattr
+	data8 0				// tkill
+	data8 0				// futex		// 1230
+	data8 0				// sched_setaffinity
+	data8 0				// sched_getaffinity
+	data8 fsys_set_tid_address	// set_tid_address
+	data8 0				// fadvise64_64
+	data8 0				// tgkill		// 1235
+	data8 0				// exit_group
+	data8 0				// lookup_dcookie
+	data8 0				// io_setup
+	data8 0				// io_destroy
+	data8 0				// io_getevents		// 1240
+	data8 0				// io_submit
+	data8 0				// io_cancel
+	data8 0				// epoll_create
+	data8 0				// epoll_ctl
+	data8 0				// epoll_wait		// 1245
+	data8 0				// restart_syscall
+	data8 0				// semtimedop
+	data8 0				// timer_create
+	data8 0				// timer_settime
+	data8 0				// timer_gettime 	// 1250
+	data8 0				// timer_getoverrun
+	data8 0				// timer_delete
+	data8 0				// clock_settime
+	data8 fsys_clock_gettime	// clock_gettime
+	data8 0				// clock_getres		// 1255
+	data8 0				// clock_nanosleep
+	data8 0				// fstatfs64
+	data8 0				// statfs64
+	data8 0				// mbind
+	data8 0				// get_mempolicy	// 1260
+	data8 0				// set_mempolicy
+	data8 0				// mq_open
+	data8 0				// mq_unlink
+	data8 0				// mq_timedsend
+	data8 0				// mq_timedreceive	// 1265
+	data8 0				// mq_notify
+	data8 0				// mq_getsetattr
+	data8 0				// kexec_load
+	data8 0				// vserver
+	data8 0				// waitid		// 1270
+	data8 0				// add_key
+	data8 0				// request_key
+	data8 0				// keyctl
+	data8 0				// ioprio_set
+	data8 0				// ioprio_get		// 1275
+	data8 0				// move_pages
+	data8 0				// inotify_init
+	data8 0				// inotify_add_watch
+	data8 0				// inotify_rm_watch
+	data8 0				// migrate_pages	// 1280
+	data8 0				// openat
+	data8 0				// mkdirat
+	data8 0				// mknodat
+	data8 0				// fchownat
+	data8 0				// futimesat		// 1285
+	data8 0				// newfstatat
+	data8 0				// unlinkat
+	data8 0				// renameat
+	data8 0				// linkat
+	data8 0				// symlinkat		// 1290
+	data8 0				// readlinkat
+	data8 0				// fchmodat
+	data8 0				// faccessat
+	data8 0
+	data8 0							// 1295
+	data8 0				// unshare
+	data8 0				// splice
+	data8 0				// set_robust_list
+	data8 0				// get_robust_list
+	data8 0				// sync_file_range	// 1300
+	data8 0				// tee
+	data8 0				// vmsplice
+	data8 0
+	data8 fsys_getcpu		// getcpu		// 1304
+
+	// fill in zeros for the remaining entries
+	.zero:
+	.space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0
diff --git a/arch/ia64/kernel/fsyscall_gtod_data.h b/arch/ia64/kernel/fsyscall_gtod_data.h
new file mode 100644
index 00000000..57d2ee6c
--- /dev/null
+++ b/arch/ia64/kernel/fsyscall_gtod_data.h
@@ -0,0 +1,23 @@
+/*
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ *        Contributed by Peter Keilty <peter.keilty@hp.com>
+ *
+ * fsyscall gettimeofday data
+ */
+
+struct fsyscall_gtod_data_t {
+	seqlock_t	lock;
+	struct timespec	wall_time;
+	struct timespec monotonic_time;
+	cycle_t		clk_mask;
+	u32		clk_mult;
+	u32		clk_shift;
+	void		*clk_fsys_mmio;
+	cycle_t		clk_cycle_last;
+} ____cacheline_aligned;
+
+struct itc_jitter_data_t {
+	int		itc_jitter;
+	cycle_t		itc_lastcycle;
+} ____cacheline_aligned;
+
diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
new file mode 100644
index 00000000..7fc8c961
--- /dev/null
+++ b/arch/ia64/kernel/ftrace.c
@@ -0,0 +1,206 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/patch.h>
+
+/* In IA64, each function will be added below two bundles with -pg option */
+static unsigned char __attribute__((aligned(8)))
+ftrace_orig_code[MCOUNT_INSN_SIZE] = {
+	0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
+	0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
+	0x05, 0x00, 0xc4, 0x00,             /* mov r42=b0 */
+	0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
+	0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
+	0x08, 0x00, 0x00, 0x50              /* br.call.sptk.many b0 = _mcount;; */
+};
+
+struct ftrace_orig_insn {
+	u64 dummy1, dummy2, dummy3;
+	u64 dummy4:64-41+13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 sign:1;
+	u64 dummy6:4;
+};
+
+/* mcount stub will be converted below for nop */
+static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
+	0x00, 0x00, 0x04, 0x00
+};
+
+static unsigned char *ftrace_nop_replace(void)
+{
+	return ftrace_nop_code;
+}
+
+/*
+ * mcount stub will be converted below for call
+ * Note: Just the last instruction is changed against nop
+ * */
+static unsigned char __attribute__((aligned(8)))
+ftrace_call_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
+	0xf8, 0xff, 0xff, 0xc8
+};
+
+struct ftrace_call_insn {
+	u64 dummy1, dummy2;
+	u64 dummy3:48;
+	u64 imm39_l:16;
+	u64 imm39_h:23;
+	u64 dummy4:13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 i:1;
+	u64 dummy6:4;
+};
+
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	struct ftrace_call_insn *code = (void *)ftrace_call_code;
+	unsigned long offset = addr - (ip + 0x10);
+
+	code->imm39_l = offset >> 24;
+	code->imm39_h = offset >> 40;
+	code->imm20 = offset >> 4;
+	code->i = offset >> 63;
+	return ftrace_call_code;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code, int do_check)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing. We do this by using the
+	 *  probe_kernel_* functions.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine, or before SMP starts.
+	 */
+
+	if (!do_check)
+		goto skip_check;
+
+	/* read the text we want to modify */
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+skip_check:
+	/* replace the text with the new text */
+	if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
+	unsigned long ip = rec->ip;
+
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+	if (rec->flags & FTRACE_FL_CONVERTED) {
+		struct ftrace_call_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_call_code;
+		tmp_call = (void *)replaced;
+		call_insn->imm39_l = tmp_call->imm39_l;
+		call_insn->imm39_h = tmp_call->imm39_h;
+		call_insn->imm20 = tmp_call->imm20;
+		call_insn->i = tmp_call->i;
+		if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	} else {
+		struct ftrace_orig_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_orig_code;
+		tmp_call = (void *)replaced;
+		call_insn->sign = tmp_call->sign;
+		call_insn->imm20 = tmp_call->imm20;
+		if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	}
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	int ret;
+	char *new;
+
+	ret = ftrace_make_nop_check(rec, addr);
+	if (ret)
+		return ret;
+	new = ftrace_nop_replace();
+	return ftrace_modify_code(rec->ip, NULL, new, 0);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned char *old, *new;
+
+	old=  ftrace_nop_replace();
+	new = ftrace_call_replace(ip, addr);
+	return ftrace_modify_code(ip, old, new, 1);
+}
+
+/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip;
+	unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
+
+	if (func == ftrace_stub)
+		return 0;
+	ip = ((struct fnptr *)func)->ip;
+
+	ia64_patch_imm64(addr + 2, ip);
+
+	flush_icache_range(addr, addr + 16);
+	return 0;
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+	*(unsigned long *)data = 0;
+
+	return 0;
+}
diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S
new file mode 100644
index 00000000..b3ef1c72
--- /dev/null
+++ b/arch/ia64/kernel/gate-data.S
@@ -0,0 +1,3 @@
+	.section .data..gate, "aw"
+
+	.incbin "arch/ia64/kernel/gate.so"
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
new file mode 100644
index 00000000..245d3e1e
--- /dev/null
+++ b/arch/ia64/kernel/gate.S
@@ -0,0 +1,385 @@
+/*
+ * This file contains the code that gets mapped at the upper end of each task's text
+ * region.  For now, it contains the signal trampoline code only.
+ *
+ * Copyright (C) 1999-2003 Hewlett-Packard Co
+ * 	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+#include <asm/sigcontext.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+#include "paravirt_inst.h"
+
+/*
+ * We can't easily refer to symbols inside the kernel.  To avoid full runtime relocation,
+ * complications with the linker (which likes to create PLT stubs for branches
+ * to targets outside the shared object) and to avoid multi-phase kernel builds, we
+ * simply create minimalistic "patch lists" in special ELF sections.
+ */
+	.section ".data..patch.fsyscall_table", "a"
+	.previous
+#define LOAD_FSYSCALL_TABLE(reg)			\
+[1:]	movl reg=0;					\
+	.xdata4 ".data..patch.fsyscall_table", 1b-.
+
+	.section ".data..patch.brl_fsys_bubble_down", "a"
+	.previous
+#define BRL_COND_FSYS_BUBBLE_DOWN(pr)			\
+[1:](pr)brl.cond.sptk 0;				\
+	;;						\
+	.xdata4 ".data..patch.brl_fsys_bubble_down", 1b-.
+
+GLOBAL_ENTRY(__kernel_syscall_via_break)
+	.prologue
+	.altrp b6
+	.body
+	/*
+	 * Note: for (fast) syscall restart to work, the break instruction must be
+	 *	 the first one in the bundle addressed by syscall_via_break.
+	 */
+{ .mib
+	break 0x100000
+	nop.i 0
+	br.ret.sptk.many b6
+}
+END(__kernel_syscall_via_break)
+
+#	define ARG0_OFF		(16 + IA64_SIGFRAME_ARG0_OFFSET)
+#	define ARG1_OFF		(16 + IA64_SIGFRAME_ARG1_OFFSET)
+#	define ARG2_OFF		(16 + IA64_SIGFRAME_ARG2_OFFSET)
+#	define SIGHANDLER_OFF	(16 + IA64_SIGFRAME_HANDLER_OFFSET)
+#	define SIGCONTEXT_OFF	(16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET)
+
+#	define FLAGS_OFF	IA64_SIGCONTEXT_FLAGS_OFFSET
+#	define CFM_OFF		IA64_SIGCONTEXT_CFM_OFFSET
+#	define FR6_OFF		IA64_SIGCONTEXT_FR6_OFFSET
+#	define BSP_OFF		IA64_SIGCONTEXT_AR_BSP_OFFSET
+#	define RNAT_OFF		IA64_SIGCONTEXT_AR_RNAT_OFFSET
+#	define UNAT_OFF		IA64_SIGCONTEXT_AR_UNAT_OFFSET
+#	define FPSR_OFF		IA64_SIGCONTEXT_AR_FPSR_OFFSET
+#	define PR_OFF		IA64_SIGCONTEXT_PR_OFFSET
+#	define RP_OFF		IA64_SIGCONTEXT_IP_OFFSET
+#	define SP_OFF		IA64_SIGCONTEXT_R12_OFFSET
+#	define RBS_BASE_OFF	IA64_SIGCONTEXT_RBS_BASE_OFFSET
+#	define LOADRS_OFF	IA64_SIGCONTEXT_LOADRS_OFFSET
+#	define base0		r2
+#	define base1		r3
+	/*
+	 * When we get here, the memory stack looks like this:
+	 *
+	 *   +===============================+
+       	 *   |				     |
+       	 *   //	    struct sigframe          //
+       	 *   |				     |
+	 *   +-------------------------------+ <-- sp+16
+	 *   |      16 byte of scratch       |
+	 *   |            space              |
+	 *   +-------------------------------+ <-- sp
+	 *
+	 * The register stack looks _exactly_ the way it looked at the time the signal
+	 * occurred.  In other words, we're treading on a potential mine-field: each
+	 * incoming general register may be a NaT value (including sp, in which case the
+	 * process ends up dying with a SIGSEGV).
+	 *
+	 * The first thing need to do is a cover to get the registers onto the backing
+	 * store.  Once that is done, we invoke the signal handler which may modify some
+	 * of the machine state.  After returning from the signal handler, we return
+	 * control to the previous context by executing a sigreturn system call.  A signal
+	 * handler may call the rt_sigreturn() function to directly return to a given
+	 * sigcontext.  However, the user-level sigreturn() needs to do much more than
+	 * calling the rt_sigreturn() system call as it needs to unwind the stack to
+	 * restore preserved registers that may have been saved on the signal handler's
+	 * call stack.
+	 */
+
+#define SIGTRAMP_SAVES										\
+	.unwabi 3, 's';		/* mark this as a sigtramp handler (saves scratch regs) */	\
+	.unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */	\
+	.savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF;						\
+	.savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF;						\
+	.savesp pr, PR_OFF+SIGCONTEXT_OFF;     							\
+	.savesp rp, RP_OFF+SIGCONTEXT_OFF;							\
+	.savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF;							\
+	.vframesp SP_OFF+SIGCONTEXT_OFF
+
+GLOBAL_ENTRY(__kernel_sigtramp)
+	// describe the state that is active when we get here:
+	.prologue
+	SIGTRAMP_SAVES
+	.body
+
+	.label_state 1
+
+	adds base0=SIGHANDLER_OFF,sp
+	adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp
+	br.call.sptk.many rp=1f
+1:
+	ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF)	// get pointer to signal handler's plabel
+	ld8 r15=[base1]					// get address of new RBS base (or NULL)
+	cover				// push args in interrupted frame onto backing store
+	;;
+	cmp.ne p1,p0=r15,r0		// do we need to switch rbs? (note: pr is saved by kernel)
+	mov.m r9=ar.bsp			// fetch ar.bsp
+	.spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
+(p1)	br.cond.spnt setup_rbs		// yup -> (clobbers p8, r14-r16, and r18-r20)
+back_from_setup_rbs:
+	alloc r8=ar.pfs,0,0,3,0
+	ld8 out0=[base0],16		// load arg0 (signum)
+	adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1
+	;;
+	ld8 out1=[base1]		// load arg1 (siginfop)
+	ld8 r10=[r17],8			// get signal handler entry point
+	;;
+	ld8 out2=[base0]		// load arg2 (sigcontextp)
+	ld8 gp=[r17]			// get signal handler's global pointer
+	adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
+	;;
+	.spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF
+	st8 [base0]=r9			// save sc_ar_bsp
+	adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
+	adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
+	;;
+	stf.spill [base0]=f6,32
+	stf.spill [base1]=f7,32
+	;;
+	stf.spill [base0]=f8,32
+	stf.spill [base1]=f9,32
+	mov b6=r10
+	;;
+	stf.spill [base0]=f10,32
+	stf.spill [base1]=f11,32
+	;;
+	stf.spill [base0]=f12,32
+	stf.spill [base1]=f13,32
+	;;
+	stf.spill [base0]=f14,32
+	stf.spill [base1]=f15,32
+	br.call.sptk.many rp=b6			// call the signal handler
+.ret0:	adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
+	;;
+	ld8 r15=[base0]				// fetch sc_ar_bsp
+	mov r14=ar.bsp
+	;;
+	cmp.ne p1,p0=r14,r15			// do we need to restore the rbs?
+(p1)	br.cond.spnt restore_rbs		// yup -> (clobbers r14-r18, f6 & f7)
+	;;
+back_from_restore_rbs:
+	adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
+	adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
+	;;
+	ldf.fill f6=[base0],32
+	ldf.fill f7=[base1],32
+	;;
+	ldf.fill f8=[base0],32
+	ldf.fill f9=[base1],32
+	;;
+	ldf.fill f10=[base0],32
+	ldf.fill f11=[base1],32
+	;;
+	ldf.fill f12=[base0],32
+	ldf.fill f13=[base1],32
+	;;
+	ldf.fill f14=[base0],32
+	ldf.fill f15=[base1],32
+	mov r15=__NR_rt_sigreturn
+	.restore sp				// pop .prologue
+	break __BREAK_SYSCALL
+
+	.prologue
+	SIGTRAMP_SAVES
+setup_rbs:
+	mov ar.rsc=0				// put RSE into enforced lazy mode
+	;;
+	.save ar.rnat, r19
+	mov r19=ar.rnat				// save RNaT before switching backing store area
+	adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp
+
+	mov r18=ar.bspstore
+	mov ar.bspstore=r15			// switch over to new register backing store area
+	;;
+
+	.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
+	st8 [r14]=r19				// save sc_ar_rnat
+	.body
+	mov.m r16=ar.bsp			// sc_loadrs <- (new bsp - new bspstore) << 16
+	adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp
+	;;
+	invala
+	sub r15=r16,r15
+	extr.u r20=r18,3,6
+	;;
+	mov ar.rsc=0xf				// set RSE into eager mode, pl 3
+	cmp.eq p8,p0=63,r20
+	shl r15=r15,16
+	;;
+	st8 [r14]=r15				// save sc_loadrs
+(p8)	st8 [r18]=r19		// if bspstore points at RNaT slot, store RNaT there now
+	.restore sp				// pop .prologue
+	br.cond.sptk back_from_setup_rbs
+
+	.prologue
+	SIGTRAMP_SAVES
+	.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
+	.body
+restore_rbs:
+	// On input:
+	//	r14 = bsp1 (bsp at the time of return from signal handler)
+	//	r15 = bsp0 (bsp at the time the signal occurred)
+	//
+	// Here, we need to calculate bspstore0, the value that ar.bspstore needs
+	// to be set to, based on bsp0 and the size of the dirty partition on
+	// the alternate stack (sc_loadrs >> 16).  This can be done with the
+	// following algorithm:
+	//
+	//  bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1));
+	//
+	// This is what the code below does.
+	//
+	alloc r2=ar.pfs,0,0,0,0			// alloc null frame
+	adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp
+	adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp
+	;;
+	ld8 r17=[r16]
+	ld8 r16=[r18]			// get new rnat
+	extr.u r18=r15,3,6	// r18 <- rse_slot_num(bsp0)
+	;;
+	mov ar.rsc=r17			// put RSE into enforced lazy mode
+	shr.u r17=r17,16
+	;;
+	sub r14=r14,r17		// r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16)
+	shr.u r17=r17,3		// r17 <- (sc_loadrs >> 19)
+	;;
+	loadrs			// restore dirty partition
+	extr.u r14=r14,3,6	// r14 <- rse_slot_num(bspstore1)
+	;;
+	add r14=r14,r17		// r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19)
+	;;
+	shr.u r14=r14,6		// r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40
+	;;
+	sub r14=r14,r17		// r14 <- -rse_num_regs(bspstore1, bsp1)
+	movl r17=0x8208208208208209
+	;;
+	add r18=r18,r14		// r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1)
+	setf.sig f7=r17
+	cmp.lt p7,p0=r14,r0	// p7 <- (r14 < 0)?
+	;;
+(p7)	adds r18=-62,r18	// delta -= 62
+	;;
+	setf.sig f6=r18
+	;;
+	xmpy.h f6=f6,f7
+	;;
+	getf.sig r17=f6
+	;;
+	add r17=r17,r18
+	shr r18=r18,63
+	;;
+	shr r17=r17,5
+	;;
+	sub r17=r17,r18		// r17 = delta/63
+	;;
+	add r17=r14,r17		// r17 <- delta/63 - rse_num_regs(bspstore1, bsp1)
+	;;
+	shladd r15=r17,3,r15	// r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1))
+	;;
+	mov ar.bspstore=r15			// switch back to old register backing store area
+	;;
+	mov ar.rnat=r16				// restore RNaT
+	mov ar.rsc=0xf				// (will be restored later on from sc_ar_rsc)
+	// invala not necessary as that will happen when returning to user-mode
+	br.cond.sptk back_from_restore_rbs
+END(__kernel_sigtramp)
+
+/*
+ * On entry:
+ *	r11 = saved ar.pfs
+ *	r15 = system call #
+ *	b0  = saved return address
+ *	b6  = return address
+ * On exit:
+ *	r11 = saved ar.pfs
+ *	r15 = system call #
+ *	b0  = saved return address
+ *	all other "scratch" registers:	undefined
+ *	all "preserved" registers:	same as on entry
+ */
+
+GLOBAL_ENTRY(__kernel_syscall_via_epc)
+	.prologue
+	.altrp b6
+	.body
+{
+	/*
+	 * Note: the kernel cannot assume that the first two instructions in this
+	 * bundle get executed.  The remaining code must be safe even if
+	 * they do not get executed.
+	 */
+	adds r17=-1024,r15			// A
+	mov r10=0				// A    default to successful syscall execution
+	epc					// B	causes split-issue
+}
+	;;
+	RSM_PSR_BE_I(r20, r22)			// M2 (5 cyc to srlz.d)
+	LOAD_FSYSCALL_TABLE(r14)		// X
+	;;
+	mov r16=IA64_KR(CURRENT)		// M2 (12 cyc)
+	shladd r18=r17,3,r14			// A
+	mov r19=NR_syscalls-1			// A
+	;;
+	lfetch [r18]				// M0|1
+	MOV_FROM_PSR(p0, r29, r8)		// M2 (12 cyc)
+	// If r17 is a NaT, p6 will be zero
+	cmp.geu p6,p7=r19,r17			// A    (sysnr > 0 && sysnr < 1024+NR_syscalls)?
+	;;
+	mov r21=ar.fpsr				// M2 (12 cyc)
+	tnat.nz p10,p9=r15			// I0
+	mov.i r26=ar.pfs			// I0 (would stall anyhow due to srlz.d...)
+	;;
+	srlz.d					// M0 (forces split-issue) ensure PSR.BE==0
+(p6)	ld8 r18=[r18]				// M0|1
+	nop.i 0
+	;;
+	nop.m 0
+(p6)	tbit.z.unc p8,p0=r18,0			// I0 (dual-issues with "mov b7=r18"!)
+	nop.i 0
+	;;
+	SSM_PSR_I(p8, p14, r25)
+(p6)	mov b7=r18				// I0
+(p8)	br.dptk.many b7				// B
+
+	mov r27=ar.rsc				// M2 (12 cyc)
+/*
+ * brl.cond doesn't work as intended because the linker would convert this branch
+ * into a branch to a PLT.  Perhaps there will be a way to avoid this with some
+ * future version of the linker.  In the meantime, we just use an indirect branch
+ * instead.
+ */
+#ifdef CONFIG_ITANIUM
+(p6)	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
+	;;
+(p6)	ld8 r14=[r14]				// r14 <- fsys_bubble_down
+	;;
+(p6)	mov b7=r14
+(p6)	br.sptk.many b7
+#else
+	BRL_COND_FSYS_BUBBLE_DOWN(p6)
+#endif
+	SSM_PSR_I(p0, p14, r10)
+	mov r10=-1
+(p10)	mov r8=EINVAL
+(p9)	mov r8=ENOSYS
+	FSYS_RETURN
+
+#ifdef CONFIG_PARAVIRT
+	/*
+	 * padd to make the size of this symbol constant
+	 * independent of paravirtualization.
+	 */
+	.align PAGE_SIZE / 8
+#endif
+END(__kernel_syscall_via_epc)
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S
new file mode 100644
index 00000000..d32b0855
--- /dev/null
+++ b/arch/ia64/kernel/gate.lds.S
@@ -0,0 +1,109 @@
+/*
+ * Linker script for gate DSO.  The gate pages are an ELF shared object
+ * prelinked to its virtual address, with only one read-only segment and
+ * one execute-only segment (both fit in one page).  This script controls
+ * its layout.
+ */
+
+
+#include <asm/system.h>
+#include "paravirt_patchlist.h"
+
+SECTIONS
+{
+	. = GATE_ADDR + SIZEOF_HEADERS;
+
+	.hash			: { *(.hash) }		:readable
+	.gnu.hash		: { *(.gnu.hash) }
+	.dynsym			: { *(.dynsym) }
+	.dynstr			: { *(.dynstr) }
+	.gnu.version		: { *(.gnu.version) }
+	.gnu.version_d		: { *(.gnu.version_d) }
+	.gnu.version_r		: { *(.gnu.version_r) }
+
+	.note			: { *(.note*) }		:readable	:note
+
+	.dynamic		: { *(.dynamic) }	:readable	:dynamic
+
+	/*
+	 * This linker script is used both with -r and with -shared.  For
+	 * the layouts to match, we need to skip more than enough space for
+	 * the dynamic symbol table et al.  If this amount is insufficient,
+	 * ld -shared will barf.  Just increase it here.
+	 */
+	. = GATE_ADDR + 0x600;
+
+	.data..patch		: {
+		__paravirt_start_gate_mckinley_e9_patchlist = .;
+		*(.data..patch.mckinley_e9)
+		__paravirt_end_gate_mckinley_e9_patchlist = .;
+
+		__paravirt_start_gate_vtop_patchlist = .;
+		*(.data..patch.vtop)
+		__paravirt_end_gate_vtop_patchlist = .;
+
+		__paravirt_start_gate_fsyscall_patchlist = .;
+		*(.data..patch.fsyscall_table)
+		__paravirt_end_gate_fsyscall_patchlist = .;
+
+		__paravirt_start_gate_brl_fsys_bubble_down_patchlist = .;
+		*(.data..patch.brl_fsys_bubble_down)
+		__paravirt_end_gate_brl_fsys_bubble_down_patchlist = .;
+	}						:readable
+
+	.IA_64.unwind_info	: { *(.IA_64.unwind_info*) }
+	.IA_64.unwind		: { *(.IA_64.unwind*) }	:readable	:unwind
+#ifdef HAVE_BUGGY_SEGREL
+	.text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) }	:readable
+#else
+	. = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
+	.text			: { *(.text) *(.text.*) }	:epc
+#endif
+
+	/DISCARD/		: {
+		*(.got.plt) *(.got)
+		*(.data .data.* .gnu.linkonce.d.*)
+		*(.dynbss)
+		*(.bss .bss.* .gnu.linkonce.b.*)
+		*(__ex_table)
+		*(__mca_table)
+	}
+}
+
+/*
+ * ld does not recognize this name token; use the constant.
+ */
+#define	PT_IA_64_UNWIND	0x70000001
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	readable	PT_LOAD	FILEHDR	PHDRS	FLAGS(4);	/* PF_R */
+#ifndef HAVE_BUGGY_SEGREL
+	epc		PT_LOAD	FILEHDR PHDRS	FLAGS(1);	/* PF_X */
+#endif
+	dynamic		PT_DYNAMIC		FLAGS(4);	/* PF_R */
+	note		PT_NOTE			FLAGS(4);	/* PF_R */
+	unwind		PT_IA_64_UNWIND;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+	LINUX_2.5 {
+	global:
+		__kernel_syscall_via_break;
+		__kernel_syscall_via_epc;
+		__kernel_sigtramp;
+
+	local: *;
+	};
+}
+
+/* The ELF entry point can be used to set the AT_SYSINFO value.  */
+ENTRY(__kernel_syscall_via_epc)
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
new file mode 100644
index 00000000..17a9fba3
--- /dev/null
+++ b/arch/ia64/kernel/head.S
@@ -0,0 +1,1229 @@
+/*
+ * Here is where the ball gets rolling as far as the kernel is concerned.
+ * When control is transferred to _start, the bootload has already
+ * loaded us to the correct address.  All that's left to do here is
+ * to set up the kernel's global pointer and jump to the kernel
+ * entry point.
+ *
+ * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
+ * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
+ *   -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
+ * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com>
+ *   Support for CPU Hotplug
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/fpu.h>
+#include <asm/kregs.h>
+#include <asm/mmu_context.h>
+#include <asm/asm-offsets.h>
+#include <asm/pal.h>
+#include <asm/paravirt.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/mca_asm.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define SAL_PSR_BITS_TO_SET				\
+	(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL)
+
+#define SAVE_FROM_REG(src, ptr, dest)	\
+	mov dest=src;;						\
+	st8 [ptr]=dest,0x08
+
+#define RESTORE_REG(reg, ptr, _tmp)		\
+	ld8 _tmp=[ptr],0x08;;				\
+	mov reg=_tmp
+
+#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\
+	mov ar.lc=IA64_NUM_DBG_REGS-1;; 			\
+	mov _idx=0;; 								\
+1: 												\
+	SAVE_FROM_REG(_breg[_idx], ptr, _dest);;	\
+	add _idx=1,_idx;;							\
+	br.cloop.sptk.many 1b
+
+#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\
+	mov ar.lc=IA64_NUM_DBG_REGS-1;;			\
+	mov _idx=0;;							\
+_lbl:  RESTORE_REG(_breg[_idx], ptr, _tmp);;	\
+	add _idx=1, _idx;;						\
+	br.cloop.sptk.many _lbl
+
+#define SAVE_ONE_RR(num, _reg, _tmp) \
+	movl _tmp=(num<<61);;	\
+	mov _reg=rr[_tmp]
+
+#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \
+	SAVE_ONE_RR(0,_r0, _tmp);; \
+	SAVE_ONE_RR(1,_r1, _tmp);; \
+	SAVE_ONE_RR(2,_r2, _tmp);; \
+	SAVE_ONE_RR(3,_r3, _tmp);; \
+	SAVE_ONE_RR(4,_r4, _tmp);; \
+	SAVE_ONE_RR(5,_r5, _tmp);; \
+	SAVE_ONE_RR(6,_r6, _tmp);; \
+	SAVE_ONE_RR(7,_r7, _tmp);;
+
+#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \
+	st8 [ptr]=_r0, 8;; \
+	st8 [ptr]=_r1, 8;; \
+	st8 [ptr]=_r2, 8;; \
+	st8 [ptr]=_r3, 8;; \
+	st8 [ptr]=_r4, 8;; \
+	st8 [ptr]=_r5, 8;; \
+	st8 [ptr]=_r6, 8;; \
+	st8 [ptr]=_r7, 8;;
+
+#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \
+	mov		ar.lc=0x08-1;;						\
+	movl	_idx1=0x00;;						\
+RestRR:											\
+	dep.z	_idx2=_idx1,61,3;;					\
+	ld8		_tmp=[ptr],8;;						\
+	mov		rr[_idx2]=_tmp;;					\
+	srlz.d;;									\
+	add		_idx1=1,_idx1;;						\
+	br.cloop.sptk.few	RestRR
+
+#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \
+	movl reg1=sal_state_for_booting_cpu;;	\
+	ld8 reg2=[reg1];;
+
+/*
+ * Adjust region registers saved before starting to save
+ * break regs and rest of the states that need to be preserved.
+ */
+#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred)  \
+	SAVE_FROM_REG(b0,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b1,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b2,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b3,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b4,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b5,_reg1,_reg2);;						\
+	st8 [_reg1]=r1,0x08;;								\
+	st8 [_reg1]=r12,0x08;;								\
+	st8 [_reg1]=r13,0x08;;								\
+	SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);;				\
+	SAVE_FROM_REG(ar.pfs,_reg1,_reg2);;					\
+	SAVE_FROM_REG(ar.rnat,_reg1,_reg2);;				\
+	SAVE_FROM_REG(ar.unat,_reg1,_reg2);;				\
+	SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);;			\
+	SAVE_FROM_REG(cr.dcr,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.iva,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.pta,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.itv,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.pmv,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);;				\
+	SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);;				\
+	SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);;				\
+	st8 [_reg1]=r4,0x08;;								\
+	st8 [_reg1]=r5,0x08;;								\
+	st8 [_reg1]=r6,0x08;;								\
+	st8 [_reg1]=r7,0x08;;								\
+	st8 [_reg1]=_pred,0x08;;							\
+	SAVE_FROM_REG(ar.lc, _reg1, _reg2);;				\
+	stf.spill.nta [_reg1]=f2,16;;						\
+	stf.spill.nta [_reg1]=f3,16;;						\
+	stf.spill.nta [_reg1]=f4,16;;						\
+	stf.spill.nta [_reg1]=f5,16;;						\
+	stf.spill.nta [_reg1]=f16,16;;						\
+	stf.spill.nta [_reg1]=f17,16;;						\
+	stf.spill.nta [_reg1]=f18,16;;						\
+	stf.spill.nta [_reg1]=f19,16;;						\
+	stf.spill.nta [_reg1]=f20,16;;						\
+	stf.spill.nta [_reg1]=f21,16;;						\
+	stf.spill.nta [_reg1]=f22,16;;						\
+	stf.spill.nta [_reg1]=f23,16;;						\
+	stf.spill.nta [_reg1]=f24,16;;						\
+	stf.spill.nta [_reg1]=f25,16;;						\
+	stf.spill.nta [_reg1]=f26,16;;						\
+	stf.spill.nta [_reg1]=f27,16;;						\
+	stf.spill.nta [_reg1]=f28,16;;						\
+	stf.spill.nta [_reg1]=f29,16;;						\
+	stf.spill.nta [_reg1]=f30,16;;						\
+	stf.spill.nta [_reg1]=f31,16;;
+
+#else
+#define SET_AREA_FOR_BOOTING_CPU(a1, a2)
+#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3)
+#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7)
+#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7)
+#endif
+
+#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \
+	movl _tmp1=(num << 61);;	\
+	mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \
+	mov rr[_tmp1]=_tmp2
+
+	__PAGE_ALIGNED_DATA
+
+	.global empty_zero_page
+empty_zero_page:
+	.skip PAGE_SIZE
+
+	.global swapper_pg_dir
+swapper_pg_dir:
+	.skip PAGE_SIZE
+
+	.rodata
+halt_msg:
+	stringz "Halting kernel\n"
+
+	__REF
+
+	.global start_ap
+
+	/*
+	 * Start the kernel.  When the bootloader passes control to _start(), r28
+	 * points to the address of the boot parameter area.  Execution reaches
+	 * here in physical mode.
+	 */
+GLOBAL_ENTRY(_start)
+start_ap:
+	.prologue
+	.save rp, r0		// terminate unwind chain with a NULL rp
+	.body
+
+	rsm psr.i | psr.ic
+	;;
+	srlz.i
+	;;
+ {
+	flushrs				// must be first insn in group
+	srlz.i
+ }
+	;;
+	/*
+	 * Save the region registers, predicate before they get clobbered
+	 */
+	SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15);
+	mov r25=pr;;
+
+	/*
+	 * Initialize kernel region registers:
+	 *	rr[0]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[1]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[2]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[3]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[4]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[5]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
+	 *	rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
+	 * We initialize all of them to prevent inadvertently assuming
+	 * something about the state of address translation early in boot.
+	 */
+	SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);;
+	SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);;
+	/*
+	 * Now pin mappings into the TLB for kernel text and data
+	 */
+	mov r18=KERNEL_TR_PAGE_SHIFT<<2
+	movl r17=KERNEL_START
+	;;
+	mov cr.itir=r18
+	mov cr.ifa=r17
+	mov r16=IA64_TR_KERNEL
+	mov r3=ip
+	movl r18=PAGE_KERNEL
+	;;
+	dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
+	;;
+	or r18=r2,r18
+	;;
+	srlz.i
+	;;
+	itr.i itr[r16]=r18
+	;;
+	itr.d dtr[r16]=r18
+	;;
+	srlz.i
+
+	/*
+	 * Switch into virtual mode:
+	 */
+	movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
+		  |IA64_PSR_DI|IA64_PSR_AC)
+	;;
+	mov cr.ipsr=r16
+	movl r17=1f
+	;;
+	mov cr.iip=r17
+	mov cr.ifs=r0
+	;;
+	rfi
+	;;
+1:	// now we are in virtual mode
+
+	SET_AREA_FOR_BOOTING_CPU(r2, r16);
+
+	STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15);
+	SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25)
+	;;
+
+	// set IVT entry point---can't access I/O ports without it
+	movl r3=ia64_ivt
+	;;
+	mov cr.iva=r3
+	movl r2=FPSR_DEFAULT
+	;;
+	srlz.i
+	movl gp=__gp
+
+	mov ar.fpsr=r2
+	;;
+
+#define isAP	p2	// are we an Application Processor?
+#define isBP	p3	// are we the Bootstrap Processor?
+
+#ifdef CONFIG_SMP
+	/*
+	 * Find the init_task for the currently booting CPU.  At poweron, and in
+	 * UP mode, task_for_booting_cpu is NULL.
+	 */
+	movl r3=task_for_booting_cpu
+ 	;;
+	ld8 r3=[r3]
+	movl r2=init_task
+	;;
+	cmp.eq isBP,isAP=r3,r0
+	;;
+(isAP)	mov r2=r3
+#else
+	movl r2=init_task
+	cmp.eq isBP,isAP=r0,r0
+#endif
+	;;
+	tpa r3=r2		// r3 == phys addr of task struct
+	mov r16=-1
+(isBP)	br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it
+
+	// load mapping for stack (virtaddr in r2, physaddr in r3)
+	rsm psr.ic
+	movl r17=PAGE_KERNEL
+	;;
+	srlz.d
+	dep r18=0,r3,0,12
+	;;
+	or r18=r17,r18
+	dep r2=-1,r3,61,3	// IMVA of task
+	;;
+	mov r17=rr[r2]
+	shr.u r16=r3,IA64_GRANULE_SHIFT
+	;;
+	dep r17=0,r17,8,24
+	;;
+	mov cr.itir=r17
+	mov cr.ifa=r2
+
+	mov r19=IA64_TR_CURRENT_STACK
+	;;
+	itr.d dtr[r19]=r18
+	;;
+	ssm psr.ic
+	srlz.d
+  	;;
+
+.load_current:
+	// load the "current" pointer (r13) and ar.k6 with the current task
+	mov IA64_KR(CURRENT)=r2		// virtual address
+	mov IA64_KR(CURRENT_STACK)=r16
+	mov r13=r2
+	/*
+	 * Reserve space at the top of the stack for "struct pt_regs".  Kernel
+	 * threads don't store interesting values in that structure, but the space
+	 * still needs to be there because time-critical stuff such as the context
+	 * switching can be implemented more efficiently (for example, __switch_to()
+	 * always sets the psr.dfh bit of the task it is switching to).
+	 */
+
+	addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
+	addl r2=IA64_RBS_OFFSET,r2	// initialize the RSE
+	mov ar.rsc=0		// place RSE in enforced lazy mode
+	;;
+	loadrs			// clear the dirty partition
+	movl r19=__phys_per_cpu_start
+	mov r18=PERCPU_PAGE_SIZE
+	;;
+#ifndef CONFIG_SMP
+	add r19=r19,r18
+	;;
+#else
+(isAP)	br.few 2f
+	movl r20=__cpu0_per_cpu
+	;;
+	shr.u r18=r18,3
+1:
+	ld8 r21=[r19],8;;
+	st8[r20]=r21,8
+	adds r18=-1,r18;;
+	cmp4.lt p7,p6=0,r18
+(p7)	br.cond.dptk.few 1b
+	mov r19=r20
+	;;
+2:
+#endif
+	tpa r19=r19
+	;;
+	.pred.rel.mutex isBP,isAP
+(isBP)	mov IA64_KR(PER_CPU_DATA)=r19	// per-CPU base for cpu0
+(isAP)	mov IA64_KR(PER_CPU_DATA)=r0	// clear physical per-CPU base
+	;;
+	mov ar.bspstore=r2	// establish the new RSE stack
+	;;
+	mov ar.rsc=0x3		// place RSE in eager mode
+
+(isBP)	dep r28=-1,r28,61,3	// make address virtual
+(isBP)	movl r2=ia64_boot_param
+	;;
+(isBP)	st8 [r2]=r28		// save the address of the boot param area passed by the bootloader
+
+#ifdef CONFIG_PARAVIRT
+
+	movl r14=hypervisor_setup_hooks
+	movl r15=hypervisor_type
+	mov r16=num_hypervisor_hooks
+	;;
+	ld8 r2=[r15]
+	;;
+	cmp.ltu p7,p0=r2,r16	// array size check
+	shladd r8=r2,3,r14
+	;;
+(p7)	ld8 r9=[r8]
+	;;
+(p7)	mov b1=r9
+(p7)	cmp.ne.unc p7,p0=r9,r0	// no actual branch to NULL
+	;;
+(p7)	br.call.sptk.many rp=b1
+
+	__INITDATA
+
+default_setup_hook = 0		// Currently nothing needs to be done.
+
+	.weak xen_setup_hook
+
+	.global hypervisor_type
+hypervisor_type:
+	data8		PARAVIRT_HYPERVISOR_TYPE_DEFAULT
+
+	// must have the same order with PARAVIRT_HYPERVISOR_TYPE_xxx
+
+hypervisor_setup_hooks:
+	data8		default_setup_hook
+	data8		xen_setup_hook
+num_hypervisor_hooks = (. - hypervisor_setup_hooks) / 8
+	.previous
+
+#endif
+
+#ifdef CONFIG_SMP
+(isAP)	br.call.sptk.many rp=start_secondary
+.ret0:
+(isAP)	br.cond.sptk self
+#endif
+
+	// This is executed by the bootstrap processor (bsp) only:
+
+#ifdef CONFIG_IA64_FW_EMU
+	// initialize PAL & SAL emulator:
+	br.call.sptk.many rp=sys_fw_init
+.ret1:
+#endif
+	br.call.sptk.many rp=start_kernel
+.ret2:	addl r3=@ltoff(halt_msg),gp
+	;;
+	alloc r2=ar.pfs,8,0,2,0
+	;;
+	ld8 out0=[r3]
+	br.call.sptk.many b0=console_print
+
+self:	hint @pause
+	br.sptk.many self		// endless loop
+END(_start)
+
+	.text
+
+GLOBAL_ENTRY(ia64_save_debug_regs)
+	alloc r16=ar.pfs,1,0,0,0
+	mov r20=ar.lc			// preserve ar.lc
+	mov ar.lc=IA64_NUM_DBG_REGS-1
+	mov r18=0
+	add r19=IA64_NUM_DBG_REGS*8,in0
+	;;
+1:	mov r16=dbr[r18]
+#ifdef CONFIG_ITANIUM
+	;;
+	srlz.d
+#endif
+	mov r17=ibr[r18]
+	add r18=1,r18
+	;;
+	st8.nta [in0]=r16,8
+	st8.nta [r19]=r17,8
+	br.cloop.sptk.many 1b
+	;;
+	mov ar.lc=r20			// restore ar.lc
+	br.ret.sptk.many rp
+END(ia64_save_debug_regs)
+
+GLOBAL_ENTRY(ia64_load_debug_regs)
+	alloc r16=ar.pfs,1,0,0,0
+	lfetch.nta [in0]
+	mov r20=ar.lc			// preserve ar.lc
+	add r19=IA64_NUM_DBG_REGS*8,in0
+	mov ar.lc=IA64_NUM_DBG_REGS-1
+	mov r18=-1
+	;;
+1:	ld8.nta r16=[in0],8
+	ld8.nta r17=[r19],8
+	add r18=1,r18
+	;;
+	mov dbr[r18]=r16
+#ifdef CONFIG_ITANIUM
+	;;
+	srlz.d				// Errata 132 (NoFix status)
+#endif
+	mov ibr[r18]=r17
+	br.cloop.sptk.many 1b
+	;;
+	mov ar.lc=r20			// restore ar.lc
+	br.ret.sptk.many rp
+END(ia64_load_debug_regs)
+
+GLOBAL_ENTRY(__ia64_save_fpu)
+	alloc r2=ar.pfs,1,4,0,0
+	adds loc0=96*16-16,in0
+	adds loc1=96*16-16-128,in0
+	;;
+	stf.spill.nta [loc0]=f127,-256
+	stf.spill.nta [loc1]=f119,-256
+	;;
+	stf.spill.nta [loc0]=f111,-256
+	stf.spill.nta [loc1]=f103,-256
+	;;
+	stf.spill.nta [loc0]=f95,-256
+	stf.spill.nta [loc1]=f87,-256
+	;;
+	stf.spill.nta [loc0]=f79,-256
+	stf.spill.nta [loc1]=f71,-256
+	;;
+	stf.spill.nta [loc0]=f63,-256
+	stf.spill.nta [loc1]=f55,-256
+	adds loc2=96*16-32,in0
+	;;
+	stf.spill.nta [loc0]=f47,-256
+	stf.spill.nta [loc1]=f39,-256
+	adds loc3=96*16-32-128,in0
+	;;
+	stf.spill.nta [loc2]=f126,-256
+	stf.spill.nta [loc3]=f118,-256
+	;;
+	stf.spill.nta [loc2]=f110,-256
+	stf.spill.nta [loc3]=f102,-256
+	;;
+	stf.spill.nta [loc2]=f94,-256
+	stf.spill.nta [loc3]=f86,-256
+	;;
+	stf.spill.nta [loc2]=f78,-256
+	stf.spill.nta [loc3]=f70,-256
+	;;
+	stf.spill.nta [loc2]=f62,-256
+	stf.spill.nta [loc3]=f54,-256
+	adds loc0=96*16-48,in0
+	;;
+	stf.spill.nta [loc2]=f46,-256
+	stf.spill.nta [loc3]=f38,-256
+	adds loc1=96*16-48-128,in0
+	;;
+	stf.spill.nta [loc0]=f125,-256
+	stf.spill.nta [loc1]=f117,-256
+	;;
+	stf.spill.nta [loc0]=f109,-256
+	stf.spill.nta [loc1]=f101,-256
+	;;
+	stf.spill.nta [loc0]=f93,-256
+	stf.spill.nta [loc1]=f85,-256
+	;;
+	stf.spill.nta [loc0]=f77,-256
+	stf.spill.nta [loc1]=f69,-256
+	;;
+	stf.spill.nta [loc0]=f61,-256
+	stf.spill.nta [loc1]=f53,-256
+	adds loc2=96*16-64,in0
+	;;
+	stf.spill.nta [loc0]=f45,-256
+	stf.spill.nta [loc1]=f37,-256
+	adds loc3=96*16-64-128,in0
+	;;
+	stf.spill.nta [loc2]=f124,-256
+	stf.spill.nta [loc3]=f116,-256
+	;;
+	stf.spill.nta [loc2]=f108,-256
+	stf.spill.nta [loc3]=f100,-256
+	;;
+	stf.spill.nta [loc2]=f92,-256
+	stf.spill.nta [loc3]=f84,-256
+	;;
+	stf.spill.nta [loc2]=f76,-256
+	stf.spill.nta [loc3]=f68,-256
+	;;
+	stf.spill.nta [loc2]=f60,-256
+	stf.spill.nta [loc3]=f52,-256
+	adds loc0=96*16-80,in0
+	;;
+	stf.spill.nta [loc2]=f44,-256
+	stf.spill.nta [loc3]=f36,-256
+	adds loc1=96*16-80-128,in0
+	;;
+	stf.spill.nta [loc0]=f123,-256
+	stf.spill.nta [loc1]=f115,-256
+	;;
+	stf.spill.nta [loc0]=f107,-256
+	stf.spill.nta [loc1]=f99,-256
+	;;
+	stf.spill.nta [loc0]=f91,-256
+	stf.spill.nta [loc1]=f83,-256
+	;;
+	stf.spill.nta [loc0]=f75,-256
+	stf.spill.nta [loc1]=f67,-256
+	;;
+	stf.spill.nta [loc0]=f59,-256
+	stf.spill.nta [loc1]=f51,-256
+	adds loc2=96*16-96,in0
+	;;
+	stf.spill.nta [loc0]=f43,-256
+	stf.spill.nta [loc1]=f35,-256
+	adds loc3=96*16-96-128,in0
+	;;
+	stf.spill.nta [loc2]=f122,-256
+	stf.spill.nta [loc3]=f114,-256
+	;;
+	stf.spill.nta [loc2]=f106,-256
+	stf.spill.nta [loc3]=f98,-256
+	;;
+	stf.spill.nta [loc2]=f90,-256
+	stf.spill.nta [loc3]=f82,-256
+	;;
+	stf.spill.nta [loc2]=f74,-256
+	stf.spill.nta [loc3]=f66,-256
+	;;
+	stf.spill.nta [loc2]=f58,-256
+	stf.spill.nta [loc3]=f50,-256
+	adds loc0=96*16-112,in0
+	;;
+	stf.spill.nta [loc2]=f42,-256
+	stf.spill.nta [loc3]=f34,-256
+	adds loc1=96*16-112-128,in0
+	;;
+	stf.spill.nta [loc0]=f121,-256
+	stf.spill.nta [loc1]=f113,-256
+	;;
+	stf.spill.nta [loc0]=f105,-256
+	stf.spill.nta [loc1]=f97,-256
+	;;
+	stf.spill.nta [loc0]=f89,-256
+	stf.spill.nta [loc1]=f81,-256
+	;;
+	stf.spill.nta [loc0]=f73,-256
+	stf.spill.nta [loc1]=f65,-256
+	;;
+	stf.spill.nta [loc0]=f57,-256
+	stf.spill.nta [loc1]=f49,-256
+	adds loc2=96*16-128,in0
+	;;
+	stf.spill.nta [loc0]=f41,-256
+	stf.spill.nta [loc1]=f33,-256
+	adds loc3=96*16-128-128,in0
+	;;
+	stf.spill.nta [loc2]=f120,-256
+	stf.spill.nta [loc3]=f112,-256
+	;;
+	stf.spill.nta [loc2]=f104,-256
+	stf.spill.nta [loc3]=f96,-256
+	;;
+	stf.spill.nta [loc2]=f88,-256
+	stf.spill.nta [loc3]=f80,-256
+	;;
+	stf.spill.nta [loc2]=f72,-256
+	stf.spill.nta [loc3]=f64,-256
+	;;
+	stf.spill.nta [loc2]=f56,-256
+	stf.spill.nta [loc3]=f48,-256
+	;;
+	stf.spill.nta [loc2]=f40
+	stf.spill.nta [loc3]=f32
+	br.ret.sptk.many rp
+END(__ia64_save_fpu)
+
+GLOBAL_ENTRY(__ia64_load_fpu)
+	alloc r2=ar.pfs,1,2,0,0
+	adds r3=128,in0
+	adds r14=256,in0
+	adds r15=384,in0
+	mov loc0=512
+	mov loc1=-1024+16
+	;;
+	ldf.fill.nta f32=[in0],loc0
+	ldf.fill.nta f40=[ r3],loc0
+	ldf.fill.nta f48=[r14],loc0
+	ldf.fill.nta f56=[r15],loc0
+	;;
+	ldf.fill.nta f64=[in0],loc0
+	ldf.fill.nta f72=[ r3],loc0
+	ldf.fill.nta f80=[r14],loc0
+	ldf.fill.nta f88=[r15],loc0
+	;;
+	ldf.fill.nta f96=[in0],loc1
+	ldf.fill.nta f104=[ r3],loc1
+	ldf.fill.nta f112=[r14],loc1
+	ldf.fill.nta f120=[r15],loc1
+	;;
+	ldf.fill.nta f33=[in0],loc0
+	ldf.fill.nta f41=[ r3],loc0
+	ldf.fill.nta f49=[r14],loc0
+	ldf.fill.nta f57=[r15],loc0
+	;;
+	ldf.fill.nta f65=[in0],loc0
+	ldf.fill.nta f73=[ r3],loc0
+	ldf.fill.nta f81=[r14],loc0
+	ldf.fill.nta f89=[r15],loc0
+	;;
+	ldf.fill.nta f97=[in0],loc1
+	ldf.fill.nta f105=[ r3],loc1
+	ldf.fill.nta f113=[r14],loc1
+	ldf.fill.nta f121=[r15],loc1
+	;;
+	ldf.fill.nta f34=[in0],loc0
+	ldf.fill.nta f42=[ r3],loc0
+	ldf.fill.nta f50=[r14],loc0
+	ldf.fill.nta f58=[r15],loc0
+	;;
+	ldf.fill.nta f66=[in0],loc0
+	ldf.fill.nta f74=[ r3],loc0
+	ldf.fill.nta f82=[r14],loc0
+	ldf.fill.nta f90=[r15],loc0
+	;;
+	ldf.fill.nta f98=[in0],loc1
+	ldf.fill.nta f106=[ r3],loc1
+	ldf.fill.nta f114=[r14],loc1
+	ldf.fill.nta f122=[r15],loc1
+	;;
+	ldf.fill.nta f35=[in0],loc0
+	ldf.fill.nta f43=[ r3],loc0
+	ldf.fill.nta f51=[r14],loc0
+	ldf.fill.nta f59=[r15],loc0
+	;;
+	ldf.fill.nta f67=[in0],loc0
+	ldf.fill.nta f75=[ r3],loc0
+	ldf.fill.nta f83=[r14],loc0
+	ldf.fill.nta f91=[r15],loc0
+	;;
+	ldf.fill.nta f99=[in0],loc1
+	ldf.fill.nta f107=[ r3],loc1
+	ldf.fill.nta f115=[r14],loc1
+	ldf.fill.nta f123=[r15],loc1
+	;;
+	ldf.fill.nta f36=[in0],loc0
+	ldf.fill.nta f44=[ r3],loc0
+	ldf.fill.nta f52=[r14],loc0
+	ldf.fill.nta f60=[r15],loc0
+	;;
+	ldf.fill.nta f68=[in0],loc0
+	ldf.fill.nta f76=[ r3],loc0
+	ldf.fill.nta f84=[r14],loc0
+	ldf.fill.nta f92=[r15],loc0
+	;;
+	ldf.fill.nta f100=[in0],loc1
+	ldf.fill.nta f108=[ r3],loc1
+	ldf.fill.nta f116=[r14],loc1
+	ldf.fill.nta f124=[r15],loc1
+	;;
+	ldf.fill.nta f37=[in0],loc0
+	ldf.fill.nta f45=[ r3],loc0
+	ldf.fill.nta f53=[r14],loc0
+	ldf.fill.nta f61=[r15],loc0
+	;;
+	ldf.fill.nta f69=[in0],loc0
+	ldf.fill.nta f77=[ r3],loc0
+	ldf.fill.nta f85=[r14],loc0
+	ldf.fill.nta f93=[r15],loc0
+	;;
+	ldf.fill.nta f101=[in0],loc1
+	ldf.fill.nta f109=[ r3],loc1
+	ldf.fill.nta f117=[r14],loc1
+	ldf.fill.nta f125=[r15],loc1
+	;;
+	ldf.fill.nta f38 =[in0],loc0
+	ldf.fill.nta f46 =[ r3],loc0
+	ldf.fill.nta f54 =[r14],loc0
+	ldf.fill.nta f62 =[r15],loc0
+	;;
+	ldf.fill.nta f70 =[in0],loc0
+	ldf.fill.nta f78 =[ r3],loc0
+	ldf.fill.nta f86 =[r14],loc0
+	ldf.fill.nta f94 =[r15],loc0
+	;;
+	ldf.fill.nta f102=[in0],loc1
+	ldf.fill.nta f110=[ r3],loc1
+	ldf.fill.nta f118=[r14],loc1
+	ldf.fill.nta f126=[r15],loc1
+	;;
+	ldf.fill.nta f39 =[in0],loc0
+	ldf.fill.nta f47 =[ r3],loc0
+	ldf.fill.nta f55 =[r14],loc0
+	ldf.fill.nta f63 =[r15],loc0
+	;;
+	ldf.fill.nta f71 =[in0],loc0
+	ldf.fill.nta f79 =[ r3],loc0
+	ldf.fill.nta f87 =[r14],loc0
+	ldf.fill.nta f95 =[r15],loc0
+	;;
+	ldf.fill.nta f103=[in0]
+	ldf.fill.nta f111=[ r3]
+	ldf.fill.nta f119=[r14]
+	ldf.fill.nta f127=[r15]
+	br.ret.sptk.many rp
+END(__ia64_load_fpu)
+
+GLOBAL_ENTRY(__ia64_init_fpu)
+	stf.spill [sp]=f0		// M3
+	mov	 f32=f0			// F
+	nop.b	 0
+
+	ldfps	 f33,f34=[sp]		// M0
+	ldfps	 f35,f36=[sp]		// M1
+	mov      f37=f0			// F
+	;;
+
+	setf.s	 f38=r0			// M2
+	setf.s	 f39=r0			// M3
+	mov      f40=f0			// F
+
+	ldfps	 f41,f42=[sp]		// M0
+	ldfps	 f43,f44=[sp]		// M1
+	mov      f45=f0			// F
+
+	setf.s	 f46=r0			// M2
+	setf.s	 f47=r0			// M3
+	mov      f48=f0			// F
+
+	ldfps	 f49,f50=[sp]		// M0
+	ldfps	 f51,f52=[sp]		// M1
+	mov      f53=f0			// F
+
+	setf.s	 f54=r0			// M2
+	setf.s	 f55=r0			// M3
+	mov      f56=f0			// F
+
+	ldfps	 f57,f58=[sp]		// M0
+	ldfps	 f59,f60=[sp]		// M1
+	mov      f61=f0			// F
+
+	setf.s	 f62=r0			// M2
+	setf.s	 f63=r0			// M3
+	mov      f64=f0			// F
+
+	ldfps	 f65,f66=[sp]		// M0
+	ldfps	 f67,f68=[sp]		// M1
+	mov      f69=f0			// F
+
+	setf.s	 f70=r0			// M2
+	setf.s	 f71=r0			// M3
+	mov      f72=f0			// F
+
+	ldfps	 f73,f74=[sp]		// M0
+	ldfps	 f75,f76=[sp]		// M1
+	mov      f77=f0			// F
+
+	setf.s	 f78=r0			// M2
+	setf.s	 f79=r0			// M3
+	mov      f80=f0			// F
+
+	ldfps	 f81,f82=[sp]		// M0
+	ldfps	 f83,f84=[sp]		// M1
+	mov      f85=f0			// F
+
+	setf.s	 f86=r0			// M2
+	setf.s	 f87=r0			// M3
+	mov      f88=f0			// F
+
+	/*
+	 * When the instructions are cached, it would be faster to initialize
+	 * the remaining registers with simply mov instructions (F-unit).
+	 * This gets the time down to ~29 cycles.  However, this would use up
+	 * 33 bundles, whereas continuing with the above pattern yields
+	 * 10 bundles and ~30 cycles.
+	 */
+
+	ldfps	 f89,f90=[sp]		// M0
+	ldfps	 f91,f92=[sp]		// M1
+	mov      f93=f0			// F
+
+	setf.s	 f94=r0			// M2
+	setf.s	 f95=r0			// M3
+	mov      f96=f0			// F
+
+	ldfps	 f97,f98=[sp]		// M0
+	ldfps	 f99,f100=[sp]		// M1
+	mov      f101=f0		// F
+
+	setf.s	 f102=r0		// M2
+	setf.s	 f103=r0		// M3
+	mov      f104=f0		// F
+
+	ldfps	 f105,f106=[sp]		// M0
+	ldfps	 f107,f108=[sp]		// M1
+	mov      f109=f0		// F
+
+	setf.s	 f110=r0		// M2
+	setf.s	 f111=r0		// M3
+	mov      f112=f0		// F
+
+	ldfps	 f113,f114=[sp]		// M0
+	ldfps	 f115,f116=[sp]		// M1
+	mov      f117=f0		// F
+
+	setf.s	 f118=r0		// M2
+	setf.s	 f119=r0		// M3
+	mov      f120=f0		// F
+
+	ldfps	 f121,f122=[sp]		// M0
+	ldfps	 f123,f124=[sp]		// M1
+	mov      f125=f0		// F
+
+	setf.s	 f126=r0		// M2
+	setf.s	 f127=r0		// M3
+	br.ret.sptk.many rp		// F
+END(__ia64_init_fpu)
+
+/*
+ * Switch execution mode from virtual to physical
+ *
+ * Inputs:
+ *	r16 = new psr to establish
+ * Output:
+ *	r19 = old virtual address of ar.bsp
+ *	r20 = old virtual address of sp
+ *
+ * Note: RSE must already be in enforced lazy mode
+ */
+GLOBAL_ENTRY(ia64_switch_mode_phys)
+ {
+	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
+	mov r15=ip
+ }
+	;;
+ {
+	flushrs				// must be first insn in group
+	srlz.i
+ }
+	;;
+	mov cr.ipsr=r16			// set new PSR
+	add r3=1f-ia64_switch_mode_phys,r15
+
+	mov r19=ar.bsp
+	mov r20=sp
+	mov r14=rp			// get return address into a general register
+	;;
+
+	// going to physical mode, use tpa to translate virt->phys
+	tpa r17=r19
+	tpa r3=r3
+	tpa sp=sp
+	tpa r14=r14
+	;;
+
+	mov r18=ar.rnat			// save ar.rnat
+	mov ar.bspstore=r17		// this steps on ar.rnat
+	mov cr.iip=r3
+	mov cr.ifs=r0
+	;;
+	mov ar.rnat=r18			// restore ar.rnat
+	rfi				// must be last insn in group
+	;;
+1:	mov rp=r14
+	br.ret.sptk.many rp
+END(ia64_switch_mode_phys)
+
+/*
+ * Switch execution mode from physical to virtual
+ *
+ * Inputs:
+ *	r16 = new psr to establish
+ *	r19 = new bspstore to establish
+ *	r20 = new sp to establish
+ *
+ * Note: RSE must already be in enforced lazy mode
+ */
+GLOBAL_ENTRY(ia64_switch_mode_virt)
+ {
+	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
+	mov r15=ip
+ }
+	;;
+ {
+	flushrs				// must be first insn in group
+	srlz.i
+ }
+	;;
+	mov cr.ipsr=r16			// set new PSR
+	add r3=1f-ia64_switch_mode_virt,r15
+
+	mov r14=rp			// get return address into a general register
+	;;
+
+	// going to virtual
+	//   - for code addresses, set upper bits of addr to KERNEL_START
+	//   - for stack addresses, copy from input argument
+	movl r18=KERNEL_START
+	dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+	dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+	mov sp=r20
+	;;
+	or r3=r3,r18
+	or r14=r14,r18
+	;;
+
+	mov r18=ar.rnat			// save ar.rnat
+	mov ar.bspstore=r19		// this steps on ar.rnat
+	mov cr.iip=r3
+	mov cr.ifs=r0
+	;;
+	mov ar.rnat=r18			// restore ar.rnat
+	rfi				// must be last insn in group
+	;;
+1:	mov rp=r14
+	br.ret.sptk.many rp
+END(ia64_switch_mode_virt)
+
+GLOBAL_ENTRY(ia64_delay_loop)
+	.prologue
+{	nop 0			// work around GAS unwind info generation bug...
+	.save ar.lc,r2
+	mov r2=ar.lc
+	.body
+	;;
+	mov ar.lc=r32
+}
+	;;
+	// force loop to be 32-byte aligned (GAS bug means we cannot use .align
+	// inside function body without corrupting unwind info).
+{	nop 0 }
+1:	br.cloop.sptk.few 1b
+	;;
+	mov ar.lc=r2
+	br.ret.sptk.many rp
+END(ia64_delay_loop)
+
+/*
+ * Return a CPU-local timestamp in nano-seconds.  This timestamp is
+ * NOT synchronized across CPUs its return value must never be
+ * compared against the values returned on another CPU.  The usage in
+ * kernel/sched.c ensures that.
+ *
+ * The return-value of sched_clock() is NOT supposed to wrap-around.
+ * If it did, it would cause some scheduling hiccups (at the worst).
+ * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
+ * that would happen only once every 5+ years.
+ *
+ * The code below basically calculates:
+ *
+ *   (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
+ *
+ * except that the multiplication and the shift are done with 128-bit
+ * intermediate precision so that we can produce a full 64-bit result.
+ */
+GLOBAL_ENTRY(ia64_native_sched_clock)
+	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+	mov.m r9=ar.itc		// fetch cycle-counter				(35 cyc)
+	;;
+	ldf8 f8=[r8]
+	;;
+	setf.sig f9=r9		// certain to stall, so issue it _after_ ldf8...
+	;;
+	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
+	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
+	;;
+	getf.sig r8=f10		//						(5 cyc)
+	getf.sig r9=f11
+	;;
+	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+	br.ret.sptk.many rp
+END(ia64_native_sched_clock)
+#ifndef CONFIG_PARAVIRT
+	//unsigned long long
+	//sched_clock(void) __attribute__((alias("ia64_native_sched_clock")));
+	.global sched_clock
+sched_clock = ia64_native_sched_clock
+#endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+GLOBAL_ENTRY(cycle_to_cputime)
+	alloc r16=ar.pfs,1,0,0,0
+	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+	;;
+	ldf8 f8=[r8]
+	;;
+	setf.sig f9=r32
+	;;
+	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
+	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
+	;;
+	getf.sig r8=f10		//						(5 cyc)
+	getf.sig r9=f11
+	;;
+	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+	br.ret.sptk.many rp
+END(cycle_to_cputime)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
+GLOBAL_ENTRY(start_kernel_thread)
+	.prologue
+	.save rp, r0				// this is the end of the call-chain
+	.body
+	alloc r2 = ar.pfs, 0, 0, 2, 0
+	mov out0 = r9
+	mov out1 = r11;;
+	br.call.sptk.many rp = kernel_thread_helper;;
+	mov out0 = r8
+	br.call.sptk.many rp = sys_exit;;
+1:	br.sptk.few 1b				// not reached
+END(start_kernel_thread)
+
+#ifdef CONFIG_IA64_BRL_EMU
+
+/*
+ *  Assembly routines used by brl_emu.c to set preserved register state.
+ */
+
+#define SET_REG(reg)				\
+ GLOBAL_ENTRY(ia64_set_##reg);			\
+	alloc r16=ar.pfs,1,0,0,0;		\
+	mov reg=r32;				\
+	;;					\
+	br.ret.sptk.many rp;			\
+ END(ia64_set_##reg)
+
+SET_REG(b1);
+SET_REG(b2);
+SET_REG(b3);
+SET_REG(b4);
+SET_REG(b5);
+
+#endif /* CONFIG_IA64_BRL_EMU */
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_HOTPLUG_CPU
+GLOBAL_ENTRY(ia64_jump_to_sal)
+	alloc r16=ar.pfs,1,0,0,0;;
+	rsm psr.i  | psr.ic
+{
+	flushrs
+	srlz.i
+}
+	tpa r25=in0
+	movl r18=tlb_purge_done;;
+	DATA_VA_TO_PA(r18);;
+	mov b1=r18 	// Return location
+	movl r18=ia64_do_tlb_purge;;
+	DATA_VA_TO_PA(r18);;
+	mov b2=r18 	// doing tlb_flush work
+	mov ar.rsc=0  // Put RSE  in enforced lazy, LE mode
+	movl r17=1f;;
+	DATA_VA_TO_PA(r17);;
+	mov cr.iip=r17
+	movl r16=SAL_PSR_BITS_TO_SET;;
+	mov cr.ipsr=r16
+	mov cr.ifs=r0;;
+	rfi;;			// note: this unmask MCA/INIT (psr.mc)
+1:
+	/*
+	 * Invalidate all TLB data/inst
+	 */
+	br.sptk.many b2;; // jump to tlb purge code
+
+tlb_purge_done:
+	RESTORE_REGION_REGS(r25, r17,r18,r19);;
+	RESTORE_REG(b0, r25, r17);;
+	RESTORE_REG(b1, r25, r17);;
+	RESTORE_REG(b2, r25, r17);;
+	RESTORE_REG(b3, r25, r17);;
+	RESTORE_REG(b4, r25, r17);;
+	RESTORE_REG(b5, r25, r17);;
+	ld8 r1=[r25],0x08;;
+	ld8 r12=[r25],0x08;;
+	ld8 r13=[r25],0x08;;
+	RESTORE_REG(ar.fpsr, r25, r17);;
+	RESTORE_REG(ar.pfs, r25, r17);;
+	RESTORE_REG(ar.rnat, r25, r17);;
+	RESTORE_REG(ar.unat, r25, r17);;
+	RESTORE_REG(ar.bspstore, r25, r17);;
+	RESTORE_REG(cr.dcr, r25, r17);;
+	RESTORE_REG(cr.iva, r25, r17);;
+	RESTORE_REG(cr.pta, r25, r17);;
+	srlz.d;;	// required not to violate RAW dependency
+	RESTORE_REG(cr.itv, r25, r17);;
+	RESTORE_REG(cr.pmv, r25, r17);;
+	RESTORE_REG(cr.cmcv, r25, r17);;
+	RESTORE_REG(cr.lrr0, r25, r17);;
+	RESTORE_REG(cr.lrr1, r25, r17);;
+	ld8 r4=[r25],0x08;;
+	ld8 r5=[r25],0x08;;
+	ld8 r6=[r25],0x08;;
+	ld8 r7=[r25],0x08;;
+	ld8 r17=[r25],0x08;;
+	mov pr=r17,-1;;
+	RESTORE_REG(ar.lc, r25, r17);;
+	/*
+	 * Now Restore floating point regs
+	 */
+	ldf.fill.nta f2=[r25],16;;
+	ldf.fill.nta f3=[r25],16;;
+	ldf.fill.nta f4=[r25],16;;
+	ldf.fill.nta f5=[r25],16;;
+	ldf.fill.nta f16=[r25],16;;
+	ldf.fill.nta f17=[r25],16;;
+	ldf.fill.nta f18=[r25],16;;
+	ldf.fill.nta f19=[r25],16;;
+	ldf.fill.nta f20=[r25],16;;
+	ldf.fill.nta f21=[r25],16;;
+	ldf.fill.nta f22=[r25],16;;
+	ldf.fill.nta f23=[r25],16;;
+	ldf.fill.nta f24=[r25],16;;
+	ldf.fill.nta f25=[r25],16;;
+	ldf.fill.nta f26=[r25],16;;
+	ldf.fill.nta f27=[r25],16;;
+	ldf.fill.nta f28=[r25],16;;
+	ldf.fill.nta f29=[r25],16;;
+	ldf.fill.nta f30=[r25],16;;
+	ldf.fill.nta f31=[r25],16;;
+
+	/*
+	 * Now that we have done all the register restores
+	 * we are now ready for the big DIVE to SAL Land
+	 */
+	ssm psr.ic;;
+	srlz.d;;
+	br.ret.sptk.many b0;;
+END(ia64_jump_to_sal)
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#endif /* CONFIG_SMP */
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
new file mode 100644
index 00000000..7f4a0ed2
--- /dev/null
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -0,0 +1,98 @@
+/*
+ * Architecture-specific kernel symbols
+ *
+ * Don't put any exports here unless it's defined in an assembler file.
+ * All other exports should be put directly after the definition.
+ */
+
+#include <linux/module.h>
+
+#include <linux/string.h>
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(strlen);
+
+#include<asm/pgtable.h>
+EXPORT_SYMBOL_GPL(empty_zero_page);
+
+#include <asm/checksum.h>
+EXPORT_SYMBOL(ip_fast_csum);		/* hand-coded assembly */
+EXPORT_SYMBOL(csum_ipv6_magic);
+
+#include <asm/page.h>
+EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+#include <linux/bootmem.h>
+EXPORT_SYMBOL(min_low_pfn);	/* defined by bootmem.c, but not exported by generic code */
+EXPORT_SYMBOL(max_low_pfn);	/* defined by bootmem.c, but not exported by generic code */
+#endif
+
+#include <asm/processor.h>
+EXPORT_SYMBOL(ia64_cpu_info);
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(local_per_cpu_offset);
+#endif
+
+#include <asm/uaccess.h>
+EXPORT_SYMBOL(__copy_user);
+EXPORT_SYMBOL(__do_clear_user);
+EXPORT_SYMBOL(__strlen_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(__strnlen_user);
+
+/* from arch/ia64/lib */
+extern void __divsi3(void);
+extern void __udivsi3(void);
+extern void __modsi3(void);
+extern void __umodsi3(void);
+extern void __divdi3(void);
+extern void __udivdi3(void);
+extern void __moddi3(void);
+extern void __umoddi3(void);
+
+EXPORT_SYMBOL(__divsi3);
+EXPORT_SYMBOL(__udivsi3);
+EXPORT_SYMBOL(__modsi3);
+EXPORT_SYMBOL(__umodsi3);
+EXPORT_SYMBOL(__divdi3);
+EXPORT_SYMBOL(__udivdi3);
+EXPORT_SYMBOL(__moddi3);
+EXPORT_SYMBOL(__umoddi3);
+
+#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE)
+extern void xor_ia64_2(void);
+extern void xor_ia64_3(void);
+extern void xor_ia64_4(void);
+extern void xor_ia64_5(void);
+
+EXPORT_SYMBOL(xor_ia64_2);
+EXPORT_SYMBOL(xor_ia64_3);
+EXPORT_SYMBOL(xor_ia64_4);
+EXPORT_SYMBOL(xor_ia64_5);
+#endif
+
+#include <asm/pal.h>
+EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
+EXPORT_SYMBOL(ia64_pal_call_phys_static);
+EXPORT_SYMBOL(ia64_pal_call_stacked);
+EXPORT_SYMBOL(ia64_pal_call_static);
+EXPORT_SYMBOL(ia64_load_scratch_fpregs);
+EXPORT_SYMBOL(ia64_save_scratch_fpregs);
+
+#include <asm/unwind.h>
+EXPORT_SYMBOL(unw_init_running);
+
+#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE)
+extern void esi_call_phys (void);
+EXPORT_SYMBOL_GPL(esi_call_phys);
+#endif
+extern char ia64_ivt[];
+EXPORT_SYMBOL(ia64_ivt);
+
+#include <asm/ftrace.h>
+#ifdef CONFIG_FUNCTION_TRACER
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
new file mode 100644
index 00000000..f9efe973
--- /dev/null
+++ b/arch/ia64/kernel/init_task.c
@@ -0,0 +1,42 @@
+/*
+ * This is where we statically allocate and initialize the initial
+ * task.
+ *
+ * Copyright (C) 1999, 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is properly aligned due to the way process stacks are
+ * handled. This is done by having a special ".data..init_task" section...
+ */
+#define init_thread_info	init_task_mem.s.thread_info
+
+union {
+	struct {
+		struct task_struct task;
+		struct thread_info thread_info;
+	} s;
+	unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
+} init_task_mem asm ("init_task") __init_task_data =
+	{{
+	.task =		INIT_TASK(init_task_mem.s.task),
+	.thread_info =	INIT_THREAD_INFO(init_task_mem.s.task)
+}};
+
+EXPORT_SYMBOL(init_task);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
new file mode 100644
index 00000000..b0f9afeb
--- /dev/null
+++ b/arch/ia64/kernel/iosapic.c
@@ -0,0 +1,1119 @@
+/*
+ * I/O SAPIC support.
+ *
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
+ * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
+ *
+ * 00/04/19	D. Mosberger	Rewritten to mirror more closely the x86 I/O
+ *				APIC code.  In particular, we now have separate
+ *				handlers for edge and level triggered
+ *				interrupts.
+ * 00/10/27	Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
+ *				allocation PCI to vector mapping, shared PCI
+ *				interrupts.
+ * 00/10/27	D. Mosberger	Document things a bit more to make them more
+ *				understandable.  Clean up much of the old
+ *				IOSAPIC cruft.
+ * 01/07/27	J.I. Lee	PCI irq routing, Platform/Legacy interrupts
+ *				and fixes for ACPI S5(SoftOff) support.
+ * 02/01/23	J.I. Lee	iosapic pgm fixes for PCI irq routing from _PRT
+ * 02/01/07     E. Focht        <efocht@ess.nec.de> Redirectable interrupt
+ *				vectors in iosapic_set_affinity(),
+ *				initializations for /proc/irq/#/smp_affinity
+ * 02/04/02	P. Diefenbaugh	Cleaned up ACPI PCI IRQ routing.
+ * 02/04/18	J.I. Lee	bug fix in iosapic_init_pci_irq
+ * 02/04/30	J.I. Lee	bug fix in find_iosapic to fix ACPI PCI IRQ to
+ *				IOSAPIC mapping error
+ * 02/07/29	T. Kochi	Allocate interrupt vectors dynamically
+ * 02/08/04	T. Kochi	Cleaned up terminology (irq, global system
+ *				interrupt, vector, etc.)
+ * 02/09/20	D. Mosberger	Simplified by taking advantage of ACPI's
+ *				pci_irq code.
+ * 03/02/19	B. Helgaas	Make pcat_compat system-wide, not per-IOSAPIC.
+ *				Remove iosapic_address & gsi_base from
+ *				external interfaces.  Rationalize
+ *				__init/__devinit attributes.
+ * 04/12/04 Ashok Raj	<ashok.raj@intel.com> Intel Corporation 2004
+ *				Updated to work with irq migration necessary
+ *				for CPU Hotplug
+ */
+/*
+ * Here is what the interrupt logic between a PCI device and the kernel looks
+ * like:
+ *
+ * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
+ *     INTD).  The device is uniquely identified by its bus-, and slot-number
+ *     (the function number does not matter here because all functions share
+ *     the same interrupt lines).
+ *
+ * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC
+ *     controller.  Multiple interrupt lines may have to share the same
+ *     IOSAPIC pin (if they're level triggered and use the same polarity).
+ *     Each interrupt line has a unique Global System Interrupt (GSI) number
+ *     which can be calculated as the sum of the controller's base GSI number
+ *     and the IOSAPIC pin number to which the line connects.
+ *
+ * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the
+ * IOSAPIC pin into the IA-64 interrupt vector.  This interrupt vector is then
+ * sent to the CPU.
+ *
+ * (4) The kernel recognizes an interrupt as an IRQ.  The IRQ interface is
+ *     used as architecture-independent interrupt handling mechanism in Linux.
+ *     As an IRQ is a number, we have to have
+ *     IA-64 interrupt vector number <-> IRQ number mapping.  On smaller
+ *     systems, we use one-to-one mapping between IA-64 vector and IRQ.  A
+ *     platform can implement platform_irq_to_vector(irq) and
+ *     platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
+ *     Please see also arch/ia64/include/asm/hw_irq.h for those APIs.
+ *
+ * To sum up, there are three levels of mappings involved:
+ *
+ *	PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
+ *
+ * Note: The term "IRQ" is loosely used everywhere in Linux kernel to
+ * describeinterrupts.  Now we use "IRQ" only for Linux IRQ's.  ISA IRQ
+ * (isa_irq) is the only exception in this source code.
+ */
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/bootmem.h>
+
+#include <asm/delay.h>
+#include <asm/hw_irq.h>
+#include <asm/io.h>
+#include <asm/iosapic.h>
+#include <asm/machvec.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+#undef DEBUG_INTERRUPT_ROUTING
+
+#ifdef DEBUG_INTERRUPT_ROUTING
+#define DBG(fmt...)	printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+static DEFINE_SPINLOCK(iosapic_lock);
+
+/*
+ * These tables map IA-64 vectors to the IOSAPIC pin that generates this
+ * vector.
+ */
+
+#define NO_REF_RTE	0
+
+static struct iosapic {
+	char __iomem	*addr;		/* base address of IOSAPIC */
+	unsigned int	gsi_base;	/* GSI base */
+	unsigned short	num_rte;	/* # of RTEs on this IOSAPIC */
+	int		rtes_inuse;	/* # of RTEs in use on this IOSAPIC */
+#ifdef CONFIG_NUMA
+	unsigned short	node;		/* numa node association via pxm */
+#endif
+	spinlock_t	lock;		/* lock for indirect reg access */
+} iosapic_lists[NR_IOSAPICS];
+
+struct iosapic_rte_info {
+	struct list_head rte_list;	/* RTEs sharing the same vector */
+	char		rte_index;	/* IOSAPIC RTE index */
+	int		refcnt;		/* reference counter */
+	struct iosapic	*iosapic;
+} ____cacheline_aligned;
+
+static struct iosapic_intr_info {
+	struct list_head rtes;		/* RTEs using this vector (empty =>
+					 * not an IOSAPIC interrupt) */
+	int		count;		/* # of registered RTEs */
+	u32		low32;		/* current value of low word of
+					 * Redirection table entry */
+	unsigned int	dest;		/* destination CPU physical ID */
+	unsigned char	dmode	: 3;	/* delivery mode (see iosapic.h) */
+	unsigned char 	polarity: 1;	/* interrupt polarity
+					 * (see iosapic.h) */
+	unsigned char	trigger	: 1;	/* trigger mode (see iosapic.h) */
+} iosapic_intr_info[NR_IRQS];
+
+static unsigned char pcat_compat __devinitdata;	/* 8259 compatibility flag */
+
+static inline void
+iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&iosapic->lock, flags);
+	__iosapic_write(iosapic->addr, reg, val);
+	spin_unlock_irqrestore(&iosapic->lock, flags);
+}
+
+/*
+ * Find an IOSAPIC associated with a GSI
+ */
+static inline int
+find_iosapic (unsigned int gsi)
+{
+	int i;
+
+	for (i = 0; i < NR_IOSAPICS; i++) {
+		if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
+		    iosapic_lists[i].num_rte)
+			return i;
+	}
+
+	return -1;
+}
+
+static inline int __gsi_to_irq(unsigned int gsi)
+{
+	int irq;
+	struct iosapic_intr_info *info;
+	struct iosapic_rte_info *rte;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		info = &iosapic_intr_info[irq];
+		list_for_each_entry(rte, &info->rtes, rte_list)
+			if (rte->iosapic->gsi_base + rte->rte_index == gsi)
+				return irq;
+	}
+	return -1;
+}
+
+int
+gsi_to_irq (unsigned int gsi)
+{
+	unsigned long flags;
+	int irq;
+
+	spin_lock_irqsave(&iosapic_lock, flags);
+	irq = __gsi_to_irq(gsi);
+	spin_unlock_irqrestore(&iosapic_lock, flags);
+	return irq;
+}
+
+static struct iosapic_rte_info *find_rte(unsigned int irq, unsigned int gsi)
+{
+	struct iosapic_rte_info *rte;
+
+	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
+		if (rte->iosapic->gsi_base + rte->rte_index == gsi)
+			return rte;
+	return NULL;
+}
+
+static void
+set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask)
+{
+	unsigned long pol, trigger, dmode;
+	u32 low32, high32;
+	int rte_index;
+	char redir;
+	struct iosapic_rte_info *rte;
+	ia64_vector vector = irq_to_vector(irq);
+
+	DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
+
+	rte = find_rte(irq, gsi);
+	if (!rte)
+		return;		/* not an IOSAPIC interrupt */
+
+	rte_index = rte->rte_index;
+	pol     = iosapic_intr_info[irq].polarity;
+	trigger = iosapic_intr_info[irq].trigger;
+	dmode   = iosapic_intr_info[irq].dmode;
+
+	redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
+
+#ifdef CONFIG_SMP
+	set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
+#endif
+
+	low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
+		 (trigger << IOSAPIC_TRIGGER_SHIFT) |
+		 (dmode << IOSAPIC_DELIVERY_SHIFT) |
+		 ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
+		 vector);
+
+	/* dest contains both id and eid */
+	high32 = (dest << IOSAPIC_DEST_SHIFT);
+
+	iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
+	iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
+	iosapic_intr_info[irq].low32 = low32;
+	iosapic_intr_info[irq].dest = dest;
+}
+
+static void
+nop (struct irq_data *data)
+{
+	/* do nothing... */
+}
+
+
+#ifdef CONFIG_KEXEC
+void
+kexec_disable_iosapic(void)
+{
+	struct iosapic_intr_info *info;
+	struct iosapic_rte_info *rte;
+	ia64_vector vec;
+	int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		info = &iosapic_intr_info[irq];
+		vec = irq_to_vector(irq);
+		list_for_each_entry(rte, &info->rtes,
+				rte_list) {
+			iosapic_write(rte->iosapic,
+					IOSAPIC_RTE_LOW(rte->rte_index),
+					IOSAPIC_MASK|vec);
+			iosapic_eoi(rte->iosapic->addr, vec);
+		}
+	}
+}
+#endif
+
+static void
+mask_irq (struct irq_data *data)
+{
+	unsigned int irq = data->irq;
+	u32 low32;
+	int rte_index;
+	struct iosapic_rte_info *rte;
+
+	if (!iosapic_intr_info[irq].count)
+		return;			/* not an IOSAPIC interrupt! */
+
+	/* set only the mask bit */
+	low32 = iosapic_intr_info[irq].low32 |= IOSAPIC_MASK;
+	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
+		rte_index = rte->rte_index;
+		iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
+	}
+}
+
+static void
+unmask_irq (struct irq_data *data)
+{
+	unsigned int irq = data->irq;
+	u32 low32;
+	int rte_index;
+	struct iosapic_rte_info *rte;
+
+	if (!iosapic_intr_info[irq].count)
+		return;			/* not an IOSAPIC interrupt! */
+
+	low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK;
+	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
+		rte_index = rte->rte_index;
+		iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
+	}
+}
+
+
+static int
+iosapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		     bool force)
+{
+#ifdef CONFIG_SMP
+	unsigned int irq = data->irq;
+	u32 high32, low32;
+	int cpu, dest, rte_index;
+	int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
+	struct iosapic_rte_info *rte;
+	struct iosapic *iosapic;
+
+	irq &= (~IA64_IRQ_REDIRECTED);
+
+	cpu = cpumask_first_and(cpu_online_mask, mask);
+	if (cpu >= nr_cpu_ids)
+		return -1;
+
+	if (irq_prepare_move(irq, cpu))
+		return -1;
+
+	dest = cpu_physical_id(cpu);
+
+	if (!iosapic_intr_info[irq].count)
+		return -1;			/* not an IOSAPIC interrupt */
+
+	set_irq_affinity_info(irq, dest, redir);
+
+	/* dest contains both id and eid */
+	high32 = dest << IOSAPIC_DEST_SHIFT;
+
+	low32 = iosapic_intr_info[irq].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
+	if (redir)
+		/* change delivery mode to lowest priority */
+		low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
+	else
+		/* change delivery mode to fixed */
+		low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
+	low32 &= IOSAPIC_VECTOR_MASK;
+	low32 |= irq_to_vector(irq);
+
+	iosapic_intr_info[irq].low32 = low32;
+	iosapic_intr_info[irq].dest = dest;
+	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
+		iosapic = rte->iosapic;
+		rte_index = rte->rte_index;
+		iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
+		iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
+	}
+
+#endif
+	return 0;
+}
+
+/*
+ * Handlers for level-triggered interrupts.
+ */
+
+static unsigned int
+iosapic_startup_level_irq (struct irq_data *data)
+{
+	unmask_irq(data);
+	return 0;
+}
+
+static void
+iosapic_unmask_level_irq (struct irq_data *data)
+{
+	unsigned int irq = data->irq;
+	ia64_vector vec = irq_to_vector(irq);
+	struct iosapic_rte_info *rte;
+	int do_unmask_irq = 0;
+
+	irq_complete_move(irq);
+	if (unlikely(irqd_is_setaffinity_pending(data))) {
+		do_unmask_irq = 1;
+		mask_irq(data);
+	} else
+		unmask_irq(data);
+
+	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
+		iosapic_eoi(rte->iosapic->addr, vec);
+
+	if (unlikely(do_unmask_irq)) {
+		irq_move_masked_irq(data);
+		unmask_irq(data);
+	}
+}
+
+#define iosapic_shutdown_level_irq	mask_irq
+#define iosapic_enable_level_irq	unmask_irq
+#define iosapic_disable_level_irq	mask_irq
+#define iosapic_ack_level_irq		nop
+
+static struct irq_chip irq_type_iosapic_level = {
+	.name =			"IO-SAPIC-level",
+	.irq_startup =		iosapic_startup_level_irq,
+	.irq_shutdown =		iosapic_shutdown_level_irq,
+	.irq_enable =		iosapic_enable_level_irq,
+	.irq_disable =		iosapic_disable_level_irq,
+	.irq_ack =		iosapic_ack_level_irq,
+	.irq_mask =		mask_irq,
+	.irq_unmask =		iosapic_unmask_level_irq,
+	.irq_set_affinity =	iosapic_set_affinity
+};
+
+/*
+ * Handlers for edge-triggered interrupts.
+ */
+
+static unsigned int
+iosapic_startup_edge_irq (struct irq_data *data)
+{
+	unmask_irq(data);
+	/*
+	 * IOSAPIC simply drops interrupts pended while the
+	 * corresponding pin was masked, so we can't know if an
+	 * interrupt is pending already.  Let's hope not...
+	 */
+	return 0;
+}
+
+static void
+iosapic_ack_edge_irq (struct irq_data *data)
+{
+	irq_complete_move(data->irq);
+	irq_move_irq(data);
+}
+
+#define iosapic_enable_edge_irq		unmask_irq
+#define iosapic_disable_edge_irq	nop
+
+static struct irq_chip irq_type_iosapic_edge = {
+	.name =			"IO-SAPIC-edge",
+	.irq_startup =		iosapic_startup_edge_irq,
+	.irq_shutdown =		iosapic_disable_edge_irq,
+	.irq_enable =		iosapic_enable_edge_irq,
+	.irq_disable =		iosapic_disable_edge_irq,
+	.irq_ack =		iosapic_ack_edge_irq,
+	.irq_mask =		mask_irq,
+	.irq_unmask =		unmask_irq,
+	.irq_set_affinity =	iosapic_set_affinity
+};
+
+static unsigned int
+iosapic_version (char __iomem *addr)
+{
+	/*
+	 * IOSAPIC Version Register return 32 bit structure like:
+	 * {
+	 *	unsigned int version   : 8;
+	 *	unsigned int reserved1 : 8;
+	 *	unsigned int max_redir : 8;
+	 *	unsigned int reserved2 : 8;
+	 * }
+	 */
+	return __iosapic_read(addr, IOSAPIC_VERSION);
+}
+
+static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol)
+{
+	int i, irq = -ENOSPC, min_count = -1;
+	struct iosapic_intr_info *info;
+
+	/*
+	 * shared vectors for edge-triggered interrupts are not
+	 * supported yet
+	 */
+	if (trigger == IOSAPIC_EDGE)
+		return -EINVAL;
+
+	for (i = 0; i < NR_IRQS; i++) {
+		info = &iosapic_intr_info[i];
+		if (info->trigger == trigger && info->polarity == pol &&
+		    (info->dmode == IOSAPIC_FIXED ||
+		     info->dmode == IOSAPIC_LOWEST_PRIORITY) &&
+		    can_request_irq(i, IRQF_SHARED)) {
+			if (min_count == -1 || info->count < min_count) {
+				irq = i;
+				min_count = info->count;
+			}
+		}
+	}
+	return irq;
+}
+
+/*
+ * if the given vector is already owned by other,
+ *  assign a new vector for the other and make the vector available
+ */
+static void __init
+iosapic_reassign_vector (int irq)
+{
+	int new_irq;
+
+	if (iosapic_intr_info[irq].count) {
+		new_irq = create_irq();
+		if (new_irq < 0)
+			panic("%s: out of interrupt vectors!\n", __func__);
+		printk(KERN_INFO "Reassigning vector %d to %d\n",
+		       irq_to_vector(irq), irq_to_vector(new_irq));
+		memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq],
+		       sizeof(struct iosapic_intr_info));
+		INIT_LIST_HEAD(&iosapic_intr_info[new_irq].rtes);
+		list_move(iosapic_intr_info[irq].rtes.next,
+			  &iosapic_intr_info[new_irq].rtes);
+		memset(&iosapic_intr_info[irq], 0,
+		       sizeof(struct iosapic_intr_info));
+		iosapic_intr_info[irq].low32 = IOSAPIC_MASK;
+		INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes);
+	}
+}
+
+static inline int irq_is_shared (int irq)
+{
+	return (iosapic_intr_info[irq].count > 1);
+}
+
+struct irq_chip*
+ia64_native_iosapic_get_irq_chip(unsigned long trigger)
+{
+	if (trigger == IOSAPIC_EDGE)
+		return &irq_type_iosapic_edge;
+	else
+		return &irq_type_iosapic_level;
+}
+
+static int
+register_intr (unsigned int gsi, int irq, unsigned char delivery,
+	       unsigned long polarity, unsigned long trigger)
+{
+	struct irq_chip *chip, *irq_type;
+	int index;
+	struct iosapic_rte_info *rte;
+
+	index = find_iosapic(gsi);
+	if (index < 0) {
+		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
+		       __func__, gsi);
+		return -ENODEV;
+	}
+
+	rte = find_rte(irq, gsi);
+	if (!rte) {
+		rte = kzalloc(sizeof (*rte), GFP_ATOMIC);
+		if (!rte) {
+			printk(KERN_WARNING "%s: cannot allocate memory\n",
+			       __func__);
+			return -ENOMEM;
+		}
+
+		rte->iosapic	= &iosapic_lists[index];
+		rte->rte_index	= gsi - rte->iosapic->gsi_base;
+		rte->refcnt++;
+		list_add_tail(&rte->rte_list, &iosapic_intr_info[irq].rtes);
+		iosapic_intr_info[irq].count++;
+		iosapic_lists[index].rtes_inuse++;
+	}
+	else if (rte->refcnt == NO_REF_RTE) {
+		struct iosapic_intr_info *info = &iosapic_intr_info[irq];
+		if (info->count > 0 &&
+		    (info->trigger != trigger || info->polarity != polarity)){
+			printk (KERN_WARNING
+				"%s: cannot override the interrupt\n",
+				__func__);
+			return -EINVAL;
+		}
+		rte->refcnt++;
+		iosapic_intr_info[irq].count++;
+		iosapic_lists[index].rtes_inuse++;
+	}
+
+	iosapic_intr_info[irq].polarity = polarity;
+	iosapic_intr_info[irq].dmode    = delivery;
+	iosapic_intr_info[irq].trigger  = trigger;
+
+	irq_type = iosapic_get_irq_chip(trigger);
+
+	chip = irq_get_chip(irq);
+	if (irq_type != NULL && chip != irq_type) {
+		if (chip != &no_irq_chip)
+			printk(KERN_WARNING
+			       "%s: changing vector %d from %s to %s\n",
+			       __func__, irq_to_vector(irq),
+			       chip->name, irq_type->name);
+		chip = irq_type;
+	}
+	__irq_set_chip_handler_name_locked(irq, chip, trigger == IOSAPIC_EDGE ?
+					   handle_edge_irq : handle_level_irq,
+					   NULL);
+	return 0;
+}
+
+static unsigned int
+get_target_cpu (unsigned int gsi, int irq)
+{
+#ifdef CONFIG_SMP
+	static int cpu = -1;
+	extern int cpe_vector;
+	cpumask_t domain = irq_to_domain(irq);
+
+	/*
+	 * In case of vector shared by multiple RTEs, all RTEs that
+	 * share the vector need to use the same destination CPU.
+	 */
+	if (iosapic_intr_info[irq].count)
+		return iosapic_intr_info[irq].dest;
+
+	/*
+	 * If the platform supports redirection via XTP, let it
+	 * distribute interrupts.
+	 */
+	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
+		return cpu_physical_id(smp_processor_id());
+
+	/*
+	 * Some interrupts (ACPI SCI, for instance) are registered
+	 * before the BSP is marked as online.
+	 */
+	if (!cpu_online(smp_processor_id()))
+		return cpu_physical_id(smp_processor_id());
+
+#ifdef CONFIG_ACPI
+	if (cpe_vector > 0 && irq_to_vector(irq) == IA64_CPEP_VECTOR)
+		return get_cpei_target_cpu();
+#endif
+
+#ifdef CONFIG_NUMA
+	{
+		int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
+		const struct cpumask *cpu_mask;
+
+		iosapic_index = find_iosapic(gsi);
+		if (iosapic_index < 0 ||
+		    iosapic_lists[iosapic_index].node == MAX_NUMNODES)
+			goto skip_numa_setup;
+
+		cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node);
+		num_cpus = 0;
+		for_each_cpu_and(numa_cpu, cpu_mask, &domain) {
+			if (cpu_online(numa_cpu))
+				num_cpus++;
+		}
+
+		if (!num_cpus)
+			goto skip_numa_setup;
+
+		/* Use irq assignment to distribute across cpus in node */
+		cpu_index = irq % num_cpus;
+
+		for_each_cpu_and(numa_cpu, cpu_mask, &domain)
+			if (cpu_online(numa_cpu) && i++ >= cpu_index)
+				break;
+
+		if (numa_cpu < nr_cpu_ids)
+			return cpu_physical_id(numa_cpu);
+	}
+skip_numa_setup:
+#endif
+	/*
+	 * Otherwise, round-robin interrupt vectors across all the
+	 * processors.  (It'd be nice if we could be smarter in the
+	 * case of NUMA.)
+	 */
+	do {
+		if (++cpu >= nr_cpu_ids)
+			cpu = 0;
+	} while (!cpu_online(cpu) || !cpu_isset(cpu, domain));
+
+	return cpu_physical_id(cpu);
+#else  /* CONFIG_SMP */
+	return cpu_physical_id(smp_processor_id());
+#endif
+}
+
+static inline unsigned char choose_dmode(void)
+{
+#ifdef CONFIG_SMP
+	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
+		return IOSAPIC_LOWEST_PRIORITY;
+#endif
+	return IOSAPIC_FIXED;
+}
+
+/*
+ * ACPI can describe IOSAPIC interrupts via static tables and namespace
+ * methods.  This provides an interface to register those interrupts and
+ * program the IOSAPIC RTE.
+ */
+int
+iosapic_register_intr (unsigned int gsi,
+		       unsigned long polarity, unsigned long trigger)
+{
+	int irq, mask = 1, err;
+	unsigned int dest;
+	unsigned long flags;
+	struct iosapic_rte_info *rte;
+	u32 low32;
+	unsigned char dmode;
+	struct irq_desc *desc;
+
+	/*
+	 * If this GSI has already been registered (i.e., it's a
+	 * shared interrupt, or we lost a race to register it),
+	 * don't touch the RTE.
+	 */
+	spin_lock_irqsave(&iosapic_lock, flags);
+	irq = __gsi_to_irq(gsi);
+	if (irq > 0) {
+		rte = find_rte(irq, gsi);
+		if(iosapic_intr_info[irq].count == 0) {
+			assign_irq_vector(irq);
+			dynamic_irq_init(irq);
+		} else if (rte->refcnt != NO_REF_RTE) {
+			rte->refcnt++;
+			goto unlock_iosapic_lock;
+		}
+	} else
+		irq = create_irq();
+
+	/* If vector is running out, we try to find a sharable vector */
+	if (irq < 0) {
+		irq = iosapic_find_sharable_irq(trigger, polarity);
+		if (irq < 0)
+			goto unlock_iosapic_lock;
+	}
+
+	desc = irq_to_desc(irq);
+	raw_spin_lock(&desc->lock);
+	dest = get_target_cpu(gsi, irq);
+	dmode = choose_dmode();
+	err = register_intr(gsi, irq, dmode, polarity, trigger);
+	if (err < 0) {
+		raw_spin_unlock(&desc->lock);
+		irq = err;
+		goto unlock_iosapic_lock;
+	}
+
+	/*
+	 * If the vector is shared and already unmasked for other
+	 * interrupt sources, don't mask it.
+	 */
+	low32 = iosapic_intr_info[irq].low32;
+	if (irq_is_shared(irq) && !(low32 & IOSAPIC_MASK))
+		mask = 0;
+	set_rte(gsi, irq, dest, mask);
+
+	printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
+	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
+	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
+	       cpu_logical_id(dest), dest, irq_to_vector(irq));
+
+	raw_spin_unlock(&desc->lock);
+ unlock_iosapic_lock:
+	spin_unlock_irqrestore(&iosapic_lock, flags);
+	return irq;
+}
+
+void
+iosapic_unregister_intr (unsigned int gsi)
+{
+	unsigned long flags;
+	int irq, index;
+	u32 low32;
+	unsigned long trigger, polarity;
+	unsigned int dest;
+	struct iosapic_rte_info *rte;
+
+	/*
+	 * If the irq associated with the gsi is not found,
+	 * iosapic_unregister_intr() is unbalanced. We need to check
+	 * this again after getting locks.
+	 */
+	irq = gsi_to_irq(gsi);
+	if (irq < 0) {
+		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
+		       gsi);
+		WARN_ON(1);
+		return;
+	}
+
+	spin_lock_irqsave(&iosapic_lock, flags);
+	if ((rte = find_rte(irq, gsi)) == NULL) {
+		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
+		       gsi);
+		WARN_ON(1);
+		goto out;
+	}
+
+	if (--rte->refcnt > 0)
+		goto out;
+
+	rte->refcnt = NO_REF_RTE;
+
+	/* Mask the interrupt */
+	low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK;
+	iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32);
+
+	iosapic_intr_info[irq].count--;
+	index = find_iosapic(gsi);
+	iosapic_lists[index].rtes_inuse--;
+	WARN_ON(iosapic_lists[index].rtes_inuse < 0);
+
+	trigger  = iosapic_intr_info[irq].trigger;
+	polarity = iosapic_intr_info[irq].polarity;
+	dest     = iosapic_intr_info[irq].dest;
+	printk(KERN_INFO
+	       "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
+	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
+	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
+	       cpu_logical_id(dest), dest, irq_to_vector(irq));
+
+	if (iosapic_intr_info[irq].count == 0) {
+#ifdef CONFIG_SMP
+		/* Clear affinity */
+		cpumask_setall(irq_get_irq_data(irq)->affinity);
+#endif
+		/* Clear the interrupt information */
+		iosapic_intr_info[irq].dest = 0;
+		iosapic_intr_info[irq].dmode = 0;
+		iosapic_intr_info[irq].polarity = 0;
+		iosapic_intr_info[irq].trigger = 0;
+		iosapic_intr_info[irq].low32 |= IOSAPIC_MASK;
+
+		/* Destroy and reserve IRQ */
+		destroy_and_reserve_irq(irq);
+	}
+ out:
+	spin_unlock_irqrestore(&iosapic_lock, flags);
+}
+
+/*
+ * ACPI calls this when it finds an entry for a platform interrupt.
+ */
+int __init
+iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
+				int iosapic_vector, u16 eid, u16 id,
+				unsigned long polarity, unsigned long trigger)
+{
+	static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
+	unsigned char delivery;
+	int irq, vector, mask = 0;
+	unsigned int dest = ((id << 8) | eid) & 0xffff;
+
+	switch (int_type) {
+	      case ACPI_INTERRUPT_PMI:
+		irq = vector = iosapic_vector;
+		bind_irq_vector(irq, vector, CPU_MASK_ALL);
+		/*
+		 * since PMI vector is alloc'd by FW(ACPI) not by kernel,
+		 * we need to make sure the vector is available
+		 */
+		iosapic_reassign_vector(irq);
+		delivery = IOSAPIC_PMI;
+		break;
+	      case ACPI_INTERRUPT_INIT:
+		irq = create_irq();
+		if (irq < 0)
+			panic("%s: out of interrupt vectors!\n", __func__);
+		vector = irq_to_vector(irq);
+		delivery = IOSAPIC_INIT;
+		break;
+	      case ACPI_INTERRUPT_CPEI:
+		irq = vector = IA64_CPE_VECTOR;
+		BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
+		delivery = IOSAPIC_FIXED;
+		mask = 1;
+		break;
+	      default:
+		printk(KERN_ERR "%s: invalid int type 0x%x\n", __func__,
+		       int_type);
+		return -1;
+	}
+
+	register_intr(gsi, irq, delivery, polarity, trigger);
+
+	printk(KERN_INFO
+	       "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
+	       " vector %d\n",
+	       int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
+	       int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
+	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
+	       cpu_logical_id(dest), dest, vector);
+
+	set_rte(gsi, irq, dest, mask);
+	return vector;
+}
+
+/*
+ * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
+ */
+void __devinit
+iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
+			  unsigned long polarity,
+			  unsigned long trigger)
+{
+	int vector, irq;
+	unsigned int dest = cpu_physical_id(smp_processor_id());
+	unsigned char dmode;
+
+	irq = vector = isa_irq_to_vector(isa_irq);
+	BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
+	dmode = choose_dmode();
+	register_intr(gsi, irq, dmode, polarity, trigger);
+
+	DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
+	    isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
+	    polarity == IOSAPIC_POL_HIGH ? "high" : "low",
+	    cpu_logical_id(dest), dest, vector);
+
+	set_rte(gsi, irq, dest, 1);
+}
+
+void __init
+ia64_native_iosapic_pcat_compat_init(void)
+{
+	if (pcat_compat) {
+		/*
+		 * Disable the compatibility mode interrupts (8259 style),
+		 * needs IN/OUT support enabled.
+		 */
+		printk(KERN_INFO
+		       "%s: Disabling PC-AT compatible 8259 interrupts\n",
+		       __func__);
+		outb(0xff, 0xA1);
+		outb(0xff, 0x21);
+	}
+}
+
+void __init
+iosapic_system_init (int system_pcat_compat)
+{
+	int irq;
+
+	for (irq = 0; irq < NR_IRQS; ++irq) {
+		iosapic_intr_info[irq].low32 = IOSAPIC_MASK;
+		/* mark as unused */
+		INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes);
+
+		iosapic_intr_info[irq].count = 0;
+	}
+
+	pcat_compat = system_pcat_compat;
+	if (pcat_compat)
+		iosapic_pcat_compat_init();
+}
+
+static inline int
+iosapic_alloc (void)
+{
+	int index;
+
+	for (index = 0; index < NR_IOSAPICS; index++)
+		if (!iosapic_lists[index].addr)
+			return index;
+
+	printk(KERN_WARNING "%s: failed to allocate iosapic\n", __func__);
+	return -1;
+}
+
+static inline void
+iosapic_free (int index)
+{
+	memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));
+}
+
+static inline int
+iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
+{
+	int index;
+	unsigned int gsi_end, base, end;
+
+	/* check gsi range */
+	gsi_end = gsi_base + ((ver >> 16) & 0xff);
+	for (index = 0; index < NR_IOSAPICS; index++) {
+		if (!iosapic_lists[index].addr)
+			continue;
+
+		base = iosapic_lists[index].gsi_base;
+		end  = base + iosapic_lists[index].num_rte - 1;
+
+		if (gsi_end < base || end < gsi_base)
+			continue; /* OK */
+
+		return -EBUSY;
+	}
+	return 0;
+}
+
+int __devinit
+iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
+{
+	int num_rte, err, index;
+	unsigned int isa_irq, ver;
+	char __iomem *addr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iosapic_lock, flags);
+	index = find_iosapic(gsi_base);
+	if (index >= 0) {
+		spin_unlock_irqrestore(&iosapic_lock, flags);
+		return -EBUSY;
+	}
+
+	addr = ioremap(phys_addr, 0);
+	if (addr == NULL) {
+		spin_unlock_irqrestore(&iosapic_lock, flags);
+		return -ENOMEM;
+	}
+	ver = iosapic_version(addr);
+	if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
+		iounmap(addr);
+		spin_unlock_irqrestore(&iosapic_lock, flags);
+		return err;
+	}
+
+	/*
+	 * The MAX_REDIR register holds the highest input pin number
+	 * (starting from 0).  We add 1 so that we can use it for
+	 * number of pins (= RTEs)
+	 */
+	num_rte = ((ver >> 16) & 0xff) + 1;
+
+	index = iosapic_alloc();
+	iosapic_lists[index].addr = addr;
+	iosapic_lists[index].gsi_base = gsi_base;
+	iosapic_lists[index].num_rte = num_rte;
+#ifdef CONFIG_NUMA
+	iosapic_lists[index].node = MAX_NUMNODES;
+#endif
+	spin_lock_init(&iosapic_lists[index].lock);
+	spin_unlock_irqrestore(&iosapic_lock, flags);
+
+	if ((gsi_base == 0) && pcat_compat) {
+		/*
+		 * Map the legacy ISA devices into the IOSAPIC data.  Some of
+		 * these may get reprogrammed later on with data from the ACPI
+		 * Interrupt Source Override table.
+		 */
+		for (isa_irq = 0; isa_irq < 16; ++isa_irq)
+			iosapic_override_isa_irq(isa_irq, isa_irq,
+						 IOSAPIC_POL_HIGH,
+						 IOSAPIC_EDGE);
+	}
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG
+int
+iosapic_remove (unsigned int gsi_base)
+{
+	int index, err = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iosapic_lock, flags);
+	index = find_iosapic(gsi_base);
+	if (index < 0) {
+		printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
+		       __func__, gsi_base);
+		goto out;
+	}
+
+	if (iosapic_lists[index].rtes_inuse) {
+		err = -EBUSY;
+		printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
+		       __func__, gsi_base);
+		goto out;
+	}
+
+	iounmap(iosapic_lists[index].addr);
+	iosapic_free(index);
+ out:
+	spin_unlock_irqrestore(&iosapic_lock, flags);
+	return err;
+}
+#endif /* CONFIG_HOTPLUG */
+
+#ifdef CONFIG_NUMA
+void __devinit
+map_iosapic_to_node(unsigned int gsi_base, int node)
+{
+	int index;
+
+	index = find_iosapic(gsi_base);
+	if (index < 0) {
+		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
+		       __func__, gsi_base);
+		return;
+	}
+	iosapic_lists[index].node = node;
+	return;
+}
+#endif
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
new file mode 100644
index 00000000..ad696066
--- /dev/null
+++ b/arch/ia64/kernel/irq.c
@@ -0,0 +1,194 @@
+/*
+ *	linux/arch/ia64/kernel/irq.c
+ *
+ *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQs should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004
+ *
+ * 4/14/2004: Added code to handle cpu migration and do safe irq
+ *			migration without losing interrupts for iosapic
+ *			architecture.
+ */
+
+#include <asm/delay.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
+}
+
+#ifdef CONFIG_IA64_GENERIC
+ia64_vector __ia64_irq_to_vector(int irq)
+{
+	return irq_cfg[irq].vector;
+}
+
+unsigned int __ia64_local_vector_to_irq (ia64_vector vec)
+{
+	return __get_cpu_var(vector_irq)[vec];
+}
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
+
+void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
+{
+	if (irq < NR_IRQS) {
+		cpumask_copy(irq_get_irq_data(irq)->affinity,
+			     cpumask_of(cpu_logical_id(hwid)));
+		irq_redir[irq] = (char) (redir & 0xff);
+	}
+}
+
+bool is_affinity_mask_valid(const struct cpumask *cpumask)
+{
+	if (ia64_platform_is("sn2")) {
+		/* Only allow one CPU to be specified in the smp_affinity mask */
+		if (cpumask_weight(cpumask) != 1)
+			return false;
+	}
+	return true;
+}
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_HOTPLUG_CPU
+unsigned int vectors_in_migration[NR_IRQS];
+
+/*
+ * Since cpu_online_mask is already updated, we just need to check for
+ * affinity that has zeros
+ */
+static void migrate_irqs(void)
+{
+	int 		irq, new_cpu;
+
+	for (irq=0; irq < NR_IRQS; irq++) {
+		struct irq_desc *desc = irq_to_desc(irq);
+		struct irq_data *data = irq_desc_get_irq_data(desc);
+		struct irq_chip *chip = irq_data_get_irq_chip(data);
+
+		if (irqd_irq_disabled(data))
+			continue;
+
+		/*
+		 * No handling for now.
+		 * TBD: Implement a disable function so we can now
+		 * tell CPU not to respond to these local intr sources.
+		 * such as ITV,CPEI,MCA etc.
+		 */
+		if (irqd_is_per_cpu(data))
+			continue;
+
+		if (cpumask_any_and(data->affinity, cpu_online_mask)
+		    >= nr_cpu_ids) {
+			/*
+			 * Save it for phase 2 processing
+			 */
+			vectors_in_migration[irq] = irq;
+
+			new_cpu = cpumask_any(cpu_online_mask);
+
+			/*
+			 * Al three are essential, currently WARN_ON.. maybe panic?
+			 */
+			if (chip && chip->irq_disable &&
+				chip->irq_enable && chip->irq_set_affinity) {
+				chip->irq_disable(data);
+				chip->irq_set_affinity(data,
+						       cpumask_of(new_cpu), false);
+				chip->irq_enable(data);
+			} else {
+				WARN_ON((!chip || !chip->irq_disable ||
+					 !chip->irq_enable ||
+					 !chip->irq_set_affinity));
+			}
+		}
+	}
+}
+
+void fixup_irqs(void)
+{
+	unsigned int irq;
+	extern void ia64_process_pending_intr(void);
+	extern volatile int time_keeper_id;
+
+	/* Mask ITV to disable timer */
+	ia64_set_itv(1 << 16);
+
+	/*
+	 * Find a new timesync master
+	 */
+	if (smp_processor_id() == time_keeper_id) {
+		time_keeper_id = cpumask_first(cpu_online_mask);
+		printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
+	}
+
+	/*
+	 * Phase 1: Locate IRQs bound to this cpu and
+	 * relocate them for cpu removal.
+	 */
+	migrate_irqs();
+
+	/*
+	 * Phase 2: Perform interrupt processing for all entries reported in
+	 * local APIC.
+	 */
+	ia64_process_pending_intr();
+
+	/*
+	 * Phase 3: Now handle any interrupts not captured in local APIC.
+	 * This is to account for cases that device interrupted during the time the
+	 * rte was being disabled and re-programmed.
+	 */
+	for (irq=0; irq < NR_IRQS; irq++) {
+		if (vectors_in_migration[irq]) {
+			struct pt_regs *old_regs = set_irq_regs(NULL);
+
+			vectors_in_migration[irq]=0;
+			generic_handle_irq(irq);
+			set_irq_regs(old_regs);
+		}
+	}
+
+	/*
+	 * Now let processor die. We do irq disable and max_xtp() to
+	 * ensure there is no more interrupts routed to this processor.
+	 * But the local timer interrupt can have 1 pending which we
+	 * take care in timer_interrupt().
+	 */
+	max_xtp();
+	local_irq_disable();
+}
+#endif
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
new file mode 100644
index 00000000..782c3a35
--- /dev/null
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -0,0 +1,689 @@
+/*
+ * linux/arch/ia64/kernel/irq_ia64.c
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ *  6/10/99: Updated to bring in sync with x86 version to facilitate
+ *	     support for SMP and different interrupt controllers.
+ *
+ * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
+ *                      PCI to vector allocation routine.
+ * 04/14/2004 Ashok Raj <ashok.raj@intel.com>
+ *						Added CPU Hotplug handling for IPF.
+ */
+
+#include <linux/module.h>
+
+#include <linux/jiffies.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+#include <linux/ptrace.h>
+#include <linux/random.h>	/* for rand_initialize_irq() */
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/threads.h>
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/ratelimit.h>
+#include <linux/acpi.h>
+#include <linux/sched.h>
+
+#include <asm/delay.h>
+#include <asm/intrinsics.h>
+#include <asm/io.h>
+#include <asm/hw_irq.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#define IRQ_DEBUG	0
+
+#define IRQ_VECTOR_UNASSIGNED	(0)
+
+#define IRQ_UNUSED		(0)
+#define IRQ_USED		(1)
+#define IRQ_RSVD		(2)
+
+/* These can be overridden in platform_irq_init */
+int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR;
+int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR;
+
+/* default base addr of IPI table */
+void __iomem *ipi_base_addr = ((void __iomem *)
+			       (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
+
+static cpumask_t vector_allocation_domain(int cpu);
+
+/*
+ * Legacy IRQ to IA-64 vector translation table.
+ */
+__u8 isa_irq_to_vector_map[16] = {
+	/* 8259 IRQ translation, first 16 entries */
+	0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
+	0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
+};
+EXPORT_SYMBOL(isa_irq_to_vector_map);
+
+DEFINE_SPINLOCK(vector_lock);
+
+struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
+	[0 ... NR_IRQS - 1] = {
+		.vector = IRQ_VECTOR_UNASSIGNED,
+		.domain = CPU_MASK_NONE
+	}
+};
+
+DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = {
+	[0 ... IA64_NUM_VECTORS - 1] = -1
+};
+
+static cpumask_t vector_table[IA64_NUM_VECTORS] = {
+	[0 ... IA64_NUM_VECTORS - 1] = CPU_MASK_NONE
+};
+
+static int irq_status[NR_IRQS] = {
+	[0 ... NR_IRQS -1] = IRQ_UNUSED
+};
+
+int check_irq_used(int irq)
+{
+	if (irq_status[irq] == IRQ_USED)
+		return 1;
+
+	return -1;
+}
+
+static inline int find_unassigned_irq(void)
+{
+	int irq;
+
+	for (irq = IA64_FIRST_DEVICE_VECTOR; irq < NR_IRQS; irq++)
+		if (irq_status[irq] == IRQ_UNUSED)
+			return irq;
+	return -ENOSPC;
+}
+
+static inline int find_unassigned_vector(cpumask_t domain)
+{
+	cpumask_t mask;
+	int pos, vector;
+
+	cpus_and(mask, domain, cpu_online_map);
+	if (cpus_empty(mask))
+		return -EINVAL;
+
+	for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) {
+		vector = IA64_FIRST_DEVICE_VECTOR + pos;
+		cpus_and(mask, domain, vector_table[vector]);
+		if (!cpus_empty(mask))
+			continue;
+		return vector;
+	}
+	return -ENOSPC;
+}
+
+static int __bind_irq_vector(int irq, int vector, cpumask_t domain)
+{
+	cpumask_t mask;
+	int cpu;
+	struct irq_cfg *cfg = &irq_cfg[irq];
+
+	BUG_ON((unsigned)irq >= NR_IRQS);
+	BUG_ON((unsigned)vector >= IA64_NUM_VECTORS);
+
+	cpus_and(mask, domain, cpu_online_map);
+	if (cpus_empty(mask))
+		return -EINVAL;
+	if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain))
+		return 0;
+	if (cfg->vector != IRQ_VECTOR_UNASSIGNED)
+		return -EBUSY;
+	for_each_cpu_mask(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	cfg->vector = vector;
+	cfg->domain = domain;
+	irq_status[irq] = IRQ_USED;
+	cpus_or(vector_table[vector], vector_table[vector], domain);
+	return 0;
+}
+
+int bind_irq_vector(int irq, int vector, cpumask_t domain)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	ret = __bind_irq_vector(irq, vector, domain);
+	spin_unlock_irqrestore(&vector_lock, flags);
+	return ret;
+}
+
+static void __clear_irq_vector(int irq)
+{
+	int vector, cpu;
+	cpumask_t mask;
+	cpumask_t domain;
+	struct irq_cfg *cfg = &irq_cfg[irq];
+
+	BUG_ON((unsigned)irq >= NR_IRQS);
+	BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED);
+	vector = cfg->vector;
+	domain = cfg->domain;
+	cpus_and(mask, cfg->domain, cpu_online_map);
+	for_each_cpu_mask(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+	cfg->vector = IRQ_VECTOR_UNASSIGNED;
+	cfg->domain = CPU_MASK_NONE;
+	irq_status[irq] = IRQ_UNUSED;
+	cpus_andnot(vector_table[vector], vector_table[vector], domain);
+}
+
+static void clear_irq_vector(int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	__clear_irq_vector(irq);
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+int
+ia64_native_assign_irq_vector (int irq)
+{
+	unsigned long flags;
+	int vector, cpu;
+	cpumask_t domain = CPU_MASK_NONE;
+
+	vector = -ENOSPC;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	for_each_online_cpu(cpu) {
+		domain = vector_allocation_domain(cpu);
+		vector = find_unassigned_vector(domain);
+		if (vector >= 0)
+			break;
+	}
+	if (vector < 0)
+		goto out;
+	if (irq == AUTO_ASSIGN)
+		irq = vector;
+	BUG_ON(__bind_irq_vector(irq, vector, domain));
+ out:
+	spin_unlock_irqrestore(&vector_lock, flags);
+	return vector;
+}
+
+void
+ia64_native_free_irq_vector (int vector)
+{
+	if (vector < IA64_FIRST_DEVICE_VECTOR ||
+	    vector > IA64_LAST_DEVICE_VECTOR)
+		return;
+	clear_irq_vector(vector);
+}
+
+int
+reserve_irq_vector (int vector)
+{
+	if (vector < IA64_FIRST_DEVICE_VECTOR ||
+	    vector > IA64_LAST_DEVICE_VECTOR)
+		return -EINVAL;
+	return !!bind_irq_vector(vector, vector, CPU_MASK_ALL);
+}
+
+/*
+ * Initialize vector_irq on a new cpu. This function must be called
+ * with vector_lock held.
+ */
+void __setup_vector_irq(int cpu)
+{
+	int irq, vector;
+
+	/* Clear vector_irq */
+	for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+	/* Mark the inuse vectors */
+	for (irq = 0; irq < NR_IRQS; ++irq) {
+		if (!cpu_isset(cpu, irq_cfg[irq].domain))
+			continue;
+		vector = irq_to_vector(irq);
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+}
+
+#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG))
+
+static enum vector_domain_type {
+	VECTOR_DOMAIN_NONE,
+	VECTOR_DOMAIN_PERCPU
+} vector_domain_type = VECTOR_DOMAIN_NONE;
+
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	if (vector_domain_type == VECTOR_DOMAIN_PERCPU)
+		return cpumask_of_cpu(cpu);
+	return CPU_MASK_ALL;
+}
+
+static int __irq_prepare_move(int irq, int cpu)
+{
+	struct irq_cfg *cfg = &irq_cfg[irq];
+	int vector;
+	cpumask_t domain;
+
+	if (cfg->move_in_progress || cfg->move_cleanup_count)
+		return -EBUSY;
+	if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu))
+		return -EINVAL;
+	if (cpu_isset(cpu, cfg->domain))
+		return 0;
+	domain = vector_allocation_domain(cpu);
+	vector = find_unassigned_vector(domain);
+	if (vector < 0)
+		return -ENOSPC;
+	cfg->move_in_progress = 1;
+	cfg->old_domain = cfg->domain;
+	cfg->vector = IRQ_VECTOR_UNASSIGNED;
+	cfg->domain = CPU_MASK_NONE;
+	BUG_ON(__bind_irq_vector(irq, vector, domain));
+	return 0;
+}
+
+int irq_prepare_move(int irq, int cpu)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	ret = __irq_prepare_move(irq, cpu);
+	spin_unlock_irqrestore(&vector_lock, flags);
+	return ret;
+}
+
+void irq_complete_move(unsigned irq)
+{
+	struct irq_cfg *cfg = &irq_cfg[irq];
+	cpumask_t cleanup_mask;
+	int i;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	if (unlikely(cpu_isset(smp_processor_id(), cfg->old_domain)))
+		return;
+
+	cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+	cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+	for_each_cpu_mask(i, cleanup_mask)
+		platform_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0);
+	cfg->move_in_progress = 0;
+}
+
+static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id)
+{
+	int me = smp_processor_id();
+	ia64_vector vector;
+	unsigned long flags;
+
+	for (vector = IA64_FIRST_DEVICE_VECTOR;
+	     vector < IA64_LAST_DEVICE_VECTOR; vector++) {
+		int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __get_cpu_var(vector_irq)[vector];
+		if (irq < 0)
+			continue;
+
+		desc = irq_to_desc(irq);
+		cfg = irq_cfg + irq;
+		raw_spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if (!cpu_isset(me, cfg->old_domain))
+			goto unlock;
+
+		spin_lock_irqsave(&vector_lock, flags);
+		__get_cpu_var(vector_irq)[vector] = -1;
+		cpu_clear(me, vector_table[vector]);
+		spin_unlock_irqrestore(&vector_lock, flags);
+		cfg->move_cleanup_count--;
+	unlock:
+		raw_spin_unlock(&desc->lock);
+	}
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq_move_irqaction = {
+	.handler =	smp_irq_move_cleanup_interrupt,
+	.flags =	IRQF_DISABLED,
+	.name =		"irq_move"
+};
+
+static int __init parse_vector_domain(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+	if (!strcmp(arg, "percpu")) {
+		vector_domain_type = VECTOR_DOMAIN_PERCPU;
+		no_int_routing = 1;
+	}
+	return 0;
+}
+early_param("vector", parse_vector_domain);
+#else
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	return CPU_MASK_ALL;
+}
+#endif
+
+
+void destroy_and_reserve_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	dynamic_irq_cleanup(irq);
+
+	spin_lock_irqsave(&vector_lock, flags);
+	__clear_irq_vector(irq);
+	irq_status[irq] = IRQ_RSVD;
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+/*
+ * Dynamic irq allocate and deallocation for MSI
+ */
+int create_irq(void)
+{
+	unsigned long flags;
+	int irq, vector, cpu;
+	cpumask_t domain = CPU_MASK_NONE;
+
+	irq = vector = -ENOSPC;
+	spin_lock_irqsave(&vector_lock, flags);
+	for_each_online_cpu(cpu) {
+		domain = vector_allocation_domain(cpu);
+		vector = find_unassigned_vector(domain);
+		if (vector >= 0)
+			break;
+	}
+	if (vector < 0)
+		goto out;
+	irq = find_unassigned_irq();
+	if (irq < 0)
+		goto out;
+	BUG_ON(__bind_irq_vector(irq, vector, domain));
+ out:
+	spin_unlock_irqrestore(&vector_lock, flags);
+	if (irq >= 0)
+		dynamic_irq_init(irq);
+	return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+	dynamic_irq_cleanup(irq);
+	clear_irq_vector(irq);
+}
+
+#ifdef CONFIG_SMP
+#	define IS_RESCHEDULE(vec)	(vec == IA64_IPI_RESCHEDULE)
+#	define IS_LOCAL_TLB_FLUSH(vec)	(vec == IA64_IPI_LOCAL_TLB_FLUSH)
+#else
+#	define IS_RESCHEDULE(vec)	(0)
+#	define IS_LOCAL_TLB_FLUSH(vec)	(0)
+#endif
+/*
+ * That's where the IVT branches when we get an external
+ * interrupt. This branches to the correct hardware IRQ handler via
+ * function ptr.
+ */
+void
+ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	unsigned long saved_tpr;
+
+#if IRQ_DEBUG
+	{
+		unsigned long bsp, sp;
+
+		/*
+		 * Note: if the interrupt happened while executing in
+		 * the context switch routine (ia64_switch_to), we may
+		 * get a spurious stack overflow here.  This is
+		 * because the register and the memory stack are not
+		 * switched atomically.
+		 */
+		bsp = ia64_getreg(_IA64_REG_AR_BSP);
+		sp = ia64_getreg(_IA64_REG_SP);
+
+		if ((sp - bsp) < 1024) {
+			static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
+
+			if (__ratelimit(&ratelimit)) {
+				printk("ia64_handle_irq: DANGER: less than "
+				       "1KB of free stack space!!\n"
+				       "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
+			}
+		}
+	}
+#endif /* IRQ_DEBUG */
+
+	/*
+	 * Always set TPR to limit maximum interrupt nesting depth to
+	 * 16 (without this, it would be ~240, which could easily lead
+	 * to kernel stack overflows).
+	 */
+	irq_enter();
+	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+	ia64_srlz_d();
+	while (vector != IA64_SPURIOUS_INT_VECTOR) {
+		int irq = local_vector_to_irq(vector);
+		struct irq_desc *desc = irq_to_desc(irq);
+
+		if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
+			smp_local_flush_tlb();
+			kstat_incr_irqs_this_cpu(irq, desc);
+		} else if (unlikely(IS_RESCHEDULE(vector))) {
+			scheduler_ipi();
+			kstat_incr_irqs_this_cpu(irq, desc);
+		} else {
+			ia64_setreg(_IA64_REG_CR_TPR, vector);
+			ia64_srlz_d();
+
+			if (unlikely(irq < 0)) {
+				printk(KERN_ERR "%s: Unexpected interrupt "
+				       "vector %d on CPU %d is not mapped "
+				       "to any IRQ!\n", __func__, vector,
+				       smp_processor_id());
+			} else
+				generic_handle_irq(irq);
+
+			/*
+			 * Disable interrupts and send EOI:
+			 */
+			local_irq_disable();
+			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
+		}
+		ia64_eoi();
+		vector = ia64_get_ivr();
+	}
+	/*
+	 * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
+	 * handler needs to be able to wait for further keyboard interrupts, which can't
+	 * come through until ia64_eoi() has been done.
+	 */
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * This function emulates a interrupt processing when a cpu is about to be
+ * brought down.
+ */
+void ia64_process_pending_intr(void)
+{
+	ia64_vector vector;
+	unsigned long saved_tpr;
+	extern unsigned int vectors_in_migration[NR_IRQS];
+
+	vector = ia64_get_ivr();
+
+	irq_enter();
+	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+	ia64_srlz_d();
+
+	 /*
+	  * Perform normal interrupt style processing
+	  */
+	while (vector != IA64_SPURIOUS_INT_VECTOR) {
+		int irq = local_vector_to_irq(vector);
+		struct irq_desc *desc = irq_to_desc(irq);
+
+		if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
+			smp_local_flush_tlb();
+			kstat_incr_irqs_this_cpu(irq, desc);
+		} else if (unlikely(IS_RESCHEDULE(vector))) {
+			kstat_incr_irqs_this_cpu(irq, desc);
+		} else {
+			struct pt_regs *old_regs = set_irq_regs(NULL);
+
+			ia64_setreg(_IA64_REG_CR_TPR, vector);
+			ia64_srlz_d();
+
+			/*
+			 * Now try calling normal ia64_handle_irq as it would have got called
+			 * from a real intr handler. Try passing null for pt_regs, hopefully
+			 * it will work. I hope it works!.
+			 * Probably could shared code.
+			 */
+			if (unlikely(irq < 0)) {
+				printk(KERN_ERR "%s: Unexpected interrupt "
+				       "vector %d on CPU %d not being mapped "
+				       "to any IRQ!!\n", __func__, vector,
+				       smp_processor_id());
+			} else {
+				vectors_in_migration[irq]=0;
+				generic_handle_irq(irq);
+			}
+			set_irq_regs(old_regs);
+
+			/*
+			 * Disable interrupts and send EOI
+			 */
+			local_irq_disable();
+			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
+		}
+		ia64_eoi();
+		vector = ia64_get_ivr();
+	}
+	irq_exit();
+}
+#endif
+
+
+#ifdef CONFIG_SMP
+
+static irqreturn_t dummy_handler (int irq, void *dev_id)
+{
+	BUG();
+}
+
+static struct irqaction ipi_irqaction = {
+	.handler =	handle_IPI,
+	.flags =	IRQF_DISABLED,
+	.name =		"IPI"
+};
+
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
+static struct irqaction resched_irqaction = {
+	.handler =	dummy_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"resched"
+};
+
+static struct irqaction tlb_irqaction = {
+	.handler =	dummy_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"tlb_flush"
+};
+
+#endif
+
+void
+ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action)
+{
+	unsigned int irq;
+
+	irq = vec;
+	BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL));
+	irq_set_status_flags(irq, IRQ_PER_CPU);
+	irq_set_chip(irq, &irq_type_ia64_lsapic);
+	if (action)
+		setup_irq(irq, action);
+	irq_set_handler(irq, handle_percpu_irq);
+}
+
+void __init
+ia64_native_register_ipi(void)
+{
+#ifdef CONFIG_SMP
+	register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
+	register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
+	register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction);
+#endif
+}
+
+void __init
+init_IRQ (void)
+{
+#ifdef CONFIG_ACPI
+	acpi_boot_init();
+#endif
+	ia64_register_ipi();
+	register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
+#ifdef CONFIG_SMP
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)
+	if (vector_domain_type != VECTOR_DOMAIN_NONE)
+		register_percpu_irq(IA64_IRQ_MOVE_VECTOR, &irq_move_irqaction);
+#endif
+#endif
+#ifdef CONFIG_PERFMON
+	pfm_init_percpu();
+#endif
+	platform_irq_init();
+}
+
+void
+ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
+{
+	void __iomem *ipi_addr;
+	unsigned long ipi_data;
+	unsigned long phys_cpu_id;
+
+	phys_cpu_id = cpu_physical_id(cpu);
+
+	/*
+	 * cpu number is in 8bit ID and 8bit EID
+	 */
+
+	ipi_data = (delivery_mode << 8) | (vector & 0xff);
+	ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3));
+
+	writeq(ipi_data, ipi_addr);
+}
diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c
new file mode 100644
index 00000000..1b3a776e
--- /dev/null
+++ b/arch/ia64/kernel/irq_lsapic.c
@@ -0,0 +1,44 @@
+/*
+ * LSAPIC Interrupt Controller
+ *
+ * This takes care of interrupts that are generated by the CPU's
+ * internal Streamlined Advanced Programmable Interrupt Controller
+ * (LSAPIC), such as the ITC and IPI interrupts.
+    *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/irq.h>
+
+static unsigned int
+lsapic_noop_startup (struct irq_data *data)
+{
+	return 0;
+}
+
+static void
+lsapic_noop (struct irq_data *data)
+{
+	/* nothing to do... */
+}
+
+static int lsapic_retrigger(struct irq_data *data)
+{
+	ia64_resend_irq(data->irq);
+
+	return 1;
+}
+
+struct irq_chip irq_type_ia64_lsapic = {
+	.name =			"LSAPIC",
+	.irq_startup =		lsapic_noop_startup,
+	.irq_shutdown =		lsapic_noop,
+	.irq_enable =		lsapic_noop,
+	.irq_disable =		lsapic_noop,
+	.irq_ack =		lsapic_noop,
+	.irq_retrigger =	lsapic_retrigger,
+};
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
new file mode 100644
index 00000000..d93e396b
--- /dev/null
+++ b/arch/ia64/kernel/ivt.S
@@ -0,0 +1,1689 @@
+/*
+ * arch/ia64/kernel/ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ *	Asit Mallick <asit.k.mallick@intel.com>
+ *      Suresh Siddha <suresh.b.siddha@intel.com>
+ *      Kenneth Chen <kenneth.w.chen@intel.com>
+ *      Fenghua Yu <fenghua.yu@intel.com>
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@hp.com>
+ *      Xen paravirtualization
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *                    pv_ops.
+ *      Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ */
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interruptions like TLB misses.
+ *
+ *  For each entry, the comment is as follows:
+ *
+ *		// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ *  entry offset ----/     /         /                  /          /
+ *  entry number ---------/         /                  /          /
+ *  size of the entry -------------/                  /          /
+ *  vector name -------------------------------------/          /
+ *  interruptions triggering this vector ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+#include <asm/kregs.h>
+#include <asm/asm-offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+
+#if 1
+# define PSR_DEFAULT_BITS	psr.ac
+#else
+# define PSR_DEFAULT_BITS	0
+#endif
+
+#if 0
+  /*
+   * This lets you track the last eight faults that occurred on the CPU.  Make sure ar.k2 isn't
+   * needed for something else before enabling this...
+   */
+# define DBG_FAULT(i)	mov r16=ar.k2;;	shl r16=r16,8;;	add r16=(i),r16;;mov ar.k2=r16
+#else
+# define DBG_FAULT(i)
+#endif
+
+#include "minstate.h"
+
+#define FAULT(n)									\
+	mov r31=pr;									\
+	mov r19=n;;			/* prepare to save predicates */		\
+	br.sptk.many dispatch_to_fault_handler
+
+	.section .text..ivt,"ax"
+
+	.align 32768	// align on 32KB boundary
+	.global ia64_ivt
+ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(vhpt_miss)
+	DBG_FAULT(0)
+	/*
+	 * The VHPT vector is invoked when the TLB entry for the virtual page table
+	 * is missing.  This happens only as a result of a previous
+	 * (the "original") TLB miss, which may either be caused by an instruction
+	 * fetch or a data access (or non-access).
+	 *
+	 * What we do here is normal TLB miss handing for the _original_ miss,
+	 * followed by inserting the TLB entry for the virtual page table page
+	 * that the VHPT walker was attempting to access.  The latter gets
+	 * inserted as long as page table entry above pte level have valid
+	 * mappings for the faulting address.  The TLB entry for the original
+	 * miss gets inserted only if the pte entry indicates that the page is
+	 * present.
+	 *
+	 * do_page_fault gets invoked in the following cases:
+	 *	- the faulting virtual address uses unimplemented address bits
+	 *	- the faulting virtual address has no valid page table mapping
+	 */
+	MOV_FROM_IFA(r16)			// get address that caused the TLB miss
+#ifdef CONFIG_HUGETLB_PAGE
+	movl r18=PAGE_SHIFT
+	MOV_FROM_ITIR(r25)
+#endif
+	;;
+	RSM_PSR_DT				// use physical addressing for data
+	mov r31=pr				// save the predicate registers
+	mov r19=IA64_KR(PT_BASE)		// get page table base address
+	shl r21=r16,3				// shift bit 60 into sign bit
+	shr.u r17=r16,61			// get the region number into r17
+	;;
+	shr.u r22=r21,3
+#ifdef CONFIG_HUGETLB_PAGE
+	extr.u r26=r25,2,6
+	;;
+	cmp.ne p8,p0=r18,r26
+	sub r27=r26,r18
+	;;
+(p8)	dep r25=r18,r25,2,6
+(p8)	shr r22=r22,r27
+#endif
+	;;
+	cmp.eq p6,p7=5,r17			// is IFA pointing into to region 5?
+	shr.u r18=r22,PGDIR_SHIFT		// get bottom portion of pgd index bit
+	;;
+(p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
+
+	srlz.d
+	LOAD_PHYSICAL(p6, r19, swapper_pg_dir)	// region 5 is rooted at swapper_pg_dir
+
+	.pred.rel "mutex", p6, p7
+(p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+	;;
+(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
+(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
+	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
+#ifdef CONFIG_PGTABLE_4
+	shr.u r28=r22,PUD_SHIFT			// shift pud index into position
+#else
+	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
+#endif
+	;;
+	ld8 r17=[r17]				// get *pgd (may be 0)
+	;;
+(p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
+#ifdef CONFIG_PGTABLE_4
+	dep r28=r28,r17,3,(PAGE_SHIFT-3)	// r28=pud_offset(pgd,addr)
+	;;
+	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
+(p7)	ld8 r29=[r28]				// get *pud (may be 0)
+	;;
+(p7)	cmp.eq.or.andcm p6,p7=r29,r0		// was pud_present(*pud) == NULL?
+	dep r17=r18,r29,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pud,addr)
+#else
+	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pgd,addr)
+#endif
+	;;
+(p7)	ld8 r20=[r17]				// get *pmd (may be 0)
+	shr.u r19=r22,PAGE_SHIFT		// shift pte index into position
+	;;
+(p7)	cmp.eq.or.andcm p6,p7=r20,r0		// was pmd_present(*pmd) == NULL?
+	dep r21=r19,r20,3,(PAGE_SHIFT-3)	// r21=pte_offset(pmd,addr)
+	;;
+(p7)	ld8 r18=[r21]				// read *pte
+	MOV_FROM_ISR(r19)			// cr.isr bit 32 tells us if this is an insn miss
+	;;
+(p7)	tbit.z p6,p7=r18,_PAGE_P_BIT		// page present bit cleared?
+	MOV_FROM_IHA(r22)			// get the VHPT address that caused the TLB miss
+	;;					// avoid RAW on p7
+(p7)	tbit.nz.unc p10,p11=r19,32		// is it an instruction TLB miss?
+	dep r23=0,r20,0,PAGE_SHIFT		// clear low bits to get page address
+	;;
+	ITC_I_AND_D(p10, p11, r18, r24)		// insert the instruction TLB entry and
+						// insert the data TLB entry
+(p6)	br.cond.spnt.many page_fault		// handle bad address/page not present (page fault)
+	MOV_TO_IFA(r22, r24)
+
+#ifdef CONFIG_HUGETLB_PAGE
+	MOV_TO_ITIR(p8, r25, r24)		// change to default page-size for VHPT
+#endif
+
+	/*
+	 * Now compute and insert the TLB entry for the virtual page table.  We never
+	 * execute in a page table page so there is no need to set the exception deferral
+	 * bit.
+	 */
+	adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
+	;;
+	ITC_D(p7, r24, r25)
+	;;
+#ifdef CONFIG_SMP
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	/*
+	 * Re-check pagetable entry.  If they changed, we may have received a ptc.g
+	 * between reading the pagetable and the "itc".  If so, flush the entry we
+	 * inserted and retry.  At this point, we have:
+	 *
+	 * r28 = equivalent of pud_offset(pgd, ifa)
+	 * r17 = equivalent of pmd_offset(pud, ifa)
+	 * r21 = equivalent of pte_offset(pmd, ifa)
+	 *
+	 * r29 = *pud
+	 * r20 = *pmd
+	 * r18 = *pte
+	 */
+	ld8 r25=[r21]				// read *pte again
+	ld8 r26=[r17]				// read *pmd again
+#ifdef CONFIG_PGTABLE_4
+	ld8 r19=[r28]				// read *pud again
+#endif
+	cmp.ne p6,p7=r0,r0
+	;;
+	cmp.ne.or.andcm p6,p7=r26,r20		// did *pmd change
+#ifdef CONFIG_PGTABLE_4
+	cmp.ne.or.andcm p6,p7=r19,r29		// did *pud change
+#endif
+	mov r27=PAGE_SHIFT<<2
+	;;
+(p6)	ptc.l r22,r27				// purge PTE page translation
+(p7)	cmp.ne.or.andcm p6,p7=r25,r18		// did *pte change
+	;;
+(p6)	ptc.l r16,r27				// purge translation
+#endif
+
+	mov pr=r31,-1				// restore predicate registers
+	RFI
+END(vhpt_miss)
+
+	.org ia64_ivt+0x400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(itlb_miss)
+	DBG_FAULT(1)
+	/*
+	 * The ITLB handler accesses the PTE via the virtually mapped linear
+	 * page table.  If a nested TLB miss occurs, we switch into physical
+	 * mode, walk the page table, and then re-execute the PTE read and
+	 * go on normally after that.
+	 */
+	MOV_FROM_IFA(r16)			// get virtual address
+	mov r29=b0				// save b0
+	mov r31=pr				// save predicates
+.itlb_fault:
+	MOV_FROM_IHA(r17)			// get virtual address of PTE
+	movl r30=1f				// load nested fault continuation point
+	;;
+1:	ld8 r18=[r17]				// read *pte
+	;;
+	mov b0=r29
+	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
+(p6)	br.cond.spnt page_fault
+	;;
+	ITC_I(p0, r18, r19)
+	;;
+#ifdef CONFIG_SMP
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	ld8 r19=[r17]				// read *pte again and see if same
+	mov r20=PAGE_SHIFT<<2			// setup page size for purge
+	;;
+	cmp.ne p7,p0=r18,r19
+	;;
+(p7)	ptc.l r16,r20
+#endif
+	mov pr=r31,-1
+	RFI
+END(itlb_miss)
+
+	.org ia64_ivt+0x0800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(dtlb_miss)
+	DBG_FAULT(2)
+	/*
+	 * The DTLB handler accesses the PTE via the virtually mapped linear
+	 * page table.  If a nested TLB miss occurs, we switch into physical
+	 * mode, walk the page table, and then re-execute the PTE read and
+	 * go on normally after that.
+	 */
+	MOV_FROM_IFA(r16)			// get virtual address
+	mov r29=b0				// save b0
+	mov r31=pr				// save predicates
+dtlb_fault:
+	MOV_FROM_IHA(r17)			// get virtual address of PTE
+	movl r30=1f				// load nested fault continuation point
+	;;
+1:	ld8 r18=[r17]				// read *pte
+	;;
+	mov b0=r29
+	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
+(p6)	br.cond.spnt page_fault
+	;;
+	ITC_D(p0, r18, r19)
+	;;
+#ifdef CONFIG_SMP
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	ld8 r19=[r17]				// read *pte again and see if same
+	mov r20=PAGE_SHIFT<<2			// setup page size for purge
+	;;
+	cmp.ne p7,p0=r18,r19
+	;;
+(p7)	ptc.l r16,r20
+#endif
+	mov pr=r31,-1
+	RFI
+END(dtlb_miss)
+
+	.org ia64_ivt+0x0c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(alt_itlb_miss)
+	DBG_FAULT(3)
+	MOV_FROM_IFA(r16)	// get address that caused the TLB miss
+	movl r17=PAGE_KERNEL
+	MOV_FROM_IPSR(p0, r21)
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	mov r31=pr
+	;;
+#ifdef CONFIG_DISABLE_VHPT
+	shr.u r22=r16,61			// get the region number into r21
+	;;
+	cmp.gt p8,p0=6,r22			// user mode
+	;;
+	THASH(p8, r17, r16, r23)
+	;;
+	MOV_TO_IHA(p8, r17, r23)
+(p8)	mov r29=b0				// save b0
+(p8)	br.cond.dptk .itlb_fault
+#endif
+	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
+	and r19=r19,r16		// clear ed, reserved bits, and PTE control bits
+	shr.u r18=r16,57	// move address bit 61 to bit 4
+	;;
+	andcm r18=0x10,r18	// bit 4=~address-bit(61)
+	cmp.ne p8,p0=r0,r23	// psr.cpl != 0?
+	or r19=r17,r19		// insert PTE control bits into r19
+	;;
+	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
+(p8)	br.cond.spnt page_fault
+	;;
+	ITC_I(p0, r19, r18)	// insert the TLB entry
+	mov pr=r31,-1
+	RFI
+END(alt_itlb_miss)
+
+	.org ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(alt_dtlb_miss)
+	DBG_FAULT(4)
+	MOV_FROM_IFA(r16)	// get address that caused the TLB miss
+	movl r17=PAGE_KERNEL
+	MOV_FROM_ISR(r20)
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	MOV_FROM_IPSR(p0, r21)
+	mov r31=pr
+	mov r24=PERCPU_ADDR
+	;;
+#ifdef CONFIG_DISABLE_VHPT
+	shr.u r22=r16,61			// get the region number into r21
+	;;
+	cmp.gt p8,p0=6,r22			// access to region 0-5
+	;;
+	THASH(p8, r17, r16, r25)
+	;;
+	MOV_TO_IHA(p8, r17, r25)
+(p8)	mov r29=b0				// save b0
+(p8)	br.cond.dptk dtlb_fault
+#endif
+	cmp.ge p10,p11=r16,r24			// access to per_cpu_data?
+	tbit.z p12,p0=r16,61			// access to region 6?
+	mov r25=PERCPU_PAGE_SHIFT << 2
+	mov r26=PERCPU_PAGE_SIZE
+	nop.m 0
+	nop.b 0
+	;;
+(p10)	mov r19=IA64_KR(PER_CPU_DATA)
+(p11)	and r19=r19,r16				// clear non-ppn fields
+	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
+	and r22=IA64_ISR_CODE_MASK,r20		// get the isr.code field
+	tbit.nz p6,p7=r20,IA64_ISR_SP_BIT	// is speculation bit on?
+	tbit.nz p9,p0=r20,IA64_ISR_NA_BIT	// is non-access bit on?
+	;;
+(p10)	sub r19=r19,r26
+	MOV_TO_ITIR(p10, r25, r24)
+	cmp.ne p8,p0=r0,r23
+(p9)	cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22	// check isr.code field
+(p12)	dep r17=-1,r17,4,1			// set ma=UC for region 6 addr
+(p8)	br.cond.spnt page_fault
+
+	dep r21=-1,r21,IA64_PSR_ED_BIT,1
+	;;
+	or r19=r19,r17		// insert PTE control bits into r19
+	MOV_TO_IPSR(p6, r21, r24)
+	;;
+	ITC_D(p7, r19, r18)	// insert the TLB entry
+	mov pr=r31,-1
+	RFI
+END(alt_dtlb_miss)
+
+	.org ia64_ivt+0x1400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(nested_dtlb_miss)
+	/*
+	 * In the absence of kernel bugs, we get here when the virtually mapped linear
+	 * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction
+	 * Access-bit, or Data Access-bit faults).  If the DTLB entry for the virtual page
+	 * table is missing, a nested TLB miss fault is triggered and control is
+	 * transferred to this point.  When this happens, we lookup the pte for the
+	 * faulting address by walking the page table in physical mode and return to the
+	 * continuation point passed in register r30 (or call page_fault if the address is
+	 * not mapped).
+	 *
+	 * Input:	r16:	faulting address
+	 *		r29:	saved b0
+	 *		r30:	continuation address
+	 *		r31:	saved pr
+	 *
+	 * Output:	r17:	physical address of PTE of faulting address
+	 *		r29:	saved b0
+	 *		r30:	continuation address
+	 *		r31:	saved pr
+	 *
+	 * Clobbered:	b0, r18, r19, r21, r22, psr.dt (cleared)
+	 */
+	RSM_PSR_DT				// switch to using physical data addressing
+	mov r19=IA64_KR(PT_BASE)		// get the page table base address
+	shl r21=r16,3				// shift bit 60 into sign bit
+	MOV_FROM_ITIR(r18)
+	;;
+	shr.u r17=r16,61			// get the region number into r17
+	extr.u r18=r18,2,6			// get the faulting page size
+	;;
+	cmp.eq p6,p7=5,r17			// is faulting address in region 5?
+	add r22=-PAGE_SHIFT,r18			// adjustment for hugetlb address
+	add r18=PGDIR_SHIFT-PAGE_SHIFT,r18
+	;;
+	shr.u r22=r16,r22
+	shr.u r18=r16,r18
+(p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
+
+	srlz.d
+	LOAD_PHYSICAL(p6, r19, swapper_pg_dir)	// region 5 is rooted at swapper_pg_dir
+
+	.pred.rel "mutex", p6, p7
+(p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+	;;
+(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
+(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
+	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
+#ifdef CONFIG_PGTABLE_4
+	shr.u r18=r22,PUD_SHIFT			// shift pud index into position
+#else
+	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
+#endif
+	;;
+	ld8 r17=[r17]				// get *pgd (may be 0)
+	;;
+(p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
+	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=p[u|m]d_offset(pgd,addr)
+	;;
+#ifdef CONFIG_PGTABLE_4
+(p7)	ld8 r17=[r17]				// get *pud (may be 0)
+	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
+	;;
+(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was pud_present(*pud) == NULL?
+	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pud,addr)
+	;;
+#endif
+(p7)	ld8 r17=[r17]				// get *pmd (may be 0)
+	shr.u r19=r22,PAGE_SHIFT		// shift pte index into position
+	;;
+(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was pmd_present(*pmd) == NULL?
+	dep r17=r19,r17,3,(PAGE_SHIFT-3)	// r17=pte_offset(pmd,addr);
+(p6)	br.cond.spnt page_fault
+	mov b0=r30
+	br.sptk.many b0				// return to continuation point
+END(nested_dtlb_miss)
+
+	.org ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(ikey_miss)
+	DBG_FAULT(6)
+	FAULT(6)
+END(ikey_miss)
+
+	.org ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(dkey_miss)
+	DBG_FAULT(7)
+	FAULT(7)
+END(dkey_miss)
+
+	.org ia64_ivt+0x2000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(dirty_bit)
+	DBG_FAULT(8)
+	/*
+	 * What we do here is to simply turn on the dirty bit in the PTE.  We need to
+	 * update both the page-table and the TLB entry.  To efficiently access the PTE,
+	 * we address it through the virtual page table.  Most likely, the TLB entry for
+	 * the relevant virtual page table page is still present in the TLB so we can
+	 * normally do this without additional TLB misses.  In case the necessary virtual
+	 * page table TLB entry isn't present, we take a nested TLB miss hit where we look
+	 * up the physical address of the L3 PTE and then continue at label 1 below.
+	 */
+	MOV_FROM_IFA(r16)			// get the address that caused the fault
+	movl r30=1f				// load continuation point in case of nested fault
+	;;
+	THASH(p0, r17, r16, r18)		// compute virtual address of L3 PTE
+	mov r29=b0				// save b0 in case of nested fault
+	mov r31=pr				// save pr
+#ifdef CONFIG_SMP
+	mov r28=ar.ccv				// save ar.ccv
+	;;
+1:	ld8 r18=[r17]
+	;;					// avoid RAW on r18
+	mov ar.ccv=r18				// set compare value for cmpxchg
+	or r25=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
+	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
+	;;
+(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only update if page is present
+	mov r24=PAGE_SHIFT<<2
+	;;
+(p6)	cmp.eq p6,p7=r26,r18			// Only compare if page is present
+	;;
+	ITC_D(p6, r25, r18)			// install updated PTE
+	;;
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	ld8 r18=[r17]				// read PTE again
+	;;
+	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+	;;
+(p7)	ptc.l r16,r24
+	mov b0=r29				// restore b0
+	mov ar.ccv=r28
+#else
+	;;
+1:	ld8 r18=[r17]
+	;;					// avoid RAW on r18
+	or r18=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
+	mov b0=r29				// restore b0
+	;;
+	st8 [r17]=r18				// store back updated PTE
+	ITC_D(p0, r18, r16)			// install updated PTE
+#endif
+	mov pr=r31,-1				// restore pr
+	RFI
+END(dirty_bit)
+
+	.org ia64_ivt+0x2400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(iaccess_bit)
+	DBG_FAULT(9)
+	// Like Entry 8, except for instruction access
+	MOV_FROM_IFA(r16)			// get the address that caused the fault
+	movl r30=1f				// load continuation point in case of nested fault
+	mov r31=pr				// save predicates
+#ifdef CONFIG_ITANIUM
+	/*
+	 * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
+	 */
+	MOV_FROM_IPSR(p0, r17)
+	;;
+	MOV_FROM_IIP(r18)
+	tbit.z p6,p0=r17,IA64_PSR_IS_BIT	// IA64 instruction set?
+	;;
+(p6)	mov r16=r18				// if so, use cr.iip instead of cr.ifa
+#endif /* CONFIG_ITANIUM */
+	;;
+	THASH(p0, r17, r16, r18)		// compute virtual address of L3 PTE
+	mov r29=b0				// save b0 in case of nested fault)
+#ifdef CONFIG_SMP
+	mov r28=ar.ccv				// save ar.ccv
+	;;
+1:	ld8 r18=[r17]
+	;;
+	mov ar.ccv=r18				// set compare value for cmpxchg
+	or r25=_PAGE_A,r18			// set the accessed bit
+	tbit.z p7,p6 = r18,_PAGE_P_BIT	 	// Check present bit
+	;;
+(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page present
+	mov r24=PAGE_SHIFT<<2
+	;;
+(p6)	cmp.eq p6,p7=r26,r18			// Only if page present
+	;;
+	ITC_I(p6, r25, r26)			// install updated PTE
+	;;
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	ld8 r18=[r17]				// read PTE again
+	;;
+	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+	;;
+(p7)	ptc.l r16,r24
+	mov b0=r29				// restore b0
+	mov ar.ccv=r28
+#else /* !CONFIG_SMP */
+	;;
+1:	ld8 r18=[r17]
+	;;
+	or r18=_PAGE_A,r18			// set the accessed bit
+	mov b0=r29				// restore b0
+	;;
+	st8 [r17]=r18				// store back updated PTE
+	ITC_I(p0, r18, r16)			// install updated PTE
+#endif /* !CONFIG_SMP */
+	mov pr=r31,-1
+	RFI
+END(iaccess_bit)
+
+	.org ia64_ivt+0x2800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(daccess_bit)
+	DBG_FAULT(10)
+	// Like Entry 8, except for data access
+	MOV_FROM_IFA(r16)			// get the address that caused the fault
+	movl r30=1f				// load continuation point in case of nested fault
+	;;
+	THASH(p0, r17, r16, r18)		// compute virtual address of L3 PTE
+	mov r31=pr
+	mov r29=b0				// save b0 in case of nested fault)
+#ifdef CONFIG_SMP
+	mov r28=ar.ccv				// save ar.ccv
+	;;
+1:	ld8 r18=[r17]
+	;;					// avoid RAW on r18
+	mov ar.ccv=r18				// set compare value for cmpxchg
+	or r25=_PAGE_A,r18			// set the dirty bit
+	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
+	;;
+(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page is present
+	mov r24=PAGE_SHIFT<<2
+	;;
+(p6)	cmp.eq p6,p7=r26,r18			// Only if page is present
+	;;
+	ITC_D(p6, r25, r26)			// install updated PTE
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+	;;
+	ld8 r18=[r17]				// read PTE again
+	;;
+	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+	;;
+(p7)	ptc.l r16,r24
+	mov ar.ccv=r28
+#else
+	;;
+1:	ld8 r18=[r17]
+	;;					// avoid RAW on r18
+	or r18=_PAGE_A,r18			// set the accessed bit
+	;;
+	st8 [r17]=r18				// store back updated PTE
+	ITC_D(p0, r18, r16)			// install updated PTE
+#endif
+	mov b0=r29				// restore b0
+	mov pr=r31,-1
+	RFI
+END(daccess_bit)
+
+	.org ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(break_fault)
+	/*
+	 * The streamlined system call entry/exit paths only save/restore the initial part
+	 * of pt_regs.  This implies that the callers of system-calls must adhere to the
+	 * normal procedure calling conventions.
+	 *
+	 *   Registers to be saved & restored:
+	 *	CR registers: cr.ipsr, cr.iip, cr.ifs
+	 *	AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr
+	 * 	others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15
+	 *   Registers to be restored only:
+	 * 	r8-r11: output value from the system call.
+	 *
+	 * During system call exit, scratch registers (including r15) are modified/cleared
+	 * to prevent leaking bits from kernel to user level.
+	 */
+	DBG_FAULT(11)
+	mov.m r16=IA64_KR(CURRENT)		// M2 r16 <- current task (12 cyc)
+	MOV_FROM_IPSR(p0, r29)			// M2 (12 cyc)
+	mov r31=pr				// I0 (2 cyc)
+
+	MOV_FROM_IIM(r17)			// M2 (2 cyc)
+	mov.m r27=ar.rsc			// M2 (12 cyc)
+	mov r18=__IA64_BREAK_SYSCALL		// A
+
+	mov.m ar.rsc=0				// M2
+	mov.m r21=ar.fpsr			// M2 (12 cyc)
+	mov r19=b6				// I0 (2 cyc)
+	;;
+	mov.m r23=ar.bspstore			// M2 (12 cyc)
+	mov.m r24=ar.rnat			// M2 (5 cyc)
+	mov.i r26=ar.pfs			// I0 (2 cyc)
+
+	invala					// M0|1
+	nop.m 0					// M
+	mov r20=r1				// A			save r1
+
+	nop.m 0
+	movl r30=sys_call_table			// X
+
+	MOV_FROM_IIP(r28)			// M2 (2 cyc)
+	cmp.eq p0,p7=r18,r17			// I0 is this a system call?
+(p7)	br.cond.spnt non_syscall		// B  no ->
+	//
+	// From this point on, we are definitely on the syscall-path
+	// and we can use (non-banked) scratch registers.
+	//
+///////////////////////////////////////////////////////////////////////
+	mov r1=r16				// A    move task-pointer to "addl"-addressable reg
+	mov r2=r16				// A    setup r2 for ia64_syscall_setup
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// A	r9 = &current_thread_info()->flags
+
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
+	adds r15=-1024,r15			// A    subtract 1024 from syscall number
+	mov r3=NR_syscalls - 1
+	;;
+	ld1.bias r17=[r16]			// M0|1 r17 = current->thread.on_ustack flag
+	ld4 r9=[r9]				// M0|1 r9 = current_thread_info()->flags
+	extr.u r8=r29,41,2			// I0   extract ei field from cr.ipsr
+
+	shladd r30=r15,3,r30			// A    r30 = sys_call_table + 8*(syscall-1024)
+	addl r22=IA64_RBS_OFFSET,r1		// A    compute base of RBS
+	cmp.leu p6,p7=r15,r3			// A    syscall number in range?
+	;;
+
+	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch RBS
+(p6)	ld8 r30=[r30]				// M0|1 load address of syscall entry point
+	tnat.nz.or p7,p0=r15			// I0	is syscall nr a NaT?
+
+	mov.m ar.bspstore=r22			// M2   switch to kernel RBS
+	cmp.eq p8,p9=2,r8			// A    isr.ei==2?
+	;;
+
+(p8)	mov r8=0				// A    clear ei to 0
+(p7)	movl r30=sys_ni_syscall			// X
+
+(p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
+(p9)	adds r8=1,r8				// A    increment ei to next slot
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	;;
+	mov b6=r30				// I0   setup syscall handler branch reg early
+#else
+	nop.i 0
+	;;
+#endif
+
+	mov.m r25=ar.unat			// M2 (5 cyc)
+	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
+	adds r15=1024,r15			// A    restore original syscall number
+	//
+	// If any of the above loads miss in L1D, we'll stall here until
+	// the data arrives.
+	//
+///////////////////////////////////////////////////////////////////////
+	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	MOV_FROM_ITC(p0, p14, r30, r18)		// M    get cycle for accounting
+#else
+	mov b6=r30				// I0   setup syscall handler branch reg early
+#endif
+	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
+
+	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
+	mov r18=ar.bsp				// M2 (12 cyc)
+(pKStk)	br.cond.spnt .break_fixup		// B	we're already in kernel-mode -- fix up RBS
+	;;
+.back_from_break_fixup:
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A    compute base of memory stack
+	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
+	br.call.sptk.many b7=ia64_syscall_setup	// B
+1:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	// mov.m r30=ar.itc is called in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
+(pKStk)	br.cond.spnt .skip_accounting		// B	unlikely skip
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// M  get last stamp
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// M  time at leave
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// M  cumulated stime
+	ld8 r21=[r17]				// M  cumulated utime
+	sub r22=r19,r18				// A  stime before leave
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// M  update stamp
+	sub r18=r30,r19				// A  elapsed time in user
+	;;
+	add r20=r20,r22				// A  sum stime
+	add r21=r21,r18				// A  sum utime
+	;;
+	st8 [r16]=r20				// M  update stime
+	st8 [r17]=r21				// M  update utime
+	;;
+.skip_accounting:
+#endif
+	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
+	nop 0
+	BSW_1(r2, r14)				// B (6 cyc) regs are saved, switch to bank 1
+	;;
+
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16)	// M2	now it's safe to re-enable intr.-collection
+						// M0   ensure interruption collection is on
+	movl r3=ia64_ret_from_syscall		// X
+	;;
+	mov rp=r3				// I0   set the real return addr
+(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
+
+	SSM_PSR_I(p15, p15, r16)		// M2   restore psr.i
+(p14)	br.call.sptk.many b6=b6			// B    invoke syscall-handker (ignore return addr)
+	br.cond.spnt.many ia64_trace_syscall	// B	do syscall-tracing thingamagic
+	// NOT REACHED
+///////////////////////////////////////////////////////////////////////
+	// On entry, we optimistically assumed that we're coming from user-space.
+	// For the rare cases where a system-call is done from within the kernel,
+	// we fix things up at this point:
+.break_fixup:
+	add r1=-IA64_PT_REGS_SIZE,sp		// A    allocate space for pt_regs structure
+	mov ar.rnat=r24				// M2	restore kernel's AR.RNAT
+	;;
+	mov ar.bspstore=r23			// M2	restore kernel's AR.BSPSTORE
+	br.cond.sptk .back_from_break_fixup
+END(break_fault)
+
+	.org ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(interrupt)
+	/* interrupt handler has become too big to fit this area. */
+	br.sptk.many __interrupt
+END(interrupt)
+
+	.org ia64_ivt+0x3400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+	DBG_FAULT(13)
+	FAULT(13)
+
+	.org ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+	DBG_FAULT(14)
+	FAULT(14)
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 *
+	 * ia64_syscall_setup() is a separate subroutine so that it can
+	 *	allocate stacked registers so it can safely demine any
+	 *	potential NaT values from the input registers.
+	 *
+	 * On entry:
+	 *	- executing on bank 0 or bank 1 register set (doesn't matter)
+	 *	-  r1: stack pointer
+	 *	-  r2: current task pointer
+	 *	-  r3: preserved
+	 *	- r11: original contents (saved ar.pfs to be saved)
+	 *	- r12: original contents (sp to be saved)
+	 *	- r13: original contents (tp to be saved)
+	 *	- r15: original contents (syscall # to be saved)
+	 *	- r18: saved bsp (after switching to kernel stack)
+	 *	- r19: saved b6
+	 *	- r20: saved r1 (gp)
+	 *	- r21: saved ar.fpsr
+	 *	- r22: kernel's register backing store base (krbs_base)
+	 *	- r23: saved ar.bspstore
+	 *	- r24: saved ar.rnat
+	 *	- r25: saved ar.unat
+	 *	- r26: saved ar.pfs
+	 *	- r27: saved ar.rsc
+	 *	- r28: saved cr.iip
+	 *	- r29: saved cr.ipsr
+	 *	- r30: ar.itc for accounting (don't touch)
+	 *	- r31: saved pr
+	 *	-  b0: original contents (to be saved)
+	 * On exit:
+	 *	-  p10: TRUE if syscall is invoked with more than 8 out
+	 *		registers or r15's Nat is true
+	 *	-  r1: kernel's gp
+	 *	-  r3: preserved (same as on entry)
+	 *	-  r8: -EINVAL if p10 is true
+	 *	- r12: points to kernel stack
+	 *	- r13: points to current task
+	 *	- r14: preserved (same as on entry)
+	 *	- p13: preserved
+	 *	- p15: TRUE if interrupts need to be re-enabled
+	 *	- ar.fpsr: set to kernel settings
+	 *	-  b6: preserved (same as on entry)
+	 */
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
+GLOBAL_ENTRY(ia64_syscall_setup)
+#if PT(B6) != 0
+# error This code assumes that b6 is the first field in pt_regs.
+#endif
+	st8 [r1]=r19				// save b6
+	add r16=PT(CR_IPSR),r1			// initialize first base pointer
+	add r17=PT(R11),r1			// initialize second base pointer
+	;;
+	alloc r19=ar.pfs,8,0,0,0		// ensure in0-in7 are writable
+	st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR)	// save cr.ipsr
+	tnat.nz p8,p0=in0
+
+	st8.spill [r17]=r11,PT(CR_IIP)-PT(R11)	// save r11
+	tnat.nz p9,p0=in1
+(pKStk)	mov r18=r0				// make sure r18 isn't NaT
+	;;
+
+	st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS)	// save ar.pfs
+	st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP)	// save cr.iip
+	mov r28=b0				// save b0 (2 cyc)
+	;;
+
+	st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT)	// save ar.unat
+	dep r19=0,r19,38,26			// clear all bits but 0..37 [I0]
+(p8)	mov in0=-1
+	;;
+
+	st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS)	// store ar.pfs.pfm in cr.ifs
+	extr.u r11=r19,7,7	// I0		// get sol of ar.pfs
+	and r8=0x7f,r19		// A		// get sof of ar.pfs
+
+	st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
+	tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0
+(p9)	mov in1=-1
+	;;
+
+(pUStk) sub r18=r18,r22				// r18=RSE.ndirty*8
+	tnat.nz p10,p0=in2
+	add r11=8,r11
+	;;
+(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16		// skip over ar_rnat field
+(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17	// skip over ar_bspstore field
+	tnat.nz p11,p0=in3
+	;;
+(p10)	mov in2=-1
+	tnat.nz p12,p0=in4				// [I0]
+(p11)	mov in3=-1
+	;;
+(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT)	// save ar.rnat
+(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE)	// save ar.bspstore
+	shl r18=r18,16				// compute ar.rsc to be used for "loadrs"
+	;;
+	st8 [r16]=r31,PT(LOADRS)-PT(PR)		// save predicates
+	st8 [r17]=r28,PT(R1)-PT(B0)		// save b0
+	tnat.nz p13,p0=in5				// [I0]
+	;;
+	st8 [r16]=r18,PT(R12)-PT(LOADRS)	// save ar.rsc value for "loadrs"
+	st8.spill [r17]=r20,PT(R13)-PT(R1)	// save original r1
+(p12)	mov in4=-1
+	;;
+
+.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12)	// save r12
+.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13)		// save r13
+(p13)	mov in5=-1
+	;;
+	st8 [r16]=r21,PT(R8)-PT(AR_FPSR)	// save ar.fpsr
+	tnat.nz p13,p0=in6
+	cmp.lt p10,p9=r11,r8	// frame size can't be more than local+8
+	;;
+	mov r8=1
+(p9)	tnat.nz p10,p0=r15
+	adds r12=-16,r1		// switch to kernel memory stack (with 16 bytes of scratch)
+
+	st8.spill [r17]=r15			// save r15
+	tnat.nz p8,p0=in7
+	nop.i 0
+
+	mov r13=r2				// establish `current'
+	movl r1=__gp				// establish kernel global pointer
+	;;
+	st8 [r16]=r8		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
+(p13)	mov in6=-1
+(p8)	mov in7=-1
+
+	cmp.eq pSys,pNonSys=r0,r0		// set pSys=1, pNonSys=0
+	movl r17=FPSR_DEFAULT
+	;;
+	mov.m ar.fpsr=r17			// set ar.fpsr to kernel default value
+(p10)	mov r8=-EINVAL
+	br.ret.sptk.many b7
+END(ia64_syscall_setup)
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
+
+	.org ia64_ivt+0x3c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+	DBG_FAULT(15)
+	FAULT(15)
+
+	.org ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+	DBG_FAULT(16)
+	FAULT(16)
+
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
+	/*
+	 * There is no particular reason for this code to be here, other than
+	 * that there happens to be space here that would go unused otherwise.
+	 * If this fault ever gets "unreserved", simply moved the following
+	 * code to a more suitable spot...
+	 *
+	 * account_sys_enter is called from SAVE_MIN* macros if accounting is
+	 * enabled and if the macro is entered from user mode.
+	 */
+GLOBAL_ENTRY(account_sys_enter)
+	// mov.m r20=ar.itc is called in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at left from kernel
+        ;;
+	ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r20,r19				// elapsed time in user mode
+	;;
+	add r23=r23,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r23				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+	br.ret.sptk.many rp
+END(account_sys_enter)
+#endif
+
+	.org ia64_ivt+0x4400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+	DBG_FAULT(17)
+	FAULT(17)
+
+	.org ia64_ivt+0x4800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+	DBG_FAULT(18)
+	FAULT(18)
+
+	.org ia64_ivt+0x4c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+	DBG_FAULT(19)
+	FAULT(19)
+
+//
+// --- End of long entries, Beginning of short entries
+//
+
+	.org ia64_ivt+0x5000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
+ENTRY(page_not_present)
+	DBG_FAULT(20)
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
+	/*
+	 * The Linux page fault handler doesn't expect non-present pages to be in
+	 * the TLB.  Flush the existing entry now, so we meet that expectation.
+	 */
+	mov r17=PAGE_SHIFT<<2
+	;;
+	ptc.l r16,r17
+	;;
+	mov r31=pr
+	srlz.d
+	br.sptk.many page_fault
+END(page_not_present)
+
+	.org ia64_ivt+0x5100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
+ENTRY(key_permission)
+	DBG_FAULT(21)
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
+	mov r31=pr
+	;;
+	srlz.d
+	br.sptk.many page_fault
+END(key_permission)
+
+	.org ia64_ivt+0x5200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(iaccess_rights)
+	DBG_FAULT(22)
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
+	mov r31=pr
+	;;
+	srlz.d
+	br.sptk.many page_fault
+END(iaccess_rights)
+
+	.org ia64_ivt+0x5300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(daccess_rights)
+	DBG_FAULT(23)
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
+	mov r31=pr
+	;;
+	srlz.d
+	br.sptk.many page_fault
+END(daccess_rights)
+
+	.org ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(general_exception)
+	DBG_FAULT(24)
+	MOV_FROM_ISR(r16)
+	mov r31=pr
+	;;
+	cmp4.eq p6,p0=0,r16
+(p6)	br.sptk.many dispatch_illegal_op_fault
+	;;
+	mov r19=24		// fault number
+	br.sptk.many dispatch_to_fault_handler
+END(general_exception)
+
+	.org ia64_ivt+0x5500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(disabled_fp_reg)
+	DBG_FAULT(25)
+	rsm psr.dfh		// ensure we can access fph
+	;;
+	srlz.d
+	mov r31=pr
+	mov r19=25
+	br.sptk.many dispatch_to_fault_handler
+END(disabled_fp_reg)
+
+	.org ia64_ivt+0x5600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(nat_consumption)
+	DBG_FAULT(26)
+
+	MOV_FROM_IPSR(p0, r16)
+	MOV_FROM_ISR(r17)
+	mov r31=pr				// save PR
+	;;
+	and r18=0xf,r17				// r18 = cr.ipsr.code{3:0}
+	tbit.z p6,p0=r17,IA64_ISR_NA_BIT
+	;;
+	cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18
+	dep r16=-1,r16,IA64_PSR_ED_BIT,1
+(p6)	br.cond.spnt 1f		// branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH)
+	;;
+	MOV_TO_IPSR(p0, r16, r18)
+	mov pr=r31,-1
+	;;
+	RFI
+
+1:	mov pr=r31,-1
+	;;
+	FAULT(26)
+END(nat_consumption)
+
+	.org ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(speculation_vector)
+	DBG_FAULT(27)
+	/*
+	 * A [f]chk.[as] instruction needs to take the branch to the recovery code but
+	 * this part of the architecture is not implemented in hardware on some CPUs, such
+	 * as Itanium.  Thus, in general we need to emulate the behavior.  IIM contains
+	 * the relative target (not yet sign extended).  So after sign extending it we
+	 * simply add it to IIP.  We also need to reset the EI field of the IPSR to zero,
+	 * i.e., the slot to restart into.
+	 *
+	 * cr.imm contains zero_ext(imm21)
+	 */
+	MOV_FROM_IIM(r18)
+	;;
+	MOV_FROM_IIP(r17)
+	shl r18=r18,43			// put sign bit in position (43=64-21)
+	;;
+
+	MOV_FROM_IPSR(p0, r16)
+	shr r18=r18,39			// sign extend (39=43-4)
+	;;
+
+	add r17=r17,r18			// now add the offset
+	;;
+	MOV_TO_IIP(r17, r19)
+	dep r16=0,r16,41,2		// clear EI
+	;;
+
+	MOV_TO_IPSR(p0, r16, r19)
+	;;
+
+	RFI
+END(speculation_vector)
+
+	.org ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+	DBG_FAULT(28)
+	FAULT(28)
+
+	.org ia64_ivt+0x5900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(debug_vector)
+	DBG_FAULT(29)
+	FAULT(29)
+END(debug_vector)
+
+	.org ia64_ivt+0x5a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(unaligned_access)
+	DBG_FAULT(30)
+	mov r31=pr		// prepare to save predicates
+	;;
+	br.sptk.many dispatch_unaligned_handler
+END(unaligned_access)
+
+	.org ia64_ivt+0x5b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(unsupported_data_reference)
+	DBG_FAULT(31)
+	FAULT(31)
+END(unsupported_data_reference)
+
+	.org ia64_ivt+0x5c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ENTRY(floating_point_fault)
+	DBG_FAULT(32)
+	FAULT(32)
+END(floating_point_fault)
+
+	.org ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(floating_point_trap)
+	DBG_FAULT(33)
+	FAULT(33)
+END(floating_point_trap)
+
+	.org ia64_ivt+0x5e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(lower_privilege_trap)
+	DBG_FAULT(34)
+	FAULT(34)
+END(lower_privilege_trap)
+
+	.org ia64_ivt+0x5f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(taken_branch_trap)
+	DBG_FAULT(35)
+	FAULT(35)
+END(taken_branch_trap)
+
+	.org ia64_ivt+0x6000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(single_step_trap)
+	DBG_FAULT(36)
+	FAULT(36)
+END(single_step_trap)
+
+	.org ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Reserved
+	DBG_FAULT(37)
+	FAULT(37)
+
+	.org ia64_ivt+0x6200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+	DBG_FAULT(38)
+	FAULT(38)
+
+	.org ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+	DBG_FAULT(39)
+	FAULT(39)
+
+	.org ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+	DBG_FAULT(40)
+	FAULT(40)
+
+	.org ia64_ivt+0x6500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+	DBG_FAULT(41)
+	FAULT(41)
+
+	.org ia64_ivt+0x6600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+	DBG_FAULT(42)
+	FAULT(42)
+
+	.org ia64_ivt+0x6700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+	DBG_FAULT(43)
+	FAULT(43)
+
+	.org ia64_ivt+0x6800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+	DBG_FAULT(44)
+	FAULT(44)
+
+	.org ia64_ivt+0x6900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(ia32_exception)
+	DBG_FAULT(45)
+	FAULT(45)
+END(ia32_exception)
+
+	.org ia64_ivt+0x6a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
+ENTRY(ia32_intercept)
+	DBG_FAULT(46)
+	FAULT(46)
+END(ia32_intercept)
+
+	.org ia64_ivt+0x6b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt  (74)
+ENTRY(ia32_interrupt)
+	DBG_FAULT(47)
+	FAULT(47)
+END(ia32_interrupt)
+
+	.org ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+	DBG_FAULT(48)
+	FAULT(48)
+
+	.org ia64_ivt+0x6d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+	DBG_FAULT(49)
+	FAULT(49)
+
+	.org ia64_ivt+0x6e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+	DBG_FAULT(50)
+	FAULT(50)
+
+	.org ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+	DBG_FAULT(51)
+	FAULT(51)
+
+	.org ia64_ivt+0x7000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+	DBG_FAULT(52)
+	FAULT(52)
+
+	.org ia64_ivt+0x7100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+	DBG_FAULT(53)
+	FAULT(53)
+
+	.org ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+	DBG_FAULT(54)
+	FAULT(54)
+
+	.org ia64_ivt+0x7300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+	DBG_FAULT(55)
+	FAULT(55)
+
+	.org ia64_ivt+0x7400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+	DBG_FAULT(56)
+	FAULT(56)
+
+	.org ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+	DBG_FAULT(57)
+	FAULT(57)
+
+	.org ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+	DBG_FAULT(58)
+	FAULT(58)
+
+	.org ia64_ivt+0x7700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+	DBG_FAULT(59)
+	FAULT(59)
+
+	.org ia64_ivt+0x7800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+	DBG_FAULT(60)
+	FAULT(60)
+
+	.org ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+	DBG_FAULT(61)
+	FAULT(61)
+
+	.org ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+	DBG_FAULT(62)
+	FAULT(62)
+
+	.org ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+	DBG_FAULT(63)
+	FAULT(63)
+
+	.org ia64_ivt+0x7c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+	DBG_FAULT(64)
+	FAULT(64)
+
+	.org ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+	DBG_FAULT(65)
+	FAULT(65)
+
+	.org ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+	DBG_FAULT(66)
+	FAULT(66)
+
+	.org ia64_ivt+0x7f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+	DBG_FAULT(67)
+	FAULT(67)
+
+	//-----------------------------------------------------------------------------------
+	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
+ENTRY(page_fault)
+	SSM_PSR_DT_AND_SRLZ_I
+	;;
+	SAVE_MIN_WITH_COVER
+	alloc r15=ar.pfs,0,0,3,0
+	MOV_FROM_IFA(out0)
+	MOV_FROM_ISR(out1)
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3)
+	adds r3=8,r2				// set up second base pointer
+	SSM_PSR_I(p15, p15, r14)		// restore psr.i
+	movl r14=ia64_leave_kernel
+	;;
+	SAVE_REST
+	mov rp=r14
+	;;
+	adds out2=16,r12			// out2 = pointer to pt_regs
+	br.call.sptk.many b6=ia64_do_page_fault	// ignore return address
+END(page_fault)
+
+ENTRY(non_syscall)
+	mov ar.rsc=r27			// restore ar.rsc before SAVE_MIN_WITH_COVER
+	;;
+	SAVE_MIN_WITH_COVER
+
+	// There is no particular reason for this code to be here, other than that
+	// there happens to be space here that would go unused otherwise.  If this
+	// fault ever gets "unreserved", simply moved the following code to a more
+	// suitable spot...
+
+	alloc r14=ar.pfs,0,0,2,0
+	MOV_FROM_IIM(out0)
+	add out1=16,sp
+	adds r3=8,r2			// set up second base pointer for SAVE_REST
+
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24)
+					// guarantee that interruption collection is on
+	SSM_PSR_I(p15, p15, r15)	// restore psr.i
+	movl r15=ia64_leave_kernel
+	;;
+	SAVE_REST
+	mov rp=r15
+	;;
+	br.call.sptk.many b6=ia64_bad_break	// avoid WAW on CFM and ignore return addr
+END(non_syscall)
+
+ENTRY(__interrupt)
+	DBG_FAULT(12)
+	mov r31=pr		// prepare to save predicates
+	;;
+	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14)
+				// ensure everybody knows psr.ic is back on
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	;;
+	SAVE_REST
+	;;
+	MCA_RECOVER_RANGE(interrupt)
+	alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+	MOV_FROM_IVR(out0, r8)	// pass cr.ivr as first arg
+	add out1=16,sp		// pass pointer to pt_regs as second arg
+	;;
+	srlz.d			// make sure we see the effect of cr.ivr
+	movl r14=ia64_leave_kernel
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=ia64_handle_irq
+END(__interrupt)
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 */
+
+ENTRY(dispatch_unaligned_handler)
+	SAVE_MIN_WITH_COVER
+	;;
+	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
+	MOV_FROM_IFA(out0)
+	adds out1=16,sp
+
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
+						// guarantee that interruption collection is on
+	SSM_PSR_I(p15, p15, r3)			// restore psr.i
+	adds r3=8,r2				// set up second base pointer
+	;;
+	SAVE_REST
+	movl r14=ia64_leave_kernel
+	;;
+	mov rp=r14
+	br.sptk.many ia64_prepare_handle_unaligned
+END(dispatch_unaligned_handler)
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 */
+
+ENTRY(dispatch_to_fault_handler)
+	/*
+	 * Input:
+	 *	psr.ic:	off
+	 *	r19:	fault vector number (e.g., 24 for General Exception)
+	 *	r31:	contains saved predicates (pr)
+	 */
+	SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,5,0
+	MOV_FROM_ISR(out1)
+	MOV_FROM_IFA(out2)
+	MOV_FROM_IIM(out3)
+	MOV_FROM_ITIR(out4)
+	;;
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0)
+						// guarantee that interruption collection is on
+	mov out0=r15
+	;;
+	SSM_PSR_I(p15, p15, r3)			// restore psr.i
+	adds r3=8,r2				// set up second base pointer for SAVE_REST
+	;;
+	SAVE_REST
+	movl r14=ia64_leave_kernel
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=ia64_fault
+END(dispatch_to_fault_handler)
+
+	/*
+	 * Squatting in this space ...
+	 *
+	 * This special case dispatcher for illegal operation faults allows preserved
+	 * registers to be modified through a callback function (asm only) that is handed
+	 * back from the fault handler in r8. Up to three arguments can be passed to the
+	 * callback function by returning an aggregate with the callback as its first
+	 * element, followed by the arguments.
+	 */
+ENTRY(dispatch_illegal_op_fault)
+	.prologue
+	.body
+	SAVE_MIN_WITH_COVER
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
+				// guarantee that interruption collection is on
+	;;
+	SSM_PSR_I(p15, p15, r3)	// restore psr.i
+	adds r3=8,r2	// set up second base pointer for SAVE_REST
+	;;
+	alloc r14=ar.pfs,0,0,1,0	// must be first in insn group
+	mov out0=ar.ec
+	;;
+	SAVE_REST
+	PT_REGS_UNWIND_INFO(0)
+	;;
+	br.call.sptk.many rp=ia64_illegal_op_fault
+.ret0:	;;
+	alloc r14=ar.pfs,0,0,3,0	// must be first in insn group
+	mov out0=r9
+	mov out1=r10
+	mov out2=r11
+	movl r15=ia64_leave_kernel
+	;;
+	mov rp=r15
+	mov b6=r8
+	;;
+	cmp.ne p6,p0=0,r8
+(p6)	br.call.dpnt.many b6=b6		// call returns to ia64_leave_kernel
+	br.sptk.many ia64_leave_kernel
+END(dispatch_illegal_op_fault)
diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S
new file mode 100644
index 00000000..f69389c7
--- /dev/null
+++ b/arch/ia64/kernel/jprobes.S
@@ -0,0 +1,90 @@
+/*
+ * Jprobe specific operations
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-May     Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ *              <anil.s.keshavamurthy@intel.com> initial implementation
+ *
+ * Jprobes (a.k.a. "jump probes" which is built on-top of kprobes) allow a
+ * probe to be inserted into the beginning of a function call.  The fundamental
+ * difference between a jprobe and a kprobe is the jprobe handler is executed
+ * in the same context as the target function, while the kprobe handlers
+ * are executed in interrupt context.
+ *
+ * For jprobes we initially gain control by placing a break point in the
+ * first instruction of the targeted function.  When we catch that specific
+ * break, we:
+ *        * set the return address to our jprobe_inst_return() function
+ *        * jump to the jprobe handler function
+ *
+ * Since we fixed up the return address, the jprobe handler will return to our
+ * jprobe_inst_return() function, giving us control again.  At this point we
+ * are back in the parents frame marker, so we do yet another call to our
+ * jprobe_break() function to fix up the frame marker as it would normally
+ * exist in the target function.
+ *
+ * Our jprobe_return function then transfers control back to kprobes.c by
+ * executing a break instruction using one of our reserved numbers.  When we
+ * catch that break in kprobes.c, we continue like we do for a normal kprobe
+ * by single stepping the emulated instruction, and then returning execution
+ * to the correct location.
+ */
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+
+	/*
+	 * void jprobe_break(void)
+	 */
+	.section .kprobes.text, "ax"
+ENTRY(jprobe_break)
+	break.m __IA64_BREAK_JPROBE
+END(jprobe_break)
+
+	/*
+	 * void jprobe_inst_return(void)
+	 */
+GLOBAL_ENTRY(jprobe_inst_return)
+	br.call.sptk.many b0=jprobe_break
+END(jprobe_inst_return)
+
+GLOBAL_ENTRY(invalidate_stacked_regs)
+	movl r16=invalidate_restore_cfm
+	;;
+	mov b6=r16
+	;;
+	br.ret.sptk.many b6
+	;;
+invalidate_restore_cfm:
+	mov r16=ar.rsc
+	;;
+	mov ar.rsc=r0
+	;;
+	loadrs
+	;;
+	mov ar.rsc=r16
+	;;
+	br.cond.sptk.many rp
+END(invalidate_stacked_regs)
+
+GLOBAL_ENTRY(flush_register_stack)
+	// flush dirty regs to backing store (must be first in insn group)
+	flushrs
+	;;
+	br.ret.sptk.many rp
+END(flush_register_stack)
+
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
new file mode 100644
index 00000000..7026b29e
--- /dev/null
+++ b/arch/ia64/kernel/kprobes.c
@@ -0,0 +1,1129 @@
+/*
+ *  Kernel Probes (KProbes)
+ *  arch/ia64/kernel/kprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-Apr     Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ *              <anil.s.keshavamurthy@intel.com> adapted from i386
+ */
+
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/preempt.h>
+#include <linux/moduleloader.h>
+#include <linux/kdebug.h>
+
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/uaccess.h>
+
+extern void jprobe_inst_return(void);
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
+enum instruction_type {A, I, M, F, B, L, X, u};
+static enum instruction_type bundle_encoding[32][3] = {
+  { M, I, I },				/* 00 */
+  { M, I, I },				/* 01 */
+  { M, I, I },				/* 02 */
+  { M, I, I },				/* 03 */
+  { M, L, X },				/* 04 */
+  { M, L, X },				/* 05 */
+  { u, u, u },  			/* 06 */
+  { u, u, u },  			/* 07 */
+  { M, M, I },				/* 08 */
+  { M, M, I },				/* 09 */
+  { M, M, I },				/* 0A */
+  { M, M, I },				/* 0B */
+  { M, F, I },				/* 0C */
+  { M, F, I },				/* 0D */
+  { M, M, F },				/* 0E */
+  { M, M, F },				/* 0F */
+  { M, I, B },				/* 10 */
+  { M, I, B },				/* 11 */
+  { M, B, B },				/* 12 */
+  { M, B, B },				/* 13 */
+  { u, u, u },  			/* 14 */
+  { u, u, u },  			/* 15 */
+  { B, B, B },				/* 16 */
+  { B, B, B },				/* 17 */
+  { M, M, B },				/* 18 */
+  { M, M, B },				/* 19 */
+  { u, u, u },  			/* 1A */
+  { u, u, u },  			/* 1B */
+  { M, F, B },				/* 1C */
+  { M, F, B },				/* 1D */
+  { u, u, u },  			/* 1E */
+  { u, u, u },  			/* 1F */
+};
+
+/* Insert a long branch code */
+static void __kprobes set_brl_inst(void *from, void *to)
+{
+	s64 rel = ((s64) to - (s64) from) >> 4;
+	bundle_t *brl;
+	brl = (bundle_t *) ((u64) from & ~0xf);
+	brl->quad0.template = 0x05;	/* [MLX](stop) */
+	brl->quad0.slot0 = NOP_M_INST;	/* nop.m 0x0 */
+	brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
+	brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
+	/* brl.cond.sptk.many.clr rel<<4 (qp=0) */
+	brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
+}
+
+/*
+ * In this function we check to see if the instruction
+ * is IP relative instruction and update the kprobe
+ * inst flag accordingly
+ */
+static void __kprobes update_kprobe_inst_flag(uint template, uint  slot,
+					      uint major_opcode,
+					      unsigned long kprobe_inst,
+					      struct kprobe *p)
+{
+	p->ainsn.inst_flag = 0;
+	p->ainsn.target_br_reg = 0;
+	p->ainsn.slot = slot;
+
+	/* Check for Break instruction
+	 * Bits 37:40 Major opcode to be zero
+	 * Bits 27:32 X6 to be zero
+	 * Bits 32:35 X3 to be zero
+	 */
+	if ((!major_opcode) && (!((kprobe_inst >> 27) & 0x1FF)) ) {
+		/* is a break instruction */
+	 	p->ainsn.inst_flag |= INST_FLAG_BREAK_INST;
+		return;
+	}
+
+	if (bundle_encoding[template][slot] == B) {
+		switch (major_opcode) {
+		  case INDIRECT_CALL_OPCODE:
+	 		p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+			p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+			break;
+		  case IP_RELATIVE_PREDICT_OPCODE:
+		  case IP_RELATIVE_BRANCH_OPCODE:
+			p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
+			break;
+		  case IP_RELATIVE_CALL_OPCODE:
+			p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
+			p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+			p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+			break;
+		}
+	} else if (bundle_encoding[template][slot] == X) {
+		switch (major_opcode) {
+		  case LONG_CALL_OPCODE:
+			p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+			p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+		  break;
+		}
+	}
+	return;
+}
+
+/*
+ * In this function we check to see if the instruction
+ * (qp) cmpx.crel.ctype p1,p2=r2,r3
+ * on which we are inserting kprobe is cmp instruction
+ * with ctype as unc.
+ */
+static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot,
+					    uint major_opcode,
+					    unsigned long kprobe_inst)
+{
+	cmp_inst_t cmp_inst;
+	uint ctype_unc = 0;
+
+	if (!((bundle_encoding[template][slot] == I) ||
+		(bundle_encoding[template][slot] == M)))
+		goto out;
+
+	if (!((major_opcode == 0xC) || (major_opcode == 0xD) ||
+		(major_opcode == 0xE)))
+		goto out;
+
+	cmp_inst.l = kprobe_inst;
+	if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) {
+		/* Integer compare - Register Register (A6 type)*/
+		if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0)
+				&&(cmp_inst.f.c == 1))
+			ctype_unc = 1;
+	} else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) {
+		/* Integer compare - Immediate Register (A8 type)*/
+		if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1))
+			ctype_unc = 1;
+	}
+out:
+	return ctype_unc;
+}
+
+/*
+ * In this function we check to see if the instruction
+ * on which we are inserting kprobe is supported.
+ * Returns qp value if supported
+ * Returns -EINVAL if unsupported
+ */
+static int __kprobes unsupported_inst(uint template, uint  slot,
+				      uint major_opcode,
+				      unsigned long kprobe_inst,
+				      unsigned long addr)
+{
+	int qp;
+
+	qp = kprobe_inst & 0x3f;
+	if (is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) {
+		if (slot == 1 && qp)  {
+			printk(KERN_WARNING "Kprobes on cmp unc "
+					"instruction on slot 1 at <0x%lx> "
+					"is not supported\n", addr);
+			return -EINVAL;
+
+		}
+		qp = 0;
+	}
+	else if (bundle_encoding[template][slot] == I) {
+		if (major_opcode == 0) {
+			/*
+			 * Check for Integer speculation instruction
+			 * - Bit 33-35 to be equal to 0x1
+			 */
+			if (((kprobe_inst >> 33) & 0x7) == 1) {
+				printk(KERN_WARNING
+					"Kprobes on speculation inst at <0x%lx> not supported\n",
+						addr);
+				return -EINVAL;
+			}
+			/*
+			 * IP relative mov instruction
+			 *  - Bit 27-35 to be equal to 0x30
+			 */
+			if (((kprobe_inst >> 27) & 0x1FF) == 0x30) {
+				printk(KERN_WARNING
+					"Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n",
+						addr);
+				return -EINVAL;
+
+			}
+		}
+		else if ((major_opcode == 5) &&	!(kprobe_inst & (0xFUl << 33)) &&
+				(kprobe_inst & (0x1UL << 12))) {
+			/* test bit instructions, tbit,tnat,tf
+			 * bit 33-36 to be equal to 0
+			 * bit 12 to be equal to 1
+			 */
+			if (slot == 1 && qp) {
+				printk(KERN_WARNING "Kprobes on test bit "
+						"instruction on slot at <0x%lx> "
+						"is not supported\n", addr);
+				return -EINVAL;
+			}
+			qp = 0;
+		}
+	}
+	else if (bundle_encoding[template][slot] == B) {
+		if (major_opcode == 7) {
+			/* IP-Relative Predict major code is 7 */
+			printk(KERN_WARNING "Kprobes on IP-Relative"
+					"Predict is not supported\n");
+			return -EINVAL;
+		}
+		else if (major_opcode == 2) {
+			/* Indirect Predict, major code is 2
+			 * bit 27-32 to be equal to 10 or 11
+			 */
+			int x6=(kprobe_inst >> 27) & 0x3F;
+			if ((x6 == 0x10) || (x6 == 0x11)) {
+				printk(KERN_WARNING "Kprobes on "
+					"Indirect Predict is not supported\n");
+				return -EINVAL;
+			}
+		}
+	}
+	/* kernel does not use float instruction, here for safety kprobe
+	 * will judge whether it is fcmp/flass/float approximation instruction
+	 */
+	else if (unlikely(bundle_encoding[template][slot] == F)) {
+		if ((major_opcode == 4 || major_opcode == 5) &&
+				(kprobe_inst  & (0x1 << 12))) {
+			/* fcmp/fclass unc instruction */
+			if (slot == 1 && qp) {
+				printk(KERN_WARNING "Kprobes on fcmp/fclass "
+					"instruction on slot at <0x%lx> "
+					"is not supported\n", addr);
+				return -EINVAL;
+
+			}
+			qp = 0;
+		}
+		if ((major_opcode == 0 || major_opcode == 1) &&
+			(kprobe_inst & (0x1UL << 33))) {
+			/* float Approximation instruction */
+			if (slot == 1 && qp) {
+				printk(KERN_WARNING "Kprobes on float Approx "
+					"instr at <0x%lx> is not supported\n",
+						addr);
+				return -EINVAL;
+			}
+			qp = 0;
+		}
+	}
+	return qp;
+}
+
+/*
+ * In this function we override the bundle with
+ * the break instruction at the given slot.
+ */
+static void __kprobes prepare_break_inst(uint template, uint  slot,
+					 uint major_opcode,
+					 unsigned long kprobe_inst,
+					 struct kprobe *p,
+					 int qp)
+{
+	unsigned long break_inst = BREAK_INST;
+	bundle_t *bundle = &p->opcode.bundle;
+
+	/*
+	 * Copy the original kprobe_inst qualifying predicate(qp)
+	 * to the break instruction
+	 */
+	break_inst |= qp;
+
+	switch (slot) {
+	  case 0:
+		bundle->quad0.slot0 = break_inst;
+		break;
+	  case 1:
+		bundle->quad0.slot1_p0 = break_inst;
+		bundle->quad1.slot1_p1 = break_inst >> (64-46);
+		break;
+	  case 2:
+		bundle->quad1.slot2 = break_inst;
+		break;
+	}
+
+	/*
+	 * Update the instruction flag, so that we can
+	 * emulate the instruction properly after we
+	 * single step on original instruction
+	 */
+	update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p);
+}
+
+static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot,
+	       	unsigned long *kprobe_inst, uint *major_opcode)
+{
+	unsigned long kprobe_inst_p0, kprobe_inst_p1;
+	unsigned int template;
+
+	template = bundle->quad0.template;
+
+	switch (slot) {
+	  case 0:
+		*major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT);
+		*kprobe_inst = bundle->quad0.slot0;
+		  break;
+	  case 1:
+		*major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT);
+		kprobe_inst_p0 = bundle->quad0.slot1_p0;
+		kprobe_inst_p1 = bundle->quad1.slot1_p1;
+		*kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46));
+		break;
+	  case 2:
+		*major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT);
+		*kprobe_inst = bundle->quad1.slot2;
+		break;
+	}
+}
+
+/* Returns non-zero if the addr is in the Interrupt Vector Table */
+static int __kprobes in_ivt_functions(unsigned long addr)
+{
+	return (addr >= (unsigned long)__start_ivt_text
+		&& addr < (unsigned long)__end_ivt_text);
+}
+
+static int __kprobes valid_kprobe_addr(int template, int slot,
+				       unsigned long addr)
+{
+	if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) {
+		printk(KERN_WARNING "Attempting to insert unaligned kprobe "
+				"at 0x%lx\n", addr);
+		return -EINVAL;
+	}
+
+	if (in_ivt_functions(addr)) {
+		printk(KERN_WARNING "Kprobes can't be inserted inside "
+				"IVT functions at 0x%lx\n", addr);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	unsigned int i;
+	i = atomic_add_return(1, &kcb->prev_kprobe_index);
+	kcb->prev_kprobe[i-1].kp = kprobe_running();
+	kcb->prev_kprobe[i-1].status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	unsigned int i;
+	i = atomic_read(&kcb->prev_kprobe_index);
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe[i-1].kp;
+	kcb->kprobe_status = kcb->prev_kprobe[i-1].status;
+	atomic_sub(1, &kcb->prev_kprobe_index);
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p,
+			struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = p;
+}
+
+static void kretprobe_trampoline(void)
+{
+}
+
+/*
+ * At this point the target function has been tricked into
+ * returning into our trampoline.  Lookup the associated instance
+ * and then:
+ *    - call the handler function
+ *    - cleanup by marking the instance as unused
+ *    - long jump back to the original return address
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head, empty_rp;
+	struct hlist_node *node, *tmp;
+	unsigned long flags, orig_ret_address = 0;
+	unsigned long trampoline_address =
+		((struct fnptr *)kretprobe_trampoline)->ip;
+
+	INIT_HLIST_HEAD(&empty_rp);
+	kretprobe_hash_lock(current, &head, &flags);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more than one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	regs->cr_iip = orig_ret_address;
+
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri, &empty_rp);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+	reset_current_kprobe();
+	kretprobe_hash_unlock(current, &flags);
+	preempt_enable_no_resched();
+
+	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+		hlist_del(&ri->hlist);
+		kfree(ri);
+	}
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we don't want the post_handler
+	 * to run (and have re-enabled preemption)
+	 */
+	return 1;
+}
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+				      struct pt_regs *regs)
+{
+	ri->ret_addr = (kprobe_opcode_t *)regs->b0;
+
+	/* Replace the return addr with trampoline addr */
+	regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
+}
+
+/* Check the instruction in the slot is break */
+static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot)
+{
+	unsigned int major_opcode;
+	unsigned int template = bundle->quad0.template;
+	unsigned long kprobe_inst;
+
+	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+	if (slot == 1 && bundle_encoding[template][1] == L)
+		slot++;
+
+	/* Get Kprobe probe instruction at given slot*/
+	get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
+
+	/* For break instruction,
+	 * Bits 37:40 Major opcode to be zero
+	 * Bits 27:32 X6 to be zero
+	 * Bits 32:35 X3 to be zero
+	 */
+	if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) {
+		/* Not a break instruction */
+		return 0;
+	}
+
+	/* Is a break instruction */
+	return 1;
+}
+
+/*
+ * In this function, we check whether the target bundle modifies IP or
+ * it triggers an exception. If so, it cannot be boostable.
+ */
+static int __kprobes can_boost(bundle_t *bundle, uint slot,
+			       unsigned long bundle_addr)
+{
+	unsigned int template = bundle->quad0.template;
+
+	do {
+		if (search_exception_tables(bundle_addr + slot) ||
+		    __is_ia64_break_inst(bundle, slot))
+			return 0;	/* exception may occur in this bundle*/
+	} while ((++slot) < 3);
+	template &= 0x1e;
+	if (template >= 0x10 /* including B unit */ ||
+	    template == 0x04 /* including X unit */ ||
+	    template == 0x06) /* undefined */
+		return 0;
+
+	return 1;
+}
+
+/* Prepare long jump bundle and disables other boosters if need */
+static void __kprobes prepare_booster(struct kprobe *p)
+{
+	unsigned long addr = (unsigned long)p->addr & ~0xFULL;
+	unsigned int slot = (unsigned long)p->addr & 0xf;
+	struct kprobe *other_kp;
+
+	if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) {
+		set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1);
+		p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
+	}
+
+	/* disables boosters in previous slots */
+	for (; addr < (unsigned long)p->addr; addr++) {
+		other_kp = get_kprobe((void *)addr);
+		if (other_kp)
+			other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
+	}
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	unsigned long addr = (unsigned long) p->addr;
+	unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL);
+	unsigned long kprobe_inst=0;
+	unsigned int slot = addr & 0xf, template, major_opcode = 0;
+	bundle_t *bundle;
+	int qp;
+
+	bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle;
+	template = bundle->quad0.template;
+
+	if(valid_kprobe_addr(template, slot, addr))
+		return -EINVAL;
+
+	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+	if (slot == 1 && bundle_encoding[template][1] == L)
+		slot++;
+
+	/* Get kprobe_inst and major_opcode from the bundle */
+	get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
+
+	qp = unsupported_inst(template, slot, major_opcode, kprobe_inst, addr);
+	if (qp < 0)
+		return -EINVAL;
+
+	p->ainsn.insn = get_insn_slot();
+	if (!p->ainsn.insn)
+		return -ENOMEM;
+	memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t));
+	memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t));
+
+	prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp);
+
+	prepare_booster(p);
+
+	return 0;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	unsigned long arm_addr;
+	bundle_t *src, *dest;
+
+	arm_addr = ((unsigned long)p->addr) & ~0xFUL;
+	dest = &((kprobe_opcode_t *)arm_addr)->bundle;
+	src = &p->opcode.bundle;
+
+	flush_icache_range((unsigned long)p->ainsn.insn,
+			   (unsigned long)p->ainsn.insn +
+			   sizeof(kprobe_opcode_t) * MAX_INSN_SIZE);
+
+	switch (p->ainsn.slot) {
+		case 0:
+			dest->quad0.slot0 = src->quad0.slot0;
+			break;
+		case 1:
+			dest->quad1.slot1_p1 = src->quad1.slot1_p1;
+			break;
+		case 2:
+			dest->quad1.slot2 = src->quad1.slot2;
+			break;
+	}
+	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	unsigned long arm_addr;
+	bundle_t *src, *dest;
+
+	arm_addr = ((unsigned long)p->addr) & ~0xFUL;
+	dest = &((kprobe_opcode_t *)arm_addr)->bundle;
+	/* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
+	src = &p->ainsn.insn->bundle;
+	switch (p->ainsn.slot) {
+		case 0:
+			dest->quad0.slot0 = src->quad0.slot0;
+			break;
+		case 1:
+			dest->quad1.slot1_p1 = src->quad1.slot1_p1;
+			break;
+		case 2:
+			dest->quad1.slot2 = src->quad1.slot2;
+			break;
+	}
+	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn,
+			       p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
+		p->ainsn.insn = NULL;
+	}
+}
+/*
+ * We are resuming execution after a single step fault, so the pt_regs
+ * structure reflects the register state after we executed the instruction
+ * located in the kprobe (p->ainsn.insn->bundle).  We still need to adjust
+ * the ip to point back to the original stack address. To set the IP address
+ * to original stack address, handle the case where we need to fixup the
+ * relative IP address and/or fixup branch register.
+ */
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle);
+	unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
+	unsigned long template;
+	int slot = ((unsigned long)p->addr & 0xf);
+
+	template = p->ainsn.insn->bundle.quad0.template;
+
+	if (slot == 1 && bundle_encoding[template][1] == L)
+		slot = 2;
+
+	if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
+
+		if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
+			/* Fix relative IP address */
+			regs->cr_iip = (regs->cr_iip - bundle_addr) +
+					resume_addr;
+		}
+
+		if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) {
+		/*
+		 * Fix target branch register, software convention is
+		 * to use either b0 or b6 or b7, so just checking
+		 * only those registers
+		 */
+			switch (p->ainsn.target_br_reg) {
+			case 0:
+				if ((regs->b0 == bundle_addr) ||
+					(regs->b0 == bundle_addr + 0x10)) {
+					regs->b0 = (regs->b0 - bundle_addr) +
+						resume_addr;
+				}
+				break;
+			case 6:
+				if ((regs->b6 == bundle_addr) ||
+					(regs->b6 == bundle_addr + 0x10)) {
+					regs->b6 = (regs->b6 - bundle_addr) +
+						resume_addr;
+				}
+				break;
+			case 7:
+				if ((regs->b7 == bundle_addr) ||
+					(regs->b7 == bundle_addr + 0x10)) {
+					regs->b7 = (regs->b7 - bundle_addr) +
+						resume_addr;
+				}
+				break;
+			} /* end switch */
+		}
+		goto turn_ss_off;
+	}
+
+	if (slot == 2) {
+		if (regs->cr_iip == bundle_addr + 0x10) {
+			regs->cr_iip = resume_addr + 0x10;
+		}
+	} else {
+		if (regs->cr_iip == bundle_addr) {
+			regs->cr_iip = resume_addr;
+		}
+	}
+
+turn_ss_off:
+	/* Turn off Single Step bit */
+	ia64_psr(regs)->ss = 0;
+}
+
+static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long bundle_addr = (unsigned long) &p->ainsn.insn->bundle;
+	unsigned long slot = (unsigned long)p->addr & 0xf;
+
+	/* single step inline if break instruction */
+	if (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)
+		regs->cr_iip = (unsigned long)p->addr & ~0xFULL;
+	else
+		regs->cr_iip = bundle_addr & ~0xFULL;
+
+	if (slot > 2)
+		slot = 0;
+
+	ia64_psr(regs)->ri = slot;
+
+	/* turn on single stepping */
+	ia64_psr(regs)->ss = 1;
+}
+
+static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
+{
+	unsigned int slot = ia64_psr(regs)->ri;
+	unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
+	bundle_t bundle;
+
+	memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
+
+	return __is_ia64_break_inst(&bundle, slot);
+}
+
+static int __kprobes pre_kprobes_handler(struct die_args *args)
+{
+	struct kprobe *p;
+	int ret = 0;
+	struct pt_regs *regs = args->regs;
+	kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs);
+	struct kprobe_ctlblk *kcb;
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+
+	/* Handle recursion cases */
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			if ((kcb->kprobe_status == KPROBE_HIT_SS) &&
+	 		     (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) {
+				ia64_psr(regs)->ss = 0;
+				goto no_kprobe;
+			}
+			/* We have reentered the pre_kprobe_handler(), since
+			 * another probe was hit while within the handler.
+			 * We here save the original kprobes variables and
+			 * just single step on the instruction of the new probe
+			 * without calling any user handlers.
+			 */
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, kcb);
+			kprobes_inc_nmissed_count(p);
+			prepare_ss(p, regs);
+			kcb->kprobe_status = KPROBE_REENTER;
+			return 1;
+		} else if (args->err == __IA64_BREAK_JPROBE) {
+			/*
+			 * jprobe instrumented function just completed
+			 */
+			p = __get_cpu_var(current_kprobe);
+			if (p->break_handler && p->break_handler(p, regs)) {
+				goto ss_probe;
+			}
+		} else if (!is_ia64_break_inst(regs)) {
+			/* The breakpoint instruction was removed by
+			 * another cpu right after we hit, no further
+			 * handling of this interrupt is appropriate
+			 */
+			ret = 1;
+			goto no_kprobe;
+		} else {
+			/* Not our break */
+			goto no_kprobe;
+		}
+	}
+
+	p = get_kprobe(addr);
+	if (!p) {
+		if (!is_ia64_break_inst(regs)) {
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it.  Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address.  In either case, no further
+			 * handling of this interrupt is appropriate.
+			 */
+			ret = 1;
+
+		}
+
+		/* Not one of our break, let kernel handle it */
+		goto no_kprobe;
+	}
+
+	set_current_kprobe(p, kcb);
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/*
+		 * Our pre-handler is specifically requesting that we just
+		 * do a return.  This is used for both the jprobe pre-handler
+		 * and the kretprobe trampoline
+		 */
+		return 1;
+
+ss_probe:
+#if !defined(CONFIG_PREEMPT)
+	if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
+		/* Boost up -- we can execute copied instructions directly */
+		ia64_psr(regs)->ri = p->ainsn.slot;
+		regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL;
+		/* turn single stepping off */
+		ia64_psr(regs)->ss = 0;
+
+		reset_current_kprobe();
+		preempt_enable_no_resched();
+		return 1;
+	}
+#endif
+	prepare_ss(p, regs);
+	kcb->kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	preempt_enable_no_resched();
+	return ret;
+}
+
+static int __kprobes post_kprobes_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur)
+		return 0;
+
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	resume_execution(cur, regs);
+
+	/*Restore back the original saved kprobes variables and continue. */
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		goto out;
+	}
+	reset_current_kprobe();
+
+out:
+	preempt_enable_no_resched();
+	return 1;
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+
+	switch(kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe and the instruction pointer points back to
+		 * the probe address and allow the page fault handler
+		 * to continue as a normal page fault.
+		 */
+		regs->cr_iip = ((unsigned long)cur->addr) & ~0xFULL;
+		ia64_psr(regs)->ri = ((unsigned long)cur->addr) & 0xf;
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			restore_previous_kprobe(kcb);
+		else
+			reset_current_kprobe();
+		preempt_enable_no_resched();
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * We increment the nmissed count for accounting,
+		 * we can also use npre/npostfault count for accouting
+		 * these specific fault cases.
+		 */
+		kprobes_inc_nmissed_count(cur);
+
+		/*
+		 * We come here because instructions in the pre/post
+		 * handler caused the page_fault, this could happen
+		 * if handler tries to access user space by
+		 * copy_from_user(), get_user() etc. Let the
+		 * user-specified handler try to fix it first.
+		 */
+		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+			return 1;
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		if (ia64_done_with_exception(regs))
+			return 1;
+
+		/*
+		 * Let ia64_do_page_fault() fix it.
+		 */
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+
+	if (args->regs && user_mode(args->regs))
+		return ret;
+
+	switch(val) {
+	case DIE_BREAK:
+		/* err is break number from ia64_bad_break() */
+		if ((args->err >> 12) == (__IA64_BREAK_KPROBE >> 12)
+			|| args->err == __IA64_BREAK_JPROBE
+			|| args->err == 0)
+			if (pre_kprobes_handler(args))
+				ret = NOTIFY_STOP;
+		break;
+	case DIE_FAULT:
+		/* err is vector number from ia64_fault() */
+		if (args->err == 36)
+			if (post_kprobes_handler(args->regs))
+				ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+struct param_bsp_cfm {
+	unsigned long ip;
+	unsigned long *bsp;
+	unsigned long cfm;
+};
+
+static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg)
+{
+	unsigned long ip;
+	struct param_bsp_cfm *lp = arg;
+
+	do {
+		unw_get_ip(info, &ip);
+		if (ip == 0)
+			break;
+		if (ip == lp->ip) {
+			unw_get_bsp(info, (unsigned long*)&lp->bsp);
+			unw_get_cfm(info, (unsigned long*)&lp->cfm);
+			return;
+		}
+	} while (unw_unwind(info) >= 0);
+	lp->bsp = NULL;
+	lp->cfm = 0;
+	return;
+}
+
+unsigned long arch_deref_entry_point(void *entry)
+{
+	return ((struct fnptr *)entry)->ip;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	unsigned long addr = arch_deref_entry_point(jp->entry);
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	struct param_bsp_cfm pa;
+	int bytes;
+
+	/*
+	 * Callee owns the argument space and could overwrite it, eg
+	 * tail call optimization. So to be absolutely safe
+	 * we save the argument space before transferring the control
+	 * to instrumented jprobe function which runs in
+	 * the process context
+	 */
+	pa.ip = regs->cr_iip;
+	unw_init_running(ia64_get_bsp_cfm, &pa);
+	bytes = (char *)ia64_rse_skip_regs(pa.bsp, pa.cfm & 0x3f)
+				- (char *)pa.bsp;
+	memcpy( kcb->jprobes_saved_stacked_regs,
+		pa.bsp,
+		bytes );
+	kcb->bsp = pa.bsp;
+	kcb->cfm = pa.cfm;
+
+	/* save architectural state */
+	kcb->jprobe_saved_regs = *regs;
+
+	/* after rfi, execute the jprobe instrumented function */
+	regs->cr_iip = addr & ~0xFULL;
+	ia64_psr(regs)->ri = addr & 0xf;
+	regs->r1 = ((struct fnptr *)(jp->entry))->gp;
+
+	/*
+	 * fix the return address to our jprobe_inst_return() function
+	 * in the jprobes.S file
+	 */
+	regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip;
+
+	return 1;
+}
+
+/* ia64 does not need this */
+void __kprobes jprobe_return(void)
+{
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	int bytes;
+
+	/* restoring architectural state */
+	*regs = kcb->jprobe_saved_regs;
+
+	/* restoring the original argument space */
+	flush_register_stack();
+	bytes = (char *)ia64_rse_skip_regs(kcb->bsp, kcb->cfm & 0x3f)
+				- (char *)kcb->bsp;
+	memcpy( kcb->bsp,
+		kcb->jprobes_saved_stacked_regs,
+		bytes );
+	invalidate_stacked_regs();
+
+	preempt_enable_no_resched();
+	return 1;
+}
+
+static struct kprobe trampoline_p = {
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	trampoline_p.addr =
+		(kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip;
+	return register_kprobe(&trampoline_p);
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	if (p->addr ==
+		(kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip)
+		return 1;
+
+	return 0;
+}
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
new file mode 100644
index 00000000..3d3aeef4
--- /dev/null
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -0,0 +1,169 @@
+/*
+ * arch/ia64/kernel/machine_kexec.c
+ *
+ * Handle transition of Linux booting another kernel
+ * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <linux/efi.h>
+#include <linux/numa.h>
+#include <linux/mmzone.h>
+
+#include <asm/numa.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/delay.h>
+#include <asm/meminit.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
+
+typedef NORET_TYPE void (*relocate_new_kernel_t)(
+					unsigned long indirection_page,
+					unsigned long start_address,
+					struct ia64_boot_param *boot_param,
+					unsigned long pal_addr) ATTRIB_NORET;
+
+struct kimage *ia64_kimage;
+
+struct resource efi_memmap_res = {
+        .name  = "EFI Memory Map",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource boot_param_res = {
+        .name  = "Boot parameter",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+	void *control_code_buffer;
+	const unsigned long *func;
+
+	func = (unsigned long *)&relocate_new_kernel;
+	/* Pre-load control code buffer to minimize work in kexec path */
+	control_code_buffer = page_address(image->control_code_page);
+	memcpy((void *)control_code_buffer, (const void *)func[0],
+			relocate_new_kernel_size);
+	flush_icache_range((unsigned long)control_code_buffer,
+			(unsigned long)control_code_buffer + relocate_new_kernel_size);
+	ia64_kimage = image;
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+{
+	struct kimage *image = arg;
+	relocate_new_kernel_t rnk;
+	void *pal_addr = efi_get_pal_addr();
+	unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+	int ii;
+	u64 fp, gp;
+	ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump;
+
+	BUG_ON(!image);
+	if (image->type == KEXEC_TYPE_CRASH) {
+		crash_save_this_cpu();
+		current->thread.ksp = (__u64)info->sw - 16;
+
+		/* Register noop init handler */
+		fp = ia64_tpa(init_handler->fp);
+		gp = ia64_tpa(ia64_getreg(_IA64_REG_GP));
+		ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0);
+	} else {
+		/* Unregister init handlers of current kernel */
+		ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0);
+	}
+
+	/* Unregister mca handler - No more recovery on current kernel */
+	ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0);
+
+	/* Interrupts aren't acceptable while we reboot */
+	local_irq_disable();
+
+	/* Mask CMC and Performance Monitor interrupts */
+	ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+	ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+	/* Mask ITV and Local Redirect Registers */
+	ia64_set_itv(1 << 16);
+	ia64_set_lrr0(1 << 16);
+	ia64_set_lrr1(1 << 16);
+
+	/* terminate possible nested in-service interrupts */
+	for (ii = 0; ii < 16; ii++)
+		ia64_eoi();
+
+	/* unmask TPR and clear any pending interrupts */
+	ia64_setreg(_IA64_REG_CR_TPR, 0);
+	ia64_srlz_d();
+	while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR)
+		ia64_eoi();
+	platform_kernel_launch_event();
+	rnk = (relocate_new_kernel_t)&code_addr;
+	(*rnk)(image->head, image->start, ia64_boot_param,
+		     GRANULEROUNDDOWN((unsigned long) pal_addr));
+	BUG();
+}
+
+void machine_kexec(struct kimage *image)
+{
+	BUG_ON(!image);
+	unw_init_running(ia64_machine_kexec, image);
+	for(;;);
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+#if defined(CONFIG_DISCONTIGMEM) || defined(CONFIG_SPARSEMEM)
+	VMCOREINFO_SYMBOL(pgdat_list);
+	VMCOREINFO_LENGTH(pgdat_list, MAX_NUMNODES);
+#endif
+#ifdef CONFIG_NUMA
+	VMCOREINFO_SYMBOL(node_memblk);
+	VMCOREINFO_LENGTH(node_memblk, NR_NODE_MEMBLKS);
+	VMCOREINFO_STRUCT_SIZE(node_memblk_s);
+	VMCOREINFO_OFFSET(node_memblk_s, start_paddr);
+	VMCOREINFO_OFFSET(node_memblk_s, size);
+#endif
+#ifdef CONFIG_PGTABLE_3
+	VMCOREINFO_CONFIG(PGTABLE_3);
+#elif  CONFIG_PGTABLE_4
+	VMCOREINFO_CONFIG(PGTABLE_4);
+#endif
+}
+
+unsigned long paddr_vmcoreinfo_note(void)
+{
+	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
+}
+
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
new file mode 100644
index 00000000..d41a40ef
--- /dev/null
+++ b/arch/ia64/kernel/machvec.c
@@ -0,0 +1,91 @@
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <asm/machvec.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_IA64_GENERIC
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/page.h>
+
+struct ia64_machine_vector ia64_mv;
+EXPORT_SYMBOL(ia64_mv);
+
+static struct ia64_machine_vector * __init
+lookup_machvec (const char *name)
+{
+	extern struct ia64_machine_vector machvec_start[];
+	extern struct ia64_machine_vector machvec_end[];
+	struct ia64_machine_vector *mv;
+
+	for (mv = machvec_start; mv < machvec_end; ++mv)
+		if (strcmp (mv->name, name) == 0)
+			return mv;
+
+	return 0;
+}
+
+void __init
+machvec_init (const char *name)
+{
+	struct ia64_machine_vector *mv;
+
+	if (!name)
+		name = acpi_get_sysname();
+	mv = lookup_machvec(name);
+	if (!mv)
+		panic("generic kernel failed to find machine vector for"
+		      " platform %s!", name);
+
+	ia64_mv = *mv;
+	printk(KERN_INFO "booting generic kernel on platform %s\n", name);
+}
+
+void __init
+machvec_init_from_cmdline(const char *cmdline)
+{
+	char str[64];
+	const char *start;
+	char *end;
+
+	if (! (start = strstr(cmdline, "machvec=")) )
+		return machvec_init(NULL);
+
+	strlcpy(str, start + strlen("machvec="), sizeof(str));
+	if ( (end = strchr(str, ' ')) )
+		*end = '\0';
+
+	return machvec_init(str);
+}
+
+#endif /* CONFIG_IA64_GENERIC */
+
+void
+machvec_setup (char **arg)
+{
+}
+EXPORT_SYMBOL(machvec_setup);
+
+void
+machvec_timer_interrupt (int irq, void *dev_id)
+{
+}
+EXPORT_SYMBOL(machvec_timer_interrupt);
+
+void
+machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction dir)
+{
+	mb();
+}
+EXPORT_SYMBOL(machvec_dma_sync_single);
+
+void
+machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg, int n,
+		    enum dma_data_direction dir)
+{
+	mb();
+}
+EXPORT_SYMBOL(machvec_dma_sync_sg);
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
new file mode 100644
index 00000000..84fb405e
--- /dev/null
+++ b/arch/ia64/kernel/mca.c
@@ -0,0 +1,2158 @@
+/*
+ * File:	mca.c
+ * Purpose:	Generic MCA handling layer
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Copyright (C) 2002 Dell Inc.
+ * Copyright (C) Matt Domsch <Matt_Domsch@dell.com>
+ *
+ * Copyright (C) 2002 Intel
+ * Copyright (C) Jenna Hall <jenna.s.hall@intel.com>
+ *
+ * Copyright (C) 2001 Intel
+ * Copyright (C) Fred Lewis <frederick.v.lewis@intel.com>
+ *
+ * Copyright (C) 2000 Intel
+ * Copyright (C) Chuck Fleckenstein <cfleck@co.intel.com>
+ *
+ * Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
+ *
+ * Copyright (C) 2006 FUJITSU LIMITED
+ * Copyright (C) Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ *
+ * 2000-03-29 Chuck Fleckenstein <cfleck@co.intel.com>
+ *	      Fixed PAL/SAL update issues, began MCA bug fixes, logging issues,
+ *	      added min save state dump, added INIT handler.
+ *
+ * 2001-01-03 Fred Lewis <frederick.v.lewis@intel.com>
+ *	      Added setup of CMCI and CPEI IRQs, logging of corrected platform
+ *	      errors, completed code for logging of corrected & uncorrected
+ *	      machine check errors, and updated for conformance with Nov. 2000
+ *	      revision of the SAL 3.0 spec.
+ *
+ * 2002-01-04 Jenna Hall <jenna.s.hall@intel.com>
+ *	      Aligned MCA stack to 16 bytes, added platform vs. CPU error flag,
+ *	      set SAL default return values, changed error record structure to
+ *	      linked list, added init call to sal_get_state_info_size().
+ *
+ * 2002-03-25 Matt Domsch <Matt_Domsch@dell.com>
+ *	      GUID cleanups.
+ *
+ * 2003-04-15 David Mosberger-Tang <davidm@hpl.hp.com>
+ *	      Added INIT backtrace support.
+ *
+ * 2003-12-08 Keith Owens <kaos@sgi.com>
+ *	      smp_call_function() must not be called from interrupt context
+ *	      (can deadlock on tasklist_lock).
+ *	      Use keventd to call smp_call_function().
+ *
+ * 2004-02-01 Keith Owens <kaos@sgi.com>
+ *	      Avoid deadlock when using printk() for MCA and INIT records.
+ *	      Delete all record printing code, moved to salinfo_decode in user
+ *	      space.  Mark variables and functions static where possible.
+ *	      Delete dead variables and functions.  Reorder to remove the need
+ *	      for forward declarations and to consolidate related code.
+ *
+ * 2005-08-12 Keith Owens <kaos@sgi.com>
+ *	      Convert MCA/INIT handlers to use per event stacks and SAL/OS
+ *	      state.
+ *
+ * 2005-10-07 Keith Owens <kaos@sgi.com>
+ *	      Add notify_die() hooks.
+ *
+ * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ *	      Add printing support for MCA/INIT.
+ *
+ * 2007-04-27 Russ Anderson <rja@sgi.com>
+ *	      Support multiple cpus going through OS_MCA in the same event.
+ */
+#include <linux/jiffies.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+#include <linux/acpi.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/workqueue.h>
+#include <linux/cpumask.h>
+#include <linux/kdebug.h>
+#include <linux/cpu.h>
+#include <linux/gfp.h>
+
+#include <asm/delay.h>
+#include <asm/machvec.h>
+#include <asm/meminit.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
+#include <asm/kexec.h>
+
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include <asm/tlb.h>
+
+#include "mca_drv.h"
+#include "entry.h"
+
+#if defined(IA64_MCA_DEBUG_INFO)
+# define IA64_MCA_DEBUG(fmt...)	printk(fmt)
+#else
+# define IA64_MCA_DEBUG(fmt...)
+#endif
+
+#define NOTIFY_INIT(event, regs, arg, spin)				\
+do {									\
+	if ((notify_die((event), "INIT", (regs), (arg), 0, 0)		\
+			== NOTIFY_STOP) && ((spin) == 1))		\
+		ia64_mca_spin(__func__);				\
+} while (0)
+
+#define NOTIFY_MCA(event, regs, arg, spin)				\
+do {									\
+	if ((notify_die((event), "MCA", (regs), (arg), 0, 0)		\
+			== NOTIFY_STOP) && ((spin) == 1))		\
+		ia64_mca_spin(__func__);				\
+} while (0)
+
+/* Used by mca_asm.S */
+DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
+DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
+DEFINE_PER_CPU(u64, ia64_mca_pal_pte);	    /* PTE to map PAL code */
+DEFINE_PER_CPU(u64, ia64_mca_pal_base);    /* vaddr PAL code granule */
+DEFINE_PER_CPU(u64, ia64_mca_tr_reload);   /* Flag for TR reload */
+
+unsigned long __per_cpu_mca[NR_CPUS];
+
+/* In mca_asm.S */
+extern void			ia64_os_init_dispatch_monarch (void);
+extern void			ia64_os_init_dispatch_slave (void);
+
+static int monarch_cpu = -1;
+
+static ia64_mc_info_t		ia64_mc_info;
+
+#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */
+#define MIN_CPE_POLL_INTERVAL (2*60*HZ)  /* 2 minutes */
+#define CMC_POLL_INTERVAL     (1*60*HZ)  /* 1 minute */
+#define CPE_HISTORY_LENGTH    5
+#define CMC_HISTORY_LENGTH    5
+
+#ifdef CONFIG_ACPI
+static struct timer_list cpe_poll_timer;
+#endif
+static struct timer_list cmc_poll_timer;
+/*
+ * This variable tells whether we are currently in polling mode.
+ * Start with this in the wrong state so we won't play w/ timers
+ * before the system is ready.
+ */
+static int cmc_polling_enabled = 1;
+
+/*
+ * Clearing this variable prevents CPE polling from getting activated
+ * in mca_late_init.  Use it if your system doesn't provide a CPEI,
+ * but encounters problems retrieving CPE logs.  This should only be
+ * necessary for debugging.
+ */
+static int cpe_poll_enabled = 1;
+
+extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
+
+static int mca_init __initdata;
+
+/*
+ * limited & delayed printing support for MCA/INIT handler
+ */
+
+#define mprintk(fmt...) ia64_mca_printk(fmt)
+
+#define MLOGBUF_SIZE (512+256*NR_CPUS)
+#define MLOGBUF_MSGMAX 256
+static char mlogbuf[MLOGBUF_SIZE];
+static DEFINE_SPINLOCK(mlogbuf_wlock);	/* mca context only */
+static DEFINE_SPINLOCK(mlogbuf_rlock);	/* normal context only */
+static unsigned long mlogbuf_start;
+static unsigned long mlogbuf_end;
+static unsigned int mlogbuf_finished = 0;
+static unsigned long mlogbuf_timestamp = 0;
+
+static int loglevel_save = -1;
+#define BREAK_LOGLEVEL(__console_loglevel)		\
+	oops_in_progress = 1;				\
+	if (loglevel_save < 0)				\
+		loglevel_save = __console_loglevel;	\
+	__console_loglevel = 15;
+
+#define RESTORE_LOGLEVEL(__console_loglevel)		\
+	if (loglevel_save >= 0) {			\
+		__console_loglevel = loglevel_save;	\
+		loglevel_save = -1;			\
+	}						\
+	mlogbuf_finished = 0;				\
+	oops_in_progress = 0;
+
+/*
+ * Push messages into buffer, print them later if not urgent.
+ */
+void ia64_mca_printk(const char *fmt, ...)
+{
+	va_list args;
+	int printed_len;
+	char temp_buf[MLOGBUF_MSGMAX];
+	char *p;
+
+	va_start(args, fmt);
+	printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args);
+	va_end(args);
+
+	/* Copy the output into mlogbuf */
+	if (oops_in_progress) {
+		/* mlogbuf was abandoned, use printk directly instead. */
+		printk(temp_buf);
+	} else {
+		spin_lock(&mlogbuf_wlock);
+		for (p = temp_buf; *p; p++) {
+			unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE;
+			if (next != mlogbuf_start) {
+				mlogbuf[mlogbuf_end] = *p;
+				mlogbuf_end = next;
+			} else {
+				/* buffer full */
+				break;
+			}
+		}
+		mlogbuf[mlogbuf_end] = '\0';
+		spin_unlock(&mlogbuf_wlock);
+	}
+}
+EXPORT_SYMBOL(ia64_mca_printk);
+
+/*
+ * Print buffered messages.
+ *  NOTE: call this after returning normal context. (ex. from salinfod)
+ */
+void ia64_mlogbuf_dump(void)
+{
+	char temp_buf[MLOGBUF_MSGMAX];
+	char *p;
+	unsigned long index;
+	unsigned long flags;
+	unsigned int printed_len;
+
+	/* Get output from mlogbuf */
+	while (mlogbuf_start != mlogbuf_end) {
+		temp_buf[0] = '\0';
+		p = temp_buf;
+		printed_len = 0;
+
+		spin_lock_irqsave(&mlogbuf_rlock, flags);
+
+		index = mlogbuf_start;
+		while (index != mlogbuf_end) {
+			*p = mlogbuf[index];
+			index = (index + 1) % MLOGBUF_SIZE;
+			if (!*p)
+				break;
+			p++;
+			if (++printed_len >= MLOGBUF_MSGMAX - 1)
+				break;
+		}
+		*p = '\0';
+		if (temp_buf[0])
+			printk(temp_buf);
+		mlogbuf_start = index;
+
+		mlogbuf_timestamp = 0;
+		spin_unlock_irqrestore(&mlogbuf_rlock, flags);
+	}
+}
+EXPORT_SYMBOL(ia64_mlogbuf_dump);
+
+/*
+ * Call this if system is going to down or if immediate flushing messages to
+ * console is required. (ex. recovery was failed, crash dump is going to be
+ * invoked, long-wait rendezvous etc.)
+ *  NOTE: this should be called from monarch.
+ */
+static void ia64_mlogbuf_finish(int wait)
+{
+	BREAK_LOGLEVEL(console_loglevel);
+
+	spin_lock_init(&mlogbuf_rlock);
+	ia64_mlogbuf_dump();
+	printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, "
+		"MCA/INIT might be dodgy or fail.\n");
+
+	if (!wait)
+		return;
+
+	/* wait for console */
+	printk("Delaying for 5 seconds...\n");
+	udelay(5*1000000);
+
+	mlogbuf_finished = 1;
+}
+
+/*
+ * Print buffered messages from INIT context.
+ */
+static void ia64_mlogbuf_dump_from_init(void)
+{
+	if (mlogbuf_finished)
+		return;
+
+	if (mlogbuf_timestamp &&
+			time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
+		printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
+			" and the system seems to be messed up.\n");
+		ia64_mlogbuf_finish(0);
+		return;
+	}
+
+	if (!spin_trylock(&mlogbuf_rlock)) {
+		printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. "
+			"Generated messages other than stack dump will be "
+			"buffered to mlogbuf and will be printed later.\n");
+		printk(KERN_ERR "INIT: If messages would not printed after "
+			"this INIT, wait 30sec and assert INIT again.\n");
+		if (!mlogbuf_timestamp)
+			mlogbuf_timestamp = jiffies;
+		return;
+	}
+	spin_unlock(&mlogbuf_rlock);
+	ia64_mlogbuf_dump();
+}
+
+static void inline
+ia64_mca_spin(const char *func)
+{
+	if (monarch_cpu == smp_processor_id())
+		ia64_mlogbuf_finish(0);
+	mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
+	while (1)
+		cpu_relax();
+}
+/*
+ * IA64_MCA log support
+ */
+#define IA64_MAX_LOGS		2	/* Double-buffering for nested MCAs */
+#define IA64_MAX_LOG_TYPES      4   /* MCA, INIT, CMC, CPE */
+
+typedef struct ia64_state_log_s
+{
+	spinlock_t	isl_lock;
+	int		isl_index;
+	unsigned long	isl_count;
+	ia64_err_rec_t  *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */
+} ia64_state_log_t;
+
+static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
+
+#define IA64_LOG_ALLOCATE(it, size) \
+	{ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
+		(ia64_err_rec_t *)alloc_bootmem(size); \
+	ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
+		(ia64_err_rec_t *)alloc_bootmem(size);}
+#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
+#define IA64_LOG_LOCK(it)      spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
+#define IA64_LOG_UNLOCK(it)    spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)
+#define IA64_LOG_NEXT_INDEX(it)    ia64_state_log[it].isl_index
+#define IA64_LOG_CURR_INDEX(it)    1 - ia64_state_log[it].isl_index
+#define IA64_LOG_INDEX_INC(it) \
+    {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \
+    ia64_state_log[it].isl_count++;}
+#define IA64_LOG_INDEX_DEC(it) \
+    ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
+#define IA64_LOG_NEXT_BUFFER(it)   (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
+#define IA64_LOG_CURR_BUFFER(it)   (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
+#define IA64_LOG_COUNT(it)         ia64_state_log[it].isl_count
+
+/*
+ * ia64_log_init
+ *	Reset the OS ia64 log buffer
+ * Inputs   :   info_type   (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
+ * Outputs	:	None
+ */
+static void __init
+ia64_log_init(int sal_info_type)
+{
+	u64	max_size = 0;
+
+	IA64_LOG_NEXT_INDEX(sal_info_type) = 0;
+	IA64_LOG_LOCK_INIT(sal_info_type);
+
+	// SAL will tell us the maximum size of any error record of this type
+	max_size = ia64_sal_get_state_info_size(sal_info_type);
+	if (!max_size)
+		/* alloc_bootmem() doesn't like zero-sized allocations! */
+		return;
+
+	// set up OS data structures to hold error info
+	IA64_LOG_ALLOCATE(sal_info_type, max_size);
+	memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size);
+	memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size);
+}
+
+/*
+ * ia64_log_get
+ *
+ *	Get the current MCA log from SAL and copy it into the OS log buffer.
+ *
+ *  Inputs  :   info_type   (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
+ *              irq_safe    whether you can use printk at this point
+ *  Outputs :   size        (total record length)
+ *              *buffer     (ptr to error record)
+ *
+ */
+static u64
+ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
+{
+	sal_log_record_header_t     *log_buffer;
+	u64                         total_len = 0;
+	unsigned long               s;
+
+	IA64_LOG_LOCK(sal_info_type);
+
+	/* Get the process state information */
+	log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type);
+
+	total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer);
+
+	if (total_len) {
+		IA64_LOG_INDEX_INC(sal_info_type);
+		IA64_LOG_UNLOCK(sal_info_type);
+		if (irq_safe) {
+			IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. Record length = %ld\n",
+				       __func__, sal_info_type, total_len);
+		}
+		*buffer = (u8 *) log_buffer;
+		return total_len;
+	} else {
+		IA64_LOG_UNLOCK(sal_info_type);
+		return 0;
+	}
+}
+
+/*
+ *  ia64_mca_log_sal_error_record
+ *
+ *  This function retrieves a specified error record type from SAL
+ *  and wakes up any processes waiting for error records.
+ *
+ *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE)
+ *              FIXME: remove MCA and irq_safe.
+ */
+static void
+ia64_mca_log_sal_error_record(int sal_info_type)
+{
+	u8 *buffer;
+	sal_log_record_header_t *rh;
+	u64 size;
+	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA;
+#ifdef IA64_MCA_DEBUG_INFO
+	static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" };
+#endif
+
+	size = ia64_log_get(sal_info_type, &buffer, irq_safe);
+	if (!size)
+		return;
+
+	salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe);
+
+	if (irq_safe)
+		IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n",
+			smp_processor_id(),
+			sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN");
+
+	/* Clear logs from corrected errors in case there's no user-level logger */
+	rh = (sal_log_record_header_t *)buffer;
+	if (rh->severity == sal_log_severity_corrected)
+		ia64_sal_clear_state_info(sal_info_type);
+}
+
+/*
+ * search_mca_table
+ *  See if the MCA surfaced in an instruction range
+ *  that has been tagged as recoverable.
+ *
+ *  Inputs
+ *	first	First address range to check
+ *	last	Last address range to check
+ *	ip	Instruction pointer, address we are looking for
+ *
+ * Return value:
+ *      1 on Success (in the table)/ 0 on Failure (not in the  table)
+ */
+int
+search_mca_table (const struct mca_table_entry *first,
+                const struct mca_table_entry *last,
+                unsigned long ip)
+{
+        const struct mca_table_entry *curr;
+        u64 curr_start, curr_end;
+
+        curr = first;
+        while (curr <= last) {
+                curr_start = (u64) &curr->start_addr + curr->start_addr;
+                curr_end = (u64) &curr->end_addr + curr->end_addr;
+
+                if ((ip >= curr_start) && (ip <= curr_end)) {
+                        return 1;
+                }
+                curr++;
+        }
+        return 0;
+}
+
+/* Given an address, look for it in the mca tables. */
+int mca_recover_range(unsigned long addr)
+{
+	extern struct mca_table_entry __start___mca_table[];
+	extern struct mca_table_entry __stop___mca_table[];
+
+	return search_mca_table(__start___mca_table, __stop___mca_table-1, addr);
+}
+EXPORT_SYMBOL_GPL(mca_recover_range);
+
+#ifdef CONFIG_ACPI
+
+int cpe_vector = -1;
+int ia64_cpe_irq = -1;
+
+static irqreturn_t
+ia64_mca_cpe_int_handler (int cpe_irq, void *arg)
+{
+	static unsigned long	cpe_history[CPE_HISTORY_LENGTH];
+	static int		index;
+	static DEFINE_SPINLOCK(cpe_history_lock);
+
+	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
+		       __func__, cpe_irq, smp_processor_id());
+
+	/* SAL spec states this should run w/ interrupts enabled */
+	local_irq_enable();
+
+	spin_lock(&cpe_history_lock);
+	if (!cpe_poll_enabled && cpe_vector >= 0) {
+
+		int i, count = 1; /* we know 1 happened now */
+		unsigned long now = jiffies;
+
+		for (i = 0; i < CPE_HISTORY_LENGTH; i++) {
+			if (now - cpe_history[i] <= HZ)
+				count++;
+		}
+
+		IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH);
+		if (count >= CPE_HISTORY_LENGTH) {
+
+			cpe_poll_enabled = 1;
+			spin_unlock(&cpe_history_lock);
+			disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR));
+
+			/*
+			 * Corrected errors will still be corrected, but
+			 * make sure there's a log somewhere that indicates
+			 * something is generating more than we can handle.
+			 */
+			printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n");
+
+			mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL);
+
+			/* lock already released, get out now */
+			goto out;
+		} else {
+			cpe_history[index++] = now;
+			if (index == CPE_HISTORY_LENGTH)
+				index = 0;
+		}
+	}
+	spin_unlock(&cpe_history_lock);
+out:
+	/* Get the CPE error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
+
+	local_irq_disable();
+
+	return IRQ_HANDLED;
+}
+
+#endif /* CONFIG_ACPI */
+
+#ifdef CONFIG_ACPI
+/*
+ * ia64_mca_register_cpev
+ *
+ *  Register the corrected platform error vector with SAL.
+ *
+ *  Inputs
+ *      cpev        Corrected Platform Error Vector number
+ *
+ *  Outputs
+ *      None
+ */
+void
+ia64_mca_register_cpev (int cpev)
+{
+	/* Register the CPE interrupt vector with SAL */
+	struct ia64_sal_retval isrv;
+
+	isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0);
+	if (isrv.status) {
+		printk(KERN_ERR "Failed to register Corrected Platform "
+		       "Error interrupt vector with SAL (status %ld)\n", isrv.status);
+		return;
+	}
+
+	IA64_MCA_DEBUG("%s: corrected platform error "
+		       "vector %#x registered\n", __func__, cpev);
+}
+#endif /* CONFIG_ACPI */
+
+/*
+ * ia64_mca_cmc_vector_setup
+ *
+ *  Setup the corrected machine check vector register in the processor.
+ *  (The interrupt is masked on boot. ia64_mca_late_init unmask this.)
+ *  This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ *      None
+ *
+ * Outputs
+ *	None
+ */
+void __cpuinit
+ia64_mca_cmc_vector_setup (void)
+{
+	cmcv_reg_t	cmcv;
+
+	cmcv.cmcv_regval	= 0;
+	cmcv.cmcv_mask		= 1;        /* Mask/disable interrupt at first */
+	cmcv.cmcv_vector	= IA64_CMC_VECTOR;
+	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
+
+	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x registered.\n",
+		       __func__, smp_processor_id(), IA64_CMC_VECTOR);
+
+	IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n",
+		       __func__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV));
+}
+
+/*
+ * ia64_mca_cmc_vector_disable
+ *
+ *  Mask the corrected machine check vector register in the processor.
+ *  This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ *      dummy(unused)
+ *
+ * Outputs
+ *	None
+ */
+static void
+ia64_mca_cmc_vector_disable (void *dummy)
+{
+	cmcv_reg_t	cmcv;
+
+	cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
+
+	cmcv.cmcv_mask = 1; /* Mask/disable interrupt */
+	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
+
+	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x disabled.\n",
+		       __func__, smp_processor_id(), cmcv.cmcv_vector);
+}
+
+/*
+ * ia64_mca_cmc_vector_enable
+ *
+ *  Unmask the corrected machine check vector register in the processor.
+ *  This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ *      dummy(unused)
+ *
+ * Outputs
+ *	None
+ */
+static void
+ia64_mca_cmc_vector_enable (void *dummy)
+{
+	cmcv_reg_t	cmcv;
+
+	cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
+
+	cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
+	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
+
+	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x enabled.\n",
+		       __func__, smp_processor_id(), cmcv.cmcv_vector);
+}
+
+/*
+ * ia64_mca_cmc_vector_disable_keventd
+ *
+ * Called via keventd (smp_call_function() is not safe in interrupt context) to
+ * disable the cmc interrupt vector.
+ */
+static void
+ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused)
+{
+	on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 0);
+}
+
+/*
+ * ia64_mca_cmc_vector_enable_keventd
+ *
+ * Called via keventd (smp_call_function() is not safe in interrupt context) to
+ * enable the cmc interrupt vector.
+ */
+static void
+ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused)
+{
+	on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 0);
+}
+
+/*
+ * ia64_mca_wakeup
+ *
+ *	Send an inter-cpu interrupt to wake-up a particular cpu.
+ *
+ *  Inputs  :   cpuid
+ *  Outputs :   None
+ */
+static void
+ia64_mca_wakeup(int cpu)
+{
+	platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0);
+}
+
+/*
+ * ia64_mca_wakeup_all
+ *
+ *	Wakeup all the slave cpus which have rendez'ed previously.
+ *
+ *  Inputs  :   None
+ *  Outputs :   None
+ */
+static void
+ia64_mca_wakeup_all(void)
+{
+	int cpu;
+
+	/* Clear the Rendez checkin flag for all cpus */
+	for_each_online_cpu(cpu) {
+		if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE)
+			ia64_mca_wakeup(cpu);
+	}
+
+}
+
+/*
+ * ia64_mca_rendez_interrupt_handler
+ *
+ *	This is handler used to put slave processors into spinloop
+ *	while the monarch processor does the mca handling and later
+ *	wake each slave up once the monarch is done.  The state
+ *	IA64_MCA_RENDEZ_CHECKIN_DONE indicates the cpu is rendez'ed
+ *	in SAL.  The state IA64_MCA_RENDEZ_CHECKIN_NOTDONE indicates
+ *	the cpu has come out of OS rendezvous.
+ *
+ *  Inputs  :   None
+ *  Outputs :   None
+ */
+static irqreturn_t
+ia64_mca_rendez_int_handler(int rendez_irq, void *arg)
+{
+	unsigned long flags;
+	int cpu = smp_processor_id();
+	struct ia64_mca_notify_die nd =
+		{ .sos = NULL, .monarch_cpu = &monarch_cpu };
+
+	/* Mask all interrupts */
+	local_irq_save(flags);
+
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1);
+
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
+	/* Register with the SAL monarch that the slave has
+	 * reached SAL
+	 */
+	ia64_sal_mc_rendez();
+
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1);
+
+	/* Wait for the monarch cpu to exit. */
+	while (monarch_cpu != -1)
+	       cpu_relax();	/* spin until monarch leaves */
+
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1);
+
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+	/* Enable all interrupts */
+	local_irq_restore(flags);
+	return IRQ_HANDLED;
+}
+
+/*
+ * ia64_mca_wakeup_int_handler
+ *
+ *	The interrupt handler for processing the inter-cpu interrupt to the
+ *	slave cpu which was spinning in the rendez loop.
+ *	Since this spinning is done by turning off the interrupts and
+ *	polling on the wakeup-interrupt bit in the IRR, there is
+ *	nothing useful to be done in the handler.
+ *
+ *  Inputs  :   wakeup_irq  (Wakeup-interrupt bit)
+ *	arg		(Interrupt handler specific argument)
+ *  Outputs :   None
+ *
+ */
+static irqreturn_t
+ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg)
+{
+	return IRQ_HANDLED;
+}
+
+/* Function pointer for extra MCA recovery */
+int (*ia64_mca_ucmc_extension)
+	(void*,struct ia64_sal_os_state*)
+	= NULL;
+
+int
+ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *))
+{
+	if (ia64_mca_ucmc_extension)
+		return 1;
+
+	ia64_mca_ucmc_extension = fn;
+	return 0;
+}
+
+void
+ia64_unreg_MCA_extension(void)
+{
+	if (ia64_mca_ucmc_extension)
+		ia64_mca_ucmc_extension = NULL;
+}
+
+EXPORT_SYMBOL(ia64_reg_MCA_extension);
+EXPORT_SYMBOL(ia64_unreg_MCA_extension);
+
+
+static inline void
+copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat)
+{
+	u64 fslot, tslot, nat;
+	*tr = *fr;
+	fslot = ((unsigned long)fr >> 3) & 63;
+	tslot = ((unsigned long)tr >> 3) & 63;
+	*tnat &= ~(1UL << tslot);
+	nat = (fnat >> fslot) & 1;
+	*tnat |= (nat << tslot);
+}
+
+/* Change the comm field on the MCA/INT task to include the pid that
+ * was interrupted, it makes for easier debugging.  If that pid was 0
+ * (swapper or nested MCA/INIT) then use the start of the previous comm
+ * field suffixed with its cpu.
+ */
+
+static void
+ia64_mca_modify_comm(const struct task_struct *previous_current)
+{
+	char *p, comm[sizeof(current->comm)];
+	if (previous_current->pid)
+		snprintf(comm, sizeof(comm), "%s %d",
+			current->comm, previous_current->pid);
+	else {
+		int l;
+		if ((p = strchr(previous_current->comm, ' ')))
+			l = p - previous_current->comm;
+		else
+			l = strlen(previous_current->comm);
+		snprintf(comm, sizeof(comm), "%s %*s %d",
+			current->comm, l, previous_current->comm,
+			task_thread_info(previous_current)->cpu);
+	}
+	memcpy(current->comm, comm, sizeof(current->comm));
+}
+
+static void
+finish_pt_regs(struct pt_regs *regs, struct ia64_sal_os_state *sos,
+		unsigned long *nat)
+{
+	const pal_min_state_area_t *ms = sos->pal_min_state;
+	const u64 *bank;
+
+	/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
+	 * pmsa_{xip,xpsr,xfs}
+	 */
+	if (ia64_psr(regs)->ic) {
+		regs->cr_iip = ms->pmsa_iip;
+		regs->cr_ipsr = ms->pmsa_ipsr;
+		regs->cr_ifs = ms->pmsa_ifs;
+	} else {
+		regs->cr_iip = ms->pmsa_xip;
+		regs->cr_ipsr = ms->pmsa_xpsr;
+		regs->cr_ifs = ms->pmsa_xfs;
+
+		sos->iip = ms->pmsa_iip;
+		sos->ipsr = ms->pmsa_ipsr;
+		sos->ifs = ms->pmsa_ifs;
+	}
+	regs->pr = ms->pmsa_pr;
+	regs->b0 = ms->pmsa_br0;
+	regs->ar_rsc = ms->pmsa_rsc;
+	copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &regs->r1, nat);
+	copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &regs->r2, nat);
+	copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &regs->r3, nat);
+	copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &regs->r8, nat);
+	copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &regs->r9, nat);
+	copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &regs->r10, nat);
+	copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &regs->r11, nat);
+	copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &regs->r12, nat);
+	copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &regs->r13, nat);
+	copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &regs->r14, nat);
+	copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &regs->r15, nat);
+	if (ia64_psr(regs)->bn)
+		bank = ms->pmsa_bank1_gr;
+	else
+		bank = ms->pmsa_bank0_gr;
+	copy_reg(&bank[16-16], ms->pmsa_nat_bits, &regs->r16, nat);
+	copy_reg(&bank[17-16], ms->pmsa_nat_bits, &regs->r17, nat);
+	copy_reg(&bank[18-16], ms->pmsa_nat_bits, &regs->r18, nat);
+	copy_reg(&bank[19-16], ms->pmsa_nat_bits, &regs->r19, nat);
+	copy_reg(&bank[20-16], ms->pmsa_nat_bits, &regs->r20, nat);
+	copy_reg(&bank[21-16], ms->pmsa_nat_bits, &regs->r21, nat);
+	copy_reg(&bank[22-16], ms->pmsa_nat_bits, &regs->r22, nat);
+	copy_reg(&bank[23-16], ms->pmsa_nat_bits, &regs->r23, nat);
+	copy_reg(&bank[24-16], ms->pmsa_nat_bits, &regs->r24, nat);
+	copy_reg(&bank[25-16], ms->pmsa_nat_bits, &regs->r25, nat);
+	copy_reg(&bank[26-16], ms->pmsa_nat_bits, &regs->r26, nat);
+	copy_reg(&bank[27-16], ms->pmsa_nat_bits, &regs->r27, nat);
+	copy_reg(&bank[28-16], ms->pmsa_nat_bits, &regs->r28, nat);
+	copy_reg(&bank[29-16], ms->pmsa_nat_bits, &regs->r29, nat);
+	copy_reg(&bank[30-16], ms->pmsa_nat_bits, &regs->r30, nat);
+	copy_reg(&bank[31-16], ms->pmsa_nat_bits, &regs->r31, nat);
+}
+
+/* On entry to this routine, we are running on the per cpu stack, see
+ * mca_asm.h.  The original stack has not been touched by this event.  Some of
+ * the original stack's registers will be in the RBS on this stack.  This stack
+ * also contains a partial pt_regs and switch_stack, the rest of the data is in
+ * PAL minstate.
+ *
+ * The first thing to do is modify the original stack to look like a blocked
+ * task so we can run backtrace on the original task.  Also mark the per cpu
+ * stack as current to ensure that we use the correct task state, it also means
+ * that we can do backtrace on the MCA/INIT handler code itself.
+ */
+
+static struct task_struct *
+ia64_mca_modify_original_stack(struct pt_regs *regs,
+		const struct switch_stack *sw,
+		struct ia64_sal_os_state *sos,
+		const char *type)
+{
+	char *p;
+	ia64_va va;
+	extern char ia64_leave_kernel[];	/* Need asm address, not function descriptor */
+	const pal_min_state_area_t *ms = sos->pal_min_state;
+	struct task_struct *previous_current;
+	struct pt_regs *old_regs;
+	struct switch_stack *old_sw;
+	unsigned size = sizeof(struct pt_regs) +
+			sizeof(struct switch_stack) + 16;
+	unsigned long *old_bspstore, *old_bsp;
+	unsigned long *new_bspstore, *new_bsp;
+	unsigned long old_unat, old_rnat, new_rnat, nat;
+	u64 slots, loadrs = regs->loadrs;
+	u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
+	u64 ar_bspstore = regs->ar_bspstore;
+	u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16);
+	const char *msg;
+	int cpu = smp_processor_id();
+
+	previous_current = curr_task(cpu);
+	set_curr_task(cpu, current);
+	if ((p = strchr(current->comm, ' ')))
+		*p = '\0';
+
+	/* Best effort attempt to cope with MCA/INIT delivered while in
+	 * physical mode.
+	 */
+	regs->cr_ipsr = ms->pmsa_ipsr;
+	if (ia64_psr(regs)->dt == 0) {
+		va.l = r12;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r12 = va.l;
+		}
+		va.l = r13;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r13 = va.l;
+		}
+	}
+	if (ia64_psr(regs)->rt == 0) {
+		va.l = ar_bspstore;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bspstore = va.l;
+		}
+		va.l = ar_bsp;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bsp = va.l;
+		}
+	}
+
+	/* mca_asm.S ia64_old_stack() cannot assume that the dirty registers
+	 * have been copied to the old stack, the old stack may fail the
+	 * validation tests below.  So ia64_old_stack() must restore the dirty
+	 * registers from the new stack.  The old and new bspstore probably
+	 * have different alignments, so loadrs calculated on the old bsp
+	 * cannot be used to restore from the new bsp.  Calculate a suitable
+	 * loadrs for the new stack and save it in the new pt_regs, where
+	 * ia64_old_stack() can get it.
+	 */
+	old_bspstore = (unsigned long *)ar_bspstore;
+	old_bsp = (unsigned long *)ar_bsp;
+	slots = ia64_rse_num_regs(old_bspstore, old_bsp);
+	new_bspstore = (unsigned long *)((u64)current + IA64_RBS_OFFSET);
+	new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
+	regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;
+
+	/* Verify the previous stack state before we change it */
+	if (user_mode(regs)) {
+		msg = "occurred in user space";
+		/* previous_current is guaranteed to be valid when the task was
+		 * in user space, so ...
+		 */
+		ia64_mca_modify_comm(previous_current);
+		goto no_mod;
+	}
+
+	if (r13 != sos->prev_IA64_KR_CURRENT) {
+		msg = "inconsistent previous current and r13";
+		goto no_mod;
+	}
+
+	if (!mca_recover_range(ms->pmsa_iip)) {
+		if ((r12 - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent r12 and r13";
+			goto no_mod;
+		}
+		if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent ar.bspstore and r13";
+			goto no_mod;
+		}
+		va.p = old_bspstore;
+		if (va.f.reg < 5) {
+			msg = "old_bspstore is in the wrong region";
+			goto no_mod;
+		}
+		if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent ar.bsp and r13";
+			goto no_mod;
+		}
+		size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
+		if (ar_bspstore + size > r12) {
+			msg = "no room for blocked state";
+			goto no_mod;
+		}
+	}
+
+	ia64_mca_modify_comm(previous_current);
+
+	/* Make the original task look blocked.  First stack a struct pt_regs,
+	 * describing the state at the time of interrupt.  mca_asm.S built a
+	 * partial pt_regs, copy it and fill in the blanks using minstate.
+	 */
+	p = (char *)r12 - sizeof(*regs);
+	old_regs = (struct pt_regs *)p;
+	memcpy(old_regs, regs, sizeof(*regs));
+	old_regs->loadrs = loadrs;
+	old_unat = old_regs->ar_unat;
+	finish_pt_regs(old_regs, sos, &old_unat);
+
+	/* Next stack a struct switch_stack.  mca_asm.S built a partial
+	 * switch_stack, copy it and fill in the blanks using pt_regs and
+	 * minstate.
+	 *
+	 * In the synthesized switch_stack, b0 points to ia64_leave_kernel,
+	 * ar.pfs is set to 0.
+	 *
+	 * unwind.c::unw_unwind() does special processing for interrupt frames.
+	 * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate
+	 * is clear then unw_unwind() does _not_ adjust bsp over pt_regs.  Not
+	 * that this is documented, of course.  Set PRED_NON_SYSCALL in the
+	 * switch_stack on the original stack so it will unwind correctly when
+	 * unwind.c reads pt_regs.
+	 *
+	 * thread.ksp is updated to point to the synthesized switch_stack.
+	 */
+	p -= sizeof(struct switch_stack);
+	old_sw = (struct switch_stack *)p;
+	memcpy(old_sw, sw, sizeof(*sw));
+	old_sw->caller_unat = old_unat;
+	old_sw->ar_fpsr = old_regs->ar_fpsr;
+	copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat);
+	copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat);
+	copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat);
+	copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat);
+	old_sw->b0 = (u64)ia64_leave_kernel;
+	old_sw->b1 = ms->pmsa_br1;
+	old_sw->ar_pfs = 0;
+	old_sw->ar_unat = old_unat;
+	old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL);
+	previous_current->thread.ksp = (u64)p - 16;
+
+	/* Finally copy the original stack's registers back to its RBS.
+	 * Registers from ar.bspstore through ar.bsp at the time of the event
+	 * are in the current RBS, copy them back to the original stack.  The
+	 * copy must be done register by register because the original bspstore
+	 * and the current one have different alignments, so the saved RNAT
+	 * data occurs at different places.
+	 *
+	 * mca_asm does cover, so the old_bsp already includes all registers at
+	 * the time of MCA/INIT.  It also does flushrs, so all registers before
+	 * this function have been written to backing store on the MCA/INIT
+	 * stack.
+	 */
+	new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore));
+	old_rnat = regs->ar_rnat;
+	while (slots--) {
+		if (ia64_rse_is_rnat_slot(new_bspstore)) {
+			new_rnat = ia64_get_rnat(new_bspstore++);
+		}
+		if (ia64_rse_is_rnat_slot(old_bspstore)) {
+			*old_bspstore++ = old_rnat;
+			old_rnat = 0;
+		}
+		nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL;
+		old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore));
+		old_rnat |= (nat << ia64_rse_slot_num(old_bspstore));
+		*old_bspstore++ = *new_bspstore++;
+	}
+	old_sw->ar_bspstore = (unsigned long)old_bspstore;
+	old_sw->ar_rnat = old_rnat;
+
+	sos->prev_task = previous_current;
+	return previous_current;
+
+no_mod:
+	mprintk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
+			smp_processor_id(), type, msg);
+	old_unat = regs->ar_unat;
+	finish_pt_regs(regs, sos, &old_unat);
+	return previous_current;
+}
+
+/* The monarch/slave interaction is based on monarch_cpu and requires that all
+ * slaves have entered rendezvous before the monarch leaves.  If any cpu has
+ * not entered rendezvous yet then wait a bit.  The assumption is that any
+ * slave that has not rendezvoused after a reasonable time is never going to do
+ * so.  In this context, slave includes cpus that respond to the MCA rendezvous
+ * interrupt, as well as cpus that receive the INIT slave event.
+ */
+
+static void
+ia64_wait_for_slaves(int monarch, const char *type)
+{
+	int c, i , wait;
+
+	/*
+	 * wait 5 seconds total for slaves (arbitrary)
+	 */
+	for (i = 0; i < 5000; i++) {
+		wait = 0;
+		for_each_online_cpu(c) {
+			if (c == monarch)
+				continue;
+			if (ia64_mc_info.imi_rendez_checkin[c]
+					== IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+				udelay(1000);		/* short wait */
+				wait = 1;
+				break;
+			}
+		}
+		if (!wait)
+			goto all_in;
+	}
+
+	/*
+	 * Maybe slave(s) dead. Print buffered messages immediately.
+	 */
+	ia64_mlogbuf_finish(0);
+	mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+			mprintk(" %d", c);
+	}
+	mprintk("\n");
+	return;
+
+all_in:
+	mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
+	return;
+}
+
+/*  mca_insert_tr
+ *
+ *  Switch rid when TR reload and needed!
+ *  iord: 1: itr, 2: itr;
+ *
+*/
+static void mca_insert_tr(u64 iord)
+{
+
+	int i;
+	u64 old_rr;
+	struct ia64_tr_entry *p;
+	unsigned long psr;
+	int cpu = smp_processor_id();
+
+	if (!ia64_idtrs[cpu])
+		return;
+
+	psr = ia64_clear_ic();
+	for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
+		p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX;
+		if (p->pte & 0x1) {
+			old_rr = ia64_get_rr(p->ifa);
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, p->rr);
+				ia64_srlz_d();
+			}
+			ia64_ptr(iord, p->ifa, p->itir >> 2);
+			ia64_srlz_i();
+			if (iord & 0x1) {
+				ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (iord & 0x2) {
+				ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, old_rr);
+				ia64_srlz_d();
+			}
+		}
+	}
+	ia64_set_psr(psr);
+}
+
+/*
+ * ia64_mca_handler
+ *
+ *	This is uncorrectable machine check handler called from OS_MCA
+ *	dispatch code which is in turn called from SAL_CHECK().
+ *	This is the place where the core of OS MCA handling is done.
+ *	Right now the logs are extracted and displayed in a well-defined
+ *	format. This handler code is supposed to be run only on the
+ *	monarch processor. Once the monarch is done with MCA handling
+ *	further MCA logging is enabled by clearing logs.
+ *	Monarch also has the duty of sending wakeup-IPIs to pull the
+ *	slave processors out of rendezvous spinloop.
+ *
+ *	If multiple processors call into OS_MCA, the first will become
+ *	the monarch.  Subsequent cpus will be recorded in the mca_cpu
+ *	bitmask.  After the first monarch has processed its MCA, it
+ *	will wake up the next cpu in the mca_cpu bitmask and then go
+ *	into the rendezvous loop.  When all processors have serviced
+ *	their MCA, the last monarch frees up the rest of the processors.
+ */
+void
+ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
+		 struct ia64_sal_os_state *sos)
+{
+	int recover, cpu = smp_processor_id();
+	struct task_struct *previous_current;
+	struct ia64_mca_notify_die nd =
+		{ .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover };
+	static atomic_t mca_count;
+	static cpumask_t mca_cpu;
+
+	if (atomic_add_return(1, &mca_count) == 1) {
+		monarch_cpu = cpu;
+		sos->monarch = 1;
+	} else {
+		cpu_set(cpu, mca_cpu);
+		sos->monarch = 0;
+	}
+	mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
+		"monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
+
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1);
+
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
+	if (sos->monarch) {
+		ia64_wait_for_slaves(cpu, "MCA");
+
+		/* Wakeup all the processors which are spinning in the
+		 * rendezvous loop.  They will leave SAL, then spin in the OS
+		 * with interrupts disabled until this monarch cpu leaves the
+		 * MCA handler.  That gets control back to the OS so we can
+		 * backtrace the other cpus, backtrace when spinning in SAL
+		 * does not work.
+		 */
+		ia64_mca_wakeup_all();
+	} else {
+		while (cpu_isset(cpu, mca_cpu))
+			cpu_relax();	/* spin until monarch wakes us */
+	}
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1);
+
+	/* Get the MCA error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
+
+	/* MCA error recovery */
+	recover = (ia64_mca_ucmc_extension
+		&& ia64_mca_ucmc_extension(
+			IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
+			sos));
+
+	if (recover) {
+		sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA);
+		rh->severity = sal_log_severity_corrected;
+		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
+		sos->os_status = IA64_MCA_CORRECTED;
+	} else {
+		/* Dump buffered message to console */
+		ia64_mlogbuf_finish(1);
+	}
+
+	if (__get_cpu_var(ia64_mca_tr_reload)) {
+		mca_insert_tr(0x1); /*Reload dynamic itrs*/
+		mca_insert_tr(0x2); /*Reload dynamic itrs*/
+	}
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1);
+
+	if (atomic_dec_return(&mca_count) > 0) {
+		int i;
+
+		/* wake up the next monarch cpu,
+		 * and put this cpu in the rendez loop.
+		 */
+		for_each_online_cpu(i) {
+			if (cpu_isset(i, mca_cpu)) {
+				monarch_cpu = i;
+				cpu_clear(i, mca_cpu);	/* wake next cpu */
+				while (monarch_cpu != -1)
+					cpu_relax();	/* spin until last cpu leaves */
+				set_curr_task(cpu, previous_current);
+				ia64_mc_info.imi_rendez_checkin[cpu]
+						= IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+				return;
+			}
+		}
+	}
+	set_curr_task(cpu, previous_current);
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+	monarch_cpu = -1;	/* This frees the slaves and previous monarchs */
+}
+
+static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd);
+static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd);
+
+/*
+ * ia64_mca_cmc_int_handler
+ *
+ *  This is corrected machine check interrupt handler.
+ *	Right now the logs are extracted and displayed in a well-defined
+ *	format.
+ *
+ * Inputs
+ *      interrupt number
+ *      client data arg ptr
+ *
+ * Outputs
+ *	None
+ */
+static irqreturn_t
+ia64_mca_cmc_int_handler(int cmc_irq, void *arg)
+{
+	static unsigned long	cmc_history[CMC_HISTORY_LENGTH];
+	static int		index;
+	static DEFINE_SPINLOCK(cmc_history_lock);
+
+	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
+		       __func__, cmc_irq, smp_processor_id());
+
+	/* SAL spec states this should run w/ interrupts enabled */
+	local_irq_enable();
+
+	spin_lock(&cmc_history_lock);
+	if (!cmc_polling_enabled) {
+		int i, count = 1; /* we know 1 happened now */
+		unsigned long now = jiffies;
+
+		for (i = 0; i < CMC_HISTORY_LENGTH; i++) {
+			if (now - cmc_history[i] <= HZ)
+				count++;
+		}
+
+		IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH);
+		if (count >= CMC_HISTORY_LENGTH) {
+
+			cmc_polling_enabled = 1;
+			spin_unlock(&cmc_history_lock);
+			/* If we're being hit with CMC interrupts, we won't
+			 * ever execute the schedule_work() below.  Need to
+			 * disable CMC interrupts on this processor now.
+			 */
+			ia64_mca_cmc_vector_disable(NULL);
+			schedule_work(&cmc_disable_work);
+
+			/*
+			 * Corrected errors will still be corrected, but
+			 * make sure there's a log somewhere that indicates
+			 * something is generating more than we can handle.
+			 */
+			printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n");
+
+			mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+
+			/* lock already released, get out now */
+			goto out;
+		} else {
+			cmc_history[index++] = now;
+			if (index == CMC_HISTORY_LENGTH)
+				index = 0;
+		}
+	}
+	spin_unlock(&cmc_history_lock);
+out:
+	/* Get the CMC error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ *  ia64_mca_cmc_int_caller
+ *
+ * 	Triggered by sw interrupt from CMC polling routine.  Calls
+ * 	real interrupt handler and either triggers a sw interrupt
+ * 	on the next cpu or does cleanup at the end.
+ *
+ * Inputs
+ *	interrupt number
+ *	client data arg ptr
+ * Outputs
+ * 	handled
+ */
+static irqreturn_t
+ia64_mca_cmc_int_caller(int cmc_irq, void *arg)
+{
+	static int start_count = -1;
+	unsigned int cpuid;
+
+	cpuid = smp_processor_id();
+
+	/* If first cpu, update count */
+	if (start_count == -1)
+		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
+
+	ia64_mca_cmc_int_handler(cmc_irq, arg);
+
+	cpuid = cpumask_next(cpuid+1, cpu_online_mask);
+
+	if (cpuid < nr_cpu_ids) {
+		platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
+	} else {
+		/* If no log record, switch out of polling mode */
+		if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {
+
+			printk(KERN_WARNING "Returning to interrupt driven CMC handler\n");
+			schedule_work(&cmc_enable_work);
+			cmc_polling_enabled = 0;
+
+		} else {
+
+			mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+		}
+
+		start_count = -1;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ *  ia64_mca_cmc_poll
+ *
+ *	Poll for Corrected Machine Checks (CMCs)
+ *
+ * Inputs   :   dummy(unused)
+ * Outputs  :   None
+ *
+ */
+static void
+ia64_mca_cmc_poll (unsigned long dummy)
+{
+	/* Trigger a CMC interrupt cascade  */
+	platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
+}
+
+/*
+ *  ia64_mca_cpe_int_caller
+ *
+ * 	Triggered by sw interrupt from CPE polling routine.  Calls
+ * 	real interrupt handler and either triggers a sw interrupt
+ * 	on the next cpu or does cleanup at the end.
+ *
+ * Inputs
+ *	interrupt number
+ *	client data arg ptr
+ * Outputs
+ * 	handled
+ */
+#ifdef CONFIG_ACPI
+
+static irqreturn_t
+ia64_mca_cpe_int_caller(int cpe_irq, void *arg)
+{
+	static int start_count = -1;
+	static int poll_time = MIN_CPE_POLL_INTERVAL;
+	unsigned int cpuid;
+
+	cpuid = smp_processor_id();
+
+	/* If first cpu, update count */
+	if (start_count == -1)
+		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
+
+	ia64_mca_cpe_int_handler(cpe_irq, arg);
+
+	cpuid = cpumask_next(cpuid+1, cpu_online_mask);
+
+	if (cpuid < NR_CPUS) {
+		platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
+	} else {
+		/*
+		 * If a log was recorded, increase our polling frequency,
+		 * otherwise, backoff or return to interrupt mode.
+		 */
+		if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) {
+			poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2);
+		} else if (cpe_vector < 0) {
+			poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2);
+		} else {
+			poll_time = MIN_CPE_POLL_INTERVAL;
+
+			printk(KERN_WARNING "Returning to interrupt driven CPE handler\n");
+			enable_irq(local_vector_to_irq(IA64_CPE_VECTOR));
+			cpe_poll_enabled = 0;
+		}
+
+		if (cpe_poll_enabled)
+			mod_timer(&cpe_poll_timer, jiffies + poll_time);
+		start_count = -1;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ *  ia64_mca_cpe_poll
+ *
+ *	Poll for Corrected Platform Errors (CPEs), trigger interrupt
+ *	on first cpu, from there it will trickle through all the cpus.
+ *
+ * Inputs   :   dummy(unused)
+ * Outputs  :   None
+ *
+ */
+static void
+ia64_mca_cpe_poll (unsigned long dummy)
+{
+	/* Trigger a CPE interrupt cascade  */
+	platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
+}
+
+#endif /* CONFIG_ACPI */
+
+static int
+default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data)
+{
+	int c;
+	struct task_struct *g, *t;
+	if (val != DIE_INIT_MONARCH_PROCESS)
+		return NOTIFY_DONE;
+#ifdef CONFIG_KEXEC
+	if (atomic_read(&kdump_in_progress))
+		return NOTIFY_DONE;
+#endif
+
+	/*
+	 * FIXME: mlogbuf will brim over with INIT stack dumps.
+	 * To enable show_stack from INIT, we use oops_in_progress which should
+	 * be used in real oops. This would cause something wrong after INIT.
+	 */
+	BREAK_LOGLEVEL(console_loglevel);
+	ia64_mlogbuf_dump_from_init();
+
+	printk(KERN_ERR "Processes interrupted by INIT -");
+	for_each_online_cpu(c) {
+		struct ia64_sal_os_state *s;
+		t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET);
+		s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET);
+		g = s->prev_task;
+		if (g) {
+			if (g->pid)
+				printk(" %d", g->pid);
+			else
+				printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);
+		}
+	}
+	printk("\n\n");
+	if (read_trylock(&tasklist_lock)) {
+		do_each_thread (g, t) {
+			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
+			show_stack(t, NULL);
+		} while_each_thread (g, t);
+		read_unlock(&tasklist_lock);
+	}
+	/* FIXME: This will not restore zapped printk locks. */
+	RESTORE_LOGLEVEL(console_loglevel);
+	return NOTIFY_DONE;
+}
+
+/*
+ * C portion of the OS INIT handler
+ *
+ * Called from ia64_os_init_dispatch
+ *
+ * Inputs: pointer to pt_regs where processor info was saved.  SAL/OS state for
+ * this event.  This code is used for both monarch and slave INIT events, see
+ * sos->monarch.
+ *
+ * All INIT events switch to the INIT stack and change the previous process to
+ * blocked status.  If one of the INIT events is the monarch then we are
+ * probably processing the nmi button/command.  Use the monarch cpu to dump all
+ * the processes.  The slave INIT events all spin until the monarch cpu
+ * returns.  We can also get INIT slave events for MCA, in which case the MCA
+ * process is the monarch.
+ */
+
+void
+ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
+		  struct ia64_sal_os_state *sos)
+{
+	static atomic_t slaves;
+	static atomic_t monarchs;
+	struct task_struct *previous_current;
+	int cpu = smp_processor_id();
+	struct ia64_mca_notify_die nd =
+		{ .sos = sos, .monarch_cpu = &monarch_cpu };
+
+	NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0);
+
+	mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
+		sos->proc_state_param, cpu, sos->monarch);
+	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
+
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT");
+	sos->os_status = IA64_INIT_RESUME;
+
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * slaves.  The last slave that enters is promoted to be a monarch.
+	 * Remove this code in September 2006, that gives platforms a year to
+	 * fix their proms and get their customers updated.
+	 */
+	if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
+		mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
+		        __func__, cpu);
+		atomic_dec(&slaves);
+		sos->monarch = 1;
+	}
+
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * monarchs.  Second and subsequent monarchs are demoted to slaves.
+	 * Remove this code in September 2006, that gives platforms a year to
+	 * fix their proms and get their customers updated.
+	 */
+	if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
+		mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
+			       __func__, cpu);
+		atomic_dec(&monarchs);
+		sos->monarch = 0;
+	}
+
+	if (!sos->monarch) {
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
+
+#ifdef CONFIG_KEXEC
+		while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress))
+			udelay(1000);
+#else
+		while (monarch_cpu == -1)
+			cpu_relax();	/* spin until monarch enters */
+#endif
+
+		NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1);
+		NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1);
+
+#ifdef CONFIG_KEXEC
+		while (monarch_cpu != -1 && !atomic_read(&kdump_in_progress))
+			udelay(1000);
+#else
+		while (monarch_cpu != -1)
+			cpu_relax();	/* spin until monarch leaves */
+#endif
+
+		NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
+
+		mprintk("Slave on cpu %d returning to normal service.\n", cpu);
+		set_curr_task(cpu, previous_current);
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+		atomic_dec(&slaves);
+		return;
+	}
+
+	monarch_cpu = cpu;
+	NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1);
+
+	/*
+	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
+	 * generated via the BMC's command-line interface, but since the console is on the
+	 * same serial line, the user will need some time to switch out of the BMC before
+	 * the dump begins.
+	 */
+	mprintk("Delaying for 5 seconds...\n");
+	udelay(5*1000000);
+	ia64_wait_for_slaves(cpu, "INIT");
+	/* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
+	 * to default_monarch_init_process() above and just print all the
+	 * tasks.
+	 */
+	NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1);
+	NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1);
+
+	mprintk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
+	atomic_dec(&monarchs);
+	set_curr_task(cpu, previous_current);
+	monarch_cpu = -1;
+	return;
+}
+
+static int __init
+ia64_mca_disable_cpe_polling(char *str)
+{
+	cpe_poll_enabled = 0;
+	return 1;
+}
+
+__setup("disable_cpe_poll", ia64_mca_disable_cpe_polling);
+
+static struct irqaction cmci_irqaction = {
+	.handler =	ia64_mca_cmc_int_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"cmc_hndlr"
+};
+
+static struct irqaction cmcp_irqaction = {
+	.handler =	ia64_mca_cmc_int_caller,
+	.flags =	IRQF_DISABLED,
+	.name =		"cmc_poll"
+};
+
+static struct irqaction mca_rdzv_irqaction = {
+	.handler =	ia64_mca_rendez_int_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"mca_rdzv"
+};
+
+static struct irqaction mca_wkup_irqaction = {
+	.handler =	ia64_mca_wakeup_int_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"mca_wkup"
+};
+
+#ifdef CONFIG_ACPI
+static struct irqaction mca_cpe_irqaction = {
+	.handler =	ia64_mca_cpe_int_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"cpe_hndlr"
+};
+
+static struct irqaction mca_cpep_irqaction = {
+	.handler =	ia64_mca_cpe_int_caller,
+	.flags =	IRQF_DISABLED,
+	.name =		"cpe_poll"
+};
+#endif /* CONFIG_ACPI */
+
+/* Minimal format of the MCA/INIT stacks.  The pseudo processes that run on
+ * these stacks can never sleep, they cannot return from the kernel to user
+ * space, they do not appear in a normal ps listing.  So there is no need to
+ * format most of the fields.
+ */
+
+static void __cpuinit
+format_mca_init_stack(void *mca_data, unsigned long offset,
+		const char *type, int cpu)
+{
+	struct task_struct *p = (struct task_struct *)((char *)mca_data + offset);
+	struct thread_info *ti;
+	memset(p, 0, KERNEL_STACK_SIZE);
+	ti = task_thread_info(p);
+	ti->flags = _TIF_MCA_INIT;
+	ti->preempt_count = 1;
+	ti->task = p;
+	ti->cpu = cpu;
+	p->stack = ti;
+	p->state = TASK_UNINTERRUPTIBLE;
+	cpu_set(cpu, p->cpus_allowed);
+	INIT_LIST_HEAD(&p->tasks);
+	p->parent = p->real_parent = p->group_leader = p;
+	INIT_LIST_HEAD(&p->children);
+	INIT_LIST_HEAD(&p->sibling);
+	strncpy(p->comm, type, sizeof(p->comm)-1);
+}
+
+/* Caller prevents this from being called after init */
+static void * __init_refok mca_bootmem(void)
+{
+	return __alloc_bootmem(sizeof(struct ia64_mca_cpu),
+	                    KERNEL_STACK_SIZE, 0);
+}
+
+/* Do per-CPU MCA-related initialization.  */
+void __cpuinit
+ia64_mca_cpu_init(void *cpu_data)
+{
+	void *pal_vaddr;
+	void *data;
+	long sz = sizeof(struct ia64_mca_cpu);
+	int cpu = smp_processor_id();
+	static int first_time = 1;
+
+	/*
+	 * Structure will already be allocated if cpu has been online,
+	 * then offlined.
+	 */
+	if (__per_cpu_mca[cpu]) {
+		data = __va(__per_cpu_mca[cpu]);
+	} else {
+		if (first_time) {
+			data = mca_bootmem();
+			first_time = 0;
+		} else
+			data = (void *)__get_free_pages(GFP_KERNEL,
+							get_order(sz));
+		if (!data)
+			panic("Could not allocate MCA memory for cpu %d\n",
+					cpu);
+	}
+	format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack),
+		"MCA", cpu);
+	format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack),
+		"INIT", cpu);
+	__get_cpu_var(ia64_mca_data) = __per_cpu_mca[cpu] = __pa(data);
+
+	/*
+	 * Stash away a copy of the PTE needed to map the per-CPU page.
+	 * We may need it during MCA recovery.
+	 */
+	__get_cpu_var(ia64_mca_per_cpu_pte) =
+		pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL));
+
+	/*
+	 * Also, stash away a copy of the PAL address and the PTE
+	 * needed to map it.
+	 */
+	pal_vaddr = efi_get_pal_addr();
+	if (!pal_vaddr)
+		return;
+	__get_cpu_var(ia64_mca_pal_base) =
+		GRANULEROUNDDOWN((unsigned long) pal_vaddr);
+	__get_cpu_var(ia64_mca_pal_pte) = pte_val(mk_pte_phys(__pa(pal_vaddr),
+							      PAGE_KERNEL));
+}
+
+static void __cpuinit ia64_mca_cmc_vector_adjust(void *dummy)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (!cmc_polling_enabled)
+		ia64_mca_cmc_vector_enable(NULL);
+	local_irq_restore(flags);
+}
+
+static int __cpuinit mca_cpu_callback(struct notifier_block *nfb,
+				      unsigned long action,
+				      void *hcpu)
+{
+	int hotcpu = (unsigned long) hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		smp_call_function_single(hotcpu, ia64_mca_cmc_vector_adjust,
+					 NULL, 0);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mca_cpu_notifier __cpuinitdata = {
+	.notifier_call = mca_cpu_callback
+};
+
+/*
+ * ia64_mca_init
+ *
+ *  Do all the system level mca specific initialization.
+ *
+ *	1. Register spinloop and wakeup request interrupt vectors
+ *
+ *	2. Register OS_MCA handler entry point
+ *
+ *	3. Register OS_INIT handler entry point
+ *
+ *  4. Initialize MCA/CMC/INIT related log buffers maintained by the OS.
+ *
+ *  Note that this initialization is done very early before some kernel
+ *  services are available.
+ *
+ *  Inputs  :   None
+ *
+ *  Outputs :   None
+ */
+void __init
+ia64_mca_init(void)
+{
+	ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch;
+	ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave;
+	ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
+	int i;
+	long rc;
+	struct ia64_sal_retval isrv;
+	unsigned long timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */
+	static struct notifier_block default_init_monarch_nb = {
+		.notifier_call = default_monarch_init_process,
+		.priority = 0/* we need to notified last */
+	};
+
+	IA64_MCA_DEBUG("%s: begin\n", __func__);
+
+	/* Clear the Rendez checkin flag for all cpus */
+	for(i = 0 ; i < NR_CPUS; i++)
+		ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+
+	/*
+	 * Register the rendezvous spinloop and wakeup mechanism with SAL
+	 */
+
+	/* Register the rendezvous interrupt vector with SAL */
+	while (1) {
+		isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT,
+					      SAL_MC_PARAM_MECHANISM_INT,
+					      IA64_MCA_RENDEZ_VECTOR,
+					      timeout,
+					      SAL_MC_PARAM_RZ_ALWAYS);
+		rc = isrv.status;
+		if (rc == 0)
+			break;
+		if (rc == -2) {
+			printk(KERN_INFO "Increasing MCA rendezvous timeout from "
+				"%ld to %ld milliseconds\n", timeout, isrv.v0);
+			timeout = isrv.v0;
+			NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0);
+			continue;
+		}
+		printk(KERN_ERR "Failed to register rendezvous interrupt "
+		       "with SAL (status %ld)\n", rc);
+		return;
+	}
+
+	/* Register the wakeup interrupt vector with SAL */
+	isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP,
+				      SAL_MC_PARAM_MECHANISM_INT,
+				      IA64_MCA_WAKEUP_VECTOR,
+				      0, 0);
+	rc = isrv.status;
+	if (rc) {
+		printk(KERN_ERR "Failed to register wakeup interrupt with SAL "
+		       "(status %ld)\n", rc);
+		return;
+	}
+
+	IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __func__);
+
+	ia64_mc_info.imi_mca_handler        = ia64_tpa(mca_hldlr_ptr->fp);
+	/*
+	 * XXX - disable SAL checksum by setting size to 0; should be
+	 *	ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch);
+	 */
+	ia64_mc_info.imi_mca_handler_size	= 0;
+
+	/* Register the os mca handler with SAL */
+	if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA,
+				       ia64_mc_info.imi_mca_handler,
+				       ia64_tpa(mca_hldlr_ptr->gp),
+				       ia64_mc_info.imi_mca_handler_size,
+				       0, 0, 0)))
+	{
+		printk(KERN_ERR "Failed to register OS MCA handler with SAL "
+		       "(status %ld)\n", rc);
+		return;
+	}
+
+	IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __func__,
+		       ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp));
+
+	/*
+	 * XXX - disable SAL checksum by setting size to 0, should be
+	 * size of the actual init handler in mca_asm.S.
+	 */
+	ia64_mc_info.imi_monarch_init_handler		= ia64_tpa(init_hldlr_ptr_monarch->fp);
+	ia64_mc_info.imi_monarch_init_handler_size	= 0;
+	ia64_mc_info.imi_slave_init_handler		= ia64_tpa(init_hldlr_ptr_slave->fp);
+	ia64_mc_info.imi_slave_init_handler_size	= 0;
+
+	IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __func__,
+		       ia64_mc_info.imi_monarch_init_handler);
+
+	/* Register the os init handler with SAL */
+	if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT,
+				       ia64_mc_info.imi_monarch_init_handler,
+				       ia64_tpa(ia64_getreg(_IA64_REG_GP)),
+				       ia64_mc_info.imi_monarch_init_handler_size,
+				       ia64_mc_info.imi_slave_init_handler,
+				       ia64_tpa(ia64_getreg(_IA64_REG_GP)),
+				       ia64_mc_info.imi_slave_init_handler_size)))
+	{
+		printk(KERN_ERR "Failed to register m/s INIT handlers with SAL "
+		       "(status %ld)\n", rc);
+		return;
+	}
+	if (register_die_notifier(&default_init_monarch_nb)) {
+		printk(KERN_ERR "Failed to register default monarch INIT process\n");
+		return;
+	}
+
+	IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __func__);
+
+	/* Initialize the areas set aside by the OS to buffer the
+	 * platform/processor error states for MCA/INIT/CMC
+	 * handling.
+	 */
+	ia64_log_init(SAL_INFO_TYPE_MCA);
+	ia64_log_init(SAL_INFO_TYPE_INIT);
+	ia64_log_init(SAL_INFO_TYPE_CMC);
+	ia64_log_init(SAL_INFO_TYPE_CPE);
+
+	mca_init = 1;
+	printk(KERN_INFO "MCA related initialization done\n");
+}
+
+/*
+ * ia64_mca_late_init
+ *
+ *	Opportunity to setup things that require initialization later
+ *	than ia64_mca_init.  Setup a timer to poll for CPEs if the
+ *	platform doesn't support an interrupt driven mechanism.
+ *
+ *  Inputs  :   None
+ *  Outputs :   Status
+ */
+static int __init
+ia64_mca_late_init(void)
+{
+	if (!mca_init)
+		return 0;
+
+	/*
+	 *  Configure the CMCI/P vector and handler. Interrupts for CMC are
+	 *  per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
+	 */
+	register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction);
+	register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction);
+	ia64_mca_cmc_vector_setup();       /* Setup vector on BSP */
+
+	/* Setup the MCA rendezvous interrupt vector */
+	register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction);
+
+	/* Setup the MCA wakeup interrupt vector */
+	register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);
+
+#ifdef CONFIG_ACPI
+	/* Setup the CPEI/P handler */
+	register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
+#endif
+
+	register_hotcpu_notifier(&mca_cpu_notifier);
+
+	/* Setup the CMCI/P vector and handler */
+	init_timer(&cmc_poll_timer);
+	cmc_poll_timer.function = ia64_mca_cmc_poll;
+
+	/* Unmask/enable the vector */
+	cmc_polling_enabled = 0;
+	schedule_work(&cmc_enable_work);
+
+	IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__);
+
+#ifdef CONFIG_ACPI
+	/* Setup the CPEI/P vector and handler */
+	cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
+	init_timer(&cpe_poll_timer);
+	cpe_poll_timer.function = ia64_mca_cpe_poll;
+
+	{
+		unsigned int irq;
+
+		if (cpe_vector >= 0) {
+			/* If platform supports CPEI, enable the irq. */
+			irq = local_vector_to_irq(cpe_vector);
+			if (irq > 0) {
+				cpe_poll_enabled = 0;
+				irq_set_status_flags(irq, IRQ_PER_CPU);
+				setup_irq(irq, &mca_cpe_irqaction);
+				ia64_cpe_irq = irq;
+				ia64_mca_register_cpev(cpe_vector);
+				IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n",
+					__func__);
+				return 0;
+			}
+			printk(KERN_ERR "%s: Failed to find irq for CPE "
+					"interrupt handler, vector %d\n",
+					__func__, cpe_vector);
+		}
+		/* If platform doesn't support CPEI, get the timer going. */
+		if (cpe_poll_enabled) {
+			ia64_mca_cpe_poll(0UL);
+			IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __func__);
+		}
+	}
+#endif
+
+	return 0;
+}
+
+device_initcall(ia64_mca_late_init);
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
new file mode 100644
index 00000000..d5bdf9de
--- /dev/null
+++ b/arch/ia64/kernel/mca_asm.S
@@ -0,0 +1,1122 @@
+/*
+ * File:	mca_asm.S
+ * Purpose:	assembly portion of the IA64 MCA handling
+ *
+ * Mods by cfleck to integrate into kernel build
+ *
+ * 2000-03-15 David Mosberger-Tang <davidm@hpl.hp.com>
+ *		Added various stop bits to get a clean compile
+ *
+ * 2000-03-29 Chuck Fleckenstein <cfleck@co.intel.com>
+ *		Added code to save INIT handoff state in pt_regs format,
+ *		switch to temp kstack, switch modes, jump to C INIT handler
+ *
+ * 2002-01-04 J.Hall <jenna.s.hall@intel.com>
+ *		Before entering virtual mode code:
+ *		 1. Check for TLB CPU error
+ *		 2. Restore current thread pointer to kr6
+ *		 3. Move stack ptr 16 bytes to conform to C calling convention
+ *
+ * 2004-11-12 Russ Anderson <rja@sgi.com>
+ *		Added per cpu MCA/INIT stack save areas.
+ *
+ * 2005-12-08 Keith Owens <kaos@sgi.com>
+ *		Use per cpu MCA/INIT stacks for all data.
+ */
+#include <linux/threads.h>
+
+#include <asm/asmmacro.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/mca_asm.h>
+#include <asm/mca.h>
+
+#include "entry.h"
+
+#define GET_IA64_MCA_DATA(reg)						\
+	GET_THIS_PADDR(reg, ia64_mca_data)				\
+	;;								\
+	ld8 reg=[reg]
+
+	.global ia64_do_tlb_purge
+	.global ia64_os_mca_dispatch
+	.global ia64_os_init_on_kdump
+	.global ia64_os_init_dispatch_monarch
+	.global ia64_os_init_dispatch_slave
+
+	.text
+	.align 16
+
+//StartMain////////////////////////////////////////////////////////////////////
+
+/*
+ * Just the TLB purge part is moved to a separate function
+ * so we can re-use the code for cpu hotplug code as well
+ * Caller should now setup b1, so we can branch once the
+ * tlb flush is complete.
+ */
+
+ia64_do_tlb_purge:
+#define O(member)	IA64_CPUINFO_##member##_OFFSET
+
+	GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
+	;;
+	addl r17=O(PTCE_STRIDE),r2
+	addl r2=O(PTCE_BASE),r2
+	;;
+	ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));;	// r18=ptce_base
+	ld4 r19=[r2],4					// r19=ptce_count[0]
+	ld4 r21=[r17],4					// r21=ptce_stride[0]
+	;;
+	ld4 r20=[r2]					// r20=ptce_count[1]
+	ld4 r22=[r17]					// r22=ptce_stride[1]
+	mov r24=0
+	;;
+	adds r20=-1,r20
+	;;
+#undef O
+
+2:
+	cmp.ltu p6,p7=r24,r19
+(p7)	br.cond.dpnt.few 4f
+	mov ar.lc=r20
+3:
+	ptc.e r18
+	;;
+	add r18=r22,r18
+	br.cloop.sptk.few 3b
+	;;
+	add r18=r21,r18
+	add r24=1,r24
+	;;
+	br.sptk.few 2b
+4:
+	srlz.i 			// srlz.i implies srlz.d
+	;;
+
+        // Now purge addresses formerly mapped by TR registers
+	// 1. Purge ITR&DTR for kernel.
+	movl r16=KERNEL_START
+	mov r18=KERNEL_TR_PAGE_SHIFT<<2
+	;;
+	ptr.i r16, r18
+	ptr.d r16, r18
+	;;
+	srlz.i
+	;;
+	srlz.d
+	;;
+	// 3. Purge ITR for PAL code.
+	GET_THIS_PADDR(r2, ia64_mca_pal_base)
+	;;
+	ld8 r16=[r2]
+	mov r18=IA64_GRANULE_SHIFT<<2
+	;;
+	ptr.i r16,r18
+	;;
+	srlz.i
+	;;
+	// 4. Purge DTR for stack.
+	mov r16=IA64_KR(CURRENT_STACK)
+	;;
+	shl r16=r16,IA64_GRANULE_SHIFT
+	movl r19=PAGE_OFFSET
+	;;
+	add r16=r19,r16
+	mov r18=IA64_GRANULE_SHIFT<<2
+	;;
+	ptr.d r16,r18
+	;;
+	srlz.i
+	;;
+	// Now branch away to caller.
+	br.sptk.many b1
+	;;
+
+//EndMain//////////////////////////////////////////////////////////////////////
+
+//StartMain////////////////////////////////////////////////////////////////////
+
+ia64_os_mca_dispatch:
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	mov r19=1				// All MCA events are treated as monarch (for now)
+	br.sptk ia64_state_save			// save the state that is not in minstate
+1:
+
+	GET_IA64_MCA_DATA(r2)
+	// Using MCA stack, struct ia64_sal_os_state, variable proc_state_param
+	;;
+	add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+SOS(PROC_STATE_PARAM), r2
+	;;
+	ld8 r18=[r3]				// Get processor state parameter on existing PALE_CHECK.
+	;;
+	tbit.nz p6,p7=r18,60
+(p7)	br.spnt done_tlb_purge_and_reload
+
+	// The following code purges TC and TR entries. Then reload all TC entries.
+	// Purge percpu data TC entries.
+begin_tlb_purge_and_reload:
+	movl r18=ia64_reload_tr;;
+	LOAD_PHYSICAL(p0,r18,ia64_reload_tr);;
+	mov b1=r18;;
+	br.sptk.many ia64_do_tlb_purge;;
+
+ia64_reload_tr:
+	// Finally reload the TR registers.
+	// 1. Reload DTR/ITR registers for kernel.
+	mov r18=KERNEL_TR_PAGE_SHIFT<<2
+	movl r17=KERNEL_START
+	;;
+	mov cr.itir=r18
+	mov cr.ifa=r17
+        mov r16=IA64_TR_KERNEL
+	mov r19=ip
+	movl r18=PAGE_KERNEL
+	;;
+        dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT
+	;;
+	or r18=r17,r18
+	;;
+        itr.i itr[r16]=r18
+	;;
+        itr.d dtr[r16]=r18
+        ;;
+	srlz.i
+	srlz.d
+	;;
+	// 3. Reload ITR for PAL code.
+	GET_THIS_PADDR(r2, ia64_mca_pal_pte)
+	;;
+	ld8 r18=[r2]			// load PAL PTE
+	;;
+	GET_THIS_PADDR(r2, ia64_mca_pal_base)
+	;;
+	ld8 r16=[r2]			// load PAL vaddr
+	mov r19=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r19
+	mov cr.ifa=r16
+	mov r20=IA64_TR_PALCODE
+	;;
+	itr.i itr[r20]=r18
+	;;
+	srlz.i
+	;;
+	// 4. Reload DTR for stack.
+	mov r16=IA64_KR(CURRENT_STACK)
+	;;
+	shl r16=r16,IA64_GRANULE_SHIFT
+	movl r19=PAGE_OFFSET
+	;;
+	add r18=r19,r16
+	movl r20=PAGE_KERNEL
+	;;
+	add r16=r20,r16
+	mov r19=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r19
+	mov cr.ifa=r18
+	mov r20=IA64_TR_CURRENT_STACK
+	;;
+	itr.d dtr[r20]=r16
+	GET_THIS_PADDR(r2, ia64_mca_tr_reload)
+	mov r18 = 1
+	;;
+	srlz.d
+	;;
+	st8 [r2] =r18
+	;;
+
+done_tlb_purge_and_reload:
+
+	// switch to per cpu MCA stack
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_new_stack
+1:
+
+	// everything saved, now we can set the kernel registers
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_set_kernel_registers
+1:
+
+	// This must be done in physical mode
+	GET_IA64_MCA_DATA(r2)
+	;;
+	mov r7=r2
+
+        // Enter virtual mode from physical mode
+	VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
+
+	// This code returns to SAL via SOS r2, in general SAL has no unwind
+	// data.  To get a clean termination when backtracing the C MCA/INIT
+	// handler, set a dummy return address of 0 in this routine.  That
+	// requires that ia64_os_mca_virtual_begin be a global function.
+ENTRY(ia64_os_mca_virtual_begin)
+	.prologue
+	.save rp,r0
+	.body
+
+	mov ar.rsc=3				// set eager mode for C handler
+	mov r2=r7				// see GET_IA64_MCA_DATA above
+	;;
+
+	// Call virtual mode handler
+	alloc r14=ar.pfs,0,0,3,0
+	;;
+	DATA_PA_TO_VA(r2,r7)
+	;;
+	add out0=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2
+	add out1=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2
+	add out2=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET, r2
+	br.call.sptk.many    b0=ia64_mca_handler
+
+	// Revert back to physical mode before going back to SAL
+	PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
+ia64_os_mca_virtual_end:
+
+END(ia64_os_mca_virtual_begin)
+
+	// switch back to previous stack
+	alloc r14=ar.pfs,0,0,0,0		// remove the MCA handler frame
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_old_stack
+1:
+
+	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_restore		// restore the SAL state
+1:
+
+	mov		b0=r12			// SAL_CHECK return address
+
+	br		b0
+
+//EndMain//////////////////////////////////////////////////////////////////////
+
+//StartMain////////////////////////////////////////////////////////////////////
+
+//
+// NOP init handler for kdump.  In panic situation, we may receive INIT
+// while kernel transition.  Since we initialize registers on leave from
+// current kernel, no longer monarch/slave handlers of current kernel in
+// virtual mode are called safely.
+// We can unregister these init handlers from SAL, however then the INIT
+// will result in warmboot by SAL and we cannot retrieve the crashdump.
+// Therefore register this NOP function to SAL, to prevent entering virtual
+// mode and resulting warmboot by SAL.
+//
+ia64_os_init_on_kdump:
+	mov		r8=r0		// IA64_INIT_RESUME
+	mov             r9=r10		// SAL_GP
+	mov		r22=r17		// *minstate
+	;;
+	mov		r10=r0		// return to same context
+	mov		b0=r12		// SAL_CHECK return address
+	br		b0
+
+//
+// SAL to OS entry point for INIT on all processors.  This has been defined for
+// registration purposes with SAL as a part of ia64_mca_init.  Monarch and
+// slave INIT have identical processing, except for the value of the
+// sos->monarch flag in r19.
+//
+
+ia64_os_init_dispatch_monarch:
+	mov r19=1				// Bow, bow, ye lower middle classes!
+	br.sptk ia64_os_init_dispatch
+
+ia64_os_init_dispatch_slave:
+	mov r19=0				// <igor>yeth, mathter</igor>
+
+ia64_os_init_dispatch:
+
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_save			// save the state that is not in minstate
+1:
+
+	// switch to per cpu INIT stack
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_new_stack
+1:
+
+	// everything saved, now we can set the kernel registers
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_set_kernel_registers
+1:
+
+	// This must be done in physical mode
+	GET_IA64_MCA_DATA(r2)
+	;;
+	mov r7=r2
+
+        // Enter virtual mode from physical mode
+	VIRTUAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_begin, r4)
+
+	// This code returns to SAL via SOS r2, in general SAL has no unwind
+	// data.  To get a clean termination when backtracing the C MCA/INIT
+	// handler, set a dummy return address of 0 in this routine.  That
+	// requires that ia64_os_init_virtual_begin be a global function.
+ENTRY(ia64_os_init_virtual_begin)
+	.prologue
+	.save rp,r0
+	.body
+
+	mov ar.rsc=3				// set eager mode for C handler
+	mov r2=r7				// see GET_IA64_MCA_DATA above
+	;;
+
+	// Call virtual mode handler
+	alloc r14=ar.pfs,0,0,3,0
+	;;
+	DATA_PA_TO_VA(r2,r7)
+	;;
+	add out0=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2
+	add out1=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2
+	add out2=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SOS_OFFSET, r2
+	br.call.sptk.many    b0=ia64_init_handler
+
+	// Revert back to physical mode before going back to SAL
+	PHYSICAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_end, r4)
+ia64_os_init_virtual_end:
+
+END(ia64_os_init_virtual_begin)
+
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_state_restore		// restore the SAL state
+1:
+
+	// switch back to previous stack
+	alloc r14=ar.pfs,0,0,0,0		// remove the INIT handler frame
+	mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET	// use the INIT stack
+	LOAD_PHYSICAL(p0,r2,1f)			// return address
+	br.sptk ia64_old_stack
+1:
+
+	mov		b0=r12			// SAL_CHECK return address
+	br		b0
+
+//EndMain//////////////////////////////////////////////////////////////////////
+
+// common defines for the stubs
+#define	ms		r4
+#define	regs		r5
+#define	temp1		r2	/* careful, it overlaps with input registers */
+#define	temp2		r3	/* careful, it overlaps with input registers */
+#define	temp3		r7
+#define	temp4		r14
+
+
+//++
+// Name:
+//	ia64_state_save()
+//
+// Stub Description:
+//
+//	Save the state that is not in minstate.  This is sensitive to the layout of
+//	struct ia64_sal_os_state in mca.h.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	The OS to SAL section of struct ia64_sal_os_state is set to a default
+//	value of cold boot (MCA) or warm boot (INIT) and return to the same
+//	context.  ia64_sal_os_state is also used to hold some registers that
+//	need to be saved and restored across the stack switches.
+//
+//	Most input registers to this stub come from PAL/SAL
+//	r1  os gp, physical
+//	r8  pal_proc entry point
+//	r9  sal_proc entry point
+//	r10 sal gp
+//	r11 MCA - rendevzous state, INIT - reason code
+//	r12 sal return address
+//	r17 pal min_state
+//	r18 processor state parameter
+//	r19 monarch flag, set by the caller of this routine
+//
+//	In addition to the SAL to OS state, this routine saves all the
+//	registers that appear in struct pt_regs and struct switch_stack,
+//	excluding those that are already in the PAL minstate area.  This
+//	results in a partial pt_regs and switch_stack, the C code copies the
+//	remaining registers from PAL minstate to pt_regs and switch_stack.  The
+//	resulting structures contain all the state of the original process when
+//	MCA/INIT occurred.
+//
+//--
+
+ia64_state_save:
+	add regs=MCA_SOS_OFFSET, r3
+	add ms=MCA_SOS_OFFSET+8, r3
+	mov b0=r2		// save return address
+	cmp.eq p1,p2=IA64_MCA_CPU_MCA_STACK_OFFSET, r3
+	;;
+	GET_IA64_MCA_DATA(temp2)
+	;;
+	add temp1=temp2, regs	// struct ia64_sal_os_state on MCA or INIT stack
+	add temp2=temp2, ms	// struct ia64_sal_os_state+8 on MCA or INIT stack
+	;;
+	mov regs=temp1		// save the start of sos
+	st8 [temp1]=r1,16	// os_gp
+	st8 [temp2]=r8,16	// pal_proc
+	;;
+	st8 [temp1]=r9,16	// sal_proc
+	st8 [temp2]=r11,16	// rv_rc
+	mov r11=cr.iipa
+	;;
+	st8 [temp1]=r18		// proc_state_param
+	st8 [temp2]=r19		// monarch
+	mov r6=IA64_KR(CURRENT)
+	add temp1=SOS(SAL_RA), regs
+	add temp2=SOS(SAL_GP), regs
+	;;
+	st8 [temp1]=r12,16	// sal_ra
+	st8 [temp2]=r10,16	// sal_gp
+	mov r12=cr.isr
+	;;
+	st8 [temp1]=r17,16	// pal_min_state
+	st8 [temp2]=r6,16	// prev_IA64_KR_CURRENT
+	mov r6=IA64_KR(CURRENT_STACK)
+	;;
+	st8 [temp1]=r6,16	// prev_IA64_KR_CURRENT_STACK
+	st8 [temp2]=r0,16	// prev_task, starts off as NULL
+	mov r6=cr.ifa
+	;;
+	st8 [temp1]=r12,16	// cr.isr
+	st8 [temp2]=r6,16	// cr.ifa
+	mov r12=cr.itir
+	;;
+	st8 [temp1]=r12,16	// cr.itir
+	st8 [temp2]=r11,16	// cr.iipa
+	mov r12=cr.iim
+	;;
+	st8 [temp1]=r12		// cr.iim
+(p1)	mov r12=IA64_MCA_COLD_BOOT
+(p2)	mov r12=IA64_INIT_WARM_BOOT
+	mov r6=cr.iha
+	add temp1=SOS(OS_STATUS), regs
+	;;
+	st8 [temp2]=r6		// cr.iha
+	add temp2=SOS(CONTEXT), regs
+	st8 [temp1]=r12		// os_status, default is cold boot
+	mov r6=IA64_MCA_SAME_CONTEXT
+	;;
+	st8 [temp2]=r6		// context, default is same context
+
+	// Save the pt_regs data that is not in minstate.  The previous code
+	// left regs at sos.
+	add regs=MCA_PT_REGS_OFFSET-MCA_SOS_OFFSET, regs
+	;;
+	add temp1=PT(B6), regs
+	mov temp3=b6
+	mov temp4=b7
+	add temp2=PT(B7), regs
+	;;
+	st8 [temp1]=temp3,PT(AR_CSD)-PT(B6)		// save b6
+	st8 [temp2]=temp4,PT(AR_SSD)-PT(B7)		// save b7
+	mov temp3=ar.csd
+	mov temp4=ar.ssd
+	cover						// must be last in group
+	;;
+	st8 [temp1]=temp3,PT(AR_UNAT)-PT(AR_CSD)	// save ar.csd
+	st8 [temp2]=temp4,PT(AR_PFS)-PT(AR_SSD)		// save ar.ssd
+	mov temp3=ar.unat
+	mov temp4=ar.pfs
+	;;
+	st8 [temp1]=temp3,PT(AR_RNAT)-PT(AR_UNAT)	// save ar.unat
+	st8 [temp2]=temp4,PT(AR_BSPSTORE)-PT(AR_PFS)	// save ar.pfs
+	mov temp3=ar.rnat
+	mov temp4=ar.bspstore
+	;;
+	st8 [temp1]=temp3,PT(LOADRS)-PT(AR_RNAT)	// save ar.rnat
+	st8 [temp2]=temp4,PT(AR_FPSR)-PT(AR_BSPSTORE)	// save ar.bspstore
+	mov temp3=ar.bsp
+	;;
+	sub temp3=temp3, temp4	// ar.bsp - ar.bspstore
+	mov temp4=ar.fpsr
+	;;
+	shl temp3=temp3,16	// compute ar.rsc to be used for "loadrs"
+	;;
+	st8 [temp1]=temp3,PT(AR_CCV)-PT(LOADRS)		// save loadrs
+	st8 [temp2]=temp4,PT(F6)-PT(AR_FPSR)		// save ar.fpsr
+	mov temp3=ar.ccv
+	;;
+	st8 [temp1]=temp3,PT(F7)-PT(AR_CCV)		// save ar.ccv
+	stf.spill [temp2]=f6,PT(F8)-PT(F6)
+	;;
+	stf.spill [temp1]=f7,PT(F9)-PT(F7)
+	stf.spill [temp2]=f8,PT(F10)-PT(F8)
+	;;
+	stf.spill [temp1]=f9,PT(F11)-PT(F9)
+	stf.spill [temp2]=f10
+	;;
+	stf.spill [temp1]=f11
+
+	// Save the switch_stack data that is not in minstate nor pt_regs.  The
+	// previous code left regs at pt_regs.
+	add regs=MCA_SWITCH_STACK_OFFSET-MCA_PT_REGS_OFFSET, regs
+	;;
+	add temp1=SW(F2), regs
+	add temp2=SW(F3), regs
+	;;
+	stf.spill [temp1]=f2,32
+	stf.spill [temp2]=f3,32
+	;;
+	stf.spill [temp1]=f4,32
+	stf.spill [temp2]=f5,32
+	;;
+	stf.spill [temp1]=f12,32
+	stf.spill [temp2]=f13,32
+	;;
+	stf.spill [temp1]=f14,32
+	stf.spill [temp2]=f15,32
+	;;
+	stf.spill [temp1]=f16,32
+	stf.spill [temp2]=f17,32
+	;;
+	stf.spill [temp1]=f18,32
+	stf.spill [temp2]=f19,32
+	;;
+	stf.spill [temp1]=f20,32
+	stf.spill [temp2]=f21,32
+	;;
+	stf.spill [temp1]=f22,32
+	stf.spill [temp2]=f23,32
+	;;
+	stf.spill [temp1]=f24,32
+	stf.spill [temp2]=f25,32
+	;;
+	stf.spill [temp1]=f26,32
+	stf.spill [temp2]=f27,32
+	;;
+	stf.spill [temp1]=f28,32
+	stf.spill [temp2]=f29,32
+	;;
+	stf.spill [temp1]=f30,SW(B2)-SW(F30)
+	stf.spill [temp2]=f31,SW(B3)-SW(F31)
+	mov temp3=b2
+	mov temp4=b3
+	;;
+	st8 [temp1]=temp3,16	// save b2
+	st8 [temp2]=temp4,16	// save b3
+	mov temp3=b4
+	mov temp4=b5
+	;;
+	st8 [temp1]=temp3,SW(AR_LC)-SW(B4)	// save b4
+	st8 [temp2]=temp4	// save b5
+	mov temp3=ar.lc
+	;;
+	st8 [temp1]=temp3	// save ar.lc
+
+	// FIXME: Some proms are incorrectly accessing the minstate area as
+	// cached data.  The C code uses region 6, uncached virtual.  Ensure
+	// that there is no cache data lying around for the first 1K of the
+	// minstate area.
+	// Remove this code in September 2006, that gives platforms a year to
+	// fix their proms and get their customers updated.
+
+	add r1=32*1,r17
+	add r2=32*2,r17
+	add r3=32*3,r17
+	add r4=32*4,r17
+	add r5=32*5,r17
+	add r6=32*6,r17
+	add r7=32*7,r17
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+	add r17=32*8,r17
+	add r1=32*8,r1
+	add r2=32*8,r2
+	add r3=32*8,r3
+	add r4=32*8,r4
+	add r5=32*8,r5
+	add r6=32*8,r6
+	add r7=32*8,r7
+	;;
+	fc r17
+	fc r1
+	fc r2
+	fc r3
+	fc r4
+	fc r5
+	fc r6
+	fc r7
+
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+//	ia64_state_restore()
+//
+// Stub Description:
+//
+//	Restore the SAL/OS state.  This is sensitive to the layout of struct
+//	ia64_sal_os_state in mca.h.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	In addition to the SAL to OS state, this routine restores all the
+//	registers that appear in struct pt_regs and struct switch_stack,
+//	excluding those in the PAL minstate area.
+//
+//--
+
+ia64_state_restore:
+	// Restore the switch_stack data that is not in minstate nor pt_regs.
+	add regs=MCA_SWITCH_STACK_OFFSET, r3
+	mov b0=r2		// save return address
+	;;
+	GET_IA64_MCA_DATA(temp2)
+	;;
+	add regs=temp2, regs
+	;;
+	add temp1=SW(F2), regs
+	add temp2=SW(F3), regs
+	;;
+	ldf.fill f2=[temp1],32
+	ldf.fill f3=[temp2],32
+	;;
+	ldf.fill f4=[temp1],32
+	ldf.fill f5=[temp2],32
+	;;
+	ldf.fill f12=[temp1],32
+	ldf.fill f13=[temp2],32
+	;;
+	ldf.fill f14=[temp1],32
+	ldf.fill f15=[temp2],32
+	;;
+	ldf.fill f16=[temp1],32
+	ldf.fill f17=[temp2],32
+	;;
+	ldf.fill f18=[temp1],32
+	ldf.fill f19=[temp2],32
+	;;
+	ldf.fill f20=[temp1],32
+	ldf.fill f21=[temp2],32
+	;;
+	ldf.fill f22=[temp1],32
+	ldf.fill f23=[temp2],32
+	;;
+	ldf.fill f24=[temp1],32
+	ldf.fill f25=[temp2],32
+	;;
+	ldf.fill f26=[temp1],32
+	ldf.fill f27=[temp2],32
+	;;
+	ldf.fill f28=[temp1],32
+	ldf.fill f29=[temp2],32
+	;;
+	ldf.fill f30=[temp1],SW(B2)-SW(F30)
+	ldf.fill f31=[temp2],SW(B3)-SW(F31)
+	;;
+	ld8 temp3=[temp1],16	// restore b2
+	ld8 temp4=[temp2],16	// restore b3
+	;;
+	mov b2=temp3
+	mov b3=temp4
+	ld8 temp3=[temp1],SW(AR_LC)-SW(B4)	// restore b4
+	ld8 temp4=[temp2]	// restore b5
+	;;
+	mov b4=temp3
+	mov b5=temp4
+	ld8 temp3=[temp1]	// restore ar.lc
+	;;
+	mov ar.lc=temp3
+
+	// Restore the pt_regs data that is not in minstate.  The previous code
+	// left regs at switch_stack.
+	add regs=MCA_PT_REGS_OFFSET-MCA_SWITCH_STACK_OFFSET, regs
+	;;
+	add temp1=PT(B6), regs
+	add temp2=PT(B7), regs
+	;;
+	ld8 temp3=[temp1],PT(AR_CSD)-PT(B6)		// restore b6
+	ld8 temp4=[temp2],PT(AR_SSD)-PT(B7)		// restore b7
+	;;
+	mov b6=temp3
+	mov b7=temp4
+	ld8 temp3=[temp1],PT(AR_UNAT)-PT(AR_CSD)	// restore ar.csd
+	ld8 temp4=[temp2],PT(AR_PFS)-PT(AR_SSD)		// restore ar.ssd
+	;;
+	mov ar.csd=temp3
+	mov ar.ssd=temp4
+	ld8 temp3=[temp1]				// restore ar.unat
+	add temp1=PT(AR_CCV)-PT(AR_UNAT), temp1
+	ld8 temp4=[temp2],PT(AR_FPSR)-PT(AR_PFS)	// restore ar.pfs
+	;;
+	mov ar.unat=temp3
+	mov ar.pfs=temp4
+	// ar.rnat, ar.bspstore, loadrs are restore in ia64_old_stack.
+	ld8 temp3=[temp1],PT(F6)-PT(AR_CCV)		// restore ar.ccv
+	ld8 temp4=[temp2],PT(F7)-PT(AR_FPSR)		// restore ar.fpsr
+	;;
+	mov ar.ccv=temp3
+	mov ar.fpsr=temp4
+	ldf.fill f6=[temp1],PT(F8)-PT(F6)
+	ldf.fill f7=[temp2],PT(F9)-PT(F7)
+	;;
+	ldf.fill f8=[temp1],PT(F10)-PT(F8)
+	ldf.fill f9=[temp2],PT(F11)-PT(F9)
+	;;
+	ldf.fill f10=[temp1]
+	ldf.fill f11=[temp2]
+
+	// Restore the SAL to OS state. The previous code left regs at pt_regs.
+	add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs
+	;;
+	add temp1=SOS(SAL_RA), regs
+	add temp2=SOS(SAL_GP), regs
+	;;
+	ld8 r12=[temp1],16	// sal_ra
+	ld8 r9=[temp2],16	// sal_gp
+	;;
+	ld8 r22=[temp1],16	// pal_min_state, virtual
+	ld8 r13=[temp2],16	// prev_IA64_KR_CURRENT
+	;;
+	ld8 r16=[temp1],16	// prev_IA64_KR_CURRENT_STACK
+	ld8 r20=[temp2],16	// prev_task
+	;;
+	ld8 temp3=[temp1],16	// cr.isr
+	ld8 temp4=[temp2],16	// cr.ifa
+	;;
+	mov cr.isr=temp3
+	mov cr.ifa=temp4
+	ld8 temp3=[temp1],16	// cr.itir
+	ld8 temp4=[temp2],16	// cr.iipa
+	;;
+	mov cr.itir=temp3
+	mov cr.iipa=temp4
+	ld8 temp3=[temp1]	// cr.iim
+	ld8 temp4=[temp2]		// cr.iha
+	add temp1=SOS(OS_STATUS), regs
+	add temp2=SOS(CONTEXT), regs
+	;;
+	mov cr.iim=temp3
+	mov cr.iha=temp4
+	dep r22=0,r22,62,1	// pal_min_state, physical, uncached
+	mov IA64_KR(CURRENT)=r13
+	ld8 r8=[temp1]		// os_status
+	ld8 r10=[temp2]		// context
+
+	/* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to.  To
+	 * avoid any dependencies on the algorithm in ia64_switch_to(), just
+	 * purge any existing CURRENT_STACK mapping and insert the new one.
+	 *
+	 * r16 contains prev_IA64_KR_CURRENT_STACK, r13 contains
+	 * prev_IA64_KR_CURRENT, these values may have been changed by the C
+	 * code.  Do not use r8, r9, r10, r22, they contain values ready for
+	 * the return to SAL.
+	 */
+
+	mov r15=IA64_KR(CURRENT_STACK)		// physical granule mapped by IA64_TR_CURRENT_STACK
+	;;
+	shl r15=r15,IA64_GRANULE_SHIFT
+	;;
+	dep r15=-1,r15,61,3			// virtual granule
+	mov r18=IA64_GRANULE_SHIFT<<2		// for cr.itir.ps
+	;;
+	ptr.d r15,r18
+	;;
+	srlz.d
+
+	extr.u r19=r13,61,3			// r13 = prev_IA64_KR_CURRENT
+	shl r20=r16,IA64_GRANULE_SHIFT		// r16 = prev_IA64_KR_CURRENT_STACK
+	movl r21=PAGE_KERNEL			// page properties
+	;;
+	mov IA64_KR(CURRENT_STACK)=r16
+	cmp.ne p6,p0=RGN_KERNEL,r19		// new stack is in the kernel region?
+	or r21=r20,r21				// construct PA | page properties
+(p6)	br.spnt 1f				// the dreaded cpu 0 idle task in region 5:(
+	;;
+	mov cr.itir=r18
+	mov cr.ifa=r13
+	mov r20=IA64_TR_CURRENT_STACK
+	;;
+	itr.d dtr[r20]=r21
+	;;
+	srlz.d
+1:
+
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+//	ia64_new_stack()
+//
+// Stub Description:
+//
+//	Switch to the MCA/INIT stack.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	On entry RBS is still on the original stack, this routine switches RBS
+//	to use the MCA/INIT stack.
+//
+//	On entry, sos->pal_min_state is physical, on exit it is virtual.
+//
+//--
+
+ia64_new_stack:
+	add regs=MCA_PT_REGS_OFFSET, r3
+	add temp2=MCA_SOS_OFFSET+SOS(PAL_MIN_STATE), r3
+	mov b0=r2			// save return address
+	GET_IA64_MCA_DATA(temp1)
+	invala
+	;;
+	add temp2=temp2, temp1		// struct ia64_sal_os_state.pal_min_state on MCA or INIT stack
+	add regs=regs, temp1		// struct pt_regs on MCA or INIT stack
+	;;
+	// Address of minstate area provided by PAL is physical, uncacheable.
+	// Convert to Linux virtual address in region 6 for C code.
+	ld8 ms=[temp2]			// pal_min_state, physical
+	;;
+	dep temp1=-1,ms,62,2		// set region 6
+	mov temp3=IA64_RBS_OFFSET-MCA_PT_REGS_OFFSET
+	;;
+	st8 [temp2]=temp1		// pal_min_state, virtual
+
+	add temp4=temp3, regs		// start of bspstore on new stack
+	;;
+	mov ar.bspstore=temp4		// switch RBS to MCA/INIT stack
+	;;
+	flushrs				// must be first in group
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+//	ia64_old_stack()
+//
+// Stub Description:
+//
+//	Switch to the old stack.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//	On entry, pal_min_state is virtual, on exit it is physical.
+//
+//	On entry RBS is on the MCA/INIT stack, this routine switches RBS
+//	back to the previous stack.
+//
+//	The psr is set to all zeroes.  SAL return requires either all zeroes or
+//	just psr.mc set.  Leaving psr.mc off allows INIT to be issued if this
+//	code does not perform correctly.
+//
+//	The dirty registers at the time of the event were flushed to the
+//	MCA/INIT stack in ia64_pt_regs_save().  Restore the dirty registers
+//	before reverting to the previous bspstore.
+//--
+
+ia64_old_stack:
+	add regs=MCA_PT_REGS_OFFSET, r3
+	mov b0=r2			// save return address
+	GET_IA64_MCA_DATA(temp2)
+	LOAD_PHYSICAL(p0,temp1,1f)
+	;;
+	mov cr.ipsr=r0
+	mov cr.ifs=r0
+	mov cr.iip=temp1
+	;;
+	invala
+	rfi
+1:
+
+	add regs=regs, temp2		// struct pt_regs on MCA or INIT stack
+	;;
+	add temp1=PT(LOADRS), regs
+	;;
+	ld8 temp2=[temp1],PT(AR_BSPSTORE)-PT(LOADRS)	// restore loadrs
+	;;
+	ld8 temp3=[temp1],PT(AR_RNAT)-PT(AR_BSPSTORE)	// restore ar.bspstore
+	mov ar.rsc=temp2
+	;;
+	loadrs
+	ld8 temp4=[temp1]		// restore ar.rnat
+	;;
+	mov ar.bspstore=temp3		// back to old stack
+	;;
+	mov ar.rnat=temp4
+	;;
+
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+//	ia64_set_kernel_registers()
+//
+// Stub Description:
+//
+//	Set the registers that are required by the C code in order to run on an
+//	MCA/INIT stack.
+//
+//	r2 contains the return address, r3 contains either
+//	IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
+//
+//--
+
+ia64_set_kernel_registers:
+	add temp3=MCA_SP_OFFSET, r3
+	mov b0=r2		// save return address
+	GET_IA64_MCA_DATA(temp1)
+	;;
+	add r12=temp1, temp3	// kernel stack pointer on MCA/INIT stack
+	add r13=temp1, r3	// set current to start of MCA/INIT stack
+	add r20=temp1, r3	// physical start of MCA/INIT stack
+	;;
+	DATA_PA_TO_VA(r12,temp2)
+	DATA_PA_TO_VA(r13,temp3)
+	;;
+	mov IA64_KR(CURRENT)=r13
+
+	/* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack.  To avoid
+	 * any dependencies on the algorithm in ia64_switch_to(), just purge
+	 * any existing CURRENT_STACK mapping and insert the new one.
+	 */
+
+	mov r16=IA64_KR(CURRENT_STACK)		// physical granule mapped by IA64_TR_CURRENT_STACK
+	;;
+	shl r16=r16,IA64_GRANULE_SHIFT
+	;;
+	dep r16=-1,r16,61,3			// virtual granule
+	mov r18=IA64_GRANULE_SHIFT<<2		// for cr.itir.ps
+	;;
+	ptr.d r16,r18
+	;;
+	srlz.d
+
+	shr.u r16=r20,IA64_GRANULE_SHIFT	// r20 = physical start of MCA/INIT stack
+	movl r21=PAGE_KERNEL			// page properties
+	;;
+	mov IA64_KR(CURRENT_STACK)=r16
+	or r21=r20,r21				// construct PA | page properties
+	;;
+	mov cr.itir=r18
+	mov cr.ifa=r13
+	mov r20=IA64_TR_CURRENT_STACK
+
+	movl r17=FPSR_DEFAULT
+	;;
+	mov.m ar.fpsr=r17			// set ar.fpsr to kernel default value
+	;;
+	itr.d dtr[r20]=r21
+	;;
+	srlz.d
+
+	br.sptk b0
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+#undef	ms
+#undef	regs
+#undef	temp1
+#undef	temp2
+#undef	temp3
+#undef	temp4
+
+
+// Support function for mca.c, it is here to avoid using inline asm.  Given the
+// address of an rnat slot, if that address is below the current ar.bspstore
+// then return the contents of that slot, otherwise return the contents of
+// ar.rnat.
+GLOBAL_ENTRY(ia64_get_rnat)
+	alloc r14=ar.pfs,1,0,0,0
+	mov ar.rsc=0
+	;;
+	mov r14=ar.bspstore
+	;;
+	cmp.lt p6,p7=in0,r14
+	;;
+(p6)	ld8 r8=[in0]
+(p7)	mov r8=ar.rnat
+	mov ar.rsc=3
+	br.ret.sptk.many rp
+END(ia64_get_rnat)
+
+
+// void ia64_set_psr_mc(void)
+//
+// Set psr.mc bit to mask MCA/INIT.
+GLOBAL_ENTRY(ia64_set_psr_mc)
+	rsm psr.i | psr.ic		// disable interrupts
+	;;
+	srlz.d
+	;;
+	mov r14 = psr			// get psr{36:35,31:0}
+	movl r15 = 1f
+	;;
+	dep r14 = -1, r14, PSR_MC, 1	// set psr.mc
+	;;
+	dep r14 = -1, r14, PSR_IC, 1	// set psr.ic
+	;;
+	dep r14 = -1, r14, PSR_BN, 1	// keep bank1 in use
+	;;
+	mov cr.ipsr = r14
+	mov cr.ifs = r0
+	mov cr.iip = r15
+	;;
+	rfi
+1:
+	br.ret.sptk.many rp
+END(ia64_set_psr_mc)
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
new file mode 100644
index 00000000..09b4d682
--- /dev/null
+++ b/arch/ia64/kernel/mca_drv.c
@@ -0,0 +1,795 @@
+/*
+ * File:	mca_drv.c
+ * Purpose:	Generic MCA handling layer
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ * Copyright (C) 2005 Silicon Graphics, Inc
+ * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
+ * Copyright (C) 2006 Russ Anderson <rja@sgi.com>
+ */
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kallsyms.h>
+#include <linux/bootmem.h>
+#include <linux/acpi.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/workqueue.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/delay.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
+
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+
+#include "mca_drv.h"
+
+/* max size of SAL error record (default) */
+static int sal_rec_max = 10000;
+
+/* from mca_drv_asm.S */
+extern void *mca_handler_bhhook(void);
+
+static DEFINE_SPINLOCK(mca_bh_lock);
+
+typedef enum {
+	MCA_IS_LOCAL  = 0,
+	MCA_IS_GLOBAL = 1
+} mca_type_t;
+
+#define MAX_PAGE_ISOLATE 1024
+
+static struct page *page_isolate[MAX_PAGE_ISOLATE];
+static int num_page_isolate = 0;
+
+typedef enum {
+	ISOLATE_NG,
+	ISOLATE_OK,
+	ISOLATE_NONE
+} isolate_status_t;
+
+typedef enum {
+	MCA_NOT_RECOVERED = 0,
+	MCA_RECOVERED	  = 1
+} recovery_status_t;
+
+/*
+ *  This pool keeps pointers to the section part of SAL error record
+ */
+static struct {
+	slidx_list_t *buffer; /* section pointer list pool */
+	int	     cur_idx; /* Current index of section pointer list pool */
+	int	     max_idx; /* Maximum index of section pointer list pool */
+} slidx_pool;
+
+static int
+fatal_mca(const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
+
+	return MCA_NOT_RECOVERED;
+}
+
+static int
+mca_recovered(const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
+
+	return MCA_RECOVERED;
+}
+
+/**
+ * mca_page_isolate - isolate a poisoned page in order not to use it later
+ * @paddr:	poisoned memory location
+ *
+ * Return value:
+ *	one of isolate_status_t, ISOLATE_OK/NG/NONE.
+ */
+
+static isolate_status_t
+mca_page_isolate(unsigned long paddr)
+{
+	int i;
+	struct page *p;
+
+	/* whether physical address is valid or not */
+	if (!ia64_phys_addr_valid(paddr))
+		return ISOLATE_NONE;
+
+	if (!pfn_valid(paddr >> PAGE_SHIFT))
+		return ISOLATE_NONE;
+
+	/* convert physical address to physical page number */
+	p = pfn_to_page(paddr>>PAGE_SHIFT);
+
+	/* check whether a page number have been already registered or not */
+	for (i = 0; i < num_page_isolate; i++)
+		if (page_isolate[i] == p)
+			return ISOLATE_OK; /* already listed */
+
+	/* limitation check */
+	if (num_page_isolate == MAX_PAGE_ISOLATE)
+		return ISOLATE_NG;
+
+	/* kick pages having attribute 'SLAB' or 'Reserved' */
+	if (PageSlab(p) || PageReserved(p))
+		return ISOLATE_NG;
+
+	/* add attribute 'Reserved' and register the page */
+	get_page(p);
+	SetPageReserved(p);
+	page_isolate[num_page_isolate++] = p;
+
+	return ISOLATE_OK;
+}
+
+/**
+ * mca_hanlder_bh - Kill the process which occurred memory read error
+ * @paddr:	poisoned address received from MCA Handler
+ */
+
+void
+mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
+{
+	ia64_mlogbuf_dump();
+	printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
+		"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
+	       raw_smp_processor_id(), current->pid, current_uid(),
+		iip, ipsr, paddr, current->comm);
+
+	spin_lock(&mca_bh_lock);
+	switch (mca_page_isolate(paddr)) {
+	case ISOLATE_OK:
+		printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
+		break;
+	case ISOLATE_NG:
+		printk(KERN_CRIT "Page isolation: ( %lx ) failure.\n", paddr);
+		break;
+	default:
+		break;
+	}
+	spin_unlock(&mca_bh_lock);
+
+	/* This process is about to be killed itself */
+	do_exit(SIGKILL);
+}
+
+/**
+ * mca_make_peidx - Make index of processor error section
+ * @slpi:	pointer to record of processor error section
+ * @peidx:	pointer to index of processor error section
+ */
+
+static void
+mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
+{
+	/*
+	 * calculate the start address of
+	 *   "struct cpuid_info" and "sal_processor_static_info_t".
+	 */
+	u64 total_check_num = slpi->valid.num_cache_check
+				+ slpi->valid.num_tlb_check
+				+ slpi->valid.num_bus_check
+				+ slpi->valid.num_reg_file_check
+				+ slpi->valid.num_ms_check;
+	u64 head_size =	sizeof(sal_log_mod_error_info_t) * total_check_num
+			+ sizeof(sal_log_processor_info_t);
+	u64 mid_size  = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info);
+
+	peidx_head(peidx)   = slpi;
+	peidx_mid(peidx)    = (struct sal_cpuid_info *)
+		(slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL);
+	peidx_bottom(peidx) = (sal_processor_static_info_t *)
+		(slpi->valid.psi_static_struct ?
+			((char*)slpi + head_size + mid_size) : NULL);
+}
+
+/**
+ * mca_make_slidx -  Make index of SAL error record
+ * @buffer:	pointer to SAL error record
+ * @slidx:	pointer to index of SAL error record
+ *
+ * Return value:
+ *	1 if record has platform error / 0 if not
+ */
+#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
+	{slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
+	hl->hdr = ptr; \
+	list_add(&hl->list, &(sect)); \
+	slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
+
+static int
+mca_make_slidx(void *buffer, slidx_table_t *slidx)
+{
+	int platform_err = 0;
+	int record_len = ((sal_log_record_header_t*)buffer)->len;
+	u32 ercd_pos;
+	int sects;
+	sal_log_section_hdr_t *sp;
+
+	/*
+	 * Initialize index referring current record
+	 */
+	INIT_LIST_HEAD(&(slidx->proc_err));
+	INIT_LIST_HEAD(&(slidx->mem_dev_err));
+	INIT_LIST_HEAD(&(slidx->sel_dev_err));
+	INIT_LIST_HEAD(&(slidx->pci_bus_err));
+	INIT_LIST_HEAD(&(slidx->smbios_dev_err));
+	INIT_LIST_HEAD(&(slidx->pci_comp_err));
+	INIT_LIST_HEAD(&(slidx->plat_specific_err));
+	INIT_LIST_HEAD(&(slidx->host_ctlr_err));
+	INIT_LIST_HEAD(&(slidx->plat_bus_err));
+	INIT_LIST_HEAD(&(slidx->unsupported));
+
+	/*
+	 * Extract a Record Header
+	 */
+	slidx->header = buffer;
+
+	/*
+	 * Extract each section records
+	 * (arranged from "int ia64_log_platform_info_print()")
+	 */
+	for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0;
+		ercd_pos < record_len; ercd_pos += sp->len, sects++) {
+		sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
+		if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
+			LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_BUS_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
+		} else {
+			LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp);
+		}
+	}
+	slidx->n_sections = sects;
+
+	return platform_err;
+}
+
+/**
+ * init_record_index_pools - Initialize pool of lists for SAL record index
+ *
+ * Return value:
+ *	0 on Success / -ENOMEM on Failure
+ */
+static int
+init_record_index_pools(void)
+{
+	int i;
+	int rec_max_size;  /* Maximum size of SAL error records */
+	int sect_min_size; /* Minimum size of SAL error sections */
+	/* minimum size table of each section */
+	static int sal_log_sect_min_sizes[] = {
+		sizeof(sal_log_processor_info_t)
+		+ sizeof(sal_processor_static_info_t),
+		sizeof(sal_log_mem_dev_err_info_t),
+		sizeof(sal_log_sel_dev_err_info_t),
+		sizeof(sal_log_pci_bus_err_info_t),
+		sizeof(sal_log_smbios_dev_err_info_t),
+		sizeof(sal_log_pci_comp_err_info_t),
+		sizeof(sal_log_plat_specific_err_info_t),
+		sizeof(sal_log_host_ctlr_err_info_t),
+		sizeof(sal_log_plat_bus_err_info_t),
+	};
+
+	/*
+	 * MCA handler cannot allocate new memory on flight,
+	 * so we preallocate enough memory to handle a SAL record.
+	 *
+	 * Initialize a handling set of slidx_pool:
+	 *   1. Pick up the max size of SAL error records
+	 *   2. Pick up the min size of SAL error sections
+	 *   3. Allocate the pool as enough to 2 SAL records
+	 *     (now we can estimate the maxinum of section in a record.)
+	 */
+
+	/* - 1 - */
+	rec_max_size = sal_rec_max;
+
+	/* - 2 - */
+	sect_min_size = sal_log_sect_min_sizes[0];
+	for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++)
+		if (sect_min_size > sal_log_sect_min_sizes[i])
+			sect_min_size = sal_log_sect_min_sizes[i];
+
+	/* - 3 - */
+	slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
+	slidx_pool.buffer = (slidx_list_t *)
+		kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
+
+	return slidx_pool.buffer ? 0 : -ENOMEM;
+}
+
+
+/*****************************************************************************
+ * Recovery functions                                                        *
+ *****************************************************************************/
+
+/**
+ * is_mca_global - Check whether this MCA is global or not
+ * @peidx:	pointer of index of processor error section
+ * @pbci:	pointer to pal_bus_check_info_t
+ * @sos:	pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	MCA_IS_LOCAL / MCA_IS_GLOBAL
+ */
+
+static mca_type_t
+is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+	      struct ia64_sal_os_state *sos)
+{
+	pal_processor_state_info_t *psp =
+		(pal_processor_state_info_t*)peidx_psp(peidx);
+
+	/*
+	 * PAL can request a rendezvous, if the MCA has a global scope.
+	 * If "rz_always" flag is set, SAL requests MCA rendezvous
+	 * in spite of global MCA.
+	 * Therefore it is local MCA when rendezvous has not been requested.
+	 * Failed to rendezvous, the system must be down.
+	 */
+	switch (sos->rv_rc) {
+		case -1: /* SAL rendezvous unsuccessful */
+			return MCA_IS_GLOBAL;
+		case  0: /* SAL rendezvous not required */
+			return MCA_IS_LOCAL;
+		case  1: /* SAL rendezvous successful int */
+		case  2: /* SAL rendezvous successful int with init */
+		default:
+			break;
+	}
+
+	/*
+	 * If One or more Cache/TLB/Reg_File/Uarch_Check is here,
+	 * it would be a local MCA. (i.e. processor internal error)
+	 */
+	if (psp->tc || psp->cc || psp->rc || psp->uc)
+		return MCA_IS_LOCAL;
+	
+	/*
+	 * Bus_Check structure with Bus_Check.ib (internal bus error) flag set
+	 * would be a global MCA. (e.g. a system bus address parity error)
+	 */
+	if (!pbci || pbci->ib)
+		return MCA_IS_GLOBAL;
+
+	/*
+	 * Bus_Check structure with Bus_Check.eb (external bus error) flag set
+	 * could be either a local MCA or a global MCA.
+	 *
+	 * Referring Bus_Check.bsi:
+	 *   0: Unknown/unclassified
+	 *   1: BERR#
+	 *   2: BINIT#
+	 *   3: Hard Fail
+	 * (FIXME: Are these SGI specific or generic bsi values?)
+	 */
+	if (pbci->eb)
+		switch (pbci->bsi) {
+			case 0:
+				/* e.g. a load from poisoned memory */
+				return MCA_IS_LOCAL;
+			case 1:
+			case 2:
+			case 3:
+				return MCA_IS_GLOBAL;
+		}
+
+	return MCA_IS_GLOBAL;
+}
+
+/**
+ * get_target_identifier - Get the valid Cache or Bus check target identifier.
+ * @peidx:	pointer of index of processor error section
+ *
+ * Return value:
+ *	target address on Success / 0 on Failure
+ */
+static u64
+get_target_identifier(peidx_table_t *peidx)
+{
+	u64 target_address = 0;
+	sal_log_mod_error_info_t *smei;
+	pal_cache_check_info_t *pcci;
+	int i, level = 9;
+
+	/*
+	 * Look through the cache checks for a valid target identifier
+	 * If more than one valid target identifier, return the one
+	 * with the lowest cache level.
+	 */
+	for (i = 0; i < peidx_cache_check_num(peidx); i++) {
+		smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i);
+		if (smei->valid.target_identifier && smei->target_identifier) {
+			pcci = (pal_cache_check_info_t *)&(smei->check_info);
+			if (!target_address || (pcci->level < level)) {
+				target_address = smei->target_identifier;
+				level = pcci->level;
+				continue;
+			}
+		}
+	}
+	if (target_address)
+		return target_address;
+
+	/*
+	 * Look at the bus check for a valid target identifier
+	 */
+	smei = peidx_bus_check(peidx, 0);
+	if (smei && smei->valid.target_identifier)
+		return smei->target_identifier;
+
+	return 0;
+}
+
+/**
+ * recover_from_read_error - Try to recover the errors which type are "read"s.
+ * @slidx:	pointer of index of SAL error record
+ * @peidx:	pointer of index of processor error section
+ * @pbci:	pointer of pal_bus_check_info
+ * @sos:	pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+
+static int
+recover_from_read_error(slidx_table_t *slidx,
+			peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			struct ia64_sal_os_state *sos)
+{
+	u64 target_identifier;
+	pal_min_state_area_t *pmsa;
+	struct ia64_psr *psr1, *psr2;
+	ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
+
+	/* Is target address valid? */
+	target_identifier = get_target_identifier(peidx);
+	if (!target_identifier)
+		return fatal_mca("target address not valid");
+
+	/*
+	 * cpu read or memory-mapped io read
+	 *
+	 *    offending process  affected process  OS MCA do
+	 *     kernel mode        kernel mode       down system
+	 *     kernel mode        user   mode       kill the process
+	 *     user   mode        kernel mode       down system (*)
+	 *     user   mode        user   mode       kill the process
+	 *
+	 * (*) You could terminate offending user-mode process
+	 *    if (pbci->pv && pbci->pl != 0) *and* if you sure
+	 *    the process not have any locks of kernel.
+	 */
+
+	/* Is minstate valid? */
+	if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
+		return fatal_mca("minstate not valid");
+	psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
+	psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
+
+	/*
+	 *  Check the privilege level of interrupted context.
+	 *   If it is user-mode, then terminate affected process.
+	 */
+
+	pmsa = sos->pal_min_state;
+	if (psr1->cpl != 0 ||
+	   ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
+		/*
+		 *  setup for resume to bottom half of MCA,
+		 * "mca_handler_bhhook"
+		 */
+		/* pass to bhhook as argument (gr8, ...) */
+		pmsa->pmsa_gr[8-1] = target_identifier;
+		pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
+		pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
+		/* set interrupted return address (but no use) */
+		pmsa->pmsa_br0 = pmsa->pmsa_iip;
+		/* change resume address to bottom half */
+		pmsa->pmsa_iip = mca_hdlr_bh->fp;
+		pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
+		/* set cpl with kernel mode */
+		psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
+		psr2->cpl = 0;
+		psr2->ri  = 0;
+		psr2->bn  = 1;
+		psr2->i  = 0;
+
+		return mca_recovered("user memory corruption. "
+				"kill affected process - recovered.");
+	}
+
+	return fatal_mca("kernel context not recovered, iip 0x%lx\n",
+			 pmsa->pmsa_iip);
+}
+
+/**
+ * recover_from_platform_error - Recover from platform error.
+ * @slidx:	pointer of index of SAL error record
+ * @peidx:	pointer of index of processor error section
+ * @pbci:	pointer of pal_bus_check_info
+ * @sos:	pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+
+static int
+recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
+			    pal_bus_check_info_t *pbci,
+			    struct ia64_sal_os_state *sos)
+{
+	int status = 0;
+	pal_processor_state_info_t *psp =
+		(pal_processor_state_info_t*)peidx_psp(peidx);
+
+	if (psp->bc && pbci->eb && pbci->bsi == 0) {
+		switch(pbci->type) {
+		case 1: /* partial read */
+		case 3: /* full line(cpu) read */
+		case 9: /* I/O space read */
+			status = recover_from_read_error(slidx, peidx, pbci,
+							 sos);
+			break;
+		case 0: /* unknown */
+		case 2: /* partial write */
+		case 4: /* full line write */
+		case 5: /* implicit or explicit write-back operation */
+		case 6: /* snoop probe */
+		case 7: /* incoming or outgoing ptc.g */
+		case 8: /* write coalescing transactions */
+		case 10: /* I/O space write */
+		case 11: /* inter-processor interrupt message(IPI) */
+		case 12: /* interrupt acknowledge or
+				external task priority cycle */
+		default:
+			break;
+		}
+	} else if (psp->cc && !psp->bc) {	/* Cache error */
+		status = recover_from_read_error(slidx, peidx, pbci, sos);
+	}
+
+	return status;
+}
+
+/*
+ * recover_from_tlb_check
+ * @peidx:	pointer of index of processor error section
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+static int
+recover_from_tlb_check(peidx_table_t *peidx)
+{
+	sal_log_mod_error_info_t *smei;
+	pal_tlb_check_info_t *ptci;
+
+	smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0);
+	ptci = (pal_tlb_check_info_t *)&(smei->check_info);
+
+	/*
+	 * Look for signature of a duplicate TLB DTC entry, which is
+	 * a SW bug and always fatal.
+	 */
+	if (ptci->op == PAL_TLB_CHECK_OP_PURGE
+	    && !(ptci->itr || ptci->dtc || ptci->itc))
+		return fatal_mca("Duplicate TLB entry");
+
+	return mca_recovered("TLB check recovered");
+}
+
+/**
+ * recover_from_processor_error
+ * @platform:	whether there are some platform error section or not
+ * @slidx:	pointer of index of SAL error record
+ * @peidx:	pointer of index of processor error section
+ * @pbci:	pointer of pal_bus_check_info
+ * @sos:	pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+
+static int
+recover_from_processor_error(int platform, slidx_table_t *slidx,
+			     peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			     struct ia64_sal_os_state *sos)
+{
+	pal_processor_state_info_t *psp =
+		(pal_processor_state_info_t*)peidx_psp(peidx);
+
+	/*
+	 * Processor recovery status must key off of the PAL recovery
+	 * status in the Processor State Parameter.
+	 */
+
+	/*
+	 * The machine check is corrected.
+	 */
+	if (psp->cm == 1)
+		return mca_recovered("machine check is already corrected.");
+
+	/*
+	 * The error was not contained.  Software must be reset.
+	 */
+	if (psp->us || psp->ci == 0)
+		return fatal_mca("error not contained");
+
+	/*
+	 * Look for recoverable TLB check
+	 */
+	if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
+		return recover_from_tlb_check(peidx);
+
+	/*
+	 * The cache check and bus check bits have four possible states
+	 *   cc bc
+	 *    1  1	Memory error, attempt recovery
+	 *    1  0	Cache error, attempt recovery
+	 *    0  1	I/O error, attempt recovery
+	 *    0  0	Other error type, not recovered
+	 */
+	if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL))
+		return fatal_mca("No cache or bus check");
+
+	/*
+	 * Cannot handle more than one bus check.
+	 */
+	if (peidx_bus_check_num(peidx) > 1)
+		return fatal_mca("Too many bus checks");
+
+	if (pbci->ib)
+		return fatal_mca("Internal Bus error");
+	if (pbci->eb && pbci->bsi > 0)
+		return fatal_mca("External bus check fatal status");
+
+	/*
+	 * This is a local MCA and estimated as a recoverable error.
+	 */
+	if (platform)
+		return recover_from_platform_error(slidx, peidx, pbci, sos);
+
+	/*
+	 * On account of strange SAL error record, we cannot recover.
+	 */
+	return fatal_mca("Strange SAL record");
+}
+
+/**
+ * mca_try_to_recover - Try to recover from MCA
+ * @rec:	pointer to a SAL error record
+ * @sos:	pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+
+static int
+mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
+{
+	int platform_err;
+	int n_proc_err;
+	slidx_table_t slidx;
+	peidx_table_t peidx;
+	pal_bus_check_info_t pbci;
+
+	/* Make index of SAL error record */
+	platform_err = mca_make_slidx(rec, &slidx);
+
+	/* Count processor error sections */
+	n_proc_err = slidx_count(&slidx, proc_err);
+
+	 /* Now, OS can recover when there is one processor error section */
+	if (n_proc_err > 1)
+		return fatal_mca("Too Many Errors");
+	else if (n_proc_err == 0)
+		/* Weird SAL record ... We can't do anything */
+		return fatal_mca("Weird SAL record");
+
+	/* Make index of processor error section */
+	mca_make_peidx((sal_log_processor_info_t*)
+		slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
+
+	/* Extract Processor BUS_CHECK[0] */
+	*((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
+
+	/* Check whether MCA is global or not */
+	if (is_mca_global(&peidx, &pbci, sos))
+		return fatal_mca("global MCA");
+	
+	/* Try to recover a processor error */
+	return recover_from_processor_error(platform_err, &slidx, &peidx,
+					    &pbci, sos);
+}
+
+/*
+ * =============================================================================
+ */
+
+int __init mca_external_handler_init(void)
+{
+	if (init_record_index_pools())
+		return -ENOMEM;
+
+	/* register external mca handlers */
+	if (ia64_reg_MCA_extension(mca_try_to_recover)) {	
+		printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
+		kfree(slidx_pool.buffer);
+		return -EFAULT;
+	}
+	return 0;
+}
+
+void __exit mca_external_handler_exit(void)
+{
+	/* unregister external mca handlers */
+	ia64_unreg_MCA_extension();
+	kfree(slidx_pool.buffer);
+}
+
+module_init(mca_external_handler_init);
+module_exit(mca_external_handler_exit);
+
+module_param(sal_rec_max, int, 0644);
+MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record");
+
+MODULE_DESCRIPTION("ia64 platform dependent mca handler driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
new file mode 100644
index 00000000..53b8ecb5
--- /dev/null
+++ b/arch/ia64/kernel/mca_drv.h
@@ -0,0 +1,122 @@
+/*
+ * File:	mca_drv.h
+ * Purpose:	Define helpers for Generic MCA handling
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ */
+/*
+ * Processor error section:
+ *
+ *  +-sal_log_processor_info_t *info-------------+
+ *  | sal_log_section_hdr_t header;              |
+ *  | ...                                        |
+ *  | sal_log_mod_error_info_t info[0];          |
+ *  +-+----------------+-------------------------+
+ *    | CACHE_CHECK    |  ^ num_cache_check v
+ *    +----------------+
+ *    | TLB_CHECK      |  ^ num_tlb_check v
+ *    +----------------+
+ *    | BUS_CHECK      |  ^ num_bus_check v
+ *    +----------------+
+ *    | REG_FILE_CHECK |  ^ num_reg_file_check v
+ *    +----------------+
+ *    | MS_CHECK       |  ^ num_ms_check v
+ *  +-struct cpuid_info *id----------------------+
+ *  | regs[5];                                   |
+ *  | reserved;                                  |
+ *  +-sal_processor_static_info_t *regs----------+
+ *  | valid;                                     |
+ *  | ...                                        |
+ *  | fr[128];                                   |
+ *  +--------------------------------------------+
+ */
+
+/* peidx: index of processor error section */
+typedef struct peidx_table {
+	sal_log_processor_info_t        *info;
+	struct sal_cpuid_info           *id;
+	sal_processor_static_info_t     *regs;
+} peidx_table_t;
+
+#define peidx_head(p)   (((p)->info))
+#define peidx_mid(p)    (((p)->id))
+#define peidx_bottom(p) (((p)->regs))
+
+#define peidx_psp(p)           (&(peidx_head(p)->proc_state_parameter))
+#define peidx_field_valid(p)   (&(peidx_head(p)->valid))
+#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area))
+
+#define peidx_cache_check_num(p)    (peidx_head(p)->valid.num_cache_check)
+#define peidx_tlb_check_num(p)      (peidx_head(p)->valid.num_tlb_check)
+#define peidx_bus_check_num(p)      (peidx_head(p)->valid.num_bus_check)
+#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check)
+#define peidx_ms_check_num(p)       (peidx_head(p)->valid.num_ms_check)
+
+#define peidx_cache_check_idx(p, n)    (n)
+#define peidx_tlb_check_idx(p, n)      (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n)
+#define peidx_bus_check_idx(p, n)      (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n)
+#define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n)
+#define peidx_ms_check_idx(p, n)       (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n)
+
+#define peidx_mod_error_info(p, name, n) \
+({	int __idx = peidx_##name##_idx(p, n); \
+	sal_log_mod_error_info_t *__ret = NULL; \
+	if (peidx_##name##_num(p) > n) /*BUG*/ \
+		__ret = &(peidx_head(p)->info[__idx]); \
+	__ret; })
+
+#define peidx_cache_check(p, n)    peidx_mod_error_info(p, cache_check, n)
+#define peidx_tlb_check(p, n)      peidx_mod_error_info(p, tlb_check, n)
+#define peidx_bus_check(p, n)      peidx_mod_error_info(p, bus_check, n)
+#define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n)
+#define peidx_ms_check(p, n)       peidx_mod_error_info(p, ms_check, n)
+
+#define peidx_check_info(proc, name, n) \
+({ \
+	sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\
+	u64 __temp = __info && __info->valid.check_info \
+		? __info->check_info : 0; \
+	__temp; })
+
+/* slidx: index of SAL log error record */
+
+typedef struct slidx_list {
+	struct list_head list;
+	sal_log_section_hdr_t *hdr;
+} slidx_list_t;
+
+typedef struct slidx_table {
+	sal_log_record_header_t *header;
+	int n_sections;			/* # of section headers */
+	struct list_head proc_err;
+	struct list_head mem_dev_err;
+	struct list_head sel_dev_err;
+	struct list_head pci_bus_err;
+	struct list_head smbios_dev_err;
+	struct list_head pci_comp_err;
+	struct list_head plat_specific_err;
+	struct list_head host_ctlr_err;
+	struct list_head plat_bus_err;
+	struct list_head unsupported;	/* list of unsupported sections */
+} slidx_table_t;
+
+#define slidx_foreach_entry(pos, head) \
+	list_for_each_entry(pos, head, list)
+#define slidx_first_entry(head) \
+	(((head)->next != (head)) ? list_entry((head)->next, typeof(slidx_list_t), list) : NULL)
+#define slidx_count(slidx, sec) \
+({	int __count = 0; \
+	slidx_list_t *__pos; \
+	slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\
+	__count; })
+
+struct mca_table_entry {
+	int start_addr;	/* location-relative starting address of MCA recoverable range */
+	int end_addr;	/* location-relative ending address of MCA recoverable range */
+};
+
+extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
+extern int mca_recover_range(unsigned long);
+extern void ia64_mlogbuf_dump(void);
+
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S
new file mode 100644
index 00000000..767ac2c2
--- /dev/null
+++ b/arch/ia64/kernel/mca_drv_asm.S
@@ -0,0 +1,55 @@
+/*
+ * File:        mca_drv_asm.S
+ * Purpose:     Assembly portion of Generic MCA handling
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ */
+#include <linux/threads.h>
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+
+GLOBAL_ENTRY(mca_handler_bhhook)
+	invala				// clear RSE ?
+	cover
+	;;
+	clrrrb
+	;;						
+	alloc	r16=ar.pfs,0,2,3,0	// make a new frame
+	mov	ar.rsc=0
+	mov	r13=IA64_KR(CURRENT)	// current task pointer
+	;;
+	mov	r2=r13
+	;;
+	addl	r22=IA64_RBS_OFFSET,r2
+	;;
+	mov	ar.bspstore=r22
+	addl	sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
+	;;
+	adds	r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+	st1	[r2]=r0		// clear current->thread.on_ustack flag
+	mov	loc0=r16
+	movl	loc1=mca_handler_bh	// recovery C function
+	;;
+	mov	out0=r8			// poisoned address
+	mov	out1=r9			// iip
+	mov	out2=r10		// psr
+	mov	b6=loc1
+	;;
+	mov	loc1=rp
+	ssm	psr.ic
+	;;
+	srlz.i
+	;;
+	ssm	psr.i
+	br.call.sptk.many rp=b6		// does not return ...
+	;;
+	mov	ar.pfs=loc0
+	mov 	rp=loc1
+	;;
+	mov	r8=r0
+	br.ret.sptk.many rp
+END(mca_handler_bhhook)
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
new file mode 100644
index 00000000..d56753a1
--- /dev/null
+++ b/arch/ia64/kernel/minstate.h
@@ -0,0 +1,250 @@
+
+#include <asm/cache.h>
+
+#include "entry.h"
+#include "paravirt_inst.h"
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define ACCOUNT_GET_STAMP				\
+(pUStk) mov.m r20=ar.itc;
+#define ACCOUNT_SYS_ENTER				\
+(pUStk) br.call.spnt rp=account_sys_enter		\
+	;;
+#else
+#define ACCOUNT_GET_STAMP
+#define ACCOUNT_SYS_ENTER
+#endif
+
+.section ".data..patch.rse", "a"
+.previous
+
+/*
+ * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ *	psr.ic: off
+ *	r31:	contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ *	psr.ic: off
+ *	 r2 = points to &pt_regs.r16
+ *	 r8 = contents of ar.ccv
+ *	 r9 = contents of ar.csd
+ *	r10 = contents of ar.ssd
+ *	r11 = FPSR_DEFAULT
+ *	r12 = kernel sp (kernel virtual address)
+ *	r13 = points to current task_struct (kernel virtual address)
+ *	p15 = TRUE if psr.i is set in cr.ipsr
+ *	predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ *		preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro.  This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND)				\
+	mov r16=IA64_KR(CURRENT);	/* M */							\
+	mov r27=ar.rsc;			/* M */							\
+	mov r20=r1;			/* A */							\
+	mov r25=ar.unat;		/* M */							\
+	MOV_FROM_IPSR(p0,r29);		/* M */							\
+	mov r26=ar.pfs;			/* I */							\
+	MOV_FROM_IIP(r28);			/* M */						\
+	mov r21=ar.fpsr;		/* M */							\
+	__COVER;				/* B;; (or nothing) */				\
+	;;											\
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
+	;;											\
+	ld1 r17=[r16];				/* load current->thread.on_ustack flag */	\
+	st1 [r16]=r0;				/* clear current->thread.on_ustack flag */	\
+	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16						\
+	/* switch from user to kernel RBS: */							\
+	;;											\
+	invala;				/* M */							\
+	SAVE_IFS;										\
+	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
+	;;											\
+(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
+	;;											\
+(pUStk)	mov.m r24=ar.rnat;									\
+(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
+(pKStk) mov r1=sp;					/* get sp  */				\
+	;;											\
+(pUStk) lfetch.fault.excl.nt1 [r22];								\
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
+(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
+	;;											\
+(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
+	;;											\
+(pUStk)	mov r18=ar.bsp;										\
+(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
+	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
+	adds r16=PT(CR_IPSR),r1;								\
+	;;											\
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;						\
+	st8 [r16]=r29;		/* save cr.ipsr */						\
+	;;											\
+	lfetch.fault.excl.nt1 [r17];								\
+	tbit.nz p15,p0=r29,IA64_PSR_I_BIT;							\
+	mov r29=b0										\
+	;;											\
+	WORKAROUND;										\
+	adds r16=PT(R8),r1;	/* initialize first base pointer */				\
+	adds r17=PT(R9),r1;	/* initialize second base pointer */				\
+(pKStk)	mov r18=r0;		/* make sure r18 isn't NaT */					\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r8,16;								\
+.mem.offset 8,0; st8.spill [r17]=r9,16;								\
+        ;;											\
+.mem.offset 0,0; st8.spill [r16]=r10,24;							\
+.mem.offset 8,0; st8.spill [r17]=r11,24;							\
+        ;;											\
+	st8 [r16]=r28,16;	/* save cr.iip */						\
+	st8 [r17]=r30,16;	/* save cr.ifs */						\
+(pUStk)	sub r18=r18,r22;	/* r18=RSE.ndirty*8 */						\
+	mov r8=ar.ccv;										\
+	mov r9=ar.csd;										\
+	mov r10=ar.ssd;										\
+	movl r11=FPSR_DEFAULT;   /* L-unit */							\
+	;;											\
+	st8 [r16]=r25,16;	/* save ar.unat */						\
+	st8 [r17]=r26,16;	/* save ar.pfs */						\
+	shl r18=r18,16;		/* compute ar.rsc to be used for "loadrs" */			\
+	;;											\
+	st8 [r16]=r27,16;	/* save ar.rsc */						\
+(pUStk)	st8 [r17]=r24,16;	/* save ar.rnat */						\
+(pKStk)	adds r17=16,r17;	/* skip over ar_rnat field */					\
+	;;			/* avoid RAW on r16 & r17 */					\
+(pUStk)	st8 [r16]=r23,16;	/* save ar.bspstore */						\
+	st8 [r17]=r31,16;	/* save predicates */						\
+(pKStk)	adds r16=16,r16;	/* skip over ar_bspstore field */				\
+	;;											\
+	st8 [r16]=r29,16;	/* save b0 */							\
+	st8 [r17]=r18,16;	/* save ar.rsc value for "loadrs" */				\
+	cmp.eq pNonSys,pSys=r0,r0	/* initialize pSys=0, pNonSys=1 */			\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r20,16;	/* save original r1 */				\
+.mem.offset 8,0; st8.spill [r17]=r12,16;							\
+	adds r12=-16,r1;	/* switch to kernel memory stack (with 16 bytes of scratch) */	\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r13,16;							\
+.mem.offset 8,0; st8.spill [r17]=r21,16;	/* save ar.fpsr */				\
+	mov r13=IA64_KR(CURRENT);	/* establish `current' */				\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r15,16;							\
+.mem.offset 8,0; st8.spill [r17]=r14,16;							\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r2,16;								\
+.mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	ACCOUNT_GET_STAMP									\
+	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
+	;;											\
+	EXTRA;											\
+	movl r1=__gp;		/* establish kernel global pointer */				\
+	;;											\
+	ACCOUNT_SYS_ENTER									\
+	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
+	;;
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ *	psr.ic: on
+ *	r2:	points to &pt_regs.r16
+ *	r3:	points to &pt_regs.r17
+ *	r8:	contents of ar.ccv
+ *	r9:	contents of ar.csd
+ *	r10:	contents of ar.ssd
+ *	r11:	FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define SAVE_REST				\
+.mem.offset 0,0; st8.spill [r2]=r16,16;		\
+.mem.offset 8,0; st8.spill [r3]=r17,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r18,16;		\
+.mem.offset 8,0; st8.spill [r3]=r19,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r20,16;		\
+.mem.offset 8,0; st8.spill [r3]=r21,16;		\
+	mov r18=b6;				\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r22,16;		\
+.mem.offset 8,0; st8.spill [r3]=r23,16;		\
+	mov r19=b7;				\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r24,16;		\
+.mem.offset 8,0; st8.spill [r3]=r25,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r26,16;		\
+.mem.offset 8,0; st8.spill [r3]=r27,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r28,16;		\
+.mem.offset 8,0; st8.spill [r3]=r29,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r30,16;		\
+.mem.offset 8,0; st8.spill [r3]=r31,32;		\
+	;;					\
+	mov ar.fpsr=r11;	/* M-unit */	\
+	st8 [r2]=r8,8;		/* ar.ccv */	\
+	adds r24=PT(B6)-PT(F7),r3;		\
+	;;					\
+	stf.spill [r2]=f6,32;			\
+	stf.spill [r3]=f7,32;			\
+	;;					\
+	stf.spill [r2]=f8,32;			\
+	stf.spill [r3]=f9,32;			\
+	;;					\
+	stf.spill [r2]=f10;			\
+	stf.spill [r3]=f11;			\
+	adds r25=PT(B7)-PT(F11),r3;		\
+	;;					\
+	st8 [r24]=r18,16;       /* b6 */	\
+	st8 [r25]=r19,16;       /* b7 */	\
+	;;					\
+	st8 [r24]=r9;        	/* ar.csd */	\
+	st8 [r25]=r10;      	/* ar.ssd */	\
+	;;
+
+#define RSE_WORKAROUND				\
+(pUStk) extr.u r17=r18,3,6;			\
+(pUStk)	sub r16=r18,r22;			\
+[1:](pKStk)	br.cond.sptk.many 1f;		\
+	.xdata4 ".data..patch.rse",1b-.		\
+	;;					\
+	cmp.ge p6,p7 = 33,r17;			\
+	;;					\
+(p6)	mov r17=0x310;				\
+(p7)	mov r17=0x308;				\
+	;;					\
+	cmp.leu p1,p0=r16,r17;			\
+(p1)	br.cond.sptk.many 1f;			\
+	dep.z r17=r26,0,62;			\
+	movl r16=2f;				\
+	;;					\
+	mov ar.pfs=r17;				\
+	dep r27=r0,r27,16,14;			\
+	mov b0=r16;				\
+	;;					\
+	br.ret.sptk b0;				\
+	;;					\
+2:						\
+	mov ar.rsc=r0				\
+	;;					\
+	flushrs;				\
+	;;					\
+	mov ar.bspstore=r22			\
+	;;					\
+	mov r18=ar.bsp;				\
+	;;					\
+1:						\
+	.pred.rel "mutex", pKStk, pUStk
+
+#define SAVE_MIN_WITH_COVER	DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND)
+#define SAVE_MIN_WITH_COVER_R19	DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND)
+#define SAVE_MIN			DO_SAVE_MIN(     , mov r30=r0, , )
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
new file mode 100644
index 00000000..1481b0a2
--- /dev/null
+++ b/arch/ia64/kernel/module.c
@@ -0,0 +1,969 @@
+/*
+ * IA-64-specific support for kernel module loader.
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Loosely based on patch by Rusty Russell.
+ */
+
+/* relocs tested so far:
+
+   DIR64LSB
+   FPTR64LSB
+   GPREL22
+   LDXMOV
+   LDXMOV
+   LTOFF22
+   LTOFF22X
+   LTOFF22X
+   LTOFF_FPTR22
+   PCREL21B	(for br.call only; br.cond is not supported out of modules!)
+   PCREL60B	(for brl.cond only; brl.call is not supported for modules!)
+   PCREL64LSB
+   SECREL32LSB
+   SEGREL64LSB
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include <asm/patch.h>
+#include <asm/unaligned.h>
+
+#define ARCH_MODULE_DEBUG 0
+
+#if ARCH_MODULE_DEBUG
+# define DEBUGP printk
+# define inline
+#else
+# define DEBUGP(fmt , a...)
+#endif
+
+#ifdef CONFIG_ITANIUM
+# define USE_BRL	0
+#else
+# define USE_BRL	1
+#endif
+
+#define MAX_LTOFF	((uint64_t) (1 << 22))	/* max. allowable linkage-table offset */
+
+/* Define some relocation helper macros/types: */
+
+#define FORMAT_SHIFT	0
+#define FORMAT_BITS	3
+#define FORMAT_MASK	((1 << FORMAT_BITS) - 1)
+#define VALUE_SHIFT	3
+#define VALUE_BITS	5
+#define VALUE_MASK	((1 << VALUE_BITS) - 1)
+
+enum reloc_target_format {
+	/* direct encoded formats: */
+	RF_NONE = 0,
+	RF_INSN14 = 1,
+	RF_INSN22 = 2,
+	RF_INSN64 = 3,
+	RF_32MSB = 4,
+	RF_32LSB = 5,
+	RF_64MSB = 6,
+	RF_64LSB = 7,
+
+	/* formats that cannot be directly decoded: */
+	RF_INSN60,
+	RF_INSN21B,	/* imm21 form 1 */
+	RF_INSN21M,	/* imm21 form 2 */
+	RF_INSN21F	/* imm21 form 3 */
+};
+
+enum reloc_value_formula {
+	RV_DIRECT = 4,		/* S + A */
+	RV_GPREL = 5,		/* @gprel(S + A) */
+	RV_LTREL = 6,		/* @ltoff(S + A) */
+	RV_PLTREL = 7,		/* @pltoff(S + A) */
+	RV_FPTR = 8,		/* @fptr(S + A) */
+	RV_PCREL = 9,		/* S + A - P */
+	RV_LTREL_FPTR = 10,	/* @ltoff(@fptr(S + A)) */
+	RV_SEGREL = 11,		/* @segrel(S + A) */
+	RV_SECREL = 12,		/* @secrel(S + A) */
+	RV_BDREL = 13,		/* BD + A */
+	RV_LTV = 14,		/* S + A (like RV_DIRECT, except frozen at static link-time) */
+	RV_PCREL2 = 15,		/* S + A - P */
+	RV_SPECIAL = 16,	/* various (see below) */
+	RV_RSVD17 = 17,
+	RV_TPREL = 18,		/* @tprel(S + A) */
+	RV_LTREL_TPREL = 19,	/* @ltoff(@tprel(S + A)) */
+	RV_DTPMOD = 20,		/* @dtpmod(S + A) */
+	RV_LTREL_DTPMOD = 21,	/* @ltoff(@dtpmod(S + A)) */
+	RV_DTPREL = 22,		/* @dtprel(S + A) */
+	RV_LTREL_DTPREL = 23,	/* @ltoff(@dtprel(S + A)) */
+	RV_RSVD24 = 24,
+	RV_RSVD25 = 25,
+	RV_RSVD26 = 26,
+	RV_RSVD27 = 27
+	/* 28-31 reserved for implementation-specific purposes.  */
+};
+
+#define N(reloc)	[R_IA64_##reloc] = #reloc
+
+static const char *reloc_name[256] = {
+	N(NONE),		N(IMM14),		N(IMM22),		N(IMM64),
+	N(DIR32MSB),		N(DIR32LSB),		N(DIR64MSB),		N(DIR64LSB),
+	N(GPREL22),		N(GPREL64I),		N(GPREL32MSB),		N(GPREL32LSB),
+	N(GPREL64MSB),		N(GPREL64LSB),		N(LTOFF22),		N(LTOFF64I),
+	N(PLTOFF22),		N(PLTOFF64I),		N(PLTOFF64MSB),		N(PLTOFF64LSB),
+	N(FPTR64I),		N(FPTR32MSB),		N(FPTR32LSB),		N(FPTR64MSB),
+	N(FPTR64LSB),		N(PCREL60B),		N(PCREL21B),		N(PCREL21M),
+	N(PCREL21F),		N(PCREL32MSB),		N(PCREL32LSB),		N(PCREL64MSB),
+	N(PCREL64LSB),		N(LTOFF_FPTR22),	N(LTOFF_FPTR64I),	N(LTOFF_FPTR32MSB),
+	N(LTOFF_FPTR32LSB),	N(LTOFF_FPTR64MSB),	N(LTOFF_FPTR64LSB),	N(SEGREL32MSB),
+	N(SEGREL32LSB),		N(SEGREL64MSB),		N(SEGREL64LSB),		N(SECREL32MSB),
+	N(SECREL32LSB),		N(SECREL64MSB),		N(SECREL64LSB),		N(REL32MSB),
+	N(REL32LSB),		N(REL64MSB),		N(REL64LSB),		N(LTV32MSB),
+	N(LTV32LSB),		N(LTV64MSB),		N(LTV64LSB),		N(PCREL21BI),
+	N(PCREL22),		N(PCREL64I),		N(IPLTMSB),		N(IPLTLSB),
+	N(COPY),		N(LTOFF22X),		N(LDXMOV),		N(TPREL14),
+	N(TPREL22),		N(TPREL64I),		N(TPREL64MSB),		N(TPREL64LSB),
+	N(LTOFF_TPREL22),	N(DTPMOD64MSB),		N(DTPMOD64LSB),		N(LTOFF_DTPMOD22),
+	N(DTPREL14),		N(DTPREL22),		N(DTPREL64I),		N(DTPREL32MSB),
+	N(DTPREL32LSB),		N(DTPREL64MSB),		N(DTPREL64LSB),		N(LTOFF_DTPREL22)
+};
+
+#undef N
+
+/* Opaque struct for insns, to protect against derefs. */
+struct insn;
+
+static inline uint64_t
+bundle (const struct insn *insn)
+{
+	return (uint64_t) insn & ~0xfUL;
+}
+
+static inline int
+slot (const struct insn *insn)
+{
+	return (uint64_t) insn & 0x3;
+}
+
+static int
+apply_imm64 (struct module *mod, struct insn *insn, uint64_t val)
+{
+	if (slot(insn) != 2) {
+		printk(KERN_ERR "%s: invalid slot number %d for IMM64\n",
+		       mod->name, slot(insn));
+		return 0;
+	}
+	ia64_patch_imm64((u64) insn, val);
+	return 1;
+}
+
+static int
+apply_imm60 (struct module *mod, struct insn *insn, uint64_t val)
+{
+	if (slot(insn) != 2) {
+		printk(KERN_ERR "%s: invalid slot number %d for IMM60\n",
+		       mod->name, slot(insn));
+		return 0;
+	}
+	if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) {
+		printk(KERN_ERR "%s: value %ld out of IMM60 range\n",
+			mod->name, (long) val);
+		return 0;
+	}
+	ia64_patch_imm60((u64) insn, val);
+	return 1;
+}
+
+static int
+apply_imm22 (struct module *mod, struct insn *insn, uint64_t val)
+{
+	if (val + (1 << 21) >= (1 << 22)) {
+		printk(KERN_ERR "%s: value %li out of IMM22 range\n",
+			mod->name, (long)val);
+		return 0;
+	}
+	ia64_patch((u64) insn, 0x01fffcfe000UL, (  ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
+					         | ((val & 0x1f0000UL) <<  6) /* bit 16 -> 22 */
+					         | ((val & 0x00ff80UL) << 20) /* bit  7 -> 27 */
+					         | ((val & 0x00007fUL) << 13) /* bit  0 -> 13 */));
+	return 1;
+}
+
+static int
+apply_imm21b (struct module *mod, struct insn *insn, uint64_t val)
+{
+	if (val + (1 << 20) >= (1 << 21)) {
+		printk(KERN_ERR "%s: value %li out of IMM21b range\n",
+			mod->name, (long)val);
+		return 0;
+	}
+	ia64_patch((u64) insn, 0x11ffffe000UL, (  ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
+					        | ((val & 0x0fffffUL) << 13) /* bit  0 -> 13 */));
+	return 1;
+}
+
+#if USE_BRL
+
+struct plt_entry {
+	/* Three instruction bundles in PLT. */
+ 	unsigned char bundle[2][16];
+};
+
+static const struct plt_entry ia64_plt_template = {
+	{
+		{
+			0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /*	     movl gp=TARGET_GP */
+			0x00, 0x00, 0x00, 0x60
+		},
+		{
+			0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*	     brl.many gp=TARGET_GP */
+			0x08, 0x00, 0x00, 0xc0
+		}
+	}
+};
+
+static int
+patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
+{
+	if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_gp)
+	    && apply_imm60(mod, (struct insn *) (plt->bundle[1] + 2),
+			   (target_ip - (int64_t) plt->bundle[1]) / 16))
+		return 1;
+	return 0;
+}
+
+unsigned long
+plt_target (struct plt_entry *plt)
+{
+	uint64_t b0, b1, *b = (uint64_t *) plt->bundle[1];
+	long off;
+
+	b0 = b[0]; b1 = b[1];
+	off = (  ((b1 & 0x00fffff000000000UL) >> 36)		/* imm20b -> bit 0 */
+	       | ((b0 >> 48) << 20) | ((b1 & 0x7fffffUL) << 36)	/* imm39 -> bit 20 */
+	       | ((b1 & 0x0800000000000000UL) << 0));		/* i -> bit 59 */
+	return (long) plt->bundle[1] + 16*off;
+}
+
+#else /* !USE_BRL */
+
+struct plt_entry {
+	/* Three instruction bundles in PLT. */
+ 	unsigned char bundle[3][16];
+};
+
+static const struct plt_entry ia64_plt_template = {
+	{
+		{
+			0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*	     movl r16=TARGET_IP */
+			0x02, 0x00, 0x00, 0x60
+		},
+		{
+			0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /*	     movl gp=TARGET_GP */
+			0x00, 0x00, 0x00, 0x60
+		},
+		{
+			0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */
+			0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /*	     mov b6=r16 */
+			0x60, 0x00, 0x80, 0x00		    /*	     br.few b6 */
+		}
+	}
+};
+
+static int
+patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
+{
+	if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_ip)
+	    && apply_imm64(mod, (struct insn *) (plt->bundle[1] + 2), target_gp))
+		return 1;
+	return 0;
+}
+
+unsigned long
+plt_target (struct plt_entry *plt)
+{
+	uint64_t b0, b1, *b = (uint64_t *) plt->bundle[0];
+
+	b0 = b[0]; b1 = b[1];
+	return (  ((b1 & 0x000007f000000000) >> 36)		/* imm7b -> bit 0 */
+		| ((b1 & 0x07fc000000000000) >> 43)		/* imm9d -> bit 7 */
+		| ((b1 & 0x0003e00000000000) >> 29)		/* imm5c -> bit 16 */
+		| ((b1 & 0x0000100000000000) >> 23)		/* ic -> bit 21 */
+		| ((b0 >> 46) << 22) | ((b1 & 0x7fffff) << 40)	/* imm41 -> bit 22 */
+		| ((b1 & 0x0800000000000000) <<  4));		/* i -> bit 63 */
+}
+
+#endif /* !USE_BRL */
+
+void *
+module_alloc (unsigned long size)
+{
+	if (!size)
+		return NULL;
+	return vmalloc(size);
+}
+
+void
+module_free (struct module *mod, void *module_region)
+{
+	if (mod && mod->arch.init_unw_table &&
+	    module_region == mod->module_init) {
+		unw_remove_unwind_table(mod->arch.init_unw_table);
+		mod->arch.init_unw_table = NULL;
+	}
+	vfree(module_region);
+}
+
+/* Have we already seen one of these relocations? */
+/* FIXME: we could look in other sections, too --RR */
+static int
+duplicate_reloc (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++) {
+		if (rela[i].r_info == rela[num].r_info && rela[i].r_addend == rela[num].r_addend)
+			return 1;
+	}
+	return 0;
+}
+
+/* Count how many GOT entries we may need */
+static unsigned int
+count_gots (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i, ret = 0;
+
+	/* Sure, this is order(n^2), but it's usually short, and not
+           time critical */
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		      case R_IA64_LTOFF22:
+		      case R_IA64_LTOFF22X:
+		      case R_IA64_LTOFF64I:
+		      case R_IA64_LTOFF_FPTR22:
+		      case R_IA64_LTOFF_FPTR64I:
+		      case R_IA64_LTOFF_FPTR32MSB:
+		      case R_IA64_LTOFF_FPTR32LSB:
+		      case R_IA64_LTOFF_FPTR64MSB:
+		      case R_IA64_LTOFF_FPTR64LSB:
+			if (!duplicate_reloc(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+/* Count how many PLT entries we may need */
+static unsigned int
+count_plts (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i, ret = 0;
+
+	/* Sure, this is order(n^2), but it's usually short, and not
+           time critical */
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		      case R_IA64_PCREL21B:
+		      case R_IA64_PLTOFF22:
+		      case R_IA64_PLTOFF64I:
+		      case R_IA64_PLTOFF64MSB:
+		      case R_IA64_PLTOFF64LSB:
+		      case R_IA64_IPLTMSB:
+		      case R_IA64_IPLTLSB:
+			if (!duplicate_reloc(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+/* We need to create an function-descriptors for any internal function
+   which is referenced. */
+static unsigned int
+count_fdescs (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i, ret = 0;
+
+	/* Sure, this is order(n^2), but it's usually short, and not time critical.  */
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		      case R_IA64_FPTR64I:
+		      case R_IA64_FPTR32LSB:
+		      case R_IA64_FPTR32MSB:
+		      case R_IA64_FPTR64LSB:
+		      case R_IA64_FPTR64MSB:
+		      case R_IA64_LTOFF_FPTR22:
+		      case R_IA64_LTOFF_FPTR32LSB:
+		      case R_IA64_LTOFF_FPTR32MSB:
+		      case R_IA64_LTOFF_FPTR64I:
+		      case R_IA64_LTOFF_FPTR64LSB:
+		      case R_IA64_LTOFF_FPTR64MSB:
+		      case R_IA64_IPLTMSB:
+		      case R_IA64_IPLTLSB:
+			/*
+			 * Jumps to static functions sometimes go straight to their
+			 * offset.  Of course, that may not be possible if the jump is
+			 * from init -> core or vice. versa, so we need to generate an
+			 * FDESC (and PLT etc) for that.
+			 */
+		      case R_IA64_PCREL21B:
+			if (!duplicate_reloc(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+int
+module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
+			   struct module *mod)
+{
+	unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0;
+	Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
+
+	/*
+	 * To store the PLTs and function-descriptors, we expand the .text section for
+	 * core module-code and the .init.text section for initialization code.
+	 */
+	for (s = sechdrs; s < sechdrs_end; ++s)
+		if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
+			mod->arch.core_plt = s;
+		else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
+			mod->arch.init_plt = s;
+		else if (strcmp(".got", secstrings + s->sh_name) == 0)
+			mod->arch.got = s;
+		else if (strcmp(".opd", secstrings + s->sh_name) == 0)
+			mod->arch.opd = s;
+		else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0)
+			mod->arch.unwind = s;
+#ifdef CONFIG_PARAVIRT
+		else if (strcmp(".paravirt_bundles",
+				secstrings + s->sh_name) == 0)
+			mod->arch.paravirt_bundles = s;
+		else if (strcmp(".paravirt_insts",
+				secstrings + s->sh_name) == 0)
+			mod->arch.paravirt_insts = s;
+#endif
+
+	if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
+		printk(KERN_ERR "%s: sections missing\n", mod->name);
+		return -ENOEXEC;
+	}
+
+	/* GOT and PLTs can occur in any relocated section... */
+	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
+		const Elf64_Rela *rels = (void *)ehdr + s->sh_offset;
+		unsigned long numrels = s->sh_size/sizeof(Elf64_Rela);
+
+		if (s->sh_type != SHT_RELA)
+			continue;
+
+		gots += count_gots(rels, numrels);
+		fdescs += count_fdescs(rels, numrels);
+		if (strstr(secstrings + s->sh_name, ".init"))
+			init_plts += count_plts(rels, numrels);
+		else
+			core_plts += count_plts(rels, numrels);
+	}
+
+	mod->arch.core_plt->sh_type = SHT_NOBITS;
+	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.core_plt->sh_addralign = 16;
+	mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry);
+	mod->arch.init_plt->sh_type = SHT_NOBITS;
+	mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.init_plt->sh_addralign = 16;
+	mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry);
+	mod->arch.got->sh_type = SHT_NOBITS;
+	mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC;
+	mod->arch.got->sh_addralign = 8;
+	mod->arch.got->sh_size = gots * sizeof(struct got_entry);
+	mod->arch.opd->sh_type = SHT_NOBITS;
+	mod->arch.opd->sh_flags = SHF_ALLOC;
+	mod->arch.opd->sh_addralign = 8;
+	mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc);
+	DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n",
+	       __func__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
+	       mod->arch.got->sh_size, mod->arch.opd->sh_size);
+	return 0;
+}
+
+static inline int
+in_init (const struct module *mod, uint64_t addr)
+{
+	return addr - (uint64_t) mod->module_init < mod->init_size;
+}
+
+static inline int
+in_core (const struct module *mod, uint64_t addr)
+{
+	return addr - (uint64_t) mod->module_core < mod->core_size;
+}
+
+static inline int
+is_internal (const struct module *mod, uint64_t value)
+{
+	return in_init(mod, value) || in_core(mod, value);
+}
+
+/*
+ * Get gp-relative offset for the linkage-table entry of VALUE.
+ */
+static uint64_t
+get_ltoff (struct module *mod, uint64_t value, int *okp)
+{
+	struct got_entry *got, *e;
+
+	if (!*okp)
+		return 0;
+
+	got = (void *) mod->arch.got->sh_addr;
+	for (e = got; e < got + mod->arch.next_got_entry; ++e)
+		if (e->val == value)
+			goto found;
+
+	/* Not enough GOT entries? */
+	BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size));
+
+	e->val = value;
+	++mod->arch.next_got_entry;
+  found:
+	return (uint64_t) e - mod->arch.gp;
+}
+
+static inline int
+gp_addressable (struct module *mod, uint64_t value)
+{
+	return value - mod->arch.gp + MAX_LTOFF/2 < MAX_LTOFF;
+}
+
+/* Get PC-relative PLT entry for this value.  Returns 0 on failure. */
+static uint64_t
+get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp)
+{
+	struct plt_entry *plt, *plt_end;
+	uint64_t target_ip, target_gp;
+
+	if (!*okp)
+		return 0;
+
+	if (in_init(mod, (uint64_t) insn)) {
+		plt = (void *) mod->arch.init_plt->sh_addr;
+		plt_end = (void *) plt + mod->arch.init_plt->sh_size;
+	} else {
+		plt = (void *) mod->arch.core_plt->sh_addr;
+		plt_end = (void *) plt + mod->arch.core_plt->sh_size;
+	}
+
+	/* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */
+	target_ip = ((uint64_t *) value)[0];
+	target_gp = ((uint64_t *) value)[1];
+
+	/* Look for existing PLT entry. */
+	while (plt->bundle[0][0]) {
+		if (plt_target(plt) == target_ip)
+			goto found;
+		if (++plt >= plt_end)
+			BUG();
+	}
+	*plt = ia64_plt_template;
+	if (!patch_plt(mod, plt, target_ip, target_gp)) {
+		*okp = 0;
+		return 0;
+	}
+#if ARCH_MODULE_DEBUG
+	if (plt_target(plt) != target_ip) {
+		printk("%s: mistargeted PLT: wanted %lx, got %lx\n",
+		       __func__, target_ip, plt_target(plt));
+		*okp = 0;
+		return 0;
+	}
+#endif
+  found:
+	return (uint64_t) plt;
+}
+
+/* Get function descriptor for VALUE. */
+static uint64_t
+get_fdesc (struct module *mod, uint64_t value, int *okp)
+{
+	struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr;
+
+	if (!*okp)
+		return 0;
+
+	if (!value) {
+		printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name);
+		return 0;
+	}
+
+	if (!is_internal(mod, value))
+		/*
+		 * If it's not a module-local entry-point, "value" already points to a
+		 * function-descriptor.
+		 */
+		return value;
+
+	/* Look for existing function descriptor. */
+	while (fdesc->ip) {
+		if (fdesc->ip == value)
+			return (uint64_t)fdesc;
+		if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size)
+			BUG();
+	}
+
+	/* Create new one */
+	fdesc->ip = value;
+	fdesc->gp = mod->arch.gp;
+	return (uint64_t) fdesc;
+}
+
+static inline int
+do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
+	  Elf64_Shdr *sec, void *location)
+{
+	enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK;
+	enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK;
+	uint64_t val;
+	int ok = 1;
+
+	val = sym->st_value + addend;
+
+	switch (formula) {
+	      case RV_SEGREL:	/* segment base is arbitrarily chosen to be 0 for kernel modules */
+	      case RV_DIRECT:
+		break;
+
+	      case RV_GPREL:	  val -= mod->arch.gp; break;
+	      case RV_LTREL:	  val = get_ltoff(mod, val, &ok); break;
+	      case RV_PLTREL:	  val = get_plt(mod, location, val, &ok); break;
+	      case RV_FPTR:	  val = get_fdesc(mod, val, &ok); break;
+	      case RV_SECREL:	  val -= sec->sh_addr; break;
+	      case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break;
+
+	      case RV_PCREL:
+		switch (r_type) {
+		      case R_IA64_PCREL21B:
+			if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) ||
+			    (in_core(mod, val) && in_init(mod, (uint64_t)location))) {
+				/*
+				 * Init section may have been allocated far away from core,
+				 * if the branch won't reach, then allocate a plt for it.
+				 */
+				uint64_t delta = ((int64_t)val - (int64_t)location) / 16;
+				if (delta + (1 << 20) >= (1 << 21)) {
+					val = get_fdesc(mod, val, &ok);
+					val = get_plt(mod, location, val, &ok);
+				}
+			} else if (!is_internal(mod, val))
+				val = get_plt(mod, location, val, &ok);
+			/* FALL THROUGH */
+		      default:
+			val -= bundle(location);
+			break;
+
+		      case R_IA64_PCREL32MSB:
+		      case R_IA64_PCREL32LSB:
+		      case R_IA64_PCREL64MSB:
+		      case R_IA64_PCREL64LSB:
+			val -= (uint64_t) location;
+			break;
+
+		}
+		switch (r_type) {
+		      case R_IA64_PCREL60B: format = RF_INSN60; break;
+		      case R_IA64_PCREL21B: format = RF_INSN21B; break;
+		      case R_IA64_PCREL21M: format = RF_INSN21M; break;
+		      case R_IA64_PCREL21F: format = RF_INSN21F; break;
+		      default: break;
+		}
+		break;
+
+	      case RV_BDREL:
+		val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core);
+		break;
+
+	      case RV_LTV:
+		/* can link-time value relocs happen here?  */
+		BUG();
+		break;
+
+	      case RV_PCREL2:
+		if (r_type == R_IA64_PCREL21BI) {
+			if (!is_internal(mod, val)) {
+				printk(KERN_ERR "%s: %s reloc against "
+					"non-local symbol (%lx)\n", __func__,
+					reloc_name[r_type], (unsigned long)val);
+				return -ENOEXEC;
+			}
+			format = RF_INSN21B;
+		}
+		val -= bundle(location);
+		break;
+
+	      case RV_SPECIAL:
+		switch (r_type) {
+		      case R_IA64_IPLTMSB:
+		      case R_IA64_IPLTLSB:
+			val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok);
+			format = RF_64LSB;
+			if (r_type == R_IA64_IPLTMSB)
+				format = RF_64MSB;
+			break;
+
+		      case R_IA64_SUB:
+			val = addend - sym->st_value;
+			format = RF_INSN64;
+			break;
+
+		      case R_IA64_LTOFF22X:
+			if (gp_addressable(mod, val))
+				val -= mod->arch.gp;
+			else
+				val = get_ltoff(mod, val, &ok);
+			format = RF_INSN22;
+			break;
+
+		      case R_IA64_LDXMOV:
+			if (gp_addressable(mod, val)) {
+				/* turn "ld8" into "mov": */
+				DEBUGP("%s: patching ld8 at %p to mov\n", __func__, location);
+				ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL);
+			}
+			return 0;
+
+		      default:
+			if (reloc_name[r_type])
+				printk(KERN_ERR "%s: special reloc %s not supported",
+				       mod->name, reloc_name[r_type]);
+			else
+				printk(KERN_ERR "%s: unknown special reloc %x\n",
+				       mod->name, r_type);
+			return -ENOEXEC;
+		}
+		break;
+
+	      case RV_TPREL:
+	      case RV_LTREL_TPREL:
+	      case RV_DTPMOD:
+	      case RV_LTREL_DTPMOD:
+	      case RV_DTPREL:
+	      case RV_LTREL_DTPREL:
+		printk(KERN_ERR "%s: %s reloc not supported\n",
+		       mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?");
+		return -ENOEXEC;
+
+	      default:
+		printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type);
+		return -ENOEXEC;
+	}
+
+	if (!ok)
+		return -ENOEXEC;
+
+	DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __func__, location, val,
+	       reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend);
+
+	switch (format) {
+	      case RF_INSN21B:	ok = apply_imm21b(mod, location, (int64_t) val / 16); break;
+	      case RF_INSN22:	ok = apply_imm22(mod, location, val); break;
+	      case RF_INSN64:	ok = apply_imm64(mod, location, val); break;
+	      case RF_INSN60:	ok = apply_imm60(mod, location, (int64_t) val / 16); break;
+	      case RF_32LSB:	put_unaligned(val, (uint32_t *) location); break;
+	      case RF_64LSB:	put_unaligned(val, (uint64_t *) location); break;
+	      case RF_32MSB:	/* ia64 Linux is little-endian... */
+	      case RF_64MSB:	/* ia64 Linux is little-endian... */
+	      case RF_INSN14:	/* must be within-module, i.e., resolved by "ld -r" */
+	      case RF_INSN21M:	/* must be within-module, i.e., resolved by "ld -r" */
+	      case RF_INSN21F:	/* must be within-module, i.e., resolved by "ld -r" */
+		printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n",
+		       mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?");
+		return -ENOEXEC;
+
+	      default:
+		printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n",
+		       mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format);
+		return -ENOEXEC;
+	}
+	return ok ? 0 : -ENOEXEC;
+}
+
+int
+apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
+		    unsigned int relsec, struct module *mod)
+{
+	unsigned int i, n = sechdrs[relsec].sh_size / sizeof(Elf64_Rela);
+	Elf64_Rela *rela = (void *) sechdrs[relsec].sh_addr;
+	Elf64_Shdr *target_sec;
+	int ret;
+
+	DEBUGP("%s: applying section %u (%u relocs) to %u\n", __func__,
+	       relsec, n, sechdrs[relsec].sh_info);
+
+	target_sec = sechdrs + sechdrs[relsec].sh_info;
+
+	if (target_sec->sh_entsize == ~0UL)
+		/*
+		 * If target section wasn't allocated, we don't need to relocate it.
+		 * Happens, e.g., for debug sections.
+		 */
+		return 0;
+
+	if (!mod->arch.gp) {
+		/*
+		 * XXX Should have an arch-hook for running this after final section
+		 *     addresses have been selected...
+		 */
+		uint64_t gp;
+		if (mod->core_size > MAX_LTOFF)
+			/*
+			 * This takes advantage of fact that SHF_ARCH_SMALL gets allocated
+			 * at the end of the module.
+			 */
+			gp = mod->core_size - MAX_LTOFF / 2;
+		else
+			gp = mod->core_size / 2;
+		gp = (uint64_t) mod->module_core + ((gp + 7) & -8);
+		mod->arch.gp = gp;
+		DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp);
+	}
+
+	for (i = 0; i < n; i++) {
+		ret = do_reloc(mod, ELF64_R_TYPE(rela[i].r_info),
+			       ((Elf64_Sym *) sechdrs[symindex].sh_addr
+				+ ELF64_R_SYM(rela[i].r_info)),
+			       rela[i].r_addend, target_sec,
+			       (void *) target_sec->sh_addr + rela[i].r_offset);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+int
+apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
+		unsigned int relsec, struct module *mod)
+{
+	printk(KERN_ERR "module %s: REL relocs in section %u unsupported\n", mod->name, relsec);
+	return -ENOEXEC;
+}
+
+/*
+ * Modules contain a single unwind table which covers both the core and the init text
+ * sections but since the two are not contiguous, we need to split this table up such that
+ * we can register (and unregister) each "segment" separately.  Fortunately, this sounds
+ * more complicated than it really is.
+ */
+static void
+register_unwind_table (struct module *mod)
+{
+	struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr;
+	struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start);
+	struct unw_table_entry tmp, *e1, *e2, *core, *init;
+	unsigned long num_init = 0, num_core = 0;
+
+	/* First, count how many init and core unwind-table entries there are.  */
+	for (e1 = start; e1 < end; ++e1)
+		if (in_init(mod, e1->start_offset))
+			++num_init;
+		else
+			++num_core;
+	/*
+	 * Second, sort the table such that all unwind-table entries for the init and core
+	 * text sections are nicely separated.  We do this with a stupid bubble sort
+	 * (unwind tables don't get ridiculously huge).
+	 */
+	for (e1 = start; e1 < end; ++e1) {
+		for (e2 = e1 + 1; e2 < end; ++e2) {
+			if (e2->start_offset < e1->start_offset) {
+				tmp = *e1;
+				*e1 = *e2;
+				*e2 = tmp;
+			}
+		}
+	}
+	/*
+	 * Third, locate the init and core segments in the unwind table:
+	 */
+	if (in_init(mod, start->start_offset)) {
+		init = start;
+		core = start + num_init;
+	} else {
+		core = start;
+		init = start + num_core;
+	}
+
+	DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __func__,
+	       mod->name, mod->arch.gp, num_init, num_core);
+
+	/*
+	 * Fourth, register both tables (if not empty).
+	 */
+	if (num_core > 0) {
+		mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
+								core, core + num_core);
+		DEBUGP("%s:  core: handle=%p [%p-%p)\n", __func__,
+		       mod->arch.core_unw_table, core, core + num_core);
+	}
+	if (num_init > 0) {
+		mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
+								init, init + num_init);
+		DEBUGP("%s:  init: handle=%p [%p-%p)\n", __func__,
+		       mod->arch.init_unw_table, init, init + num_init);
+	}
+}
+
+int
+module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod)
+{
+	DEBUGP("%s: init: entry=%p\n", __func__, mod->init);
+	if (mod->arch.unwind)
+		register_unwind_table(mod);
+#ifdef CONFIG_PARAVIRT
+        if (mod->arch.paravirt_bundles) {
+                struct paravirt_patch_site_bundle *start =
+                        (struct paravirt_patch_site_bundle *)
+                        mod->arch.paravirt_bundles->sh_addr;
+                struct paravirt_patch_site_bundle *end =
+                        (struct paravirt_patch_site_bundle *)
+                        (mod->arch.paravirt_bundles->sh_addr +
+                         mod->arch.paravirt_bundles->sh_size);
+
+                paravirt_patch_apply_bundle(start, end);
+        }
+        if (mod->arch.paravirt_insts) {
+                struct paravirt_patch_site_inst *start =
+                        (struct paravirt_patch_site_inst *)
+                        mod->arch.paravirt_insts->sh_addr;
+                struct paravirt_patch_site_inst *end =
+                        (struct paravirt_patch_site_inst *)
+                        (mod->arch.paravirt_insts->sh_addr +
+                         mod->arch.paravirt_insts->sh_size);
+
+                paravirt_patch_apply_inst(start, end);
+        }
+#endif
+	return 0;
+}
+
+void
+module_arch_cleanup (struct module *mod)
+{
+	if (mod->arch.init_unw_table)
+		unw_remove_unwind_table(mod->arch.init_unw_table);
+	if (mod->arch.core_unw_table)
+		unw_remove_unwind_table(mod->arch.core_unw_table);
+}
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
new file mode 100644
index 00000000..009df543
--- /dev/null
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -0,0 +1,214 @@
+/*
+ * MSI hooks for standard x86 apic
+ */
+
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/dmar.h>
+#include <asm/smp.h>
+#include <asm/msidef.h>
+
+static struct irq_chip	ia64_msi_chip;
+
+#ifdef CONFIG_SMP
+static int ia64_set_msi_irq_affinity(struct irq_data *idata,
+				     const cpumask_t *cpu_mask, bool force)
+{
+	struct msi_msg msg;
+	u32 addr, data;
+	int cpu = first_cpu(*cpu_mask);
+	unsigned int irq = idata->irq;
+
+	if (!cpu_online(cpu))
+		return -1;
+
+	if (irq_prepare_move(irq, cpu))
+		return -1;
+
+	get_cached_msi_msg(irq, &msg);
+
+	addr = msg.address_lo;
+	addr &= MSI_ADDR_DEST_ID_MASK;
+	addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
+	msg.address_lo = addr;
+
+	data = msg.data;
+	data &= MSI_DATA_VECTOR_MASK;
+	data |= MSI_DATA_VECTOR(irq_to_vector(irq));
+	msg.data = data;
+
+	write_msi_msg(irq, &msg);
+	cpumask_copy(idata->affinity, cpumask_of(cpu));
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+{
+	struct msi_msg	msg;
+	unsigned long	dest_phys_id;
+	int	irq, vector;
+	cpumask_t mask;
+
+	irq = create_irq();
+	if (irq < 0)
+		return irq;
+
+	irq_set_msi_desc(irq, desc);
+	cpus_and(mask, irq_to_domain(irq), cpu_online_map);
+	dest_phys_id = cpu_physical_id(first_cpu(mask));
+	vector = irq_to_vector(irq);
+
+	msg.address_hi = 0;
+	msg.address_lo =
+		MSI_ADDR_HEADER |
+		MSI_ADDR_DEST_MODE_PHYS |
+		MSI_ADDR_REDIRECTION_CPU |
+		MSI_ADDR_DEST_ID_CPU(dest_phys_id);
+
+	msg.data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		MSI_DATA_DELIVERY_FIXED |
+		MSI_DATA_VECTOR(vector);
+
+	write_msi_msg(irq, &msg);
+	irq_set_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq);
+
+	return 0;
+}
+
+void ia64_teardown_msi_irq(unsigned int irq)
+{
+	destroy_irq(irq);
+}
+
+static void ia64_ack_msi_irq(struct irq_data *data)
+{
+	irq_complete_move(data->irq);
+	irq_move_irq(data);
+	ia64_eoi();
+}
+
+static int ia64_msi_retrigger_irq(struct irq_data *data)
+{
+	unsigned int vector = irq_to_vector(data->irq);
+	ia64_resend_irq(vector);
+
+	return 1;
+}
+
+/*
+ * Generic ops used on most IA64 platforms.
+ */
+static struct irq_chip ia64_msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_mask		= mask_msi_irq,
+	.irq_unmask		= unmask_msi_irq,
+	.irq_ack		= ia64_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= ia64_set_msi_irq_affinity,
+#endif
+	.irq_retrigger		= ia64_msi_retrigger_irq,
+};
+
+
+int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+{
+	if (platform_setup_msi_irq)
+		return platform_setup_msi_irq(pdev, desc);
+
+	return ia64_setup_msi_irq(pdev, desc);
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+	if (platform_teardown_msi_irq)
+		return platform_teardown_msi_irq(irq);
+
+	return ia64_teardown_msi_irq(irq);
+}
+
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static int dmar_msi_set_affinity(struct irq_data *data,
+				 const struct cpumask *mask, bool force)
+{
+	unsigned int irq = data->irq;
+	struct irq_cfg *cfg = irq_cfg + irq;
+	struct msi_msg msg;
+	int cpu = cpumask_first(mask);
+
+	if (!cpu_online(cpu))
+		return -1;
+
+	if (irq_prepare_move(irq, cpu))
+		return -1;
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
+
+	dmar_msi_write(irq, &msg);
+	cpumask_copy(data->affinity, mask);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+static struct irq_chip dmar_msi_type = {
+	.name = "DMAR_MSI",
+	.irq_unmask = dmar_msi_unmask,
+	.irq_mask = dmar_msi_mask,
+	.irq_ack = ia64_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = dmar_msi_set_affinity,
+#endif
+	.irq_retrigger = ia64_msi_retrigger_irq,
+};
+
+static int
+msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned dest;
+	cpumask_t mask;
+
+	cpus_and(mask, irq_to_domain(irq), cpu_online_map);
+	dest = cpu_physical_id(first_cpu(mask));
+
+	msg->address_hi = 0;
+	msg->address_lo =
+		MSI_ADDR_HEADER |
+		MSI_ADDR_DEST_MODE_PHYS |
+		MSI_ADDR_REDIRECTION_CPU |
+		MSI_ADDR_DEST_ID_CPU(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		MSI_DATA_DELIVERY_FIXED |
+		MSI_DATA_VECTOR(cfg->vector);
+	return 0;
+}
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+				      "edge");
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c
new file mode 100644
index 00000000..ee564575
--- /dev/null
+++ b/arch/ia64/kernel/nr-irqs.c
@@ -0,0 +1,25 @@
+/*
+ * calculate
+ * NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, FOO_NR_IRQS...)
+ * depending on config.
+ * This must be calculated before processing asm-offset.c.
+ */
+
+#define ASM_OFFSETS_C 1
+
+#include <linux/kbuild.h>
+#include <linux/threads.h>
+#include <asm/native/irq.h>
+#include <asm/xen/irq.h>
+
+void foo(void)
+{
+	union paravirt_nr_irqs_max {
+		char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS];
+#ifdef CONFIG_XEN
+		char xen_nr_irqs[XEN_NR_IRQS];
+#endif
+	};
+
+	DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max));
+}
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
new file mode 100644
index 00000000..c93420c9
--- /dev/null
+++ b/arch/ia64/kernel/numa.c
@@ -0,0 +1,85 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ia64 kernel NUMA specific stuff
+ *
+ * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
+ * Copyright (C) 2004 Silicon Graphics, Inc.
+ *   Jesse Barnes <jbarnes@sgi.com>
+ */
+#include <linux/topology.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+#include <asm/smp.h>
+
+u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_to_node_map);
+
+cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+EXPORT_SYMBOL(node_to_cpu_mask);
+
+void __cpuinit map_cpu_to_node(int cpu, int nid)
+{
+	int oldnid;
+	if (nid < 0) { /* just initialize by zero */
+		cpu_to_node_map[cpu] = 0;
+		return;
+	}
+	/* sanity check first */
+	oldnid = cpu_to_node_map[cpu];
+	if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) {
+		return; /* nothing to do */
+	}
+	/* we don't have cpu-driven node hot add yet...
+	   In usual case, node is created from SRAT at boot time. */
+	if (!node_online(nid))
+		nid = first_online_node;
+	cpu_to_node_map[cpu] = nid;
+	cpu_set(cpu, node_to_cpu_mask[nid]);
+	return;
+}
+
+void __cpuinit unmap_cpu_from_node(int cpu, int nid)
+{
+	WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid]));
+	WARN_ON(cpu_to_node_map[cpu] != nid);
+	cpu_to_node_map[cpu] = 0;
+	cpu_clear(cpu, node_to_cpu_mask[nid]);
+}
+
+
+/**
+ * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
+ *
+ * Build cpu to node mapping and initialize the per node cpu masks using
+ * info from the node_cpuid array handed to us by ACPI.
+ */
+void __init build_cpu_to_node_map(void)
+{
+	int cpu, i, node;
+
+	for(node=0; node < MAX_NUMNODES; node++)
+		cpus_clear(node_to_cpu_mask[node]);
+
+	for_each_possible_early_cpu(cpu) {
+		node = -1;
+		for (i = 0; i < NR_CPUS; ++i)
+			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
+				node = node_cpuid[i].nid;
+				break;
+			}
+		map_cpu_to_node(cpu, node);
+	}
+}
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
new file mode 100644
index 00000000..0b533441
--- /dev/null
+++ b/arch/ia64/kernel/pal.S
@@ -0,0 +1,298 @@
+/*
+ * PAL Firmware support
+ * IA-64 Processor Programmers Reference Vol 2
+ *
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
+ *	David Mosberger <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 05/22/2000 eranian Added support for stacked register calls
+ * 05/24/2000 eranian Added support for physical mode static calls
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+
+	.data
+pal_entry_point:
+	data8 ia64_pal_default_handler
+	.text
+
+/*
+ * Set the PAL entry point address.  This could be written in C code, but we
+ * do it here to keep it all in one module (besides, it's so trivial that it's
+ * not a big deal).
+ *
+ * in0		Address of the PAL entry point (text address, NOT a function
+ *		descriptor).
+ */
+GLOBAL_ENTRY(ia64_pal_handler_init)
+	alloc r3=ar.pfs,1,0,0,0
+	movl r2=pal_entry_point
+	;;
+	st8 [r2]=in0
+	br.ret.sptk.many rp
+END(ia64_pal_handler_init)
+
+/*
+ * Default PAL call handler.  This needs to be coded in assembly because it
+ * uses the static calling convention, i.e., the RSE may not be used and
+ * calls are done via "br.cond" (not "br.call").
+ */
+GLOBAL_ENTRY(ia64_pal_default_handler)
+	mov r8=-1
+	br.cond.sptk.many rp
+END(ia64_pal_default_handler)
+
+/*
+ * Make a PAL call using the static calling convention.
+ *
+ * in0         Index of PAL service
+ * in1 - in3   Remaining PAL arguments
+ */
+GLOBAL_ENTRY(ia64_pal_call_static)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+	alloc loc1 = ar.pfs,4,5,0,0
+	movl loc2 = pal_entry_point
+1:	{
+	  mov r28 = in0
+	  mov r29 = in1
+	  mov r8 = ip
+	}
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	adds r8 = 1f-1b,r8
+	mov loc4=ar.rsc			// save RSE configuration
+	;;
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov loc3 = psr
+	mov loc0 = rp
+	.body
+	mov r30 = in2
+
+	mov r31 = in3
+	mov b7 = loc2
+
+	rsm psr.i
+	;;
+	mov rp = r8
+	br.cond.sptk.many b7
+1:	mov psr.l = loc3
+	mov ar.rsc = loc4		// restore RSE configuration
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	srlz.d				// seralize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_static)
+
+/*
+ * Make a PAL call using the stacked registers calling convention.
+ *
+ * Inputs:
+ *	in0         Index of PAL service
+ *	in2 - in3   Remaining PAL arguments
+ */
+GLOBAL_ENTRY(ia64_pal_call_stacked)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+	alloc loc1 = ar.pfs,4,4,4,0
+	movl loc2 = pal_entry_point
+
+	mov r28  = in0			// Index MUST be copied to r28
+	mov out0 = in0			// AND in0 of PAL function
+	mov loc0 = rp
+	.body
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	mov out1 = in1
+	mov out2 = in2
+	mov out3 = in3
+	mov loc3 = psr
+	;;
+	rsm psr.i
+	mov b7 = loc2
+	;;
+	br.call.sptk.many rp=b7		// now make the call
+.ret0:	mov psr.l  = loc3
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	srlz.d				// serialize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_stacked)
+
+/*
+ * Make a physical mode PAL call using the static registers calling convention.
+ *
+ * Inputs:
+ *	in0         Index of PAL service
+ *	in2 - in3   Remaining PAL arguments
+ *
+ * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
+ * So we don't need to clear them.
+ */
+#define PAL_PSR_BITS_TO_CLEAR						      \
+	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT  | IA64_PSR_DB | IA64_PSR_RT |\
+	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	      \
+	 IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PAL_PSR_BITS_TO_SET						      \
+	(IA64_PSR_BN)
+
+
+GLOBAL_ENTRY(ia64_pal_call_phys_static)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+	alloc loc1 = ar.pfs,4,7,0,0
+	movl loc2 = pal_entry_point
+1:	{
+	  mov r28  = in0		// copy procedure index
+	  mov r8   = ip			// save ip to compute branch
+	  mov loc0 = rp			// save rp
+	}
+	.body
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	mov r29  = in1			// first argument
+	mov r30  = in2			// copy arg2
+	mov r31  = in3			// copy arg3
+	;;
+	mov loc3 = psr			// save psr
+	adds r8  = 1f-1b,r8		// calculate return address for call
+	;;
+	mov loc4=ar.rsc			// save RSE configuration
+	dep.z loc2=loc2,0,61		// convert pal entry point to physical
+	tpa r8=r8			// convert rp to physical
+	;;
+	mov b7 = loc2			// install target to branch reg
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	movl r16=PAL_PSR_BITS_TO_CLEAR
+	movl r17=PAL_PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17		// add in psr the bits to set
+	;;
+	andcm r16=loc3,r16		// removes bits to clear from psr
+	br.call.sptk.many rp=ia64_switch_mode_phys
+	mov rp = r8			// install return address (physical)
+	mov loc5 = r19
+	mov loc6 = r20
+	br.cond.sptk.many b7
+1:
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3			// r16= original psr
+	mov r19=loc5
+	mov r20=loc6
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+	mov psr.l = loc3		// restore init PSR
+
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	mov ar.rsc=loc4			// restore RSE configuration
+	srlz.d				// seralize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_phys_static)
+
+/*
+ * Make a PAL call using the stacked registers in physical mode.
+ *
+ * Inputs:
+ *	in0         Index of PAL service
+ *	in2 - in3   Remaining PAL arguments
+ */
+GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
+	alloc	loc1 = ar.pfs,5,7,4,0
+	movl	loc2 = pal_entry_point
+1:	{
+	  mov r28  = in0		// copy procedure index
+	  mov loc0 = rp			// save rp
+	}
+	.body
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	mov loc3 = psr			// save psr
+	;;
+	mov loc4=ar.rsc			// save RSE configuration
+	dep.z loc2=loc2,0,61		// convert pal entry point to physical
+	;;
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	movl r16=PAL_PSR_BITS_TO_CLEAR
+	movl r17=PAL_PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17		// add in psr the bits to set
+	mov b7 = loc2			// install target to branch reg
+	;;
+	andcm r16=loc3,r16		// removes bits to clear from psr
+	br.call.sptk.many rp=ia64_switch_mode_phys
+
+	mov out0 = in0			// first argument
+	mov out1 = in1			// copy arg2
+	mov out2 = in2			// copy arg3
+	mov out3 = in3			// copy arg3
+	mov loc5 = r19
+	mov loc6 = r20
+
+	br.call.sptk.many rp=b7		// now make the call
+
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3			// r16= original psr
+	mov r19=loc5
+	mov r20=loc6
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+
+	mov psr.l  = loc3		// restore init PSR
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	mov ar.rsc=loc4			// restore RSE configuration
+	srlz.d				// seralize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_phys_stacked)
+
+/*
+ * Save scratch fp scratch regs which aren't saved in pt_regs already
+ * (fp10-fp15).
+ *
+ * NOTE: We need to do this since firmware (SAL and PAL) may use any of the
+ * scratch regs fp-low partition.
+ *
+ * Inputs:
+ *      in0	Address of stack storage for fp regs
+ */
+GLOBAL_ENTRY(ia64_save_scratch_fpregs)
+	alloc r3=ar.pfs,1,0,0,0
+	add r2=16,in0
+	;;
+	stf.spill [in0] = f10,32
+	stf.spill [r2]  = f11,32
+	;;
+	stf.spill [in0] = f12,32
+	stf.spill [r2]  = f13,32
+	;;
+	stf.spill [in0] = f14,32
+	stf.spill [r2]  = f15,32
+	br.ret.sptk.many rp
+END(ia64_save_scratch_fpregs)
+
+/*
+ * Load scratch fp scratch regs (fp10-fp15)
+ *
+ * Inputs:
+ *      in0	Address of stack storage for fp regs
+ */
+GLOBAL_ENTRY(ia64_load_scratch_fpregs)
+	alloc r3=ar.pfs,1,0,0,0
+	add r2=16,in0
+	;;
+	ldf.fill  f10 = [in0],32
+	ldf.fill  f11 = [r2],32
+	;;
+	ldf.fill  f12 = [in0],32
+	ldf.fill  f13 = [r2],32
+	;;
+	ldf.fill  f14 = [in0],32
+	ldf.fill  f15 = [r2],32
+	br.ret.sptk.many rp
+END(ia64_load_scratch_fpregs)
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
new file mode 100644
index 00000000..77597e5e
--- /dev/null
+++ b/arch/ia64/kernel/palinfo.c
@@ -0,0 +1,1095 @@
+/*
+ * palinfo.c
+ *
+ * Prints processor specific information reported by PAL.
+ * This code is based on specification of PAL as of the
+ * Intel IA-64 Architecture Software Developer's Manual v1.0.
+ *
+ *
+ * Copyright (C) 2000-2001, 2003 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2004 Intel Corporation
+ *  Ashok Raj <ashok.raj@intel.com>
+ *
+ * 05/26/2000	S.Eranian	initial release
+ * 08/21/2000	S.Eranian	updated to July 2000 PAL specs
+ * 02/05/2001   S.Eranian	fixed module support
+ * 10/23/2001	S.Eranian	updated pal_perf_mon_info bug fixes
+ * 03/24/2004	Ashok Raj	updated to work with CPU Hotplug
+ * 10/26/2006   Russ Anderson	updated processor features to rev 2.2 spec
+ */
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <linux/smp.h>
+
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
+MODULE_DESCRIPTION("/proc interface to IA-64 PAL");
+MODULE_LICENSE("GPL");
+
+#define PALINFO_VERSION "0.5"
+
+typedef int (*palinfo_func_t)(char*);
+
+typedef struct {
+	const char		*name;		/* name of the proc entry */
+	palinfo_func_t		proc_read;	/* function to call for reading */
+	struct proc_dir_entry	*entry;		/* registered entry (removal) */
+} palinfo_entry_t;
+
+
+/*
+ *  A bunch of string array to get pretty printing
+ */
+
+static char *cache_types[] = {
+	"",			/* not used */
+	"Instruction",
+	"Data",
+	"Data/Instruction"	/* unified */
+};
+
+static const char *cache_mattrib[]={
+	"WriteThrough",
+	"WriteBack",
+	"",		/* reserved */
+	""		/* reserved */
+};
+
+static const char *cache_st_hints[]={
+	"Temporal, level 1",
+	"Reserved",
+	"Reserved",
+	"Non-temporal, all levels",
+	"Reserved",
+	"Reserved",
+	"Reserved",
+	"Reserved"
+};
+
+static const char *cache_ld_hints[]={
+	"Temporal, level 1",
+	"Non-temporal, level 1",
+	"Reserved",
+	"Non-temporal, all levels",
+	"Reserved",
+	"Reserved",
+	"Reserved",
+	"Reserved"
+};
+
+static const char *rse_hints[]={
+	"enforced lazy",
+	"eager stores",
+	"eager loads",
+	"eager loads and stores"
+};
+
+#define RSE_HINTS_COUNT ARRAY_SIZE(rse_hints)
+
+static const char *mem_attrib[]={
+	"WB",		/* 000 */
+	"SW",		/* 001 */
+	"010",		/* 010 */
+	"011",		/* 011 */
+	"UC",		/* 100 */
+	"UCE",		/* 101 */
+	"WC",		/* 110 */
+	"NaTPage"	/* 111 */
+};
+
+/*
+ * Take a 64bit vector and produces a string such that
+ * if bit n is set then 2^n in clear text is generated. The adjustment
+ * to the right unit is also done.
+ *
+ * Input:
+ *	- a pointer to a buffer to hold the string
+ *	- a 64-bit vector
+ * Ouput:
+ *	- a pointer to the end of the buffer
+ *
+ */
+static char *
+bitvector_process(char *p, u64 vector)
+{
+	int i,j;
+	const char *units[]={ "", "K", "M", "G", "T" };
+
+	for (i=0, j=0; i < 64; i++ , j=i/10) {
+		if (vector & 0x1) {
+			p += sprintf(p, "%d%s ", 1 << (i-j*10), units[j]);
+		}
+		vector >>= 1;
+	}
+	return p;
+}
+
+/*
+ * Take a 64bit vector and produces a string such that
+ * if bit n is set then register n is present. The function
+ * takes into account consecutive registers and prints out ranges.
+ *
+ * Input:
+ *	- a pointer to a buffer to hold the string
+ *	- a 64-bit vector
+ * Ouput:
+ *	- a pointer to the end of the buffer
+ *
+ */
+static char *
+bitregister_process(char *p, u64 *reg_info, int max)
+{
+	int i, begin, skip = 0;
+	u64 value = reg_info[0];
+
+	value >>= i = begin = ffs(value) - 1;
+
+	for(; i < max; i++ ) {
+
+		if (i != 0 && (i%64) == 0) value = *++reg_info;
+
+		if ((value & 0x1) == 0 && skip == 0) {
+			if (begin  <= i - 2)
+				p += sprintf(p, "%d-%d ", begin, i-1);
+			else
+				p += sprintf(p, "%d ", i-1);
+			skip  = 1;
+			begin = -1;
+		} else if ((value & 0x1) && skip == 1) {
+			skip = 0;
+			begin = i;
+		}
+		value >>=1;
+	}
+	if (begin > -1) {
+		if (begin < 127)
+			p += sprintf(p, "%d-127", begin);
+		else
+			p += sprintf(p, "127");
+	}
+
+	return p;
+}
+
+static int
+power_info(char *page)
+{
+	s64 status;
+	char *p = page;
+	u64 halt_info_buffer[8];
+	pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer;
+	int i;
+
+	status = ia64_pal_halt_info(halt_info);
+	if (status != 0) return 0;
+
+	for (i=0; i < 8 ; i++ ) {
+		if (halt_info[i].pal_power_mgmt_info_s.im == 1) {
+			p += sprintf(p,	"Power level %d:\n"
+				     "\tentry_latency       : %d cycles\n"
+				     "\texit_latency        : %d cycles\n"
+				     "\tpower consumption   : %d mW\n"
+				     "\tCache+TLB coherency : %s\n", i,
+				     halt_info[i].pal_power_mgmt_info_s.entry_latency,
+				     halt_info[i].pal_power_mgmt_info_s.exit_latency,
+				     halt_info[i].pal_power_mgmt_info_s.power_consumption,
+				     halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No");
+		} else {
+			p += sprintf(p,"Power level %d: not implemented\n",i);
+		}
+	}
+	return p - page;
+}
+
+static int
+cache_info(char *page)
+{
+	char *p = page;
+	unsigned long i, levels, unique_caches;
+	pal_cache_config_info_t cci;
+	int j, k;
+	long status;
+
+	if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) {
+		printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status);
+		return 0;
+	}
+
+	p += sprintf(p, "Cache levels  : %ld\nUnique caches : %ld\n\n", levels, unique_caches);
+
+	for (i=0; i < levels; i++) {
+
+		for (j=2; j >0 ; j--) {
+
+			/* even without unification some level may not be present */
+			if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) {
+				continue;
+			}
+			p += sprintf(p,
+				     "%s Cache level %lu:\n"
+				     "\tSize           : %u bytes\n"
+				     "\tAttributes     : ",
+				     cache_types[j+cci.pcci_unified], i+1,
+				     cci.pcci_cache_size);
+
+			if (cci.pcci_unified) p += sprintf(p, "Unified ");
+
+			p += sprintf(p, "%s\n", cache_mattrib[cci.pcci_cache_attr]);
+
+			p += sprintf(p,
+				     "\tAssociativity  : %d\n"
+				     "\tLine size      : %d bytes\n"
+				     "\tStride         : %d bytes\n",
+				     cci.pcci_assoc, 1<<cci.pcci_line_size, 1<<cci.pcci_stride);
+			if (j == 1)
+				p += sprintf(p, "\tStore latency  : N/A\n");
+			else
+				p += sprintf(p, "\tStore latency  : %d cycle(s)\n",
+						cci.pcci_st_latency);
+
+			p += sprintf(p,
+				     "\tLoad latency   : %d cycle(s)\n"
+				     "\tStore hints    : ", cci.pcci_ld_latency);
+
+			for(k=0; k < 8; k++ ) {
+				if ( cci.pcci_st_hints & 0x1)
+					p += sprintf(p, "[%s]", cache_st_hints[k]);
+				cci.pcci_st_hints >>=1;
+			}
+			p += sprintf(p, "\n\tLoad hints     : ");
+
+			for(k=0; k < 8; k++ ) {
+				if (cci.pcci_ld_hints & 0x1)
+					p += sprintf(p, "[%s]", cache_ld_hints[k]);
+				cci.pcci_ld_hints >>=1;
+			}
+			p += sprintf(p,
+				     "\n\tAlias boundary : %d byte(s)\n"
+				     "\tTag LSB        : %d\n"
+				     "\tTag MSB        : %d\n",
+				     1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb,
+				     cci.pcci_tag_msb);
+
+			/* when unified, data(j=2) is enough */
+			if (cci.pcci_unified) break;
+		}
+	}
+	return p - page;
+}
+
+
+static int
+vm_info(char *page)
+{
+	char *p = page;
+	u64 tr_pages =0, vw_pages=0, tc_pages;
+	u64 attrib;
+	pal_vm_info_1_u_t vm_info_1;
+	pal_vm_info_2_u_t vm_info_2;
+	pal_tc_info_u_t	tc_info;
+	ia64_ptce_info_t ptce;
+	const char *sep;
+	int i, j;
+	long status;
+
+	if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
+		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+	} else {
+
+		p += sprintf(p,
+		     "Physical Address Space         : %d bits\n"
+		     "Virtual Address Space          : %d bits\n"
+		     "Protection Key Registers(PKR)  : %d\n"
+		     "Implemented bits in PKR.key    : %d\n"
+		     "Hash Tag ID                    : 0x%x\n"
+		     "Size of RR.rid                 : %d\n"
+		     "Max Purges                     : ",
+		     vm_info_1.pal_vm_info_1_s.phys_add_size,
+		     vm_info_2.pal_vm_info_2_s.impl_va_msb+1,
+		     vm_info_1.pal_vm_info_1_s.max_pkr+1,
+		     vm_info_1.pal_vm_info_1_s.key_size,
+		     vm_info_1.pal_vm_info_1_s.hash_tag_id,
+		     vm_info_2.pal_vm_info_2_s.rid_size);
+		if (vm_info_2.pal_vm_info_2_s.max_purges == PAL_MAX_PURGES)
+			p += sprintf(p, "unlimited\n");
+		else
+			p += sprintf(p, "%d\n",
+		     		vm_info_2.pal_vm_info_2_s.max_purges ?
+				vm_info_2.pal_vm_info_2_s.max_purges : 1);
+	}
+
+	if (ia64_pal_mem_attrib(&attrib) == 0) {
+		p += sprintf(p, "Supported memory attributes    : ");
+		sep = "";
+		for (i = 0; i < 8; i++) {
+			if (attrib & (1 << i)) {
+				p += sprintf(p, "%s%s", sep, mem_attrib[i]);
+				sep = ", ";
+			}
+		}
+		p += sprintf(p, "\n");
+	}
+
+	if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) {
+		printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status);
+	} else {
+
+		p += sprintf(p,
+			     "\nTLB walker                     : %simplemented\n"
+			     "Number of DTR                  : %d\n"
+			     "Number of ITR                  : %d\n"
+			     "TLB insertable page sizes      : ",
+			     vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
+			     vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
+			     vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
+
+
+		p = bitvector_process(p, tr_pages);
+
+		p += sprintf(p, "\nTLB purgeable page sizes       : ");
+
+		p = bitvector_process(p, vw_pages);
+	}
+	if ((status=ia64_get_ptce(&ptce)) != 0) {
+		printk(KERN_ERR "ia64_get_ptce=%ld\n", status);
+	} else {
+		p += sprintf(p,
+		     "\nPurge base address             : 0x%016lx\n"
+		     "Purge outer loop count         : %d\n"
+		     "Purge inner loop count         : %d\n"
+		     "Purge outer loop stride        : %d\n"
+		     "Purge inner loop stride        : %d\n",
+		     ptce.base, ptce.count[0], ptce.count[1],
+		     ptce.stride[0], ptce.stride[1]);
+
+		p += sprintf(p,
+		     "TC Levels                      : %d\n"
+		     "Unique TC(s)                   : %d\n",
+		     vm_info_1.pal_vm_info_1_s.num_tc_levels,
+		     vm_info_1.pal_vm_info_1_s.max_unique_tcs);
+
+		for(i=0; i < vm_info_1.pal_vm_info_1_s.num_tc_levels; i++) {
+			for (j=2; j>0 ; j--) {
+				tc_pages = 0; /* just in case */
+
+
+				/* even without unification, some levels may not be present */
+				if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) {
+					continue;
+				}
+
+				p += sprintf(p,
+				     "\n%s Translation Cache Level %d:\n"
+				     "\tHash sets           : %d\n"
+				     "\tAssociativity       : %d\n"
+				     "\tNumber of entries   : %d\n"
+				     "\tFlags               : ",
+				     cache_types[j+tc_info.tc_unified], i+1,
+				     tc_info.tc_num_sets,
+				     tc_info.tc_associativity,
+				     tc_info.tc_num_entries);
+
+				if (tc_info.tc_pf)
+					p += sprintf(p, "PreferredPageSizeOptimized ");
+				if (tc_info.tc_unified)
+					p += sprintf(p, "Unified ");
+				if (tc_info.tc_reduce_tr)
+					p += sprintf(p, "TCReduction");
+
+				p += sprintf(p, "\n\tSupported page sizes: ");
+
+				p = bitvector_process(p, tc_pages);
+
+				/* when unified date (j=2) is enough */
+				if (tc_info.tc_unified)
+					break;
+			}
+		}
+	}
+	p += sprintf(p, "\n");
+
+	return p - page;
+}
+
+
+static int
+register_info(char *page)
+{
+	char *p = page;
+	u64 reg_info[2];
+	u64 info;
+	unsigned long phys_stacked;
+	pal_hints_u_t hints;
+	unsigned long iregs, dregs;
+	static const char * const info_type[] = {
+		"Implemented AR(s)",
+		"AR(s) with read side-effects",
+		"Implemented CR(s)",
+		"CR(s) with read side-effects",
+	};
+
+	for(info=0; info < 4; info++) {
+
+		if (ia64_pal_register_info(info, &reg_info[0], &reg_info[1]) != 0) return 0;
+
+		p += sprintf(p, "%-32s : ", info_type[info]);
+
+		p = bitregister_process(p, reg_info, 128);
+
+		p += sprintf(p, "\n");
+	}
+
+	if (ia64_pal_rse_info(&phys_stacked, &hints) == 0) {
+
+	p += sprintf(p,
+		     "RSE stacked physical registers   : %ld\n"
+		     "RSE load/store hints             : %ld (%s)\n",
+		     phys_stacked, hints.ph_data,
+		     hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)");
+	}
+	if (ia64_pal_debug_info(&iregs, &dregs))
+		return 0;
+
+	p += sprintf(p,
+		     "Instruction debug register pairs : %ld\n"
+		     "Data debug register pairs        : %ld\n", iregs, dregs);
+
+	return p - page;
+}
+
+static char *proc_features_0[]={		/* Feature set 0 */
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+	NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,
+	"Unimplemented instruction address fault",
+	"INIT, PMI, and LINT pins",
+	"Simple unimplemented instr addresses",
+	"Variable P-state performance",
+	"Virtual machine features implemented",
+	"XIP,XPSR,XFS implemented",
+	"XR1-XR3 implemented",
+	"Disable dynamic predicate prediction",
+	"Disable processor physical number",
+	"Disable dynamic data cache prefetch",
+	"Disable dynamic inst cache prefetch",
+	"Disable dynamic branch prediction",
+	NULL, NULL, NULL, NULL,
+	"Disable P-states",
+	"Enable MCA on Data Poisoning",
+	"Enable vmsw instruction",
+	"Enable extern environmental notification",
+	"Disable BINIT on processor time-out",
+	"Disable dynamic power management (DPM)",
+	"Disable coherency",
+	"Disable cache",
+	"Enable CMCI promotion",
+	"Enable MCA to BINIT promotion",
+	"Enable MCA promotion",
+	"Enable BERR promotion"
+};
+
+static char *proc_features_16[]={		/* Feature set 16 */
+	"Disable ETM",
+	"Enable ETM",
+	"Enable MCA on half-way timer",
+	"Enable snoop WC",
+	NULL,
+	"Enable Fast Deferral",
+	"Disable MCA on memory aliasing",
+	"Enable RSB",
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	"DP system processor",
+	"Low Voltage",
+	"HT supported",
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL
+};
+
+static char **proc_features[]={
+	proc_features_0,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL,
+	proc_features_16,
+	NULL, NULL, NULL, NULL,
+};
+
+static char * feature_set_info(char *page, u64 avail, u64 status, u64 control,
+							unsigned long set)
+{
+	char *p = page;
+	char **vf, **v;
+	int i;
+
+	vf = v = proc_features[set];
+	for(i=0; i < 64; i++, avail >>=1, status >>=1, control >>=1) {
+
+		if (!(control))		/* No remaining bits set */
+			break;
+		if (!(avail & 0x1))	/* Print only bits that are available */
+			continue;
+		if (vf)
+			v = vf + i;
+		if ( v && *v ) {
+			p += sprintf(p, "%-40s : %s %s\n", *v,
+				avail & 0x1 ? (status & 0x1 ?
+						"On " : "Off"): "",
+				avail & 0x1 ? (control & 0x1 ?
+						"Ctrl" : "NoCtrl"): "");
+		} else {
+			p += sprintf(p, "Feature set %2ld bit %2d\t\t\t"
+					" : %s %s\n",
+				set, i,
+				avail & 0x1 ? (status & 0x1 ?
+						"On " : "Off"): "",
+				avail & 0x1 ? (control & 0x1 ?
+						"Ctrl" : "NoCtrl"): "");
+		}
+	}
+	return p;
+}
+
+static int
+processor_info(char *page)
+{
+	char *p = page;
+	u64 avail=1, status=1, control=1, feature_set=0;
+	s64 ret;
+
+	do {
+		ret = ia64_pal_proc_get_features(&avail, &status, &control,
+						feature_set);
+		if (ret < 0) {
+			return p - page;
+		}
+		if (ret == 1) {
+			feature_set++;
+			continue;
+		}
+
+		p = feature_set_info(p, avail, status, control, feature_set);
+
+		feature_set++;
+	} while(1);
+
+	return p - page;
+}
+
+static const char *bus_features[]={
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+	NULL,NULL,
+	"Request  Bus Parking",
+	"Bus Lock Mask",
+	"Enable Half Transfer",
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL,
+	"Enable Cache Line Repl. Shared",
+	"Enable Cache Line Repl. Exclusive",
+	"Disable Transaction Queuing",
+	"Disable Response Error Checking",
+	"Disable Bus Error Checking",
+	"Disable Bus Requester Internal Error Signalling",
+	"Disable Bus Requester Error Signalling",
+	"Disable Bus Initialization Event Checking",
+	"Disable Bus Initialization Event Signalling",
+	"Disable Bus Address Error Checking",
+	"Disable Bus Address Error Signalling",
+	"Disable Bus Data Error Checking"
+};
+
+
+static int
+bus_info(char *page)
+{
+	char *p = page;
+	const char **v = bus_features;
+	pal_bus_features_u_t av, st, ct;
+	u64 avail, status, control;
+	int i;
+	s64 ret;
+
+	if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0;
+
+	avail   = av.pal_bus_features_val;
+	status  = st.pal_bus_features_val;
+	control = ct.pal_bus_features_val;
+
+	for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) {
+		if ( ! *v ) continue;
+		p += sprintf(p, "%-48s : %s%s %s\n", *v,
+				avail & 0x1 ? "" : "NotImpl",
+				avail & 0x1 ? (status  & 0x1 ? "On" : "Off"): "",
+				avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): "");
+	}
+	return p - page;
+}
+
+static int
+version_info(char *page)
+{
+	pal_version_u_t min_ver, cur_ver;
+	char *p = page;
+
+	if (ia64_pal_version(&min_ver, &cur_ver) != 0)
+		return 0;
+
+	p += sprintf(p,
+		     "PAL_vendor : 0x%02x (min=0x%02x)\n"
+		     "PAL_A      : %02x.%02x (min=%02x.%02x)\n"
+		     "PAL_B      : %02x.%02x (min=%02x.%02x)\n",
+		     cur_ver.pal_version_s.pv_pal_vendor,
+		     min_ver.pal_version_s.pv_pal_vendor,
+		     cur_ver.pal_version_s.pv_pal_a_model,
+		     cur_ver.pal_version_s.pv_pal_a_rev,
+		     min_ver.pal_version_s.pv_pal_a_model,
+		     min_ver.pal_version_s.pv_pal_a_rev,
+		     cur_ver.pal_version_s.pv_pal_b_model,
+		     cur_ver.pal_version_s.pv_pal_b_rev,
+		     min_ver.pal_version_s.pv_pal_b_model,
+		     min_ver.pal_version_s.pv_pal_b_rev);
+	return p - page;
+}
+
+static int
+perfmon_info(char *page)
+{
+	char *p = page;
+	u64 pm_buffer[16];
+	pal_perf_mon_info_u_t pm_info;
+
+	if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0) return 0;
+
+	p += sprintf(p,
+		     "PMC/PMD pairs                 : %d\n"
+		     "Counter width                 : %d bits\n"
+		     "Cycle event number            : %d\n"
+		     "Retired event number          : %d\n"
+		     "Implemented PMC               : ",
+		     pm_info.pal_perf_mon_info_s.generic, pm_info.pal_perf_mon_info_s.width,
+		     pm_info.pal_perf_mon_info_s.cycles, pm_info.pal_perf_mon_info_s.retired);
+
+	p = bitregister_process(p, pm_buffer, 256);
+	p += sprintf(p, "\nImplemented PMD               : ");
+	p = bitregister_process(p, pm_buffer+4, 256);
+	p += sprintf(p, "\nCycles count capable          : ");
+	p = bitregister_process(p, pm_buffer+8, 256);
+	p += sprintf(p, "\nRetired bundles count capable : ");
+
+#ifdef CONFIG_ITANIUM
+	/*
+	 * PAL_PERF_MON_INFO reports that only PMC4 can be used to count CPU_CYCLES
+	 * which is wrong, both PMC4 and PMD5 support it.
+	 */
+	if (pm_buffer[12] == 0x10) pm_buffer[12]=0x30;
+#endif
+
+	p = bitregister_process(p, pm_buffer+12, 256);
+
+	p += sprintf(p, "\n");
+
+	return p - page;
+}
+
+static int
+frequency_info(char *page)
+{
+	char *p = page;
+	struct pal_freq_ratio proc, itc, bus;
+	unsigned long base;
+
+	if (ia64_pal_freq_base(&base) == -1)
+		p += sprintf(p, "Output clock            : not implemented\n");
+	else
+		p += sprintf(p, "Output clock            : %ld ticks/s\n", base);
+
+	if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0;
+
+	p += sprintf(p,
+		     "Processor/Clock ratio   : %d/%d\n"
+		     "Bus/Clock ratio         : %d/%d\n"
+		     "ITC/Clock ratio         : %d/%d\n",
+		     proc.num, proc.den, bus.num, bus.den, itc.num, itc.den);
+
+	return p - page;
+}
+
+static int
+tr_info(char *page)
+{
+	char *p = page;
+	long status;
+	pal_tr_valid_u_t tr_valid;
+	u64 tr_buffer[4];
+	pal_vm_info_1_u_t vm_info_1;
+	pal_vm_info_2_u_t vm_info_2;
+	unsigned long i, j;
+	unsigned long max[3], pgm;
+	struct ifa_reg {
+		unsigned long valid:1;
+		unsigned long ig:11;
+		unsigned long vpn:52;
+	} *ifa_reg;
+	struct itir_reg {
+		unsigned long rv1:2;
+		unsigned long ps:6;
+		unsigned long key:24;
+		unsigned long rv2:32;
+	} *itir_reg;
+	struct gr_reg {
+		unsigned long p:1;
+		unsigned long rv1:1;
+		unsigned long ma:3;
+		unsigned long a:1;
+		unsigned long d:1;
+		unsigned long pl:2;
+		unsigned long ar:3;
+		unsigned long ppn:38;
+		unsigned long rv2:2;
+		unsigned long ed:1;
+		unsigned long ig:11;
+	} *gr_reg;
+	struct rid_reg {
+		unsigned long ig1:1;
+		unsigned long rv1:1;
+		unsigned long ig2:6;
+		unsigned long rid:24;
+		unsigned long rv2:32;
+	} *rid_reg;
+
+	if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
+		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+		return 0;
+	}
+	max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
+	max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
+
+	for (i=0; i < 2; i++ ) {
+		for (j=0; j < max[i]; j++) {
+
+		status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid);
+		if (status != 0) {
+			printk(KERN_ERR "palinfo: pal call failed on tr[%lu:%lu]=%ld\n",
+			       i, j, status);
+			continue;
+		}
+
+		ifa_reg  = (struct ifa_reg *)&tr_buffer[2];
+
+		if (ifa_reg->valid == 0) continue;
+
+		gr_reg   = (struct gr_reg *)tr_buffer;
+		itir_reg = (struct itir_reg *)&tr_buffer[1];
+		rid_reg  = (struct rid_reg *)&tr_buffer[3];
+
+		pgm	 = -1 << (itir_reg->ps - 12);
+		p += sprintf(p,
+			     "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n"
+			     "\tppn  : 0x%lx\n"
+			     "\tvpn  : 0x%lx\n"
+			     "\tps   : ",
+			     "ID"[i], j,
+			     tr_valid.pal_tr_valid_s.access_rights_valid,
+			     tr_valid.pal_tr_valid_s.priv_level_valid,
+			     tr_valid.pal_tr_valid_s.dirty_bit_valid,
+			     tr_valid.pal_tr_valid_s.mem_attr_valid,
+			     (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12);
+
+		p = bitvector_process(p, 1<< itir_reg->ps);
+
+		p += sprintf(p,
+			     "\n\tpl   : %d\n"
+			     "\tar   : %d\n"
+			     "\trid  : %x\n"
+			     "\tp    : %d\n"
+			     "\tma   : %d\n"
+			     "\td    : %d\n",
+			     gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma,
+			     gr_reg->d);
+		}
+	}
+	return p - page;
+}
+
+
+
+/*
+ * List {name,function} pairs for every entry in /proc/palinfo/cpu*
+ */
+static palinfo_entry_t palinfo_entries[]={
+	{ "version_info",	version_info, },
+	{ "vm_info",		vm_info, },
+	{ "cache_info",		cache_info, },
+	{ "power_info",		power_info, },
+	{ "register_info",	register_info, },
+	{ "processor_info",	processor_info, },
+	{ "perfmon_info",	perfmon_info, },
+	{ "frequency_info",	frequency_info, },
+	{ "bus_info",		bus_info },
+	{ "tr_info",		tr_info, }
+};
+
+#define NR_PALINFO_ENTRIES	(int) ARRAY_SIZE(palinfo_entries)
+
+/*
+ * this array is used to keep track of the proc entries we create. This is
+ * required in the module mode when we need to remove all entries. The procfs code
+ * does not do recursion of deletion
+ *
+ * Notes:
+ *	- +1 accounts for the cpuN directory entry in /proc/pal
+ */
+#define NR_PALINFO_PROC_ENTRIES	(NR_CPUS*(NR_PALINFO_ENTRIES+1))
+
+static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES];
+static struct proc_dir_entry *palinfo_dir;
+
+/*
+ * This data structure is used to pass which cpu,function is being requested
+ * It must fit in a 64bit quantity to be passed to the proc callback routine
+ *
+ * In SMP mode, when we get a request for another CPU, we must call that
+ * other CPU using IPI and wait for the result before returning.
+ */
+typedef union {
+	u64 value;
+	struct {
+		unsigned	req_cpu: 32;	/* for which CPU this info is */
+		unsigned	func_id: 32;	/* which function is requested */
+	} pal_func_cpu;
+} pal_func_cpu_u_t;
+
+#define req_cpu	pal_func_cpu.req_cpu
+#define func_id pal_func_cpu.func_id
+
+#ifdef CONFIG_SMP
+
+/*
+ * used to hold information about final function to call
+ */
+typedef struct {
+	palinfo_func_t	func;	/* pointer to function to call */
+	char		*page;	/* buffer to store results */
+	int		ret;	/* return value from call */
+} palinfo_smp_data_t;
+
+
+/*
+ * this function does the actual final call and he called
+ * from the smp code, i.e., this is the palinfo callback routine
+ */
+static void
+palinfo_smp_call(void *info)
+{
+	palinfo_smp_data_t *data = (palinfo_smp_data_t *)info;
+	data->ret = (*data->func)(data->page);
+}
+
+/*
+ * function called to trigger the IPI, we need to access a remote CPU
+ * Return:
+ *	0 : error or nothing to output
+ *	otherwise how many bytes in the "page" buffer were written
+ */
+static
+int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
+{
+	palinfo_smp_data_t ptr;
+	int ret;
+
+	ptr.func = palinfo_entries[f->func_id].proc_read;
+	ptr.page = page;
+	ptr.ret  = 0; /* just in case */
+
+
+	/* will send IPI to other CPU and wait for completion of remote call */
+	if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 1))) {
+		printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: "
+		       "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret);
+		return 0;
+	}
+	return ptr.ret;
+}
+#else /* ! CONFIG_SMP */
+static
+int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
+{
+	printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n");
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Entry point routine: all calls go through this function
+ */
+static int
+palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+	int len=0;
+	pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data;
+
+	/*
+	 * in SMP mode, we may need to call another CPU to get correct
+	 * information. PAL, by definition, is processor specific
+	 */
+	if (f->req_cpu == get_cpu())
+		len = (*palinfo_entries[f->func_id].proc_read)(page);
+	else
+		len = palinfo_handle_smp(f, page);
+
+	put_cpu();
+
+	if (len <= off+count) *eof = 1;
+
+	*start = page + off;
+	len   -= off;
+
+	if (len>count) len = count;
+	if (len<0) len = 0;
+
+	return len;
+}
+
+static void __cpuinit
+create_palinfo_proc_entries(unsigned int cpu)
+{
+#	define CPUSTR	"cpu%d"
+
+	pal_func_cpu_u_t f;
+	struct proc_dir_entry **pdir;
+	struct proc_dir_entry *cpu_dir;
+	int j;
+	char cpustr[sizeof(CPUSTR)];
+
+
+	/*
+	 * we keep track of created entries in a depth-first order for
+	 * cleanup purposes. Each entry is stored into palinfo_proc_entries
+	 */
+	sprintf(cpustr,CPUSTR, cpu);
+
+	cpu_dir = proc_mkdir(cpustr, palinfo_dir);
+
+	f.req_cpu = cpu;
+
+	/*
+	 * Compute the location to store per cpu entries
+	 * We dont store the top level entry in this list, but
+	 * remove it finally after removing all cpu entries.
+	 */
+	pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)];
+	*pdir++ = cpu_dir;
+	for (j=0; j < NR_PALINFO_ENTRIES; j++) {
+		f.func_id = j;
+		*pdir = create_proc_read_entry(
+				palinfo_entries[j].name, 0, cpu_dir,
+				palinfo_read_entry, (void *)f.value);
+		pdir++;
+	}
+}
+
+static void
+remove_palinfo_proc_entries(unsigned int hcpu)
+{
+	int j;
+	struct proc_dir_entry *cpu_dir, **pdir;
+
+	pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)];
+	cpu_dir = *pdir;
+	*pdir++=NULL;
+	for (j=0; j < (NR_PALINFO_ENTRIES); j++) {
+		if ((*pdir)) {
+			remove_proc_entry ((*pdir)->name, cpu_dir);
+			*pdir ++= NULL;
+		}
+	}
+
+	if (cpu_dir) {
+		remove_proc_entry(cpu_dir->name, palinfo_dir);
+	}
+}
+
+static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	unsigned int hotcpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		create_palinfo_proc_entries(hotcpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		remove_palinfo_proc_entries(hotcpu);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __refdata palinfo_cpu_notifier =
+{
+	.notifier_call = palinfo_cpu_callback,
+	.priority = 0,
+};
+
+static int __init
+palinfo_init(void)
+{
+	int i = 0;
+
+	printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION);
+	palinfo_dir = proc_mkdir("pal", NULL);
+
+	/* Create palinfo dirs in /proc for all online cpus */
+	for_each_online_cpu(i) {
+		create_palinfo_proc_entries(i);
+	}
+
+	/* Register for future delivery via notify registration */
+	register_hotcpu_notifier(&palinfo_cpu_notifier);
+
+	return 0;
+}
+
+static void __exit
+palinfo_exit(void)
+{
+	int i = 0;
+
+	/* remove all nodes: depth first pass. Could optimize this  */
+	for_each_online_cpu(i) {
+		remove_palinfo_proc_entries(i);
+	}
+
+	/*
+	 * Remove the top level entry finally
+	 */
+	remove_proc_entry(palinfo_dir->name, NULL);
+
+	/*
+	 * Unregister from cpu notifier callbacks
+	 */
+	unregister_hotcpu_notifier(&palinfo_cpu_notifier);
+}
+
+module_init(palinfo_init);
+module_exit(palinfo_exit);
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
new file mode 100644
index 00000000..a21d7bb9
--- /dev/null
+++ b/arch/ia64/kernel/paravirt.c
@@ -0,0 +1,900 @@
+/******************************************************************************
+ * arch/ia64/kernel/paravirt.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *     Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+
+#include <linux/compiler.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/iosapic.h>
+#include <asm/paravirt.h>
+
+/***************************************************************************
+ * general info
+ */
+struct pv_info pv_info = {
+	.kernel_rpl = 0,
+	.paravirt_enabled = 0,
+	.name = "bare hardware"
+};
+
+/***************************************************************************
+ * pv_init_ops
+ * initialization hooks.
+ */
+
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type);
+
+struct pv_init_ops pv_init_ops =
+{
+#ifdef ASM_SUPPORTED
+	.patch_bundle = ia64_native_patch_bundle,
+#endif
+	.patch_branch = ia64_native_patch_branch,
+};
+
+/***************************************************************************
+ * pv_cpu_ops
+ * intrinsics hooks.
+ */
+
+#ifndef ASM_SUPPORTED
+/* ia64_native_xxx are macros so that we have to make them real functions */
+
+#define DEFINE_VOID_FUNC1(name)					\
+	static void						\
+	ia64_native_ ## name ## _func(unsigned long arg)	\
+	{							\
+		ia64_native_ ## name(arg);			\
+	}
+
+#define DEFINE_VOID_FUNC1_VOID(name)				\
+	static void						\
+	ia64_native_ ## name ## _func(void *arg)		\
+	{							\
+		ia64_native_ ## name(arg);			\
+	}
+
+#define DEFINE_VOID_FUNC2(name)					\
+	static void						\
+	ia64_native_ ## name ## _func(unsigned long arg0,	\
+				      unsigned long arg1)	\
+	{							\
+		ia64_native_ ## name(arg0, arg1);		\
+	}
+
+#define DEFINE_FUNC0(name)			\
+	static unsigned long			\
+	ia64_native_ ## name ## _func(void)	\
+	{					\
+		return ia64_native_ ## name();	\
+	}
+
+#define DEFINE_FUNC1(name, type)			\
+	static unsigned long				\
+	ia64_native_ ## name ## _func(type arg)		\
+	{						\
+		return ia64_native_ ## name(arg);	\
+	}						\
+
+DEFINE_VOID_FUNC1_VOID(fc);
+DEFINE_VOID_FUNC1(intrin_local_irq_restore);
+
+DEFINE_VOID_FUNC2(ptcga);
+DEFINE_VOID_FUNC2(set_rr);
+
+DEFINE_FUNC0(get_psr_i);
+
+DEFINE_FUNC1(thash, unsigned long);
+DEFINE_FUNC1(get_cpuid, int);
+DEFINE_FUNC1(get_pmd, int);
+DEFINE_FUNC1(get_rr, unsigned long);
+
+static void
+ia64_native_ssm_i_func(void)
+{
+	ia64_native_ssm(IA64_PSR_I);
+}
+
+static void
+ia64_native_rsm_i_func(void)
+{
+	ia64_native_rsm(IA64_PSR_I);
+}
+
+static void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+				unsigned long val2, unsigned long val3,
+				unsigned long val4)
+{
+	ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4);
+}
+
+#define CASE_GET_REG(id)				\
+	case _IA64_REG_ ## id:				\
+	res = ia64_native_getreg(_IA64_REG_ ## id);	\
+	break;
+#define CASE_GET_AR(id) CASE_GET_REG(AR_ ## id)
+#define CASE_GET_CR(id) CASE_GET_REG(CR_ ## id)
+
+unsigned long
+ia64_native_getreg_func(int regnum)
+{
+	unsigned long res = -1;
+	switch (regnum) {
+	CASE_GET_REG(GP);
+	/*CASE_GET_REG(IP);*/ /* returned ip value shouldn't be constant */
+	CASE_GET_REG(PSR);
+	CASE_GET_REG(TP);
+	CASE_GET_REG(SP);
+
+	CASE_GET_AR(KR0);
+	CASE_GET_AR(KR1);
+	CASE_GET_AR(KR2);
+	CASE_GET_AR(KR3);
+	CASE_GET_AR(KR4);
+	CASE_GET_AR(KR5);
+	CASE_GET_AR(KR6);
+	CASE_GET_AR(KR7);
+	CASE_GET_AR(RSC);
+	CASE_GET_AR(BSP);
+	CASE_GET_AR(BSPSTORE);
+	CASE_GET_AR(RNAT);
+	CASE_GET_AR(FCR);
+	CASE_GET_AR(EFLAG);
+	CASE_GET_AR(CSD);
+	CASE_GET_AR(SSD);
+	CASE_GET_AR(CFLAG);
+	CASE_GET_AR(FSR);
+	CASE_GET_AR(FIR);
+	CASE_GET_AR(FDR);
+	CASE_GET_AR(CCV);
+	CASE_GET_AR(UNAT);
+	CASE_GET_AR(FPSR);
+	CASE_GET_AR(ITC);
+	CASE_GET_AR(PFS);
+	CASE_GET_AR(LC);
+	CASE_GET_AR(EC);
+
+	CASE_GET_CR(DCR);
+	CASE_GET_CR(ITM);
+	CASE_GET_CR(IVA);
+	CASE_GET_CR(PTA);
+	CASE_GET_CR(IPSR);
+	CASE_GET_CR(ISR);
+	CASE_GET_CR(IIP);
+	CASE_GET_CR(IFA);
+	CASE_GET_CR(ITIR);
+	CASE_GET_CR(IIPA);
+	CASE_GET_CR(IFS);
+	CASE_GET_CR(IIM);
+	CASE_GET_CR(IHA);
+	CASE_GET_CR(LID);
+	CASE_GET_CR(IVR);
+	CASE_GET_CR(TPR);
+	CASE_GET_CR(EOI);
+	CASE_GET_CR(IRR0);
+	CASE_GET_CR(IRR1);
+	CASE_GET_CR(IRR2);
+	CASE_GET_CR(IRR3);
+	CASE_GET_CR(ITV);
+	CASE_GET_CR(PMV);
+	CASE_GET_CR(CMCV);
+	CASE_GET_CR(LRR0);
+	CASE_GET_CR(LRR1);
+
+	default:
+		printk(KERN_CRIT "wrong_getreg %d\n", regnum);
+		break;
+	}
+	return res;
+}
+
+#define CASE_SET_REG(id)				\
+	case _IA64_REG_ ## id:				\
+	ia64_native_setreg(_IA64_REG_ ## id, val);	\
+	break;
+#define CASE_SET_AR(id) CASE_SET_REG(AR_ ## id)
+#define CASE_SET_CR(id) CASE_SET_REG(CR_ ## id)
+
+void
+ia64_native_setreg_func(int regnum, unsigned long val)
+{
+	switch (regnum) {
+	case _IA64_REG_PSR_L:
+		ia64_native_setreg(_IA64_REG_PSR_L, val);
+		ia64_dv_serialize_data();
+		break;
+	CASE_SET_REG(SP);
+	CASE_SET_REG(GP);
+
+	CASE_SET_AR(KR0);
+	CASE_SET_AR(KR1);
+	CASE_SET_AR(KR2);
+	CASE_SET_AR(KR3);
+	CASE_SET_AR(KR4);
+	CASE_SET_AR(KR5);
+	CASE_SET_AR(KR6);
+	CASE_SET_AR(KR7);
+	CASE_SET_AR(RSC);
+	CASE_SET_AR(BSP);
+	CASE_SET_AR(BSPSTORE);
+	CASE_SET_AR(RNAT);
+	CASE_SET_AR(FCR);
+	CASE_SET_AR(EFLAG);
+	CASE_SET_AR(CSD);
+	CASE_SET_AR(SSD);
+	CASE_SET_AR(CFLAG);
+	CASE_SET_AR(FSR);
+	CASE_SET_AR(FIR);
+	CASE_SET_AR(FDR);
+	CASE_SET_AR(CCV);
+	CASE_SET_AR(UNAT);
+	CASE_SET_AR(FPSR);
+	CASE_SET_AR(ITC);
+	CASE_SET_AR(PFS);
+	CASE_SET_AR(LC);
+	CASE_SET_AR(EC);
+
+	CASE_SET_CR(DCR);
+	CASE_SET_CR(ITM);
+	CASE_SET_CR(IVA);
+	CASE_SET_CR(PTA);
+	CASE_SET_CR(IPSR);
+	CASE_SET_CR(ISR);
+	CASE_SET_CR(IIP);
+	CASE_SET_CR(IFA);
+	CASE_SET_CR(ITIR);
+	CASE_SET_CR(IIPA);
+	CASE_SET_CR(IFS);
+	CASE_SET_CR(IIM);
+	CASE_SET_CR(IHA);
+	CASE_SET_CR(LID);
+	CASE_SET_CR(IVR);
+	CASE_SET_CR(TPR);
+	CASE_SET_CR(EOI);
+	CASE_SET_CR(IRR0);
+	CASE_SET_CR(IRR1);
+	CASE_SET_CR(IRR2);
+	CASE_SET_CR(IRR3);
+	CASE_SET_CR(ITV);
+	CASE_SET_CR(PMV);
+	CASE_SET_CR(CMCV);
+	CASE_SET_CR(LRR0);
+	CASE_SET_CR(LRR1);
+	default:
+		printk(KERN_CRIT "wrong setreg %d\n", regnum);
+		break;
+	}
+}
+#else
+
+#define __DEFINE_FUNC(name, code)					\
+	extern const char ia64_native_ ## name ## _direct_start[];	\
+	extern const char ia64_native_ ## name ## _direct_end[];	\
+	asm (".align 32\n"						\
+	     ".proc ia64_native_" #name "_func\n"			\
+	     "ia64_native_" #name "_func:\n"				\
+	     "ia64_native_" #name "_direct_start:\n"			\
+	     code							\
+	     "ia64_native_" #name "_direct_end:\n"			\
+	     "br.cond.sptk.many b6\n"					\
+	     ".endp ia64_native_" #name "_func\n")
+
+#define DEFINE_VOID_FUNC0(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(void);			\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(unsigned long arg);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1_VOID(name, code)			\
+	extern void						\
+	ia64_native_ ## name ## _func(void *arg);		\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC2(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(unsigned long arg0,	\
+				      unsigned long arg1);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC0(name, code)		\
+	extern unsigned long			\
+	ia64_native_ ## name ## _func(void);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC1(name, type, code)			\
+	extern unsigned long				\
+	ia64_native_ ## name ## _func(type arg);	\
+	__DEFINE_FUNC(name, code)
+
+DEFINE_VOID_FUNC1_VOID(fc,
+		       "fc r8\n");
+DEFINE_VOID_FUNC1(intrin_local_irq_restore,
+		  ";;\n"
+		  "     cmp.ne p6, p7 = r8, r0\n"
+		  ";;\n"
+		  "(p6) ssm psr.i\n"
+		  "(p7) rsm psr.i\n"
+		  ";;\n"
+		  "(p6) srlz.d\n");
+
+DEFINE_VOID_FUNC2(ptcga,
+		  "ptc.ga r8, r9\n");
+DEFINE_VOID_FUNC2(set_rr,
+		  "mov rr[r8] = r9\n");
+
+/* ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */
+DEFINE_FUNC0(get_psr_i,
+	     "mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n"
+	     "mov r8 = psr\n"
+	     ";;\n"
+	     "and r8 = r2, r8\n");
+
+DEFINE_FUNC1(thash, unsigned long,
+	     "thash r8 = r8\n");
+DEFINE_FUNC1(get_cpuid, int,
+	     "mov r8 = cpuid[r8]\n");
+DEFINE_FUNC1(get_pmd, int,
+	     "mov r8 = pmd[r8]\n");
+DEFINE_FUNC1(get_rr, unsigned long,
+	     "mov r8 = rr[r8]\n");
+
+DEFINE_VOID_FUNC0(ssm_i,
+		  "ssm psr.i\n");
+DEFINE_VOID_FUNC0(rsm_i,
+		  "rsm psr.i\n");
+
+extern void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+				unsigned long val2, unsigned long val3,
+				unsigned long val4);
+__DEFINE_FUNC(set_rr0_to_rr4,
+	      "mov rr[r0] = r8\n"
+	      "movl r2 = 0x2000000000000000\n"
+	      ";;\n"
+	      "mov rr[r2] = r9\n"
+	      "shl r3 = r2, 1\n"	/* movl r3 = 0x4000000000000000 */
+	      ";;\n"
+	      "add r2 = r2, r3\n"	/* movl r2 = 0x6000000000000000 */
+	      "mov rr[r3] = r10\n"
+	      ";;\n"
+	      "mov rr[r2] = r11\n"
+	      "shl r3 = r3, 1\n"	/* movl r3 = 0x8000000000000000 */
+	      ";;\n"
+	      "mov rr[r3] = r14\n");
+
+extern unsigned long ia64_native_getreg_func(int regnum);
+asm(".global ia64_native_getreg_func\n");
+#define __DEFINE_GET_REG(id, reg)			\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"	\
+	";;\n"						\
+	"cmp.eq p6, p0 = r2, r8\n"			\
+	";;\n"						\
+	"(p6) mov r8 = " #reg "\n"			\
+	"(p6) br.cond.sptk.many b6\n"			\
+	";;\n"
+#define __DEFINE_GET_AR(id, reg)	__DEFINE_GET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_GET_CR(id, reg)	__DEFINE_GET_REG(CR_ ## id, cr.reg)
+
+__DEFINE_FUNC(getreg,
+	      __DEFINE_GET_REG(GP, gp)
+	      /*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */
+	      __DEFINE_GET_REG(PSR, psr)
+	      __DEFINE_GET_REG(TP, tp)
+	      __DEFINE_GET_REG(SP, sp)
+
+	      __DEFINE_GET_REG(AR_KR0, ar0)
+	      __DEFINE_GET_REG(AR_KR1, ar1)
+	      __DEFINE_GET_REG(AR_KR2, ar2)
+	      __DEFINE_GET_REG(AR_KR3, ar3)
+	      __DEFINE_GET_REG(AR_KR4, ar4)
+	      __DEFINE_GET_REG(AR_KR5, ar5)
+	      __DEFINE_GET_REG(AR_KR6, ar6)
+	      __DEFINE_GET_REG(AR_KR7, ar7)
+	      __DEFINE_GET_AR(RSC, rsc)
+	      __DEFINE_GET_AR(BSP, bsp)
+	      __DEFINE_GET_AR(BSPSTORE, bspstore)
+	      __DEFINE_GET_AR(RNAT, rnat)
+	      __DEFINE_GET_AR(FCR, fcr)
+	      __DEFINE_GET_AR(EFLAG, eflag)
+	      __DEFINE_GET_AR(CSD, csd)
+	      __DEFINE_GET_AR(SSD, ssd)
+	      __DEFINE_GET_REG(AR_CFLAG, ar27)
+	      __DEFINE_GET_AR(FSR, fsr)
+	      __DEFINE_GET_AR(FIR, fir)
+	      __DEFINE_GET_AR(FDR, fdr)
+	      __DEFINE_GET_AR(CCV, ccv)
+	      __DEFINE_GET_AR(UNAT, unat)
+	      __DEFINE_GET_AR(FPSR, fpsr)
+	      __DEFINE_GET_AR(ITC, itc)
+	      __DEFINE_GET_AR(PFS, pfs)
+	      __DEFINE_GET_AR(LC, lc)
+	      __DEFINE_GET_AR(EC, ec)
+
+	      __DEFINE_GET_CR(DCR, dcr)
+	      __DEFINE_GET_CR(ITM, itm)
+	      __DEFINE_GET_CR(IVA, iva)
+	      __DEFINE_GET_CR(PTA, pta)
+	      __DEFINE_GET_CR(IPSR, ipsr)
+	      __DEFINE_GET_CR(ISR, isr)
+	      __DEFINE_GET_CR(IIP, iip)
+	      __DEFINE_GET_CR(IFA, ifa)
+	      __DEFINE_GET_CR(ITIR, itir)
+	      __DEFINE_GET_CR(IIPA, iipa)
+	      __DEFINE_GET_CR(IFS, ifs)
+	      __DEFINE_GET_CR(IIM, iim)
+	      __DEFINE_GET_CR(IHA, iha)
+	      __DEFINE_GET_CR(LID, lid)
+	      __DEFINE_GET_CR(IVR, ivr)
+	      __DEFINE_GET_CR(TPR, tpr)
+	      __DEFINE_GET_CR(EOI, eoi)
+	      __DEFINE_GET_CR(IRR0, irr0)
+	      __DEFINE_GET_CR(IRR1, irr1)
+	      __DEFINE_GET_CR(IRR2, irr2)
+	      __DEFINE_GET_CR(IRR3, irr3)
+	      __DEFINE_GET_CR(ITV, itv)
+	      __DEFINE_GET_CR(PMV, pmv)
+	      __DEFINE_GET_CR(CMCV, cmcv)
+	      __DEFINE_GET_CR(LRR0, lrr0)
+	      __DEFINE_GET_CR(LRR1, lrr1)
+
+	      "mov r8 = -1\n"	/* unsupported case */
+	);
+
+extern void ia64_native_setreg_func(int regnum, unsigned long val);
+asm(".global ia64_native_setreg_func\n");
+#define __DEFINE_SET_REG(id, reg)			\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"	\
+	";;\n"						\
+	"cmp.eq p6, p0 = r2, r9\n"			\
+	";;\n"						\
+	"(p6) mov " #reg " = r8\n"			\
+	"(p6) br.cond.sptk.many b6\n"			\
+	";;\n"
+#define __DEFINE_SET_AR(id, reg)	__DEFINE_SET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_SET_CR(id, reg)	__DEFINE_SET_REG(CR_ ## id, cr.reg)
+__DEFINE_FUNC(setreg,
+	      "mov r2 = " __stringify(_IA64_REG_PSR_L) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r9\n"
+	      ";;\n"
+	      "(p6) mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+	      ".serialize.data\n"
+#endif
+	      "(p6) br.cond.sptk.many b6\n"
+	      __DEFINE_SET_REG(GP, gp)
+	      __DEFINE_SET_REG(SP, sp)
+
+	      __DEFINE_SET_REG(AR_KR0, ar0)
+	      __DEFINE_SET_REG(AR_KR1, ar1)
+	      __DEFINE_SET_REG(AR_KR2, ar2)
+	      __DEFINE_SET_REG(AR_KR3, ar3)
+	      __DEFINE_SET_REG(AR_KR4, ar4)
+	      __DEFINE_SET_REG(AR_KR5, ar5)
+	      __DEFINE_SET_REG(AR_KR6, ar6)
+	      __DEFINE_SET_REG(AR_KR7, ar7)
+	      __DEFINE_SET_AR(RSC, rsc)
+	      __DEFINE_SET_AR(BSP, bsp)
+	      __DEFINE_SET_AR(BSPSTORE, bspstore)
+	      __DEFINE_SET_AR(RNAT, rnat)
+	      __DEFINE_SET_AR(FCR, fcr)
+	      __DEFINE_SET_AR(EFLAG, eflag)
+	      __DEFINE_SET_AR(CSD, csd)
+	      __DEFINE_SET_AR(SSD, ssd)
+	      __DEFINE_SET_REG(AR_CFLAG, ar27)
+	      __DEFINE_SET_AR(FSR, fsr)
+	      __DEFINE_SET_AR(FIR, fir)
+	      __DEFINE_SET_AR(FDR, fdr)
+	      __DEFINE_SET_AR(CCV, ccv)
+	      __DEFINE_SET_AR(UNAT, unat)
+	      __DEFINE_SET_AR(FPSR, fpsr)
+	      __DEFINE_SET_AR(ITC, itc)
+	      __DEFINE_SET_AR(PFS, pfs)
+	      __DEFINE_SET_AR(LC, lc)
+	      __DEFINE_SET_AR(EC, ec)
+
+	      __DEFINE_SET_CR(DCR, dcr)
+	      __DEFINE_SET_CR(ITM, itm)
+	      __DEFINE_SET_CR(IVA, iva)
+	      __DEFINE_SET_CR(PTA, pta)
+	      __DEFINE_SET_CR(IPSR, ipsr)
+	      __DEFINE_SET_CR(ISR, isr)
+	      __DEFINE_SET_CR(IIP, iip)
+	      __DEFINE_SET_CR(IFA, ifa)
+	      __DEFINE_SET_CR(ITIR, itir)
+	      __DEFINE_SET_CR(IIPA, iipa)
+	      __DEFINE_SET_CR(IFS, ifs)
+	      __DEFINE_SET_CR(IIM, iim)
+	      __DEFINE_SET_CR(IHA, iha)
+	      __DEFINE_SET_CR(LID, lid)
+	      __DEFINE_SET_CR(IVR, ivr)
+	      __DEFINE_SET_CR(TPR, tpr)
+	      __DEFINE_SET_CR(EOI, eoi)
+	      __DEFINE_SET_CR(IRR0, irr0)
+	      __DEFINE_SET_CR(IRR1, irr1)
+	      __DEFINE_SET_CR(IRR2, irr2)
+	      __DEFINE_SET_CR(IRR3, irr3)
+	      __DEFINE_SET_CR(ITV, itv)
+	      __DEFINE_SET_CR(PMV, pmv)
+	      __DEFINE_SET_CR(CMCV, cmcv)
+	      __DEFINE_SET_CR(LRR0, lrr0)
+	      __DEFINE_SET_CR(LRR1, lrr1)
+	);
+#endif
+
+struct pv_cpu_ops pv_cpu_ops = {
+	.fc		= ia64_native_fc_func,
+	.thash		= ia64_native_thash_func,
+	.get_cpuid	= ia64_native_get_cpuid_func,
+	.get_pmd	= ia64_native_get_pmd_func,
+	.ptcga		= ia64_native_ptcga_func,
+	.get_rr		= ia64_native_get_rr_func,
+	.set_rr		= ia64_native_set_rr_func,
+	.set_rr0_to_rr4	= ia64_native_set_rr0_to_rr4_func,
+	.ssm_i		= ia64_native_ssm_i_func,
+	.getreg		= ia64_native_getreg_func,
+	.setreg		= ia64_native_setreg_func,
+	.rsm_i		= ia64_native_rsm_i_func,
+	.get_psr_i	= ia64_native_get_psr_i_func,
+	.intrin_local_irq_restore
+			= ia64_native_intrin_local_irq_restore_func,
+};
+EXPORT_SYMBOL(pv_cpu_ops);
+
+/******************************************************************************
+ * replacement of hand written assembly codes.
+ */
+
+void
+paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch)
+{
+	extern unsigned long paravirt_switch_to_targ;
+	extern unsigned long paravirt_leave_syscall_targ;
+	extern unsigned long paravirt_work_processed_syscall_targ;
+	extern unsigned long paravirt_leave_kernel_targ;
+
+	paravirt_switch_to_targ = cpu_asm_switch->switch_to;
+	paravirt_leave_syscall_targ = cpu_asm_switch->leave_syscall;
+	paravirt_work_processed_syscall_targ =
+		cpu_asm_switch->work_processed_syscall;
+	paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel;
+}
+
+/***************************************************************************
+ * pv_iosapic_ops
+ * iosapic read/write hooks.
+ */
+
+static unsigned int
+ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+	return __ia64_native_iosapic_read(iosapic, reg);
+}
+
+static void
+ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+	__ia64_native_iosapic_write(iosapic, reg, val);
+}
+
+struct pv_iosapic_ops pv_iosapic_ops = {
+	.pcat_compat_init = ia64_native_iosapic_pcat_compat_init,
+	.__get_irq_chip = ia64_native_iosapic_get_irq_chip,
+
+	.__read = ia64_native_iosapic_read,
+	.__write = ia64_native_iosapic_write,
+};
+
+/***************************************************************************
+ * pv_irq_ops
+ * irq operations
+ */
+
+struct pv_irq_ops pv_irq_ops = {
+	.register_ipi = ia64_native_register_ipi,
+
+	.assign_irq_vector = ia64_native_assign_irq_vector,
+	.free_irq_vector = ia64_native_free_irq_vector,
+	.register_percpu_irq = ia64_native_register_percpu_irq,
+
+	.resend_irq = ia64_native_resend_irq,
+};
+
+/***************************************************************************
+ * pv_time_ops
+ * time operations
+ */
+
+static int
+ia64_native_do_steal_accounting(unsigned long *new_itm)
+{
+	return 0;
+}
+
+struct pv_time_ops pv_time_ops = {
+	.do_steal_accounting = ia64_native_do_steal_accounting,
+	.sched_clock = ia64_native_sched_clock,
+};
+
+/***************************************************************************
+ * binary pacthing
+ * pv_init_ops.patch_bundle
+ */
+
+#ifdef ASM_SUPPORTED
+#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg)	\
+	__DEFINE_FUNC(get_ ## name,			\
+		      ";;\n"				\
+		      "mov r8 = " #reg "\n"		\
+		      ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg)	\
+	__DEFINE_FUNC(set_ ## name,			\
+		      ";;\n"				\
+		      "mov " #reg " = r8\n"		\
+		      ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg)		\
+	IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg);	\
+	IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg)	\
+
+#define IA64_NATIVE_PATCH_DEFINE_AR(name, reg)			\
+	IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg)
+
+#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg)			\
+	IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg)
+
+
+IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr);
+IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp);
+
+/* IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, psr.l); */
+__DEFINE_FUNC(set_psr_l,
+	      ";;\n"
+	      "mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+	      ".serialize.data\n"
+#endif
+	      ";;\n");
+
+IA64_NATIVE_PATCH_DEFINE_REG(gp, gp);
+IA64_NATIVE_PATCH_DEFINE_REG(sp, sp);
+
+IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0);
+IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1);
+IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2);
+IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3);
+IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4);
+IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5);
+IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6);
+IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7);
+
+IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc);
+IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp);
+IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore);
+IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat);
+IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr);
+IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag);
+IA64_NATIVE_PATCH_DEFINE_AR(csd, csd);
+IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd);
+IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27);
+IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr);
+IA64_NATIVE_PATCH_DEFINE_AR(fir, fir);
+IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr);
+IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv);
+IA64_NATIVE_PATCH_DEFINE_AR(unat, unat);
+IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr);
+IA64_NATIVE_PATCH_DEFINE_AR(itc, itc);
+IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs);
+IA64_NATIVE_PATCH_DEFINE_AR(lc, lc);
+IA64_NATIVE_PATCH_DEFINE_AR(ec, ec);
+
+IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr);
+IA64_NATIVE_PATCH_DEFINE_CR(itm, itm);
+IA64_NATIVE_PATCH_DEFINE_CR(iva, iva);
+IA64_NATIVE_PATCH_DEFINE_CR(pta, pta);
+IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr);
+IA64_NATIVE_PATCH_DEFINE_CR(isr, isr);
+IA64_NATIVE_PATCH_DEFINE_CR(iip, iip);
+IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa);
+IA64_NATIVE_PATCH_DEFINE_CR(itir, itir);
+IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa);
+IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs);
+IA64_NATIVE_PATCH_DEFINE_CR(iim, iim);
+IA64_NATIVE_PATCH_DEFINE_CR(iha, iha);
+IA64_NATIVE_PATCH_DEFINE_CR(lid, lid);
+IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr);
+IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr);
+IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi);
+IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0);
+IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1);
+IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2);
+IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3);
+IA64_NATIVE_PATCH_DEFINE_CR(itv, itv);
+IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv);
+IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1);
+
+static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[]
+__initdata_or_module =
+{
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type)		\
+	{							\
+		(void*)ia64_native_ ## name ## _direct_start,	\
+		(void*)ia64_native_ ## name ## _direct_end,	\
+		PARAVIRT_PATCH_TYPE_ ## type,			\
+	}
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore,
+				      INTRIN_LOCAL_IRQ_RESTORE),
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg)			\
+	{								\
+		(void*)ia64_native_get_ ## name ## _direct_start,	\
+		(void*)ia64_native_get_ ## name ## _direct_end,		\
+		PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg,		\
+	}
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg)			\
+	{								\
+		(void*)ia64_native_set_ ## name ## _direct_start,	\
+		(void*)ia64_native_set_ ## name ## _direct_end,		\
+		PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg,		\
+	}
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg),	\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg)		\
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg)
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg)
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1),
+};
+
+unsigned long __init_or_module
+ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type)
+{
+	const unsigned long nelems = sizeof(ia64_native_patch_bundle_elems) /
+		sizeof(ia64_native_patch_bundle_elems[0]);
+
+	return __paravirt_patch_apply_bundle(sbundle, ebundle, type,
+					      ia64_native_patch_bundle_elems,
+					      nelems, NULL);
+}
+#endif /* ASM_SUPPOTED */
+
+extern const char ia64_native_switch_to[];
+extern const char ia64_native_leave_syscall[];
+extern const char ia64_native_work_processed_syscall[];
+extern const char ia64_native_leave_kernel[];
+
+const struct paravirt_patch_branch_target ia64_native_branch_target[]
+__initconst = {
+#define PARAVIRT_BR_TARGET(name, type)			\
+	{						\
+		ia64_native_ ## name,			\
+		PARAVIRT_PATCH_TYPE_BR_ ## type,	\
+	}
+	PARAVIRT_BR_TARGET(switch_to, SWITCH_TO),
+	PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL),
+	PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL),
+	PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL),
+};
+
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type)
+{
+	const unsigned long nelem =
+		sizeof(ia64_native_branch_target) /
+		sizeof(ia64_native_branch_target[0]);
+	__paravirt_patch_apply_branch(tag, type,
+				      ia64_native_branch_target, nelem);
+}
diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h
new file mode 100644
index 00000000..64d6d810
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_inst.h
@@ -0,0 +1,31 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK
+#include <asm/native/pvchk_inst.h>
+#elif defined(__IA64_ASM_PARAVIRTUALIZED_XEN)
+#include <asm/xen/inst.h>
+#include <asm/xen/minstate.h>
+#else
+#include <asm/native/inst.h>
+#endif
+
diff --git a/arch/ia64/kernel/paravirt_patch.c b/arch/ia64/kernel/paravirt_patch.c
new file mode 100644
index 00000000..bfdfef1b
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patch.c
@@ -0,0 +1,514 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_patch.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/intrinsics.h>
+#include <asm/kprobes.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
+
+typedef union ia64_inst {
+        struct {
+		unsigned long long qp : 6;
+		unsigned long long : 31;
+		unsigned long long opcode : 4;
+		unsigned long long reserved : 23;
+        } generic;
+        unsigned long long l;
+} ia64_inst_t;
+
+/*
+ * flush_icache_range() can't be used here.
+ * we are here before cpu_init() which initializes
+ * ia64_i_cache_stride_shift. flush_icache_range() uses it.
+ */
+void __init_or_module
+paravirt_flush_i_cache_range(const void *instr, unsigned long size)
+{
+	extern void paravirt_fc_i(const void *addr);
+	unsigned long i;
+
+	for (i = 0; i < size; i += sizeof(bundle_t))
+		paravirt_fc_i(instr + i);
+}
+
+bundle_t* __init_or_module
+paravirt_get_bundle(unsigned long tag)
+{
+	return (bundle_t *)(tag & ~3UL);
+}
+
+unsigned long __init_or_module
+paravirt_get_slot(unsigned long tag)
+{
+	return tag & 3UL;
+}
+
+unsigned long __init_or_module
+paravirt_get_num_inst(unsigned long stag, unsigned long etag)
+{
+	bundle_t *sbundle = paravirt_get_bundle(stag);
+	unsigned long sslot = paravirt_get_slot(stag);
+	bundle_t *ebundle = paravirt_get_bundle(etag);
+	unsigned long eslot = paravirt_get_slot(etag);
+
+	return (ebundle - sbundle) * 3 + eslot - sslot + 1;
+}
+
+unsigned long __init_or_module
+paravirt_get_next_tag(unsigned long tag)
+{
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+	case 1:
+		return tag + 1;
+	case 2: {
+		bundle_t *bundle = paravirt_get_bundle(tag);
+		return (unsigned long)(bundle + 1);
+	}
+	default:
+		BUG();
+	}
+	/* NOTREACHED */
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot0(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad0.slot0;
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot1(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad0.slot1_p0 |
+		((unsigned long long)bundle->quad1.slot1_p1 << 18UL);
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot2(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad1.slot2;
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_inst(unsigned long tag)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+		return paravirt_read_slot0(bundle);
+	case 1:
+		return paravirt_read_slot1(bundle);
+	case 2:
+		return paravirt_read_slot2(bundle);
+	default:
+		BUG();
+	}
+	/* NOTREACHED */
+}
+
+void __init_or_module
+paravirt_write_slot0(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad0.slot0 = inst.l;
+}
+
+void __init_or_module
+paravirt_write_slot1(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad0.slot1_p0 = inst.l;
+	bundle->quad1.slot1_p1 = inst.l >> 18UL;
+}
+
+void __init_or_module
+paravirt_write_slot2(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad1.slot2 = inst.l;
+}
+
+void __init_or_module
+paravirt_write_inst(unsigned long tag, ia64_inst_t inst)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+		paravirt_write_slot0(bundle, inst);
+		break;
+	case 1:
+		paravirt_write_slot1(bundle, inst);
+		break;
+	case 2:
+		paravirt_write_slot2(bundle, inst);
+		break;
+	default:
+		BUG();
+		break;
+	}
+	paravirt_flush_i_cache_range(bundle, sizeof(*bundle));
+}
+
+/* for debug */
+void
+paravirt_print_bundle(const bundle_t *bundle)
+{
+	const unsigned long *quad = (const unsigned long *)bundle;
+	ia64_inst_t slot0 = paravirt_read_slot0(bundle);
+	ia64_inst_t slot1 = paravirt_read_slot1(bundle);
+	ia64_inst_t slot2 = paravirt_read_slot2(bundle);
+
+	printk(KERN_DEBUG
+	       "bundle 0x%p 0x%016lx 0x%016lx\n", bundle, quad[0], quad[1]);
+	printk(KERN_DEBUG
+	       "bundle template 0x%x\n",
+	       bundle->quad0.template);
+	printk(KERN_DEBUG
+	       "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n",
+	       (unsigned long)bundle->quad0.slot0,
+	       (unsigned long)bundle->quad0.slot1_p0,
+	       (unsigned long)bundle->quad1.slot1_p1,
+	       (unsigned long)bundle->quad1.slot2);
+	printk(KERN_DEBUG
+	       "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n",
+	       slot0.l, slot1.l, slot2.l);
+}
+
+static int noreplace_paravirt __init_or_module = 0;
+
+static int __init setup_noreplace_paravirt(char *str)
+{
+	noreplace_paravirt = 1;
+	return 1;
+}
+__setup("noreplace-paravirt", setup_noreplace_paravirt);
+
+#ifdef ASM_SUPPORTED
+static void __init_or_module
+fill_nop_bundle(void *sbundle, void *ebundle)
+{
+	extern const char paravirt_nop_bundle[];
+	extern const unsigned long paravirt_nop_bundle_size;
+
+	void *bundle = sbundle;
+
+	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+	while (bundle < ebundle) {
+		memcpy(bundle, paravirt_nop_bundle, paravirt_nop_bundle_size);
+
+		bundle += paravirt_nop_bundle_size;
+	}
+}
+
+/* helper function */
+unsigned long __init_or_module
+__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
+			      const struct paravirt_patch_bundle_elem *elems,
+			      unsigned long nelems,
+			      const struct paravirt_patch_bundle_elem **found)
+{
+	unsigned long used = 0;
+	unsigned long i;
+
+	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+	found = NULL;
+	for (i = 0; i < nelems; i++) {
+		const struct paravirt_patch_bundle_elem *p = &elems[i];
+		if (p->type == type) {
+			unsigned long need = p->ebundle - p->sbundle;
+			unsigned long room = ebundle - sbundle;
+
+			if (found != NULL)
+				*found = p;
+
+			if (room < need) {
+				/* no room to replace. skip it */
+				printk(KERN_DEBUG
+				       "the space is too small to put "
+				       "bundles. type %ld need %ld room %ld\n",
+				       type, need, room);
+				break;
+			}
+
+			used = need;
+			memcpy(sbundle, p->sbundle, used);
+			break;
+		}
+	}
+
+	return used;
+}
+
+void __init_or_module
+paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
+			    const struct paravirt_patch_site_bundle *end)
+{
+	const struct paravirt_patch_site_bundle *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_bundle == NULL)
+		return;
+
+	for (p = start; p < end; p++) {
+		unsigned long used;
+
+		used = (*pv_init_ops.patch_bundle)(p->sbundle, p->ebundle,
+						   p->type);
+		if (used == 0)
+			continue;
+
+		fill_nop_bundle(p->sbundle + used, p->ebundle);
+		paravirt_flush_i_cache_range(p->sbundle,
+					     p->ebundle - p->sbundle);
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * nop.i, nop.m, nop.f instruction are same format.
+ * but nop.b has differennt format.
+ * This doesn't support nop.b for now.
+ */
+static void __init_or_module
+fill_nop_inst(unsigned long stag, unsigned long etag)
+{
+	extern const bundle_t paravirt_nop_mfi_inst_bundle[];
+	unsigned long tag;
+	const ia64_inst_t nop_inst =
+		paravirt_read_slot0(paravirt_nop_mfi_inst_bundle);
+
+	for (tag = stag; tag < etag; tag = paravirt_get_next_tag(tag))
+		paravirt_write_inst(tag, nop_inst);
+}
+
+void __init_or_module
+paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
+			  const struct paravirt_patch_site_inst *end)
+{
+	const struct paravirt_patch_site_inst *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_inst == NULL)
+		return;
+
+	for (p = start; p < end; p++) {
+		unsigned long tag;
+		bundle_t *sbundle;
+		bundle_t *ebundle;
+
+		tag = (*pv_init_ops.patch_inst)(p->stag, p->etag, p->type);
+		if (tag == p->stag)
+			continue;
+
+		fill_nop_inst(tag, p->etag);
+		sbundle = paravirt_get_bundle(p->stag);
+		ebundle = paravirt_get_bundle(p->etag) + 1;
+		paravirt_flush_i_cache_range(sbundle, (ebundle - sbundle) *
+					     sizeof(bundle_t));
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+#endif /* ASM_SUPPOTED */
+
+/* brl.cond.sptk.many <target64> X3 */
+typedef union inst_x3_op {
+	ia64_inst_t inst;
+	struct {
+		unsigned long qp: 6;
+		unsigned long btyp: 3;
+		unsigned long unused: 3;
+		unsigned long p: 1;
+		unsigned long imm20b: 20;
+		unsigned long wh: 2;
+		unsigned long d: 1;
+		unsigned long i: 1;
+		unsigned long opcode: 4;
+	};
+	unsigned long l;
+} inst_x3_op_t;
+
+typedef union inst_x3_imm {
+	ia64_inst_t inst;
+	struct {
+		unsigned long unused: 2;
+		unsigned long imm39: 39;
+	};
+	unsigned long l;
+} inst_x3_imm_t;
+
+void __init_or_module
+paravirt_patch_reloc_brl(unsigned long tag, const void *target)
+{
+	unsigned long tag_op = paravirt_get_next_tag(tag);
+	unsigned long tag_imm = tag;
+	bundle_t *bundle = paravirt_get_bundle(tag);
+
+	ia64_inst_t inst_op = paravirt_read_inst(tag_op);
+	ia64_inst_t inst_imm = paravirt_read_inst(tag_imm);
+
+	inst_x3_op_t inst_x3_op = { .l = inst_op.l };
+	inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l };
+
+	unsigned long imm60 =
+		((unsigned long)target - (unsigned long)bundle) >> 4;
+
+	BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */
+	BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+	/* imm60[59] 1bit */
+	inst_x3_op.i = (imm60 >> 59) & 1;
+	/* imm60[19:0] 20bit */
+	inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1);
+	/* imm60[58:20] 39bit */
+	inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1);
+
+	inst_op.l = inst_x3_op.l;
+	inst_imm.l = inst_x3_imm.l;
+
+	paravirt_write_inst(tag_op, inst_op);
+	paravirt_write_inst(tag_imm, inst_imm);
+}
+
+/* br.cond.sptk.many <target25>	B1 */
+typedef union inst_b1 {
+	ia64_inst_t inst;
+	struct {
+		unsigned long qp: 6;
+		unsigned long btype: 3;
+		unsigned long unused: 3;
+		unsigned long p: 1;
+		unsigned long imm20b: 20;
+		unsigned long wh: 2;
+		unsigned long d: 1;
+		unsigned long s: 1;
+		unsigned long opcode: 4;
+	};
+	unsigned long l;
+} inst_b1_t;
+
+void __init
+paravirt_patch_reloc_br(unsigned long tag, const void *target)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	ia64_inst_t inst = paravirt_read_inst(tag);
+	unsigned long target25 = (unsigned long)target - (unsigned long)bundle;
+	inst_b1_t inst_b1;
+
+	BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+	inst_b1.l = inst.l;
+	if (target25 & (1UL << 63))
+		inst_b1.s = 1;
+	else
+		inst_b1.s = 0;
+
+	inst_b1.imm20b = target25 >> 4;
+	inst.l = inst_b1.l;
+
+	paravirt_write_inst(tag, inst);
+}
+
+void __init
+__paravirt_patch_apply_branch(
+	unsigned long tag, unsigned long type,
+	const struct paravirt_patch_branch_target *entries,
+	unsigned int nr_entries)
+{
+	unsigned int i;
+	for (i = 0; i < nr_entries; i++) {
+		if (entries[i].type == type) {
+			paravirt_patch_reloc_br(tag, entries[i].entry);
+			break;
+		}
+	}
+}
+
+static void __init
+paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start,
+			    const struct paravirt_patch_site_branch *end)
+{
+	const struct paravirt_patch_site_branch *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_branch == NULL)
+		return;
+
+	for (p = start; p < end; p++)
+		(*pv_init_ops.patch_branch)(p->tag, p->type);
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+void __init
+paravirt_patch_apply(void)
+{
+	extern const char __start_paravirt_bundles[];
+	extern const char __stop_paravirt_bundles[];
+	extern const char __start_paravirt_insts[];
+	extern const char __stop_paravirt_insts[];
+	extern const char __start_paravirt_branches[];
+	extern const char __stop_paravirt_branches[];
+
+	paravirt_patch_apply_bundle((const struct paravirt_patch_site_bundle *)
+				    __start_paravirt_bundles,
+				    (const struct paravirt_patch_site_bundle *)
+				    __stop_paravirt_bundles);
+	paravirt_patch_apply_inst((const struct paravirt_patch_site_inst *)
+				  __start_paravirt_insts,
+				  (const struct paravirt_patch_site_inst *)
+				  __stop_paravirt_insts);
+	paravirt_patch_apply_branch((const struct paravirt_patch_site_branch *)
+				    __start_paravirt_branches,
+				    (const struct paravirt_patch_site_branch *)
+				    __stop_paravirt_branches);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/arch/ia64/kernel/paravirt_patchlist.c b/arch/ia64/kernel/paravirt_patchlist.c
new file mode 100644
index 00000000..0a707206
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.c
@@ -0,0 +1,81 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/paravirt.h>
+
+#define DECLARE(name)						\
+	extern unsigned long					\
+		__ia64_native_start_gate_##name##_patchlist[];	\
+	extern unsigned long					\
+		__ia64_native_end_gate_##name##_patchlist[]
+
+DECLARE(fsyscall);
+DECLARE(brl_fsys_bubble_down);
+DECLARE(vtop);
+DECLARE(mckinley_e9);
+
+extern unsigned long __start_gate_section[];
+
+#define ASSIGN(name)							    \
+	.start_##name##_patchlist =					    \
+		(unsigned long)__ia64_native_start_gate_##name##_patchlist, \
+	.end_##name##_patchlist =					    \
+		(unsigned long)__ia64_native_end_gate_##name##_patchlist
+
+struct pv_patchdata pv_patchdata __initdata = {
+	ASSIGN(fsyscall),
+	ASSIGN(brl_fsys_bubble_down),
+	ASSIGN(vtop),
+	ASSIGN(mckinley_e9),
+
+	.gate_section = (void*)__start_gate_section,
+};
+
+
+unsigned long __init
+paravirt_get_gate_patchlist(enum pv_gate_patchlist type)
+{
+
+#define CASE(NAME, name)					\
+	case PV_GATE_START_##NAME:				\
+		return pv_patchdata.start_##name##_patchlist;	\
+	case PV_GATE_END_##NAME:				\
+		return pv_patchdata.end_##name##_patchlist;	\
+
+	switch (type) {
+		CASE(FSYSCALL, fsyscall);
+		CASE(BRL_FSYS_BUBBLE_DOWN, brl_fsys_bubble_down);
+		CASE(VTOP, vtop);
+		CASE(MCKINLEY_E9, mckinley_e9);
+	default:
+		BUG();
+		break;
+	}
+	return 0;
+}
+
+void * __init
+paravirt_get_gate_section(void)
+{
+	return pv_patchdata.gate_section;
+}
diff --git a/arch/ia64/kernel/paravirt_patchlist.h b/arch/ia64/kernel/paravirt_patchlist.h
new file mode 100644
index 00000000..0684aa6c
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.h
@@ -0,0 +1,28 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_patchlist.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#if defined(__IA64_GATE_PARAVIRTUALIZED_XEN)
+#include <asm/xen/patchlist.h>
+#else
+#include <asm/native/patchlist.h>
+#endif
+
diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S
new file mode 100644
index 00000000..92d880c4
--- /dev/null
+++ b/arch/ia64/kernel/paravirtentry.S
@@ -0,0 +1,121 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirtentry.S
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-offsets.h>
+#include <asm/paravirt_privop.h>
+#include <asm/paravirt_patch.h>
+#include "entry.h"
+
+#define DATA8(sym, init_value)			\
+	.pushsection .data..read_mostly ;	\
+	.align 8 ;				\
+	.global sym ;				\
+	sym: ;					\
+	data8 init_value ;			\
+	.popsection
+
+#define BRANCH(targ, reg, breg, type)					\
+	PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ;	\
+	;;								\
+	movl reg=targ ;							\
+	;;								\
+	ld8 reg=[reg] ;							\
+	;;								\
+	mov breg=reg ;							\
+	br.cond.sptk.many breg
+
+#define BRANCH_PROC(sym, reg, breg, type)				\
+	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ;		\
+	GLOBAL_ENTRY(paravirt_ ## sym) ;				\
+		BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ;	\
+	END(paravirt_ ## sym)
+
+#define BRANCH_PROC_UNWINFO(sym, reg, breg, type)			\
+	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ;		\
+	GLOBAL_ENTRY(paravirt_ ## sym) ;				\
+		PT_REGS_UNWIND_INFO(0) ;				\
+		BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ;	\
+	END(paravirt_ ## sym)
+
+
+BRANCH_PROC(switch_to, r22, b7, SWITCH_TO)
+BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL)
+BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL)
+BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL)
+
+
+#ifdef CONFIG_MODULES
+#define __INIT_OR_MODULE	.text
+#define __INITDATA_OR_MODULE	.data
+#else
+#define __INIT_OR_MODULE	__INIT
+#define __INITDATA_OR_MODULE	__INITDATA
+#endif /* CONFIG_MODULES */
+
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_fc_i)
+	fc.i r32
+	br.ret.sptk.many rp
+	END(paravirt_fc_i)
+	__FINIT
+
+	__INIT_OR_MODULE
+	.align 32
+	GLOBAL_ENTRY(paravirt_nop_b_inst_bundle)
+	{
+		nop.b 0
+		nop.b 0
+		nop.b 0
+	}
+	END(paravirt_nop_b_inst_bundle)
+	__FINIT
+
+	/* NOTE: nop.[mfi] has same format */
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle)
+	{
+		nop.m 0
+		nop.f 0
+		nop.i 0
+	}
+	END(paravirt_nop_mfi_inst_bundle)
+	__FINIT
+
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_nop_bundle)
+paravirt_nop_bundle_start:
+	{
+		nop 0
+		nop 0
+		nop 0
+	}
+paravirt_nop_bundle_end:
+	END(paravirt_nop_bundle)
+	__FINIT
+
+	__INITDATA_OR_MODULE
+	.align 8
+	.global paravirt_nop_bundle_size
+paravirt_nop_bundle_size:
+	data8	paravirt_nop_bundle_end - paravirt_nop_bundle_start
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
new file mode 100644
index 00000000..68a1311d
--- /dev/null
+++ b/arch/ia64/kernel/patch.c
@@ -0,0 +1,257 @@
+/*
+ * Instruction-patching support.
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/init.h>
+#include <linux/string.h>
+
+#include <asm/paravirt.h>
+#include <asm/patch.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+/*
+ * This was adapted from code written by Tony Luck:
+ *
+ * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle
+ * like this:
+ *
+ * 6  6         5         4         3         2         1
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG
+ *
+ * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
+ */
+static u64
+get_imm64 (u64 insn_addr)
+{
+	u64 *p = (u64 *) (insn_addr & -16);	/* mask out slot number */
+
+	return ( (p[1] & 0x0800000000000000UL) << 4)  | /*A*/
+		((p[1] & 0x00000000007fffffUL) << 40) | /*B*/
+		((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/
+		((p[1] & 0x0000100000000000UL) >> 23) | /*D*/
+		((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/
+		((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/
+		((p[1] & 0x000007f000000000UL) >> 36);  /*G*/
+}
+
+/* Patch instruction with "val" where "mask" has 1 bits. */
+void
+ia64_patch (u64 insn_addr, u64 mask, u64 val)
+{
+	u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16);
+#	define insn_mask ((1UL << 41) - 1)
+	unsigned long shift;
+
+	b0 = b[0]; b1 = b[1];
+	shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */
+	if (shift >= 64) {
+		m1 = mask << (shift - 64);
+		v1 = val << (shift - 64);
+	} else {
+		m0 = mask << shift; m1 = mask >> (64 - shift);
+		v0 = val  << shift; v1 = val >> (64 - shift);
+		b[0] = (b0 & ~m0) | (v0 & m0);
+	}
+	b[1] = (b1 & ~m1) | (v1 & m1);
+}
+
+void
+ia64_patch_imm64 (u64 insn_addr, u64 val)
+{
+	/* The assembler may generate offset pointing to either slot 1
+	   or slot 2 for a long (2-slot) instruction, occupying slots 1
+	   and 2.  */
+  	insn_addr &= -16UL;
+	ia64_patch(insn_addr + 2,
+		   0x01fffefe000UL, (  ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
+				     | ((val & 0x0000000000200000UL) <<  0) /* bit 21 -> 21 */
+				     | ((val & 0x00000000001f0000UL) <<  6) /* bit 16 -> 22 */
+				     | ((val & 0x000000000000ff80UL) << 20) /* bit  7 -> 27 */
+				     | ((val & 0x000000000000007fUL) << 13) /* bit  0 -> 13 */));
+	ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22);
+}
+
+void
+ia64_patch_imm60 (u64 insn_addr, u64 val)
+{
+	/* The assembler may generate offset pointing to either slot 1
+	   or slot 2 for a long (2-slot) instruction, occupying slots 1
+	   and 2.  */
+  	insn_addr &= -16UL;
+	ia64_patch(insn_addr + 2,
+		   0x011ffffe000UL, (  ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
+				     | ((val & 0x00000000000fffffUL) << 13) /* bit  0 -> 13 */));
+	ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18);
+}
+
+/*
+ * We need sometimes to load the physical address of a kernel
+ * object.  Often we can convert the virtual address to physical
+ * at execution time, but sometimes (either for performance reasons
+ * or during error recovery) we cannot to this.  Patch the marked
+ * bundles to load the physical address.
+ */
+void __init
+ia64_patch_vtop (unsigned long start, unsigned long end)
+{
+	s32 *offp = (s32 *) start;
+	u64 ip;
+
+	while (offp < (s32 *) end) {
+		ip = (u64) offp + *offp;
+
+		/* replace virtual address with corresponding physical address: */
+		ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip)));
+		ia64_fc((void *) ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * Disable the RSE workaround by turning the conditional branch
+ * that we tagged in each place the workaround was used into an
+ * unconditional branch.
+ */
+void __init
+ia64_patch_rse (unsigned long start, unsigned long end)
+{
+	s32 *offp = (s32 *) start;
+	u64 ip, *b;
+
+	while (offp < (s32 *) end) {
+		ip = (u64) offp + *offp;
+
+		b = (u64 *)(ip & -16);
+		b[1] &= ~0xf800000L;
+		ia64_fc((void *) ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+void __init
+ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
+{
+	static int first_time = 1;
+	int need_workaround;
+	s32 *offp = (s32 *) start;
+	u64 *wp;
+
+	need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0);
+
+	if (first_time) {
+		first_time = 0;
+		if (need_workaround)
+			printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
+	}
+	if (need_workaround)
+		return;
+
+	while (offp < (s32 *) end) {
+		wp = (u64 *) ia64_imva((char *) offp + *offp);
+		wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+		wp[1] = 0x0084006880000200UL;
+		wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+		wp[3] = 0x0004000000000200UL;
+		ia64_fc(wp); ia64_fc(wp + 2);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+extern unsigned long ia64_native_fsyscall_table[NR_syscalls];
+extern char ia64_native_fsys_bubble_down[];
+struct pv_fsys_data pv_fsys_data __initdata = {
+	.fsyscall_table = (unsigned long *)ia64_native_fsyscall_table,
+	.fsys_bubble_down = (void *)ia64_native_fsys_bubble_down,
+};
+
+unsigned long * __init
+paravirt_get_fsyscall_table(void)
+{
+	return pv_fsys_data.fsyscall_table;
+}
+
+char * __init
+paravirt_get_fsys_bubble_down(void)
+{
+	return pv_fsys_data.fsys_bubble_down;
+}
+
+static void __init
+patch_fsyscall_table (unsigned long start, unsigned long end)
+{
+	u64 fsyscall_table = (u64)paravirt_get_fsyscall_table();
+	s32 *offp = (s32 *) start;
+	u64 ip;
+
+	while (offp < (s32 *) end) {
+		ip = (u64) ia64_imva((char *) offp + *offp);
+		ia64_patch_imm64(ip, fsyscall_table);
+		ia64_fc((void *) ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+static void __init
+patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
+{
+	u64 fsys_bubble_down = (u64)paravirt_get_fsys_bubble_down();
+	s32 *offp = (s32 *) start;
+	u64 ip;
+
+	while (offp < (s32 *) end) {
+		ip = (u64) offp + *offp;
+		ia64_patch_imm60((u64) ia64_imva((void *) ip),
+				 (u64) (fsys_bubble_down - (ip & -16)) / 16);
+		ia64_fc((void *) ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+void __init
+ia64_patch_gate (void)
+{
+#	define START(name)	paravirt_get_gate_patchlist(PV_GATE_START_##name)
+#	define END(name)	paravirt_get_gate_patchlist(PV_GATE_END_##name)
+
+	patch_fsyscall_table(START(FSYSCALL), END(FSYSCALL));
+	patch_brl_fsys_bubble_down(START(BRL_FSYS_BUBBLE_DOWN), END(BRL_FSYS_BUBBLE_DOWN));
+	ia64_patch_vtop(START(VTOP), END(VTOP));
+	ia64_patch_mckinley_e9(START(MCKINLEY_E9), END(MCKINLEY_E9));
+}
+
+void ia64_patch_phys_stack_reg(unsigned long val)
+{
+	s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;
+	s32 * end = (s32 *) __end___phys_stack_reg_patchlist;
+	u64 ip, mask, imm;
+
+	/* see instruction format A4: adds r1 = imm13, r3 */
+	mask = (0x3fUL << 27) | (0x7f << 13);
+	imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13;
+
+	while (offp < end) {
+		ip = (u64) offp + *offp;
+		ia64_patch(ip, mask, imm);
+		ia64_fc((void *)ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
new file mode 100644
index 00000000..f6b1ff0a
--- /dev/null
+++ b/arch/ia64/kernel/pci-dma.c
@@ -0,0 +1,120 @@
+/*
+ * Dynamic DMA mapping support.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/dmar.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/system.h>
+
+#ifdef CONFIG_DMAR
+
+#include <linux/kernel.h>
+
+#include <asm/page.h>
+
+dma_addr_t bad_dma_address __read_mostly;
+EXPORT_SYMBOL(bad_dma_address);
+
+static int iommu_sac_force __read_mostly;
+
+int no_iommu __read_mostly;
+#ifdef CONFIG_IOMMU_DEBUG
+int force_iommu __read_mostly = 1;
+#else
+int force_iommu __read_mostly;
+#endif
+
+int iommu_pass_through;
+
+/* Dummy device used for NULL arguments (normally ISA). Better would
+   be probably a smaller DMA mask, but this is bug-to-bug compatible
+   to i386. */
+struct device fallback_dev = {
+	.init_name = "fallback device",
+	.coherent_dma_mask = DMA_BIT_MASK(32),
+	.dma_mask = &fallback_dev.coherent_dma_mask,
+};
+
+extern struct dma_map_ops intel_dma_ops;
+
+static int __init pci_iommu_init(void)
+{
+	if (iommu_detected)
+		intel_iommu_init();
+
+	return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+
+void pci_iommu_shutdown(void)
+{
+	return;
+}
+
+void __init
+iommu_dma_init(void)
+{
+	return;
+}
+
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+	/* Copied from i386. Doesn't make much sense, because it will
+	   only work for pci_alloc_coherent.
+	   The caller just has to use GFP_DMA in this case. */
+	if (mask < DMA_BIT_MASK(24))
+		return 0;
+
+	/* Tell the device to use SAC when IOMMU force is on.  This
+	   allows the driver to use cheaper accesses in some cases.
+
+	   Problem with this is that if we overflow the IOMMU area and
+	   return DAC as fallback address the device may not handle it
+	   correctly.
+
+	   As a special case some controllers have a 39bit address
+	   mode that is as efficient as 32bit (aic79xx). Don't force
+	   SAC for these.  Assume all masks <= 40 bits are of this
+	   type. Normally this doesn't make any difference, but gives
+	   more gentle handling of IOMMU overflow. */
+	if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
+		dev_info(dev, "Force SAC with mask %llx\n", mask);
+		return 0;
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(iommu_dma_supported);
+
+void __init pci_iommu_alloc(void)
+{
+	dma_ops = &intel_dma_ops;
+
+	dma_ops->sync_single_for_cpu = machvec_dma_sync_single;
+	dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg;
+	dma_ops->sync_single_for_device = machvec_dma_sync_single;
+	dma_ops->sync_sg_for_device = machvec_dma_sync_sg;
+	dma_ops->dma_supported = iommu_dma_supported;
+
+	/*
+	 * The order of these functions is important for
+	 * fall-back/fail-over reasons
+	 */
+	detect_intel_iommu();
+
+#ifdef CONFIG_SWIOTLB
+	pci_swiotlb_init();
+#endif
+}
+
+#endif
diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
new file mode 100644
index 00000000..d9485d95
--- /dev/null
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -0,0 +1,59 @@
+/* Glue code to lib/swiotlb.c */
+
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+
+int swiotlb __read_mostly;
+EXPORT_SYMBOL(swiotlb);
+
+static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size,
+					 dma_addr_t *dma_handle, gfp_t gfp)
+{
+	if (dev->coherent_dma_mask != DMA_BIT_MASK(64))
+		gfp |= GFP_DMA;
+	return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
+}
+
+struct dma_map_ops swiotlb_dma_ops = {
+	.alloc_coherent = ia64_swiotlb_alloc_coherent,
+	.free_coherent = swiotlb_free_coherent,
+	.map_page = swiotlb_map_page,
+	.unmap_page = swiotlb_unmap_page,
+	.map_sg = swiotlb_map_sg_attrs,
+	.unmap_sg = swiotlb_unmap_sg_attrs,
+	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = swiotlb_sync_single_for_device,
+	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = swiotlb_sync_sg_for_device,
+	.dma_supported = swiotlb_dma_supported,
+	.mapping_error = swiotlb_dma_mapping_error,
+};
+
+void __init swiotlb_dma_init(void)
+{
+	dma_ops = &swiotlb_dma_ops;
+	swiotlb_init(1);
+}
+
+void __init pci_swiotlb_init(void)
+{
+	if (!iommu_detected) {
+#ifdef CONFIG_IA64_GENERIC
+		swiotlb = 1;
+		printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
+		machvec_init("dig");
+		swiotlb_init(1);
+		dma_ops = &swiotlb_dma_ops;
+#else
+		panic("Unable to find Intel IOMMU");
+#endif
+	}
+}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
new file mode 100644
index 00000000..89accc62
--- /dev/null
+++ b/arch/ia64/kernel/perfmon.c
@@ -0,0 +1,6838 @@
+/*
+ * This file implements the perfmon-2 subsystem which is used
+ * to program the IA-64 Performance Monitoring Unit (PMU).
+ *
+ * The initial version of perfmon.c was written by
+ * Ganesh Venkitachalam, IBM Corp.
+ *
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
+ * David Mosberger, Hewlett Packard Co.
+ *
+ * Version Perfmon-2.x is a rewrite of perfmon-1.x
+ * by Stephane Eranian, Hewlett Packard Co.
+ *
+ * Copyright (C) 1999-2005  Hewlett Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ *               David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * More information about perfmon available at:
+ * 	http://www.hpl.hp.com/research/linux/perfmon
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/vfs.h>
+#include <linux/smp.h>
+#include <linux/pagemap.h>
+#include <linux/mount.h>
+#include <linux/bitops.h>
+#include <linux/capability.h>
+#include <linux/rcupdate.h>
+#include <linux/completion.h>
+#include <linux/tracehook.h>
+#include <linux/slab.h>
+
+#include <asm/errno.h>
+#include <asm/intrinsics.h>
+#include <asm/page.h>
+#include <asm/perfmon.h>
+#include <asm/processor.h>
+#include <asm/signal.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_PERFMON
+/*
+ * perfmon context state
+ */
+#define PFM_CTX_UNLOADED	1	/* context is not loaded onto any task */
+#define PFM_CTX_LOADED		2	/* context is loaded onto a task */
+#define PFM_CTX_MASKED		3	/* context is loaded but monitoring is masked due to overflow */
+#define PFM_CTX_ZOMBIE		4	/* owner of the context is closing it */
+
+#define PFM_INVALID_ACTIVATION	(~0UL)
+
+#define PFM_NUM_PMC_REGS	64	/* PMC save area for ctxsw */
+#define PFM_NUM_PMD_REGS	64	/* PMD save area for ctxsw */
+
+/*
+ * depth of message queue
+ */
+#define PFM_MAX_MSGS		32
+#define PFM_CTXQ_EMPTY(g)	((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
+
+/*
+ * type of a PMU register (bitmask).
+ * bitmask structure:
+ * 	bit0   : register implemented
+ * 	bit1   : end marker
+ * 	bit2-3 : reserved
+ * 	bit4   : pmc has pmc.pm
+ * 	bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
+ * 	bit6-7 : register type
+ * 	bit8-31: reserved
+ */
+#define PFM_REG_NOTIMPL		0x0 /* not implemented at all */
+#define PFM_REG_IMPL		0x1 /* register implemented */
+#define PFM_REG_END		0x2 /* end marker */
+#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
+#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
+#define PFM_REG_CONTROL		(0x4<<4|PFM_REG_IMPL) /* PMU control register */
+#define	PFM_REG_CONFIG		(0x8<<4|PFM_REG_IMPL) /* configuration register */
+#define PFM_REG_BUFFER	 	(0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
+
+#define PMC_IS_LAST(i)	(pmu_conf->pmc_desc[i].type & PFM_REG_END)
+#define PMD_IS_LAST(i)	(pmu_conf->pmd_desc[i].type & PFM_REG_END)
+
+#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)
+
+/* i assumed unsigned */
+#define PMC_IS_IMPL(i)	  (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
+#define PMD_IS_IMPL(i)	  (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
+
+/* XXX: these assume that register i is implemented */
+#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
+#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
+#define PMC_IS_MONITOR(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR)  == PFM_REG_MONITOR)
+#define PMC_IS_CONTROL(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL)  == PFM_REG_CONTROL)
+
+#define PMC_DFL_VAL(i)     pmu_conf->pmc_desc[i].default_value
+#define PMC_RSVD_MASK(i)   pmu_conf->pmc_desc[i].reserved_mask
+#define PMD_PMD_DEP(i)	   pmu_conf->pmd_desc[i].dep_pmd[0]
+#define PMC_PMD_DEP(i)	   pmu_conf->pmc_desc[i].dep_pmd[0]
+
+#define PFM_NUM_IBRS	  IA64_NUM_DBG_REGS
+#define PFM_NUM_DBRS	  IA64_NUM_DBG_REGS
+
+#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
+#define CTX_HAS_SMPL(c)		((c)->ctx_fl_is_sampling)
+#define PFM_CTX_TASK(h)		(h)->ctx_task
+
+#define PMU_PMC_OI		5 /* position of pmc.oi bit */
+
+/* XXX: does not support more than 64 PMDs */
+#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
+#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
+
+#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
+
+#define CTX_USED_IBR(ctx,n) 	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
+#define CTX_USED_DBR(ctx,n) 	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
+#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
+#define PFM_CODE_RR	0	/* requesting code range restriction */
+#define PFM_DATA_RR	1	/* requestion data range restriction */
+
+#define PFM_CPUINFO_CLEAR(v)	pfm_get_cpu_var(pfm_syst_info) &= ~(v)
+#define PFM_CPUINFO_SET(v)	pfm_get_cpu_var(pfm_syst_info) |= (v)
+#define PFM_CPUINFO_GET()	pfm_get_cpu_var(pfm_syst_info)
+
+#define RDEP(x)	(1UL<<(x))
+
+/*
+ * context protection macros
+ * in SMP:
+ * 	- we need to protect against CPU concurrency (spin_lock)
+ * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
+ * in UP:
+ * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
+ *
+ * spin_lock_irqsave()/spin_unlock_irqrestore():
+ * 	in SMP: local_irq_disable + spin_lock
+ * 	in UP : local_irq_disable
+ *
+ * spin_lock()/spin_lock():
+ * 	in UP : removed automatically
+ * 	in SMP: protect against context accesses from other CPU. interrupts
+ * 	        are not masked. This is useful for the PMU interrupt handler
+ * 	        because we know we will not get PMU concurrency in that code.
+ */
+#define PROTECT_CTX(c, f) \
+	do {  \
+		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \
+		spin_lock_irqsave(&(c)->ctx_lock, f); \
+		DPRINT(("spinlocked ctx %p  by [%d]\n", c, task_pid_nr(current))); \
+	} while(0)
+
+#define UNPROTECT_CTX(c, f) \
+	do { \
+		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \
+		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
+	} while(0)
+
+#define PROTECT_CTX_NOPRINT(c, f) \
+	do {  \
+		spin_lock_irqsave(&(c)->ctx_lock, f); \
+	} while(0)
+
+
+#define UNPROTECT_CTX_NOPRINT(c, f) \
+	do { \
+		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
+	} while(0)
+
+
+#define PROTECT_CTX_NOIRQ(c) \
+	do {  \
+		spin_lock(&(c)->ctx_lock); \
+	} while(0)
+
+#define UNPROTECT_CTX_NOIRQ(c) \
+	do { \
+		spin_unlock(&(c)->ctx_lock); \
+	} while(0)
+
+
+#ifdef CONFIG_SMP
+
+#define GET_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)
+#define INC_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)++
+#define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()
+
+#else /* !CONFIG_SMP */
+#define SET_ACTIVATION(t) 	do {} while(0)
+#define GET_ACTIVATION(t) 	do {} while(0)
+#define INC_ACTIVATION(t) 	do {} while(0)
+#endif /* CONFIG_SMP */
+
+#define SET_PMU_OWNER(t, c)	do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
+#define GET_PMU_OWNER()		pfm_get_cpu_var(pmu_owner)
+#define GET_PMU_CTX()		pfm_get_cpu_var(pmu_ctx)
+
+#define LOCK_PFS(g)	    	spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
+#define UNLOCK_PFS(g)	    	spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
+
+#define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
+
+/*
+ * cmp0 must be the value of pmc0
+ */
+#define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)
+
+#define PFMFS_MAGIC 0xa0b4d889
+
+/*
+ * debugging
+ */
+#define PFM_DEBUGGING 1
+#ifdef PFM_DEBUGGING
+#define DPRINT(a) \
+	do { \
+		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
+	} while (0)
+
+#define DPRINT_ovfl(a) \
+	do { \
+		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
+	} while (0)
+#endif
+
+/*
+ * 64-bit software counter structure
+ *
+ * the next_reset_type is applied to the next call to pfm_reset_regs()
+ */
+typedef struct {
+	unsigned long	val;		/* virtual 64bit counter value */
+	unsigned long	lval;		/* last reset value */
+	unsigned long	long_reset;	/* reset value on sampling overflow */
+	unsigned long	short_reset;    /* reset value on overflow */
+	unsigned long	reset_pmds[4];  /* which other pmds to reset when this counter overflows */
+	unsigned long	smpl_pmds[4];   /* which pmds are accessed when counter overflow */
+	unsigned long	seed;		/* seed for random-number generator */
+	unsigned long	mask;		/* mask for random-number generator */
+	unsigned int 	flags;		/* notify/do not notify */
+	unsigned long	eventid;	/* overflow event identifier */
+} pfm_counter_t;
+
+/*
+ * context flags
+ */
+typedef struct {
+	unsigned int block:1;		/* when 1, task will blocked on user notifications */
+	unsigned int system:1;		/* do system wide monitoring */
+	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
+	unsigned int is_sampling:1;	/* true if using a custom format */
+	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
+	unsigned int going_zombie:1;	/* context is zombie (MASKED+blocking) */
+	unsigned int trap_reason:2;	/* reason for going into pfm_handle_work() */
+	unsigned int no_msg:1;		/* no message sent on overflow */
+	unsigned int can_restart:1;	/* allowed to issue a PFM_RESTART */
+	unsigned int reserved:22;
+} pfm_context_flags_t;
+
+#define PFM_TRAP_REASON_NONE		0x0	/* default value */
+#define PFM_TRAP_REASON_BLOCK		0x1	/* we need to block on overflow */
+#define PFM_TRAP_REASON_RESET		0x2	/* we need to reset PMDs */
+
+
+/*
+ * perfmon context: encapsulates all the state of a monitoring session
+ */
+
+typedef struct pfm_context {
+	spinlock_t		ctx_lock;		/* context protection */
+
+	pfm_context_flags_t	ctx_flags;		/* bitmask of flags  (block reason incl.) */
+	unsigned int		ctx_state;		/* state: active/inactive (no bitfield) */
+
+	struct task_struct 	*ctx_task;		/* task to which context is attached */
+
+	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */
+
+	struct completion	ctx_restart_done;  	/* use for blocking notification mode */
+
+	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used            */
+	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
+	unsigned long		ctx_reload_pmds[4];	/* bitmask of force reload PMD on ctxsw in */
+
+	unsigned long		ctx_all_pmcs[4];	/* bitmask of all accessible PMCs */
+	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
+	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */
+
+	unsigned long		ctx_pmcs[PFM_NUM_PMC_REGS];	/*  saved copies of PMC values */
+
+	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
+	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
+	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
+	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */
+
+	pfm_counter_t		ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */
+
+	unsigned long		th_pmcs[PFM_NUM_PMC_REGS];	/* PMC thread save state */
+	unsigned long		th_pmds[PFM_NUM_PMD_REGS];	/* PMD thread save state */
+
+	unsigned long		ctx_saved_psr_up;	/* only contains psr.up value */
+
+	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
+	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
+	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */
+
+	int			ctx_fd;			/* file descriptor used my this context */
+	pfm_ovfl_arg_t		ctx_ovfl_arg;		/* argument to custom buffer format handler */
+
+	pfm_buffer_fmt_t	*ctx_buf_fmt;		/* buffer format callbacks */
+	void			*ctx_smpl_hdr;		/* points to sampling buffer header kernel vaddr */
+	unsigned long		ctx_smpl_size;		/* size of sampling buffer */
+	void			*ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */
+
+	wait_queue_head_t 	ctx_msgq_wait;
+	pfm_msg_t		ctx_msgq[PFM_MAX_MSGS];
+	int			ctx_msgq_head;
+	int			ctx_msgq_tail;
+	struct fasync_struct	*ctx_async_queue;
+
+	wait_queue_head_t 	ctx_zombieq;		/* termination cleanup wait queue */
+} pfm_context_t;
+
+/*
+ * magic number used to verify that structure is really
+ * a perfmon context
+ */
+#define PFM_IS_FILE(f)		((f)->f_op == &pfm_file_ops)
+
+#define PFM_GET_CTX(t)	 	((pfm_context_t *)(t)->thread.pfm_context)
+
+#ifdef CONFIG_SMP
+#define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
+#define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
+#else
+#define SET_LAST_CPU(ctx, v)	do {} while(0)
+#define GET_LAST_CPU(ctx)	do {} while(0)
+#endif
+
+
+#define ctx_fl_block		ctx_flags.block
+#define ctx_fl_system		ctx_flags.system
+#define ctx_fl_using_dbreg	ctx_flags.using_dbreg
+#define ctx_fl_is_sampling	ctx_flags.is_sampling
+#define ctx_fl_excl_idle	ctx_flags.excl_idle
+#define ctx_fl_going_zombie	ctx_flags.going_zombie
+#define ctx_fl_trap_reason	ctx_flags.trap_reason
+#define ctx_fl_no_msg		ctx_flags.no_msg
+#define ctx_fl_can_restart	ctx_flags.can_restart
+
+#define PFM_SET_WORK_PENDING(t, v)	do { (t)->thread.pfm_needs_checking = v; } while(0);
+#define PFM_GET_WORK_PENDING(t)		(t)->thread.pfm_needs_checking
+
+/*
+ * global information about all sessions
+ * mostly used to synchronize between system wide and per-process
+ */
+typedef struct {
+	spinlock_t		pfs_lock;		   /* lock the structure */
+
+	unsigned int		pfs_task_sessions;	   /* number of per task sessions */
+	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
+	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
+	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
+	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
+} pfm_session_t;
+
+/*
+ * information about a PMC or PMD.
+ * dep_pmd[]: a bitmask of dependent PMD registers
+ * dep_pmc[]: a bitmask of dependent PMC registers
+ */
+typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+typedef struct {
+	unsigned int		type;
+	int			pm_pos;
+	unsigned long		default_value;	/* power-on default value */
+	unsigned long		reserved_mask;	/* bitmask of reserved bits */
+	pfm_reg_check_t		read_check;
+	pfm_reg_check_t		write_check;
+	unsigned long		dep_pmd[4];
+	unsigned long		dep_pmc[4];
+} pfm_reg_desc_t;
+
+/* assume cnum is a valid monitor */
+#define PMC_PM(cnum, val)	(((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
+
+/*
+ * This structure is initialized at boot time and contains
+ * a description of the PMU main characteristics.
+ *
+ * If the probe function is defined, detection is based
+ * on its return value: 
+ * 	- 0 means recognized PMU
+ * 	- anything else means not supported
+ * When the probe function is not defined, then the pmu_family field
+ * is used and it must match the host CPU family such that:
+ * 	- cpu->family & config->pmu_family != 0
+ */
+typedef struct {
+	unsigned long  ovfl_val;	/* overflow value for counters */
+
+	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
+	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */
+
+	unsigned int   num_pmcs;	/* number of PMCS: computed at init time */
+	unsigned int   num_pmds;	/* number of PMDS: computed at init time */
+	unsigned long  impl_pmcs[4];	/* bitmask of implemented PMCS */
+	unsigned long  impl_pmds[4];	/* bitmask of implemented PMDS */
+
+	char	      *pmu_name;	/* PMU family name */
+	unsigned int  pmu_family;	/* cpuid family pattern used to identify pmu */
+	unsigned int  flags;		/* pmu specific flags */
+	unsigned int  num_ibrs;		/* number of IBRS: computed at init time */
+	unsigned int  num_dbrs;		/* number of DBRS: computed at init time */
+	unsigned int  num_counters;	/* PMC/PMD counting pairs : computed at init time */
+	int           (*probe)(void);   /* customized probe routine */
+	unsigned int  use_rr_dbregs:1;	/* set if debug registers used for range restriction */
+} pmu_config_t;
+/*
+ * PMU specific flags
+ */
+#define PFM_PMU_IRQ_RESEND	1	/* PMU needs explicit IRQ resend */
+
+/*
+ * debug register related type definitions
+ */
+typedef struct {
+	unsigned long ibr_mask:56;
+	unsigned long ibr_plm:4;
+	unsigned long ibr_ig:3;
+	unsigned long ibr_x:1;
+} ibr_mask_reg_t;
+
+typedef struct {
+	unsigned long dbr_mask:56;
+	unsigned long dbr_plm:4;
+	unsigned long dbr_ig:2;
+	unsigned long dbr_w:1;
+	unsigned long dbr_r:1;
+} dbr_mask_reg_t;
+
+typedef union {
+	unsigned long  val;
+	ibr_mask_reg_t ibr;
+	dbr_mask_reg_t dbr;
+} dbreg_t;
+
+
+/*
+ * perfmon command descriptions
+ */
+typedef struct {
+	int		(*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+	char		*cmd_name;
+	int		cmd_flags;
+	unsigned int	cmd_narg;
+	size_t		cmd_argsize;
+	int		(*cmd_getsize)(void *arg, size_t *sz);
+} pfm_cmd_desc_t;
+
+#define PFM_CMD_FD		0x01	/* command requires a file descriptor */
+#define PFM_CMD_ARG_READ	0x02	/* command must read argument(s) */
+#define PFM_CMD_ARG_RW		0x04	/* command must read/write argument(s) */
+#define PFM_CMD_STOP		0x08	/* command does not work on zombie context */
+
+
+#define PFM_CMD_NAME(cmd)	pfm_cmd_tab[(cmd)].cmd_name
+#define PFM_CMD_READ_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
+#define PFM_CMD_RW_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
+#define PFM_CMD_USE_FD(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
+#define PFM_CMD_STOPPED(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
+
+#define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
+
+typedef struct {
+	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
+	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
+	unsigned long pfm_ovfl_intr_count; 		/* keep track of ovfl interrupts */
+	unsigned long pfm_ovfl_intr_cycles;		/* cycles spent processing ovfl interrupts */
+	unsigned long pfm_ovfl_intr_cycles_min;		/* min cycles spent processing ovfl interrupts */
+	unsigned long pfm_ovfl_intr_cycles_max;		/* max cycles spent processing ovfl interrupts */
+	unsigned long pfm_smpl_handler_calls;
+	unsigned long pfm_smpl_handler_cycles;
+	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
+} pfm_stats_t;
+
+/*
+ * perfmon internal variables
+ */
+static pfm_stats_t		pfm_stats[NR_CPUS];
+static pfm_session_t		pfm_sessions;	/* global sessions information */
+
+static DEFINE_SPINLOCK(pfm_alt_install_check);
+static pfm_intr_handler_desc_t  *pfm_alt_intr_handler;
+
+static struct proc_dir_entry 	*perfmon_dir;
+static pfm_uuid_t		pfm_null_uuid = {0,};
+
+static spinlock_t		pfm_buffer_fmt_lock;
+static LIST_HEAD(pfm_buffer_fmt_list);
+
+static pmu_config_t		*pmu_conf;
+
+/* sysctl() controls */
+pfm_sysctl_t pfm_sysctl;
+EXPORT_SYMBOL(pfm_sysctl);
+
+static ctl_table pfm_ctl_table[]={
+	{
+		.procname	= "debug",
+		.data		= &pfm_sysctl.debug,
+		.maxlen		= sizeof(int),
+		.mode		= 0666,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "debug_ovfl",
+		.data		= &pfm_sysctl.debug_ovfl,
+		.maxlen		= sizeof(int),
+		.mode		= 0666,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "fastctxsw",
+		.data		= &pfm_sysctl.fastctxsw,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "expert_mode",
+		.data		= &pfm_sysctl.expert_mode,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
+};
+static ctl_table pfm_sysctl_dir[] = {
+	{
+		.procname	= "perfmon",
+		.mode		= 0555,
+		.child		= pfm_ctl_table,
+	},
+ 	{}
+};
+static ctl_table pfm_sysctl_root[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= pfm_sysctl_dir,
+	},
+ 	{}
+};
+static struct ctl_table_header *pfm_sysctl_header;
+
+static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+
+#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
+#define pfm_get_cpu_data(a,b)		per_cpu(a, b)
+
+static inline void
+pfm_put_task(struct task_struct *task)
+{
+	if (task != current) put_task_struct(task);
+}
+
+static inline void
+pfm_reserve_page(unsigned long a)
+{
+	SetPageReserved(vmalloc_to_page((void *)a));
+}
+static inline void
+pfm_unreserve_page(unsigned long a)
+{
+	ClearPageReserved(vmalloc_to_page((void*)a));
+}
+
+static inline unsigned long
+pfm_protect_ctx_ctxsw(pfm_context_t *x)
+{
+	spin_lock(&(x)->ctx_lock);
+	return 0UL;
+}
+
+static inline void
+pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
+{
+	spin_unlock(&(x)->ctx_lock);
+}
+
+static inline unsigned int
+pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
+{
+	return do_munmap(mm, addr, len);
+}
+
+static inline unsigned long 
+pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
+{
+	return get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
+/* forward declaration */
+static const struct dentry_operations pfmfs_dentry_operations;
+
+static struct dentry *
+pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
+{
+	return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations,
+			PFMFS_MAGIC);
+}
+
+static struct file_system_type pfm_fs_type = {
+	.name     = "pfmfs",
+	.mount    = pfmfs_mount,
+	.kill_sb  = kill_anon_super,
+};
+
+DEFINE_PER_CPU(unsigned long, pfm_syst_info);
+DEFINE_PER_CPU(struct task_struct *, pmu_owner);
+DEFINE_PER_CPU(pfm_context_t  *, pmu_ctx);
+DEFINE_PER_CPU(unsigned long, pmu_activation_number);
+EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
+
+
+/* forward declaration */
+static const struct file_operations pfm_file_ops;
+
+/*
+ * forward declarations
+ */
+#ifndef CONFIG_SMP
+static void pfm_lazy_save_regs (struct task_struct *ta);
+#endif
+
+void dump_pmu_state(const char *);
+static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+
+#include "perfmon_itanium.h"
+#include "perfmon_mckinley.h"
+#include "perfmon_montecito.h"
+#include "perfmon_generic.h"
+
+static pmu_config_t *pmu_confs[]={
+	&pmu_conf_mont,
+	&pmu_conf_mck,
+	&pmu_conf_ita,
+	&pmu_conf_gen, /* must be last */
+	NULL
+};
+
+
+static int pfm_end_notify_user(pfm_context_t *ctx);
+
+static inline void
+pfm_clear_psr_pp(void)
+{
+	ia64_rsm(IA64_PSR_PP);
+	ia64_srlz_i();
+}
+
+static inline void
+pfm_set_psr_pp(void)
+{
+	ia64_ssm(IA64_PSR_PP);
+	ia64_srlz_i();
+}
+
+static inline void
+pfm_clear_psr_up(void)
+{
+	ia64_rsm(IA64_PSR_UP);
+	ia64_srlz_i();
+}
+
+static inline void
+pfm_set_psr_up(void)
+{
+	ia64_ssm(IA64_PSR_UP);
+	ia64_srlz_i();
+}
+
+static inline unsigned long
+pfm_get_psr(void)
+{
+	unsigned long tmp;
+	tmp = ia64_getreg(_IA64_REG_PSR);
+	ia64_srlz_i();
+	return tmp;
+}
+
+static inline void
+pfm_set_psr_l(unsigned long val)
+{
+	ia64_setreg(_IA64_REG_PSR_L, val);
+	ia64_srlz_i();
+}
+
+static inline void
+pfm_freeze_pmu(void)
+{
+	ia64_set_pmc(0,1UL);
+	ia64_srlz_d();
+}
+
+static inline void
+pfm_unfreeze_pmu(void)
+{
+	ia64_set_pmc(0,0UL);
+	ia64_srlz_d();
+}
+
+static inline void
+pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
+{
+	int i;
+
+	for (i=0; i < nibrs; i++) {
+		ia64_set_ibr(i, ibrs[i]);
+		ia64_dv_serialize_instruction();
+	}
+	ia64_srlz_i();
+}
+
+static inline void
+pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
+{
+	int i;
+
+	for (i=0; i < ndbrs; i++) {
+		ia64_set_dbr(i, dbrs[i]);
+		ia64_dv_serialize_data();
+	}
+	ia64_srlz_d();
+}
+
+/*
+ * PMD[i] must be a counter. no check is made
+ */
+static inline unsigned long
+pfm_read_soft_counter(pfm_context_t *ctx, int i)
+{
+	return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
+}
+
+/*
+ * PMD[i] must be a counter. no check is made
+ */
+static inline void
+pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
+{
+	unsigned long ovfl_val = pmu_conf->ovfl_val;
+
+	ctx->ctx_pmds[i].val = val  & ~ovfl_val;
+	/*
+	 * writing to unimplemented part is ignore, so we do not need to
+	 * mask off top part
+	 */
+	ia64_set_pmd(i, val & ovfl_val);
+}
+
+static pfm_msg_t *
+pfm_get_new_msg(pfm_context_t *ctx)
+{
+	int idx, next;
+
+	next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;
+
+	DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
+	if (next == ctx->ctx_msgq_head) return NULL;
+
+ 	idx = 	ctx->ctx_msgq_tail;
+	ctx->ctx_msgq_tail = next;
+
+	DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));
+
+	return ctx->ctx_msgq+idx;
+}
+
+static pfm_msg_t *
+pfm_get_next_msg(pfm_context_t *ctx)
+{
+	pfm_msg_t *msg;
+
+	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
+
+	if (PFM_CTXQ_EMPTY(ctx)) return NULL;
+
+	/*
+	 * get oldest message
+	 */
+	msg = ctx->ctx_msgq+ctx->ctx_msgq_head;
+
+	/*
+	 * and move forward
+	 */
+	ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;
+
+	DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));
+
+	return msg;
+}
+
+static void
+pfm_reset_msgq(pfm_context_t *ctx)
+{
+	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
+	DPRINT(("ctx=%p msgq reset\n", ctx));
+}
+
+static void *
+pfm_rvmalloc(unsigned long size)
+{
+	void *mem;
+	unsigned long addr;
+
+	size = PAGE_ALIGN(size);
+	mem  = vzalloc(size);
+	if (mem) {
+		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
+		addr = (unsigned long)mem;
+		while (size > 0) {
+			pfm_reserve_page(addr);
+			addr+=PAGE_SIZE;
+			size-=PAGE_SIZE;
+		}
+	}
+	return mem;
+}
+
+static void
+pfm_rvfree(void *mem, unsigned long size)
+{
+	unsigned long addr;
+
+	if (mem) {
+		DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
+		addr = (unsigned long) mem;
+		while ((long) size > 0) {
+			pfm_unreserve_page(addr);
+			addr+=PAGE_SIZE;
+			size-=PAGE_SIZE;
+		}
+		vfree(mem);
+	}
+	return;
+}
+
+static pfm_context_t *
+pfm_context_alloc(int ctx_flags)
+{
+	pfm_context_t *ctx;
+
+	/* 
+	 * allocate context descriptor 
+	 * must be able to free with interrupts disabled
+	 */
+	ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL);
+	if (ctx) {
+		DPRINT(("alloc ctx @%p\n", ctx));
+
+		/*
+		 * init context protection lock
+		 */
+		spin_lock_init(&ctx->ctx_lock);
+
+		/*
+		 * context is unloaded
+		 */
+		ctx->ctx_state = PFM_CTX_UNLOADED;
+
+		/*
+		 * initialization of context's flags
+		 */
+		ctx->ctx_fl_block       = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
+		ctx->ctx_fl_system      = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
+		ctx->ctx_fl_no_msg      = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
+		/*
+		 * will move to set properties
+		 * ctx->ctx_fl_excl_idle   = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
+		 */
+
+		/*
+		 * init restart semaphore to locked
+		 */
+		init_completion(&ctx->ctx_restart_done);
+
+		/*
+		 * activation is used in SMP only
+		 */
+		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
+		SET_LAST_CPU(ctx, -1);
+
+		/*
+		 * initialize notification message queue
+		 */
+		ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
+		init_waitqueue_head(&ctx->ctx_msgq_wait);
+		init_waitqueue_head(&ctx->ctx_zombieq);
+
+	}
+	return ctx;
+}
+
+static void
+pfm_context_free(pfm_context_t *ctx)
+{
+	if (ctx) {
+		DPRINT(("free ctx @%p\n", ctx));
+		kfree(ctx);
+	}
+}
+
+static void
+pfm_mask_monitoring(struct task_struct *task)
+{
+	pfm_context_t *ctx = PFM_GET_CTX(task);
+	unsigned long mask, val, ovfl_mask;
+	int i;
+
+	DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task)));
+
+	ovfl_mask = pmu_conf->ovfl_val;
+	/*
+	 * monitoring can only be masked as a result of a valid
+	 * counter overflow. In UP, it means that the PMU still
+	 * has an owner. Note that the owner can be different
+	 * from the current task. However the PMU state belongs
+	 * to the owner.
+	 * In SMP, a valid overflow only happens when task is
+	 * current. Therefore if we come here, we know that
+	 * the PMU state belongs to the current task, therefore
+	 * we can access the live registers.
+	 *
+	 * So in both cases, the live register contains the owner's
+	 * state. We can ONLY touch the PMU registers and NOT the PSR.
+	 *
+	 * As a consequence to this call, the ctx->th_pmds[] array
+	 * contains stale information which must be ignored
+	 * when context is reloaded AND monitoring is active (see
+	 * pfm_restart).
+	 */
+	mask = ctx->ctx_used_pmds[0];
+	for (i = 0; mask; i++, mask>>=1) {
+		/* skip non used pmds */
+		if ((mask & 0x1) == 0) continue;
+		val = ia64_get_pmd(i);
+
+		if (PMD_IS_COUNTING(i)) {
+			/*
+		 	 * we rebuild the full 64 bit value of the counter
+		 	 */
+			ctx->ctx_pmds[i].val += (val & ovfl_mask);
+		} else {
+			ctx->ctx_pmds[i].val = val;
+		}
+		DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
+			i,
+			ctx->ctx_pmds[i].val,
+			val & ovfl_mask));
+	}
+	/*
+	 * mask monitoring by setting the privilege level to 0
+	 * we cannot use psr.pp/psr.up for this, it is controlled by
+	 * the user
+	 *
+	 * if task is current, modify actual registers, otherwise modify
+	 * thread save state, i.e., what will be restored in pfm_load_regs()
+	 */
+	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
+	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
+		if ((mask & 0x1) == 0UL) continue;
+		ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL);
+		ctx->th_pmcs[i] &= ~0xfUL;
+		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
+	}
+	/*
+	 * make all of this visible
+	 */
+	ia64_srlz_d();
+}
+
+/*
+ * must always be done with task == current
+ *
+ * context must be in MASKED state when calling
+ */
+static void
+pfm_restore_monitoring(struct task_struct *task)
+{
+	pfm_context_t *ctx = PFM_GET_CTX(task);
+	unsigned long mask, ovfl_mask;
+	unsigned long psr, val;
+	int i, is_system;
+
+	is_system = ctx->ctx_fl_system;
+	ovfl_mask = pmu_conf->ovfl_val;
+
+	if (task != current) {
+		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current));
+		return;
+	}
+	if (ctx->ctx_state != PFM_CTX_MASKED) {
+		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
+			task_pid_nr(task), task_pid_nr(current), ctx->ctx_state);
+		return;
+	}
+	psr = pfm_get_psr();
+	/*
+	 * monitoring is masked via the PMC.
+	 * As we restore their value, we do not want each counter to
+	 * restart right away. We stop monitoring using the PSR,
+	 * restore the PMC (and PMD) and then re-establish the psr
+	 * as it was. Note that there can be no pending overflow at
+	 * this point, because monitoring was MASKED.
+	 *
+	 * system-wide session are pinned and self-monitoring
+	 */
+	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
+		/* disable dcr pp */
+		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
+		pfm_clear_psr_pp();
+	} else {
+		pfm_clear_psr_up();
+	}
+	/*
+	 * first, we restore the PMD
+	 */
+	mask = ctx->ctx_used_pmds[0];
+	for (i = 0; mask; i++, mask>>=1) {
+		/* skip non used pmds */
+		if ((mask & 0x1) == 0) continue;
+
+		if (PMD_IS_COUNTING(i)) {
+			/*
+			 * we split the 64bit value according to
+			 * counter width
+			 */
+			val = ctx->ctx_pmds[i].val & ovfl_mask;
+			ctx->ctx_pmds[i].val &= ~ovfl_mask;
+		} else {
+			val = ctx->ctx_pmds[i].val;
+		}
+		ia64_set_pmd(i, val);
+
+		DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
+			i,
+			ctx->ctx_pmds[i].val,
+			val));
+	}
+	/*
+	 * restore the PMCs
+	 */
+	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
+	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
+		if ((mask & 0x1) == 0UL) continue;
+		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
+		ia64_set_pmc(i, ctx->th_pmcs[i]);
+		DPRINT(("[%d] pmc[%d]=0x%lx\n",
+					task_pid_nr(task), i, ctx->th_pmcs[i]));
+	}
+	ia64_srlz_d();
+
+	/*
+	 * must restore DBR/IBR because could be modified while masked
+	 * XXX: need to optimize 
+	 */
+	if (ctx->ctx_fl_using_dbreg) {
+		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
+		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
+	}
+
+	/*
+	 * now restore PSR
+	 */
+	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
+		/* enable dcr pp */
+		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
+		ia64_srlz_i();
+	}
+	pfm_set_psr_l(psr);
+}
+
+static inline void
+pfm_save_pmds(unsigned long *pmds, unsigned long mask)
+{
+	int i;
+
+	ia64_srlz_d();
+
+	for (i=0; mask; i++, mask>>=1) {
+		if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
+	}
+}
+
+/*
+ * reload from thread state (used for ctxw only)
+ */
+static inline void
+pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
+{
+	int i;
+	unsigned long val, ovfl_val = pmu_conf->ovfl_val;
+
+	for (i=0; mask; i++, mask>>=1) {
+		if ((mask & 0x1) == 0) continue;
+		val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
+		ia64_set_pmd(i, val);
+	}
+	ia64_srlz_d();
+}
+
+/*
+ * propagate PMD from context to thread-state
+ */
+static inline void
+pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
+{
+	unsigned long ovfl_val = pmu_conf->ovfl_val;
+	unsigned long mask = ctx->ctx_all_pmds[0];
+	unsigned long val;
+	int i;
+
+	DPRINT(("mask=0x%lx\n", mask));
+
+	for (i=0; mask; i++, mask>>=1) {
+
+		val = ctx->ctx_pmds[i].val;
+
+		/*
+		 * We break up the 64 bit value into 2 pieces
+		 * the lower bits go to the machine state in the
+		 * thread (will be reloaded on ctxsw in).
+		 * The upper part stays in the soft-counter.
+		 */
+		if (PMD_IS_COUNTING(i)) {
+			ctx->ctx_pmds[i].val = val & ~ovfl_val;
+			 val &= ovfl_val;
+		}
+		ctx->th_pmds[i] = val;
+
+		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
+			i,
+			ctx->th_pmds[i],
+			ctx->ctx_pmds[i].val));
+	}
+}
+
+/*
+ * propagate PMC from context to thread-state
+ */
+static inline void
+pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
+{
+	unsigned long mask = ctx->ctx_all_pmcs[0];
+	int i;
+
+	DPRINT(("mask=0x%lx\n", mask));
+
+	for (i=0; mask; i++, mask>>=1) {
+		/* masking 0 with ovfl_val yields 0 */
+		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
+		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
+	}
+}
+
+
+
+static inline void
+pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
+{
+	int i;
+
+	for (i=0; mask; i++, mask>>=1) {
+		if ((mask & 0x1) == 0) continue;
+		ia64_set_pmc(i, pmcs[i]);
+	}
+	ia64_srlz_d();
+}
+
+static inline int
+pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
+{
+	return memcmp(a, b, sizeof(pfm_uuid_t));
+}
+
+static inline int
+pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
+{
+	int ret = 0;
+	if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
+	return ret;
+}
+
+static inline int
+pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
+{
+	int ret = 0;
+	if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
+	return ret;
+}
+
+
+static inline int
+pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
+		     int cpu, void *arg)
+{
+	int ret = 0;
+	if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
+	return ret;
+}
+
+static inline int
+pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
+		     int cpu, void *arg)
+{
+	int ret = 0;
+	if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
+	return ret;
+}
+
+static inline int
+pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
+{
+	int ret = 0;
+	if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
+	return ret;
+}
+
+static inline int
+pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
+{
+	int ret = 0;
+	if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
+	return ret;
+}
+
+static pfm_buffer_fmt_t *
+__pfm_find_buffer_fmt(pfm_uuid_t uuid)
+{
+	struct list_head * pos;
+	pfm_buffer_fmt_t * entry;
+
+	list_for_each(pos, &pfm_buffer_fmt_list) {
+		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
+		if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
+			return entry;
+	}
+	return NULL;
+}
+ 
+/*
+ * find a buffer format based on its uuid
+ */
+static pfm_buffer_fmt_t *
+pfm_find_buffer_fmt(pfm_uuid_t uuid)
+{
+	pfm_buffer_fmt_t * fmt;
+	spin_lock(&pfm_buffer_fmt_lock);
+	fmt = __pfm_find_buffer_fmt(uuid);
+	spin_unlock(&pfm_buffer_fmt_lock);
+	return fmt;
+}
+ 
+int
+pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
+{
+	int ret = 0;
+
+	/* some sanity checks */
+	if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;
+
+	/* we need at least a handler */
+	if (fmt->fmt_handler == NULL) return -EINVAL;
+
+	/*
+	 * XXX: need check validity of fmt_arg_size
+	 */
+
+	spin_lock(&pfm_buffer_fmt_lock);
+
+	if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
+		printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
+		ret = -EBUSY;
+		goto out;
+	} 
+	list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
+	printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
+
+out:
+	spin_unlock(&pfm_buffer_fmt_lock);
+ 	return ret;
+}
+EXPORT_SYMBOL(pfm_register_buffer_fmt);
+
+int
+pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
+{
+	pfm_buffer_fmt_t *fmt;
+	int ret = 0;
+
+	spin_lock(&pfm_buffer_fmt_lock);
+
+	fmt = __pfm_find_buffer_fmt(uuid);
+	if (!fmt) {
+		printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
+		ret = -EINVAL;
+		goto out;
+	}
+	list_del_init(&fmt->fmt_list);
+	printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);
+
+out:
+	spin_unlock(&pfm_buffer_fmt_lock);
+	return ret;
+
+}
+EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
+
+extern void update_pal_halt_status(int);
+
+static int
+pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
+{
+	unsigned long flags;
+	/*
+	 * validity checks on cpu_mask have been done upstream
+	 */
+	LOCK_PFS(flags);
+
+	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+		pfm_sessions.pfs_sys_sessions,
+		pfm_sessions.pfs_task_sessions,
+		pfm_sessions.pfs_sys_use_dbregs,
+		is_syswide,
+		cpu));
+
+	if (is_syswide) {
+		/*
+		 * cannot mix system wide and per-task sessions
+		 */
+		if (pfm_sessions.pfs_task_sessions > 0UL) {
+			DPRINT(("system wide not possible, %u conflicting task_sessions\n",
+			  	pfm_sessions.pfs_task_sessions));
+			goto abort;
+		}
+
+		if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;
+
+		DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));
+
+		pfm_sessions.pfs_sys_session[cpu] = task;
+
+		pfm_sessions.pfs_sys_sessions++ ;
+
+	} else {
+		if (pfm_sessions.pfs_sys_sessions) goto abort;
+		pfm_sessions.pfs_task_sessions++;
+	}
+
+	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+		pfm_sessions.pfs_sys_sessions,
+		pfm_sessions.pfs_task_sessions,
+		pfm_sessions.pfs_sys_use_dbregs,
+		is_syswide,
+		cpu));
+
+	/*
+	 * disable default_idle() to go to PAL_HALT
+	 */
+	update_pal_halt_status(0);
+
+	UNLOCK_PFS(flags);
+
+	return 0;
+
+error_conflict:
+	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
+  		task_pid_nr(pfm_sessions.pfs_sys_session[cpu]),
+		cpu));
+abort:
+	UNLOCK_PFS(flags);
+
+	return -EBUSY;
+
+}
+
+static int
+pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
+{
+	unsigned long flags;
+	/*
+	 * validity checks on cpu_mask have been done upstream
+	 */
+	LOCK_PFS(flags);
+
+	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+		pfm_sessions.pfs_sys_sessions,
+		pfm_sessions.pfs_task_sessions,
+		pfm_sessions.pfs_sys_use_dbregs,
+		is_syswide,
+		cpu));
+
+
+	if (is_syswide) {
+		pfm_sessions.pfs_sys_session[cpu] = NULL;
+		/*
+		 * would not work with perfmon+more than one bit in cpu_mask
+		 */
+		if (ctx && ctx->ctx_fl_using_dbreg) {
+			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
+				printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
+			} else {
+				pfm_sessions.pfs_sys_use_dbregs--;
+			}
+		}
+		pfm_sessions.pfs_sys_sessions--;
+	} else {
+		pfm_sessions.pfs_task_sessions--;
+	}
+	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+		pfm_sessions.pfs_sys_sessions,
+		pfm_sessions.pfs_task_sessions,
+		pfm_sessions.pfs_sys_use_dbregs,
+		is_syswide,
+		cpu));
+
+	/*
+	 * if possible, enable default_idle() to go into PAL_HALT
+	 */
+	if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0)
+		update_pal_halt_status(1);
+
+	UNLOCK_PFS(flags);
+
+	return 0;
+}
+
+/*
+ * removes virtual mapping of the sampling buffer.
+ * IMPORTANT: cannot be called with interrupts disable, e.g. inside
+ * a PROTECT_CTX() section.
+ */
+static int
+pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
+{
+	int r;
+
+	/* sanity checks */
+	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
+		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm);
+		return -EINVAL;
+	}
+
+	DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));
+
+	/*
+	 * does the actual unmapping
+	 */
+	down_write(&task->mm->mmap_sem);
+
+	DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));
+
+	r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
+
+	up_write(&task->mm->mmap_sem);
+	if (r !=0) {
+		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size);
+	}
+
+	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
+
+	return 0;
+}
+
+/*
+ * free actual physical storage used by sampling buffer
+ */
+#if 0
+static int
+pfm_free_smpl_buffer(pfm_context_t *ctx)
+{
+	pfm_buffer_fmt_t *fmt;
+
+	if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;
+
+	/*
+	 * we won't use the buffer format anymore
+	 */
+	fmt = ctx->ctx_buf_fmt;
+
+	DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
+		ctx->ctx_smpl_hdr,
+		ctx->ctx_smpl_size,
+		ctx->ctx_smpl_vaddr));
+
+	pfm_buf_fmt_exit(fmt, current, NULL, NULL);
+
+	/*
+	 * free the buffer
+	 */
+	pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);
+
+	ctx->ctx_smpl_hdr  = NULL;
+	ctx->ctx_smpl_size = 0UL;
+
+	return 0;
+
+invalid_free:
+	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current));
+	return -EINVAL;
+}
+#endif
+
+static inline void
+pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
+{
+	if (fmt == NULL) return;
+
+	pfm_buf_fmt_exit(fmt, current, NULL, NULL);
+
+}
+
+/*
+ * pfmfs should _never_ be mounted by userland - too much of security hassle,
+ * no real gain from having the whole whorehouse mounted. So we don't need
+ * any operations on the root directory. However, we need a non-trivial
+ * d_name - pfm: will go nicely and kill the special-casing in procfs.
+ */
+static struct vfsmount *pfmfs_mnt __read_mostly;
+
+static int __init
+init_pfm_fs(void)
+{
+	int err = register_filesystem(&pfm_fs_type);
+	if (!err) {
+		pfmfs_mnt = kern_mount(&pfm_fs_type);
+		err = PTR_ERR(pfmfs_mnt);
+		if (IS_ERR(pfmfs_mnt))
+			unregister_filesystem(&pfm_fs_type);
+		else
+			err = 0;
+	}
+	return err;
+}
+
+static ssize_t
+pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
+{
+	pfm_context_t *ctx;
+	pfm_msg_t *msg;
+	ssize_t ret;
+	unsigned long flags;
+  	DECLARE_WAITQUEUE(wait, current);
+	if (PFM_IS_FILE(filp) == 0) {
+		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
+		return -EINVAL;
+	}
+
+	ctx = filp->private_data;
+	if (ctx == NULL) {
+		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current));
+		return -EINVAL;
+	}
+
+	/*
+	 * check even when there is no message
+	 */
+	if (size < sizeof(pfm_msg_t)) {
+		DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
+		return -EINVAL;
+	}
+
+	PROTECT_CTX(ctx, flags);
+
+  	/*
+	 * put ourselves on the wait queue
+	 */
+  	add_wait_queue(&ctx->ctx_msgq_wait, &wait);
+
+
+  	for(;;) {
+		/*
+		 * check wait queue
+		 */
+
+  		set_current_state(TASK_INTERRUPTIBLE);
+
+		DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
+
+		ret = 0;
+		if(PFM_CTXQ_EMPTY(ctx) == 0) break;
+
+		UNPROTECT_CTX(ctx, flags);
+
+		/*
+		 * check non-blocking read
+		 */
+      		ret = -EAGAIN;
+		if(filp->f_flags & O_NONBLOCK) break;
+
+		/*
+		 * check pending signals
+		 */
+		if(signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+      		/*
+		 * no message, so wait
+		 */
+      		schedule();
+
+		PROTECT_CTX(ctx, flags);
+	}
+	DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret));
+  	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
+
+	if (ret < 0) goto abort;
+
+	ret = -EINVAL;
+	msg = pfm_get_next_msg(ctx);
+	if (msg == NULL) {
+		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current));
+		goto abort_locked;
+	}
+
+	DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
+
+	ret = -EFAULT;
+  	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
+
+abort_locked:
+	UNPROTECT_CTX(ctx, flags);
+abort:
+	return ret;
+}
+
+static ssize_t
+pfm_write(struct file *file, const char __user *ubuf,
+			  size_t size, loff_t *ppos)
+{
+	DPRINT(("pfm_write called\n"));
+	return -EINVAL;
+}
+
+static unsigned int
+pfm_poll(struct file *filp, poll_table * wait)
+{
+	pfm_context_t *ctx;
+	unsigned long flags;
+	unsigned int mask = 0;
+
+	if (PFM_IS_FILE(filp) == 0) {
+		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
+		return 0;
+	}
+
+	ctx = filp->private_data;
+	if (ctx == NULL) {
+		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current));
+		return 0;
+	}
+
+
+	DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));
+
+	poll_wait(filp, &ctx->ctx_msgq_wait, wait);
+
+	PROTECT_CTX(ctx, flags);
+
+	if (PFM_CTXQ_EMPTY(ctx) == 0)
+		mask =  POLLIN | POLLRDNORM;
+
+	UNPROTECT_CTX(ctx, flags);
+
+	DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask));
+
+	return mask;
+}
+
+static long
+pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	DPRINT(("pfm_ioctl called\n"));
+	return -EINVAL;
+}
+
+/*
+ * interrupt cannot be masked when coming here
+ */
+static inline int
+pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
+{
+	int ret;
+
+	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
+
+	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
+		task_pid_nr(current),
+		fd,
+		on,
+		ctx->ctx_async_queue, ret));
+
+	return ret;
+}
+
+static int
+pfm_fasync(int fd, struct file *filp, int on)
+{
+	pfm_context_t *ctx;
+	int ret;
+
+	if (PFM_IS_FILE(filp) == 0) {
+		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current));
+		return -EBADF;
+	}
+
+	ctx = filp->private_data;
+	if (ctx == NULL) {
+		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current));
+		return -EBADF;
+	}
+	/*
+	 * we cannot mask interrupts during this call because this may
+	 * may go to sleep if memory is not readily avalaible.
+	 *
+	 * We are protected from the conetxt disappearing by the get_fd()/put_fd()
+	 * done in caller. Serialization of this function is ensured by caller.
+	 */
+	ret = pfm_do_fasync(fd, filp, ctx, on);
+
+
+	DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
+		fd,
+		on,
+		ctx->ctx_async_queue, ret));
+
+	return ret;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * this function is exclusively called from pfm_close().
+ * The context is not protected at that time, nor are interrupts
+ * on the remote CPU. That's necessary to avoid deadlocks.
+ */
+static void
+pfm_syswide_force_stop(void *info)
+{
+	pfm_context_t   *ctx = (pfm_context_t *)info;
+	struct pt_regs *regs = task_pt_regs(current);
+	struct task_struct *owner;
+	unsigned long flags;
+	int ret;
+
+	if (ctx->ctx_cpu != smp_processor_id()) {
+		printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d  but on CPU%d\n",
+			ctx->ctx_cpu,
+			smp_processor_id());
+		return;
+	}
+	owner = GET_PMU_OWNER();
+	if (owner != ctx->ctx_task) {
+		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
+			smp_processor_id(),
+			task_pid_nr(owner), task_pid_nr(ctx->ctx_task));
+		return;
+	}
+	if (GET_PMU_CTX() != ctx) {
+		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
+			smp_processor_id(),
+			GET_PMU_CTX(), ctx);
+		return;
+	}
+
+	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task)));
+	/*
+	 * the context is already protected in pfm_close(), we simply
+	 * need to mask interrupts to avoid a PMU interrupt race on
+	 * this CPU
+	 */
+	local_irq_save(flags);
+
+	ret = pfm_context_unload(ctx, NULL, 0, regs);
+	if (ret) {
+		DPRINT(("context_unload returned %d\n", ret));
+	}
+
+	/*
+	 * unmask interrupts, PMU interrupts are now spurious here
+	 */
+	local_irq_restore(flags);
+}
+
+static void
+pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
+{
+	int ret;
+
+	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
+	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1);
+	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * called for each close(). Partially free resources.
+ * When caller is self-monitoring, the context is unloaded.
+ */
+static int
+pfm_flush(struct file *filp, fl_owner_t id)
+{
+	pfm_context_t *ctx;
+	struct task_struct *task;
+	struct pt_regs *regs;
+	unsigned long flags;
+	unsigned long smpl_buf_size = 0UL;
+	void *smpl_buf_vaddr = NULL;
+	int state, is_system;
+
+	if (PFM_IS_FILE(filp) == 0) {
+		DPRINT(("bad magic for\n"));
+		return -EBADF;
+	}
+
+	ctx = filp->private_data;
+	if (ctx == NULL) {
+		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current));
+		return -EBADF;
+	}
+
+	/*
+	 * remove our file from the async queue, if we use this mode.
+	 * This can be done without the context being protected. We come
+	 * here when the context has become unreachable by other tasks.
+	 *
+	 * We may still have active monitoring at this point and we may
+	 * end up in pfm_overflow_handler(). However, fasync_helper()
+	 * operates with interrupts disabled and it cleans up the
+	 * queue. If the PMU handler is called prior to entering
+	 * fasync_helper() then it will send a signal. If it is
+	 * invoked after, it will find an empty queue and no
+	 * signal will be sent. In both case, we are safe
+	 */
+	PROTECT_CTX(ctx, flags);
+
+	state     = ctx->ctx_state;
+	is_system = ctx->ctx_fl_system;
+
+	task = PFM_CTX_TASK(ctx);
+	regs = task_pt_regs(task);
+
+	DPRINT(("ctx_state=%d is_current=%d\n",
+		state,
+		task == current ? 1 : 0));
+
+	/*
+	 * if state == UNLOADED, then task is NULL
+	 */
+
+	/*
+	 * we must stop and unload because we are losing access to the context.
+	 */
+	if (task == current) {
+#ifdef CONFIG_SMP
+		/*
+		 * the task IS the owner but it migrated to another CPU: that's bad
+		 * but we must handle this cleanly. Unfortunately, the kernel does
+		 * not provide a mechanism to block migration (while the context is loaded).
+		 *
+		 * We need to release the resource on the ORIGINAL cpu.
+		 */
+		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+
+			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+			/*
+			 * keep context protected but unmask interrupt for IPI
+			 */
+			local_irq_restore(flags);
+
+			pfm_syswide_cleanup_other_cpu(ctx);
+
+			/*
+			 * restore interrupt masking
+			 */
+			local_irq_save(flags);
+
+			/*
+			 * context is unloaded at this point
+			 */
+		} else
+#endif /* CONFIG_SMP */
+		{
+
+			DPRINT(("forcing unload\n"));
+			/*
+		 	* stop and unload, returning with state UNLOADED
+		 	* and session unreserved.
+		 	*/
+			pfm_context_unload(ctx, NULL, 0, regs);
+
+			DPRINT(("ctx_state=%d\n", ctx->ctx_state));
+		}
+	}
+
+	/*
+	 * remove virtual mapping, if any, for the calling task.
+	 * cannot reset ctx field until last user is calling close().
+	 *
+	 * ctx_smpl_vaddr must never be cleared because it is needed
+	 * by every task with access to the context
+	 *
+	 * When called from do_exit(), the mm context is gone already, therefore
+	 * mm is NULL, i.e., the VMA is already gone  and we do not have to
+	 * do anything here
+	 */
+	if (ctx->ctx_smpl_vaddr && current->mm) {
+		smpl_buf_vaddr = ctx->ctx_smpl_vaddr;
+		smpl_buf_size  = ctx->ctx_smpl_size;
+	}
+
+	UNPROTECT_CTX(ctx, flags);
+
+	/*
+	 * if there was a mapping, then we systematically remove it
+	 * at this point. Cannot be done inside critical section
+	 * because some VM function reenables interrupts.
+	 *
+	 */
+	if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);
+
+	return 0;
+}
+/*
+ * called either on explicit close() or from exit_files(). 
+ * Only the LAST user of the file gets to this point, i.e., it is
+ * called only ONCE.
+ *
+ * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero 
+ * (fput()),i.e, last task to access the file. Nobody else can access the 
+ * file at this point.
+ *
+ * When called from exit_files(), the VMA has been freed because exit_mm()
+ * is executed before exit_files().
+ *
+ * When called from exit_files(), the current task is not yet ZOMBIE but we
+ * flush the PMU state to the context. 
+ */
+static int
+pfm_close(struct inode *inode, struct file *filp)
+{
+	pfm_context_t *ctx;
+	struct task_struct *task;
+	struct pt_regs *regs;
+  	DECLARE_WAITQUEUE(wait, current);
+	unsigned long flags;
+	unsigned long smpl_buf_size = 0UL;
+	void *smpl_buf_addr = NULL;
+	int free_possible = 1;
+	int state, is_system;
+
+	DPRINT(("pfm_close called private=%p\n", filp->private_data));
+
+	if (PFM_IS_FILE(filp) == 0) {
+		DPRINT(("bad magic\n"));
+		return -EBADF;
+	}
+	
+	ctx = filp->private_data;
+	if (ctx == NULL) {
+		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current));
+		return -EBADF;
+	}
+
+	PROTECT_CTX(ctx, flags);
+
+	state     = ctx->ctx_state;
+	is_system = ctx->ctx_fl_system;
+
+	task = PFM_CTX_TASK(ctx);
+	regs = task_pt_regs(task);
+
+	DPRINT(("ctx_state=%d is_current=%d\n", 
+		state,
+		task == current ? 1 : 0));
+
+	/*
+	 * if task == current, then pfm_flush() unloaded the context
+	 */
+	if (state == PFM_CTX_UNLOADED) goto doit;
+
+	/*
+	 * context is loaded/masked and task != current, we need to
+	 * either force an unload or go zombie
+	 */
+
+	/*
+	 * The task is currently blocked or will block after an overflow.
+	 * we must force it to wakeup to get out of the
+	 * MASKED state and transition to the unloaded state by itself.
+	 *
+	 * This situation is only possible for per-task mode
+	 */
+	if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) {
+
+		/*
+		 * set a "partial" zombie state to be checked
+		 * upon return from down() in pfm_handle_work().
+		 *
+		 * We cannot use the ZOMBIE state, because it is checked
+		 * by pfm_load_regs() which is called upon wakeup from down().
+		 * In such case, it would free the context and then we would
+		 * return to pfm_handle_work() which would access the
+		 * stale context. Instead, we set a flag invisible to pfm_load_regs()
+		 * but visible to pfm_handle_work().
+		 *
+		 * For some window of time, we have a zombie context with
+		 * ctx_state = MASKED  and not ZOMBIE
+		 */
+		ctx->ctx_fl_going_zombie = 1;
+
+		/*
+		 * force task to wake up from MASKED state
+		 */
+		complete(&ctx->ctx_restart_done);
+
+		DPRINT(("waking up ctx_state=%d\n", state));
+
+		/*
+		 * put ourself to sleep waiting for the other
+		 * task to report completion
+		 *
+		 * the context is protected by mutex, therefore there
+		 * is no risk of being notified of completion before
+		 * begin actually on the waitq.
+		 */
+  		set_current_state(TASK_INTERRUPTIBLE);
+  		add_wait_queue(&ctx->ctx_zombieq, &wait);
+
+		UNPROTECT_CTX(ctx, flags);
+
+		/*
+		 * XXX: check for signals :
+		 * 	- ok for explicit close
+		 * 	- not ok when coming from exit_files()
+		 */
+      		schedule();
+
+
+		PROTECT_CTX(ctx, flags);
+
+
+		remove_wait_queue(&ctx->ctx_zombieq, &wait);
+  		set_current_state(TASK_RUNNING);
+
+		/*
+		 * context is unloaded at this point
+		 */
+		DPRINT(("after zombie wakeup ctx_state=%d for\n", state));
+	}
+	else if (task != current) {
+#ifdef CONFIG_SMP
+		/*
+	 	 * switch context to zombie state
+	 	 */
+		ctx->ctx_state = PFM_CTX_ZOMBIE;
+
+		DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task)));
+		/*
+		 * cannot free the context on the spot. deferred until
+		 * the task notices the ZOMBIE state
+		 */
+		free_possible = 0;
+#else
+		pfm_context_unload(ctx, NULL, 0, regs);
+#endif
+	}
+
+doit:
+	/* reload state, may have changed during  opening of critical section */
+	state = ctx->ctx_state;
+
+	/*
+	 * the context is still attached to a task (possibly current)
+	 * we cannot destroy it right now
+	 */
+
+	/*
+	 * we must free the sampling buffer right here because
+	 * we cannot rely on it being cleaned up later by the
+	 * monitored task. It is not possible to free vmalloc'ed
+	 * memory in pfm_load_regs(). Instead, we remove the buffer
+	 * now. should there be subsequent PMU overflow originally
+	 * meant for sampling, the will be converted to spurious
+	 * and that's fine because the monitoring tools is gone anyway.
+	 */
+	if (ctx->ctx_smpl_hdr) {
+		smpl_buf_addr = ctx->ctx_smpl_hdr;
+		smpl_buf_size = ctx->ctx_smpl_size;
+		/* no more sampling */
+		ctx->ctx_smpl_hdr = NULL;
+		ctx->ctx_fl_is_sampling = 0;
+	}
+
+	DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
+		state,
+		free_possible,
+		smpl_buf_addr,
+		smpl_buf_size));
+
+	if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);
+
+	/*
+	 * UNLOADED that the session has already been unreserved.
+	 */
+	if (state == PFM_CTX_ZOMBIE) {
+		pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu);
+	}
+
+	/*
+	 * disconnect file descriptor from context must be done
+	 * before we unlock.
+	 */
+	filp->private_data = NULL;
+
+	/*
+	 * if we free on the spot, the context is now completely unreachable
+	 * from the callers side. The monitored task side is also cut, so we
+	 * can freely cut.
+	 *
+	 * If we have a deferred free, only the caller side is disconnected.
+	 */
+	UNPROTECT_CTX(ctx, flags);
+
+	/*
+	 * All memory free operations (especially for vmalloc'ed memory)
+	 * MUST be done with interrupts ENABLED.
+	 */
+	if (smpl_buf_addr)  pfm_rvfree(smpl_buf_addr, smpl_buf_size);
+
+	/*
+	 * return the memory used by the context
+	 */
+	if (free_possible) pfm_context_free(ctx);
+
+	return 0;
+}
+
+static int
+pfm_no_open(struct inode *irrelevant, struct file *dontcare)
+{
+	DPRINT(("pfm_no_open called\n"));
+	return -ENXIO;
+}
+
+
+
+static const struct file_operations pfm_file_ops = {
+	.llseek		= no_llseek,
+	.read		= pfm_read,
+	.write		= pfm_write,
+	.poll		= pfm_poll,
+	.unlocked_ioctl = pfm_ioctl,
+	.open		= pfm_no_open,	/* special open code to disallow open via /proc */
+	.fasync		= pfm_fasync,
+	.release	= pfm_close,
+	.flush		= pfm_flush
+};
+
+static int
+pfmfs_delete_dentry(const struct dentry *dentry)
+{
+	return 1;
+}
+
+static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+	return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
+			     dentry->d_inode->i_ino);
+}
+
+static const struct dentry_operations pfmfs_dentry_operations = {
+	.d_delete = pfmfs_delete_dentry,
+	.d_dname = pfmfs_dname,
+};
+
+
+static struct file *
+pfm_alloc_file(pfm_context_t *ctx)
+{
+	struct file *file;
+	struct inode *inode;
+	struct path path;
+	struct qstr this = { .name = "" };
+
+	/*
+	 * allocate a new inode
+	 */
+	inode = new_inode(pfmfs_mnt->mnt_sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));
+
+	inode->i_mode = S_IFCHR|S_IRUGO;
+	inode->i_uid  = current_fsuid();
+	inode->i_gid  = current_fsgid();
+
+	/*
+	 * allocate a new dcache entry
+	 */
+	path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
+	if (!path.dentry) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	path.mnt = mntget(pfmfs_mnt);
+
+	d_add(path.dentry, inode);
+
+	file = alloc_file(&path, FMODE_READ, &pfm_file_ops);
+	if (!file) {
+		path_put(&path);
+		return ERR_PTR(-ENFILE);
+	}
+
+	file->f_flags = O_RDONLY;
+	file->private_data = ctx;
+
+	return file;
+}
+
+static int
+pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
+{
+	DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
+
+	while (size > 0) {
+		unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT;
+
+
+		if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY))
+			return -ENOMEM;
+
+		addr  += PAGE_SIZE;
+		buf   += PAGE_SIZE;
+		size  -= PAGE_SIZE;
+	}
+	return 0;
+}
+
+/*
+ * allocate a sampling buffer and remaps it into the user address space of the task
+ */
+static int
+pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma = NULL;
+	unsigned long size;
+	void *smpl_buf;
+
+
+	/*
+	 * the fixed header + requested size and align to page boundary
+	 */
+	size = PAGE_ALIGN(rsize);
+
+	DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size));
+
+	/*
+	 * check requested size to avoid Denial-of-service attacks
+	 * XXX: may have to refine this test
+	 * Check against address space limit.
+	 *
+	 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
+	 * 	return -ENOMEM;
+	 */
+	if (size > task_rlimit(task, RLIMIT_MEMLOCK))
+		return -ENOMEM;
+
+	/*
+	 * We do the easy to undo allocations first.
+ 	 *
+	 * pfm_rvmalloc(), clears the buffer, so there is no leak
+	 */
+	smpl_buf = pfm_rvmalloc(size);
+	if (smpl_buf == NULL) {
+		DPRINT(("Can't allocate sampling buffer\n"));
+		return -ENOMEM;
+	}
+
+	DPRINT(("smpl_buf @%p\n", smpl_buf));
+
+	/* allocate vma */
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (!vma) {
+		DPRINT(("Cannot allocate vma\n"));
+		goto error_kmem;
+	}
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
+
+	/*
+	 * partially initialize the vma for the sampling buffer
+	 */
+	vma->vm_mm	     = mm;
+	vma->vm_file	     = filp;
+	vma->vm_flags	     = VM_READ| VM_MAYREAD |VM_RESERVED;
+	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
+
+	/*
+	 * Now we have everything we need and we can initialize
+	 * and connect all the data structures
+	 */
+
+	ctx->ctx_smpl_hdr   = smpl_buf;
+	ctx->ctx_smpl_size  = size; /* aligned size */
+
+	/*
+	 * Let's do the difficult operations next.
+	 *
+	 * now we atomically find some area in the address space and
+	 * remap the buffer in it.
+	 */
+	down_write(&task->mm->mmap_sem);
+
+	/* find some free area in address space, must have mmap sem held */
+	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
+	if (vma->vm_start == 0UL) {
+		DPRINT(("Cannot find unmapped area for size %ld\n", size));
+		up_write(&task->mm->mmap_sem);
+		goto error;
+	}
+	vma->vm_end = vma->vm_start + size;
+	vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
+
+	DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start));
+
+	/* can only be applied to current task, need to have the mm semaphore held when called */
+	if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
+		DPRINT(("Can't remap buffer\n"));
+		up_write(&task->mm->mmap_sem);
+		goto error;
+	}
+
+	get_file(filp);
+
+	/*
+	 * now insert the vma in the vm list for the process, must be
+	 * done with mmap lock held
+	 */
+	insert_vm_struct(mm, vma);
+
+	mm->total_vm  += size >> PAGE_SHIFT;
+	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
+							vma_pages(vma));
+	up_write(&task->mm->mmap_sem);
+
+	/*
+	 * keep track of user level virtual address
+	 */
+	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
+	*(unsigned long *)user_vaddr = vma->vm_start;
+
+	return 0;
+
+error:
+	kmem_cache_free(vm_area_cachep, vma);
+error_kmem:
+	pfm_rvfree(smpl_buf, size);
+
+	return -ENOMEM;
+}
+
+/*
+ * XXX: do something better here
+ */
+static int
+pfm_bad_permissions(struct task_struct *task)
+{
+	const struct cred *tcred;
+	uid_t uid = current_uid();
+	gid_t gid = current_gid();
+	int ret;
+
+	rcu_read_lock();
+	tcred = __task_cred(task);
+
+	/* inspired by ptrace_attach() */
+	DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
+		uid,
+		gid,
+		tcred->euid,
+		tcred->suid,
+		tcred->uid,
+		tcred->egid,
+		tcred->sgid));
+
+	ret = ((uid != tcred->euid)
+	       || (uid != tcred->suid)
+	       || (uid != tcred->uid)
+	       || (gid != tcred->egid)
+	       || (gid != tcred->sgid)
+	       || (gid != tcred->gid)) && !capable(CAP_SYS_PTRACE);
+
+	rcu_read_unlock();
+	return ret;
+}
+
+static int
+pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
+{
+	int ctx_flags;
+
+	/* valid signal */
+
+	ctx_flags = pfx->ctx_flags;
+
+	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
+
+		/*
+		 * cannot block in this mode
+		 */
+		if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
+			DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
+			return -EINVAL;
+		}
+	} else {
+	}
+	/* probably more to add here */
+
+	return 0;
+}
+
+static int
+pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags,
+		     unsigned int cpu, pfarg_context_t *arg)
+{
+	pfm_buffer_fmt_t *fmt = NULL;
+	unsigned long size = 0UL;
+	void *uaddr = NULL;
+	void *fmt_arg = NULL;
+	int ret = 0;
+#define PFM_CTXARG_BUF_ARG(a)	(pfm_buffer_fmt_t *)(a+1)
+
+	/* invoke and lock buffer format, if found */
+	fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
+	if (fmt == NULL) {
+		DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task)));
+		return -EINVAL;
+	}
+
+	/*
+	 * buffer argument MUST be contiguous to pfarg_context_t
+	 */
+	if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg);
+
+	ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
+
+	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret));
+
+	if (ret) goto error;
+
+	/* link buffer format and context */
+	ctx->ctx_buf_fmt = fmt;
+	ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */
+
+	/*
+	 * check if buffer format wants to use perfmon buffer allocation/mapping service
+	 */
+	ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size);
+	if (ret) goto error;
+
+	if (size) {
+		/*
+		 * buffer is always remapped into the caller's address space
+		 */
+		ret = pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr);
+		if (ret) goto error;
+
+		/* keep track of user address of buffer */
+		arg->ctx_smpl_vaddr = uaddr;
+	}
+	ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg);
+
+error:
+	return ret;
+}
+
+static void
+pfm_reset_pmu_state(pfm_context_t *ctx)
+{
+	int i;
+
+	/*
+	 * install reset values for PMC.
+	 */
+	for (i=1; PMC_IS_LAST(i) == 0; i++) {
+		if (PMC_IS_IMPL(i) == 0) continue;
+		ctx->ctx_pmcs[i] = PMC_DFL_VAL(i);
+		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i]));
+	}
+	/*
+	 * PMD registers are set to 0UL when the context in memset()
+	 */
+
+	/*
+	 * On context switched restore, we must restore ALL pmc and ALL pmd even
+	 * when they are not actively used by the task. In UP, the incoming process
+	 * may otherwise pick up left over PMC, PMD state from the previous process.
+	 * As opposed to PMD, stale PMC can cause harm to the incoming
+	 * process because they may change what is being measured.
+	 * Therefore, we must systematically reinstall the entire
+	 * PMC state. In SMP, the same thing is possible on the
+	 * same CPU but also on between 2 CPUs.
+	 *
+	 * The problem with PMD is information leaking especially
+	 * to user level when psr.sp=0
+	 *
+	 * There is unfortunately no easy way to avoid this problem
+	 * on either UP or SMP. This definitively slows down the
+	 * pfm_load_regs() function.
+	 */
+
+	 /*
+	  * bitmask of all PMCs accessible to this context
+	  *
+	  * PMC0 is treated differently.
+	  */
+	ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;
+
+	/*
+	 * bitmask of all PMDs that are accessible to this context
+	 */
+	ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];
+
+	DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0]));
+
+	/*
+	 * useful in case of re-enable after disable
+	 */
+	ctx->ctx_used_ibrs[0] = 0UL;
+	ctx->ctx_used_dbrs[0] = 0UL;
+}
+
+static int
+pfm_ctx_getsize(void *arg, size_t *sz)
+{
+	pfarg_context_t *req = (pfarg_context_t *)arg;
+	pfm_buffer_fmt_t *fmt;
+
+	*sz = 0;
+
+	if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0;
+
+	fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id);
+	if (fmt == NULL) {
+		DPRINT(("cannot find buffer format\n"));
+		return -EINVAL;
+	}
+	/* get just enough to copy in user parameters */
+	*sz = fmt->fmt_arg_size;
+	DPRINT(("arg_size=%lu\n", *sz));
+
+	return 0;
+}
+
+
+
+/*
+ * cannot attach if :
+ * 	- kernel task
+ * 	- task not owned by caller
+ * 	- task incompatible with context mode
+ */
+static int
+pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
+{
+	/*
+	 * no kernel task or task not owner by caller
+	 */
+	if (task->mm == NULL) {
+		DPRINT(("task [%d] has not memory context (kernel thread)\n", task_pid_nr(task)));
+		return -EPERM;
+	}
+	if (pfm_bad_permissions(task)) {
+		DPRINT(("no permission to attach to  [%d]\n", task_pid_nr(task)));
+		return -EPERM;
+	}
+	/*
+	 * cannot block in self-monitoring mode
+	 */
+	if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
+		DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task)));
+		return -EINVAL;
+	}
+
+	if (task->exit_state == EXIT_ZOMBIE) {
+		DPRINT(("cannot attach to  zombie task [%d]\n", task_pid_nr(task)));
+		return -EBUSY;
+	}
+
+	/*
+	 * always ok for self
+	 */
+	if (task == current) return 0;
+
+	if (!task_is_stopped_or_traced(task)) {
+		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
+		return -EBUSY;
+	}
+	/*
+	 * make sure the task is off any CPU
+	 */
+	wait_task_inactive(task, 0);
+
+	/* more to come... */
+
+	return 0;
+}
+
+static int
+pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
+{
+	struct task_struct *p = current;
+	int ret;
+
+	/* XXX: need to add more checks here */
+	if (pid < 2) return -EPERM;
+
+	if (pid != task_pid_vnr(current)) {
+
+		read_lock(&tasklist_lock);
+
+		p = find_task_by_vpid(pid);
+
+		/* make sure task cannot go away while we operate on it */
+		if (p) get_task_struct(p);
+
+		read_unlock(&tasklist_lock);
+
+		if (p == NULL) return -ESRCH;
+	}
+
+	ret = pfm_task_incompatible(ctx, p);
+	if (ret == 0) {
+		*task = p;
+	} else if (p != current) {
+		pfm_put_task(p);
+	}
+	return ret;
+}
+
+
+
+static int
+pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	pfarg_context_t *req = (pfarg_context_t *)arg;
+	struct file *filp;
+	struct path path;
+	int ctx_flags;
+	int fd;
+	int ret;
+
+	/* let's check the arguments first */
+	ret = pfarg_is_sane(current, req);
+	if (ret < 0)
+		return ret;
+
+	ctx_flags = req->ctx_flags;
+
+	ret = -ENOMEM;
+
+	fd = get_unused_fd();
+	if (fd < 0)
+		return fd;
+
+	ctx = pfm_context_alloc(ctx_flags);
+	if (!ctx)
+		goto error;
+
+	filp = pfm_alloc_file(ctx);
+	if (IS_ERR(filp)) {
+		ret = PTR_ERR(filp);
+		goto error_file;
+	}
+
+	req->ctx_fd = ctx->ctx_fd = fd;
+
+	/*
+	 * does the user want to sample?
+	 */
+	if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
+		ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req);
+		if (ret)
+			goto buffer_error;
+	}
+
+	DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n",
+		ctx,
+		ctx_flags,
+		ctx->ctx_fl_system,
+		ctx->ctx_fl_block,
+		ctx->ctx_fl_excl_idle,
+		ctx->ctx_fl_no_msg,
+		ctx->ctx_fd));
+
+	/*
+	 * initialize soft PMU state
+	 */
+	pfm_reset_pmu_state(ctx);
+
+	fd_install(fd, filp);
+
+	return 0;
+
+buffer_error:
+	path = filp->f_path;
+	put_filp(filp);
+	path_put(&path);
+
+	if (ctx->ctx_buf_fmt) {
+		pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
+	}
+error_file:
+	pfm_context_free(ctx);
+
+error:
+	put_unused_fd(fd);
+	return ret;
+}
+
+static inline unsigned long
+pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
+{
+	unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;
+	unsigned long new_seed, old_seed = reg->seed, mask = reg->mask;
+	extern unsigned long carta_random32 (unsigned long seed);
+
+	if (reg->flags & PFM_REGFL_RANDOM) {
+		new_seed = carta_random32(old_seed);
+		val -= (old_seed & mask);	/* counter values are negative numbers! */
+		if ((mask >> 32) != 0)
+			/* construct a full 64-bit random value: */
+			new_seed |= carta_random32(old_seed >> 32) << 32;
+		reg->seed = new_seed;
+	}
+	reg->lval = val;
+	return val;
+}
+
+static void
+pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
+{
+	unsigned long mask = ovfl_regs[0];
+	unsigned long reset_others = 0UL;
+	unsigned long val;
+	int i;
+
+	/*
+	 * now restore reset value on sampling overflowed counters
+	 */
+	mask >>= PMU_FIRST_COUNTER;
+	for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
+
+		if ((mask & 0x1UL) == 0UL) continue;
+
+		ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset);
+		reset_others        |= ctx->ctx_pmds[i].reset_pmds[0];
+
+		DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
+	}
+
+	/*
+	 * Now take care of resetting the other registers
+	 */
+	for(i = 0; reset_others; i++, reset_others >>= 1) {
+
+		if ((reset_others & 0x1) == 0) continue;
+
+		ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
+
+		DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
+			  is_long_reset ? "long" : "short", i, val));
+	}
+}
+
+static void
+pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
+{
+	unsigned long mask = ovfl_regs[0];
+	unsigned long reset_others = 0UL;
+	unsigned long val;
+	int i;
+
+	DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset));
+
+	if (ctx->ctx_state == PFM_CTX_MASKED) {
+		pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset);
+		return;
+	}
+
+	/*
+	 * now restore reset value on sampling overflowed counters
+	 */
+	mask >>= PMU_FIRST_COUNTER;
+	for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
+
+		if ((mask & 0x1UL) == 0UL) continue;
+
+		val           = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset);
+		reset_others |= ctx->ctx_pmds[i].reset_pmds[0];
+
+		DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
+
+		pfm_write_soft_counter(ctx, i, val);
+	}
+
+	/*
+	 * Now take care of resetting the other registers
+	 */
+	for(i = 0; reset_others; i++, reset_others >>= 1) {
+
+		if ((reset_others & 0x1) == 0) continue;
+
+		val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
+
+		if (PMD_IS_COUNTING(i)) {
+			pfm_write_soft_counter(ctx, i, val);
+		} else {
+			ia64_set_pmd(i, val);
+		}
+		DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
+			  is_long_reset ? "long" : "short", i, val));
+	}
+	ia64_srlz_d();
+}
+
+static int
+pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct task_struct *task;
+	pfarg_reg_t *req = (pfarg_reg_t *)arg;
+	unsigned long value, pmc_pm;
+	unsigned long smpl_pmds, reset_pmds, impl_pmds;
+	unsigned int cnum, reg_flags, flags, pmc_type;
+	int i, can_access_pmu = 0, is_loaded, is_system, expert_mode;
+	int is_monitor, is_counting, state;
+	int ret = -EINVAL;
+	pfm_reg_check_t	wr_func;
+#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))
+
+	state     = ctx->ctx_state;
+	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
+	is_system = ctx->ctx_fl_system;
+	task      = ctx->ctx_task;
+	impl_pmds = pmu_conf->impl_pmds[0];
+
+	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
+
+	if (is_loaded) {
+		/*
+		 * In system wide and when the context is loaded, access can only happen
+		 * when the caller is running on the CPU being monitored by the session.
+		 * It does not have to be the owner (ctx_task) of the context per se.
+		 */
+		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+			return -EBUSY;
+		}
+		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
+	}
+	expert_mode = pfm_sysctl.expert_mode; 
+
+	for (i = 0; i < count; i++, req++) {
+
+		cnum       = req->reg_num;
+		reg_flags  = req->reg_flags;
+		value      = req->reg_value;
+		smpl_pmds  = req->reg_smpl_pmds[0];
+		reset_pmds = req->reg_reset_pmds[0];
+		flags      = 0;
+
+
+		if (cnum >= PMU_MAX_PMCS) {
+			DPRINT(("pmc%u is invalid\n", cnum));
+			goto error;
+		}
+
+		pmc_type   = pmu_conf->pmc_desc[cnum].type;
+		pmc_pm     = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1;
+		is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0;
+		is_monitor  = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0;
+
+		/*
+		 * we reject all non implemented PMC as well
+		 * as attempts to modify PMC[0-3] which are used
+		 * as status registers by the PMU
+		 */
+		if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) {
+			DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type));
+			goto error;
+		}
+		wr_func = pmu_conf->pmc_desc[cnum].write_check;
+		/*
+		 * If the PMC is a monitor, then if the value is not the default:
+		 * 	- system-wide session: PMCx.pm=1 (privileged monitor)
+		 * 	- per-task           : PMCx.pm=0 (user monitor)
+		 */
+		if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) {
+			DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n",
+				cnum,
+				pmc_pm,
+				is_system));
+			goto error;
+		}
+
+		if (is_counting) {
+			/*
+		 	 * enforce generation of overflow interrupt. Necessary on all
+		 	 * CPUs.
+		 	 */
+			value |= 1 << PMU_PMC_OI;
+
+			if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
+				flags |= PFM_REGFL_OVFL_NOTIFY;
+			}
+
+			if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
+
+			/* verify validity of smpl_pmds */
+			if ((smpl_pmds & impl_pmds) != smpl_pmds) {
+				DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
+				goto error;
+			}
+
+			/* verify validity of reset_pmds */
+			if ((reset_pmds & impl_pmds) != reset_pmds) {
+				DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
+				goto error;
+			}
+		} else {
+			if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
+				DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
+				goto error;
+			}
+			/* eventid on non-counting monitors are ignored */
+		}
+
+		/*
+		 * execute write checker, if any
+		 */
+		if (likely(expert_mode == 0 && wr_func)) {
+			ret = (*wr_func)(task, ctx, cnum, &value, regs);
+			if (ret) goto error;
+			ret = -EINVAL;
+		}
+
+		/*
+		 * no error on this register
+		 */
+		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
+
+		/*
+		 * Now we commit the changes to the software state
+		 */
+
+		/*
+		 * update overflow information
+		 */
+		if (is_counting) {
+			/*
+		 	 * full flag update each time a register is programmed
+		 	 */
+			ctx->ctx_pmds[cnum].flags = flags;
+
+			ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
+			ctx->ctx_pmds[cnum].smpl_pmds[0]  = smpl_pmds;
+			ctx->ctx_pmds[cnum].eventid       = req->reg_smpl_eventid;
+
+			/*
+			 * Mark all PMDS to be accessed as used.
+			 *
+			 * We do not keep track of PMC because we have to
+			 * systematically restore ALL of them.
+			 *
+			 * We do not update the used_monitors mask, because
+			 * if we have not programmed them, then will be in
+			 * a quiescent state, therefore we will not need to
+			 * mask/restore then when context is MASKED.
+			 */
+			CTX_USED_PMD(ctx, reset_pmds);
+			CTX_USED_PMD(ctx, smpl_pmds);
+			/*
+		 	 * make sure we do not try to reset on
+		 	 * restart because we have established new values
+		 	 */
+			if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum;
+		}
+		/*
+		 * Needed in case the user does not initialize the equivalent
+		 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
+		 * possible leak here.
+		 */
+		CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]);
+
+		/*
+		 * keep track of the monitor PMC that we are using.
+		 * we save the value of the pmc in ctx_pmcs[] and if
+		 * the monitoring is not stopped for the context we also
+		 * place it in the saved state area so that it will be
+		 * picked up later by the context switch code.
+		 *
+		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
+		 *
+		 * The value in th_pmcs[] may be modified on overflow, i.e.,  when
+		 * monitoring needs to be stopped.
+		 */
+		if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
+
+		/*
+		 * update context state
+		 */
+		ctx->ctx_pmcs[cnum] = value;
+
+		if (is_loaded) {
+			/*
+			 * write thread state
+			 */
+			if (is_system == 0) ctx->th_pmcs[cnum] = value;
+
+			/*
+			 * write hardware register if we can
+			 */
+			if (can_access_pmu) {
+				ia64_set_pmc(cnum, value);
+			}
+#ifdef CONFIG_SMP
+			else {
+				/*
+				 * per-task SMP only here
+				 *
+			 	 * we are guaranteed that the task is not running on the other CPU,
+			 	 * we indicate that this PMD will need to be reloaded if the task
+			 	 * is rescheduled on the CPU it ran last on.
+			 	 */
+				ctx->ctx_reload_pmcs[0] |= 1UL << cnum;
+			}
+#endif
+		}
+
+		DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n",
+			  cnum,
+			  value,
+			  is_loaded,
+			  can_access_pmu,
+			  flags,
+			  ctx->ctx_all_pmcs[0],
+			  ctx->ctx_used_pmds[0],
+			  ctx->ctx_pmds[cnum].eventid,
+			  smpl_pmds,
+			  reset_pmds,
+			  ctx->ctx_reload_pmcs[0],
+			  ctx->ctx_used_monitors[0],
+			  ctx->ctx_ovfl_regs[0]));
+	}
+
+	/*
+	 * make sure the changes are visible
+	 */
+	if (can_access_pmu) ia64_srlz_d();
+
+	return 0;
+error:
+	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+	return ret;
+}
+
+static int
+pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct task_struct *task;
+	pfarg_reg_t *req = (pfarg_reg_t *)arg;
+	unsigned long value, hw_value, ovfl_mask;
+	unsigned int cnum;
+	int i, can_access_pmu = 0, state;
+	int is_counting, is_loaded, is_system, expert_mode;
+	int ret = -EINVAL;
+	pfm_reg_check_t wr_func;
+
+
+	state     = ctx->ctx_state;
+	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
+	is_system = ctx->ctx_fl_system;
+	ovfl_mask = pmu_conf->ovfl_val;
+	task      = ctx->ctx_task;
+
+	if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL;
+
+	/*
+	 * on both UP and SMP, we can only write to the PMC when the task is
+	 * the owner of the local PMU.
+	 */
+	if (likely(is_loaded)) {
+		/*
+		 * In system wide and when the context is loaded, access can only happen
+		 * when the caller is running on the CPU being monitored by the session.
+		 * It does not have to be the owner (ctx_task) of the context per se.
+		 */
+		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
+			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+			return -EBUSY;
+		}
+		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
+	}
+	expert_mode = pfm_sysctl.expert_mode; 
+
+	for (i = 0; i < count; i++, req++) {
+
+		cnum  = req->reg_num;
+		value = req->reg_value;
+
+		if (!PMD_IS_IMPL(cnum)) {
+			DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum));
+			goto abort_mission;
+		}
+		is_counting = PMD_IS_COUNTING(cnum);
+		wr_func     = pmu_conf->pmd_desc[cnum].write_check;
+
+		/*
+		 * execute write checker, if any
+		 */
+		if (unlikely(expert_mode == 0 && wr_func)) {
+			unsigned long v = value;
+
+			ret = (*wr_func)(task, ctx, cnum, &v, regs);
+			if (ret) goto abort_mission;
+
+			value = v;
+			ret   = -EINVAL;
+		}
+
+		/*
+		 * no error on this register
+		 */
+		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
+
+		/*
+		 * now commit changes to software state
+		 */
+		hw_value = value;
+
+		/*
+		 * update virtualized (64bits) counter
+		 */
+		if (is_counting) {
+			/*
+			 * write context state
+			 */
+			ctx->ctx_pmds[cnum].lval = value;
+
+			/*
+			 * when context is load we use the split value
+			 */
+			if (is_loaded) {
+				hw_value = value &  ovfl_mask;
+				value    = value & ~ovfl_mask;
+			}
+		}
+		/*
+		 * update reset values (not just for counters)
+		 */
+		ctx->ctx_pmds[cnum].long_reset  = req->reg_long_reset;
+		ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset;
+
+		/*
+		 * update randomization parameters (not just for counters)
+		 */
+		ctx->ctx_pmds[cnum].seed = req->reg_random_seed;
+		ctx->ctx_pmds[cnum].mask = req->reg_random_mask;
+
+		/*
+		 * update context value
+		 */
+		ctx->ctx_pmds[cnum].val  = value;
+
+		/*
+		 * Keep track of what we use
+		 *
+		 * We do not keep track of PMC because we have to
+		 * systematically restore ALL of them.
+		 */
+		CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum));
+
+		/*
+		 * mark this PMD register used as well
+		 */
+		CTX_USED_PMD(ctx, RDEP(cnum));
+
+		/*
+		 * make sure we do not try to reset on
+		 * restart because we have established new values
+		 */
+		if (is_counting && state == PFM_CTX_MASKED) {
+			ctx->ctx_ovfl_regs[0] &= ~1UL << cnum;
+		}
+
+		if (is_loaded) {
+			/*
+		 	 * write thread state
+		 	 */
+			if (is_system == 0) ctx->th_pmds[cnum] = hw_value;
+
+			/*
+			 * write hardware register if we can
+			 */
+			if (can_access_pmu) {
+				ia64_set_pmd(cnum, hw_value);
+			} else {
+#ifdef CONFIG_SMP
+				/*
+			 	 * we are guaranteed that the task is not running on the other CPU,
+			 	 * we indicate that this PMD will need to be reloaded if the task
+			 	 * is rescheduled on the CPU it ran last on.
+			 	 */
+				ctx->ctx_reload_pmds[0] |= 1UL << cnum;
+#endif
+			}
+		}
+
+		DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx  short_reset=0x%lx "
+			  "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n",
+			cnum,
+			value,
+			is_loaded,
+			can_access_pmu,
+			hw_value,
+			ctx->ctx_pmds[cnum].val,
+			ctx->ctx_pmds[cnum].short_reset,
+			ctx->ctx_pmds[cnum].long_reset,
+			PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
+			ctx->ctx_pmds[cnum].seed,
+			ctx->ctx_pmds[cnum].mask,
+			ctx->ctx_used_pmds[0],
+			ctx->ctx_pmds[cnum].reset_pmds[0],
+			ctx->ctx_reload_pmds[0],
+			ctx->ctx_all_pmds[0],
+			ctx->ctx_ovfl_regs[0]));
+	}
+
+	/*
+	 * make changes visible
+	 */
+	if (can_access_pmu) ia64_srlz_d();
+
+	return 0;
+
+abort_mission:
+	/*
+	 * for now, we have only one possibility for error
+	 */
+	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+	return ret;
+}
+
+/*
+ * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function.
+ * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an
+ * interrupt is delivered during the call, it will be kept pending until we leave, making
+ * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are
+ * guaranteed to return consistent data to the user, it may simply be old. It is not
+ * trivial to treat the overflow while inside the call because you may end up in
+ * some module sampling buffer code causing deadlocks.
+ */
+static int
+pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct task_struct *task;
+	unsigned long val = 0UL, lval, ovfl_mask, sval;
+	pfarg_reg_t *req = (pfarg_reg_t *)arg;
+	unsigned int cnum, reg_flags = 0;
+	int i, can_access_pmu = 0, state;
+	int is_loaded, is_system, is_counting, expert_mode;
+	int ret = -EINVAL;
+	pfm_reg_check_t rd_func;
+
+	/*
+	 * access is possible when loaded only for
+	 * self-monitoring tasks or in UP mode
+	 */
+
+	state     = ctx->ctx_state;
+	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
+	is_system = ctx->ctx_fl_system;
+	ovfl_mask = pmu_conf->ovfl_val;
+	task      = ctx->ctx_task;
+
+	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
+
+	if (likely(is_loaded)) {
+		/*
+		 * In system wide and when the context is loaded, access can only happen
+		 * when the caller is running on the CPU being monitored by the session.
+		 * It does not have to be the owner (ctx_task) of the context per se.
+		 */
+		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
+			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+			return -EBUSY;
+		}
+		/*
+		 * this can be true when not self-monitoring only in UP
+		 */
+		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
+
+		if (can_access_pmu) ia64_srlz_d();
+	}
+	expert_mode = pfm_sysctl.expert_mode; 
+
+	DPRINT(("ld=%d apmu=%d ctx_state=%d\n",
+		is_loaded,
+		can_access_pmu,
+		state));
+
+	/*
+	 * on both UP and SMP, we can only read the PMD from the hardware register when
+	 * the task is the owner of the local PMU.
+	 */
+
+	for (i = 0; i < count; i++, req++) {
+
+		cnum        = req->reg_num;
+		reg_flags   = req->reg_flags;
+
+		if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
+		/*
+		 * we can only read the register that we use. That includes
+		 * the one we explicitly initialize AND the one we want included
+		 * in the sampling buffer (smpl_regs).
+		 *
+		 * Having this restriction allows optimization in the ctxsw routine
+		 * without compromising security (leaks)
+		 */
+		if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error;
+
+		sval        = ctx->ctx_pmds[cnum].val;
+		lval        = ctx->ctx_pmds[cnum].lval;
+		is_counting = PMD_IS_COUNTING(cnum);
+
+		/*
+		 * If the task is not the current one, then we check if the
+		 * PMU state is still in the local live register due to lazy ctxsw.
+		 * If true, then we read directly from the registers.
+		 */
+		if (can_access_pmu){
+			val = ia64_get_pmd(cnum);
+		} else {
+			/*
+			 * context has been saved
+			 * if context is zombie, then task does not exist anymore.
+			 * In this case, we use the full value saved in the context (pfm_flush_regs()).
+			 */
+			val = is_loaded ? ctx->th_pmds[cnum] : 0UL;
+		}
+		rd_func = pmu_conf->pmd_desc[cnum].read_check;
+
+		if (is_counting) {
+			/*
+			 * XXX: need to check for overflow when loaded
+			 */
+			val &= ovfl_mask;
+			val += sval;
+		}
+
+		/*
+		 * execute read checker, if any
+		 */
+		if (unlikely(expert_mode == 0 && rd_func)) {
+			unsigned long v = val;
+			ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs);
+			if (ret) goto error;
+			val = v;
+			ret = -EINVAL;
+		}
+
+		PFM_REG_RETFLAG_SET(reg_flags, 0);
+
+		DPRINT(("pmd[%u]=0x%lx\n", cnum, val));
+
+		/*
+		 * update register return value, abort all if problem during copy.
+		 * we only modify the reg_flags field. no check mode is fine because
+		 * access has been verified upfront in sys_perfmonctl().
+		 */
+		req->reg_value            = val;
+		req->reg_flags            = reg_flags;
+		req->reg_last_reset_val   = lval;
+	}
+
+	return 0;
+
+error:
+	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+	return ret;
+}
+
+int
+pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
+{
+	pfm_context_t *ctx;
+
+	if (req == NULL) return -EINVAL;
+
+ 	ctx = GET_PMU_CTX();
+
+	if (ctx == NULL) return -EINVAL;
+
+	/*
+	 * for now limit to current task, which is enough when calling
+	 * from overflow handler
+	 */
+	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
+
+	return pfm_write_pmcs(ctx, req, nreq, regs);
+}
+EXPORT_SYMBOL(pfm_mod_write_pmcs);
+
+int
+pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
+{
+	pfm_context_t *ctx;
+
+	if (req == NULL) return -EINVAL;
+
+ 	ctx = GET_PMU_CTX();
+
+	if (ctx == NULL) return -EINVAL;
+
+	/*
+	 * for now limit to current task, which is enough when calling
+	 * from overflow handler
+	 */
+	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
+
+	return pfm_read_pmds(ctx, req, nreq, regs);
+}
+EXPORT_SYMBOL(pfm_mod_read_pmds);
+
+/*
+ * Only call this function when a process it trying to
+ * write the debug registers (reading is always allowed)
+ */
+int
+pfm_use_debug_registers(struct task_struct *task)
+{
+	pfm_context_t *ctx = task->thread.pfm_context;
+	unsigned long flags;
+	int ret = 0;
+
+	if (pmu_conf->use_rr_dbregs == 0) return 0;
+
+	DPRINT(("called for [%d]\n", task_pid_nr(task)));
+
+	/*
+	 * do it only once
+	 */
+	if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;
+
+	/*
+	 * Even on SMP, we do not need to use an atomic here because
+	 * the only way in is via ptrace() and this is possible only when the
+	 * process is stopped. Even in the case where the ctxsw out is not totally
+	 * completed by the time we come here, there is no way the 'stopped' process
+	 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
+	 * So this is always safe.
+	 */
+	if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;
+
+	LOCK_PFS(flags);
+
+	/*
+	 * We cannot allow setting breakpoints when system wide monitoring
+	 * sessions are using the debug registers.
+	 */
+	if (pfm_sessions.pfs_sys_use_dbregs> 0)
+		ret = -1;
+	else
+		pfm_sessions.pfs_ptrace_use_dbregs++;
+
+	DPRINT(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
+		  pfm_sessions.pfs_ptrace_use_dbregs,
+		  pfm_sessions.pfs_sys_use_dbregs,
+		  task_pid_nr(task), ret));
+
+	UNLOCK_PFS(flags);
+
+	return ret;
+}
+
+/*
+ * This function is called for every task that exits with the
+ * IA64_THREAD_DBG_VALID set. This indicates a task which was
+ * able to use the debug registers for debugging purposes via
+ * ptrace(). Therefore we know it was not using them for
+ * performance monitoring, so we only decrement the number
+ * of "ptraced" debug register users to keep the count up to date
+ */
+int
+pfm_release_debug_registers(struct task_struct *task)
+{
+	unsigned long flags;
+	int ret;
+
+	if (pmu_conf->use_rr_dbregs == 0) return 0;
+
+	LOCK_PFS(flags);
+	if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
+		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task));
+		ret = -1;
+	}  else {
+		pfm_sessions.pfs_ptrace_use_dbregs--;
+		ret = 0;
+	}
+	UNLOCK_PFS(flags);
+
+	return ret;
+}
+
+static int
+pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct task_struct *task;
+	pfm_buffer_fmt_t *fmt;
+	pfm_ovfl_ctrl_t rst_ctrl;
+	int state, is_system;
+	int ret = 0;
+
+	state     = ctx->ctx_state;
+	fmt       = ctx->ctx_buf_fmt;
+	is_system = ctx->ctx_fl_system;
+	task      = PFM_CTX_TASK(ctx);
+
+	switch(state) {
+		case PFM_CTX_MASKED:
+			break;
+		case PFM_CTX_LOADED: 
+			if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break;
+			/* fall through */
+		case PFM_CTX_UNLOADED:
+		case PFM_CTX_ZOMBIE:
+			DPRINT(("invalid state=%d\n", state));
+			return -EBUSY;
+		default:
+			DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state));
+			return -EINVAL;
+	}
+
+	/*
+ 	 * In system wide and when the context is loaded, access can only happen
+ 	 * when the caller is running on the CPU being monitored by the session.
+ 	 * It does not have to be the owner (ctx_task) of the context per se.
+ 	 */
+	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+		return -EBUSY;
+	}
+
+	/* sanity check */
+	if (unlikely(task == NULL)) {
+		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current));
+		return -EINVAL;
+	}
+
+	if (task == current || is_system) {
+
+		fmt = ctx->ctx_buf_fmt;
+
+		DPRINT(("restarting self %d ovfl=0x%lx\n",
+			task_pid_nr(task),
+			ctx->ctx_ovfl_regs[0]));
+
+		if (CTX_HAS_SMPL(ctx)) {
+
+			prefetch(ctx->ctx_smpl_hdr);
+
+			rst_ctrl.bits.mask_monitoring = 0;
+			rst_ctrl.bits.reset_ovfl_pmds = 0;
+
+			if (state == PFM_CTX_LOADED)
+				ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
+			else
+				ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
+		} else {
+			rst_ctrl.bits.mask_monitoring = 0;
+			rst_ctrl.bits.reset_ovfl_pmds = 1;
+		}
+
+		if (ret == 0) {
+			if (rst_ctrl.bits.reset_ovfl_pmds)
+				pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
+
+			if (rst_ctrl.bits.mask_monitoring == 0) {
+				DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task)));
+
+				if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
+			} else {
+				DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task)));
+
+				// cannot use pfm_stop_monitoring(task, regs);
+			}
+		}
+		/*
+		 * clear overflowed PMD mask to remove any stale information
+		 */
+		ctx->ctx_ovfl_regs[0] = 0UL;
+
+		/*
+		 * back to LOADED state
+		 */
+		ctx->ctx_state = PFM_CTX_LOADED;
+
+		/*
+		 * XXX: not really useful for self monitoring
+		 */
+		ctx->ctx_fl_can_restart = 0;
+
+		return 0;
+	}
+
+	/* 
+	 * restart another task
+	 */
+
+	/*
+	 * When PFM_CTX_MASKED, we cannot issue a restart before the previous 
+	 * one is seen by the task.
+	 */
+	if (state == PFM_CTX_MASKED) {
+		if (ctx->ctx_fl_can_restart == 0) return -EINVAL;
+		/*
+		 * will prevent subsequent restart before this one is
+		 * seen by other task
+		 */
+		ctx->ctx_fl_can_restart = 0;
+	}
+
+	/*
+	 * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e.
+	 * the task is blocked or on its way to block. That's the normal
+	 * restart path. If the monitoring is not masked, then the task
+	 * can be actively monitoring and we cannot directly intervene.
+	 * Therefore we use the trap mechanism to catch the task and
+	 * force it to reset the buffer/reset PMDs.
+	 *
+	 * if non-blocking, then we ensure that the task will go into
+	 * pfm_handle_work() before returning to user mode.
+	 *
+	 * We cannot explicitly reset another task, it MUST always
+	 * be done by the task itself. This works for system wide because
+	 * the tool that is controlling the session is logically doing 
+	 * "self-monitoring".
+	 */
+	if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
+		DPRINT(("unblocking [%d]\n", task_pid_nr(task)));
+		complete(&ctx->ctx_restart_done);
+	} else {
+		DPRINT(("[%d] armed exit trap\n", task_pid_nr(task)));
+
+		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
+
+		PFM_SET_WORK_PENDING(task, 1);
+
+		set_notify_resume(task);
+
+		/*
+		 * XXX: send reschedule if task runs on another CPU
+		 */
+	}
+	return 0;
+}
+
+static int
+pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	unsigned int m = *(unsigned int *)arg;
+
+	pfm_sysctl.debug = m == 0 ? 0 : 1;
+
+	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
+
+	if (m == 0) {
+		memset(pfm_stats, 0, sizeof(pfm_stats));
+		for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL;
+	}
+	return 0;
+}
+
+/*
+ * arg can be NULL and count can be zero for this function
+ */
+static int
+pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct thread_struct *thread = NULL;
+	struct task_struct *task;
+	pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg;
+	unsigned long flags;
+	dbreg_t dbreg;
+	unsigned int rnum;
+	int first_time;
+	int ret = 0, state;
+	int i, can_access_pmu = 0;
+	int is_system, is_loaded;
+
+	if (pmu_conf->use_rr_dbregs == 0) return -EINVAL;
+
+	state     = ctx->ctx_state;
+	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
+	is_system = ctx->ctx_fl_system;
+	task      = ctx->ctx_task;
+
+	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
+
+	/*
+	 * on both UP and SMP, we can only write to the PMC when the task is
+	 * the owner of the local PMU.
+	 */
+	if (is_loaded) {
+		thread = &task->thread;
+		/*
+		 * In system wide and when the context is loaded, access can only happen
+		 * when the caller is running on the CPU being monitored by the session.
+		 * It does not have to be the owner (ctx_task) of the context per se.
+		 */
+		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
+			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+			return -EBUSY;
+		}
+		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
+	}
+
+	/*
+	 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
+	 * ensuring that no real breakpoint can be installed via this call.
+	 *
+	 * IMPORTANT: regs can be NULL in this function
+	 */
+
+	first_time = ctx->ctx_fl_using_dbreg == 0;
+
+	/*
+	 * don't bother if we are loaded and task is being debugged
+	 */
+	if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
+		DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task)));
+		return -EBUSY;
+	}
+
+	/*
+	 * check for debug registers in system wide mode
+	 *
+	 * If though a check is done in pfm_context_load(),
+	 * we must repeat it here, in case the registers are
+	 * written after the context is loaded
+	 */
+	if (is_loaded) {
+		LOCK_PFS(flags);
+
+		if (first_time && is_system) {
+			if (pfm_sessions.pfs_ptrace_use_dbregs)
+				ret = -EBUSY;
+			else
+				pfm_sessions.pfs_sys_use_dbregs++;
+		}
+		UNLOCK_PFS(flags);
+	}
+
+	if (ret != 0) return ret;
+
+	/*
+	 * mark ourself as user of the debug registers for
+	 * perfmon purposes.
+	 */
+	ctx->ctx_fl_using_dbreg = 1;
+
+	/*
+ 	 * clear hardware registers to make sure we don't
+ 	 * pick up stale state.
+	 *
+	 * for a system wide session, we do not use
+	 * thread.dbr, thread.ibr because this process
+	 * never leaves the current CPU and the state
+	 * is shared by all processes running on it
+ 	 */
+	if (first_time && can_access_pmu) {
+		DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task)));
+		for (i=0; i < pmu_conf->num_ibrs; i++) {
+			ia64_set_ibr(i, 0UL);
+			ia64_dv_serialize_instruction();
+		}
+		ia64_srlz_i();
+		for (i=0; i < pmu_conf->num_dbrs; i++) {
+			ia64_set_dbr(i, 0UL);
+			ia64_dv_serialize_data();
+		}
+		ia64_srlz_d();
+	}
+
+	/*
+	 * Now install the values into the registers
+	 */
+	for (i = 0; i < count; i++, req++) {
+
+		rnum      = req->dbreg_num;
+		dbreg.val = req->dbreg_value;
+
+		ret = -EINVAL;
+
+		if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) {
+			DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
+				  rnum, dbreg.val, mode, i, count));
+
+			goto abort_mission;
+		}
+
+		/*
+		 * make sure we do not install enabled breakpoint
+		 */
+		if (rnum & 0x1) {
+			if (mode == PFM_CODE_RR)
+				dbreg.ibr.ibr_x = 0;
+			else
+				dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
+		}
+
+		PFM_REG_RETFLAG_SET(req->dbreg_flags, 0);
+
+		/*
+		 * Debug registers, just like PMC, can only be modified
+		 * by a kernel call. Moreover, perfmon() access to those
+		 * registers are centralized in this routine. The hardware
+		 * does not modify the value of these registers, therefore,
+		 * if we save them as they are written, we can avoid having
+		 * to save them on context switch out. This is made possible
+		 * by the fact that when perfmon uses debug registers, ptrace()
+		 * won't be able to modify them concurrently.
+		 */
+		if (mode == PFM_CODE_RR) {
+			CTX_USED_IBR(ctx, rnum);
+
+			if (can_access_pmu) {
+				ia64_set_ibr(rnum, dbreg.val);
+				ia64_dv_serialize_instruction();
+			}
+
+			ctx->ctx_ibrs[rnum] = dbreg.val;
+
+			DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n",
+				rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu));
+		} else {
+			CTX_USED_DBR(ctx, rnum);
+
+			if (can_access_pmu) {
+				ia64_set_dbr(rnum, dbreg.val);
+				ia64_dv_serialize_data();
+			}
+			ctx->ctx_dbrs[rnum] = dbreg.val;
+
+			DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n",
+				rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu));
+		}
+	}
+
+	return 0;
+
+abort_mission:
+	/*
+	 * in case it was our first attempt, we undo the global modifications
+	 */
+	if (first_time) {
+		LOCK_PFS(flags);
+		if (ctx->ctx_fl_system) {
+			pfm_sessions.pfs_sys_use_dbregs--;
+		}
+		UNLOCK_PFS(flags);
+		ctx->ctx_fl_using_dbreg = 0;
+	}
+	/*
+	 * install error return flag
+	 */
+	PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL);
+
+	return ret;
+}
+
+static int
+pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs);
+}
+
+static int
+pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs);
+}
+
+int
+pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
+{
+	pfm_context_t *ctx;
+
+	if (req == NULL) return -EINVAL;
+
+ 	ctx = GET_PMU_CTX();
+
+	if (ctx == NULL) return -EINVAL;
+
+	/*
+	 * for now limit to current task, which is enough when calling
+	 * from overflow handler
+	 */
+	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
+
+	return pfm_write_ibrs(ctx, req, nreq, regs);
+}
+EXPORT_SYMBOL(pfm_mod_write_ibrs);
+
+int
+pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
+{
+	pfm_context_t *ctx;
+
+	if (req == NULL) return -EINVAL;
+
+ 	ctx = GET_PMU_CTX();
+
+	if (ctx == NULL) return -EINVAL;
+
+	/*
+	 * for now limit to current task, which is enough when calling
+	 * from overflow handler
+	 */
+	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
+
+	return pfm_write_dbrs(ctx, req, nreq, regs);
+}
+EXPORT_SYMBOL(pfm_mod_write_dbrs);
+
+
+static int
+pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	pfarg_features_t *req = (pfarg_features_t *)arg;
+
+	req->ft_version = PFM_VERSION;
+	return 0;
+}
+
+static int
+pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct pt_regs *tregs;
+	struct task_struct *task = PFM_CTX_TASK(ctx);
+	int state, is_system;
+
+	state     = ctx->ctx_state;
+	is_system = ctx->ctx_fl_system;
+
+	/*
+	 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE)
+	 */
+	if (state == PFM_CTX_UNLOADED) return -EINVAL;
+
+	/*
+ 	 * In system wide and when the context is loaded, access can only happen
+ 	 * when the caller is running on the CPU being monitored by the session.
+ 	 * It does not have to be the owner (ctx_task) of the context per se.
+ 	 */
+	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+		return -EBUSY;
+	}
+	DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
+		task_pid_nr(PFM_CTX_TASK(ctx)),
+		state,
+		is_system));
+	/*
+	 * in system mode, we need to update the PMU directly
+	 * and the user level state of the caller, which may not
+	 * necessarily be the creator of the context.
+	 */
+	if (is_system) {
+		/*
+		 * Update local PMU first
+		 *
+		 * disable dcr pp
+		 */
+		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
+		ia64_srlz_i();
+
+		/*
+		 * update local cpuinfo
+		 */
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
+
+		/*
+		 * stop monitoring, does srlz.i
+		 */
+		pfm_clear_psr_pp();
+
+		/*
+		 * stop monitoring in the caller
+		 */
+		ia64_psr(regs)->pp = 0;
+
+		return 0;
+	}
+	/*
+	 * per-task mode
+	 */
+
+	if (task == current) {
+		/* stop monitoring  at kernel level */
+		pfm_clear_psr_up();
+
+		/*
+	 	 * stop monitoring at the user level
+	 	 */
+		ia64_psr(regs)->up = 0;
+	} else {
+		tregs = task_pt_regs(task);
+
+		/*
+	 	 * stop monitoring at the user level
+	 	 */
+		ia64_psr(tregs)->up = 0;
+
+		/*
+		 * monitoring disabled in kernel at next reschedule
+		 */
+		ctx->ctx_saved_psr_up = 0;
+		DPRINT(("task=[%d]\n", task_pid_nr(task)));
+	}
+	return 0;
+}
+
+
+static int
+pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct pt_regs *tregs;
+	int state, is_system;
+
+	state     = ctx->ctx_state;
+	is_system = ctx->ctx_fl_system;
+
+	if (state != PFM_CTX_LOADED) return -EINVAL;
+
+	/*
+ 	 * In system wide and when the context is loaded, access can only happen
+ 	 * when the caller is running on the CPU being monitored by the session.
+ 	 * It does not have to be the owner (ctx_task) of the context per se.
+ 	 */
+	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+		return -EBUSY;
+	}
+
+	/*
+	 * in system mode, we need to update the PMU directly
+	 * and the user level state of the caller, which may not
+	 * necessarily be the creator of the context.
+	 */
+	if (is_system) {
+
+		/*
+		 * set user level psr.pp for the caller
+		 */
+		ia64_psr(regs)->pp = 1;
+
+		/*
+		 * now update the local PMU and cpuinfo
+		 */
+		PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
+
+		/*
+		 * start monitoring at kernel level
+		 */
+		pfm_set_psr_pp();
+
+		/* enable dcr pp */
+		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
+		ia64_srlz_i();
+
+		return 0;
+	}
+
+	/*
+	 * per-process mode
+	 */
+
+	if (ctx->ctx_task == current) {
+
+		/* start monitoring at kernel level */
+		pfm_set_psr_up();
+
+		/*
+		 * activate monitoring at user level
+		 */
+		ia64_psr(regs)->up = 1;
+
+	} else {
+		tregs = task_pt_regs(ctx->ctx_task);
+
+		/*
+		 * start monitoring at the kernel level the next
+		 * time the task is scheduled
+		 */
+		ctx->ctx_saved_psr_up = IA64_PSR_UP;
+
+		/*
+		 * activate monitoring at user level
+		 */
+		ia64_psr(tregs)->up = 1;
+	}
+	return 0;
+}
+
+static int
+pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	pfarg_reg_t *req = (pfarg_reg_t *)arg;
+	unsigned int cnum;
+	int i;
+	int ret = -EINVAL;
+
+	for (i = 0; i < count; i++, req++) {
+
+		cnum = req->reg_num;
+
+		if (!PMC_IS_IMPL(cnum)) goto abort_mission;
+
+		req->reg_value = PMC_DFL_VAL(cnum);
+
+		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
+
+		DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value));
+	}
+	return 0;
+
+abort_mission:
+	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+	return ret;
+}
+
+static int
+pfm_check_task_exist(pfm_context_t *ctx)
+{
+	struct task_struct *g, *t;
+	int ret = -ESRCH;
+
+	read_lock(&tasklist_lock);
+
+	do_each_thread (g, t) {
+		if (t->thread.pfm_context == ctx) {
+			ret = 0;
+			goto out;
+		}
+	} while_each_thread (g, t);
+out:
+	read_unlock(&tasklist_lock);
+
+	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
+
+	return ret;
+}
+
+static int
+pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct task_struct *task;
+	struct thread_struct *thread;
+	struct pfm_context_t *old;
+	unsigned long flags;
+#ifndef CONFIG_SMP
+	struct task_struct *owner_task = NULL;
+#endif
+	pfarg_load_t *req = (pfarg_load_t *)arg;
+	unsigned long *pmcs_source, *pmds_source;
+	int the_cpu;
+	int ret = 0;
+	int state, is_system, set_dbregs = 0;
+
+	state     = ctx->ctx_state;
+	is_system = ctx->ctx_fl_system;
+	/*
+	 * can only load from unloaded or terminated state
+	 */
+	if (state != PFM_CTX_UNLOADED) {
+		DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
+			req->load_pid,
+			ctx->ctx_state));
+		return -EBUSY;
+	}
+
+	DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
+
+	if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) {
+		DPRINT(("cannot use blocking mode on self\n"));
+		return -EINVAL;
+	}
+
+	ret = pfm_get_task(ctx, req->load_pid, &task);
+	if (ret) {
+		DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret));
+		return ret;
+	}
+
+	ret = -EINVAL;
+
+	/*
+	 * system wide is self monitoring only
+	 */
+	if (is_system && task != current) {
+		DPRINT(("system wide is self monitoring only load_pid=%d\n",
+			req->load_pid));
+		goto error;
+	}
+
+	thread = &task->thread;
+
+	ret = 0;
+	/*
+	 * cannot load a context which is using range restrictions,
+	 * into a task that is being debugged.
+	 */
+	if (ctx->ctx_fl_using_dbreg) {
+		if (thread->flags & IA64_THREAD_DBG_VALID) {
+			ret = -EBUSY;
+			DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
+			goto error;
+		}
+		LOCK_PFS(flags);
+
+		if (is_system) {
+			if (pfm_sessions.pfs_ptrace_use_dbregs) {
+				DPRINT(("cannot load [%d] dbregs in use\n",
+							task_pid_nr(task)));
+				ret = -EBUSY;
+			} else {
+				pfm_sessions.pfs_sys_use_dbregs++;
+				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs));
+				set_dbregs = 1;
+			}
+		}
+
+		UNLOCK_PFS(flags);
+
+		if (ret) goto error;
+	}
+
+	/*
+	 * SMP system-wide monitoring implies self-monitoring.
+	 *
+	 * The programming model expects the task to
+	 * be pinned on a CPU throughout the session.
+	 * Here we take note of the current CPU at the
+	 * time the context is loaded. No call from
+	 * another CPU will be allowed.
+	 *
+	 * The pinning via shed_setaffinity()
+	 * must be done by the calling task prior
+	 * to this call.
+	 *
+	 * systemwide: keep track of CPU this session is supposed to run on
+	 */
+	the_cpu = ctx->ctx_cpu = smp_processor_id();
+
+	ret = -EBUSY;
+	/*
+	 * now reserve the session
+	 */
+	ret = pfm_reserve_session(current, is_system, the_cpu);
+	if (ret) goto error;
+
+	/*
+	 * task is necessarily stopped at this point.
+	 *
+	 * If the previous context was zombie, then it got removed in
+	 * pfm_save_regs(). Therefore we should not see it here.
+	 * If we see a context, then this is an active context
+	 *
+	 * XXX: needs to be atomic
+	 */
+	DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
+		thread->pfm_context, ctx));
+
+	ret = -EBUSY;
+	old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
+	if (old != NULL) {
+		DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
+		goto error_unres;
+	}
+
+	pfm_reset_msgq(ctx);
+
+	ctx->ctx_state = PFM_CTX_LOADED;
+
+	/*
+	 * link context to task
+	 */
+	ctx->ctx_task = task;
+
+	if (is_system) {
+		/*
+		 * we load as stopped
+		 */
+		PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
+
+		if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
+	} else {
+		thread->flags |= IA64_THREAD_PM_VALID;
+	}
+
+	/*
+	 * propagate into thread-state
+	 */
+	pfm_copy_pmds(task, ctx);
+	pfm_copy_pmcs(task, ctx);
+
+	pmcs_source = ctx->th_pmcs;
+	pmds_source = ctx->th_pmds;
+
+	/*
+	 * always the case for system-wide
+	 */
+	if (task == current) {
+
+		if (is_system == 0) {
+
+			/* allow user level control */
+			ia64_psr(regs)->sp = 0;
+			DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task)));
+
+			SET_LAST_CPU(ctx, smp_processor_id());
+			INC_ACTIVATION();
+			SET_ACTIVATION(ctx);
+#ifndef CONFIG_SMP
+			/*
+			 * push the other task out, if any
+			 */
+			owner_task = GET_PMU_OWNER();
+			if (owner_task) pfm_lazy_save_regs(owner_task);
+#endif
+		}
+		/*
+		 * load all PMD from ctx to PMU (as opposed to thread state)
+		 * restore all PMC from ctx to PMU
+		 */
+		pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]);
+		pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]);
+
+		ctx->ctx_reload_pmcs[0] = 0UL;
+		ctx->ctx_reload_pmds[0] = 0UL;
+
+		/*
+		 * guaranteed safe by earlier check against DBG_VALID
+		 */
+		if (ctx->ctx_fl_using_dbreg) {
+			pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
+			pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
+		}
+		/*
+		 * set new ownership
+		 */
+		SET_PMU_OWNER(task, ctx);
+
+		DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task)));
+	} else {
+		/*
+		 * when not current, task MUST be stopped, so this is safe
+		 */
+		regs = task_pt_regs(task);
+
+		/* force a full reload */
+		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
+		SET_LAST_CPU(ctx, -1);
+
+		/* initial saved psr (stopped) */
+		ctx->ctx_saved_psr_up = 0UL;
+		ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
+	}
+
+	ret = 0;
+
+error_unres:
+	if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu);
+error:
+	/*
+	 * we must undo the dbregs setting (for system-wide)
+	 */
+	if (ret && set_dbregs) {
+		LOCK_PFS(flags);
+		pfm_sessions.pfs_sys_use_dbregs--;
+		UNLOCK_PFS(flags);
+	}
+	/*
+	 * release task, there is now a link with the context
+	 */
+	if (is_system == 0 && task != current) {
+		pfm_put_task(task);
+
+		if (ret == 0) {
+			ret = pfm_check_task_exist(ctx);
+			if (ret) {
+				ctx->ctx_state = PFM_CTX_UNLOADED;
+				ctx->ctx_task  = NULL;
+			}
+		}
+	}
+	return ret;
+}
+
+/*
+ * in this function, we do not need to increase the use count
+ * for the task via get_task_struct(), because we hold the
+ * context lock. If the task were to disappear while having
+ * a context attached, it would go through pfm_exit_thread()
+ * which also grabs the context lock  and would therefore be blocked
+ * until we are here.
+ */
+static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx);
+
+static int
+pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct task_struct *task = PFM_CTX_TASK(ctx);
+	struct pt_regs *tregs;
+	int prev_state, is_system;
+	int ret;
+
+	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1));
+
+	prev_state = ctx->ctx_state;
+	is_system  = ctx->ctx_fl_system;
+
+	/*
+	 * unload only when necessary
+	 */
+	if (prev_state == PFM_CTX_UNLOADED) {
+		DPRINT(("ctx_state=%d, nothing to do\n", prev_state));
+		return 0;
+	}
+
+	/*
+	 * clear psr and dcr bits
+	 */
+	ret = pfm_stop(ctx, NULL, 0, regs);
+	if (ret) return ret;
+
+	ctx->ctx_state = PFM_CTX_UNLOADED;
+
+	/*
+	 * in system mode, we need to update the PMU directly
+	 * and the user level state of the caller, which may not
+	 * necessarily be the creator of the context.
+	 */
+	if (is_system) {
+
+		/*
+		 * Update cpuinfo
+		 *
+		 * local PMU is taken care of in pfm_stop()
+		 */
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
+
+		/*
+		 * save PMDs in context
+		 * release ownership
+		 */
+		pfm_flush_pmds(current, ctx);
+
+		/*
+		 * at this point we are done with the PMU
+		 * so we can unreserve the resource.
+		 */
+		if (prev_state != PFM_CTX_ZOMBIE) 
+			pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu);
+
+		/*
+		 * disconnect context from task
+		 */
+		task->thread.pfm_context = NULL;
+		/*
+		 * disconnect task from context
+		 */
+		ctx->ctx_task = NULL;
+
+		/*
+		 * There is nothing more to cleanup here.
+		 */
+		return 0;
+	}
+
+	/*
+	 * per-task mode
+	 */
+	tregs = task == current ? regs : task_pt_regs(task);
+
+	if (task == current) {
+		/*
+		 * cancel user level control
+		 */
+		ia64_psr(regs)->sp = 1;
+
+		DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task)));
+	}
+	/*
+	 * save PMDs to context
+	 * release ownership
+	 */
+	pfm_flush_pmds(task, ctx);
+
+	/*
+	 * at this point we are done with the PMU
+	 * so we can unreserve the resource.
+	 *
+	 * when state was ZOMBIE, we have already unreserved.
+	 */
+	if (prev_state != PFM_CTX_ZOMBIE) 
+		pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu);
+
+	/*
+	 * reset activation counter and psr
+	 */
+	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
+	SET_LAST_CPU(ctx, -1);
+
+	/*
+	 * PMU state will not be restored
+	 */
+	task->thread.flags &= ~IA64_THREAD_PM_VALID;
+
+	/*
+	 * break links between context and task
+	 */
+	task->thread.pfm_context  = NULL;
+	ctx->ctx_task             = NULL;
+
+	PFM_SET_WORK_PENDING(task, 0);
+
+	ctx->ctx_fl_trap_reason  = PFM_TRAP_REASON_NONE;
+	ctx->ctx_fl_can_restart  = 0;
+	ctx->ctx_fl_going_zombie = 0;
+
+	DPRINT(("disconnected [%d] from context\n", task_pid_nr(task)));
+
+	return 0;
+}
+
+
+/*
+ * called only from exit_thread(): task == current
+ * we come here only if current has a context attached (loaded or masked)
+ */
+void
+pfm_exit_thread(struct task_struct *task)
+{
+	pfm_context_t *ctx;
+	unsigned long flags;
+	struct pt_regs *regs = task_pt_regs(task);
+	int ret, state;
+	int free_ok = 0;
+
+	ctx = PFM_GET_CTX(task);
+
+	PROTECT_CTX(ctx, flags);
+
+	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task)));
+
+	state = ctx->ctx_state;
+	switch(state) {
+		case PFM_CTX_UNLOADED:
+			/*
+	 		 * only comes to this function if pfm_context is not NULL, i.e., cannot
+			 * be in unloaded state
+	 		 */
+			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task));
+			break;
+		case PFM_CTX_LOADED:
+		case PFM_CTX_MASKED:
+			ret = pfm_context_unload(ctx, NULL, 0, regs);
+			if (ret) {
+				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
+			}
+			DPRINT(("ctx unloaded for current state was %d\n", state));
+
+			pfm_end_notify_user(ctx);
+			break;
+		case PFM_CTX_ZOMBIE:
+			ret = pfm_context_unload(ctx, NULL, 0, regs);
+			if (ret) {
+				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
+			}
+			free_ok = 1;
+			break;
+		default:
+			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state);
+			break;
+	}
+	UNPROTECT_CTX(ctx, flags);
+
+	{ u64 psr = pfm_get_psr();
+	  BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
+	  BUG_ON(GET_PMU_OWNER());
+	  BUG_ON(ia64_psr(regs)->up);
+	  BUG_ON(ia64_psr(regs)->pp);
+	}
+
+	/*
+	 * All memory free operations (especially for vmalloc'ed memory)
+	 * MUST be done with interrupts ENABLED.
+	 */
+	if (free_ok) pfm_context_free(ctx);
+}
+
+/*
+ * functions MUST be listed in the increasing order of their index (see permfon.h)
+ */
+#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
+#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
+#define PFM_CMD_PCLRWS	(PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
+#define PFM_CMD_PCLRW	(PFM_CMD_FD|PFM_CMD_ARG_RW)
+#define PFM_CMD_NONE	{ NULL, "no-cmd", 0, 0, 0, NULL}
+
+static pfm_cmd_desc_t pfm_cmd_tab[]={
+/* 0  */PFM_CMD_NONE,
+/* 1  */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
+/* 2  */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
+/* 3  */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
+/* 4  */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
+/* 5  */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
+/* 6  */PFM_CMD_NONE,
+/* 7  */PFM_CMD_NONE,
+/* 8  */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
+/* 9  */PFM_CMD_NONE,
+/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
+/* 11 */PFM_CMD_NONE,
+/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
+/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
+/* 14 */PFM_CMD_NONE,
+/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
+/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
+/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
+/* 18 */PFM_CMD_NONE,
+/* 19 */PFM_CMD_NONE,
+/* 20 */PFM_CMD_NONE,
+/* 21 */PFM_CMD_NONE,
+/* 22 */PFM_CMD_NONE,
+/* 23 */PFM_CMD_NONE,
+/* 24 */PFM_CMD_NONE,
+/* 25 */PFM_CMD_NONE,
+/* 26 */PFM_CMD_NONE,
+/* 27 */PFM_CMD_NONE,
+/* 28 */PFM_CMD_NONE,
+/* 29 */PFM_CMD_NONE,
+/* 30 */PFM_CMD_NONE,
+/* 31 */PFM_CMD_NONE,
+/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
+/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
+};
+#define PFM_CMD_COUNT	(sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
+
+static int
+pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
+{
+	struct task_struct *task;
+	int state, old_state;
+
+recheck:
+	state = ctx->ctx_state;
+	task  = ctx->ctx_task;
+
+	if (task == NULL) {
+		DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
+		return 0;
+	}
+
+	DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
+		ctx->ctx_fd,
+		state,
+		task_pid_nr(task),
+		task->state, PFM_CMD_STOPPED(cmd)));
+
+	/*
+	 * self-monitoring always ok.
+	 *
+	 * for system-wide the caller can either be the creator of the
+	 * context (to one to which the context is attached to) OR
+	 * a task running on the same CPU as the session.
+	 */
+	if (task == current || ctx->ctx_fl_system) return 0;
+
+	/*
+	 * we are monitoring another thread
+	 */
+	switch(state) {
+		case PFM_CTX_UNLOADED:
+			/*
+			 * if context is UNLOADED we are safe to go
+			 */
+			return 0;
+		case PFM_CTX_ZOMBIE:
+			/*
+			 * no command can operate on a zombie context
+			 */
+			DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
+			return -EINVAL;
+		case PFM_CTX_MASKED:
+			/*
+			 * PMU state has been saved to software even though
+			 * the thread may still be running.
+			 */
+			if (cmd != PFM_UNLOAD_CONTEXT) return 0;
+	}
+
+	/*
+	 * context is LOADED or MASKED. Some commands may need to have 
+	 * the task stopped.
+	 *
+	 * We could lift this restriction for UP but it would mean that
+	 * the user has no guarantee the task would not run between
+	 * two successive calls to perfmonctl(). That's probably OK.
+	 * If this user wants to ensure the task does not run, then
+	 * the task must be stopped.
+	 */
+	if (PFM_CMD_STOPPED(cmd)) {
+		if (!task_is_stopped_or_traced(task)) {
+			DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task)));
+			return -EBUSY;
+		}
+		/*
+		 * task is now stopped, wait for ctxsw out
+		 *
+		 * This is an interesting point in the code.
+		 * We need to unprotect the context because
+		 * the pfm_save_regs() routines needs to grab
+		 * the same lock. There are danger in doing
+		 * this because it leaves a window open for
+		 * another task to get access to the context
+		 * and possibly change its state. The one thing
+		 * that is not possible is for the context to disappear
+		 * because we are protected by the VFS layer, i.e.,
+		 * get_fd()/put_fd().
+		 */
+		old_state = state;
+
+		UNPROTECT_CTX(ctx, flags);
+
+		wait_task_inactive(task, 0);
+
+		PROTECT_CTX(ctx, flags);
+
+		/*
+		 * we must recheck to verify if state has changed
+		 */
+		if (ctx->ctx_state != old_state) {
+			DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state));
+			goto recheck;
+		}
+	}
+	return 0;
+}
+
+/*
+ * system-call entry point (must return long)
+ */
+asmlinkage long
+sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
+{
+	struct file *file = NULL;
+	pfm_context_t *ctx = NULL;
+	unsigned long flags = 0UL;
+	void *args_k = NULL;
+	long ret; /* will expand int return types */
+	size_t base_sz, sz, xtra_sz = 0;
+	int narg, completed_args = 0, call_made = 0, cmd_flags;
+	int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+	int (*getsize)(void *arg, size_t *sz);
+#define PFM_MAX_ARGSIZE	4096
+
+	/*
+	 * reject any call if perfmon was disabled at initialization
+	 */
+	if (unlikely(pmu_conf == NULL)) return -ENOSYS;
+
+	if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
+		DPRINT(("invalid cmd=%d\n", cmd));
+		return -EINVAL;
+	}
+
+	func      = pfm_cmd_tab[cmd].cmd_func;
+	narg      = pfm_cmd_tab[cmd].cmd_narg;
+	base_sz   = pfm_cmd_tab[cmd].cmd_argsize;
+	getsize   = pfm_cmd_tab[cmd].cmd_getsize;
+	cmd_flags = pfm_cmd_tab[cmd].cmd_flags;
+
+	if (unlikely(func == NULL)) {
+		DPRINT(("invalid cmd=%d\n", cmd));
+		return -EINVAL;
+	}
+
+	DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n",
+		PFM_CMD_NAME(cmd),
+		cmd,
+		narg,
+		base_sz,
+		count));
+
+	/*
+	 * check if number of arguments matches what the command expects
+	 */
+	if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count)))
+		return -EINVAL;
+
+restart_args:
+	sz = xtra_sz + base_sz*count;
+	/*
+	 * limit abuse to min page size
+	 */
+	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
+		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz);
+		return -E2BIG;
+	}
+
+	/*
+	 * allocate default-sized argument buffer
+	 */
+	if (likely(count && args_k == NULL)) {
+		args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL);
+		if (args_k == NULL) return -ENOMEM;
+	}
+
+	ret = -EFAULT;
+
+	/*
+	 * copy arguments
+	 *
+	 * assume sz = 0 for command without parameters
+	 */
+	if (sz && copy_from_user(args_k, arg, sz)) {
+		DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg));
+		goto error_args;
+	}
+
+	/*
+	 * check if command supports extra parameters
+	 */
+	if (completed_args == 0 && getsize) {
+		/*
+		 * get extra parameters size (based on main argument)
+		 */
+		ret = (*getsize)(args_k, &xtra_sz);
+		if (ret) goto error_args;
+
+		completed_args = 1;
+
+		DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz));
+
+		/* retry if necessary */
+		if (likely(xtra_sz)) goto restart_args;
+	}
+
+	if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd;
+
+	ret = -EBADF;
+
+	file = fget(fd);
+	if (unlikely(file == NULL)) {
+		DPRINT(("invalid fd %d\n", fd));
+		goto error_args;
+	}
+	if (unlikely(PFM_IS_FILE(file) == 0)) {
+		DPRINT(("fd %d not related to perfmon\n", fd));
+		goto error_args;
+	}
+
+	ctx = file->private_data;
+	if (unlikely(ctx == NULL)) {
+		DPRINT(("no context for fd %d\n", fd));
+		goto error_args;
+	}
+	prefetch(&ctx->ctx_state);
+
+	PROTECT_CTX(ctx, flags);
+
+	/*
+	 * check task is stopped
+	 */
+	ret = pfm_check_task_state(ctx, cmd, flags);
+	if (unlikely(ret)) goto abort_locked;
+
+skip_fd:
+	ret = (*func)(ctx, args_k, count, task_pt_regs(current));
+
+	call_made = 1;
+
+abort_locked:
+	if (likely(ctx)) {
+		DPRINT(("context unlocked\n"));
+		UNPROTECT_CTX(ctx, flags);
+	}
+
+	/* copy argument back to user, if needed */
+	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
+
+error_args:
+	if (file)
+		fput(file);
+
+	kfree(args_k);
+
+	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
+
+	return ret;
+}
+
+static void
+pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs)
+{
+	pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt;
+	pfm_ovfl_ctrl_t rst_ctrl;
+	int state;
+	int ret = 0;
+
+	state = ctx->ctx_state;
+	/*
+	 * Unlock sampling buffer and reset index atomically
+	 * XXX: not really needed when blocking
+	 */
+	if (CTX_HAS_SMPL(ctx)) {
+
+		rst_ctrl.bits.mask_monitoring = 0;
+		rst_ctrl.bits.reset_ovfl_pmds = 0;
+
+		if (state == PFM_CTX_LOADED)
+			ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
+		else
+			ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
+	} else {
+		rst_ctrl.bits.mask_monitoring = 0;
+		rst_ctrl.bits.reset_ovfl_pmds = 1;
+	}
+
+	if (ret == 0) {
+		if (rst_ctrl.bits.reset_ovfl_pmds) {
+			pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET);
+		}
+		if (rst_ctrl.bits.mask_monitoring == 0) {
+			DPRINT(("resuming monitoring\n"));
+			if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current);
+		} else {
+			DPRINT(("stopping monitoring\n"));
+			//pfm_stop_monitoring(current, regs);
+		}
+		ctx->ctx_state = PFM_CTX_LOADED;
+	}
+}
+
+/*
+ * context MUST BE LOCKED when calling
+ * can only be called for current
+ */
+static void
+pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
+{
+	int ret;
+
+	DPRINT(("entering for [%d]\n", task_pid_nr(current)));
+
+	ret = pfm_context_unload(ctx, NULL, 0, regs);
+	if (ret) {
+		printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret);
+	}
+
+	/*
+	 * and wakeup controlling task, indicating we are now disconnected
+	 */
+	wake_up_interruptible(&ctx->ctx_zombieq);
+
+	/*
+	 * given that context is still locked, the controlling
+	 * task will only get access when we return from
+	 * pfm_handle_work().
+	 */
+}
+
+static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
+
+ /*
+  * pfm_handle_work() can be called with interrupts enabled
+  * (TIF_NEED_RESCHED) or disabled. The down_interruptible
+  * call may sleep, therefore we must re-enable interrupts
+  * to avoid deadlocks. It is safe to do so because this function
+  * is called ONLY when returning to user level (pUStk=1), in which case
+  * there is no risk of kernel stack overflow due to deep
+  * interrupt nesting.
+  */
+void
+pfm_handle_work(void)
+{
+	pfm_context_t *ctx;
+	struct pt_regs *regs;
+	unsigned long flags, dummy_flags;
+	unsigned long ovfl_regs;
+	unsigned int reason;
+	int ret;
+
+	ctx = PFM_GET_CTX(current);
+	if (ctx == NULL) {
+		printk(KERN_ERR "perfmon: [%d] has no PFM context\n",
+			task_pid_nr(current));
+		return;
+	}
+
+	PROTECT_CTX(ctx, flags);
+
+	PFM_SET_WORK_PENDING(current, 0);
+
+	regs = task_pt_regs(current);
+
+	/*
+	 * extract reason for being here and clear
+	 */
+	reason = ctx->ctx_fl_trap_reason;
+	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
+	ovfl_regs = ctx->ctx_ovfl_regs[0];
+
+	DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state));
+
+	/*
+	 * must be done before we check for simple-reset mode
+	 */
+	if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE)
+		goto do_zombie;
+
+	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
+	if (reason == PFM_TRAP_REASON_RESET)
+		goto skip_blocking;
+
+	/*
+	 * restore interrupt mask to what it was on entry.
+	 * Could be enabled/diasbled.
+	 */
+	UNPROTECT_CTX(ctx, flags);
+
+	/*
+	 * force interrupt enable because of down_interruptible()
+	 */
+	local_irq_enable();
+
+	DPRINT(("before block sleeping\n"));
+
+	/*
+	 * may go through without blocking on SMP systems
+	 * if restart has been received already by the time we call down()
+	 */
+	ret = wait_for_completion_interruptible(&ctx->ctx_restart_done);
+
+	DPRINT(("after block sleeping ret=%d\n", ret));
+
+	/*
+	 * lock context and mask interrupts again
+	 * We save flags into a dummy because we may have
+	 * altered interrupts mask compared to entry in this
+	 * function.
+	 */
+	PROTECT_CTX(ctx, dummy_flags);
+
+	/*
+	 * we need to read the ovfl_regs only after wake-up
+	 * because we may have had pfm_write_pmds() in between
+	 * and that can changed PMD values and therefore 
+	 * ovfl_regs is reset for these new PMD values.
+	 */
+	ovfl_regs = ctx->ctx_ovfl_regs[0];
+
+	if (ctx->ctx_fl_going_zombie) {
+do_zombie:
+		DPRINT(("context is zombie, bailing out\n"));
+		pfm_context_force_terminate(ctx, regs);
+		goto nothing_to_do;
+	}
+	/*
+	 * in case of interruption of down() we don't restart anything
+	 */
+	if (ret < 0)
+		goto nothing_to_do;
+
+skip_blocking:
+	pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
+	ctx->ctx_ovfl_regs[0] = 0UL;
+
+nothing_to_do:
+	/*
+	 * restore flags as they were upon entry
+	 */
+	UNPROTECT_CTX(ctx, flags);
+}
+
+static int
+pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg)
+{
+	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
+		DPRINT(("ignoring overflow notification, owner is zombie\n"));
+		return 0;
+	}
+
+	DPRINT(("waking up somebody\n"));
+
+	if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait);
+
+	/*
+	 * safe, we are not in intr handler, nor in ctxsw when
+	 * we come here
+	 */
+	kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN);
+
+	return 0;
+}
+
+static int
+pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds)
+{
+	pfm_msg_t *msg = NULL;
+
+	if (ctx->ctx_fl_no_msg == 0) {
+		msg = pfm_get_new_msg(ctx);
+		if (msg == NULL) {
+			printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n");
+			return -1;
+		}
+
+		msg->pfm_ovfl_msg.msg_type         = PFM_MSG_OVFL;
+		msg->pfm_ovfl_msg.msg_ctx_fd       = ctx->ctx_fd;
+		msg->pfm_ovfl_msg.msg_active_set   = 0;
+		msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds;
+		msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL;
+		msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL;
+		msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL;
+		msg->pfm_ovfl_msg.msg_tstamp       = 0UL;
+	}
+
+	DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n",
+		msg,
+		ctx->ctx_fl_no_msg,
+		ctx->ctx_fd,
+		ovfl_pmds));
+
+	return pfm_notify_user(ctx, msg);
+}
+
+static int
+pfm_end_notify_user(pfm_context_t *ctx)
+{
+	pfm_msg_t *msg;
+
+	msg = pfm_get_new_msg(ctx);
+	if (msg == NULL) {
+		printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n");
+		return -1;
+	}
+	/* no leak */
+	memset(msg, 0, sizeof(*msg));
+
+	msg->pfm_end_msg.msg_type    = PFM_MSG_END;
+	msg->pfm_end_msg.msg_ctx_fd  = ctx->ctx_fd;
+	msg->pfm_ovfl_msg.msg_tstamp = 0UL;
+
+	DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n",
+		msg,
+		ctx->ctx_fl_no_msg,
+		ctx->ctx_fd));
+
+	return pfm_notify_user(ctx, msg);
+}
+
+/*
+ * main overflow processing routine.
+ * it can be called from the interrupt path or explicitly during the context switch code
+ */
+static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx,
+				unsigned long pmc0, struct pt_regs *regs)
+{
+	pfm_ovfl_arg_t *ovfl_arg;
+	unsigned long mask;
+	unsigned long old_val, ovfl_val, new_val;
+	unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
+	unsigned long tstamp;
+	pfm_ovfl_ctrl_t	ovfl_ctrl;
+	unsigned int i, has_smpl;
+	int must_notify = 0;
+
+	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring;
+
+	/*
+	 * sanity test. Should never happen
+	 */
+	if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;
+
+	tstamp   = ia64_get_itc();
+	mask     = pmc0 >> PMU_FIRST_COUNTER;
+	ovfl_val = pmu_conf->ovfl_val;
+	has_smpl = CTX_HAS_SMPL(ctx);
+
+	DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
+		     "used_pmds=0x%lx\n",
+			pmc0,
+			task ? task_pid_nr(task): -1,
+			(regs ? regs->cr_iip : 0),
+			CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
+			ctx->ctx_used_pmds[0]));
+
+
+	/*
+	 * first we update the virtual counters
+	 * assume there was a prior ia64_srlz_d() issued
+	 */
+	for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
+
+		/* skip pmd which did not overflow */
+		if ((mask & 0x1) == 0) continue;
+
+		/*
+		 * Note that the pmd is not necessarily 0 at this point as qualified events
+		 * may have happened before the PMU was frozen. The residual count is not
+		 * taken into consideration here but will be with any read of the pmd via
+		 * pfm_read_pmds().
+		 */
+		old_val              = new_val = ctx->ctx_pmds[i].val;
+		new_val             += 1 + ovfl_val;
+		ctx->ctx_pmds[i].val = new_val;
+
+		/*
+		 * check for overflow condition
+		 */
+		if (likely(old_val > new_val)) {
+			ovfl_pmds |= 1UL << i;
+			if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i;
+		}
+
+		DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
+			i,
+			new_val,
+			old_val,
+			ia64_get_pmd(i) & ovfl_val,
+			ovfl_pmds,
+			ovfl_notify));
+	}
+
+	/*
+	 * there was no 64-bit overflow, nothing else to do
+	 */
+	if (ovfl_pmds == 0UL) return;
+
+	/* 
+	 * reset all control bits
+	 */
+	ovfl_ctrl.val = 0;
+	reset_pmds    = 0UL;
+
+	/*
+	 * if a sampling format module exists, then we "cache" the overflow by 
+	 * calling the module's handler() routine.
+	 */
+	if (has_smpl) {
+		unsigned long start_cycles, end_cycles;
+		unsigned long pmd_mask;
+		int j, k, ret = 0;
+		int this_cpu = smp_processor_id();
+
+		pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
+		ovfl_arg = &ctx->ctx_ovfl_arg;
+
+		prefetch(ctx->ctx_smpl_hdr);
+
+		for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) {
+
+			mask = 1UL << i;
+
+			if ((pmd_mask & 0x1) == 0) continue;
+
+			ovfl_arg->ovfl_pmd      = (unsigned char )i;
+			ovfl_arg->ovfl_notify   = ovfl_notify & mask ? 1 : 0;
+			ovfl_arg->active_set    = 0;
+			ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
+			ovfl_arg->smpl_pmds[0]  = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
+
+			ovfl_arg->pmd_value      = ctx->ctx_pmds[i].val;
+			ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval;
+			ovfl_arg->pmd_eventid    = ctx->ctx_pmds[i].eventid;
+
+			/*
+		 	 * copy values of pmds of interest. Sampling format may copy them
+		 	 * into sampling buffer.
+		 	 */
+			if (smpl_pmds) {
+				for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
+					if ((smpl_pmds & 0x1) == 0) continue;
+					ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ?  pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
+					DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1]));
+				}
+			}
+
+			pfm_stats[this_cpu].pfm_smpl_handler_calls++;
+
+			start_cycles = ia64_get_itc();
+
+			/*
+		 	 * call custom buffer format record (handler) routine
+		 	 */
+			ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp);
+
+			end_cycles = ia64_get_itc();
+
+			/*
+			 * For those controls, we take the union because they have
+			 * an all or nothing behavior.
+			 */
+			ovfl_ctrl.bits.notify_user     |= ovfl_arg->ovfl_ctrl.bits.notify_user;
+			ovfl_ctrl.bits.block_task      |= ovfl_arg->ovfl_ctrl.bits.block_task;
+			ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring;
+			/*
+			 * build the bitmask of pmds to reset now
+			 */
+			if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
+
+			pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
+		}
+		/*
+		 * when the module cannot handle the rest of the overflows, we abort right here
+		 */
+		if (ret && pmd_mask) {
+			DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n",
+				pmd_mask<<PMU_FIRST_COUNTER));
+		}
+		/*
+		 * remove the pmds we reset now from the set of pmds to reset in pfm_restart()
+		 */
+		ovfl_pmds &= ~reset_pmds;
+	} else {
+		/*
+		 * when no sampling module is used, then the default
+		 * is to notify on overflow if requested by user
+		 */
+		ovfl_ctrl.bits.notify_user     = ovfl_notify ? 1 : 0;
+		ovfl_ctrl.bits.block_task      = ovfl_notify ? 1 : 0;
+		ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */
+		ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1;
+		/*
+		 * if needed, we reset all overflowed pmds
+		 */
+		if (ovfl_notify == 0) reset_pmds = ovfl_pmds;
+	}
+
+	DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds));
+
+	/*
+	 * reset the requested PMD registers using the short reset values
+	 */
+	if (reset_pmds) {
+		unsigned long bm = reset_pmds;
+		pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET);
+	}
+
+	if (ovfl_notify && ovfl_ctrl.bits.notify_user) {
+		/*
+		 * keep track of what to reset when unblocking
+		 */
+		ctx->ctx_ovfl_regs[0] = ovfl_pmds;
+
+		/*
+		 * check for blocking context 
+		 */
+		if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) {
+
+			ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK;
+
+			/*
+			 * set the perfmon specific checking pending work for the task
+			 */
+			PFM_SET_WORK_PENDING(task, 1);
+
+			/*
+			 * when coming from ctxsw, current still points to the
+			 * previous task, therefore we must work with task and not current.
+			 */
+			set_notify_resume(task);
+		}
+		/*
+		 * defer until state is changed (shorten spin window). the context is locked
+		 * anyway, so the signal receiver would come spin for nothing.
+		 */
+		must_notify = 1;
+	}
+
+	DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
+			GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1,
+			PFM_GET_WORK_PENDING(task),
+			ctx->ctx_fl_trap_reason,
+			ovfl_pmds,
+			ovfl_notify,
+			ovfl_ctrl.bits.mask_monitoring ? 1 : 0));
+	/*
+	 * in case monitoring must be stopped, we toggle the psr bits
+	 */
+	if (ovfl_ctrl.bits.mask_monitoring) {
+		pfm_mask_monitoring(task);
+		ctx->ctx_state = PFM_CTX_MASKED;
+		ctx->ctx_fl_can_restart = 1;
+	}
+
+	/*
+	 * send notification now
+	 */
+	if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);
+
+	return;
+
+sanity_check:
+	printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
+			smp_processor_id(),
+			task ? task_pid_nr(task) : -1,
+			pmc0);
+	return;
+
+stop_monitoring:
+	/*
+	 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
+	 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
+	 * come here as zombie only if the task is the current task. In which case, we
+	 * can access the PMU  hardware directly.
+	 *
+	 * Note that zombies do have PM_VALID set. So here we do the minimal.
+	 *
+	 * In case the context was zombified it could not be reclaimed at the time
+	 * the monitoring program exited. At this point, the PMU reservation has been
+	 * returned, the sampiing buffer has been freed. We must convert this call
+	 * into a spurious interrupt. However, we must also avoid infinite overflows
+	 * by stopping monitoring for this task. We can only come here for a per-task
+	 * context. All we need to do is to stop monitoring using the psr bits which
+	 * are always task private. By re-enabling secure montioring, we ensure that
+	 * the monitored task will not be able to re-activate monitoring.
+	 * The task will eventually be context switched out, at which point the context
+	 * will be reclaimed (that includes releasing ownership of the PMU).
+	 *
+	 * So there might be a window of time where the number of per-task session is zero
+	 * yet one PMU might have a owner and get at most one overflow interrupt for a zombie
+	 * context. This is safe because if a per-task session comes in, it will push this one
+	 * out and by the virtue on pfm_save_regs(), this one will disappear. If a system wide
+	 * session is force on that CPU, given that we use task pinning, pfm_save_regs() will
+	 * also push our zombie context out.
+	 *
+	 * Overall pretty hairy stuff....
+	 */
+	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task_pid_nr(task): -1));
+	pfm_clear_psr_up();
+	ia64_psr(regs)->up = 0;
+	ia64_psr(regs)->sp = 1;
+	return;
+}
+
+static int
+pfm_do_interrupt_handler(void *arg, struct pt_regs *regs)
+{
+	struct task_struct *task;
+	pfm_context_t *ctx;
+	unsigned long flags;
+	u64 pmc0;
+	int this_cpu = smp_processor_id();
+	int retval = 0;
+
+	pfm_stats[this_cpu].pfm_ovfl_intr_count++;
+
+	/*
+	 * srlz.d done before arriving here
+	 */
+	pmc0 = ia64_get_pmc(0);
+
+	task = GET_PMU_OWNER();
+	ctx  = GET_PMU_CTX();
+
+	/*
+	 * if we have some pending bits set
+	 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1
+	 */
+	if (PMC0_HAS_OVFL(pmc0) && task) {
+		/*
+		 * we assume that pmc0.fr is always set here
+		 */
+
+		/* sanity check */
+		if (!ctx) goto report_spurious1;
+
+		if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) 
+			goto report_spurious2;
+
+		PROTECT_CTX_NOPRINT(ctx, flags);
+
+		pfm_overflow_handler(task, ctx, pmc0, regs);
+
+		UNPROTECT_CTX_NOPRINT(ctx, flags);
+
+	} else {
+		pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++;
+		retval = -1;
+	}
+	/*
+	 * keep it unfrozen at all times
+	 */
+	pfm_unfreeze_pmu();
+
+	return retval;
+
+report_spurious1:
+	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
+		this_cpu, task_pid_nr(task));
+	pfm_unfreeze_pmu();
+	return -1;
+report_spurious2:
+	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 
+		this_cpu, 
+		task_pid_nr(task));
+	pfm_unfreeze_pmu();
+	return -1;
+}
+
+static irqreturn_t
+pfm_interrupt_handler(int irq, void *arg)
+{
+	unsigned long start_cycles, total_cycles;
+	unsigned long min, max;
+	int this_cpu;
+	int ret;
+	struct pt_regs *regs = get_irq_regs();
+
+	this_cpu = get_cpu();
+	if (likely(!pfm_alt_intr_handler)) {
+		min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
+		max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
+
+		start_cycles = ia64_get_itc();
+
+		ret = pfm_do_interrupt_handler(arg, regs);
+
+		total_cycles = ia64_get_itc();
+
+		/*
+		 * don't measure spurious interrupts
+		 */
+		if (likely(ret == 0)) {
+			total_cycles -= start_cycles;
+
+			if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
+			if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
+
+			pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+		}
+	}
+	else {
+		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
+	}
+
+	put_cpu();
+	return IRQ_HANDLED;
+}
+
+/*
+ * /proc/perfmon interface, for debug only
+ */
+
+#define PFM_PROC_SHOW_HEADER	((void *)(long)nr_cpu_ids+1)
+
+static void *
+pfm_proc_start(struct seq_file *m, loff_t *pos)
+{
+	if (*pos == 0) {
+		return PFM_PROC_SHOW_HEADER;
+	}
+
+	while (*pos <= nr_cpu_ids) {
+		if (cpu_online(*pos - 1)) {
+			return (void *)*pos;
+		}
+		++*pos;
+	}
+	return NULL;
+}
+
+static void *
+pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return pfm_proc_start(m, pos);
+}
+
+static void
+pfm_proc_stop(struct seq_file *m, void *v)
+{
+}
+
+static void
+pfm_proc_show_header(struct seq_file *m)
+{
+	struct list_head * pos;
+	pfm_buffer_fmt_t * entry;
+	unsigned long flags;
+
+ 	seq_printf(m,
+		"perfmon version           : %u.%u\n"
+		"model                     : %s\n"
+		"fastctxsw                 : %s\n"
+		"expert mode               : %s\n"
+		"ovfl_mask                 : 0x%lx\n"
+		"PMU flags                 : 0x%x\n",
+		PFM_VERSION_MAJ, PFM_VERSION_MIN,
+		pmu_conf->pmu_name,
+		pfm_sysctl.fastctxsw > 0 ? "Yes": "No",
+		pfm_sysctl.expert_mode > 0 ? "Yes": "No",
+		pmu_conf->ovfl_val,
+		pmu_conf->flags);
+
+  	LOCK_PFS(flags);
+
+ 	seq_printf(m,
+ 		"proc_sessions             : %u\n"
+ 		"sys_sessions              : %u\n"
+ 		"sys_use_dbregs            : %u\n"
+ 		"ptrace_use_dbregs         : %u\n",
+ 		pfm_sessions.pfs_task_sessions,
+ 		pfm_sessions.pfs_sys_sessions,
+ 		pfm_sessions.pfs_sys_use_dbregs,
+ 		pfm_sessions.pfs_ptrace_use_dbregs);
+
+  	UNLOCK_PFS(flags);
+
+	spin_lock(&pfm_buffer_fmt_lock);
+
+	list_for_each(pos, &pfm_buffer_fmt_list) {
+		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
+		seq_printf(m, "format                    : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
+			entry->fmt_uuid[0],
+			entry->fmt_uuid[1],
+			entry->fmt_uuid[2],
+			entry->fmt_uuid[3],
+			entry->fmt_uuid[4],
+			entry->fmt_uuid[5],
+			entry->fmt_uuid[6],
+			entry->fmt_uuid[7],
+			entry->fmt_uuid[8],
+			entry->fmt_uuid[9],
+			entry->fmt_uuid[10],
+			entry->fmt_uuid[11],
+			entry->fmt_uuid[12],
+			entry->fmt_uuid[13],
+			entry->fmt_uuid[14],
+			entry->fmt_uuid[15],
+			entry->fmt_name);
+	}
+	spin_unlock(&pfm_buffer_fmt_lock);
+
+}
+
+static int
+pfm_proc_show(struct seq_file *m, void *v)
+{
+	unsigned long psr;
+	unsigned int i;
+	int cpu;
+
+	if (v == PFM_PROC_SHOW_HEADER) {
+		pfm_proc_show_header(m);
+		return 0;
+	}
+
+	/* show info for CPU (v - 1) */
+
+	cpu = (long)v - 1;
+	seq_printf(m,
+		"CPU%-2d overflow intrs      : %lu\n"
+		"CPU%-2d overflow cycles     : %lu\n"
+		"CPU%-2d overflow min        : %lu\n"
+		"CPU%-2d overflow max        : %lu\n"
+		"CPU%-2d smpl handler calls  : %lu\n"
+		"CPU%-2d smpl handler cycles : %lu\n"
+		"CPU%-2d spurious intrs      : %lu\n"
+		"CPU%-2d replay   intrs      : %lu\n"
+		"CPU%-2d syst_wide           : %d\n"
+		"CPU%-2d dcr_pp              : %d\n"
+		"CPU%-2d exclude idle        : %d\n"
+		"CPU%-2d owner               : %d\n"
+		"CPU%-2d context             : %p\n"
+		"CPU%-2d activations         : %lu\n",
+		cpu, pfm_stats[cpu].pfm_ovfl_intr_count,
+		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles,
+		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min,
+		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max,
+		cpu, pfm_stats[cpu].pfm_smpl_handler_calls,
+		cpu, pfm_stats[cpu].pfm_smpl_handler_cycles,
+		cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count,
+		cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count,
+		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0,
+		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0,
+		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0,
+		cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1,
+		cpu, pfm_get_cpu_data(pmu_ctx, cpu),
+		cpu, pfm_get_cpu_data(pmu_activation_number, cpu));
+
+	if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) {
+
+		psr = pfm_get_psr();
+
+		ia64_srlz_d();
+
+		seq_printf(m, 
+			"CPU%-2d psr                 : 0x%lx\n"
+			"CPU%-2d pmc0                : 0x%lx\n", 
+			cpu, psr,
+			cpu, ia64_get_pmc(0));
+
+		for (i=0; PMC_IS_LAST(i) == 0;  i++) {
+			if (PMC_IS_COUNTING(i) == 0) continue;
+   			seq_printf(m, 
+				"CPU%-2d pmc%u                : 0x%lx\n"
+   				"CPU%-2d pmd%u                : 0x%lx\n", 
+				cpu, i, ia64_get_pmc(i),
+				cpu, i, ia64_get_pmd(i));
+  		}
+	}
+	return 0;
+}
+
+const struct seq_operations pfm_seq_ops = {
+	.start =	pfm_proc_start,
+ 	.next =		pfm_proc_next,
+ 	.stop =		pfm_proc_stop,
+ 	.show =		pfm_proc_show
+};
+
+static int
+pfm_proc_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &pfm_seq_ops);
+}
+
+
+/*
+ * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
+ * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
+ * is active or inactive based on mode. We must rely on the value in
+ * local_cpu_data->pfm_syst_info
+ */
+void
+pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
+{
+	struct pt_regs *regs;
+	unsigned long dcr;
+	unsigned long dcr_pp;
+
+	dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
+
+	/*
+	 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
+	 * on every CPU, so we can rely on the pid to identify the idle task.
+	 */
+	if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
+		regs = task_pt_regs(task);
+		ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
+		return;
+	}
+	/*
+	 * if monitoring has started
+	 */
+	if (dcr_pp) {
+		dcr = ia64_getreg(_IA64_REG_CR_DCR);
+		/*
+		 * context switching in?
+		 */
+		if (is_ctxswin) {
+			/* mask monitoring for the idle task */
+			ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
+			pfm_clear_psr_pp();
+			ia64_srlz_i();
+			return;
+		}
+		/*
+		 * context switching out
+		 * restore monitoring for next task
+		 *
+		 * Due to inlining this odd if-then-else construction generates
+		 * better code.
+		 */
+		ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP);
+		pfm_set_psr_pp();
+		ia64_srlz_i();
+	}
+}
+
+#ifdef CONFIG_SMP
+
+static void
+pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
+{
+	struct task_struct *task = ctx->ctx_task;
+
+	ia64_psr(regs)->up = 0;
+	ia64_psr(regs)->sp = 1;
+
+	if (GET_PMU_OWNER() == task) {
+		DPRINT(("cleared ownership for [%d]\n",
+					task_pid_nr(ctx->ctx_task)));
+		SET_PMU_OWNER(NULL, NULL);
+	}
+
+	/*
+	 * disconnect the task from the context and vice-versa
+	 */
+	PFM_SET_WORK_PENDING(task, 0);
+
+	task->thread.pfm_context  = NULL;
+	task->thread.flags       &= ~IA64_THREAD_PM_VALID;
+
+	DPRINT(("force cleanup for [%d]\n",  task_pid_nr(task)));
+}
+
+
+/*
+ * in 2.6, interrupts are masked when we come here and the runqueue lock is held
+ */
+void
+pfm_save_regs(struct task_struct *task)
+{
+	pfm_context_t *ctx;
+	unsigned long flags;
+	u64 psr;
+
+
+	ctx = PFM_GET_CTX(task);
+	if (ctx == NULL) return;
+
+	/*
+ 	 * we always come here with interrupts ALREADY disabled by
+ 	 * the scheduler. So we simply need to protect against concurrent
+	 * access, not CPU concurrency.
+	 */
+	flags = pfm_protect_ctx_ctxsw(ctx);
+
+	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
+		struct pt_regs *regs = task_pt_regs(task);
+
+		pfm_clear_psr_up();
+
+		pfm_force_cleanup(ctx, regs);
+
+		BUG_ON(ctx->ctx_smpl_hdr);
+
+		pfm_unprotect_ctx_ctxsw(ctx, flags);
+
+		pfm_context_free(ctx);
+		return;
+	}
+
+	/*
+	 * save current PSR: needed because we modify it
+	 */
+	ia64_srlz_d();
+	psr = pfm_get_psr();
+
+	BUG_ON(psr & (IA64_PSR_I));
+
+	/*
+	 * stop monitoring:
+	 * This is the last instruction which may generate an overflow
+	 *
+	 * We do not need to set psr.sp because, it is irrelevant in kernel.
+	 * It will be restored from ipsr when going back to user level
+	 */
+	pfm_clear_psr_up();
+
+	/*
+	 * keep a copy of psr.up (for reload)
+	 */
+	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
+
+	/*
+	 * release ownership of this PMU.
+	 * PM interrupts are masked, so nothing
+	 * can happen.
+	 */
+	SET_PMU_OWNER(NULL, NULL);
+
+	/*
+	 * we systematically save the PMD as we have no
+	 * guarantee we will be schedule at that same
+	 * CPU again.
+	 */
+	pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);
+
+	/*
+	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
+	 * we will need it on the restore path to check
+	 * for pending overflow.
+	 */
+	ctx->th_pmcs[0] = ia64_get_pmc(0);
+
+	/*
+	 * unfreeze PMU if had pending overflows
+	 */
+	if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+
+	/*
+	 * finally, allow context access.
+	 * interrupts will still be masked after this call.
+	 */
+	pfm_unprotect_ctx_ctxsw(ctx, flags);
+}
+
+#else /* !CONFIG_SMP */
+void
+pfm_save_regs(struct task_struct *task)
+{
+	pfm_context_t *ctx;
+	u64 psr;
+
+	ctx = PFM_GET_CTX(task);
+	if (ctx == NULL) return;
+
+	/*
+	 * save current PSR: needed because we modify it
+	 */
+	psr = pfm_get_psr();
+
+	BUG_ON(psr & (IA64_PSR_I));
+
+	/*
+	 * stop monitoring:
+	 * This is the last instruction which may generate an overflow
+	 *
+	 * We do not need to set psr.sp because, it is irrelevant in kernel.
+	 * It will be restored from ipsr when going back to user level
+	 */
+	pfm_clear_psr_up();
+
+	/*
+	 * keep a copy of psr.up (for reload)
+	 */
+	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
+}
+
+static void
+pfm_lazy_save_regs (struct task_struct *task)
+{
+	pfm_context_t *ctx;
+	unsigned long flags;
+
+	{ u64 psr  = pfm_get_psr();
+	  BUG_ON(psr & IA64_PSR_UP);
+	}
+
+	ctx = PFM_GET_CTX(task);
+
+	/*
+	 * we need to mask PMU overflow here to
+	 * make sure that we maintain pmc0 until
+	 * we save it. overflow interrupts are
+	 * treated as spurious if there is no
+	 * owner.
+	 *
+	 * XXX: I don't think this is necessary
+	 */
+	PROTECT_CTX(ctx,flags);
+
+	/*
+	 * release ownership of this PMU.
+	 * must be done before we save the registers.
+	 *
+	 * after this call any PMU interrupt is treated
+	 * as spurious.
+	 */
+	SET_PMU_OWNER(NULL, NULL);
+
+	/*
+	 * save all the pmds we use
+	 */
+	pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);
+
+	/*
+	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
+	 * it is needed to check for pended overflow
+	 * on the restore path
+	 */
+	ctx->th_pmcs[0] = ia64_get_pmc(0);
+
+	/*
+	 * unfreeze PMU if had pending overflows
+	 */
+	if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+
+	/*
+	 * now get can unmask PMU interrupts, they will
+	 * be treated as purely spurious and we will not
+	 * lose any information
+	 */
+	UNPROTECT_CTX(ctx,flags);
+}
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_SMP
+/*
+ * in 2.6, interrupts are masked when we come here and the runqueue lock is held
+ */
+void
+pfm_load_regs (struct task_struct *task)
+{
+	pfm_context_t *ctx;
+	unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
+	unsigned long flags;
+	u64 psr, psr_up;
+	int need_irq_resend;
+
+	ctx = PFM_GET_CTX(task);
+	if (unlikely(ctx == NULL)) return;
+
+	BUG_ON(GET_PMU_OWNER());
+
+	/*
+	 * possible on unload
+	 */
+	if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return;
+
+	/*
+ 	 * we always come here with interrupts ALREADY disabled by
+ 	 * the scheduler. So we simply need to protect against concurrent
+	 * access, not CPU concurrency.
+	 */
+	flags = pfm_protect_ctx_ctxsw(ctx);
+	psr   = pfm_get_psr();
+
+	need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
+
+	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
+	BUG_ON(psr & IA64_PSR_I);
+
+	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {
+		struct pt_regs *regs = task_pt_regs(task);
+
+		BUG_ON(ctx->ctx_smpl_hdr);
+
+		pfm_force_cleanup(ctx, regs);
+
+		pfm_unprotect_ctx_ctxsw(ctx, flags);
+
+		/*
+		 * this one (kmalloc'ed) is fine with interrupts disabled
+		 */
+		pfm_context_free(ctx);
+
+		return;
+	}
+
+	/*
+	 * we restore ALL the debug registers to avoid picking up
+	 * stale state.
+	 */
+	if (ctx->ctx_fl_using_dbreg) {
+		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
+		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
+	}
+	/*
+	 * retrieve saved psr.up
+	 */
+	psr_up = ctx->ctx_saved_psr_up;
+
+	/*
+	 * if we were the last user of the PMU on that CPU,
+	 * then nothing to do except restore psr
+	 */
+	if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {
+
+		/*
+		 * retrieve partial reload masks (due to user modifications)
+		 */
+		pmc_mask = ctx->ctx_reload_pmcs[0];
+		pmd_mask = ctx->ctx_reload_pmds[0];
+
+	} else {
+		/*
+	 	 * To avoid leaking information to the user level when psr.sp=0,
+	 	 * we must reload ALL implemented pmds (even the ones we don't use).
+	 	 * In the kernel we only allow PFM_READ_PMDS on registers which
+	 	 * we initialized or requested (sampling) so there is no risk there.
+	 	 */
+		pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
+
+		/*
+	 	 * ALL accessible PMCs are systematically reloaded, unused registers
+	 	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
+	 	 * up stale configuration.
+	 	 *
+	 	 * PMC0 is never in the mask. It is always restored separately.
+	 	 */
+		pmc_mask = ctx->ctx_all_pmcs[0];
+	}
+	/*
+	 * when context is MASKED, we will restore PMC with plm=0
+	 * and PMD with stale information, but that's ok, nothing
+	 * will be captured.
+	 *
+	 * XXX: optimize here
+	 */
+	if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask);
+	if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);
+
+	/*
+	 * check for pending overflow at the time the state
+	 * was saved.
+	 */
+	if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
+		/*
+		 * reload pmc0 with the overflow information
+		 * On McKinley PMU, this will trigger a PMU interrupt
+		 */
+		ia64_set_pmc(0, ctx->th_pmcs[0]);
+		ia64_srlz_d();
+		ctx->th_pmcs[0] = 0UL;
+
+		/*
+		 * will replay the PMU interrupt
+		 */
+		if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);
+
+		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
+	}
+
+	/*
+	 * we just did a reload, so we reset the partial reload fields
+	 */
+	ctx->ctx_reload_pmcs[0] = 0UL;
+	ctx->ctx_reload_pmds[0] = 0UL;
+
+	SET_LAST_CPU(ctx, smp_processor_id());
+
+	/*
+	 * dump activation value for this PMU
+	 */
+	INC_ACTIVATION();
+	/*
+	 * record current activation for this context
+	 */
+	SET_ACTIVATION(ctx);
+
+	/*
+	 * establish new ownership. 
+	 */
+	SET_PMU_OWNER(task, ctx);
+
+	/*
+	 * restore the psr.up bit. measurement
+	 * is active again.
+	 * no PMU interrupt can happen at this point
+	 * because we still have interrupts disabled.
+	 */
+	if (likely(psr_up)) pfm_set_psr_up();
+
+	/*
+	 * allow concurrent access to context
+	 */
+	pfm_unprotect_ctx_ctxsw(ctx, flags);
+}
+#else /*  !CONFIG_SMP */
+/*
+ * reload PMU state for UP kernels
+ * in 2.5 we come here with interrupts disabled
+ */
+void
+pfm_load_regs (struct task_struct *task)
+{
+	pfm_context_t *ctx;
+	struct task_struct *owner;
+	unsigned long pmd_mask, pmc_mask;
+	u64 psr, psr_up;
+	int need_irq_resend;
+
+	owner = GET_PMU_OWNER();
+	ctx   = PFM_GET_CTX(task);
+	psr   = pfm_get_psr();
+
+	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
+	BUG_ON(psr & IA64_PSR_I);
+
+	/*
+	 * we restore ALL the debug registers to avoid picking up
+	 * stale state.
+	 *
+	 * This must be done even when the task is still the owner
+	 * as the registers may have been modified via ptrace()
+	 * (not perfmon) by the previous task.
+	 */
+	if (ctx->ctx_fl_using_dbreg) {
+		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
+		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
+	}
+
+	/*
+	 * retrieved saved psr.up
+	 */
+	psr_up = ctx->ctx_saved_psr_up;
+	need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
+
+	/*
+	 * short path, our state is still there, just
+	 * need to restore psr and we go
+	 *
+	 * we do not touch either PMC nor PMD. the psr is not touched
+	 * by the overflow_handler. So we are safe w.r.t. to interrupt
+	 * concurrency even without interrupt masking.
+	 */
+	if (likely(owner == task)) {
+		if (likely(psr_up)) pfm_set_psr_up();
+		return;
+	}
+
+	/*
+	 * someone else is still using the PMU, first push it out and
+	 * then we'll be able to install our stuff !
+	 *
+	 * Upon return, there will be no owner for the current PMU
+	 */
+	if (owner) pfm_lazy_save_regs(owner);
+
+	/*
+	 * To avoid leaking information to the user level when psr.sp=0,
+	 * we must reload ALL implemented pmds (even the ones we don't use).
+	 * In the kernel we only allow PFM_READ_PMDS on registers which
+	 * we initialized or requested (sampling) so there is no risk there.
+	 */
+	pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
+
+	/*
+	 * ALL accessible PMCs are systematically reloaded, unused registers
+	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
+	 * up stale configuration.
+	 *
+	 * PMC0 is never in the mask. It is always restored separately
+	 */
+	pmc_mask = ctx->ctx_all_pmcs[0];
+
+	pfm_restore_pmds(ctx->th_pmds, pmd_mask);
+	pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);
+
+	/*
+	 * check for pending overflow at the time the state
+	 * was saved.
+	 */
+	if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
+		/*
+		 * reload pmc0 with the overflow information
+		 * On McKinley PMU, this will trigger a PMU interrupt
+		 */
+		ia64_set_pmc(0, ctx->th_pmcs[0]);
+		ia64_srlz_d();
+
+		ctx->th_pmcs[0] = 0UL;
+
+		/*
+		 * will replay the PMU interrupt
+		 */
+		if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);
+
+		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
+	}
+
+	/*
+	 * establish new ownership. 
+	 */
+	SET_PMU_OWNER(task, ctx);
+
+	/*
+	 * restore the psr.up bit. measurement
+	 * is active again.
+	 * no PMU interrupt can happen at this point
+	 * because we still have interrupts disabled.
+	 */
+	if (likely(psr_up)) pfm_set_psr_up();
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * this function assumes monitoring is stopped
+ */
+static void
+pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
+{
+	u64 pmc0;
+	unsigned long mask2, val, pmd_val, ovfl_val;
+	int i, can_access_pmu = 0;
+	int is_self;
+
+	/*
+	 * is the caller the task being monitored (or which initiated the
+	 * session for system wide measurements)
+	 */
+	is_self = ctx->ctx_task == task ? 1 : 0;
+
+	/*
+	 * can access PMU is task is the owner of the PMU state on the current CPU
+	 * or if we are running on the CPU bound to the context in system-wide mode
+	 * (that is not necessarily the task the context is attached to in this mode).
+	 * In system-wide we always have can_access_pmu true because a task running on an
+	 * invalid processor is flagged earlier in the call stack (see pfm_stop).
+	 */
+	can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id());
+	if (can_access_pmu) {
+		/*
+		 * Mark the PMU as not owned
+		 * This will cause the interrupt handler to do nothing in case an overflow
+		 * interrupt was in-flight
+		 * This also guarantees that pmc0 will contain the final state
+		 * It virtually gives us full control on overflow processing from that point
+		 * on.
+		 */
+		SET_PMU_OWNER(NULL, NULL);
+		DPRINT(("releasing ownership\n"));
+
+		/*
+		 * read current overflow status:
+		 *
+		 * we are guaranteed to read the final stable state
+		 */
+		ia64_srlz_d();
+		pmc0 = ia64_get_pmc(0); /* slow */
+
+		/*
+		 * reset freeze bit, overflow status information destroyed
+		 */
+		pfm_unfreeze_pmu();
+	} else {
+		pmc0 = ctx->th_pmcs[0];
+		/*
+		 * clear whatever overflow status bits there were
+		 */
+		ctx->th_pmcs[0] = 0;
+	}
+	ovfl_val = pmu_conf->ovfl_val;
+	/*
+	 * we save all the used pmds
+	 * we take care of overflows for counting PMDs
+	 *
+	 * XXX: sampling situation is not taken into account here
+	 */
+	mask2 = ctx->ctx_used_pmds[0];
+
+	DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2));
+
+	for (i = 0; mask2; i++, mask2>>=1) {
+
+		/* skip non used pmds */
+		if ((mask2 & 0x1) == 0) continue;
+
+		/*
+		 * can access PMU always true in system wide mode
+		 */
+		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i];
+
+		if (PMD_IS_COUNTING(i)) {
+			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
+				task_pid_nr(task),
+				i,
+				ctx->ctx_pmds[i].val,
+				val & ovfl_val));
+
+			/*
+			 * we rebuild the full 64 bit value of the counter
+			 */
+			val = ctx->ctx_pmds[i].val + (val & ovfl_val);
+
+			/*
+			 * now everything is in ctx_pmds[] and we need
+			 * to clear the saved context from save_regs() such that
+			 * pfm_read_pmds() gets the correct value
+			 */
+			pmd_val = 0UL;
+
+			/*
+			 * take care of overflow inline
+			 */
+			if (pmc0 & (1UL << i)) {
+				val += 1 + ovfl_val;
+				DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i));
+			}
+		}
+
+		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val));
+
+		if (is_self) ctx->th_pmds[i] = pmd_val;
+
+		ctx->ctx_pmds[i].val = val;
+	}
+}
+
+static struct irqaction perfmon_irqaction = {
+	.handler = pfm_interrupt_handler,
+	.flags   = IRQF_DISABLED,
+	.name    = "perfmon"
+};
+
+static void
+pfm_alt_save_pmu_state(void *data)
+{
+	struct pt_regs *regs;
+
+	regs = task_pt_regs(current);
+
+	DPRINT(("called\n"));
+
+	/*
+	 * should not be necessary but
+	 * let's take not risk
+	 */
+	pfm_clear_psr_up();
+	pfm_clear_psr_pp();
+	ia64_psr(regs)->pp = 0;
+
+	/*
+	 * This call is required
+	 * May cause a spurious interrupt on some processors
+	 */
+	pfm_freeze_pmu();
+
+	ia64_srlz_d();
+}
+
+void
+pfm_alt_restore_pmu_state(void *data)
+{
+	struct pt_regs *regs;
+
+	regs = task_pt_regs(current);
+
+	DPRINT(("called\n"));
+
+	/*
+	 * put PMU back in state expected
+	 * by perfmon
+	 */
+	pfm_clear_psr_up();
+	pfm_clear_psr_pp();
+	ia64_psr(regs)->pp = 0;
+
+	/*
+	 * perfmon runs with PMU unfrozen at all times
+	 */
+	pfm_unfreeze_pmu();
+
+	ia64_srlz_d();
+}
+
+int
+pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+	int ret, i;
+	int reserve_cpu;
+
+	/* some sanity checks */
+	if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
+
+	/* do the easy test first */
+	if (pfm_alt_intr_handler) return -EBUSY;
+
+	/* one at a time in the install or remove, just fail the others */
+	if (!spin_trylock(&pfm_alt_install_check)) {
+		return -EBUSY;
+	}
+
+	/* reserve our session */
+	for_each_online_cpu(reserve_cpu) {
+		ret = pfm_reserve_session(NULL, 1, reserve_cpu);
+		if (ret) goto cleanup_reserve;
+	}
+
+	/* save the current system wide pmu states */
+	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
+	if (ret) {
+		DPRINT(("on_each_cpu() failed: %d\n", ret));
+		goto cleanup_reserve;
+	}
+
+	/* officially change to the alternate interrupt handler */
+	pfm_alt_intr_handler = hdl;
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return 0;
+
+cleanup_reserve:
+	for_each_online_cpu(i) {
+		/* don't unreserve more than we reserved */
+		if (i >= reserve_cpu) break;
+
+		pfm_unreserve_session(NULL, 1, i);
+	}
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt);
+
+int
+pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+	int i;
+	int ret;
+
+	if (hdl == NULL) return -EINVAL;
+
+	/* cannot remove someone else's handler! */
+	if (pfm_alt_intr_handler != hdl) return -EINVAL;
+
+	/* one at a time in the install or remove, just fail the others */
+	if (!spin_trylock(&pfm_alt_install_check)) {
+		return -EBUSY;
+	}
+
+	pfm_alt_intr_handler = NULL;
+
+	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
+	if (ret) {
+		DPRINT(("on_each_cpu() failed: %d\n", ret));
+	}
+
+	for_each_online_cpu(i) {
+		pfm_unreserve_session(NULL, 1, i);
+	}
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);
+
+/*
+ * perfmon initialization routine, called from the initcall() table
+ */
+static int init_pfm_fs(void);
+
+static int __init
+pfm_probe_pmu(void)
+{
+	pmu_config_t **p;
+	int family;
+
+	family = local_cpu_data->family;
+	p      = pmu_confs;
+
+	while(*p) {
+		if ((*p)->probe) {
+			if ((*p)->probe() == 0) goto found;
+		} else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) {
+			goto found;
+		}
+		p++;
+	}
+	return -1;
+found:
+	pmu_conf = *p;
+	return 0;
+}
+
+static const struct file_operations pfm_proc_fops = {
+	.open		= pfm_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+int __init
+pfm_init(void)
+{
+	unsigned int n, n_counters, i;
+
+	printk("perfmon: version %u.%u IRQ %u\n",
+		PFM_VERSION_MAJ,
+		PFM_VERSION_MIN,
+		IA64_PERFMON_VECTOR);
+
+	if (pfm_probe_pmu()) {
+		printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", 
+				local_cpu_data->family);
+		return -ENODEV;
+	}
+
+	/*
+	 * compute the number of implemented PMD/PMC from the
+	 * description tables
+	 */
+	n = 0;
+	for (i=0; PMC_IS_LAST(i) == 0;  i++) {
+		if (PMC_IS_IMPL(i) == 0) continue;
+		pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63);
+		n++;
+	}
+	pmu_conf->num_pmcs = n;
+
+	n = 0; n_counters = 0;
+	for (i=0; PMD_IS_LAST(i) == 0;  i++) {
+		if (PMD_IS_IMPL(i) == 0) continue;
+		pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63);
+		n++;
+		if (PMD_IS_COUNTING(i)) n_counters++;
+	}
+	pmu_conf->num_pmds      = n;
+	pmu_conf->num_counters  = n_counters;
+
+	/*
+	 * sanity checks on the number of debug registers
+	 */
+	if (pmu_conf->use_rr_dbregs) {
+		if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) {
+			printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs);
+			pmu_conf = NULL;
+			return -1;
+		}
+		if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) {
+			printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_ibrs);
+			pmu_conf = NULL;
+			return -1;
+		}
+	}
+
+	printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n",
+	       pmu_conf->pmu_name,
+	       pmu_conf->num_pmcs,
+	       pmu_conf->num_pmds,
+	       pmu_conf->num_counters,
+	       ffz(pmu_conf->ovfl_val));
+
+	/* sanity check */
+	if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) {
+		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
+		pmu_conf = NULL;
+		return -1;
+	}
+
+	/*
+	 * create /proc/perfmon (mostly for debugging purposes)
+	 */
+	perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops);
+	if (perfmon_dir == NULL) {
+		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
+		pmu_conf = NULL;
+		return -1;
+	}
+
+	/*
+	 * create /proc/sys/kernel/perfmon (for debugging purposes)
+	 */
+	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root);
+
+	/*
+	 * initialize all our spinlocks
+	 */
+	spin_lock_init(&pfm_sessions.pfs_lock);
+	spin_lock_init(&pfm_buffer_fmt_lock);
+
+	init_pfm_fs();
+
+	for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL;
+
+	return 0;
+}
+
+__initcall(pfm_init);
+
+/*
+ * this function is called before pfm_init()
+ */
+void
+pfm_init_percpu (void)
+{
+	static int first_time=1;
+	/*
+	 * make sure no measurement is active
+	 * (may inherit programmed PMCs from EFI).
+	 */
+	pfm_clear_psr_pp();
+	pfm_clear_psr_up();
+
+	/*
+	 * we run with the PMU not frozen at all times
+	 */
+	pfm_unfreeze_pmu();
+
+	if (first_time) {
+		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+		first_time=0;
+	}
+
+	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
+	ia64_srlz_d();
+}
+
+/*
+ * used for debug purposes only
+ */
+void
+dump_pmu_state(const char *from)
+{
+	struct task_struct *task;
+	struct pt_regs *regs;
+	pfm_context_t *ctx;
+	unsigned long psr, dcr, info, flags;
+	int i, this_cpu;
+
+	local_irq_save(flags);
+
+	this_cpu = smp_processor_id();
+	regs     = task_pt_regs(current);
+	info     = PFM_CPUINFO_GET();
+	dcr      = ia64_getreg(_IA64_REG_CR_DCR);
+
+	if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
+		local_irq_restore(flags);
+		return;
+	}
+
+	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", 
+		this_cpu, 
+		from, 
+		task_pid_nr(current),
+		regs->cr_iip,
+		current->comm);
+
+	task = GET_PMU_OWNER();
+	ctx  = GET_PMU_CTX();
+
+	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx);
+
+	psr = pfm_get_psr();
+
+	printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", 
+		this_cpu,
+		ia64_get_pmc(0),
+		psr & IA64_PSR_PP ? 1 : 0,
+		psr & IA64_PSR_UP ? 1 : 0,
+		dcr & IA64_DCR_PP ? 1 : 0,
+		info,
+		ia64_psr(regs)->up,
+		ia64_psr(regs)->pp);
+
+	ia64_psr(regs)->up = 0;
+	ia64_psr(regs)->pp = 0;
+
+	for (i=1; PMC_IS_LAST(i) == 0; i++) {
+		if (PMC_IS_IMPL(i) == 0) continue;
+		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]);
+	}
+
+	for (i=1; PMD_IS_LAST(i) == 0; i++) {
+		if (PMD_IS_IMPL(i) == 0) continue;
+		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]);
+	}
+
+	if (ctx) {
+		printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d ctx_task=[%d] saved_psr_up=0x%lx\n",
+				this_cpu,
+				ctx->ctx_state,
+				ctx->ctx_smpl_vaddr,
+				ctx->ctx_smpl_hdr,
+				ctx->ctx_msgq_head,
+				ctx->ctx_msgq_tail,
+				ctx->ctx_saved_psr_up);
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * called from process.c:copy_thread(). task is new child.
+ */
+void
+pfm_inherit(struct task_struct *task, struct pt_regs *regs)
+{
+	struct thread_struct *thread;
+
+	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task)));
+
+	thread = &task->thread;
+
+	/*
+	 * cut links inherited from parent (current)
+	 */
+	thread->pfm_context = NULL;
+
+	PFM_SET_WORK_PENDING(task, 0);
+
+	/*
+	 * the psr bits are already set properly in copy_threads()
+	 */
+}
+#else  /* !CONFIG_PERFMON */
+asmlinkage long
+sys_perfmonctl (int fd, int cmd, void *arg, int count)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_PERFMON */
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
new file mode 100644
index 00000000..30c644ea
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_default_smpl.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * This file implements the default sampling buffer format
+ * for the Linux/ia64 perfmon-2 subsystem.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <asm/delay.h>
+#include <linux/smp.h>
+
+#include <asm/perfmon.h>
+#include <asm/perfmon_default_smpl.h>
+
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
+MODULE_DESCRIPTION("perfmon default sampling format");
+MODULE_LICENSE("GPL");
+
+#define DEFAULT_DEBUG 1
+
+#ifdef DEFAULT_DEBUG
+#define DPRINT(a) \
+	do { \
+		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
+	} while (0)
+
+#define DPRINT_ovfl(a) \
+	do { \
+		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
+	} while (0)
+
+#else
+#define DPRINT(a)
+#define DPRINT_ovfl(a)
+#endif
+
+static int
+default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
+{
+	pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data;
+	int ret = 0;
+
+	if (data == NULL) {
+		DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
+		return -EINVAL;
+	}
+
+	DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
+
+	/*
+	 * must hold at least the buffer header + one minimally sized entry
+	 */
+	if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL;
+
+	DPRINT(("buf_size=%lu\n", arg->buf_size));
+
+	return ret;
+}
+
+static int
+default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
+{
+	pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
+
+	/*
+	 * size has been validated in default_validate
+	 */
+	*size = arg->buf_size;
+
+	return 0;
+}
+
+static int
+default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
+{
+	pfm_default_smpl_hdr_t *hdr;
+	pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
+
+	hdr = (pfm_default_smpl_hdr_t *)buf;
+
+	hdr->hdr_version      = PFM_DEFAULT_SMPL_VERSION;
+	hdr->hdr_buf_size     = arg->buf_size;
+	hdr->hdr_cur_offs     = sizeof(*hdr);
+	hdr->hdr_overflows    = 0UL;
+	hdr->hdr_count        = 0UL;
+
+	DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
+		task_pid_nr(task),
+		buf,
+		hdr->hdr_buf_size,
+		sizeof(*hdr),
+		hdr->hdr_version,
+		hdr->hdr_cur_offs));
+
+	return 0;
+}
+
+static int
+default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
+{
+	pfm_default_smpl_hdr_t *hdr;
+	pfm_default_smpl_entry_t *ent;
+	void *cur, *last;
+	unsigned long *e, entry_size;
+	unsigned int npmds, i;
+	unsigned char ovfl_pmd;
+	unsigned char ovfl_notify;
+
+	if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) {
+		DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg));
+		return -EINVAL;
+	}
+
+	hdr         = (pfm_default_smpl_hdr_t *)buf;
+	cur         = buf+hdr->hdr_cur_offs;
+	last        = buf+hdr->hdr_buf_size;
+	ovfl_pmd    = arg->ovfl_pmd;
+	ovfl_notify = arg->ovfl_notify;
+
+	/*
+	 * precheck for sanity
+	 */
+	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
+
+	npmds = hweight64(arg->smpl_pmds[0]);
+
+	ent = (pfm_default_smpl_entry_t *)cur;
+
+	prefetch(arg->smpl_pmds_values);
+
+	entry_size = sizeof(*ent) + (npmds << 3);
+
+	/* position for first pmd */
+	e = (unsigned long *)(ent+1);
+
+	hdr->hdr_count++;
+
+	DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
+			task->pid,
+			hdr->hdr_count,
+			cur, last,
+			last-cur,
+			ovfl_pmd,
+			ovfl_notify, npmds));
+
+	/*
+	 * current = task running at the time of the overflow.
+	 *
+	 * per-task mode:
+	 * 	- this is usually the task being monitored.
+	 * 	  Under certain conditions, it might be a different task
+	 *
+	 * system-wide:
+	 * 	- this is not necessarily the task controlling the session
+	 */
+	ent->pid            = current->pid;
+	ent->ovfl_pmd  	    = ovfl_pmd;
+	ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;
+
+	/*
+	 * where did the fault happen (includes slot number)
+	 */
+	ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);
+
+	ent->tstamp    = stamp;
+	ent->cpu       = smp_processor_id();
+	ent->set       = arg->active_set;
+	ent->tgid      = current->tgid;
+
+	/*
+	 * selectively store PMDs in increasing index number
+	 */
+	if (npmds) {
+		unsigned long *val = arg->smpl_pmds_values;
+		for(i=0; i < npmds; i++) {
+			*e++ = *val++;
+		}
+	}
+
+	/*
+	 * update position for next entry
+	 */
+	hdr->hdr_cur_offs += entry_size;
+	cur               += entry_size;
+
+	/*
+	 * post check to avoid losing the last sample
+	 */
+	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
+
+	/*
+	 * keep same ovfl_pmds, ovfl_notify
+	 */
+	arg->ovfl_ctrl.bits.notify_user     = 0;
+	arg->ovfl_ctrl.bits.block_task      = 0;
+	arg->ovfl_ctrl.bits.mask_monitoring = 0;
+	arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */
+
+	return 0;
+full:
+	DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));
+
+	/*
+	 * increment number of buffer overflow.
+	 * important to detect duplicate set of samples.
+	 */
+	hdr->hdr_overflows++;
+
+	/*
+	 * if no notification requested, then we saturate the buffer
+	 */
+	if (ovfl_notify == 0) {
+		arg->ovfl_ctrl.bits.notify_user     = 0;
+		arg->ovfl_ctrl.bits.block_task      = 0;
+		arg->ovfl_ctrl.bits.mask_monitoring = 1;
+		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
+	} else {
+		arg->ovfl_ctrl.bits.notify_user     = 1;
+		arg->ovfl_ctrl.bits.block_task      = 1; /* ignored for non-blocking context */
+		arg->ovfl_ctrl.bits.mask_monitoring = 1;
+		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
+	}
+	return -1; /* we are full, sorry */
+}
+
+static int
+default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
+{
+	pfm_default_smpl_hdr_t *hdr;
+
+	hdr = (pfm_default_smpl_hdr_t *)buf;
+
+	hdr->hdr_count    = 0UL;
+	hdr->hdr_cur_offs = sizeof(*hdr);
+
+	ctrl->bits.mask_monitoring = 0;
+	ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */
+
+	return 0;
+}
+
+static int
+default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
+{
+	DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
+	return 0;
+}
+
+static pfm_buffer_fmt_t default_fmt={
+ 	.fmt_name 	    = "default_format",
+ 	.fmt_uuid	    = PFM_DEFAULT_SMPL_UUID,
+ 	.fmt_arg_size	    = sizeof(pfm_default_smpl_arg_t),
+ 	.fmt_validate	    = default_validate,
+ 	.fmt_getsize	    = default_get_size,
+ 	.fmt_init	    = default_init,
+ 	.fmt_handler	    = default_handler,
+ 	.fmt_restart	    = default_restart,
+ 	.fmt_restart_active = default_restart,
+ 	.fmt_exit	    = default_exit,
+};
+
+static int __init
+pfm_default_smpl_init_module(void)
+{
+	int ret;
+
+	ret = pfm_register_buffer_fmt(&default_fmt);
+	if (ret == 0) {
+		printk("perfmon_default_smpl: %s v%u.%u registered\n",
+			default_fmt.fmt_name,
+			PFM_DEFAULT_SMPL_VERSION_MAJ,
+			PFM_DEFAULT_SMPL_VERSION_MIN);
+	} else {
+		printk("perfmon_default_smpl: %s cannot register ret=%d\n",
+			default_fmt.fmt_name,
+			ret);
+	}
+
+	return ret;
+}
+
+static void __exit
+pfm_default_smpl_cleanup_module(void)
+{
+	int ret;
+	ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid);
+
+	printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret);
+}
+
+module_init(pfm_default_smpl_init_module);
+module_exit(pfm_default_smpl_cleanup_module);
+
diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h
new file mode 100644
index 00000000..67489478
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_generic.h
@@ -0,0 +1,45 @@
+/*
+ * This file contains the generic PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003  Hewlett Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd1  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd2  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd3  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_gen={
+	.pmu_name   = "Generic",
+	.pmu_family = 0xff, /* any */
+	.ovfl_val   = (1UL << 32) - 1,
+	.num_ibrs   = 0, /* does not use */
+	.num_dbrs   = 0, /* does not use */
+	.pmd_desc   = pfm_gen_pmd_desc,
+	.pmc_desc   = pfm_gen_pmc_desc
+};
+
diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h
new file mode 100644
index 00000000..d1d508a0
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_itanium.h
@@ -0,0 +1,115 @@
+/*
+ * This file contains the Itanium PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003  Hewlett Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ */
+static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_CONFIG  , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd1  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd2  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd3  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd9  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd16 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd17 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static int
+pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	int ret;
+	int is_loaded;
+
+	/* sanitfy check */
+	if (ctx == NULL) return -EINVAL;
+
+	is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
+
+	/*
+	 * we must clear the (instruction) debug registers if pmc13.ta bit is cleared
+	 * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
+	 */
+	if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers as in use and also
+		 * ensure that they are properly cleared.
+		 */
+		ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs);
+		if (ret) return ret;
+	}
+
+	/*
+	 * we must clear the (data) debug registers if pmc11.pt bit is cleared
+	 * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
+	 */
+	if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers as in use and also
+		 * ensure that they are properly cleared.
+		 */
+		ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs);
+		if (ret) return ret;
+	}
+	return 0;
+}
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_ita={
+	.pmu_name      = "Itanium",
+	.pmu_family    = 0x7,
+	.ovfl_val      = (1UL << 32) - 1,
+	.pmd_desc      = pfm_ita_pmd_desc,
+	.pmc_desc      = pfm_ita_pmc_desc,
+	.num_ibrs      = 8,
+	.num_dbrs      = 8,
+	.use_rr_dbregs = 1, /* debug register are use for range retrictions */
+};
+
+
diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h
new file mode 100644
index 00000000..c4bec7a9
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_mckinley.h
@@ -0,0 +1,187 @@
+/*
+ * This file contains the McKinley PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003  Hewlett Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ */
+static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL,  pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL,  pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_CONFIG  , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc14 */ { PFM_REG_CONFIG  , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc15 */ { PFM_REG_CONFIG  , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd1  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd2  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd3  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd9  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd16 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd17 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+/*
+ * PMC reserved fields must have their power-up values preserved
+ */
+static int
+pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	unsigned long tmp1, tmp2, ival = *val;
+
+	/* remove reserved areas from user value */
+	tmp1 = ival & PMC_RSVD_MASK(cnum);
+
+	/* get reserved fields values */
+	tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
+
+	*val = tmp1 | tmp2;
+
+	DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
+		  cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
+	return 0;
+}
+
+/*
+ * task can be NULL if the context is unloaded
+ */
+static int
+pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	int ret = 0, check_case1 = 0;
+	unsigned long val8 = 0, val14 = 0, val13 = 0;
+	int is_loaded;
+
+	/* first preserve the reserved fields */
+	pfm_mck_reserved(cnum, val, regs);
+
+	/* sanitfy check */
+	if (ctx == NULL) return -EINVAL;
+
+	is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
+
+	/*
+	 * we must clear the debug registers if pmc13 has a value which enable
+	 * memory pipeline event constraints. In this case we need to clear the
+	 * the debug registers if they have not yet been accessed. This is required
+	 * to avoid picking stale state.
+	 * PMC13 is "active" if:
+	 * 	one of the pmc13.cfg_dbrpXX field is different from 0x3
+	 * AND
+	 * 	at the corresponding pmc13.ena_dbrpXX is set.
+	 */
+	DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded));
+
+	if (cnum == 13 && is_loaded
+	    && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers as in use and also
+		 * ensure that they are properly cleared.
+		 */
+		ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
+		if (ret) return ret;
+	}
+	/*
+	 * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
+	 * before they are (fl_using_dbreg==0) to avoid picking up stale information.
+	 */
+	if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers as in use and also
+		 * ensure that they are properly cleared.
+		 */
+		ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
+		if (ret) return ret;
+
+	}
+
+	switch(cnum) {
+		case  4: *val |= 1UL << 23; /* force power enable bit */
+			 break;
+		case  8: val8 = *val;
+			 val13 = ctx->ctx_pmcs[13];
+			 val14 = ctx->ctx_pmcs[14];
+			 check_case1 = 1;
+			 break;
+		case 13: val8  = ctx->ctx_pmcs[8];
+			 val13 = *val;
+			 val14 = ctx->ctx_pmcs[14];
+			 check_case1 = 1;
+			 break;
+		case 14: val8  = ctx->ctx_pmcs[8];
+			 val13 = ctx->ctx_pmcs[13];
+			 val14 = *val;
+			 check_case1 = 1;
+			 break;
+	}
+	/* check illegal configuration which can produce inconsistencies in tagging
+	 * i-side events in L1D and L2 caches
+	 */
+	if (check_case1) {
+		ret =   ((val13 >> 45) & 0xf) == 0
+		   && ((val8 & 0x1) == 0)
+		   && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
+		       ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
+
+		if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
+	}
+
+	return ret ? -EINVAL : 0;
+}
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_mck={
+	.pmu_name      = "Itanium 2",
+	.pmu_family    = 0x1f,
+	.flags	       = PFM_PMU_IRQ_RESEND,
+	.ovfl_val      = (1UL << 47) - 1,
+	.pmd_desc      = pfm_mck_pmd_desc,
+	.pmc_desc      = pfm_mck_pmc_desc,
+	.num_ibrs       = 8,
+	.num_dbrs       = 8,
+	.use_rr_dbregs = 1 /* debug register are use for range restrictions */
+};
+
+
diff --git a/arch/ia64/kernel/perfmon_montecito.h b/arch/ia64/kernel/perfmon_montecito.h
new file mode 100644
index 00000000..7f8da4c7
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_montecito.h
@@ -0,0 +1,269 @@
+/*
+ * This file contains the Montecito PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ *               Contributed by Stephane Eranian <eranian@hpl.hp.com>
+ */
+static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+#define RDEP_MONT_ETB	(RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\
+			 RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63))
+#define RDEP_MONT_DEAR  (RDEP(32)|RDEP(33)|RDEP(36))
+#define RDEP_MONT_IEAR  (RDEP(34)|RDEP(35))
+
+static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}},
+/* pmc8  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}},
+/* pmc9  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}},
+/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}},
+/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}},
+/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}},
+/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}},
+/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}},
+/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}},
+/* pmc16 */ { PFM_REG_NOTIMPL, },
+/* pmc17 */ { PFM_REG_NOTIMPL, },
+/* pmc18 */ { PFM_REG_NOTIMPL, },
+/* pmc19 */ { PFM_REG_NOTIMPL, },
+/* pmc20 */ { PFM_REG_NOTIMPL, },
+/* pmc21 */ { PFM_REG_NOTIMPL, },
+/* pmc22 */ { PFM_REG_NOTIMPL, },
+/* pmc23 */ { PFM_REG_NOTIMPL, },
+/* pmc24 */ { PFM_REG_NOTIMPL, },
+/* pmc25 */ { PFM_REG_NOTIMPL, },
+/* pmc26 */ { PFM_REG_NOTIMPL, },
+/* pmc27 */ { PFM_REG_NOTIMPL, },
+/* pmc28 */ { PFM_REG_NOTIMPL, },
+/* pmc29 */ { PFM_REG_NOTIMPL, },
+/* pmc30 */ { PFM_REG_NOTIMPL, },
+/* pmc31 */ { PFM_REG_NOTIMPL, },
+/* pmc32 */ { PFM_REG_CONFIG,  0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc33 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc34 */ { PFM_REG_CONFIG,  0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc35 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc36 */ { PFM_REG_CONFIG,  0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}},
+/* pmc38 */ { PFM_REG_CONFIG,  0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
+/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}},
+/* pmc41 */ { PFM_REG_CONFIG,  0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
+	    { PFM_REG_END    , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_NOTIMPL, }, 
+/* pmd1  */ { PFM_REG_NOTIMPL, },
+/* pmd2  */ { PFM_REG_NOTIMPL, },
+/* pmd3  */ { PFM_REG_NOTIMPL, },
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}},
+/* pmd8  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}}, 
+/* pmd9  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}},
+/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}},
+/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}},
+/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}},
+/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}},
+/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}},
+/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}},
+/* pmd16 */ { PFM_REG_NOTIMPL, },
+/* pmd17 */ { PFM_REG_NOTIMPL, },
+/* pmd18 */ { PFM_REG_NOTIMPL, },
+/* pmd19 */ { PFM_REG_NOTIMPL, },
+/* pmd20 */ { PFM_REG_NOTIMPL, },
+/* pmd21 */ { PFM_REG_NOTIMPL, },
+/* pmd22 */ { PFM_REG_NOTIMPL, },
+/* pmd23 */ { PFM_REG_NOTIMPL, },
+/* pmd24 */ { PFM_REG_NOTIMPL, },
+/* pmd25 */ { PFM_REG_NOTIMPL, },
+/* pmd26 */ { PFM_REG_NOTIMPL, },
+/* pmd27 */ { PFM_REG_NOTIMPL, },
+/* pmd28 */ { PFM_REG_NOTIMPL, },
+/* pmd29 */ { PFM_REG_NOTIMPL, },
+/* pmd30 */ { PFM_REG_NOTIMPL, },
+/* pmd31 */ { PFM_REG_NOTIMPL, },
+/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
+/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
+/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}},
+/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}},
+/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}},
+/* pmd37 */ { PFM_REG_NOTIMPL, },
+/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd40 */ { PFM_REG_NOTIMPL, },
+/* pmd41 */ { PFM_REG_NOTIMPL, },
+/* pmd42 */ { PFM_REG_NOTIMPL, },
+/* pmd43 */ { PFM_REG_NOTIMPL, },
+/* pmd44 */ { PFM_REG_NOTIMPL, },
+/* pmd45 */ { PFM_REG_NOTIMPL, },
+/* pmd46 */ { PFM_REG_NOTIMPL, },
+/* pmd47 */ { PFM_REG_NOTIMPL, },
+/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+	    { PFM_REG_END   , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+/*
+ * PMC reserved fields must have their power-up values preserved
+ */
+static int
+pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	unsigned long tmp1, tmp2, ival = *val;
+
+	/* remove reserved areas from user value */
+	tmp1 = ival & PMC_RSVD_MASK(cnum);
+
+	/* get reserved fields values */
+	tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
+
+	*val = tmp1 | tmp2;
+
+	DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
+		  cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
+	return 0;
+}
+
+/*
+ * task can be NULL if the context is unloaded
+ */
+static int
+pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	int ret = 0;
+	unsigned long val32 = 0, val38 = 0, val41 = 0;
+	unsigned long tmpval;
+	int check_case1 = 0;
+	int is_loaded;
+
+	/* first preserve the reserved fields */
+	pfm_mont_reserved(cnum, val, regs);
+
+	tmpval = *val;
+
+	/* sanity check */
+	if (ctx == NULL) return -EINVAL;
+
+	is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
+
+	/*
+	 * we must clear the debug registers if pmc41 has a value which enable
+	 * memory pipeline event constraints. In this case we need to clear the
+	 * the debug registers if they have not yet been accessed. This is required
+	 * to avoid picking stale state.
+	 * PMC41 is "active" if:
+	 * 	one of the pmc41.cfg_dtagXX field is different from 0x3
+	 * AND
+	 * 	at the corresponding pmc41.en_dbrpXX is set.
+	 * AND
+	 *	ctx_fl_using_dbreg == 0  (i.e., dbr not yet used)
+	 */
+	DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded));
+
+	if (cnum == 41 && is_loaded 
+	    && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers if:
+		 * AND
+		 */
+		ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
+		if (ret) return ret;
+	}
+	/*
+	 * we must clear the (instruction) debug registers if:
+	 * 	pmc38.ig_ibrpX is 0 (enabled)
+	 * AND
+	 *	ctx_fl_using_dbreg == 0  (i.e., dbr not yet used)
+	 */
+	if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers as in use and also
+		 * ensure that they are properly cleared.
+		 */
+		ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
+		if (ret) return ret;
+
+	}
+	switch(cnum) {
+		case  32: val32 = *val;
+			  val38 = ctx->ctx_pmcs[38];
+			  val41 = ctx->ctx_pmcs[41];
+			  check_case1 = 1;
+			  break;
+		case  38: val38 = *val;
+			  val32 = ctx->ctx_pmcs[32];
+			  val41 = ctx->ctx_pmcs[41];
+			  check_case1 = 1;
+			  break;
+		case  41: val41 = *val;
+			  val32 = ctx->ctx_pmcs[32];
+			  val38 = ctx->ctx_pmcs[38];
+			  check_case1 = 1;
+			  break;
+	}
+	/* check illegal configuration which can produce inconsistencies in tagging
+	 * i-side events in L1D and L2 caches
+	 */
+	if (check_case1) {
+		ret =   (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
+		     && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
+		     ||  (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
+		if (ret) {
+			DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32));
+			return -EINVAL;
+		}
+	}
+	*val = tmpval;
+	return 0;
+}
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_mont={
+	.pmu_name        = "Montecito",
+	.pmu_family      = 0x20,
+	.flags           = PFM_PMU_IRQ_RESEND,
+	.ovfl_val        = (1UL << 47) - 1,
+	.pmd_desc        = pfm_mont_pmd_desc,
+	.pmc_desc        = pfm_mont_pmc_desc,
+	.num_ibrs        = 8,
+	.num_dbrs        = 8,
+	.use_rr_dbregs   = 1 /* debug register are use for range retrictions */
+};
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
new file mode 100644
index 00000000..6d33c5cc
--- /dev/null
+++ b/arch/ia64/kernel/process.c
@@ -0,0 +1,803 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * 04/11/17 Ashok Raj	<ashok.raj@intel.com> Added CPU Hotplug Support
+ *
+ * 2005-10-07 Keith Owens <kaos@sgi.com>
+ *	      Add notify_die() hooks.
+ */
+#include <linux/cpu.h>
+#include <linux/pm.h>
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/personality.h>
+#include <linux/sched.h>
+#include <linux/stddef.h>
+#include <linux/thread_info.h>
+#include <linux/unistd.h>
+#include <linux/efi.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/kdebug.h>
+#include <linux/utsname.h>
+#include <linux/tracehook.h>
+
+#include <asm/cpu.h>
+#include <asm/delay.h>
+#include <asm/elf.h>
+#include <asm/irq.h>
+#include <asm/kexec.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/unwind.h>
+#include <asm/user.h>
+
+#include "entry.h"
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#include "sigframe.h"
+
+void (*ia64_mark_idle)(int);
+
+unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
+EXPORT_SYMBOL(boot_option_idle_override);
+void (*pm_idle) (void);
+EXPORT_SYMBOL(pm_idle);
+void (*pm_power_off) (void);
+EXPORT_SYMBOL(pm_power_off);
+
+void
+ia64_do_show_stack (struct unw_frame_info *info, void *arg)
+{
+	unsigned long ip, sp, bsp;
+	char buf[128];			/* don't make it so big that it overflows the stack! */
+
+	printk("\nCall Trace:\n");
+	do {
+		unw_get_ip(info, &ip);
+		if (ip == 0)
+			break;
+
+		unw_get_sp(info, &sp);
+		unw_get_bsp(info, &bsp);
+		snprintf(buf, sizeof(buf),
+			 " [<%016lx>] %%s\n"
+			 "                                sp=%016lx bsp=%016lx\n",
+			 ip, sp, bsp);
+		print_symbol(buf, ip);
+	} while (unw_unwind(info) >= 0);
+}
+
+void
+show_stack (struct task_struct *task, unsigned long *sp)
+{
+	if (!task)
+		unw_init_running(ia64_do_show_stack, NULL);
+	else {
+		struct unw_frame_info info;
+
+		unw_init_from_blocked_task(&info, task);
+		ia64_do_show_stack(&info, NULL);
+	}
+}
+
+void
+dump_stack (void)
+{
+	show_stack(NULL, NULL);
+}
+
+EXPORT_SYMBOL(dump_stack);
+
+void
+show_regs (struct pt_regs *regs)
+{
+	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+	print_modules();
+	printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current),
+			smp_processor_id(), current->comm);
+	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s (%s)\n",
+	       regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(),
+	       init_utsname()->release);
+	print_symbol("ip is at %s\n", ip);
+	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+	printk("rnat: %016lx bsps: %016lx pr  : %016lx\n",
+	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
+	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+	printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0, regs->b6, regs->b7);
+	printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
+	       regs->f6.u.bits[1], regs->f6.u.bits[0],
+	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
+	printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
+	       regs->f8.u.bits[1], regs->f8.u.bits[0],
+	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
+	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+	       regs->f10.u.bits[1], regs->f10.u.bits[0],
+	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+	printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1, regs->r2, regs->r3);
+	printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10);
+	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13);
+	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16);
+	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19);
+	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22);
+	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25);
+	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28);
+	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31);
+
+	if (user_mode(regs)) {
+		/* print the stacked registers */
+		unsigned long val, *bsp, ndirty;
+		int i, sof, is_nat = 0;
+
+		sof = regs->cr_ifs & 0x7f;	/* size of frame */
+		ndirty = (regs->loadrs >> 19);
+		bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty);
+		for (i = 0; i < sof; ++i) {
+			get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i));
+			printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val,
+			       ((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
+		}
+	} else
+		show_stack(NULL, NULL);
+}
+
+/* local support for deprecated console_print */
+void
+console_print(const char *s)
+{
+	printk(KERN_EMERG "%s", s);
+}
+
+void
+do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
+{
+	if (fsys_mode(current, &scr->pt)) {
+		/*
+		 * defer signal-handling etc. until we return to
+		 * privilege-level 0.
+		 */
+		if (!ia64_psr(&scr->pt)->lp)
+			ia64_psr(&scr->pt)->lp = 1;
+		return;
+	}
+
+#ifdef CONFIG_PERFMON
+	if (current->thread.pfm_needs_checking)
+		/*
+		 * Note: pfm_handle_work() allow us to call it with interrupts
+		 * disabled, and may enable interrupts within the function.
+		 */
+		pfm_handle_work();
+#endif
+
+	/* deal with pending signal delivery */
+	if (test_thread_flag(TIF_SIGPENDING)) {
+		local_irq_enable();	/* force interrupt enable */
+		ia64_do_signal(scr, in_syscall);
+	}
+
+	if (test_thread_flag(TIF_NOTIFY_RESUME)) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(&scr->pt);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
+
+	/* copy user rbs to kernel rbs */
+	if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) {
+		local_irq_enable();	/* force interrupt enable */
+		ia64_sync_krbs();
+	}
+
+	local_irq_disable();	/* force interrupt disable */
+}
+
+static int pal_halt        = 1;
+static int can_do_pal_halt = 1;
+
+static int __init nohalt_setup(char * str)
+{
+	pal_halt = can_do_pal_halt = 0;
+	return 1;
+}
+__setup("nohalt", nohalt_setup);
+
+void
+update_pal_halt_status(int status)
+{
+	can_do_pal_halt = pal_halt && status;
+}
+
+/*
+ * We use this if we don't have any better idle routine..
+ */
+void
+default_idle (void)
+{
+	local_irq_enable();
+	while (!need_resched()) {
+		if (can_do_pal_halt) {
+			local_irq_disable();
+			if (!need_resched()) {
+				safe_halt();
+			}
+			local_irq_enable();
+		} else
+			cpu_relax();
+	}
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+	unsigned int this_cpu = smp_processor_id();
+
+	/* Ack it */
+	__get_cpu_var(cpu_state) = CPU_DEAD;
+
+	max_xtp();
+	local_irq_disable();
+	idle_task_exit();
+	ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
+	/*
+	 * The above is a point of no-return, the processor is
+	 * expected to be in SAL loop now.
+	 */
+	BUG();
+}
+#else
+static inline void play_dead(void)
+{
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
+void cpu_idle_wait(void)
+{
+	smp_mb();
+	/* kick all the CPUs so that they exit out of pm_idle */
+	smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+void __attribute__((noreturn))
+cpu_idle (void)
+{
+	void (*mark_idle)(int) = ia64_mark_idle;
+  	int cpu = smp_processor_id();
+
+	/* endless idle loop with no priority at all */
+	while (1) {
+		if (can_do_pal_halt) {
+			current_thread_info()->status &= ~TS_POLLING;
+			/*
+			 * TS_POLLING-cleared state must be visible before we
+			 * test NEED_RESCHED:
+			 */
+			smp_mb();
+		} else {
+			current_thread_info()->status |= TS_POLLING;
+		}
+
+		if (!need_resched()) {
+			void (*idle)(void);
+#ifdef CONFIG_SMP
+			min_xtp();
+#endif
+			rmb();
+			if (mark_idle)
+				(*mark_idle)(1);
+
+			idle = pm_idle;
+			if (!idle)
+				idle = default_idle;
+			(*idle)();
+			if (mark_idle)
+				(*mark_idle)(0);
+#ifdef CONFIG_SMP
+			normal_xtp();
+#endif
+		}
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
+		check_pgt_cache();
+		if (cpu_is_offline(cpu))
+			play_dead();
+	}
+}
+
+void
+ia64_save_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+	unsigned long info;
+#endif
+
+	if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+		ia64_save_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+	if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+		pfm_save_regs(task);
+
+	info = __get_cpu_var(pfm_syst_info);
+	if (info & PFM_CPUINFO_SYST_WIDE)
+		pfm_syst_wide_update_task(task, info, 0);
+#endif
+}
+
+void
+ia64_load_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+	unsigned long info;
+#endif
+
+	if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+		ia64_load_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+	if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+		pfm_load_regs(task);
+
+	info = __get_cpu_var(pfm_syst_info);
+	if (info & PFM_CPUINFO_SYST_WIDE) 
+		pfm_syst_wide_update_task(task, info, 1);
+#endif
+}
+
+/*
+ * Copy the state of an ia-64 thread.
+ *
+ * We get here through the following  call chain:
+ *
+ *	from user-level:	from kernel:
+ *
+ *	<clone syscall>	        <some kernel call frames>
+ *	sys_clone		   :
+ *	do_fork			do_fork
+ *	copy_thread		copy_thread
+ *
+ * This means that the stack layout is as follows:
+ *
+ *	+---------------------+ (highest addr)
+ *	|   struct pt_regs    |
+ *	+---------------------+
+ *	| struct switch_stack |
+ *	+---------------------+
+ *	|                     |
+ *	|    memory stack     |
+ *	|                     | <-- sp (lowest addr)
+ *	+---------------------+
+ *
+ * Observe that we copy the unat values that are in pt_regs and switch_stack.  Spilling an
+ * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
+ * with N=(X & 0x1ff)/8.  Thus, copying the unat value preserves the NaT bits ONLY if the
+ * pt_regs structure in the parent is congruent to that of the child, modulo 512.  Since
+ * the stack is page aligned and the page size is at least 4KB, this is always the case,
+ * so there is nothing to worry about.
+ */
+int
+copy_thread(unsigned long clone_flags,
+	     unsigned long user_stack_base, unsigned long user_stack_size,
+	     struct task_struct *p, struct pt_regs *regs)
+{
+	extern char ia64_ret_from_clone;
+	struct switch_stack *child_stack, *stack;
+	unsigned long rbs, child_rbs, rbs_size;
+	struct pt_regs *child_ptregs;
+	int retval = 0;
+
+#ifdef CONFIG_SMP
+	/*
+	 * For SMP idle threads, fork_by_hand() calls do_fork with
+	 * NULL regs.
+	 */
+	if (!regs)
+		return 0;
+#endif
+
+	stack = ((struct switch_stack *) regs) - 1;
+
+	child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
+	child_stack = (struct switch_stack *) child_ptregs - 1;
+
+	/* copy parent's switch_stack & pt_regs to child: */
+	memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
+
+	rbs = (unsigned long) current + IA64_RBS_OFFSET;
+	child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
+	rbs_size = stack->ar_bspstore - rbs;
+
+	/* copy the parent's register backing store to the child: */
+	memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+
+	if (likely(user_mode(child_ptregs))) {
+		if (clone_flags & CLONE_SETTLS)
+			child_ptregs->r13 = regs->r16;	/* see sys_clone2() in entry.S */
+		if (user_stack_base) {
+			child_ptregs->r12 = user_stack_base + user_stack_size - 16;
+			child_ptregs->ar_bspstore = user_stack_base;
+			child_ptregs->ar_rnat = 0;
+			child_ptregs->loadrs = 0;
+		}
+	} else {
+		/*
+		 * Note: we simply preserve the relative position of
+		 * the stack pointer here.  There is no need to
+		 * allocate a scratch area here, since that will have
+		 * been taken care of by the caller of sys_clone()
+		 * already.
+		 */
+		child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
+		child_ptregs->r13 = (unsigned long) p;		/* set `current' pointer */
+	}
+	child_stack->ar_bspstore = child_rbs + rbs_size;
+	child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+	/* copy parts of thread_struct: */
+	p->thread.ksp = (unsigned long) child_stack - 16;
+
+	/* stop some PSR bits from being inherited.
+	 * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+	 * therefore we must specify them explicitly here and not include them in
+	 * IA64_PSR_BITS_TO_CLEAR.
+	 */
+	child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+				 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
+	/*
+	 * NOTE: The calling convention considers all floating point
+	 * registers in the high partition (fph) to be scratch.  Since
+	 * the only way to get to this point is through a system call,
+	 * we know that the values in fph are all dead.  Hence, there
+	 * is no need to inherit the fph state from the parent to the
+	 * child and all we have to do is to make sure that
+	 * IA64_THREAD_FPH_VALID is cleared in the child.
+	 *
+	 * XXX We could push this optimization a bit further by
+	 * clearing IA64_THREAD_FPH_VALID on ANY system call.
+	 * However, it's not clear this is worth doing.  Also, it
+	 * would be a slight deviation from the normal Linux system
+	 * call behavior where scratch registers are preserved across
+	 * system calls (unless used by the system call itself).
+	 */
+#	define THREAD_FLAGS_TO_CLEAR	(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
+					 | IA64_THREAD_PM_VALID)
+#	define THREAD_FLAGS_TO_SET	0
+	p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
+			   | THREAD_FLAGS_TO_SET);
+	ia64_drop_fpu(p);	/* don't pick up stale state from a CPU's fph */
+
+#ifdef CONFIG_PERFMON
+	if (current->thread.pfm_context)
+		pfm_inherit(p, child_ptregs);
+#endif
+	return retval;
+}
+
+static void
+do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
+{
+	unsigned long mask, sp, nat_bits = 0, ar_rnat, urbs_end, cfm;
+	unsigned long uninitialized_var(ip);	/* GCC be quiet */
+	elf_greg_t *dst = arg;
+	struct pt_regs *pt;
+	char nat;
+	int i;
+
+	memset(dst, 0, sizeof(elf_gregset_t));	/* don't leak any kernel bits to user-level */
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	unw_get_sp(info, &sp);
+	pt = (struct pt_regs *) (sp + 16);
+
+	urbs_end = ia64_get_user_rbs_end(task, pt, &cfm);
+
+	if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0)
+		return;
+
+	ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end),
+		  &ar_rnat);
+
+	/*
+	 * coredump format:
+	 *	r0-r31
+	 *	NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+	 *	predicate registers (p0-p63)
+	 *	b0-b7
+	 *	ip cfm user-mask
+	 *	ar.rsc ar.bsp ar.bspstore ar.rnat
+	 *	ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+	 */
+
+	/* r0 is zero */
+	for (i = 1, mask = (1UL << i); i < 32; ++i) {
+		unw_get_gr(info, i, &dst[i], &nat);
+		if (nat)
+			nat_bits |= mask;
+		mask <<= 1;
+	}
+	dst[32] = nat_bits;
+	unw_get_pr(info, &dst[33]);
+
+	for (i = 0; i < 8; ++i)
+		unw_get_br(info, i, &dst[34 + i]);
+
+	unw_get_rp(info, &ip);
+	dst[42] = ip + ia64_psr(pt)->ri;
+	dst[43] = cfm;
+	dst[44] = pt->cr_ipsr & IA64_PSR_UM;
+
+	unw_get_ar(info, UNW_AR_RSC, &dst[45]);
+	/*
+	 * For bsp and bspstore, unw_get_ar() would return the kernel
+	 * addresses, but we need the user-level addresses instead:
+	 */
+	dst[46] = urbs_end;	/* note: by convention PT_AR_BSP points to the end of the urbs! */
+	dst[47] = pt->ar_bspstore;
+	dst[48] = ar_rnat;
+	unw_get_ar(info, UNW_AR_CCV, &dst[49]);
+	unw_get_ar(info, UNW_AR_UNAT, &dst[50]);
+	unw_get_ar(info, UNW_AR_FPSR, &dst[51]);
+	dst[52] = pt->ar_pfs;	/* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */
+	unw_get_ar(info, UNW_AR_LC, &dst[53]);
+	unw_get_ar(info, UNW_AR_EC, &dst[54]);
+	unw_get_ar(info, UNW_AR_CSD, &dst[55]);
+	unw_get_ar(info, UNW_AR_SSD, &dst[56]);
+}
+
+void
+do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg)
+{
+	elf_fpreg_t *dst = arg;
+	int i;
+
+	memset(dst, 0, sizeof(elf_fpregset_t));	/* don't leak any "random" bits */
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* f0 is 0.0, f1 is 1.0 */
+
+	for (i = 2; i < 32; ++i)
+		unw_get_fr(info, i, dst + i);
+
+	ia64_flush_fph(task);
+	if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0)
+		memcpy(dst + 32, task->thread.fph, 96*16);
+}
+
+void
+do_copy_regs (struct unw_frame_info *info, void *arg)
+{
+	do_copy_task_regs(current, info, arg);
+}
+
+void
+do_dump_fpu (struct unw_frame_info *info, void *arg)
+{
+	do_dump_task_fpu(current, info, arg);
+}
+
+void
+ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
+{
+	unw_init_running(do_copy_regs, dst);
+}
+
+int
+dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
+{
+	unw_init_running(do_dump_fpu, dst);
+	return 1;	/* f0-f31 are always valid so we always return 1 */
+}
+
+long
+sys_execve (const char __user *filename,
+	    const char __user *const __user *argv,
+	    const char __user *const __user *envp,
+	    struct pt_regs *regs)
+{
+	char *fname;
+	int error;
+
+	fname = getname(filename);
+	error = PTR_ERR(fname);
+	if (IS_ERR(fname))
+		goto out;
+	error = do_execve(fname, argv, envp, regs);
+	putname(fname);
+out:
+	return error;
+}
+
+pid_t
+kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
+{
+	extern void start_kernel_thread (void);
+	unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
+	struct {
+		struct switch_stack sw;
+		struct pt_regs pt;
+	} regs;
+
+	memset(&regs, 0, sizeof(regs));
+	regs.pt.cr_iip = helper_fptr[0];	/* set entry point (IP) */
+	regs.pt.r1 = helper_fptr[1];		/* set GP */
+	regs.pt.r9 = (unsigned long) fn;	/* 1st argument */
+	regs.pt.r11 = (unsigned long) arg;	/* 2nd argument */
+	/* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read.  */
+	regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+	regs.pt.cr_ifs = 1UL << 63;		/* mark as valid, empty frame */
+	regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
+	regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
+	regs.sw.pr = (1 << PRED_KERNEL_STACK);
+	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/* This gets called from kernel_thread() via ia64_invoke_thread_helper().  */
+int
+kernel_thread_helper (int (*fn)(void *), void *arg)
+{
+	return (*fn)(arg);
+}
+
+/*
+ * Flush thread state.  This is called when a thread does an execve().
+ */
+void
+flush_thread (void)
+{
+	/* drop floating-point and debug-register state if it exists: */
+	current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
+	ia64_drop_fpu(current);
+}
+
+/*
+ * Clean up state associated with current thread.  This is called when
+ * the thread calls exit().
+ */
+void
+exit_thread (void)
+{
+
+	ia64_drop_fpu(current);
+#ifdef CONFIG_PERFMON
+       /* if needed, stop monitoring and flush state to perfmon context */
+	if (current->thread.pfm_context)
+		pfm_exit_thread(current);
+
+	/* free debug register resources */
+	if (current->thread.flags & IA64_THREAD_DBG_VALID)
+		pfm_release_debug_registers(current);
+#endif
+}
+
+unsigned long
+get_wchan (struct task_struct *p)
+{
+	struct unw_frame_info info;
+	unsigned long ip;
+	int count = 0;
+
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	/*
+	 * Note: p may not be a blocked task (it could be current or
+	 * another process running on some other CPU.  Rather than
+	 * trying to determine if p is really blocked, we just assume
+	 * it's blocked and rely on the unwind routines to fail
+	 * gracefully if the process wasn't really blocked after all.
+	 * --davidm 99/12/15
+	 */
+	unw_init_from_blocked_task(&info, p);
+	do {
+		if (p->state == TASK_RUNNING)
+			return 0;
+		if (unw_unwind(&info) < 0)
+			return 0;
+		unw_get_ip(&info, &ip);
+		if (!in_sched_functions(ip))
+			return ip;
+	} while (count++ < 16);
+	return 0;
+}
+
+void
+cpu_halt (void)
+{
+	pal_power_mgmt_info_u_t power_info[8];
+	unsigned long min_power;
+	int i, min_power_state;
+
+	if (ia64_pal_halt_info(power_info) != 0)
+		return;
+
+	min_power_state = 0;
+	min_power = power_info[0].pal_power_mgmt_info_s.power_consumption;
+	for (i = 1; i < 8; ++i)
+		if (power_info[i].pal_power_mgmt_info_s.im
+		    && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) {
+			min_power = power_info[i].pal_power_mgmt_info_s.power_consumption;
+			min_power_state = i;
+		}
+
+	while (1)
+		ia64_pal_halt(min_power_state);
+}
+
+void machine_shutdown(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu != smp_processor_id())
+			cpu_down(cpu);
+	}
+#endif
+#ifdef CONFIG_KEXEC
+	kexec_disable_iosapic();
+#endif
+}
+
+void
+machine_restart (char *restart_cmd)
+{
+	(void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0);
+	(*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL);
+}
+
+void
+machine_halt (void)
+{
+	(void) notify_die(DIE_MACHINE_HALT, "", NULL, 0, 0, 0);
+	cpu_halt();
+}
+
+void
+machine_power_off (void)
+{
+	if (pm_power_off)
+		pm_power_off();
+	machine_halt();
+}
+
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
new file mode 100644
index 00000000..8848f43d
--- /dev/null
+++ b/arch/ia64/kernel/ptrace.c
@@ -0,0 +1,2236 @@
+/*
+ * Kernel support for the ptrace() and syscall tracing interfaces.
+ *
+ * Copyright (C) 1999-2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2006 Intel Co
+ *  2006-08-12	- IA64 Native Utrace implementation support added by
+ *	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ *
+ * Derived from the x86 and Alpha versions.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/audit.h>
+#include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
+
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace_offsets.h>
+#include <asm/rse.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/unwind.h>
+#ifdef CONFIG_PERFMON
+#include <asm/perfmon.h>
+#endif
+
+#include "entry.h"
+
+/*
+ * Bits in the PSR that we allow ptrace() to change:
+ *	be, up, ac, mfl, mfh (the user mask; five bits total)
+ *	db (debug breakpoint fault; one bit)
+ *	id (instruction debug fault disable; one bit)
+ *	dd (data debug fault disable; one bit)
+ *	ri (restart instruction; two bits)
+ *	is (instruction set; one bit)
+ */
+#define IPSR_MASK (IA64_PSR_UM | IA64_PSR_DB | IA64_PSR_IS	\
+		   | IA64_PSR_ID | IA64_PSR_DD | IA64_PSR_RI)
+
+#define MASK(nbits)	((1UL << (nbits)) - 1)	/* mask with NBITS bits set */
+#define PFM_MASK	MASK(38)
+
+#define PTRACE_DEBUG	0
+
+#if PTRACE_DEBUG
+# define dprintk(format...)	printk(format)
+# define inline
+#else
+# define dprintk(format...)
+#endif
+
+/* Return TRUE if PT was created due to kernel-entry via a system-call.  */
+
+static inline int
+in_syscall (struct pt_regs *pt)
+{
+	return (long) pt->cr_ifs >= 0;
+}
+
+/*
+ * Collect the NaT bits for r1-r31 from scratch_unat and return a NaT
+ * bitset where bit i is set iff the NaT bit of register i is set.
+ */
+unsigned long
+ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat)
+{
+#	define GET_BITS(first, last, unat)				\
+	({								\
+		unsigned long bit = ia64_unat_pos(&pt->r##first);	\
+		unsigned long nbits = (last - first + 1);		\
+		unsigned long mask = MASK(nbits) << first;		\
+		unsigned long dist;					\
+		if (bit < first)					\
+			dist = 64 + bit - first;			\
+		else							\
+			dist = bit - first;				\
+		ia64_rotr(unat, dist) & mask;				\
+	})
+	unsigned long val;
+
+	/*
+	 * Registers that are stored consecutively in struct pt_regs
+	 * can be handled in parallel.  If the register order in
+	 * struct_pt_regs changes, this code MUST be updated.
+	 */
+	val  = GET_BITS( 1,  1, scratch_unat);
+	val |= GET_BITS( 2,  3, scratch_unat);
+	val |= GET_BITS(12, 13, scratch_unat);
+	val |= GET_BITS(14, 14, scratch_unat);
+	val |= GET_BITS(15, 15, scratch_unat);
+	val |= GET_BITS( 8, 11, scratch_unat);
+	val |= GET_BITS(16, 31, scratch_unat);
+	return val;
+
+#	undef GET_BITS
+}
+
+/*
+ * Set the NaT bits for the scratch registers according to NAT and
+ * return the resulting unat (assuming the scratch registers are
+ * stored in PT).
+ */
+unsigned long
+ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat)
+{
+#	define PUT_BITS(first, last, nat)				\
+	({								\
+		unsigned long bit = ia64_unat_pos(&pt->r##first);	\
+		unsigned long nbits = (last - first + 1);		\
+		unsigned long mask = MASK(nbits) << first;		\
+		long dist;						\
+		if (bit < first)					\
+			dist = 64 + bit - first;			\
+		else							\
+			dist = bit - first;				\
+		ia64_rotl(nat & mask, dist);				\
+	})
+	unsigned long scratch_unat;
+
+	/*
+	 * Registers that are stored consecutively in struct pt_regs
+	 * can be handled in parallel.  If the register order in
+	 * struct_pt_regs changes, this code MUST be updated.
+	 */
+	scratch_unat  = PUT_BITS( 1,  1, nat);
+	scratch_unat |= PUT_BITS( 2,  3, nat);
+	scratch_unat |= PUT_BITS(12, 13, nat);
+	scratch_unat |= PUT_BITS(14, 14, nat);
+	scratch_unat |= PUT_BITS(15, 15, nat);
+	scratch_unat |= PUT_BITS( 8, 11, nat);
+	scratch_unat |= PUT_BITS(16, 31, nat);
+
+	return scratch_unat;
+
+#	undef PUT_BITS
+}
+
+#define IA64_MLX_TEMPLATE	0x2
+#define IA64_MOVL_OPCODE	6
+
+void
+ia64_increment_ip (struct pt_regs *regs)
+{
+	unsigned long w0, ri = ia64_psr(regs)->ri + 1;
+
+	if (ri > 2) {
+		ri = 0;
+		regs->cr_iip += 16;
+	} else if (ri == 2) {
+		get_user(w0, (char __user *) regs->cr_iip + 0);
+		if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) {
+			/*
+			 * rfi'ing to slot 2 of an MLX bundle causes
+			 * an illegal operation fault.  We don't want
+			 * that to happen...
+			 */
+			ri = 0;
+			regs->cr_iip += 16;
+		}
+	}
+	ia64_psr(regs)->ri = ri;
+}
+
+void
+ia64_decrement_ip (struct pt_regs *regs)
+{
+	unsigned long w0, ri = ia64_psr(regs)->ri - 1;
+
+	if (ia64_psr(regs)->ri == 0) {
+		regs->cr_iip -= 16;
+		ri = 2;
+		get_user(w0, (char __user *) regs->cr_iip + 0);
+		if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) {
+			/*
+			 * rfi'ing to slot 2 of an MLX bundle causes
+			 * an illegal operation fault.  We don't want
+			 * that to happen...
+			 */
+			ri = 1;
+		}
+	}
+	ia64_psr(regs)->ri = ri;
+}
+
+/*
+ * This routine is used to read an rnat bits that are stored on the
+ * kernel backing store.  Since, in general, the alignment of the user
+ * and kernel are different, this is not completely trivial.  In
+ * essence, we need to construct the user RNAT based on up to two
+ * kernel RNAT values and/or the RNAT value saved in the child's
+ * pt_regs.
+ *
+ * user rbs
+ *
+ * +--------+ <-- lowest address
+ * | slot62 |
+ * +--------+
+ * |  rnat  | 0x....1f8
+ * +--------+
+ * | slot00 | \
+ * +--------+ |
+ * | slot01 | > child_regs->ar_rnat
+ * +--------+ |
+ * | slot02 | /				kernel rbs
+ * +--------+				+--------+
+ *	    <- child_regs->ar_bspstore	| slot61 | <-- krbs
+ * +- - - - +				+--------+
+ *					| slot62 |
+ * +- - - - +				+--------+
+ *					|  rnat	 |
+ * +- - - - +				+--------+
+ *   vrnat				| slot00 |
+ * +- - - - +				+--------+
+ *					=	 =
+ *					+--------+
+ *					| slot00 | \
+ *					+--------+ |
+ *					| slot01 | > child_stack->ar_rnat
+ *					+--------+ |
+ *					| slot02 | /
+ *					+--------+
+ *						  <--- child_stack->ar_bspstore
+ *
+ * The way to think of this code is as follows: bit 0 in the user rnat
+ * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat
+ * value.  The kernel rnat value holding this bit is stored in
+ * variable rnat0.  rnat1 is loaded with the kernel rnat value that
+ * form the upper bits of the user rnat value.
+ *
+ * Boundary cases:
+ *
+ * o when reading the rnat "below" the first rnat slot on the kernel
+ *   backing store, rnat0/rnat1 are set to 0 and the low order bits are
+ *   merged in from pt->ar_rnat.
+ *
+ * o when reading the rnat "above" the last rnat slot on the kernel
+ *   backing store, rnat0/rnat1 gets its value from sw->ar_rnat.
+ */
+static unsigned long
+get_rnat (struct task_struct *task, struct switch_stack *sw,
+	  unsigned long *krbs, unsigned long *urnat_addr,
+	  unsigned long *urbs_end)
+{
+	unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr;
+	unsigned long umask = 0, mask, m;
+	unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift;
+	long num_regs, nbits;
+	struct pt_regs *pt;
+
+	pt = task_pt_regs(task);
+	kbsp = (unsigned long *) sw->ar_bspstore;
+	ubspstore = (unsigned long *) pt->ar_bspstore;
+
+	if (urbs_end < urnat_addr)
+		nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_end);
+	else
+		nbits = 63;
+	mask = MASK(nbits);
+	/*
+	 * First, figure out which bit number slot 0 in user-land maps
+	 * to in the kernel rnat.  Do this by figuring out how many
+	 * register slots we're beyond the user's backingstore and
+	 * then computing the equivalent address in kernel space.
+	 */
+	num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1);
+	slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs);
+	shift = ia64_rse_slot_num(slot0_kaddr);
+	rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr);
+	rnat0_kaddr = rnat1_kaddr - 64;
+
+	if (ubspstore + 63 > urnat_addr) {
+		/* some bits need to be merged in from pt->ar_rnat */
+		umask = MASK(ia64_rse_slot_num(ubspstore)) & mask;
+		urnat = (pt->ar_rnat & umask);
+		mask &= ~umask;
+		if (!mask)
+			return urnat;
+	}
+
+	m = mask << shift;
+	if (rnat0_kaddr >= kbsp)
+		rnat0 = sw->ar_rnat;
+	else if (rnat0_kaddr > krbs)
+		rnat0 = *rnat0_kaddr;
+	urnat |= (rnat0 & m) >> shift;
+
+	m = mask >> (63 - shift);
+	if (rnat1_kaddr >= kbsp)
+		rnat1 = sw->ar_rnat;
+	else if (rnat1_kaddr > krbs)
+		rnat1 = *rnat1_kaddr;
+	urnat |= (rnat1 & m) << (63 - shift);
+	return urnat;
+}
+
+/*
+ * The reverse of get_rnat.
+ */
+static void
+put_rnat (struct task_struct *task, struct switch_stack *sw,
+	  unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat,
+	  unsigned long *urbs_end)
+{
+	unsigned long rnat0 = 0, rnat1 = 0, *slot0_kaddr, umask = 0, mask, m;
+	unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift;
+	long num_regs, nbits;
+	struct pt_regs *pt;
+	unsigned long cfm, *urbs_kargs;
+
+	pt = task_pt_regs(task);
+	kbsp = (unsigned long *) sw->ar_bspstore;
+	ubspstore = (unsigned long *) pt->ar_bspstore;
+
+	urbs_kargs = urbs_end;
+	if (in_syscall(pt)) {
+		/*
+		 * If entered via syscall, don't allow user to set rnat bits
+		 * for syscall args.
+		 */
+		cfm = pt->cr_ifs;
+		urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f));
+	}
+
+	if (urbs_kargs >= urnat_addr)
+		nbits = 63;
+	else {
+		if ((urnat_addr - 63) >= urbs_kargs)
+			return;
+		nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_kargs);
+	}
+	mask = MASK(nbits);
+
+	/*
+	 * First, figure out which bit number slot 0 in user-land maps
+	 * to in the kernel rnat.  Do this by figuring out how many
+	 * register slots we're beyond the user's backingstore and
+	 * then computing the equivalent address in kernel space.
+	 */
+	num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1);
+	slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs);
+	shift = ia64_rse_slot_num(slot0_kaddr);
+	rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr);
+	rnat0_kaddr = rnat1_kaddr - 64;
+
+	if (ubspstore + 63 > urnat_addr) {
+		/* some bits need to be place in pt->ar_rnat: */
+		umask = MASK(ia64_rse_slot_num(ubspstore)) & mask;
+		pt->ar_rnat = (pt->ar_rnat & ~umask) | (urnat & umask);
+		mask &= ~umask;
+		if (!mask)
+			return;
+	}
+	/*
+	 * Note: Section 11.1 of the EAS guarantees that bit 63 of an
+	 * rnat slot is ignored. so we don't have to clear it here.
+	 */
+	rnat0 = (urnat << shift);
+	m = mask << shift;
+	if (rnat0_kaddr >= kbsp)
+		sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat0 & m);
+	else if (rnat0_kaddr > krbs)
+		*rnat0_kaddr = ((*rnat0_kaddr & ~m) | (rnat0 & m));
+
+	rnat1 = (urnat >> (63 - shift));
+	m = mask >> (63 - shift);
+	if (rnat1_kaddr >= kbsp)
+		sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat1 & m);
+	else if (rnat1_kaddr > krbs)
+		*rnat1_kaddr = ((*rnat1_kaddr & ~m) | (rnat1 & m));
+}
+
+static inline int
+on_kernel_rbs (unsigned long addr, unsigned long bspstore,
+	       unsigned long urbs_end)
+{
+	unsigned long *rnat_addr = ia64_rse_rnat_addr((unsigned long *)
+						      urbs_end);
+	return (addr >= bspstore && addr <= (unsigned long) rnat_addr);
+}
+
+/*
+ * Read a word from the user-level backing store of task CHILD.  ADDR
+ * is the user-level address to read the word from, VAL a pointer to
+ * the return value, and USER_BSP gives the end of the user-level
+ * backing store (i.e., it's the address that would be in ar.bsp after
+ * the user executed a "cover" instruction).
+ *
+ * This routine takes care of accessing the kernel register backing
+ * store for those registers that got spilled there.  It also takes
+ * care of calculating the appropriate RNaT collection words.
+ */
+long
+ia64_peek (struct task_struct *child, struct switch_stack *child_stack,
+	   unsigned long user_rbs_end, unsigned long addr, long *val)
+{
+	unsigned long *bspstore, *krbs, regnum, *laddr, *urbs_end, *rnat_addr;
+	struct pt_regs *child_regs;
+	size_t copied;
+	long ret;
+
+	urbs_end = (long *) user_rbs_end;
+	laddr = (unsigned long *) addr;
+	child_regs = task_pt_regs(child);
+	bspstore = (unsigned long *) child_regs->ar_bspstore;
+	krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+	if (on_kernel_rbs(addr, (unsigned long) bspstore,
+			  (unsigned long) urbs_end))
+	{
+		/*
+		 * Attempt to read the RBS in an area that's actually
+		 * on the kernel RBS => read the corresponding bits in
+		 * the kernel RBS.
+		 */
+		rnat_addr = ia64_rse_rnat_addr(laddr);
+		ret = get_rnat(child, child_stack, krbs, rnat_addr, urbs_end);
+
+		if (laddr == rnat_addr) {
+			/* return NaT collection word itself */
+			*val = ret;
+			return 0;
+		}
+
+		if (((1UL << ia64_rse_slot_num(laddr)) & ret) != 0) {
+			/*
+			 * It is implementation dependent whether the
+			 * data portion of a NaT value gets saved on a
+			 * st8.spill or RSE spill (e.g., see EAS 2.6,
+			 * 4.4.4.6 Register Spill and Fill).  To get
+			 * consistent behavior across all possible
+			 * IA-64 implementations, we return zero in
+			 * this case.
+			 */
+			*val = 0;
+			return 0;
+		}
+
+		if (laddr < urbs_end) {
+			/*
+			 * The desired word is on the kernel RBS and
+			 * is not a NaT.
+			 */
+			regnum = ia64_rse_num_regs(bspstore, laddr);
+			*val = *ia64_rse_skip_regs(krbs, regnum);
+			return 0;
+		}
+	}
+	copied = access_process_vm(child, addr, &ret, sizeof(ret), 0);
+	if (copied != sizeof(ret))
+		return -EIO;
+	*val = ret;
+	return 0;
+}
+
+long
+ia64_poke (struct task_struct *child, struct switch_stack *child_stack,
+	   unsigned long user_rbs_end, unsigned long addr, long val)
+{
+	unsigned long *bspstore, *krbs, regnum, *laddr;
+	unsigned long *urbs_end = (long *) user_rbs_end;
+	struct pt_regs *child_regs;
+
+	laddr = (unsigned long *) addr;
+	child_regs = task_pt_regs(child);
+	bspstore = (unsigned long *) child_regs->ar_bspstore;
+	krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+	if (on_kernel_rbs(addr, (unsigned long) bspstore,
+			  (unsigned long) urbs_end))
+	{
+		/*
+		 * Attempt to write the RBS in an area that's actually
+		 * on the kernel RBS => write the corresponding bits
+		 * in the kernel RBS.
+		 */
+		if (ia64_rse_is_rnat_slot(laddr))
+			put_rnat(child, child_stack, krbs, laddr, val,
+				 urbs_end);
+		else {
+			if (laddr < urbs_end) {
+				regnum = ia64_rse_num_regs(bspstore, laddr);
+				*ia64_rse_skip_regs(krbs, regnum) = val;
+			}
+		}
+	} else if (access_process_vm(child, addr, &val, sizeof(val), 1)
+		   != sizeof(val))
+		return -EIO;
+	return 0;
+}
+
+/*
+ * Calculate the address of the end of the user-level register backing
+ * store.  This is the address that would have been stored in ar.bsp
+ * if the user had executed a "cover" instruction right before
+ * entering the kernel.  If CFMP is not NULL, it is used to return the
+ * "current frame mask" that was active at the time the kernel was
+ * entered.
+ */
+unsigned long
+ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt,
+		       unsigned long *cfmp)
+{
+	unsigned long *krbs, *bspstore, cfm = pt->cr_ifs;
+	long ndirty;
+
+	krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+	bspstore = (unsigned long *) pt->ar_bspstore;
+	ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
+
+	if (in_syscall(pt))
+		ndirty += (cfm & 0x7f);
+	else
+		cfm &= ~(1UL << 63);	/* clear valid bit */
+
+	if (cfmp)
+		*cfmp = cfm;
+	return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty);
+}
+
+/*
+ * Synchronize (i.e, write) the RSE backing store living in kernel
+ * space to the VM of the CHILD task.  SW and PT are the pointers to
+ * the switch_stack and pt_regs structures, respectively.
+ * USER_RBS_END is the user-level address at which the backing store
+ * ends.
+ */
+long
+ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw,
+		    unsigned long user_rbs_start, unsigned long user_rbs_end)
+{
+	unsigned long addr, val;
+	long ret;
+
+	/* now copy word for word from kernel rbs to user rbs: */
+	for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
+		ret = ia64_peek(child, sw, user_rbs_end, addr, &val);
+		if (ret < 0)
+			return ret;
+		if (access_process_vm(child, addr, &val, sizeof(val), 1)
+		    != sizeof(val))
+			return -EIO;
+	}
+	return 0;
+}
+
+static long
+ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw,
+		unsigned long user_rbs_start, unsigned long user_rbs_end)
+{
+	unsigned long addr, val;
+	long ret;
+
+	/* now copy word for word from user rbs to kernel rbs: */
+	for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
+		if (access_process_vm(child, addr, &val, sizeof(val), 0)
+				!= sizeof(val))
+			return -EIO;
+
+		ret = ia64_poke(child, sw, user_rbs_end, addr, val);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+typedef long (*syncfunc_t)(struct task_struct *, struct switch_stack *,
+			    unsigned long, unsigned long);
+
+static void do_sync_rbs(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	unsigned long urbs_end;
+	syncfunc_t fn = arg;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+	pt = task_pt_regs(info->task);
+	urbs_end = ia64_get_user_rbs_end(info->task, pt, NULL);
+
+	fn(info->task, info->sw, pt->ar_bspstore, urbs_end);
+}
+
+/*
+ * when a thread is stopped (ptraced), debugger might change thread's user
+ * stack (change memory directly), and we must avoid the RSE stored in kernel
+ * to override user stack (user space's RSE is newer than kernel's in the
+ * case). To workaround the issue, we copy kernel RSE to user RSE before the
+ * task is stopped, so user RSE has updated data.  we then copy user RSE to
+ * kernel after the task is resummed from traced stop and kernel will use the
+ * newer RSE to return to user. TIF_RESTORE_RSE is the flag to indicate we need
+ * synchronize user RSE to kernel.
+ */
+void ia64_ptrace_stop(void)
+{
+	if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE))
+		return;
+	set_notify_resume(current);
+	unw_init_running(do_sync_rbs, ia64_sync_user_rbs);
+}
+
+/*
+ * This is called to read back the register backing store.
+ */
+void ia64_sync_krbs(void)
+{
+	clear_tsk_thread_flag(current, TIF_RESTORE_RSE);
+
+	unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs);
+}
+
+/*
+ * After PTRACE_ATTACH, a thread's register backing store area in user
+ * space is assumed to contain correct data whenever the thread is
+ * stopped.  arch_ptrace_stop takes care of this on tracing stops.
+ * But if the child was already stopped for job control when we attach
+ * to it, then it might not ever get into ptrace_stop by the time we
+ * want to examine the user memory containing the RBS.
+ */
+void
+ptrace_attach_sync_user_rbs (struct task_struct *child)
+{
+	int stopped = 0;
+	struct unw_frame_info info;
+
+	/*
+	 * If the child is in TASK_STOPPED, we need to change that to
+	 * TASK_TRACED momentarily while we operate on it.  This ensures
+	 * that the child won't be woken up and return to user mode while
+	 * we are doing the sync.  (It can only be woken up for SIGKILL.)
+	 */
+
+	read_lock(&tasklist_lock);
+	if (child->sighand) {
+		spin_lock_irq(&child->sighand->siglock);
+		if (child->state == TASK_STOPPED &&
+		    !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) {
+			set_notify_resume(child);
+
+			child->state = TASK_TRACED;
+			stopped = 1;
+		}
+		spin_unlock_irq(&child->sighand->siglock);
+	}
+	read_unlock(&tasklist_lock);
+
+	if (!stopped)
+		return;
+
+	unw_init_from_blocked_task(&info, child);
+	do_sync_rbs(&info, ia64_sync_user_rbs);
+
+	/*
+	 * Now move the child back into TASK_STOPPED if it should be in a
+	 * job control stop, so that SIGCONT can be used to wake it up.
+	 */
+	read_lock(&tasklist_lock);
+	if (child->sighand) {
+		spin_lock_irq(&child->sighand->siglock);
+		if (child->state == TASK_TRACED &&
+		    (child->signal->flags & SIGNAL_STOP_STOPPED)) {
+			child->state = TASK_STOPPED;
+		}
+		spin_unlock_irq(&child->sighand->siglock);
+	}
+	read_unlock(&tasklist_lock);
+}
+
+static inline int
+thread_matches (struct task_struct *thread, unsigned long addr)
+{
+	unsigned long thread_rbs_end;
+	struct pt_regs *thread_regs;
+
+	if (ptrace_check_attach(thread, 0) < 0)
+		/*
+		 * If the thread is not in an attachable state, we'll
+		 * ignore it.  The net effect is that if ADDR happens
+		 * to overlap with the portion of the thread's
+		 * register backing store that is currently residing
+		 * on the thread's kernel stack, then ptrace() may end
+		 * up accessing a stale value.  But if the thread
+		 * isn't stopped, that's a problem anyhow, so we're
+		 * doing as well as we can...
+		 */
+		return 0;
+
+	thread_regs = task_pt_regs(thread);
+	thread_rbs_end = ia64_get_user_rbs_end(thread, thread_regs, NULL);
+	if (!on_kernel_rbs(addr, thread_regs->ar_bspstore, thread_rbs_end))
+		return 0;
+
+	return 1;	/* looks like we've got a winner */
+}
+
+/*
+ * Write f32-f127 back to task->thread.fph if it has been modified.
+ */
+inline void
+ia64_flush_fph (struct task_struct *task)
+{
+	struct ia64_psr *psr = ia64_psr(task_pt_regs(task));
+
+	/*
+	 * Prevent migrating this task while
+	 * we're fiddling with the FPU state
+	 */
+	preempt_disable();
+	if (ia64_is_local_fpu_owner(task) && psr->mfh) {
+		psr->mfh = 0;
+		task->thread.flags |= IA64_THREAD_FPH_VALID;
+		ia64_save_fpu(&task->thread.fph[0]);
+	}
+	preempt_enable();
+}
+
+/*
+ * Sync the fph state of the task so that it can be manipulated
+ * through thread.fph.  If necessary, f32-f127 are written back to
+ * thread.fph or, if the fph state hasn't been used before, thread.fph
+ * is cleared to zeroes.  Also, access to f32-f127 is disabled to
+ * ensure that the task picks up the state from thread.fph when it
+ * executes again.
+ */
+void
+ia64_sync_fph (struct task_struct *task)
+{
+	struct ia64_psr *psr = ia64_psr(task_pt_regs(task));
+
+	ia64_flush_fph(task);
+	if (!(task->thread.flags & IA64_THREAD_FPH_VALID)) {
+		task->thread.flags |= IA64_THREAD_FPH_VALID;
+		memset(&task->thread.fph, 0, sizeof(task->thread.fph));
+	}
+	ia64_drop_fpu(task);
+	psr->dfh = 1;
+}
+
+/*
+ * Change the machine-state of CHILD such that it will return via the normal
+ * kernel exit-path, rather than the syscall-exit path.
+ */
+static void
+convert_to_non_syscall (struct task_struct *child, struct pt_regs  *pt,
+			unsigned long cfm)
+{
+	struct unw_frame_info info, prev_info;
+	unsigned long ip, sp, pr;
+
+	unw_init_from_blocked_task(&info, child);
+	while (1) {
+		prev_info = info;
+		if (unw_unwind(&info) < 0)
+			return;
+
+		unw_get_sp(&info, &sp);
+		if ((long)((unsigned long)child + IA64_STK_OFFSET - sp)
+		    < IA64_PT_REGS_SIZE) {
+			dprintk("ptrace.%s: ran off the top of the kernel "
+				"stack\n", __func__);
+			return;
+		}
+		if (unw_get_pr (&prev_info, &pr) < 0) {
+			unw_get_rp(&prev_info, &ip);
+			dprintk("ptrace.%s: failed to read "
+				"predicate register (ip=0x%lx)\n",
+				__func__, ip);
+			return;
+		}
+		if (unw_is_intr_frame(&info)
+		    && (pr & (1UL << PRED_USER_STACK)))
+			break;
+	}
+
+	/*
+	 * Note: at the time of this call, the target task is blocked
+	 * in notify_resume_user() and by clearling PRED_LEAVE_SYSCALL
+	 * (aka, "pLvSys") we redirect execution from
+	 * .work_pending_syscall_end to .work_processed_kernel.
+	 */
+	unw_get_pr(&prev_info, &pr);
+	pr &= ~((1UL << PRED_SYSCALL) | (1UL << PRED_LEAVE_SYSCALL));
+	pr |=  (1UL << PRED_NON_SYSCALL);
+	unw_set_pr(&prev_info, pr);
+
+	pt->cr_ifs = (1UL << 63) | cfm;
+	/*
+	 * Clear the memory that is NOT written on syscall-entry to
+	 * ensure we do not leak kernel-state to user when execution
+	 * resumes.
+	 */
+	pt->r2 = 0;
+	pt->r3 = 0;
+	pt->r14 = 0;
+	memset(&pt->r16, 0, 16*8);	/* clear r16-r31 */
+	memset(&pt->f6, 0, 6*16);	/* clear f6-f11 */
+	pt->b7 = 0;
+	pt->ar_ccv = 0;
+	pt->ar_csd = 0;
+	pt->ar_ssd = 0;
+}
+
+static int
+access_nat_bits (struct task_struct *child, struct pt_regs *pt,
+		 struct unw_frame_info *info,
+		 unsigned long *data, int write_access)
+{
+	unsigned long regnum, nat_bits, scratch_unat, dummy = 0;
+	char nat = 0;
+
+	if (write_access) {
+		nat_bits = *data;
+		scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits);
+		if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) {
+			dprintk("ptrace: failed to set ar.unat\n");
+			return -1;
+		}
+		for (regnum = 4; regnum <= 7; ++regnum) {
+			unw_get_gr(info, regnum, &dummy, &nat);
+			unw_set_gr(info, regnum, dummy,
+				   (nat_bits >> regnum) & 1);
+		}
+	} else {
+		if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) {
+			dprintk("ptrace: failed to read ar.unat\n");
+			return -1;
+		}
+		nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat);
+		for (regnum = 4; regnum <= 7; ++regnum) {
+			unw_get_gr(info, regnum, &dummy, &nat);
+			nat_bits |= (nat != 0) << regnum;
+		}
+		*data = nat_bits;
+	}
+	return 0;
+}
+
+static int
+access_uarea (struct task_struct *child, unsigned long addr,
+	      unsigned long *data, int write_access);
+
+static long
+ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
+{
+	unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val;
+	struct unw_frame_info info;
+	struct ia64_fpreg fpval;
+	struct switch_stack *sw;
+	struct pt_regs *pt;
+	long ret, retval = 0;
+	char nat = 0;
+	int i;
+
+	if (!access_ok(VERIFY_WRITE, ppr, sizeof(struct pt_all_user_regs)))
+		return -EIO;
+
+	pt = task_pt_regs(child);
+	sw = (struct switch_stack *) (child->thread.ksp + 16);
+	unw_init_from_blocked_task(&info, child);
+	if (unw_unwind_to_user(&info) < 0) {
+		return -EIO;
+	}
+
+	if (((unsigned long) ppr & 0x7) != 0) {
+		dprintk("ptrace:unaligned register address %p\n", ppr);
+		return -EIO;
+	}
+
+	if (access_uarea(child, PT_CR_IPSR, &psr, 0) < 0
+	    || access_uarea(child, PT_AR_EC, &ec, 0) < 0
+	    || access_uarea(child, PT_AR_LC, &lc, 0) < 0
+	    || access_uarea(child, PT_AR_RNAT, &rnat, 0) < 0
+	    || access_uarea(child, PT_AR_BSP, &bsp, 0) < 0
+	    || access_uarea(child, PT_CFM, &cfm, 0)
+	    || access_uarea(child, PT_NAT_BITS, &nat_bits, 0))
+		return -EIO;
+
+	/* control regs */
+
+	retval |= __put_user(pt->cr_iip, &ppr->cr_iip);
+	retval |= __put_user(psr, &ppr->cr_ipsr);
+
+	/* app regs */
+
+	retval |= __put_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]);
+	retval |= __put_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]);
+	retval |= __put_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]);
+	retval |= __put_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]);
+	retval |= __put_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
+	retval |= __put_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]);
+
+	retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]);
+	retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]);
+	retval |= __put_user(rnat, &ppr->ar[PT_AUR_RNAT]);
+	retval |= __put_user(bsp, &ppr->ar[PT_AUR_BSP]);
+	retval |= __put_user(cfm, &ppr->cfm);
+
+	/* gr1-gr3 */
+
+	retval |= __copy_to_user(&ppr->gr[1], &pt->r1, sizeof(long));
+	retval |= __copy_to_user(&ppr->gr[2], &pt->r2, sizeof(long) *2);
+
+	/* gr4-gr7 */
+
+	for (i = 4; i < 8; i++) {
+		if (unw_access_gr(&info, i, &val, &nat, 0) < 0)
+			return -EIO;
+		retval |= __put_user(val, &ppr->gr[i]);
+	}
+
+	/* gr8-gr11 */
+
+	retval |= __copy_to_user(&ppr->gr[8], &pt->r8, sizeof(long) * 4);
+
+	/* gr12-gr15 */
+
+	retval |= __copy_to_user(&ppr->gr[12], &pt->r12, sizeof(long) * 2);
+	retval |= __copy_to_user(&ppr->gr[14], &pt->r14, sizeof(long));
+	retval |= __copy_to_user(&ppr->gr[15], &pt->r15, sizeof(long));
+
+	/* gr16-gr31 */
+
+	retval |= __copy_to_user(&ppr->gr[16], &pt->r16, sizeof(long) * 16);
+
+	/* b0 */
+
+	retval |= __put_user(pt->b0, &ppr->br[0]);
+
+	/* b1-b5 */
+
+	for (i = 1; i < 6; i++) {
+		if (unw_access_br(&info, i, &val, 0) < 0)
+			return -EIO;
+		__put_user(val, &ppr->br[i]);
+	}
+
+	/* b6-b7 */
+
+	retval |= __put_user(pt->b6, &ppr->br[6]);
+	retval |= __put_user(pt->b7, &ppr->br[7]);
+
+	/* fr2-fr5 */
+
+	for (i = 2; i < 6; i++) {
+		if (unw_get_fr(&info, i, &fpval) < 0)
+			return -EIO;
+		retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval));
+	}
+
+	/* fr6-fr11 */
+
+	retval |= __copy_to_user(&ppr->fr[6], &pt->f6,
+				 sizeof(struct ia64_fpreg) * 6);
+
+	/* fp scratch regs(12-15) */
+
+	retval |= __copy_to_user(&ppr->fr[12], &sw->f12,
+				 sizeof(struct ia64_fpreg) * 4);
+
+	/* fr16-fr31 */
+
+	for (i = 16; i < 32; i++) {
+		if (unw_get_fr(&info, i, &fpval) < 0)
+			return -EIO;
+		retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval));
+	}
+
+	/* fph */
+
+	ia64_flush_fph(child);
+	retval |= __copy_to_user(&ppr->fr[32], &child->thread.fph,
+				 sizeof(ppr->fr[32]) * 96);
+
+	/*  preds */
+
+	retval |= __put_user(pt->pr, &ppr->pr);
+
+	/* nat bits */
+
+	retval |= __put_user(nat_bits, &ppr->nat);
+
+	ret = retval ? -EIO : 0;
+	return ret;
+}
+
+static long
+ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
+{
+	unsigned long psr, rsc, ec, lc, rnat, bsp, cfm, nat_bits, val = 0;
+	struct unw_frame_info info;
+	struct switch_stack *sw;
+	struct ia64_fpreg fpval;
+	struct pt_regs *pt;
+	long ret, retval = 0;
+	int i;
+
+	memset(&fpval, 0, sizeof(fpval));
+
+	if (!access_ok(VERIFY_READ, ppr, sizeof(struct pt_all_user_regs)))
+		return -EIO;
+
+	pt = task_pt_regs(child);
+	sw = (struct switch_stack *) (child->thread.ksp + 16);
+	unw_init_from_blocked_task(&info, child);
+	if (unw_unwind_to_user(&info) < 0) {
+		return -EIO;
+	}
+
+	if (((unsigned long) ppr & 0x7) != 0) {
+		dprintk("ptrace:unaligned register address %p\n", ppr);
+		return -EIO;
+	}
+
+	/* control regs */
+
+	retval |= __get_user(pt->cr_iip, &ppr->cr_iip);
+	retval |= __get_user(psr, &ppr->cr_ipsr);
+
+	/* app regs */
+
+	retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]);
+	retval |= __get_user(rsc, &ppr->ar[PT_AUR_RSC]);
+	retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]);
+	retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]);
+	retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
+	retval |= __get_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]);
+
+	retval |= __get_user(ec, &ppr->ar[PT_AUR_EC]);
+	retval |= __get_user(lc, &ppr->ar[PT_AUR_LC]);
+	retval |= __get_user(rnat, &ppr->ar[PT_AUR_RNAT]);
+	retval |= __get_user(bsp, &ppr->ar[PT_AUR_BSP]);
+	retval |= __get_user(cfm, &ppr->cfm);
+
+	/* gr1-gr3 */
+
+	retval |= __copy_from_user(&pt->r1, &ppr->gr[1], sizeof(long));
+	retval |= __copy_from_user(&pt->r2, &ppr->gr[2], sizeof(long) * 2);
+
+	/* gr4-gr7 */
+
+	for (i = 4; i < 8; i++) {
+		retval |= __get_user(val, &ppr->gr[i]);
+		/* NaT bit will be set via PT_NAT_BITS: */
+		if (unw_set_gr(&info, i, val, 0) < 0)
+			return -EIO;
+	}
+
+	/* gr8-gr11 */
+
+	retval |= __copy_from_user(&pt->r8, &ppr->gr[8], sizeof(long) * 4);
+
+	/* gr12-gr15 */
+
+	retval |= __copy_from_user(&pt->r12, &ppr->gr[12], sizeof(long) * 2);
+	retval |= __copy_from_user(&pt->r14, &ppr->gr[14], sizeof(long));
+	retval |= __copy_from_user(&pt->r15, &ppr->gr[15], sizeof(long));
+
+	/* gr16-gr31 */
+
+	retval |= __copy_from_user(&pt->r16, &ppr->gr[16], sizeof(long) * 16);
+
+	/* b0 */
+
+	retval |= __get_user(pt->b0, &ppr->br[0]);
+
+	/* b1-b5 */
+
+	for (i = 1; i < 6; i++) {
+		retval |= __get_user(val, &ppr->br[i]);
+		unw_set_br(&info, i, val);
+	}
+
+	/* b6-b7 */
+
+	retval |= __get_user(pt->b6, &ppr->br[6]);
+	retval |= __get_user(pt->b7, &ppr->br[7]);
+
+	/* fr2-fr5 */
+
+	for (i = 2; i < 6; i++) {
+		retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval));
+		if (unw_set_fr(&info, i, fpval) < 0)
+			return -EIO;
+	}
+
+	/* fr6-fr11 */
+
+	retval |= __copy_from_user(&pt->f6, &ppr->fr[6],
+				   sizeof(ppr->fr[6]) * 6);
+
+	/* fp scratch regs(12-15) */
+
+	retval |= __copy_from_user(&sw->f12, &ppr->fr[12],
+				   sizeof(ppr->fr[12]) * 4);
+
+	/* fr16-fr31 */
+
+	for (i = 16; i < 32; i++) {
+		retval |= __copy_from_user(&fpval, &ppr->fr[i],
+					   sizeof(fpval));
+		if (unw_set_fr(&info, i, fpval) < 0)
+			return -EIO;
+	}
+
+	/* fph */
+
+	ia64_sync_fph(child);
+	retval |= __copy_from_user(&child->thread.fph, &ppr->fr[32],
+				   sizeof(ppr->fr[32]) * 96);
+
+	/* preds */
+
+	retval |= __get_user(pt->pr, &ppr->pr);
+
+	/* nat bits */
+
+	retval |= __get_user(nat_bits, &ppr->nat);
+
+	retval |= access_uarea(child, PT_CR_IPSR, &psr, 1);
+	retval |= access_uarea(child, PT_AR_RSC, &rsc, 1);
+	retval |= access_uarea(child, PT_AR_EC, &ec, 1);
+	retval |= access_uarea(child, PT_AR_LC, &lc, 1);
+	retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1);
+	retval |= access_uarea(child, PT_AR_BSP, &bsp, 1);
+	retval |= access_uarea(child, PT_CFM, &cfm, 1);
+	retval |= access_uarea(child, PT_NAT_BITS, &nat_bits, 1);
+
+	ret = retval ? -EIO : 0;
+	return ret;
+}
+
+void
+user_enable_single_step (struct task_struct *child)
+{
+	struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
+
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
+	child_psr->ss = 1;
+}
+
+void
+user_enable_block_step (struct task_struct *child)
+{
+	struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
+
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
+	child_psr->tb = 1;
+}
+
+void
+user_disable_single_step (struct task_struct *child)
+{
+	struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
+
+	/* make sure the single step/taken-branch trap bits are not set: */
+	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+	child_psr->ss = 0;
+	child_psr->tb = 0;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure the single step bit is not set.
+ */
+void
+ptrace_disable (struct task_struct *child)
+{
+	user_disable_single_step(child);
+}
+
+long
+arch_ptrace (struct task_struct *child, long request,
+	     unsigned long addr, unsigned long data)
+{
+	switch (request) {
+	case PTRACE_PEEKTEXT:
+	case PTRACE_PEEKDATA:
+		/* read word at location addr */
+		if (access_process_vm(child, addr, &data, sizeof(data), 0)
+		    != sizeof(data))
+			return -EIO;
+		/* ensure return value is not mistaken for error code */
+		force_successful_syscall_return();
+		return data;
+
+	/* PTRACE_POKETEXT and PTRACE_POKEDATA is handled
+	 * by the generic ptrace_request().
+	 */
+
+	case PTRACE_PEEKUSR:
+		/* read the word at addr in the USER area */
+		if (access_uarea(child, addr, &data, 0) < 0)
+			return -EIO;
+		/* ensure return value is not mistaken for error code */
+		force_successful_syscall_return();
+		return data;
+
+	case PTRACE_POKEUSR:
+		/* write the word at addr in the USER area */
+		if (access_uarea(child, addr, &data, 1) < 0)
+			return -EIO;
+		return 0;
+
+	case PTRACE_OLD_GETSIGINFO:
+		/* for backwards-compatibility */
+		return ptrace_request(child, PTRACE_GETSIGINFO, addr, data);
+
+	case PTRACE_OLD_SETSIGINFO:
+		/* for backwards-compatibility */
+		return ptrace_request(child, PTRACE_SETSIGINFO, addr, data);
+
+	case PTRACE_GETREGS:
+		return ptrace_getregs(child,
+				      (struct pt_all_user_regs __user *) data);
+
+	case PTRACE_SETREGS:
+		return ptrace_setregs(child,
+				      (struct pt_all_user_regs __user *) data);
+
+	default:
+		return ptrace_request(child, request, addr, data);
+	}
+}
+
+
+/* "asmlinkage" so the input arguments are preserved... */
+
+asmlinkage long
+syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
+		     long arg4, long arg5, long arg6, long arg7,
+		     struct pt_regs regs)
+{
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		if (tracehook_report_syscall_entry(&regs))
+			return -ENOSYS;
+
+	/* copy user rbs to kernel rbs */
+	if (test_thread_flag(TIF_RESTORE_RSE))
+		ia64_sync_krbs();
+
+	if (unlikely(current->audit_context)) {
+		long syscall;
+		int arch;
+
+		syscall = regs.r15;
+		arch = AUDIT_ARCH_IA64;
+
+		audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3);
+	}
+
+	return 0;
+}
+
+/* "asmlinkage" so the input arguments are preserved... */
+
+asmlinkage void
+syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
+		     long arg4, long arg5, long arg6, long arg7,
+		     struct pt_regs regs)
+{
+	int step;
+
+	if (unlikely(current->audit_context)) {
+		int success = AUDITSC_RESULT(regs.r10);
+		long result = regs.r8;
+
+		if (success != AUDITSC_SUCCESS)
+			result = -result;
+		audit_syscall_exit(success, result);
+	}
+
+	step = test_thread_flag(TIF_SINGLESTEP);
+	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall_exit(&regs, step);
+
+	/* copy user rbs to kernel rbs */
+	if (test_thread_flag(TIF_RESTORE_RSE))
+		ia64_sync_krbs();
+}
+
+/* Utrace implementation starts here */
+struct regset_get {
+	void *kbuf;
+	void __user *ubuf;
+};
+
+struct regset_set {
+	const void *kbuf;
+	const void __user *ubuf;
+};
+
+struct regset_getset {
+	struct task_struct *target;
+	const struct user_regset *regset;
+	union {
+		struct regset_get get;
+		struct regset_set set;
+	} u;
+	unsigned int pos;
+	unsigned int count;
+	int ret;
+};
+
+static int
+access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long *ptr = NULL;
+	int ret;
+	char nat = 0;
+
+	pt = task_pt_regs(target);
+	switch (addr) {
+	case ELF_GR_OFFSET(1):
+		ptr = &pt->r1;
+		break;
+	case ELF_GR_OFFSET(2):
+	case ELF_GR_OFFSET(3):
+		ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2));
+		break;
+	case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7):
+		if (write_access) {
+			/* read NaT bit first: */
+			unsigned long dummy;
+
+			ret = unw_get_gr(info, addr/8, &dummy, &nat);
+			if (ret < 0)
+				return ret;
+		}
+		return unw_access_gr(info, addr/8, data, &nat, write_access);
+	case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11):
+		ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8);
+		break;
+	case ELF_GR_OFFSET(12):
+	case ELF_GR_OFFSET(13):
+		ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12);
+		break;
+	case ELF_GR_OFFSET(14):
+		ptr = &pt->r14;
+		break;
+	case ELF_GR_OFFSET(15):
+		ptr = &pt->r15;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static int
+access_elf_breg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long *ptr = NULL;
+
+	pt = task_pt_regs(target);
+	switch (addr) {
+	case ELF_BR_OFFSET(0):
+		ptr = &pt->b0;
+		break;
+	case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5):
+		return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8,
+				     data, write_access);
+	case ELF_BR_OFFSET(6):
+		ptr = &pt->b6;
+		break;
+	case ELF_BR_OFFSET(7):
+		ptr = &pt->b7;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static int
+access_elf_areg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long cfm, urbs_end;
+	unsigned long *ptr = NULL;
+
+	pt = task_pt_regs(target);
+	if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) {
+		switch (addr) {
+		case ELF_AR_RSC_OFFSET:
+			/* force PL3 */
+			if (write_access)
+				pt->ar_rsc = *data | (3 << 2);
+			else
+				*data = pt->ar_rsc;
+			return 0;
+		case ELF_AR_BSP_OFFSET:
+			/*
+			 * By convention, we use PT_AR_BSP to refer to
+			 * the end of the user-level backing store.
+			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
+			 * to get the real value of ar.bsp at the time
+			 * the kernel was entered.
+			 *
+			 * Furthermore, when changing the contents of
+			 * PT_AR_BSP (or PT_CFM) while the task is
+			 * blocked in a system call, convert the state
+			 * so that the non-system-call exit
+			 * path is used.  This ensures that the proper
+			 * state will be picked up when resuming
+			 * execution.  However, it *also* means that
+			 * once we write PT_AR_BSP/PT_CFM, it won't be
+			 * possible to modify the syscall arguments of
+			 * the pending system call any longer.  This
+			 * shouldn't be an issue because modifying
+			 * PT_AR_BSP/PT_CFM generally implies that
+			 * we're either abandoning the pending system
+			 * call or that we defer it's re-execution
+			 * (e.g., due to GDB doing an inferior
+			 * function call).
+			 */
+			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+			if (write_access) {
+				if (*data != urbs_end) {
+					if (in_syscall(pt))
+						convert_to_non_syscall(target,
+								       pt,
+								       cfm);
+					/*
+					 * Simulate user-level write
+					 * of ar.bsp:
+					 */
+					pt->loadrs = 0;
+					pt->ar_bspstore = *data;
+				}
+			} else
+				*data = urbs_end;
+			return 0;
+		case ELF_AR_BSPSTORE_OFFSET:
+			ptr = &pt->ar_bspstore;
+			break;
+		case ELF_AR_RNAT_OFFSET:
+			ptr = &pt->ar_rnat;
+			break;
+		case ELF_AR_CCV_OFFSET:
+			ptr = &pt->ar_ccv;
+			break;
+		case ELF_AR_UNAT_OFFSET:
+			ptr = &pt->ar_unat;
+			break;
+		case ELF_AR_FPSR_OFFSET:
+			ptr = &pt->ar_fpsr;
+			break;
+		case ELF_AR_PFS_OFFSET:
+			ptr = &pt->ar_pfs;
+			break;
+		case ELF_AR_LC_OFFSET:
+			return unw_access_ar(info, UNW_AR_LC, data,
+					     write_access);
+		case ELF_AR_EC_OFFSET:
+			return unw_access_ar(info, UNW_AR_EC, data,
+					     write_access);
+		case ELF_AR_CSD_OFFSET:
+			ptr = &pt->ar_csd;
+			break;
+		case ELF_AR_SSD_OFFSET:
+			ptr = &pt->ar_ssd;
+		}
+	} else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) {
+		switch (addr) {
+		case ELF_CR_IIP_OFFSET:
+			ptr = &pt->cr_iip;
+			break;
+		case ELF_CFM_OFFSET:
+			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+			if (write_access) {
+				if (((cfm ^ *data) & PFM_MASK) != 0) {
+					if (in_syscall(pt))
+						convert_to_non_syscall(target,
+								       pt,
+								       cfm);
+					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
+						      | (*data & PFM_MASK));
+				}
+			} else
+				*data = cfm;
+			return 0;
+		case ELF_CR_IPSR_OFFSET:
+			if (write_access) {
+				unsigned long tmp = *data;
+				/* psr.ri==3 is a reserved value: SDM 2:25 */
+				if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
+					tmp &= ~IA64_PSR_RI;
+				pt->cr_ipsr = ((tmp & IPSR_MASK)
+					       | (pt->cr_ipsr & ~IPSR_MASK));
+			} else
+				*data = (pt->cr_ipsr & IPSR_MASK);
+			return 0;
+		}
+	} else if (addr == ELF_NAT_OFFSET)
+		return access_nat_bits(target, pt, info,
+				       data, write_access);
+	else if (addr == ELF_PR_OFFSET)
+		ptr = &pt->pr;
+	else
+		return -1;
+
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+
+	return 0;
+}
+
+static int
+access_elf_reg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15))
+		return access_elf_gpreg(target, info, addr, data, write_access);
+	else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7))
+		return access_elf_breg(target, info, addr, data, write_access);
+	else
+		return access_elf_areg(target, info, addr, data, write_access);
+}
+
+void do_gpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	struct regset_getset *dst = arg;
+	elf_greg_t tmp[16];
+	unsigned int i, index, min_copy;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/*
+	 * coredump format:
+	 *      r0-r31
+	 *      NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+	 *      predicate registers (p0-p63)
+	 *      b0-b7
+	 *      ip cfm user-mask
+	 *      ar.rsc ar.bsp ar.bspstore ar.rnat
+	 *      ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+	 */
+
+
+	/* Skip r0 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						      &dst->u.get.kbuf,
+						      &dst->u.get.ubuf,
+						      0, ELF_GR_OFFSET(1));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* gr1 - gr15 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+		index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+		min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_GR_OFFSET(16);
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* r16-r31 */
+	if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+		pt = task_pt_regs(dst->target);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16,
+				ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* nat, pr, b0 - b7 */
+	if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+		index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+		min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_CR_IIP_OFFSET;
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+	 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+	 */
+	if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+		index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+		min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_AR_END_OFFSET;
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+	}
+}
+
+void do_gpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	struct regset_getset *dst = arg;
+	elf_greg_t tmp[16];
+	unsigned int i, index;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip r0 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+		dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+						       &dst->u.set.kbuf,
+						       &dst->u.set.ubuf,
+						       0, ELF_GR_OFFSET(1));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* gr1-gr15 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+		i = dst->pos;
+		index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+		if (dst->ret)
+			return;
+		for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		if (dst->count == 0)
+			return;
+	}
+
+	/* gr16-gr31 */
+	if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+		pt = task_pt_regs(dst->target);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16,
+				ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* nat, pr, b0 - b7 */
+	if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+		i = dst->pos;
+		index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+		if (dst->ret)
+			return;
+		for (; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		if (dst->count == 0)
+			return;
+	}
+
+	/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+	 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+	 */
+	if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+		i = dst->pos;
+		index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+		if (dst->ret)
+			return;
+		for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+	}
+}
+
+#define ELF_FP_OFFSET(i)	(i * sizeof(elf_fpreg_t))
+
+void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	elf_fpreg_t tmp[30];
+	int index, min_copy, i;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip pos 0 and 1 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						      &dst->u.get.kbuf,
+						      &dst->u.get.ubuf,
+						      0, ELF_FP_OFFSET(2));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fr2-fr31 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t);
+
+		min_copy = min(((unsigned int)ELF_FP_OFFSET(32)),
+				dst->pos + dst->count);
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t),
+				index++)
+			if (unw_get_fr(info, i / sizeof(elf_fpreg_t),
+					 &tmp[index])) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fph */
+	if (dst->count > 0) {
+		ia64_flush_fph(dst->target);
+		if (task->thread.flags & IA64_THREAD_FPH_VALID)
+			dst->ret = user_regset_copyout(
+				&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				&dst->target->thread.fph,
+				ELF_FP_OFFSET(32), -1);
+		else
+			/* Zero fill instead.  */
+			dst->ret = user_regset_copyout_zero(
+				&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				ELF_FP_OFFSET(32), -1);
+	}
+}
+
+void do_fpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	elf_fpreg_t fpreg, tmp[30];
+	int index, start, end;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip pos 0 and 1 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+						       &dst->u.set.kbuf,
+						       &dst->u.set.ubuf,
+						       0, ELF_FP_OFFSET(2));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fr2-fr31 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		start = dst->pos;
+		end = min(((unsigned int)ELF_FP_OFFSET(32)),
+			 dst->pos + dst->count);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+		if (dst->ret)
+			return;
+
+		if (start & 0xF) { /* only write high part */
+			if (unw_get_fr(info, start / sizeof(elf_fpreg_t),
+					 &fpreg)) {
+				dst->ret = -EIO;
+				return;
+			}
+			tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0]
+				= fpreg.u.bits[0];
+			start &= ~0xFUL;
+		}
+		if (end & 0xF) { /* only write low part */
+			if (unw_get_fr(info, end / sizeof(elf_fpreg_t),
+					&fpreg)) {
+				dst->ret = -EIO;
+				return;
+			}
+			tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1]
+				= fpreg.u.bits[1];
+			end = (end + 0xF) & ~0xFUL;
+		}
+
+		for ( ;	start < end ; start += sizeof(elf_fpreg_t)) {
+			index = start / sizeof(elf_fpreg_t);
+			if (unw_set_fr(info, index, tmp[index - 2])) {
+				dst->ret = -EIO;
+				return;
+			}
+		}
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* fph */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) {
+		ia64_sync_fph(dst->target);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+						&dst->u.set.kbuf,
+						&dst->u.set.ubuf,
+						&dst->target->thread.fph,
+						ELF_FP_OFFSET(32), -1);
+	}
+}
+
+static int
+do_regset_call(void (*call)(struct unw_frame_info *, void *),
+	       struct task_struct *target,
+	       const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf)
+{
+	struct regset_getset info = { .target = target, .regset = regset,
+				 .pos = pos, .count = count,
+				 .u.set = { .kbuf = kbuf, .ubuf = ubuf },
+				 .ret = 0 };
+
+	if (target == current)
+		unw_init_running(call, &info);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, target);
+		(*call)(&ufi, &info);
+	}
+
+	return info.ret;
+}
+
+static int
+gpregs_get(struct task_struct *target,
+	   const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_gpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int gpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_gpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static void do_gpregs_writeback(struct unw_frame_info *info, void *arg)
+{
+	do_sync_rbs(info, ia64_sync_user_rbs);
+}
+
+/*
+ * This is called to write back the register backing store.
+ * ptrace does this before it stops, so that a tracer reading the user
+ * memory after the thread stops will get the current register data.
+ */
+static int
+gpregs_writeback(struct task_struct *target,
+		 const struct user_regset *regset,
+		 int now)
+{
+	if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE))
+		return 0;
+	set_notify_resume(target);
+	return do_regset_call(do_gpregs_writeback, target, regset, 0, 0,
+		NULL, NULL);
+}
+
+static int
+fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+	return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32;
+}
+
+static int fpregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int fpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int
+access_uarea(struct task_struct *child, unsigned long addr,
+	      unsigned long *data, int write_access)
+{
+	unsigned int pos = -1; /* an invalid value */
+	int ret;
+	unsigned long *ptr, regnum;
+
+	if ((addr & 0x7) != 0) {
+		dprintk("ptrace: unaligned register address 0x%lx\n", addr);
+		return -1;
+	}
+	if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) ||
+		(addr >= PT_R7 + 8 && addr < PT_B1) ||
+		(addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) ||
+		(addr >= PT_AR_SSD + 8 && addr < PT_DBR)) {
+		dprintk("ptrace: rejecting access to register "
+					"address 0x%lx\n", addr);
+		return -1;
+	}
+
+	switch (addr) {
+	case PT_F32 ... (PT_F127 + 15):
+		pos = addr - PT_F32 + ELF_FP_OFFSET(32);
+		break;
+	case PT_F2 ... (PT_F5 + 15):
+		pos = addr - PT_F2 + ELF_FP_OFFSET(2);
+		break;
+	case PT_F10 ... (PT_F31 + 15):
+		pos = addr - PT_F10 + ELF_FP_OFFSET(10);
+		break;
+	case PT_F6 ... (PT_F9 + 15):
+		pos = addr - PT_F6 + ELF_FP_OFFSET(6);
+		break;
+	}
+
+	if (pos != -1) {
+		if (write_access)
+			ret = fpregs_set(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		else
+			ret = fpregs_get(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		if (ret != 0)
+			return -1;
+		return 0;
+	}
+
+	switch (addr) {
+	case PT_NAT_BITS:
+		pos = ELF_NAT_OFFSET;
+		break;
+	case PT_R4 ... PT_R7:
+		pos = addr - PT_R4 + ELF_GR_OFFSET(4);
+		break;
+	case PT_B1 ... PT_B5:
+		pos = addr - PT_B1 + ELF_BR_OFFSET(1);
+		break;
+	case PT_AR_EC:
+		pos = ELF_AR_EC_OFFSET;
+		break;
+	case PT_AR_LC:
+		pos = ELF_AR_LC_OFFSET;
+		break;
+	case PT_CR_IPSR:
+		pos = ELF_CR_IPSR_OFFSET;
+		break;
+	case PT_CR_IIP:
+		pos = ELF_CR_IIP_OFFSET;
+		break;
+	case PT_CFM:
+		pos = ELF_CFM_OFFSET;
+		break;
+	case PT_AR_UNAT:
+		pos = ELF_AR_UNAT_OFFSET;
+		break;
+	case PT_AR_PFS:
+		pos = ELF_AR_PFS_OFFSET;
+		break;
+	case PT_AR_RSC:
+		pos = ELF_AR_RSC_OFFSET;
+		break;
+	case PT_AR_RNAT:
+		pos = ELF_AR_RNAT_OFFSET;
+		break;
+	case PT_AR_BSPSTORE:
+		pos = ELF_AR_BSPSTORE_OFFSET;
+		break;
+	case PT_PR:
+		pos = ELF_PR_OFFSET;
+		break;
+	case PT_B6:
+		pos = ELF_BR_OFFSET(6);
+		break;
+	case PT_AR_BSP:
+		pos = ELF_AR_BSP_OFFSET;
+		break;
+	case PT_R1 ... PT_R3:
+		pos = addr - PT_R1 + ELF_GR_OFFSET(1);
+		break;
+	case PT_R12 ... PT_R15:
+		pos = addr - PT_R12 + ELF_GR_OFFSET(12);
+		break;
+	case PT_R8 ... PT_R11:
+		pos = addr - PT_R8 + ELF_GR_OFFSET(8);
+		break;
+	case PT_R16 ... PT_R31:
+		pos = addr - PT_R16 + ELF_GR_OFFSET(16);
+		break;
+	case PT_AR_CCV:
+		pos = ELF_AR_CCV_OFFSET;
+		break;
+	case PT_AR_FPSR:
+		pos = ELF_AR_FPSR_OFFSET;
+		break;
+	case PT_B0:
+		pos = ELF_BR_OFFSET(0);
+		break;
+	case PT_B7:
+		pos = ELF_BR_OFFSET(7);
+		break;
+	case PT_AR_CSD:
+		pos = ELF_AR_CSD_OFFSET;
+		break;
+	case PT_AR_SSD:
+		pos = ELF_AR_SSD_OFFSET;
+		break;
+	}
+
+	if (pos != -1) {
+		if (write_access)
+			ret = gpregs_set(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		else
+			ret = gpregs_get(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		if (ret != 0)
+			return -1;
+		return 0;
+	}
+
+	/* access debug registers */
+	if (addr >= PT_IBR) {
+		regnum = (addr - PT_IBR) >> 3;
+		ptr = &child->thread.ibr[0];
+	} else {
+		regnum = (addr - PT_DBR) >> 3;
+		ptr = &child->thread.dbr[0];
+	}
+
+	if (regnum >= 8) {
+		dprintk("ptrace: rejecting access to register "
+				"address 0x%lx\n", addr);
+		return -1;
+	}
+#ifdef CONFIG_PERFMON
+	/*
+	 * Check if debug registers are used by perfmon. This
+	 * test must be done once we know that we can do the
+	 * operation, i.e. the arguments are all valid, but
+	 * before we start modifying the state.
+	 *
+	 * Perfmon needs to keep a count of how many processes
+	 * are trying to modify the debug registers for system
+	 * wide monitoring sessions.
+	 *
+	 * We also include read access here, because they may
+	 * cause the PMU-installed debug register state
+	 * (dbr[], ibr[]) to be reset. The two arrays are also
+	 * used by perfmon, but we do not use
+	 * IA64_THREAD_DBG_VALID. The registers are restored
+	 * by the PMU context switch code.
+	 */
+	if (pfm_use_debug_registers(child))
+		return -1;
+#endif
+
+	if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+		child->thread.flags |= IA64_THREAD_DBG_VALID;
+		memset(child->thread.dbr, 0,
+				sizeof(child->thread.dbr));
+		memset(child->thread.ibr, 0,
+				sizeof(child->thread.ibr));
+	}
+
+	ptr += regnum;
+
+	if ((regnum & 1) && write_access) {
+		/* don't let the user set kernel-level breakpoints: */
+		*ptr = *data & ~(7UL << 56);
+		return 0;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static const struct user_regset native_regsets[] = {
+	{
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t),
+		.get = gpregs_get, .set = gpregs_set,
+		.writeback = gpregs_writeback
+	},
+	{
+		.core_note_type = NT_PRFPREG,
+		.n = ELF_NFPREG,
+		.size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t),
+		.get = fpregs_get, .set = fpregs_set, .active = fpregs_active
+	},
+};
+
+static const struct user_regset_view user_ia64_view = {
+	.name = "ia64",
+	.e_machine = EM_IA_64,
+	.regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
+{
+	return &user_ia64_view;
+}
+
+struct syscall_get_set_args {
+	unsigned int i;
+	unsigned int n;
+	unsigned long *args;
+	struct pt_regs *regs;
+	int rw;
+};
+
+static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
+{
+	struct syscall_get_set_args *args = data;
+	struct pt_regs *pt = args->regs;
+	unsigned long *krbs, cfm, ndirty;
+	int i, count;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	cfm = pt->cr_ifs;
+	krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8;
+	ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
+
+	count = 0;
+	if (in_syscall(pt))
+		count = min_t(int, args->n, cfm & 0x7f);
+
+	for (i = 0; i < count; i++) {
+		if (args->rw)
+			*ia64_rse_skip_regs(krbs, ndirty + i + args->i) =
+				args->args[i];
+		else
+			args->args[i] = *ia64_rse_skip_regs(krbs,
+				ndirty + i + args->i);
+	}
+
+	if (!args->rw) {
+		while (i < args->n) {
+			args->args[i] = 0;
+			i++;
+		}
+	}
+}
+
+void ia64_syscall_get_set_arguments(struct task_struct *task,
+	struct pt_regs *regs, unsigned int i, unsigned int n,
+	unsigned long *args, int rw)
+{
+	struct syscall_get_set_args data = {
+		.i = i,
+		.n = n,
+		.args = args,
+		.regs = regs,
+		.rw = rw,
+	};
+
+	if (task == current)
+		unw_init_running(syscall_get_set_args_cb, &data);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, task);
+		syscall_get_set_args_cb(&ufi, &data);
+	}
+}
diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
new file mode 100644
index 00000000..c370e02f
--- /dev/null
+++ b/arch/ia64/kernel/relocate_kernel.S
@@ -0,0 +1,325 @@
+/*
+ * arch/ia64/kernel/relocate_kernel.S
+ *
+ * Relocate kexec'able kernel and start it
+ *
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz  <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Intel Corp,  Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mca_asm.h>
+
+       /* Must be relocatable PIC code callable as a C function
+        */
+GLOBAL_ENTRY(relocate_new_kernel)
+	.prologue
+	alloc r31=ar.pfs,4,0,0,0
+        .body
+.reloc_entry:
+{
+	rsm psr.i| psr.ic
+	mov r2=ip
+}
+	;;
+{
+        flushrs                         // must be first insn in group
+        srlz.i
+}
+	;;
+	dep r2=0,r2,61,3		//to physical address
+	;;
+	//first switch to physical mode
+	add r3=1f-.reloc_entry, r2
+	movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
+	mov ar.rsc=0	          	// put RSE in enforced lazy mode
+	;;
+	add sp=(memory_stack_end - 16 - .reloc_entry),r2
+	add r8=(register_stack - .reloc_entry),r2
+	;;
+	mov r18=ar.rnat
+	mov ar.bspstore=r8
+	;;
+        mov cr.ipsr=r16
+        mov cr.iip=r3
+        mov cr.ifs=r0
+	srlz.i
+	;;
+	mov ar.rnat=r18
+	rfi				// note: this unmask MCA/INIT (psr.mc)
+	;;
+1:
+	//physical mode code begin
+	mov b6=in1
+	dep r28=0,in2,61,3	//to physical address
+
+	// purge all TC entries
+#define O(member)       IA64_CPUINFO_##member##_OFFSET
+        GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
+        ;;
+        addl r17=O(PTCE_STRIDE),r2
+        addl r2=O(PTCE_BASE),r2
+        ;;
+        ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));;    	// r18=ptce_base
+        ld4 r19=[r2],4                                  // r19=ptce_count[0]
+        ld4 r21=[r17],4                                 // r21=ptce_stride[0]
+        ;;
+        ld4 r20=[r2]                                    // r20=ptce_count[1]
+        ld4 r22=[r17]                                   // r22=ptce_stride[1]
+        mov r24=r0
+        ;;
+        adds r20=-1,r20
+        ;;
+#undef O
+2:
+        cmp.ltu p6,p7=r24,r19
+(p7)    br.cond.dpnt.few 4f
+        mov ar.lc=r20
+3:
+        ptc.e r18
+        ;;
+        add r18=r22,r18
+        br.cloop.sptk.few 3b
+        ;;
+        add r18=r21,r18
+        add r24=1,r24
+        ;;
+        br.sptk.few 2b
+4:
+        srlz.i
+        ;;
+	// purge TR entry for kernel text and data
+        movl r16=KERNEL_START
+        mov r18=KERNEL_TR_PAGE_SHIFT<<2
+        ;;
+        ptr.i r16, r18
+        ptr.d r16, r18
+        ;;
+        srlz.i
+        ;;
+
+        // purge TR entry for pal code
+        mov r16=in3
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.i r16,r18
+        ;;
+        srlz.i
+	;;
+
+        // purge TR entry for stack
+        mov r16=IA64_KR(CURRENT_STACK)
+        ;;
+        shl r16=r16,IA64_GRANULE_SHIFT
+        movl r19=PAGE_OFFSET
+        ;;
+        add r16=r19,r16
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.d r16,r18
+        ;;
+        srlz.i
+	;;
+
+	//copy segments
+	movl r16=PAGE_MASK
+        mov  r30=in0                    // in0 is page_list
+        br.sptk.few .dest_page
+	;;
+.loop:
+	ld8  r30=[in0], 8;;
+.dest_page:
+	tbit.z p0, p6=r30, 0;;    	// 0x1 dest page
+(p6)	and r17=r30, r16
+(p6)	br.cond.sptk.few .loop;;
+
+	tbit.z p0, p6=r30, 1;;		// 0x2 indirect page
+(p6)	and in0=r30, r16
+(p6)	br.cond.sptk.few .loop;;
+
+	tbit.z p0, p6=r30, 2;;		// 0x4 end flag
+(p6)	br.cond.sptk.few .end_loop;;
+
+	tbit.z p6, p0=r30, 3;;		// 0x8 source page
+(p6)	br.cond.sptk.few .loop
+
+	and r18=r30, r16
+
+	// simple copy page, may optimize later
+	movl r14=PAGE_SIZE/8 - 1;;
+	mov ar.lc=r14;;
+1:
+	ld8 r14=[r18], 8;;
+	st8 [r17]=r14;;
+	fc.i r17
+	add r17=8, r17
+	br.ctop.sptk.few 1b
+	br.sptk.few .loop
+	;;
+
+.end_loop:
+	sync.i			// for fc.i
+	;;
+	srlz.i
+	;;
+	srlz.d
+	;;
+	br.call.sptk.many b0=b6;;
+
+.align  32
+memory_stack:
+	.fill           8192, 1, 0
+memory_stack_end:
+register_stack:
+	.fill           8192, 1, 0
+register_stack_end:
+relocate_new_kernel_end:
+END(relocate_new_kernel)
+
+.global relocate_new_kernel_size
+relocate_new_kernel_size:
+	data8	relocate_new_kernel_end - relocate_new_kernel
+
+GLOBAL_ENTRY(ia64_dump_cpu_regs)
+        .prologue
+        alloc loc0=ar.pfs,1,2,0,0
+        .body
+        mov     ar.rsc=0                // put RSE in enforced lazy mode
+        add     loc1=4*8, in0           // save r4 and r5 first
+        ;;
+{
+        flushrs                         // flush dirty regs to backing store
+        srlz.i
+}
+        st8 [loc1]=r4, 8
+        ;;
+        st8 [loc1]=r5, 8
+        ;;
+        add loc1=32*8, in0
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r0, 8			// r0
+        st8 [loc1]=r4, 8		// rnat
+        mov r5=pr
+        ;;
+        st8 [in0]=r1, 8			// r1
+        st8 [loc1]=r5, 8		// pr
+        mov r4=b0
+        ;;
+        st8 [in0]=r2, 8			// r2
+        st8 [loc1]=r4, 8		// b0
+        mov r5=b1;
+        ;;
+        st8 [in0]=r3, 24		// r3
+        st8 [loc1]=r5, 8		// b1
+        mov r4=b2
+        ;;
+        st8 [in0]=r6, 8			// r6
+        st8 [loc1]=r4, 8		// b2
+	mov r5=b3
+        ;;
+        st8 [in0]=r7, 8			// r7
+        st8 [loc1]=r5, 8		// b3
+        mov r4=b4
+        ;;
+        st8 [in0]=r8, 8			// r8
+        st8 [loc1]=r4, 8		// b4
+        mov r5=b5
+        ;;
+        st8 [in0]=r9, 8			// r9
+        st8 [loc1]=r5, 8		// b5
+        mov r4=b6
+        ;;
+        st8 [in0]=r10, 8		// r10
+        st8 [loc1]=r5, 8		// b6
+        mov r5=b7
+        ;;
+        st8 [in0]=r11, 8		// r11
+        st8 [loc1]=r5, 8		// b7
+        mov r4=b0
+        ;;
+        st8 [in0]=r12, 8		// r12
+        st8 [loc1]=r4, 8		// ip
+        mov r5=loc0
+	;;
+        st8 [in0]=r13, 8		// r13
+        extr.u r5=r5, 0, 38		// ar.pfs.pfm
+	mov r4=r0			// user mask
+        ;;
+        st8 [in0]=r14, 8		// r14
+        st8 [loc1]=r5, 8		// cfm
+        ;;
+        st8 [in0]=r15, 8		// r15
+        st8 [loc1]=r4, 8        	// user mask
+	mov r5=ar.rsc
+        ;;
+        st8 [in0]=r16, 8		// r16
+        st8 [loc1]=r5, 8        	// ar.rsc
+        mov r4=ar.bsp
+        ;;
+        st8 [in0]=r17, 8		// r17
+        st8 [loc1]=r4, 8        	// ar.bsp
+        mov r5=ar.bspstore
+        ;;
+        st8 [in0]=r18, 8		// r18
+        st8 [loc1]=r5, 8        	// ar.bspstore
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r19, 8		// r19
+        st8 [loc1]=r4, 8        	// ar.rnat
+        mov r5=ar.ccv
+        ;;
+        st8 [in0]=r20, 8		// r20
+	st8 [loc1]=r5, 8        	// ar.ccv
+        mov r4=ar.unat
+        ;;
+        st8 [in0]=r21, 8		// r21
+        st8 [loc1]=r4, 8        	// ar.unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r22, 8		// r22
+        st8 [loc1]=r5, 8        	// ar.fpsr
+        mov r4 = ar.unat
+        ;;
+        st8 [in0]=r23, 8		// r23
+        st8 [loc1]=r4, 8        	// unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r24, 8		// r24
+        st8 [loc1]=r5, 8        	// fpsr
+        mov r4 = ar.pfs
+        ;;
+        st8 [in0]=r25, 8		// r25
+        st8 [loc1]=r4, 8        	// ar.pfs
+        mov r5 = ar.lc
+        ;;
+        st8 [in0]=r26, 8		// r26
+        st8 [loc1]=r5, 8        	// ar.lc
+        mov r4 = ar.ec
+        ;;
+        st8 [in0]=r27, 8		// r27
+        st8 [loc1]=r4, 8        	// ar.ec
+        mov r5 = ar.csd
+        ;;
+        st8 [in0]=r28, 8		// r28
+        st8 [loc1]=r5, 8        	// ar.csd
+        mov r4 = ar.ssd
+        ;;
+        st8 [in0]=r29, 8		// r29
+        st8 [loc1]=r4, 8        	// ar.ssd
+        ;;
+        st8 [in0]=r30, 8		// r30
+        ;;
+	st8 [in0]=r31, 8		// r31
+        mov ar.pfs=loc0
+        ;;
+        br.ret.sptk.many rp
+END(ia64_dump_cpu_regs)
+
+
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
new file mode 100644
index 00000000..0464173e
--- /dev/null
+++ b/arch/ia64/kernel/sal.c
@@ -0,0 +1,405 @@
+/*
+ * System Abstraction Layer (SAL) interface routines.
+ *
+ * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#include <asm/delay.h>
+#include <asm/page.h>
+#include <asm/sal.h>
+#include <asm/pal.h>
+
+ __cacheline_aligned DEFINE_SPINLOCK(sal_lock);
+unsigned long sal_platform_features;
+
+unsigned short sal_revision;
+unsigned short sal_version;
+
+#define SAL_MAJOR(x) ((x) >> 8)
+#define SAL_MINOR(x) ((x) & 0xff)
+
+static struct {
+	void *addr;	/* function entry point */
+	void *gpval;	/* gp value to use */
+} pdesc;
+
+static long
+default_handler (void)
+{
+	return -1;
+}
+
+ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler;
+ia64_sal_desc_ptc_t *ia64_ptc_domain_info;
+
+const char *
+ia64_sal_strerror (long status)
+{
+	const char *str;
+	switch (status) {
+	      case 0: str = "Call completed without error"; break;
+	      case 1: str = "Effect a warm boot of the system to complete "
+			      "the update"; break;
+	      case -1: str = "Not implemented"; break;
+	      case -2: str = "Invalid argument"; break;
+	      case -3: str = "Call completed with error"; break;
+	      case -4: str = "Virtual address not registered"; break;
+	      case -5: str = "No information available"; break;
+	      case -6: str = "Insufficient space to add the entry"; break;
+	      case -7: str = "Invalid entry_addr value"; break;
+	      case -8: str = "Invalid interrupt vector"; break;
+	      case -9: str = "Requested memory not available"; break;
+	      case -10: str = "Unable to write to the NVM device"; break;
+	      case -11: str = "Invalid partition type specified"; break;
+	      case -12: str = "Invalid NVM_Object id specified"; break;
+	      case -13: str = "NVM_Object already has the maximum number "
+				"of partitions"; break;
+	      case -14: str = "Insufficient space in partition for the "
+				"requested write sub-function"; break;
+	      case -15: str = "Insufficient data buffer space for the "
+				"requested read record sub-function"; break;
+	      case -16: str = "Scratch buffer required for the write/delete "
+				"sub-function"; break;
+	      case -17: str = "Insufficient space in the NVM_Object for the "
+				"requested create sub-function"; break;
+	      case -18: str = "Invalid value specified in the partition_rec "
+				"argument"; break;
+	      case -19: str = "Record oriented I/O not supported for this "
+				"partition"; break;
+	      case -20: str = "Bad format of record to be written or "
+				"required keyword variable not "
+				"specified"; break;
+	      default: str = "Unknown SAL status code"; break;
+	}
+	return str;
+}
+
+void __init
+ia64_sal_handler_init (void *entry_point, void *gpval)
+{
+	/* fill in the SAL procedure descriptor and point ia64_sal to it: */
+	pdesc.addr = entry_point;
+	pdesc.gpval = gpval;
+	ia64_sal = (ia64_sal_handler) &pdesc;
+}
+
+static void __init
+check_versions (struct ia64_sal_systab *systab)
+{
+	sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor;
+	sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor;
+
+	/* Check for broken firmware */
+	if ((sal_revision == SAL_VERSION_CODE(49, 29))
+	    && (sal_version == SAL_VERSION_CODE(49, 29)))
+	{
+		/*
+		 * Old firmware for zx2000 prototypes have this weird version number,
+		 * reset it to something sane.
+		 */
+		sal_revision = SAL_VERSION_CODE(2, 8);
+		sal_version = SAL_VERSION_CODE(0, 0);
+	}
+
+	if (ia64_platform_is("sn2") && (sal_revision == SAL_VERSION_CODE(2, 9)))
+		/*
+		 * SGI Altix has hard-coded version 2.9 in their prom
+		 * but they actually implement 3.2, so let's fix it here.
+		 */
+		sal_revision = SAL_VERSION_CODE(3, 2);
+}
+
+static void __init
+sal_desc_entry_point (void *p)
+{
+	struct ia64_sal_desc_entry_point *ep = p;
+	ia64_pal_handler_init(__va(ep->pal_proc));
+	ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp));
+}
+
+#ifdef CONFIG_SMP
+static void __init
+set_smp_redirect (int flag)
+{
+#ifndef CONFIG_HOTPLUG_CPU
+	if (no_int_routing)
+		smp_int_redirect &= ~flag;
+	else
+		smp_int_redirect |= flag;
+#else
+	/*
+	 * For CPU Hotplug we dont want to do any chipset supported
+	 * interrupt redirection. The reason is this would require that
+	 * All interrupts be stopped and hard bind the irq to a cpu.
+	 * Later when the interrupt is fired we need to set the redir hint
+	 * on again in the vector. This is cumbersome for something that the
+	 * user mode irq balancer will solve anyways.
+	 */
+	no_int_routing=1;
+	smp_int_redirect &= ~flag;
+#endif
+}
+#else
+#define set_smp_redirect(flag)	do { } while (0)
+#endif
+
+static void __init
+sal_desc_platform_feature (void *p)
+{
+	struct ia64_sal_desc_platform_feature *pf = p;
+	sal_platform_features = pf->feature_mask;
+
+	printk(KERN_INFO "SAL Platform features:");
+	if (!sal_platform_features) {
+		printk(" None\n");
+		return;
+	}
+
+	if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK)
+		printk(" BusLock");
+	if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) {
+		printk(" IRQ_Redirection");
+		set_smp_redirect(SMP_IRQ_REDIRECTION);
+	}
+	if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) {
+		printk(" IPI_Redirection");
+		set_smp_redirect(SMP_IPI_REDIRECTION);
+	}
+	if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)
+		printk(" ITC_Drift");
+	printk("\n");
+}
+
+#ifdef CONFIG_SMP
+static void __init
+sal_desc_ap_wakeup (void *p)
+{
+	struct ia64_sal_desc_ap_wakeup *ap = p;
+
+	switch (ap->mechanism) {
+	case IA64_SAL_AP_EXTERNAL_INT:
+		ap_wakeup_vector = ap->vector;
+		printk(KERN_INFO "SAL: AP wakeup using external interrupt "
+				"vector 0x%lx\n", ap_wakeup_vector);
+		break;
+	default:
+		printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n");
+		break;
+	}
+}
+
+static void __init
+chk_nointroute_opt(void)
+{
+	char *cp;
+
+	for (cp = boot_command_line; *cp; ) {
+		if (memcmp(cp, "nointroute", 10) == 0) {
+			no_int_routing = 1;
+			printk ("no_int_routing on\n");
+			break;
+		} else {
+			while (*cp != ' ' && *cp)
+				++cp;
+			while (*cp == ' ')
+				++cp;
+		}
+	}
+}
+
+#else
+static void __init sal_desc_ap_wakeup(void *p) { }
+#endif
+
+/*
+ * HP rx5670 firmware polls for interrupts during SAL_CACHE_FLUSH by reading
+ * cr.ivr, but it never writes cr.eoi.  This leaves any interrupt marked as
+ * "in-service" and masks other interrupts of equal or lower priority.
+ *
+ * HP internal defect reports: F1859, F2775, F3031.
+ */
+static int sal_cache_flush_drops_interrupts;
+
+static int __init
+force_pal_cache_flush(char *str)
+{
+	sal_cache_flush_drops_interrupts = 1;
+	return 0;
+}
+early_param("force_pal_cache_flush", force_pal_cache_flush);
+
+void __init
+check_sal_cache_flush (void)
+{
+	unsigned long flags;
+	int cpu;
+	u64 vector, cache_type = 3;
+	struct ia64_sal_retval isrv;
+
+	if (sal_cache_flush_drops_interrupts)
+		return;
+
+	cpu = get_cpu();
+	local_irq_save(flags);
+
+	/*
+	 * Send ourselves a timer interrupt, wait until it's reported, and see
+	 * if SAL_CACHE_FLUSH drops it.
+	 */
+	platform_send_ipi(cpu, IA64_TIMER_VECTOR, IA64_IPI_DM_INT, 0);
+
+	while (!ia64_get_irr(IA64_TIMER_VECTOR))
+		cpu_relax();
+
+	SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);
+
+	if (isrv.status)
+		printk(KERN_ERR "SAL_CAL_FLUSH failed with %ld\n", isrv.status);
+
+	if (ia64_get_irr(IA64_TIMER_VECTOR)) {
+		vector = ia64_get_ivr();
+		ia64_eoi();
+		WARN_ON(vector != IA64_TIMER_VECTOR);
+	} else {
+		sal_cache_flush_drops_interrupts = 1;
+		printk(KERN_ERR "SAL: SAL_CACHE_FLUSH drops interrupts; "
+			"PAL_CACHE_FLUSH will be used instead\n");
+		ia64_eoi();
+	}
+
+	local_irq_restore(flags);
+	put_cpu();
+}
+
+s64
+ia64_sal_cache_flush (u64 cache_type)
+{
+	struct ia64_sal_retval isrv;
+
+	if (sal_cache_flush_drops_interrupts) {
+		unsigned long flags;
+		u64 progress;
+		s64 rc;
+
+		progress = 0;
+		local_irq_save(flags);
+		rc = ia64_pal_cache_flush(cache_type,
+			PAL_CACHE_FLUSH_INVALIDATE, &progress, NULL);
+		local_irq_restore(flags);
+		return rc;
+	}
+
+	SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);
+	return isrv.status;
+}
+EXPORT_SYMBOL_GPL(ia64_sal_cache_flush);
+
+void __init
+ia64_sal_init (struct ia64_sal_systab *systab)
+{
+	char *p;
+	int i;
+
+	if (!systab) {
+		printk(KERN_WARNING "Hmm, no SAL System Table.\n");
+		return;
+	}
+
+	if (strncmp(systab->signature, "SST_", 4) != 0)
+		printk(KERN_ERR "bad signature in system table!");
+
+	check_versions(systab);
+#ifdef CONFIG_SMP
+	chk_nointroute_opt();
+#endif
+
+	/* revisions are coded in BCD, so %x does the job for us */
+	printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n",
+			SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision),
+			systab->oem_id, systab->product_id,
+			systab->product_id[0] ? " " : "",
+			SAL_MAJOR(sal_version), SAL_MINOR(sal_version));
+
+	p = (char *) (systab + 1);
+	for (i = 0; i < systab->entry_count; i++) {
+		/*
+		 * The first byte of each entry type contains the type
+		 * descriptor.
+		 */
+		switch (*p) {
+		case SAL_DESC_ENTRY_POINT:
+			sal_desc_entry_point(p);
+			break;
+		case SAL_DESC_PLATFORM_FEATURE:
+			sal_desc_platform_feature(p);
+			break;
+		case SAL_DESC_PTC:
+			ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p;
+			break;
+		case SAL_DESC_AP_WAKEUP:
+			sal_desc_ap_wakeup(p);
+			break;
+		}
+		p += SAL_DESC_SIZE(*p);
+	}
+
+}
+
+int
+ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
+		 u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
+{
+	if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
+		return -1;
+	SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+	return 0;
+}
+EXPORT_SYMBOL(ia64_sal_oemcall);
+
+int
+ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
+			u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
+			u64 arg7)
+{
+	if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
+		return -1;
+	SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
+			arg7);
+	return 0;
+}
+EXPORT_SYMBOL(ia64_sal_oemcall_nolock);
+
+int
+ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc,
+			   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
+			   u64 arg6, u64 arg7)
+{
+	if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
+		return -1;
+	SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
+			   arg7);
+	return 0;
+}
+EXPORT_SYMBOL(ia64_sal_oemcall_reentrant);
+
+long
+ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second,
+		    unsigned long *drift_info)
+{
+	struct ia64_sal_retval isrv;
+
+	SAL_CALL(isrv, SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
+	*ticks_per_second = isrv.v0;
+	*drift_info = isrv.v1;
+	return isrv.status;
+}
+EXPORT_SYMBOL_GPL(ia64_sal_freq_base);
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
new file mode 100644
index 00000000..79802e54
--- /dev/null
+++ b/arch/ia64/kernel/salinfo.c
@@ -0,0 +1,705 @@
+/*
+ * salinfo.c
+ *
+ * Creates entries in /proc/sal for various system features.
+ *
+ * Copyright (c) 2003, 2006 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2003 Hewlett-Packard Co
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * 10/30/2001	jbarnes@sgi.com		copied much of Stephane's palinfo
+ *					code to create this file
+ * Oct 23 2003	kaos@sgi.com
+ *   Replace IPI with set_cpus_allowed() to read a record from the required cpu.
+ *   Redesign salinfo log processing to separate interrupt and user space
+ *   contexts.
+ *   Cache the record across multi-block reads from user space.
+ *   Support > 64 cpus.
+ *   Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module.
+ *
+ * Jan 28 2004	kaos@sgi.com
+ *   Periodically check for outstanding MCA or INIT records.
+ *
+ * Dec  5 2004	kaos@sgi.com
+ *   Standardize which records are cleared automatically.
+ *
+ * Aug 18 2005	kaos@sgi.com
+ *   mca.c may not pass a buffer, a NULL buffer just indicates that a new
+ *   record is available in SAL.
+ *   Replace some NR_CPUS by cpus_online, for hotplug cpu.
+ *
+ * Jan  5 2006        kaos@sgi.com
+ *   Handle hotplug cpus coming online.
+ *   Handle hotplug cpus going offline while they still have outstanding records.
+ *   Use the cpu_* macros consistently.
+ *   Replace the counting semaphore with a mutex and a test if the cpumask is non-empty.
+ *   Modify the locking to make the test for "work to do" an atomic operation.
+ */
+
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/timer.h>
+#include <linux/vmalloc.h>
+#include <linux/semaphore.h>
+
+#include <asm/sal.h>
+#include <asm/uaccess.h>
+
+MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
+MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
+MODULE_LICENSE("GPL");
+
+static int salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data);
+
+typedef struct {
+	const char		*name;		/* name of the proc entry */
+	unsigned long           feature;        /* feature bit */
+	struct proc_dir_entry	*entry;		/* registered entry (removal) */
+} salinfo_entry_t;
+
+/*
+ * List {name,feature} pairs for every entry in /proc/sal/<feature>
+ * that this module exports
+ */
+static salinfo_entry_t salinfo_entries[]={
+	{ "bus_lock",           IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, },
+	{ "irq_redirection",	IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, },
+	{ "ipi_redirection",	IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, },
+	{ "itc_drift",		IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, },
+};
+
+#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries)
+
+static char *salinfo_log_name[] = {
+	"mca",
+	"init",
+	"cmc",
+	"cpe",
+};
+
+static struct proc_dir_entry *salinfo_proc_entries[
+	ARRAY_SIZE(salinfo_entries) +			/* /proc/sal/bus_lock */
+	ARRAY_SIZE(salinfo_log_name) +			/* /proc/sal/{mca,...} */
+	(2 * ARRAY_SIZE(salinfo_log_name)) +		/* /proc/sal/mca/{event,data} */
+	1];						/* /proc/sal */
+
+/* Some records we get ourselves, some are accessed as saved data in buffers
+ * that are owned by mca.c.
+ */
+struct salinfo_data_saved {
+	u8*			buffer;
+	u64			size;
+	u64			id;
+	int			cpu;
+};
+
+/* State transitions.  Actions are :-
+ *   Write "read <cpunum>" to the data file.
+ *   Write "clear <cpunum>" to the data file.
+ *   Write "oemdata <cpunum> <offset> to the data file.
+ *   Read from the data file.
+ *   Close the data file.
+ *
+ * Start state is NO_DATA.
+ *
+ * NO_DATA
+ *    write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "oemdata <cpunum> <offset> -> return -EINVAL.
+ *    read data -> return EOF.
+ *    close -> unchanged.  Free record areas.
+ *
+ * LOG_RECORD
+ *    write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "oemdata <cpunum> <offset> -> format the oem data, goto OEMDATA.
+ *    read data -> return the INIT/MCA/CMC/CPE record.
+ *    close -> unchanged.  Keep record areas.
+ *
+ * OEMDATA
+ *    write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "oemdata <cpunum> <offset> -> format the oem data, goto OEMDATA.
+ *    read data -> return the formatted oemdata.
+ *    close -> unchanged.  Keep record areas.
+ *
+ * Closing the data file does not change the state.  This allows shell scripts
+ * to manipulate salinfo data, each shell redirection opens the file, does one
+ * action then closes it again.  The record areas are only freed at close when
+ * the state is NO_DATA.
+ */
+enum salinfo_state {
+	STATE_NO_DATA,
+	STATE_LOG_RECORD,
+	STATE_OEMDATA,
+};
+
+struct salinfo_data {
+	cpumask_t		cpu_event;	/* which cpus have outstanding events */
+	struct semaphore	mutex;
+	u8			*log_buffer;
+	u64			log_size;
+	u8			*oemdata;	/* decoded oem data */
+	u64			oemdata_size;
+	int			open;		/* single-open to prevent races */
+	u8			type;
+	u8			saved_num;	/* using a saved record? */
+	enum salinfo_state	state :8;	/* processing state */
+	u8			padding;
+	int			cpu_check;	/* next CPU to check */
+	struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */
+};
+
+static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
+
+static DEFINE_SPINLOCK(data_lock);
+static DEFINE_SPINLOCK(data_saved_lock);
+
+/** salinfo_platform_oemdata - optional callback to decode oemdata from an error
+ * record.
+ * @sect_header: pointer to the start of the section to decode.
+ * @oemdata: returns vmalloc area containing the decoded output.
+ * @oemdata_size: returns length of decoded output (strlen).
+ *
+ * Description: If user space asks for oem data to be decoded by the kernel
+ * and/or prom and the platform has set salinfo_platform_oemdata to the address
+ * of a platform specific routine then call that routine.  salinfo_platform_oemdata
+ * vmalloc's and formats its output area, returning the address of the text
+ * and its strlen.  Returns 0 for success, -ve for error.  The callback is
+ * invoked on the cpu that generated the error record.
+ */
+int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size);
+
+struct salinfo_platform_oemdata_parms {
+	const u8 *efi_guid;
+	u8 **oemdata;
+	u64 *oemdata_size;
+	int ret;
+};
+
+/* Kick the mutex that tells user space that there is work to do.  Instead of
+ * trying to track the state of the mutex across multiple cpus, in user
+ * context, interrupt context, non-maskable interrupt context and hotplug cpu,
+ * it is far easier just to grab the mutex if it is free then release it.
+ *
+ * This routine must be called with data_saved_lock held, to make the down/up
+ * operation atomic.
+ */
+static void
+salinfo_work_to_do(struct salinfo_data *data)
+{
+	(void)(down_trylock(&data->mutex) ?: 0);
+	up(&data->mutex);
+}
+
+static void
+salinfo_platform_oemdata_cpu(void *context)
+{
+	struct salinfo_platform_oemdata_parms *parms = context;
+	parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size);
+}
+
+static void
+shift1_data_saved (struct salinfo_data *data, int shift)
+{
+	memcpy(data->data_saved+shift, data->data_saved+shift+1,
+	       (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0]));
+	memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0,
+	       sizeof(data->data_saved[0]));
+}
+
+/* This routine is invoked in interrupt context.  Note: mca.c enables
+ * interrupts before calling this code for CMC/CPE.  MCA and INIT events are
+ * not irq safe, do not call any routines that use spinlocks, they may deadlock.
+ * MCA and INIT records are recorded, a timer event will look for any
+ * outstanding events and wake up the user space code.
+ *
+ * The buffer passed from mca.c points to the output from ia64_log_get. This is
+ * a persistent buffer but its contents can change between the interrupt and
+ * when user space processes the record.  Save the record id to identify
+ * changes.  If the buffer is NULL then just update the bitmap.
+ */
+void
+salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
+{
+	struct salinfo_data *data = salinfo_data + type;
+	struct salinfo_data_saved *data_saved;
+	unsigned long flags = 0;
+	int i;
+	int saved_size = ARRAY_SIZE(data->data_saved);
+
+	BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
+
+	if (irqsafe)
+		spin_lock_irqsave(&data_saved_lock, flags);
+	if (buffer) {
+		for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+			if (!data_saved->buffer)
+				break;
+		}
+		if (i == saved_size) {
+			if (!data->saved_num) {
+				shift1_data_saved(data, 0);
+				data_saved = data->data_saved + saved_size - 1;
+			} else
+				data_saved = NULL;
+		}
+		if (data_saved) {
+			data_saved->cpu = smp_processor_id();
+			data_saved->id = ((sal_log_record_header_t *)buffer)->id;
+			data_saved->size = size;
+			data_saved->buffer = buffer;
+		}
+	}
+	cpu_set(smp_processor_id(), data->cpu_event);
+	if (irqsafe) {
+		salinfo_work_to_do(data);
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+	}
+}
+
+/* Check for outstanding MCA/INIT records every minute (arbitrary) */
+#define SALINFO_TIMER_DELAY (60*HZ)
+static struct timer_list salinfo_timer;
+extern void ia64_mlogbuf_dump(void);
+
+static void
+salinfo_timeout_check(struct salinfo_data *data)
+{
+	unsigned long flags;
+	if (!data->open)
+		return;
+	if (!cpus_empty(data->cpu_event)) {
+		spin_lock_irqsave(&data_saved_lock, flags);
+		salinfo_work_to_do(data);
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+	}
+}
+
+static void
+salinfo_timeout (unsigned long arg)
+{
+	ia64_mlogbuf_dump();
+	salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
+	salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT);
+	salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
+	add_timer(&salinfo_timer);
+}
+
+static int
+salinfo_event_open(struct inode *inode, struct file *file)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	return 0;
+}
+
+static ssize_t
+salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct proc_dir_entry *entry = PDE(inode);
+	struct salinfo_data *data = entry->data;
+	char cmd[32];
+	size_t size;
+	int i, n, cpu = -1;
+
+retry:
+	if (cpus_empty(data->cpu_event) && down_trylock(&data->mutex)) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		if (down_interruptible(&data->mutex))
+			return -EINTR;
+	}
+
+	n = data->cpu_check;
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (cpu_isset(n, data->cpu_event)) {
+			if (!cpu_online(n)) {
+				cpu_clear(n, data->cpu_event);
+				continue;
+			}
+			cpu = n;
+			break;
+		}
+		if (++n == nr_cpu_ids)
+			n = 0;
+	}
+
+	if (cpu == -1)
+		goto retry;
+
+	ia64_mlogbuf_dump();
+
+	/* for next read, start checking at next CPU */
+	data->cpu_check = cpu;
+	if (++data->cpu_check == nr_cpu_ids)
+		data->cpu_check = 0;
+
+	snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
+
+	size = strlen(cmd);
+	if (size > count)
+		size = count;
+	if (copy_to_user(buffer, cmd, size))
+		return -EFAULT;
+
+	return size;
+}
+
+static const struct file_operations salinfo_event_fops = {
+	.open  = salinfo_event_open,
+	.read  = salinfo_event_read,
+	.llseek = noop_llseek,
+};
+
+static int
+salinfo_log_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *entry = PDE(inode);
+	struct salinfo_data *data = entry->data;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	spin_lock(&data_lock);
+	if (data->open) {
+		spin_unlock(&data_lock);
+		return -EBUSY;
+	}
+	data->open = 1;
+	spin_unlock(&data_lock);
+
+	if (data->state == STATE_NO_DATA &&
+	    !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) {
+		data->open = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int
+salinfo_log_release(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *entry = PDE(inode);
+	struct salinfo_data *data = entry->data;
+
+	if (data->state == STATE_NO_DATA) {
+		vfree(data->log_buffer);
+		vfree(data->oemdata);
+		data->log_buffer = NULL;
+		data->oemdata = NULL;
+	}
+	spin_lock(&data_lock);
+	data->open = 0;
+	spin_unlock(&data_lock);
+	return 0;
+}
+
+static void
+call_on_cpu(int cpu, void (*fn)(void *), void *arg)
+{
+	cpumask_t save_cpus_allowed = current->cpus_allowed;
+	set_cpus_allowed_ptr(current, cpumask_of(cpu));
+	(*fn)(arg);
+	set_cpus_allowed_ptr(current, &save_cpus_allowed);
+}
+
+static void
+salinfo_log_read_cpu(void *context)
+{
+	struct salinfo_data *data = context;
+	sal_log_record_header_t *rh;
+	data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer);
+	rh = (sal_log_record_header_t *)(data->log_buffer);
+	/* Clear corrected errors as they are read from SAL */
+	if (rh->severity == sal_log_severity_corrected)
+		ia64_sal_clear_state_info(data->type);
+}
+
+static void
+salinfo_log_new_read(int cpu, struct salinfo_data *data)
+{
+	struct salinfo_data_saved *data_saved;
+	unsigned long flags;
+	int i;
+	int saved_size = ARRAY_SIZE(data->data_saved);
+
+	data->saved_num = 0;
+	spin_lock_irqsave(&data_saved_lock, flags);
+retry:
+	for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+		if (data_saved->buffer && data_saved->cpu == cpu) {
+			sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer);
+			data->log_size = data_saved->size;
+			memcpy(data->log_buffer, rh, data->log_size);
+			barrier();	/* id check must not be moved */
+			if (rh->id == data_saved->id) {
+				data->saved_num = i+1;
+				break;
+			}
+			/* saved record changed by mca.c since interrupt, discard it */
+			shift1_data_saved(data, i);
+			goto retry;
+		}
+	}
+	spin_unlock_irqrestore(&data_saved_lock, flags);
+
+	if (!data->saved_num)
+		call_on_cpu(cpu, salinfo_log_read_cpu, data);
+	if (!data->log_size) {
+		data->state = STATE_NO_DATA;
+		cpu_clear(cpu, data->cpu_event);
+	} else {
+		data->state = STATE_LOG_RECORD;
+	}
+}
+
+static ssize_t
+salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct proc_dir_entry *entry = PDE(inode);
+	struct salinfo_data *data = entry->data;
+	u8 *buf;
+	u64 bufsize;
+
+	if (data->state == STATE_LOG_RECORD) {
+		buf = data->log_buffer;
+		bufsize = data->log_size;
+	} else if (data->state == STATE_OEMDATA) {
+		buf = data->oemdata;
+		bufsize = data->oemdata_size;
+	} else {
+		buf = NULL;
+		bufsize = 0;
+	}
+	return simple_read_from_buffer(buffer, count, ppos, buf, bufsize);
+}
+
+static void
+salinfo_log_clear_cpu(void *context)
+{
+	struct salinfo_data *data = context;
+	ia64_sal_clear_state_info(data->type);
+}
+
+static int
+salinfo_log_clear(struct salinfo_data *data, int cpu)
+{
+	sal_log_record_header_t *rh;
+	unsigned long flags;
+	spin_lock_irqsave(&data_saved_lock, flags);
+	data->state = STATE_NO_DATA;
+	if (!cpu_isset(cpu, data->cpu_event)) {
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+		return 0;
+	}
+	cpu_clear(cpu, data->cpu_event);
+	if (data->saved_num) {
+		shift1_data_saved(data, data->saved_num - 1);
+		data->saved_num = 0;
+	}
+	spin_unlock_irqrestore(&data_saved_lock, flags);
+	rh = (sal_log_record_header_t *)(data->log_buffer);
+	/* Corrected errors have already been cleared from SAL */
+	if (rh->severity != sal_log_severity_corrected)
+		call_on_cpu(cpu, salinfo_log_clear_cpu, data);
+	/* clearing a record may make a new record visible */
+	salinfo_log_new_read(cpu, data);
+	if (data->state == STATE_LOG_RECORD) {
+		spin_lock_irqsave(&data_saved_lock, flags);
+		cpu_set(cpu, data->cpu_event);
+		salinfo_work_to_do(data);
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+	}
+	return 0;
+}
+
+static ssize_t
+salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct proc_dir_entry *entry = PDE(inode);
+	struct salinfo_data *data = entry->data;
+	char cmd[32];
+	size_t size;
+	u32 offset;
+	int cpu;
+
+	size = sizeof(cmd);
+	if (count < size)
+		size = count;
+	if (copy_from_user(cmd, buffer, size))
+		return -EFAULT;
+
+	if (sscanf(cmd, "read %d", &cpu) == 1) {
+		salinfo_log_new_read(cpu, data);
+	} else if (sscanf(cmd, "clear %d", &cpu) == 1) {
+		int ret;
+		if ((ret = salinfo_log_clear(data, cpu)))
+			count = ret;
+	} else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) {
+		if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA)
+			return -EINVAL;
+		if (offset > data->log_size - sizeof(efi_guid_t))
+			return -EINVAL;
+		data->state = STATE_OEMDATA;
+		if (salinfo_platform_oemdata) {
+			struct salinfo_platform_oemdata_parms parms = {
+				.efi_guid = data->log_buffer + offset,
+				.oemdata = &data->oemdata,
+				.oemdata_size = &data->oemdata_size
+			};
+			call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms);
+			if (parms.ret)
+				count = parms.ret;
+		} else
+			data->oemdata_size = 0;
+	} else
+		return -EINVAL;
+
+	return count;
+}
+
+static const struct file_operations salinfo_data_fops = {
+	.open    = salinfo_log_open,
+	.release = salinfo_log_release,
+	.read    = salinfo_log_read,
+	.write   = salinfo_log_write,
+	.llseek  = default_llseek,
+};
+
+static int __cpuinit
+salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
+{
+	unsigned int i, cpu = (unsigned long)hcpu;
+	unsigned long flags;
+	struct salinfo_data *data;
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		spin_lock_irqsave(&data_saved_lock, flags);
+		for (i = 0, data = salinfo_data;
+		     i < ARRAY_SIZE(salinfo_data);
+		     ++i, ++data) {
+			cpu_set(cpu, data->cpu_event);
+			salinfo_work_to_do(data);
+		}
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		spin_lock_irqsave(&data_saved_lock, flags);
+		for (i = 0, data = salinfo_data;
+		     i < ARRAY_SIZE(salinfo_data);
+		     ++i, ++data) {
+			struct salinfo_data_saved *data_saved;
+			int j;
+			for (j = ARRAY_SIZE(data->data_saved) - 1, data_saved = data->data_saved + j;
+			     j >= 0;
+			     --j, --data_saved) {
+				if (data_saved->buffer && data_saved->cpu == cpu) {
+					shift1_data_saved(data, j);
+				}
+			}
+			cpu_clear(cpu, data->cpu_event);
+		}
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block salinfo_cpu_notifier __cpuinitdata =
+{
+	.notifier_call = salinfo_cpu_callback,
+	.priority = 0,
+};
+
+static int __init
+salinfo_init(void)
+{
+	struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
+	struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
+	struct proc_dir_entry *dir, *entry;
+	struct salinfo_data *data;
+	int i, j;
+
+	salinfo_dir = proc_mkdir("sal", NULL);
+	if (!salinfo_dir)
+		return 0;
+
+	for (i=0; i < NR_SALINFO_ENTRIES; i++) {
+		/* pass the feature bit in question as misc data */
+		*sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
+						  salinfo_read, (void *)salinfo_entries[i].feature);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
+		data = salinfo_data + i;
+		data->type = i;
+		sema_init(&data->mutex, 1);
+		dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
+		if (!dir)
+			continue;
+
+		entry = proc_create_data("event", S_IRUSR, dir,
+					 &salinfo_event_fops, data);
+		if (!entry)
+			continue;
+		*sdir++ = entry;
+
+		entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir,
+					 &salinfo_data_fops, data);
+		if (!entry)
+			continue;
+		*sdir++ = entry;
+
+		/* we missed any events before now */
+		for_each_online_cpu(j)
+			cpu_set(j, data->cpu_event);
+
+		*sdir++ = dir;
+	}
+
+	*sdir++ = salinfo_dir;
+
+	init_timer(&salinfo_timer);
+	salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
+	salinfo_timer.function = &salinfo_timeout;
+	add_timer(&salinfo_timer);
+
+	register_hotcpu_notifier(&salinfo_cpu_notifier);
+
+	return 0;
+}
+
+/*
+ * 'data' contains an integer that corresponds to the feature we're
+ * testing
+ */
+static int
+salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+	int len = 0;
+
+	len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n");
+
+	if (len <= off+count) *eof = 1;
+
+	*start = page + off;
+	len   -= off;
+
+	if (len>count) len = count;
+	if (len<0) len = 0;
+
+	return len;
+}
+
+module_init(salinfo_init);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
new file mode 100644
index 00000000..5e2c7249
--- /dev/null
+++ b/arch/ia64/kernel/setup.c
@@ -0,0 +1,1051 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2000, 2004 Intel Corp
+ * 	Rohit Seth <rohit.seth@intel.com>
+ * 	Suresh Siddha <suresh.b.siddha@intel.com>
+ * 	Gordon Jin <gordon.jin@intel.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ *
+ * 12/26/04 S.Siddha, G.Jin, R.Seth
+ *			Add multi-threading and multi-core detection
+ * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
+ * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
+ * 03/31/00 R.Seth	cpu_initialized and current->processor fixes
+ * 02/04/00 D.Mosberger	some more get_cpuinfo fixes...
+ * 02/01/00 R.Seth	fixed get_cpuinfo for SMP
+ * 01/07/99 S.Eranian	added the support for command line argument
+ * 06/24/99 W.Drummond	added boot_cpu_data.
+ * 05/28/05 Z. Menyhart	Dynamic stride size for "flush_icache_range()"
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+#include <linux/screen_info.h>
+#include <linux/dmi.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/efi.h>
+#include <linux/initrd.h>
+#include <linux/pm.h>
+#include <linux/cpufreq.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+
+#include <asm/machvec.h>
+#include <asm/mca.h>
+#include <asm/meminit.h>
+#include <asm/page.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
+#include <asm/patch.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+#include <asm/unistd.h>
+#include <asm/hpsim.h>
+
+#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
+# error "struct cpuinfo_ia64 too big!"
+#endif
+
+#ifdef CONFIG_SMP
+unsigned long __per_cpu_offset[NR_CPUS];
+EXPORT_SYMBOL(__per_cpu_offset);
+#endif
+
+DEFINE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info);
+DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
+unsigned long ia64_cycles_per_usec;
+struct ia64_boot_param *ia64_boot_param;
+struct screen_info screen_info;
+unsigned long vga_console_iobase;
+unsigned long vga_console_membase;
+
+static struct resource data_resource = {
+	.name	= "Kernel data",
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource code_resource = {
+	.name	= "Kernel code",
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource bss_resource = {
+	.name	= "Kernel bss",
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+unsigned long ia64_max_cacheline_size;
+
+unsigned long ia64_iobase;	/* virtual address for I/O accesses */
+EXPORT_SYMBOL(ia64_iobase);
+struct io_space io_space[MAX_IO_SPACES];
+EXPORT_SYMBOL(io_space);
+unsigned int num_io_spaces;
+
+/*
+ * "flush_icache_range()" needs to know what processor dependent stride size to use
+ * when it makes i-cache(s) coherent with d-caches.
+ */
+#define	I_CACHE_STRIDE_SHIFT	5	/* Safest way to go: 32 bytes by 32 bytes */
+unsigned long ia64_i_cache_stride_shift = ~0;
+/*
+ * "clflush_cache_range()" needs to know what processor dependent stride size to
+ * use when it flushes cache lines including both d-cache and i-cache.
+ */
+/* Safest way to go: 32 bytes by 32 bytes */
+#define	CACHE_STRIDE_SHIFT	5
+unsigned long ia64_cache_stride_shift = ~0;
+
+/*
+ * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
+ * mask specifies a mask of address bits that must be 0 in order for two buffers to be
+ * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
+ * address of the second buffer must be aligned to (merge_mask+1) in order to be
+ * mergeable).  By default, we assume there is no I/O MMU which can merge physically
+ * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu
+ * page-size of 2^64.
+ */
+unsigned long ia64_max_iommu_merge_mask = ~0UL;
+EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
+
+/*
+ * We use a special marker for the end of memory and it uses the extra (+1) slot
+ */
+struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
+int num_rsvd_regions __initdata;
+
+
+/*
+ * Filter incoming memory segments based on the primitive map created from the boot
+ * parameters. Segments contained in the map are removed from the memory ranges. A
+ * caller-specified function is called with the memory ranges that remain after filtering.
+ * This routine does not assume the incoming segments are sorted.
+ */
+int __init
+filter_rsvd_memory (u64 start, u64 end, void *arg)
+{
+	u64 range_start, range_end, prev_start;
+	void (*func)(unsigned long, unsigned long, int);
+	int i;
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		printk(KERN_WARNING "warning: skipping physical page 0\n");
+		start += PAGE_SIZE;
+		if (start >= end) return 0;
+	}
+#endif
+	/*
+	 * lowest possible address(walker uses virtual)
+	 */
+	prev_start = PAGE_OFFSET;
+	func = arg;
+
+	for (i = 0; i < num_rsvd_regions; ++i) {
+		range_start = max(start, prev_start);
+		range_end   = min(end, rsvd_region[i].start);
+
+		if (range_start < range_end)
+			call_pernode_memory(__pa(range_start), range_end - range_start, func);
+
+		/* nothing more available in this segment */
+		if (range_end == end) return 0;
+
+		prev_start = rsvd_region[i].end;
+	}
+	/* end of memory marker allows full processing inside loop body */
+	return 0;
+}
+
+/*
+ * Similar to "filter_rsvd_memory()", but the reserved memory ranges
+ * are not filtered out.
+ */
+int __init
+filter_memory(u64 start, u64 end, void *arg)
+{
+	void (*func)(unsigned long, unsigned long, int);
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		printk(KERN_WARNING "warning: skipping physical page 0\n");
+		start += PAGE_SIZE;
+		if (start >= end)
+			return 0;
+	}
+#endif
+	func = arg;
+	if (start < end)
+		call_pernode_memory(__pa(start), end - start, func);
+	return 0;
+}
+
+static void __init
+sort_regions (struct rsvd_region *rsvd_region, int max)
+{
+	int j;
+
+	/* simple bubble sorting */
+	while (max--) {
+		for (j = 0; j < max; ++j) {
+			if (rsvd_region[j].start > rsvd_region[j+1].start) {
+				struct rsvd_region tmp;
+				tmp = rsvd_region[j];
+				rsvd_region[j] = rsvd_region[j + 1];
+				rsvd_region[j + 1] = tmp;
+			}
+		}
+	}
+}
+
+/*
+ * Request address space for all standard resources
+ */
+static int __init register_memory(void)
+{
+	code_resource.start = ia64_tpa(_text);
+	code_resource.end   = ia64_tpa(_etext) - 1;
+	data_resource.start = ia64_tpa(_etext);
+	data_resource.end   = ia64_tpa(_edata) - 1;
+	bss_resource.start  = ia64_tpa(__bss_start);
+	bss_resource.end    = ia64_tpa(_end) - 1;
+	efi_initialize_iomem_resources(&code_resource, &data_resource,
+			&bss_resource);
+
+	return 0;
+}
+
+__initcall(register_memory);
+
+
+#ifdef CONFIG_KEXEC
+
+/*
+ * This function checks if the reserved crashkernel is allowed on the specific
+ * IA64 machine flavour. Machines without an IO TLB use swiotlb and require
+ * some memory below 4 GB (i.e. in 32 bit area), see the implementation of
+ * lib/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that
+ * in kdump case. See the comment in sba_init() in sba_iommu.c.
+ *
+ * So, the only machvec that really supports loading the kdump kernel
+ * over 4 GB is "sn2".
+ */
+static int __init check_crashkernel_memory(unsigned long pbase, size_t size)
+{
+	if (ia64_platform_is("sn2") || ia64_platform_is("uv"))
+		return 1;
+	else
+		return pbase < (1UL << 32);
+}
+
+static void __init setup_crashkernel(unsigned long total, int *n)
+{
+	unsigned long long base = 0, size = 0;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, total,
+			&size, &base);
+	if (ret == 0 && size > 0) {
+		if (!base) {
+			sort_regions(rsvd_region, *n);
+			base = kdump_find_rsvd_region(size,
+					rsvd_region, *n);
+		}
+
+		if (!check_crashkernel_memory(base, size)) {
+			pr_warning("crashkernel: There would be kdump memory "
+				"at %ld GB but this is unusable because it "
+				"must\nbe below 4 GB. Change the memory "
+				"configuration of the machine.\n",
+				(unsigned long)(base >> 30));
+			return;
+		}
+
+		if (base != ~0UL) {
+			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(size >> 20),
+					(unsigned long)(base >> 20),
+					(unsigned long)(total >> 20));
+			rsvd_region[*n].start =
+				(unsigned long)__va(base);
+			rsvd_region[*n].end =
+				(unsigned long)__va(base + size);
+			(*n)++;
+			crashk_res.start = base;
+			crashk_res.end = base + size - 1;
+		}
+	}
+	efi_memmap_res.start = ia64_boot_param->efi_memmap;
+	efi_memmap_res.end = efi_memmap_res.start +
+		ia64_boot_param->efi_memmap_size;
+	boot_param_res.start = __pa(ia64_boot_param);
+	boot_param_res.end = boot_param_res.start +
+		sizeof(*ia64_boot_param);
+}
+#else
+static inline void __init setup_crashkernel(unsigned long total, int *n)
+{}
+#endif
+
+/**
+ * reserve_memory - setup reserved memory areas
+ *
+ * Setup the reserved memory areas set aside for the boot parameters,
+ * initrd, etc.  There are currently %IA64_MAX_RSVD_REGIONS defined,
+ * see arch/ia64/include/asm/meminit.h if you need to define more.
+ */
+void __init
+reserve_memory (void)
+{
+	int n = 0;
+	unsigned long total_memory;
+
+	/*
+	 * none of the entries in this table overlap
+	 */
+	rsvd_region[n].start = (unsigned long) ia64_boot_param;
+	rsvd_region[n].end   = rsvd_region[n].start + sizeof(*ia64_boot_param);
+	n++;
+
+	rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap);
+	rsvd_region[n].end   = rsvd_region[n].start + ia64_boot_param->efi_memmap_size;
+	n++;
+
+	rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line);
+	rsvd_region[n].end   = (rsvd_region[n].start
+				+ strlen(__va(ia64_boot_param->command_line)) + 1);
+	n++;
+
+	rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
+	rsvd_region[n].end   = (unsigned long) ia64_imva(_end);
+	n++;
+
+	n += paravirt_reserve_memory(&rsvd_region[n]);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (ia64_boot_param->initrd_start) {
+		rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
+		rsvd_region[n].end   = rsvd_region[n].start + ia64_boot_param->initrd_size;
+		n++;
+	}
+#endif
+
+#ifdef CONFIG_CRASH_DUMP
+	if (reserve_elfcorehdr(&rsvd_region[n].start,
+			       &rsvd_region[n].end) == 0)
+		n++;
+#endif
+
+	total_memory = efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+	n++;
+
+	setup_crashkernel(total_memory, &n);
+
+	/* end of memory marker */
+	rsvd_region[n].start = ~0UL;
+	rsvd_region[n].end   = ~0UL;
+	n++;
+
+	num_rsvd_regions = n;
+	BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n);
+
+	sort_regions(rsvd_region, num_rsvd_regions);
+}
+
+
+/**
+ * find_initrd - get initrd parameters from the boot parameter structure
+ *
+ * Grab the initrd start and end from the boot parameter struct given us by
+ * the boot loader.
+ */
+void __init
+find_initrd (void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (ia64_boot_param->initrd_start) {
+		initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
+		initrd_end   = initrd_start+ia64_boot_param->initrd_size;
+
+		printk(KERN_INFO "Initial ramdisk at: 0x%lx (%llu bytes)\n",
+		       initrd_start, ia64_boot_param->initrd_size);
+	}
+#endif
+}
+
+static void __init
+io_port_init (void)
+{
+	unsigned long phys_iobase;
+
+	/*
+	 * Set `iobase' based on the EFI memory map or, failing that, the
+	 * value firmware left in ar.k0.
+	 *
+	 * Note that in ia32 mode, IN/OUT instructions use ar.k0 to compute
+	 * the port's virtual address, so ia32_load_state() loads it with a
+	 * user virtual address.  But in ia64 mode, glibc uses the
+	 * *physical* address in ar.k0 to mmap the appropriate area from
+	 * /dev/mem, and the inX()/outX() interfaces use MMIO.  In both
+	 * cases, user-mode can only use the legacy 0-64K I/O port space.
+	 *
+	 * ar.k0 is not involved in kernel I/O port accesses, which can use
+	 * any of the I/O port spaces and are done via MMIO using the
+	 * virtual mmio_base from the appropriate io_space[].
+	 */
+	phys_iobase = efi_get_iobase();
+	if (!phys_iobase) {
+		phys_iobase = ia64_get_kr(IA64_KR_IO_BASE);
+		printk(KERN_INFO "No I/O port range found in EFI memory map, "
+			"falling back to AR.KR0 (0x%lx)\n", phys_iobase);
+	}
+	ia64_iobase = (unsigned long) ioremap(phys_iobase, 0);
+	ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
+
+	/* setup legacy IO port space */
+	io_space[0].mmio_base = ia64_iobase;
+	io_space[0].sparse = 1;
+	num_io_spaces = 1;
+}
+
+/**
+ * early_console_setup - setup debugging console
+ *
+ * Consoles started here require little enough setup that we can start using
+ * them very early in the boot process, either right after the machine
+ * vector initialization, or even before if the drivers can detect their hw.
+ *
+ * Returns non-zero if a console couldn't be setup.
+ */
+static inline int __init
+early_console_setup (char *cmdline)
+{
+	int earlycons = 0;
+
+#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE
+	{
+		extern int sn_serial_console_early_setup(void);
+		if (!sn_serial_console_early_setup())
+			earlycons++;
+	}
+#endif
+#ifdef CONFIG_EFI_PCDP
+	if (!efi_setup_pcdp_console(cmdline))
+		earlycons++;
+#endif
+	if (!simcons_register())
+		earlycons++;
+
+	return (earlycons) ? 0 : -1;
+}
+
+static inline void
+mark_bsp_online (void)
+{
+#ifdef CONFIG_SMP
+	/* If we register an early console, allow CPU 0 to printk */
+	cpu_set(smp_processor_id(), cpu_online_map);
+#endif
+}
+
+static __initdata int nomca;
+static __init int setup_nomca(char *s)
+{
+	nomca = 1;
+	return 0;
+}
+early_param("nomca", setup_nomca);
+
+#ifdef CONFIG_CRASH_DUMP
+int __init reserve_elfcorehdr(u64 *start, u64 *end)
+{
+	u64 length;
+
+	/* We get the address using the kernel command line,
+	 * but the size is extracted from the EFI tables.
+	 * Both address and size are required for reservation
+	 * to work properly.
+	 */
+
+	if (!is_vmcore_usable())
+		return -EINVAL;
+
+	if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
+		vmcore_unusable();
+		return -EINVAL;
+	}
+
+	*start = (unsigned long)__va(elfcorehdr_addr);
+	*end = *start + length;
+	return 0;
+}
+
+#endif /* CONFIG_PROC_VMCORE */
+
+void __init
+setup_arch (char **cmdline_p)
+{
+	unw_init();
+
+	paravirt_arch_setup_early();
+
+	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
+	paravirt_patch_apply();
+
+	*cmdline_p = __va(ia64_boot_param->command_line);
+	strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+
+	efi_init();
+	io_port_init();
+
+#ifdef CONFIG_IA64_GENERIC
+	/* machvec needs to be parsed from the command line
+	 * before parse_early_param() is called to ensure
+	 * that ia64_mv is initialised before any command line
+	 * settings may cause console setup to occur
+	 */
+	machvec_init_from_cmdline(*cmdline_p);
+#endif
+
+	parse_early_param();
+
+	if (early_console_setup(*cmdline_p) == 0)
+		mark_bsp_online();
+
+#ifdef CONFIG_ACPI
+	/* Initialize the ACPI boot-time table parser */
+	acpi_table_init();
+	early_acpi_boot_init();
+# ifdef CONFIG_ACPI_NUMA
+	acpi_numa_init();
+#  ifdef CONFIG_ACPI_HOTPLUG_CPU
+	prefill_possible_map();
+#  endif
+	per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
+		32 : cpus_weight(early_cpu_possible_map)),
+		additional_cpus > 0 ? additional_cpus : 0);
+# endif
+#endif /* CONFIG_APCI_BOOT */
+
+#ifdef CONFIG_SMP
+	smp_build_cpu_map();
+#endif
+	find_memory();
+
+	/* process SAL system table: */
+	ia64_sal_init(__va(efi.sal_systab));
+
+#ifdef CONFIG_ITANIUM
+	ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
+#else
+	{
+		unsigned long num_phys_stacked;
+
+		if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96)
+			ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
+	}
+#endif
+
+#ifdef CONFIG_SMP
+	cpu_physical_id(0) = hard_smp_processor_id();
+#endif
+
+	cpu_init();	/* initialize the bootstrap CPU */
+	mmu_context_init();	/* initialize context_id bitmap */
+
+	paravirt_banner();
+	paravirt_arch_setup_console(cmdline_p);
+
+#ifdef CONFIG_VT
+	if (!conswitchp) {
+# if defined(CONFIG_DUMMY_CONSOLE)
+		conswitchp = &dummy_con;
+# endif
+# if defined(CONFIG_VGA_CONSOLE)
+		/*
+		 * Non-legacy systems may route legacy VGA MMIO range to system
+		 * memory.  vga_con probes the MMIO hole, so memory looks like
+		 * a VGA device to it.  The EFI memory map can tell us if it's
+		 * memory so we can avoid this problem.
+		 */
+		if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY)
+			conswitchp = &vga_con;
+# endif
+	}
+#endif
+
+	/* enable IA-64 Machine Check Abort Handling unless disabled */
+	if (paravirt_arch_setup_nomca())
+		nomca = 1;
+	if (!nomca)
+		ia64_mca_init();
+
+	platform_setup(cmdline_p);
+#ifndef CONFIG_IA64_HP_SIM
+	check_sal_cache_flush();
+#endif
+	paging_init();
+}
+
+/*
+ * Display cpu info for all CPUs.
+ */
+static int
+show_cpuinfo (struct seq_file *m, void *v)
+{
+#ifdef CONFIG_SMP
+#	define lpj	c->loops_per_jiffy
+#	define cpunum	c->cpu
+#else
+#	define lpj	loops_per_jiffy
+#	define cpunum	0
+#endif
+	static struct {
+		unsigned long mask;
+		const char *feature_name;
+	} feature_bits[] = {
+		{ 1UL << 0, "branchlong" },
+		{ 1UL << 1, "spontaneous deferral"},
+		{ 1UL << 2, "16-byte atomic ops" }
+	};
+	char features[128], *cp, *sep;
+	struct cpuinfo_ia64 *c = v;
+	unsigned long mask;
+	unsigned long proc_freq;
+	int i, size;
+
+	mask = c->features;
+
+	/* build the feature string: */
+	memcpy(features, "standard", 9);
+	cp = features;
+	size = sizeof(features);
+	sep = "";
+	for (i = 0; i < ARRAY_SIZE(feature_bits) && size > 1; ++i) {
+		if (mask & feature_bits[i].mask) {
+			cp += snprintf(cp, size, "%s%s", sep,
+				       feature_bits[i].feature_name),
+			sep = ", ";
+			mask &= ~feature_bits[i].mask;
+			size = sizeof(features) - (cp - features);
+		}
+	}
+	if (mask && size > 1) {
+		/* print unknown features as a hex value */
+		snprintf(cp, size, "%s0x%lx", sep, mask);
+	}
+
+	proc_freq = cpufreq_quick_get(cpunum);
+	if (!proc_freq)
+		proc_freq = c->proc_freq / 1000;
+
+	seq_printf(m,
+		   "processor  : %d\n"
+		   "vendor     : %s\n"
+		   "arch       : IA-64\n"
+		   "family     : %u\n"
+		   "model      : %u\n"
+		   "model name : %s\n"
+		   "revision   : %u\n"
+		   "archrev    : %u\n"
+		   "features   : %s\n"
+		   "cpu number : %lu\n"
+		   "cpu regs   : %u\n"
+		   "cpu MHz    : %lu.%03lu\n"
+		   "itc MHz    : %lu.%06lu\n"
+		   "BogoMIPS   : %lu.%02lu\n",
+		   cpunum, c->vendor, c->family, c->model,
+		   c->model_name, c->revision, c->archrev,
+		   features, c->ppn, c->number,
+		   proc_freq / 1000, proc_freq % 1000,
+		   c->itc_freq / 1000000, c->itc_freq % 1000000,
+		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
+#ifdef CONFIG_SMP
+	seq_printf(m, "siblings   : %u\n", cpus_weight(cpu_core_map[cpunum]));
+	if (c->socket_id != -1)
+		seq_printf(m, "physical id: %u\n", c->socket_id);
+	if (c->threads_per_core > 1 || c->cores_per_socket > 1)
+		seq_printf(m,
+			   "core id    : %u\n"
+			   "thread id  : %u\n",
+			   c->core_id, c->thread_id);
+#endif
+	seq_printf(m,"\n");
+
+	return 0;
+}
+
+static void *
+c_start (struct seq_file *m, loff_t *pos)
+{
+#ifdef CONFIG_SMP
+	while (*pos < nr_cpu_ids && !cpu_online(*pos))
+		++*pos;
+#endif
+	return *pos < nr_cpu_ids ? cpu_data(*pos) : NULL;
+}
+
+static void *
+c_next (struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+
+static void
+c_stop (struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start =	c_start,
+	.next =		c_next,
+	.stop =		c_stop,
+	.show =		show_cpuinfo
+};
+
+#define MAX_BRANDS	8
+static char brandname[MAX_BRANDS][128];
+
+static char * __cpuinit
+get_model_name(__u8 family, __u8 model)
+{
+	static int overflow;
+	char brand[128];
+	int i;
+
+	memcpy(brand, "Unknown", 8);
+	if (ia64_pal_get_brand_info(brand)) {
+		if (family == 0x7)
+			memcpy(brand, "Merced", 7);
+		else if (family == 0x1f) switch (model) {
+			case 0: memcpy(brand, "McKinley", 9); break;
+			case 1: memcpy(brand, "Madison", 8); break;
+			case 2: memcpy(brand, "Madison up to 9M cache", 23); break;
+		}
+	}
+	for (i = 0; i < MAX_BRANDS; i++)
+		if (strcmp(brandname[i], brand) == 0)
+			return brandname[i];
+	for (i = 0; i < MAX_BRANDS; i++)
+		if (brandname[i][0] == '\0')
+			return strcpy(brandname[i], brand);
+	if (overflow++ == 0)
+		printk(KERN_ERR
+		       "%s: Table overflow. Some processor model information will be missing\n",
+		       __func__);
+	return "Unknown";
+}
+
+static void __cpuinit
+identify_cpu (struct cpuinfo_ia64 *c)
+{
+	union {
+		unsigned long bits[5];
+		struct {
+			/* id 0 & 1: */
+			char vendor[16];
+
+			/* id 2 */
+			u64 ppn;		/* processor serial number */
+
+			/* id 3: */
+			unsigned number		:  8;
+			unsigned revision	:  8;
+			unsigned model		:  8;
+			unsigned family		:  8;
+			unsigned archrev	:  8;
+			unsigned reserved	: 24;
+
+			/* id 4: */
+			u64 features;
+		} field;
+	} cpuid;
+	pal_vm_info_1_u_t vm1;
+	pal_vm_info_2_u_t vm2;
+	pal_status_t status;
+	unsigned long impl_va_msb = 50, phys_addr_size = 44;	/* Itanium defaults */
+	int i;
+	for (i = 0; i < 5; ++i)
+		cpuid.bits[i] = ia64_get_cpuid(i);
+
+	memcpy(c->vendor, cpuid.field.vendor, 16);
+#ifdef CONFIG_SMP
+	c->cpu = smp_processor_id();
+
+	/* below default values will be overwritten  by identify_siblings() 
+	 * for Multi-Threading/Multi-Core capable CPUs
+	 */
+	c->threads_per_core = c->cores_per_socket = c->num_log = 1;
+	c->socket_id = -1;
+
+	identify_siblings(c);
+
+	if (c->threads_per_core > smp_num_siblings)
+		smp_num_siblings = c->threads_per_core;
+#endif
+	c->ppn = cpuid.field.ppn;
+	c->number = cpuid.field.number;
+	c->revision = cpuid.field.revision;
+	c->model = cpuid.field.model;
+	c->family = cpuid.field.family;
+	c->archrev = cpuid.field.archrev;
+	c->features = cpuid.field.features;
+	c->model_name = get_model_name(c->family, c->model);
+
+	status = ia64_pal_vm_summary(&vm1, &vm2);
+	if (status == PAL_STATUS_SUCCESS) {
+		impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb;
+		phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size;
+	}
+	c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));
+	c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
+}
+
+/*
+ * Do the following calculations:
+ *
+ * 1. the max. cache line size.
+ * 2. the minimum of the i-cache stride sizes for "flush_icache_range()".
+ * 3. the minimum of the cache stride sizes for "clflush_cache_range()".
+ */
+static void __cpuinit
+get_cache_info(void)
+{
+	unsigned long line_size, max = 1;
+	unsigned long l, levels, unique_caches;
+	pal_cache_config_info_t cci;
+	long status;
+
+        status = ia64_pal_cache_summary(&levels, &unique_caches);
+        if (status != 0) {
+                printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
+                       __func__, status);
+                max = SMP_CACHE_BYTES;
+		/* Safest setup for "flush_icache_range()" */
+		ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
+		/* Safest setup for "clflush_cache_range()" */
+		ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
+		goto out;
+        }
+
+	for (l = 0; l < levels; ++l) {
+		/* cache_type (data_or_unified)=2 */
+		status = ia64_pal_cache_config_info(l, 2, &cci);
+		if (status != 0) {
+			printk(KERN_ERR "%s: ia64_pal_cache_config_info"
+				"(l=%lu, 2) failed (status=%ld)\n",
+				__func__, l, status);
+			max = SMP_CACHE_BYTES;
+			/* The safest setup for "flush_icache_range()" */
+			cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+			/* The safest setup for "clflush_cache_range()" */
+			ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
+			cci.pcci_unified = 1;
+		} else {
+			if (cci.pcci_stride < ia64_cache_stride_shift)
+				ia64_cache_stride_shift = cci.pcci_stride;
+
+			line_size = 1 << cci.pcci_line_size;
+			if (line_size > max)
+				max = line_size;
+		}
+
+		if (!cci.pcci_unified) {
+			/* cache_type (instruction)=1*/
+			status = ia64_pal_cache_config_info(l, 1, &cci);
+			if (status != 0) {
+				printk(KERN_ERR "%s: ia64_pal_cache_config_info"
+					"(l=%lu, 1) failed (status=%ld)\n",
+					__func__, l, status);
+				/* The safest setup for flush_icache_range() */
+				cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+			}
+		}
+		if (cci.pcci_stride < ia64_i_cache_stride_shift)
+			ia64_i_cache_stride_shift = cci.pcci_stride;
+	}
+  out:
+	if (max > ia64_max_cacheline_size)
+		ia64_max_cacheline_size = max;
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU.  This function acts
+ * as a 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit
+cpu_init (void)
+{
+	extern void __cpuinit ia64_mmu_init (void *);
+	static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
+	unsigned long num_phys_stacked;
+	pal_vm_info_2_u_t vmi;
+	unsigned int max_ctx;
+	struct cpuinfo_ia64 *cpu_info;
+	void *cpu_data;
+
+	cpu_data = per_cpu_init();
+#ifdef CONFIG_SMP
+	/*
+	 * insert boot cpu into sibling and core mapes
+	 * (must be done after per_cpu area is setup)
+	 */
+	if (smp_processor_id() == 0) {
+		cpu_set(0, per_cpu(cpu_sibling_map, 0));
+		cpu_set(0, cpu_core_map[0]);
+	} else {
+		/*
+		 * Set ar.k3 so that assembly code in MCA handler can compute
+		 * physical addresses of per cpu variables with a simple:
+		 *   phys = ar.k3 + &per_cpu_var
+		 * and the alt-dtlb-miss handler can set per-cpu mapping into
+		 * the TLB when needed. head.S already did this for cpu0.
+		 */
+		ia64_set_kr(IA64_KR_PER_CPU_DATA,
+			    ia64_tpa(cpu_data) - (long) __per_cpu_start);
+	}
+#endif
+
+	get_cache_info();
+
+	/*
+	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
+	 * ia64_mmu_init() yet.  And we can't call ia64_mmu_init() first because it
+	 * depends on the data returned by identify_cpu().  We break the dependency by
+	 * accessing cpu_data() through the canonical per-CPU address.
+	 */
+	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(ia64_cpu_info) - __per_cpu_start);
+	identify_cpu(cpu_info);
+
+#ifdef CONFIG_MCKINLEY
+	{
+#		define FEATURE_SET 16
+		struct ia64_pal_retval iprv;
+
+		if (cpu_info->family == 0x1f) {
+			PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0);
+			if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80))
+				PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES,
+				              (iprv.v1 | 0x80), FEATURE_SET, 0);
+		}
+	}
+#endif
+
+	/* Clear the stack memory reserved for pt_regs: */
+	memset(task_pt_regs(current), 0, sizeof(struct pt_regs));
+
+	ia64_set_kr(IA64_KR_FPU_OWNER, 0);
+
+	/*
+	 * Initialize the page-table base register to a global
+	 * directory with all zeroes.  This ensure that we can handle
+	 * TLB-misses to user address-space even before we created the
+	 * first user address-space.  This may happen, e.g., due to
+	 * aggressive use of lfetch.fault.
+	 */
+	ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page)));
+
+	/*
+	 * Initialize default control register to defer speculative faults except
+	 * for those arising from TLB misses, which are not deferred.  The
+	 * kernel MUST NOT depend on a particular setting of these bits (in other words,
+	 * the kernel must have recovery code for all speculative accesses).  Turn on
+	 * dcr.lc as per recommendation by the architecture team.  Most IA-32 apps
+	 * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll
+	 * be fine).
+	 */
+	ia64_setreg(_IA64_REG_CR_DCR,  (  IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
+					| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
+	atomic_inc(&init_mm.mm_count);
+	current->active_mm = &init_mm;
+	BUG_ON(current->mm);
+
+	ia64_mmu_init(ia64_imva(cpu_data));
+	ia64_mca_cpu_init(ia64_imva(cpu_data));
+
+	/* Clear ITC to eliminate sched_clock() overflows in human time.  */
+	ia64_set_itc(0);
+
+	/* disable all local interrupt sources: */
+	ia64_set_itv(1 << 16);
+	ia64_set_lrr0(1 << 16);
+	ia64_set_lrr1(1 << 16);
+	ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+	ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+	/* clear TPR & XTP to enable all interrupt classes: */
+	ia64_setreg(_IA64_REG_CR_TPR, 0);
+
+	/* Clear any pending interrupts left by SAL/EFI */
+	while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR)
+		ia64_eoi();
+
+#ifdef CONFIG_SMP
+	normal_xtp();
+#endif
+
+	/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
+	if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
+		max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
+		setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
+	} else {
+		printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
+		max_ctx = (1U << 15) - 1;	/* use architected minimum */
+	}
+	while (max_ctx < ia64_ctx.max_ctx) {
+		unsigned int old = ia64_ctx.max_ctx;
+		if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old)
+			break;
+	}
+
+	if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) {
+		printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical "
+		       "stacked regs\n");
+		num_phys_stacked = 96;
+	}
+	/* size of physical stacked register partition plus 8 bytes: */
+	if (num_phys_stacked > max_num_phys_stacked) {
+		ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8);
+		max_num_phys_stacked = num_phys_stacked;
+	}
+	platform_cpu_init();
+	pm_idle = default_idle;
+}
+
+void __init
+check_bugs (void)
+{
+	ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
+			       (unsigned long) __end___mckinley_e9_bundles);
+}
+
+static int __init run_dmi_scan(void)
+{
+	dmi_scan_machine();
+	return 0;
+}
+core_initcall(run_dmi_scan);
diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h
new file mode 100644
index 00000000..9fd9a193
--- /dev/null
+++ b/arch/ia64/kernel/sigframe.h
@@ -0,0 +1,25 @@
+struct sigscratch {
+	unsigned long scratch_unat;	/* ar.unat for the general registers saved in pt */
+	unsigned long ar_pfs;		/* for syscalls, the user-level function-state  */
+	struct pt_regs pt;
+};
+
+struct sigframe {
+	/*
+	 * Place signal handler args where user-level unwinder can find them easily.
+	 * DO NOT MOVE THESE.  They are part of the IA-64 Linux ABI and there is
+	 * user-level code that depends on their presence!
+	 */
+	unsigned long arg0;		/* signum */
+	unsigned long arg1;		/* siginfo pointer */
+	unsigned long arg2;		/* sigcontext pointer */
+	/*
+	 * End of architected state.
+	 */
+
+	void __user *handler;		/* pointer to the plabel of the signal handler */
+	struct siginfo info;
+	struct sigcontext sc;
+};
+
+extern void ia64_do_signal (struct sigscratch *, long);
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
new file mode 100644
index 00000000..7bdafc87
--- /dev/null
+++ b/arch/ia64/kernel/signal.c
@@ -0,0 +1,556 @@
+/*
+ * Architecture-specific signal handling support.
+ *
+ * Copyright (C) 1999-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Derived from i386 and Alpha versions.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+#include <linux/tracehook.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/binfmts.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+
+#include <asm/intrinsics.h>
+#include <asm/uaccess.h>
+#include <asm/rse.h>
+#include <asm/sigcontext.h>
+
+#include "sigframe.h"
+
+#define DEBUG_SIG	0
+#define STACK_ALIGN	16		/* minimal alignment for stack pointer */
+#define _BLOCKABLE	(~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+#if _NSIG_WORDS > 1
+# define PUT_SIGSET(k,u)	__copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
+# define GET_SIGSET(k,u)	__copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t))
+#else
+# define PUT_SIGSET(k,u)	__put_user((k)->sig[0], &(u)->sig[0])
+# define GET_SIGSET(k,u)	__get_user((k)->sig[0], &(u)->sig[0])
+#endif
+
+asmlinkage long
+sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2,
+		 long arg3, long arg4, long arg5, long arg6, long arg7,
+		 struct pt_regs regs)
+{
+	return do_sigaltstack(uss, uoss, regs.r12);
+}
+
+static long
+restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
+{
+	unsigned long ip, flags, nat, um, cfm, rsc;
+	long err;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	/* restore scratch that always needs gets updated during signal delivery: */
+	err  = __get_user(flags, &sc->sc_flags);
+	err |= __get_user(nat, &sc->sc_nat);
+	err |= __get_user(ip, &sc->sc_ip);			/* instruction pointer */
+	err |= __get_user(cfm, &sc->sc_cfm);
+	err |= __get_user(um, &sc->sc_um);			/* user mask */
+	err |= __get_user(rsc, &sc->sc_ar_rsc);
+	err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
+	err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
+	err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
+	err |= __get_user(scr->pt.pr, &sc->sc_pr);		/* predicates */
+	err |= __get_user(scr->pt.b0, &sc->sc_br[0]);		/* b0 (rp) */
+	err |= __get_user(scr->pt.b6, &sc->sc_br[6]);		/* b6 */
+	err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8);	/* r1 */
+	err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8);	/* r8-r11 */
+	err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8);	/* r12-r13 */
+	err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8);	/* r15 */
+
+	scr->pt.cr_ifs = cfm | (1UL << 63);
+	scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */
+
+	/* establish new instruction pointer: */
+	scr->pt.cr_iip = ip & ~0x3UL;
+	ia64_psr(&scr->pt)->ri = ip & 0x3;
+	scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM);
+
+	scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat);
+
+	if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
+		/* Restore most scratch-state only when not in syscall. */
+		err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv);		/* ar.ccv */
+		err |= __get_user(scr->pt.b7, &sc->sc_br[7]);			/* b7 */
+		err |= __get_user(scr->pt.r14, &sc->sc_gr[14]);			/* r14 */
+		err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */
+		err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8);	/* r2-r3 */
+		err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8);	/* r16-r31 */
+	}
+
+	if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) {
+		struct ia64_psr *psr = ia64_psr(&scr->pt);
+
+		err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
+		psr->mfh = 0;	/* drop signal handler's fph contents... */
+		preempt_disable();
+		if (psr->dfh)
+			ia64_drop_fpu(current);
+		else {
+			/* We already own the local fph, otherwise psr->dfh wouldn't be 0.  */
+			__ia64_load_fpu(current->thread.fph);
+			ia64_set_local_fpu_owner(current);
+		}
+		preempt_enable();
+	}
+	return err;
+}
+
+int
+copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from)
+{
+	if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
+		return -EFAULT;
+	if (from->si_code < 0) {
+		if (__copy_to_user(to, from, sizeof(siginfo_t)))
+			return -EFAULT;
+		return 0;
+	} else {
+		int err;
+
+		/*
+		 * If you change siginfo_t structure, please be sure this code is fixed
+		 * accordingly.  It should never copy any pad contained in the structure
+		 * to avoid security leaks, but must copy the generic 3 ints plus the
+		 * relevant union member.
+		 */
+		err = __put_user(from->si_signo, &to->si_signo);
+		err |= __put_user(from->si_errno, &to->si_errno);
+		err |= __put_user((short)from->si_code, &to->si_code);
+		switch (from->si_code >> 16) {
+		      case __SI_FAULT >> 16:
+			err |= __put_user(from->si_flags, &to->si_flags);
+			err |= __put_user(from->si_isr, &to->si_isr);
+		      case __SI_POLL >> 16:
+			err |= __put_user(from->si_addr, &to->si_addr);
+			err |= __put_user(from->si_imm, &to->si_imm);
+			break;
+		      case __SI_TIMER >> 16:
+			err |= __put_user(from->si_tid, &to->si_tid);
+			err |= __put_user(from->si_overrun, &to->si_overrun);
+			err |= __put_user(from->si_ptr, &to->si_ptr);
+			break;
+		      case __SI_RT >> 16:	/* Not generated by the kernel as of now.  */
+		      case __SI_MESGQ >> 16:
+			err |= __put_user(from->si_uid, &to->si_uid);
+			err |= __put_user(from->si_pid, &to->si_pid);
+			err |= __put_user(from->si_ptr, &to->si_ptr);
+			break;
+		      case __SI_CHLD >> 16:
+			err |= __put_user(from->si_utime, &to->si_utime);
+			err |= __put_user(from->si_stime, &to->si_stime);
+			err |= __put_user(from->si_status, &to->si_status);
+		      default:
+			err |= __put_user(from->si_uid, &to->si_uid);
+			err |= __put_user(from->si_pid, &to->si_pid);
+			break;
+		}
+		return err;
+	}
+}
+
+long
+ia64_rt_sigreturn (struct sigscratch *scr)
+{
+	extern char ia64_strace_leave_kernel, ia64_leave_kernel;
+	struct sigcontext __user *sc;
+	struct siginfo si;
+	sigset_t set;
+	long retval;
+
+	sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc;
+
+	/*
+	 * When we return to the previously executing context, r8 and r10 have already
+	 * been setup the way we want them.  Indeed, if the signal wasn't delivered while
+	 * in a system call, we must not touch r8 or r10 as otherwise user-level state
+	 * could be corrupted.
+	 */
+	retval = (long) &ia64_leave_kernel;
+	if (test_thread_flag(TIF_SYSCALL_TRACE)
+	    || test_thread_flag(TIF_SYSCALL_AUDIT))
+		/*
+		 * strace expects to be notified after sigreturn returns even though the
+		 * context to which we return may not be in the middle of a syscall.
+		 * Thus, the return-value that strace displays for sigreturn is
+		 * meaningless.
+		 */
+		retval = (long) &ia64_strace_leave_kernel;
+
+	if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
+		goto give_sigsegv;
+
+	if (GET_SIGSET(&set, &sc->sc_mask))
+		goto give_sigsegv;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+
+	spin_lock_irq(&current->sighand->siglock);
+	{
+		current->blocked = set;
+		recalc_sigpending();
+	}
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(sc, scr))
+		goto give_sigsegv;
+
+#if DEBUG_SIG
+	printk("SIG return (%s:%d): sp=%lx ip=%lx\n",
+	       current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip);
+#endif
+	/*
+	 * It is more difficult to avoid calling this function than to
+	 * call it and ignore errors.
+	 */
+	do_sigaltstack(&sc->sc_stack, NULL, scr->pt.r12);
+	return retval;
+
+  give_sigsegv:
+	si.si_signo = SIGSEGV;
+	si.si_errno = 0;
+	si.si_code = SI_KERNEL;
+	si.si_pid = task_pid_vnr(current);
+	si.si_uid = current_uid();
+	si.si_addr = sc;
+	force_sig_info(SIGSEGV, &si, current);
+	return retval;
+}
+
+/*
+ * This does just the minimum required setup of sigcontext.
+ * Specifically, it only installs data that is either not knowable at
+ * the user-level or that gets modified before execution in the
+ * trampoline starts.  Everything else is done at the user-level.
+ */
+static long
+setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr)
+{
+	unsigned long flags = 0, ifs, cfm, nat;
+	long err = 0;
+
+	ifs = scr->pt.cr_ifs;
+
+	if (on_sig_stack((unsigned long) sc))
+		flags |= IA64_SC_FLAG_ONSTACK;
+	if ((ifs & (1UL << 63)) == 0)
+		/* if cr_ifs doesn't have the valid bit set, we got here through a syscall */
+		flags |= IA64_SC_FLAG_IN_SYSCALL;
+	cfm = ifs & ((1UL << 38) - 1);
+	ia64_flush_fph(current);
+	if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
+		flags |= IA64_SC_FLAG_FPH_VALID;
+		err = __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
+	}
+
+	nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat);
+
+	err |= __put_user(flags, &sc->sc_flags);
+	err |= __put_user(nat, &sc->sc_nat);
+	err |= PUT_SIGSET(mask, &sc->sc_mask);
+	err |= __put_user(cfm, &sc->sc_cfm);
+	err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um);
+	err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
+	err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat);		/* ar.unat */
+	err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);		/* ar.fpsr */
+	err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
+	err |= __put_user(scr->pt.pr, &sc->sc_pr);			/* predicates */
+	err |= __put_user(scr->pt.b0, &sc->sc_br[0]);			/* b0 (rp) */
+	err |= __put_user(scr->pt.b6, &sc->sc_br[6]);			/* b6 */
+	err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8);		/* r1 */
+	err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8);		/* r8-r11 */
+	err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8);	/* r12-r13 */
+	err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8);		/* r15 */
+	err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip);
+
+	if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
+		/* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */
+		err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv);		/* ar.ccv */
+		err |= __put_user(scr->pt.b7, &sc->sc_br[7]);			/* b7 */
+		err |= __put_user(scr->pt.r14, &sc->sc_gr[14]);			/* r14 */
+		err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */
+		err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8);		/* r2-r3 */
+		err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8);	/* r16-r31 */
+	}
+	return err;
+}
+
+/*
+ * Check whether the register-backing store is already on the signal stack.
+ */
+static inline int
+rbs_on_sig_stack (unsigned long bsp)
+{
+	return (bsp - current->sas_ss_sp < current->sas_ss_size);
+}
+
+static long
+force_sigsegv_info (int sig, void __user *addr)
+{
+	unsigned long flags;
+	struct siginfo si;
+
+	if (sig == SIGSEGV) {
+		/*
+		 * Acquiring siglock around the sa_handler-update is almost
+		 * certainly overkill, but this isn't a
+		 * performance-critical path and I'd rather play it safe
+		 * here than having to debug a nasty race if and when
+		 * something changes in kernel/signal.c that would make it
+		 * no longer safe to modify sa_handler without holding the
+		 * lock.
+		 */
+		spin_lock_irqsave(&current->sighand->siglock, flags);
+		current->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	}
+	si.si_signo = SIGSEGV;
+	si.si_errno = 0;
+	si.si_code = SI_KERNEL;
+	si.si_pid = task_pid_vnr(current);
+	si.si_uid = current_uid();
+	si.si_addr = addr;
+	force_sig_info(SIGSEGV, &si, current);
+	return 0;
+}
+
+static long
+setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
+	     struct sigscratch *scr)
+{
+	extern char __kernel_sigtramp[];
+	unsigned long tramp_addr, new_rbs = 0, new_sp;
+	struct sigframe __user *frame;
+	long err;
+
+	new_sp = scr->pt.r12;
+	tramp_addr = (unsigned long) __kernel_sigtramp;
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		int onstack = sas_ss_flags(new_sp);
+
+		if (onstack == 0) {
+			new_sp = current->sas_ss_sp + current->sas_ss_size;
+			/*
+			 * We need to check for the register stack being on the
+			 * signal stack separately, because it's switched
+			 * separately (memory stack is switched in the kernel,
+			 * register stack is switched in the signal trampoline).
+			 */
+			if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
+				new_rbs = ALIGN(current->sas_ss_sp,
+						sizeof(long));
+		} else if (onstack == SS_ONSTACK) {
+			unsigned long check_sp;
+
+			/*
+			 * If we are on the alternate signal stack and would
+			 * overflow it, don't. Return an always-bogus address
+			 * instead so we will die with SIGSEGV.
+			 */
+			check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN;
+			if (!likely(on_sig_stack(check_sp)))
+				return force_sigsegv_info(sig, (void __user *)
+							  check_sp);
+		}
+	}
+	frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN);
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return force_sigsegv_info(sig, frame);
+
+	err  = __put_user(sig, &frame->arg0);
+	err |= __put_user(&frame->info, &frame->arg1);
+	err |= __put_user(&frame->sc, &frame->arg2);
+	err |= __put_user(new_rbs, &frame->sc.sc_rbs_base);
+	err |= __put_user(0, &frame->sc.sc_loadrs);	/* initialize to zero */
+	err |= __put_user(ka->sa.sa_handler, &frame->handler);
+
+	err |= copy_siginfo_to_user(&frame->info, info);
+
+	err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp);
+	err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size);
+	err |= __put_user(sas_ss_flags(scr->pt.r12), &frame->sc.sc_stack.ss_flags);
+	err |= setup_sigcontext(&frame->sc, set, scr);
+
+	if (unlikely(err))
+		return force_sigsegv_info(sig, frame);
+
+	scr->pt.r12 = (unsigned long) frame - 16;	/* new stack pointer */
+	scr->pt.ar_fpsr = FPSR_DEFAULT;			/* reset fpsr for signal handler */
+	scr->pt.cr_iip = tramp_addr;
+	ia64_psr(&scr->pt)->ri = 0;			/* start executing in first slot */
+	ia64_psr(&scr->pt)->be = 0;			/* force little-endian byte-order */
+	/*
+	 * Force the interruption function mask to zero.  This has no effect when a
+	 * system-call got interrupted by a signal (since, in that case, scr->pt_cr_ifs is
+	 * ignored), but it has the desirable effect of making it possible to deliver a
+	 * signal with an incomplete register frame (which happens when a mandatory RSE
+	 * load faults).  Furthermore, it has no negative effect on the getting the user's
+	 * dirty partition preserved, because that's governed by scr->pt.loadrs.
+	 */
+	scr->pt.cr_ifs = (1UL << 63);
+
+	/*
+	 * Note: this affects only the NaT bits of the scratch regs (the ones saved in
+	 * pt_regs), which is exactly what we want.
+	 */
+	scr->scratch_unat = 0; /* ensure NaT bits of r12 is clear */
+
+#if DEBUG_SIG
+	printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n",
+	       current->comm, current->pid, sig, scr->pt.r12, frame->sc.sc_ip, frame->handler);
+#endif
+	return 1;
+}
+
+static long
+handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset,
+	       struct sigscratch *scr)
+{
+	if (!setup_frame(sig, ka, info, oldset, scr))
+		return 0;
+
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&current->blocked, sig);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	/*
+	 * Let tracing know that we've done the handler setup.
+	 */
+	tracehook_signal_handler(sig, info, ka, &scr->pt,
+				 test_thread_flag(TIF_SINGLESTEP));
+
+	return 1;
+}
+
+/*
+ * Note that `init' is a special process: it doesn't get signals it doesn't want to
+ * handle.  Thus you cannot kill init even with a SIGKILL even by mistake.
+ */
+void
+ia64_do_signal (struct sigscratch *scr, long in_syscall)
+{
+	struct k_sigaction ka;
+	sigset_t *oldset;
+	siginfo_t info;
+	long restart = in_syscall;
+	long errno = scr->pt.r8;
+
+	/*
+	 * In the ia64_leave_kernel code path, we want the common case to go fast, which
+	 * is why we may in certain cases get here from kernel mode. Just return without
+	 * doing anything if so.
+	 */
+	if (!user_mode(&scr->pt))
+		return;
+
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
+		oldset = &current->saved_sigmask;
+	else
+		oldset = &current->blocked;
+
+	/*
+	 * This only loops in the rare cases of handle_signal() failing, in which case we
+	 * need to push through a forced SIGSEGV.
+	 */
+	while (1) {
+		int signr = get_signal_to_deliver(&info, &ka, &scr->pt, NULL);
+
+		/*
+		 * get_signal_to_deliver() may have run a debugger (via notify_parent())
+		 * and the debugger may have modified the state (e.g., to arrange for an
+		 * inferior call), thus it's important to check for restarting _after_
+		 * get_signal_to_deliver().
+		 */
+		if ((long) scr->pt.r10 != -1)
+			/*
+			 * A system calls has to be restarted only if one of the error codes
+			 * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned.  If r10
+			 * isn't -1 then r8 doesn't hold an error code and we don't need to
+			 * restart the syscall, so we can clear the "restart" flag here.
+			 */
+			restart = 0;
+
+		if (signr <= 0)
+			break;
+
+		if (unlikely(restart)) {
+			switch (errno) {
+			      case ERESTART_RESTARTBLOCK:
+			      case ERESTARTNOHAND:
+				scr->pt.r8 = EINTR;
+				/* note: scr->pt.r10 is already -1 */
+				break;
+
+			      case ERESTARTSYS:
+				if ((ka.sa.sa_flags & SA_RESTART) == 0) {
+					scr->pt.r8 = EINTR;
+					/* note: scr->pt.r10 is already -1 */
+					break;
+				}
+			      case ERESTARTNOINTR:
+				ia64_decrement_ip(&scr->pt);
+				restart = 0; /* don't restart twice if handle_signal() fails... */
+			}
+		}
+
+		/*
+		 * Whee!  Actually deliver the signal.  If the delivery failed, we need to
+		 * continue to iterate in this loop so we can deliver the SIGSEGV...
+		 */
+		if (handle_signal(signr, &ka, &info, oldset, scr)) {
+			/*
+			 * A signal was successfully delivered; the saved
+			 * sigmask will have been stored in the signal frame,
+			 * and will be restored by sigreturn, so we can simply
+			 * clear the TS_RESTORE_SIGMASK flag.
+			 */
+			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+			return;
+		}
+	}
+
+	/* Did we come from a system call? */
+	if (restart) {
+		/* Restart the system call - no handlers present */
+		if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
+		    || errno == ERESTART_RESTARTBLOCK)
+		{
+			/*
+			 * Note: the syscall number is in r15 which is saved in
+			 * pt_regs so all we need to do here is adjust ip so that
+			 * the "break" instruction gets re-executed.
+			 */
+			ia64_decrement_ip(&scr->pt);
+			if (errno == ERESTART_RESTARTBLOCK)
+				scr->pt.r15 = __NR_restart_syscall;
+		}
+	}
+
+	/* if there's no signal to deliver, we just put the saved sigmask
+	 * back */
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
+}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
new file mode 100644
index 00000000..be450a3e
--- /dev/null
+++ b/arch/ia64/kernel/smp.c
@@ -0,0 +1,343 @@
+/*
+ * SMP Support
+ *
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Lots of stuff stolen from arch/alpha/kernel/smp.c
+ *
+ * 01/05/16 Rohit Seth <rohit.seth@intel.com>  IA64-SMP functions. Reorganized
+ * the existing code (on the lines of x86 port).
+ * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy
+ * calibration on each CPU.
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id
+ * 00/03/31 Rohit Seth <rohit.seth@intel.com>	Fixes for Bootstrap Processor
+ * & cpu_online_map now gets done here (instead of setup.c)
+ * 99/10/05 davidm	Update to bring it in sync with new command-line processing
+ *  scheme.
+ * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
+ *		smp_call_function_single to resend IPI on timeouts
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/kernel_stat.h>
+#include <linux/mm.h>
+#include <linux/cache.h>
+#include <linux/delay.h>
+#include <linux/efi.h>
+#include <linux/bitops.h>
+#include <linux/kexec.h>
+
+#include <asm/atomic.h>
+#include <asm/current.h>
+#include <asm/delay.h>
+#include <asm/machvec.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+#include <asm/unistd.h>
+#include <asm/mca.h>
+
+/*
+ * Note: alignment of 4 entries/cacheline was empirically determined
+ * to be a good tradeoff between hot cachelines & spreading the array
+ * across too many cacheline.
+ */
+static struct local_tlb_flush_counts {
+	unsigned int count;
+} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS],
+				     shadow_flush_counts);
+
+#define IPI_CALL_FUNC		0
+#define IPI_CPU_STOP		1
+#define IPI_CALL_FUNC_SINGLE	2
+#define IPI_KDUMP_CPU_STOP	3
+
+/* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation);
+
+extern void cpu_halt (void);
+
+static void
+stop_this_cpu(void)
+{
+	/*
+	 * Remove this CPU:
+	 */
+	cpu_clear(smp_processor_id(), cpu_online_map);
+	max_xtp();
+	local_irq_disable();
+	cpu_halt();
+}
+
+void
+cpu_die(void)
+{
+	max_xtp();
+	local_irq_disable();
+	cpu_halt();
+	/* Should never be here */
+	BUG();
+	for (;;);
+}
+
+irqreturn_t
+handle_IPI (int irq, void *dev_id)
+{
+	int this_cpu = get_cpu();
+	unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
+	unsigned long ops;
+
+	mb();	/* Order interrupt and bit testing. */
+	while ((ops = xchg(pending_ipis, 0)) != 0) {
+		mb();	/* Order bit clearing and data access. */
+		do {
+			unsigned long which;
+
+			which = ffz(~ops);
+			ops &= ~(1 << which);
+
+			switch (which) {
+			case IPI_CPU_STOP:
+				stop_this_cpu();
+				break;
+			case IPI_CALL_FUNC:
+				generic_smp_call_function_interrupt();
+				break;
+			case IPI_CALL_FUNC_SINGLE:
+				generic_smp_call_function_single_interrupt();
+				break;
+#ifdef CONFIG_KEXEC
+			case IPI_KDUMP_CPU_STOP:
+				unw_init_running(kdump_cpu_freeze, NULL);
+				break;
+#endif
+			default:
+				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
+						this_cpu, which);
+				break;
+			}
+		} while (ops);
+		mb();	/* Order data access and bit testing. */
+	}
+	put_cpu();
+	return IRQ_HANDLED;
+}
+
+
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_single (int dest_cpu, int op)
+{
+	set_bit(op, &per_cpu(ipi_operation, dest_cpu));
+	platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_allbutself (int op)
+{
+	unsigned int i;
+
+	for_each_online_cpu(i) {
+		if (i != smp_processor_id())
+			send_IPI_single(i, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_mask(const struct cpumask *mask, int op)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask) {
+			send_IPI_single(cpu, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_all (int op)
+{
+	int i;
+
+	for_each_online_cpu(i) {
+		send_IPI_single(i, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_self (int op)
+{
+	send_IPI_single(smp_processor_id(), op);
+}
+
+#ifdef CONFIG_KEXEC
+void
+kdump_smp_send_stop(void)
+{
+ 	send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
+}
+
+void
+kdump_smp_send_init(void)
+{
+	unsigned int cpu, self_cpu;
+	self_cpu = smp_processor_id();
+	for_each_online_cpu(cpu) {
+		if (cpu != self_cpu) {
+			if(kdump_status[cpu] == 0)
+				platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
+		}
+	}
+}
+#endif
+/*
+ * Called with preemption disabled.
+ */
+void
+smp_send_reschedule (int cpu)
+{
+	platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
+}
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
+
+/*
+ * Called with preemption disabled.
+ */
+static void
+smp_send_local_flush_tlb (int cpu)
+{
+	platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
+}
+
+void
+smp_local_flush_tlb(void)
+{
+	/*
+	 * Use atomic ops. Otherwise, the load/increment/store sequence from
+	 * a "++" operation can have the line stolen between the load & store.
+	 * The overhead of the atomic op in negligible in this case & offers
+	 * significant benefit for the brief periods where lots of cpus
+	 * are simultaneously flushing TLBs.
+	 */
+	ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
+	local_flush_tlb_all();
+}
+
+#define FLUSH_DELAY	5 /* Usec backoff to eliminate excessive cacheline bouncing */
+
+void
+smp_flush_tlb_cpumask(cpumask_t xcpumask)
+{
+	unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts);
+	cpumask_t cpumask = xcpumask;
+	int mycpu, cpu, flush_mycpu = 0;
+
+	preempt_disable();
+	mycpu = smp_processor_id();
+
+	for_each_cpu_mask(cpu, cpumask)
+		counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff;
+
+	mb();
+	for_each_cpu_mask(cpu, cpumask) {
+		if (cpu == mycpu)
+			flush_mycpu = 1;
+		else
+			smp_send_local_flush_tlb(cpu);
+	}
+
+	if (flush_mycpu)
+		smp_local_flush_tlb();
+
+	for_each_cpu_mask(cpu, cpumask)
+		while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff))
+			udelay(FLUSH_DELAY);
+
+	preempt_enable();
+}
+
+void
+smp_flush_tlb_all (void)
+{
+	on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
+}
+
+void
+smp_flush_tlb_mm (struct mm_struct *mm)
+{
+	cpumask_var_t cpus;
+	preempt_disable();
+	/* this happens for the common case of a single-threaded fork():  */
+	if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
+	{
+		local_finish_flush_tlb_mm(mm);
+		preempt_enable();
+		return;
+	}
+	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) {
+		smp_call_function((void (*)(void *))local_finish_flush_tlb_mm,
+			mm, 1);
+	} else {
+		cpumask_copy(cpus, mm_cpumask(mm));
+		smp_call_function_many(cpus,
+			(void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
+		free_cpumask_var(cpus);
+	}
+	local_irq_disable();
+	local_finish_flush_tlb_mm(mm);
+	local_irq_enable();
+	preempt_enable();
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	send_IPI_mask(mask, IPI_CALL_FUNC);
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+void
+smp_send_stop (void)
+{
+	send_IPI_allbutself(IPI_CPU_STOP);
+}
+
+int
+setup_profiling_timer (unsigned int multiplier)
+{
+	return -EINVAL;
+}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
new file mode 100644
index 00000000..14ec6410
--- /dev/null
+++ b/arch/ia64/kernel/smpboot.c
@@ -0,0 +1,929 @@
+/*
+ * SMP boot-related support
+ *
+ * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2001, 2004-2005 Intel Corp
+ * 	Rohit Seth <rohit.seth@intel.com>
+ * 	Suresh Siddha <suresh.b.siddha@intel.com>
+ * 	Gordon Jin <gordon.jin@intel.com>
+ *	Ashok Raj  <ashok.raj@intel.com>
+ *
+ * 01/05/16 Rohit Seth <rohit.seth@intel.com>	Moved SMP booting functions from smp.c to here.
+ * 01/04/27 David Mosberger <davidm@hpl.hp.com>	Added ITC synching code.
+ * 02/07/31 David Mosberger <davidm@hpl.hp.com>	Switch over to hotplug-CPU boot-sequence.
+ *						smp_boot_cpus()/smp_commence() is replaced by
+ *						smp_prepare_cpus()/__cpu_up()/smp_cpus_done().
+ * 04/06/21 Ashok Raj		<ashok.raj@intel.com> Added CPU Hotplug Support
+ * 04/12/26 Jin Gordon <gordon.jin@intel.com>
+ * 04/12/26 Rohit Seth <rohit.seth@intel.com>
+ *						Add multi-threading and multi-core detection
+ * 05/01/30 Suresh Siddha <suresh.b.siddha@intel.com>
+ *						Setup cpu_sibling_map and cpu_core_map
+ */
+
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/efi.h>
+#include <linux/percpu.h>
+#include <linux/bitops.h>
+
+#include <asm/atomic.h>
+#include <asm/cache.h>
+#include <asm/current.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/machvec.h>
+#include <asm/mca.h>
+#include <asm/page.h>
+#include <asm/paravirt.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+#include <asm/unistd.h>
+#include <asm/sn/arch.h>
+
+#define SMP_DEBUG 0
+
+#if SMP_DEBUG
+#define Dprintk(x...)  printk(x)
+#else
+#define Dprintk(x...)
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_PERMIT_BSP_REMOVE
+#define bsp_remove_ok	1
+#else
+#define bsp_remove_ok	0
+#endif
+
+/*
+ * Store all idle threads, this can be reused instead of creating
+ * a new thread. Also avoids complicated thread destroy functionality
+ * for idle threads.
+ */
+struct task_struct *idle_thread_array[NR_CPUS];
+
+/*
+ * Global array allocated for NR_CPUS at boot time
+ */
+struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS];
+
+/*
+ * start_ap in head.S uses this to store current booting cpu
+ * info.
+ */
+struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
+
+#define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]);
+
+#define get_idle_for_cpu(x)		(idle_thread_array[(x)])
+#define set_idle_for_cpu(x,p)	(idle_thread_array[(x)] = (p))
+
+#else
+
+#define get_idle_for_cpu(x)		(NULL)
+#define set_idle_for_cpu(x,p)
+#define set_brendez_area(x)
+#endif
+
+
+/*
+ * ITC synchronization related stuff:
+ */
+#define MASTER	(0)
+#define SLAVE	(SMP_CACHE_BYTES/8)
+
+#define NUM_ROUNDS	64	/* magic value */
+#define NUM_ITERS	5	/* likewise */
+
+static DEFINE_SPINLOCK(itc_sync_lock);
+static volatile unsigned long go[SLAVE + 1];
+
+#define DEBUG_ITC_SYNC	0
+
+extern void start_ap (void);
+extern unsigned long ia64_iobase;
+
+struct task_struct *task_for_booting_cpu;
+
+/*
+ * State for each CPU
+ */
+DEFINE_PER_CPU(int, cpu_state);
+
+cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+
+int smp_num_siblings = 1;
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+volatile int ia64_cpu_to_sapicid[NR_CPUS];
+EXPORT_SYMBOL(ia64_cpu_to_sapicid);
+
+static volatile cpumask_t cpu_callin_map;
+
+struct smp_boot_data smp_boot_data __initdata;
+
+unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */
+
+char __initdata no_int_routing;
+
+unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
+
+#ifdef CONFIG_FORCE_CPEI_RETARGET
+#define CPEI_OVERRIDE_DEFAULT	(1)
+#else
+#define CPEI_OVERRIDE_DEFAULT	(0)
+#endif
+
+unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;
+
+static int __init
+cmdl_force_cpei(char *str)
+{
+	int value=0;
+
+	get_option (&str, &value);
+	force_cpei_retarget = value;
+
+	return 1;
+}
+
+__setup("force_cpei=", cmdl_force_cpei);
+
+static int __init
+nointroute (char *str)
+{
+	no_int_routing = 1;
+	printk ("no_int_routing on\n");
+	return 1;
+}
+
+__setup("nointroute", nointroute);
+
+static void fix_b0_for_bsp(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpuid;
+	static int fix_bsp_b0 = 1;
+
+	cpuid = smp_processor_id();
+
+	/*
+	 * Cache the b0 value on the first AP that comes up
+	 */
+	if (!(fix_bsp_b0 && cpuid))
+		return;
+
+	sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0];
+	printk ("Fixed BSP b0 value from CPU %d\n", cpuid);
+
+	fix_bsp_b0 = 0;
+#endif
+}
+
+void
+sync_master (void *arg)
+{
+	unsigned long flags, i;
+
+	go[MASTER] = 0;
+
+	local_irq_save(flags);
+	{
+		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
+			while (!go[MASTER])
+				cpu_relax();
+			go[MASTER] = 0;
+			go[SLAVE] = ia64_get_itc();
+		}
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Return the number of cycles by which our itc differs from the itc on the master
+ * (time-keeper) CPU.  A positive number indicates our itc is ahead of the master,
+ * negative that it is behind.
+ */
+static inline long
+get_delta (long *rt, long *master)
+{
+	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+	unsigned long tcenter, t0, t1, tm;
+	long i;
+
+	for (i = 0; i < NUM_ITERS; ++i) {
+		t0 = ia64_get_itc();
+		go[MASTER] = 1;
+		while (!(tm = go[SLAVE]))
+			cpu_relax();
+		go[SLAVE] = 0;
+		t1 = ia64_get_itc();
+
+		if (t1 - t0 < best_t1 - best_t0)
+			best_t0 = t0, best_t1 = t1, best_tm = tm;
+	}
+
+	*rt = best_t1 - best_t0;
+	*master = best_tm - best_t0;
+
+	/* average best_t0 and best_t1 without overflow: */
+	tcenter = (best_t0/2 + best_t1/2);
+	if (best_t0 % 2 + best_t1 % 2 == 2)
+		++tcenter;
+	return tcenter - best_tm;
+}
+
+/*
+ * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU
+ * (normally the time-keeper CPU).  We use a closed loop to eliminate the possibility of
+ * unaccounted-for errors (such as getting a machine check in the middle of a calibration
+ * step).  The basic idea is for the slave to ask the master what itc value it has and to
+ * read its own itc before and after the master responds.  Each iteration gives us three
+ * timestamps:
+ *
+ *	slave		master
+ *
+ *	t0 ---\
+ *             ---\
+ *		   --->
+ *			tm
+ *		   /---
+ *	       /---
+ *	t1 <---
+ *
+ *
+ * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0
+ * and t1.  If we achieve this, the clocks are synchronized provided the interconnect
+ * between the slave and the master is symmetric.  Even if the interconnect were
+ * asymmetric, we would still know that the synchronization error is smaller than the
+ * roundtrip latency (t0 - t1).
+ *
+ * When the interconnect is quiet and symmetric, this lets us synchronize the itc to
+ * within one or two cycles.  However, we can only *guarantee* that the synchronization is
+ * accurate to within a round-trip time, which is typically in the range of several
+ * hundred cycles (e.g., ~500 cycles).  In practice, this means that the itc's are usually
+ * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better
+ * than half a micro second or so.
+ */
+void
+ia64_sync_itc (unsigned int master)
+{
+	long i, delta, adj, adjust_latency = 0, done = 0;
+	unsigned long flags, rt, master_time_stamp, bound;
+#if DEBUG_ITC_SYNC
+	struct {
+		long rt;	/* roundtrip time */
+		long master;	/* master's timestamp */
+		long diff;	/* difference between midpoint and master's timestamp */
+		long lat;	/* estimate of itc adjustment latency */
+	} t[NUM_ROUNDS];
+#endif
+
+	/*
+	 * Make sure local timer ticks are disabled while we sync.  If
+	 * they were enabled, we'd have to worry about nasty issues
+	 * like setting the ITC ahead of (or a long time before) the
+	 * next scheduled tick.
+	 */
+	BUG_ON((ia64_get_itv() & (1 << 16)) == 0);
+
+	go[MASTER] = 1;
+
+	if (smp_call_function_single(master, sync_master, NULL, 0) < 0) {
+		printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master);
+		return;
+	}
+
+	while (go[MASTER])
+		cpu_relax();	/* wait for master to be ready */
+
+	spin_lock_irqsave(&itc_sync_lock, flags);
+	{
+		for (i = 0; i < NUM_ROUNDS; ++i) {
+			delta = get_delta(&rt, &master_time_stamp);
+			if (delta == 0) {
+				done = 1;	/* let's lock on to this... */
+				bound = rt;
+			}
+
+			if (!done) {
+				if (i > 0) {
+					adjust_latency += -delta;
+					adj = -delta + adjust_latency/4;
+				} else
+					adj = -delta;
+
+				ia64_set_itc(ia64_get_itc() + adj);
+			}
+#if DEBUG_ITC_SYNC
+			t[i].rt = rt;
+			t[i].master = master_time_stamp;
+			t[i].diff = delta;
+			t[i].lat = adjust_latency/4;
+#endif
+		}
+	}
+	spin_unlock_irqrestore(&itc_sync_lock, flags);
+
+#if DEBUG_ITC_SYNC
+	for (i = 0; i < NUM_ROUNDS; ++i)
+		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
+		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
+#endif
+
+	printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, "
+	       "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt);
+}
+
+/*
+ * Ideally sets up per-cpu profiling hooks.  Doesn't do much now...
+ */
+static inline void __devinit
+smp_setup_percpu_timer (void)
+{
+}
+
+static void __cpuinit
+smp_callin (void)
+{
+	int cpuid, phys_id, itc_master;
+	struct cpuinfo_ia64 *last_cpuinfo, *this_cpuinfo;
+	extern void ia64_init_itm(void);
+	extern volatile int time_keeper_id;
+
+#ifdef CONFIG_PERFMON
+	extern void pfm_init_percpu(void);
+#endif
+
+	cpuid = smp_processor_id();
+	phys_id = hard_smp_processor_id();
+	itc_master = time_keeper_id;
+
+	if (cpu_online(cpuid)) {
+		printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
+		       phys_id, cpuid);
+		BUG();
+	}
+
+	fix_b0_for_bsp();
+
+	/*
+	 * numa_node_id() works after this.
+	 */
+	set_numa_node(cpu_to_node_map[cpuid]);
+	set_numa_mem(local_memory_node(cpu_to_node_map[cpuid]));
+
+	ipi_call_lock_irq();
+	spin_lock(&vector_lock);
+	/* Setup the per cpu irq handling data structures */
+	__setup_vector_irq(cpuid);
+	notify_cpu_starting(cpuid);
+	cpu_set(cpuid, cpu_online_map);
+	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
+	spin_unlock(&vector_lock);
+	ipi_call_unlock_irq();
+
+	smp_setup_percpu_timer();
+
+	ia64_mca_cmc_vector_setup();	/* Setup vector on AP */
+
+#ifdef CONFIG_PERFMON
+	pfm_init_percpu();
+#endif
+
+	local_irq_enable();
+
+	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
+		/*
+		 * Synchronize the ITC with the BP.  Need to do this after irqs are
+		 * enabled because ia64_sync_itc() calls smp_call_function_single(), which
+		 * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls
+		 * local_bh_enable(), which bugs out if irqs are not enabled...
+		 */
+		Dprintk("Going to syncup ITC with ITC Master.\n");
+		ia64_sync_itc(itc_master);
+	}
+
+	/*
+	 * Get our bogomips.
+	 */
+	ia64_init_itm();
+
+	/*
+	 * Delay calibration can be skipped if new processor is identical to the
+	 * previous processor.
+	 */
+	last_cpuinfo = cpu_data(cpuid - 1);
+	this_cpuinfo = local_cpu_data;
+	if (last_cpuinfo->itc_freq != this_cpuinfo->itc_freq ||
+	    last_cpuinfo->proc_freq != this_cpuinfo->proc_freq ||
+	    last_cpuinfo->features != this_cpuinfo->features ||
+	    last_cpuinfo->revision != this_cpuinfo->revision ||
+	    last_cpuinfo->family != this_cpuinfo->family ||
+	    last_cpuinfo->archrev != this_cpuinfo->archrev ||
+	    last_cpuinfo->model != this_cpuinfo->model)
+		calibrate_delay();
+	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
+
+	/*
+	 * Allow the master to continue.
+	 */
+	cpu_set(cpuid, cpu_callin_map);
+	Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
+}
+
+
+/*
+ * Activate a secondary processor.  head.S calls this.
+ */
+int __cpuinit
+start_secondary (void *unused)
+{
+	/* Early console may use I/O ports */
+	ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
+#ifndef CONFIG_PRINTK_TIME
+	Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
+#endif
+	efi_map_pal_code();
+	cpu_init();
+	preempt_disable();
+	smp_callin();
+
+	cpu_idle();
+	return 0;
+}
+
+struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
+{
+	return NULL;
+}
+
+struct create_idle {
+	struct work_struct work;
+	struct task_struct *idle;
+	struct completion done;
+	int cpu;
+};
+
+void __cpuinit
+do_fork_idle(struct work_struct *work)
+{
+	struct create_idle *c_idle =
+		container_of(work, struct create_idle, work);
+
+	c_idle->idle = fork_idle(c_idle->cpu);
+	complete(&c_idle->done);
+}
+
+static int __cpuinit
+do_boot_cpu (int sapicid, int cpu)
+{
+	int timeout;
+	struct create_idle c_idle = {
+		.work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
+		.cpu	= cpu,
+		.done	= COMPLETION_INITIALIZER(c_idle.done),
+	};
+
+	/*
+	 * We can't use kernel_thread since we must avoid to
+	 * reschedule the child.
+	 */
+ 	c_idle.idle = get_idle_for_cpu(cpu);
+ 	if (c_idle.idle) {
+		init_idle(c_idle.idle, cpu);
+ 		goto do_rest;
+	}
+
+	schedule_work(&c_idle.work);
+	wait_for_completion(&c_idle.done);
+
+	if (IS_ERR(c_idle.idle))
+		panic("failed fork for CPU %d", cpu);
+
+	set_idle_for_cpu(cpu, c_idle.idle);
+
+do_rest:
+	task_for_booting_cpu = c_idle.idle;
+
+	Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
+
+	set_brendez_area(cpu);
+	platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
+
+	/*
+	 * Wait 10s total for the AP to start
+	 */
+	Dprintk("Waiting on callin_map ...");
+	for (timeout = 0; timeout < 100000; timeout++) {
+		if (cpu_isset(cpu, cpu_callin_map))
+			break;  /* It has booted */
+		udelay(100);
+	}
+	Dprintk("\n");
+
+	if (!cpu_isset(cpu, cpu_callin_map)) {
+		printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
+		ia64_cpu_to_sapicid[cpu] = -1;
+		cpu_clear(cpu, cpu_online_map);  /* was set in smp_callin() */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int __init
+decay (char *str)
+{
+	int ticks;
+	get_option (&str, &ticks);
+	return 1;
+}
+
+__setup("decay=", decay);
+
+/*
+ * Initialize the logical CPU number to SAPICID mapping
+ */
+void __init
+smp_build_cpu_map (void)
+{
+	int sapicid, cpu, i;
+	int boot_cpu_id = hard_smp_processor_id();
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		ia64_cpu_to_sapicid[cpu] = -1;
+	}
+
+	ia64_cpu_to_sapicid[0] = boot_cpu_id;
+	cpus_clear(cpu_present_map);
+	set_cpu_present(0, true);
+	set_cpu_possible(0, true);
+	for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
+		sapicid = smp_boot_data.cpu_phys_id[i];
+		if (sapicid == boot_cpu_id)
+			continue;
+		set_cpu_present(cpu, true);
+		set_cpu_possible(cpu, true);
+		ia64_cpu_to_sapicid[cpu] = sapicid;
+		cpu++;
+	}
+}
+
+/*
+ * Cycle through the APs sending Wakeup IPIs to boot each.
+ */
+void __init
+smp_prepare_cpus (unsigned int max_cpus)
+{
+	int boot_cpu_id = hard_smp_processor_id();
+
+	/*
+	 * Initialize the per-CPU profiling counter/multiplier
+	 */
+
+	smp_setup_percpu_timer();
+
+	/*
+	 * We have the boot CPU online for sure.
+	 */
+	cpu_set(0, cpu_online_map);
+	cpu_set(0, cpu_callin_map);
+
+	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
+	ia64_cpu_to_sapicid[0] = boot_cpu_id;
+
+	printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id);
+
+	current_thread_info()->cpu = 0;
+
+	/*
+	 * If SMP should be disabled, then really disable it!
+	 */
+	if (!max_cpus) {
+		printk(KERN_INFO "SMP mode deactivated.\n");
+		init_cpu_online(cpumask_of(0));
+		init_cpu_present(cpumask_of(0));
+		init_cpu_possible(cpumask_of(0));
+		return;
+	}
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+	cpu_set(smp_processor_id(), cpu_online_map);
+	cpu_set(smp_processor_id(), cpu_callin_map);
+	set_numa_node(cpu_to_node_map[smp_processor_id()]);
+	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+	paravirt_post_smp_prepare_boot_cpu();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static inline void
+clear_cpu_sibling_map(int cpu)
+{
+	int i;
+
+	for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
+	for_each_cpu_mask(i, cpu_core_map[cpu])
+		cpu_clear(cpu, cpu_core_map[i]);
+
+	per_cpu(cpu_sibling_map, cpu) = cpu_core_map[cpu] = CPU_MASK_NONE;
+}
+
+static void
+remove_siblinginfo(int cpu)
+{
+	int last = 0;
+
+	if (cpu_data(cpu)->threads_per_core == 1 &&
+	    cpu_data(cpu)->cores_per_socket == 1) {
+		cpu_clear(cpu, cpu_core_map[cpu]);
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, cpu));
+		return;
+	}
+
+	last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0);
+
+	/* remove it from all sibling map's */
+	clear_cpu_sibling_map(cpu);
+}
+
+extern void fixup_irqs(void);
+
+int migrate_platform_irqs(unsigned int cpu)
+{
+	int new_cpei_cpu;
+	struct irq_data *data = NULL;
+	const struct cpumask *mask;
+	int 		retval = 0;
+
+	/*
+	 * dont permit CPEI target to removed.
+	 */
+	if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) {
+		printk ("CPU (%d) is CPEI Target\n", cpu);
+		if (can_cpei_retarget()) {
+			/*
+			 * Now re-target the CPEI to a different processor
+			 */
+			new_cpei_cpu = any_online_cpu(cpu_online_map);
+			mask = cpumask_of(new_cpei_cpu);
+			set_cpei_target_cpu(new_cpei_cpu);
+			data = irq_get_irq_data(ia64_cpe_irq);
+			/*
+			 * Switch for now, immediately, we need to do fake intr
+			 * as other interrupts, but need to study CPEI behaviour with
+			 * polling before making changes.
+			 */
+			if (data && data->chip) {
+				data->chip->irq_disable(data);
+				data->chip->irq_set_affinity(data, mask, false);
+				data->chip->irq_enable(data);
+				printk ("Re-targeting CPEI to cpu %d\n", new_cpei_cpu);
+			}
+		}
+		if (!data) {
+			printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
+			retval = -EBUSY;
+		}
+	}
+	return retval;
+}
+
+/* must be called with cpucontrol mutex held */
+int __cpu_disable(void)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * dont permit boot processor for now
+	 */
+	if (cpu == 0 && !bsp_remove_ok) {
+		printk ("Your platform does not support removal of BSP\n");
+		return (-EBUSY);
+	}
+
+	if (ia64_platform_is("sn2")) {
+		if (!sn_cpu_disable_allowed(cpu))
+			return -EBUSY;
+	}
+
+	cpu_clear(cpu, cpu_online_map);
+
+	if (migrate_platform_irqs(cpu)) {
+		cpu_set(cpu, cpu_online_map);
+		return -EBUSY;
+	}
+
+	remove_siblinginfo(cpu);
+	fixup_irqs();
+	local_flush_tlb_all();
+	cpu_clear(cpu, cpu_callin_map);
+	return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	unsigned int i;
+
+	for (i = 0; i < 100; i++) {
+		/* They ack this in play_dead by setting CPU_DEAD */
+		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+		{
+			printk ("CPU %d is now offline\n", cpu);
+			return;
+		}
+		msleep(100);
+	}
+ 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void
+smp_cpus_done (unsigned int dummy)
+{
+	int cpu;
+	unsigned long bogosum = 0;
+
+	/*
+	 * Allow the user to impress friends.
+	 */
+
+	for_each_online_cpu(cpu) {
+		bogosum += cpu_data(cpu)->loops_per_jiffy;
+	}
+
+	printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+	       (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
+}
+
+static inline void __devinit
+set_cpu_sibling_map(int cpu)
+{
+	int i;
+
+	for_each_online_cpu(i) {
+		if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) {
+			cpu_set(i, cpu_core_map[cpu]);
+			cpu_set(cpu, cpu_core_map[i]);
+			if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
+				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
+			}
+		}
+	}
+}
+
+int __cpuinit
+__cpu_up (unsigned int cpu)
+{
+	int ret;
+	int sapicid;
+
+	sapicid = ia64_cpu_to_sapicid[cpu];
+	if (sapicid == -1)
+		return -EINVAL;
+
+	/*
+	 * Already booted cpu? not valid anymore since we dont
+	 * do idle loop tightspin anymore.
+	 */
+	if (cpu_isset(cpu, cpu_callin_map))
+		return -EINVAL;
+
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+	/* Processor goes to start_secondary(), sets online flag */
+	ret = do_boot_cpu(sapicid, cpu);
+	if (ret < 0)
+		return ret;
+
+	if (cpu_data(cpu)->threads_per_core == 1 &&
+	    cpu_data(cpu)->cores_per_socket == 1) {
+		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
+		cpu_set(cpu, cpu_core_map[cpu]);
+		return 0;
+	}
+
+	set_cpu_sibling_map(cpu);
+
+	return 0;
+}
+
+/*
+ * Assume that CPUs have been discovered by some platform-dependent interface.  For
+ * SoftSDV/Lion, that would be ACPI.
+ *
+ * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
+ */
+void __init
+init_smp_config(void)
+{
+	struct fptr {
+		unsigned long fp;
+		unsigned long gp;
+	} *ap_startup;
+	long sal_ret;
+
+	/* Tell SAL where to drop the APs.  */
+	ap_startup = (struct fptr *) start_ap;
+	sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
+				       ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0);
+	if (sal_ret < 0)
+		printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
+		       ia64_sal_strerror(sal_ret));
+}
+
+/*
+ * identify_siblings(cpu) gets called from identify_cpu. This populates the 
+ * information related to logical execution units in per_cpu_data structure.
+ */
+void __devinit
+identify_siblings(struct cpuinfo_ia64 *c)
+{
+	long status;
+	u16 pltid;
+	pal_logical_to_physical_t info;
+
+	status = ia64_pal_logical_to_phys(-1, &info);
+	if (status != PAL_STATUS_SUCCESS) {
+		if (status != PAL_STATUS_UNIMPLEMENTED) {
+			printk(KERN_ERR
+				"ia64_pal_logical_to_phys failed with %ld\n",
+				status);
+			return;
+		}
+
+		info.overview_ppid = 0;
+		info.overview_cpp  = 1;
+		info.overview_tpc  = 1;
+	}
+
+	status = ia64_sal_physical_id_info(&pltid);
+	if (status != PAL_STATUS_SUCCESS) {
+		if (status != PAL_STATUS_UNIMPLEMENTED)
+			printk(KERN_ERR
+				"ia64_sal_pltid failed with %ld\n",
+				status);
+		return;
+	}
+
+	c->socket_id =  (pltid << 8) | info.overview_ppid;
+
+	if (info.overview_cpp == 1 && info.overview_tpc == 1)
+		return;
+
+	c->cores_per_socket = info.overview_cpp;
+	c->threads_per_core = info.overview_tpc;
+	c->num_log = info.overview_num_log;
+
+	c->core_id = info.log1_cid;
+	c->thread_id = info.log1_tid;
+}
+
+/*
+ * returns non zero, if multi-threading is enabled
+ * on at least one physical package. Due to hotplug cpu
+ * and (maxcpus=), all threads may not necessarily be enabled
+ * even though the processor supports multi-threading.
+ */
+int is_multithreading_enabled(void)
+{
+	int i, j;
+
+	for_each_present_cpu(i) {
+		for_each_present_cpu(j) {
+			if (j == i)
+				continue;
+			if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) {
+				if (cpu_data(j)->core_id == cpu_data(i)->core_id)
+					return 1;
+			}
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(is_multithreading_enabled);
diff --git a/arch/ia64/kernel/stacktrace.c b/arch/ia64/kernel/stacktrace.c
new file mode 100644
index 00000000..5af2783a
--- /dev/null
+++ b/arch/ia64/kernel/stacktrace.c
@@ -0,0 +1,39 @@
+/*
+ * arch/ia64/kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ */
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <linux/module.h>
+
+static void
+ia64_do_save_stack(struct unw_frame_info *info, void *arg)
+{
+	struct stack_trace *trace = arg;
+	unsigned long ip;
+	int skip = trace->skip;
+
+	trace->nr_entries = 0;
+	do {
+		unw_get_ip(info, &ip);
+		if (ip == 0)
+			break;
+		if (skip == 0) {
+			trace->entries[trace->nr_entries++] = ip;
+			if (trace->nr_entries == trace->max_entries)
+				break;
+		} else
+			skip--;
+	} while (unw_unwind(info) >= 0);
+}
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ */
+void save_stack_trace(struct stack_trace *trace)
+{
+	unw_init_running(ia64_do_save_stack, trace);
+}
+EXPORT_SYMBOL(save_stack_trace);
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
new file mode 100644
index 00000000..609d5005
--- /dev/null
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -0,0 +1,209 @@
+/*
+ * This file contains various system calls that have different calling
+ * conventions on different platforms.
+ *
+ * Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <linux/shm.h>
+#include <linux/file.h>		/* doh, must come after sched.h... */
+#include <linux/smp.h>
+#include <linux/syscalls.h>
+#include <linux/highuid.h>
+#include <linux/hugetlb.h>
+
+#include <asm/shmparam.h>
+#include <asm/uaccess.h>
+
+unsigned long
+arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len,
+			unsigned long pgoff, unsigned long flags)
+{
+	long map_shared = (flags & MAP_SHARED);
+	unsigned long start_addr, align_mask = PAGE_SIZE - 1;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
+	if (len > RGN_MAP_LIMIT)
+		return -ENOMEM;
+
+	/* handle fixed mapping: prevent overlap with huge pages */
+	if (flags & MAP_FIXED) {
+		if (is_hugepage_only_range(mm, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
+#ifdef CONFIG_HUGETLB_PAGE
+	if (REGION_NUMBER(addr) == RGN_HPAGE)
+		addr = 0;
+#endif
+	if (!addr)
+		addr = mm->free_area_cache;
+
+	if (map_shared && (TASK_SIZE > 0xfffffffful))
+		/*
+		 * For 64-bit tasks, align shared segments to 1MB to avoid potential
+		 * performance penalty due to virtual aliasing (see ASDM).  For 32-bit
+		 * tasks, we prefer to avoid exhausting the address space too quickly by
+		 * limiting alignment to a single page.
+		 */
+		align_mask = SHMLBA - 1;
+
+  full_search:
+	start_addr = addr = (addr + align_mask) & ~align_mask;
+
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		/* At this point:  (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) {
+			if (start_addr != TASK_UNMAPPED_BASE) {
+				/* Start a new search --- just in case we missed some holes.  */
+				addr = TASK_UNMAPPED_BASE;
+				goto full_search;
+			}
+			return -ENOMEM;
+		}
+		if (!vma || addr + len <= vma->vm_start) {
+			/* Remember the address where we stopped this search:  */
+			mm->free_area_cache = addr + len;
+			return addr;
+		}
+		addr = (vma->vm_end + align_mask) & ~align_mask;
+	}
+}
+
+asmlinkage long
+ia64_getpriority (int which, int who)
+{
+	long prio;
+
+	prio = sys_getpriority(which, who);
+	if (prio >= 0) {
+		force_successful_syscall_return();
+		prio = 20 - prio;
+	}
+	return prio;
+}
+
+/* XXX obsolete, but leave it here until the old libc is gone... */
+asmlinkage unsigned long
+sys_getpagesize (void)
+{
+	return PAGE_SIZE;
+}
+
+asmlinkage unsigned long
+ia64_brk (unsigned long brk)
+{
+	unsigned long retval = sys_brk(brk);
+	force_successful_syscall_return();
+	return retval;
+}
+
+/*
+ * On IA-64, we return the two file descriptors in ret0 and ret1 (r8
+ * and r9) as this is faster than doing a copy_to_user().
+ */
+asmlinkage long
+sys_ia64_pipe (void)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	int fd[2];
+	int retval;
+
+	retval = do_pipe_flags(fd, 0);
+	if (retval)
+		goto out;
+	retval = fd[0];
+	regs->r9 = fd[1];
+  out:
+	return retval;
+}
+
+int ia64_mmap_check(unsigned long addr, unsigned long len,
+		unsigned long flags)
+{
+	unsigned long roff;
+
+	/*
+	 * Don't permit mappings into unmapped space, the virtual page table
+	 * of a region, or across a region boundary.  Note: RGN_MAP_LIMIT is
+	 * equal to 2^n-PAGE_SIZE (for some integer n <= 61) and len > 0.
+	 */
+	roff = REGION_OFFSET(addr);
+	if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len)))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * mmap2() is like mmap() except that the offset is expressed in units
+ * of PAGE_SIZE (instead of bytes).  This allows to mmap2() (pieces
+ * of) files that are larger than the address space of the CPU.
+ */
+asmlinkage unsigned long
+sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff)
+{
+	addr = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
+	if (!IS_ERR((void *) addr))
+		force_successful_syscall_return();
+	return addr;
+}
+
+asmlinkage unsigned long
+sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off)
+{
+	if (offset_in_page(off) != 0)
+		return -EINVAL;
+
+	addr = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+	if (!IS_ERR((void *) addr))
+		force_successful_syscall_return();
+	return addr;
+}
+
+asmlinkage unsigned long
+ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags,
+	     unsigned long new_addr)
+{
+	extern unsigned long do_mremap (unsigned long addr,
+					unsigned long old_len,
+					unsigned long new_len,
+					unsigned long flags,
+					unsigned long new_addr);
+
+	down_write(&current->mm->mmap_sem);
+	{
+		addr = do_mremap(addr, old_len, new_len, flags, new_addr);
+	}
+	up_write(&current->mm->mmap_sem);
+
+	if (IS_ERR((void *) addr))
+		return addr;
+
+	force_successful_syscall_return();
+	return addr;
+}
+
+#ifndef CONFIG_PCI
+
+asmlinkage long
+sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
+		    void *buf)
+{
+	return -ENOSYS;
+}
+
+asmlinkage long
+sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
+		     void *buf)
+{
+	return -ENOSYS;
+}
+
+#endif /* CONFIG_PCI */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
new file mode 100644
index 00000000..85118dfe
--- /dev/null
+++ b/arch/ia64/kernel/time.c
@@ -0,0 +1,490 @@
+/*
+ * linux/arch/ia64/kernel/time.c
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999-2000 VA Linux Systems
+ * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/interrupt.h>
+#include <linux/efi.h>
+#include <linux/timex.h>
+#include <linux/clocksource.h>
+#include <linux/platform_device.h>
+
+#include <asm/machvec.h>
+#include <asm/delay.h>
+#include <asm/hw_irq.h>
+#include <asm/paravirt.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+
+#include "fsyscall_gtod_data.h"
+
+static cycle_t itc_get_cycles(struct clocksource *cs);
+
+struct fsyscall_gtod_data_t fsyscall_gtod_data = {
+	.lock = __SEQLOCK_UNLOCKED(fsyscall_gtod_data.lock),
+};
+
+struct itc_jitter_data_t itc_jitter_data;
+
+volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
+
+#ifdef CONFIG_IA64_DEBUG_IRQ
+
+unsigned long last_cli_ip;
+EXPORT_SYMBOL(last_cli_ip);
+
+#endif
+
+#ifdef CONFIG_PARAVIRT
+/* We need to define a real function for sched_clock, to override the
+   weak default version */
+unsigned long long sched_clock(void)
+{
+        return paravirt_sched_clock();
+}
+#endif
+
+#ifdef CONFIG_PARAVIRT
+static void
+paravirt_clocksource_resume(struct clocksource *cs)
+{
+	if (pv_time_ops.clocksource_resume)
+		pv_time_ops.clocksource_resume();
+}
+#endif
+
+static struct clocksource clocksource_itc = {
+	.name           = "itc",
+	.rating         = 350,
+	.read           = itc_get_cycles,
+	.mask           = CLOCKSOURCE_MASK(64),
+	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+#ifdef CONFIG_PARAVIRT
+	.resume		= paravirt_clocksource_resume,
+#endif
+};
+static struct clocksource *itc_clocksource;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+
+#include <linux/kernel_stat.h>
+
+extern cputime_t cycle_to_cputime(u64 cyc);
+
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
+{
+	struct thread_info *pi = task_thread_info(prev);
+	struct thread_info *ni = task_thread_info(next);
+	cputime_t delta_stime, delta_utime;
+	__u64 now;
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
+	if (idle_task(smp_processor_id()) != prev)
+		account_system_time(prev, 0, delta_stime, delta_stime);
+	else
+		account_idle_time(delta_stime);
+
+	if (pi->ac_utime) {
+		delta_utime = cycle_to_cputime(pi->ac_utime);
+		account_user_time(prev, delta_utime, delta_utime);
+	}
+
+	pi->ac_stamp = ni->ac_stamp = now;
+	ni->ac_stime = ni->ac_utime = 0;
+}
+
+/*
+ * Account time for a transition between system, hard irq or soft irq state.
+ * Note that this function is called with interrupts enabled.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	unsigned long flags;
+	cputime_t delta_stime;
+	__u64 now;
+
+	local_irq_save(flags);
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+	if (irq_count() || idle_task(smp_processor_id()) != tsk)
+		account_system_time(tsk, 0, delta_stime, delta_stime);
+	else
+		account_idle_time(delta_stime);
+	ti->ac_stime = 0;
+
+	ti->ac_stamp = now;
+
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(account_system_vtime);
+
+/*
+ * Called from the timer interrupt handler to charge accumulated user time
+ * to the current process.  Must be called with interrupts disabled.
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+	struct thread_info *ti = task_thread_info(p);
+	cputime_t delta_utime;
+
+	if (ti->ac_utime) {
+		delta_utime = cycle_to_cputime(ti->ac_utime);
+		account_user_time(p, delta_utime, delta_utime);
+		ti->ac_utime = 0;
+	}
+}
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
+static irqreturn_t
+timer_interrupt (int irq, void *dev_id)
+{
+	unsigned long new_itm;
+
+	if (cpu_is_offline(smp_processor_id())) {
+		return IRQ_HANDLED;
+	}
+
+	platform_timer_interrupt(irq, dev_id);
+
+	new_itm = local_cpu_data->itm_next;
+
+	if (!time_after(ia64_get_itc(), new_itm))
+		printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
+		       ia64_get_itc(), new_itm);
+
+	profile_tick(CPU_PROFILING);
+
+	if (paravirt_do_steal_accounting(&new_itm))
+		goto skip_process_time_accounting;
+
+	while (1) {
+		update_process_times(user_mode(get_irq_regs()));
+
+		new_itm += local_cpu_data->itm_delta;
+
+		if (smp_processor_id() == time_keeper_id)
+			xtime_update(1);
+
+		local_cpu_data->itm_next = new_itm;
+
+		if (time_after(new_itm, ia64_get_itc()))
+			break;
+
+		/*
+		 * Allow IPIs to interrupt the timer loop.
+		 */
+		local_irq_enable();
+		local_irq_disable();
+	}
+
+skip_process_time_accounting:
+
+	do {
+		/*
+		 * If we're too close to the next clock tick for
+		 * comfort, we increase the safety margin by
+		 * intentionally dropping the next tick(s).  We do NOT
+		 * update itm.next because that would force us to call
+		 * xtime_update() which in turn would let our clock run
+		 * too fast (with the potentially devastating effect
+		 * of losing monotony of time).
+		 */
+		while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
+			new_itm += local_cpu_data->itm_delta;
+		ia64_set_itm(new_itm);
+		/* double check, in case we got hit by a (slow) PMI: */
+	} while (time_after_eq(ia64_get_itc(), new_itm));
+	return IRQ_HANDLED;
+}
+
+/*
+ * Encapsulate access to the itm structure for SMP.
+ */
+void
+ia64_cpu_local_tick (void)
+{
+	int cpu = smp_processor_id();
+	unsigned long shift = 0, delta;
+
+	/* arrange for the cycle counter to generate a timer interrupt: */
+	ia64_set_itv(IA64_TIMER_VECTOR);
+
+	delta = local_cpu_data->itm_delta;
+	/*
+	 * Stagger the timer tick for each CPU so they don't occur all at (almost) the
+	 * same time:
+	 */
+	if (cpu) {
+		unsigned long hi = 1UL << ia64_fls(cpu);
+		shift = (2*(cpu - hi) + 1) * delta/hi/2;
+	}
+	local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
+	ia64_set_itm(local_cpu_data->itm_next);
+}
+
+static int nojitter;
+
+static int __init nojitter_setup(char *str)
+{
+	nojitter = 1;
+	printk("Jitter checking for ITC timers disabled\n");
+	return 1;
+}
+
+__setup("nojitter", nojitter_setup);
+
+
+void __devinit
+ia64_init_itm (void)
+{
+	unsigned long platform_base_freq, itc_freq;
+	struct pal_freq_ratio itc_ratio, proc_ratio;
+	long status, platform_base_drift, itc_drift;
+
+	/*
+	 * According to SAL v2.6, we need to use a SAL call to determine the platform base
+	 * frequency and then a PAL call to determine the frequency ratio between the ITC
+	 * and the base frequency.
+	 */
+	status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+				    &platform_base_freq, &platform_base_drift);
+	if (status != 0) {
+		printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
+	} else {
+		status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio);
+		if (status != 0)
+			printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status);
+	}
+	if (status != 0) {
+		/* invent "random" values */
+		printk(KERN_ERR
+		       "SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
+		platform_base_freq = 100000000;
+		platform_base_drift = -1;	/* no drift info */
+		itc_ratio.num = 3;
+		itc_ratio.den = 1;
+	}
+	if (platform_base_freq < 40000000) {
+		printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n",
+		       platform_base_freq);
+		platform_base_freq = 75000000;
+		platform_base_drift = -1;
+	}
+	if (!proc_ratio.den)
+		proc_ratio.den = 1;	/* avoid division by zero */
+	if (!itc_ratio.den)
+		itc_ratio.den = 1;	/* avoid division by zero */
+
+	itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
+
+	local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
+	printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, "
+	       "ITC freq=%lu.%03luMHz", smp_processor_id(),
+	       platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
+	       itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
+
+	if (platform_base_drift != -1) {
+		itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den;
+		printk("+/-%ldppm\n", itc_drift);
+	} else {
+		itc_drift = -1;
+		printk("\n");
+	}
+
+	local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
+	local_cpu_data->itc_freq = itc_freq;
+	local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC;
+	local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT)
+					+ itc_freq/2)/itc_freq;
+
+	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
+#ifdef CONFIG_SMP
+		/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
+		 * Jitter compensation requires a cmpxchg which may limit
+		 * the scalability of the syscalls for retrieving time.
+		 * The ITC synchronization is usually successful to within a few
+		 * ITC ticks but this is not a sure thing. If you need to improve
+		 * timer performance in SMP situations then boot the kernel with the
+		 * "nojitter" option. However, doing so may result in time fluctuating (maybe
+		 * even going backward) if the ITC offsets between the individual CPUs
+		 * are too large.
+		 */
+		if (!nojitter)
+			itc_jitter_data.itc_jitter = 1;
+#endif
+	} else
+		/*
+		 * ITC is drifty and we have not synchronized the ITCs in smpboot.c.
+		 * ITC values may fluctuate significantly between processors.
+		 * Clock should not be used for hrtimers. Mark itc as only
+		 * useful for boot and testing.
+		 *
+		 * Note that jitter compensation is off! There is no point of
+		 * synchronizing ITCs since they may be large differentials
+		 * that change over time.
+		 *
+		 * The only way to fix this would be to repeatedly sync the
+		 * ITCs. Until that time we have to avoid ITC.
+		 */
+		clocksource_itc.rating = 50;
+
+	paravirt_init_missing_ticks_accounting(smp_processor_id());
+
+	/* avoid softlock up message when cpu is unplug and plugged again. */
+	touch_softlockup_watchdog();
+
+	/* Setup the CPU local timer tick */
+	ia64_cpu_local_tick();
+
+	if (!itc_clocksource) {
+		clocksource_register_hz(&clocksource_itc,
+						local_cpu_data->itc_freq);
+		itc_clocksource = &clocksource_itc;
+	}
+}
+
+static cycle_t itc_get_cycles(struct clocksource *cs)
+{
+	unsigned long lcycle, now, ret;
+
+	if (!itc_jitter_data.itc_jitter)
+		return get_cycles();
+
+	lcycle = itc_jitter_data.itc_lastcycle;
+	now = get_cycles();
+	if (lcycle && time_after(lcycle, now))
+		return lcycle;
+
+	/*
+	 * Keep track of the last timer value returned.
+	 * In an SMP environment, you could lose out in contention of
+	 * cmpxchg. If so, your cmpxchg returns new value which the
+	 * winner of contention updated to. Use the new value instead.
+	 */
+	ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, now);
+	if (unlikely(ret != lcycle))
+		return ret;
+
+	return now;
+}
+
+
+static struct irqaction timer_irqaction = {
+	.handler =	timer_interrupt,
+	.flags =	IRQF_DISABLED | IRQF_IRQPOLL,
+	.name =		"timer"
+};
+
+static struct platform_device rtc_efi_dev = {
+	.name = "rtc-efi",
+	.id = -1,
+};
+
+static int __init rtc_init(void)
+{
+	if (platform_device_register(&rtc_efi_dev) < 0)
+		printk(KERN_ERR "unable to register rtc device...\n");
+
+	/* not necessarily an error */
+	return 0;
+}
+module_init(rtc_init);
+
+void read_persistent_clock(struct timespec *ts)
+{
+	efi_gettimeofday(ts);
+}
+
+void __init
+time_init (void)
+{
+	register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
+	ia64_init_itm();
+}
+
+/*
+ * Generic udelay assumes that if preemption is allowed and the thread
+ * migrates to another CPU, that the ITC values are synchronized across
+ * all CPUs.
+ */
+static void
+ia64_itc_udelay (unsigned long usecs)
+{
+	unsigned long start = ia64_get_itc();
+	unsigned long end = start + usecs*local_cpu_data->cyc_per_usec;
+
+	while (time_before(ia64_get_itc(), end))
+		cpu_relax();
+}
+
+void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay;
+
+void
+udelay (unsigned long usecs)
+{
+	(*ia64_udelay)(usecs);
+}
+EXPORT_SYMBOL(udelay);
+
+/* IA64 doesn't cache the timezone */
+void update_vsyscall_tz(void)
+{
+}
+
+void update_vsyscall(struct timespec *wall, struct timespec *wtm,
+			struct clocksource *c, u32 mult)
+{
+        unsigned long flags;
+
+        write_seqlock_irqsave(&fsyscall_gtod_data.lock, flags);
+
+        /* copy fsyscall clock data */
+        fsyscall_gtod_data.clk_mask = c->mask;
+        fsyscall_gtod_data.clk_mult = mult;
+        fsyscall_gtod_data.clk_shift = c->shift;
+        fsyscall_gtod_data.clk_fsys_mmio = c->fsys_mmio;
+        fsyscall_gtod_data.clk_cycle_last = c->cycle_last;
+
+	/* copy kernel time structures */
+        fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec;
+        fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec;
+	fsyscall_gtod_data.monotonic_time.tv_sec = wtm->tv_sec
+							+ wall->tv_sec;
+	fsyscall_gtod_data.monotonic_time.tv_nsec = wtm->tv_nsec
+							+ wall->tv_nsec;
+
+	/* normalize */
+	while (fsyscall_gtod_data.monotonic_time.tv_nsec >= NSEC_PER_SEC) {
+		fsyscall_gtod_data.monotonic_time.tv_nsec -= NSEC_PER_SEC;
+		fsyscall_gtod_data.monotonic_time.tv_sec++;
+	}
+
+        write_sequnlock_irqrestore(&fsyscall_gtod_data.lock, flags);
+}
+
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
new file mode 100644
index 00000000..0e0e0cc9
--- /dev/null
+++ b/arch/ia64/kernel/topology.c
@@ -0,0 +1,466 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file contains NUMA specific variables and functions which can
+ * be split away from DISCONTIGMEM and are used on NUMA machines with
+ * contiguous memory.
+ * 		2002/08/07 Erich Focht <efocht@ess.nec.de>
+ * Populate cpu entries in sysfs for non-numa systems as well
+ *  	Intel Corporation - Ashok Raj
+ * 02/27/2006 Zhang, Yanmin
+ *	Populate cpu cache entries in sysfs for cpu cache info
+ */
+
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/node.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/nodemask.h>
+#include <linux/notifier.h>
+#include <asm/mmzone.h>
+#include <asm/numa.h>
+#include <asm/cpu.h>
+
+static struct ia64_cpu *sysfs_cpus;
+
+void arch_fix_phys_package_id(int num, u32 slot)
+{
+#ifdef CONFIG_SMP
+	if (cpu_data(num)->socket_id == -1)
+		cpu_data(num)->socket_id = slot;
+#endif
+}
+EXPORT_SYMBOL_GPL(arch_fix_phys_package_id);
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+int __ref arch_register_cpu(int num)
+{
+#ifdef CONFIG_ACPI
+	/*
+	 * If CPEI can be re-targeted or if this is not
+	 * CPEI target, then it is hotpluggable
+	 */
+	if (can_cpei_retarget() || !is_cpu_cpei_target(num))
+		sysfs_cpus[num].cpu.hotpluggable = 1;
+	map_cpu_to_node(num, node_cpuid[num].nid);
+#endif
+	return register_cpu(&sysfs_cpus[num].cpu, num);
+}
+EXPORT_SYMBOL(arch_register_cpu);
+
+void __ref arch_unregister_cpu(int num)
+{
+	unregister_cpu(&sysfs_cpus[num].cpu);
+#ifdef CONFIG_ACPI
+	unmap_cpu_from_node(num, cpu_to_node(num));
+#endif
+}
+EXPORT_SYMBOL(arch_unregister_cpu);
+#else
+static int __init arch_register_cpu(int num)
+{
+	return register_cpu(&sysfs_cpus[num].cpu, num);
+}
+#endif /*CONFIG_HOTPLUG_CPU*/
+
+
+static int __init topology_init(void)
+{
+	int i, err = 0;
+
+#ifdef CONFIG_NUMA
+	/*
+	 * MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes?
+	 */
+	for_each_online_node(i) {
+		if ((err = register_one_node(i)))
+			goto out;
+	}
+#endif
+
+	sysfs_cpus = kzalloc(sizeof(struct ia64_cpu) * NR_CPUS, GFP_KERNEL);
+	if (!sysfs_cpus)
+		panic("kzalloc in topology_init failed - NR_CPUS too big?");
+
+	for_each_present_cpu(i) {
+		if((err = arch_register_cpu(i)))
+			goto out;
+	}
+out:
+	return err;
+}
+
+subsys_initcall(topology_init);
+
+
+/*
+ * Export cpu cache information through sysfs
+ */
+
+/*
+ *  A bunch of string array to get pretty printing
+ */
+static const char *cache_types[] = {
+	"",			/* not used */
+	"Instruction",
+	"Data",
+	"Unified"	/* unified */
+};
+
+static const char *cache_mattrib[]={
+	"WriteThrough",
+	"WriteBack",
+	"",		/* reserved */
+	""		/* reserved */
+};
+
+struct cache_info {
+	pal_cache_config_info_t	cci;
+	cpumask_t shared_cpu_map;
+	int level;
+	int type;
+	struct kobject kobj;
+};
+
+struct cpu_cache_info {
+	struct cache_info *cache_leaves;
+	int	num_cache_leaves;
+	struct kobject kobj;
+};
+
+static struct cpu_cache_info	all_cpu_cache_info[NR_CPUS] __cpuinitdata;
+#define LEAF_KOBJECT_PTR(x,y)    (&all_cpu_cache_info[x].cache_leaves[y])
+
+#ifdef CONFIG_SMP
+static void __cpuinit cache_shared_cpu_map_setup( unsigned int cpu,
+		struct cache_info * this_leaf)
+{
+	pal_cache_shared_info_t	csi;
+	int num_shared, i = 0;
+	unsigned int j;
+
+	if (cpu_data(cpu)->threads_per_core <= 1 &&
+		cpu_data(cpu)->cores_per_socket <= 1) {
+		cpu_set(cpu, this_leaf->shared_cpu_map);
+		return;
+	}
+
+	if (ia64_pal_cache_shared_info(this_leaf->level,
+					this_leaf->type,
+					0,
+					&csi) != PAL_STATUS_SUCCESS)
+		return;
+
+	num_shared = (int) csi.num_shared;
+	do {
+		for_each_possible_cpu(j)
+			if (cpu_data(cpu)->socket_id == cpu_data(j)->socket_id
+				&& cpu_data(j)->core_id == csi.log1_cid
+				&& cpu_data(j)->thread_id == csi.log1_tid)
+				cpu_set(j, this_leaf->shared_cpu_map);
+
+		i++;
+	} while (i < num_shared &&
+		ia64_pal_cache_shared_info(this_leaf->level,
+				this_leaf->type,
+				i,
+				&csi) == PAL_STATUS_SUCCESS);
+}
+#else
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu,
+		struct cache_info * this_leaf)
+{
+	cpu_set(cpu, this_leaf->shared_cpu_map);
+	return;
+}
+#endif
+
+static ssize_t show_coherency_line_size(struct cache_info *this_leaf,
+					char *buf)
+{
+	return sprintf(buf, "%u\n", 1 << this_leaf->cci.pcci_line_size);
+}
+
+static ssize_t show_ways_of_associativity(struct cache_info *this_leaf,
+					char *buf)
+{
+	return sprintf(buf, "%u\n", this_leaf->cci.pcci_assoc);
+}
+
+static ssize_t show_attributes(struct cache_info *this_leaf, char *buf)
+{
+	return sprintf(buf,
+			"%s\n",
+			cache_mattrib[this_leaf->cci.pcci_cache_attr]);
+}
+
+static ssize_t show_size(struct cache_info *this_leaf, char *buf)
+{
+	return sprintf(buf, "%uK\n", this_leaf->cci.pcci_cache_size / 1024);
+}
+
+static ssize_t show_number_of_sets(struct cache_info *this_leaf, char *buf)
+{
+	unsigned number_of_sets = this_leaf->cci.pcci_cache_size;
+	number_of_sets /= this_leaf->cci.pcci_assoc;
+	number_of_sets /= 1 << this_leaf->cci.pcci_line_size;
+
+	return sprintf(buf, "%u\n", number_of_sets);
+}
+
+static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
+{
+	ssize_t	len;
+	cpumask_t shared_cpu_map;
+
+	cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
+	len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
+	len += sprintf(buf+len, "\n");
+	return len;
+}
+
+static ssize_t show_type(struct cache_info *this_leaf, char *buf)
+{
+	int type = this_leaf->type + this_leaf->cci.pcci_unified;
+	return sprintf(buf, "%s\n", cache_types[type]);
+}
+
+static ssize_t show_level(struct cache_info *this_leaf, char *buf)
+{
+	return sprintf(buf, "%u\n", this_leaf->level);
+}
+
+struct cache_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cache_info *, char *);
+	ssize_t (*store)(struct cache_info *, const char *, size_t count);
+};
+
+#ifdef define_one_ro
+	#undef define_one_ro
+#endif
+#define define_one_ro(_name) \
+	static struct cache_attr _name = \
+__ATTR(_name, 0444, show_##_name, NULL)
+
+define_one_ro(level);
+define_one_ro(type);
+define_one_ro(coherency_line_size);
+define_one_ro(ways_of_associativity);
+define_one_ro(size);
+define_one_ro(number_of_sets);
+define_one_ro(shared_cpu_map);
+define_one_ro(attributes);
+
+static struct attribute * cache_default_attrs[] = {
+	&type.attr,
+	&level.attr,
+	&coherency_line_size.attr,
+	&ways_of_associativity.attr,
+	&attributes.attr,
+	&size.attr,
+	&number_of_sets.attr,
+	&shared_cpu_map.attr,
+	NULL
+};
+
+#define to_object(k) container_of(k, struct cache_info, kobj)
+#define to_attr(a) container_of(a, struct cache_attr, attr)
+
+static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char * buf)
+{
+	struct cache_attr *fattr = to_attr(attr);
+	struct cache_info *this_leaf = to_object(kobj);
+	ssize_t ret;
+
+	ret = fattr->show ? fattr->show(this_leaf, buf) : 0;
+	return ret;
+}
+
+static const struct sysfs_ops cache_sysfs_ops = {
+	.show   = cache_show
+};
+
+static struct kobj_type cache_ktype = {
+	.sysfs_ops	= &cache_sysfs_ops,
+	.default_attrs	= cache_default_attrs,
+};
+
+static struct kobj_type cache_ktype_percpu_entry = {
+	.sysfs_ops	= &cache_sysfs_ops,
+};
+
+static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu)
+{
+	kfree(all_cpu_cache_info[cpu].cache_leaves);
+	all_cpu_cache_info[cpu].cache_leaves = NULL;
+	all_cpu_cache_info[cpu].num_cache_leaves = 0;
+	memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject));
+	return;
+}
+
+static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu)
+{
+	unsigned long i, levels, unique_caches;
+	pal_cache_config_info_t cci;
+	int j;
+	long status;
+	struct cache_info *this_cache;
+	int num_cache_leaves = 0;
+
+	if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) {
+		printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status);
+		return -1;
+	}
+
+	this_cache=kzalloc(sizeof(struct cache_info)*unique_caches,
+			GFP_KERNEL);
+	if (this_cache == NULL)
+		return -ENOMEM;
+
+	for (i=0; i < levels; i++) {
+		for (j=2; j >0 ; j--) {
+			if ((status=ia64_pal_cache_config_info(i,j, &cci)) !=
+					PAL_STATUS_SUCCESS)
+				continue;
+
+			this_cache[num_cache_leaves].cci = cci;
+			this_cache[num_cache_leaves].level = i + 1;
+			this_cache[num_cache_leaves].type = j;
+
+			cache_shared_cpu_map_setup(cpu,
+					&this_cache[num_cache_leaves]);
+			num_cache_leaves ++;
+		}
+	}
+
+	all_cpu_cache_info[cpu].cache_leaves = this_cache;
+	all_cpu_cache_info[cpu].num_cache_leaves = num_cache_leaves;
+
+	memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject));
+
+	return 0;
+}
+
+/* Add cache interface for CPU device */
+static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
+{
+	unsigned int cpu = sys_dev->id;
+	unsigned long i, j;
+	struct cache_info *this_object;
+	int retval = 0;
+	cpumask_t oldmask;
+
+	if (all_cpu_cache_info[cpu].kobj.parent)
+		return 0;
+
+	oldmask = current->cpus_allowed;
+	retval = set_cpus_allowed_ptr(current, cpumask_of(cpu));
+	if (unlikely(retval))
+		return retval;
+
+	retval = cpu_cache_sysfs_init(cpu);
+	set_cpus_allowed_ptr(current, &oldmask);
+	if (unlikely(retval < 0))
+		return retval;
+
+	retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj,
+				      &cache_ktype_percpu_entry, &sys_dev->kobj,
+				      "%s", "cache");
+	if (unlikely(retval < 0)) {
+		cpu_cache_sysfs_exit(cpu);
+		return retval;
+	}
+
+	for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) {
+		this_object = LEAF_KOBJECT_PTR(cpu,i);
+		retval = kobject_init_and_add(&(this_object->kobj),
+					      &cache_ktype,
+					      &all_cpu_cache_info[cpu].kobj,
+					      "index%1lu", i);
+		if (unlikely(retval)) {
+			for (j = 0; j < i; j++) {
+				kobject_put(&(LEAF_KOBJECT_PTR(cpu,j)->kobj));
+			}
+			kobject_put(&all_cpu_cache_info[cpu].kobj);
+			cpu_cache_sysfs_exit(cpu);
+			return retval;
+		}
+		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
+	}
+	kobject_uevent(&all_cpu_cache_info[cpu].kobj, KOBJ_ADD);
+	return retval;
+}
+
+/* Remove cache interface for CPU device */
+static int __cpuinit cache_remove_dev(struct sys_device * sys_dev)
+{
+	unsigned int cpu = sys_dev->id;
+	unsigned long i;
+
+	for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++)
+		kobject_put(&(LEAF_KOBJECT_PTR(cpu,i)->kobj));
+
+	if (all_cpu_cache_info[cpu].kobj.parent) {
+		kobject_put(&all_cpu_cache_info[cpu].kobj);
+		memset(&all_cpu_cache_info[cpu].kobj,
+			0,
+			sizeof(struct kobject));
+	}
+
+	cpu_cache_sysfs_exit(cpu);
+
+	return 0;
+}
+
+/*
+ * When a cpu is hot-plugged, do a check and initiate
+ * cache kobject if necessary
+ */
+static int __cpuinit cache_cpu_callback(struct notifier_block *nfb,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct sys_device *sys_dev;
+
+	sys_dev = get_cpu_sysdev(cpu);
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		cache_add_dev(sys_dev);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		cache_remove_dev(sys_dev);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cache_cpu_notifier =
+{
+	.notifier_call = cache_cpu_callback
+};
+
+static int __init cache_sysfs_init(void)
+{
+	int i;
+
+	for_each_online_cpu(i) {
+		struct sys_device *sys_dev = get_cpu_sysdev((unsigned int)i);
+		cache_add_dev(sys_dev);
+	}
+
+	register_hotcpu_notifier(&cache_cpu_notifier);
+
+	return 0;
+}
+
+device_initcall(cache_sysfs_init);
+
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
new file mode 100644
index 00000000..fd80e700
--- /dev/null
+++ b/arch/ia64/kernel/traps.c
@@ -0,0 +1,651 @@
+/*
+ * Architecture-specific trap handling.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 05/12/00 grao <goutham.rao@intel.com> : added isr in siginfo for SIGFPE
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>		/* For unblank_screen() */
+#include <linux/module.h>       /* for EXPORT_SYMBOL */
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/delay.h>		/* for ssleep() */
+#include <linux/kdebug.h>
+
+#include <asm/fpswa.h>
+#include <asm/intrinsics.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+
+fpswa_interface_t *fpswa_interface;
+EXPORT_SYMBOL(fpswa_interface);
+
+void __init
+trap_init (void)
+{
+	if (ia64_boot_param->fpswa)
+		/* FPSWA fixup: make the interface pointer a kernel virtual address: */
+		fpswa_interface = __va(ia64_boot_param->fpswa);
+}
+
+int
+die (const char *str, struct pt_regs *regs, long err)
+{
+	static struct {
+		spinlock_t lock;
+		u32 lock_owner;
+		int lock_owner_depth;
+	} die = {
+		.lock =	__SPIN_LOCK_UNLOCKED(die.lock),
+		.lock_owner = -1,
+		.lock_owner_depth = 0
+	};
+	static int die_counter;
+	int cpu = get_cpu();
+
+	if (die.lock_owner != cpu) {
+		console_verbose();
+		spin_lock_irq(&die.lock);
+		die.lock_owner = cpu;
+		die.lock_owner_depth = 0;
+		bust_spinlocks(1);
+	}
+	put_cpu();
+
+	if (++die.lock_owner_depth < 3) {
+		printk("%s[%d]: %s %ld [%d]\n",
+		current->comm, task_pid_nr(current), str, err, ++die_counter);
+		if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
+	            != NOTIFY_STOP)
+			show_regs(regs);
+		else
+			regs = NULL;
+  	} else
+		printk(KERN_ERR "Recursive die() failure, output suppressed\n");
+
+	bust_spinlocks(0);
+	die.lock_owner = -1;
+	add_taint(TAINT_DIE);
+	spin_unlock_irq(&die.lock);
+
+	if (!regs)
+		return 1;
+
+	if (panic_on_oops)
+		panic("Fatal exception");
+
+  	do_exit(SIGSEGV);
+	return 0;
+}
+
+int
+die_if_kernel (char *str, struct pt_regs *regs, long err)
+{
+	if (!user_mode(regs))
+		return die(str, regs, err);
+	return 0;
+}
+
+void
+__kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
+{
+	siginfo_t siginfo;
+	int sig, code;
+
+	/* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */
+	siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
+	siginfo.si_imm = break_num;
+	siginfo.si_flags = 0;		/* clear __ISR_VALID */
+	siginfo.si_isr = 0;
+
+	switch (break_num) {
+	      case 0: /* unknown error (used by GCC for __builtin_abort()) */
+		if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP)
+			       	== NOTIFY_STOP)
+			return;
+		if (die_if_kernel("bugcheck!", regs, break_num))
+			return;
+		sig = SIGILL; code = ILL_ILLOPC;
+		break;
+
+	      case 1: /* integer divide by zero */
+		sig = SIGFPE; code = FPE_INTDIV;
+		break;
+
+	      case 2: /* integer overflow */
+		sig = SIGFPE; code = FPE_INTOVF;
+		break;
+
+	      case 3: /* range check/bounds check */
+		sig = SIGFPE; code = FPE_FLTSUB;
+		break;
+
+	      case 4: /* null pointer dereference */
+		sig = SIGSEGV; code = SEGV_MAPERR;
+		break;
+
+	      case 5: /* misaligned data */
+		sig = SIGSEGV; code = BUS_ADRALN;
+		break;
+
+	      case 6: /* decimal overflow */
+		sig = SIGFPE; code = __FPE_DECOVF;
+		break;
+
+	      case 7: /* decimal divide by zero */
+		sig = SIGFPE; code = __FPE_DECDIV;
+		break;
+
+	      case 8: /* packed decimal error */
+		sig = SIGFPE; code = __FPE_DECERR;
+		break;
+
+	      case 9: /* invalid ASCII digit */
+		sig = SIGFPE; code = __FPE_INVASC;
+		break;
+
+	      case 10: /* invalid decimal digit */
+		sig = SIGFPE; code = __FPE_INVDEC;
+		break;
+
+	      case 11: /* paragraph stack overflow */
+		sig = SIGSEGV; code = __SEGV_PSTKOVF;
+		break;
+
+	      case 0x3f000 ... 0x3ffff:	/* bundle-update in progress */
+		sig = SIGILL; code = __ILL_BNDMOD;
+		break;
+
+	      default:
+		if ((break_num < 0x40000 || break_num > 0x100000)
+		    && die_if_kernel("Bad break", regs, break_num))
+			return;
+
+		if (break_num < 0x80000) {
+			sig = SIGILL; code = __ILL_BREAK;
+		} else {
+			if (notify_die(DIE_BREAK, "bad break", regs, break_num, TRAP_BRKPT, SIGTRAP)
+					== NOTIFY_STOP)
+				return;
+			sig = SIGTRAP; code = TRAP_BRKPT;
+		}
+	}
+	siginfo.si_signo = sig;
+	siginfo.si_errno = 0;
+	siginfo.si_code = code;
+	force_sig_info(sig, &siginfo, current);
+}
+
+/*
+ * disabled_fph_fault() is called when a user-level process attempts to access f32..f127
+ * and it doesn't own the fp-high register partition.  When this happens, we save the
+ * current fph partition in the task_struct of the fpu-owner (if necessary) and then load
+ * the fp-high partition of the current task (if necessary).  Note that the kernel has
+ * access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes
+ * care of clearing psr.dfh.
+ */
+static inline void
+disabled_fph_fault (struct pt_regs *regs)
+{
+	struct ia64_psr *psr = ia64_psr(regs);
+
+	/* first, grant user-level access to fph partition: */
+	psr->dfh = 0;
+
+	/*
+	 * Make sure that no other task gets in on this processor
+	 * while we're claiming the FPU
+	 */
+	preempt_disable();
+#ifndef CONFIG_SMP
+	{
+		struct task_struct *fpu_owner
+			= (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER);
+
+		if (ia64_is_local_fpu_owner(current)) {
+			preempt_enable_no_resched();
+			return;
+		}
+
+		if (fpu_owner)
+			ia64_flush_fph(fpu_owner);
+	}
+#endif /* !CONFIG_SMP */
+	ia64_set_local_fpu_owner(current);
+	if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) {
+		__ia64_load_fpu(current->thread.fph);
+		psr->mfh = 0;
+	} else {
+		__ia64_init_fpu();
+		/*
+		 * Set mfh because the state in thread.fph does not match the state in
+		 * the fph partition.
+		 */
+		psr->mfh = 1;
+	}
+	preempt_enable_no_resched();
+}
+
+static inline int
+fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs,
+	    struct pt_regs *regs)
+{
+	fp_state_t fp_state;
+	fpswa_ret_t ret;
+
+	if (!fpswa_interface)
+		return -1;
+
+	memset(&fp_state, 0, sizeof(fp_state_t));
+
+	/*
+	 * compute fp_state.  only FP registers f6 - f11 are used by the
+	 * kernel, so set those bits in the mask and set the low volatile
+	 * pointer to point to these registers.
+	 */
+	fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
+
+	fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
+	/*
+	 * unsigned long (*EFI_FPSWA) (
+	 *      unsigned long    trap_type,
+	 *	void             *Bundle,
+	 *	unsigned long    *pipsr,
+	 *	unsigned long    *pfsr,
+	 *	unsigned long    *pisr,
+	 *	unsigned long    *ppreds,
+	 *	unsigned long    *pifs,
+	 *	void             *fp_state);
+	 */
+	ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle,
+					(unsigned long *) ipsr, (unsigned long *) fpsr,
+					(unsigned long *) isr, (unsigned long *) pr,
+					(unsigned long *) ifs, &fp_state);
+
+	return ret.status;
+}
+
+struct fpu_swa_msg {
+	unsigned long count;
+	unsigned long time;
+};
+static DEFINE_PER_CPU(struct fpu_swa_msg, cpulast);
+DECLARE_PER_CPU(struct fpu_swa_msg, cpulast);
+static struct fpu_swa_msg last __cacheline_aligned;
+
+
+/*
+ * Handle floating-point assist faults and traps.
+ */
+static int
+handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
+{
+	long exception, bundle[2];
+	unsigned long fault_ip;
+	struct siginfo siginfo;
+
+	fault_ip = regs->cr_iip;
+	if (!fp_fault && (ia64_psr(regs)->ri == 0))
+		fault_ip -= 16;
+	if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle)))
+		return -1;
+
+	if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT))  {
+		unsigned long count, current_jiffies = jiffies;
+		struct fpu_swa_msg *cp = &__get_cpu_var(cpulast);
+
+		if (unlikely(current_jiffies > cp->time))
+			cp->count = 0;
+		if (unlikely(cp->count < 5)) {
+			cp->count++;
+			cp->time = current_jiffies + 5 * HZ;
+
+			/* minimize races by grabbing a copy of count BEFORE checking last.time. */
+			count = last.count;
+			barrier();
+
+			/*
+			 * Lower 4 bits are used as a count. Upper bits are a sequence
+			 * number that is updated when count is reset. The cmpxchg will
+			 * fail is seqno has changed. This minimizes mutiple cpus
+			 * resetting the count.
+			 */
+			if (current_jiffies > last.time)
+				(void) cmpxchg_acq(&last.count, count, 16 + (count & ~15));
+
+			/* used fetchadd to atomically update the count */
+			if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
+				last.time = current_jiffies + 5 * HZ;
+				printk(KERN_WARNING
+		       			"%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
+		       			current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
+			}
+		}
+	}
+
+	exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
+			       &regs->cr_ifs, regs);
+	if (fp_fault) {
+		if (exception == 0) {
+			/* emulation was successful */
+			ia64_increment_ip(regs);
+		} else if (exception == -1) {
+			printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
+			return -1;
+		} else {
+			/* is next instruction a trap? */
+			if (exception & 2) {
+				ia64_increment_ip(regs);
+			}
+			siginfo.si_signo = SIGFPE;
+			siginfo.si_errno = 0;
+			siginfo.si_code = __SI_FAULT;	/* default code */
+			siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
+			if (isr & 0x11) {
+				siginfo.si_code = FPE_FLTINV;
+			} else if (isr & 0x22) {
+				/* denormal operand gets the same si_code as underflow 
+				* see arch/i386/kernel/traps.c:math_error()  */
+				siginfo.si_code = FPE_FLTUND;
+			} else if (isr & 0x44) {
+				siginfo.si_code = FPE_FLTDIV;
+			}
+			siginfo.si_isr = isr;
+			siginfo.si_flags = __ISR_VALID;
+			siginfo.si_imm = 0;
+			force_sig_info(SIGFPE, &siginfo, current);
+		}
+	} else {
+		if (exception == -1) {
+			printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
+			return -1;
+		} else if (exception != 0) {
+			/* raise exception */
+			siginfo.si_signo = SIGFPE;
+			siginfo.si_errno = 0;
+			siginfo.si_code = __SI_FAULT;	/* default code */
+			siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
+			if (isr & 0x880) {
+				siginfo.si_code = FPE_FLTOVF;
+			} else if (isr & 0x1100) {
+				siginfo.si_code = FPE_FLTUND;
+			} else if (isr & 0x2200) {
+				siginfo.si_code = FPE_FLTRES;
+			}
+			siginfo.si_isr = isr;
+			siginfo.si_flags = __ISR_VALID;
+			siginfo.si_imm = 0;
+			force_sig_info(SIGFPE, &siginfo, current);
+		}
+	}
+	return 0;
+}
+
+struct illegal_op_return {
+	unsigned long fkt, arg1, arg2, arg3;
+};
+
+struct illegal_op_return
+ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
+		       long arg4, long arg5, long arg6, long arg7,
+		       struct pt_regs regs)
+{
+	struct illegal_op_return rv;
+	struct siginfo si;
+	char buf[128];
+
+#ifdef CONFIG_IA64_BRL_EMU
+	{
+		extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long);
+
+		rv = ia64_emulate_brl(&regs, ec);
+		if (rv.fkt != (unsigned long) -1)
+			return rv;
+	}
+#endif
+
+	sprintf(buf, "IA-64 Illegal operation fault");
+	rv.fkt = 0;
+	if (die_if_kernel(buf, &regs, 0))
+		return rv;
+
+	memset(&si, 0, sizeof(si));
+	si.si_signo = SIGILL;
+	si.si_code = ILL_ILLOPC;
+	si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(&regs)->ri);
+	force_sig_info(SIGILL, &si, current);
+	return rv;
+}
+
+void __kprobes
+ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
+	    unsigned long iim, unsigned long itir, long arg5, long arg6,
+	    long arg7, struct pt_regs regs)
+{
+	unsigned long code, error = isr, iip;
+	struct siginfo siginfo;
+	char buf[128];
+	int result, sig;
+	static const char *reason[] = {
+		"IA-64 Illegal Operation fault",
+		"IA-64 Privileged Operation fault",
+		"IA-64 Privileged Register fault",
+		"IA-64 Reserved Register/Field fault",
+		"Disabled Instruction Set Transition fault",
+		"Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault",
+		"Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12",
+		"Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
+	};
+
+	if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) {
+		/*
+		 * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel
+		 * the lfetch.
+		 */
+		ia64_psr(&regs)->ed = 1;
+		return;
+	}
+
+	iip = regs.cr_iip + ia64_psr(&regs)->ri;
+
+	switch (vector) {
+	      case 24: /* General Exception */
+		code = (isr >> 4) & 0xf;
+		sprintf(buf, "General Exception: %s%s", reason[code],
+			(code == 3) ? ((isr & (1UL << 37))
+				       ? " (RSE access)" : " (data access)") : "");
+		if (code == 8) {
+# ifdef CONFIG_IA64_PRINT_HAZARDS
+			printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
+			       current->comm, task_pid_nr(current),
+			       regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
+# endif
+			return;
+		}
+		break;
+
+	      case 25: /* Disabled FP-Register */
+		if (isr & 2) {
+			disabled_fph_fault(&regs);
+			return;
+		}
+		sprintf(buf, "Disabled FPL fault---not supposed to happen!");
+		break;
+
+	      case 26: /* NaT Consumption */
+		if (user_mode(&regs)) {
+			void __user *addr;
+
+			if (((isr >> 4) & 0xf) == 2) {
+				/* NaT page consumption */
+				sig = SIGSEGV;
+				code = SEGV_ACCERR;
+				addr = (void __user *) ifa;
+			} else {
+				/* register NaT consumption */
+				sig = SIGILL;
+				code = ILL_ILLOPN;
+				addr = (void __user *) (regs.cr_iip
+							+ ia64_psr(&regs)->ri);
+			}
+			siginfo.si_signo = sig;
+			siginfo.si_code = code;
+			siginfo.si_errno = 0;
+			siginfo.si_addr = addr;
+			siginfo.si_imm = vector;
+			siginfo.si_flags = __ISR_VALID;
+			siginfo.si_isr = isr;
+			force_sig_info(sig, &siginfo, current);
+			return;
+		} else if (ia64_done_with_exception(&regs))
+			return;
+		sprintf(buf, "NaT consumption");
+		break;
+
+	      case 31: /* Unsupported Data Reference */
+		if (user_mode(&regs)) {
+			siginfo.si_signo = SIGILL;
+			siginfo.si_code = ILL_ILLOPN;
+			siginfo.si_errno = 0;
+			siginfo.si_addr = (void __user *) iip;
+			siginfo.si_imm = vector;
+			siginfo.si_flags = __ISR_VALID;
+			siginfo.si_isr = isr;
+			force_sig_info(SIGILL, &siginfo, current);
+			return;
+		}
+		sprintf(buf, "Unsupported data reference");
+		break;
+
+	      case 29: /* Debug */
+	      case 35: /* Taken Branch Trap */
+	      case 36: /* Single Step Trap */
+		if (fsys_mode(current, &regs)) {
+			extern char __kernel_syscall_via_break[];
+			/*
+			 * Got a trap in fsys-mode: Taken Branch Trap
+			 * and Single Step trap need special handling;
+			 * Debug trap is ignored (we disable it here
+			 * and re-enable it in the lower-privilege trap).
+			 */
+			if (unlikely(vector == 29)) {
+				set_thread_flag(TIF_DB_DISABLED);
+				ia64_psr(&regs)->db = 0;
+				ia64_psr(&regs)->lp = 1;
+				return;
+			}
+			/* re-do the system call via break 0x100000: */
+			regs.cr_iip = (unsigned long) __kernel_syscall_via_break;
+			ia64_psr(&regs)->ri = 0;
+			ia64_psr(&regs)->cpl = 3;
+			return;
+		}
+		switch (vector) {
+		      case 29:
+			siginfo.si_code = TRAP_HWBKPT;
+#ifdef CONFIG_ITANIUM
+			/*
+			 * Erratum 10 (IFA may contain incorrect address) now has
+			 * "NoFix" status.  There are no plans for fixing this.
+			 */
+			if (ia64_psr(&regs)->is == 0)
+			  ifa = regs.cr_iip;
+#endif
+			break;
+		      case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
+		      case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
+		}
+		if (notify_die(DIE_FAULT, "ia64_fault", &regs, vector, siginfo.si_code, SIGTRAP)
+			       	== NOTIFY_STOP)
+			return;
+		siginfo.si_signo = SIGTRAP;
+		siginfo.si_errno = 0;
+		siginfo.si_addr  = (void __user *) ifa;
+		siginfo.si_imm   = 0;
+		siginfo.si_flags = __ISR_VALID;
+		siginfo.si_isr   = isr;
+		force_sig_info(SIGTRAP, &siginfo, current);
+		return;
+
+	      case 32: /* fp fault */
+	      case 33: /* fp trap */
+		result = handle_fpu_swa((vector == 32) ? 1 : 0, &regs, isr);
+		if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
+			siginfo.si_signo = SIGFPE;
+			siginfo.si_errno = 0;
+			siginfo.si_code = FPE_FLTINV;
+			siginfo.si_addr = (void __user *) iip;
+			siginfo.si_flags = __ISR_VALID;
+			siginfo.si_isr = isr;
+			siginfo.si_imm = 0;
+			force_sig_info(SIGFPE, &siginfo, current);
+		}
+		return;
+
+	      case 34:
+		if (isr & 0x2) {
+			/* Lower-Privilege Transfer Trap */
+
+			/* If we disabled debug traps during an fsyscall,
+			 * re-enable them here.
+			 */
+			if (test_thread_flag(TIF_DB_DISABLED)) {
+				clear_thread_flag(TIF_DB_DISABLED);
+				ia64_psr(&regs)->db = 1;
+			}
+
+			/*
+			 * Just clear PSR.lp and then return immediately:
+			 * all the interesting work (e.g., signal delivery)
+			 * is done in the kernel exit path.
+			 */
+			ia64_psr(&regs)->lp = 0;
+			return;
+		} else {
+			/* Unimplemented Instr. Address Trap */
+			if (user_mode(&regs)) {
+				siginfo.si_signo = SIGILL;
+				siginfo.si_code = ILL_BADIADDR;
+				siginfo.si_errno = 0;
+				siginfo.si_flags = 0;
+				siginfo.si_isr = 0;
+				siginfo.si_imm = 0;
+				siginfo.si_addr = (void __user *) iip;
+				force_sig_info(SIGILL, &siginfo, current);
+				return;
+			}
+			sprintf(buf, "Unimplemented Instruction Address fault");
+		}
+		break;
+
+	      case 45:
+		printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
+		printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
+		       iip, ifa, isr);
+		force_sig(SIGSEGV, current);
+		break;
+
+	      case 46:
+		printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
+		printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
+		       iip, ifa, isr, iim);
+		force_sig(SIGSEGV, current);
+		return;
+
+	      case 47:
+		sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
+		break;
+
+	      default:
+		sprintf(buf, "Fault %lu", vector);
+		break;
+	}
+	if (!die_if_kernel(buf, &regs, error))
+		force_sig(SIGILL, current);
+}
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
new file mode 100644
index 00000000..622772b7
--- /dev/null
+++ b/arch/ia64/kernel/unaligned.c
@@ -0,0 +1,1542 @@
+/*
+ * Architecture-specific unaligned trap handling.
+ *
+ * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 2002/12/09   Fix rotating register handling (off-by-1 error, missing fr-rotation).  Fix
+ *		get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
+ *		stacked register returns an undefined value; it does NOT trigger a
+ *		"rsvd register fault").
+ * 2001/10/11	Fix unaligned access to rotating registers in s/w pipelined loops.
+ * 2001/08/13	Correct size of extended floats (float_fsz) from 16 to 10 bytes.
+ * 2001/01/17	Add support emulation of unaligned kernel accesses.
+ */
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/ratelimit.h>
+
+#include <asm/intrinsics.h>
+#include <asm/processor.h>
+#include <asm/rse.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+
+extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
+
+#undef DEBUG_UNALIGNED_TRAP
+
+#ifdef DEBUG_UNALIGNED_TRAP
+# define DPRINT(a...)	do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
+# define DDUMP(str,vp,len)	dump(str, vp, len)
+
+static void
+dump (const char *str, void *vp, size_t len)
+{
+	unsigned char *cp = vp;
+	int i;
+
+	printk("%s", str);
+	for (i = 0; i < len; ++i)
+		printk (" %02x", *cp++);
+	printk("\n");
+}
+#else
+# define DPRINT(a...)
+# define DDUMP(str,vp,len)
+#endif
+
+#define IA64_FIRST_STACKED_GR	32
+#define IA64_FIRST_ROTATING_FR	32
+#define SIGN_EXT9		0xffffffffffffff00ul
+
+/*
+ *  sysctl settable hook which tells the kernel whether to honor the
+ *  IA64_THREAD_UAC_NOPRINT prctl.  Because this is user settable, we want
+ *  to allow the super user to enable/disable this for security reasons
+ *  (i.e. don't allow attacker to fill up logs with unaligned accesses).
+ */
+int no_unaligned_warning;
+int unaligned_dump_stack;
+
+/*
+ * For M-unit:
+ *
+ *  opcode |   m  |   x6    |
+ * --------|------|---------|
+ * [40-37] | [36] | [35:30] |
+ * --------|------|---------|
+ *     4   |   1  |    6    | = 11 bits
+ * --------------------------
+ * However bits [31:30] are not directly useful to distinguish between
+ * load/store so we can use [35:32] instead, which gives the following
+ * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
+ * checking the m-bit until later in the load/store emulation.
+ */
+#define IA64_OPCODE_MASK	0x1ef
+#define IA64_OPCODE_SHIFT	32
+
+/*
+ * Table C-28 Integer Load/Store
+ *
+ * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
+ *
+ * ld8.fill, st8.fill  MUST be aligned because the RNATs are based on
+ * the address (bits [8:3]), so we must failed.
+ */
+#define LD_OP            0x080
+#define LDS_OP           0x081
+#define LDA_OP           0x082
+#define LDSA_OP          0x083
+#define LDBIAS_OP        0x084
+#define LDACQ_OP         0x085
+/* 0x086, 0x087 are not relevant */
+#define LDCCLR_OP        0x088
+#define LDCNC_OP         0x089
+#define LDCCLRACQ_OP     0x08a
+#define ST_OP            0x08c
+#define STREL_OP         0x08d
+/* 0x08e,0x8f are not relevant */
+
+/*
+ * Table C-29 Integer Load +Reg
+ *
+ * we use the ld->m (bit [36:36]) field to determine whether or not we have
+ * a load/store of this form.
+ */
+
+/*
+ * Table C-30 Integer Load/Store +Imm
+ *
+ * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
+ *
+ * ld8.fill, st8.fill  must be aligned because the Nat register are based on
+ * the address, so we must fail and the program must be fixed.
+ */
+#define LD_IMM_OP            0x0a0
+#define LDS_IMM_OP           0x0a1
+#define LDA_IMM_OP           0x0a2
+#define LDSA_IMM_OP          0x0a3
+#define LDBIAS_IMM_OP        0x0a4
+#define LDACQ_IMM_OP         0x0a5
+/* 0x0a6, 0xa7 are not relevant */
+#define LDCCLR_IMM_OP        0x0a8
+#define LDCNC_IMM_OP         0x0a9
+#define LDCCLRACQ_IMM_OP     0x0aa
+#define ST_IMM_OP            0x0ac
+#define STREL_IMM_OP         0x0ad
+/* 0x0ae,0xaf are not relevant */
+
+/*
+ * Table C-32 Floating-point Load/Store
+ */
+#define LDF_OP           0x0c0
+#define LDFS_OP          0x0c1
+#define LDFA_OP          0x0c2
+#define LDFSA_OP         0x0c3
+/* 0x0c6 is irrelevant */
+#define LDFCCLR_OP       0x0c8
+#define LDFCNC_OP        0x0c9
+/* 0x0cb is irrelevant  */
+#define STF_OP           0x0cc
+
+/*
+ * Table C-33 Floating-point Load +Reg
+ *
+ * we use the ld->m (bit [36:36]) field to determine whether or not we have
+ * a load/store of this form.
+ */
+
+/*
+ * Table C-34 Floating-point Load/Store +Imm
+ */
+#define LDF_IMM_OP       0x0e0
+#define LDFS_IMM_OP      0x0e1
+#define LDFA_IMM_OP      0x0e2
+#define LDFSA_IMM_OP     0x0e3
+/* 0x0e6 is irrelevant */
+#define LDFCCLR_IMM_OP   0x0e8
+#define LDFCNC_IMM_OP    0x0e9
+#define STF_IMM_OP       0x0ec
+
+typedef struct {
+	unsigned long	 qp:6;	/* [0:5]   */
+	unsigned long    r1:7;	/* [6:12]  */
+	unsigned long   imm:7;	/* [13:19] */
+	unsigned long    r3:7;	/* [20:26] */
+	unsigned long     x:1;  /* [27:27] */
+	unsigned long  hint:2;	/* [28:29] */
+	unsigned long x6_sz:2;	/* [30:31] */
+	unsigned long x6_op:4;	/* [32:35], x6 = x6_sz|x6_op */
+	unsigned long     m:1;	/* [36:36] */
+	unsigned long    op:4;	/* [37:40] */
+	unsigned long   pad:23; /* [41:63] */
+} load_store_t;
+
+
+typedef enum {
+	UPD_IMMEDIATE,	/* ldXZ r1=[r3],imm(9) */
+	UPD_REG		/* ldXZ r1=[r3],r2     */
+} update_t;
+
+/*
+ * We use tables to keep track of the offsets of registers in the saved state.
+ * This way we save having big switch/case statements.
+ *
+ * We use bit 0 to indicate switch_stack or pt_regs.
+ * The offset is simply shifted by 1 bit.
+ * A 2-byte value should be enough to hold any kind of offset
+ *
+ * In case the calling convention changes (and thus pt_regs/switch_stack)
+ * simply use RSW instead of RPT or vice-versa.
+ */
+
+#define RPO(x)	((size_t) &((struct pt_regs *)0)->x)
+#define RSO(x)	((size_t) &((struct switch_stack *)0)->x)
+
+#define RPT(x)		(RPO(x) << 1)
+#define RSW(x)		(1| RSO(x)<<1)
+
+#define GR_OFFS(x)	(gr_info[x]>>1)
+#define GR_IN_SW(x)	(gr_info[x] & 0x1)
+
+#define FR_OFFS(x)	(fr_info[x]>>1)
+#define FR_IN_SW(x)	(fr_info[x] & 0x1)
+
+static u16 gr_info[32]={
+	0,			/* r0 is read-only : WE SHOULD NEVER GET THIS */
+
+	RPT(r1), RPT(r2), RPT(r3),
+
+	RSW(r4), RSW(r5), RSW(r6), RSW(r7),
+
+	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
+	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
+
+	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
+	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
+	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
+	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
+};
+
+static u16 fr_info[32]={
+	0,			/* constant : WE SHOULD NEVER GET THIS */
+	0,			/* constant : WE SHOULD NEVER GET THIS */
+
+	RSW(f2), RSW(f3), RSW(f4), RSW(f5),
+
+	RPT(f6), RPT(f7), RPT(f8), RPT(f9),
+	RPT(f10), RPT(f11),
+
+	RSW(f12), RSW(f13), RSW(f14),
+	RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
+	RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
+	RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
+	RSW(f30), RSW(f31)
+};
+
+/* Invalidate ALAT entry for integer register REGNO.  */
+static void
+invala_gr (int regno)
+{
+#	define F(reg)	case reg: ia64_invala_gr(reg); break
+
+	switch (regno) {
+		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
+		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
+		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
+		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
+		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
+		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
+		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
+		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
+		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
+		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
+		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
+		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
+		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
+		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
+		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
+		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
+	}
+#	undef F
+}
+
+/* Invalidate ALAT entry for floating-point register REGNO.  */
+static void
+invala_fr (int regno)
+{
+#	define F(reg)	case reg: ia64_invala_fr(reg); break
+
+	switch (regno) {
+		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
+		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
+		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
+		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
+		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
+		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
+		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
+		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
+		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
+		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
+		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
+		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
+		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
+		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
+		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
+		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
+	}
+#	undef F
+}
+
+static inline unsigned long
+rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
+{
+	reg += rrb;
+	if (reg >= sor)
+		reg -= sor;
+	return reg;
+}
+
+static void
+set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
+{
+	struct switch_stack *sw = (struct switch_stack *) regs - 1;
+	unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
+	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
+	unsigned long rnats, nat_mask;
+	unsigned long on_kbs;
+	long sof = (regs->cr_ifs) & 0x7f;
+	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
+	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+	long ridx = r1 - 32;
+
+	if (ridx >= sof) {
+		/* this should never happen, as the "rsvd register fault" has higher priority */
+		DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
+		return;
+	}
+
+	if (ridx < sor)
+		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
+	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
+
+	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
+	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
+	if (addr >= kbs) {
+		/* the register is on the kernel backing store: easy... */
+		rnat_addr = ia64_rse_rnat_addr(addr);
+		if ((unsigned long) rnat_addr >= sw->ar_bspstore)
+			rnat_addr = &sw->ar_rnat;
+		nat_mask = 1UL << ia64_rse_slot_num(addr);
+
+		*addr = val;
+		if (nat)
+			*rnat_addr |=  nat_mask;
+		else
+			*rnat_addr &= ~nat_mask;
+		return;
+	}
+
+	if (!user_stack(current, regs)) {
+		DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
+		return;
+	}
+
+	bspstore = (unsigned long *)regs->ar_bspstore;
+	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
+	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
+	addr    = ia64_rse_skip_regs(bsp, ridx);
+
+	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
+
+	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
+
+	rnat_addr = ia64_rse_rnat_addr(addr);
+
+	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
+	DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
+	       (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
+
+	nat_mask = 1UL << ia64_rse_slot_num(addr);
+	if (nat)
+		rnats |=  nat_mask;
+	else
+		rnats &= ~nat_mask;
+	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
+
+	DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
+}
+
+
+static void
+get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
+{
+	struct switch_stack *sw = (struct switch_stack *) regs - 1;
+	unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
+	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
+	unsigned long rnats, nat_mask;
+	unsigned long on_kbs;
+	long sof = (regs->cr_ifs) & 0x7f;
+	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
+	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+	long ridx = r1 - 32;
+
+	if (ridx >= sof) {
+		/* read of out-of-frame register returns an undefined value; 0 in our case.  */
+		DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
+		goto fail;
+	}
+
+	if (ridx < sor)
+		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
+	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
+
+	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
+	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
+	if (addr >= kbs) {
+		/* the register is on the kernel backing store: easy... */
+		*val = *addr;
+		if (nat) {
+			rnat_addr = ia64_rse_rnat_addr(addr);
+			if ((unsigned long) rnat_addr >= sw->ar_bspstore)
+				rnat_addr = &sw->ar_rnat;
+			nat_mask = 1UL << ia64_rse_slot_num(addr);
+			*nat = (*rnat_addr & nat_mask) != 0;
+		}
+		return;
+	}
+
+	if (!user_stack(current, regs)) {
+		DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
+		goto fail;
+	}
+
+	bspstore = (unsigned long *)regs->ar_bspstore;
+	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
+	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
+	addr    = ia64_rse_skip_regs(bsp, ridx);
+
+	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
+
+	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
+
+	if (nat) {
+		rnat_addr = ia64_rse_rnat_addr(addr);
+		nat_mask = 1UL << ia64_rse_slot_num(addr);
+
+		DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
+
+		ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
+		*nat = (rnats & nat_mask) != 0;
+	}
+	return;
+
+  fail:
+	*val = 0;
+	if (nat)
+		*nat = 0;
+	return;
+}
+
+
+static void
+setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
+{
+	struct switch_stack *sw = (struct switch_stack *) regs - 1;
+	unsigned long addr;
+	unsigned long bitmask;
+	unsigned long *unat;
+
+	/*
+	 * First takes care of stacked registers
+	 */
+	if (regnum >= IA64_FIRST_STACKED_GR) {
+		set_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * Using r0 as a target raises a General Exception fault which has higher priority
+	 * than the Unaligned Reference fault.
+	 */
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	if (GR_IN_SW(regnum)) {
+		addr = (unsigned long)sw;
+		unat = &sw->ar_unat;
+	} else {
+		addr = (unsigned long)regs;
+		unat = &sw->caller_unat;
+	}
+	DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
+	       addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
+	/*
+	 * add offset from base of struct
+	 * and do it !
+	 */
+	addr += GR_OFFS(regnum);
+
+	*(unsigned long *)addr = val;
+
+	/*
+	 * We need to clear the corresponding UNAT bit to fully emulate the load
+	 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
+	 */
+	bitmask   = 1UL << (addr >> 3 & 0x3f);
+	DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
+	if (nat) {
+		*unat |= bitmask;
+	} else {
+		*unat &= ~bitmask;
+	}
+	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
+}
+
+/*
+ * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
+ * range from 32-127, result is in the range from 0-95.
+ */
+static inline unsigned long
+fph_index (struct pt_regs *regs, long regnum)
+{
+	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
+	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
+}
+
+static void
+setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
+{
+	struct switch_stack *sw = (struct switch_stack *)regs - 1;
+	unsigned long addr;
+
+	/*
+	 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
+	 * Fault. Thus, when we get here, we know the partition is enabled.
+	 * To update f32-f127, there are three choices:
+	 *
+	 *	(1) save f32-f127 to thread.fph and update the values there
+	 *	(2) use a gigantic switch statement to directly access the registers
+	 *	(3) generate code on the fly to update the desired register
+	 *
+	 * For now, we are using approach (1).
+	 */
+	if (regnum >= IA64_FIRST_ROTATING_FR) {
+		ia64_sync_fph(current);
+		current->thread.fph[fph_index(regs, regnum)] = *fpval;
+	} else {
+		/*
+		 * pt_regs or switch_stack ?
+		 */
+		if (FR_IN_SW(regnum)) {
+			addr = (unsigned long)sw;
+		} else {
+			addr = (unsigned long)regs;
+		}
+
+		DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
+
+		addr += FR_OFFS(regnum);
+		*(struct ia64_fpreg *)addr = *fpval;
+
+		/*
+		 * mark the low partition as being used now
+		 *
+		 * It is highly unlikely that this bit is not already set, but
+		 * let's do it for safety.
+		 */
+		regs->cr_ipsr |= IA64_PSR_MFL;
+	}
+}
+
+/*
+ * Those 2 inline functions generate the spilled versions of the constant floating point
+ * registers which can be used with stfX
+ */
+static inline void
+float_spill_f0 (struct ia64_fpreg *final)
+{
+	ia64_stf_spill(final, 0);
+}
+
+static inline void
+float_spill_f1 (struct ia64_fpreg *final)
+{
+	ia64_stf_spill(final, 1);
+}
+
+static void
+getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
+{
+	struct switch_stack *sw = (struct switch_stack *) regs - 1;
+	unsigned long addr;
+
+	/*
+	 * From EAS-2.5: FPDisableFault has higher priority than
+	 * Unaligned Fault. Thus, when we get here, we know the partition is
+	 * enabled.
+	 *
+	 * When regnum > 31, the register is still live and we need to force a save
+	 * to current->thread.fph to get access to it.  See discussion in setfpreg()
+	 * for reasons and other ways of doing this.
+	 */
+	if (regnum >= IA64_FIRST_ROTATING_FR) {
+		ia64_flush_fph(current);
+		*fpval = current->thread.fph[fph_index(regs, regnum)];
+	} else {
+		/*
+		 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
+		 * not saved, we must generate their spilled form on the fly
+		 */
+		switch(regnum) {
+		case 0:
+			float_spill_f0(fpval);
+			break;
+		case 1:
+			float_spill_f1(fpval);
+			break;
+		default:
+			/*
+			 * pt_regs or switch_stack ?
+			 */
+			addr =  FR_IN_SW(regnum) ? (unsigned long)sw
+						 : (unsigned long)regs;
+
+			DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
+			       FR_IN_SW(regnum), addr, FR_OFFS(regnum));
+
+			addr  += FR_OFFS(regnum);
+			*fpval = *(struct ia64_fpreg *)addr;
+		}
+	}
+}
+
+
+static void
+getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
+{
+	struct switch_stack *sw = (struct switch_stack *) regs - 1;
+	unsigned long addr, *unat;
+
+	if (regnum >= IA64_FIRST_STACKED_GR) {
+		get_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * take care of r0 (read-only always evaluate to 0)
+	 */
+	if (regnum == 0) {
+		*val = 0;
+		if (nat)
+			*nat = 0;
+		return;
+	}
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	if (GR_IN_SW(regnum)) {
+		addr = (unsigned long)sw;
+		unat = &sw->ar_unat;
+	} else {
+		addr = (unsigned long)regs;
+		unat = &sw->caller_unat;
+	}
+
+	DPRINT("addr_base=%lx offset=0x%x\n", addr,  GR_OFFS(regnum));
+
+	addr += GR_OFFS(regnum);
+
+	*val  = *(unsigned long *)addr;
+
+	/*
+	 * do it only when requested
+	 */
+	if (nat)
+		*nat  = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
+}
+
+static void
+emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
+{
+	/*
+	 * IMPORTANT:
+	 * Given the way we handle unaligned speculative loads, we should
+	 * not get to this point in the code but we keep this sanity check,
+	 * just in case.
+	 */
+	if (ld.x6_op == 1 || ld.x6_op == 3) {
+		printk(KERN_ERR "%s: register update on speculative load, error\n", __func__);
+		if (die_if_kernel("unaligned reference on speculative load with register update\n",
+				  regs, 30))
+			return;
+	}
+
+
+	/*
+	 * at this point, we know that the base register to update is valid i.e.,
+	 * it's not r0
+	 */
+	if (type == UPD_IMMEDIATE) {
+		unsigned long imm;
+
+		/*
+		 * Load +Imm: ldXZ r1=[r3],imm(9)
+		 *
+		 *
+		 * form imm9: [13:19] contain the first 7 bits
+		 */
+		imm = ld.x << 7 | ld.imm;
+
+		/*
+		 * sign extend (1+8bits) if m set
+		 */
+		if (ld.m) imm |= SIGN_EXT9;
+
+		/*
+		 * ifa == r3 and we know that the NaT bit on r3 was clear so
+		 * we can directly use ifa.
+		 */
+		ifa += imm;
+
+		setreg(ld.r3, ifa, 0, regs);
+
+		DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
+
+	} else if (ld.m) {
+		unsigned long r2;
+		int nat_r2;
+
+		/*
+		 * Load +Reg Opcode: ldXZ r1=[r3],r2
+		 *
+		 * Note: that we update r3 even in the case of ldfX.a
+		 * (where the load does not happen)
+		 *
+		 * The way the load algorithm works, we know that r3 does not
+		 * have its NaT bit set (would have gotten NaT consumption
+		 * before getting the unaligned fault). So we can use ifa
+		 * which equals r3 at this point.
+		 *
+		 * IMPORTANT:
+		 * The above statement holds ONLY because we know that we
+		 * never reach this code when trying to do a ldX.s.
+		 * If we ever make it to here on an ldfX.s then
+		 */
+		getreg(ld.imm, &r2, &nat_r2, regs);
+
+		ifa += r2;
+
+		/*
+		 * propagate Nat r2 -> r3
+		 */
+		setreg(ld.r3, ifa, nat_r2, regs);
+
+		DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
+	}
+}
+
+
+static int
+emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
+{
+	unsigned int len = 1 << ld.x6_sz;
+	unsigned long val = 0;
+
+	/*
+	 * r0, as target, doesn't need to be checked because Illegal Instruction
+	 * faults have higher priority than unaligned faults.
+	 *
+	 * r0 cannot be found as the base as it would never generate an
+	 * unaligned reference.
+	 */
+
+	/*
+	 * ldX.a we will emulate load and also invalidate the ALAT entry.
+	 * See comment below for explanation on how we handle ldX.a
+	 */
+
+	if (len != 2 && len != 4 && len != 8) {
+		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
+		return -1;
+	}
+	/* this assumes little-endian byte-order: */
+	if (copy_from_user(&val, (void __user *) ifa, len))
+		return -1;
+	setreg(ld.r1, val, 0, regs);
+
+	/*
+	 * check for updates on any kind of loads
+	 */
+	if (ld.op == 0x5 || ld.m)
+		emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
+
+	/*
+	 * handling of various loads (based on EAS2.4):
+	 *
+	 * ldX.acq (ordered load):
+	 *	- acquire semantics would have been used, so force fence instead.
+	 *
+	 * ldX.c.clr (check load and clear):
+	 *	- if we get to this handler, it's because the entry was not in the ALAT.
+	 *	  Therefore the operation reverts to a normal load
+	 *
+	 * ldX.c.nc (check load no clear):
+	 *	- same as previous one
+	 *
+	 * ldX.c.clr.acq (ordered check load and clear):
+	 *	- same as above for c.clr part. The load needs to have acquire semantics. So
+	 *	  we use the fence semantics which is stronger and thus ensures correctness.
+	 *
+	 * ldX.a (advanced load):
+	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
+	 *	  address doesn't match requested size alignment. This means that we would
+	 *	  possibly need more than one load to get the result.
+	 *
+	 *	  The load part can be handled just like a normal load, however the difficult
+	 *	  part is to get the right thing into the ALAT. The critical piece of information
+	 *	  in the base address of the load & size. To do that, a ld.a must be executed,
+	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
+	 *	  if we use the same target register, we will be okay for the check.a instruction.
+	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT  for any entry
+	 *	  which would overlap within [r3,r3+X] (the size of the load was store in the
+	 *	  ALAT). If such an entry is found the entry is invalidated. But this is not good
+	 *	  enough, take the following example:
+	 *		r3=3
+	 *		ld4.a r1=[r3]
+	 *
+	 *	  Could be emulated by doing:
+	 *		ld1.a r1=[r3],1
+	 *		store to temporary;
+	 *		ld1.a r1=[r3],1
+	 *		store & shift to temporary;
+	 *		ld1.a r1=[r3],1
+	 *		store & shift to temporary;
+	 *		ld1.a r1=[r3]
+	 *		store & shift to temporary;
+	 *		r1=temporary
+	 *
+	 *	  So in this case, you would get the right value is r1 but the wrong info in
+	 *	  the ALAT.  Notice that you could do it in reverse to finish with address 3
+	 *	  but you would still get the size wrong.  To get the size right, one needs to
+	 *	  execute exactly the same kind of load. You could do it from a aligned
+	 *	  temporary location, but you would get the address wrong.
+	 *
+	 *	  So no matter what, it is not possible to emulate an advanced load
+	 *	  correctly. But is that really critical ?
+	 *
+	 *	  We will always convert ld.a into a normal load with ALAT invalidated.  This
+	 *	  will enable compiler to do optimization where certain code path after ld.a
+	 *	  is not required to have ld.c/chk.a, e.g., code path with no intervening stores.
+	 *
+	 *	  If there is a store after the advanced load, one must either do a ld.c.* or
+	 *	  chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
+	 *	  entry found in ALAT), and that's perfectly ok because:
+	 *
+	 *		- ld.c.*, if the entry is not present a  normal load is executed
+	 *		- chk.a.*, if the entry is not present, execution jumps to recovery code
+	 *
+	 *	  In either case, the load can be potentially retried in another form.
+	 *
+	 *	  ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
+	 *	  up a stale entry later). The register base update MUST also be performed.
+	 */
+
+	/*
+	 * when the load has the .acq completer then
+	 * use ordering fence.
+	 */
+	if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
+		mb();
+
+	/*
+	 * invalidate ALAT entry in case of advanced load
+	 */
+	if (ld.x6_op == 0x2)
+		invala_gr(ld.r1);
+
+	return 0;
+}
+
+static int
+emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
+{
+	unsigned long r2;
+	unsigned int len = 1 << ld.x6_sz;
+
+	/*
+	 * if we get to this handler, Nat bits on both r3 and r2 have already
+	 * been checked. so we don't need to do it
+	 *
+	 * extract the value to be stored
+	 */
+	getreg(ld.imm, &r2, NULL, regs);
+
+	/*
+	 * we rely on the macros in unaligned.h for now i.e.,
+	 * we let the compiler figure out how to read memory gracefully.
+	 *
+	 * We need this switch/case because the way the inline function
+	 * works. The code is optimized by the compiler and looks like
+	 * a single switch/case.
+	 */
+	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
+
+	if (len != 2 && len != 4 && len != 8) {
+		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
+		return -1;
+	}
+
+	/* this assumes little-endian byte-order: */
+	if (copy_to_user((void __user *) ifa, &r2, len))
+		return -1;
+
+	/*
+	 * stX [r3]=r2,imm(9)
+	 *
+	 * NOTE:
+	 * ld.r3 can never be r0, because r0 would not generate an
+	 * unaligned access.
+	 */
+	if (ld.op == 0x5) {
+		unsigned long imm;
+
+		/*
+		 * form imm9: [12:6] contain first 7bits
+		 */
+		imm = ld.x << 7 | ld.r1;
+		/*
+		 * sign extend (8bits) if m set
+		 */
+		if (ld.m) imm |= SIGN_EXT9;
+		/*
+		 * ifa == r3 (NaT is necessarily cleared)
+		 */
+		ifa += imm;
+
+		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
+
+		setreg(ld.r3, ifa, 0, regs);
+	}
+	/*
+	 * we don't have alat_invalidate_multiple() so we need
+	 * to do the complete flush :-<<
+	 */
+	ia64_invala();
+
+	/*
+	 * stX.rel: use fence instead of release
+	 */
+	if (ld.x6_op == 0xd)
+		mb();
+
+	return 0;
+}
+
+/*
+ * floating point operations sizes in bytes
+ */
+static const unsigned char float_fsz[4]={
+	10, /* extended precision (e) */
+	8,  /* integer (8)            */
+	4,  /* single precision (s)   */
+	8   /* double precision (d)   */
+};
+
+static inline void
+mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+	ia64_ldfe(6, init);
+	ia64_stop();
+	ia64_stf_spill(final, 6);
+}
+
+static inline void
+mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+	ia64_ldf8(6, init);
+	ia64_stop();
+	ia64_stf_spill(final, 6);
+}
+
+static inline void
+mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+	ia64_ldfs(6, init);
+	ia64_stop();
+	ia64_stf_spill(final, 6);
+}
+
+static inline void
+mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+	ia64_ldfd(6, init);
+	ia64_stop();
+	ia64_stf_spill(final, 6);
+}
+
+static inline void
+float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+	ia64_ldf_fill(6, init);
+	ia64_stop();
+	ia64_stfe(final, 6);
+}
+
+static inline void
+float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+	ia64_ldf_fill(6, init);
+	ia64_stop();
+	ia64_stf8(final, 6);
+}
+
+static inline void
+float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+	ia64_ldf_fill(6, init);
+	ia64_stop();
+	ia64_stfs(final, 6);
+}
+
+static inline void
+float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+	ia64_ldf_fill(6, init);
+	ia64_stop();
+	ia64_stfd(final, 6);
+}
+
+static int
+emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
+{
+	struct ia64_fpreg fpr_init[2];
+	struct ia64_fpreg fpr_final[2];
+	unsigned long len = float_fsz[ld.x6_sz];
+
+	/*
+	 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
+	 * higher priority than unaligned faults.
+	 *
+	 * r0 cannot be found as the base as it would never generate an unaligned
+	 * reference.
+	 */
+
+	/*
+	 * make sure we get clean buffers
+	 */
+	memset(&fpr_init, 0, sizeof(fpr_init));
+	memset(&fpr_final, 0, sizeof(fpr_final));
+
+	/*
+	 * ldfpX.a: we don't try to emulate anything but we must
+	 * invalidate the ALAT entry and execute updates, if any.
+	 */
+	if (ld.x6_op != 0x2) {
+		/*
+		 * This assumes little-endian byte-order.  Note that there is no "ldfpe"
+		 * instruction:
+		 */
+		if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
+		    || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
+			return -1;
+
+		DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
+		DDUMP("frp_init =", &fpr_init, 2*len);
+		/*
+		 * XXX fixme
+		 * Could optimize inlines by using ldfpX & 2 spills
+		 */
+		switch( ld.x6_sz ) {
+			case 0:
+				mem2float_extended(&fpr_init[0], &fpr_final[0]);
+				mem2float_extended(&fpr_init[1], &fpr_final[1]);
+				break;
+			case 1:
+				mem2float_integer(&fpr_init[0], &fpr_final[0]);
+				mem2float_integer(&fpr_init[1], &fpr_final[1]);
+				break;
+			case 2:
+				mem2float_single(&fpr_init[0], &fpr_final[0]);
+				mem2float_single(&fpr_init[1], &fpr_final[1]);
+				break;
+			case 3:
+				mem2float_double(&fpr_init[0], &fpr_final[0]);
+				mem2float_double(&fpr_init[1], &fpr_final[1]);
+				break;
+		}
+		DDUMP("fpr_final =", &fpr_final, 2*len);
+		/*
+		 * XXX fixme
+		 *
+		 * A possible optimization would be to drop fpr_final and directly
+		 * use the storage from the saved context i.e., the actual final
+		 * destination (pt_regs, switch_stack or thread structure).
+		 */
+		setfpreg(ld.r1, &fpr_final[0], regs);
+		setfpreg(ld.imm, &fpr_final[1], regs);
+	}
+
+	/*
+	 * Check for updates: only immediate updates are available for this
+	 * instruction.
+	 */
+	if (ld.m) {
+		/*
+		 * the immediate is implicit given the ldsz of the operation:
+		 * single: 8 (2x4) and for  all others it's 16 (2x8)
+		 */
+		ifa += len<<1;
+
+		/*
+		 * IMPORTANT:
+		 * the fact that we force the NaT of r3 to zero is ONLY valid
+		 * as long as we don't come here with a ldfpX.s.
+		 * For this reason we keep this sanity check
+		 */
+		if (ld.x6_op == 1 || ld.x6_op == 3)
+			printk(KERN_ERR "%s: register update on speculative load pair, error\n",
+			       __func__);
+
+		setreg(ld.r3, ifa, 0, regs);
+	}
+
+	/*
+	 * Invalidate ALAT entries, if any, for both registers.
+	 */
+	if (ld.x6_op == 0x2) {
+		invala_fr(ld.r1);
+		invala_fr(ld.imm);
+	}
+	return 0;
+}
+
+
+static int
+emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
+{
+	struct ia64_fpreg fpr_init;
+	struct ia64_fpreg fpr_final;
+	unsigned long len = float_fsz[ld.x6_sz];
+
+	/*
+	 * fr0 & fr1 don't need to be checked because Illegal Instruction
+	 * faults have higher priority than unaligned faults.
+	 *
+	 * r0 cannot be found as the base as it would never generate an
+	 * unaligned reference.
+	 */
+
+	/*
+	 * make sure we get clean buffers
+	 */
+	memset(&fpr_init,0, sizeof(fpr_init));
+	memset(&fpr_final,0, sizeof(fpr_final));
+
+	/*
+	 * ldfX.a we don't try to emulate anything but we must
+	 * invalidate the ALAT entry.
+	 * See comments in ldX for descriptions on how the various loads are handled.
+	 */
+	if (ld.x6_op != 0x2) {
+		if (copy_from_user(&fpr_init, (void __user *) ifa, len))
+			return -1;
+
+		DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
+		DDUMP("fpr_init =", &fpr_init, len);
+		/*
+		 * we only do something for x6_op={0,8,9}
+		 */
+		switch( ld.x6_sz ) {
+			case 0:
+				mem2float_extended(&fpr_init, &fpr_final);
+				break;
+			case 1:
+				mem2float_integer(&fpr_init, &fpr_final);
+				break;
+			case 2:
+				mem2float_single(&fpr_init, &fpr_final);
+				break;
+			case 3:
+				mem2float_double(&fpr_init, &fpr_final);
+				break;
+		}
+		DDUMP("fpr_final =", &fpr_final, len);
+		/*
+		 * XXX fixme
+		 *
+		 * A possible optimization would be to drop fpr_final and directly
+		 * use the storage from the saved context i.e., the actual final
+		 * destination (pt_regs, switch_stack or thread structure).
+		 */
+		setfpreg(ld.r1, &fpr_final, regs);
+	}
+
+	/*
+	 * check for updates on any loads
+	 */
+	if (ld.op == 0x7 || ld.m)
+		emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
+
+	/*
+	 * invalidate ALAT entry in case of advanced floating point loads
+	 */
+	if (ld.x6_op == 0x2)
+		invala_fr(ld.r1);
+
+	return 0;
+}
+
+
+static int
+emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
+{
+	struct ia64_fpreg fpr_init;
+	struct ia64_fpreg fpr_final;
+	unsigned long len = float_fsz[ld.x6_sz];
+
+	/*
+	 * make sure we get clean buffers
+	 */
+	memset(&fpr_init,0, sizeof(fpr_init));
+	memset(&fpr_final,0, sizeof(fpr_final));
+
+	/*
+	 * if we get to this handler, Nat bits on both r3 and r2 have already
+	 * been checked. so we don't need to do it
+	 *
+	 * extract the value to be stored
+	 */
+	getfpreg(ld.imm, &fpr_init, regs);
+	/*
+	 * during this step, we extract the spilled registers from the saved
+	 * context i.e., we refill. Then we store (no spill) to temporary
+	 * aligned location
+	 */
+	switch( ld.x6_sz ) {
+		case 0:
+			float2mem_extended(&fpr_init, &fpr_final);
+			break;
+		case 1:
+			float2mem_integer(&fpr_init, &fpr_final);
+			break;
+		case 2:
+			float2mem_single(&fpr_init, &fpr_final);
+			break;
+		case 3:
+			float2mem_double(&fpr_init, &fpr_final);
+			break;
+	}
+	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
+	DDUMP("fpr_init =", &fpr_init, len);
+	DDUMP("fpr_final =", &fpr_final, len);
+
+	if (copy_to_user((void __user *) ifa, &fpr_final, len))
+		return -1;
+
+	/*
+	 * stfX [r3]=r2,imm(9)
+	 *
+	 * NOTE:
+	 * ld.r3 can never be r0, because r0 would not generate an
+	 * unaligned access.
+	 */
+	if (ld.op == 0x7) {
+		unsigned long imm;
+
+		/*
+		 * form imm9: [12:6] contain first 7bits
+		 */
+		imm = ld.x << 7 | ld.r1;
+		/*
+		 * sign extend (8bits) if m set
+		 */
+		if (ld.m)
+			imm |= SIGN_EXT9;
+		/*
+		 * ifa == r3 (NaT is necessarily cleared)
+		 */
+		ifa += imm;
+
+		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
+
+		setreg(ld.r3, ifa, 0, regs);
+	}
+	/*
+	 * we don't have alat_invalidate_multiple() so we need
+	 * to do the complete flush :-<<
+	 */
+	ia64_invala();
+
+	return 0;
+}
+
+/*
+ * Make sure we log the unaligned access, so that user/sysadmin can notice it and
+ * eventually fix the program.  However, we don't want to do that for every access so we
+ * pace it with jiffies.
+ */
+static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5);
+
+void
+ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
+{
+	struct ia64_psr *ipsr = ia64_psr(regs);
+	mm_segment_t old_fs = get_fs();
+	unsigned long bundle[2];
+	unsigned long opcode;
+	struct siginfo si;
+	const struct exception_table_entry *eh = NULL;
+	union {
+		unsigned long l;
+		load_store_t insn;
+	} u;
+	int ret = -1;
+
+	if (ia64_psr(regs)->be) {
+		/* we don't support big-endian accesses */
+		if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
+			return;
+		goto force_sigbus;
+	}
+
+	/*
+	 * Treat kernel accesses for which there is an exception handler entry the same as
+	 * user-level unaligned accesses.  Otherwise, a clever program could trick this
+	 * handler into reading an arbitrary kernel addresses...
+	 */
+	if (!user_mode(regs))
+		eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
+	if (user_mode(regs) || eh) {
+		if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
+			goto force_sigbus;
+
+		if (!no_unaligned_warning &&
+		    !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
+		    __ratelimit(&logging_rate_limit))
+		{
+			char buf[200];	/* comm[] is at most 16 bytes... */
+			size_t len;
+
+			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
+				      "ip=0x%016lx\n\r", current->comm,
+				      task_pid_nr(current),
+				      ifa, regs->cr_iip + ipsr->ri);
+			/*
+			 * Don't call tty_write_message() if we're in the kernel; we might
+			 * be holding locks...
+			 */
+			if (user_mode(regs))
+				tty_write_message(current->signal->tty, buf);
+			buf[len-1] = '\0';	/* drop '\r' */
+			/* watch for command names containing %s */
+			printk(KERN_WARNING "%s", buf);
+		} else {
+			if (no_unaligned_warning) {
+				printk_once(KERN_WARNING "%s(%d) encountered an "
+				       "unaligned exception which required\n"
+				       "kernel assistance, which degrades "
+				       "the performance of the application.\n"
+				       "Unaligned exception warnings have "
+				       "been disabled by the system "
+				       "administrator\n"
+				       "echo 0 > /proc/sys/kernel/ignore-"
+				       "unaligned-usertrap to re-enable\n",
+				       current->comm, task_pid_nr(current));
+			}
+		}
+	} else {
+		if (__ratelimit(&logging_rate_limit)) {
+			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
+			       ifa, regs->cr_iip + ipsr->ri);
+			if (unaligned_dump_stack)
+				dump_stack();
+		}
+		set_fs(KERNEL_DS);
+	}
+
+	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
+	       regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
+
+	if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
+		goto failure;
+
+	/*
+	 * extract the instruction from the bundle given the slot number
+	 */
+	switch (ipsr->ri) {
+	      case 0: u.l = (bundle[0] >>  5); break;
+	      case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
+	      case 2: u.l = (bundle[1] >> 23); break;
+	}
+	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
+
+	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
+	       "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
+	       u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
+
+	/*
+	 * IMPORTANT:
+	 * Notice that the switch statement DOES not cover all possible instructions
+	 * that DO generate unaligned references. This is made on purpose because for some
+	 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
+	 * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
+	 * the program will get a signal and die:
+	 *
+	 *	load/store:
+	 *		- ldX.spill
+	 *		- stX.spill
+	 *	Reason: RNATs are based on addresses
+	 *		- ld16
+	 *		- st16
+	 *	Reason: ld16 and st16 are supposed to occur in a single
+	 *		memory op
+	 *
+	 *	synchronization:
+	 *		- cmpxchg
+	 *		- fetchadd
+	 *		- xchg
+	 *	Reason: ATOMIC operations cannot be emulated properly using multiple
+	 *	        instructions.
+	 *
+	 *	speculative loads:
+	 *		- ldX.sZ
+	 *	Reason: side effects, code must be ready to deal with failure so simpler
+	 *		to let the load fail.
+	 * ---------------------------------------------------------------------------------
+	 * XXX fixme
+	 *
+	 * I would like to get rid of this switch case and do something
+	 * more elegant.
+	 */
+	switch (opcode) {
+	      case LDS_OP:
+	      case LDSA_OP:
+		if (u.insn.x)
+			/* oops, really a semaphore op (cmpxchg, etc) */
+			goto failure;
+		/* no break */
+	      case LDS_IMM_OP:
+	      case LDSA_IMM_OP:
+	      case LDFS_OP:
+	      case LDFSA_OP:
+	      case LDFS_IMM_OP:
+		/*
+		 * The instruction will be retried with deferred exceptions turned on, and
+		 * we should get Nat bit installed
+		 *
+		 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
+		 * are actually executed even though the operation failed. So we don't
+		 * need to take care of this.
+		 */
+		DPRINT("forcing PSR_ED\n");
+		regs->cr_ipsr |= IA64_PSR_ED;
+		goto done;
+
+	      case LD_OP:
+	      case LDA_OP:
+	      case LDBIAS_OP:
+	      case LDACQ_OP:
+	      case LDCCLR_OP:
+	      case LDCNC_OP:
+	      case LDCCLRACQ_OP:
+		if (u.insn.x)
+			/* oops, really a semaphore op (cmpxchg, etc) */
+			goto failure;
+		/* no break */
+	      case LD_IMM_OP:
+	      case LDA_IMM_OP:
+	      case LDBIAS_IMM_OP:
+	      case LDACQ_IMM_OP:
+	      case LDCCLR_IMM_OP:
+	      case LDCNC_IMM_OP:
+	      case LDCCLRACQ_IMM_OP:
+		ret = emulate_load_int(ifa, u.insn, regs);
+		break;
+
+	      case ST_OP:
+	      case STREL_OP:
+		if (u.insn.x)
+			/* oops, really a semaphore op (cmpxchg, etc) */
+			goto failure;
+		/* no break */
+	      case ST_IMM_OP:
+	      case STREL_IMM_OP:
+		ret = emulate_store_int(ifa, u.insn, regs);
+		break;
+
+	      case LDF_OP:
+	      case LDFA_OP:
+	      case LDFCCLR_OP:
+	      case LDFCNC_OP:
+		if (u.insn.x)
+			ret = emulate_load_floatpair(ifa, u.insn, regs);
+		else
+			ret = emulate_load_float(ifa, u.insn, regs);
+		break;
+
+	      case LDF_IMM_OP:
+	      case LDFA_IMM_OP:
+	      case LDFCCLR_IMM_OP:
+	      case LDFCNC_IMM_OP:
+		ret = emulate_load_float(ifa, u.insn, regs);
+		break;
+
+	      case STF_OP:
+	      case STF_IMM_OP:
+		ret = emulate_store_float(ifa, u.insn, regs);
+		break;
+
+	      default:
+		goto failure;
+	}
+	DPRINT("ret=%d\n", ret);
+	if (ret)
+		goto failure;
+
+	if (ipsr->ri == 2)
+		/*
+		 * given today's architecture this case is not likely to happen because a
+		 * memory access instruction (M) can never be in the last slot of a
+		 * bundle. But let's keep it for now.
+		 */
+		regs->cr_iip += 16;
+	ipsr->ri = (ipsr->ri + 1) & 0x3;
+
+	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
+  done:
+	set_fs(old_fs);		/* restore original address limit */
+	return;
+
+  failure:
+	/* something went wrong... */
+	if (!user_mode(regs)) {
+		if (eh) {
+			ia64_handle_exception(regs, eh);
+			goto done;
+		}
+		if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
+			return;
+		/* NOT_REACHED */
+	}
+  force_sigbus:
+	si.si_signo = SIGBUS;
+	si.si_errno = 0;
+	si.si_code = BUS_ADRALN;
+	si.si_addr = (void __user *) ifa;
+	si.si_flags = 0;
+	si.si_isr = 0;
+	si.si_imm = 0;
+	force_sig_info(SIGBUS, &si, current);
+	goto done;
+}
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
new file mode 100644
index 00000000..c4696d21
--- /dev/null
+++ b/arch/ia64/kernel/uncached.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2001-2008 Silicon Graphics, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * A simple uncached page allocator using the generic allocator. This
+ * allocator first utilizes the spare (spill) pages found in the EFI
+ * memmap and will then start converting cached pages to uncached ones
+ * at a granule at a time. Node awareness is implemented by having a
+ * pool of pages per node.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/efi.h>
+#include <linux/genalloc.h>
+#include <linux/gfp.h>
+#include <asm/page.h>
+#include <asm/pal.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/atomic.h>
+#include <asm/tlbflush.h>
+#include <asm/sn/arch.h>
+
+
+extern void __init efi_memmap_walk_uc(efi_freemem_callback_t, void *);
+
+struct uncached_pool {
+	struct gen_pool *pool;
+	struct mutex add_chunk_mutex;	/* serialize adding a converted chunk */
+	int nchunks_added;		/* #of converted chunks added to pool */
+	atomic_t status;		/* smp called function's return status*/
+};
+
+#define MAX_CONVERTED_CHUNKS_PER_NODE	2
+
+struct uncached_pool uncached_pools[MAX_NUMNODES];
+
+
+static void uncached_ipi_visibility(void *data)
+{
+	int status;
+	struct uncached_pool *uc_pool = (struct uncached_pool *)data;
+
+	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+	if ((status != PAL_VISIBILITY_OK) &&
+	    (status != PAL_VISIBILITY_OK_REMOTE_NEEDED))
+		atomic_inc(&uc_pool->status);
+}
+
+
+static void uncached_ipi_mc_drain(void *data)
+{
+	int status;
+	struct uncached_pool *uc_pool = (struct uncached_pool *)data;
+
+	status = ia64_pal_mc_drain();
+	if (status != PAL_STATUS_SUCCESS)
+		atomic_inc(&uc_pool->status);
+}
+
+
+/*
+ * Add a new chunk of uncached memory pages to the specified pool.
+ *
+ * @pool: pool to add new chunk of uncached memory to
+ * @nid: node id of node to allocate memory from, or -1
+ *
+ * This is accomplished by first allocating a granule of cached memory pages
+ * and then converting them to uncached memory pages.
+ */
+static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
+{
+	struct page *page;
+	int status, i, nchunks_added = uc_pool->nchunks_added;
+	unsigned long c_addr, uc_addr;
+
+	if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0)
+		return -1;	/* interrupted by a signal */
+
+	if (uc_pool->nchunks_added > nchunks_added) {
+		/* someone added a new chunk while we were waiting */
+		mutex_unlock(&uc_pool->add_chunk_mutex);
+		return 0;
+	}
+
+	if (uc_pool->nchunks_added >= MAX_CONVERTED_CHUNKS_PER_NODE) {
+		mutex_unlock(&uc_pool->add_chunk_mutex);
+		return -1;
+	}
+
+	/* attempt to allocate a granule's worth of cached memory pages */
+
+	page = alloc_pages_exact_node(nid,
+				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+				IA64_GRANULE_SHIFT-PAGE_SHIFT);
+	if (!page) {
+		mutex_unlock(&uc_pool->add_chunk_mutex);
+		return -1;
+	}
+
+	/* convert the memory pages from cached to uncached */
+
+	c_addr = (unsigned long)page_address(page);
+	uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;
+
+	/*
+	 * There's a small race here where it's possible for someone to
+	 * access the page through /dev/mem halfway through the conversion
+	 * to uncached - not sure it's really worth bothering about
+	 */
+	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
+		SetPageUncached(&page[i]);
+
+	flush_tlb_kernel_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);
+
+	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+	if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
+		atomic_set(&uc_pool->status, 0);
+		status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
+		if (status || atomic_read(&uc_pool->status))
+			goto failed;
+	} else if (status != PAL_VISIBILITY_OK)
+		goto failed;
+
+	preempt_disable();
+
+	if (ia64_platform_is("sn2"))
+		sn_flush_all_caches(uc_addr, IA64_GRANULE_SIZE);
+	else
+		flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);
+
+	/* flush the just introduced uncached translation from the TLB */
+	local_flush_tlb_all();
+
+	preempt_enable();
+
+	status = ia64_pal_mc_drain();
+	if (status != PAL_STATUS_SUCCESS)
+		goto failed;
+	atomic_set(&uc_pool->status, 0);
+	status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
+	if (status || atomic_read(&uc_pool->status))
+		goto failed;
+
+	/*
+	 * The chunk of memory pages has been converted to uncached so now we
+	 * can add it to the pool.
+	 */
+	status = gen_pool_add(uc_pool->pool, uc_addr, IA64_GRANULE_SIZE, nid);
+	if (status)
+		goto failed;
+
+	uc_pool->nchunks_added++;
+	mutex_unlock(&uc_pool->add_chunk_mutex);
+	return 0;
+
+	/* failed to convert or add the chunk so give it back to the kernel */
+failed:
+	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
+		ClearPageUncached(&page[i]);
+
+	free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT);
+	mutex_unlock(&uc_pool->add_chunk_mutex);
+	return -1;
+}
+
+
+/*
+ * uncached_alloc_page
+ *
+ * @starting_nid: node id of node to start with, or -1
+ * @n_pages: number of contiguous pages to allocate
+ *
+ * Allocate the specified number of contiguous uncached pages on the
+ * the requested node. If not enough contiguous uncached pages are available
+ * on the requested node, roundrobin starting with the next higher node.
+ */
+unsigned long uncached_alloc_page(int starting_nid, int n_pages)
+{
+	unsigned long uc_addr;
+	struct uncached_pool *uc_pool;
+	int nid;
+
+	if (unlikely(starting_nid >= MAX_NUMNODES))
+		return 0;
+
+	if (starting_nid < 0)
+		starting_nid = numa_node_id();
+	nid = starting_nid;
+
+	do {
+		if (!node_state(nid, N_HIGH_MEMORY))
+			continue;
+		uc_pool = &uncached_pools[nid];
+		if (uc_pool->pool == NULL)
+			continue;
+		do {
+			uc_addr = gen_pool_alloc(uc_pool->pool,
+						 n_pages * PAGE_SIZE);
+			if (uc_addr != 0)
+				return uc_addr;
+		} while (uncached_add_chunk(uc_pool, nid) == 0);
+
+	} while ((nid = (nid + 1) % MAX_NUMNODES) != starting_nid);
+
+	return 0;
+}
+EXPORT_SYMBOL(uncached_alloc_page);
+
+
+/*
+ * uncached_free_page
+ *
+ * @uc_addr: uncached address of first page to free
+ * @n_pages: number of contiguous pages to free
+ *
+ * Free the specified number of uncached pages.
+ */
+void uncached_free_page(unsigned long uc_addr, int n_pages)
+{
+	int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET);
+	struct gen_pool *pool = uncached_pools[nid].pool;
+
+	if (unlikely(pool == NULL))
+		return;
+
+	if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
+		panic("uncached_free_page invalid address %lx\n", uc_addr);
+
+	gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE);
+}
+EXPORT_SYMBOL(uncached_free_page);
+
+
+/*
+ * uncached_build_memmap,
+ *
+ * @uc_start: uncached starting address of a chunk of uncached memory
+ * @uc_end: uncached ending address of a chunk of uncached memory
+ * @arg: ignored, (NULL argument passed in on call to efi_memmap_walk_uc())
+ *
+ * Called at boot time to build a map of pages that can be used for
+ * memory special operations.
+ */
+static int __init uncached_build_memmap(u64 uc_start, u64 uc_end, void *arg)
+{
+	int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET);
+	struct gen_pool *pool = uncached_pools[nid].pool;
+	size_t size = uc_end - uc_start;
+
+	touch_softlockup_watchdog();
+
+	if (pool != NULL) {
+		memset((char *)uc_start, 0, size);
+		(void) gen_pool_add(pool, uc_start, size, nid);
+	}
+	return 0;
+}
+
+
+static int __init uncached_init(void)
+{
+	int nid;
+
+	for_each_node_state(nid, N_ONLINE) {
+		uncached_pools[nid].pool = gen_pool_create(PAGE_SHIFT, nid);
+		mutex_init(&uncached_pools[nid].add_chunk_mutex);
+	}
+
+	efi_memmap_walk_uc(uncached_build_memmap, NULL);
+	return 0;
+}
+
+__initcall(uncached_init);
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
new file mode 100644
index 00000000..fed6afa2
--- /dev/null
+++ b/arch/ia64/kernel/unwind.c
@@ -0,0 +1,2320 @@
+/*
+ * Copyright (C) 1999-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2003 Fenghua Yu <fenghua.yu@intel.com>
+ * 	- Change pt_regs_off() to make it less dependent on pt_regs structure.
+ */
+/*
+ * This file implements call frame unwind support for the Linux
+ * kernel.  Parsing and processing the unwind information is
+ * time-consuming, so this implementation translates the unwind
+ * descriptors into unwind scripts.  These scripts are very simple
+ * (basically a sequence of assignments) and efficient to execute.
+ * They are cached for later re-use.  Each script is specific for a
+ * given instruction pointer address and the set of predicate values
+ * that the script depends on (most unwind descriptors are
+ * unconditional and scripts often do not depend on predicates at
+ * all).  This code is based on the unwind conventions described in
+ * the "IA-64 Software Conventions and Runtime Architecture" manual.
+ *
+ * SMP conventions:
+ *	o updates to the global unwind data (in structure "unw") are serialized
+ *	  by the unw.lock spinlock
+ *	o each unwind script has its own read-write lock; a thread must acquire
+ *	  a read lock before executing a script and must acquire a write lock
+ *	  before modifying a script
+ *	o if both the unw.lock spinlock and a script's read-write lock must be
+ *	  acquired, then the read-write lock must be acquired first.
+ */
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/unwind.h>
+
+#include <asm/delay.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/ptrace_offsets.h>
+#include <asm/rse.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "entry.h"
+#include "unwind_i.h"
+
+#define UNW_LOG_CACHE_SIZE	7	/* each unw_script is ~256 bytes in size */
+#define UNW_CACHE_SIZE		(1 << UNW_LOG_CACHE_SIZE)
+
+#define UNW_LOG_HASH_SIZE	(UNW_LOG_CACHE_SIZE + 1)
+#define UNW_HASH_SIZE		(1 << UNW_LOG_HASH_SIZE)
+
+#define UNW_STATS	0	/* WARNING: this disabled interrupts for long time-spans!! */
+
+#ifdef UNW_DEBUG
+  static unsigned int unw_debug_level = UNW_DEBUG;
+#  define UNW_DEBUG_ON(n)	unw_debug_level >= n
+   /* Do not code a printk level, not all debug lines end in newline */
+#  define UNW_DPRINT(n, ...)  if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__)
+#  undef inline
+#  define inline
+#else /* !UNW_DEBUG */
+#  define UNW_DEBUG_ON(n)  0
+#  define UNW_DPRINT(n, ...)
+#endif /* UNW_DEBUG */
+
+#if UNW_STATS
+# define STAT(x...)	x
+#else
+# define STAT(x...)
+#endif
+
+#define alloc_reg_state()	kmalloc(sizeof(struct unw_reg_state), GFP_ATOMIC)
+#define free_reg_state(usr)	kfree(usr)
+#define alloc_labeled_state()	kmalloc(sizeof(struct unw_labeled_state), GFP_ATOMIC)
+#define free_labeled_state(usr)	kfree(usr)
+
+typedef unsigned long unw_word;
+typedef unsigned char unw_hash_index_t;
+
+static struct {
+	spinlock_t lock;			/* spinlock for unwind data */
+
+	/* list of unwind tables (one per load-module) */
+	struct unw_table *tables;
+
+	unsigned long r0;			/* constant 0 for r0 */
+
+	/* table of registers that prologues can save (and order in which they're saved): */
+	const unsigned char save_order[8];
+
+	/* maps a preserved register index (preg_index) to corresponding switch_stack offset: */
+	unsigned short sw_off[sizeof(struct unw_frame_info) / 8];
+
+	unsigned short lru_head;		/* index of lead-recently used script */
+	unsigned short lru_tail;		/* index of most-recently used script */
+
+	/* index into unw_frame_info for preserved register i */
+	unsigned short preg_index[UNW_NUM_REGS];
+
+	short pt_regs_offsets[32];
+
+	/* unwind table for the kernel: */
+	struct unw_table kernel_table;
+
+	/* unwind table describing the gate page (kernel code that is mapped into user space): */
+	size_t gate_table_size;
+	unsigned long *gate_table;
+
+	/* hash table that maps instruction pointer to script index: */
+	unsigned short hash[UNW_HASH_SIZE];
+
+	/* script cache: */
+	struct unw_script cache[UNW_CACHE_SIZE];
+
+# ifdef UNW_DEBUG
+	const char *preg_name[UNW_NUM_REGS];
+# endif
+# if UNW_STATS
+	struct {
+		struct {
+			int lookups;
+			int hinted_hits;
+			int normal_hits;
+			int collision_chain_traversals;
+		} cache;
+		struct {
+			unsigned long build_time;
+			unsigned long run_time;
+			unsigned long parse_time;
+			int builds;
+			int news;
+			int collisions;
+			int runs;
+		} script;
+		struct {
+			unsigned long init_time;
+			unsigned long unwind_time;
+			int inits;
+			int unwinds;
+		} api;
+	} stat;
+# endif
+} unw = {
+	.tables = &unw.kernel_table,
+	.lock = __SPIN_LOCK_UNLOCKED(unw.lock),
+	.save_order = {
+		UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
+		UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
+	},
+	.preg_index = {
+		offsetof(struct unw_frame_info, pri_unat_loc)/8,	/* PRI_UNAT_GR */
+		offsetof(struct unw_frame_info, pri_unat_loc)/8,	/* PRI_UNAT_MEM */
+		offsetof(struct unw_frame_info, bsp_loc)/8,
+		offsetof(struct unw_frame_info, bspstore_loc)/8,
+		offsetof(struct unw_frame_info, pfs_loc)/8,
+		offsetof(struct unw_frame_info, rnat_loc)/8,
+		offsetof(struct unw_frame_info, psp)/8,
+		offsetof(struct unw_frame_info, rp_loc)/8,
+		offsetof(struct unw_frame_info, r4)/8,
+		offsetof(struct unw_frame_info, r5)/8,
+		offsetof(struct unw_frame_info, r6)/8,
+		offsetof(struct unw_frame_info, r7)/8,
+		offsetof(struct unw_frame_info, unat_loc)/8,
+		offsetof(struct unw_frame_info, pr_loc)/8,
+		offsetof(struct unw_frame_info, lc_loc)/8,
+		offsetof(struct unw_frame_info, fpsr_loc)/8,
+		offsetof(struct unw_frame_info, b1_loc)/8,
+		offsetof(struct unw_frame_info, b2_loc)/8,
+		offsetof(struct unw_frame_info, b3_loc)/8,
+		offsetof(struct unw_frame_info, b4_loc)/8,
+		offsetof(struct unw_frame_info, b5_loc)/8,
+		offsetof(struct unw_frame_info, f2_loc)/8,
+		offsetof(struct unw_frame_info, f3_loc)/8,
+		offsetof(struct unw_frame_info, f4_loc)/8,
+		offsetof(struct unw_frame_info, f5_loc)/8,
+		offsetof(struct unw_frame_info, fr_loc[16 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[17 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[18 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[19 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[20 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[21 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[22 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[23 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[24 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[25 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[26 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[27 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[28 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[29 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[30 - 16])/8,
+		offsetof(struct unw_frame_info, fr_loc[31 - 16])/8,
+	},
+	.pt_regs_offsets = {
+		[0] = -1,
+		offsetof(struct pt_regs,  r1),
+		offsetof(struct pt_regs,  r2),
+		offsetof(struct pt_regs,  r3),
+		[4] = -1, [5] = -1, [6] = -1, [7] = -1,
+		offsetof(struct pt_regs,  r8),
+		offsetof(struct pt_regs,  r9),
+		offsetof(struct pt_regs, r10),
+		offsetof(struct pt_regs, r11),
+		offsetof(struct pt_regs, r12),
+		offsetof(struct pt_regs, r13),
+		offsetof(struct pt_regs, r14),
+		offsetof(struct pt_regs, r15),
+		offsetof(struct pt_regs, r16),
+		offsetof(struct pt_regs, r17),
+		offsetof(struct pt_regs, r18),
+		offsetof(struct pt_regs, r19),
+		offsetof(struct pt_regs, r20),
+		offsetof(struct pt_regs, r21),
+		offsetof(struct pt_regs, r22),
+		offsetof(struct pt_regs, r23),
+		offsetof(struct pt_regs, r24),
+		offsetof(struct pt_regs, r25),
+		offsetof(struct pt_regs, r26),
+		offsetof(struct pt_regs, r27),
+		offsetof(struct pt_regs, r28),
+		offsetof(struct pt_regs, r29),
+		offsetof(struct pt_regs, r30),
+		offsetof(struct pt_regs, r31),
+	},
+	.hash = { [0 ... UNW_HASH_SIZE - 1] = -1 },
+#ifdef UNW_DEBUG
+	.preg_name = {
+		"pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs", "ar.rnat", "psp", "rp",
+		"r4", "r5", "r6", "r7",
+		"ar.unat", "pr", "ar.lc", "ar.fpsr",
+		"b1", "b2", "b3", "b4", "b5",
+		"f2", "f3", "f4", "f5",
+		"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+		"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
+	}
+#endif
+};
+
+static inline int
+read_only (void *addr)
+{
+	return (unsigned long) ((char *) addr - (char *) &unw.r0) < sizeof(unw.r0);
+}
+
+/*
+ * Returns offset of rREG in struct pt_regs.
+ */
+static inline unsigned long
+pt_regs_off (unsigned long reg)
+{
+	short off = -1;
+
+	if (reg < ARRAY_SIZE(unw.pt_regs_offsets))
+		off = unw.pt_regs_offsets[reg];
+
+	if (off < 0) {
+		UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __func__, reg);
+		off = 0;
+	}
+	return (unsigned long) off;
+}
+
+static inline struct pt_regs *
+get_scratch_regs (struct unw_frame_info *info)
+{
+	if (!info->pt) {
+		/* This should not happen with valid unwind info.  */
+		UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __func__);
+		if (info->flags & UNW_FLAG_INTERRUPT_FRAME)
+			info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1);
+		else
+			info->pt = info->sp - 16;
+	}
+	UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __func__, info->sp, info->pt);
+	return (struct pt_regs *) info->pt;
+}
+
+/* Unwind accessors.  */
+
+int
+unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char *nat, int write)
+{
+	unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat;
+	struct unw_ireg *ireg;
+	struct pt_regs *pt;
+
+	if ((unsigned) regnum - 1 >= 127) {
+		if (regnum == 0 && !write) {
+			*val = 0;	/* read r0 always returns 0 */
+			*nat = 0;
+			return 0;
+		}
+		UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n",
+			   __func__, regnum);
+		return -1;
+	}
+
+	if (regnum < 32) {
+		if (regnum >= 4 && regnum <= 7) {
+			/* access a preserved register */
+			ireg = &info->r4 + (regnum - 4);
+			addr = ireg->loc;
+			if (addr) {
+				nat_addr = addr + ireg->nat.off;
+				switch (ireg->nat.type) {
+				      case UNW_NAT_VAL:
+					/* simulate getf.sig/setf.sig */
+					if (write) {
+						if (*nat) {
+							/* write NaTVal and be done with it */
+							addr[0] = 0;
+							addr[1] = 0x1fffe;
+							return 0;
+						}
+						addr[1] = 0x1003e;
+					} else {
+						if (addr[0] == 0 && addr[1] == 0x1ffe) {
+							/* return NaT and be done with it */
+							*val = 0;
+							*nat = 1;
+							return 0;
+						}
+					}
+					/* fall through */
+				      case UNW_NAT_NONE:
+					dummy_nat = 0;
+					nat_addr = &dummy_nat;
+					break;
+
+				      case UNW_NAT_MEMSTK:
+					nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+					break;
+
+				      case UNW_NAT_REGSTK:
+					nat_addr = ia64_rse_rnat_addr(addr);
+					if ((unsigned long) addr < info->regstk.limit
+					    || (unsigned long) addr >= info->regstk.top)
+					{
+						UNW_DPRINT(0, "unwind.%s: %p outside of regstk "
+							"[0x%lx-0x%lx)\n",
+							__func__, (void *) addr,
+							info->regstk.limit,
+							info->regstk.top);
+						return -1;
+					}
+					if ((unsigned long) nat_addr >= info->regstk.top)
+						nat_addr = &info->sw->ar_rnat;
+					nat_mask = (1UL << ia64_rse_slot_num(addr));
+					break;
+				}
+			} else {
+				addr = &info->sw->r4 + (regnum - 4);
+				nat_addr = &info->sw->ar_unat;
+				nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+			}
+		} else {
+			/* access a scratch register */
+			pt = get_scratch_regs(info);
+			addr = (unsigned long *) ((unsigned long)pt + pt_regs_off(regnum));
+			if (info->pri_unat_loc)
+				nat_addr = info->pri_unat_loc;
+			else
+				nat_addr = &info->sw->caller_unat;
+			nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+		}
+	} else {
+		/* access a stacked register */
+		addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 32);
+		nat_addr = ia64_rse_rnat_addr(addr);
+		if ((unsigned long) addr < info->regstk.limit
+		    || (unsigned long) addr >= info->regstk.top)
+		{
+			UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside "
+				   "of rbs\n",  __func__);
+			return -1;
+		}
+		if ((unsigned long) nat_addr >= info->regstk.top)
+			nat_addr = &info->sw->ar_rnat;
+		nat_mask = (1UL << ia64_rse_slot_num(addr));
+	}
+
+	if (write) {
+		if (read_only(addr)) {
+			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+				__func__);
+		} else {
+			*addr = *val;
+			if (*nat)
+				*nat_addr |= nat_mask;
+			else
+				*nat_addr &= ~nat_mask;
+		}
+	} else {
+		if ((*nat_addr & nat_mask) == 0) {
+			*val = *addr;
+			*nat = 0;
+		} else {
+			*val = 0;	/* if register is a NaT, *addr may contain kernel data! */
+			*nat = 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(unw_access_gr);
+
+int
+unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int write)
+{
+	unsigned long *addr;
+	struct pt_regs *pt;
+
+	switch (regnum) {
+		/* scratch: */
+	      case 0: pt = get_scratch_regs(info); addr = &pt->b0; break;
+	      case 6: pt = get_scratch_regs(info); addr = &pt->b6; break;
+	      case 7: pt = get_scratch_regs(info); addr = &pt->b7; break;
+
+		/* preserved: */
+	      case 1: case 2: case 3: case 4: case 5:
+		addr = *(&info->b1_loc + (regnum - 1));
+		if (!addr)
+			addr = &info->sw->b1 + (regnum - 1);
+		break;
+
+	      default:
+		UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n",
+			   __func__, regnum);
+		return -1;
+	}
+	if (write)
+		if (read_only(addr)) {
+			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+				__func__);
+		} else
+			*addr = *val;
+	else
+		*val = *addr;
+	return 0;
+}
+EXPORT_SYMBOL(unw_access_br);
+
+int
+unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val, int write)
+{
+	struct ia64_fpreg *addr = NULL;
+	struct pt_regs *pt;
+
+	if ((unsigned) (regnum - 2) >= 126) {
+		UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n",
+			   __func__, regnum);
+		return -1;
+	}
+
+	if (regnum <= 5) {
+		addr = *(&info->f2_loc + (regnum - 2));
+		if (!addr)
+			addr = &info->sw->f2 + (regnum - 2);
+	} else if (regnum <= 15) {
+		if (regnum <= 11) {
+			pt = get_scratch_regs(info);
+			addr = &pt->f6  + (regnum - 6);
+		}
+		else
+			addr = &info->sw->f12 + (regnum - 12);
+	} else if (regnum <= 31) {
+		addr = info->fr_loc[regnum - 16];
+		if (!addr)
+			addr = &info->sw->f16 + (regnum - 16);
+	} else {
+		struct task_struct *t = info->task;
+
+		if (write)
+			ia64_sync_fph(t);
+		else
+			ia64_flush_fph(t);
+		addr = t->thread.fph + (regnum - 32);
+	}
+
+	if (write)
+		if (read_only(addr)) {
+			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+				__func__);
+		} else
+			*addr = *val;
+	else
+		*val = *addr;
+	return 0;
+}
+EXPORT_SYMBOL(unw_access_fr);
+
+int
+unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int write)
+{
+	unsigned long *addr;
+	struct pt_regs *pt;
+
+	switch (regnum) {
+	      case UNW_AR_BSP:
+		addr = info->bsp_loc;
+		if (!addr)
+			addr = &info->sw->ar_bspstore;
+		break;
+
+	      case UNW_AR_BSPSTORE:
+		addr = info->bspstore_loc;
+		if (!addr)
+			addr = &info->sw->ar_bspstore;
+		break;
+
+	      case UNW_AR_PFS:
+		addr = info->pfs_loc;
+		if (!addr)
+			addr = &info->sw->ar_pfs;
+		break;
+
+	      case UNW_AR_RNAT:
+		addr = info->rnat_loc;
+		if (!addr)
+			addr = &info->sw->ar_rnat;
+		break;
+
+	      case UNW_AR_UNAT:
+		addr = info->unat_loc;
+		if (!addr)
+			addr = &info->sw->caller_unat;
+		break;
+
+	      case UNW_AR_LC:
+		addr = info->lc_loc;
+		if (!addr)
+			addr = &info->sw->ar_lc;
+		break;
+
+	      case UNW_AR_EC:
+		if (!info->cfm_loc)
+			return -1;
+		if (write)
+			*info->cfm_loc =
+				(*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 0x3f) << 52);
+		else
+			*val = (*info->cfm_loc >> 52) & 0x3f;
+		return 0;
+
+	      case UNW_AR_FPSR:
+		addr = info->fpsr_loc;
+		if (!addr)
+			addr = &info->sw->ar_fpsr;
+		break;
+
+	      case UNW_AR_RSC:
+		pt = get_scratch_regs(info);
+		addr = &pt->ar_rsc;
+		break;
+
+	      case UNW_AR_CCV:
+		pt = get_scratch_regs(info);
+		addr = &pt->ar_ccv;
+		break;
+
+	      case UNW_AR_CSD:
+		pt = get_scratch_regs(info);
+		addr = &pt->ar_csd;
+		break;
+
+	      case UNW_AR_SSD:
+		pt = get_scratch_regs(info);
+		addr = &pt->ar_ssd;
+		break;
+
+	      default:
+		UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n",
+			   __func__, regnum);
+		return -1;
+	}
+
+	if (write) {
+		if (read_only(addr)) {
+			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+				__func__);
+		} else
+			*addr = *val;
+	} else
+		*val = *addr;
+	return 0;
+}
+EXPORT_SYMBOL(unw_access_ar);
+
+int
+unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write)
+{
+	unsigned long *addr;
+
+	addr = info->pr_loc;
+	if (!addr)
+		addr = &info->sw->pr;
+
+	if (write) {
+		if (read_only(addr)) {
+			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+				__func__);
+		} else
+			*addr = *val;
+	} else
+		*val = *addr;
+	return 0;
+}
+EXPORT_SYMBOL(unw_access_pr);
+
+
+/* Routines to manipulate the state stack.  */
+
+static inline void
+push (struct unw_state_record *sr)
+{
+	struct unw_reg_state *rs;
+
+	rs = alloc_reg_state();
+	if (!rs) {
+		printk(KERN_ERR "unwind: cannot stack reg state!\n");
+		return;
+	}
+	memcpy(rs, &sr->curr, sizeof(*rs));
+	sr->curr.next = rs;
+}
+
+static void
+pop (struct unw_state_record *sr)
+{
+	struct unw_reg_state *rs = sr->curr.next;
+
+	if (!rs) {
+		printk(KERN_ERR "unwind: stack underflow!\n");
+		return;
+	}
+	memcpy(&sr->curr, rs, sizeof(*rs));
+	free_reg_state(rs);
+}
+
+/* Make a copy of the state stack.  Non-recursive to avoid stack overflows.  */
+static struct unw_reg_state *
+dup_state_stack (struct unw_reg_state *rs)
+{
+	struct unw_reg_state *copy, *prev = NULL, *first = NULL;
+
+	while (rs) {
+		copy = alloc_reg_state();
+		if (!copy) {
+			printk(KERN_ERR "unwind.dup_state_stack: out of memory\n");
+			return NULL;
+		}
+		memcpy(copy, rs, sizeof(*copy));
+		if (first)
+			prev->next = copy;
+		else
+			first = copy;
+		rs = rs->next;
+		prev = copy;
+	}
+	return first;
+}
+
+/* Free all stacked register states (but not RS itself).  */
+static void
+free_state_stack (struct unw_reg_state *rs)
+{
+	struct unw_reg_state *p, *next;
+
+	for (p = rs->next; p != NULL; p = next) {
+		next = p->next;
+		free_reg_state(p);
+	}
+	rs->next = NULL;
+}
+
+/* Unwind decoder routines */
+
+static enum unw_register_index __attribute_const__
+decode_abreg (unsigned char abreg, int memory)
+{
+	switch (abreg) {
+	      case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04);
+	      case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22);
+	      case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30);
+	      case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41);
+	      case 0x60: return UNW_REG_PR;
+	      case 0x61: return UNW_REG_PSP;
+	      case 0x62: return memory ? UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR;
+	      case 0x63: return UNW_REG_RP;
+	      case 0x64: return UNW_REG_BSP;
+	      case 0x65: return UNW_REG_BSPSTORE;
+	      case 0x66: return UNW_REG_RNAT;
+	      case 0x67: return UNW_REG_UNAT;
+	      case 0x68: return UNW_REG_FPSR;
+	      case 0x69: return UNW_REG_PFS;
+	      case 0x6a: return UNW_REG_LC;
+	      default:
+		break;
+	}
+	UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __func__, abreg);
+	return UNW_REG_LC;
+}
+
+static void
+set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned long val)
+{
+	reg->val = val;
+	reg->where = where;
+	if (reg->when == UNW_WHEN_NEVER)
+		reg->when = when;
+}
+
+static void
+alloc_spill_area (unsigned long *offp, unsigned long regsize,
+		  struct unw_reg_info *lo, struct unw_reg_info *hi)
+{
+	struct unw_reg_info *reg;
+
+	for (reg = hi; reg >= lo; --reg) {
+		if (reg->where == UNW_WHERE_SPILL_HOME) {
+			reg->where = UNW_WHERE_PSPREL;
+			*offp -= regsize;
+			reg->val = *offp;
+		}
+	}
+}
+
+static inline void
+spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word t)
+{
+	struct unw_reg_info *reg;
+
+	for (reg = *regp; reg <= lim; ++reg) {
+		if (reg->where == UNW_WHERE_SPILL_HOME) {
+			reg->when = t;
+			*regp = reg + 1;
+			return;
+		}
+	}
+	UNW_DPRINT(0, "unwind.%s: excess spill!\n",  __func__);
+}
+
+static inline void
+finish_prologue (struct unw_state_record *sr)
+{
+	struct unw_reg_info *reg;
+	unsigned long off;
+	int i;
+
+	/*
+	 * First, resolve implicit register save locations (see Section "11.4.2.3 Rules
+	 * for Using Unwind Descriptors", rule 3):
+	 */
+	for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) {
+		reg = sr->curr.reg + unw.save_order[i];
+		if (reg->where == UNW_WHERE_GR_SAVE) {
+			reg->where = UNW_WHERE_GR;
+			reg->val = sr->gr_save_loc++;
+		}
+	}
+
+	/*
+	 * Next, compute when the fp, general, and branch registers get
+	 * saved.  This must come before alloc_spill_area() because
+	 * we need to know which registers are spilled to their home
+	 * locations.
+	 */
+	if (sr->imask) {
+		unsigned char kind, mask = 0, *cp = sr->imask;
+		int t;
+		static const unsigned char limit[3] = {
+			UNW_REG_F31, UNW_REG_R7, UNW_REG_B5
+		};
+		struct unw_reg_info *(regs[3]);
+
+		regs[0] = sr->curr.reg + UNW_REG_F2;
+		regs[1] = sr->curr.reg + UNW_REG_R4;
+		regs[2] = sr->curr.reg + UNW_REG_B1;
+
+		for (t = 0; t < sr->region_len; ++t) {
+			if ((t & 3) == 0)
+				mask = *cp++;
+			kind = (mask >> 2*(3-(t & 3))) & 3;
+			if (kind > 0)
+				spill_next_when(&regs[kind - 1], sr->curr.reg + limit[kind - 1],
+						sr->region_start + t);
+		}
+	}
+	/*
+	 * Next, lay out the memory stack spill area:
+	 */
+	if (sr->any_spills) {
+		off = sr->spill_offset;
+		alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, sr->curr.reg + UNW_REG_F31);
+		alloc_spill_area(&off,  8, sr->curr.reg + UNW_REG_B1, sr->curr.reg + UNW_REG_B5);
+		alloc_spill_area(&off,  8, sr->curr.reg + UNW_REG_R4, sr->curr.reg + UNW_REG_R7);
+	}
+}
+
+/*
+ * Region header descriptors.
+ */
+
+static void
+desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char grsave,
+	       struct unw_state_record *sr)
+{
+	int i, region_start;
+
+	if (!(sr->in_body || sr->first_region))
+		finish_prologue(sr);
+	sr->first_region = 0;
+
+	/* check if we're done: */
+	if (sr->when_target < sr->region_start + sr->region_len) {
+		sr->done = 1;
+		return;
+	}
+
+	region_start = sr->region_start + sr->region_len;
+
+	for (i = 0; i < sr->epilogue_count; ++i)
+		pop(sr);
+	sr->epilogue_count = 0;
+	sr->epilogue_start = UNW_WHEN_NEVER;
+
+	sr->region_start = region_start;
+	sr->region_len = rlen;
+	sr->in_body = body;
+
+	if (!body) {
+		push(sr);
+
+		for (i = 0; i < 4; ++i) {
+			if (mask & 0x8)
+				set_reg(sr->curr.reg + unw.save_order[i], UNW_WHERE_GR,
+					sr->region_start + sr->region_len - 1, grsave++);
+			mask <<= 1;
+		}
+		sr->gr_save_loc = grsave;
+		sr->any_spills = 0;
+		sr->imask = NULL;
+		sr->spill_offset = 0x10;	/* default to psp+16 */
+	}
+}
+
+/*
+ * Prologue descriptors.
+ */
+
+static inline void
+desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr)
+{
+	if (abi == 3 && context == 'i') {
+		sr->flags |= UNW_FLAG_INTERRUPT_FRAME;
+		UNW_DPRINT(3, "unwind.%s: interrupt frame\n",  __func__);
+	}
+	else
+		UNW_DPRINT(0, "unwind%s: ignoring unwabi(abi=0x%x,context=0x%x)\n",
+				__func__, abi, context);
+}
+
+static inline void
+desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record *sr)
+{
+	int i;
+
+	for (i = 0; i < 5; ++i) {
+		if (brmask & 1)
+			set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR,
+				sr->region_start + sr->region_len - 1, gr++);
+		brmask >>= 1;
+	}
+}
+
+static inline void
+desc_br_mem (unsigned char brmask, struct unw_state_record *sr)
+{
+	int i;
+
+	for (i = 0; i < 5; ++i) {
+		if (brmask & 1) {
+			set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME,
+				sr->region_start + sr->region_len - 1, 0);
+			sr->any_spills = 1;
+		}
+		brmask >>= 1;
+	}
+}
+
+static inline void
+desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr)
+{
+	int i;
+
+	for (i = 0; i < 4; ++i) {
+		if ((grmask & 1) != 0) {
+			set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
+				sr->region_start + sr->region_len - 1, 0);
+			sr->any_spills = 1;
+		}
+		grmask >>= 1;
+	}
+	for (i = 0; i < 20; ++i) {
+		if ((frmask & 1) != 0) {
+			int base = (i < 4) ? UNW_REG_F2 : UNW_REG_F16 - 4;
+			set_reg(sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME,
+				sr->region_start + sr->region_len - 1, 0);
+			sr->any_spills = 1;
+		}
+		frmask >>= 1;
+	}
+}
+
+static inline void
+desc_fr_mem (unsigned char frmask, struct unw_state_record *sr)
+{
+	int i;
+
+	for (i = 0; i < 4; ++i) {
+		if ((frmask & 1) != 0) {
+			set_reg(sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME,
+				sr->region_start + sr->region_len - 1, 0);
+			sr->any_spills = 1;
+		}
+		frmask >>= 1;
+	}
+}
+
+static inline void
+desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record *sr)
+{
+	int i;
+
+	for (i = 0; i < 4; ++i) {
+		if ((grmask & 1) != 0)
+			set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR,
+				sr->region_start + sr->region_len - 1, gr++);
+		grmask >>= 1;
+	}
+}
+
+static inline void
+desc_gr_mem (unsigned char grmask, struct unw_state_record *sr)
+{
+	int i;
+
+	for (i = 0; i < 4; ++i) {
+		if ((grmask & 1) != 0) {
+			set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
+				sr->region_start + sr->region_len - 1, 0);
+			sr->any_spills = 1;
+		}
+		grmask >>= 1;
+	}
+}
+
+static inline void
+desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr)
+{
+	set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE,
+		sr->region_start + min_t(int, t, sr->region_len - 1), 16*size);
+}
+
+static inline void
+desc_mem_stack_v (unw_word t, struct unw_state_record *sr)
+{
+	sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1);
+}
+
+static inline void
+desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr)
+{
+	set_reg(sr->curr.reg + reg, UNW_WHERE_GR, sr->region_start + sr->region_len - 1, dst);
+}
+
+static inline void
+desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr)
+{
+	set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, sr->region_start + sr->region_len - 1,
+		0x10 - 4*pspoff);
+}
+
+static inline void
+desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr)
+{
+	set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, sr->region_start + sr->region_len - 1,
+		4*spoff);
+}
+
+static inline void
+desc_rp_br (unsigned char dst, struct unw_state_record *sr)
+{
+	sr->return_link_reg = dst;
+}
+
+static inline void
+desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr)
+{
+	struct unw_reg_info *reg = sr->curr.reg + regnum;
+
+	if (reg->where == UNW_WHERE_NONE)
+		reg->where = UNW_WHERE_GR_SAVE;
+	reg->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+}
+
+static inline void
+desc_spill_base (unw_word pspoff, struct unw_state_record *sr)
+{
+	sr->spill_offset = 0x10 - 4*pspoff;
+}
+
+static inline unsigned char *
+desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr)
+{
+	sr->imask = imaskp;
+	return imaskp + (2*sr->region_len + 7)/8;
+}
+
+/*
+ * Body descriptors.
+ */
+static inline void
+desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr)
+{
+	sr->epilogue_start = sr->region_start + sr->region_len - 1 - t;
+	sr->epilogue_count = ecount + 1;
+}
+
+static inline void
+desc_copy_state (unw_word label, struct unw_state_record *sr)
+{
+	struct unw_labeled_state *ls;
+
+	for (ls = sr->labeled_states; ls; ls = ls->next) {
+		if (ls->label == label) {
+			free_state_stack(&sr->curr);
+			memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr));
+			sr->curr.next = dup_state_stack(ls->saved_state.next);
+			return;
+		}
+	}
+	printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label);
+}
+
+static inline void
+desc_label_state (unw_word label, struct unw_state_record *sr)
+{
+	struct unw_labeled_state *ls;
+
+	ls = alloc_labeled_state();
+	if (!ls) {
+		printk(KERN_ERR "unwind.desc_label_state(): out of memory\n");
+		return;
+	}
+	ls->label = label;
+	memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state));
+	ls->saved_state.next = dup_state_stack(sr->curr.next);
+
+	/* insert into list of labeled states: */
+	ls->next = sr->labeled_states;
+	sr->labeled_states = ls;
+}
+
+/*
+ * General descriptors.
+ */
+
+static inline int
+desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr)
+{
+	if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1))
+		return 0;
+	if (qp > 0) {
+		if ((sr->pr_val & (1UL << qp)) == 0)
+			return 0;
+		sr->pr_mask |= (1UL << qp);
+	}
+	return 1;
+}
+
+static inline void
+desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct unw_state_record *sr)
+{
+	struct unw_reg_info *r;
+
+	if (!desc_is_active(qp, t, sr))
+		return;
+
+	r = sr->curr.reg + decode_abreg(abreg, 0);
+	r->where = UNW_WHERE_NONE;
+	r->when = UNW_WHEN_NEVER;
+	r->val = 0;
+}
+
+static inline void
+desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x,
+		     unsigned char ytreg, struct unw_state_record *sr)
+{
+	enum unw_where where = UNW_WHERE_GR;
+	struct unw_reg_info *r;
+
+	if (!desc_is_active(qp, t, sr))
+		return;
+
+	if (x)
+		where = UNW_WHERE_BR;
+	else if (ytreg & 0x80)
+		where = UNW_WHERE_FR;
+
+	r = sr->curr.reg + decode_abreg(abreg, 0);
+	r->where = where;
+	r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+	r->val = (ytreg & 0x7f);
+}
+
+static inline void
+desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff,
+		     struct unw_state_record *sr)
+{
+	struct unw_reg_info *r;
+
+	if (!desc_is_active(qp, t, sr))
+		return;
+
+	r = sr->curr.reg + decode_abreg(abreg, 1);
+	r->where = UNW_WHERE_PSPREL;
+	r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+	r->val = 0x10 - 4*pspoff;
+}
+
+static inline void
+desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word spoff,
+		       struct unw_state_record *sr)
+{
+	struct unw_reg_info *r;
+
+	if (!desc_is_active(qp, t, sr))
+		return;
+
+	r = sr->curr.reg + decode_abreg(abreg, 1);
+	r->where = UNW_WHERE_SPREL;
+	r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+	r->val = 4*spoff;
+}
+
+#define UNW_DEC_BAD_CODE(code)			printk(KERN_ERR "unwind: unknown code 0x%02x\n", \
+						       code);
+
+/*
+ * region headers:
+ */
+#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg)	desc_prologue(0,r,m,gr,arg)
+#define UNW_DEC_PROLOGUE(fmt,b,r,arg)		desc_prologue(b,r,0,32,arg)
+/*
+ * prologue descriptors:
+ */
+#define UNW_DEC_ABI(fmt,a,c,arg)		desc_abi(a,c,arg)
+#define UNW_DEC_BR_GR(fmt,b,g,arg)		desc_br_gr(b,g,arg)
+#define UNW_DEC_BR_MEM(fmt,b,arg)		desc_br_mem(b,arg)
+#define UNW_DEC_FRGR_MEM(fmt,g,f,arg)		desc_frgr_mem(g,f,arg)
+#define UNW_DEC_FR_MEM(fmt,f,arg)		desc_fr_mem(f,arg)
+#define UNW_DEC_GR_GR(fmt,m,g,arg)		desc_gr_gr(m,g,arg)
+#define UNW_DEC_GR_MEM(fmt,m,arg)		desc_gr_mem(m,arg)
+#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg)	desc_mem_stack_f(t,s,arg)
+#define UNW_DEC_MEM_STACK_V(fmt,t,arg)		desc_mem_stack_v(t,arg)
+#define UNW_DEC_REG_GR(fmt,r,d,arg)		desc_reg_gr(r,d,arg)
+#define UNW_DEC_REG_PSPREL(fmt,r,o,arg)		desc_reg_psprel(r,o,arg)
+#define UNW_DEC_REG_SPREL(fmt,r,o,arg)		desc_reg_sprel(r,o,arg)
+#define UNW_DEC_REG_WHEN(fmt,r,t,arg)		desc_reg_when(r,t,arg)
+#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)	desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg)
+#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)	desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg)
+#define UNW_DEC_PRIUNAT_GR(fmt,r,arg)		desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg)
+#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg)	desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg)
+#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg)	desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg)
+#define UNW_DEC_RP_BR(fmt,d,arg)		desc_rp_br(d,arg)
+#define UNW_DEC_SPILL_BASE(fmt,o,arg)		desc_spill_base(o,arg)
+#define UNW_DEC_SPILL_MASK(fmt,m,arg)		(m = desc_spill_mask(m,arg))
+/*
+ * body descriptors:
+ */
+#define UNW_DEC_EPILOGUE(fmt,t,c,arg)		desc_epilogue(t,c,arg)
+#define UNW_DEC_COPY_STATE(fmt,l,arg)		desc_copy_state(l,arg)
+#define UNW_DEC_LABEL_STATE(fmt,l,arg)		desc_label_state(l,arg)
+/*
+ * general unwind descriptors:
+ */
+#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg)	desc_spill_reg_p(p,t,a,x,y,arg)
+#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg)	desc_spill_reg_p(0,t,a,x,y,arg)
+#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg)	desc_spill_psprel_p(p,t,a,o,arg)
+#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg)	desc_spill_psprel_p(0,t,a,o,arg)
+#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg)	desc_spill_sprel_p(p,t,a,o,arg)
+#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg)	desc_spill_sprel_p(0,t,a,o,arg)
+#define UNW_DEC_RESTORE_P(f,p,t,a,arg)		desc_restore_p(p,t,a,arg)
+#define UNW_DEC_RESTORE(f,t,a,arg)		desc_restore_p(0,t,a,arg)
+
+#include "unwind_decoder.c"
+
+
+/* Unwind scripts. */
+
+static inline unw_hash_index_t
+hash (unsigned long ip)
+{
+	/* magic number = ((sqrt(5)-1)/2)*2^64 */
+	static const unsigned long hashmagic = 0x9e3779b97f4a7c16UL;
+
+	return (ip >> 4) * hashmagic >> (64 - UNW_LOG_HASH_SIZE);
+}
+
+static inline long
+cache_match (struct unw_script *script, unsigned long ip, unsigned long pr)
+{
+	read_lock(&script->lock);
+	if (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0)
+		/* keep the read lock... */
+		return 1;
+	read_unlock(&script->lock);
+	return 0;
+}
+
+static inline struct unw_script *
+script_lookup (struct unw_frame_info *info)
+{
+	struct unw_script *script = unw.cache + info->hint;
+	unsigned short index;
+	unsigned long ip, pr;
+
+	if (UNW_DEBUG_ON(0))
+		return NULL;	/* Always regenerate scripts in debug mode */
+
+	STAT(++unw.stat.cache.lookups);
+
+	ip = info->ip;
+	pr = info->pr;
+
+	if (cache_match(script, ip, pr)) {
+		STAT(++unw.stat.cache.hinted_hits);
+		return script;
+	}
+
+	index = unw.hash[hash(ip)];
+	if (index >= UNW_CACHE_SIZE)
+		return NULL;
+
+	script = unw.cache + index;
+	while (1) {
+		if (cache_match(script, ip, pr)) {
+			/* update hint; no locking required as single-word writes are atomic */
+			STAT(++unw.stat.cache.normal_hits);
+			unw.cache[info->prev_script].hint = script - unw.cache;
+			return script;
+		}
+		if (script->coll_chain >= UNW_HASH_SIZE)
+			return NULL;
+		script = unw.cache + script->coll_chain;
+		STAT(++unw.stat.cache.collision_chain_traversals);
+	}
+}
+
+/*
+ * On returning, a write lock for the SCRIPT is still being held.
+ */
+static inline struct unw_script *
+script_new (unsigned long ip)
+{
+	struct unw_script *script, *prev, *tmp;
+	unw_hash_index_t index;
+	unsigned short head;
+
+	STAT(++unw.stat.script.news);
+
+	/*
+	 * Can't (easily) use cmpxchg() here because of ABA problem
+	 * that is intrinsic in cmpxchg()...
+	 */
+	head = unw.lru_head;
+	script = unw.cache + head;
+	unw.lru_head = script->lru_chain;
+
+	/*
+	 * We'd deadlock here if we interrupted a thread that is holding a read lock on
+	 * script->lock.  Thus, if the write_trylock() fails, we simply bail out.  The
+	 * alternative would be to disable interrupts whenever we hold a read-lock, but
+	 * that seems silly.
+	 */
+	if (!write_trylock(&script->lock))
+		return NULL;
+
+	/* re-insert script at the tail of the LRU chain: */
+	unw.cache[unw.lru_tail].lru_chain = head;
+	unw.lru_tail = head;
+
+	/* remove the old script from the hash table (if it's there): */
+	if (script->ip) {
+		index = hash(script->ip);
+		tmp = unw.cache + unw.hash[index];
+		prev = NULL;
+		while (1) {
+			if (tmp == script) {
+				if (prev)
+					prev->coll_chain = tmp->coll_chain;
+				else
+					unw.hash[index] = tmp->coll_chain;
+				break;
+			} else
+				prev = tmp;
+			if (tmp->coll_chain >= UNW_CACHE_SIZE)
+			/* old script wasn't in the hash-table */
+				break;
+			tmp = unw.cache + tmp->coll_chain;
+		}
+	}
+
+	/* enter new script in the hash table */
+	index = hash(ip);
+	script->coll_chain = unw.hash[index];
+	unw.hash[index] = script - unw.cache;
+
+	script->ip = ip;	/* set new IP while we're holding the locks */
+
+	STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions);
+
+	script->flags = 0;
+	script->hint = 0;
+	script->count = 0;
+	return script;
+}
+
+static void
+script_finalize (struct unw_script *script, struct unw_state_record *sr)
+{
+	script->pr_mask = sr->pr_mask;
+	script->pr_val = sr->pr_val;
+	/*
+	 * We could down-grade our write-lock on script->lock here but
+	 * the rwlock API doesn't offer atomic lock downgrading, so
+	 * we'll just keep the write-lock and release it later when
+	 * we're done using the script.
+	 */
+}
+
+static inline void
+script_emit (struct unw_script *script, struct unw_insn insn)
+{
+	if (script->count >= UNW_MAX_SCRIPT_LEN) {
+		UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n",
+			__func__, UNW_MAX_SCRIPT_LEN);
+		return;
+	}
+	script->insn[script->count++] = insn;
+}
+
+static inline void
+emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script)
+{
+	struct unw_reg_info *r = sr->curr.reg + i;
+	enum unw_insn_opcode opc;
+	struct unw_insn insn;
+	unsigned long val = 0;
+
+	switch (r->where) {
+	      case UNW_WHERE_GR:
+		if (r->val >= 32) {
+			/* register got spilled to a stacked register */
+			opc = UNW_INSN_SETNAT_TYPE;
+			val = UNW_NAT_REGSTK;
+		} else
+			/* register got spilled to a scratch register */
+			opc = UNW_INSN_SETNAT_MEMSTK;
+		break;
+
+	      case UNW_WHERE_FR:
+		opc = UNW_INSN_SETNAT_TYPE;
+		val = UNW_NAT_VAL;
+		break;
+
+	      case UNW_WHERE_BR:
+		opc = UNW_INSN_SETNAT_TYPE;
+		val = UNW_NAT_NONE;
+		break;
+
+	      case UNW_WHERE_PSPREL:
+	      case UNW_WHERE_SPREL:
+		opc = UNW_INSN_SETNAT_MEMSTK;
+		break;
+
+	      default:
+		UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n",
+			   __func__, r->where);
+		return;
+	}
+	insn.opc = opc;
+	insn.dst = unw.preg_index[i];
+	insn.val = val;
+	script_emit(script, insn);
+}
+
+static void
+compile_reg (struct unw_state_record *sr, int i, struct unw_script *script)
+{
+	struct unw_reg_info *r = sr->curr.reg + i;
+	enum unw_insn_opcode opc;
+	unsigned long val, rval;
+	struct unw_insn insn;
+	long need_nat_info;
+
+	if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target)
+		return;
+
+	opc = UNW_INSN_MOVE;
+	val = rval = r->val;
+	need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7);
+
+	switch (r->where) {
+	      case UNW_WHERE_GR:
+		if (rval >= 32) {
+			opc = UNW_INSN_MOVE_STACKED;
+			val = rval - 32;
+		} else if (rval >= 4 && rval <= 7) {
+			if (need_nat_info) {
+				opc = UNW_INSN_MOVE2;
+				need_nat_info = 0;
+			}
+			val = unw.preg_index[UNW_REG_R4 + (rval - 4)];
+		} else if (rval == 0) {
+			opc = UNW_INSN_MOVE_CONST;
+			val = 0;
+		} else {
+			/* register got spilled to a scratch register */
+			opc = UNW_INSN_MOVE_SCRATCH;
+			val = pt_regs_off(rval);
+		}
+		break;
+
+	      case UNW_WHERE_FR:
+		if (rval <= 5)
+			val = unw.preg_index[UNW_REG_F2  + (rval -  2)];
+		else if (rval >= 16 && rval <= 31)
+			val = unw.preg_index[UNW_REG_F16 + (rval - 16)];
+		else {
+			opc = UNW_INSN_MOVE_SCRATCH;
+			if (rval <= 11)
+				val = offsetof(struct pt_regs, f6) + 16*(rval - 6);
+			else
+				UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n",
+					   __func__, rval);
+		}
+		break;
+
+	      case UNW_WHERE_BR:
+		if (rval >= 1 && rval <= 5)
+			val = unw.preg_index[UNW_REG_B1 + (rval - 1)];
+		else {
+			opc = UNW_INSN_MOVE_SCRATCH;
+			if (rval == 0)
+				val = offsetof(struct pt_regs, b0);
+			else if (rval == 6)
+				val = offsetof(struct pt_regs, b6);
+			else
+				val = offsetof(struct pt_regs, b7);
+		}
+		break;
+
+	      case UNW_WHERE_SPREL:
+		opc = UNW_INSN_ADD_SP;
+		break;
+
+	      case UNW_WHERE_PSPREL:
+		opc = UNW_INSN_ADD_PSP;
+		break;
+
+	      default:
+		UNW_DPRINT(0, "unwind%s: register %u has unexpected `where' value of %u\n",
+			   __func__, i, r->where);
+		break;
+	}
+	insn.opc = opc;
+	insn.dst = unw.preg_index[i];
+	insn.val = val;
+	script_emit(script, insn);
+	if (need_nat_info)
+		emit_nat_info(sr, i, script);
+
+	if (i == UNW_REG_PSP) {
+		/*
+		 * info->psp must contain the _value_ of the previous
+		 * sp, not it's save location.  We get this by
+		 * dereferencing the value we just stored in
+		 * info->psp:
+		 */
+		insn.opc = UNW_INSN_LOAD;
+		insn.dst = insn.val = unw.preg_index[UNW_REG_PSP];
+		script_emit(script, insn);
+	}
+}
+
+static inline const struct unw_table_entry *
+lookup (struct unw_table *table, unsigned long rel_ip)
+{
+	const struct unw_table_entry *e = NULL;
+	unsigned long lo, hi, mid;
+
+	/* do a binary search for right entry: */
+	for (lo = 0, hi = table->length; lo < hi; ) {
+		mid = (lo + hi) / 2;
+		e = &table->array[mid];
+		if (rel_ip < e->start_offset)
+			hi = mid;
+		else if (rel_ip >= e->end_offset)
+			lo = mid + 1;
+		else
+			break;
+	}
+	if (rel_ip < e->start_offset || rel_ip >= e->end_offset)
+		return NULL;
+	return e;
+}
+
+/*
+ * Build an unwind script that unwinds from state OLD_STATE to the
+ * entrypoint of the function that called OLD_STATE.
+ */
+static inline struct unw_script *
+build_script (struct unw_frame_info *info)
+{
+	const struct unw_table_entry *e = NULL;
+	struct unw_script *script = NULL;
+	struct unw_labeled_state *ls, *next;
+	unsigned long ip = info->ip;
+	struct unw_state_record sr;
+	struct unw_table *table, *prev;
+	struct unw_reg_info *r;
+	struct unw_insn insn;
+	u8 *dp, *desc_end;
+	u64 hdr;
+	int i;
+	STAT(unsigned long start, parse_start;)
+
+	STAT(++unw.stat.script.builds; start = ia64_get_itc());
+
+	/* build state record */
+	memset(&sr, 0, sizeof(sr));
+	for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
+		r->when = UNW_WHEN_NEVER;
+	sr.pr_val = info->pr;
+
+	UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __func__, ip);
+	script = script_new(ip);
+	if (!script) {
+		UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n",  __func__);
+		STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+		return NULL;
+	}
+	unw.cache[info->prev_script].hint = script - unw.cache;
+
+	/* search the kernels and the modules' unwind tables for IP: */
+
+	STAT(parse_start = ia64_get_itc());
+
+	prev = NULL;
+	for (table = unw.tables; table; table = table->next) {
+		if (ip >= table->start && ip < table->end) {
+			/*
+			 * Leave the kernel unwind table at the very front,
+			 * lest moving it breaks some assumption elsewhere.
+			 * Otherwise, move the matching table to the second
+			 * position in the list so that traversals can benefit
+			 * from commonality in backtrace paths.
+			 */
+			if (prev && prev != unw.tables) {
+				/* unw is safe - we're already spinlocked */
+				prev->next = table->next;
+				table->next = unw.tables->next;
+				unw.tables->next = table;
+			}
+			e = lookup(table, ip - table->segment_base);
+			break;
+		}
+		prev = table;
+	}
+	if (!e) {
+		/* no info, return default unwinder (leaf proc, no mem stack, no saved regs)  */
+		UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n",
+			__func__, ip, unw.cache[info->prev_script].ip);
+		sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
+		sr.curr.reg[UNW_REG_RP].when = -1;
+		sr.curr.reg[UNW_REG_RP].val = 0;
+		compile_reg(&sr, UNW_REG_RP, script);
+		script_finalize(script, &sr);
+		STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+		STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+		return script;
+	}
+
+	sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16
+			  + (ip & 0xfUL));
+	hdr = *(u64 *) (table->segment_base + e->info_offset);
+	dp =   (u8 *)  (table->segment_base + e->info_offset + 8);
+	desc_end = dp + 8*UNW_LENGTH(hdr);
+
+	while (!sr.done && dp < desc_end)
+		dp = unw_decode(dp, sr.in_body, &sr);
+
+	if (sr.when_target > sr.epilogue_start) {
+		/*
+		 * sp has been restored and all values on the memory stack below
+		 * psp also have been restored.
+		 */
+		sr.curr.reg[UNW_REG_PSP].val = 0;
+		sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE;
+		sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER;
+		for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
+			if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10)
+			    || r->where == UNW_WHERE_SPREL)
+			{
+				r->val = 0;
+				r->where = UNW_WHERE_NONE;
+				r->when = UNW_WHEN_NEVER;
+			}
+	}
+
+	script->flags = sr.flags;
+
+	/*
+	 * If RP did't get saved, generate entry for the return link
+	 * register.
+	 */
+	if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) {
+		sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
+		sr.curr.reg[UNW_REG_RP].when = -1;
+		sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg;
+		UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n",
+			   __func__, ip, sr.curr.reg[UNW_REG_RP].where,
+			   sr.curr.reg[UNW_REG_RP].val);
+	}
+
+#ifdef UNW_DEBUG
+	UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n",
+		__func__, table->segment_base + e->start_offset, sr.when_target);
+	for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) {
+		if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) {
+			UNW_DPRINT(1, "  %s <- ", unw.preg_name[r - sr.curr.reg]);
+			switch (r->where) {
+			      case UNW_WHERE_GR:     UNW_DPRINT(1, "r%lu", r->val); break;
+			      case UNW_WHERE_FR:     UNW_DPRINT(1, "f%lu", r->val); break;
+			      case UNW_WHERE_BR:     UNW_DPRINT(1, "b%lu", r->val); break;
+			      case UNW_WHERE_SPREL:  UNW_DPRINT(1, "[sp+0x%lx]", r->val); break;
+			      case UNW_WHERE_PSPREL: UNW_DPRINT(1, "[psp+0x%lx]", r->val); break;
+			      case UNW_WHERE_NONE:
+				UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r - sr.curr.reg], r->val);
+				break;
+
+			      default:
+				UNW_DPRINT(1, "BADWHERE(%d)", r->where);
+				break;
+			}
+			UNW_DPRINT(1, "\t\t%d\n", r->when);
+		}
+	}
+#endif
+
+	STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+
+	/* translate state record into unwinder instructions: */
+
+	/*
+	 * First, set psp if we're dealing with a fixed-size frame;
+	 * subsequent instructions may depend on this value.
+	 */
+	if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when
+	    && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE)
+	    && sr.curr.reg[UNW_REG_PSP].val != 0) {
+		/* new psp is sp plus frame size */
+		insn.opc = UNW_INSN_ADD;
+		insn.dst = offsetof(struct unw_frame_info, psp)/8;
+		insn.val = sr.curr.reg[UNW_REG_PSP].val;	/* frame size */
+		script_emit(script, insn);
+	}
+
+	/* determine where the primary UNaT is: */
+	if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
+		i = UNW_REG_PRI_UNAT_MEM;
+	else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when)
+		i = UNW_REG_PRI_UNAT_GR;
+	else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
+		i = UNW_REG_PRI_UNAT_MEM;
+	else
+		i = UNW_REG_PRI_UNAT_GR;
+
+	compile_reg(&sr, i, script);
+
+	for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i)
+		compile_reg(&sr, i, script);
+
+	/* free labeled register states & stack: */
+
+	STAT(parse_start = ia64_get_itc());
+	for (ls = sr.labeled_states; ls; ls = next) {
+		next = ls->next;
+		free_state_stack(&ls->saved_state);
+		free_labeled_state(ls);
+	}
+	free_state_stack(&sr.curr);
+	STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+
+	script_finalize(script, &sr);
+	STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+	return script;
+}
+
+/*
+ * Apply the unwinding actions represented by OPS and update SR to
+ * reflect the state that existed upon entry to the function that this
+ * unwinder represents.
+ */
+static inline void
+run_script (struct unw_script *script, struct unw_frame_info *state)
+{
+	struct unw_insn *ip, *limit, next_insn;
+	unsigned long opc, dst, val, off;
+	unsigned long *s = (unsigned long *) state;
+	STAT(unsigned long start;)
+
+	STAT(++unw.stat.script.runs; start = ia64_get_itc());
+	state->flags = script->flags;
+	ip = script->insn;
+	limit = script->insn + script->count;
+	next_insn = *ip;
+
+	while (ip++ < limit) {
+		opc = next_insn.opc;
+		dst = next_insn.dst;
+		val = next_insn.val;
+		next_insn = *ip;
+
+	  redo:
+		switch (opc) {
+		      case UNW_INSN_ADD:
+			s[dst] += val;
+			break;
+
+		      case UNW_INSN_MOVE2:
+			if (!s[val])
+				goto lazy_init;
+			s[dst+1] = s[val+1];
+			s[dst] = s[val];
+			break;
+
+		      case UNW_INSN_MOVE:
+			if (!s[val])
+				goto lazy_init;
+			s[dst] = s[val];
+			break;
+
+		      case UNW_INSN_MOVE_SCRATCH:
+			if (state->pt) {
+				s[dst] = (unsigned long) get_scratch_regs(state) + val;
+			} else {
+				s[dst] = 0;
+				UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n",
+					   __func__, dst, val);
+			}
+			break;
+
+		      case UNW_INSN_MOVE_CONST:
+			if (val == 0)
+				s[dst] = (unsigned long) &unw.r0;
+			else {
+				s[dst] = 0;
+				UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n",
+					   __func__, val);
+			}
+			break;
+
+
+		      case UNW_INSN_MOVE_STACKED:
+			s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned long *)state->bsp,
+								    val);
+			break;
+
+		      case UNW_INSN_ADD_PSP:
+			s[dst] = state->psp + val;
+			break;
+
+		      case UNW_INSN_ADD_SP:
+			s[dst] = state->sp + val;
+			break;
+
+		      case UNW_INSN_SETNAT_MEMSTK:
+			if (!state->pri_unat_loc)
+				state->pri_unat_loc = &state->sw->caller_unat;
+			/* register off. is a multiple of 8, so the least 3 bits (type) are 0 */
+			s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK;
+			break;
+
+		      case UNW_INSN_SETNAT_TYPE:
+			s[dst+1] = val;
+			break;
+
+		      case UNW_INSN_LOAD:
+#ifdef UNW_DEBUG
+			if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) != 0
+			    || s[val] < TASK_SIZE)
+			{
+				UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n",
+					   __func__, s[val]);
+				break;
+			}
+#endif
+			s[dst] = *(unsigned long *) s[val];
+			break;
+		}
+	}
+	STAT(unw.stat.script.run_time += ia64_get_itc() - start);
+	return;
+
+  lazy_init:
+	off = unw.sw_off[val];
+	s[val] = (unsigned long) state->sw + off;
+	if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct switch_stack, r7))
+		/*
+		 * We're initializing a general register: init NaT info, too.  Note that
+		 * the offset is a multiple of 8 which gives us the 3 bits needed for
+		 * the type field.
+		 */
+		s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | UNW_NAT_MEMSTK;
+	goto redo;
+}
+
+static int
+find_save_locs (struct unw_frame_info *info)
+{
+	int have_write_lock = 0;
+	struct unw_script *scr;
+	unsigned long flags = 0;
+
+	if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf)) || info->ip < TASK_SIZE) {
+		/* don't let obviously bad addresses pollute the cache */
+		/* FIXME: should really be level 0 but it occurs too often. KAO */
+		UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __func__, info->ip);
+		info->rp_loc = NULL;
+		return -1;
+	}
+
+	scr = script_lookup(info);
+	if (!scr) {
+		spin_lock_irqsave(&unw.lock, flags);
+		scr = build_script(info);
+		if (!scr) {
+			spin_unlock_irqrestore(&unw.lock, flags);
+			UNW_DPRINT(0,
+				   "unwind.%s: failed to locate/build unwind script for ip %lx\n",
+				   __func__, info->ip);
+			return -1;
+		}
+		have_write_lock = 1;
+	}
+	info->hint = scr->hint;
+	info->prev_script = scr - unw.cache;
+
+	run_script(scr, info);
+
+	if (have_write_lock) {
+		write_unlock(&scr->lock);
+		spin_unlock_irqrestore(&unw.lock, flags);
+	} else
+		read_unlock(&scr->lock);
+	return 0;
+}
+
+static int
+unw_valid(const struct unw_frame_info *info, unsigned long* p)
+{
+	unsigned long loc = (unsigned long)p;
+	return (loc >= info->regstk.limit && loc < info->regstk.top) ||
+	       (loc >= info->memstk.top && loc < info->memstk.limit);
+}
+
+int
+unw_unwind (struct unw_frame_info *info)
+{
+	unsigned long prev_ip, prev_sp, prev_bsp;
+	unsigned long ip, pr, num_regs;
+	STAT(unsigned long start, flags;)
+	int retval;
+
+	STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = ia64_get_itc());
+
+	prev_ip = info->ip;
+	prev_sp = info->sp;
+	prev_bsp = info->bsp;
+
+	/* validate the return IP pointer */
+	if (!unw_valid(info, info->rp_loc)) {
+		/* FIXME: should really be level 0 but it occurs too often. KAO */
+		UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n",
+			   __func__, info->ip);
+		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+		return -1;
+	}
+	/* restore the ip */
+	ip = info->ip = *info->rp_loc;
+	if (ip < GATE_ADDR) {
+		UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __func__, ip);
+		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+		return -1;
+	}
+
+	/* validate the previous stack frame pointer */
+	if (!unw_valid(info, info->pfs_loc)) {
+		UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __func__);
+		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+		return -1;
+	}
+	/* restore the cfm: */
+	info->cfm_loc = info->pfs_loc;
+
+	/* restore the bsp: */
+	pr = info->pr;
+	num_regs = 0;
+	if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) {
+		info->pt = info->sp + 16;
+		if ((pr & (1UL << PRED_NON_SYSCALL)) != 0)
+			num_regs = *info->cfm_loc & 0x7f;		/* size of frame */
+		info->pfs_loc =
+			(unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs));
+		UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __func__, info->pt);
+	} else
+		num_regs = (*info->cfm_loc >> 7) & 0x7f;	/* size of locals */
+	info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs);
+	if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) {
+		UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n",
+			__func__, info->bsp, info->regstk.limit, info->regstk.top);
+		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+		return -1;
+	}
+
+	/* restore the sp: */
+	info->sp = info->psp;
+	if (info->sp < info->memstk.top || info->sp > info->memstk.limit) {
+		UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n",
+			__func__, info->sp, info->memstk.top, info->memstk.limit);
+		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+		return -1;
+	}
+
+	if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) {
+		UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n",
+			   __func__, ip);
+		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+		return -1;
+	}
+
+	/* as we unwind, the saved ar.unat becomes the primary unat: */
+	info->pri_unat_loc = info->unat_loc;
+
+	/* finally, restore the predicates: */
+	unw_get_pr(info, &info->pr);
+
+	retval = find_save_locs(info);
+	STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+	return retval;
+}
+EXPORT_SYMBOL(unw_unwind);
+
+int
+unw_unwind_to_user (struct unw_frame_info *info)
+{
+	unsigned long ip, sp, pr = info->pr;
+
+	do {
+		unw_get_sp(info, &sp);
+		if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
+		    < IA64_PT_REGS_SIZE) {
+			UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n",
+				   __func__);
+			break;
+		}
+		if (unw_is_intr_frame(info) &&
+		    (pr & (1UL << PRED_USER_STACK)))
+			return 0;
+		if (unw_get_pr (info, &pr) < 0) {
+			unw_get_rp(info, &ip);
+			UNW_DPRINT(0, "unwind.%s: failed to read "
+				   "predicate register (ip=0x%lx)\n",
+				__func__, ip);
+			return -1;
+		}
+	} while (unw_unwind(info) >= 0);
+	unw_get_ip(info, &ip);
+	UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
+		   __func__, ip);
+	return -1;
+}
+EXPORT_SYMBOL(unw_unwind_to_user);
+
+static void
+init_frame_info (struct unw_frame_info *info, struct task_struct *t,
+		 struct switch_stack *sw, unsigned long stktop)
+{
+	unsigned long rbslimit, rbstop, stklimit;
+	STAT(unsigned long start, flags;)
+
+	STAT(local_irq_save(flags); ++unw.stat.api.inits; start = ia64_get_itc());
+
+	/*
+	 * Subtle stuff here: we _could_ unwind through the switch_stack frame but we
+	 * don't want to do that because it would be slow as each preserved register would
+	 * have to be processed.  Instead, what we do here is zero out the frame info and
+	 * start the unwind process at the function that created the switch_stack frame.
+	 * When a preserved value in switch_stack needs to be accessed, run_script() will
+	 * initialize the appropriate pointer on demand.
+	 */
+	memset(info, 0, sizeof(*info));
+
+	rbslimit = (unsigned long) t + IA64_RBS_OFFSET;
+	stklimit = (unsigned long) t + IA64_STK_OFFSET;
+
+	rbstop   = sw->ar_bspstore;
+	if (rbstop > stklimit || rbstop < rbslimit)
+		rbstop = rbslimit;
+
+	if (stktop <= rbstop)
+		stktop = rbstop;
+	if (stktop > stklimit)
+		stktop = stklimit;
+
+	info->regstk.limit = rbslimit;
+	info->regstk.top   = rbstop;
+	info->memstk.limit = stklimit;
+	info->memstk.top   = stktop;
+	info->task = t;
+	info->sw  = sw;
+	info->sp = info->psp = stktop;
+	info->pr = sw->pr;
+	UNW_DPRINT(3, "unwind.%s:\n"
+		   "  task   0x%lx\n"
+		   "  rbs = [0x%lx-0x%lx)\n"
+		   "  stk = [0x%lx-0x%lx)\n"
+		   "  pr     0x%lx\n"
+		   "  sw     0x%lx\n"
+		   "  sp     0x%lx\n",
+		   __func__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit,
+		   info->pr, (unsigned long) info->sw, info->sp);
+	STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags));
+}
+
+void
+unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw)
+{
+	unsigned long sol;
+
+	init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16);
+	info->cfm_loc = &sw->ar_pfs;
+	sol = (*info->cfm_loc >> 7) & 0x7f;
+	info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sol);
+	info->ip = sw->b0;
+	UNW_DPRINT(3, "unwind.%s:\n"
+		   "  bsp    0x%lx\n"
+		   "  sol    0x%lx\n"
+		   "  ip     0x%lx\n",
+		   __func__, info->bsp, sol, info->ip);
+	find_save_locs(info);
+}
+
+EXPORT_SYMBOL(unw_init_frame_info);
+
+void
+unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t)
+{
+	struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16);
+
+	UNW_DPRINT(1, "unwind.%s\n", __func__);
+	unw_init_frame_info(info, t, sw);
+}
+EXPORT_SYMBOL(unw_init_from_blocked_task);
+
+static void
+init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base,
+		   unsigned long gp, const void *table_start, const void *table_end)
+{
+	const struct unw_table_entry *start = table_start, *end = table_end;
+
+	table->name = name;
+	table->segment_base = segment_base;
+	table->gp = gp;
+	table->start = segment_base + start[0].start_offset;
+	table->end = segment_base + end[-1].end_offset;
+	table->array = start;
+	table->length = end - start;
+}
+
+void *
+unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp,
+		      const void *table_start, const void *table_end)
+{
+	const struct unw_table_entry *start = table_start, *end = table_end;
+	struct unw_table *table;
+	unsigned long flags;
+
+	if (end - start <= 0) {
+		UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n",
+			   __func__);
+		return NULL;
+	}
+
+	table = kmalloc(sizeof(*table), GFP_USER);
+	if (!table)
+		return NULL;
+
+	init_unwind_table(table, name, segment_base, gp, table_start, table_end);
+
+	spin_lock_irqsave(&unw.lock, flags);
+	{
+		/* keep kernel unwind table at the front (it's searched most commonly): */
+		table->next = unw.tables->next;
+		unw.tables->next = table;
+	}
+	spin_unlock_irqrestore(&unw.lock, flags);
+
+	return table;
+}
+
+void
+unw_remove_unwind_table (void *handle)
+{
+	struct unw_table *table, *prev;
+	struct unw_script *tmp;
+	unsigned long flags;
+	long index;
+
+	if (!handle) {
+		UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n",
+			   __func__);
+		return;
+	}
+
+	table = handle;
+	if (table == &unw.kernel_table) {
+		UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a "
+			   "no-can-do!\n", __func__);
+		return;
+	}
+
+	spin_lock_irqsave(&unw.lock, flags);
+	{
+		/* first, delete the table: */
+
+		for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next)
+			if (prev->next == table)
+				break;
+		if (!prev) {
+			UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n",
+				   __func__, (void *) table);
+			spin_unlock_irqrestore(&unw.lock, flags);
+			return;
+		}
+		prev->next = table->next;
+	}
+	spin_unlock_irqrestore(&unw.lock, flags);
+
+	/* next, remove hash table entries for this table */
+
+	for (index = 0; index < UNW_HASH_SIZE; ++index) {
+		tmp = unw.cache + unw.hash[index];
+		if (unw.hash[index] >= UNW_CACHE_SIZE
+		    || tmp->ip < table->start || tmp->ip >= table->end)
+			continue;
+
+		write_lock(&tmp->lock);
+		{
+			if (tmp->ip >= table->start && tmp->ip < table->end) {
+				unw.hash[index] = tmp->coll_chain;
+				tmp->ip = 0;
+			}
+		}
+		write_unlock(&tmp->lock);
+	}
+
+	kfree(table);
+}
+
+static int __init
+create_gate_table (void)
+{
+	const struct unw_table_entry *entry, *start, *end;
+	unsigned long *lp, segbase = GATE_ADDR;
+	size_t info_size, size;
+	char *info;
+	Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr)
+		if (phdr->p_type == PT_IA_64_UNWIND) {
+			punw = phdr;
+			break;
+		}
+
+	if (!punw) {
+		printk("%s: failed to find gate DSO's unwind table!\n", __func__);
+		return 0;
+	}
+
+	start = (const struct unw_table_entry *) punw->p_vaddr;
+	end = (struct unw_table_entry *) ((char *) start + punw->p_memsz);
+	size  = 0;
+
+	unw_add_unwind_table("linux-gate.so", segbase, 0, start, end);
+
+	for (entry = start; entry < end; ++entry)
+		size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset));
+	size += 8;	/* reserve space for "end of table" marker */
+
+	unw.gate_table = kmalloc(size, GFP_KERNEL);
+	if (!unw.gate_table) {
+		unw.gate_table_size = 0;
+		printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __func__);
+		return 0;
+	}
+	unw.gate_table_size = size;
+
+	lp = unw.gate_table;
+	info = (char *) unw.gate_table + size;
+
+	for (entry = start; entry < end; ++entry, lp += 3) {
+		info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset));
+		info -= info_size;
+		memcpy(info, (char *) segbase + entry->info_offset, info_size);
+
+		lp[0] = segbase + entry->start_offset;		/* start */
+		lp[1] = segbase + entry->end_offset;		/* end */
+		lp[2] = info - (char *) unw.gate_table;		/* info */
+	}
+	*lp = 0;	/* end-of-table marker */
+	return 0;
+}
+
+__initcall(create_gate_table);
+
+void __init
+unw_init (void)
+{
+	extern char __gp[];
+	extern void unw_hash_index_t_is_too_narrow (void);
+	long i, off;
+
+	if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE)
+		unw_hash_index_t_is_too_narrow();
+
+	unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT);
+	unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE);
+	unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS);
+	unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0);
+	unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT);
+	unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR);
+	unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC);
+	unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR);
+	for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8)
+		unw.sw_off[unw.preg_index[i]] = off;
+	for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8)
+		unw.sw_off[unw.preg_index[i]] = off;
+	for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16)
+		unw.sw_off[unw.preg_index[i]] = off;
+	for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16)
+		unw.sw_off[unw.preg_index[i]] = off;
+
+	for (i = 0; i < UNW_CACHE_SIZE; ++i) {
+		if (i > 0)
+			unw.cache[i].lru_chain = (i - 1);
+		unw.cache[i].coll_chain = -1;
+		rwlock_init(&unw.cache[i].lock);
+	}
+	unw.lru_head = UNW_CACHE_SIZE - 1;
+	unw.lru_tail = 0;
+
+	init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned long) __gp,
+			  __start_unwind, __end_unwind);
+}
+
+/*
+ * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
+ *
+ *	This system call has been deprecated.  The new and improved way to get
+ *	at the kernel's unwind info is via the gate DSO.  The address of the
+ *	ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR.
+ *
+ * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
+ *
+ * This system call copies the unwind data into the buffer pointed to by BUF and returns
+ * the size of the unwind data.  If BUF_SIZE is smaller than the size of the unwind data
+ * or if BUF is NULL, nothing is copied, but the system call still returns the size of the
+ * unwind data.
+ *
+ * The first portion of the unwind data contains an unwind table and rest contains the
+ * associated unwind info (in no particular order).  The unwind table consists of a table
+ * of entries of the form:
+ *
+ *	u64 start;	(64-bit address of start of function)
+ *	u64 end;	(64-bit address of start of function)
+ *	u64 info;	(BUF-relative offset to unwind info)
+ *
+ * The end of the unwind table is indicated by an entry with a START address of zero.
+ *
+ * Please see the IA-64 Software Conventions and Runtime Architecture manual for details
+ * on the format of the unwind info.
+ *
+ * ERRORS
+ *	EFAULT	BUF points outside your accessible address space.
+ */
+asmlinkage long
+sys_getunwind (void __user *buf, size_t buf_size)
+{
+	if (buf && buf_size >= unw.gate_table_size)
+		if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0)
+			return -EFAULT;
+	return unw.gate_table_size;
+}
diff --git a/arch/ia64/kernel/unwind_decoder.c b/arch/ia64/kernel/unwind_decoder.c
new file mode 100644
index 00000000..50ac2d82
--- /dev/null
+++ b/arch/ia64/kernel/unwind_decoder.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Generic IA-64 unwind info decoder.
+ *
+ * This file is used both by the Linux kernel and objdump.  Please keep
+ * the two copies of this file in sync.
+ *
+ * You need to customize the decoder by defining the following
+ * macros/constants before including this file:
+ *
+ *  Types:
+ *	unw_word	Unsigned integer type with at least 64 bits 
+ *
+ *  Register names:
+ *	UNW_REG_BSP
+ *	UNW_REG_BSPSTORE
+ *	UNW_REG_FPSR
+ *	UNW_REG_LC
+ *	UNW_REG_PFS
+ *	UNW_REG_PR
+ *	UNW_REG_RNAT
+ *	UNW_REG_PSP
+ *	UNW_REG_RP
+ *	UNW_REG_UNAT
+ *
+ *  Decoder action macros:
+ *	UNW_DEC_BAD_CODE(code)
+ *	UNW_DEC_ABI(fmt,abi,context,arg)
+ *	UNW_DEC_BR_GR(fmt,brmask,gr,arg)
+ *	UNW_DEC_BR_MEM(fmt,brmask,arg)
+ *	UNW_DEC_COPY_STATE(fmt,label,arg)
+ *	UNW_DEC_EPILOGUE(fmt,t,ecount,arg)
+ *	UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg)
+ *	UNW_DEC_FR_MEM(fmt,frmask,arg)
+ *	UNW_DEC_GR_GR(fmt,grmask,gr,arg)
+ *	UNW_DEC_GR_MEM(fmt,grmask,arg)
+ *	UNW_DEC_LABEL_STATE(fmt,label,arg)
+ *	UNW_DEC_MEM_STACK_F(fmt,t,size,arg)
+ *	UNW_DEC_MEM_STACK_V(fmt,t,arg)
+ *	UNW_DEC_PRIUNAT_GR(fmt,r,arg)
+ *	UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)
+ *	UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)
+ *	UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg)
+ *	UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg)
+ *	UNW_DEC_PROLOGUE(fmt,body,rlen,arg)
+ *	UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg)
+ *	UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg)
+ *	UNW_DEC_REG_REG(fmt,src,dst,arg)
+ *	UNW_DEC_REG_SPREL(fmt,reg,spoff,arg)
+ *	UNW_DEC_REG_WHEN(fmt,reg,t,arg)
+ *	UNW_DEC_RESTORE(fmt,t,abreg,arg)
+ *	UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg)
+ *	UNW_DEC_SPILL_BASE(fmt,pspoff,arg)
+ *	UNW_DEC_SPILL_MASK(fmt,imaskp,arg)
+ *	UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg)
+ *	UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ *	UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg)
+ *	UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg)
+ *	UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg)
+ *	UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ */
+
+static unw_word
+unw_decode_uleb128 (unsigned char **dpp)
+{
+  unsigned shift = 0;
+  unw_word byte, result = 0;
+  unsigned char *bp = *dpp;
+
+  while (1)
+    {
+      byte = *bp++;
+      result |= (byte & 0x7f) << shift;
+      if ((byte & 0x80) == 0)
+	break;
+      shift += 7;
+    }
+  *dpp = bp;
+  return result;
+}
+
+static unsigned char *
+unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, abreg;
+  unw_word t, off;
+
+  byte1 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  off = unw_decode_uleb128 (&dp);
+  abreg = (byte1 & 0x7f);
+  if (byte1 & 0x80)
+	  UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg);
+  else
+	  UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, abreg, x, ytreg;
+  unw_word t;
+
+  byte1 = *dp++; byte2 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  abreg = (byte1 & 0x7f);
+  ytreg = byte2;
+  x = (byte1 >> 7) & 1;
+  if ((byte1 & 0x80) == 0 && ytreg == 0)
+    UNW_DEC_RESTORE(X2, t, abreg, arg);
+  else
+    UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, abreg, qp;
+  unw_word t, off;
+
+  byte1 = *dp++; byte2 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  off = unw_decode_uleb128 (&dp);
+
+  qp = (byte1 & 0x3f);
+  abreg = (byte2 & 0x7f);
+
+  if (byte1 & 0x80)
+    UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg);
+  else
+    UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg;
+  unw_word t;
+
+  byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+
+  qp = (byte1 & 0x3f);
+  abreg = (byte2 & 0x7f);
+  x = (byte2 >> 7) & 1;
+  ytreg = byte3;
+
+  if ((byte2 & 0x80) == 0 && byte3 == 0)
+    UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg);
+  else
+    UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  int body = (code & 0x20) != 0;
+  unw_word rlen;
+
+  rlen = (code & 0x1f);
+  UNW_DEC_PROLOGUE(R1, body, rlen, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, mask, grsave;
+  unw_word rlen;
+
+  byte1 = *dp++;
+
+  mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+  grsave = (byte1 & 0x7f);
+  rlen = unw_decode_uleb128 (&dp);
+  UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word rlen;
+
+  rlen = unw_decode_uleb128 (&dp);
+  UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char brmask = (code & 0x1f);
+
+  UNW_DEC_BR_MEM(P1, brmask, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg)
+{
+  if ((code & 0x10) == 0)
+    {
+      unsigned char byte1 = *dp++;
+
+      UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1),
+		    (byte1 & 0x7f), arg);
+    }
+  else if ((code & 0x08) == 0)
+    {
+      unsigned char byte1 = *dp++, r, dst;
+
+      r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+      dst = (byte1 & 0x7f);
+      switch (r)
+	{
+	case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break;
+	case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break;
+	case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break;
+	case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break;
+	case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break;
+	case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break;
+	case 6: UNW_DEC_RP_BR(P3, dst, arg); break;
+	case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break;
+	case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break;
+	case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break;
+	case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break;
+	case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break;
+	default: UNW_DEC_BAD_CODE(r); break;
+	}
+    }
+  else if ((code & 0x7) == 0)
+    UNW_DEC_SPILL_MASK(P4, dp, arg);
+  else if ((code & 0x7) == 1)
+    {
+      unw_word grmask, frmask, byte1, byte2, byte3;
+
+      byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+      grmask = ((byte1 >> 4) & 0xf);
+      frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3;
+      UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg);
+    }
+  else
+    UNW_DEC_BAD_CODE(code);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg)
+{
+  int gregs = (code & 0x10) != 0;
+  unsigned char mask = (code & 0x0f);
+
+  if (gregs)
+    UNW_DEC_GR_MEM(P6, mask, arg);
+  else
+    UNW_DEC_FR_MEM(P6, mask, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char r, byte1, byte2;
+  unw_word t, size;
+
+  if ((code & 0x10) == 0)
+    {
+      r = (code & 0xf);
+      t = unw_decode_uleb128 (&dp);
+      switch (r)
+	{
+	case 0:
+	  size = unw_decode_uleb128 (&dp);
+	  UNW_DEC_MEM_STACK_F(P7, t, size, arg);
+	  break;
+
+	case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break;
+	case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break;
+	case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break;
+	case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break;
+	case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break;
+	case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break;
+	case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break;
+	case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break;
+	case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break;
+	case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break;
+	case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break;
+	case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break;
+	case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break;
+	case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break;
+	case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break;
+	default: UNW_DEC_BAD_CODE(r); break;
+	}
+    }
+  else
+    {
+      switch (code & 0xf)
+	{
+	case 0x0: /* p8 */
+	  {
+	    r = *dp++;
+	    t = unw_decode_uleb128 (&dp);
+	    switch (r)
+	      {
+	      case  1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break;
+	      case  2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break;
+	      case  3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break;
+	      case  4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break;
+	      case  5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break;
+	      case  6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break;
+	      case  7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break;
+	      case  8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break;
+	      case  9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break;
+	      case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break;
+	      case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+	      case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+	      case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break;
+	      case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break;
+	      case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break;
+	      case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break;
+	      case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break;
+	      case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break;
+	      case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break;
+	      default: UNW_DEC_BAD_CODE(r); break;
+	    }
+	  }
+	  break;
+
+	case 0x1:
+	  byte1 = *dp++; byte2 = *dp++;
+	  UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg);
+	  break;
+
+	case 0xf: /* p10 */
+	  byte1 = *dp++; byte2 = *dp++;
+	  UNW_DEC_ABI(P10, byte1, byte2, arg);
+	  break;
+
+	case 0x9:
+	  return unw_decode_x1 (dp, code, arg);
+
+	case 0xa:
+	  return unw_decode_x2 (dp, code, arg);
+
+	case 0xb:
+	  return unw_decode_x3 (dp, code, arg);
+
+	case 0xc:
+	  return unw_decode_x4 (dp, code, arg);
+
+	default:
+	  UNW_DEC_BAD_CODE(code);
+	  break;
+	}
+    }
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word label = (code & 0x1f);
+
+  if ((code & 0x20) != 0)
+    UNW_DEC_COPY_STATE(B1, label, arg);
+  else
+    UNW_DEC_LABEL_STATE(B1, label, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word t;
+
+  t = unw_decode_uleb128 (&dp);
+  UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word t, ecount, label;
+
+  if ((code & 0x10) == 0)
+    {
+      t = unw_decode_uleb128 (&dp);
+      ecount = unw_decode_uleb128 (&dp);
+      UNW_DEC_EPILOGUE(B3, t, ecount, arg);
+    }
+  else if ((code & 0x07) == 0)
+    {
+      label = unw_decode_uleb128 (&dp);
+      if ((code & 0x08) != 0)
+	UNW_DEC_COPY_STATE(B4, label, arg);
+      else
+	UNW_DEC_LABEL_STATE(B4, label, arg);
+    }
+  else
+    switch (code & 0x7)
+      {
+      case 1: return unw_decode_x1 (dp, code, arg);
+      case 2: return unw_decode_x2 (dp, code, arg);
+      case 3: return unw_decode_x3 (dp, code, arg);
+      case 4: return unw_decode_x4 (dp, code, arg);
+      default: UNW_DEC_BAD_CODE(code); break;
+      }
+  return dp;
+}
+
+typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *);
+
+static unw_decoder unw_decode_table[2][8] =
+{
+  /* prologue table: */
+  {
+    unw_decode_r1,	/* 0 */
+    unw_decode_r1,
+    unw_decode_r2,
+    unw_decode_r3,
+    unw_decode_p1,	/* 4 */
+    unw_decode_p2_p5,
+    unw_decode_p6,
+    unw_decode_p7_p10
+  },
+  {
+    unw_decode_r1,	/* 0 */
+    unw_decode_r1,
+    unw_decode_r2,
+    unw_decode_r3,
+    unw_decode_b1,	/* 4 */
+    unw_decode_b1,
+    unw_decode_b2,
+    unw_decode_b3_x4
+  }
+};
+
+/*
+ * Decode one descriptor and return address of next descriptor.
+ */
+static inline unsigned char *
+unw_decode (unsigned char *dp, int inside_body, void *arg)
+{
+  unw_decoder decoder;
+  unsigned char code;
+
+  code = *dp++;
+  decoder = unw_decode_table[inside_body][code >> 5];
+  dp = (*decoder) (dp, code, arg);
+  return dp;
+}
diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h
new file mode 100644
index 00000000..96693a6a
--- /dev/null
+++ b/arch/ia64/kernel/unwind_i.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Kernel unwind support.
+ */
+
+#define UNW_VER(x)		((x) >> 48)
+#define UNW_FLAG_MASK		0x0000ffff00000000
+#define UNW_FLAG_OSMASK		0x0000f00000000000
+#define UNW_FLAG_EHANDLER(x)	((x) & 0x0000000100000000L)
+#define UNW_FLAG_UHANDLER(x)	((x) & 0x0000000200000000L)
+#define UNW_LENGTH(x)		((x) & 0x00000000ffffffffL)
+
+enum unw_register_index {
+	/* primary unat: */
+	UNW_REG_PRI_UNAT_GR,
+	UNW_REG_PRI_UNAT_MEM,
+
+	/* register stack */
+	UNW_REG_BSP,					/* register stack pointer */
+	UNW_REG_BSPSTORE,
+	UNW_REG_PFS,					/* previous function state */
+	UNW_REG_RNAT,
+	/* memory stack */
+	UNW_REG_PSP,					/* previous memory stack pointer */
+	/* return pointer: */
+	UNW_REG_RP,
+
+	/* preserved registers: */
+	UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7,
+	UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR,
+	UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5,
+	UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5,
+	UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19,
+	UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23,
+	UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27,
+	UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31,
+	UNW_NUM_REGS
+};
+
+struct unw_info_block {
+	u64 header;
+	u64 desc[0];		/* unwind descriptors */
+	/* personality routine and language-specific data follow behind descriptors */
+};
+
+struct unw_table {
+	struct unw_table *next;		/* must be first member! */
+	const char *name;
+	unsigned long gp;		/* global pointer for this load-module */
+	unsigned long segment_base;	/* base for offsets in the unwind table entries */
+	unsigned long start;
+	unsigned long end;
+	const struct unw_table_entry *array;
+	unsigned long length;
+};
+
+enum unw_where {
+	UNW_WHERE_NONE,			/* register isn't saved at all */
+	UNW_WHERE_GR,			/* register is saved in a general register */
+	UNW_WHERE_FR,			/* register is saved in a floating-point register */
+	UNW_WHERE_BR,			/* register is saved in a branch register */
+	UNW_WHERE_SPREL,		/* register is saved on memstack (sp-relative) */
+	UNW_WHERE_PSPREL,		/* register is saved on memstack (psp-relative) */
+	/*
+	 * At the end of each prologue these locations get resolved to
+	 * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively:
+	 */
+	UNW_WHERE_SPILL_HOME,		/* register is saved in its spill home */
+	UNW_WHERE_GR_SAVE		/* register is saved in next general register */
+};
+
+#define UNW_WHEN_NEVER	0x7fffffff
+
+struct unw_reg_info {
+	unsigned long val;		/* save location: register number or offset */
+	enum unw_where where;		/* where the register gets saved */
+	int when;			/* when the register gets saved */
+};
+
+struct unw_reg_state {
+	struct unw_reg_state *next;		/* next (outer) element on state stack */
+	struct unw_reg_info reg[UNW_NUM_REGS];	/* register save locations */
+};
+
+struct unw_labeled_state {
+	struct unw_labeled_state *next;		/* next labeled state (or NULL) */
+	unsigned long label;			/* label for this state */
+	struct unw_reg_state saved_state;
+};
+
+struct unw_state_record {
+	unsigned int first_region : 1;	/* is this the first region? */
+	unsigned int done : 1;		/* are we done scanning descriptors? */
+	unsigned int any_spills : 1;	/* got any register spills? */
+	unsigned int in_body : 1;	/* are we inside a body (as opposed to a prologue)? */
+	unsigned long flags;		/* see UNW_FLAG_* in unwind.h */
+
+	u8 *imask;			/* imask of spill_mask record or NULL */
+	unsigned long pr_val;		/* predicate values */
+	unsigned long pr_mask;		/* predicate mask */
+	long spill_offset;		/* psp-relative offset for spill base */
+	int region_start;
+	int region_len;
+	int epilogue_start;
+	int epilogue_count;
+	int when_target;
+
+	u8 gr_save_loc;			/* next general register to use for saving a register */
+	u8 return_link_reg;		/* branch register in which the return link is passed */
+
+	struct unw_labeled_state *labeled_states;	/* list of all labeled states */
+	struct unw_reg_state curr;	/* current state */
+};
+
+enum unw_nat_type {
+	UNW_NAT_NONE,		/* NaT not represented */
+	UNW_NAT_VAL,		/* NaT represented by NaT value (fp reg) */
+	UNW_NAT_MEMSTK,		/* NaT value is in unat word at offset OFF  */
+	UNW_NAT_REGSTK		/* NaT is in rnat */
+};
+
+enum unw_insn_opcode {
+	UNW_INSN_ADD,			/* s[dst] += val */
+	UNW_INSN_ADD_PSP,		/* s[dst] = (s.psp + val) */
+	UNW_INSN_ADD_SP,		/* s[dst] = (s.sp + val) */
+	UNW_INSN_MOVE,			/* s[dst] = s[val] */
+	UNW_INSN_MOVE2,			/* s[dst] = s[val]; s[dst+1] = s[val+1] */
+	UNW_INSN_MOVE_STACKED,		/* s[dst] = ia64_rse_skip(*s.bsp, val) */
+	UNW_INSN_SETNAT_MEMSTK,		/* s[dst+1].nat.type = MEMSTK;
+					   s[dst+1].nat.off = *s.pri_unat - s[dst] */
+	UNW_INSN_SETNAT_TYPE,		/* s[dst+1].nat.type = val */
+	UNW_INSN_LOAD,			/* s[dst] = *s[val] */
+	UNW_INSN_MOVE_SCRATCH,		/* s[dst] = scratch reg "val" */
+	UNW_INSN_MOVE_CONST,            /* s[dst] = constant reg "val" */
+};
+
+struct unw_insn {
+	unsigned int opc	:  4;
+	unsigned int dst	:  9;
+	signed int val		: 19;
+};
+
+/*
+ * Preserved general static registers (r4-r7) give rise to two script
+ * instructions; everything else yields at most one instruction; at
+ * the end of the script, the psp gets popped, accounting for one more
+ * instruction.
+ */
+#define UNW_MAX_SCRIPT_LEN	(UNW_NUM_REGS + 5)
+
+struct unw_script {
+	unsigned long ip;		/* ip this script is for */
+	unsigned long pr_mask;		/* mask of predicates script depends on */
+	unsigned long pr_val;		/* predicate values this script is for */
+	rwlock_t lock;
+	unsigned int flags;		/* see UNW_FLAG_* in unwind.h */
+	unsigned short lru_chain;	/* used for least-recently-used chain */
+	unsigned short coll_chain;	/* used for hash collisions */
+	unsigned short hint;		/* hint for next script to try (or -1) */
+	unsigned short count;		/* number of instructions in script */
+	struct unw_insn insn[UNW_MAX_SCRIPT_LEN];
+};
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
new file mode 100644
index 00000000..53c0ba00
--- /dev/null
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -0,0 +1,255 @@
+
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(phys_start)
+jiffies = jiffies_64;
+
+PHDRS {
+	code   PT_LOAD;
+	percpu PT_LOAD;
+	data   PT_LOAD;
+	note   PT_NOTE;
+	unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
+}
+
+SECTIONS {
+	/*
+	 * unwind exit sections must be discarded before
+	 * the rest of the sections get included.
+	 */
+	/DISCARD/ : {
+		*(.IA_64.unwind.exit.text)
+		*(.IA_64.unwind_info.exit.text)
+		*(.comment)
+		*(.note)
+	}
+
+	v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
+	phys_start = _start - LOAD_OFFSET;
+
+	code : {
+	} :code
+	. = KERNEL_START;
+
+	_text = .;
+	_stext = .;
+
+	.text : AT(ADDR(.text) - LOAD_OFFSET) {
+		__start_ivt_text = .;
+		*(.text..ivt)
+		__end_ivt_text = .;
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		*(.gnu.linkonce.t*)
+	}
+
+	.text2 : AT(ADDR(.text2) - LOAD_OFFSET)	{
+		*(.text2)
+	}
+
+#ifdef CONFIG_SMP
+	.text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) {
+		*(.text..lock)
+	}
+#endif
+	_etext = .;
+
+	/*
+	 * Read-only data
+	 */
+	NOTES :code :note       /* put .notes in text and mark in PT_NOTE  */
+	code_continues : {
+	} : code               /* switch back to regular program...  */
+
+	EXCEPTION_TABLE(16)
+
+	/* MCA table */
+	. = ALIGN(16);
+	__mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) {
+		__start___mca_table = .;
+		*(__mca_table)
+		__stop___mca_table = .;
+	}
+
+	.data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) {
+		__start___phys_stack_reg_patchlist = .;
+		*(.data..patch.phys_stack_reg)
+		__end___phys_stack_reg_patchlist = .;
+	}
+
+	/*
+	 * Global data
+	 */
+	_data = .;
+
+	/* Unwind info & table: */
+	. = ALIGN(8);
+	.IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) {
+		*(.IA_64.unwind_info*)
+	}
+	.IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) {
+		__start_unwind = .;
+		*(.IA_64.unwind*)
+		__end_unwind = .;
+	} :code :unwind
+	code_continues2 : {
+	} : code
+
+	RODATA
+
+	.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
+		*(.opd)
+	}
+
+	/*
+	 * Initialization code and data:
+	 */
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
+
+	INIT_TEXT_SECTION(PAGE_SIZE)
+	INIT_DATA_SECTION(16)
+
+	.data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) {
+		__start___vtop_patchlist = .;
+		*(.data..patch.vtop)
+		__end___vtop_patchlist = .;
+	}
+
+	.data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) {
+		__start___rse_patchlist = .;
+		*(.data..patch.rse)
+		__end___rse_patchlist = .;
+	}
+
+	.data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) {
+		__start___mckinley_e9_bundles = .;
+		*(.data..patch.mckinley_e9)
+		__end___mckinley_e9_bundles = .;
+	}
+
+#if defined(CONFIG_PARAVIRT)
+	. = ALIGN(16);
+	.paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET) {
+		__start_paravirt_bundles = .;
+		*(.paravirt_bundles)
+		__stop_paravirt_bundles = .;
+	}
+	. = ALIGN(16);
+	.paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET) {
+		__start_paravirt_insts = .;
+		*(.paravirt_insts)
+		__stop_paravirt_insts = .;
+	}
+	. = ALIGN(16);
+	.paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET) {
+		__start_paravirt_branches = .;
+		*(.paravirt_branches)
+		__stop_paravirt_branches = .;
+	}
+#endif
+
+#if defined(CONFIG_IA64_GENERIC)
+	/* Machine Vector */
+	. = ALIGN(16);
+	.machvec : AT(ADDR(.machvec) - LOAD_OFFSET) {
+		machvec_start = .;
+		*(.machvec)
+		machvec_end = .;
+	}
+#endif
+
+#ifdef	CONFIG_SMP
+	. = ALIGN(PERCPU_PAGE_SIZE);
+	__cpu0_per_cpu = .;
+	. = . + PERCPU_PAGE_SIZE;   /* cpu0 per-cpu space */
+#endif
+
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
+	.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
+		PAGE_ALIGNED_DATA(PAGE_SIZE)
+		. = ALIGN(PAGE_SIZE);
+		__start_gate_section = .;
+		*(.data..gate)
+		__stop_gate_section = .;
+#ifdef CONFIG_XEN
+		. = ALIGN(PAGE_SIZE);
+		__xen_start_gate_section = .;
+		*(.data..gate.xen)
+		__xen_stop_gate_section = .;
+#endif
+	}
+	/*
+	 * make sure the gate page doesn't expose
+	 * kernel data
+	 */
+	. = ALIGN(PAGE_SIZE);
+
+	/* Per-cpu data: */
+	. = ALIGN(PERCPU_PAGE_SIZE);
+	PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
+	__phys_per_cpu_start = __per_cpu_load;
+	/*
+	 * ensure percpu data fits
+	 * into percpu page size
+	 */
+	. = __phys_per_cpu_start + PERCPU_PAGE_SIZE;
+
+	data : {
+	} :data
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		_sdata  =  .;
+		INIT_TASK_DATA(PAGE_SIZE)
+		CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
+		READ_MOSTLY_DATA(SMP_CACHE_BYTES)
+		DATA_DATA
+		*(.data1)
+		*(.gnu.linkonce.d*)
+		CONSTRUCTORS
+	}
+
+	. = ALIGN(16);	/* gp must be 16-byte aligned for exc. table */
+	.got : AT(ADDR(.got) - LOAD_OFFSET) {
+		*(.got.plt)
+		*(.got)
+	}
+	__gp = ADDR(.got) + 0x200000;
+
+	/*
+	 * We want the small data sections together,
+	 * so single-instruction offsets can access
+	 * them all, and initialized data all before
+	 * uninitialized, so we can shorten the
+	 * on-disk segment size.
+	 */
+	.sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
+		*(.sdata)
+		*(.sdata1)
+		*(.srdata)
+	}
+	_edata  =  .;
+
+	BSS_SECTION(0, 0, 0)
+
+	_end = .;
+
+	code : {
+	} :code
+
+	STABS_DEBUG
+	DWARF_DEBUG
+
+	/* Default discards */
+	DISCARDS
+}
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
new file mode 100644
index 00000000..fa4d1e59
--- /dev/null
+++ b/arch/ia64/kvm/Kconfig
@@ -0,0 +1,54 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	depends on HAVE_KVM || IA64
+	default y
+	---help---
+	  Say Y here to get to see options for using your Linux host to run other
+	  operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	tristate "Kernel-based Virtual Machine (KVM) support"
+	depends on HAVE_KVM && MODULES && EXPERIMENTAL
+	# for device assignment:
+	depends on PCI
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select HAVE_KVM_IRQCHIP
+	select KVM_APIC_ARCHITECTURE
+	select KVM_MMIO
+	---help---
+	  Support hosting fully virtualized guest machines using hardware
+	  virtualization extensions.  You will need a fairly recent
+	  processor equipped with virtualization extensions. You will also
+	  need to select one or more of the processor modules below.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  To compile this as a module, choose M here: the module
+	  will be called kvm.
+
+	  If unsure, say N.
+
+config KVM_INTEL
+	tristate "KVM for Intel Itanium 2 processors support"
+	depends on KVM && m
+	---help---
+	  Provides support for KVM on Itanium 2 processors equipped with the VT
+	  extensions.
+
+source drivers/vhost/Kconfig
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
new file mode 100644
index 00000000..db3d7c5d
--- /dev/null
+++ b/arch/ia64/kvm/Makefile
@@ -0,0 +1,66 @@
+#This Make file is to generate asm-offsets.h and build source.
+#
+
+#Generate asm-offsets.h for vmm module build
+offsets-file := asm-offsets.h
+
+always  := $(offsets-file)
+targets := $(offsets-file)
+targets += arch/ia64/kvm/asm-offsets.s
+
+# Default sed regexp - multiline due to syntax constraints
+define sed-y
+	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
+endef
+
+quiet_cmd_offsets = GEN     $@
+define cmd_offsets
+	(set -e; \
+	 echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
+	 echo "#define __ASM_KVM_OFFSETS_H__"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Makefile"; \
+	 echo " *"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-y) $<; \
+	 echo ""; \
+	 echo "#endif" ) > $@
+endef
+
+# We use internal rules to avoid the "is up to date" message from make
+arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \
+			$(wildcard $(srctree)/arch/ia64/include/asm/*.h)\
+			$(wildcard $(srctree)/include/linux/*.h)
+	$(call if_changed_dep,cc_s_c)
+
+$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
+	$(call cmd,offsets)
+
+FORCE : $(obj)/$(offsets-file)
+
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
+asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
+		coalesced_mmio.o irq_comm.o assigned-dev.o)
+
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
+endif
+
+kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
+obj-$(CONFIG_KVM) += kvm.o
+
+CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
+kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
+	vtlb.o process.o kvm_lib.o
+#Add link memcpy and memset to avoid possible structure assignment error
+kvm-intel-objs += memcpy.o memset.o
+obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
new file mode 100644
index 00000000..9324c875
--- /dev/null
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -0,0 +1,241 @@
+/*
+ * asm-offsets.c Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ *
+ * Anthony Xu    <anthony.xu@intel.com>
+ * Xiantao Zhang <xiantao.zhang@intel.com>
+ * Copyright (c) 2007 Intel Corporation  KVM support.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kbuild.h>
+
+#include "vcpu.h"
+
+void foo(void)
+{
+	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
+	DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
+
+	BLANK();
+
+	DEFINE(VMM_VCPU_META_RR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
+	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
+			offsetof(struct kvm_vcpu,
+				arch.metaphysical_saved_rr0));
+	DEFINE(VMM_VCPU_VRR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vrr[0]));
+	DEFINE(VMM_VPD_IRR0_OFFSET,
+			offsetof(struct vpd, irr[0]));
+	DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
+			offsetof(struct kvm_vcpu, arch.itc_check));
+	DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
+			offsetof(struct kvm_vcpu, arch.irq_check));
+	DEFINE(VMM_VPD_VHPI_OFFSET,
+			offsetof(struct vpd, vhpi));
+	DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vsa_base));
+	DEFINE(VMM_VCPU_VPD_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vpd));
+	DEFINE(VMM_VCPU_IRQ_CHECK,
+			offsetof(struct kvm_vcpu, arch.irq_check));
+	DEFINE(VMM_VCPU_TIMER_PENDING,
+			offsetof(struct kvm_vcpu, arch.timer_pending));
+	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
+	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
+			offsetof(struct kvm_vcpu, arch.mode_flags));
+	DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
+			offsetof(struct kvm_vcpu, arch.itc_offset));
+	DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
+			offsetof(struct kvm_vcpu, arch.last_itc));
+	DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
+			offsetof(struct kvm_vcpu, arch.saved_gp));
+
+	BLANK();
+
+	DEFINE(VMM_PT_REGS_B6_OFFSET,
+				offsetof(struct kvm_pt_regs, b6));
+	DEFINE(VMM_PT_REGS_B7_OFFSET,
+				offsetof(struct kvm_pt_regs, b7));
+	DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_csd));
+	DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_ssd));
+	DEFINE(VMM_PT_REGS_R8_OFFSET,
+				offsetof(struct kvm_pt_regs, r8));
+	DEFINE(VMM_PT_REGS_R9_OFFSET,
+				offsetof(struct kvm_pt_regs, r9));
+	DEFINE(VMM_PT_REGS_R10_OFFSET,
+				offsetof(struct kvm_pt_regs, r10));
+	DEFINE(VMM_PT_REGS_R11_OFFSET,
+				offsetof(struct kvm_pt_regs, r11));
+	DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_ipsr));
+	DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_iip));
+	DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_ifs));
+	DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_unat));
+	DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_pfs));
+	DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_rsc));
+	DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_rnat));
+
+	DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_bspstore));
+	DEFINE(VMM_PT_REGS_PR_OFFSET,
+				offsetof(struct kvm_pt_regs, pr));
+	DEFINE(VMM_PT_REGS_B0_OFFSET,
+				offsetof(struct kvm_pt_regs, b0));
+	DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
+				offsetof(struct kvm_pt_regs, loadrs));
+	DEFINE(VMM_PT_REGS_R1_OFFSET,
+				offsetof(struct kvm_pt_regs, r1));
+	DEFINE(VMM_PT_REGS_R12_OFFSET,
+				offsetof(struct kvm_pt_regs, r12));
+	DEFINE(VMM_PT_REGS_R13_OFFSET,
+				offsetof(struct kvm_pt_regs, r13));
+	DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_fpsr));
+	DEFINE(VMM_PT_REGS_R15_OFFSET,
+				offsetof(struct kvm_pt_regs, r15));
+	DEFINE(VMM_PT_REGS_R14_OFFSET,
+				offsetof(struct kvm_pt_regs, r14));
+	DEFINE(VMM_PT_REGS_R2_OFFSET,
+				offsetof(struct kvm_pt_regs, r2));
+	DEFINE(VMM_PT_REGS_R3_OFFSET,
+				offsetof(struct kvm_pt_regs, r3));
+	DEFINE(VMM_PT_REGS_R16_OFFSET,
+				offsetof(struct kvm_pt_regs, r16));
+	DEFINE(VMM_PT_REGS_R17_OFFSET,
+				offsetof(struct kvm_pt_regs, r17));
+	DEFINE(VMM_PT_REGS_R18_OFFSET,
+				offsetof(struct kvm_pt_regs, r18));
+	DEFINE(VMM_PT_REGS_R19_OFFSET,
+				offsetof(struct kvm_pt_regs, r19));
+	DEFINE(VMM_PT_REGS_R20_OFFSET,
+				offsetof(struct kvm_pt_regs, r20));
+	DEFINE(VMM_PT_REGS_R21_OFFSET,
+				offsetof(struct kvm_pt_regs, r21));
+	DEFINE(VMM_PT_REGS_R22_OFFSET,
+				offsetof(struct kvm_pt_regs, r22));
+	DEFINE(VMM_PT_REGS_R23_OFFSET,
+				offsetof(struct kvm_pt_regs, r23));
+	DEFINE(VMM_PT_REGS_R24_OFFSET,
+				offsetof(struct kvm_pt_regs, r24));
+	DEFINE(VMM_PT_REGS_R25_OFFSET,
+				offsetof(struct kvm_pt_regs, r25));
+	DEFINE(VMM_PT_REGS_R26_OFFSET,
+				offsetof(struct kvm_pt_regs, r26));
+	DEFINE(VMM_PT_REGS_R27_OFFSET,
+				offsetof(struct kvm_pt_regs, r27));
+	DEFINE(VMM_PT_REGS_R28_OFFSET,
+				offsetof(struct kvm_pt_regs, r28));
+	DEFINE(VMM_PT_REGS_R29_OFFSET,
+				offsetof(struct kvm_pt_regs, r29));
+	DEFINE(VMM_PT_REGS_R30_OFFSET,
+				offsetof(struct kvm_pt_regs, r30));
+	DEFINE(VMM_PT_REGS_R31_OFFSET,
+				offsetof(struct kvm_pt_regs, r31));
+	DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_ccv));
+	DEFINE(VMM_PT_REGS_F6_OFFSET,
+				offsetof(struct kvm_pt_regs, f6));
+	DEFINE(VMM_PT_REGS_F7_OFFSET,
+				offsetof(struct kvm_pt_regs, f7));
+	DEFINE(VMM_PT_REGS_F8_OFFSET,
+				offsetof(struct kvm_pt_regs, f8));
+	DEFINE(VMM_PT_REGS_F9_OFFSET,
+				offsetof(struct kvm_pt_regs, f9));
+	DEFINE(VMM_PT_REGS_F10_OFFSET,
+				offsetof(struct kvm_pt_regs, f10));
+	DEFINE(VMM_PT_REGS_F11_OFFSET,
+				offsetof(struct kvm_pt_regs, f11));
+	DEFINE(VMM_PT_REGS_R4_OFFSET,
+				offsetof(struct kvm_pt_regs, r4));
+	DEFINE(VMM_PT_REGS_R5_OFFSET,
+				offsetof(struct kvm_pt_regs, r5));
+	DEFINE(VMM_PT_REGS_R6_OFFSET,
+				offsetof(struct kvm_pt_regs, r6));
+	DEFINE(VMM_PT_REGS_R7_OFFSET,
+				offsetof(struct kvm_pt_regs, r7));
+	DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, eml_unat));
+	DEFINE(VMM_VCPU_IIPA_OFFSET,
+				offsetof(struct kvm_vcpu, arch.cr_iipa));
+	DEFINE(VMM_VCPU_OPCODE_OFFSET,
+				offsetof(struct kvm_vcpu, arch.opcode));
+	DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
+	DEFINE(VMM_VCPU_ISR_OFFSET,
+				offsetof(struct kvm_vcpu, arch.cr_isr));
+	DEFINE(VMM_PT_REGS_R16_SLOT,
+				(((offsetof(struct kvm_pt_regs, r16)
+				- sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
+	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
+				offsetof(struct kvm_vcpu, arch.mode_flags));
+	DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
+	BLANK();
+
+	DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
+	DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
+	DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
+			offsetof(struct kvm_vcpu, arch.insvc[0]));
+	DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
+	DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
+
+	DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
+	DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
+	DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
+	DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
+	DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
+	DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
+	DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
+	DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
+	DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
+	DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
+	DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
+	DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
+	DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
+	DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
+	DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
+	DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
+	DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
+	DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
+	DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
+	DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
+	DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
+	DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
+	DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
+	DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
+	DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
+	DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
+	DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
+	DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
+	DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
+	DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
+	DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
+	DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
+	DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
+	DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
+	BLANK();
+}
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
new file mode 100644
index 00000000..c0785a72
--- /dev/null
+++ b/arch/ia64/kvm/irq.h
@@ -0,0 +1,33 @@
+/*
+ * irq.h: In-kernel interrupt controller related definitions
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Authors:
+ *   Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include "lapic.h"
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return 1;
+}
+
+#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
new file mode 100644
index 00000000..8213efe1
--- /dev/null
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -0,0 +1,1975 @@
+/*
+ * kvm_ia64.c: Basic KVM suppport On Itanium series processors
+ *
+ *
+ * 	Copyright (C) 2007, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/percpu.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/bitops.h>
+#include <linux/hrtimer.h>
+#include <linux/uaccess.h>
+#include <linux/iommu.h>
+#include <linux/intel-iommu.h>
+
+#include <asm/pgtable.h>
+#include <asm/gcc_intrin.h>
+#include <asm/pal.h>
+#include <asm/cacheflush.h>
+#include <asm/div64.h>
+#include <asm/tlb.h>
+#include <asm/elf.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
+
+#include "misc.h"
+#include "vti.h"
+#include "iodev.h"
+#include "ioapic.h"
+#include "lapic.h"
+#include "irq.h"
+
+static unsigned long kvm_vmm_base;
+static unsigned long kvm_vsa_base;
+static unsigned long kvm_vm_buffer;
+static unsigned long kvm_vm_buffer_size;
+unsigned long kvm_vmm_gp;
+
+static long vp_env_info;
+
+static struct kvm_vmm_info *kvm_vmm_info;
+
+static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ NULL }
+};
+
+static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (vcpu->kvm->arch.is_sn2)
+		return rtc_time();
+	else
+#endif
+		return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
+static void kvm_flush_icache(unsigned long start, unsigned long len)
+{
+	int l;
+
+	for (l = 0; l < (len + 32); l += 32)
+		ia64_fc((void *)(start + l));
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+static void kvm_flush_tlb_all(void)
+{
+	unsigned long i, j, count0, count1, stride0, stride1, addr;
+	long flags;
+
+	addr    = local_cpu_data->ptce_base;
+	count0  = local_cpu_data->ptce_count[0];
+	count1  = local_cpu_data->ptce_count[1];
+	stride0 = local_cpu_data->ptce_stride[0];
+	stride1 = local_cpu_data->ptce_stride[1];
+
+	local_irq_save(flags);
+	for (i = 0; i < count0; ++i) {
+		for (j = 0; j < count1; ++j) {
+			ia64_ptce(addr);
+			addr += stride1;
+		}
+		addr += stride0;
+	}
+	local_irq_restore(flags);
+	ia64_srlz_i();			/* srlz.i implies srlz.d */
+}
+
+long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
+			(u64)opt_handler);
+
+	return iprv.status;
+}
+
+static  DEFINE_SPINLOCK(vp_lock);
+
+int kvm_arch_hardware_enable(void *garbage)
+{
+	long  status;
+	long  tmp_base;
+	unsigned long pte;
+	unsigned long saved_psr;
+	int slot;
+
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
+	local_irq_save(saved_psr);
+	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	local_irq_restore(saved_psr);
+	if (slot < 0)
+		return -EINVAL;
+
+	spin_lock(&vp_lock);
+	status = ia64_pal_vp_init_env(kvm_vsa_base ?
+				VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
+			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
+	if (status != 0) {
+		spin_unlock(&vp_lock);
+		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
+		return -EINVAL;
+	}
+
+	if (!kvm_vsa_base) {
+		kvm_vsa_base = tmp_base;
+		printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
+	}
+	spin_unlock(&vp_lock);
+	ia64_ptr_entry(0x3, slot);
+
+	return 0;
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+
+	long status;
+	int slot;
+	unsigned long pte;
+	unsigned long saved_psr;
+	unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
+
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
+				PAGE_KERNEL));
+
+	local_irq_save(saved_psr);
+	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	local_irq_restore(saved_psr);
+	if (slot < 0)
+		return;
+
+	status = ia64_pal_vp_exit_env(host_iva);
+	if (status)
+		printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
+				status);
+	ia64_ptr_entry(0x3, slot);
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+	*(int *)rtn = 0;
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+
+	int r;
+
+	switch (ext) {
+	case KVM_CAP_IRQCHIP:
+	case KVM_CAP_MP_STATE:
+	case KVM_CAP_IRQ_INJECT_STATUS:
+		r = 1;
+		break;
+	case KVM_CAP_COALESCED_MMIO:
+		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+		break;
+	case KVM_CAP_IOMMU:
+		r = iommu_found();
+		break;
+	default:
+		r = 0;
+	}
+	return r;
+
+}
+
+static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+	kvm_run->hw.hardware_exit_reason = 1;
+	return 0;
+}
+
+static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct kvm_mmio_req *p;
+	struct kvm_io_device *mmio_dev;
+	int r;
+
+	p = kvm_get_vcpu_ioreq(vcpu);
+
+	if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
+		goto mmio;
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
+	vcpu->mmio_size = kvm_run->mmio.len = p->size;
+	vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
+
+	if (vcpu->mmio_is_write)
+		memcpy(vcpu->mmio_data, &p->data, p->size);
+	memcpy(kvm_run->mmio.data, &p->data, p->size);
+	kvm_run->exit_reason = KVM_EXIT_MMIO;
+	return 0;
+mmio:
+	if (p->dir)
+		r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
+				    p->size, &p->data);
+	else
+		r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
+				     p->size, &p->data);
+	if (r)
+		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
+	p->state = STATE_IORESP_READY;
+
+	return 1;
+}
+
+static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p->exit_reason == EXIT_REASON_PAL_CALL)
+		return kvm_pal_emul(vcpu, kvm_run);
+	else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = 2;
+		return 0;
+	}
+}
+
+static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		kvm_sal_emul(vcpu);
+		return 1;
+	} else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = 3;
+		return 0;
+	}
+
+}
+
+static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
+{
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (!test_and_set_bit(vector, &vpd->irr[0])) {
+		vcpu->arch.irq_new_pending = 1;
+		kvm_vcpu_kick(vcpu);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ *  offset: address offset to IPI space.
+ *  value:  deliver value.
+ */
+static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
+				uint64_t vector)
+{
+	switch (dm) {
+	case SAPIC_FIXED:
+		break;
+	case SAPIC_NMI:
+		vector = 2;
+		break;
+	case SAPIC_EXTINT:
+		vector = 0;
+		break;
+	case SAPIC_INIT:
+	case SAPIC_PMI:
+	default:
+		printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
+		return;
+	}
+	__apic_accept_irq(vcpu, vector);
+}
+
+static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
+			unsigned long eid)
+{
+	union ia64_lid lid;
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		lid.val = VCPU_LID(vcpu);
+		if (lid.id == id && lid.eid == eid)
+			return vcpu;
+	}
+
+	return NULL;
+}
+
+static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
+	struct kvm_vcpu *target_vcpu;
+	struct kvm_pt_regs *regs;
+	union ia64_ipi_a addr = p->u.ipi_data.addr;
+	union ia64_ipi_d data = p->u.ipi_data.data;
+
+	target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
+	if (!target_vcpu)
+		return handle_vm_error(vcpu, kvm_run);
+
+	if (!target_vcpu->arch.launched) {
+		regs = vcpu_regs(target_vcpu);
+
+		regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
+		regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
+
+		target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		if (waitqueue_active(&target_vcpu->wq))
+			wake_up_interruptible(&target_vcpu->wq);
+	} else {
+		vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
+		if (target_vcpu != vcpu)
+			kvm_vcpu_kick(target_vcpu);
+	}
+
+	return 1;
+}
+
+struct call_data {
+	struct kvm_ptc_g ptc_g_data;
+	struct kvm_vcpu *vcpu;
+};
+
+static void vcpu_global_purge(void *info)
+{
+	struct call_data *p = (struct call_data *)info;
+	struct kvm_vcpu *vcpu = p->vcpu;
+
+	if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+		return;
+
+	set_bit(KVM_REQ_PTC_G, &vcpu->requests);
+	if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
+		vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
+							p->ptc_g_data;
+	} else {
+		clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
+		vcpu->arch.ptc_g_count = 0;
+		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
+	}
+}
+
+static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
+	struct kvm *kvm = vcpu->kvm;
+	struct call_data call_data;
+	int i;
+	struct kvm_vcpu *vcpui;
+
+	call_data.ptc_g_data = p->u.ptc_g_data;
+
+	kvm_for_each_vcpu(i, vcpui, kvm) {
+		if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
+				vcpu == vcpui)
+			continue;
+
+		if (waitqueue_active(&vcpui->wq))
+			wake_up_interruptible(&vcpui->wq);
+
+		if (vcpui->cpu != -1) {
+			call_data.vcpu = vcpui;
+			smp_call_function_single(vcpui->cpu,
+					vcpu_global_purge, &call_data, 1);
+		} else
+			printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
+
+	}
+	return 1;
+}
+
+static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	return 1;
+}
+
+static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
+{
+	unsigned long pte, rtc_phys_addr, map_addr;
+	int slot;
+
+	map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
+	rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
+	pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
+	slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
+	vcpu->arch.sn_rtc_tr_slot = slot;
+	if (slot < 0) {
+		printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
+		slot = 0;
+	}
+	return slot;
+}
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+
+	ktime_t kt;
+	long itc_diff;
+	unsigned long vcpu_now_itc;
+	unsigned long expires;
+	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
+	unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (irqchip_in_kernel(vcpu->kvm)) {
+
+		vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
+
+		if (time_after(vcpu_now_itc, vpd->itm)) {
+			vcpu->arch.timer_check = 1;
+			return 1;
+		}
+		itc_diff = vpd->itm - vcpu_now_itc;
+		if (itc_diff < 0)
+			itc_diff = -itc_diff;
+
+		expires = div64_u64(itc_diff, cyc_per_usec);
+		kt = ktime_set(0, 1000 * expires);
+
+		vcpu->arch.ht_active = 1;
+		hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
+
+		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+		kvm_vcpu_block(vcpu);
+		hrtimer_cancel(p_ht);
+		vcpu->arch.ht_active = 0;
+
+		if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
+				kvm_cpu_has_pending_timer(vcpu))
+			if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+				vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
+			return -EINTR;
+		return 1;
+	} else {
+		printk(KERN_ERR"kvm: Unsupported userspace halt!");
+		return 0;
+	}
+}
+
+static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+	return 0;
+}
+
+static int handle_external_interrupt(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run)
+{
+	return 1;
+}
+
+static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
+				struct kvm_run *kvm_run)
+{
+	printk("VMM: %s", vcpu->arch.log_buf);
+	return 1;
+}
+
+static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run) = {
+	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
+	[EXIT_REASON_MMIO_INSTRUCTION]      = handle_mmio,
+	[EXIT_REASON_PAL_CALL]              = handle_pal_call,
+	[EXIT_REASON_SAL_CALL]              = handle_sal_call,
+	[EXIT_REASON_SWITCH_RR6]            = handle_switch_rr6,
+	[EXIT_REASON_VM_DESTROY]            = handle_vm_shutdown,
+	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
+	[EXIT_REASON_IPI]		    = handle_ipi,
+	[EXIT_REASON_PTC_G]		    = handle_global_purge,
+	[EXIT_REASON_DEBUG]		    = handle_vcpu_debug,
+
+};
+
+static const int kvm_vti_max_exit_handlers =
+		sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
+
+static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p_exit_data;
+
+	p_exit_data = kvm_get_exit_data(vcpu);
+	return p_exit_data->exit_reason;
+}
+
+/*
+ * The guest has exited.  See if we can fix it or if we need userspace
+ * assistance.
+ */
+static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	u32 exit_reason = kvm_get_exit_reason(vcpu);
+	vcpu->arch.last_exit = exit_reason;
+
+	if (exit_reason < kvm_vti_max_exit_handlers
+			&& kvm_vti_exit_handlers[exit_reason])
+		return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
+	else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = exit_reason;
+	}
+	return 0;
+}
+
+static inline void vti_set_rr6(unsigned long rr6)
+{
+	ia64_set_rr(RR6, rr6);
+	ia64_srlz_i();
+}
+
+static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
+{
+	unsigned long pte;
+	struct kvm *kvm = vcpu->kvm;
+	int r;
+
+	/*Insert a pair of tr to map vmm*/
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
+	r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	if (r < 0)
+		goto out;
+	vcpu->arch.vmm_tr_slot = r;
+	/*Insert a pairt of tr to map data of vm*/
+	pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
+	r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
+					pte, KVM_VM_DATA_SHIFT);
+	if (r < 0)
+		goto out;
+	vcpu->arch.vm_tr_slot = r;
+
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (kvm->arch.is_sn2) {
+		r = kvm_sn2_setup_mappings(vcpu);
+		if (r < 0)
+			goto out;
+	}
+#endif
+
+	r = 0;
+out:
+	return r;
+}
+
+static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
+	ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (kvm->arch.is_sn2)
+		ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
+#endif
+}
+
+static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+	int r;
+	int cpu = smp_processor_id();
+
+	if (vcpu->arch.last_run_cpu != cpu ||
+			per_cpu(last_vcpu, cpu) != vcpu) {
+		per_cpu(last_vcpu, cpu) = vcpu;
+		vcpu->arch.last_run_cpu = cpu;
+		kvm_flush_tlb_all();
+	}
+
+	vcpu->arch.host_rr6 = ia64_get_rr(RR6);
+	vti_set_rr6(vcpu->arch.vmm_rr);
+	local_irq_save(psr);
+	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
+	return r;
+}
+
+static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
+{
+	kvm_purge_vmm_mapping(vcpu);
+	vti_set_rr6(vcpu->arch.host_rr6);
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	union context *host_ctx, *guest_ctx;
+	int r, idx;
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+again:
+	if (signal_pending(current)) {
+		r = -EINTR;
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		goto out;
+	}
+
+	preempt_disable();
+	local_irq_disable();
+
+	/*Get host and guest context with guest address space.*/
+	host_ctx = kvm_get_host_context(vcpu);
+	guest_ctx = kvm_get_guest_context(vcpu);
+
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
+
+	r = kvm_vcpu_pre_transition(vcpu);
+	if (r < 0)
+		goto vcpu_run_fail;
+
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	vcpu->mode = IN_GUEST_MODE;
+	kvm_guest_enter();
+
+	/*
+	 * Transition to the guest
+	 */
+	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
+
+	kvm_vcpu_post_transition(vcpu);
+
+	vcpu->arch.launched = 1;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
+	local_irq_enable();
+
+	/*
+	 * We must have an instruction between local_irq_enable() and
+	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
+	 * the interrupt shadow.  The stat.exits increment will do nicely.
+	 * But we need to prevent reordering, hence this barrier():
+	 */
+	barrier();
+	kvm_guest_exit();
+	vcpu->mode = OUTSIDE_GUEST_MODE;
+	preempt_enable();
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	r = kvm_handle_exit(kvm_run, vcpu);
+
+	if (r > 0) {
+		if (!need_resched())
+			goto again;
+	}
+
+out:
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	if (r > 0) {
+		kvm_resched(vcpu);
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		goto again;
+	}
+
+	return r;
+
+vcpu_run_fail:
+	local_irq_enable();
+	preempt_enable();
+	kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+	goto out;
+}
+
+static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
+
+	if (!vcpu->mmio_is_write)
+		memcpy(&p->data, vcpu->mmio_data, 8);
+	p->state = STATE_IORESP_READY;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int r;
+	sigset_t sigsaved;
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
+		kvm_vcpu_block(vcpu);
+		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+		r = -EAGAIN;
+		goto out;
+	}
+
+	if (vcpu->mmio_needed) {
+		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
+		kvm_set_mmio_data(vcpu);
+		vcpu->mmio_read_completed = 1;
+		vcpu->mmio_needed = 0;
+	}
+	r = __vcpu_run(vcpu, kvm_run);
+out:
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return r;
+}
+
+struct kvm *kvm_arch_alloc_vm(void)
+{
+
+	struct kvm *kvm;
+	uint64_t  vm_base;
+
+	BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
+
+	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
+
+	if (!vm_base)
+		return NULL;
+
+	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
+	kvm = (struct kvm *)(vm_base +
+			offsetof(struct kvm_vm_data, kvm_vm_struct));
+	kvm->arch.vm_base = vm_base;
+	printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
+
+	return kvm;
+}
+
+struct kvm_io_range {
+	unsigned long start;
+	unsigned long size;
+	unsigned long type;
+};
+
+static const struct kvm_io_range io_ranges[] = {
+	{VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
+	{MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
+	{LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
+	{IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
+	{PIB_START, PIB_SIZE, GPFN_PIB},
+};
+
+static void kvm_build_io_pmt(struct kvm *kvm)
+{
+	unsigned long i, j;
+
+	/* Mark I/O ranges */
+	for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
+							i++) {
+		for (j = io_ranges[i].start;
+				j < io_ranges[i].start + io_ranges[i].size;
+				j += PAGE_SIZE)
+			kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
+					io_ranges[i].type, 0);
+	}
+
+}
+
+/*Use unused rids to virtualize guest rid.*/
+#define GUEST_PHYSICAL_RR0	0x1739
+#define GUEST_PHYSICAL_RR4	0x2739
+#define VMM_INIT_RR		0x1660
+
+int kvm_arch_init_vm(struct kvm *kvm)
+{
+	BUG_ON(!kvm);
+
+	kvm->arch.is_sn2 = ia64_platform_is("sn2");
+
+	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
+	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
+	kvm->arch.vmm_init_rr = VMM_INIT_RR;
+
+	/*
+	 *Fill P2M entries for MMIO/IO ranges
+	 */
+	kvm_build_io_pmt(kvm);
+
+	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+
+	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+
+	return 0;
+}
+
+static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
+					struct kvm_irqchip *chip)
+{
+	int r;
+
+	r = 0;
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_IOAPIC:
+		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
+{
+	int r;
+
+	r = 0;
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_IOAPIC:
+		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		vpd->vgr[i] = regs->vpd.vgr[i];
+		vpd->vbgr[i] = regs->vpd.vbgr[i];
+	}
+	for (i = 0; i < 128; i++)
+		vpd->vcr[i] = regs->vpd.vcr[i];
+	vpd->vhpi = regs->vpd.vhpi;
+	vpd->vnat = regs->vpd.vnat;
+	vpd->vbnat = regs->vpd.vbnat;
+	vpd->vpsr = regs->vpd.vpsr;
+
+	vpd->vpr = regs->vpd.vpr;
+
+	memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
+
+	RESTORE_REGS(mp_state);
+	RESTORE_REGS(vmm_rr);
+	memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
+	memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
+	RESTORE_REGS(itr_regions);
+	RESTORE_REGS(dtr_regions);
+	RESTORE_REGS(tc_regions);
+	RESTORE_REGS(irq_check);
+	RESTORE_REGS(itc_check);
+	RESTORE_REGS(timer_check);
+	RESTORE_REGS(timer_pending);
+	RESTORE_REGS(last_itc);
+	for (i = 0; i < 8; i++) {
+		vcpu->arch.vrr[i] = regs->vrr[i];
+		vcpu->arch.ibr[i] = regs->ibr[i];
+		vcpu->arch.dbr[i] = regs->dbr[i];
+	}
+	for (i = 0; i < 4; i++)
+		vcpu->arch.insvc[i] = regs->insvc[i];
+	RESTORE_REGS(xtp);
+	RESTORE_REGS(metaphysical_rr0);
+	RESTORE_REGS(metaphysical_rr4);
+	RESTORE_REGS(metaphysical_saved_rr0);
+	RESTORE_REGS(metaphysical_saved_rr4);
+	RESTORE_REGS(fp_psr);
+	RESTORE_REGS(saved_gp);
+
+	vcpu->arch.irq_new_pending = 1;
+	vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
+	set_bit(KVM_REQ_RESUME, &vcpu->requests);
+
+	return 0;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+		unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r = -ENOTTY;
+
+	switch (ioctl) {
+	case KVM_SET_MEMORY_REGION: {
+		struct kvm_memory_region kvm_mem;
+		struct kvm_userspace_memory_region kvm_userspace_mem;
+
+		r = -EFAULT;
+		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
+			goto out;
+		kvm_userspace_mem.slot = kvm_mem.slot;
+		kvm_userspace_mem.flags = kvm_mem.flags;
+		kvm_userspace_mem.guest_phys_addr =
+					kvm_mem.guest_phys_addr;
+		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
+		r = kvm_vm_ioctl_set_memory_region(kvm,
+					&kvm_userspace_mem, 0);
+		if (r)
+			goto out;
+		break;
+		}
+	case KVM_CREATE_IRQCHIP:
+		r = -EFAULT;
+		r = kvm_ioapic_init(kvm);
+		if (r)
+			goto out;
+		r = kvm_setup_default_irq_routing(kvm);
+		if (r) {
+			mutex_lock(&kvm->slots_lock);
+			kvm_ioapic_destroy(kvm);
+			mutex_unlock(&kvm->slots_lock);
+			goto out;
+		}
+		break;
+	case KVM_IRQ_LINE_STATUS:
+	case KVM_IRQ_LINE: {
+		struct kvm_irq_level irq_event;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_event, argp, sizeof irq_event))
+			goto out;
+		r = -ENXIO;
+		if (irqchip_in_kernel(kvm)) {
+			__s32 status;
+			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+				    irq_event.irq, irq_event.level);
+			if (ioctl == KVM_IRQ_LINE_STATUS) {
+				r = -EFAULT;
+				irq_event.status = status;
+				if (copy_to_user(argp, &irq_event,
+							sizeof irq_event))
+					goto out;
+			}
+			r = 0;
+		}
+		break;
+		}
+	case KVM_GET_IRQCHIP: {
+		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+		struct kvm_irqchip chip;
+
+		r = -EFAULT;
+		if (copy_from_user(&chip, argp, sizeof chip))
+				goto out;
+		r = -ENXIO;
+		if (!irqchip_in_kernel(kvm))
+			goto out;
+		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &chip, sizeof chip))
+				goto out;
+		r = 0;
+		break;
+		}
+	case KVM_SET_IRQCHIP: {
+		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+		struct kvm_irqchip chip;
+
+		r = -EFAULT;
+		if (copy_from_user(&chip, argp, sizeof chip))
+				goto out;
+		r = -ENXIO;
+		if (!irqchip_in_kernel(kvm))
+			goto out;
+		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+		}
+	default:
+		;
+	}
+out:
+	return r;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+		struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+		struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+
+}
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+		struct kvm_translation *tr)
+{
+
+	return -EINVAL;
+}
+
+static int kvm_alloc_vmm_area(void)
+{
+	if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
+		kvm_vmm_base = __get_free_pages(GFP_KERNEL,
+				get_order(KVM_VMM_SIZE));
+		if (!kvm_vmm_base)
+			return -ENOMEM;
+
+		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
+		kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
+
+		printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
+				kvm_vmm_base, kvm_vm_buffer);
+	}
+
+	return 0;
+}
+
+static void kvm_free_vmm_area(void)
+{
+	if (kvm_vmm_base) {
+		/*Zero this area before free to avoid bits leak!!*/
+		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
+		free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
+		kvm_vmm_base  = 0;
+		kvm_vm_buffer = 0;
+		kvm_vsa_base = 0;
+	}
+}
+
+static int vti_init_vpd(struct kvm_vcpu *vcpu)
+{
+	int i;
+	union cpuid3_t cpuid3;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (IS_ERR(vpd))
+		return PTR_ERR(vpd);
+
+	/* CPUID init */
+	for (i = 0; i < 5; i++)
+		vpd->vcpuid[i] = ia64_get_cpuid(i);
+
+	/* Limit the CPUID number to 5 */
+	cpuid3.value = vpd->vcpuid[3];
+	cpuid3.number = 4;	/* 5 - 1 */
+	vpd->vcpuid[3] = cpuid3.value;
+
+	/*Set vac and vdc fields*/
+	vpd->vac.a_from_int_cr = 1;
+	vpd->vac.a_to_int_cr = 1;
+	vpd->vac.a_from_psr = 1;
+	vpd->vac.a_from_cpuid = 1;
+	vpd->vac.a_cover = 1;
+	vpd->vac.a_bsw = 1;
+	vpd->vac.a_int = 1;
+	vpd->vdc.d_vmsw = 1;
+
+	/*Set virtual buffer*/
+	vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
+
+	return 0;
+}
+
+static int vti_create_vp(struct kvm_vcpu *vcpu)
+{
+	long ret;
+	struct vpd *vpd = vcpu->arch.vpd;
+	unsigned long  vmm_ivt;
+
+	vmm_ivt = kvm_vmm_info->vmm_ivt;
+
+	printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
+
+	ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
+
+	if (ret) {
+		printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void init_ptce_info(struct kvm_vcpu *vcpu)
+{
+	ia64_ptce_info_t ptce = {0};
+
+	ia64_get_ptce(&ptce);
+	vcpu->arch.ptce_base = ptce.base;
+	vcpu->arch.ptce_count[0] = ptce.count[0];
+	vcpu->arch.ptce_count[1] = ptce.count[1];
+	vcpu->arch.ptce_stride[0] = ptce.stride[0];
+	vcpu->arch.ptce_stride[1] = ptce.stride[1];
+}
+
+static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
+{
+	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
+
+	if (hrtimer_cancel(p_ht))
+		hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
+}
+
+static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
+{
+	struct kvm_vcpu *vcpu;
+	wait_queue_head_t *q;
+
+	vcpu  = container_of(data, struct kvm_vcpu, arch.hlt_timer);
+	q = &vcpu->wq;
+
+	if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
+		goto out;
+
+	if (waitqueue_active(q))
+		wake_up_interruptible(q);
+
+out:
+	vcpu->arch.timer_fired = 1;
+	vcpu->arch.timer_check = 1;
+	return HRTIMER_NORESTART;
+}
+
+#define PALE_RESET_ENTRY    0x80000000ffffffb0UL
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *v;
+	int r;
+	int i;
+	long itc_offset;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	union context *p_ctx = &vcpu->arch.guest;
+	struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
+
+	/*Init vcpu context for first run.*/
+	if (IS_ERR(vmm_vcpu))
+		return PTR_ERR(vmm_vcpu);
+
+	if (kvm_vcpu_is_bsp(vcpu)) {
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+		/*Set entry address for first run.*/
+		regs->cr_iip = PALE_RESET_ENTRY;
+
+		/*Initialize itc offset for vcpus*/
+		itc_offset = 0UL - kvm_get_itc(vcpu);
+		for (i = 0; i < KVM_MAX_VCPUS; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu +
+					sizeof(struct kvm_vcpu_data) * i);
+			v->arch.itc_offset = itc_offset;
+			v->arch.last_itc = 0;
+		}
+	} else
+		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
+
+	r = -ENOMEM;
+	vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
+	if (!vcpu->arch.apic)
+		goto out;
+	vcpu->arch.apic->vcpu = vcpu;
+
+	p_ctx->gr[1] = 0;
+	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
+	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
+	p_ctx->psr = 0x1008522000UL;
+	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
+	p_ctx->caller_unat = 0;
+	p_ctx->pr = 0x0;
+	p_ctx->ar[36] = 0x0; /*unat*/
+	p_ctx->ar[19] = 0x0; /*rnat*/
+	p_ctx->ar[18] = (unsigned long)vmm_vcpu +
+				((sizeof(struct kvm_vcpu)+15) & ~15);
+	p_ctx->ar[64] = 0x0; /*pfs*/
+	p_ctx->cr[0] = 0x7e04UL;
+	p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
+	p_ctx->cr[8] = 0x3c;
+
+	/*Initialize region register*/
+	p_ctx->rr[0] = 0x30;
+	p_ctx->rr[1] = 0x30;
+	p_ctx->rr[2] = 0x30;
+	p_ctx->rr[3] = 0x30;
+	p_ctx->rr[4] = 0x30;
+	p_ctx->rr[5] = 0x30;
+	p_ctx->rr[7] = 0x30;
+
+	/*Initialize branch register 0*/
+	p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
+
+	vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
+	vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
+	vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
+
+	hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	vcpu->arch.hlt_timer.function = hlt_timer_fn;
+
+	vcpu->arch.last_run_cpu = -1;
+	vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
+	vcpu->arch.vsa_base = kvm_vsa_base;
+	vcpu->arch.__gp = kvm_vmm_gp;
+	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
+	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
+	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
+	init_ptce_info(vcpu);
+
+	r = 0;
+out:
+	return r;
+}
+
+static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
+{
+	unsigned long psr;
+	int r;
+
+	local_irq_save(psr);
+	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
+	if (r)
+		goto fail;
+	r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
+	if (r)
+		goto fail;
+
+	r = vti_init_vpd(vcpu);
+	if (r) {
+		printk(KERN_DEBUG"kvm: vpd init error!!\n");
+		goto uninit;
+	}
+
+	r = vti_create_vp(vcpu);
+	if (r)
+		goto uninit;
+
+	kvm_purge_vmm_mapping(vcpu);
+
+	return 0;
+uninit:
+	kvm_vcpu_uninit(vcpu);
+fail:
+	return r;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+		unsigned int id)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long vm_base = kvm->arch.vm_base;
+	int r;
+	int cpu;
+
+	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
+
+	r = -EINVAL;
+	if (id >= KVM_MAX_VCPUS) {
+		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
+				KVM_MAX_VCPUS);
+		goto fail;
+	}
+
+	r = -ENOMEM;
+	if (!vm_base) {
+		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
+		goto fail;
+	}
+	vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
+					vcpu_data[id].vcpu_struct));
+	vcpu->kvm = kvm;
+
+	cpu = get_cpu();
+	r = vti_vcpu_setup(vcpu, id);
+	put_cpu();
+
+	if (r) {
+		printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
+		goto fail;
+	}
+
+	return vcpu;
+fail:
+	return ERR_PTR(r);
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	return -EINVAL;
+}
+
+void kvm_arch_free_vm(struct kvm *kvm)
+{
+	unsigned long vm_base = kvm->arch.vm_base;
+
+	if (vm_base) {
+		memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
+		free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
+	}
+
+}
+
+static void kvm_release_vm_pages(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int i, j;
+	unsigned long base_gfn;
+
+	slots = kvm_memslots(kvm);
+	for (i = 0; i < slots->nmemslots; i++) {
+		memslot = &slots->memslots[i];
+		base_gfn = memslot->base_gfn;
+
+		for (j = 0; j < memslot->npages; j++) {
+			if (memslot->rmap[j])
+				put_page((struct page *)memslot->rmap[j]);
+		}
+	}
+}
+
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	kvm_iommu_unmap_guest(kvm);
+#ifdef  KVM_CAP_DEVICE_ASSIGNMENT
+	kvm_free_all_assigned_devices(kvm);
+#endif
+	kfree(kvm->arch.vioapic);
+	kvm_release_vm_pages(kvm);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	if (cpu != vcpu->cpu) {
+		vcpu->cpu = cpu;
+		if (vcpu->arch.ht_active)
+			kvm_migrate_hlt_timer(vcpu);
+	}
+}
+
+#define SAVE_REGS(_x) 	regs->_x = vcpu->arch._x
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	int i;
+
+	vcpu_load(vcpu);
+
+	for (i = 0; i < 16; i++) {
+		regs->vpd.vgr[i] = vpd->vgr[i];
+		regs->vpd.vbgr[i] = vpd->vbgr[i];
+	}
+	for (i = 0; i < 128; i++)
+		regs->vpd.vcr[i] = vpd->vcr[i];
+	regs->vpd.vhpi = vpd->vhpi;
+	regs->vpd.vnat = vpd->vnat;
+	regs->vpd.vbnat = vpd->vbnat;
+	regs->vpd.vpsr = vpd->vpsr;
+	regs->vpd.vpr = vpd->vpr;
+
+	memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
+
+	SAVE_REGS(mp_state);
+	SAVE_REGS(vmm_rr);
+	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
+	memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
+	SAVE_REGS(itr_regions);
+	SAVE_REGS(dtr_regions);
+	SAVE_REGS(tc_regions);
+	SAVE_REGS(irq_check);
+	SAVE_REGS(itc_check);
+	SAVE_REGS(timer_check);
+	SAVE_REGS(timer_pending);
+	SAVE_REGS(last_itc);
+	for (i = 0; i < 8; i++) {
+		regs->vrr[i] = vcpu->arch.vrr[i];
+		regs->ibr[i] = vcpu->arch.ibr[i];
+		regs->dbr[i] = vcpu->arch.dbr[i];
+	}
+	for (i = 0; i < 4; i++)
+		regs->insvc[i] = vcpu->arch.insvc[i];
+	regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
+	SAVE_REGS(xtp);
+	SAVE_REGS(metaphysical_rr0);
+	SAVE_REGS(metaphysical_rr4);
+	SAVE_REGS(metaphysical_saved_rr0);
+	SAVE_REGS(metaphysical_saved_rr4);
+	SAVE_REGS(fp_psr);
+	SAVE_REGS(saved_gp);
+
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
+				  struct kvm_ia64_vcpu_stack *stack)
+{
+	memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
+				  struct kvm_ia64_vcpu_stack *stack)
+{
+	memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
+	       sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
+
+	vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
+	return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+
+	hrtimer_cancel(&vcpu->arch.hlt_timer);
+	kfree(vcpu->arch.apic);
+}
+
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	struct kvm_ia64_vcpu_stack *stack = NULL;
+	long r;
+
+	switch (ioctl) {
+	case KVM_IA64_VCPU_GET_STACK: {
+		struct kvm_ia64_vcpu_stack __user *user_stack;
+	        void __user *first_p = argp;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+			goto out;
+
+		if (!access_ok(VERIFY_WRITE, user_stack,
+			       sizeof(struct kvm_ia64_vcpu_stack))) {
+			printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
+			       "Illegal user destination address for stack\n");
+			goto out;
+		}
+		stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+		if (!stack) {
+			r = -ENOMEM;
+			goto out;
+		}
+
+		r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
+		if (r)
+			goto out;
+
+		if (copy_to_user(user_stack, stack,
+				 sizeof(struct kvm_ia64_vcpu_stack))) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		break;
+	}
+	case KVM_IA64_VCPU_SET_STACK: {
+		struct kvm_ia64_vcpu_stack __user *user_stack;
+	        void __user *first_p = argp;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+			goto out;
+
+		if (!access_ok(VERIFY_READ, user_stack,
+			    sizeof(struct kvm_ia64_vcpu_stack))) {
+			printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
+			       "Illegal user address for stack\n");
+			goto out;
+		}
+		stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+		if (!stack) {
+			r = -ENOMEM;
+			goto out;
+		}
+		if (copy_from_user(stack, user_stack,
+				   sizeof(struct kvm_ia64_vcpu_stack)))
+			goto out;
+
+		r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
+		break;
+	}
+
+	default:
+		r = -EINVAL;
+	}
+
+out:
+	kfree(stack);
+	return r;
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+		struct kvm_memory_slot *memslot,
+		struct kvm_memory_slot old,
+		struct kvm_userspace_memory_region *mem,
+		int user_alloc)
+{
+	unsigned long i;
+	unsigned long pfn;
+	int npages = memslot->npages;
+	unsigned long base_gfn = memslot->base_gfn;
+
+	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
+		return -ENOMEM;
+
+	for (i = 0; i < npages; i++) {
+		pfn = gfn_to_pfn(kvm, base_gfn + i);
+		if (!kvm_is_mmio_pfn(pfn)) {
+			kvm_set_pmt_entry(kvm, base_gfn + i,
+					pfn << PAGE_SHIFT,
+				_PAGE_AR_RWX | _PAGE_MA_WB);
+			memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
+		} else {
+			kvm_set_pmt_entry(kvm, base_gfn + i,
+					GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
+					_PAGE_MA_UC);
+			memslot->rmap[i] = 0;
+			}
+	}
+
+	return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+		struct kvm_userspace_memory_region *mem,
+		struct kvm_memory_slot old,
+		int user_alloc)
+{
+	return;
+}
+
+void kvm_arch_flush_shadow(struct kvm *kvm)
+{
+	kvm_flush_remote_tlbs(kvm);
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_uninit(vcpu);
+}
+
+static int vti_cpu_has_kvm_support(void)
+{
+	long  avail = 1, status = 1, control = 1;
+	long ret;
+
+	ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
+	if (ret)
+		goto out;
+
+	if (!(avail & PAL_PROC_VM_BIT))
+		goto out;
+
+	printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
+
+	ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
+	if (ret)
+		goto out;
+	printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
+
+	if (!(vp_env_info & VP_OPCODE)) {
+		printk(KERN_WARNING"kvm: No opcode ability on hardware, "
+				"vm_env_info:0x%lx\n", vp_env_info);
+	}
+
+	return 1;
+out:
+	return 0;
+}
+
+
+/*
+ * On SN2, the ITC isn't stable, so copy in fast path code to use the
+ * SN2 RTC, replacing the ITC based default verion.
+ */
+static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
+			  struct module *module)
+{
+	unsigned long new_ar, new_ar_sn2;
+	unsigned long module_base;
+
+	if (!ia64_platform_is("sn2"))
+		return;
+
+	module_base = (unsigned long)module->module_core;
+
+	new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
+	new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
+
+	printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
+	       "as source\n");
+
+	/*
+	 * Copy the SN2 version of mov_ar into place. They are both
+	 * the same size, so 6 bundles is sufficient (6 * 0x10).
+	 */
+	memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
+}
+
+static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
+			    struct module *module)
+{
+	unsigned long module_base;
+	unsigned long vmm_size;
+
+	unsigned long vmm_offset, func_offset, fdesc_offset;
+	struct fdesc *p_fdesc;
+
+	BUG_ON(!module);
+
+	if (!kvm_vmm_base) {
+		printk("kvm: kvm area hasn't been initialized yet!!\n");
+		return -EFAULT;
+	}
+
+	/*Calculate new position of relocated vmm module.*/
+	module_base = (unsigned long)module->module_core;
+	vmm_size = module->core_size;
+	if (unlikely(vmm_size > KVM_VMM_SIZE))
+		return -EFAULT;
+
+	memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
+	kvm_patch_vmm(vmm_info, module);
+	kvm_flush_icache(kvm_vmm_base, vmm_size);
+
+	/*Recalculate kvm_vmm_info based on new VMM*/
+	vmm_offset = vmm_info->vmm_ivt - module_base;
+	kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
+	printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
+			kvm_vmm_info->vmm_ivt);
+
+	fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
+	kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
+							fdesc_offset);
+	func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
+	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
+	p_fdesc->ip = KVM_VMM_BASE + func_offset;
+	p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
+
+	printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
+			KVM_VMM_BASE+func_offset);
+
+	fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
+	kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
+			fdesc_offset);
+	func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
+	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
+	p_fdesc->ip = KVM_VMM_BASE + func_offset;
+	p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
+
+	kvm_vmm_gp = p_fdesc->gp;
+
+	printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
+						kvm_vmm_info->vmm_entry);
+	printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
+						KVM_VMM_BASE + func_offset);
+
+	return 0;
+}
+
+int kvm_arch_init(void *opaque)
+{
+	int r;
+	struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
+
+	if (!vti_cpu_has_kvm_support()) {
+		printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (kvm_vmm_info) {
+		printk(KERN_ERR "kvm: Already loaded VMM module!\n");
+		r = -EEXIST;
+		goto out;
+	}
+
+	r = -ENOMEM;
+	kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
+	if (!kvm_vmm_info)
+		goto out;
+
+	if (kvm_alloc_vmm_area())
+		goto out_free0;
+
+	r = kvm_relocate_vmm(vmm_info, vmm_info->module);
+	if (r)
+		goto out_free1;
+
+	return 0;
+
+out_free1:
+	kvm_free_vmm_area();
+out_free0:
+	kfree(kvm_vmm_info);
+out:
+	return r;
+}
+
+void kvm_arch_exit(void)
+{
+	kvm_free_vmm_area();
+	kfree(kvm_vmm_info);
+	kvm_vmm_info = NULL;
+}
+
+static void kvm_ia64_sync_dirty_log(struct kvm *kvm,
+				    struct kvm_memory_slot *memslot)
+{
+	int i;
+	long base;
+	unsigned long n;
+	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
+			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
+
+	n = kvm_dirty_bitmap_bytes(memslot);
+	base = memslot->base_gfn / BITS_PER_LONG;
+
+	spin_lock(&kvm->arch.dirty_log_lock);
+	for (i = 0; i < n/sizeof(long); ++i) {
+		memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
+		dirty_bitmap[base + i] = 0;
+	}
+	spin_unlock(&kvm->arch.dirty_log_lock);
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+		struct kvm_dirty_log *log)
+{
+	int r;
+	unsigned long n;
+	struct kvm_memory_slot *memslot;
+	int is_dirty = 0;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_MEMORY_SLOTS)
+		goto out;
+
+	memslot = &kvm->memslots->memslots[log->slot];
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	kvm_ia64_sync_dirty_log(kvm, memslot);
+	r = kvm_get_dirty_log(kvm, log, &is_dirty);
+	if (r)
+		goto out;
+
+	/* If nothing is dirty, don't bother messing with page tables. */
+	if (is_dirty) {
+		kvm_flush_remote_tlbs(kvm);
+		n = kvm_dirty_bitmap_bytes(memslot);
+		memset(memslot->dirty_bitmap, 0, n);
+	}
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_arch_hardware_setup(void)
+{
+	return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int me;
+	int cpu = vcpu->cpu;
+
+	if (waitqueue_active(&vcpu->wq))
+		wake_up_interruptible(&vcpu->wq);
+
+	me = get_cpu();
+	if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
+	put_cpu();
+}
+
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
+{
+	return __apic_accept_irq(vcpu, irq->vector);
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+{
+	return apic->vcpu->vcpu_id == dest;
+}
+
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+{
+	return 0;
+}
+
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
+{
+	return vcpu1->arch.xtp - vcpu2->arch.xtp;
+}
+
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode)
+{
+	struct kvm_lapic *target = vcpu->arch.apic;
+	return (dest_mode == 0) ?
+		kvm_apic_match_physical_addr(target, dest) :
+		kvm_apic_match_logical_addr(target, dest);
+}
+
+static int find_highest_bits(int *dat)
+{
+	u32  bits, bitnum;
+	int i;
+
+	/* loop for all 256 bits */
+	for (i = 7; i >= 0 ; i--) {
+		bits = dat[i];
+		if (bits) {
+			bitnum = fls(bits);
+			return i * 32 + bitnum - 1;
+		}
+	}
+
+	return -1;
+}
+
+int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
+{
+    struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+    if (vpd->irr[0] & (1UL << NMI_VECTOR))
+		return NMI_VECTOR;
+    if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+    return find_highest_bits((int *)&vpd->irr[0]);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.timer_fired;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
+		(kvm_highest_pending_irq(vcpu) != -1);
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	mp_state->mp_state = vcpu->arch.mp_state;
+	return 0;
+}
+
+static int vcpu_reset(struct kvm_vcpu *vcpu)
+{
+	int r;
+	long psr;
+	local_irq_save(psr);
+	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
+	if (r)
+		goto fail;
+
+	vcpu->arch.launched = 0;
+	kvm_arch_vcpu_uninit(vcpu);
+	r = kvm_arch_vcpu_init(vcpu);
+	if (r)
+		goto fail;
+
+	kvm_purge_vmm_mapping(vcpu);
+	r = 0;
+fail:
+	return r;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	int r = 0;
+
+	vcpu->arch.mp_state = mp_state->mp_state;
+	if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
+		r = vcpu_reset(vcpu);
+	return r;
+}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
new file mode 100644
index 00000000..cb548ee9
--- /dev/null
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -0,0 +1,674 @@
+/*
+ * PAL/SAL call delegation
+ *
+ * Copyright (c) 2004 Li Susie <susie.li@intel.com>
+ * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
+ * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/smp.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
+
+#include "vti.h"
+#include "misc.h"
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
+
+/*
+ * Handy macros to make sure that the PAL return values start out
+ * as something meaningful.
+ */
+#define INIT_PAL_STATUS_UNIMPLEMENTED(x)		\
+	{						\
+		x.status = PAL_STATUS_UNIMPLEMENTED;	\
+		x.v0 = 0;				\
+		x.v1 = 0;				\
+		x.v2 = 0;				\
+	}
+
+#define INIT_PAL_STATUS_SUCCESS(x)			\
+	{						\
+		x.status = PAL_STATUS_SUCCESS;		\
+		x.v0 = 0;				\
+		x.v1 = 0;				\
+		x.v2 = 0;				\
+    }
+
+static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
+		u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
+	struct exit_ctl_data *p;
+
+	if (vcpu) {
+		p = &vcpu->arch.exit_data;
+		if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+			*gr28 = p->u.pal_data.gr28;
+			*gr29 = p->u.pal_data.gr29;
+			*gr30 = p->u.pal_data.gr30;
+			*gr31 = p->u.pal_data.gr31;
+			return ;
+		}
+	}
+	printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
+}
+
+static void set_pal_result(struct kvm_vcpu *vcpu,
+		struct ia64_pal_retval result) {
+
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+		p->u.pal_data.ret = result;
+		return ;
+	}
+	INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
+}
+
+static void set_sal_result(struct kvm_vcpu *vcpu,
+		struct sal_ret_values result) {
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		p->u.sal_data.ret = result;
+		return ;
+	}
+	printk(KERN_WARNING"Failed to set sal result!!\n");
+}
+
+struct cache_flush_args {
+	u64 cache_type;
+	u64 operation;
+	u64 progress;
+	long status;
+};
+
+cpumask_t cpu_cache_coherent_map;
+
+static void remote_pal_cache_flush(void *data)
+{
+	struct cache_flush_args *args = data;
+	long status;
+	u64 progress = args->progress;
+
+	status = ia64_pal_cache_flush(args->cache_type, args->operation,
+					&progress, NULL);
+	if (status != 0)
+	args->status = status;
+}
+
+static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
+{
+	u64 gr28, gr29, gr30, gr31;
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	struct cache_flush_args args = {0, 0, 0, 0};
+	long psr;
+
+	gr28 = gr29 = gr30 = gr31 = 0;
+	kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
+
+	if (gr31 != 0)
+		printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
+
+	/* Always call Host Pal in int=1 */
+	gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
+	args.cache_type = gr29;
+	args.operation = gr30;
+	smp_call_function(remote_pal_cache_flush,
+				(void *)&args, 1);
+	if (args.status != 0)
+		printk(KERN_ERR"pal_cache_flush error!,"
+				"status:0x%lx\n", args.status);
+	/*
+	 * Call Host PAL cache flush
+	 * Clear psr.ic when call PAL_CACHE_FLUSH
+	 */
+	local_irq_save(psr);
+	result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
+						&result.v0);
+	local_irq_restore(psr);
+	if (result.status != 0)
+		printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
+				"in1:%lx,in2:%lx\n",
+				vcpu, result.status, gr29, gr30);
+
+#if 0
+	if (gr29 == PAL_CACHE_TYPE_COHERENT) {
+		cpus_setall(vcpu->arch.cache_coherent_map);
+		cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
+		cpus_setall(cpu_cache_coherent_map);
+		cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
+	}
+#endif
+	return result;
+}
+
+struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
+	return result;
+}
+
+static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
+
+	/*
+	 * PAL_FREQ_BASE may not be implemented in some platforms,
+	 * call SAL instead.
+	 */
+	if (result.v0 == 0) {
+		result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+							&result.v0,
+							&result.v1);
+		result.v2 = 0;
+	}
+
+	return result;
+}
+
+/*
+ * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
+ * RTC is used instead. This function patches the ratios from SAL
+ * to match the RTC before providing them to the guest.
+ */
+static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
+{
+	struct pal_freq_ratio *ratio;
+	unsigned long sal_freq, sal_drift, factor;
+
+	result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+					    &sal_freq, &sal_drift);
+	ratio = (struct pal_freq_ratio *)&result->v2;
+	factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
+		sn_rtc_cycles_per_second;
+
+	ratio->num = 3;
+	ratio->den = factor;
+}
+
+static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
+
+	if (vcpu->kvm->arch.is_sn2)
+		sn2_patch_itc_freq_ratios(&result);
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result;
+
+	INIT_PAL_STATUS_UNIMPLEMENTED(result);
+	return result;
+}
+
+static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	INIT_PAL_STATUS_SUCCESS(result);
+	return result;
+}
+
+static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
+			&result.v2, in2);
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
+{
+
+	pal_cache_config_info_t ci;
+	long status;
+	unsigned long in0, in1, in2, in3, r9, r10;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	status = ia64_pal_cache_config_info(in1, in2, &ci);
+	r9 = ci.pcci_info_1.pcci1_data;
+	r10 = ci.pcci_info_2.pcci2_data;
+	return ((struct ia64_pal_retval){status, r9, r10, 0});
+}
+
+#define GUEST_IMPL_VA_MSB	59
+#define GUEST_RID_BITS		18
+
+static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
+{
+
+	pal_vm_info_1_u_t vminfo1;
+	pal_vm_info_2_u_t vminfo2;
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
+	if (!result.status) {
+		vminfo1.pvi1_val = result.v0;
+		vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
+		vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
+		result.v0 = vminfo1.pvi1_val;
+		vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
+		vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
+		result.v1 = vminfo2.pvi2_val;
+	}
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result;
+	unsigned long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+	result.status = ia64_pal_vm_info(in1, in2,
+			(pal_tc_info_u_t *)&result.v1, &result.v2);
+
+	return result;
+}
+
+static  u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
+{
+	u64 index = 0;
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p->exit_reason == EXIT_REASON_PAL_CALL)
+		index = p->u.pal_data.gr28;
+
+	return index;
+}
+
+static void prepare_for_halt(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.timer_pending = 1;
+	vcpu->arch.timer_fired = 0;
+}
+
+static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
+{
+	long status;
+	unsigned long in0, in1, in2, in3, r9;
+	unsigned long pm_buffer[16];
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	status = ia64_pal_perf_mon_info(pm_buffer,
+				(pal_perf_mon_info_u_t *) &r9);
+	if (status != 0) {
+		printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
+	} else {
+		if (in1)
+			memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
+		else {
+			status = PAL_STATUS_EINVAL;
+			printk(KERN_WARNING"Invalid parameters "
+						"for PAL call:0x%lx!\n", in0);
+		}
+	}
+	return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
+{
+	unsigned long in0, in1, in2, in3;
+	long status;
+	unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
+					| (1UL << 61) | (1UL << 60);
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	if (in1) {
+		memcpy((void *)in1, &res, sizeof(res));
+		status = 0;
+	} else{
+		status = PAL_STATUS_EINVAL;
+		printk(KERN_WARNING"Invalid parameters "
+					"for PAL call:0x%lx!\n", in0);
+	}
+
+	return (struct ia64_pal_retval){status, 0, 0, 0};
+}
+
+static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
+{
+	unsigned long r9;
+	long status;
+
+	status = ia64_pal_mem_attrib(&r9);
+
+	return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+static void remote_pal_prefetch_visibility(void *v)
+{
+	s64 trans_type = (s64)v;
+	ia64_pal_prefetch_visibility(trans_type);
+}
+
+static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	unsigned long in0, in1, in2, in3;
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_prefetch_visibility(in1);
+	if (result.status == 0) {
+		/* Must be performed on all remote processors
+		in the coherence domain. */
+		smp_call_function(remote_pal_prefetch_visibility,
+					(void *)in1, 1);
+		/* Unnecessary on remote processor for other vcpus!*/
+		result.status = 1;
+	}
+	return result;
+}
+
+static void remote_pal_mc_drain(void *v)
+{
+	ia64_pal_mc_drain();
+}
+
+static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	unsigned long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+	if (in1 == 0 && in2) {
+		char brand_info[128];
+		result.status = ia64_pal_get_brand_info(brand_info);
+		if (result.status == PAL_STATUS_SUCCESS)
+			memcpy((void *)in2, brand_info, 128);
+	} else {
+		result.status = PAL_STATUS_REQUIRES_MEMORY;
+		printk(KERN_WARNING"Invalid parameters for "
+					"PAL call:0x%lx!\n", in0);
+	}
+
+	return result;
+}
+
+int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+
+	u64 gr28;
+	struct ia64_pal_retval result;
+	int ret = 1;
+
+	gr28 = kvm_get_pal_call_index(vcpu);
+	switch (gr28) {
+	case PAL_CACHE_FLUSH:
+		result = pal_cache_flush(vcpu);
+		break;
+	case PAL_MEM_ATTRIB:
+		result = pal_mem_attrib(vcpu);
+		break;
+	case PAL_CACHE_SUMMARY:
+		result = pal_cache_summary(vcpu);
+		break;
+	case PAL_PERF_MON_INFO:
+		result = pal_perf_mon_info(vcpu);
+		break;
+	case PAL_HALT_INFO:
+		result = pal_halt_info(vcpu);
+		break;
+	case PAL_HALT_LIGHT:
+	{
+		INIT_PAL_STATUS_SUCCESS(result);
+		prepare_for_halt(vcpu);
+		if (kvm_highest_pending_irq(vcpu) == -1)
+			ret = kvm_emulate_halt(vcpu);
+	}
+		break;
+
+	case PAL_PREFETCH_VISIBILITY:
+		result = pal_prefetch_visibility(vcpu);
+		break;
+	case PAL_MC_DRAIN:
+		result.status = ia64_pal_mc_drain();
+		/* FIXME: All vcpus likely call PAL_MC_DRAIN.
+		   That causes the congestion. */
+		smp_call_function(remote_pal_mc_drain, NULL, 1);
+		break;
+
+	case PAL_FREQ_RATIOS:
+		result = pal_freq_ratios(vcpu);
+		break;
+
+	case PAL_FREQ_BASE:
+		result = pal_freq_base(vcpu);
+		break;
+
+	case PAL_LOGICAL_TO_PHYSICAL :
+		result = pal_logical_to_physica(vcpu);
+		break;
+
+	case PAL_VM_SUMMARY :
+		result = pal_vm_summary(vcpu);
+		break;
+
+	case PAL_VM_INFO :
+		result = pal_vm_info(vcpu);
+		break;
+	case PAL_PLATFORM_ADDR :
+		result = pal_platform_addr(vcpu);
+		break;
+	case PAL_CACHE_INFO:
+		result = pal_cache_info(vcpu);
+		break;
+	case PAL_PTCE_INFO:
+		INIT_PAL_STATUS_SUCCESS(result);
+		result.v1 = (1L << 32) | 1L;
+		break;
+	case PAL_REGISTER_INFO:
+		result = pal_register_info(vcpu);
+		break;
+	case PAL_VM_PAGE_SIZE:
+		result.status = ia64_pal_vm_page_size(&result.v0,
+							&result.v1);
+		break;
+	case PAL_RSE_INFO:
+		result.status = ia64_pal_rse_info(&result.v0,
+					(pal_hints_u_t *)&result.v1);
+		break;
+	case PAL_PROC_GET_FEATURES:
+		result = pal_proc_get_features(vcpu);
+		break;
+	case PAL_DEBUG_INFO:
+		result.status = ia64_pal_debug_info(&result.v0,
+							&result.v1);
+		break;
+	case PAL_VERSION:
+		result.status = ia64_pal_version(
+				(pal_version_u_t *)&result.v0,
+				(pal_version_u_t *)&result.v1);
+		break;
+	case PAL_FIXED_ADDR:
+		result.status = PAL_STATUS_SUCCESS;
+		result.v0 = vcpu->vcpu_id;
+		break;
+	case PAL_BRAND_INFO:
+		result = pal_get_brand_info(vcpu);
+		break;
+	case PAL_GET_PSTATE:
+	case PAL_CACHE_SHARED_INFO:
+		INIT_PAL_STATUS_UNIMPLEMENTED(result);
+		break;
+	default:
+		INIT_PAL_STATUS_UNIMPLEMENTED(result);
+		printk(KERN_WARNING"kvm: Unsupported pal call,"
+					" index:0x%lx\n", gr28);
+	}
+	set_pal_result(vcpu, result);
+	return ret;
+}
+
+static struct sal_ret_values sal_emulator(struct kvm *kvm,
+				long index, unsigned long in1,
+				unsigned long in2, unsigned long in3,
+				unsigned long in4, unsigned long in5,
+				unsigned long in6, unsigned long in7)
+{
+	unsigned long r9  = 0;
+	unsigned long r10 = 0;
+	long r11 = 0;
+	long status;
+
+	status = 0;
+	switch (index) {
+	case SAL_FREQ_BASE:
+		status = ia64_sal_freq_base(in1, &r9, &r10);
+		break;
+	case SAL_PCI_CONFIG_READ:
+		printk(KERN_WARNING"kvm: Not allowed to call here!"
+			" SAL_PCI_CONFIG_READ\n");
+		break;
+	case SAL_PCI_CONFIG_WRITE:
+		printk(KERN_WARNING"kvm: Not allowed to call here!"
+			" SAL_PCI_CONFIG_WRITE\n");
+		break;
+	case SAL_SET_VECTORS:
+		if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
+			if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
+				status = -2;
+			} else {
+				kvm->arch.rdv_sal_data.boot_ip = in2;
+				kvm->arch.rdv_sal_data.boot_gp = in3;
+			}
+			printk("Rendvous called! iip:%lx\n\n", in2);
+		} else
+			printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
+							"ignored...\n", in1);
+		break;
+	case SAL_GET_STATE_INFO:
+		/* No more info.  */
+		status = -5;
+		r9 = 0;
+		break;
+	case SAL_GET_STATE_INFO_SIZE:
+		/* Return a dummy size.  */
+		status = 0;
+		r9 = 128;
+		break;
+	case SAL_CLEAR_STATE_INFO:
+		/* Noop.  */
+		break;
+	case SAL_MC_RENDEZ:
+		printk(KERN_WARNING
+			"kvm: called SAL_MC_RENDEZ. ignored...\n");
+		break;
+	case SAL_MC_SET_PARAMS:
+		printk(KERN_WARNING
+			"kvm: called  SAL_MC_SET_PARAMS.ignored!\n");
+		break;
+	case SAL_CACHE_FLUSH:
+		if (1) {
+			/*Flush using SAL.
+			This method is faster but has a side
+			effect on other vcpu running on
+			this cpu.  */
+			status = ia64_sal_cache_flush(in1);
+		} else {
+			/*Maybe need to implement the method
+			without side effect!*/
+			status = 0;
+		}
+		break;
+	case SAL_CACHE_INIT:
+		printk(KERN_WARNING
+			"kvm: called SAL_CACHE_INIT.  ignored...\n");
+		break;
+	case SAL_UPDATE_PAL:
+		printk(KERN_WARNING
+			"kvm: CALLED SAL_UPDATE_PAL.  ignored...\n");
+		break;
+	default:
+		printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
+						" index:%ld\n", index);
+		status = -1;
+		break;
+	}
+	return ((struct sal_ret_values) {status, r9, r10, r11});
+}
+
+static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
+		u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
+
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		*in0 = p->u.sal_data.in0;
+		*in1 = p->u.sal_data.in1;
+		*in2 = p->u.sal_data.in2;
+		*in3 = p->u.sal_data.in3;
+		*in4 = p->u.sal_data.in4;
+		*in5 = p->u.sal_data.in5;
+		*in6 = p->u.sal_data.in6;
+		*in7 = p->u.sal_data.in7;
+		return ;
+	}
+	*in0 = 0;
+}
+
+void kvm_sal_emul(struct kvm_vcpu *vcpu)
+{
+
+	struct sal_ret_values result;
+	u64 index, in1, in2, in3, in4, in5, in6, in7;
+
+	kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
+			&in3, &in4, &in5, &in6, &in7);
+	result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
+					in4, in5, in6, in7);
+	set_sal_result(vcpu, result);
+}
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
new file mode 100644
index 00000000..f1268b8e
--- /dev/null
+++ b/arch/ia64/kvm/kvm_lib.c
@@ -0,0 +1,21 @@
+/*
+ * kvm_lib.c: Compile some libraries for kvm-intel module.
+ *
+ *	Just include kernel's library, and disable symbols export.
+ * 	Copyright (C) 2008, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#undef CONFIG_MODULES
+#include <linux/module.h>
+#undef CONFIG_KALLSYMS
+#undef EXPORT_SYMBOL
+#undef EXPORT_SYMBOL_GPL
+#define EXPORT_SYMBOL(sym)
+#define EXPORT_SYMBOL_GPL(sym)
+#include "../../../lib/vsprintf.c"
+#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
new file mode 100644
index 00000000..b2bcaa27
--- /dev/null
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -0,0 +1,266 @@
+/*
+ *  kvm_minstate.h: min save macros
+ *  Copyright (c) 2007, Intel Corporation.
+ *
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/types.h>
+#include <asm/kregs.h>
+#include <asm/kvm_host.h>
+
+#include "asm-offsets.h"
+
+#define KVM_MINSTATE_START_SAVE_MIN	     					\
+	mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
+	;;									\
+	mov.m r28 = ar.rnat;                                  			\
+	addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */	\
+	;;									\
+	lfetch.fault.excl.nt1 [r22];						\
+	addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;  \
+	mov r23 = ar.bspstore;			/* save ar.bspstore */          \
+	;;									\
+	mov ar.bspstore = r22;				/* switch to kernel RBS */\
+	;;									\
+	mov r18 = ar.bsp;							\
+	mov ar.rsc = 0x3;     /* set eager mode, pl 0, little-endian, loadrs=0 */
+
+
+
+#define KVM_MINSTATE_END_SAVE_MIN						\
+	bsw.1;          /* switch back to bank 1 (must be last in insn group) */\
+	;;
+
+
+#define PAL_VSA_SYNC_READ						\
+	/* begin to call pal vps sync_read */				\
+{.mii;									\
+	add r25 = VMM_VPD_BASE_OFFSET, r21;				\
+	nop 0x0;							\
+	mov r24=ip;							\
+	;;								\
+}									\
+{.mmb									\
+	add r24=0x20, r24;						\
+	ld8 r25 = [r25];      /* read vpd base */			\
+	br.cond.sptk kvm_vps_sync_read;		/*call the service*/	\
+	;;								\
+};									\
+
+
+#define KVM_MINSTATE_GET_CURRENT(reg)   mov reg=r21
+
+/*
+ * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ *  psr.ic: off
+ *  r31:	contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ *  psr.ic: off
+ *   r2 = points to &pt_regs.r16
+ *   r8 = contents of ar.ccv
+ *   r9 = contents of ar.csd
+ *  r10 = contents of ar.ssd
+ *  r11 = FPSR_DEFAULT
+ *  r12 = kernel sp (kernel virtual address)
+ *  r13 = points to current task_struct (kernel virtual address)
+ *  p15 = TRUE if psr.i is set in cr.ipsr
+ *  predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ *	  preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro.  This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+
+
+#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
+
+#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)			\
+	KVM_MINSTATE_GET_CURRENT(r16);  /* M (or M;;I) */	\
+	mov r27 = ar.rsc;         /* M */			\
+	mov r20 = r1;         /* A */				\
+	mov r25 = ar.unat;        /* M */			\
+	mov r29 = cr.ipsr;        /* M */			\
+	mov r26 = ar.pfs;         /* I */			\
+	mov r18 = cr.isr;         				\
+	COVER;              /* B;; (or nothing) */		\
+	;;							\
+	tbit.z p0,p15 = r29,IA64_PSR_I_BIT;			\
+	mov r1 = r16;						\
+/*	mov r21=r16;	*/					\
+	/* switch from user to kernel RBS: */			\
+	;;							\
+	invala;             /* M */				\
+	SAVE_IFS;						\
+	;;							\
+	KVM_MINSTATE_START_SAVE_MIN				\
+	adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */	\
+	adds r16 = PT(CR_IPSR),r1;				\
+	;;							\
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;		\
+	st8 [r16] = r29;      /* save cr.ipsr */		\
+	;;							\
+	lfetch.fault.excl.nt1 [r17];				\
+	tbit.nz p15,p0 = r29,IA64_PSR_I_BIT;			\
+	mov r29 = b0						\
+	;;							\
+	adds r16 = PT(R8),r1; /* initialize first base pointer */\
+	adds r17 = PT(R9),r1; /* initialize second base pointer */\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r8,16;			\
+.mem.offset 8,0; st8.spill [r17] = r9,16;			\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r10,24;			\
+.mem.offset 8,0; st8.spill [r17] = r11,24;			\
+	;;							\
+	mov r9 = cr.iip;         /* M */			\
+	mov r10 = ar.fpsr;        /* M */			\
+	;;							\
+	st8 [r16] = r9,16;    /* save cr.iip */			\
+	st8 [r17] = r30,16;   /* save cr.ifs */			\
+	sub r18 = r18,r22;    /* r18=RSE.ndirty*8 */		\
+	;;							\
+	st8 [r16] = r25,16;   /* save ar.unat */		\
+	st8 [r17] = r26,16;    /* save ar.pfs */		\
+	shl r18 = r18,16;     /* calu ar.rsc used for "loadrs" */\
+	;;							\
+	st8 [r16] = r27,16;   /* save ar.rsc */			\
+	st8 [r17] = r28,16;   /* save ar.rnat */		\
+	;;          /* avoid RAW on r16 & r17 */		\
+	st8 [r16] = r23,16;   /* save ar.bspstore */		\
+	st8 [r17] = r31,16;   /* save predicates */		\
+	;;							\
+	st8 [r16] = r29,16;   /* save b0 */			\
+	st8 [r17] = r18,16;   /* save ar.rsc value for "loadrs" */\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */  \
+.mem.offset 8,0; st8.spill [r17] = r12,16;			\
+	adds r12 = -16,r1;    /* switch to kernel memory stack */  \
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r13,16;			\
+.mem.offset 8,0; st8.spill [r17] = r10,16;	/* save ar.fpsr */\
+	mov r13 = r21;   /* establish `current' */		\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r15,16;			\
+.mem.offset 8,0; st8.spill [r17] = r14,16;			\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r2,16;			\
+.mem.offset 8,0; st8.spill [r17] = r3,16;			\
+	adds r2 = VMM_PT_REGS_R16_OFFSET,r1;			\
+	 ;;							\
+	adds r16 = VMM_VCPU_IIPA_OFFSET,r13;			\
+	adds r17 = VMM_VCPU_ISR_OFFSET,r13;			\
+	mov r26 = cr.iipa;					\
+	mov r27 = cr.isr;					\
+	;;							\
+	st8 [r16] = r26;					\
+	st8 [r17] = r27;					\
+	;;							\
+	EXTRA;							\
+	mov r8 = ar.ccv;					\
+	mov r9 = ar.csd;					\
+	mov r10 = ar.ssd;					\
+	movl r11 = FPSR_DEFAULT;   /* L-unit */			\
+	adds r17 = VMM_VCPU_GP_OFFSET,r13;			\
+	;;							\
+	ld8 r1 = [r17];/* establish kernel global pointer */	\
+	;;							\
+	PAL_VSA_SYNC_READ					\
+	KVM_MINSTATE_END_SAVE_MIN
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ *  psr.ic: on
+ *  r2: points to &pt_regs.f6
+ *  r3: points to &pt_regs.f7
+ *  r8: contents of ar.ccv
+ *  r9: contents of ar.csd
+ *  r10:	contents of ar.ssd
+ *  r11:	FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define KVM_SAVE_REST				\
+.mem.offset 0,0; st8.spill [r2] = r16,16;	\
+.mem.offset 8,0; st8.spill [r3] = r17,16;	\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r18,16;	\
+.mem.offset 8,0; st8.spill [r3] = r19,16;	\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r20,16;	\
+.mem.offset 8,0; st8.spill [r3] = r21,16;	\
+	mov r18=b6;			\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r22,16;	\
+.mem.offset 8,0; st8.spill [r3] = r23,16;	\
+	mov r19 = b7;				\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r24,16;	\
+.mem.offset 8,0; st8.spill [r3] = r25,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r26,16;	\
+.mem.offset 8,0; st8.spill [r3] = r27,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r28,16;	\
+.mem.offset 8,0; st8.spill [r3] = r29,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r30,16;	\
+.mem.offset 8,0; st8.spill [r3] = r31,32;	\
+	;;					\
+	mov ar.fpsr = r11;			\
+	st8 [r2] = r8,8;			\
+	adds r24 = PT(B6)-PT(F7),r3;		\
+	adds r25 = PT(B7)-PT(F7),r3;		\
+	;;					\
+	st8 [r24] = r18,16;       /* b6 */	\
+	st8 [r25] = r19,16;       /* b7 */	\
+	adds r2 = PT(R4)-PT(F6),r2;		\
+	adds r3 = PT(R5)-PT(F7),r3;		\
+	;;					\
+	st8 [r24] = r9;	/* ar.csd */		\
+	st8 [r25] = r10;	/* ar.ssd */	\
+	;;					\
+	mov r18 = ar.unat;			\
+	adds r19 = PT(EML_UNAT)-PT(R4),r2;	\
+	;;					\
+	st8 [r19] = r18; /* eml_unat */ 	\
+
+
+#define KVM_SAVE_EXTRA				\
+.mem.offset 0,0; st8.spill [r2] = r4,16;	\
+.mem.offset 8,0; st8.spill [r3] = r5,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r6,16;	\
+.mem.offset 8,0; st8.spill [r3] = r7;		\
+	;;					\
+	mov r26 = ar.unat;			\
+	;;					\
+	st8 [r2] = r26;/* eml_unat */ 		\
+
+#define KVM_SAVE_MIN_WITH_COVER		KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
+#define KVM_SAVE_MIN_WITH_COVER_R19	KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
+#define KVM_SAVE_MIN			KVM_DO_SAVE_MIN(     , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
new file mode 100644
index 00000000..c5f92a92
--- /dev/null
+++ b/arch/ia64/kvm/lapic.h
@@ -0,0 +1,30 @@
+#ifndef __KVM_IA64_LAPIC_H
+#define __KVM_IA64_LAPIC_H
+
+#include <linux/kvm_host.h>
+
+/*
+ * vlsapic
+ */
+struct kvm_lapic{
+	struct kvm_vcpu *vcpu;
+	uint64_t insvc[4];
+	uint64_t vhpi;
+	uint8_t xtp;
+	uint8_t pal_init_pending;
+	uint8_t pad[2];
+};
+
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_lapic(struct kvm_vcpu *vcpu);
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+#define kvm_apic_present(x) (true)
+#define kvm_lapic_enabled(x) (true)
+
+#endif
diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S
new file mode 100644
index 00000000..c04cdbe9
--- /dev/null
+++ b/arch/ia64/kvm/memcpy.S
@@ -0,0 +1 @@
+#include "../lib/memcpy.S"
diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S
new file mode 100644
index 00000000..83c3066d
--- /dev/null
+++ b/arch/ia64/kvm/memset.S
@@ -0,0 +1 @@
+#include "../lib/memset.S"
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
new file mode 100644
index 00000000..dd979e00
--- /dev/null
+++ b/arch/ia64/kvm/misc.h
@@ -0,0 +1,94 @@
+#ifndef __KVM_IA64_MISC_H
+#define __KVM_IA64_MISC_H
+
+#include <linux/kvm_host.h>
+/*
+ * misc.h
+ * 	Copyright (C) 2007, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/*
+ *Return p2m base address at host side!
+ */
+static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
+{
+	return (uint64_t *)(kvm->arch.vm_base +
+				offsetof(struct kvm_vm_data, kvm_p2m));
+}
+
+static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
+		u64 paddr, u64 mem_flags)
+{
+	uint64_t *pmt_base = kvm_host_get_pmt(kvm);
+	unsigned long pte;
+
+	pte = PAGE_ALIGN(paddr) | mem_flags;
+	pmt_base[gfn] = pte;
+}
+
+/*Function for translating host address to guest address*/
+
+static inline void *to_guest(struct kvm *kvm, void *addr)
+{
+	return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
+			KVM_VM_DATA_BASE);
+}
+
+/*Function for translating guest address to host address*/
+
+static inline void *to_host(struct kvm *kvm, void *addr)
+{
+	return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
+			+ kvm->arch.vm_base);
+}
+
+/* Get host context of the vcpu */
+static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
+{
+	union context *ctx = &vcpu->arch.host;
+	return to_guest(vcpu->kvm, ctx);
+}
+
+/* Get guest context of the vcpu */
+static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
+{
+	union context *ctx = &vcpu->arch.guest;
+	return  to_guest(vcpu->kvm, ctx);
+}
+
+/* kvm get exit data from gvmm! */
+static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
+{
+	return &vcpu->arch.exit_data;
+}
+
+/*kvm get vcpu ioreq for kvm module!*/
+static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p_ctl_data;
+
+	if (vcpu) {
+		p_ctl_data = kvm_get_exit_data(vcpu);
+		if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
+			return &p_ctl_data->u.ioreq;
+	}
+
+	return NULL;
+}
+
+#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
new file mode 100644
index 00000000..f1e17d3d
--- /dev/null
+++ b/arch/ia64/kvm/mmio.c
@@ -0,0 +1,336 @@
+/*
+ * mmio.c: MMIO emulation components.
+ * Copyright (c) 2004, Intel Corporation.
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *  Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
+ *
+ * Copyright (c) 2007 Intel Corporation  KVM support.
+ * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ * Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/kvm_host.h>
+
+#include "vcpu.h"
+
+static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
+{
+	VLSAPIC_XTP(v) = val;
+}
+
+/*
+ * LSAPIC OFFSET
+ */
+#define PIB_LOW_HALF(ofst)     !(ofst & (1 << 20))
+#define PIB_OFST_INTA          0x1E0000
+#define PIB_OFST_XTP           0x1E0008
+
+/*
+ * execute write IPI op.
+ */
+static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
+					uint64_t addr, uint64_t data)
+{
+	struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
+	unsigned long psr;
+
+	local_irq_save(psr);
+
+	p->exit_reason = EXIT_REASON_IPI;
+	p->u.ipi_data.addr.val = addr;
+	p->u.ipi_data.data.val = data;
+	vmm_transition(current_vcpu);
+
+	local_irq_restore(psr);
+
+}
+
+void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
+			unsigned long length, unsigned long val)
+{
+	addr &= (PIB_SIZE - 1);
+
+	switch (addr) {
+	case PIB_OFST_INTA:
+		panic_vm(v, "Undefined write on PIB INTA\n");
+		break;
+	case PIB_OFST_XTP:
+		if (length == 1) {
+			vlsapic_write_xtp(v, val);
+		} else {
+			panic_vm(v, "Undefined write on PIB XTP\n");
+		}
+		break;
+	default:
+		if (PIB_LOW_HALF(addr)) {
+			/*Lower half */
+			if (length != 8)
+				panic_vm(v, "Can't LHF write with size %ld!\n",
+						length);
+			else
+				vlsapic_write_ipi(v, addr, val);
+		} else {   /*Upper half */
+			panic_vm(v, "IPI-UHF write %lx\n", addr);
+		}
+		break;
+	}
+}
+
+unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
+		unsigned long length)
+{
+	uint64_t result = 0;
+
+	addr &= (PIB_SIZE - 1);
+
+	switch (addr) {
+	case PIB_OFST_INTA:
+		if (length == 1) /* 1 byte load */
+			; /* There is no i8259, there is no INTA access*/
+		else
+			panic_vm(v, "Undefined read on PIB INTA\n");
+
+		break;
+	case PIB_OFST_XTP:
+		if (length == 1) {
+			result = VLSAPIC_XTP(v);
+		} else {
+			panic_vm(v, "Undefined read on PIB XTP\n");
+		}
+		break;
+	default:
+		panic_vm(v, "Undefined addr access for lsapic!\n");
+		break;
+	}
+	return result;
+}
+
+static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
+					u16 s, int ma, int dir)
+{
+	unsigned long iot;
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long psr;
+
+	iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
+
+	local_irq_save(psr);
+
+	/*Intercept the access for PIB range*/
+	if (iot == GPFN_PIB) {
+		if (!dir)
+			lsapic_write(vcpu, src_pa, s, *dest);
+		else
+			*dest = lsapic_read(vcpu, src_pa, s);
+		goto out;
+	}
+	p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
+	p->u.ioreq.addr = src_pa;
+	p->u.ioreq.size = s;
+	p->u.ioreq.dir = dir;
+	if (dir == IOREQ_WRITE)
+		p->u.ioreq.data = *dest;
+	p->u.ioreq.state = STATE_IOREQ_READY;
+	vmm_transition(vcpu);
+
+	if (p->u.ioreq.state == STATE_IORESP_READY) {
+		if (dir == IOREQ_READ)
+			/* it's necessary to ensure zero extending */
+			*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
+	} else
+		panic_vm(vcpu, "Unhandled mmio access returned!\n");
+out:
+	local_irq_restore(psr);
+	return ;
+}
+
+/*
+   dir 1: read 0:write
+   inst_type 0:integer 1:floating point
+ */
+#define SL_INTEGER	0	/* store/load interger*/
+#define SL_FLOATING	1     	/* store/load floating*/
+
+void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
+{
+	struct kvm_pt_regs *regs;
+	IA64_BUNDLE bundle;
+	int slot, dir = 0;
+	int inst_type = -1;
+	u16 size = 0;
+	u64 data, slot1a, slot1b, temp, update_reg;
+	s32 imm;
+	INST64 inst;
+
+	regs = vcpu_regs(vcpu);
+
+	if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
+		/* if fetch code fail, return and try again */
+		return;
+	}
+	slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+	if (!slot)
+		inst.inst = bundle.slot0;
+	else if (slot == 1) {
+		slot1a = bundle.slot1a;
+		slot1b = bundle.slot1b;
+		inst.inst = slot1a + (slot1b << 18);
+	} else if (slot == 2)
+		inst.inst = bundle.slot2;
+
+	/* Integer Load/Store */
+	if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
+		inst_type = SL_INTEGER;
+		size = (inst.M1.x6 & 0x3);
+		if ((inst.M1.x6 >> 2) > 0xb) {
+			/*write*/
+			dir = IOREQ_WRITE;
+			data = vcpu_get_gr(vcpu, inst.M4.r2);
+		} else if ((inst.M1.x6 >> 2) < 0xb) {
+			/*read*/
+			dir = IOREQ_READ;
+		}
+	} else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
+		/* Integer Load + Reg update */
+		inst_type = SL_INTEGER;
+		dir = IOREQ_READ;
+		size = (inst.M2.x6 & 0x3);
+		temp = vcpu_get_gr(vcpu, inst.M2.r3);
+		update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
+		temp += update_reg;
+		vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
+	} else if (inst.M3.major == 5) {
+		/*Integer Load/Store + Imm update*/
+		inst_type = SL_INTEGER;
+		size = (inst.M3.x6&0x3);
+		if ((inst.M5.x6 >> 2) > 0xb) {
+			/*write*/
+			dir = IOREQ_WRITE;
+			data = vcpu_get_gr(vcpu, inst.M5.r2);
+			temp = vcpu_get_gr(vcpu, inst.M5.r3);
+			imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
+				(inst.M5.imm7 << 23);
+			temp += imm >> 23;
+			vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
+
+		} else if ((inst.M3.x6 >> 2) < 0xb) {
+			/*read*/
+			dir = IOREQ_READ;
+			temp = vcpu_get_gr(vcpu, inst.M3.r3);
+			imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
+				(inst.M3.imm7 << 23);
+			temp += imm >> 23;
+			vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
+
+		}
+	} else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
+				&& inst.M9.m == 0 && inst.M9.x == 0) {
+		/* Floating-point spill*/
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
+		/* Write high word. FIXME: this is a kludge!  */
+		v.u.bits[1] &= 0x3ffff;
+		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
+			    ma, IOREQ_WRITE);
+		data = v.u.bits[0];
+		size = 3;
+	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
+		/* Floating-point spill + Imm update */
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
+		temp = vcpu_get_gr(vcpu, inst.M10.r3);
+		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
+			(inst.M10.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
+
+		/* Write high word.FIXME: this is a kludge!  */
+		v.u.bits[1] &= 0x3ffff;
+		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
+			    8, ma, IOREQ_WRITE);
+		data = v.u.bits[0];
+		size = 3;
+	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
+		/* Floating-point stf8 + Imm update */
+		struct ia64_fpreg v;
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		size = 3;
+		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
+		data = v.u.bits[0]; /* Significand.  */
+		temp = vcpu_get_gr(vcpu, inst.M10.r3);
+		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
+			(inst.M10.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
+	} else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
+			&& inst.M15.x6 <= 0x2f) {
+		temp = vcpu_get_gr(vcpu, inst.M15.r3);
+		imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
+			(inst.M15.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
+
+		vcpu_increment_iip(vcpu);
+		return;
+	} else if (inst.M12.major == 6 && inst.M12.m == 1
+			&& inst.M12.x == 1 && inst.M12.x6 == 1) {
+		/* Floating-point Load Pair + Imm ldfp8 M12*/
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_READ;
+		size = 8;     /*ldfd*/
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		v.u.bits[0] = data;
+		v.u.bits[1] = 0x1003E;
+		vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
+		padr += 8;
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		v.u.bits[0] = data;
+		v.u.bits[1] = 0x1003E;
+		vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
+		padr += 8;
+		vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
+		vcpu_increment_iip(vcpu);
+		return;
+	} else {
+		inst_type = -1;
+		panic_vm(vcpu, "Unsupported MMIO access instruction! "
+				"Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
+				bundle.i64[0], bundle.i64[1]);
+	}
+
+	size = 1 << size;
+	if (dir == IOREQ_WRITE) {
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+	} else {
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		if (inst_type == SL_INTEGER)
+			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
+		else
+			panic_vm(vcpu, "Unsupported instruction type!\n");
+
+	}
+	vcpu_increment_iip(vcpu);
+}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
new file mode 100644
index 00000000..f793be3e
--- /dev/null
+++ b/arch/ia64/kvm/optvfault.S
@@ -0,0 +1,1090 @@
+/*
+ * arch/ia64/kvm/optvfault.S
+ * optimize virtualization fault handler
+ *
+ * Copyright (C) 2006 Intel Co
+ *	Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ * Copyright (C) 2008 Intel Co
+ *      Add the support for Tukwila processors.
+ *	Xiantao Zhang <xiantao.zhang@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+#include <asm/kvm_host.h>
+
+#include "vti.h"
+#include "asm-offsets.h"
+
+#define ACCE_MOV_FROM_AR
+#define ACCE_MOV_FROM_RR
+#define ACCE_MOV_TO_RR
+#define ACCE_RSM
+#define ACCE_SSM
+#define ACCE_MOV_TO_PSR
+#define ACCE_THASH
+
+#define VMX_VPS_SYNC_READ			\
+	add r16=VMM_VPD_BASE_OFFSET,r21;	\
+	mov r17 = b0;				\
+	mov r18 = r24;				\
+	mov r19 = r25;				\
+	mov r20 = r31;				\
+	;;					\
+{.mii;						\
+	ld8 r16 = [r16];			\
+	nop 0x0;				\
+	mov r24 = ip;				\
+	;;					\
+};						\
+{.mmb;						\
+	add r24=0x20, r24;			\
+	mov r25 =r16;				\
+	br.sptk.many kvm_vps_sync_read;		\
+};						\
+	mov b0 = r17;				\
+	mov r24 = r18;				\
+	mov r25 = r19;				\
+	mov r31 = r20
+
+ENTRY(kvm_vps_entry)
+	adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21
+	;;
+	ld8 r29 = [r29]
+	;;
+	add r29 = r29, r30
+	;;
+	mov b0 = r29
+	br.sptk.many b0
+END(kvm_vps_entry)
+
+/*
+ *	Inputs:
+ *	r24 : return address
+ *  	r25 : vpd
+ *	r29 : scratch
+ *
+ */
+GLOBAL_ENTRY(kvm_vps_sync_read)
+	movl r30 = PAL_VPS_SYNC_READ
+	;;
+	br.sptk.many kvm_vps_entry
+END(kvm_vps_sync_read)
+
+/*
+ *	Inputs:
+ *	r24 : return address
+ *  	r25 : vpd
+ *	r29 : scratch
+ *
+ */
+GLOBAL_ENTRY(kvm_vps_sync_write)
+	movl r30 = PAL_VPS_SYNC_WRITE
+	;;
+	br.sptk.many kvm_vps_entry
+END(kvm_vps_sync_write)
+
+/*
+ *	Inputs:
+ *	r23 : pr
+ *	r24 : guest b0
+ *  	r25 : vpd
+ *
+ */
+GLOBAL_ENTRY(kvm_vps_resume_normal)
+	movl r30 = PAL_VPS_RESUME_NORMAL
+	;;
+	mov pr=r23,-2
+	br.sptk.many kvm_vps_entry
+END(kvm_vps_resume_normal)
+
+/*
+ *	Inputs:
+ *	r23 : pr
+ *	r24 : guest b0
+ *  	r25 : vpd
+ *	r17 : isr
+ */
+GLOBAL_ENTRY(kvm_vps_resume_handler)
+	movl r30 = PAL_VPS_RESUME_HANDLER
+	;;
+	ld8 r26=[r25]
+	shr r17=r17,IA64_ISR_IR_BIT
+	;;
+	dep r26=r17,r26,63,1   // bit 63 of r26 indicate whether enable CFLE
+	mov pr=r23,-2
+	br.sptk.many kvm_vps_entry
+END(kvm_vps_resume_handler)
+
+//mov r1=ar3
+GLOBAL_ENTRY(kvm_asm_mov_from_ar)
+#ifndef ACCE_MOV_FROM_AR
+	br.many kvm_virtualization_fault_back
+#endif
+	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+	extr.u r17=r25,6,7
+	;;
+	ld8 r18=[r18]
+	mov r19=ar.itc
+	mov r24=b0
+	;;
+	add r19=r19,r18
+	addl r20=@gprel(asm_mov_to_reg),gp
+	;;
+	st8 [r16] = r19
+	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+	shladd r17=r17,4,r20
+	;;
+	mov b0=r17
+	br.sptk.few b0
+	;;
+END(kvm_asm_mov_from_ar)
+
+/*
+ * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
+ * clock as it's source for emulating the ITC. This version will be
+ * copied on top of the original version if the host is determined to
+ * be an SN2.
+ */
+GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
+	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+	movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
+
+	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+	extr.u r17=r25,6,7
+	mov r24=b0
+	;;
+	ld8 r18=[r18]
+	ld8 r19=[r19]
+	addl r20=@gprel(asm_mov_to_reg),gp
+	;;
+	add r19=r19,r18
+	shladd r17=r17,4,r20
+	;;
+	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+	st8 [r16] = r19
+	mov b0=r17
+	br.sptk.few b0
+	;;
+END(kvm_asm_mov_from_ar_sn2)
+
+
+
+// mov r1=rr[r3]
+GLOBAL_ENTRY(kvm_asm_mov_from_rr)
+#ifndef ACCE_MOV_FROM_RR
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r16=r25,20,7
+	extr.u r17=r25,6,7
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
+	shladd r16=r16,4,r20
+	mov r24=b0
+	;;
+	add r27=VMM_VCPU_VRR0_OFFSET,r21
+	mov b0=r16
+	br.many b0
+	;;
+kvm_asm_mov_from_rr_back_1:
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
+	shr.u r26=r19,61
+	;;
+	shladd r17=r17,4,r22
+	shladd r27=r26,3,r27
+	;;
+	ld8 r19=[r27]
+	mov b0=r17
+	br.many b0
+END(kvm_asm_mov_from_rr)
+
+
+// mov rr[r3]=r2
+GLOBAL_ENTRY(kvm_asm_mov_to_rr)
+#ifndef ACCE_MOV_TO_RR
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r16=r25,20,7
+	extr.u r17=r25,13,7
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
+	shladd r16=r16,4,r20
+	mov r22=b0
+	;;
+	add r27=VMM_VCPU_VRR0_OFFSET,r21
+	mov b0=r16
+	br.many b0
+	;;
+kvm_asm_mov_to_rr_back_1:
+	adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
+	shr.u r23=r19,61
+	shladd r17=r17,4,r20
+	;;
+	//if rr6, go back
+	cmp.eq p6,p0=6,r23
+	mov b0=r22
+	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
+	;;
+	mov r28=r19
+	mov b0=r17
+	br.many b0
+kvm_asm_mov_to_rr_back_2:
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	shladd r27=r23,3,r27
+	;; // vrr.rid<<4 |0xe
+	st8 [r27]=r19
+	mov b0=r30
+	;;
+	extr.u r16=r19,8,26
+	extr.u r18 =r19,2,6
+	mov r17 =0xe
+	;;
+	shladd r16 = r16, 4, r17
+	extr.u r19 =r19,0,8
+	;;
+	shl r16 = r16,8
+	;;
+	add r19 = r19, r16
+	;; //set ve 1
+	dep r19=-1,r19,0,1
+	cmp.lt p6,p0=14,r18
+	;;
+	(p6) mov r18=14
+	;;
+	(p6) dep r19=r18,r19,2,6
+	;;
+	cmp.eq p6,p0=0,r23
+	;;
+	cmp.eq.or p6,p0=4,r23
+	;;
+	adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	(p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	;;
+	ld4 r16=[r16]
+	cmp.eq p7,p0=r0,r0
+	(p6) shladd r17=r23,1,r17
+	;;
+	(p6) st8 [r17]=r19
+	(p6) tbit.nz p6,p7=r16,0
+	;;
+	(p7) mov rr[r28]=r19
+	mov r24=r22
+	br.many b0
+END(kvm_asm_mov_to_rr)
+
+
+//rsm
+GLOBAL_ENTRY(kvm_asm_rsm)
+#ifndef ACCE_RSM
+	br.many kvm_virtualization_fault_back
+#endif
+	VMX_VPS_SYNC_READ
+	;;
+	extr.u r26=r25,6,21
+	extr.u r27=r25,31,2
+	;;
+	extr.u r28=r25,36,1
+	dep r26=r27,r26,21,2
+	;;
+	add r17=VPD_VPSR_START_OFFSET,r16
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	//r26 is imm24
+	dep r26=r28,r26,23,1
+	;;
+	ld8 r18=[r17]
+	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
+	ld4 r23=[r22]
+	sub r27=-1,r26
+	mov r24=b0
+	;;
+	mov r20=cr.ipsr
+	or r28=r27,r28
+	and r19=r18,r27
+	;;
+	st8 [r17]=r19
+	and r20=r20,r28
+	/* Comment it out due to short of fp lazy alorgithm support
+	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
+	;;
+	ld8 r27=[r27]
+	;;
+	tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
+	;;
+	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
+	*/
+	;;
+	mov cr.ipsr=r20
+	tbit.nz p6,p0=r23,0
+	;;
+	tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
+	(p6) br.dptk kvm_resume_to_guest_with_sync
+	;;
+	add r26=VMM_VCPU_META_RR0_OFFSET,r21
+	add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
+	dep r23=-1,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	br.many kvm_resume_to_guest_with_sync
+END(kvm_asm_rsm)
+
+
+//ssm
+GLOBAL_ENTRY(kvm_asm_ssm)
+#ifndef ACCE_SSM
+	br.many kvm_virtualization_fault_back
+#endif
+	VMX_VPS_SYNC_READ
+	;;
+	extr.u r26=r25,6,21
+	extr.u r27=r25,31,2
+	;;
+	extr.u r28=r25,36,1
+	dep r26=r27,r26,21,2
+	;;  //r26 is imm24
+	add r27=VPD_VPSR_START_OFFSET,r16
+	dep r26=r28,r26,23,1
+	;;  //r19 vpsr
+	ld8 r29=[r27]
+	mov r24=b0
+	;;
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	mov r20=cr.ipsr
+	or r19=r29,r26
+	;;
+	ld4 r23=[r22]
+	st8 [r27]=r19
+	or r20=r20,r26
+	;;
+	mov cr.ipsr=r20
+	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
+	;;
+	and r19=r28,r19
+	tbit.z p6,p0=r23,0
+	;;
+	cmp.ne.or p6,p0=r28,r19
+	(p6) br.dptk kvm_asm_ssm_1
+	;;
+	add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
+	dep r23=0,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	;;
+kvm_asm_ssm_1:
+	tbit.nz p6,p0=r29,IA64_PSR_I_BIT
+	;;
+	tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
+	(p6) br.dptk kvm_resume_to_guest_with_sync
+	;;
+	add r29=VPD_VTPR_START_OFFSET,r16
+	add r30=VPD_VHPI_START_OFFSET,r16
+	;;
+	ld8 r29=[r29]
+	ld8 r30=[r30]
+	;;
+	extr.u r17=r29,4,4
+	extr.u r18=r29,16,1
+	;;
+	dep r17=r18,r17,4,1
+	;;
+	cmp.gt p6,p0=r30,r17
+	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
+	br.many kvm_resume_to_guest_with_sync
+END(kvm_asm_ssm)
+
+
+//mov psr.l=r2
+GLOBAL_ENTRY(kvm_asm_mov_to_psr)
+#ifndef ACCE_MOV_TO_PSR
+	br.many kvm_virtualization_fault_back
+#endif
+	VMX_VPS_SYNC_READ
+	;;
+	extr.u r26=r25,13,7 //r2
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
+	shladd r26=r26,4,r20
+	mov r24=b0
+	;;
+	add r27=VPD_VPSR_START_OFFSET,r16
+	mov b0=r26
+	br.many b0
+	;;
+kvm_asm_mov_to_psr_back:
+	ld8 r17=[r27]
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	dep r19=0,r19,32,32
+	;;
+	ld4 r23=[r22]
+	dep r18=0,r17,0,32
+	;;
+	add r30=r18,r19
+	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
+	;;
+	st8 [r27]=r30
+	and r27=r28,r30
+	and r29=r28,r17
+	;;
+	cmp.eq p5,p0=r29,r27
+	cmp.eq p6,p7=r28,r27
+	(p5) br.many kvm_asm_mov_to_psr_1
+	;;
+	//virtual to physical
+	(p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
+	(p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
+	(p7) dep r23=-1,r23,0,1
+	;;
+	//physical to virtual
+	(p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	(p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
+	(p6) dep r23=0,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	;;
+kvm_asm_mov_to_psr_1:
+	mov r20=cr.ipsr
+	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
+	;;
+	or r19=r19,r28
+	dep r20=0,r20,0,32
+	;;
+	add r20=r19,r20
+	mov b0=r24
+	;;
+	/* Comment it out due to short of fp lazy algorithm support
+	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
+	;;
+	ld8 r27=[r27]
+	;;
+	tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
+	;;
+	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
+	;;
+	*/
+	mov cr.ipsr=r20
+	cmp.ne p6,p0=r0,r0
+	;;
+	tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
+	tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
+	(p6) br.dpnt.few kvm_resume_to_guest_with_sync
+	;;
+	add r29=VPD_VTPR_START_OFFSET,r16
+	add r30=VPD_VHPI_START_OFFSET,r16
+	;;
+	ld8 r29=[r29]
+	ld8 r30=[r30]
+	;;
+	extr.u r17=r29,4,4
+	extr.u r18=r29,16,1
+	;;
+	dep r17=r18,r17,4,1
+	;;
+	cmp.gt p6,p0=r30,r17
+	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
+	br.many kvm_resume_to_guest_with_sync
+END(kvm_asm_mov_to_psr)
+
+
+ENTRY(kvm_asm_dispatch_vexirq)
+//increment iip
+	mov r17 = b0
+	mov r18 = r31
+{.mii
+	add r25=VMM_VPD_BASE_OFFSET,r21
+	nop 0x0
+	mov r24 = ip
+	;;
+}
+{.mmb
+	add r24 = 0x20, r24
+	ld8 r25 = [r25]
+	br.sptk.many kvm_vps_sync_write
+}
+	mov b0 =r17
+	mov r16=cr.ipsr
+	mov r31 = r18
+	mov r19 = 37
+	;;
+	extr.u r17=r16,IA64_PSR_RI_BIT,2
+	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
+	;;
+	(p6) mov r18=cr.iip
+	(p6) mov r17=r0
+	(p7) add r17=1,r17
+	;;
+	(p6) add r18=0x10,r18
+	dep r16=r17,r16,IA64_PSR_RI_BIT,2
+	;;
+	(p6) mov cr.iip=r18
+	mov cr.ipsr=r16
+	mov r30 =1
+	br.many kvm_dispatch_vexirq
+END(kvm_asm_dispatch_vexirq)
+
+// thash
+// TODO: add support when pta.vf = 1
+GLOBAL_ENTRY(kvm_asm_thash)
+#ifndef ACCE_THASH
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r17=r25,20,7		// get r3 from opcode in r25
+	extr.u r18=r25,6,7		// get r1 from opcode in r25
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
+	shladd r17=r17,4,r20	// get addr of MOVE_FROM_REG(r17)
+	adds r16=VMM_VPD_BASE_OFFSET,r21	// get vcpu.arch.priveregs
+	;;
+	mov r24=b0
+	;;
+	ld8 r16=[r16]		// get VPD addr
+	mov b0=r17
+	br.many b0			// r19 return value
+	;;
+kvm_asm_thash_back1:
+	shr.u r23=r19,61		// get RR number
+	adds r28=VMM_VCPU_VRR0_OFFSET,r21	// get vcpu->arch.vrr[0]'s addr
+	adds r16=VMM_VPD_VPTA_OFFSET,r16	// get vpta
+	;;
+	shladd r27=r23,3,r28	// get vcpu->arch.vrr[r23]'s addr
+	ld8 r17=[r16]		// get PTA
+	mov r26=1
+	;;
+	extr.u r29=r17,2,6	// get pta.size
+	ld8 r28=[r27]		// get vcpu->arch.vrr[r23]'s value
+	;;
+	mov b0=r24
+	//Fallback to C if pta.vf is set
+	tbit.nz p6,p0=r17, 8
+	;;
+	(p6) mov r24=EVENT_THASH
+	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
+	extr.u r28=r28,2,6	// get rr.ps
+	shl r22=r26,r29		// 1UL << pta.size
+	;;
+	shr.u r23=r19,r28	// vaddr >> rr.ps
+	adds r26=3,r29		// pta.size + 3
+	shl r27=r17,3		// pta << 3
+	;;
+	shl r23=r23,3		// (vaddr >> rr.ps) << 3
+	shr.u r27=r27,r26	// (pta << 3) >> (pta.size+3)
+	movl r16=7<<61
+	;;
+	adds r22=-1,r22		// (1UL << pta.size) - 1
+	shl r27=r27,r29		// ((pta<<3)>>(pta.size+3))<<pta.size
+	and r19=r19,r16		// vaddr & VRN_MASK
+	;;
+	and r22=r22,r23		// vhpt_offset
+	or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
+	adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
+	;;
+	or r19=r19,r22		// calc pval
+	shladd r17=r18,4,r26
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	;;
+	mov b0=r17
+	br.many b0
+END(kvm_asm_thash)
+
+#define MOV_TO_REG0	\
+{;			\
+	nop.b 0x0;		\
+	nop.b 0x0;		\
+	nop.b 0x0;		\
+	;;			\
+};
+
+
+#define MOV_TO_REG(n)	\
+{;			\
+	mov r##n##=r19;	\
+	mov b0=r30;	\
+	br.sptk.many b0;	\
+	;;			\
+};
+
+
+#define MOV_FROM_REG(n)	\
+{;				\
+	mov r19=r##n##;		\
+	mov b0=r30;		\
+	br.sptk.many b0;		\
+	;;				\
+};
+
+
+#define MOV_TO_BANK0_REG(n)			\
+ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);	\
+{;						\
+	mov r26=r2;				\
+	mov r2=r19;				\
+	bsw.1;					\
+	;;						\
+};						\
+{;						\
+	mov r##n##=r2;				\
+	nop.b 0x0;					\
+	bsw.0;					\
+	;;						\
+};						\
+{;						\
+	mov r2=r26;				\
+	mov b0=r30;				\
+	br.sptk.many b0;				\
+	;;						\
+};						\
+END(asm_mov_to_bank0_reg##n##)
+
+
+#define MOV_FROM_BANK0_REG(n)			\
+ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);	\
+{;						\
+	mov r26=r2;				\
+	nop.b 0x0;					\
+	bsw.1;					\
+	;;						\
+};						\
+{;						\
+	mov r2=r##n##;				\
+	nop.b 0x0;					\
+	bsw.0;					\
+	;;						\
+};						\
+{;						\
+	mov r19=r2;				\
+	mov r2=r26;				\
+	mov b0=r30;				\
+};						\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many b0;				\
+	;;						\
+};						\
+END(asm_mov_from_bank0_reg##n##)
+
+
+#define JMP_TO_MOV_TO_BANK0_REG(n)		\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many asm_mov_to_bank0_reg##n##;	\
+	;;						\
+}
+
+
+#define JMP_TO_MOV_FROM_BANK0_REG(n)		\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many asm_mov_from_bank0_reg##n##;	\
+	;;						\
+}
+
+
+MOV_FROM_BANK0_REG(16)
+MOV_FROM_BANK0_REG(17)
+MOV_FROM_BANK0_REG(18)
+MOV_FROM_BANK0_REG(19)
+MOV_FROM_BANK0_REG(20)
+MOV_FROM_BANK0_REG(21)
+MOV_FROM_BANK0_REG(22)
+MOV_FROM_BANK0_REG(23)
+MOV_FROM_BANK0_REG(24)
+MOV_FROM_BANK0_REG(25)
+MOV_FROM_BANK0_REG(26)
+MOV_FROM_BANK0_REG(27)
+MOV_FROM_BANK0_REG(28)
+MOV_FROM_BANK0_REG(29)
+MOV_FROM_BANK0_REG(30)
+MOV_FROM_BANK0_REG(31)
+
+
+// mov from reg table
+ENTRY(asm_mov_from_reg)
+	MOV_FROM_REG(0)
+	MOV_FROM_REG(1)
+	MOV_FROM_REG(2)
+	MOV_FROM_REG(3)
+	MOV_FROM_REG(4)
+	MOV_FROM_REG(5)
+	MOV_FROM_REG(6)
+	MOV_FROM_REG(7)
+	MOV_FROM_REG(8)
+	MOV_FROM_REG(9)
+	MOV_FROM_REG(10)
+	MOV_FROM_REG(11)
+	MOV_FROM_REG(12)
+	MOV_FROM_REG(13)
+	MOV_FROM_REG(14)
+	MOV_FROM_REG(15)
+	JMP_TO_MOV_FROM_BANK0_REG(16)
+	JMP_TO_MOV_FROM_BANK0_REG(17)
+	JMP_TO_MOV_FROM_BANK0_REG(18)
+	JMP_TO_MOV_FROM_BANK0_REG(19)
+	JMP_TO_MOV_FROM_BANK0_REG(20)
+	JMP_TO_MOV_FROM_BANK0_REG(21)
+	JMP_TO_MOV_FROM_BANK0_REG(22)
+	JMP_TO_MOV_FROM_BANK0_REG(23)
+	JMP_TO_MOV_FROM_BANK0_REG(24)
+	JMP_TO_MOV_FROM_BANK0_REG(25)
+	JMP_TO_MOV_FROM_BANK0_REG(26)
+	JMP_TO_MOV_FROM_BANK0_REG(27)
+	JMP_TO_MOV_FROM_BANK0_REG(28)
+	JMP_TO_MOV_FROM_BANK0_REG(29)
+	JMP_TO_MOV_FROM_BANK0_REG(30)
+	JMP_TO_MOV_FROM_BANK0_REG(31)
+	MOV_FROM_REG(32)
+	MOV_FROM_REG(33)
+	MOV_FROM_REG(34)
+	MOV_FROM_REG(35)
+	MOV_FROM_REG(36)
+	MOV_FROM_REG(37)
+	MOV_FROM_REG(38)
+	MOV_FROM_REG(39)
+	MOV_FROM_REG(40)
+	MOV_FROM_REG(41)
+	MOV_FROM_REG(42)
+	MOV_FROM_REG(43)
+	MOV_FROM_REG(44)
+	MOV_FROM_REG(45)
+	MOV_FROM_REG(46)
+	MOV_FROM_REG(47)
+	MOV_FROM_REG(48)
+	MOV_FROM_REG(49)
+	MOV_FROM_REG(50)
+	MOV_FROM_REG(51)
+	MOV_FROM_REG(52)
+	MOV_FROM_REG(53)
+	MOV_FROM_REG(54)
+	MOV_FROM_REG(55)
+	MOV_FROM_REG(56)
+	MOV_FROM_REG(57)
+	MOV_FROM_REG(58)
+	MOV_FROM_REG(59)
+	MOV_FROM_REG(60)
+	MOV_FROM_REG(61)
+	MOV_FROM_REG(62)
+	MOV_FROM_REG(63)
+	MOV_FROM_REG(64)
+	MOV_FROM_REG(65)
+	MOV_FROM_REG(66)
+	MOV_FROM_REG(67)
+	MOV_FROM_REG(68)
+	MOV_FROM_REG(69)
+	MOV_FROM_REG(70)
+	MOV_FROM_REG(71)
+	MOV_FROM_REG(72)
+	MOV_FROM_REG(73)
+	MOV_FROM_REG(74)
+	MOV_FROM_REG(75)
+	MOV_FROM_REG(76)
+	MOV_FROM_REG(77)
+	MOV_FROM_REG(78)
+	MOV_FROM_REG(79)
+	MOV_FROM_REG(80)
+	MOV_FROM_REG(81)
+	MOV_FROM_REG(82)
+	MOV_FROM_REG(83)
+	MOV_FROM_REG(84)
+	MOV_FROM_REG(85)
+	MOV_FROM_REG(86)
+	MOV_FROM_REG(87)
+	MOV_FROM_REG(88)
+	MOV_FROM_REG(89)
+	MOV_FROM_REG(90)
+	MOV_FROM_REG(91)
+	MOV_FROM_REG(92)
+	MOV_FROM_REG(93)
+	MOV_FROM_REG(94)
+	MOV_FROM_REG(95)
+	MOV_FROM_REG(96)
+	MOV_FROM_REG(97)
+	MOV_FROM_REG(98)
+	MOV_FROM_REG(99)
+	MOV_FROM_REG(100)
+	MOV_FROM_REG(101)
+	MOV_FROM_REG(102)
+	MOV_FROM_REG(103)
+	MOV_FROM_REG(104)
+	MOV_FROM_REG(105)
+	MOV_FROM_REG(106)
+	MOV_FROM_REG(107)
+	MOV_FROM_REG(108)
+	MOV_FROM_REG(109)
+	MOV_FROM_REG(110)
+	MOV_FROM_REG(111)
+	MOV_FROM_REG(112)
+	MOV_FROM_REG(113)
+	MOV_FROM_REG(114)
+	MOV_FROM_REG(115)
+	MOV_FROM_REG(116)
+	MOV_FROM_REG(117)
+	MOV_FROM_REG(118)
+	MOV_FROM_REG(119)
+	MOV_FROM_REG(120)
+	MOV_FROM_REG(121)
+	MOV_FROM_REG(122)
+	MOV_FROM_REG(123)
+	MOV_FROM_REG(124)
+	MOV_FROM_REG(125)
+	MOV_FROM_REG(126)
+	MOV_FROM_REG(127)
+END(asm_mov_from_reg)
+
+
+/* must be in bank 0
+ * parameter:
+ * r31: pr
+ * r24: b0
+ */
+ENTRY(kvm_resume_to_guest_with_sync)
+	adds r19=VMM_VPD_BASE_OFFSET,r21
+	mov r16 = r31
+	mov r17 = r24
+	;;
+{.mii
+	ld8 r25 =[r19]
+	nop 0x0
+	mov r24 = ip
+	;;
+}
+{.mmb
+	add r24 =0x20, r24
+	nop 0x0
+	br.sptk.many kvm_vps_sync_write
+}
+
+	mov r31 = r16
+	mov r24 =r17
+	;;
+	br.sptk.many kvm_resume_to_guest
+END(kvm_resume_to_guest_with_sync)
+
+ENTRY(kvm_resume_to_guest)
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	ld8 r1 =[r16]
+	adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
+	;;
+	mov r16=cr.ipsr
+	;;
+	ld8 r20 = [r20]
+	adds r19=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r25=[r19]
+	extr.u r17=r16,IA64_PSR_RI_BIT,2
+	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
+	;;
+	(p6) mov r18=cr.iip
+	(p6) mov r17=r0
+	;;
+	(p6) add r18=0x10,r18
+	(p7) add r17=1,r17
+	;;
+	(p6) mov cr.iip=r18
+	dep r16=r17,r16,IA64_PSR_RI_BIT,2
+	;;
+	mov cr.ipsr=r16
+	adds r19= VPD_VPSR_START_OFFSET,r25
+	add r28=PAL_VPS_RESUME_NORMAL,r20
+	add r29=PAL_VPS_RESUME_HANDLER,r20
+	;;
+	ld8 r19=[r19]
+	mov b0=r29
+	mov r27=cr.isr
+	;;
+	tbit.z p6,p7 = r19,IA64_PSR_IC_BIT		// p7=vpsr.ic
+	shr r27=r27,IA64_ISR_IR_BIT
+	;;
+	(p6) ld8 r26=[r25]
+	(p7) mov b0=r28
+	;;
+	(p6) dep r26=r27,r26,63,1
+	mov pr=r31,-2
+	br.sptk.many b0             // call pal service
+	;;
+END(kvm_resume_to_guest)
+
+
+MOV_TO_BANK0_REG(16)
+MOV_TO_BANK0_REG(17)
+MOV_TO_BANK0_REG(18)
+MOV_TO_BANK0_REG(19)
+MOV_TO_BANK0_REG(20)
+MOV_TO_BANK0_REG(21)
+MOV_TO_BANK0_REG(22)
+MOV_TO_BANK0_REG(23)
+MOV_TO_BANK0_REG(24)
+MOV_TO_BANK0_REG(25)
+MOV_TO_BANK0_REG(26)
+MOV_TO_BANK0_REG(27)
+MOV_TO_BANK0_REG(28)
+MOV_TO_BANK0_REG(29)
+MOV_TO_BANK0_REG(30)
+MOV_TO_BANK0_REG(31)
+
+
+// mov to reg table
+ENTRY(asm_mov_to_reg)
+	MOV_TO_REG0
+	MOV_TO_REG(1)
+	MOV_TO_REG(2)
+	MOV_TO_REG(3)
+	MOV_TO_REG(4)
+	MOV_TO_REG(5)
+	MOV_TO_REG(6)
+	MOV_TO_REG(7)
+	MOV_TO_REG(8)
+	MOV_TO_REG(9)
+	MOV_TO_REG(10)
+	MOV_TO_REG(11)
+	MOV_TO_REG(12)
+	MOV_TO_REG(13)
+	MOV_TO_REG(14)
+	MOV_TO_REG(15)
+	JMP_TO_MOV_TO_BANK0_REG(16)
+	JMP_TO_MOV_TO_BANK0_REG(17)
+	JMP_TO_MOV_TO_BANK0_REG(18)
+	JMP_TO_MOV_TO_BANK0_REG(19)
+	JMP_TO_MOV_TO_BANK0_REG(20)
+	JMP_TO_MOV_TO_BANK0_REG(21)
+	JMP_TO_MOV_TO_BANK0_REG(22)
+	JMP_TO_MOV_TO_BANK0_REG(23)
+	JMP_TO_MOV_TO_BANK0_REG(24)
+	JMP_TO_MOV_TO_BANK0_REG(25)
+	JMP_TO_MOV_TO_BANK0_REG(26)
+	JMP_TO_MOV_TO_BANK0_REG(27)
+	JMP_TO_MOV_TO_BANK0_REG(28)
+	JMP_TO_MOV_TO_BANK0_REG(29)
+	JMP_TO_MOV_TO_BANK0_REG(30)
+	JMP_TO_MOV_TO_BANK0_REG(31)
+	MOV_TO_REG(32)
+	MOV_TO_REG(33)
+	MOV_TO_REG(34)
+	MOV_TO_REG(35)
+	MOV_TO_REG(36)
+	MOV_TO_REG(37)
+	MOV_TO_REG(38)
+	MOV_TO_REG(39)
+	MOV_TO_REG(40)
+	MOV_TO_REG(41)
+	MOV_TO_REG(42)
+	MOV_TO_REG(43)
+	MOV_TO_REG(44)
+	MOV_TO_REG(45)
+	MOV_TO_REG(46)
+	MOV_TO_REG(47)
+	MOV_TO_REG(48)
+	MOV_TO_REG(49)
+	MOV_TO_REG(50)
+	MOV_TO_REG(51)
+	MOV_TO_REG(52)
+	MOV_TO_REG(53)
+	MOV_TO_REG(54)
+	MOV_TO_REG(55)
+	MOV_TO_REG(56)
+	MOV_TO_REG(57)
+	MOV_TO_REG(58)
+	MOV_TO_REG(59)
+	MOV_TO_REG(60)
+	MOV_TO_REG(61)
+	MOV_TO_REG(62)
+	MOV_TO_REG(63)
+	MOV_TO_REG(64)
+	MOV_TO_REG(65)
+	MOV_TO_REG(66)
+	MOV_TO_REG(67)
+	MOV_TO_REG(68)
+	MOV_TO_REG(69)
+	MOV_TO_REG(70)
+	MOV_TO_REG(71)
+	MOV_TO_REG(72)
+	MOV_TO_REG(73)
+	MOV_TO_REG(74)
+	MOV_TO_REG(75)
+	MOV_TO_REG(76)
+	MOV_TO_REG(77)
+	MOV_TO_REG(78)
+	MOV_TO_REG(79)
+	MOV_TO_REG(80)
+	MOV_TO_REG(81)
+	MOV_TO_REG(82)
+	MOV_TO_REG(83)
+	MOV_TO_REG(84)
+	MOV_TO_REG(85)
+	MOV_TO_REG(86)
+	MOV_TO_REG(87)
+	MOV_TO_REG(88)
+	MOV_TO_REG(89)
+	MOV_TO_REG(90)
+	MOV_TO_REG(91)
+	MOV_TO_REG(92)
+	MOV_TO_REG(93)
+	MOV_TO_REG(94)
+	MOV_TO_REG(95)
+	MOV_TO_REG(96)
+	MOV_TO_REG(97)
+	MOV_TO_REG(98)
+	MOV_TO_REG(99)
+	MOV_TO_REG(100)
+	MOV_TO_REG(101)
+	MOV_TO_REG(102)
+	MOV_TO_REG(103)
+	MOV_TO_REG(104)
+	MOV_TO_REG(105)
+	MOV_TO_REG(106)
+	MOV_TO_REG(107)
+	MOV_TO_REG(108)
+	MOV_TO_REG(109)
+	MOV_TO_REG(110)
+	MOV_TO_REG(111)
+	MOV_TO_REG(112)
+	MOV_TO_REG(113)
+	MOV_TO_REG(114)
+	MOV_TO_REG(115)
+	MOV_TO_REG(116)
+	MOV_TO_REG(117)
+	MOV_TO_REG(118)
+	MOV_TO_REG(119)
+	MOV_TO_REG(120)
+	MOV_TO_REG(121)
+	MOV_TO_REG(122)
+	MOV_TO_REG(123)
+	MOV_TO_REG(124)
+	MOV_TO_REG(125)
+	MOV_TO_REG(126)
+	MOV_TO_REG(127)
+END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
new file mode 100644
index 00000000..b0398740
--- /dev/null
+++ b/arch/ia64/kvm/process.c
@@ -0,0 +1,1024 @@
+/*
+ * process.c: handle interruption inject for guests.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  	Shaofan Li (Susue Li) <susie.li@intel.com>
+ *  	Xiaoyan Feng (Fleming Feng)  <fleming.feng@intel.com>
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  	Xiantao Zhang (xiantao.zhang@intel.com)
+ */
+#include "vcpu.h"
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/fpswa.h>
+#include <asm/kregs.h>
+#include <asm/tlb.h>
+
+fpswa_interface_t *vmm_fpswa_interface;
+
+#define IA64_VHPT_TRANS_VECTOR			0x0000
+#define IA64_INST_TLB_VECTOR			0x0400
+#define IA64_DATA_TLB_VECTOR			0x0800
+#define IA64_ALT_INST_TLB_VECTOR		0x0c00
+#define IA64_ALT_DATA_TLB_VECTOR		0x1000
+#define IA64_DATA_NESTED_TLB_VECTOR		0x1400
+#define IA64_INST_KEY_MISS_VECTOR		0x1800
+#define IA64_DATA_KEY_MISS_VECTOR		0x1c00
+#define IA64_DIRTY_BIT_VECTOR			0x2000
+#define IA64_INST_ACCESS_BIT_VECTOR		0x2400
+#define IA64_DATA_ACCESS_BIT_VECTOR		0x2800
+#define IA64_BREAK_VECTOR			0x2c00
+#define IA64_EXTINT_VECTOR			0x3000
+#define IA64_PAGE_NOT_PRESENT_VECTOR		0x5000
+#define IA64_KEY_PERMISSION_VECTOR		0x5100
+#define IA64_INST_ACCESS_RIGHTS_VECTOR		0x5200
+#define IA64_DATA_ACCESS_RIGHTS_VECTOR		0x5300
+#define IA64_GENEX_VECTOR			0x5400
+#define IA64_DISABLED_FPREG_VECTOR		0x5500
+#define IA64_NAT_CONSUMPTION_VECTOR		0x5600
+#define IA64_SPECULATION_VECTOR		0x5700 /* UNUSED */
+#define IA64_DEBUG_VECTOR			0x5900
+#define IA64_UNALIGNED_REF_VECTOR		0x5a00
+#define IA64_UNSUPPORTED_DATA_REF_VECTOR	0x5b00
+#define IA64_FP_FAULT_VECTOR			0x5c00
+#define IA64_FP_TRAP_VECTOR			0x5d00
+#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 	0x5e00
+#define IA64_TAKEN_BRANCH_TRAP_VECTOR		0x5f00
+#define IA64_SINGLE_STEP_TRAP_VECTOR		0x6000
+
+/* SDM vol2 5.5 - IVA based interruption handling */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
+			IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT |    	\
+			IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
+
+#define DOMN_PAL_REQUEST    0x110000
+#define DOMN_SAL_REQUEST    0x110001
+
+static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
+	0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
+	0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
+	0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
+	0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
+	0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
+	0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
+	0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
+};
+
+static void collect_interruption(struct kvm_vcpu *vcpu)
+{
+	u64 ipsr;
+	u64 vdcr;
+	u64 vifs;
+	unsigned long vpsr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	vpsr = vcpu_get_psr(vcpu);
+	vcpu_bsw0(vcpu);
+	if (vpsr & IA64_PSR_IC) {
+
+		/* Sync mpsr id/da/dd/ss/ed bits to vipsr
+		 * since after guest do rfi, we still want these bits on in
+		 * mpsr
+		 */
+
+		ipsr = regs->cr_ipsr;
+		vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
+					| IA64_PSR_DD | IA64_PSR_SS
+					| IA64_PSR_ED));
+		vcpu_set_ipsr(vcpu, vpsr);
+
+		/* Currently, for trap, we do not advance IIP to next
+		 * instruction. That's because we assume caller already
+		 * set up IIP correctly
+		 */
+
+		vcpu_set_iip(vcpu , regs->cr_iip);
+
+		/* set vifs.v to zero */
+		vifs = VCPU(vcpu, ifs);
+		vifs &= ~IA64_IFS_V;
+		vcpu_set_ifs(vcpu, vifs);
+
+		vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
+	}
+
+	vdcr = VCPU(vcpu, dcr);
+
+	/* Set guest psr
+	 * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
+	 * be: set to the value of dcr.be
+	 * pp: set to the value of dcr.pp
+	 */
+	vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
+	vpsr |= (vdcr & IA64_DCR_BE);
+
+	/* VDCR pp bit position is different from VPSR pp bit */
+	if (vdcr & IA64_DCR_PP) {
+		vpsr |= IA64_PSR_PP;
+	} else {
+		vpsr &= ~IA64_PSR_PP;
+	}
+
+	vcpu_set_psr(vcpu, vpsr);
+
+}
+
+void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
+{
+	u64 viva;
+	struct kvm_pt_regs *regs;
+	union ia64_isr pt_isr;
+
+	regs = vcpu_regs(vcpu);
+
+	/* clear cr.isr.ir (incomplete register frame)*/
+	pt_isr.val = VMX(vcpu, cr_isr);
+	pt_isr.ir = 0;
+	VMX(vcpu, cr_isr) = pt_isr.val;
+
+	collect_interruption(vcpu);
+
+	viva = vcpu_get_iva(vcpu);
+	regs->cr_iip = viva + vec;
+}
+
+static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
+{
+	union ia64_rr rr, rr1;
+
+	rr.val = vcpu_get_rr(vcpu, ifa);
+	rr1.val = 0;
+	rr1.ps = rr.ps;
+	rr1.rid = rr.rid;
+	return (rr1.val);
+}
+
+/*
+ * Set vIFA & vITIR & vIHA, when vPSR.ic =1
+ * Parameter:
+ *  set_ifa: if true, set vIFA
+ *  set_itir: if true, set vITIR
+ *  set_iha: if true, set vIHA
+ */
+void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
+		int set_ifa, int set_itir, int set_iha)
+{
+	long vpsr;
+	u64 value;
+
+	vpsr = VCPU(vcpu, vpsr);
+	/* Vol2, Table 8-1 */
+	if (vpsr & IA64_PSR_IC) {
+		if (set_ifa)
+			vcpu_set_ifa(vcpu, vadr);
+		if (set_itir) {
+			value = vcpu_get_itir_on_fault(vcpu, vadr);
+			vcpu_set_itir(vcpu, value);
+		}
+
+		if (set_iha) {
+			value = vcpu_thash(vcpu, vadr);
+			vcpu_set_iha(vcpu, value);
+		}
+	}
+}
+
+/*
+ * Data TLB Fault
+ *  @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
+}
+
+/*
+ * Instruction TLB Fault
+ *  @ Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
+}
+
+/*
+ * Data Nested TLB Fault
+ *  @ Data Nested TLB Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void nested_dtlb(struct kvm_vcpu *vcpu)
+{
+	inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
+}
+
+/*
+ * Alternate Data TLB Fault
+ *  @ Alternate Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
+}
+
+/*
+ * Data TLB Fault
+ *  @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
+}
+
+/* Deal with:
+ *  VHPT Translation Vector
+ */
+static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA*/
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
+}
+
+/*
+ * VHPT Instruction Fault
+ *  @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_vhpt_fault(vcpu, vadr);
+}
+
+/*
+ * VHPT Data Fault
+ *  @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_vhpt_fault(vcpu, vadr);
+}
+
+/*
+ * Deal with:
+ *  General Exception vector
+ */
+void _general_exception(struct kvm_vcpu *vcpu)
+{
+	inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
+}
+
+/*
+ * Illegal Operation Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void illegal_op(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Illegal Dependency Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void illegal_dep(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Reserved Register/Field Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void rsv_reg_field(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+/*
+ * Privileged Operation Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+
+void privilege_op(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Unimplement Data Address Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void unimpl_daddr(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Privileged Register Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void privilege_reg(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/* Deal with
+ *  Nat consumption vector
+ * Parameter:
+ *  vaddr: Optional, if t == REGISTER
+ */
+static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
+						enum tlb_miss_type t)
+{
+	/* If vPSR.ic && t == DATA/INST, IFA */
+	if (t == DATA || t == INSTRUCTION) {
+		/* IFA */
+		set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
+	}
+
+	inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
+}
+
+/*
+ * Instruction Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_nat_consumption_fault(vcpu, vadr, INSTRUCTION);
+}
+
+/*
+ * Register Nat Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void rnat_consumption(struct kvm_vcpu *vcpu)
+{
+	_nat_consumption_fault(vcpu, 0, REGISTER);
+}
+
+/*
+ * Data Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/* Deal with
+ *  Page not present vector
+ */
+static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
+}
+
+void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	__page_not_present(vcpu, vadr);
+}
+
+void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	__page_not_present(vcpu, vadr);
+}
+
+/* Deal with
+ *  Data access rights vector
+ */
+void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
+}
+
+fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
+		unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
+		unsigned long *ifs, struct kvm_pt_regs *regs)
+{
+	fp_state_t fp_state;
+	fpswa_ret_t ret;
+	struct kvm_vcpu *vcpu = current_vcpu;
+
+	uint64_t old_rr7 = ia64_get_rr(7UL<<61);
+
+	if (!vmm_fpswa_interface)
+		return (fpswa_ret_t) {-1, 0, 0, 0};
+
+	memset(&fp_state, 0, sizeof(fp_state_t));
+
+	/*
+	 * compute fp_state.  only FP registers f6 - f11 are used by the
+	 * vmm, so set those bits in the mask and set the low volatile
+	 * pointer to point to these registers.
+	 */
+	fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
+
+	fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
+
+   /*
+	 * unsigned long (*EFI_FPSWA) (
+	 *      unsigned long    trap_type,
+	 *      void             *Bundle,
+	 *      unsigned long    *pipsr,
+	 *      unsigned long    *pfsr,
+	 *      unsigned long    *pisr,
+	 *      unsigned long    *ppreds,
+	 *      unsigned long    *pifs,
+	 *      void             *fp_state);
+	 */
+	/*Call host fpswa interface directly to virtualize
+	 *guest fpswa request!
+	 */
+	ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
+	ia64_srlz_d();
+
+	ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
+			ipsr, fpsr, isr, pr, ifs, &fp_state);
+	ia64_set_rr(7UL << 61, old_rr7);
+	ia64_srlz_d();
+	return ret;
+}
+
+/*
+ * Handle floating-point assist faults and traps for domain.
+ */
+unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
+					unsigned long isr)
+{
+	struct kvm_vcpu *v = current_vcpu;
+	IA64_BUNDLE bundle;
+	unsigned long fault_ip;
+	fpswa_ret_t ret;
+
+	fault_ip = regs->cr_iip;
+	/*
+	 * When the FP trap occurs, the trapping instruction is completed.
+	 * If ipsr.ri == 0, there is the trapping instruction in previous
+	 * bundle.
+	 */
+	if (!fp_fault && (ia64_psr(regs)->ri == 0))
+		fault_ip -= 16;
+
+	if (fetch_code(v, fault_ip, &bundle))
+		return -EAGAIN;
+
+	if (!bundle.i64[0] && !bundle.i64[1])
+		return -EACCES;
+
+	ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
+			&isr, &regs->pr, &regs->cr_ifs, regs);
+	return ret.status;
+}
+
+void reflect_interruption(u64 ifa, u64 isr, u64 iim,
+		u64 vec, struct kvm_pt_regs *regs)
+{
+	u64 vector;
+	int status ;
+	struct kvm_vcpu *vcpu = current_vcpu;
+	u64 vpsr = VCPU(vcpu, vpsr);
+
+	vector = vec2off[vec];
+
+	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
+		panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
+						"with psr.ic = 0\n", vector);
+		return;
+	}
+
+	switch (vec) {
+	case 32: 	/*IA64_FP_FAULT_VECTOR*/
+		status = vmm_handle_fpu_swa(1, regs, isr);
+		if (!status) {
+			vcpu_increment_iip(vcpu);
+			return;
+		} else if (-EAGAIN == status)
+			return;
+		break;
+	case 33:	/*IA64_FP_TRAP_VECTOR*/
+		status = vmm_handle_fpu_swa(0, regs, isr);
+		if (!status)
+			return ;
+		break;
+	}
+
+	VCPU(vcpu, isr) = isr;
+	VCPU(vcpu, iipa) = regs->cr_iip;
+	if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
+		VCPU(vcpu, iim) = iim;
+	else
+		set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
+
+	inject_guest_interruption(vcpu, vector);
+}
+
+static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
+						unsigned long arg)
+{
+	struct thash_data *data;
+	unsigned long gpa, poff;
+
+	if (!is_physical_mode(vcpu)) {
+		/* Depends on caller to provide the DTR or DTC mapping.*/
+		data = vtlb_lookup(vcpu, arg, D_TLB);
+		if (data)
+			gpa = data->page_flags & _PAGE_PPN_MASK;
+		else {
+			data = vhpt_lookup(arg);
+			if (!data)
+				return 0;
+			gpa = data->gpaddr & _PAGE_PPN_MASK;
+		}
+
+		poff = arg & (PSIZE(data->ps) - 1);
+		arg = PAGEALIGN(gpa, data->ps) | poff;
+	}
+	arg = kvm_gpa_to_mpa(arg << 1 >> 1);
+
+	return (unsigned long)__va(arg);
+}
+
+static void set_pal_call_data(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long gr28 = vcpu_get_gr(vcpu, 28);
+	unsigned long gr29 = vcpu_get_gr(vcpu, 29);
+	unsigned long gr30 = vcpu_get_gr(vcpu, 30);
+
+	/*FIXME:For static and stacked convention, firmware
+	 * has put the parameters in gr28-gr31 before
+	 * break to vmm  !!*/
+
+	switch (gr28) {
+	case PAL_PERF_MON_INFO:
+	case PAL_HALT_INFO:
+		p->u.pal_data.gr29 =  kvm_trans_pal_call_args(vcpu, gr29);
+		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+		break;
+	case PAL_BRAND_INFO:
+		p->u.pal_data.gr29 = gr29;
+		p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
+		break;
+	default:
+		p->u.pal_data.gr29 = gr29;
+		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+	}
+	p->u.pal_data.gr28 = gr28;
+	p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
+
+	p->exit_reason = EXIT_REASON_PAL_CALL;
+}
+
+static void get_pal_call_result(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+		vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
+		vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
+		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
+		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
+	} else
+		panic_vm(vcpu, "Mis-set for exit reason!\n");
+}
+
+static void set_sal_call_data(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
+	p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
+	p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
+	p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
+	p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
+	p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
+	p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
+	p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
+	p->exit_reason = EXIT_REASON_SAL_CALL;
+}
+
+static void get_sal_call_result(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
+		vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
+		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
+		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
+	} else
+		panic_vm(vcpu, "Mis-set for exit reason!\n");
+}
+
+void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
+		unsigned long isr, unsigned long iim)
+{
+	struct kvm_vcpu *v = current_vcpu;
+	long psr;
+
+	if (ia64_psr(regs)->cpl == 0) {
+		/* Allow hypercalls only when cpl = 0.  */
+		if (iim == DOMN_PAL_REQUEST) {
+			local_irq_save(psr);
+			set_pal_call_data(v);
+			vmm_transition(v);
+			get_pal_call_result(v);
+			vcpu_increment_iip(v);
+			local_irq_restore(psr);
+			return;
+		} else if (iim == DOMN_SAL_REQUEST) {
+			local_irq_save(psr);
+			set_sal_call_data(v);
+			vmm_transition(v);
+			get_sal_call_result(v);
+			vcpu_increment_iip(v);
+			local_irq_restore(psr);
+			return;
+		}
+	}
+	reflect_interruption(ifa, isr, iim, 11, regs);
+}
+
+void check_pending_irq(struct kvm_vcpu *vcpu)
+{
+	int  mask, h_pending, h_inservice;
+	u64 isr;
+	unsigned long  vpsr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	h_pending = highest_pending_irq(vcpu);
+	if (h_pending == NULL_VECTOR) {
+		update_vhpi(vcpu, NULL_VECTOR);
+		return;
+	}
+	h_inservice = highest_inservice_irq(vcpu);
+
+	vpsr = VCPU(vcpu, vpsr);
+	mask = irq_masked(vcpu, h_pending, h_inservice);
+	if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
+		isr = vpsr & IA64_PSR_RI;
+		update_vhpi(vcpu, h_pending);
+		reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
+	} else if (mask == IRQ_MASKED_BY_INSVC) {
+		if (VCPU(vcpu, vhpi))
+			update_vhpi(vcpu, NULL_VECTOR);
+	} else {
+		/* masked by vpsr.i or vtpr.*/
+		update_vhpi(vcpu, h_pending);
+	}
+}
+
+static void generate_exirq(struct kvm_vcpu *vcpu)
+{
+	unsigned  vpsr;
+	uint64_t isr;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	vpsr = VCPU(vcpu, vpsr);
+	isr = vpsr & IA64_PSR_RI;
+	if (!(vpsr & IA64_PSR_IC))
+		panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
+	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
+}
+
+void vhpi_detection(struct kvm_vcpu *vcpu)
+{
+	uint64_t    threshold, vhpi;
+	union ia64_tpr       vtpr;
+	struct ia64_psr vpsr;
+
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	vtpr.val = VCPU(vcpu, tpr);
+
+	threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
+	vhpi = VCPU(vcpu, vhpi);
+	if (vhpi > threshold) {
+		/* interrupt actived*/
+		generate_exirq(vcpu);
+	}
+}
+
+void leave_hypervisor_tail(void)
+{
+	struct kvm_vcpu *v = current_vcpu;
+
+	if (VMX(v, timer_check)) {
+		VMX(v, timer_check) = 0;
+		if (VMX(v, itc_check)) {
+			if (vcpu_get_itc(v) > VCPU(v, itm)) {
+				if (!(VCPU(v, itv) & (1 << 16))) {
+					vcpu_pend_interrupt(v, VCPU(v, itv)
+							& 0xff);
+					VMX(v, itc_check) = 0;
+				} else {
+					v->arch.timer_pending = 1;
+				}
+				VMX(v, last_itc) = VCPU(v, itm) + 1;
+			}
+		}
+	}
+
+	rmb();
+	if (v->arch.irq_new_pending) {
+		v->arch.irq_new_pending = 0;
+		VMX(v, irq_check) = 0;
+		check_pending_irq(v);
+		return;
+	}
+	if (VMX(v, irq_check)) {
+		VMX(v, irq_check) = 0;
+		vhpi_detection(v);
+	}
+}
+
+static inline void handle_lds(struct kvm_pt_regs *regs)
+{
+	regs->cr_ipsr |= IA64_PSR_ED;
+}
+
+void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
+{
+	unsigned long pte;
+	union ia64_rr rr;
+
+	rr.val = ia64_get_rr(vadr);
+	pte =  vadr & _PAGE_PPN_MASK;
+	pte = pte | PHY_PAGE_WB;
+	thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
+	return;
+}
+
+void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
+{
+	unsigned long vpsr;
+	int type;
+
+	u64 vhpt_adr, gppa, pteval, rr, itir;
+	union ia64_isr misr;
+	union ia64_pta vpta;
+	struct thash_data *data;
+	struct kvm_vcpu *v = current_vcpu;
+
+	vpsr = VCPU(v, vpsr);
+	misr.val = VMX(v, cr_isr);
+
+	type = vec;
+
+	if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
+		if (vec == 2) {
+			if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
+				emulate_io_inst(v, ((vadr << 1) >> 1), 4);
+				return;
+			}
+		}
+		physical_tlb_miss(v, vadr, type);
+		return;
+	}
+	data = vtlb_lookup(v, vadr, type);
+	if (data != 0) {
+		if (type == D_TLB) {
+			gppa = (vadr & ((1UL << data->ps) - 1))
+				+ (data->ppn >> (data->ps - 12) << data->ps);
+			if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
+				if (data->pl >= ((regs->cr_ipsr >>
+						IA64_PSR_CPL0_BIT) & 3))
+					emulate_io_inst(v, gppa, data->ma);
+				else {
+					vcpu_set_isr(v, misr.val);
+					data_access_rights(v, vadr);
+				}
+				return ;
+			}
+		}
+		thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
+
+	} else if (type == D_TLB) {
+		if (misr.sp) {
+			handle_lds(regs);
+			return;
+		}
+
+		rr = vcpu_get_rr(v, vadr);
+		itir = rr & (RR_RID_MASK | RR_PS_MASK);
+
+		if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
+			if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				alt_dtlb(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+			return ;
+		}
+
+		vpta.val = vcpu_get_pta(v);
+		/* avoid recursively walking (short format) VHPT */
+
+		vhpt_adr = vcpu_thash(v, vadr);
+		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
+			/* VHPT successfully read.  */
+			if (!(pteval & _PAGE_P)) {
+				if (vpsr & IA64_PSR_IC) {
+					vcpu_set_isr(v, misr.val);
+					dtlb_fault(v, vadr);
+				} else {
+					nested_dtlb(v);
+				}
+			} else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
+				thash_purge_and_insert(v, pteval, itir,
+								vadr, D_TLB);
+			} else if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				dtlb_fault(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+		} else {
+			/* Can't read VHPT.  */
+			if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				dvhpt_fault(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+		}
+	} else if (type == I_TLB) {
+		if (!(vpsr & IA64_PSR_IC))
+			misr.ni = 1;
+		if (!vhpt_enabled(v, vadr, INST_REF)) {
+			vcpu_set_isr(v, misr.val);
+			alt_itlb(v, vadr);
+			return;
+		}
+
+		vpta.val = vcpu_get_pta(v);
+
+		vhpt_adr = vcpu_thash(v, vadr);
+		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
+			/* VHPT successfully read.  */
+			if (pteval & _PAGE_P) {
+				if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
+					vcpu_set_isr(v, misr.val);
+					itlb_fault(v, vadr);
+					return ;
+				}
+				rr = vcpu_get_rr(v, vadr);
+				itir = rr & (RR_RID_MASK | RR_PS_MASK);
+				thash_purge_and_insert(v, pteval, itir,
+							vadr, I_TLB);
+			} else {
+				vcpu_set_isr(v, misr.val);
+				inst_page_not_present(v, vadr);
+			}
+		} else {
+			vcpu_set_isr(v, misr.val);
+			ivhpt_fault(v, vadr);
+		}
+	}
+}
+
+void kvm_vexirq(struct kvm_vcpu *vcpu)
+{
+	u64 vpsr, isr;
+	struct kvm_pt_regs *regs;
+
+	regs = vcpu_regs(vcpu);
+	vpsr = VCPU(vcpu, vpsr);
+	isr = vpsr & IA64_PSR_RI;
+	reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
+}
+
+void kvm_ia64_handle_irq(struct kvm_vcpu *v)
+{
+	struct exit_ctl_data *p = &v->arch.exit_data;
+	long psr;
+
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
+	vmm_transition(v);
+	local_irq_restore(psr);
+
+	VMX(v, timer_check) = 1;
+
+}
+
+static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
+{
+	u64 oldrid, moldrid, oldpsbits, vaddr;
+	struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
+	vaddr = p->vaddr;
+
+	oldrid = VMX(v, vrr[0]);
+	VMX(v, vrr[0]) = p->rr;
+	oldpsbits = VMX(v, psbits[0]);
+	VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
+	moldrid = ia64_get_rr(0x0);
+	ia64_set_rr(0x0, vrrtomrr(p->rr));
+	ia64_srlz_d();
+
+	vaddr = PAGEALIGN(vaddr, p->ps);
+	thash_purge_entries_remote(v, vaddr, p->ps);
+
+	VMX(v, vrr[0]) = oldrid;
+	VMX(v, psbits[0]) = oldpsbits;
+	ia64_set_rr(0x0, moldrid);
+	ia64_dv_serialize_data();
+}
+
+static void vcpu_do_resume(struct kvm_vcpu *vcpu)
+{
+	/*Re-init VHPT and VTLB once from resume*/
+	vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
+	thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
+	vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
+	thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
+
+	ia64_set_pta(vcpu->arch.vhpt.pta.val);
+}
+
+static void vmm_sanity_check(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
+		panic_vm(vcpu, "Failed to do vmm sanity check,"
+			"it maybe caused by crashed vmm!!\n\n");
+	}
+}
+
+static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
+{
+	vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/
+
+	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
+		vcpu_do_resume(vcpu);
+		return;
+	}
+
+	if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
+		thash_purge_all(vcpu);
+		return;
+	}
+
+	if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
+		while (vcpu->arch.ptc_g_count > 0)
+			ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
+	}
+}
+
+void vmm_transition(struct kvm_vcpu *vcpu)
+{
+	ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
+			1, 0, 0, 0, 0, 0);
+	vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
+	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
+						1, 0, 0, 0, 0, 0);
+	kvm_do_resume_op(vcpu);
+}
+
+void vmm_panic_handler(u64 vec)
+{
+	struct kvm_vcpu *vcpu = current_vcpu;
+	vmm_sanity = 0;
+	panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
+			vec2off[vec]);
+}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
new file mode 100644
index 00000000..30897d44
--- /dev/null
+++ b/arch/ia64/kvm/trampoline.S
@@ -0,0 +1,1038 @@
+/* Save all processor states
+ *
+ * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
+ * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include "asm-offsets.h"
+
+
+#define CTX(name)    VMM_CTX_##name##_OFFSET
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_BRANCH_REGS			\
+	add	r2 = CTX(B0),r32;		\
+	add	r3 = CTX(B1),r32;		\
+	mov	r16 = b0;			\
+	mov	r17 = b1;			\
+	;;					\
+	st8	[r2]=r16,16;			\
+	st8	[r3]=r17,16;			\
+	;;					\
+	mov	r16 = b2;			\
+	mov	r17 = b3;			\
+	;;					\
+	st8	[r2]=r16,16;			\
+	st8	[r3]=r17,16;			\
+	;;					\
+	mov	r16 = b4;			\
+	mov	r17 = b5;			\
+	;;					\
+	st8	[r2]=r16;   			\
+	st8	[r3]=r17;   			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_BRANCH_REGS			\
+	add	r2 = CTX(B0),r33;		\
+	add	r3 = CTX(B1),r33;		\
+	;;					\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	b0 = r16;			\
+	mov	b1 = r17;			\
+	;;					\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	b2 = r16;			\
+	mov	b3 = r17;			\
+	;;					\
+	ld8	r16=[r2];   			\
+	ld8	r17=[r3];   			\
+	;;					\
+	mov	b4=r16;				\
+	mov	b5=r17;				\
+	;;
+
+
+	/*
+	 *	r32: context_t base address
+	 *	bsw == 1
+	 *	Save all bank1 general registers, r4 ~ r7
+	 */
+#define	SAVE_GENERAL_REGS			\
+	add	r2=CTX(R4),r32;			\
+	add	r3=CTX(R5),r32;			\
+	;;					\
+.mem.offset 0,0;        			\
+	st8.spill	[r2]=r4,16;		\
+.mem.offset 8,0;        			\
+	st8.spill	[r3]=r5,16;		\
+	;;					\
+.mem.offset 0,0;        			\
+	st8.spill	[r2]=r6,48;		\
+.mem.offset 8,0;        			\
+	st8.spill	[r3]=r7,48;		\
+	;;                          		\
+.mem.offset 0,0;        			\
+    st8.spill    [r2]=r12;			\
+.mem.offset 8,0;				\
+    st8.spill    [r3]=r13;			\
+    ;;
+
+	/*
+	 *	r33: context_t base address
+	 *	bsw == 1
+	 */
+#define	RESTORE_GENERAL_REGS			\
+	add	r2=CTX(R4),r33;			\
+	add	r3=CTX(R5),r33;			\
+	;;					\
+	ld8.fill	r4=[r2],16;		\
+	ld8.fill	r5=[r3],16;		\
+	;;					\
+	ld8.fill	r6=[r2],48;		\
+	ld8.fill	r7=[r3],48;		\
+	;;					\
+	ld8.fill    r12=[r2];			\
+	ld8.fill    r13 =[r3];			\
+	;;
+
+
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_KERNEL_REGS			\
+	add	r2 = CTX(KR0),r32;		\
+	add	r3 = CTX(KR1),r32;		\
+	mov	r16 = ar.k0;			\
+	mov	r17 = ar.k1;			\
+	;;		        		\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;		        		\
+	mov	r16 = ar.k2;			\
+	mov	r17 = ar.k3;			\
+	;;		        		\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;					\
+	mov	r16 = ar.k4;			\
+	mov	r17 = ar.k5;			\
+	;;				    	\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;					\
+	mov	r16 = ar.k6;			\
+	mov	r17 = ar.k7;			\
+	;;		    			\
+	st8	[r2] = r16;     		\
+	st8	[r3] = r17;			\
+	;;
+
+
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_KERNEL_REGS			\
+	add	r2 = CTX(KR0),r33;		\
+	add	r3 = CTX(KR1),r33;		\
+	;;		    			\
+	ld8	r16=[r2],16;     		\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k0=r16;  			\
+	mov	ar.k1=r17;	    		\
+	;;		        		\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;		        		\
+	mov	ar.k2=r16;   			\
+	mov	ar.k3=r17;	    		\
+	;;		        		\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k4=r16;			\
+	mov	ar.k5=r17;	    		\
+	;;				    	\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k6=r16;  			\
+	mov	ar.k7=r17;	    		\
+	;;
+
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_APP_REGS				\
+	add  r2 = CTX(BSPSTORE),r32;		\
+	mov  r16 = ar.bspstore;			\
+	;;					\
+	st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
+	mov  r16 = ar.rnat;			\
+	;;					\
+	st8  [r2] = r16,CTX(FCR)-CTX(RNAT);	\
+	mov  r16 = ar.fcr;			\
+	;;					\
+	st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);	\
+	mov  r16 = ar.eflag;			\
+	;;					\
+	st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);	\
+	mov  r16 = ar.cflg;			\
+	;;					\
+	st8  [r2] = r16,CTX(FSR)-CTX(CFLG);	\
+	mov  r16 = ar.fsr;			\
+	;;					\
+	st8  [r2] = r16,CTX(FIR)-CTX(FSR);	\
+	mov  r16 = ar.fir;			\
+	;;					\
+	st8  [r2] = r16,CTX(FDR)-CTX(FIR);	\
+	mov  r16 = ar.fdr;			\
+	;;					\
+	st8  [r2] = r16,CTX(UNAT)-CTX(FDR);	\
+	mov  r16 = ar.unat;			\
+	;;					\
+	st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);	\
+	mov  r16 = ar.fpsr;			\
+	;;					\
+	st8  [r2] = r16,CTX(PFS)-CTX(FPSR);	\
+	mov  r16 = ar.pfs;			\
+	;;					\
+	st8  [r2] = r16,CTX(LC)-CTX(PFS);	\
+	mov  r16 = ar.lc;			\
+	;;					\
+	st8  [r2] = r16;			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_APP_REGS			\
+	add  r2=CTX(BSPSTORE),r33;		\
+	;;					\
+	ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);	\
+	;;					\
+	mov  ar.bspstore=r16;			\
+	ld8  r16=[r2],CTX(FCR)-CTX(RNAT);	\
+	;;					\
+	mov  ar.rnat=r16;			\
+	ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);	\
+	;;					\
+	mov  ar.fcr=r16;			\
+	ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);	\
+	;;					\
+	mov  ar.eflag=r16;			\
+	ld8  r16=[r2],CTX(FSR)-CTX(CFLG);	\
+	;;					\
+	mov  ar.cflg=r16;			\
+	ld8  r16=[r2],CTX(FIR)-CTX(FSR);	\
+	;;					\
+	mov  ar.fsr=r16;			\
+	ld8  r16=[r2],CTX(FDR)-CTX(FIR);	\
+	;;					\
+	mov  ar.fir=r16;			\
+	ld8  r16=[r2],CTX(UNAT)-CTX(FDR);	\
+	;;					\
+	mov  ar.fdr=r16;			\
+	ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);	\
+	;;					\
+	mov  ar.unat=r16;			\
+	ld8  r16=[r2],CTX(PFS)-CTX(FPSR);	\
+	;;					\
+	mov  ar.fpsr=r16;			\
+	ld8  r16=[r2],CTX(LC)-CTX(PFS);		\
+	;;					\
+	mov  ar.pfs=r16;			\
+	ld8  r16=[r2];				\
+	;;					\
+	mov  ar.lc=r16;				\
+	;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_CTL_REGS				\
+	add	r2 = CTX(DCR),r32;		\
+	mov	r16 = cr.dcr;			\
+	;;					\
+	st8	[r2] = r16,CTX(IVA)-CTX(DCR);	\
+	;;                          		\
+	mov	r16 = cr.iva;			\
+	;;					\
+	st8	[r2] = r16,CTX(PTA)-CTX(IVA);	\
+	;;					\
+	mov r16 = cr.pta;			\
+	;;					\
+	st8 [r2] = r16 ;			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_CTL_REGS				\
+	add	r2 = CTX(DCR),r33;	        	\
+	;;						\
+	ld8	r16 = [r2],CTX(IVA)-CTX(DCR);		\
+	;;                      			\
+	mov	cr.dcr = r16;				\
+	dv_serialize_data;				\
+	;;						\
+	ld8	r16 = [r2],CTX(PTA)-CTX(IVA);		\
+	;;						\
+	mov	cr.iva = r16;				\
+	dv_serialize_data;				\
+	;;						\
+	ld8 r16 = [r2];					\
+	;;						\
+	mov cr.pta = r16;				\
+	dv_serialize_data;				\
+	;;
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_REGION_REGS			\
+	add	r2=CTX(RR0),r32;		\
+	mov	r16=rr[r0];			\
+	dep.z	r18=1,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=2,61,3;			\
+	;;					\
+	st8	[r2]=r17,8;			\
+	mov	r16=rr[r18];			\
+	dep.z	r18=3,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=4,61,3;			\
+	;;					\
+	st8	[r2]=r17,8;			\
+	mov	r16=rr[r18];			\
+	dep.z	r18=5,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=7,61,3;			\
+	;;					\
+	st8	[r2]=r17,16;			\
+	mov	r16=rr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	;;
+
+	/*
+	 *	r33:context_t base address
+	 */
+#define	RESTORE_REGION_REGS	\
+	add	r2=CTX(RR0),r33;\
+	mov r18=r0;		\
+	;;			\
+	ld8	r20=[r2],8;	\
+	;;	/* rr0 */	\
+	ld8	r21=[r2],8;	\
+	;;	/* rr1 */	\
+	ld8	r22=[r2],8;	\
+	;;	/* rr2 */	\
+	ld8	r23=[r2],8;	\
+	;;	/* rr3 */	\
+	ld8	r24=[r2],8;	\
+	;;	/* rr4 */	\
+	ld8	r25=[r2],16;	\
+	;;	/* rr5 */	\
+	ld8	r27=[r2];	\
+	;;	/* rr7 */	\
+	mov rr[r18]=r20;	\
+	dep.z	r18=1,61,3;	\
+	;;  /* rr1 */		\
+	mov rr[r18]=r21;	\
+	dep.z	r18=2,61,3;	\
+	;;  /* rr2 */		\
+	mov rr[r18]=r22;	\
+	dep.z	r18=3,61,3;	\
+	;;  /* rr3 */		\
+	mov rr[r18]=r23;	\
+	dep.z	r18=4,61,3;	\
+	;;  /* rr4 */		\
+	mov rr[r18]=r24;	\
+	dep.z	r18=5,61,3;	\
+	;;  /* rr5 */		\
+	mov rr[r18]=r25;	\
+	dep.z	r18=7,61,3;	\
+	;;  /* rr7 */		\
+	mov rr[r18]=r27;	\
+	;;			\
+	srlz.i;			\
+	;;
+
+
+
+	/*
+	 *	r32:	context_t base address
+	 *	r36~r39:scratch registers
+	 */
+#define	SAVE_DEBUG_REGS				\
+	add	r2=CTX(IBR0),r32;		\
+	add	r3=CTX(DBR0),r32;		\
+	mov	r16=ibr[r0];			\
+	mov	r17=dbr[r0];			\
+	;;					\
+	st8	[r2]=r16,8; 			\
+	st8	[r3]=r17,8;	    		\
+	add	r18=1,r0;		    	\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=2,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=2,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=3,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=4,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=5,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=6,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=7,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	;;
+
+
+/*
+ *      r33:    point to context_t structure
+ *      ar.lc are corrupted.
+ */
+#define RESTORE_DEBUG_REGS			\
+	add	r2=CTX(IBR0),r33;		\
+	add	r3=CTX(DBR0),r33;		\
+	mov r16=7;    				\
+	mov r17=r0;				\
+	;;                    			\
+	mov ar.lc = r16;			\
+	;; 					\
+1:						\
+	ld8 r18=[r2],8;		    		\
+	ld8 r19=[r3],8;				\
+	;;					\
+	mov ibr[r17]=r18;			\
+	mov dbr[r17]=r19;			\
+	;;   					\
+	srlz.i;					\
+	;; 					\
+	add r17=1,r17;				\
+	br.cloop.sptk 1b;			\
+	;;
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_FPU_LOW				\
+	add	r2=CTX(F2),r32;			\
+	add	r3=CTX(F3),r32;			\
+	;;					\
+	stf.spill.nta	[r2]=f2,32;		\
+	stf.spill.nta	[r3]=f3,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f4,32;		\
+	stf.spill.nta	[r3]=f5,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f6,32;		\
+	stf.spill.nta	[r3]=f7,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f8,32;		\
+	stf.spill.nta	[r3]=f9,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f10,32;		\
+	stf.spill.nta	[r3]=f11,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f12,32;		\
+	stf.spill.nta	[r3]=f13,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f14,32;		\
+	stf.spill.nta	[r3]=f15,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f16,32;		\
+	stf.spill.nta	[r3]=f17,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f18,32;		\
+	stf.spill.nta	[r3]=f19,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f20,32;		\
+	stf.spill.nta	[r3]=f21,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f22,32;		\
+	stf.spill.nta	[r3]=f23,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f24,32;		\
+	stf.spill.nta	[r3]=f25,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f26,32;		\
+	stf.spill.nta	[r3]=f27,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f28,32;		\
+	stf.spill.nta	[r3]=f29,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f30;		\
+	stf.spill.nta	[r3]=f31;		\
+	;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_FPU_HIGH				\
+	add	r2=CTX(F32),r32;		\
+	add	r3=CTX(F33),r32;		\
+	;;					\
+	stf.spill.nta	[r2]=f32,32;		\
+	stf.spill.nta	[r3]=f33,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f34,32;		\
+	stf.spill.nta	[r3]=f35,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f36,32;		\
+	stf.spill.nta	[r3]=f37,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f38,32;		\
+	stf.spill.nta	[r3]=f39,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f40,32;		\
+	stf.spill.nta	[r3]=f41,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f42,32;		\
+	stf.spill.nta	[r3]=f43,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f44,32;		\
+	stf.spill.nta	[r3]=f45,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f46,32;		\
+	stf.spill.nta	[r3]=f47,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f48,32;		\
+	stf.spill.nta	[r3]=f49,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f50,32;		\
+	stf.spill.nta	[r3]=f51,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f52,32;		\
+	stf.spill.nta	[r3]=f53,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f54,32;		\
+	stf.spill.nta	[r3]=f55,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f56,32;		\
+	stf.spill.nta	[r3]=f57,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f58,32;		\
+	stf.spill.nta	[r3]=f59,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f60,32;		\
+	stf.spill.nta	[r3]=f61,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f62,32;		\
+	stf.spill.nta	[r3]=f63,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f64,32;		\
+	stf.spill.nta	[r3]=f65,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f66,32;		\
+	stf.spill.nta	[r3]=f67,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f68,32;		\
+	stf.spill.nta	[r3]=f69,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f70,32;		\
+	stf.spill.nta	[r3]=f71,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f72,32;		\
+	stf.spill.nta	[r3]=f73,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f74,32;		\
+	stf.spill.nta	[r3]=f75,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f76,32;		\
+	stf.spill.nta	[r3]=f77,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f78,32;		\
+	stf.spill.nta	[r3]=f79,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f80,32;		\
+	stf.spill.nta	[r3]=f81,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f82,32;		\
+	stf.spill.nta	[r3]=f83,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f84,32;		\
+	stf.spill.nta	[r3]=f85,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f86,32;		\
+	stf.spill.nta	[r3]=f87,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f88,32;		\
+	stf.spill.nta	[r3]=f89,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f90,32;		\
+	stf.spill.nta	[r3]=f91,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f92,32;		\
+	stf.spill.nta	[r3]=f93,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f94,32;		\
+	stf.spill.nta	[r3]=f95,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f96,32;		\
+	stf.spill.nta	[r3]=f97,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f98,32;		\
+	stf.spill.nta	[r3]=f99,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f100,32;		\
+	stf.spill.nta	[r3]=f101,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f102,32;		\
+	stf.spill.nta	[r3]=f103,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f104,32;		\
+	stf.spill.nta	[r3]=f105,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f106,32;		\
+	stf.spill.nta	[r3]=f107,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f108,32;		\
+	stf.spill.nta	[r3]=f109,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f110,32;		\
+	stf.spill.nta	[r3]=f111,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f112,32;		\
+	stf.spill.nta	[r3]=f113,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f114,32;		\
+	stf.spill.nta	[r3]=f115,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f116,32;		\
+	stf.spill.nta	[r3]=f117,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f118,32;		\
+	stf.spill.nta	[r3]=f119,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f120,32;		\
+	stf.spill.nta	[r3]=f121,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f122,32;		\
+	stf.spill.nta	[r3]=f123,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f124,32;		\
+	stf.spill.nta	[r3]=f125,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f126;		\
+	stf.spill.nta	[r3]=f127;		\
+	;;
+
+     /*
+      *      r33:    point to context_t structure
+      */
+#define	RESTORE_FPU_LOW				\
+    add     r2 = CTX(F2), r33;			\
+    add     r3 = CTX(F3), r33;			\
+    ;;						\
+    ldf.fill.nta f2 = [r2], 32;			\
+    ldf.fill.nta f3 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f4 = [r2], 32;			\
+    ldf.fill.nta f5 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f6 = [r2], 32;			\
+    ldf.fill.nta f7 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f8 = [r2], 32;			\
+    ldf.fill.nta f9 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f10 = [r2], 32;		\
+    ldf.fill.nta f11 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f12 = [r2], 32;		\
+    ldf.fill.nta f13 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f14 = [r2], 32;		\
+    ldf.fill.nta f15 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f16 = [r2], 32;		\
+    ldf.fill.nta f17 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f18 = [r2], 32;		\
+    ldf.fill.nta f19 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f20 = [r2], 32;		\
+    ldf.fill.nta f21 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f22 = [r2], 32;		\
+    ldf.fill.nta f23 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f24 = [r2], 32;		\
+    ldf.fill.nta f25 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f26 = [r2], 32;		\
+    ldf.fill.nta f27 = [r3], 32;		\
+	;;					\
+    ldf.fill.nta f28 = [r2], 32;		\
+    ldf.fill.nta f29 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f30 = [r2], 32;		\
+    ldf.fill.nta f31 = [r3], 32;		\
+    ;;
+
+
+
+    /*
+     *      r33:    point to context_t structure
+     */
+#define	RESTORE_FPU_HIGH			\
+    add     r2 = CTX(F32), r33;			\
+    add     r3 = CTX(F33), r33;			\
+    ;;						\
+    ldf.fill.nta f32 = [r2], 32;		\
+    ldf.fill.nta f33 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f34 = [r2], 32;		\
+    ldf.fill.nta f35 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f36 = [r2], 32;		\
+    ldf.fill.nta f37 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f38 = [r2], 32;		\
+    ldf.fill.nta f39 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f40 = [r2], 32;		\
+    ldf.fill.nta f41 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f42 = [r2], 32;		\
+    ldf.fill.nta f43 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f44 = [r2], 32;		\
+    ldf.fill.nta f45 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f46 = [r2], 32;		\
+    ldf.fill.nta f47 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f48 = [r2], 32;		\
+    ldf.fill.nta f49 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f50 = [r2], 32;		\
+    ldf.fill.nta f51 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f52 = [r2], 32;		\
+    ldf.fill.nta f53 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f54 = [r2], 32;		\
+    ldf.fill.nta f55 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f56 = [r2], 32;		\
+    ldf.fill.nta f57 = [r3], 32;   		\
+    ;;						\
+    ldf.fill.nta f58 = [r2], 32;		\
+    ldf.fill.nta f59 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f60 = [r2], 32;		\
+    ldf.fill.nta f61 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f62 = [r2], 32;		\
+    ldf.fill.nta f63 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f64 = [r2], 32;		\
+    ldf.fill.nta f65 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f66 = [r2], 32;		\
+    ldf.fill.nta f67 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f68 = [r2], 32;		\
+    ldf.fill.nta f69 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f70 = [r2], 32;		\
+    ldf.fill.nta f71 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f72 = [r2], 32;		\
+    ldf.fill.nta f73 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f74 = [r2], 32;		\
+    ldf.fill.nta f75 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f76 = [r2], 32;		\
+    ldf.fill.nta f77 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f78 = [r2], 32;		\
+    ldf.fill.nta f79 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f80 = [r2], 32;		\
+    ldf.fill.nta f81 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f82 = [r2], 32;		\
+    ldf.fill.nta f83 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f84 = [r2], 32;		\
+    ldf.fill.nta f85 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f86 = [r2], 32;		\
+    ldf.fill.nta f87 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f88 = [r2], 32;		\
+    ldf.fill.nta f89 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f90 = [r2], 32;		\
+    ldf.fill.nta f91 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f92 = [r2], 32;		\
+    ldf.fill.nta f93 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f94 = [r2], 32;		\
+    ldf.fill.nta f95 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f96 = [r2], 32;		\
+    ldf.fill.nta f97 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f98 = [r2], 32;		\
+    ldf.fill.nta f99 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f100 = [r2], 32;		\
+    ldf.fill.nta f101 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f102 = [r2], 32;		\
+    ldf.fill.nta f103 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f104 = [r2], 32;		\
+    ldf.fill.nta f105 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f106 = [r2], 32;		\
+    ldf.fill.nta f107 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f108 = [r2], 32;		\
+    ldf.fill.nta f109 = [r3], 32;   		\
+    ;;						\
+    ldf.fill.nta f110 = [r2], 32;		\
+    ldf.fill.nta f111 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f112 = [r2], 32;		\
+    ldf.fill.nta f113 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f114 = [r2], 32;		\
+    ldf.fill.nta f115 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f116 = [r2], 32;		\
+    ldf.fill.nta f117 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f118 = [r2], 32;		\
+    ldf.fill.nta f119 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f120 = [r2], 32;		\
+    ldf.fill.nta f121 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f122 = [r2], 32;		\
+    ldf.fill.nta f123 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f124 = [r2], 32;		\
+    ldf.fill.nta f125 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f126 = [r2], 32;		\
+    ldf.fill.nta f127 = [r3], 32;		\
+    ;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_PTK_REGS				\
+    add r2=CTX(PKR0), r32;			\
+    mov r16=7;    				\
+    ;;                         			\
+    mov ar.lc=r16;  				\
+    mov r17=r0;					\
+    ;;						\
+1:						\
+    mov r18=pkr[r17];				\
+    ;;                     			\
+    srlz.i;					\
+    ;; 						\
+    st8 [r2]=r18, 8;				\
+    ;;    					\
+    add r17 =1,r17;				\
+    ;;                     			\
+    br.cloop.sptk 1b;				\
+    ;;
+
+/*
+ *      r33:    point to context_t structure
+ *      ar.lc are corrupted.
+ */
+#define RESTORE_PTK_REGS	    		\
+    add r2=CTX(PKR0), r33;			\
+    mov r16=7;    				\
+    ;;                         			\
+    mov ar.lc=r16;  				\
+    mov r17=r0;					\
+    ;;						\
+1: 						\
+    ld8 r18=[r2], 8;				\
+    ;;						\
+    mov pkr[r17]=r18;				\
+    ;;    					\
+    srlz.i;					\
+    ;; 						\
+    add r17 =1,r17;				\
+    ;;                     			\
+    br.cloop.sptk 1b;				\
+    ;;
+
+
+/*
+ * void vmm_trampoline( context_t * from,
+ *			context_t * to)
+ *
+ * 	from:	r32
+ *	to:	r33
+ *  note: interrupt disabled before call this function.
+ */
+GLOBAL_ENTRY(vmm_trampoline)
+    mov r16 = psr
+    adds r2 = CTX(PSR), r32
+    ;;
+    st8 [r2] = r16, 8       // psr
+    mov r17 = pr
+    ;;
+    st8 [r2] = r17, 8       // pr
+    mov r18 = ar.unat
+    ;;
+    st8 [r2] = r18
+    mov r17 = ar.rsc
+    ;;
+    adds r2 = CTX(RSC),r32
+    ;;
+    st8 [r2]= r17
+    mov ar.rsc =0
+    flushrs
+    ;;
+    SAVE_GENERAL_REGS
+    ;;
+    SAVE_KERNEL_REGS
+    ;;
+    SAVE_APP_REGS
+    ;;
+    SAVE_BRANCH_REGS
+    ;;
+    SAVE_CTL_REGS
+    ;;
+    SAVE_REGION_REGS
+    ;;
+    //SAVE_DEBUG_REGS
+    ;;
+    rsm  psr.dfl
+    ;;
+    srlz.d
+    ;;
+    SAVE_FPU_LOW
+    ;;
+    rsm  psr.dfh
+    ;;
+    srlz.d
+    ;;
+    SAVE_FPU_HIGH
+    ;;
+    SAVE_PTK_REGS
+    ;;
+    RESTORE_PTK_REGS
+    ;;
+    RESTORE_FPU_HIGH
+    ;;
+    RESTORE_FPU_LOW
+    ;;
+    //RESTORE_DEBUG_REGS
+    ;;
+    RESTORE_REGION_REGS
+    ;;
+    RESTORE_CTL_REGS
+    ;;
+    RESTORE_BRANCH_REGS
+    ;;
+    RESTORE_APP_REGS
+    ;;
+    RESTORE_KERNEL_REGS
+    ;;
+    RESTORE_GENERAL_REGS
+    ;;
+    adds r2=CTX(PSR), r33
+    ;;
+    ld8 r16=[r2], 8       // psr
+    ;;
+    mov psr.l=r16
+    ;;
+    srlz.d
+    ;;
+    ld8 r16=[r2], 8       // pr
+    ;;
+    mov pr =r16,-1
+    ld8 r16=[r2]       // unat
+    ;;
+    mov ar.unat=r16
+    ;;
+    adds r2=CTX(RSC),r33
+    ;;
+    ld8 r16 =[r2]
+    ;;
+    mov ar.rsc = r16
+    ;;
+    br.ret.sptk.few b0
+END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
new file mode 100644
index 00000000..958815c9
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.c
@@ -0,0 +1,2209 @@
+/*
+ * kvm_vcpu.c: handling all virtual cpu related thing.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Shaofan Li (Susue Li) <susie.li@intel.com>
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  Xiantao Zhang <xiantao.zhang@intel.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+
+#include <asm/processor.h>
+#include <asm/ia64regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/kregs.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+#include "asm-offsets.h"
+#include "vcpu.h"
+
+/*
+ * Special notes:
+ * - Index by it/dt/rt sequence
+ * - Only existing mode transitions are allowed in this table
+ * - RSE is placed at lazy mode when emulating guest partial mode
+ * - If gva happens to be rr0 and rr4, only allowed case is identity
+ *   mapping (gva=gpa), or panic! (How?)
+ */
+int mm_switch_table[8][8] = {
+	/*  2004/09/12(Kevin): Allow switch to self */
+	/*
+	 *  (it,dt,rt): (0,0,0) -> (1,1,1)
+	 *  This kind of transition usually occurs in the very early
+	 *  stage of Linux boot up procedure. Another case is in efi
+	 *  and pal calls. (see "arch/ia64/kernel/head.S")
+	 *
+	 *  (it,dt,rt): (0,0,0) -> (0,1,1)
+	 *  This kind of transition is found when OSYa exits efi boot
+	 *  service. Due to gva = gpa in this case (Same region),
+	 *  data access can be satisfied though itlb entry for physical
+	 *  emulation is hit.
+	 */
+	{SW_SELF, 0,  0,  SW_NOP, 0,  0,  0,  SW_P2V},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	/*
+	 *  (it,dt,rt): (0,1,1) -> (1,1,1)
+	 *  This kind of transition is found in OSYa.
+	 *
+	 *  (it,dt,rt): (0,1,1) -> (0,0,0)
+	 *  This kind of transition is found in OSYa
+	 */
+	{SW_NOP, 0,  0,  SW_SELF, 0,  0,  0,  SW_P2V},
+	/* (1,0,0)->(1,1,1) */
+	{0,  0,  0,  0,  0,  0,  0,  SW_P2V},
+	/*
+	 *  (it,dt,rt): (1,0,1) -> (1,1,1)
+	 *  This kind of transition usually occurs when Linux returns
+	 *  from the low level TLB miss handlers.
+	 *  (see "arch/ia64/kernel/ivt.S")
+	 */
+	{0,  0,  0,  0,  0,  SW_SELF, 0,  SW_P2V},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	/*
+	 *  (it,dt,rt): (1,1,1) -> (1,0,1)
+	 *  This kind of transition usually occurs in Linux low level
+	 *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
+	 *
+	 *  (it,dt,rt): (1,1,1) -> (0,0,0)
+	 *  This kind of transition usually occurs in pal and efi calls,
+	 *  which requires running in physical mode.
+	 *  (see "arch/ia64/kernel/head.S")
+	 *  (1,1,1)->(1,0,0)
+	 */
+
+	{SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF},
+};
+
+void physical_mode_init(struct kvm_vcpu  *vcpu)
+{
+	vcpu->arch.mode_flags = GUEST_IN_PHY;
+}
+
+void switch_to_physical_rid(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	/* Save original virtual mode rr[0] and rr[4] */
+	psr = ia64_clear_ic();
+	ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
+	ia64_srlz_d();
+	ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
+	ia64_srlz_d();
+
+	ia64_set_psr(psr);
+	return;
+}
+
+void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	psr = ia64_clear_ic();
+	ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
+	ia64_srlz_d();
+	ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
+	ia64_srlz_d();
+	ia64_set_psr(psr);
+	return;
+}
+
+static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
+{
+	return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
+}
+
+void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
+					struct ia64_psr new_psr)
+{
+	int act;
+	act = mm_switch_action(old_psr, new_psr);
+	switch (act) {
+	case SW_V2P:
+		/*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
+		old_psr.val, new_psr.val);*/
+		switch_to_physical_rid(vcpu);
+		/*
+		 * Set rse to enforced lazy, to prevent active rse
+		 *save/restor when guest physical mode.
+		 */
+		vcpu->arch.mode_flags |= GUEST_IN_PHY;
+		break;
+	case SW_P2V:
+		switch_to_virtual_rid(vcpu);
+		/*
+		 * recover old mode which is saved when entering
+		 * guest physical mode
+		 */
+		vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
+		break;
+	case SW_SELF:
+		break;
+	case SW_NOP:
+		break;
+	default:
+		/* Sanity check */
+		break;
+	}
+	return;
+}
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ *  - insertions (itc.*, itr.*)
+ *  - purges (ptc.* and ptr.*)
+ *  - tpa
+ *  - tak
+ *  - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void check_mm_mode_switch(struct kvm_vcpu *vcpu,  struct ia64_psr old_psr,
+					struct ia64_psr new_psr)
+{
+
+	if ((old_psr.dt != new_psr.dt)
+			|| (old_psr.it != new_psr.it)
+			|| (old_psr.rt != new_psr.rt))
+		switch_mm_mode(vcpu, old_psr, new_psr);
+
+	return;
+}
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ *  - insertions (itc.*, itr.*)
+ *  - purges (ptc.* and ptr.*)
+ *  - tpa
+ *  - tak
+ *  - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
+{
+	if (is_physical_mode(vcpu)) {
+		vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
+		switch_to_virtual_rid(vcpu);
+	}
+	return;
+}
+
+/* Recover always follows prepare */
+void recover_if_physical_mode(struct kvm_vcpu *vcpu)
+{
+	if (is_physical_mode(vcpu))
+		switch_to_physical_rid(vcpu);
+	vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
+	return;
+}
+
+#define RPT(x)	((u16) &((struct kvm_pt_regs *)0)->x)
+
+static u16 gr_info[32] = {
+	0, 	/* r0 is read-only : WE SHOULD NEVER GET THIS */
+	RPT(r1), RPT(r2), RPT(r3),
+	RPT(r4), RPT(r5), RPT(r6), RPT(r7),
+	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
+	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
+	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
+	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
+	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
+	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
+};
+
+#define IA64_FIRST_STACKED_GR   32
+#define IA64_FIRST_ROTATING_FR  32
+
+static inline unsigned long
+rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
+{
+	reg += rrb;
+	if (reg >= sor)
+		reg -= sor;
+	return reg;
+}
+
+/*
+ * Return the (rotated) index for floating point register
+ * be in the REGNUM (REGNUM must range from 32-127,
+ * result is in the range from 0-95.
+ */
+static inline unsigned long fph_index(struct kvm_pt_regs *regs,
+						long regnum)
+{
+	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
+	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
+}
+
+/*
+ * The inverse of the above: given bspstore and the number of
+ * registers, calculate ar.bsp.
+ */
+static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
+							long num_regs)
+{
+	long delta = ia64_rse_slot_num(addr) + num_regs;
+	int i = 0;
+
+	if (num_regs < 0)
+		delta -= 0x3e;
+	if (delta < 0) {
+		while (delta <= -0x3f) {
+			i--;
+			delta += 0x3f;
+		}
+	} else {
+		while (delta >= 0x3f) {
+			i++;
+			delta -= 0x3f;
+		}
+	}
+
+	return addr + num_regs + i;
+}
+
+static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
+					unsigned long *val, int *nat)
+{
+	unsigned long *bsp, *addr, *rnat_addr, *bspstore;
+	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
+	unsigned long nat_mask;
+	unsigned long old_rsc, new_rsc;
+	long sof = (regs->cr_ifs) & 0x7f;
+	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
+	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+	long ridx = r1 - 32;
+
+	if (ridx < sor)
+		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
+	new_rsc = old_rsc&(~(0x3));
+	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
+
+	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	bsp = kbs + (regs->loadrs >> 19);
+
+	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
+	nat_mask = 1UL << ia64_rse_slot_num(addr);
+	rnat_addr = ia64_rse_rnat_addr(addr);
+
+	if (addr >= bspstore) {
+		ia64_flushrs();
+		ia64_mf();
+		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	}
+	*val = *addr;
+	if (nat) {
+		if (bspstore < rnat_addr)
+			*nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
+							& nat_mask);
+		else
+			*nat = (int)!!((*rnat_addr) & nat_mask);
+		ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
+	}
+}
+
+void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
+				unsigned long val, unsigned long nat)
+{
+	unsigned long *bsp, *bspstore, *addr, *rnat_addr;
+	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
+	unsigned long nat_mask;
+	unsigned long old_rsc, new_rsc, psr;
+	unsigned long rnat;
+	long sof = (regs->cr_ifs) & 0x7f;
+	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
+	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+	long ridx = r1 - 32;
+
+	if (ridx < sor)
+		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
+	/* put RSC to lazy mode, and set loadrs 0 */
+	new_rsc = old_rsc & (~0x3fff0003);
+	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
+	bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
+
+	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
+	nat_mask = 1UL << ia64_rse_slot_num(addr);
+	rnat_addr = ia64_rse_rnat_addr(addr);
+
+	local_irq_save(psr);
+	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	if (addr >= bspstore) {
+
+		ia64_flushrs();
+		ia64_mf();
+		*addr = val;
+		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
+		if (bspstore < rnat_addr)
+			rnat = rnat & (~nat_mask);
+		else
+			*rnat_addr = (*rnat_addr)&(~nat_mask);
+
+		ia64_mf();
+		ia64_loadrs();
+		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
+	} else {
+		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
+		*addr = val;
+		if (bspstore < rnat_addr)
+			rnat = rnat&(~nat_mask);
+		else
+			*rnat_addr = (*rnat_addr) & (~nat_mask);
+
+		ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore);
+		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
+	}
+	local_irq_restore(psr);
+	ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
+}
+
+void getreg(unsigned long regnum, unsigned long *val,
+				int *nat, struct kvm_pt_regs *regs)
+{
+	unsigned long addr, *unat;
+	if (regnum >= IA64_FIRST_STACKED_GR) {
+		get_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	addr = (unsigned long)regs;
+	unat = &regs->eml_unat;
+
+	addr += gr_info[regnum];
+
+	*val  = *(unsigned long *)addr;
+	/*
+	 * do it only when requested
+	 */
+	if (nat)
+		*nat  = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
+}
+
+void setreg(unsigned long regnum, unsigned long val,
+			int nat, struct kvm_pt_regs *regs)
+{
+	unsigned long addr;
+	unsigned long bitmask;
+	unsigned long *unat;
+
+	/*
+	 * First takes care of stacked registers
+	 */
+	if (regnum >= IA64_FIRST_STACKED_GR) {
+		set_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	addr = (unsigned long)regs;
+	unat = &regs->eml_unat;
+	/*
+	 * add offset from base of struct
+	 * and do it !
+	 */
+	addr += gr_info[regnum];
+
+	*(unsigned long *)addr = val;
+
+	/*
+	 * We need to clear the corresponding UNAT bit to fully emulate the load
+	 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
+	 */
+	bitmask   = 1UL << ((addr >> 3) & 0x3f);
+	if (nat)
+		*unat |= bitmask;
+	 else
+		*unat &= ~bitmask;
+
+}
+
+u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	unsigned long val;
+
+	if (!reg)
+		return 0;
+	getreg(reg, &val, 0, regs);
+	return val;
+}
+
+void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	long sof = (regs->cr_ifs) & 0x7f;
+
+	if (!reg)
+		return;
+	if (reg >= sof + 32)
+		return;
+	setreg(reg, value, nat, regs);	/* FIXME: handle NATs later*/
+}
+
+void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
+				struct kvm_pt_regs *regs)
+{
+	/* Take floating register rotation into consideration*/
+	if (regnum >= IA64_FIRST_ROTATING_FR)
+		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
+#define CASE_FIXED_FP(reg)			\
+	case  (reg) :				\
+		ia64_stf_spill(fpval, reg);	\
+	break
+
+	switch (regnum) {
+		CASE_FIXED_FP(0);
+		CASE_FIXED_FP(1);
+		CASE_FIXED_FP(2);
+		CASE_FIXED_FP(3);
+		CASE_FIXED_FP(4);
+		CASE_FIXED_FP(5);
+
+		CASE_FIXED_FP(6);
+		CASE_FIXED_FP(7);
+		CASE_FIXED_FP(8);
+		CASE_FIXED_FP(9);
+		CASE_FIXED_FP(10);
+		CASE_FIXED_FP(11);
+
+		CASE_FIXED_FP(12);
+		CASE_FIXED_FP(13);
+		CASE_FIXED_FP(14);
+		CASE_FIXED_FP(15);
+		CASE_FIXED_FP(16);
+		CASE_FIXED_FP(17);
+		CASE_FIXED_FP(18);
+		CASE_FIXED_FP(19);
+		CASE_FIXED_FP(20);
+		CASE_FIXED_FP(21);
+		CASE_FIXED_FP(22);
+		CASE_FIXED_FP(23);
+		CASE_FIXED_FP(24);
+		CASE_FIXED_FP(25);
+		CASE_FIXED_FP(26);
+		CASE_FIXED_FP(27);
+		CASE_FIXED_FP(28);
+		CASE_FIXED_FP(29);
+		CASE_FIXED_FP(30);
+		CASE_FIXED_FP(31);
+		CASE_FIXED_FP(32);
+		CASE_FIXED_FP(33);
+		CASE_FIXED_FP(34);
+		CASE_FIXED_FP(35);
+		CASE_FIXED_FP(36);
+		CASE_FIXED_FP(37);
+		CASE_FIXED_FP(38);
+		CASE_FIXED_FP(39);
+		CASE_FIXED_FP(40);
+		CASE_FIXED_FP(41);
+		CASE_FIXED_FP(42);
+		CASE_FIXED_FP(43);
+		CASE_FIXED_FP(44);
+		CASE_FIXED_FP(45);
+		CASE_FIXED_FP(46);
+		CASE_FIXED_FP(47);
+		CASE_FIXED_FP(48);
+		CASE_FIXED_FP(49);
+		CASE_FIXED_FP(50);
+		CASE_FIXED_FP(51);
+		CASE_FIXED_FP(52);
+		CASE_FIXED_FP(53);
+		CASE_FIXED_FP(54);
+		CASE_FIXED_FP(55);
+		CASE_FIXED_FP(56);
+		CASE_FIXED_FP(57);
+		CASE_FIXED_FP(58);
+		CASE_FIXED_FP(59);
+		CASE_FIXED_FP(60);
+		CASE_FIXED_FP(61);
+		CASE_FIXED_FP(62);
+		CASE_FIXED_FP(63);
+		CASE_FIXED_FP(64);
+		CASE_FIXED_FP(65);
+		CASE_FIXED_FP(66);
+		CASE_FIXED_FP(67);
+		CASE_FIXED_FP(68);
+		CASE_FIXED_FP(69);
+		CASE_FIXED_FP(70);
+		CASE_FIXED_FP(71);
+		CASE_FIXED_FP(72);
+		CASE_FIXED_FP(73);
+		CASE_FIXED_FP(74);
+		CASE_FIXED_FP(75);
+		CASE_FIXED_FP(76);
+		CASE_FIXED_FP(77);
+		CASE_FIXED_FP(78);
+		CASE_FIXED_FP(79);
+		CASE_FIXED_FP(80);
+		CASE_FIXED_FP(81);
+		CASE_FIXED_FP(82);
+		CASE_FIXED_FP(83);
+		CASE_FIXED_FP(84);
+		CASE_FIXED_FP(85);
+		CASE_FIXED_FP(86);
+		CASE_FIXED_FP(87);
+		CASE_FIXED_FP(88);
+		CASE_FIXED_FP(89);
+		CASE_FIXED_FP(90);
+		CASE_FIXED_FP(91);
+		CASE_FIXED_FP(92);
+		CASE_FIXED_FP(93);
+		CASE_FIXED_FP(94);
+		CASE_FIXED_FP(95);
+		CASE_FIXED_FP(96);
+		CASE_FIXED_FP(97);
+		CASE_FIXED_FP(98);
+		CASE_FIXED_FP(99);
+		CASE_FIXED_FP(100);
+		CASE_FIXED_FP(101);
+		CASE_FIXED_FP(102);
+		CASE_FIXED_FP(103);
+		CASE_FIXED_FP(104);
+		CASE_FIXED_FP(105);
+		CASE_FIXED_FP(106);
+		CASE_FIXED_FP(107);
+		CASE_FIXED_FP(108);
+		CASE_FIXED_FP(109);
+		CASE_FIXED_FP(110);
+		CASE_FIXED_FP(111);
+		CASE_FIXED_FP(112);
+		CASE_FIXED_FP(113);
+		CASE_FIXED_FP(114);
+		CASE_FIXED_FP(115);
+		CASE_FIXED_FP(116);
+		CASE_FIXED_FP(117);
+		CASE_FIXED_FP(118);
+		CASE_FIXED_FP(119);
+		CASE_FIXED_FP(120);
+		CASE_FIXED_FP(121);
+		CASE_FIXED_FP(122);
+		CASE_FIXED_FP(123);
+		CASE_FIXED_FP(124);
+		CASE_FIXED_FP(125);
+		CASE_FIXED_FP(126);
+		CASE_FIXED_FP(127);
+	}
+#undef CASE_FIXED_FP
+}
+
+void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
+					struct kvm_pt_regs *regs)
+{
+	/* Take floating register rotation into consideration*/
+	if (regnum >= IA64_FIRST_ROTATING_FR)
+		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
+
+#define CASE_FIXED_FP(reg)			\
+	case (reg) :				\
+		ia64_ldf_fill(reg, fpval);	\
+	break
+
+	switch (regnum) {
+		CASE_FIXED_FP(2);
+		CASE_FIXED_FP(3);
+		CASE_FIXED_FP(4);
+		CASE_FIXED_FP(5);
+
+		CASE_FIXED_FP(6);
+		CASE_FIXED_FP(7);
+		CASE_FIXED_FP(8);
+		CASE_FIXED_FP(9);
+		CASE_FIXED_FP(10);
+		CASE_FIXED_FP(11);
+
+		CASE_FIXED_FP(12);
+		CASE_FIXED_FP(13);
+		CASE_FIXED_FP(14);
+		CASE_FIXED_FP(15);
+		CASE_FIXED_FP(16);
+		CASE_FIXED_FP(17);
+		CASE_FIXED_FP(18);
+		CASE_FIXED_FP(19);
+		CASE_FIXED_FP(20);
+		CASE_FIXED_FP(21);
+		CASE_FIXED_FP(22);
+		CASE_FIXED_FP(23);
+		CASE_FIXED_FP(24);
+		CASE_FIXED_FP(25);
+		CASE_FIXED_FP(26);
+		CASE_FIXED_FP(27);
+		CASE_FIXED_FP(28);
+		CASE_FIXED_FP(29);
+		CASE_FIXED_FP(30);
+		CASE_FIXED_FP(31);
+		CASE_FIXED_FP(32);
+		CASE_FIXED_FP(33);
+		CASE_FIXED_FP(34);
+		CASE_FIXED_FP(35);
+		CASE_FIXED_FP(36);
+		CASE_FIXED_FP(37);
+		CASE_FIXED_FP(38);
+		CASE_FIXED_FP(39);
+		CASE_FIXED_FP(40);
+		CASE_FIXED_FP(41);
+		CASE_FIXED_FP(42);
+		CASE_FIXED_FP(43);
+		CASE_FIXED_FP(44);
+		CASE_FIXED_FP(45);
+		CASE_FIXED_FP(46);
+		CASE_FIXED_FP(47);
+		CASE_FIXED_FP(48);
+		CASE_FIXED_FP(49);
+		CASE_FIXED_FP(50);
+		CASE_FIXED_FP(51);
+		CASE_FIXED_FP(52);
+		CASE_FIXED_FP(53);
+		CASE_FIXED_FP(54);
+		CASE_FIXED_FP(55);
+		CASE_FIXED_FP(56);
+		CASE_FIXED_FP(57);
+		CASE_FIXED_FP(58);
+		CASE_FIXED_FP(59);
+		CASE_FIXED_FP(60);
+		CASE_FIXED_FP(61);
+		CASE_FIXED_FP(62);
+		CASE_FIXED_FP(63);
+		CASE_FIXED_FP(64);
+		CASE_FIXED_FP(65);
+		CASE_FIXED_FP(66);
+		CASE_FIXED_FP(67);
+		CASE_FIXED_FP(68);
+		CASE_FIXED_FP(69);
+		CASE_FIXED_FP(70);
+		CASE_FIXED_FP(71);
+		CASE_FIXED_FP(72);
+		CASE_FIXED_FP(73);
+		CASE_FIXED_FP(74);
+		CASE_FIXED_FP(75);
+		CASE_FIXED_FP(76);
+		CASE_FIXED_FP(77);
+		CASE_FIXED_FP(78);
+		CASE_FIXED_FP(79);
+		CASE_FIXED_FP(80);
+		CASE_FIXED_FP(81);
+		CASE_FIXED_FP(82);
+		CASE_FIXED_FP(83);
+		CASE_FIXED_FP(84);
+		CASE_FIXED_FP(85);
+		CASE_FIXED_FP(86);
+		CASE_FIXED_FP(87);
+		CASE_FIXED_FP(88);
+		CASE_FIXED_FP(89);
+		CASE_FIXED_FP(90);
+		CASE_FIXED_FP(91);
+		CASE_FIXED_FP(92);
+		CASE_FIXED_FP(93);
+		CASE_FIXED_FP(94);
+		CASE_FIXED_FP(95);
+		CASE_FIXED_FP(96);
+		CASE_FIXED_FP(97);
+		CASE_FIXED_FP(98);
+		CASE_FIXED_FP(99);
+		CASE_FIXED_FP(100);
+		CASE_FIXED_FP(101);
+		CASE_FIXED_FP(102);
+		CASE_FIXED_FP(103);
+		CASE_FIXED_FP(104);
+		CASE_FIXED_FP(105);
+		CASE_FIXED_FP(106);
+		CASE_FIXED_FP(107);
+		CASE_FIXED_FP(108);
+		CASE_FIXED_FP(109);
+		CASE_FIXED_FP(110);
+		CASE_FIXED_FP(111);
+		CASE_FIXED_FP(112);
+		CASE_FIXED_FP(113);
+		CASE_FIXED_FP(114);
+		CASE_FIXED_FP(115);
+		CASE_FIXED_FP(116);
+		CASE_FIXED_FP(117);
+		CASE_FIXED_FP(118);
+		CASE_FIXED_FP(119);
+		CASE_FIXED_FP(120);
+		CASE_FIXED_FP(121);
+		CASE_FIXED_FP(122);
+		CASE_FIXED_FP(123);
+		CASE_FIXED_FP(124);
+		CASE_FIXED_FP(125);
+		CASE_FIXED_FP(126);
+		CASE_FIXED_FP(127);
+	}
+}
+
+void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+						struct ia64_fpreg *val)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	getfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
+}
+
+void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+						struct ia64_fpreg *val)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	if (reg > 1)
+		setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
+}
+
+/*
+ * The Altix RTC is mapped specially here for the vmm module
+ */
+#define SN_RTC_BASE	(u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
+static long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
+
+	if (kvm->arch.is_sn2)
+		return (*SN_RTC_BASE);
+	else
+#endif
+		return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
+/************************************************************************
+ * lsapic timer
+ ***********************************************************************/
+u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
+{
+	unsigned long guest_itc;
+	guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
+
+	if (guest_itc >= VMX(vcpu, last_itc)) {
+		VMX(vcpu, last_itc) = guest_itc;
+		return  guest_itc;
+	} else
+		return VMX(vcpu, last_itc);
+}
+
+static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
+static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
+{
+	struct kvm_vcpu *v;
+	struct kvm *kvm;
+	int i;
+	long itc_offset = val - kvm_get_itc(vcpu);
+	unsigned long vitv = VCPU(vcpu, itv);
+
+	kvm = (struct kvm *)KVM_VM_BASE;
+
+	if (kvm_vcpu_is_bsp(vcpu)) {
+		for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu +
+					sizeof(struct kvm_vcpu_data) * i);
+			VMX(v, itc_offset) = itc_offset;
+			VMX(v, last_itc) = 0;
+		}
+	}
+	VMX(vcpu, last_itc) = 0;
+	if (VCPU(vcpu, itm) <= val) {
+		VMX(vcpu, itc_check) = 0;
+		vcpu_unpend_interrupt(vcpu, vitv);
+	} else {
+		VMX(vcpu, itc_check) = 1;
+		vcpu_set_itm(vcpu, VCPU(vcpu, itm));
+	}
+
+}
+
+static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, itm));
+}
+
+static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
+{
+	unsigned long vitv = VCPU(vcpu, itv);
+	VCPU(vcpu, itm) = val;
+
+	if (val > vcpu_get_itc(vcpu)) {
+		VMX(vcpu, itc_check) = 1;
+		vcpu_unpend_interrupt(vcpu, vitv);
+		VMX(vcpu, timer_pending) = 0;
+	} else
+		VMX(vcpu, itc_check) = 0;
+}
+
+#define  ITV_VECTOR(itv)    (itv&0xff)
+#define  ITV_IRQ_MASK(itv)  (itv&(1<<16))
+
+static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, itv) = val;
+	if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
+		vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
+		vcpu->arch.timer_pending = 0;
+	}
+}
+
+static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
+{
+	int vec;
+
+	vec = highest_inservice_irq(vcpu);
+	if (vec == NULL_VECTOR)
+		return;
+	VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
+	VCPU(vcpu, eoi) = 0;
+	vcpu->arch.irq_new_pending = 1;
+
+}
+
+/* See Table 5-8 in SDM vol2 for the definition */
+int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
+{
+	union ia64_tpr vtpr;
+
+	vtpr.val = VCPU(vcpu, tpr);
+
+	if (h_inservice == NMI_VECTOR)
+		return IRQ_MASKED_BY_INSVC;
+
+	if (h_pending == NMI_VECTOR) {
+		/* Non Maskable Interrupt */
+		return IRQ_NO_MASKED;
+	}
+
+	if (h_inservice == ExtINT_VECTOR)
+		return IRQ_MASKED_BY_INSVC;
+
+	if (h_pending == ExtINT_VECTOR) {
+		if (vtpr.mmi) {
+			/* mask all external IRQ */
+			return IRQ_MASKED_BY_VTPR;
+		} else
+			return IRQ_NO_MASKED;
+	}
+
+	if (is_higher_irq(h_pending, h_inservice)) {
+		if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
+			return IRQ_NO_MASKED;
+		else
+			return IRQ_MASKED_BY_VTPR;
+	} else {
+		return IRQ_MASKED_BY_INSVC;
+	}
+}
+
+void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
+{
+	long spsr;
+	int ret;
+
+	local_irq_save(spsr);
+	ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
+	local_irq_restore(spsr);
+
+	vcpu->arch.irq_new_pending = 1;
+}
+
+void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
+{
+	long spsr;
+	int ret;
+
+	local_irq_save(spsr);
+	ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
+	local_irq_restore(spsr);
+	if (ret) {
+		vcpu->arch.irq_new_pending = 1;
+		wmb();
+	}
+}
+
+void update_vhpi(struct kvm_vcpu *vcpu, int vec)
+{
+	u64 vhpi;
+
+	if (vec == NULL_VECTOR)
+		vhpi = 0;
+	else if (vec == NMI_VECTOR)
+		vhpi = 32;
+	else if (vec == ExtINT_VECTOR)
+		vhpi = 16;
+	else
+		vhpi = vec >> 4;
+
+	VCPU(vcpu, vhpi) = vhpi;
+	if (VCPU(vcpu, vac).a_int)
+		ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
+				(u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
+}
+
+u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
+{
+	int vec, h_inservice, mask;
+
+	vec = highest_pending_irq(vcpu);
+	h_inservice = highest_inservice_irq(vcpu);
+	mask = irq_masked(vcpu, vec, h_inservice);
+	if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
+		if (VCPU(vcpu, vhpi))
+			update_vhpi(vcpu, NULL_VECTOR);
+		return IA64_SPURIOUS_INT_VECTOR;
+	}
+	if (mask == IRQ_MASKED_BY_VTPR) {
+		update_vhpi(vcpu, vec);
+		return IA64_SPURIOUS_INT_VECTOR;
+	}
+	VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
+	vcpu_unpend_interrupt(vcpu, vec);
+	return  (u64)vec;
+}
+
+/**************************************************************************
+  Privileged operation emulation routines
+ **************************************************************************/
+u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	union ia64_pta vpta;
+	union ia64_rr vrr;
+	u64 pval;
+	u64 vhpt_offset;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
+	if (vpta.vf) {
+		pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
+				vpta.val, 0, 0, 0, 0);
+	} else {
+		pval = (vadr & VRN_MASK) | vhpt_offset |
+			(vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
+	}
+	return  pval;
+}
+
+u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	union ia64_rr vrr;
+	union ia64_pta vpta;
+	u64 pval;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	if (vpta.vf) {
+		pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
+						0, 0, 0, 0, 0);
+	} else
+		pval = 1;
+
+	return  pval;
+}
+
+u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	struct thash_data *data;
+	union ia64_pta vpta;
+	u64 key;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	if (vpta.vf == 0) {
+		key = 1;
+		return key;
+	}
+	data = vtlb_lookup(vcpu, vadr, D_TLB);
+	if (!data || !data->p)
+		key = 1;
+	else
+		key = data->key;
+
+	return key;
+}
+
+void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long thash, vadr;
+
+	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
+	thash = vcpu_thash(vcpu, vadr);
+	vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
+}
+
+void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long tag, vadr;
+
+	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
+	tag = vcpu_ttag(vcpu, vadr);
+	vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
+}
+
+int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
+{
+	struct thash_data *data;
+	union ia64_isr visr, pt_isr;
+	struct kvm_pt_regs *regs;
+	struct ia64_psr vpsr;
+
+	regs = vcpu_regs(vcpu);
+	pt_isr.val = VMX(vcpu, cr_isr);
+	visr.val = 0;
+	visr.ei = pt_isr.ei;
+	visr.ir = pt_isr.ir;
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	visr.na = 1;
+
+	data = vhpt_lookup(vadr);
+	if (data) {
+		if (data->p == 0) {
+			vcpu_set_isr(vcpu, visr.val);
+			data_page_not_present(vcpu, vadr);
+			return IA64_FAULT;
+		} else if (data->ma == VA_MATTR_NATPAGE) {
+			vcpu_set_isr(vcpu, visr.val);
+			dnat_page_consumption(vcpu, vadr);
+			return IA64_FAULT;
+		} else {
+			*padr = (data->gpaddr >> data->ps << data->ps) |
+				(vadr & (PSIZE(data->ps) - 1));
+			return IA64_NO_FAULT;
+		}
+	}
+
+	data = vtlb_lookup(vcpu, vadr, D_TLB);
+	if (data) {
+		if (data->p == 0) {
+			vcpu_set_isr(vcpu, visr.val);
+			data_page_not_present(vcpu, vadr);
+			return IA64_FAULT;
+		} else if (data->ma == VA_MATTR_NATPAGE) {
+			vcpu_set_isr(vcpu, visr.val);
+			dnat_page_consumption(vcpu, vadr);
+			return IA64_FAULT;
+		} else{
+			*padr = ((data->ppn >> (data->ps - 12)) << data->ps)
+				| (vadr & (PSIZE(data->ps) - 1));
+			return IA64_NO_FAULT;
+		}
+	}
+	if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
+		if (vpsr.ic) {
+			vcpu_set_isr(vcpu, visr.val);
+			alt_dtlb(vcpu, vadr);
+			return IA64_FAULT;
+		} else {
+			nested_dtlb(vcpu);
+			return IA64_FAULT;
+		}
+	} else {
+		if (vpsr.ic) {
+			vcpu_set_isr(vcpu, visr.val);
+			dvhpt_fault(vcpu, vadr);
+			return IA64_FAULT;
+		} else{
+			nested_dtlb(vcpu);
+			return IA64_FAULT;
+		}
+	}
+
+	return IA64_NO_FAULT;
+}
+
+int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1, r3;
+
+	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
+
+	if (vcpu_tpa(vcpu, r3, &r1))
+		return IA64_FAULT;
+
+	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+	return(IA64_NO_FAULT);
+}
+
+void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1, r3;
+
+	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
+	r1 = vcpu_tak(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+}
+
+/************************************
+ * Insert/Purge translation register/cache
+ ************************************/
+void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
+{
+	thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
+}
+
+void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
+{
+	thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
+}
+
+void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
+{
+	u64 ps, va, rid;
+	struct thash_data *p_itr;
+
+	ps = itir_ps(itir);
+	va = PAGEALIGN(ifa, ps);
+	pte &= ~PAGE_FLAGS_RV_MASK;
+	rid = vcpu_get_rr(vcpu, ifa);
+	rid = rid & RR_RID_MASK;
+	p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
+	vcpu_set_tr(p_itr, pte, itir, va, rid);
+	vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
+}
+
+
+void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
+{
+	u64 gpfn;
+	u64 ps, va, rid;
+	struct thash_data *p_dtr;
+
+	ps = itir_ps(itir);
+	va = PAGEALIGN(ifa, ps);
+	pte &= ~PAGE_FLAGS_RV_MASK;
+
+	if (ps != _PAGE_SIZE_16M)
+		thash_purge_entries(vcpu, va, ps);
+	gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
+	if (__gpfn_is_io(gpfn))
+		pte |= VTLB_PTE_IO;
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;
+	p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
+	vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
+							pte, itir, va, rid);
+	vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
+}
+
+void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
+{
+	int index;
+	u64 va;
+
+	va = PAGEALIGN(ifa, ps);
+	while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
+		vcpu->arch.dtrs[index].page_flags = 0;
+
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
+{
+	int index;
+	u64 va;
+
+	va = PAGEALIGN(ifa, ps);
+	while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
+		vcpu->arch.itrs[index].page_flags = 0;
+
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	va = PAGEALIGN(va, ps);
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
+{
+	thash_purge_all(vcpu);
+}
+
+void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	long psr;
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_PTC_G;
+
+	p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
+	p->u.ptc_g_data.vaddr = va;
+	p->u.ptc_g_data.ps = ps;
+	vmm_transition(vcpu);
+	/* Do Local Purge Here*/
+	vcpu_ptc_l(vcpu, va, ps);
+	local_irq_restore(psr);
+}
+
+
+void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	vcpu_ptc_ga(vcpu, va, ps);
+}
+
+void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	vcpu_ptc_e(vcpu, ifa);
+}
+
+void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte, slot;
+
+	slot = vcpu_get_gr(vcpu, inst.M45.r3);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	vcpu_itr_d(vcpu, slot, pte, itir, ifa);
+}
+
+
+
+void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte, slot;
+
+	slot = vcpu_get_gr(vcpu, inst.M45.r3);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	vcpu_itr_i(vcpu, slot, pte, itir, ifa);
+}
+
+void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte;
+
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_itc_d(vcpu, pte, itir, ifa);
+}
+
+void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte;
+
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_itc_i(vcpu, pte, itir, ifa);
+}
+
+/*************************************
+ * Moves to semi-privileged registers
+ *************************************/
+
+void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long imm;
+
+	if (inst.M30.s)
+		imm = -inst.M30.imm;
+	else
+		imm = inst.M30.imm;
+
+	vcpu_set_itc(vcpu, imm);
+}
+
+void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r2;
+
+	r2 = vcpu_get_gr(vcpu, inst.M29.r2);
+	vcpu_set_itc(vcpu, r2);
+}
+
+void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1;
+
+	r1 = vcpu_get_itc(vcpu);
+	vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
+}
+
+/**************************************************************************
+  struct kvm_vcpu protection key register access routines
+ **************************************************************************/
+
+unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	return ((unsigned long)ia64_get_pkr(reg));
+}
+
+void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
+{
+	ia64_set_pkr(reg, val);
+}
+
+/********************************
+ * Moves to privileged registers
+ ********************************/
+unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
+					unsigned long val)
+{
+	union ia64_rr oldrr, newrr;
+	unsigned long rrval;
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long psr;
+
+	oldrr.val = vcpu_get_rr(vcpu, reg);
+	newrr.val = val;
+	vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
+
+	switch ((unsigned long)(reg >> VRN_SHIFT)) {
+	case VRN6:
+		vcpu->arch.vmm_rr = vrrtomrr(val);
+		local_irq_save(psr);
+		p->exit_reason = EXIT_REASON_SWITCH_RR6;
+		vmm_transition(vcpu);
+		local_irq_restore(psr);
+		break;
+	case VRN4:
+		rrval = vrrtomrr(val);
+		vcpu->arch.metaphysical_saved_rr4 = rrval;
+		if (!is_physical_mode(vcpu))
+			ia64_set_rr(reg, rrval);
+		break;
+	case VRN0:
+		rrval = vrrtomrr(val);
+		vcpu->arch.metaphysical_saved_rr0 = rrval;
+		if (!is_physical_mode(vcpu))
+			ia64_set_rr(reg, rrval);
+		break;
+	default:
+		ia64_set_rr(reg, vrrtomrr(val));
+		break;
+	}
+
+	return (IA64_NO_FAULT);
+}
+
+void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_rr(vcpu, r3, r2);
+}
+
+void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+}
+
+void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+}
+
+void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pmc(vcpu, r3, r2);
+}
+
+void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pmd(vcpu, r3, r2);
+}
+
+void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	u64 r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pkr(vcpu, r3, r2);
+}
+
+void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_rr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_pkr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_dbr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_ibr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_pmc(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	/* FIXME: This could get called as a result of a rsvd-reg fault */
+	if (reg > (ia64_get_cpuid(3) & 0xff))
+		return 0;
+	else
+		return ia64_get_cpuid(reg);
+}
+
+void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_cpuid(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+	VCPU(vcpu, tpr) = val;
+	vcpu->arch.irq_check = 1;
+}
+
+unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r2;
+
+	r2 = vcpu_get_gr(vcpu, inst.M32.r2);
+	VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
+
+	switch (inst.M32.cr3) {
+	case 0:
+		vcpu_set_dcr(vcpu, r2);
+		break;
+	case 1:
+		vcpu_set_itm(vcpu, r2);
+		break;
+	case 66:
+		vcpu_set_tpr(vcpu, r2);
+		break;
+	case 67:
+		vcpu_set_eoi(vcpu, r2);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long tgt = inst.M33.r1;
+	unsigned long val;
+
+	switch (inst.M33.cr3) {
+	case 65:
+		val = vcpu_get_ivr(vcpu);
+		vcpu_set_gr(vcpu, tgt, val, 0);
+		break;
+
+	case 67:
+		vcpu_set_gr(vcpu, tgt, 0L, 0);
+		break;
+	default:
+		val = VCPU(vcpu, vcr[inst.M33.cr3]);
+		vcpu_set_gr(vcpu, tgt, val, 0);
+		break;
+	}
+
+	return 0;
+}
+
+void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+
+	unsigned long mask;
+	struct kvm_pt_regs *regs;
+	struct ia64_psr old_psr, new_psr;
+
+	old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	regs = vcpu_regs(vcpu);
+	/* We only support guest as:
+	 *  vpsr.pk = 0
+	 *  vpsr.is = 0
+	 * Otherwise panic
+	 */
+	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
+		panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
+				"& vpsr.is=0\n");
+
+	/*
+	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
+	 * Since these bits will become 0, after success execution of each
+	 * instruction, we will change set them to mIA64_PSR
+	 */
+	VCPU(vcpu, vpsr) = val
+		& (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
+			IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
+
+	if (!old_psr.i && (val & IA64_PSR_I)) {
+		/* vpsr.i 0->1 */
+		vcpu->arch.irq_check = 1;
+	}
+	new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	/*
+	 * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
+	 * , except for the following bits:
+	 *  ic/i/dt/si/rt/mc/it/bn/vm
+	 */
+	mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
+		IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
+		IA64_PSR_VM;
+
+	regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
+
+	check_mm_mode_switch(vcpu, old_psr, new_psr);
+
+	return ;
+}
+
+unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
+{
+	struct ia64_psr vpsr;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	if (!vpsr.ic)
+		VCPU(vcpu, ifs) = regs->cr_ifs;
+	regs->cr_ifs = IA64_IFS_V;
+	return (IA64_NO_FAULT);
+}
+
+
+
+/**************************************************************************
+  VCPU banked general register access routines
+ **************************************************************************/
+#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
+	do {     							\
+		__asm__ __volatile__ (					\
+				";;extr.u %0 = %3,%6,16;;\n"		\
+				"dep %1 = %0, %1, 0, 16;;\n"		\
+				"st8 [%4] = %1\n"			\
+				"extr.u %0 = %2, 16, 16;;\n"		\
+				"dep %3 = %0, %3, %6, 16;;\n"		\
+				"st8 [%5] = %3\n"			\
+				::"r"(i), "r"(*b1unat), "r"(*b0unat),	\
+				"r"(*runat), "r"(b1unat), "r"(runat),	\
+				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
+	} while (0)
+
+void vcpu_bsw0(struct kvm_vcpu *vcpu)
+{
+	unsigned long i;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	unsigned long *r = &regs->r16;
+	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
+	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
+	unsigned long *runat = &regs->eml_unat;
+	unsigned long *b0unat = &VCPU(vcpu, vbnat);
+	unsigned long *b1unat = &VCPU(vcpu, vnat);
+
+
+	if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
+		for (i = 0; i < 16; i++) {
+			*b1++ = *r;
+			*r++ = *b0++;
+		}
+		vcpu_bsw0_unat(i, b0unat, b1unat, runat,
+				VMM_PT_REGS_R16_SLOT);
+		VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
+	}
+}
+
+#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
+	do {             						\
+		__asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n"	\
+				"dep %1 = %0, %1, 16, 16;;\n"		\
+				"st8 [%4] = %1\n"			\
+				"extr.u %0 = %2, 0, 16;;\n"		\
+				"dep %3 = %0, %3, %6, 16;;\n"		\
+				"st8 [%5] = %3\n"			\
+				::"r"(i), "r"(*b0unat), "r"(*b1unat),	\
+				"r"(*runat), "r"(b0unat), "r"(runat),	\
+				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
+	} while (0)
+
+void vcpu_bsw1(struct kvm_vcpu *vcpu)
+{
+	unsigned long i;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	unsigned long *r = &regs->r16;
+	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
+	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
+	unsigned long *runat = &regs->eml_unat;
+	unsigned long *b0unat = &VCPU(vcpu, vbnat);
+	unsigned long *b1unat = &VCPU(vcpu, vnat);
+
+	if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
+		for (i = 0; i < 16; i++) {
+			*b0++ = *r;
+			*r++ = *b1++;
+		}
+		vcpu_bsw1_unat(i, b0unat, b1unat, runat,
+				VMM_PT_REGS_R16_SLOT);
+		VCPU(vcpu, vpsr) |= IA64_PSR_BN;
+	}
+}
+
+void vcpu_rfi(struct kvm_vcpu *vcpu)
+{
+	unsigned long ifs, psr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	psr = VCPU(vcpu, ipsr);
+	if (psr & IA64_PSR_BN)
+		vcpu_bsw1(vcpu);
+	else
+		vcpu_bsw0(vcpu);
+	vcpu_set_psr(vcpu, psr);
+	ifs = VCPU(vcpu, ifs);
+	if (ifs >> 63)
+		regs->cr_ifs = ifs;
+	regs->cr_iip = VCPU(vcpu, iip);
+}
+
+/*
+   VPSR can't keep track of below bits of guest PSR
+   This function gets guest PSR
+ */
+
+unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
+{
+	unsigned long mask;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
+		IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
+	return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
+}
+
+void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long vpsr;
+	unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
+					| inst.M44.imm;
+
+	vpsr = vcpu_get_psr(vcpu);
+	vpsr &= (~imm24);
+	vcpu_set_psr(vcpu, vpsr);
+}
+
+void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long vpsr;
+	unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
+				| inst.M44.imm;
+
+	vpsr = vcpu_get_psr(vcpu);
+	vpsr |= imm24;
+	vcpu_set_psr(vcpu, vpsr);
+}
+
+/* Generate Mask
+ * Parameter:
+ *  bit -- starting bit
+ *  len -- how many bits
+ */
+#define MASK(bit,len)				   	\
+({							\
+		__u64	ret;				\
+							\
+		__asm __volatile("dep %0=-1, r0, %1, %2"\
+				: "=r" (ret):		\
+		  "M" (bit),				\
+		  "M" (len));				\
+		ret;					\
+})
+
+void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
+{
+	val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
+	vcpu_set_psr(vcpu, val);
+}
+
+void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long val;
+
+	val = vcpu_get_gr(vcpu, inst.M35.r2);
+	vcpu_set_psr_l(vcpu, val);
+}
+
+void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long val;
+
+	val = vcpu_get_psr(vcpu);
+	val = (val & MASK(0, 32)) | (val & MASK(35, 2));
+	vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
+}
+
+void vcpu_increment_iip(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+	if (ipsr->ri == 2) {
+		ipsr->ri = 0;
+		regs->cr_iip += 16;
+	} else
+		ipsr->ri++;
+}
+
+void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+
+	if (ipsr->ri == 0) {
+		ipsr->ri = 2;
+		regs->cr_iip -= 16;
+	} else
+		ipsr->ri--;
+}
+
+/** Emulate a privileged operation.
+ *
+ *
+ * @param vcpu virtual cpu
+ * @cause the reason cause virtualization fault
+ * @opcode the instruction code which cause virtualization fault
+ */
+
+void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
+{
+	unsigned long status, cause, opcode ;
+	INST64 inst;
+
+	status = IA64_NO_FAULT;
+	cause = VMX(vcpu, cause);
+	opcode = VMX(vcpu, opcode);
+	inst.inst = opcode;
+	/*
+	 * Switch to actual virtual rid in rr0 and rr4,
+	 * which is required by some tlb related instructions.
+	 */
+	prepare_if_physical_mode(vcpu);
+
+	switch (cause) {
+	case EVENT_RSM:
+		kvm_rsm(vcpu, inst);
+		break;
+	case EVENT_SSM:
+		kvm_ssm(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PSR:
+		kvm_mov_to_psr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PSR:
+		kvm_mov_from_psr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_CR:
+		kvm_mov_from_cr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_CR:
+		kvm_mov_to_cr(vcpu, inst);
+		break;
+	case EVENT_BSW_0:
+		vcpu_bsw0(vcpu);
+		break;
+	case EVENT_BSW_1:
+		vcpu_bsw1(vcpu);
+		break;
+	case EVENT_COVER:
+		vcpu_cover(vcpu);
+		break;
+	case EVENT_RFI:
+		vcpu_rfi(vcpu);
+		break;
+	case EVENT_ITR_D:
+		kvm_itr_d(vcpu, inst);
+		break;
+	case EVENT_ITR_I:
+		kvm_itr_i(vcpu, inst);
+		break;
+	case EVENT_PTR_D:
+		kvm_ptr_d(vcpu, inst);
+		break;
+	case EVENT_PTR_I:
+		kvm_ptr_i(vcpu, inst);
+		break;
+	case EVENT_ITC_D:
+		kvm_itc_d(vcpu, inst);
+		break;
+	case EVENT_ITC_I:
+		kvm_itc_i(vcpu, inst);
+		break;
+	case EVENT_PTC_L:
+		kvm_ptc_l(vcpu, inst);
+		break;
+	case EVENT_PTC_G:
+		kvm_ptc_g(vcpu, inst);
+		break;
+	case EVENT_PTC_GA:
+		kvm_ptc_ga(vcpu, inst);
+		break;
+	case EVENT_PTC_E:
+		kvm_ptc_e(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_RR:
+		kvm_mov_to_rr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_RR:
+		kvm_mov_from_rr(vcpu, inst);
+		break;
+	case EVENT_THASH:
+		kvm_thash(vcpu, inst);
+		break;
+	case EVENT_TTAG:
+		kvm_ttag(vcpu, inst);
+		break;
+	case EVENT_TPA:
+		status = kvm_tpa(vcpu, inst);
+		break;
+	case EVENT_TAK:
+		kvm_tak(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_AR_IMM:
+		kvm_mov_to_ar_imm(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_AR:
+		kvm_mov_to_ar_reg(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_AR:
+		kvm_mov_from_ar_reg(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_DBR:
+		kvm_mov_to_dbr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_IBR:
+		kvm_mov_to_ibr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PMC:
+		kvm_mov_to_pmc(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PMD:
+		kvm_mov_to_pmd(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PKR:
+		kvm_mov_to_pkr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_DBR:
+		kvm_mov_from_dbr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_IBR:
+		kvm_mov_from_ibr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PMC:
+		kvm_mov_from_pmc(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PKR:
+		kvm_mov_from_pkr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_CPUID:
+		kvm_mov_from_cpuid(vcpu, inst);
+		break;
+	case EVENT_VMSW:
+		status = IA64_FAULT;
+		break;
+	default:
+		break;
+	};
+	/*Assume all status is NO_FAULT ?*/
+	if (status == IA64_NO_FAULT && cause != EVENT_RFI)
+		vcpu_increment_iip(vcpu);
+
+	recover_if_physical_mode(vcpu);
+}
+
+void init_vcpu(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	vcpu->arch.mode_flags = GUEST_IN_PHY;
+	VMX(vcpu, vrr[0]) = 0x38;
+	VMX(vcpu, vrr[1]) = 0x38;
+	VMX(vcpu, vrr[2]) = 0x38;
+	VMX(vcpu, vrr[3]) = 0x38;
+	VMX(vcpu, vrr[4]) = 0x38;
+	VMX(vcpu, vrr[5]) = 0x38;
+	VMX(vcpu, vrr[6]) = 0x38;
+	VMX(vcpu, vrr[7]) = 0x38;
+	VCPU(vcpu, vpsr) = IA64_PSR_BN;
+	VCPU(vcpu, dcr) = 0;
+	/* pta.size must not be 0.  The minimum is 15 (32k) */
+	VCPU(vcpu, pta) = 15 << 2;
+	VCPU(vcpu, itv) = 0x10000;
+	VCPU(vcpu, itm) = 0;
+	VMX(vcpu, last_itc) = 0;
+
+	VCPU(vcpu, lid) = VCPU_LID(vcpu);
+	VCPU(vcpu, ivr) = 0;
+	VCPU(vcpu, tpr) = 0x10000;
+	VCPU(vcpu, eoi) = 0;
+	VCPU(vcpu, irr[0]) = 0;
+	VCPU(vcpu, irr[1]) = 0;
+	VCPU(vcpu, irr[2]) = 0;
+	VCPU(vcpu, irr[3]) = 0;
+	VCPU(vcpu, pmv) = 0x10000;
+	VCPU(vcpu, cmcv) = 0x10000;
+	VCPU(vcpu, lrr0) = 0x10000;   /* default reset value? */
+	VCPU(vcpu, lrr1) = 0x10000;   /* default reset value? */
+	update_vhpi(vcpu, NULL_VECTOR);
+	VLSAPIC_XTP(vcpu) = 0x80;	/* disabled */
+
+	for (i = 0; i < 4; i++)
+		VLSAPIC_INSVC(vcpu, i) = 0;
+}
+
+void kvm_init_all_rr(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	local_irq_save(psr);
+
+	/* WARNING: not allow co-exist of both virtual mode and physical
+	 * mode in same region
+	 */
+
+	vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
+	vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
+
+	if (is_physical_mode(vcpu)) {
+		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
+			panic_vm(vcpu, "Machine Status conflicts!\n");
+
+		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
+		ia64_dv_serialize_data();
+		ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
+		ia64_dv_serialize_data();
+	} else {
+		ia64_set_rr((VRN0 << VRN_SHIFT),
+				vcpu->arch.metaphysical_saved_rr0);
+		ia64_dv_serialize_data();
+		ia64_set_rr((VRN4 << VRN_SHIFT),
+				vcpu->arch.metaphysical_saved_rr4);
+		ia64_dv_serialize_data();
+	}
+	ia64_set_rr((VRN1 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN1])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN2 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN2])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN3 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN3])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN5 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN5])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN7 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN7])));
+	ia64_dv_serialize_data();
+	ia64_srlz_d();
+	ia64_set_psr(psr);
+}
+
+int vmm_entry(void)
+{
+	struct kvm_vcpu *v;
+	v = current_vcpu;
+
+	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
+						0, 0, 0, 0, 0, 0);
+	kvm_init_vtlb(v);
+	kvm_init_vhpt(v);
+	init_vcpu(v);
+	kvm_init_all_rr(v);
+	vmm_reset_entry();
+
+	return 0;
+}
+
+static void kvm_show_registers(struct kvm_pt_regs *regs)
+{
+	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+	struct kvm_vcpu *vcpu = current_vcpu;
+	if (vcpu != NULL)
+		printk("vcpu 0x%p vcpu %d\n",
+		       vcpu, vcpu->vcpu_id);
+
+	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
+	       regs->cr_ipsr, regs->cr_ifs, ip);
+
+	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+	printk("rnat: %016lx bspstore: %016lx pr  : %016lx\n",
+	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
+	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+	printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0,
+							regs->b6, regs->b7);
+	printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
+	       regs->f6.u.bits[1], regs->f6.u.bits[0],
+	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
+	printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
+	       regs->f8.u.bits[1], regs->f8.u.bits[0],
+	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
+	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+	       regs->f10.u.bits[1], regs->f10.u.bits[0],
+	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+	printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1,
+							regs->r2, regs->r3);
+	printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8,
+							regs->r9, regs->r10);
+	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
+							regs->r12, regs->r13);
+	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
+							regs->r15, regs->r16);
+	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
+							regs->r18, regs->r19);
+	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
+							regs->r21, regs->r22);
+	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
+							regs->r24, regs->r25);
+	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
+							regs->r27, regs->r28);
+	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
+							regs->r30, regs->r31);
+
+}
+
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	struct kvm_pt_regs *regs = vcpu_regs(v);
+	struct exit_ctl_data *p = &v->arch.exit_data;
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	printk(buf);
+	kvm_show_registers(regs);
+	p->exit_reason = EXIT_REASON_VM_PANIC;
+	vmm_transition(v);
+	/*Never to return*/
+	while (1);
+}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
new file mode 100644
index 00000000..988911b4
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.h
@@ -0,0 +1,752 @@
+/*
+ *  vcpu.h: vcpu routines
+ *  	Copyright (c) 2005, Intel Corporation.
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  	Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *
+ * 	Copyright (c) 2007, Intel Corporation.
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *	Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#ifndef __KVM_VCPU_H__
+#define __KVM_VCPU_H__
+
+#include <asm/types.h>
+#include <asm/fpu.h>
+#include <asm/processor.h>
+
+#ifndef __ASSEMBLY__
+#include "vti.h"
+
+#include <linux/kvm_host.h>
+#include <linux/spinlock.h>
+
+typedef unsigned long IA64_INST;
+
+typedef union U_IA64_BUNDLE {
+	unsigned long i64[2];
+	struct { unsigned long template:5, slot0:41, slot1a:18,
+		slot1b:23, slot2:41; };
+	/* NOTE: following doesn't work because bitfields can't cross natural
+	   size boundaries
+	   struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
+} IA64_BUNDLE;
+
+typedef union U_INST64_A5 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
+		imm9d:9, s:1, major:4; };
+} INST64_A5;
+
+typedef union U_INST64_B4 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
+		wh:2, d:1, un1:1, major:4; };
+} INST64_B4;
+
+typedef union U_INST64_B8 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
+} INST64_B8;
+
+typedef union U_INST64_B9 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
+} INST64_B9;
+
+typedef union U_INST64_I19 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
+} INST64_I19;
+
+typedef union U_INST64_I26 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_I26;
+
+typedef union U_INST64_I27 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
+} INST64_I27;
+
+typedef union U_INST64_I28 { /* not privileged (mov from AR) */
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_I28;
+
+typedef union U_INST64_M28 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M28;
+
+typedef union U_INST64_M29 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M29;
+
+typedef union U_INST64_M30 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
+		x3:3, s:1, major:4; };
+} INST64_M30;
+
+typedef union U_INST64_M31 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M31;
+
+typedef union U_INST64_M32 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M32;
+
+typedef union U_INST64_M33 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M33;
+
+typedef union U_INST64_M35 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+
+} INST64_M35;
+
+typedef union U_INST64_M36 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
+} INST64_M36;
+
+typedef union U_INST64_M37 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
+		i:1, major:4; };
+} INST64_M37;
+
+typedef union U_INST64_M41 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+} INST64_M41;
+
+typedef union U_INST64_M42 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M42;
+
+typedef union U_INST64_M43 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M43;
+
+typedef union U_INST64_M44 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
+} INST64_M44;
+
+typedef union U_INST64_M45 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M45;
+
+typedef union U_INST64_M46 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
+		x3:3, un1:1, major:4; };
+} INST64_M46;
+
+typedef union U_INST64_M47 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
+} INST64_M47;
+
+typedef union U_INST64_M1{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M1;
+
+typedef union U_INST64_M2{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M2;
+
+typedef union U_INST64_M3{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M3;
+
+typedef union U_INST64_M4 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M4;
+
+typedef union U_INST64_M5 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M5;
+
+typedef union U_INST64_M6 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M6;
+
+typedef union U_INST64_M9 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M9;
+
+typedef union U_INST64_M10 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M10;
+
+typedef union U_INST64_M12 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M12;
+
+typedef union U_INST64_M15 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M15;
+
+typedef union U_INST64 {
+	IA64_INST inst;
+	struct { unsigned long :37, major:4; } generic;
+	INST64_A5 A5;	/* used in build_hypercall_bundle only */
+	INST64_B4 B4;	/* used in build_hypercall_bundle only */
+	INST64_B8 B8;	/* rfi, bsw.[01] */
+	INST64_B9 B9;	/* break.b */
+	INST64_I19 I19;	/* used in build_hypercall_bundle only */
+	INST64_I26 I26;	/* mov register to ar (I unit) */
+	INST64_I27 I27;	/* mov immediate to ar (I unit) */
+	INST64_I28 I28;	/* mov from ar (I unit) */
+	INST64_M1  M1;	/* ld integer */
+	INST64_M2  M2;
+	INST64_M3  M3;
+	INST64_M4  M4;	/* st integer */
+	INST64_M5  M5;
+	INST64_M6  M6;	/* ldfd floating pointer 		*/
+	INST64_M9  M9;	/* stfd floating pointer		*/
+	INST64_M10 M10;	/* stfd floating pointer		*/
+	INST64_M12 M12;     /* ldfd pair floating pointer		*/
+	INST64_M15 M15;	/* lfetch + imm update			*/
+	INST64_M28 M28;	/* purge translation cache entry	*/
+	INST64_M29 M29;	/* mov register to ar (M unit)		*/
+	INST64_M30 M30;	/* mov immediate to ar (M unit)		*/
+	INST64_M31 M31;	/* mov from ar (M unit)			*/
+	INST64_M32 M32;	/* mov reg to cr			*/
+	INST64_M33 M33;	/* mov from cr				*/
+	INST64_M35 M35;	/* mov to psr				*/
+	INST64_M36 M36;	/* mov from psr				*/
+	INST64_M37 M37;	/* break.m				*/
+	INST64_M41 M41;	/* translation cache insert		*/
+	INST64_M42 M42;	/* mov to indirect reg/translation reg insert*/
+	INST64_M43 M43;	/* mov from indirect reg		*/
+	INST64_M44 M44;	/* set/reset system mask		*/
+	INST64_M45 M45;	/* translation purge			*/
+	INST64_M46 M46;	/* translation access (tpa,tak)		*/
+	INST64_M47 M47;	/* purge translation entry		*/
+} INST64;
+
+#define MASK_41 ((unsigned long)0x1ffffffffff)
+
+/* Virtual address memory attributes encoding */
+#define VA_MATTR_WB         0x0
+#define VA_MATTR_UC         0x4
+#define VA_MATTR_UCE        0x5
+#define VA_MATTR_WC         0x6
+#define VA_MATTR_NATPAGE    0x7
+
+#define PMASK(size)         (~((size) - 1))
+#define PSIZE(size)         (1UL<<(size))
+#define CLEARLSB(ppn, nbits)    (((ppn) >> (nbits)) << (nbits))
+#define PAGEALIGN(va, ps)	CLEARLSB(va, ps)
+#define PAGE_FLAGS_RV_MASK   (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
+#define _PAGE_MA_ST     (0x1 <<  2) /* is reserved for software use */
+
+#define ARCH_PAGE_SHIFT   12
+
+#define INVALID_TI_TAG (1UL << 63)
+
+#define VTLB_PTE_P_BIT      0
+#define VTLB_PTE_IO_BIT     60
+#define VTLB_PTE_IO         (1UL<<VTLB_PTE_IO_BIT)
+#define VTLB_PTE_P          (1UL<<VTLB_PTE_P_BIT)
+
+#define vcpu_quick_region_check(_tr_regions,_ifa)		\
+	(_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
+
+#define vcpu_quick_region_set(_tr_regions,_ifa)             \
+	do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
+
+static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
+		u64 va, u64 rid)
+{
+	trp->page_flags = pte;
+	trp->itir = itir;
+	trp->vadr = va;
+	trp->rid = rid;
+}
+
+extern u64 kvm_get_mpt_entry(u64 gpfn);
+
+/* Return I/ */
+static inline u64 __gpfn_is_io(u64 gpfn)
+{
+	u64  pte;
+	pte = kvm_get_mpt_entry(gpfn);
+	if (!(pte & GPFN_INV_MASK)) {
+		pte = pte & GPFN_IO_MASK;
+		if (pte != GPFN_PHYS_MMIO)
+			return pte;
+	}
+	return 0;
+}
+#endif
+#define IA64_NO_FAULT	0
+#define IA64_FAULT	1
+
+#define VMM_RBS_OFFSET  ((VMM_TASK_SIZE + 15) & ~15)
+
+#define SW_BAD  0   /* Bad mode transitition */
+#define SW_V2P  1   /* Physical emulatino is activated */
+#define SW_P2V  2   /* Exit physical mode emulation */
+#define SW_SELF 3   /* No mode transition */
+#define SW_NOP  4   /* Mode transition, but without action required */
+
+#define GUEST_IN_PHY    0x1
+#define GUEST_PHY_EMUL  0x2
+
+#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
+
+#define VRN_SHIFT	61
+#define VRN_MASK	0xe000000000000000
+#define VRN0		0x0UL
+#define VRN1		0x1UL
+#define VRN2		0x2UL
+#define VRN3		0x3UL
+#define VRN4		0x4UL
+#define VRN5		0x5UL
+#define VRN6		0x6UL
+#define VRN7		0x7UL
+
+#define IRQ_NO_MASKED         0
+#define IRQ_MASKED_BY_VTPR    1
+#define IRQ_MASKED_BY_INSVC   2   /* masked by inservice IRQ */
+
+#define PTA_BASE_SHIFT      15
+
+#define IA64_PSR_VM_BIT     46
+#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
+
+/* Interruption Function State */
+#define IA64_IFS_V_BIT      63
+#define IA64_IFS_V  (__IA64_UL(1) << IA64_IFS_V_BIT)
+
+#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
+#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/gcc_intrin.h>
+
+#define is_physical_mode(v)		\
+	((v->arch.mode_flags) & GUEST_IN_PHY)
+
+#define is_virtual_mode(v)	\
+	(!is_physical_mode(v))
+
+#define MODE_IND(psr)	\
+	(((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
+
+#ifndef CONFIG_SMP
+#define _vmm_raw_spin_lock(x)	 do {}while(0)
+#define _vmm_raw_spin_unlock(x) do {}while(0)
+#else
+typedef struct {
+	volatile unsigned int lock;
+} vmm_spinlock_t;
+#define _vmm_raw_spin_lock(x)						\
+	do {								\
+		__u32 *ia64_spinlock_ptr = (__u32 *) (x);		\
+		__u64 ia64_spinlock_val;				\
+		ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
+		if (unlikely(ia64_spinlock_val)) {			\
+			do {						\
+				while (*ia64_spinlock_ptr)		\
+				ia64_barrier();				\
+				ia64_spinlock_val =			\
+				ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
+			} while (ia64_spinlock_val);			\
+		}							\
+	} while (0)
+
+#define _vmm_raw_spin_unlock(x)				\
+	do { barrier();				\
+		((vmm_spinlock_t *)x)->lock = 0; } \
+while (0)
+#endif
+
+void vmm_spin_lock(vmm_spinlock_t *lock);
+void vmm_spin_unlock(vmm_spinlock_t *lock);
+enum {
+	I_TLB = 1,
+	D_TLB = 2
+};
+
+union kvm_va {
+	struct {
+		unsigned long off : 60;		/* intra-region offset */
+		unsigned long reg :  4;		/* region number */
+	} f;
+	unsigned long l;
+	void *p;
+};
+
+#define __kvm_pa(x)     ({union kvm_va _v; _v.l = (long) (x);		\
+						_v.f.reg = 0; _v.l; })
+#define __kvm_va(x)     ({union kvm_va _v; _v.l = (long) (x);		\
+				_v.f.reg = -1; _v.p; })
+
+#define _REGION_ID(x)           ({union ia64_rr _v; _v.val = (long)(x); \
+						_v.rid; })
+#define _REGION_PAGE_SIZE(x)    ({union ia64_rr _v; _v.val = (long)(x); \
+						_v.ps; })
+#define _REGION_HW_WALKER(x)    ({union ia64_rr _v; _v.val = (long)(x);	\
+						_v.ve; })
+
+enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
+enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
+
+#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
+#define VMX(_v, _x)  ((_v)->arch._x)
+
+#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
+#define VLSAPIC_XTP(_v)        VMX(_v, xtp)
+
+static inline unsigned long itir_ps(unsigned long itir)
+{
+	return ((itir >> 2) & 0x3f);
+}
+
+
+/**************************************************************************
+  VCPU control register access routines
+ **************************************************************************/
+
+static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, itir));
+}
+
+static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, itir) = val;
+}
+
+static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, ifa));
+}
+
+static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ifa) = val;
+}
+
+static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, iva));
+}
+
+static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, pta));
+}
+
+static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, lid));
+}
+
+static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, tpr));
+}
+
+static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
+{
+	return (0UL);		/*reads of eoi always return 0 */
+}
+
+static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[0]));
+}
+
+static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[1]));
+}
+
+static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[2]));
+}
+
+static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[3]));
+}
+
+static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
+{
+	ia64_setreg(_IA64_REG_CR_DCR, val);
+}
+
+static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, isr) = val;
+}
+
+static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, lid) = val;
+}
+
+static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ipsr) = val;
+}
+
+static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iip) = val;
+}
+
+static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ifs) = val;
+}
+
+static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iipa) = val;
+}
+
+static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iha) = val;
+}
+
+
+static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return vcpu->arch.vrr[reg>>61];
+}
+
+/**************************************************************************
+  VCPU debug breakpoint register access routines
+ **************************************************************************/
+
+static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	__ia64_set_dbr(reg, val);
+}
+
+static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	ia64_set_ibr(reg, val);
+}
+
+static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return ((u64)__ia64_get_dbr(reg));
+}
+
+static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return ((u64)ia64_get_ibr(reg));
+}
+
+/**************************************************************************
+  VCPU performance monitor register access routines
+ **************************************************************************/
+static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	/* NOTE: Writes to unimplemented PMC registers are discarded */
+	ia64_set_pmc(reg, val);
+}
+
+static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	/* NOTE: Writes to unimplemented PMD registers are discarded */
+	ia64_set_pmd(reg, val);
+}
+
+static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
+{
+	/* NOTE: Reads from unimplemented PMC registers return zero */
+	return ((u64)ia64_get_pmc(reg));
+}
+
+static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
+{
+	/* NOTE: Reads from unimplemented PMD registers return zero */
+	return ((u64)ia64_get_pmd(reg));
+}
+
+static inline unsigned long vrrtomrr(unsigned long val)
+{
+	union ia64_rr rr;
+	rr.val = val;
+	rr.rid = (rr.rid << 4) | 0xe;
+	if (rr.ps > PAGE_SHIFT)
+		rr.ps = PAGE_SHIFT;
+	rr.ve = 1;
+	return rr.val;
+}
+
+
+static inline int highest_bits(int *dat)
+{
+	u32  bits, bitnum;
+	int i;
+
+	/* loop for all 256 bits */
+	for (i = 7; i >= 0 ; i--) {
+		bits = dat[i];
+		if (bits) {
+			bitnum = fls(bits);
+			return i * 32 + bitnum - 1;
+		}
+	}
+	return NULL_VECTOR;
+}
+
+/*
+ * The pending irq is higher than the inservice one.
+ *
+ */
+static inline int is_higher_irq(int pending, int inservice)
+{
+	return ((pending > inservice)
+			|| ((pending != NULL_VECTOR)
+				&& (inservice == NULL_VECTOR)));
+}
+
+static inline int is_higher_class(int pending, int mic)
+{
+	return ((pending >> 4) > mic);
+}
+
+/*
+ * Return 0-255 for pending irq.
+ *        NULL_VECTOR: when no pending.
+ */
+static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
+{
+	if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
+		return NMI_VECTOR;
+	if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+	return highest_bits((int *)&VCPU(vcpu, irr[0]));
+}
+
+static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
+{
+	if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
+		return NMI_VECTOR;
+	if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+	return highest_bits((int *)&(VMX(vcpu, insvc[0])));
+}
+
+extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+					struct ia64_fpreg *val);
+extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+					struct ia64_fpreg *val);
+extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
+extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
+			u64 val, int nat);
+extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
+extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
+extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
+extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
+extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
+					u64 itir, u64 va, int type);
+extern struct thash_data *vhpt_lookup(u64 va);
+extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
+extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
+extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
+extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
+extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
+		u64 itir, u64 ifa, int type);
+extern void thash_purge_all(struct kvm_vcpu *v);
+extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
+						u64 va, int is_data);
+extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
+						u64 ps, int is_data);
+
+extern void vcpu_increment_iip(struct kvm_vcpu *v);
+extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
+extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
+extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
+extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
+extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
+extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
+extern void nested_dtlb(struct kvm_vcpu *vcpu);
+extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
+extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
+
+extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
+extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
+
+extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
+extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
+extern void vmm_transition(struct kvm_vcpu *vcpu);
+extern void vmm_trampoline(union context *from, union context *to);
+extern int vmm_entry(void);
+extern  u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
+
+extern void vmm_reset_entry(void);
+void kvm_init_vtlb(struct kvm_vcpu *v);
+void kvm_init_vhpt(struct kvm_vcpu *v);
+void thash_init(struct thash_cb *hcb, u64 sz);
+
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
+u64 kvm_gpa_to_mpa(u64 gpa);
+extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
+		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+
+extern long vmm_sanity;
+
+#endif
+#endif	/* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
new file mode 100644
index 00000000..f0b9cac8
--- /dev/null
+++ b/arch/ia64/kvm/vmm.c
@@ -0,0 +1,99 @@
+/*
+ * vmm.c: vmm module interface with kvm module
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ *  Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include<linux/kernel.h>
+#include<linux/module.h>
+#include<asm/fpswa.h>
+
+#include "vcpu.h"
+
+MODULE_AUTHOR("Intel");
+MODULE_LICENSE("GPL");
+
+extern char kvm_ia64_ivt;
+extern char kvm_asm_mov_from_ar;
+extern char kvm_asm_mov_from_ar_sn2;
+extern fpswa_interface_t *vmm_fpswa_interface;
+
+long vmm_sanity = 1;
+
+struct kvm_vmm_info vmm_info = {
+	.module			= THIS_MODULE,
+	.vmm_entry		= vmm_entry,
+	.tramp_entry		= vmm_trampoline,
+	.vmm_ivt		= (unsigned long)&kvm_ia64_ivt,
+	.patch_mov_ar		= (unsigned long)&kvm_asm_mov_from_ar,
+	.patch_mov_ar_sn2	= (unsigned long)&kvm_asm_mov_from_ar_sn2,
+};
+
+static int __init  kvm_vmm_init(void)
+{
+
+	vmm_fpswa_interface = fpswa_interface;
+
+	/*Register vmm data to kvm side*/
+	return kvm_init(&vmm_info, 1024, 0, THIS_MODULE);
+}
+
+static void __exit kvm_vmm_exit(void)
+{
+	kvm_exit();
+	return ;
+}
+
+void vmm_spin_lock(vmm_spinlock_t *lock)
+{
+	_vmm_raw_spin_lock(lock);
+}
+
+void vmm_spin_unlock(vmm_spinlock_t *lock)
+{
+	_vmm_raw_spin_unlock(lock);
+}
+
+static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	long psr;
+
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_DEBUG;
+	vmm_transition(vcpu);
+	local_irq_restore(psr);
+}
+
+asmlinkage int printk(const char *fmt, ...)
+{
+	struct kvm_vcpu *vcpu = current_vcpu;
+	va_list args;
+	int r;
+
+	memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
+	va_start(args, fmt);
+	r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
+	va_end(args);
+	vcpu_debug_exit(vcpu);
+	return r;
+}
+
+module_init(kvm_vmm_init)
+module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
new file mode 100644
index 00000000..24018484
--- /dev/null
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -0,0 +1,1392 @@
+/*
+ * arch/ia64/kvm/vmm_ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ *      Stephane Eranian <eranian@hpl.hp.com>
+ *      David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ *      Asit Mallick <asit.k.mallick@intel.com>
+ *      Suresh Siddha <suresh.b.siddha@intel.com>
+ *      Kenneth Chen <kenneth.w.chen@intel.com>
+ *      Fenghua Yu <fenghua.yu@intel.com>
+ *
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
+ * for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
+ * handler now uses virtual PT.
+ *
+ * 07/6/20 Xuefei Xu  (Anthony Xu) (anthony.xu@intel.com)
+ *              Supporting Intel virtualization architecture
+ *
+ */
+
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for
+ * critical
+ * interruptions like TLB misses.
+ *
+ *  For each entry, the comment is as follows:
+ *
+ *              // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss
+ *              (12,51)
+ *  entry offset ----/     /         /                  /
+ *  /
+ *  entry number ---------/         /                  /
+ *  /
+ *  size of the entry -------------/                  /
+ *  /
+ *  vector name -------------------------------------/
+ *  /
+ *  interruptions triggering this vector
+ *  ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB
+ * boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/pgtable.h>
+
+#include "asm-offsets.h"
+#include "vcpu.h"
+#include "kvm_minstate.h"
+#include "vti.h"
+
+#if 1
+# define PSR_DEFAULT_BITS   psr.ac
+#else
+# define PSR_DEFAULT_BITS   0
+#endif
+
+#define KVM_FAULT(n)    \
+	kvm_fault_##n:;          \
+	mov r19=n;;          \
+	br.sptk.many kvm_vmm_panic;         \
+	;;                  \
+
+#define KVM_REFLECT(n)    \
+	mov r31=pr;           \
+	mov r19=n;       /* prepare to save predicates */ \
+	mov r29=cr.ipsr;      \
+	;;      \
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
+(p7)	br.sptk.many kvm_dispatch_reflection;        \
+	br.sptk.many kvm_vmm_panic;      \
+
+GLOBAL_ENTRY(kvm_vmm_panic)
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,1,0
+	mov out0=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i    // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	br.call.sptk.many b6=vmm_panic_handler;
+END(kvm_vmm_panic)
+
+    .section .text..ivt,"ax"
+
+    .align 32768    // align on 32KB boundary
+    .global kvm_ia64_ivt
+kvm_ia64_ivt:
+///////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(kvm_vhpt_miss)
+	KVM_FAULT(0)
+END(kvm_vhpt_miss)
+
+    .org kvm_ia64_ivt+0x400
+////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(kvm_itlb_miss)
+	mov r31 = pr
+	mov r29=cr.ipsr;
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)	br.sptk kvm_alt_itlb_miss
+	mov r19 = 1
+	br.sptk kvm_itlb_miss_dispatch
+	KVM_FAULT(1);
+END(kvm_itlb_miss)
+
+    .org kvm_ia64_ivt+0x0800
+//////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(kvm_dtlb_miss)
+	mov r31 = pr
+	mov r29=cr.ipsr;
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)	br.sptk kvm_alt_dtlb_miss
+	br.sptk kvm_dtlb_miss_dispatch
+END(kvm_dtlb_miss)
+
+     .org kvm_ia64_ivt+0x0c00
+////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(kvm_alt_itlb_miss)
+	mov r16=cr.ifa    // get address that caused the TLB miss
+	;;
+	movl r17=PAGE_KERNEL
+	mov r24=cr.ipsr
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	;;
+	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+	;;
+	or r19=r17,r19      // insert PTE control bits into r19
+	;;
+	movl r20=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r20
+	;;
+	itc.i r19		// insert the TLB entry
+	mov pr=r31,-1
+	rfi
+END(kvm_alt_itlb_miss)
+
+    .org kvm_ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(kvm_alt_dtlb_miss)
+	mov r16=cr.ifa		// get address that caused the TLB miss
+	;;
+	movl r17=PAGE_KERNEL
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	mov r24=cr.ipsr
+	;;
+	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+	;;
+	or r19=r19,r17	// insert PTE control bits into r19
+	;;
+	movl r20=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r20
+	;;
+	itc.d r19		// insert the TLB entry
+	mov pr=r31,-1
+	rfi
+END(kvm_alt_dtlb_miss)
+
+    .org kvm_ia64_ivt+0x1400
+//////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(kvm_nested_dtlb_miss)
+	KVM_FAULT(5)
+END(kvm_nested_dtlb_miss)
+
+    .org kvm_ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(kvm_ikey_miss)
+	KVM_REFLECT(6)
+END(kvm_ikey_miss)
+
+    .org kvm_ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(kvm_dkey_miss)
+	KVM_REFLECT(7)
+END(kvm_dkey_miss)
+
+    .org kvm_ia64_ivt+0x2000
+////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(kvm_dirty_bit)
+	KVM_REFLECT(8)
+END(kvm_dirty_bit)
+
+    .org kvm_ia64_ivt+0x2400
+////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(kvm_iaccess_bit)
+	KVM_REFLECT(9)
+END(kvm_iaccess_bit)
+
+    .org kvm_ia64_ivt+0x2800
+///////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(kvm_daccess_bit)
+	KVM_REFLECT(10)
+END(kvm_daccess_bit)
+
+    .org kvm_ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(kvm_break_fault)
+	mov r31=pr
+	mov r19=11
+	mov r29=cr.ipsr
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	;;
+	alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
+	mov out0=cr.ifa
+	mov out2=cr.isr     // FIXME: pity to make this slow access twice
+	mov out3=cr.iim     // FIXME: pity to make this slow access twice
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i         // guarantee that interruption collection is on
+	;;
+	(p15)ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out1=16,sp
+	br.call.sptk.many b6=kvm_ia64_handle_break
+	;;
+END(kvm_break_fault)
+
+    .org kvm_ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(kvm_interrupt)
+	mov r31=pr		// prepare to save predicates
+	mov r19=12
+	mov r29=cr.ipsr
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+	tbit.z p0,p15=r29,IA64_PSR_I_BIT
+	;;
+(p7)	br.sptk kvm_dispatch_interrupt
+	;;
+	mov r27=ar.rsc		/* M */
+	mov r20=r1			/* A */
+	mov r25=ar.unat		/* M */
+	mov r26=ar.pfs		/* I */
+	mov r28=cr.iip		/* M */
+	cover			/* B (or nothing) */
+	;;
+	mov r1=sp
+	;;
+	invala			/* M */
+	mov r30=cr.ifs
+	;;
+	addl r1=-VMM_PT_REGS_SIZE,r1
+	;;
+	adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
+	adds r16=PT(CR_IPSR),r1
+	;;
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+	st8 [r16]=r29			/* save cr.ipsr */
+	;;
+	lfetch.fault.excl.nt1 [r17]
+	mov r29=b0
+	;;
+	adds r16=PT(R8),r1  	/* initialize first base pointer */
+	adds r17=PT(R9),r1  	/* initialize second base pointer */
+	mov r18=r0      		/* make sure r18 isn't NaT */
+	;;
+.mem.offset 0,0; st8.spill [r16]=r8,16
+.mem.offset 8,0; st8.spill [r17]=r9,16
+        ;;
+.mem.offset 0,0; st8.spill [r16]=r10,24
+.mem.offset 8,0; st8.spill [r17]=r11,24
+        ;;
+	st8 [r16]=r28,16		/* save cr.iip */
+	st8 [r17]=r30,16		/* save cr.ifs */
+	mov r8=ar.fpsr		/* M */
+	mov r9=ar.csd
+	mov r10=ar.ssd
+	movl r11=FPSR_DEFAULT	/* L-unit */
+	;;
+	st8 [r16]=r25,16		/* save ar.unat */
+	st8 [r17]=r26,16		/* save ar.pfs */
+	shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
+	;;
+	st8 [r16]=r27,16		/* save ar.rsc */
+	adds r17=16,r17		/* skip over ar_rnat field */
+	;;
+	st8 [r17]=r31,16		/* save predicates */
+	adds r16=16,r16		/* skip over ar_bspstore field */
+	;;
+	st8 [r16]=r29,16		/* save b0 */
+	st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
+	;;
+.mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
+.mem.offset 8,0; st8.spill [r17]=r12,16
+	adds r12=-16,r1
+	/* switch to kernel memory stack (with 16 bytes of scratch) */
+	;;
+.mem.offset 0,0; st8.spill [r16]=r13,16
+.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
+	;;
+.mem.offset 0,0; st8.spill [r16]=r15,16
+.mem.offset 8,0; st8.spill [r17]=r14,16
+	dep r14=-1,r0,60,4
+	;;
+.mem.offset 0,0; st8.spill [r16]=r2,16
+.mem.offset 8,0; st8.spill [r17]=r3,16
+	adds r2=VMM_PT_REGS_R16_OFFSET,r1
+	adds r14 = VMM_VCPU_GP_OFFSET,r13
+	;;
+	mov r8=ar.ccv
+	ld8 r14 = [r14]
+	;;
+	mov r1=r14       /* establish kernel global pointer */
+	;;                                          \
+	bsw.1
+	;;
+	alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
+	mov out0=r13
+	;;
+	ssm psr.ic
+	;;
+	srlz.i
+	;;
+	//(p15) ssm psr.i
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	srlz.i			// ensure everybody knows psr.ic is back on
+	;;
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+	;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+	;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+	mov r18=b6
+	;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+	mov r19=b7
+	;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+	;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+	;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+	;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,32
+	;;
+	mov ar.fpsr=r11       /* M-unit */
+	st8 [r2]=r8,8         /* ar.ccv */
+	adds r24=PT(B6)-PT(F7),r3
+	;;
+	stf.spill [r2]=f6,32
+	stf.spill [r3]=f7,32
+	;;
+	stf.spill [r2]=f8,32
+	stf.spill [r3]=f9,32
+	;;
+	stf.spill [r2]=f10
+	stf.spill [r3]=f11
+	adds r25=PT(B7)-PT(F11),r3
+	;;
+	st8 [r24]=r18,16       /* b6 */
+	st8 [r25]=r19,16       /* b7 */
+	;;
+	st8 [r24]=r9           /* ar.csd */
+	st8 [r25]=r10          /* ar.ssd */
+	;;
+	srlz.d		// make sure we see the effect of cr.ivr
+	addl r14=@gprel(ia64_leave_nested),gp
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=kvm_ia64_handle_irq
+	;;
+END(kvm_interrupt)
+
+    .global kvm_dispatch_vexirq
+    .org kvm_ia64_ivt+0x3400
+//////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ENTRY(kvm_virtual_exirq)
+	mov r31=pr
+	mov r19=13
+	mov r30 =r0
+	;;
+kvm_dispatch_vexirq:
+	cmp.eq p6,p0 = 1,r30
+	;;
+(p6)	add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+(p6)	ld8 r1 = [r29]
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,1,0
+	mov out0=r13
+
+	ssm psr.ic
+	;;
+	srlz.i // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.i
+	adds r3=8,r2                // set up second base pointer
+	;;
+	KVM_SAVE_REST
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=kvm_vexirq
+END(kvm_virtual_exirq)
+
+    .org kvm_ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+	KVM_FAULT(14)
+	// this code segment is from 2.6.16.13
+
+    .org kvm_ia64_ivt+0x3c00
+///////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+	KVM_FAULT(15)
+
+    .org kvm_ia64_ivt+0x4000
+///////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+	KVM_FAULT(16)
+
+    .org kvm_ia64_ivt+0x4400
+//////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+	KVM_FAULT(17)
+
+    .org kvm_ia64_ivt+0x4800
+//////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+	KVM_FAULT(18)
+
+    .org kvm_ia64_ivt+0x4c00
+//////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+	KVM_FAULT(19)
+
+    .org kvm_ia64_ivt+0x5000
+//////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present
+ENTRY(kvm_page_not_present)
+	KVM_REFLECT(20)
+END(kvm_page_not_present)
+
+    .org kvm_ia64_ivt+0x5100
+///////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
+ENTRY(kvm_key_permission)
+	KVM_REFLECT(21)
+END(kvm_key_permission)
+
+    .org kvm_ia64_ivt+0x5200
+//////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(kvm_iaccess_rights)
+	KVM_REFLECT(22)
+END(kvm_iaccess_rights)
+
+    .org kvm_ia64_ivt+0x5300
+//////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(kvm_daccess_rights)
+	KVM_REFLECT(23)
+END(kvm_daccess_rights)
+
+    .org kvm_ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(kvm_general_exception)
+	KVM_REFLECT(24)
+	KVM_FAULT(24)
+END(kvm_general_exception)
+
+    .org kvm_ia64_ivt+0x5500
+//////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(kvm_disabled_fp_reg)
+	KVM_REFLECT(25)
+END(kvm_disabled_fp_reg)
+
+    .org kvm_ia64_ivt+0x5600
+////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(kvm_nat_consumption)
+	KVM_REFLECT(26)
+END(kvm_nat_consumption)
+
+    .org kvm_ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(kvm_speculation_vector)
+	KVM_REFLECT(27)
+END(kvm_speculation_vector)
+
+    .org kvm_ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+	KVM_FAULT(28)
+
+    .org kvm_ia64_ivt+0x5900
+///////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(kvm_debug_vector)
+	KVM_FAULT(29)
+END(kvm_debug_vector)
+
+    .org kvm_ia64_ivt+0x5a00
+///////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(kvm_unaligned_access)
+	KVM_REFLECT(30)
+END(kvm_unaligned_access)
+
+    .org kvm_ia64_ivt+0x5b00
+//////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(kvm_unsupported_data_reference)
+	KVM_REFLECT(31)
+END(kvm_unsupported_data_reference)
+
+    .org kvm_ia64_ivt+0x5c00
+////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
+ENTRY(kvm_floating_point_fault)
+	KVM_REFLECT(32)
+END(kvm_floating_point_fault)
+
+    .org kvm_ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(kvm_floating_point_trap)
+	KVM_REFLECT(33)
+END(kvm_floating_point_trap)
+
+    .org kvm_ia64_ivt+0x5e00
+//////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(kvm_lower_privilege_trap)
+	KVM_REFLECT(34)
+END(kvm_lower_privilege_trap)
+
+    .org kvm_ia64_ivt+0x5f00
+//////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(kvm_taken_branch_trap)
+	KVM_REFLECT(35)
+END(kvm_taken_branch_trap)
+
+    .org kvm_ia64_ivt+0x6000
+////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(kvm_single_step_trap)
+	KVM_REFLECT(36)
+END(kvm_single_step_trap)
+    .global kvm_virtualization_fault_back
+    .org kvm_ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
+ENTRY(kvm_virtualization_fault)
+	mov r31=pr
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	st8 [r16] = r1
+	adds r17 = VMM_VCPU_GP_OFFSET, r21
+	;;
+	ld8 r1 = [r17]
+	cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
+	cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
+	cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
+	cmp.eq p9,p0=EVENT_RSM,r24
+	cmp.eq p10,p0=EVENT_SSM,r24
+	cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
+	cmp.eq p12,p0=EVENT_THASH,r24
+(p6)	br.dptk.many kvm_asm_mov_from_ar
+(p7)	br.dptk.many kvm_asm_mov_from_rr
+(p8)	br.dptk.many kvm_asm_mov_to_rr
+(p9)	br.dptk.many kvm_asm_rsm
+(p10)	br.dptk.many kvm_asm_ssm
+(p11)	br.dptk.many kvm_asm_mov_to_psr
+(p12)	br.dptk.many kvm_asm_thash
+	;;
+kvm_virtualization_fault_back:
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	ld8 r1 = [r16]
+	;;
+	mov r19=37
+	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+	;;
+	st8 [r16] = r24
+	st8 [r17] = r25
+	;;
+	cmp.ne p6,p0=EVENT_RFI, r24
+(p6)	br.sptk kvm_dispatch_virtualization_fault
+	;;
+	adds r18=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r18=[r18]
+	;;
+	adds r18=VMM_VPD_VIFS_OFFSET,r18
+	;;
+	ld8 r18=[r18]
+	;;
+	tbit.z p6,p0=r18,63
+(p6)	br.sptk kvm_dispatch_virtualization_fault
+	;;
+//if vifs.v=1 desert current register frame
+	alloc r18=ar.pfs,0,0,0,0
+	br.sptk kvm_dispatch_virtualization_fault
+END(kvm_virtualization_fault)
+
+    .org kvm_ia64_ivt+0x6200
+//////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+	KVM_FAULT(38)
+
+    .org kvm_ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+	KVM_FAULT(39)
+
+    .org kvm_ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+	KVM_FAULT(40)
+
+    .org kvm_ia64_ivt+0x6500
+//////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+	KVM_FAULT(41)
+
+    .org kvm_ia64_ivt+0x6600
+//////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+	KVM_FAULT(42)
+
+    .org kvm_ia64_ivt+0x6700
+//////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+	KVM_FAULT(43)
+
+    .org kvm_ia64_ivt+0x6800
+//////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+	KVM_FAULT(44)
+
+    .org kvm_ia64_ivt+0x6900
+///////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
+//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(kvm_ia32_exception)
+	KVM_FAULT(45)
+END(kvm_ia32_exception)
+
+    .org kvm_ia64_ivt+0x6a00
+////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
+ENTRY(kvm_ia32_intercept)
+	KVM_FAULT(47)
+END(kvm_ia32_intercept)
+
+    .org kvm_ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+	KVM_FAULT(48)
+
+    .org kvm_ia64_ivt+0x6d00
+//////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+	KVM_FAULT(49)
+
+    .org kvm_ia64_ivt+0x6e00
+//////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+	KVM_FAULT(50)
+
+    .org kvm_ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+	KVM_FAULT(52)
+
+    .org kvm_ia64_ivt+0x7100
+////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+	KVM_FAULT(53)
+
+    .org kvm_ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+	KVM_FAULT(54)
+
+    .org kvm_ia64_ivt+0x7300
+////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+	KVM_FAULT(55)
+
+    .org kvm_ia64_ivt+0x7400
+////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+	KVM_FAULT(56)
+
+    .org kvm_ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+	KVM_FAULT(57)
+
+    .org kvm_ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+	KVM_FAULT(58)
+
+    .org kvm_ia64_ivt+0x7700
+////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+	KVM_FAULT(59)
+
+    .org kvm_ia64_ivt+0x7800
+////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+	KVM_FAULT(60)
+
+    .org kvm_ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+	KVM_FAULT(61)
+
+    .org kvm_ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+	KVM_FAULT(62)
+
+    .org kvm_ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+	KVM_FAULT(63)
+
+    .org kvm_ia64_ivt+0x7c00
+////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+	KVM_FAULT(64)
+
+    .org kvm_ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+	KVM_FAULT(65)
+
+    .org kvm_ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+	KVM_FAULT(66)
+
+    .org kvm_ia64_ivt+0x7f00
+////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+	KVM_FAULT(67)
+
+    .org kvm_ia64_ivt+0x8000
+// There is no particular reason for this code to be here, other than that
+// there happens to be space here that would go unused otherwise.  If this
+// fault ever gets "unreserved", simply moved the following code to a more
+// suitable spot...
+
+
+ENTRY(kvm_dtlb_miss_dispatch)
+	mov r19 = 2
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,3,0
+	mov out0=cr.ifa
+	mov out1=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i     // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+	;;
+	KVM_SAVE_REST
+	KVM_SAVE_EXTRA
+	mov rp=r14
+	;;
+	adds out2=16,r12
+	br.call.sptk.many b6=kvm_page_fault
+END(kvm_dtlb_miss_dispatch)
+
+ENTRY(kvm_itlb_miss_dispatch)
+
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,3,0
+	mov out0=cr.ifa
+	mov out1=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i   // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out2=16,r12
+	br.call.sptk.many b6=kvm_page_fault
+END(kvm_itlb_miss_dispatch)
+
+ENTRY(kvm_dispatch_reflection)
+/*
+ * Input:
+ *  psr.ic: off
+ *  r19:    intr type (offset into ivt, see ia64_int.h)
+ *  r31:    contains saved predicates (pr)
+ */
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,5,0
+	mov out0=cr.ifa
+	mov out1=cr.isr
+	mov out2=cr.iim
+	mov out3=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i   // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out4=16,r12
+	br.call.sptk.many b6=reflect_interruption
+END(kvm_dispatch_reflection)
+
+ENTRY(kvm_dispatch_virtualization_fault)
+	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+	;;
+	st8 [r16] = r24
+	st8 [r17] = r25
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	;;
+	alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
+	mov out0=r13        //vcpu
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i    // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+	;;
+	KVM_SAVE_REST
+	KVM_SAVE_EXTRA
+	mov rp=r14
+	;;
+	adds out1=16,sp         //regs
+	br.call.sptk.many b6=kvm_emulate
+END(kvm_dispatch_virtualization_fault)
+
+
+ENTRY(kvm_dispatch_interrupt)
+	KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
+	;;
+	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	;;
+	ssm psr.ic
+	;;
+	srlz.i
+	;;
+	(p15) ssm psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	mov out0=r13		// pass pointer to pt_regs as second arg
+	br.call.sptk.many b6=kvm_ia64_handle_irq
+END(kvm_dispatch_interrupt)
+
+GLOBAL_ENTRY(ia64_leave_nested)
+	rsm psr.i
+	;;
+	adds r21=PT(PR)+16,r12
+	;;
+	lfetch [r21],PT(CR_IPSR)-PT(PR)
+	adds r2=PT(B6)+16,r12
+	adds r3=PT(R16)+16,r12
+	;;
+	lfetch [r21]
+	ld8 r28=[r2],8		// load b6
+	adds r29=PT(R24)+16,r12
+
+	ld8.fill r16=[r3]
+	adds r3=PT(AR_CSD)-PT(R16),r3
+	adds r30=PT(AR_CCV)+16,r12
+	;;
+	ld8.fill r24=[r29]
+	ld8 r15=[r30]		// load ar.ccv
+	;;
+	ld8 r29=[r2],16		// load b7
+	ld8 r30=[r3],16		// load ar.csd
+	;;
+	ld8 r31=[r2],16		// load ar.ssd
+	ld8.fill r8=[r3],16
+	;;
+	ld8.fill r9=[r2],16
+	ld8.fill r10=[r3],PT(R17)-PT(R10)
+	;;
+	ld8.fill r11=[r2],PT(R18)-PT(R11)
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	mov ar.csd=r30
+	mov ar.ssd=r31
+	;;
+	rsm psr.i | psr.ic
+	// initiate turning off of interrupt and interruption collection
+	invala			// invalidate ALAT
+	;;
+	srlz.i
+	;;
+	ld8.fill r22=[r2],24
+	ld8.fill r23=[r3],24
+	mov b6=r28
+	;;
+	ld8.fill r25=[r2],16
+	ld8.fill r26=[r3],16
+	mov b7=r29
+	;;
+	ld8.fill r27=[r2],16
+	ld8.fill r28=[r3],16
+	;;
+	ld8.fill r29=[r2],16
+	ld8.fill r30=[r3],24
+	;;
+	ld8.fill r31=[r2],PT(F9)-PT(R31)
+	adds r3=PT(F10)-PT(F6),r3
+	;;
+	ldf.fill f9=[r2],PT(F6)-PT(F9)
+	ldf.fill f10=[r3],PT(F8)-PT(F10)
+	;;
+	ldf.fill f6=[r2],PT(F7)-PT(F6)
+	;;
+	ldf.fill f7=[r2],PT(F11)-PT(F7)
+	ldf.fill f8=[r3],32
+	;;
+	srlz.i			// ensure interruption collection is off
+	mov ar.ccv=r15
+	;;
+	bsw.0	// switch back to bank 0 (no stop bit required beforehand...)
+	;;
+	ldf.fill f11=[r2]
+//	mov r18=r13
+//	mov r21=r13
+	adds r16=PT(CR_IPSR)+16,r12
+	adds r17=PT(CR_IIP)+16,r12
+	;;
+	ld8 r29=[r16],16	// load cr.ipsr
+	ld8 r28=[r17],16	// load cr.iip
+	;;
+	ld8 r30=[r16],16	// load cr.ifs
+	ld8 r25=[r17],16	// load ar.unat
+	;;
+	ld8 r26=[r16],16	// load ar.pfs
+	ld8 r27=[r17],16	// load ar.rsc
+	cmp.eq p9,p0=r0,r0
+	// set p9 to indicate that we should restore cr.ifs
+	;;
+	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
+	ld8 r23=[r17],16// load ar.bspstore (may be garbage)
+	;;
+	ld8 r31=[r16],16	// load predicates
+	ld8 r22=[r17],16	// load b0
+	;;
+	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16	// load r1
+	;;
+	ld8.fill r12=[r16],16
+	ld8.fill r13=[r17],16
+	;;
+	ld8 r20=[r16],16	// ar.fpsr
+	ld8.fill r15=[r17],16
+	;;
+	ld8.fill r14=[r16],16
+	ld8.fill r2=[r17]
+	;;
+	ld8.fill r3=[r16]
+	;;
+	mov r16=ar.bsp		// get existing backing store pointer
+	;;
+	mov b0=r22
+	mov ar.pfs=r26
+	mov cr.ifs=r30
+	mov cr.ipsr=r29
+	mov ar.fpsr=r20
+	mov cr.iip=r28
+	;;
+	mov ar.rsc=r27
+	mov ar.unat=r25
+	mov pr=r31,-1
+	rfi
+END(ia64_leave_nested)
+
+GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
+/*
+ * work.need_resched etc. mustn't get changed
+ *by this CPU before it returns to
+ * user- or fsys-mode, hence we disable interrupts early on:
+ */
+	adds r2 = PT(R4)+16,r12
+	adds r3 = PT(R5)+16,r12
+	adds r8 = PT(EML_UNAT)+16,r12
+	;;
+	ld8 r8 = [r8]
+	;;
+	mov ar.unat=r8
+	;;
+	ld8.fill r4=[r2],16    //load r4
+	ld8.fill r5=[r3],16    //load r5
+	;;
+	ld8.fill r6=[r2]    //load r6
+	ld8.fill r7=[r3]    //load r7
+	;;
+END(ia64_leave_hypervisor_prepare)
+//fall through
+GLOBAL_ENTRY(ia64_leave_hypervisor)
+	rsm psr.i
+	;;
+	br.call.sptk.many b0=leave_hypervisor_tail
+	;;
+	adds r20=PT(PR)+16,r12
+	adds r8=PT(EML_UNAT)+16,r12
+	;;
+	ld8 r8=[r8]
+	;;
+	mov ar.unat=r8
+	;;
+	lfetch [r20],PT(CR_IPSR)-PT(PR)
+	adds r2 = PT(B6)+16,r12
+	adds r3 = PT(B7)+16,r12
+	;;
+	lfetch [r20]
+	;;
+	ld8 r24=[r2],16        /* B6 */
+	ld8 r25=[r3],16        /* B7 */
+	;;
+	ld8 r26=[r2],16        /* ar_csd */
+	ld8 r27=[r3],16        /* ar_ssd */
+	mov b6 = r24
+	;;
+	ld8.fill r8=[r2],16
+	ld8.fill r9=[r3],16
+	mov b7 = r25
+	;;
+	mov ar.csd = r26
+	mov ar.ssd = r27
+	;;
+	ld8.fill r10=[r2],PT(R15)-PT(R10)
+	ld8.fill r11=[r3],PT(R14)-PT(R11)
+	;;
+	ld8.fill r15=[r2],PT(R16)-PT(R15)
+	ld8.fill r14=[r3],PT(R17)-PT(R14)
+	;;
+	ld8.fill r16=[r2],16
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	;;
+	ld8.fill r22=[r2],16
+	ld8.fill r23=[r3],16
+	;;
+	ld8.fill r24=[r2],16
+	ld8.fill r25=[r3],16
+	;;
+	ld8.fill r26=[r2],16
+	ld8.fill r27=[r3],16
+	;;
+	ld8.fill r28=[r2],16
+	ld8.fill r29=[r3],16
+	;;
+	ld8.fill r30=[r2],PT(F6)-PT(R30)
+	ld8.fill r31=[r3],PT(F7)-PT(R31)
+	;;
+	rsm psr.i | psr.ic
+	// initiate turning off of interrupt and interruption collection
+	invala          // invalidate ALAT
+	;;
+	srlz.i          // ensure interruption collection is off
+	;;
+	bsw.0
+	;;
+	adds r16 = PT(CR_IPSR)+16,r12
+	adds r17 = PT(CR_IIP)+16,r12
+	mov r21=r13		// get current
+	;;
+	ld8 r31=[r16],16    // load cr.ipsr
+	ld8 r30=[r17],16    // load cr.iip
+	;;
+	ld8 r29=[r16],16    // load cr.ifs
+	ld8 r28=[r17],16    // load ar.unat
+	;;
+	ld8 r27=[r16],16    // load ar.pfs
+	ld8 r26=[r17],16    // load ar.rsc
+	;;
+	ld8 r25=[r16],16    // load ar.rnat
+	ld8 r24=[r17],16    // load ar.bspstore
+	;;
+	ld8 r23=[r16],16    // load predicates
+	ld8 r22=[r17],16    // load b0
+	;;
+	ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16    //load r1
+	;;
+	ld8.fill r12=[r16],16    //load r12
+	ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
+	;;
+	ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
+	ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
+	;;
+	ld8.fill r3=[r16]	//load r3
+	ld8 r18=[r17]	//load ar_ccv
+	;;
+	mov ar.fpsr=r19
+	mov ar.ccv=r18
+	shr.u r18=r20,16
+	;;
+kvm_rbs_switch:
+	mov r19=96
+
+kvm_dont_preserve_current_frame:
+/*
+    * To prevent leaking bits between the hypervisor and guest domain,
+    * we must clear the stacked registers in the "invalid" partition here.
+    * 5 registers/cycle on McKinley).
+    */
+#   define pRecurse	p6
+#   define pReturn	p7
+#   define Nregs	14
+
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	shr.u loc1=r18,9	// RNaTslots <= floor(dirtySize / (64*8))
+	sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
+	;;
+	mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
+	shladd in0=loc1,3,r19
+	mov in1=0
+	;;
+	TEXT_ALIGN(32)
+kvm_rse_clear_invalid:
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	cmp.lt pRecurse,p0=Nregs*8,in0
+	// if more than Nregs regs left to clear, (re)curse
+	add out0=-Nregs*8,in0
+	add out1=1,in1		// increment recursion count
+	mov loc1=0
+	mov loc2=0
+	;;
+	mov loc3=0
+	mov loc4=0
+	mov loc5=0
+	mov loc6=0
+	mov loc7=0
+(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
+	;;
+	mov loc8=0
+	mov loc9=0
+	cmp.ne pReturn,p0=r0,in1
+	// if recursion count != 0, we need to do a br.ret
+	mov loc10=0
+	mov loc11=0
+(pReturn) br.ret.dptk.many b0
+
+#	undef pRecurse
+#	undef pReturn
+
+// loadrs has already been shifted
+	alloc r16=ar.pfs,0,0,0,0    // drop current register frame
+	;;
+	loadrs
+	;;
+	mov ar.bspstore=r24
+	;;
+	mov ar.unat=r28
+	mov ar.rnat=r25
+	mov ar.rsc=r26
+	;;
+	mov cr.ipsr=r31
+	mov cr.iip=r30
+	mov cr.ifs=r29
+	mov ar.pfs=r27
+	adds r18=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r18=[r18]   //vpd
+	adds r17=VMM_VCPU_ISR_OFFSET,r21
+	;;
+	ld8 r17=[r17]
+	adds r19=VMM_VPD_VPSR_OFFSET,r18
+	;;
+	ld8 r19=[r19]        //vpsr
+	mov r25=r18
+	adds r16= VMM_VCPU_GP_OFFSET,r21
+	;;
+	ld8 r16= [r16] // Put gp in r24
+	movl r24=@gprel(ia64_vmm_entry)  // calculate return address
+	;;
+	add  r24=r24,r16
+	;;
+	br.sptk.many  kvm_vps_sync_write       // call the service
+	;;
+END(ia64_leave_hypervisor)
+// fall through
+GLOBAL_ENTRY(ia64_vmm_entry)
+/*
+ *  must be at bank 0
+ *  parameter:
+ *  r17:cr.isr
+ *  r18:vpd
+ *  r19:vpsr
+ *  r22:b0
+ *  r23:predicate
+ */
+	mov r24=r22
+	mov r25=r18
+	tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
+(p1) 	br.cond.sptk.few kvm_vps_resume_normal
+(p2)	br.cond.sptk.many kvm_vps_resume_handler
+	;;
+END(ia64_vmm_entry)
+
+/*
+ * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
+ *                  u64 arg3, u64 arg4, u64 arg5,
+ *                  u64 arg6, u64 arg7);
+ *
+ * XXX: The currently defined services use only 4 args at the max. The
+ *  rest are not consumed.
+ */
+GLOBAL_ENTRY(ia64_call_vsa)
+    .regstk 4,4,0,0
+
+rpsave  =   loc0
+pfssave =   loc1
+psrsave =   loc2
+entry   =   loc3
+hostret =   r24
+
+	alloc   pfssave=ar.pfs,4,4,0,0
+	mov rpsave=rp
+	adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
+	;;
+	ld8 entry=[entry]
+1:	mov hostret=ip
+	mov r25=in1         // copy arguments
+	mov r26=in2
+	mov r27=in3
+	mov psrsave=psr
+	;;
+	tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
+	tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
+	;;
+	add hostret=2f-1b,hostret   // calculate return address
+	add entry=entry,in0
+	;;
+	rsm psr.i | psr.ic
+	;;
+	srlz.i
+	mov b6=entry
+	br.cond.sptk b6         // call the service
+2:
+// Architectural sequence for enabling interrupts if necessary
+(p7)    ssm psr.ic
+	;;
+(p7)    srlz.i
+	;;
+(p6)    ssm psr.i
+	;;
+	mov rp=rpsave
+	mov ar.pfs=pfssave
+	mov r8=r31
+	;;
+	srlz.d
+	br.ret.sptk rp
+
+END(ia64_call_vsa)
+
+#define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
+
+GLOBAL_ENTRY(vmm_reset_entry)
+	//set up ipsr, iip, vpd.vpsr, dcr
+	// For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
+	// For DCR: all bits 0
+	bsw.0
+	;;
+	mov r21 =r13
+	adds r14=-VMM_PT_REGS_SIZE, r12
+	;;
+	movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
+	movl r10=0x8000000000000000
+	adds r16=PT(CR_IIP), r14
+	adds r20=PT(R1), r14
+	;;
+	rsm psr.ic | psr.i
+	;;
+	srlz.i
+	;;
+	mov ar.rsc = 0
+	;;
+	flushrs
+	;;
+	mov ar.bspstore = 0
+	// clear BSPSTORE
+	;;
+	mov cr.ipsr=r6
+	mov cr.ifs=r10
+	ld8 r4 = [r16] // Set init iip for first run.
+	ld8 r1 = [r20]
+	;;
+	mov cr.iip=r4
+	adds r16=VMM_VPD_BASE_OFFSET,r13
+	;;
+	ld8 r18=[r16]
+	;;
+	adds r19=VMM_VPD_VPSR_OFFSET,r18
+	;;
+	ld8 r19=[r19]
+	mov r17=r0
+	mov r22=r0
+	mov r23=r0
+	br.cond.sptk ia64_vmm_entry
+	br.ret.sptk  b0
+END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
new file mode 100644
index 00000000..b214b5b0
--- /dev/null
+++ b/arch/ia64/kvm/vti.h
@@ -0,0 +1,290 @@
+/*
+ * vti.h: prototype for generial vt related interface
+ *   	Copyright (c) 2004, Intel Corporation.
+ *
+ *	Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ *	Fred Yang (fred.yang@intel.com)
+ * 	Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ *
+ *  	Copyright (c) 2007, Intel Corporation.
+ *  	Zhang xiantao <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef _KVM_VT_I_H
+#define _KVM_VT_I_H
+
+#ifndef __ASSEMBLY__
+#include <asm/page.h>
+
+#include <linux/kvm_host.h>
+
+/* define itr.i and itr.d  in ia64_itr function */
+#define	ITR	0x01
+#define	DTR	0x02
+#define	IaDTR	0x03
+
+#define IA64_TR_VMM       6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
+#define IA64_TR_VM_DATA   7 /*dtr7       : maps current vm data*/
+
+#define RR6 (6UL<<61)
+#define RR7 (7UL<<61)
+
+
+/* config_options in pal_vp_init_env */
+#define	VP_INITIALIZE	1UL
+#define	VP_FR_PMC	1UL<<1
+#define	VP_OPCODE	1UL<<8
+#define	VP_CAUSE	1UL<<9
+#define VP_FW_ACC   	1UL<<63
+
+/* init vp env with initializing vm_buffer */
+#define	VP_INIT_ENV_INITALIZE  (VP_INITIALIZE | VP_FR_PMC |\
+	VP_OPCODE | VP_CAUSE | VP_FW_ACC)
+/* init vp env without initializing vm_buffer */
+#define	VP_INIT_ENV  VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC
+
+#define		PAL_VP_CREATE   265
+/* Stacked Virt. Initializes a new VPD for the operation of
+ * a new virtual processor in the virtual environment.
+ */
+#define		PAL_VP_ENV_INFO 266
+/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
+#define		PAL_VP_EXIT_ENV 267
+/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
+#define		PAL_VP_INIT_ENV 268
+/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
+#define		PAL_VP_REGISTER 269
+/*Stacked Virt. Register a different host IVT for the virtual processor.*/
+#define		PAL_VP_RESUME   270
+/* Renamed from PAL_VP_RESUME */
+#define		PAL_VP_RESTORE  270
+/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
+#define		PAL_VP_SUSPEND  271
+/* Renamed from PAL_VP_SUSPEND */
+#define		PAL_VP_SAVE	271
+/* Stacked Virt. Suspends operation for the specified virtual processor on
+ * the logical processor.
+ */
+#define		PAL_VP_TERMINATE 272
+/* Stacked Virt. Terminates operation for the specified virtual processor.*/
+
+union vac {
+	unsigned long value;
+	struct {
+		unsigned int a_int:1;
+		unsigned int a_from_int_cr:1;
+		unsigned int a_to_int_cr:1;
+		unsigned int a_from_psr:1;
+		unsigned int a_from_cpuid:1;
+		unsigned int a_cover:1;
+		unsigned int a_bsw:1;
+		long reserved:57;
+	};
+};
+
+union vdc {
+	unsigned long value;
+	struct {
+		unsigned int d_vmsw:1;
+		unsigned int d_extint:1;
+		unsigned int d_ibr_dbr:1;
+		unsigned int d_pmc:1;
+		unsigned int d_to_pmd:1;
+		unsigned int d_itm:1;
+		long reserved:58;
+	};
+};
+
+struct vpd {
+	union vac   vac;
+	union vdc   vdc;
+	unsigned long  virt_env_vaddr;
+	unsigned long  reserved1[29];
+	unsigned long  vhpi;
+	unsigned long  reserved2[95];
+	unsigned long  vgr[16];
+	unsigned long  vbgr[16];
+	unsigned long  vnat;
+	unsigned long  vbnat;
+	unsigned long  vcpuid[5];
+	unsigned long  reserved3[11];
+	unsigned long  vpsr;
+	unsigned long  vpr;
+	unsigned long  reserved4[76];
+	union {
+		unsigned long  vcr[128];
+		struct {
+			unsigned long dcr;
+			unsigned long itm;
+			unsigned long iva;
+			unsigned long rsv1[5];
+			unsigned long pta;
+			unsigned long rsv2[7];
+			unsigned long ipsr;
+			unsigned long isr;
+			unsigned long rsv3;
+			unsigned long iip;
+			unsigned long ifa;
+			unsigned long itir;
+			unsigned long iipa;
+			unsigned long ifs;
+			unsigned long iim;
+			unsigned long iha;
+			unsigned long rsv4[38];
+			unsigned long lid;
+			unsigned long ivr;
+			unsigned long tpr;
+			unsigned long eoi;
+			unsigned long irr[4];
+			unsigned long itv;
+			unsigned long pmv;
+			unsigned long cmcv;
+			unsigned long rsv5[5];
+			unsigned long lrr0;
+			unsigned long lrr1;
+			unsigned long rsv6[46];
+		};
+	};
+	unsigned long  reserved5[128];
+	unsigned long  reserved6[3456];
+	unsigned long  vmm_avail[128];
+	unsigned long  reserved7[4096];
+};
+
+#define PAL_PROC_VM_BIT		(1UL << 40)
+#define PAL_PROC_VMSW_BIT	(1UL << 54)
+
+static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
+		u64 *vp_env_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
+	*buffer_size = iprv.v0;
+	*vp_env_info = iprv.v1;
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_exit_env(u64 iva)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
+			u64 vbase_addr, u64 *vsa_base)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
+			vbase_addr);
+	*vsa_base = iprv.v0;
+
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
+
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
+
+	return iprv.status;
+}
+
+#endif
+
+/*VPD field offset*/
+#define VPD_VAC_START_OFFSET		0
+#define VPD_VDC_START_OFFSET		8
+#define VPD_VHPI_START_OFFSET		256
+#define VPD_VGR_START_OFFSET		1024
+#define VPD_VBGR_START_OFFSET		1152
+#define VPD_VNAT_START_OFFSET		1280
+#define VPD_VBNAT_START_OFFSET		1288
+#define VPD_VCPUID_START_OFFSET		1296
+#define VPD_VPSR_START_OFFSET		1424
+#define VPD_VPR_START_OFFSET		1432
+#define VPD_VRSE_CFLE_START_OFFSET	1440
+#define VPD_VCR_START_OFFSET		2048
+#define VPD_VTPR_START_OFFSET		2576
+#define VPD_VRR_START_OFFSET		3072
+#define VPD_VMM_VAIL_START_OFFSET	31744
+
+/*Virtualization faults*/
+
+#define EVENT_MOV_TO_AR			 1
+#define EVENT_MOV_TO_AR_IMM		 2
+#define EVENT_MOV_FROM_AR		 3
+#define EVENT_MOV_TO_CR			 4
+#define EVENT_MOV_FROM_CR		 5
+#define EVENT_MOV_TO_PSR		 6
+#define EVENT_MOV_FROM_PSR		 7
+#define EVENT_ITC_D			 8
+#define EVENT_ITC_I			 9
+#define EVENT_MOV_TO_RR			 10
+#define EVENT_MOV_TO_DBR		 11
+#define EVENT_MOV_TO_IBR		 12
+#define EVENT_MOV_TO_PKR		 13
+#define EVENT_MOV_TO_PMC		 14
+#define EVENT_MOV_TO_PMD		 15
+#define EVENT_ITR_D			 16
+#define EVENT_ITR_I			 17
+#define EVENT_MOV_FROM_RR		 18
+#define EVENT_MOV_FROM_DBR		 19
+#define EVENT_MOV_FROM_IBR		 20
+#define EVENT_MOV_FROM_PKR		 21
+#define EVENT_MOV_FROM_PMC		 22
+#define EVENT_MOV_FROM_CPUID		 23
+#define EVENT_SSM			 24
+#define EVENT_RSM			 25
+#define EVENT_PTC_L			 26
+#define EVENT_PTC_G			 27
+#define EVENT_PTC_GA			 28
+#define EVENT_PTR_D			 29
+#define EVENT_PTR_I			 30
+#define EVENT_THASH			 31
+#define EVENT_TTAG			 32
+#define EVENT_TPA			 33
+#define EVENT_TAK			 34
+#define EVENT_PTC_E			 35
+#define EVENT_COVER			 36
+#define EVENT_RFI			 37
+#define EVENT_BSW_0			 38
+#define EVENT_BSW_1			 39
+#define EVENT_VMSW			 40
+
+/**PAL virtual services offsets */
+#define PAL_VPS_RESUME_NORMAL           0x0000
+#define PAL_VPS_RESUME_HANDLER          0x0400
+#define PAL_VPS_SYNC_READ               0x0800
+#define PAL_VPS_SYNC_WRITE              0x0c00
+#define PAL_VPS_SET_PENDING_INTERRUPT   0x1000
+#define PAL_VPS_THASH                   0x1400
+#define PAL_VPS_TTAG                    0x1800
+#define PAL_VPS_RESTORE                 0x1c00
+#define PAL_VPS_SAVE                    0x2000
+
+#endif/* _VT_I_H*/
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
new file mode 100644
index 00000000..4332f7ee
--- /dev/null
+++ b/arch/ia64/kvm/vtlb.c
@@ -0,0 +1,640 @@
+/*
+ * vtlb.c: guest virtual tlb handling module.
+ * Copyright (c) 2004, Intel Corporation.
+ *  Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
+ *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ *  Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include "vcpu.h"
+
+#include <linux/rwsem.h>
+
+#include <asm/tlb.h>
+
+/*
+ * Check to see if the address rid:va is translated by the TLB
+ */
+
+static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
+{
+	return ((trp->p) && (trp->rid == rid)
+				&& ((va-trp->vadr) < PSIZE(trp->ps)));
+}
+
+/*
+ * Only for GUEST TR format.
+ */
+static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
+{
+	u64 sa1, ea1;
+
+	if (!trp->p || trp->rid != rid)
+		return 0;
+
+	sa1 = trp->vadr;
+	ea1 = sa1 + PSIZE(trp->ps) - 1;
+	eva -= 1;
+	if ((sva > ea1) || (sa1 > eva))
+		return 0;
+	else
+		return 1;
+
+}
+
+void machine_tlb_purge(u64 va, u64 ps)
+{
+	ia64_ptcl(va, ps << 2);
+}
+
+void local_flush_tlb_all(void)
+{
+	int i, j;
+	unsigned long flags, count0, count1;
+	unsigned long stride0, stride1, addr;
+
+	addr    = current_vcpu->arch.ptce_base;
+	count0  = current_vcpu->arch.ptce_count[0];
+	count1  = current_vcpu->arch.ptce_count[1];
+	stride0 = current_vcpu->arch.ptce_stride[0];
+	stride1 = current_vcpu->arch.ptce_stride[1];
+
+	local_irq_save(flags);
+	for (i = 0; i < count0; ++i) {
+		for (j = 0; j < count1; ++j) {
+			ia64_ptce(addr);
+			addr += stride1;
+		}
+		addr += stride0;
+	}
+	local_irq_restore(flags);
+	ia64_srlz_i();          /* srlz.i implies srlz.d */
+}
+
+int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
+{
+	union ia64_rr    vrr;
+	union ia64_pta   vpta;
+	struct  ia64_psr   vpsr;
+
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	vpta.val = vcpu_get_pta(vcpu);
+
+	if (vrr.ve & vpta.ve) {
+		switch (ref) {
+		case DATA_REF:
+		case NA_REF:
+			return vpsr.dt;
+		case INST_REF:
+			return vpsr.dt && vpsr.it && vpsr.ic;
+		case RSE_REF:
+			return vpsr.dt && vpsr.rt;
+
+		}
+	}
+	return 0;
+}
+
+struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
+{
+	u64 index, pfn, rid, pfn_bits;
+
+	pfn_bits = vpta.size - 5 - 8;
+	pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
+	rid = _REGION_ID(vrr);
+	index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
+	*tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
+
+	return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
+				(index << 5));
+}
+
+struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
+{
+
+	struct thash_data *trp;
+	int  i;
+	u64 rid;
+
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;
+	if (type == D_TLB) {
+		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
+						i < NDTRS; i++, trp++) {
+				if (__is_tr_translated(trp, rid, va))
+					return trp;
+			}
+		}
+	} else {
+		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
+					i < NITRS; i++, trp++) {
+				if (__is_tr_translated(trp, rid, va))
+					return trp;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
+{
+	union ia64_rr rr;
+	struct thash_data *head;
+	unsigned long ps, gpaddr;
+
+	ps = itir_ps(itir);
+	rr.val = ia64_get_rr(ifa);
+
+	 gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
+					(ifa & ((1UL << ps) - 1));
+
+	head = (struct thash_data *)ia64_thash(ifa);
+	head->etag = INVALID_TI_TAG;
+	ia64_mf();
+	head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
+	head->itir = rr.ps << 2;
+	head->etag = ia64_ttag(ifa);
+	head->gpaddr = gpaddr;
+}
+
+void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
+{
+	u64 i, dirty_pages = 1;
+	u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
+	vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
+	void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
+
+	dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
+
+	vmm_spin_lock(lock);
+	for (i = 0; i < dirty_pages; i++) {
+		/* avoid RMW */
+		if (!test_bit(base_gfn + i, dirty_bitmap))
+			set_bit(base_gfn + i , dirty_bitmap);
+	}
+	vmm_spin_unlock(lock);
+}
+
+void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
+{
+	u64 phy_pte, psr;
+	union ia64_rr mrr;
+
+	mrr.val = ia64_get_rr(va);
+	phy_pte = translate_phy_pte(&pte, itir, va);
+
+	if (itir_ps(itir) >= mrr.ps) {
+		vhpt_insert(phy_pte, itir, va, pte);
+	} else {
+		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
+		psr = ia64_clear_ic();
+		ia64_itc(type, va, phy_pte, itir_ps(itir));
+		paravirt_dv_serialize_data();
+		ia64_set_psr(psr);
+	}
+
+	if (!(pte&VTLB_PTE_IO))
+		mark_pages_dirty(v, pte, itir_ps(itir));
+}
+
+/*
+ *   vhpt lookup
+ */
+struct thash_data *vhpt_lookup(u64 va)
+{
+	struct thash_data *head;
+	u64 tag;
+
+	head = (struct thash_data *)ia64_thash(va);
+	tag = ia64_ttag(va);
+	if (head->etag == tag)
+		return head;
+	return NULL;
+}
+
+u64 guest_vhpt_lookup(u64 iha, u64 *pte)
+{
+	u64 ret;
+	struct thash_data *data;
+
+	data = __vtr_lookup(current_vcpu, iha, D_TLB);
+	if (data != NULL)
+		thash_vhpt_insert(current_vcpu, data->page_flags,
+			data->itir, iha, D_TLB);
+
+	asm volatile ("rsm psr.ic|psr.i;;"
+			"srlz.d;;"
+			"ld8.s r9=[%1];;"
+			"tnat.nz p6,p7=r9;;"
+			"(p6) mov %0=1;"
+			"(p6) mov r9=r0;"
+			"(p7) extr.u r9=r9,0,53;;"
+			"(p7) mov %0=r0;"
+			"(p7) st8 [%2]=r9;;"
+			"ssm psr.ic;;"
+			"srlz.d;;"
+			"ssm psr.i;;"
+			"srlz.d;;"
+			: "=r"(ret) : "r"(iha), "r"(pte):"memory");
+
+	return ret;
+}
+
+/*
+ *  purge software guest tlb
+ */
+
+static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	struct thash_data *cur;
+	u64 start, curadr, size, psbits, tag, rr_ps, num;
+	union ia64_rr vrr;
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	vrr.val = vcpu_get_rr(v, va);
+	psbits = VMX(v, psbits[(va >> 61)]);
+	start = va & ~((1UL << ps) - 1);
+	while (psbits) {
+		curadr = start;
+		rr_ps = __ffs(psbits);
+		psbits &= ~(1UL << rr_ps);
+		num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
+		size = PSIZE(rr_ps);
+		vrr.ps = rr_ps;
+		while (num) {
+			cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
+			if (cur->etag == tag && cur->ps == rr_ps)
+				cur->etag = INVALID_TI_TAG;
+			curadr += size;
+			num--;
+		}
+	}
+}
+
+
+/*
+ *  purge VHPT and machine TLB
+ */
+static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	struct thash_data *cur;
+	u64 start, size, tag, num;
+	union ia64_rr rr;
+
+	start = va & ~((1UL << ps) - 1);
+	rr.val = ia64_get_rr(va);
+	size = PSIZE(rr.ps);
+	num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
+	while (num) {
+		cur = (struct thash_data *)ia64_thash(start);
+		tag = ia64_ttag(start);
+		if (cur->etag == tag)
+			cur->etag = INVALID_TI_TAG;
+		start += size;
+		num--;
+	}
+	machine_tlb_purge(va, ps);
+}
+
+/*
+ * Insert an entry into hash TLB or VHPT.
+ * NOTES:
+ *  1: When inserting VHPT to thash, "va" is a must covered
+ *  address by the inserted machine VHPT entry.
+ *  2: The format of entry is always in TLB.
+ *  3: The caller need to make sure the new entry will not overlap
+ *     with any existed entry.
+ */
+void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
+{
+	struct thash_data *head;
+	union ia64_rr vrr;
+	u64 tag;
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	vrr.val = vcpu_get_rr(v, va);
+	vrr.ps = itir_ps(itir);
+	VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
+	head = vsa_thash(hcb->pta, va, vrr.val, &tag);
+	head->page_flags = pte;
+	head->itir = itir;
+	head->etag = tag;
+}
+
+int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
+{
+	struct thash_data  *trp;
+	int  i;
+	u64 end, rid;
+
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;
+	end = va + PSIZE(ps);
+	if (type == D_TLB) {
+		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
+					i < NDTRS; i++, trp++) {
+				if (__is_tr_overlap(trp, rid, va, end))
+					return i;
+			}
+		}
+	} else {
+		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
+					i < NITRS; i++, trp++) {
+				if (__is_tr_overlap(trp, rid, va, end))
+					return i;
+			}
+		}
+	}
+	return -1;
+}
+
+/*
+ * Purge entries in VTLB and VHPT
+ */
+void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	if (vcpu_quick_region_check(v->arch.tc_regions, va))
+		vtlb_purge(v, va, ps);
+	vhpt_purge(v, va, ps);
+}
+
+void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	u64 old_va = va;
+	va = REGION_OFFSET(va);
+	if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
+		vtlb_purge(v, va, ps);
+	vhpt_purge(v, va, ps);
+}
+
+u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
+{
+	u64 ps, ps_mask, paddr, maddr, io_mask;
+	union pte_flags phy_pte;
+
+	ps = itir_ps(itir);
+	ps_mask = ~((1UL << ps) - 1);
+	phy_pte.val = *pte;
+	paddr = *pte;
+	paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
+	maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT);
+	io_mask = maddr & GPFN_IO_MASK;
+	if (io_mask && (io_mask != GPFN_PHYS_MMIO)) {
+		*pte |= VTLB_PTE_IO;
+		return -1;
+	}
+	maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
+					(paddr & ~PAGE_MASK);
+	phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
+	return phy_pte.val;
+}
+
+/*
+ * Purge overlap TCs and then insert the new entry to emulate itc ops.
+ * Notes: Only TC entry can purge and insert.
+ */
+void  thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
+						u64 ifa, int type)
+{
+	u64 ps;
+	u64 phy_pte, io_mask, index;
+	union ia64_rr vrr, mrr;
+
+	ps = itir_ps(itir);
+	vrr.val = vcpu_get_rr(v, ifa);
+	mrr.val = ia64_get_rr(ifa);
+
+	index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
+	io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK;
+	phy_pte = translate_phy_pte(&pte, itir, ifa);
+
+	/* Ensure WB attribute if pte is related to a normal mem page,
+	 * which is required by vga acceleration since qemu maps shared
+	 * vram buffer with WB.
+	 */
+	if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) &&
+			io_mask != GPFN_PHYS_MMIO) {
+		pte &= ~_PAGE_MA_MASK;
+		phy_pte &= ~_PAGE_MA_MASK;
+	}
+
+	vtlb_purge(v, ifa, ps);
+	vhpt_purge(v, ifa, ps);
+
+	if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
+		vtlb_insert(v, pte, itir, ifa);
+		vcpu_quick_region_set(VMX(v, tc_regions), ifa);
+	}
+	if (pte & VTLB_PTE_IO)
+		return;
+
+	if (ps >= mrr.ps)
+		vhpt_insert(phy_pte, itir, ifa, pte);
+	else {
+		u64 psr;
+		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
+		psr = ia64_clear_ic();
+		ia64_itc(type, ifa, phy_pte, ps);
+		paravirt_dv_serialize_data();
+		ia64_set_psr(psr);
+	}
+	if (!(pte&VTLB_PTE_IO))
+		mark_pages_dirty(v, pte, ps);
+
+}
+
+/*
+ * Purge all TCs or VHPT entries including those in Hash table.
+ *
+ */
+
+void thash_purge_all(struct kvm_vcpu *v)
+{
+	int i;
+	struct thash_data *head;
+	struct thash_cb  *vtlb, *vhpt;
+	vtlb = &v->arch.vtlb;
+	vhpt = &v->arch.vhpt;
+
+	for (i = 0; i < 8; i++)
+		VMX(v, psbits[i]) = 0;
+
+	head = vtlb->hash;
+	for (i = 0; i < vtlb->num; i++) {
+		head->page_flags = 0;
+		head->etag = INVALID_TI_TAG;
+		head->itir = 0;
+		head->next = 0;
+		head++;
+	};
+
+	head = vhpt->hash;
+	for (i = 0; i < vhpt->num; i++) {
+		head->page_flags = 0;
+		head->etag = INVALID_TI_TAG;
+		head->itir = 0;
+		head->next = 0;
+		head++;
+	};
+
+	local_flush_tlb_all();
+}
+
+/*
+ * Lookup the hash table and its collision chain to find an entry
+ * covering this address rid:va or the entry.
+ *
+ * INPUT:
+ *  in: TLB format for both VHPT & TLB.
+ */
+struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
+{
+	struct thash_data  *cch;
+	u64    psbits, ps, tag;
+	union ia64_rr vrr;
+
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	cch = __vtr_lookup(v, va, is_data);
+	if (cch)
+		return cch;
+
+	if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
+		return NULL;
+
+	psbits = VMX(v, psbits[(va >> 61)]);
+	vrr.val = vcpu_get_rr(v, va);
+	while (psbits) {
+		ps = __ffs(psbits);
+		psbits &= ~(1UL << ps);
+		vrr.ps = ps;
+		cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
+		if (cch->etag == tag && cch->ps == ps)
+			return cch;
+	}
+
+	return NULL;
+}
+
+/*
+ * Initialize internal control data before service.
+ */
+void thash_init(struct thash_cb *hcb, u64 sz)
+{
+	int i;
+	struct thash_data *head;
+
+	hcb->pta.val = (unsigned long)hcb->hash;
+	hcb->pta.vf = 1;
+	hcb->pta.ve = 1;
+	hcb->pta.size = sz;
+	head = hcb->hash;
+	for (i = 0; i < hcb->num; i++) {
+		head->page_flags = 0;
+		head->itir = 0;
+		head->etag = INVALID_TI_TAG;
+		head->next = 0;
+		head++;
+	}
+}
+
+u64 kvm_get_mpt_entry(u64 gpfn)
+{
+	u64 *base = (u64 *) KVM_P2M_BASE;
+
+	if (gpfn >= (KVM_P2M_SIZE >> 3))
+		panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
+
+	return *(base + gpfn);
+}
+
+u64 kvm_lookup_mpa(u64 gpfn)
+{
+	u64 maddr;
+	maddr = kvm_get_mpt_entry(gpfn);
+	return maddr&_PAGE_PPN_MASK;
+}
+
+u64 kvm_gpa_to_mpa(u64 gpa)
+{
+	u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
+	return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
+}
+
+/*
+ * Fetch guest bundle code.
+ * INPUT:
+ *  gip: guest ip
+ *  pbundle: used to return fetched bundle.
+ */
+int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
+{
+	u64     gpip = 0;   /* guest physical IP*/
+	u64     *vpa;
+	struct thash_data    *tlb;
+	u64     maddr;
+
+	if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
+		/* I-side physical mode */
+		gpip = gip;
+	} else {
+		tlb = vtlb_lookup(vcpu, gip, I_TLB);
+		if (tlb)
+			gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
+				(gip & (PSIZE(tlb->ps) - 1));
+	}
+	if (gpip) {
+		maddr = kvm_gpa_to_mpa(gpip);
+	} else {
+		tlb = vhpt_lookup(gip);
+		if (tlb == NULL) {
+			ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
+			return IA64_FAULT;
+		}
+		maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
+					| (gip & (PSIZE(tlb->ps) - 1));
+	}
+	vpa = (u64 *)__kvm_va(maddr);
+
+	pbundle->i64[0] = *vpa++;
+	pbundle->i64[1] = *vpa;
+
+	return IA64_NO_FAULT;
+}
+
+void kvm_init_vhpt(struct kvm_vcpu *v)
+{
+	v->arch.vhpt.num = VHPT_NUM_ENTRIES;
+	thash_init(&v->arch.vhpt, VHPT_SHIFT);
+	ia64_set_pta(v->arch.vhpt.pta.val);
+	/*Enable VHPT here?*/
+}
+
+void kvm_init_vtlb(struct kvm_vcpu *v)
+{
+	v->arch.vtlb.num = VTLB_NUM_ENTRIES;
+	thash_init(&v->arch.vtlb, VTLB_SHIFT);
+}
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
new file mode 100644
index 00000000..98771e2a
--- /dev/null
+++ b/arch/ia64/lib/Makefile
@@ -0,0 +1,50 @@
+#
+# Makefile for ia64-specific library routines..
+#
+
+obj-y := io.o
+
+lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o			\
+	__divdi3.o __udivdi3.o __moddi3.o __umoddi3.o			\
+	checksum.o clear_page.o csum_partial_copy.o			\
+	clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o	\
+	flush.o ip_fast_csum.o do_csum.o				\
+	memset.o strlen.o xor.o
+
+obj-$(CONFIG_ITANIUM)	+= copy_page.o copy_user.o memcpy.o
+obj-$(CONFIG_MCKINLEY)	+= copy_page_mck.o memcpy_mck.o
+lib-$(CONFIG_PERFMON)	+= carta_random.o
+
+AFLAGS___divdi3.o	=
+AFLAGS___udivdi3.o	= -DUNSIGNED
+AFLAGS___moddi3.o	= 	     -DMODULO
+AFLAGS___umoddi3.o	= -DUNSIGNED -DMODULO
+
+AFLAGS___divsi3.o	=
+AFLAGS___udivsi3.o	= -DUNSIGNED
+AFLAGS___modsi3.o	=	     -DMODULO
+AFLAGS___umodsi3.o	= -DUNSIGNED -DMODULO
+
+$(obj)/__divdi3.o: $(src)/idiv64.S FORCE
+	$(call if_changed_dep,as_o_S)
+
+$(obj)/__udivdi3.o: $(src)/idiv64.S FORCE
+	$(call if_changed_dep,as_o_S)
+
+$(obj)/__moddi3.o: $(src)/idiv64.S FORCE
+	$(call if_changed_dep,as_o_S)
+
+$(obj)/__umoddi3.o: $(src)/idiv64.S FORCE
+	$(call if_changed_dep,as_o_S)
+
+$(obj)/__divsi3.o: $(src)/idiv32.S FORCE
+	$(call if_changed_dep,as_o_S)
+
+$(obj)/__udivsi3.o: $(src)/idiv32.S FORCE
+	$(call if_changed_dep,as_o_S)
+
+$(obj)/__modsi3.o: $(src)/idiv32.S FORCE
+	$(call if_changed_dep,as_o_S)
+
+$(obj)/__umodsi3.o: $(src)/idiv32.S FORCE
+	$(call if_changed_dep,as_o_S)
diff --git a/arch/ia64/lib/carta_random.S b/arch/ia64/lib/carta_random.S
new file mode 100644
index 00000000..d0674c36
--- /dev/null
+++ b/arch/ia64/lib/carta_random.S
@@ -0,0 +1,54 @@
+/*
+ * Fast, simple, yet decent quality random number generator based on
+ * a paper by David G. Carta ("Two Fast Implementations of the
+ * `Minimal Standard' Random Number Generator," Communications of the
+ * ACM, January, 1990).
+ *
+ * Copyright (C) 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/asmmacro.h>
+
+#define a	r2
+#define m	r3
+#define lo	r8
+#define hi	r9
+#define t0	r16
+#define t1	r17
+#define	seed	r32
+
+GLOBAL_ENTRY(carta_random32)
+	movl	a = (16807 << 16) | 16807
+	;;
+	pmpyshr2.u t0 = a, seed, 0
+	pmpyshr2.u t1 = a, seed, 16
+	;;
+	unpack2.l t0 = t1, t0
+	dep	m = -1, r0, 0, 31
+	;;
+	zxt4	lo = t0
+	shr.u	hi = t0, 32
+	;;
+	dep	t0 = 0, hi, 15, 49	// t0 = (hi & 0x7fff)
+	;;
+	shl	t0 = t0, 16		// t0 = (hi & 0x7fff) << 16
+	shr	t1 = hi, 15		// t1 = (hi >> 15)
+	;;
+	add	lo = lo, t0
+	;;
+	cmp.gtu	p6, p0 = lo, m
+	;;
+(p6)	and	lo = lo, m
+	;;
+(p6)	add	lo = 1, lo
+	;;
+	add	lo = lo, t1
+	;;
+	cmp.gtu p6, p0 = lo, m
+	;;
+(p6)	and	lo = lo, m
+	;;
+(p6)	add	lo = 1, lo
+	br.ret.sptk.many rp
+END(carta_random32)
diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c
new file mode 100644
index 00000000..9fc95502
--- /dev/null
+++ b/arch/ia64/lib/checksum.c
@@ -0,0 +1,101 @@
+/*
+ * Network checksum routines
+ *
+ * Copyright (C) 1999, 2003 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Most of the code coming from arch/alpha/lib/checksum.c
+ *
+ * This file contains network checksum routines that are better done
+ * in an architecture-specific manner due to speed..
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/byteorder.h>
+
+static inline unsigned short
+from64to16 (unsigned long x)
+{
+	/* add up 32-bit words for 33 bits */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up 16-bit and 17-bit words for 17+c bits */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up 16-bit and 2-bit for 16+c bit */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up carry.. */
+	x = (x & 0xffff) + (x >> 16);
+	return x;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented.
+ */
+__sum16
+csum_tcpudp_magic (__be32 saddr, __be32 daddr, unsigned short len,
+		   unsigned short proto, __wsum sum)
+{
+	return (__force __sum16)~from64to16(
+		(__force u64)saddr + (__force u64)daddr +
+		(__force u64)sum + ((len + proto) << 8));
+}
+
+EXPORT_SYMBOL(csum_tcpudp_magic);
+
+__wsum
+csum_tcpudp_nofold (__be32 saddr, __be32 daddr, unsigned short len,
+		    unsigned short proto, __wsum sum)
+{
+	unsigned long result;
+
+	result = (__force u64)saddr + (__force u64)daddr +
+		 (__force u64)sum + ((len + proto) << 8);
+
+	/* Fold down to 32-bits so we don't lose in the typedef-less network stack.  */
+	/* 64 to 33 */
+	result = (result & 0xffffffff) + (result >> 32);
+	/* 33 to 32 */
+	result = (result & 0xffffffff) + (result >> 32);
+	return (__force __wsum)result;
+}
+EXPORT_SYMBOL(csum_tcpudp_nofold);
+
+extern unsigned long do_csum (const unsigned char *, long);
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+	u64 result = do_csum(buff, len);
+
+	/* add in old sum, and carry.. */
+	result += (__force u32)sum;
+	/* 32+c bits -> 32 bits */
+	result = (result & 0xffffffff) + (result >> 32);
+	return (__force __wsum)result;
+}
+
+EXPORT_SYMBOL(csum_partial);
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+__sum16 ip_compute_csum (const void *buff, int len)
+{
+	return (__force __sum16)~do_csum(buff,len);
+}
+
+EXPORT_SYMBOL(ip_compute_csum);
diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S
new file mode 100644
index 00000000..2d814e7e
--- /dev/null
+++ b/arch/ia64/lib/clear_page.S
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 1999-2002 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
+ *
+ * 1/06/01 davidm	Tuned for Itanium.
+ * 2/12/02 kchen	Tuned for both Itanium and McKinley
+ * 3/08/02 davidm	Some more tweaking
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_ITANIUM
+# define L3_LINE_SIZE	64	// Itanium L3 line size
+# define PREFETCH_LINES	9	// magic number
+#else
+# define L3_LINE_SIZE	128	// McKinley L3 line size
+# define PREFETCH_LINES	12	// magic number
+#endif
+
+#define saved_lc	r2
+#define dst_fetch	r3
+#define dst1		r8
+#define dst2		r9
+#define dst3		r10
+#define dst4		r11
+
+#define dst_last	r31
+
+GLOBAL_ENTRY(clear_page)
+	.prologue
+	.regstk 1,0,0,0
+	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until
+	.save ar.lc, saved_lc
+	mov saved_lc = ar.lc
+
+	.body
+	mov ar.lc = (PREFETCH_LINES - 1)
+	mov dst_fetch = in0
+	adds dst1 = 16, in0
+	adds dst2 = 32, in0
+	;;
+.fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
+	adds dst3 = 48, in0		// executing this multiple times is harmless
+	br.cloop.sptk.few .fetch
+	;;
+	addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
+	mov ar.lc = r16			// one L3 line per iteration
+	adds dst4 = 64, in0
+	;;
+#ifdef CONFIG_ITANIUM
+	// Optimized for Itanium
+1:	stf.spill.nta [dst1] = f0, 64
+	stf.spill.nta [dst2] = f0, 64
+	cmp.lt p8,p0=dst_fetch, dst_last
+	;;
+#else
+	// Optimized for McKinley
+1:	stf.spill.nta [dst1] = f0, 64
+	stf.spill.nta [dst2] = f0, 64
+	stf.spill.nta [dst3] = f0, 64
+	stf.spill.nta [dst4] = f0, 128
+	cmp.lt p8,p0=dst_fetch, dst_last
+	;;
+	stf.spill.nta [dst1] = f0, 64
+	stf.spill.nta [dst2] = f0, 64
+#endif
+	stf.spill.nta [dst3] = f0, 64
+(p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
+	br.cloop.sptk.few 1b
+	;;
+	mov ar.lc = saved_lc		// restore lc
+	br.ret.sptk.many rp
+END(clear_page)
diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S
new file mode 100644
index 00000000..eecd8577
--- /dev/null
+++ b/arch/ia64/lib/clear_user.S
@@ -0,0 +1,209 @@
+/*
+ * This routine clears to zero a linear memory buffer in user space.
+ *
+ * Inputs:
+ *	in0:	address of buffer
+ *	in1:	length of buffer in bytes
+ * Outputs:
+ *	r8:	number of bytes that didn't get cleared due to a fault
+ *
+ * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+#include <asm/asmmacro.h>
+
+//
+// arguments
+//
+#define buf		r32
+#define len		r33
+
+//
+// local registers
+//
+#define cnt		r16
+#define buf2		r17
+#define saved_lc	r18
+#define saved_pfs	r19
+#define tmp		r20
+#define len2		r21
+#define len3		r22
+
+//
+// Theory of operations:
+//	- we check whether or not the buffer is small, i.e., less than 17
+//	  in which case we do the byte by byte loop.
+//
+//	- Otherwise we go progressively from 1 byte store to 8byte store in
+//	  the head part, the body is a 16byte store loop and we finish we the
+//	  tail for the last 15 bytes.
+//	  The good point about this breakdown is that the long buffer handling
+//	  contains only 2 branches.
+//
+//	The reason for not using shifting & masking for both the head and the
+//	tail is to stay semantically correct. This routine is not supposed
+//	to write bytes outside of the buffer. While most of the time this would
+//	be ok, we can't tolerate a mistake. A classical example is the case
+//	of multithreaded code were to the extra bytes touched is actually owned
+//	by another thread which runs concurrently to ours. Another, less likely,
+//	example is with device drivers where reading an I/O mapped location may
+//	have side effects (same thing for writing).
+//
+
+GLOBAL_ENTRY(__do_clear_user)
+	.prologue
+	.save ar.pfs, saved_pfs
+	alloc	saved_pfs=ar.pfs,2,0,0,0
+	cmp.eq p6,p0=r0,len		// check for zero length
+	.save ar.lc, saved_lc
+	mov saved_lc=ar.lc		// preserve ar.lc (slow)
+	.body
+	;;				// avoid WAW on CFM
+	adds tmp=-1,len			// br.ctop is repeat/until
+	mov ret0=len			// return value is length at this point
+(p6)	br.ret.spnt.many rp
+	;;
+	cmp.lt p6,p0=16,len		// if len > 16 then long memset
+	mov ar.lc=tmp			// initialize lc for small count
+(p6)	br.cond.dptk .long_do_clear
+	;;				// WAR on ar.lc
+	//
+	// worst case 16 iterations, avg 8 iterations
+	//
+	// We could have played with the predicates to use the extra
+	// M slot for 2 stores/iteration but the cost the initialization
+	// the various counters compared to how long the loop is supposed
+	// to last on average does not make this solution viable.
+	//
+1:
+	EX( .Lexit1, st1 [buf]=r0,1 )
+	adds len=-1,len			// countdown length using len
+	br.cloop.dptk 1b
+	;;				// avoid RAW on ar.lc
+	//
+	// .Lexit4: comes from byte by byte loop
+	//	    len contains bytes left
+.Lexit1:
+	mov ret0=len			// faster than using ar.lc
+	mov ar.lc=saved_lc
+	br.ret.sptk.many rp		// end of short clear_user
+
+
+	//
+	// At this point we know we have more than 16 bytes to copy
+	// so we focus on alignment (no branches required)
+	//
+	// The use of len/len2 for countdown of the number of bytes left
+	// instead of ret0 is due to the fact that the exception code
+	// changes the values of r8.
+	//
+.long_do_clear:
+	tbit.nz p6,p0=buf,0		// odd alignment (for long_do_clear)
+	;;
+	EX( .Lexit3, (p6) st1 [buf]=r0,1 )	// 1-byte aligned
+(p6)	adds len=-1,len;;		// sync because buf is modified
+	tbit.nz p6,p0=buf,1
+	;;
+	EX( .Lexit3, (p6) st2 [buf]=r0,2 )	// 2-byte aligned
+(p6)	adds len=-2,len;;
+	tbit.nz p6,p0=buf,2
+	;;
+	EX( .Lexit3, (p6) st4 [buf]=r0,4 )	// 4-byte aligned
+(p6)	adds len=-4,len;;
+	tbit.nz p6,p0=buf,3
+	;;
+	EX( .Lexit3, (p6) st8 [buf]=r0,8 )	// 8-byte aligned
+(p6)	adds len=-8,len;;
+	shr.u cnt=len,4		// number of 128-bit (2x64bit) words
+	;;
+	cmp.eq p6,p0=r0,cnt
+	adds tmp=-1,cnt
+(p6)	br.cond.dpnt .dotail		// we have less than 16 bytes left
+	;;
+	adds buf2=8,buf			// setup second base pointer
+	mov ar.lc=tmp
+	;;
+
+	//
+	// 16bytes/iteration core loop
+	//
+	// The second store can never generate a fault because
+	// we come into the loop only when we are 16-byte aligned.
+	// This means that if we cross a page then it will always be
+	// in the first store and never in the second.
+	//
+	//
+	// We need to keep track of the remaining length. A possible (optimistic)
+	// way would be to use ar.lc and derive how many byte were left by
+	// doing : left= 16*ar.lc + 16.  this would avoid the addition at
+	// every iteration.
+	// However we need to keep the synchronization point. A template
+	// M;;MB does not exist and thus we can keep the addition at no
+	// extra cycle cost (use a nop slot anyway). It also simplifies the
+	// (unlikely)  error recovery code
+	//
+
+2:	EX(.Lexit3, st8 [buf]=r0,16 )
+	;;				// needed to get len correct when error
+	st8 [buf2]=r0,16
+	adds len=-16,len
+	br.cloop.dptk 2b
+	;;
+	mov ar.lc=saved_lc
+	//
+	// tail correction based on len only
+	//
+	// We alternate the use of len3,len2 to allow parallelism and correct
+	// error handling. We also reuse p6/p7 to return correct value.
+	// The addition of len2/len3 does not cost anything more compared to
+	// the regular memset as we had empty slots.
+	//
+.dotail:
+	mov len2=len			// for parallelization of error handling
+	mov len3=len
+	tbit.nz p6,p0=len,3
+	;;
+	EX( .Lexit2, (p6) st8 [buf]=r0,8 )	// at least 8 bytes
+(p6)	adds len3=-8,len2
+	tbit.nz p7,p6=len,2
+	;;
+	EX( .Lexit2, (p7) st4 [buf]=r0,4 )	// at least 4 bytes
+(p7)	adds len2=-4,len3
+	tbit.nz p6,p7=len,1
+	;;
+	EX( .Lexit2, (p6) st2 [buf]=r0,2 )	// at least 2 bytes
+(p6)	adds len3=-2,len2
+	tbit.nz p7,p6=len,0
+	;;
+	EX( .Lexit2, (p7) st1 [buf]=r0 )	// only 1 byte left
+	mov ret0=r0				// success
+	br.ret.sptk.many rp			// end of most likely path
+
+	//
+	// Outlined error handling code
+	//
+
+	//
+	// .Lexit3: comes from core loop, need restore pr/lc
+	//	    len contains bytes left
+	//
+	//
+	// .Lexit2:
+	//	if p6 -> coming from st8 or st2 : len2 contains what's left
+	//	if p7 -> coming from st4 or st1 : len3 contains what's left
+	// We must restore lc/pr even though might not have been used.
+.Lexit2:
+	.pred.rel "mutex", p6, p7
+(p6)	mov len=len2
+(p7)	mov len=len3
+	;;
+	//
+	// .Lexit4: comes from head, need not restore pr/lc
+	//	    len contains bytes left
+	//
+.Lexit3:
+	mov ret0=len
+	mov ar.lc=saved_lc
+	br.ret.sptk.many rp
+END(__do_clear_user)
diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S
new file mode 100644
index 00000000..127d1d05
--- /dev/null
+++ b/arch/ia64/lib/copy_page.S
@@ -0,0 +1,98 @@
+/*
+ *
+ * Optimized version of the standard copy_page() function
+ *
+ * Inputs:
+ *	in0:	address of target page
+ *	in1:	address of source page
+ * Output:
+ *	no return value
+ *
+ * Copyright (C) 1999, 2001 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger <davidm@hpl.hp.com>
+ *
+ * 4/06/01 davidm	Tuned to make it perform well both for cached and uncached copies.
+ */
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#define PIPE_DEPTH	3
+#define EPI		p[PIPE_DEPTH-1]
+
+#define lcount		r16
+#define saved_pr	r17
+#define saved_lc	r18
+#define saved_pfs	r19
+#define src1		r20
+#define src2		r21
+#define tgt1		r22
+#define tgt2		r23
+#define srcf		r24
+#define tgtf		r25
+#define tgt_last	r26
+
+#define Nrot		((8*PIPE_DEPTH+7)&~7)
+
+GLOBAL_ENTRY(copy_page)
+	.prologue
+	.save ar.pfs, saved_pfs
+	alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot
+
+	.rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \
+	      t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH]
+	.rotp p[PIPE_DEPTH]
+
+	.save ar.lc, saved_lc
+	mov saved_lc=ar.lc
+	mov ar.ec=PIPE_DEPTH
+
+	mov lcount=PAGE_SIZE/64-1
+	.save pr, saved_pr
+	mov saved_pr=pr
+	mov pr.rot=1<<16
+
+	.body
+
+	mov src1=in1
+	adds src2=8,in1
+	mov tgt_last = PAGE_SIZE
+	;;
+	adds tgt2=8,in0
+	add srcf=512,in1
+	mov ar.lc=lcount
+	mov tgt1=in0
+	add tgtf=512,in0
+	add tgt_last = tgt_last, in0
+	;;
+1:
+(p[0])	ld8 t1[0]=[src1],16
+(EPI)	st8 [tgt1]=t1[PIPE_DEPTH-1],16
+(p[0])	ld8 t2[0]=[src2],16
+(EPI)	st8 [tgt2]=t2[PIPE_DEPTH-1],16
+	cmp.ltu p6,p0 = tgtf, tgt_last
+	;;
+(p[0])	ld8 t3[0]=[src1],16
+(EPI)	st8 [tgt1]=t3[PIPE_DEPTH-1],16
+(p[0])	ld8 t4[0]=[src2],16
+(EPI)	st8 [tgt2]=t4[PIPE_DEPTH-1],16
+	;;
+(p[0])	ld8 t5[0]=[src1],16
+(EPI)	st8 [tgt1]=t5[PIPE_DEPTH-1],16
+(p[0])	ld8 t6[0]=[src2],16
+(EPI)	st8 [tgt2]=t6[PIPE_DEPTH-1],16
+	;;
+(p[0])	ld8 t7[0]=[src1],16
+(EPI)	st8 [tgt1]=t7[PIPE_DEPTH-1],16
+(p[0])	ld8 t8[0]=[src2],16
+(EPI)	st8 [tgt2]=t8[PIPE_DEPTH-1],16
+
+(p6)	lfetch [srcf], 64
+(p6)	lfetch [tgtf], 64
+	br.ctop.sptk.few 1b
+	;;
+	mov pr=saved_pr,0xffffffffffff0000	// restore predicates
+	mov ar.pfs=saved_pfs
+	mov ar.lc=saved_lc
+	br.ret.sptk.many rp
+END(copy_page)
diff --git a/arch/ia64/lib/copy_page_mck.S b/arch/ia64/lib/copy_page_mck.S
new file mode 100644
index 00000000..3c45d60a
--- /dev/null
+++ b/arch/ia64/lib/copy_page_mck.S
@@ -0,0 +1,185 @@
+/*
+ * McKinley-optimized version of copy_page().
+ *
+ * Copyright (C) 2002 Hewlett-Packard Co
+ *	David Mosberger <davidm@hpl.hp.com>
+ *
+ * Inputs:
+ *	in0:	address of target page
+ *	in1:	address of source page
+ * Output:
+ *	no return value
+ *
+ * General idea:
+ *	- use regular loads and stores to prefetch data to avoid consuming M-slot just for
+ *	  lfetches => good for in-cache performance
+ *	- avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single
+ *	  cycle
+ *
+ * Principle of operation:
+ *	First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes.
+ *	To avoid secondary misses in L2, we prefetch both source and destination with a line-size
+ *	of 128 bytes.  When both of these lines are in the L2 and the first half of the
+ *	source line is in L1, we start copying the remaining words.  The second half of the
+ *	source line is prefetched in an earlier iteration, so that by the time we start
+ *	accessing it, it's also present in the L1.
+ *
+ *	We use a software-pipelined loop to control the overall operation.  The pipeline
+ *	has 2*PREFETCH_DIST+K stages.  The first PREFETCH_DIST stages are used for prefetching
+ *	source cache-lines.  The second PREFETCH_DIST stages are used for prefetching destination
+ *	cache-lines, the last K stages are used to copy the cache-line words not copied by
+ *	the prefetches.  The four relevant points in the pipelined are called A, B, C, D:
+ *	p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line
+ *	should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought
+ *	into L1D and p[D] is TRUE if a cacheline needs to be copied.
+ *
+ *	This all sounds very complicated, but thanks to the modulo-scheduled loop support,
+ *	the resulting code is very regular and quite easy to follow (once you get the idea).
+ *
+ *	As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented
+ *	as the separate .prefetch_loop.  Logically, this loop performs exactly like the
+ *	main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed,
+ *	so that each loop iteration is faster (again, good for cached case).
+ *
+ *	When reading the code, it helps to keep the following picture in mind:
+ *
+ *	       word 0 word 1
+ *            +------+------+---
+ *	      |	v[x] | 	t1  | ^
+ *	      |	t2   |	t3  | |
+ *	      |	t4   |	t5  | |
+ *	      |	t6   |	t7  | | 128 bytes
+ *     	      |	n[y] | 	t9  | |	(L2 cache line)
+ *	      |	t10  | 	t11 | |
+ *	      |	t12  | 	t13 | |
+ *	      |	t14  | 	t15 | v
+ *	      +------+------+---
+ *
+ *	Here, v[x] is copied by the (memory) prefetch.  n[y] is loaded at p[C]
+ *	to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
+ *	an order that avoids bank conflicts.
+ */
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#define PREFETCH_DIST	8		// McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
+
+#define src0		r2
+#define src1		r3
+#define dst0		r9
+#define dst1		r10
+#define src_pre_mem	r11
+#define dst_pre_mem	r14
+#define src_pre_l2	r15
+#define dst_pre_l2	r16
+#define t1		r17
+#define t2		r18
+#define t3		r19
+#define t4		r20
+#define t5		t1	// alias!
+#define t6		t2	// alias!
+#define t7		t3	// alias!
+#define t9		t5	// alias!
+#define t10		t4	// alias!
+#define t11		t7	// alias!
+#define t12		t6	// alias!
+#define t14		t10	// alias!
+#define t13		r21
+#define t15		r22
+
+#define saved_lc	r23
+#define saved_pr	r24
+
+#define	A	0
+#define B	(PREFETCH_DIST)
+#define C	(B + PREFETCH_DIST)
+#define D	(C + 3)
+#define N	(D + 1)
+#define Nrot	((N + 7) & ~7)
+
+GLOBAL_ENTRY(copy_page)
+	.prologue
+	alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot
+
+	.rotr v[2*PREFETCH_DIST], n[D-C+1]
+	.rotp p[N]
+
+	.save ar.lc, saved_lc
+	mov saved_lc = ar.lc
+	.save pr, saved_pr
+	mov saved_pr = pr
+	.body
+
+	mov src_pre_mem = in1
+	mov pr.rot = 0x10000
+	mov ar.ec = 1				// special unrolled loop
+
+	mov dst_pre_mem = in0
+	mov ar.lc = 2*PREFETCH_DIST - 1
+
+	add src_pre_l2 = 8*8, in1
+	add dst_pre_l2 = 8*8, in0
+	add src0 = 8, in1			// first t1 src
+	add src1 = 3*8, in1			// first t3 src
+	add dst0 = 8, in0			// first t1 dst
+	add dst1 = 3*8, in0			// first t3 dst
+	mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1
+	nop.m 0
+	nop.i 0
+	;;
+	// same as .line_copy loop, but with all predicated-off instructions removed:
+.prefetch_loop:
+(p[A])	ld8 v[A] = [src_pre_mem], 128		// M0
+(p[B])	st8 [dst_pre_mem] = v[B], 128		// M2
+	br.ctop.sptk .prefetch_loop
+	;;
+	cmp.eq p16, p0 = r0, r0			// reset p16 to 1 (br.ctop cleared it to zero)
+	mov ar.lc = t1				// with 64KB pages, t1 is too big to fit in 8 bits!
+	mov ar.ec = N				// # of stages in pipeline
+	;;
+.line_copy:
+(p[D])	ld8 t2 = [src0], 3*8			// M0
+(p[D])	ld8 t4 = [src1], 3*8			// M1
+(p[B])	st8 [dst_pre_mem] = v[B], 128		// M2 prefetch dst from memory
+(p[D])	st8 [dst_pre_l2] = n[D-C], 128		// M3 prefetch dst from L2
+	;;
+(p[A])	ld8 v[A] = [src_pre_mem], 128		// M0 prefetch src from memory
+(p[C])	ld8 n[0] = [src_pre_l2], 128		// M1 prefetch src from L2
+(p[D])	st8 [dst0] =  t1, 8			// M2
+(p[D])	st8 [dst1] =  t3, 8			// M3
+	;;
+(p[D])	ld8  t5 = [src0], 8
+(p[D])	ld8  t7 = [src1], 3*8
+(p[D])	st8 [dst0] =  t2, 3*8
+(p[D])	st8 [dst1] =  t4, 3*8
+	;;
+(p[D])	ld8  t6 = [src0], 3*8
+(p[D])	ld8 t10 = [src1], 8
+(p[D])	st8 [dst0] =  t5, 8
+(p[D])	st8 [dst1] =  t7, 3*8
+	;;
+(p[D])	ld8  t9 = [src0], 3*8
+(p[D])	ld8 t11 = [src1], 3*8
+(p[D])	st8 [dst0] =  t6, 3*8
+(p[D])	st8 [dst1] = t10, 8
+	;;
+(p[D])	ld8 t12 = [src0], 8
+(p[D])	ld8 t14 = [src1], 8
+(p[D])	st8 [dst0] =  t9, 3*8
+(p[D])	st8 [dst1] = t11, 3*8
+	;;
+(p[D])	ld8 t13 = [src0], 4*8
+(p[D])	ld8 t15 = [src1], 4*8
+(p[D])	st8 [dst0] = t12, 8
+(p[D])	st8 [dst1] = t14, 8
+	;;
+(p[D-1])ld8  t1 = [src0], 8
+(p[D-1])ld8  t3 = [src1], 8
+(p[D])	st8 [dst0] = t13, 4*8
+(p[D])	st8 [dst1] = t15, 4*8
+	br.ctop.sptk .line_copy
+	;;
+	mov ar.lc = saved_lc
+	mov pr = saved_pr, -1
+	br.ret.sptk.many rp
+END(copy_page)
diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S
new file mode 100644
index 00000000..c952bdc6
--- /dev/null
+++ b/arch/ia64/lib/copy_user.S
@@ -0,0 +1,610 @@
+/*
+ *
+ * Optimized version of the copy_user() routine.
+ * It is used to copy date across the kernel/user boundary.
+ *
+ * The source and destination are always on opposite side of
+ * the boundary. When reading from user space we must catch
+ * faults on loads. When writing to user space we must catch
+ * errors on stores. Note that because of the nature of the copy
+ * we don't need to worry about overlapping regions.
+ *
+ *
+ * Inputs:
+ *	in0	address of source buffer
+ *	in1	address of destination buffer
+ *	in2	number of bytes to copy
+ *
+ * Outputs:
+ *	ret0	0 in case of success. The number of bytes NOT copied in
+ *		case of error.
+ *
+ * Copyright (C) 2000-2001 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Fixme:
+ *	- handle the case where we have more than 16 bytes and the alignment
+ *	  are different.
+ *	- more benchmarking
+ *	- fix extraneous stop bit introduced by the EX() macro.
+ */
+
+#include <asm/asmmacro.h>
+
+//
+// Tuneable parameters
+//
+#define COPY_BREAK	16	// we do byte copy below (must be >=16)
+#define PIPE_DEPTH	21	// pipe depth
+
+#define EPI		p[PIPE_DEPTH-1]
+
+//
+// arguments
+//
+#define dst		in0
+#define src		in1
+#define len		in2
+
+//
+// local registers
+//
+#define t1		r2	// rshift in bytes
+#define t2		r3	// lshift in bytes
+#define rshift		r14	// right shift in bits
+#define lshift		r15	// left shift in bits
+#define word1		r16
+#define word2		r17
+#define cnt		r18
+#define len2		r19
+#define saved_lc	r20
+#define saved_pr	r21
+#define tmp		r22
+#define val		r23
+#define src1		r24
+#define dst1		r25
+#define src2		r26
+#define dst2		r27
+#define len1		r28
+#define enddst		r29
+#define endsrc		r30
+#define saved_pfs	r31
+
+GLOBAL_ENTRY(__copy_user)
+	.prologue
+	.save ar.pfs, saved_pfs
+	alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7)
+
+	.rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH]
+	.rotp p[PIPE_DEPTH]
+
+	adds len2=-1,len	// br.ctop is repeat/until
+	mov ret0=r0
+
+	;;			// RAW of cfm when len=0
+	cmp.eq p8,p0=r0,len	// check for zero length
+	.save ar.lc, saved_lc
+	mov saved_lc=ar.lc	// preserve ar.lc (slow)
+(p8)	br.ret.spnt.many rp	// empty mempcy()
+	;;
+	add enddst=dst,len	// first byte after end of source
+	add endsrc=src,len	// first byte after end of destination
+	.save pr, saved_pr
+	mov saved_pr=pr		// preserve predicates
+
+	.body
+
+	mov dst1=dst		// copy because of rotation
+	mov ar.ec=PIPE_DEPTH
+	mov pr.rot=1<<16	// p16=true all others are false
+
+	mov src1=src		// copy because of rotation
+	mov ar.lc=len2		// initialize lc for small count
+	cmp.lt p10,p7=COPY_BREAK,len	// if len > COPY_BREAK then long copy
+
+	xor tmp=src,dst		// same alignment test prepare
+(p10)	br.cond.dptk .long_copy_user
+	;;			// RAW pr.rot/p16 ?
+	//
+	// Now we do the byte by byte loop with software pipeline
+	//
+	// p7 is necessarily false by now
+1:
+	EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
+	EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
+	br.ctop.dptk.few 1b
+	;;
+	mov ar.lc=saved_lc
+	mov pr=saved_pr,0xffffffffffff0000
+	mov ar.pfs=saved_pfs		// restore ar.ec
+	br.ret.sptk.many rp		// end of short memcpy
+
+	//
+	// Not 8-byte aligned
+	//
+.diff_align_copy_user:
+	// At this point we know we have more than 16 bytes to copy
+	// and also that src and dest do _not_ have the same alignment.
+	and src2=0x7,src1				// src offset
+	and dst2=0x7,dst1				// dst offset
+	;;
+	// The basic idea is that we copy byte-by-byte at the head so
+	// that we can reach 8-byte alignment for both src1 and dst1.
+	// Then copy the body using software pipelined 8-byte copy,
+	// shifting the two back-to-back words right and left, then copy
+	// the tail by copying byte-by-byte.
+	//
+	// Fault handling. If the byte-by-byte at the head fails on the
+	// load, then restart and finish the pipleline by copying zeros
+	// to the dst1. Then copy zeros for the rest of dst1.
+	// If 8-byte software pipeline fails on the load, do the same as
+	// failure_in3 does. If the byte-by-byte at the tail fails, it is
+	// handled simply by failure_in_pipe1.
+	//
+	// The case p14 represents the source has more bytes in the
+	// the first word (by the shifted part), whereas the p15 needs to
+	// copy some bytes from the 2nd word of the source that has the
+	// tail of the 1st of the destination.
+	//
+
+	//
+	// Optimization. If dst1 is 8-byte aligned (quite common), we don't need
+	// to copy the head to dst1, to start 8-byte copy software pipeline.
+	// We know src1 is not 8-byte aligned in this case.
+	//
+	cmp.eq p14,p15=r0,dst2
+(p15)	br.cond.spnt 1f
+	;;
+	sub t1=8,src2
+	mov t2=src2
+	;;
+	shl rshift=t2,3
+	sub len1=len,t1					// set len1
+	;;
+	sub lshift=64,rshift
+	;;
+	br.cond.spnt .word_copy_user
+	;;
+1:
+	cmp.leu	p14,p15=src2,dst2
+	sub t1=dst2,src2
+	;;
+	.pred.rel "mutex", p14, p15
+(p14)	sub word1=8,src2				// (8 - src offset)
+(p15)	sub t1=r0,t1					// absolute value
+(p15)	sub word1=8,dst2				// (8 - dst offset)
+	;;
+	// For the case p14, we don't need to copy the shifted part to
+	// the 1st word of destination.
+	sub t2=8,t1
+(p14)	sub word1=word1,t1
+	;;
+	sub len1=len,word1				// resulting len
+(p15)	shl rshift=t1,3					// in bits
+(p14)	shl rshift=t2,3
+	;;
+(p14)	sub len1=len1,t1
+	adds cnt=-1,word1
+	;;
+	sub lshift=64,rshift
+	mov ar.ec=PIPE_DEPTH
+	mov pr.rot=1<<16	// p16=true all others are false
+	mov ar.lc=cnt
+	;;
+2:
+	EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1)
+	EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
+	br.ctop.dptk.few 2b
+	;;
+	clrrrb
+	;;
+.word_copy_user:
+	cmp.gtu p9,p0=16,len1
+(p9)	br.cond.spnt 4f			// if (16 > len1) skip 8-byte copy
+	;;
+	shr.u cnt=len1,3		// number of 64-bit words
+	;;
+	adds cnt=-1,cnt
+	;;
+	.pred.rel "mutex", p14, p15
+(p14)	sub src1=src1,t2
+(p15)	sub src1=src1,t1
+	//
+	// Now both src1 and dst1 point to an 8-byte aligned address. And
+	// we have more than 8 bytes to copy.
+	//
+	mov ar.lc=cnt
+	mov ar.ec=PIPE_DEPTH
+	mov pr.rot=1<<16	// p16=true all others are false
+	;;
+3:
+	//
+	// The pipleline consists of 3 stages:
+	// 1 (p16):	Load a word from src1
+	// 2 (EPI_1):	Shift right pair, saving to tmp
+	// 3 (EPI):	Store tmp to dst1
+	//
+	// To make it simple, use at least 2 (p16) loops to set up val1[n]
+	// because we need 2 back-to-back val1[] to get tmp.
+	// Note that this implies EPI_2 must be p18 or greater.
+	//
+
+#define EPI_1		p[PIPE_DEPTH-2]
+#define SWITCH(pred, shift)	cmp.eq pred,p0=shift,rshift
+#define CASE(pred, shift)	\
+	(pred)	br.cond.spnt .copy_user_bit##shift
+#define BODY(rshift)						\
+.copy_user_bit##rshift:						\
+1:								\
+	EX(.failure_out,(EPI) st8 [dst1]=tmp,8);		\
+(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift;	\
+	EX(3f,(p16) ld8 val1[1]=[src1],8);			\
+(p16)	mov val1[0]=r0;						\
+	br.ctop.dptk 1b;					\
+	;;							\
+	br.cond.sptk.many .diff_align_do_tail;			\
+2:								\
+(EPI)	st8 [dst1]=tmp,8;					\
+(EPI_1)	shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift;	\
+3:								\
+(p16)	mov val1[1]=r0;						\
+(p16)	mov val1[0]=r0;						\
+	br.ctop.dptk 2b;					\
+	;;							\
+	br.cond.sptk.many .failure_in2
+
+	//
+	// Since the instruction 'shrp' requires a fixed 128-bit value
+	// specifying the bits to shift, we need to provide 7 cases
+	// below.
+	//
+	SWITCH(p6, 8)
+	SWITCH(p7, 16)
+	SWITCH(p8, 24)
+	SWITCH(p9, 32)
+	SWITCH(p10, 40)
+	SWITCH(p11, 48)
+	SWITCH(p12, 56)
+	;;
+	CASE(p6, 8)
+	CASE(p7, 16)
+	CASE(p8, 24)
+	CASE(p9, 32)
+	CASE(p10, 40)
+	CASE(p11, 48)
+	CASE(p12, 56)
+	;;
+	BODY(8)
+	BODY(16)
+	BODY(24)
+	BODY(32)
+	BODY(40)
+	BODY(48)
+	BODY(56)
+	;;
+.diff_align_do_tail:
+	.pred.rel "mutex", p14, p15
+(p14)	sub src1=src1,t1
+(p14)	adds dst1=-8,dst1
+(p15)	sub dst1=dst1,t1
+	;;
+4:
+	// Tail correction.
+	//
+	// The problem with this piplelined loop is that the last word is not
+	// loaded and thus parf of the last word written is not correct.
+	// To fix that, we simply copy the tail byte by byte.
+
+	sub len1=endsrc,src1,1
+	clrrrb
+	;;
+	mov ar.ec=PIPE_DEPTH
+	mov pr.rot=1<<16	// p16=true all others are false
+	mov ar.lc=len1
+	;;
+5:
+	EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
+	EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
+	br.ctop.dptk.few 5b
+	;;
+	mov ar.lc=saved_lc
+	mov pr=saved_pr,0xffffffffffff0000
+	mov ar.pfs=saved_pfs
+	br.ret.sptk.many rp
+
+	//
+	// Beginning of long mempcy (i.e. > 16 bytes)
+	//
+.long_copy_user:
+	tbit.nz p6,p7=src1,0	// odd alignment
+	and tmp=7,tmp
+	;;
+	cmp.eq p10,p8=r0,tmp
+	mov len1=len		// copy because of rotation
+(p8)	br.cond.dpnt .diff_align_copy_user
+	;;
+	// At this point we know we have more than 16 bytes to copy
+	// and also that both src and dest have the same alignment
+	// which may not be the one we want. So for now we must move
+	// forward slowly until we reach 16byte alignment: no need to
+	// worry about reaching the end of buffer.
+	//
+	EX(.failure_in1,(p6) ld1 val1[0]=[src1],1)	// 1-byte aligned
+(p6)	adds len1=-1,len1;;
+	tbit.nz p7,p0=src1,1
+	;;
+	EX(.failure_in1,(p7) ld2 val1[1]=[src1],2)	// 2-byte aligned
+(p7)	adds len1=-2,len1;;
+	tbit.nz p8,p0=src1,2
+	;;
+	//
+	// Stop bit not required after ld4 because if we fail on ld4
+	// we have never executed the ld1, therefore st1 is not executed.
+	//
+	EX(.failure_in1,(p8) ld4 val2[0]=[src1],4)	// 4-byte aligned
+	;;
+	EX(.failure_out,(p6) st1 [dst1]=val1[0],1)
+	tbit.nz p9,p0=src1,3
+	;;
+	//
+	// Stop bit not required after ld8 because if we fail on ld8
+	// we have never executed the ld2, therefore st2 is not executed.
+	//
+	EX(.failure_in1,(p9) ld8 val2[1]=[src1],8)	// 8-byte aligned
+	EX(.failure_out,(p7) st2 [dst1]=val1[1],2)
+(p8)	adds len1=-4,len1
+	;;
+	EX(.failure_out, (p8) st4 [dst1]=val2[0],4)
+(p9)	adds len1=-8,len1;;
+	shr.u cnt=len1,4		// number of 128-bit (2x64bit) words
+	;;
+	EX(.failure_out, (p9) st8 [dst1]=val2[1],8)
+	tbit.nz p6,p0=len1,3
+	cmp.eq p7,p0=r0,cnt
+	adds tmp=-1,cnt			// br.ctop is repeat/until
+(p7)	br.cond.dpnt .dotail		// we have less than 16 bytes left
+	;;
+	adds src2=8,src1
+	adds dst2=8,dst1
+	mov ar.lc=tmp
+	;;
+	//
+	// 16bytes/iteration
+	//
+2:
+	EX(.failure_in3,(p16) ld8 val1[0]=[src1],16)
+(p16)	ld8 val2[0]=[src2],16
+
+	EX(.failure_out, (EPI)	st8 [dst1]=val1[PIPE_DEPTH-1],16)
+(EPI)	st8 [dst2]=val2[PIPE_DEPTH-1],16
+	br.ctop.dptk 2b
+	;;			// RAW on src1 when fall through from loop
+	//
+	// Tail correction based on len only
+	//
+	// No matter where we come from (loop or test) the src1 pointer
+	// is 16 byte aligned AND we have less than 16 bytes to copy.
+	//
+.dotail:
+	EX(.failure_in1,(p6) ld8 val1[0]=[src1],8)	// at least 8 bytes
+	tbit.nz p7,p0=len1,2
+	;;
+	EX(.failure_in1,(p7) ld4 val1[1]=[src1],4)	// at least 4 bytes
+	tbit.nz p8,p0=len1,1
+	;;
+	EX(.failure_in1,(p8) ld2 val2[0]=[src1],2)	// at least 2 bytes
+	tbit.nz p9,p0=len1,0
+	;;
+	EX(.failure_out, (p6) st8 [dst1]=val1[0],8)
+	;;
+	EX(.failure_in1,(p9) ld1 val2[1]=[src1])	// only 1 byte left
+	mov ar.lc=saved_lc
+	;;
+	EX(.failure_out,(p7) st4 [dst1]=val1[1],4)
+	mov pr=saved_pr,0xffffffffffff0000
+	;;
+	EX(.failure_out, (p8)	st2 [dst1]=val2[0],2)
+	mov ar.pfs=saved_pfs
+	;;
+	EX(.failure_out, (p9)	st1 [dst1]=val2[1])
+	br.ret.sptk.many rp
+
+
+	//
+	// Here we handle the case where the byte by byte copy fails
+	// on the load.
+	// Several factors make the zeroing of the rest of the buffer kind of
+	// tricky:
+	//	- the pipeline: loads/stores are not in sync (pipeline)
+	//
+	//	  In the same loop iteration, the dst1 pointer does not directly
+	//	  reflect where the faulty load was.
+	//
+	//	- pipeline effect
+	//	  When you get a fault on load, you may have valid data from
+	//	  previous loads not yet store in transit. Such data must be
+	//	  store normally before moving onto zeroing the rest.
+	//
+	//	- single/multi dispersal independence.
+	//
+	// solution:
+	//	- we don't disrupt the pipeline, i.e. data in transit in
+	//	  the software pipeline will be eventually move to memory.
+	//	  We simply replace the load with a simple mov and keep the
+	//	  pipeline going. We can't really do this inline because
+	//	  p16 is always reset to 1 when lc > 0.
+	//
+.failure_in_pipe1:
+	sub ret0=endsrc,src1	// number of bytes to zero, i.e. not copied
+1:
+(p16)	mov val1[0]=r0
+(EPI)	st1 [dst1]=val1[PIPE_DEPTH-1],1
+	br.ctop.dptk 1b
+	;;
+	mov pr=saved_pr,0xffffffffffff0000
+	mov ar.lc=saved_lc
+	mov ar.pfs=saved_pfs
+	br.ret.sptk.many rp
+
+	//
+	// This is the case where the byte by byte copy fails on the load
+	// when we copy the head. We need to finish the pipeline and copy
+	// zeros for the rest of the destination. Since this happens
+	// at the top we still need to fill the body and tail.
+.failure_in_pipe2:
+	sub ret0=endsrc,src1	// number of bytes to zero, i.e. not copied
+2:
+(p16)	mov val1[0]=r0
+(EPI)	st1 [dst1]=val1[PIPE_DEPTH-1],1
+	br.ctop.dptk 2b
+	;;
+	sub len=enddst,dst1,1		// precompute len
+	br.cond.dptk.many .failure_in1bis
+	;;
+
+	//
+	// Here we handle the head & tail part when we check for alignment.
+	// The following code handles only the load failures. The
+	// main diffculty comes from the fact that loads/stores are
+	// scheduled. So when you fail on a load, the stores corresponding
+	// to previous successful loads must be executed.
+	//
+	// However some simplifications are possible given the way
+	// things work.
+	//
+	// 1) HEAD
+	// Theory of operation:
+	//
+	//  Page A   | Page B
+	//  ---------|-----
+	//          1|8 x
+	//	  1 2|8 x
+	//	    4|8 x
+	//	  1 4|8 x
+	//        2 4|8 x
+	//      1 2 4|8 x
+	//	     |1
+	//	     |2 x
+	//	     |4 x
+	//
+	// page_size >= 4k (2^12).  (x means 4, 2, 1)
+	// Here we suppose Page A exists and Page B does not.
+	//
+	// As we move towards eight byte alignment we may encounter faults.
+	// The numbers on each page show the size of the load (current alignment).
+	//
+	// Key point:
+	//	- if you fail on 1, 2, 4 then you have never executed any smaller
+	//	  size loads, e.g. failing ld4 means no ld1 nor ld2 executed
+	//	  before.
+	//
+	// This allows us to simplify the cleanup code, because basically you
+	// only have to worry about "pending" stores in the case of a failing
+	// ld8(). Given the way the code is written today, this means only
+	// worry about st2, st4. There we can use the information encapsulated
+	// into the predicates.
+	//
+	// Other key point:
+	//	- if you fail on the ld8 in the head, it means you went straight
+	//	  to it, i.e. 8byte alignment within an unexisting page.
+	// Again this comes from the fact that if you crossed just for the ld8 then
+	// you are 8byte aligned but also 16byte align, therefore you would
+	// either go for the 16byte copy loop OR the ld8 in the tail part.
+	// The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible
+	// because it would mean you had 15bytes to copy in which case you
+	// would have defaulted to the byte by byte copy.
+	//
+	//
+	// 2) TAIL
+	// Here we now we have less than 16 bytes AND we are either 8 or 16 byte
+	// aligned.
+	//
+	// Key point:
+	// This means that we either:
+	//		- are right on a page boundary
+	//	OR
+	//		- are at more than 16 bytes from a page boundary with
+	//		  at most 15 bytes to copy: no chance of crossing.
+	//
+	// This allows us to assume that if we fail on a load we haven't possibly
+	// executed any of the previous (tail) ones, so we don't need to do
+	// any stores. For instance, if we fail on ld2, this means we had
+	// 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4.
+	//
+	// This means that we are in a situation similar the a fault in the
+	// head part. That's nice!
+	//
+.failure_in1:
+	sub ret0=endsrc,src1	// number of bytes to zero, i.e. not copied
+	sub len=endsrc,src1,1
+	//
+	// we know that ret0 can never be zero at this point
+	// because we failed why trying to do a load, i.e. there is still
+	// some work to do.
+	// The failure_in1bis and length problem is taken care of at the
+	// calling side.
+	//
+	;;
+.failure_in1bis:		// from (.failure_in3)
+	mov ar.lc=len		// Continue with a stupid byte store.
+	;;
+5:
+	st1 [dst1]=r0,1
+	br.cloop.dptk 5b
+	;;
+	mov pr=saved_pr,0xffffffffffff0000
+	mov ar.lc=saved_lc
+	mov ar.pfs=saved_pfs
+	br.ret.sptk.many rp
+
+	//
+	// Here we simply restart the loop but instead
+	// of doing loads we fill the pipeline with zeroes
+	// We can't simply store r0 because we may have valid
+	// data in transit in the pipeline.
+	// ar.lc and ar.ec are setup correctly at this point
+	//
+	// we MUST use src1/endsrc here and not dst1/enddst because
+	// of the pipeline effect.
+	//
+.failure_in3:
+	sub ret0=endsrc,src1	// number of bytes to zero, i.e. not copied
+	;;
+2:
+(p16)	mov val1[0]=r0
+(p16)	mov val2[0]=r0
+(EPI)	st8 [dst1]=val1[PIPE_DEPTH-1],16
+(EPI)	st8 [dst2]=val2[PIPE_DEPTH-1],16
+	br.ctop.dptk 2b
+	;;
+	cmp.ne p6,p0=dst1,enddst	// Do we need to finish the tail ?
+	sub len=enddst,dst1,1		// precompute len
+(p6)	br.cond.dptk .failure_in1bis
+	;;
+	mov pr=saved_pr,0xffffffffffff0000
+	mov ar.lc=saved_lc
+	mov ar.pfs=saved_pfs
+	br.ret.sptk.many rp
+
+.failure_in2:
+	sub ret0=endsrc,src1
+	cmp.ne p6,p0=dst1,enddst	// Do we need to finish the tail ?
+	sub len=enddst,dst1,1		// precompute len
+(p6)	br.cond.dptk .failure_in1bis
+	;;
+	mov pr=saved_pr,0xffffffffffff0000
+	mov ar.lc=saved_lc
+	mov ar.pfs=saved_pfs
+	br.ret.sptk.many rp
+
+	//
+	// handling of failures on stores: that's the easy part
+	//
+.failure_out:
+	sub ret0=enddst,dst1
+	mov pr=saved_pr,0xffffffffffff0000
+	mov ar.lc=saved_lc
+
+	mov ar.pfs=saved_pfs
+	br.ret.sptk.many rp
+END(__copy_user)
diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c
new file mode 100644
index 00000000..118daf5a
--- /dev/null
+++ b/arch/ia64/lib/csum_partial_copy.c
@@ -0,0 +1,140 @@
+/*
+ * Network Checksum & Copy routine
+ *
+ * Copyright (C) 1999, 2003-2004 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Most of the code has been imported from Linux/Alpha
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * XXX Fixme: those 2 inlines are meant for debugging and will go away
+ */
+static inline unsigned
+short from64to16(unsigned long x)
+{
+	/* add up 32-bit words for 33 bits */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up 16-bit and 17-bit words for 17+c bits */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up 16-bit and 2-bit for 16+c bit */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up carry.. */
+	x = (x & 0xffff) + (x >> 16);
+	return x;
+}
+
+static inline
+unsigned long do_csum_c(const unsigned char * buff, int len, unsigned int psum)
+{
+	int odd, count;
+	unsigned long result = (unsigned long)psum;
+
+	if (len <= 0)
+		goto out;
+	odd = 1 & (unsigned long) buff;
+	if (odd) {
+		result = *buff << 8;
+		len--;
+		buff++;
+	}
+	count = len >> 1;		/* nr of 16-bit words.. */
+	if (count) {
+		if (2 & (unsigned long) buff) {
+			result += *(unsigned short *) buff;
+			count--;
+			len -= 2;
+			buff += 2;
+		}
+		count >>= 1;		/* nr of 32-bit words.. */
+		if (count) {
+			if (4 & (unsigned long) buff) {
+				result += *(unsigned int *) buff;
+				count--;
+				len -= 4;
+				buff += 4;
+			}
+			count >>= 1;	/* nr of 64-bit words.. */
+			if (count) {
+				unsigned long carry = 0;
+				do {
+					unsigned long w = *(unsigned long *) buff;
+					count--;
+					buff += 8;
+					result += carry;
+					result += w;
+					carry = (w > result);
+				} while (count);
+				result += carry;
+				result = (result & 0xffffffff) + (result >> 32);
+			}
+			if (len & 4) {
+				result += *(unsigned int *) buff;
+				buff += 4;
+			}
+		}
+		if (len & 2) {
+			result += *(unsigned short *) buff;
+			buff += 2;
+		}
+	}
+	if (len & 1)
+		result += *buff;
+
+	result = from64to16(result);
+
+	if (odd)
+		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+
+out:
+	return result;
+}
+
+/*
+ * XXX Fixme
+ *
+ * This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS.
+ * But it's very tricky to get right even in C.
+ */
+extern unsigned long do_csum(const unsigned char *, long);
+
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst,
+				int len, __wsum psum, int *errp)
+{
+	unsigned long result;
+
+	/* XXX Fixme
+	 * for now we separate the copy from checksum for obvious
+	 * alignment difficulties. Look at the Alpha code and you'll be
+	 * scared.
+	 */
+
+	if (__copy_from_user(dst, src, len) != 0 && errp)
+		*errp = -EFAULT;
+
+	result = do_csum(dst, len);
+
+	/* add in old sum, and carry.. */
+	result += (__force u32)psum;
+	/* 32+c bits -> 32 bits */
+	result = (result & 0xffffffff) + (result >> 32);
+	return (__force __wsum)result;
+}
+
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
+__wsum
+csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+{
+	return csum_partial_copy_from_user((__force const void __user *)src,
+					   dst, len, sum, NULL);
+}
+
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S
new file mode 100644
index 00000000..1a431a5c
--- /dev/null
+++ b/arch/ia64/lib/do_csum.S
@@ -0,0 +1,323 @@
+/*
+ *
+ * Optmized version of the standard do_csum() function
+ *
+ * Return: a 64bit quantity containing the 16bit Internet checksum
+ *
+ * Inputs:
+ *	in0: address of buffer to checksum (char *)
+ *	in1: length of the buffer (int)
+ *
+ * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 02/04/22	Ken Chen <kenneth.w.chen@intel.com>
+ *		Data locality study on the checksum buffer.
+ *		More optimization cleanup - remove excessive stop bits.
+ * 02/04/08	David Mosberger <davidm@hpl.hp.com>
+ *		More cleanup and tuning.
+ * 01/04/18	Jun Nakajima <jun.nakajima@intel.com>
+ *		Clean up and optimize and the software pipeline, loading two
+ *		back-to-back 8-byte words per loop. Clean up the initialization
+ *		for the loop. Support the cases where load latency = 1 or 2.
+ *		Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default).
+ */
+
+#include <asm/asmmacro.h>
+
+//
+// Theory of operations:
+//	The goal is to go as quickly as possible to the point where
+//	we can checksum 16 bytes/loop. Before reaching that point we must
+//	take care of incorrect alignment of first byte.
+//
+//	The code hereafter also takes care of the "tail" part of the buffer
+//	before entering the core loop, if any. The checksum is a sum so it
+//	allows us to commute operations. So we do the "head" and "tail"
+//	first to finish at full speed in the body. Once we get the head and
+//	tail values, we feed them into the pipeline, very handy initialization.
+//
+//	Of course we deal with the special case where the whole buffer fits
+//	into one 8 byte word. In this case we have only one entry in the pipeline.
+//
+//	We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for
+//	possible load latency and also to accommodate for head and tail.
+//
+//	The end of the function deals with folding the checksum from 64bits
+//	down to 16bits taking care of the carry.
+//
+//	This version avoids synchronization in the core loop by also using a
+//	pipeline for the accumulation of the checksum in resultx[] (x=1,2).
+//
+//	 wordx[] (x=1,2)
+//	|---|
+//      |   | 0			: new value loaded in pipeline
+//	|---|
+//      |   | -			: in transit data
+//	|---|
+//      |   | LOAD_LATENCY	: current value to add to checksum
+//	|---|
+//      |   | LOAD_LATENCY+1	: previous value added to checksum
+//      |---|			(previous iteration)
+//
+//	resultx[] (x=1,2)
+//	|---|
+//      |   | 0			: initial value
+//	|---|
+//      |   | LOAD_LATENCY-1	: new checksum
+//	|---|
+//      |   | LOAD_LATENCY	: previous value of checksum
+//	|---|
+//      |   | LOAD_LATENCY+1	: final checksum when out of the loop
+//      |---|
+//
+//
+//	See RFC1071 "Computing the Internet Checksum" for various techniques for
+//	calculating the Internet checksum.
+//
+// NOT YET DONE:
+//	- Maybe another algorithm which would take care of the folding at the
+//	  end in a different manner
+//	- Work with people more knowledgeable than me on the network stack
+//	  to figure out if we could not split the function depending on the
+//	  type of packet or alignment we get. Like the ip_fast_csum() routine
+//	  where we know we have at least 20bytes worth of data to checksum.
+//	- Do a better job of handling small packets.
+//	- Note on prefetching: it was found that under various load, i.e. ftp read/write,
+//	  nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8%
+//	  on the data that buffer points to (partly because the checksum is often preceded by
+//	  a copy_from_user()).  This finding indiate that lfetch will not be beneficial since
+//	  the data is already in the cache.
+//
+
+#define saved_pfs	r11
+#define hmask		r16
+#define tmask		r17
+#define first1		r18
+#define firstval	r19
+#define firstoff	r20
+#define last		r21
+#define lastval		r22
+#define lastoff		r23
+#define saved_lc	r24
+#define saved_pr	r25
+#define tmp1		r26
+#define tmp2		r27
+#define tmp3		r28
+#define carry1		r29
+#define carry2		r30
+#define first2		r31
+
+#define buf		in0
+#define len		in1
+
+#define LOAD_LATENCY	2	// XXX fix me
+
+#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2)
+# error "Only 1 or 2 is supported/tested for LOAD_LATENCY."
+#endif
+
+#define PIPE_DEPTH			(LOAD_LATENCY+2)
+#define ELD	p[LOAD_LATENCY]		// end of load
+#define ELD_1	p[LOAD_LATENCY+1]	// and next stage
+
+// unsigned long do_csum(unsigned char *buf,long len)
+
+GLOBAL_ENTRY(do_csum)
+	.prologue
+	.save ar.pfs, saved_pfs
+	alloc saved_pfs=ar.pfs,2,16,0,16
+	.rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2]
+	.rotp p[PIPE_DEPTH], pC1[2], pC2[2]
+	mov ret0=r0		// in case we have zero length
+	cmp.lt p0,p6=r0,len	// check for zero length or negative (32bit len)
+	;;
+	add tmp1=buf,len	// last byte's address
+	.save pr, saved_pr
+	mov saved_pr=pr		// preserve predicates (rotation)
+(p6)	br.ret.spnt.many rp	// return if zero or negative length
+
+	mov hmask=-1		// initialize head mask
+	tbit.nz p15,p0=buf,0	// is buf an odd address?
+	and first1=-8,buf	// 8-byte align down address of first1 element
+
+	and firstoff=7,buf	// how many bytes off for first1 element
+	mov tmask=-1		// initialize tail mask
+
+	;;
+	adds tmp2=-1,tmp1	// last-1
+	and lastoff=7,tmp1	// how many bytes off for last element
+	;;
+	sub tmp1=8,lastoff	// complement to lastoff
+	and last=-8,tmp2	// address of word containing last byte
+	;;
+	sub tmp3=last,first1	// tmp3=distance from first1 to last
+	.save ar.lc, saved_lc
+	mov saved_lc=ar.lc	// save lc
+	cmp.eq p8,p9=last,first1	// everything fits in one word ?
+
+	ld8 firstval=[first1],8	// load, ahead of time, "first1" word
+	and tmp1=7, tmp1	// make sure that if tmp1==8 -> tmp1=0
+	shl tmp2=firstoff,3	// number of bits
+	;;
+(p9)	ld8 lastval=[last]	// load, ahead of time, "last" word, if needed
+	shl tmp1=tmp1,3		// number of bits
+(p9)	adds tmp3=-8,tmp3	// effectively loaded
+	;;
+(p8)	mov lastval=r0		// we don't need lastval if first1==last
+	shl hmask=hmask,tmp2	// build head mask, mask off [0,first1off[
+	shr.u tmask=tmask,tmp1	// build tail mask, mask off ]8,lastoff]
+	;;
+	.body
+#define count tmp3
+
+(p8)	and hmask=hmask,tmask	// apply tail mask to head mask if 1 word only
+(p9)	and word2[0]=lastval,tmask	// mask last it as appropriate
+	shr.u count=count,3	// how many 8-byte?
+	;;
+	// If count is odd, finish this 8-byte word so that we can
+	// load two back-to-back 8-byte words per loop thereafter.
+	and word1[0]=firstval,hmask	// and mask it as appropriate
+	tbit.nz p10,p11=count,0		// if (count is odd)
+	;;
+(p8)	mov result1[0]=word1[0]
+(p9)	add result1[0]=word1[0],word2[0]
+	;;
+	cmp.ltu p6,p0=result1[0],word1[0]	// check the carry
+	cmp.eq.or.andcm p8,p0=0,count		// exit if zero 8-byte
+	;;
+(p6)	adds result1[0]=1,result1[0]
+(p8)	br.cond.dptk .do_csum_exit	// if (within an 8-byte word)
+(p11)	br.cond.dptk .do_csum16		// if (count is even)
+
+	// Here count is odd.
+	ld8 word1[1]=[first1],8		// load an 8-byte word
+	cmp.eq p9,p10=1,count		// if (count == 1)
+	adds count=-1,count		// loaded an 8-byte word
+	;;
+	add result1[0]=result1[0],word1[1]
+	;;
+	cmp.ltu p6,p0=result1[0],word1[1]
+	;;
+(p6)	adds result1[0]=1,result1[0]
+(p9)	br.cond.sptk .do_csum_exit	// if (count == 1) exit
+	// Fall through to calculate the checksum, feeding result1[0] as
+	// the initial value in result1[0].
+	//
+	// Calculate the checksum loading two 8-byte words per loop.
+	//
+.do_csum16:
+	add first2=8,first1
+	shr.u count=count,1	// we do 16 bytes per loop
+	;;
+	adds count=-1,count
+	mov carry1=r0
+	mov carry2=r0
+	brp.loop.imp 1f,2f
+	;;
+	mov ar.ec=PIPE_DEPTH
+	mov ar.lc=count	// set lc
+	mov pr.rot=1<<16
+	// result1[0] must be initialized in advance.
+	mov result2[0]=r0
+	;;
+	.align 32
+1:
+(ELD_1)	cmp.ltu pC1[0],p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1]
+(pC1[1])adds carry1=1,carry1
+(ELD_1)	cmp.ltu pC2[0],p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1]
+(pC2[1])adds carry2=1,carry2
+(ELD)	add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY]
+(ELD)	add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY]
+2:
+(p[0])	ld8 word1[0]=[first1],16
+(p[0])	ld8 word2[0]=[first2],16
+	br.ctop.sptk 1b
+	;;
+	// Since len is a 32-bit value, carry cannot be larger than a 64-bit value.
+(pC1[1])adds carry1=1,carry1	// since we miss the last one
+(pC2[1])adds carry2=1,carry2
+	;;
+	add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1
+	add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2
+	;;
+	cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1
+	cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2
+	;;
+(p6)	adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1]
+(p7)	adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1]
+	;;
+	add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1]
+	;;
+	cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1]
+	;;
+(p6)	adds result1[0]=1,result1[0]
+	;;
+.do_csum_exit:
+	//
+	// now fold 64 into 16 bits taking care of carry
+	// that's not very good because it has lots of sequentiality
+	//
+	mov tmp3=0xffff
+	zxt4 tmp1=result1[0]
+	shr.u tmp2=result1[0],32
+	;;
+	add result1[0]=tmp1,tmp2
+	;;
+	and tmp1=result1[0],tmp3
+	shr.u tmp2=result1[0],16
+	;;
+	add result1[0]=tmp1,tmp2
+	;;
+	and tmp1=result1[0],tmp3
+	shr.u tmp2=result1[0],16
+	;;
+	add result1[0]=tmp1,tmp2
+	;;
+	and tmp1=result1[0],tmp3
+	shr.u tmp2=result1[0],16
+	;;
+	add ret0=tmp1,tmp2
+	mov pr=saved_pr,0xffffffffffff0000
+	;;
+	// if buf was odd then swap bytes
+	mov ar.pfs=saved_pfs		// restore ar.ec
+(p15)	mux1 ret0=ret0,@rev		// reverse word
+	;;
+	mov ar.lc=saved_lc
+(p15)	shr.u ret0=ret0,64-16	// + shift back to position = swap bytes
+	br.ret.sptk.many rp
+
+//	I (Jun Nakajima) wrote an equivalent code (see below), but it was
+//	not much better than the original. So keep the original there so that
+//	someone else can challenge.
+//
+//	shr.u word1[0]=result1[0],32
+//	zxt4 result1[0]=result1[0]
+//	;;
+//	add result1[0]=result1[0],word1[0]
+//	;;
+//	zxt2 result2[0]=result1[0]
+//	extr.u word1[0]=result1[0],16,16
+//	shr.u carry1=result1[0],32
+//	;;
+//	add result2[0]=result2[0],word1[0]
+//	;;
+//	add result2[0]=result2[0],carry1
+//	;;
+//	extr.u ret0=result2[0],16,16
+//	;;
+//	add ret0=ret0,result2[0]
+//	;;
+//	zxt2 ret0=ret0
+//	mov ar.pfs=saved_pfs		 // restore ar.ec
+//	mov pr=saved_pr,0xffffffffffff0000
+//	;;
+//	// if buf was odd then swap bytes
+//	mov ar.lc=saved_lc
+//(p15)	mux1 ret0=ret0,@rev		// reverse word
+//	;;
+//(p15)	shr.u ret0=ret0,64-16	// + shift back to position = swap bytes
+//	br.ret.sptk.many rp
+
+END(do_csum)
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
new file mode 100644
index 00000000..1d8c8886
--- /dev/null
+++ b/arch/ia64/lib/flush.S
@@ -0,0 +1,117 @@
+/*
+ * Cache flushing routines.
+ *
+ * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 05/28/05 Zoltan Menyhart	Dynamic stride size
+ */
+
+#include <asm/asmmacro.h>
+
+
+	/*
+	 * flush_icache_range(start,end)
+	 *
+	 *	Make i-cache(s) coherent with d-caches.
+	 *
+	 *	Must deal with range from start to end-1 but nothing else (need to
+	 *	be careful not to touch addresses that may be unmapped).
+	 *
+	 *	Note: "in0" and "in1" are preserved for debugging purposes.
+	 */
+	.section .kprobes.text,"ax"
+GLOBAL_ENTRY(flush_icache_range)
+
+	.prologue
+	alloc	r2=ar.pfs,2,0,0,0
+	movl	r3=ia64_i_cache_stride_shift
+ 	mov	r21=1
+	;;
+	ld8	r20=[r3]		// r20: stride shift
+	sub	r22=in1,r0,1		// last byte address
+	;;
+	shr.u	r23=in0,r20		// start / (stride size)
+	shr.u	r22=r22,r20		// (last byte address) / (stride size)
+	shl	r21=r21,r20		// r21: stride size of the i-cache(s)
+	;;
+	sub	r8=r22,r23		// number of strides - 1
+	shl	r24=r23,r20		// r24: addresses for "fc.i" =
+					//	"start" rounded down to stride boundary
+	.save	ar.lc,r3
+	mov	r3=ar.lc		// save ar.lc
+	;;
+
+	.body
+	mov	ar.lc=r8
+	;;
+	/*
+	 * 32 byte aligned loop, even number of (actually 2) bundles
+	 */
+.Loop:	fc.i	r24			// issuable on M0 only
+	add	r24=r21,r24		// we flush "stride size" bytes per iteration
+	nop.i	0
+	br.cloop.sptk.few .Loop
+	;;
+	sync.i
+	;;
+	srlz.i
+	;;
+	mov	ar.lc=r3		// restore ar.lc
+	br.ret.sptk.many rp
+END(flush_icache_range)
+
+	/*
+	 * clflush_cache_range(start,size)
+	 *
+	 *	Flush cache lines from start to start+size-1.
+	 *
+	 *	Must deal with range from start to start+size-1 but nothing else
+	 *	(need to be careful not to touch addresses that may be
+	 *	unmapped).
+	 *
+	 *	Note: "in0" and "in1" are preserved for debugging purposes.
+	 */
+	.section .kprobes.text,"ax"
+GLOBAL_ENTRY(clflush_cache_range)
+
+	.prologue
+	alloc	r2=ar.pfs,2,0,0,0
+	movl	r3=ia64_cache_stride_shift
+	mov	r21=1
+	add     r22=in1,in0
+	;;
+	ld8	r20=[r3]		// r20: stride shift
+	sub	r22=r22,r0,1		// last byte address
+	;;
+	shr.u	r23=in0,r20		// start / (stride size)
+	shr.u	r22=r22,r20		// (last byte address) / (stride size)
+	shl	r21=r21,r20		// r21: stride size of the i-cache(s)
+	;;
+	sub	r8=r22,r23		// number of strides - 1
+	shl	r24=r23,r20		// r24: addresses for "fc" =
+					//	"start" rounded down to stride
+					//	boundary
+	.save	ar.lc,r3
+	mov	r3=ar.lc		// save ar.lc
+	;;
+
+	.body
+	mov	ar.lc=r8
+	;;
+	/*
+	 * 32 byte aligned loop, even number of (actually 2) bundles
+	 */
+.Loop_fc:
+	fc	r24		// issuable on M0 only
+	add	r24=r21,r24	// we flush "stride size" bytes per iteration
+	nop.i	0
+	br.cloop.sptk.few .Loop_fc
+	;;
+	sync.i
+	;;
+	srlz.i
+	;;
+	mov	ar.lc=r3		// restore ar.lc
+	br.ret.sptk.many rp
+END(clflush_cache_range)
diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S
new file mode 100644
index 00000000..2ac28bf0
--- /dev/null
+++ b/arch/ia64/lib/idiv32.S
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 32-bit integer division.
+ *
+ * This code is based on the application note entitled "Divide, Square Root
+ * and Remainder Algorithms for the IA-64 Architecture".  This document
+ * is available as Intel document number 248725-002 or via the web at
+ * http://developer.intel.com/software/opensource/numerics/
+ *
+ * For more details on the theory behind these algorithms, see "IA-64
+ * and Elementary Functions" by Peter Markstein; HP Professional Books
+ * (http://www.hp.com/go/retailbooks/)
+ */
+
+#include <asm/asmmacro.h>
+
+#ifdef MODULO
+# define OP	mod
+#else
+# define OP	div
+#endif
+
+#ifdef UNSIGNED
+# define SGN	u
+# define EXTEND	zxt4
+# define INT_TO_FP(a,b)	fcvt.xuf.s1 a=b
+# define FP_TO_INT(a,b)	fcvt.fxu.trunc.s1 a=b
+#else
+# define SGN
+# define EXTEND	sxt4
+# define INT_TO_FP(a,b)	fcvt.xf a=b
+# define FP_TO_INT(a,b)	fcvt.fx.trunc.s1 a=b
+#endif
+
+#define PASTE1(a,b)	a##b
+#define PASTE(a,b)	PASTE1(a,b)
+#define NAME		PASTE(PASTE(__,SGN),PASTE(OP,si3))
+
+GLOBAL_ENTRY(NAME)
+	.regstk 2,0,0,0
+	// Transfer inputs to FP registers.
+	mov r2 = 0xffdd			// r2 = -34 + 65535 (fp reg format bias)
+	EXTEND in0 = in0		// in0 = a
+	EXTEND in1 = in1		// in1 = b
+	;;
+	setf.sig f8 = in0
+	setf.sig f9 = in1
+#ifdef MODULO
+	sub in1 = r0, in1		// in1 = -b
+#endif
+	;;
+	// Convert the inputs to FP, to avoid FP software-assist faults.
+	INT_TO_FP(f8, f8)
+	INT_TO_FP(f9, f9)
+	;;
+	setf.exp f7 = r2		// f7 = 2^-34
+	frcpa.s1 f6, p6 = f8, f9	// y0 = frcpa(b)
+	;;
+(p6)	fmpy.s1 f8 = f8, f6		// q0 = a*y0
+(p6)	fnma.s1 f6 = f9, f6, f1		// e0 = -b*y0 + 1 
+	;;
+#ifdef MODULO
+	setf.sig f9 = in1		// f9 = -b
+#endif
+(p6)	fma.s1 f8 = f6, f8, f8		// q1 = e0*q0 + q0
+(p6)	fma.s1 f6 = f6, f6, f7		// e1 = e0*e0 + 2^-34
+	;;
+#ifdef MODULO
+	setf.sig f7 = in0
+#endif
+(p6)	fma.s1 f6 = f6, f8, f8		// q2 = e1*q1 + q1
+	;;
+	FP_TO_INT(f6, f6)		// q = trunc(q2)
+	;;
+#ifdef MODULO
+	xma.l f6 = f6, f9, f7		// r = q*(-b) + a
+	;;
+#endif
+	getf.sig r8 = f6		// transfer result to result register
+	br.ret.sptk.many rp
+END(NAME)
diff --git a/arch/ia64/lib/idiv64.S b/arch/ia64/lib/idiv64.S
new file mode 100644
index 00000000..f69bd2b0
--- /dev/null
+++ b/arch/ia64/lib/idiv64.S
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 64-bit integer division.
+ *
+ * This code is based on the application note entitled "Divide, Square Root
+ * and Remainder Algorithms for the IA-64 Architecture".  This document
+ * is available as Intel document number 248725-002 or via the web at
+ * http://developer.intel.com/software/opensource/numerics/
+ *
+ * For more details on the theory behind these algorithms, see "IA-64
+ * and Elementary Functions" by Peter Markstein; HP Professional Books
+ * (http://www.hp.com/go/retailbooks/)
+ */
+
+#include <asm/asmmacro.h>
+
+#ifdef MODULO
+# define OP	mod
+#else
+# define OP	div
+#endif
+
+#ifdef UNSIGNED
+# define SGN	u
+# define INT_TO_FP(a,b)	fcvt.xuf.s1 a=b
+# define FP_TO_INT(a,b)	fcvt.fxu.trunc.s1 a=b
+#else
+# define SGN
+# define INT_TO_FP(a,b)	fcvt.xf a=b
+# define FP_TO_INT(a,b)	fcvt.fx.trunc.s1 a=b
+#endif
+
+#define PASTE1(a,b)	a##b
+#define PASTE(a,b)	PASTE1(a,b)
+#define NAME		PASTE(PASTE(__,SGN),PASTE(OP,di3))
+
+GLOBAL_ENTRY(NAME)
+	.regstk 2,0,0,0
+	// Transfer inputs to FP registers.
+	setf.sig f8 = in0
+	setf.sig f9 = in1
+	;;
+	// Convert the inputs to FP, to avoid FP software-assist faults.
+	INT_TO_FP(f8, f8)
+	INT_TO_FP(f9, f9)
+	;;
+	frcpa.s1 f11, p6 = f8, f9	// y0 = frcpa(b)
+	;;
+(p6)	fmpy.s1 f7 = f8, f11		// q0 = a*y0
+(p6)	fnma.s1 f6 = f9, f11, f1	// e0 = -b*y0 + 1
+	;;
+(p6)	fma.s1 f10 = f7, f6, f7		// q1 = q0*e0 + q0
+(p6)	fmpy.s1 f7 = f6, f6		// e1 = e0*e0
+	;;
+#ifdef MODULO
+	sub in1 = r0, in1		// in1 = -b
+#endif
+(p6)	fma.s1 f10 = f10, f7, f10	// q2 = q1*e1 + q1
+(p6)	fma.s1 f6 = f11, f6, f11	// y1 = y0*e0 + y0
+	;;
+(p6)	fma.s1 f6 = f6, f7, f6		// y2 = y1*e1 + y1
+(p6)	fnma.s1 f7 = f9, f10, f8	// r = -b*q2 + a
+	;;
+#ifdef MODULO
+	setf.sig f8 = in0		// f8 = a
+	setf.sig f9 = in1		// f9 = -b
+#endif
+(p6)	fma.s1 f11 = f7, f6, f10	// q3 = r*y2 + q2
+	;;
+	FP_TO_INT(f11, f11)		// q = trunc(q3)
+	;;
+#ifdef MODULO
+	xma.l f11 = f11, f9, f8		// r = q*(-b) + a
+	;;
+#endif
+	getf.sig r8 = f11		// transfer result to result register
+	br.ret.sptk.many rp
+END(NAME)
diff --git a/arch/ia64/lib/io.c b/arch/ia64/lib/io.c
new file mode 100644
index 00000000..bcd16f8a
--- /dev/null
+++ b/arch/ia64/lib/io.c
@@ -0,0 +1,164 @@
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+
+/*
+ * Copy data from IO memory space to "real" memory space.
+ * This needs to be optimized.
+ */
+void memcpy_fromio(void *to, const volatile void __iomem *from, long count)
+{
+	char *dst = to;
+
+	while (count) {
+		count--;
+		*dst++ = readb(from++);
+	}
+}
+EXPORT_SYMBOL(memcpy_fromio);
+
+/*
+ * Copy data from "real" memory space to IO memory space.
+ * This needs to be optimized.
+ */
+void memcpy_toio(volatile void __iomem *to, const void *from, long count)
+{
+	const char *src = from;
+
+	while (count) {
+		count--;
+		writeb(*src++, to++);
+	}
+}
+EXPORT_SYMBOL(memcpy_toio);
+
+/*
+ * "memset" on IO memory space.
+ * This needs to be optimized.
+ */
+void memset_io(volatile void __iomem *dst, int c, long count)
+{
+	unsigned char ch = (char)(c & 0xff);
+
+	while (count) {
+		count--;
+		writeb(ch, dst);
+		dst++;
+	}
+}
+EXPORT_SYMBOL(memset_io);
+
+#ifdef CONFIG_IA64_GENERIC
+
+#undef __ia64_inb
+#undef __ia64_inw
+#undef __ia64_inl
+#undef __ia64_outb
+#undef __ia64_outw
+#undef __ia64_outl
+#undef __ia64_readb
+#undef __ia64_readw
+#undef __ia64_readl
+#undef __ia64_readq
+#undef __ia64_readb_relaxed
+#undef __ia64_readw_relaxed
+#undef __ia64_readl_relaxed
+#undef __ia64_readq_relaxed
+#undef __ia64_writeb
+#undef __ia64_writew
+#undef __ia64_writel
+#undef __ia64_writeq
+#undef __ia64_mmiowb
+
+unsigned int
+__ia64_inb (unsigned long port)
+{
+	return ___ia64_inb(port);
+}
+
+unsigned int
+__ia64_inw (unsigned long port)
+{
+	return ___ia64_inw(port);
+}
+
+unsigned int
+__ia64_inl (unsigned long port)
+{
+	return ___ia64_inl(port);
+}
+
+void
+__ia64_outb (unsigned char val, unsigned long port)
+{
+	___ia64_outb(val, port);
+}
+
+void
+__ia64_outw (unsigned short val, unsigned long port)
+{
+	___ia64_outw(val, port);
+}
+
+void
+__ia64_outl (unsigned int val, unsigned long port)
+{
+	___ia64_outl(val, port);
+}
+
+unsigned char
+__ia64_readb (void __iomem *addr)
+{
+	return ___ia64_readb (addr);
+}
+
+unsigned short
+__ia64_readw (void __iomem *addr)
+{
+	return ___ia64_readw (addr);
+}
+
+unsigned int
+__ia64_readl (void __iomem *addr)
+{
+	return ___ia64_readl (addr);
+}
+
+unsigned long
+__ia64_readq (void __iomem *addr)
+{
+	return ___ia64_readq (addr);
+}
+
+unsigned char
+__ia64_readb_relaxed (void __iomem *addr)
+{
+	return ___ia64_readb (addr);
+}
+
+unsigned short
+__ia64_readw_relaxed (void __iomem *addr)
+{
+	return ___ia64_readw (addr);
+}
+
+unsigned int
+__ia64_readl_relaxed (void __iomem *addr)
+{
+	return ___ia64_readl (addr);
+}
+
+unsigned long
+__ia64_readq_relaxed (void __iomem *addr)
+{
+	return ___ia64_readq (addr);
+}
+
+void
+__ia64_mmiowb(void)
+{
+	___ia64_mmiowb();
+}
+
+#endif /* CONFIG_IA64_GENERIC */
diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S
new file mode 100644
index 00000000..620d9dc5
--- /dev/null
+++ b/arch/ia64/lib/ip_fast_csum.S
@@ -0,0 +1,144 @@
+/*
+ * Optmized version of the ip_fast_csum() function
+ * Used for calculating IP header checksum
+ *
+ * Return: 16bit checksum, complemented
+ *
+ * Inputs:
+ *      in0: address of buffer to checksum (char *)
+ *      in1: length of the buffer (int)
+ *
+ * Copyright (C) 2002, 2006 Intel Corp.
+ * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+
+/*
+ * Since we know that most likely this function is called with buf aligned
+ * on 4-byte boundary and 20 bytes in length, we can execution rather quickly
+ * versus calling generic version of do_csum, which has lots of overhead in
+ * handling various alignments and sizes.  However, due to lack of constrains
+ * put on the function input argument, cases with alignment not on 4-byte or
+ * size not equal to 20 bytes will be handled by the generic do_csum function.
+ */
+
+#define in0	r32
+#define in1	r33
+#define in2	r34
+#define in3	r35
+#define in4	r36
+#define ret0	r8
+
+GLOBAL_ENTRY(ip_fast_csum)
+	.prologue
+	.body
+	cmp.ne	p6,p7=5,in1	// size other than 20 byte?
+	and	r14=3,in0	// is it aligned on 4-byte?
+	add	r15=4,in0	// second source pointer
+	;;
+	cmp.ne.or.andcm p6,p7=r14,r0
+	;;
+(p7)	ld4	r20=[in0],8
+(p7)	ld4	r21=[r15],8
+(p6)	br.spnt	.generic
+	;;
+	ld4	r22=[in0],8
+	ld4	r23=[r15],8
+	;;
+	ld4	r24=[in0]
+	add	r20=r20,r21
+	add	r22=r22,r23
+	;;
+	add	r20=r20,r22
+	;;
+	add	r20=r20,r24
+	;;
+	shr.u	ret0=r20,16	// now need to add the carry
+	zxt2	r20=r20
+	;;
+	add	r20=ret0,r20
+	;;
+	shr.u	ret0=r20,16	// add carry again
+	zxt2	r20=r20
+	;;
+	add	r20=ret0,r20
+	;;
+	shr.u	ret0=r20,16
+	zxt2	r20=r20
+	;;
+	add	r20=ret0,r20
+	mov	r9=0xffff
+	;;
+	andcm	ret0=r9,r20
+	.restore sp		// reset frame state
+	br.ret.sptk.many b0
+	;;
+
+.generic:
+	.prologue
+	.save ar.pfs, r35
+	alloc	r35=ar.pfs,2,2,2,0
+	.save rp, r34
+	mov	r34=b0
+	.body
+	dep.z	out1=in1,2,30
+	mov	out0=in0
+	;;
+	br.call.sptk.many b0=do_csum
+	;;
+	andcm	ret0=-1,ret0
+	mov	ar.pfs=r35
+	mov	b0=r34
+	br.ret.sptk.many b0
+END(ip_fast_csum)
+
+GLOBAL_ENTRY(csum_ipv6_magic)
+	ld4	r20=[in0],4
+	ld4	r21=[in1],4
+	zxt4	in2=in2
+	;;
+	ld4	r22=[in0],4
+	ld4	r23=[in1],4
+	dep	r15=in3,in2,32,16
+	;;
+	ld4	r24=[in0],4
+	ld4	r25=[in1],4
+	mux1	r15=r15,@rev
+	add	r16=r20,r21
+	add	r17=r22,r23
+	zxt4	in4=in4
+	;;
+	ld4	r26=[in0],4
+	ld4	r27=[in1],4
+	shr.u	r15=r15,16
+	add	r18=r24,r25
+	add	r8=r16,r17
+	;;
+	add	r19=r26,r27
+	add	r8=r8,r18
+	;;
+	add	r8=r8,r19
+	add	r15=r15,in4
+	;;
+	add	r8=r8,r15
+	;;
+	shr.u	r10=r8,32	// now fold sum into short
+	zxt4	r11=r8
+	;;
+	add	r8=r10,r11
+	;;
+	shr.u	r10=r8,16	// yeah, keep it rolling
+	zxt2	r11=r8
+	;;
+	add	r8=r10,r11
+	;;
+	shr.u	r10=r8,16	// three times lucky
+	zxt2	r11=r8
+	;;
+	add	r8=r10,r11
+	mov	r9=0xffff
+	;;
+	andcm	r8=r9,r8
+	br.ret.sptk.many b0
+END(csum_ipv6_magic)
diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S
new file mode 100644
index 00000000..448908d8
--- /dev/null
+++ b/arch/ia64/lib/memcpy.S
@@ -0,0 +1,301 @@
+/*
+ *
+ * Optimized version of the standard memcpy() function
+ *
+ * Inputs:
+ * 	in0:	destination address
+ *	in1:	source address
+ *	in2:	number of bytes to copy
+ * Output:
+ * 	no return value
+ *
+ * Copyright (C) 2000-2001 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <asm/asmmacro.h>
+
+GLOBAL_ENTRY(memcpy)
+
+#	define MEM_LAT	21		/* latency to memory */
+
+#	define dst	r2
+#	define src	r3
+#	define retval	r8
+#	define saved_pfs r9
+#	define saved_lc	r10
+#	define saved_pr	r11
+#	define cnt	r16
+#	define src2	r17
+#	define t0	r18
+#	define t1	r19
+#	define t2	r20
+#	define t3	r21
+#	define t4	r22
+#	define src_end	r23
+
+#	define N	(MEM_LAT + 4)
+#	define Nrot	((N + 7) & ~7)
+
+	/*
+	 * First, check if everything (src, dst, len) is a multiple of eight.  If
+	 * so, we handle everything with no taken branches (other than the loop
+	 * itself) and a small icache footprint.  Otherwise, we jump off to
+	 * the more general copy routine handling arbitrary
+	 * sizes/alignment etc.
+	 */
+	.prologue
+	.save ar.pfs, saved_pfs
+	alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot
+	.save ar.lc, saved_lc
+	mov saved_lc=ar.lc
+	or t0=in0,in1
+	;;
+
+	or t0=t0,in2
+	.save pr, saved_pr
+	mov saved_pr=pr
+
+	.body
+
+	cmp.eq p6,p0=in2,r0	// zero length?
+	mov retval=in0		// return dst
+(p6)	br.ret.spnt.many rp	// zero length, return immediately
+	;;
+
+	mov dst=in0		// copy because of rotation
+	shr.u cnt=in2,3		// number of 8-byte words to copy
+	mov pr.rot=1<<16
+	;;
+
+	adds cnt=-1,cnt		// br.ctop is repeat/until
+	cmp.gtu p7,p0=16,in2	// copying less than 16 bytes?
+	mov ar.ec=N
+	;;
+
+	and t0=0x7,t0
+	mov ar.lc=cnt
+	;;
+	cmp.ne p6,p0=t0,r0
+
+	mov src=in1		// copy because of rotation
+(p7)	br.cond.spnt.few .memcpy_short
+(p6)	br.cond.spnt.few .memcpy_long
+	;;
+	nop.m	0
+	;;
+	nop.m	0
+	nop.i	0
+	;;
+	nop.m	0
+	;;
+	.rotr val[N]
+	.rotp p[N]
+	.align 32
+1: { .mib
+(p[0])	ld8 val[0]=[src],8
+	nop.i 0
+	brp.loop.imp 1b, 2f
+}
+2: { .mfb
+(p[N-1])st8 [dst]=val[N-1],8
+	nop.f 0
+	br.ctop.dptk.few 1b
+}
+	;;
+	mov ar.lc=saved_lc
+	mov pr=saved_pr,-1
+	mov ar.pfs=saved_pfs
+	br.ret.sptk.many rp
+
+	/*
+	 * Small (<16 bytes) unaligned copying is done via a simple byte-at-the-time
+	 * copy loop.  This performs relatively poorly on Itanium, but it doesn't
+	 * get used very often (gcc inlines small copies) and due to atomicity
+	 * issues, we want to avoid read-modify-write of entire words.
+	 */
+	.align 32
+.memcpy_short:
+	adds cnt=-1,in2		// br.ctop is repeat/until
+	mov ar.ec=MEM_LAT
+	brp.loop.imp 1f, 2f
+	;;
+	mov ar.lc=cnt
+	;;
+	nop.m	0
+	;;
+	nop.m	0
+	nop.i	0
+	;;
+	nop.m	0
+	;;
+	nop.m	0
+	;;
+	/*
+	 * It is faster to put a stop bit in the loop here because it makes
+	 * the pipeline shorter (and latency is what matters on short copies).
+	 */
+	.align 32
+1: { .mib
+(p[0])	ld1 val[0]=[src],1
+	nop.i 0
+	brp.loop.imp 1b, 2f
+} ;;
+2: { .mfb
+(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1
+	nop.f 0
+	br.ctop.dptk.few 1b
+} ;;
+	mov ar.lc=saved_lc
+	mov pr=saved_pr,-1
+	mov ar.pfs=saved_pfs
+	br.ret.sptk.many rp
+
+	/*
+	 * Large (>= 16 bytes) copying is done in a fancy way.  Latency isn't
+	 * an overriding concern here, but throughput is.  We first do
+	 * sub-word copying until the destination is aligned, then we check
+	 * if the source is also aligned.  If so, we do a simple load/store-loop
+	 * until there are less than 8 bytes left over and then we do the tail,
+	 * by storing the last few bytes using sub-word copying.  If the source
+	 * is not aligned, we branch off to the non-congruent loop.
+	 *
+	 *   stage:   op:
+	 *         0  ld
+	 *	   :
+	 * MEM_LAT+3  shrp
+	 * MEM_LAT+4  st
+	 *
+	 * On Itanium, the pipeline itself runs without stalls.  However,  br.ctop
+	 * seems to introduce an unavoidable bubble in the pipeline so the overall
+	 * latency is 2 cycles/iteration.  This gives us a _copy_ throughput
+	 * of 4 byte/cycle.  Still not bad.
+	 */
+#	undef N
+#	undef Nrot
+#	define N	(MEM_LAT + 5)		/* number of stages */
+#	define Nrot	((N+1 + 2 + 7) & ~7)	/* number of rotating regs */
+
+#define LOG_LOOP_SIZE	6
+
+.memcpy_long:
+	alloc t3=ar.pfs,3,Nrot,0,Nrot	// resize register frame
+	and t0=-8,src		// t0 = src & ~7
+	and t2=7,src		// t2 = src & 7
+	;;
+	ld8 t0=[t0]		// t0 = 1st source word
+	adds src2=7,src		// src2 = (src + 7)
+	sub t4=r0,dst		// t4 = -dst
+	;;
+	and src2=-8,src2	// src2 = (src + 7) & ~7
+	shl t2=t2,3		// t2 = 8*(src & 7)
+	shl t4=t4,3		// t4 = 8*(dst & 7)
+	;;
+	ld8 t1=[src2]		// t1 = 1st source word if src is 8-byte aligned, 2nd otherwise
+	sub t3=64,t2		// t3 = 64-8*(src & 7)
+	shr.u t0=t0,t2
+	;;
+	add src_end=src,in2
+	shl t1=t1,t3
+	mov pr=t4,0x38		// (p5,p4,p3)=(dst & 7)
+	;;
+	or t0=t0,t1
+	mov cnt=r0
+	adds src_end=-1,src_end
+	;;
+(p3)	st1 [dst]=t0,1
+(p3)	shr.u t0=t0,8
+(p3)	adds cnt=1,cnt
+	;;
+(p4)	st2 [dst]=t0,2
+(p4)	shr.u t0=t0,16
+(p4)	adds cnt=2,cnt
+	;;
+(p5)	st4 [dst]=t0,4
+(p5)	adds cnt=4,cnt
+	and src_end=-8,src_end	// src_end = last word of source buffer
+	;;
+
+	// At this point, dst is aligned to 8 bytes and there at least 16-7=9 bytes left to copy:
+
+1:{	add src=cnt,src			// make src point to remainder of source buffer
+	sub cnt=in2,cnt			// cnt = number of bytes left to copy
+	mov t4=ip
+  }	;;
+	and src2=-8,src			// align source pointer
+	adds t4=.memcpy_loops-1b,t4
+	mov ar.ec=N
+
+	and t0=7,src			// t0 = src & 7
+	shr.u t2=cnt,3			// t2 = number of 8-byte words left to copy
+	shl cnt=cnt,3			// move bits 0-2 to 3-5
+	;;
+
+	.rotr val[N+1], w[2]
+	.rotp p[N]
+
+	cmp.ne p6,p0=t0,r0		// is src aligned, too?
+	shl t0=t0,LOG_LOOP_SIZE		// t0 = 8*(src & 7)
+	adds t2=-1,t2			// br.ctop is repeat/until
+	;;
+	add t4=t0,t4
+	mov pr=cnt,0x38			// set (p5,p4,p3) to # of bytes last-word bytes to copy
+	mov ar.lc=t2
+	;;
+	nop.m	0
+	;;
+	nop.m	0
+	nop.i	0
+	;;
+	nop.m	0
+	;;
+(p6)	ld8 val[1]=[src2],8		// prime the pump...
+	mov b6=t4
+	br.sptk.few b6
+	;;
+
+.memcpy_tail:
+	// At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is
+	// less than 8) and t0 contains the last few bytes of the src buffer:
+(p5)	st4 [dst]=t0,4
+(p5)	shr.u t0=t0,32
+	mov ar.lc=saved_lc
+	;;
+(p4)	st2 [dst]=t0,2
+(p4)	shr.u t0=t0,16
+	mov ar.pfs=saved_pfs
+	;;
+(p3)	st1 [dst]=t0
+	mov pr=saved_pr,-1
+	br.ret.sptk.many rp
+
+///////////////////////////////////////////////////////
+	.align 64
+
+#define COPY(shift,index)									\
+ 1: { .mib											\
+	(p[0])		ld8 val[0]=[src2],8;							\
+	(p[MEM_LAT+3])	shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift;			\
+			brp.loop.imp 1b, 2f							\
+    };												\
+ 2: { .mfb											\
+	(p[MEM_LAT+4])	st8 [dst]=w[1],8;							\
+			nop.f 0;								\
+			br.ctop.dptk.few 1b;							\
+    };												\
+			;;									\
+			ld8 val[N-1]=[src_end];	/* load last word (may be same as val[N]) */	\
+			;;									\
+			shrp t0=val[N-1],val[N-index],shift;					\
+			br .memcpy_tail
+.memcpy_loops:
+	COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */
+	COPY(8, 0)
+	COPY(16, 0)
+	COPY(24, 0)
+	COPY(32, 0)
+	COPY(40, 0)
+	COPY(48, 0)
+	COPY(56, 0)
+
+END(memcpy)
diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S
new file mode 100644
index 00000000..ab0f8763
--- /dev/null
+++ b/arch/ia64/lib/memcpy_mck.S
@@ -0,0 +1,666 @@
+/*
+ * Itanium 2-optimized version of memcpy and copy_user function
+ *
+ * Inputs:
+ * 	in0:	destination address
+ *	in1:	source address
+ *	in2:	number of bytes to copy
+ * Output:
+ *	for memcpy:    return dest
+ * 	for copy_user: return 0 if success,
+ *		       or number of byte NOT copied if error occurred.
+ *
+ * Copyright (C) 2002 Intel Corp.
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
+ */
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#define EK(y...) EX(y)
+
+/* McKinley specific optimization */
+
+#define retval		r8
+#define saved_pfs	r31
+#define saved_lc	r10
+#define saved_pr	r11
+#define saved_in0	r14
+#define saved_in1	r15
+#define saved_in2	r16
+
+#define src0		r2
+#define src1		r3
+#define dst0		r17
+#define dst1		r18
+#define cnt		r9
+
+/* r19-r30 are temp for each code section */
+#define PREFETCH_DIST	8
+#define src_pre_mem	r19
+#define dst_pre_mem	r20
+#define src_pre_l2	r21
+#define dst_pre_l2	r22
+#define t1		r23
+#define t2		r24
+#define t3		r25
+#define t4		r26
+#define t5		t1	// alias!
+#define t6		t2	// alias!
+#define t7		t3	// alias!
+#define n8		r27
+#define t9		t5	// alias!
+#define t10		t4	// alias!
+#define t11		t7	// alias!
+#define t12		t6	// alias!
+#define t14		t10	// alias!
+#define t13		r28
+#define t15		r29
+#define tmp		r30
+
+/* defines for long_copy block */
+#define	A	0
+#define B	(PREFETCH_DIST)
+#define C	(B + PREFETCH_DIST)
+#define D	(C + 1)
+#define N	(D + 1)
+#define Nrot	((N + 7) & ~7)
+
+/* alias */
+#define in0		r32
+#define in1		r33
+#define in2		r34
+
+GLOBAL_ENTRY(memcpy)
+	and	r28=0x7,in0
+	and	r29=0x7,in1
+	mov	f6=f0
+	mov	retval=in0
+	br.cond.sptk .common_code
+	;;
+END(memcpy)
+GLOBAL_ENTRY(__copy_user)
+	.prologue
+// check dest alignment
+	and	r28=0x7,in0
+	and	r29=0x7,in1
+	mov	f6=f1
+	mov	saved_in0=in0	// save dest pointer
+	mov	saved_in1=in1	// save src pointer
+	mov	retval=r0	// initialize return value
+	;;
+.common_code:
+	cmp.gt	p15,p0=8,in2	// check for small size
+	cmp.ne	p13,p0=0,r28	// check dest alignment
+	cmp.ne	p14,p0=0,r29	// check src alignment
+	add	src0=0,in1
+	sub	r30=8,r28	// for .align_dest
+	mov	saved_in2=in2	// save len
+	;;
+	add	dst0=0,in0
+	add	dst1=1,in0	// dest odd index
+	cmp.le	p6,p0 = 1,r30	// for .align_dest
+(p15)	br.cond.dpnt .memcpy_short
+(p13)	br.cond.dpnt .align_dest
+(p14)	br.cond.dpnt .unaligned_src
+	;;
+
+// both dest and src are aligned on 8-byte boundary
+.aligned_src:
+	.save ar.pfs, saved_pfs
+	alloc	saved_pfs=ar.pfs,3,Nrot-3,0,Nrot
+	.save pr, saved_pr
+	mov	saved_pr=pr
+
+	shr.u	cnt=in2,7	// this much cache line
+	;;
+	cmp.lt	p6,p0=2*PREFETCH_DIST,cnt
+	cmp.lt	p7,p8=1,cnt
+	.save ar.lc, saved_lc
+	mov	saved_lc=ar.lc
+	.body
+	add	cnt=-1,cnt
+	add	src_pre_mem=0,in1	// prefetch src pointer
+	add	dst_pre_mem=0,in0	// prefetch dest pointer
+	;;
+(p7)	mov	ar.lc=cnt	// prefetch count
+(p8)	mov	ar.lc=r0
+(p6)	br.cond.dpnt .long_copy
+	;;
+
+.prefetch:
+	lfetch.fault	  [src_pre_mem], 128
+	lfetch.fault.excl [dst_pre_mem], 128
+	br.cloop.dptk.few .prefetch
+	;;
+
+.medium_copy:
+	and	tmp=31,in2	// copy length after iteration
+	shr.u	r29=in2,5	// number of 32-byte iteration
+	add	dst1=8,dst0	// 2nd dest pointer
+	;;
+	add	cnt=-1,r29	// ctop iteration adjustment
+	cmp.eq	p10,p0=r29,r0	// do we really need to loop?
+	add	src1=8,src0	// 2nd src pointer
+	cmp.le	p6,p0=8,tmp
+	;;
+	cmp.le	p7,p0=16,tmp
+	mov	ar.lc=cnt	// loop setup
+	cmp.eq	p16,p17 = r0,r0
+	mov	ar.ec=2
+(p10)	br.dpnt.few .aligned_src_tail
+	;;
+	TEXT_ALIGN(32)
+1:
+EX(.ex_handler, (p16)	ld8	r34=[src0],16)
+EK(.ex_handler, (p16)	ld8	r38=[src1],16)
+EX(.ex_handler, (p17)	st8	[dst0]=r33,16)
+EK(.ex_handler, (p17)	st8	[dst1]=r37,16)
+	;;
+EX(.ex_handler, (p16)	ld8	r32=[src0],16)
+EK(.ex_handler, (p16)	ld8	r36=[src1],16)
+EX(.ex_handler, (p16)	st8	[dst0]=r34,16)
+EK(.ex_handler, (p16)	st8	[dst1]=r38,16)
+	br.ctop.dptk.few 1b
+	;;
+
+.aligned_src_tail:
+EX(.ex_handler, (p6)	ld8	t1=[src0])
+	mov	ar.lc=saved_lc
+	mov	ar.pfs=saved_pfs
+EX(.ex_hndlr_s, (p7)	ld8	t2=[src1],8)
+	cmp.le	p8,p0=24,tmp
+	and	r21=-8,tmp
+	;;
+EX(.ex_hndlr_s, (p8)	ld8	t3=[src1])
+EX(.ex_handler, (p6)	st8	[dst0]=t1)	// store byte 1
+	and	in2=7,tmp	// remaining length
+EX(.ex_hndlr_d, (p7)	st8	[dst1]=t2,8)	// store byte 2
+	add	src0=src0,r21	// setting up src pointer
+	add	dst0=dst0,r21	// setting up dest pointer
+	;;
+EX(.ex_handler, (p8)	st8	[dst1]=t3)	// store byte 3
+	mov	pr=saved_pr,-1
+	br.dptk.many .memcpy_short
+	;;
+
+/* code taken from copy_page_mck */
+.long_copy:
+	.rotr v[2*PREFETCH_DIST]
+	.rotp p[N]
+
+	mov src_pre_mem = src0
+	mov pr.rot = 0x10000
+	mov ar.ec = 1				// special unrolled loop
+
+	mov dst_pre_mem = dst0
+
+	add src_pre_l2 = 8*8, src0
+	add dst_pre_l2 = 8*8, dst0
+	;;
+	add src0 = 8, src_pre_mem		// first t1 src
+	mov ar.lc = 2*PREFETCH_DIST - 1
+	shr.u cnt=in2,7				// number of lines
+	add src1 = 3*8, src_pre_mem		// first t3 src
+	add dst0 = 8, dst_pre_mem		// first t1 dst
+	add dst1 = 3*8, dst_pre_mem		// first t3 dst
+	;;
+	and tmp=127,in2				// remaining bytes after this block
+	add cnt = -(2*PREFETCH_DIST) - 1, cnt
+	// same as .line_copy loop, but with all predicated-off instructions removed:
+.prefetch_loop:
+EX(.ex_hndlr_lcpy_1, (p[A])	ld8 v[A] = [src_pre_mem], 128)		// M0
+EK(.ex_hndlr_lcpy_1, (p[B])	st8 [dst_pre_mem] = v[B], 128)		// M2
+	br.ctop.sptk .prefetch_loop
+	;;
+	cmp.eq p16, p0 = r0, r0			// reset p16 to 1
+	mov ar.lc = cnt
+	mov ar.ec = N				// # of stages in pipeline
+	;;
+.line_copy:
+EX(.ex_handler,	(p[D])	ld8 t2 = [src0], 3*8)			// M0
+EK(.ex_handler,	(p[D])	ld8 t4 = [src1], 3*8)			// M1
+EX(.ex_handler_lcpy,	(p[B])	st8 [dst_pre_mem] = v[B], 128)		// M2 prefetch dst from memory
+EK(.ex_handler_lcpy,	(p[D])	st8 [dst_pre_l2] = n8, 128)		// M3 prefetch dst from L2
+	;;
+EX(.ex_handler_lcpy,	(p[A])	ld8 v[A] = [src_pre_mem], 128)		// M0 prefetch src from memory
+EK(.ex_handler_lcpy,	(p[C])	ld8 n8 = [src_pre_l2], 128)		// M1 prefetch src from L2
+EX(.ex_handler,	(p[D])	st8 [dst0] =  t1, 8)			// M2
+EK(.ex_handler,	(p[D])	st8 [dst1] =  t3, 8)			// M3
+	;;
+EX(.ex_handler,	(p[D])	ld8  t5 = [src0], 8)
+EK(.ex_handler,	(p[D])	ld8  t7 = [src1], 3*8)
+EX(.ex_handler,	(p[D])	st8 [dst0] =  t2, 3*8)
+EK(.ex_handler,	(p[D])	st8 [dst1] =  t4, 3*8)
+	;;
+EX(.ex_handler,	(p[D])	ld8  t6 = [src0], 3*8)
+EK(.ex_handler,	(p[D])	ld8 t10 = [src1], 8)
+EX(.ex_handler,	(p[D])	st8 [dst0] =  t5, 8)
+EK(.ex_handler,	(p[D])	st8 [dst1] =  t7, 3*8)
+	;;
+EX(.ex_handler,	(p[D])	ld8  t9 = [src0], 3*8)
+EK(.ex_handler,	(p[D])	ld8 t11 = [src1], 3*8)
+EX(.ex_handler,	(p[D])	st8 [dst0] =  t6, 3*8)
+EK(.ex_handler,	(p[D])	st8 [dst1] = t10, 8)
+	;;
+EX(.ex_handler,	(p[D])	ld8 t12 = [src0], 8)
+EK(.ex_handler,	(p[D])	ld8 t14 = [src1], 8)
+EX(.ex_handler,	(p[D])	st8 [dst0] =  t9, 3*8)
+EK(.ex_handler,	(p[D])	st8 [dst1] = t11, 3*8)
+	;;
+EX(.ex_handler,	(p[D])	ld8 t13 = [src0], 4*8)
+EK(.ex_handler,	(p[D])	ld8 t15 = [src1], 4*8)
+EX(.ex_handler,	(p[D])	st8 [dst0] = t12, 8)
+EK(.ex_handler,	(p[D])	st8 [dst1] = t14, 8)
+	;;
+EX(.ex_handler,	(p[C])	ld8  t1 = [src0], 8)
+EK(.ex_handler,	(p[C])	ld8  t3 = [src1], 8)
+EX(.ex_handler,	(p[D])	st8 [dst0] = t13, 4*8)
+EK(.ex_handler,	(p[D])	st8 [dst1] = t15, 4*8)
+	br.ctop.sptk .line_copy
+	;;
+
+	add dst0=-8,dst0
+	add src0=-8,src0
+	mov in2=tmp
+	.restore sp
+	br.sptk.many .medium_copy
+	;;
+
+#define BLOCK_SIZE	128*32
+#define blocksize	r23
+#define curlen		r24
+
+// dest is on 8-byte boundary, src is not. We need to do
+// ld8-ld8, shrp, then st8.  Max 8 byte copy per cycle.
+.unaligned_src:
+	.prologue
+	.save ar.pfs, saved_pfs
+	alloc	saved_pfs=ar.pfs,3,5,0,8
+	.save ar.lc, saved_lc
+	mov	saved_lc=ar.lc
+	.save pr, saved_pr
+	mov	saved_pr=pr
+	.body
+.4k_block:
+	mov	saved_in0=dst0	// need to save all input arguments
+	mov	saved_in2=in2
+	mov	blocksize=BLOCK_SIZE
+	;;
+	cmp.lt	p6,p7=blocksize,in2
+	mov	saved_in1=src0
+	;;
+(p6)	mov	in2=blocksize
+	;;
+	shr.u	r21=in2,7	// this much cache line
+	shr.u	r22=in2,4	// number of 16-byte iteration
+	and	curlen=15,in2	// copy length after iteration
+	and	r30=7,src0	// source alignment
+	;;
+	cmp.lt	p7,p8=1,r21
+	add	cnt=-1,r21
+	;;
+
+	add	src_pre_mem=0,src0	// prefetch src pointer
+	add	dst_pre_mem=0,dst0	// prefetch dest pointer
+	and	src0=-8,src0		// 1st src pointer
+(p7)	mov	ar.lc = cnt
+(p8)	mov	ar.lc = r0
+	;;
+	TEXT_ALIGN(32)
+1:	lfetch.fault	  [src_pre_mem], 128
+	lfetch.fault.excl [dst_pre_mem], 128
+	br.cloop.dptk.few 1b
+	;;
+
+	shladd	dst1=r22,3,dst0	// 2nd dest pointer
+	shladd	src1=r22,3,src0	// 2nd src pointer
+	cmp.eq	p8,p9=r22,r0	// do we really need to loop?
+	cmp.le	p6,p7=8,curlen;	// have at least 8 byte remaining?
+	add	cnt=-1,r22	// ctop iteration adjustment
+	;;
+EX(.ex_handler, (p9)	ld8	r33=[src0],8)	// loop primer
+EK(.ex_handler, (p9)	ld8	r37=[src1],8)
+(p8)	br.dpnt.few .noloop
+	;;
+
+// The jump address is calculated based on src alignment. The COPYU
+// macro below need to confine its size to power of two, so an entry
+// can be caulated using shl instead of an expensive multiply. The
+// size is then hard coded by the following #define to match the
+// actual size.  This make it somewhat tedious when COPYU macro gets
+// changed and this need to be adjusted to match.
+#define LOOP_SIZE 6
+1:
+	mov	r29=ip		// jmp_table thread
+	mov	ar.lc=cnt
+	;;
+	add	r29=.jump_table - 1b - (.jmp1-.jump_table), r29
+	shl	r28=r30, LOOP_SIZE	// jmp_table thread
+	mov	ar.ec=2		// loop setup
+	;;
+	add	r29=r29,r28		// jmp_table thread
+	cmp.eq	p16,p17=r0,r0
+	;;
+	mov	b6=r29			// jmp_table thread
+	;;
+	br.cond.sptk.few b6
+
+// for 8-15 byte case
+// We will skip the loop, but need to replicate the side effect
+// that the loop produces.
+.noloop:
+EX(.ex_handler, (p6)	ld8	r37=[src1],8)
+	add	src0=8,src0
+(p6)	shl	r25=r30,3
+	;;
+EX(.ex_handler, (p6)	ld8	r27=[src1])
+(p6)	shr.u	r28=r37,r25
+(p6)	sub	r26=64,r25
+	;;
+(p6)	shl	r27=r27,r26
+	;;
+(p6)	or	r21=r28,r27
+
+.unaligned_src_tail:
+/* check if we have more than blocksize to copy, if so go back */
+	cmp.gt	p8,p0=saved_in2,blocksize
+	;;
+(p8)	add	dst0=saved_in0,blocksize
+(p8)	add	src0=saved_in1,blocksize
+(p8)	sub	in2=saved_in2,blocksize
+(p8)	br.dpnt	.4k_block
+	;;
+
+/* we have up to 15 byte to copy in the tail.
+ * part of work is already done in the jump table code
+ * we are at the following state.
+ * src side:
+ * 
+ *   xxxxxx xx                   <----- r21 has xxxxxxxx already
+ * -------- -------- --------
+ * 0        8        16
+ *          ^
+ *          |
+ *          src1
+ * 
+ * dst
+ * -------- -------- --------
+ * ^
+ * |
+ * dst1
+ */
+EX(.ex_handler, (p6)	st8	[dst1]=r21,8)	// more than 8 byte to copy
+(p6)	add	curlen=-8,curlen	// update length
+	mov	ar.pfs=saved_pfs
+	;;
+	mov	ar.lc=saved_lc
+	mov	pr=saved_pr,-1
+	mov	in2=curlen	// remaining length
+	mov	dst0=dst1	// dest pointer
+	add	src0=src1,r30	// forward by src alignment
+	;;
+
+// 7 byte or smaller.
+.memcpy_short:
+	cmp.le	p8,p9   = 1,in2
+	cmp.le	p10,p11 = 2,in2
+	cmp.le	p12,p13 = 3,in2
+	cmp.le	p14,p15 = 4,in2
+	add	src1=1,src0	// second src pointer
+	add	dst1=1,dst0	// second dest pointer
+	;;
+
+EX(.ex_handler_short, (p8)	ld1	t1=[src0],2)
+EK(.ex_handler_short, (p10)	ld1	t2=[src1],2)
+(p9)	br.ret.dpnt rp		// 0 byte copy
+	;;
+
+EX(.ex_handler_short, (p8)	st1	[dst0]=t1,2)
+EK(.ex_handler_short, (p10)	st1	[dst1]=t2,2)
+(p11)	br.ret.dpnt rp		// 1 byte copy
+
+EX(.ex_handler_short, (p12)	ld1	t3=[src0],2)
+EK(.ex_handler_short, (p14)	ld1	t4=[src1],2)
+(p13)	br.ret.dpnt rp		// 2 byte copy
+	;;
+
+	cmp.le	p6,p7   = 5,in2
+	cmp.le	p8,p9   = 6,in2
+	cmp.le	p10,p11 = 7,in2
+
+EX(.ex_handler_short, (p12)	st1	[dst0]=t3,2)
+EK(.ex_handler_short, (p14)	st1	[dst1]=t4,2)
+(p15)	br.ret.dpnt rp		// 3 byte copy
+	;;
+
+EX(.ex_handler_short, (p6)	ld1	t5=[src0],2)
+EK(.ex_handler_short, (p8)	ld1	t6=[src1],2)
+(p7)	br.ret.dpnt rp		// 4 byte copy
+	;;
+
+EX(.ex_handler_short, (p6)	st1	[dst0]=t5,2)
+EK(.ex_handler_short, (p8)	st1	[dst1]=t6,2)
+(p9)	br.ret.dptk rp		// 5 byte copy
+
+EX(.ex_handler_short, (p10)	ld1	t7=[src0],2)
+(p11)	br.ret.dptk rp		// 6 byte copy
+	;;
+
+EX(.ex_handler_short, (p10)	st1	[dst0]=t7,2)
+	br.ret.dptk rp		// done all cases
+
+
+/* Align dest to nearest 8-byte boundary. We know we have at
+ * least 7 bytes to copy, enough to crawl to 8-byte boundary.
+ * Actual number of byte to crawl depend on the dest alignment.
+ * 7 byte or less is taken care at .memcpy_short
+
+ * src0 - source even index
+ * src1 - source  odd index
+ * dst0 - dest even index
+ * dst1 - dest  odd index
+ * r30  - distance to 8-byte boundary
+ */
+
+.align_dest:
+	add	src1=1,in1	// source odd index
+	cmp.le	p7,p0 = 2,r30	// for .align_dest
+	cmp.le	p8,p0 = 3,r30	// for .align_dest
+EX(.ex_handler_short, (p6)	ld1	t1=[src0],2)
+	cmp.le	p9,p0 = 4,r30	// for .align_dest
+	cmp.le	p10,p0 = 5,r30
+	;;
+EX(.ex_handler_short, (p7)	ld1	t2=[src1],2)
+EK(.ex_handler_short, (p8)	ld1	t3=[src0],2)
+	cmp.le	p11,p0 = 6,r30
+EX(.ex_handler_short, (p6)	st1	[dst0] = t1,2)
+	cmp.le	p12,p0 = 7,r30
+	;;
+EX(.ex_handler_short, (p9)	ld1	t4=[src1],2)
+EK(.ex_handler_short, (p10)	ld1	t5=[src0],2)
+EX(.ex_handler_short, (p7)	st1	[dst1] = t2,2)
+EK(.ex_handler_short, (p8)	st1	[dst0] = t3,2)
+	;;
+EX(.ex_handler_short, (p11)	ld1	t6=[src1],2)
+EK(.ex_handler_short, (p12)	ld1	t7=[src0],2)
+	cmp.eq	p6,p7=r28,r29
+EX(.ex_handler_short, (p9)	st1	[dst1] = t4,2)
+EK(.ex_handler_short, (p10)	st1	[dst0] = t5,2)
+	sub	in2=in2,r30
+	;;
+EX(.ex_handler_short, (p11)	st1	[dst1] = t6,2)
+EK(.ex_handler_short, (p12)	st1	[dst0] = t7)
+	add	dst0=in0,r30	// setup arguments
+	add	src0=in1,r30
+(p6)	br.cond.dptk .aligned_src
+(p7)	br.cond.dpnt .unaligned_src
+	;;
+
+/* main loop body in jump table format */
+#define COPYU(shift)									\
+1:											\
+EX(.ex_handler,  (p16)	ld8	r32=[src0],8);		/* 1 */				\
+EK(.ex_handler,  (p16)	ld8	r36=[src1],8);						\
+		 (p17)	shrp	r35=r33,r34,shift;;	/* 1 */				\
+EX(.ex_handler,  (p6)	ld8	r22=[src1]);	/* common, prime for tail section */	\
+		 nop.m	0;								\
+		 (p16)	shrp	r38=r36,r37,shift;					\
+EX(.ex_handler,  (p17)	st8	[dst0]=r35,8);		/* 1 */				\
+EK(.ex_handler,  (p17)	st8	[dst1]=r39,8);						\
+		 br.ctop.dptk.few 1b;;							\
+		 (p7)	add	src1=-8,src1;	/* back out for <8 byte case */		\
+		 shrp	r21=r22,r38,shift;	/* speculative work */			\
+		 br.sptk.few .unaligned_src_tail /* branch out of jump table */		\
+		 ;;
+	TEXT_ALIGN(32)
+.jump_table:
+	COPYU(8)	// unaligned cases
+.jmp1:
+	COPYU(16)
+	COPYU(24)
+	COPYU(32)
+	COPYU(40)
+	COPYU(48)
+	COPYU(56)
+
+#undef A
+#undef B
+#undef C
+#undef D
+
+/*
+ * Due to lack of local tag support in gcc 2.x assembler, it is not clear which
+ * instruction failed in the bundle.  The exception algorithm is that we
+ * first figure out the faulting address, then detect if there is any
+ * progress made on the copy, if so, redo the copy from last known copied
+ * location up to the faulting address (exclusive). In the copy_from_user
+ * case, remaining byte in kernel buffer will be zeroed.
+ *
+ * Take copy_from_user as an example, in the code there are multiple loads
+ * in a bundle and those multiple loads could span over two pages, the
+ * faulting address is calculated as page_round_down(max(src0, src1)).
+ * This is based on knowledge that if we can access one byte in a page, we
+ * can access any byte in that page.
+ *
+ * predicate used in the exception handler:
+ * p6-p7: direction
+ * p10-p11: src faulting addr calculation
+ * p12-p13: dst faulting addr calculation
+ */
+
+#define A	r19
+#define B	r20
+#define C	r21
+#define D	r22
+#define F	r28
+
+#define memset_arg0	r32
+#define memset_arg2	r33
+
+#define saved_retval	loc0
+#define saved_rtlink	loc1
+#define saved_pfs_stack	loc2
+
+.ex_hndlr_s:
+	add	src0=8,src0
+	br.sptk .ex_handler
+	;;
+.ex_hndlr_d:
+	add	dst0=8,dst0
+	br.sptk .ex_handler
+	;;
+.ex_hndlr_lcpy_1:
+	mov	src1=src_pre_mem
+	mov	dst1=dst_pre_mem
+	cmp.gtu	p10,p11=src_pre_mem,saved_in1
+	cmp.gtu	p12,p13=dst_pre_mem,saved_in0
+	;;
+(p10)	add	src0=8,saved_in1
+(p11)	mov	src0=saved_in1
+(p12)	add	dst0=8,saved_in0
+(p13)	mov	dst0=saved_in0
+	br.sptk	.ex_handler
+.ex_handler_lcpy:
+	// in line_copy block, the preload addresses should always ahead
+	// of the other two src/dst pointers.  Furthermore, src1/dst1 should
+	// always ahead of src0/dst0.
+	mov	src1=src_pre_mem
+	mov	dst1=dst_pre_mem
+.ex_handler:
+	mov	pr=saved_pr,-1		// first restore pr, lc, and pfs
+	mov	ar.lc=saved_lc
+	mov	ar.pfs=saved_pfs
+	;;
+.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs
+	cmp.ltu	p6,p7=saved_in0, saved_in1	// get the copy direction
+	cmp.ltu	p10,p11=src0,src1
+	cmp.ltu	p12,p13=dst0,dst1
+	fcmp.eq	p8,p0=f6,f0		// is it memcpy?
+	mov	tmp = dst0
+	;;
+(p11)	mov	src1 = src0		// pick the larger of the two
+(p13)	mov	dst0 = dst1		// make dst0 the smaller one
+(p13)	mov	dst1 = tmp		// and dst1 the larger one
+	;;
+(p6)	dep	F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary
+(p7)	dep	F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary
+	;;
+(p6)	cmp.le	p14,p0=dst0,saved_in0	// no progress has been made on store
+(p7)	cmp.le	p14,p0=src0,saved_in1	// no progress has been made on load
+	mov	retval=saved_in2
+(p8)	ld1	tmp=[src1]		// force an oops for memcpy call
+(p8)	st1	[dst1]=r0		// force an oops for memcpy call
+(p14)	br.ret.sptk.many rp
+
+/*
+ * The remaining byte to copy is calculated as:
+ *
+ * A =	(faulting_addr - orig_src)	-> len to faulting ld address
+ *	or 
+ * 	(faulting_addr - orig_dst)	-> len to faulting st address
+ * B =	(cur_dst - orig_dst)		-> len copied so far
+ * C =	A - B				-> len need to be copied
+ * D =	orig_len - A			-> len need to be zeroed
+ */
+(p6)	sub	A = F, saved_in0
+(p7)	sub	A = F, saved_in1
+	clrrrb
+	;;
+	alloc	saved_pfs_stack=ar.pfs,3,3,3,0
+	cmp.lt	p8,p0=A,r0
+	sub	B = dst0, saved_in0	// how many byte copied so far
+	;;
+(p8)	mov	A = 0;			// A shouldn't be negative, cap it
+	;;
+	sub	C = A, B
+	sub	D = saved_in2, A
+	;;
+	cmp.gt	p8,p0=C,r0		// more than 1 byte?
+	add	memset_arg0=saved_in0, A
+(p6)	mov	memset_arg2=0		// copy_to_user should not call memset
+(p7)	mov	memset_arg2=D		// copy_from_user need to have kbuf zeroed
+	mov	r8=0
+	mov	saved_retval = D
+	mov	saved_rtlink = b0
+
+	add	out0=saved_in0, B
+	add	out1=saved_in1, B
+	mov	out2=C
+(p8)	br.call.sptk.few b0=__copy_user	// recursive call
+	;;
+
+	add	saved_retval=saved_retval,r8	// above might return non-zero value
+	cmp.gt	p8,p0=memset_arg2,r0	// more than 1 byte?
+	mov	out0=memset_arg0	// *s
+	mov	out1=r0			// c
+	mov	out2=memset_arg2	// n
+(p8)	br.call.sptk.few b0=memset
+	;;
+
+	mov	retval=saved_retval
+	mov	ar.pfs=saved_pfs_stack
+	mov	b0=saved_rtlink
+	br.ret.sptk.many rp
+
+/* end of McKinley specific optimization */
+END(__copy_user)
diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S
new file mode 100644
index 00000000..f26c16ae
--- /dev/null
+++ b/arch/ia64/lib/memset.S
@@ -0,0 +1,362 @@
+/* Optimized version of the standard memset() function.
+
+   Copyright (c) 2002 Hewlett-Packard Co/CERN
+	Sverre Jarp <Sverre.Jarp@cern.ch>
+
+   Return: dest
+
+   Inputs:
+        in0:    dest
+        in1:    value
+        in2:    count
+
+   The algorithm is fairly straightforward: set byte by byte until we
+   we get to a 16B-aligned address, then loop on 128 B chunks using an
+   early store as prefetching, then loop on 32B chucks, then clear remaining
+   words, finally clear remaining bytes.
+   Since a stf.spill f0 can store 16B in one go, we use this instruction
+   to get peak speed when value = 0.  */
+
+#include <asm/asmmacro.h>
+#undef ret
+
+#define dest		in0
+#define value		in1
+#define	cnt		in2
+
+#define tmp		r31
+#define save_lc		r30
+#define ptr0		r29
+#define ptr1		r28
+#define ptr2		r27
+#define ptr3		r26
+#define ptr9 		r24
+#define	loopcnt		r23
+#define linecnt		r22
+#define bytecnt		r21
+
+#define fvalue		f6
+
+// This routine uses only scratch predicate registers (p6 - p15)
+#define p_scr		p6			// default register for same-cycle branches
+#define p_nz		p7
+#define p_zr		p8
+#define p_unalgn	p9
+#define p_y		p11
+#define p_n		p12
+#define p_yy		p13
+#define p_nn		p14
+
+#define MIN1		15
+#define MIN1P1HALF	8
+#define LINE_SIZE	128
+#define LSIZE_SH        7			// shift amount
+#define PREF_AHEAD	8
+
+GLOBAL_ENTRY(memset)
+{ .mmi
+	.prologue
+	alloc	tmp = ar.pfs, 3, 0, 0, 0
+	lfetch.nt1 [dest]			//
+	.save   ar.lc, save_lc
+	mov.i	save_lc = ar.lc
+	.body
+} { .mmi
+	mov	ret0 = dest			// return value
+	cmp.ne	p_nz, p_zr = value, r0		// use stf.spill if value is zero
+	cmp.eq	p_scr, p0 = cnt, r0
+;; }
+{ .mmi
+	and	ptr2 = -(MIN1+1), dest		// aligned address
+	and	tmp = MIN1, dest		// prepare to check for correct alignment
+	tbit.nz p_y, p_n = dest, 0		// Do we have an odd address? (M_B_U)
+} { .mib
+	mov	ptr1 = dest
+	mux1	value = value, @brcst		// create 8 identical bytes in word
+(p_scr)	br.ret.dpnt.many rp			// return immediately if count = 0
+;; }
+{ .mib
+	cmp.ne	p_unalgn, p0 = tmp, r0		//
+} { .mib
+	sub	bytecnt = (MIN1+1), tmp		// NB: # of bytes to move is 1 higher than loopcnt
+	cmp.gt	p_scr, p0 = 16, cnt		// is it a minimalistic task?
+(p_scr)	br.cond.dptk.many .move_bytes_unaligned	// go move just a few (M_B_U)
+;; }
+{ .mmi
+(p_unalgn) add	ptr1 = (MIN1+1), ptr2		// after alignment
+(p_unalgn) add	ptr2 = MIN1P1HALF, ptr2		// after alignment
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3	// should we do a st8 ?
+;; }
+{ .mib
+(p_y)	add	cnt = -8, cnt			//
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2	// should we do a st4 ?
+} { .mib
+(p_y)	st8	[ptr2] = value,-4		//
+(p_n)	add	ptr2 = 4, ptr2			//
+;; }
+{ .mib
+(p_yy)	add	cnt = -4, cnt			//
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1	// should we do a st2 ?
+} { .mib
+(p_yy)	st4	[ptr2] = value,-2		//
+(p_nn)	add	ptr2 = 2, ptr2			//
+;; }
+{ .mmi
+	mov	tmp = LINE_SIZE+1		// for compare
+(p_y)	add	cnt = -2, cnt			//
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0	// should we do a st1 ?
+} { .mmi
+	setf.sig fvalue=value			// transfer value to FLP side
+(p_y)	st2	[ptr2] = value,-1		//
+(p_n)	add	ptr2 = 1, ptr2			//
+;; }
+
+{ .mmi
+(p_yy)	st1	[ptr2] = value 			//
+  	cmp.gt	p_scr, p0 = tmp, cnt		// is it a minimalistic task?
+} { .mbb
+(p_yy)	add	cnt = -1, cnt			//
+(p_scr)	br.cond.dpnt.many .fraction_of_line	// go move just a few
+;; }
+
+{ .mib
+	nop.m 0
+	shr.u	linecnt = cnt, LSIZE_SH
+(p_zr)	br.cond.dptk.many .l1b			// Jump to use stf.spill
+;; }
+
+	TEXT_ALIGN(32) // --------------------- //  L1A: store ahead into cache lines; fill later
+{ .mmi
+	and	tmp = -(LINE_SIZE), cnt		// compute end of range
+	mov	ptr9 = ptr1			// used for prefetching
+	and	cnt = (LINE_SIZE-1), cnt	// remainder
+} { .mmi
+	mov	loopcnt = PREF_AHEAD-1		// default prefetch loop
+	cmp.gt	p_scr, p0 = PREF_AHEAD, linecnt	// check against actual value
+;; }
+{ .mmi
+(p_scr)	add	loopcnt = -1, linecnt		//
+	add	ptr2 = 8, ptr1			// start of stores (beyond prefetch stores)
+	add	ptr1 = tmp, ptr1		// first address beyond total range
+;; }
+{ .mmi
+	add	tmp = -1, linecnt		// next loop count
+	mov.i	ar.lc = loopcnt			//
+;; }
+.pref_l1a:
+{ .mib
+	stf8 [ptr9] = fvalue, 128		// Do stores one cache line apart
+	nop.i	0
+	br.cloop.dptk.few .pref_l1a
+;; }
+{ .mmi
+	add	ptr0 = 16, ptr2			// Two stores in parallel
+	mov.i	ar.lc = tmp			//
+;; }
+.l1ax:
+ { .mmi
+	stf8 [ptr2] = fvalue, 8
+	stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+	stf8 [ptr2] = fvalue, 24
+	stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+	stf8 [ptr2] = fvalue, 8
+	stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+	stf8 [ptr2] = fvalue, 24
+	stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+	stf8 [ptr2] = fvalue, 8
+	stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+	stf8 [ptr2] = fvalue, 24
+	stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+	stf8 [ptr2] = fvalue, 8
+	stf8 [ptr0] = fvalue, 32
+ 	cmp.lt	p_scr, p0 = ptr9, ptr1		// do we need more prefetching?
+ ;; }
+{ .mmb
+	stf8 [ptr2] = fvalue, 24
+(p_scr)	stf8 [ptr9] = fvalue, 128
+	br.cloop.dptk.few .l1ax
+;; }
+{ .mbb
+	cmp.le  p_scr, p0 = 8, cnt		// just a few bytes left ?
+(p_scr) br.cond.dpnt.many  .fraction_of_line	// Branch no. 2
+	br.cond.dpnt.many  .move_bytes_from_alignment	// Branch no. 3
+;; }
+
+	TEXT_ALIGN(32)
+.l1b:	// ------------------------------------ //  L1B: store ahead into cache lines; fill later
+{ .mmi
+	and	tmp = -(LINE_SIZE), cnt		// compute end of range
+	mov	ptr9 = ptr1			// used for prefetching
+	and	cnt = (LINE_SIZE-1), cnt	// remainder
+} { .mmi
+	mov	loopcnt = PREF_AHEAD-1		// default prefetch loop
+	cmp.gt	p_scr, p0 = PREF_AHEAD, linecnt	// check against actual value
+;; }
+{ .mmi
+(p_scr)	add	loopcnt = -1, linecnt
+	add	ptr2 = 16, ptr1			// start of stores (beyond prefetch stores)
+	add	ptr1 = tmp, ptr1		// first address beyond total range
+;; }
+{ .mmi
+	add	tmp = -1, linecnt		// next loop count
+	mov.i	ar.lc = loopcnt
+;; }
+.pref_l1b:
+{ .mib
+	stf.spill [ptr9] = f0, 128		// Do stores one cache line apart
+	nop.i   0
+	br.cloop.dptk.few .pref_l1b
+;; }
+{ .mmi
+	add	ptr0 = 16, ptr2			// Two stores in parallel
+	mov.i	ar.lc = tmp
+;; }
+.l1bx:
+ { .mmi
+	stf.spill [ptr2] = f0, 32
+	stf.spill [ptr0] = f0, 32
+ ;; }
+ { .mmi
+	stf.spill [ptr2] = f0, 32
+	stf.spill [ptr0] = f0, 32
+ ;; }
+ { .mmi
+	stf.spill [ptr2] = f0, 32
+	stf.spill [ptr0] = f0, 64
+ 	cmp.lt	p_scr, p0 = ptr9, ptr1		// do we need more prefetching?
+ ;; }
+{ .mmb
+	stf.spill [ptr2] = f0, 32
+(p_scr)	stf.spill [ptr9] = f0, 128
+	br.cloop.dptk.few .l1bx
+;; }
+{ .mib
+	cmp.gt  p_scr, p0 = 8, cnt		// just a few bytes left ?
+(p_scr)	br.cond.dpnt.many  .move_bytes_from_alignment	//
+;; }
+
+.fraction_of_line:
+{ .mib
+	add	ptr2 = 16, ptr1
+	shr.u	loopcnt = cnt, 5   		// loopcnt = cnt / 32
+;; }
+{ .mib
+	cmp.eq	p_scr, p0 = loopcnt, r0
+	add	loopcnt = -1, loopcnt
+(p_scr)	br.cond.dpnt.many .store_words
+;; }
+{ .mib
+	and	cnt = 0x1f, cnt			// compute the remaining cnt
+	mov.i   ar.lc = loopcnt
+;; }
+	TEXT_ALIGN(32)
+.l2:	// ------------------------------------ //  L2A:  store 32B in 2 cycles
+{ .mmb
+	stf8	[ptr1] = fvalue, 8
+	stf8	[ptr2] = fvalue, 8
+;; } { .mmb
+	stf8	[ptr1] = fvalue, 24
+	stf8	[ptr2] = fvalue, 24
+	br.cloop.dptk.many .l2
+;; }
+.store_words:
+{ .mib
+	cmp.gt	p_scr, p0 = 8, cnt		// just a few bytes left ?
+(p_scr)	br.cond.dpnt.many .move_bytes_from_alignment	// Branch
+;; }
+
+{ .mmi
+	stf8	[ptr1] = fvalue, 8		// store
+	cmp.le	p_y, p_n = 16, cnt
+	add	cnt = -8, cnt			// subtract
+;; }
+{ .mmi
+(p_y)	stf8	[ptr1] = fvalue, 8		// store
+(p_y)	cmp.le.unc p_yy, p_nn = 16, cnt
+(p_y)	add	cnt = -8, cnt			// subtract
+;; }
+{ .mmi						// store
+(p_yy)	stf8	[ptr1] = fvalue, 8
+(p_yy)	add	cnt = -8, cnt			// subtract
+;; }
+
+.move_bytes_from_alignment:
+{ .mib
+	cmp.eq	p_scr, p0 = cnt, r0
+	tbit.nz.unc p_y, p0 = cnt, 2		// should we terminate with a st4 ?
+(p_scr)	br.cond.dpnt.few .restore_and_exit
+;; }
+{ .mib
+(p_y)	st4	[ptr1] = value,4
+	tbit.nz.unc p_yy, p0 = cnt, 1		// should we terminate with a st2 ?
+;; }
+{ .mib
+(p_yy)	st2	[ptr1] = value,2
+	tbit.nz.unc p_y, p0 = cnt, 0		// should we terminate with a st1 ?
+;; }
+
+{ .mib
+(p_y)	st1	[ptr1] = value
+;; }
+.restore_and_exit:
+{ .mib
+	nop.m	0
+	mov.i	ar.lc = save_lc
+	br.ret.sptk.many rp
+;; }
+
+.move_bytes_unaligned:
+{ .mmi
+       .pred.rel "mutex",p_y, p_n
+       .pred.rel "mutex",p_yy, p_nn
+(p_n)	cmp.le  p_yy, p_nn = 4, cnt
+(p_y)	cmp.le  p_yy, p_nn = 5, cnt
+(p_n)	add	ptr2 = 2, ptr1
+} { .mmi
+(p_y)	add	ptr2 = 3, ptr1
+(p_y)	st1	[ptr1] = value, 1		// fill 1 (odd-aligned) byte [15, 14 (or less) left]
+(p_y)	add	cnt = -1, cnt
+;; }
+{ .mmi
+(p_yy)	cmp.le.unc p_y, p0 = 8, cnt
+	add	ptr3 = ptr1, cnt		// prepare last store
+	mov.i	ar.lc = save_lc
+} { .mmi
+(p_yy)	st2	[ptr1] = value, 4		// fill 2 (aligned) bytes
+(p_yy)	st2	[ptr2] = value, 4		// fill 2 (aligned) bytes [11, 10 (o less) left]
+(p_yy)	add	cnt = -4, cnt
+;; }
+{ .mmi
+(p_y)	cmp.le.unc p_yy, p0 = 8, cnt
+	add	ptr3 = -1, ptr3			// last store
+	tbit.nz p_scr, p0 = cnt, 1		// will there be a st2 at the end ?
+} { .mmi
+(p_y)	st2	[ptr1] = value, 4		// fill 2 (aligned) bytes
+(p_y)	st2	[ptr2] = value, 4		// fill 2 (aligned) bytes [7, 6 (or less) left]
+(p_y)	add	cnt = -4, cnt
+;; }
+{ .mmi
+(p_yy)	st2	[ptr1] = value, 4		// fill 2 (aligned) bytes
+(p_yy)	st2	[ptr2] = value, 4		// fill 2 (aligned) bytes [3, 2 (or less) left]
+	tbit.nz p_y, p0 = cnt, 0		// will there be a st1 at the end ?
+} { .mmi
+(p_yy)	add	cnt = -4, cnt
+;; }
+{ .mmb
+(p_scr)	st2	[ptr1] = value			// fill 2 (aligned) bytes
+(p_y)	st1	[ptr3] = value			// fill last byte (using ptr3)
+	br.ret.sptk.many rp
+}
+END(memset)
diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S
new file mode 100644
index 00000000..e0cdac0a
--- /dev/null
+++ b/arch/ia64/lib/strlen.S
@@ -0,0 +1,192 @@
+/*
+ *
+ * Optimized version of the standard strlen() function
+ *
+ *
+ * Inputs:
+ *	in0	address of string
+ *
+ * Outputs:
+ *	ret0	the number of characters in the string (0 if empty string)
+ *	does not count the \0
+ *
+ * Copyright (C) 1999, 2001 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 09/24/99 S.Eranian add speculation recovery code
+ */
+
+#include <asm/asmmacro.h>
+
+//
+//
+// This is an enhanced version of the basic strlen. it includes a combination
+// of compute zero index (czx), parallel comparisons, speculative loads and
+// loop unroll using rotating registers.
+//
+// General Ideas about the algorithm:
+//	  The goal is to look at the string in chunks of 8 bytes.
+//	  so we need to do a few extra checks at the beginning because the
+//	  string may not be 8-byte aligned. In this case we load the 8byte
+//	  quantity which includes the start of the string and mask the unused
+//	  bytes with 0xff to avoid confusing czx.
+//	  We use speculative loads and software pipelining to hide memory
+//	  latency and do read ahead safely. This way we defer any exception.
+//
+//	  Because we don't want the kernel to be relying on particular
+//	  settings of the DCR register, we provide recovery code in case
+//	  speculation fails. The recovery code is going to "redo" the work using
+//	  only normal loads. If we still get a fault then we generate a
+//	  kernel panic. Otherwise we return the strlen as usual.
+//
+//	  The fact that speculation may fail can be caused, for instance, by
+//	  the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
+//	  a NaT bit will be set if the translation is not present. The normal
+//	  load, on the other hand, will cause the translation to be inserted
+//	  if the mapping exists.
+//
+//	  It should be noted that we execute recovery code only when we need
+//	  to use the data that has been speculatively loaded: we don't execute
+//	  recovery code on pure read ahead data.
+//
+// Remarks:
+//	- the cmp r0,r0 is used as a fast way to initialize a predicate
+//	  register to 1. This is required to make sure that we get the parallel
+//	  compare correct.
+//
+//	- we don't use the epilogue counter to exit the loop but we need to set
+//	  it to zero beforehand.
+//
+//	- after the loop we must test for Nat values because neither the
+//	  czx nor cmp instruction raise a NaT consumption fault. We must be
+//	  careful not to look too far for a Nat for which we don't care.
+//	  For instance we don't need to look at a NaT in val2 if the zero byte
+//	  was in val1.
+//
+//	- Clearly performance tuning is required.
+//
+//
+//
+#define saved_pfs	r11
+#define	tmp		r10
+#define base		r16
+#define orig		r17
+#define saved_pr	r18
+#define src		r19
+#define mask		r20
+#define val		r21
+#define val1		r22
+#define val2		r23
+
+GLOBAL_ENTRY(strlen)
+	.prologue
+	.save ar.pfs, saved_pfs
+	alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8
+
+	.rotr v[2], w[2]	// declares our 4 aliases
+
+	extr.u tmp=in0,0,3	// tmp=least significant 3 bits
+	mov orig=in0		// keep trackof initial byte address
+	dep src=0,in0,0,3	// src=8byte-aligned in0 address
+	.save pr, saved_pr
+	mov saved_pr=pr		// preserve predicates (rotation)
+	;;
+
+	.body
+
+	ld8 v[1]=[src],8	// must not speculate: can fail here
+	shl tmp=tmp,3		// multiply by 8bits/byte
+	mov mask=-1		// our mask
+	;;
+	ld8.s w[1]=[src],8	// speculatively load next
+	cmp.eq p6,p0=r0,r0	// sets p6 to true for cmp.and
+	sub tmp=64,tmp		// how many bits to shift our mask on the right
+	;;
+	shr.u	mask=mask,tmp	// zero enough bits to hold v[1] valuable part
+	mov ar.ec=r0		// clear epilogue counter (saved in ar.pfs)
+	;;
+	add base=-16,src	// keep track of aligned base
+	or v[1]=v[1],mask	// now we have a safe initial byte pattern
+	;;
+1:
+	ld8.s v[0]=[src],8	// speculatively load next
+	czx1.r val1=v[1]	// search 0 byte from right
+	czx1.r val2=w[1]	// search 0 byte from right following 8bytes
+	;;
+	ld8.s w[0]=[src],8	// speculatively load next to next
+	cmp.eq.and p6,p0=8,val1	// p6 = p6 and val1==8
+	cmp.eq.and p6,p0=8,val2	// p6 = p6 and mask==8
+(p6)	br.wtop.dptk 1b		// loop until p6 == 0
+	;;
+	//
+	// We must return try the recovery code iff
+	// val1_is_nat || (val1==8 && val2_is_nat)
+	//
+	// XXX Fixme
+	//	- there must be a better way of doing the test
+	//
+	cmp.eq  p8,p9=8,val1	// p6 = val1 had zero (disambiguate)
+	tnat.nz p6,p7=val1	// test NaT on val1
+(p6)	br.cond.spnt .recover	// jump to recovery if val1 is NaT
+	;;
+	//
+	// if we come here p7 is true, i.e., initialized for // cmp
+	//
+	cmp.eq.and  p7,p0=8,val1// val1==8?
+	tnat.nz.and p7,p0=val2	// test NaT if val2
+(p7)	br.cond.spnt .recover	// jump to recovery if val2 is NaT
+	;;
+(p8)	mov val1=val2		// the other test got us out of the loop
+(p8)	adds src=-16,src	// correct position when 3 ahead
+(p9)	adds src=-24,src	// correct position when 4 ahead
+	;;
+	sub ret0=src,orig	// distance from base
+	sub tmp=8,val1		// which byte in word
+	mov pr=saved_pr,0xffffffffffff0000
+	;;
+	sub ret0=ret0,tmp	// adjust
+	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
+	br.ret.sptk.many rp	// end of normal execution
+
+	//
+	// Outlined recovery code when speculation failed
+	//
+	// This time we don't use speculation and rely on the normal exception
+	// mechanism. that's why the loop is not as good as the previous one
+	// because read ahead is not possible
+	//
+	// IMPORTANT:
+	// Please note that in the case of strlen() as opposed to strlen_user()
+	// we don't use the exception mechanism, as this function is not
+	// supposed to fail. If that happens it means we have a bug and the
+	// code will cause of kernel fault.
+	//
+	// XXX Fixme
+	//	- today we restart from the beginning of the string instead
+	//	  of trying to continue where we left off.
+	//
+.recover:
+	ld8 val=[base],8	// will fail if unrecoverable fault
+	;;
+	or val=val,mask		// remask first bytes
+	cmp.eq p0,p6=r0,r0	// nullify first ld8 in loop
+	;;
+	//
+	// ar.ec is still zero here
+	//
+2:
+(p6)	ld8 val=[base],8	// will fail if unrecoverable fault
+	;;
+	czx1.r val1=val		// search 0 byte from right
+	;;
+	cmp.eq p6,p0=8,val1	// val1==8 ?
+(p6)	br.wtop.dptk 2b		// loop until p6 == 0
+	;;			// (avoid WAW on p63)
+	sub ret0=base,orig	// distance from base
+	sub tmp=8,val1
+	mov pr=saved_pr,0xffffffffffff0000
+	;;
+	sub ret0=ret0,tmp	// length=now - back -1
+	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
+	br.ret.sptk.many rp	// end of successful recovery code
+END(strlen)
diff --git a/arch/ia64/lib/strlen_user.S b/arch/ia64/lib/strlen_user.S
new file mode 100644
index 00000000..c71eded4
--- /dev/null
+++ b/arch/ia64/lib/strlen_user.S
@@ -0,0 +1,198 @@
+/*
+ * Optimized version of the strlen_user() function
+ *
+ * Inputs:
+ *	in0	address of buffer
+ *
+ * Outputs:
+ *	ret0	0 in case of fault, strlen(buffer)+1 otherwise
+ *
+ * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 01/19/99 S.Eranian heavily enhanced version (see details below)
+ * 09/24/99 S.Eranian added speculation recovery code
+ */
+
+#include <asm/asmmacro.h>
+
+//
+// int strlen_user(char *)
+// ------------------------
+// Returns:
+//	- length of string + 1
+//	- 0 in case an exception is raised
+//
+// This is an enhanced version of the basic strlen_user. it includes a
+// combination of compute zero index (czx), parallel comparisons, speculative
+// loads and loop unroll using rotating registers.
+//
+// General Ideas about the algorithm:
+//	  The goal is to look at the string in chunks of 8 bytes.
+//	  so we need to do a few extra checks at the beginning because the
+//	  string may not be 8-byte aligned. In this case we load the 8byte
+//	  quantity which includes the start of the string and mask the unused
+//	  bytes with 0xff to avoid confusing czx.
+//	  We use speculative loads and software pipelining to hide memory
+//	  latency and do read ahead safely. This way we defer any exception.
+//
+//	  Because we don't want the kernel to be relying on particular
+//	  settings of the DCR register, we provide recovery code in case
+//	  speculation fails. The recovery code is going to "redo" the work using
+//	  only normal loads. If we still get a fault then we return an
+//	  error (ret0=0). Otherwise we return the strlen+1 as usual.
+//	  The fact that speculation may fail can be caused, for instance, by
+//	  the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
+//	  a NaT bit will be set if the translation is not present. The normal
+//	  load, on the other hand, will cause the translation to be inserted
+//	  if the mapping exists.
+//
+//	  It should be noted that we execute recovery code only when we need
+//	  to use the data that has been speculatively loaded: we don't execute
+//	  recovery code on pure read ahead data.
+//
+// Remarks:
+//	- the cmp r0,r0 is used as a fast way to initialize a predicate
+//	  register to 1. This is required to make sure that we get the parallel
+//	  compare correct.
+//
+//	- we don't use the epilogue counter to exit the loop but we need to set
+//	  it to zero beforehand.
+//
+//	- after the loop we must test for Nat values because neither the
+//	  czx nor cmp instruction raise a NaT consumption fault. We must be
+//	  careful not to look too far for a Nat for which we don't care.
+//	  For instance we don't need to look at a NaT in val2 if the zero byte
+//	  was in val1.
+//
+//	- Clearly performance tuning is required.
+//
+
+#define saved_pfs	r11
+#define	tmp		r10
+#define base		r16
+#define orig		r17
+#define saved_pr	r18
+#define src		r19
+#define mask		r20
+#define val		r21
+#define val1		r22
+#define val2		r23
+
+GLOBAL_ENTRY(__strlen_user)
+	.prologue
+	.save ar.pfs, saved_pfs
+	alloc saved_pfs=ar.pfs,11,0,0,8
+
+	.rotr v[2], w[2]	// declares our 4 aliases
+
+	extr.u tmp=in0,0,3	// tmp=least significant 3 bits
+	mov orig=in0		// keep trackof initial byte address
+	dep src=0,in0,0,3	// src=8byte-aligned in0 address
+	.save pr, saved_pr
+	mov saved_pr=pr		// preserve predicates (rotation)
+	;;
+
+	.body
+
+	ld8.s v[1]=[src],8	// load the initial 8bytes (must speculate)
+	shl tmp=tmp,3		// multiply by 8bits/byte
+	mov mask=-1		// our mask
+	;;
+	ld8.s w[1]=[src],8	// load next 8 bytes in 2nd pipeline
+	cmp.eq p6,p0=r0,r0	// sets p6 (required because of // cmp.and)
+	sub tmp=64,tmp		// how many bits to shift our mask on the right
+	;;
+	shr.u	mask=mask,tmp	// zero enough bits to hold v[1] valuable part
+	mov ar.ec=r0		// clear epilogue counter (saved in ar.pfs)
+	;;
+	add base=-16,src	// keep track of aligned base
+	chk.s v[1], .recover	// if already NaT, then directly skip to recover
+	or v[1]=v[1],mask	// now we have a safe initial byte pattern
+	;;
+1:
+	ld8.s v[0]=[src],8	// speculatively load next
+	czx1.r val1=v[1]	// search 0 byte from right
+	czx1.r val2=w[1]	// search 0 byte from right following 8bytes
+	;;
+	ld8.s w[0]=[src],8	// speculatively load next to next
+	cmp.eq.and p6,p0=8,val1	// p6 = p6 and val1==8
+	cmp.eq.and p6,p0=8,val2	// p6 = p6 and mask==8
+(p6)	br.wtop.dptk.few 1b	// loop until p6 == 0
+	;;
+	//
+	// We must return try the recovery code iff
+	// val1_is_nat || (val1==8 && val2_is_nat)
+	//
+	// XXX Fixme
+	//	- there must be a better way of doing the test
+	//
+	cmp.eq  p8,p9=8,val1	// p6 = val1 had zero (disambiguate)
+	tnat.nz p6,p7=val1	// test NaT on val1
+(p6)	br.cond.spnt .recover	// jump to recovery if val1 is NaT
+	;;
+	//
+	// if we come here p7 is true, i.e., initialized for // cmp
+	//
+	cmp.eq.and  p7,p0=8,val1// val1==8?
+	tnat.nz.and p7,p0=val2	// test NaT if val2
+(p7)	br.cond.spnt .recover	// jump to recovery if val2 is NaT
+	;;
+(p8)	mov val1=val2		// val2 contains the value
+(p8)	adds src=-16,src	// correct position when 3 ahead
+(p9)	adds src=-24,src	// correct position when 4 ahead
+	;;
+	sub ret0=src,orig	// distance from origin
+	sub tmp=7,val1		// 7=8-1 because this strlen returns strlen+1
+	mov pr=saved_pr,0xffffffffffff0000
+	;;
+	sub ret0=ret0,tmp	// length=now - back -1
+	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
+	br.ret.sptk.many rp	// end of normal execution
+
+	//
+	// Outlined recovery code when speculation failed
+	//
+	// This time we don't use speculation and rely on the normal exception
+	// mechanism. that's why the loop is not as good as the previous one
+	// because read ahead is not possible
+	//
+	// XXX Fixme
+	//	- today we restart from the beginning of the string instead
+	//	  of trying to continue where we left off.
+	//
+.recover:
+	EX(.Lexit1, ld8 val=[base],8)	// load the initial bytes
+	;;
+	or val=val,mask			// remask first bytes
+	cmp.eq p0,p6=r0,r0		// nullify first ld8 in loop
+	;;
+	//
+	// ar.ec is still zero here
+	//
+2:
+	EX(.Lexit1, (p6) ld8 val=[base],8)
+	;;
+	czx1.r val1=val		// search 0 byte from right
+	;;
+	cmp.eq p6,p0=8,val1	// val1==8 ?
+(p6)	br.wtop.dptk.few 2b	// loop until p6 == 0
+	;;
+	sub ret0=base,orig	// distance from base
+	sub tmp=7,val1		// 7=8-1 because this strlen returns strlen+1
+	mov pr=saved_pr,0xffffffffffff0000
+	;;
+	sub ret0=ret0,tmp	// length=now - back -1
+	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
+	br.ret.sptk.many rp	// end of successful recovery code
+
+	//
+	// We failed even on the normal load (called from exception handler)
+	//
+.Lexit1:
+	mov ret0=0
+	mov pr=saved_pr,0xffffffffffff0000
+	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
+	br.ret.sptk.many rp
+END(__strlen_user)
diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S
new file mode 100644
index 00000000..a504381f
--- /dev/null
+++ b/arch/ia64/lib/strncpy_from_user.S
@@ -0,0 +1,44 @@
+/*
+ * Just like strncpy() except that if a fault occurs during copying,
+ * -EFAULT is returned.
+ *
+ * Inputs:
+ *	in0:	address of destination buffer
+ *	in1:	address of string to be copied
+ *	in2:	length of buffer in bytes
+ * Outputs:
+ *	r8:	-EFAULT in case of fault or number of bytes copied if no fault
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 00/03/06 D. Mosberger Fixed to return proper return value (bug found by
+ *			 by Andreas Schwab <schwab@suse.de>).
+ */
+
+#include <asm/asmmacro.h>
+
+GLOBAL_ENTRY(__strncpy_from_user)
+	alloc r2=ar.pfs,3,0,0,0
+	mov r8=0
+	mov r9=in1
+	;;
+	add r10=in1,in2
+	cmp.eq p6,p0=r0,in2
+(p6)	br.ret.spnt.many rp
+
+	// XXX braindead copy loop---this needs to be optimized
+.Loop1:
+	EX(.Lexit, ld1 r8=[in1],1)
+	;;
+	EX(.Lexit, st1 [in0]=r8,1)
+	cmp.ne p6,p7=r8,r0
+	;;
+(p6)	cmp.ne.unc p8,p0=in1,r10
+(p8)	br.cond.dpnt.few .Loop1
+	;;
+(p6)	mov r8=in2		// buffer filled up---return buffer length
+(p7)	sub r8=in1,r9,1		// return string length (excluding NUL character)
+[.Lexit:]
+	br.ret.sptk.many rp
+END(__strncpy_from_user)
diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S
new file mode 100644
index 00000000..d09066b1
--- /dev/null
+++ b/arch/ia64/lib/strnlen_user.S
@@ -0,0 +1,45 @@
+/*
+ * Returns 0 if exception before NUL or reaching the supplied limit (N),
+ * a value greater than N if the string is longer than the limit, else
+ * strlen.
+ *
+ * Inputs:
+ *	in0:	address of buffer
+ *	in1:	string length limit N
+ * Outputs:
+ *	r8:	0 in case of fault, strlen(buffer)+1 otherwise
+ *
+ * Copyright (C) 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/asmmacro.h>
+
+GLOBAL_ENTRY(__strnlen_user)
+	.prologue
+	alloc r2=ar.pfs,2,0,0,0
+	.save ar.lc, r16
+	mov r16=ar.lc			// preserve ar.lc
+
+	.body
+
+	add r3=-1,in1
+	;;
+	mov ar.lc=r3
+	mov r9=0
+	;;
+	// XXX braindead strlen loop---this needs to be optimized
+.Loop1:
+	EXCLR(.Lexit, ld1 r8=[in0],1)
+	add r9=1,r9
+	;;
+	cmp.eq p6,p0=r8,r0
+(p6)	br.cond.dpnt .Lexit
+	br.cloop.dptk.few .Loop1
+
+	add r9=1,in1			// NUL not found---return N+1
+	;;
+.Lexit:
+	mov r8=r9
+	mov ar.lc=r16			// restore ar.lc
+	br.ret.sptk.many rp
+END(__strnlen_user)
diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S
new file mode 100644
index 00000000..54e3f7ea
--- /dev/null
+++ b/arch/ia64/lib/xor.S
@@ -0,0 +1,184 @@
+/*
+ * arch/ia64/lib/xor.S
+ *
+ * Optimized RAID-5 checksumming functions for IA-64.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/asmmacro.h>
+
+GLOBAL_ENTRY(xor_ia64_2)
+	.prologue
+	.fframe 0
+	.save ar.pfs, r31
+	alloc r31 = ar.pfs, 3, 0, 13, 16
+	.save ar.lc, r30
+	mov r30 = ar.lc
+	.save pr, r29
+	mov r29 = pr
+	;;
+	.body
+	mov r8 = in1
+	mov ar.ec = 6 + 2
+	shr in0 = in0, 3
+	;;
+	adds in0 = -1, in0
+	mov r16 = in1
+	mov r17 = in2
+	;;
+	mov ar.lc = in0
+	mov pr.rot = 1 << 16
+	;;
+	.rotr s1[6+1], s2[6+1], d[2]
+	.rotp p[6+2]
+0:
+(p[0])	ld8.nta s1[0] = [r16], 8
+(p[0])	ld8.nta s2[0] = [r17], 8
+(p[6])	xor d[0] = s1[6], s2[6]
+(p[6+1])st8.nta [r8] = d[1], 8
+	nop.f 0
+	br.ctop.dptk.few 0b
+	;;
+	mov ar.lc = r30
+	mov pr = r29, -1
+	br.ret.sptk.few rp
+END(xor_ia64_2)
+
+GLOBAL_ENTRY(xor_ia64_3)
+	.prologue
+	.fframe 0
+	.save ar.pfs, r31
+	alloc r31 = ar.pfs, 4, 0, 20, 24
+	.save ar.lc, r30
+	mov r30 = ar.lc
+	.save pr, r29
+	mov r29 = pr
+	;;
+	.body
+	mov r8 = in1
+	mov ar.ec = 6 + 2
+	shr in0 = in0, 3
+	;;
+	adds in0 = -1, in0
+	mov r16 = in1
+	mov r17 = in2
+	;;
+	mov r18 = in3
+	mov ar.lc = in0
+	mov pr.rot = 1 << 16
+	;;
+	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
+	.rotp p[6+2]
+0:
+(p[0])	ld8.nta s1[0] = [r16], 8
+(p[0])	ld8.nta s2[0] = [r17], 8
+(p[6])	xor d[0] = s1[6], s2[6]
+	;;
+(p[0])	ld8.nta s3[0] = [r18], 8
+(p[6+1])st8.nta [r8] = d[1], 8
+(p[6])	xor d[0] = d[0], s3[6]
+	br.ctop.dptk.few 0b
+	;;
+	mov ar.lc = r30
+	mov pr = r29, -1
+	br.ret.sptk.few rp
+END(xor_ia64_3)
+
+GLOBAL_ENTRY(xor_ia64_4)
+	.prologue
+	.fframe 0
+	.save ar.pfs, r31
+	alloc r31 = ar.pfs, 5, 0, 27, 32
+	.save ar.lc, r30
+	mov r30 = ar.lc
+	.save pr, r29
+	mov r29 = pr
+	;;
+	.body
+	mov r8 = in1
+	mov ar.ec = 6 + 2
+	shr in0 = in0, 3
+	;;
+	adds in0 = -1, in0
+	mov r16 = in1
+	mov r17 = in2
+	;;
+	mov r18 = in3
+	mov ar.lc = in0
+	mov pr.rot = 1 << 16
+	mov r19 = in4
+	;;
+	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
+	.rotp p[6+2]
+0:
+(p[0])	ld8.nta s1[0] = [r16], 8
+(p[0])	ld8.nta s2[0] = [r17], 8
+(p[6])	xor d[0] = s1[6], s2[6]
+(p[0])	ld8.nta s3[0] = [r18], 8
+(p[0])	ld8.nta s4[0] = [r19], 8
+(p[6])	xor r20 = s3[6], s4[6]
+	;;
+(p[6+1])st8.nta [r8] = d[1], 8
+(p[6])	xor d[0] = d[0], r20
+	br.ctop.dptk.few 0b
+	;;
+	mov ar.lc = r30
+	mov pr = r29, -1
+	br.ret.sptk.few rp
+END(xor_ia64_4)
+
+GLOBAL_ENTRY(xor_ia64_5)
+	.prologue
+	.fframe 0
+	.save ar.pfs, r31
+	alloc r31 = ar.pfs, 6, 0, 34, 40
+	.save ar.lc, r30
+	mov r30 = ar.lc
+	.save pr, r29
+	mov r29 = pr
+	;;
+	.body
+	mov r8 = in1
+	mov ar.ec = 6 + 2
+	shr in0 = in0, 3
+	;;
+	adds in0 = -1, in0
+	mov r16 = in1
+	mov r17 = in2
+	;;
+	mov r18 = in3
+	mov ar.lc = in0
+	mov pr.rot = 1 << 16
+	mov r19 = in4
+	mov r20 = in5
+	;;
+	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
+	.rotp p[6+2]
+0:
+(p[0])	ld8.nta s1[0] = [r16], 8
+(p[0])	ld8.nta s2[0] = [r17], 8
+(p[6])	xor d[0] = s1[6], s2[6]
+(p[0])	ld8.nta s3[0] = [r18], 8
+(p[0])	ld8.nta s4[0] = [r19], 8
+(p[6])	xor r21 = s3[6], s4[6]
+	;;
+(p[0])	ld8.nta s5[0] = [r20], 8
+(p[6+1])st8.nta [r8] = d[1], 8
+(p[6])	xor d[0] = d[0], r21
+	;;
+(p[6])	  xor d[0] = d[0], s5[6]
+	nop.f 0
+	br.ctop.dptk.few 0b
+	;;
+	mov ar.lc = r30
+	mov pr = r29, -1
+	br.ret.sptk.few rp
+END(xor_ia64_5)
diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile
new file mode 100644
index 00000000..bb0a01a8
--- /dev/null
+++ b/arch/ia64/mm/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the ia64-specific parts of the memory manager.
+#
+
+obj-y := init.o fault.o tlb.o extable.o ioremap.o
+
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_NUMA)	   += numa.o
+obj-$(CONFIG_DISCONTIGMEM) += discontig.o
+obj-$(CONFIG_SPARSEMEM)	   += discontig.o
+obj-$(CONFIG_FLATMEM)	   += contig.o
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
new file mode 100644
index 00000000..f114a3b1
--- /dev/null
+++ b/arch/ia64/mm/contig.c
@@ -0,0 +1,355 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved.
+ *
+ * Routines used by ia64 machines with contiguous (or virtually contiguous)
+ * memory.
+ */
+#include <linux/bootmem.h>
+#include <linux/efi.h>
+#include <linux/mm.h>
+#include <linux/nmi.h>
+#include <linux/swap.h>
+
+#include <asm/meminit.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/mca.h>
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+static unsigned long max_gap;
+#endif
+
+/**
+ * show_mem - give short summary of memory stats
+ *
+ * Shows a simple page count of reserved and used pages in the system.
+ * For discontig machines, it does this on a per-pgdat basis.
+ */
+void show_mem(unsigned int filter)
+{
+	int i, total_reserved = 0;
+	int total_shared = 0, total_cached = 0;
+	unsigned long total_present = 0;
+	pg_data_t *pgdat;
+
+	printk(KERN_INFO "Mem-info:\n");
+	show_free_areas(filter);
+	printk(KERN_INFO "Node memory in pages:\n");
+	for_each_online_pgdat(pgdat) {
+		unsigned long present;
+		unsigned long flags;
+		int shared = 0, cached = 0, reserved = 0;
+		int nid = pgdat->node_id;
+
+		if (skip_free_areas_node(filter, nid))
+			continue;
+		pgdat_resize_lock(pgdat, &flags);
+		present = pgdat->node_present_pages;
+		for(i = 0; i < pgdat->node_spanned_pages; i++) {
+			struct page *page;
+			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
+				touch_nmi_watchdog();
+			if (pfn_valid(pgdat->node_start_pfn + i))
+				page = pfn_to_page(pgdat->node_start_pfn + i);
+			else {
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+				if (max_gap < LARGE_GAP)
+					continue;
+#endif
+				i = vmemmap_find_next_valid_pfn(nid, i) - 1;
+				continue;
+			}
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (page_count(page))
+				shared += page_count(page)-1;
+		}
+		pgdat_resize_unlock(pgdat, &flags);
+		total_present += present;
+		total_reserved += reserved;
+		total_cached += cached;
+		total_shared += shared;
+		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
+		       "shrd: %10d, swpd: %10d\n", nid,
+		       present, reserved, shared, cached);
+	}
+	printk(KERN_INFO "%ld pages of RAM\n", total_present);
+	printk(KERN_INFO "%d reserved pages\n", total_reserved);
+	printk(KERN_INFO "%d pages shared\n", total_shared);
+	printk(KERN_INFO "%d pages swap cached\n", total_cached);
+	printk(KERN_INFO "Total of %ld pages in page table cache\n",
+	       quicklist_total_size());
+	printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
+}
+
+
+/* physical address where the bootmem map is located */
+unsigned long bootmap_start;
+
+/**
+ * find_bootmap_location - callback to find a memory area for the bootmap
+ * @start: start of region
+ * @end: end of region
+ * @arg: unused callback data
+ *
+ * Find a place to put the bootmap and return its starting address in
+ * bootmap_start.  This address must be page-aligned.
+ */
+static int __init
+find_bootmap_location (u64 start, u64 end, void *arg)
+{
+	u64 needed = *(unsigned long *)arg;
+	u64 range_start, range_end, free_start;
+	int i;
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		start += PAGE_SIZE;
+		if (start >= end)
+			return 0;
+	}
+#endif
+
+	free_start = PAGE_OFFSET;
+
+	for (i = 0; i < num_rsvd_regions; i++) {
+		range_start = max(start, free_start);
+		range_end   = min(end, rsvd_region[i].start & PAGE_MASK);
+
+		free_start = PAGE_ALIGN(rsvd_region[i].end);
+
+		if (range_end <= range_start)
+			continue; /* skip over empty range */
+
+		if (range_end - range_start >= needed) {
+			bootmap_start = __pa(range_start);
+			return -1;	/* done */
+		}
+
+		/* nothing more available in this segment */
+		if (range_end == end)
+			return 0;
+	}
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+static void *cpu_data;
+/**
+ * per_cpu_init - setup per-cpu variables
+ *
+ * Allocate and setup per-cpu data areas.
+ */
+void * __cpuinit
+per_cpu_init (void)
+{
+	static bool first_time = true;
+	void *cpu0_data = __cpu0_per_cpu;
+	unsigned int cpu;
+
+	if (!first_time)
+		goto skip;
+	first_time = false;
+
+	/*
+	 * get_free_pages() cannot be used before cpu_init() done.
+	 * BSP allocates PERCPU_PAGE_SIZE bytes for all possible CPUs
+	 * to avoid that AP calls get_zeroed_page().
+	 */
+	for_each_possible_cpu(cpu) {
+		void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start;
+
+		memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start);
+		__per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start;
+		per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+
+		/*
+		 * percpu area for cpu0 is moved from the __init area
+		 * which is setup by head.S and used till this point.
+		 * Update ar.k3.  This move is ensures that percpu
+		 * area for cpu0 is on the correct node and its
+		 * virtual address isn't insanely far from other
+		 * percpu areas which is important for congruent
+		 * percpu allocator.
+		 */
+		if (cpu == 0)
+			ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) -
+				    (unsigned long)__per_cpu_start);
+
+		cpu_data += PERCPU_PAGE_SIZE;
+	}
+skip:
+	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
+}
+
+static inline void
+alloc_per_cpu_data(void)
+{
+	cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * num_possible_cpus(),
+				   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+}
+
+/**
+ * setup_per_cpu_areas - setup percpu areas
+ *
+ * Arch code has already allocated and initialized percpu areas.  All
+ * this function has to do is to teach the determined layout to the
+ * dynamic percpu allocator, which happens to be more complex than
+ * creating whole new ones using helpers.
+ */
+void __init
+setup_per_cpu_areas(void)
+{
+	struct pcpu_alloc_info *ai;
+	struct pcpu_group_info *gi;
+	unsigned int cpu;
+	ssize_t static_size, reserved_size, dyn_size;
+	int rc;
+
+	ai = pcpu_alloc_alloc_info(1, num_possible_cpus());
+	if (!ai)
+		panic("failed to allocate pcpu_alloc_info");
+	gi = &ai->groups[0];
+
+	/* units are assigned consecutively to possible cpus */
+	for_each_possible_cpu(cpu)
+		gi->cpu_map[gi->nr_units++] = cpu;
+
+	/* set parameters */
+	static_size = __per_cpu_end - __per_cpu_start;
+	reserved_size = PERCPU_MODULE_RESERVE;
+	dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
+	if (dyn_size < 0)
+		panic("percpu area overflow static=%zd reserved=%zd\n",
+		      static_size, reserved_size);
+
+	ai->static_size		= static_size;
+	ai->reserved_size	= reserved_size;
+	ai->dyn_size		= dyn_size;
+	ai->unit_size		= PERCPU_PAGE_SIZE;
+	ai->atom_size		= PAGE_SIZE;
+	ai->alloc_size		= PERCPU_PAGE_SIZE;
+
+	rc = pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]);
+	if (rc)
+		panic("failed to setup percpu area (err=%d)", rc);
+
+	pcpu_free_alloc_info(ai);
+}
+#else
+#define alloc_per_cpu_data() do { } while (0)
+#endif /* CONFIG_SMP */
+
+/**
+ * find_memory - setup memory map
+ *
+ * Walk the EFI memory map and find usable memory for the system, taking
+ * into account reserved areas.
+ */
+void __init
+find_memory (void)
+{
+	unsigned long bootmap_size;
+
+	reserve_memory();
+
+	/* first find highest page frame number */
+	min_low_pfn = ~0UL;
+	max_low_pfn = 0;
+	efi_memmap_walk(find_max_min_low_pfn, NULL);
+	max_pfn = max_low_pfn;
+	/* how many bytes to cover all the pages */
+	bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
+
+	/* look for a location to hold the bootmap */
+	bootmap_start = ~0UL;
+	efi_memmap_walk(find_bootmap_location, &bootmap_size);
+	if (bootmap_start == ~0UL)
+		panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
+
+	bootmap_size = init_bootmem_node(NODE_DATA(0),
+			(bootmap_start >> PAGE_SHIFT), 0, max_pfn);
+
+	/* Free all available memory, then mark bootmem-map as being in use. */
+	efi_memmap_walk(filter_rsvd_memory, free_bootmem);
+	reserve_bootmem(bootmap_start, bootmap_size, BOOTMEM_DEFAULT);
+
+	find_initrd();
+
+	alloc_per_cpu_data();
+}
+
+static int count_pages(u64 start, u64 end, void *arg)
+{
+	unsigned long *count = arg;
+
+	*count += (end - start) >> PAGE_SHIFT;
+	return 0;
+}
+
+/*
+ * Set up the page tables.
+ */
+
+void __init
+paging_init (void)
+{
+	unsigned long max_dma;
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
+
+	num_physpages = 0;
+	efi_memmap_walk(count_pages, &num_physpages);
+
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
+	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+	max_zone_pfns[ZONE_DMA] = max_dma;
+#endif
+	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+	efi_memmap_walk(filter_memory, register_active_ranges);
+	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
+	if (max_gap < LARGE_GAP) {
+		vmem_map = (struct page *) 0;
+		free_area_init_nodes(max_zone_pfns);
+	} else {
+		unsigned long map_size;
+
+		/* allocate virtual_mem_map */
+
+		map_size = PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
+			sizeof(struct page));
+		VMALLOC_END -= map_size;
+		vmem_map = (struct page *) VMALLOC_END;
+		efi_memmap_walk(create_mem_map_page_table, NULL);
+
+		/*
+		 * alloc_node_mem_map makes an adjustment for mem_map
+		 * which isn't compatible with vmem_map.
+		 */
+		NODE_DATA(0)->node_mem_map = vmem_map +
+			find_min_pfn_with_active_regions();
+		free_area_init_nodes(max_zone_pfns);
+
+		printk("Virtual mem_map starts at 0x%p\n", mem_map);
+	}
+#else /* !CONFIG_VIRTUAL_MEM_MAP */
+	add_active_range(0, 0, max_low_pfn);
+	free_area_init_nodes(max_zone_pfns);
+#endif /* !CONFIG_VIRTUAL_MEM_MAP */
+	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+}
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
new file mode 100644
index 00000000..c641333c
--- /dev/null
+++ b/arch/ia64/mm/discontig.c
@@ -0,0 +1,825 @@
+/*
+ * Copyright (c) 2000, 2003 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2001 Intel Corp.
+ * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
+ * Copyright (c) 2002 NEC Corp.
+ * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
+ * Copyright (c) 2004 Silicon Graphics, Inc
+ *	Russ Anderson <rja@sgi.com>
+ *	Jesse Barnes <jbarnes@sgi.com>
+ *	Jack Steiner <steiner@sgi.com>
+ */
+
+/*
+ * Platform initialization for Discontig Memory
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/nmi.h>
+#include <linux/swap.h>
+#include <linux/bootmem.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/nodemask.h>
+#include <linux/slab.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/meminit.h>
+#include <asm/numa.h>
+#include <asm/sections.h>
+
+/*
+ * Track per-node information needed to setup the boot memory allocator, the
+ * per-node areas, and the real VM.
+ */
+struct early_node_data {
+	struct ia64_node_data *node_data;
+	unsigned long pernode_addr;
+	unsigned long pernode_size;
+	unsigned long num_physpages;
+#ifdef CONFIG_ZONE_DMA
+	unsigned long num_dma_physpages;
+#endif
+	unsigned long min_pfn;
+	unsigned long max_pfn;
+};
+
+static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
+static nodemask_t memory_less_mask __initdata;
+
+pg_data_t *pgdat_list[MAX_NUMNODES];
+
+/*
+ * To prevent cache aliasing effects, align per-node structures so that they
+ * start at addresses that are strided by node number.
+ */
+#define MAX_NODE_ALIGN_OFFSET	(32 * 1024 * 1024)
+#define NODEDATA_ALIGN(addr, node)						\
+	((((addr) + 1024*1024-1) & ~(1024*1024-1)) + 				\
+	     (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))
+
+/**
+ * build_node_maps - callback to setup bootmem structs for each node
+ * @start: physical start of range
+ * @len: length of range
+ * @node: node where this range resides
+ *
+ * We allocate a struct bootmem_data for each piece of memory that we wish to
+ * treat as a virtually contiguous block (i.e. each node). Each such block
+ * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
+ * if necessary.  Any non-existent pages will simply be part of the virtual
+ * memmap.  We also update min_low_pfn and max_low_pfn here as we receive
+ * memory ranges from the caller.
+ */
+static int __init build_node_maps(unsigned long start, unsigned long len,
+				  int node)
+{
+	unsigned long spfn, epfn, end = start + len;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
+
+	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
+	spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;
+
+	if (!bdp->node_low_pfn) {
+		bdp->node_min_pfn = spfn;
+		bdp->node_low_pfn = epfn;
+	} else {
+		bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
+		bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
+	}
+
+	return 0;
+}
+
+/**
+ * early_nr_cpus_node - return number of cpus on a given node
+ * @node: node to check
+ *
+ * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because
+ * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
+ * called yet.  Note that node 0 will also count all non-existent cpus.
+ */
+static int __meminit early_nr_cpus_node(int node)
+{
+	int cpu, n = 0;
+
+	for_each_possible_early_cpu(cpu)
+		if (node == node_cpuid[cpu].nid)
+			n++;
+
+	return n;
+}
+
+/**
+ * compute_pernodesize - compute size of pernode data
+ * @node: the node id.
+ */
+static unsigned long __meminit compute_pernodesize(int node)
+{
+	unsigned long pernodesize = 0, cpus;
+
+	cpus = early_nr_cpus_node(node);
+	pernodesize += PERCPU_PAGE_SIZE * cpus;
+	pernodesize += node * L1_CACHE_BYTES;
+	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
+	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
+	pernodesize = PAGE_ALIGN(pernodesize);
+	return pernodesize;
+}
+
+/**
+ * per_cpu_node_setup - setup per-cpu areas on each node
+ * @cpu_data: per-cpu area on this node
+ * @node: node to setup
+ *
+ * Copy the static per-cpu data into the region we just set aside and then
+ * setup __per_cpu_offset for each CPU on this node.  Return a pointer to
+ * the end of the area.
+ */
+static void *per_cpu_node_setup(void *cpu_data, int node)
+{
+#ifdef CONFIG_SMP
+	int cpu;
+
+	for_each_possible_early_cpu(cpu) {
+		void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;
+
+		if (node != node_cpuid[cpu].nid)
+			continue;
+
+		memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
+		__per_cpu_offset[cpu] = (char *)__va(cpu_data) -
+			__per_cpu_start;
+
+		/*
+		 * percpu area for cpu0 is moved from the __init area
+		 * which is setup by head.S and used till this point.
+		 * Update ar.k3.  This move is ensures that percpu
+		 * area for cpu0 is on the correct node and its
+		 * virtual address isn't insanely far from other
+		 * percpu areas which is important for congruent
+		 * percpu allocator.
+		 */
+		if (cpu == 0)
+			ia64_set_kr(IA64_KR_PER_CPU_DATA,
+				    (unsigned long)cpu_data -
+				    (unsigned long)__per_cpu_start);
+
+		cpu_data += PERCPU_PAGE_SIZE;
+	}
+#endif
+	return cpu_data;
+}
+
+#ifdef CONFIG_SMP
+/**
+ * setup_per_cpu_areas - setup percpu areas
+ *
+ * Arch code has already allocated and initialized percpu areas.  All
+ * this function has to do is to teach the determined layout to the
+ * dynamic percpu allocator, which happens to be more complex than
+ * creating whole new ones using helpers.
+ */
+void __init setup_per_cpu_areas(void)
+{
+	struct pcpu_alloc_info *ai;
+	struct pcpu_group_info *uninitialized_var(gi);
+	unsigned int *cpu_map;
+	void *base;
+	unsigned long base_offset;
+	unsigned int cpu;
+	ssize_t static_size, reserved_size, dyn_size;
+	int node, prev_node, unit, nr_units, rc;
+
+	ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
+	if (!ai)
+		panic("failed to allocate pcpu_alloc_info");
+	cpu_map = ai->groups[0].cpu_map;
+
+	/* determine base */
+	base = (void *)ULONG_MAX;
+	for_each_possible_cpu(cpu)
+		base = min(base,
+			   (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
+	base_offset = (void *)__per_cpu_start - base;
+
+	/* build cpu_map, units are grouped by node */
+	unit = 0;
+	for_each_node(node)
+		for_each_possible_cpu(cpu)
+			if (node == node_cpuid[cpu].nid)
+				cpu_map[unit++] = cpu;
+	nr_units = unit;
+
+	/* set basic parameters */
+	static_size = __per_cpu_end - __per_cpu_start;
+	reserved_size = PERCPU_MODULE_RESERVE;
+	dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
+	if (dyn_size < 0)
+		panic("percpu area overflow static=%zd reserved=%zd\n",
+		      static_size, reserved_size);
+
+	ai->static_size		= static_size;
+	ai->reserved_size	= reserved_size;
+	ai->dyn_size		= dyn_size;
+	ai->unit_size		= PERCPU_PAGE_SIZE;
+	ai->atom_size		= PAGE_SIZE;
+	ai->alloc_size		= PERCPU_PAGE_SIZE;
+
+	/*
+	 * CPUs are put into groups according to node.  Walk cpu_map
+	 * and create new groups at node boundaries.
+	 */
+	prev_node = -1;
+	ai->nr_groups = 0;
+	for (unit = 0; unit < nr_units; unit++) {
+		cpu = cpu_map[unit];
+		node = node_cpuid[cpu].nid;
+
+		if (node == prev_node) {
+			gi->nr_units++;
+			continue;
+		}
+		prev_node = node;
+
+		gi = &ai->groups[ai->nr_groups++];
+		gi->nr_units		= 1;
+		gi->base_offset		= __per_cpu_offset[cpu] + base_offset;
+		gi->cpu_map		= &cpu_map[unit];
+	}
+
+	rc = pcpu_setup_first_chunk(ai, base);
+	if (rc)
+		panic("failed to setup percpu area (err=%d)", rc);
+
+	pcpu_free_alloc_info(ai);
+}
+#endif
+
+/**
+ * fill_pernode - initialize pernode data.
+ * @node: the node id.
+ * @pernode: physical address of pernode data
+ * @pernodesize: size of the pernode data
+ */
+static void __init fill_pernode(int node, unsigned long pernode,
+	unsigned long pernodesize)
+{
+	void *cpu_data;
+	int cpus = early_nr_cpus_node(node);
+	struct bootmem_data *bdp = &bootmem_node_data[node];
+
+	mem_data[node].pernode_addr = pernode;
+	mem_data[node].pernode_size = pernodesize;
+	memset(__va(pernode), 0, pernodesize);
+
+	cpu_data = (void *)pernode;
+	pernode += PERCPU_PAGE_SIZE * cpus;
+	pernode += node * L1_CACHE_BYTES;
+
+	pgdat_list[node] = __va(pernode);
+	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
+
+	mem_data[node].node_data = __va(pernode);
+	pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+
+	pgdat_list[node]->bdata = bdp;
+	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
+
+	cpu_data = per_cpu_node_setup(cpu_data, node);
+
+	return;
+}
+
+/**
+ * find_pernode_space - allocate memory for memory map and per-node structures
+ * @start: physical start of range
+ * @len: length of range
+ * @node: node where this range resides
+ *
+ * This routine reserves space for the per-cpu data struct, the list of
+ * pg_data_ts and the per-node data struct.  Each node will have something like
+ * the following in the first chunk of addr. space large enough to hold it.
+ *
+ *    ________________________
+ *   |                        |
+ *   |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
+ *   |    PERCPU_PAGE_SIZE *  |     start and length big enough
+ *   |    cpus_on_this_node   | Node 0 will also have entries for all non-existent cpus.
+ *   |------------------------|
+ *   |   local pg_data_t *    |
+ *   |------------------------|
+ *   |  local ia64_node_data  |
+ *   |------------------------|
+ *   |          ???           |
+ *   |________________________|
+ *
+ * Once this space has been set aside, the bootmem maps are initialized.  We
+ * could probably move the allocation of the per-cpu and ia64_node_data space
+ * outside of this function and use alloc_bootmem_node(), but doing it here
+ * is straightforward and we get the alignments we want so...
+ */
+static int __init find_pernode_space(unsigned long start, unsigned long len,
+				     int node)
+{
+	unsigned long spfn, epfn;
+	unsigned long pernodesize = 0, pernode, pages, mapsize;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
+
+	spfn = start >> PAGE_SHIFT;
+	epfn = (start + len) >> PAGE_SHIFT;
+
+	pages = bdp->node_low_pfn - bdp->node_min_pfn;
+	mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
+
+	/*
+	 * Make sure this memory falls within this node's usable memory
+	 * since we may have thrown some away in build_maps().
+	 */
+	if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
+		return 0;
+
+	/* Don't setup this node's local space twice... */
+	if (mem_data[node].pernode_addr)
+		return 0;
+
+	/*
+	 * Calculate total size needed, incl. what's necessary
+	 * for good alignment and alias prevention.
+	 */
+	pernodesize = compute_pernodesize(node);
+	pernode = NODEDATA_ALIGN(start, node);
+
+	/* Is this range big enough for what we want to store here? */
+	if (start + len > (pernode + pernodesize + mapsize))
+		fill_pernode(node, pernode, pernodesize);
+
+	return 0;
+}
+
+/**
+ * free_node_bootmem - free bootmem allocator memory for use
+ * @start: physical start of range
+ * @len: length of range
+ * @node: node where this range resides
+ *
+ * Simply calls the bootmem allocator to free the specified ranged from
+ * the given pg_data_t's bdata struct.  After this function has been called
+ * for all the entries in the EFI memory map, the bootmem allocator will
+ * be ready to service allocation requests.
+ */
+static int __init free_node_bootmem(unsigned long start, unsigned long len,
+				    int node)
+{
+	free_bootmem_node(pgdat_list[node], start, len);
+
+	return 0;
+}
+
+/**
+ * reserve_pernode_space - reserve memory for per-node space
+ *
+ * Reserve the space used by the bootmem maps & per-node space in the boot
+ * allocator so that when we actually create the real mem maps we don't
+ * use their memory.
+ */
+static void __init reserve_pernode_space(void)
+{
+	unsigned long base, size, pages;
+	struct bootmem_data *bdp;
+	int node;
+
+	for_each_online_node(node) {
+		pg_data_t *pdp = pgdat_list[node];
+
+		if (node_isset(node, memory_less_mask))
+			continue;
+
+		bdp = pdp->bdata;
+
+		/* First the bootmem_map itself */
+		pages = bdp->node_low_pfn - bdp->node_min_pfn;
+		size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
+		base = __pa(bdp->node_bootmem_map);
+		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
+
+		/* Now the per-node space */
+		size = mem_data[node].pernode_size;
+		base = __pa(mem_data[node].pernode_addr);
+		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
+	}
+}
+
+static void __meminit scatter_node_data(void)
+{
+	pg_data_t **dst;
+	int node;
+
+	/*
+	 * for_each_online_node() can't be used at here.
+	 * node_online_map is not set for hot-added nodes at this time,
+	 * because we are halfway through initialization of the new node's
+	 * structures.  If for_each_online_node() is used, a new node's
+	 * pg_data_ptrs will be not initialized. Instead of using it,
+	 * pgdat_list[] is checked.
+	 */
+	for_each_node(node) {
+		if (pgdat_list[node]) {
+			dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
+			memcpy(dst, pgdat_list, sizeof(pgdat_list));
+		}
+	}
+}
+
+/**
+ * initialize_pernode_data - fixup per-cpu & per-node pointers
+ *
+ * Each node's per-node area has a copy of the global pg_data_t list, so
+ * we copy that to each node here, as well as setting the per-cpu pointer
+ * to the local node data structure.  The active_cpus field of the per-node
+ * structure gets setup by the platform_cpu_init() function later.
+ */
+static void __init initialize_pernode_data(void)
+{
+	int cpu, node;
+
+	scatter_node_data();
+
+#ifdef CONFIG_SMP
+	/* Set the node_data pointer for each per-cpu struct */
+	for_each_possible_early_cpu(cpu) {
+		node = node_cpuid[cpu].nid;
+		per_cpu(ia64_cpu_info, cpu).node_data =
+			mem_data[node].node_data;
+	}
+#else
+	{
+		struct cpuinfo_ia64 *cpu0_cpu_info;
+		cpu = 0;
+		node = node_cpuid[cpu].nid;
+		cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
+			((char *)&ia64_cpu_info - __per_cpu_start));
+		cpu0_cpu_info->node_data = mem_data[node].node_data;
+	}
+#endif /* CONFIG_SMP */
+}
+
+/**
+ * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit
+ * 	node but fall back to any other node when __alloc_bootmem_node fails
+ *	for best.
+ * @nid: node id
+ * @pernodesize: size of this node's pernode data
+ */
+static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
+{
+	void *ptr = NULL;
+	u8 best = 0xff;
+	int bestnode = -1, node, anynode = 0;
+
+	for_each_online_node(node) {
+		if (node_isset(node, memory_less_mask))
+			continue;
+		else if (node_distance(nid, node) < best) {
+			best = node_distance(nid, node);
+			bestnode = node;
+		}
+		anynode = node;
+	}
+
+	if (bestnode == -1)
+		bestnode = anynode;
+
+	ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize,
+		PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+
+	return ptr;
+}
+
+/**
+ * memory_less_nodes - allocate and initialize CPU only nodes pernode
+ *	information.
+ */
+static void __init memory_less_nodes(void)
+{
+	unsigned long pernodesize;
+	void *pernode;
+	int node;
+
+	for_each_node_mask(node, memory_less_mask) {
+		pernodesize = compute_pernodesize(node);
+		pernode = memory_less_node_alloc(node, pernodesize);
+		fill_pernode(node, __pa(pernode), pernodesize);
+	}
+
+	return;
+}
+
+/**
+ * find_memory - walk the EFI memory map and setup the bootmem allocator
+ *
+ * Called early in boot to setup the bootmem allocator, and to
+ * allocate the per-cpu and per-node structures.
+ */
+void __init find_memory(void)
+{
+	int node;
+
+	reserve_memory();
+
+	if (num_online_nodes() == 0) {
+		printk(KERN_ERR "node info missing!\n");
+		node_set_online(0);
+	}
+
+	nodes_or(memory_less_mask, memory_less_mask, node_online_map);
+	min_low_pfn = -1;
+	max_low_pfn = 0;
+
+	/* These actually end up getting called by call_pernode_memory() */
+	efi_memmap_walk(filter_rsvd_memory, build_node_maps);
+	efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
+	efi_memmap_walk(find_max_min_low_pfn, NULL);
+
+	for_each_online_node(node)
+		if (bootmem_node_data[node].node_low_pfn) {
+			node_clear(node, memory_less_mask);
+			mem_data[node].min_pfn = ~0UL;
+		}
+
+	efi_memmap_walk(filter_memory, register_active_ranges);
+
+	/*
+	 * Initialize the boot memory maps in reverse order since that's
+	 * what the bootmem allocator expects
+	 */
+	for (node = MAX_NUMNODES - 1; node >= 0; node--) {
+		unsigned long pernode, pernodesize, map;
+		struct bootmem_data *bdp;
+
+		if (!node_online(node))
+			continue;
+		else if (node_isset(node, memory_less_mask))
+			continue;
+
+		bdp = &bootmem_node_data[node];
+		pernode = mem_data[node].pernode_addr;
+		pernodesize = mem_data[node].pernode_size;
+		map = pernode + pernodesize;
+
+		init_bootmem_node(pgdat_list[node],
+				  map>>PAGE_SHIFT,
+				  bdp->node_min_pfn,
+				  bdp->node_low_pfn);
+	}
+
+	efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
+
+	reserve_pernode_space();
+	memory_less_nodes();
+	initialize_pernode_data();
+
+	max_pfn = max_low_pfn;
+
+	find_initrd();
+}
+
+#ifdef CONFIG_SMP
+/**
+ * per_cpu_init - setup per-cpu variables
+ *
+ * find_pernode_space() does most of this already, we just need to set
+ * local_per_cpu_offset
+ */
+void __cpuinit *per_cpu_init(void)
+{
+	int cpu;
+	static int first_time = 1;
+
+	if (first_time) {
+		first_time = 0;
+		for_each_possible_early_cpu(cpu)
+			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+	}
+
+	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
+}
+#endif /* CONFIG_SMP */
+
+/**
+ * show_mem - give short summary of memory stats
+ *
+ * Shows a simple page count of reserved and used pages in the system.
+ * For discontig machines, it does this on a per-pgdat basis.
+ */
+void show_mem(unsigned int filter)
+{
+	int i, total_reserved = 0;
+	int total_shared = 0, total_cached = 0;
+	unsigned long total_present = 0;
+	pg_data_t *pgdat;
+
+	printk(KERN_INFO "Mem-info:\n");
+	show_free_areas(filter);
+	printk(KERN_INFO "Node memory in pages:\n");
+	for_each_online_pgdat(pgdat) {
+		unsigned long present;
+		unsigned long flags;
+		int shared = 0, cached = 0, reserved = 0;
+		int nid = pgdat->node_id;
+
+		if (skip_free_areas_node(filter, nid))
+			continue;
+		pgdat_resize_lock(pgdat, &flags);
+		present = pgdat->node_present_pages;
+		for(i = 0; i < pgdat->node_spanned_pages; i++) {
+			struct page *page;
+			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
+				touch_nmi_watchdog();
+			if (pfn_valid(pgdat->node_start_pfn + i))
+				page = pfn_to_page(pgdat->node_start_pfn + i);
+			else {
+				i = vmemmap_find_next_valid_pfn(nid, i) - 1;
+				continue;
+			}
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (page_count(page))
+				shared += page_count(page)-1;
+		}
+		pgdat_resize_unlock(pgdat, &flags);
+		total_present += present;
+		total_reserved += reserved;
+		total_cached += cached;
+		total_shared += shared;
+		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
+		       "shrd: %10d, swpd: %10d\n", nid,
+		       present, reserved, shared, cached);
+	}
+	printk(KERN_INFO "%ld pages of RAM\n", total_present);
+	printk(KERN_INFO "%d reserved pages\n", total_reserved);
+	printk(KERN_INFO "%d pages shared\n", total_shared);
+	printk(KERN_INFO "%d pages swap cached\n", total_cached);
+	printk(KERN_INFO "Total of %ld pages in page table cache\n",
+	       quicklist_total_size());
+	printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
+}
+
+/**
+ * call_pernode_memory - use SRAT to call callback functions with node info
+ * @start: physical start of range
+ * @len: length of range
+ * @arg: function to call for each range
+ *
+ * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
+ * out to which node a block of memory belongs.  Ignore memory that we cannot
+ * identify, and split blocks that run across multiple nodes.
+ *
+ * Take this opportunity to round the start address up and the end address
+ * down to page boundaries.
+ */
+void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
+{
+	unsigned long rs, re, end = start + len;
+	void (*func)(unsigned long, unsigned long, int);
+	int i;
+
+	start = PAGE_ALIGN(start);
+	end &= PAGE_MASK;
+	if (start >= end)
+		return;
+
+	func = arg;
+
+	if (!num_node_memblks) {
+		/* No SRAT table, so assume one node (node 0) */
+		if (start < end)
+			(*func)(start, end - start, 0);
+		return;
+	}
+
+	for (i = 0; i < num_node_memblks; i++) {
+		rs = max(start, node_memblk[i].start_paddr);
+		re = min(end, node_memblk[i].start_paddr +
+			 node_memblk[i].size);
+
+		if (rs < re)
+			(*func)(rs, re - rs, node_memblk[i].nid);
+
+		if (re == end)
+			break;
+	}
+}
+
+/**
+ * count_node_pages - callback to build per-node memory info structures
+ * @start: physical start of range
+ * @len: length of range
+ * @node: node where this range resides
+ *
+ * Each node has it's own number of physical pages, DMAable pages, start, and
+ * end page frame number.  This routine will be called by call_pernode_memory()
+ * for each piece of usable memory and will setup these values for each node.
+ * Very similar to build_maps().
+ */
+static __init int count_node_pages(unsigned long start, unsigned long len, int node)
+{
+	unsigned long end = start + len;
+
+	mem_data[node].num_physpages += len >> PAGE_SHIFT;
+#ifdef CONFIG_ZONE_DMA
+	if (start <= __pa(MAX_DMA_ADDRESS))
+		mem_data[node].num_dma_physpages +=
+			(min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
+#endif
+	start = GRANULEROUNDDOWN(start);
+	end = GRANULEROUNDUP(end);
+	mem_data[node].max_pfn = max(mem_data[node].max_pfn,
+				     end >> PAGE_SHIFT);
+	mem_data[node].min_pfn = min(mem_data[node].min_pfn,
+				     start >> PAGE_SHIFT);
+
+	return 0;
+}
+
+/**
+ * paging_init - setup page tables
+ *
+ * paging_init() sets up the page tables for each node of the system and frees
+ * the bootmem allocator memory for general use.
+ */
+void __init paging_init(void)
+{
+	unsigned long max_dma;
+	unsigned long pfn_offset = 0;
+	unsigned long max_pfn = 0;
+	int node;
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
+
+	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+
+	efi_memmap_walk(filter_rsvd_memory, count_node_pages);
+
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
+	sparse_init();
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+	VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
+		sizeof(struct page));
+	vmem_map = (struct page *) VMALLOC_END;
+	efi_memmap_walk(create_mem_map_page_table, NULL);
+	printk("Virtual mem_map starts at 0x%p\n", vmem_map);
+#endif
+
+	for_each_online_node(node) {
+		num_physpages += mem_data[node].num_physpages;
+		pfn_offset = mem_data[node].min_pfn;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+		NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
+#endif
+		if (mem_data[node].max_pfn > max_pfn)
+			max_pfn = mem_data[node].max_pfn;
+	}
+
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
+	max_zone_pfns[ZONE_DMA] = max_dma;
+#endif
+	max_zone_pfns[ZONE_NORMAL] = max_pfn;
+	free_area_init_nodes(max_zone_pfns);
+
+	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+pg_data_t *arch_alloc_nodedata(int nid)
+{
+	unsigned long size = compute_pernodesize(nid);
+
+	return kzalloc(size, GFP_KERNEL);
+}
+
+void arch_free_nodedata(pg_data_t *pgdat)
+{
+	kfree(pgdat);
+}
+
+void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
+{
+	pgdat_list[update_node] = update_pgdat;
+	scatter_node_data();
+}
+#endif
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int __meminit vmemmap_populate(struct page *start_page,
+						unsigned long size, int node)
+{
+	return vmemmap_populate_basepages(start_page, size, node);
+}
+#endif
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
new file mode 100644
index 00000000..c99a41e2
--- /dev/null
+++ b/arch/ia64/mm/extable.c
@@ -0,0 +1,115 @@
+/*
+ * Kernel exception handling table support.  Derived from arch/alpha/mm/extable.c.
+ *
+ * Copyright (C) 1998, 1999, 2001-2002, 2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/sort.h>
+
+#include <asm/uaccess.h>
+#include <linux/module.h>
+
+static int cmp_ex(const void *a, const void *b)
+{
+	const struct exception_table_entry *l = a, *r = b;
+	u64 lip = (u64) &l->addr + l->addr;
+	u64 rip = (u64) &r->addr + r->addr;
+
+	/* avoid overflow */
+	if (lip > rip)
+		return 1;
+	if (lip < rip)
+		return -1;
+	return 0;
+}
+
+static void swap_ex(void *a, void *b, int size)
+{
+	struct exception_table_entry *l = a, *r = b, tmp;
+	u64 delta = (u64) r - (u64) l;
+
+	tmp = *l;
+	l->addr = r->addr + delta;
+	l->cont = r->cont + delta;
+	r->addr = tmp.addr - delta;
+	r->cont = tmp.cont - delta;
+}
+
+/*
+ * Sort the exception table. It's usually already sorted, but there
+ * may be unordered entries due to multiple text sections (such as the
+ * .init text section). Note that the exception-table-entries contain
+ * location-relative addresses, which requires a bit of care during
+ * sorting to avoid overflows in the offset members (e.g., it would
+ * not be safe to make a temporary copy of an exception-table entry on
+ * the stack, because the stack may be more than 2GB away from the
+ * exception-table).
+ */
+void sort_extable (struct exception_table_entry *start,
+		   struct exception_table_entry *finish)
+{
+	sort(start, finish - start, sizeof(struct exception_table_entry),
+	     cmp_ex, swap_ex);
+}
+
+static inline unsigned long ex_to_addr(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->addr + x->addr;
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Any entry referring to the module init will be at the beginning or
+ * the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
+				  m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+
+const struct exception_table_entry *
+search_extable (const struct exception_table_entry *first,
+		const struct exception_table_entry *last,
+		unsigned long ip)
+{
+	const struct exception_table_entry *mid;
+	unsigned long mid_ip;
+	long diff;
+
+        while (first <= last) {
+		mid = &first[(last - first)/2];
+		mid_ip = (u64) &mid->addr + mid->addr;
+		diff = mid_ip - ip;
+                if (diff == 0)
+                        return mid;
+                else if (diff < 0)
+                        first = mid + 1;
+                else
+                        last = mid - 1;
+        }
+        return NULL;
+}
+
+void
+ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e)
+{
+	long fix = (u64) &e->cont + e->cont;
+
+	regs->r8 = -EFAULT;
+	if (fix & 4)
+		regs->r9 = 0;
+	regs->cr_iip = fix & ~0xf;
+	ia64_psr(regs)->ri = fix & 0x3;		/* set continuation slot number */
+}
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
new file mode 100644
index 00000000..20b35937
--- /dev/null
+++ b/arch/ia64/mm/fault.c
@@ -0,0 +1,282 @@
+/*
+ * MMU fault handling support.
+ *
+ * Copyright (C) 1998-2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/prefetch.h>
+
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+extern int die(char *, struct pt_regs *, long);
+
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+	int ret = 0;
+
+	if (!user_mode(regs)) {
+		/* kprobe_running() needs smp_processor_id() */
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, trap))
+			ret = 1;
+		preempt_enable();
+	}
+
+	return ret;
+}
+#else
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
+ * (inside region 5, on ia64) and that page is present.
+ */
+static int
+mapped_kernel_page_is_present (unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+
+	pgd = pgd_offset_k(address);
+	if (pgd_none(*pgd) || pgd_bad(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud) || pud_bad(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
+		return 0;
+
+	ptep = pte_offset_kernel(pmd, address);
+	if (!ptep)
+		return 0;
+
+	pte = *ptep;
+	return pte_present(pte);
+}
+
+void __kprobes
+ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs)
+{
+	int signal = SIGSEGV, code = SEGV_MAPERR;
+	struct vm_area_struct *vma, *prev_vma;
+	struct mm_struct *mm = current->mm;
+	struct siginfo si;
+	unsigned long mask;
+	int fault;
+
+	/* mmap_sem is performance critical.... */
+	prefetchw(&mm->mmap_sem);
+
+	/*
+	 * If we're in an interrupt or have no user context, we must not take the fault..
+	 */
+	if (in_atomic() || !mm)
+		goto no_context;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+	/*
+	 * If fault is in region 5 and we are in the kernel, we may already
+	 * have the mmap_sem (pfn_valid macro is called during mmap). There
+	 * is no vma for region 5 addr's anyway, so skip getting the semaphore
+	 * and go directly to the exception handling code.
+	 */
+
+	if ((REGION_NUMBER(address) == 5) && !user_mode(regs))
+		goto bad_area_no_up;
+#endif
+
+	/*
+	 * This is to handle the kprobes on user space access instructions
+	 */
+	if (notify_page_fault(regs, TRAP_BRKPT))
+		return;
+
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma_prev(mm, address, &prev_vma);
+	if (!vma && !prev_vma )
+		goto bad_area;
+
+        /*
+         * find_vma_prev() returns vma such that address < vma->vm_end or NULL
+         *
+         * May find no vma, but could be that the last vm area is the
+         * register backing store that needs to expand upwards, in
+         * this case vma will be null, but prev_vma will ne non-null
+         */
+        if (( !vma && prev_vma ) || (address < vma->vm_start) )
+		goto check_expansion;
+
+  good_area:
+	code = SEGV_ACCERR;
+
+	/* OK, we've got a good vm_area for this memory area.  Check the access permissions: */
+
+#	define VM_READ_BIT	0
+#	define VM_WRITE_BIT	1
+#	define VM_EXEC_BIT	2
+
+#	if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \
+	    || (1 << VM_EXEC_BIT) != VM_EXEC)
+#		error File is out of sync with <linux/mm.h>.  Please update.
+#	endif
+
+	if (((isr >> IA64_ISR_R_BIT) & 1UL) && (!(vma->vm_flags & (VM_READ | VM_WRITE))))
+		goto bad_area;
+
+	mask = (  (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
+		| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
+
+	if ((vma->vm_flags & mask) != mask)
+		goto bad_area;
+
+	/*
+	 * If for any reason at all we couldn't handle the fault, make
+	 * sure we exit gracefully rather than endlessly redo the
+	 * fault.
+	 */
+	fault = handle_mm_fault(mm, vma, address, (mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		/*
+		 * We ran out of memory, or some other thing happened
+		 * to us that made us unable to handle the page fault
+		 * gracefully.
+		 */
+		if (fault & VM_FAULT_OOM) {
+			goto out_of_memory;
+		} else if (fault & VM_FAULT_SIGBUS) {
+			signal = SIGBUS;
+			goto bad_area;
+		}
+		BUG();
+	}
+	if (fault & VM_FAULT_MAJOR)
+		current->maj_flt++;
+	else
+		current->min_flt++;
+	up_read(&mm->mmap_sem);
+	return;
+
+  check_expansion:
+	if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) {
+		if (!vma)
+			goto bad_area;
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			goto bad_area;
+		if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
+		    || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
+			goto bad_area;
+		if (expand_stack(vma, address))
+			goto bad_area;
+	} else {
+		vma = prev_vma;
+		if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
+		    || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
+			goto bad_area;
+		/*
+		 * Since the register backing store is accessed sequentially,
+		 * we disallow growing it by more than a page at a time.
+		 */
+		if (address > vma->vm_end + PAGE_SIZE - sizeof(long))
+			goto bad_area;
+		if (expand_upwards(vma, address))
+			goto bad_area;
+	}
+	goto good_area;
+
+  bad_area:
+	up_read(&mm->mmap_sem);
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+  bad_area_no_up:
+#endif
+	if ((isr & IA64_ISR_SP)
+	    || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
+	{
+		/*
+		 * This fault was due to a speculative load or lfetch.fault, set the "ed"
+		 * bit in the psr to ensure forward progress.  (Target register will get a
+		 * NaT for ld.s, lfetch will be canceled.)
+		 */
+		ia64_psr(regs)->ed = 1;
+		return;
+	}
+	if (user_mode(regs)) {
+		si.si_signo = signal;
+		si.si_errno = 0;
+		si.si_code = code;
+		si.si_addr = (void __user *) address;
+		si.si_isr = isr;
+		si.si_flags = __ISR_VALID;
+		force_sig_info(signal, &si, current);
+		return;
+	}
+
+  no_context:
+	if ((isr & IA64_ISR_SP)
+	    || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
+	{
+		/*
+		 * This fault was due to a speculative load or lfetch.fault, set the "ed"
+		 * bit in the psr to ensure forward progress.  (Target register will get a
+		 * NaT for ld.s, lfetch will be canceled.)
+		 */
+		ia64_psr(regs)->ed = 1;
+		return;
+	}
+
+	/*
+	 * Since we have no vma's for region 5, we might get here even if the address is
+	 * valid, due to the VHPT walker inserting a non present translation that becomes
+	 * stale. If that happens, the non present fault handler already purged the stale
+	 * translation, which fixed the problem. So, we check to see if the translation is
+	 * valid, and return if it is.
+	 */
+	if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address))
+		return;
+
+	if (ia64_done_with_exception(regs))
+		return;
+
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to terminate things
+	 * with extreme prejudice.
+	 */
+	bust_spinlocks(1);
+
+	if (address < PAGE_SIZE)
+		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address);
+	else
+		printk(KERN_ALERT "Unable to handle kernel paging request at "
+		       "virtual address %016lx\n", address);
+	if (die("Oops", regs, isr))
+		regs = NULL;
+	bust_spinlocks(0);
+	if (regs)
+		do_exit(SIGKILL);
+	return;
+
+  out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (!user_mode(regs))
+		goto no_context;
+	pagefault_out_of_memory();
+}
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
new file mode 100644
index 00000000..5ca674b7
--- /dev/null
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -0,0 +1,207 @@
+/*
+ * IA-64 Huge TLB Page Support for Kernel.
+ *
+ * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com>
+ *
+ * Sep, 2003: add numa support
+ * Feb, 2004: dynamic hugetlb page size via boot parameter
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <linux/log2.h>
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT;
+EXPORT_SYMBOL(hpage_shift);
+
+pte_t *
+huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+{
+	unsigned long taddr = htlbpage_to_page(addr);
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, taddr);
+	pud = pud_alloc(mm, pgd, taddr);
+	if (pud) {
+		pmd = pmd_alloc(mm, pud, taddr);
+		if (pmd)
+			pte = pte_alloc_map(mm, NULL, pmd, taddr);
+	}
+	return pte;
+}
+
+pte_t *
+huge_pte_offset (struct mm_struct *mm, unsigned long addr)
+{
+	unsigned long taddr = htlbpage_to_page(addr);
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, taddr);
+	if (pgd_present(*pgd)) {
+		pud = pud_offset(pgd, taddr);
+		if (pud_present(*pud)) {
+			pmd = pmd_offset(pud, taddr);
+			if (pmd_present(*pmd))
+				pte = pte_offset_map(pmd, taddr);
+		}
+	}
+
+	return pte;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
+#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
+
+/*
+ * Don't actually need to do any preparation, but need to make sure
+ * the address is in the right region.
+ */
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
+{
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (addr & ~HPAGE_MASK)
+		return -EINVAL;
+	if (REGION_NUMBER(addr) != RGN_HPAGE)
+		return -EINVAL;
+
+	return 0;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
+{
+	struct page *page;
+	pte_t *ptep;
+
+	if (REGION_NUMBER(addr) != RGN_HPAGE)
+		return ERR_PTR(-EINVAL);
+
+	ptep = huge_pte_offset(mm, addr);
+	if (!ptep || pte_none(*ptep))
+		return NULL;
+	page = pte_page(*ptep);
+	page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
+	return page;
+}
+int pmd_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
+{
+	return NULL;
+}
+
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+			unsigned long addr, unsigned long end,
+			unsigned long floor, unsigned long ceiling)
+{
+	/*
+	 * This is called to free hugetlb page tables.
+	 *
+	 * The offset of these addresses from the base of the hugetlb
+	 * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that
+	 * the standard free_pgd_range will free the right page tables.
+	 *
+	 * If floor and ceiling are also in the hugetlb region, they
+	 * must likewise be scaled down; but if outside, left unchanged.
+	 */
+
+	addr = htlbpage_to_page(addr);
+	end  = htlbpage_to_page(end);
+	if (REGION_NUMBER(floor) == RGN_HPAGE)
+		floor = htlbpage_to_page(floor);
+	if (REGION_NUMBER(ceiling) == RGN_HPAGE)
+		ceiling = htlbpage_to_page(ceiling);
+
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	struct vm_area_struct *vmm;
+
+	if (len > RGN_MAP_LIMIT)
+		return -ENOMEM;
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+
+	/* Handle MAP_FIXED */
+	if (flags & MAP_FIXED) {
+		if (prepare_hugepage_range(file, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
+	/* This code assumes that RGN_HPAGE != 0. */
+	if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1)))
+		addr = HPAGE_REGION_BASE;
+	else
+		addr = ALIGN(addr, HPAGE_SIZE);
+	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
+		/* At this point:  (!vmm || addr < vmm->vm_end). */
+		if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT)
+			return -ENOMEM;
+		if (!vmm || (addr + len) <= vmm->vm_start)
+			return addr;
+		addr = ALIGN(vmm->vm_end, HPAGE_SIZE);
+	}
+}
+
+static int __init hugetlb_setup_sz(char *str)
+{
+	u64 tr_pages;
+	unsigned long long size;
+
+	if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
+		/*
+		 * shouldn't happen, but just in case.
+		 */
+		tr_pages = 0x15557000UL;
+
+	size = memparse(str, &str);
+	if (*str || !is_power_of_2(size) || !(tr_pages & size) ||
+		size <= PAGE_SIZE ||
+		size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
+		printk(KERN_WARNING "Invalid huge page size specified\n");
+		return 1;
+	}
+
+	hpage_shift = __ffs(size);
+	/*
+	 * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT
+	 * override here with new page shift.
+	 */
+	ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);
+	return 0;
+}
+early_param("hugepagesz", hugetlb_setup_sz);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
new file mode 100644
index 00000000..00cb0e26
--- /dev/null
+++ b/arch/ia64/mm/init.c
@@ -0,0 +1,717 @@
+/*
+ * Initialize MMU support.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <linux/bootmem.h>
+#include <linux/efi.h>
+#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/personality.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/proc_fs.h>
+#include <linux/bitops.h>
+#include <linux/kexec.h>
+
+#include <asm/dma.h>
+#include <asm/io.h>
+#include <asm/machvec.h>
+#include <asm/numa.h>
+#include <asm/patch.h>
+#include <asm/pgalloc.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/tlb.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/mca.h>
+#include <asm/paravirt.h>
+
+extern void ia64_tlb_init (void);
+
+unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+unsigned long VMALLOC_END = VMALLOC_END_INIT;
+EXPORT_SYMBOL(VMALLOC_END);
+struct page *vmem_map;
+EXPORT_SYMBOL(vmem_map);
+#endif
+
+struct page *zero_page_memmap_ptr;	/* map entry for zero page */
+EXPORT_SYMBOL(zero_page_memmap_ptr);
+
+void
+__ia64_sync_icache_dcache (pte_t pte)
+{
+	unsigned long addr;
+	struct page *page;
+
+	page = pte_page(pte);
+	addr = (unsigned long) page_address(page);
+
+	if (test_bit(PG_arch_1, &page->flags))
+		return;				/* i-cache is already coherent with d-cache */
+
+	flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
+	set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
+}
+
+/*
+ * Since DMA is i-cache coherent, any (complete) pages that were written via
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
+ * flush them when they get mapped into an executable vm-area.
+ */
+void
+dma_mark_clean(void *addr, size_t size)
+{
+	unsigned long pg_addr, end;
+
+	pg_addr = PAGE_ALIGN((unsigned long) addr);
+	end = (unsigned long) addr + size;
+	while (pg_addr + PAGE_SIZE <= end) {
+		struct page *page = virt_to_page(pg_addr);
+		set_bit(PG_arch_1, &page->flags);
+		pg_addr += PAGE_SIZE;
+	}
+}
+
+inline void
+ia64_set_rbs_bot (void)
+{
+	unsigned long stack_size = rlimit_max(RLIMIT_STACK) & -16;
+
+	if (stack_size > MAX_USER_STACK_SIZE)
+		stack_size = MAX_USER_STACK_SIZE;
+	current->thread.rbs_bot = PAGE_ALIGN(current->mm->start_stack - stack_size);
+}
+
+/*
+ * This performs some platform-dependent address space initialization.
+ * On IA-64, we want to setup the VM area for the register backing
+ * store (which grows upwards) and install the gateway page which is
+ * used for signal trampolines, etc.
+ */
+void
+ia64_init_addr_space (void)
+{
+	struct vm_area_struct *vma;
+
+	ia64_set_rbs_bot();
+
+	/*
+	 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
+	 * the problem.  When the process attempts to write to the register backing store
+	 * for the first time, it will get a SEGFAULT in this case.
+	 */
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (vma) {
+		INIT_LIST_HEAD(&vma->anon_vma_chain);
+		vma->vm_mm = current->mm;
+		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
+		vma->vm_end = vma->vm_start + PAGE_SIZE;
+		vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+		down_write(&current->mm->mmap_sem);
+		if (insert_vm_struct(current->mm, vma)) {
+			up_write(&current->mm->mmap_sem);
+			kmem_cache_free(vm_area_cachep, vma);
+			return;
+		}
+		up_write(&current->mm->mmap_sem);
+	}
+
+	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
+	if (!(current->personality & MMAP_PAGE_ZERO)) {
+		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+		if (vma) {
+			INIT_LIST_HEAD(&vma->anon_vma_chain);
+			vma->vm_mm = current->mm;
+			vma->vm_end = PAGE_SIZE;
+			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
+			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
+			down_write(&current->mm->mmap_sem);
+			if (insert_vm_struct(current->mm, vma)) {
+				up_write(&current->mm->mmap_sem);
+				kmem_cache_free(vm_area_cachep, vma);
+				return;
+			}
+			up_write(&current->mm->mmap_sem);
+		}
+	}
+}
+
+void
+free_initmem (void)
+{
+	unsigned long addr, eaddr;
+
+	addr = (unsigned long) ia64_imva(__init_begin);
+	eaddr = (unsigned long) ia64_imva(__init_end);
+	while (addr < eaddr) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		free_page(addr);
+		++totalram_pages;
+		addr += PAGE_SIZE;
+	}
+	printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n",
+	       (__init_end - __init_begin) >> 10);
+}
+
+void __init
+free_initrd_mem (unsigned long start, unsigned long end)
+{
+	struct page *page;
+	/*
+	 * EFI uses 4KB pages while the kernel can use 4KB or bigger.
+	 * Thus EFI and the kernel may have different page sizes. It is
+	 * therefore possible to have the initrd share the same page as
+	 * the end of the kernel (given current setup).
+	 *
+	 * To avoid freeing/using the wrong page (kernel sized) we:
+	 *	- align up the beginning of initrd
+	 *	- align down the end of initrd
+	 *
+	 *  |             |
+	 *  |=============| a000
+	 *  |             |
+	 *  |             |
+	 *  |             | 9000
+	 *  |/////////////|
+	 *  |/////////////|
+	 *  |=============| 8000
+	 *  |///INITRD////|
+	 *  |/////////////|
+	 *  |/////////////| 7000
+	 *  |             |
+	 *  |KKKKKKKKKKKKK|
+	 *  |=============| 6000
+	 *  |KKKKKKKKKKKKK|
+	 *  |KKKKKKKKKKKKK|
+	 *  K=kernel using 8KB pages
+	 *
+	 * In this example, we must free page 8000 ONLY. So we must align up
+	 * initrd_start and keep initrd_end as is.
+	 */
+	start = PAGE_ALIGN(start);
+	end = end & PAGE_MASK;
+
+	if (start < end)
+		printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10);
+
+	for (; start < end; start += PAGE_SIZE) {
+		if (!virt_addr_valid(start))
+			continue;
+		page = virt_to_page(start);
+		ClearPageReserved(page);
+		init_page_count(page);
+		free_page(start);
+		++totalram_pages;
+	}
+}
+
+/*
+ * This installs a clean page in the kernel's page table.
+ */
+static struct page * __init
+put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	if (!PageReserved(page))
+		printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n",
+		       page_address(page));
+
+	pgd = pgd_offset_k(address);		/* note: this is NOT pgd_offset()! */
+
+	{
+		pud = pud_alloc(&init_mm, pgd, address);
+		if (!pud)
+			goto out;
+		pmd = pmd_alloc(&init_mm, pud, address);
+		if (!pmd)
+			goto out;
+		pte = pte_alloc_kernel(pmd, address);
+		if (!pte)
+			goto out;
+		if (!pte_none(*pte))
+			goto out;
+		set_pte(pte, mk_pte(page, pgprot));
+	}
+  out:
+	/* no need for flush_tlb */
+	return page;
+}
+
+static void __init
+setup_gate (void)
+{
+	void *gate_section;
+	struct page *page;
+
+	/*
+	 * Map the gate page twice: once read-only to export the ELF
+	 * headers etc. and once execute-only page to enable
+	 * privilege-promotion via "epc":
+	 */
+	gate_section = paravirt_get_gate_section();
+	page = virt_to_page(ia64_imva(gate_section));
+	put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
+#ifdef HAVE_BUGGY_SEGREL
+	page = virt_to_page(ia64_imva(gate_section + PAGE_SIZE));
+	put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
+#else
+	put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
+	/* Fill in the holes (if any) with read-only zero pages: */
+	{
+		unsigned long addr;
+
+		for (addr = GATE_ADDR + PAGE_SIZE;
+		     addr < GATE_ADDR + PERCPU_PAGE_SIZE;
+		     addr += PAGE_SIZE)
+		{
+			put_kernel_page(ZERO_PAGE(0), addr,
+					PAGE_READONLY);
+			put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE,
+					PAGE_READONLY);
+		}
+	}
+#endif
+	ia64_patch_gate();
+}
+
+void __devinit
+ia64_mmu_init (void *my_cpu_data)
+{
+	unsigned long pta, impl_va_bits;
+	extern void __devinit tlb_init (void);
+
+#ifdef CONFIG_DISABLE_VHPT
+#	define VHPT_ENABLE_BIT	0
+#else
+#	define VHPT_ENABLE_BIT	1
+#endif
+
+	/*
+	 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
+	 * address space.  The IA-64 architecture guarantees that at least 50 bits of
+	 * virtual address space are implemented but if we pick a large enough page size
+	 * (e.g., 64KB), the mapped address space is big enough that it will overlap with
+	 * VMLPT.  I assume that once we run on machines big enough to warrant 64KB pages,
+	 * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
+	 * problem in practice.  Alternatively, we could truncate the top of the mapped
+	 * address space to not permit mappings that would overlap with the VMLPT.
+	 * --davidm 00/12/06
+	 */
+#	define pte_bits			3
+#	define mapped_space_bits	(3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
+	/*
+	 * The virtual page table has to cover the entire implemented address space within
+	 * a region even though not all of this space may be mappable.  The reason for
+	 * this is that the Access bit and Dirty bit fault handlers perform
+	 * non-speculative accesses to the virtual page table, so the address range of the
+	 * virtual page table itself needs to be covered by virtual page table.
+	 */
+#	define vmlpt_bits		(impl_va_bits - PAGE_SHIFT + pte_bits)
+#	define POW2(n)			(1ULL << (n))
+
+	impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+
+	if (impl_va_bits < 51 || impl_va_bits > 61)
+		panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
+	/*
+	 * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need,
+	 * which must fit into "vmlpt_bits - pte_bits" slots. Second half of
+	 * the test makes sure that our mapped space doesn't overlap the
+	 * unimplemented hole in the middle of the region.
+	 */
+	if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) ||
+	    (mapped_space_bits > impl_va_bits - 1))
+		panic("Cannot build a big enough virtual-linear page table"
+		      " to cover mapped address space.\n"
+		      " Try using a smaller page size.\n");
+
+
+	/* place the VMLPT at the end of each page-table mapped region: */
+	pta = POW2(61) - POW2(vmlpt_bits);
+
+	/*
+	 * Set the (virtually mapped linear) page table address.  Bit
+	 * 8 selects between the short and long format, bits 2-7 the
+	 * size of the table, and bit 0 whether the VHPT walker is
+	 * enabled.
+	 */
+	ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
+
+	ia64_tlb_init();
+
+#ifdef	CONFIG_HUGETLB_PAGE
+	ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
+	ia64_srlz_d();
+#endif
+}
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+int vmemmap_find_next_valid_pfn(int node, int i)
+{
+	unsigned long end_address, hole_next_pfn;
+	unsigned long stop_address;
+	pg_data_t *pgdat = NODE_DATA(node);
+
+	end_address = (unsigned long) &vmem_map[pgdat->node_start_pfn + i];
+	end_address = PAGE_ALIGN(end_address);
+
+	stop_address = (unsigned long) &vmem_map[
+		pgdat->node_start_pfn + pgdat->node_spanned_pages];
+
+	do {
+		pgd_t *pgd;
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		pgd = pgd_offset_k(end_address);
+		if (pgd_none(*pgd)) {
+			end_address += PGDIR_SIZE;
+			continue;
+		}
+
+		pud = pud_offset(pgd, end_address);
+		if (pud_none(*pud)) {
+			end_address += PUD_SIZE;
+			continue;
+		}
+
+		pmd = pmd_offset(pud, end_address);
+		if (pmd_none(*pmd)) {
+			end_address += PMD_SIZE;
+			continue;
+		}
+
+		pte = pte_offset_kernel(pmd, end_address);
+retry_pte:
+		if (pte_none(*pte)) {
+			end_address += PAGE_SIZE;
+			pte++;
+			if ((end_address < stop_address) &&
+			    (end_address != ALIGN(end_address, 1UL << PMD_SHIFT)))
+				goto retry_pte;
+			continue;
+		}
+		/* Found next valid vmem_map page */
+		break;
+	} while (end_address < stop_address);
+
+	end_address = min(end_address, stop_address);
+	end_address = end_address - (unsigned long) vmem_map + sizeof(struct page) - 1;
+	hole_next_pfn = end_address / sizeof(struct page);
+	return hole_next_pfn - pgdat->node_start_pfn;
+}
+
+int __init create_mem_map_page_table(u64 start, u64 end, void *arg)
+{
+	unsigned long address, start_page, end_page;
+	struct page *map_start, *map_end;
+	int node;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+	map_end   = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+	start_page = (unsigned long) map_start & PAGE_MASK;
+	end_page = PAGE_ALIGN((unsigned long) map_end);
+	node = paddr_to_nid(__pa(start));
+
+	for (address = start_page; address < end_page; address += PAGE_SIZE) {
+		pgd = pgd_offset_k(address);
+		if (pgd_none(*pgd))
+			pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+		pud = pud_offset(pgd, address);
+
+		if (pud_none(*pud))
+			pud_populate(&init_mm, pud, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+		pmd = pmd_offset(pud, address);
+
+		if (pmd_none(*pmd))
+			pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+		pte = pte_offset_kernel(pmd, address);
+
+		if (pte_none(*pte))
+			set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT,
+					     PAGE_KERNEL));
+	}
+	return 0;
+}
+
+struct memmap_init_callback_data {
+	struct page *start;
+	struct page *end;
+	int nid;
+	unsigned long zone;
+};
+
+static int __meminit
+virtual_memmap_init(u64 start, u64 end, void *arg)
+{
+	struct memmap_init_callback_data *args;
+	struct page *map_start, *map_end;
+
+	args = (struct memmap_init_callback_data *) arg;
+	map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+	map_end   = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+	if (map_start < args->start)
+		map_start = args->start;
+	if (map_end > args->end)
+		map_end = args->end;
+
+	/*
+	 * We have to initialize "out of bounds" struct page elements that fit completely
+	 * on the same pages that were allocated for the "in bounds" elements because they
+	 * may be referenced later (and found to be "reserved").
+	 */
+	map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page);
+	map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end)
+		    / sizeof(struct page));
+
+	if (map_start < map_end)
+		memmap_init_zone((unsigned long)(map_end - map_start),
+				 args->nid, args->zone, page_to_pfn(map_start),
+				 MEMMAP_EARLY);
+	return 0;
+}
+
+void __meminit
+memmap_init (unsigned long size, int nid, unsigned long zone,
+	     unsigned long start_pfn)
+{
+	if (!vmem_map)
+		memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY);
+	else {
+		struct page *start;
+		struct memmap_init_callback_data args;
+
+		start = pfn_to_page(start_pfn);
+		args.start = start;
+		args.end = start + size;
+		args.nid = nid;
+		args.zone = zone;
+
+		efi_memmap_walk(virtual_memmap_init, &args);
+	}
+}
+
+int
+ia64_pfn_valid (unsigned long pfn)
+{
+	char byte;
+	struct page *pg = pfn_to_page(pfn);
+
+	return     (__get_user(byte, (char __user *) pg) == 0)
+		&& ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
+			|| (__get_user(byte, (char __user *) (pg + 1) - 1) == 0));
+}
+EXPORT_SYMBOL(ia64_pfn_valid);
+
+int __init find_largest_hole(u64 start, u64 end, void *arg)
+{
+	u64 *max_gap = arg;
+
+	static u64 last_end = PAGE_OFFSET;
+
+	/* NOTE: this algorithm assumes efi memmap table is ordered */
+
+	if (*max_gap < (start - last_end))
+		*max_gap = start - last_end;
+	last_end = end;
+	return 0;
+}
+
+#endif /* CONFIG_VIRTUAL_MEM_MAP */
+
+int __init register_active_ranges(u64 start, u64 len, int nid)
+{
+	u64 end = start + len;
+
+#ifdef CONFIG_KEXEC
+	if (start > crashk_res.start && start < crashk_res.end)
+		start = crashk_res.end;
+	if (end > crashk_res.start && end < crashk_res.end)
+		end = crashk_res.start;
+#endif
+
+	if (start < end)
+		add_active_range(nid, __pa(start) >> PAGE_SHIFT,
+			__pa(end) >> PAGE_SHIFT);
+	return 0;
+}
+
+static int __init
+count_reserved_pages(u64 start, u64 end, void *arg)
+{
+	unsigned long num_reserved = 0;
+	unsigned long *count = arg;
+
+	for (; start < end; start += PAGE_SIZE)
+		if (PageReserved(virt_to_page(start)))
+			++num_reserved;
+	*count += num_reserved;
+	return 0;
+}
+
+int
+find_max_min_low_pfn (u64 start, u64 end, void *arg)
+{
+	unsigned long pfn_start, pfn_end;
+#ifdef CONFIG_FLATMEM
+	pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT;
+	pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT;
+#else
+	pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT;
+	pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT;
+#endif
+	min_low_pfn = min(min_low_pfn, pfn_start);
+	max_low_pfn = max(max_low_pfn, pfn_end);
+	return 0;
+}
+
+/*
+ * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
+ * system call handler.  When this option is in effect, all fsyscalls will end up bubbling
+ * down into the kernel and calling the normal (heavy-weight) syscall handler.  This is
+ * useful for performance testing, but conceivably could also come in handy for debugging
+ * purposes.
+ */
+
+static int nolwsys __initdata;
+
+static int __init
+nolwsys_setup (char *s)
+{
+	nolwsys = 1;
+	return 1;
+}
+
+__setup("nolwsys", nolwsys_setup);
+
+void __init
+mem_init (void)
+{
+	long reserved_pages, codesize, datasize, initsize;
+	pg_data_t *pgdat;
+	int i;
+
+	BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
+	BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
+	BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE);
+
+#ifdef CONFIG_PCI
+	/*
+	 * This needs to be called _after_ the command line has been parsed but _before_
+	 * any drivers that may need the PCI DMA interface are initialized or bootmem has
+	 * been freed.
+	 */
+	platform_dma_init();
+#endif
+
+#ifdef CONFIG_FLATMEM
+	BUG_ON(!mem_map);
+	max_mapnr = max_low_pfn;
+#endif
+
+	high_memory = __va(max_low_pfn * PAGE_SIZE);
+
+	for_each_online_pgdat(pgdat)
+		if (pgdat->bdata->node_bootmem_map)
+			totalram_pages += free_all_bootmem_node(pgdat);
+
+	reserved_pages = 0;
+	efi_memmap_walk(count_reserved_pages, &reserved_pages);
+
+	codesize =  (unsigned long) _etext - (unsigned long) _stext;
+	datasize =  (unsigned long) _edata - (unsigned long) _etext;
+	initsize =  (unsigned long) __init_end - (unsigned long) __init_begin;
+
+	printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, "
+	       "%luk data, %luk init)\n", nr_free_pages() << (PAGE_SHIFT - 10),
+	       num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
+	       reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
+
+
+	/*
+	 * For fsyscall entrpoints with no light-weight handler, use the ordinary
+	 * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
+	 * code can tell them apart.
+	 */
+	for (i = 0; i < NR_syscalls; ++i) {
+		extern unsigned long sys_call_table[NR_syscalls];
+		unsigned long *fsyscall_table = paravirt_get_fsyscall_table();
+
+		if (!fsyscall_table[i] || nolwsys)
+			fsyscall_table[i] = sys_call_table[i] | 1;
+	}
+	setup_gate();
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int arch_add_memory(int nid, u64 start, u64 size)
+{
+	pg_data_t *pgdat;
+	struct zone *zone;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int ret;
+
+	pgdat = NODE_DATA(nid);
+
+	zone = pgdat->node_zones + ZONE_NORMAL;
+	ret = __add_pages(nid, zone, start_pfn, nr_pages);
+
+	if (ret)
+		printk("%s: Problem encountered in __add_pages() as ret=%d\n",
+		       __func__,  ret);
+
+	return ret;
+}
+#endif
+
+/*
+ * Even when CONFIG_IA32_SUPPORT is not enabled it is
+ * useful to have the Linux/x86 domain registered to
+ * avoid an attempted module load when emulators call
+ * personality(PER_LINUX32). This saves several milliseconds
+ * on each such call.
+ */
+static struct exec_domain ia32_exec_domain;
+
+static int __init
+per_linux32_init(void)
+{
+	ia32_exec_domain.name = "Linux/x86";
+	ia32_exec_domain.handler = NULL;
+	ia32_exec_domain.pers_low = PER_LINUX32;
+	ia32_exec_domain.pers_high = PER_LINUX32;
+	ia32_exec_domain.signal_map = default_exec_domain.signal_map;
+	ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
+	register_exec_domain(&ia32_exec_domain);
+
+	return 0;
+}
+
+__initcall(per_linux32_init);
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
new file mode 100644
index 00000000..3dccdd8e
--- /dev/null
+++ b/arch/ia64/mm/ioremap.c
@@ -0,0 +1,121 @@
+/*
+ * (c) Copyright 2006, 2007 Hewlett-Packard Development Company, L.P.
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/compiler.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <asm/meminit.h>
+
+static inline void __iomem *
+__ioremap (unsigned long phys_addr)
+{
+	return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr);
+}
+
+void __iomem *
+early_ioremap (unsigned long phys_addr, unsigned long size)
+{
+	return __ioremap(phys_addr);
+}
+
+void __iomem *
+ioremap (unsigned long phys_addr, unsigned long size)
+{
+	void __iomem *addr;
+	struct vm_struct *area;
+	unsigned long offset;
+	pgprot_t prot;
+	u64 attr;
+	unsigned long gran_base, gran_size;
+	unsigned long page_base;
+
+	/*
+	 * For things in kern_memmap, we must use the same attribute
+	 * as the rest of the kernel.  For more details, see
+	 * Documentation/ia64/aliasing.txt.
+	 */
+	attr = kern_mem_attribute(phys_addr, size);
+	if (attr & EFI_MEMORY_WB)
+		return (void __iomem *) phys_to_virt(phys_addr);
+	else if (attr & EFI_MEMORY_UC)
+		return __ioremap(phys_addr);
+
+	/*
+	 * Some chipsets don't support UC access to memory.  If
+	 * WB is supported for the whole granule, we prefer that.
+	 */
+	gran_base = GRANULEROUNDDOWN(phys_addr);
+	gran_size = GRANULEROUNDUP(phys_addr + size) - gran_base;
+	if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB)
+		return (void __iomem *) phys_to_virt(phys_addr);
+
+	/*
+	 * WB is not supported for the whole granule, so we can't use
+	 * the region 7 identity mapping.  If we can safely cover the
+	 * area with kernel page table mappings, we can use those
+	 * instead.
+	 */
+	page_base = phys_addr & PAGE_MASK;
+	size = PAGE_ALIGN(phys_addr + size) - page_base;
+	if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) {
+		prot = PAGE_KERNEL;
+
+		/*
+		 * Mappings have to be page-aligned
+		 */
+		offset = phys_addr & ~PAGE_MASK;
+		phys_addr &= PAGE_MASK;
+
+		/*
+		 * Ok, go for it..
+		 */
+		area = get_vm_area(size, VM_IOREMAP);
+		if (!area)
+			return NULL;
+
+		area->phys_addr = phys_addr;
+		addr = (void __iomem *) area->addr;
+		if (ioremap_page_range((unsigned long) addr,
+				(unsigned long) addr + size, phys_addr, prot)) {
+			vunmap((void __force *) addr);
+			return NULL;
+		}
+
+		return (void __iomem *) (offset + (char __iomem *)addr);
+	}
+
+	return __ioremap(phys_addr);
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *
+ioremap_nocache (unsigned long phys_addr, unsigned long size)
+{
+	if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
+		return NULL;
+
+	return __ioremap(phys_addr);
+}
+EXPORT_SYMBOL(ioremap_nocache);
+
+void
+early_iounmap (volatile void __iomem *addr, unsigned long size)
+{
+}
+
+void
+iounmap (volatile void __iomem *addr)
+{
+	if (REGION_NUMBER(addr) == RGN_GATE)
+		vunmap((void *) ((unsigned long) addr & PAGE_MASK));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
new file mode 100644
index 00000000..3efea7d0
--- /dev/null
+++ b/arch/ia64/mm/numa.c
@@ -0,0 +1,92 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file contains NUMA specific variables and functions which can
+ * be split away from DISCONTIGMEM and are used on NUMA machines with
+ * contiguous memory.
+ * 
+ *                         2002/08/07 Erich Focht <efocht@ess.nec.de>
+ */
+
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/node.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <asm/mmzone.h>
+#include <asm/numa.h>
+
+
+/*
+ * The following structures are usually initialized by ACPI or
+ * similar mechanisms and describe the NUMA characteristics of the machine.
+ */
+int num_node_memblks;
+struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
+struct node_cpuid_s node_cpuid[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
+
+/*
+ * This is a matrix with "distances" between nodes, they should be
+ * proportional to the memory access latency ratios.
+ */
+u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
+
+/* Identify which cnode a physical address resides on */
+int
+paddr_to_nid(unsigned long paddr)
+{
+	int	i;
+
+	for (i = 0; i < num_node_memblks; i++)
+		if (paddr >= node_memblk[i].start_paddr &&
+		    paddr < node_memblk[i].start_paddr + node_memblk[i].size)
+			break;
+
+	return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0);
+}
+
+#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA)
+/*
+ * Because of holes evaluate on section limits.
+ * If the section of memory exists, then return the node where the section
+ * resides.  Otherwise return node 0 as the default.  This is used by
+ * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
+ * the section resides.
+ */
+int __meminit __early_pfn_to_nid(unsigned long pfn)
+{
+	int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
+
+	for (i = 0; i < num_node_memblks; i++) {
+		ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
+		esec = (node_memblk[i].start_paddr + node_memblk[i].size +
+			((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
+		if (section >= ssec && section < esec)
+			return node_memblk[i].nid;
+	}
+
+	return -1;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ *  SRAT information is stored in node_memblk[], then we can use SRAT
+ *  information at memory-hot-add if necessary.
+ */
+
+int memory_add_physaddr_to_nid(u64 addr)
+{
+	int nid = paddr_to_nid(addr);
+	if (nid < 0)
+		return 0;
+	return nid;
+}
+
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
+#endif
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
new file mode 100644
index 00000000..7b3cdc6c
--- /dev/null
+++ b/arch/ia64/mm/tlb.c
@@ -0,0 +1,562 @@
+/*
+ * TLB support routines.
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 08/02/00 A. Mallick <asit.k.mallick@intel.com>
+ *		Modified RID allocation for SMP
+ *          Goutham Rao <goutham.rao@intel.com>
+ *              IPI based ptc implementation and A-step IPI implementation.
+ * Rohit Seth <rohit.seth@intel.com>
+ * Ken Chen <kenneth.w.chen@intel.com>
+ * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation
+ * Copyright (C) 2007 Intel Corp
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+
+#include <asm/delay.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/pal.h>
+#include <asm/tlbflush.h>
+#include <asm/dma.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
+
+static struct {
+	u64 mask;		/* mask of supported purge page-sizes */
+	unsigned long max_bits;	/* log2 of largest supported purge page-size */
+} purge;
+
+struct ia64_ctx ia64_ctx = {
+	.lock =	__SPIN_LOCK_UNLOCKED(ia64_ctx.lock),
+	.next =	1,
+	.max_ctx = ~0U
+};
+
+DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
+DEFINE_PER_CPU(u8, ia64_tr_num);  /*Number of TR slots in current processor*/
+DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/
+
+struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
+
+/*
+ * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
+ * Called after cpu_init() has setup ia64_ctx.max_ctx based on
+ * maximum RID that is supported by boot CPU.
+ */
+void __init
+mmu_context_init (void)
+{
+	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+}
+
+/*
+ * Acquire the ia64_ctx.lock before calling this function!
+ */
+void
+wrap_mmu_context (struct mm_struct *mm)
+{
+	int i, cpu;
+	unsigned long flush_bit;
+
+	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
+		flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
+		ia64_ctx.bitmap[i] ^= flush_bit;
+	}
+ 
+	/* use offset at 300 to skip daemons */
+	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+				ia64_ctx.max_ctx, 300);
+	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+				ia64_ctx.max_ctx, ia64_ctx.next);
+
+	/*
+	 * can't call flush_tlb_all() here because of race condition
+	 * with O(1) scheduler [EF]
+	 */
+	cpu = get_cpu(); /* prevent preemption/migration */
+	for_each_online_cpu(i)
+		if (i != cpu)
+			per_cpu(ia64_need_tlb_flush, i) = 1;
+	put_cpu();
+	local_flush_tlb_all();
+}
+
+/*
+ * Implement "spinaphores" ... like counting semaphores, but they
+ * spin instead of sleeping.  If there are ever any other users for
+ * this primitive it can be moved up to a spinaphore.h header.
+ */
+struct spinaphore {
+	unsigned long	ticket;
+	unsigned long	serve;
+};
+
+static inline void spinaphore_init(struct spinaphore *ss, int val)
+{
+	ss->ticket = 0;
+	ss->serve = val;
+}
+
+static inline void down_spin(struct spinaphore *ss)
+{
+	unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve;
+
+	if (time_before(t, ss->serve))
+		return;
+
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld8.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory");
+		if (time_before(t, serve))
+			return;
+		cpu_relax();
+	}
+}
+
+static inline void up_spin(struct spinaphore *ss)
+{
+	ia64_fetchadd(1, &ss->serve, rel);
+}
+
+static struct spinaphore ptcg_sem;
+static u16 nptcg = 1;
+static int need_ptcg_sem = 1;
+static int toolatetochangeptcgsem = 0;
+
+/*
+ * Kernel parameter "nptcg=" overrides max number of concurrent global TLB
+ * purges which is reported from either PAL or SAL PALO.
+ *
+ * We don't have sanity checking for nptcg value. It's the user's responsibility
+ * for valid nptcg value on the platform. Otherwise, kernel may hang in some
+ * cases.
+ */
+static int __init
+set_nptcg(char *str)
+{
+	int value = 0;
+
+	get_option(&str, &value);
+	setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);
+
+	return 1;
+}
+
+__setup("nptcg=", set_nptcg);
+
+/*
+ * Maximum number of simultaneous ptc.g purges in the system can
+ * be defined by PAL_VM_SUMMARY (in which case we should take
+ * the smallest value for any cpu in the system) or by the PAL
+ * override table (in which case we should ignore the value from
+ * PAL_VM_SUMMARY).
+ *
+ * Kernel parameter "nptcg=" overrides maximum number of simultanesous ptc.g
+ * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case,
+ * we should ignore the value from either PAL_VM_SUMMARY or PAL override table.
+ *
+ * Complicating the logic here is the fact that num_possible_cpus()
+ * isn't fully setup until we start bringing cpus online.
+ */
+void
+setup_ptcg_sem(int max_purges, int nptcg_from)
+{
+	static int kp_override;
+	static int palo_override;
+	static int firstcpu = 1;
+
+	if (toolatetochangeptcgsem) {
+		if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0)
+			BUG_ON(1 < nptcg);
+		else
+			BUG_ON(max_purges < nptcg);
+		return;
+	}
+
+	if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
+		kp_override = 1;
+		nptcg = max_purges;
+		goto resetsema;
+	}
+	if (kp_override) {
+		need_ptcg_sem = num_possible_cpus() > nptcg;
+		return;
+	}
+
+	if (nptcg_from == NPTCG_FROM_PALO) {
+		palo_override = 1;
+
+		/* In PALO max_purges == 0 really means it! */
+		if (max_purges == 0)
+			panic("Whoa! Platform does not support global TLB purges.\n");
+		nptcg = max_purges;
+		if (nptcg == PALO_MAX_TLB_PURGES) {
+			need_ptcg_sem = 0;
+			return;
+		}
+		goto resetsema;
+	}
+	if (palo_override) {
+		if (nptcg != PALO_MAX_TLB_PURGES)
+			need_ptcg_sem = (num_possible_cpus() > nptcg);
+		return;
+	}
+
+	/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
+	if (max_purges == 0) max_purges = 1;
+
+	if (firstcpu) {
+		nptcg = max_purges;
+		firstcpu = 0;
+	}
+	if (max_purges < nptcg)
+		nptcg = max_purges;
+	if (nptcg == PAL_MAX_PURGES) {
+		need_ptcg_sem = 0;
+		return;
+	} else
+		need_ptcg_sem = (num_possible_cpus() > nptcg);
+
+resetsema:
+	spinaphore_init(&ptcg_sem, max_purges);
+}
+
+void
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
+		       unsigned long end, unsigned long nbits)
+{
+	struct mm_struct *active_mm = current->active_mm;
+
+	toolatetochangeptcgsem = 1;
+
+	if (mm != active_mm) {
+		/* Restore region IDs for mm */
+		if (mm && active_mm) {
+			activate_context(mm);
+		} else {
+			flush_tlb_all();
+			return;
+		}
+	}
+
+	if (need_ptcg_sem)
+		down_spin(&ptcg_sem);
+
+	do {
+		/*
+		 * Flush ALAT entries also.
+		 */
+		ia64_ptcga(start, (nbits << 2));
+		ia64_srlz_i();
+		start += (1UL << nbits);
+	} while (start < end);
+
+	if (need_ptcg_sem)
+		up_spin(&ptcg_sem);
+
+        if (mm != active_mm) {
+                activate_context(active_mm);
+        }
+}
+
+void
+local_flush_tlb_all (void)
+{
+	unsigned long i, j, flags, count0, count1, stride0, stride1, addr;
+
+	addr    = local_cpu_data->ptce_base;
+	count0  = local_cpu_data->ptce_count[0];
+	count1  = local_cpu_data->ptce_count[1];
+	stride0 = local_cpu_data->ptce_stride[0];
+	stride1 = local_cpu_data->ptce_stride[1];
+
+	local_irq_save(flags);
+	for (i = 0; i < count0; ++i) {
+		for (j = 0; j < count1; ++j) {
+			ia64_ptce(addr);
+			addr += stride1;
+		}
+		addr += stride0;
+	}
+	local_irq_restore(flags);
+	ia64_srlz_i();			/* srlz.i implies srlz.d */
+}
+
+void
+flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+		 unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long size = end - start;
+	unsigned long nbits;
+
+#ifndef CONFIG_SMP
+	if (mm != current->active_mm) {
+		mm->context = 0;
+		return;
+	}
+#endif
+
+	nbits = ia64_fls(size + 0xfff);
+	while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
+			(nbits < purge.max_bits))
+		++nbits;
+	if (nbits > purge.max_bits)
+		nbits = purge.max_bits;
+	start &= ~((1UL << nbits) - 1);
+
+	preempt_disable();
+#ifdef CONFIG_SMP
+	if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) {
+		platform_global_tlb_purge(mm, start, end, nbits);
+		preempt_enable();
+		return;
+	}
+#endif
+	do {
+		ia64_ptcl(start, (nbits<<2));
+		start += (1UL << nbits);
+	} while (start < end);
+	preempt_enable();
+	ia64_srlz_i();			/* srlz.i implies srlz.d */
+}
+EXPORT_SYMBOL(flush_tlb_range);
+
+void __devinit
+ia64_tlb_init (void)
+{
+	ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
+	u64 tr_pgbits;
+	long status;
+	pal_vm_info_1_u_t vm_info_1;
+	pal_vm_info_2_u_t vm_info_2;
+	int cpu = smp_processor_id();
+
+	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
+		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
+		       "defaulting to architected purge page-sizes.\n", status);
+		purge.mask = 0x115557000UL;
+	}
+	purge.max_bits = ia64_fls(purge.mask);
+
+	ia64_get_ptce(&ptce_info);
+	local_cpu_data->ptce_base = ptce_info.base;
+	local_cpu_data->ptce_count[0] = ptce_info.count[0];
+	local_cpu_data->ptce_count[1] = ptce_info.count[1];
+	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
+	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
+
+	local_flush_tlb_all();	/* nuke left overs from bootstrapping... */
+	status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);
+
+	if (status) {
+		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+		per_cpu(ia64_tr_num, cpu) = 8;
+		return;
+	}
+	per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
+	if (per_cpu(ia64_tr_num, cpu) >
+				(vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
+		per_cpu(ia64_tr_num, cpu) =
+				vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
+	if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
+		static int justonce = 1;
+		per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
+		if (justonce) {
+			justonce = 0;
+			printk(KERN_DEBUG "TR register number exceeds "
+			       "IA64_TR_ALLOC_MAX!\n");
+		}
+	}
+}
+
+/*
+ * is_tr_overlap
+ *
+ * Check overlap with inserted TRs.
+ */
+static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
+{
+	u64 tr_log_size;
+	u64 tr_end;
+	u64 va_rr = ia64_get_rr(va);
+	u64 va_rid = RR_TO_RID(va_rr);
+	u64 va_end = va + (1<<log_size) - 1;
+
+	if (va_rid != RR_TO_RID(p->rr))
+		return 0;
+	tr_log_size = (p->itir & 0xff) >> 2;
+	tr_end = p->ifa + (1<<tr_log_size) - 1;
+
+	if (va > tr_end || p->ifa > va_end)
+		return 0;
+	return 1;
+
+}
+
+/*
+ * ia64_insert_tr in virtual mode. Allocate a TR slot
+ *
+ * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
+ *
+ * va 	: virtual address.
+ * pte 	: pte entries inserted.
+ * log_size: range to be covered.
+ *
+ * Return value:  <0 :  error No.
+ *
+ *		  >=0 : slot number allocated for TR.
+ * Must be called with preemption disabled.
+ */
+int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
+{
+	int i, r;
+	unsigned long psr;
+	struct ia64_tr_entry *p;
+	int cpu = smp_processor_id();
+
+	if (!ia64_idtrs[cpu]) {
+		ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX *
+				sizeof (struct ia64_tr_entry), GFP_KERNEL);
+		if (!ia64_idtrs[cpu])
+			return -ENOMEM;
+	}
+	r = -EINVAL;
+	/*Check overlap with existing TR entries*/
+	if (target_mask & 0x1) {
+		p = ia64_idtrs[cpu];
+		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+								i++, p++) {
+			if (p->pte & 0x1)
+				if (is_tr_overlap(p, va, log_size)) {
+					printk(KERN_DEBUG "Overlapped Entry"
+						"Inserted for TR Reigster!!\n");
+					goto out;
+			}
+		}
+	}
+	if (target_mask & 0x2) {
+		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX;
+		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+								i++, p++) {
+			if (p->pte & 0x1)
+				if (is_tr_overlap(p, va, log_size)) {
+					printk(KERN_DEBUG "Overlapped Entry"
+						"Inserted for TR Reigster!!\n");
+					goto out;
+				}
+		}
+	}
+
+	for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
+		switch (target_mask & 0x3) {
+		case 1:
+			if (!((ia64_idtrs[cpu] + i)->pte & 0x1))
+				goto found;
+			continue;
+		case 2:
+			if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
+				goto found;
+			continue;
+		case 3:
+			if (!((ia64_idtrs[cpu] + i)->pte & 0x1) &&
+			    !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
+				goto found;
+			continue;
+		default:
+			r = -EINVAL;
+			goto out;
+		}
+	}
+found:
+	if (i >= per_cpu(ia64_tr_num, cpu))
+		return -EBUSY;
+
+	/*Record tr info for mca hander use!*/
+	if (i > per_cpu(ia64_tr_used, cpu))
+		per_cpu(ia64_tr_used, cpu) = i;
+
+	psr = ia64_clear_ic();
+	if (target_mask & 0x1) {
+		ia64_itr(0x1, i, va, pte, log_size);
+		ia64_srlz_i();
+		p = ia64_idtrs[cpu] + i;
+		p->ifa = va;
+		p->pte = pte;
+		p->itir = log_size << 2;
+		p->rr = ia64_get_rr(va);
+	}
+	if (target_mask & 0x2) {
+		ia64_itr(0x2, i, va, pte, log_size);
+		ia64_srlz_i();
+		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i;
+		p->ifa = va;
+		p->pte = pte;
+		p->itir = log_size << 2;
+		p->rr = ia64_get_rr(va);
+	}
+	ia64_set_psr(psr);
+	r = i;
+out:
+	return r;
+}
+EXPORT_SYMBOL_GPL(ia64_itr_entry);
+
+/*
+ * ia64_purge_tr
+ *
+ * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
+ * slot: slot number to be freed.
+ *
+ * Must be called with preemption disabled.
+ */
+void ia64_ptr_entry(u64 target_mask, int slot)
+{
+	int cpu = smp_processor_id();
+	int i;
+	struct ia64_tr_entry *p;
+
+	if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
+		return;
+
+	if (target_mask & 0x1) {
+		p = ia64_idtrs[cpu] + slot;
+		if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+			p->pte = 0;
+			ia64_ptr(0x1, p->ifa, p->itir>>2);
+			ia64_srlz_i();
+		}
+	}
+
+	if (target_mask & 0x2) {
+		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot;
+		if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+			p->pte = 0;
+			ia64_ptr(0x2, p->ifa, p->itir>>2);
+			ia64_srlz_i();
+		}
+	}
+
+	for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
+		if (((ia64_idtrs[cpu] + i)->pte & 0x1) ||
+		    ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
+			break;
+	}
+	per_cpu(ia64_tr_used, cpu) = i;
+}
+EXPORT_SYMBOL_GPL(ia64_ptr_entry);
diff --git a/arch/ia64/module.lds b/arch/ia64/module.lds
new file mode 100644
index 00000000..6481f42f
--- /dev/null
+++ b/arch/ia64/module.lds
@@ -0,0 +1,13 @@
+SECTIONS {
+	/* Group unwind sections into a single section: */
+	.IA_64.unwind_info : { *(.IA_64.unwind_info*) }
+	.IA_64.unwind : { *(.IA_64.unwind*) }
+	/*
+	 * Create place-holder sections to hold the PLTs, GOT, and
+	 * official procedure-descriptors (.opd).
+	 */
+	.core.plt : { BYTE(0) }
+	.init.plt : { BYTE(0) }
+	.got : { BYTE(0) }
+	.opd : { BYTE(0) }
+}
diff --git a/arch/ia64/oprofile/Makefile b/arch/ia64/oprofile/Makefile
new file mode 100644
index 00000000..aad27a71
--- /dev/null
+++ b/arch/ia64/oprofile/Makefile
@@ -0,0 +1,10 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o \
+		timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_PERFMON) += perfmon.o
diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c
new file mode 100644
index 00000000..f7b79899
--- /dev/null
+++ b/arch/ia64/oprofile/backtrace.c
@@ -0,0 +1,132 @@
+/**
+ * @file backtrace.c
+ *
+ * @remark Copyright 2004 Silicon Graphics Inc.  All Rights Reserved.
+ * @remark Read the file COPYING
+ *
+ * @author Greg Banks <gnb@melbourne.sgi.com>
+ * @author Keith Owens <kaos@melbourne.sgi.com>
+ * Based on work done for the ia64 port of the SGI kernprof patch, which is
+ *    Copyright (c) 2003-2004 Silicon Graphics Inc.  All Rights Reserved.
+ */
+
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+/*
+ * For IA64 we need to perform a complex little dance to get both
+ * the struct pt_regs and a synthetic struct switch_stack in place
+ * to allow the unwind code to work.  This dance requires our unwind
+ * using code to be called from a function called from unw_init_running().
+ * There we only get a single void* data pointer, so use this struct
+ * to hold all the data we need during the unwind.
+ */
+typedef struct
+{
+	unsigned int depth;
+	struct pt_regs *regs;
+	struct unw_frame_info frame;
+	unsigned long *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
+} ia64_backtrace_t;
+
+/* Returns non-zero if the PC is in the Interrupt Vector Table */
+static __inline__ int in_ivt_code(unsigned long pc)
+{
+	extern char ia64_ivt[];
+	return (pc >= (u_long)ia64_ivt && pc < (u_long)ia64_ivt+32768);
+}
+
+/*
+ * Unwind to next stack frame.
+ */
+static __inline__ int next_frame(ia64_backtrace_t *bt)
+{
+	/*
+	 * Avoid unsightly console message from unw_unwind() when attempting
+	 * to unwind through the Interrupt Vector Table which has no unwind
+	 * information.
+	 */
+	if (in_ivt_code(bt->frame.ip))
+		return 0;
+
+	/*
+	 * WAR for spinlock contention from leaf functions.  ia64_spinlock_contention_pre3_4
+	 * has ar.pfs == r0.  Leaf functions do not modify ar.pfs so ar.pfs remains
+	 * as 0, stopping the backtrace.  Record the previous ar.pfs when the current
+	 * IP is in ia64_spinlock_contention_pre3_4 then unwind, if pfs_loc has not changed
+	 * after unwind then use pt_regs.ar_pfs which is where the real ar.pfs is for
+	 * leaf functions.
+	 */
+	if (bt->prev_pfs_loc && bt->regs && bt->frame.pfs_loc == bt->prev_pfs_loc)
+		bt->frame.pfs_loc = &bt->regs->ar_pfs;
+	bt->prev_pfs_loc = NULL;
+
+	return unw_unwind(&bt->frame) == 0;
+}
+
+
+static void do_ia64_backtrace(struct unw_frame_info *info, void *vdata)
+{
+	ia64_backtrace_t *bt = vdata;
+	struct switch_stack *sw;
+	int count = 0;
+	u_long pc, sp;
+
+	sw = (struct switch_stack *)(info+1);
+	/* padding from unw_init_running */
+	sw = (struct switch_stack *)(((unsigned long)sw + 15) & ~15);
+
+	unw_init_frame_info(&bt->frame, current, sw);
+
+	/* skip over interrupt frame and oprofile calls */
+	do {
+		unw_get_sp(&bt->frame, &sp);
+		if (sp >= (u_long)bt->regs)
+			break;
+		if (!next_frame(bt))
+			return;
+	} while (count++ < 200);
+
+	/* finally, grab the actual sample */
+	while (bt->depth-- && next_frame(bt)) {
+		unw_get_ip(&bt->frame, &pc);
+		oprofile_add_trace(pc);
+		if (unw_is_intr_frame(&bt->frame)) {
+			/*
+			 * Interrupt received on kernel stack; this can
+			 * happen when timer interrupt fires while processing
+			 * a softirq from the tail end of a hardware interrupt
+			 * which interrupted a system call.  Don't laugh, it
+			 * happens!  Splice the backtrace into two parts to
+			 * avoid spurious cycles in the gprof output.
+			 */
+			/* TODO: split rather than drop the 2nd half */
+			break;
+		}
+	}
+}
+
+void
+ia64_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+	ia64_backtrace_t bt;
+	unsigned long flags;
+
+	/*
+	 * On IA64 there is little hope of getting backtraces from
+	 * user space programs -- the problems of getting the unwind
+	 * information from arbitrary user programs are extreme.
+	 */
+	if (user_mode(regs))
+		return;
+
+	bt.depth = depth;
+	bt.regs = regs;
+	bt.prev_pfs_loc = NULL;
+	local_irq_save(flags);
+	unw_init_running(do_ia64_backtrace, &bt);
+	local_irq_restore(flags);
+}
diff --git a/arch/ia64/oprofile/init.c b/arch/ia64/oprofile/init.c
new file mode 100644
index 00000000..31b545c3
--- /dev/null
+++ b/arch/ia64/oprofile/init.c
@@ -0,0 +1,38 @@
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+ 
+extern int perfmon_init(struct oprofile_operations *ops);
+extern void perfmon_exit(void);
+extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth);
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	int ret = -ENODEV;
+
+#ifdef CONFIG_PERFMON
+	/* perfmon_init() can fail, but we have no way to report it */
+	ret = perfmon_init(ops);
+#endif
+	ops->backtrace = ia64_backtrace;
+
+	return ret;
+}
+
+
+void oprofile_arch_exit(void)
+{
+#ifdef CONFIG_PERFMON
+	perfmon_exit();
+#endif
+}
diff --git a/arch/ia64/oprofile/perfmon.c b/arch/ia64/oprofile/perfmon.c
new file mode 100644
index 00000000..192d3e8e
--- /dev/null
+++ b/arch/ia64/oprofile/perfmon.c
@@ -0,0 +1,99 @@
+/**
+ * @file perfmon.c
+ *
+ * @remark Copyright 2003 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <asm/perfmon.h>
+#include <asm/ptrace.h>
+#include <asm/errno.h>
+
+static int allow_ints;
+
+static int
+perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
+                struct pt_regs *regs, unsigned long stamp)
+{
+	int event = arg->pmd_eventid;
+ 
+	arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
+
+	/* the owner of the oprofile event buffer may have exited
+	 * without perfmon being shutdown (e.g. SIGSEGV)
+	 */
+	if (allow_ints)
+		oprofile_add_sample(regs, event);
+	return 0;
+}
+
+
+static int perfmon_start(void)
+{
+	allow_ints = 1;
+	return 0;
+}
+
+
+static void perfmon_stop(void)
+{
+	allow_ints = 0;
+}
+
+
+#define OPROFILE_FMT_UUID { \
+	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
+
+static pfm_buffer_fmt_t oprofile_fmt = {
+ 	.fmt_name 	    = "oprofile_format",
+ 	.fmt_uuid	    = OPROFILE_FMT_UUID,
+ 	.fmt_handler	    = perfmon_handler,
+};
+
+
+static char *get_cpu_type(void)
+{
+	__u8 family = local_cpu_data->family;
+
+	switch (family) {
+		case 0x07:
+			return "ia64/itanium";
+		case 0x1f:
+			return "ia64/itanium2";
+		default:
+			return "ia64/ia64";
+	}
+}
+
+
+/* all the ops are handled via userspace for IA64 perfmon */
+
+static int using_perfmon;
+
+int perfmon_init(struct oprofile_operations *ops)
+{
+	int ret = pfm_register_buffer_fmt(&oprofile_fmt);
+	if (ret)
+		return -ENODEV;
+
+	ops->cpu_type = get_cpu_type();
+	ops->start = perfmon_start;
+	ops->stop = perfmon_stop;
+	using_perfmon = 1;
+	printk(KERN_INFO "oprofile: using perfmon.\n");
+	return 0;
+}
+
+
+void perfmon_exit(void)
+{
+	if (!using_perfmon)
+		return;
+
+	pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid);
+}
diff --git a/arch/ia64/pci/Makefile b/arch/ia64/pci/Makefile
new file mode 100644
index 00000000..fb14dc52
--- /dev/null
+++ b/arch/ia64/pci/Makefile
@@ -0,0 +1,4 @@
+#
+# Makefile for the ia64-specific parts of the pci bus
+#
+obj-y		:= pci.o fixup.o
diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c
new file mode 100644
index 00000000..f5959c0c
--- /dev/null
+++ b/arch/ia64/pci/fixup.c
@@ -0,0 +1,69 @@
+/*
+ * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
+ * Derived from fixup.c of i386 tree.
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+
+#include <asm/machvec.h>
+
+/*
+ * Fixup to mark boot BIOS video selected by BIOS before it changes
+ *
+ * From information provided by "Jon Smirl" <jonsmirl@gmail.com>
+ *
+ * The standard boot ROM sequence for an x86 machine uses the BIOS
+ * to select an initial video card for boot display. This boot video
+ * card will have it's BIOS copied to C0000 in system RAM.
+ * IORESOURCE_ROM_SHADOW is used to associate the boot video
+ * card with this copy. On laptops this copy has to be used since
+ * the main ROM may be compressed or combined with another image.
+ * See pci_map_rom() for use of this flag. IORESOURCE_ROM_SHADOW
+ * is marked here since the boot video device will be the only enabled
+ * video device at this point.
+ */
+
+static void __devinit pci_fixup_video(struct pci_dev *pdev)
+{
+	struct pci_dev *bridge;
+	struct pci_bus *bus;
+	u16 config;
+
+	if ((strcmp(platform_name, "dig") != 0)
+	    && (strcmp(platform_name, "hpzx1")  != 0))
+		return;
+	/* Maybe, this machine supports legacy memory map. */
+
+	if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
+		return;
+
+	/* Is VGA routed to us? */
+	bus = pdev->bus;
+	while (bus) {
+		bridge = bus->self;
+
+		/*
+		 * From information provided by
+		 * "David Miller" <davem@davemloft.net>
+		 * The bridge control register is valid for PCI header
+		 * type BRIDGE, or CARDBUS. Host to PCI controllers use
+		 * PCI header type NORMAL.
+		 */
+		if (bridge
+		    &&((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		       ||(bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) {
+			pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
+						&config);
+			if (!(config & PCI_BRIDGE_CTL_VGA))
+				return;
+		}
+		bus = bus->parent;
+	}
+	pci_read_config_word(pdev, PCI_COMMAND, &config);
+	if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
+		pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
+		dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n");
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video);
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
new file mode 100644
index 00000000..aa2533ae
--- /dev/null
+++ b/arch/ia64/pci/pci.c
@@ -0,0 +1,797 @@
+/*
+ * pci.c - Low-Level PCI Access in IA-64
+ *
+ * Derived from bios32.c of i386 tree.
+ *
+ * (c) Copyright 2002, 2005 Hewlett-Packard Development Company, L.P.
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ * Copyright (C) 2004 Silicon Graphics, Inc.
+ *
+ * Note: Above list of copyright holders is incomplete...
+ */
+
+#include <linux/acpi.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/sal.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+
+/*
+ * Low-level SAL-based PCI configuration access functions. Note that SAL
+ * calls are already serialized (via sal_lock), so we don't need another
+ * synchronization mechanism here.
+ */
+
+#define PCI_SAL_ADDRESS(seg, bus, devfn, reg)		\
+	(((u64) seg << 24) | (bus << 16) | (devfn << 8) | (reg))
+
+/* SAL 3.2 adds support for extended config space. */
+
+#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg)	\
+	(((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg))
+
+int raw_pci_read(unsigned int seg, unsigned int bus, unsigned int devfn,
+	      int reg, int len, u32 *value)
+{
+	u64 addr, data = 0;
+	int mode, result;
+
+	if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095))
+		return -EINVAL;
+
+	if ((seg | reg) <= 255) {
+		addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
+		mode = 0;
+	} else if (sal_revision >= SAL_VERSION_CODE(3,2)) {
+		addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
+		mode = 1;
+	} else {
+		return -EINVAL;
+	}
+
+	result = ia64_sal_pci_config_read(addr, mode, len, &data);
+	if (result != 0)
+		return -EINVAL;
+
+	*value = (u32) data;
+	return 0;
+}
+
+int raw_pci_write(unsigned int seg, unsigned int bus, unsigned int devfn,
+	       int reg, int len, u32 value)
+{
+	u64 addr;
+	int mode, result;
+
+	if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095))
+		return -EINVAL;
+
+	if ((seg | reg) <= 255) {
+		addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
+		mode = 0;
+	} else if (sal_revision >= SAL_VERSION_CODE(3,2)) {
+		addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
+		mode = 1;
+	} else {
+		return -EINVAL;
+	}
+	result = ia64_sal_pci_config_write(addr, mode, len, value);
+	if (result != 0)
+		return -EINVAL;
+	return 0;
+}
+
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+							int size, u32 *value)
+{
+	return raw_pci_read(pci_domain_nr(bus), bus->number,
+				 devfn, where, size, value);
+}
+
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+							int size, u32 value)
+{
+	return raw_pci_write(pci_domain_nr(bus), bus->number,
+				  devfn, where, size, value);
+}
+
+struct pci_ops pci_root_ops = {
+	.read = pci_read,
+	.write = pci_write,
+};
+
+/* Called by ACPI when it finds a new root bus.  */
+
+static struct pci_controller * __devinit
+alloc_pci_controller (int seg)
+{
+	struct pci_controller *controller;
+
+	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
+	if (!controller)
+		return NULL;
+
+	controller->segment = seg;
+	controller->node = -1;
+	return controller;
+}
+
+struct pci_root_info {
+	struct acpi_device *bridge;
+	struct pci_controller *controller;
+	char *name;
+};
+
+static unsigned int
+new_space (u64 phys_base, int sparse)
+{
+	u64 mmio_base;
+	int i;
+
+	if (phys_base == 0)
+		return 0;	/* legacy I/O port space */
+
+	mmio_base = (u64) ioremap(phys_base, 0);
+	for (i = 0; i < num_io_spaces; i++)
+		if (io_space[i].mmio_base == mmio_base &&
+		    io_space[i].sparse == sparse)
+			return i;
+
+	if (num_io_spaces == MAX_IO_SPACES) {
+		printk(KERN_ERR "PCI: Too many IO port spaces "
+			"(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES);
+		return ~0;
+	}
+
+	i = num_io_spaces++;
+	io_space[i].mmio_base = mmio_base;
+	io_space[i].sparse = sparse;
+
+	return i;
+}
+
+static u64 __devinit
+add_io_space (struct pci_root_info *info, struct acpi_resource_address64 *addr)
+{
+	struct resource *resource;
+	char *name;
+	unsigned long base, min, max, base_port;
+	unsigned int sparse = 0, space_nr, len;
+
+	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
+	if (!resource) {
+		printk(KERN_ERR "PCI: No memory for %s I/O port space\n",
+			info->name);
+		goto out;
+	}
+
+	len = strlen(info->name) + 32;
+	name = kzalloc(len, GFP_KERNEL);
+	if (!name) {
+		printk(KERN_ERR "PCI: No memory for %s I/O port space name\n",
+			info->name);
+		goto free_resource;
+	}
+
+	min = addr->minimum;
+	max = min + addr->address_length - 1;
+	if (addr->info.io.translation_type == ACPI_SPARSE_TRANSLATION)
+		sparse = 1;
+
+	space_nr = new_space(addr->translation_offset, sparse);
+	if (space_nr == ~0)
+		goto free_name;
+
+	base = __pa(io_space[space_nr].mmio_base);
+	base_port = IO_SPACE_BASE(space_nr);
+	snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->name,
+		base_port + min, base_port + max);
+
+	/*
+	 * The SDM guarantees the legacy 0-64K space is sparse, but if the
+	 * mapping is done by the processor (not the bridge), ACPI may not
+	 * mark it as sparse.
+	 */
+	if (space_nr == 0)
+		sparse = 1;
+
+	resource->name  = name;
+	resource->flags = IORESOURCE_MEM;
+	resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min);
+	resource->end   = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max);
+	insert_resource(&iomem_resource, resource);
+
+	return base_port;
+
+free_name:
+	kfree(name);
+free_resource:
+	kfree(resource);
+out:
+	return ~0;
+}
+
+static acpi_status __devinit resource_to_window(struct acpi_resource *resource,
+	struct acpi_resource_address64 *addr)
+{
+	acpi_status status;
+
+	/*
+	 * We're only interested in _CRS descriptors that are
+	 *	- address space descriptors for memory or I/O space
+	 *	- non-zero size
+	 *	- producers, i.e., the address space is routed downstream,
+	 *	  not consumed by the bridge itself
+	 */
+	status = acpi_resource_to_address64(resource, addr);
+	if (ACPI_SUCCESS(status) &&
+	    (addr->resource_type == ACPI_MEMORY_RANGE ||
+	     addr->resource_type == ACPI_IO_RANGE) &&
+	    addr->address_length &&
+	    addr->producer_consumer == ACPI_PRODUCER)
+		return AE_OK;
+
+	return AE_ERROR;
+}
+
+static acpi_status __devinit
+count_window (struct acpi_resource *resource, void *data)
+{
+	unsigned int *windows = (unsigned int *) data;
+	struct acpi_resource_address64 addr;
+	acpi_status status;
+
+	status = resource_to_window(resource, &addr);
+	if (ACPI_SUCCESS(status))
+		(*windows)++;
+
+	return AE_OK;
+}
+
+static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
+{
+	struct pci_root_info *info = data;
+	struct pci_window *window;
+	struct acpi_resource_address64 addr;
+	acpi_status status;
+	unsigned long flags, offset = 0;
+	struct resource *root;
+
+	/* Return AE_OK for non-window resources to keep scanning for more */
+	status = resource_to_window(res, &addr);
+	if (!ACPI_SUCCESS(status))
+		return AE_OK;
+
+	if (addr.resource_type == ACPI_MEMORY_RANGE) {
+		flags = IORESOURCE_MEM;
+		root = &iomem_resource;
+		offset = addr.translation_offset;
+	} else if (addr.resource_type == ACPI_IO_RANGE) {
+		flags = IORESOURCE_IO;
+		root = &ioport_resource;
+		offset = add_io_space(info, &addr);
+		if (offset == ~0)
+			return AE_OK;
+	} else
+		return AE_OK;
+
+	window = &info->controller->window[info->controller->windows++];
+	window->resource.name = info->name;
+	window->resource.flags = flags;
+	window->resource.start = addr.minimum + offset;
+	window->resource.end = window->resource.start + addr.address_length - 1;
+	window->resource.child = NULL;
+	window->offset = offset;
+
+	if (insert_resource(root, &window->resource)) {
+		dev_err(&info->bridge->dev,
+			"can't allocate host bridge window %pR\n",
+			&window->resource);
+	} else {
+		if (offset)
+			dev_info(&info->bridge->dev, "host bridge window %pR "
+				 "(PCI address [%#llx-%#llx])\n",
+				 &window->resource,
+				 window->resource.start - offset,
+				 window->resource.end - offset);
+		else
+			dev_info(&info->bridge->dev,
+				 "host bridge window %pR\n",
+				 &window->resource);
+	}
+
+	return AE_OK;
+}
+
+static void __devinit
+pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
+{
+	int i;
+
+	pci_bus_remove_resources(bus);
+	for (i = 0; i < ctrl->windows; i++) {
+		struct resource *res = &ctrl->window[i].resource;
+		/* HP's firmware has a hack to work around a Windows bug.
+		 * Ignore these tiny memory ranges */
+		if ((res->flags & IORESOURCE_MEM) &&
+		    (res->end - res->start < 16))
+			continue;
+		pci_bus_add_resource(bus, res, 0);
+	}
+}
+
+struct pci_bus * __devinit
+pci_acpi_scan_root(struct acpi_pci_root *root)
+{
+	struct acpi_device *device = root->device;
+	int domain = root->segment;
+	int bus = root->secondary.start;
+	struct pci_controller *controller;
+	unsigned int windows = 0;
+	struct pci_bus *pbus;
+	char *name;
+	int pxm;
+
+	controller = alloc_pci_controller(domain);
+	if (!controller)
+		goto out1;
+
+	controller->acpi_handle = device->handle;
+
+	pxm = acpi_get_pxm(controller->acpi_handle);
+#ifdef CONFIG_NUMA
+	if (pxm >= 0)
+		controller->node = pxm_to_node(pxm);
+#endif
+
+	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
+			&windows);
+	if (windows) {
+		struct pci_root_info info;
+
+		controller->window =
+			kmalloc_node(sizeof(*controller->window) * windows,
+				     GFP_KERNEL, controller->node);
+		if (!controller->window)
+			goto out2;
+
+		name = kmalloc(16, GFP_KERNEL);
+		if (!name)
+			goto out3;
+
+		sprintf(name, "PCI Bus %04x:%02x", domain, bus);
+		info.bridge = device;
+		info.controller = controller;
+		info.name = name;
+		acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+			add_window, &info);
+	}
+	/*
+	 * See arch/x86/pci/acpi.c.
+	 * The desired pci bus might already be scanned in a quirk. We
+	 * should handle the case here, but it appears that IA64 hasn't
+	 * such quirk. So we just ignore the case now.
+	 */
+	pbus = pci_scan_bus_parented(NULL, bus, &pci_root_ops, controller);
+
+	return pbus;
+
+out3:
+	kfree(controller->window);
+out2:
+	kfree(controller);
+out1:
+	return NULL;
+}
+
+void pcibios_resource_to_bus(struct pci_dev *dev,
+		struct pci_bus_region *region, struct resource *res)
+{
+	struct pci_controller *controller = PCI_CONTROLLER(dev);
+	unsigned long offset = 0;
+	int i;
+
+	for (i = 0; i < controller->windows; i++) {
+		struct pci_window *window = &controller->window[i];
+		if (!(window->resource.flags & res->flags))
+			continue;
+		if (window->resource.start > res->start)
+			continue;
+		if (window->resource.end < res->end)
+			continue;
+		offset = window->offset;
+		break;
+	}
+
+	region->start = res->start - offset;
+	region->end = res->end - offset;
+}
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+
+void pcibios_bus_to_resource(struct pci_dev *dev,
+		struct resource *res, struct pci_bus_region *region)
+{
+	struct pci_controller *controller = PCI_CONTROLLER(dev);
+	unsigned long offset = 0;
+	int i;
+
+	for (i = 0; i < controller->windows; i++) {
+		struct pci_window *window = &controller->window[i];
+		if (!(window->resource.flags & res->flags))
+			continue;
+		if (window->resource.start - window->offset > region->start)
+			continue;
+		if (window->resource.end - window->offset < region->end)
+			continue;
+		offset = window->offset;
+		break;
+	}
+
+	res->start = region->start + offset;
+	res->end = region->end + offset;
+}
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+
+static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
+{
+	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
+	struct resource *devr = &dev->resource[idx], *busr;
+
+	if (!dev->bus)
+		return 0;
+
+	pci_bus_for_each_resource(dev->bus, busr, i) {
+		if (!busr || ((busr->flags ^ devr->flags) & type_mask))
+			continue;
+		if ((devr->start) && (devr->start >= busr->start) &&
+				(devr->end <= busr->end))
+			return 1;
+	}
+	return 0;
+}
+
+static void __devinit
+pcibios_fixup_resources(struct pci_dev *dev, int start, int limit)
+{
+	struct pci_bus_region region;
+	int i;
+
+	for (i = start; i < limit; i++) {
+		if (!dev->resource[i].flags)
+			continue;
+		region.start = dev->resource[i].start;
+		region.end = dev->resource[i].end;
+		pcibios_bus_to_resource(dev, &dev->resource[i], &region);
+		if ((is_valid_resource(dev, i)))
+			pci_claim_resource(dev, i);
+	}
+}
+
+void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+{
+	pcibios_fixup_resources(dev, 0, PCI_BRIDGE_RESOURCES);
+}
+EXPORT_SYMBOL_GPL(pcibios_fixup_device_resources);
+
+static void __devinit pcibios_fixup_bridge_resources(struct pci_dev *dev)
+{
+	pcibios_fixup_resources(dev, PCI_BRIDGE_RESOURCES, PCI_NUM_RESOURCES);
+}
+
+/*
+ *  Called after each bus is probed, but before its children are examined.
+ */
+void __devinit
+pcibios_fixup_bus (struct pci_bus *b)
+{
+	struct pci_dev *dev;
+
+	if (b->self) {
+		pci_read_bridge_bases(b);
+		pcibios_fixup_bridge_resources(b->self);
+	} else {
+		pcibios_setup_root_windows(b, b->sysdata);
+	}
+	list_for_each_entry(dev, &b->devices, bus_list)
+		pcibios_fixup_device_resources(dev);
+	platform_pci_fixup_bus(b);
+
+	return;
+}
+
+void __devinit
+pcibios_update_irq (struct pci_dev *dev, int irq)
+{
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+
+	/* ??? FIXME -- record old value for shutdown.  */
+}
+
+int
+pcibios_enable_device (struct pci_dev *dev, int mask)
+{
+	int ret;
+
+	ret = pci_enable_resources(dev, mask);
+	if (ret < 0)
+		return ret;
+
+	if (!dev->msi_enabled)
+		return acpi_pci_irq_enable(dev);
+	return 0;
+}
+
+void
+pcibios_disable_device (struct pci_dev *dev)
+{
+	BUG_ON(atomic_read(&dev->enable_cnt));
+	if (!dev->msi_enabled)
+		acpi_pci_irq_disable(dev);
+}
+
+resource_size_t
+pcibios_align_resource (void *data, const struct resource *res,
+		        resource_size_t size, resource_size_t align)
+{
+	return res->start;
+}
+
+/*
+ * PCI BIOS setup, always defaults to SAL interface
+ */
+char * __init
+pcibios_setup (char *str)
+{
+	return str;
+}
+
+int
+pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
+		     enum pci_mmap_state mmap_state, int write_combine)
+{
+	unsigned long size = vma->vm_end - vma->vm_start;
+	pgprot_t prot;
+
+	/*
+	 * I/O space cannot be accessed via normal processor loads and
+	 * stores on this platform.
+	 */
+	if (mmap_state == pci_mmap_io)
+		/*
+		 * XXX we could relax this for I/O spaces for which ACPI
+		 * indicates that the space is 1-to-1 mapped.  But at the
+		 * moment, we don't support multiple PCI address spaces and
+		 * the legacy I/O space is not 1-to-1 mapped, so this is moot.
+		 */
+		return -EINVAL;
+
+	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
+		return -EINVAL;
+
+	prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size,
+				    vma->vm_page_prot);
+
+	/*
+	 * If the user requested WC, the kernel uses UC or WC for this region,
+	 * and the chipset supports WC, we can use WC. Otherwise, we have to
+	 * use the same attribute the kernel uses.
+	 */
+	if (write_combine &&
+	    ((pgprot_val(prot) & _PAGE_MA_MASK) == _PAGE_MA_UC ||
+	     (pgprot_val(prot) & _PAGE_MA_MASK) == _PAGE_MA_WC) &&
+	    efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start))
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	else
+		vma->vm_page_prot = prot;
+
+	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			     vma->vm_end - vma->vm_start, vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/**
+ * ia64_pci_get_legacy_mem - generic legacy mem routine
+ * @bus: bus to get legacy memory base address for
+ *
+ * Find the base of legacy memory for @bus.  This is typically the first
+ * megabyte of bus address space for @bus or is simply 0 on platforms whose
+ * chipsets support legacy I/O and memory routing.  Returns the base address
+ * or an error pointer if an error occurred.
+ *
+ * This is the ia64 generic version of this routine.  Other platforms
+ * are free to override it with a machine vector.
+ */
+char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
+{
+	return (char *)__IA64_UNCACHED_OFFSET;
+}
+
+/**
+ * pci_mmap_legacy_page_range - map legacy memory space to userland
+ * @bus: bus whose legacy space we're mapping
+ * @vma: vma passed in by mmap
+ *
+ * Map legacy memory space for this device back to userspace using a machine
+ * vector to get the base address.
+ */
+int
+pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+			   enum pci_mmap_state mmap_state)
+{
+	unsigned long size = vma->vm_end - vma->vm_start;
+	pgprot_t prot;
+	char *addr;
+
+	/* We only support mmap'ing of legacy memory space */
+	if (mmap_state != pci_mmap_mem)
+		return -ENOSYS;
+
+	/*
+	 * Avoid attribute aliasing.  See Documentation/ia64/aliasing.txt
+	 * for more details.
+	 */
+	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
+		return -EINVAL;
+	prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size,
+				    vma->vm_page_prot);
+
+	addr = pci_get_legacy_mem(bus);
+	if (IS_ERR(addr))
+		return PTR_ERR(addr);
+
+	vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT;
+	vma->vm_page_prot = prot;
+
+	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			    size, vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/**
+ * ia64_pci_legacy_read - read from legacy I/O space
+ * @bus: bus to read
+ * @port: legacy port value
+ * @val: caller allocated storage for returned value
+ * @size: number of bytes to read
+ *
+ * Simply reads @size bytes from @port and puts the result in @val.
+ *
+ * Again, this (and the write routine) are generic versions that can be
+ * overridden by the platform.  This is necessary on platforms that don't
+ * support legacy I/O routing or that hard fail on legacy I/O timeouts.
+ */
+int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
+{
+	int ret = size;
+
+	switch (size) {
+	case 1:
+		*val = inb(port);
+		break;
+	case 2:
+		*val = inw(port);
+		break;
+	case 4:
+		*val = inl(port);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * ia64_pci_legacy_write - perform a legacy I/O write
+ * @bus: bus pointer
+ * @port: port to write
+ * @val: value to write
+ * @size: number of bytes to write from @val
+ *
+ * Simply writes @size bytes of @val to @port.
+ */
+int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
+{
+	int ret = size;
+
+	switch (size) {
+	case 1:
+		outb(val, port);
+		break;
+	case 2:
+		outw(val, port);
+		break;
+	case 4:
+		outl(val, port);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * set_pci_cacheline_size - determine cacheline size for PCI devices
+ *
+ * We want to use the line-size of the outer-most cache.  We assume
+ * that this line-size is the same for all CPUs.
+ *
+ * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info().
+ */
+static void __init set_pci_dfl_cacheline_size(void)
+{
+	unsigned long levels, unique_caches;
+	long status;
+	pal_cache_config_info_t cci;
+
+	status = ia64_pal_cache_summary(&levels, &unique_caches);
+	if (status != 0) {
+		printk(KERN_ERR "%s: ia64_pal_cache_summary() failed "
+			"(status=%ld)\n", __func__, status);
+		return;
+	}
+
+	status = ia64_pal_cache_config_info(levels - 1,
+				/* cache_type (data_or_unified)= */ 2, &cci);
+	if (status != 0) {
+		printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed "
+			"(status=%ld)\n", __func__, status);
+		return;
+	}
+	pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4;
+}
+
+u64 ia64_dma_get_required_mask(struct device *dev)
+{
+	u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
+	u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
+	u64 mask;
+
+	if (!high_totalram) {
+		/* convert to mask just covering totalram */
+		low_totalram = (1 << (fls(low_totalram) - 1));
+		low_totalram += low_totalram - 1;
+		mask = low_totalram;
+	} else {
+		high_totalram = (1 << (fls(high_totalram) - 1));
+		high_totalram += high_totalram - 1;
+		mask = (((u64)high_totalram) << 32) + 0xffffffff;
+	}
+	return mask;
+}
+EXPORT_SYMBOL_GPL(ia64_dma_get_required_mask);
+
+u64 dma_get_required_mask(struct device *dev)
+{
+	return platform_dma_get_required_mask(dev);
+}
+EXPORT_SYMBOL_GPL(dma_get_required_mask);
+
+static int __init pcibios_init(void)
+{
+	set_pci_dfl_cacheline_size();
+	return 0;
+}
+
+subsys_initcall(pcibios_init);
diff --git a/arch/ia64/scripts/check-gas b/arch/ia64/scripts/check-gas
new file mode 100755
index 00000000..2499e0b2
--- /dev/null
+++ b/arch/ia64/scripts/check-gas
@@ -0,0 +1,15 @@
+#!/bin/sh
+dir=$(dirname $0)
+CC=$1
+OBJDUMP=$2
+tmp=${TMPDIR:-/tmp}
+out=$tmp/out$$.o
+$CC -c $dir/check-gas-asm.S -o $out
+res=$($OBJDUMP -r --section .data $out | fgrep 00004 | tr -s ' ' |cut -f3 -d' ')
+rm -f $out
+if [ $res != ".text" ]; then
+	echo buggy
+else
+	echo good
+fi
+exit 0
diff --git a/arch/ia64/scripts/check-gas-asm.S b/arch/ia64/scripts/check-gas-asm.S
new file mode 100644
index 00000000..010e1d22
--- /dev/null
+++ b/arch/ia64/scripts/check-gas-asm.S
@@ -0,0 +1,2 @@
+[1:]	nop 0
+	.xdata4 ".data", 0, 1b-.
diff --git a/arch/ia64/scripts/check-model.c b/arch/ia64/scripts/check-model.c
new file mode 100644
index 00000000..e1d4e86e
--- /dev/null
+++ b/arch/ia64/scripts/check-model.c
@@ -0,0 +1 @@
+int __attribute__ ((__model__ (__small__))) x;
diff --git a/arch/ia64/scripts/check-segrel.S b/arch/ia64/scripts/check-segrel.S
new file mode 100644
index 00000000..3be4e3db
--- /dev/null
+++ b/arch/ia64/scripts/check-segrel.S
@@ -0,0 +1,4 @@
+	.rodata
+	data4 @segrel(start)
+	.data
+start:
diff --git a/arch/ia64/scripts/check-segrel.lds b/arch/ia64/scripts/check-segrel.lds
new file mode 100644
index 00000000..85a0d54f
--- /dev/null
+++ b/arch/ia64/scripts/check-segrel.lds
@@ -0,0 +1,12 @@
+SECTIONS {
+	. = SIZEOF_HEADERS;
+	.rodata : { *(.rodata) } :ro
+	.note : { *(.note*) }
+	. = 0xa0000;
+	.data : { *(.data) } :dat
+	/DISCARD/ : { *(*) }
+}
+PHDRS {
+  ro PT_LOAD FILEHDR PHDRS;
+  dat PT_LOAD;
+}
diff --git a/arch/ia64/scripts/check-serialize.S b/arch/ia64/scripts/check-serialize.S
new file mode 100644
index 00000000..0400c106
--- /dev/null
+++ b/arch/ia64/scripts/check-serialize.S
@@ -0,0 +1,2 @@
+	.serialize.data
+	.serialize.instruction
diff --git a/arch/ia64/scripts/check-text-align.S b/arch/ia64/scripts/check-text-align.S
new file mode 100644
index 00000000..03f586ab
--- /dev/null
+++ b/arch/ia64/scripts/check-text-align.S
@@ -0,0 +1,6 @@
+	.proc foo
+	.prologue
+foo:	.save rp, r2
+	nop 0
+	.align 64
+	.endp foo
diff --git a/arch/ia64/scripts/pvcheck.sed b/arch/ia64/scripts/pvcheck.sed
new file mode 100644
index 00000000..e59809a3
--- /dev/null
+++ b/arch/ia64/scripts/pvcheck.sed
@@ -0,0 +1,33 @@
+#
+# Checker for paravirtualizations of privileged operations.
+#
+s/ssm.*psr\.ic.*/.warning \"ssm psr.ic should not be used directly\"/g
+s/rsm.*psr\.ic.*/.warning \"rsm psr.ic should not be used directly\"/g
+s/ssm.*psr\.i.*/.warning \"ssm psr.i should not be used directly\"/g
+s/rsm.*psr\.i.*/.warning \"rsm psr.i should not be used directly\"/g
+s/ssm.*psr\.dt.*/.warning \"ssm psr.dt should not be used directly\"/g
+s/rsm.*psr\.dt.*/.warning \"rsm psr.dt should not be used directly\"/g
+s/mov.*=.*cr\.ifa/.warning \"cr.ifa should not used directly\"/g
+s/mov.*=.*cr\.itir/.warning \"cr.itir should not used directly\"/g
+s/mov.*=.*cr\.isr/.warning \"cr.isr should not used directly\"/g
+s/mov.*=.*cr\.iha/.warning \"cr.iha should not used directly\"/g
+s/mov.*=.*cr\.ipsr/.warning \"cr.ipsr should not used directly\"/g
+s/mov.*=.*cr\.iim/.warning \"cr.iim should not used directly\"/g
+s/mov.*=.*cr\.iip/.warning \"cr.iip should not used directly\"/g
+s/mov.*=.*cr\.ivr/.warning \"cr.ivr should not used directly\"/g
+s/mov.*=[^\.]*psr/.warning \"psr should not used directly\"/g	# avoid ar.fpsr
+s/mov.*=.*ar\.eflags/.warning \"ar.eflags should not used directly\"/g
+s/mov.*=.*ar\.itc.*/.warning \"ar.itc should not used directly\"/g
+s/mov.*cr\.ifa.*=.*/.warning \"cr.ifa should not used directly\"/g
+s/mov.*cr\.itir.*=.*/.warning \"cr.itir should not used directly\"/g
+s/mov.*cr\.iha.*=.*/.warning \"cr.iha should not used directly\"/g
+s/mov.*cr\.ipsr.*=.*/.warning \"cr.ipsr should not used directly\"/g
+s/mov.*cr\.ifs.*=.*/.warning \"cr.ifs should not used directly\"/g
+s/mov.*cr\.iip.*=.*/.warning \"cr.iip should not used directly\"/g
+s/mov.*cr\.kr.*=.*/.warning \"cr.kr should not used directly\"/g
+s/mov.*ar\.eflags.*=.*/.warning \"ar.eflags should not used directly\"/g
+s/itc\.i.*/.warning \"itc.i should not be used directly.\"/g
+s/itc\.d.*/.warning \"itc.d should not be used directly.\"/g
+s/bsw\.0/.warning \"bsw.0 should not be used directly.\"/g
+s/bsw\.1/.warning \"bsw.1 should not be used directly.\"/g
+s/ptc\.ga.*/.warning \"ptc.ga should not be used directly.\"/g
diff --git a/arch/ia64/scripts/toolchain-flags b/arch/ia64/scripts/toolchain-flags
new file mode 100755
index 00000000..3f0c2ada
--- /dev/null
+++ b/arch/ia64/scripts/toolchain-flags
@@ -0,0 +1,53 @@
+#!/bin/sh
+#
+# Check whether linker can handle cross-segment @segrel():
+#
+CPPFLAGS=""
+CC=$1
+OBJDUMP=$2
+READELF=$3
+dir=$(dirname $0)
+tmp=${TMPDIR:-/tmp}
+out=$tmp/out$$
+
+# Check whether cross-segment segment-relative relocs work fine.  We need
+# that for building the gate DSO:
+
+$CC -nostdlib -static -Wl,-T$dir/check-segrel.lds $dir/check-segrel.S -o $out
+res=$($OBJDUMP --full --section .rodata $out | fgrep 000 | cut -f3 -d' ')
+rm -f $out
+if [ $res != 00000a00 ]; then
+    CPPFLAGS="$CPPFLAGS -DHAVE_BUGGY_SEGREL"
+    cat >&2 <<EOF
+warning: your linker cannot handle cross-segment segment-relative relocations.
+         please upgrade to a newer version (it is safe to use this linker, but
+         the kernel will be bigger than strictly necessary).
+EOF
+fi
+
+# Check whether .align inside a function works as expected.
+
+$CC -c $dir/check-text-align.S -o $out
+$READELF -u $out | fgrep -q 'prologue(rlen=12)'
+res=$?
+rm -f $out
+if [ $res -eq 0 ]; then
+    CPPFLAGS="$CPPFLAGS -DHAVE_WORKING_TEXT_ALIGN"
+fi
+
+if ! $CC -c $dir/check-model.c -o $out 2>&1 | grep  __model__ | grep -q attrib
+then
+    CPPFLAGS="$CPPFLAGS -DHAVE_MODEL_SMALL_ATTRIBUTE"
+fi
+rm -f $out
+
+# Check whether assembler supports .serialize.{data,instruction} directive.
+
+$CC -c $dir/check-serialize.S -o $out 2>/dev/null
+res=$?
+rm -f $out
+if [ $res -eq 0 ]; then
+    CPPFLAGS="$CPPFLAGS -DHAVE_SERIALIZE_DIRECTIVE"
+fi
+
+echo $CPPFLAGS
diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py
new file mode 100644
index 00000000..2bfd941f
--- /dev/null
+++ b/arch/ia64/scripts/unwcheck.py
@@ -0,0 +1,64 @@
+#!/usr/bin/python
+#
+# Usage: unwcheck.py FILE
+#
+# This script checks the unwind info of each function in file FILE
+# and verifies that the sum of the region-lengths matches the total
+# length of the function.
+#
+# Based on a shell/awk script originally written by Harish Patil,
+# which was converted to Perl by Matthew Chapman, which was converted
+# to Python by David Mosberger.
+#
+import os
+import re
+import sys
+
+if len(sys.argv) != 2:
+    print "Usage: %s FILE" % sys.argv[0]
+    sys.exit(2)
+
+readelf = os.getenv("READELF", "readelf")
+
+start_pattern = re.compile("<([^>]*)>: \[0x([0-9a-f]+)-0x([0-9a-f]+)\]")
+rlen_pattern  = re.compile(".*rlen=([0-9]+)")
+
+def check_func (func, slots, rlen_sum):
+    if slots != rlen_sum:
+        global num_errors
+        num_errors += 1
+        if not func: func = "[%#x-%#x]" % (start, end)
+        print "ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum)
+    return
+
+num_funcs = 0
+num_errors = 0
+func = False
+slots = 0
+rlen_sum = 0
+for line in os.popen("%s -u %s" % (readelf, sys.argv[1])):
+    m = start_pattern.match(line)
+    if m:
+        check_func(func, slots, rlen_sum)
+
+        func  = m.group(1)
+        start = long(m.group(2), 16)
+        end   = long(m.group(3), 16)
+        slots = 3 * (end - start) / 16
+        rlen_sum = 0L
+        num_funcs += 1
+    else:
+        m = rlen_pattern.match(line)
+        if m:
+            rlen_sum += long(m.group(1))
+check_func(func, slots, rlen_sum)
+
+if num_errors == 0:
+    print "No errors detected in %u functions." % num_funcs
+else:
+    if num_errors > 1:
+        err="errors"
+    else:
+        err="error"
+    print "%u %s detected in %u functions." % (num_errors, err, num_funcs)
+    sys.exit(1)
diff --git a/arch/ia64/sn/Makefile b/arch/ia64/sn/Makefile
new file mode 100644
index 00000000..79a7df02
--- /dev/null
+++ b/arch/ia64/sn/Makefile
@@ -0,0 +1,12 @@
+# arch/ia64/sn/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2004 Silicon Graphics, Inc.  All Rights Reserved.
+#
+# Makefile for the sn ia64 subplatform
+#
+
+obj-y += kernel/ pci/
diff --git a/arch/ia64/sn/include/ioerror.h b/arch/ia64/sn/include/ioerror.h
new file mode 100644
index 00000000..e68f2b07
--- /dev/null
+++ b/arch/ia64/sn/include/ioerror.h
@@ -0,0 +1,81 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_IOERROR_H
+#define _ASM_IA64_SN_IOERROR_H
+
+/*
+ * IO error structure.
+ *
+ * This structure would expand to hold the information retrieved from
+ * all IO related error registers.
+ *
+ * This structure is defined to hold all system specific
+ * information related to a single error.
+ *
+ * This serves a couple of purpose.
+ *      - Error handling often involves translating one form of address to other
+ *        form. So, instead of having different data structures at each level,
+ *        we have a single structure, and the appropriate fields get filled in
+ *        at each layer.
+ *      - This provides a way to dump all error related information in any layer
+ *        of erorr handling (debugging aid).
+ *
+ * A second possibility is to allow each layer to define its own error
+ * data structure, and fill in the proper fields. This has the advantage
+ * of isolating the layers.
+ * A big concern is the potential stack usage (and overflow), if each layer
+ * defines these structures on stack (assuming we don't want to do kmalloc.
+ *
+ * Any layer wishing to pass extra information to a layer next to it in
+ * error handling hierarchy, can do so as a separate parameter.
+ */
+
+typedef struct io_error_s {
+    /* Bit fields indicating which structure fields are valid */
+    union {
+	struct {
+	    unsigned                ievb_errortype:1;
+	    unsigned                ievb_widgetnum:1;
+	    unsigned                ievb_widgetdev:1;
+	    unsigned                ievb_srccpu:1;
+	    unsigned                ievb_srcnode:1;
+	    unsigned                ievb_errnode:1;
+	    unsigned                ievb_sysioaddr:1;
+	    unsigned                ievb_xtalkaddr:1;
+	    unsigned                ievb_busspace:1;
+	    unsigned                ievb_busaddr:1;
+	    unsigned                ievb_vaddr:1;
+	    unsigned                ievb_memaddr:1;
+	    unsigned		    ievb_epc:1;
+	    unsigned		    ievb_ef:1;
+	    unsigned		    ievb_tnum:1;
+	} iev_b;
+	unsigned                iev_a;
+    } ie_v;
+
+    short                   ie_errortype;	/* error type: extra info about error */
+    short                   ie_widgetnum;	/* Widget number that's in error */
+    short                   ie_widgetdev;	/* Device within widget in error */
+    cpuid_t                 ie_srccpu;	/* CPU on srcnode generating error */
+    cnodeid_t               ie_srcnode;		/* Node which caused the error   */
+    cnodeid_t               ie_errnode;		/* Node where error was noticed  */
+    iopaddr_t               ie_sysioaddr;	/* Sys specific IO address       */
+    iopaddr_t               ie_xtalkaddr;	/* Xtalk (48bit) addr of Error   */
+    iopaddr_t               ie_busspace;	/* Bus specific address space    */
+    iopaddr_t               ie_busaddr;		/* Bus specific address          */
+    caddr_t                 ie_vaddr;	/* Virtual address of error      */
+    iopaddr_t               ie_memaddr;		/* Physical memory address       */
+    caddr_t		    ie_epc;		/* pc when error reported	 */
+    caddr_t		    ie_ef;		/* eframe when error reported	 */
+    short		    ie_tnum;		/* Xtalk TNUM field */
+} ioerror_t;
+
+#define	IOERROR_INIT(e)		do { (e)->ie_v.iev_a = 0; } while (0)
+#define	IOERROR_SETVALUE(e,f,v)	do { (e)->ie_ ## f = (v); (e)->ie_v.iev_b.ievb_ ## f = 1; } while (0)
+
+#endif /* _ASM_IA64_SN_IOERROR_H */
diff --git a/arch/ia64/sn/include/tio.h b/arch/ia64/sn/include/tio.h
new file mode 100644
index 00000000..6b2e7b75
--- /dev/null
+++ b/arch/ia64/sn/include/tio.h
@@ -0,0 +1,41 @@
+/* 
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_TIO_H
+#define _ASM_IA64_SN_TIO_H
+
+#define	TIO_MMR_ADDR_MOD
+
+#define TIO_NODE_ID     TIO_MMR_ADDR_MOD(0x0000000090060e80)
+
+#define TIO_ITTE_BASE   0xb0008800        /* base of translation table entries */
+#define TIO_ITTE(bigwin)        (TIO_ITTE_BASE + 8*(bigwin))
+
+#define TIO_ITTE_OFFSET_BITS    8       /* size of offset field */
+#define TIO_ITTE_OFFSET_MASK    ((1<<TIO_ITTE_OFFSET_BITS)-1)
+#define TIO_ITTE_OFFSET_SHIFT   0
+
+#define TIO_ITTE_WIDGET_BITS    2       /* size of widget field */
+#define TIO_ITTE_WIDGET_MASK    ((1<<TIO_ITTE_WIDGET_BITS)-1)
+#define TIO_ITTE_WIDGET_SHIFT   12
+#define TIO_ITTE_VALID_MASK	0x1
+#define TIO_ITTE_VALID_SHIFT	16
+
+#define TIO_ITTE_WIDGET(itte) \
+	(((itte) >> TIO_ITTE_WIDGET_SHIFT) & TIO_ITTE_WIDGET_MASK)
+#define TIO_ITTE_VALID(itte) \
+	(((itte) >> TIO_ITTE_VALID_SHIFT) & TIO_ITTE_VALID_MASK)
+
+#define TIO_ITTE_PUT(nasid, bigwin, widget, addr, valid) \
+        REMOTE_HUB_S((nasid), TIO_ITTE(bigwin), \
+                (((((addr) >> TIO_BWIN_SIZE_BITS) & \
+                   TIO_ITTE_OFFSET_MASK) << TIO_ITTE_OFFSET_SHIFT) | \
+                (((widget) & TIO_ITTE_WIDGET_MASK) << TIO_ITTE_WIDGET_SHIFT)) | \
+		(( (valid) & TIO_ITTE_VALID_MASK) << TIO_ITTE_VALID_SHIFT))
+
+#endif /*  _ASM_IA64_SN_TIO_H */
diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h
new file mode 100644
index 00000000..8182583c
--- /dev/null
+++ b/arch/ia64/sn/include/xtalk/hubdev.h
@@ -0,0 +1,91 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_XTALK_HUBDEV_H
+#define _ASM_IA64_SN_XTALK_HUBDEV_H
+
+#include "xtalk/xwidgetdev.h"
+
+#define HUB_WIDGET_ID_MAX 0xf
+#define DEV_PER_WIDGET (2*2*8)
+#define IIO_ITTE_WIDGET_BITS    4       /* size of widget field */
+#define IIO_ITTE_WIDGET_MASK    ((1<<IIO_ITTE_WIDGET_BITS)-1)
+#define IIO_ITTE_WIDGET_SHIFT   8
+
+#define IIO_ITTE_WIDGET(itte)	\
+	(((itte) >> IIO_ITTE_WIDGET_SHIFT) & IIO_ITTE_WIDGET_MASK)
+
+/*
+ * Use the top big window as a surrogate for the first small window
+ */
+#define SWIN0_BIGWIN            HUB_NUM_BIG_WINDOW
+#define IIO_NUM_ITTES   7
+#define HUB_NUM_BIG_WINDOW      (IIO_NUM_ITTES - 1)
+
+/* This struct is shared between the PROM and the kernel.
+ * Changes to this struct will require corresponding changes to the kernel.
+ */
+struct sn_flush_device_common {
+	int sfdl_bus;
+	int sfdl_slot;
+	int sfdl_pin;
+	struct common_bar_list {
+		unsigned long start;
+		unsigned long end;
+	} sfdl_bar_list[6];
+	unsigned long sfdl_force_int_addr;
+	unsigned long sfdl_flush_value;
+	volatile unsigned long *sfdl_flush_addr;
+	u32 sfdl_persistent_busnum;
+	u32 sfdl_persistent_segment;
+	struct pcibus_info *sfdl_pcibus_info;
+};
+
+/* This struct is kernel only and is not used by the PROM */
+struct sn_flush_device_kernel {
+	spinlock_t sfdl_flush_lock;
+	struct sn_flush_device_common *common;
+};
+
+/* 01/16/06 This struct is the old PROM/kernel struct and needs to be included
+ * for older official PROMs to function on the new kernel base.  This struct
+ * will be removed when the next official PROM release occurs. */
+
+struct sn_flush_device_war {
+	struct sn_flush_device_common common;
+	u32 filler; /* older PROMs expect the default size of a spinlock_t */
+};
+
+/*
+ * **widget_p - Used as an array[wid_num][device] of sn_flush_device_kernel.
+ */
+struct sn_flush_nasid_entry  {
+	struct sn_flush_device_kernel **widget_p; // Used as an array of wid_num
+	u64 iio_itte[8];
+};
+
+struct hubdev_info {
+	geoid_t				hdi_geoid;
+	short				hdi_nasid;
+	short				hdi_peer_nasid;   /* Dual Porting Peer */
+
+	struct sn_flush_nasid_entry	hdi_flush_nasid_list;
+	struct xwidget_info		hdi_xwidget_info[HUB_WIDGET_ID_MAX + 1];
+
+
+	void				*hdi_nodepda;
+	void				*hdi_node_vertex;
+	u32				max_segment_number;
+	u32				max_pcibus_number;
+};
+
+extern void hubdev_init_node(nodepda_t *, cnodeid_t);
+extern void hub_error_init(struct hubdev_info *);
+extern void ice_error_init(struct hubdev_info *);
+
+
+#endif /* _ASM_IA64_SN_XTALK_HUBDEV_H */
diff --git a/arch/ia64/sn/include/xtalk/xbow.h b/arch/ia64/sn/include/xtalk/xbow.h
new file mode 100644
index 00000000..90f37a41
--- /dev/null
+++ b/arch/ia64/sn/include/xtalk/xbow.h
@@ -0,0 +1,301 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992-1997,2000-2006 Silicon Graphics, Inc. All Rights
+ * Reserved.
+ */
+#ifndef _ASM_IA64_SN_XTALK_XBOW_H
+#define _ASM_IA64_SN_XTALK_XBOW_H
+
+#define XBOW_PORT_8	0x8
+#define XBOW_PORT_C	0xc
+#define XBOW_PORT_F	0xf
+
+#define MAX_XBOW_PORTS	8	/* number of ports on xbow chip */
+#define BASE_XBOW_PORT	XBOW_PORT_8	/* Lowest external port */
+
+#define	XBOW_CREDIT	4
+
+#define MAX_XBOW_NAME 	16
+
+/* Register set for each xbow link */
+typedef volatile struct xb_linkregs_s {
+/*
+ * we access these through synergy unswizzled space, so the address
+ * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.)
+ * That's why we put the register first and filler second.
+ */
+	u32 link_ibf;
+	u32 filler0;	/* filler for proper alignment */
+	u32 link_control;
+	u32 filler1;
+	u32 link_status;
+	u32 filler2;
+	u32 link_arb_upper;
+	u32 filler3;
+	u32 link_arb_lower;
+	u32 filler4;
+	u32 link_status_clr;
+	u32 filler5;
+	u32 link_reset;
+	u32 filler6;
+	u32 link_aux_status;
+	u32 filler7;
+} xb_linkregs_t;
+
+typedef volatile struct xbow_s {
+	/* standard widget configuration 0x000000-0x000057 */
+	struct widget_cfg xb_widget;  /* 0x000000 */
+
+	/* helper fieldnames for accessing bridge widget */
+
+#define xb_wid_id 		xb_widget.w_id
+#define xb_wid_stat 		xb_widget.w_status
+#define xb_wid_err_upper 	xb_widget.w_err_upper_addr
+#define xb_wid_err_lower 	xb_widget.w_err_lower_addr
+#define xb_wid_control		xb_widget.w_control
+#define xb_wid_req_timeout 	xb_widget.w_req_timeout
+#define xb_wid_int_upper 	xb_widget.w_intdest_upper_addr
+#define xb_wid_int_lower 	xb_widget.w_intdest_lower_addr
+#define xb_wid_err_cmdword 	xb_widget.w_err_cmd_word
+#define xb_wid_llp 		xb_widget.w_llp_cfg
+#define xb_wid_stat_clr 	xb_widget.w_tflush
+
+/*
+ * we access these through synergy unswizzled space, so the address
+ * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.)
+ * That's why we put the register first and filler second.
+ */
+	/* xbow-specific widget configuration    0x000058-0x0000FF */
+	u32 xb_wid_arb_reload; /* 0x00005C */
+	u32 _pad_000058;
+	u32 xb_perf_ctr_a;	/* 0x000064 */
+	u32 _pad_000060;
+	u32 xb_perf_ctr_b;	/* 0x00006c */
+	u32 _pad_000068;
+	u32 xb_nic;		/* 0x000074 */
+	u32 _pad_000070;
+
+	/* Xbridge only */
+	u32 xb_w0_rst_fnc;	/* 0x00007C */
+	u32 _pad_000078;
+	u32 xb_l8_rst_fnc;	/* 0x000084 */
+	u32 _pad_000080;
+	u32 xb_l9_rst_fnc;	/* 0x00008c */
+	u32 _pad_000088;
+	u32 xb_la_rst_fnc;	/* 0x000094 */
+	u32 _pad_000090;
+	u32 xb_lb_rst_fnc;	/* 0x00009c */
+	u32 _pad_000098;
+	u32 xb_lc_rst_fnc;	/* 0x0000a4 */
+	u32 _pad_0000a0;
+	u32 xb_ld_rst_fnc;	/* 0x0000ac */
+	u32 _pad_0000a8;
+	u32 xb_le_rst_fnc;	/* 0x0000b4 */
+	u32 _pad_0000b0;
+	u32 xb_lf_rst_fnc;	/* 0x0000bc */
+	u32 _pad_0000b8;
+	u32 xb_lock;		/* 0x0000c4 */
+	u32 _pad_0000c0;
+	u32 xb_lock_clr;	/* 0x0000cc */
+	u32 _pad_0000c8;
+	/* end of Xbridge only */
+	u32 _pad_0000d0[12];
+
+	/* Link Specific Registers, port 8..15   0x000100-0x000300 */
+	xb_linkregs_t xb_link_raw[MAX_XBOW_PORTS];
+} xbow_t;
+
+#define xb_link(p) xb_link_raw[(p) & (MAX_XBOW_PORTS - 1)]
+
+#define XB_FLAGS_EXISTS		0x1	/* device exists */
+#define XB_FLAGS_MASTER		0x2
+#define XB_FLAGS_SLAVE		0x0
+#define XB_FLAGS_GBR		0x4
+#define XB_FLAGS_16BIT		0x8
+#define XB_FLAGS_8BIT		0x0
+
+/* is widget port number valid?  (based on version 7.0 of xbow spec) */
+#define XBOW_WIDGET_IS_VALID(wid) ((wid) >= XBOW_PORT_8 && (wid) <= XBOW_PORT_F)
+
+/* whether to use upper or lower arbitration register, given source widget id */
+#define XBOW_ARB_IS_UPPER(wid) 	((wid) >= XBOW_PORT_8 && (wid) <= XBOW_PORT_B)
+#define XBOW_ARB_IS_LOWER(wid) 	((wid) >= XBOW_PORT_C && (wid) <= XBOW_PORT_F)
+
+/* offset of arbitration register, given source widget id */
+#define XBOW_ARB_OFF(wid) 	(XBOW_ARB_IS_UPPER(wid) ? 0x1c : 0x24)
+
+#define	XBOW_WID_ID		WIDGET_ID
+#define	XBOW_WID_STAT		WIDGET_STATUS
+#define	XBOW_WID_ERR_UPPER	WIDGET_ERR_UPPER_ADDR
+#define	XBOW_WID_ERR_LOWER	WIDGET_ERR_LOWER_ADDR
+#define	XBOW_WID_CONTROL	WIDGET_CONTROL
+#define	XBOW_WID_REQ_TO		WIDGET_REQ_TIMEOUT
+#define	XBOW_WID_INT_UPPER	WIDGET_INTDEST_UPPER_ADDR
+#define	XBOW_WID_INT_LOWER	WIDGET_INTDEST_LOWER_ADDR
+#define	XBOW_WID_ERR_CMDWORD	WIDGET_ERR_CMD_WORD
+#define	XBOW_WID_LLP		WIDGET_LLP_CFG
+#define	XBOW_WID_STAT_CLR	WIDGET_TFLUSH
+#define XBOW_WID_ARB_RELOAD 	0x5c
+#define XBOW_WID_PERF_CTR_A 	0x64
+#define XBOW_WID_PERF_CTR_B 	0x6c
+#define XBOW_WID_NIC 		0x74
+
+/* Xbridge only */
+#define XBOW_W0_RST_FNC		0x00007C
+#define	XBOW_L8_RST_FNC		0x000084
+#define	XBOW_L9_RST_FNC		0x00008c
+#define	XBOW_LA_RST_FNC		0x000094
+#define	XBOW_LB_RST_FNC		0x00009c
+#define	XBOW_LC_RST_FNC		0x0000a4
+#define	XBOW_LD_RST_FNC		0x0000ac
+#define	XBOW_LE_RST_FNC		0x0000b4
+#define	XBOW_LF_RST_FNC		0x0000bc
+#define XBOW_RESET_FENCE(x) ((x) > 7 && (x) < 16) ? \
+				(XBOW_W0_RST_FNC + ((x) - 7) * 8) : \
+				((x) == 0) ? XBOW_W0_RST_FNC : 0
+#define XBOW_LOCK		0x0000c4
+#define XBOW_LOCK_CLR		0x0000cc
+/* End of Xbridge only */
+
+/* used only in ide, but defined here within the reserved portion */
+/* of the widget0 address space (before 0xf4) */
+#define	XBOW_WID_UNDEF		0xe4
+
+/* xbow link register set base, legal value for x is 0x8..0xf */
+#define	XB_LINK_BASE		0x100
+#define	XB_LINK_OFFSET		0x40
+#define	XB_LINK_REG_BASE(x)	(XB_LINK_BASE + ((x) & (MAX_XBOW_PORTS - 1)) * XB_LINK_OFFSET)
+
+#define	XB_LINK_IBUF_FLUSH(x)	(XB_LINK_REG_BASE(x) + 0x4)
+#define	XB_LINK_CTRL(x)		(XB_LINK_REG_BASE(x) + 0xc)
+#define	XB_LINK_STATUS(x)	(XB_LINK_REG_BASE(x) + 0x14)
+#define	XB_LINK_ARB_UPPER(x)	(XB_LINK_REG_BASE(x) + 0x1c)
+#define	XB_LINK_ARB_LOWER(x)	(XB_LINK_REG_BASE(x) + 0x24)
+#define	XB_LINK_STATUS_CLR(x)	(XB_LINK_REG_BASE(x) + 0x2c)
+#define	XB_LINK_RESET(x)	(XB_LINK_REG_BASE(x) + 0x34)
+#define	XB_LINK_AUX_STATUS(x)	(XB_LINK_REG_BASE(x) + 0x3c)
+
+/* link_control(x) */
+#define	XB_CTRL_LINKALIVE_IE		0x80000000	/* link comes alive */
+/* reserved:			0x40000000 */
+#define	XB_CTRL_PERF_CTR_MODE_MSK	0x30000000	/* perf counter mode */
+#define	XB_CTRL_IBUF_LEVEL_MSK		0x0e000000	/* input packet buffer
+							   level */
+#define	XB_CTRL_8BIT_MODE		0x01000000	/* force link into 8
+							   bit mode */
+#define XB_CTRL_BAD_LLP_PKT		0x00800000	/* force bad LLP
+							   packet */
+#define XB_CTRL_WIDGET_CR_MSK		0x007c0000	/* LLP widget credit
+							   mask */
+#define XB_CTRL_WIDGET_CR_SHFT	18			/* LLP widget credit
+							   shift */
+#define XB_CTRL_ILLEGAL_DST_IE		0x00020000	/* illegal destination
+							 */
+#define XB_CTRL_OALLOC_IBUF_IE		0x00010000	/* overallocated input
+							   buffer */
+/* reserved:			0x0000fe00 */
+#define XB_CTRL_BNDWDTH_ALLOC_IE	0x00000100	/* bandwidth alloc */
+#define XB_CTRL_RCV_CNT_OFLOW_IE	0x00000080	/* rcv retry overflow */
+#define XB_CTRL_XMT_CNT_OFLOW_IE	0x00000040	/* xmt retry overflow */
+#define XB_CTRL_XMT_MAX_RTRY_IE		0x00000020	/* max transmit retry */
+#define XB_CTRL_RCV_IE			0x00000010	/* receive */
+#define XB_CTRL_XMT_RTRY_IE		0x00000008	/* transmit retry */
+/* reserved:			0x00000004 */
+#define	XB_CTRL_MAXREQ_TOUT_IE		0x00000002	/* maximum request
+							   timeout */
+#define	XB_CTRL_SRC_TOUT_IE		0x00000001	/* source timeout */
+
+/* link_status(x) */
+#define	XB_STAT_LINKALIVE		XB_CTRL_LINKALIVE_IE
+/* reserved:			0x7ff80000 */
+#define	XB_STAT_MULTI_ERR		0x00040000	/* multi error */
+#define	XB_STAT_ILLEGAL_DST_ERR		XB_CTRL_ILLEGAL_DST_IE
+#define	XB_STAT_OALLOC_IBUF_ERR		XB_CTRL_OALLOC_IBUF_IE
+#define	XB_STAT_BNDWDTH_ALLOC_ID_MSK	0x0000ff00	/* port bitmask */
+#define	XB_STAT_RCV_CNT_OFLOW_ERR	XB_CTRL_RCV_CNT_OFLOW_IE
+#define	XB_STAT_XMT_CNT_OFLOW_ERR	XB_CTRL_XMT_CNT_OFLOW_IE
+#define	XB_STAT_XMT_MAX_RTRY_ERR	XB_CTRL_XMT_MAX_RTRY_IE
+#define	XB_STAT_RCV_ERR			XB_CTRL_RCV_IE
+#define	XB_STAT_XMT_RTRY_ERR		XB_CTRL_XMT_RTRY_IE
+/* reserved:			0x00000004 */
+#define	XB_STAT_MAXREQ_TOUT_ERR		XB_CTRL_MAXREQ_TOUT_IE
+#define	XB_STAT_SRC_TOUT_ERR		XB_CTRL_SRC_TOUT_IE
+
+/* link_aux_status(x) */
+#define	XB_AUX_STAT_RCV_CNT	0xff000000
+#define	XB_AUX_STAT_XMT_CNT	0x00ff0000
+#define	XB_AUX_STAT_TOUT_DST	0x0000ff00
+#define	XB_AUX_LINKFAIL_RST_BAD	0x00000040
+#define	XB_AUX_STAT_PRESENT	0x00000020
+#define	XB_AUX_STAT_PORT_WIDTH	0x00000010
+/*	reserved:		0x0000000f */
+
+/*
+ * link_arb_upper/link_arb_lower(x), (reg) should be the link_arb_upper
+ * register if (x) is 0x8..0xb, link_arb_lower if (x) is 0xc..0xf
+ */
+#define	XB_ARB_GBR_MSK		0x1f
+#define	XB_ARB_RR_MSK		0x7
+#define	XB_ARB_GBR_SHFT(x)	(((x) & 0x3) * 8)
+#define	XB_ARB_RR_SHFT(x)	(((x) & 0x3) * 8 + 5)
+#define	XB_ARB_GBR_CNT(reg,x)	((reg) >> XB_ARB_GBR_SHFT(x) & XB_ARB_GBR_MSK)
+#define	XB_ARB_RR_CNT(reg,x)	((reg) >> XB_ARB_RR_SHFT(x) & XB_ARB_RR_MSK)
+
+/* XBOW_WID_STAT */
+#define	XB_WID_STAT_LINK_INTR_SHFT	(24)
+#define	XB_WID_STAT_LINK_INTR_MASK	(0xFF << XB_WID_STAT_LINK_INTR_SHFT)
+#define	XB_WID_STAT_LINK_INTR(x) \
+	(0x1 << (((x)&7) + XB_WID_STAT_LINK_INTR_SHFT))
+#define	XB_WID_STAT_WIDGET0_INTR	0x00800000
+#define XB_WID_STAT_SRCID_MASK		0x000003c0	/* Xbridge only */
+#define	XB_WID_STAT_REG_ACC_ERR		0x00000020
+#define XB_WID_STAT_RECV_TOUT		0x00000010	/* Xbridge only */
+#define XB_WID_STAT_ARB_TOUT		0x00000008	/* Xbridge only */
+#define	XB_WID_STAT_XTALK_ERR		0x00000004
+#define XB_WID_STAT_DST_TOUT		0x00000002	/* Xbridge only */
+#define	XB_WID_STAT_MULTI_ERR		0x00000001
+
+#define XB_WID_STAT_SRCID_SHFT		6
+
+/* XBOW_WID_CONTROL */
+#define XB_WID_CTRL_REG_ACC_IE		XB_WID_STAT_REG_ACC_ERR
+#define XB_WID_CTRL_RECV_TOUT		XB_WID_STAT_RECV_TOUT
+#define XB_WID_CTRL_ARB_TOUT		XB_WID_STAT_ARB_TOUT
+#define XB_WID_CTRL_XTALK_IE		XB_WID_STAT_XTALK_ERR
+
+/* XBOW_WID_INT_UPPER */
+/* defined in xwidget.h for WIDGET_INTDEST_UPPER_ADDR */
+
+/* XBOW WIDGET part number, in the ID register */
+#define XBOW_WIDGET_PART_NUM	0x0		/* crossbow */
+#define XXBOW_WIDGET_PART_NUM	0xd000		/* Xbridge */
+#define	XBOW_WIDGET_MFGR_NUM	0x0
+#define	XXBOW_WIDGET_MFGR_NUM	0x0
+#define PXBOW_WIDGET_PART_NUM   0xd100		/* PIC */
+
+#define	XBOW_REV_1_0		0x1	/* xbow rev 1.0 is "1" */
+#define	XBOW_REV_1_1		0x2	/* xbow rev 1.1 is "2" */
+#define XBOW_REV_1_2		0x3	/* xbow rev 1.2 is "3" */
+#define XBOW_REV_1_3		0x4	/* xbow rev 1.3 is "4" */
+#define XBOW_REV_2_0		0x5	/* xbow rev 2.0 is "5" */
+
+#define XXBOW_PART_REV_1_0		(XXBOW_WIDGET_PART_NUM << 4 | 0x1 )
+#define XXBOW_PART_REV_2_0		(XXBOW_WIDGET_PART_NUM << 4 | 0x2 )
+
+/* XBOW_WID_ARB_RELOAD */
+#define	XBOW_WID_ARB_RELOAD_INT	0x3f	/* GBR reload interval */
+
+#define IS_XBRIDGE_XBOW(wid) \
+	(XWIDGET_PART_NUM(wid) == XXBOW_WIDGET_PART_NUM && \
+	XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM)
+
+#define IS_PIC_XBOW(wid) \
+	(XWIDGET_PART_NUM(wid) == PXBOW_WIDGET_PART_NUM && \
+	XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM)
+
+#define XBOW_WAR_ENABLED(pv, widid) ((1 << XWIDGET_REV_NUM(widid)) & pv)
+
+#endif /* _ASM_IA64_SN_XTALK_XBOW_H */
diff --git a/arch/ia64/sn/include/xtalk/xwidgetdev.h b/arch/ia64/sn/include/xtalk/xwidgetdev.h
new file mode 100644
index 00000000..2800eda0
--- /dev/null
+++ b/arch/ia64/sn/include/xtalk/xwidgetdev.h
@@ -0,0 +1,70 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992-1997,2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ */
+#ifndef _ASM_IA64_SN_XTALK_XWIDGET_H
+#define _ASM_IA64_SN_XTALK_XWIDGET_H
+
+/* WIDGET_ID */
+#define WIDGET_REV_NUM                  0xf0000000
+#define WIDGET_PART_NUM                 0x0ffff000
+#define WIDGET_MFG_NUM                  0x00000ffe
+#define WIDGET_REV_NUM_SHFT             28
+#define WIDGET_PART_NUM_SHFT            12
+#define WIDGET_MFG_NUM_SHFT             1
+
+#define XWIDGET_PART_NUM(widgetid) (((widgetid) & WIDGET_PART_NUM) >> WIDGET_PART_NUM_SHFT)
+#define XWIDGET_REV_NUM(widgetid) (((widgetid) & WIDGET_REV_NUM) >> WIDGET_REV_NUM_SHFT)
+#define XWIDGET_MFG_NUM(widgetid) (((widgetid) & WIDGET_MFG_NUM) >> WIDGET_MFG_NUM_SHFT)
+#define XWIDGET_PART_REV_NUM(widgetid) ((XWIDGET_PART_NUM(widgetid) << 4) | \
+                                        XWIDGET_REV_NUM(widgetid))
+#define XWIDGET_PART_REV_NUM_REV(partrev) (partrev & 0xf)
+
+/* widget configuration registers */
+struct widget_cfg{
+	u32	w_id;	/* 0x04 */
+	u32	w_pad_0;	/* 0x00 */
+	u32	w_status;	/* 0x0c */
+	u32	w_pad_1;	/* 0x08 */
+	u32	w_err_upper_addr;	/* 0x14 */
+	u32	w_pad_2;	/* 0x10 */
+	u32	w_err_lower_addr;	/* 0x1c */
+	u32	w_pad_3;	/* 0x18 */
+	u32	w_control;	/* 0x24 */
+	u32	w_pad_4;	/* 0x20 */
+	u32	w_req_timeout;	/* 0x2c */
+	u32	w_pad_5;	/* 0x28 */
+	u32	w_intdest_upper_addr;	/* 0x34 */
+	u32	w_pad_6;	/* 0x30 */
+	u32	w_intdest_lower_addr;	/* 0x3c */
+	u32	w_pad_7;	/* 0x38 */
+	u32	w_err_cmd_word;	/* 0x44 */
+	u32	w_pad_8;	/* 0x40 */
+	u32	w_llp_cfg;	/* 0x4c */
+	u32	w_pad_9;	/* 0x48 */
+	u32	w_tflush;	/* 0x54 */
+	u32	w_pad_10;	/* 0x50 */
+};
+
+/*
+ * Crosstalk Widget Hardware Identification, as defined in the Crosstalk spec.
+ */
+struct xwidget_hwid{
+	int		mfg_num;
+	int		rev_num;
+	int		part_num;
+};
+
+struct xwidget_info{
+
+	struct xwidget_hwid	xwi_hwid;	/* Widget Identification */
+	char			xwi_masterxid;	/* Hub's Widget Port Number */
+	void			*xwi_hubinfo;     /* Hub's provider private info */
+	u64			*xwi_hub_provider; /* prom provider functions */
+	void			*xwi_vertex;
+};
+
+#endif                          /* _ASM_IA64_SN_XTALK_XWIDGET_H */
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
new file mode 100644
index 00000000..d27df1d4
--- /dev/null
+++ b/arch/ia64/sn/kernel/Makefile
@@ -0,0 +1,18 @@
+# arch/ia64/sn/kernel/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1999,2001-2006,2008 Silicon Graphics, Inc.  All Rights Reserved.
+#
+
+ccflags-y := -Iarch/ia64/sn/include
+
+obj-y				+= setup.o bte.o bte_error.o irq.o mca.o idle.o \
+				   huberror.o io_acpi_init.o io_common.o \
+				   io_init.o iomv.o klconflib.o pio_phys.o \
+				   sn2/
+obj-$(CONFIG_IA64_GENERIC)      += machvec.o
+obj-$(CONFIG_SGI_TIOCX)		+= tiocx.o
+obj-$(CONFIG_PCI_MSI)		+= msi_sn.o
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
new file mode 100644
index 00000000..cad775a1
--- /dev/null
+++ b/arch/ia64/sn/kernel/bte.c
@@ -0,0 +1,471 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/module.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/shubio.h>
+#include <asm/nodedata.h>
+#include <asm/delay.h>
+
+#include <linux/bootmem.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/sn/bte.h>
+
+#ifndef L1_CACHE_MASK
+#define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
+#endif
+
+/* two interfaces on two btes */
+#define MAX_INTERFACES_TO_TRY		4
+#define MAX_NODES_TO_TRY		2
+
+static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
+{
+	nodepda_t *tmp_nodepda;
+
+	if (nasid_to_cnodeid(nasid) == -1)
+		return (struct bteinfo_s *)NULL;
+
+	tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid));
+	return &tmp_nodepda->bte_if[interface];
+
+}
+
+static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode)
+{
+	if (is_shub2()) {
+		BTE_CTRL_STORE(bte, (IBLS_BUSY | ((len) | (mode) << 24)));
+	} else {
+		BTE_LNSTAT_STORE(bte, len);
+		BTE_CTRL_STORE(bte, mode);
+	}
+}
+
+/************************************************************************
+ * Block Transfer Engine copy related functions.
+ *
+ ***********************************************************************/
+
+/*
+ * bte_copy(src, dest, len, mode, notification)
+ *
+ * Use the block transfer engine to move kernel memory from src to dest
+ * using the assigned mode.
+ *
+ * Parameters:
+ *   src - physical address of the transfer source.
+ *   dest - physical address of the transfer destination.
+ *   len - number of bytes to transfer from source to dest.
+ *   mode - hardware defined.  See reference information
+ *          for IBCT0/1 in the SHUB Programmers Reference
+ *   notification - kernel virtual address of the notification cache
+ *                  line.  If NULL, the default is used and
+ *                  the bte_copy is synchronous.
+ *
+ * NOTE:  This function requires src, dest, and len to
+ * be cacheline aligned.
+ */
+bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
+{
+	u64 transfer_size;
+	u64 transfer_stat;
+	u64 notif_phys_addr;
+	struct bteinfo_s *bte;
+	bte_result_t bte_status;
+	unsigned long irq_flags;
+	unsigned long itc_end = 0;
+	int nasid_to_try[MAX_NODES_TO_TRY];
+	int my_nasid = cpuid_to_nasid(raw_smp_processor_id());
+	int bte_if_index, nasid_index;
+	int bte_first, btes_per_node = BTES_PER_NODE;
+
+	BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n",
+		    src, dest, len, mode, notification));
+
+	if (len == 0) {
+		return BTE_SUCCESS;
+	}
+
+	BUG_ON(len & L1_CACHE_MASK);
+	BUG_ON(src & L1_CACHE_MASK);
+	BUG_ON(dest & L1_CACHE_MASK);
+	BUG_ON(len > BTE_MAX_XFER);
+
+	/*
+	 * Start with interface corresponding to cpu number
+	 */
+	bte_first = raw_smp_processor_id() % btes_per_node;
+
+	if (mode & BTE_USE_DEST) {
+		/* try remote then local */
+		nasid_to_try[0] = NASID_GET(dest);
+		if (mode & BTE_USE_ANY) {
+			nasid_to_try[1] = my_nasid;
+		} else {
+			nasid_to_try[1] = (int)NULL;
+		}
+	} else {
+		/* try local then remote */
+		nasid_to_try[0] = my_nasid;
+		if (mode & BTE_USE_ANY) {
+			nasid_to_try[1] = NASID_GET(dest);
+		} else {
+			nasid_to_try[1] = (int)NULL;
+		}
+	}
+
+retry_bteop:
+	do {
+		local_irq_save(irq_flags);
+
+		bte_if_index = bte_first;
+		nasid_index = 0;
+
+		/* Attempt to lock one of the BTE interfaces. */
+		while (nasid_index < MAX_NODES_TO_TRY) {
+			bte = bte_if_on_node(nasid_to_try[nasid_index],bte_if_index);
+
+			if (bte == NULL) {
+				nasid_index++;
+				continue;
+			}
+
+			if (spin_trylock(&bte->spinlock)) {
+				if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) ||
+				    (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) {
+					/* Got the lock but BTE still busy */
+					spin_unlock(&bte->spinlock);
+				} else {
+					/* we got the lock and it's not busy */
+					break;
+				}
+			}
+
+			bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */
+			if (bte_if_index == bte_first) {
+				/*
+				 * We've tried all interfaces on this node
+				 */
+				nasid_index++;
+			}
+
+			bte = NULL;
+		}
+
+		if (bte != NULL) {
+			break;
+		}
+
+		local_irq_restore(irq_flags);
+
+		if (!(mode & BTE_WACQUIRE)) {
+			return BTEFAIL_NOTAVAIL;
+		}
+	} while (1);
+
+	if (notification == NULL) {
+		/* User does not want to be notified. */
+		bte->most_rcnt_na = &bte->notify;
+	} else {
+		bte->most_rcnt_na = notification;
+	}
+
+	/* Calculate the number of cache lines to transfer. */
+	transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);
+
+	/* Initialize the notification to a known value. */
+	*bte->most_rcnt_na = BTE_WORD_BUSY;
+	notif_phys_addr = (u64)bte->most_rcnt_na;
+
+	/* Set the source and destination registers */
+	BTE_PRINTKV(("IBSA = 0x%lx)\n", src));
+	BTE_SRC_STORE(bte, src);
+	BTE_PRINTKV(("IBDA = 0x%lx)\n", dest));
+	BTE_DEST_STORE(bte, dest);
+
+	/* Set the notification register */
+	BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr));
+	BTE_NOTIF_STORE(bte, notif_phys_addr);
+
+	/* Initiate the transfer */
+	BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
+	bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode));
+
+	itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);
+
+	spin_unlock_irqrestore(&bte->spinlock, irq_flags);
+
+	if (notification != NULL) {
+		return BTE_SUCCESS;
+	}
+
+	while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) {
+		cpu_relax();
+		if (ia64_get_itc() > itc_end) {
+			BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n",
+				NASID_GET(bte->bte_base_addr), bte->bte_num,
+				BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) );
+			bte->bte_error_count++;
+			bte->bh_error = IBLS_ERROR;
+			bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode));
+			*bte->most_rcnt_na = BTE_WORD_AVAILABLE;
+			goto retry_bteop;
+		}
+	}
+
+	BTE_PRINTKV((" Delay Done.  IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
+		     BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
+
+	if (transfer_stat & IBLS_ERROR) {
+		bte_status = BTE_GET_ERROR_STATUS(transfer_stat);
+	} else {
+		bte_status = BTE_SUCCESS;
+	}
+	*bte->most_rcnt_na = BTE_WORD_AVAILABLE;
+
+	BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
+		    BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
+
+	return bte_status;
+}
+
+EXPORT_SYMBOL(bte_copy);
+
+/*
+ * bte_unaligned_copy(src, dest, len, mode)
+ *
+ * use the block transfer engine to move kernel
+ * memory from src to dest using the assigned mode.
+ *
+ * Parameters:
+ *   src - physical address of the transfer source.
+ *   dest - physical address of the transfer destination.
+ *   len - number of bytes to transfer from source to dest.
+ *   mode - hardware defined.  See reference information
+ *          for IBCT0/1 in the SGI documentation.
+ *
+ * NOTE: If the source, dest, and len are all cache line aligned,
+ * then it would be _FAR_ preferable to use bte_copy instead.
+ */
+bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
+{
+	int destFirstCacheOffset;
+	u64 headBteSource;
+	u64 headBteLen;
+	u64 headBcopySrcOffset;
+	u64 headBcopyDest;
+	u64 headBcopyLen;
+	u64 footBteSource;
+	u64 footBteLen;
+	u64 footBcopyDest;
+	u64 footBcopyLen;
+	bte_result_t rv;
+	char *bteBlock, *bteBlock_unaligned;
+
+	if (len == 0) {
+		return BTE_SUCCESS;
+	}
+
+	/* temporary buffer used during unaligned transfers */
+	bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL);
+	if (bteBlock_unaligned == NULL) {
+		return BTEFAIL_NOTAVAIL;
+	}
+	bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned);
+
+	headBcopySrcOffset = src & L1_CACHE_MASK;
+	destFirstCacheOffset = dest & L1_CACHE_MASK;
+
+	/*
+	 * At this point, the transfer is broken into
+	 * (up to) three sections.  The first section is
+	 * from the start address to the first physical
+	 * cache line, the second is from the first physical
+	 * cache line to the last complete cache line,
+	 * and the third is from the last cache line to the
+	 * end of the buffer.  The first and third sections
+	 * are handled by bte copying into a temporary buffer
+	 * and then bcopy'ing the necessary section into the
+	 * final location.  The middle section is handled with
+	 * a standard bte copy.
+	 *
+	 * One nasty exception to the above rule is when the
+	 * source and destination are not symmetrically
+	 * mis-aligned.  If the source offset from the first
+	 * cache line is different from the destination offset,
+	 * we make the first section be the entire transfer
+	 * and the bcopy the entire block into place.
+	 */
+	if (headBcopySrcOffset == destFirstCacheOffset) {
+
+		/*
+		 * Both the source and destination are the same
+		 * distance from a cache line boundary so we can
+		 * use the bte to transfer the bulk of the
+		 * data.
+		 */
+		headBteSource = src & ~L1_CACHE_MASK;
+		headBcopyDest = dest;
+		if (headBcopySrcOffset) {
+			headBcopyLen =
+			    (len >
+			     (L1_CACHE_BYTES -
+			      headBcopySrcOffset) ? L1_CACHE_BYTES
+			     - headBcopySrcOffset : len);
+			headBteLen = L1_CACHE_BYTES;
+		} else {
+			headBcopyLen = 0;
+			headBteLen = 0;
+		}
+
+		if (len > headBcopyLen) {
+			footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK;
+			footBteLen = L1_CACHE_BYTES;
+
+			footBteSource = src + len - footBcopyLen;
+			footBcopyDest = dest + len - footBcopyLen;
+
+			if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
+				/*
+				 * We have two contiguous bcopy
+				 * blocks.  Merge them.
+				 */
+				headBcopyLen += footBcopyLen;
+				headBteLen += footBteLen;
+			} else if (footBcopyLen > 0) {
+				rv = bte_copy(footBteSource,
+					      ia64_tpa((unsigned long)bteBlock),
+					      footBteLen, mode, NULL);
+				if (rv != BTE_SUCCESS) {
+					kfree(bteBlock_unaligned);
+					return rv;
+				}
+
+				memcpy(__va(footBcopyDest),
+				       (char *)bteBlock, footBcopyLen);
+			}
+		} else {
+			footBcopyLen = 0;
+			footBteLen = 0;
+		}
+
+		if (len > (headBcopyLen + footBcopyLen)) {
+			/* now transfer the middle. */
+			rv = bte_copy((src + headBcopyLen),
+				      (dest +
+				       headBcopyLen),
+				      (len - headBcopyLen -
+				       footBcopyLen), mode, NULL);
+			if (rv != BTE_SUCCESS) {
+				kfree(bteBlock_unaligned);
+				return rv;
+			}
+
+		}
+	} else {
+
+		/*
+		 * The transfer is not symmetric, we will
+		 * allocate a buffer large enough for all the
+		 * data, bte_copy into that buffer and then
+		 * bcopy to the destination.
+		 */
+
+		headBcopySrcOffset = src & L1_CACHE_MASK;
+		headBcopyDest = dest;
+		headBcopyLen = len;
+
+		headBteSource = src - headBcopySrcOffset;
+		/* Add the leading and trailing bytes from source */
+		headBteLen = L1_CACHE_ALIGN(len + headBcopySrcOffset);
+	}
+
+	if (headBcopyLen > 0) {
+		rv = bte_copy(headBteSource,
+			      ia64_tpa((unsigned long)bteBlock), headBteLen,
+			      mode, NULL);
+		if (rv != BTE_SUCCESS) {
+			kfree(bteBlock_unaligned);
+			return rv;
+		}
+
+		memcpy(__va(headBcopyDest), ((char *)bteBlock +
+					     headBcopySrcOffset), headBcopyLen);
+	}
+	kfree(bteBlock_unaligned);
+	return BTE_SUCCESS;
+}
+
+EXPORT_SYMBOL(bte_unaligned_copy);
+
+/************************************************************************
+ * Block Transfer Engine initialization functions.
+ *
+ ***********************************************************************/
+
+/*
+ * bte_init_node(nodepda, cnode)
+ *
+ * Initialize the nodepda structure with BTE base addresses and
+ * spinlocks.
+ */
+void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
+{
+	int i;
+
+	/*
+	 * Indicate that all the block transfer engines on this node
+	 * are available.
+	 */
+
+	/*
+	 * Allocate one bte_recover_t structure per node.  It holds
+	 * the recovery lock for node.  All the bte interface structures
+	 * will point at this one bte_recover structure to get the lock.
+	 */
+	spin_lock_init(&mynodepda->bte_recovery_lock);
+	init_timer(&mynodepda->bte_recovery_timer);
+	mynodepda->bte_recovery_timer.function = bte_error_handler;
+	mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;
+
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		u64 *base_addr;
+
+		/* Which link status register should we use? */
+		base_addr = (u64 *)
+		    REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i));
+		mynodepda->bte_if[i].bte_base_addr = base_addr;
+		mynodepda->bte_if[i].bte_source_addr = BTE_SOURCE_ADDR(base_addr);
+		mynodepda->bte_if[i].bte_destination_addr = BTE_DEST_ADDR(base_addr);
+		mynodepda->bte_if[i].bte_control_addr = BTE_CTRL_ADDR(base_addr);
+		mynodepda->bte_if[i].bte_notify_addr = BTE_NOTIF_ADDR(base_addr);
+
+		/*
+		 * Initialize the notification and spinlock
+		 * so the first transfer can occur.
+		 */
+		mynodepda->bte_if[i].most_rcnt_na =
+		    &(mynodepda->bte_if[i].notify);
+		mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE;
+		spin_lock_init(&mynodepda->bte_if[i].spinlock);
+
+		mynodepda->bte_if[i].bte_cnode = cnode;
+		mynodepda->bte_if[i].bte_error_count = 0;
+		mynodepda->bte_if[i].bte_num = i;
+		mynodepda->bte_if[i].cleanup_active = 0;
+		mynodepda->bte_if[i].bh_error = 0;
+	}
+
+}
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
new file mode 100644
index 00000000..4cb09f3f
--- /dev/null
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -0,0 +1,260 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/types.h>
+#include <asm/sn/sn_sal.h>
+#include "ioerror.h"
+#include <asm/sn/addrs.h>
+#include <asm/sn/shubio.h>
+#include <asm/sn/geo.h>
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+#include <asm/sn/bte.h>
+#include <asm/param.h>
+
+/*
+ * Bte error handling is done in two parts.  The first captures
+ * any crb related errors.  Since there can be multiple crbs per
+ * interface and multiple interfaces active, we need to wait until
+ * all active crbs are completed.  This is the first job of the
+ * second part error handler.  When all bte related CRBs are cleanly
+ * completed, it resets the interfaces and gets them ready for new
+ * transfers to be queued.
+ */
+
+void bte_error_handler(unsigned long);
+
+/*
+ * Wait until all BTE related CRBs are completed
+ * and then reset the interfaces.
+ */
+int shub1_bte_error_handler(unsigned long _nodepda)
+{
+	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
+	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
+	nasid_t nasid;
+	int i;
+	int valid_crbs;
+	ii_imem_u_t imem;	/* II IMEM Register */
+	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
+	ii_ibcr_u_t ibcr;
+	ii_icmr_u_t icmr;
+	ii_ieclr_u_t ieclr;
+
+	BTE_PRINTK(("shub1_bte_error_handler(%p) - %d\n", err_nodepda,
+		    smp_processor_id()));
+
+	if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) &&
+	    (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
+		BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
+			    smp_processor_id()));
+		return 1;
+	}
+
+	/* Determine information about our hub */
+	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
+
+	/*
+	 * A BTE transfer can use multiple CRBs.  We need to make sure
+	 * that all the BTE CRBs are complete (or timed out) before
+	 * attempting to clean up the error.  Resetting the BTE while
+	 * there are still BTE CRBs active will hang the BTE.
+	 * We should look at all the CRBs to see if they are allocated
+	 * to the BTE and see if they are still active.  When none
+	 * are active, we can continue with the cleanup.
+	 *
+	 * We also want to make sure that the local NI port is up.
+	 * When a router resets the NI port can go down, while it
+	 * goes through the LLP handshake, but then comes back up.
+	 */
+	icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR);
+	if (icmr.ii_icmr_fld_s.i_crb_mark != 0) {
+		/*
+		 * There are errors which still need to be cleaned up by
+		 * hubiio_crb_error_handler
+		 */
+		mod_timer(recovery_timer, jiffies + (HZ * 5));
+		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
+			    smp_processor_id()));
+		return 1;
+	}
+	if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
+
+		valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld;
+
+		for (i = 0; i < IIO_NUM_CRBS; i++) {
+			if (!((1 << i) & valid_crbs)) {
+				/* This crb was not marked as valid, ignore */
+				continue;
+			}
+			icrbd.ii_icrb0_d_regval =
+			    REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
+			if (icrbd.d_bteop) {
+				mod_timer(recovery_timer, jiffies + (HZ * 5));
+				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
+					    err_nodepda, smp_processor_id(),
+					    i));
+				return 1;
+			}
+		}
+	}
+
+	BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id()));
+	/* Re-enable both bte interfaces */
+	imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM);
+	imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1;
+	REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval);
+
+	/* Clear BTE0/1 error bits */
+	ieclr.ii_ieclr_regval = 0;
+	if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS)
+		ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1;
+	if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS)
+		ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1;
+	REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval);
+
+	/* Reinitialize both BTE state machines. */
+	ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR);
+	ibcr.ii_ibcr_fld_s.i_soft_reset = 1;
+	REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
+
+	del_timer(recovery_timer);
+	return 0;
+}
+
+/*
+ * Wait until all BTE related CRBs are completed
+ * and then reset the interfaces.
+ */
+int shub2_bte_error_handler(unsigned long _nodepda)
+{
+	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
+	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
+	struct bteinfo_s *bte;
+	nasid_t nasid;
+	u64 status;
+	int i;
+
+	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
+
+	/*
+	 * Verify that all the BTEs are complete
+	 */
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		bte = &err_nodepda->bte_if[i];
+		status = BTE_LNSTAT_LOAD(bte);
+		if (status & IBLS_ERROR) {
+			bte->bh_error = BTE_SHUB2_ERROR(status);
+			continue;
+		}
+		if (!(status & IBLS_BUSY))
+			continue;
+		mod_timer(recovery_timer, jiffies + (HZ * 5));
+		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
+			    smp_processor_id()));
+		return 1;
+	}
+	if (ia64_sn_bte_recovery(nasid))
+		panic("bte_error_handler(): Fatal BTE Error");
+
+	del_timer(recovery_timer);
+	return 0;
+}
+
+/*
+ * Wait until all BTE related CRBs are completed
+ * and then reset the interfaces.
+ */
+void bte_error_handler(unsigned long _nodepda)
+{
+	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
+	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
+	int i;
+	unsigned long irq_flags;
+	volatile u64 *notify;
+	bte_result_t bh_error;
+
+	BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda,
+		    smp_processor_id()));
+
+	spin_lock_irqsave(recovery_lock, irq_flags);
+
+	/*
+	 * Lock all interfaces on this node to prevent new transfers
+	 * from being queued.
+	 */
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		if (err_nodepda->bte_if[i].cleanup_active) {
+			continue;
+		}
+		spin_lock(&err_nodepda->bte_if[i].spinlock);
+		BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda,
+			    smp_processor_id(), i));
+		err_nodepda->bte_if[i].cleanup_active = 1;
+	}
+
+	if (is_shub1()) {
+		if (shub1_bte_error_handler(_nodepda)) {
+			spin_unlock_irqrestore(recovery_lock, irq_flags);
+			return;
+		}
+	} else {
+		if (shub2_bte_error_handler(_nodepda)) {
+			spin_unlock_irqrestore(recovery_lock, irq_flags);
+			return;
+		}
+	}
+
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		bh_error = err_nodepda->bte_if[i].bh_error;
+		if (bh_error != BTE_SUCCESS) {
+			/* There is an error which needs to be notified */
+			notify = err_nodepda->bte_if[i].most_rcnt_na;
+			BTE_PRINTK(("cnode %d bte %d error=0x%lx\n",
+				    err_nodepda->bte_if[i].bte_cnode,
+				    err_nodepda->bte_if[i].bte_num,
+				    IBLS_ERROR | (u64) bh_error));
+			*notify = IBLS_ERROR | bh_error;
+			err_nodepda->bte_if[i].bh_error = BTE_SUCCESS;
+		}
+
+		err_nodepda->bte_if[i].cleanup_active = 0;
+		BTE_PRINTK(("eh:%p:%d Unlocked %d\n", err_nodepda,
+			    smp_processor_id(), i));
+		spin_unlock(&err_nodepda->bte_if[i].spinlock);
+	}
+
+	spin_unlock_irqrestore(recovery_lock, irq_flags);
+}
+
+/*
+ * First part error handler.  This is called whenever any error CRB interrupt
+ * is generated by the II.
+ */
+void
+bte_crb_error_handler(cnodeid_t cnode, int btenum,
+                      int crbnum, ioerror_t * ioe, int bteop)
+{
+	struct bteinfo_s *bte;
+
+
+	bte = &(NODEPDA(cnode)->bte_if[btenum]);
+
+	/*
+	 * The caller has already figured out the error type, we save that
+	 * in the bte handle structure for the thread exercising the
+	 * interface to consume.
+	 */
+	bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET;
+	bte->bte_error_count++;
+
+	BTE_PRINTK(("Got an error on cnode %d bte %d: HW error type 0x%x\n",
+		bte->bte_cnode, bte->bte_num, ioe->ie_errortype));
+	bte_error_handler((unsigned long) NODEPDA(cnode));
+}
+
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
new file mode 100644
index 00000000..08b0d9bb
--- /dev/null
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -0,0 +1,218 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000,2002-2007 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <asm/delay.h>
+#include <asm/sn/sn_sal.h>
+#include "ioerror.h"
+#include <asm/sn/addrs.h>
+#include <asm/sn/shubio.h>
+#include <asm/sn/geo.h>
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+#include <asm/sn/bte.h>
+
+void hubiio_crb_error_handler(struct hubdev_info *hubdev_info);
+extern void bte_crb_error_handler(cnodeid_t, int, int, ioerror_t *,
+				  int);
+static irqreturn_t hub_eint_handler(int irq, void *arg)
+{
+	struct hubdev_info *hubdev_info;
+	struct ia64_sal_retval ret_stuff;
+	nasid_t nasid;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	hubdev_info = (struct hubdev_info *)arg;
+	nasid = hubdev_info->hdi_nasid;
+
+	if (is_shub1()) {
+		SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
+			(u64) nasid, 0, 0, 0, 0, 0, 0);
+
+		if ((int)ret_stuff.v0)
+			panic("%s: Fatal %s Error", __func__,
+				((nasid & 1) ? "TIO" : "HUBII"));
+
+		if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
+			(void)hubiio_crb_error_handler(hubdev_info);
+	} else
+		if (nasid & 1) {	/* TIO errors */
+			SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
+				(u64) nasid, 0, 0, 0, 0, 0, 0);
+
+			if ((int)ret_stuff.v0)
+				panic("%s: Fatal TIO Error", __func__);
+		} else
+			bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid)));
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Free the hub CRB "crbnum" which encountered an error.
+ * Assumption is, error handling was successfully done,
+ * and we now want to return the CRB back to Hub for normal usage.
+ *
+ * In order to free the CRB, all that's needed is to de-allocate it
+ *
+ * Assumption:
+ *      No other processor is mucking around with the hub control register.
+ *      So, upper layer has to single thread this.
+ */
+void hubiio_crb_free(struct hubdev_info *hubdev_info, int crbnum)
+{
+	ii_icrb0_b_u_t icrbb;
+
+	/*
+	 * The hardware does NOT clear the mark bit, so it must get cleared
+	 * here to be sure the error is not processed twice.
+	 */
+	icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(hubdev_info->hdi_nasid,
+					       IIO_ICRB_B(crbnum));
+	icrbb.b_mark = 0;
+	REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICRB_B(crbnum),
+		     icrbb.ii_icrb0_b_regval);
+	/*
+	 * Deallocate the register wait till hub indicates it's done.
+	 */
+	REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum));
+	while (REMOTE_HUB_L(hubdev_info->hdi_nasid, IIO_ICDR) & IIO_ICDR_PND)
+		cpu_relax();
+
+}
+
+/*
+ * hubiio_crb_error_handler
+ *
+ *	This routine gets invoked when a hub gets an error 
+ *	interrupt. So, the routine is running in interrupt context
+ *	at error interrupt level.
+ * Action:
+ *	It's responsible for identifying ALL the CRBs that are marked
+ *	with error, and process them. 
+ *	
+ * 	If you find the CRB that's marked with error, map this to the
+ *	reason it caused error, and invoke appropriate error handler.
+ *
+ *	XXX Be aware of the information in the context register.
+ *
+ * NOTE:
+ *	Use REMOTE_HUB_* macro instead of LOCAL_HUB_* so that the interrupt
+ *	handler can be run on any node. (not necessarily the node 
+ *	corresponding to the hub that encountered error).
+ */
+
+void hubiio_crb_error_handler(struct hubdev_info *hubdev_info)
+{
+	nasid_t nasid;
+	ii_icrb0_a_u_t icrba;	/* II CRB Register A */
+	ii_icrb0_b_u_t icrbb;	/* II CRB Register B */
+	ii_icrb0_c_u_t icrbc;	/* II CRB Register C */
+	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
+	ii_icrb0_e_u_t icrbe;	/* II CRB Register D */
+	int i;
+	int num_errors = 0;	/* Num of errors handled */
+	ioerror_t ioerror;
+
+	nasid = hubdev_info->hdi_nasid;
+
+	/*
+	 * XXX - Add locking for any recovery actions
+	 */
+	/*
+	 * Scan through all CRBs in the Hub, and handle the errors
+	 * in any of the CRBs marked.
+	 */
+	for (i = 0; i < IIO_NUM_CRBS; i++) {
+		/* Check this crb entry to see if it is in error. */
+		icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));
+
+		if (icrbb.b_mark == 0) {
+			continue;
+		}
+
+		icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));
+
+		IOERROR_INIT(&ioerror);
+
+		/* read other CRB error registers. */
+		icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
+		icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
+		icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));
+
+		IOERROR_SETVALUE(&ioerror, errortype, icrbb.b_ecode);
+
+		/* Check if this error is due to BTE operation,
+		 * and handle it separately.
+		 */
+		if (icrbd.d_bteop ||
+		    ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
+		      icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
+		     (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
+		      icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))) {
+
+			int bte_num;
+
+			if (icrbd.d_bteop)
+				bte_num = icrbc.c_btenum;
+			else	/* b_initiator bit 2 gives BTE number */
+				bte_num = (icrbb.b_initiator & 0x4) >> 2;
+
+			hubiio_crb_free(hubdev_info, i);
+
+			bte_crb_error_handler(nasid_to_cnodeid(nasid), bte_num,
+					      i, &ioerror, icrbd.d_bteop);
+			num_errors++;
+			continue;
+		}
+	}
+}
+
+/*
+ * Function	: hub_error_init
+ * Purpose	: initialize the error handling requirements for a given hub.
+ * Parameters	: cnode, the compact nodeid.
+ * Assumptions	: Called only once per hub, either by a local cpu. Or by a
+ *			remote cpu, when this hub is headless.(cpuless)
+ * Returns	: None
+ */
+void hub_error_init(struct hubdev_info *hubdev_info)
+{
+
+	if (request_irq(SGI_II_ERROR, hub_eint_handler, IRQF_SHARED,
+			"SN_hub_error", hubdev_info)) {
+		printk(KERN_ERR "hub_error_init: Failed to request_irq for 0x%p\n",
+		    hubdev_info);
+		return;
+	}
+	sn_set_err_irq_affinity(SGI_II_ERROR);
+}
+
+
+/*
+ * Function	: ice_error_init
+ * Purpose	: initialize the error handling requirements for a given tio.
+ * Parameters	: cnode, the compact nodeid.
+ * Assumptions	: Called only once per tio.
+ * Returns	: None
+ */
+void ice_error_init(struct hubdev_info *hubdev_info)
+{
+
+        if (request_irq
+            (SGI_TIO_ERROR, (void *)hub_eint_handler, IRQF_SHARED, "SN_TIO_error",
+             (void *)hubdev_info)) {
+                printk("ice_error_init: request_irq() error hubdev_info 0x%p\n",
+                       hubdev_info);
+		return;
+	}
+	sn_set_err_irq_affinity(SGI_TIO_ERROR);
+}
+
diff --git a/arch/ia64/sn/kernel/idle.c b/arch/ia64/sn/kernel/idle.c
new file mode 100644
index 00000000..49d178f0
--- /dev/null
+++ b/arch/ia64/sn/kernel/idle.c
@@ -0,0 +1,30 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2001-2004 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#include <asm/sn/leds.h>
+
+void snidle(int state)
+{
+	if (state) {
+		if (pda->idle_flag == 0) {
+			/* 
+			 * Turn the activity LED off.
+			 */
+			set_led_bits(0, LED_CPU_ACTIVITY);
+		}
+
+		pda->idle_flag = 1;
+	} else {
+		/* 
+		 * Turn the activity LED on.
+		 */
+		set_led_bits(LED_CPU_ACTIVITY, LED_CPU_ACTIVITY);
+
+		pda->idle_flag = 0;
+	}
+}
diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c
new file mode 100644
index 00000000..8cdcb173
--- /dev/null
+++ b/arch/ia64/sn/kernel/io_acpi_init.c
@@ -0,0 +1,509 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <asm/sn/types.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/sn_sal.h>
+#include "xtalk/hubdev.h"
+#include <linux/acpi.h>
+#include <linux/slab.h>
+
+
+/*
+ * The code in this file will only be executed when running with
+ * a PROM that has ACPI IO support. (i.e., SN_ACPI_BASE_SUPPORT() == 1)
+ */
+
+
+/*
+ * This value must match the UUID the PROM uses
+ * (io/acpi/defblk.c) when building a vendor descriptor.
+ */
+struct acpi_vendor_uuid sn_uuid = {
+	.subtype = 0,
+	.data	= { 0x2c, 0xc6, 0xa6, 0xfe, 0x9c, 0x44, 0xda, 0x11,
+		    0xa2, 0x7c, 0x08, 0x00, 0x69, 0x13, 0xea, 0x51 },
+};
+
+struct sn_pcidev_match {
+	u8 bus;
+	unsigned int devfn;
+	acpi_handle handle;
+};
+
+/*
+ * Perform the early IO init in PROM.
+ */
+static long
+sal_ioif_init(u64 *result)
+{
+	struct ia64_sal_retval isrv = {0,0,0,0};
+
+	SAL_CALL_NOLOCK(isrv,
+			SN_SAL_IOIF_INIT, 0, 0, 0, 0, 0, 0, 0);
+	*result = isrv.v0;
+	return isrv.status;
+}
+
+/*
+ * sn_acpi_hubdev_init() - This function is called by acpi_ns_get_device_callback()
+ *			   for all SGIHUB and SGITIO acpi devices defined in the
+ *			   DSDT. It obtains the hubdev_info pointer from the
+ *			   ACPI vendor resource, which the PROM setup, and sets up the
+ *			   hubdev_info in the pda.
+ */
+
+static acpi_status __init
+sn_acpi_hubdev_init(acpi_handle handle, u32 depth, void *context, void **ret)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	u64 addr;
+	struct hubdev_info *hubdev;
+	struct hubdev_info *hubdev_ptr;
+	int i;
+	u64 nasid;
+	struct acpi_resource *resource;
+	acpi_status status;
+	struct acpi_resource_vendor_typed *vendor;
+	extern void sn_common_hubdev_init(struct hubdev_info *);
+
+	status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
+					  &sn_uuid, &buffer);
+	if (ACPI_FAILURE(status)) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR
+		       "sn_acpi_hubdev_init: acpi_get_vendor_resource() "
+		       "(0x%x) failed for: %s\n", status,
+			(char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		return AE_OK;		/* Continue walking namespace */
+	}
+
+	resource = buffer.pointer;
+	vendor = &resource->data.vendor_typed;
+	if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) !=
+	    sizeof(struct hubdev_info *)) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR
+		       "sn_acpi_hubdev_init: Invalid vendor data length: "
+		       "%d for: %s\n",
+			vendor->byte_length, (char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		goto exit;
+	}
+
+	memcpy(&addr, vendor->byte_data, sizeof(struct hubdev_info *));
+	hubdev_ptr = __va((struct hubdev_info *) addr);
+
+	nasid = hubdev_ptr->hdi_nasid;
+	i = nasid_to_cnodeid(nasid);
+	hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
+	*hubdev = *hubdev_ptr;
+	sn_common_hubdev_init(hubdev);
+
+exit:
+	kfree(buffer.pointer);
+	return AE_OK;		/* Continue walking namespace */
+}
+
+/*
+ * sn_get_bussoft_ptr() - The pcibus_bussoft pointer is found in
+ *			  the ACPI Vendor resource for this bus.
+ */
+static struct pcibus_bussoft *
+sn_get_bussoft_ptr(struct pci_bus *bus)
+{
+	u64 addr;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_handle handle;
+	struct pcibus_bussoft *prom_bussoft_ptr;
+	struct acpi_resource *resource;
+	acpi_status status;
+	struct acpi_resource_vendor_typed *vendor;
+
+
+	handle = PCI_CONTROLLER(bus)->acpi_handle;
+	status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
+					  &sn_uuid, &buffer);
+	if (ACPI_FAILURE(status)) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR "%s: "
+		       "acpi_get_vendor_resource() failed (0x%x) for: %s\n",
+		       __func__, status, (char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		return NULL;
+	}
+	resource = buffer.pointer;
+	vendor = &resource->data.vendor_typed;
+
+	if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) !=
+	     sizeof(struct pcibus_bussoft *)) {
+		printk(KERN_ERR
+		       "%s: Invalid vendor data length %d\n",
+			__func__, vendor->byte_length);
+		kfree(buffer.pointer);
+		return NULL;
+	}
+	memcpy(&addr, vendor->byte_data, sizeof(struct pcibus_bussoft *));
+	prom_bussoft_ptr = __va((struct pcibus_bussoft *) addr);
+	kfree(buffer.pointer);
+
+	return prom_bussoft_ptr;
+}
+
+/*
+ * sn_extract_device_info - Extract the pcidev_info and the sn_irq_info
+ *			    pointers from the vendor resource using the
+ *			    provided acpi handle, and copy the structures
+ *			    into the argument buffers.
+ */
+static int
+sn_extract_device_info(acpi_handle handle, struct pcidev_info **pcidev_info,
+		    struct sn_irq_info **sn_irq_info)
+{
+	u64 addr;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct sn_irq_info *irq_info, *irq_info_prom;
+	struct pcidev_info *pcidev_ptr, *pcidev_prom_ptr;
+	struct acpi_resource *resource;
+	int ret = 0;
+	acpi_status status;
+	struct acpi_resource_vendor_typed *vendor;
+
+	/*
+	 * The pointer to this device's pcidev_info structure in
+	 * the PROM, is in the vendor resource.
+	 */
+	status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
+					  &sn_uuid, &buffer);
+	if (ACPI_FAILURE(status)) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR
+		       "%s: acpi_get_vendor_resource() failed (0x%x) for: %s\n",
+			__func__, status, (char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		return 1;
+	}
+
+	resource = buffer.pointer;
+	vendor = &resource->data.vendor_typed;
+	if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) !=
+	    sizeof(struct pci_devdev_info *)) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR
+		       "%s: Invalid vendor data length: %d for: %s\n",
+			 __func__, vendor->byte_length,
+			(char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		ret = 1;
+		goto exit;
+	}
+
+	pcidev_ptr = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
+	if (!pcidev_ptr)
+		panic("%s: Unable to alloc memory for pcidev_info", __func__);
+
+	memcpy(&addr, vendor->byte_data, sizeof(struct pcidev_info *));
+	pcidev_prom_ptr = __va(addr);
+	memcpy(pcidev_ptr, pcidev_prom_ptr, sizeof(struct pcidev_info));
+
+	/* Get the IRQ info */
+	irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (!irq_info)
+		 panic("%s: Unable to alloc memory for sn_irq_info", __func__);
+
+	if (pcidev_ptr->pdi_sn_irq_info) {
+		irq_info_prom = __va(pcidev_ptr->pdi_sn_irq_info);
+		memcpy(irq_info, irq_info_prom, sizeof(struct sn_irq_info));
+	}
+
+	*pcidev_info = pcidev_ptr;
+	*sn_irq_info = irq_info;
+
+exit:
+	kfree(buffer.pointer);
+	return ret;
+}
+
+static unsigned int
+get_host_devfn(acpi_handle device_handle, acpi_handle rootbus_handle)
+{
+	unsigned long long adr;
+	acpi_handle child;
+	unsigned int devfn;
+	int function;
+	acpi_handle parent;
+	int slot;
+	acpi_status status;
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	acpi_get_name(device_handle, ACPI_FULL_PATHNAME, &name_buffer);
+
+	/*
+	 * Do an upward search to find the root bus device, and
+	 * obtain the host devfn from the previous child device.
+	 */
+	child = device_handle;
+	while (child) {
+		status = acpi_get_parent(child, &parent);
+		if (ACPI_FAILURE(status)) {
+			printk(KERN_ERR "%s: acpi_get_parent() failed "
+			       "(0x%x) for: %s\n", __func__, status,
+				(char *)name_buffer.pointer);
+			panic("%s: Unable to find host devfn\n", __func__);
+		}
+		if (parent == rootbus_handle)
+			break;
+		child = parent;
+	}
+	if (!child) {
+		printk(KERN_ERR "%s: Unable to find root bus for: %s\n",
+		       __func__, (char *)name_buffer.pointer);
+		BUG();
+	}
+
+	status = acpi_evaluate_integer(child, METHOD_NAME__ADR, NULL, &adr);
+	if (ACPI_FAILURE(status)) {
+		printk(KERN_ERR "%s: Unable to get _ADR (0x%x) for: %s\n",
+		       __func__, status, (char *)name_buffer.pointer);
+		panic("%s: Unable to find host devfn\n", __func__);
+	}
+
+	kfree(name_buffer.pointer);
+
+	slot = (adr >> 16) & 0xffff;
+	function = adr & 0xffff;
+	devfn = PCI_DEVFN(slot, function);
+	return devfn;
+}
+
+/*
+ * find_matching_device - Callback routine to find the ACPI device
+ *			  that matches up with our pci_dev device.
+ *			  Matching is done on bus number and devfn.
+ *			  To find the bus number for a particular
+ *			  ACPI device, we must look at the _BBN method
+ *			  of its parent.
+ */
+static acpi_status
+find_matching_device(acpi_handle handle, u32 lvl, void *context, void **rv)
+{
+	unsigned long long bbn = -1;
+	unsigned long long adr;
+	acpi_handle parent = NULL;
+	acpi_status status;
+	unsigned int devfn;
+	int function;
+	int slot;
+	struct sn_pcidev_match *info = context;
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+        status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL,
+                                       &adr);
+        if (ACPI_SUCCESS(status)) {
+		status = acpi_get_parent(handle, &parent);
+		if (ACPI_FAILURE(status)) {
+			acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+			printk(KERN_ERR
+			       "%s: acpi_get_parent() failed (0x%x) for: %s\n",
+				__func__, status, (char *)name_buffer.pointer);
+			kfree(name_buffer.pointer);
+			return AE_OK;
+		}
+		status = acpi_evaluate_integer(parent, METHOD_NAME__BBN,
+					       NULL, &bbn);
+		if (ACPI_FAILURE(status)) {
+			acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+			printk(KERN_ERR
+			  "%s: Failed to find _BBN in parent of: %s\n",
+					__func__, (char *)name_buffer.pointer);
+			kfree(name_buffer.pointer);
+			return AE_OK;
+		}
+
+                slot = (adr >> 16) & 0xffff;
+                function = adr & 0xffff;
+                devfn = PCI_DEVFN(slot, function);
+                if ((info->devfn == devfn) && (info->bus == bbn)) {
+			/* We have a match! */
+			info->handle = handle;
+			return 1;
+		}
+	}
+	return AE_OK;
+}
+
+/*
+ * sn_acpi_get_pcidev_info - Search ACPI namespace for the acpi
+ *			     device matching the specified pci_dev,
+ *			     and return the pcidev info and irq info.
+ */
+int
+sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info,
+			struct sn_irq_info **sn_irq_info)
+{
+	unsigned int host_devfn;
+	struct sn_pcidev_match pcidev_match;
+	acpi_handle rootbus_handle;
+	unsigned long long segment;
+	acpi_status status;
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	rootbus_handle = PCI_CONTROLLER(dev)->acpi_handle;
+        status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL,
+                                       &segment);
+        if (ACPI_SUCCESS(status)) {
+		if (segment != pci_domain_nr(dev)) {
+			acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME,
+				&name_buffer);
+			printk(KERN_ERR
+			       "%s: Segment number mismatch, 0x%llx vs 0x%x for: %s\n",
+			       __func__, segment, pci_domain_nr(dev),
+			       (char *)name_buffer.pointer);
+			kfree(name_buffer.pointer);
+			return 1;
+		}
+	} else {
+		acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR "%s: Unable to get __SEG from: %s\n",
+		       __func__, (char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		return 1;
+	}
+
+	/*
+	 * We want to search all devices in this segment/domain
+	 * of the ACPI namespace for the matching ACPI device,
+	 * which holds the pcidev_info pointer in its vendor resource.
+	 */
+	pcidev_match.bus = dev->bus->number;
+	pcidev_match.devfn = dev->devfn;
+	pcidev_match.handle = NULL;
+
+	acpi_walk_namespace(ACPI_TYPE_DEVICE, rootbus_handle, ACPI_UINT32_MAX,
+			    find_matching_device, NULL, &pcidev_match, NULL);
+
+	if (!pcidev_match.handle) {
+		printk(KERN_ERR
+		       "%s: Could not find matching ACPI device for %s.\n",
+		       __func__, pci_name(dev));
+		return 1;
+	}
+
+	if (sn_extract_device_info(pcidev_match.handle, pcidev_info, sn_irq_info))
+		return 1;
+
+	/* Build up the pcidev_info.pdi_slot_host_handle */
+	host_devfn = get_host_devfn(pcidev_match.handle, rootbus_handle);
+	(*pcidev_info)->pdi_slot_host_handle =
+			((unsigned long) pci_domain_nr(dev) << 40) |
+					/* bus == 0 */
+					host_devfn;
+	return 0;
+}
+
+/*
+ * sn_acpi_slot_fixup - Obtain the pcidev_info and sn_irq_info.
+ *			Perform any SN specific slot fixup.
+ *			At present there does not appear to be
+ *			any generic way to handle a ROM image
+ *			that has been shadowed by the PROM, so
+ *			we pass a pointer to it	within the
+ *			pcidev_info structure.
+ */
+
+void
+sn_acpi_slot_fixup(struct pci_dev *dev)
+{
+	void __iomem *addr;
+	struct pcidev_info *pcidev_info = NULL;
+	struct sn_irq_info *sn_irq_info = NULL;
+	size_t image_size, size;
+
+	if (sn_acpi_get_pcidev_info(dev, &pcidev_info, &sn_irq_info)) {
+		panic("%s:  Failure obtaining pcidev_info for %s\n",
+		      __func__, pci_name(dev));
+	}
+
+	if (pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE]) {
+		/*
+		 * A valid ROM image exists and has been shadowed by the
+		 * PROM. Setup the pci_dev ROM resource with the address
+		 * of the shadowed copy, and the actual length of the ROM image.
+		 */
+		size = pci_resource_len(dev, PCI_ROM_RESOURCE);
+		addr = ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE],
+			       size);
+		image_size = pci_get_rom_size(dev, addr, size);
+		dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr;
+		dev->resource[PCI_ROM_RESOURCE].end =
+					(unsigned long) addr + image_size - 1;
+		dev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_BIOS_COPY;
+	}
+	sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info);
+}
+
+EXPORT_SYMBOL(sn_acpi_slot_fixup);
+
+
+/*
+ * sn_acpi_bus_fixup -  Perform SN specific setup of software structs
+ *			(pcibus_bussoft, pcidev_info) and hardware
+ *			registers, for the specified bus and devices under it.
+ */
+void
+sn_acpi_bus_fixup(struct pci_bus *bus)
+{
+	struct pci_dev *pci_dev = NULL;
+	struct pcibus_bussoft *prom_bussoft_ptr;
+
+	if (!bus->parent) {	/* If root bus */
+		prom_bussoft_ptr = sn_get_bussoft_ptr(bus);
+		if (prom_bussoft_ptr == NULL) {
+			printk(KERN_ERR
+			       "%s: 0x%04x:0x%02x Unable to "
+			       "obtain prom_bussoft_ptr\n",
+			       __func__, pci_domain_nr(bus), bus->number);
+			return;
+		}
+		sn_common_bus_fixup(bus, prom_bussoft_ptr);
+	}
+	list_for_each_entry(pci_dev, &bus->devices, bus_list) {
+		sn_acpi_slot_fixup(pci_dev);
+	}
+}
+
+/*
+ * sn_io_acpi_init - PROM has ACPI support for IO, defining at a minimum the
+ *		     nodes and root buses in the DSDT. As a result, bus scanning
+ *		     will be initiated by the Linux ACPI code.
+ */
+
+void __init
+sn_io_acpi_init(void)
+{
+	u64 result;
+	long status;
+
+	/* SN Altix does not follow the IOSAPIC IRQ routing model */
+	acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
+
+	/* Setup hubdev_info for all SGIHUB/SGITIO devices */
+	acpi_get_devices("SGIHUB", sn_acpi_hubdev_init, NULL, NULL);
+	acpi_get_devices("SGITIO", sn_acpi_hubdev_init, NULL, NULL);
+
+	status = sal_ioif_init(&result);
+	if (status || result)
+		panic("sal_ioif_init failed: [%lx] %s\n",
+		      status, ia64_sal_strerror(status));
+}
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
new file mode 100644
index 00000000..4433dd01
--- /dev/null
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -0,0 +1,566 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <asm/sn/types.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/sn_feature_sets.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/io.h>
+#include <asm/sn/l1.h>
+#include <asm/sn/module.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/simulator.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/tioca_provider.h>
+#include <asm/sn/tioce_provider.h>
+#include "xtalk/hubdev.h"
+#include "xtalk/xwidgetdev.h"
+#include <linux/acpi.h>
+#include <asm/sn/sn2/sn_hwperf.h>
+#include <asm/sn/acpi.h>
+
+extern void sn_init_cpei_timer(void);
+extern void register_sn_procfs(void);
+extern void sn_io_acpi_init(void);
+extern void sn_io_init(void);
+
+
+static struct list_head sn_sysdata_list;
+
+/* sysdata list struct */
+struct sysdata_el {
+	struct list_head entry;
+	void *sysdata;
+};
+
+int sn_ioif_inited;		/* SN I/O infrastructure initialized? */
+
+int sn_acpi_rev;		/* SN ACPI revision */
+EXPORT_SYMBOL_GPL(sn_acpi_rev);
+
+struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES];	/* indexed by asic type */
+
+/*
+ * Hooks and struct for unsupported pci providers
+ */
+
+static dma_addr_t
+sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type)
+{
+	return 0;
+}
+
+static void
+sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction)
+{
+	return;
+}
+
+static void *
+sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller)
+{
+	return NULL;
+}
+
+static struct sn_pcibus_provider sn_pci_default_provider = {
+	.dma_map = sn_default_pci_map,
+	.dma_map_consistent = sn_default_pci_map,
+	.dma_unmap = sn_default_pci_unmap,
+	.bus_fixup = sn_default_pci_bus_fixup,
+};
+
+/*
+ * Retrieve the DMA Flush List given nasid, widget, and device.
+ * This list is needed to implement the WAR - Flush DMA data on PIO Reads.
+ */
+static inline u64
+sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
+			     u64 address)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST,
+			(u64) nasid, (u64) widget_num,
+			(u64) device_num, (u64) address, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified
+ *			  device.
+ */
+inline struct pcidev_info *
+sn_pcidev_info_get(struct pci_dev *dev)
+{
+	struct pcidev_info *pcidev;
+
+	list_for_each_entry(pcidev,
+			    &(SN_PLATFORM_DATA(dev)->pcidev_info), pdi_list) {
+		if (pcidev->pdi_linux_pcidev == dev)
+			return pcidev;
+	}
+	return NULL;
+}
+
+/* Older PROM flush WAR
+ *
+ * 01/16/06 -- This war will be in place until a new official PROM is released.
+ * Additionally note that the struct sn_flush_device_war also has to be
+ * removed from arch/ia64/sn/include/xtalk/hubdev.h
+ */
+
+static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
+			       struct sn_flush_device_common *common)
+{
+	struct sn_flush_device_war *war_list;
+	struct sn_flush_device_war *dev_entry;
+	struct ia64_sal_retval isrv = {0,0,0,0};
+
+	printk_once(KERN_WARNING
+		"PROM version < 4.50 -- implementing old PROM flush WAR\n");
+
+	war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
+	BUG_ON(!war_list);
+
+	SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
+			nasid, widget, __pa(war_list), 0, 0, 0 ,0);
+	if (isrv.status)
+		panic("sn_device_fixup_war failed: %s\n",
+		      ia64_sal_strerror(isrv.status));
+
+	dev_entry = war_list + device;
+	memcpy(common,dev_entry, sizeof(*common));
+	kfree(war_list);
+
+	return isrv.status;
+}
+
+/*
+ * sn_common_hubdev_init() - This routine is called to initialize the HUB data
+ *			     structure for each node in the system.
+ */
+void __init
+sn_common_hubdev_init(struct hubdev_info *hubdev)
+{
+
+	struct sn_flush_device_kernel *sn_flush_device_kernel;
+	struct sn_flush_device_kernel *dev_entry;
+	s64 status;
+	int widget, device, size;
+
+	/* Attach the error interrupt handlers */
+	if (hubdev->hdi_nasid & 1)	/* If TIO */
+		ice_error_init(hubdev);
+	else
+		hub_error_init(hubdev);
+
+	for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++)
+		hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev;
+
+	if (!hubdev->hdi_flush_nasid_list.widget_p)
+		return;
+
+	size = (HUB_WIDGET_ID_MAX + 1) *
+		sizeof(struct sn_flush_device_kernel *);
+	hubdev->hdi_flush_nasid_list.widget_p =
+		kzalloc(size, GFP_KERNEL);
+	BUG_ON(!hubdev->hdi_flush_nasid_list.widget_p);
+
+	for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
+		size = DEV_PER_WIDGET *
+			sizeof(struct sn_flush_device_kernel);
+		sn_flush_device_kernel = kzalloc(size, GFP_KERNEL);
+		BUG_ON(!sn_flush_device_kernel);
+
+		dev_entry = sn_flush_device_kernel;
+		for (device = 0; device < DEV_PER_WIDGET;
+		     device++, dev_entry++) {
+			size = sizeof(struct sn_flush_device_common);
+			dev_entry->common = kzalloc(size, GFP_KERNEL);
+			BUG_ON(!dev_entry->common);
+			if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST))
+				status = sal_get_device_dmaflush_list(
+					     hubdev->hdi_nasid, widget, device,
+					     (u64)(dev_entry->common));
+			else
+				status = sn_device_fixup_war(hubdev->hdi_nasid,
+							     widget, device,
+							     dev_entry->common);
+			if (status != SALRET_OK)
+				panic("SAL call failed: %s\n",
+				      ia64_sal_strerror(status));
+
+			spin_lock_init(&dev_entry->sfdl_flush_lock);
+		}
+
+		if (sn_flush_device_kernel)
+			hubdev->hdi_flush_nasid_list.widget_p[widget] =
+							 sn_flush_device_kernel;
+	}
+}
+
+void sn_pci_unfixup_slot(struct pci_dev *dev)
+{
+	struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev;
+
+	sn_irq_unfixup(dev);
+	pci_dev_put(host_pci_dev);
+	pci_dev_put(dev);
+}
+
+/*
+ * sn_pci_fixup_slot()
+ */
+void sn_pci_fixup_slot(struct pci_dev *dev, struct pcidev_info *pcidev_info,
+		       struct sn_irq_info *sn_irq_info)
+{
+	int segment = pci_domain_nr(dev->bus);
+	struct pcibus_bussoft *bs;
+	struct pci_bus *host_pci_bus;
+	struct pci_dev *host_pci_dev;
+	unsigned int bus_no, devfn;
+
+	pci_dev_get(dev); /* for the sysdata pointer */
+
+	/* Add pcidev_info to list in pci_controller.platform_data */
+	list_add_tail(&pcidev_info->pdi_list,
+		      &(SN_PLATFORM_DATA(dev->bus)->pcidev_info));
+	/*
+	 * Using the PROMs values for the PCI host bus, get the Linux
+	 * PCI host_pci_dev struct and set up host bus linkages
+ 	 */
+
+	bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff;
+	devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff;
+ 	host_pci_bus = pci_find_bus(segment, bus_no);
+ 	host_pci_dev = pci_get_slot(host_pci_bus, devfn);
+
+	pcidev_info->host_pci_dev = host_pci_dev;
+	pcidev_info->pdi_linux_pcidev = dev;
+	pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev);
+	bs = SN_PCIBUS_BUSSOFT(dev->bus);
+	pcidev_info->pdi_pcibus_info = bs;
+
+	if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) {
+		SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type];
+	} else {
+		SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider;
+	}
+
+	/* Only set up IRQ stuff if this device has a host bus context */
+	if (bs && sn_irq_info->irq_irq) {
+		pcidev_info->pdi_sn_irq_info = sn_irq_info;
+		dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq;
+		sn_irq_fixup(dev, sn_irq_info);
+	} else {
+		pcidev_info->pdi_sn_irq_info = NULL;
+		kfree(sn_irq_info);
+	}
+}
+
+/*
+ * sn_common_bus_fixup - Perform platform specific bus fixup.
+ *			 Execute the ASIC specific fixup routine
+ *			 for this bus.
+ */
+void
+sn_common_bus_fixup(struct pci_bus *bus,
+		    struct pcibus_bussoft *prom_bussoft_ptr)
+{
+	int cnode;
+	struct pci_controller *controller;
+	struct hubdev_info *hubdev_info;
+	int nasid;
+	void *provider_soft;
+	struct sn_pcibus_provider *provider;
+	struct sn_platform_data *sn_platform_data;
+
+	controller = PCI_CONTROLLER(bus);
+	/*
+	 * Per-provider fixup.  Copies the bus soft structure from prom
+	 * to local area and links SN_PCIBUS_BUSSOFT().
+	 */
+
+	if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) {
+		printk(KERN_WARNING "sn_common_bus_fixup: Unsupported asic type, %d",
+		       prom_bussoft_ptr->bs_asic_type);
+		return;
+	}
+
+	if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB)
+		return;	/* no further fixup necessary */
+
+	provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type];
+	if (provider == NULL)
+		panic("sn_common_bus_fixup: No provider registered for this asic type, %d",
+		      prom_bussoft_ptr->bs_asic_type);
+
+	if (provider->bus_fixup)
+		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr,
+				 controller);
+	else
+		provider_soft = NULL;
+
+	/*
+	 * Generic bus fixup goes here.  Don't reference prom_bussoft_ptr
+	 * after this point.
+	 */
+	controller->platform_data = kzalloc(sizeof(struct sn_platform_data),
+					    GFP_KERNEL);
+	BUG_ON(controller->platform_data == NULL);
+	sn_platform_data =
+			(struct sn_platform_data *) controller->platform_data;
+	sn_platform_data->provider_soft = provider_soft;
+	INIT_LIST_HEAD(&((struct sn_platform_data *)
+			 controller->platform_data)->pcidev_info);
+	nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base);
+	cnode = nasid_to_cnodeid(nasid);
+	hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+	SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info =
+	    &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]);
+
+	/*
+	 * If the node information we obtained during the fixup phase is
+	 * invalid then set controller->node to -1 (undetermined)
+	 */
+	if (controller->node >= num_online_nodes()) {
+		struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus);
+
+		printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u "
+		       "L_IO=%llx L_MEM=%llx BASE=%llx\n",
+		       b->bs_asic_type, b->bs_xid, b->bs_persist_busnum,
+		       b->bs_legacy_io, b->bs_legacy_mem, b->bs_base);
+		printk(KERN_WARNING "on node %d but only %d nodes online."
+		       "Association set to undetermined.\n",
+		       controller->node, num_online_nodes());
+		controller->node = -1;
+	}
+}
+
+void sn_bus_store_sysdata(struct pci_dev *dev)
+{
+	struct sysdata_el *element;
+
+	element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL);
+	if (!element) {
+		dev_dbg(&dev->dev, "%s: out of memory!\n", __func__);
+		return;
+	}
+	element->sysdata = SN_PCIDEV_INFO(dev);
+	list_add(&element->entry, &sn_sysdata_list);
+}
+
+void sn_bus_free_sysdata(void)
+{
+	struct sysdata_el *element;
+	struct list_head *list, *safe;
+
+	list_for_each_safe(list, safe, &sn_sysdata_list) {
+		element = list_entry(list, struct sysdata_el, entry);
+		list_del(&element->entry);
+		list_del(&(((struct pcidev_info *)
+			     (element->sysdata))->pdi_list));
+		kfree(element->sysdata);
+		kfree(element);
+	}
+	return;
+}
+
+/*
+ * hubdev_init_node() - Creates the HUB data structure and link them to it's
+ *			own NODE specific data area.
+ */
+void __init hubdev_init_node(nodepda_t * npda, cnodeid_t node)
+{
+	struct hubdev_info *hubdev_info;
+	int size;
+	pg_data_t *pg;
+
+	size = sizeof(struct hubdev_info);
+
+	if (node >= num_online_nodes())	/* Headless/memless IO nodes */
+		pg = NODE_DATA(0);
+	else
+		pg = NODE_DATA(node);
+
+	hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size);
+
+	npda->pdinfo = (void *)hubdev_info;
+}
+
+geoid_t
+cnodeid_get_geoid(cnodeid_t cnode)
+{
+	struct hubdev_info *hubdev;
+
+	hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+	return hubdev->hdi_geoid;
+}
+
+void sn_generate_path(struct pci_bus *pci_bus, char *address)
+{
+	nasid_t nasid;
+	cnodeid_t cnode;
+	geoid_t geoid;
+	moduleid_t moduleid;
+	u16 bricktype;
+
+	nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
+	cnode = nasid_to_cnodeid(nasid);
+	geoid = cnodeid_get_geoid(cnode);
+	moduleid = geo_module(geoid);
+
+	sprintf(address, "module_%c%c%c%c%.2d",
+		'0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)),
+		'0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)),
+		'0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)),
+		MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid));
+
+	/* Tollhouse requires slot id to be displayed */
+	bricktype = MODULE_GET_BTYPE(moduleid);
+	if ((bricktype == L1_BRICKTYPE_191010) ||
+	    (bricktype == L1_BRICKTYPE_1932))
+			sprintf(address + strlen(address), "^%d",
+						geo_slot(geoid));
+}
+
+void __devinit
+sn_pci_fixup_bus(struct pci_bus *bus)
+{
+
+	if (SN_ACPI_BASE_SUPPORT())
+		sn_acpi_bus_fixup(bus);
+	else
+		sn_bus_fixup(bus);
+}
+
+/*
+ * sn_io_early_init - Perform early IO (and some non-IO) initialization.
+ *		      In particular, setup the sn_pci_provider[] array.
+ *		      This needs to be done prior to any bus scanning
+ *		      (acpi_scan_init()) in the ACPI case, as the SN
+ *		      bus fixup code will reference the array.
+ */
+static int __init
+sn_io_early_init(void)
+{
+	int i;
+
+	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM())
+		return 0;
+
+	/* we set the acpi revision to that of the DSDT table OEM rev. */
+	{
+		struct acpi_table_header *header = NULL;
+
+		acpi_get_table(ACPI_SIG_DSDT, 1, &header);
+		BUG_ON(header == NULL);
+		sn_acpi_rev = header->oem_revision;
+	}
+
+	/*
+	 * prime sn_pci_provider[].  Individual provider init routines will
+	 * override their respective default entries.
+	 */
+
+	for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++)
+		sn_pci_provider[i] = &sn_pci_default_provider;
+
+	pcibr_init_provider();
+	tioca_init_provider();
+	tioce_init_provider();
+
+	/*
+	 * This is needed to avoid bounce limit checks in the blk layer
+	 */
+	ia64_max_iommu_merge_mask = ~PAGE_MASK;
+
+	sn_irq_lh_init();
+	INIT_LIST_HEAD(&sn_sysdata_list);
+	sn_init_cpei_timer();
+
+#ifdef CONFIG_PROC_FS
+	register_sn_procfs();
+#endif
+
+	{
+		struct acpi_table_header *header;
+		(void)acpi_get_table(ACPI_SIG_DSDT, 1, &header);
+		printk(KERN_INFO "ACPI  DSDT OEM Rev 0x%x\n",
+			header->oem_revision);
+	}
+	if (SN_ACPI_BASE_SUPPORT())
+		sn_io_acpi_init();
+	else
+		sn_io_init();
+	return 0;
+}
+
+arch_initcall(sn_io_early_init);
+
+/*
+ * sn_io_late_init() - Perform any final platform specific IO initialization.
+ */
+
+int __init
+sn_io_late_init(void)
+{
+	struct pci_bus *bus;
+	struct pcibus_bussoft *bussoft;
+	cnodeid_t cnode;
+	nasid_t nasid;
+	cnodeid_t near_cnode;
+
+	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM())
+		return 0;
+
+	/*
+	 * Setup closest node in pci_controller->node for
+	 * PIC, TIOCP, TIOCE (TIOCA does it during bus fixup using
+	 * info from the PROM).
+	 */
+	bus = NULL;
+	while ((bus = pci_find_next_bus(bus)) != NULL) {
+		bussoft = SN_PCIBUS_BUSSOFT(bus);
+		nasid = NASID_GET(bussoft->bs_base);
+		cnode = nasid_to_cnodeid(nasid);
+		if ((bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) ||
+		    (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCE) ||
+		    (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_PIC)) {
+			/* PCI Bridge: find nearest node with CPUs */
+			int e = sn_hwperf_get_nearest_node(cnode, NULL,
+							   &near_cnode);
+			if (e < 0) {
+				near_cnode = (cnodeid_t)-1; /* use any node */
+				printk(KERN_WARNING "sn_io_late_init: failed "
+				       "to find near node with CPUs for "
+				       "node %d, err=%d\n", cnode, e);
+			}
+			PCI_CONTROLLER(bus)->node = near_cnode;
+		}
+	}
+
+	sn_ioif_inited = 1;	/* SN I/O infrastructure now initialized */
+
+	return 0;
+}
+
+fs_initcall(sn_io_late_init);
+
+EXPORT_SYMBOL(sn_pci_unfixup_slot);
+EXPORT_SYMBOL(sn_bus_store_sysdata);
+EXPORT_SYMBOL(sn_bus_free_sysdata);
+EXPORT_SYMBOL(sn_generate_path);
+
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
new file mode 100644
index 00000000..98079f29
--- /dev/null
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -0,0 +1,376 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/slab.h>
+#include <asm/sn/types.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/io.h>
+#include <asm/sn/module.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/sn_sal.h>
+#include "xtalk/hubdev.h"
+
+/*
+ * The code in this file will only be executed when running with
+ * a PROM that does _not_ have base ACPI IO support.
+ * (i.e., SN_ACPI_BASE_SUPPORT() == 0)
+ */
+
+static int max_segment_number;		 /* Default highest segment number */
+static int max_pcibus_number = 255;	/* Default highest pci bus number */
+
+
+/*
+ * Retrieve the hub device info structure for the given nasid.
+ */
+static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_GET_HUBDEV_INFO,
+			(u64) handle, (u64) address, 0, 0, 0, 0, 0);
+	return ret_stuff.v0;
+}
+
+/*
+ * Retrieve the pci bus information given the bus number.
+ */
+static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_GET_PCIBUS_INFO,
+			(u64) segment, (u64) busnum, (u64) address, 0, 0, 0, 0);
+	return ret_stuff.v0;
+}
+
+/*
+ * Retrieve the pci device information given the bus and device|function number.
+ */
+static inline u64
+sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
+		    u64 sn_irq_info)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_GET_PCIDEV_INFO,
+			(u64) segment, (u64) bus_number, (u64) devfn,
+			(u64) pci_dev,
+			sn_irq_info, 0, 0);
+	return ret_stuff.v0;
+}
+
+
+/*
+ * sn_fixup_ionodes() - This routine initializes the HUB data structure for
+ *			each node in the system. This function is only
+ *			executed when running with a non-ACPI capable PROM.
+ */
+static void __init sn_fixup_ionodes(void)
+{
+
+	struct hubdev_info *hubdev;
+	u64 status;
+	u64 nasid;
+	int i;
+	extern void sn_common_hubdev_init(struct hubdev_info *);
+
+	/*
+	 * Get SGI Specific HUB chipset information.
+	 * Inform Prom that this kernel can support domain bus numbering.
+	 */
+	for (i = 0; i < num_cnodes; i++) {
+		hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
+		nasid = cnodeid_to_nasid(i);
+		hubdev->max_segment_number = 0xffffffff;
+		hubdev->max_pcibus_number = 0xff;
+		status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev));
+		if (status)
+			continue;
+
+		/* Save the largest Domain and pcibus numbers found. */
+		if (hubdev->max_segment_number) {
+			/*
+			 * Dealing with a Prom that supports segments.
+			 */
+			max_segment_number = hubdev->max_segment_number;
+			max_pcibus_number = hubdev->max_pcibus_number;
+		}
+		sn_common_hubdev_init(hubdev);
+	}
+}
+
+/*
+ * sn_pci_legacy_window_fixup - Create PCI controller windows for
+ *				legacy IO and MEM space. This needs to
+ *				be done here, as the PROM does not have
+ *				ACPI support defining the root buses
+ *				and their resources (_CRS),
+ */
+static void
+sn_legacy_pci_window_fixup(struct pci_controller *controller,
+			   u64 legacy_io, u64 legacy_mem)
+{
+		controller->window = kcalloc(2, sizeof(struct pci_window),
+					     GFP_KERNEL);
+		BUG_ON(controller->window == NULL);
+		controller->window[0].offset = legacy_io;
+		controller->window[0].resource.name = "legacy_io";
+		controller->window[0].resource.flags = IORESOURCE_IO;
+		controller->window[0].resource.start = legacy_io;
+		controller->window[0].resource.end =
+	    			controller->window[0].resource.start + 0xffff;
+		controller->window[0].resource.parent = &ioport_resource;
+		controller->window[1].offset = legacy_mem;
+		controller->window[1].resource.name = "legacy_mem";
+		controller->window[1].resource.flags = IORESOURCE_MEM;
+		controller->window[1].resource.start = legacy_mem;
+		controller->window[1].resource.end =
+	    	       controller->window[1].resource.start + (1024 * 1024) - 1;
+		controller->window[1].resource.parent = &iomem_resource;
+		controller->windows = 2;
+}
+
+/*
+ * sn_pci_window_fixup() - Create a pci_window for each device resource.
+ *			   It will setup pci_windows for use by
+ *			   pcibios_bus_to_resource(), pcibios_resource_to_bus(),
+ *			   etc.
+ */
+static void
+sn_pci_window_fixup(struct pci_dev *dev, unsigned int count,
+		    s64 * pci_addrs)
+{
+	struct pci_controller *controller = PCI_CONTROLLER(dev->bus);
+	unsigned int i;
+	unsigned int idx;
+	unsigned int new_count;
+	struct pci_window *new_window;
+
+	if (count == 0)
+		return;
+	idx = controller->windows;
+	new_count = controller->windows + count;
+	new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL);
+	BUG_ON(new_window == NULL);
+	if (controller->window) {
+		memcpy(new_window, controller->window,
+		       sizeof(struct pci_window) * controller->windows);
+		kfree(controller->window);
+	}
+
+	/* Setup a pci_window for each device resource. */
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+		if (pci_addrs[i] == -1)
+			continue;
+
+		new_window[idx].offset = dev->resource[i].start - pci_addrs[i];
+		new_window[idx].resource = dev->resource[i];
+		idx++;
+	}
+
+	controller->windows = new_count;
+	controller->window = new_window;
+}
+
+/*
+ * sn_io_slot_fixup() -   We are not running with an ACPI capable PROM,
+ *			  and need to convert the pci_dev->resource
+ *			  'start' and 'end' addresses to mapped addresses,
+ *			  and setup the pci_controller->window array entries.
+ */
+void
+sn_io_slot_fixup(struct pci_dev *dev)
+{
+	unsigned int count = 0;
+	int idx;
+	s64 pci_addrs[PCI_ROM_RESOURCE + 1];
+	unsigned long addr, end, size, start;
+	struct pcidev_info *pcidev_info;
+	struct sn_irq_info *sn_irq_info;
+	int status;
+
+	pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
+	if (!pcidev_info)
+		panic("%s: Unable to alloc memory for pcidev_info", __func__);
+
+	sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (!sn_irq_info)
+		panic("%s: Unable to alloc memory for sn_irq_info", __func__);
+
+	/* Call to retrieve pci device information needed by kernel. */
+	status = sal_get_pcidev_info((u64) pci_domain_nr(dev),
+		(u64) dev->bus->number,
+		dev->devfn,
+		(u64) __pa(pcidev_info),
+		(u64) __pa(sn_irq_info));
+
+	BUG_ON(status); /* Cannot get platform pci device information */
+
+
+	/* Copy over PIO Mapped Addresses */
+	for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
+
+		if (!pcidev_info->pdi_pio_mapped_addr[idx]) {
+			pci_addrs[idx] = -1;
+			continue;
+		}
+
+		start = dev->resource[idx].start;
+		end = dev->resource[idx].end;
+		size = end - start;
+		if (size == 0) {
+			pci_addrs[idx] = -1;
+			continue;
+		}
+		pci_addrs[idx] = start;
+		count++;
+		addr = pcidev_info->pdi_pio_mapped_addr[idx];
+		addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET;
+		dev->resource[idx].start = addr;
+		dev->resource[idx].end = addr + size;
+
+		/*
+		 * if it's already in the device structure, remove it before
+		 * inserting
+		 */
+		if (dev->resource[idx].parent && dev->resource[idx].parent->child)
+			release_resource(&dev->resource[idx]);
+
+		if (dev->resource[idx].flags & IORESOURCE_IO)
+			insert_resource(&ioport_resource, &dev->resource[idx]);
+		else
+			insert_resource(&iomem_resource, &dev->resource[idx]);
+		/*
+		 * If ROM, set the actual ROM image size, and mark as
+		 * shadowed in PROM.
+		 */
+		if (idx == PCI_ROM_RESOURCE) {
+			size_t image_size;
+			void __iomem *rom;
+
+			rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE),
+				      size + 1);
+			image_size = pci_get_rom_size(dev, rom, size + 1);
+			dev->resource[PCI_ROM_RESOURCE].end =
+				dev->resource[PCI_ROM_RESOURCE].start +
+				image_size - 1;
+			dev->resource[PCI_ROM_RESOURCE].flags |=
+						 IORESOURCE_ROM_BIOS_COPY;
+		}
+	}
+	/* Create a pci_window in the pci_controller struct for
+	 * each device resource.
+	 */
+	if (count > 0)
+		sn_pci_window_fixup(dev, count, pci_addrs);
+
+	sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info);
+}
+
+EXPORT_SYMBOL(sn_io_slot_fixup);
+
+/*
+ * sn_pci_controller_fixup() - This routine sets up a bus's resources
+ *			       consistent with the Linux PCI abstraction layer.
+ */
+static void __init
+sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
+{
+	s64 status = 0;
+	struct pci_controller *controller;
+	struct pcibus_bussoft *prom_bussoft_ptr;
+
+
+ 	status = sal_get_pcibus_info((u64) segment, (u64) busnum,
+ 				     (u64) ia64_tpa(&prom_bussoft_ptr));
+ 	if (status > 0)
+		return;		/*bus # does not exist */
+	prom_bussoft_ptr = __va(prom_bussoft_ptr);
+
+	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
+	BUG_ON(!controller);
+	controller->segment = segment;
+
+	/*
+	 * Temporarily save the prom_bussoft_ptr for use by sn_bus_fixup().
+	 * (platform_data will be overwritten later in sn_common_bus_fixup())
+	 */
+	controller->platform_data = prom_bussoft_ptr;
+
+	bus = pci_scan_bus(busnum, &pci_root_ops, controller);
+ 	if (bus == NULL)
+ 		goto error_return; /* error, or bus already scanned */
+
+	bus->sysdata = controller;
+
+	return;
+
+error_return:
+
+	kfree(controller);
+	return;
+}
+
+/*
+ * sn_bus_fixup
+ */
+void
+sn_bus_fixup(struct pci_bus *bus)
+{
+	struct pci_dev *pci_dev = NULL;
+	struct pcibus_bussoft *prom_bussoft_ptr;
+
+	if (!bus->parent) {  /* If root bus */
+		prom_bussoft_ptr = PCI_CONTROLLER(bus)->platform_data;
+		if (prom_bussoft_ptr == NULL) {
+			printk(KERN_ERR
+			       "sn_bus_fixup: 0x%04x:0x%02x Unable to "
+			       "obtain prom_bussoft_ptr\n",
+			       pci_domain_nr(bus), bus->number);
+			return;
+		}
+		sn_common_bus_fixup(bus, prom_bussoft_ptr);
+		sn_legacy_pci_window_fixup(PCI_CONTROLLER(bus),
+					   prom_bussoft_ptr->bs_legacy_io,
+					   prom_bussoft_ptr->bs_legacy_mem);
+        }
+        list_for_each_entry(pci_dev, &bus->devices, bus_list) {
+                sn_io_slot_fixup(pci_dev);
+        }
+
+}
+
+/*
+ * sn_io_init - PROM does not have ACPI support to define nodes or root buses,
+ *		so we need to do things the hard way, including initiating the
+ *		bus scanning ourselves.
+ */
+
+void __init sn_io_init(void)
+{
+	int i, j;
+
+	sn_fixup_ionodes();
+
+	/* busses are not known yet ... */
+	for (i = 0; i <= max_segment_number; i++)
+		for (j = 0; j <= max_pcibus_number; j++)
+			sn_pci_controller_fixup(i, j, NULL);
+}
diff --git a/arch/ia64/sn/kernel/iomv.c b/arch/ia64/sn/kernel/iomv.c
new file mode 100644
index 00000000..c77ebdf9
--- /dev/null
+++ b/arch/ia64/sn/kernel/iomv.c
@@ -0,0 +1,82 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2003, 2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#include <asm/vga.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/simulator.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/acpi.h>
+
+#define IS_LEGACY_VGA_IOPORT(p) \
+	(((p) >= 0x3b0 && (p) <= 0x3bb) || ((p) >= 0x3c0 && (p) <= 0x3df))
+
+/**
+ * sn_io_addr - convert an in/out port to an i/o address
+ * @port: port to convert
+ *
+ * Legacy in/out instructions are converted to ld/st instructions
+ * on IA64.  This routine will convert a port number into a valid
+ * SN i/o address.  Used by sn_in*() and sn_out*().
+ */
+
+void *sn_io_addr(unsigned long port)
+{
+	if (!IS_RUNNING_ON_SIMULATOR()) {
+		if (IS_LEGACY_VGA_IOPORT(port))
+			return (__ia64_mk_io_addr(port));
+		/* On sn2, legacy I/O ports don't point at anything */
+		if (port < (64 * 1024))
+			return NULL;
+		if (SN_ACPI_BASE_SUPPORT())
+			return (__ia64_mk_io_addr(port));
+		else
+			return ((void *)(port | __IA64_UNCACHED_OFFSET));
+	} else {
+		/* but the simulator uses them... */
+		unsigned long addr;
+
+		/*
+		 * word align port, but need more than 10 bits
+		 * for accessing registers in bedrock local block
+		 * (so we don't do port&0xfff)
+		 */
+		addr = (is_shub2() ? 0xc00000028c000000UL : 0xc0000087cc000000UL) | ((port >> 2) << 12);
+		if ((port >= 0x1f0 && port <= 0x1f7) || port == 0x3f6 || port == 0x3f7)
+			addr |= port;
+		return (void *)addr;
+	}
+}
+
+EXPORT_SYMBOL(sn_io_addr);
+
+/**
+ * __sn_mmiowb - I/O space memory barrier
+ *
+ * See arch/ia64/include/asm/io.h and Documentation/DocBook/deviceiobook.tmpl
+ * for details.
+ *
+ * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear.
+ * See PV 871084 for details about the WAR about zero value.
+ *
+ */
+void __sn_mmiowb(void)
+{
+	volatile unsigned long *adr = pda->pio_write_status_addr;
+	unsigned long val = pda->pio_write_status_val;
+
+	while ((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != val)
+		cpu_relax();
+}
+
+EXPORT_SYMBOL(__sn_mmiowb);
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
new file mode 100644
index 00000000..81a1f4e6
--- /dev/null
+++ b/arch/ia64/sn/kernel/irq.c
@@ -0,0 +1,497 @@
+/*
+ * Platform dependent support for SGI SN
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/sn_feature_sets.h>
+
+static void register_intr_pda(struct sn_irq_info *sn_irq_info);
+static void unregister_intr_pda(struct sn_irq_info *sn_irq_info);
+
+extern int sn_ioif_inited;
+struct list_head **sn_irq_lh;
+static DEFINE_SPINLOCK(sn_irq_info_lock); /* non-IRQ lock */
+
+u64 sn_intr_alloc(nasid_t local_nasid, int local_widget,
+				     struct sn_irq_info *sn_irq_info,
+				     int req_irq, nasid_t req_nasid,
+				     int req_slice)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+			(u64) SAL_INTR_ALLOC, (u64) local_nasid,
+			(u64) local_widget, __pa(sn_irq_info), (u64) req_irq,
+			(u64) req_nasid, (u64) req_slice);
+
+	return ret_stuff.status;
+}
+
+void sn_intr_free(nasid_t local_nasid, int local_widget,
+				struct sn_irq_info *sn_irq_info)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+			(u64) SAL_INTR_FREE, (u64) local_nasid,
+			(u64) local_widget, (u64) sn_irq_info->irq_irq,
+			(u64) sn_irq_info->irq_cookie, 0, 0);
+}
+
+u64 sn_intr_redirect(nasid_t local_nasid, int local_widget,
+		      struct sn_irq_info *sn_irq_info,
+		      nasid_t req_nasid, int req_slice)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+			(u64) SAL_INTR_REDIRECT, (u64) local_nasid,
+			(u64) local_widget, __pa(sn_irq_info),
+			(u64) req_nasid, (u64) req_slice, 0);
+
+	return ret_stuff.status;
+}
+
+static unsigned int sn_startup_irq(struct irq_data *data)
+{
+	return 0;
+}
+
+static void sn_shutdown_irq(struct irq_data *data)
+{
+}
+
+extern void ia64_mca_register_cpev(int);
+
+static void sn_disable_irq(struct irq_data *data)
+{
+	if (data->irq == local_vector_to_irq(IA64_CPE_VECTOR))
+		ia64_mca_register_cpev(0);
+}
+
+static void sn_enable_irq(struct irq_data *data)
+{
+	if (data->irq == local_vector_to_irq(IA64_CPE_VECTOR))
+		ia64_mca_register_cpev(data->irq);
+}
+
+static void sn_ack_irq(struct irq_data *data)
+{
+	u64 event_occurred, mask;
+	unsigned int irq = data->irq & 0xff;
+
+	event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
+	mask = event_occurred & SH_ALL_INT_MASK;
+	HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask);
+	__set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
+
+	irq_move_irq(data);
+}
+
+static void sn_irq_info_free(struct rcu_head *head);
+
+struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info,
+				       nasid_t nasid, int slice)
+{
+	int vector;
+	int cpuid;
+#ifdef CONFIG_SMP
+	int cpuphys;
+#endif
+	int64_t bridge;
+	int local_widget, status;
+	nasid_t local_nasid;
+	struct sn_irq_info *new_irq_info;
+	struct sn_pcibus_provider *pci_provider;
+
+	bridge = (u64) sn_irq_info->irq_bridge;
+	if (!bridge) {
+		return NULL; /* irq is not a device interrupt */
+	}
+
+	local_nasid = NASID_GET(bridge);
+
+	if (local_nasid & 1)
+		local_widget = TIO_SWIN_WIDGETNUM(bridge);
+	else
+		local_widget = SWIN_WIDGETNUM(bridge);
+	vector = sn_irq_info->irq_irq;
+
+	/* Make use of SAL_INTR_REDIRECT if PROM supports it */
+	status = sn_intr_redirect(local_nasid, local_widget, sn_irq_info, nasid, slice);
+	if (!status) {
+		new_irq_info = sn_irq_info;
+		goto finish_up;
+	}
+
+	/*
+	 * PROM does not support SAL_INTR_REDIRECT, or it failed.
+	 * Revert to old method.
+	 */
+	new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC);
+	if (new_irq_info == NULL)
+		return NULL;
+
+	memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
+
+	/* Free the old PROM new_irq_info structure */
+	sn_intr_free(local_nasid, local_widget, new_irq_info);
+	unregister_intr_pda(new_irq_info);
+
+	/* allocate a new PROM new_irq_info struct */
+	status = sn_intr_alloc(local_nasid, local_widget,
+			       new_irq_info, vector,
+			       nasid, slice);
+
+	/* SAL call failed */
+	if (status) {
+		kfree(new_irq_info);
+		return NULL;
+	}
+
+	register_intr_pda(new_irq_info);
+	spin_lock(&sn_irq_info_lock);
+	list_replace_rcu(&sn_irq_info->list, &new_irq_info->list);
+	spin_unlock(&sn_irq_info_lock);
+	call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+
+
+finish_up:
+	/* Update kernels new_irq_info with new target info */
+	cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid,
+				     new_irq_info->irq_slice);
+	new_irq_info->irq_cpuid = cpuid;
+
+	pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type];
+
+	/*
+	 * If this represents a line interrupt, target it.  If it's
+	 * an msi (irq_int_bit < 0), it's already targeted.
+	 */
+	if (new_irq_info->irq_int_bit >= 0 &&
+	    pci_provider && pci_provider->target_interrupt)
+		(pci_provider->target_interrupt)(new_irq_info);
+
+#ifdef CONFIG_SMP
+	cpuphys = cpu_physical_id(cpuid);
+	set_irq_affinity_info((vector & 0xff), cpuphys, 0);
+#endif
+
+	return new_irq_info;
+}
+
+static int sn_set_affinity_irq(struct irq_data *data,
+			       const struct cpumask *mask, bool force)
+{
+	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
+	unsigned int irq = data->irq;
+	nasid_t nasid;
+	int slice;
+
+	nasid = cpuid_to_nasid(cpumask_first(mask));
+	slice = cpuid_to_slice(cpumask_first(mask));
+
+	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
+				 sn_irq_lh[irq], list)
+		(void)sn_retarget_vector(sn_irq_info, nasid, slice);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+void sn_set_err_irq_affinity(unsigned int irq)
+{
+        /*
+         * On systems which support CPU disabling (SHub2), all error interrupts
+         * are targeted at the boot CPU.
+         */
+        if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT))
+                set_irq_affinity_info(irq, cpu_physical_id(0), 0);
+}
+#else
+void sn_set_err_irq_affinity(unsigned int irq) { }
+#endif
+
+static void
+sn_mask_irq(struct irq_data *data)
+{
+}
+
+static void
+sn_unmask_irq(struct irq_data *data)
+{
+}
+
+struct irq_chip irq_type_sn = {
+	.name			= "SN hub",
+	.irq_startup		= sn_startup_irq,
+	.irq_shutdown		= sn_shutdown_irq,
+	.irq_enable		= sn_enable_irq,
+	.irq_disable		= sn_disable_irq,
+	.irq_ack		= sn_ack_irq,
+	.irq_mask		= sn_mask_irq,
+	.irq_unmask		= sn_unmask_irq,
+	.irq_set_affinity	= sn_set_affinity_irq
+};
+
+ia64_vector sn_irq_to_vector(int irq)
+{
+	if (irq >= IA64_NUM_VECTORS)
+		return 0;
+	return (ia64_vector)irq;
+}
+
+unsigned int sn_local_vector_to_irq(u8 vector)
+{
+	return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector));
+}
+
+void sn_irq_init(void)
+{
+	int i;
+
+	ia64_first_device_vector = IA64_SN2_FIRST_DEVICE_VECTOR;
+	ia64_last_device_vector = IA64_SN2_LAST_DEVICE_VECTOR;
+
+	for (i = 0; i < NR_IRQS; i++) {
+		if (irq_get_chip(i) == &no_irq_chip)
+			irq_set_chip(i, &irq_type_sn);
+	}
+}
+
+static void register_intr_pda(struct sn_irq_info *sn_irq_info)
+{
+	int irq = sn_irq_info->irq_irq;
+	int cpu = sn_irq_info->irq_cpuid;
+
+	if (pdacpu(cpu)->sn_last_irq < irq) {
+		pdacpu(cpu)->sn_last_irq = irq;
+	}
+
+	if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq)
+		pdacpu(cpu)->sn_first_irq = irq;
+}
+
+static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
+{
+	int irq = sn_irq_info->irq_irq;
+	int cpu = sn_irq_info->irq_cpuid;
+	struct sn_irq_info *tmp_irq_info;
+	int i, foundmatch;
+
+	rcu_read_lock();
+	if (pdacpu(cpu)->sn_last_irq == irq) {
+		foundmatch = 0;
+		for (i = pdacpu(cpu)->sn_last_irq - 1;
+		     i && !foundmatch; i--) {
+			list_for_each_entry_rcu(tmp_irq_info,
+						sn_irq_lh[i],
+						list) {
+				if (tmp_irq_info->irq_cpuid == cpu) {
+					foundmatch = 1;
+					break;
+				}
+			}
+		}
+		pdacpu(cpu)->sn_last_irq = i;
+	}
+
+	if (pdacpu(cpu)->sn_first_irq == irq) {
+		foundmatch = 0;
+		for (i = pdacpu(cpu)->sn_first_irq + 1;
+		     i < NR_IRQS && !foundmatch; i++) {
+			list_for_each_entry_rcu(tmp_irq_info,
+						sn_irq_lh[i],
+						list) {
+				if (tmp_irq_info->irq_cpuid == cpu) {
+					foundmatch = 1;
+					break;
+				}
+			}
+		}
+		pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i);
+	}
+	rcu_read_unlock();
+}
+
+static void sn_irq_info_free(struct rcu_head *head)
+{
+	struct sn_irq_info *sn_irq_info;
+
+	sn_irq_info = container_of(head, struct sn_irq_info, rcu);
+	kfree(sn_irq_info);
+}
+
+void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
+{
+	nasid_t nasid = sn_irq_info->irq_nasid;
+	int slice = sn_irq_info->irq_slice;
+	int cpu = nasid_slice_to_cpuid(nasid, slice);
+#ifdef CONFIG_SMP
+	int cpuphys;
+#endif
+
+	pci_dev_get(pci_dev);
+	sn_irq_info->irq_cpuid = cpu;
+	sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev);
+
+	/* link it into the sn_irq[irq] list */
+	spin_lock(&sn_irq_info_lock);
+	list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
+	reserve_irq_vector(sn_irq_info->irq_irq);
+	spin_unlock(&sn_irq_info_lock);
+
+	register_intr_pda(sn_irq_info);
+#ifdef CONFIG_SMP
+	cpuphys = cpu_physical_id(cpu);
+	set_irq_affinity_info(sn_irq_info->irq_irq, cpuphys, 0);
+	/*
+	 * Affinity was set by the PROM, prevent it from
+	 * being reset by the request_irq() path.
+	 */
+	irqd_mark_affinity_was_set(irq_get_irq_data(sn_irq_info->irq_irq));
+#endif
+}
+
+void sn_irq_unfixup(struct pci_dev *pci_dev)
+{
+	struct sn_irq_info *sn_irq_info;
+
+	/* Only cleanup IRQ stuff if this device has a host bus context */
+	if (!SN_PCIDEV_BUSSOFT(pci_dev))
+		return;
+
+	sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info;
+	if (!sn_irq_info)
+		return;
+	if (!sn_irq_info->irq_irq) {
+		kfree(sn_irq_info);
+		return;
+	}
+
+	unregister_intr_pda(sn_irq_info);
+	spin_lock(&sn_irq_info_lock);
+	list_del_rcu(&sn_irq_info->list);
+	spin_unlock(&sn_irq_info_lock);
+	if (list_empty(sn_irq_lh[sn_irq_info->irq_irq]))
+		free_irq_vector(sn_irq_info->irq_irq);
+	call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+	pci_dev_put(pci_dev);
+
+}
+
+static inline void
+sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info)
+{
+	struct sn_pcibus_provider *pci_provider;
+
+	pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type];
+
+	/* Don't force an interrupt if the irq has been disabled */
+	if (!irqd_irq_disabled(irq_get_irq_data(sn_irq_info->irq_irq)) &&
+	    pci_provider && pci_provider->force_interrupt)
+		(*pci_provider->force_interrupt)(sn_irq_info);
+}
+
+/*
+ * Check for lost interrupts.  If the PIC int_status reg. says that
+ * an interrupt has been sent, but not handled, and the interrupt
+ * is not pending in either the cpu irr regs or in the soft irr regs,
+ * and the interrupt is not in service, then the interrupt may have
+ * been lost.  Force an interrupt on that pin.  It is possible that
+ * the interrupt is in flight, so we may generate a spurious interrupt,
+ * but we should never miss a real lost interrupt.
+ */
+static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
+{
+	u64 regval;
+	struct pcidev_info *pcidev_info;
+	struct pcibus_info *pcibus_info;
+
+	/*
+	 * Bridge types attached to TIO (anything but PIC) do not need this WAR
+	 * since they do not target Shub II interrupt registers.  If that
+	 * ever changes, this check needs to accommodate.
+	 */
+	if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC)
+		return;
+
+	pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	if (!pcidev_info)
+		return;
+
+	pcibus_info =
+	    (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
+	    pdi_pcibus_info;
+	regval = pcireg_intr_status_get(pcibus_info);
+
+	if (!ia64_get_irr(irq_to_vector(irq))) {
+		if (!test_bit(irq, pda->sn_in_service_ivecs)) {
+			regval &= 0xff;
+			if (sn_irq_info->irq_int_bit & regval &
+			    sn_irq_info->irq_last_intr) {
+				regval &= ~(sn_irq_info->irq_int_bit & regval);
+				sn_call_force_intr_provider(sn_irq_info);
+			}
+		}
+	}
+	sn_irq_info->irq_last_intr = regval;
+}
+
+void sn_lb_int_war_check(void)
+{
+	struct sn_irq_info *sn_irq_info;
+	int i;
+
+	if (!sn_ioif_inited || pda->sn_first_irq == 0)
+		return;
+
+	rcu_read_lock();
+	for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) {
+		list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) {
+			sn_check_intr(i, sn_irq_info);
+		}
+	}
+	rcu_read_unlock();
+}
+
+void __init sn_irq_lh_init(void)
+{
+	int i;
+
+	sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL);
+	if (!sn_irq_lh)
+		panic("SN PCI INIT: Failed to allocate memory for PCI init\n");
+
+	for (i = 0; i < NR_IRQS; i++) {
+		sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+		if (!sn_irq_lh[i])
+			panic("SN PCI INIT: Failed IRQ memory allocation\n");
+
+		INIT_LIST_HEAD(sn_irq_lh[i]);
+	}
+}
diff --git a/arch/ia64/sn/kernel/klconflib.c b/arch/ia64/sn/kernel/klconflib.c
new file mode 100644
index 00000000..87682b48
--- /dev/null
+++ b/arch/ia64/sn/kernel/klconflib.c
@@ -0,0 +1,107 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <asm/sn/types.h>
+#include <asm/sn/module.h>
+#include <asm/sn/l1.h>
+
+char brick_types[MAX_BRICK_TYPES + 1] = "cri.xdpn%#=vo^kjbf890123456789...";
+/*
+ * Format a module id for printing.
+ *
+ * There are three possible formats:
+ *
+ *   MODULE_FORMAT_BRIEF	is the brief 6-character format, including
+ *				the actual brick-type as recorded in the 
+ *				moduleid_t, eg. 002c15 for a C-brick, or
+ *				101#17 for a PX-brick.
+ *
+ *   MODULE_FORMAT_LONG		is the hwgraph format, eg. rack/002/bay/15
+ *				of rack/101/bay/17 (note that the brick
+ *				type does not appear in this format).
+ *
+ *   MODULE_FORMAT_LCD		is like MODULE_FORMAT_BRIEF, except that it
+ *				ensures that the module id provided appears
+ *				exactly as it would on the LCD display of
+ *				the corresponding brick, eg. still 002c15
+ *				for a C-brick, but 101p17 for a PX-brick.
+ *
+ * maule (9/13/04):  Removed top-level check for (fmt == MODULE_FORMAT_LCD)
+ * making MODULE_FORMAT_LCD equivalent to MODULE_FORMAT_BRIEF.  It was
+ * decided that all callers should assume the returned string should be what
+ * is displayed on the brick L1 LCD.
+ */
+void
+format_module_id(char *buffer, moduleid_t m, int fmt)
+{
+	int rack, position;
+	unsigned char brickchar;
+
+	rack = MODULE_GET_RACK(m);
+	brickchar = MODULE_GET_BTCHAR(m);
+
+	/* Be sure we use the same brick type character as displayed
+	 * on the brick's LCD
+	 */
+	switch (brickchar) 
+	{
+	case L1_BRICKTYPE_GA:
+	case L1_BRICKTYPE_OPUS_TIO:
+		brickchar = L1_BRICKTYPE_C;
+		break;
+
+	case L1_BRICKTYPE_PX:
+	case L1_BRICKTYPE_PE:
+	case L1_BRICKTYPE_PA:
+	case L1_BRICKTYPE_SA: /* we can move this to the "I's" later
+			       * if that makes more sense
+			       */
+		brickchar = L1_BRICKTYPE_P;
+		break;
+
+	case L1_BRICKTYPE_IX:
+	case L1_BRICKTYPE_IA:
+
+		brickchar = L1_BRICKTYPE_I;
+		break;
+	}
+
+	position = MODULE_GET_BPOS(m);
+
+	if ((fmt == MODULE_FORMAT_BRIEF) || (fmt == MODULE_FORMAT_LCD)) {
+		/* Brief module number format, eg. 002c15 */
+
+		/* Decompress the rack number */
+		*buffer++ = '0' + RACK_GET_CLASS(rack);
+		*buffer++ = '0' + RACK_GET_GROUP(rack);
+		*buffer++ = '0' + RACK_GET_NUM(rack);
+
+		/* Add the brick type */
+		*buffer++ = brickchar;
+	}
+	else if (fmt == MODULE_FORMAT_LONG) {
+		/* Fuller hwgraph format, eg. rack/002/bay/15 */
+
+		strcpy(buffer, "rack" "/");  buffer += strlen(buffer);
+
+		*buffer++ = '0' + RACK_GET_CLASS(rack);
+		*buffer++ = '0' + RACK_GET_GROUP(rack);
+		*buffer++ = '0' + RACK_GET_NUM(rack);
+
+		strcpy(buffer, "/" "bay" "/");  buffer += strlen(buffer);
+	}
+
+	/* Add the bay position, using at least two digits */
+	if (position < 10)
+		*buffer++ = '0';
+	sprintf(buffer, "%d", position);
+}
diff --git a/arch/ia64/sn/kernel/machvec.c b/arch/ia64/sn/kernel/machvec.c
new file mode 100644
index 00000000..02bb9155
--- /dev/null
+++ b/arch/ia64/sn/kernel/machvec.c
@@ -0,0 +1,11 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2002-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#define MACHVEC_PLATFORM_NAME	sn2
+#define MACHVEC_PLATFORM_HEADER	<asm/machvec_sn2.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c
new file mode 100644
index 00000000..27793f7a
--- /dev/null
+++ b/arch/ia64/sn/kernel/mca.c
@@ -0,0 +1,146 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/vmalloc.h>
+#include <linux/mutex.h>
+#include <asm/mca.h>
+#include <asm/sal.h>
+#include <asm/sn/sn_sal.h>
+
+/*
+ * Interval for calling SAL to poll for errors that do NOT cause error
+ * interrupts. SAL will raise a CPEI if any errors are present that
+ * need to be logged.
+ */
+#define CPEI_INTERVAL	(5*HZ)
+
+struct timer_list sn_cpei_timer;
+void sn_init_cpei_timer(void);
+
+/* Printing oemdata from mca uses data that is not passed through SAL, it is
+ * global.  Only one user at a time.
+ */
+static DEFINE_MUTEX(sn_oemdata_mutex);
+static u8 **sn_oemdata;
+static u64 *sn_oemdata_size, sn_oemdata_bufsize;
+
+/*
+ * print_hook
+ *
+ * This function is the callback routine that SAL calls to log error
+ * info for platform errors.  buf is appended to sn_oemdata, resizing as
+ * required.
+ * Note: this is a SAL to OS callback, running under the same rules as the SAL
+ * code.  SAL calls are run with preempt disabled so this routine must not
+ * sleep.  vmalloc can sleep so print_hook cannot resize the output buffer
+ * itself, instead it must set the required size and return to let the caller
+ * resize the buffer then redrive the SAL call.
+ */
+static int print_hook(const char *fmt, ...)
+{
+	char buf[400];
+	int len;
+	va_list args;
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	len = strlen(buf);
+	if (*sn_oemdata_size + len <= sn_oemdata_bufsize)
+		memcpy(*sn_oemdata + *sn_oemdata_size, buf, len);
+	*sn_oemdata_size += len;
+	return 0;
+}
+
+static void sn_cpei_handler(int irq, void *devid, struct pt_regs *regs)
+{
+	/*
+	 * this function's sole purpose is to call SAL when we receive
+	 * a CE interrupt from SHUB or when the timer routine decides
+	 * we need to call SAL to check for CEs.
+	 */
+
+	/* CALL SAL_LOG_CE */
+
+	ia64_sn_plat_cpei_handler();
+}
+
+static void sn_cpei_timer_handler(unsigned long dummy)
+{
+	sn_cpei_handler(-1, NULL, NULL);
+	mod_timer(&sn_cpei_timer, jiffies + CPEI_INTERVAL);
+}
+
+void sn_init_cpei_timer(void)
+{
+	init_timer(&sn_cpei_timer);
+	sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
+	sn_cpei_timer.function = sn_cpei_timer_handler;
+	add_timer(&sn_cpei_timer);
+}
+
+static int
+sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
+				    u64 * oemdata_size)
+{
+	mutex_lock(&sn_oemdata_mutex);
+	sn_oemdata = oemdata;
+	sn_oemdata_size = oemdata_size;
+	sn_oemdata_bufsize = 0;
+	*sn_oemdata_size = PAGE_SIZE;	/* first guess at how much data will be generated */
+	while (*sn_oemdata_size > sn_oemdata_bufsize) {
+		u8 *newbuf = vmalloc(*sn_oemdata_size);
+		if (!newbuf) {
+			mutex_unlock(&sn_oemdata_mutex);
+			printk(KERN_ERR "%s: unable to extend sn_oemdata\n",
+			       __func__);
+			return 1;
+		}
+		vfree(*sn_oemdata);
+		*sn_oemdata = newbuf;
+		sn_oemdata_bufsize = *sn_oemdata_size;
+		*sn_oemdata_size = 0;
+		ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header);
+	}
+	mutex_unlock(&sn_oemdata_mutex);
+	return 0;
+}
+
+/* Callback when userspace salinfo wants to decode oem data via the platform
+ * kernel and/or prom.
+ */
+int sn_salinfo_platform_oemdata(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size)
+{
+	efi_guid_t guid = *(efi_guid_t *)sect_header;
+	int valid = 0;
+	*oemdata_size = 0;
+	vfree(*oemdata);
+	*oemdata = NULL;
+	if (efi_guidcmp(guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID) == 0) {
+		sal_log_plat_specific_err_info_t *psei = (sal_log_plat_specific_err_info_t *)sect_header;
+		valid = psei->valid.oem_data;
+	} else if (efi_guidcmp(guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) {
+		sal_log_mem_dev_err_info_t *mdei = (sal_log_mem_dev_err_info_t *)sect_header;
+		valid = mdei->valid.oem_data;
+	}
+	if (valid)
+		return sn_platform_plat_specific_err_print(sect_header, oemdata, oemdata_size);
+	else
+		return 0;
+}
+
+static int __init sn_salinfo_init(void)
+{
+	if (ia64_platform_is("sn2"))
+		salinfo_platform_oemdata = &sn_salinfo_platform_oemdata;
+	return 0;
+}
+
+module_init(sn_salinfo_init)
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
new file mode 100644
index 00000000..2b98b9e0
--- /dev/null
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -0,0 +1,238 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/cpumask.h>
+#include <linux/msi.h>
+#include <linux/slab.h>
+
+#include <asm/sn/addrs.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/nodepda.h>
+
+struct sn_msi_info {
+	u64 pci_addr;
+	struct sn_irq_info *sn_irq_info;
+};
+
+static struct sn_msi_info sn_msi_info[NR_IRQS];
+
+static struct irq_chip sn_msi_chip;
+
+void sn_teardown_msi_irq(unsigned int irq)
+{
+	nasid_t nasid;
+	int widget;
+	struct pci_dev *pdev;
+	struct pcidev_info *sn_pdev;
+	struct sn_irq_info *sn_irq_info;
+	struct pcibus_bussoft *bussoft;
+	struct sn_pcibus_provider *provider;
+
+	sn_irq_info = sn_msi_info[irq].sn_irq_info;
+	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
+		return;
+
+	sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	pdev = sn_pdev->pdi_linux_pcidev;
+	provider = SN_PCIDEV_BUSPROVIDER(pdev);
+
+	(*provider->dma_unmap)(pdev,
+			       sn_msi_info[irq].pci_addr,
+			       PCI_DMA_FROMDEVICE);
+	sn_msi_info[irq].pci_addr = 0;
+
+	bussoft = SN_PCIDEV_BUSSOFT(pdev);
+	nasid = NASID_GET(bussoft->bs_base);
+	widget = (nasid & 1) ?
+			TIO_SWIN_WIDGETNUM(bussoft->bs_base) :
+			SWIN_WIDGETNUM(bussoft->bs_base);
+
+	sn_intr_free(nasid, widget, sn_irq_info);
+	sn_msi_info[irq].sn_irq_info = NULL;
+
+	destroy_irq(irq);
+}
+
+int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
+{
+	struct msi_msg msg;
+	int widget;
+	int status;
+	nasid_t nasid;
+	u64 bus_addr;
+	struct sn_irq_info *sn_irq_info;
+	struct pcibus_bussoft *bussoft = SN_PCIDEV_BUSSOFT(pdev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+	int irq;
+
+	if (!entry->msi_attrib.is_64)
+		return -EINVAL;
+
+	if (bussoft == NULL)
+		return -EINVAL;
+
+	if (provider == NULL || provider->dma_map_consistent == NULL)
+		return -EINVAL;
+
+	irq = create_irq();
+	if (irq < 0)
+		return irq;
+
+	/*
+	 * Set up the vector plumbing.  Let the prom (via sn_intr_alloc)
+	 * decide which cpu to direct this msi at by default.
+	 */
+
+	nasid = NASID_GET(bussoft->bs_base);
+	widget = (nasid & 1) ?
+			TIO_SWIN_WIDGETNUM(bussoft->bs_base) :
+			SWIN_WIDGETNUM(bussoft->bs_base);
+
+	sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (! sn_irq_info) {
+		destroy_irq(irq);
+		return -ENOMEM;
+	}
+
+	status = sn_intr_alloc(nasid, widget, sn_irq_info, irq, -1, -1);
+	if (status) {
+		kfree(sn_irq_info);
+		destroy_irq(irq);
+		return -ENOMEM;
+	}
+
+	sn_irq_info->irq_int_bit = -1;		/* mark this as an MSI irq */
+	sn_irq_fixup(pdev, sn_irq_info);
+
+	/* Prom probably should fill these in, but doesn't ... */
+	sn_irq_info->irq_bridge_type = bussoft->bs_asic_type;
+	sn_irq_info->irq_bridge = (void *)bussoft->bs_base;
+
+	/*
+	 * Map the xio address into bus space
+	 */
+	bus_addr = (*provider->dma_map_consistent)(pdev,
+					sn_irq_info->irq_xtalkaddr,
+					sizeof(sn_irq_info->irq_xtalkaddr),
+					SN_DMA_MSI|SN_DMA_ADDR_XIO);
+	if (! bus_addr) {
+		sn_intr_free(nasid, widget, sn_irq_info);
+		kfree(sn_irq_info);
+		destroy_irq(irq);
+		return -ENOMEM;
+	}
+
+	sn_msi_info[irq].sn_irq_info = sn_irq_info;
+	sn_msi_info[irq].pci_addr = bus_addr;
+
+	msg.address_hi = (u32)(bus_addr >> 32);
+	msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
+
+	/*
+	 * In the SN platform, bit 16 is a "send vector" bit which
+	 * must be present in order to move the vector through the system.
+	 */
+	msg.data = 0x100 + irq;
+
+	irq_set_msi_desc(irq, entry);
+	write_msi_msg(irq, &msg);
+	irq_set_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+static int sn_set_msi_irq_affinity(struct irq_data *data,
+				   const struct cpumask *cpu_mask, bool force)
+{
+	struct msi_msg msg;
+	int slice;
+	nasid_t nasid;
+	u64 bus_addr;
+	struct pci_dev *pdev;
+	struct pcidev_info *sn_pdev;
+	struct sn_irq_info *sn_irq_info;
+	struct sn_irq_info *new_irq_info;
+	struct sn_pcibus_provider *provider;
+	unsigned int cpu, irq = data->irq;
+
+	cpu = cpumask_first(cpu_mask);
+	sn_irq_info = sn_msi_info[irq].sn_irq_info;
+	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
+		return -1;
+
+	/*
+	 * Release XIO resources for the old MSI PCI address
+	 */
+
+	get_cached_msi_msg(irq, &msg);
+        sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	pdev = sn_pdev->pdi_linux_pcidev;
+	provider = SN_PCIDEV_BUSPROVIDER(pdev);
+
+	bus_addr = (u64)(msg.address_hi) << 32 | (u64)(msg.address_lo);
+	(*provider->dma_unmap)(pdev, bus_addr, PCI_DMA_FROMDEVICE);
+	sn_msi_info[irq].pci_addr = 0;
+
+	nasid = cpuid_to_nasid(cpu);
+	slice = cpuid_to_slice(cpu);
+
+	new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice);
+	sn_msi_info[irq].sn_irq_info = new_irq_info;
+	if (new_irq_info == NULL)
+		return -1;
+
+	/*
+	 * Map the xio address into bus space
+	 */
+
+	bus_addr = (*provider->dma_map_consistent)(pdev,
+					new_irq_info->irq_xtalkaddr,
+					sizeof(new_irq_info->irq_xtalkaddr),
+					SN_DMA_MSI|SN_DMA_ADDR_XIO);
+
+	sn_msi_info[irq].pci_addr = bus_addr;
+	msg.address_hi = (u32)(bus_addr >> 32);
+	msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
+
+	write_msi_msg(irq, &msg);
+	cpumask_copy(data->affinity, cpu_mask);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+static void sn_ack_msi_irq(struct irq_data *data)
+{
+	irq_move_irq(data);
+	ia64_eoi();
+}
+
+static int sn_msi_retrigger_irq(struct irq_data *data)
+{
+	unsigned int vector = data->irq;
+	ia64_resend_irq(vector);
+
+	return 1;
+}
+
+static struct irq_chip sn_msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_mask		= mask_msi_irq,
+	.irq_unmask		= unmask_msi_irq,
+	.irq_ack		= sn_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= sn_set_msi_irq_affinity,
+#endif
+	.irq_retrigger		= sn_msi_retrigger_irq,
+};
diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S
new file mode 100644
index 00000000..3c7d48d6
--- /dev/null
+++ b/arch/ia64/sn/kernel/pio_phys.S
@@ -0,0 +1,71 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This file contains macros used to access MMR registers via
+ * uncached physical addresses.
+ *      pio_phys_read_mmr  - read an MMR
+ *      pio_phys_write_mmr - write an MMR
+ *      pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0
+ *              Second MMR will be skipped if address is NULL
+ *
+ * Addresses passed to these routines should be uncached physical addresses
+ * 	ie., 0x80000....
+ */
+
+
+
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+GLOBAL_ENTRY(pio_phys_read_mmr)
+	.prologue
+	.regstk 1,0,0,0
+	.body
+	mov r2=psr
+	rsm psr.i | psr.dt
+	;;
+	srlz.d
+	ld8.acq r8=[r32]
+	;;
+	mov psr.l=r2;;
+	srlz.d
+	br.ret.sptk.many rp
+END(pio_phys_read_mmr)
+
+GLOBAL_ENTRY(pio_phys_write_mmr)
+	.prologue
+	.regstk 2,0,0,0
+	.body
+	mov r2=psr
+	rsm psr.i | psr.dt
+	;;
+	srlz.d
+	st8.rel [r32]=r33
+	;;
+	mov psr.l=r2;;
+	srlz.d
+	br.ret.sptk.many rp
+END(pio_phys_write_mmr)
+
+GLOBAL_ENTRY(pio_atomic_phys_write_mmrs)
+	.prologue
+	.regstk 4,0,0,0
+	.body
+	mov r2=psr
+	cmp.ne p9,p0=r34,r0;
+	rsm psr.i | psr.dt | psr.ic
+	;;
+	srlz.d
+	st8.rel [r32]=r33
+(p9)	st8.rel [r34]=r35
+	;;
+	mov psr.l=r2;;
+	srlz.d
+	br.ret.sptk.many rp
+END(pio_atomic_phys_write_mmrs)
+
+
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
new file mode 100644
index 00000000..77db0b51
--- /dev/null
+++ b/arch/ia64/sn/kernel/setup.c
@@ -0,0 +1,775 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/kdev_t.h>
+#include <linux/string.h>
+#include <linux/screen_info.h>
+#include <linux/console.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/serial.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+#include <linux/mmzone.h>
+#include <linux/interrupt.h>
+#include <linux/acpi.h>
+#include <linux/compiler.h>
+#include <linux/root_dev.h>
+#include <linux/nodemask.h>
+#include <linux/pm.h>
+#include <linux/efi.h>
+
+#include <asm/io.h>
+#include <asm/sal.h>
+#include <asm/machvec.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/vga.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/simulator.h>
+#include <asm/sn/leds.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/sn_feature_sets.h>
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+#include <asm/sn/klconfig.h>
+
+
+DEFINE_PER_CPU(struct pda_s, pda_percpu);
+
+#define MAX_PHYS_MEMORY		(1UL << IA64_MAX_PHYS_BITS)	/* Max physical address supported */
+
+extern void bte_init_node(nodepda_t *, cnodeid_t);
+
+extern void sn_timer_init(void);
+extern unsigned long last_time_offset;
+extern void (*ia64_mark_idle) (int);
+extern void snidle(int);
+
+unsigned long sn_rtc_cycles_per_second;
+EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+
+DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
+EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
+
+DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
+EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
+
+DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
+EXPORT_PER_CPU_SYMBOL(__sn_nodepda);
+
+char sn_system_serial_number_string[128];
+EXPORT_SYMBOL(sn_system_serial_number_string);
+u64 sn_partition_serial_number;
+EXPORT_SYMBOL(sn_partition_serial_number);
+u8 sn_partition_id;
+EXPORT_SYMBOL(sn_partition_id);
+u8 sn_system_size;
+EXPORT_SYMBOL(sn_system_size);
+u8 sn_sharing_domain_size;
+EXPORT_SYMBOL(sn_sharing_domain_size);
+u8 sn_coherency_id;
+EXPORT_SYMBOL(sn_coherency_id);
+u8 sn_region_size;
+EXPORT_SYMBOL(sn_region_size);
+int sn_prom_type;	/* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
+
+short physical_node_map[MAX_NUMALINK_NODES];
+static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS];
+
+EXPORT_SYMBOL(physical_node_map);
+
+int num_cnodes;
+
+static void sn_init_pdas(char **);
+static void build_cnode_tables(void);
+
+static nodepda_t *nodepdaindr[MAX_COMPACT_NODES];
+
+/*
+ * The format of "screen_info" is strange, and due to early i386-setup
+ * code. This is just enough to make the console code think we're on a
+ * VGA color display.
+ */
+struct screen_info sn_screen_info = {
+	.orig_x = 0,
+	.orig_y = 0,
+	.orig_video_mode = 3,
+	.orig_video_cols = 80,
+	.orig_video_ega_bx = 3,
+	.orig_video_lines = 25,
+	.orig_video_isVGA = 1,
+	.orig_video_points = 16
+};
+
+/*
+ * This routine can only be used during init, since
+ * smp_boot_data is an init data structure.
+ * We have to use smp_boot_data.cpu_phys_id to find
+ * the physical id of the processor because the normal
+ * cpu_physical_id() relies on data structures that
+ * may not be initialized yet.
+ */
+
+static int __init pxm_to_nasid(int pxm)
+{
+	int i;
+	int nid;
+
+	nid = pxm_to_node(pxm);
+	for (i = 0; i < num_node_memblks; i++) {
+		if (node_memblk[i].nid == nid) {
+			return NASID_GET(node_memblk[i].start_paddr);
+		}
+	}
+	return -1;
+}
+
+/**
+ * early_sn_setup - early setup routine for SN platforms
+ *
+ * Sets up an initial console to aid debugging.  Intended primarily
+ * for bringup.  See start_kernel() in init/main.c.
+ */
+
+void __init early_sn_setup(void)
+{
+	efi_system_table_t *efi_systab;
+	efi_config_table_t *config_tables;
+	struct ia64_sal_systab *sal_systab;
+	struct ia64_sal_desc_entry_point *ep;
+	char *p;
+	int i, j;
+
+	/*
+	 * Parse enough of the SAL tables to locate the SAL entry point. Since, console
+	 * IO on SN2 is done via SAL calls, early_printk won't work without this.
+	 *
+	 * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
+	 * Any changes to those file may have to be made here as well.
+	 */
+	efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
+	config_tables = __va(efi_systab->tables);
+	for (i = 0; i < efi_systab->nr_tables; i++) {
+		if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) ==
+		    0) {
+			sal_systab = __va(config_tables[i].table);
+			p = (char *)(sal_systab + 1);
+			for (j = 0; j < sal_systab->entry_count; j++) {
+				if (*p == SAL_DESC_ENTRY_POINT) {
+					ep = (struct ia64_sal_desc_entry_point
+					      *)p;
+					ia64_sal_handler_init(__va
+							      (ep->sal_proc),
+							      __va(ep->gp));
+					return;
+				}
+				p += SAL_DESC_SIZE(*p);
+			}
+		}
+	}
+	/* Uh-oh, SAL not available?? */
+	printk(KERN_ERR "failed to find SAL entry point\n");
+}
+
+extern int platform_intr_list[];
+static int __cpuinitdata shub_1_1_found;
+
+/*
+ * sn_check_for_wars
+ *
+ * Set flag for enabling shub specific wars
+ */
+
+static inline int __cpuinit is_shub_1_1(int nasid)
+{
+	unsigned long id;
+	int rev;
+
+	if (is_shub2())
+		return 0;
+	id = REMOTE_HUB_L(nasid, SH1_SHUB_ID);
+	rev = (id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT;
+	return rev <= 2;
+}
+
+static void __cpuinit sn_check_for_wars(void)
+{
+	int cnode;
+
+	if (is_shub2()) {
+		/* none yet */
+	} else {
+		for_each_online_node(cnode) {
+			if (is_shub_1_1(cnodeid_to_nasid(cnode)))
+				shub_1_1_found = 1;
+		}
+	}
+}
+
+/*
+ * Scan the EFI PCDP table (if it exists) for an acceptable VGA console
+ * output device.  If one exists, pick it and set sn_legacy_{io,mem} to
+ * reflect the bus offsets needed to address it.
+ *
+ * Since pcdp support in SN is not supported in the 2.4 kernel (or at least
+ * the one lbs is based on) just declare the needed structs here.
+ *
+ * Reference spec http://www.dig64.org/specifications/DIG64_PCDPv20.pdf
+ *
+ * Returns 0 if no acceptable vga is found, !0 otherwise.
+ *
+ * Note:  This stuff is duped here because Altix requires the PCDP to
+ * locate a usable VGA device due to lack of proper ACPI support.  Structures
+ * could be used from drivers/firmware/pcdp.h, but it was decided that moving
+ * this file to a more public location just for Altix use was undesirable.
+ */
+
+struct hcdp_uart_desc {
+	u8	pad[45];
+};
+
+struct pcdp {
+	u8	signature[4];	/* should be 'HCDP' */
+	u32	length;
+	u8	rev;		/* should be >=3 for pcdp, <3 for hcdp */
+	u8	sum;
+	u8	oem_id[6];
+	u64	oem_tableid;
+	u32	oem_rev;
+	u32	creator_id;
+	u32	creator_rev;
+	u32	num_type0;
+	struct hcdp_uart_desc uart[0];	/* num_type0 of these */
+	/* pcdp descriptors follow */
+}  __attribute__((packed));
+
+struct pcdp_device_desc {
+	u8	type;
+	u8	primary;
+	u16	length;
+	u16	index;
+	/* interconnect specific structure follows */
+	/* device specific structure follows that */
+}  __attribute__((packed));
+
+struct pcdp_interface_pci {
+	u8	type;		/* 1 == pci */
+	u8	reserved;
+	u16	length;
+	u8	segment;
+	u8	bus;
+	u8 	dev;
+	u8	fun;
+	u16	devid;
+	u16	vendid;
+	u32	acpi_interrupt;
+	u64	mmio_tra;
+	u64	ioport_tra;
+	u8	flags;
+	u8	translation;
+}  __attribute__((packed));
+
+struct pcdp_vga_device {
+	u8	num_eas_desc;
+	/* ACPI Extended Address Space Desc follows */
+}  __attribute__((packed));
+
+/* from pcdp_device_desc.primary */
+#define PCDP_PRIMARY_CONSOLE	0x01
+
+/* from pcdp_device_desc.type */
+#define PCDP_CONSOLE_INOUT	0x0
+#define PCDP_CONSOLE_DEBUG	0x1
+#define PCDP_CONSOLE_OUT	0x2
+#define PCDP_CONSOLE_IN		0x3
+#define PCDP_CONSOLE_TYPE_VGA	0x8
+
+#define PCDP_CONSOLE_VGA	(PCDP_CONSOLE_TYPE_VGA | PCDP_CONSOLE_OUT)
+
+/* from pcdp_interface_pci.type */
+#define PCDP_IF_PCI		1
+
+/* from pcdp_interface_pci.translation */
+#define PCDP_PCI_TRANS_IOPORT	0x02
+#define PCDP_PCI_TRANS_MMIO	0x01
+
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
+static void
+sn_scan_pcdp(void)
+{
+	u8 *bp;
+	struct pcdp *pcdp;
+	struct pcdp_device_desc device;
+	struct pcdp_interface_pci if_pci;
+	extern struct efi efi;
+
+	if (efi.hcdp == EFI_INVALID_TABLE_ADDR)
+		return;		/* no hcdp/pcdp table */
+
+	pcdp = __va(efi.hcdp);
+
+	if (pcdp->rev < 3)
+		return;		/* only support PCDP (rev >= 3) */
+
+	for (bp = (u8 *)&pcdp->uart[pcdp->num_type0];
+	     bp < (u8 *)pcdp + pcdp->length;
+	     bp += device.length) {
+		memcpy(&device, bp, sizeof(device));
+		if (! (device.primary & PCDP_PRIMARY_CONSOLE))
+			continue;	/* not primary console */
+
+		if (device.type != PCDP_CONSOLE_VGA)
+			continue;	/* not VGA descriptor */
+
+		memcpy(&if_pci, bp+sizeof(device), sizeof(if_pci));
+		if (if_pci.type != PCDP_IF_PCI)
+			continue;	/* not PCI interconnect */
+
+		if (if_pci.translation & PCDP_PCI_TRANS_IOPORT)
+			vga_console_iobase = if_pci.ioport_tra;
+
+		if (if_pci.translation & PCDP_PCI_TRANS_MMIO)
+			vga_console_membase =
+				if_pci.mmio_tra | __IA64_UNCACHED_OFFSET;
+
+		break; /* once we find the primary, we're done */
+	}
+}
+#endif
+
+static unsigned long sn2_rtc_initial;
+
+/**
+ * sn_setup - SN platform setup routine
+ * @cmdline_p: kernel command line
+ *
+ * Handles platform setup for SN machines.  This includes determining
+ * the RTC frequency (via a SAL call), initializing secondary CPUs, and
+ * setting up per-node data areas.  The console is also initialized here.
+ */
+void __init sn_setup(char **cmdline_p)
+{
+	long status, ticks_per_sec, drift;
+	u32 version = sn_sal_rev();
+	extern void sn_cpu_init(void);
+
+	sn2_rtc_initial = rtc_time();
+	ia64_sn_plat_set_error_handling_features();	// obsolete
+	ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
+	ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);
+	/*
+	 * Note: The calls to notify the PROM of ACPI and PCI Segment
+	 *	 support must be done prior to acpi_load_tables(), as
+	 *	 an ACPI capable PROM will rebuild the DSDT as result
+	 *	 of the call.
+	 */
+	ia64_sn_set_os_feature(OSF_PCISEGMENT_ENABLE);
+	ia64_sn_set_os_feature(OSF_ACPI_ENABLE);
+
+	/* Load the new DSDT and SSDT tables into the global table list. */
+	acpi_table_init();
+
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
+	/*
+	 * Handle SN vga console.
+	 *
+	 * SN systems do not have enough ACPI table information
+	 * being passed from prom to identify VGA adapters and the legacy
+	 * addresses to access them.  Until that is done, SN systems rely
+	 * on the PCDP table to identify the primary VGA console if one
+	 * exists.
+	 *
+	 * However, kernel PCDP support is optional, and even if it is built
+	 * into the kernel, it will not be used if the boot cmdline contains
+	 * console= directives.
+	 *
+	 * So, to work around this mess, we duplicate some of the PCDP code
+	 * here so that the primary VGA console (as defined by PCDP) will
+	 * work on SN systems even if a different console (e.g. serial) is
+	 * selected on the boot line (or CONFIG_EFI_PCDP is off).
+	 */
+
+	if (! vga_console_membase)
+		sn_scan_pcdp();
+
+	/*
+	 *	Setup legacy IO space.
+	 *	vga_console_iobase maps to PCI IO Space address 0 on the
+	 * 	bus containing the VGA console.
+	 */
+	if (vga_console_iobase) {
+		io_space[0].mmio_base =
+			(unsigned long) ioremap(vga_console_iobase, 0);
+		io_space[0].sparse = 0;
+	}
+
+	if (vga_console_membase) {
+		/* usable vga ... make tty0 the preferred default console */
+		if (!strstr(*cmdline_p, "console="))
+			add_preferred_console("tty", 0, NULL);
+	} else {
+		printk(KERN_DEBUG "SGI: Disabling VGA console\n");
+		if (!strstr(*cmdline_p, "console="))
+			add_preferred_console("ttySG", 0, NULL);
+#ifdef CONFIG_DUMMY_CONSOLE
+		conswitchp = &dummy_con;
+#else
+		conswitchp = NULL;
+#endif				/* CONFIG_DUMMY_CONSOLE */
+	}
+#endif				/* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */
+
+	MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY;
+
+	/*
+	 * Build the tables for managing cnodes.
+	 */
+	build_cnode_tables();
+
+	status =
+	    ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
+			       &drift);
+	if (status != 0 || ticks_per_sec < 100000) {
+		printk(KERN_WARNING
+		       "unable to determine platform RTC clock frequency, guessing.\n");
+		/* PROM gives wrong value for clock freq. so guess */
+		sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
+	} else
+		sn_rtc_cycles_per_second = ticks_per_sec;
+
+	platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
+
+	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
+
+	/*
+	 * we set the default root device to /dev/hda
+	 * to make simulation easy
+	 */
+	ROOT_DEV = Root_HDA1;
+
+	/*
+	 * Create the PDAs and NODEPDAs for all the cpus.
+	 */
+	sn_init_pdas(cmdline_p);
+
+	ia64_mark_idle = &snidle;
+
+	/*
+	 * For the bootcpu, we do this here. All other cpus will make the
+	 * call as part of cpu_init in slave cpu initialization.
+	 */
+	sn_cpu_init();
+
+#ifdef CONFIG_SMP
+	init_smp_config();
+#endif
+	screen_info = sn_screen_info;
+
+	sn_timer_init();
+
+	/*
+	 * set pm_power_off to a SAL call to allow
+	 * sn machines to power off. The SAL call can be replaced
+	 * by an ACPI interface call when ACPI is fully implemented
+	 * for sn.
+	 */
+	pm_power_off = ia64_sn_power_down;
+	current->thread.flags |= IA64_THREAD_MIGRATION;
+}
+
+/**
+ * sn_init_pdas - setup node data areas
+ *
+ * One time setup for Node Data Area.  Called by sn_setup().
+ */
+static void __init sn_init_pdas(char **cmdline_p)
+{
+	cnodeid_t cnode;
+
+	/*
+	 * Allocate & initialize the nodepda for each node.
+	 */
+	for_each_online_node(cnode) {
+		nodepdaindr[cnode] =
+		    alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t));
+		memset(nodepdaindr[cnode]->phys_cpuid, -1,
+		    sizeof(nodepdaindr[cnode]->phys_cpuid));
+		spin_lock_init(&nodepdaindr[cnode]->ptc_lock);
+	}
+
+	/*
+	 * Allocate & initialize nodepda for TIOs.  For now, put them on node 0.
+	 */
+	for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++)
+		nodepdaindr[cnode] =
+		    alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
+
+	/*
+	 * Now copy the array of nodepda pointers to each nodepda.
+	 */
+	for (cnode = 0; cnode < num_cnodes; cnode++)
+		memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr,
+		       sizeof(nodepdaindr));
+
+	/*
+	 * Set up IO related platform-dependent nodepda fields.
+	 * The following routine actually sets up the hubinfo struct
+	 * in nodepda.
+	 */
+	for_each_online_node(cnode) {
+		bte_init_node(nodepdaindr[cnode], cnode);
+	}
+
+	/*
+	 * Initialize the per node hubdev.  This includes IO Nodes and
+	 * headless/memless nodes.
+	 */
+	for (cnode = 0; cnode < num_cnodes; cnode++) {
+		hubdev_init_node(nodepdaindr[cnode], cnode);
+	}
+}
+
+/**
+ * sn_cpu_init - initialize per-cpu data areas
+ * @cpuid: cpuid of the caller
+ *
+ * Called during cpu initialization on each cpu as it starts.
+ * Currently, initializes the per-cpu data area for SNIA.
+ * Also sets up a few fields in the nodepda.  Also known as
+ * platform_cpu_init() by the ia64 machvec code.
+ */
+void __cpuinit sn_cpu_init(void)
+{
+	int cpuid;
+	int cpuphyid;
+	int nasid;
+	int subnode;
+	int slice;
+	int cnode;
+	int i;
+	static int wars_have_been_checked, set_cpu0_number;
+
+	cpuid = smp_processor_id();
+	if (cpuid == 0 && IS_MEDUSA()) {
+		if (ia64_sn_is_fake_prom())
+			sn_prom_type = 2;
+		else
+			sn_prom_type = 1;
+		printk(KERN_INFO "Running on medusa with %s PROM\n",
+		       (sn_prom_type == 1) ? "real" : "fake");
+	}
+
+	memset(pda, 0, sizeof(pda));
+	if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2,
+				&sn_hub_info->nasid_bitmask,
+				&sn_hub_info->nasid_shift,
+				&sn_system_size, &sn_sharing_domain_size,
+				&sn_partition_id, &sn_coherency_id,
+				&sn_region_size))
+		BUG();
+	sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
+
+	/*
+	 * Don't check status. The SAL call is not supported on all PROMs
+	 * but a failure is harmless.
+	 * Architecturally, cpu_init is always called twice on cpu 0. We
+	 * should set cpu_number on cpu 0 once.
+	 */
+	if (cpuid == 0) {
+		if (!set_cpu0_number) {
+			(void) ia64_sn_set_cpu_number(cpuid);
+			set_cpu0_number = 1;
+		}
+	} else
+		(void) ia64_sn_set_cpu_number(cpuid);
+
+	/*
+	 * The boot cpu makes this call again after platform initialization is
+	 * complete.
+	 */
+	if (nodepdaindr[0] == NULL)
+		return;
+
+	for (i = 0; i < MAX_PROM_FEATURE_SETS; i++)
+		if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0)
+			break;
+
+	cpuphyid = get_sapicid();
+
+	if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice))
+		BUG();
+
+	for (i=0; i < MAX_NUMNODES; i++) {
+		if (nodepdaindr[i]) {
+			nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid;
+			nodepdaindr[i]->phys_cpuid[cpuid].slice = slice;
+			nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode;
+		}
+	}
+
+	cnode = nasid_to_cnodeid(nasid);
+
+	sn_nodepda = nodepdaindr[cnode];
+
+	pda->led_address =
+	    (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT));
+	pda->led_state = LED_ALWAYS_SET;
+	pda->hb_count = HZ / 2;
+	pda->hb_state = 0;
+	pda->idle_flag = 0;
+
+	if (cpuid != 0) {
+		/* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */
+		memcpy(sn_cnodeid_to_nasid,
+		       (&per_cpu(__sn_cnodeid_to_nasid, 0)),
+		       sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
+	}
+
+	/*
+	 * Check for WARs.
+	 * Only needs to be done once, on BSP.
+	 * Has to be done after loop above, because it uses this cpu's
+	 * sn_cnodeid_to_nasid table which was just initialized if this
+	 * isn't cpu 0.
+	 * Has to be done before assignment below.
+	 */
+	if (!wars_have_been_checked) {
+		sn_check_for_wars();
+		wars_have_been_checked = 1;
+	}
+	sn_hub_info->shub_1_1_found = shub_1_1_found;
+
+	/*
+	 * Set up addresses of PIO/MEM write status registers.
+	 */
+	{
+		u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0};
+		u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2,
+			SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
+		u64 *pio;
+		pio = is_shub1() ? pio1 : pio2;
+		pda->pio_write_status_addr =
+		   (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
+		pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
+	}
+
+	/*
+	 * WAR addresses for SHUB 1.x.
+	 */
+	if (local_node_data->active_cpu_count++ == 0 && is_shub1()) {
+		int buddy_nasid;
+		buddy_nasid =
+		    cnodeid_to_nasid(numa_node_id() ==
+				     num_online_nodes() - 1 ? 0 : numa_node_id() + 1);
+		pda->pio_shub_war_cam_addr =
+		    (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid,
+							      SH1_PI_CAM_CONTROL);
+	}
+}
+
+/*
+ * Build tables for converting between NASIDs and cnodes.
+ */
+static inline int __init board_needs_cnode(int type)
+{
+	return (type == KLTYPE_SNIA || type == KLTYPE_TIO);
+}
+
+void __init build_cnode_tables(void)
+{
+	int nasid;
+	int node;
+	lboard_t *brd;
+
+	memset(physical_node_map, -1, sizeof(physical_node_map));
+	memset(sn_cnodeid_to_nasid, -1,
+			sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
+
+	/*
+	 * First populate the tables with C/M bricks. This ensures that
+	 * cnode == node for all C & M bricks.
+	 */
+	for_each_online_node(node) {
+		nasid = pxm_to_nasid(node_to_pxm(node));
+		sn_cnodeid_to_nasid[node] = nasid;
+		physical_node_map[nasid] = node;
+	}
+
+	/*
+	 * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node
+	 * limit on the number of nodes, we can't use the generic node numbers 
+	 * for this. Note that num_cnodes is incremented below as TIOs or
+	 * headless/memoryless nodes are discovered.
+	 */
+	num_cnodes = num_online_nodes();
+
+	/* fakeprom does not support klgraph */
+	if (IS_RUNNING_ON_FAKE_PROM())
+		return;
+
+	/* Find TIOs & headless/memoryless nodes and add them to the tables */
+	for_each_online_node(node) {
+		kl_config_hdr_t *klgraph_header;
+		nasid = cnodeid_to_nasid(node);
+		klgraph_header = ia64_sn_get_klconfig_addr(nasid);
+		BUG_ON(klgraph_header == NULL);
+		brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
+		while (brd) {
+			if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) {
+				sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid;
+				physical_node_map[brd->brd_nasid] = num_cnodes++;
+			}
+			brd = find_lboard_next(brd);
+		}
+	}
+}
+
+int
+nasid_slice_to_cpuid(int nasid, int slice)
+{
+	long cpu;
+
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+		if (cpuid_to_nasid(cpu) == nasid &&
+					cpuid_to_slice(cpu) == slice)
+			return cpu;
+
+	return -1;
+}
+
+int sn_prom_feature_available(int id)
+{
+	if (id >= BITS_PER_LONG * MAX_PROM_FEATURE_SETS)
+		return 0;
+	return test_bit(id, sn_prom_features);
+}
+
+void
+sn_kernel_launch_event(void)
+{
+	/* ignore status until we understand possible failure, if any*/
+	if (ia64_sn_kernel_launch_event())
+		printk(KERN_ERR "KEXEC is not supported in this PROM, Please update the PROM.\n");
+}
+EXPORT_SYMBOL(sn_prom_feature_available);
+
diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile
new file mode 100644
index 00000000..3d09108d
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/Makefile
@@ -0,0 +1,15 @@
+# arch/ia64/sn/kernel/sn2/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1999,2001-2002 Silicon Graphics, Inc. All rights reserved.
+#
+# sn2 specific kernel files
+#
+
+ccflags-y := -Iarch/ia64/sn/include
+
+obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \
+	 prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o
diff --git a/arch/ia64/sn/kernel/sn2/cache.c b/arch/ia64/sn/kernel/sn2/cache.c
new file mode 100644
index 00000000..2862cb33
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/cache.c
@@ -0,0 +1,41 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ * 
+ * Copyright (C) 2001-2003, 2006 Silicon Graphics, Inc. All rights reserved.
+ *
+ */
+#include <linux/module.h>
+#include <asm/pgalloc.h>
+#include <asm/sn/arch.h>
+
+/**
+ * sn_flush_all_caches - flush a range of address from all caches (incl. L4)
+ * @flush_addr: identity mapped region 7 address to start flushing
+ * @bytes: number of bytes to flush
+ *
+ * Flush a range of addresses from all caches including L4. 
+ * All addresses fully or partially contained within 
+ * @flush_addr to @flush_addr + @bytes are flushed
+ * from all caches.
+ */
+void
+sn_flush_all_caches(long flush_addr, long bytes)
+{
+	unsigned long addr = flush_addr;
+
+	/* SHub1 requires a cached address */
+	if (is_shub1() && (addr & RGN_BITS) == RGN_BASE(RGN_UNCACHED))
+		addr = (addr - RGN_BASE(RGN_UNCACHED)) + RGN_BASE(RGN_KERNEL);
+
+	flush_icache_range(addr, addr + bytes);
+	/*
+	 * The last call may have returned before the caches
+	 * were actually flushed, so we call it again to make
+	 * sure.
+	 */
+	flush_icache_range(addr, addr + bytes);
+	mb();
+}
+EXPORT_SYMBOL(sn_flush_all_caches);
diff --git a/arch/ia64/sn/kernel/sn2/io.c b/arch/ia64/sn/kernel/sn2/io.c
new file mode 100644
index 00000000..a12c0586
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/io.c
@@ -0,0 +1,101 @@
+/* 
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved.
+ *
+ * The generic kernel requires function pointers to these routines, so
+ * we wrap the inlines from asm/ia64/sn/sn2/io.h here.
+ */
+
+#include <asm/sn/io.h>
+
+#ifdef CONFIG_IA64_GENERIC
+
+#undef __sn_inb
+#undef __sn_inw
+#undef __sn_inl
+#undef __sn_outb
+#undef __sn_outw
+#undef __sn_outl
+#undef __sn_readb
+#undef __sn_readw
+#undef __sn_readl
+#undef __sn_readq
+#undef __sn_readb_relaxed
+#undef __sn_readw_relaxed
+#undef __sn_readl_relaxed
+#undef __sn_readq_relaxed
+
+unsigned int __sn_inb(unsigned long port)
+{
+	return ___sn_inb(port);
+}
+
+unsigned int __sn_inw(unsigned long port)
+{
+	return ___sn_inw(port);
+}
+
+unsigned int __sn_inl(unsigned long port)
+{
+	return ___sn_inl(port);
+}
+
+void __sn_outb(unsigned char val, unsigned long port)
+{
+	___sn_outb(val, port);
+}
+
+void __sn_outw(unsigned short val, unsigned long port)
+{
+	___sn_outw(val, port);
+}
+
+void __sn_outl(unsigned int val, unsigned long port)
+{
+	___sn_outl(val, port);
+}
+
+unsigned char __sn_readb(void __iomem *addr)
+{
+	return ___sn_readb(addr);
+}
+
+unsigned short __sn_readw(void __iomem *addr)
+{
+	return ___sn_readw(addr);
+}
+
+unsigned int __sn_readl(void __iomem *addr)
+{
+	return ___sn_readl(addr);
+}
+
+unsigned long __sn_readq(void __iomem *addr)
+{
+	return ___sn_readq(addr);
+}
+
+unsigned char __sn_readb_relaxed(void __iomem *addr)
+{
+	return ___sn_readb_relaxed(addr);
+}
+
+unsigned short __sn_readw_relaxed(void __iomem *addr)
+{
+	return ___sn_readw_relaxed(addr);
+}
+
+unsigned int __sn_readl_relaxed(void __iomem *addr)
+{
+	return ___sn_readl_relaxed(addr);
+}
+
+unsigned long __sn_readq_relaxed(void __iomem *addr)
+{
+	return ___sn_readq_relaxed(addr);
+}
+
+#endif
diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
new file mode 100644
index 00000000..e6332881
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
@@ -0,0 +1,274 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999,2001-2004, 2006 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * Module to export the system's Firmware Interface Tables, including
+ * PROM revision numbers and banners, in /proc
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/nodemask.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/addrs.h>
+
+MODULE_DESCRIPTION("PROM version reporting for /proc");
+MODULE_AUTHOR("Chad Talbott");
+MODULE_LICENSE("GPL");
+
+/* Standard Intel FIT entry types */
+#define FIT_ENTRY_FIT_HEADER	0x00	/* FIT header entry */
+#define FIT_ENTRY_PAL_B		0x01	/* PAL_B entry */
+/* Entries 0x02 through 0x0D reserved by Intel */
+#define FIT_ENTRY_PAL_A_PROC	0x0E	/* Processor-specific PAL_A entry */
+#define FIT_ENTRY_PAL_A		0x0F	/* PAL_A entry, same as... */
+#define FIT_ENTRY_PAL_A_GEN	0x0F	/* ...Generic PAL_A entry */
+#define FIT_ENTRY_UNUSED	0x7F	/* Unused (reserved by Intel?) */
+/* OEM-defined entries range from 0x10 to 0x7E. */
+#define FIT_ENTRY_SAL_A		0x10	/* SAL_A entry */
+#define FIT_ENTRY_SAL_B		0x11	/* SAL_B entry */
+#define FIT_ENTRY_SALRUNTIME	0x12	/* SAL runtime entry */
+#define FIT_ENTRY_EFI		0x1F	/* EFI entry */
+#define FIT_ENTRY_FPSWA		0x20	/* embedded fpswa entry */
+#define FIT_ENTRY_VMLINUX	0x21	/* embedded vmlinux entry */
+
+#define FIT_MAJOR_SHIFT	(32 + 8)
+#define FIT_MAJOR_MASK	((1 << 8) - 1)
+#define FIT_MINOR_SHIFT	32
+#define FIT_MINOR_MASK	((1 << 8) - 1)
+
+#define FIT_MAJOR(q)	\
+	((unsigned) ((q) >> FIT_MAJOR_SHIFT) & FIT_MAJOR_MASK)
+#define FIT_MINOR(q)	\
+	((unsigned) ((q) >> FIT_MINOR_SHIFT) & FIT_MINOR_MASK)
+
+#define FIT_TYPE_SHIFT	(32 + 16)
+#define FIT_TYPE_MASK	((1 << 7) - 1)
+
+#define FIT_TYPE(q)	\
+	((unsigned) ((q) >> FIT_TYPE_SHIFT) & FIT_TYPE_MASK)
+
+struct fit_type_map_t {
+	unsigned char type;
+	const char *name;
+};
+
+static const struct fit_type_map_t fit_entry_types[] = {
+	{FIT_ENTRY_FIT_HEADER, "FIT Header"},
+	{FIT_ENTRY_PAL_A_GEN, "Generic PAL_A"},
+	{FIT_ENTRY_PAL_A_PROC, "Processor-specific PAL_A"},
+	{FIT_ENTRY_PAL_A, "PAL_A"},
+	{FIT_ENTRY_PAL_B, "PAL_B"},
+	{FIT_ENTRY_SAL_A, "SAL_A"},
+	{FIT_ENTRY_SAL_B, "SAL_B"},
+	{FIT_ENTRY_SALRUNTIME, "SAL runtime"},
+	{FIT_ENTRY_EFI, "EFI"},
+	{FIT_ENTRY_VMLINUX, "Embedded Linux"},
+	{FIT_ENTRY_FPSWA, "Embedded FPSWA"},
+	{FIT_ENTRY_UNUSED, "Unused"},
+	{0xff, "Error"},
+};
+
+static const char *fit_type_name(unsigned char type)
+{
+	struct fit_type_map_t const *mapp;
+
+	for (mapp = fit_entry_types; mapp->type != 0xff; mapp++)
+		if (type == mapp->type)
+			return mapp->name;
+
+	if ((type > FIT_ENTRY_PAL_A) && (type < FIT_ENTRY_UNUSED))
+		return "OEM type";
+	if ((type > FIT_ENTRY_PAL_B) && (type < FIT_ENTRY_PAL_A))
+		return "Reserved";
+
+	return "Unknown type";
+}
+
+static int
+get_fit_entry(unsigned long nasid, int index, unsigned long *fentry,
+	      char *banner, int banlen)
+{
+	return ia64_sn_get_fit_compt(nasid, index, fentry, banner, banlen);
+}
+
+
+/*
+ * These two routines display the FIT table for each node.
+ */
+static int dump_fit_entry(char *page, unsigned long *fentry)
+{
+	unsigned type;
+
+	type = FIT_TYPE(fentry[1]);
+	return sprintf(page, "%02x %-25s %x.%02x %016lx %u\n",
+		       type,
+		       fit_type_name(type),
+		       FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1]),
+		       fentry[0],
+		       /* mult by sixteen to get size in bytes */
+		       (unsigned)(fentry[1] & 0xffffff) * 16);
+}
+
+
+/*
+ * We assume that the fit table will be small enough that we can print
+ * the whole thing into one page.  (This is true for our default 16kB
+ * pages -- each entry is about 60 chars wide when printed.)  I read
+ * somewhere that the maximum size of the FIT is 128 entries, so we're
+ * OK except for 4kB pages (and no one is going to do that on SN
+ * anyway).
+ */
+static int
+dump_fit(char *page, unsigned long nasid)
+{
+	unsigned long fentry[2];
+	int index;
+	char *p;
+
+	p = page;
+	for (index=0;;index++) {
+		BUG_ON(index * 60 > PAGE_SIZE);
+		if (get_fit_entry(nasid, index, fentry, NULL, 0))
+			break;
+		p += dump_fit_entry(p, fentry);
+	}
+
+	return p - page;
+}
+
+static int
+dump_version(char *page, unsigned long nasid)
+{
+	unsigned long fentry[2];
+	char banner[128];
+	int index;
+	int len;
+
+	for (index = 0; ; index++) {
+		if (get_fit_entry(nasid, index, fentry, banner,
+				  sizeof(banner)))
+			return 0;
+		if (FIT_TYPE(fentry[1]) == FIT_ENTRY_SAL_A)
+			break;
+	}
+
+	len = sprintf(page, "%x.%02x\n", FIT_MAJOR(fentry[1]),
+		      FIT_MINOR(fentry[1]));
+	page += len;
+
+	if (banner[0])
+		len += snprintf(page, PAGE_SIZE-len, "%s\n", banner);
+
+	return len;
+}
+
+/* same as in proc_misc.c */
+static int
+proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof,
+		  int len)
+{
+	if (len <= off + count)
+		*eof = 1;
+	*start = page + off;
+	len -= off;
+	if (len > count)
+		len = count;
+	if (len < 0)
+		len = 0;
+	return len;
+}
+
+static int
+read_version_entry(char *page, char **start, off_t off, int count, int *eof,
+		   void *data)
+{
+	int len;
+
+	/* data holds the NASID of the node */
+	len = dump_version(page, (unsigned long)data);
+	len = proc_calc_metrics(page, start, off, count, eof, len);
+	return len;
+}
+
+static int
+read_fit_entry(char *page, char **start, off_t off, int count, int *eof,
+	       void *data)
+{
+	int len;
+
+	/* data holds the NASID of the node */
+	len = dump_fit(page, (unsigned long)data);
+	len = proc_calc_metrics(page, start, off, count, eof, len);
+
+	return len;
+}
+
+/* module entry points */
+int __init prominfo_init(void);
+void __exit prominfo_exit(void);
+
+module_init(prominfo_init);
+module_exit(prominfo_exit);
+
+static struct proc_dir_entry **proc_entries;
+static struct proc_dir_entry *sgi_prominfo_entry;
+
+#define NODE_NAME_LEN 11
+
+int __init prominfo_init(void)
+{
+	struct proc_dir_entry **entp;
+	cnodeid_t cnodeid;
+	unsigned long nasid;
+	int size;
+	char name[NODE_NAME_LEN];
+
+	if (!ia64_platform_is("sn2"))
+		return 0;
+
+	size = num_online_nodes() * sizeof(struct proc_dir_entry *);
+	proc_entries = kzalloc(size, GFP_KERNEL);
+	if (!proc_entries)
+		return -ENOMEM;
+
+	sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL);
+
+	entp = proc_entries;
+	for_each_online_node(cnodeid) {
+		sprintf(name, "node%d", cnodeid);
+		*entp = proc_mkdir(name, sgi_prominfo_entry);
+		nasid = cnodeid_to_nasid(cnodeid);
+		create_proc_read_entry("fit", 0, *entp, read_fit_entry,
+					   (void *)nasid);
+		create_proc_read_entry("version", 0, *entp,
+					   read_version_entry, (void *)nasid);
+		entp++;
+	}
+
+	return 0;
+}
+
+void __exit prominfo_exit(void)
+{
+	struct proc_dir_entry **entp;
+	unsigned int cnodeid;
+	char name[NODE_NAME_LEN];
+
+	entp = proc_entries;
+	for_each_online_node(cnodeid) {
+		remove_proc_entry("fit", *entp);
+		remove_proc_entry("version", *entp);
+		sprintf(name, "node%d", cnodeid);
+		remove_proc_entry(name, sgi_prominfo_entry);
+		entp++;
+	}
+	remove_proc_entry("sgi_prominfo", NULL);
+	kfree(proc_entries);
+}
diff --git a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S
new file mode 100644
index 00000000..bebbcc4f
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S
@@ -0,0 +1,92 @@
+/* 
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <asm/types.h>
+#include <asm/sn/shub_mmr.h>
+
+#define DEADLOCKBIT	SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT
+#define WRITECOUNTMASK	SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK
+#define ALIAS_OFFSET	8
+
+
+	.global	sn2_ptc_deadlock_recovery_core
+	.proc  	sn2_ptc_deadlock_recovery_core
+
+sn2_ptc_deadlock_recovery_core:
+	.regstk 6,0,0,0
+
+	ptc0  	 = in0
+	data0 	 = in1
+	ptc1  	 = in2
+	data1 	 = in3
+	piowc 	 = in4
+	zeroval  = in5
+	piowcphy = r30
+	psrsave  = r2
+	scr1	 = r16
+	scr2	 = r17
+	mask	 = r18
+
+
+	extr.u	piowcphy=piowc,0,61;;	// Convert piowc to uncached physical address
+	dep	piowcphy=-1,piowcphy,63,1
+	movl	mask=WRITECOUNTMASK
+	mov	r8=r0
+
+1:
+	cmp.ne  p8,p9=r0,ptc1		// Test for shub type (ptc1 non-null on shub1)
+					// p8 = 1 if shub1, p9 = 1 if shub2
+
+	add	scr2=ALIAS_OFFSET,piowc	// Address of WRITE_STATUS alias register 
+	mov	scr1=7;;		// Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR
+(p8)	st8.rel	[scr2]=scr1;;
+(p9)	ld8.acq	scr1=[scr2];;
+
+5:	ld8.acq	scr1=[piowc];;		// Wait for PIOs to complete.
+	hint	@pause
+	and	scr2=scr1,mask;;	// mask of writecount bits
+	cmp.ne	p6,p0=zeroval,scr2
+(p6)	br.cond.sptk 5b
+	
+
+
+	////////////// BEGIN PHYSICAL MODE ////////////////////
+	mov psrsave=psr			// Disable IC (no PMIs)
+	rsm psr.i | psr.dt | psr.ic;;
+	srlz.i;;
+
+	st8.rel [ptc0]=data0		// Write PTC0 & wait for completion.
+
+5:	ld8.acq	scr1=[piowcphy];;	// Wait for PIOs to complete.
+	hint	@pause
+	and	scr2=scr1,mask;;	// mask of writecount bits
+	cmp.ne	p6,p0=zeroval,scr2
+(p6)	br.cond.sptk 5b;;
+
+	tbit.nz	p8,p7=scr1,DEADLOCKBIT;;// Test for DEADLOCK
+(p7)	cmp.ne p7,p0=r0,ptc1;;		// Test for non-null ptc1
+	
+(p7)	st8.rel [ptc1]=data1;;		// Now write PTC1.
+
+5:	ld8.acq	scr1=[piowcphy];;	// Wait for PIOs to complete.
+	hint	@pause
+	and	scr2=scr1,mask;;	// mask of writecount bits
+	cmp.ne	p6,p0=zeroval,scr2
+(p6)	br.cond.sptk 5b
+	
+	tbit.nz	p8,p0=scr1,DEADLOCKBIT;;// Test for DEADLOCK
+
+	mov psr.l=psrsave;;		// Reenable IC
+	srlz.i;;
+	////////////// END   PHYSICAL MODE ////////////////////
+
+(p8)	add	r8=1,r8
+(p8)	br.cond.spnt 1b;;		// Repeat if DEADLOCK occurred.
+
+	br.ret.sptk	rp
+	.endp sn2_ptc_deadlock_recovery_core
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
new file mode 100644
index 00000000..e884ba4e
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -0,0 +1,573 @@
+/*
+ * SN2 Platform specific SMP Support
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/nodemask.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/tlb.h>
+#include <asm/numa.h>
+#include <asm/hw_irq.h>
+#include <asm/current.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/rw_mmr.h>
+#include <asm/sn/sn_feature_sets.h>
+
+DEFINE_PER_CPU(struct ptc_stats, ptcstats);
+DECLARE_PER_CPU(struct ptc_stats, ptcstats);
+
+static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
+
+/* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */
+static int sn2_flush_opt = 0;
+
+extern unsigned long
+sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
+			       volatile unsigned long *, unsigned long,
+			       volatile unsigned long *, unsigned long);
+void
+sn2_ptc_deadlock_recovery(short *, short, short, int,
+			  volatile unsigned long *, unsigned long,
+			  volatile unsigned long *, unsigned long);
+
+/*
+ * Note: some is the following is captured here to make degugging easier
+ * (the macros make more sense if you see the debug patch - not posted)
+ */
+#define sn2_ptctest	0
+#define local_node_uses_ptc_ga(sh1)	((sh1) ? 1 : 0)
+#define max_active_pio(sh1)		((sh1) ? 32 : 7)
+#define reset_max_active_on_deadlock()	1
+#define PTC_LOCK(sh1)			((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
+
+struct ptc_stats {
+	unsigned long ptc_l;
+	unsigned long change_rid;
+	unsigned long shub_ptc_flushes;
+	unsigned long nodes_flushed;
+	unsigned long deadlocks;
+	unsigned long deadlocks2;
+	unsigned long lock_itc_clocks;
+	unsigned long shub_itc_clocks;
+	unsigned long shub_itc_clocks_max;
+	unsigned long shub_ptc_flushes_not_my_mm;
+	unsigned long shub_ipi_flushes;
+	unsigned long shub_ipi_flushes_itc_clocks;
+};
+
+#define sn2_ptctest	0
+
+static inline unsigned long wait_piowc(void)
+{
+	volatile unsigned long *piows;
+	unsigned long zeroval, ws;
+
+	piows = pda->pio_write_status_addr;
+	zeroval = pda->pio_write_status_val;
+	do {
+		cpu_relax();
+	} while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
+	return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
+}
+
+/**
+ * sn_migrate - SN-specific task migration actions
+ * @task: Task being migrated to new CPU
+ *
+ * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
+ * Context switching user threads which have memory-mapped MMIO may cause
+ * PIOs to issue from separate CPUs, thus the PIO writes must be drained
+ * from the previous CPU's Shub before execution resumes on the new CPU.
+ */
+void sn_migrate(struct task_struct *task)
+{
+	pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
+	volatile unsigned long *adr = last_pda->pio_write_status_addr;
+	unsigned long val = last_pda->pio_write_status_val;
+
+	/* Drain PIO writes from old CPU's Shub */
+	while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
+			!= val))
+		cpu_relax();
+}
+
+void sn_tlb_migrate_finish(struct mm_struct *mm)
+{
+	/* flush_tlb_mm is inefficient if more than 1 users of mm */
+	if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
+		flush_tlb_mm(mm);
+}
+
+static void
+sn2_ipi_flush_all_tlb(struct mm_struct *mm)
+{
+	unsigned long itc;
+
+	itc = ia64_get_itc();
+	smp_flush_tlb_cpumask(*mm_cpumask(mm));
+	itc = ia64_get_itc() - itc;
+	__get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc;
+	__get_cpu_var(ptcstats).shub_ipi_flushes++;
+}
+
+/**
+ * sn2_global_tlb_purge - globally purge translation cache of virtual address range
+ * @mm: mm_struct containing virtual address range
+ * @start: start of virtual address range
+ * @end: end of virtual address range
+ * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
+ *
+ * Purges the translation caches of all processors of the given virtual address
+ * range.
+ *
+ * Note:
+ * 	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
+ * 	- cpu_vm_mask is converted into a nodemask of the nodes containing the
+ * 	  cpus in cpu_vm_mask.
+ *	- if only one bit is set in cpu_vm_mask & it is the current cpu & the
+ *	  process is purging its own virtual address range, then only the
+ *	  local TLB needs to be flushed. This flushing can be done using
+ *	  ptc.l. This is the common case & avoids the global spinlock.
+ *	- if multiple cpus have loaded the context, then flushing has to be
+ *	  done with ptc.g/MMRs under protection of the global ptc_lock.
+ */
+
+void
+sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
+		     unsigned long end, unsigned long nbits)
+{
+	int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
+	int mymm = (mm == current->active_mm && mm == current->mm);
+	int use_cpu_ptcga;
+	volatile unsigned long *ptc0, *ptc1;
+	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
+	short nasids[MAX_NUMNODES], nix;
+	nodemask_t nodes_flushed;
+	int active, max_active, deadlock, flush_opt = sn2_flush_opt;
+
+	if (flush_opt > 2) {
+		sn2_ipi_flush_all_tlb(mm);
+		return;
+	}
+
+	nodes_clear(nodes_flushed);
+	i = 0;
+
+	for_each_cpu(cpu, mm_cpumask(mm)) {
+		cnode = cpu_to_node(cpu);
+		node_set(cnode, nodes_flushed);
+		lcpu = cpu;
+		i++;
+	}
+
+	if (i == 0)
+		return;
+
+	preempt_disable();
+
+	if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) {
+		do {
+			ia64_ptcl(start, nbits << 2);
+			start += (1UL << nbits);
+		} while (start < end);
+		ia64_srlz_i();
+		__get_cpu_var(ptcstats).ptc_l++;
+		preempt_enable();
+		return;
+	}
+
+	if (atomic_read(&mm->mm_users) == 1 && mymm) {
+		flush_tlb_mm(mm);
+		__get_cpu_var(ptcstats).change_rid++;
+		preempt_enable();
+		return;
+	}
+
+	if (flush_opt == 2) {
+		sn2_ipi_flush_all_tlb(mm);
+		preempt_enable();
+		return;
+	}
+
+	itc = ia64_get_itc();
+	nix = 0;
+	for_each_node_mask(cnode, nodes_flushed)
+		nasids[nix++] = cnodeid_to_nasid(cnode);
+
+	rr_value = (mm->context << 3) | REGION_NUMBER(start);
+
+	shub1 = is_shub1();
+	if (shub1) {
+		data0 = (1UL << SH1_PTC_0_A_SHFT) |
+		    	(nbits << SH1_PTC_0_PS_SHFT) |
+			(rr_value << SH1_PTC_0_RID_SHFT) |
+		    	(1UL << SH1_PTC_0_START_SHFT);
+		ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
+		ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
+	} else {
+		data0 = (1UL << SH2_PTC_A_SHFT) |
+			(nbits << SH2_PTC_PS_SHFT) |
+		    	(1UL << SH2_PTC_START_SHFT);
+		ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + 
+			(rr_value << SH2_PTC_RID_SHFT));
+		ptc1 = NULL;
+	}
+	
+
+	mynasid = get_nasid();
+	use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
+	max_active = max_active_pio(shub1);
+
+	itc = ia64_get_itc();
+	spin_lock_irqsave(PTC_LOCK(shub1), flags);
+	itc2 = ia64_get_itc();
+
+	__get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
+	__get_cpu_var(ptcstats).shub_ptc_flushes++;
+	__get_cpu_var(ptcstats).nodes_flushed += nix;
+	if (!mymm)
+		 __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
+
+	if (use_cpu_ptcga && !mymm) {
+		old_rr = ia64_get_rr(start);
+		ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
+		ia64_srlz_d();
+	}
+
+	wait_piowc();
+	do {
+		if (shub1)
+			data1 = start | (1UL << SH1_PTC_1_START_SHFT);
+		else
+			data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
+		deadlock = 0;
+		active = 0;
+		for (ibegin = 0, i = 0; i < nix; i++) {
+			nasid = nasids[i];
+			if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
+				ia64_ptcga(start, nbits << 2);
+				ia64_srlz_i();
+			} else {
+				ptc0 = CHANGE_NASID(nasid, ptc0);
+				if (ptc1)
+					ptc1 = CHANGE_NASID(nasid, ptc1);
+				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
+				active++;
+			}
+			if (active >= max_active || i == (nix - 1)) {
+				if ((deadlock = wait_piowc())) {
+					if (flush_opt == 1)
+						goto done;
+					sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
+					if (reset_max_active_on_deadlock())
+						max_active = 1;
+				}
+				active = 0;
+				ibegin = i + 1;
+			}
+		}
+		start += (1UL << nbits);
+	} while (start < end);
+
+done:
+	itc2 = ia64_get_itc() - itc2;
+	__get_cpu_var(ptcstats).shub_itc_clocks += itc2;
+	if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
+		__get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
+
+	if (old_rr) {
+		ia64_set_rr(start, old_rr);
+		ia64_srlz_d();
+	}
+
+	spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
+
+	if (flush_opt == 1 && deadlock) {
+		__get_cpu_var(ptcstats).deadlocks++;
+		sn2_ipi_flush_all_tlb(mm);
+	}
+
+	preempt_enable();
+}
+
+/*
+ * sn2_ptc_deadlock_recovery
+ *
+ * Recover from PTC deadlocks conditions. Recovery requires stepping thru each 
+ * TLB flush transaction.  The recovery sequence is somewhat tricky & is
+ * coded in assembly language.
+ */
+
+void
+sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
+			  volatile unsigned long *ptc0, unsigned long data0,
+			  volatile unsigned long *ptc1, unsigned long data1)
+{
+	short nasid, i;
+	unsigned long *piows, zeroval, n;
+
+	__get_cpu_var(ptcstats).deadlocks++;
+
+	piows = (unsigned long *) pda->pio_write_status_addr;
+	zeroval = pda->pio_write_status_val;
+
+
+	for (i=ib; i <= ie; i++) {
+		nasid = nasids[i];
+		if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
+			continue;
+		ptc0 = CHANGE_NASID(nasid, ptc0);
+		if (ptc1)
+			ptc1 = CHANGE_NASID(nasid, ptc1);
+
+		n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
+		__get_cpu_var(ptcstats).deadlocks2 += n;
+	}
+
+}
+
+/**
+ * sn_send_IPI_phys - send an IPI to a Nasid and slice
+ * @nasid: nasid to receive the interrupt (may be outside partition)
+ * @physid: physical cpuid to receive the interrupt.
+ * @vector: command to send
+ * @delivery_mode: delivery mechanism
+ *
+ * Sends an IPI (interprocessor interrupt) to the processor specified by
+ * @physid
+ *
+ * @delivery_mode can be one of the following
+ *
+ * %IA64_IPI_DM_INT - pend an interrupt
+ * %IA64_IPI_DM_PMI - pend a PMI
+ * %IA64_IPI_DM_NMI - pend an NMI
+ * %IA64_IPI_DM_INIT - pend an INIT interrupt
+ */
+void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode)
+{
+	long val;
+	unsigned long flags = 0;
+	volatile long *p;
+
+	p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT);
+	val = (1UL << SH_IPI_INT_SEND_SHFT) |
+	    (physid << SH_IPI_INT_PID_SHFT) |
+	    ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) |
+	    ((long)vector << SH_IPI_INT_IDX_SHFT) |
+	    (0x000feeUL << SH_IPI_INT_BASE_SHFT);
+
+	mb();
+	if (enable_shub_wars_1_1()) {
+		spin_lock_irqsave(&sn2_global_ptc_lock, flags);
+	}
+	pio_phys_write_mmr(p, val);
+	if (enable_shub_wars_1_1()) {
+		wait_piowc();
+		spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
+	}
+
+}
+
+EXPORT_SYMBOL(sn_send_IPI_phys);
+
+/**
+ * sn2_send_IPI - send an IPI to a processor
+ * @cpuid: target of the IPI
+ * @vector: command to send
+ * @delivery_mode: delivery mechanism
+ * @redirect: redirect the IPI?
+ *
+ * Sends an IPI (InterProcessor Interrupt) to the processor specified by
+ * @cpuid.  @vector specifies the command to send, while @delivery_mode can 
+ * be one of the following
+ *
+ * %IA64_IPI_DM_INT - pend an interrupt
+ * %IA64_IPI_DM_PMI - pend a PMI
+ * %IA64_IPI_DM_NMI - pend an NMI
+ * %IA64_IPI_DM_INIT - pend an INIT interrupt
+ */
+void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
+{
+	long physid;
+	int nasid;
+
+	physid = cpu_physical_id(cpuid);
+	nasid = cpuid_to_nasid(cpuid);
+
+	/* the following is used only when starting cpus at boot time */
+	if (unlikely(nasid == -1))
+		ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL);
+
+	sn_send_IPI_phys(nasid, physid, vector, delivery_mode);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/**
+ * sn_cpu_disable_allowed - Determine if a CPU can be disabled.
+ * @cpu - CPU that is requested to be disabled.
+ *
+ * CPU disable is only allowed on SHub2 systems running with a PROM
+ * that supports CPU disable. It is not permitted to disable the boot processor.
+ */
+bool sn_cpu_disable_allowed(int cpu)
+{
+	if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT)) {
+		if (cpu != 0)
+			return true;
+		else
+			printk(KERN_WARNING
+			      "Disabling the boot processor is not allowed.\n");
+
+	} else
+		printk(KERN_WARNING
+		       "CPU disable is not supported on this system.\n");
+
+	return false;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_PROC_FS
+
+#define PTC_BASENAME	"sgi_sn/ptc_statistics"
+
+static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
+{
+	if (*offset < nr_cpu_ids)
+		return offset;
+	return NULL;
+}
+
+static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset)
+{
+	(*offset)++;
+	if (*offset < nr_cpu_ids)
+		return offset;
+	return NULL;
+}
+
+static void sn2_ptc_seq_stop(struct seq_file *file, void *data)
+{
+}
+
+static int sn2_ptc_seq_show(struct seq_file *file, void *data)
+{
+	struct ptc_stats *stat;
+	int cpu;
+
+	cpu = *(loff_t *) data;
+
+	if (!cpu) {
+		seq_printf(file,
+			   "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n");
+		seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt);
+	}
+
+	if (cpu < nr_cpu_ids && cpu_online(cpu)) {
+		stat = &per_cpu(ptcstats, cpu);
+		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
+				stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
+				stat->deadlocks,
+				1000 * stat->lock_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
+				1000 * stat->shub_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
+				1000 * stat->shub_itc_clocks_max / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
+				stat->shub_ptc_flushes_not_my_mm,
+				stat->deadlocks2,
+				stat->shub_ipi_flushes,
+				1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec);
+	}
+	return 0;
+}
+
+static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data)
+{
+	int cpu;
+	char optstr[64];
+
+	if (count == 0 || count > sizeof(optstr))
+		return -EINVAL;
+	if (copy_from_user(optstr, user, count))
+		return -EFAULT;
+	optstr[count - 1] = '\0';
+	sn2_flush_opt = simple_strtoul(optstr, NULL, 0);
+
+	for_each_online_cpu(cpu)
+		memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats));
+
+	return count;
+}
+
+static const struct seq_operations sn2_ptc_seq_ops = {
+	.start = sn2_ptc_seq_start,
+	.next = sn2_ptc_seq_next,
+	.stop = sn2_ptc_seq_stop,
+	.show = sn2_ptc_seq_show
+};
+
+static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &sn2_ptc_seq_ops);
+}
+
+static const struct file_operations proc_sn2_ptc_operations = {
+	.open = sn2_ptc_proc_open,
+	.read = seq_read,
+	.write = sn2_ptc_proc_write,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static struct proc_dir_entry *proc_sn2_ptc;
+
+static int __init sn2_ptc_init(void)
+{
+	if (!ia64_platform_is("sn2"))
+		return 0;
+
+	proc_sn2_ptc = proc_create(PTC_BASENAME, 0444,
+				   NULL, &proc_sn2_ptc_operations);
+	if (!proc_sn2_ptc) {
+		printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
+		return -EINVAL;
+	}
+	spin_lock_init(&sn2_global_ptc_lock);
+	return 0;
+}
+
+static void __exit sn2_ptc_exit(void)
+{
+	remove_proc_entry(PTC_BASENAME, NULL);
+}
+
+module_init(sn2_ptc_init);
+module_exit(sn2_ptc_exit);
+#endif /* CONFIG_PROC_FS */
+
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
new file mode 100644
index 00000000..30862c03
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -0,0 +1,998 @@
+/* 
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved.
+ *
+ * SGI Altix topology and hardware performance monitoring API.
+ * Mark Goodwin <markgw@sgi.com>. 
+ *
+ * Creates /proc/sgi_sn/sn_topology (read-only) to export
+ * info about Altix nodes, routers, CPUs and NumaLink
+ * interconnection/topology.
+ *
+ * Also creates a dynamic misc device named "sn_hwperf"
+ * that supports an ioctl interface to call down into SAL
+ * to discover hw objects, topology and to read/write
+ * memory mapped registers, e.g. for performance monitoring.
+ * The "sn_hwperf" device is registered only after the procfs
+ * file is first opened, i.e. only if/when it's needed. 
+ *
+ * This API is used by SGI Performance Co-Pilot and other
+ * tools, see http://oss.sgi.com/projects/pcp
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/seq_file.h>
+#include <linux/miscdevice.h>
+#include <linux/utsname.h>
+#include <linux/cpumask.h>
+#include <linux/nodemask.h>
+#include <linux/smp.h>
+#include <linux/mutex.h>
+
+#include <asm/processor.h>
+#include <asm/topology.h>
+#include <asm/uaccess.h>
+#include <asm/sal.h>
+#include <asm/sn/io.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/module.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/sn2/sn_hwperf.h>
+#include <asm/sn/addrs.h>
+
+static void *sn_hwperf_salheap = NULL;
+static int sn_hwperf_obj_cnt = 0;
+static nasid_t sn_hwperf_master_nasid = INVALID_NASID;
+static int sn_hwperf_init(void);
+static DEFINE_MUTEX(sn_hwperf_init_mutex);
+
+#define cnode_possible(n)	((n) < num_cnodes)
+
+static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret)
+{
+	int e;
+	u64 sz;
+	struct sn_hwperf_object_info *objbuf = NULL;
+
+	if ((e = sn_hwperf_init()) < 0) {
+		printk(KERN_ERR "sn_hwperf_init failed: err %d\n", e);
+		goto out;
+	}
+
+	sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info);
+	objbuf = vmalloc(sz);
+	if (objbuf == NULL) {
+		printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz);
+		e = -ENOMEM;
+		goto out;
+	}
+
+	e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_OBJECTS,
+		0, sz, (u64) objbuf, 0, 0, NULL);
+	if (e != SN_HWPERF_OP_OK) {
+		e = -EINVAL;
+		vfree(objbuf);
+	}
+
+out:
+	*nobj = sn_hwperf_obj_cnt;
+	*ret = objbuf;
+	return e;
+}
+
+static int sn_hwperf_location_to_bpos(char *location,
+	int *rack, int *bay, int *slot, int *slab)
+{
+	char type;
+
+	/* first scan for an old style geoid string */
+	if (sscanf(location, "%03d%c%02d#%d",
+		rack, &type, bay, slab) == 4)
+		*slot = 0; 
+	else /* scan for a new bladed geoid string */
+	if (sscanf(location, "%03d%c%02d^%02d#%d",
+		rack, &type, bay, slot, slab) != 5)
+		return -1; 
+	/* success */
+	return 0;
+}
+
+static int sn_hwperf_geoid_to_cnode(char *location)
+{
+	int cnode;
+	geoid_t geoid;
+	moduleid_t module_id;
+	int rack, bay, slot, slab;
+	int this_rack, this_bay, this_slot, this_slab;
+
+	if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab))
+		return -1;
+
+	/*
+	 * FIXME: replace with cleaner for_each_XXX macro which addresses
+	 * both compute and IO nodes once ACPI3.0 is available.
+	 */
+	for (cnode = 0; cnode < num_cnodes; cnode++) {
+		geoid = cnodeid_get_geoid(cnode);
+		module_id = geo_module(geoid);
+		this_rack = MODULE_GET_RACK(module_id);
+		this_bay = MODULE_GET_BPOS(module_id);
+		this_slot = geo_slot(geoid);
+		this_slab = geo_slab(geoid);
+		if (rack == this_rack && bay == this_bay &&
+			slot == this_slot && slab == this_slab) {
+			break;
+		}
+	}
+
+	return cnode_possible(cnode) ? cnode : -1;
+}
+
+static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj)
+{
+	if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))
+		BUG();
+	if (SN_HWPERF_FOREIGN(obj))
+		return -1;
+	return sn_hwperf_geoid_to_cnode(obj->location);
+}
+
+static int sn_hwperf_generic_ordinal(struct sn_hwperf_object_info *obj,
+				struct sn_hwperf_object_info *objs)
+{
+	int ordinal;
+	struct sn_hwperf_object_info *p;
+
+	for (ordinal=0, p=objs; p != obj; p++) {
+		if (SN_HWPERF_FOREIGN(p))
+			continue;
+		if (SN_HWPERF_SAME_OBJTYPE(p, obj))
+			ordinal++;
+	}
+
+	return ordinal;
+}
+
+static const char *slabname_node =	"node"; /* SHub asic */
+static const char *slabname_ionode =	"ionode"; /* TIO asic */
+static const char *slabname_router =	"router"; /* NL3R or NL4R */
+static const char *slabname_other =	"other"; /* unknown asic */
+
+static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj,
+			struct sn_hwperf_object_info *objs, int *ordinal)
+{
+	int isnode;
+	const char *slabname = slabname_other;
+
+	if ((isnode = SN_HWPERF_IS_NODE(obj)) || SN_HWPERF_IS_IONODE(obj)) {
+	    	slabname = isnode ? slabname_node : slabname_ionode;
+		*ordinal = sn_hwperf_obj_to_cnode(obj);
+	}
+	else {
+		*ordinal = sn_hwperf_generic_ordinal(obj, objs);
+		if (SN_HWPERF_IS_ROUTER(obj))
+			slabname = slabname_router;
+	}
+
+	return slabname;
+}
+
+static void print_pci_topology(struct seq_file *s)
+{
+	char *p;
+	size_t sz;
+	int e;
+
+	for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) {
+		if (!(p = kmalloc(sz, GFP_KERNEL)))
+			break;
+		e = ia64_sn_ioif_get_pci_topology(__pa(p), sz);
+		if (e == SALRET_OK)
+			seq_puts(s, p);
+		kfree(p);
+		if (e == SALRET_OK || e == SALRET_NOT_IMPLEMENTED)
+			break;
+	}
+}
+
+static inline int sn_hwperf_has_cpus(cnodeid_t node)
+{
+	return node < MAX_NUMNODES && node_online(node) && nr_cpus_node(node);
+}
+
+static inline int sn_hwperf_has_mem(cnodeid_t node)
+{
+	return node < MAX_NUMNODES && node_online(node) && NODE_DATA(node)->node_present_pages;
+}
+
+static struct sn_hwperf_object_info *
+sn_hwperf_findobj_id(struct sn_hwperf_object_info *objbuf,
+	int nobj, int id)
+{
+	int i;
+	struct sn_hwperf_object_info *p = objbuf;
+
+	for (i=0; i < nobj; i++, p++) {
+		if (p->id == id)
+			return p;
+	}
+
+	return NULL;
+
+}
+
+static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objbuf,
+	int nobj, cnodeid_t node, cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node)
+{
+	int e;
+	struct sn_hwperf_object_info *nodeobj = NULL;
+	struct sn_hwperf_object_info *op;
+	struct sn_hwperf_object_info *dest;
+	struct sn_hwperf_object_info *router;
+	struct sn_hwperf_port_info ptdata[16];
+	int sz, i, j;
+	cnodeid_t c;
+	int found_mem = 0;
+	int found_cpu = 0;
+
+	if (!cnode_possible(node))
+		return -EINVAL;
+
+	if (sn_hwperf_has_cpus(node)) {
+		if (near_cpu_node)
+			*near_cpu_node = node;
+		found_cpu++;
+	}
+
+	if (sn_hwperf_has_mem(node)) {
+		if (near_mem_node)
+			*near_mem_node = node;
+		found_mem++;
+	}
+
+	if (found_cpu && found_mem)
+		return 0; /* trivially successful */
+
+	/* find the argument node object */
+	for (i=0, op=objbuf; i < nobj; i++, op++) {
+		if (!SN_HWPERF_IS_NODE(op) && !SN_HWPERF_IS_IONODE(op))
+			continue;
+		if (node == sn_hwperf_obj_to_cnode(op)) {
+			nodeobj = op;
+			break;
+		}
+	}
+	if (!nodeobj) {
+		e = -ENOENT;
+		goto err;
+	}
+
+	/* get it's interconnect topology */
+	sz = op->ports * sizeof(struct sn_hwperf_port_info);
+	BUG_ON(sz > sizeof(ptdata));
+	e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
+			      SN_HWPERF_ENUM_PORTS, nodeobj->id, sz,
+			      (u64)&ptdata, 0, 0, NULL);
+	if (e != SN_HWPERF_OP_OK) {
+		e = -EINVAL;
+		goto err;
+	}
+
+	/* find nearest node with cpus and nearest memory */
+	for (router=NULL, j=0; j < op->ports; j++) {
+		dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id);
+		if (dest && SN_HWPERF_IS_ROUTER(dest))
+			router = dest;
+		if (!dest || SN_HWPERF_FOREIGN(dest) ||
+		    !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) {
+			continue;
+		}
+		c = sn_hwperf_obj_to_cnode(dest);
+		if (!found_cpu && sn_hwperf_has_cpus(c)) {
+			if (near_cpu_node)
+				*near_cpu_node = c;
+			found_cpu++;
+		}
+		if (!found_mem && sn_hwperf_has_mem(c)) {
+			if (near_mem_node)
+				*near_mem_node = c;
+			found_mem++;
+		}
+	}
+
+	if (router && (!found_cpu || !found_mem)) {
+		/* search for a node connected to the same router */
+		sz = router->ports * sizeof(struct sn_hwperf_port_info);
+		BUG_ON(sz > sizeof(ptdata));
+		e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
+				      SN_HWPERF_ENUM_PORTS, router->id, sz,
+				      (u64)&ptdata, 0, 0, NULL);
+		if (e != SN_HWPERF_OP_OK) {
+			e = -EINVAL;
+			goto err;
+		}
+		for (j=0; j < router->ports; j++) {
+			dest = sn_hwperf_findobj_id(objbuf, nobj,
+				ptdata[j].conn_id);
+			if (!dest || dest->id == node ||
+			    SN_HWPERF_FOREIGN(dest) ||
+			    !SN_HWPERF_IS_NODE(dest) ||
+			    SN_HWPERF_IS_IONODE(dest)) {
+				continue;
+			}
+			c = sn_hwperf_obj_to_cnode(dest);
+			if (!found_cpu && sn_hwperf_has_cpus(c)) {
+				if (near_cpu_node)
+					*near_cpu_node = c;
+				found_cpu++;
+			}
+			if (!found_mem && sn_hwperf_has_mem(c)) {
+				if (near_mem_node)
+					*near_mem_node = c;
+				found_mem++;
+			}
+			if (found_cpu && found_mem)
+				break;
+		}
+	}
+
+	if (!found_cpu || !found_mem) {
+		/* resort to _any_ node with CPUs and memory */
+		for (i=0, op=objbuf; i < nobj; i++, op++) {
+			if (SN_HWPERF_FOREIGN(op) ||
+			    SN_HWPERF_IS_IONODE(op) ||
+			    !SN_HWPERF_IS_NODE(op)) {
+				continue;
+			}
+			c = sn_hwperf_obj_to_cnode(op);
+			if (!found_cpu && sn_hwperf_has_cpus(c)) {
+				if (near_cpu_node)
+					*near_cpu_node = c;
+				found_cpu++;
+			}
+			if (!found_mem && sn_hwperf_has_mem(c)) {
+				if (near_mem_node)
+					*near_mem_node = c;
+				found_mem++;
+			}
+			if (found_cpu && found_mem)
+				break;
+		}
+	}
+
+	if (!found_cpu || !found_mem)
+		e = -ENODATA;
+
+err:
+	return e;
+}
+
+
+static int sn_topology_show(struct seq_file *s, void *d)
+{
+	int sz;
+	int pt;
+	int e = 0;
+	int i;
+	int j;
+	const char *slabname;
+	int ordinal;
+	char slice;
+	struct cpuinfo_ia64 *c;
+	struct sn_hwperf_port_info *ptdata;
+	struct sn_hwperf_object_info *p;
+	struct sn_hwperf_object_info *obj = d;	/* this object */
+	struct sn_hwperf_object_info *objs = s->private; /* all objects */
+	u8 shubtype;
+	u8 system_size;
+	u8 sharing_size;
+	u8 partid;
+	u8 coher;
+	u8 nasid_shift;
+	u8 region_size;
+	u16 nasid_mask;
+	int nasid_msb;
+
+	if (obj == objs) {
+		seq_printf(s, "# sn_topology version 2\n");
+		seq_printf(s, "# objtype ordinal location partition"
+			" [attribute value [, ...]]\n");
+
+		if (ia64_sn_get_sn_info(0,
+			&shubtype, &nasid_mask, &nasid_shift, &system_size,
+			&sharing_size, &partid, &coher, &region_size))
+			BUG();
+		for (nasid_msb=63; nasid_msb > 0; nasid_msb--) {
+			if (((u64)nasid_mask << nasid_shift) & (1ULL << nasid_msb))
+				break;
+		}
+		seq_printf(s, "partition %u %s local "
+			"shubtype %s, "
+			"nasid_mask 0x%016llx, "
+			"nasid_bits %d:%d, "
+			"system_size %d, "
+			"sharing_size %d, "
+			"coherency_domain %d, "
+			"region_size %d\n",
+
+			partid, utsname()->nodename,
+			shubtype ? "shub2" : "shub1", 
+			(u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift,
+			system_size, sharing_size, coher, region_size);
+
+		print_pci_topology(s);
+	}
+
+	if (SN_HWPERF_FOREIGN(obj)) {
+		/* private in another partition: not interesting */
+		return 0;
+	}
+
+	for (i = 0; i < SN_HWPERF_MAXSTRING && obj->name[i]; i++) {
+		if (obj->name[i] == ' ')
+			obj->name[i] = '_';
+	}
+
+	slabname = sn_hwperf_get_slabname(obj, objs, &ordinal);
+	seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location,
+		obj->sn_hwp_this_part ? "local" : "shared", obj->name);
+
+	if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)))
+		seq_putc(s, '\n');
+	else {
+		cnodeid_t near_mem = -1;
+		cnodeid_t near_cpu = -1;
+
+		seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal));
+
+		if (sn_hwperf_get_nearest_node_objdata(objs, sn_hwperf_obj_cnt,
+			ordinal, &near_mem, &near_cpu) == 0) {
+			seq_printf(s, ", near_mem_nodeid %d, near_cpu_nodeid %d",
+				near_mem, near_cpu);
+		}
+
+		if (!SN_HWPERF_IS_IONODE(obj)) {
+			for_each_online_node(i) {
+				seq_printf(s, i ? ":%d" : ", dist %d",
+					node_distance(ordinal, i));
+			}
+		}
+
+		seq_putc(s, '\n');
+
+		/*
+		 * CPUs on this node, if any
+		 */
+		if (!SN_HWPERF_IS_IONODE(obj)) {
+			for_each_cpu_and(i, cpu_online_mask,
+					 cpumask_of_node(ordinal)) {
+				slice = 'a' + cpuid_to_slice(i);
+				c = cpu_data(i);
+				seq_printf(s, "cpu %d %s%c local"
+					   " freq %luMHz, arch ia64",
+					   i, obj->location, slice,
+					   c->proc_freq / 1000000);
+				for_each_online_cpu(j) {
+					seq_printf(s, j ? ":%d" : ", dist %d",
+						   node_distance(
+						    	cpu_to_node(i),
+						    	cpu_to_node(j)));
+				}
+				seq_putc(s, '\n');
+			}
+		}
+	}
+
+	if (obj->ports) {
+		/*
+		 * numalink ports
+		 */
+		sz = obj->ports * sizeof(struct sn_hwperf_port_info);
+		if ((ptdata = kmalloc(sz, GFP_KERNEL)) == NULL)
+			return -ENOMEM;
+		e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
+				      SN_HWPERF_ENUM_PORTS, obj->id, sz,
+				      (u64) ptdata, 0, 0, NULL);
+		if (e != SN_HWPERF_OP_OK)
+			return -EINVAL;
+		for (ordinal=0, p=objs; p != obj; p++) {
+			if (!SN_HWPERF_FOREIGN(p))
+				ordinal += p->ports;
+		}
+		for (pt = 0; pt < obj->ports; pt++) {
+			for (p = objs, i = 0; i < sn_hwperf_obj_cnt; i++, p++) {
+				if (ptdata[pt].conn_id == p->id) {
+					break;
+				}
+			}
+			seq_printf(s, "numalink %d %s-%d",
+			    ordinal+pt, obj->location, ptdata[pt].port);
+
+			if (i >= sn_hwperf_obj_cnt) {
+				/* no connection */
+				seq_puts(s, " local endpoint disconnected"
+					    ", protocol unknown\n");
+				continue;
+			}
+
+			if (obj->sn_hwp_this_part && p->sn_hwp_this_part)
+				/* both ends local to this partition */
+				seq_puts(s, " local");
+			else if (SN_HWPERF_FOREIGN(p))
+				/* both ends of the link in foreign partiton */
+				seq_puts(s, " foreign");
+			else
+				/* link straddles a partition */
+				seq_puts(s, " shared");
+
+			/*
+			 * Unlikely, but strictly should query the LLP config
+			 * registers because an NL4R can be configured to run
+			 * NL3 protocol, even when not talking to an NL3 router.
+			 * Ditto for node-node.
+			 */
+			seq_printf(s, " endpoint %s-%d, protocol %s\n",
+				p->location, ptdata[pt].conn_port,
+				(SN_HWPERF_IS_NL3ROUTER(obj) ||
+				SN_HWPERF_IS_NL3ROUTER(p)) ?  "LLP3" : "LLP4");
+		}
+		kfree(ptdata);
+	}
+
+	return 0;
+}
+
+static void *sn_topology_start(struct seq_file *s, loff_t * pos)
+{
+	struct sn_hwperf_object_info *objs = s->private;
+
+	if (*pos < sn_hwperf_obj_cnt)
+		return (void *)(objs + *pos);
+
+	return NULL;
+}
+
+static void *sn_topology_next(struct seq_file *s, void *v, loff_t * pos)
+{
+	++*pos;
+	return sn_topology_start(s, pos);
+}
+
+static void sn_topology_stop(struct seq_file *m, void *v)
+{
+	return;
+}
+
+/*
+ * /proc/sgi_sn/sn_topology, read-only using seq_file
+ */
+static const struct seq_operations sn_topology_seq_ops = {
+	.start = sn_topology_start,
+	.next = sn_topology_next,
+	.stop = sn_topology_stop,
+	.show = sn_topology_show
+};
+
+struct sn_hwperf_op_info {
+	u64 op;
+	struct sn_hwperf_ioctl_args *a;
+	void *p;
+	int *v0;
+	int ret;
+};
+
+static void sn_hwperf_call_sal(void *info)
+{
+	struct sn_hwperf_op_info *op_info = info;
+	int r;
+
+	r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op_info->op,
+		      op_info->a->arg, op_info->a->sz,
+		      (u64) op_info->p, 0, 0, op_info->v0);
+	op_info->ret = r;
+}
+
+static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
+{
+	u32 cpu;
+	u32 use_ipi;
+	int r = 0;
+	cpumask_t save_allowed;
+	
+	cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32;
+	use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK;
+	op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK;
+
+	if (cpu != SN_HWPERF_ARG_ANY_CPU) {
+		if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+			r = -EINVAL;
+			goto out;
+		}
+	}
+
+	if (cpu == SN_HWPERF_ARG_ANY_CPU || cpu == get_cpu()) {
+		/* don't care, or already on correct cpu */
+		sn_hwperf_call_sal(op_info);
+	}
+	else {
+		if (use_ipi) {
+			/* use an interprocessor interrupt to call SAL */
+			smp_call_function_single(cpu, sn_hwperf_call_sal,
+				op_info, 1);
+		}
+		else {
+			/* migrate the task before calling SAL */ 
+			save_allowed = current->cpus_allowed;
+			set_cpus_allowed_ptr(current, cpumask_of(cpu));
+			sn_hwperf_call_sal(op_info);
+			set_cpus_allowed_ptr(current, &save_allowed);
+		}
+	}
+	r = op_info->ret;
+
+out:
+	return r;
+}
+
+/* map SAL hwperf error code to system error code */
+static int sn_hwperf_map_err(int hwperf_err)
+{
+	int e;
+
+	switch(hwperf_err) {
+	case SN_HWPERF_OP_OK:
+		e = 0;
+		break;
+
+	case SN_HWPERF_OP_NOMEM:
+		e = -ENOMEM;
+		break;
+
+	case SN_HWPERF_OP_NO_PERM:
+		e = -EPERM;
+		break;
+
+	case SN_HWPERF_OP_IO_ERROR:
+		e = -EIO;
+		break;
+
+	case SN_HWPERF_OP_BUSY:
+		e = -EBUSY;
+		break;
+
+	case SN_HWPERF_OP_RECONFIGURE:
+		e = -EAGAIN;
+		break;
+
+	case SN_HWPERF_OP_INVAL:
+	default:
+		e = -EINVAL;
+		break;
+	}
+
+	return e;
+}
+
+/*
+ * ioctl for "sn_hwperf" misc device
+ */
+static long sn_hwperf_ioctl(struct file *fp, u32 op, unsigned long arg)
+{
+	struct sn_hwperf_ioctl_args a;
+	struct cpuinfo_ia64 *cdata;
+	struct sn_hwperf_object_info *objs;
+	struct sn_hwperf_object_info *cpuobj;
+	struct sn_hwperf_op_info op_info;
+	void *p = NULL;
+	int nobj;
+	char slice;
+	int node;
+	int r;
+	int v0;
+	int i;
+	int j;
+
+	/* only user requests are allowed here */
+	if ((op & SN_HWPERF_OP_MASK) < 10) {
+		r = -EINVAL;
+		goto error;
+	}
+	r = copy_from_user(&a, (const void __user *)arg,
+		sizeof(struct sn_hwperf_ioctl_args));
+	if (r != 0) {
+		r = -EFAULT;
+		goto error;
+	}
+
+	/*
+	 * Allocate memory to hold a kernel copy of the user buffer. The
+	 * buffer contents are either copied in or out (or both) of user
+	 * space depending on the flags encoded in the requested operation.
+	 */
+	if (a.ptr) {
+		p = vmalloc(a.sz);
+		if (!p) {
+			r = -ENOMEM;
+			goto error;
+		}
+	}
+
+	if (op & SN_HWPERF_OP_MEM_COPYIN) {
+		r = copy_from_user(p, (const void __user *)a.ptr, a.sz);
+		if (r != 0) {
+			r = -EFAULT;
+			goto error;
+		}
+	}
+
+	switch (op) {
+	case SN_HWPERF_GET_CPU_INFO:
+		if (a.sz == sizeof(u64)) {
+			/* special case to get size needed */
+			*(u64 *) p = (u64) num_online_cpus() *
+				sizeof(struct sn_hwperf_object_info);
+		} else
+		if (a.sz < num_online_cpus() * sizeof(struct sn_hwperf_object_info)) {
+			r = -ENOMEM;
+			goto error;
+		} else
+		if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
+			int cpuobj_index = 0;
+
+			memset(p, 0, a.sz);
+			for (i = 0; i < nobj; i++) {
+				if (!SN_HWPERF_IS_NODE(objs + i))
+					continue;
+				node = sn_hwperf_obj_to_cnode(objs + i);
+				for_each_online_cpu(j) {
+					if (node != cpu_to_node(j))
+						continue;
+					cpuobj = (struct sn_hwperf_object_info *) p + cpuobj_index++;
+					slice = 'a' + cpuid_to_slice(j);
+					cdata = cpu_data(j);
+					cpuobj->id = j;
+					snprintf(cpuobj->name,
+						 sizeof(cpuobj->name),
+						 "CPU %luMHz %s",
+						 cdata->proc_freq / 1000000,
+						 cdata->vendor);
+					snprintf(cpuobj->location,
+						 sizeof(cpuobj->location),
+						 "%s%c", objs[i].location,
+						 slice);
+				}
+			}
+
+			vfree(objs);
+		}
+		break;
+
+	case SN_HWPERF_GET_NODE_NASID:
+		if (a.sz != sizeof(u64) ||
+		   (node = a.arg) < 0 || !cnode_possible(node)) {
+			r = -EINVAL;
+			goto error;
+		}
+		*(u64 *)p = (u64)cnodeid_to_nasid(node);
+		break;
+
+	case SN_HWPERF_GET_OBJ_NODE:
+		i = a.arg;
+		if (a.sz != sizeof(u64) || i < 0) {
+			r = -EINVAL;
+			goto error;
+		}
+		if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
+			if (i >= nobj) {
+				r = -EINVAL;
+				vfree(objs);
+				goto error;
+			}
+			if (objs[i].id != a.arg) {
+				for (i = 0; i < nobj; i++) {
+					if (objs[i].id == a.arg)
+						break;
+				}
+			}
+			if (i == nobj) {
+				r = -EINVAL;
+				vfree(objs);
+				goto error;
+			}
+
+			if (!SN_HWPERF_IS_NODE(objs + i) &&
+			    !SN_HWPERF_IS_IONODE(objs + i)) {
+			    	r = -ENOENT;
+				vfree(objs);
+				goto error;
+			}
+
+			*(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i);
+			vfree(objs);
+		}
+		break;
+
+	case SN_HWPERF_GET_MMRS:
+	case SN_HWPERF_SET_MMRS:
+	case SN_HWPERF_OBJECT_DISTANCE:
+		op_info.p = p;
+		op_info.a = &a;
+		op_info.v0 = &v0;
+		op_info.op = op;
+		r = sn_hwperf_op_cpu(&op_info);
+		if (r) {
+			r = sn_hwperf_map_err(r);
+			a.v0 = v0;
+			goto error;
+		}
+		break;
+
+	default:
+		/* all other ops are a direct SAL call */
+		r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op,
+			      a.arg, a.sz, (u64) p, 0, 0, &v0);
+		if (r) {
+			r = sn_hwperf_map_err(r);
+			goto error;
+		}
+		a.v0 = v0;
+		break;
+	}
+
+	if (op & SN_HWPERF_OP_MEM_COPYOUT) {
+		r = copy_to_user((void __user *)a.ptr, p, a.sz);
+		if (r != 0) {
+			r = -EFAULT;
+			goto error;
+		}
+	}
+
+error:
+	vfree(p);
+
+	return r;
+}
+
+static const struct file_operations sn_hwperf_fops = {
+	.unlocked_ioctl = sn_hwperf_ioctl,
+	.llseek = noop_llseek,
+};
+
+static struct miscdevice sn_hwperf_dev = {
+	MISC_DYNAMIC_MINOR,
+	"sn_hwperf",
+	&sn_hwperf_fops
+};
+
+static int sn_hwperf_init(void)
+{
+	u64 v;
+	int salr;
+	int e = 0;
+
+	/* single threaded, once-only initialization */
+	mutex_lock(&sn_hwperf_init_mutex);
+
+	if (sn_hwperf_salheap) {
+		mutex_unlock(&sn_hwperf_init_mutex);
+		return e;
+	}
+
+	/*
+	 * The PROM code needs a fixed reference node. For convenience the
+	 * same node as the console I/O is used.
+	 */
+	sn_hwperf_master_nasid = (nasid_t) ia64_sn_get_console_nasid();
+
+	/*
+	 * Request the needed size and install the PROM scratch area.
+	 * The PROM keeps various tracking bits in this memory area.
+	 */
+	salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
+				 (u64) SN_HWPERF_GET_HEAPSIZE, 0,
+				 (u64) sizeof(u64), (u64) &v, 0, 0, NULL);
+	if (salr != SN_HWPERF_OP_OK) {
+		e = -EINVAL;
+		goto out;
+	}
+
+	if ((sn_hwperf_salheap = vmalloc(v)) == NULL) {
+		e = -ENOMEM;
+		goto out;
+	}
+	salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
+				 SN_HWPERF_INSTALL_HEAP, 0, v,
+				 (u64) sn_hwperf_salheap, 0, 0, NULL);
+	if (salr != SN_HWPERF_OP_OK) {
+		e = -EINVAL;
+		goto out;
+	}
+
+	salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
+				 SN_HWPERF_OBJECT_COUNT, 0,
+				 sizeof(u64), (u64) &v, 0, 0, NULL);
+	if (salr != SN_HWPERF_OP_OK) {
+		e = -EINVAL;
+		goto out;
+	}
+	sn_hwperf_obj_cnt = (int)v;
+
+out:
+	if (e < 0 && sn_hwperf_salheap) {
+		vfree(sn_hwperf_salheap);
+		sn_hwperf_salheap = NULL;
+		sn_hwperf_obj_cnt = 0;
+	}
+	mutex_unlock(&sn_hwperf_init_mutex);
+	return e;
+}
+
+int sn_topology_open(struct inode *inode, struct file *file)
+{
+	int e;
+	struct seq_file *seq;
+	struct sn_hwperf_object_info *objbuf;
+	int nobj;
+
+	if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) {
+		e = seq_open(file, &sn_topology_seq_ops);
+		seq = file->private_data;
+		seq->private = objbuf;
+	}
+
+	return e;
+}
+
+int sn_topology_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+
+	vfree(seq->private);
+	return seq_release(inode, file);
+}
+
+int sn_hwperf_get_nearest_node(cnodeid_t node,
+	cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node)
+{
+	int e;
+	int nobj;
+	struct sn_hwperf_object_info *objbuf;
+
+	if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) {
+		e = sn_hwperf_get_nearest_node_objdata(objbuf, nobj,
+			node, near_mem_node, near_cpu_node);
+		vfree(objbuf);
+	}
+
+	return e;
+}
+
+static int __devinit sn_hwperf_misc_register_init(void)
+{
+	int e;
+
+	if (!ia64_platform_is("sn2"))
+		return 0;
+
+	sn_hwperf_init();
+
+	/*
+	 * Register a dynamic misc device for hwperf ioctls. Platforms
+	 * supporting hotplug will create /dev/sn_hwperf, else user
+	 * can to look up the minor number in /proc/misc.
+	 */
+	if ((e = misc_register(&sn_hwperf_dev)) != 0) {
+		printk(KERN_ERR "sn_hwperf_misc_register_init: failed to "
+		"register misc device for \"%s\"\n", sn_hwperf_dev.name);
+	}
+
+	return e;
+}
+
+device_initcall(sn_hwperf_misc_register_init); /* after misc_init() */
+EXPORT_SYMBOL(sn_hwperf_get_nearest_node);
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
new file mode 100644
index 00000000..7aab87f4
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -0,0 +1,117 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <asm/sn/sn_sal.h>
+
+static int partition_id_show(struct seq_file *s, void *p)
+{
+	seq_printf(s, "%d\n", sn_partition_id);
+	return 0;
+}
+
+static int partition_id_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, partition_id_show, NULL);
+}
+
+static int system_serial_number_show(struct seq_file *s, void *p)
+{
+	seq_printf(s, "%s\n", sn_system_serial_number());
+	return 0;
+}
+
+static int system_serial_number_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, system_serial_number_show, NULL);
+}
+
+static int licenseID_show(struct seq_file *s, void *p)
+{
+	seq_printf(s, "0x%llx\n", sn_partition_serial_number_val());
+	return 0;
+}
+
+static int licenseID_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, licenseID_show, NULL);
+}
+
+static int coherence_id_show(struct seq_file *s, void *p)
+{
+	seq_printf(s, "%d\n", partition_coherence_id());
+
+	return 0;
+}
+
+static int coherence_id_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, coherence_id_show, NULL);
+}
+
+/* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */
+extern int sn_topology_open(struct inode *, struct file *);
+extern int sn_topology_release(struct inode *, struct file *);
+
+static const struct file_operations proc_partition_id_fops = {
+	.open		= partition_id_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations proc_system_sn_fops = {
+	.open		= system_serial_number_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations proc_license_id_fops = {
+	.open		= licenseID_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations proc_coherence_id_fops = {
+	.open		= coherence_id_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations proc_sn_topo_fops = {
+	.open		= sn_topology_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= sn_topology_release,
+};
+
+void register_sn_procfs(void)
+{
+	static struct proc_dir_entry *sgi_proc_dir = NULL;
+
+	BUG_ON(sgi_proc_dir != NULL);
+	if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL)))
+		return;
+
+	proc_create("partition_id", 0444, sgi_proc_dir,
+		    &proc_partition_id_fops);
+	proc_create("system_serial_number", 0444, sgi_proc_dir,
+		    &proc_system_sn_fops);
+	proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops);
+	proc_create("coherence_id", 0444, sgi_proc_dir,
+		    &proc_coherence_id_fops);
+	proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops);
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
new file mode 100644
index 00000000..c34efda1
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -0,0 +1,61 @@
+/*
+ * linux/arch/ia64/sn/kernel/sn2/timer.c
+ *
+ * Copyright (C) 2003 Silicon Graphics, Inc.
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger <davidm@hpl.hp.com>: updated for new timer-interpolation infrastructure
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/interrupt.h>
+#include <linux/clocksource.h>
+
+#include <asm/hw_irq.h>
+#include <asm/system.h>
+#include <asm/timex.h>
+
+#include <asm/sn/leds.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/clksupport.h>
+
+extern unsigned long sn_rtc_cycles_per_second;
+
+static cycle_t read_sn2(struct clocksource *cs)
+{
+	return (cycle_t)readq(RTC_COUNTER_ADDR);
+}
+
+static struct clocksource clocksource_sn2 = {
+        .name           = "sn2_rtc",
+        .rating         = 450,
+        .read           = read_sn2,
+        .mask           = (1LL << 55) - 1,
+        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+/*
+ * sn udelay uses the RTC instead of the ITC because the ITC is not
+ * synchronized across all CPUs, and the thread may migrate to another CPU
+ * if preemption is enabled.
+ */
+static void
+ia64_sn_udelay (unsigned long usecs)
+{
+	unsigned long start = rtc_time();
+	unsigned long end = start +
+			usecs * sn_rtc_cycles_per_second / 1000000;
+
+	while (time_before((unsigned long)rtc_time(), end))
+		cpu_relax();
+}
+
+void __init sn_timer_init(void)
+{
+	clocksource_sn2.fsys_mmio = RTC_COUNTER_ADDR;
+	clocksource_register_hz(&clocksource_sn2, sn_rtc_cycles_per_second);
+
+	ia64_udelay = &ia64_sn_udelay;
+}
diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
new file mode 100644
index 00000000..103d6ea8
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
@@ -0,0 +1,60 @@
+/*
+ *
+ *
+ * Copyright (c) 2005, 2006 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it 
+ * under the terms of version 2 of the GNU General Public License 
+ * as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * 
+ * Further, this software is distributed without any warranty that it is 
+ * free of the rightful claim of any third person regarding infringement 
+ * or the like.  Any license provided herein, whether implied or 
+ * otherwise, applies only to this software file.  Patent licenses, if 
+ * any, provided herein do not apply to combinations of this program with 
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public 
+ * License along with this program; if not, write the Free Software 
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/NoticeExplan
+ */
+
+#include <linux/interrupt.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/leds.h>
+
+extern void sn_lb_int_war_check(void);
+extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+
+#define SN_LB_INT_WAR_INTERVAL 100
+
+void sn_timer_interrupt(int irq, void *dev_id)
+{
+	/* LED blinking */
+	if (!pda->hb_count--) {
+		pda->hb_count = HZ / 2;
+		set_led_bits(pda->hb_state ^=
+			     LED_CPU_HEARTBEAT, LED_CPU_HEARTBEAT);
+	}
+
+	if (is_shub1()) {
+		if (enable_shub_wars_1_1()) {
+			/* Bugfix code for SHUB 1.1 */
+			if (pda->pio_shub_war_cam_addr)
+				*pda->pio_shub_war_cam_addr = 0x8000000000000010UL;
+		}
+		if (pda->sn_lb_int_war_ticks == 0)
+			sn_lb_int_war_check();
+		pda->sn_lb_int_war_ticks++;
+		if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL)
+			pda->sn_lb_int_war_ticks = 0;
+	}
+}
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
new file mode 100644
index 00000000..c1bd1cfd
--- /dev/null
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -0,0 +1,562 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/proc_fs.h>
+#include <linux/capability.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/io.h>
+#include <asm/sn/types.h>
+#include <asm/sn/shubio.h>
+#include <asm/sn/tiocx.h>
+#include <asm/sn/l1.h>
+#include <asm/sn/module.h>
+#include "tio.h"
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+
+#define CX_DEV_NONE 0
+#define DEVICE_NAME "tiocx"
+#define WIDGET_ID 0
+#define TIOCX_DEBUG 0
+
+#if TIOCX_DEBUG
+#define DBG(fmt...)    printk(KERN_ALERT fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+struct device_attribute dev_attr_cxdev_control;
+
+/**
+ * tiocx_match - Try to match driver id list with device.
+ * @dev: device pointer
+ * @drv: driver pointer
+ *
+ * Returns 1 if match, 0 otherwise.
+ */
+static int tiocx_match(struct device *dev, struct device_driver *drv)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	struct cx_drv *cx_drv = to_cx_driver(drv);
+	const struct cx_device_id *ids = cx_drv->id_table;
+
+	if (!ids)
+		return 0;
+
+	while (ids->part_num) {
+		if (ids->part_num == cx_dev->cx_id.part_num)
+			return 1;
+		ids++;
+	}
+	return 0;
+
+}
+
+static int tiocx_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	return -ENODEV;
+}
+
+static void tiocx_bus_release(struct device *dev)
+{
+	kfree(to_cx_dev(dev));
+}
+
+/**
+ * cx_device_match - Find cx_device in the id table.
+ * @ids: id table from driver
+ * @cx_device: part/mfg id for the device
+ *
+ */
+static const struct cx_device_id *cx_device_match(const struct cx_device_id
+						  *ids,
+						  struct cx_dev *cx_device)
+{
+	/*
+	 * NOTES: We may want to check for CX_ANY_ID too.
+	 *        Do we want to match against nasid too?
+	 *        CX_DEV_NONE == 0, if the driver tries to register for
+	 *        part/mfg == 0 we should return no-match (NULL) here.
+	 */
+	while (ids->part_num && ids->mfg_num) {
+		if (ids->part_num == cx_device->cx_id.part_num &&
+		    ids->mfg_num == cx_device->cx_id.mfg_num)
+			return ids;
+		ids++;
+	}
+
+	return NULL;
+}
+
+/**
+ * cx_device_probe - Look for matching device.
+ *			Call driver probe routine if found.
+ * @cx_driver: driver table (cx_drv struct) from driver
+ * @cx_device: part/mfg id for the device
+ */
+static int cx_device_probe(struct device *dev)
+{
+	const struct cx_device_id *id;
+	struct cx_drv *cx_drv = to_cx_driver(dev->driver);
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	int error = 0;
+
+	if (!cx_dev->driver && cx_drv->probe) {
+		id = cx_device_match(cx_drv->id_table, cx_dev);
+		if (id) {
+			if ((error = cx_drv->probe(cx_dev, id)) < 0)
+				return error;
+			else
+				cx_dev->driver = cx_drv;
+		}
+	}
+
+	return error;
+}
+
+/**
+ * cx_driver_remove - Remove driver from device struct.
+ * @dev: device
+ */
+static int cx_driver_remove(struct device *dev)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	struct cx_drv *cx_drv = cx_dev->driver;
+	if (cx_drv->remove)
+		cx_drv->remove(cx_dev);
+	cx_dev->driver = NULL;
+	return 0;
+}
+
+struct bus_type tiocx_bus_type = {
+	.name = "tiocx",
+	.match = tiocx_match,
+	.uevent = tiocx_uevent,
+	.probe = cx_device_probe,
+	.remove = cx_driver_remove,
+};
+
+/**
+ * cx_driver_register - Register the driver.
+ * @cx_driver: driver table (cx_drv struct) from driver
+ * 
+ * Called from the driver init routine to register a driver.
+ * The cx_drv struct contains the driver name, a pointer to
+ * a table of part/mfg numbers and a pointer to the driver's
+ * probe/attach routine.
+ */
+int cx_driver_register(struct cx_drv *cx_driver)
+{
+	cx_driver->driver.name = cx_driver->name;
+	cx_driver->driver.bus = &tiocx_bus_type;
+
+	return driver_register(&cx_driver->driver);
+}
+
+/**
+ * cx_driver_unregister - Unregister the driver.
+ * @cx_driver: driver table (cx_drv struct) from driver
+ */
+int cx_driver_unregister(struct cx_drv *cx_driver)
+{
+	driver_unregister(&cx_driver->driver);
+	return 0;
+}
+
+/**
+ * cx_device_register - Register a device.
+ * @nasid: device's nasid
+ * @part_num: device's part number
+ * @mfg_num: device's manufacturer number
+ * @hubdev: hub info associated with this device
+ * @bt: board type of the device
+ *
+ */
+int
+cx_device_register(nasid_t nasid, int part_num, int mfg_num,
+		   struct hubdev_info *hubdev, int bt)
+{
+	struct cx_dev *cx_dev;
+
+	cx_dev = kzalloc(sizeof(struct cx_dev), GFP_KERNEL);
+	DBG("cx_dev= 0x%p\n", cx_dev);
+	if (cx_dev == NULL)
+		return -ENOMEM;
+
+	cx_dev->cx_id.part_num = part_num;
+	cx_dev->cx_id.mfg_num = mfg_num;
+	cx_dev->cx_id.nasid = nasid;
+	cx_dev->hubdev = hubdev;
+	cx_dev->bt = bt;
+
+	cx_dev->dev.parent = NULL;
+	cx_dev->dev.bus = &tiocx_bus_type;
+	cx_dev->dev.release = tiocx_bus_release;
+	dev_set_name(&cx_dev->dev, "%d", cx_dev->cx_id.nasid);
+	device_register(&cx_dev->dev);
+	get_device(&cx_dev->dev);
+
+	device_create_file(&cx_dev->dev, &dev_attr_cxdev_control);
+
+	return 0;
+}
+
+/**
+ * cx_device_unregister - Unregister a device.
+ * @cx_dev: part/mfg id for the device
+ */
+int cx_device_unregister(struct cx_dev *cx_dev)
+{
+	put_device(&cx_dev->dev);
+	device_unregister(&cx_dev->dev);
+	return 0;
+}
+
+/**
+ * cx_device_reload - Reload the device.
+ * @nasid: device's nasid
+ * @part_num: device's part number
+ * @mfg_num: device's manufacturer number
+ *
+ * Remove the device associated with 'nasid' from device list and then
+ * call device-register with the given part/mfg numbers.
+ */
+static int cx_device_reload(struct cx_dev *cx_dev)
+{
+	cx_device_unregister(cx_dev);
+	return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num,
+				  cx_dev->cx_id.mfg_num, cx_dev->hubdev,
+				  cx_dev->bt);
+}
+
+static inline u64 tiocx_intr_alloc(nasid_t nasid, int widget,
+					u64 sn_irq_info,
+					int req_irq, nasid_t req_nasid,
+					int req_slice)
+{
+	struct ia64_sal_retval rv;
+	rv.status = 0;
+	rv.v0 = 0;
+
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT,
+				SAL_INTR_ALLOC, nasid,
+				widget, sn_irq_info, req_irq,
+				req_nasid, req_slice);
+	return rv.status;
+}
+
+static inline void tiocx_intr_free(nasid_t nasid, int widget,
+				   struct sn_irq_info *sn_irq_info)
+{
+	struct ia64_sal_retval rv;
+	rv.status = 0;
+	rv.v0 = 0;
+
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT,
+				SAL_INTR_FREE, nasid,
+				widget, sn_irq_info->irq_irq,
+				sn_irq_info->irq_cookie, 0, 0);
+}
+
+struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq,
+				    nasid_t req_nasid, int slice)
+{
+	struct sn_irq_info *sn_irq_info;
+	int status;
+	int sn_irq_size = sizeof(struct sn_irq_info);
+
+	if ((nasid & 1) == 0)
+		return NULL;
+
+	sn_irq_info = kzalloc(sn_irq_size, GFP_KERNEL);
+	if (sn_irq_info == NULL)
+		return NULL;
+
+	status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq,
+				  req_nasid, slice);
+	if (status) {
+		kfree(sn_irq_info);
+		return NULL;
+	} else {
+		return sn_irq_info;
+	}
+}
+
+void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
+{
+	u64 bridge = (u64) sn_irq_info->irq_bridge;
+	nasid_t nasid = NASID_GET(bridge);
+	int widget;
+
+	if (nasid & 1) {
+		widget = TIO_SWIN_WIDGETNUM(bridge);
+		tiocx_intr_free(nasid, widget, sn_irq_info);
+		kfree(sn_irq_info);
+	}
+}
+
+u64 tiocx_dma_addr(u64 addr)
+{
+	return PHYS_TO_TIODMA(addr);
+}
+
+u64 tiocx_swin_base(int nasid)
+{
+	return TIO_SWIN_BASE(nasid, TIOCX_CORELET);
+}
+
+EXPORT_SYMBOL(cx_driver_register);
+EXPORT_SYMBOL(cx_driver_unregister);
+EXPORT_SYMBOL(cx_device_register);
+EXPORT_SYMBOL(cx_device_unregister);
+EXPORT_SYMBOL(tiocx_irq_alloc);
+EXPORT_SYMBOL(tiocx_irq_free);
+EXPORT_SYMBOL(tiocx_bus_type);
+EXPORT_SYMBOL(tiocx_dma_addr);
+EXPORT_SYMBOL(tiocx_swin_base);
+
+static void tio_conveyor_set(nasid_t nasid, int enable_flag)
+{
+	u64 ice_frz;
+	u64 disable_cb = (1ull << 61);
+
+	if (!(nasid & 1))
+		return;
+
+	ice_frz = REMOTE_HUB_L(nasid, TIO_ICE_FRZ_CFG);
+	if (enable_flag) {
+		if (!(ice_frz & disable_cb))	/* already enabled */
+			return;
+		ice_frz &= ~disable_cb;
+	} else {
+		if (ice_frz & disable_cb)	/* already disabled */
+			return;
+		ice_frz |= disable_cb;
+	}
+	DBG(KERN_ALERT "TIO_ICE_FRZ_CFG= 0x%lx\n", ice_frz);
+	REMOTE_HUB_S(nasid, TIO_ICE_FRZ_CFG, ice_frz);
+}
+
+#define tio_conveyor_enable(nasid) tio_conveyor_set(nasid, 1)
+#define tio_conveyor_disable(nasid) tio_conveyor_set(nasid, 0)
+
+static void tio_corelet_reset(nasid_t nasid, int corelet)
+{
+	if (!(nasid & 1))
+		return;
+
+	REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 1 << corelet);
+	udelay(2000);
+	REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 0);
+	udelay(2000);
+}
+
+static int is_fpga_tio(int nasid, int *bt)
+{
+	u16 uninitialized_var(ioboard_type);	/* GCC be quiet */
+	long rc;
+
+	rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard_type);
+	if (rc) {
+		printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n",
+		       rc);
+		return 0;
+	}
+
+	switch (ioboard_type) {
+	case L1_BRICKTYPE_SA:
+	case L1_BRICKTYPE_ATHENA:
+	case L1_BOARDTYPE_DAYTONA:
+		*bt = ioboard_type;
+		return 1;
+	}
+
+	return 0;
+}
+
+static int bitstream_loaded(nasid_t nasid)
+{
+	u64 cx_credits;
+
+	cx_credits = REMOTE_HUB_L(nasid, TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3);
+	cx_credits &= TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK;
+	DBG("cx_credits= 0x%lx\n", cx_credits);
+
+	return (cx_credits == 0xf) ? 1 : 0;
+}
+
+static int tiocx_reload(struct cx_dev *cx_dev)
+{
+	int part_num = CX_DEV_NONE;
+	int mfg_num = CX_DEV_NONE;
+	nasid_t nasid = cx_dev->cx_id.nasid;
+
+	if (bitstream_loaded(nasid)) {
+		u64 cx_id;
+		int rv;
+
+		rv = ia64_sn_sysctl_tio_clock_reset(nasid);
+		if (rv) {
+			printk(KERN_ALERT "CX port JTAG reset failed.\n");
+		} else {
+			cx_id = *(volatile u64 *)
+				(TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
+					  WIDGET_ID);
+			part_num = XWIDGET_PART_NUM(cx_id);
+			mfg_num = XWIDGET_MFG_NUM(cx_id);
+			DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num);
+			/* just ignore it if it's a CE */
+			if (part_num == TIO_CE_ASIC_PARTNUM)
+				return 0;
+		}
+	}
+
+	cx_dev->cx_id.part_num = part_num;
+	cx_dev->cx_id.mfg_num = mfg_num;
+
+	/*
+	 * Delete old device and register the new one.  It's ok if
+	 * part_num/mfg_num == CX_DEV_NONE.  We want to register
+	 * devices in the table even if a bitstream isn't loaded.
+	 * That allows use to see that a bitstream isn't loaded via
+	 * TIOCX_IOCTL_DEV_LIST.
+	 */
+	return cx_device_reload(cx_dev);
+}
+
+static ssize_t show_cxdev_control(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+
+	return sprintf(buf, "0x%x 0x%x 0x%x 0x%x\n",
+		       cx_dev->cx_id.nasid,
+		       cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num,
+		       cx_dev->bt);
+}
+
+static ssize_t store_cxdev_control(struct device *dev, struct device_attribute *attr, const char *buf,
+				   size_t count)
+{
+	int n;
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (count <= 0)
+		return 0;
+
+	n = simple_strtoul(buf, NULL, 0);
+
+	switch (n) {
+	case 1:
+		tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET);
+		tiocx_reload(cx_dev);
+		break;
+	case 2:
+		tiocx_reload(cx_dev);
+		break;
+	case 3:
+		tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET);
+		break;
+	default:
+		break;
+	}
+
+	return count;
+}
+
+DEVICE_ATTR(cxdev_control, 0644, show_cxdev_control, store_cxdev_control);
+
+static int __init tiocx_init(void)
+{
+	cnodeid_t cnodeid;
+	int found_tiocx_device = 0;
+
+	if (!ia64_platform_is("sn2"))
+		return 0;
+
+	bus_register(&tiocx_bus_type);
+
+	for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) {
+		nasid_t nasid;
+		int bt;
+
+		nasid = cnodeid_to_nasid(cnodeid);
+
+		if ((nasid & 0x1) && is_fpga_tio(nasid, &bt)) {
+			struct hubdev_info *hubdev;
+			struct xwidget_info *widgetp;
+
+			DBG("Found TIO at nasid 0x%x\n", nasid);
+
+			hubdev =
+			    (struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo);
+
+			widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET];
+
+			/* The CE hangs off of the CX port but is not an FPGA */
+			if (widgetp->xwi_hwid.part_num == TIO_CE_ASIC_PARTNUM)
+				continue;
+
+			tio_corelet_reset(nasid, TIOCX_CORELET);
+			tio_conveyor_enable(nasid);
+
+			if (cx_device_register
+			    (nasid, widgetp->xwi_hwid.part_num,
+			     widgetp->xwi_hwid.mfg_num, hubdev, bt) < 0)
+				return -ENXIO;
+			else
+				found_tiocx_device++;
+		}
+	}
+
+	/* It's ok if we find zero devices. */
+	DBG("found_tiocx_device= %d\n", found_tiocx_device);
+
+	return 0;
+}
+
+static int cx_remove_device(struct device * dev, void * data)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	device_remove_file(dev, &dev_attr_cxdev_control);
+	cx_device_unregister(cx_dev);
+	return 0;
+}
+
+static void __exit tiocx_exit(void)
+{
+	DBG("tiocx_exit\n");
+
+	/*
+	 * Unregister devices.
+	 */
+	bus_for_each_dev(&tiocx_bus_type, NULL, NULL, cx_remove_device);
+	bus_unregister(&tiocx_bus_type);
+}
+
+fs_initcall(tiocx_init);
+module_exit(tiocx_exit);
+
+/************************************************************************
+ * Module licensing and description
+ ************************************************************************/
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bruce Losure <blosure@sgi.com>");
+MODULE_DESCRIPTION("TIOCX module");
+MODULE_SUPPORTED_DEVICE(DEVICE_NAME);
diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile
new file mode 100644
index 00000000..df2a9014
--- /dev/null
+++ b/arch/ia64/sn/pci/Makefile
@@ -0,0 +1,12 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+#
+# Makefile for the sn pci general routines.
+
+ccflags-y := -Iarch/ia64/sn/include
+
+obj-y := pci_dma.o tioca_provider.o tioce_provider.o pcibr/
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
new file mode 100644
index 00000000..a9d310de
--- /dev/null
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -0,0 +1,486 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000,2002-2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * Routines for PCI DMA mapping.  See Documentation/DMA-API.txt for
+ * a description of how these routines should be used.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <asm/dma.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/sn_sal.h>
+
+#define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))
+#define SG_ENT_PHYS_ADDRESS(SG)	virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
+
+/**
+ * sn_dma_supported - test a DMA mask
+ * @dev: device to test
+ * @mask: DMA mask to test
+ *
+ * Return whether the given PCI device DMA address mask can be supported
+ * properly.  For example, if your device can only drive the low 24-bits
+ * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
+ * this function.  Of course, SN only supports devices that have 32 or more
+ * address bits when using the PMU.
+ */
+static int sn_dma_supported(struct device *dev, u64 mask)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	if (mask < 0x7fffffff)
+		return 0;
+	return 1;
+}
+
+/**
+ * sn_dma_set_mask - set the DMA mask
+ * @dev: device to set
+ * @dma_mask: new mask
+ *
+ * Set @dev's DMA mask if the hw supports it.
+ */
+int sn_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	if (!sn_dma_supported(dev, dma_mask))
+		return 0;
+
+	*dev->dma_mask = dma_mask;
+	return 1;
+}
+EXPORT_SYMBOL(sn_dma_set_mask);
+
+/**
+ * sn_dma_alloc_coherent - allocate memory for coherent DMA
+ * @dev: device to allocate for
+ * @size: size of the region
+ * @dma_handle: DMA (bus) address
+ * @flags: memory allocation flags
+ *
+ * dma_alloc_coherent() returns a pointer to a memory region suitable for
+ * coherent DMA traffic to/from a PCI device.  On SN platforms, this means
+ * that @dma_handle will have the %PCIIO_DMA_CMD flag set.
+ *
+ * This interface is usually used for "command" streams (e.g. the command
+ * queue for a SCSI controller).  See Documentation/DMA-API.txt for
+ * more information.
+ */
+static void *sn_dma_alloc_coherent(struct device *dev, size_t size,
+				   dma_addr_t * dma_handle, gfp_t flags)
+{
+	void *cpuaddr;
+	unsigned long phys_addr;
+	int node;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	/*
+	 * Allocate the memory.
+	 */
+	node = pcibus_to_node(pdev->bus);
+	if (likely(node >=0)) {
+		struct page *p = alloc_pages_exact_node(node,
+						flags, get_order(size));
+
+		if (likely(p))
+			cpuaddr = page_address(p);
+		else
+			return NULL;
+	} else
+		cpuaddr = (void *)__get_free_pages(flags, get_order(size));
+
+	if (unlikely(!cpuaddr))
+		return NULL;
+
+	memset(cpuaddr, 0x0, size);
+
+	/* physical addr. of the memory we just got */
+	phys_addr = __pa(cpuaddr);
+
+	/*
+	 * 64 bit address translations should never fail.
+	 * 32 bit translations can fail if there are insufficient mapping
+	 * resources.
+	 */
+
+	*dma_handle = provider->dma_map_consistent(pdev, phys_addr, size,
+						   SN_DMA_ADDR_PHYS);
+	if (!*dma_handle) {
+		printk(KERN_ERR "%s: out of ATEs\n", __func__);
+		free_pages((unsigned long)cpuaddr, get_order(size));
+		return NULL;
+	}
+
+	return cpuaddr;
+}
+
+/**
+ * sn_pci_free_coherent - free memory associated with coherent DMAable region
+ * @dev: device to free for
+ * @size: size to free
+ * @cpu_addr: kernel virtual address to free
+ * @dma_handle: DMA address associated with this region
+ *
+ * Frees the memory allocated by dma_alloc_coherent(), potentially unmapping
+ * any associated IOMMU mappings.
+ */
+static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+				 dma_addr_t dma_handle)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	provider->dma_unmap(pdev, dma_handle, 0);
+	free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+/**
+ * sn_dma_map_single_attrs - map a single page for DMA
+ * @dev: device to map for
+ * @cpu_addr: kernel virtual address of the region to map
+ * @size: size of the region
+ * @direction: DMA direction
+ * @attrs: optional dma attributes
+ *
+ * Map the region pointed to by @cpu_addr for DMA and return the
+ * DMA address.
+ *
+ * We map this to the one step pcibr_dmamap_trans interface rather than
+ * the two step pcibr_dmamap_alloc/pcibr_dmamap_addr because we have
+ * no way of saving the dmamap handle from the alloc to later free
+ * (which is pretty much unacceptable).
+ *
+ * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with
+ * dma_map_consistent() so that writes force a flush of pending DMA.
+ * (See "SGI Altix Architecture Considerations for Linux Device Drivers",
+ * Document Number: 007-4763-001)
+ *
+ * TODO: simplify our interface;
+ *       figure out how to save dmamap handle so can use two step.
+ */
+static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page,
+				  unsigned long offset, size_t size,
+				  enum dma_data_direction dir,
+				  struct dma_attrs *attrs)
+{
+	void *cpu_addr = page_address(page) + offset;
+	dma_addr_t dma_addr;
+	unsigned long phys_addr;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+	int dmabarr;
+
+	dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);
+
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	phys_addr = __pa(cpu_addr);
+	if (dmabarr)
+		dma_addr = provider->dma_map_consistent(pdev, phys_addr,
+							size, SN_DMA_ADDR_PHYS);
+	else
+		dma_addr = provider->dma_map(pdev, phys_addr, size,
+					     SN_DMA_ADDR_PHYS);
+
+	if (!dma_addr) {
+		printk(KERN_ERR "%s: out of ATEs\n", __func__);
+		return 0;
+	}
+	return dma_addr;
+}
+
+/**
+ * sn_dma_unmap_single_attrs - unamp a DMA mapped page
+ * @dev: device to sync
+ * @dma_addr: DMA address to sync
+ * @size: size of region
+ * @direction: DMA direction
+ * @attrs: optional dma attributes
+ *
+ * This routine is supposed to sync the DMA region specified
+ * by @dma_handle into the coherence domain.  On SN, we're always cache
+ * coherent, so we just need to free any ATEs associated with this mapping.
+ */
+static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+			      size_t size, enum dma_data_direction dir,
+			      struct dma_attrs *attrs)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	provider->dma_unmap(pdev, dma_addr, dir);
+}
+
+/**
+ * sn_dma_unmap_sg - unmap a DMA scatterlist
+ * @dev: device to unmap
+ * @sg: scatterlist to unmap
+ * @nhwentries: number of scatterlist entries
+ * @direction: DMA direction
+ * @attrs: optional dma attributes
+ *
+ * Unmap a set of streaming mode DMA translations.
+ */
+static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
+			    int nhwentries, enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
+{
+	int i;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+	struct scatterlist *sg;
+
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	for_each_sg(sgl, sg, nhwentries, i) {
+		provider->dma_unmap(pdev, sg->dma_address, dir);
+		sg->dma_address = (dma_addr_t) NULL;
+		sg->dma_length = 0;
+	}
+}
+
+/**
+ * sn_dma_map_sg - map a scatterlist for DMA
+ * @dev: device to map for
+ * @sg: scatterlist to map
+ * @nhwentries: number of entries
+ * @direction: direction of the DMA transaction
+ * @attrs: optional dma attributes
+ *
+ * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with
+ * dma_map_consistent() so that writes force a flush of pending DMA.
+ * (See "SGI Altix Architecture Considerations for Linux Device Drivers",
+ * Document Number: 007-4763-001)
+ *
+ * Maps each entry of @sg for DMA.
+ */
+static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl,
+			 int nhwentries, enum dma_data_direction dir,
+			 struct dma_attrs *attrs)
+{
+	unsigned long phys_addr;
+	struct scatterlist *saved_sg = sgl, *sg;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+	int i;
+	int dmabarr;
+
+	dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);
+
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	/*
+	 * Setup a DMA address for each entry in the scatterlist.
+	 */
+	for_each_sg(sgl, sg, nhwentries, i) {
+		dma_addr_t dma_addr;
+		phys_addr = SG_ENT_PHYS_ADDRESS(sg);
+		if (dmabarr)
+			dma_addr = provider->dma_map_consistent(pdev,
+								phys_addr,
+								sg->length,
+								SN_DMA_ADDR_PHYS);
+		else
+			dma_addr = provider->dma_map(pdev, phys_addr,
+						     sg->length,
+						     SN_DMA_ADDR_PHYS);
+
+		sg->dma_address = dma_addr;
+		if (!sg->dma_address) {
+			printk(KERN_ERR "%s: out of ATEs\n", __func__);
+
+			/*
+			 * Free any successfully allocated entries.
+			 */
+			if (i > 0)
+				sn_dma_unmap_sg(dev, saved_sg, i, dir, attrs);
+			return 0;
+		}
+
+		sg->dma_length = sg->length;
+	}
+
+	return nhwentries;
+}
+
+static void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+				       size_t size, enum dma_data_direction dir)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+}
+
+static void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+					  size_t size,
+					  enum dma_data_direction dir)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+}
+
+static void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+				   int nelems, enum dma_data_direction dir)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+}
+
+static void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+				      int nelems, enum dma_data_direction dir)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+}
+
+static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+u64 sn_dma_get_required_mask(struct device *dev)
+{
+	return DMA_BIT_MASK(64);
+}
+EXPORT_SYMBOL_GPL(sn_dma_get_required_mask);
+
+char *sn_pci_get_legacy_mem(struct pci_bus *bus)
+{
+	if (!SN_PCIBUS_BUSSOFT(bus))
+		return ERR_PTR(-ENODEV);
+
+	return (char *)(SN_PCIBUS_BUSSOFT(bus)->bs_legacy_mem | __IA64_UNCACHED_OFFSET);
+}
+
+int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
+{
+	unsigned long addr;
+	int ret;
+	struct ia64_sal_retval isrv;
+
+	/*
+	 * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work
+	 * around hw issues at the pci bus level.  SGI proms older than
+	 * 4.10 don't implement this.
+	 */
+
+	SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE,
+		 pci_domain_nr(bus), bus->number,
+		 0, /* io */
+		 0, /* read */
+		 port, size, __pa(val));
+
+	if (isrv.status == 0)
+		return size;
+
+	/*
+	 * If the above failed, retry using the SAL_PROBE call which should
+	 * be present in all proms (but which cannot work round PCI chipset
+	 * bugs).  This code is retained for compatibility with old
+	 * pre-4.10 proms, and should be removed at some point in the future.
+	 */
+
+	if (!SN_PCIBUS_BUSSOFT(bus))
+		return -ENODEV;
+
+	addr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET;
+	addr += port;
+
+	ret = ia64_sn_probe_mem(addr, (long)size, (void *)val);
+
+	if (ret == 2)
+		return -EINVAL;
+
+	if (ret == 1)
+		*val = -1;
+
+	return size;
+}
+
+int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
+{
+	int ret = size;
+	unsigned long paddr;
+	unsigned long *addr;
+	struct ia64_sal_retval isrv;
+
+	/*
+	 * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work
+	 * around hw issues at the pci bus level.  SGI proms older than
+	 * 4.10 don't implement this.
+	 */
+
+	SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE,
+		 pci_domain_nr(bus), bus->number,
+		 0, /* io */
+		 1, /* write */
+		 port, size, __pa(&val));
+
+	if (isrv.status == 0)
+		return size;
+
+	/*
+	 * If the above failed, retry using the SAL_PROBE call which should
+	 * be present in all proms (but which cannot work round PCI chipset
+	 * bugs).  This code is retained for compatibility with old
+	 * pre-4.10 proms, and should be removed at some point in the future.
+	 */
+
+	if (!SN_PCIBUS_BUSSOFT(bus)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* Put the phys addr in uncached space */
+	paddr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET;
+	paddr += port;
+	addr = (unsigned long *)paddr;
+
+	switch (size) {
+	case 1:
+		*(volatile u8 *)(addr) = (u8)(val);
+		break;
+	case 2:
+		*(volatile u16 *)(addr) = (u16)(val);
+		break;
+	case 4:
+		*(volatile u32 *)(addr) = (u32)(val);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+ out:
+	return ret;
+}
+
+static struct dma_map_ops sn_dma_ops = {
+	.alloc_coherent		= sn_dma_alloc_coherent,
+	.free_coherent		= sn_dma_free_coherent,
+	.map_page		= sn_dma_map_page,
+	.unmap_page		= sn_dma_unmap_page,
+	.map_sg			= sn_dma_map_sg,
+	.unmap_sg		= sn_dma_unmap_sg,
+	.sync_single_for_cpu 	= sn_dma_sync_single_for_cpu,
+	.sync_sg_for_cpu	= sn_dma_sync_sg_for_cpu,
+	.sync_single_for_device = sn_dma_sync_single_for_device,
+	.sync_sg_for_device	= sn_dma_sync_sg_for_device,
+	.mapping_error		= sn_dma_mapping_error,
+	.dma_supported		= sn_dma_supported,
+};
+
+void sn_dma_init(void)
+{
+	dma_ops = &sn_dma_ops;
+}
diff --git a/arch/ia64/sn/pci/pcibr/Makefile b/arch/ia64/sn/pci/pcibr/Makefile
new file mode 100644
index 00000000..396bcae3
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/Makefile
@@ -0,0 +1,13 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2002-2004 Silicon Graphics, Inc.  All Rights Reserved.
+#
+# Makefile for the sn2 io routines.
+
+ccflags-y := -Iarch/ia64/sn/include
+
+obj-y				+=  pcibr_dma.o pcibr_reg.o \
+				    pcibr_ate.o pcibr_provider.o
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
new file mode 100644
index 00000000..5bc34eac
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
@@ -0,0 +1,177 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+
+int pcibr_invalidate_ate;	/* by default don't invalidate ATE on free */
+
+/*
+ * mark_ate: Mark the ate as either free or inuse.
+ */
+static void mark_ate(struct ate_resource *ate_resource, int start, int number,
+		     u64 value)
+{
+	u64 *ate = ate_resource->ate;
+	int index;
+	int length = 0;
+
+	for (index = start; length < number; index++, length++)
+		ate[index] = value;
+}
+
+/*
+ * find_free_ate:  Find the first free ate index starting from the given
+ *		   index for the desired consecutive count.
+ */
+static int find_free_ate(struct ate_resource *ate_resource, int start,
+			 int count)
+{
+	u64 *ate = ate_resource->ate;
+	int index;
+	int start_free;
+
+	for (index = start; index < ate_resource->num_ate;) {
+		if (!ate[index]) {
+			int i;
+			int free;
+			free = 0;
+			start_free = index;	/* Found start free ate */
+			for (i = start_free; i < ate_resource->num_ate; i++) {
+				if (!ate[i]) {	/* This is free */
+					if (++free == count)
+						return start_free;
+				} else {
+					index = i + 1;
+					break;
+				}
+			}
+			if (i >= ate_resource->num_ate)
+				return -1;
+		} else
+			index++;	/* Try next ate */
+	}
+
+	return -1;
+}
+
+/*
+ * free_ate_resource:  Free the requested number of ATEs.
+ */
+static inline void free_ate_resource(struct ate_resource *ate_resource,
+				     int start)
+{
+	mark_ate(ate_resource, start, ate_resource->ate[start], 0);
+	if ((ate_resource->lowest_free_index > start) ||
+	    (ate_resource->lowest_free_index < 0))
+		ate_resource->lowest_free_index = start;
+}
+
+/*
+ * alloc_ate_resource:  Allocate the requested number of ATEs.
+ */
+static inline int alloc_ate_resource(struct ate_resource *ate_resource,
+				     int ate_needed)
+{
+	int start_index;
+
+	/*
+	 * Check for ate exhaustion.
+	 */
+	if (ate_resource->lowest_free_index < 0)
+		return -1;
+
+	/*
+	 * Find the required number of free consecutive ates.
+	 */
+	start_index =
+	    find_free_ate(ate_resource, ate_resource->lowest_free_index,
+			  ate_needed);
+	if (start_index >= 0)
+		mark_ate(ate_resource, start_index, ate_needed, ate_needed);
+
+	ate_resource->lowest_free_index =
+	    find_free_ate(ate_resource, ate_resource->lowest_free_index, 1);
+
+	return start_index;
+}
+
+/*
+ * Allocate "count" contiguous Bridge Address Translation Entries
+ * on the specified bridge to be used for PCI to XTALK mappings.
+ * Indices in rm map range from 1..num_entries.  Indices returned
+ * to caller range from 0..num_entries-1.
+ *
+ * Return the start index on success, -1 on failure.
+ */
+int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count)
+{
+	int status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcibus_info->pbi_lock, flags);
+	status = alloc_ate_resource(&pcibus_info->pbi_int_ate_resource, count);
+	spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags);
+
+	return status;
+}
+
+/*
+ * Setup an Address Translation Entry as specified.  Use either the Bridge
+ * internal maps or the external map RAM, as appropriate.
+ */
+static inline u64 __iomem *pcibr_ate_addr(struct pcibus_info *pcibus_info,
+				       int ate_index)
+{
+	if (ate_index < pcibus_info->pbi_int_ate_size) {
+		return pcireg_int_ate_addr(pcibus_info, ate_index);
+	}
+	panic("pcibr_ate_addr: invalid ate_index 0x%x", ate_index);
+}
+
+/*
+ * Update the ate.
+ */
+void inline
+ate_write(struct pcibus_info *pcibus_info, int ate_index, int count,
+	  volatile u64 ate)
+{
+	while (count-- > 0) {
+		if (ate_index < pcibus_info->pbi_int_ate_size) {
+			pcireg_int_ate_set(pcibus_info, ate_index, ate);
+		} else {
+			panic("ate_write: invalid ate_index 0x%x", ate_index);
+		}
+		ate_index++;
+		ate += IOPGSIZE;
+	}
+
+	pcireg_tflush_get(pcibus_info);	/* wait until Bridge PIO complete */
+}
+
+void pcibr_ate_free(struct pcibus_info *pcibus_info, int index)
+{
+
+	volatile u64 ate;
+	int count;
+	unsigned long flags;
+
+	if (pcibr_invalidate_ate) {
+		/* For debugging purposes, clear the valid bit in the ATE */
+		ate = *pcibr_ate_addr(pcibus_info, index);
+		count = pcibus_info->pbi_int_ate_resource.ate[index];
+		ate_write(pcibus_info, index, count, (ate & ~PCI32_ATE_V));
+	}
+
+	spin_lock_irqsave(&pcibus_info->pbi_lock, flags);
+	free_ate_resource(&pcibus_info->pbi_int_ate_resource, index);
+	spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags);
+}
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
new file mode 100644
index 00000000..33def666
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -0,0 +1,412 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/pic.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/tiocp.h>
+#include "tio.h"
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+
+extern int sn_ioif_inited;
+
+/* =====================================================================
+ *    DMA MANAGEMENT
+ *
+ *      The Bridge ASIC provides three methods of doing DMA: via a "direct map"
+ *      register available in 32-bit PCI space (which selects a contiguous 2G
+ *	address space on some other widget), via "direct" addressing via 64-bit
+ *      PCI space (all destination information comes from the PCI address,
+ *      including transfer attributes), and via a "mapped" region that allows 
+ *      a bunch of different small mappings to be established with the PMU.
+ *
+ *      For efficiency, we most prefer to use the 32bit direct mapping facility,
+ *      since it requires no resource allocations. The advantage of using the
+ *      PMU over the 64-bit direct is that single-cycle PCI addressing can be
+ *      used; the advantage of using 64-bit direct over PMU addressing is that
+ *      we do not have to allocate entries in the PMU.
+ */
+
+static dma_addr_t
+pcibr_dmamap_ate32(struct pcidev_info *info,
+		   u64 paddr, size_t req_size, u64 flags, int dma_flags)
+{
+
+	struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info;
+	struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
+	    pdi_pcibus_info;
+	u8 internal_device = (PCI_SLOT(pcidev_info->pdi_host_pcidev_info->
+					    pdi_linux_pcidev->devfn)) - 1;
+	int ate_count;
+	int ate_index;
+	u64 ate_flags = flags | PCI32_ATE_V;
+	u64 ate;
+	u64 pci_addr;
+	u64 xio_addr;
+	u64 offset;
+
+	/* PIC in PCI-X mode does not supports 32bit PageMap mode */
+	if (IS_PIC_SOFT(pcibus_info) && IS_PCIX(pcibus_info)) {
+		return 0;
+	}
+
+	/* Calculate the number of ATEs needed. */
+	if (!(MINIMAL_ATE_FLAG(paddr, req_size))) {
+		ate_count = IOPG((IOPGSIZE - 1)	/* worst case start offset */
+				 +req_size	/* max mapping bytes */
+				 - 1) + 1;	/* round UP */
+	} else {		/* assume requested target is page aligned */
+		ate_count = IOPG(req_size	/* max mapping bytes */
+				 - 1) + 1;	/* round UP */
+	}
+
+	/* Get the number of ATEs required. */
+	ate_index = pcibr_ate_alloc(pcibus_info, ate_count);
+	if (ate_index < 0)
+		return 0;
+
+	/* In PCI-X mode, Prefetch not supported */
+	if (IS_PCIX(pcibus_info))
+		ate_flags &= ~(PCI32_ATE_PREF);
+
+	if (SN_DMA_ADDRTYPE(dma_flags == SN_DMA_ADDR_PHYS))
+		xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
+	    					      PHYS_TO_TIODMA(paddr);
+	else
+		xio_addr = paddr;
+
+	offset = IOPGOFF(xio_addr);
+	ate = ate_flags | (xio_addr - offset);
+
+	/* If PIC, put the targetid in the ATE */
+	if (IS_PIC_SOFT(pcibus_info)) {
+		ate |= (pcibus_info->pbi_hub_xid << PIC_ATE_TARGETID_SHFT);
+	}
+
+	/*
+	 * If we're mapping for MSI, set the MSI bit in the ATE.  If it's a
+	 * TIOCP based pci bus, we also need to set the PIO bit in the ATE.
+	 */
+	if (dma_flags & SN_DMA_MSI) {
+		ate |= PCI32_ATE_MSI;
+		if (IS_TIOCP_SOFT(pcibus_info))
+			ate |= PCI32_ATE_PIO;
+	}
+
+	ate_write(pcibus_info, ate_index, ate_count, ate);
+
+	/*
+	 * Set up the DMA mapped Address.
+	 */
+	pci_addr = PCI32_MAPPED_BASE + offset + IOPGSIZE * ate_index;
+
+	/*
+	 * If swap was set in device in pcibr_endian_set()
+	 * we need to turn swapping on.
+	 */
+	if (pcibus_info->pbi_devreg[internal_device] & PCIBR_DEV_SWAP_DIR)
+		ATE_SWAP_ON(pci_addr);
+
+
+	return pci_addr;
+}
+
+static dma_addr_t
+pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr,
+			u64 dma_attributes, int dma_flags)
+{
+	struct pcibus_info *pcibus_info = (struct pcibus_info *)
+	    ((info->pdi_host_pcidev_info)->pdi_pcibus_info);
+	u64 pci_addr;
+
+	/* Translate to Crosstalk View of Physical Address */
+	if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS)
+		pci_addr = IS_PIC_SOFT(pcibus_info) ?
+				PHYS_TO_DMA(paddr) :
+				PHYS_TO_TIODMA(paddr);
+	else
+		pci_addr = paddr;
+	pci_addr |= dma_attributes;
+
+	/* Handle Bus mode */
+	if (IS_PCIX(pcibus_info))
+		pci_addr &= ~PCI64_ATTR_PREF;
+
+	/* Handle Bridge Chipset differences */
+	if (IS_PIC_SOFT(pcibus_info)) {
+		pci_addr |=
+		    ((u64) pcibus_info->
+		     pbi_hub_xid << PIC_PCI64_ATTR_TARG_SHFT);
+	} else
+		pci_addr |= (dma_flags & SN_DMA_MSI) ?
+				TIOCP_PCI64_CMDTYPE_MSI :
+				TIOCP_PCI64_CMDTYPE_MEM;
+
+	/* If PCI mode, func zero uses VCHAN0, every other func uses VCHAN1 */
+	if (!IS_PCIX(pcibus_info) && PCI_FUNC(info->pdi_linux_pcidev->devfn))
+		pci_addr |= PCI64_ATTR_VIRTUAL;
+
+	return pci_addr;
+}
+
+static dma_addr_t
+pcibr_dmatrans_direct32(struct pcidev_info * info,
+			u64 paddr, size_t req_size, u64 flags, int dma_flags)
+{
+	struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info;
+	struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
+	    pdi_pcibus_info;
+	u64 xio_addr;
+
+	u64 xio_base;
+	u64 offset;
+	u64 endoff;
+
+	if (IS_PCIX(pcibus_info)) {
+		return 0;
+	}
+
+	if (dma_flags & SN_DMA_MSI)
+		return 0;
+
+	if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS)
+		xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
+	    					      PHYS_TO_TIODMA(paddr);
+	else
+		xio_addr = paddr;
+
+	xio_base = pcibus_info->pbi_dir_xbase;
+	offset = xio_addr - xio_base;
+	endoff = req_size + offset;
+	if ((req_size > (1ULL << 31)) ||	/* Too Big */
+	    (xio_addr < xio_base) ||	/* Out of range for mappings */
+	    (endoff > (1ULL << 31))) {	/* Too Big */
+		return 0;
+	}
+
+	return PCI32_DIRECT_BASE | offset;
+}
+
+/*
+ * Wrapper routine for freeing DMA maps
+ * DMA mappings for Direct 64 and 32 do not have any DMA maps.
+ */
+void
+pcibr_dma_unmap(struct pci_dev *hwdev, dma_addr_t dma_handle, int direction)
+{
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
+	struct pcibus_info *pcibus_info =
+	    (struct pcibus_info *)pcidev_info->pdi_pcibus_info;
+
+	if (IS_PCI32_MAPPED(dma_handle)) {
+		int ate_index;
+
+		ate_index =
+		    IOPG((ATE_SWAP_OFF(dma_handle) - PCI32_MAPPED_BASE));
+		pcibr_ate_free(pcibus_info, ate_index);
+	}
+}
+
+/*
+ * On SN systems there is a race condition between a PIO read response and 
+ * DMA's.  In rare cases, the read response may beat the DMA, causing the
+ * driver to think that data in memory is complete and meaningful.  This code
+ * eliminates that race.  This routine is called by the PIO read routines
+ * after doing the read.  For PIC this routine then forces a fake interrupt
+ * on another line, which is logically associated with the slot that the PIO
+ * is addressed to.  It then spins while watching the memory location that
+ * the interrupt is targeted to.  When the interrupt response arrives, we 
+ * are sure that the DMA has landed in memory and it is safe for the driver
+ * to proceed.	For TIOCP use the Device(x) Write Request Buffer Flush 
+ * Bridge register since it ensures the data has entered the coherence domain,
+ * unlike the PIC Device(x) Write Request Buffer Flush register.
+ */
+
+void sn_dma_flush(u64 addr)
+{
+	nasid_t nasid;
+	int is_tio;
+	int wid_num;
+	int i, j;
+	unsigned long flags;
+	u64 itte;
+	struct hubdev_info *hubinfo;
+	struct sn_flush_device_kernel *p;
+	struct sn_flush_device_common *common;
+	struct sn_flush_nasid_entry *flush_nasid_list;
+
+	if (!sn_ioif_inited)
+		return;
+
+	nasid = NASID_GET(addr);
+	if (-1 == nasid_to_cnodeid(nasid))
+		return;
+
+	hubinfo = (NODEPDA(nasid_to_cnodeid(nasid)))->pdinfo;
+
+	BUG_ON(!hubinfo);
+
+	flush_nasid_list = &hubinfo->hdi_flush_nasid_list;
+	if (flush_nasid_list->widget_p == NULL)
+		return;
+
+	is_tio = (nasid & 1);
+	if (is_tio) {
+		int itte_index;
+
+		if (TIO_HWIN(addr))
+			itte_index = 0;
+		else if (TIO_BWIN_WINDOWNUM(addr))
+			itte_index = TIO_BWIN_WINDOWNUM(addr);
+		else
+			itte_index = -1;
+
+		if (itte_index >= 0) {
+			itte = flush_nasid_list->iio_itte[itte_index];
+			if (! TIO_ITTE_VALID(itte))
+				return;
+			wid_num = TIO_ITTE_WIDGET(itte);
+		} else
+			wid_num = TIO_SWIN_WIDGETNUM(addr);
+	} else {
+		if (BWIN_WINDOWNUM(addr)) {
+			itte = flush_nasid_list->iio_itte[BWIN_WINDOWNUM(addr)];
+			wid_num = IIO_ITTE_WIDGET(itte);
+		} else
+			wid_num = SWIN_WIDGETNUM(addr);
+	}
+	if (flush_nasid_list->widget_p[wid_num] == NULL)
+		return;
+	p = &flush_nasid_list->widget_p[wid_num][0];
+
+	/* find a matching BAR */
+	for (i = 0; i < DEV_PER_WIDGET; i++,p++) {
+		common = p->common;
+		for (j = 0; j < PCI_ROM_RESOURCE; j++) {
+			if (common->sfdl_bar_list[j].start == 0)
+				break;
+			if (addr >= common->sfdl_bar_list[j].start
+			    && addr <= common->sfdl_bar_list[j].end)
+				break;
+		}
+		if (j < PCI_ROM_RESOURCE && common->sfdl_bar_list[j].start != 0)
+			break;
+	}
+
+	/* if no matching BAR, return without doing anything. */
+	if (i == DEV_PER_WIDGET)
+		return;
+
+	/*
+	 * For TIOCP use the Device(x) Write Request Buffer Flush Bridge
+	 * register since it ensures the data has entered the coherence
+	 * domain, unlike PIC.
+	 */
+	if (is_tio) {
+		/*
+	 	 * Note:  devices behind TIOCE should never be matched in the
+		 * above code, and so the following code is PIC/CP centric.
+		 * If CE ever needs the sn_dma_flush mechanism, we will have
+		 * to account for that here and in tioce_bus_fixup().
+	 	 */
+		u32 tio_id = HUB_L(TIO_IOSPACE_ADDR(nasid, TIO_NODE_ID));
+		u32 revnum = XWIDGET_PART_REV_NUM(tio_id);
+
+		/* TIOCP BRINGUP WAR (PV907516): Don't write buffer flush reg */
+		if ((1 << XWIDGET_PART_REV_NUM_REV(revnum)) & PV907516) {
+			return;
+		} else {
+			pcireg_wrb_flush_get(common->sfdl_pcibus_info,
+					     (common->sfdl_slot - 1));
+		}
+	} else {
+		spin_lock_irqsave(&p->sfdl_flush_lock, flags);
+		*common->sfdl_flush_addr = 0;
+
+		/* force an interrupt. */
+		*(volatile u32 *)(common->sfdl_force_int_addr) = 1;
+
+		/* wait for the interrupt to come back. */
+		while (*(common->sfdl_flush_addr) != 0x10f)
+			cpu_relax();
+
+		/* okay, everything is synched up. */
+		spin_unlock_irqrestore(&p->sfdl_flush_lock, flags);
+	}
+	return;
+}
+
+/*
+ * DMA interfaces.  Called from pci_dma.c routines.
+ */
+
+dma_addr_t
+pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size, int dma_flags)
+{
+	dma_addr_t dma_handle;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
+
+	/* SN cannot support DMA addresses smaller than 32 bits. */
+	if (hwdev->dma_mask < 0x7fffffff) {
+		return 0;
+	}
+
+	if (hwdev->dma_mask == ~0UL) {
+		/*
+		 * Handle the most common case: 64 bit cards.  This
+		 * call should always succeed.
+		 */
+
+		dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
+						     PCI64_ATTR_PREF, dma_flags);
+	} else {
+		/* Handle 32-63 bit cards via direct mapping */
+		dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr,
+						     size, 0, dma_flags);
+		if (!dma_handle) {
+			/*
+			 * It is a 32 bit card and we cannot do direct mapping,
+			 * so we use an ATE.
+			 */
+
+			dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr,
+							size, PCI32_ATE_PREF,
+							dma_flags);
+		}
+	}
+
+	return dma_handle;
+}
+
+dma_addr_t
+pcibr_dma_map_consistent(struct pci_dev * hwdev, unsigned long phys_addr,
+			 size_t size, int dma_flags)
+{
+	dma_addr_t dma_handle;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
+
+	if (hwdev->dev.coherent_dma_mask == ~0UL) {
+		dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
+					    PCI64_ATTR_BAR, dma_flags);
+	} else {
+		dma_handle = (dma_addr_t) pcibr_dmamap_ate32(pcidev_info,
+						    phys_addr, size,
+						    PCI32_ATE_BAR, dma_flags);
+	}
+
+	return dma_handle;
+}
+
+EXPORT_SYMBOL(sn_dma_flush);
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
new file mode 100644
index 00000000..3cb5cf37
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -0,0 +1,264 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001-2004, 2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/pic.h>
+#include <asm/sn/sn2/sn_hwperf.h>
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+
+int
+sal_pcibr_slot_enable(struct pcibus_info *soft, int device, void *resp,
+                      char **ssdt)
+{
+	struct ia64_sal_retval ret_stuff;
+	u64 busnum;
+	u64 segment;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	segment = soft->pbi_buscommon.bs_persist_segment;
+	busnum = soft->pbi_buscommon.bs_persist_busnum;
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_ENABLE, segment,
+			busnum, (u64) device, (u64) resp, (u64)ia64_tpa(ssdt),
+			0, 0);
+
+	return (int)ret_stuff.v0;
+}
+
+int
+sal_pcibr_slot_disable(struct pcibus_info *soft, int device, int action,
+		       void *resp)
+{
+	struct ia64_sal_retval ret_stuff;
+	u64 busnum;
+	u64 segment;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	segment = soft->pbi_buscommon.bs_persist_segment;
+	busnum = soft->pbi_buscommon.bs_persist_busnum;
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_DISABLE,
+			segment, busnum, (u64) device, (u64) action,
+			(u64) resp, 0, 0);
+
+	return (int)ret_stuff.v0;
+}
+
+static int sal_pcibr_error_interrupt(struct pcibus_info *soft)
+{
+	struct ia64_sal_retval ret_stuff;
+	u64 busnum;
+	int segment;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	segment = soft->pbi_buscommon.bs_persist_segment;
+	busnum = soft->pbi_buscommon.bs_persist_busnum;
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_ERROR_INTERRUPT,
+			(u64) segment, (u64) busnum, 0, 0, 0, 0, 0);
+
+	return (int)ret_stuff.v0;
+}
+
+u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus)
+{
+	long rc;
+	u16 uninitialized_var(ioboard);		/* GCC be quiet */
+	nasid_t nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
+
+	rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard);
+	if (rc) {
+		printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n",
+		       rc);
+		return 0;
+	}
+
+	return ioboard;
+}
+
+/* 
+ * PCI Bridge Error interrupt handler.  Gets invoked whenever a PCI 
+ * bridge sends an error interrupt.
+ */
+static irqreturn_t
+pcibr_error_intr_handler(int irq, void *arg)
+{
+	struct pcibus_info *soft = arg;
+
+	if (sal_pcibr_error_interrupt(soft) < 0)
+		panic("pcibr_error_intr_handler(): Fatal Bridge Error");
+
+	return IRQ_HANDLED;
+}
+
+void *
+pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
+{
+	int nasid, cnode, j;
+	struct hubdev_info *hubdev_info;
+	struct pcibus_info *soft;
+	struct sn_flush_device_kernel *sn_flush_device_kernel;
+	struct sn_flush_device_common *common;
+
+	if (! IS_PCI_BRIDGE_ASIC(prom_bussoft->bs_asic_type)) {
+		return NULL;
+	}
+
+	/*
+	 * Allocate kernel bus soft and copy from prom.
+	 */
+
+	soft = kmalloc(sizeof(struct pcibus_info), GFP_KERNEL);
+	if (!soft) {
+		return NULL;
+	}
+
+	memcpy(soft, prom_bussoft, sizeof(struct pcibus_info));
+	soft->pbi_buscommon.bs_base = (unsigned long)
+		ioremap(REGION_OFFSET(soft->pbi_buscommon.bs_base),
+			sizeof(struct pic));
+
+	spin_lock_init(&soft->pbi_lock);
+
+	/*
+	 * register the bridge's error interrupt handler
+	 */
+	if (request_irq(SGI_PCIASIC_ERROR, pcibr_error_intr_handler,
+			IRQF_SHARED, "PCIBR error", (void *)(soft))) {
+		printk(KERN_WARNING
+		       "pcibr cannot allocate interrupt for error handler\n");
+	}
+	sn_set_err_irq_affinity(SGI_PCIASIC_ERROR);
+
+	/* 
+	 * Update the Bridge with the "kernel" pagesize 
+	 */
+	if (PAGE_SIZE < 16384) {
+		pcireg_control_bit_clr(soft, PCIBR_CTRL_PAGE_SIZE);
+	} else {
+		pcireg_control_bit_set(soft, PCIBR_CTRL_PAGE_SIZE);
+	}
+
+	nasid = NASID_GET(soft->pbi_buscommon.bs_base);
+	cnode = nasid_to_cnodeid(nasid);
+	hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+
+	if (hubdev_info->hdi_flush_nasid_list.widget_p) {
+		sn_flush_device_kernel = hubdev_info->hdi_flush_nasid_list.
+		    widget_p[(int)soft->pbi_buscommon.bs_xid];
+		if (sn_flush_device_kernel) {
+			for (j = 0; j < DEV_PER_WIDGET;
+			     j++, sn_flush_device_kernel++) {
+				common = sn_flush_device_kernel->common;
+				if (common->sfdl_slot == -1)
+					continue;
+				if ((common->sfdl_persistent_segment ==
+				     soft->pbi_buscommon.bs_persist_segment) &&
+				     (common->sfdl_persistent_busnum ==
+				     soft->pbi_buscommon.bs_persist_busnum))
+					common->sfdl_pcibus_info =
+					    soft;
+			}
+		}
+	}
+
+	/* Setup the PMU ATE map */
+	soft->pbi_int_ate_resource.lowest_free_index = 0;
+	soft->pbi_int_ate_resource.ate =
+	    kzalloc(soft->pbi_int_ate_size * sizeof(u64), GFP_KERNEL);
+
+	if (!soft->pbi_int_ate_resource.ate) {
+		kfree(soft);
+		return NULL;
+	}
+
+	return soft;
+}
+
+void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info)
+{
+	struct pcidev_info *pcidev_info;
+	struct pcibus_info *pcibus_info;
+	int bit = sn_irq_info->irq_int_bit;
+
+	if (! sn_irq_info->irq_bridge)
+		return;
+
+	pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	if (pcidev_info) {
+		pcibus_info =
+		    (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
+		    pdi_pcibus_info;
+		pcireg_force_intr_set(pcibus_info, bit);
+	}
+}
+
+void pcibr_target_interrupt(struct sn_irq_info *sn_irq_info)
+{
+	struct pcidev_info *pcidev_info;
+	struct pcibus_info *pcibus_info;
+	int bit = sn_irq_info->irq_int_bit;
+	u64 xtalk_addr = sn_irq_info->irq_xtalkaddr;
+
+	pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	if (pcidev_info) {
+		pcibus_info =
+		    (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
+		    pdi_pcibus_info;
+
+		/* Disable the device's IRQ   */
+		pcireg_intr_enable_bit_clr(pcibus_info, (1 << bit));
+
+		/* Change the device's IRQ    */
+		pcireg_intr_addr_addr_set(pcibus_info, bit, xtalk_addr);
+
+		/* Re-enable the device's IRQ */
+		pcireg_intr_enable_bit_set(pcibus_info, (1 << bit));
+
+		pcibr_force_interrupt(sn_irq_info);
+	}
+}
+
+/*
+ * Provider entries for PIC/CP
+ */
+
+struct sn_pcibus_provider pcibr_provider = {
+	.dma_map = pcibr_dma_map,
+	.dma_map_consistent = pcibr_dma_map_consistent,
+	.dma_unmap = pcibr_dma_unmap,
+	.bus_fixup = pcibr_bus_fixup,
+	.force_interrupt = pcibr_force_interrupt,
+	.target_interrupt = pcibr_target_interrupt
+};
+
+int
+pcibr_init_provider(void)
+{
+	sn_pci_provider[PCIIO_ASIC_TYPE_PIC] = &pcibr_provider;
+	sn_pci_provider[PCIIO_ASIC_TYPE_TIOCP] = &pcibr_provider;
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(sal_pcibr_slot_enable);
+EXPORT_SYMBOL_GPL(sal_pcibr_slot_disable);
+EXPORT_SYMBOL_GPL(sn_ioboard_to_pci_bus);
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
new file mode 100644
index 00000000..8b8bbd51
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
@@ -0,0 +1,285 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <asm/sn/io.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/pic.h>
+#include <asm/sn/tiocp.h>
+
+union br_ptr {
+	struct tiocp tio;
+	struct pic pic;
+};
+
+/*
+ * Control Register Access -- Read/Write                            0000_0020
+ */
+void pcireg_control_bit_clr(struct pcibus_info *pcibus_info, u64 bits)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			__sn_clrq_relaxed(&ptr->tio.cp_control, bits);
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			__sn_clrq_relaxed(&ptr->pic.p_wid_control, bits);
+			break;
+		default:
+			panic
+			    ("pcireg_control_bit_clr: unknown bridgetype bridge 0x%p",
+			     ptr);
+		}
+	}
+}
+
+void pcireg_control_bit_set(struct pcibus_info *pcibus_info, u64 bits)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			__sn_setq_relaxed(&ptr->tio.cp_control, bits);
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			__sn_setq_relaxed(&ptr->pic.p_wid_control, bits);
+			break;
+		default:
+			panic
+			    ("pcireg_control_bit_set: unknown bridgetype bridge 0x%p",
+			     ptr);
+		}
+	}
+}
+
+/*
+ * PCI/PCIX Target Flush Register Access -- Read Only		    0000_0050
+ */
+u64 pcireg_tflush_get(struct pcibus_info *pcibus_info)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+	u64 ret = 0;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			ret = __sn_readq_relaxed(&ptr->tio.cp_tflush);
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			ret = __sn_readq_relaxed(&ptr->pic.p_wid_tflush);
+			break;
+		default:
+			panic
+			    ("pcireg_tflush_get: unknown bridgetype bridge 0x%p",
+			     ptr);
+		}
+	}
+
+	/* Read of the Target Flush should always return zero */
+	if (ret != 0)
+		panic("pcireg_tflush_get:Target Flush failed\n");
+
+	return ret;
+}
+
+/*
+ * Interrupt Status Register Access -- Read Only		    0000_0100
+ */
+u64 pcireg_intr_status_get(struct pcibus_info * pcibus_info)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+	u64 ret = 0;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			ret = __sn_readq_relaxed(&ptr->tio.cp_int_status);
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			ret = __sn_readq_relaxed(&ptr->pic.p_int_status);
+			break;
+		default:
+			panic
+			    ("pcireg_intr_status_get: unknown bridgetype bridge 0x%p",
+			     ptr);
+		}
+	}
+	return ret;
+}
+
+/*
+ * Interrupt Enable Register Access -- Read/Write                   0000_0108
+ */
+void pcireg_intr_enable_bit_clr(struct pcibus_info *pcibus_info, u64 bits)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			__sn_clrq_relaxed(&ptr->tio.cp_int_enable, bits);
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			__sn_clrq_relaxed(&ptr->pic.p_int_enable, bits);
+			break;
+		default:
+			panic
+			    ("pcireg_intr_enable_bit_clr: unknown bridgetype bridge 0x%p",
+			     ptr);
+		}
+	}
+}
+
+void pcireg_intr_enable_bit_set(struct pcibus_info *pcibus_info, u64 bits)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			__sn_setq_relaxed(&ptr->tio.cp_int_enable, bits);
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			__sn_setq_relaxed(&ptr->pic.p_int_enable, bits);
+			break;
+		default:
+			panic
+			    ("pcireg_intr_enable_bit_set: unknown bridgetype bridge 0x%p",
+			     ptr);
+		}
+	}
+}
+
+/*
+ * Intr Host Address Register (int_addr) -- Read/Write  0000_0130 - 0000_0168
+ */
+void pcireg_intr_addr_addr_set(struct pcibus_info *pcibus_info, int int_n,
+			       u64 addr)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			__sn_clrq_relaxed(&ptr->tio.cp_int_addr[int_n],
+			    TIOCP_HOST_INTR_ADDR);
+			__sn_setq_relaxed(&ptr->tio.cp_int_addr[int_n],
+			    (addr & TIOCP_HOST_INTR_ADDR));
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			__sn_clrq_relaxed(&ptr->pic.p_int_addr[int_n],
+			    PIC_HOST_INTR_ADDR);
+			__sn_setq_relaxed(&ptr->pic.p_int_addr[int_n],
+			    (addr & PIC_HOST_INTR_ADDR));
+			break;
+		default:
+			panic
+			    ("pcireg_intr_addr_addr_get: unknown bridgetype bridge 0x%p",
+			     ptr);
+		}
+	}
+}
+
+/*
+ * Force Interrupt Register Access -- Write Only	0000_01C0 - 0000_01F8
+ */
+void pcireg_force_intr_set(struct pcibus_info *pcibus_info, int int_n)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			writeq(1, &ptr->tio.cp_force_pin[int_n]);
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			writeq(1, &ptr->pic.p_force_pin[int_n]);
+			break;
+		default:
+			panic
+			    ("pcireg_force_intr_set: unknown bridgetype bridge 0x%p",
+			     ptr);
+		}
+	}
+}
+
+/*
+ * Device(x) Write Buffer Flush Reg Access -- Read Only 0000_0240 - 0000_0258
+ */
+u64 pcireg_wrb_flush_get(struct pcibus_info *pcibus_info, int device)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+	u64 ret = 0;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			ret =
+			    __sn_readq_relaxed(&ptr->tio.cp_wr_req_buf[device]);
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			ret =
+			    __sn_readq_relaxed(&ptr->pic.p_wr_req_buf[device]);
+			break;
+		default:
+		      panic("pcireg_wrb_flush_get: unknown bridgetype bridge 0x%p", ptr);
+		}
+
+	}
+	/* Read of the Write Buffer Flush should always return zero */
+	return ret;
+}
+
+void pcireg_int_ate_set(struct pcibus_info *pcibus_info, int ate_index,
+			u64 val)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			writeq(val, &ptr->tio.cp_int_ate_ram[ate_index]);
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			writeq(val, &ptr->pic.p_int_ate_ram[ate_index]);
+			break;
+		default:
+			panic
+			    ("pcireg_int_ate_set: unknown bridgetype bridge 0x%p",
+			     ptr);
+		}
+	}
+}
+
+u64 __iomem *pcireg_int_ate_addr(struct pcibus_info *pcibus_info, int ate_index)
+{
+	union br_ptr __iomem *ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+	u64 __iomem *ret = NULL;
+
+	if (pcibus_info) {
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			ret = &ptr->tio.cp_int_ate_ram[ate_index];
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			ret = &ptr->pic.p_int_ate_ram[ate_index];
+			break;
+		default:
+			panic
+			    ("pcireg_int_ate_addr: unknown bridgetype bridge 0x%p",
+			     ptr);
+		}
+	}
+	return ret;
+}
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
new file mode 100644
index 00000000..9c271be9
--- /dev/null
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -0,0 +1,675 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/io.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/tioca_provider.h>
+
+u32 tioca_gart_found;
+EXPORT_SYMBOL(tioca_gart_found);	/* used by agp-sgi */
+
+LIST_HEAD(tioca_list);
+EXPORT_SYMBOL(tioca_list);	/* used by agp-sgi */
+
+static int tioca_gart_init(struct tioca_kernel *);
+
+/**
+ * tioca_gart_init - Initialize SGI TIOCA GART
+ * @tioca_common: ptr to common prom/kernel struct identifying the 
+ *
+ * If the indicated tioca has devices present, initialize its associated
+ * GART MMR's and kernel memory.
+ */
+static int
+tioca_gart_init(struct tioca_kernel *tioca_kern)
+{
+	u64 ap_reg;
+	u64 offset;
+	struct page *tmp;
+	struct tioca_common *tioca_common;
+	struct tioca __iomem *ca_base;
+
+	tioca_common = tioca_kern->ca_common;
+	ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base;
+
+	if (list_empty(tioca_kern->ca_devices))
+		return 0;
+
+	ap_reg = 0;
+
+	/*
+	 * Validate aperature size
+	 */
+
+	switch (CA_APERATURE_SIZE >> 20) {
+	case 4:
+		ap_reg |= (0x3ff << CA_GART_AP_SIZE_SHFT);	/* 4MB */
+		break;
+	case 8:
+		ap_reg |= (0x3fe << CA_GART_AP_SIZE_SHFT);	/* 8MB */
+		break;
+	case 16:
+		ap_reg |= (0x3fc << CA_GART_AP_SIZE_SHFT);	/* 16MB */
+		break;
+	case 32:
+		ap_reg |= (0x3f8 << CA_GART_AP_SIZE_SHFT);	/* 32 MB */
+		break;
+	case 64:
+		ap_reg |= (0x3f0 << CA_GART_AP_SIZE_SHFT);	/* 64 MB */
+		break;
+	case 128:
+		ap_reg |= (0x3e0 << CA_GART_AP_SIZE_SHFT);	/* 128 MB */
+		break;
+	case 256:
+		ap_reg |= (0x3c0 << CA_GART_AP_SIZE_SHFT);	/* 256 MB */
+		break;
+	case 512:
+		ap_reg |= (0x380 << CA_GART_AP_SIZE_SHFT);	/* 512 MB */
+		break;
+	case 1024:
+		ap_reg |= (0x300 << CA_GART_AP_SIZE_SHFT);	/* 1GB */
+		break;
+	case 2048:
+		ap_reg |= (0x200 << CA_GART_AP_SIZE_SHFT);	/* 2GB */
+		break;
+	case 4096:
+		ap_reg |= (0x000 << CA_GART_AP_SIZE_SHFT);	/* 4 GB */
+		break;
+	default:
+		printk(KERN_ERR "%s:  Invalid CA_APERATURE_SIZE "
+		       "0x%lx\n", __func__, (ulong) CA_APERATURE_SIZE);
+		return -1;
+	}
+
+	/*
+	 * Set up other aperature parameters
+	 */
+
+	if (PAGE_SIZE >= 16384) {
+		tioca_kern->ca_ap_pagesize = 16384;
+		ap_reg |= CA_GART_PAGE_SIZE;
+	} else {
+		tioca_kern->ca_ap_pagesize = 4096;
+	}
+
+	tioca_kern->ca_ap_size = CA_APERATURE_SIZE;
+	tioca_kern->ca_ap_bus_base = CA_APERATURE_BASE;
+	tioca_kern->ca_gart_entries =
+	    tioca_kern->ca_ap_size / tioca_kern->ca_ap_pagesize;
+
+	ap_reg |= (CA_GART_AP_ENB_AGP | CA_GART_AP_ENB_PCI);
+	ap_reg |= tioca_kern->ca_ap_bus_base;
+
+	/*
+	 * Allocate and set up the GART
+	 */
+
+	tioca_kern->ca_gart_size = tioca_kern->ca_gart_entries * sizeof(u64);
+	tmp =
+	    alloc_pages_node(tioca_kern->ca_closest_node,
+			     GFP_KERNEL | __GFP_ZERO,
+			     get_order(tioca_kern->ca_gart_size));
+
+	if (!tmp) {
+		printk(KERN_ERR "%s:  Could not allocate "
+		       "%llu bytes (order %d) for GART\n",
+		       __func__,
+		       tioca_kern->ca_gart_size,
+		       get_order(tioca_kern->ca_gart_size));
+		return -ENOMEM;
+	}
+
+	tioca_kern->ca_gart = page_address(tmp);
+	tioca_kern->ca_gart_coretalk_addr =
+	    PHYS_TO_TIODMA(virt_to_phys(tioca_kern->ca_gart));
+
+	/*
+	 * Compute PCI/AGP convenience fields 
+	 */
+
+	offset = CA_PCI32_MAPPED_BASE - CA_APERATURE_BASE;
+	tioca_kern->ca_pciap_base = CA_PCI32_MAPPED_BASE;
+	tioca_kern->ca_pciap_size = CA_PCI32_MAPPED_SIZE;
+	tioca_kern->ca_pcigart_start = offset / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_pcigart_base =
+	    tioca_kern->ca_gart_coretalk_addr + offset;
+	tioca_kern->ca_pcigart =
+	    &tioca_kern->ca_gart[tioca_kern->ca_pcigart_start];
+	tioca_kern->ca_pcigart_entries =
+	    tioca_kern->ca_pciap_size / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_pcigart_pagemap =
+	    kzalloc(tioca_kern->ca_pcigart_entries / 8, GFP_KERNEL);
+	if (!tioca_kern->ca_pcigart_pagemap) {
+		free_pages((unsigned long)tioca_kern->ca_gart,
+			   get_order(tioca_kern->ca_gart_size));
+		return -1;
+	}
+
+	offset = CA_AGP_MAPPED_BASE - CA_APERATURE_BASE;
+	tioca_kern->ca_gfxap_base = CA_AGP_MAPPED_BASE;
+	tioca_kern->ca_gfxap_size = CA_AGP_MAPPED_SIZE;
+	tioca_kern->ca_gfxgart_start = offset / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_gfxgart_base =
+	    tioca_kern->ca_gart_coretalk_addr + offset;
+	tioca_kern->ca_gfxgart =
+	    &tioca_kern->ca_gart[tioca_kern->ca_gfxgart_start];
+	tioca_kern->ca_gfxgart_entries =
+	    tioca_kern->ca_gfxap_size / tioca_kern->ca_ap_pagesize;
+
+	/*
+	 * various control settings:
+	 *      use agp op-combining
+	 *      use GET semantics to fetch memory
+	 *      participate in coherency domain
+	 * 	DISABLE GART PREFETCHING due to hw bug tracked in SGI PV930029
+	 */
+
+	__sn_setq_relaxed(&ca_base->ca_control1,
+			CA_AGPDMA_OP_ENB_COMBDELAY);	/* PV895469 ? */
+	__sn_clrq_relaxed(&ca_base->ca_control2, CA_GART_MEM_PARAM);
+	__sn_setq_relaxed(&ca_base->ca_control2,
+			(0x2ull << CA_GART_MEM_PARAM_SHFT));
+	tioca_kern->ca_gart_iscoherent = 1;
+	__sn_clrq_relaxed(&ca_base->ca_control2,
+	    		(CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB));
+
+	/*
+	 * Unmask GART fetch error interrupts.  Clear residual errors first.
+	 */
+
+	writeq(CA_GART_FETCH_ERR, &ca_base->ca_int_status_alias);
+	writeq(CA_GART_FETCH_ERR, &ca_base->ca_mult_error_alias);
+	__sn_clrq_relaxed(&ca_base->ca_int_mask, CA_GART_FETCH_ERR);
+
+	/*
+	 * Program the aperature and gart registers in TIOCA
+	 */
+
+	writeq(ap_reg, &ca_base->ca_gart_aperature);
+	writeq(tioca_kern->ca_gart_coretalk_addr|1, &ca_base->ca_gart_ptr_table);
+
+	return 0;
+}
+
+/**
+ * tioca_fastwrite_enable - enable AGP FW for a tioca and its functions
+ * @tioca_kernel: structure representing the CA
+ *
+ * Given a CA, scan all attached functions making sure they all support
+ * FastWrite.  If so, enable FastWrite for all functions and the CA itself.
+ */
+
+void
+tioca_fastwrite_enable(struct tioca_kernel *tioca_kern)
+{
+	int cap_ptr;
+	u32 reg;
+	struct tioca __iomem *tioca_base;
+	struct pci_dev *pdev;
+	struct tioca_common *common;
+
+	common = tioca_kern->ca_common;
+
+	/*
+	 * Scan all vga controllers on this bus making sure they all
+	 * support FW.  If not, return.
+	 */
+
+	list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
+		if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8))
+			continue;
+
+		cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
+		if (!cap_ptr)
+			return;	/* no AGP CAP means no FW */
+
+		pci_read_config_dword(pdev, cap_ptr + PCI_AGP_STATUS, &reg);
+		if (!(reg & PCI_AGP_STATUS_FW))
+			return;	/* function doesn't support FW */
+	}
+
+	/*
+	 * Set fw for all vga fn's
+	 */
+
+	list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
+		if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8))
+			continue;
+
+		cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
+		pci_read_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, &reg);
+		reg |= PCI_AGP_COMMAND_FW;
+		pci_write_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, reg);
+	}
+
+	/*
+	 * Set ca's fw to match
+	 */
+
+	tioca_base = (struct tioca __iomem*)common->ca_common.bs_base;
+	__sn_setq_relaxed(&tioca_base->ca_control1, CA_AGP_FW_ENABLE);
+}
+
+EXPORT_SYMBOL(tioca_fastwrite_enable);	/* used by agp-sgi */
+
+/**
+ * tioca_dma_d64 - create a DMA mapping using 64-bit direct mode
+ * @paddr: system physical address
+ *
+ * Map @paddr into 64-bit CA bus space.  No device context is necessary.
+ * Bits 53:0 come from the coretalk address.  We just need to mask in the
+ * following optional bits of the 64-bit pci address:
+ *
+ * 63:60 - Coretalk Packet Type -  0x1 for Mem Get/Put (coherent)
+ *                                 0x2 for PIO (non-coherent)
+ *                                 We will always use 0x1
+ * 55:55 - Swap bytes		   Currently unused
+ */
+static u64
+tioca_dma_d64(unsigned long paddr)
+{
+	dma_addr_t bus_addr;
+
+	bus_addr = PHYS_TO_TIODMA(paddr);
+
+	BUG_ON(!bus_addr);
+	BUG_ON(bus_addr >> 54);
+
+	/* Set upper nibble to Cache Coherent Memory op */
+	bus_addr |= (1UL << 60);
+
+	return bus_addr;
+}
+
+/**
+ * tioca_dma_d48 - create a DMA mapping using 48-bit direct mode
+ * @pdev: linux pci_dev representing the function
+ * @paddr: system physical address
+ *
+ * Map @paddr into 64-bit bus space of the CA associated with @pcidev_info.
+ *
+ * The CA agp 48 bit direct address falls out as follows:
+ *
+ * When direct mapping AGP addresses, the 48 bit AGP address is
+ * constructed as follows:
+ *
+ * [47:40] - Low 8 bits of the page Node ID extracted from coretalk
+ *              address [47:40].  The upper 8 node bits are fixed
+ *              and come from the xxx register bits [5:0]
+ * [39:38] - Chiplet ID extracted from coretalk address [39:38]
+ * [37:00] - node offset extracted from coretalk address [37:00]
+ * 
+ * Since the node id in general will be non-zero, and the chiplet id
+ * will always be non-zero, it follows that the device must support
+ * a dma mask of at least 0xffffffffff (40 bits) to target node 0
+ * and in general should be 0xffffffffffff (48 bits) to target nodes
+ * up to 255.  Nodes above 255 need the support of the xxx register,
+ * and so a given CA can only directly target nodes in the range
+ * xxx - xxx+255.
+ */
+static u64
+tioca_dma_d48(struct pci_dev *pdev, u64 paddr)
+{
+	struct tioca_common *tioca_common;
+	struct tioca __iomem *ca_base;
+	u64 ct_addr;
+	dma_addr_t bus_addr;
+	u32 node_upper;
+	u64 agp_dma_extn;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base;
+
+	ct_addr = PHYS_TO_TIODMA(paddr);
+	if (!ct_addr)
+		return 0;
+
+	bus_addr = (dma_addr_t) (ct_addr & 0xffffffffffffUL);
+	node_upper = ct_addr >> 48;
+
+	if (node_upper > 64) {
+		printk(KERN_ERR "%s:  coretalk addr 0x%p node id out "
+		       "of range\n", __func__, (void *)ct_addr);
+		return 0;
+	}
+
+	agp_dma_extn = __sn_readq_relaxed(&ca_base->ca_agp_dma_addr_extn);
+	if (node_upper != (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)) {
+		printk(KERN_ERR "%s:  coretalk upper node (%u) "
+		       "mismatch with ca_agp_dma_addr_extn (%llu)\n",
+		       __func__,
+		       node_upper, (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT));
+		return 0;
+	}
+
+	return bus_addr;
+}
+
+/**
+ * tioca_dma_mapped - create a DMA mapping using a CA GART 
+ * @pdev: linux pci_dev representing the function
+ * @paddr: host physical address to map
+ * @req_size: len (bytes) to map
+ *
+ * Map @paddr into CA address space using the GART mechanism.  The mapped
+ * dma_addr_t is guaranteed to be contiguous in CA bus space.
+ */
+static dma_addr_t
+tioca_dma_mapped(struct pci_dev *pdev, unsigned long paddr, size_t req_size)
+{
+	int ps, ps_shift, entry, entries, mapsize;
+	u64 xio_addr, end_xio_addr;
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	dma_addr_t bus_addr = 0;
+	struct tioca_dmamap *ca_dmamap;
+	void *map;
+	unsigned long flags;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
+
+	xio_addr = PHYS_TO_TIODMA(paddr);
+	if (!xio_addr)
+		return 0;
+
+	spin_lock_irqsave(&tioca_kern->ca_lock, flags);
+
+	/*
+	 * allocate a map struct
+	 */
+
+	ca_dmamap = kzalloc(sizeof(struct tioca_dmamap), GFP_ATOMIC);
+	if (!ca_dmamap)
+		goto map_return;
+
+	/*
+	 * Locate free entries that can hold req_size.  Account for
+	 * unaligned start/length when allocating.
+	 */
+
+	ps = tioca_kern->ca_ap_pagesize;	/* will be power of 2 */
+	ps_shift = ffs(ps) - 1;
+	end_xio_addr = xio_addr + req_size - 1;
+
+	entries = (end_xio_addr >> ps_shift) - (xio_addr >> ps_shift) + 1;
+
+	map = tioca_kern->ca_pcigart_pagemap;
+	mapsize = tioca_kern->ca_pcigart_entries;
+
+	entry = bitmap_find_next_zero_area(map, mapsize, 0, entries, 0);
+	if (entry >= mapsize) {
+		kfree(ca_dmamap);
+		goto map_return;
+	}
+
+	bitmap_set(map, entry, entries);
+
+	bus_addr = tioca_kern->ca_pciap_base + (entry * ps);
+
+	ca_dmamap->cad_dma_addr = bus_addr;
+	ca_dmamap->cad_gart_size = entries;
+	ca_dmamap->cad_gart_entry = entry;
+	list_add(&ca_dmamap->cad_list, &tioca_kern->ca_dmamaps);
+
+	if (xio_addr % ps) {
+		tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);
+		bus_addr += xio_addr & (ps - 1);
+		xio_addr &= ~(ps - 1);
+		xio_addr += ps;
+		entry++;
+	}
+
+	while (xio_addr < end_xio_addr) {
+		tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);
+		xio_addr += ps;
+		entry++;
+	}
+
+	tioca_tlbflush(tioca_kern);
+
+map_return:
+	spin_unlock_irqrestore(&tioca_kern->ca_lock, flags);
+	return bus_addr;
+}
+
+/**
+ * tioca_dma_unmap - release CA mapping resources
+ * @pdev: linux pci_dev representing the function
+ * @bus_addr: bus address returned by an earlier tioca_dma_map
+ * @dir: mapping direction (unused)
+ *
+ * Locate mapping resources associated with @bus_addr and release them.
+ * For mappings created using the direct modes (64 or 48) there are no
+ * resources to release.
+ */
+static void
+tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
+{
+	int i, entry;
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	struct tioca_dmamap *map;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
+	unsigned long flags;
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
+
+	/* return straight away if this isn't be a mapped address */
+
+	if (bus_addr < tioca_kern->ca_pciap_base ||
+	    bus_addr >= (tioca_kern->ca_pciap_base + tioca_kern->ca_pciap_size))
+		return;
+
+	spin_lock_irqsave(&tioca_kern->ca_lock, flags);
+
+	list_for_each_entry(map, &tioca_kern->ca_dmamaps, cad_list)
+	    if (map->cad_dma_addr == bus_addr)
+		break;
+
+	BUG_ON(map == NULL);
+
+	entry = map->cad_gart_entry;
+
+	for (i = 0; i < map->cad_gart_size; i++, entry++) {
+		clear_bit(entry, tioca_kern->ca_pcigart_pagemap);
+		tioca_kern->ca_pcigart[entry] = 0;
+	}
+	tioca_tlbflush(tioca_kern);
+
+	list_del(&map->cad_list);
+	spin_unlock_irqrestore(&tioca_kern->ca_lock, flags);
+	kfree(map);
+}
+
+/**
+ * tioca_dma_map - map pages for PCI DMA
+ * @pdev: linux pci_dev representing the function
+ * @paddr: host physical address to map
+ * @byte_count: bytes to map
+ *
+ * This is the main wrapper for mapping host physical pages to CA PCI space.
+ * The mapping mode used is based on the devices dma_mask.  As a last resort
+ * use the GART mapped mode.
+ */
+static u64
+tioca_dma_map(struct pci_dev *pdev, unsigned long paddr, size_t byte_count, int dma_flags)
+{
+	u64 mapaddr;
+
+	/*
+	 * Not supported for now ...
+	 */
+	if (dma_flags & SN_DMA_MSI)
+		return 0;
+
+	/*
+	 * If card is 64 or 48 bit addressable, use a direct mapping.  32
+	 * bit direct is so restrictive w.r.t. where the memory resides that
+	 * we don't use it even though CA has some support.
+	 */
+
+	if (pdev->dma_mask == ~0UL)
+		mapaddr = tioca_dma_d64(paddr);
+	else if (pdev->dma_mask == 0xffffffffffffUL)
+		mapaddr = tioca_dma_d48(pdev, paddr);
+	else
+		mapaddr = 0;
+
+	/* Last resort ... use PCI portion of CA GART */
+
+	if (mapaddr == 0)
+		mapaddr = tioca_dma_mapped(pdev, paddr, byte_count);
+
+	return mapaddr;
+}
+
+/**
+ * tioca_error_intr_handler - SGI TIO CA error interrupt handler
+ * @irq: unused
+ * @arg: pointer to tioca_common struct for the given CA
+ *
+ * Handle a CA error interrupt.  Simply a wrapper around a SAL call which
+ * defers processing to the SGI prom.
+ */
+static irqreturn_t
+tioca_error_intr_handler(int irq, void *arg)
+{
+	struct tioca_common *soft = arg;
+	struct ia64_sal_retval ret_stuff;
+	u64 segment;
+	u64 busnum;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	segment = soft->ca_common.bs_persist_segment;
+	busnum = soft->ca_common.bs_persist_busnum;
+
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_ERROR_INTERRUPT,
+			segment, busnum, 0, 0, 0, 0, 0);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * tioca_bus_fixup - perform final PCI fixup for a TIO CA bus
+ * @prom_bussoft: Common prom/kernel struct representing the bus
+ *
+ * Replicates the tioca_common pointed to by @prom_bussoft in kernel
+ * space.  Allocates and initializes a kernel-only area for a given CA,
+ * and sets up an irq for handling CA error interrupts.
+ *
+ * On successful setup, returns the kernel version of tioca_common back to
+ * the caller.
+ */
+static void *
+tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
+{
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	struct pci_bus *bus;
+
+	/* sanity check prom rev */
+
+	if (is_shub1() && sn_sal_rev() < 0x0406) {
+		printk
+		    (KERN_ERR "%s:  SGI prom rev 4.06 or greater required "
+		     "for tioca support\n", __func__);
+		return NULL;
+	}
+
+	/*
+	 * Allocate kernel bus soft and copy from prom.
+	 */
+
+	tioca_common = kzalloc(sizeof(struct tioca_common), GFP_KERNEL);
+	if (!tioca_common)
+		return NULL;
+
+	memcpy(tioca_common, prom_bussoft, sizeof(struct tioca_common));
+	tioca_common->ca_common.bs_base = (unsigned long)
+		ioremap(REGION_OFFSET(tioca_common->ca_common.bs_base),
+			sizeof(struct tioca_common));
+
+	/* init kernel-private area */
+
+	tioca_kern = kzalloc(sizeof(struct tioca_kernel), GFP_KERNEL);
+	if (!tioca_kern) {
+		kfree(tioca_common);
+		return NULL;
+	}
+
+	tioca_kern->ca_common = tioca_common;
+	spin_lock_init(&tioca_kern->ca_lock);
+	INIT_LIST_HEAD(&tioca_kern->ca_dmamaps);
+	tioca_kern->ca_closest_node =
+	    nasid_to_cnodeid(tioca_common->ca_closest_nasid);
+	tioca_common->ca_kernel_private = (u64) tioca_kern;
+
+	bus = pci_find_bus(tioca_common->ca_common.bs_persist_segment,
+		tioca_common->ca_common.bs_persist_busnum);
+	BUG_ON(!bus);
+	tioca_kern->ca_devices = &bus->devices;
+
+	/* init GART */
+
+	if (tioca_gart_init(tioca_kern) < 0) {
+		kfree(tioca_kern);
+		kfree(tioca_common);
+		return NULL;
+	}
+
+	tioca_gart_found++;
+	list_add(&tioca_kern->ca_list, &tioca_list);
+
+	if (request_irq(SGI_TIOCA_ERROR,
+			tioca_error_intr_handler,
+			IRQF_SHARED, "TIOCA error", (void *)tioca_common))
+		printk(KERN_WARNING
+		       "%s:  Unable to get irq %d.  "
+		       "Error interrupts won't be routed for TIOCA bus %d\n",
+		       __func__, SGI_TIOCA_ERROR,
+		       (int)tioca_common->ca_common.bs_persist_busnum);
+
+	sn_set_err_irq_affinity(SGI_TIOCA_ERROR);
+
+	/* Setup locality information */
+	controller->node = tioca_kern->ca_closest_node;
+	return tioca_common;
+}
+
+static struct sn_pcibus_provider tioca_pci_interfaces = {
+	.dma_map = tioca_dma_map,
+	.dma_map_consistent = tioca_dma_map,
+	.dma_unmap = tioca_dma_unmap,
+	.bus_fixup = tioca_bus_fixup,
+	.force_interrupt = NULL,
+	.target_interrupt = NULL
+};
+
+/**
+ * tioca_init_provider - init SN PCI provider ops for TIO CA
+ */
+int
+tioca_init_provider(void)
+{
+	sn_pci_provider[PCIIO_ASIC_TYPE_TIOCA] = &tioca_pci_interfaces;
+	return 0;
+}
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
new file mode 100644
index 00000000..27faba03
--- /dev/null
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -0,0 +1,1061 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/io.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/tioce_provider.h>
+
+/*
+ * 1/26/2006
+ *
+ * WAR for SGI PV 944642.  For revA TIOCE, need to use the following recipe
+ * (taken from the above PV) before and after accessing tioce internal MMR's
+ * to avoid tioce lockups.
+ *
+ * The recipe as taken from the PV:
+ *
+ *	if(mmr address < 0x45000) {
+ *		if(mmr address == 0 or 0x80)
+ *			mmr wrt or read address 0xc0
+ *		else if(mmr address == 0x148 or 0x200)
+ *			mmr wrt or read address 0x28
+ *		else
+ *			mmr wrt or read address 0x158
+ *
+ *		do desired mmr access (rd or wrt)
+ *
+ *		if(mmr address == 0x100)
+ *			mmr wrt or read address 0x38
+ *		mmr wrt or read address 0xb050
+ *	} else
+ *		do desired mmr access
+ *
+ * According to hw, we can use reads instead of writes to the above address
+ *
+ * Note this WAR can only to be used for accessing internal MMR's in the
+ * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff.  This includes the
+ * "Local CE Registers and Memories" and "PCI Compatible Config Space" address
+ * spaces from table 2-1 of the "CE Programmer's Reference Overview" document.
+ *
+ * All registers defined in struct tioce will meet that criteria.
+ */
+
+static void inline
+tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr)
+{
+	u64 mmr_base;
+	u64 mmr_offset;
+
+	if (kern->ce_common->ce_rev != TIOCE_REV_A)
+		return;
+
+	mmr_base = kern->ce_common->ce_pcibus.bs_base;
+	mmr_offset = (unsigned long)mmr_addr - mmr_base;
+
+	if (mmr_offset < 0x45000) {
+		u64 mmr_war_offset;
+
+		if (mmr_offset == 0 || mmr_offset == 0x80)
+			mmr_war_offset = 0xc0;
+		else if (mmr_offset == 0x148 || mmr_offset == 0x200)
+			mmr_war_offset = 0x28;
+		else
+			mmr_war_offset = 0x158;
+
+		readq_relaxed((void __iomem *)(mmr_base + mmr_war_offset));
+	}
+}
+
+static void inline
+tioce_mmr_war_post(struct tioce_kernel *kern, void __iomem *mmr_addr)
+{
+	u64 mmr_base;
+	u64 mmr_offset;
+
+	if (kern->ce_common->ce_rev != TIOCE_REV_A)
+		return;
+
+	mmr_base = kern->ce_common->ce_pcibus.bs_base;
+	mmr_offset = (unsigned long)mmr_addr - mmr_base;
+
+	if (mmr_offset < 0x45000) {
+		if (mmr_offset == 0x100)
+			readq_relaxed((void __iomem *)(mmr_base + 0x38));
+		readq_relaxed((void __iomem *)(mmr_base + 0xb050));
+	}
+}
+
+/* load mmr contents into a variable */
+#define tioce_mmr_load(kern, mmrp, varp) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	*(varp) = readq_relaxed(mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store variable contents into mmr */
+#define tioce_mmr_store(kern, mmrp, varp) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	writeq(*varp, mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store immediate value into mmr */
+#define tioce_mmr_storei(kern, mmrp, val) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	writeq(val, mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* set bits (immediate value) into mmr */
+#define tioce_mmr_seti(kern, mmrp, bits) do {\
+	u64 tmp; \
+	tioce_mmr_load(kern, mmrp, &tmp); \
+	tmp |= (bits); \
+	tioce_mmr_store(kern, mmrp, &tmp); \
+} while (0)
+
+/* clear bits (immediate value) into mmr */
+#define tioce_mmr_clri(kern, mmrp, bits) do { \
+	u64 tmp; \
+	tioce_mmr_load(kern, mmrp, &tmp); \
+	tmp &= ~(bits); \
+	tioce_mmr_store(kern, mmrp, &tmp); \
+} while (0)
+
+/**
+ * Bus address ranges for the 5 flavors of TIOCE DMA
+ */
+
+#define TIOCE_D64_MIN	0x8000000000000000UL
+#define TIOCE_D64_MAX	0xffffffffffffffffUL
+#define TIOCE_D64_ADDR(a)	((a) >= TIOCE_D64_MIN)
+
+#define TIOCE_D32_MIN	0x0000000080000000UL
+#define TIOCE_D32_MAX	0x00000000ffffffffUL
+#define TIOCE_D32_ADDR(a)	((a) >= TIOCE_D32_MIN && (a) <= TIOCE_D32_MAX)
+
+#define TIOCE_M32_MIN	0x0000000000000000UL
+#define TIOCE_M32_MAX	0x000000007fffffffUL
+#define TIOCE_M32_ADDR(a)	((a) >= TIOCE_M32_MIN && (a) <= TIOCE_M32_MAX)
+
+#define TIOCE_M40_MIN	0x0000004000000000UL
+#define TIOCE_M40_MAX	0x0000007fffffffffUL
+#define TIOCE_M40_ADDR(a)	((a) >= TIOCE_M40_MIN && (a) <= TIOCE_M40_MAX)
+
+#define TIOCE_M40S_MIN	0x0000008000000000UL
+#define TIOCE_M40S_MAX	0x000000ffffffffffUL
+#define TIOCE_M40S_ADDR(a)	((a) >= TIOCE_M40S_MIN && (a) <= TIOCE_M40S_MAX)
+
+/*
+ * ATE manipulation macros.
+ */
+
+#define ATE_PAGESHIFT(ps)	(__ffs(ps))
+#define ATE_PAGEMASK(ps)	((ps)-1)
+
+#define ATE_PAGE(x, ps) ((x) >> ATE_PAGESHIFT(ps))
+#define ATE_NPAGES(start, len, pagesize) \
+	(ATE_PAGE((start)+(len)-1, pagesize) - ATE_PAGE(start, pagesize) + 1)
+
+#define ATE_VALID(ate)	((ate) & (1UL << 63))
+#define ATE_MAKE(addr, ps, msi) \
+	(((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63) | ((msi)?(1UL << 62):0))
+
+/*
+ * Flavors of ate-based mapping supported by tioce_alloc_map()
+ */
+
+#define TIOCE_ATE_M32	1
+#define TIOCE_ATE_M40	2
+#define TIOCE_ATE_M40S	3
+
+#define KB(x)	((u64)(x) << 10)
+#define MB(x)	((u64)(x) << 20)
+#define GB(x)	((u64)(x) << 30)
+
+/**
+ * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode
+ * @ct_addr: system coretalk address
+ *
+ * Map @ct_addr into 64-bit CE bus space.  No device context is necessary
+ * and no CE mapping are consumed.
+ *
+ * Bits 53:0 come from the coretalk address.  The remaining bits are set as
+ * follows:
+ *
+ * 63    - must be 1 to indicate d64 mode to CE hardware
+ * 62    - barrier bit ... controlled with tioce_dma_barrier()
+ * 61    - msi bit ... specified through dma_flags
+ * 60:54 - reserved, MBZ
+ */
+static u64
+tioce_dma_d64(unsigned long ct_addr, int dma_flags)
+{
+	u64 bus_addr;
+
+	bus_addr = ct_addr | (1UL << 63);
+	if (dma_flags & SN_DMA_MSI)
+		bus_addr |= (1UL << 61);
+
+	return bus_addr;
+}
+
+/**
+ * pcidev_to_tioce - return misc ce related pointers given a pci_dev
+ * @pci_dev: pci device context
+ * @base: ptr to store struct tioce_mmr * for the CE holding this device
+ * @kernel: ptr to store struct tioce_kernel * for the CE holding this device
+ * @port: ptr to store the CE port number that this device is on
+ *
+ * Return pointers to various CE-related structures for the CE upstream of
+ * @pci_dev.
+ */
+static inline void
+pcidev_to_tioce(struct pci_dev *pdev, struct tioce __iomem **base,
+		struct tioce_kernel **kernel, int *port)
+{
+	struct pcidev_info *pcidev_info;
+	struct tioce_common *ce_common;
+	struct tioce_kernel *ce_kernel;
+
+	pcidev_info = SN_PCIDEV_INFO(pdev);
+	ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
+	ce_kernel = (struct tioce_kernel *)ce_common->ce_kernel_private;
+
+	if (base)
+		*base = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base;
+	if (kernel)
+		*kernel = ce_kernel;
+
+	/*
+	 * we use port as a zero-based value internally, even though the
+	 * documentation is 1-based.
+	 */
+	if (port)
+		*port =
+		    (pdev->bus->number < ce_kernel->ce_port1_secondary) ? 0 : 1;
+}
+
+/**
+ * tioce_alloc_map - Given a coretalk address, map it to pcie bus address
+ * space using one of the various ATE-based address modes.
+ * @ce_kern: tioce context
+ * @type: map mode to use
+ * @port: 0-based port that the requesting device is downstream of
+ * @ct_addr: the coretalk address to map
+ * @len: number of bytes to map
+ *
+ * Given the addressing type, set up various parameters that define the
+ * ATE pool to use.  Search for a contiguous block of entries to cover the
+ * length, and if enough resources exist, fill in the ATEs and construct a
+ * tioce_dmamap struct to track the mapping.
+ */
+static u64
+tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
+		u64 ct_addr, int len, int dma_flags)
+{
+	int i;
+	int j;
+	int first;
+	int last;
+	int entries;
+	int nates;
+	u64 pagesize;
+	int msi_capable, msi_wanted;
+	u64 *ate_shadow;
+	u64 __iomem *ate_reg;
+	u64 addr;
+	struct tioce __iomem *ce_mmr;
+	u64 bus_base;
+	struct tioce_dmamap *map;
+
+	ce_mmr = (struct tioce __iomem *)ce_kern->ce_common->ce_pcibus.bs_base;
+
+	switch (type) {
+	case TIOCE_ATE_M32:
+		/*
+		 * The first 64 entries of the ate3240 pool are dedicated to
+		 * super-page (TIOCE_ATE_M40S) mode.
+		 */
+		first = 64;
+		entries = TIOCE_NUM_M3240_ATES - 64;
+		ate_shadow = ce_kern->ce_ate3240_shadow;
+		ate_reg = ce_mmr->ce_ure_ate3240;
+		pagesize = ce_kern->ce_ate3240_pagesize;
+		bus_base = TIOCE_M32_MIN;
+		msi_capable = 1;
+		break;
+	case TIOCE_ATE_M40:
+		first = 0;
+		entries = TIOCE_NUM_M40_ATES;
+		ate_shadow = ce_kern->ce_ate40_shadow;
+		ate_reg = ce_mmr->ce_ure_ate40;
+		pagesize = MB(64);
+		bus_base = TIOCE_M40_MIN;
+		msi_capable = 0;
+		break;
+	case TIOCE_ATE_M40S:
+		/*
+		 * ate3240 entries 0-31 are dedicated to port1 super-page
+		 * mappings.  ate3240 entries 32-63 are dedicated to port2.
+		 */
+		first = port * 32;
+		entries = 32;
+		ate_shadow = ce_kern->ce_ate3240_shadow;
+		ate_reg = ce_mmr->ce_ure_ate3240;
+		pagesize = GB(16);
+		bus_base = TIOCE_M40S_MIN;
+		msi_capable = 0;
+		break;
+	default:
+		return 0;
+	}
+
+	msi_wanted = dma_flags & SN_DMA_MSI;
+	if (msi_wanted && !msi_capable)
+		return 0;
+
+	nates = ATE_NPAGES(ct_addr, len, pagesize);
+	if (nates > entries)
+		return 0;
+
+	last = first + entries - nates;
+	for (i = first; i <= last; i++) {
+		if (ATE_VALID(ate_shadow[i]))
+			continue;
+
+		for (j = i; j < i + nates; j++)
+			if (ATE_VALID(ate_shadow[j]))
+				break;
+
+		if (j >= i + nates)
+			break;
+	}
+
+	if (i > last)
+		return 0;
+
+	map = kzalloc(sizeof(struct tioce_dmamap), GFP_ATOMIC);
+	if (!map)
+		return 0;
+
+	addr = ct_addr;
+	for (j = 0; j < nates; j++) {
+		u64 ate;
+
+		ate = ATE_MAKE(addr, pagesize, msi_wanted);
+		ate_shadow[i + j] = ate;
+		tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate);
+		addr += pagesize;
+	}
+
+	map->refcnt = 1;
+	map->nbytes = nates * pagesize;
+	map->ct_start = ct_addr & ~ATE_PAGEMASK(pagesize);
+	map->pci_start = bus_base + (i * pagesize);
+	map->ate_hw = &ate_reg[i];
+	map->ate_shadow = &ate_shadow[i];
+	map->ate_count = nates;
+
+	list_add(&map->ce_dmamap_list, &ce_kern->ce_dmamap_list);
+
+	return (map->pci_start + (ct_addr - map->ct_start));
+}
+
+/**
+ * tioce_dma_d32 - create a DMA mapping using 32-bit direct mode
+ * @pdev: linux pci_dev representing the function
+ * @paddr: system physical address
+ *
+ * Map @paddr into 32-bit bus space of the CE associated with @pcidev_info.
+ */
+static u64
+tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr, int dma_flags)
+{
+	int dma_ok;
+	int port;
+	struct tioce __iomem *ce_mmr;
+	struct tioce_kernel *ce_kern;
+	u64 ct_upper;
+	u64 ct_lower;
+	dma_addr_t bus_addr;
+
+	if (dma_flags & SN_DMA_MSI)
+		return 0;
+
+	ct_upper = ct_addr & ~0x3fffffffUL;
+	ct_lower = ct_addr & 0x3fffffffUL;
+
+	pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port);
+
+	if (ce_kern->ce_port[port].dirmap_refcnt == 0) {
+		u64 tmp;
+
+		ce_kern->ce_port[port].dirmap_shadow = ct_upper;
+		tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+				 ct_upper);
+		tmp = ce_mmr->ce_ure_dir_map[port];
+		dma_ok = 1;
+	} else
+		dma_ok = (ce_kern->ce_port[port].dirmap_shadow == ct_upper);
+
+	if (dma_ok) {
+		ce_kern->ce_port[port].dirmap_refcnt++;
+		bus_addr = TIOCE_D32_MIN + ct_lower;
+	} else
+		bus_addr = 0;
+
+	return bus_addr;
+}
+
+/**
+ * tioce_dma_barrier - swizzle a TIOCE bus address to include or exclude
+ * the barrier bit.
+ * @bus_addr:  bus address to swizzle
+ *
+ * Given a TIOCE bus address, set the appropriate bit to indicate barrier
+ * attributes.
+ */
+static u64
+tioce_dma_barrier(u64 bus_addr, int on)
+{
+	u64 barrier_bit;
+
+	/* barrier not supported in M40/M40S mode */
+	if (TIOCE_M40_ADDR(bus_addr) || TIOCE_M40S_ADDR(bus_addr))
+		return bus_addr;
+
+	if (TIOCE_D64_ADDR(bus_addr))
+		barrier_bit = (1UL << 62);
+	else			/* must be m32 or d32 */
+		barrier_bit = (1UL << 30);
+
+	return (on) ? (bus_addr | barrier_bit) : (bus_addr & ~barrier_bit);
+}
+
+/**
+ * tioce_dma_unmap - release CE mapping resources
+ * @pdev: linux pci_dev representing the function
+ * @bus_addr: bus address returned by an earlier tioce_dma_map
+ * @dir: mapping direction (unused)
+ *
+ * Locate mapping resources associated with @bus_addr and release them.
+ * For mappings created using the direct modes there are no resources
+ * to release.
+ */
+void
+tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
+{
+	int i;
+	int port;
+	struct tioce_kernel *ce_kern;
+	struct tioce __iomem *ce_mmr;
+	unsigned long flags;
+
+	bus_addr = tioce_dma_barrier(bus_addr, 0);
+	pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port);
+
+	/* nothing to do for D64 */
+
+	if (TIOCE_D64_ADDR(bus_addr))
+		return;
+
+	spin_lock_irqsave(&ce_kern->ce_lock, flags);
+
+	if (TIOCE_D32_ADDR(bus_addr)) {
+		if (--ce_kern->ce_port[port].dirmap_refcnt == 0) {
+			ce_kern->ce_port[port].dirmap_shadow = 0;
+			tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+					 0);
+		}
+	} else {
+		struct tioce_dmamap *map;
+
+		list_for_each_entry(map, &ce_kern->ce_dmamap_list,
+				    ce_dmamap_list) {
+			u64 last;
+
+			last = map->pci_start + map->nbytes - 1;
+			if (bus_addr >= map->pci_start && bus_addr <= last)
+				break;
+		}
+
+		if (&map->ce_dmamap_list == &ce_kern->ce_dmamap_list) {
+			printk(KERN_WARNING
+			       "%s:  %s - no map found for bus_addr 0x%llx\n",
+			       __func__, pci_name(pdev), bus_addr);
+		} else if (--map->refcnt == 0) {
+			for (i = 0; i < map->ate_count; i++) {
+				map->ate_shadow[i] = 0;
+				tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0);
+			}
+
+			list_del(&map->ce_dmamap_list);
+			kfree(map);
+		}
+	}
+
+	spin_unlock_irqrestore(&ce_kern->ce_lock, flags);
+}
+
+/**
+ * tioce_do_dma_map - map pages for PCI DMA
+ * @pdev: linux pci_dev representing the function
+ * @paddr: host physical address to map
+ * @byte_count: bytes to map
+ *
+ * This is the main wrapper for mapping host physical pages to CE PCI space.
+ * The mapping mode used is based on the device's dma_mask.
+ */
+static u64
+tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
+		 int barrier, int dma_flags)
+{
+	unsigned long flags;
+	u64 ct_addr;
+	u64 mapaddr = 0;
+	struct tioce_kernel *ce_kern;
+	struct tioce_dmamap *map;
+	int port;
+	u64 dma_mask;
+
+	dma_mask = (barrier) ? pdev->dev.coherent_dma_mask : pdev->dma_mask;
+
+	/* cards must be able to address at least 31 bits */
+	if (dma_mask < 0x7fffffffUL)
+		return 0;
+
+	if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS)
+		ct_addr = PHYS_TO_TIODMA(paddr);
+	else
+		ct_addr = paddr;
+
+	/*
+	 * If the device can generate 64 bit addresses, create a D64 map.
+	 */
+	if (dma_mask == ~0UL) {
+		mapaddr = tioce_dma_d64(ct_addr, dma_flags);
+		if (mapaddr)
+			goto dma_map_done;
+	}
+
+	pcidev_to_tioce(pdev, NULL, &ce_kern, &port);
+
+	spin_lock_irqsave(&ce_kern->ce_lock, flags);
+
+	/*
+	 * D64 didn't work ... See if we have an existing map that covers
+	 * this address range.  Must account for devices dma_mask here since
+	 * an existing map might have been done in a mode using more pci
+	 * address bits than this device can support.
+	 */
+	list_for_each_entry(map, &ce_kern->ce_dmamap_list, ce_dmamap_list) {
+		u64 last;
+
+		last = map->ct_start + map->nbytes - 1;
+		if (ct_addr >= map->ct_start &&
+		    ct_addr + byte_count - 1 <= last &&
+		    map->pci_start <= dma_mask) {
+			map->refcnt++;
+			mapaddr = map->pci_start + (ct_addr - map->ct_start);
+			break;
+		}
+	}
+
+	/*
+	 * If we don't have a map yet, and the card can generate 40
+	 * bit addresses, try the M40/M40S modes.  Note these modes do not
+	 * support a barrier bit, so if we need a consistent map these
+	 * won't work.
+	 */
+	if (!mapaddr && !barrier && dma_mask >= 0xffffffffffUL) {
+		/*
+		 * We have two options for 40-bit mappings:  16GB "super" ATEs
+		 * and 64MB "regular" ATEs.  We'll try both if needed for a
+		 * given mapping but which one we try first depends on the
+		 * size.  For requests >64MB, prefer to use a super page with
+		 * regular as the fallback. Otherwise, try in the reverse order.
+		 */
+
+		if (byte_count > MB(64)) {
+			mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40S,
+						  port, ct_addr, byte_count,
+						  dma_flags);
+			if (!mapaddr)
+				mapaddr =
+				    tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1,
+						    ct_addr, byte_count,
+						    dma_flags);
+		} else {
+			mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1,
+						  ct_addr, byte_count,
+						  dma_flags);
+			if (!mapaddr)
+				mapaddr =
+				    tioce_alloc_map(ce_kern, TIOCE_ATE_M40S,
+						    port, ct_addr, byte_count,
+						    dma_flags);
+		}
+	}
+
+	/*
+	 * 32-bit direct is the next mode to try
+	 */
+	if (!mapaddr && dma_mask >= 0xffffffffUL)
+		mapaddr = tioce_dma_d32(pdev, ct_addr, dma_flags);
+
+	/*
+	 * Last resort, try 32-bit ATE-based map.
+	 */
+	if (!mapaddr)
+		mapaddr =
+		    tioce_alloc_map(ce_kern, TIOCE_ATE_M32, -1, ct_addr,
+				    byte_count, dma_flags);
+
+	spin_unlock_irqrestore(&ce_kern->ce_lock, flags);
+
+dma_map_done:
+	if (mapaddr && barrier)
+		mapaddr = tioce_dma_barrier(mapaddr, 1);
+
+	return mapaddr;
+}
+
+/**
+ * tioce_dma - standard pci dma map interface
+ * @pdev: pci device requesting the map
+ * @paddr: system physical address to map into pci space
+ * @byte_count: # bytes to map
+ *
+ * Simply call tioce_do_dma_map() to create a map with the barrier bit clear
+ * in the address.
+ */
+static u64
+tioce_dma(struct pci_dev *pdev, unsigned long  paddr, size_t byte_count, int dma_flags)
+{
+	return tioce_do_dma_map(pdev, paddr, byte_count, 0, dma_flags);
+}
+
+/**
+ * tioce_dma_consistent - consistent pci dma map interface
+ * @pdev: pci device requesting the map
+ * @paddr: system physical address to map into pci space
+ * @byte_count: # bytes to map
+ *
+ * Simply call tioce_do_dma_map() to create a map with the barrier bit set
+ * in the address.
+ */
+static u64
+tioce_dma_consistent(struct pci_dev *pdev, unsigned long  paddr, size_t byte_count, int dma_flags)
+{
+	return tioce_do_dma_map(pdev, paddr, byte_count, 1, dma_flags);
+}
+
+/**
+ * tioce_error_intr_handler - SGI TIO CE error interrupt handler
+ * @irq: unused
+ * @arg: pointer to tioce_common struct for the given CE
+ *
+ * Handle a CE error interrupt.  Simply a wrapper around a SAL call which
+ * defers processing to the SGI prom.
+ */
+static irqreturn_t
+tioce_error_intr_handler(int irq, void *arg)
+{
+	struct tioce_common *soft = arg;
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_ERROR_INTERRUPT,
+			soft->ce_pcibus.bs_persist_segment,
+			soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0);
+
+	if (ret_stuff.v0)
+		panic("tioce_error_intr_handler:  Fatal TIOCE error");
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * tioce_reserve_m32 - reserve M32 ATEs for the indicated address range
+ * @tioce_kernel: TIOCE context to reserve ATEs for
+ * @base: starting bus address to reserve
+ * @limit: last bus address to reserve
+ *
+ * If base/limit falls within the range of bus space mapped through the
+ * M32 space, reserve the resources corresponding to the range.
+ */
+static void
+tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit)
+{
+	int ate_index, last_ate, ps;
+	struct tioce __iomem *ce_mmr;
+
+	ce_mmr = (struct tioce __iomem *)ce_kern->ce_common->ce_pcibus.bs_base;
+	ps = ce_kern->ce_ate3240_pagesize;
+	ate_index = ATE_PAGE(base, ps);
+	last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1;
+
+	if (ate_index < 64)
+		ate_index = 64;
+
+	if (last_ate >= TIOCE_NUM_M3240_ATES)
+		last_ate = TIOCE_NUM_M3240_ATES - 1;
+
+	while (ate_index <= last_ate) {
+		u64 ate;
+
+		ate = ATE_MAKE(0xdeadbeef, ps, 0);
+		ce_kern->ce_ate3240_shadow[ate_index] = ate;
+		tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index],
+				 ate);
+		ate_index++;
+	}
+}
+
+/**
+ * tioce_kern_init - init kernel structures related to a given TIOCE
+ * @tioce_common: ptr to a cached tioce_common struct that originated in prom
+ */
+static struct tioce_kernel *
+tioce_kern_init(struct tioce_common *tioce_common)
+{
+	int i;
+	int ps;
+	int dev;
+	u32 tmp;
+	unsigned int seg, bus;
+	struct tioce __iomem *tioce_mmr;
+	struct tioce_kernel *tioce_kern;
+
+	tioce_kern = kzalloc(sizeof(struct tioce_kernel), GFP_KERNEL);
+	if (!tioce_kern) {
+		return NULL;
+	}
+
+	tioce_kern->ce_common = tioce_common;
+	spin_lock_init(&tioce_kern->ce_lock);
+	INIT_LIST_HEAD(&tioce_kern->ce_dmamap_list);
+	tioce_common->ce_kernel_private = (u64) tioce_kern;
+
+	/*
+	 * Determine the secondary bus number of the port2 logical PPB.
+	 * This is used to decide whether a given pci device resides on
+	 * port1 or port2.  Note:  We don't have enough plumbing set up
+	 * here to use pci_read_config_xxx() so use raw_pci_read().
+	 */
+
+	seg = tioce_common->ce_pcibus.bs_persist_segment;
+	bus = tioce_common->ce_pcibus.bs_persist_busnum;
+
+	raw_pci_read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
+	tioce_kern->ce_port1_secondary = (u8) tmp;
+
+	/*
+	 * Set PMU pagesize to the largest size available, and zero out
+	 * the ATEs.
+	 */
+
+	tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base;
+	tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map,
+		       CE_URE_PAGESIZE_MASK);
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map,
+		       CE_URE_256K_PAGESIZE);
+	ps = tioce_kern->ce_ate3240_pagesize = KB(256);
+
+	for (i = 0; i < TIOCE_NUM_M40_ATES; i++) {
+		tioce_kern->ce_ate40_shadow[i] = 0;
+		tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0);
+	}
+
+	for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) {
+		tioce_kern->ce_ate3240_shadow[i] = 0;
+		tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0);
+	}
+
+	/*
+	 * Reserve ATEs corresponding to reserved address ranges.  These
+	 * include:
+	 *
+	 *	Memory space covered by each PPB mem base/limit register
+	 * 	Memory space covered by each PPB prefetch base/limit register
+	 *
+	 * These bus ranges are for pio (downstream) traffic only, and so
+	 * cannot be used for DMA.
+	 */
+
+	for (dev = 1; dev <= 2; dev++) {
+		u64 base, limit;
+
+		/* mem base/limit */
+
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_MEMORY_BASE, 2, &tmp);
+		base = (u64)tmp << 16;
+
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_MEMORY_LIMIT, 2, &tmp);
+		limit = (u64)tmp << 16;
+		limit |= 0xfffffUL;
+
+		if (base < limit)
+			tioce_reserve_m32(tioce_kern, base, limit);
+
+		/*
+		 * prefetch mem base/limit.  The tioce ppb's have 64-bit
+		 * decoders, so read the upper portions w/o checking the
+		 * attributes.
+		 */
+
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_MEMORY_BASE, 2, &tmp);
+		base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_BASE_UPPER32, 4, &tmp);
+		base |= (u64)tmp << 32;
+
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_MEMORY_LIMIT, 2, &tmp);
+
+		limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+		limit |= 0xfffffUL;
+
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_LIMIT_UPPER32, 4, &tmp);
+		limit |= (u64)tmp << 32;
+
+		if ((base < limit) && TIOCE_M32_ADDR(base))
+			tioce_reserve_m32(tioce_kern, base, limit);
+	}
+
+	return tioce_kern;
+}
+
+/**
+ * tioce_force_interrupt - implement altix force_interrupt() backend for CE
+ * @sn_irq_info: sn asic irq that we need an interrupt generated for
+ *
+ * Given an sn_irq_info struct, set the proper bit in ce_adm_force_int to
+ * force a secondary interrupt to be generated.  This is to work around an
+ * asic issue where there is a small window of opportunity for a legacy device
+ * interrupt to be lost.
+ */
+static void
+tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
+{
+	struct pcidev_info *pcidev_info;
+	struct tioce_common *ce_common;
+	struct tioce_kernel *ce_kern;
+	struct tioce __iomem *ce_mmr;
+	u64 force_int_val;
+
+	if (!sn_irq_info->irq_bridge)
+		return;
+
+	if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_TIOCE)
+		return;
+
+	pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	if (!pcidev_info)
+		return;
+
+	ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
+	ce_mmr = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base;
+	ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
+
+	/*
+	 * TIOCE Rev A workaround (PV 945826), force an interrupt by writing
+	 * the TIO_INTx register directly (1/26/2006)
+	 */
+	if (ce_common->ce_rev == TIOCE_REV_A) {
+		u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit);
+		u64 status;
+
+		tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status);
+		if (status & int_bit_mask) {
+			u64 force_irq = (1 << 8) | sn_irq_info->irq_irq;
+			u64 ctalk = sn_irq_info->irq_xtalkaddr;
+			u64 nasid, offset;
+
+			nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT;
+			offset = (ctalk & CTALK_NODE_OFFSET);
+			HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq);
+		}
+
+		return;
+	}
+
+	/*
+	 * irq_int_bit is originally set up by prom, and holds the interrupt
+	 * bit shift (not mask) as defined by the bit definitions in the
+	 * ce_adm_int mmr.  These shifts are not the same for the
+	 * ce_adm_force_int register, so do an explicit mapping here to make
+	 * things clearer.
+	 */
+
+	switch (sn_irq_info->irq_int_bit) {
+	case CE_ADM_INT_PCIE_PORT1_DEV_A_SHFT:
+		force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_A_SHFT;
+		break;
+	case CE_ADM_INT_PCIE_PORT1_DEV_B_SHFT:
+		force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_B_SHFT;
+		break;
+	case CE_ADM_INT_PCIE_PORT1_DEV_C_SHFT:
+		force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_C_SHFT;
+		break;
+	case CE_ADM_INT_PCIE_PORT1_DEV_D_SHFT:
+		force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_D_SHFT;
+		break;
+	case CE_ADM_INT_PCIE_PORT2_DEV_A_SHFT:
+		force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_A_SHFT;
+		break;
+	case CE_ADM_INT_PCIE_PORT2_DEV_B_SHFT:
+		force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_B_SHFT;
+		break;
+	case CE_ADM_INT_PCIE_PORT2_DEV_C_SHFT:
+		force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_C_SHFT;
+		break;
+	case CE_ADM_INT_PCIE_PORT2_DEV_D_SHFT:
+		force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_D_SHFT;
+		break;
+	default:
+		return;
+	}
+	tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val);
+}
+
+/**
+ * tioce_target_interrupt - implement set_irq_affinity for tioce resident
+ * functions.  Note:  only applies to line interrupts, not MSI's.
+ *
+ * @sn_irq_info: SN IRQ context
+ *
+ * Given an sn_irq_info, set the associated CE device's interrupt destination
+ * register.  Since the interrupt destination registers are on a per-ce-slot
+ * basis, this will retarget line interrupts for all functions downstream of
+ * the slot.
+ */
+static void
+tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
+{
+	struct pcidev_info *pcidev_info;
+	struct tioce_common *ce_common;
+	struct tioce_kernel *ce_kern;
+	struct tioce __iomem *ce_mmr;
+	int bit;
+	u64 vector;
+
+	pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	if (!pcidev_info)
+		return;
+
+	ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
+	ce_mmr = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base;
+	ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
+
+	bit = sn_irq_info->irq_int_bit;
+
+	tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
+	vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT;
+	vector |= sn_irq_info->irq_xtalkaddr;
+	tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector);
+	tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
+
+	tioce_force_interrupt(sn_irq_info);
+}
+
+/**
+ * tioce_bus_fixup - perform final PCI fixup for a TIO CE bus
+ * @prom_bussoft: Common prom/kernel struct representing the bus
+ *
+ * Replicates the tioce_common pointed to by @prom_bussoft in kernel
+ * space.  Allocates and initializes a kernel-only area for a given CE,
+ * and sets up an irq for handling CE error interrupts.
+ *
+ * On successful setup, returns the kernel version of tioce_common back to
+ * the caller.
+ */
+static void *
+tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
+{
+	struct tioce_common *tioce_common;
+	struct tioce_kernel *tioce_kern;
+	struct tioce __iomem *tioce_mmr;
+
+	/*
+	 * Allocate kernel bus soft and copy from prom.
+	 */
+
+	tioce_common = kzalloc(sizeof(struct tioce_common), GFP_KERNEL);
+	if (!tioce_common)
+		return NULL;
+
+	memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common));
+	tioce_common->ce_pcibus.bs_base = (unsigned long)
+		ioremap(REGION_OFFSET(tioce_common->ce_pcibus.bs_base),
+			sizeof(struct tioce_common));
+
+	tioce_kern = tioce_kern_init(tioce_common);
+	if (tioce_kern == NULL) {
+		kfree(tioce_common);
+		return NULL;
+	}
+
+	/*
+	 * Clear out any transient errors before registering the error
+	 * interrupt handler.
+	 */
+
+	tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base;
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL);
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias,
+		       ~0ULL);
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, 0ULL);
+
+	if (request_irq(SGI_PCIASIC_ERROR,
+			tioce_error_intr_handler,
+			IRQF_SHARED, "TIOCE error", (void *)tioce_common))
+		printk(KERN_WARNING
+		       "%s:  Unable to get irq %d.  "
+		       "Error interrupts won't be routed for "
+		       "TIOCE bus %04x:%02x\n",
+		       __func__, SGI_PCIASIC_ERROR,
+		       tioce_common->ce_pcibus.bs_persist_segment,
+		       tioce_common->ce_pcibus.bs_persist_busnum);
+
+	sn_set_err_irq_affinity(SGI_PCIASIC_ERROR);
+	return tioce_common;
+}
+
+static struct sn_pcibus_provider tioce_pci_interfaces = {
+	.dma_map = tioce_dma,
+	.dma_map_consistent = tioce_dma_consistent,
+	.dma_unmap = tioce_dma_unmap,
+	.bus_fixup = tioce_bus_fixup,
+	.force_interrupt = tioce_force_interrupt,
+	.target_interrupt = tioce_target_interrupt
+};
+
+/**
+ * tioce_init_provider - init SN PCI provider ops for TIO CE
+ */
+int
+tioce_init_provider(void)
+{
+	sn_pci_provider[PCIIO_ASIC_TYPE_TIOCE] = &tioce_pci_interfaces;
+	return 0;
+}
diff --git a/arch/ia64/uv/Makefile b/arch/ia64/uv/Makefile
new file mode 100644
index 00000000..aa9f9194
--- /dev/null
+++ b/arch/ia64/uv/Makefile
@@ -0,0 +1,12 @@
+# arch/ia64/uv/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+#
+# Makefile for the sn uv subplatform
+#
+
+obj-y += kernel/
diff --git a/arch/ia64/uv/kernel/Makefile b/arch/ia64/uv/kernel/Makefile
new file mode 100644
index 00000000..124e441d
--- /dev/null
+++ b/arch/ia64/uv/kernel/Makefile
@@ -0,0 +1,13 @@
+# arch/ia64/uv/kernel/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+#
+
+ccflags-y := -Iarch/ia64/sn/include
+
+obj-y				+= setup.o
+obj-$(CONFIG_IA64_GENERIC)      += machvec.o
diff --git a/arch/ia64/uv/kernel/machvec.c b/arch/ia64/uv/kernel/machvec.c
new file mode 100644
index 00000000..50737a9d
--- /dev/null
+++ b/arch/ia64/uv/kernel/machvec.c
@@ -0,0 +1,11 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#define MACHVEC_PLATFORM_NAME	uv
+#define MACHVEC_PLATFORM_HEADER	<asm/machvec_uv.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/uv/kernel/setup.c b/arch/ia64/uv/kernel/setup.c
new file mode 100644
index 00000000..f1490657
--- /dev/null
+++ b/arch/ia64/uv/kernel/setup.c
@@ -0,0 +1,116 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV Core Functions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <asm/sn/simulator.h>
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+
+DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
+
+#ifdef CONFIG_IA64_SGI_UV
+int sn_prom_type;
+long sn_partition_id;
+EXPORT_SYMBOL(sn_partition_id);
+long sn_coherency_id;
+EXPORT_SYMBOL_GPL(sn_coherency_id);
+long sn_region_size;
+EXPORT_SYMBOL(sn_region_size);
+#endif
+
+struct redir_addr {
+	unsigned long redirect;
+	unsigned long alias;
+};
+
+#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
+
+static __initdata struct redir_addr redir_addrs[] = {
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG},
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG},
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG},
+};
+
+static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
+{
+	union uvh_si_alias0_overlay_config_u alias;
+	union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
+		alias.v = uv_read_local_mmr(redir_addrs[i].alias);
+		if (alias.s.base == 0) {
+			*size = (1UL << alias.s.m_alias);
+			redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
+			*base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
+			return;
+		}
+	}
+	BUG();
+}
+
+void __init uv_setup(char **cmdline_p)
+{
+	union uvh_si_addr_map_config_u m_n_config;
+	union uvh_node_id_u node_id;
+	unsigned long gnode_upper;
+	int nid, cpu, m_val, n_val;
+	unsigned long mmr_base, lowmem_redir_base, lowmem_redir_size;
+
+	if (IS_MEDUSA()) {
+		lowmem_redir_base = 0;
+		lowmem_redir_size = 0;
+		node_id.v = 0;
+		m_n_config.s.m_skt = 37;
+		m_n_config.s.n_skt = 0;
+		mmr_base = 0;
+#if 0
+		/* Need BIOS calls - TDB */
+		if (!ia64_sn_is_fake_prom())
+			sn_prom_type = 1;
+		else
+#endif
+			sn_prom_type = 2;
+		printk(KERN_INFO "Running on medusa with %s PROM\n",
+					(sn_prom_type == 1) ? "real" : "fake");
+	} else {
+		get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
+		node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+		m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
+		mmr_base =
+			uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+				~UV_MMR_ENABLE;
+	}
+
+	m_val = m_n_config.s.m_skt;
+	n_val = m_n_config.s.n_skt;
+	printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
+
+	gnode_upper = (((unsigned long)node_id.s.node_id) &
+		       ~((1 << n_val) - 1)) << m_val;
+
+	for_each_present_cpu(cpu) {
+		nid = cpu_to_node(cpu);
+		uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
+		uv_cpu_hub_info(cpu)->lowmem_remap_top =
+			lowmem_redir_base + lowmem_redir_size;
+		uv_cpu_hub_info(cpu)->m_val = m_val;
+		uv_cpu_hub_info(cpu)->n_val = n_val;
+		uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) -1;
+		uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
+		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
+		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
+		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+		printk(KERN_DEBUG "UV cpu %d, nid %d\n", cpu, nid);
+	}
+}
+
diff --git a/arch/ia64/xen/Kconfig b/arch/ia64/xen/Kconfig
new file mode 100644
index 00000000..515e0826
--- /dev/null
+++ b/arch/ia64/xen/Kconfig
@@ -0,0 +1,25 @@
+#
+# This Kconfig describes xen/ia64 options
+#
+
+config XEN
+	bool "Xen hypervisor support"
+	default y
+	depends on PARAVIRT && MCKINLEY && IA64_PAGE_SIZE_16KB && EXPERIMENTAL
+	select XEN_XENCOMM
+	select NO_IDLE_HZ
+	# followings are required to save/restore.
+	select ARCH_SUSPEND_POSSIBLE
+	select SUSPEND
+	select PM_SLEEP
+	help
+	  Enable Xen hypervisor support.  Resulting kernel runs
+	  both as a guest OS on Xen and natively on hardware.
+
+config XEN_XENCOMM
+	depends on XEN
+	bool
+
+config NO_IDLE_HZ
+	depends on XEN
+	bool
diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile
new file mode 100644
index 00000000..e6f4a0a7
--- /dev/null
+++ b/arch/ia64/xen/Makefile
@@ -0,0 +1,37 @@
+#
+# Makefile for Xen components
+#
+
+obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o irq_xen.o \
+	 hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o suspend.o \
+	 gate-data.o
+
+obj-$(CONFIG_IA64_GENERIC) += machvec.o
+
+# The gate DSO image is built using a special linker script.
+include $(srctree)/arch/ia64/kernel/Makefile.gate
+
+# tell compiled for xen
+CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_XEN
+AFLAGS_gate.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN -D__IA64_GATE_PARAVIRTUALIZED_XEN
+
+# use same file of native.
+$(obj)/gate.o: $(src)/../kernel/gate.S FORCE
+	$(call if_changed_dep,as_o_S)
+$(obj)/gate.lds: $(src)/../kernel/gate.lds.S FORCE
+	$(call if_changed_dep,cpp_lds_S)
+
+
+AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN
+
+# xen multi compile
+ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S fsys.S
+ASM_PARAVIRT_OBJS = $(addprefix xen-,$(ASM_PARAVIRT_MULTI_COMPILE_SRCS:.S=.o))
+obj-y += $(ASM_PARAVIRT_OBJS)
+define paravirtualized_xen
+AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_XEN
+endef
+$(foreach o,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_xen,$(o))))
+
+$(obj)/xen-%.o: $(src)/../kernel/%.S FORCE
+	$(call if_changed_dep,as_o_S)
diff --git a/arch/ia64/xen/gate-data.S b/arch/ia64/xen/gate-data.S
new file mode 100644
index 00000000..6f95b6b3
--- /dev/null
+++ b/arch/ia64/xen/gate-data.S
@@ -0,0 +1,3 @@
+	.section .data..gate.xen, "aw"
+
+	.incbin "arch/ia64/xen/gate.so"
diff --git a/arch/ia64/xen/grant-table.c b/arch/ia64/xen/grant-table.c
new file mode 100644
index 00000000..48cca376
--- /dev/null
+++ b/arch/ia64/xen/grant-table.c
@@ -0,0 +1,156 @@
+/******************************************************************************
+ * arch/ia64/xen/grant-table.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include <xen/interface/xen.h>
+#include <xen/interface/memory.h>
+#include <xen/grant_table.h>
+
+#include <asm/xen/hypervisor.h>
+
+struct vm_struct *xen_alloc_vm_area(unsigned long size)
+{
+	int order;
+	unsigned long virt;
+	unsigned long nr_pages;
+	struct vm_struct *area;
+
+	order = get_order(size);
+	virt = __get_free_pages(GFP_KERNEL, order);
+	if (virt == 0)
+		goto err0;
+	nr_pages = 1 << order;
+	scrub_pages(virt, nr_pages);
+
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
+	if (area == NULL)
+		goto err1;
+
+	area->flags = VM_IOREMAP;
+	area->addr = (void *)virt;
+	area->size = size;
+	area->pages = NULL;
+	area->nr_pages = nr_pages;
+	area->phys_addr = 0;	/* xenbus_map_ring_valloc uses this field!  */
+
+	return area;
+
+err1:
+	free_pages(virt, order);
+err0:
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xen_alloc_vm_area);
+
+void xen_free_vm_area(struct vm_struct *area)
+{
+	unsigned int order = get_order(area->size);
+	unsigned long i;
+	unsigned long phys_addr = __pa(area->addr);
+
+	/* This area is used for foreign page mappping.
+	 * So underlying machine page may not be assigned. */
+	for (i = 0; i < (1 << order); i++) {
+		unsigned long ret;
+		unsigned long gpfn = (phys_addr >> PAGE_SHIFT) + i;
+		struct xen_memory_reservation reservation = {
+			.nr_extents   = 1,
+			.address_bits = 0,
+			.extent_order = 0,
+			.domid        = DOMID_SELF
+		};
+		set_xen_guest_handle(reservation.extent_start, &gpfn);
+		ret = HYPERVISOR_memory_op(XENMEM_populate_physmap,
+					   &reservation);
+		BUG_ON(ret != 1);
+	}
+	free_pages((unsigned long)area->addr, order);
+	kfree(area);
+}
+EXPORT_SYMBOL_GPL(xen_free_vm_area);
+
+
+/****************************************************************************
+ * grant table hack
+ * cmd: GNTTABOP_xxx
+ */
+
+int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   struct grant_entry **__shared)
+{
+	*__shared = __va(frames[0] << PAGE_SHIFT);
+	return 0;
+}
+
+void arch_gnttab_unmap_shared(struct grant_entry *shared,
+			      unsigned long nr_gframes)
+{
+	/* nothing */
+}
+
+static void
+gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
+{
+	uint32_t flags;
+
+	flags = uop->flags;
+
+	if (flags & GNTMAP_host_map) {
+		if (flags & GNTMAP_application_map) {
+			printk(KERN_DEBUG
+			       "GNTMAP_application_map is not supported yet: "
+			       "flags 0x%x\n", flags);
+			BUG();
+		}
+		if (flags & GNTMAP_contains_pte) {
+			printk(KERN_DEBUG
+			       "GNTMAP_contains_pte is not supported yet: "
+			       "flags 0x%x\n", flags);
+			BUG();
+		}
+	} else if (flags & GNTMAP_device_map) {
+		printk("GNTMAP_device_map is not supported yet 0x%x\n", flags);
+		BUG();	/* not yet. actually this flag is not used. */
+	} else {
+		BUG();
+	}
+}
+
+int
+HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
+{
+	if (cmd == GNTTABOP_map_grant_ref) {
+		unsigned int i;
+		for (i = 0; i < count; i++) {
+			gnttab_map_grant_ref_pre(
+				(struct gnttab_map_grant_ref *)uop + i);
+		}
+	}
+	return xencomm_hypercall_grant_table_op(cmd, uop, count);
+}
+
+EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
diff --git a/arch/ia64/xen/hypercall.S b/arch/ia64/xen/hypercall.S
new file mode 100644
index 00000000..08847aa1
--- /dev/null
+++ b/arch/ia64/xen/hypercall.S
@@ -0,0 +1,88 @@
+/*
+ * Support routines for Xen hypercalls
+ *
+ * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
+ * Copyright (C) 2008 Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/intrinsics.h>
+#include <asm/xen/privop.h>
+
+#ifdef __INTEL_COMPILER
+/*
+ * Hypercalls without parameter.
+ */
+#define __HCALL0(name,hcall)		\
+	GLOBAL_ENTRY(name);		\
+	break	hcall;			\
+	br.ret.sptk.many rp;		\
+	END(name)
+
+/*
+ * Hypercalls with 1 parameter.
+ */
+#define __HCALL1(name,hcall)		\
+	GLOBAL_ENTRY(name);		\
+	mov r8=r32;			\
+	break	hcall;			\
+	br.ret.sptk.many rp;		\
+	END(name)
+
+/*
+ * Hypercalls with 2 parameters.
+ */
+#define __HCALL2(name,hcall)		\
+	GLOBAL_ENTRY(name);		\
+	mov r8=r32;			\
+	mov r9=r33;			\
+	break	hcall;			\
+	br.ret.sptk.many rp;		\
+	END(name)
+
+__HCALL0(xen_get_psr, HYPERPRIVOP_GET_PSR)
+__HCALL0(xen_get_ivr, HYPERPRIVOP_GET_IVR)
+__HCALL0(xen_get_tpr, HYPERPRIVOP_GET_TPR)
+__HCALL0(xen_hyper_ssm_i, HYPERPRIVOP_SSM_I)
+
+__HCALL1(xen_set_tpr, HYPERPRIVOP_SET_TPR)
+__HCALL1(xen_eoi, HYPERPRIVOP_EOI)
+__HCALL1(xen_thash, HYPERPRIVOP_THASH)
+__HCALL1(xen_set_itm, HYPERPRIVOP_SET_ITM)
+__HCALL1(xen_get_rr, HYPERPRIVOP_GET_RR)
+__HCALL1(xen_fc, HYPERPRIVOP_FC)
+__HCALL1(xen_get_cpuid, HYPERPRIVOP_GET_CPUID)
+__HCALL1(xen_get_pmd, HYPERPRIVOP_GET_PMD)
+
+__HCALL2(xen_ptcga, HYPERPRIVOP_PTC_GA)
+__HCALL2(xen_set_rr, HYPERPRIVOP_SET_RR)
+__HCALL2(xen_set_kr, HYPERPRIVOP_SET_KR)
+
+GLOBAL_ENTRY(xen_set_rr0_to_rr4)
+	mov r8=r32
+	mov r9=r33
+	mov r10=r34
+	mov r11=r35
+	mov r14=r36
+	XEN_HYPER_SET_RR0_TO_RR4
+	br.ret.sptk.many rp
+	;;
+END(xen_set_rr0_to_rr4)
+#endif
+
+GLOBAL_ENTRY(xen_send_ipi)
+	mov r14=r32
+	mov r15=r33
+	mov r2=0x400
+	break 0x1000
+	;;
+	br.ret.sptk.many rp
+	;;
+END(xen_send_ipi)
+
+GLOBAL_ENTRY(__hypercall)
+	mov r2=r37
+	break 0x1000
+	br.ret.sptk.many b0
+	;;
+END(__hypercall)
diff --git a/arch/ia64/xen/hypervisor.c b/arch/ia64/xen/hypervisor.c
new file mode 100644
index 00000000..cac4d97c
--- /dev/null
+++ b/arch/ia64/xen/hypervisor.c
@@ -0,0 +1,96 @@
+/******************************************************************************
+ * arch/ia64/xen/hypervisor.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/efi.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/privop.h>
+
+#include "irq_xen.h"
+
+struct shared_info *HYPERVISOR_shared_info __read_mostly =
+	(struct shared_info *)XSI_BASE;
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
+
+DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
+
+struct start_info *xen_start_info;
+EXPORT_SYMBOL(xen_start_info);
+
+EXPORT_SYMBOL(xen_domain_type);
+
+EXPORT_SYMBOL(__hypercall);
+
+/* Stolen from arch/x86/xen/enlighten.c */
+/*
+ * Flag to determine whether vcpu info placement is available on all
+ * VCPUs.  We assume it is to start with, and then set it to zero on
+ * the first failure.  This is because it can succeed on some VCPUs
+ * and not others, since it can involve hypervisor memory allocation,
+ * or because the guest failed to guarantee all the appropriate
+ * constraints on all VCPUs (ie buffer can't cross a page boundary).
+ *
+ * Note that any particular CPU may be using a placed vcpu structure,
+ * but we can only optimise if the all are.
+ *
+ * 0: not available, 1: available
+ */
+
+static void __init xen_vcpu_setup(int cpu)
+{
+	/*
+	 * WARNING:
+	 * before changing MAX_VIRT_CPUS,
+	 * check that shared_info fits on a page
+	 */
+	BUILD_BUG_ON(sizeof(struct shared_info) > PAGE_SIZE);
+	per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+}
+
+void __init xen_setup_vcpu_info_placement(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		xen_vcpu_setup(cpu);
+}
+
+void __cpuinit
+xen_cpu_init(void)
+{
+	xen_smp_intr_init();
+}
+
+/**************************************************************************
+ * opt feature
+ */
+void
+xen_ia64_enable_opt_feature(void)
+{
+	/* Enable region 7 identity map optimizations in Xen */
+	struct xen_ia64_opt_feature optf;
+
+	optf.cmd = XEN_IA64_OPTF_IDENT_MAP_REG7;
+	optf.on = XEN_IA64_OPTF_ON;
+	optf.pgprot = pgprot_val(PAGE_KERNEL);
+	optf.key = 0;	/* No key on linux. */
+	HYPERVISOR_opt_feature(&optf);
+}
diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c
new file mode 100644
index 00000000..b279e142
--- /dev/null
+++ b/arch/ia64/xen/irq_xen.c
@@ -0,0 +1,444 @@
+/******************************************************************************
+ * arch/ia64/xen/irq_xen.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/cpu.h>
+
+#include <xen/interface/xen.h>
+#include <xen/interface/callback.h>
+#include <xen/events.h>
+
+#include <asm/xen/privop.h>
+
+#include "irq_xen.h"
+
+/***************************************************************************
+ * pv_irq_ops
+ * irq operations
+ */
+
+static int
+xen_assign_irq_vector(int irq)
+{
+	struct physdev_irq irq_op;
+
+	irq_op.irq = irq;
+	if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
+		return -ENOSPC;
+
+	return irq_op.vector;
+}
+
+static void
+xen_free_irq_vector(int vector)
+{
+	struct physdev_irq irq_op;
+
+	if (vector < IA64_FIRST_DEVICE_VECTOR ||
+	    vector > IA64_LAST_DEVICE_VECTOR)
+		return;
+
+	irq_op.vector = vector;
+	if (HYPERVISOR_physdev_op(PHYSDEVOP_free_irq_vector, &irq_op))
+		printk(KERN_WARNING "%s: xen_free_irq_vecotr fail vector=%d\n",
+		       __func__, vector);
+}
+
+
+static DEFINE_PER_CPU(int, xen_timer_irq) = -1;
+static DEFINE_PER_CPU(int, xen_ipi_irq) = -1;
+static DEFINE_PER_CPU(int, xen_resched_irq) = -1;
+static DEFINE_PER_CPU(int, xen_cmc_irq) = -1;
+static DEFINE_PER_CPU(int, xen_cmcp_irq) = -1;
+static DEFINE_PER_CPU(int, xen_cpep_irq) = -1;
+#define NAME_SIZE	15
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_timer_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_ipi_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_resched_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_cmc_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_cmcp_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_cpep_name);
+#undef NAME_SIZE
+
+struct saved_irq {
+	unsigned int irq;
+	struct irqaction *action;
+};
+/* 16 should be far optimistic value, since only several percpu irqs
+ * are registered early.
+ */
+#define MAX_LATE_IRQ	16
+static struct saved_irq saved_percpu_irqs[MAX_LATE_IRQ];
+static unsigned short late_irq_cnt;
+static unsigned short saved_irq_cnt;
+static int xen_slab_ready;
+
+#ifdef CONFIG_SMP
+#include <linux/sched.h>
+
+/* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ,
+ * it ends up to issue several memory accesses upon percpu data and
+ * thus adds unnecessary traffic to other paths.
+ */
+static irqreturn_t
+xen_dummy_handler(int irq, void *dev_id)
+{
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t
+xen_resched_handler(int irq, void *dev_id)
+{
+	scheduler_ipi();
+	return IRQ_HANDLED;
+}
+
+static struct irqaction xen_ipi_irqaction = {
+	.handler =	handle_IPI,
+	.flags =	IRQF_DISABLED,
+	.name =		"IPI"
+};
+
+static struct irqaction xen_resched_irqaction = {
+	.handler =	xen_resched_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"resched"
+};
+
+static struct irqaction xen_tlb_irqaction = {
+	.handler =	xen_dummy_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"tlb_flush"
+};
+#endif
+
+/*
+ * This is xen version percpu irq registration, which needs bind
+ * to xen specific evtchn sub-system. One trick here is that xen
+ * evtchn binding interface depends on kmalloc because related
+ * port needs to be freed at device/cpu down. So we cache the
+ * registration on BSP before slab is ready and then deal them
+ * at later point. For rest instances happening after slab ready,
+ * we hook them to xen evtchn immediately.
+ *
+ * FIXME: MCA is not supported by far, and thus "nomca" boot param is
+ * required.
+ */
+static void
+__xen_register_percpu_irq(unsigned int cpu, unsigned int vec,
+			struct irqaction *action, int save)
+{
+	int irq = 0;
+
+	if (xen_slab_ready) {
+		switch (vec) {
+		case IA64_TIMER_VECTOR:
+			snprintf(per_cpu(xen_timer_name, cpu),
+				 sizeof(per_cpu(xen_timer_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_virq_to_irqhandler(VIRQ_ITC, cpu,
+				action->handler, action->flags,
+				per_cpu(xen_timer_name, cpu), action->dev_id);
+			per_cpu(xen_timer_irq, cpu) = irq;
+			break;
+		case IA64_IPI_RESCHEDULE:
+			snprintf(per_cpu(xen_resched_name, cpu),
+				 sizeof(per_cpu(xen_resched_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu,
+				action->handler, action->flags,
+				per_cpu(xen_resched_name, cpu), action->dev_id);
+			per_cpu(xen_resched_irq, cpu) = irq;
+			break;
+		case IA64_IPI_VECTOR:
+			snprintf(per_cpu(xen_ipi_name, cpu),
+				 sizeof(per_cpu(xen_ipi_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_ipi_to_irqhandler(XEN_IPI_VECTOR, cpu,
+				action->handler, action->flags,
+				per_cpu(xen_ipi_name, cpu), action->dev_id);
+			per_cpu(xen_ipi_irq, cpu) = irq;
+			break;
+		case IA64_CMC_VECTOR:
+			snprintf(per_cpu(xen_cmc_name, cpu),
+				 sizeof(per_cpu(xen_cmc_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_virq_to_irqhandler(VIRQ_MCA_CMC, cpu,
+						action->handler,
+						action->flags,
+						per_cpu(xen_cmc_name, cpu),
+						action->dev_id);
+			per_cpu(xen_cmc_irq, cpu) = irq;
+			break;
+		case IA64_CMCP_VECTOR:
+			snprintf(per_cpu(xen_cmcp_name, cpu),
+				 sizeof(per_cpu(xen_cmcp_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_ipi_to_irqhandler(XEN_CMCP_VECTOR, cpu,
+						action->handler,
+						action->flags,
+						per_cpu(xen_cmcp_name, cpu),
+						action->dev_id);
+			per_cpu(xen_cmcp_irq, cpu) = irq;
+			break;
+		case IA64_CPEP_VECTOR:
+			snprintf(per_cpu(xen_cpep_name, cpu),
+				 sizeof(per_cpu(xen_cpep_name, cpu)),
+				 "%s%d", action->name, cpu);
+			irq = bind_ipi_to_irqhandler(XEN_CPEP_VECTOR, cpu,
+						action->handler,
+						action->flags,
+						per_cpu(xen_cpep_name, cpu),
+						action->dev_id);
+			per_cpu(xen_cpep_irq, cpu) = irq;
+			break;
+		case IA64_CPE_VECTOR:
+		case IA64_MCA_RENDEZ_VECTOR:
+		case IA64_PERFMON_VECTOR:
+		case IA64_MCA_WAKEUP_VECTOR:
+		case IA64_SPURIOUS_INT_VECTOR:
+			/* No need to complain, these aren't supported. */
+			break;
+		default:
+			printk(KERN_WARNING "Percpu irq %d is unsupported "
+			       "by xen!\n", vec);
+			break;
+		}
+		BUG_ON(irq < 0);
+
+		if (irq > 0) {
+			/*
+			 * Mark percpu.  Without this, migrate_irqs() will
+			 * mark the interrupt for migrations and trigger it
+			 * on cpu hotplug.
+			 */
+			irq_set_status_flags(irq, IRQ_PER_CPU);
+		}
+	}
+
+	/* For BSP, we cache registered percpu irqs, and then re-walk
+	 * them when initializing APs
+	 */
+	if (!cpu && save) {
+		BUG_ON(saved_irq_cnt == MAX_LATE_IRQ);
+		saved_percpu_irqs[saved_irq_cnt].irq = vec;
+		saved_percpu_irqs[saved_irq_cnt].action = action;
+		saved_irq_cnt++;
+		if (!xen_slab_ready)
+			late_irq_cnt++;
+	}
+}
+
+static void
+xen_register_percpu_irq(ia64_vector vec, struct irqaction *action)
+{
+	__xen_register_percpu_irq(smp_processor_id(), vec, action, 1);
+}
+
+static void
+xen_bind_early_percpu_irq(void)
+{
+	int i;
+
+	xen_slab_ready = 1;
+	/* There's no race when accessing this cached array, since only
+	 * BSP will face with such step shortly
+	 */
+	for (i = 0; i < late_irq_cnt; i++)
+		__xen_register_percpu_irq(smp_processor_id(),
+					  saved_percpu_irqs[i].irq,
+					  saved_percpu_irqs[i].action, 0);
+}
+
+/* FIXME: There's no obvious point to check whether slab is ready. So
+ * a hack is used here by utilizing a late time hook.
+ */
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int __devinit
+unbind_evtchn_callback(struct notifier_block *nfb,
+		       unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	if (action == CPU_DEAD) {
+		/* Unregister evtchn.  */
+		if (per_cpu(xen_cpep_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_cpep_irq, cpu),
+					       NULL);
+			per_cpu(xen_cpep_irq, cpu) = -1;
+		}
+		if (per_cpu(xen_cmcp_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_cmcp_irq, cpu),
+					       NULL);
+			per_cpu(xen_cmcp_irq, cpu) = -1;
+		}
+		if (per_cpu(xen_cmc_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_cmc_irq, cpu), NULL);
+			per_cpu(xen_cmc_irq, cpu) = -1;
+		}
+		if (per_cpu(xen_ipi_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_ipi_irq, cpu), NULL);
+			per_cpu(xen_ipi_irq, cpu) = -1;
+		}
+		if (per_cpu(xen_resched_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu),
+					       NULL);
+			per_cpu(xen_resched_irq, cpu) = -1;
+		}
+		if (per_cpu(xen_timer_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_timer_irq, cpu),
+					       NULL);
+			per_cpu(xen_timer_irq, cpu) = -1;
+		}
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block unbind_evtchn_notifier = {
+	.notifier_call = unbind_evtchn_callback,
+	.priority = 0
+};
+#endif
+
+void xen_smp_intr_init_early(unsigned int cpu)
+{
+#ifdef CONFIG_SMP
+	unsigned int i;
+
+	for (i = 0; i < saved_irq_cnt; i++)
+		__xen_register_percpu_irq(cpu, saved_percpu_irqs[i].irq,
+					  saved_percpu_irqs[i].action, 0);
+#endif
+}
+
+void xen_smp_intr_init(void)
+{
+#ifdef CONFIG_SMP
+	unsigned int cpu = smp_processor_id();
+	struct callback_register event = {
+		.type = CALLBACKTYPE_event,
+		.address = { .ip = (unsigned long)&xen_event_callback },
+	};
+
+	if (cpu == 0) {
+		/* Initialization was already done for boot cpu.  */
+#ifdef CONFIG_HOTPLUG_CPU
+		/* Register the notifier only once.  */
+		register_cpu_notifier(&unbind_evtchn_notifier);
+#endif
+		return;
+	}
+
+	/* This should be piggyback when setup vcpu guest context */
+	BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
+#endif /* CONFIG_SMP */
+}
+
+void __init
+xen_irq_init(void)
+{
+	struct callback_register event = {
+		.type = CALLBACKTYPE_event,
+		.address = { .ip = (unsigned long)&xen_event_callback },
+	};
+
+	xen_init_IRQ();
+	BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
+	late_time_init = xen_bind_early_percpu_irq;
+}
+
+void
+xen_platform_send_ipi(int cpu, int vector, int delivery_mode, int redirect)
+{
+#ifdef CONFIG_SMP
+	/* TODO: we need to call vcpu_up here */
+	if (unlikely(vector == ap_wakeup_vector)) {
+		/* XXX
+		 * This should be in __cpu_up(cpu) in ia64 smpboot.c
+		 * like x86. But don't want to modify it,
+		 * keep it untouched.
+		 */
+		xen_smp_intr_init_early(cpu);
+
+		xen_send_ipi(cpu, vector);
+		/* vcpu_prepare_and_up(cpu); */
+		return;
+	}
+#endif
+
+	switch (vector) {
+	case IA64_IPI_VECTOR:
+		xen_send_IPI_one(cpu, XEN_IPI_VECTOR);
+		break;
+	case IA64_IPI_RESCHEDULE:
+		xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
+		break;
+	case IA64_CMCP_VECTOR:
+		xen_send_IPI_one(cpu, XEN_CMCP_VECTOR);
+		break;
+	case IA64_CPEP_VECTOR:
+		xen_send_IPI_one(cpu, XEN_CPEP_VECTOR);
+		break;
+	case IA64_TIMER_VECTOR: {
+		/* this is used only once by check_sal_cache_flush()
+		   at boot time */
+		static int used = 0;
+		if (!used) {
+			xen_send_ipi(cpu, IA64_TIMER_VECTOR);
+			used = 1;
+			break;
+		}
+		/* fallthrough */
+	}
+	default:
+		printk(KERN_WARNING "Unsupported IPI type 0x%x\n",
+		       vector);
+		notify_remote_via_irq(0); /* defaults to 0 irq */
+		break;
+	}
+}
+
+static void __init
+xen_register_ipi(void)
+{
+#ifdef CONFIG_SMP
+	register_percpu_irq(IA64_IPI_VECTOR, &xen_ipi_irqaction);
+	register_percpu_irq(IA64_IPI_RESCHEDULE, &xen_resched_irqaction);
+	register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &xen_tlb_irqaction);
+#endif
+}
+
+static void
+xen_resend_irq(unsigned int vector)
+{
+	(void)resend_irq_on_evtchn(vector);
+}
+
+const struct pv_irq_ops xen_irq_ops __initdata = {
+	.register_ipi = xen_register_ipi,
+
+	.assign_irq_vector = xen_assign_irq_vector,
+	.free_irq_vector = xen_free_irq_vector,
+	.register_percpu_irq = xen_register_percpu_irq,
+
+	.resend_irq = xen_resend_irq,
+};
diff --git a/arch/ia64/xen/irq_xen.h b/arch/ia64/xen/irq_xen.h
new file mode 100644
index 00000000..26110f33
--- /dev/null
+++ b/arch/ia64/xen/irq_xen.h
@@ -0,0 +1,34 @@
+/******************************************************************************
+ * arch/ia64/xen/irq_xen.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef IRQ_XEN_H
+#define IRQ_XEN_H
+
+extern void (*late_time_init)(void);
+extern char xen_event_callback;
+void __init xen_init_IRQ(void);
+
+extern const struct pv_irq_ops xen_irq_ops __initdata;
+extern void xen_smp_intr_init(void);
+extern void xen_send_ipi(int cpu, int vec);
+
+#endif /* IRQ_XEN_H */
diff --git a/arch/ia64/xen/machvec.c b/arch/ia64/xen/machvec.c
new file mode 100644
index 00000000..4ad588a7
--- /dev/null
+++ b/arch/ia64/xen/machvec.c
@@ -0,0 +1,4 @@
+#define MACHVEC_PLATFORM_NAME           xen
+#define MACHVEC_PLATFORM_HEADER         <asm/machvec_xen.h>
+#include <asm/machvec_init.h>
+
diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
new file mode 100644
index 00000000..419c8620
--- /dev/null
+++ b/arch/ia64/xen/suspend.c
@@ -0,0 +1,59 @@
+/******************************************************************************
+ * arch/ia64/xen/suspend.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * suspend/resume
+ */
+
+#include <xen/xen-ops.h>
+#include <asm/xen/hypervisor.h>
+#include "time.h"
+
+void
+xen_mm_pin_all(void)
+{
+	/* nothing */
+}
+
+void
+xen_mm_unpin_all(void)
+{
+	/* nothing */
+}
+
+void
+xen_arch_pre_suspend()
+{
+	/* nothing */
+}
+
+void
+xen_arch_post_suspend(int suspend_cancelled)
+{
+	if (suspend_cancelled)
+		return;
+
+	xen_ia64_enable_opt_feature();
+	/* add more if necessary */
+}
+
+void xen_arch_resume(void)
+{
+	xen_timer_resume_on_aps();
+}
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
new file mode 100644
index 00000000..1f8244a7
--- /dev/null
+++ b/arch/ia64/xen/time.c
@@ -0,0 +1,257 @@
+/******************************************************************************
+ * arch/ia64/xen/time.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel_stat.h>
+#include <linux/posix-timers.h>
+#include <linux/irq.h>
+#include <linux/clocksource.h>
+
+#include <asm/timex.h>
+
+#include <asm/xen/hypervisor.h>
+
+#include <xen/interface/vcpu.h>
+
+#include "../kernel/fsyscall_gtod_data.h"
+
+static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
+static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
+static DEFINE_PER_CPU(unsigned long, xen_blocked_time);
+
+/* taken from i386/kernel/time-xen.c */
+static void xen_init_missing_ticks_accounting(int cpu)
+{
+	struct vcpu_register_runstate_memory_area area;
+	struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
+	int rc;
+
+	memset(runstate, 0, sizeof(*runstate));
+
+	area.addr.v = runstate;
+	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
+				&area);
+	WARN_ON(rc && rc != -ENOSYS);
+
+	per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
+	per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
+					    + runstate->time[RUNSTATE_offline];
+}
+
+/*
+ * Runstate accounting
+ */
+/* stolen from arch/x86/xen/time.c */
+static void get_runstate_snapshot(struct vcpu_runstate_info *res)
+{
+	u64 state_time;
+	struct vcpu_runstate_info *state;
+
+	BUG_ON(preemptible());
+
+	state = &__get_cpu_var(xen_runstate);
+
+	/*
+	 * The runstate info is always updated by the hypervisor on
+	 * the current CPU, so there's no need to use anything
+	 * stronger than a compiler barrier when fetching it.
+	 */
+	do {
+		state_time = state->state_entry_time;
+		rmb();
+		*res = *state;
+		rmb();
+	} while (state->state_entry_time != state_time);
+}
+
+#define NS_PER_TICK (1000000000LL/HZ)
+
+static unsigned long
+consider_steal_time(unsigned long new_itm)
+{
+	unsigned long stolen, blocked;
+	unsigned long delta_itm = 0, stolentick = 0;
+	int cpu = smp_processor_id();
+	struct vcpu_runstate_info runstate;
+	struct task_struct *p = current;
+
+	get_runstate_snapshot(&runstate);
+
+	/*
+	 * Check for vcpu migration effect
+	 * In this case, itc value is reversed.
+	 * This causes huge stolen value.
+	 * This function just checks and reject this effect.
+	 */
+	if (!time_after_eq(runstate.time[RUNSTATE_blocked],
+			   per_cpu(xen_blocked_time, cpu)))
+		blocked = 0;
+
+	if (!time_after_eq(runstate.time[RUNSTATE_runnable] +
+			   runstate.time[RUNSTATE_offline],
+			   per_cpu(xen_stolen_time, cpu)))
+		stolen = 0;
+
+	if (!time_after(delta_itm + new_itm, ia64_get_itc()))
+		stolentick = ia64_get_itc() - new_itm;
+
+	do_div(stolentick, NS_PER_TICK);
+	stolentick++;
+
+	do_div(stolen, NS_PER_TICK);
+
+	if (stolen > stolentick)
+		stolen = stolentick;
+
+	stolentick -= stolen;
+	do_div(blocked, NS_PER_TICK);
+
+	if (blocked > stolentick)
+		blocked = stolentick;
+
+	if (stolen > 0 || blocked > 0) {
+		account_steal_ticks(stolen);
+		account_idle_ticks(blocked);
+		run_local_timers();
+
+		rcu_check_callbacks(cpu, user_mode(get_irq_regs()));
+
+		scheduler_tick();
+		run_posix_cpu_timers(p);
+		delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
+
+		if (cpu == time_keeper_id)
+			xtime_update(stolen + blocked);
+
+		local_cpu_data->itm_next = delta_itm + new_itm;
+
+		per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
+		per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
+	}
+	return delta_itm;
+}
+
+static int xen_do_steal_accounting(unsigned long *new_itm)
+{
+	unsigned long delta_itm;
+	delta_itm = consider_steal_time(*new_itm);
+	*new_itm += delta_itm;
+	if (time_after(*new_itm, ia64_get_itc()) && delta_itm)
+		return 1;
+
+	return 0;
+}
+
+static void xen_itc_jitter_data_reset(void)
+{
+	u64 lcycle, ret;
+
+	do {
+		lcycle = itc_jitter_data.itc_lastcycle;
+		ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, 0);
+	} while (unlikely(ret != lcycle));
+}
+
+/* based on xen_sched_clock() in arch/x86/xen/time.c. */
+/*
+ * This relies on HAVE_UNSTABLE_SCHED_CLOCK. If it can't be defined,
+ * something similar logic should be implemented here.
+ */
+/*
+ * Xen sched_clock implementation.  Returns the number of unstolen
+ * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
+ * states.
+ */
+static unsigned long long xen_sched_clock(void)
+{
+	struct vcpu_runstate_info runstate;
+
+	unsigned long long now;
+	unsigned long long offset;
+	unsigned long long ret;
+
+	/*
+	 * Ideally sched_clock should be called on a per-cpu basis
+	 * anyway, so preempt should already be disabled, but that's
+	 * not current practice at the moment.
+	 */
+	preempt_disable();
+
+	/*
+	 * both ia64_native_sched_clock() and xen's runstate are
+	 * based on mAR.ITC. So difference of them makes sense.
+	 */
+	now = ia64_native_sched_clock();
+
+	get_runstate_snapshot(&runstate);
+
+	WARN_ON(runstate.state != RUNSTATE_running);
+
+	offset = 0;
+	if (now > runstate.state_entry_time)
+		offset = now - runstate.state_entry_time;
+	ret = runstate.time[RUNSTATE_blocked] +
+		runstate.time[RUNSTATE_running] +
+		offset;
+
+	preempt_enable();
+
+	return ret;
+}
+
+struct pv_time_ops xen_time_ops __initdata = {
+	.init_missing_ticks_accounting	= xen_init_missing_ticks_accounting,
+	.do_steal_accounting		= xen_do_steal_accounting,
+	.clocksource_resume		= xen_itc_jitter_data_reset,
+	.sched_clock			= xen_sched_clock,
+};
+
+/* Called after suspend, to resume time.  */
+static void xen_local_tick_resume(void)
+{
+	/* Just trigger a tick.  */
+	ia64_cpu_local_tick();
+	touch_softlockup_watchdog();
+}
+
+void
+xen_timer_resume(void)
+{
+	unsigned int cpu;
+
+	xen_local_tick_resume();
+
+	for_each_online_cpu(cpu)
+		xen_init_missing_ticks_accounting(cpu);
+}
+
+static void ia64_cpu_local_tick_fn(void *unused)
+{
+	xen_local_tick_resume();
+	xen_init_missing_ticks_accounting(smp_processor_id());
+}
+
+void
+xen_timer_resume_on_aps(void)
+{
+	smp_call_function(&ia64_cpu_local_tick_fn, NULL, 1);
+}
diff --git a/arch/ia64/xen/time.h b/arch/ia64/xen/time.h
new file mode 100644
index 00000000..f98d7e1a
--- /dev/null
+++ b/arch/ia64/xen/time.h
@@ -0,0 +1,24 @@
+/******************************************************************************
+ * arch/ia64/xen/time.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+extern struct pv_time_ops xen_time_ops __initdata;
+void xen_timer_resume_on_aps(void);
diff --git a/arch/ia64/xen/xcom_hcall.c b/arch/ia64/xen/xcom_hcall.c
new file mode 100644
index 00000000..ccaf7431
--- /dev/null
+++ b/arch/ia64/xen/xcom_hcall.c
@@ -0,0 +1,441 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ *          Tristan Gingold <tristan.gingold@bull.net>
+ *
+ *          Copyright (c) 2007
+ *          Isaku Yamahata <yamahata at valinux co jp>
+ *                          VA Linux Systems Japan K.K.
+ *          consolidate mini and inline version.
+ */
+
+#include <linux/module.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/grant_table.h>
+#include <xen/interface/callback.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/xencomm.h>
+
+/* Xencomm notes:
+ * This file defines hypercalls to be used by xencomm.  The hypercalls simply
+ * create inlines or mini descriptors for pointers and then call the raw arch
+ * hypercall xencomm_arch_hypercall_XXX
+ *
+ * If the arch wants to directly use these hypercalls, simply define macros
+ * in asm/xen/hypercall.h, eg:
+ *  #define HYPERVISOR_sched_op xencomm_hypercall_sched_op
+ *
+ * The arch may also define HYPERVISOR_xxx as a function and do more operations
+ * before/after doing the hypercall.
+ *
+ * Note: because only inline or mini descriptors are created these functions
+ * must only be called with in kernel memory parameters.
+ */
+
+int
+xencomm_hypercall_console_io(int cmd, int count, char *str)
+{
+	/* xen early printk uses console io hypercall before
+	 * xencomm initialization. In that case, we just ignore it.
+	 */
+	if (!xencomm_is_initialized())
+		return 0;
+
+	return xencomm_arch_hypercall_console_io
+		(cmd, count, xencomm_map_no_alloc(str, count));
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_console_io);
+
+int
+xencomm_hypercall_event_channel_op(int cmd, void *op)
+{
+	struct xencomm_handle *desc;
+	desc = xencomm_map_no_alloc(op, sizeof(struct evtchn_op));
+	if (desc == NULL)
+		return -EINVAL;
+
+	return xencomm_arch_hypercall_event_channel_op(cmd, desc);
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_event_channel_op);
+
+int
+xencomm_hypercall_xen_version(int cmd, void *arg)
+{
+	struct xencomm_handle *desc;
+	unsigned int argsize;
+
+	switch (cmd) {
+	case XENVER_version:
+		/* do not actually pass an argument */
+		return xencomm_arch_hypercall_xen_version(cmd, 0);
+	case XENVER_extraversion:
+		argsize = sizeof(struct xen_extraversion);
+		break;
+	case XENVER_compile_info:
+		argsize = sizeof(struct xen_compile_info);
+		break;
+	case XENVER_capabilities:
+		argsize = sizeof(struct xen_capabilities_info);
+		break;
+	case XENVER_changeset:
+		argsize = sizeof(struct xen_changeset_info);
+		break;
+	case XENVER_platform_parameters:
+		argsize = sizeof(struct xen_platform_parameters);
+		break;
+	case XENVER_get_features:
+		argsize = (arg == NULL) ? 0 : sizeof(struct xen_feature_info);
+		break;
+
+	default:
+		printk(KERN_DEBUG
+		       "%s: unknown version op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	desc = xencomm_map_no_alloc(arg, argsize);
+	if (desc == NULL)
+		return -EINVAL;
+
+	return xencomm_arch_hypercall_xen_version(cmd, desc);
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_xen_version);
+
+int
+xencomm_hypercall_physdev_op(int cmd, void *op)
+{
+	unsigned int argsize;
+
+	switch (cmd) {
+	case PHYSDEVOP_apic_read:
+	case PHYSDEVOP_apic_write:
+		argsize = sizeof(struct physdev_apic);
+		break;
+	case PHYSDEVOP_alloc_irq_vector:
+	case PHYSDEVOP_free_irq_vector:
+		argsize = sizeof(struct physdev_irq);
+		break;
+	case PHYSDEVOP_irq_status_query:
+		argsize = sizeof(struct physdev_irq_status_query);
+		break;
+
+	default:
+		printk(KERN_DEBUG
+		       "%s: unknown physdev op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	return xencomm_arch_hypercall_physdev_op
+		(cmd, xencomm_map_no_alloc(op, argsize));
+}
+
+static int
+xencommize_grant_table_op(struct xencomm_mini **xc_area,
+			  unsigned int cmd, void *op, unsigned int count,
+			  struct xencomm_handle **desc)
+{
+	struct xencomm_handle *desc1;
+	unsigned int argsize;
+
+	switch (cmd) {
+	case GNTTABOP_map_grant_ref:
+		argsize = sizeof(struct gnttab_map_grant_ref);
+		break;
+	case GNTTABOP_unmap_grant_ref:
+		argsize = sizeof(struct gnttab_unmap_grant_ref);
+		break;
+	case GNTTABOP_setup_table:
+	{
+		struct gnttab_setup_table *setup = op;
+
+		argsize = sizeof(*setup);
+
+		if (count != 1)
+			return -EINVAL;
+		desc1 = __xencomm_map_no_alloc
+			(xen_guest_handle(setup->frame_list),
+			 setup->nr_frames *
+			 sizeof(*xen_guest_handle(setup->frame_list)),
+			 *xc_area);
+		if (desc1 == NULL)
+			return -EINVAL;
+		(*xc_area)++;
+		set_xen_guest_handle(setup->frame_list, (void *)desc1);
+		break;
+	}
+	case GNTTABOP_dump_table:
+		argsize = sizeof(struct gnttab_dump_table);
+		break;
+	case GNTTABOP_transfer:
+		argsize = sizeof(struct gnttab_transfer);
+		break;
+	case GNTTABOP_copy:
+		argsize = sizeof(struct gnttab_copy);
+		break;
+	case GNTTABOP_query_size:
+		argsize = sizeof(struct gnttab_query_size);
+		break;
+	default:
+		printk(KERN_DEBUG "%s: unknown hypercall grant table op %d\n",
+		       __func__, cmd);
+		BUG();
+	}
+
+	*desc = __xencomm_map_no_alloc(op, count * argsize, *xc_area);
+	if (*desc == NULL)
+		return -EINVAL;
+	(*xc_area)++;
+
+	return 0;
+}
+
+int
+xencomm_hypercall_grant_table_op(unsigned int cmd, void *op,
+				 unsigned int count)
+{
+	int rc;
+	struct xencomm_handle *desc;
+	XENCOMM_MINI_ALIGNED(xc_area, 2);
+
+	rc = xencommize_grant_table_op(&xc_area, cmd, op, count, &desc);
+	if (rc)
+		return rc;
+
+	return xencomm_arch_hypercall_grant_table_op(cmd, desc, count);
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_grant_table_op);
+
+int
+xencomm_hypercall_sched_op(int cmd, void *arg)
+{
+	struct xencomm_handle *desc;
+	unsigned int argsize;
+
+	switch (cmd) {
+	case SCHEDOP_yield:
+	case SCHEDOP_block:
+		argsize = 0;
+		break;
+	case SCHEDOP_shutdown:
+		argsize = sizeof(struct sched_shutdown);
+		break;
+	case SCHEDOP_poll:
+	{
+		struct sched_poll *poll = arg;
+		struct xencomm_handle *ports;
+
+		argsize = sizeof(struct sched_poll);
+		ports = xencomm_map_no_alloc(xen_guest_handle(poll->ports),
+				     sizeof(*xen_guest_handle(poll->ports)));
+
+		set_xen_guest_handle(poll->ports, (void *)ports);
+		break;
+	}
+	default:
+		printk(KERN_DEBUG "%s: unknown sched op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	desc = xencomm_map_no_alloc(arg, argsize);
+	if (desc == NULL)
+		return -EINVAL;
+
+	return xencomm_arch_hypercall_sched_op(cmd, desc);
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_sched_op);
+
+int
+xencomm_hypercall_multicall(void *call_list, int nr_calls)
+{
+	int rc;
+	int i;
+	struct multicall_entry *mce;
+	struct xencomm_handle *desc;
+	XENCOMM_MINI_ALIGNED(xc_area, nr_calls * 2);
+
+	for (i = 0; i < nr_calls; i++) {
+		mce = (struct multicall_entry *)call_list + i;
+
+		switch (mce->op) {
+		case __HYPERVISOR_update_va_mapping:
+		case __HYPERVISOR_mmu_update:
+			/* No-op on ia64.  */
+			break;
+		case __HYPERVISOR_grant_table_op:
+			rc = xencommize_grant_table_op
+				(&xc_area,
+				 mce->args[0], (void *)mce->args[1],
+				 mce->args[2], &desc);
+			if (rc)
+				return rc;
+			mce->args[1] = (unsigned long)desc;
+			break;
+		case __HYPERVISOR_memory_op:
+		default:
+			printk(KERN_DEBUG
+			       "%s: unhandled multicall op entry op %lu\n",
+			       __func__, mce->op);
+			return -ENOSYS;
+		}
+	}
+
+	desc = xencomm_map_no_alloc(call_list,
+				    nr_calls * sizeof(struct multicall_entry));
+	if (desc == NULL)
+		return -EINVAL;
+
+	return xencomm_arch_hypercall_multicall(desc, nr_calls);
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_multicall);
+
+int
+xencomm_hypercall_callback_op(int cmd, void *arg)
+{
+	unsigned int argsize;
+	switch (cmd) {
+	case CALLBACKOP_register:
+		argsize = sizeof(struct callback_register);
+		break;
+	case CALLBACKOP_unregister:
+		argsize = sizeof(struct callback_unregister);
+		break;
+	default:
+		printk(KERN_DEBUG
+		       "%s: unknown callback op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	return xencomm_arch_hypercall_callback_op
+		(cmd, xencomm_map_no_alloc(arg, argsize));
+}
+
+static int
+xencommize_memory_reservation(struct xencomm_mini *xc_area,
+			      struct xen_memory_reservation *mop)
+{
+	struct xencomm_handle *desc;
+
+	desc = __xencomm_map_no_alloc(xen_guest_handle(mop->extent_start),
+			mop->nr_extents *
+			sizeof(*xen_guest_handle(mop->extent_start)),
+			xc_area);
+	if (desc == NULL)
+		return -EINVAL;
+
+	set_xen_guest_handle(mop->extent_start, (void *)desc);
+	return 0;
+}
+
+int
+xencomm_hypercall_memory_op(unsigned int cmd, void *arg)
+{
+	GUEST_HANDLE(xen_pfn_t) extent_start_va[2] = { {NULL}, {NULL} };
+	struct xen_memory_reservation *xmr = NULL;
+	int rc;
+	struct xencomm_handle *desc;
+	unsigned int argsize;
+	XENCOMM_MINI_ALIGNED(xc_area, 2);
+
+	switch (cmd) {
+	case XENMEM_increase_reservation:
+	case XENMEM_decrease_reservation:
+	case XENMEM_populate_physmap:
+		xmr = (struct xen_memory_reservation *)arg;
+		set_xen_guest_handle(extent_start_va[0],
+				     xen_guest_handle(xmr->extent_start));
+
+		argsize = sizeof(*xmr);
+		rc = xencommize_memory_reservation(xc_area, xmr);
+		if (rc)
+			return rc;
+		xc_area++;
+		break;
+
+	case XENMEM_maximum_ram_page:
+		argsize = 0;
+		break;
+
+	case XENMEM_add_to_physmap:
+		argsize = sizeof(struct xen_add_to_physmap);
+		break;
+
+	default:
+		printk(KERN_DEBUG "%s: unknown memory op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	desc = xencomm_map_no_alloc(arg, argsize);
+	if (desc == NULL)
+		return -EINVAL;
+
+	rc = xencomm_arch_hypercall_memory_op(cmd, desc);
+
+	switch (cmd) {
+	case XENMEM_increase_reservation:
+	case XENMEM_decrease_reservation:
+	case XENMEM_populate_physmap:
+		set_xen_guest_handle(xmr->extent_start,
+				     xen_guest_handle(extent_start_va[0]));
+		break;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(xencomm_hypercall_memory_op);
+
+int
+xencomm_hypercall_suspend(unsigned long srec)
+{
+	struct sched_shutdown arg;
+
+	arg.reason = SHUTDOWN_suspend;
+
+	return xencomm_arch_hypercall_sched_op(
+		SCHEDOP_shutdown, xencomm_map_no_alloc(&arg, sizeof(arg)));
+}
+
+long
+xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg)
+{
+	unsigned int argsize;
+	switch (cmd) {
+	case VCPUOP_register_runstate_memory_area: {
+		struct vcpu_register_runstate_memory_area *area =
+			(struct vcpu_register_runstate_memory_area *)arg;
+		argsize = sizeof(*arg);
+		set_xen_guest_handle(area->addr.h,
+		     (void *)xencomm_map_no_alloc(area->addr.v,
+						  sizeof(area->addr.v)));
+		break;
+	}
+
+	default:
+		printk(KERN_DEBUG "%s: unknown vcpu op %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	return xencomm_arch_hypercall_vcpu_op(cmd, cpu,
+					xencomm_map_no_alloc(arg, argsize));
+}
+
+long
+xencomm_hypercall_opt_feature(void *arg)
+{
+	return xencomm_arch_hypercall_opt_feature(
+		xencomm_map_no_alloc(arg,
+				     sizeof(struct xen_ia64_opt_feature)));
+}
diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c
new file mode 100644
index 00000000..3e8d350f
--- /dev/null
+++ b/arch/ia64/xen/xen_pv_ops.c
@@ -0,0 +1,1141 @@
+/******************************************************************************
+ * arch/ia64/xen/xen_pv_ops.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/console.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/pm.h>
+#include <linux/unistd.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/xencomm.h>
+#include <asm/xen/privop.h>
+
+#include "irq_xen.h"
+#include "time.h"
+
+/***************************************************************************
+ * general info
+ */
+static struct pv_info xen_info __initdata = {
+	.kernel_rpl = 2,	/* or 1: determin at runtime */
+	.paravirt_enabled = 1,
+	.name = "Xen/ia64",
+};
+
+#define IA64_RSC_PL_SHIFT	2
+#define IA64_RSC_PL_BIT_SIZE	2
+#define IA64_RSC_PL_MASK	\
+	(((1UL << IA64_RSC_PL_BIT_SIZE) - 1) << IA64_RSC_PL_SHIFT)
+
+static void __init
+xen_info_init(void)
+{
+	/* Xenified Linux/ia64 may run on pl = 1 or 2.
+	 * determin at run time. */
+	unsigned long rsc = ia64_getreg(_IA64_REG_AR_RSC);
+	unsigned int rpl = (rsc & IA64_RSC_PL_MASK) >> IA64_RSC_PL_SHIFT;
+	xen_info.kernel_rpl = rpl;
+}
+
+/***************************************************************************
+ * pv_init_ops
+ * initialization hooks.
+ */
+
+static void
+xen_panic_hypercall(struct unw_frame_info *info, void *arg)
+{
+	current->thread.ksp = (__u64)info->sw - 16;
+	HYPERVISOR_shutdown(SHUTDOWN_crash);
+	/* we're never actually going to get here... */
+}
+
+static int
+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	unw_init_running(xen_panic_hypercall, NULL);
+	/* we're never actually going to get here... */
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block xen_panic_block = {
+	xen_panic_event, NULL, 0 /* try to go last */
+};
+
+static void xen_pm_power_off(void)
+{
+	local_irq_disable();
+	HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+}
+
+static void __init
+xen_banner(void)
+{
+	printk(KERN_INFO
+	       "Running on Xen! pl = %d start_info_pfn=0x%lx nr_pages=%ld "
+	       "flags=0x%x\n",
+	       xen_info.kernel_rpl,
+	       HYPERVISOR_shared_info->arch.start_info_pfn,
+	       xen_start_info->nr_pages, xen_start_info->flags);
+}
+
+static int __init
+xen_reserve_memory(struct rsvd_region *region)
+{
+	region->start = (unsigned long)__va(
+		(HYPERVISOR_shared_info->arch.start_info_pfn << PAGE_SHIFT));
+	region->end   = region->start + PAGE_SIZE;
+	return 1;
+}
+
+static void __init
+xen_arch_setup_early(void)
+{
+	struct shared_info *s;
+	BUG_ON(!xen_pv_domain());
+
+	s = HYPERVISOR_shared_info;
+	xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
+
+	/* Must be done before any hypercall.  */
+	xencomm_initialize();
+
+	xen_setup_features();
+	/* Register a call for panic conditions. */
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &xen_panic_block);
+	pm_power_off = xen_pm_power_off;
+
+	xen_ia64_enable_opt_feature();
+}
+
+static void __init
+xen_arch_setup_console(char **cmdline_p)
+{
+	add_preferred_console("xenboot", 0, NULL);
+	add_preferred_console("tty", 0, NULL);
+	/* use hvc_xen */
+	add_preferred_console("hvc", 0, NULL);
+
+#if !defined(CONFIG_VT) || !defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = NULL;
+#endif
+}
+
+static int __init
+xen_arch_setup_nomca(void)
+{
+	return 1;
+}
+
+static void __init
+xen_post_smp_prepare_boot_cpu(void)
+{
+	xen_setup_vcpu_info_placement();
+}
+
+#ifdef ASM_SUPPORTED
+static unsigned long __init_or_module
+xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type);
+#endif
+static void __init
+xen_patch_branch(unsigned long tag, unsigned long type);
+
+static const struct pv_init_ops xen_init_ops __initconst = {
+	.banner = xen_banner,
+
+	.reserve_memory = xen_reserve_memory,
+
+	.arch_setup_early = xen_arch_setup_early,
+	.arch_setup_console = xen_arch_setup_console,
+	.arch_setup_nomca = xen_arch_setup_nomca,
+
+	.post_smp_prepare_boot_cpu = xen_post_smp_prepare_boot_cpu,
+#ifdef ASM_SUPPORTED
+	.patch_bundle = xen_patch_bundle,
+#endif
+	.patch_branch = xen_patch_branch,
+};
+
+/***************************************************************************
+ * pv_fsys_data
+ * addresses for fsys
+ */
+
+extern unsigned long xen_fsyscall_table[NR_syscalls];
+extern char xen_fsys_bubble_down[];
+struct pv_fsys_data xen_fsys_data __initdata = {
+	.fsyscall_table = (unsigned long *)xen_fsyscall_table,
+	.fsys_bubble_down = (void *)xen_fsys_bubble_down,
+};
+
+/***************************************************************************
+ * pv_patchdata
+ * patchdata addresses
+ */
+
+#define DECLARE(name)							\
+	extern unsigned long __xen_start_gate_##name##_patchlist[];	\
+	extern unsigned long __xen_end_gate_##name##_patchlist[]
+
+DECLARE(fsyscall);
+DECLARE(brl_fsys_bubble_down);
+DECLARE(vtop);
+DECLARE(mckinley_e9);
+
+extern unsigned long __xen_start_gate_section[];
+
+#define ASSIGN(name)							\
+	.start_##name##_patchlist =					\
+		(unsigned long)__xen_start_gate_##name##_patchlist,	\
+	.end_##name##_patchlist =					\
+		(unsigned long)__xen_end_gate_##name##_patchlist
+
+static struct pv_patchdata xen_patchdata __initdata = {
+	ASSIGN(fsyscall),
+	ASSIGN(brl_fsys_bubble_down),
+	ASSIGN(vtop),
+	ASSIGN(mckinley_e9),
+
+	.gate_section = (void*)__xen_start_gate_section,
+};
+
+/***************************************************************************
+ * pv_cpu_ops
+ * intrinsics hooks.
+ */
+
+#ifndef ASM_SUPPORTED
+static void
+xen_set_itm_with_offset(unsigned long val)
+{
+	/* ia64_cpu_local_tick() calls this with interrupt enabled. */
+	/* WARN_ON(!irqs_disabled()); */
+	xen_set_itm(val - XEN_MAPPEDREGS->itc_offset);
+}
+
+static unsigned long
+xen_get_itm_with_offset(void)
+{
+	/* unused at this moment */
+	printk(KERN_DEBUG "%s is called.\n", __func__);
+
+	WARN_ON(!irqs_disabled());
+	return ia64_native_getreg(_IA64_REG_CR_ITM) +
+		XEN_MAPPEDREGS->itc_offset;
+}
+
+/* ia64_set_itc() is only called by
+ * cpu_init() with ia64_set_itc(0) and ia64_sync_itc().
+ * So XEN_MAPPEDRESG->itc_offset cal be considered as almost constant.
+ */
+static void
+xen_set_itc(unsigned long val)
+{
+	unsigned long mitc;
+
+	WARN_ON(!irqs_disabled());
+	mitc = ia64_native_getreg(_IA64_REG_AR_ITC);
+	XEN_MAPPEDREGS->itc_offset = val - mitc;
+	XEN_MAPPEDREGS->itc_last = val;
+}
+
+static unsigned long
+xen_get_itc(void)
+{
+	unsigned long res;
+	unsigned long itc_offset;
+	unsigned long itc_last;
+	unsigned long ret_itc_last;
+
+	itc_offset = XEN_MAPPEDREGS->itc_offset;
+	do {
+		itc_last = XEN_MAPPEDREGS->itc_last;
+		res = ia64_native_getreg(_IA64_REG_AR_ITC);
+		res += itc_offset;
+		if (itc_last >= res)
+			res = itc_last + 1;
+		ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last,
+				       itc_last, res);
+	} while (unlikely(ret_itc_last != itc_last));
+	return res;
+
+#if 0
+	/* ia64_itc_udelay() calls ia64_get_itc() with interrupt enabled.
+	   Should it be paravirtualized instead? */
+	WARN_ON(!irqs_disabled());
+	itc_offset = XEN_MAPPEDREGS->itc_offset;
+	itc_last = XEN_MAPPEDREGS->itc_last;
+	res = ia64_native_getreg(_IA64_REG_AR_ITC);
+	res += itc_offset;
+	if (itc_last >= res)
+		res = itc_last + 1;
+	XEN_MAPPEDREGS->itc_last = res;
+	return res;
+#endif
+}
+
+static void xen_setreg(int regnum, unsigned long val)
+{
+	switch (regnum) {
+	case _IA64_REG_AR_KR0 ... _IA64_REG_AR_KR7:
+		xen_set_kr(regnum - _IA64_REG_AR_KR0, val);
+		break;
+	case _IA64_REG_AR_ITC:
+		xen_set_itc(val);
+		break;
+	case _IA64_REG_CR_TPR:
+		xen_set_tpr(val);
+		break;
+	case _IA64_REG_CR_ITM:
+		xen_set_itm_with_offset(val);
+		break;
+	case _IA64_REG_CR_EOI:
+		xen_eoi(val);
+		break;
+	default:
+		ia64_native_setreg_func(regnum, val);
+		break;
+	}
+}
+
+static unsigned long xen_getreg(int regnum)
+{
+	unsigned long res;
+
+	switch (regnum) {
+	case _IA64_REG_PSR:
+		res = xen_get_psr();
+		break;
+	case _IA64_REG_AR_ITC:
+		res = xen_get_itc();
+		break;
+	case _IA64_REG_CR_ITM:
+		res = xen_get_itm_with_offset();
+		break;
+	case _IA64_REG_CR_IVR:
+		res = xen_get_ivr();
+		break;
+	case _IA64_REG_CR_TPR:
+		res = xen_get_tpr();
+		break;
+	default:
+		res = ia64_native_getreg_func(regnum);
+		break;
+	}
+	return res;
+}
+
+/* turning on interrupts is a bit more complicated.. write to the
+ * memory-mapped virtual psr.i bit first (to avoid race condition),
+ * then if any interrupts were pending, we have to execute a hyperprivop
+ * to ensure the pending interrupt gets delivered; else we're done! */
+static void
+xen_ssm_i(void)
+{
+	int old = xen_get_virtual_psr_i();
+	xen_set_virtual_psr_i(1);
+	barrier();
+	if (!old && xen_get_virtual_pend())
+		xen_hyper_ssm_i();
+}
+
+/* turning off interrupts can be paravirtualized simply by writing
+ * to a memory-mapped virtual psr.i bit (implemented as a 16-bit bool) */
+static void
+xen_rsm_i(void)
+{
+	xen_set_virtual_psr_i(0);
+	barrier();
+}
+
+static unsigned long
+xen_get_psr_i(void)
+{
+	return xen_get_virtual_psr_i() ? IA64_PSR_I : 0;
+}
+
+static void
+xen_intrin_local_irq_restore(unsigned long mask)
+{
+	if (mask & IA64_PSR_I)
+		xen_ssm_i();
+	else
+		xen_rsm_i();
+}
+#else
+#define __DEFINE_FUNC(name, code)					\
+	extern const char xen_ ## name ## _direct_start[];		\
+	extern const char xen_ ## name ## _direct_end[];		\
+	asm (".align 32\n"						\
+	     ".proc xen_" #name "\n"					\
+	     "xen_" #name ":\n"						\
+	     "xen_" #name "_direct_start:\n"				\
+	     code							\
+	     "xen_" #name "_direct_end:\n"				\
+	     "br.cond.sptk.many b6\n"					\
+	     ".endp xen_" #name "\n")
+
+#define DEFINE_VOID_FUNC0(name, code)		\
+	extern void				\
+	xen_ ## name (void);			\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1(name, code)		\
+	extern void				\
+	xen_ ## name (unsigned long arg);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1_VOID(name, code)	\
+	extern void				\
+	xen_ ## name (void *arg);		\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC2(name, code)		\
+	extern void				\
+	xen_ ## name (unsigned long arg0,	\
+		      unsigned long arg1);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC0(name, code)		\
+	extern unsigned long			\
+	xen_ ## name (void);			\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC1(name, type, code)		\
+	extern unsigned long			\
+	xen_ ## name (type arg);		\
+	__DEFINE_FUNC(name, code)
+
+#define XEN_PSR_I_ADDR_ADDR     (XSI_BASE + XSI_PSR_I_ADDR_OFS)
+
+/*
+ * static void xen_set_itm_with_offset(unsigned long val)
+ *        xen_set_itm(val - XEN_MAPPEDREGS->itc_offset);
+ */
+/* 2 bundles */
+DEFINE_VOID_FUNC1(set_itm_with_offset,
+		  "mov r2 = " __stringify(XSI_BASE) " + "
+		  __stringify(XSI_ITC_OFFSET_OFS) "\n"
+		  ";;\n"
+		  "ld8 r3 = [r2]\n"
+		  ";;\n"
+		  "sub r8 = r8, r3\n"
+		  "break " __stringify(HYPERPRIVOP_SET_ITM) "\n");
+
+/*
+ * static unsigned long xen_get_itm_with_offset(void)
+ *    return ia64_native_getreg(_IA64_REG_CR_ITM) + XEN_MAPPEDREGS->itc_offset;
+ */
+/* 2 bundles */
+DEFINE_FUNC0(get_itm_with_offset,
+	     "mov r2 = " __stringify(XSI_BASE) " + "
+	     __stringify(XSI_ITC_OFFSET_OFS) "\n"
+	     ";;\n"
+	     "ld8 r3 = [r2]\n"
+	     "mov r8 = cr.itm\n"
+	     ";;\n"
+	     "add r8 = r8, r2\n");
+
+/*
+ * static void xen_set_itc(unsigned long val)
+ *	unsigned long mitc;
+ *
+ *	WARN_ON(!irqs_disabled());
+ *	mitc = ia64_native_getreg(_IA64_REG_AR_ITC);
+ *	XEN_MAPPEDREGS->itc_offset = val - mitc;
+ *	XEN_MAPPEDREGS->itc_last = val;
+ */
+/* 2 bundles */
+DEFINE_VOID_FUNC1(set_itc,
+		  "mov r2 = " __stringify(XSI_BASE) " + "
+		  __stringify(XSI_ITC_LAST_OFS) "\n"
+		  "mov r3 = ar.itc\n"
+		  ";;\n"
+		  "sub r3 = r8, r3\n"
+		  "st8 [r2] = r8, "
+		  __stringify(XSI_ITC_LAST_OFS) " - "
+		  __stringify(XSI_ITC_OFFSET_OFS) "\n"
+		  ";;\n"
+		  "st8 [r2] = r3\n");
+
+/*
+ * static unsigned long xen_get_itc(void)
+ *	unsigned long res;
+ *	unsigned long itc_offset;
+ *	unsigned long itc_last;
+ *	unsigned long ret_itc_last;
+ *
+ *	itc_offset = XEN_MAPPEDREGS->itc_offset;
+ *	do {
+ *		itc_last = XEN_MAPPEDREGS->itc_last;
+ *		res = ia64_native_getreg(_IA64_REG_AR_ITC);
+ *		res += itc_offset;
+ *		if (itc_last >= res)
+ *			res = itc_last + 1;
+ *		ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last,
+ *				       itc_last, res);
+ *	} while (unlikely(ret_itc_last != itc_last));
+ *	return res;
+ */
+/* 5 bundles */
+DEFINE_FUNC0(get_itc,
+	     "mov r2 = " __stringify(XSI_BASE) " + "
+	     __stringify(XSI_ITC_OFFSET_OFS) "\n"
+	     ";;\n"
+	     "ld8 r9 = [r2], " __stringify(XSI_ITC_LAST_OFS) " - "
+	     __stringify(XSI_ITC_OFFSET_OFS) "\n"
+					/* r9 = itc_offset */
+					/* r2 = XSI_ITC_OFFSET */
+	     "888:\n"
+	     "mov r8 = ar.itc\n"	/* res = ar.itc */
+	     ";;\n"
+	     "ld8 r3 = [r2]\n"		/* r3 = itc_last */
+	     "add r8 = r8, r9\n"	/* res = ar.itc + itc_offset */
+	     ";;\n"
+	     "cmp.gtu p6, p0 = r3, r8\n"
+	     ";;\n"
+	     "(p6) add r8 = 1, r3\n"	/* if (itc_last > res) itc_last + 1 */
+	     ";;\n"
+	     "mov ar.ccv = r8\n"
+	     ";;\n"
+	     "cmpxchg8.acq r10 = [r2], r8, ar.ccv\n"
+	     ";;\n"
+	     "cmp.ne p6, p0 = r10, r3\n"
+	     "(p6) hint @pause\n"
+	     "(p6) br.cond.spnt 888b\n");
+
+DEFINE_VOID_FUNC1_VOID(fc,
+		       "break " __stringify(HYPERPRIVOP_FC) "\n");
+
+/*
+ * psr_i_addr_addr = XEN_PSR_I_ADDR_ADDR
+ * masked_addr = *psr_i_addr_addr
+ * pending_intr_addr = masked_addr - 1
+ * if (val & IA64_PSR_I) {
+ *   masked = *masked_addr
+ *   *masked_addr = 0:xen_set_virtual_psr_i(1)
+ *   compiler barrier
+ *   if (masked) {
+ *      uint8_t pending = *pending_intr_addr;
+ *      if (pending)
+ *              XEN_HYPER_SSM_I
+ *   }
+ * } else {
+ *   *masked_addr = 1:xen_set_virtual_psr_i(0)
+ * }
+ */
+/* 6 bundles */
+DEFINE_VOID_FUNC1(intrin_local_irq_restore,
+		  /* r8 = input value: 0 or IA64_PSR_I
+		   * p6 =  (flags & IA64_PSR_I)
+		   *    = if clause
+		   * p7 = !(flags & IA64_PSR_I)
+		   *    = else clause
+		   */
+		  "cmp.ne p6, p7 = r8, r0\n"
+		  "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+		  ";;\n"
+		  /* r9 = XEN_PSR_I_ADDR */
+		  "ld8 r9 = [r9]\n"
+		  ";;\n"
+
+		  /* r10 = masked previous value */
+		  "(p6)	ld1.acq r10 = [r9]\n"
+		  ";;\n"
+
+		  /* p8 = !masked interrupt masked previously? */
+		  "(p6)	cmp.ne.unc p8, p0 = r10, r0\n"
+
+		  /* p7 = else clause */
+		  "(p7)	mov r11 = 1\n"
+		  ";;\n"
+		  /* masked = 1 */
+		  "(p7)	st1.rel [r9] = r11\n"
+
+		  /* p6 = if clause */
+		  /* masked = 0
+		   * r9 = masked_addr - 1
+		   *    = pending_intr_addr
+		   */
+		  "(p8)	st1.rel [r9] = r0, -1\n"
+		  ";;\n"
+		  /* r8 = pending_intr */
+		  "(p8)	ld1.acq r11 = [r9]\n"
+		  ";;\n"
+		  /* p9 = interrupt pending? */
+		  "(p8)	cmp.ne.unc p9, p10 = r11, r0\n"
+		  ";;\n"
+		  "(p10) mf\n"
+		  /* issue hypercall to trigger interrupt */
+		  "(p9)	break " __stringify(HYPERPRIVOP_SSM_I) "\n");
+
+DEFINE_VOID_FUNC2(ptcga,
+		  "break " __stringify(HYPERPRIVOP_PTC_GA) "\n");
+DEFINE_VOID_FUNC2(set_rr,
+		  "break " __stringify(HYPERPRIVOP_SET_RR) "\n");
+
+/*
+ * tmp = XEN_MAPPEDREGS->interrupt_mask_addr = XEN_PSR_I_ADDR_ADDR;
+ * tmp = *tmp
+ * tmp = *tmp;
+ * psr_i = tmp? 0: IA64_PSR_I;
+ */
+/* 4 bundles */
+DEFINE_FUNC0(get_psr_i,
+	     "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+	     ";;\n"
+	     "ld8 r9 = [r9]\n"			/* r9 = XEN_PSR_I_ADDR */
+	     "mov r8 = 0\n"			/* psr_i = 0 */
+	     ";;\n"
+	     "ld1.acq r9 = [r9]\n"		/* r9 = XEN_PSR_I */
+	     ";;\n"
+	     "cmp.eq.unc p6, p0 = r9, r0\n"	/* p6 = (XEN_PSR_I != 0) */
+	     ";;\n"
+	     "(p6) mov r8 = " __stringify(1 << IA64_PSR_I_BIT) "\n");
+
+DEFINE_FUNC1(thash, unsigned long,
+	     "break " __stringify(HYPERPRIVOP_THASH) "\n");
+DEFINE_FUNC1(get_cpuid, int,
+	     "break " __stringify(HYPERPRIVOP_GET_CPUID) "\n");
+DEFINE_FUNC1(get_pmd, int,
+	     "break " __stringify(HYPERPRIVOP_GET_PMD) "\n");
+DEFINE_FUNC1(get_rr, unsigned long,
+	     "break " __stringify(HYPERPRIVOP_GET_RR) "\n");
+
+/*
+ * void xen_privop_ssm_i(void)
+ *
+ * int masked = !xen_get_virtual_psr_i();
+ *	// masked = *(*XEN_MAPPEDREGS->interrupt_mask_addr)
+ * xen_set_virtual_psr_i(1)
+ *	// *(*XEN_MAPPEDREGS->interrupt_mask_addr) = 0
+ * // compiler barrier
+ * if (masked) {
+ *	uint8_t* pend_int_addr =
+ *		(uint8_t*)(*XEN_MAPPEDREGS->interrupt_mask_addr) - 1;
+ *	uint8_t pending = *pend_int_addr;
+ *	if (pending)
+ *		XEN_HYPER_SSM_I
+ * }
+ */
+/* 4 bundles */
+DEFINE_VOID_FUNC0(ssm_i,
+		  "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+		  ";;\n"
+		  "ld8 r8 = [r8]\n"		/* r8 = XEN_PSR_I_ADDR */
+		  ";;\n"
+		  "ld1.acq r9 = [r8]\n"		/* r9 = XEN_PSR_I */
+		  ";;\n"
+		  "st1.rel [r8] = r0, -1\n"	/* psr_i = 0. enable interrupt
+						 * r8 = XEN_PSR_I_ADDR - 1
+						 *    = pend_int_addr
+						 */
+		  "cmp.eq.unc p0, p6 = r9, r0\n"/* p6 = !XEN_PSR_I
+						 * previously interrupt
+						 * masked?
+						 */
+		  ";;\n"
+		  "(p6) ld1.acq r8 = [r8]\n"	/* r8 = xen_pend_int */
+		  ";;\n"
+		  "(p6) cmp.eq.unc p6, p7 = r8, r0\n"	/*interrupt pending?*/
+		  ";;\n"
+		  /* issue hypercall to get interrupt */
+		  "(p7) break " __stringify(HYPERPRIVOP_SSM_I) "\n"
+		  ";;\n");
+
+/*
+ * psr_i_addr_addr = XEN_MAPPEDREGS->interrupt_mask_addr
+ *		   = XEN_PSR_I_ADDR_ADDR;
+ * psr_i_addr = *psr_i_addr_addr;
+ * *psr_i_addr = 1;
+ */
+/* 2 bundles */
+DEFINE_VOID_FUNC0(rsm_i,
+		  "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+						/* r8 = XEN_PSR_I_ADDR */
+		  "mov r9 = 1\n"
+		  ";;\n"
+		  "ld8 r8 = [r8]\n"		/* r8 = XEN_PSR_I */
+		  ";;\n"
+		  "st1.rel [r8] = r9\n");	/* XEN_PSR_I = 1 */
+
+extern void
+xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
+		   unsigned long val2, unsigned long val3,
+		   unsigned long val4);
+__DEFINE_FUNC(set_rr0_to_rr4,
+	      "break " __stringify(HYPERPRIVOP_SET_RR0_TO_RR4) "\n");
+
+
+extern unsigned long xen_getreg(int regnum);
+#define __DEFINE_GET_REG(id, privop)					\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"			\
+	";;\n"								\
+	"cmp.eq p6, p0 = r2, r8\n"					\
+	";;\n"								\
+	"(p6) break " __stringify(HYPERPRIVOP_GET_ ## privop) "\n"	\
+	"(p6) br.cond.sptk.many b6\n"					\
+	";;\n"
+
+__DEFINE_FUNC(getreg,
+	      __DEFINE_GET_REG(PSR, PSR)
+
+	      /* get_itc */
+	      "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r8\n"
+	      ";;\n"
+	      "(p6) br.cond.spnt xen_get_itc\n"
+	      ";;\n"
+
+	      /* get itm */
+	      "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r8\n"
+	      ";;\n"
+	      "(p6) br.cond.spnt xen_get_itm_with_offset\n"
+	      ";;\n"
+
+	      __DEFINE_GET_REG(CR_IVR, IVR)
+	      __DEFINE_GET_REG(CR_TPR, TPR)
+
+	      /* fall back */
+	      "movl r2 = ia64_native_getreg_func\n"
+	      ";;\n"
+	      "mov b7 = r2\n"
+	      ";;\n"
+	      "br.cond.sptk.many b7\n");
+
+extern void xen_setreg(int regnum, unsigned long val);
+#define __DEFINE_SET_REG(id, privop)					\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"			\
+	";;\n"								\
+	"cmp.eq p6, p0 = r2, r9\n"					\
+	";;\n"								\
+	"(p6) break " __stringify(HYPERPRIVOP_ ## privop) "\n"		\
+	"(p6) br.cond.sptk.many b6\n"					\
+	";;\n"
+
+__DEFINE_FUNC(setreg,
+	      /* kr0 .. kr 7*/
+	      /*
+	       * if (_IA64_REG_AR_KR0 <= regnum &&
+	       *     regnum <= _IA64_REG_AR_KR7) {
+	       *     register __index asm ("r8") = regnum - _IA64_REG_AR_KR0
+	       *     register __val asm ("r9") = val
+	       *    "break HYPERPRIVOP_SET_KR"
+	       * }
+	       */
+	      "mov r17 = r9\n"
+	      "mov r2 = " __stringify(_IA64_REG_AR_KR0) "\n"
+	      ";;\n"
+	      "cmp.ge p6, p0 = r9, r2\n"
+	      "sub r17 = r17, r2\n"
+	      ";;\n"
+	      "(p6) cmp.ge.unc p7, p0 = "
+	      __stringify(_IA64_REG_AR_KR7) " - " __stringify(_IA64_REG_AR_KR0)
+	      ", r17\n"
+	      ";;\n"
+	      "(p7) mov r9 = r8\n"
+	      ";;\n"
+	      "(p7) mov r8 = r17\n"
+	      "(p7) break " __stringify(HYPERPRIVOP_SET_KR) "\n"
+
+	      /* set itm */
+	      "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r8\n"
+	      ";;\n"
+	      "(p6) br.cond.spnt xen_set_itm_with_offset\n"
+
+	      /* set itc */
+	      "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r8\n"
+	      ";;\n"
+	      "(p6) br.cond.spnt xen_set_itc\n"
+
+	      __DEFINE_SET_REG(CR_TPR, SET_TPR)
+	      __DEFINE_SET_REG(CR_EOI, EOI)
+
+	      /* fall back */
+	      "movl r2 = ia64_native_setreg_func\n"
+	      ";;\n"
+	      "mov b7 = r2\n"
+	      ";;\n"
+	      "br.cond.sptk.many b7\n");
+#endif
+
+static const struct pv_cpu_ops xen_cpu_ops __initconst = {
+	.fc		= xen_fc,
+	.thash		= xen_thash,
+	.get_cpuid	= xen_get_cpuid,
+	.get_pmd	= xen_get_pmd,
+	.getreg		= xen_getreg,
+	.setreg		= xen_setreg,
+	.ptcga		= xen_ptcga,
+	.get_rr		= xen_get_rr,
+	.set_rr		= xen_set_rr,
+	.set_rr0_to_rr4	= xen_set_rr0_to_rr4,
+	.ssm_i		= xen_ssm_i,
+	.rsm_i		= xen_rsm_i,
+	.get_psr_i	= xen_get_psr_i,
+	.intrin_local_irq_restore
+			= xen_intrin_local_irq_restore,
+};
+
+/******************************************************************************
+ * replacement of hand written assembly codes.
+ */
+
+extern char xen_switch_to;
+extern char xen_leave_syscall;
+extern char xen_work_processed_syscall;
+extern char xen_leave_kernel;
+
+const struct pv_cpu_asm_switch xen_cpu_asm_switch = {
+	.switch_to		= (unsigned long)&xen_switch_to,
+	.leave_syscall		= (unsigned long)&xen_leave_syscall,
+	.work_processed_syscall	= (unsigned long)&xen_work_processed_syscall,
+	.leave_kernel		= (unsigned long)&xen_leave_kernel,
+};
+
+/***************************************************************************
+ * pv_iosapic_ops
+ * iosapic read/write hooks.
+ */
+static void
+xen_pcat_compat_init(void)
+{
+	/* nothing */
+}
+
+static struct irq_chip*
+xen_iosapic_get_irq_chip(unsigned long trigger)
+{
+	return NULL;
+}
+
+static unsigned int
+xen_iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+	struct physdev_apic apic_op;
+	int ret;
+
+	apic_op.apic_physbase = (unsigned long)iosapic -
+					__IA64_UNCACHED_OFFSET;
+	apic_op.reg = reg;
+	ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+	if (ret)
+		return ret;
+	return apic_op.value;
+}
+
+static void
+xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+	struct physdev_apic apic_op;
+
+	apic_op.apic_physbase = (unsigned long)iosapic -
+					__IA64_UNCACHED_OFFSET;
+	apic_op.reg = reg;
+	apic_op.value = val;
+	HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
+}
+
+static struct pv_iosapic_ops xen_iosapic_ops __initdata = {
+	.pcat_compat_init = xen_pcat_compat_init,
+	.__get_irq_chip = xen_iosapic_get_irq_chip,
+
+	.__read = xen_iosapic_read,
+	.__write = xen_iosapic_write,
+};
+
+/***************************************************************************
+ * pv_ops initialization
+ */
+
+void __init
+xen_setup_pv_ops(void)
+{
+	xen_info_init();
+	pv_info = xen_info;
+	pv_init_ops = xen_init_ops;
+	pv_fsys_data = xen_fsys_data;
+	pv_patchdata = xen_patchdata;
+	pv_cpu_ops = xen_cpu_ops;
+	pv_iosapic_ops = xen_iosapic_ops;
+	pv_irq_ops = xen_irq_ops;
+	pv_time_ops = xen_time_ops;
+
+	paravirt_cpu_asm_init(&xen_cpu_asm_switch);
+}
+
+#ifdef ASM_SUPPORTED
+/***************************************************************************
+ * binary pacthing
+ * pv_init_ops.patch_bundle
+ */
+
+#define DEFINE_FUNC_GETREG(name, privop)				\
+	DEFINE_FUNC0(get_ ## name,					\
+		     "break "__stringify(HYPERPRIVOP_GET_ ## privop) "\n")
+
+DEFINE_FUNC_GETREG(psr, PSR);
+DEFINE_FUNC_GETREG(eflag, EFLAG);
+DEFINE_FUNC_GETREG(ivr, IVR);
+DEFINE_FUNC_GETREG(tpr, TPR);
+
+#define DEFINE_FUNC_SET_KR(n)						\
+	DEFINE_VOID_FUNC0(set_kr ## n,					\
+			  ";;\n"					\
+			  "mov r9 = r8\n"				\
+			  "mov r8 = " #n "\n"				\
+			  "break " __stringify(HYPERPRIVOP_SET_KR) "\n")
+
+DEFINE_FUNC_SET_KR(0);
+DEFINE_FUNC_SET_KR(1);
+DEFINE_FUNC_SET_KR(2);
+DEFINE_FUNC_SET_KR(3);
+DEFINE_FUNC_SET_KR(4);
+DEFINE_FUNC_SET_KR(5);
+DEFINE_FUNC_SET_KR(6);
+DEFINE_FUNC_SET_KR(7);
+
+#define __DEFINE_FUNC_SETREG(name, privop)				\
+	DEFINE_VOID_FUNC0(name,						\
+			  "break "__stringify(HYPERPRIVOP_ ## privop) "\n")
+
+#define DEFINE_FUNC_SETREG(name, privop)			\
+	__DEFINE_FUNC_SETREG(set_ ## name, SET_ ## privop)
+
+DEFINE_FUNC_SETREG(eflag, EFLAG);
+DEFINE_FUNC_SETREG(tpr, TPR);
+__DEFINE_FUNC_SETREG(eoi, EOI);
+
+extern const char xen_check_events[];
+extern const char __xen_intrin_local_irq_restore_direct_start[];
+extern const char __xen_intrin_local_irq_restore_direct_end[];
+extern const unsigned long __xen_intrin_local_irq_restore_direct_reloc;
+
+asm (
+	".align 32\n"
+	".proc xen_check_events\n"
+	"xen_check_events:\n"
+	/* masked = 0
+	 * r9 = masked_addr - 1
+	 *    = pending_intr_addr
+	 */
+	"st1.rel [r9] = r0, -1\n"
+	";;\n"
+	/* r8 = pending_intr */
+	"ld1.acq r11 = [r9]\n"
+	";;\n"
+	/* p9 = interrupt pending? */
+	"cmp.ne p9, p10 = r11, r0\n"
+	";;\n"
+	"(p10) mf\n"
+	/* issue hypercall to trigger interrupt */
+	"(p9) break " __stringify(HYPERPRIVOP_SSM_I) "\n"
+	"br.cond.sptk.many b6\n"
+	".endp xen_check_events\n"
+	"\n"
+	".align 32\n"
+	".proc __xen_intrin_local_irq_restore_direct\n"
+	"__xen_intrin_local_irq_restore_direct:\n"
+	"__xen_intrin_local_irq_restore_direct_start:\n"
+	"1:\n"
+	"{\n"
+	"cmp.ne p6, p7 = r8, r0\n"
+	"mov r17 = ip\n" /* get ip to calc return address */
+	"mov r9 = "__stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+	";;\n"
+	"}\n"
+	"{\n"
+	/* r9 = XEN_PSR_I_ADDR */
+	"ld8 r9 = [r9]\n"
+	";;\n"
+	/* r10 = masked previous value */
+	"(p6) ld1.acq r10 = [r9]\n"
+	"adds r17 =  1f - 1b, r17\n" /* calculate return address */
+	";;\n"
+	"}\n"
+	"{\n"
+	/* p8 = !masked interrupt masked previously? */
+	"(p6) cmp.ne.unc p8, p0 = r10, r0\n"
+	"\n"
+	/* p7 = else clause */
+	"(p7) mov r11 = 1\n"
+	";;\n"
+	"(p8) mov b6 = r17\n" /* set return address */
+	"}\n"
+	"{\n"
+	/* masked = 1 */
+	"(p7) st1.rel [r9] = r11\n"
+	"\n"
+	"[99:]\n"
+	"(p8) brl.cond.dptk.few xen_check_events\n"
+	"}\n"
+	/* pv calling stub is 5 bundles. fill nop to adjust return address */
+	"{\n"
+	"nop 0\n"
+	"nop 0\n"
+	"nop 0\n"
+	"}\n"
+	"1:\n"
+	"__xen_intrin_local_irq_restore_direct_end:\n"
+	".endp __xen_intrin_local_irq_restore_direct\n"
+	"\n"
+	".align 8\n"
+	"__xen_intrin_local_irq_restore_direct_reloc:\n"
+	"data8 99b\n"
+);
+
+static struct paravirt_patch_bundle_elem xen_patch_bundle_elems[]
+__initdata_or_module =
+{
+#define XEN_PATCH_BUNDLE_ELEM(name, type)		\
+	{						\
+		(void*)xen_ ## name ## _direct_start,	\
+		(void*)xen_ ## name ## _direct_end,	\
+		PARAVIRT_PATCH_TYPE_ ## type,		\
+	}
+
+	XEN_PATCH_BUNDLE_ELEM(fc, FC),
+	XEN_PATCH_BUNDLE_ELEM(thash, THASH),
+	XEN_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID),
+	XEN_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD),
+	XEN_PATCH_BUNDLE_ELEM(ptcga, PTCGA),
+	XEN_PATCH_BUNDLE_ELEM(get_rr, GET_RR),
+	XEN_PATCH_BUNDLE_ELEM(set_rr, SET_RR),
+	XEN_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4),
+	XEN_PATCH_BUNDLE_ELEM(ssm_i, SSM_I),
+	XEN_PATCH_BUNDLE_ELEM(rsm_i, RSM_I),
+	XEN_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I),
+	{
+		(void*)__xen_intrin_local_irq_restore_direct_start,
+		(void*)__xen_intrin_local_irq_restore_direct_end,
+		PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE,
+	},
+
+#define XEN_PATCH_BUNDLE_ELEM_GETREG(name, reg)			\
+	{							\
+		xen_get_ ## name ## _direct_start,		\
+		xen_get_ ## name ## _direct_end,		\
+		PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg, \
+	}
+
+	XEN_PATCH_BUNDLE_ELEM_GETREG(psr, PSR),
+	XEN_PATCH_BUNDLE_ELEM_GETREG(eflag, AR_EFLAG),
+
+	XEN_PATCH_BUNDLE_ELEM_GETREG(ivr, CR_IVR),
+	XEN_PATCH_BUNDLE_ELEM_GETREG(tpr, CR_TPR),
+
+	XEN_PATCH_BUNDLE_ELEM_GETREG(itc, AR_ITC),
+	XEN_PATCH_BUNDLE_ELEM_GETREG(itm_with_offset, CR_ITM),
+
+
+#define __XEN_PATCH_BUNDLE_ELEM_SETREG(name, reg)		\
+	{							\
+		xen_ ## name ## _direct_start,			\
+		xen_ ## name ## _direct_end,			\
+		PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \
+	}
+
+#define XEN_PATCH_BUNDLE_ELEM_SETREG(name, reg)			\
+	__XEN_PATCH_BUNDLE_ELEM_SETREG(set_ ## name, reg)
+
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr0, AR_KR0),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr1, AR_KR1),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr2, AR_KR2),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr3, AR_KR3),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr4, AR_KR4),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr5, AR_KR5),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr6, AR_KR6),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr7, AR_KR7),
+
+	XEN_PATCH_BUNDLE_ELEM_SETREG(eflag, AR_EFLAG),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(tpr, CR_TPR),
+	__XEN_PATCH_BUNDLE_ELEM_SETREG(eoi, CR_EOI),
+
+	XEN_PATCH_BUNDLE_ELEM_SETREG(itc, AR_ITC),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(itm_with_offset, CR_ITM),
+};
+
+static unsigned long __init_or_module
+xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type)
+{
+	const unsigned long nelems = sizeof(xen_patch_bundle_elems) /
+		sizeof(xen_patch_bundle_elems[0]);
+	unsigned long used;
+	const struct paravirt_patch_bundle_elem *found;
+
+	used = __paravirt_patch_apply_bundle(sbundle, ebundle, type,
+					     xen_patch_bundle_elems, nelems,
+					     &found);
+
+	if (found == NULL)
+		/* fallback */
+		return ia64_native_patch_bundle(sbundle, ebundle, type);
+	if (used == 0)
+		return used;
+
+	/* relocation */
+	switch (type) {
+	case PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE: {
+		unsigned long reloc =
+			__xen_intrin_local_irq_restore_direct_reloc;
+		unsigned long reloc_offset = reloc - (unsigned long)
+			__xen_intrin_local_irq_restore_direct_start;
+		unsigned long tag = (unsigned long)sbundle + reloc_offset;
+		paravirt_patch_reloc_brl(tag, xen_check_events);
+		break;
+	}
+	default:
+		/* nothing */
+		break;
+	}
+	return used;
+}
+#endif /* ASM_SUPPOTED */
+
+const struct paravirt_patch_branch_target xen_branch_target[]
+__initconst = {
+#define PARAVIRT_BR_TARGET(name, type)			\
+	{						\
+		&xen_ ## name,				\
+		PARAVIRT_PATCH_TYPE_BR_ ## type,	\
+	}
+	PARAVIRT_BR_TARGET(switch_to, SWITCH_TO),
+	PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL),
+	PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL),
+	PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL),
+};
+
+static void __init
+xen_patch_branch(unsigned long tag, unsigned long type)
+{
+	__paravirt_patch_apply_branch(tag, type, xen_branch_target,
+					ARRAY_SIZE(xen_branch_target));
+}
diff --git a/arch/ia64/xen/xencomm.c b/arch/ia64/xen/xencomm.c
new file mode 100644
index 00000000..1f5d7ac8
--- /dev/null
+++ b/arch/ia64/xen/xencomm.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/mm.h>
+
+static unsigned long kernel_virtual_offset;
+static int is_xencomm_initialized;
+
+/* for xen early printk. It uses console io hypercall which uses xencomm.
+ * However early printk may use it before xencomm initialization.
+ */
+int
+xencomm_is_initialized(void)
+{
+	return is_xencomm_initialized;
+}
+
+void
+xencomm_initialize(void)
+{
+	kernel_virtual_offset = KERNEL_START - ia64_tpa(KERNEL_START);
+	is_xencomm_initialized = 1;
+}
+
+/* Translate virtual address to physical address.  */
+unsigned long
+xencomm_vtop(unsigned long vaddr)
+{
+	struct page *page;
+	struct vm_area_struct *vma;
+
+	if (vaddr == 0)
+		return 0UL;
+
+	if (REGION_NUMBER(vaddr) == 5) {
+		pgd_t *pgd;
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *ptep;
+
+		/* On ia64, TASK_SIZE refers to current.  It is not initialized
+		   during boot.
+		   Furthermore the kernel is relocatable and __pa() doesn't
+		   work on  addresses.  */
+		if (vaddr >= KERNEL_START
+		    && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE))
+			return vaddr - kernel_virtual_offset;
+
+		/* In kernel area -- virtually mapped.  */
+		pgd = pgd_offset_k(vaddr);
+		if (pgd_none(*pgd) || pgd_bad(*pgd))
+			return ~0UL;
+
+		pud = pud_offset(pgd, vaddr);
+		if (pud_none(*pud) || pud_bad(*pud))
+			return ~0UL;
+
+		pmd = pmd_offset(pud, vaddr);
+		if (pmd_none(*pmd) || pmd_bad(*pmd))
+			return ~0UL;
+
+		ptep = pte_offset_kernel(pmd, vaddr);
+		if (!ptep)
+			return ~0UL;
+
+		return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK);
+	}
+
+	if (vaddr > TASK_SIZE) {
+		/* percpu variables */
+		if (REGION_NUMBER(vaddr) == 7 &&
+		    REGION_OFFSET(vaddr) >= (1ULL << IA64_MAX_PHYS_BITS))
+			ia64_tpa(vaddr);
+
+		/* kernel address */
+		return __pa(vaddr);
+	}
+
+	/* XXX double-check (lack of) locking */
+	vma = find_extend_vma(current->mm, vaddr);
+	if (!vma)
+		return ~0UL;
+
+	/* We assume the page is modified.  */
+	page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH);
+	if (!page)
+		return ~0UL;
+
+	return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
+}
diff --git a/arch/ia64/xen/xenivt.S b/arch/ia64/xen/xenivt.S
new file mode 100644
index 00000000..3e71d505
--- /dev/null
+++ b/arch/ia64/xen/xenivt.S
@@ -0,0 +1,52 @@
+/*
+ * arch/ia64/xen/ivt.S
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@hp.com>
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *                    pv_ops.
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/pgtable.h>
+
+#include "../kernel/minstate.h"
+
+	.section .text,"ax"
+GLOBAL_ENTRY(xen_event_callback)
+	mov r31=pr		// prepare to save predicates
+	;;
+	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
+	;;
+	movl r3=XSI_PSR_IC
+	mov r14=1
+	;;
+	st4 [r3]=r14
+	;;
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	srlz.i			// ensure everybody knows psr.ic is back on
+	;;
+	SAVE_REST
+	;;
+1:
+	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+	add out0=16,sp		// pass pointer to pt_regs as first arg
+	;;
+	br.call.sptk.many b0=xen_evtchn_do_upcall
+	;;
+	movl r20=XSI_PSR_I_ADDR
+	;;
+	ld8 r20=[r20]
+	;;
+	adds r20=-1,r20		// vcpu_info->evtchn_upcall_pending
+	;;
+	ld1 r20=[r20]
+	;;
+	cmp.ne p6,p0=r20,r0	// if there are pending events,
+	(p6) br.spnt.few 1b	// call evtchn_do_upcall again.
+	br.sptk.many xen_leave_kernel	// we know ia64_leave_kernel is
+					// paravirtualized as xen_leave_kernel
+END(xen_event_callback)
diff --git a/arch/ia64/xen/xensetup.S b/arch/ia64/xen/xensetup.S
new file mode 100644
index 00000000..b820ed02
--- /dev/null
+++ b/arch/ia64/xen/xensetup.S
@@ -0,0 +1,81 @@
+/*
+ * Support routines for Xen
+ *
+ * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
+ */
+
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/paravirt.h>
+#include <asm/xen/privop.h>
+#include <linux/elfnote.h>
+#include <linux/init.h>
+#include <xen/interface/elfnote.h>
+
+	.section .data..read_mostly
+	.align 8
+	.global xen_domain_type
+xen_domain_type:
+	data4 XEN_NATIVE_ASM
+	.previous
+
+	__INIT
+ENTRY(startup_xen)
+	// Calculate load offset.
+	// The constant, LOAD_OFFSET, can't be used because the boot
+	// loader doesn't always load to the LMA specified by the vmlinux.lds.
+	mov r9=ip	// must be the first instruction to make sure
+			// that r9 = the physical address of startup_xen.
+			// Usually r9 = startup_xen - LOAD_OFFSET
+	movl r8=startup_xen
+	;;
+	sub r9=r9,r8	// Usually r9 = -LOAD_OFFSET.
+
+	mov r10=PARAVIRT_HYPERVISOR_TYPE_XEN
+	movl r11=_start
+	;;
+	add r11=r11,r9
+	movl r8=hypervisor_type
+	;;
+	add r8=r8,r9
+	mov b0=r11
+	;;
+	st8 [r8]=r10
+	br.cond.sptk.many b0
+	;;
+END(startup_xen)
+
+	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,	.asciz "linux")
+	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,	.asciz "2.6")
+	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,	.asciz "xen-3.0")
+	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,		data8.ua startup_xen - LOAD_OFFSET)
+
+#define isBP	p3	// are we the Bootstrap Processor?
+
+GLOBAL_ENTRY(xen_setup_hook)
+	mov r8=XEN_PV_DOMAIN_ASM
+(isBP)	movl r9=xen_domain_type;;
+(isBP)	st4 [r9]=r8
+	movl r10=xen_ivt;;
+
+	mov cr.iva=r10
+
+	/* Set xsi base.  */
+#define FW_HYPERCALL_SET_SHARED_INFO_VA			0x600
+(isBP)	mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA
+(isBP)	movl r28=XSI_BASE;;
+(isBP)	break 0x1000;;
+
+	/* setup pv_ops */
+(isBP)	mov r4=rp
+	;;
+(isBP)	br.call.sptk.many rp=xen_setup_pv_ops
+	;;
+(isBP)	mov rp=r4
+	;;
+
+	br.ret.sptk.many rp
+	;;
+END(xen_setup_hook)
author	root <root@artemis.panaceas.org>	2015-12-25 04:40:36 +0000
committer	root <root@artemis.panaceas.org>	2015-12-25 04:40:36 +0000
commit	849369d6c66d3054688672f97d31fceb8e8230fb (patch)
tree	6135abc790ca67dedbe07c39806591e70eda81ce /arch/ia64
download	linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.gz linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.bz2 linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.zip