author    Christian Limpach <Christian.Limpach@xensource.com>    2007-01-12 14:30:49 +0000
committer Christian Limpach <Christian.Limpach@xensource.com>    2007-01-12 14:30:49 +0000
commit    432e7b62cb60b6f6f45e52b3db0268560d9b7cdb (patch)
tree      5933250c7c05b423abff5ab16f19b70832820dbb /linux-2.6-xen-sparse
parent    a25c73ef380731b0ab4347b854e81827cb6a6131 (diff)
[linux] Update to linux-2.6.17.
Signed-off-by: Christian Limpach <Christian.Limpach@xensource.com>

--HG--
rename : patches/linux-2.6.16.33/blktap-aio-16_03_06.patch => patches/linux-2.6.17/blktap-aio-16_03_06.patch
rename : patches/linux-2.6.16.33/fix-hz-suspend.patch => patches/linux-2.6.17/fix-hz-suspend.patch
rename : patches/linux-2.6.16.33/fix-ide-cd-pio-mode.patch => patches/linux-2.6.17/fix-ide-cd-pio-mode.patch
rename : patches/linux-2.6.16.33/git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch => patches/linux-2.6.17/git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch
rename : patches/linux-2.6.16.33/git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch => patches/linux-2.6.17/git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch
rename : patches/linux-2.6.16.33/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch => patches/linux-2.6.17/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
rename : patches/linux-2.6.16.33/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch => patches/linux-2.6.17/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch
rename : patches/linux-2.6.16.33/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch => patches/linux-2.6.17/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch
rename : patches/linux-2.6.16.33/i386-mach-io-check-nmi.patch => patches/linux-2.6.17/i386-mach-io-check-nmi.patch
rename : patches/linux-2.6.16.33/ipv6-no-autoconf.patch => patches/linux-2.6.17/ipv6-no-autoconf.patch
rename : patches/linux-2.6.16.33/kasprintf.patch => patches/linux-2.6.17/kasprintf.patch
rename : patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch => patches/linux-2.6.17/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
rename : patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch => patches/linux-2.6.17/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
rename : patches/linux-2.6.16.33/net-csum.patch => patches/linux-2.6.17/net-csum.patch
rename : patches/linux-2.6.16.33/net-gso-0-base.patch => patches/linux-2.6.17/net-gso-0-base.patch
rename : patches/linux-2.6.16.33/net-gso-1-check-dodgy.patch => patches/linux-2.6.17/net-gso-1-check-dodgy.patch
rename : patches/linux-2.6.16.33/net-gso-2-checksum-fix.patch => patches/linux-2.6.17/net-gso-2-checksum-fix.patch
rename : patches/linux-2.6.16.33/net-gso-3-fix-errorcheck.patch => patches/linux-2.6.17/net-gso-3-fix-errorcheck.patch
rename : patches/linux-2.6.16.33/net-gso-4-kill-warnon.patch => patches/linux-2.6.17/net-gso-4-kill-warnon.patch
rename : patches/linux-2.6.16.33/net-gso-5-rcv-mss.patch => patches/linux-2.6.17/net-gso-5-rcv-mss.patch
rename : patches/linux-2.6.16.33/net-gso-6-linear-segmentation.patch => patches/linux-2.6.17/net-gso-6-linear-segmentation.patch
rename : patches/linux-2.6.16.33/pmd-shared.patch => patches/linux-2.6.17/pmd-shared.patch
rename : patches/linux-2.6.16.33/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch => patches/linux-2.6.17/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
rename : patches/linux-2.6.16.33/series => patches/linux-2.6.17/series
rename : patches/linux-2.6.16.33/vsnprintf.patch => patches/linux-2.6.17/vsnprintf.patch
rename : patches/linux-2.6.16.33/x86-elfnote-as-preprocessor-macro.patch => patches/linux-2.6.17/x86-elfnote-as-preprocessor-macro.patch
rename : patches/linux-2.6.16.33/x86-increase-interrupt-vector-range.patch => patches/linux-2.6.17/x86-increase-interrupt-vector-range.patch
rename : patches/linux-2.6.16.33/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch => patches/linux-2.6.17/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
rename : patches/linux-2.6.16.33/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch => patches/linux-2.6.17/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
rename : patches/linux-2.6.16.33/xen-hotplug.patch => patches/linux-2.6.17/xen-hotplug.patch
rename : patches/linux-2.6.16.33/xenoprof-generic.patch => patches/linux-2.6.17/xenoprof-generic.patch
Diffstat (limited to 'linux-2.6-xen-sparse')
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/Kconfig | 136
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/Kconfig.cpu | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/Kconfig.debug | 94
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/Makefile | 14
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/Makefile | 5
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c | 14
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c | 13
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c | 57
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c | 693
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main-xen.c | 13
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/crash.c | 4
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S | 4
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c | 33
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c | 25
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c | 62
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c | 11
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 207
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/smp-xen.c | 28
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/smpalts.c | 85
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c | 74
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c | 105
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/traps.c | 105
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/vm86.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S | 30
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c | 226
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 51
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 6
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/mm/pgtable.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/pci/Makefile | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c | 6
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/Kconfig | 42
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/Makefile | 5
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/dig/setup.c | 5
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/kernel/entry.S | 20
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S | 1
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c | 275
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/kernel/patch.c | 8
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c | 9
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/kernel/setup.c | 71
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c | 46
-rw-r--r--  linux-2.6-xen-sparse/arch/um/kernel/physmem.c | 7
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/Kconfig | 69
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/Makefile | 45
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/ia32/ia32entry-xen.S | 31
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/ia32/vsyscall-sigreturn.S | 23
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile | 5
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile | 1
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c | 44
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/early_printk-xen.c | 120
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S | 10
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c | 46
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c | 21
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c | 31
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c | 40
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 138
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c | 22
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c | 6
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c | 76
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c | 8
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c | 81
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 66
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c | 71
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/pci/Makefile | 3
-rw-r--r--  linux-2.6-xen-sparse/drivers/Makefile | 7
-rw-r--r--  linux-2.6-xen-sparse/drivers/acpi/Kconfig | 17
-rw-r--r--  linux-2.6-xen-sparse/drivers/acpi/tables.c | 626
-rw-r--r--  linux-2.6-xen-sparse/drivers/char/mem.c | 62
-rw-r--r--  linux-2.6-xen-sparse/drivers/char/tty_io.c | 127
-rw-r--r--  linux-2.6-xen-sparse/drivers/pci/Kconfig | 21
-rw-r--r--  linux-2.6-xen-sparse/drivers/serial/Kconfig | 60
-rw-r--r--  linux-2.6-xen-sparse/drivers/video/Kconfig | 45
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 2
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 4
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/console/console.c | 6
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/core/smpboot.c | 16
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 2
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c | 4
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 8
-rw-r--r--  linux-2.6-xen-sparse/fs/Kconfig | 22
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/apic.h | 2
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/atomic.h | 254
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/bitops.h | 463
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/futex.h | 108
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h | 12
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h | 3
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h | 2
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h | 2
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h | 5
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h | 7
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/setup.h | 4
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h | 1
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/spinlock.h | 69
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h | 127
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/page.h | 3
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/rwsem.h | 294
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/smp_alt.h | 32
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/spinlock.h | 217
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/system.h | 606
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/intel_intrin.h | 134
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/io.h | 25
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/machvec.h | 15
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h | 2
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/page.h | 22
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/pal.h | 37
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/processor.h | 4
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/system.h | 9
-rw-r--r--  linux-2.6-xen-sparse/include/asm-um/page.h | 4
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h | 5
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h | 2
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h | 40
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h | 6
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h | 3
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h | 29
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h | 8
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h | 7
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h | 1
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h | 9
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/mach_time.h | 37
-rw-r--r--  linux-2.6-xen-sparse/include/linux/gfp.h | 2
-rw-r--r--  linux-2.6-xen-sparse/include/linux/highmem.h | 12
-rw-r--r--  linux-2.6-xen-sparse/include/linux/mm.h | 48
-rw-r--r--  linux-2.6-xen-sparse/include/linux/pfn.h | 9
-rw-r--r--  linux-2.6-xen-sparse/include/linux/skbuff.h | 107
-rw-r--r--  linux-2.6-xen-sparse/kernel/fork.c | 170
-rw-r--r--  linux-2.6-xen-sparse/lib/Makefile | 3
-rw-r--r--  linux-2.6-xen-sparse/mm/Kconfig | 12
-rw-r--r--  linux-2.6-xen-sparse/mm/highmem.c | 41
-rw-r--r--  linux-2.6-xen-sparse/mm/memory.c | 24
-rw-r--r--  linux-2.6-xen-sparse/mm/mmap.c | 76
-rw-r--r--  linux-2.6-xen-sparse/mm/page_alloc.c | 256
-rw-r--r--  linux-2.6-xen-sparse/net/core/dev.c | 277
-rw-r--r--  linux-2.6-xen-sparse/net/core/skbuff.c | 159
135 files changed, 3600 insertions, 4796 deletions
diff --git a/linux-2.6-xen-sparse/arch/i386/Kconfig b/linux-2.6-xen-sparse/arch/i386/Kconfig
index 8c712269d0..957c2a92f7 100644
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig
@@ -37,6 +37,10 @@ config GENERIC_IOMAP
bool
default y
+config GENERIC_HWEIGHT
+ bool
+ default y
+
config ARCH_MAY_HAVE_PC_FDC
bool
default y
@@ -49,6 +53,35 @@ source "init/Kconfig"
menu "Processor type and features"
+config SMP
+ bool "Symmetric multi-processing support"
+ ---help---
+ This enables support for systems with more than one CPU. If you have
+ a system with only one CPU, like most personal computers, say N. If
+ you have a system with more than one CPU, say Y.
+
+ If you say N here, the kernel will run on single and multiprocessor
+ machines, but will use only one CPU of a multiprocessor machine. If
+ you say Y here, the kernel will run on many, but not all,
+ singleprocessor machines. On a singleprocessor machine, the kernel
+ will run faster if you say N here.
+
+ Note that if you say Y here and choose architecture "586" or
+ "Pentium" under "Processor family", the kernel will not work on 486
+ architectures. Similarly, multiprocessor kernels for the "PPro"
+ architecture may not work on all Pentium based boards.
+
+ People using multiprocessor machines who say Y here should also say
+ Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
+ Management" code will be disabled if you say Y here.
+
+ See also the <file:Documentation/smp.txt>,
+ <file:Documentation/i386/IO-APIC.txt>,
+ <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
+ <http://www.tldp.org/docs.html#howto>.
+
+ If you don't know what to do here, say N.
+
choice
prompt "Subarchitecture Type"
default X86_PC
@@ -89,6 +122,7 @@ config X86_VOYAGER
config X86_NUMAQ
bool "NUMAQ (IBM/Sequent)"
+ select SMP
select NUMA
help
This option is used for getting Linux to run on a (IBM/Sequent) NUMA
@@ -183,48 +217,6 @@ config HPET_EMULATE_RTC
depends on HPET_TIMER && RTC=y
default y
-config SMP
- bool "Symmetric multi-processing support"
- ---help---
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
-
- If you say N here, the kernel will run on single and multiprocessor
- machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
- singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
-
- See also the <file:Documentation/smp.txt>,
- <file:Documentation/i386/IO-APIC.txt>,
- <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
- <http://www.tldp.org/docs.html#howto>.
-
- If you don't know what to do here, say N.
-
-config SMP_ALTERNATIVES
- bool "SMP alternatives support (EXPERIMENTAL)"
- depends on SMP && EXPERIMENTAL
- help
- Try to reduce the overhead of running an SMP kernel on a uniprocessor
- host slightly by replacing certain key instruction sequences
- according to whether we currently have more than one CPU available.
- This should provide a noticeable boost to performance when
- running SMP kernels on UP machines, and have negligible impact
- when running on an true SMP host.
-
- If unsure, say N.
-
config NR_CPUS
int "Maximum number of CPUs (2-255)"
range 2 255
@@ -249,6 +241,15 @@ config SCHED_SMT
cost of slightly increased overhead in some places. If unsure say
N here.
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ default y
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
source "kernel/Kconfig.preempt"
config X86_UP_APIC
@@ -434,6 +435,7 @@ choice
config NOHIGHMEM
bool "off"
+ depends on !X86_NUMAQ
---help---
Linux can use up to 64 Gigabytes of physical memory on x86 systems.
However, the address space of 32-bit x86 processors is only 4
@@ -470,6 +472,7 @@ config NOHIGHMEM
config HIGHMEM4G
bool "4GB"
+ depends on !X86_NUMAQ
help
Select this if you have a 32-bit processor and between 1 and 4
gigabytes of physical RAM.
@@ -485,7 +488,7 @@ endchoice
choice
depends on EXPERIMENTAL && !X86_PAE
- prompt "Memory split"
+ prompt "Memory split" if EMBEDDED
default VMSPLIT_3G
help
Select the desired split between kernel and user memory.
@@ -537,13 +540,15 @@ config NUMA
default n if X86_PC
default y if (X86_NUMAQ || X86_SUMMIT)
-# Need comments to help the hapless user trying to turn on NUMA support
-comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support"
- depends on X86_NUMAQ && (!HIGHMEM64G || !SMP)
-
comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI)
+config NODES_SHIFT
+ int
+ default "4" if X86_NUMAQ
+ default "3"
+ depends on NEED_MULTIPLE_NODES
+
config HAVE_ARCH_BOOTMEM_NODE
bool
depends on NUMA
@@ -697,13 +702,18 @@ config BOOT_IOREMAP
default y
config REGPARM
- bool "Use register arguments (EXPERIMENTAL)"
- depends on EXPERIMENTAL
- default n
+ bool "Use register arguments"
+ default y
help
- Compile the kernel with -mregparm=3. This uses a different ABI
- and passes the first three arguments of a function call in registers.
- This will probably break binary only modules.
+ Compile the kernel with -mregparm=3. This instructs gcc to use
+ a more efficient function call ABI which passes the first three
+ arguments of a function call via registers, which results in denser
+ and faster code.
+
+ If this option is disabled, then the default ABI of passing
+ arguments via the stack is used.
+
+ If unsure, say Y.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
@@ -772,26 +782,12 @@ config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER
---help---
- Say Y here to experiment with turning CPUs off and on. CPUs
- can be controlled through /sys/devices/system/cpu.
-
- Say N.
-
-config DOUBLEFAULT
- default y
- bool "Enable doublefault exception handler" if EMBEDDED
- depends on !X86_NO_TSS
- help
- This option allows trapping of rare doublefault exceptions that
- would otherwise cause a system to silently reboot. Disabling this
- option saves about 4k and might cause you much additional grey
- hair.
+ Say Y here to experiment with turning CPUs off and on, and to
+ enable suspend on SMP systems. CPUs can be controlled through
+ /sys/devices/system/cpu.
endmenu
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
- depends on HIGHMEM
menu "Power management options (ACPI, APM)"
depends on !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
diff --git a/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu b/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu
index a52d7d494f..f90d8d0df1 100644
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu
@@ -311,5 +311,5 @@ config X86_OOSTORE
config X86_TSC
bool
- depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ
+ depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ
default y
diff --git a/linux-2.6-xen-sparse/arch/i386/Kconfig.debug b/linux-2.6-xen-sparse/arch/i386/Kconfig.debug
new file mode 100644
index 0000000000..fb28fe7418
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig.debug
@@ -0,0 +1,94 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+config EARLY_PRINTK
+ bool "Early printk" if EMBEDDED && DEBUG_KERNEL
+ default y
+ help
+ Write kernel log output directly into the VGA buffer or to a serial
+ port.
+
+ This is useful for kernel debugging when your machine crashes very
+ early before the console code is initialized. For normal operation
+ it is not recommended because it looks ugly and doesn't cooperate
+ with klogd/syslogd or the X server. You should normally say N here,
+ unless you want to debug such a crash.
+
+config DEBUG_STACKOVERFLOW
+ bool "Check for stack overflows"
+ depends on DEBUG_KERNEL
+ help
+ This option will cause messages to be printed if free stack space
+ drops below a certain limit.
+
+config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+ help
+ Enables the display of the minimum amount of free stack which each
+ task has ever had available in the sysrq-T and sysrq-P debug output.
+
+ This option will slow down process creation somewhat.
+
+config STACK_BACKTRACE_COLS
+ int "Stack backtraces per line" if DEBUG_KERNEL
+ range 1 3
+ default 2
+ help
+ Selects how many stack backtrace entries per line to display.
+
+ This can save screen space when displaying traces.
+
+comment "Page alloc debug is incompatible with Software Suspend on i386"
+ depends on DEBUG_KERNEL && SOFTWARE_SUSPEND
+
+config DEBUG_PAGEALLOC
+ bool "Debug page memory allocations"
+ depends on DEBUG_KERNEL && !SOFTWARE_SUSPEND && !HUGETLBFS
+ help
+ Unmap pages from the kernel linear mapping after free_pages().
+ This results in a large slowdown, but helps to find certain types
+ of memory corruptions.
+
+config DEBUG_RODATA
+ bool "Write protect kernel read-only data structures"
+ depends on DEBUG_KERNEL
+ help
+ Mark the kernel read-only data as write-protected in the pagetables,
+ in order to catch accidental (and incorrect) writes to such const
+ data. This option may have a slight performance impact because a
+ portion of the kernel code won't be covered by a 2MB TLB anymore.
+ If in doubt, say "N".
+
+config 4KSTACKS
+ bool "Use 4Kb for kernel stacks instead of 8Kb"
+ depends on DEBUG_KERNEL
+ help
+ If you say Y here the kernel will use a 4Kb stacksize for the
+ kernel stack attached to each process/thread. This facilitates
+ running more threads on a system and also reduces the pressure
+ on the VM subsystem for higher order allocations. This option
+ will also use IRQ stacks to compensate for the reduced stackspace.
+
+config X86_FIND_SMP_CONFIG
+ bool
+ depends on X86_LOCAL_APIC || X86_VOYAGER
+ default y
+
+config X86_MPPARSE
+ bool
+ depends on X86_LOCAL_APIC && !X86_VISWS
+ default y
+
+config DOUBLEFAULT
+ default y
+ bool "Enable doublefault exception handler" if EMBEDDED
+ depends on !X86_NO_TSS
+ help
+ This option allows trapping of rare doublefault exceptions that
+ would otherwise cause a system to silently reboot. Disabling this
+ option saves about 4k and might cause you much additional grey
+ hair.
+
+endmenu
diff --git a/linux-2.6-xen-sparse/arch/i386/Makefile b/linux-2.6-xen-sparse/arch/i386/Makefile
index 99a8fa2aaa..d6d0f45999 100644
--- a/linux-2.6-xen-sparse/arch/i386/Makefile
+++ b/linux-2.6-xen-sparse/arch/i386/Makefile
@@ -39,6 +39,9 @@ include $(srctree)/arch/i386/Makefile.cpu
cflags-$(CONFIG_REGPARM) += -mregparm=3
+# temporary until string.h is fixed
+cflags-y += -ffreestanding
+
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
# a lot more stack due to the lack of sharing of stacklots:
CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)
@@ -108,8 +111,8 @@ AFLAGS += $(mflags-y)
boot := arch/i386/boot
-.PHONY: zImage bzImage compressed zlilo bzlilo \
- zdisk bzdisk fdimage fdimage144 fdimage288 install
+PHONY += zImage bzImage compressed zlilo bzlilo \
+ zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
ifdef CONFIG_XEN
CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
@@ -141,7 +144,7 @@ zlilo bzlilo: vmlinux
zdisk bzdisk: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
-fdimage fdimage144 fdimage288: vmlinux
+fdimage fdimage144 fdimage288 isoimage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
install:
@@ -159,7 +162,10 @@ define archhelp
echo ' install to $$(INSTALL_PATH) and run lilo'
echo ' bzdisk - Create a boot floppy in /dev/fd0'
echo ' fdimage - Create a boot floppy image'
+ echo ' isoimage - Create a boot CD-ROM image'
endef
-CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf
+CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
+ arch/$(ARCH)/boot/image.iso \
+ arch/$(ARCH)/boot/mtools.conf
CLEAN_FILES += vmlinuz vmlinux-stripped
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile
index d0ccf7b375..ddaa19d896 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile
@@ -6,8 +6,8 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
- pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- quirks.o i8237.o topology.o
+ pci-dma.o i386_ksyms.o i387.o bootflag.o \
+ quirks.o i8237.o topology.o alternative.o
obj-y += cpu/
obj-y += timers/
@@ -37,7 +37,6 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
obj-$(CONFIG_VM86) += vm86.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o
EXTRA_AFLAGS := -traditional
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c
index 96e452577d..08e8352ea4 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c
@@ -671,10 +671,10 @@ unsigned long __init acpi_find_rsdp(void)
unsigned long rsdp_phys = 0;
if (efi_enabled) {
- if (efi.acpi20)
- return __pa(efi.acpi20);
- else if (efi.acpi)
- return __pa(efi.acpi);
+ if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+ return efi.acpi20;
+ else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
+ return efi.acpi;
}
/*
* Scan memory looking for the RSDP signature. First search EBDA (low
@@ -696,6 +696,9 @@ static int __init acpi_parse_madt_lapic_entries(void)
{
int count;
+ if (!cpu_has_apic)
+ return -ENODEV;
+
/*
* Note that the LAPIC address is obtained from the MADT (32-bit value)
 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
@@ -754,6 +757,9 @@ static int __init acpi_parse_madt_ioapic_entries(void)
return -ENODEV;
}
+ if (!cpu_has_apic)
+ return -ENODEV;
+
/*
* if "noapic" boot option, don't look for IO-APICs
*/
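
The acpi_find_rsdp() change above tracks the 2.6.17 EFI rework: the efi structure's table fields now hold physical addresses directly, with EFI_INVALID_TABLE_ADDR marking an absent table, so the old __pa() conversion of a kernel pointer no longer applies. A minimal sketch of the lookup pattern this hunk assumes (the helper name is illustrative, not from the patch):

    /* Sketch only: in 2.6.17, efi.acpi20/efi.acpi are physical addresses,
     * with EFI_INVALID_TABLE_ADDR meaning "table not present". */
    static unsigned long find_rsdp_via_efi(void)
    {
        if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
            return efi.acpi20;      /* prefer the ACPI 2.0 RSDP */
        if (efi.acpi != EFI_INVALID_TABLE_ADDR)
            return efi.acpi;        /* fall back to ACPI 1.0 */
        return 0;                   /* caller scans EBDA/BIOS area instead */
    }
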
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c
index 07a0994307..1f8297bc65 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c
@@ -38,6 +38,7 @@
#include <asm/i8253.h>
#include <mach_apic.h>
+#include <mach_apicdef.h>
#include <mach_ipi.h>
#include "io_ports.h"
@@ -60,6 +61,18 @@ int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
*/
int apic_verbosity;
+int modern_apic(void)
+{
+ unsigned int lvr, version;
+ /* AMD systems use old APIC versions, so check the CPU */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ boot_cpu_data.x86 >= 0xf)
+ return 1;
+ lvr = apic_read(APIC_LVR);
+ version = GET_APIC_VERSION(lvr);
+ return version >= 0x14;
+}
+
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
* each architecture has to answer this themselves.
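
The new modern_apic() helper treats local APIC version 0x14 and above as an integrated ("modern") APIC; AMD parts from family 0xf report an older version value in APIC_LVR despite being modern, hence the vendor check before reading the register. A hedged usage sketch (the caller is illustrative, not part of this patch):

    /* Illustrative only: a quirk for discrete 82489DX-style APICs
     * could be keyed off modern_apic(). */
    static void apply_legacy_apic_quirk(void)
    {
        if (modern_apic())
            return;             /* integrated APIC: no quirk needed */
        /* ... legacy external-APIC handling would go here ... */
    }
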
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c
index ffcee15c86..4d1a486dee 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c
@@ -15,6 +15,10 @@
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <mach_apic.h>
+#else
+#ifdef CONFIG_XEN
+#define phys_pkg_id(a,b) a
+#endif
#endif
#include <asm/hypervisor.h>
@@ -28,9 +32,10 @@ DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
#endif
-static int cachesize_override __devinitdata = -1;
-static int disable_x86_fxsr __devinitdata = 0;
-static int disable_x86_serial_nr __devinitdata = 1;
+static int cachesize_override __cpuinitdata = -1;
+static int disable_x86_fxsr __cpuinitdata;
+static int disable_x86_serial_nr __cpuinitdata = 1;
+static int disable_x86_sep __cpuinitdata;
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
@@ -62,7 +67,7 @@ static int __init cachesize_setup(char *str)
}
__setup("cachesize=", cachesize_setup);
-int __devinit get_model_name(struct cpuinfo_x86 *c)
+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
char *p, *q;
@@ -92,7 +97,7 @@ int __devinit get_model_name(struct cpuinfo_x86 *c)
}
-void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ecx, edx, l2size;
@@ -133,7 +138,7 @@ void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
/* Look up CPU names by table lookup. */
-static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
+static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
{
struct cpu_model_info *info;
@@ -154,7 +159,7 @@ static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
}
-static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
{
char *v = c->x86_vendor_id;
int i;
@@ -190,6 +195,14 @@ static int __init x86_fxsr_setup(char * s)
__setup("nofxsr", x86_fxsr_setup);
+static int __init x86_sep_setup(char * s)
+{
+ disable_x86_sep = 1;
+ return 1;
+}
+__setup("nosep", x86_sep_setup);
+
+
/* Standard macro to see if a specific flag is changeable */
static inline int flag_is_changeable_p(u32 flag)
{
@@ -213,7 +226,7 @@ static inline int flag_is_changeable_p(u32 flag)
/* Probe for the CPUID instruction */
-static int __devinit have_cpuid_p(void)
+static int __cpuinit have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
@@ -257,10 +270,10 @@ static void __init early_cpu_detect(void)
}
}
-void __devinit generic_identify(struct cpuinfo_x86 * c)
+void __cpuinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
- int junk;
+ int ebx;
if (have_cpuid_p()) {
/* Get vendor name */
@@ -276,7 +289,7 @@ void __devinit generic_identify(struct cpuinfo_x86 * c)
/* Intel-defined flags: level 0x00000001 */
if ( c->cpuid_level >= 0x00000001 ) {
u32 capability, excap;
- cpuid(0x00000001, &tfms, &junk, &excap, &capability);
+ cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
c->x86_capability[0] = capability;
c->x86_capability[4] = excap;
c->x86 = (tfms >> 8) & 15;
@@ -286,6 +299,11 @@ void __devinit generic_identify(struct cpuinfo_x86 * c)
if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
c->x86_mask = tfms & 15;
+#ifdef CONFIG_SMP
+ c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+#else
+ c->apicid = (ebx >> 24) & 0xFF;
+#endif
} else {
/* Have CPUID level 0 only - unheard of */
c->x86 = 4;
@@ -310,7 +328,7 @@ void __devinit generic_identify(struct cpuinfo_x86 * c)
#endif
}
-static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
/* Disable processor serial number */
@@ -338,7 +356,7 @@ __setup("serialnumber", x86_serial_nr_setup);
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
-void __devinit identify_cpu(struct cpuinfo_x86 *c)
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
{
int i;
@@ -408,6 +426,10 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
clear_bit(X86_FEATURE_XMM, c->x86_capability);
}
+ /* SEP disabled? */
+ if (disable_x86_sep)
+ clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
if (disable_pse)
clear_bit(X86_FEATURE_PSE, c->x86_capability);
@@ -420,7 +442,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
else
/* Last resort... */
sprintf(c->x86_model_id, "%02x/%02x",
- c->x86_vendor, c->x86_model);
+ c->x86, c->x86_model);
}
/* Now the feature flags better reflect actual CPU features! */
@@ -456,7 +478,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
}
#ifdef CONFIG_X86_HT
-void __devinit detect_ht(struct cpuinfo_x86 *c)
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
int index_msb, core_bits;
@@ -464,7 +486,6 @@ void __devinit detect_ht(struct cpuinfo_x86 *c)
cpuid(1, &eax, &ebx, &ecx, &edx);
- c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
@@ -503,7 +524,7 @@ void __devinit detect_ht(struct cpuinfo_x86 *c)
}
#endif
-void __devinit print_cpu_info(struct cpuinfo_x86 *c)
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
char *vendor = NULL;
@@ -703,7 +724,7 @@ void __cpuinit cpu_init(void)
}
#ifdef CONFIG_HOTPLUG_CPU
-void __devinit cpu_uninit(void)
+void __cpuinit cpu_uninit(void)
{
int cpu = raw_smp_processor_id();
cpu_clear(cpu, cpu_initialized);
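
In generic_identify() above, the former junk variable becomes ebx because CPUID leaf 1 returns the CPU's initial APIC ID in EBX bits 31:24; c->apicid is now filled in here, and the equivalent assignment is dropped from detect_ht() in the same file. A small sketch of the decode, with illustrative variable names:

    /* Sketch: the initial APIC ID lives in EBX[31:24] of CPUID leaf 1. */
    unsigned int eax, ebx, ecx, edx;
    cpuid(1, &eax, &ebx, &ecx, &edx);
    unsigned int initial_apic_id = (ebx >> 24) & 0xFF;  /* 0..255 */
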
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c
new file mode 100644
index 0000000000..f549dd2af6
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c
@@ -0,0 +1,693 @@
+/*
+ * Routines to identify caches on Intel CPU.
+ *
+ * Changes:
+ * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
+ * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+
+#include <asm/processor.h>
+#include <asm/smp.h>
+
+#define LVL_1_INST 1
+#define LVL_1_DATA 2
+#define LVL_2 3
+#define LVL_3 4
+#define LVL_TRACE 5
+
+struct _cache_table
+{
+ unsigned char descriptor;
+ char cache_type;
+ short size;
+};
+
+/* all the cache descriptor types we care about (no TLB or trace cache entries) */
+static struct _cache_table cache_table[] __cpuinitdata =
+{
+ { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
+ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
+ { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
+ { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
+ { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
+ { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
+ { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
+ { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
+ { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
+ { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
+ { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
+ { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
+ { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */
+ { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */
+ { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */
+ { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */
+ { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */
+ { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */
+ { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
+ { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
+ { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
+ { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
+ { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
+ { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
+ { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
+ { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */
+ { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */
+ { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
+ { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
+ { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
+ { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */
+ { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */
+ { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
+ { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */
+ { 0x00, 0, 0}
+};
+
+
+enum _cache_type
+{
+ CACHE_TYPE_NULL = 0,
+ CACHE_TYPE_DATA = 1,
+ CACHE_TYPE_INST = 2,
+ CACHE_TYPE_UNIFIED = 3
+};
+
+union _cpuid4_leaf_eax {
+ struct {
+ enum _cache_type type:5;
+ unsigned int level:3;
+ unsigned int is_self_initializing:1;
+ unsigned int is_fully_associative:1;
+ unsigned int reserved:4;
+ unsigned int num_threads_sharing:12;
+ unsigned int num_cores_on_die:6;
+ } split;
+ u32 full;
+};
+
+union _cpuid4_leaf_ebx {
+ struct {
+ unsigned int coherency_line_size:12;
+ unsigned int physical_line_partition:10;
+ unsigned int ways_of_associativity:10;
+ } split;
+ u32 full;
+};
+
+union _cpuid4_leaf_ecx {
+ struct {
+ unsigned int number_of_sets:32;
+ } split;
+ u32 full;
+};
+
+struct _cpuid4_info {
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
+ unsigned long size;
+ cpumask_t shared_cpu_map;
+};
+
+static unsigned short num_cache_leaves;
+
+static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+{
+ unsigned int eax, ebx, ecx, edx;
+ union _cpuid4_leaf_eax cache_eax;
+
+ cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
+ cache_eax.full = eax;
+ if (cache_eax.split.type == CACHE_TYPE_NULL)
+ return -EIO; /* better error ? */
+
+ this_leaf->eax.full = eax;
+ this_leaf->ebx.full = ebx;
+ this_leaf->ecx.full = ecx;
+ this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) *
+ (this_leaf->ebx.split.coherency_line_size + 1) *
+ (this_leaf->ebx.split.physical_line_partition + 1) *
+ (this_leaf->ebx.split.ways_of_associativity + 1);
+ return 0;
+}
+
+/* will only be called once; __init is safe here */
+static int __init find_num_cache_leaves(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ union _cpuid4_leaf_eax cache_eax;
+ int i = -1;
+
+ do {
+ ++i;
+ /* Do cpuid(4) loop to find out num_cache_leaves */
+ cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
+ cache_eax.full = eax;
+ } while (cache_eax.split.type != CACHE_TYPE_NULL);
+ return i;
+}
+
+unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
+{
+ unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
+ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
+ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+#ifdef CONFIG_SMP
+#ifndef CONFIG_XEN
+ unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
+#endif
+#endif
+
+ if (c->cpuid_level > 3) {
+ static int is_initialized;
+
+ if (is_initialized == 0) {
+ /* Init num_cache_leaves from boot CPU */
+ num_cache_leaves = find_num_cache_leaves();
+ is_initialized++;
+ }
+
+ /*
+ * Whenever possible use cpuid(4), deterministic cache
+ * parameters cpuid leaf to find the cache details
+ */
+ for (i = 0; i < num_cache_leaves; i++) {
+ struct _cpuid4_info this_leaf;
+
+ int retval;
+
+ retval = cpuid4_cache_lookup(i, &this_leaf);
+ if (retval >= 0) {
+ switch(this_leaf.eax.split.level) {
+ case 1:
+ if (this_leaf.eax.split.type ==
+ CACHE_TYPE_DATA)
+ new_l1d = this_leaf.size/1024;
+ else if (this_leaf.eax.split.type ==
+ CACHE_TYPE_INST)
+ new_l1i = this_leaf.size/1024;
+ break;
+ case 2:
+ new_l2 = this_leaf.size/1024;
+ num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ l2_id = c->apicid >> index_msb;
+ break;
+ case 3:
+ new_l3 = this_leaf.size/1024;
+ num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ l3_id = c->apicid >> index_msb;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+ /*
+ * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
+ * trace cache
+ */
+ if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+ /* supports eax=2 call */
+ int i, j, n;
+ int regs[4];
+ unsigned char *dp = (unsigned char *)regs;
+ int only_trace = 0;
+
+ if (num_cache_leaves != 0 && c->x86 == 15)
+ only_trace = 1;
+
+ /* Number of times to iterate */
+ n = cpuid_eax(2) & 0xFF;
+
+ for ( i = 0 ; i < n ; i++ ) {
+ cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+
+ /* If bit 31 is set, this is an unknown format */
+ for ( j = 0 ; j < 3 ; j++ ) {
+ if ( regs[j] < 0 ) regs[j] = 0;
+ }
+
+ /* Byte 0 is level count, not a descriptor */
+ for ( j = 1 ; j < 16 ; j++ ) {
+ unsigned char des = dp[j];
+ unsigned char k = 0;
+
+ /* look up this descriptor in the table */
+ while (cache_table[k].descriptor != 0)
+ {
+ if (cache_table[k].descriptor == des) {
+ if (only_trace && cache_table[k].cache_type != LVL_TRACE)
+ break;
+ switch (cache_table[k].cache_type) {
+ case LVL_1_INST:
+ l1i += cache_table[k].size;
+ break;
+ case LVL_1_DATA:
+ l1d += cache_table[k].size;
+ break;
+ case LVL_2:
+ l2 += cache_table[k].size;
+ break;
+ case LVL_3:
+ l3 += cache_table[k].size;
+ break;
+ case LVL_TRACE:
+ trace += cache_table[k].size;
+ break;
+ }
+
+ break;
+ }
+
+ k++;
+ }
+ }
+ }
+ }
+
+ if (new_l1d)
+ l1d = new_l1d;
+
+ if (new_l1i)
+ l1i = new_l1i;
+
+ if (new_l2) {
+ l2 = new_l2;
+#ifdef CONFIG_SMP
+#ifndef CONFIG_XEN
+ cpu_llc_id[cpu] = l2_id;
+#endif
+#endif
+ }
+
+ if (new_l3) {
+ l3 = new_l3;
+#ifdef CONFIG_SMP
+#ifndef CONFIG_XEN
+ cpu_llc_id[cpu] = l3_id;
+#endif
+#endif
+ }
+
+ if (trace)
+ printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
+ else if ( l1i )
+ printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
+
+ if (l1d)
+ printk(", L1 D cache: %dK\n", l1d);
+ else
+ printk("\n");
+
+ if (l2)
+ printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
+
+ if (l3)
+ printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+
+ c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
+
+ return l2;
+}
+
+/* pointer to _cpuid4_info array (for each cache leaf) */
+static struct _cpuid4_info *cpuid4_info[NR_CPUS];
+#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y]))
+
+#ifdef CONFIG_SMP
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+ struct _cpuid4_info *this_leaf, *sibling_leaf;
+ unsigned long num_threads_sharing;
+ int index_msb, i;
+ struct cpuinfo_x86 *c = cpu_data;
+
+ this_leaf = CPUID4_INFO_IDX(cpu, index);
+ num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
+
+ if (num_threads_sharing == 1)
+ cpu_set(cpu, this_leaf->shared_cpu_map);
+ else {
+ index_msb = get_count_order(num_threads_sharing);
+
+ for_each_online_cpu(i) {
+ if (c[i].apicid >> index_msb ==
+ c[cpu].apicid >> index_msb) {
+ cpu_set(i, this_leaf->shared_cpu_map);
+ if (i != cpu && cpuid4_info[i]) {
+ sibling_leaf = CPUID4_INFO_IDX(i, index);
+ cpu_set(cpu, sibling_leaf->shared_cpu_map);
+ }
+ }
+ }
+ }
+}
+static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
+{
+ struct _cpuid4_info *this_leaf, *sibling_leaf;
+ int sibling;
+
+ this_leaf = CPUID4_INFO_IDX(cpu, index);
+ for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
+ sibling_leaf = CPUID4_INFO_IDX(sibling, index);
+ cpu_clear(cpu, sibling_leaf->shared_cpu_map);
+ }
+}
+#else
+static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
+static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
+#endif
+
+static void free_cache_attributes(unsigned int cpu)
+{
+ kfree(cpuid4_info[cpu]);
+ cpuid4_info[cpu] = NULL;
+}
+
+static int __cpuinit detect_cache_attributes(unsigned int cpu)
+{
+ struct _cpuid4_info *this_leaf;
+ unsigned long j;
+ int retval;
+ cpumask_t oldmask;
+
+ if (num_cache_leaves == 0)
+ return -ENOENT;
+
+ cpuid4_info[cpu] = kmalloc(
+ sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+ if (unlikely(cpuid4_info[cpu] == NULL))
+ return -ENOMEM;
+ memset(cpuid4_info[cpu], 0,
+ sizeof(struct _cpuid4_info) * num_cache_leaves);
+
+ oldmask = current->cpus_allowed;
+ retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
+ if (retval)
+ goto out;
+
+ /* Do cpuid and store the results */
+ retval = 0;
+ for (j = 0; j < num_cache_leaves; j++) {
+ this_leaf = CPUID4_INFO_IDX(cpu, j);
+ retval = cpuid4_cache_lookup(j, this_leaf);
+ if (unlikely(retval < 0))
+ break;
+ cache_shared_cpu_map_setup(cpu, j);
+ }
+ set_cpus_allowed(current, oldmask);
+
+out:
+ if (retval)
+ free_cache_attributes(cpu);
+ return retval;
+}
+
+#ifdef CONFIG_SYSFS
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
+
+/* pointer to kobject for cpuX/cache */
+static struct kobject * cache_kobject[NR_CPUS];
+
+struct _index_kobject {
+ struct kobject kobj;
+ unsigned int cpu;
+ unsigned short index;
+};
+
+/* pointer to array of kobjects for cpuX/cache/indexY */
+static struct _index_kobject *index_kobject[NR_CPUS];
+#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y]))
+
+#define show_one_plus(file_name, object, val) \
+static ssize_t show_##file_name \
+ (struct _cpuid4_info *this_leaf, char *buf) \
+{ \
+ return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
+}
+
+show_one_plus(level, eax.split.level, 0);
+show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
+show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
+show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
+show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
+
+static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
+{
+ return sprintf (buf, "%luK\n", this_leaf->size / 1024);
+}
+
+static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
+{
+ char mask_str[NR_CPUS];
+ cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
+ return sprintf(buf, "%s\n", mask_str);
+}
+
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
+ switch(this_leaf->eax.split.type) {
+ case CACHE_TYPE_DATA:
+ return sprintf(buf, "Data\n");
+ break;
+ case CACHE_TYPE_INST:
+ return sprintf(buf, "Instruction\n");
+ break;
+ case CACHE_TYPE_UNIFIED:
+ return sprintf(buf, "Unified\n");
+ break;
+ default:
+ return sprintf(buf, "Unknown\n");
+ break;
+ }
+}
+
+struct _cache_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct _cpuid4_info *, char *);
+ ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+};
+
+#define define_one_ro(_name) \
+static struct _cache_attr _name = \
+ __ATTR(_name, 0444, show_##_name, NULL)
+
+define_one_ro(level);
+define_one_ro(type);
+define_one_ro(coherency_line_size);
+define_one_ro(physical_line_partition);
+define_one_ro(ways_of_associativity);
+define_one_ro(number_of_sets);
+define_one_ro(size);
+define_one_ro(shared_cpu_map);
+
+static struct attribute * default_attrs[] = {
+ &type.attr,
+ &level.attr,
+ &coherency_line_size.attr,
+ &physical_line_partition.attr,
+ &ways_of_associativity.attr,
+ &number_of_sets.attr,
+ &size.attr,
+ &shared_cpu_map.attr,
+ NULL
+};
+
+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
+static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
+{
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->show ?
+ fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf) :
+ 0;
+ return ret;
+}
+
+static ssize_t store(struct kobject * kobj, struct attribute * attr,
+ const char * buf, size_t count)
+{
+ return 0;
+}
+
+static struct sysfs_ops sysfs_ops = {
+ .show = show,
+ .store = store,
+};
+
+static struct kobj_type ktype_cache = {
+ .sysfs_ops = &sysfs_ops,
+ .default_attrs = default_attrs,
+};
+
+static struct kobj_type ktype_percpu_entry = {
+ .sysfs_ops = &sysfs_ops,
+};
+
+static void cpuid4_cache_sysfs_exit(unsigned int cpu)
+{
+ kfree(cache_kobject[cpu]);
+ kfree(index_kobject[cpu]);
+ cache_kobject[cpu] = NULL;
+ index_kobject[cpu] = NULL;
+ free_cache_attributes(cpu);
+}
+
+static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
+{
+
+ if (num_cache_leaves == 0)
+ return -ENOENT;
+
+ detect_cache_attributes(cpu);
+ if (cpuid4_info[cpu] == NULL)
+ return -ENOENT;
+
+ /* Allocate all required memory */
+ cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (unlikely(cache_kobject[cpu] == NULL))
+ goto err_out;
+ memset(cache_kobject[cpu], 0, sizeof(struct kobject));
+
+ index_kobject[cpu] = kmalloc(
+ sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
+ if (unlikely(index_kobject[cpu] == NULL))
+ goto err_out;
+ memset(index_kobject[cpu], 0,
+ sizeof(struct _index_kobject) * num_cache_leaves);
+
+ return 0;
+
+err_out:
+ cpuid4_cache_sysfs_exit(cpu);
+ return -ENOMEM;
+}
+
+/* Add/Remove cache interface for CPU device */
+static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
+{
+ unsigned int cpu = sys_dev->id;
+ unsigned long i, j;
+ struct _index_kobject *this_object;
+ int retval = 0;
+
+ retval = cpuid4_cache_sysfs_init(cpu);
+ if (unlikely(retval < 0))
+ return retval;
+
+ cache_kobject[cpu]->parent = &sys_dev->kobj;
+ kobject_set_name(cache_kobject[cpu], "%s", "cache");
+ cache_kobject[cpu]->ktype = &ktype_percpu_entry;
+ retval = kobject_register(cache_kobject[cpu]);
+
+ for (i = 0; i < num_cache_leaves; i++) {
+ this_object = INDEX_KOBJECT_PTR(cpu,i);
+ this_object->cpu = cpu;
+ this_object->index = i;
+ this_object->kobj.parent = cache_kobject[cpu];
+ kobject_set_name(&(this_object->kobj), "index%1lu", i);
+ this_object->kobj.ktype = &ktype_cache;
+ retval = kobject_register(&(this_object->kobj));
+ if (unlikely(retval)) {
+ for (j = 0; j < i; j++) {
+ kobject_unregister(
+ &(INDEX_KOBJECT_PTR(cpu,j)->kobj));
+ }
+ kobject_unregister(cache_kobject[cpu]);
+ cpuid4_cache_sysfs_exit(cpu);
+ break;
+ }
+ }
+ return retval;
+}
+
+static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
+{
+ unsigned int cpu = sys_dev->id;
+ unsigned long i;
+
+ for (i = 0; i < num_cache_leaves; i++) {
+ cache_remove_shared_cpu_map(cpu, i);
+ kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
+ }
+ kobject_unregister(cache_kobject[cpu]);
+ cpuid4_cache_sysfs_exit(cpu);
+ return;
+}
+
+static int cacheinfo_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ struct sys_device *sys_dev;
+
+ sys_dev = get_cpu_sysdev(cpu);
+ switch (action) {
+ case CPU_ONLINE:
+ cache_add_dev(sys_dev);
+ break;
+ case CPU_DEAD:
+ cache_remove_dev(sys_dev);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cacheinfo_cpu_notifier =
+{
+ .notifier_call = cacheinfo_cpu_callback,
+};
+
+static int __cpuinit cache_sysfs_init(void)
+{
+ int i;
+
+ if (num_cache_leaves == 0)
+ return 0;
+
+ register_cpu_notifier(&cacheinfo_cpu_notifier);
+
+ for_each_online_cpu(i) {
+ cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE,
+ (void *)(long)i);
+ }
+
+ return 0;
+}
+
+device_initcall(cache_sysfs_init);
+
+#endif
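
In cpuid4_cache_lookup() above, each CPUID leaf 4 field is encoded as (value - 1), so the total cache size is the product of the four decoded fields. A worked example under assumed raw register values (not taken from any specific CPU):

    /* Assumed raw leaf-4 fields: ways_of_associativity = 7,
     * physical_line_partition = 0, coherency_line_size = 63,
     * number_of_sets = 4095; each is stored minus one. */
    unsigned long size = (4095 + 1) * (63 + 1) * (0 + 1) * (7 + 1);
    /* = 4096 sets * 64-byte lines * 1 partition * 8 ways
     * = 2097152 bytes, which this code reports as 2048K. */
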
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main-xen.c
index 06720038ff..380e20ec5b 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main-xen.c
@@ -4,11 +4,12 @@
#include <linux/module.h>
#include <linux/seq_file.h>
#include <asm/uaccess.h>
+#include <linux/mutex.h>
#include <asm/mtrr.h>
#include "mtrr.h"
-static DECLARE_MUTEX(mtrr_sem);
+static DEFINE_MUTEX(mtrr_mutex);
void generic_get_mtrr(unsigned int reg, unsigned long *base,
unsigned int *size, mtrr_type * type)
@@ -65,7 +66,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
int error;
dom0_op_t op;
- down(&mtrr_sem);
+ mutex_lock(&mtrr_mutex);
op.cmd = DOM0_ADD_MEMTYPE;
op.u.add_memtype.mfn = base;
@@ -73,7 +74,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
op.u.add_memtype.type = type;
error = HYPERVISOR_dom0_op(&op);
if (error) {
- up(&mtrr_sem);
+ mutex_unlock(&mtrr_mutex);
BUG_ON(error > 0);
return error;
}
@@ -81,7 +82,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (increment)
++usage_table[op.u.add_memtype.reg];
- up(&mtrr_sem);
+ mutex_unlock(&mtrr_mutex);
return op.u.add_memtype.reg;
}
@@ -118,7 +119,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
int error = -EINVAL;
dom0_op_t op;
- down(&mtrr_sem);
+ mutex_lock(&mtrr_mutex);
if (reg < 0) {
/* Search for existing MTRR */
@@ -151,7 +152,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
}
error = reg;
out:
- up(&mtrr_sem);
+ mutex_unlock(&mtrr_mutex);
return error;
}
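
The conversion above from DECLARE_MUTEX/down/up to DEFINE_MUTEX/mutex_lock/mutex_unlock is the mechanical part of the mutex migration introduced around 2.6.16: the old API was a counting semaphore pressed into service as a lock, while struct mutex enforces single-owner locking and enables debugging checks. A minimal sketch of the pattern, with illustrative names:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(example_mutex);  /* was: static DECLARE_MUTEX(example_sem); */

    static void guarded_op(void)
    {
        mutex_lock(&example_mutex);      /* was: down(&example_sem); */
        /* ... critical section ... */
        mutex_unlock(&example_mutex);    /* was: up(&example_sem); */
    }
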
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/crash.c b/linux-2.6-xen-sparse/arch/i386/kernel/crash.c
index 75aab8ef35..4b6698c7aa 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/crash.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/crash.c
@@ -69,7 +69,7 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
* for the data I pass, and I need tags
* on the data to indicate what information I have
* squirrelled away. ELF notes happen to provide
- * all of that that no need to invent something new.
+ * all of that, so there is no need to invent something new.
*/
buf = (u32*)per_cpu_ptr(crash_notes, cpu);
if (!buf)
@@ -106,7 +106,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
return 1;
local_irq_disable();
- if (!user_mode(regs)) {
+ if (!user_mode_vm(regs)) {
crash_fixup_ss_esp(&fixed_regs, regs);
regs = &fixed_regs;
}
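
The user_mode() to user_mode_vm() switch above matters for vm86 tasks: user_mode() inspects only the privilege bits of the saved CS, which misses vm86 mode, while user_mode_vm() also tests the VM bit in EFLAGS. The i386 definitions of that era were approximately:

    /* Approximate 2.6.17-era definitions from include/asm-i386/ptrace.h: */
    #define user_mode(regs)    (3 & (regs)->xcs)
    #define user_mode_vm(regs) ((VM_MASK & (regs)->eflags) || user_mode(regs))

With the change, the NMI crash path also fixes up SS/ESP for register frames saved from vm86 mode.
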
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S
index 8c5ace45ef..75654a1c84 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S
@@ -271,6 +271,10 @@ ENTRY(system_call)
pushl %eax # save orig_eax
SAVE_ALL
GET_THREAD_INFO(%ebp)
+ testl $TF_MASK,EFLAGS(%esp)
+ jz no_singlestep
+ orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+no_singlestep:
# system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
index 1d0278c69b..edf86d6fb1 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
@@ -7,6 +7,7 @@
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
+#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <xen/interface/xen.h>
@@ -92,6 +93,7 @@ ENTRY(empty_zero_page)
/*
* The Global Descriptor Table contains 28 quadwords, per-CPU.
*/
+ .align L1_CACHE_BYTES
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* 0x0b reserved */
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c
index 844d6e9ad9..4b26c5bf8a 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c
@@ -394,8 +394,8 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
{
int i, j;
Dprintk("Rotating IRQs among CPUs.\n");
- for (i = 0; i < NR_CPUS; i++) {
- for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
+ for_each_online_cpu(i) {
+ for (j = 0; j < NR_IRQS; j++) {
if (!irq_desc[j].action)
continue;
/* Is it a significant load ? */
@@ -424,7 +424,7 @@ static void do_irq_balance(void)
unsigned long imbalance = 0;
cpumask_t allowed_mask, target_cpu_mask, tmp;
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_possible_cpu(i) {
int package_index;
CPU_IRQ(i) = 0;
if (!cpu_online(i))
@@ -465,9 +465,7 @@ static void do_irq_balance(void)
}
}
/* Find the least loaded processor package */
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_online(i))
- continue;
+ for_each_online_cpu(i) {
if (i != CPU_TO_PACKAGEINDEX(i))
continue;
if (min_cpu_irq > CPU_IRQ(i)) {
@@ -484,9 +482,7 @@ tryanothercpu:
*/
tmp_cpu_irq = 0;
tmp_loaded = -1;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_online(i))
- continue;
+ for_each_online_cpu(i) {
if (i != CPU_TO_PACKAGEINDEX(i))
continue;
if (max_cpu_irq <= CPU_IRQ(i))
@@ -662,9 +658,7 @@ static int __init balanced_irq_init(void)
if (smp_num_siblings > 1 && !cpus_empty(tmp))
physical_balance = 1;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_online(i))
- continue;
+ for_each_online_cpu(i) {
irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
@@ -681,9 +675,11 @@ static int __init balanced_irq_init(void)
else
printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
failed:
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_possible_cpu(i) {
kfree(irq_cpu_data[i].irq_delta);
+ irq_cpu_data[i].irq_delta = NULL;
kfree(irq_cpu_data[i].last_irq);
+ irq_cpu_data[i].last_irq = NULL;
}
return 0;
}
@@ -691,7 +687,7 @@ failed:
int __init irqbalance_disable(char *str)
{
irqbalance_disabled = 1;
- return 0;
+ return 1;
}
__setup("noirqbalance", irqbalance_disable);
@@ -1813,7 +1809,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
* Don't check I/O APIC IDs for xAPIC systems. They have
* no meaning without the serial APIC bus.
*/
- if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15))
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
return;
/*
* This is broken; anything with a real cpu count has to
@@ -2298,6 +2295,8 @@ static inline void unlock_ExtINT_logic(void)
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+int timer_uses_ioapic_pin_0;
+
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
@@ -2334,6 +2333,9 @@ static inline void check_timer(void)
pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic;
+ if (pin1 == 0)
+ timer_uses_ioapic_pin_0 = 1;
+
printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
vector, apic1, pin1, apic2, pin2);
@@ -2419,6 +2421,7 @@ static inline void check_timer(void)
"report. Then try booting with the 'noapic' option");
}
#else
+int timer_uses_ioapic_pin_0 = 0;
#define check_timer() ((void)0)
#endif
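
Two recurring cleanups in this file: the open-coded NR_CPUS loops with manual cpu_online() checks become for_each_online_cpu(), or for_each_possible_cpu() in the teardown path, where CPUs that never came online must still be visited so the kfree() plus NULL reset stays safe; and irqbalance_disable() now returns 1. In this era a __setup() handler returning 0 meant "option not consumed", so the string leaked into init's environment; returning 1 marks it handled (the same fix recurs for kstack_setup() below). A sketch of the loop conversion, with do_something() as a hypothetical stand-in:

    #include <linux/cpumask.h>

    static void do_something(int cpu) { /* hypothetical per-CPU work */ }

    static void walk_cpus(void)
    {
            int cpu;

            /* old style: scan all possible IDs, skip offline ones by hand */
            for (cpu = 0; cpu < NR_CPUS; cpu++) {
                    if (!cpu_online(cpu))
                            continue;
                    do_something(cpu);
            }

            /* new style: the iterator states the intent directly */
            for_each_online_cpu(cpu)
                    do_something(cpu);
    }
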
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c
index 926ba175c3..c77a40b8fd 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c
@@ -32,6 +32,7 @@
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
#include <linux/syscalls.h>
#include <asm/msr.h>
@@ -49,7 +50,7 @@ MODULE_LICENSE("GPL");
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-static DECLARE_MUTEX(microcode_sem);
+static DEFINE_MUTEX(microcode_mutex);
static int microcode_open (struct inode *unused1, struct file *unused2)
{
@@ -90,37 +91,20 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_
return -EINVAL;
}
- down(&microcode_sem);
+ mutex_lock(&microcode_mutex);
ret = do_microcode_update(buf, len);
if (!ret)
ret = (ssize_t)len;
- up(&microcode_sem);
+ mutex_unlock(&microcode_mutex);
return ret;
}
-static int microcode_ioctl (struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- switch (cmd) {
- /*
- * XXX: will be removed after microcode_ctl
- * is updated to ignore failure of this ioctl()
- */
- case MICROCODE_IOCFREE:
- return 0;
- default:
- return -EINVAL;
- }
- return -EINVAL;
-}
-
static struct file_operations microcode_fops = {
.owner = THIS_MODULE,
.write = microcode_write,
- .ioctl = microcode_ioctl,
.open = microcode_open,
};
@@ -151,7 +135,6 @@ static int __init microcode_init (void)
static void __exit microcode_exit (void)
{
misc_deregister(&microcode_dev);
- printk(KERN_INFO "IA-32 Microcode Update Driver v" MICROCODE_VERSION " unregistered\n");
}
module_init(microcode_init)
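
Besides the same semaphore-to-mutex conversion as in the MTRR file, the obsolete MICROCODE_IOCFREE ioctl is removed, leaving /dev/cpu/microcode with only open and write methods. The single-writer char-device pattern it keeps, sketched generically (do_update() is a hypothetical worker, not the microcode API):

    #include <linux/fs.h>
    #include <linux/mutex.h>

    static DEFINE_MUTEX(dev_mutex);

    /* hypothetical worker: copy the user buffer and apply it */
    static int do_update(const char __user *buf, size_t len) { return 0; }

    static ssize_t dev_write(struct file *file, const char __user *buf,
                             size_t len, loff_t *ppos)
    {
            ssize_t ret;

            mutex_lock(&dev_mutex);         /* no concurrent ->write()s */
            ret = do_update(buf, len);
            if (!ret)
                    ret = (ssize_t)len;
            mutex_unlock(&dev_mutex);
            return ret;
    }

A mutex rather than a spinlock is the right tool here because the write path copies from user space and may sleep.
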
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c
index f5daedb2df..48e0bc0f8a 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c
@@ -38,12 +38,6 @@
int smp_found_config;
unsigned int __initdata maxcpus = NR_CPUS;
-#ifdef CONFIG_HOTPLUG_CPU
-#define CPU_HOTPLUG_ENABLED (1)
-#else
-#define CPU_HOTPLUG_ENABLED (0)
-#endif
-
/*
* Various Linux-internal data structures created from the
* MP-table.
@@ -110,21 +104,6 @@ static int __init mpf_checksum(unsigned char *mp, int len)
static int mpc_record;
static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
-#ifdef CONFIG_X86_NUMAQ
-static int MP_valid_apicid(int apicid, int version)
-{
- return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
-}
-#elif !defined(CONFIG_XEN)
-static int MP_valid_apicid(int apicid, int version)
-{
- if (version >= 0x14)
- return apicid < 0xff;
- else
- return apicid < 0xf;
-}
-#endif
-
#ifndef CONFIG_XEN
static void __devinit MP_processor_info (struct mpc_config_processor *m)
{
@@ -191,12 +170,6 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m)
ver = m->mpc_apicver;
- if (!MP_valid_apicid(apicid, ver)) {
- printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n",
- m->mpc_apicid, MAX_APICS);
- return;
- }
-
/*
* Validate version
*/
@@ -226,7 +199,14 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m)
cpu_set(num_processors, cpu_possible_map);
num_processors++;
- if (CPU_HOTPLUG_ENABLED || (num_processors > 8)) {
+ /*
+ * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+ * but we need to work out other dependencies like SMP_SUSPEND etc.
+ * before this can be done without some confusion.
+ * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+ * - Ashok Raj <ashok.raj@intel.com>
+ */
+ if (num_processors > 8) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
if (!APIC_XAPIC(ver)) {
@@ -256,6 +236,13 @@ static void __init MP_bus_info (struct mpc_config_bus *m)
mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+ if (m->mpc_busid >= MAX_MP_BUSSES) {
+ printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
+ " is too large, max. supported is %d\n",
+ m->mpc_busid, str, MAX_MP_BUSSES - 1);
+ return;
+ }
+
if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
@@ -844,6 +831,8 @@ void __init find_smp_config (void)
#endif
}
+int es7000_plat;
+
/* --------------------------------------------------------------------------
ACPI-based MP Configuration
-------------------------------------------------------------------------- */
@@ -957,7 +946,8 @@ void __init mp_register_ioapic (
#ifndef CONFIG_XEN
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
#endif
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15))
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
tmpid = io_apic_get_unique_id(idx, id);
else
tmpid = id;
@@ -1033,8 +1023,6 @@ void __init mp_override_legacy_irq (
return;
}
-int es7000_plat;
-
void __init mp_config_acpi_legacy_irqs (void)
{
struct mpc_config_intsrc intsrc;
@@ -1164,7 +1152,17 @@ int mp_register_gsi (u32 gsi, int triggering, int polarity)
*/
int irq = gsi;
if (gsi < MAX_GSI_NUM) {
- if (gsi > 15)
+ /*
+ * Retain the VIA chipset work-around (gsi > 15), but
+ * avoid a problem where the 8254 timer (IRQ0) is setup
+ * via an override (so it's not on pin 0 of the ioapic),
+ * and at the same time, the pin 0 interrupt is a PCI
+ * type. The gsi > 15 test could cause these two pins
+ * to be shared as IRQ0, and they are not shareable.
+ * So test for this condition, and if necessary, avoid
+ * the pin collision.
+ */
+ if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
gsi = pci_irq++;
/*
* Don't assign IRQ used by ACPI SCI
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c
index 0dbc6a412e..b64dec23ec 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c
@@ -38,7 +38,6 @@
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
-#include <linux/kprobes.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -245,7 +244,7 @@ void show_regs(struct pt_regs * regs)
printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
print_symbol("EIP is at %s\n", regs->eip);
- if (user_mode(regs))
+ if (user_mode_vm(regs))
printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
printk(" EFLAGS: %08lx %s (%s %.*s)\n",
regs->eflags, print_tainted(), system_utsname.release,
@@ -314,13 +313,6 @@ void exit_thread(void)
struct task_struct *tsk = current;
struct thread_struct *t = &tsk->thread;
- /*
- * Remove function-return probe instances associated with this task
- * and put them back on the free list. Do not insert an exit probe for
- * this function, it will be disabled by kprobe_flush_task if you do.
- */
- kprobe_flush_task(tsk);
-
/* The process may have allocated an io port bitmap... nuke it. */
if (unlikely(NULL != t->io_bitmap_ptr)) {
struct physdev_set_iobitmap set_iobitmap = { 0 };
@@ -731,7 +723,6 @@ unsigned long get_wchan(struct task_struct *p)
} while (count++ < 16);
return 0;
}
-EXPORT_SYMBOL(get_wchan);
/*
* sys_alloc_thread_area: get a yet unused TLS descriptor index.
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
index 819f36f11e..63d993b58f 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
@@ -34,6 +34,7 @@
#include <linux/initrd.h>
#include <linux/bootmem.h>
#include <linux/seq_file.h>
+#include <linux/platform_device.h>
#include <linux/console.h>
#include <linux/mca.h>
#include <linux/root_dev.h>
@@ -49,6 +50,7 @@
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/dmi.h>
+#include <linux/pfn.h>
#include <video/edid.h>
@@ -1027,6 +1029,38 @@ efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
return 0;
}
+ /*
+ * This function checks if the entire range <start,end> is mapped with type.
+ *
+ * Note: this function only works correctly if the e820 table is sorted and
+ * non-overlapping, which is the case.
+ */
+int __init
+e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
+{
+ u64 start = s;
+ u64 end = e;
+ int i;
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ if (type && ei->type != type)
+ continue;
+ /* is the region (part) in overlap with the current region ?*/
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+ /* if the region is at the beginning of <start,end> we move
+ * start to the end of the region since it's ok until there
+ */
+ if (ei->addr <= start)
+ start = ei->addr + ei->size;
+ /* if start is now at or beyond end, we're done, full
+ * coverage */
+ if (start >= end)
+ return 1; /* we're done */
+ }
+ return 0;
+}
+
/*
* Find the highest page frame number we have available
*/
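
e820_all_mapped(), brought in from mainline here, answers whether the whole physical range [s,e) is covered by e820 entries of the given type; passing type 0 matches entries of any type, since the `if (type && ...)` guard skips the comparison. It relies on the table being sorted and non-overlapping, as the comment says. A hedged usage sketch (the constants are illustrative only; mainline uses this kind of check to validate windows such as MMCONFIG):

    /* illustrative: is the 256MB window at 0xE0000000 fully E820_RESERVED? */
    if (!e820_all_mapped(0xE0000000UL, 0xF0000000UL, E820_RESERVED))
            printk(KERN_WARNING "window is not fully reserved in e820\n");
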
@@ -1124,10 +1158,10 @@ static int __init
free_available_memory(unsigned long start, unsigned long end, void *arg)
{
/* check max_low_pfn */
- if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
+ if (start >= (max_low_pfn << PAGE_SHIFT))
return 0;
- if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
- end = (max_low_pfn + 1) << PAGE_SHIFT;
+ if (end >= (max_low_pfn << PAGE_SHIFT))
+ end = max_low_pfn << PAGE_SHIFT;
if (start < end)
free_bootmem(start, end - start);
@@ -1380,7 +1414,7 @@ legacy_init_iomem_resources(struct e820entry *e820, int nr_map,
struct resource *res;
if (e820[i].addr + e820[i].size > 0x100000000ULL)
continue;
- res = alloc_bootmem_low(sizeof(struct resource));
+ res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
switch (e820[i].type) {
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
@@ -1467,8 +1501,11 @@ e820_setup_gap(struct e820entry *e820, int nr_map)
/*
* Request address space for all standard resources
+ *
+ * This is called just before pcibios_init(), which is also a
+ * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
*/
-static void __init register_memory(void)
+static int __init request_standard_resources(void)
{
#ifdef CONFIG_XEN
struct xen_memory_map memmap;
@@ -1477,8 +1514,9 @@ static void __init register_memory(void)
/* Nothing to do if not running in dom0. */
if (!is_initial_xendomain())
- return;
+ return 0;
+ printk("Setting up standard PCI resources\n");
#ifdef CONFIG_XEN
memmap.nr_entries = E820MAX;
set_xen_guest_handle(memmap.buffer, machine_e820.map);
@@ -1503,6 +1541,13 @@ static void __init register_memory(void)
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < STANDARD_IO_RESOURCES; i++)
request_resource(&ioport_resource, &standard_io_resources[i]);
+ return 0;
+}
+
+subsys_initcall(request_standard_resources);
+
+static void __init register_memory(void)
+{
#ifdef CONFIG_XEN
e820_setup_gap(machine_e820.map, machine_e820.nr_map);
@@ -1511,101 +1556,6 @@ static void __init register_memory(void)
#endif
}
-/* Use inline assembly to define this because the nops are defined
- as inline assembly strings in the include files and we cannot
- get them easily into strings. */
-asm("\t.data\nintelnops: "
- GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
- GENERIC_NOP7 GENERIC_NOP8);
-asm("\t.data\nk8nops: "
- K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
-asm("\t.data\nk7nops: "
- K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
- K7_NOP7 K7_NOP8);
-
-extern unsigned char intelnops[], k8nops[], k7nops[];
-static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
- NULL,
- intelnops,
- intelnops + 1,
- intelnops + 1 + 2,
- intelnops + 1 + 2 + 3,
- intelnops + 1 + 2 + 3 + 4,
- intelnops + 1 + 2 + 3 + 4 + 5,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
- NULL,
- k8nops,
- k8nops + 1,
- k8nops + 1 + 2,
- k8nops + 1 + 2 + 3,
- k8nops + 1 + 2 + 3 + 4,
- k8nops + 1 + 2 + 3 + 4 + 5,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
- NULL,
- k7nops,
- k7nops + 1,
- k7nops + 1 + 2,
- k7nops + 1 + 2 + 3,
- k7nops + 1 + 2 + 3 + 4,
- k7nops + 1 + 2 + 3 + 4 + 5,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-static struct nop {
- int cpuid;
- unsigned char **noptable;
-} noptypes[] = {
- { X86_FEATURE_K8, k8_nops },
- { X86_FEATURE_K7, k7_nops },
- { -1, NULL }
-};
-
-/* Replace instructions with better alternatives for this CPU type.
-
- This runs before SMP is initialized to avoid SMP problems with
- self modifying code. This implies that assymetric systems where
- APs have less capabilities than the boot processor are not handled.
- Tough. Make sure you disable such features by hand. */
-void apply_alternatives(void *start, void *end)
-{
- struct alt_instr *a;
- int diff, i, k;
- unsigned char **noptable = intel_nops;
- for (i = 0; noptypes[i].cpuid >= 0; i++) {
- if (boot_cpu_has(noptypes[i].cpuid)) {
- noptable = noptypes[i].noptable;
- break;
- }
- }
- for (a = start; (void *)a < end; a++) {
- if (!boot_cpu_has(a->cpuid))
- continue;
- BUG_ON(a->replacementlen > a->instrlen);
- memcpy(a->instr, a->replacement, a->replacementlen);
- diff = a->instrlen - a->replacementlen;
- /* Pad the rest with nops */
- for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
- k = diff;
- if (k > ASM_NOP_MAX)
- k = ASM_NOP_MAX;
- memcpy(a->instr + i, noptable[k], k);
- }
- }
-}
-
-void __init alternative_instructions(void)
-{
- extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
static char * __init machine_specific_memory_setup(void);
#ifdef CONFIG_MCA
@@ -1636,7 +1586,7 @@ void __init setup_arch(char **cmdline_p)
panic_timeout = 1;
/* Register a call for panic conditions. */
- notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable,
@@ -1644,6 +1594,9 @@ void __init setup_arch(char **cmdline_p)
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
early_cpu_init();
+#ifdef CONFIG_SMP
+ prefill_possible_map();
+#endif
/*
* FIXME: This isn't an official loader_type right
@@ -1731,6 +1684,16 @@ void __init setup_arch(char **cmdline_p)
parse_cmdline_early(cmdline_p);
+#ifdef CONFIG_EARLY_PRINTK
+ {
+ char *s = strstr(*cmdline_p, "earlyprintk=");
+ if (s) {
+ setup_early_printk(strchr(s, '=') + 1);
+ printk("early console enabled\n");
+ }
+ }
+#endif
+
max_low_pfn = setup_memory();
/*
@@ -1801,18 +1764,6 @@ void __init setup_arch(char **cmdline_p)
* NOTE: at this point the bootmem allocator is fully available.
*/
-#ifdef CONFIG_EARLY_PRINTK
- {
- char *s = strstr(*cmdline_p, "earlyprintk=");
- if (s) {
- extern void setup_early_printk(char *);
-
- setup_early_printk(strchr(s, '=') + 1);
- printk("early console enabled\n");
- }
- }
-#endif
-
if (is_initial_xendomain())
dmi_scan_machine();
@@ -1825,10 +1776,6 @@ void __init setup_arch(char **cmdline_p)
set_iopl.iopl = 1;
HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-#ifdef CONFIG_X86_IO_APIC
- check_acpi_pci(); /* Checks more than just ACPI actually */
-#endif
-
#ifdef CONFIG_ACPI
if (!is_initial_xendomain()) {
printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
@@ -1840,6 +1787,13 @@ void __init setup_arch(char **cmdline_p)
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
acpi_boot_table_init();
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+ check_acpi_pci(); /* Checks more than just ACPI actually */
+#endif
+
+#ifdef CONFIG_ACPI
acpi_boot_init();
#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
@@ -1882,6 +1836,23 @@ xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
return NOTIFY_DONE;
}
+static __init int add_pcspkr(void)
+{
+ struct platform_device *pd;
+ int ret;
+
+ pd = platform_device_alloc("pcspkr", -1);
+ if (!pd)
+ return -ENOMEM;
+
+ ret = platform_device_add(pd);
+ if (ret)
+ platform_device_put(pd);
+
+ return ret;
+}
+device_initcall(add_pcspkr);
+
#include "setup_arch_post.h"
/*
* Local Variables:
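
register_memory() is split up: resource registration moves into request_standard_resources(), a subsys_initcall deliberately ordered just before pcibios_init(), and a PC-speaker platform device is now registered at device_initcall time. The alloc/add/put sequence in add_pcspkr() is the canonical error pattern for platform devices: on a failed platform_device_add(), the device must be released with platform_device_put(), never kfree(). The same pattern for a generic device (the name "mydev" is hypothetical):

    #include <linux/init.h>
    #include <linux/platform_device.h>

    static __init int register_mydev(void)
    {
            struct platform_device *pd;
            int ret;

            pd = platform_device_alloc("mydev", -1);  /* -1: single instance */
            if (!pd)
                    return -ENOMEM;

            ret = platform_device_add(pd);
            if (ret)
                    platform_device_put(pd);  /* drop the refcount, not kfree() */
            return ret;
    }
    device_initcall(register_mydev);
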
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/smp-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/smp-xen.c
index c6aaa8b822..ea58808eac 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/smp-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/smp-xen.c
@@ -483,27 +483,23 @@ void unlock_ipi_call_lock(void)
spin_unlock_irq(&call_lock);
}
-static struct call_data_struct * call_data;
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
+static struct call_data_struct *call_data;
+
+/**
+ * smp_call_function(): Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: currently unused.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code. Does not return until
* remote CPUs are nearly ready to execute <<func>> or are or have executed.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
{
struct call_data_struct data;
int cpus;
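
The free-form `[SUMMARY]` comment on smp_call_function() becomes a kernel-doc block (`/** ... @arg:`), which scripts/kernel-doc can extract, with no change to the function itself. Calling it in this era, as a sketch (my_func is hypothetical; per the documentation above it must be fast, non-blocking, and never invoked with interrupts disabled):

    #include <linux/smp.h>
    #include <linux/kernel.h>
    #include <asm/atomic.h>

    static atomic_t acks = ATOMIC_INIT(0);

    static void my_func(void *info)     /* runs on every other CPU */
    {
            atomic_inc((atomic_t *)info);
    }

    static void ping_other_cpus(void)
    {
            /* wait=1: block until all other CPUs have executed my_func */
            if (smp_call_function(my_func, &acks, 0, 1) != 0)
                    printk(KERN_ERR "smp_call_function failed\n");
    }
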
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/smpalts.c b/linux-2.6-xen-sparse/arch/i386/kernel/smpalts.c
deleted file mode 100644
index 5a32e547e4..0000000000
--- a/linux-2.6-xen-sparse/arch/i386/kernel/smpalts.c
+++ /dev/null
@@ -1,85 +0,0 @@
-#include <linux/kernel.h>
-#include <asm/system.h>
-#include <asm/smp_alt.h>
-#include <asm/processor.h>
-#include <asm/string.h>
-
-struct smp_replacement_record {
- unsigned char targ_size;
- unsigned char smp1_size;
- unsigned char smp2_size;
- unsigned char up_size;
- unsigned char feature;
- unsigned char data[0];
-};
-
-struct smp_alternative_record {
- void *targ_start;
- struct smp_replacement_record *repl;
-};
-
-extern struct smp_alternative_record __start_smp_alternatives_table,
- __stop_smp_alternatives_table;
-extern unsigned long __init_begin, __init_end;
-
-void prepare_for_smp(void)
-{
- struct smp_alternative_record *r;
- printk(KERN_INFO "Enabling SMP...\n");
- for (r = &__start_smp_alternatives_table;
- r != &__stop_smp_alternatives_table;
- r++) {
- BUG_ON(r->repl->targ_size < r->repl->smp1_size);
- BUG_ON(r->repl->targ_size < r->repl->smp2_size);
- BUG_ON(r->repl->targ_size < r->repl->up_size);
- if (system_state == SYSTEM_RUNNING &&
- r->targ_start >= (void *)&__init_begin &&
- r->targ_start < (void *)&__init_end)
- continue;
- if (r->repl->feature != (unsigned char)-1 &&
- boot_cpu_has(r->repl->feature)) {
- memcpy(r->targ_start,
- r->repl->data + r->repl->smp1_size,
- r->repl->smp2_size);
- memset(r->targ_start + r->repl->smp2_size,
- 0x90,
- r->repl->targ_size - r->repl->smp2_size);
- } else {
- memcpy(r->targ_start,
- r->repl->data,
- r->repl->smp1_size);
- memset(r->targ_start + r->repl->smp1_size,
- 0x90,
- r->repl->targ_size - r->repl->smp1_size);
- }
- }
- /* Paranoia */
- asm volatile ("jmp 1f\n1:");
- mb();
-}
-
-void unprepare_for_smp(void)
-{
- struct smp_alternative_record *r;
- printk(KERN_INFO "Disabling SMP...\n");
- for (r = &__start_smp_alternatives_table;
- r != &__stop_smp_alternatives_table;
- r++) {
- BUG_ON(r->repl->targ_size < r->repl->smp1_size);
- BUG_ON(r->repl->targ_size < r->repl->smp2_size);
- BUG_ON(r->repl->targ_size < r->repl->up_size);
- if (system_state == SYSTEM_RUNNING &&
- r->targ_start >= (void *)&__init_begin &&
- r->targ_start < (void *)&__init_end)
- continue;
- memcpy(r->targ_start,
- r->repl->data + r->repl->smp1_size + r->repl->smp2_size,
- r->repl->up_size);
- memset(r->targ_start + r->repl->up_size,
- 0x90,
- r->repl->targ_size - r->repl->up_size);
- }
- /* Paranoia */
- asm volatile ("jmp 1f\n1:");
- mb();
-}
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c b/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c
index 1008255c62..825b2b4ca7 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c
@@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
/* Core ID of each logical CPU */
int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
+/* Last level cache ID of each logical CPU */
+int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+
/* representing HT siblings of each logical CPU */
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_sibling_map);
@@ -310,7 +313,9 @@ static void __init synchronize_tsc_bp (void)
if (tsc_values[i] < avg)
realdelta = -realdelta;
- printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+ if (realdelta > 0)
+ printk(KERN_INFO "CPU#%d had %ld usecs TSC "
+ "skew, fixed it up.\n", i, realdelta);
}
sum += delta;
@@ -440,6 +445,18 @@ static void __devinit smp_callin(void)
static int cpucount;
+/* maps the cpu to the sched domain representing multi-core */
+cpumask_t cpu_coregroup_map(int cpu)
+{
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+ /*
+ * For perf, we return last level cache shared map.
+ * TBD: when power saving sched policy is added, we will return
+ * cpu_core_map when power saving policy is enabled
+ */
+ return c->llc_shared_map;
+}
+
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
@@ -459,12 +476,16 @@ set_cpu_sibling_map(int cpu)
cpu_set(cpu, cpu_sibling_map[i]);
cpu_set(i, cpu_core_map[cpu]);
cpu_set(cpu, cpu_core_map[i]);
+ cpu_set(i, c[cpu].llc_shared_map);
+ cpu_set(cpu, c[i].llc_shared_map);
}
}
} else {
cpu_set(cpu, cpu_sibling_map[cpu]);
}
+ cpu_set(cpu, c[cpu].llc_shared_map);
+
if (current_cpu_data.x86_max_cores == 1) {
cpu_core_map[cpu] = cpu_sibling_map[cpu];
c[cpu].booted_cores = 1;
@@ -472,6 +493,11 @@ set_cpu_sibling_map(int cpu)
}
for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ if (cpu_llc_id[cpu] != BAD_APICID &&
+ cpu_llc_id[cpu] == cpu_llc_id[i]) {
+ cpu_set(i, c[cpu].llc_shared_map);
+ cpu_set(cpu, c[i].llc_shared_map);
+ }
if (phys_proc_id[cpu] == phys_proc_id[i]) {
cpu_set(i, cpu_core_map[cpu]);
cpu_set(cpu, cpu_core_map[i]);
@@ -899,6 +925,7 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
unsigned short nmi_high = 0, nmi_low = 0;
++cpucount;
+ alternatives_smp_switch(1);
/*
* We can't use kernel_thread since we must avoid to
@@ -1002,7 +1029,6 @@ void cpu_exit_clear(void)
cpu_clear(cpu, cpu_callout_map);
cpu_clear(cpu, cpu_callin_map);
- cpu_clear(cpu, cpu_present_map);
cpu_clear(cpu, smp_commenced_mask);
unmap_cpu_to_logical_apicid(cpu);
@@ -1014,31 +1040,20 @@ struct warm_boot_cpu_info {
int cpu;
};
-static void __devinit do_warm_boot_cpu(void *p)
+static void __cpuinit do_warm_boot_cpu(void *p)
{
struct warm_boot_cpu_info *info = p;
do_boot_cpu(info->apicid, info->cpu);
complete(info->complete);
}
-int __devinit smp_prepare_cpu(int cpu)
+static int __cpuinit __smp_prepare_cpu(int cpu)
{
DECLARE_COMPLETION(done);
struct warm_boot_cpu_info info;
struct work_struct task;
int apicid, ret;
- lock_cpu_hotplug();
-
- /*
- * On x86, CPU0 is never offlined. Trying to bring up an
- * already-booted CPU will hang. So check for that case.
- */
- if (cpu_online(cpu)) {
- ret = -EINVAL;
- goto exit;
- }
-
apicid = x86_cpu_to_apicid[cpu];
if (apicid == BAD_APICID) {
ret = -ENODEV;
@@ -1063,7 +1078,6 @@ int __devinit smp_prepare_cpu(int cpu)
zap_low_mappings();
ret = 0;
exit:
- unlock_cpu_hotplug();
return ret;
}
#endif
@@ -1218,11 +1232,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
if (max_cpus <= cpucount+1)
continue;
-#ifdef CONFIG_SMP_ALTERNATIVES
- if (kicked == 1)
- prepare_for_smp();
-#endif
-
if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
printk("CPU #%d not responding - cannot use it.\n",
apicid);
@@ -1373,6 +1382,8 @@ void __cpu_die(unsigned int cpu)
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
printk ("CPU %d is now offline\n", cpu);
+ if (1 == num_online_cpus())
+ alternatives_smp_switch(0);
return;
}
msleep(100);
@@ -1394,6 +1405,22 @@ void __cpu_die(unsigned int cpu)
int __devinit __cpu_up(unsigned int cpu)
{
+#ifdef CONFIG_HOTPLUG_CPU
+ int ret=0;
+
+ /*
+ * We do warm boot only on cpus that had booted earlier
+ * Otherwise cold boot is all handled from smp_boot_cpus().
+ * cpu_callin_map is set during the AP kickstart process. It's reset
+ * when a cpu is taken offline from cpu_exit_clear().
+ */
+ if (!cpu_isset(cpu, cpu_callin_map))
+ ret = __smp_prepare_cpu(cpu);
+
+ if (ret)
+ return -EIO;
+#endif
+
/* In case one didn't come up */
if (!cpu_isset(cpu, cpu_callin_map)) {
printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
@@ -1401,11 +1428,6 @@ int __devinit __cpu_up(unsigned int cpu)
return -EIO;
}
-#ifdef CONFIG_SMP_ALTERNATIVES
- if (num_online_cpus() == 1)
- prepare_for_smp();
-#endif
-
local_irq_enable();
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Unleash the CPU! */
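
Several 2.6.17 changes meet in this file. The new cpu_llc_id[] and llc_shared_map feed cpu_coregroup_map(), which the scheduler uses to build a multi-core domain out of CPUs sharing a last-level cache. The Xen-local SMP alternatives (smpalts.c, deleted above) give way to the generic alternatives_smp_switch(), which patches SMP lock prefixes in when a second CPU boots and back out when the machine drops to one CPU. A hedged sketch of that hotplug policy (the hook names are hypothetical; the real calls sit in do_boot_cpu() and __cpu_die() as shown above):

    /* hypothetical hotplug hooks illustrating the switch policy */
    static void on_cpu_up(void)
    {
            alternatives_smp_switch(1);     /* ensure SMP-safe lock prefixes */
    }

    static void on_cpu_died(void)
    {
            if (num_online_cpus() == 1)
                    alternatives_smp_switch(0);  /* back to cheaper UP code */
    }

Also note that __smp_prepare_cpu() no longer takes lock_cpu_hotplug() itself; it is now reached only from __cpu_up(), in a path where hotplug is already serialized, and cpu_exit_clear() stops clearing cpu_present_map so a dead CPU can later be brought back.
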
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c
index 1c4a42709d..be8bc18c5e 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c
@@ -98,32 +98,50 @@ asmlinkage void fixup_4gb_segment(void);
asmlinkage void machine_check(void);
static int kstack_depth_to_print = 24;
-struct notifier_block *i386die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+ATOMIC_NOTIFIER_HEAD(i386die_chain);
int register_die_notifier(struct notifier_block *nb)
{
- int err = 0;
- unsigned long flags;
- spin_lock_irqsave(&die_notifier_lock, flags);
- err = notifier_chain_register(&i386die_chain, nb);
- spin_unlock_irqrestore(&die_notifier_lock, flags);
- return err;
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&i386die_chain, nb);
}
EXPORT_SYMBOL(register_die_notifier);
+int unregister_die_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&i386die_chain, nb);
+}
+EXPORT_SYMBOL(unregister_die_notifier);
+
static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
{
return p > (void *)tinfo &&
p < (void *)tinfo + THREAD_SIZE - 3;
}
-static void print_addr_and_symbol(unsigned long addr, char *log_lvl)
+/*
+ * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line.
+ */
+static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl,
+ int printed)
{
- printk(log_lvl);
+ if (!printed)
+ printk(log_lvl);
+
+#if CONFIG_STACK_BACKTRACE_COLS == 1
printk(" [<%08lx>] ", addr);
+#else
+ printk(" <%08lx> ", addr);
+#endif
print_symbol("%s", addr);
- printk("\n");
+
+ printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS;
+ if (printed)
+ printk(" ");
+ else
+ printk("\n");
+
+ return printed;
}
static inline unsigned long print_context_stack(struct thread_info *tinfo,
@@ -131,20 +149,24 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
char *log_lvl)
{
unsigned long addr;
+ int printed = 0; /* nr of entries already printed on current line */
#ifdef CONFIG_FRAME_POINTER
while (valid_stack_ptr(tinfo, (void *)ebp)) {
addr = *(unsigned long *)(ebp + 4);
- print_addr_and_symbol(addr, log_lvl);
+ printed = print_addr_and_symbol(addr, log_lvl, printed);
ebp = *(unsigned long *)ebp;
}
#else
while (valid_stack_ptr(tinfo, stack)) {
addr = *stack++;
if (__kernel_text_address(addr))
- print_addr_and_symbol(addr, log_lvl);
+ printed = print_addr_and_symbol(addr, log_lvl, printed);
}
#endif
+ if (printed)
+ printk("\n");
+
return ebp;
}
@@ -172,8 +194,7 @@ static void show_trace_log_lvl(struct task_struct *task,
stack = (unsigned long*)context->previous_esp;
if (!stack)
break;
- printk(log_lvl);
- printk(" =======================\n");
+ printk("%s =======================\n", log_lvl);
}
}
@@ -196,25 +217,20 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
}
stack = esp;
- printk(log_lvl);
for(i = 0; i < kstack_depth_to_print; i++) {
if (kstack_end(stack))
break;
- if (i && ((i % 8) == 0)) {
- printk("\n");
- printk(log_lvl);
- printk(" ");
- }
+ if (i && ((i % 8) == 0))
+ printk("\n%s ", log_lvl);
printk("%08lx ", *stack++);
}
- printk("\n");
- printk(log_lvl);
- printk("Call Trace:\n");
+ printk("\n%sCall Trace:\n", log_lvl);
show_trace_log_lvl(task, esp, log_lvl);
}
void show_stack(struct task_struct *task, unsigned long *esp)
{
+ printk(" ");
show_stack_log_lvl(task, esp, "");
}
@@ -239,7 +255,7 @@ void show_registers(struct pt_regs *regs)
esp = (unsigned long) (&regs->esp);
savesegment(ss, ss);
- if (user_mode(regs)) {
+ if (user_mode_vm(regs)) {
in_kernel = 0;
esp = regs->esp;
ss = regs->xss & 0xffff;
@@ -339,6 +355,8 @@ void die(const char * str, struct pt_regs * regs, long err)
static int die_counter;
unsigned long flags;
+ oops_enter();
+
if (die.lock_owner != raw_smp_processor_id()) {
console_verbose();
spin_lock_irqsave(&die.lock, flags);
@@ -351,6 +369,9 @@ void die(const char * str, struct pt_regs * regs, long err)
if (++die.lock_owner_depth < 3) {
int nl = 0;
+ unsigned long esp;
+ unsigned short ss;
+
handle_BUG(regs);
printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
@@ -371,8 +392,23 @@ void die(const char * str, struct pt_regs * regs, long err)
#endif
if (nl)
printk("\n");
- notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
- show_registers(regs);
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) !=
+ NOTIFY_STOP) {
+ show_registers(regs);
+ /* Executive summary in case the oops scrolled away */
+ esp = (unsigned long) (&regs->esp);
+ savesegment(ss, ss);
+ if (user_mode(regs)) {
+ esp = regs->esp;
+ ss = regs->xss & 0xffff;
+ }
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip);
+ print_symbol("%s", regs->eip);
+ printk(" SS:ESP %04x:%08lx\n", ss, esp);
+ }
+ else
+ regs = NULL;
} else
printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
@@ -380,6 +416,9 @@ void die(const char * str, struct pt_regs * regs, long err)
die.lock_owner = -1;
spin_unlock_irqrestore(&die.lock, flags);
+ if (!regs)
+ return;
+
if (kexec_should_crash(current))
crash_kexec(regs);
@@ -391,6 +430,7 @@ void die(const char * str, struct pt_regs * regs, long err)
ssleep(5);
panic("Fatal exception");
}
+ oops_exit();
do_exit(SIGSEGV);
}
@@ -569,7 +609,7 @@ static DEFINE_SPINLOCK(nmi_print_lock);
void die_nmi (struct pt_regs *regs, const char *msg)
{
- if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
+ if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
NOTIFY_STOP)
return;
@@ -591,7 +631,7 @@ void die_nmi (struct pt_regs *regs, const char *msg)
/* If we are in kernel we are probably nested up pretty bad
* and might aswell get out now while we still can.
*/
- if (!user_mode(regs)) {
+ if (!user_mode_vm(regs)) {
current->thread.trap_no = 2;
crash_kexec(regs);
}
@@ -608,7 +648,7 @@ static void default_do_nmi(struct pt_regs * regs)
reason = get_nmi_reason();
if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
+ if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
== NOTIFY_STOP)
return;
#ifdef CONFIG_X86_LOCAL_APIC
@@ -624,7 +664,7 @@ static void default_do_nmi(struct pt_regs * regs)
unknown_nmi_error(reason, regs);
return;
}
- if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
return;
if (reason & 0x80)
mem_parity_error(reason, regs);
@@ -662,6 +702,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
void set_nmi_callback(nmi_callback_t callback)
{
+ vmalloc_sync_all();
rcu_assign_pointer(nmi_callback, callback);
}
EXPORT_SYMBOL_GPL(set_nmi_callback);
@@ -1089,6 +1130,6 @@ void smp_trap_init(trap_info_t *trap_ctxt)
static int __init kstack_setup(char *s)
{
kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
+ return 1;
}
__setup("kstack=", kstack_setup);
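
The i386 die chain moves from a hand-rolled notifier list guarded by a private spinlock to an ATOMIC_NOTIFIER_HEAD from the 2.6.17 notifier rework, which is safe to walk from trap and NMI context. The vmalloc_sync_all() call before registration pre-faults the kernel page tables so that a die notifier living in module (vmalloc) space can never itself take a vmalloc fault while a fault is being handled; the trap numbers passed to notify_die() also become accurate (2 for NMIs, current->thread.trap_no for an oops) so subscribers such as kprobes see real vectors. Registering a handler on this chain, as a sketch (my_die_handler is hypothetical):

    #include <linux/init.h>
    #include <linux/notifier.h>
    #include <linux/kernel.h>
    #include <asm/kdebug.h>

    static int my_die_handler(struct notifier_block *nb,
                              unsigned long val, void *data)
    {
            if (val == DIE_OOPS)
                    printk(KERN_ERR "oops observed\n");  /* hypothetical action */
            return NOTIFY_DONE;
    }

    static struct notifier_block my_die_nb = {
            .notifier_call = my_die_handler,
    };

    static int __init my_init(void)
    {
            /* wraps atomic_notifier_chain_register() and syncs vmalloc first */
            return register_die_notifier(&my_die_nb);
    }

The identical hunks in the native traps.c below apply the same conversion to the non-Xen build.
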
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/traps.c b/linux-2.6-xen-sparse/arch/i386/kernel/traps.c
index 85d4645c1a..3078fd86da 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/traps.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/traps.c
@@ -92,32 +92,50 @@ asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
static int kstack_depth_to_print = 24;
-struct notifier_block *i386die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+ATOMIC_NOTIFIER_HEAD(i386die_chain);
int register_die_notifier(struct notifier_block *nb)
{
- int err = 0;
- unsigned long flags;
- spin_lock_irqsave(&die_notifier_lock, flags);
- err = notifier_chain_register(&i386die_chain, nb);
- spin_unlock_irqrestore(&die_notifier_lock, flags);
- return err;
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&i386die_chain, nb);
}
EXPORT_SYMBOL(register_die_notifier);
+int unregister_die_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&i386die_chain, nb);
+}
+EXPORT_SYMBOL(unregister_die_notifier);
+
static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
{
return p > (void *)tinfo &&
p < (void *)tinfo + THREAD_SIZE - 3;
}
-static void print_addr_and_symbol(unsigned long addr, char *log_lvl)
+/*
+ * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line.
+ */
+static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl,
+ int printed)
{
- printk(log_lvl);
+ if (!printed)
+ printk(log_lvl);
+
+#if CONFIG_STACK_BACKTRACE_COLS == 1
printk(" [<%08lx>] ", addr);
+#else
+ printk(" <%08lx> ", addr);
+#endif
print_symbol("%s", addr);
- printk("\n");
+
+ printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS;
+ if (printed)
+ printk(" ");
+ else
+ printk("\n");
+
+ return printed;
}
static inline unsigned long print_context_stack(struct thread_info *tinfo,
@@ -125,20 +143,24 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
char *log_lvl)
{
unsigned long addr;
+ int printed = 0; /* nr of entries already printed on current line */
#ifdef CONFIG_FRAME_POINTER
while (valid_stack_ptr(tinfo, (void *)ebp)) {
addr = *(unsigned long *)(ebp + 4);
- print_addr_and_symbol(addr, log_lvl);
+ printed = print_addr_and_symbol(addr, log_lvl, printed);
ebp = *(unsigned long *)ebp;
}
#else
while (valid_stack_ptr(tinfo, stack)) {
addr = *stack++;
if (__kernel_text_address(addr))
- print_addr_and_symbol(addr, log_lvl);
+ printed = print_addr_and_symbol(addr, log_lvl, printed);
}
#endif
+ if (printed)
+ printk("\n");
+
return ebp;
}
@@ -166,8 +188,7 @@ static void show_trace_log_lvl(struct task_struct *task,
stack = (unsigned long*)context->previous_esp;
if (!stack)
break;
- printk(log_lvl);
- printk(" =======================\n");
+ printk("%s =======================\n", log_lvl);
}
}
@@ -190,25 +211,20 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
}
stack = esp;
- printk(log_lvl);
for(i = 0; i < kstack_depth_to_print; i++) {
if (kstack_end(stack))
break;
- if (i && ((i % 8) == 0)) {
- printk("\n");
- printk(log_lvl);
- printk(" ");
- }
+ if (i && ((i % 8) == 0))
+ printk("\n%s ", log_lvl);
printk("%08lx ", *stack++);
}
- printk("\n");
- printk(log_lvl);
- printk("Call Trace:\n");
+ printk("\n%sCall Trace:\n", log_lvl);
show_trace_log_lvl(task, esp, log_lvl);
}
void show_stack(struct task_struct *task, unsigned long *esp)
{
+ printk(" ");
show_stack_log_lvl(task, esp, "");
}
@@ -233,7 +249,7 @@ void show_registers(struct pt_regs *regs)
esp = (unsigned long) (&regs->esp);
savesegment(ss, ss);
- if (user_mode(regs)) {
+ if (user_mode_vm(regs)) {
in_kernel = 0;
esp = regs->esp;
ss = regs->xss & 0xffff;
@@ -333,6 +349,8 @@ void die(const char * str, struct pt_regs * regs, long err)
static int die_counter;
unsigned long flags;
+ oops_enter();
+
if (die.lock_owner != raw_smp_processor_id()) {
console_verbose();
spin_lock_irqsave(&die.lock, flags);
@@ -345,6 +363,9 @@ void die(const char * str, struct pt_regs * regs, long err)
if (++die.lock_owner_depth < 3) {
int nl = 0;
+ unsigned long esp;
+ unsigned short ss;
+
handle_BUG(regs);
printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
@@ -365,8 +386,23 @@ void die(const char * str, struct pt_regs * regs, long err)
#endif
if (nl)
printk("\n");
- notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
- show_registers(regs);
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) !=
+ NOTIFY_STOP) {
+ show_registers(regs);
+ /* Executive summary in case the oops scrolled away */
+ esp = (unsigned long) (&regs->esp);
+ savesegment(ss, ss);
+ if (user_mode(regs)) {
+ esp = regs->esp;
+ ss = regs->xss & 0xffff;
+ }
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip);
+ print_symbol("%s", regs->eip);
+ printk(" SS:ESP %04x:%08lx\n", ss, esp);
+ }
+ else
+ regs = NULL;
} else
printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
@@ -374,6 +410,9 @@ void die(const char * str, struct pt_regs * regs, long err)
die.lock_owner = -1;
spin_unlock_irqrestore(&die.lock, flags);
+ if (!regs)
+ return;
+
if (kexec_should_crash(current))
crash_kexec(regs);
@@ -385,6 +424,7 @@ void die(const char * str, struct pt_regs * regs, long err)
ssleep(5);
panic("Fatal exception");
}
+ oops_exit();
do_exit(SIGSEGV);
}
@@ -594,7 +634,7 @@ static DEFINE_SPINLOCK(nmi_print_lock);
void die_nmi (struct pt_regs *regs, const char *msg)
{
- if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
+ if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
NOTIFY_STOP)
return;
@@ -616,7 +656,7 @@ void die_nmi (struct pt_regs *regs, const char *msg)
/* If we are in kernel we are probably nested up pretty bad
* and might aswell get out now while we still can.
*/
- if (!user_mode(regs)) {
+ if (!user_mode_vm(regs)) {
current->thread.trap_no = 2;
crash_kexec(regs);
}
@@ -633,7 +673,7 @@ static void default_do_nmi(struct pt_regs * regs)
reason = get_nmi_reason();
if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
+ if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
== NOTIFY_STOP)
return;
#ifdef CONFIG_X86_LOCAL_APIC
@@ -649,7 +689,7 @@ static void default_do_nmi(struct pt_regs * regs)
unknown_nmi_error(reason, regs);
return;
}
- if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
return;
if (reason & 0x80)
mem_parity_error(reason, regs);
@@ -687,6 +727,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
void set_nmi_callback(nmi_callback_t callback)
{
+ vmalloc_sync_all();
rcu_assign_pointer(nmi_callback, callback);
}
EXPORT_SYMBOL_GPL(set_nmi_callback);
@@ -1157,6 +1198,6 @@ void __init trap_init(void)
static int __init kstack_setup(char *s)
{
kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
+ return 1;
}
__setup("kstack=", kstack_setup);
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
index 0e3a0d46c8..c0adee5da5 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
@@ -324,7 +324,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
/*call audit_syscall_exit since we do not exit via the normal paths */
if (unlikely(current->audit_context))
- audit_syscall_exit(current, AUDITSC_RESULT(eax), eax);
+ audit_syscall_exit(AUDITSC_RESULT(eax), eax);
__asm__ __volatile__(
"movl %0,%%esp\n\t"
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S b/linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S
index 83b386d6ce..1e3f8734e5 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S
@@ -7,6 +7,7 @@
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
#include <asm/page.h>
+#include <asm/cache.h>
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
@@ -40,13 +41,6 @@ SECTIONS
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
__stop___ex_table = .;
- . = ALIGN(16);
- __start_smp_alternatives_table = .;
- __smp_alternatives : AT(ADDR(__smp_alternatives) - LOAD_OFFSET) { *(__smp_alternatives) }
- __stop_smp_alternatives_table = .;
-
- __smp_replacements : AT(ADDR(__smp_replacements) - LOAD_OFFSET) { *(__smp_replacements) }
-
RODATA
/* writeable */
@@ -81,6 +75,26 @@ SECTIONS
*(.data.init_task)
}
+ /* might get freed after init */
+ . = ALIGN(4096);
+ __smp_alt_begin = .;
+ __smp_alt_instructions = .;
+ .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
+ *(.smp_altinstructions)
+ }
+ __smp_alt_instructions_end = .;
+ . = ALIGN(4);
+ __smp_locks = .;
+ .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+ *(.smp_locks)
+ }
+ __smp_locks_end = .;
+ .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
+ *(.smp_altinstr_replacement)
+ }
+ . = ALIGN(4096);
+ __smp_alt_end = .;
+
/* will be freed after init */
. = ALIGN(4096); /* Init code and data */
__init_begin = .;
@@ -128,7 +142,7 @@ SECTIONS
__initramfs_start = .;
.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
__initramfs_end = .;
- . = ALIGN(32);
+ . = ALIGN(L1_CACHE_BYTES);
__per_cpu_start = .;
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
index 4939ab106e..7b620769f5 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
@@ -252,8 +252,9 @@ static void dump_fault_path(unsigned long address)
page = read_cr3();
page = ((unsigned long *) __va(page))[address >> 22];
- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
- machine_to_phys(page));
+ if (oops_may_print())
+ printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
+ machine_to_phys(page));
/*
* We must not directly access the pte in the highpte
* case, the page table might be allocated in highmem.
@@ -261,7 +262,7 @@ static void dump_fault_path(unsigned long address)
* it's allocated already.
*/
#ifndef CONFIG_HIGHPTE
- if (page & 1) {
+ if ((page & 1) && oops_may_print()) {
page &= PAGE_MASK;
address &= 0x003ff000;
page = machine_to_phys(page);
@@ -311,6 +312,76 @@ static int spurious_fault(struct pt_regs *regs,
return 1;
}
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+ unsigned index = pgd_index(address);
+ pgd_t *pgd_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
+
+ pgd += index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k))
+ return NULL;
+
+ /*
+ * set_pgd(pgd, *pgd_k); here would be useless on PAE
+ * and redundant with the set_pmd() on non-PAE. As would
+ * set_pud.
+ */
+
+ pud = pud_offset(pgd, address);
+ pud_k = pud_offset(pgd_k, address);
+ if (!pud_present(*pud_k))
+ return NULL;
+
+ pmd = pmd_offset(pud, address);
+ pmd_k = pmd_offset(pud_k, address);
+ if (!pmd_present(*pmd_k))
+ return NULL;
+ if (!pmd_present(*pmd))
+#ifndef CONFIG_XEN
+ set_pmd(pmd, *pmd_k);
+#else
+ /*
+ * When running on Xen we must launder *pmd_k through
+ * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
+ */
+ set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
+#endif
+ else
+ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ return pmd_k;
+}
+
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * This assumes no large pages in there.
+ */
+static inline int vmalloc_fault(unsigned long address)
+{
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_paddr = read_cr3();
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+ if (!pmd_k)
+ return -1;
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+ return 0;
+}
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -320,6 +391,8 @@ static int spurious_fault(struct pt_regs *regs,
* bit 0 == 0 means no page found, 1 means protection fault
* bit 1 == 0 means read, 1 means write
* bit 2 == 0 means kernel, 1 means user-mode
+ * bit 3 == 1 means use of reserved bit detected
+ * bit 4 == 1 means fault was an instruction fetch
*/
fastcall void __kprobes do_page_fault(struct pt_regs *regs,
unsigned long error_code)
@@ -339,13 +412,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
if (regs->eflags & X86_EFLAGS_VM)
error_code |= 4;
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
- return;
- /* It's safe to allow irq's after cr2 has been saved */
- if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
- local_irq_enable();
-
tsk = current;
si_code = SEGV_MAPERR;
@@ -361,25 +427,37 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
*
* This verifies that the fault happens in kernel space
* (error_code & 4) == 0, and that the fault was not a
- * protection error (error_code & 1) == 0.
+ * protection error (error_code & 9) == 0.
*/
- if (unlikely(address >= TASK_SIZE)) {
+ if (unlikely(address >= TASK_SIZE)) {
#ifdef CONFIG_XEN
/* Faults in hypervisor area can never be patched up. */
if (address >= hypervisor_virt_start)
goto bad_area_nosemaphore;
#endif
- if (!(error_code & 5))
- goto vmalloc_fault;
+ if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
+ return;
/* Can take a spurious fault if mapping changes R/O -> R/W. */
if (spurious_fault(regs, address, error_code))
return;
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
*/
goto bad_area_nosemaphore;
- }
+ }
+
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ /* It's safe to allow irq's after cr2 has been saved and the vmalloc
+ fault has been handled. */
+ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
+ local_irq_enable();
mm = tsk->mm;
@@ -550,21 +628,27 @@ no_context:
bust_spinlocks(1);
-#ifdef CONFIG_X86_PAE
- if (error_code & 16) {
- pte_t *pte = lookup_address(address);
+ if (oops_may_print()) {
+ #ifdef CONFIG_X86_PAE
+ if (error_code & 16) {
+ pte_t *pte = lookup_address(address);
- if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
- printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
+ if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
+ printk(KERN_CRIT "kernel tried to execute "
+ "NX-protected page - exploit attempt? "
+ "(uid: %d)\n", current->uid);
+ }
+ #endif
+ if (address < PAGE_SIZE)
+ printk(KERN_ALERT "BUG: unable to handle kernel NULL "
+ "pointer dereference");
+ else
+ printk(KERN_ALERT "BUG: unable to handle kernel paging"
+ " request");
+ printk(" at virtual address %08lx\n",address);
+ printk(KERN_ALERT " printing eip:\n");
+ printk("%08lx\n", regs->eip);
}
-#endif
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
- else
- printk(KERN_ALERT "Unable to handle kernel paging request");
- printk(" at virtual address %08lx\n",address);
- printk(KERN_ALERT " printing eip:\n");
- printk("%08lx\n", regs->eip);
dump_fault_path(address);
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
@@ -604,59 +688,41 @@ do_sigbus:
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
- return;
-
-vmalloc_fault:
- {
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "tsk" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- int index = pgd_index(address);
- unsigned long pgd_paddr;
- pgd_t *pgd, *pgd_k;
- pud_t *pud, *pud_k;
- pmd_t *pmd, *pmd_k;
- pte_t *pte_k;
-
- pgd_paddr = read_cr3();
- pgd = index + (pgd_t *)__va(pgd_paddr);
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- goto no_context;
-
- /*
- * set_pgd(pgd, *pgd_k); here would be useless on PAE
- * and redundant with the set_pmd() on non-PAE. As would
- * set_pud.
- */
+}
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
- if (!pud_present(*pud_k))
- goto no_context;
-
- pmd = pmd_offset(pud, address);
- pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- goto no_context;
-#ifndef CONFIG_XEN
- set_pmd(pmd, *pmd_k);
-#else
- /*
- * When running on Xen we must launder *pmd_k through
- * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
- */
- set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
-#endif
+#ifndef CONFIG_X86_PAE
+void vmalloc_sync_all(void)
+{
+ /*
+ * Note that races in the updates of insync and start aren't
+ * problematic: insync can only get set bits added, and updates to
+ * start are only improving performance (without affecting correctness
+ * if undone).
+ */
+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+ static unsigned long start = TASK_SIZE;
+ unsigned long address;
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- goto no_context;
- return;
+ BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
+ for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+ if (!test_bit(pgd_index(address), insync)) {
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ for (page = pgd_list; page; page =
+ (struct page *)page->index)
+ if (!vmalloc_sync_one(page_address(page),
+ address)) {
+ BUG_ON(page != pgd_list);
+ break;
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ if (!page)
+ set_bit(pgd_index(address), insync);
+ }
+ if (address == start && test_bit(pgd_index(address), insync))
+ start = address + PGDIR_SIZE;
}
}
+#endif
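
The vmalloc fixup moves from a goto tail in do_page_fault() into vmalloc_sync_one()/vmalloc_fault(), and the new vmalloc_sync_all() walks every pgd on pgd_list so callers such as register_die_notifier() can pre-sync mappings; interrupts and notify_die() are now deferred until after that fixup, per the updated comment. The `(error_code & 0x0000000d)` test encodes "not-present fault, from kernel mode, no reserved-bit violation" using the i386 page-fault error-code bits listed in the comment above. Spelled out with named masks (these PF_* names come from later kernels and are used here only for illustration):

    /* i386 page-fault error code bits, per the comment in do_page_fault() */
    #define PF_PROT   (1 << 0)  /* protection fault; page was present */
    #define PF_WRITE  (1 << 1)  /* write access */
    #define PF_USER   (1 << 2)  /* fault raised in user mode */
    #define PF_RSVD   (1 << 3)  /* reserved bit set in a paging entry */
    #define PF_INSTR  (1 << 4)  /* instruction fetch */

    /* 0x0d == PF_PROT | PF_USER | PF_RSVD: a fault qualifies for the
     * vmalloc fixup only if none of these bits is set. */
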
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
index 4d2b33068f..ac526672c1 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
@@ -313,7 +313,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
static void __meminit free_new_highpage(struct page *page, int pfn)
{
- set_page_count(page, 1);
+ init_page_count(page);
if (pfn < xen_start_info->nr_pages)
__free_page(page);
totalhigh_pages++;
@@ -664,7 +664,7 @@ void __init mem_init(void)
/* XEN: init and count low-mem pages outside initial allocation. */
for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) {
ClearPageReserved(pfn_to_page(pfn));
- set_page_count(pfn_to_page(pfn), 1);
+ init_page_count(pfn_to_page(pfn));
totalram_pages++;
}
@@ -721,6 +721,7 @@ void __init mem_init(void)
* Specifically, in the case of x86, we will always add
* memory to the highmem for now.
*/
+#ifdef CONFIG_MEMORY_HOTPLUG
#ifndef CONFIG_NEED_MULTIPLE_NODES
int add_memory(u64 start, u64 size)
{
@@ -737,6 +738,7 @@ int remove_memory(u64 start, u64 size)
return -EINVAL;
}
#endif
+#endif
kmem_cache_t *pgd_cache;
kmem_cache_t *pmd_cache;
@@ -795,21 +797,6 @@ static int noinline do_test_wp_bit(void)
return flag;
}
-void free_initmem(void)
-{
- unsigned long addr;
-
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
- memset((void *)addr, 0xcc, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
- printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
-}
-
#ifdef CONFIG_DEBUG_RODATA
extern char __start_rodata, __end_rodata;
@@ -833,17 +820,31 @@ void mark_rodata_ro(void)
}
#endif
+void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+ unsigned long addr;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(addr));
+ init_page_count(virt_to_page(addr));
+ memset((void *)addr, 0xcc, PAGE_SIZE);
+ free_page(addr);
+ totalram_pages++;
+ }
+ printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+}
+
+void free_initmem(void)
+{
+ free_init_pages("unused kernel memory",
+ (unsigned long)(&__init_begin),
+ (unsigned long)(&__init_end));
+}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start < end)
- printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
- free_page(start);
- totalram_pages++;
- }
+ free_init_pages("initrd memory", start, end);
}
#endif
+
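The free_init_pages() helper introduced above walks its range one page at a time: clear the reserved bit, reset the refcount, poison the page with 0xcc (the x86 int3 opcode, so a stale jump into freed init code traps instead of executing garbage), then hand the page back to the allocator. A userspace model of the walk and its "k freed" accounting; PAGE_SIZE and release_page() here are stand-ins, not the kernel APIs:

#include <stdio.h>

#define PAGE_SIZE 4096UL

static void release_page(unsigned long addr)
{
	(void)addr;   /* would ClearPageReserved + init refcount + free */
}

static void free_range(const char *what, unsigned long begin, unsigned long end)
{
	unsigned long addr;

	for (addr = begin; addr < end; addr += PAGE_SIZE)
		release_page(addr);
	printf("Freeing %s: %luk freed\n", what, (end - begin) >> 10);
}

int main(void)
{
	free_range("demo range", 0x100000UL, 0x200000UL);  /* "1024k freed" */
	return 0;
}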
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
index 0ff01f52f8..9ffe43800e 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
@@ -45,7 +45,7 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
pgdat_resize_lock(pgdat, &flags);
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat_page_nr(pgdat, i);
@@ -242,7 +242,7 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
if (pte) {
SetPageForeign(pte, pte_free);
- set_page_count(pte, 1);
+ init_page_count(pte);
}
#endif
return pte;
@@ -257,7 +257,7 @@ void pte_free(struct page *pte)
va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
ClearPageForeign(pte);
- set_page_count(pte, 1);
+ init_page_count(pte);
__free_page(pte);
}
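The recurring set_page_count(page, 1) -> init_page_count(page) substitution in these files follows a 2.6.17 API rename: the new helper states the intent (a page handed back with exactly one reference) instead of a magic constant. A rough standalone model of the relationship, with stand-in types; the kernel's real helper updates page->_count atomically:

#include <assert.h>

struct page { int _count; };   /* stand-in; the real field is atomic */

static void set_page_count(struct page *p, int v)
{
	p->_count = v;
}

static void init_page_count(struct page *p)
{
	set_page_count(p, 1);      /* a fresh page owns one reference */
}

int main(void)
{
	struct page p = { 0 };

	init_page_count(&p);
	assert(p._count == 1);
	return 0;
}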
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/pgtable.c b/linux-2.6-xen-sparse/arch/i386/mm/pgtable.c
index 8b2e540349..de8d187749 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable.c
@@ -37,7 +37,7 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
pgdat_resize_lock(pgdat, &flags);
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat_page_nr(pgdat, i);
diff --git a/linux-2.6-xen-sparse/arch/i386/pci/Makefile b/linux-2.6-xen-sparse/arch/i386/pci/Makefile
index 6d72b4efe7..4447f397f9 100644
--- a/linux-2.6-xen-sparse/arch/i386/pci/Makefile
+++ b/linux-2.6-xen-sparse/arch/i386/pci/Makefile
@@ -1,4 +1,4 @@
-obj-y := i386.o
+obj-y := i386.o init.o
obj-$(CONFIG_PCI_BIOS) += pcbios.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o
diff --git a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c
index ee2f87191f..0ea3514c04 100644
--- a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c
@@ -260,13 +260,13 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
*/
static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
- static unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+ static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
}
static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
{
- static unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+ static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
return 1;
}
@@ -592,7 +592,9 @@ static __init int via_router_probe(struct irq_router *r,
case PCI_DEVICE_ID_VIA_82C596:
case PCI_DEVICE_ID_VIA_82C686:
case PCI_DEVICE_ID_VIA_8231:
+ case PCI_DEVICE_ID_VIA_8233A:
case PCI_DEVICE_ID_VIA_8235:
+ case PCI_DEVICE_ID_VIA_8237:
/* FIXME: add new ones for 8233/5 */
r->name = "VIA";
r->get = pirq_via_get;
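The pirqmap change in irq-xen.c is a bounds fix: pirq is 1-based (INTA..INTD arrive as 1..4), so pirqmap[pirq-1] only ever touches indices 0..3, and the duplicated fifth entry implied a wider range than the VIA 82C586 router supports. A small model of the lookup with the implied precondition made explicit; the assert is added for the demo, the driver itself trusts its callers:

#include <assert.h>
#include <stdio.h>

static const unsigned int pirqmap[4] = { 3, 2, 5, 1 };

static unsigned int pirq_to_nybble(int pirq)
{
	assert(pirq >= 1 && pirq <= 4);   /* INTA..INTD, 1-based */
	return pirqmap[pirq - 1];
}

int main(void)
{
	for (int pirq = 1; pirq <= 4; pirq++)
		printf("pirq %d -> config nybble %u\n", pirq, pirq_to_nybble(pirq));
	return 0;
}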
diff --git a/linux-2.6-xen-sparse/arch/ia64/Kconfig b/linux-2.6-xen-sparse/arch/ia64/Kconfig
index b354f44498..c02c6e8008 100644
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig
@@ -34,6 +34,10 @@ config RWSEM_XCHGADD_ALGORITHM
bool
default y
+config GENERIC_FIND_NEXT_BIT
+ bool
+ default y
+
config GENERIC_CALIBRATE_DELAY
bool
default y
@@ -42,6 +46,10 @@ config TIME_INTERPOLATION
bool
default y
+config DMI
+ bool
+ default y
+
config EFI
bool
default y
@@ -290,9 +298,6 @@ config HOTPLUG_CPU
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
-
config SCHED_SMT
bool "SMT scheduler support"
depends on SMP
@@ -302,6 +307,25 @@ config SCHED_SMT
Intel IA64 chips with MultiThreading at a cost of slightly increased
overhead in some places. If unsure say N here.
+config PERMIT_BSP_REMOVE
+ bool "Support removal of Bootstrap Processor"
+ depends on HOTPLUG_CPU
+ default n
+ ---help---
+ Say Y here if your platform SAL supports removal of the BSP with
+ HOTPLUG_CPU support.
+
+config FORCE_CPEI_RETARGET
+ bool "Force assumption that CPEI can be re-targeted"
+ depends on PERMIT_BSP_REMOVE
+ default n
+ ---help---
+ Say Y if you need to force the assumption that CPEI can be re-targeted to
+ any CPU in the system. This hint is available via the ACPI 3.0 specification.
+ Tiger4 systems are capable of redirecting CPEI to any CPU other than the BSP.
+ This option is useful to enable this feature on older BIOSes as well.
+ You can also enable this by using the boot command line option force_cpei=1.
+
config PREEMPT
bool "Preemptible Kernel"
help
@@ -347,6 +371,16 @@ config NUMA
Access). This option is for configuring high-end multiprocessor
server systems. If in doubt, say N.
+config NODES_SHIFT
+ int "Max num nodes shift (3-10)"
+ range 3 10
+ default "8"
+ depends on NEED_MULTIPLE_NODES
+ help
+ This option specifies the maximum number of nodes in your SSI system.
+ MAX_NUMNODES will be 2^(this value).
+ If in doubt, use the default.
+
# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
# VIRTUAL_MEM_MAP has been retained for historical reasons.
config VIRTUAL_MEM_MAP
@@ -407,6 +441,8 @@ config IA64_PALINFO
config SGI_SN
def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
+source "drivers/sn/Kconfig"
+
source "drivers/firmware/Kconfig"
source "fs/Kconfig.binfmt"
diff --git a/linux-2.6-xen-sparse/arch/ia64/Makefile b/linux-2.6-xen-sparse/arch/ia64/Makefile
index 0fadfb3cc0..c68f030772 100644
--- a/linux-2.6-xen-sparse/arch/ia64/Makefile
+++ b/linux-2.6-xen-sparse/arch/ia64/Makefile
@@ -1,6 +1,9 @@
#
# ia64/Makefile
#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
@@ -74,7 +77,7 @@ drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/
boot := arch/ia64/hp/sim/boot
-.PHONY: boot compressed check
+PHONY += boot compressed check
all: compressed unwcheck
diff --git a/linux-2.6-xen-sparse/arch/ia64/dig/setup.c b/linux-2.6-xen-sparse/arch/ia64/dig/setup.c
index 7f3826991a..d051a6a22c 100644
--- a/linux-2.6-xen-sparse/arch/ia64/dig/setup.c
+++ b/linux-2.6-xen-sparse/arch/ia64/dig/setup.c
@@ -86,8 +86,3 @@ dig_setup (char **cmdline_p)
xen_start_info->console.domU.evtchn = 0;
#endif
}
-
-void __init
-dig_irq_init (void)
-{
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S b/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S
index 67f37b2c53..f069e784b9 100644
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S
@@ -1108,9 +1108,6 @@ skip_rbs_switch:
st8 [r2]=r8
st8 [r3]=r10
.work_pending:
- tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context?
-(p6) br.cond.sptk.few .sigdelayed
- ;;
tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
(p6) br.cond.sptk.few .notify
#ifdef CONFIG_PREEMPT
@@ -1137,17 +1134,6 @@ skip_rbs_switch:
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
br.cond.sptk.many .work_processed_kernel // don't re-check
-// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
-// it could not be delivered. Deliver it now. The signal might be for us and
-// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
-// signal.
-
-.sigdelayed:
- br.call.sptk.many rp=do_sigdelayed
- cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // re-check
-
.work_pending_syscall_end:
adds r2=PT(R8)+16,r12
adds r3=PT(R10)+16,r12
@@ -1625,5 +1611,11 @@ sys_call_table:
data8 sys_ni_syscall // reserved for pselect
data8 sys_ni_syscall // 1295 reserved for ppoll
data8 sys_unshare
+ data8 sys_splice
+ data8 sys_set_robust_list
+ data8 sys_get_robust_list
+ data8 sys_sync_file_range // 1300
+ data8 sys_tee
+ data8 sys_vmsplice
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
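The .org directive that closes the table is a build-time tripwire: after appending the six new entries, the assembler errors out if sys_call_table has grown past 8*NR_syscalls bytes, i.e. if entries were added without bumping NR_syscalls. The same guard in C clothing, using the negative-array-size trick to fail compilation on a mismatch; the names here are demo inventions:

typedef long (*syscall_fn)(void);

static long sys_a(void) { return 0; }
static long sys_b(void) { return 0; }
static long sys_c(void) { return 0; }

#define NR_DEMO_SYSCALLS 3

static syscall_fn table[] = { sys_a, sys_b, sys_c };

/* fails to compile (array of negative size) if the count drifts */
typedef char table_size_guard[
	(sizeof(table) / sizeof(table[0]) == NR_DEMO_SYSCALLS) ? 1 : -1];

int main(void)
{
	return table[NR_DEMO_SYSCALLS - 1]() == 0 ? 0 : 1;
}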
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
index 45377beaa4..0c2e0a358a 100644
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
@@ -82,6 +82,7 @@ SECTIONS
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(__ex_table)
+ *(__mca_table)
}
}
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c b/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c
index 5cbfa26004..52077814bd 100644
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c
@@ -9,54 +9,65 @@
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
*
- * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code.
- * In particular, we now have separate handlers for edge
- * and level triggered interrupts.
- * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
- * PCI to vector mapping, shared PCI interrupts.
- * 00/10/27 D. Mosberger Document things a bit more to make them more understandable.
- * Clean up much of the old IOSAPIC cruft.
- * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for
- * ACPI S5(SoftOff) support.
+ * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O
+ * APIC code. In particular, we now have separate
+ * handlers for edge and level triggered
+ * interrupts.
+ * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
+ * allocation PCI to vector mapping, shared PCI
+ * interrupts.
+ * 00/10/27 D. Mosberger Document things a bit more to make them more
+ * understandable. Clean up much of the old
+ * IOSAPIC cruft.
+ * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts
+ * and fixes for ACPI S5(SoftOff) support.
* 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT
- * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in
- * iosapic_set_affinity(), initializations for
- * /proc/irq/#/smp_affinity
+ * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt
+ * vectors in iosapic_set_affinity(),
+ * initializations for /proc/irq/#/smp_affinity
* 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing.
* 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq
- * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping
- * error
+ * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to
+ * IOSAPIC mapping error
* 02/07/29 T. Kochi Allocate interrupt vectors dynamically
- * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.)
- * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code.
+ * 02/08/04 T. Kochi Cleaned up terminology (irq, global system
+ * interrupt, vector, etc.)
+ * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's
+ * pci_irq code.
* 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC.
- * Remove iosapic_address & gsi_base from external interfaces.
- * Rationalize __init/__devinit attributes.
+ * Remove iosapic_address & gsi_base from
+ * external interfaces. Rationalize
+ * __init/__devinit attributes.
* 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004
- * Updated to work with irq migration necessary for CPU Hotplug
+ * Updated to work with irq migration necessary
+ * for CPU Hotplug
*/
/*
- * Here is what the interrupt logic between a PCI device and the kernel looks like:
+ * Here is what the interrupt logic between a PCI device and the kernel looks
+ * like:
*
- * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The
- * device is uniquely identified by its bus--, and slot-number (the function
- * number does not matter here because all functions share the same interrupt
- * lines).
+ * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
+ * INTD). The device is uniquely identified by its bus and slot number
+ * (the function number does not matter here because all functions share
+ * the same interrupt lines).
*
- * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller.
- * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
- * triggered and use the same polarity). Each interrupt line has a unique Global
- * System Interrupt (GSI) number which can be calculated as the sum of the controller's
- * base GSI number and the IOSAPIC pin number to which the line connects.
+ * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC
+ * controller. Multiple interrupt lines may have to share the same
+ * IOSAPIC pin (if they're level triggered and use the same polarity).
+ * Each interrupt line has a unique Global System Interrupt (GSI) number
+ * which can be calculated as the sum of the controller's base GSI number
+ * and the IOSAPIC pin number to which the line connects.
*
- * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the IOSAPIC pin
- * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU.
+ * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the
+ * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then
+ * sent to the CPU.
*
- * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as
- * architecture-independent interrupt handling mechanism in Linux. As an
- * IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number
- * mapping. On smaller systems, we use one-to-one mapping between IA-64 vector and
- * IRQ. A platform can implement platform_irq_to_vector(irq) and
+ * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is
+ * used as the architecture-independent interrupt handling mechanism in
+ * Linux. As an IRQ is a number, we have to have an
+ * IA-64 interrupt vector number <-> IRQ number mapping. On smaller
+ * systems, we use one-to-one mapping between IA-64 vector and IRQ. A
+ * platform can implement platform_irq_to_vector(irq) and
* platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
* Please see also include/asm-ia64/hw_irq.h for those APIs.
*
@@ -64,9 +75,9 @@
*
* PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
*
- * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts.
- * Now we use "IRQ" only for Linux IRQ's. ISA IRQ (isa_irq) is the only exception in this
- * source code.
+ * Note: The term "IRQ" is loosely used everywhere in the Linux kernel to
+ * describe interrupts. Now we use "IRQ" only for Linux IRQs. ISA IRQ
+ * (isa_irq) is the only exception in this source code.
*/
#include <linux/config.h>
@@ -90,7 +101,6 @@
#include <asm/ptrace.h>
#include <asm/system.h>
-
#undef DEBUG_INTERRUPT_ROUTING
#ifdef DEBUG_INTERRUPT_ROUTING
@@ -99,36 +109,46 @@
#define DBG(fmt...)
#endif
-#define NR_PREALLOCATE_RTE_ENTRIES (PAGE_SIZE / sizeof(struct iosapic_rte_info))
+#define NR_PREALLOCATE_RTE_ENTRIES \
+ (PAGE_SIZE / sizeof(struct iosapic_rte_info))
#define RTE_PREALLOCATED (1)
static DEFINE_SPINLOCK(iosapic_lock);
-/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */
+/*
+ * These tables map IA-64 vectors to the IOSAPIC pin that generates this
+ * vector.
+ */
struct iosapic_rte_info {
- struct list_head rte_list; /* node in list of RTEs sharing the same vector */
+ struct list_head rte_list; /* node in list of RTEs sharing the
+ * same vector */
char __iomem *addr; /* base address of IOSAPIC */
- unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
+ unsigned int gsi_base; /* first GSI assigned to this
+ * IOSAPIC */
char rte_index; /* IOSAPIC RTE index */
int refcnt; /* reference counter */
unsigned int flags; /* flags */
} ____cacheline_aligned;
static struct iosapic_intr_info {
- struct list_head rtes; /* RTEs using this vector (empty => not an IOSAPIC interrupt) */
+ struct list_head rtes; /* RTEs using this vector (empty =>
+ * not an IOSAPIC interrupt) */
int count; /* # of RTEs that shares this vector */
- u32 low32; /* current value of low word of Redirection table entry */
+ u32 low32; /* current value of low word of
+ * Redirection table entry */
unsigned int dest; /* destination CPU physical ID */
unsigned char dmode : 3; /* delivery mode (see iosapic.h) */
- unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */
+ unsigned char polarity: 1; /* interrupt polarity
+ * (see iosapic.h) */
unsigned char trigger : 1; /* trigger mode (see iosapic.h) */
} iosapic_intr_info[IA64_NUM_VECTORS];
static struct iosapic {
char __iomem *addr; /* base address of IOSAPIC */
- unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
- unsigned short num_rte; /* number of RTE in this IOSAPIC */
+ unsigned int gsi_base; /* first GSI assigned to this
+ * IOSAPIC */
+ unsigned short num_rte; /* # of RTEs on this IOSAPIC */
int rtes_inuse; /* # of RTEs in use on this IOSAPIC */
#ifdef CONFIG_NUMA
unsigned short node; /* numa node association via pxm */
@@ -218,7 +238,8 @@ find_iosapic (unsigned int gsi)
int i;
for (i = 0; i < NR_IOSAPICS; i++) {
- if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte)
+ if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
+ iosapic_lists[i].num_rte)
return i;
}
@@ -231,7 +252,8 @@ _gsi_to_vector (unsigned int gsi)
struct iosapic_intr_info *info;
struct iosapic_rte_info *rte;
- for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
+ for (info = iosapic_intr_info; info <
+ iosapic_intr_info + IA64_NUM_VECTORS; ++info)
list_for_each_entry(rte, &info->rtes, rte_list)
if (rte->gsi_base + rte->rte_index == gsi)
return info - iosapic_intr_info;
@@ -254,8 +276,8 @@ gsi_to_irq (unsigned int gsi)
unsigned long flags;
int irq;
/*
- * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq
- * numbers...
+ * XXX fix me: this assumes an identity mapping between IA-64 vector
+ * and Linux irq numbers...
*/
spin_lock_irqsave(&iosapic_lock, flags);
{
@@ -266,7 +288,8 @@ gsi_to_irq (unsigned int gsi)
return irq;
}
-static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec)
+static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi,
+ unsigned int vec)
{
struct iosapic_rte_info *rte;
@@ -306,7 +329,9 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
for (irq = 0; irq < NR_IRQS; ++irq)
if (irq_to_vector(irq) == vector) {
- set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
+ set_irq_affinity_info(irq,
+ (int)(dest & 0xffff),
+ redir);
break;
}
}
@@ -328,7 +353,7 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
}
static void
-nop (unsigned int vector)
+nop (unsigned int irq)
{
/* do nothing... */
}
@@ -350,7 +375,8 @@ mask_irq (unsigned int irq)
{
/* set only the mask bit */
low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+ list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+ rte_list) {
addr = rte->addr;
rte_index = rte->rte_index;
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
@@ -375,7 +401,8 @@ unmask_irq (unsigned int irq)
spin_lock_irqsave(&iosapic_lock, flags);
{
low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+ list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+ rte_list) {
addr = rte->addr;
rte_index = rte->rte_index;
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
@@ -415,21 +442,25 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
spin_lock_irqsave(&iosapic_lock, flags);
{
- low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
+ low32 = iosapic_intr_info[vec].low32 &
+ ~(7 << IOSAPIC_DELIVERY_SHIFT);
if (redir)
/* change delivery mode to lowest priority */
- low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
+ low32 |= (IOSAPIC_LOWEST_PRIORITY <<
+ IOSAPIC_DELIVERY_SHIFT);
else
/* change delivery mode to fixed */
low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
iosapic_intr_info[vec].low32 = low32;
iosapic_intr_info[vec].dest = dest;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+ list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+ rte_list) {
addr = rte->addr;
rte_index = rte->rte_index;
- iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
+ iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index),
+ high32);
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
}
}
@@ -454,7 +485,7 @@ iosapic_end_level_irq (unsigned int irq)
ia64_vector vec = irq_to_vector(irq);
struct iosapic_rte_info *rte;
- move_irq(irq);
+ move_native_irq(irq);
list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
iosapic_eoi(rte->addr, vec);
}
@@ -496,13 +527,14 @@ iosapic_ack_edge_irq (unsigned int irq)
{
irq_desc_t *idesc = irq_descp(irq);
- move_irq(irq);
+ move_native_irq(irq);
/*
* Once we have recorded IRQ_PENDING already, we can mask the
* interrupt for real. This prevents IRQ storms from unhandled
* devices.
*/
- if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED))
+ if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) ==
+ (IRQ_PENDING|IRQ_DISABLED))
mask_irq(irq);
}
@@ -536,7 +568,8 @@ iosapic_version (char __iomem *addr)
return iosapic_read(addr, IOSAPIC_VERSION);
}
-static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol)
+static int iosapic_find_sharable_vector (unsigned long trigger,
+ unsigned long pol)
{
int i, vector = -1, min_count = -1;
struct iosapic_intr_info *info;
@@ -551,7 +584,8 @@ static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long po
for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
info = &iosapic_intr_info[i];
if (info->trigger == trigger && info->polarity == pol &&
- (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) {
+ (info->dmode == IOSAPIC_FIXED || info->dmode ==
+ IOSAPIC_LOWEST_PRIORITY)) {
if (min_count == -1 || info->count < min_count) {
vector = i;
min_count = info->count;
@@ -575,12 +609,15 @@ iosapic_reassign_vector (int vector)
new_vector = assign_irq_vector(AUTO_ASSIGN);
if (new_vector < 0)
panic("%s: out of interrupt vectors!\n", __FUNCTION__);
- printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
+ printk(KERN_INFO "Reassigning vector %d to %d\n",
+ vector, new_vector);
memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
sizeof(struct iosapic_intr_info));
INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
- list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes);
- memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+ list_move(iosapic_intr_info[vector].rtes.next,
+ &iosapic_intr_info[new_vector].rtes);
+ memset(&iosapic_intr_info[vector], 0,
+ sizeof(struct iosapic_intr_info));
iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
}
@@ -593,7 +630,8 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void)
int preallocated = 0;
if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
- rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES);
+ rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
+ NR_PREALLOCATE_RTE_ENTRIES);
if (!rte)
return NULL;
for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
@@ -601,7 +639,8 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void)
}
if (!list_empty(&free_rte_list)) {
- rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list);
+ rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
+ rte_list);
list_del(&rte->rte_list);
preallocated++;
} else {
@@ -644,7 +683,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
index = find_iosapic(gsi);
if (index < 0) {
- printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
+ printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
+ __FUNCTION__, gsi);
return -ENODEV;
}
@@ -655,7 +695,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
if (!rte) {
rte = iosapic_alloc_rte();
if (!rte) {
- printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__);
+ printk(KERN_WARNING "%s: cannot allocate memory\n",
+ __FUNCTION__);
return -ENOMEM;
}
@@ -671,7 +712,9 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
else if (vector_is_shared(vector)) {
struct iosapic_intr_info *info = &iosapic_intr_info[vector];
if (info->trigger != trigger || info->polarity != polarity) {
- printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__);
+ printk (KERN_WARNING
+ "%s: cannot override the interrupt\n",
+ __FUNCTION__);
return -EINVAL;
}
}
@@ -691,8 +734,10 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
idesc = irq_descp(vector);
if (idesc->handler != irq_type) {
if (idesc->handler != &no_irq_type)
- printk(KERN_WARNING "%s: changing vector %d from %s to %s\n",
- __FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
+ printk(KERN_WARNING
+ "%s: changing vector %d from %s to %s\n",
+ __FUNCTION__, vector,
+ idesc->handler->typename, irq_type->typename);
idesc->handler = irq_type;
}
return 0;
@@ -703,6 +748,7 @@ get_target_cpu (unsigned int gsi, int vector)
{
#ifdef CONFIG_SMP
static int cpu = -1;
+ extern int cpe_vector;
/*
* In case of vector shared by multiple RTEs, all RTEs that
@@ -725,6 +771,11 @@ get_target_cpu (unsigned int gsi, int vector)
if (!cpu_online(smp_processor_id()))
return cpu_physical_id(smp_processor_id());
+#ifdef CONFIG_ACPI
+ if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
+ return get_cpei_target_cpu();
+#endif
+
#ifdef CONFIG_NUMA
{
int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
@@ -747,7 +798,7 @@ get_target_cpu (unsigned int gsi, int vector)
if (!num_cpus)
goto skip_numa_setup;
- /* Use vector assigment to distribute across cpus in node */
+ /* Use vector assignment to distribute across cpus in node */
cpu_index = vector % num_cpus;
for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
@@ -769,7 +820,7 @@ skip_numa_setup:
} while (!cpu_online(cpu));
return cpu_physical_id(cpu);
-#else
+#else /* CONFIG_SMP */
return cpu_physical_id(smp_processor_id());
#endif
}
@@ -821,7 +872,8 @@ again:
if (list_empty(&iosapic_intr_info[vector].rtes))
free_irq_vector(vector);
spin_unlock(&iosapic_lock);
- spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+ spin_unlock_irqrestore(&irq_descp(vector)->lock,
+ flags);
goto again;
}
@@ -830,7 +882,8 @@ again:
polarity, trigger);
if (err < 0) {
spin_unlock(&iosapic_lock);
- spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+ spin_unlock_irqrestore(&irq_descp(vector)->lock,
+ flags);
return err;
}
@@ -872,7 +925,8 @@ iosapic_unregister_intr (unsigned int gsi)
*/
irq = gsi_to_irq(gsi);
if (irq < 0) {
- printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
+ printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
+ gsi);
WARN_ON(1);
return;
}
@@ -883,7 +937,9 @@ iosapic_unregister_intr (unsigned int gsi)
spin_lock(&iosapic_lock);
{
if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
- printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
+ printk(KERN_ERR
+ "iosapic_unregister_intr(%u) unbalanced\n",
+ gsi);
WARN_ON(1);
goto out;
}
@@ -893,7 +949,8 @@ iosapic_unregister_intr (unsigned int gsi)
/* Mask the interrupt */
low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
- iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32);
+ iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
+ low32);
/* Remove the rte entry from the list */
list_del(&rte->rte_list);
@@ -906,7 +963,9 @@ iosapic_unregister_intr (unsigned int gsi)
trigger = iosapic_intr_info[vector].trigger;
polarity = iosapic_intr_info[vector].polarity;
dest = iosapic_intr_info[vector].dest;
- printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
+ printk(KERN_INFO
+ "GSI %u (%s, %s) -> CPU %d (0x%04x)"
+ " vector %d unregistered\n",
gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
(polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
cpu_logical_id(dest), dest, vector);
@@ -919,12 +978,15 @@ iosapic_unregister_intr (unsigned int gsi)
idesc->handler = &no_irq_type;
/* Clear the interrupt information */
- memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+ memset(&iosapic_intr_info[vector], 0,
+ sizeof(struct iosapic_intr_info));
iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
if (idesc->action) {
- printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq);
+ printk(KERN_ERR
+ "interrupt handlers still exist on "
+ "IRQ %u\n", irq);
WARN_ON(1);
}
@@ -939,7 +1001,6 @@ iosapic_unregister_intr (unsigned int gsi)
/*
* ACPI calls this when it finds an entry for a platform interrupt.
- * Note that the irq_base and IOSAPIC address must be set in iosapic_init().
*/
int __init
iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
@@ -973,13 +1034,16 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
mask = 1;
break;
default:
- printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type);
+ printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
+ int_type);
return -1;
}
register_intr(gsi, vector, delivery, polarity, trigger);
- printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
+ printk(KERN_INFO
+ "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
+ " vector %d\n",
int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
(polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
@@ -989,10 +1053,8 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
return vector;
}
-
/*
* ACPI calls this when it finds an entry for a legacy ISA IRQ override.
- * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
*/
void __init
iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
@@ -1021,7 +1083,8 @@ iosapic_system_init (int system_pcat_compat)
for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
- INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); /* mark as unused */
+ /* mark as unused */
+ INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
}
pcat_compat = system_pcat_compat;
@@ -1030,10 +1093,12 @@ iosapic_system_init (int system_pcat_compat)
if (pcat_compat) {
/*
- * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
- * enabled.
+ * Disable the compatibility mode interrupts (8259 style),
+ * needs IN/OUT support enabled.
*/
- printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__);
+ printk(KERN_INFO
+ "%s: Disabling PC-AT compatible 8259 interrupts\n",
+ __FUNCTION__);
outb(0xff, 0xA1);
outb(0xff, 0x21);
}
@@ -1073,10 +1138,7 @@ iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
base = iosapic_lists[index].gsi_base;
end = base + iosapic_lists[index].num_rte - 1;
- if (gsi_base < base && gsi_end < base)
- continue;/* OK */
-
- if (gsi_base > end && gsi_end > end)
+ if (gsi_end < base || end < gsi_base)
continue; /* OK */
return -EBUSY;
@@ -1122,12 +1184,14 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
if ((gsi_base == 0) && pcat_compat) {
/*
- * Map the legacy ISA devices into the IOSAPIC data. Some of these may
- * get reprogrammed later on with data from the ACPI Interrupt Source
- * Override table.
+ * Map the legacy ISA devices into the IOSAPIC data. Some of
+ * these may get reprogrammed later on with data from the ACPI
+ * Interrupt Source Override table.
*/
for (isa_irq = 0; isa_irq < 16; ++isa_irq)
- iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
+ iosapic_override_isa_irq(isa_irq, isa_irq,
+ IOSAPIC_POL_HIGH,
+ IOSAPIC_EDGE);
}
return 0;
}
@@ -1150,7 +1214,8 @@ iosapic_remove (unsigned int gsi_base)
if (iosapic_lists[index].rtes_inuse) {
err = -EBUSY;
- printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
+ printk(KERN_WARNING
+ "%s: IOSAPIC for GSI base %u is busy\n",
__FUNCTION__, gsi_base);
goto out;
}
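The rewritten busy-check in iosapic_check_gsi_range() is the canonical disjoint-interval test: two closed ranges overlap unless one ends before the other begins, so a single condition replaces the old pair of one-sided comparisons that said the same thing less directly. Standalone sketch:

#include <assert.h>

/* closed intervals [a0,a1] and [b0,b1] */
static int ranges_overlap(unsigned int a0, unsigned int a1,
			  unsigned int b0, unsigned int b1)
{
	return !(a1 < b0 || b1 < a0);
}

int main(void)
{
	assert(!ranges_overlap(0, 15, 16, 39));  /* adjacent: no overlap */
	assert(ranges_overlap(0, 15, 15, 39));   /* shared endpoint: overlap */
	assert(ranges_overlap(10, 20, 0, 40));   /* containment: overlap */
	return 0;
}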
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c b/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c
index 6e277e494d..ff943fafb7 100644
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c
@@ -115,7 +115,7 @@ ia64_patch_vtop (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-void
+void __init
ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
{
static int first_time = 1;
@@ -149,7 +149,7 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-static void
+static void __init
patch_fsyscall_table (unsigned long start, unsigned long end)
{
extern unsigned long fsyscall_table[NR_syscalls];
@@ -166,7 +166,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-static void
+static void __init
patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
{
extern char fsys_bubble_down[];
@@ -251,7 +251,7 @@ patch_brl_in_vdso(void)
#define patch_brl_in_vdso() do { } while (0)
#endif
-void
+void __init
ia64_patch_gate (void)
{
# define START(name) ((unsigned long) __start_gate_##name##_patchlist)
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c b/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c
index 2d5a61865d..7db6755b45 100644
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c
@@ -5026,15 +5026,13 @@ abort_locked:
if (likely(ctx)) {
DPRINT(("context unlocked\n"));
UNPROTECT_CTX(ctx, flags);
+ fput(file);
}
/* copy argument back to user, if needed */
if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
error_args:
- if (file)
- fput(file);
-
kfree(args_k);
DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
@@ -6813,6 +6811,7 @@ __initcall(pfm_init);
void
pfm_init_percpu (void)
{
+ static int first_time = 1;
/*
* make sure no measurement is active
* (may inherit programmed PMCs from EFI).
@@ -6825,8 +6824,10 @@ pfm_init_percpu (void)
*/
pfm_unfreeze_pmu();
- if (smp_processor_id() == 0)
+ if (first_time) {
register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+ first_time = 0;
+ }
ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
ia64_srlz_d();
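The perfmon change above registers the perfmon vector on whichever CPU runs pfm_init_percpu() first rather than hard-coding CPU 0, which keeps initialization correct on configurations where the boot processor can be removed (compare the PERMIT_BSP_REMOVE option earlier in this series). The kernel relies on boot-time serialization rather than a lock here; a minimal model of the once-only pattern:

#include <stdio.h>

static int first_time = 1;

static void percpu_init(int cpu)
{
	if (first_time) {   /* whichever CPU arrives first does the work */
		printf("registering vector from cpu %d\n", cpu);
		first_time = 0;
	}
}

int main(void)
{
	/* CPU 0 need not come first for the registration to happen once */
	for (int cpu = 3; cpu >= 0; cpu--)
		percpu_init(cpu);
	return 0;
}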
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
index 4158332846..eef4270d53 100644
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
@@ -37,11 +37,11 @@
#include <linux/string.h>
#include <linux/threads.h>
#include <linux/tty.h>
+#include <linux/dmi.h>
#include <linux/serial.h>
#include <linux/serial_core.h>
#include <linux/efi.h>
#include <linux/initrd.h>
-#include <linux/platform.h>
#include <linux/pm.h>
#include <linux/cpufreq.h>
@@ -150,8 +150,8 @@ EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
/*
* We use a special marker for the end of memory and it uses the extra (+1) slot
*/
-struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
-int num_rsvd_regions;
+struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
+int num_rsvd_regions __initdata;
/*
@@ -160,7 +160,7 @@ int num_rsvd_regions;
* caller-specified function is called with the memory ranges that remain after filtering.
* This routine does not assume the incoming segments are sorted.
*/
-int
+int __init
filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
{
unsigned long range_start, range_end, prev_start;
@@ -196,7 +196,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
return 0;
}
-static void
+static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
int j;
@@ -237,7 +237,7 @@ __initcall(register_memory);
* initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined,
* see include/asm-ia64/meminit.h if you need to define more.
*/
-void
+void __init
reserve_memory (void)
{
int n = 0;
@@ -298,7 +298,7 @@ reserve_memory (void)
* Grab the initrd start and end from the boot parameter struct given us by
* the boot loader.
*/
-void
+void __init
find_initrd (void)
{
#ifdef CONFIG_BLK_DEV_INITRD
@@ -400,7 +400,7 @@ mark_bsp_online (void)
}
#ifdef CONFIG_SMP
-static void
+static void __init
check_for_logical_procs (void)
{
pal_logical_to_physical_t info;
@@ -427,6 +427,14 @@ check_for_logical_procs (void)
}
#endif
+static __initdata int nomca;
+static __init int setup_nomca(char *s)
+{
+ nomca = 1;
+ return 0;
+}
+early_param("nomca", setup_nomca);
+
void __init
setup_arch (char **cmdline_p)
{
@@ -451,35 +459,15 @@ setup_arch (char **cmdline_p)
efi_init();
io_port_init();
+ parse_early_param();
+
#ifdef CONFIG_IA64_GENERIC
- {
- const char *mvec_name = strstr (*cmdline_p, "machvec=");
- char str[64];
-
- if (mvec_name) {
- const char *end;
- size_t len;
-
- mvec_name += 8;
- end = strchr (mvec_name, ' ');
- if (end)
- len = end - mvec_name;
- else
- len = strlen (mvec_name);
- len = min(len, sizeof (str) - 1);
- strncpy (str, mvec_name, len);
- str[len] = '\0';
- mvec_name = str;
- } else
- mvec_name = acpi_get_sysname();
- machvec_init(mvec_name);
- }
+ machvec_init(NULL);
#endif
if (early_console_setup(*cmdline_p) == 0)
mark_bsp_online();
- parse_early_param();
#ifdef CONFIG_ACPI
/* Initialize the ACPI boot-time table parser */
acpi_table_init();
@@ -495,7 +483,7 @@ setup_arch (char **cmdline_p)
find_memory();
/* process SAL system table: */
- ia64_sal_init(efi.sal_systab);
+ ia64_sal_init(__va(efi.sal_systab));
ia64_setup_printk_clock();
@@ -561,7 +549,7 @@ setup_arch (char **cmdline_p)
/* enable IA-64 Machine Check Abort Handling unless disabled */
- if (!strstr(saved_command_line, "nomca"))
+ if (!nomca)
ia64_mca_init();
platform_setup(cmdline_p);
@@ -694,7 +682,7 @@ struct seq_operations cpuinfo_op = {
.show = show_cpuinfo
};
-void
+static void __cpuinit
identify_cpu (struct cpuinfo_ia64 *c)
{
union {
@@ -771,7 +759,7 @@ setup_per_cpu_areas (void)
* In addition, the minimum of the i-cache stride sizes is calculated for
* "flush_icache_range()".
*/
-static void
+static void __cpuinit
get_max_cacheline_size (void)
{
unsigned long line_size, max = 1;
@@ -834,10 +822,10 @@ get_max_cacheline_size (void)
* cpu_init() initializes state that is per-CPU. This function acts
* as a 'CPU state barrier', nothing should get across.
*/
-void
+void __cpuinit
cpu_init (void)
{
- extern void __devinit ia64_mmu_init (void *);
+ extern void __cpuinit ia64_mmu_init (void *);
unsigned long num_phys_stacked;
pal_vm_info_2_u_t vmi;
unsigned int max_ctx;
@@ -974,9 +962,16 @@ void sched_cacheflush(void)
ia64_sal_cache_flush(3);
}
-void
+void __init
check_bugs (void)
{
ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
(unsigned long) __end___mckinley_e9_bundles);
}
+
+static int __init run_dmi_scan(void)
+{
+ dmi_scan_machine();
+ return 0;
+}
+core_initcall(run_dmi_scan);
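Switching from strstr(saved_command_line, "nomca") to early_param() buys whole-token matching, so an unrelated option that merely contains the letters "nomca" can no longer disable MCA by accident, and the parse now happens inside parse_early_param(), which this hunk also moves earlier in setup_arch(). A minimal userspace model of the register-and-dispatch pattern; the table and helpers below are stand-ins for the kernel's macro machinery:

#include <stdio.h>
#include <string.h>

static int nomca;

static int setup_nomca(char *s)
{
	(void)s;
	nomca = 1;
	return 0;
}

struct early_param {
	const char *name;
	int (*setup)(char *);
};

static struct early_param params[] = {
	{ "nomca", setup_nomca },
};

static void parse_early_params(char *cmdline)
{
	for (char *tok = strtok(cmdline, " "); tok; tok = strtok(NULL, " "))
		for (size_t i = 0; i < sizeof(params) / sizeof(params[0]); i++)
			if (strcmp(tok, params[i].name) == 0)
				params[i].setup(tok);
}

int main(void)
{
	char cmdline[] = "console=ttyS0 nomca root=/dev/sda1";

	parse_early_params(cmdline);
	printf("nomca=%d\n", nomca);   /* prints 1 */
	return 0;
}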
diff --git a/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c b/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c
new file mode 100644
index 0000000000..4eff275155
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c
@@ -0,0 +1,46 @@
+/*
+ * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/compiler.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <asm/io.h>
+
+static inline void __iomem *
+__ioremap (unsigned long offset, unsigned long size)
+{
+ offset = HYPERVISOR_ioremap(offset, size);
+ if (IS_ERR_VALUE(offset))
+ return (void __iomem*)offset;
+ return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset);
+}
+
+void __iomem *
+ioremap (unsigned long offset, unsigned long size)
+{
+ if (efi_mem_attribute_range(offset, size, EFI_MEMORY_WB))
+ return phys_to_virt(offset);
+
+ if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC))
+ return __ioremap(offset, size);
+
+ /*
+ * Someday this should check ACPI resources so we
+ * can do the right thing for hot-plugged regions.
+ */
+ return __ioremap(offset, size);
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *
+ioremap_nocache (unsigned long offset, unsigned long size)
+{
+ return __ioremap(offset, size);
+}
+EXPORT_SYMBOL(ioremap_nocache);
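The new ia64 ioremap() chooses the mapping type from the EFI memory map instead of assuming uncacheable: a range EFI marks write-back gets the cached identity mapping via phys_to_virt(), while uncacheable (or, for now, unclassified) ranges go through the hypervisor-backed uncached path. A toy version of just that decision; the attribute query and its policy are invented for the demo:

#include <stdio.h>

enum mem_attr { ATTR_WB, ATTR_UC };

static enum mem_attr query_attr(unsigned long offset)
{
	/* toy policy: pretend low memory is write-back capable */
	return offset < 0x1000000UL ? ATTR_WB : ATTR_UC;
}

static const char *map_region(unsigned long offset)
{
	switch (query_attr(offset)) {
	case ATTR_WB:
		return "cached identity mapping";   /* phys_to_virt() */
	default:
		return "uncached mapping";          /* __ioremap() */
	}
}

int main(void)
{
	printf("0x1000     -> %s\n", map_region(0x1000UL));
	printf("0x80000000 -> %s\n", map_region(0x80000000UL));
	return 0;
}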
diff --git a/linux-2.6-xen-sparse/arch/um/kernel/physmem.c b/linux-2.6-xen-sparse/arch/um/kernel/physmem.c
index dece10dd3b..bd4627b4d9 100644
--- a/linux-2.6-xen-sparse/arch/um/kernel/physmem.c
+++ b/linux-2.6-xen-sparse/arch/um/kernel/physmem.c
@@ -9,6 +9,7 @@
#include "linux/vmalloc.h"
#include "linux/bootmem.h"
#include "linux/module.h"
+#include "linux/pfn.h"
#include "asm/types.h"
#include "asm/pgtable.h"
#include "kern_util.h"
@@ -281,7 +282,7 @@ int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
for(i = 0; i < total_pages; i++){
p = &map[i];
- set_page_count(p, 0);
+ memset(p, 0, sizeof(struct page));
SetPageReserved(p);
INIT_LIST_HEAD(&p->lru);
}
@@ -318,8 +319,6 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
}
}
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-
extern int __syscall_stub_start, __binary_start;
void setup_physmem(unsigned long start, unsigned long reserve_end,
@@ -410,6 +409,8 @@ unsigned long find_iomem(char *driver, unsigned long *len_out)
*len_out = region->size;
return(region->virt);
}
+
+ region = region->next;
}
return(0);
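The one-line find_iomem() fix adds the missing cursor advance: without region = region->next, the while loop re-tests the same non-matching region forever. Reduced model of the corrected walk:

#include <stdio.h>
#include <string.h>

struct region {
	const char *driver;
	struct region *next;
};

static struct region *find(struct region *r, const char *name)
{
	while (r != NULL) {
		if (strcmp(r->driver, name) == 0)
			return r;
		r = r->next;   /* the line the fix adds */
	}
	return NULL;
}

int main(void)
{
	struct region b = { "mmio", NULL };
	struct region a = { "flash", &b };

	printf("%s\n", find(&a, "mmio") ? "found" : "missing");
	return 0;
}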
diff --git a/linux-2.6-xen-sparse/arch/x86_64/Kconfig b/linux-2.6-xen-sparse/arch/x86_64/Kconfig
index 5fcf581612..6aec19e2b0 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig
+++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig
@@ -45,6 +45,10 @@ config RWSEM_GENERIC_SPINLOCK
config RWSEM_XCHGADD_ALGORITHM
bool
+config GENERIC_HWEIGHT
+ bool
+ default y
+
config GENERIC_CALIBRATE_DELAY
bool
default y
@@ -148,6 +152,11 @@ config X86_L1_CACHE_SHIFT
default "7" if GENERIC_CPU || MPSC
default "6" if MK8
+config X86_INTERNODE_CACHE_BYTES
+ int
+ default "4096" if X86_VSMP
+ default X86_L1_CACHE_BYTES if !X86_VSMP
+
config X86_TSC
bool
depends on !X86_64_XEN
@@ -271,6 +280,15 @@ config SCHED_SMT
cost of slightly increased overhead in some places. If unsure say
N here.
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ default y
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
source "kernel/Kconfig.preempt"
config NUMA
@@ -295,6 +313,11 @@ config K8_NUMA
Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
instead, which also takes priority if both are compiled in.
+config NODES_SHIFT
+ int
+ default "6"
+ depends on NEED_MULTIPLE_NODES
+
# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
config X86_64_ACPI_NUMA
@@ -346,9 +369,13 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool y
depends on NUMA
+config OUT_OF_LINE_PFN_TO_PAGE
+ def_bool y
+ depends on DISCONTIGMEM
+
config NR_CPUS
int "Maximum number of CPUs (2-256)"
- range 2 256
+ range 2 255
depends on SMP
default "16" if X86_64_XEN
default "8"
@@ -368,8 +395,6 @@ config HOTPLUG_CPU
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
config HPET_TIMER
bool
@@ -393,13 +418,15 @@ config GART_IOMMU
select SWIOTLB
depends on PCI && !X86_64_XEN
help
- Support the IOMMU. Needed to run systems with more than 3GB of memory
- properly with 32-bit PCI devices that do not support DAC (Double Address
- Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter.
- Normally the kernel will take the right choice by itself.
- This option includes a driver for the AMD Opteron/Athlon64 northbridge IOMMU
- and a software emulation used on other systems.
- If unsure, say Y.
+ Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors
+ and for the bounce buffering software IOMMU.
+ Needed to run systems with more than 3GB of memory properly with
+ 32-bit PCI devices that do not support DAC (Double Address Cycle).
+ The IOMMU can be turned off at runtime with the iommu=off parameter.
+ Normally the kernel will take the right choice by itself.
+ This option includes a driver for the AMD Opteron/Athlon64 northbridge
+ IOMMU and a software emulation used on other systems without a
+ hardware IOMMU. If unsure, say Y.
# need this always enabled with GART_IOMMU for the VIA workaround
config SWIOTLB
@@ -459,10 +486,10 @@ config CRASH_DUMP
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
default "0x1000000" if CRASH_DUMP
- default "0x100000"
+ default "0x200000"
help
This gives the physical address where the kernel is loaded. Normally
- for regular kernels this value is 0x100000 (1MB). But in the case
+ for regular kernels this value is 0x200000 (2MB). But in the case
of kexec on panic the fail safe kernel needs to run at a different
address than the panic-ed kernel. This option is used to set the load
address for kernels used to capture crash dump on being kexec'ed
@@ -494,6 +521,14 @@ config SECCOMP
source kernel/Kconfig.hz
+config REORDER
+ bool "Function reordering"
+ default n
+ help
+ This option enables the toolchain to reorder functions for more
+ optimal TLB usage. If you have pretty much any version of binutils,
+ this can increase your kernel build time by roughly one minute.
+
endmenu
#
@@ -560,16 +595,6 @@ config XEN_PCIDEV_FE_DEBUG
help
Enables some debug statements within the PCI Frontend.
-config UNORDERED_IO
- bool "Unordered IO mapping access"
- depends on EXPERIMENTAL
- help
- Use unordered stores to access IO memory mappings in device drivers.
- Still very experimental. When a driver works on IA64/ppc64/pa-risc it should
- work with this option, but it makes the drivers behave differently
- from i386. Requires that the driver writer used memory barriers
- properly.
-
source "drivers/pci/pcie/Kconfig"
source "drivers/pci/Kconfig"
diff --git a/linux-2.6-xen-sparse/arch/x86_64/Makefile b/linux-2.6-xen-sparse/arch/x86_64/Makefile
index 9164bf30a0..4b81d9f4c3 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/Makefile
+++ b/linux-2.6-xen-sparse/arch/x86_64/Makefile
@@ -24,39 +24,41 @@
LDFLAGS := -m elf_x86_64
OBJCOPYFLAGS := -O binary -R .note -R .comment -S
LDFLAGS_vmlinux :=
-
CHECKFLAGS += -D__x86_64__ -m64
+cflags-y :=
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
-CFLAGS += $(cflags-y)
+cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
cppflags-$(CONFIG_XEN) += \
-D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
CPPFLAGS += $(cppflags-y)
-CFLAGS += -m64
-CFLAGS += -mno-red-zone
-CFLAGS += -mcmodel=kernel
-CFLAGS += -pipe
+cflags-y += -m64
+cflags-y += -mno-red-zone
+cflags-y += -mcmodel=kernel
+cflags-y += -pipe
+cflags-$(CONFIG_REORDER) += -ffunction-sections
# this makes reading assembly source easier, but produces worse code
# actually it makes the kernel smaller too.
-CFLAGS += -fno-reorder-blocks
-CFLAGS += -Wno-sign-compare
+cflags-y += -fno-reorder-blocks
+cflags-y += -Wno-sign-compare
ifneq ($(CONFIG_UNWIND_INFO),y)
-CFLAGS += -fno-asynchronous-unwind-tables
+cflags-y += -fno-asynchronous-unwind-tables
endif
ifneq ($(CONFIG_DEBUG_INFO),y)
# -fweb shrinks the kernel a bit, but the difference is very small
# it also messes up debugging, so don't use it for now.
-#CFLAGS += $(call cc-option,-fweb)
+#cflags-y += $(call cc-option,-fweb)
endif
# -funit-at-a-time shrinks the kernel .text considerably
# unfortunately it makes reading oopses harder.
-CFLAGS += $(call cc-option,-funit-at-a-time)
+cflags-y += $(call cc-option,-funit-at-a-time)
# prevent gcc from generating any FP code by mistake
-CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+CFLAGS += $(cflags-y)
AFLAGS += -m64
head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
@@ -71,8 +73,8 @@ drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/
boot := arch/x86_64/boot
-.PHONY: bzImage bzlilo install archmrproper \
- fdimage fdimage144 fdimage288 archclean
+PHONY += bzImage bzlilo install archmrproper \
+ fdimage fdimage144 fdimage288 isoimage archclean
ifdef CONFIG_XEN
CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
@@ -104,7 +106,7 @@ bzlilo: vmlinux
bzdisk: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk
-fdimage fdimage144 fdimage288: vmlinux
+fdimage fdimage144 fdimage288 isoimage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@
install:
@@ -117,11 +119,16 @@ archclean:
define archhelp
echo '* bzImage - Compressed kernel image (arch/$(ARCH)/boot/bzImage)'
echo ' install - Install kernel using'
- echo ' (your) ~/bin/installkernel or'
- echo ' (distribution) /sbin/installkernel or'
- echo ' install to $$(INSTALL_PATH) and run lilo'
+ echo ' (your) ~/bin/installkernel or'
+ echo ' (distribution) /sbin/installkernel or'
+ echo ' install to $$(INSTALL_PATH) and run lilo'
+ echo ' bzdisk - Create a boot floppy in /dev/fd0'
+ echo ' fdimage - Create a boot floppy image'
+ echo ' isoimage - Create a boot CD-ROM image'
endef
-CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf
+CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
+ arch/$(ARCH)/boot/image.iso \
+ arch/$(ARCH)/boot/mtools.conf
diff --git a/linux-2.6-xen-sparse/arch/x86_64/ia32/ia32entry-xen.S b/linux-2.6-xen-sparse/arch/x86_64/ia32/ia32entry-xen.S
index 73129e2dac..d9530e45d5 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/ia32/ia32entry-xen.S
+++ b/linux-2.6-xen-sparse/arch/x86_64/ia32/ia32entry-xen.S
@@ -16,7 +16,9 @@
#include <linux/linkage.h>
#define __XEN_X86_64 1
-
+
+#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
+
.macro IA32_ARG_FIXUP noebp=0
movl %edi,%r8d
.if \noebp
@@ -127,8 +129,8 @@ ENTRY(ia32_sysenter_target)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
sysenter_do_call:
- cmpl $(IA32_NR_syscalls),%eax
- jae ia32_badsys
+ cmpl $(IA32_NR_syscalls-1),%eax
+ ja ia32_badsys
IA32_ARG_FIXUP 1
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
@@ -231,8 +233,8 @@ ENTRY(ia32_cstar_target)
CFI_REMEMBER_STATE
jnz cstar_tracesys
cstar_do_call:
- cmpl $IA32_NR_syscalls,%eax
- jae ia32_badsys
+ cmpl $IA32_NR_syscalls-1,%eax
+ ja ia32_badsys
IA32_ARG_FIXUP 1
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
@@ -323,8 +325,8 @@ ENTRY(ia32_syscall)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
jnz ia32_tracesys
ia32_do_syscall:
- cmpl $(IA32_NR_syscalls),%eax
- jae ia32_badsys
+ cmpl $(IA32_NR_syscalls-1),%eax
+ ja ia32_badsys
IA32_ARG_FIXUP
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
@@ -528,7 +530,7 @@ ia32_sys_call_table:
.quad sys_setdomainname
.quad sys_uname
.quad sys_modify_ldt
- .quad sys32_adjtimex
+ .quad compat_sys_adjtimex
.quad sys32_mprotect /* 125 */
.quad compat_sys_sigprocmask
.quad quiet_ni_syscall /* create_module */
@@ -712,10 +714,13 @@ ia32_sys_call_table:
.quad sys_readlinkat /* 305 */
.quad sys_fchmodat
.quad sys_faccessat
- .quad sys_ni_syscall /* pselect6 for now */
- .quad sys_ni_syscall /* ppoll for now */
+ .quad quiet_ni_syscall /* pselect6 for now */
+ .quad quiet_ni_syscall /* ppoll for now */
.quad sys_unshare /* 310 */
+ .quad compat_sys_set_robust_list
+ .quad compat_sys_get_robust_list
+ .quad sys_splice
+ .quad sys_sync_file_range
+ .quad sys_tee
+ .quad compat_sys_vmsplice
ia32_syscall_end:
- .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
- .quad ni_syscall
- .endr
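With IA32_NR_syscalls now derived by the assembler from the table itself (see the #define near the top of this file), the .rept padding is gone and the bound check becomes cmpl $(IA32_NR_syscalls-1) / ja in place of cmpl $(IA32_NR_syscalls) / jae. For unsigned syscall numbers the two forms reject exactly the same set; a brute-force check of that claim (assumes NR >= 1):

#include <assert.h>

int main(void)
{
	unsigned int NR = 317;   /* stand-in for IA32_NR_syscalls */

	for (unsigned int n = 0; n < 2 * NR; n++)
		assert((n >= NR) == (n > NR - 1));
	return 0;
}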
diff --git a/linux-2.6-xen-sparse/arch/x86_64/ia32/vsyscall-sigreturn.S b/linux-2.6-xen-sparse/arch/x86_64/ia32/vsyscall-sigreturn.S
index 3947f45d08..6086ac6fe6 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/ia32/vsyscall-sigreturn.S
+++ b/linux-2.6-xen-sparse/arch/x86_64/ia32/vsyscall-sigreturn.S
@@ -32,9 +32,28 @@ __kernel_rt_sigreturn:
.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
.section .eh_frame,"a",@progbits
+.LSTARTFRAMES:
+ .long .LENDCIES-.LSTARTCIES
+.LSTARTCIES:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zRS" /* NUL-terminated augmentation string */
+ .uleb128 1 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 8 /* Return address register column */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+ .byte 0x0c /* DW_CFA_def_cfa */
+ .uleb128 4
+ .uleb128 4
+ .byte 0x88 /* DW_CFA_offset, column 0x8 */
+ .uleb128 1
+ .align 4
+.LENDCIES:
+
.long .LENDFDE2-.LSTARTFDE2 /* Length FDE */
.LSTARTFDE2:
- .long .LSTARTFDE2-.LSTARTFRAME /* CIE pointer */
+ .long .LSTARTFDE2-.LSTARTFRAMES /* CIE pointer */
/* HACK: The dwarf2 unwind routines will subtract 1 from the
return address to get an address in the middle of the
presumed call instruction. Since we didn't get here via
@@ -97,7 +116,7 @@ __kernel_rt_sigreturn:
.long .LENDFDE3-.LSTARTFDE3 /* Length FDE */
.LSTARTFDE3:
- .long .LSTARTFDE3-.LSTARTFRAME /* CIE pointer */
+ .long .LSTARTFDE3-.LSTARTFRAMES /* CIE pointer */
/* HACK: See above wrt unwind library assumptions. */
.long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
.long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
index 5763a45e79..22afa8c03a 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- dmi_scan.o pci-dma.o pci-nommu.o
+ pci-dma.o pci-nommu.o
obj-$(CONFIG_X86_MCE) += mce.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -47,11 +47,10 @@ bootflag-y += ../../i386/kernel/bootflag.o
cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o
topology-y += ../../i386/kernel/topology.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
-intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
+intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo-xen.o
quirks-y += ../../i386/kernel/quirks.o
i8237-y += ../../i386/kernel/i8237.o
msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
-dmi_scan-y += ../../i386/kernel/dmi_scan.o
ifdef CONFIG_XEN
time-y += ../../i386/kernel/time-xen.o
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile b/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile
index 57b7fe1c11..aa84f6eb98 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile
@@ -4,7 +4,6 @@ obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
ifneq ($(CONFIG_ACPI_PROCESSOR),)
obj-y += processor.o
-processor-y := ../../../i386/kernel/acpi/processor.o ../../../i386/kernel/acpi/cstate.o
endif
boot-$(CONFIG_XEN) := ../../../i386/kernel/acpi/boot-xen.o
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c
index 3152b914f9..a805584d23 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c
@@ -194,5 +194,5 @@ int __init APIC_init_uniprocessor (void)
setup_IO_APIC();
#endif
- return 0;
+ return 1;
}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
index 37427e6285..22d0d9b72c 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
@@ -78,6 +78,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
*addrp = __pa_symbol(&_end);
return 1;
}
+
+ if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
+ *addrp = ebda_addr + ebda_size;
+ return 1;
+ }
+
/* XXX ramdisk image here? */
#else
if (last < (table_end<<PAGE_SHIFT)) {
@@ -89,7 +95,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
}
#ifndef CONFIG_XEN
-int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
+/*
+ * This function checks if any part of the range <start,end> is mapped
+ * with type.
+ */
+int __meminit
+e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
@@ -104,6 +115,35 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
}
#endif
+/*
+ * This function checks if the entire range <start,end> is mapped with type.
+ *
+ * Note: this function only works correctly if the e820 table is sorted
+ * and non-overlapping, which is the case.
+ */
+int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
+{
+ int i;
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ if (type && ei->type != type)
+ continue;
+ /* does any part of the region overlap the current entry? */
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+
+ /* if the region covers the beginning of <start,end>, move
+ * start past it, since everything up to there is mapped
+ */
+ if (ei->addr <= start)
+ start = ei->addr + ei->size;
+ /* if start is now at or beyond end, we're done, full coverage */
+ if (start >= end)
+ return 1; /* we're done */
+ }
+ return 0;
+}
+
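
For reference, the coverage walk that e820_all_mapped() adds reduces to the following self-contained user-space sketch (the struct and function names are illustrative stand-ins, not the kernel's types; entries are assumed sorted and non-overlapping as the comment above states). Each entry that covers the current start advances it; full coverage is reached once start crosses end.

#include <stdio.h>

struct range { unsigned long addr, size; int type; };

/* same walk as e820_all_mapped(): advance start across covering entries */
static int all_mapped(const struct range *map, int n,
                      unsigned long start, unsigned long end, int type)
{
	int i;
	for (i = 0; i < n; i++) {
		const struct range *e = &map[i];
		if (type && e->type != type)
			continue;
		if (e->addr >= end || e->addr + e->size <= start)
			continue;		/* no overlap at all */
		if (e->addr <= start)
			start = e->addr + e->size; /* covered up to here */
		if (start >= end)
			return 1;		/* full coverage */
	}
	return 0;
}

int main(void)
{
	struct range map[] = { { 0x0, 0x1000, 1 }, { 0x1000, 0x1000, 1 } };
	printf("%d\n", all_mapped(map, 2, 0x0, 0x2000, 1)); /* 1 */
	printf("%d\n", all_mapped(map, 2, 0x0, 0x3000, 1)); /* 0: gap at 0x2000 */
	return 0;
}
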
/*
* Find a free area in a specific range.
*/
@@ -119,7 +159,7 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsi
addr = start;
if (addr > ei->addr + ei->size)
continue;
- while (bad_addr(&addr, size) && addr+size < ei->addr + ei->size)
+ while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
;
last = addr + size;
if (last > ei->addr + ei->size)
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/early_printk-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/early_printk-xen.c
index c1eb6b5392..17aab572fa 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/early_printk-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/early_printk-xen.c
@@ -18,13 +18,10 @@
#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
#endif
-#define MAX_YPOS max_ypos
-#define MAX_XPOS max_xpos
-
static int max_ypos = 25, max_xpos = 80;
#ifndef CONFIG_XEN
-static int current_ypos = 1, current_xpos = 0;
+static int current_ypos = 25, current_xpos = 0;
static void early_vga_write(struct console *con, const char *str, unsigned n)
{
@@ -32,26 +29,26 @@ static void early_vga_write(struct console *con, const char *str, unsigned n)
int i, k, j;
while ((c = *str++) != '\0' && n-- > 0) {
- if (current_ypos >= MAX_YPOS) {
+ if (current_ypos >= max_ypos) {
/* scroll 1 line up */
- for (k = 1, j = 0; k < MAX_YPOS; k++, j++) {
- for (i = 0; i < MAX_XPOS; i++) {
- writew(readw(VGABASE + 2*(MAX_XPOS*k + i)),
- VGABASE + 2*(MAX_XPOS*j + i));
+ for (k = 1, j = 0; k < max_ypos; k++, j++) {
+ for (i = 0; i < max_xpos; i++) {
+ writew(readw(VGABASE+2*(max_xpos*k+i)),
+ VGABASE + 2*(max_xpos*j + i));
}
}
- for (i = 0; i < MAX_XPOS; i++)
- writew(0x720, VGABASE + 2*(MAX_XPOS*j + i));
- current_ypos = MAX_YPOS-1;
+ for (i = 0; i < max_xpos; i++)
+ writew(0x720, VGABASE + 2*(max_xpos*j + i));
+ current_ypos = max_ypos-1;
}
if (c == '\n') {
current_xpos = 0;
current_ypos++;
} else if (c != '\r') {
writew(((0x7 << 8) | (unsigned short) c),
- VGABASE + 2*(MAX_XPOS*current_ypos +
+ VGABASE + 2*(max_xpos*current_ypos +
current_xpos++));
- if (current_xpos >= MAX_XPOS) {
+ if (current_xpos >= max_xpos) {
current_xpos = 0;
current_ypos++;
}
@@ -66,7 +63,7 @@ static struct console early_vga_console = {
.index = -1,
};
-/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */
+/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */
static int early_serial_base = 0x3f8; /* ttyS0 */
@@ -86,30 +83,30 @@ static int early_serial_base = 0x3f8; /* ttyS0 */
#define DLL 0 /* Divisor Latch Low */
#define DLH 1 /* Divisor latch High */
-static int early_serial_putc(unsigned char ch)
-{
- unsigned timeout = 0xffff;
- while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
+static int early_serial_putc(unsigned char ch)
+{
+ unsigned timeout = 0xffff;
+ while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
cpu_relax();
outb(ch, early_serial_base + TXR);
return timeout ? 0 : -1;
-}
+}
static void early_serial_write(struct console *con, const char *s, unsigned n)
{
- while (*s && n-- > 0) {
- early_serial_putc(*s);
- if (*s == '\n')
- early_serial_putc('\r');
- s++;
- }
-}
+ while (*s && n-- > 0) {
+ early_serial_putc(*s);
+ if (*s == '\n')
+ early_serial_putc('\r');
+ s++;
+ }
+}
#define DEFAULT_BAUD 9600
static __init void early_serial_init(char *s)
{
- unsigned char c;
+ unsigned char c;
unsigned divisor;
unsigned baud = DEFAULT_BAUD;
char *e;
@@ -118,7 +115,7 @@ static __init void early_serial_init(char *s)
++s;
if (*s) {
- unsigned port;
+ unsigned port;
if (!strncmp(s,"0x",2)) {
early_serial_base = simple_strtoul(s, &e, 16);
} else {
@@ -142,16 +139,16 @@ static __init void early_serial_init(char *s)
outb(0x3, early_serial_base + MCR); /* DTR + RTS */
if (*s) {
- baud = simple_strtoul(s, &e, 0);
- if (baud == 0 || s == e)
+ baud = simple_strtoul(s, &e, 0);
+ if (baud == 0 || s == e)
baud = DEFAULT_BAUD;
- }
-
- divisor = 115200 / baud;
- c = inb(early_serial_base + LCR);
- outb(c | DLAB, early_serial_base + LCR);
- outb(divisor & 0xff, early_serial_base + DLL);
- outb((divisor >> 8) & 0xff, early_serial_base + DLH);
+ }
+
+ divisor = 115200 / baud;
+ c = inb(early_serial_base + LCR);
+ outb(c | DLAB, early_serial_base + LCR);
+ outb(divisor & 0xff, early_serial_base + DLL);
+ outb((divisor >> 8) & 0xff, early_serial_base + DLH);
outb(c & ~DLAB, early_serial_base + LCR);
}
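
A quick worked example of the divisor math in early_serial_init() above: the 16550's base clock gives 115200 baud at divisor 1, and the 16-bit divisor is split across the DLL/DLH latch registers while DLAB is set. The baud value below is an example, not taken from the patch:

#include <stdio.h>

int main(void)
{
	unsigned baud = 9600;
	unsigned divisor = 115200 / baud;		/* 12 */
	unsigned char dll = divisor & 0xff;		/* 0x0c -> DLL register */
	unsigned char dlh = (divisor >> 8) & 0xff;	/* 0x00 -> DLH register */
	printf("divisor=%u DLL=0x%02x DLH=0x%02x\n", divisor, dll, dlh);
	return 0;
}
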
@@ -240,67 +237,70 @@ struct console *early_console = &early_vga_console;
static int early_console_initialized = 0;
void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
+{
+ char buf[512];
+ int n;
va_list ap;
- va_start(ap,fmt);
+ va_start(ap,fmt);
n = vscnprintf(buf,512,fmt,ap);
early_console->write(early_console,buf,n);
- va_end(ap);
-}
+ va_end(ap);
+}
static int __initdata keep_early;
-int __init setup_early_printk(char *opt)
-{
+int __init setup_early_printk(char *opt)
+{
char *space;
- char buf[256];
+ char buf[256];
if (early_console_initialized)
- return -1;
+ return 1;
- strlcpy(buf,opt,sizeof(buf));
- space = strchr(buf, ' ');
+ strlcpy(buf,opt,sizeof(buf));
+ space = strchr(buf, ' ');
if (space)
- *space = 0;
+ *space = 0;
if (strstr(buf,"keep"))
- keep_early = 1;
+ keep_early = 1;
- if (!strncmp(buf, "serial", 6)) {
+ if (!strncmp(buf, "serial", 6)) {
early_serial_init(buf + 6);
early_console = &early_serial_console;
- } else if (!strncmp(buf, "ttyS", 4)) {
+ } else if (!strncmp(buf, "ttyS", 4)) {
early_serial_init(buf);
- early_console = &early_serial_console;
+ early_console = &early_serial_console;
} else if (!strncmp(buf, "vga", 3)
&& SCREEN_INFO.orig_video_isVGA == 1) {
max_xpos = SCREEN_INFO.orig_video_cols;
max_ypos = SCREEN_INFO.orig_video_lines;
- early_console = &early_vga_console;
+#ifndef CONFIG_XEN
+ current_ypos = SCREEN_INFO.orig_y;
+#endif
+ early_console = &early_vga_console;
} else if (!strncmp(buf, "simnow", 6)) {
simnow_init(buf + 6);
early_console = &simnow_console;
keep_early = 1;
}
early_console_initialized = 1;
- register_console(early_console);
+ register_console(early_console);
return 0;
}
void __init disable_early_printk(void)
-{
+{
if (!early_console_initialized || !early_console)
return;
if (!keep_early) {
printk("disabling early console\n");
unregister_console(early_console);
early_console_initialized = 0;
- } else {
+ } else {
printk("keeping early console\n");
}
-}
+}
__setup("earlyprintk=", setup_early_printk);
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
index 687c486878..0de11d6c6c 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
@@ -868,7 +868,7 @@ ENTRY(failsafe_callback)
movq (%rsp),%rcx
movq 8(%rsp),%r11
addq $0x30,%rsp
- movq $-9999,%rdi /* better code? */
+ movq $11,%rdi /* SIGSEGV */
jmp do_exit
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp),%rcx
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
index 0de0341a5f..c2446d0fa9 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
@@ -79,9 +79,6 @@ NEXT_PAGE(level3_user_pgt)
NEXT_PAGE(level2_kernel_pgt)
.fill 512,8,0
-NEXT_PAGE(empty_zero_page)
- .skip PAGE_SIZE
-
NEXT_PAGE(hypercall_page)
.fill 512,8,0
@@ -92,7 +89,7 @@ NEXT_PAGE(hypercall_page)
.align 16
.globl cpu_gdt_descr
cpu_gdt_descr:
- .word gdt_end-cpu_gdt_table
+ .word gdt_end-cpu_gdt_table-1
gdt:
.quad cpu_gdt_table
#ifdef CONFIG_SMP
@@ -134,6 +131,11 @@ gdt_end:
/* zero the remaining page */
.fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
+ .section .bss.page_aligned, "aw", @nobits
+ .align PAGE_SIZE
+ENTRY(empty_zero_page)
+ .skip PAGE_SIZE
+
#ifdef CONFIG_XEN_COMPAT_030002
/*
* __xen_guest information
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c
index 1fb4c8f112..3abbf20696 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c
@@ -51,7 +51,7 @@ static int no_timer_check;
int disable_timer_pin_1 __initdata;
#ifndef CONFIG_XEN
-int timer_over_8254 __initdata = 1;
+int timer_over_8254 __initdata = 0;
/* Where if anywhere is the i8259 connect in external int mode */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
@@ -321,6 +321,18 @@ __setup("enable_8254_timer", setup_enable_8254_timer);
#include <linux/pci_ids.h>
#include <linux/pci.h>
+
+#ifdef CONFIG_ACPI
+
+static int nvidia_hpet_detected __initdata;
+
+static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
+{
+ nvidia_hpet_detected = 1;
+ return 0;
+}
+#endif
+
/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
off. Check for an Nvidia or VIA PCI bridge and turn it off.
Use pci direct infrastructure because this runs before the PCI subsystem.
@@ -360,18 +372,26 @@ void __init check_ioapic(void)
force_iommu) &&
!iommu_aperture_allowed) {
printk(KERN_INFO
- "Looks like a VIA chipset. Disabling IOMMU. Overwrite with \"iommu=allowed\"\n");
+ "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
iommu_aperture_disabled = 1;
}
#endif
return;
case PCI_VENDOR_ID_NVIDIA:
#ifdef CONFIG_ACPI
- /* All timer overrides on Nvidia
- seem to be wrong. Skip them. */
- acpi_skip_timer_override = 1;
- printk(KERN_INFO
- "Nvidia board detected. Ignoring ACPI timer override.\n");
+ /*
+ * All timer overrides on Nvidia are
+ * wrong unless HPET is enabled.
+ */
+ nvidia_hpet_detected = 0;
+ acpi_table_parse(ACPI_HPET,
+ nvidia_hpet_check);
+ if (nvidia_hpet_detected == 0) {
+ acpi_skip_timer_override = 1;
+ printk(KERN_INFO "Nvidia board "
+ "detected. Ignoring ACPI "
+ "timer override.\n");
+ }
#endif
/* RED-PEN skip them on mptables too? */
return;
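
The control flow of the new check, modeled in plain user-space C (the table-parsing step is faked with a flag here; in the kernel, acpi_table_parse() invokes the callback once per matching table): the timer override is skipped only when no HPET table was seen.

#include <stdio.h>

static int hpet_found;
static int hpet_check(void) { hpet_found = 1; return 0; } /* callback stand-in */

static void parse_tables(int have_hpet) { if (have_hpet) hpet_check(); }

int main(void)
{
	parse_tables(0);			/* board without an HPET table */
	if (!hpet_found)
		printf("Nvidia board detected. Ignoring ACPI timer override.\n");
	return 0;
}
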
@@ -1849,6 +1869,8 @@ static inline void unlock_ExtINT_logic(void)
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+int timer_uses_ioapic_pin_0;
+
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
@@ -1886,6 +1908,9 @@ static inline void check_timer(void)
pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic;
+ if (pin1 == 0)
+ timer_uses_ioapic_pin_0 = 1;
+
apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
vector, apic1, pin1, apic2, pin2);
@@ -1920,7 +1945,7 @@ static inline void check_timer(void)
*/
setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
if (timer_irq_works()) {
- printk("works.\n");
+ apic_printk(APIC_VERBOSE," works.\n");
nmi_watchdog_default();
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
@@ -1932,7 +1957,7 @@ static inline void check_timer(void)
*/
clear_IO_APIC_pin(apic2, pin2);
}
- printk(" failed.\n");
+ apic_printk(APIC_VERBOSE," failed.\n");
if (nmi_watchdog == NMI_IO_APIC) {
printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
@@ -1947,7 +1972,7 @@ static inline void check_timer(void)
enable_8259A_irq(0);
if (timer_irq_works()) {
- apic_printk(APIC_QUIET, " works.\n");
+ apic_printk(APIC_VERBOSE," works.\n");
return;
}
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
@@ -1970,6 +1995,7 @@ static inline void check_timer(void)
}
#else
#define check_timer() ((void)0)
+int timer_uses_ioapic_pin_0 = 0;
#endif /* !CONFIG_XEN */
static int __init notimercheck(char *s)
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c
index 268f8cd6dc..3068ce26e2 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c
@@ -38,9 +38,8 @@ int show_interrupts(struct seq_file *p, void *v)
if (i == 0) {
seq_printf(p, " ");
- for (j=0; j<NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "CPU%d ",j);
+ for_each_online_cpu(j)
+ seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
}
@@ -53,10 +52,8 @@ int show_interrupts(struct seq_file *p, void *v)
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
- for (j=0; j<NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ",
- kstat_cpu(j).irqs[i]);
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
@@ -68,15 +65,13 @@ skip:
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c
index a5f74372ae..a43ca1e1be 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c
@@ -107,11 +107,11 @@ static int __init mpf_checksum(unsigned char *mp, int len)
}
#ifndef CONFIG_XEN
-static void __init MP_processor_info (struct mpc_config_processor *m)
+static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
{
int cpu;
unsigned char ver;
- static int found_bsp=0;
+ cpumask_t tmp_map;
if (!(m->mpc_cpuflag & CPU_ENABLED)) {
disabled_cpus++;
@@ -134,8 +134,10 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
return;
}
- cpu = num_processors++;
-
+ num_processors++;
+ cpus_complement(tmp_map, cpu_present_map);
+ cpu = first_cpu(tmp_map);
+
#if MAX_APICS < 255
if ((int)m->mpc_apicid > MAX_APICS) {
printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
@@ -161,12 +163,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
* entry is BSP, and so on.
*/
cpu = 0;
-
- bios_cpu_apicid[0] = m->mpc_apicid;
- x86_cpu_to_apicid[0] = m->mpc_apicid;
- found_bsp = 1;
- } else
- cpu = num_processors - found_bsp;
+ }
bios_cpu_apicid[cpu] = m->mpc_apicid;
x86_cpu_to_apicid[cpu] = m->mpc_apicid;
@@ -697,7 +694,7 @@ void __init mp_register_lapic_address (
}
-void __init mp_register_lapic (
+void __cpuinit mp_register_lapic (
u8 id,
u8 enabled)
{
@@ -981,7 +978,17 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
*/
int irq = gsi;
if (gsi < MAX_GSI_NUM) {
- if (gsi > 15)
+ /*
+ * Retain the VIA chipset work-around (gsi > 15), but
+ * avoid a problem where the 8254 timer (IRQ0) is set up
+ * via an override (so it's not on pin 0 of the ioapic),
+ * and at the same time, the pin 0 interrupt is a PCI
+ * type. The gsi > 15 test could cause these two pins
+ * to be shared as IRQ0, and they are not shareable.
+ * So test for this condition, and if necessary, avoid
+ * the pin collision.
+ */
+ if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
gsi = pci_irq++;
/*
* Don't assign IRQ used by ACPI SCI
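
A runnable model of the guard this hunk adds to mp_register_gsi(), with illustrative IRQ numbers: when an override moved the 8254 timer off pin 0, GSI 0 is treated as a PCI interrupt and remapped; when the timer really is on pin 0, GSI 0 stays as IRQ0.

#include <stdio.h>

static int route(int gsi, int timer_uses_pin_0, int *pci_irq)
{
	/* mirror of the guard added to mp_register_gsi() */
	if (gsi > 15 || (gsi == 0 && !timer_uses_pin_0))
		gsi = (*pci_irq)++;
	return gsi;
}

int main(void)
{
	int pci_irq = 16;
	/* timer rerouted by an override: GSI 0 carries a PCI irq, remap it */
	printf("%d\n", route(0, 0, &pci_irq));	/* 16: collision avoided */
	/* timer really is on pin 0: leave GSI 0 alone as IRQ0 */
	printf("%d\n", route(0, 1, &pci_irq));	/* 0 */
	return 0;
}
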
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
index 8267a772b7..c1642b033a 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
@@ -38,8 +38,8 @@
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
-#include <linux/kprobes.h>
#include <linux/notifier.h>
+#include <linux/kprobes.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -75,24 +75,17 @@ EXPORT_SYMBOL(boot_option_idle_override);
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
-static struct notifier_block *idle_notifier;
-static DEFINE_SPINLOCK(idle_notifier_lock);
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
void idle_notifier_register(struct notifier_block *n)
{
- unsigned long flags;
- spin_lock_irqsave(&idle_notifier_lock, flags);
- notifier_chain_register(&idle_notifier, n);
- spin_unlock_irqrestore(&idle_notifier_lock, flags);
+ atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);
void idle_notifier_unregister(struct notifier_block *n)
{
- unsigned long flags;
- spin_lock_irqsave(&idle_notifier_lock, flags);
- notifier_chain_unregister(&idle_notifier, n);
- spin_unlock_irqrestore(&idle_notifier_lock, flags);
+ atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL(idle_notifier_unregister);
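
Usage of the converted chain is unchanged for callers; only the locking moved into the notifier head, which is why register/unregister lose their irqsave boilerplate. A kernel-style sketch of a consumer (the callback name is hypothetical, and this fragment is not compilable stand-alone):

#include <linux/notifier.h>

/* hypothetical consumer of the idle chain, for illustration only */
static int my_idle_event(struct notifier_block *nb,
			 unsigned long action, void *data)
{
	/* action is IDLE_START or IDLE_END */
	return NOTIFY_OK;
}

static struct notifier_block my_idle_nb = {
	.notifier_call = my_idle_event,
};

/* idle_notifier_register(&my_idle_nb);  -- no caller-side locking needed */
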
@@ -102,13 +95,13 @@ static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
void enter_idle(void)
{
__get_cpu_var(idle_state) = CPU_IDLE;
- notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+ atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
static void __exit_idle(void)
{
__get_cpu_var(idle_state) = CPU_NOT_IDLE;
- notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+ atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
/* Called from interrupts to signify idle end */
@@ -309,13 +302,6 @@ void exit_thread(void)
struct task_struct *me = current;
struct thread_struct *t = &me->thread;
- /*
- * Remove function-return probe instances associated with this task
- * and put them back on the free list. Do not insert an exit probe for
- * this function, it will be disabled by kprobe_flush_task if you do.
- */
- kprobe_flush_task(me);
-
if (me->thread.io_bitmap_ptr) {
#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
@@ -512,7 +498,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* multicall to indicate FPU task switch, rather than
* synchronously trapping to Xen.
* This must be here to ensure both math_state_restore() and
- * kernel_fpu_begin() work consistently.
+ * kernel_fpu_begin() work consistently.
* The AMD workaround requires it to be after DS reload, or
* after DS has been cleared, which we do in __prepare_arch_switch.
*/
@@ -591,7 +577,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs);
/*
- * Switch the PDA context.
+ * Switch the PDA and FPU contexts.
*/
prev->userrsp = read_pda(oldrsp);
write_pda(oldrsp, next->userrsp);
@@ -781,10 +767,16 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
}
case ARCH_GET_GS: {
unsigned long base;
+ unsigned gsindex;
if (task->thread.gsindex == GS_TLS_SEL)
base = read_32bit_tls(task, GS_TLS);
- else if (doit)
- rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else if (doit) {
+ asm("movl %%gs,%0" : "=r" (gsindex));
+ if (gsindex)
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else
+ base = task->thread.gs;
+ }
else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
index 43e53c88e4..5411f289ab 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
@@ -46,6 +46,7 @@
#include <linux/cpufreq.h>
#include <linux/dmi.h>
#include <linux/dma-mapping.h>
+#include <linux/ctype.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
@@ -67,6 +68,7 @@
#include <asm/swiotlb.h>
#include <asm/sections.h>
#include <asm/gart-mapping.h>
+#include <asm/dmi.h>
#ifdef CONFIG_XEN
#include <linux/percpu.h>
#include <xen/interface/physdev.h>
@@ -137,6 +139,12 @@ int bootloader_type;
unsigned long saved_video_mode;
+/*
+ * Early DMI memory
+ */
+int dmi_alloc_index;
+char dmi_alloc_data[DMI_MAX_DATA];
+
/*
* Setup options
*/
@@ -325,6 +333,13 @@ static void __init probe_roms(void)
}
}
+/* Check for full argument with no trailing characters */
+static int fullarg(char *p, char *arg)
+{
+ int l = strlen(arg);
+ return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
+}
+
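
Why the parser switches from bare memcmp() to fullarg() can be seen in a small user-space demo (the option strings are illustrative): a prefix match alone would also accept options that merely start with the expected text.

#include <stdio.h>
#include <string.h>
#include <ctype.h>

static int fullarg(const char *p, const char *arg)
{
	size_t l = strlen(arg);
	return !memcmp(p, arg, l) && (p[l] == 0 || isspace((unsigned char)p[l]));
}

int main(void)
{
	printf("%d\n", fullarg("acpi=off", "acpi=off"));	/* 1 */
	printf("%d\n", fullarg("acpi=offbeat", "acpi=off"));	/* 0 */
	printf("%d\n", !memcmp("acpi=offbeat", "acpi=off", 8));	/* 1: too loose */
	return 0;
}
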
static __init void parse_cmdline_early (char ** cmdline_p)
{
char c = ' ', *to = command_line, *from = COMMAND_LINE;
@@ -348,10 +363,10 @@ static __init void parse_cmdline_early (char ** cmdline_p)
#endif
#ifdef CONFIG_ACPI
/* "acpi=off" disables both ACPI table parsing and interpreter init */
- if (!memcmp(from, "acpi=off", 8))
+ if (fullarg(from,"acpi=off"))
disable_acpi();
- if (!memcmp(from, "acpi=force", 10)) {
+ if (fullarg(from, "acpi=force")) {
/* add later when we do DMI horrors: */
acpi_force = 1;
acpi_disabled = 0;
@@ -359,48 +374,45 @@ static __init void parse_cmdline_early (char ** cmdline_p)
/* acpi=ht just means: do ACPI MADT parsing
at bootup, but don't enable the full ACPI interpreter */
- if (!memcmp(from, "acpi=ht", 7)) {
+ if (fullarg(from, "acpi=ht")) {
if (!acpi_force)
disable_acpi();
acpi_ht = 1;
}
- else if (!memcmp(from, "pci=noacpi", 10))
+ else if (fullarg(from, "pci=noacpi"))
acpi_disable_pci();
- else if (!memcmp(from, "acpi=noirq", 10))
+ else if (fullarg(from, "acpi=noirq"))
acpi_noirq_set();
- else if (!memcmp(from, "acpi_sci=edge", 13))
+ else if (fullarg(from, "acpi_sci=edge"))
acpi_sci_flags.trigger = 1;
- else if (!memcmp(from, "acpi_sci=level", 14))
+ else if (fullarg(from, "acpi_sci=level"))
acpi_sci_flags.trigger = 3;
- else if (!memcmp(from, "acpi_sci=high", 13))
+ else if (fullarg(from, "acpi_sci=high"))
acpi_sci_flags.polarity = 1;
- else if (!memcmp(from, "acpi_sci=low", 12))
+ else if (fullarg(from, "acpi_sci=low"))
acpi_sci_flags.polarity = 3;
/* acpi=strict disables out-of-spec workarounds */
- else if (!memcmp(from, "acpi=strict", 11)) {
+ else if (fullarg(from, "acpi=strict")) {
acpi_strict = 1;
}
#ifdef CONFIG_X86_IO_APIC
- else if (!memcmp(from, "acpi_skip_timer_override", 24))
+ else if (fullarg(from, "acpi_skip_timer_override"))
acpi_skip_timer_override = 1;
#endif
#endif
#ifndef CONFIG_XEN
- if (!memcmp(from, "nolapic", 7) ||
- !memcmp(from, "disableapic", 11))
+ if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
+ clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
disable_apic = 1;
+ }
- /* Don't confuse with noapictimer */
- if (!memcmp(from, "noapic", 6) &&
- (from[6] == ' ' || from[6] == 0))
+ if (fullarg(from, "noapic"))
skip_ioapic_setup = 1;
- /* Make sure to not confuse with apic= */
- if (!memcmp(from, "apic", 4) &&
- (from[4] == ' ' || from[4] == 0)) {
+ if (fullarg(from,"apic")) {
skip_ioapic_setup = 0;
ioapic_force = 1;
}
@@ -440,7 +452,7 @@ static __init void parse_cmdline_early (char ** cmdline_p)
iommu_setup(from+6);
}
- if (!memcmp(from,"oops=panic", 10))
+ if (fullarg(from,"oops=panic"))
panic_on_oops = 1;
if (!memcmp(from, "noexec=", 7))
@@ -591,7 +603,7 @@ void __init alternative_instructions(void)
static int __init noreplacement_setup(char *s)
{
no_replacement = 1;
- return 0;
+ return 1;
}
__setup("noreplacement", noreplacement_setup);
@@ -621,17 +633,28 @@ static inline void copy_edd(void)
#ifndef CONFIG_XEN
#define EBDA_ADDR_POINTER 0x40E
-static void __init reserve_ebda_region(void)
+
+unsigned __initdata ebda_addr;
+unsigned __initdata ebda_size;
+
+static void discover_ebda(void)
{
- unsigned int addr;
- /**
+ /*
* there is a real-mode segmented pointer pointing to the
* 4K EBDA area at 0x40E
*/
- addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER);
- addr <<= 4;
- if (addr)
- reserve_bootmem_generic(addr, PAGE_SIZE);
+ ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
+ ebda_addr <<= 4;
+
+ ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
+
+ /* Round EBDA up to pages */
+ if (ebda_size == 0)
+ ebda_size = 1;
+ ebda_size <<= 10;
+ ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
+ if (ebda_size > 64*1024)
+ ebda_size = 64*1024;
}
#endif
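
The rounding in discover_ebda() can be replayed in user space with example values (the segment value and size below are illustrative; on real hardware they come from the BIOS data area at 0x40E and from the first word of the EBDA itself, which holds its size in KiB):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define round_up(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned long ebda_seg  = 0x9fc0;		/* example BDA word */
	unsigned long ebda_addr = ebda_seg << 4;	/* 0x9fc00 physical */
	unsigned long ebda_size = 1;			/* example: 1 KiB */

	ebda_size <<= 10;				/* KiB -> bytes */
	ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
	if (ebda_size > 64 * 1024)			/* clamp to 64 KiB */
		ebda_size = 64 * 1024;
	printf("EBDA at 0x%lx, reserving %lu bytes\n", ebda_addr, ebda_size);
	return 0;
}
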
@@ -642,7 +665,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_XEN
/* Register a call for panic conditions. */
- notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
kernel_end = 0; /* dummy */
@@ -726,11 +749,18 @@ void __init setup_arch(char **cmdline_p)
* we are rounding upwards:
*/
end_pfn = e820_end_of_ram();
+ num_physpages = end_pfn; /* for pfn_valid */
check_efer();
+#ifndef CONFIG_XEN
+ discover_ebda();
+#endif
+
init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
+ /* dmi_scan_machine(); */
+
#ifdef CONFIG_ACPI_NUMA
/*
* Parse SRAT to discover nodes.
@@ -763,7 +793,8 @@ void __init setup_arch(char **cmdline_p)
reserve_bootmem_generic(0, PAGE_SIZE);
/* reserve ebda region */
- reserve_ebda_region();
+ if (ebda_addr)
+ reserve_bootmem_generic(ebda_addr, ebda_size);
#endif
#ifdef CONFIG_SMP
@@ -898,6 +929,12 @@ void __init setup_arch(char **cmdline_p)
zap_low_mappings(0);
+ /*
+ * set this early, so we don't allocate cpu0
+ * if the MADT list doesn't list the BSP first
+ * (mpparse.c/MP_processor_info() allocates logical cpu numbers).
+ */
+ cpu_set(0, cpu_present_map);
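
The earlier MP_processor_info() hunk now derives each AP's logical number as the first free slot in cpu_present_map (cpus_complement() plus first_cpu()), which is why the BSP must claim slot 0 here before the MADT is parsed. A toy model with a plain bitmask standing in for the cpumask:

#include <stdio.h>

int main(void)
{
	unsigned long present = 1;		/* bit 0: BSP claimed early */
	int i;

	for (i = 0; i < 3; i++) {
		/* first_cpu(complement of present map) */
		int cpu = __builtin_ctzl(~present);
		printf("AP gets logical cpu %d\n", cpu);	/* 1, 2, 3 */
		present |= 1UL << cpu;
	}
	return 0;
}
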
#ifdef CONFIG_ACPI
/*
* Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -1092,7 +1129,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
unsigned bits;
#ifdef CONFIG_NUMA
int node = 0;
- unsigned apicid = phys_proc_id[cpu];
+ unsigned apicid = hard_smp_processor_id();
#endif
bits = 0;
@@ -1102,7 +1139,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
/* Low order bits define the core id (index of core in socket) */
cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
/* Convert the APIC ID into the socket ID */
- phys_proc_id[cpu] >>= bits;
+ phys_proc_id[cpu] = phys_pkg_id(bits);
#ifdef CONFIG_NUMA
node = phys_proc_id[cpu];
@@ -1128,8 +1165,8 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
}
numa_set_node(cpu, node);
- printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
- cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
+ printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n",
+ cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]);
#endif
#endif
}
@@ -1187,8 +1224,6 @@ static int __init init_amd(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x80000008) {
c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
- if (c->x86_max_cores & (c->x86_max_cores - 1))
- c->x86_max_cores = 1;
amd_detect_cmp(c);
}
@@ -1205,7 +1240,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
cpuid(1, &eax, &ebx, &ecx, &edx);
- c->apicid = phys_pkg_id(0);
if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
@@ -1275,7 +1309,7 @@ static void srat_detect_node(void)
for now. */
node = apicid_to_node[hard_smp_processor_id()];
if (node == NUMA_NO_NODE)
- node = 0;
+ node = first_node(node_online_map);
numa_set_node(cpu, node);
if (acpi_numa > 0)
@@ -1414,6 +1448,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
c->x86_capability[2] = cpuid_edx(0x80860001);
}
+#ifdef CONFIG_X86_XEN_GENAPIC
+ c->apicid = phys_pkg_id(0);
+#endif
+
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
@@ -1521,7 +1559,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
+ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
"tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1604,8 +1642,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
{
int i;
for ( i = 0 ; i < 32*NCAPINTS ; i++ )
- if ( test_bit(i, &c->x86_capability) &&
- x86_cap_flags[i] != NULL )
+ if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
seq_printf(m, " %s", x86_cap_flags[i]);
}
@@ -1670,3 +1707,22 @@ static int __init run_dmi_scan(void)
}
core_initcall(run_dmi_scan);
+#ifdef CONFIG_INPUT_PCSPKR
+#include <linux/platform_device.h>
+static __init int add_pcspkr(void)
+{
+ struct platform_device *pd;
+ int ret;
+
+ pd = platform_device_alloc("pcspkr", -1);
+ if (!pd)
+ return -ENOMEM;
+
+ ret = platform_device_add(pd);
+ if (ret)
+ platform_device_put(pd);
+
+ return ret;
+}
+device_initcall(add_pcspkr);
+#endif
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c
index 0c72b9b369..6dd154b049 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c
@@ -41,7 +41,7 @@ struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
#ifndef CONFIG_X86_NO_IDT
-struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
+struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
#endif
char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
@@ -64,11 +64,11 @@ int __init nonx_setup(char *str)
do_not_nx = 1;
__supported_pte_mask &= ~_PAGE_NX;
}
- return 0;
+ return 1;
}
__setup("noexec=", nonx_setup); /* parsed early actually */
-int force_personality32 = READ_IMPLIES_EXEC;
+int force_personality32 = 0;
/* noexec32=on|off
Control non executable heap for 32bit processes.
@@ -83,7 +83,7 @@ static int __init nonx32_setup(char *str)
force_personality32 &= ~READ_IMPLIES_EXEC;
else if (!strcmp(str, "off"))
force_personality32 |= READ_IMPLIES_EXEC;
- return 0;
+ return 1;
}
__setup("noexec32=", nonx32_setup);
@@ -311,7 +311,7 @@ void __cpuinit cpu_init (void)
switch (v + 1) {
#if DEBUG_STKSZ > EXCEPTION_STKSZ
case DEBUG_STACK:
- cpu_pda[cpu].debugstack = (unsigned long)estacks;
+ cpu_pda(cpu)->debugstack = (unsigned long)estacks;
estacks += DEBUG_STKSZ;
break;
#endif
@@ -349,12 +349,12 @@ void __cpuinit cpu_init (void)
* Clear all 6 debug registers:
*/
- set_debug(0UL, 0);
- set_debug(0UL, 1);
- set_debug(0UL, 2);
- set_debug(0UL, 3);
- set_debug(0UL, 6);
- set_debug(0UL, 7);
+ set_debugreg(0UL, 0);
+ set_debugreg(0UL, 1);
+ set_debugreg(0UL, 2);
+ set_debugreg(0UL, 3);
+ set_debugreg(0UL, 6);
+ set_debugreg(0UL, 7);
fpu_init();
}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c
index 0406973b1d..1b9fad1bde 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c
@@ -80,7 +80,7 @@ static inline void leave_mm(unsigned long cpu)
{
if (read_pda(mmu_state) == TLBSTATE_OK)
BUG();
- clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask);
+ cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
load_cr3(swapper_pg_dir);
}
@@ -91,7 +91,7 @@ static inline void leave_mm(unsigned long cpu)
* [cpu0: the cpu that switches]
* 1) switch_mm() either 1a) or 1b)
* 1a) thread switch to a different mm
- * 1a1) clear_bit(cpu, &old_mm->cpu_vm_mask);
+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
* Stop ipi delivery for the old mm. This is not synchronized with
* the other cpus, but smp_invalidate_interrupt ignore flush ipis
* for the wrong mm, and in the worst case we perform a superfluous
@@ -101,7 +101,7 @@ static inline void leave_mm(unsigned long cpu)
* was in lazy tlb mode.
* 1a3) update cpu active_mm
* Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) set_bit(cpu, &new_mm->cpu_vm_mask);
+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
* Now the other cpus will send tlb flush ipis.
* 1a4) change cr3.
* 1b) thread switch without mm change
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
index 653af67c08..9e05e9835b 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
@@ -48,10 +48,6 @@
#include <asm/proto.h>
#include <asm/nmi.h>
-#ifndef CONFIG_X86_NO_IDT
-extern struct gate_struct idt_table[256];
-#endif
-
asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
@@ -74,18 +70,20 @@ asmlinkage void alignment_check(void);
asmlinkage void machine_check(void);
asmlinkage void spurious_interrupt_bug(void);
-struct notifier_block *die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+ATOMIC_NOTIFIER_HEAD(die_chain);
int register_die_notifier(struct notifier_block *nb)
{
- int err = 0;
- unsigned long flags;
- spin_lock_irqsave(&die_notifier_lock, flags);
- err = notifier_chain_register(&die_chain, nb);
- spin_unlock_irqrestore(&die_notifier_lock, flags);
- return err;
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&die_chain, nb);
+}
+EXPORT_SYMBOL(register_die_notifier);
+
+int unregister_die_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&die_chain, nb);
}
+EXPORT_SYMBOL(unregister_die_notifier);
static inline void conditional_sti(struct pt_regs *regs)
{
@@ -104,6 +102,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
{
if (regs->eflags & X86_EFLAGS_IF)
local_irq_disable();
+ /* Make sure to not schedule here because we could be running
+ on an exception stack. */
preempt_enable_no_resched();
}
@@ -125,7 +125,7 @@ int printk_address(unsigned long address)
if (!modname)
modname = delim = "";
return printk("<%016lx>{%s%s%s%s%+ld}",
- address,delim,modname,delim,symname,offset);
+ address, delim, modname, delim, symname, offset);
}
#else
int printk_address(unsigned long address)
@@ -339,13 +339,12 @@ void show_registers(struct pt_regs *regs)
show_stack(NULL, (unsigned long*)rsp);
printk("\nCode: ");
- if(regs->rip < PAGE_OFFSET)
+ if (regs->rip < PAGE_OFFSET)
goto bad;
- for(i=0;i<20;i++)
- {
+ for (i=0; i<20; i++) {
unsigned char c;
- if(__get_user(c, &((unsigned char*)regs->rip)[i])) {
+ if (__get_user(c, &((unsigned char*)regs->rip)[i])) {
bad:
printk(" Bad RIP value.");
break;
@@ -390,6 +389,7 @@ void out_of_line_bug(void)
static DEFINE_SPINLOCK(die_lock);
static int die_owner = -1;
+static unsigned int die_nest_count;
unsigned __kprobes long oops_begin(void)
{
@@ -404,6 +404,7 @@ unsigned __kprobes long oops_begin(void)
else
spin_lock(&die_lock);
}
+ die_nest_count++;
die_owner = cpu;
console_verbose();
bust_spinlocks(1);
@@ -414,7 +415,13 @@ void __kprobes oops_end(unsigned long flags)
{
die_owner = -1;
bust_spinlocks(0);
- spin_unlock_irqrestore(&die_lock, flags);
+ die_nest_count--;
+ if (die_nest_count)
+ /* We still own the lock */
+ local_irq_restore(flags);
+ else
+ /* Nest count reaches zero, release the lock. */
+ spin_unlock_irqrestore(&die_lock, flags);
if (panic_on_oops)
panic("Oops");
}
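
The nesting behaviour added above, reduced to a runnable counter model (the actual spinlock is elided): only the outermost oops_end() releases the lock, so a recursive oops taken while printing the first cannot unlock underneath its caller.

#include <stdio.h>

static int die_nest_count;

static void oops_begin_model(void) { die_nest_count++; }

static void oops_end_model(void)
{
	if (--die_nest_count)
		printf("inner oops: keep die_lock held\n");
	else
		printf("outermost oops: release die_lock\n");
}

int main(void)
{
	oops_begin_model();	/* outer oops */
	oops_begin_model();	/* nested oops while handling the first */
	oops_end_model();	/* keep lock */
	oops_end_model();	/* release */
	return 0;
}
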
@@ -470,6 +477,8 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs)
panic("nmi watchdog");
printk("console shuts up ...\n");
oops_end(flags);
+ nmi_exit();
+ local_irq_enable();
do_exit(SIGSEGV);
}
#endif
@@ -480,8 +489,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
{
struct task_struct *tsk = current;
- conditional_sti(regs);
-
tsk->thread.error_code = error_code;
tsk->thread.trap_no = trapnr;
@@ -490,7 +497,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
printk(KERN_INFO
"%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
tsk->comm, tsk->pid, str,
- regs->rip,regs->rsp,error_code);
+ regs->rip, regs->rsp, error_code);
if (info)
force_sig_info(signr, info, tsk);
@@ -504,9 +511,9 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
{
const struct exception_table_entry *fixup;
fixup = search_exception_tables(regs->rip);
- if (fixup) {
+ if (fixup)
regs->rip = fixup->fixup;
- } else
+ else
die(str, regs, error_code);
return;
}
@@ -518,6 +525,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
return; \
+ conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, NULL); \
}
@@ -532,6 +540,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
return; \
+ conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, &info); \
}
@@ -545,7 +554,17 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
DO_ERROR(18, SIGSEGV, "reserved", reserved)
-DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
+
+/* Runs on IST stack */
+asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
+{
+ if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
+ 12, SIGBUS) == NOTIFY_STOP)
+ return;
+ preempt_conditional_sti(regs);
+ do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
+ preempt_conditional_cli(regs);
+}
asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
{
@@ -579,7 +598,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
printk(KERN_INFO
"%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
tsk->comm, tsk->pid,
- regs->rip,regs->rsp,error_code);
+ regs->rip, regs->rsp, error_code);
force_sig(SIGSEGV, tsk);
return;
@@ -683,8 +702,9 @@ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
return;
}
+ preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
- return;
+ preempt_conditional_cli(regs);
}
/* Help handler running on IST stack to switch back to user stack
@@ -1015,14 +1035,14 @@ void smp_trap_init(trap_info_t *trap_ctxt)
static int __init oops_dummy(char *s)
{
panic_on_oops = 1;
- return -1;
+ return 1;
}
__setup("oops=", oops_dummy);
static int __init kstack_setup(char *s)
{
kstack_depth_to_print = simple_strtoul(s,NULL,0);
- return 0;
+ return 1;
}
__setup("kstack=", kstack_setup);
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c
index d6f9285c44..4d8c3a3e70 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c
@@ -95,8 +95,6 @@ EXPORT_SYMBOL(smp_call_function);
EXPORT_SYMBOL(screen_info);
#endif
-EXPORT_SYMBOL(get_wchan);
-
#ifdef CONFIG_X86_LOCAL_APIC
EXPORT_SYMBOL_GPL(set_nmi_callback);
EXPORT_SYMBOL_GPL(unset_nmi_callback);
@@ -107,7 +105,6 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
#undef memcpy
#undef memset
#undef memmove
-#undef strlen
extern void * memset(void *,int,__kernel_size_t);
extern size_t strlen(const char *);
@@ -116,7 +113,6 @@ extern void * memcpy(void *,const void *,__kernel_size_t);
extern void * __memcpy(void *,const void *,__kernel_size_t);
EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
@@ -136,16 +132,12 @@ EXPORT_SYMBOL(rwsem_down_write_failed_thunk);
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(die_chain);
-EXPORT_SYMBOL(register_die_notifier);
#ifdef CONFIG_SMP
EXPORT_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(smp_num_siblings);
#endif
-extern void do_softirq_thunk(void);
-EXPORT_SYMBOL(do_softirq_thunk);
-
#ifdef CONFIG_BUG
EXPORT_SYMBOL(out_of_line_bug);
#endif
diff --git a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
index cb3fe42f0e..3ad7799d30 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
@@ -265,6 +265,8 @@ static int vmalloc_fault(unsigned long address)
return -1;
if (pgd_none(*pgd))
set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
/* Below here mismatches are bugs because these lower tables
are shared */
@@ -370,22 +372,14 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
if (!user_mode(regs))
error_code &= ~PF_USER; /* means kernel */
+ tsk = current;
+ mm = tsk->mm;
+ prefetchw(&mm->mmap_sem);
+
/* get the address */
address = HYPERVISOR_shared_info->vcpu_info[
smp_processor_id()].arch.cr2;
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
- return;
-
- if (likely(regs->eflags & X86_EFLAGS_IF))
- local_irq_enable();
- if (unlikely(page_fault_trace))
- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
- regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
-
- tsk = current;
- mm = tsk->mm;
info.si_code = SEGV_MAPERR;
@@ -410,13 +404,15 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
*/
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
((address >= VMALLOC_START && address < VMALLOC_END))) {
- if (vmalloc_fault(address) < 0)
- goto bad_area_nosemaphore;
- return;
+ if (vmalloc_fault(address) >= 0)
+ return;
}
/* Can take a spurious fault if mapping changes R/O -> R/W. */
if (spurious_fault(regs, address, error_code))
return;
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
@@ -424,6 +420,17 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
goto bad_area_nosemaphore;
}
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (likely(regs->eflags & X86_EFLAGS_IF))
+ local_irq_enable();
+
+ if (unlikely(page_fault_trace))
+ printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+ regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
+
if (unlikely(error_code & PF_RSVD))
pgtable_bad(address, regs, error_code);
@@ -633,9 +640,51 @@ do_sigbus:
return;
}
+DEFINE_SPINLOCK(pgd_lock);
+struct page *pgd_list;
+
+void vmalloc_sync_all(void)
+{
+ /* Note that races in the updates of insync and start aren't
+ problematic:
+ insync only ever gains set bits, and updates to start only
+ improve performance (without affecting correctness if undone). */
+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+ static unsigned long start = VMALLOC_START & PGDIR_MASK;
+ unsigned long address;
+
+ for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+ if (!test_bit(pgd_index(address), insync)) {
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+ spin_lock(&pgd_lock);
+ for (page = pgd_list; page;
+ page = (struct page *)page->index) {
+ pgd_t *pgd;
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
+ }
+ spin_unlock(&pgd_lock);
+ set_bit(pgd_index(address), insync);
+ }
+ if (address == start)
+ start = address + PGDIR_SIZE;
+ }
+ /* Check that there is no need to do the same for the modules area. */
+ BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+ BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+ (__START_KERNEL & PGDIR_MASK)));
+}
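
The shape of that sync loop, as a self-contained toy (arrays stand in for the pgd list and the reference kernel page table; locking is omitted): missing top-level entries are copied from the reference, and the insync bitmap keeps already-visited slots from being rescanned on later calls.

#include <stdio.h>

#define SLOTS 4

int main(void)
{
	int ref[SLOTS] = { 7, 0, 9, 0 };	/* reference kernel pgd */
	int pgds[2][SLOTS] = { { 0 } };		/* two process page tables */
	int insync[SLOTS] = { 0 };
	int s, p;

	for (s = 0; s < SLOTS; s++) {
		if (insync[s] || !ref[s])	/* already synced / nothing there */
			continue;
		for (p = 0; p < 2; p++)
			if (!pgds[p][s])
				pgds[p][s] = ref[s]; /* set_pgd(pgd, *pgd_ref) */
		insync[s] = 1;
	}
	printf("pgd0 slot0=%d pgd1 slot2=%d\n", pgds[0][0], pgds[1][2]); /* 7 9 */
	return 0;
}
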
+
static int __init enable_pagefaulttrace(char *str)
{
page_fault_trace = 1;
- return 0;
+ return 1;
}
__setup("pagefaulttrace", enable_pagefaulttrace);
diff --git a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
index 032c31e532..4862272456 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
@@ -185,7 +185,7 @@ void show_mem(void)
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pfn_to_page(pgdat->node_start_pfn + i);
total++;
@@ -207,7 +207,7 @@ void show_mem(void)
int after_bootmem;
-static void *spp_getpage(void)
+static __init void *spp_getpage(void)
{
void *ptr;
if (after_bootmem)
@@ -230,7 +230,7 @@ static inline pud_t *pud_offset_u(unsigned long address)
return pud + pud_index(address);
}
-static void set_pte_phys(unsigned long vaddr,
+static __init void set_pte_phys(unsigned long vaddr,
unsigned long phys, pgprot_t prot, int user_mode)
{
pgd_t *pgd;
@@ -345,7 +345,8 @@ static void set_pte_phys_ma(unsigned long vaddr,
#define SET_FIXMAP_USER 1
/* NOTE: this is meant to be run only at boot */
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+void __init
+__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
unsigned long address = __fix_to_virt(idx);
@@ -439,6 +440,18 @@ static inline int make_readonly(unsigned long paddr)
return readonly;
}
+/* Must run before zap_low_mappings */
+__init void *early_ioremap(unsigned long addr, unsigned long size)
+{
+ return ioremap(addr, size);
+}
+
+/* To avoid virtual aliases later */
+__init void early_iounmap(void *addr, unsigned long size)
+{
+ iounmap(addr);
+}
+
static void __meminit
phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
{
@@ -686,7 +699,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
pud_t *pud;
if (after_bootmem) {
- pud = pud_offset_k(pgd, __PAGE_OFFSET);
+ pud = pud_offset_k(pgd, start & PGDIR_MASK);
make_page_readonly(pud, XENFEAT_writable_page_tables);
pud_phys = __pa(pud);
} else {
@@ -869,19 +882,50 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
/*
* Memory hotplug specific functions
- * These are only for non-NUMA machines right now.
*/
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined(CONFIG_ACPI_HOTPLUG_MEMORY) || defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)
void online_page(struct page *page)
{
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalram_pages++;
num_physpages++;
}
+#ifndef CONFIG_MEMORY_HOTPLUG
+/*
+ * Memory hotadd without sparsemem: the mem_maps have been allocated
+ * in advance, so just online the pages.
+ */
+int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
+{
+ int err = -EIO;
+ unsigned long pfn;
+ unsigned long total = 0, mem = 0;
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
+ if (pfn_valid(pfn)) {
+ online_page(pfn_to_page(pfn));
+ err = 0;
+ mem++;
+ }
+ total++;
+ }
+ if (!err) {
+ z->spanned_pages += total;
+ z->present_pages += mem;
+ z->zone_pgdat->node_spanned_pages += total;
+ z->zone_pgdat->node_present_pages += mem;
+ }
+ return err;
+}
+#endif
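
A user-space rerun of the hotadd loop with a fabricated pfn hole (pfn_valid() is stubbed and onlining is reduced to counting): err stays -EIO unless at least one frame was onlined, and the zone accounting grows by exactly what was found.

#include <stdio.h>

static int pfn_valid_stub(unsigned long pfn) { return pfn != 2; } /* hole */

int main(void)
{
	unsigned long pfn, total = 0, mem = 0;
	int err = -5;	/* -EIO until something onlines */

	for (pfn = 0; pfn < 4; pfn++) {
		if (pfn_valid_stub(pfn)) {
			/* online_page(pfn_to_page(pfn)); */
			err = 0;
			mem++;
		}
		total++;
	}
	printf("err=%d spanned+=%lu present+=%lu\n", err, total, mem);
	return 0;
}
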
+
+/*
+ * Memory is added always to NORMAL zone. This means you will never get
+ * additional DMA/DMA32 memory.
+ */
int add_memory(u64 start, u64 size)
{
struct pglist_data *pgdat = NODE_DATA(0);
@@ -949,7 +993,7 @@ void __init mem_init(void)
/* XEN: init and count pages outside initial allocation. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
ClearPageReserved(pfn_to_page(pfn));
- set_page_count(pfn_to_page(pfn), 1);
+ init_page_count(pfn_to_page(pfn));
totalram_pages++;
}
reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
@@ -1001,7 +1045,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
make_page_writable(
__va(__pa(addr)), XENFEAT_writable_page_tables);
@@ -1049,7 +1093,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
index 3641bfb805..b9fc4bb905 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
@@ -206,6 +206,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
pte_t *pbase;
if (!base)
return NULL;
+ /*
+ * page_private tracks the number of entries in this
+ * page table page that have non-standard attributes.
+ */
+ SetPagePrivate(base);
+ page_private(base) = 0;
+
address = __pa(address);
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
@@ -238,26 +245,12 @@ static inline void flush_map(unsigned long address)
on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
}
-struct deferred_page {
- struct deferred_page *next;
- struct page *fpage;
- unsigned long address;
-};
-static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
+static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
-static inline void save_page(unsigned long address, struct page *fpage)
+static inline void save_page(struct page *fpage)
{
- struct deferred_page *df;
- df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
- if (!df) {
- flush_map(address);
- __free_page(fpage);
- } else {
- df->next = df_list;
- df->fpage = fpage;
- df->address = address;
- df_list = df;
- }
+ fpage->lru.next = (struct list_head *)deferred_pages;
+ deferred_pages = fpage;
}
/*
@@ -299,8 +292,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
set_pte(kpte, pfn_pte(pfn, prot));
} else {
/*
- * split_large_page will take the reference for this change_page_attr
- * on the split page.
+ * split_large_page will take the reference for this
+ * change_page_attr on the split page.
*/
struct page *split;
@@ -312,10 +305,11 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
set_pte(kpte,mk_pte(split, ref_prot2));
kpte_page = split;
}
- get_page(kpte_page);
+ page_private(kpte_page)++;
} else if ((kpte_flags & _PAGE_PSE) == 0) {
set_pte(kpte, pfn_pte(pfn, ref_prot));
- __put_page(kpte_page);
+ BUG_ON(page_private(kpte_page) == 0);
+ page_private(kpte_page)--;
} else
BUG();
@@ -329,16 +323,14 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
#ifndef CONFIG_XEN
BUG_ON(PageReserved(kpte_page));
#else
- if (!PageReserved(kpte_page))
+ if (PageReserved(kpte_page))
+ return 0;
#endif
- switch (page_count(kpte_page)) {
- case 1:
- save_page(address, kpte_page);
- revert_page(address, ref_prot);
- break;
- case 0:
- BUG(); /* memleak and failed 2M page regeneration */
- }
+
+ if (page_private(kpte_page) == 0) {
+ save_page(kpte_page);
+ revert_page(address, ref_prot);
+ }
return 0;
}
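
The bookkeeping that replaces get_page()/__put_page() here, reduced to a plain counter (the sequence of events is illustrative): page_private counts how many ptes in the split page still carry non-standard protections, and reverting to a 2MB mapping becomes legal only when it returns to zero.

#include <stdio.h>

int main(void)
{
	int page_private = 0;	/* initialized to 0 when the page is split */

	page_private++;		/* one pte given non-standard protections */
	page_private++;		/* a second pte changed */
	page_private--;		/* first pte restored to ref_prot */
	page_private--;		/* second restored */
	if (page_private == 0)
		printf("revert to 2MB page and free the split page\n");
	return 0;
}
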
@@ -390,17 +382,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
void global_flush_tlb(void)
{
- struct deferred_page *df, *next_df;
+ struct page *dpage;
down_read(&init_mm.mmap_sem);
- df = xchg(&df_list, NULL);
+ dpage = xchg(&deferred_pages, NULL);
up_read(&init_mm.mmap_sem);
- flush_map((df && !df->next) ? df->address : 0);
- for (; df; df = next_df) {
- next_df = df->next;
- if (df->fpage)
- __free_page(df->fpage);
- kfree(df);
+
+ flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
+ while (dpage) {
+ struct page *tmp = dpage;
+ dpage = (struct page *)dpage->lru.next;
+ ClearPagePrivate(tmp);
+ __free_page(tmp);
}
}
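
The new deferred list threads pages through a field they already own (lru.next) instead of kmalloc()ing tracking nodes, so queuing a page for deferred free can no longer fail with -ENOMEM the way the old struct deferred_page path could. A stand-alone sketch with a stand-in struct page:

#include <stdio.h>
#include <stddef.h>

struct page { struct page *next; int id; };	/* stand-in for lru.next */

int main(void)
{
	struct page a = { NULL, 1 }, b = { NULL, 2 };
	struct page *deferred = NULL;

	a.next = deferred; deferred = &a;	/* save_page(&a) */
	b.next = deferred; deferred = &b;	/* save_page(&b) */

	while (deferred) {			/* drain in global_flush_tlb() */
		struct page *tmp = deferred;
		deferred = deferred->next;
		printf("freeing page %d\n", tmp->id);
	}
	return 0;
}
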
diff --git a/linux-2.6-xen-sparse/arch/x86_64/pci/Makefile b/linux-2.6-xen-sparse/arch/x86_64/pci/Makefile
index 73dd4786ad..1ea6ea9b68 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/pci/Makefile
+++ b/linux-2.6-xen-sparse/arch/x86_64/pci/Makefile
@@ -7,7 +7,7 @@ CFLAGS += -Iarch/i386/pci
obj-y := i386.o
obj-$(CONFIG_PCI_DIRECT)+= direct.o
-obj-y += fixup.o
+obj-y += fixup.o init.o
obj-$(CONFIG_ACPI) += acpi.o
obj-y += legacy.o irq.o common.o
# mmconfig has a 64bit special
@@ -27,6 +27,7 @@ irq-y += ../../i386/pci/irq.o
common-y += ../../i386/pci/common.o
fixup-y += ../../i386/pci/fixup.o
i386-y += ../../i386/pci/i386.o
+init-y += ../../i386/pci/init.o
ifdef CONFIG_XEN
irq-y := ../../i386/pci/irq-xen.o
diff --git a/linux-2.6-xen-sparse/drivers/Makefile b/linux-2.6-xen-sparse/drivers/Makefile
index 73c1c4a087..26b29563df 100644
--- a/linux-2.6-xen-sparse/drivers/Makefile
+++ b/linux-2.6-xen-sparse/drivers/Makefile
@@ -25,9 +25,6 @@ obj-$(CONFIG_CONNECTOR) += connector/
obj-$(CONFIG_FB_I810) += video/i810/
obj-$(CONFIG_FB_INTEL) += video/intelfb/
-# we also need input/serio early so serio bus is initialized by the time
-# serial drivers start registering their serio ports
-obj-$(CONFIG_SERIO) += input/serio/
obj-y += serial/
obj-$(CONFIG_PARPORT) += parport/
obj-y += base/ block/ misc/ mfd/ net/ media/
@@ -54,9 +51,11 @@ obj-$(CONFIG_TC) += tc/
obj-$(CONFIG_USB) += usb/
obj-$(CONFIG_PCI) += usb/
obj-$(CONFIG_USB_GADGET) += usb/gadget/
+obj-$(CONFIG_SERIO) += input/serio/
obj-$(CONFIG_GAMEPORT) += input/gameport/
obj-$(CONFIG_INPUT) += input/
obj-$(CONFIG_I2O) += message/
+obj-$(CONFIG_RTC_LIB) += rtc/
obj-$(CONFIG_I2C) += i2c/
obj-$(CONFIG_W1) += w1/
obj-$(CONFIG_HWMON) += hwmon/
@@ -69,7 +68,9 @@ obj-$(CONFIG_MCA) += mca/
obj-$(CONFIG_EISA) += eisa/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_MMC) += mmc/
+obj-$(CONFIG_NEW_LEDS) += leds/
obj-$(CONFIG_INFINIBAND) += infiniband/
+obj-$(CONFIG_IPATH_CORE) += infiniband/
obj-$(CONFIG_SGI_SN) += sn/
obj-y += firmware/
obj-$(CONFIG_CRYPTO) += crypto/
diff --git a/linux-2.6-xen-sparse/drivers/acpi/Kconfig b/linux-2.6-xen-sparse/drivers/acpi/Kconfig
index b6e265acde..f0c892613a 100644
--- a/linux-2.6-xen-sparse/drivers/acpi/Kconfig
+++ b/linux-2.6-xen-sparse/drivers/acpi/Kconfig
@@ -205,6 +205,18 @@ config ACPI_IBM
If you have an IBM ThinkPad laptop, say Y or M here.
+config ACPI_IBM_DOCK
+ bool "Legacy Docking Station Support"
+ depends on ACPI_IBM
+ default n
+ ---help---
+ Allows the ibm_acpi driver to handle docking station events.
+ This support is obsoleted by CONFIG_HOTPLUG_PCI_ACPI. It will
+ allow locking and removing the laptop from the docking station,
+ but will not properly connect PCI devices.
+
+ If you are not sure, say N here.
+
config ACPI_TOSHIBA
tristate "Toshiba Laptop Extras"
depends on X86
@@ -244,7 +256,8 @@ config ACPI_CUSTOM_DSDT_FILE
depends on ACPI_CUSTOM_DSDT
default ""
help
- Enter the full path name to the file wich includes the AmlCode declaration.
+ Enter the full path name to the file which includes the AmlCode
+ declaration.
config ACPI_BLACKLIST_YEAR
int "Disable ACPI for systems before Jan 1st this year" if X86_32
@@ -317,7 +330,7 @@ config ACPI_CONTAINER
config ACPI_HOTPLUG_MEMORY
tristate "Memory Hotplug"
depends on ACPI
- depends on MEMORY_HOTPLUG
+ depends on MEMORY_HOTPLUG || X86_64
default n
help
This driver adds supports for ACPI Memory Hotplug. This driver
diff --git a/linux-2.6-xen-sparse/drivers/acpi/tables.c b/linux-2.6-xen-sparse/drivers/acpi/tables.c
deleted file mode 100644
index a60095ae88..0000000000
--- a/linux-2.6-xen-sparse/drivers/acpi/tables.c
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
- * acpi_tables.c - ACPI Boot-Time Table Parsing
- *
- * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- */
-
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/irq.h>
-#include <linux/errno.h>
-#include <linux/acpi.h>
-#include <linux/bootmem.h>
-
-#define PREFIX "ACPI: "
-
-#define ACPI_MAX_TABLES 128
-
-static char *acpi_table_signatures[ACPI_TABLE_COUNT] = {
- [ACPI_TABLE_UNKNOWN] = "????",
- [ACPI_APIC] = "APIC",
- [ACPI_BOOT] = "BOOT",
- [ACPI_DBGP] = "DBGP",
- [ACPI_DSDT] = "DSDT",
- [ACPI_ECDT] = "ECDT",
- [ACPI_ETDT] = "ETDT",
- [ACPI_FADT] = "FACP",
- [ACPI_FACS] = "FACS",
- [ACPI_OEMX] = "OEM",
- [ACPI_PSDT] = "PSDT",
- [ACPI_SBST] = "SBST",
- [ACPI_SLIT] = "SLIT",
- [ACPI_SPCR] = "SPCR",
- [ACPI_SRAT] = "SRAT",
- [ACPI_SSDT] = "SSDT",
- [ACPI_SPMI] = "SPMI",
- [ACPI_HPET] = "HPET",
- [ACPI_MCFG] = "MCFG",
-};
-
-static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" };
-static char *mps_inti_flags_trigger[] = { "dfl", "edge", "res", "level" };
-
-/* System Description Table (RSDT/XSDT) */
-struct acpi_table_sdt {
- unsigned long pa;
- enum acpi_table_id id;
- unsigned long size;
-} __attribute__ ((packed));
-
-static unsigned long sdt_pa; /* Physical Address */
-static unsigned long sdt_count; /* Table count */
-
-static struct acpi_table_sdt sdt_entry[ACPI_MAX_TABLES] __initdata;
-
-void acpi_table_print(struct acpi_table_header *header, unsigned long phys_addr)
-{
- char *name = NULL;
-
- if (!header)
- return;
-
- /* Some table signatures aren't good table names */
-
- if (!strncmp((char *)&header->signature,
- acpi_table_signatures[ACPI_APIC],
- sizeof(header->signature))) {
- name = "MADT";
- } else if (!strncmp((char *)&header->signature,
- acpi_table_signatures[ACPI_FADT],
- sizeof(header->signature))) {
- name = "FADT";
- } else
- name = header->signature;
-
- printk(KERN_DEBUG PREFIX
- "%.4s (v%3.3d %6.6s %8.8s 0x%08x %.4s 0x%08x) @ 0x%p\n", name,
- header->revision, header->oem_id, header->oem_table_id,
- header->oem_revision, header->asl_compiler_id,
- header->asl_compiler_revision, (void *)phys_addr);
-}
-
-void acpi_table_print_madt_entry(acpi_table_entry_header * header)
-{
- if (!header)
- return;
-
- switch (header->type) {
-
- case ACPI_MADT_LAPIC:
- {
- struct acpi_table_lapic *p =
- (struct acpi_table_lapic *)header;
- printk(KERN_INFO PREFIX
- "LAPIC (acpi_id[0x%02x] lapic_id[0x%02x] %s)\n",
- p->acpi_id, p->id,
- p->flags.enabled ? "enabled" : "disabled");
- }
- break;
-
- case ACPI_MADT_IOAPIC:
- {
- struct acpi_table_ioapic *p =
- (struct acpi_table_ioapic *)header;
- printk(KERN_INFO PREFIX
- "IOAPIC (id[0x%02x] address[0x%08x] gsi_base[%d])\n",
- p->id, p->address, p->global_irq_base);
- }
- break;
-
- case ACPI_MADT_INT_SRC_OVR:
- {
- struct acpi_table_int_src_ovr *p =
- (struct acpi_table_int_src_ovr *)header;
- printk(KERN_INFO PREFIX
- "INT_SRC_OVR (bus %d bus_irq %d global_irq %d %s %s)\n",
- p->bus, p->bus_irq, p->global_irq,
- mps_inti_flags_polarity[p->flags.polarity],
- mps_inti_flags_trigger[p->flags.trigger]);
- if (p->flags.reserved)
- printk(KERN_INFO PREFIX
- "INT_SRC_OVR unexpected reserved flags: 0x%x\n",
- p->flags.reserved);
-
- }
- break;
-
- case ACPI_MADT_NMI_SRC:
- {
- struct acpi_table_nmi_src *p =
- (struct acpi_table_nmi_src *)header;
- printk(KERN_INFO PREFIX
- "NMI_SRC (%s %s global_irq %d)\n",
- mps_inti_flags_polarity[p->flags.polarity],
- mps_inti_flags_trigger[p->flags.trigger],
- p->global_irq);
- }
- break;
-
- case ACPI_MADT_LAPIC_NMI:
- {
- struct acpi_table_lapic_nmi *p =
- (struct acpi_table_lapic_nmi *)header;
- printk(KERN_INFO PREFIX
- "LAPIC_NMI (acpi_id[0x%02x] %s %s lint[0x%x])\n",
- p->acpi_id,
- mps_inti_flags_polarity[p->flags.polarity],
- mps_inti_flags_trigger[p->flags.trigger],
- p->lint);
- }
- break;
-
- case ACPI_MADT_LAPIC_ADDR_OVR:
- {
- struct acpi_table_lapic_addr_ovr *p =
- (struct acpi_table_lapic_addr_ovr *)header;
- printk(KERN_INFO PREFIX
- "LAPIC_ADDR_OVR (address[%p])\n",
- (void *)(unsigned long)p->address);
- }
- break;
-
- case ACPI_MADT_IOSAPIC:
- {
- struct acpi_table_iosapic *p =
- (struct acpi_table_iosapic *)header;
- printk(KERN_INFO PREFIX
- "IOSAPIC (id[0x%x] address[%p] gsi_base[%d])\n",
- p->id, (void *)(unsigned long)p->address,
- p->global_irq_base);
- }
- break;
-
- case ACPI_MADT_LSAPIC:
- {
- struct acpi_table_lsapic *p =
- (struct acpi_table_lsapic *)header;
- printk(KERN_INFO PREFIX
- "LSAPIC (acpi_id[0x%02x] lsapic_id[0x%02x] lsapic_eid[0x%02x] %s)\n",
- p->acpi_id, p->id, p->eid,
- p->flags.enabled ? "enabled" : "disabled");
- }
- break;
-
- case ACPI_MADT_PLAT_INT_SRC:
- {
- struct acpi_table_plat_int_src *p =
- (struct acpi_table_plat_int_src *)header;
- printk(KERN_INFO PREFIX
- "PLAT_INT_SRC (%s %s type[0x%x] id[0x%04x] eid[0x%x] iosapic_vector[0x%x] global_irq[0x%x]\n",
- mps_inti_flags_polarity[p->flags.polarity],
- mps_inti_flags_trigger[p->flags.trigger],
- p->type, p->id, p->eid, p->iosapic_vector,
- p->global_irq);
- }
- break;
-
- default:
- printk(KERN_WARNING PREFIX
- "Found unsupported MADT entry (type = 0x%x)\n",
- header->type);
- break;
- }
-}
-
-static int
-acpi_table_compute_checksum(void *table_pointer, unsigned long length)
-{
- u8 *p = (u8 *) table_pointer;
- unsigned long remains = length;
- unsigned long sum = 0;
-
- if (!p || !length)
- return -EINVAL;
-
- while (remains--)
- sum += *p++;
-
- return (sum & 0xFF);
-}
-
-/*
- * acpi_get_table_header_early()
- * for acpi_blacklisted(), acpi_table_get_sdt()
- */
-int __init
-acpi_get_table_header_early(enum acpi_table_id id,
- struct acpi_table_header **header)
-{
- unsigned int i;
- enum acpi_table_id temp_id;
-
- /* DSDT is different from the rest */
- if (id == ACPI_DSDT)
- temp_id = ACPI_FADT;
- else
- temp_id = id;
-
- /* Locate the table. */
-
- for (i = 0; i < sdt_count; i++) {
- if (sdt_entry[i].id != temp_id)
- continue;
- *header = (void *)
- __acpi_map_table(sdt_entry[i].pa, sdt_entry[i].size);
- if (!*header) {
- printk(KERN_WARNING PREFIX "Unable to map %s\n",
- acpi_table_signatures[temp_id]);
- return -ENODEV;
- }
- break;
- }
-
- if (!*header) {
- printk(KERN_WARNING PREFIX "%s not present\n",
- acpi_table_signatures[id]);
- return -ENODEV;
- }
-
- /* Map the DSDT header via the pointer in the FADT */
- if (id == ACPI_DSDT) {
- struct fadt_descriptor_rev2 *fadt =
- (struct fadt_descriptor_rev2 *)*header;
-
- if (fadt->revision == 3 && fadt->Xdsdt) {
- *header = (void *)__acpi_map_table(fadt->Xdsdt,
- sizeof(struct
- acpi_table_header));
- } else if (fadt->V1_dsdt) {
- *header = (void *)__acpi_map_table(fadt->V1_dsdt,
- sizeof(struct
- acpi_table_header));
- } else
- *header = NULL;
-
- if (!*header) {
- printk(KERN_WARNING PREFIX "Unable to map DSDT\n");
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
-int __init
-acpi_table_parse_madt_family(enum acpi_table_id id,
- unsigned long madt_size,
- int entry_id,
- acpi_madt_entry_handler handler,
- unsigned int max_entries)
-{
- void *madt = NULL;
- acpi_table_entry_header *entry;
- unsigned int count = 0;
- unsigned long madt_end;
- unsigned int i;
-
- if (!handler)
- return -EINVAL;
-
- /* Locate the MADT (if exists). There should only be one. */
-
- for (i = 0; i < sdt_count; i++) {
- if (sdt_entry[i].id != id)
- continue;
- madt = (void *)
- __acpi_map_table(sdt_entry[i].pa, sdt_entry[i].size);
- if (!madt) {
- printk(KERN_WARNING PREFIX "Unable to map %s\n",
- acpi_table_signatures[id]);
- return -ENODEV;
- }
- break;
- }
-
- if (!madt) {
- printk(KERN_WARNING PREFIX "%s not present\n",
- acpi_table_signatures[id]);
- return -ENODEV;
- }
-
- madt_end = (unsigned long)madt + sdt_entry[i].size;
-
- /* Parse all entries looking for a match. */
-
- entry = (acpi_table_entry_header *)
- ((unsigned long)madt + madt_size);
-
- while (((unsigned long)entry) + sizeof(acpi_table_entry_header) <
- madt_end) {
- if (entry->type == entry_id
- && (!max_entries || count++ < max_entries))
- if (handler(entry, madt_end))
- return -EINVAL;
-
- entry = (acpi_table_entry_header *)
- ((unsigned long)entry + entry->length);
- }
- if (max_entries && count > max_entries) {
- printk(KERN_WARNING PREFIX "[%s:0x%02x] ignored %i entries of "
- "%i found\n", acpi_table_signatures[id], entry_id,
- count - max_entries, count);
- }
-
- return count;
-}
-
-int __init
-acpi_table_parse_madt(enum acpi_madt_entry_id id,
- acpi_madt_entry_handler handler, unsigned int max_entries)
-{
- return acpi_table_parse_madt_family(ACPI_APIC,
- sizeof(struct acpi_table_madt), id,
- handler, max_entries);
-}
-
-int __init acpi_table_parse(enum acpi_table_id id, acpi_table_handler handler)
-{
- int count = 0;
- unsigned int i = 0;
-
- if (!handler)
- return -EINVAL;
-
- for (i = 0; i < sdt_count; i++) {
- if (sdt_entry[i].id != id)
- continue;
- count++;
- if (count == 1)
- handler(sdt_entry[i].pa, sdt_entry[i].size);
-
- else
- printk(KERN_WARNING PREFIX
- "%d duplicate %s table ignored.\n", count,
- acpi_table_signatures[id]);
- }
-
- return count;
-}
-
-static int __init acpi_table_get_sdt(struct acpi_table_rsdp *rsdp)
-{
- struct acpi_table_header *header = NULL;
- unsigned int i, id = 0;
-
- if (!rsdp)
- return -EINVAL;
-
- /* First check XSDT (but only on ACPI 2.0-compatible systems) */
-
- if ((rsdp->revision >= 2) &&
- (((struct acpi20_table_rsdp *)rsdp)->xsdt_address)) {
-
- struct acpi_table_xsdt *mapped_xsdt = NULL;
-
- sdt_pa = ((struct acpi20_table_rsdp *)rsdp)->xsdt_address;
-
- /* map in just the header */
- header = (struct acpi_table_header *)
- __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header));
-
- if (!header) {
- printk(KERN_WARNING PREFIX
- "Unable to map XSDT header\n");
- return -ENODEV;
- }
-
- /* remap in the entire table before processing */
- mapped_xsdt = (struct acpi_table_xsdt *)
- __acpi_map_table(sdt_pa, header->length);
- if (!mapped_xsdt) {
- printk(KERN_WARNING PREFIX "Unable to map XSDT\n");
- return -ENODEV;
- }
- header = &mapped_xsdt->header;
-
- if (strncmp(header->signature, "XSDT", 4)) {
- printk(KERN_WARNING PREFIX
- "XSDT signature incorrect\n");
- return -ENODEV;
- }
-
- if (acpi_table_compute_checksum(header, header->length)) {
- printk(KERN_WARNING PREFIX "Invalid XSDT checksum\n");
- return -ENODEV;
- }
-
- sdt_count =
- (header->length - sizeof(struct acpi_table_header)) >> 3;
- if (sdt_count > ACPI_MAX_TABLES) {
- printk(KERN_WARNING PREFIX
- "Truncated %lu XSDT entries\n",
- (sdt_count - ACPI_MAX_TABLES));
- sdt_count = ACPI_MAX_TABLES;
- }
-
- for (i = 0; i < sdt_count; i++)
- sdt_entry[i].pa = (unsigned long)mapped_xsdt->entry[i];
- }
-
- /* Then check RSDT */
-
- else if (rsdp->rsdt_address) {
-
- struct acpi_table_rsdt *mapped_rsdt = NULL;
-
- sdt_pa = rsdp->rsdt_address;
-
- /* map in just the header */
- header = (struct acpi_table_header *)
- __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header));
- if (!header) {
- printk(KERN_WARNING PREFIX
- "Unable to map RSDT header\n");
- return -ENODEV;
- }
-
- /* remap in the entire table before processing */
- mapped_rsdt = (struct acpi_table_rsdt *)
- __acpi_map_table(sdt_pa, header->length);
- if (!mapped_rsdt) {
- printk(KERN_WARNING PREFIX "Unable to map RSDT\n");
- return -ENODEV;
- }
- header = &mapped_rsdt->header;
-
- if (strncmp(header->signature, "RSDT", 4)) {
- printk(KERN_WARNING PREFIX
- "RSDT signature incorrect\n");
- return -ENODEV;
- }
-
- if (acpi_table_compute_checksum(header, header->length)) {
- printk(KERN_WARNING PREFIX "Invalid RSDT checksum\n");
- return -ENODEV;
- }
-
- sdt_count =
- (header->length - sizeof(struct acpi_table_header)) >> 2;
- if (sdt_count > ACPI_MAX_TABLES) {
- printk(KERN_WARNING PREFIX
- "Truncated %lu RSDT entries\n",
- (sdt_count - ACPI_MAX_TABLES));
- sdt_count = ACPI_MAX_TABLES;
- }
-
- for (i = 0; i < sdt_count; i++)
- sdt_entry[i].pa = (unsigned long)mapped_rsdt->entry[i];
- }
-
- else {
- printk(KERN_WARNING PREFIX
- "No System Description Table (RSDT/XSDT) specified in RSDP\n");
- return -ENODEV;
- }
-
- acpi_table_print(header, sdt_pa);
-
- for (i = 0; i < sdt_count; i++) {
-
- /* map in just the header */
- header = (struct acpi_table_header *)
- __acpi_map_table(sdt_entry[i].pa,
- sizeof(struct acpi_table_header));
- if (!header)
- continue;
-
- /* remap in the entire table before processing */
- header = (struct acpi_table_header *)
- __acpi_map_table(sdt_entry[i].pa, header->length);
- if (!header)
- continue;
-
- acpi_table_print(header, sdt_entry[i].pa);
-
- if (acpi_table_compute_checksum(header, header->length)) {
- printk(KERN_WARNING " >>> ERROR: Invalid checksum\n");
- continue;
- }
-
- sdt_entry[i].size = header->length;
-
- for (id = 0; id < ACPI_TABLE_COUNT; id++) {
- if (!strncmp((char *)&header->signature,
- acpi_table_signatures[id],
- sizeof(header->signature))) {
- sdt_entry[i].id = id;
- }
- }
- }
-
- /*
- * The DSDT is *not* in the RSDT (why not? no idea.) but we want
- * to print its info, because this is what people usually blacklist
- * against. Unfortunately, we don't know the phys_addr, so just
- * print 0. Maybe no one will notice.
- */
- if (!acpi_get_table_header_early(ACPI_DSDT, &header))
- acpi_table_print(header, 0);
-
- return 0;
-}
-
-/*
- * acpi_table_init()
- *
- * find RSDP, find and checksum SDT/XSDT.
- * checksum all tables, print SDT/XSDT
- *
- * result: sdt_entry[] is initialized
- */
-#if defined(CONFIG_X86_XEN) || defined(CONFIG_X86_64_XEN)
-#define acpi_rsdp_phys_to_va(rsdp_phys) isa_bus_to_virt(rsdp_phys)
-#else
-#define acpi_rsdp_phys_to_va(rsdp_phys) __va(rsdp_phys)
-#endif
-
-int __init acpi_table_init(void)
-{
- struct acpi_table_rsdp *rsdp = NULL;
- unsigned long rsdp_phys = 0;
- int result = 0;
-
- /* Locate and map the Root System Description Table (RSDP) */
-
- rsdp_phys = acpi_find_rsdp();
- if (!rsdp_phys) {
- printk(KERN_ERR PREFIX "Unable to locate RSDP\n");
- return -ENODEV;
- }
-
- rsdp = (struct acpi_table_rsdp *)acpi_rsdp_phys_to_va(rsdp_phys);
- if (!rsdp) {
- printk(KERN_WARNING PREFIX "Unable to map RSDP\n");
- return -ENODEV;
- }
-
- printk(KERN_DEBUG PREFIX
- "RSDP (v%3.3d %6.6s ) @ 0x%p\n",
- rsdp->revision, rsdp->oem_id, (void *)rsdp_phys);
-
- if (rsdp->revision < 2)
- result =
- acpi_table_compute_checksum(rsdp,
- sizeof(struct acpi_table_rsdp));
- else
- result =
- acpi_table_compute_checksum(rsdp,
- ((struct acpi20_table_rsdp *)
- rsdp)->length);
-
- if (result) {
- printk(KERN_WARNING " >>> ERROR: Invalid checksum\n");
- return -ENODEV;
- }
-
- /* Locate and map the System Description table (RSDT/XSDT) */
-
- if (acpi_table_get_sdt(rsdp))
- return -ENODEV;
-
- return 0;
-}
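
The deleted acpi_table_compute_checksum() above encodes the ACPI rule that every byte of a table, including the checksum field itself, must sum to zero modulo 256. The same check restated as a self-contained sketch:

#include <stddef.h>

static int acpi_checksum_ok(const unsigned char *table, size_t len)
{
	unsigned char sum = 0;	/* u8 arithmetic wraps modulo 256 */
	size_t i;

	for (i = 0; i < len; i++)
		sum += table[i];
	return sum == 0;	/* any remainder means a corrupt table */
}
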
diff --git a/linux-2.6-xen-sparse/drivers/char/mem.c b/linux-2.6-xen-sparse/drivers/char/mem.c
index e65a3ef37a..ccfe2aee6c 100644
--- a/linux-2.6-xen-sparse/drivers/char/mem.c
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c
@@ -27,6 +27,7 @@
#include <linux/crash_dump.h>
#include <linux/backing-dev.h>
#include <linux/bootmem.h>
+#include <linux/pipe_fs_i.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -88,21 +89,15 @@ static inline int uncached_access(struct file *file, unsigned long addr)
}
#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
-static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
+static inline int valid_phys_addr_range(unsigned long addr, size_t count)
{
- unsigned long end_mem;
-
- end_mem = __pa(high_memory);
- if (addr >= end_mem)
+ if (addr + count > __pa(high_memory))
return 0;
- if (*count > end_mem - addr)
- *count = end_mem - addr;
-
return 1;
}
-static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t *size)
+static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t size)
{
return 1;
}
@@ -120,7 +115,7 @@ static ssize_t read_mem(struct file * file, char __user * buf,
ssize_t read, sz;
char *ptr;
- if (!valid_phys_addr_range(p, &count))
+ if (!valid_phys_addr_range(p, count))
return -EFAULT;
read = 0;
#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
@@ -178,7 +173,7 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
unsigned long copied;
void *ptr;
- if (!valid_phys_addr_range(p, &count))
+ if (!valid_phys_addr_range(p, count))
return -EFAULT;
written = 0;
@@ -217,11 +212,9 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
copied = copy_from_user(ptr, buf, sz);
if (copied) {
- ssize_t ret;
-
- ret = written + (sz - copied);
- if (ret)
- return ret;
+ written += sz - copied;
+ if (written)
+ break;
return -EFAULT;
}
buf += sz;
@@ -253,7 +246,7 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma)
{
size_t size = vma->vm_end - vma->vm_start;
- if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, &size))
+ if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, size))
return -EINVAL;
vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
@@ -458,11 +451,9 @@ do_write_kmem(void *p, unsigned long realp, const char __user * buf,
copied = copy_from_user(ptr, buf, sz);
if (copied) {
- ssize_t ret;
-
- ret = written + (sz - copied);
- if (ret)
- return ret;
+ written += sz - copied;
+ if (written)
+ break;
return -EFAULT;
}
buf += sz;
@@ -516,11 +507,10 @@ static ssize_t write_kmem(struct file * file, const char __user * buf,
if (len) {
written = copy_from_user(kbuf, buf, len);
if (written) {
- ssize_t ret;
-
+ if (wrote + virtr)
+ break;
free_page((unsigned long)kbuf);
- ret = wrote + virtr + (len - written);
- return ret ? ret : -EFAULT;
+ return -EFAULT;
}
}
len = vwrite(kbuf, (char *)p, len);
@@ -565,8 +555,11 @@ static ssize_t write_port(struct file * file, const char __user * buf,
return -EFAULT;
while (count-- > 0 && i < 65536) {
char c;
- if (__get_user(c, tmp))
+ if (__get_user(c, tmp)) {
+ if (tmp > buf)
+ break;
return -EFAULT;
+ }
outb(c,i);
i++;
tmp++;
@@ -588,6 +581,18 @@ static ssize_t write_null(struct file * file, const char __user * buf,
return count;
}
+static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
+ struct splice_desc *sd)
+{
+ return sd->len;
+}
+
+static ssize_t splice_write_null(struct pipe_inode_info *pipe,struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
+}
+
#ifdef CONFIG_MMU
/*
* For fun, we are using the MMU for this.
@@ -799,6 +804,7 @@ static struct file_operations null_fops = {
.llseek = null_lseek,
.read = read_null,
.write = write_null,
+ .splice_write = splice_write_null,
};
#if defined(CONFIG_ISA) || !defined(__mc68000__)
@@ -913,7 +919,7 @@ static const struct {
unsigned int minor;
char *name;
umode_t mode;
- struct file_operations *fops;
+ const struct file_operations *fops;
} devlist[] = { /* list of minor devices */
{1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
{2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
diff --git a/linux-2.6-xen-sparse/drivers/char/tty_io.c b/linux-2.6-xen-sparse/drivers/char/tty_io.c
index 0372d93bca..3afba5ded8 100644
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c
@@ -130,7 +130,7 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */
/* Semaphore to protect creating and releasing a tty. This is shared with
vt.c for deeply disgusting hack reasons */
-DECLARE_MUTEX(tty_sem);
+DEFINE_MUTEX(tty_mutex);
int console_use_vt = 1;
@@ -353,10 +353,10 @@ int tty_buffer_request_room(struct tty_struct *tty, size_t size)
spin_unlock_irqrestore(&tty->buf.lock, flags);
return size;
}
-
EXPORT_SYMBOL_GPL(tty_buffer_request_room);
-int tty_insert_flip_string(struct tty_struct *tty, unsigned char *chars, size_t size)
+int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars,
+ size_t size)
{
int copied = 0;
do {
@@ -370,17 +370,16 @@ int tty_insert_flip_string(struct tty_struct *tty, unsigned char *chars, size_t
tb->used += space;
copied += space;
chars += space;
-/* printk("Flip insert %d.\n", space); */
}
/* There is a small chance that we need to split the data over
several buffers. If this is the case we must loop */
while (unlikely(size > copied));
return copied;
}
+EXPORT_SYMBOL(tty_insert_flip_string);
-EXPORT_SYMBOL_GPL(tty_insert_flip_string);
-
-int tty_insert_flip_string_flags(struct tty_struct *tty, unsigned char *chars, char *flags, size_t size)
+int tty_insert_flip_string_flags(struct tty_struct *tty,
+ const unsigned char *chars, const char *flags, size_t size)
{
int copied = 0;
do {
@@ -401,9 +400,20 @@ int tty_insert_flip_string_flags(struct tty_struct *tty, unsigned char *chars, c
while (unlikely(size > copied));
return copied;
}
+EXPORT_SYMBOL(tty_insert_flip_string_flags);
-EXPORT_SYMBOL_GPL(tty_insert_flip_string_flags);
-
+void tty_schedule_flip(struct tty_struct *tty)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&tty->buf.lock, flags);
+ if (tty->buf.tail != NULL) {
+ tty->buf.tail->active = 0;
+ tty->buf.tail->commit = tty->buf.tail->used;
+ }
+ spin_unlock_irqrestore(&tty->buf.lock, flags);
+ schedule_delayed_work(&tty->buf.work, 1);
+}
+EXPORT_SYMBOL(tty_schedule_flip);
/*
* Prepare a block of space in the buffer for data. Returns the length
@@ -545,14 +555,12 @@ void tty_ldisc_put(int disc)
struct tty_ldisc *ld;
unsigned long flags;
- if (disc < N_TTY || disc >= NR_LDISCS)
- BUG();
+ BUG_ON(disc < N_TTY || disc >= NR_LDISCS);
spin_lock_irqsave(&tty_ldisc_lock, flags);
ld = &tty_ldiscs[disc];
- if(ld->refcount == 0)
- BUG();
- ld->refcount --;
+ BUG_ON(ld->refcount == 0);
+ ld->refcount--;
module_put(ld->owner);
spin_unlock_irqrestore(&tty_ldisc_lock, flags);
}
@@ -647,8 +655,7 @@ void tty_ldisc_deref(struct tty_ldisc *ld)
{
unsigned long flags;
- if(ld == NULL)
- BUG();
+ BUG_ON(ld == NULL);
spin_lock_irqsave(&tty_ldisc_lock, flags);
if(ld->refcount == 0)
@@ -1099,8 +1106,8 @@ static void do_tty_hangup(void *data)
p->signal->tty = NULL;
if (!p->signal->leader)
continue;
- send_group_sig_info(SIGHUP, SEND_SIG_PRIV, p);
- send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+ group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
+ group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
if (tty->pgrp > 0)
p->signal->tty_old_pgrp = tty->pgrp;
} while_each_task_pid(tty->session, PIDTYPE_SID, p);
@@ -1190,11 +1197,11 @@ void disassociate_ctty(int on_exit)
lock_kernel();
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
tty = current->signal->tty;
if (tty) {
tty_pgrp = tty->pgrp;
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
tty_vhangup(tty);
} else {
@@ -1202,7 +1209,7 @@ void disassociate_ctty(int on_exit)
kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
}
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
unlock_kernel();
return;
}
@@ -1213,7 +1220,7 @@ void disassociate_ctty(int on_exit)
}
/* Must lock changes to tty_old_pgrp */
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
current->signal->tty_old_pgrp = 0;
tty->session = 0;
tty->pgrp = -1;
@@ -1224,7 +1231,7 @@ void disassociate_ctty(int on_exit)
p->signal->tty = NULL;
} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
read_unlock(&tasklist_lock);
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
unlock_kernel();
}
@@ -1308,7 +1315,7 @@ static inline ssize_t do_tty_write(
ssize_t ret = 0, written = 0;
unsigned int chunk;
- if (down_interruptible(&tty->atomic_write)) {
+ if (mutex_lock_interruptible(&tty->atomic_write_lock)) {
return -ERESTARTSYS;
}
@@ -1331,7 +1338,7 @@ static inline ssize_t do_tty_write(
if (count < chunk)
chunk = count;
- /* write_buf/write_cnt is protected by the atomic_write semaphore */
+ /* write_buf/write_cnt is protected by the atomic_write_lock mutex */
if (tty->write_cnt < chunk) {
unsigned char *buf;
@@ -1340,7 +1347,7 @@ static inline ssize_t do_tty_write(
buf = kmalloc(chunk, GFP_KERNEL);
if (!buf) {
- up(&tty->atomic_write);
+ mutex_unlock(&tty->atomic_write_lock);
return -ENOMEM;
}
kfree(tty->write_buf);
@@ -1376,7 +1383,7 @@ static inline ssize_t do_tty_write(
inode->i_mtime = current_fs_time(inode->i_sb);
ret = written;
}
- up(&tty->atomic_write);
+ mutex_unlock(&tty->atomic_write_lock);
return ret;
}
@@ -1444,8 +1451,8 @@ static inline void tty_line_name(struct tty_driver *driver, int index, char *p)
/*
* WSH 06/09/97: Rewritten to remove races and properly clean up after a
- * failed open. The new code protects the open with a semaphore, so it's
- * really quite straightforward. The semaphore locking can probably be
+ * failed open. The new code protects the open with a mutex, so it's
+ * really quite straightforward. The mutex locking can probably be
* relaxed for the (most common) case of reopening a tty.
*/
static int init_dev(struct tty_driver *driver, int idx,
@@ -1642,7 +1649,7 @@ fast_track:
success:
*ret_tty = tty;
- /* All paths come through here to release the semaphore */
+ /* All paths come through here to release the mutex */
end_init:
return retval;
@@ -1735,7 +1742,7 @@ static void release_dev(struct file * filp)
{
struct tty_struct *tty, *o_tty;
int pty_master, tty_closing, o_tty_closing, do_sleep;
- int devpts_master, devpts;
+ int devpts;
int idx;
char buf[64];
unsigned long flags;
@@ -1752,7 +1759,6 @@ static void release_dev(struct file * filp)
pty_master = (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
tty->driver->subtype == PTY_TYPE_MASTER);
devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
- devpts_master = pty_master && devpts;
o_tty = tty->link;
#ifdef TTY_PARANOIA_CHECK
@@ -1839,7 +1845,7 @@ static void release_dev(struct file * filp)
/* Guard against races with tty->count changes elsewhere and
opens on /dev/tty */
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
tty_closing = tty->count <= 1;
o_tty_closing = o_tty &&
(o_tty->count <= (pty_master ? 1 : 0));
@@ -1870,7 +1876,7 @@ static void release_dev(struct file * filp)
printk(KERN_WARNING "release_dev: %s: read/write wait queue "
"active!\n", tty_name(tty, buf));
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
schedule();
}
@@ -1936,7 +1942,7 @@ static void release_dev(struct file * filp)
read_unlock(&tasklist_lock);
}
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
/* check whether both sides are closing ... */
if (!tty_closing || (o_tty && !o_tty_closing))
@@ -2042,11 +2048,11 @@ retry_open:
index = -1;
retval = 0;
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
if (device == MKDEV(TTYAUX_MAJOR,0)) {
if (!current->signal->tty) {
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
return -ENXIO;
}
driver = current->signal->tty->driver;
@@ -2072,18 +2078,18 @@ retry_open:
noctty = 1;
goto got_driver;
}
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
return -ENODEV;
}
driver = get_tty_driver(device, &index);
if (!driver) {
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
return -ENODEV;
}
got_driver:
retval = init_dev(driver, index, &tty);
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
if (retval)
return retval;
@@ -2169,9 +2175,9 @@ static int ptmx_open(struct inode * inode, struct file * filp)
}
up(&allocated_ptys_lock);
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
retval = init_dev(ptm_driver, index, &tty);
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
if (retval)
goto out;
@@ -2190,6 +2196,7 @@ static int ptmx_open(struct inode * inode, struct file * filp)
return 0;
out1:
release_dev(filp);
+ return retval;
out:
down(&allocated_ptys_lock);
idr_remove(&allocated_ptys, index);
@@ -2677,7 +2684,7 @@ static void __do_SAK(void *arg)
tty_hangup(tty);
#else
struct tty_struct *tty = arg;
- struct task_struct *p;
+ struct task_struct *g, *p;
int session;
int i;
struct file *filp;
@@ -2698,8 +2705,18 @@ static void __do_SAK(void *arg)
tty->driver->flush_buffer(tty);
read_lock(&tasklist_lock);
+ /* Kill the entire session */
do_each_task_pid(session, PIDTYPE_SID, p) {
- if (p->signal->tty == tty || session > 0) {
+ printk(KERN_NOTICE "SAK: killed process %d"
+ " (%s): p->signal->session==tty->session\n",
+ p->pid, p->comm);
+ send_sig(SIGKILL, p, 1);
+ } while_each_task_pid(session, PIDTYPE_SID, p);
+ /* Now kill any processes that happen to have the
+ * tty open.
+ */
+ do_each_thread(g, p) {
+ if (p->signal->tty == tty) {
printk(KERN_NOTICE "SAK: killed process %d"
" (%s): p->signal->session==tty->session\n",
p->pid, p->comm);
@@ -2723,14 +2740,14 @@ static void __do_SAK(void *arg)
printk(KERN_NOTICE "SAK: killed process %d"
" (%s): fd#%d opened to the tty\n",
p->pid, p->comm, i);
- send_sig(SIGKILL, p, 1);
+ force_sig(SIGKILL, p);
break;
}
}
spin_unlock(&p->files->file_lock);
}
task_unlock(p);
- } while_each_task_pid(session, PIDTYPE_SID, p);
+ } while_each_thread(g, p);
read_unlock(&tasklist_lock);
#endif
}
@@ -2761,7 +2778,7 @@ static void flush_to_ldisc(void *private_)
struct tty_struct *tty = (struct tty_struct *) private_;
unsigned long flags;
struct tty_ldisc *disc;
- struct tty_buffer *tbuf, *head;
+ struct tty_buffer *tbuf;
int count;
char *char_buf;
unsigned char *flag_buf;
@@ -2778,9 +2795,7 @@ static void flush_to_ldisc(void *private_)
goto out;
}
spin_lock_irqsave(&tty->buf.lock, flags);
- head = tty->buf.head;
- tty->buf.head = NULL;
- while((tbuf = head) != NULL) {
+ while((tbuf = tty->buf.head) != NULL) {
while ((count = tbuf->commit - tbuf->read) != 0) {
char_buf = tbuf->char_buf_ptr + tbuf->read;
flag_buf = tbuf->flag_buf_ptr + tbuf->read;
@@ -2789,12 +2804,10 @@ static void flush_to_ldisc(void *private_)
disc->receive_buf(tty, char_buf, flag_buf, count);
spin_lock_irqsave(&tty->buf.lock, flags);
}
- if (tbuf->active) {
- tty->buf.head = head;
+ if (tbuf->active)
break;
- }
- head = tbuf->next;
- if (head == NULL)
+ tty->buf.head = tbuf->next;
+ if (tty->buf.head == NULL)
tty->buf.tail = NULL;
tty_buffer_free(tty, tbuf);
}
@@ -2925,8 +2938,8 @@ static void initialize_tty_struct(struct tty_struct *tty)
init_waitqueue_head(&tty->write_wait);
init_waitqueue_head(&tty->read_wait);
INIT_WORK(&tty->hangup_work, do_tty_hangup, tty);
- sema_init(&tty->atomic_read, 1);
- sema_init(&tty->atomic_write, 1);
+ mutex_init(&tty->atomic_read_lock);
+ mutex_init(&tty->atomic_write_lock);
spin_lock_init(&tty->read_lock);
INIT_LIST_HEAD(&tty->tty_files);
INIT_WORK(&tty->SAK_work, NULL, NULL);
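
Most of the tty_io.c churn above is the upstream sem2mutex conversion picked up by this rebase: DECLARE_MUTEX/down/up on tty_sem become DEFINE_MUTEX/mutex_lock/mutex_unlock on tty_mutex, and the per-tty atomic_read/atomic_write semaphores become mutexes. A minimal usage sketch of the interruptible variant that do_tty_write() relies on (demo_mutex and demo_write are hypothetical):

#include <linux/mutex.h>
#include <linux/errno.h>

static DEFINE_MUTEX(demo_mutex);

static int demo_write(void)
{
	if (mutex_lock_interruptible(&demo_mutex))
		return -ERESTARTSYS;	/* a signal arrived while sleeping */
	/* ... exclusive section, e.g. write_buf/write_cnt style state ... */
	mutex_unlock(&demo_mutex);
	return 0;
}
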
diff --git a/linux-2.6-xen-sparse/drivers/pci/Kconfig b/linux-2.6-xen-sparse/drivers/pci/Kconfig
index a47a5dfa01..065ea43aa2 100644
--- a/linux-2.6-xen-sparse/drivers/pci/Kconfig
+++ b/linux-2.6-xen-sparse/drivers/pci/Kconfig
@@ -12,24 +12,11 @@ config PCI_MSI
generate an interrupt using an inbound Memory Write on its
PCI bus instead of asserting a device IRQ pin.
- If you don't know what to do here, say N.
-
-config PCI_LEGACY_PROC
- bool "Legacy /proc/pci interface"
- depends on PCI
- ---help---
- This feature enables a procfs file -- /proc/pci -- that provides a
- summary of PCI devices in the system.
-
- This feature has been deprecated as of v2.5.53, in favor of using the
- tool lspci(8). This feature may be removed at a future date.
+ Use of PCI MSI interrupts can be disabled at kernel boot time
+ by using the 'pci=nomsi' option. This disables MSI for the
+ entire system.
- lspci can provide the same data, as well as much more. lspci is a part of
- the pci-utils package, which should be installed by your distribution.
- See <file:Documentation/Changes> for information on where to get the latest
- version.
-
- When in doubt, say N.
+ If you don't know what to do here, say N.
config PCI_DEBUG
bool "PCI Debugging"
diff --git a/linux-2.6-xen-sparse/drivers/serial/Kconfig b/linux-2.6-xen-sparse/drivers/serial/Kconfig
index c6be86d83e..db5ce63b9e 100644
--- a/linux-2.6-xen-sparse/drivers/serial/Kconfig
+++ b/linux-2.6-xen-sparse/drivers/serial/Kconfig
@@ -64,6 +64,33 @@ config SERIAL_8250_CONSOLE
If unsure, say N.
+config SERIAL_8250_GSC
+ tristate
+ depends on SERIAL_8250 && GSC
+ default SERIAL_8250
+
+config SERIAL_8250_PCI
+ tristate "8250/16550 PCI device support" if EMBEDDED
+ depends on SERIAL_8250 && PCI
+ default SERIAL_8250
+ help
+ This builds standard PCI serial support. You may be able to
+ disable this feature if you only need legacy serial support.
+ Saves about 9K.
+
+config SERIAL_8250_PNP
+ tristate "8250/16550 PNP device support" if EMBEDDED
+ depends on SERIAL_8250 && PNP
+ default SERIAL_8250
+ help
+ This builds standard PNP serial support. You may be able to
+ disable this feature if you only need legacy serial support.
+
+config SERIAL_8250_HP300
+ tristate
+ depends on SERIAL_8250 && HP300
+ default SERIAL_8250
+
config SERIAL_8250_CS
tristate "8250/16550 PCMCIA device support"
depends on PCMCIA && SERIAL_8250
@@ -78,14 +105,6 @@ config SERIAL_8250_CS
If unsure, say N.
-config SERIAL_8250_ACPI
- bool "8250/16550 device discovery via ACPI namespace"
- default y if IA64
- depends on ACPI && SERIAL_8250
- ---help---
- If you wish to enable serial port discovery via the ACPI
- namespace, say Y here. If unsure, say N.
-
config SERIAL_8250_NR_UARTS
int "Maximum number of 8250/16550 serial ports"
depends on SERIAL_8250
@@ -583,6 +602,13 @@ config SERIAL_SUNSAB_CONSOLE
on your Sparc system as the console, you can do so by answering
Y to this option.
+config SERIAL_SUNHV
+ bool "Sun4v Hypervisor Console support"
+ depends on SPARC64
+ help
+ This driver supports the console device found on SUN4V Sparc
+ systems. Say Y if you want to be able to use this device.
+
config SERIAL_IP22_ZILOG
tristate "IP22 Zilog8530 serial support"
depends on SGI_IP22
@@ -622,22 +648,6 @@ config SERIAL_SH_SCI_CONSOLE
depends on SERIAL_SH_SCI=y
select SERIAL_CORE_CONSOLE
-config SERIAL_AU1X00
- bool "Enable Au1x00 UART Support"
- depends on MIPS && SOC_AU1X00
- select SERIAL_CORE
- help
- If you have an Alchemy AU1X00 processor (MIPS based) and you want
- to use serial ports, say Y. Otherwise, say N.
-
-config SERIAL_AU1X00_CONSOLE
- bool "Enable Au1x00 serial console"
- depends on SERIAL_AU1X00
- select SERIAL_CORE_CONSOLE
- help
- If you have an Alchemy AU1X00 processor (MIPS based) and you want
- to use a console on a serial port, say Y. Otherwise, say N.
-
config SERIAL_CORE
tristate
@@ -861,7 +871,7 @@ config SERIAL_M32R_PLDSIO
config SERIAL_TXX9
bool "TMPTX39XX/49XX SIO support"
- depends HAS_TXX9_SERIAL && BROKEN
+ depends HAS_TXX9_SERIAL
select SERIAL_CORE
default y
diff --git a/linux-2.6-xen-sparse/drivers/video/Kconfig b/linux-2.6-xen-sparse/drivers/video/Kconfig
index cb5942d864..2fd9ab7f40 100644
--- a/linux-2.6-xen-sparse/drivers/video/Kconfig
+++ b/linux-2.6-xen-sparse/drivers/video/Kconfig
@@ -70,6 +70,22 @@ config FB_MACMODES
depends on FB
default n
+config FB_FIRMWARE_EDID
+ bool "Enable firmware EDID"
+ depends on FB
+ default y
+ ---help---
+ This enables access to the EDID transferred from the firmware.
+ On the i386, this is from the Video BIOS. Enable this if DDC/I2C
+ transfers do not work for your driver and if you are using
+ nvidiafb, i810fb or savagefb.
+
+ In general, choosing Y for this option is safe. If you
+ experience extremely long delays while booting before you get
+ something on your display, try setting this to N. Matrox cards in
+ combination with certain motherboards and monitors are known to
+ suffer from this problem.
+
config FB_MODE_HELPERS
bool "Enable Video Mode Handling Helpers"
depends on FB
@@ -384,6 +400,8 @@ config FB_ASILIANT
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the Asiliant 69030 chipset
config FB_IMSTT
bool "IMS Twin Turbo display support"
@@ -888,20 +906,6 @@ config FB_MATROX_MULTIHEAD
There is no need for enabling 'Matrox multihead support' if you have
only one Matrox card in the box.
-config FB_RADEON_OLD
- tristate "ATI Radeon display support (Old driver)"
- depends on FB && PCI
- select FB_CFB_FILLRECT
- select FB_CFB_COPYAREA
- select FB_CFB_IMAGEBLIT
- select FB_MACMODES if PPC
- help
- Choose this option if you want to use an ATI Radeon graphics card as
- a framebuffer device. There are both PCI and AGP versions. You
- don't need to choose this to run the Radeon in plain VGA mode.
- There is a product page at
- <http://www.ati.com/na/pages/products/pc/radeon32/index.html>.
-
config FB_RADEON
tristate "ATI Radeon display support"
depends on FB && PCI
@@ -959,7 +963,7 @@ config FB_ATY128
config FB_ATY
tristate "ATI Mach64 display support" if PCI || ATARI
- depends on FB
+ depends on FB && !SPARC32
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
@@ -1204,6 +1208,17 @@ config FB_AU1100
bool "Au1100 LCD Driver"
depends on (FB = y) && EXPERIMENTAL && PCI && MIPS && MIPS_PB1100=y
+config FB_AU1200
+ bool "Au1200 LCD Driver"
+ depends on FB && MIPS && SOC_AU1200
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the framebuffer driver for the AMD Au1200 SOC. It can drive
+ various panels and CRTs by passing in kernel cmd line option
+ au1200fb:panel=<name>.
+
source "drivers/video/geode/Kconfig"
config FB_FFB
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
index d6d3f7640a..fd9968a093 100644
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
@@ -229,7 +229,7 @@ static int increase_reservation(unsigned long nr_pages)
/* Relinquish the page back to the allocator. */
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
}
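
set_page_count(page, 1) is no longer the way to hand a page back to the allocator in 2.6.17; init_page_count() is the replacement, used both by the balloon driver here and by netback below. A sketch of the release path, assuming a page that was marked reserved while ballooned out (the helper name is hypothetical):

static void give_page_back(struct page *page)
{
	ClearPageReserved(page);	/* was reserved while owned by the balloon */
	init_page_count(page);		/* fresh reference count of one */
	__free_page(page);		/* return it to the buddy allocator */
}
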
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
index 5b07416fa2..1a91f27a14 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -272,13 +272,13 @@ static void backend_changed(struct xenbus_device *dev,
if (bd == NULL)
xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
- down(&bd->bd_sem);
+ mutex_lock(&bd->bd_mutex);
if (info->users > 0)
xenbus_dev_error(dev, -EBUSY,
"Device in use; refusing to close");
else
blkfront_closing(dev);
- up(&bd->bd_sem);
+ mutex_unlock(&bd->bd_mutex);
bdput(bd);
break;
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/console/console.c b/linux-2.6-xen-sparse/drivers/xen/console/console.c
index 3b48932e0a..c17e24895a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c
@@ -595,16 +595,16 @@ static void xencons_close(struct tty_struct *tty, struct file *filp)
if (DUMMY_TTY(tty))
return;
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
if (tty->count != 1) {
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
return;
}
/* Prevent other threads from re-opening this tty. */
set_bit(TTY_CLOSING, &tty->flags);
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
tty->closing = 1;
tty_wait_until_sent(tty, 0);
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
index 5f06e1fdc9..9abe4251f9 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
@@ -26,10 +26,6 @@
#include <xen/cpu_hotplug.h>
#include <xen/xenbus.h>
-#ifdef CONFIG_SMP_ALTERNATIVES
-#include <asm/smp_alt.h>
-#endif
-
extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
@@ -84,7 +80,8 @@ void __init prefill_possible_map(void)
{
int i, rc;
- if (!cpus_empty(cpu_possible_map))
+ for_each_possible_cpu(i)
+ if (i != smp_processor_id())
return;
for (i = 0; i < NR_CPUS; i++) {
@@ -351,7 +348,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
void __devinit smp_prepare_boot_cpu(void)
{
- prefill_possible_map();
cpu_present_map = cpumask_of_cpu(0);
cpu_online_map = cpumask_of_cpu(0);
}
@@ -396,10 +392,8 @@ void __cpu_die(unsigned int cpu)
xen_smp_intr_exit(cpu);
-#ifdef CONFIG_SMP_ALTERNATIVES
if (num_online_cpus() == 1)
- unprepare_for_smp();
-#endif
+ alternatives_smp_switch(0);
}
#else /* !CONFIG_HOTPLUG_CPU */
@@ -424,10 +418,8 @@ int __devinit __cpu_up(unsigned int cpu)
if (rc)
return rc;
-#ifdef CONFIG_SMP_ALTERNATIVES
if (num_online_cpus() == 1)
- prepare_for_smp();
-#endif
+ alternatives_smp_switch(1);
/* This must be done before setting cpu_online_map */
set_cpu_sibling_map(cpu);
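
The smpboot.c hunks above retire the Xen-private CONFIG_SMP_ALTERNATIVES hooks in favour of the mainline 2.6.17 alternatives_smp_switch(int smp) interface, which patches SMP lock prefixes in or out when the online CPU count crosses one. A hedged sketch of the shape of the two call sites (the wrapper names are hypothetical):

static void on_cpu_up(void)
{
	if (num_online_cpus() == 1)	/* first secondary CPU arriving */
		alternatives_smp_switch(1);
}

static void on_cpu_down(void)
{
	if (num_online_cpus() == 1)	/* back down to a single CPU */
		alternatives_smp_switch(0);
}
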
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
index 1884597430..e7b2c1030a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
@@ -1361,7 +1361,7 @@ static void netif_idx_release(u16 pending_idx)
static void netif_page_release(struct page *page)
{
/* Ready for next use. */
- set_page_count(page, 1);
+ init_page_count(page);
netif_idx_release(page->index);
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c b/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c
index 6be14aebdc..19b5462c24 100644
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c
@@ -43,7 +43,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
struct xen_pci_op *active_op = &pdev->sh_info->op;
unsigned long irq_flags;
evtchn_port_t port = pdev->evtchn;
- nsec_t ns, ns_timeout;
+ s64 ns, ns_timeout;
struct timeval tv;
spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
@@ -62,7 +62,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
* timeout in the past). 1s difference gives plenty of slack for error.
*/
do_gettimeofday(&tv);
- ns_timeout = timeval_to_ns(&tv) + 2 * (nsec_t)NSEC_PER_SEC;
+ ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
clear_evtchn(port);
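
The nsec_t typedef was dropped upstream, so pcifront now computes its two-second watchdog deadline in plain s64 nanoseconds. A sketch of the deadline arithmetic; both helper names are hypothetical:

#include <linux/time.h>

static s64 deadline_in(int secs)
{
	struct timeval tv;

	do_gettimeofday(&tv);
	return timeval_to_ns(&tv) + secs * (s64)NSEC_PER_SEC;
}

static int past_deadline(s64 ns_timeout)
{
	struct timeval tv;

	do_gettimeofday(&tv);
	return timeval_to_ns(&tv) > ns_timeout;	/* s64 nanoseconds */
}
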
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
index 5dc62457db..eee22afbb8 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
@@ -68,7 +68,7 @@ static unsigned long xen_store_mfn;
extern struct mutex xenwatch_mutex;
-static struct notifier_block *xenstore_chain;
+static ATOMIC_NOTIFIER_HEAD(xenstore_chain);
static void wait_for_devices(struct xenbus_driver *xendrv);
@@ -748,7 +748,7 @@ int register_xenstore_notifier(struct notifier_block *nb)
if (xenstored_ready > 0)
ret = nb->notifier_call(nb, 0, NULL);
else
- notifier_chain_register(&xenstore_chain, nb);
+ atomic_notifier_chain_register(&xenstore_chain, nb);
return ret;
}
@@ -756,7 +756,7 @@ EXPORT_SYMBOL_GPL(register_xenstore_notifier);
void unregister_xenstore_notifier(struct notifier_block *nb)
{
- notifier_chain_unregister(&xenstore_chain, nb);
+ atomic_notifier_chain_unregister(&xenstore_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
@@ -771,7 +771,7 @@ void xenbus_probe(void *unused)
xenbus_backend_probe_and_watch();
/* Notify others that xenstore is up */
- notifier_call_chain(&xenstore_chain, 0, NULL);
+ atomic_notifier_call_chain(&xenstore_chain, 0, NULL);
}
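
xenstore_chain moves from the old raw notifier_chain_* calls, which required external locking, to the self-locking atomic notifier API introduced in 2.6.17. Minimal usage sketch with a hypothetical chain and callback:

#include <linux/notifier.h>

static ATOMIC_NOTIFIER_HEAD(demo_chain);

static int demo_event(struct notifier_block *nb, unsigned long event,
		      void *data)
{
	/* react to the event */
	return NOTIFY_OK;
}

static struct notifier_block demo_nb = {
	.notifier_call = demo_event,
};

static void demo(void)
{
	atomic_notifier_chain_register(&demo_chain, &demo_nb);
	atomic_notifier_call_chain(&demo_chain, 0, NULL);
	atomic_notifier_chain_unregister(&demo_chain, &demo_nb);
}
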
diff --git a/linux-2.6-xen-sparse/fs/Kconfig b/linux-2.6-xen-sparse/fs/Kconfig
index de4c01ef42..d510de7bd3 100644
--- a/linux-2.6-xen-sparse/fs/Kconfig
+++ b/linux-2.6-xen-sparse/fs/Kconfig
@@ -799,6 +799,7 @@ config PROC_KCORE
config PROC_VMCORE
bool "/proc/vmcore support (EXPERIMENTAL)"
depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP
+ default y
help
Exports the dump image of crashed kernel in ELF format.
@@ -842,6 +843,12 @@ config HUGETLBFS
bool "HugeTLB file system support"
depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
depends !XEN
+ help
+ hugetlbfs is a filesystem backing for HugeTLB pages, based on
+ ramfs. For architectures that support it, say Y here and read
+ <file:Documentation/vm/hugetlbpage.txt> for details.
+
+ If unsure, say N.
config HUGETLB_PAGE
def_bool HUGETLBFS
@@ -860,21 +867,9 @@ config RAMFS
To compile this as a module, choose M here: the module will be called
ramfs.
-config RELAYFS_FS
- tristate "Relayfs file system support"
- ---help---
- Relayfs is a high-speed data relay filesystem designed to provide
- an efficient mechanism for tools and facilities to relay large
- amounts of data from kernel space to user space.
-
- To compile this code as a module, choose M here: the module will be
- called relayfs.
-
- If unsure, say N.
-
config CONFIGFS_FS
tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on SYSFS && EXPERIMENTAL
help
configfs is a ram-based filesystem that provides the converse
of sysfs's functionality. Where sysfs is a filesystem-based
@@ -1568,6 +1563,7 @@ config RPCSEC_GSS_SPKM3
select CRYPTO
select CRYPTO_MD5
select CRYPTO_DES
+ select CRYPTO_CAST5
help
Provides for secure RPC calls by means of a gss-api
mechanism based on the SPKM3 public-key mechanism.
diff --git a/linux-2.6-xen-sparse/include/asm-i386/apic.h b/linux-2.6-xen-sparse/include/asm-i386/apic.h
index 1fb4d7115e..87476de389 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/apic.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/apic.h
@@ -141,6 +141,8 @@ void switch_ipi_to_APIC_timer(void *cpumask);
extern int timer_over_8254;
+extern int modern_apic(void);
+
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
diff --git a/linux-2.6-xen-sparse/include/asm-i386/atomic.h b/linux-2.6-xen-sparse/include/asm-i386/atomic.h
deleted file mode 100644
index 4a9e85d197..0000000000
--- a/linux-2.6-xen-sparse/include/asm-i386/atomic.h
+++ /dev/null
@@ -1,254 +0,0 @@
-#ifndef __ARCH_I386_ATOMIC__
-#define __ARCH_I386_ATOMIC__
-
-#include <linux/config.h>
-#include <linux/compiler.h>
-#include <asm/processor.h>
-#include <asm/smp_alt.h>
-
-/*
- * Atomic operations that C can't guarantee us. Useful for
- * resource counting etc..
- */
-
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct { volatile int counter; } atomic_t;
-
-#define ATOMIC_INIT(i) { (i) }
-
-/**
- * atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
-#define atomic_read(v) ((v)->counter)
-
-/**
- * atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
-#define atomic_set(v,i) (((v)->counter) = (i))
-
-/**
- * atomic_add - add integer to atomic variable
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v.
- */
-static __inline__ void atomic_add(int i, atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "addl %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
-}
-
-/**
- * atomic_sub - subtract the atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v.
- */
-static __inline__ void atomic_sub(int i, atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "subl %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
-}
-
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "subl %2,%0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"ir" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
-static __inline__ void atomic_inc(atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "incl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
-}
-
-/**
- * atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
-static __inline__ void atomic_dec(atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "decl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
-}
-
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static __inline__ int atomic_dec_and_test(atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "decl %0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int atomic_inc_and_test(atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "incl %0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static __inline__ int atomic_add_negative(int i, atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "addl %2,%0; sets %1"
- :"=m" (v->counter), "=qm" (c)
- :"ir" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/**
- * atomic_add_return - add and return
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns @i + @v
- */
-static __inline__ int atomic_add_return(int i, atomic_t *v)
-{
- int __i;
-#ifdef CONFIG_M386
- if(unlikely(boot_cpu_data.x86==3))
- goto no_xadd;
-#endif
- /* Modern 486+ processor */
- __i = i;
- __asm__ __volatile__(
- LOCK "xaddl %0, %1;"
- :"=r"(i)
- :"m"(v->counter), "0"(i));
- return i + __i;
-
-#ifdef CONFIG_M386
-no_xadd: /* Legacy 386 processor */
- local_irq_disable();
- __i = atomic_read(v);
- atomic_set(v, i + __i);
- local_irq_enable();
- return i + __i;
-#endif
-}
-
-static __inline__ int atomic_sub_return(int i, atomic_t *v)
-{
- return atomic_add_return(-i,v);
-}
-
-#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-/**
- * atomic_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
- */
-#define atomic_add_unless(v, a, u) \
-({ \
- int c, old; \
- c = atomic_read(v); \
- while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
- c = old; \
- c != (u); \
-})
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
-
-#define atomic_inc_return(v) (atomic_add_return(1,v))
-#define atomic_dec_return(v) (atomic_sub_return(1,v))
-
-/* These are x86-specific, used by some header files */
-#define atomic_clear_mask(mask, addr) \
-__asm__ __volatile__(LOCK "andl %0,%1" \
-: : "r" (~(mask)),"m" (*addr) : "memory")
-
-#define atomic_set_mask(mask, addr) \
-__asm__ __volatile__(LOCK "orl %0,%1" \
-: : "r" (mask),"m" (*(addr)) : "memory")
-
-/* Atomic operations are already serializing on x86 */
-#define smp_mb__before_atomic_dec() barrier()
-#define smp_mb__after_atomic_dec() barrier()
-#define smp_mb__before_atomic_inc() barrier()
-#define smp_mb__after_atomic_inc() barrier()
-
-#include <asm-generic/atomic.h>
-#endif
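
The deleted header above was the Xen fork of asm-i386/atomic.h, carried only for its smp_alt.h LOCK macro; the 2.6.17 runtime SMP alternatives make the fork unnecessary. Its atomic_add_unless() macro is the classic compare-and-swap retry loop, written out here as a function for clarity (the name add_unless is hypothetical):

static inline int add_unless(atomic_t *v, int a, int u)
{
	int c, old;

	c = atomic_read(v);
	while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c)
		c = old;	/* lost a race: retry with the value we saw */
	return c != u;		/* non-zero if the add was performed */
}
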
diff --git a/linux-2.6-xen-sparse/include/asm-i386/bitops.h b/linux-2.6-xen-sparse/include/asm-i386/bitops.h
deleted file mode 100644
index 5f438be07d..0000000000
--- a/linux-2.6-xen-sparse/include/asm-i386/bitops.h
+++ /dev/null
@@ -1,463 +0,0 @@
-#ifndef _I386_BITOPS_H
-#define _I386_BITOPS_H
-
-/*
- * Copyright 1992, Linus Torvalds.
- */
-
-#include <linux/config.h>
-#include <linux/compiler.h>
-#include <asm/smp_alt.h>
-
-/*
- * These have to be done with inline assembly: that way the bit-setting
- * is guaranteed to be atomic. All bit operations return 0 if the bit
- * was cleared before the operation and != 0 if it was not.
- *
- * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
- */
-
-#define ADDR (*(volatile long *) addr)
-
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered. See __set_bit()
- * if you do not require the atomic guarantees.
- *
- * Note: there are no guarantees that this function will not be reordered
- * on non-x86 architectures, so if you are writing portable code,
- * make sure not to rely on its reordering guarantees.
- *
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void set_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__( LOCK
- "btsl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __set_bit(int nr, volatile unsigned long * addr)
-{
- __asm__(
- "btsl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered. However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
- * in order to ensure changes are visible on other processors.
- */
-static inline void clear_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__( LOCK
- "btrl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-static inline void __clear_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__(
- "btrl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-#define smp_mb__before_clear_bit() barrier()
-#define smp_mb__after_clear_bit() barrier()
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __change_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__(
- "btcl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered. It may be
- * reordered on other architectures than x86.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void change_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__( LOCK
- "btcl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It may be reordered on other architectures than x86.
- * It also implies a memory barrier.
- */
-static inline int test_and_set_bit(int nr, volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two instances of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static inline int __test_and_set_bit(int nr, volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__(
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr));
- return oldbit;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It can be reordered on architectures other than x86.
- * It also implies a memory barrier.
- */
-static inline int test_and_clear_bit(int nr, volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two instances of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
- int oldbit;
-
- __asm__(
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr));
- return oldbit;
-}
-
-/* WARNING: non-atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static inline int test_and_change_bit(int nr, volatile unsigned long* addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static int test_bit(int nr, const volatile void * addr);
-#endif
-
-static __always_inline int constant_test_bit(int nr, const volatile unsigned long *addr)
-{
- return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0;
-}
-
-static inline int variable_test_bit(int nr, const volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit)
- :"m" (ADDR),"Ir" (nr));
- return oldbit;
-}
-
-#define test_bit(nr,addr) \
-(__builtin_constant_p(nr) ? \
- constant_test_bit((nr),(addr)) : \
- variable_test_bit((nr),(addr)))
-
-#undef ADDR
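
The dispatch above is purely a compile-time optimisation: a constant @nr lets gcc fold constant_test_bit() into a load and mask, while a variable @nr goes through the btl instruction. A small sketch showing both paths (bitmap is an assumed array):

unsigned long bitmap[4] = { 0 };

int query(int n)
{
	set_bit(5, bitmap);
	/* literal nr: __builtin_constant_p(5) is true -> constant_test_bit() */
	int constant_path = test_bit(5, bitmap);
	/* runtime nr: falls through to variable_test_bit() and btl */
	int variable_path = test_bit(n, bitmap);
	return constant_path + variable_path;
}
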
-
-/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit.
- */
-static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
-{
- int d0, d1, d2;
- int res;
-
- if (!size)
- return 0;
- /* This looks at memory. Mark it volatile to tell gcc not to move it around */
- __asm__ __volatile__(
- "movl $-1,%%eax\n\t"
- "xorl %%edx,%%edx\n\t"
- "repe; scasl\n\t"
- "je 1f\n\t"
- "xorl -4(%%edi),%%eax\n\t"
- "subl $4,%%edi\n\t"
- "bsfl %%eax,%%edx\n"
- "1:\tsubl %%ebx,%%edi\n\t"
- "shll $3,%%edi\n\t"
- "addl %%edi,%%edx"
- :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
- :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
- return res;
-}
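
In portable C, the scan above amounts to: skip whole all-ones 32-bit words with repe;scasl, then isolate the first zero bit of the mismatching word. A reference rendering of that logic, assuming 32-bit longs as on i386 (a sketch, not the kernel implementation; as in the kernel version, callers treat a result >= size as "not found"):

unsigned find_first_zero_bit_ref(const unsigned long *addr, unsigned size)
{
	unsigned i, words = (size + 31) >> 5;

	for (i = 0; i < words; i++) {
		if (addr[i] != ~0UL) {
			unsigned long x = ~addr[i];	/* zeros become ones */
			unsigned bit = 0;
			while (!(x & 1)) {		/* bsfl equivalent */
				x >>= 1;
				bit++;
			}
			return i * 32 + bit;
		}
	}
	return words * 32;	/* nothing found (0 when size == 0) */
}
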
-
-/**
- * find_next_zero_bit - find the next zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-int find_next_zero_bit(const unsigned long *addr, int size, int offset);
-
-/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
- __asm__("bsfl %1,%0"
- :"=r" (word)
- :"rm" (word));
- return word;
-}
-
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first set bit, not the number of the byte
- * containing a bit.
- */
-static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
-{
- unsigned x = 0;
-
- while (x < size) {
- unsigned long val = *addr++;
- if (val)
- return __ffs(val) + x;
- x += (sizeof(*addr)<<3);
- }
- return x;
-}
-
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-int find_next_bit(const unsigned long *addr, int size, int offset);
-
-/**
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static inline unsigned long ffz(unsigned long word)
-{
- __asm__("bsfl %1,%0"
- :"=r" (word)
- :"r" (~word));
- return word;
-}
-
-#define fls64(x) generic_fls64(x)
-
-#ifdef __KERNEL__
-
-/*
- * Every architecture must define this function. It's the fastest
- * way of searching a 140-bit bitmap where the first 100 bits are
- * unlikely to be set. It's guaranteed that at least one of the 140
- * bits is set.
- */
-static inline int sched_find_first_bit(const unsigned long *b)
-{
- if (unlikely(b[0]))
- return __ffs(b[0]);
- if (unlikely(b[1]))
- return __ffs(b[1]) + 32;
- if (unlikely(b[2]))
- return __ffs(b[2]) + 64;
- if (b[3])
- return __ffs(b[3]) + 96;
- return __ffs(b[4]) + 128;
-}
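
A worked example of the unrolling above against the O(1) scheduler's 140-bit priority map: marking priority 100 runnable sets bit 4 of word 3, so the three unlikely() tests fall through and the result comes from the fourth word. Sketch:

unsigned long prio_bitmap[5] = { 0 };

/* priority 100 lives in word 100/32 == 3, bit 100%32 == 4 */
prio_bitmap[100 / 32] |= 1UL << (100 % 32);

/* b[0..2] are zero, b[3] == 0x10, so: __ffs(0x10) + 96 == 4 + 96 == 100 */
int prio = sched_find_first_bit(prio_bitmap);
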
-
-/**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static inline int ffs(int x)
-{
- int r;
-
- __asm__("bsfl %1,%0\n\t"
- "jnz 1f\n\t"
- "movl $-1,%0\n"
- "1:" : "=r" (r) : "rm" (x));
- return r+1;
-}
-
-/**
- * fls - find last bit set
- * @x: the word to search
- *
- * This is defined the same way as ffs.
- */
-static inline int fls(int x)
-{
- int r;
-
- __asm__("bsrl %1,%0\n\t"
- "jnz 1f\n\t"
- "movl $-1,%0\n"
- "1:" : "=r" (r) : "rm" (x));
- return r+1;
-}
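
Both routines follow the 1-based libc convention the comment mentions: a zero argument yields 0, which is what the jnz/movl $-1 sequence arranges. Expected values, written as assertions:

#include <assert.h>

assert(ffs(0) == 0);		/* no bit set */
assert(ffs(1) == 1);		/* bit 0 -> 1-based index 1 */
assert(ffs(0x8000) == 16);	/* bit 15 -> index 16 */

assert(fls(0) == 0);		/* no bit set */
assert(fls(1) == 1);		/* highest set bit is bit 0 */
assert(fls(0x8000) == 16);	/* highest set bit is bit 15 */
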
-
-/**
- * hweightN - returns the Hamming weight of an N-bit word
- * @x: the word to weigh
- *
- * The Hamming Weight of a number is the total number of bits set in it.
- */
-
-#define hweight32(x) generic_hweight32(x)
-#define hweight16(x) generic_hweight16(x)
-#define hweight8(x) generic_hweight8(x)
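
The generic_hweight32() fallback these expand to is the classic parallel (SWAR) population count; a self-contained sketch of the technique for reference:

unsigned int hweight32_ref(unsigned int w)
{
	w = w - ((w >> 1) & 0x55555555);		/* 2-bit sums */
	w = (w & 0x33333333) + ((w >> 2) & 0x33333333);	/* 4-bit sums */
	w = (w + (w >> 4)) & 0x0f0f0f0f;		/* 8-bit sums */
	return (w * 0x01010101) >> 24;			/* add the four bytes */
}
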
-
-#endif /* __KERNEL__ */
-
-#ifdef __KERNEL__
-
-#define ext2_set_bit(nr,addr) \
- __test_and_set_bit((nr),(unsigned long*)addr)
-#define ext2_set_bit_atomic(lock,nr,addr) \
- test_and_set_bit((nr),(unsigned long*)addr)
-#define ext2_clear_bit(nr, addr) \
- __test_and_clear_bit((nr),(unsigned long*)addr)
-#define ext2_clear_bit_atomic(lock,nr, addr) \
- test_and_clear_bit((nr),(unsigned long*)addr)
-#define ext2_test_bit(nr, addr) test_bit((nr),(unsigned long*)addr)
-#define ext2_find_first_zero_bit(addr, size) \
- find_first_zero_bit((unsigned long*)addr, size)
-#define ext2_find_next_zero_bit(addr, size, off) \
- find_next_zero_bit((unsigned long*)addr, size, off)
-
-/* Bitmap functions for the minix filesystem. */
-#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,(void*)addr)
-#define minix_set_bit(nr,addr) __set_bit(nr,(void*)addr)
-#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,(void*)addr)
-#define minix_test_bit(nr,addr) test_bit(nr,(void*)addr)
-#define minix_find_first_zero_bit(addr,size) \
- find_first_zero_bit((void*)addr,size)
-
-#endif /* __KERNEL__ */
-
-#endif /* _I386_BITOPS_H */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/futex.h b/linux-2.6-xen-sparse/include/asm-i386/futex.h
deleted file mode 100644
index 24c9fd31da..0000000000
--- a/linux-2.6-xen-sparse/include/asm-i386/futex.h
+++ /dev/null
@@ -1,108 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#ifdef __KERNEL__
-
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-
-#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: " insn "\n" \
-"2: .section .fixup,\"ax\"\n\
-3: mov %3, %1\n\
- jmp 2b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .long 1b,3b\n\
- .previous" \
- : "=r" (oldval), "=r" (ret), "=m" (*uaddr) \
- : "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
-
-#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: movl %2, %0\n\
- movl %0, %3\n" \
- insn "\n" \
-"2: " LOCK "cmpxchgl %3, %2\n\
- jnz 1b\n\
-3: .section .fixup,\"ax\"\n\
-4: mov %5, %1\n\
- jmp 3b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .long 1b,4b,2b,4b\n\
- .previous" \
- : "=&a" (oldval), "=&r" (ret), "=m" (*uaddr), \
- "=&r" (tem) \
- : "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
- int oldval = 0, ret, tem;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
- return -EFAULT;
-
- inc_preempt_count();
-
- if (op == FUTEX_OP_SET)
- __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
- else {
-#ifndef CONFIG_X86_BSWAP
- if (boot_cpu_data.x86 == 3)
- ret = -ENOSYS;
- else
-#endif
- switch (op) {
- case FUTEX_OP_ADD:
- __futex_atomic_op1(LOCK "xaddl %0, %2", ret,
- oldval, uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr,
- oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr,
- ~oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr,
- oparg);
- break;
- default:
- ret = -ENOSYS;
- }
- }
-
- dec_preempt_count();
-
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
- return ret;
-}
-
-#endif
-#endif
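
The shifts at the top of futex_atomic_op_inuser() decode a packed 32-bit word laid out as op:4 | cmp:4 | oparg:12 | cmparg:12, where the left-then-arithmetic-right shift pairs sign-extend the two 12-bit arguments. A sketch of the matching encoder (the FUTEX_OP() macro in <linux/futex.h> uses this layout; the helper name here is illustrative):

int futex_encode_op(int op, int cmp, int oparg, int cmparg)
{
	return ((op & 0xf) << 28) | ((cmp & 0xf) << 24)
	     | ((oparg & 0xfff) << 12) | (cmparg & 0xfff);
}

/* e.g. FUTEX_OP_ADD with oparg 1, compared FUTEX_OP_CMP_EQ against 0 */
int encoded_op = futex_encode_op(1, 0, 1, 0);
/* decoding as above: (encoded_op >> 28) & 7 == 1, oparg == 1, cmparg == 0 */
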
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h
index ed5203a573..6d47fdcd3f 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h
@@ -233,23 +233,11 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
*/
#define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
-#define isa_readb(a) readb(__ISA_IO_base + (a))
-#define isa_readw(a) readw(__ISA_IO_base + (a))
-#define isa_readl(a) readl(__ISA_IO_base + (a))
-#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
-#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
-#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
-#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
-#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
-#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
-
-
/*
* Again, i386 does not require mem IO specific function.
*/
#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
-#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(__ISA_IO_base + (b)),(c),(d))
/**
* check_signature - find BIOS signatures
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
index ff183db39e..b2c8eb6a1c 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
@@ -205,8 +205,6 @@ extern int page_is_ram(unsigned long pagenr);
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
#ifdef CONFIG_FLATMEM
-#define pfn_to_page(pfn) (mem_map + (pfn))
-#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#endif /* CONFIG_FLATMEM */
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
@@ -222,6 +220,7 @@ extern int page_is_ram(unsigned long pagenr);
#endif /* __KERNEL__ */
+#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
#endif /* _I386_PAGE_H */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
index 833dbeed61..34cb67f06f 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
@@ -82,4 +82,6 @@ static inline int pte_exec_kernel(pte_t pte)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+void vmalloc_sync_all(void);
+
#endif /* _I386_PGTABLE_2LEVEL_H */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
index 07f3e1f230..7c3c36b286 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
@@ -180,4 +180,6 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
#define __pmd_free_tlb(tlb, x) do { } while (0)
+#define vmalloc_sync_all() ((void)0)
+
#endif /* _I386_PGTABLE_3LEVEL_H */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
index 6bed6669c5..b4a967e7e9 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
@@ -220,13 +220,12 @@ extern unsigned long pg0[];
* The following only work if pte_present() is true.
* Undefined behaviour if not..
*/
-#define __LARGE_PTE (_PAGE_PSE | _PAGE_PRESENT)
static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; }
-static inline int pte_huge(pte_t pte) { return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; }
+static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; }
/*
* The following only works if pte_present() is not true.
@@ -243,7 +242,7 @@ static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return
static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= __LARGE_PTE; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; }
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h
index fabab38fa2..b11e3eddf1 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h
@@ -20,6 +20,7 @@
#include <linux/config.h>
#include <linux/threads.h>
#include <asm/percpu.h>
+#include <linux/cpumask.h>
#include <xen/interface/physdev.h>
/* flag for disabling the tsc */
@@ -68,6 +69,9 @@ struct cpuinfo_x86 {
char pad0;
int x86_power;
unsigned long loops_per_jiffy;
+#ifdef CONFIG_SMP
+ cpumask_t llc_shared_map; /* cpus sharing the last level cache */
+#endif
unsigned char x86_max_cores; /* cpuid returned max cores value */
unsigned char booted_cores; /* number of cores as seen by OS */
unsigned char apicid;
@@ -106,6 +110,7 @@ extern struct cpuinfo_x86 cpu_data[];
extern int phys_proc_id[NR_CPUS];
extern int cpu_core_id[NR_CPUS];
+extern int cpu_llc_id[NR_CPUS];
extern char ignore_fpu_irq;
extern void identify_cpu(struct cpuinfo_x86 *);
@@ -623,8 +628,6 @@ struct extended_sigtable {
unsigned int reserved[3];
struct extended_signature sigs[0];
};
-/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
-#define MICROCODE_IOCFREE _IO('6',0)
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static inline void rep_nop(void)
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/setup.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/setup.h
index 3918da5942..de9577a5f2 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/setup.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/setup.h
@@ -6,9 +6,7 @@
#ifndef _i386_SETUP_H
#define _i386_SETUP_H
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x) ((unsigned long long)(x) << PAGE_SHIFT)
+#include <linux/pfn.h>
/*
* Reserved space for vmalloc and iomap - defined in asm/page.h
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h
index b29479ca9b..4d3291941a 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/smp.h
@@ -91,6 +91,7 @@ static __inline int logical_smp_processor_id(void)
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
+extern void prefill_possible_map(void);
#endif /* !__ASSEMBLY__ */
#else /* CONFIG_SMP */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/spinlock.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/spinlock.h
index e81ce2c18b..20742a6f5a 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/spinlock.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/spinlock.h
@@ -6,7 +6,6 @@
#include <asm/page.h>
#include <linux/config.h>
#include <linux/compiler.h>
-#include <asm/smp_alt.h>
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
@@ -23,9 +22,8 @@
(*(volatile signed char *)(&(x)->slock) <= 0)
#define __raw_spin_lock_string \
- "\n1:\n" \
- LOCK \
- "decb %0\n\t" \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
"jns 3f\n" \
"2:\t" \
"rep;nop\n\t" \
@@ -35,67 +33,52 @@
"3:\n\t"
#define __raw_spin_lock_string_flags \
- "\n1:\n" \
- LOCK \
- "decb %0\n\t" \
- "jns 4f\n\t" \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
+ "jns 5f\n" \
"2:\t" \
"testl $0x200, %1\n\t" \
- "jz 3f\n\t" \
- "#sti\n\t" \
+ "jz 4f\n\t" \
+ "#sti\n" \
"3:\t" \
"rep;nop\n\t" \
"cmpb $0, %0\n\t" \
"jle 3b\n\t" \
"#cli\n\t" \
"jmp 1b\n" \
- "4:\n\t"
+ "4:\t" \
+ "rep;nop\n\t" \
+ "cmpb $0, %0\n\t" \
+ "jg 1b\n\t" \
+ "jmp 4b\n" \
+ "5:\n\t"
+
+#define __raw_spin_lock_string_up \
+ "\n\tdecb %0"
static inline void __raw_spin_lock(raw_spinlock_t *lock)
{
- __asm__ __volatile__(
- __raw_spin_lock_string
- :"=m" (lock->slock) : : "memory");
+ alternative_smp(
+ __raw_spin_lock_string,
+ __raw_spin_lock_string_up,
+ "=m" (lock->slock) : : "memory");
}
static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
{
- __asm__ __volatile__(
- __raw_spin_lock_string_flags
- :"=m" (lock->slock) : "r" (flags) : "memory");
+ alternative_smp(
+ __raw_spin_lock_string_flags,
+ __raw_spin_lock_string_up,
+ "=m" (lock->slock) : "r" (flags) : "memory");
}
static inline int __raw_spin_trylock(raw_spinlock_t *lock)
{
char oldval;
-#ifdef CONFIG_SMP_ALTERNATIVES
- __asm__ __volatile__(
- "1:movb %1,%b0\n"
- "movb $0,%1\n"
- "2:"
- ".section __smp_alternatives,\"a\"\n"
- ".long 1b\n"
- ".long 3f\n"
- ".previous\n"
- ".section __smp_replacements,\"a\"\n"
- "3: .byte 2b - 1b\n"
- ".byte 5f-4f\n"
- ".byte 0\n"
- ".byte 6f-5f\n"
- ".byte -1\n"
- "4: xchgb %b0,%1\n"
- "5: movb %1,%b0\n"
- "movb $0,%1\n"
- "6:\n"
- ".previous\n"
- :"=q" (oldval), "=m" (lock->slock)
- :"0" (0) : "memory");
-#else
__asm__ __volatile__(
"xchgb %b0,%1"
:"=q" (oldval), "=m" (lock->slock)
:"0" (0) : "memory");
-#endif
return oldval > 0;
}
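
In C terms the locked fast path above reads: atomically decrement the lock byte; if the result stayed non-negative we own the lock, otherwise spin with plain loads (rep;nop is the pause hint) until the byte goes positive, then retry. A sketch of that logic, with atomic_dec_byte() as a hypothetical stand-in for the LOCK decb:

void raw_spin_lock_ref(volatile signed char *slock)
{
	for (;;) {
		/* LOCK decb: exactly one CPU sees the 1 -> 0 transition */
		if (atomic_dec_byte(slock) >= 0)	/* hypothetical helper */
			return;
		/* contended: read-only spin, no bus-locked traffic */
		while (*slock <= 0)
			cpu_relax();
	}
}
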
@@ -205,12 +188,12 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock)
static inline void __raw_read_unlock(raw_rwlock_t *rw)
{
- asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory");
+ asm volatile(LOCK_PREFIX "incl %0" :"=m" (rw->lock) : : "memory");
}
static inline void __raw_write_unlock(raw_rwlock_t *rw)
{
- asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0"
+ asm volatile(LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ", %0"
: "=m" (rw->lock) : : "memory");
}
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h
index 3b7ce63148..812e7bce49 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h
@@ -3,12 +3,11 @@
#include <linux/config.h>
#include <linux/kernel.h>
-#include <linux/bitops.h>
-#include <asm/synch_bitops.h>
#include <asm/segment.h>
#include <asm/cpufeature.h>
+#include <linux/bitops.h> /* for LOCK_PREFIX */
+#include <asm/synch_bitops.h>
#include <asm/hypervisor.h>
-#include <asm/smp_alt.h>
#ifdef __KERNEL__
@@ -279,19 +278,19 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long prev;
switch (size) {
case 1:
- __asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
: "=a"(prev)
: "q"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 2:
- __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
: "=a"(prev)
: "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 4:
- __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
: "=a"(prev)
: "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
@@ -344,7 +343,7 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long l
unsigned long long new)
{
unsigned long long prev;
- __asm__ __volatile__(LOCK "cmpxchg8b %3"
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
: "=A"(prev)
: "b"((unsigned long)new),
"c"((unsigned long)(new >> 32)),
@@ -360,67 +359,6 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long l
#endif
-#ifdef __KERNEL__
-struct alt_instr {
- __u8 *instr; /* original instruction */
- __u8 *replacement;
- __u8 cpuid; /* cpuid bit set for replacement */
- __u8 instrlen; /* length of original instruction */
- __u8 replacementlen; /* length of new instruction, <= instrlen */
- __u8 pad;
-};
-#endif
-
-/*
- * Alternative instructions for different CPU types or capabilities.
- *
- * This allows the use of optimized instructions even on generic binary
- * kernels.
- *
- * The length of oldinstr must be greater than or equal to the length of
- * newinstr; it can be padded with nops as needed.
- *
- * For non-barrier-like inlines, please define new variants
- * without volatile and memory clobber.
- */
-#define alternative(oldinstr, newinstr, feature) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature) : "memory")
-
-/*
- * Alternative inline assembly with input.
- *
- * Peculiarities:
- * No memory clobber here.
- * Argument numbers start with 1.
- * It is best to use constraints that are fixed size (like (%1) ... "r").
- * If you use variable-sized constraints like "m" or "g" in the
- * replacement, make sure to pad to the worst-case length.
- */
-#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature), ##input)
-
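
For reference, the canonical users of the alternative() mechanism removed here are the memory-barrier macros: the generic locked add is patched to a real fence instruction at boot on SSE2-capable CPUs. These definitions appear verbatim in the asm-i386 header removed later in this patch:

#define mb()  alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
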
/*
* Force strict CPU ordering.
* And yes, this is required on UP too when we're talking
@@ -511,55 +449,11 @@ struct alt_instr {
#endif
#ifdef CONFIG_SMP
-#define smp_wmb() wmb()
-#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
-#define smp_alt_mb(instr) \
-__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
- ".section __smp_alternatives,\"a\"\n" \
- ".long 6667b\n" \
- ".long 6673f\n" \
- ".previous\n" \
- ".section __smp_replacements,\"a\"\n" \
- "6673:.byte 6668b-6667b\n" \
- ".byte 6670f-6669f\n" \
- ".byte 6671f-6670f\n" \
- ".byte 0\n" \
- ".byte %c0\n" \
- "6669:lock;addl $0,0(%%esp)\n" \
- "6670:" instr "\n" \
- "6671:\n" \
- ".previous\n" \
- : \
- : "i" (X86_FEATURE_XMM2) \
- : "memory")
-#define smp_rmb() smp_alt_mb("lfence")
-#define smp_mb() smp_alt_mb("mfence")
-#define set_mb(var, value) do { \
-unsigned long __set_mb_temp; \
-__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \
- ".section __smp_alternatives,\"a\"\n" \
- ".long 6667b\n" \
- ".long 6673f\n" \
- ".previous\n" \
- ".section __smp_replacements,\"a\"\n" \
- "6673: .byte 6668b-6667b\n" \
- ".byte 6670f-6669f\n" \
- ".byte 0\n" \
- ".byte 6671f-6670f\n" \
- ".byte -1\n" \
- "6669: xchg %1, %0\n" \
- "6670:movl %1, %0\n" \
- "6671:\n" \
- ".previous\n" \
- : "=m" (var), "=r" (__set_mb_temp) \
- : "1" (value) \
- : "memory"); } while (0)
-#else
-#define smp_rmb() rmb()
#define smp_mb() mb()
-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-#endif
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
#define smp_read_barrier_depends() read_barrier_depends()
+#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
#else
#define smp_mb() barrier()
#define smp_rmb() barrier()
@@ -677,5 +571,8 @@ static inline void sched_cacheflush(void)
}
extern unsigned long arch_align_stack(unsigned long sp);
+extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
+
+void default_idle(void);
#endif
diff --git a/linux-2.6-xen-sparse/include/asm-i386/page.h b/linux-2.6-xen-sparse/include/asm-i386/page.h
index 462b602216..70998454d6 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/page.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/page.h
@@ -126,8 +126,6 @@ extern int page_is_ram(unsigned long pagenr);
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
#ifdef CONFIG_FLATMEM
-#define pfn_to_page(pfn) (mem_map + (pfn))
-#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#endif /* CONFIG_FLATMEM */
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
@@ -143,6 +141,7 @@ extern int page_is_ram(unsigned long pagenr);
#endif /* __KERNEL__ */
+#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
#endif /* _I386_PAGE_H */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/rwsem.h b/linux-2.6-xen-sparse/include/asm-i386/rwsem.h
deleted file mode 100644
index 75751b7fce..0000000000
--- a/linux-2.6-xen-sparse/include/asm-i386/rwsem.h
+++ /dev/null
@@ -1,294 +0,0 @@
-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
- *
- * Written by David Howells (dhowells@redhat.com).
- *
- * Derived from asm-i386/semaphore.h
- *
- *
- * The MSW of the count is the negated number of active writers and waiting
- * lockers, and the LSW is the total number of active locks
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
- * uncontended lock. This can be determined because XADD returns the old value.
- * Readers increment by 1 and see a positive value when uncontended, negative
- * if there are writers (and possibly readers) waiting, in which case the
- * reader goes to sleep.
- *
- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
- * be extended to 65534 by manually checking the whole MSW rather than relying
- * on the S flag.
- *
- * The value of ACTIVE_BIAS supports up to 65535 active processes.
- *
- * This should be totally fair - if anything is waiting, a process that wants a
- * lock will go to the back of the queue. When the currently active lock is
- * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consecutive readers at the
- * front, then they'll all be woken up, but no other readers will be.
- */
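
Concretely, with the bias constants defined below, the count transitions work out as follows (a worked sketch of the 32-bit encoding described above):

/* count == 0x00000000: unlocked
 *
 * reader locks:     count += 0x00000001  -> 0x00000001 (1 active, none waiting)
 * second reader:    count += 0x00000001  -> 0x00000002
 * writer locks:     count += 0xffff0001  -> 0xffff0001 from 0; XADD returned 0,
 *                   so the writer knows the lock was uncontended
 * late reader:      count += 0x00000001  -> 0xffff0002, negative, so the
 *                   reader takes the slow path and sleeps
 */
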
-
-#ifndef _I386_RWSEM_H
-#define _I386_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/smp_alt.h>
-
-struct rwsem_waiter;
-
-extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *));
-extern struct rw_semaphore *FASTCALL(rwsem_downgrade_wake(struct rw_semaphore *sem));
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
-#define RWSEM_UNLOCKED_VALUE 0x00000000
-#define RWSEM_ACTIVE_BIAS 0x00000001
-#define RWSEM_ACTIVE_MASK 0x0000ffff
-#define RWSEM_WAITING_BIAS (-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#if RWSEM_DEBUG
- int debug;
-#endif
-};
-
-/*
- * initialisation
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT , 0
-#else
-#define __RWSEM_DEBUG_INIT /* */
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEBUG_INIT }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
- sem->debug = 0;
-#endif
-}
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
- __asm__ __volatile__(
- "# beginning down_read\n\t"
-LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */
- " js 2f\n\t" /* jump if we weren't granted the lock */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " pushl %%ecx\n\t"
- " pushl %%edx\n\t"
- " call rwsem_down_read_failed\n\t"
- " popl %%edx\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
- "# ending down_read\n\t"
- : "=m"(sem->count)
- : "a"(sem), "m"(sem->count)
- : "memory", "cc");
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
- __s32 result, tmp;
- __asm__ __volatile__(
- "# beginning __down_read_trylock\n\t"
- " movl %0,%1\n\t"
- "1:\n\t"
- " movl %1,%2\n\t"
- " addl %3,%2\n\t"
- " jle 2f\n\t"
-LOCK " cmpxchgl %2,%0\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- "# ending __down_read_trylock\n\t"
- : "+m"(sem->count), "=&a"(result), "=&r"(tmp)
- : "i"(RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
- return result>=0 ? 1 : 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
- int tmp;
-
- tmp = RWSEM_ACTIVE_WRITE_BIAS;
- __asm__ __volatile__(
- "# beginning down_write\n\t"
-LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
- " testl %%edx,%%edx\n\t" /* was the count 0 before? */
- " jnz 2f\n\t" /* jump if we weren't granted the lock */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " pushl %%ecx\n\t"
- " call rwsem_down_write_failed\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
- "# ending down_write"
- : "=m"(sem->count), "=d"(tmp)
- : "a"(sem), "1"(tmp), "m"(sem->count)
- : "memory", "cc");
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
- signed long ret = cmpxchg(&sem->count,
- RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- if (ret == RWSEM_UNLOCKED_VALUE)
- return 1;
- return 0;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
- __asm__ __volatile__(
- "# beginning __up_read\n\t"
-LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
- " js 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " decw %%dx\n\t" /* do nothing if still outstanding active readers */
- " jnz 1b\n\t"
- " pushl %%ecx\n\t"
- " call rwsem_wake\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
- "# ending __up_read\n"
- : "=m"(sem->count), "=d"(tmp)
- : "a"(sem), "1"(tmp), "m"(sem->count)
- : "memory", "cc");
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- __asm__ __volatile__(
- "# beginning __up_write\n\t"
- " movl %2,%%edx\n\t"
-LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
- " jnz 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " decw %%dx\n\t" /* did the active count reduce to 0? */
- " jnz 1b\n\t" /* jump back if not */
- " pushl %%ecx\n\t"
- " call rwsem_wake\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
- "# ending __up_write\n"
- : "=m"(sem->count)
- : "a"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS), "m"(sem->count)
- : "memory", "cc", "edx");
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- __asm__ __volatile__(
- "# beginning __downgrade_write\n\t"
-LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
- " js 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " pushl %%ecx\n\t"
- " pushl %%edx\n\t"
- " call rwsem_downgrade_wake\n\t"
- " popl %%edx\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
- "# ending __downgrade_write\n"
- : "=m"(sem->count)
- : "a"(sem), "i"(-RWSEM_WAITING_BIAS), "m"(sem->count)
- : "memory", "cc");
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
- __asm__ __volatile__(
-LOCK "addl %1,%0"
- : "=m"(sem->count)
- : "ir"(delta), "m"(sem->count));
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
- int tmp = delta;
-
- __asm__ __volatile__(
-LOCK "xadd %0,(%2)"
- : "+r"(tmp), "=m"(sem->count)
- : "r"(sem), "m"(sem->count)
- : "memory");
-
- return tmp+delta;
-}
-
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _I386_RWSEM_H */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/smp_alt.h b/linux-2.6-xen-sparse/include/asm-i386/smp_alt.h
deleted file mode 100644
index 67307c3d33..0000000000
--- a/linux-2.6-xen-sparse/include/asm-i386/smp_alt.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef __ASM_SMP_ALT_H__
-#define __ASM_SMP_ALT_H__
-
-#include <linux/config.h>
-
-#ifdef CONFIG_SMP
-#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
-#define LOCK \
- "6677: nop\n" \
- ".section __smp_alternatives,\"a\"\n" \
- ".long 6677b\n" \
- ".long 6678f\n" \
- ".previous\n" \
- ".section __smp_replacements,\"a\"\n" \
- "6678: .byte 1\n" \
- ".byte 1\n" \
- ".byte 0\n" \
- ".byte 1\n" \
- ".byte -1\n" \
- "lock\n" \
- "nop\n" \
- ".previous\n"
-void prepare_for_smp(void);
-void unprepare_for_smp(void);
-#else
-#define LOCK "lock ; "
-#endif
-#else
-#define LOCK ""
-#endif
-
-#endif /* __ASM_SMP_ALT_H__ */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/spinlock.h b/linux-2.6-xen-sparse/include/asm-i386/spinlock.h
deleted file mode 100644
index 65a211156e..0000000000
--- a/linux-2.6-xen-sparse/include/asm-i386/spinlock.h
+++ /dev/null
@@ -1,217 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-#include <asm/page.h>
-#include <linux/config.h>
-#include <linux/compiler.h>
-#include <asm/smp_alt.h>
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- *
- * Simple spin lock operations. There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- *
- * (the type definitions are in asm/spinlock_types.h)
- */
-
-#define __raw_spin_is_locked(x) \
- (*(volatile signed char *)(&(x)->slock) <= 0)
-
-#define __raw_spin_lock_string \
- "\n1:\t" \
- LOCK \
- "decb %0\n\t" \
- "jns 3f\n" \
- "2:\t" \
- "rep;nop\n\t" \
- "cmpb $0,%0\n\t" \
- "jle 2b\n\t" \
- "jmp 1b\n" \
- "3:\n\t"
-
-#define __raw_spin_lock_string_flags \
- "\n1:\t" \
- LOCK \
- "decb %0\n\t" \
- "jns 4f\n\t" \
- "2:\t" \
- "testl $0x200, %1\n\t" \
- "jz 3f\n\t" \
- "sti\n\t" \
- "3:\t" \
- "rep;nop\n\t" \
- "cmpb $0, %0\n\t" \
- "jle 3b\n\t" \
- "cli\n\t" \
- "jmp 1b\n" \
- "4:\n\t"
-
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
-{
- __asm__ __volatile__(
- __raw_spin_lock_string
- :"=m" (lock->slock) : : "memory");
-}
-
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
-{
- __asm__ __volatile__(
- __raw_spin_lock_string_flags
- :"=m" (lock->slock) : "r" (flags) : "memory");
-}
-
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
-{
- char oldval;
-#ifdef CONFIG_SMP_ALTERNATIVES
- __asm__ __volatile__(
- "1:movb %1,%b0\n"
- "movb $0,%1\n"
- "2:"
- ".section __smp_alternatives,\"a\"\n"
- ".long 1b\n"
- ".long 3f\n"
- ".previous\n"
- ".section __smp_replacements,\"a\"\n"
- "3: .byte 2b - 1b\n"
- ".byte 5f-4f\n"
- ".byte 0\n"
- ".byte 6f-5f\n"
- ".byte -1\n"
- "4: xchgb %b0,%1\n"
- "5: movb %1,%b0\n"
- "movb $0,%1\n"
- "6:\n"
- ".previous\n"
- :"=q" (oldval), "=m" (lock->slock)
- :"0" (0) : "memory");
-#else
- __asm__ __volatile__(
- "xchgb %b0,%1"
- :"=q" (oldval), "=m" (lock->slock)
- :"0" (0) : "memory");
-#endif
- return oldval > 0;
-}
-
-/*
- * __raw_spin_unlock based on writing $1 to the low byte.
- * This method works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE, so we use xchgb there)
- * (PPro errata 66, 92)
- */
-
-#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
-
-#define __raw_spin_unlock_string \
- "movb $1,%0" \
- :"=m" (lock->slock) : : "memory"
-
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
- __asm__ __volatile__(
- __raw_spin_unlock_string
- );
-}
-
-#else
-
-#define __raw_spin_unlock_string \
- "xchgb %b0, %1" \
- :"=q" (oldval), "=m" (lock->slock) \
- :"0" (oldval) : "memory"
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
- char oldval = 1;
-
- __asm__ __volatile__(
- __raw_spin_unlock_string
- );
-}
-
-#endif
-
-#define __raw_spin_unlock_wait(lock) \
- do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- *
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores. See
- * semaphore.h for details. -ben
- *
- * the helpers are in arch/i386/kernel/semaphore.c
- */
-
-/**
- * read_can_lock - would read_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define __raw_read_can_lock(x) ((int)(x)->lock > 0)
-
-/**
- * write_can_lock - would write_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
-
-static inline void __raw_read_lock(raw_rwlock_t *rw)
-{
- __build_read_lock(rw, "__read_lock_failed");
-}
-
-static inline void __raw_write_lock(raw_rwlock_t *rw)
-{
- __build_write_lock(rw, "__write_lock_failed");
-}
-
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- atomic_dec(count);
- if (atomic_read(count) >= 0)
- return 1;
- atomic_inc(count);
- return 0;
-}
-
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- if (atomic_sub_and_test(RW_LOCK_BIAS, count))
- return 1;
- atomic_add(RW_LOCK_BIAS, count);
- return 0;
-}
-
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory");
-}
-
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0"
- : "=m" (rw->lock) : : "memory");
-}
-
-#endif /* __ASM_SPINLOCK_H */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/system.h b/linux-2.6-xen-sparse/include/asm-i386/system.h
deleted file mode 100644
index 0ca2efb762..0000000000
--- a/linux-2.6-xen-sparse/include/asm-i386/system.h
+++ /dev/null
@@ -1,606 +0,0 @@
-#ifndef __ASM_SYSTEM_H
-#define __ASM_SYSTEM_H
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <asm/segment.h>
-#include <asm/cpufeature.h>
-#include <asm/smp_alt.h>
-
-#ifdef __KERNEL__
-
-struct task_struct; /* one of the stranger aspects of C forward declarations.. */
-extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
-
-#define switch_to(prev,next,last) do { \
- unsigned long esi,edi; \
- asm volatile("pushl %%ebp\n\t" \
- "movl %%esp,%0\n\t" /* save ESP */ \
- "movl %5,%%esp\n\t" /* restore ESP */ \
- "movl $1f,%1\n\t" /* save EIP */ \
- "pushl %6\n\t" /* restore EIP */ \
- "jmp __switch_to\n" \
- "1:\t" \
- "popl %%ebp\n\t" \
- :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
- "=a" (last),"=S" (esi),"=D" (edi) \
- :"m" (next->thread.esp),"m" (next->thread.eip), \
- "2" (prev), "d" (next)); \
-} while (0)
-
-#define _set_base(addr,base) do { unsigned long __pr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
- "rorl $16,%%edx\n\t" \
- "movb %%dl,%2\n\t" \
- "movb %%dh,%3" \
- :"=&d" (__pr) \
- :"m" (*((addr)+2)), \
- "m" (*((addr)+4)), \
- "m" (*((addr)+7)), \
- "0" (base) \
- ); } while(0)
-
-#define _set_limit(addr,limit) do { unsigned long __lr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
- "rorl $16,%%edx\n\t" \
- "movb %2,%%dh\n\t" \
- "andb $0xf0,%%dh\n\t" \
- "orb %%dh,%%dl\n\t" \
- "movb %%dl,%2" \
- :"=&d" (__lr) \
- :"m" (*(addr)), \
- "m" (*((addr)+6)), \
- "0" (limit) \
- ); } while(0)
-
-#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
-#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1) )
-
-/*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
- */
-#define loadsegment(seg,value) \
- asm volatile("\n" \
- "1:\t" \
- "mov %0,%%" #seg "\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3:\t" \
- "pushl $0\n\t" \
- "popl %%" #seg "\n\t" \
- "jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n\t" \
- ".align 4\n\t" \
- ".long 1b,3b\n" \
- ".previous" \
- : :"rm" (value))
-
-/*
- * Save a segment register away
- */
-#define savesegment(seg, value) \
- asm volatile("mov %%" #seg ",%0":"=rm" (value))
-
-/*
- * Clear and set 'TS' bit respectively
- */
-#define clts() __asm__ __volatile__ ("clts")
-#define read_cr0() ({ \
- unsigned int __dummy; \
- __asm__ __volatile__( \
- "movl %%cr0,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
-#define write_cr0(x) \
- __asm__ __volatile__("movl %0,%%cr0": :"r" (x));
-
-#define read_cr2() ({ \
- unsigned int __dummy; \
- __asm__ __volatile__( \
- "movl %%cr2,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
-#define write_cr2(x) \
- __asm__ __volatile__("movl %0,%%cr2": :"r" (x));
-
-#define read_cr3() ({ \
- unsigned int __dummy; \
- __asm__ ( \
- "movl %%cr3,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
-#define write_cr3(x) \
- __asm__ __volatile__("movl %0,%%cr3": :"r" (x));
-
-#define read_cr4() ({ \
- unsigned int __dummy; \
- __asm__( \
- "movl %%cr4,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
-
-#define read_cr4_safe() ({ \
- unsigned int __dummy; \
- /* This could fault if %cr4 does not exist */ \
- __asm__("1: movl %%cr4, %0 \n" \
- "2: \n" \
- ".section __ex_table,\"a\" \n" \
- ".long 1b,2b \n" \
- ".previous \n" \
- : "=r" (__dummy): "0" (0)); \
- __dummy; \
-})
-
-#define write_cr4(x) \
- __asm__ __volatile__("movl %0,%%cr4": :"r" (x));
-#define stts() write_cr0(8 | read_cr0())
-
-#endif /* __KERNEL__ */
-
-#define wbinvd() \
- __asm__ __volatile__ ("wbinvd": : :"memory");
-
-static inline unsigned long get_limit(unsigned long segment)
-{
- unsigned long __limit;
- __asm__("lsll %1,%0"
- :"=r" (__limit):"r" (segment));
- return __limit+1;
-}
-
-#define nop() __asm__ __volatile__ ("nop")
-
-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
-
-#define tas(ptr) (xchg((ptr),1))
-
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((struct __xchg_dummy *)(x))
-
-
-#ifdef CONFIG_X86_CMPXCHG64
-
-/*
- * The semantics of CMPXCHG8B are a bit strange; this is why
- * there is a loop and the loading of %%eax and %%edx has to
- * be inside. This inlines well in most cases, the cached
- * cost is around 38 cycles. (in the future we might want
- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
- * might have an implicit FPU-save as a cost, so it's not
- * clear which path to go.)
- *
- * cmpxchg8b must be used with the lock prefix here to allow
- * the instruction to be executed atomically, see page 3-102
- * of the instruction set reference 24319102.pdf. We need
- * the reader side to see the coherent 64bit value.
- */
-static inline void __set_64bit (unsigned long long * ptr,
- unsigned int low, unsigned int high)
-{
- __asm__ __volatile__ (
- "\n1:\t"
- "movl (%0), %%eax\n\t"
- "movl 4(%0), %%edx\n\t"
- "lock cmpxchg8b (%0)\n\t"
- "jnz 1b"
- : /* no outputs */
- : "D"(ptr),
- "b"(low),
- "c"(high)
- : "ax","dx","memory");
-}
-
-static inline void __set_64bit_constant (unsigned long long *ptr,
- unsigned long long value)
-{
- __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
-}
-#define ll_low(x) *(((unsigned int*)&(x))+0)
-#define ll_high(x) *(((unsigned int*)&(x))+1)
-
-static inline void __set_64bit_var (unsigned long long *ptr,
- unsigned long long value)
-{
- __set_64bit(ptr,ll_low(value), ll_high(value));
-}
-
-#define set_64bit(ptr,value) \
-(__builtin_constant_p(value) ? \
- __set_64bit_constant(ptr, value) : \
- __set_64bit_var(ptr, value) )
-
-#define _set_64bit(ptr,value) \
-(__builtin_constant_p(value) ? \
- __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
- __set_64bit(ptr, ll_low(value), ll_high(value)) )
-
-#endif
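
The loop above makes a pair of 32-bit register halves appear as one atomic 64-bit store by retrying cmpxchg8b until it wins. A minimal usage sketch (ts is an assumed shared variable):

unsigned long long ts;		/* shared, read concurrently by other CPUs */

set_64bit(&ts, 0x0000000100000000ULL);	/* both halves updated atomically */
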
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
- * Note 2: xchg has a side effect, so that attribute volatile is necessary,
- * but generally the primitive is invalid, *ptr is output argument. --ANK
- */
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
-{
- switch (size) {
- case 1:
- __asm__ __volatile__("xchgb %b0,%1"
- :"=q" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 2:
- __asm__ __volatile__("xchgw %w0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 4:
- __asm__ __volatile__("xchgl %0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- }
- return x;
-}
-
-/*
- * Atomic compare and exchange. Compare OLD with MEM, if identical,
- * store NEW in MEM. Return the initial value in MEM. Success is
- * indicated by comparing RETURN with OLD.
- */
-
-#ifdef CONFIG_X86_CMPXCHG
-#define __HAVE_ARCH_CMPXCHG 1
-#define cmpxchg(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
- (unsigned long)(n),sizeof(*(ptr))))
-#endif
-
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- unsigned long prev;
- switch (size) {
- case 1:
- __asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
-}
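
The standard consumer of __cmpxchg() is the read/modify/compare-exchange retry loop: recompute from a snapshot and retry whenever another CPU slipped in between. A hedged sketch building an atomic add out of cmpxchg() (v is an assumed shared counter):

void atomic_add_ref(volatile unsigned long *v, unsigned long delta)
{
	unsigned long old, new;

	do {
		old = *v;		/* snapshot the current value */
		new = old + delta;	/* compute against the snapshot */
		/* the store succeeds only if *v still equals old */
	} while (cmpxchg(v, old, new) != old);
}
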
-
-#ifndef CONFIG_X86_CMPXCHG
-/*
- * When building a kernel capable of running on an 80386, it may be
- * necessary to simulate the cmpxchg on the 80386 CPU. For that purpose
- * we define a function for each of the sizes we support.
- */
-
-extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
-extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
-extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
-
-static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- switch (size) {
- case 1:
- return cmpxchg_386_u8(ptr, old, new);
- case 2:
- return cmpxchg_386_u16(ptr, old, new);
- case 4:
- return cmpxchg_386_u32(ptr, old, new);
- }
- return old;
-}
-
-#define cmpxchg(ptr,o,n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- if (likely(boot_cpu_data.x86 > 3)) \
- __ret = __cmpxchg((ptr), (unsigned long)(o), \
- (unsigned long)(n), sizeof(*(ptr))); \
- else \
- __ret = cmpxchg_386((ptr), (unsigned long)(o), \
- (unsigned long)(n), sizeof(*(ptr))); \
- __ret; \
-})
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-
-static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
- unsigned long long new)
-{
- unsigned long long prev;
- __asm__ __volatile__(LOCK "cmpxchg8b %3"
- : "=A"(prev)
- : "b"((unsigned long)new),
- "c"((unsigned long)(new >> 32)),
- "m"(*__xg(ptr)),
- "0"(old)
- : "memory");
- return prev;
-}
-
-#define cmpxchg64(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
- (unsigned long long)(n)))
-
-#endif
-
-#ifdef __KERNEL__
-struct alt_instr {
- __u8 *instr; /* original instruction */
- __u8 *replacement;
- __u8 cpuid; /* cpuid bit set for replacement */
- __u8 instrlen; /* length of original instruction */
- __u8 replacementlen; /* length of new instruction, <= instrlen */
- __u8 pad;
-};
-#endif
-
-/*
- * Alternative instructions for different CPU types or capabilities.
- *
- * This allows the use of optimized instructions even on generic binary
- * kernels.
- *
- * The length of oldinstr must be greater than or equal to the length of
- * newinstr; it can be padded with nops as needed.
- *
- * For non-barrier-like inlines, please define new variants
- * without volatile and memory clobber.
- */
-#define alternative(oldinstr, newinstr, feature) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature) : "memory")
-
-/*
- * Alternative inline assembly with input.
- *
- * Peculiarities:
- * No memory clobber here.
- * Argument numbers start with 1.
- * It is best to use constraints that are fixed size (like (%1) ... "r").
- * If you use variable-sized constraints like "m" or "g" in the
- * replacement, make sure to pad to the worst-case length.
- */
-#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature), ##input)
-
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- *
- * For now, "wmb()" doesn't actually do anything, as all
- * Intel CPUs follow what Intel calls a *Processor Order*,
- * in which all writes are seen in the program order even
- * outside the CPU.
- *
- * I expect future Intel CPUs to have a weaker ordering,
- * but I'd also expect them to finally get their act together
- * and add some real memory barriers if so.
- *
- * Some non-Intel clones support out-of-order stores. wmb() ceases to be a
- * nop for these.
- */
-
-
-/*
- * Actually only lfence would be needed for mb() because all stores done
- * by the kernel should be already ordered. But keep a full barrier for now.
- */
-
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-
-/**
- * read_barrier_depends - Flush all pending reads that subsequent reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data returned by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends() do { } while(0)
-
-#ifdef CONFIG_X86_OOSTORE
-/* Actually there are no OOO store capable CPUs for now that do SSE,
-   but make it a possibility already. */
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
-#else
-#define wmb() __asm__ __volatile__ ("": : :"memory")
-#endif
-
-#ifdef CONFIG_SMP
-#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
-#define smp_alt_mb(instr) \
-__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
- ".section __smp_alternatives,\"a\"\n" \
- ".long 6667b\n" \
- ".long 6673f\n" \
- ".previous\n" \
- ".section __smp_replacements,\"a\"\n" \
- "6673:.byte 6668b-6667b\n" \
- ".byte 6670f-6669f\n" \
- ".byte 6671f-6670f\n" \
- ".byte 0\n" \
- ".byte %c0\n" \
- "6669:lock;addl $0,0(%%esp)\n" \
- "6670:" instr "\n" \
- "6671:\n" \
- ".previous\n" \
- : \
- : "i" (X86_FEATURE_XMM2) \
- : "memory")
-#define smp_mb() smp_alt_mb("mfence")
-#define smp_rmb() smp_alt_mb("lfence")
-#define set_mb(var, value) do { \
-unsigned long __set_mb_temp; \
-__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \
- ".section __smp_alternatives,\"a\"\n" \
- ".long 6667b\n" \
- ".long 6673f\n" \
- ".previous\n" \
- ".section __smp_replacements,\"a\"\n" \
- "6673: .byte 6668b-6667b\n" \
- ".byte 6670f-6669f\n" \
- ".byte 0\n" \
- ".byte 6671f-6670f\n" \
- ".byte -1\n" \
- "6669: xchg %1, %0\n" \
- "6670:movl %1, %0\n" \
- "6671:\n" \
- ".previous\n" \
- : "=m" (var), "=r" (__set_mb_temp) \
- : "1" (value) \
- : "memory"); } while (0)
-#else
-#define smp_mb() mb()
-#define smp_rmb() rmb()
-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-#endif
-#define smp_wmb() wmb()
-#define smp_read_barrier_depends() read_barrier_depends()
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
-#endif
-
-#define set_wmb(var, value) do { var = value; wmb(); } while (0)
-
-/* interrupt control.. */
-#define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
-#define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
-#define local_irq_disable() __asm__ __volatile__("cli": : :"memory")
-#define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
-/* used in the idle loop; sti enables interrupts only after the next
-   instruction, so no interrupt can slip in before the hlt */
-#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt() __asm__ __volatile__("hlt": : :"memory")
-
-#define irqs_disabled() \
-({ \
- unsigned long flags; \
- local_save_flags(flags); \
- !(flags & (1<<9)); \
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
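The save/restore pair is typically used to bracket a short critical section that an interrupt handler must not preempt; a minimal usage sketch:

    unsigned long flags;

    local_irq_save(flags);      /* pushfl ; popl ; cli */
    /* ... touch state that is also modified from IRQ context ... */
    local_irq_restore(flags);   /* restores the caller's IF bit, so the
                                   pair nests correctly inside other
                                   irq-disabled sections */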
-
-/*
- * disable hlt during certain critical i/o operations
- */
-#define HAVE_DISABLE_HLT
-void disable_hlt(void);
-void enable_hlt(void);
-
-extern int es7000_plat;
-void cpu_idle_wait(void);
-
-/*
- * On SMP systems, when the scheduler does migration-cost autodetection,
- * it needs a way to flush as much of the CPU's caches as possible:
- */
-static inline void sched_cacheflush(void)
-{
- wbinvd();
-}
-
-extern unsigned long arch_align_stack(unsigned long sp);
-
-#endif
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/intel_intrin.h b/linux-2.6-xen-sparse/include/asm-ia64/intel_intrin.h
index d4e5471f46..371e42de50 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/intel_intrin.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/intel_intrin.h
@@ -5,113 +5,10 @@
*
* Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
* Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
+ * Copyright (C) 2005,2006 Hongjiu Lu <hongjiu.lu@intel.com>
*
*/
-#include <asm/types.h>
-
-void __lfetch(int lfhint, void *y);
-void __lfetch_excl(int lfhint, void *y);
-void __lfetch_fault(int lfhint, void *y);
-void __lfetch_fault_excl(int lfhint, void *y);
-
-/* In the following, whichFloatReg should be an integer from 0-127 */
-void __ldfs(const int whichFloatReg, void *src);
-void __ldfd(const int whichFloatReg, void *src);
-void __ldfe(const int whichFloatReg, void *src);
-void __ldf8(const int whichFloatReg, void *src);
-void __ldf_fill(const int whichFloatReg, void *src);
-void __stfs(void *dst, const int whichFloatReg);
-void __stfd(void *dst, const int whichFloatReg);
-void __stfe(void *dst, const int whichFloatReg);
-void __stf8(void *dst, const int whichFloatReg);
-void __stf_spill(void *dst, const int whichFloatReg);
-
-void __st1_rel(void *dst, const __s8 value);
-void __st2_rel(void *dst, const __s16 value);
-void __st4_rel(void *dst, const __s32 value);
-void __st8_rel(void *dst, const __s64 value);
-__u8 __ld1_acq(void *src);
-__u16 __ld2_acq(void *src);
-__u32 __ld4_acq(void *src);
-__u64 __ld8_acq(void *src);
-
-__u64 __fetchadd4_acq(__u32 *addend, const int increment);
-__u64 __fetchadd4_rel(__u32 *addend, const int increment);
-__u64 __fetchadd8_acq(__u64 *addend, const int increment);
-__u64 __fetchadd8_rel(__u64 *addend, const int increment);
-
-__u64 __getf_exp(double d);
-
-/* OS Related Itanium(R) Intrinsics */
-
-/* The names to use for whichReg and whichIndReg below come from
- the include file asm/ia64regs.h */
-
-__u64 __getIndReg(const int whichIndReg, __s64 index);
-__u64 __getReg(const int whichReg);
-
-void __setIndReg(const int whichIndReg, __s64 index, __u64 value);
-void __setReg(const int whichReg, __u64 value);
-
-void __mf(void);
-void __mfa(void);
-void __synci(void);
-void __itcd(__s64 pa);
-void __itci(__s64 pa);
-void __itrd(__s64 whichTransReg, __s64 pa);
-void __itri(__s64 whichTransReg, __s64 pa);
-void __ptce(__s64 va);
-void __ptcl(__s64 va, __s64 pagesz);
-void __ptcg(__s64 va, __s64 pagesz);
-void __ptcga(__s64 va, __s64 pagesz);
-void __ptri(__s64 va, __s64 pagesz);
-void __ptrd(__s64 va, __s64 pagesz);
-void __invala (void);
-void __invala_gr(const int whichGeneralReg /* 0-127 */ );
-void __invala_fr(const int whichFloatReg /* 0-127 */ );
-void __nop(const int);
-void __fc(__u64 *addr);
-void __sum(int mask);
-void __rum(int mask);
-void __ssm(int mask);
-void __rsm(int mask);
-__u64 __thash(__s64);
-__u64 __ttag(__s64);
-__s64 __tpa(__s64);
-
-/* Intrinsics for implementing get/put_user macros */
-void __st_user(const char *tableName, __u64 addr, char size, char relocType, __u64 val);
-void __ld_user(const char *tableName, __u64 addr, char size, char relocType);
-
-/* This intrinsic does not generate code, it creates a barrier across which
- * the compiler will not schedule data access instructions.
- */
-void __memory_barrier(void);
-
-void __isrlz(void);
-void __dsrlz(void);
-
-__u64 _m64_mux1(__u64 a, const int n);
-__u64 __thash(__u64);
-
-/* Lock and Atomic Operation Related Intrinsics */
-__u64 _InterlockedExchange8(volatile __u8 *trgt, __u8 value);
-__u64 _InterlockedExchange16(volatile __u16 *trgt, __u16 value);
-__s64 _InterlockedExchange(volatile __u32 *trgt, __u32 value);
-__s64 _InterlockedExchange64(volatile __u64 *trgt, __u64 value);
-
-__u64 _InterlockedCompareExchange8_rel(volatile __u8 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange8_acq(volatile __u8 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange16_rel(volatile __u16 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange16_acq(volatile __u16 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange_rel(volatile __u32 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange_acq(volatile __u32 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange64_rel(volatile __u64 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange64_acq(volatile __u64 *dest, __u64 xchg, __u64 comp);
-
-__s64 _m64_dep_mi(const int v, __s64 s, const int p, const int len);
-__s64 _m64_shrp(__s64 a, __s64 b, const int count);
-__s64 _m64_popcnt(__s64 a);
+#include <ia64intrin.h>
#define ia64_barrier() __memory_barrier()
@@ -122,15 +19,16 @@ __s64 _m64_popcnt(__s64 a);
#define __ia64_getreg __getReg
#define __ia64_setreg __setReg
-#define __ia64_hint(x)
+#define ia64_hint __hint
+#define ia64_hint_pause __hint_pause
-#define ia64_mux1_brcst 0
-#define ia64_mux1_mix 8
-#define ia64_mux1_shuf 9
-#define ia64_mux1_alt 10
-#define ia64_mux1_rev 11
+#define ia64_mux1_brcst _m64_mux1_brcst
+#define ia64_mux1_mix _m64_mux1_mix
+#define ia64_mux1_shuf _m64_mux1_shuf
+#define ia64_mux1_alt _m64_mux1_alt
+#define ia64_mux1_rev _m64_mux1_rev
-#define ia64_mux1 _m64_mux1
+#define ia64_mux1(x,v) _m_to_int64(_m64_mux1(_m_from_int64(x), (v)))
#define ia64_popcnt _m64_popcnt
#define ia64_getf_exp __getf_exp
#define ia64_shrp _m64_shrp
@@ -158,7 +56,7 @@ __s64 _m64_popcnt(__s64 a);
#define ia64_stf8 __stf8
#define ia64_stf_spill __stf_spill
-#define ia64_mf __mf
+#define ia64_mf __mf
#define ia64_mfa __mfa
#define ia64_fetchadd4_acq __fetchadd4_acq
@@ -234,10 +132,10 @@ __s64 _m64_popcnt(__s64 a);
/* Values for lfhint in __lfetch and __lfetch_fault */
-#define ia64_lfhint_none 0
-#define ia64_lfhint_nt1 1
-#define ia64_lfhint_nt2 2
-#define ia64_lfhint_nta 3
+#define ia64_lfhint_none __lfhint_none
+#define ia64_lfhint_nt1 __lfhint_nt1
+#define ia64_lfhint_nt2 __lfhint_nt2
+#define ia64_lfhint_nta __lfhint_nta
#define ia64_lfetch __lfetch
#define ia64_lfetch_excl __lfetch_excl
@@ -256,4 +154,6 @@ do { \
#define __ia64_get_psr_i() (__ia64_getreg(_IA64_REG_PSR) & 0x4000UL)
+#define __builtin_trap() __break(0);
+
#endif /* _ASM_IA64_INTEL_INTRIN_H */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/io.h b/linux-2.6-xen-sparse/include/asm-ia64/io.h
index 73c9b60904..42eeaf8643 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/io.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/io.h
@@ -90,8 +90,8 @@ phys_to_virt (unsigned long address)
}
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
-extern int valid_phys_addr_range (unsigned long addr, size_t *count); /* efi.c */
-extern int valid_mmap_phys_addr_range (unsigned long addr, size_t *count);
+extern int valid_phys_addr_range (unsigned long addr, size_t count); /* efi.c */
+extern int valid_mmap_phys_addr_range (unsigned long addr, size_t count);
/*
* The following two macros are deprecated and scheduled for removal.
@@ -453,27 +453,18 @@ __writeq (unsigned long val, volatile void __iomem *addr)
# define outl_p outl
#endif
-/*
- * An "address" in IO memory space is not clearly either an integer or a pointer. We will
- * accept both, thus the casts.
- *
- * On ia-64, we access the physical I/O memory space through the uncached kernel region.
- */
-static inline void __iomem *
-ioremap (unsigned long offset, unsigned long size)
-{
- offset = HYPERVISOR_ioremap(offset, size);
- if (IS_ERR_VALUE(offset))
- return (void __iomem*)offset;
- return (void __iomem *) (__IA64_UNCACHED_OFFSET | (offset));
-}
+extern void __iomem * ioremap(unsigned long offset, unsigned long size);
+extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
static inline void
iounmap (volatile void __iomem *addr)
{
}
-#define ioremap_nocache(o,s) ioremap(o,s)
+/* Use normal IO mappings for DMI */
+#define dmi_ioremap ioremap
+#define dmi_iounmap(x,l) iounmap(x)
+#define dmi_alloc(l) kmalloc(l, GFP_ATOMIC)
# ifdef __KERNEL__
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/machvec.h b/linux-2.6-xen-sparse/include/asm-ia64/machvec.h
index ca5ea994d6..a9c995a86c 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/machvec.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/machvec.h
@@ -20,6 +20,7 @@ struct scatterlist;
struct page;
struct mm_struct;
struct pci_bus;
+struct task_struct;
typedef void ia64_mv_setup_t (char **);
typedef void ia64_mv_cpu_init_t (void);
@@ -34,6 +35,7 @@ typedef int ia64_mv_pci_legacy_read_t (struct pci_bus *, u16 port, u32 *val,
u8 size);
typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
u8 size);
+typedef void ia64_mv_migrate_t(struct task_struct * task);
/* DMA-mapping interface: */
typedef void ia64_mv_dma_init (void);
@@ -85,6 +87,11 @@ machvec_noop_mm (struct mm_struct *mm)
{
}
+static inline void
+machvec_noop_task (struct task_struct *task)
+{
+}
+
extern void machvec_setup (char **);
extern void machvec_timer_interrupt (int, void *, struct pt_regs *);
extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int);
@@ -146,6 +153,7 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
# define platform_readw_relaxed ia64_mv.readw_relaxed
# define platform_readl_relaxed ia64_mv.readl_relaxed
# define platform_readq_relaxed ia64_mv.readq_relaxed
+# define platform_migrate ia64_mv.migrate
# endif
/* __attribute__((__aligned__(16))) is required to make size of the
@@ -194,6 +202,7 @@ struct ia64_machine_vector {
ia64_mv_readw_relaxed_t *readw_relaxed;
ia64_mv_readl_relaxed_t *readl_relaxed;
ia64_mv_readq_relaxed_t *readq_relaxed;
+ ia64_mv_migrate_t *migrate;
} __attribute__((__aligned__(16))); /* align attrib? see above comment */
#define MACHVEC_INIT(name) \
@@ -238,6 +247,7 @@ struct ia64_machine_vector {
platform_readw_relaxed, \
platform_readl_relaxed, \
platform_readq_relaxed, \
+ platform_migrate, \
}
extern struct ia64_machine_vector ia64_mv;
@@ -337,9 +347,11 @@ extern ia64_mv_dma_supported swiotlb_dma_supported;
#endif
#ifndef platform_pci_legacy_read
# define platform_pci_legacy_read ia64_pci_legacy_read
+extern int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size);
#endif
#ifndef platform_pci_legacy_write
# define platform_pci_legacy_write ia64_pci_legacy_write
+extern int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size);
#endif
#ifndef platform_inb
# define platform_inb __ia64_inb
@@ -386,5 +398,8 @@ extern ia64_mv_dma_supported swiotlb_dma_supported;
#ifndef platform_readq_relaxed
# define platform_readq_relaxed __ia64_readq_relaxed
#endif
+#ifndef platform_migrate
+# define platform_migrate machvec_noop_task
+#endif
#endif /* _ASM_IA64_MACHVEC_H */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h b/linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h
index 40ae167ed4..ccb777e37b 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h
@@ -2,7 +2,6 @@
#define _ASM_IA64_MACHVEC_DIG_h
extern ia64_mv_setup_t dig_setup;
-extern ia64_mv_irq_init_t dig_irq_init;
/*
* This stuff has dual use!
@@ -13,7 +12,6 @@ extern ia64_mv_irq_init_t dig_irq_init;
*/
#define platform_name "dig"
#define platform_setup dig_setup
-#define platform_irq_init dig_irq_init
#ifdef CONFIG_XEN
# define platform_dma_map_sg dma_map_sg
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/page.h b/linux-2.6-xen-sparse/include/asm-ia64/page.h
index fca73f00ef..a902f9df2d 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/page.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/page.h
@@ -57,6 +57,8 @@
# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
# define ARCH_HAS_HUGEPAGE_ONLY_RANGE
+# define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
+# define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __ASSEMBLY__
@@ -104,17 +106,25 @@ extern int ia64_pfn_valid (unsigned long pfn);
# define ia64_pfn_valid(pfn) 1
#endif
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+extern struct page *vmem_map;
+#ifdef CONFIG_DISCONTIGMEM
+# define page_to_pfn(page) ((unsigned long) (page - vmem_map))
+# define pfn_to_page(pfn) (vmem_map + (pfn))
+#endif
+#endif
+
+#if defined(CONFIG_FLATMEM) || defined(CONFIG_SPARSEMEM)
+/* FLATMEM always configures mem_map (mem_map = vmem_map if necessary) */
+#include <asm-generic/memory_model.h>
+#endif
+
#ifdef CONFIG_FLATMEM
# define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
-# define page_to_pfn(page) ((unsigned long) (page - mem_map))
-# define pfn_to_page(pfn) (mem_map + (pfn))
#elif defined(CONFIG_DISCONTIGMEM)
-extern struct page *vmem_map;
extern unsigned long min_low_pfn;
extern unsigned long max_low_pfn;
# define pfn_valid(pfn) (((pfn) >= min_low_pfn) && ((pfn) < max_low_pfn) && ia64_pfn_valid(pfn))
-# define page_to_pfn(page) ((unsigned long) (page - vmem_map))
-# define pfn_to_page(pfn) (vmem_map + (pfn))
#endif
#ifndef CONFIG_XEN
@@ -149,7 +159,7 @@ typedef union ia64_va {
| (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT)))
# define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
# define is_hugepage_only_range(mm, addr, len) \
- (REGION_NUMBER(addr) == RGN_HPAGE && \
+ (REGION_NUMBER(addr) == RGN_HPAGE || \
REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE)
extern unsigned int hpage_shift;
#endif
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/pal.h b/linux-2.6-xen-sparse/include/asm-ia64/pal.h
index e476c94ad4..e8b71a7dd2 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/pal.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/pal.h
@@ -68,6 +68,7 @@
#define PAL_SHUTDOWN 40 /* enter processor shutdown state */
#define PAL_PREFETCH_VISIBILITY 41 /* Make Processor Prefetches Visible */
#define PAL_LOGICAL_TO_PHYSICAL 42 /* returns information on logical to physical processor mapping */
+#define PAL_CACHE_SHARED_INFO 43 /* returns information on caches shared by logical processor */
#define PAL_COPY_PAL 256 /* relocate PAL procedures and PAL PMI */
#define PAL_HALT_INFO 257 /* return the low power capabilities of processor */
@@ -131,7 +132,7 @@ typedef u64 pal_cache_line_state_t;
#define PAL_CACHE_LINE_STATE_MODIFIED 3 /* Modified */
typedef struct pal_freq_ratio {
- u64 den : 32, num : 32; /* numerator & denominator */
+ u32 den, num; /* numerator & denominator */
} itc_ratio, proc_ratio;
typedef union pal_cache_config_info_1_s {
@@ -152,10 +153,10 @@ typedef union pal_cache_config_info_1_s {
typedef union pal_cache_config_info_2_s {
struct {
- u64 cache_size : 32, /*cache size in bytes*/
+ u32 cache_size; /*cache size in bytes*/
- alias_boundary : 8, /* 39-32 aliased addr
+ u32 alias_boundary : 8, /* 39-32 aliased addr
* separation for max
* performance.
*/
@@ -1641,14 +1642,40 @@ ia64_pal_logical_to_phys(u64 proc_number, pal_logical_to_physical_t *mapping)
if (iprv.status == PAL_STATUS_SUCCESS)
{
- if (proc_number == 0)
- mapping->overview.overview_data = iprv.v0;
+ mapping->overview.overview_data = iprv.v0;
mapping->ppli1.ppli1_data = iprv.v1;
mapping->ppli2.ppli2_data = iprv.v2;
}
return iprv.status;
}
+
+typedef struct pal_cache_shared_info_s
+{
+ u64 num_shared;
+ pal_proc_n_log_info1_t ppli1;
+ pal_proc_n_log_info2_t ppli2;
+} pal_cache_shared_info_t;
+
+/* Get information on the caches shared by a logical processor. */
+static inline s64
+ia64_pal_cache_shared_info(u64 level,
+ u64 type,
+ u64 proc_number,
+ pal_cache_shared_info_t *info)
+{
+ struct ia64_pal_retval iprv;
+
+ PAL_CALL(iprv, PAL_CACHE_SHARED_INFO, level, type, proc_number);
+
+ if (iprv.status == PAL_STATUS_SUCCESS) {
+ info->num_shared = iprv.v0;
+ info->ppli1.ppli1_data = iprv.v1;
+ info->ppli2.ppli2_data = iprv.v2;
+ }
+
+ return iprv.status;
+}
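A hedged usage sketch for the new wrapper; the level, type, and processor-number arguments and the printk are illustrative only:

    pal_cache_shared_info_t info;
    s64 status;

    /* e.g. query the level-2 cache as seen by logical processor 0 */
    status = ia64_pal_cache_shared_info(2, 2, 0, &info);
    if (status == PAL_STATUS_SUCCESS)
        printk(KERN_INFO "cache shared by %lu logical processors\n",
               (unsigned long)info.num_shared);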
#endif /* __ASSEMBLY__ */
#endif /* _ASM_IA64_PAL_H */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/processor.h b/linux-2.6-xen-sparse/include/asm-ia64/processor.h
index 99a1de460b..1a3d84d2f2 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/processor.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/processor.h
@@ -51,7 +51,8 @@
#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */
#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */
#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */
- /* bit 5 is currently unused */
+#define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration
+ sync at ctx sw */
#define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */
#define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */
@@ -181,7 +182,6 @@ DECLARE_PER_CPU(struct cpuinfo_ia64, cpu_info);
#define local_cpu_data (&__ia64_per_cpu_var(cpu_info))
#define cpu_data(cpu) (&per_cpu(cpu_info, cpu))
-extern void identify_cpu (struct cpuinfo_ia64 *);
extern void print_cpu_info (struct cpuinfo_ia64 *);
typedef struct {
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/system.h b/linux-2.6-xen-sparse/include/asm-ia64/system.h
index 349c8d1a15..4f378314ce 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/system.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/system.h
@@ -244,6 +244,13 @@ extern void ia64_load_extra (struct task_struct *task);
__ia64_save_fpu((prev)->thread.fph); \
} \
__switch_to(prev, next, last); \
+ /* "next" in old context is "current" in new context */ \
+ if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) && \
+ (task_cpu(current) != \
+ task_thread_info(current)->last_cpu))) { \
+ platform_migrate(current); \
+ task_thread_info(current)->last_cpu = task_cpu(current); \
+ } \
} while (0)
#else
# define switch_to(prev,next,last) __switch_to(prev, next, last)
@@ -258,6 +265,8 @@ void sched_cacheflush(void);
#define arch_align_stack(x) (x)
+void default_idle(void);
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
diff --git a/linux-2.6-xen-sparse/include/asm-um/page.h b/linux-2.6-xen-sparse/include/asm-um/page.h
index ee9bac86b7..a864534755 100644
--- a/linux-2.6-xen-sparse/include/asm-um/page.h
+++ b/linux-2.6-xen-sparse/include/asm-um/page.h
@@ -106,9 +106,6 @@ extern unsigned long uml_physmem;
#define __pa(virt) to_phys((void *) (unsigned long) (virt))
#define __va(phys) to_virt((unsigned long) (phys))
-#define page_to_pfn(page) ((page) - mem_map)
-#define pfn_to_page(pfn) (mem_map + (pfn))
-
#define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
@@ -121,6 +118,7 @@ extern struct page *arch_validate(struct page *page, gfp_t mask, int order);
extern int arch_free_page(struct page *page, int order);
#define HAVE_ARCH_FREE_PAGE
+#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
#endif
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h
index d99a8e5465..c15bf3f84a 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h
@@ -47,7 +47,8 @@ extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
extern void e820_print_map(char *who);
-extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
extern void e820_setup_gap(struct e820entry *e820, int nr_map);
@@ -58,6 +59,8 @@ extern void __init parse_memopt(char *p, char **end);
extern void __init parse_memmapopt(char *p, char **end);
extern struct e820map e820;
+
+extern unsigned ebda_addr, ebda_size;
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h
index 1e0bf7b6e6..d1027d8bbd 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h
@@ -44,7 +44,7 @@
* Do not use vmalloc/vfree: floppy_release_irq_and_dma() gets called from
* softirq context via motor_off_callback. This is a generic bug that we happen to trigger.
*/
-#define fd_dma_mem_alloc(size) __get_free_pages(GFP_KERNEL, get_order(size))
+#define fd_dma_mem_alloc(size) __get_free_pages(GFP_KERNEL|__GFP_NORETRY, get_order(size))
#define fd_dma_mem_free(addr, size) free_pages(addr, get_order(size))
#define fd_dma_setup(addr, size, mode, io) vdma_dma_setup(addr, size, mode, io)
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h
index 4d13b1b9c7..fcf7203ee0 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h
@@ -151,6 +151,9 @@ static inline void __iomem * ioremap (unsigned long offset, unsigned long size)
return __ioremap(offset, size, 0);
}
+extern void *early_ioremap(unsigned long addr, unsigned long size);
+extern void early_iounmap(void *addr, unsigned long size);
+
/*
* This one maps high address device memory and turns off caching for that area.
* it's useful if some control registers are in such an area and write combining
@@ -159,11 +162,6 @@ static inline void __iomem * ioremap (unsigned long offset, unsigned long size)
extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
extern void iounmap(volatile void __iomem *addr);
-/* Use normal IO mappings for DMI */
-#define dmi_ioremap ioremap
-#define dmi_iounmap(x,l) iounmap(x)
-#define dmi_alloc(l) kmalloc(l, GFP_ATOMIC)
-
/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
*/
@@ -196,7 +194,7 @@ static inline __u16 __readw(const volatile void __iomem *addr)
{
return *(__force volatile __u16 *)addr;
}
-static inline __u32 __readl(const volatile void __iomem *addr)
+static __always_inline __u32 __readl(const volatile void __iomem *addr)
{
return *(__force volatile __u32 *)addr;
}
@@ -219,23 +217,6 @@ static inline __u64 __readq(const volatile void __iomem *addr)
#define mmiowb()
-#ifdef CONFIG_UNORDERED_IO
-static inline void __writel(__u32 val, volatile void __iomem *addr)
-{
- volatile __u32 __iomem *target = addr;
- asm volatile("movnti %1,%0"
- : "=m" (*target)
- : "r" (val) : "memory");
-}
-
-static inline void __writeq(__u64 val, volatile void __iomem *addr)
-{
- volatile __u64 __iomem *target = addr;
- asm volatile("movnti %1,%0"
- : "=m" (*target)
- : "r" (val) : "memory");
-}
-#else
static inline void __writel(__u32 b, volatile void __iomem *addr)
{
*(__force volatile __u32 *)addr = b;
@@ -244,7 +225,6 @@ static inline void __writeq(__u64 b, volatile void __iomem *addr)
{
*(__force volatile __u64 *)addr = b;
}
-#endif
static inline void __writeb(__u8 b, volatile void __iomem *addr)
{
*(__force volatile __u8 *)addr = b;
@@ -286,23 +266,11 @@ void memset_io(volatile void __iomem *a, int b, size_t c);
*/
#define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
-#define isa_readb(a) readb(__ISA_IO_base + (a))
-#define isa_readw(a) readw(__ISA_IO_base + (a))
-#define isa_readl(a) readl(__ISA_IO_base + (a))
-#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
-#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
-#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
-#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
-#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
-#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
-
-
/*
* Again, x86-64 does not require mem IO specific function.
*/
#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
-#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(__ISA_IO_base + (b)),(c),(d))
/**
* check_signature - find BIOS signatures
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h
index 1e5b86a05e..6676cb91eb 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h
@@ -76,12 +76,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
BUG_ON(!next->context.pinned);
/* stop flush ipis for the previous mm */
- clear_bit(cpu, &prev->cpu_vm_mask);
+ cpu_clear(cpu, prev->cpu_vm_mask);
#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
write_pda(mmu_state, TLBSTATE_OK);
write_pda(active_mm, next);
#endif
- set_bit(cpu, &next->cpu_vm_mask);
+ cpu_set(cpu, next->cpu_vm_mask);
/* load_cr3(next->pgd) */
op->cmd = MMUEXT_NEW_BASEPTR;
@@ -108,7 +108,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
write_pda(mmu_state, TLBSTATE_OK);
if (read_pda(active_mm) != next)
out_of_line_bug();
- if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) {
+ if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
index 7573cce405..c5af4c763e 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
@@ -189,8 +189,6 @@ static inline pgd_t __pgd(unsigned long x)
#define __boot_va(x) __va(x)
#define __boot_pa(x) __pa(x)
#ifdef CONFIG_FLATMEM
-#define pfn_to_page(pfn) (mem_map + (pfn))
-#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define pfn_valid(pfn) ((pfn) < end_pfn)
#endif
@@ -206,6 +204,7 @@ static inline pgd_t __pgd(unsigned long x)
#endif /* __KERNEL__ */
+#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
#endif /* _X86_64_PAGE_H */
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h
index 6a7f8de3ff..61cf7ec389 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h
@@ -102,6 +102,32 @@ static inline void pud_free(pud_t *pud)
free_page((unsigned long)pud);
}
+static inline void pgd_list_add(pgd_t *pgd)
+{
+ struct page *page = virt_to_page(pgd);
+
+ spin_lock(&pgd_lock);
+ page->index = (pgoff_t)pgd_list;
+ if (pgd_list)
+ pgd_list->private = (unsigned long)&page->index;
+ pgd_list = page;
+ page->private = (unsigned long)&pgd_list;
+ spin_unlock(&pgd_lock);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+ struct page *next, **pprev, *page = virt_to_page(pgd);
+
+ spin_lock(&pgd_lock);
+ next = (struct page *)page->index;
+ pprev = (struct page **)page->private;
+ *pprev = next;
+ if (next)
+ next->private = (unsigned long)pprev;
+ spin_unlock(&pgd_lock);
+}
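pgd_list_add()/pgd_list_del() above keep the pgds on an intrusive doubly linked list threaded through each pgd's struct page: page->index holds the next element and page->private holds a pointer to the previous element's next field, the same pprev trick used by the kernel's hlist. A standalone sketch of the idiom with a hypothetical node type (not the actual kernel structures):

    struct node {
        struct node *next;      /* plays the role of page->index */
        struct node **pprev;    /* plays the role of page->private */
    };

    static struct node *list_head;

    static void node_add(struct node *n)
    {
        n->next = list_head;
        if (list_head)
            list_head->pprev = &n->next;
        list_head = n;
        n->pprev = &list_head;
    }

    static void node_del(struct node *n)
    {
        *n->pprev = n->next;    /* unlink in O(1), no list walk needed */
        if (n->next)
            n->next->pprev = n->pprev;
    }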
+
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
/*
@@ -109,9 +135,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
*/
unsigned boundary;
pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
-
if (!pgd)
return NULL;
+ pgd_list_add(pgd);
/*
* Copy kernel pointers in from init.
* Could keep a freelist or slab cache of those because the kernel
@@ -155,6 +181,7 @@ static inline void pgd_free(pgd_t *pgd)
0));
}
+ pgd_list_del(pgd);
free_pages((unsigned long)pgd, 1);
}
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
index 0c4d0a888e..b3dd8f2b2f 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
@@ -331,7 +331,7 @@ static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; }
-static inline int pte_huge(pte_t pte) { return (__pte_val(pte) & __LARGE_PTE) == __LARGE_PTE; }
+static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; }
static inline pte_t pte_rdprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; }
static inline pte_t pte_exprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; }
@@ -343,7 +343,7 @@ static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return
static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= __LARGE_PTE; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
struct vm_area_struct;
@@ -507,6 +507,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+extern spinlock_t pgd_lock;
+extern struct page *pgd_list;
+void vmalloc_sync_all(void);
+
#endif /* !__ASSEMBLY__ */
extern int kern_addr_valid(unsigned long addr);
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h
index e000547f62..cebdcfbfd7 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h
@@ -20,6 +20,7 @@
#include <asm/mmsegment.h>
#include <asm/percpu.h>
#include <linux/personality.h>
+#include <linux/cpumask.h>
#define TF_MASK 0x00000100
#define IF_MASK 0x00000200
@@ -65,6 +66,9 @@ struct cpuinfo_x86 {
__u32 x86_power;
__u32 extended_cpuid_level; /* Max extended CPUID function supported */
unsigned long loops_per_jiffy;
+#ifdef CONFIG_SMP
+ cpumask_t llc_shared_map; /* cpus sharing the last level cache */
+#endif
__u8 apicid;
__u8 booted_cores; /* number of cores as seen by OS */
} ____cacheline_aligned;
@@ -365,9 +369,6 @@ struct extended_sigtable {
struct extended_signature sigs[0];
};
-/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
-#define MICROCODE_IOCFREE _IO('6',0)
-
#define ASM_NOP1 K8_NOP1
#define ASM_NOP2 K8_NOP2
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h
index 91ae795670..d02c9fbcc4 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/smp.h
@@ -55,6 +55,7 @@ extern cpumask_t cpu_sibling_map[NR_CPUS];
extern cpumask_t cpu_core_map[NR_CPUS];
extern int phys_proc_id[NR_CPUS];
extern int cpu_core_id[NR_CPUS];
+extern u8 cpu_llc_id[NR_CPUS];
#define SMP_TRAMPOLINE_BASE 0x6000
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h
index 9826fa7269..0ce91ea069 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h
@@ -57,8 +57,7 @@
[pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
: "memory", "cc" __EXTRA_CLOBBER)
-
-extern void load_gs_index(unsigned);
+extern void load_gs_index(unsigned);
/*
* Load a segment. Fall back on loading the zero
@@ -80,12 +79,6 @@ extern void load_gs_index(unsigned);
".previous" \
: :"r" (value), "r" (0))
-#define set_debug(value,register) \
- __asm__("movq %0,%%db" #register \
- : /* no output */ \
- :"r" ((unsigned long) value))
-
-
#ifdef __KERNEL__
struct alt_instr {
__u8 *instr; /* original instruction */
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/mach_time.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/mach_time.h
index 4db611ecb0..2fbd606a46 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/mach_time.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/mach_time.h
@@ -82,21 +82,8 @@ static inline int mach_set_rtc_mmss(unsigned long nowtime)
static inline unsigned long mach_get_cmos_time(void)
{
unsigned int year, mon, day, hour, min, sec;
- int i;
- /* The Linux interpretation of the CMOS clock register contents:
- * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
- * RTC registers show the second which has precisely just started.
- * Let's hope other operating systems interpret the RTC the same way.
- */
- /* read RTC exactly on falling edge of update flag */
- for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
- if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
- break;
- for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */
- if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
- break;
- do { /* Isn't this overkill ? UIP above should guarantee consistency */
+ do {
sec = CMOS_READ(RTC_SECONDS);
min = CMOS_READ(RTC_MINUTES);
hour = CMOS_READ(RTC_HOURS);
@@ -104,16 +91,18 @@ static inline unsigned long mach_get_cmos_time(void)
mon = CMOS_READ(RTC_MONTH);
year = CMOS_READ(RTC_YEAR);
} while (sec != CMOS_READ(RTC_SECONDS));
- if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- {
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
- }
- if ((year += 1900) < 1970)
+
+ if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+ }
+
+ year += 1900;
+ if (year < 1970)
year += 100;
return mktime(year, mon, day, hour, min, sec);
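For reference, BCD_TO_BIN converts a binary-coded-decimal RTC register in place (each nibble holds one decimal digit), and the year windowing treats two-digit CMOS years below 70 as 20xx. A worked sketch, assuming the classic macro definition from this era's mc146818rtc.h:

    #define BCD_TO_BIN(val) ((val) = ((val) & 15) + ((val) >> 4) * 10)

    unsigned int year = 0x07;   /* the RTC reports 2007 as BCD 0x07 */
    BCD_TO_BIN(year);           /* -> 7    */
    year += 1900;               /* -> 1907 */
    if (year < 1970)
        year += 100;            /* -> 2007 */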
diff --git a/linux-2.6-xen-sparse/include/linux/gfp.h b/linux-2.6-xen-sparse/include/linux/gfp.h
index cba61ae4f5..3f82ca5207 100644
--- a/linux-2.6-xen-sparse/include/linux/gfp.h
+++ b/linux-2.6-xen-sparse/include/linux/gfp.h
@@ -57,6 +57,8 @@ struct vm_area_struct;
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
__GFP_NOMEMALLOC|__GFP_HARDWALL)
+/* This equals 0, but use constants in case they ever change */
+#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
#define GFP_ATOMIC (__GFP_HIGH)
#define GFP_NOIO (__GFP_WAIT)
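GFP_NOWAIT gives callers a named flag for "do not sleep, but do not raid the emergency pools either", in contrast to GFP_ATOMIC. A hedged usage sketch (the buffer and length are illustrative):

    /* opportunistic allocation from a context that may not sleep;
     * failure is acceptable, so stay out of the emergency reserves */
    void *buf = kmalloc(len, GFP_NOWAIT);
    if (!buf)
        return;                 /* just skip the optional work */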
diff --git a/linux-2.6-xen-sparse/include/linux/highmem.h b/linux-2.6-xen-sparse/include/linux/highmem.h
index 42c6aa2787..8f2bcfb85c 100644
--- a/linux-2.6-xen-sparse/include/linux/highmem.h
+++ b/linux-2.6-xen-sparse/include/linux/highmem.h
@@ -7,6 +7,18 @@
#include <asm/cacheflush.h>
+#ifndef ARCH_HAS_FLUSH_ANON_PAGE
+static inline void flush_anon_page(struct page *page, unsigned long vmaddr)
+{
+}
+#endif
+
+#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+static inline void flush_kernel_dcache_page(struct page *page)
+{
+}
+#endif
+
#ifdef CONFIG_HIGHMEM
#include <asm/highmem.h>
diff --git a/linux-2.6-xen-sparse/include/linux/mm.h b/linux-2.6-xen-sparse/include/linux/mm.h
index 778439edc7..0539b4d194 100644
--- a/linux-2.6-xen-sparse/include/linux/mm.h
+++ b/linux-2.6-xen-sparse/include/linux/mm.h
@@ -288,43 +288,34 @@ struct page {
*
* Also, many kernel routines increase the page count before a critical
* routine so they can be sure the page doesn't go away from under them.
- *
- * Since 2.6.6 (approx), a free page has ->_count = -1. This is so that we
- * can use atomic_add_negative(-1, page->_count) to detect when the page
- * becomes free and so that we can also use atomic_inc_and_test to atomically
- * detect when we just tried to grab a ref on a page which some other CPU has
- * already deemed to be freeable.
- *
- * NO code should make assumptions about this internal detail! Use the provided
- * macros which retain the old rules: page_count(page) == 0 is a free page.
*/
/*
* Drop a ref, return true if the logical refcount fell to zero (the page has
* no users)
*/
-#define put_page_testzero(p) \
- ({ \
- BUG_ON(atomic_read(&(p)->_count) == -1);\
- atomic_add_negative(-1, &(p)->_count); \
- })
+static inline int put_page_testzero(struct page *page)
+{
+ BUG_ON(atomic_read(&page->_count) == 0);
+ return atomic_dec_and_test(&page->_count);
+}
/*
- * Grab a ref, return true if the page previously had a logical refcount of
- * zero. ie: returns true if we just grabbed an already-deemed-to-be-free page
+ * Try to grab a ref unless the page has a refcount of zero; return false
+ * if that is the case.
*/
-#define get_page_testone(p) atomic_inc_and_test(&(p)->_count)
-
-#define set_page_count(p,v) atomic_set(&(p)->_count, (v) - 1)
-#define __put_page(p) atomic_dec(&(p)->_count)
+static inline int get_page_unless_zero(struct page *page)
+{
+ return atomic_inc_not_zero(&page->_count);
+}
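With the refcount rebased so that a free page has _count == 0, the two helpers compose into the usual speculative grab/release pattern; a minimal sketch (the lookup that produced `page` is assumed):

    if (get_page_unless_zero(page)) {
        /* the page cannot be freed out from under us while we
         * hold this reference */
        put_page(page);         /* frees the page if ours was the last ref */
    }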
extern void FASTCALL(__page_cache_release(struct page *));
static inline int page_count(struct page *page)
{
- if (PageCompound(page))
+ if (unlikely(PageCompound(page)))
page = (struct page *)page_private(page);
- return atomic_read(&page->_count) + 1;
+ return atomic_read(&page->_count);
}
static inline void get_page(struct page *page)
@@ -334,8 +325,19 @@ static inline void get_page(struct page *page)
atomic_inc(&page->_count);
}
+/*
+ * Set up the page count before the page is freed into the page allocator
+ * for the first time (boot or memory hotplug).
+ */
+static inline void init_page_count(struct page *page)
+{
+ atomic_set(&page->_count, 1);
+}
+
void put_page(struct page *page);
+void split_page(struct page *page, unsigned int order);
+
/*
* Multiple processes may "see" the same page. E.g. for untouched
* mappings of /dev/null, all processes see the same page full of
@@ -1055,7 +1057,7 @@ int in_gate_area_no_task(unsigned long addr);
int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
-int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
unsigned long lru_pages);
void drop_pagecache(void);
void drop_slab(void);
diff --git a/linux-2.6-xen-sparse/include/linux/pfn.h b/linux-2.6-xen-sparse/include/linux/pfn.h
new file mode 100644
index 0000000000..87a4ab52b6
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/linux/pfn.h
@@ -0,0 +1,9 @@
+#ifndef _LINUX_PFN_H_
+#define _LINUX_PFN_H_
+
+#define PFN_ALIGN(x) (((unsigned long long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x) ((unsigned long long)(x) << PAGE_SHIFT)
+
+#endif
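A quick worked example of the new helpers with 4 KiB pages (PAGE_SHIFT == 12, PAGE_SIZE == 4096); the addresses are illustrative:

    PFN_DOWN(0x1234)            /* == 1: frame containing the address   */
    PFN_UP(0x1234)              /* == 2: first frame at or above it     */
    PFN_UP(0x1000)              /* == 1: already page-aligned           */
    PFN_ALIGN(0x1234)           /* == 0x2000: round up to a page bound  */
    PFN_PHYS(2)                 /* == 0x2000: back to a physical addr   */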
diff --git a/linux-2.6-xen-sparse/include/linux/skbuff.h b/linux-2.6-xen-sparse/include/linux/skbuff.h
index 07b8f3036d..11b66ce616 100644
--- a/linux-2.6-xen-sparse/include/linux/skbuff.h
+++ b/linux-2.6-xen-sparse/include/linux/skbuff.h
@@ -287,7 +287,6 @@ struct sk_buff {
void (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_NETFILTER
- __u32 nfmark;
struct nf_conntrack *nfct;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct sk_buff *nfct_reasm;
@@ -295,6 +294,7 @@ struct sk_buff {
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif
+ __u32 nfmark;
#endif /* CONFIG_NETFILTER */
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
@@ -321,6 +321,7 @@ struct sk_buff {
#include <asm/system.h>
+extern void kfree_skb(struct sk_buff *skb);
extern void __kfree_skb(struct sk_buff *skb);
extern struct sk_buff *__alloc_skb(unsigned int size,
gfp_t priority, int fclone);
@@ -361,6 +362,13 @@ extern void skb_over_panic(struct sk_buff *skb, int len,
void *here);
extern void skb_under_panic(struct sk_buff *skb, int len,
void *here);
+extern void skb_truesize_bug(struct sk_buff *skb);
+
+static inline void skb_truesize_check(struct sk_buff *skb)
+{
+ if (unlikely((int)skb->truesize < sizeof(struct sk_buff) + skb->len))
+ skb_truesize_bug(skb);
+}
extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
int getfrag(void *from, char *to, int offset,
@@ -422,22 +430,6 @@ static inline struct sk_buff *skb_get(struct sk_buff *skb)
*/
/**
- * kfree_skb - free an sk_buff
- * @skb: buffer to free
- *
- * Drop a reference to the buffer and free it if the usage count has
- * hit zero.
- */
-static inline void kfree_skb(struct sk_buff *skb)
-{
- if (likely(atomic_read(&skb->users) == 1))
- smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users)))
- return;
- __kfree_skb(skb);
-}
-
-/**
* skb_cloned - is the buffer a clone
* @skb: buffer to check
*
@@ -974,16 +966,34 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
#define NET_IP_ALIGN 2
#endif
-extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
+/*
+ * The networking layer reserves some headroom in skb data (via
+ * dev_alloc_skb). This is used to avoid having to reallocate skb data when
+ * the header has to grow. In the default case, if the header has to grow
+ * 16 bytes or less we avoid the reallocation.
+ *
+ * Unfortunately this headroom changes the DMA alignment of the resulting
+ * network packet. As with NET_IP_ALIGN, this unaligned DMA is expensive
+ * on some architectures. An architecture can override this value,
+ * perhaps setting it to a cacheline in size (since that will maintain
+ * cacheline alignment of the DMA). It must be a power of 2.
+ *
+ * Various parts of the networking layer expect at least 16 bytes of
+ * headroom; you should not reduce this.
+ */
+#ifndef NET_SKB_PAD
+#define NET_SKB_PAD 16
+#endif
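An architecture whose DMA engines want cacheline alignment could override the default before this fallback is seen, for instance from its asm headers; a hypothetical sketch (the 32-byte value is illustrative, and per the comment above it must be a power of 2 and should not drop below 16):

    /* hypothetical override in an arch header, e.g. include/asm-foo/ */
    #define NET_SKB_PAD 32      /* one full 32-byte cacheline of headroom */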
+
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
{
- if (unlikely(skb->data_len)) {
- WARN_ON(1);
- return;
- }
- skb->len = len;
- skb->tail = skb->data + len;
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data + len;
+ } else
+ ___pskb_trim(skb, len, 0);
}
/**
@@ -993,7 +1003,6 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
*
* Cut the length of a buffer down by removing data from the tail. If
* the buffer is already under the length specified it is not modified.
- * The skb must be linear.
*/
static inline void skb_trim(struct sk_buff *skb, unsigned int len)
{
@@ -1004,10 +1013,12 @@ static inline void skb_trim(struct sk_buff *skb, unsigned int len)
static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
{
- if (skb->data_len)
- return ___pskb_trim(skb, len);
- __skb_trim(skb, len);
- return 0;
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data+len;
+ return 0;
+ }
+ return ___pskb_trim(skb, len, 1);
}
static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
@@ -1063,9 +1074,9 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
gfp_t gfp_mask)
{
- struct sk_buff *skb = alloc_skb(length + 16, gfp_mask);
+ struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
if (likely(skb))
- skb_reserve(skb, 16);
+ skb_reserve(skb, NET_SKB_PAD);
return skb;
}
#else
@@ -1103,13 +1114,15 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length)
*/
static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
{
- int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+ int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) -
+ skb_headroom(skb);
if (delta < 0)
delta = 0;
if (delta || skb_cloned(skb))
- return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC);
+ return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) &
+ ~(NET_SKB_PAD-1), 0, GFP_ATOMIC);
return 0;
}
@@ -1208,12 +1221,14 @@ static inline int skb_linearize_cow(struct sk_buff *skb)
*/
static inline void skb_postpull_rcsum(struct sk_buff *skb,
- const void *start, int len)
+ const void *start, unsigned int len)
{
if (skb->ip_summed == CHECKSUM_HW)
skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
}
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+
/**
* pskb_trim_rcsum - trim received skb and update checksum
* @skb: buffer to trim
@@ -1386,16 +1401,6 @@ static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
kfree_skb(skb);
}
#endif
-static inline void nf_reset(struct sk_buff *skb)
-{
- nf_conntrack_put(skb->nfct);
- skb->nfct = NULL;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- nf_conntrack_put_reasm(skb->nfct_reasm);
- skb->nfct_reasm = NULL;
-#endif
-}
-
#ifdef CONFIG_BRIDGE_NETFILTER
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{
@@ -1408,6 +1413,20 @@ static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
atomic_inc(&nf_bridge->use);
}
#endif /* CONFIG_BRIDGE_NETFILTER */
+static inline void nf_reset(struct sk_buff *skb)
+{
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = NULL;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ nf_conntrack_put_reasm(skb->nfct_reasm);
+ skb->nfct_reasm = NULL;
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+ nf_bridge_put(skb->nf_bridge);
+ skb->nf_bridge = NULL;
+#endif
+}
+
#else /* CONFIG_NETFILTER */
static inline void nf_reset(struct sk_buff *skb) {}
#endif /* CONFIG_NETFILTER */
diff --git a/linux-2.6-xen-sparse/kernel/fork.c b/linux-2.6-xen-sparse/kernel/fork.c
index 78ea997b3a..baec993005 100644
--- a/linux-2.6-xen-sparse/kernel/fork.c
+++ b/linux-2.6-xen-sparse/kernel/fork.c
@@ -84,7 +84,7 @@ static kmem_cache_t *task_struct_cachep;
#endif
/* SLAB cache for signal_struct structures (tsk->signal) */
-kmem_cache_t *signal_cachep;
+static kmem_cache_t *signal_cachep;
/* SLAB cache for sighand_struct structures (tsk->sighand) */
kmem_cache_t *sighand_cachep;
@@ -108,16 +108,12 @@ void free_task(struct task_struct *tsk)
}
EXPORT_SYMBOL(free_task);
-void __put_task_struct_cb(struct rcu_head *rhp)
+void __put_task_struct(struct task_struct *tsk)
{
- struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
-
WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
WARN_ON(atomic_read(&tsk->usage));
WARN_ON(tsk == current);
- if (unlikely(tsk->audit_context))
- audit_free(tsk);
security_task_free(tsk);
free_uid(tsk->user);
put_group_info(tsk->group_info);
@@ -181,6 +177,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
/* One for us, one for whoever does the "release_task()" (usually parent) */
atomic_set(&tsk->usage,2);
atomic_set(&tsk->fs_excl, 0);
+ tsk->btrace_seq = 0;
+ tsk->splice_pipe = NULL;
return tsk;
}
@@ -610,12 +608,12 @@ static struct files_struct *alloc_files(void)
atomic_set(&newf->count, 1);
spin_lock_init(&newf->file_lock);
+ newf->next_fd = 0;
fdt = &newf->fdtab;
- fdt->next_fd = 0;
fdt->max_fds = NR_OPEN_DEFAULT;
- fdt->max_fdset = __FD_SETSIZE;
- fdt->close_on_exec = &newf->close_on_exec_init;
- fdt->open_fds = &newf->open_fds_init;
+ fdt->max_fdset = EMBEDDED_FD_SET_SIZE;
+ fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
+ fdt->open_fds = (fd_set *)&newf->open_fds_init;
fdt->fd = &newf->fd_array[0];
INIT_RCU_HEAD(&fdt->rcu);
fdt->free_files = NULL;
@@ -771,8 +769,7 @@ int unshare_files(void)
struct files_struct *files = current->files;
int rc;
- if(!files)
- BUG();
+ BUG_ON(!files);
/* This can race but the race causes us to copy when we don't
need to and drop the copy */
@@ -789,14 +786,6 @@ int unshare_files(void)
EXPORT_SYMBOL(unshare_files);
-void sighand_free_cb(struct rcu_head *rhp)
-{
- struct sighand_struct *sp;
-
- sp = container_of(rhp, struct sighand_struct, rcu);
- kmem_cache_free(sighand_cachep, sp);
-}
-
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
struct sighand_struct *sig;
@@ -809,12 +798,17 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
rcu_assign_pointer(tsk->sighand, sig);
if (!sig)
return -ENOMEM;
- spin_lock_init(&sig->siglock);
atomic_set(&sig->count, 1);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
return 0;
}
+void __cleanup_sighand(struct sighand_struct *sighand)
+{
+ if (atomic_dec_and_test(&sighand->count))
+ kmem_cache_free(sighand_cachep, sighand);
+}
+
static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
{
struct signal_struct *sig;
@@ -850,7 +844,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
sig->it_real_incr.tv64 = 0;
sig->real_timer.function = it_real_fn;
- sig->real_timer.data = tsk;
+ sig->tsk = tsk;
sig->it_virt_expires = cputime_zero;
sig->it_virt_incr = cputime_zero;
@@ -884,6 +878,22 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
return 0;
}
+void __cleanup_signal(struct signal_struct *sig)
+{
+ exit_thread_group_keys(sig);
+ kmem_cache_free(signal_cachep, sig);
+}
+
+static inline void cleanup_signal(struct task_struct *tsk)
+{
+ struct signal_struct *sig = tsk->signal;
+
+ atomic_dec(&sig->live);
+
+ if (atomic_dec_and_test(&sig->count))
+ __cleanup_signal(sig);
+}
+
static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
unsigned long new_flags = p->flags;
@@ -1023,6 +1033,7 @@ static task_t *copy_process(unsigned long clone_flags,
p->mempolicy = NULL;
goto bad_fork_cleanup_cpuset;
}
+ mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_DEBUG_MUTEXES
@@ -1063,7 +1074,10 @@ static task_t *copy_process(unsigned long clone_flags,
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
-
+ p->robust_list = NULL;
+#ifdef CONFIG_COMPAT
+ p->compat_robust_list = NULL;
+#endif
/*
* sigaltstack should be cleared when sharing the same VM
*/
@@ -1094,6 +1108,7 @@ static task_t *copy_process(unsigned long clone_flags,
* We dont wake it up yet.
*/
p->group_leader = p;
+ INIT_LIST_HEAD(&p->thread_group);
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);
@@ -1117,16 +1132,6 @@ static task_t *copy_process(unsigned long clone_flags,
!cpu_online(task_cpu(p))))
set_task_cpu(p, smp_processor_id());
- /*
- * Check for pending SIGKILL! The new thread should not be allowed
- * to slip out of an OOM kill. (or normal SIGKILL.)
- */
- if (sigismember(&current->pending.signal, SIGKILL)) {
- write_unlock_irq(&tasklist_lock);
- retval = -EINTR;
- goto bad_fork_cleanup_namespace;
- }
-
/* CLONE_PARENT re-uses the old parent */
if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
p->real_parent = current->real_parent;
@@ -1135,6 +1140,23 @@ static task_t *copy_process(unsigned long clone_flags,
p->parent = p->real_parent;
spin_lock(&current->sighand->siglock);
+
+ /*
+ * Process group and session signals need to be delivered to just the
+ * parent before the fork or both the parent and the child after the
+ * fork. Restart if a signal comes in before we add the new process to
+ * its process group.
+ * A fatal signal pending means that current will exit, so the new
+ * thread can't slip out of an OOM kill (or normal SIGKILL).
+ */
+ recalc_sigpending();
+ if (signal_pending(current)) {
+ spin_unlock(&current->sighand->siglock);
+ write_unlock_irq(&tasklist_lock);
+ retval = -ERESTARTNOINTR;
+ goto bad_fork_cleanup_namespace;
+ }
+
if (clone_flags & CLONE_THREAD) {
/*
* Important: if an exit-all has been started then
@@ -1147,17 +1169,9 @@ static task_t *copy_process(unsigned long clone_flags,
retval = -EAGAIN;
goto bad_fork_cleanup_namespace;
}
- p->group_leader = current->group_leader;
- if (current->signal->group_stop_count > 0) {
- /*
- * There is an all-stop in progress for the group.
- * We ourselves will stop as soon as we check signals.
- * Make the new thread part of that group stop too.
- */
- current->signal->group_stop_count++;
- set_tsk_thread_flag(p, TIF_SIGPENDING);
- }
+ p->group_leader = current->group_leader;
+ list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
if (!cputime_eq(current->signal->it_virt_expires,
cputime_zero) ||
@@ -1180,23 +1194,25 @@ static task_t *copy_process(unsigned long clone_flags,
*/
p->ioprio = current->ioprio;
- SET_LINKS(p);
- if (unlikely(p->ptrace & PT_PTRACED))
- __ptrace_link(p, current->parent);
-
- if (thread_group_leader(p)) {
- p->signal->tty = current->signal->tty;
- p->signal->pgrp = process_group(current);
- p->signal->session = current->signal->session;
- attach_pid(p, PIDTYPE_PGID, process_group(p));
- attach_pid(p, PIDTYPE_SID, p->signal->session);
- if (p->pid)
+ if (likely(p->pid)) {
+ add_parent(p);
+ if (unlikely(p->ptrace & PT_PTRACED))
+ __ptrace_link(p, current->parent);
+
+ if (thread_group_leader(p)) {
+ p->signal->tty = current->signal->tty;
+ p->signal->pgrp = process_group(current);
+ p->signal->session = current->signal->session;
+ attach_pid(p, PIDTYPE_PGID, process_group(p));
+ attach_pid(p, PIDTYPE_SID, p->signal->session);
+
+ list_add_tail_rcu(&p->tasks, &init_task.tasks);
__get_cpu_var(process_counts)++;
+ }
+ attach_pid(p, PIDTYPE_PID, p->pid);
+ nr_threads++;
}
- attach_pid(p, PIDTYPE_TGID, p->tgid);
- attach_pid(p, PIDTYPE_PID, p->pid);
- nr_threads++;
total_forks++;
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
@@ -1211,9 +1227,9 @@ bad_fork_cleanup_mm:
if (p->mm)
mmput(p->mm);
bad_fork_cleanup_signal:
- exit_signal(p);
+ cleanup_signal(p);
bad_fork_cleanup_sighand:
- exit_sighand(p);
+ __cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
exit_fs(p); /* blocking */
bad_fork_cleanup_files:
@@ -1260,7 +1276,7 @@ task_t * __devinit fork_idle(int cpu)
if (!task)
return ERR_PTR(-ENOMEM);
init_idle(task, cpu);
- unhash_process(task);
+
return task;
}
@@ -1295,17 +1311,19 @@ long do_fork(unsigned long clone_flags,
{
struct task_struct *p;
int trace = 0;
- long pid = alloc_pidmap();
+ struct pid *pid = alloc_pid();
+ long nr;
- if (pid < 0)
+ if (!pid)
return -EAGAIN;
+ nr = pid->nr;
if (unlikely(current->ptrace)) {
trace = fork_traceflag (clone_flags);
if (trace)
clone_flags |= CLONE_PTRACE;
}
- p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+ p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, nr);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
@@ -1332,7 +1350,7 @@ long do_fork(unsigned long clone_flags,
p->state = TASK_STOPPED;
if (unlikely (trace)) {
- current->ptrace_message = pid;
+ current->ptrace_message = nr;
ptrace_notify ((trace << 8) | SIGTRAP);
}
@@ -1342,21 +1360,31 @@ long do_fork(unsigned long clone_flags,
ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
}
} else {
- free_pidmap(pid);
- pid = PTR_ERR(p);
+ free_pid(pid);
+ nr = PTR_ERR(p);
}
- return pid;
+ return nr;
}
#ifndef ARCH_MIN_MMSTRUCT_ALIGN
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
+static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags)
+{
+ struct sighand_struct *sighand = data;
+
+ if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
+ SLAB_CTOR_CONSTRUCTOR)
+ spin_lock_init(&sighand->siglock);
+}
+
void __init proc_caches_init(void)
{
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
+ sighand_ctor, NULL);
signal_cachep = kmem_cache_create("signal_cache",
sizeof(struct signal_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
@@ -1537,6 +1565,12 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
check_unshare_flags(&unshare_flags);
+ /* Return -EINVAL for all unsupported flags */
+ err = -EINVAL;
+ if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+ CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
+ goto bad_unshare_out;
+
if ((err = unshare_thread(unshare_flags)))
goto bad_unshare_out;
if ((err = unshare_fs(unshare_flags, &new_fs)))
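The new flag check above is easy to exercise from user space. A minimal sketch, assuming a kernel recent enough to have unshare(2) and this patch applied; CLONE_VFORK is chosen only because it is not in the accepted list:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <errno.h>
    #include <stdio.h>

    int main(void)
    {
            /* CLONE_VFORK is not among the flags accepted above, so the
             * patched kernel now rejects it up front with -EINVAL
             * instead of silently ignoring the bit. */
            if (unshare(CLONE_VFORK) == -1 && errno == EINVAL)
                    puts("unsupported unshare flag rejected with EINVAL");
            return 0;
    }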
diff --git a/linux-2.6-xen-sparse/lib/Makefile b/linux-2.6-xen-sparse/lib/Makefile
index 1f96de0517..30ddd47946 100644
--- a/linux-2.6-xen-sparse/lib/Makefile
+++ b/linux-2.6-xen-sparse/lib/Makefile
@@ -7,6 +7,8 @@ lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
sha1.o
+lib-$(CONFIG_SMP) += cpumask.o
+
lib-y += kobject.o kref.o kobject_uevent.o klist.o
obj-y += sort.o parser.o halfmd4.o iomap_copy.o
@@ -21,6 +23,7 @@ lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o
lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
+lib-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
diff --git a/linux-2.6-xen-sparse/mm/Kconfig b/linux-2.6-xen-sparse/mm/Kconfig
index f54f49fbb6..32bd2d6bc5 100644
--- a/linux-2.6-xen-sparse/mm/Kconfig
+++ b/linux-2.6-xen-sparse/mm/Kconfig
@@ -115,7 +115,7 @@ config SPARSEMEM_EXTREME
# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
- depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
+ depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
comment "Memory hotplug is currently incompatible with Software Suspend"
depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
@@ -140,5 +140,11 @@ config SPLIT_PTLOCK_CPUS
# support for page migration
#
config MIGRATION
- def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM
- depends on SWAP
+ bool "Page migration"
+ def_bool y if NUMA
+ depends on SWAP && NUMA
+ help
+	  Allows the physical location of a process's pages to be migrated
+	  while their virtual addresses stay unchanged. This is useful, for
+	  example, on NUMA systems to move pages nearer to the processors
+	  that access them.
diff --git a/linux-2.6-xen-sparse/mm/highmem.c b/linux-2.6-xen-sparse/mm/highmem.c
index b29bf621ec..a02c6dec91 100644
--- a/linux-2.6-xen-sparse/mm/highmem.c
+++ b/linux-2.6-xen-sparse/mm/highmem.c
@@ -26,18 +26,14 @@
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
+#include <linux/blktrace_api.h>
#include <asm/tlbflush.h>
static mempool_t *page_pool, *isa_page_pool;
-static void *page_pool_alloc_isa(gfp_t gfp_mask, void *data)
+static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
{
- return alloc_page(gfp_mask | GFP_DMA);
-}
-
-static void page_pool_free(void *page, void *data)
-{
- __free_page(page);
+ return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
}
/*
@@ -50,11 +46,6 @@ static void page_pool_free(void *page, void *data)
*/
#ifdef CONFIG_HIGHMEM
-static void *page_pool_alloc(gfp_t gfp_mask, void *data)
-{
- return alloc_page(gfp_mask);
-}
-
static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
@@ -83,8 +74,7 @@ static void flush_all_zero_pkmaps(void)
pkmap_count[i] = 0;
/* sanity check */
- if (pte_none(pkmap_page_table[i]))
- BUG();
+ BUG_ON(pte_none(pkmap_page_table[i]));
/*
* Don't need an atomic fetch-and-clear op here;
@@ -178,8 +168,7 @@ void fastcall *kmap_high(struct page *page)
if (!vaddr)
vaddr = map_new_virtual(page);
pkmap_count[PKMAP_NR(vaddr)]++;
- if (pkmap_count[PKMAP_NR(vaddr)] < 2)
- BUG();
+ BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
spin_unlock(&kmap_lock);
return (void*) vaddr;
}
@@ -194,8 +183,7 @@ void fastcall kunmap_high(struct page *page)
spin_lock(&kmap_lock);
vaddr = (unsigned long)page_address(page);
- if (!vaddr)
- BUG();
+ BUG_ON(!vaddr);
nr = PKMAP_NR(vaddr);
/*
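The BUG() to BUG_ON() conversions in this file are purely mechanical; modulo unlikely() and architecture hooks, the kernel macro behaves like this user-space analogue:

    #include <stdio.h>
    #include <stdlib.h>

    #define BUG() do {                                              \
            fprintf(stderr, "BUG at %s:%d\n", __FILE__, __LINE__);  \
            abort();                                                \
    } while (0)
    #define BUG_ON(condition) do { if (condition) BUG(); } while (0)

    int main(void)
    {
            BUG_ON(1 + 1 != 2);     /* passes silently */
            return 0;
    }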
@@ -239,9 +227,8 @@ static __init int init_emergency_pool(void)
if (!i.totalhigh)
return 0;
- page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
- if (!page_pool)
- BUG();
+ page_pool = mempool_create_page_pool(POOL_SIZE, 0);
+ BUG_ON(!page_pool);
printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
return 0;
@@ -282,9 +269,9 @@ int init_emergency_isa_pool(void)
if (isa_page_pool)
return 0;
- isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc_isa, page_pool_free, NULL);
- if (!isa_page_pool)
- BUG();
+ isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
+ mempool_free_pages, (void *) 0);
+ BUG_ON(!isa_page_pool);
printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
return 0;
@@ -347,7 +334,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
bio_put(bio);
}
-static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done,int err)
+static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
{
if (bio->bi_size)
return 1;
@@ -394,7 +381,7 @@ static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int
}
static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
- mempool_t *pool)
+ mempool_t *pool)
{
struct page *page;
struct bio *bio = NULL;
@@ -494,6 +481,8 @@ void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
pool = isa_page_pool;
}
+ blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+
/*
* slow path
*/
diff --git a/linux-2.6-xen-sparse/mm/memory.c b/linux-2.6-xen-sparse/mm/memory.c
index d44d1ce44f..892a807b37 100644
--- a/linux-2.6-xen-sparse/mm/memory.c
+++ b/linux-2.6-xen-sparse/mm/memory.c
@@ -87,7 +87,7 @@ int randomize_va_space __read_mostly = 1;
static int __init disable_randmaps(char *s)
{
randomize_va_space = 0;
- return 0;
+ return 1;
}
__setup("norandmaps", disable_randmaps);
@@ -277,7 +277,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
anon_vma_unlink(vma);
unlink_file_vma(vma);
- if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+ if (is_vm_hugetlb_page(vma)) {
hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
floor, next? next->vm_start: ceiling);
} else {
@@ -285,8 +285,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
* Optimization: gather nearby vmas into one call down
*/
while (next && next->vm_start <= vma->vm_end + PMD_SIZE
- && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
- HPAGE_SIZE)) {
+ && !is_vm_hugetlb_page(next)) {
vma = next;
next = vma->vm_next;
anon_vma_unlink(vma);
@@ -388,7 +387,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
{
unsigned long pfn = pte_pfn(pte);
- if (vma->vm_flags & VM_PFNMAP) {
+ if (unlikely(vma->vm_flags & VM_PFNMAP)) {
unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
if (pfn == vma->vm_pgoff + off)
return NULL;
@@ -401,8 +400,6 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
* we should just do "return pfn_to_page(pfn)", but
* in the meantime we check that we get a valid pfn,
* and that the resulting page looks ok.
- *
- * Remove this test eventually!
*/
if (unlikely(!pfn_valid(pfn))) {
if (!(vma->vm_flags & VM_RESERVED))
@@ -1096,6 +1093,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
}
if (pages) {
pages[i] = page;
+
+ flush_anon_page(page, start);
flush_dcache_page(page);
}
if (vmas)
@@ -1243,9 +1242,7 @@ out:
* The page has to be a nice clean _individual_ kernel allocation.
* If you allocate a compound page, you need to have marked it as
* such (__GFP_COMP), or manually just split the page up yourself
- * (which is mainly an issue of doing "set_page_count(page, 1)" for
- * each sub-page, and then freeing them one by one when you free
- * them rather than freeing it as a compound page).
+ * (see split_page()).
*
* NOTE! Traditionally this was done with "remap_pfn_range()" which
* took an arbitrary page protection parameter. This doesn't allow
@@ -1536,7 +1533,6 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
memset(kaddr, 0, PAGE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(dst);
return;
}
@@ -2476,10 +2472,8 @@ int make_pages_present(unsigned long addr, unsigned long end)
if (!vma)
return -1;
write = (vma->vm_flags & VM_WRITE) != 0;
- if (addr >= end)
- BUG();
- if (end > vma->vm_end)
- BUG();
+ BUG_ON(addr >= end);
+ BUG_ON(end > vma->vm_end);
len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
ret = get_user_pages(current, current->mm, addr,
len, write, 0, NULL, NULL);
diff --git a/linux-2.6-xen-sparse/mm/mmap.c b/linux-2.6-xen-sparse/mm/mmap.c
index f1b2f0f0ed..3346589fe8 100644
--- a/linux-2.6-xen-sparse/mm/mmap.c
+++ b/linux-2.6-xen-sparse/mm/mmap.c
@@ -30,10 +30,6 @@
#include <asm/cacheflush.h>
#include <asm/tlb.h>
-#ifndef arch_mmap_check
-#define arch_mmap_check(addr, len, flags) (0)
-#endif
-
static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end);
@@ -125,14 +121,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
* only call if we're about to fail.
*/
n = nr_free_pages();
+
+ /*
+	 * Leave the reserved pages alone; they are not available for
+	 * anonymous memory.
+ */
+ if (n <= totalreserve_pages)
+ goto error;
+ else
+ n -= totalreserve_pages;
+
+ /*
+ * Leave the last 3% for root
+ */
if (!cap_sys_admin)
n -= n / 32;
free += n;
if (free > pages)
return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
+
+ goto error;
}
allowed = (totalram_pages - hugetlb_total_pages())
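The effect of the two adjustments above is easiest to see with concrete numbers; the values below are made up for illustration:

    #include <stdio.h>

    int main(void)
    {
            unsigned long n = 100000;           /* nr_free_pages()      */
            unsigned long totalreserve = 8000;  /* totalreserve_pages   */

            n -= totalreserve;   /* reserved pages can't back anon mem  */
            n -= n / 32;         /* keep roughly 3% for root            */
            printf("pages usable by an unprivileged request: %lu\n", n);
            return 0;
    }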
@@ -154,7 +162,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
*/
if (atomic_read(&vm_committed_space) < (long)allowed)
return 0;
-
+error:
vm_unacct_memory(pages);
return -ENOMEM;
@@ -224,6 +232,17 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
if (brk < mm->end_code)
goto out;
+
+ /*
+ * Check against rlimit here. If this check is done later after the test
+ * of oldbrk with newbrk then it can escape the test and let the data
+	 * segment grow beyond its set limit in the case where the limit is
+ * not page aligned -Ram Gupta
+ */
+ rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+ if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+ goto out;
+
newbrk = PAGE_ALIGN(brk);
oldbrk = PAGE_ALIGN(mm->brk);
if (oldbrk == newbrk)
@@ -236,11 +255,6 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
goto out;
}
- /* Check against rlimit.. */
- rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
- if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
- goto out;
-
/* Check against existing mmap mappings. */
if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
goto out;
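The reordered check is observable from user space on kernels carrying this fix. A minimal sketch with an illustrative 1 MiB limit, not taken from the patch:

    #include <sys/resource.h>
    #include <unistd.h>
    #include <errno.h>
    #include <stdio.h>

    int main(void)
    {
            struct rlimit rl = { .rlim_cur = 1 << 20, .rlim_max = 1 << 20 };

            if (setrlimit(RLIMIT_DATA, &rl) != 0)
                    return 1;
            /* Growing the data segment far past the 1 MiB cap must now
             * fail even when the requested break is not page aligned. */
            if (sbrk((4 << 20) + 123) == (void *)-1 && errno == ENOMEM)
                    puts("brk refused past RLIMIT_DATA");
            return 0;
    }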
@@ -298,8 +312,7 @@ void validate_mm(struct mm_struct *mm)
i = browse_rb(&mm->mm_rb);
if (i != mm->map_count)
printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
- if (bug)
- BUG();
+ BUG_ON(bug);
}
#else
#define validate_mm(mm) do { } while (0)
@@ -436,8 +449,7 @@ __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
struct rb_node ** rb_link, * rb_parent;
__vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
- if (__vma && __vma->vm_start < vma->vm_end)
- BUG();
+ BUG_ON(__vma && __vma->vm_start < vma->vm_end);
__vma_link(mm, vma, prev, rb_link, rb_parent);
mm->map_count++;
}
@@ -616,7 +628,7 @@ again: remove_next = 1 + (end > next->vm_end);
* If the vma has a ->close operation then the driver probably needs to release
* per-vma resources, so we don't attempt to merge those.
*/
-#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
static inline int is_mergeable_vma(struct vm_area_struct *vma,
struct file *file, unsigned long vm_flags)
@@ -817,8 +829,7 @@ try_prev:
* (e.g. stash info in next's anon_vma_node when assigning
* an anon_vma, or when trying vma_merge). Another time.
*/
- if (find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma)
- BUG();
+ BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
if (!near)
goto none;
@@ -849,14 +860,6 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags,
const unsigned long stack_flags
= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
-#ifdef CONFIG_HUGETLB
- if (flags & VM_HUGETLB) {
- if (!(flags & VM_DONTCOPY))
- mm->shared_vm += pages;
- return;
- }
-#endif /* CONFIG_HUGETLB */
-
if (file) {
mm->shared_vm += pages;
if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
@@ -910,10 +913,6 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
if (!len)
return -EINVAL;
- error = arch_mmap_check(addr, len, flags);
- if (error)
- return error;
-
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
if (!len || len > TASK_SIZE)
@@ -1056,12 +1055,11 @@ munmap_back:
* specific mapper. the address has already been validated, but
* not unmapped, but the maps are removed from the list.
*/
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
error = -ENOMEM;
goto unacct_error;
}
- memset(vma, 0, sizeof(*vma));
vma->vm_mm = mm;
vma->vm_start = addr;
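kmem_cache_zalloc() simply folds the memset() into the allocation. A user-space analogue of the pattern (cache_zalloc is a hypothetical name used only for illustration):

    #include <stdlib.h>
    #include <string.h>

    /* Allocate and zero in one call, replacing alloc + memset pairs. */
    static void *cache_zalloc(size_t size)
    {
            void *p = malloc(size);

            if (p)
                    memset(p, 0, size);
            return p;
    }

    int main(void)
    {
            char *buf = cache_zalloc(64);   /* all 64 bytes are zero */
            free(buf);
            return 0;
    }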
@@ -1854,7 +1852,6 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
unsigned long flags;
struct rb_node ** rb_link, * rb_parent;
pgoff_t pgoff = addr >> PAGE_SHIFT;
- int error;
len = PAGE_ALIGN(len);
if (!len)
@@ -1863,12 +1860,6 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
if ((addr + len) > TASK_SIZE || (addr + len) < addr)
return -EINVAL;
- flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
-
- error = arch_mmap_check(addr, len, flags);
- if (error)
- return error;
-
/*
* mlock MCL_FUTURE?
*/
@@ -1909,6 +1900,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
if (security_vm_enough_memory(len >> PAGE_SHIFT))
return -ENOMEM;
+ flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+
/* Can we just expand an old private anonymous mapping? */
if (vma_merge(mm, prev, addr, addr + len, flags,
NULL, NULL, pgoff, NULL))
@@ -1917,12 +1910,11 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
/*
* create a vma struct for an anonymous mapping
*/
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;
}
- memset(vma, 0, sizeof(*vma));
vma->vm_mm = mm;
vma->vm_start = addr;
diff --git a/linux-2.6-xen-sparse/mm/page_alloc.c b/linux-2.6-xen-sparse/mm/page_alloc.c
index f12323955a..dfbd5f418f 100644
--- a/linux-2.6-xen-sparse/mm/page_alloc.c
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c
@@ -39,6 +39,7 @@
#include <linux/mempolicy.h>
#include <asm/tlbflush.h>
+#include <asm/div64.h>
#include "internal.h"
/*
@@ -49,13 +50,12 @@ nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
EXPORT_SYMBOL(node_online_map);
nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
EXPORT_SYMBOL(node_possible_map);
-struct pglist_data *pgdat_list __read_mostly;
unsigned long totalram_pages __read_mostly;
unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
long nr_swap_pages;
int percpu_pagelist_fraction;
-static void fastcall free_hot_cold_page(struct page *page, int cold);
static void __free_pages_ok(struct page *page, unsigned int order);
/*
@@ -191,7 +191,7 @@ static void prep_compound_page(struct page *page, unsigned long order)
for (i = 0; i < nr_pages; i++) {
struct page *p = page + i;
- SetPageCompound(p);
+ __SetPageCompound(p);
set_page_private(p, (unsigned long)page);
}
}
@@ -210,20 +210,36 @@ static void destroy_compound_page(struct page *page, unsigned long order)
if (unlikely(!PageCompound(p) |
(page_private(p) != (unsigned long)page)))
bad_page(page);
- ClearPageCompound(p);
+ __ClearPageCompound(p);
}
}
+static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+{
+ int i;
+
+ BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+ /*
+ * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
+ * and __GFP_HIGHMEM from hard or soft interrupt context.
+ */
+ BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
+ for (i = 0; i < (1 << order); i++)
+ clear_highpage(page + i);
+}
+
/*
* function for dealing with page's order in buddy system.
* zone->lock is already acquired when we use these.
* So, we don't need atomic page->flags operations here.
*/
-static inline unsigned long page_order(struct page *page) {
+static inline unsigned long page_order(struct page *page)
+{
return page_private(page);
}
-static inline void set_page_order(struct page *page, int order) {
+static inline void set_page_order(struct page *page, int order)
+{
set_page_private(page, order);
__SetPageBuddy(page);
}
@@ -286,9 +302,9 @@ static inline int page_is_buddy(struct page *page, int order)
if (PageBuddy(page) && page_order(page) == order) {
BUG_ON(page_count(page) != 0);
- return 1;
+ return 1;
}
- return 0;
+ return 0;
}
/*
@@ -428,11 +444,6 @@ static void __free_pages_ok(struct page *page, unsigned int order)
mutex_debug_check_no_locks_freed(page_address(page),
PAGE_SIZE<<order);
-#ifndef CONFIG_MMU
- for (i = 1 ; i < (1 << order) ; ++i)
- __put_page(page + i);
-#endif
-
for (i = 0 ; i < (1 << order) ; ++i)
reserved += free_pages_check(page + i);
if (reserved)
@@ -453,28 +464,23 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
if (order == 0) {
__ClearPageReserved(page);
set_page_count(page, 0);
-
- free_hot_cold_page(page, 0);
+ set_page_refcounted(page);
+ __free_page(page);
} else {
- LIST_HEAD(list);
int loop;
+ prefetchw(page);
for (loop = 0; loop < BITS_PER_LONG; loop++) {
struct page *p = &page[loop];
- if (loop + 16 < BITS_PER_LONG)
- prefetchw(p + 16);
+ if (loop + 1 < BITS_PER_LONG)
+ prefetchw(p + 1);
__ClearPageReserved(p);
set_page_count(p, 0);
}
- arch_free_page(page, order);
-
- mod_page_state(pgfree, 1 << order);
-
- list_add(&page->lru, &list);
- kernel_map_pages(page, 1 << order, 0);
- free_pages_bulk(page_zone(page), 1, &list, order);
+ set_page_refcounted(page);
+ __free_pages(page, order);
}
}
@@ -512,7 +518,7 @@ static inline void expand(struct zone *zone, struct page *page,
/*
* This page is about to be returned from the page allocator
*/
-static int prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
{
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
@@ -542,8 +548,15 @@ static int prep_new_page(struct page *page, int order)
1 << PG_referenced | 1 << PG_arch_1 |
1 << PG_checked | 1 << PG_mappedtodisk);
set_page_private(page, 0);
- set_page_refs(page, order);
+ set_page_refcounted(page);
kernel_map_pages(page, 1 << order, 1);
+
+ if (gfp_flags & __GFP_ZERO)
+ prep_zero_page(page, order, gfp_flags);
+
+ if (order && (gfp_flags & __GFP_COMP))
+ prep_compound_page(page, order);
+
return 0;
}
@@ -599,13 +612,14 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
/*
* Called from the slab reaper to drain pagesets on a particular node that
* belong to the currently executing processor.
+ * Note that this function must be called with the thread pinned to
+ * a single processor.
*/
void drain_node_pages(int nodeid)
{
int i, z;
unsigned long flags;
- local_irq_save(flags);
for (z = 0; z < MAX_NR_ZONES; z++) {
struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
struct per_cpu_pageset *pset;
@@ -615,11 +629,14 @@ void drain_node_pages(int nodeid)
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- free_pages_bulk(zone, pcp->count, &pcp->list, 0);
- pcp->count = 0;
+ if (pcp->count) {
+ local_irq_save(flags);
+ free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+ pcp->count = 0;
+ local_irq_restore(flags);
+ }
}
}
- local_irq_restore(flags);
}
#endif
@@ -750,13 +767,22 @@ void fastcall free_cold_page(struct page *page)
free_hot_cold_page(page, 1);
}
-static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+/*
+ * split_page takes a non-compound higher-order page, and splits it into
+ * n (1<<order) sub-pages: page[0] .. page[n-1].
+ * Each sub-page must be freed individually.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+void split_page(struct page *page, unsigned int order)
{
int i;
- BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
- for(i = 0; i < (1 << order); i++)
- clear_highpage(page + i);
+ BUG_ON(PageCompound(page));
+ BUG_ON(!page_count(page));
+ for (i = 1; i < (1 << order); i++)
+ set_page_refcounted(page + i);
}
/*
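A sketch of how a caller might use the new helper, under the constraints stated in the comment above; kernel context is assumed and the error handling is illustrative, not taken from this patch:

    /* Allocate four contiguous pages, then split them so each
     * order-0 page can be freed (or handed out) individually. */
    struct page *page = alloc_pages(GFP_KERNEL, 2);

    if (page) {
            split_page(page, 2);        /* four order-0 pages now */
            __free_page(page + 1);      /* each freed on its own  */
            __free_page(page + 3);
            /* page + 0 and page + 2 remain in use by the caller */
    }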
@@ -802,14 +828,8 @@ again:
put_cpu();
BUG_ON(bad_range(zone, page));
- if (prep_new_page(page, order))
+ if (prep_new_page(page, order, gfp_flags))
goto again;
-
- if (gfp_flags & __GFP_ZERO)
- prep_zero_page(page, order, gfp_flags);
-
- if (order && (gfp_flags & __GFP_COMP))
- prep_compound_page(page, order);
return page;
failed:
@@ -933,7 +953,8 @@ restart:
goto got_pg;
do {
- wakeup_kswapd(*z, order);
+ if (cpuset_zone_allowed(*z, gfp_mask|__GFP_HARDWALL))
+ wakeup_kswapd(*z, order);
} while (*(++z));
/*
@@ -1191,7 +1212,7 @@ unsigned int nr_free_highpages (void)
pg_data_t *pgdat;
unsigned int pages = 0;
- for_each_pgdat(pgdat)
+ for_each_online_pgdat(pgdat)
pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
return pages;
@@ -1222,24 +1243,22 @@ DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
{
- int cpu = 0;
+ unsigned cpu;
memset(ret, 0, nr * sizeof(unsigned long));
cpus_and(*cpumask, *cpumask, cpu_online_map);
- cpu = first_cpu(*cpumask);
- while (cpu < NR_CPUS) {
- unsigned long *in, *out, off;
-
- if (!cpu_isset(cpu, *cpumask))
- continue;
+ for_each_cpu_mask(cpu, *cpumask) {
+ unsigned long *in;
+ unsigned long *out;
+ unsigned off;
+ unsigned next_cpu;
in = (unsigned long *)&per_cpu(page_states, cpu);
- cpu = next_cpu(cpu, *cpumask);
-
- if (likely(cpu < NR_CPUS))
- prefetch(&per_cpu(page_states, cpu));
+ next_cpu = next_cpu(cpu, *cpumask);
+ if (likely(next_cpu < NR_CPUS))
+ prefetch(&per_cpu(page_states, next_cpu));
out = (unsigned long *)ret;
for (off = 0; off < nr; off++)
@@ -1335,7 +1354,7 @@ void get_zone_counts(unsigned long *active,
*active = 0;
*inactive = 0;
*free = 0;
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
unsigned long l, m, n;
__get_zone_counts(&l, &m, &n, pgdat);
*active += l;
@@ -1772,7 +1791,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
continue;
page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn);
- set_page_count(page, 1);
+ init_page_count(page);
reset_page_mapcount(page);
SetPageReserved(page);
INIT_LIST_HEAD(&page->lru);
@@ -1947,7 +1966,7 @@ static inline void free_zone_pagesets(int cpu)
}
}
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
+static int pageset_cpuup_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -2021,8 +2040,9 @@ static __meminit void zone_pcp_init(struct zone *zone)
setup_pageset(zone_pcp(zone,cpu), batch);
#endif
}
- printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
- zone->name, zone->present_pages, batch);
+ if (zone->present_pages)
+ printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
+ zone->name, zone->present_pages, batch);
}
static __meminit void init_currently_empty_zone(struct zone *zone,
@@ -2033,7 +2053,6 @@ static __meminit void init_currently_empty_zone(struct zone *zone,
zone_wait_table_init(zone, size);
pgdat->nr_zones = zone_idx(zone) + 1;
- zone->zone_mem_map = pfn_to_page(zone_start_pfn);
zone->zone_start_pfn = zone_start_pfn;
memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
@@ -2108,14 +2127,22 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
#ifdef CONFIG_FLAT_NODE_MEM_MAP
/* ia64 gets its own node_mem_map, before this, without bootmem */
if (!pgdat->node_mem_map) {
- unsigned long size;
+ unsigned long size, start, end;
struct page *map;
- size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
+ /*
+ * The zone's endpoints aren't required to be MAX_ORDER
+	 * aligned, but the node_mem_map endpoints must be, in order
+ * for the buddy allocator to function correctly.
+ */
+ start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
+ end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+ end = ALIGN(end, MAX_ORDER_NR_PAGES);
+ size = (end - start) * sizeof(struct page);
map = alloc_remap(pgdat->node_id, size);
if (!map)
map = alloc_bootmem_node(pgdat, size);
- pgdat->node_mem_map = map;
+ pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
}
#ifdef CONFIG_FLATMEM
/*
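The rounding above is worth seeing with numbers. A user-space sketch with illustrative values, assuming MAX_ORDER_NR_PAGES is 1024:

    #include <stdio.h>

    int main(void)
    {
            /* A node spanning pfns [1000, 5000). */
            unsigned long max_order_nr = 1024;
            unsigned long node_start = 1000, spanned = 4000;

            unsigned long start = node_start & ~(max_order_nr - 1); /* 0 */
            unsigned long end = node_start + spanned;            /* 5000 */
            end = (end + max_order_nr - 1) & ~(max_order_nr - 1);/* 5120 */

            printf("map covers pfns [%lu, %lu), node_mem_map offset %lu\n",
                   start, end, node_start - start);
            return 0;
    }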
@@ -2161,8 +2188,9 @@ static void *frag_start(struct seq_file *m, loff_t *pos)
{
pg_data_t *pgdat;
loff_t node = *pos;
-
- for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
+ for (pgdat = first_online_pgdat();
+ pgdat && node;
+ pgdat = next_online_pgdat(pgdat))
--node;
return pgdat;
@@ -2173,7 +2201,7 @@ static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
pg_data_t *pgdat = (pg_data_t *)arg;
(*pos)++;
- return pgdat->pgdat_next;
+ return next_online_pgdat(pgdat);
}
static void frag_stop(struct seq_file *m, void *arg)
@@ -2464,6 +2492,38 @@ void __init page_alloc_init(void)
}
/*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ * or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+ struct pglist_data *pgdat;
+ unsigned long reserve_pages = 0;
+ int i, j;
+
+ for_each_online_pgdat(pgdat) {
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ struct zone *zone = pgdat->node_zones + i;
+ unsigned long max = 0;
+
+ /* Find valid and maximum lowmem_reserve in the zone */
+ for (j = i; j < MAX_NR_ZONES; j++) {
+ if (zone->lowmem_reserve[j] > max)
+ max = zone->lowmem_reserve[j];
+ }
+
+ /* we treat pages_high as reserved pages. */
+ max += zone->pages_high;
+
+ if (max > zone->present_pages)
+ max = zone->present_pages;
+ reserve_pages += max;
+ }
+ }
+ totalreserve_pages = reserve_pages;
+}
+
+/*
* setup_per_zone_lowmem_reserve - called whenever
* sysctl_lower_zone_reserve_ratio changes. Ensures that each zone
* has a correct pages reserved value, so an adequate number of
@@ -2474,7 +2534,7 @@ static void setup_per_zone_lowmem_reserve(void)
struct pglist_data *pgdat;
int j, idx;
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
unsigned long present_pages = zone->present_pages;
@@ -2494,6 +2554,9 @@ static void setup_per_zone_lowmem_reserve(void)
}
}
}
+
+ /* update totalreserve_pages */
+ calculate_totalreserve_pages();
}
/*
@@ -2515,9 +2578,11 @@ void setup_per_zone_pages_min(void)
}
for_each_zone(zone) {
- unsigned long tmp;
+ u64 tmp;
+
spin_lock_irqsave(&zone->lru_lock, flags);
- tmp = (pages_min * zone->present_pages) / lowmem_pages;
+ tmp = (u64)pages_min * zone->present_pages;
+ do_div(tmp, lowmem_pages);
if (is_highmem(zone)) {
/*
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -2544,10 +2609,13 @@ void setup_per_zone_pages_min(void)
zone->pages_min = tmp;
}
- zone->pages_low = zone->pages_min + tmp / 4;
- zone->pages_high = zone->pages_min + tmp / 2;
+ zone->pages_low = zone->pages_min + (tmp >> 2);
+ zone->pages_high = zone->pages_min + (tmp >> 1);
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+
+ /* update totalreserve_pages */
+ calculate_totalreserve_pages();
}
/*
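The switch to u64 plus do_div() avoids a 32-bit multiply overflow on machines with large zones. A user-space illustration with made-up numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Illustrative: pages_min = 2^16 in a 2^22-page zone. */
            uint32_t pages_min = 65536, present = 4194304;
            uint32_t lowmem = 4194304;

            uint32_t bad = pages_min * present / lowmem;  /* wraps to 0 */
            uint64_t good = (uint64_t)pages_min * present;

            good /= lowmem;       /* what do_div() does on 32-bit */
            printf("32-bit: %u, 64-bit: %llu\n",
                   bad, (unsigned long long)good);
            return 0;
    }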
@@ -2693,8 +2761,7 @@ void *__init alloc_large_system_hash(const char *tablename,
else
numentries <<= (PAGE_SHIFT - scale);
}
- /* rounded up to nearest power of 2 in size */
- numentries = 1UL << (long_log2(numentries) + 1);
+ numentries = roundup_pow_of_two(numentries);
/* limit allocation size to 1/16 total memory by default */
if (max == 0) {
@@ -2737,3 +2804,44 @@ void *__init alloc_large_system_hash(const char *tablename,
return table;
}
+
+#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
+/*
+ * pfn <-> page translation. out-of-line version.
+ * (see asm-generic/memory_model.h)
+ */
+#if defined(CONFIG_FLATMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+ return mem_map + (pfn - ARCH_PFN_OFFSET);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+ return (page - mem_map) + ARCH_PFN_OFFSET;
+}
+#elif defined(CONFIG_DISCONTIGMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+ int nid = arch_pfn_to_nid(pfn);
+ return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+ struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+ return (page - pgdat->node_mem_map) + pgdat->node_start_pfn;
+}
+#elif defined(CONFIG_SPARSEMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+ return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn;
+}
+
+unsigned long page_to_pfn(struct page *page)
+{
+ long section_id = page_to_section(page);
+ return page - __section_mem_map_addr(__nr_to_section(section_id));
+}
+#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
+EXPORT_SYMBOL(pfn_to_page);
+EXPORT_SYMBOL(page_to_pfn);
+#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
diff --git a/linux-2.6-xen-sparse/net/core/dev.c b/linux-2.6-xen-sparse/net/core/dev.c
index 3065f9242d..179412ad00 100644
--- a/linux-2.6-xen-sparse/net/core/dev.c
+++ b/linux-2.6-xen-sparse/net/core/dev.c
@@ -81,6 +81,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
@@ -110,12 +111,11 @@
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
-#ifdef CONFIG_NET_RADIO
-#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
+#include <linux/wireless.h>
#include <net/iw_handler.h>
-#endif /* CONFIG_NET_RADIO */
#include <asm/current.h>
#include <linux/err.h>
+#include <linux/audit.h>
#ifdef CONFIG_XEN
#include <net/ip.h>
@@ -134,7 +134,7 @@
* sure which should go first, but I bet it won't make much
* difference if we are running VLANs. The good news is that
* this protocol won't be in the list unless compiled in, so
- * the average user (w/out VLANs) will not be adversly affected.
+ * the average user (w/out VLANs) will not be adversely affected.
* --BLG
*
* 0800 IP
@@ -156,7 +156,7 @@ static struct list_head ptype_base[16]; /* 16 way hashed list */
static struct list_head ptype_all; /* Taps */
/*
- * The @dev_base list is protected by @dev_base_lock and the rtln
+ * The @dev_base list is protected by @dev_base_lock and the rtnl
* semaphore.
*
* Pure readers hold dev_base_lock for reading.
@@ -200,7 +200,7 @@ static inline struct hlist_head *dev_index_hash(int ifindex)
* Our notifier list
*/
-static struct notifier_block *netdev_chain;
+static RAW_NOTIFIER_HEAD(netdev_chain);
/*
* Device drivers call our routines to queue packets here. We empty the
@@ -648,10 +648,12 @@ int dev_valid_name(const char *name)
* @name: name format string
*
* Passed a format string - eg "lt%d" it will try and find a suitable
- * id. Not efficient for many devices, not called a lot. The caller
- * must hold the dev_base or rtnl lock while allocating the name and
- * adding the device in order to avoid duplicates. Returns the number
- * of the unit assigned or a negative errno code.
+ * id. It scans the list of devices to build up a free map, then chooses
+ * the first empty slot. The caller must hold the dev_base or rtnl lock
+ * while allocating the name and adding the device in order to avoid
+ * duplicates.
+ * Limited to bits_per_byte * page size devices (i.e. 32K on most platforms).
+ * Returns the number of the unit assigned or a negative errno code.
*/
int dev_alloc_name(struct net_device *dev, const char *name)
@@ -743,21 +745,22 @@ int dev_change_name(struct net_device *dev, char *newname)
if (!err) {
hlist_del(&dev->name_hlist);
hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
- notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ raw_notifier_call_chain(&netdev_chain,
+ NETDEV_CHANGENAME, dev);
}
return err;
}
/**
- * netdev_features_change - device changes fatures
+ * netdev_features_change - device changes features
* @dev: device to cause notification
*
* Called to indicate a device has changed features.
*/
void netdev_features_change(struct net_device *dev)
{
- notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
@@ -772,7 +775,8 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+ raw_notifier_call_chain(&netdev_chain,
+ NETDEV_CHANGE, dev);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
}
}
@@ -869,7 +873,7 @@ int dev_open(struct net_device *dev)
/*
* ... and announce new interface.
*/
- notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
}
return ret;
}
@@ -892,7 +896,7 @@ int dev_close(struct net_device *dev)
* Tell people we are going down, so that they can
* prepare for death, while the device is still operating.
*/
- notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
dev_deactivate(dev);
@@ -929,7 +933,7 @@ int dev_close(struct net_device *dev)
/*
* Tell people we are down
*/
- notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
return 0;
}
@@ -960,7 +964,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
int err;
rtnl_lock();
- err = notifier_chain_register(&netdev_chain, nb);
+ err = raw_notifier_chain_register(&netdev_chain, nb);
if (!err) {
for (dev = dev_base; dev; dev = dev->next) {
nb->notifier_call(nb, NETDEV_REGISTER, dev);
@@ -985,7 +989,12 @@ int register_netdevice_notifier(struct notifier_block *nb)
int unregister_netdevice_notifier(struct notifier_block *nb)
{
- return notifier_chain_unregister(&netdev_chain, nb);
+ int err;
+
+ rtnl_lock();
+ err = raw_notifier_chain_unregister(&netdev_chain, nb);
+ rtnl_unlock();
+ return err;
}
/**
@@ -994,12 +1003,12 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
* @v: pointer passed unmodified to notifier function
*
* Call all network notifier blocks. Parameters and return value
- * are as for notifier_call_chain().
+ * are as for raw_notifier_call_chain().
*/
int call_netdevice_notifiers(unsigned long val, void *v)
{
- return notifier_call_chain(&netdev_chain, val, v);
+ return raw_notifier_call_chain(&netdev_chain, val, v);
}
/* When > 0 there are consumers of rx skb time stamps */
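For reference, a hedged sketch of a typical consumer of this chain; this is the usage pattern the raw_notifier conversion preserves, not code from the patch, and kernel-module context is assumed:

    /* Names are illustrative. */
    static int my_netdev_event(struct notifier_block *nb,
                               unsigned long event, void *ptr)
    {
            struct net_device *dev = ptr;

            if (event == NETDEV_UP)
                    printk(KERN_INFO "%s is up\n", dev->name);
            return NOTIFY_DONE;
    }

    static struct notifier_block my_netdev_nb = {
            .notifier_call = my_netdev_event,
    };

    /* register_netdevice_notifier(&my_netdev_nb) at module init;
     * unregister_netdevice_notifier(&my_netdev_nb) at module exit. */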
@@ -1080,6 +1089,70 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
rcu_read_unlock();
}
+
+void __netif_schedule(struct net_device *dev)
+{
+ if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
+ unsigned long flags;
+ struct softnet_data *sd;
+
+ local_irq_save(flags);
+ sd = &__get_cpu_var(softnet_data);
+ dev->next_sched = sd->output_queue;
+ sd->output_queue = dev;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+ }
+}
+EXPORT_SYMBOL(__netif_schedule);
+
+void __netif_rx_schedule(struct net_device *dev)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ dev_hold(dev);
+ list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
+ if (dev->quota < 0)
+ dev->quota += dev->weight;
+ else
+ dev->quota = dev->weight;
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__netif_rx_schedule);
+
+void dev_kfree_skb_any(struct sk_buff *skb)
+{
+ if (in_irq() || irqs_disabled())
+ dev_kfree_skb_irq(skb);
+ else
+ dev_kfree_skb(skb);
+}
+EXPORT_SYMBOL(dev_kfree_skb_any);
+
+
+/* Hot-plugging. */
+void netif_device_detach(struct net_device *dev)
+{
+ if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+ netif_running(dev)) {
+ netif_stop_queue(dev);
+ }
+}
+EXPORT_SYMBOL(netif_device_detach);
+
+void netif_device_attach(struct net_device *dev)
+{
+ if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+ netif_running(dev)) {
+ netif_wake_queue(dev);
+ __netdev_watchdog_up(dev);
+ }
+}
+EXPORT_SYMBOL(netif_device_attach);
+
+
/*
* Invalidate hardware checksum when packet is to be mangled, and
* complete checksum manually on outgoing path.
@@ -1589,8 +1662,29 @@ static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
- if (dev->master)
+ if (dev->master) {
+ /*
+ * On bonding slaves other than the currently active
+ * slave, suppress duplicates except for 802.3ad
+ * ETH_P_SLOW and alb non-mcast/bcast.
+ */
+ if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+ if (dev->master->priv_flags & IFF_MASTER_ALB) {
+ if (skb->pkt_type != PACKET_BROADCAST &&
+ skb->pkt_type != PACKET_MULTICAST)
+ goto keep;
+ }
+
+ if (dev->master->priv_flags & IFF_MASTER_8023AD &&
+ skb->protocol == __constant_htons(ETH_P_SLOW))
+ goto keep;
+
+ kfree_skb(skb);
+ return NULL;
+ }
+keep:
skb->dev = dev->master;
+ }
return dev;
}
@@ -1734,6 +1828,9 @@ int netif_receive_skb(struct sk_buff *skb)
orig_dev = skb_bond(skb);
+ if (!orig_dev)
+ return NET_RX_DROP;
+
__get_cpu_var(netdev_rx_stat).total++;
skb->h.raw = skb->nh.raw = skb->data;
@@ -1891,8 +1988,7 @@ static void net_rx_action(struct softirq_action *h)
if (dev->quota <= 0 || dev->poll(dev, &budget)) {
netpoll_poll_unlock(have);
local_irq_disable();
- list_del(&dev->poll_list);
- list_add_tail(&dev->poll_list, &queue->poll_list);
+ list_move_tail(&dev->poll_list, &queue->poll_list);
if (dev->quota < 0)
dev->quota += dev->weight;
else
@@ -2182,7 +2278,7 @@ static struct file_operations softnet_seq_fops = {
.release = seq_release,
};
-#ifdef WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT
extern int wireless_proc_init(void);
#else
#define wireless_proc_init() 0
@@ -2256,7 +2352,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
* @dev: device
* @inc: modifier
*
- * Add or remove promsicuity from a device. While the count in the device
+ * Add or remove promiscuity from a device. While the count in the device
* remains above zero the interface remains promiscuous. Once it hits zero
* the device reverts back to normal filtering operation. A negative inc
* value is used to drop promiscuity on the device.
@@ -2274,6 +2370,12 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
printk(KERN_INFO "device %s %s promiscuous mode\n",
dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
"left");
+ audit_log(current->audit_context, GFP_ATOMIC,
+ AUDIT_ANOM_PROMISCUOUS,
+ "dev=%s prom=%d old_prom=%d auid=%u",
+ dev->name, (dev->flags & IFF_PROMISC),
+ (old_flags & IFF_PROMISC),
+ audit_get_loginuid(current->audit_context));
}
}
@@ -2306,12 +2408,20 @@ unsigned dev_get_flags(const struct net_device *dev)
flags = (dev->flags & ~(IFF_PROMISC |
IFF_ALLMULTI |
- IFF_RUNNING)) |
+ IFF_RUNNING |
+ IFF_LOWER_UP |
+ IFF_DORMANT)) |
(dev->gflags & (IFF_PROMISC |
IFF_ALLMULTI));
- if (netif_running(dev) && netif_carrier_ok(dev))
- flags |= IFF_RUNNING;
+ if (netif_running(dev)) {
+ if (netif_oper_up(dev))
+ flags |= IFF_RUNNING;
+ if (netif_carrier_ok(dev))
+ flags |= IFF_LOWER_UP;
+ if (netif_dormant(dev))
+ flags |= IFF_DORMANT;
+ }
return flags;
}
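The flags can be read from user space with a plain SIOCGIFFLAGS query. A sketch assuming an interface named "eth0"; note that ifr_flags is a short, so the higher IFF_LOWER_UP and IFF_DORMANT bits may not be representable through this legacy ioctl and are normally consumed via netlink instead:

    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <string.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            struct ifreq ifr;
            int fd = socket(AF_INET, SOCK_DGRAM, 0);

            if (fd < 0)
                    return 1;
            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
            if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0)
                    printf("UP=%d RUNNING=%d\n",
                           !!(ifr.ifr_flags & IFF_UP),
                           !!(ifr.ifr_flags & IFF_RUNNING));
            close(fd);
            return 0;
    }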
@@ -2354,7 +2464,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
if (dev->flags & IFF_UP &&
((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
IFF_VOLATILE)))
- notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+ raw_notifier_call_chain(&netdev_chain,
+ NETDEV_CHANGE, dev);
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? +1 : -1;
@@ -2398,8 +2509,8 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
else
dev->mtu = new_mtu;
if (!err && dev->flags & IFF_UP)
- notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEMTU, dev);
+ raw_notifier_call_chain(&netdev_chain,
+ NETDEV_CHANGEMTU, dev);
return err;
}
@@ -2415,7 +2526,8 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
return -ENODEV;
err = dev->set_mac_address(dev, sa);
if (!err)
- notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+ raw_notifier_call_chain(&netdev_chain,
+ NETDEV_CHANGEADDR, dev);
return err;
}
@@ -2471,7 +2583,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
return -EINVAL;
memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
- notifier_call_chain(&netdev_chain,
+ raw_notifier_call_chain(&netdev_chain,
NETDEV_CHANGEADDR, dev);
return 0;
@@ -2590,9 +2702,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
*/
if (cmd == SIOCGIFCONF) {
- rtnl_shlock();
+ rtnl_lock();
ret = dev_ifconf((char __user *) arg);
- rtnl_shunlock();
+ rtnl_unlock();
return ret;
}
if (cmd == SIOCGIFNAME)
@@ -2736,13 +2848,14 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
ret = -EFAULT;
return ret;
}
-#ifdef WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT
/* Take care of Wireless Extensions */
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
/* If command is `set a parameter', or
* `get the encoding parameters', check if
* the user has the right to do it */
- if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
+ if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
+ || cmd == SIOCGIWENCODEEXT) {
if (!capable(CAP_NET_ADMIN))
return -EPERM;
}
@@ -2757,7 +2870,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
ret = -EFAULT;
return ret;
}
-#endif /* WIRELESS_EXT */
+#endif /* CONFIG_WIRELESS_EXT */
return -EINVAL;
}
}
@@ -2820,6 +2933,8 @@ int register_netdevice(struct net_device *dev)
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
+ might_sleep();
+
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
@@ -2904,6 +3019,11 @@ int register_netdevice(struct net_device *dev)
if (!dev->rebuild_header)
dev->rebuild_header = default_rebuild_header;
+ ret = netdev_register_sysfs(dev);
+ if (ret)
+ goto out_err;
+ dev->reg_state = NETREG_REGISTERED;
+
/*
* Default initial state at registry is that the
* device is present.
@@ -2919,14 +3039,11 @@ int register_netdevice(struct net_device *dev)
hlist_add_head(&dev->name_hlist, head);
hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
dev_hold(dev);
- dev->reg_state = NETREG_REGISTERING;
write_unlock_bh(&dev_base_lock);
/* Notify protocols, that a new device appeared. */
- notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
- /* Finish registration after unlock */
- net_set_todo(dev);
ret = 0;
out:
@@ -2999,10 +3116,10 @@ static void netdev_wait_allrefs(struct net_device *dev)
rebroadcast_time = warning_time = jiffies;
while (atomic_read(&dev->refcnt) != 0) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
- rtnl_shlock();
+ rtnl_lock();
/* Rebroadcast unregister notification */
- notifier_call_chain(&netdev_chain,
+ raw_notifier_call_chain(&netdev_chain,
NETDEV_UNREGISTER, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
@@ -3016,7 +3133,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
linkwatch_run_queue();
}
- rtnl_shunlock();
+ __rtnl_unlock();
rebroadcast_time = jiffies;
}
@@ -3049,20 +3166,18 @@ static void netdev_wait_allrefs(struct net_device *dev)
*
* We are invoked by rtnl_unlock() after it drops the semaphore.
* This allows us to deal with problems:
- * 1) We can create/delete sysfs objects which invoke hotplug
+ * 1) We can delete sysfs objects which invoke hotplug
* without deadlocking with linkwatch via keventd.
* 2) Since we run with the RTNL semaphore not held, we can sleep
* safely in order to wait for the netdev refcnt to drop to zero.
*/
-static DECLARE_MUTEX(net_todo_run_mutex);
+static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
struct list_head list = LIST_HEAD_INIT(list);
- int err;
-
/* Need to guard against multiple cpu's getting out of order. */
- down(&net_todo_run_mutex);
+ mutex_lock(&net_todo_run_mutex);
/* Not safe to do outside the semaphore. We must not return
* until all unregister events invoked by the local processor
@@ -3082,44 +3197,33 @@ void netdev_run_todo(void)
= list_entry(list.next, struct net_device, todo_list);
list_del(&dev->todo_list);
- switch(dev->reg_state) {
- case NETREG_REGISTERING:
- dev->reg_state = NETREG_REGISTERED;
- err = netdev_register_sysfs(dev);
- if (err)
- printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
- dev->name, err);
- break;
-
- case NETREG_UNREGISTERING:
- netdev_unregister_sysfs(dev);
- dev->reg_state = NETREG_UNREGISTERED;
+ if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
+ printk(KERN_ERR "network todo '%s' but state %d\n",
+ dev->name, dev->reg_state);
+ dump_stack();
+ continue;
+ }
- netdev_wait_allrefs(dev);
+ netdev_unregister_sysfs(dev);
+ dev->reg_state = NETREG_UNREGISTERED;
- /* paranoia */
- BUG_ON(atomic_read(&dev->refcnt));
- BUG_TRAP(!dev->ip_ptr);
- BUG_TRAP(!dev->ip6_ptr);
- BUG_TRAP(!dev->dn_ptr);
+ netdev_wait_allrefs(dev);
+ /* paranoia */
+ BUG_ON(atomic_read(&dev->refcnt));
+ BUG_TRAP(!dev->ip_ptr);
+ BUG_TRAP(!dev->ip6_ptr);
+ BUG_TRAP(!dev->dn_ptr);
- /* It must be the very last action,
- * after this 'dev' may point to freed up memory.
- */
- if (dev->destructor)
- dev->destructor(dev);
- break;
-
- default:
- printk(KERN_ERR "network todo '%s' but state %d\n",
- dev->name, dev->reg_state);
- break;
- }
+ /* It must be the very last action,
+ * after this 'dev' may point to freed up memory.
+ */
+ if (dev->destructor)
+ dev->destructor(dev);
}
out:
- up(&net_todo_run_mutex);
+ mutex_unlock(&net_todo_run_mutex);
}
/**
@@ -3142,12 +3246,11 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
- p = kmalloc(alloc_size, GFP_KERNEL);
+ p = kzalloc(alloc_size, GFP_KERNEL);
if (!p) {
printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
return NULL;
}
- memset(p, 0, alloc_size);
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
@@ -3173,7 +3276,7 @@ EXPORT_SYMBOL(alloc_netdev);
void free_netdev(struct net_device *dev)
{
#ifdef CONFIG_SYSFS
- /* Compatiablity with error handling in drivers */
+ /* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
return;
@@ -3258,7 +3361,7 @@ int unregister_netdevice(struct net_device *dev)
/* Notify protocols, that we are about to destroy
this device. They should clean all the things.
*/
- notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
/*
* Flush the multicast chain
@@ -3389,7 +3492,7 @@ static int __init net_dev_init(void)
* Initialise the packet receive queues.
*/
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
struct softnet_data *queue;
queue = &per_cpu(softnet_data, i);
diff --git a/linux-2.6-xen-sparse/net/core/skbuff.c b/linux-2.6-xen-sparse/net/core/skbuff.c
index 5a524946b0..c38885ea9a 100644
--- a/linux-2.6-xen-sparse/net/core/skbuff.c
+++ b/linux-2.6-xen-sparse/net/core/skbuff.c
@@ -112,6 +112,14 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
BUG();
}
+void skb_truesize_bug(struct sk_buff *skb)
+{
+ printk(KERN_ERR "SKB BUG: Invalid truesize (%u) "
+ "len=%u, sizeof(sk_buff)=%Zd\n",
+ skb->truesize, skb->len, sizeof(struct sk_buff));
+}
+EXPORT_SYMBOL(skb_truesize_bug);
+
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
* 'private' fields and also do memory statistics to find all the
* [BEEP] leaks.
@@ -150,7 +158,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
/* Get the DATA. Size must match skb_add_mtu(). */
size = SKB_DATA_ALIGN(size);
- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
if (!data)
goto nodata;
@@ -261,11 +269,11 @@ nodata:
}
-static void skb_drop_list(struct sk_buff **listp)
+static void skb_drop_fraglist(struct sk_buff *skb)
{
- struct sk_buff *list = *listp;
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
- *listp = NULL;
+ skb_shinfo(skb)->frag_list = NULL;
do {
struct sk_buff *this = list;
@@ -274,11 +282,6 @@ static void skb_drop_list(struct sk_buff **listp)
} while (list);
}
-static inline void skb_drop_fraglist(struct sk_buff *skb)
-{
- skb_drop_list(&skb_shinfo(skb)->frag_list);
-}
-
static void skb_clone_fraglist(struct sk_buff *skb)
{
struct sk_buff *list;
@@ -380,6 +383,24 @@ void __kfree_skb(struct sk_buff *skb)
}
/**
+ * kfree_skb - free an sk_buff
+ * @skb: buffer to free
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero.
+ */
+void kfree_skb(struct sk_buff *skb)
+{
+ if (unlikely(!skb))
+ return;
+ if (likely(atomic_read(&skb->users) == 1))
+ smp_rmb();
+ else if (likely(!atomic_dec_and_test(&skb->users)))
+ return;
+ __kfree_skb(skb);
+}
+
+/**
* skb_clone - duplicate an sk_buff
* @skb: buffer to clone
* @gfp_mask: allocation priority
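The new kfree_skb() wrapper takes a fast path when the caller is the sole owner of the buffer. A user-space analogue of the refcount-drop pattern with C11 atomics; the names are illustrative:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct buf {
            atomic_int users;
            /* payload ... */
    };

    static void buf_put(struct buf *b)
    {
            if (!b)
                    return;
            /* Sole owner: skip the atomic RMW, pay only for an
             * acquire load (the kernel uses smp_rmb() here). */
            if (atomic_load_explicit(&b->users, memory_order_acquire) != 1 &&
                atomic_fetch_sub(&b->users, 1) != 1)
                    return;         /* other references remain */
            free(b);
    }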
@@ -609,7 +630,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
n->csum = skb->csum;
n->ip_summed = skb->ip_summed;
- n->truesize += skb->data_len;
n->data_len = skb->data_len;
n->len = skb->len;
@@ -804,86 +824,49 @@ struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
return nskb;
}
-/* Trims skb to length len. It can change skb pointers.
+/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
+ * If realloc==0 and trimming is impossible without change of data,
+ * it is BUG().
*/
-int ___pskb_trim(struct sk_buff *skb, unsigned int len)
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
{
- struct sk_buff **fragp;
- struct sk_buff *frag;
int offset = skb_headlen(skb);
int nfrags = skb_shinfo(skb)->nr_frags;
int i;
- int err;
- if (skb_cloned(skb) &&
- unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
- return err;
-
- i = 0;
- if (offset >= len)
- goto drop_pages;
-
- for (; i < nfrags; i++) {
+ for (i = 0; i < nfrags; i++) {
int end = offset + skb_shinfo(skb)->frags[i].size;
-
- if (end < len) {
- offset = end;
- continue;
- }
-
- skb_shinfo(skb)->frags[i++].size = len - offset;
-
-drop_pages:
- skb_shinfo(skb)->nr_frags = i;
-
- for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
-
- if (skb_shinfo(skb)->frag_list)
- skb_drop_fraglist(skb);
- goto done;
- }
-
- for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
- fragp = &frag->next) {
- int end = offset + frag->len;
-
- if (skb_shared(frag)) {
- struct sk_buff *nfrag;
-
- nfrag = skb_clone(frag, GFP_ATOMIC);
- if (unlikely(!nfrag))
- return -ENOMEM;
-
- nfrag->next = frag->next;
- kfree_skb(frag);
- frag = nfrag;
- *fragp = frag;
- }
-
- if (end < len) {
- offset = end;
- continue;
+ if (end > len) {
+ if (skb_cloned(skb)) {
+ BUG_ON(!realloc);
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ return -ENOMEM;
+ }
+ if (len <= offset) {
+ put_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb)->nr_frags--;
+ } else {
+ skb_shinfo(skb)->frags[i].size = len - offset;
+ }
}
-
- if (end > len &&
- unlikely((err = pskb_trim(frag, len - offset))))
- return err;
-
- if (frag->next)
- skb_drop_list(&frag->next);
- break;
+ offset = end;
}
-done:
- if (len > skb_headlen(skb)) {
+ if (offset < len) {
skb->data_len -= skb->len - len;
skb->len = len;
} else {
- skb->len = len;
- skb->data_len = 0;
- skb->tail = skb->data + len;
+ if (len <= skb_headlen(skb)) {
+ skb->len = len;
+ skb->data_len = 0;
+ skb->tail = skb->data + len;
+ if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+ skb_drop_fraglist(skb);
+ } else {
+ skb->data_len -= skb->len - len;
+ skb->len = len;
+ }
}
return 0;
@@ -1970,6 +1953,29 @@ err:
EXPORT_SYMBOL_GPL(skb_segment);
+/**
+ * skb_pull_rcsum - pull skb and update receive checksum
+ * @skb: buffer to update
+ * @len: length of data pulled
+ *
+ * This function performs an skb_pull on the packet and updates
+ * the CHECKSUM_HW checksum. It should be used on receive
+ * path processing instead of skb_pull unless you know that the
+ * checksum difference is zero (e.g., a valid IP header) or you
+ * are setting ip_summed to CHECKSUM_NONE.
+ */
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
+{
+ BUG_ON(len > skb->len);
+ skb->len -= len;
+ BUG_ON(skb->len < skb->data_len);
+ skb_postpull_rcsum(skb, skb->data, len);
+ return skb->data += len;
+}
+
+EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
@@ -1992,6 +1998,7 @@ void __init skb_init(void)
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
+EXPORT_SYMBOL(kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
EXPORT_SYMBOL(__alloc_skb);
EXPORT_SYMBOL(pskb_copy);