lose latest digit of the sparse tree for 2.4

linux-2.4.30-xen-sparse is now linux-2.4-xen-sparse Signed-off-by: Vincent Hanquez <vincent@xensource.com> --HG-- rename : linux-2.4.30-xen-sparse/Makefile => linux-2.4-xen-sparse/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/Makefile => linux-2.4-xen-sparse/arch/xen/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/boot/Makefile => linux-2.4-xen-sparse/arch/xen/boot/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/config.in => linux-2.4-xen-sparse/arch/xen/config.in rename : linux-2.4.30-xen-sparse/arch/xen/defconfig-xen0 => linux-2.4-xen-sparse/arch/xen/defconfig-xen0 rename : linux-2.4.30-xen-sparse/arch/xen/defconfig-xenU => linux-2.4-xen-sparse/arch/xen/defconfig-xenU rename : linux-2.4.30-xen-sparse/arch/xen/drivers/balloon/Makefile => linux-2.4-xen-sparse/arch/xen/drivers/balloon/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/Makefile => linux-2.4-xen-sparse/arch/xen/drivers/blkif/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/backend/Makefile => linux-2.4-xen-sparse/arch/xen/drivers/blkif/backend/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile => linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/frontend/common.h => linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/common.h rename : linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c => linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c rename : linux-2.4.30-xen-sparse/arch/xen/drivers/console/Makefile => linux-2.4-xen-sparse/arch/xen/drivers/console/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/drivers/dom0/Makefile => linux-2.4-xen-sparse/arch/xen/drivers/dom0/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/drivers/evtchn/Makefile => linux-2.4-xen-sparse/arch/xen/drivers/evtchn/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/drivers/netif/Makefile => linux-2.4-xen-sparse/arch/xen/drivers/netif/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/drivers/netif/backend/Makefile => linux-2.4-xen-sparse/arch/xen/drivers/netif/backend/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/drivers/netif/frontend/Makefile => linux-2.4-xen-sparse/arch/xen/drivers/netif/frontend/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/kernel/Makefile => linux-2.4-xen-sparse/arch/xen/kernel/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/kernel/entry.S => linux-2.4-xen-sparse/arch/xen/kernel/entry.S rename : linux-2.4.30-xen-sparse/arch/xen/kernel/head.S => linux-2.4-xen-sparse/arch/xen/kernel/head.S rename : linux-2.4.30-xen-sparse/arch/xen/kernel/i386_ksyms.c => linux-2.4-xen-sparse/arch/xen/kernel/i386_ksyms.c rename : linux-2.4.30-xen-sparse/arch/xen/kernel/irq.c => linux-2.4-xen-sparse/arch/xen/kernel/irq.c rename : linux-2.4.30-xen-sparse/arch/xen/kernel/ldt.c => linux-2.4-xen-sparse/arch/xen/kernel/ldt.c rename : linux-2.4.30-xen-sparse/arch/xen/kernel/pci-pc.c => linux-2.4-xen-sparse/arch/xen/kernel/pci-pc.c rename : linux-2.4.30-xen-sparse/arch/xen/kernel/process.c => linux-2.4-xen-sparse/arch/xen/kernel/process.c rename : linux-2.4.30-xen-sparse/arch/xen/kernel/setup.c => linux-2.4-xen-sparse/arch/xen/kernel/setup.c rename : linux-2.4.30-xen-sparse/arch/xen/kernel/signal.c => linux-2.4-xen-sparse/arch/xen/kernel/signal.c rename : linux-2.4.30-xen-sparse/arch/xen/kernel/time.c => linux-2.4-xen-sparse/arch/xen/kernel/time.c rename : linux-2.4.30-xen-sparse/arch/xen/kernel/traps.c => linux-2.4-xen-sparse/arch/xen/kernel/traps.c rename : linux-2.4.30-xen-sparse/arch/xen/lib/Makefile => linux-2.4-xen-sparse/arch/xen/lib/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/lib/delay.c => linux-2.4-xen-sparse/arch/xen/lib/delay.c rename : linux-2.4.30-xen-sparse/arch/xen/mm/Makefile => linux-2.4-xen-sparse/arch/xen/mm/Makefile rename : linux-2.4.30-xen-sparse/arch/xen/mm/fault.c => linux-2.4-xen-sparse/arch/xen/mm/fault.c rename : linux-2.4.30-xen-sparse/arch/xen/mm/init.c => linux-2.4-xen-sparse/arch/xen/mm/init.c rename : linux-2.4.30-xen-sparse/arch/xen/mm/ioremap.c => linux-2.4-xen-sparse/arch/xen/mm/ioremap.c rename : linux-2.4.30-xen-sparse/arch/xen/vmlinux.lds => linux-2.4-xen-sparse/arch/xen/vmlinux.lds rename : linux-2.4.30-xen-sparse/drivers/block/ll_rw_blk.c => linux-2.4-xen-sparse/drivers/block/ll_rw_blk.c rename : linux-2.4.30-xen-sparse/drivers/char/Makefile => linux-2.4-xen-sparse/drivers/char/Makefile rename : linux-2.4.30-xen-sparse/drivers/char/mem.c => linux-2.4-xen-sparse/drivers/char/mem.c rename : linux-2.4.30-xen-sparse/drivers/char/tty_io.c => linux-2.4-xen-sparse/drivers/char/tty_io.c rename : linux-2.4.30-xen-sparse/drivers/scsi/aic7xxx/Makefile => linux-2.4-xen-sparse/drivers/scsi/aic7xxx/Makefile rename : linux-2.4.30-xen-sparse/include/asm-xen/bugs.h => linux-2.4-xen-sparse/include/asm-xen/bugs.h rename : linux-2.4.30-xen-sparse/include/asm-xen/desc.h => linux-2.4-xen-sparse/include/asm-xen/desc.h rename : linux-2.4.30-xen-sparse/include/asm-xen/fixmap.h => linux-2.4-xen-sparse/include/asm-xen/fixmap.h rename : linux-2.4.30-xen-sparse/include/asm-xen/highmem.h => linux-2.4-xen-sparse/include/asm-xen/highmem.h rename : linux-2.4.30-xen-sparse/include/asm-xen/hw_irq.h => linux-2.4-xen-sparse/include/asm-xen/hw_irq.h rename : linux-2.4.30-xen-sparse/include/asm-xen/io.h => linux-2.4-xen-sparse/include/asm-xen/io.h rename : linux-2.4.30-xen-sparse/include/asm-xen/irq.h => linux-2.4-xen-sparse/include/asm-xen/irq.h rename : linux-2.4.30-xen-sparse/include/asm-xen/keyboard.h => linux-2.4-xen-sparse/include/asm-xen/keyboard.h rename : linux-2.4.30-xen-sparse/include/asm-xen/mmu_context.h => linux-2.4-xen-sparse/include/asm-xen/mmu_context.h rename : linux-2.4.30-xen-sparse/include/asm-xen/module.h => linux-2.4-xen-sparse/include/asm-xen/module.h rename : linux-2.4.30-xen-sparse/include/asm-xen/page.h => linux-2.4-xen-sparse/include/asm-xen/page.h rename : linux-2.4.30-xen-sparse/include/asm-xen/pci.h => linux-2.4-xen-sparse/include/asm-xen/pci.h rename : linux-2.4.30-xen-sparse/include/asm-xen/pgalloc.h => linux-2.4-xen-sparse/include/asm-xen/pgalloc.h rename : linux-2.4.30-xen-sparse/include/asm-xen/pgtable-2level.h => linux-2.4-xen-sparse/include/asm-xen/pgtable-2level.h rename : linux-2.4.30-xen-sparse/include/asm-xen/pgtable.h => linux-2.4-xen-sparse/include/asm-xen/pgtable.h rename : linux-2.4.30-xen-sparse/include/asm-xen/processor.h => linux-2.4-xen-sparse/include/asm-xen/processor.h rename : linux-2.4.30-xen-sparse/include/asm-xen/queues.h => linux-2.4-xen-sparse/include/asm-xen/queues.h rename : linux-2.4.30-xen-sparse/include/asm-xen/segment.h => linux-2.4-xen-sparse/include/asm-xen/segment.h rename : linux-2.4.30-xen-sparse/include/asm-xen/smp.h => linux-2.4-xen-sparse/include/asm-xen/smp.h rename : linux-2.4.30-xen-sparse/include/asm-xen/system.h => linux-2.4-xen-sparse/include/asm-xen/system.h rename : linux-2.4.30-xen-sparse/include/asm-xen/vga.h => linux-2.4-xen-sparse/include/asm-xen/vga.h rename : linux-2.4.30-xen-sparse/include/asm-xen/xor.h => linux-2.4-xen-sparse/include/asm-xen/xor.h rename : linux-2.4.30-xen-sparse/include/linux/blk.h => linux-2.4-xen-sparse/include/linux/blk.h rename : linux-2.4.30-xen-sparse/include/linux/highmem.h => linux-2.4-xen-sparse/include/linux/highmem.h rename : linux-2.4.30-xen-sparse/include/linux/irq.h => linux-2.4-xen-sparse/include/linux/irq.h rename : linux-2.4.30-xen-sparse/include/linux/mm.h => linux-2.4-xen-sparse/include/linux/mm.h rename : linux-2.4.30-xen-sparse/include/linux/sched.h => linux-2.4-xen-sparse/include/linux/sched.h rename : linux-2.4.30-xen-sparse/include/linux/skbuff.h => linux-2.4-xen-sparse/include/linux/skbuff.h rename : linux-2.4.30-xen-sparse/include/linux/timer.h => linux-2.4-xen-sparse/include/linux/timer.h rename : linux-2.4.30-xen-sparse/kernel/time.c => linux-2.4-xen-sparse/kernel/time.c rename : linux-2.4.30-xen-sparse/kernel/timer.c => linux-2.4-xen-sparse/kernel/timer.c rename : linux-2.4.30-xen-sparse/mkbuildtree => linux-2.4-xen-sparse/mkbuildtree rename : linux-2.4.30-xen-sparse/mm/highmem.c => linux-2.4-xen-sparse/mm/highmem.c rename : linux-2.4.30-xen-sparse/mm/memory.c => linux-2.4-xen-sparse/mm/memory.c rename : linux-2.4.30-xen-sparse/mm/mprotect.c => linux-2.4-xen-sparse/mm/mprotect.c rename : linux-2.4.30-xen-sparse/mm/mremap.c => linux-2.4-xen-sparse/mm/mremap.c rename : linux-2.4.30-xen-sparse/mm/page_alloc.c => linux-2.4-xen-sparse/mm/page_alloc.c rename : linux-2.4.30-xen-sparse/net/core/skbuff.c => linux-2.4-xen-sparse/net/core/skbuff.c
author: vh249@kneesaa.uk.xensource.com <vh249@kneesaa.uk.xensource.com> 2005-07-11 09:29:56 -0500
committer: vh249@kneesaa.uk.xensource.com <vh249@kneesaa.uk.xensource.com> 2005-07-11 09:29:56 -0500
commit: 105077619922d8c782b74491afd1b406dc654fa7 (patch)
tree: 88e84b9d5206ea2039b20c3eeb3730727cf64d2c /linux-2.4-xen-sparse
parent: a81ff1692529ed28d67c4fbc78ffe54f9ecf2278 (diff)
download: xen-105077619922d8c782b74491afd1b406dc654fa7.tar.gz
xen-105077619922d8c782b74491afd1b406dc654fa7.tar.bz2
xen-105077619922d8c782b74491afd1b406dc654fa7.zip
80 files changed, 31293 insertions, 0 deletions
diff --git a/linux-2.4-xen-sparse/Makefile b/linux-2.4-xen-sparse/Makefile
new file mode 100644
index 0000000000..503ef53773
--- /dev/null
+++ b/linux-2.4-xen-sparse/Makefile
@@ -0,0 +1,592 @@
+VERSION = 2
+PATCHLEVEL = 4
+SUBLEVEL = 30
+EXTRAVERSION =
+
+KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
+
+# SUBARCH always tells us the underlying machine architecture.
+# Unless overridden, by default ARCH is equivalent to SUBARCH.
+# This will be overriden for Xen and UML builds.
+SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
+ARCH ?= $(SUBARCH)
+
+## XXX The following hack can be discarded after users have adjusted to the
+## architectural name change 'xeno' -> 'xen'.
+ifeq ($(ARCH),xeno)
+  ARCH := xen
+endif
+
+KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//g")
+
+CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
+	  else if [ -x /bin/bash ]; then echo /bin/bash; \
+	  else echo sh; fi ; fi)
+TOPDIR	:= $(shell /bin/pwd)
+
+HPATH   	= $(TOPDIR)/include
+FINDHPATH	= $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net $(HPATH)/math-emu
+
+HOSTCC  	= gcc
+HOSTCFLAGS	= -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+
+CROSS_COMPILE 	=
+
+#
+# Include the make variables (CC, etc...)
+#
+
+AS		= $(CROSS_COMPILE)as
+LD		= $(CROSS_COMPILE)ld
+CC		= $(CROSS_COMPILE)gcc
+CPP		= $(CC) -E
+AR		= $(CROSS_COMPILE)ar
+NM		= $(CROSS_COMPILE)nm
+STRIP		= $(CROSS_COMPILE)strip
+OBJCOPY		= $(CROSS_COMPILE)objcopy
+OBJDUMP		= $(CROSS_COMPILE)objdump
+MAKEFILES	= $(TOPDIR)/.config
+GENKSYMS	= /sbin/genksyms
+DEPMOD		= /sbin/depmod
+MODFLAGS	= -DMODULE
+CFLAGS_KERNEL	=
+PERL		= perl
+AWK		= awk
+RPM 		:= $(shell if [ -x "/usr/bin/rpmbuild" ]; then echo rpmbuild; \
+		    	else echo rpm; fi)
+
+export	VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \
+	CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \
+	CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL AWK
+
+all:	do-it-all
+
+#
+# Make "config" the default target if there is no configuration file or
+# "depend" the target if there is no top-level dependency information.
+#
+
+ifeq (.config,$(wildcard .config))
+include .config
+ifeq (.depend,$(wildcard .depend))
+include .depend
+do-it-all:	Version vmlinux
+else
+CONFIGURATION = depend
+do-it-all:	depend
+endif
+else
+CONFIGURATION = config
+do-it-all:	config
+endif
+
+#
+# INSTALL_PATH specifies where to place the updated kernel and system map
+# images.  Uncomment if you want to place them anywhere other than root.
+#
+
+#export	INSTALL_PATH=/boot
+
+#
+# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
+# relocations required by build roots.  This is not defined in the
+# makefile but the arguement can be passed to make if needed.
+#
+
+MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
+export MODLIB
+
+#
+# standard CFLAGS
+#
+
+CPPFLAGS := -D__KERNEL__ -I$(HPATH)
+
+CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \
+	  -fno-strict-aliasing -fno-common
+ifndef CONFIG_FRAME_POINTER
+CFLAGS += -fomit-frame-pointer
+endif
+AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS)
+
+#
+# ROOT_DEV specifies the default root-device when making the image.
+# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
+# the default of FLOPPY is used by 'build'.
+# This is i386 specific.
+#
+
+export ROOT_DEV = CURRENT
+
+#
+# If you want to preset the SVGA mode, uncomment the next line and
+# set SVGA_MODE to whatever number you want.
+# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
+# The number is the same as you would ordinarily press at bootup.
+# This is i386 specific.
+#
+
+export SVGA_MODE = -DSVGA_MODE=NORMAL_VGA
+
+#
+# If you want the RAM disk device, define this to be the size in blocks.
+# This is i386 specific.
+#
+
+#export RAMDISK = -DRAMDISK=512
+
+CORE_FILES	=kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o
+NETWORKS	=net/network.o
+
+LIBS		=$(TOPDIR)/lib/lib.a
+SUBDIRS		=kernel drivers mm fs net ipc lib crypto
+
+DRIVERS-n :=
+DRIVERS-y :=
+DRIVERS-m :=
+DRIVERS-  :=
+
+DRIVERS-$(CONFIG_ACPI_BOOT) += drivers/acpi/acpi.o
+DRIVERS-$(CONFIG_PARPORT) += drivers/parport/driver.o
+DRIVERS-y += drivers/char/char.o \
+	drivers/block/block.o \
+	drivers/misc/misc.o \
+	drivers/net/net.o
+DRIVERS-$(CONFIG_AGP) += drivers/char/agp/agp.o
+DRIVERS-$(CONFIG_DRM_NEW) += drivers/char/drm/drm.o
+DRIVERS-$(CONFIG_DRM_OLD) += drivers/char/drm-4.0/drm.o
+DRIVERS-$(CONFIG_NUBUS) += drivers/nubus/nubus.a
+DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.o
+DRIVERS-$(CONFIG_DEV_APPLETALK) += drivers/net/appletalk/appletalk.o
+DRIVERS-$(CONFIG_TR) += drivers/net/tokenring/tr.o
+DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.o
+DRIVERS-$(CONFIG_ARCNET) += drivers/net/arcnet/arcnetdrv.o
+DRIVERS-$(CONFIG_ATM) += drivers/atm/atm.o
+DRIVERS-$(CONFIG_IDE) += drivers/ide/idedriver.o
+DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a
+DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o
+DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o
+DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o
+
+ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),)
+DRIVERS-y += drivers/cdrom/driver.o
+endif
+
+DRIVERS-$(CONFIG_SOUND) += drivers/sound/sounddrivers.o
+DRIVERS-$(CONFIG_PCI) += drivers/pci/driver.o
+DRIVERS-$(CONFIG_MTD) += drivers/mtd/mtdlink.o
+DRIVERS-$(CONFIG_PCMCIA) += drivers/pcmcia/pcmcia.o
+DRIVERS-$(CONFIG_NET_PCMCIA) += drivers/net/pcmcia/pcmcia_net.o
+DRIVERS-$(CONFIG_NET_WIRELESS) += drivers/net/wireless/wireless_net.o
+DRIVERS-$(CONFIG_PCMCIA_CHRDEV) += drivers/char/pcmcia/pcmcia_char.o
+DRIVERS-$(CONFIG_DIO) += drivers/dio/dio.a
+DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o
+DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o
+DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a
+DRIVERS-$(CONFIG_PPC32) += drivers/macintosh/macintosh.o
+DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o
+DRIVERS-$(CONFIG_ISAPNP) += drivers/pnp/pnp.o
+DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c.o
+DRIVERS-$(CONFIG_VT) += drivers/video/video.o
+DRIVERS-$(CONFIG_PARIDE) += drivers/block/paride/paride.a
+DRIVERS-$(CONFIG_HAMRADIO) += drivers/net/hamradio/hamradio.o
+DRIVERS-$(CONFIG_TC) += drivers/tc/tc.a
+DRIVERS-$(CONFIG_USB) += drivers/usb/usbdrv.o
+DRIVERS-$(CONFIG_USB_GADGET) += drivers/usb/gadget/built-in.o
+DRIVERS-y +=drivers/media/media.o
+DRIVERS-$(CONFIG_INPUT) += drivers/input/inputdrv.o
+DRIVERS-$(CONFIG_HIL) += drivers/hil/hil.o
+DRIVERS-$(CONFIG_I2O) += drivers/message/i2o/i2o.o
+DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda.o
+DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.o
+DRIVERS-$(CONFIG_MD) += drivers/md/mddev.o
+DRIVERS-$(CONFIG_GSC) += drivers/gsc/gscbus.o
+DRIVERS-$(CONFIG_BLUEZ) += drivers/bluetooth/bluetooth.o
+DRIVERS-$(CONFIG_HOTPLUG_PCI) += drivers/hotplug/vmlinux-obj.o
+DRIVERS-$(CONFIG_ISDN_BOOL) += drivers/isdn/vmlinux-obj.o
+DRIVERS-$(CONFIG_CRYPTO) += crypto/crypto.o
+
+DRIVERS := $(DRIVERS-y)
+
+
+# files removed with 'make clean'
+CLEAN_FILES = \
+	kernel/ksyms.lst include/linux/compile.h \
+	vmlinux System.map \
+	.tmp* \
+	drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \
+	drivers/char/conmakehash \
+	drivers/char/drm/*-mod.c \
+	drivers/pci/devlist.h drivers/pci/classlist.h drivers/pci/gen-devlist \
+	drivers/zorro/devlist.h drivers/zorro/gen-devlist \
+	drivers/sound/bin2hex drivers/sound/hex2hex \
+	drivers/atm/fore200e_mkfirm drivers/atm/{pca,sba}*{.bin,.bin1,.bin2} \
+	drivers/scsi/aic7xxx/aicasm/aicasm \
+	drivers/scsi/aic7xxx/aicasm/aicasm_gram.c \
+	drivers/scsi/aic7xxx/aicasm/aicasm_gram.h \
+	drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.c \
+	drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.h \
+	drivers/scsi/aic7xxx/aicasm/aicasm_macro_scan.c \
+	drivers/scsi/aic7xxx/aicasm/aicasm_scan.c \
+	drivers/scsi/aic7xxx/aicasm/aicdb.h \
+	drivers/scsi/aic7xxx/aicasm/y.tab.h \
+	drivers/scsi/53c700_d.h \
+	drivers/tc/lk201-map.c \
+	net/khttpd/make_times_h \
+	net/khttpd/times.h \
+	submenu* \
+	drivers/ieee1394/oui.c
+# directories removed with 'make clean'
+CLEAN_DIRS = \
+	modules
+
+# files removed with 'make mrproper'
+MRPROPER_FILES = \
+	include/linux/autoconf.h include/linux/version.h \
+	lib/crc32table.h lib/gen_crc32table \
+	drivers/net/hamradio/soundmodem/sm_tbl_{afsk1200,afsk2666,fsk9600}.h \
+	drivers/net/hamradio/soundmodem/sm_tbl_{hapn4800,psk4800}.h \
+	drivers/net/hamradio/soundmodem/sm_tbl_{afsk2400_7,afsk2400_8}.h \
+	drivers/net/hamradio/soundmodem/gentbl \
+	drivers/sound/*_boot.h drivers/sound/.*.boot \
+	drivers/sound/msndinit.c \
+	drivers/sound/msndperm.c \
+	drivers/sound/pndsperm.c \
+	drivers/sound/pndspini.c \
+	drivers/atm/fore200e_*_fw.c drivers/atm/.fore200e_*.fw \
+	.version .config* config.in config.old \
+	scripts/tkparse scripts/kconfig.tk scripts/kconfig.tmp \
+	scripts/lxdialog/*.o scripts/lxdialog/lxdialog \
+	.menuconfig.log \
+	include/asm \
+	.hdepend scripts/mkdep scripts/split-include scripts/docproc \
+	$(TOPDIR)/include/linux/modversions.h \
+	kernel.spec
+
+# directories removed with 'make mrproper'
+MRPROPER_DIRS = \
+	include/config \
+	$(TOPDIR)/include/linux/modules
+
+
+include arch/$(ARCH)/Makefile
+
+# Extra cflags for kbuild 2.4.  The default is to forbid includes by kernel code
+# from user space headers.  Some UML code requires user space headers, in the
+# UML Makefiles add 'kbuild_2_4_nostdinc :=' before include Rules.make.  No
+# other kernel code should include user space headers, if you need
+# 'kbuild_2_4_nostdinc :=' or -I/usr/include for kernel code and you are not UML
+# then your code is broken!  KAO.
+
+kbuild_2_4_nostdinc	:= -nostdinc -iwithprefix include
+export kbuild_2_4_nostdinc
+
+export	CPPFLAGS CFLAGS CFLAGS_KERNEL AFLAGS AFLAGS_KERNEL
+
+export	NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS
+
+.S.s:
+	$(CPP) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -o $*.s $<
+.S.o:
+	$(CC) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -c -o $*.o $<
+
+Version: dummy
+	@rm -f include/linux/compile.h
+
+boot: vmlinux
+	@$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot
+
+vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o init/do_mounts.o linuxsubdirs
+	$(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o init/do_mounts.o \
+		--start-group \
+		$(CORE_FILES) \
+		$(DRIVERS) \
+		$(NETWORKS) \
+		$(LIBS) \
+		--end-group \
+		-o vmlinux
+	$(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map
+
+symlinks:
+	rm -f include/asm
+	( cd include ; ln -sf asm-$(ARCH) asm)
+	@if [ ! -d include/linux/modules ]; then \
+		mkdir include/linux/modules; \
+	fi
+
+oldconfig: symlinks
+	$(CONFIG_SHELL) scripts/Configure -d arch/$(ARCH)/config.in
+
+xconfig: symlinks
+	$(MAKE) -C scripts kconfig.tk
+	wish -f scripts/kconfig.tk
+
+menuconfig: include/linux/version.h symlinks
+	$(MAKE) -C scripts/lxdialog all
+	$(CONFIG_SHELL) scripts/Menuconfig arch/$(ARCH)/config.in
+
+config: symlinks
+	$(CONFIG_SHELL) scripts/Configure arch/$(ARCH)/config.in
+
+include/config/MARKER: scripts/split-include include/linux/autoconf.h
+	scripts/split-include include/linux/autoconf.h include/config
+	@ touch include/config/MARKER
+
+linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS))
+
+$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER
+	$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C $(patsubst _dir_%, %, $@)
+
+$(TOPDIR)/include/linux/version.h: include/linux/version.h
+$(TOPDIR)/include/linux/compile.h: include/linux/compile.h
+
+newversion:
+	. scripts/mkversion > .tmpversion
+	@mv -f .tmpversion .version
+
+uts_len		:= 64
+uts_truncate	:= sed -e 's/\(.\{1,$(uts_len)\}\).*/\1/'
+
+include/linux/compile.h: $(CONFIGURATION) include/linux/version.h newversion
+	@echo -n \#`cat .version` > .ver1
+	@if [ -n "$(CONFIG_SMP)" ] ; then echo -n " SMP" >> .ver1; fi
+	@if [ -f .name ]; then  echo -n \-`cat .name` >> .ver1; fi
+	@LANG=C echo ' '`date` >> .ver1
+	@echo \#define UTS_VERSION \"`cat .ver1 | $(uts_truncate)`\" > .ver
+	@LANG=C echo \#define LINUX_COMPILE_TIME \"`date +%T`\" >> .ver
+	@echo \#define LINUX_COMPILE_BY \"`whoami`\" >> .ver
+	@echo \#define LINUX_COMPILE_HOST \"`hostname | $(uts_truncate)`\" >> .ver
+	@([ -x /bin/dnsdomainname ] && /bin/dnsdomainname > .ver1) || \
+	 ([ -x /bin/domainname ] && /bin/domainname > .ver1) || \
+	 echo > .ver1
+	@echo \#define LINUX_COMPILE_DOMAIN \"`cat .ver1 | $(uts_truncate)`\" >> .ver
+	@echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -n 1`\" >> .ver
+	@mv -f .ver $@
+	@rm -f .ver1
+
+include/linux/version.h: ./Makefile
+	@expr length "$(KERNELRELEASE)" \<= $(uts_len) > /dev/null || \
+	  (echo KERNELRELEASE \"$(KERNELRELEASE)\" exceeds $(uts_len) characters >&2; false)
+	@echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver
+	@echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver
+	@echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver
+	@mv -f .ver $@
+
+comma	:= ,
+
+init/version.o: init/version.c include/linux/compile.h include/config/MARKER
+	$(CC) $(CFLAGS) $(CFLAGS_KERNEL) -DUTS_MACHINE='"$(SUBARCH)"' -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o init/version.o init/version.c
+
+init/main.o: init/main.c include/config/MARKER
+	$(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o $@ $<
+
+init/do_mounts.o: init/do_mounts.c include/config/MARKER
+	$(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o $@ $<
+
+fs lib mm ipc kernel drivers net: dummy
+	$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" $(subst $@, _dir_$@, $@)
+
+TAGS: dummy
+	{ find include/asm-${ARCH} -name '*.h' -print ; \
+	find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print ; \
+	find $(SUBDIRS) init arch/${ARCH} -name '*.[chS]' ; } | grep -v SCCS | grep -v '\.svn' | etags -
+
+# Exuberant ctags works better with -I
+tags: dummy
+	CTAGSF=`ctags --version | grep -i exuberant >/dev/null && echo "-I __initdata,__exitdata,EXPORT_SYMBOL,EXPORT_SYMBOL_NOVERS"`; \
+	ctags $$CTAGSF `find include/asm-$(ARCH) -name '*.h'` && \
+	find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs ctags $$CTAGSF -a && \
+	find $(SUBDIRS) init -name '*.[ch]' | xargs ctags $$CTAGSF -a
+
+ifdef CONFIG_MODULES
+ifdef CONFIG_MODVERSIONS
+MODFLAGS += -DMODVERSIONS -include $(HPATH)/linux/modversions.h
+endif
+
+.PHONY: modules
+modules: $(patsubst %, _mod_%, $(SUBDIRS))
+
+.PHONY: $(patsubst %, _mod_%, $(SUBDIRS))
+$(patsubst %, _mod_%, $(SUBDIRS)) : include/linux/version.h include/config/MARKER
+	$(MAKE) -C $(patsubst _mod_%, %, $@) CFLAGS="$(CFLAGS) $(MODFLAGS)" MAKING_MODULES=1 modules
+
+.PHONY: modules_install
+modules_install: _modinst_ $(patsubst %, _modinst_%, $(SUBDIRS)) _modinst_post
+
+.PHONY: _modinst_
+_modinst_:
+	@rm -rf $(MODLIB)/kernel
+	@rm -f $(MODLIB)/build
+	@mkdir -p $(MODLIB)/kernel
+	@ln -s $(TOPDIR) $(MODLIB)/build
+
+# If System.map exists, run depmod.  This deliberately does not have a
+# dependency on System.map since that would run the dependency tree on
+# vmlinux.  This depmod is only for convenience to give the initial
+# boot a modules.dep even before / is mounted read-write.  However the
+# boot script depmod is the master version.
+ifeq "$(strip $(INSTALL_MOD_PATH))" ""
+depmod_opts	:=
+else
+depmod_opts	:= -b $(INSTALL_MOD_PATH) -r
+endif
+.PHONY: _modinst_post
+_modinst_post: _modinst_post_pcmcia
+	if [ -r System.map ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi
+
+# Backwards compatibilty symlinks for people still using old versions
+# of pcmcia-cs with hard coded pathnames on insmod.  Remove
+# _modinst_post_pcmcia for kernel 2.4.1.
+.PHONY: _modinst_post_pcmcia
+_modinst_post_pcmcia:
+	cd $(MODLIB); \
+	mkdir -p pcmcia; \
+	find kernel -path '*/pcmcia/*' -name '*.o' | xargs -i -r ln -sf ../{} pcmcia
+
+.PHONY: $(patsubst %, _modinst_%, $(SUBDIRS))
+$(patsubst %, _modinst_%, $(SUBDIRS)) :
+	$(MAKE) -C $(patsubst _modinst_%, %, $@) modules_install
+
+# modules disabled....
+
+else
+modules modules_install: dummy
+	@echo
+	@echo "The present kernel configuration has modules disabled."
+	@echo "Type 'make config' and enable loadable module support."
+	@echo "Then build a kernel with module support enabled."
+	@echo
+	@exit 1
+endif
+
+clean:	archclean
+	find . \( -name '*.[oas]' -o -name core -o -name '.*.flags' \) -type f -print \
+		| grep -v lxdialog/ | xargs rm -f
+	rm -f $(CLEAN_FILES)
+	rm -rf $(CLEAN_DIRS)
+	$(MAKE) -C Documentation/DocBook clean
+
+mrproper: clean archmrproper
+	find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f
+	rm -f $(MRPROPER_FILES)
+	rm -rf $(MRPROPER_DIRS)
+	$(MAKE) -C Documentation/DocBook mrproper
+
+distclean: mrproper
+	rm -f core `find . \( -not -type d \) -and \
+		\( -name '*.orig' -o -name '*.rej' -o -name '*~' \
+		-o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \
+		-o -name '.*.rej' -o -name '.SUMS' -o -size 0 \) -type f -print` TAGS tags
+
+backup: mrproper
+	cd .. && tar cf - linux/ | gzip -9 > backup.gz
+	sync
+
+sgmldocs: 
+	chmod 755 $(TOPDIR)/scripts/docgen
+	chmod 755 $(TOPDIR)/scripts/gen-all-syms
+	chmod 755 $(TOPDIR)/scripts/kernel-doc
+	$(MAKE) -C $(TOPDIR)/Documentation/DocBook books
+
+psdocs: sgmldocs
+	$(MAKE) -C Documentation/DocBook ps
+
+pdfdocs: sgmldocs
+	$(MAKE) -C Documentation/DocBook pdf
+
+htmldocs: sgmldocs
+	$(MAKE) -C Documentation/DocBook html
+
+mandocs:
+	chmod 755 $(TOPDIR)/scripts/kernel-doc
+	chmod 755 $(TOPDIR)/scripts/split-man
+	$(MAKE) -C Documentation/DocBook man
+
+sums:
+	find . -type f -print | sort | xargs sum > .SUMS
+
+dep-files: scripts/mkdep archdep include/linux/version.h
+	rm -f .depend .hdepend
+	$(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)"
+ifdef CONFIG_MODVERSIONS
+	$(MAKE) update-modverfile
+endif
+	scripts/mkdep -- `find $(FINDHPATH) \( -name SCCS -o -name .svn \) -prune -o -follow -name \*.h ! -name modversions.h -print` > .hdepend
+	scripts/mkdep -- init/*.c > .depend
+
+ifdef CONFIG_MODVERSIONS
+MODVERFILE := $(TOPDIR)/include/linux/modversions.h
+else
+MODVERFILE :=
+endif
+export	MODVERFILE
+
+depend dep: dep-files
+
+checkconfig:
+	find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkconfig.pl
+
+checkhelp:
+	find * -name [cC]onfig.in -print | sort | xargs $(PERL) -w scripts/checkhelp.pl
+
+checkincludes:
+	find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkincludes.pl
+
+ifdef CONFIGURATION
+..$(CONFIGURATION):
+	@echo
+	@echo "You have a bad or nonexistent" .$(CONFIGURATION) ": running 'make" $(CONFIGURATION)"'"
+	@echo
+	$(MAKE) $(CONFIGURATION)
+	@echo
+	@echo "Successful. Try re-making (ignore the error that follows)"
+	@echo
+	exit 1
+
+#dummy: ..$(CONFIGURATION)
+dummy:
+
+else
+
+dummy:
+
+endif
+
+include Rules.make
+
+#
+# This generates dependencies for the .h files.
+#
+
+scripts/mkdep: scripts/mkdep.c
+	$(HOSTCC) $(HOSTCFLAGS) -o scripts/mkdep scripts/mkdep.c
+
+scripts/split-include: scripts/split-include.c
+	$(HOSTCC) $(HOSTCFLAGS) -o scripts/split-include scripts/split-include.c
+
+#
+# RPM target
+#
+#	If you do a make spec before packing the tarball you can rpm -ta it
+#
+spec:
+	. scripts/mkspec >kernel.spec
+
+#
+#	Build a tar ball, generate an rpm from it and pack the result
+#	There arw two bits of magic here
+#	1) The use of /. to avoid tar packing just the symlink
+#	2) Removing the .dep files as they have source paths in them that
+#	   will become invalid
+#
+rpm:	clean spec
+	find . \( -size 0 -o -name .depend -o -name .hdepend \) -type f -print | xargs rm -f
+	set -e; \
+	cd $(TOPDIR)/.. ; \
+	ln -sf $(TOPDIR) $(KERNELPATH) ; \
+	tar -cvz --exclude CVS -f $(KERNELPATH).tar.gz $(KERNELPATH)/. ; \
+	rm $(KERNELPATH) ; \
+	cd $(TOPDIR) ; \
+	. scripts/mkversion > .version ; \
+	$(RPM) -ta $(TOPDIR)/../$(KERNELPATH).tar.gz ; \
+	rm $(TOPDIR)/../$(KERNELPATH).tar.gz
diff --git a/linux-2.4-xen-sparse/arch/xen/Makefile b/linux-2.4-xen-sparse/arch/xen/Makefile
new file mode 100644
index 0000000000..77da37bfaf
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/Makefile
@@ -0,0 +1,139 @@
+#
+# xen/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+# 19990713  Artur Skawina <skawina@geocities.com>
+#           Added '-march' and '-mpreferred-stack-boundary' support
+#
+
+# If no .config file exists then use the appropriate defconfig-* file
+ifneq (.config,$(wildcard .config))
+DUMMYX:=$(shell cp $(TOPDIR)/arch/xen/defconfig$(EXTRAVERSION) $(TOPDIR)/.config)
+-include $(TOPDIR)/.config
+endif
+
+LD=$(CROSS_COMPILE)ld -m elf_i386
+OBJCOPY=$(CROSS_COMPILE)objcopy -R .note -R .comment -S
+LDFLAGS=-e stext
+LINKFLAGS =-T $(TOPDIR)/arch/xen/vmlinux.lds $(LDFLAGS)
+
+CFLAGS += -pipe
+
+check_gcc = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi)
+
+# prevent gcc from keeping the stack 16 byte aligned
+CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,)
+
+ifdef CONFIG_M686
+CFLAGS += -march=i686
+endif
+
+ifdef CONFIG_MPENTIUMIII
+CFLAGS += -march=i686
+endif
+
+ifdef CONFIG_MPENTIUM4
+CFLAGS += -march=i686
+endif
+
+ifdef CONFIG_MK7
+CFLAGS += $(call check_gcc,-march=athlon,-march=i686 -malign-functions=4)
+endif
+
+# Disable unit-at-a-time mode, it makes gcc use a lot more stack
+# due to the lack of sharing of stacklots.
+CFLAGS += $(call check_gcc,-fno-unit-at-a-time,)
+
+HEAD := arch/xen/kernel/head.o arch/xen/kernel/init_task.o
+
+SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib
+SUBDIRS += arch/xen/drivers/console 
+SUBDIRS += arch/xen/drivers/evtchn
+SUBDIRS += arch/xen/drivers/blkif
+SUBDIRS += arch/xen/drivers/netif
+SUBDIRS += arch/xen/drivers/usbif
+SUBDIRS += arch/xen/drivers/balloon
+ifdef CONFIG_XEN_PRIVILEGED_GUEST
+SUBDIRS += arch/xen/drivers/dom0 
+endif
+
+CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o
+CORE_FILES += arch/xen/drivers/evtchn/drv.o
+CORE_FILES += arch/xen/drivers/console/drv.o
+DRIVERS += arch/xen/drivers/blkif/drv.o
+DRIVERS += arch/xen/drivers/netif/drv.o
+DRIVERS += arch/xen/drivers/usbif/drv.o
+ifdef CONFIG_XEN_PRIVILEGED_GUEST
+CORE_FILES += arch/xen/drivers/dom0/drv.o
+endif
+CORE_FILES += arch/xen/drivers/balloon/drv.o
+LIBS := $(TOPDIR)/arch/xen/lib/lib.a $(LIBS) $(TOPDIR)/arch/xen/lib/lib.a
+
+arch/xen/kernel: dummy
+	$(MAKE) linuxsubdirs SUBDIRS=arch/xen/kernel
+
+arch/xen/mm: dummy
+	$(MAKE) linuxsubdirs SUBDIRS=arch/xen/mm
+
+arch/xen/drivers/console: dummy
+	$(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/console
+
+arch/xen/drivers/network: dummy
+	$(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/network
+
+arch/xen/drivers/block: dummy
+	$(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/block
+
+arch/xen/drivers/dom0: dummy
+	$(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/dom0
+
+arch/xen/drivers/balloon: dummy
+	$(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/balloon
+
+MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot
+
+vmlinux: arch/xen/vmlinux.lds
+
+FORCE: ;
+
+.PHONY: bzImage compressed clean archclean archmrproper archdep
+
+
+bzImage: vmlinux
+	@$(MAKEBOOT) bzImage
+
+INSTALL_NAME ?= $(KERNELRELEASE)
+install: bzImage
+	mkdir -p $(INSTALL_PATH)/boot
+	ln -f -s vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(XENGUEST)$(INSTALL_SUFFIX)
+	rm -f $(INSTALL_PATH)/boot/vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX)
+	install -m0644 arch/$(ARCH)/boot/bzImage $(INSTALL_PATH)/boot/vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX)
+	install -m0644 vmlinux $(INSTALL_PATH)/boot/vmlinux-syms-$(INSTALL_NAME)$(INSTALL_SUFFIX)
+	install -m0664 .config $(INSTALL_PATH)/boot/config-$(INSTALL_NAME)$(INSTALL_SUFFIX)
+	install -m0664 System.map $(INSTALL_PATH)/boot/System.map-$(INSTALL_NAME)$(INSTALL_SUFFIX)
+	ln -f -s vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(XENGUEST)$(INSTALL_SUFFIX)
+
+%_config: arch/xen/defconfig-%
+	rm -f .config arch/xen/defconfig
+	cp -f arch/xen/defconfig-$(@:_config=) arch/xen/defconfig
+	cp -f arch/xen/defconfig-$(@:_config=) .config
+
+
+archclean:
+	@$(MAKEBOOT) clean
+
+archmrproper:
+	rm -f include/asm-xen/xen-public/arch
+
+archdep:
+	@$(MAKEBOOT) dep
diff --git a/linux-2.4-xen-sparse/arch/xen/boot/Makefile b/linux-2.4-xen-sparse/arch/xen/boot/Makefile
new file mode 100644
index 0000000000..4da2909a79
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/boot/Makefile
@@ -0,0 +1,13 @@
+#
+# arch/xen/boot/Makefile
+#
+
+bzImage: $(TOPDIR)/vmlinux
+	$(OBJCOPY) $< Image
+	gzip -f -9 < Image > $@
+	rm -f Image
+
+dep:
+
+clean:
+	rm -f bzImage Image
diff --git a/linux-2.4-xen-sparse/arch/xen/config.in b/linux-2.4-xen-sparse/arch/xen/config.in
new file mode 100644
index 0000000000..23492fb5c8
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/config.in
@@ -0,0 +1,337 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/config-language.txt.
+#
+mainmenu_name "Linux Kernel Configuration"
+
+define_bool CONFIG_XEN y
+
+define_bool CONFIG_X86 y
+define_bool CONFIG_ISA y
+define_bool CONFIG_SBUS n
+
+define_bool CONFIG_UID16 y
+
+mainmenu_option next_comment
+comment 'Xen'
+bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST
+bool 'Device-driver domain (physical device access)' CONFIG_XEN_PHYSDEV_ACCESS
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+    bool 'USB-device backend driver' CONFIG_XEN_USB_BACKEND
+fi
+bool 'Scrub memory before freeing it to Xen' CONFIG_XEN_SCRUB_PAGES
+bool 'Network-device frontend driver' CONFIG_XEN_NETDEV_FRONTEND
+bool 'Block-device frontend driver' CONFIG_XEN_BLKDEV_FRONTEND
+bool 'Block-device uses grant tables' CONFIG_XEN_BLKDEV_GRANT
+bool 'USB-device frontend driver' CONFIG_XEN_USB_FRONTEND
+endmenu
+# The IBM S/390 patch needs this.
+define_bool CONFIG_NO_IDLE_HZ y
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   define_bool CONFIG_FOREIGN_PAGES y
+else
+   define_bool CONFIG_FOREIGN_PAGES n
+   define_bool CONFIG_NETDEVICES y
+   define_bool CONFIG_VT n
+fi
+
+mainmenu_option next_comment
+comment 'Code maturity level options'
+bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL
+endmenu
+
+mainmenu_option next_comment
+comment 'Loadable module support'
+bool 'Enable loadable module support' CONFIG_MODULES
+if [ "$CONFIG_MODULES" = "y" ]; then
+   bool '  Set version information on all module symbols' CONFIG_MODVERSIONS
+   bool '  Kernel module loader' CONFIG_KMOD
+fi
+endmenu
+
+mainmenu_option next_comment
+comment 'Processor type and features'
+choice 'Processor family' \
+	"Pentium-Pro/Celeron/Pentium-II		CONFIG_M686 \
+	 Pentium-III/Celeron(Coppermine)	CONFIG_MPENTIUMIII \
+	 Pentium-4				CONFIG_MPENTIUM4 \
+	 Athlon/Duron/K7			CONFIG_MK7 \
+	 Opteron/Athlon64/Hammer/K8             CONFIG_MK8 \
+	 VIA-C3-2                               CONFIG_MVIAC3_2" Pentium-Pro
+
+   define_bool CONFIG_X86_WP_WORKS_OK y
+   define_bool CONFIG_X86_INVLPG y
+   define_bool CONFIG_X86_CMPXCHG y
+   define_bool CONFIG_X86_XADD y
+   define_bool CONFIG_X86_BSWAP y
+   define_bool CONFIG_X86_POPAD_OK y
+   define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n
+   define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y
+
+   define_bool CONFIG_X86_GOOD_APIC y
+   define_bool CONFIG_X86_PGE y
+   define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
+   define_bool CONFIG_X86_TSC y
+
+if [ "$CONFIG_M686" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 5
+fi
+if [ "$CONFIG_MPENTIUMIII" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 5
+fi
+if [ "$CONFIG_MPENTIUM4" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 7
+fi
+if [ "$CONFIG_MK8" = "y" ]; then
+   define_bool CONFIG_MK7 y
+fi
+if [ "$CONFIG_MK7" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 6
+   define_bool CONFIG_X86_USE_3DNOW y
+fi
+if [ "$CONFIG_MVIAC3_2" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 5
+fi
+
+#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+#   tristate 'BIOS Enhanced Disk Drive calls determine boot disk (EXPERIMENTAL)' CONFIG_EDD
+#fi
+
+choice 'High Memory Support' \
+	"off    CONFIG_NOHIGHMEM \
+	 4GB    CONFIG_HIGHMEM4G" off
+#	 64GB   CONFIG_HIGHMEM64G" off
+if [ "$CONFIG_HIGHMEM4G" = "y" ]; then
+   define_bool CONFIG_HIGHMEM y
+fi
+if [ "$CONFIG_HIGHMEM64G" = "y" ]; then
+   define_bool CONFIG_HIGHMEM y
+   define_bool CONFIG_X86_PAE y
+fi
+
+if [ "$CONFIG_HIGHMEM" = "y" ]; then
+   bool 'HIGHMEM I/O support' CONFIG_HIGHIO
+fi
+
+define_int CONFIG_FORCE_MAX_ZONEORDER 11
+
+#bool 'Symmetric multi-processing support' CONFIG_SMP
+#if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+#   define_bool CONFIG_HAVE_DEC_LOCK y
+#fi
+endmenu
+
+mainmenu_option next_comment
+comment 'General setup'
+
+bool 'Networking support' CONFIG_NET
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   bool 'PCI support' CONFIG_PCI
+   source drivers/pci/Config.in
+
+   bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG
+
+   if [ "$CONFIG_HOTPLUG" = "y" ] ; then
+      source drivers/pcmcia/Config.in
+      source drivers/hotplug/Config.in
+   else
+      define_bool CONFIG_PCMCIA n
+      define_bool CONFIG_HOTPLUG_PCI n
+   fi
+fi
+
+bool 'System V IPC' CONFIG_SYSVIPC
+bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
+bool 'Sysctl support' CONFIG_SYSCTL
+if [ "$CONFIG_PROC_FS" = "y" ]; then
+   choice 'Kernel core (/proc/kcore) format' \
+	"ELF		CONFIG_KCORE_ELF	\
+	 A.OUT		CONFIG_KCORE_AOUT" ELF
+fi
+tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
+bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
+tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
+bool 'Select task to kill on out of memory condition' CONFIG_OOM_KILLER
+
+endmenu
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   source drivers/mtd/Config.in
+
+   source drivers/parport/Config.in
+
+   source drivers/pnp/Config.in
+
+   source drivers/block/Config.in
+
+   source drivers/md/Config.in
+fi
+
+if [ "$CONFIG_NET" = "y" ]; then
+   source net/Config.in
+fi
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   mainmenu_option next_comment
+   comment 'ATA/IDE/MFM/RLL support'
+
+   tristate 'ATA/IDE/MFM/RLL support' CONFIG_IDE
+
+   if [ "$CONFIG_IDE" != "n" ]; then
+      source drivers/ide/Config.in
+   else
+      define_bool CONFIG_BLK_DEV_HD n
+   fi
+   endmenu
+fi
+
+mainmenu_option next_comment
+comment 'SCSI support'
+
+tristate 'SCSI support' CONFIG_SCSI
+
+if [ "$CONFIG_SCSI" != "n" ]; then
+   source drivers/scsi/Config.in
+fi
+endmenu
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   source drivers/message/fusion/Config.in
+
+   source drivers/ieee1394/Config.in
+
+   source drivers/message/i2o/Config.in
+
+   if [ "$CONFIG_NET" = "y" ]; then
+      mainmenu_option next_comment
+      comment 'Network device support'
+
+      bool 'Network device support' CONFIG_NETDEVICES
+      if [ "$CONFIG_NETDEVICES" = "y" ]; then
+         source drivers/net/Config.in
+         if [ "$CONFIG_ATM" = "y" -o "$CONFIG_ATM" = "m" ]; then
+            source drivers/atm/Config.in
+         fi
+      fi
+      endmenu
+   fi
+
+   source net/ax25/Config.in
+
+   source net/irda/Config.in
+
+   mainmenu_option next_comment
+   comment 'ISDN subsystem'
+   if [ "$CONFIG_NET" != "n" ]; then
+      tristate 'ISDN support' CONFIG_ISDN
+      if [ "$CONFIG_ISDN" != "n" ]; then
+         source drivers/isdn/Config.in
+      fi
+   fi
+   endmenu
+
+   if [ "$CONFIG_ISA" = "y" ]; then
+       mainmenu_option next_comment
+       comment 'Old CD-ROM drivers (not SCSI, not IDE)'
+   
+       bool 'Support non-SCSI/IDE/ATAPI CDROM drives' CONFIG_CD_NO_IDESCSI
+       if [ "$CONFIG_CD_NO_IDESCSI" != "n" ]; then
+          source drivers/cdrom/Config.in
+       fi
+       endmenu
+   fi
+
+   #
+   # input before char - char/joystick depends on it. As does USB.
+   #
+   source drivers/input/Config.in
+else
+   #
+   # Block device driver configuration
+   #
+   mainmenu_option next_comment
+   comment 'Block devices'
+   tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
+   dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
+   tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
+   if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then
+      int '  Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096
+   fi
+   dep_bool '  Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
+   bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS
+   define_bool CONFIG_BLK_DEV_HD n
+   endmenu
+fi
+
+source drivers/char/Config.in
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" -o "$CONFIG_XEN_USB_FRONTEND" = "y" ]; then
+   source drivers/media/Config.in
+fi
+
+source fs/Config.in
+
+mainmenu_option next_comment
+comment 'Console drivers'
+
+define_bool CONFIG_XEN_CONSOLE y
+
+if [ "$CONFIG_VT" = "y" ]; then
+   bool 'VGA text console' CONFIG_VGA_CONSOLE
+   bool 'Dummy console' CONFIG_DUMMY_CONSOLE 
+   if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+      bool 'Video mode selection support' CONFIG_VIDEO_SELECT
+      if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+         tristate 'MDA text console (dual-headed) (EXPERIMENTAL)' CONFIG_MDA_CONSOLE
+         source drivers/video/Config.in
+      fi
+   fi
+fi
+endmenu
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   mainmenu_option next_comment
+   comment 'Sound'
+
+   tristate 'Sound card support' CONFIG_SOUND
+   if [ "$CONFIG_SOUND" != "n" ]; then
+      source drivers/sound/Config.in
+   fi
+   endmenu
+fi
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" -o "$CONFIG_XEN_USB_FRONTEND" = "y" ]; then
+   if [ "$CONFIG_XEN_USB_FRONTEND" = "y" -o "$CONFIG_XEN_USB_BACKEND" = "y" ]; then
+       define_bool CONFIG_USB y
+   fi
+   source drivers/usb/Config.in
+fi
+
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+   source net/bluetooth/Config.in
+fi
+
+mainmenu_option next_comment
+comment 'Kernel hacking'
+
+bool 'Kernel debugging' CONFIG_DEBUG_KERNEL
+if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then
+   bool '  Check for stack overflows' CONFIG_DEBUG_STACKOVERFLOW
+   bool '  Debug high memory support' CONFIG_DEBUG_HIGHMEM
+   bool '  Debug memory allocations' CONFIG_DEBUG_SLAB
+   bool '  Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
+   bool '  Magic SysRq key' CONFIG_MAGIC_SYSRQ
+   bool '  Spinlock debugging' CONFIG_DEBUG_SPINLOCK
+   bool '  Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE
+   bool '  Load all symbols for debugging' CONFIG_KALLSYMS
+   bool '  Compile the kernel with frame pointers' CONFIG_FRAME_POINTER
+fi
+
+int 'Kernel messages buffer length shift (0 = default)' CONFIG_LOG_BUF_SHIFT 0
+
+endmenu
+
+source crypto/Config.in
+source lib/Config.in
diff --git a/linux-2.4-xen-sparse/arch/xen/defconfig-xen0 b/linux-2.4-xen-sparse/arch/xen/defconfig-xen0
new file mode 100644
index 0000000000..78e93e900b
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/defconfig-xen0
@@ -0,0 +1,927 @@
+#
+# Automatically generated by make menuconfig: don't edit
+#
+CONFIG_XEN=y
+CONFIG_X86=y
+CONFIG_ISA=y
+# CONFIG_SBUS is not set
+CONFIG_UID16=y
+
+#
+# Xen
+#
+CONFIG_XEN_PRIVILEGED_GUEST=y
+CONFIG_XEN_PHYSDEV_ACCESS=y
+# CONFIG_XEN_USB_BACKEND is not set
+CONFIG_XEN_SCRUB_PAGES=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_GRANT=y
+# CONFIG_XEN_USB_FRONTEND is not set
+CONFIG_NO_IDLE_HZ=y
+CONFIG_FOREIGN_PAGES=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
+CONFIG_KMOD=y
+
+#
+# Processor type and features
+#
+CONFIG_M686=y
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MVIAC3_2 is not set
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_PGE=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
+CONFIG_X86_L1_CACHE_SHIFT=5
+CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
+CONFIG_FORCE_MAX_ZONEORDER=11
+
+#
+# General setup
+#
+CONFIG_NET=y
+CONFIG_PCI=y
+CONFIG_PCI_NAMES=y
+CONFIG_HOTPLUG=y
+
+#
+# PCMCIA/CardBus support
+#
+# CONFIG_PCMCIA is not set
+
+#
+# PCI Hotplug Support
+#
+# CONFIG_HOTPLUG_PCI is not set
+# CONFIG_HOTPLUG_PCI_COMPAQ is not set
+# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set
+# CONFIG_HOTPLUG_PCI_SHPC is not set
+# CONFIG_HOTPLUG_PCI_SHPC_POLL_EVENT_MODE is not set
+# CONFIG_HOTPLUG_PCI_PCIE is not set
+# CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set
+CONFIG_SYSVIPC=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+CONFIG_KCORE_ELF=y
+# CONFIG_KCORE_AOUT is not set
+CONFIG_BINFMT_AOUT=y
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_OOM_KILLER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play configuration
+#
+CONFIG_PNP=y
+# CONFIG_ISAPNP is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_CISS_SCSI_TAPE is not set
+# CONFIG_CISS_MONITOR_THREAD is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_SX8 is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_BLK_STATS is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+# CONFIG_MD_RAID0 is not set
+CONFIG_MD_RAID1=y
+# CONFIG_MD_RAID5 is not set
+# CONFIG_MD_MULTIPATH is not set
+CONFIG_BLK_DEV_LVM=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+# CONFIG_NETLINK_DEV is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_FILTER=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_INET_ECN is not set
+# CONFIG_SYN_COOKIES is not set
+
+#
+#   IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+CONFIG_IP_NF_FTP=m
+# CONFIG_IP_NF_AMANDA is not set
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_IRC=m
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=y
+# CONFIG_IP_NF_MATCH_LIMIT is not set
+# CONFIG_IP_NF_MATCH_MAC is not set
+# CONFIG_IP_NF_MATCH_PKTTYPE is not set
+# CONFIG_IP_NF_MATCH_MARK is not set
+# CONFIG_IP_NF_MATCH_MULTIPORT is not set
+# CONFIG_IP_NF_MATCH_TOS is not set
+# CONFIG_IP_NF_MATCH_RECENT is not set
+# CONFIG_IP_NF_MATCH_ECN is not set
+# CONFIG_IP_NF_MATCH_DSCP is not set
+# CONFIG_IP_NF_MATCH_AH_ESP is not set
+# CONFIG_IP_NF_MATCH_LENGTH is not set
+# CONFIG_IP_NF_MATCH_TTL is not set
+# CONFIG_IP_NF_MATCH_TCPMSS is not set
+# CONFIG_IP_NF_MATCH_HELPER is not set
+CONFIG_IP_NF_MATCH_STATE=m
+CONFIG_IP_NF_MATCH_CONNTRACK=m
+# CONFIG_IP_NF_MATCH_UNCLEAN is not set
+# CONFIG_IP_NF_MATCH_OWNER is not set
+CONFIG_IP_NF_MATCH_PHYSDEV=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+# CONFIG_IP_NF_TARGET_MIRROR is not set
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+# CONFIG_IP_NF_MANGLE is not set
+CONFIG_IP_NF_TARGET_LOG=y
+CONFIG_IP_NF_TARGET_ULOG=y
+# CONFIG_IP_NF_TARGET_TCPMSS is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+
+#
+#   IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+# CONFIG_KHTTPD is not set
+
+#
+#    SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+CONFIG_VLAN_8021Q=y
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_DECNET is not set
+CONFIG_BRIDGE=y
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_IPF=m
+CONFIG_BRIDGE_EBT_ARPF=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_VLANF=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_MARKF=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_LLC is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+
+#
+# ATA/IDE/MFM/RLL support
+#
+CONFIG_IDE=y
+
+#
+# IDE, ATA and ATAPI Block devices
+#
+CONFIG_BLK_DEV_IDE=y
+# CONFIG_BLK_DEV_HD_IDE is not set
+# CONFIG_BLK_DEV_HD is not set
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+CONFIG_IDEDISK_STROKE=y
+# CONFIG_BLK_DEV_IDECS is not set
+# CONFIG_BLK_DEV_DELKIN is not set
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_IDETAPE=y
+CONFIG_BLK_DEV_IDEFLOPPY=y
+CONFIG_BLK_DEV_IDESCSI=y
+CONFIG_IDE_TASK_IOCTL=y
+CONFIG_BLK_DEV_CMD640=y
+CONFIG_BLK_DEV_CMD640_ENHANCED=y
+# CONFIG_BLK_DEV_ISAPNP is not set
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+CONFIG_BLK_DEV_OFFBOARD=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_PCI_WIP is not set
+CONFIG_BLK_DEV_ADMA100=y
+CONFIG_BLK_DEV_AEC62XX=y
+CONFIG_BLK_DEV_ALI15X3=y
+CONFIG_WDC_ALI15X3=y
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_AMD74XX_OVERRIDE=y
+# CONFIG_BLK_DEV_ATIIXP is not set
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_TRIFLEX=y
+CONFIG_BLK_DEV_CY82C693=y
+CONFIG_BLK_DEV_CS5530=y
+CONFIG_BLK_DEV_HPT34X=y
+# CONFIG_HPT34X_AUTODMA is not set
+CONFIG_BLK_DEV_HPT366=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_BLK_DEV_NS87415=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+CONFIG_PDC202XX_BURST=y
+CONFIG_BLK_DEV_PDC202XX_NEW=y
+CONFIG_PDC202XX_FORCE=y
+CONFIG_BLK_DEV_RZ1000=y
+CONFIG_BLK_DEV_SC1200=y
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+CONFIG_BLK_DEV_SLC90E66=y
+CONFIG_BLK_DEV_TRM290=y
+CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_IDE_CHIPSETS=y
+# CONFIG_BLK_DEV_4DRIVES is not set
+# CONFIG_BLK_DEV_ALI14XX is not set
+# CONFIG_BLK_DEV_DTC2278 is not set
+# CONFIG_BLK_DEV_HT6560B is not set
+# CONFIG_BLK_DEV_PDC4030 is not set
+# CONFIG_BLK_DEV_QD65XX is not set
+# CONFIG_BLK_DEV_UMC8672 is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_IDEDMA_IVB is not set
+# CONFIG_DMA_NONPCI is not set
+CONFIG_BLK_DEV_PDC202XX=y
+# CONFIG_BLK_DEV_ATARAID is not set
+# CONFIG_BLK_DEV_ATARAID_PDC is not set
+# CONFIG_BLK_DEV_ATARAID_HPT is not set
+# CONFIG_BLK_DEV_ATARAID_MEDLEY is not set
+# CONFIG_BLK_DEV_ATARAID_SII is not set
+
+#
+# SCSI support
+#
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SD_EXTRA_DEVS=40
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+CONFIG_CHR_DEV_SG=y
+# CONFIG_SCSI_DEBUG_QUEUES is not set
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+# CONFIG_SCSI_AHA1740 is not set
+CONFIG_SCSI_AACRAID=y
+CONFIG_SCSI_AIC7XXX=y
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+# CONFIG_AIC7XXX_PROBE_EISA_VL is not set
+# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_AIC7XXX_DEBUG_MASK=0
+# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+CONFIG_SCSI_AIC79XX=y
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+# CONFIG_AIC79XX_BUILD_FIRMWARE is not set
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_IN2000 is not set
+# CONFIG_SCSI_AM53C974 is not set
+CONFIG_SCSI_MEGARAID=y
+# CONFIG_SCSI_MEGARAID2 is not set
+CONFIG_SCSI_SATA=y
+# CONFIG_SCSI_SATA_AHCI is not set
+# CONFIG_SCSI_SATA_SVW is not set
+CONFIG_SCSI_ATA_PIIX=y
+# CONFIG_SCSI_SATA_NV is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=y
+CONFIG_SCSI_SATA_SX4=y
+CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIS=y
+# CONFIG_SCSI_SATA_ULI is not set
+CONFIG_SCSI_SATA_VIA=y
+CONFIG_SCSI_SATA_VITESSE=y
+CONFIG_SCSI_BUSLOGIC=y
+# CONFIG_SCSI_OMIT_FLASHPOINT is not set
+# CONFIG_SCSI_CPQFCTS is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_DMA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_NCR53C406A is not set
+# CONFIG_SCSI_NCR53C7xx is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PCI2000 is not set
+# CONFIG_SCSI_PCI2220I is not set
+# CONFIG_SCSI_PSI240I is not set
+# CONFIG_SCSI_QLOGIC_FAS is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+# CONFIG_SCSI_SEAGATE is not set
+# CONFIG_SCSI_SIM710 is not set
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+# CONFIG_SCSI_ULTRASTOR is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+# CONFIG_FUSION_BOOT is not set
+# CONFIG_FUSION_ISENSE is not set
+# CONFIG_FUSION_CTL is not set
+# CONFIG_FUSION_LAN is not set
+
+#
+# IEEE 1394 (FireWire) support (EXPERIMENTAL)
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+# CONFIG_I2O_PCI is not set
+# CONFIG_I2O_BLOCK is not set
+# CONFIG_I2O_LAN is not set
+# CONFIG_I2O_SCSI is not set
+# CONFIG_I2O_PROC is not set
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_ETHERTAP is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+# CONFIG_SUNLANCE is not set
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNBMAC is not set
+# CONFIG_SUNQE is not set
+# CONFIG_SUNGEM is not set
+CONFIG_NET_VENDOR_3COM=y
+# CONFIG_EL1 is not set
+# CONFIG_EL2 is not set
+# CONFIG_ELPLUS is not set
+# CONFIG_EL16 is not set
+# CONFIG_EL3 is not set
+# CONFIG_3C515 is not set
+# CONFIG_ELMC is not set
+# CONFIG_ELMC_II is not set
+CONFIG_VORTEX=y
+# CONFIG_TYPHOON is not set
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+# CONFIG_AT1700 is not set
+# CONFIG_DEPCA is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=y
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_B44 is not set
+# CONFIG_CS89x0 is not set
+# CONFIG_TULIP is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_DGRS is not set
+# CONFIG_DM9102 is not set
+# CONFIG_EEPRO100 is not set
+# CONFIG_EEPRO100_PIO is not set
+CONFIG_E100=y
+# CONFIG_LNE390 is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+CONFIG_NE2K_PCI=y
+# CONFIG_FORCEDETH is not set
+# CONFIG_NE3210 is not set
+# CONFIG_ES3210 is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_8139TOO_PIO is not set
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+# CONFIG_8139TOO_8129 is not set
+# CONFIG_8139_OLD_RX_RESET is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_SUNDANCE_MMIO is not set
+# CONFIG_TLAN is not set
+# CONFIG_VIA_RHINE is not set
+# CONFIG_VIA_RHINE_MMIO is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_NET_POCKET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
+# CONFIG_MYRI_SBUS is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+CONFIG_TIGON3=y
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PLIP is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+# CONFIG_NET_FC is not set
+# CONFIG_RCPCI is not set
+# CONFIG_SHAPER is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+
+#
+# Amateur Radio support
+#
+# CONFIG_HAMRADIO is not set
+
+#
+# IrDA (infrared) support
+#
+# CONFIG_IRDA is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Input core support
+#
+# CONFIG_INPUT is not set
+# CONFIG_INPUT_KEYBDEV is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_UINPUT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+# CONFIG_SERIAL is not set
+# CONFIG_SERIAL_EXTENDED is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_UNIX98_PTY_COUNT=256
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Mice
+#
+# CONFIG_BUSMOUSE is not set
+CONFIG_MOUSE=y
+CONFIG_PSMOUSE=y
+# CONFIG_82C710_MOUSE is not set
+# CONFIG_PC110_PAD is not set
+# CONFIG_MK712_MOUSE is not set
+
+#
+# Joysticks
+#
+# CONFIG_INPUT_GAMEPORT is not set
+# CONFIG_QIC02_TAPE is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_IPMI_PANIC_EVENT is not set
+# CONFIG_IPMI_DEVICE_INTERFACE is not set
+# CONFIG_IPMI_KCS is not set
+# CONFIG_IPMI_WATCHDOG is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_SCx200 is not set
+# CONFIG_SCx200_GPIO is not set
+# CONFIG_AMD_RNG is not set
+# CONFIG_INTEL_RNG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_AMD_PM768 is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+
+#
+# Direct Rendering Manager (XFree86 DRI support)
+#
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+# CONFIG_OBMOUSE is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# File systems
+#
+# CONFIG_QUOTA is not set
+# CONFIG_QFMT_V2 is not set
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+# CONFIG_ADFS_FS is not set
+# CONFIG_ADFS_FS_RW is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BEFS_DEBUG is not set
+# CONFIG_BFS_FS is not set
+CONFIG_EXT3_FS=y
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_UMSDOS_FS=y
+CONFIG_VFAT_FS=y
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+CONFIG_TMPFS=y
+CONFIG_RAMFS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+# CONFIG_JFS_FS is not set
+# CONFIG_JFS_DEBUG is not set
+# CONFIG_JFS_STATISTICS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_NTFS_RW is not set
+# CONFIG_HPFS_FS is not set
+CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVFS_MOUNT is not set
+# CONFIG_DEVFS_DEBUG is not set
+CONFIG_DEVPTS_FS=y
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX4FS_RW is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UDF_FS is not set
+# CONFIG_UDF_RW is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_UFS_FS_WRITE is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_XFS_QUOTA is not set
+# CONFIG_XFS_RT is not set
+# CONFIG_XFS_TRACE is not set
+# CONFIG_XFS_DEBUG is not set
+
+#
+# Network File Systems
+#
+# CONFIG_CODA_FS is not set
+# CONFIG_INTERMEZZO_FS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_TCP is not set
+CONFIG_SUNRPC=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+# CONFIG_SMB_FS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_NCPFS_PACKET_SIGNING is not set
+# CONFIG_NCPFS_IOCTL_LOCKING is not set
+# CONFIG_NCPFS_STRONG is not set
+# CONFIG_NCPFS_NFS_NS is not set
+# CONFIG_NCPFS_OS2_NS is not set
+# CONFIG_NCPFS_SMALLDOS is not set
+# CONFIG_NCPFS_NLS is not set
+# CONFIG_NCPFS_EXTRAS is not set
+CONFIG_ZISOFS_FS=y
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SMB_NLS is not set
+CONFIG_NLS=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS_DEFAULT="iso8559-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Console drivers
+#
+CONFIG_XEN_CONSOLE=y
+CONFIG_VGA_CONSOLE=y
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_VIDEO_SELECT is not set
+# CONFIG_MDA_CONSOLE is not set
+
+#
+# Frame-buffer support
+#
+# CONFIG_FB is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+# CONFIG_USB is not set
+
+#
+# Support for USB gadgets
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# Bluetooth support
+#
+# CONFIG_BLUEZ is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_IOVIRT is not set
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_KALLSYMS=y
+# CONFIG_FRAME_POINTER is not set
+CONFIG_LOG_BUF_SHIFT=0
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_SHA1=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+# CONFIG_CRYPTO_WP512 is not set
+CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_AES=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_DEFLATE=m
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC32 is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+# CONFIG_FW_LOADER is not set
diff --git a/linux-2.4-xen-sparse/arch/xen/defconfig-xenU b/linux-2.4-xen-sparse/arch/xen/defconfig-xenU
new file mode 100644
index 0000000000..3640bfc19b
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/defconfig-xenU
@@ -0,0 +1,562 @@
+#
+# Automatically generated make config: don't edit
+#
+CONFIG_XEN=y
+CONFIG_X86=y
+CONFIG_ISA=y
+# CONFIG_SBUS is not set
+CONFIG_UID16=y
+
+#
+# Xen
+#
+# CONFIG_XEN_PRIVILEGED_GUEST is not set
+# CONFIG_XEN_PHYSDEV_ACCESS is not set
+CONFIG_XEN_SCRUB_PAGES=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_GRANT=y
+# CONFIG_XEN_USB_FRONTEND is not set
+CONFIG_NO_IDLE_HZ=y
+# CONFIG_FOREIGN_PAGES is not set
+CONFIG_NETDEVICES=y
+# CONFIG_VT is not set
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
+CONFIG_KMOD=y
+
+#
+# Processor type and features
+#
+CONFIG_M686=y
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MVIAC3_2 is not set
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_PGE=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
+CONFIG_X86_L1_CACHE_SHIFT=5
+CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
+CONFIG_FORCE_MAX_ZONEORDER=11
+
+#
+# General setup
+#
+CONFIG_NET=y
+CONFIG_SYSVIPC=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+CONFIG_KCORE_ELF=y
+# CONFIG_KCORE_AOUT is not set
+CONFIG_BINFMT_AOUT=y
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_OOM_KILLER is not set
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+# CONFIG_NETLINK_DEV is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_FILTER=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_INET_ECN is not set
+# CONFIG_SYN_COOKIES is not set
+
+#
+#   IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=y
+CONFIG_IP_NF_FTP=y
+# CONFIG_IP_NF_AMANDA is not set
+CONFIG_IP_NF_TFTP=y
+CONFIG_IP_NF_IRC=y
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=y
+# CONFIG_IP_NF_MATCH_LIMIT is not set
+# CONFIG_IP_NF_MATCH_MAC is not set
+# CONFIG_IP_NF_MATCH_PKTTYPE is not set
+# CONFIG_IP_NF_MATCH_MARK is not set
+# CONFIG_IP_NF_MATCH_MULTIPORT is not set
+# CONFIG_IP_NF_MATCH_TOS is not set
+# CONFIG_IP_NF_MATCH_RECENT is not set
+# CONFIG_IP_NF_MATCH_ECN is not set
+# CONFIG_IP_NF_MATCH_DSCP is not set
+# CONFIG_IP_NF_MATCH_AH_ESP is not set
+# CONFIG_IP_NF_MATCH_LENGTH is not set
+# CONFIG_IP_NF_MATCH_TTL is not set
+# CONFIG_IP_NF_MATCH_TCPMSS is not set
+# CONFIG_IP_NF_MATCH_HELPER is not set
+CONFIG_IP_NF_MATCH_STATE=y
+CONFIG_IP_NF_MATCH_CONNTRACK=y
+# CONFIG_IP_NF_MATCH_UNCLEAN is not set
+# CONFIG_IP_NF_MATCH_OWNER is not set
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+# CONFIG_IP_NF_TARGET_MIRROR is not set
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
+CONFIG_IP_NF_NAT_IRC=y
+CONFIG_IP_NF_NAT_FTP=y
+CONFIG_IP_NF_NAT_TFTP=y
+# CONFIG_IP_NF_MANGLE is not set
+CONFIG_IP_NF_TARGET_LOG=y
+CONFIG_IP_NF_TARGET_ULOG=y
+# CONFIG_IP_NF_TARGET_TCPMSS is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+
+#
+#   IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+# CONFIG_KHTTPD is not set
+
+#
+#    SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+CONFIG_VLAN_8021Q=y
+
+#
+#  
+#
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_DECNET is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_LLC is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+
+#
+# SCSI support
+#
+CONFIG_SCSI=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_SD_EXTRA_DEVS=40
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+CONFIG_CHR_DEV_SG=y
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_DEBUG_QUEUES is not set
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+# CONFIG_SCSI_AHA1740 is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_IN2000 is not set
+# CONFIG_SCSI_AM53C974 is not set
+# CONFIG_SCSI_MEGARAID is not set
+# CONFIG_SCSI_MEGARAID2 is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_SATA_AHCI is not set
+# CONFIG_SCSI_SATA_SVW is not set
+# CONFIG_SCSI_ATA_PIIX is not set
+# CONFIG_SCSI_SATA_NV is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
+# CONFIG_SCSI_SATA_PROMISE is not set
+# CONFIG_SCSI_SATA_SX4 is not set
+# CONFIG_SCSI_SATA_SIL is not set
+# CONFIG_SCSI_SATA_SIS is not set
+# CONFIG_SCSI_SATA_ULI is not set
+# CONFIG_SCSI_SATA_VIA is not set
+# CONFIG_SCSI_SATA_VITESSE is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_DMA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_PPA is not set
+# CONFIG_SCSI_IMM is not set
+# CONFIG_SCSI_NCR53C406A is not set
+# CONFIG_SCSI_NCR53C7xx is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PCI2000 is not set
+# CONFIG_SCSI_PCI2220I is not set
+# CONFIG_SCSI_PSI240I is not set
+# CONFIG_SCSI_QLOGIC_FAS is not set
+# CONFIG_SCSI_SEAGATE is not set
+# CONFIG_SCSI_SIM710 is not set
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+# CONFIG_SCSI_ULTRASTOR is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_BLK_STATS is not set
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL is not set
+# CONFIG_SERIAL_EXTENDED is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_UNIX98_PTY_COUNT=256
+# CONFIG_PRINTER is not set
+# CONFIG_PPDEV is not set
+# CONFIG_TIPAR is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Mice
+#
+# CONFIG_BUSMOUSE is not set
+CONFIG_MOUSE=y
+CONFIG_PSMOUSE=y
+# CONFIG_82C710_MOUSE is not set
+# CONFIG_PC110_PAD is not set
+# CONFIG_MK712_MOUSE is not set
+
+#
+# Joysticks
+#
+# CONFIG_INPUT_GAMEPORT is not set
+# CONFIG_INPUT_NS558 is not set
+# CONFIG_INPUT_LIGHTNING is not set
+# CONFIG_INPUT_PCIGAME is not set
+# CONFIG_INPUT_CS461X is not set
+# CONFIG_INPUT_EMU10K1 is not set
+# CONFIG_INPUT_SERIO is not set
+# CONFIG_INPUT_SERPORT is not set
+
+#
+# Joysticks
+#
+# CONFIG_INPUT_ANALOG is not set
+# CONFIG_INPUT_A3D is not set
+# CONFIG_INPUT_ADI is not set
+# CONFIG_INPUT_COBRA is not set
+# CONFIG_INPUT_GF2K is not set
+# CONFIG_INPUT_GRIP is not set
+# CONFIG_INPUT_INTERACT is not set
+# CONFIG_INPUT_TMDC is not set
+# CONFIG_INPUT_SIDEWINDER is not set
+# CONFIG_INPUT_IFORCE_USB is not set
+# CONFIG_INPUT_IFORCE_232 is not set
+# CONFIG_INPUT_WARRIOR is not set
+# CONFIG_INPUT_MAGELLAN is not set
+# CONFIG_INPUT_SPACEORB is not set
+# CONFIG_INPUT_SPACEBALL is not set
+# CONFIG_INPUT_STINGER is not set
+# CONFIG_INPUT_DB9 is not set
+# CONFIG_INPUT_GAMECON is not set
+# CONFIG_INPUT_TURBOGRAFX is not set
+# CONFIG_QIC02_TAPE is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_IPMI_PANIC_EVENT is not set
+# CONFIG_IPMI_DEVICE_INTERFACE is not set
+# CONFIG_IPMI_KCS is not set
+# CONFIG_IPMI_WATCHDOG is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_SCx200 is not set
+# CONFIG_SCx200_GPIO is not set
+# CONFIG_AMD_RNG is not set
+# CONFIG_INTEL_RNG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_AMD_PM768 is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+
+#
+# Direct Rendering Manager (XFree86 DRI support)
+#
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+# CONFIG_OBMOUSE is not set
+
+#
+# File systems
+#
+# CONFIG_QUOTA is not set
+# CONFIG_QFMT_V2 is not set
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+# CONFIG_ADFS_FS is not set
+# CONFIG_ADFS_FS_RW is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BEFS_DEBUG is not set
+# CONFIG_BFS_FS is not set
+CONFIG_EXT3_FS=y
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_UMSDOS_FS=y
+CONFIG_VFAT_FS=y
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+CONFIG_TMPFS=y
+CONFIG_RAMFS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+# CONFIG_JFS_FS is not set
+# CONFIG_JFS_DEBUG is not set
+# CONFIG_JFS_STATISTICS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_NTFS_RW is not set
+# CONFIG_HPFS_FS is not set
+CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVFS_MOUNT is not set
+# CONFIG_DEVFS_DEBUG is not set
+CONFIG_DEVPTS_FS=y
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX4FS_RW is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UDF_FS is not set
+# CONFIG_UDF_RW is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_UFS_FS_WRITE is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_XFS_QUOTA is not set
+# CONFIG_XFS_RT is not set
+# CONFIG_XFS_TRACE is not set
+# CONFIG_XFS_DEBUG is not set
+
+#
+# Network File Systems
+#
+# CONFIG_CODA_FS is not set
+# CONFIG_INTERMEZZO_FS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_TCP is not set
+CONFIG_SUNRPC=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+# CONFIG_SMB_FS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_NCPFS_PACKET_SIGNING is not set
+# CONFIG_NCPFS_IOCTL_LOCKING is not set
+# CONFIG_NCPFS_STRONG is not set
+# CONFIG_NCPFS_NFS_NS is not set
+# CONFIG_NCPFS_OS2_NS is not set
+# CONFIG_NCPFS_SMALLDOS is not set
+# CONFIG_NCPFS_NLS is not set
+# CONFIG_NCPFS_EXTRAS is not set
+CONFIG_ZISOFS_FS=y
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SMB_NLS is not set
+CONFIG_NLS=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS_DEFAULT="iso8559-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Console drivers
+#
+CONFIG_XEN_CONSOLE=y
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_IOVIRT is not set
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_KALLSYMS=y
+# CONFIG_FRAME_POINTER is not set
+CONFIG_LOG_BUF_SHIFT=0
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC32 is not set
+CONFIG_ZLIB_INFLATE=y
+# CONFIG_ZLIB_DEFLATE is not set
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/balloon/Makefile b/linux-2.4-xen-sparse/arch/xen/drivers/balloon/Makefile
new file mode 100644
index 0000000000..c97a95a522
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/balloon/Makefile
@@ -0,0 +1,4 @@
+O_TARGET := drv.o
+export-objs := balloon.o
+obj-y := balloon.o
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/Makefile b/linux-2.4-xen-sparse/arch/xen/drivers/blkif/Makefile
new file mode 100644
index 0000000000..7f1b4a98fb
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/blkif/Makefile
@@ -0,0 +1,10 @@
+
+O_TARGET := drv.o
+
+subdir-$(CONFIG_XEN_BLKDEV_FRONTEND) += frontend
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND)    += frontend/drv.o
+
+subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
+obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
+
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/backend/Makefile b/linux-2.4-xen-sparse/arch/xen/drivers/blkif/backend/Makefile
new file mode 100644
index 0000000000..4c8c17367c
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/blkif/backend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := main.o control.o interface.o vbd.o
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile b/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile
new file mode 100644
index 0000000000..89ba879ed4
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := blkfront.o vbd.o
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/common.h b/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/common.h
new file mode 100644
index 0000000000..88c27238a7
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/common.h
@@ -0,0 +1,93 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/frontend/common.h
+ * 
+ * Shared definitions between all levels of XenoLinux Virtual block devices.
+ */
+
+#ifndef __XEN_DRIVERS_COMMON_H__
+#define __XEN_DRIVERS_COMMON_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/major.h>
+#include <asm-xen/xen-public/xen.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+#include <asm-xen/xen-public/io/blkif.h>
+
+#if 1
+#define IPRINTK(fmt, args...) \
+    printk(KERN_INFO "xen_blk: " fmt, ##args)
+#else
+#define IPRINTK(fmt, args...) ((void)0)
+#endif
+
+#if 1
+#define WPRINTK(fmt, args...) \
+    printk(KERN_WARNING "xen_blk: " fmt, ##args)
+#else
+#define WPRINTK(fmt, args...) ((void)0)
+#endif
+
+#if 0
+#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#if 0
+#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
+#endif
+
+/* Private gendisk->flags[] values. */
+#define GENHD_FL_XEN        2 /* Is unit a Xen block device?  */
+#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'.
+ * They hang in an array off the gendisk structure. We may end up putting
+ * all kinds of interesting stuff here :-)
+ */
+typedef struct xl_disk {
+    int usage;
+} xl_disk_t;
+
+extern int blkif_open(struct inode *inode, struct file *filep);
+extern int blkif_release(struct inode *inode, struct file *filep);
+extern int blkif_ioctl(struct inode *inode, struct file *filep,
+                                 unsigned command, unsigned long argument);
+extern int blkif_check(kdev_t dev);
+extern int blkif_revalidate(kdev_t dev);
+extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
+extern void do_blkif_request (request_queue_t *rq); 
+
+extern void xlvbd_update_vbds(void);
+
+static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
+{
+    struct gendisk *gd = get_gendisk(xldev);
+    
+    if ( gd == NULL ) 
+        return NULL;
+    
+    return (xl_disk_t *)gd->real_devices + 
+        (MINOR(xldev) >> gd->minor_shift);
+}
+
+
+/* Virtual block-device subsystem. */
+extern int  xlvbd_init(void);
+extern void xlvbd_cleanup(void); 
+
+#endif /* __XEN_DRIVERS_COMMON_H__ */
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
new file mode 100644
index 0000000000..682906bf66
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
@@ -0,0 +1,540 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/frontend/vbd.c
+ * 
+ * Xenolinux virtual block-device driver.
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ */
+
+#include "common.h"
+#include <linux/blk.h>
+
+/*
+ * For convenience we distinguish between ide, scsi and 'other' (i.e.
+ * potentially combinations of the two) in the naming scheme and in a few 
+ * other places (like default readahead, etc).
+ */
+#define XLIDE_MAJOR_NAME  "hd"
+#define XLSCSI_MAJOR_NAME "sd"
+#define XLVBD_MAJOR_NAME "xvd"
+
+#define XLIDE_DEVS_PER_MAJOR   2
+#define XLSCSI_DEVS_PER_MAJOR 16
+#define XLVBD_DEVS_PER_MAJOR  16
+
+#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
+#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */
+
+#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
+#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */
+
+#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
+#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
+
+/* The below are for the generic drivers/block/ll_rw_block.c code. */
+static int xlide_blksize_size[256];
+static int xlide_hardsect_size[256];
+static int xlide_max_sectors[256];
+static int xlscsi_blksize_size[256];
+static int xlscsi_hardsect_size[256];
+static int xlscsi_max_sectors[256];
+static int xlvbd_blksize_size[256];
+static int xlvbd_hardsect_size[256];
+static int xlvbd_max_sectors[256];
+
+/* Information about our VBDs. */
+#define MAX_VBDS 64
+static int nr_vbds;
+static vdisk_t *vbd_info;
+
+static struct block_device_operations xlvbd_block_fops = 
+{
+    open:               blkif_open,
+    release:            blkif_release,
+    ioctl:              blkif_ioctl,
+    check_media_change: blkif_check,
+    revalidate:         blkif_revalidate,
+};
+
+static int xlvbd_get_vbd_info(vdisk_t *disk_info)
+{
+    vdisk_t         *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
+    blkif_request_t  req;
+    blkif_response_t rsp;
+    int              nr;
+
+    memset(&req, 0, sizeof(req));
+    req.operation   = BLKIF_OP_PROBE;
+    req.nr_segments = 1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    blkif_control_probe_send(&req, &rsp,
+                             (unsigned long)(virt_to_machine(buf)));
+#else
+    req.frame_and_sects[0] = virt_to_machine(buf) | 7;
+
+    blkif_control_send(&req, &rsp);
+#endif
+
+    if ( rsp.status <= 0 )
+    {
+        printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
+        return -1;
+    }
+
+    if ( (nr = rsp.status) > MAX_VBDS )
+         nr = MAX_VBDS;
+    memcpy(disk_info, buf, nr * sizeof(vdisk_t));
+
+    return nr;
+}
+
+/*
+ * xlvbd_init_device - initialise a VBD device
+ * @disk:              a vdisk_t describing the VBD
+ *
+ * Takes a vdisk_t * that describes a VBD the domain has access to.
+ * Performs appropriate initialisation and registration of the device.
+ *
+ * Care needs to be taken when making re-entrant calls to ensure that
+ * corruption does not occur.  Also, devices that are in use should not have
+ * their details updated.  This is the caller's responsibility.
+ */
+static int xlvbd_init_device(vdisk_t *xd)
+{
+    int device = xd->device;
+    int major  = MAJOR(device); 
+    int minor  = MINOR(device);
+    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
+    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
+    char *major_name;
+    struct gendisk *gd;
+    struct block_device *bd;
+    xl_disk_t *disk;
+    int i, rc = 0, max_part, partno;
+    unsigned long capacity;
+
+    unsigned char buf[64];
+
+    if ( (bd = bdget(device)) == NULL )
+        return -1;
+
+    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
+    {
+        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
+        rc = -1;
+        goto out;
+    }
+
+    if ( is_ide ) {
+
+	major_name = XLIDE_MAJOR_NAME; 
+	max_part   = XLIDE_MAX_PART;
+
+    } else if ( is_scsi ) {
+
+	major_name = XLSCSI_MAJOR_NAME;
+	max_part   = XLSCSI_MAX_PART;
+
+    } else { 
+
+        /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
+	printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", 
+	       major, minor);
+	is_scsi    = 1; 
+	major_name = "cciss"; 
+	max_part   = XLSCSI_MAX_PART;
+
+    }
+    
+    partno = minor & (max_part - 1); 
+    
+    if ( (gd = get_gendisk(device)) == NULL )
+    {
+        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
+        if ( rc < 0 )
+        {
+            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
+            goto out;
+        }
+
+        if ( is_ide )
+        { 
+            blksize_size[major]  = xlide_blksize_size;
+            hardsect_size[major] = xlide_hardsect_size;
+            max_sectors[major]   = xlide_max_sectors;
+            read_ahead[major]    = 8;
+        } 
+        else if ( is_scsi )
+        { 
+            blksize_size[major]  = xlscsi_blksize_size;
+            hardsect_size[major] = xlscsi_hardsect_size;
+            max_sectors[major]   = xlscsi_max_sectors;
+            read_ahead[major]    = 8;
+        }
+        else
+        { 
+            blksize_size[major]  = xlvbd_blksize_size;
+            hardsect_size[major] = xlvbd_hardsect_size;
+            max_sectors[major]   = xlvbd_max_sectors;
+            read_ahead[major]    = 8;
+        }
+
+        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_blkif_request);
+
+        /*
+         * Turn off barking 'headactive' mode. We dequeue buffer heads as
+         * soon as we pass them to the back-end driver.
+         */
+        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
+
+        /* Construct an appropriate gendisk structure. */
+        gd             = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
+        gd->major      = major;
+        gd->major_name = major_name; 
+    
+        gd->max_p      = max_part; 
+        if ( is_ide )
+        { 
+            gd->minor_shift  = XLIDE_PARTN_SHIFT; 
+            gd->nr_real      = XLIDE_DEVS_PER_MAJOR; 
+        } 
+        else if ( is_scsi )
+        { 
+            gd->minor_shift  = XLSCSI_PARTN_SHIFT; 
+            gd->nr_real      = XLSCSI_DEVS_PER_MAJOR; 
+        }
+        else
+        { 
+            gd->minor_shift  = XLVBD_PARTN_SHIFT; 
+            gd->nr_real      = XLVBD_DEVS_PER_MAJOR; 
+        }
+
+        /* 
+        ** The sizes[] and part[] arrays hold the sizes and other 
+        ** information about every partition with this 'major' (i.e. 
+        ** every disk sharing the 8 bit prefix * max partns per disk) 
+        */
+        gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
+        gd->part  = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), 
+                            GFP_KERNEL);
+        memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
+        memset(gd->part,  0, max_part * gd->nr_real 
+               * sizeof(struct hd_struct));
+
+
+        gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), 
+                                   GFP_KERNEL);
+        memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
+
+        gd->next   = NULL;            
+        gd->fops   = &xlvbd_block_fops;
+
+        gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), 
+                             GFP_KERNEL);
+        gd->flags  = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
+    
+        memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
+        memset(gd->flags, 0, gd->nr_real *  sizeof(*gd->flags));
+
+        add_gendisk(gd);
+
+        blk_size[major] = gd->sizes;
+    }
+
+    if ( xd->info & VDISK_READONLY )
+        set_device_ro(device, 1);
+
+    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
+
+    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
+    capacity = (unsigned long)xd->capacity;
+
+    if ( partno != 0 )
+    {
+        /*
+         * If this was previously set up as a real disc we will have set 
+         * up partition-table information. Virtual partitions override 
+         * 'real' partitions, and the two cannot coexist on a device.
+         */
+        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+             (gd->sizes[minor & ~(max_part-1)] != 0) )
+        {
+            /*
+             * Any non-zero sub-partition entries must be cleaned out before
+             * installing 'virtual' partition entries. The two types cannot
+             * coexist, and virtual partitions are favoured.
+             */
+            kdev_t dev = device & ~(max_part-1);
+            for ( i = max_part - 1; i > 0; i-- )
+            {
+                invalidate_device(dev+i, 1);
+                gd->part[MINOR(dev+i)].start_sect = 0;
+                gd->part[MINOR(dev+i)].nr_sects   = 0;
+                gd->sizes[MINOR(dev+i)]           = 0;
+            }
+            printk(KERN_ALERT
+                   "Virtual partitions found for /dev/%s - ignoring any "
+                   "real partition information we may have found.\n",
+                   disk_name(gd, MINOR(device), buf));
+        }
+
+        /* Need to skankily setup 'partition' information */
+        gd->part[minor].start_sect = 0; 
+        gd->part[minor].nr_sects   = capacity; 
+        gd->sizes[minor]           = capacity >>(BLOCK_SIZE_BITS-9); 
+
+        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+    }
+    else
+    {
+        gd->part[minor].nr_sects = capacity;
+        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
+        
+        /* Some final fix-ups depending on the device type */
+        if ( xd->info & VDISK_REMOVABLE )
+        { 
+            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; 
+            printk(KERN_ALERT 
+                   "Skipping partition check on %s /dev/%s\n", 
+                   (xd->info & VDISK_CDROM) ? "cdrom" : "removable",
+                   disk_name(gd, MINOR(device), buf)); 
+        }
+        else
+        {
+            /* Only check partitions on real discs (not virtual!). */
+            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+            {
+                printk(KERN_ALERT
+                       "Skipping partition check on virtual /dev/%s\n",
+                       disk_name(gd, MINOR(device), buf));
+                break;
+            }
+            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
+        }
+    }
+
+ out:
+    bdput(bd);    
+    return rc;
+}
+
+
+/*
+ * xlvbd_remove_device - remove a device node if possible
+ * @device:       numeric device ID
+ *
+ * Updates the gendisk structure and invalidates devices.
+ *
+ * This is OK for now but in future, should perhaps consider where this should
+ * deallocate gendisks / unregister devices.
+ */
+static int xlvbd_remove_device(int device)
+{
+    int i, rc = 0, minor = MINOR(device);
+    struct gendisk *gd;
+    struct block_device *bd;
+    xl_disk_t *disk = NULL;
+
+    if ( (bd = bdget(device)) == NULL )
+        return -1;
+
+    if ( ((gd = get_gendisk(device)) == NULL) ||
+         ((disk = xldev_to_xldisk(device)) == NULL) )
+        BUG();
+
+    if ( disk->usage != 0 )
+    {
+        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
+        rc = -1;
+        goto out;
+    }
+ 
+    if ( (minor & (gd->max_p-1)) != 0 )
+    {
+        /* 1: The VBD is mapped to a partition rather than a whole unit. */
+        invalidate_device(device, 1);
+	gd->part[minor].start_sect = 0;
+        gd->part[minor].nr_sects   = 0;
+        gd->sizes[minor]           = 0;
+
+        /* Clear the consists-of-virtual-partitions flag if possible. */
+        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
+        for ( i = 1; i < gd->max_p; i++ )
+            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
+                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+
+        /*
+         * If all virtual partitions are now gone, and a 'whole unit' VBD is
+         * present, then we can try to grok the unit's real partition table.
+         */
+        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
+             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
+        {
+            register_disk(gd,
+                          device&~(gd->max_p-1), 
+                          gd->max_p, 
+                          &xlvbd_block_fops,
+                          gd->part[minor&~(gd->max_p-1)].nr_sects);
+        }
+    }
+    else
+    {
+        /*
+         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
+         * NB. The partition entries are only cleared if there are no VBDs
+         * mapped to individual partitions on this unit.
+         */
+        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
+        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
+        while ( i >= 0 )
+        {
+            invalidate_device(device+i, 1);
+            gd->part[minor+i].start_sect = 0;
+            gd->part[minor+i].nr_sects   = 0;
+            gd->sizes[minor+i]           = 0;
+            i--;
+        }
+    }
+
+ out:
+    bdput(bd);
+    return rc;
+}
+
+/*
+ * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
+ * state. The VBDs need to be updated in this way when the domain is
+ * initialised and also each time we receive an XLBLK_UPDATE event.
+ */
+void xlvbd_update_vbds(void)
+{
+    int i, j, k, old_nr, new_nr;
+    vdisk_t *old_info, *new_info, *merged_info;
+
+    old_info = vbd_info;
+    old_nr   = nr_vbds;
+
+    new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
+    if (!new_info)
+        return;
+
+    if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
+        goto out;
+
+    /*
+     * Final list maximum size is old list + new list. This occurs only when
+     * old list and new list do not overlap at all, and we cannot yet destroy
+     * VBDs in the old list because the usage counts are busy.
+     */
+    merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
+    if (!merged_info)
+        goto out;
+
+    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
+    i = j = k = 0;
+
+    while ( (i < old_nr) && (j < new_nr) )
+    {
+        if ( old_info[i].device < new_info[j].device )
+        {
+            if ( xlvbd_remove_device(old_info[i].device) != 0 )
+                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+            i++;
+        }
+        else if ( old_info[i].device > new_info[j].device )
+        {
+            if ( xlvbd_init_device(&new_info[j]) == 0 )
+                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+            j++;
+        }
+        else
+        {
+            if ( ((old_info[i].capacity == new_info[j].capacity) &&
+                  (old_info[i].info == new_info[j].info)) ||
+                 (xlvbd_remove_device(old_info[i].device) != 0) )
+                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+            else if ( xlvbd_init_device(&new_info[j]) == 0 )
+                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+            i++; j++;
+        }
+    }
+
+    for ( ; i < old_nr; i++ )
+    {
+        if ( xlvbd_remove_device(old_info[i].device) != 0 )
+            memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+    }
+
+    for ( ; j < new_nr; j++ )
+    {
+        if ( xlvbd_init_device(&new_info[j]) == 0 )
+            memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+    }
+
+    vbd_info = merged_info;
+    nr_vbds  = k;
+
+    kfree(old_info);
+out:
+    kfree(new_info);
+}
+
+
+/*
+ * Set up all the linux device goop for the virtual block devices (vbd's) that
+ * we know about. Note that although from the backend driver's p.o.v. VBDs are
+ * addressed simply an opaque 16-bit device number, the domain creation tools 
+ * conventionally allocate these numbers to correspond to those used by 'real' 
+ * linux -- this is just for convenience as it means e.g. that the same 
+ * /etc/fstab can be used when booting with or without Xen.
+ */
+int xlvbd_init(void)
+{
+    int i;
+    
+    /*
+     * If compiled as a module, we don't support unloading yet. We therefore 
+     * permanently increment the reference count to disallow it.
+     */
+    SET_MODULE_OWNER(&xlvbd_block_fops);
+    MOD_INC_USE_COUNT;
+
+    /* Initialize the global arrays. */
+    for ( i = 0; i < 256; i++ ) 
+    {
+        xlide_blksize_size[i]  = 1024;
+        xlide_hardsect_size[i] = 512;
+        xlide_max_sectors[i]   = 512;
+
+        xlscsi_blksize_size[i]  = 1024;
+        xlscsi_hardsect_size[i] = 512;
+        xlscsi_max_sectors[i]   = 512;
+
+        xlvbd_blksize_size[i]  = 512;
+        xlvbd_hardsect_size[i] = 512;
+        xlvbd_max_sectors[i]   = 512;
+    }
+
+    vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
+    if (!vbd_info)
+        return -ENOMEM;
+
+    nr_vbds  = xlvbd_get_vbd_info(vbd_info);
+
+    if ( nr_vbds < 0 )
+    {
+        kfree(vbd_info);
+        vbd_info = NULL;
+        nr_vbds  = 0;
+    }
+    else
+    {
+        for ( i = 0; i < nr_vbds; i++ )
+            xlvbd_init_device(&vbd_info[i]);
+    }
+
+    return 0;
+}
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/console/Makefile b/linux-2.4-xen-sparse/arch/xen/drivers/console/Makefile
new file mode 100644
index 0000000000..aaa546a8f3
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/console/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-$(CONFIG_XEN_CONSOLE) := console.o
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/dom0/Makefile b/linux-2.4-xen-sparse/arch/xen/drivers/dom0/Makefile
new file mode 100644
index 0000000000..a3fee726b3
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/dom0/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := core.o
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/evtchn/Makefile b/linux-2.4-xen-sparse/arch/xen/drivers/evtchn/Makefile
new file mode 100644
index 0000000000..61c983f625
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/evtchn/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := evtchn.o
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/netif/Makefile b/linux-2.4-xen-sparse/arch/xen/drivers/netif/Makefile
new file mode 100644
index 0000000000..7474e71fb4
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/netif/Makefile
@@ -0,0 +1,10 @@
+
+O_TARGET := drv.o
+
+subdir-$(CONFIG_XEN_NETDEV_FRONTEND) += frontend
+obj-$(CONFIG_XEN_NETDEV_FRONTEND)    += frontend/drv.o
+
+subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
+obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
+
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/netif/backend/Makefile b/linux-2.4-xen-sparse/arch/xen/drivers/netif/backend/Makefile
new file mode 100644
index 0000000000..ba04eb449e
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/netif/backend/Makefile
@@ -0,0 +1,4 @@
+O_TARGET := drv.o
+export-objs := interface.o
+obj-y := main.o control.o interface.o
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/drivers/netif/frontend/Makefile b/linux-2.4-xen-sparse/arch/xen/drivers/netif/frontend/Makefile
new file mode 100644
index 0000000000..032d02d7cc
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/drivers/netif/frontend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := main.o
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/Makefile b/linux-2.4-xen-sparse/arch/xen/kernel/Makefile
new file mode 100644
index 0000000000..3eb0701958
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/Makefile
@@ -0,0 +1,20 @@
+
+.S.o:
+	$(CC) $(AFLAGS) -traditional -c $< -o $*.o
+
+all: kernel.o head.o init_task.o
+
+O_TARGET := kernel.o
+
+export-objs     := i386_ksyms.o gnttab.o skbuff.o ctrl_if.o
+
+obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o  \
+		ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \
+		i386_ksyms.o i387.o evtchn.o ctrl_if.o pci-dma.o \
+		reboot.o fixup.o gnttab.o skbuff.o
+
+ifdef CONFIG_PCI
+obj-y	+= pci-i386.o pci-pc.o
+endif
+
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/entry.S b/linux-2.4-xen-sparse/arch/xen/kernel/entry.S
new file mode 100644
index 0000000000..750d2b63b4
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/entry.S
@@ -0,0 +1,779 @@
+/*
+ *  linux/arch/i386/entry.S
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after a timer-interrupt and after each system call.
+ *
+ * I changed all the .align's to 4 (16 byte alignment), as that's faster
+ * on a 486.
+ *
+ * Stack layout in 'ret_to_user':
+ * 	ptrace needs to have all regs on the stack.
+ *	if the order here is changed, it needs to be
+ *	updated in fork.c:copy_process, signal.c:do_signal,
+ *	ptrace.c and ptrace.h
+ *
+ *	 0(%esp) - %ebx
+ *	 4(%esp) - %ecx
+ *	 8(%esp) - %edx
+ *       C(%esp) - %esi
+ *	10(%esp) - %edi
+ *	14(%esp) - %ebp
+ *	18(%esp) - %eax
+ *	1C(%esp) - %ds
+ *	20(%esp) - %es
+ *	24(%esp) - orig_eax
+ *	28(%esp) - %eip
+ *	2C(%esp) - %cs
+ *	30(%esp) - %eflags
+ *	34(%esp) - %oldesp
+ *	38(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+
+#include <linux/config.h>
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+
+EBX		= 0x00
+ECX		= 0x04
+EDX		= 0x08
+ESI		= 0x0C
+EDI		= 0x10
+EBP		= 0x14
+EAX		= 0x18
+DS		= 0x1C
+ES		= 0x20
+ORIG_EAX	= 0x24
+EIP		= 0x28
+CS		= 0x2C
+EFLAGS		= 0x30
+OLDESP		= 0x34
+OLDSS		= 0x38
+
+CF_MASK		= 0x00000001
+TF_MASK		= 0x00000100
+IF_MASK		= 0x00000200
+DF_MASK		= 0x00000400
+NT_MASK		= 0x00004000
+
+/* Offsets into task_struct. */
+state		=  0
+flags		=  4
+sigpending	=  8
+addr_limit	= 12
+exec_domain	= 16
+need_resched	= 20
+tsk_ptrace	= 24
+processor	= 52
+
+/* Offsets into shared_info_t. */
+#define evtchn_upcall_pending /* 0 */
+#define evtchn_upcall_mask       1
+
+ENOSYS = 38
+
+
+#define SAVE_ALL \
+	cld; \
+	pushl %es; \
+	pushl %ds; \
+	pushl %eax; \
+	pushl %ebp; \
+	pushl %edi; \
+	pushl %esi; \
+	pushl %edx; \
+	pushl %ecx; \
+	pushl %ebx; \
+	movl $(__KERNEL_DS),%edx; \
+	movl %edx,%ds; \
+	movl %edx,%es;
+
+#define RESTORE_ALL	\
+	popl %ebx;	\
+	popl %ecx;	\
+	popl %edx;	\
+	popl %esi;	\
+	popl %edi;	\
+	popl %ebp;	\
+	popl %eax;	\
+1:	popl %ds;	\
+2:	popl %es;	\
+	addl $4,%esp;	\
+3:	iret;		\
+.section .fixup,"ax";	\
+4:	movl $0,(%esp);	\
+	jmp 1b;		\
+5:	movl $0,(%esp);	\
+	jmp 2b;		\
+6:	pushl %ss;	\
+	popl %ds;	\
+	pushl %ss;	\
+	popl %es;	\
+	pushl $11;	\
+	call do_exit;	\
+.previous;		\
+.section __ex_table,"a";\
+	.align 4;	\
+	.long 1b,4b;	\
+	.long 2b,5b;	\
+	.long 3b,6b;	\
+.previous
+
+#define GET_CURRENT(reg) \
+	movl $-8192, reg; \
+	andl %esp, reg
+
+ENTRY(lcall7)
+	pushfl			# We get a different stack layout with call
+	pushl %eax		# gates, which has to be cleaned up later..
+	SAVE_ALL
+	movl EIP(%esp),%eax	# due to call gates, this is eflags, not eip..
+	movl CS(%esp),%edx	# this is eip..
+	movl EFLAGS(%esp),%ecx	# and this is cs..
+	movl %eax,EFLAGS(%esp)	#
+	andl $~(NT_MASK|TF_MASK|DF_MASK), %eax
+	pushl %eax
+	popfl
+	movl %edx,EIP(%esp)	# Now we move them to their "normal" places
+	movl %ecx,CS(%esp)	#
+	movl %esp,%ebx
+	pushl %ebx
+	andl $-8192,%ebx	# GET_CURRENT
+	movl exec_domain(%ebx),%edx	# Get the execution domain
+	movl 4(%edx),%edx	# Get the lcall7 handler for the domain
+	pushl $0x7
+	call *%edx
+	addl $4, %esp
+	popl %eax
+	jmp ret_to_user
+
+ENTRY(lcall27)
+	pushfl			# We get a different stack layout with call
+	pushl %eax		# gates, which has to be cleaned up later..
+	SAVE_ALL
+	movl EIP(%esp),%eax	# due to call gates, this is eflags, not eip..
+	movl CS(%esp),%edx	# this is eip..
+	movl EFLAGS(%esp),%ecx	# and this is cs..
+	movl %eax,EFLAGS(%esp)	#
+	andl $~(NT_MASK|TF_MASK|DF_MASK), %eax
+	pushl %eax
+	popfl
+	movl %edx,EIP(%esp)	# Now we move them to their "normal" places
+	movl %ecx,CS(%esp)	#
+	movl %esp,%ebx
+	pushl %ebx
+	andl $-8192,%ebx	# GET_CURRENT
+	movl exec_domain(%ebx),%edx	# Get the execution domain
+	movl 4(%edx),%edx	# Get the lcall7 handler for the domain
+	pushl $0x27
+	call *%edx
+	addl $4, %esp
+	popl %eax
+	jmp ret_to_user
+
+ENTRY(ret_from_fork)
+	pushl %ebx
+	call SYMBOL_NAME(schedule_tail)
+	addl $4, %esp
+	GET_CURRENT(%ebx)
+	testb $0x02,tsk_ptrace(%ebx)	# PT_TRACESYS
+	jne tracesys_exit
+	jmp ret_to_user
+
+/*
+ * Return to user mode is not as complex as all this looks,
+ * but we want the default path for a system call return to
+ * go as quickly as possible which is why some of this is
+ * less clear than it otherwise should be.
+ */
+ENTRY(system_call)
+	pushl %eax			# save orig_eax
+	SAVE_ALL
+	GET_CURRENT(%ebx)
+	testb $0x02,tsk_ptrace(%ebx)	# PT_TRACESYS
+	jne tracesys
+	cmpl $(NR_syscalls),%eax
+	jae badsys
+	call *SYMBOL_NAME(sys_call_table)(,%eax,4)
+	movl %eax,EAX(%esp)		# save the return value
+ret_to_user:
+        movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi
+        movb $1,evtchn_upcall_mask(%esi) # make tests atomic
+ret_to_user_nocli:
+	cmpl $0,need_resched(%ebx)
+	jne  reschedule
+	cmpl $0,sigpending(%ebx)
+	je   safesti                    # ensure need_resched updates are seen
+/*signal_return:*/
+	movb $0,evtchn_upcall_mask(%esi) # reenable event callbacks
+	movl %esp,%eax
+	xorl %edx,%edx
+	call SYMBOL_NAME(do_signal)
+	jmp  safesti
+
+	ALIGN
+restore_all:
+	RESTORE_ALL
+
+	ALIGN
+tracesys:
+	movl $-ENOSYS,EAX(%esp)
+	call SYMBOL_NAME(syscall_trace)
+	movl ORIG_EAX(%esp),%eax
+	cmpl $(NR_syscalls),%eax
+	jae tracesys_exit
+	call *SYMBOL_NAME(sys_call_table)(,%eax,4)
+	movl %eax,EAX(%esp)		# save the return value
+tracesys_exit:
+	call SYMBOL_NAME(syscall_trace)
+	jmp ret_to_user
+badsys:
+	movl $-ENOSYS,EAX(%esp)
+	jmp ret_to_user
+
+	ALIGN
+ENTRY(ret_from_intr)
+	GET_CURRENT(%ebx)
+ret_from_exception:
+	movb CS(%esp),%al
+	testl $2,%eax
+	jne ret_to_user
+	jmp restore_all
+
+	ALIGN
+reschedule:
+        movb $0,evtchn_upcall_mask(%esi)  # reenable event callbacks
+	call SYMBOL_NAME(schedule)        # test
+	jmp  ret_to_user
+
+ENTRY(divide_error)
+	pushl $0		# no error code
+	pushl $ SYMBOL_NAME(do_divide_error)
+	ALIGN
+error_code:
+	pushl %ds
+	pushl %eax
+	xorl %eax,%eax
+	pushl %ebp
+	pushl %edi
+	pushl %esi
+	pushl %edx
+	decl %eax			# eax = -1
+	pushl %ecx
+	pushl %ebx
+	GET_CURRENT(%ebx)
+	cld
+	movl %es,%ecx
+	movl ORIG_EAX(%esp), %esi	# get the error code
+	movl ES(%esp), %edi		# get the function address
+	movl %eax, ORIG_EAX(%esp)
+	movl %ecx, ES(%esp)
+	movl %esp,%edx
+	pushl %esi			# push the error code
+	pushl %edx			# push the pt_regs pointer
+	movl $(__KERNEL_DS),%edx
+	movl %edx,%ds
+	movl %edx,%es
+	call *%edi
+	addl $8,%esp
+	jmp ret_from_exception
+
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+ENTRY(hypervisor_callback)
+        pushl %eax
+        SAVE_ALL
+        GET_CURRENT(%ebx)
+        movl EIP(%esp),%eax
+        cmpl $scrit,%eax
+        jb   11f
+        cmpl $ecrit,%eax
+        jb   critical_region_fixup
+11:     push %esp
+        call evtchn_do_upcall
+        add  $4,%esp
+        movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi
+        movb CS(%esp),%cl
+	test $2,%cl          # slow return to ring 2 or 3
+	jne  ret_to_user_nocli
+safesti:movb $0,evtchn_upcall_mask(%esi) # reenable event callbacks
+scrit:  /**** START OF CRITICAL REGION ****/
+        testb $0xFF,evtchn_upcall_pending(%esi)
+        jnz  14f              # process more events if necessary...
+        RESTORE_ALL
+14:     movb $1,evtchn_upcall_mask(%esi)
+        jmp  11b
+ecrit:  /**** END OF CRITICAL REGION ****/
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame. 
+critical_region_fixup:
+        addl $critical_fixup_table-scrit,%eax
+        movzbl (%eax),%eax    # %eax contains num bytes popped
+        mov  %esp,%esi
+        add  %eax,%esi        # %esi points at end of src region
+        mov  %esp,%edi
+        add  $0x34,%edi       # %edi points at end of dst region
+        mov  %eax,%ecx
+        shr  $2,%ecx          # convert words to bytes
+        je   16f              # skip loop if nothing to copy
+15:     subl $4,%esi          # pre-decrementing copy loop
+        subl $4,%edi
+        movl (%esi),%eax
+        movl %eax,(%edi)
+        loop 15b
+16:     movl %edi,%esp        # final %edi is top of merged stack
+        jmp  11b
+        
+critical_fixup_table:        
+        .byte 0x00,0x00,0x00                  # testb $0xFF,(%esi)
+        .byte 0x00,0x00                       # jnz  14f
+        .byte 0x00                            # pop  %ebx
+        .byte 0x04                            # pop  %ecx
+        .byte 0x08                            # pop  %edx
+        .byte 0x0c                            # pop  %esi
+        .byte 0x10                            # pop  %edi
+        .byte 0x14                            # pop  %ebp
+        .byte 0x18                            # pop  %eax
+        .byte 0x1c                            # pop  %ds
+        .byte 0x20                            # pop  %es
+        .byte 0x24,0x24,0x24                  # add  $4,%esp
+        .byte 0x28                            # iret
+        .byte 0x00,0x00,0x00,0x00             # movb $1,4(%esi)
+        .byte 0x00,0x00                       # jmp  11b
+
+# Hypervisor uses this for application faults while it executes.
+ENTRY(failsafe_callback)
+1:      popl %ds
+2:      popl %es
+3:      popl %fs
+4:      popl %gs
+5:      iret
+.section .fixup,"ax";	\
+6:	movl $0,(%esp);	\
+	jmp 1b;		\
+7:	movl $0,(%esp);	\
+	jmp 2b;		\
+8:	movl $0,(%esp);	\
+	jmp 3b;		\
+9:	movl $0,(%esp);	\
+	jmp 4b;		\
+10:	pushl %ss;	\
+	popl %ds;	\
+	pushl %ss;	\
+	popl %es;	\
+	pushl $11;	\
+	call do_exit;	\
+.previous;		\
+.section __ex_table,"a";\
+	.align 4;	\
+	.long 1b,6b;	\
+	.long 2b,7b;	\
+	.long 3b,8b;	\
+	.long 4b,9b;	\
+	.long 5b,10b;	\
+.previous
+        
+ENTRY(coprocessor_error)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_coprocessor_error)
+	jmp error_code
+
+ENTRY(simd_coprocessor_error)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
+	jmp error_code
+
+ENTRY(device_not_available)
+	pushl $-1		# mark this as an int
+	SAVE_ALL
+	GET_CURRENT(%ebx)
+	call SYMBOL_NAME(math_state_restore)
+	jmp ret_from_exception
+
+ENTRY(debug)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_debug)
+	jmp error_code
+
+ENTRY(int3)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_int3)
+	jmp error_code
+
+ENTRY(overflow)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_overflow)
+	jmp error_code
+
+ENTRY(bounds)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_bounds)
+	jmp error_code
+
+ENTRY(invalid_op)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_invalid_op)
+	jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
+	jmp error_code
+
+ENTRY(double_fault)
+	pushl $ SYMBOL_NAME(do_double_fault)
+	jmp error_code
+
+ENTRY(invalid_TSS)
+	pushl $ SYMBOL_NAME(do_invalid_TSS)
+	jmp error_code
+
+ENTRY(segment_not_present)
+	pushl $ SYMBOL_NAME(do_segment_not_present)
+	jmp error_code
+
+ENTRY(stack_segment)
+	pushl $ SYMBOL_NAME(do_stack_segment)
+	jmp error_code
+
+ENTRY(general_protection)
+	pushl $ SYMBOL_NAME(do_general_protection)
+	jmp error_code
+
+ENTRY(alignment_check)
+	pushl $ SYMBOL_NAME(do_alignment_check)
+	jmp error_code
+
+# This handler is special, because it gets an extra value on its stack,
+# which is the linear faulting address.
+#define PAGE_FAULT_STUB(_name1, _name2)                                  \
+ENTRY(_name1)                                                            \
+	pushl %ds                                                      ; \
+	pushl %eax                                                     ; \
+	xorl %eax,%eax                                                 ; \
+	pushl %ebp                                                     ; \
+	pushl %edi                                                     ; \
+	pushl %esi                                                     ; \
+	pushl %edx                                                     ; \
+	decl %eax                      /* eax = -1 */                  ; \
+	pushl %ecx                                                     ; \
+	pushl %ebx                                                     ; \
+	GET_CURRENT(%ebx)                                              ; \
+	cld                                                            ; \
+	movl %es,%ecx                                                  ; \
+	movl ORIG_EAX(%esp), %esi      /* get the error code */        ; \
+	movl ES(%esp), %edi            /* get the faulting address */  ; \
+	movl %eax, ORIG_EAX(%esp)                                      ; \
+	movl %ecx, ES(%esp)                                            ; \
+	movl %esp,%edx                                                 ; \
+        pushl %edi                     /* push the faulting address */ ; \
+	pushl %esi                     /* push the error code */       ; \
+	pushl %edx                     /* push the pt_regs pointer */  ; \
+	movl $(__KERNEL_DS),%edx                                       ; \
+	movl %edx,%ds                                                  ; \
+	movl %edx,%es                                                  ; \
+	call SYMBOL_NAME(_name2)                                       ; \
+	addl $12,%esp                                                  ; \
+	jmp ret_from_exception                                         ;
+PAGE_FAULT_STUB(page_fault, do_page_fault)
+
+ENTRY(machine_check)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_machine_check)
+	jmp error_code
+
+ENTRY(fixup_4gb_segment)
+	pushl $ SYMBOL_NAME(do_fixup_4gb_segment)
+	jmp error_code
+
+.data
+ENTRY(sys_call_table)
+	.long SYMBOL_NAME(sys_ni_syscall)	/* 0  -  old "setup()" system call*/
+	.long SYMBOL_NAME(sys_exit)
+	.long SYMBOL_NAME(sys_fork)
+	.long SYMBOL_NAME(sys_read)
+	.long SYMBOL_NAME(sys_write)
+	.long SYMBOL_NAME(sys_open)		/* 5 */
+	.long SYMBOL_NAME(sys_close)
+	.long SYMBOL_NAME(sys_waitpid)
+	.long SYMBOL_NAME(sys_creat)
+	.long SYMBOL_NAME(sys_link)
+	.long SYMBOL_NAME(sys_unlink)		/* 10 */
+	.long SYMBOL_NAME(sys_execve)
+	.long SYMBOL_NAME(sys_chdir)
+	.long SYMBOL_NAME(sys_time)
+	.long SYMBOL_NAME(sys_mknod)
+	.long SYMBOL_NAME(sys_chmod)		/* 15 */
+	.long SYMBOL_NAME(sys_lchown16)
+	.long SYMBOL_NAME(sys_ni_syscall)				/* old break syscall holder */
+	.long SYMBOL_NAME(sys_stat)
+	.long SYMBOL_NAME(sys_lseek)
+	.long SYMBOL_NAME(sys_getpid)		/* 20 */
+	.long SYMBOL_NAME(sys_mount)
+	.long SYMBOL_NAME(sys_oldumount)
+	.long SYMBOL_NAME(sys_setuid16)
+	.long SYMBOL_NAME(sys_getuid16)
+	.long SYMBOL_NAME(sys_stime)		/* 25 */
+	.long SYMBOL_NAME(sys_ptrace)
+	.long SYMBOL_NAME(sys_alarm)
+	.long SYMBOL_NAME(sys_fstat)
+	.long SYMBOL_NAME(sys_pause)
+	.long SYMBOL_NAME(sys_utime)		/* 30 */
+	.long SYMBOL_NAME(sys_ni_syscall)				/* old stty syscall holder */
+	.long SYMBOL_NAME(sys_ni_syscall)				/* old gtty syscall holder */
+	.long SYMBOL_NAME(sys_access)
+	.long SYMBOL_NAME(sys_nice)
+	.long SYMBOL_NAME(sys_ni_syscall)	/* 35 */		/* old ftime syscall holder */
+	.long SYMBOL_NAME(sys_sync)
+	.long SYMBOL_NAME(sys_kill)
+	.long SYMBOL_NAME(sys_rename)
+	.long SYMBOL_NAME(sys_mkdir)
+	.long SYMBOL_NAME(sys_rmdir)		/* 40 */
+	.long SYMBOL_NAME(sys_dup)
+	.long SYMBOL_NAME(sys_pipe)
+	.long SYMBOL_NAME(sys_times)
+	.long SYMBOL_NAME(sys_ni_syscall)				/* old prof syscall holder */
+	.long SYMBOL_NAME(sys_brk)		/* 45 */
+	.long SYMBOL_NAME(sys_setgid16)
+	.long SYMBOL_NAME(sys_getgid16)
+	.long SYMBOL_NAME(sys_signal)
+	.long SYMBOL_NAME(sys_geteuid16)
+	.long SYMBOL_NAME(sys_getegid16)	/* 50 */
+	.long SYMBOL_NAME(sys_acct)
+	.long SYMBOL_NAME(sys_umount)					/* recycled never used phys() */
+	.long SYMBOL_NAME(sys_ni_syscall)				/* old lock syscall holder */
+	.long SYMBOL_NAME(sys_ioctl)
+	.long SYMBOL_NAME(sys_fcntl)		/* 55 */
+	.long SYMBOL_NAME(sys_ni_syscall)				/* old mpx syscall holder */
+	.long SYMBOL_NAME(sys_setpgid)
+	.long SYMBOL_NAME(sys_ni_syscall)				/* old ulimit syscall holder */
+	.long SYMBOL_NAME(sys_olduname)
+	.long SYMBOL_NAME(sys_umask)		/* 60 */
+	.long SYMBOL_NAME(sys_chroot)
+	.long SYMBOL_NAME(sys_ustat)
+	.long SYMBOL_NAME(sys_dup2)
+	.long SYMBOL_NAME(sys_getppid)
+	.long SYMBOL_NAME(sys_getpgrp)		/* 65 */
+	.long SYMBOL_NAME(sys_setsid)
+	.long SYMBOL_NAME(sys_sigaction)
+	.long SYMBOL_NAME(sys_sgetmask)
+	.long SYMBOL_NAME(sys_ssetmask)
+	.long SYMBOL_NAME(sys_setreuid16)	/* 70 */
+	.long SYMBOL_NAME(sys_setregid16)
+	.long SYMBOL_NAME(sys_sigsuspend)
+	.long SYMBOL_NAME(sys_sigpending)
+	.long SYMBOL_NAME(sys_sethostname)
+	.long SYMBOL_NAME(sys_setrlimit)	/* 75 */
+	.long SYMBOL_NAME(sys_old_getrlimit)
+	.long SYMBOL_NAME(sys_getrusage)
+	.long SYMBOL_NAME(sys_gettimeofday)
+	.long SYMBOL_NAME(sys_settimeofday)
+	.long SYMBOL_NAME(sys_getgroups16)	/* 80 */
+	.long SYMBOL_NAME(sys_setgroups16)
+	.long SYMBOL_NAME(old_select)
+	.long SYMBOL_NAME(sys_symlink)
+	.long SYMBOL_NAME(sys_lstat)
+	.long SYMBOL_NAME(sys_readlink)		/* 85 */
+	.long SYMBOL_NAME(sys_uselib)
+	.long SYMBOL_NAME(sys_swapon)
+	.long SYMBOL_NAME(sys_reboot)
+	.long SYMBOL_NAME(old_readdir)
+	.long SYMBOL_NAME(old_mmap)		/* 90 */
+	.long SYMBOL_NAME(sys_munmap)
+	.long SYMBOL_NAME(sys_truncate)
+	.long SYMBOL_NAME(sys_ftruncate)
+	.long SYMBOL_NAME(sys_fchmod)
+	.long SYMBOL_NAME(sys_fchown16)		/* 95 */
+	.long SYMBOL_NAME(sys_getpriority)
+	.long SYMBOL_NAME(sys_setpriority)
+	.long SYMBOL_NAME(sys_ni_syscall)				/* old profil syscall holder */
+	.long SYMBOL_NAME(sys_statfs)
+	.long SYMBOL_NAME(sys_fstatfs)		/* 100 */
+	.long SYMBOL_NAME(sys_ioperm)
+	.long SYMBOL_NAME(sys_socketcall)
+	.long SYMBOL_NAME(sys_syslog)
+	.long SYMBOL_NAME(sys_setitimer)
+	.long SYMBOL_NAME(sys_getitimer)	/* 105 */
+	.long SYMBOL_NAME(sys_newstat)
+	.long SYMBOL_NAME(sys_newlstat)
+	.long SYMBOL_NAME(sys_newfstat)
+	.long SYMBOL_NAME(sys_uname)
+	.long SYMBOL_NAME(sys_iopl)		/* 110 */
+	.long SYMBOL_NAME(sys_vhangup)
+	.long SYMBOL_NAME(sys_ni_syscall)	/* old "idle" system call */
+	.long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */
+	.long SYMBOL_NAME(sys_wait4)
+	.long SYMBOL_NAME(sys_swapoff)		/* 115 */
+	.long SYMBOL_NAME(sys_sysinfo)
+	.long SYMBOL_NAME(sys_ipc)
+	.long SYMBOL_NAME(sys_fsync)
+	.long SYMBOL_NAME(sys_sigreturn)
+	.long SYMBOL_NAME(sys_clone)		/* 120 */
+	.long SYMBOL_NAME(sys_setdomainname)
+	.long SYMBOL_NAME(sys_newuname)
+	.long SYMBOL_NAME(sys_modify_ldt)
+	.long SYMBOL_NAME(sys_adjtimex)
+	.long SYMBOL_NAME(sys_mprotect)		/* 125 */
+	.long SYMBOL_NAME(sys_sigprocmask)
+	.long SYMBOL_NAME(sys_create_module)
+	.long SYMBOL_NAME(sys_init_module)
+	.long SYMBOL_NAME(sys_delete_module)
+	.long SYMBOL_NAME(sys_get_kernel_syms)	/* 130 */
+	.long SYMBOL_NAME(sys_quotactl)
+	.long SYMBOL_NAME(sys_getpgid)
+	.long SYMBOL_NAME(sys_fchdir)
+	.long SYMBOL_NAME(sys_bdflush)
+	.long SYMBOL_NAME(sys_sysfs)		/* 135 */
+	.long SYMBOL_NAME(sys_personality)
+	.long SYMBOL_NAME(sys_ni_syscall)	/* for afs_syscall */
+	.long SYMBOL_NAME(sys_setfsuid16)
+	.long SYMBOL_NAME(sys_setfsgid16)
+	.long SYMBOL_NAME(sys_llseek)		/* 140 */
+	.long SYMBOL_NAME(sys_getdents)
+	.long SYMBOL_NAME(sys_select)
+	.long SYMBOL_NAME(sys_flock)
+	.long SYMBOL_NAME(sys_msync)
+	.long SYMBOL_NAME(sys_readv)		/* 145 */
+	.long SYMBOL_NAME(sys_writev)
+	.long SYMBOL_NAME(sys_getsid)
+	.long SYMBOL_NAME(sys_fdatasync)
+	.long SYMBOL_NAME(sys_sysctl)
+	.long SYMBOL_NAME(sys_mlock)		/* 150 */
+	.long SYMBOL_NAME(sys_munlock)
+	.long SYMBOL_NAME(sys_mlockall)
+	.long SYMBOL_NAME(sys_munlockall)
+	.long SYMBOL_NAME(sys_sched_setparam)
+	.long SYMBOL_NAME(sys_sched_getparam)   /* 155 */
+	.long SYMBOL_NAME(sys_sched_setscheduler)
+	.long SYMBOL_NAME(sys_sched_getscheduler)
+	.long SYMBOL_NAME(sys_sched_yield)
+	.long SYMBOL_NAME(sys_sched_get_priority_max)
+	.long SYMBOL_NAME(sys_sched_get_priority_min)  /* 160 */
+	.long SYMBOL_NAME(sys_sched_rr_get_interval)
+	.long SYMBOL_NAME(sys_nanosleep)
+	.long SYMBOL_NAME(sys_mremap)
+	.long SYMBOL_NAME(sys_setresuid16)
+	.long SYMBOL_NAME(sys_getresuid16)	/* 165 */
+	.long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */
+	.long SYMBOL_NAME(sys_query_module)
+	.long SYMBOL_NAME(sys_poll)
+	.long SYMBOL_NAME(sys_nfsservctl)
+	.long SYMBOL_NAME(sys_setresgid16)	/* 170 */
+	.long SYMBOL_NAME(sys_getresgid16)
+	.long SYMBOL_NAME(sys_prctl)
+	.long SYMBOL_NAME(sys_rt_sigreturn)
+	.long SYMBOL_NAME(sys_rt_sigaction)
+	.long SYMBOL_NAME(sys_rt_sigprocmask)	/* 175 */
+	.long SYMBOL_NAME(sys_rt_sigpending)
+	.long SYMBOL_NAME(sys_rt_sigtimedwait)
+	.long SYMBOL_NAME(sys_rt_sigqueueinfo)
+	.long SYMBOL_NAME(sys_rt_sigsuspend)
+	.long SYMBOL_NAME(sys_pread)		/* 180 */
+	.long SYMBOL_NAME(sys_pwrite)
+	.long SYMBOL_NAME(sys_chown16)
+	.long SYMBOL_NAME(sys_getcwd)
+	.long SYMBOL_NAME(sys_capget)
+	.long SYMBOL_NAME(sys_capset)           /* 185 */
+	.long SYMBOL_NAME(sys_sigaltstack)
+	.long SYMBOL_NAME(sys_sendfile)
+	.long SYMBOL_NAME(sys_ni_syscall)		/* streams1 */
+	.long SYMBOL_NAME(sys_ni_syscall)		/* streams2 */
+	.long SYMBOL_NAME(sys_vfork)            /* 190 */
+	.long SYMBOL_NAME(sys_getrlimit)
+	.long SYMBOL_NAME(sys_mmap2)
+	.long SYMBOL_NAME(sys_truncate64)
+	.long SYMBOL_NAME(sys_ftruncate64)
+	.long SYMBOL_NAME(sys_stat64)		/* 195 */
+	.long SYMBOL_NAME(sys_lstat64)
+	.long SYMBOL_NAME(sys_fstat64)
+	.long SYMBOL_NAME(sys_lchown)
+	.long SYMBOL_NAME(sys_getuid)
+	.long SYMBOL_NAME(sys_getgid)		/* 200 */
+	.long SYMBOL_NAME(sys_geteuid)
+	.long SYMBOL_NAME(sys_getegid)
+	.long SYMBOL_NAME(sys_setreuid)
+	.long SYMBOL_NAME(sys_setregid)
+	.long SYMBOL_NAME(sys_getgroups)	/* 205 */
+	.long SYMBOL_NAME(sys_setgroups)
+	.long SYMBOL_NAME(sys_fchown)
+	.long SYMBOL_NAME(sys_setresuid)
+	.long SYMBOL_NAME(sys_getresuid)
+	.long SYMBOL_NAME(sys_setresgid)	/* 210 */
+	.long SYMBOL_NAME(sys_getresgid)
+	.long SYMBOL_NAME(sys_chown)
+	.long SYMBOL_NAME(sys_setuid)
+	.long SYMBOL_NAME(sys_setgid)
+	.long SYMBOL_NAME(sys_setfsuid)		/* 215 */
+	.long SYMBOL_NAME(sys_setfsgid)
+	.long SYMBOL_NAME(sys_pivot_root)
+	.long SYMBOL_NAME(sys_mincore)
+	.long SYMBOL_NAME(sys_madvise)
+	.long SYMBOL_NAME(sys_getdents64)	/* 220 */
+	.long SYMBOL_NAME(sys_fcntl64)
+	.long SYMBOL_NAME(sys_ni_syscall)	/* reserved for TUX */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* Reserved for Security */
+	.long SYMBOL_NAME(sys_gettid)
+	.long SYMBOL_NAME(sys_readahead)	/* 225 */
+	.long SYMBOL_NAME(sys_setxattr)
+	.long SYMBOL_NAME(sys_lsetxattr)
+	.long SYMBOL_NAME(sys_fsetxattr)
+	.long SYMBOL_NAME(sys_getxattr)
+	.long SYMBOL_NAME(sys_lgetxattr)	/* 230 */
+	.long SYMBOL_NAME(sys_fgetxattr)
+	.long SYMBOL_NAME(sys_listxattr)
+	.long SYMBOL_NAME(sys_llistxattr)
+	.long SYMBOL_NAME(sys_flistxattr)
+	.long SYMBOL_NAME(sys_removexattr)	/* 235 */
+	.long SYMBOL_NAME(sys_lremovexattr)
+	.long SYMBOL_NAME(sys_fremovexattr)
+ 	.long SYMBOL_NAME(sys_tkill)
+	.long SYMBOL_NAME(sys_sendfile64)
+	.long SYMBOL_NAME(sys_ni_syscall)	/* 240 reserved for futex */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* reserved for sched_setaffinity */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* reserved for sched_getaffinity */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_set_thread_area */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_get_thread_area */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* 245 sys_io_setup */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_io_destroy */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_io_getevents */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_io_submit */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_io_cancel */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* 250 sys_alloc_hugepages */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_free_hugepages */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_exit_group */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_lookup_dcookie */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_epoll_create */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_epoll_ctl 255 */
+	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_epoll_wait */
+ 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_remap_file_pages */
+ 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_set_tid_address */
+
+	.rept NR_syscalls-(.-sys_call_table)/4
+		.long SYMBOL_NAME(sys_ni_syscall)
+	.endr
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/head.S b/linux-2.4-xen-sparse/arch/xen/kernel/head.S
new file mode 100644
index 0000000000..e8c563572b
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/head.S
@@ -0,0 +1,41 @@
+
+.section __xen_guest
+    .ascii "GUEST_OS=linux,GUEST_VER=2.4,XEN_VER=3.0,VIRT_BASE=0xC0000000"
+    .ascii ",LOADER=generic"
+    .byte  0
+
+.text
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+ENTRY(stext)
+ENTRY(_stext)
+        cld
+        lss stack_start,%esp
+        /* Copy the necessary stuff from xen_start_info structure. */
+        mov $SYMBOL_NAME(xen_start_info_union),%edi
+        mov $128,%ecx
+        rep movsl
+        jmp SYMBOL_NAME(start_kernel)
+
+ENTRY(stack_start)
+	.long SYMBOL_NAME(init_task_union)+8192, __KERNEL_DS
+
+.org 0x1000
+ENTRY(empty_zero_page)
+
+.org 0x2000
+ENTRY(default_ldt)
+
+.org 0x3000
+ENTRY(cpu0_pte_quicklist)
+
+.org 0x3400
+ENTRY(cpu0_pgd_quicklist)
+        
+.org 0x3800
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/i386_ksyms.c b/linux-2.4-xen-sparse/arch/xen/kernel/i386_ksyms.c
new file mode 100644
index 0000000000..0938a64f65
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/i386_ksyms.c
@@ -0,0 +1,180 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/user.h>
+#include <linux/elfcore.h>
+#include <linux/mca.h>
+#include <linux/sched.h>
+#include <linux/in6.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/pm.h>
+#include <linux/pci.h>
+#include <linux/apm_bios.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/tty.h>
+
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/io.h>
+#include <asm/hardirq.h>
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/mmx.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+
+extern void dump_thread(struct pt_regs *, struct user *);
+extern spinlock_t rtc_lock;
+
+#if defined(CONFIG_APMXXX) || defined(CONFIG_APM_MODULEXXX)
+extern void machine_real_restart(unsigned char *, int);
+EXPORT_SYMBOL(machine_real_restart);
+extern void default_idle(void);
+EXPORT_SYMBOL(default_idle);
+#endif
+
+#ifdef CONFIG_SMP
+extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
+extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
+#endif
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
+EXPORT_SYMBOL(drive_info);
+#endif
+
+// XXX extern unsigned long get_cmos_time(void);
+
+/* platform dependent support */
+EXPORT_SYMBOL(boot_cpu_data);
+EXPORT_SYMBOL(dump_thread);
+EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL(dump_extended_fpu);
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(iounmap);
+EXPORT_SYMBOL(enable_irq);
+EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(disable_irq_nosync);
+EXPORT_SYMBOL(probe_irq_mask);
+EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(pm_idle);
+EXPORT_SYMBOL(pm_power_off);
+EXPORT_SYMBOL(apm_info);
+//EXPORT_SYMBOL(gdt);
+EXPORT_SYMBOL(empty_zero_page);
+EXPORT_SYMBOL(phys_to_machine_mapping);
+
+
+#ifdef CONFIG_DEBUG_IOVIRT
+EXPORT_SYMBOL(__io_virt_debug);
+#endif
+
+EXPORT_SYMBOL_NOVERS(__down_failed);
+EXPORT_SYMBOL_NOVERS(__down_failed_interruptible);
+EXPORT_SYMBOL_NOVERS(__down_failed_trylock);
+EXPORT_SYMBOL_NOVERS(__up_wakeup);
+/* Networking helper routines. */
+EXPORT_SYMBOL(csum_partial_copy_generic);
+/* Delay loops */
+EXPORT_SYMBOL(__ndelay);
+EXPORT_SYMBOL(__udelay);
+EXPORT_SYMBOL(__delay);
+EXPORT_SYMBOL(__const_udelay);
+
+EXPORT_SYMBOL_NOVERS(__get_user_1);
+EXPORT_SYMBOL_NOVERS(__get_user_2);
+EXPORT_SYMBOL_NOVERS(__get_user_4);
+
+EXPORT_SYMBOL(strtok);
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(strstr);
+
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__generic_copy_from_user);
+EXPORT_SYMBOL(__generic_copy_to_user);
+EXPORT_SYMBOL(strnlen_user);
+
+
+EXPORT_SYMBOL(pci_alloc_consistent);
+EXPORT_SYMBOL(pci_free_consistent);
+
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pcibios_penalize_isa_irq);
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+
+
+#ifdef CONFIG_X86_USE_3DNOW
+EXPORT_SYMBOL(_mmx_memcpy);
+EXPORT_SYMBOL(mmx_clear_page);
+EXPORT_SYMBOL(mmx_copy_page);
+#endif
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(cpu_data);
+EXPORT_SYMBOL(kernel_flag_cacheline);
+EXPORT_SYMBOL(smp_num_cpus);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL_NOVERS(__write_lock_failed);
+EXPORT_SYMBOL_NOVERS(__read_lock_failed);
+
+/* Global SMP irq stuff */
+EXPORT_SYMBOL(synchronize_irq);
+EXPORT_SYMBOL(global_irq_holder);
+EXPORT_SYMBOL(__global_cli);
+EXPORT_SYMBOL(__global_sti);
+EXPORT_SYMBOL(__global_save_flags);
+EXPORT_SYMBOL(__global_restore_flags);
+EXPORT_SYMBOL(smp_call_function);
+
+/* TLB flushing */
+EXPORT_SYMBOL(flush_tlb_page);
+
+/* HT support */
+EXPORT_SYMBOL(smp_num_siblings);
+EXPORT_SYMBOL(cpu_sibling_map);
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+#endif
+
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(screen_info);
+#endif
+
+EXPORT_SYMBOL(get_wchan);
+
+EXPORT_SYMBOL(rtc_lock);
+
+#undef memcpy
+#undef memset
+extern void * memset(void *,int,__kernel_size_t);
+extern void * memcpy(void *,const void *,__kernel_size_t);
+EXPORT_SYMBOL_NOVERS(memcpy);
+EXPORT_SYMBOL_NOVERS(memset);
+
+#ifdef CONFIG_HAVE_DEC_LOCK
+EXPORT_SYMBOL(atomic_dec_and_lock);
+#endif
+
+#ifdef CONFIG_MULTIQUAD
+EXPORT_SYMBOL(xquad_portio);
+#endif
+
+#include <asm/xen_proc.h>
+EXPORT_SYMBOL(create_xen_proc_entry);
+EXPORT_SYMBOL(remove_xen_proc_entry);
+
+EXPORT_SYMBOL(evtchn_do_upcall);
+EXPORT_SYMBOL(force_evtchn_callback);
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/irq.c b/linux-2.4-xen-sparse/arch/xen/kernel/irq.c
new file mode 100644
index 0000000000..457ca3f553
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/irq.c
@@ -0,0 +1,1242 @@
+/*
+ *	linux/arch/i386/kernel/irq.c
+ *
+ *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+/*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <linux/config.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/kernel_stat.h>
+#include <linux/irq.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/atomic.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/delay.h>
+#include <asm/desc.h>
+#include <asm/irq.h>
+
+
+
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * every controller has a 'controller-template', that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the apropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * the code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
+	{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
+
+static void register_irq_proc (unsigned int irq);
+
+/*
+ * Special irq handlers.
+ */
+
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+
+/*
+ * Generic no controller code
+ */
+
+static void enable_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+static void disable_none(unsigned int irq) { }
+static void ack_none(unsigned int irq)
+{
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves, it doesnt deserve
+ * a generic callback i think.
+ */
+#if CONFIG_X86
+	printk("unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * Currently unexpected vectors happen only on SMP and APIC.
+	 * We _must_ ack these because every local APIC has only N
+	 * irq slots per priority level, and a 'hanging, unacked' IRQ
+	 * holds up an irq slot - in excessive cases (when multiple
+	 * unexpected vectors occur) that might lock up the APIC
+	 * completely.
+	 */
+	ack_APIC_irq();
+#endif
+#endif
+}
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define shutdown_none	disable_none
+#define end_none	enable_none
+
+struct hw_interrupt_type no_irq_type = {
+	"none",
+	startup_none,
+	shutdown_none,
+	enable_none,
+	disable_none,
+	ack_none,
+	end_none
+};
+
+atomic_t irq_err_count;
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int i, j;
+	struct irqaction * action;
+
+	seq_printf(p, "           ");
+	for (j=0; j<smp_num_cpus; j++)
+		seq_printf(p, "CPU%d       ",j);
+	seq_putc(p,'\n');
+
+	for (i = 0 ; i < NR_IRQS ; i++) {
+		action = irq_desc[i].action;
+		if (!action) 
+			continue;
+		seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+		seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+		for (j = 0; j < smp_num_cpus; j++)
+			seq_printf(p, "%10u ",
+				kstat.irqs[cpu_logical_map(j)][i]);
+#endif
+		seq_printf(p, " %14s", irq_desc[i].handler->typename);
+		seq_printf(p, "  %s", action->name);
+
+		for (action=action->next; action; action = action->next)
+			seq_printf(p, ", %s", action->name);
+		seq_putc(p,'\n');
+	}
+	seq_printf(p, "NMI: ");
+	for (j = 0; j < smp_num_cpus; j++)
+		seq_printf(p, "%10u ",
+			nmi_count(cpu_logical_map(j)));
+	seq_printf(p, "\n");
+#if CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "LOC: ");
+	for (j = 0; j < smp_num_cpus; j++)
+		seq_printf(p, "%10u ",
+			apic_timer_irqs[cpu_logical_map(j)]);
+	seq_printf(p, "\n");
+#endif
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+	seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+#endif
+
+	return 0;
+}
+
+
+/*
+ * Global interrupt locks for SMP. Allow interrupts to come in on any
+ * CPU, yet make cli/sti act globally to protect critical regions..
+ */
+
+#ifdef CONFIG_SMP
+unsigned char global_irq_holder = NO_PROC_ID;
+unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */
+
+extern void show_stack(unsigned long* esp);
+
+static void show(char * str)
+{
+	int i;
+	int cpu = smp_processor_id();
+
+	printk("\n%s, CPU %d:\n", str, cpu);
+	printk("irq:  %d [",irqs_running());
+	for(i=0;i < smp_num_cpus;i++)
+		printk(" %d",local_irq_count(i));
+	printk(" ]\nbh:   %d [",spin_is_locked(&global_bh_lock) ? 1 : 0);
+	for(i=0;i < smp_num_cpus;i++)
+		printk(" %d",local_bh_count(i));
+
+	printk(" ]\nStack dumps:");
+	for(i = 0; i < smp_num_cpus; i++) {
+		unsigned long esp;
+		if (i == cpu)
+			continue;
+		printk("\nCPU %d:",i);
+		esp = init_tss[i].esp0;
+		if (!esp) {
+			/* tss->esp0 is set to NULL in cpu_init(),
+			 * it's initialized when the cpu returns to user
+			 * space. -- manfreds
+			 */
+			printk(" <unknown> ");
+			continue;
+		}
+		esp &= ~(THREAD_SIZE-1);
+		esp += sizeof(struct task_struct);
+		show_stack((void*)esp);
+ 	}
+	printk("\nCPU %d:",cpu);
+	show_stack(NULL);
+	printk("\n");
+}
+	
+#define MAXCOUNT 100000000
+
+/*
+ * I had a lockup scenario where a tight loop doing
+ * spin_unlock()/spin_lock() on CPU#1 was racing with
+ * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but
+ * apparently the spin_unlock() information did not make it
+ * through to CPU#0 ... nasty, is this by design, do we have to limit
+ * 'memory update oscillation frequency' artificially like here?
+ *
+ * Such 'high frequency update' races can be avoided by careful design, but
+ * some of our major constructs like spinlocks use similar techniques,
+ * it would be nice to clarify this issue. Set this define to 0 if you
+ * want to check whether your system freezes.  I suspect the delay done
+ * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but
+ * i thought that such things are guaranteed by design, since we use
+ * the 'LOCK' prefix.
+ */
+#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0
+
+#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
+# define SYNC_OTHER_CORES(x) udelay(x+1)
+#else
+/*
+ * We have to allow irqs to arrive between __sti and __cli
+ */
+# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
+#endif
+
+static inline void wait_on_irq(int cpu)
+{
+	int count = MAXCOUNT;
+
+	for (;;) {
+
+		/*
+		 * Wait until all interrupts are gone. Wait
+		 * for bottom half handlers unless we're
+		 * already executing in one..
+		 */
+		if (!irqs_running())
+			if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock))
+				break;
+
+		/* Duh, we have to loop. Release the lock to avoid deadlocks */
+		clear_bit(0,&global_irq_lock);
+
+		for (;;) {
+			if (!--count) {
+				show("wait_on_irq");
+				count = ~0;
+			}
+			__sti();
+			SYNC_OTHER_CORES(cpu);
+			__cli();
+			if (irqs_running())
+				continue;
+			if (global_irq_lock)
+				continue;
+			if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock))
+				continue;
+			if (!test_and_set_bit(0,&global_irq_lock))
+				break;
+		}
+	}
+}
+
+/*
+ * This is called when we want to synchronize with
+ * interrupts. We may for example tell a device to
+ * stop sending interrupts: but to make sure there
+ * are no interrupts that are executing on another
+ * CPU we need to call this function.
+ */
+void synchronize_irq(void)
+{
+	if (irqs_running()) {
+		/* Stupid approach */
+		cli();
+		sti();
+	}
+}
+
+static inline void get_irqlock(int cpu)
+{
+	if (test_and_set_bit(0,&global_irq_lock)) {
+		/* do we already hold the lock? */
+		if ((unsigned char) cpu == global_irq_holder)
+			return;
+		/* Uhhuh.. Somebody else got it. Wait.. */
+		do {
+			do {
+				rep_nop();
+			} while (test_bit(0,&global_irq_lock));
+		} while (test_and_set_bit(0,&global_irq_lock));		
+	}
+	/* 
+	 * We also to make sure that nobody else is running
+	 * in an interrupt context. 
+	 */
+	wait_on_irq(cpu);
+
+	/*
+	 * Ok, finally..
+	 */
+	global_irq_holder = cpu;
+}
+
+/*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
+void __global_cli(void)
+{
+	unsigned int flags;
+
+	__save_flags(flags);
+	if (!flags) {
+		int cpu = smp_processor_id();
+		__cli();
+		if (!local_irq_count(cpu))
+			get_irqlock(cpu);
+	}
+}
+
+void __global_sti(void)
+{
+	int cpu = smp_processor_id();
+
+	if (!local_irq_count(cpu))
+		release_irqlock(cpu);
+	__sti();
+}
+
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+unsigned long __global_save_flags(void)
+{
+	int retval;
+	int local_enabled;
+	unsigned long flags;
+	int cpu = smp_processor_id();
+
+	__save_flags(flags);
+	local_enabled = !flags;
+	/* default to local */
+	retval = 2 + local_enabled;
+
+	/* check for global flags if we're not in an interrupt */
+	if (!local_irq_count(cpu)) {
+		if (local_enabled)
+			retval = 1;
+		if (global_irq_holder == cpu)
+			retval = 0;
+	}
+	return retval;
+}
+
+void __global_restore_flags(unsigned long flags)
+{
+	switch (flags) {
+	case 0:
+		__global_cli();
+		break;
+	case 1:
+		__global_sti();
+		break;
+	case 2:
+		__cli();
+		break;
+	case 3:
+		__sti();
+		break;
+	default:
+		printk("global_restore_flags: %08lx (%08lx)\n",
+			flags, (&flags)[-1]);
+	}
+}
+
+#endif
+
+/*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
+{
+	int status;
+	int cpu = smp_processor_id();
+
+	irq_enter(cpu, irq);
+
+	status = 1;	/* Force the "do bottom halves" bit */
+
+	if (!(action->flags & SA_INTERRUPT))
+		__sti();
+
+	do {
+		status |= action->flags;
+		action->handler(irq, action->dev_id, regs);
+		action = action->next;
+	} while (action);
+	if (status & SA_SAMPLE_RANDOM)
+		add_interrupt_randomness(irq);
+	__cli();
+
+	irq_exit(cpu, irq);
+
+	return status;
+}
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock. 
+ */
+ 
+/**
+ *	disable_irq_nosync - disable an irq without waiting
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line.  Disables and Enables are
+ *	nested.
+ *	Unlike disable_irq(), this function does not ensure existing
+ *	instances of the IRQ handler have completed before returning.
+ *
+ *	This function may be called from IRQ context.
+ */
+ 
+inline void disable_irq_nosync(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	if (!desc->depth++) {
+		desc->status |= IRQ_DISABLED;
+		desc->handler->disable(irq);
+	}
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/**
+ *	disable_irq - disable an irq and wait for completion
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line.  Enables and Disables are
+ *	nested.
+ *	This function waits for any pending IRQ handlers for this interrupt
+ *	to complete before returning. If you use this function while
+ *	holding a resource the IRQ handler may need you will deadlock.
+ *
+ *	This function may be called - with care - from IRQ context.
+ */
+ 
+void disable_irq(unsigned int irq)
+{
+	disable_irq_nosync(irq);
+
+	if (!local_irq_count(smp_processor_id())) {
+		do {
+			barrier();
+			cpu_relax();
+		} while (irq_desc[irq].status & IRQ_INPROGRESS);
+	}
+}
+
+/**
+ *	enable_irq - enable handling of an irq
+ *	@irq: Interrupt to enable
+ *
+ *	Undoes the effect of one call to disable_irq().  If this
+ *	matches the last disable, processing of interrupts on this
+ *	IRQ line is re-enabled.
+ *
+ *	This function may be called from IRQ context.
+ */
+ 
+void enable_irq(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	switch (desc->depth) {
+	case 1: {
+		unsigned int status = desc->status & ~IRQ_DISABLED;
+		desc->status = status;
+		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+			desc->status = status | IRQ_REPLAY;
+			hw_resend_irq(desc->handler,irq);
+		}
+		desc->handler->enable(irq);
+		/* fall-through */
+	}
+	default:
+		desc->depth--;
+		break;
+	case 0:
+		printk("enable_irq(%u) unbalanced from %p\n", irq,
+		       __builtin_return_address(0));
+	}
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+{	
+	/* 
+	 * We ack quickly, we don't want the irq controller
+	 * thinking we're snobs just because some other CPU has
+	 * disabled global interrupts (we have already done the
+	 * INT_ACK cycles, it's too late to try to pretend to the
+	 * controller that we aren't taking the interrupt).
+	 *
+	 * 0 return value means that this irq is already being
+	 * handled by some other CPU. (or is disabled)
+	 */
+	int irq = regs->orig_eax & 0xff; /* high bits used in ret_from_ code */
+	int cpu = smp_processor_id();
+	irq_desc_t *desc = irq_desc + irq;
+	struct irqaction * action;
+	unsigned int status;
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	long esp;
+
+	/* Debugging check for stack overflow: is there less than 1KB free? */
+	__asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
+	if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
+		extern void show_stack(unsigned long *);
+
+		printk("do_IRQ: stack overflow: %ld\n",
+			esp - sizeof(struct task_struct));
+		__asm__ __volatile__("movl %%esp,%0" : "=r" (esp));
+		show_stack((void *)esp);
+	}
+#endif
+
+	kstat.irqs[cpu][irq]++;
+	spin_lock(&desc->lock);
+	desc->handler->ack(irq);
+	/*
+	   REPLAY is when Linux resends an IRQ that was dropped earlier
+	   WAITING is used by probe to mark irqs that are being tested
+	   */
+	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+	status |= IRQ_PENDING; /* we _want_ to handle it */
+
+	/*
+	 * If the IRQ is disabled for whatever reason, we cannot
+	 * use the action we have.
+	 */
+	action = NULL;
+	if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+		action = desc->action;
+		status &= ~IRQ_PENDING; /* we commit to handling */
+		status |= IRQ_INPROGRESS; /* we are handling it */
+	}
+	desc->status = status;
+
+	/*
+	 * If there is no IRQ handler or it was disabled, exit early.
+	   Since we set PENDING, if another processor is handling
+	   a different instance of this same irq, the other processor
+	   will take care of it.
+	 */
+	if (!action)
+		goto out;
+
+	/*
+	 * Edge triggered interrupts need to remember
+	 * pending events.
+	 * This applies to any hw interrupts that allow a second
+	 * instance of the same irq to arrive while we are in do_IRQ
+	 * or in the handler. But the code here only handles the _second_
+	 * instance of the irq, not the third or fourth. So it is mostly
+	 * useful for irq hardware that does not mask cleanly in an
+	 * SMP environment.
+	 */
+	for (;;) {
+		spin_unlock(&desc->lock);
+		handle_IRQ_event(irq, regs, action);
+		spin_lock(&desc->lock);
+		
+		if (!(desc->status & IRQ_PENDING))
+			break;
+		desc->status &= ~IRQ_PENDING;
+	}
+	desc->status &= ~IRQ_INPROGRESS;
+out:
+	/*
+	 * The ->end() handler has to deal with interrupts which got
+	 * disabled while the handler was running.
+	 */
+	desc->handler->end(irq);
+	spin_unlock(&desc->lock);
+
+	if (softirq_pending(cpu))
+		do_softirq();
+	return 1;
+}
+
+/**
+ *	request_irq - allocate an interrupt line
+ *	@irq: Interrupt line to allocate
+ *	@handler: Function to be called when the IRQ occurs
+ *	@irqflags: Interrupt type flags
+ *	@devname: An ascii name for the claiming device
+ *	@dev_id: A cookie passed back to the handler function
+ *
+ *	This call allocates interrupt resources and enables the
+ *	interrupt line and IRQ handling. From the point this
+ *	call is made your handler function may be invoked. Since
+ *	your handler function must clear any interrupt the board 
+ *	raises, you must take care both to initialise your hardware
+ *	and to set up the interrupt handler in the right order.
+ *
+ *	Dev_id must be globally unique. Normally the address of the
+ *	device data structure is used as the cookie. Since the handler
+ *	receives this value it makes sense to use it.
+ *
+ *	If your interrupt is shared you must pass a non NULL dev_id
+ *	as this is required when freeing the interrupt.
+ *
+ *	Flags:
+ *
+ *	SA_SHIRQ		Interrupt is shared
+ *
+ *	SA_INTERRUPT		Disable local interrupts while processing
+ *
+ *	SA_SAMPLE_RANDOM	The interrupt can be used for entropy
+ *
+ */
+ 
+int request_irq(unsigned int irq, 
+		void (*handler)(int, void *, struct pt_regs *),
+		unsigned long irqflags, 
+		const char * devname,
+		void *dev_id)
+{
+	int retval;
+	struct irqaction * action;
+
+#if 1
+	/*
+	 * Sanity-check: shared interrupts should REALLY pass in
+	 * a real dev-ID, otherwise we'll have trouble later trying
+	 * to figure out which interrupt is which (messes up the
+	 * interrupt freeing logic etc).
+	 */
+	if (irqflags & SA_SHIRQ) {
+		if (!dev_id)
+			printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]);
+	}
+#endif
+
+	if (irq >= NR_IRQS)
+		return -EINVAL;
+	if (!handler)
+		return -EINVAL;
+
+	action = (struct irqaction *)
+			kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+	if (!action)
+		return -ENOMEM;
+
+	action->handler = handler;
+	action->flags = irqflags;
+	action->mask = 0;
+	action->name = devname;
+	action->next = NULL;
+	action->dev_id = dev_id;
+
+	retval = setup_irq(irq, action);
+	if (retval)
+		kfree(action);
+	return retval;
+}
+
+/*
+ * Internal function to unregister an irqaction - typically used to
+ * deallocate special interrupts that are part of the architecture.
+ */
+int teardown_irq(unsigned int irq, struct irqaction * old)
+{
+	irq_desc_t *desc;
+	struct irqaction **p;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS)
+		return -ENOENT;
+
+	desc = irq_desc + irq;
+	spin_lock_irqsave(&desc->lock,flags);
+	p = &desc->action;
+	for (;;) {
+		struct irqaction * action = *p;
+		if (action) {
+			struct irqaction **pp = p;
+			p = &action->next;
+			if (action != old)
+				continue;
+
+			/* Found it - now remove it from the list of entries */
+			*pp = action->next;
+			if (!desc->action) {
+				desc->status |= IRQ_DISABLED;
+				desc->handler->shutdown(irq);
+			}
+			spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifdef CONFIG_SMP
+			/* Wait to make sure it's not being used on another CPU */
+			while (desc->status & IRQ_INPROGRESS) {
+				barrier();
+				cpu_relax();
+			}
+#endif
+			return 0;
+		}
+		printk("Trying to free free IRQ%d\n",irq);
+		spin_unlock_irqrestore(&desc->lock,flags);
+		return -ENOENT;
+	}
+}
+
+/**
+ *	free_irq - free an interrupt
+ *	@irq: Interrupt line to free
+ *	@dev_id: Device identity to free
+ *
+ *	Remove an interrupt handler. The handler is removed and if the
+ *	interrupt line is no longer in use by any driver it is disabled.
+ *	On a shared IRQ the caller must ensure the interrupt is disabled
+ *	on the card it drives before calling this function. The function
+ *	does not return until any executing interrupts for this IRQ
+ *	have completed.
+ *
+ *	This function may be called from interrupt context. 
+ *
+ *	Bugs: Attempting to free an irq in a handler for the same irq hangs
+ *	      the machine.
+ */
+ 
+void free_irq(unsigned int irq, void *dev_id)
+{
+	irq_desc_t *desc;
+	struct irqaction *action;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS)
+		return;
+
+	desc = irq_desc + irq;
+	spin_lock_irqsave(&desc->lock,flags);
+	for (action = desc->action; action != NULL; action = action->next) {
+		if (action->dev_id != dev_id)
+			continue;
+
+		spin_unlock_irqrestore(&desc->lock,flags);
+
+		if (teardown_irq(irq, action) == 0)
+			kfree(action);
+		return;
+	}
+	printk("Trying to free free IRQ%d\n",irq);
+	spin_unlock_irqrestore(&desc->lock,flags);
+	return;
+}
+
+/*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+static DECLARE_MUTEX(probe_sem);
+
+/**
+ *	probe_irq_on	- begin an interrupt autodetect
+ *
+ *	Commence probing for an interrupt. The interrupts are scanned
+ *	and a mask of potential interrupt lines is returned.
+ *
+ */
+ 
+unsigned long probe_irq_on(void)
+{
+	unsigned int i;
+	irq_desc_t *desc;
+	unsigned long val;
+	unsigned long delay;
+
+	down(&probe_sem);
+	/* 
+	 * something may have generated an irq long ago and we want to
+	 * flush such a longstanding irq before considering it as spurious. 
+	 */
+	for (i = NR_PIRQS-1; i > 0; i--)  {
+		desc = irq_desc + i;
+
+		spin_lock_irq(&desc->lock);
+		if (!irq_desc[i].action) 
+			irq_desc[i].handler->startup(i);
+		spin_unlock_irq(&desc->lock);
+	}
+
+	/* Wait for longstanding interrupts to trigger. */
+	for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
+		/* about 20ms delay */ synchronize_irq();
+
+	/*
+	 * enable any unassigned irqs
+	 * (we must startup again here because if a longstanding irq
+	 * happened in the previous stage, it may have masked itself)
+	 */
+	for (i = NR_PIRQS-1; i > 0; i--) {
+		desc = irq_desc + i;
+
+		spin_lock_irq(&desc->lock);
+		if (!desc->action) {
+			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+			if (desc->handler->startup(i))
+				desc->status |= IRQ_PENDING;
+		}
+		spin_unlock_irq(&desc->lock);
+	}
+
+	/*
+	 * Wait for spurious interrupts to trigger
+	 */
+	for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+		/* about 100ms delay */ synchronize_irq();
+
+	/*
+	 * Now filter out any obviously spurious interrupts
+	 */
+	val = 0;
+	for (i = 0; i < NR_PIRQS; i++) {
+		irq_desc_t *desc = irq_desc + i;
+		unsigned int status;
+
+		spin_lock_irq(&desc->lock);
+		status = desc->status;
+
+		if (status & IRQ_AUTODETECT) {
+			/* It triggered already - consider it spurious. */
+			if (!(status & IRQ_WAITING)) {
+				desc->status = status & ~IRQ_AUTODETECT;
+				desc->handler->shutdown(i);
+			} else
+				if (i < 32)
+					val |= 1 << i;
+		}
+		spin_unlock_irq(&desc->lock);
+	}
+
+	return val;
+}
+
+/*
+ * Return a mask of triggered interrupts (this
+ * can handle only legacy ISA interrupts).
+ */
+ 
+/**
+ *	probe_irq_mask - scan a bitmap of interrupt lines
+ *	@val:	mask of interrupts to consider
+ *
+ *	Scan the ISA bus interrupt lines and return a bitmap of
+ *	active interrupts. The interrupt probe logic state is then
+ *	returned to its previous value.
+ *
+ *	Note: we need to scan all the irq's even though we will
+ *	only return ISA irq numbers - just so that we reset them
+ *	all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+	int i;
+	unsigned int mask;
+
+	mask = 0;
+	for (i = 0; i < NR_PIRQS; i++) {
+		irq_desc_t *desc = irq_desc + i;
+		unsigned int status;
+
+		spin_lock_irq(&desc->lock);
+		status = desc->status;
+
+		if (status & IRQ_AUTODETECT) {
+			if (i < 16 && !(status & IRQ_WAITING))
+				mask |= 1 << i;
+
+			desc->status = status & ~IRQ_AUTODETECT;
+			desc->handler->shutdown(i);
+		}
+		spin_unlock_irq(&desc->lock);
+	}
+	up(&probe_sem);
+
+	return mask & val;
+}
+
+/*
+ * Return the one interrupt that triggered (this can
+ * handle any interrupt source).
+ */
+
+/**
+ *	probe_irq_off	- end an interrupt autodetect
+ *	@val: mask of potential interrupts (unused)
+ *
+ *	Scans the unused interrupt lines and returns the line which
+ *	appears to have triggered the interrupt. If no interrupt was
+ *	found then zero is returned. If more than one interrupt is
+ *	found then minus the first candidate is returned to indicate
+ *	their is doubt.
+ *
+ *	The interrupt probe logic state is returned to its previous
+ *	value.
+ *
+ *	BUGS: When used in a module (which arguably shouldnt happen)
+ *	nothing prevents two IRQ probe callers from overlapping. The
+ *	results of this are non-optimal.
+ */
+ 
+int probe_irq_off(unsigned long val)
+{
+	int i, irq_found, nr_irqs;
+
+	nr_irqs = 0;
+	irq_found = 0;
+	for (i = 0; i < NR_PIRQS; i++) {
+		irq_desc_t *desc = irq_desc + i;
+		unsigned int status;
+
+		spin_lock_irq(&desc->lock);
+		status = desc->status;
+
+		if (status & IRQ_AUTODETECT) {
+			if (!(status & IRQ_WAITING)) {
+				if (!nr_irqs)
+					irq_found = i;
+				nr_irqs++;
+			}
+			desc->status = status & ~IRQ_AUTODETECT;
+			desc->handler->shutdown(i);
+		}
+		spin_unlock_irq(&desc->lock);
+	}
+	up(&probe_sem);
+
+	if (nr_irqs > 1)
+		irq_found = -irq_found;
+	return irq_found;
+}
+
+/* this was setup_x86_irq but it seems pretty generic */
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+	int shared = 0;
+	unsigned long flags;
+	struct irqaction *old, **p;
+	irq_desc_t *desc = irq_desc + irq;
+
+	/*
+	 * Some drivers like serial.c use request_irq() heavily,
+	 * so we have to be careful not to interfere with a
+	 * running system.
+	 */
+	if (new->flags & SA_SAMPLE_RANDOM) {
+		/*
+		 * This function might sleep, we want to call it first,
+		 * outside of the atomic block.
+		 * Yes, this might clear the entropy pool if the wrong
+		 * driver is attempted to be loaded, without actually
+		 * installing a new handler, but is this really a problem,
+		 * only the sysadmin is able to do this.
+		 */
+		rand_initialize_irq(irq);
+	}
+
+	/*
+	 * The following block of code has to be executed atomically
+	 */
+	spin_lock_irqsave(&desc->lock,flags);
+	p = &desc->action;
+	if ((old = *p) != NULL) {
+		/* Can't share interrupts unless both agree to */
+		if (!(old->flags & new->flags & SA_SHIRQ)) {
+			spin_unlock_irqrestore(&desc->lock,flags);
+			return -EBUSY;
+		}
+
+		/* add new interrupt at end of irq queue */
+		do {
+			p = &old->next;
+			old = *p;
+		} while (old);
+		shared = 1;
+	}
+
+	*p = new;
+
+	if (!shared) {
+		desc->depth = 0;
+		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
+		desc->handler->startup(irq);
+	}
+	spin_unlock_irqrestore(&desc->lock,flags);
+
+	register_irq_proc(irq);
+	return 0;
+}
+
+static struct proc_dir_entry * root_irq_dir;
+static struct proc_dir_entry * irq_dir [NR_IRQS];
+
+#define HEX_DIGITS 8
+
+static unsigned int parse_hex_value (const char *buffer,
+		unsigned long count, unsigned long *ret)
+{
+	unsigned char hexnum [HEX_DIGITS];
+	unsigned long value;
+	int i;
+
+	if (!count)
+		return -EINVAL;
+	if (count > HEX_DIGITS)
+		count = HEX_DIGITS;
+	if (copy_from_user(hexnum, buffer, count))
+		return -EFAULT;
+
+	/*
+	 * Parse the first 8 characters as a hex string, any non-hex char
+	 * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same.
+	 */
+	value = 0;
+
+	for (i = 0; i < count; i++) {
+		unsigned int c = hexnum[i];
+
+		switch (c) {
+			case '0' ... '9': c -= '0'; break;
+			case 'a' ... 'f': c -= 'a'-10; break;
+			case 'A' ... 'F': c -= 'A'-10; break;
+		default:
+			goto out;
+		}
+		value = (value << 4) | c;
+	}
+out:
+	*ret = value;
+	return 0;
+}
+
+#if CONFIG_SMP
+
+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
+
+static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
+static int irq_affinity_read_proc (char *page, char **start, off_t off,
+			int count, int *eof, void *data)
+{
+	if (count < HEX_DIGITS+1)
+		return -EINVAL;
+	return sprintf (page, "%08lx\n", irq_affinity[(long)data]);
+}
+
+static int irq_affinity_write_proc (struct file *file, const char *buffer,
+					unsigned long count, void *data)
+{
+	int irq = (long) data, full_count = count, err;
+	unsigned long new_value;
+
+	if (!irq_desc[irq].handler->set_affinity)
+		return -EIO;
+
+	err = parse_hex_value(buffer, count, &new_value);
+
+	/*
+	 * Do not allow disabling IRQs completely - it's a too easy
+	 * way to make the system unusable accidentally :-) At least
+	 * one online CPU still has to be targeted.
+	 */
+	if (!(new_value & cpu_online_map))
+		return -EINVAL;
+
+	irq_affinity[irq] = new_value;
+	irq_desc[irq].handler->set_affinity(irq, new_value);
+
+	return full_count;
+}
+
+#endif
+
+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+			int count, int *eof, void *data)
+{
+	unsigned long *mask = (unsigned long *) data;
+	if (count < HEX_DIGITS+1)
+		return -EINVAL;
+	return sprintf (page, "%08lx\n", *mask);
+}
+
+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+					unsigned long count, void *data)
+{
+	unsigned long *mask = (unsigned long *) data, full_count = count, err;
+	unsigned long new_value;
+
+	err = parse_hex_value(buffer, count, &new_value);
+	if (err)
+		return err;
+
+	*mask = new_value;
+	return full_count;
+}
+
+#define MAX_NAMELEN 10
+
+static void register_irq_proc (unsigned int irq)
+{
+	char name [MAX_NAMELEN];
+
+	if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) ||
+			irq_dir[irq])
+		return;
+
+	memset(name, 0, MAX_NAMELEN);
+	sprintf(name, "%d", irq);
+
+	/* create /proc/irq/1234 */
+	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+#if CONFIG_SMP
+	{
+		struct proc_dir_entry *entry;
+
+		/* create /proc/irq/1234/smp_affinity */
+		entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+		if (entry) {
+			entry->nlink = 1;
+			entry->data = (void *)(long)irq;
+			entry->read_proc = irq_affinity_read_proc;
+			entry->write_proc = irq_affinity_write_proc;
+		}
+
+		smp_affinity_entry[irq] = entry;
+	}
+#endif
+}
+
+unsigned long prof_cpu_mask = -1;
+
+void init_irq_proc (void)
+{
+	struct proc_dir_entry *entry;
+	int i;
+
+	/* create /proc/irq */
+	root_irq_dir = proc_mkdir("irq", 0);
+
+	/* create /proc/irq/prof_cpu_mask */
+	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+
+	if (!entry)
+	    return;
+
+	entry->nlink = 1;
+	entry->data = (void *)&prof_cpu_mask;
+	entry->read_proc = prof_cpu_mask_read_proc;
+	entry->write_proc = prof_cpu_mask_write_proc;
+
+	/*
+	 * Create entries for all existing IRQs.
+	 */
+	for (i = 0; i < NR_IRQS; i++)
+		register_irq_proc(i);
+}
+
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/ldt.c b/linux-2.4-xen-sparse/arch/xen/kernel/ldt.c
new file mode 100644
index 0000000000..6235778493
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/ldt.c
@@ -0,0 +1,272 @@
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+#include <asm/mmu_context.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+
+#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
+static void flush_ldt(void *mm)
+{
+	if (current->active_mm)
+		load_LDT(&current->active_mm->context);
+}
+#endif
+
+static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+{
+	void *oldldt;
+	void *newldt;
+	int oldsize;
+
+	if (mincount <= pc->size)
+		return 0;
+	oldsize = pc->size;
+	mincount = (mincount+511)&(~511);
+	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+	else
+		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+
+	if (!newldt)
+		return -ENOMEM;
+
+	if (oldsize)
+		memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+
+	oldldt = pc->ldt;
+	memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+	wmb();
+	pc->ldt = newldt;
+	pc->size = mincount;
+	if (reload) {
+		make_pages_readonly(
+			pc->ldt,
+			(pc->size*LDT_ENTRY_SIZE)/PAGE_SIZE);
+		load_LDT(pc);
+#ifdef CONFIG_SMP
+		if (current->mm->cpu_vm_mask != (1<<smp_processor_id()))
+			smp_call_function(flush_ldt, 0, 1, 1);
+#endif
+	}
+	wmb();
+	if (oldsize) {
+		make_pages_writable(
+			oldldt, (oldsize*LDT_ENTRY_SIZE)/PAGE_SIZE);
+		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+			vfree(oldldt);
+		else
+			kfree(oldldt);
+	}
+	return 0;
+}
+
+static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+{
+	int err = alloc_ldt(new, old->size, 0);
+	if (err < 0) {
+		printk(KERN_WARNING "ldt allocation failed\n");
+		new->size = 0;
+		return err;
+	}
+	memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+	make_pages_readonly(new->ldt, (new->size*LDT_ENTRY_SIZE)/PAGE_SIZE);
+	return 0;
+}
+
+/*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	struct mm_struct * old_mm;
+	int retval = 0;
+
+	init_MUTEX(&mm->context.sem);
+	mm->context.size = 0;
+	old_mm = current->mm;
+	if (old_mm && old_mm->context.size > 0) {
+		down(&old_mm->context.sem);
+		retval = copy_ldt(&mm->context, &old_mm->context);
+		up(&old_mm->context.sem);
+	}
+	return retval;
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ * Do not touch the ldt register, we are already
+ * in the next thread.
+ */
+void destroy_context(struct mm_struct *mm)
+{
+	if (mm->context.size) {
+		make_pages_writable(
+			mm->context.ldt, 
+			(mm->context.size*LDT_ENTRY_SIZE)/PAGE_SIZE);
+		if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+			vfree(mm->context.ldt);
+		else
+			kfree(mm->context.ldt);
+		mm->context.size = 0;
+	}
+}
+
+static int read_ldt(void * ptr, unsigned long bytecount)
+{
+	int err;
+	unsigned long size;
+	struct mm_struct * mm = current->mm;
+
+	if (!mm->context.size)
+		return 0;
+	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+
+	down(&mm->context.sem);
+	size = mm->context.size*LDT_ENTRY_SIZE;
+	if (size > bytecount)
+		size = bytecount;
+
+	err = 0;
+	if (copy_to_user(ptr, mm->context.ldt, size))
+		err = -EFAULT;
+	up(&mm->context.sem);
+	if (err < 0)
+		return err;
+	if (size != bytecount) {
+		/* zero-fill the rest */
+		clear_user(ptr+size, bytecount-size);
+	}
+	return bytecount;
+}
+
+static int read_default_ldt(void * ptr, unsigned long bytecount)
+{
+	int err;
+	unsigned long size;
+	void *address;
+
+	err = 0;
+	address = &default_ldt[0];
+	size = 5*sizeof(struct desc_struct);
+	if (size > bytecount)
+		size = bytecount;
+
+	err = size;
+	if (copy_to_user(ptr, address, size))
+		err = -EFAULT;
+
+	return err;
+}
+
+static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
+{
+	struct mm_struct * mm = current->mm;
+	__u32 entry_1, entry_2, *lp;
+	unsigned long mach_lp;
+	int error;
+	struct modify_ldt_ldt_s ldt_info;
+
+	error = -EINVAL;
+	if (bytecount != sizeof(ldt_info))
+		goto out;
+	error = -EFAULT; 	
+	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+		goto out;
+
+	error = -EINVAL;
+	if (ldt_info.entry_number >= LDT_ENTRIES)
+		goto out;
+	if (ldt_info.contents == 3) {
+		if (oldmode)
+			goto out;
+		if (ldt_info.seg_not_present == 0)
+			goto out;
+	}
+
+	down(&mm->context.sem);
+	if (ldt_info.entry_number >= mm->context.size) {
+		error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
+		if (error < 0)
+			goto out_unlock;
+	}
+
+	lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
+	mach_lp = arbitrary_virt_to_machine(lp);
+
+   	/* Allow LDTs to be cleared by the user. */
+   	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+		if (oldmode ||
+		    (ldt_info.contents == 0		&&
+		     ldt_info.read_exec_only == 1	&&
+		     ldt_info.seg_32bit == 0		&&
+		     ldt_info.limit_in_pages == 0	&&
+		     ldt_info.seg_not_present == 1	&&
+		     ldt_info.useable == 0 )) {
+			entry_1 = 0;
+			entry_2 = 0;
+			goto install;
+		}
+	}
+
+	entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+		  (ldt_info.limit & 0x0ffff);
+	entry_2 = (ldt_info.base_addr & 0xff000000) |
+		  ((ldt_info.base_addr & 0x00ff0000) >> 16) |
+		  (ldt_info.limit & 0xf0000) |
+		  ((ldt_info.read_exec_only ^ 1) << 9) |
+		  (ldt_info.contents << 10) |
+		  ((ldt_info.seg_not_present ^ 1) << 15) |
+		  (ldt_info.seg_32bit << 22) |
+		  (ldt_info.limit_in_pages << 23) |
+		  0x7000;
+	if (!oldmode)
+		entry_2 |= (ldt_info.useable << 20);
+
+	/* Install the new entry ...  */
+install:
+	error = HYPERVISOR_update_descriptor(mach_lp, entry_1, entry_2);
+
+out_unlock:
+	up(&mm->context.sem);
+out:
+	return error;
+}
+
+asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
+{
+	int ret = -ENOSYS;
+
+	switch (func) {
+	case 0:
+		ret = read_ldt(ptr, bytecount);
+		break;
+	case 1:
+		ret = write_ldt(ptr, bytecount, 1);
+		break;
+	case 2:
+		ret = read_default_ldt(ptr, bytecount);
+		break;
+	case 0x11:
+		ret = write_ldt(ptr, bytecount, 0);
+		break;
+	}
+	return ret;
+}
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/pci-pc.c b/linux-2.4-xen-sparse/arch/xen/kernel/pci-pc.c
new file mode 100644
index 0000000000..17e027ff19
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/pci-pc.c
@@ -0,0 +1,260 @@
+/*
+ *	Low-Level PCI Support for PC
+ *
+ *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
+ *
+ * Adjusted to use Xen's interface by Rolf Neugebauer, Intel Research Cambridge
+ * Further modifications by Keir Fraser, University of Cambridge
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+
+#include <asm/segment.h>
+#include <asm/io.h>
+
+#include <asm-xen/xen-public/xen.h>
+#include <asm-xen/xen-public/physdev.h>
+
+#include "pci-i386.h"
+
+/*
+ * NB. The following interface functions are not included here:
+ *  1. void eisa_set_level_irq(unsigned int irq)
+ *  2. irq_routing_table * __devinit pcibios_get_irq_routing_table(void)
+ *  3. int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
+ * All are used by the ACPI driver. This should be ported to Xen if it is
+ * ever required -- Xen is the ultimate source for IRQ-routing knowledge.
+ */
+
+struct pci_ops *pci_root_ops = NULL;
+
+int (*pci_config_read)(int seg, int bus, int dev, int fn, 
+                       int reg, int len, u32 *value) = NULL;
+int (*pci_config_write)(int seg, int bus, int dev, int fn,
+                        int reg, int len, u32 value) = NULL;
+
+unsigned int pci_probe = PCI_PROBE_BIOS;
+
+struct pci_fixup pcibios_fixups[] = { { 0 } };
+
+static int pci_confx_read(int seg, int bus, int dev, int fn, int reg, 
+                          int len, u32 *value)
+{
+    int ret;
+    physdev_op_t op;
+
+    if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+        return -EINVAL;
+
+    op.cmd = PHYSDEVOP_PCI_CFGREG_READ;
+    op.u.pci_cfgreg_read.bus  = bus;
+    op.u.pci_cfgreg_read.dev  = dev;
+    op.u.pci_cfgreg_read.func = fn;
+    op.u.pci_cfgreg_read.reg  = reg;
+    op.u.pci_cfgreg_read.len  = len;
+
+    if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 )
+        return ret;
+
+    *value = op.u.pci_cfgreg_read.value;
+
+    return 0;
+}
+
+static int pci_confx_write(int seg, int bus, int dev, int fn, int reg, 
+                           int len, u32 value)
+{
+    int ret;
+    physdev_op_t op;
+
+    if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) 
+        return -EINVAL;
+
+    op.cmd = PHYSDEVOP_PCI_CFGREG_WRITE;
+    op.u.pci_cfgreg_write.bus   = bus;
+    op.u.pci_cfgreg_write.dev   = dev;
+    op.u.pci_cfgreg_write.func  = fn;
+    op.u.pci_cfgreg_write.reg   = reg;
+    op.u.pci_cfgreg_write.len   = len;
+    op.u.pci_cfgreg_write.value = value;
+
+    if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 )
+        return ret;
+    return 0;
+}
+
+
+static int pci_confx_read_config_byte(struct pci_dev *dev, 
+                                      int where, u8 *value)
+{
+    int result; 
+    u32 data;
+
+    result = pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+                            PCI_FUNC(dev->devfn), where, 1, &data);
+
+    *value = (u8)data;
+
+    return result;
+}
+
+static int pci_confx_read_config_word(struct pci_dev *dev, 
+                                      int where, u16 *value)
+{
+    int result; 
+    u32 data;
+
+    result = pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+                            PCI_FUNC(dev->devfn), where, 2, &data);
+
+    *value = (u16)data;
+
+    return result;
+}
+
+static int pci_confx_read_config_dword(struct pci_dev *dev, 
+                                       int where, u32 *value)
+{
+    return pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+                          PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_confx_write_config_byte(struct pci_dev *dev, 
+                                       int where, u8 value)
+{
+    return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+                           PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_confx_write_config_word(struct pci_dev *dev, 
+                                       int where, u16 value)
+{
+    return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+                           PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_confx_write_config_dword(struct pci_dev *dev, 
+                                        int where, u32 value)
+{
+    return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+                           PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_conf_xen = {
+    pci_confx_read_config_byte,
+    pci_confx_read_config_word,
+    pci_confx_read_config_dword,
+    pci_confx_write_config_byte,
+    pci_confx_write_config_word,
+    pci_confx_write_config_dword
+};
+
+void pcibios_penalize_isa_irq(int irq)
+{ 
+    /* nothing */
+}
+
+void __devinit pcibios_fixup_bus(struct pci_bus *b)
+{
+    pci_read_bridge_bases(b);
+}
+
+struct pci_bus * __devinit pcibios_scan_root(int busnum)
+{
+    struct list_head *list;
+    struct pci_bus *bus;
+
+    list_for_each ( list, &pci_root_buses )
+    {
+        bus = pci_bus_b(list);
+        if ( bus->number == busnum )
+            return bus;
+    }
+
+    printk("PCI: Probing PCI hardware (bus %02x)\n", busnum);
+    return pci_scan_bus(busnum, pci_root_ops, NULL);
+}
+
+void __init pcibios_init(void)
+{
+    int bus;
+    physdev_op_t op;
+
+    if ( !pci_probe )
+        return;
+
+    pci_root_ops     = &pci_conf_xen;
+    pci_config_read  = pci_confx_read;
+    pci_config_write = pci_confx_write;
+
+    pcibios_set_cacheline_size();
+
+    op.cmd = PHYSDEVOP_PCI_PROBE_ROOT_BUSES;
+    if ( HYPERVISOR_physdev_op(&op) != 0 )
+    {
+        printk(KERN_WARNING "PCI: System does not support PCI\n");
+        return;
+    }
+
+    printk(KERN_INFO "PCI: Probing PCI hardware\n");
+    for ( bus = 0; bus < 256; bus++ )
+        if ( test_bit(bus, &op.u.pci_probe_root_buses.busmask[0]) )
+            (void)pcibios_scan_root(bus);
+
+    pcibios_resource_survey();
+}
+
+char * __devinit pcibios_setup(char *str)
+{
+    if ( !strcmp(str, "off") )
+        pci_probe = 0;
+    return NULL;
+}
+
+unsigned int pcibios_assign_all_busses(void)
+{
+    return 0;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+    int err;
+    u8  pin;
+    physdev_op_t op;
+
+    /* Inform Xen that we are going to use this device. */
+    op.cmd = PHYSDEVOP_PCI_INITIALISE_DEVICE;
+    op.u.pci_initialise_device.bus  = dev->bus->number;
+    op.u.pci_initialise_device.dev  = PCI_SLOT(dev->devfn);
+    op.u.pci_initialise_device.func = PCI_FUNC(dev->devfn);
+    if ( (err = HYPERVISOR_physdev_op(&op)) != 0 )
+        return err;
+
+    /* Now we can bind to the very final IRQ line. */
+    pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &pin);
+    dev->irq = pin;
+
+    /* Turn on device I/O and memory access as necessary. */
+    if ( (err = pcibios_enable_resources(dev, mask)) < 0 )
+        return err;
+
+    /* Sanity-check that an interrupt-producing device is routed to an IRQ. */
+    pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+    if ( pin != 0 )
+    {
+        if ( dev->irq != 0 )
+            printk(KERN_INFO "PCI: Obtained IRQ %d for device %s\n",
+                   dev->irq, dev->slot_name);
+        else
+            printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of "
+                   "device %s.\n", 'A' + pin - 1, dev->slot_name);
+    }
+
+    return 0;
+}
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/process.c b/linux-2.4-xen-sparse/arch/xen/kernel/process.c
new file mode 100644
index 0000000000..c9d553627f
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/process.c
@@ -0,0 +1,448 @@
+/*
+ *  linux/arch/i386/kernel/process.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#define __KERNEL_SYSCALLS__
+#include <stdarg.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/config.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/mc146818rtc.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/ldt.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/desc.h>
+#include <asm/mmu_context.h>
+#include <asm-xen/xen-public/physdev.h>
+
+#include <linux/irq.h>
+
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+int hlt_counter;
+
+/*
+ * Powermanagement idle function, if any..
+ */
+void (*pm_idle)(void);
+
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+
+void disable_hlt(void)
+{
+    hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+    hlt_counter--;
+}
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle (void)
+{
+    extern int set_timeout_timer(void);
+
+    /* Endless idle loop with no priority at all. */
+    init_idle();
+    current->nice = 20;
+    current->counter = -100;
+
+    for ( ; ; )
+    {
+        while ( !current->need_resched )
+        {
+            __cli();
+            if ( current->need_resched )
+            {
+                /* The race-free check for events failed. */
+                __sti();
+                break;
+            }
+            else if ( set_timeout_timer() == 0 )
+            {
+                /* NB. Blocking reenable events in a race-free manner. */
+                HYPERVISOR_block();
+            }
+            else
+            {
+                /* No race here: yielding will get us the CPU again anyway. */
+                __sti();
+                HYPERVISOR_yield();
+            }
+        }
+        schedule();
+        check_pgt_cache();
+    }
+}
+
+extern void show_trace(unsigned long* esp);
+
+void show_regs(struct pt_regs * regs)
+{
+    printk("\n");
+    printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+    printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id());
+    if (regs->xcs & 2)
+        printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
+    printk(" EFLAGS: %08lx    %s\n",regs->eflags, print_tainted());
+    printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+           regs->eax,regs->ebx,regs->ecx,regs->edx);
+    printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+           regs->esi, regs->edi, regs->ebp);
+    printk(" DS: %04x ES: %04x\n",
+           0xffff & regs->xds,0xffff & regs->xes);
+
+    show_trace(&regs->esp);
+}
+
+
+/*
+ * Create a kernel thread
+ */
+int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+    long retval, d0;
+
+    __asm__ __volatile__(
+        "movl %%esp,%%esi\n\t"
+        "int $0x80\n\t"		/* Linux/i386 system call */
+        "cmpl %%esp,%%esi\n\t"	/* child or parent? */
+        "je 1f\n\t"		/* parent - jump */
+        /* Load the argument into eax, and push it.  That way, it does
+         * not matter whether the called function is compiled with
+         * -mregparm or not.  */
+        "movl %4,%%eax\n\t"
+        "pushl %%eax\n\t"		
+        "call *%5\n\t"		/* call fn */
+        "movl %3,%0\n\t"	/* exit */
+        "int $0x80\n"
+        "1:\t"
+        :"=&a" (retval), "=&S" (d0)
+        :"0" (__NR_clone), "i" (__NR_exit),
+        "r" (arg), "r" (fn),
+        "b" (flags | CLONE_VM)
+        : "memory");
+
+    return retval;
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+    /* nothing to do ... */
+}
+
+void flush_thread(void)
+{
+    struct task_struct *tsk = current;
+
+    memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+
+    /*
+     * Forget coprocessor state..
+     */
+    clear_fpu(tsk);
+    tsk->used_math = 0;
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+    if (dead_task->mm) {
+        // temporary debugging check
+        if (dead_task->mm->context.size) {
+            printk("WARNING: dead process %8s still has LDT? <%p/%08x>\n",
+                   dead_task->comm, 
+		   dead_task->mm->context.ldt,
+		   dead_task->mm->context.size);
+            BUG();
+        }
+    }
+    //release_x86_irqs(dead_task);
+}
+
+
+/*
+ * Save a segment.
+ */
+#define savesegment(seg,value) \
+	asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))
+
+int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
+                unsigned long unused,
+                struct task_struct * p, struct pt_regs * regs)
+{
+    struct pt_regs * childregs;
+
+    childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
+    struct_cpy(childregs, regs);
+    childregs->eax = 0;
+    childregs->esp = esp;
+
+    p->thread.esp = (unsigned long) childregs;
+    p->thread.esp0 = (unsigned long) (childregs+1);
+
+    p->thread.eip = (unsigned long) ret_from_fork;
+
+    savesegment(fs,p->thread.fs);
+    savesegment(gs,p->thread.gs);
+
+    unlazy_fpu(current);
+    struct_cpy(&p->thread.i387, &current->thread.i387);
+
+    p->thread.io_pl = current->thread.io_pl;
+
+    return 0;
+}
+
+/*
+ * fill in the user structure for a core dump..
+ */
+void dump_thread(struct pt_regs * regs, struct user * dump)
+{
+    int i;
+
+/* changed the size calculations - should hopefully work better. lbt */
+    dump->magic = CMAGIC;
+    dump->start_code = 0;
+    dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
+    dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
+    dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
+    dump->u_dsize -= dump->u_tsize;
+    dump->u_ssize = 0;
+    for (i = 0; i < 8; i++)
+        dump->u_debugreg[i] = current->thread.debugreg[i];  
+
+    if (dump->start_stack < TASK_SIZE)
+        dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
+
+    dump->regs.ebx = regs->ebx;
+    dump->regs.ecx = regs->ecx;
+    dump->regs.edx = regs->edx;
+    dump->regs.esi = regs->esi;
+    dump->regs.edi = regs->edi;
+    dump->regs.ebp = regs->ebp;
+    dump->regs.eax = regs->eax;
+    dump->regs.ds = regs->xds;
+    dump->regs.es = regs->xes;
+    savesegment(fs,dump->regs.fs);
+    savesegment(gs,dump->regs.gs);
+    dump->regs.orig_eax = regs->orig_eax;
+    dump->regs.eip = regs->eip;
+    dump->regs.cs = regs->xcs;
+    dump->regs.eflags = regs->eflags;
+    dump->regs.esp = regs->esp;
+    dump->regs.ss = regs->xss;
+
+    dump->u_fpvalid = dump_fpu (regs, &dump->i387);
+}
+
+/*
+ *	switch_to(x,yn) should switch tasks from x to y.
+ *
+ * We fsave/fwait so that an exception goes off at the right time
+ * (as a call from the fsave or fwait in effect) rather than to
+ * the wrong process. Lazy FP saving no longer makes any sense
+ * with modern CPU's, and this simplifies a lot of things (SMP
+ * and UP become the same).
+ *
+ * NOTE! We used to use the x86 hardware context switching. The
+ * reason for not using it any more becomes apparent when you
+ * try to recover gracefully from saved state that is no longer
+ * valid (stale segment register values in particular). With the
+ * hardware task-switch, there is no way to fix up bad state in
+ * a reasonable manner.
+ *
+ * The fact that Intel documents the hardware task-switching to
+ * be slow is a fairly red herring - this code is not noticeably
+ * faster. However, there _is_ some room for improvement here,
+ * so the performance issues may eventually be a valid point.
+ * More important, however, is the fact that this allows us much
+ * more flexibility.
+ */
+void fastcall __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+    struct thread_struct *next = &next_p->thread;
+    physdev_op_t op;
+    multicall_entry_t _mcl[8], *mcl = _mcl;
+
+    /*
+     * This is basically 'unlazy_fpu', except that we queue a multicall to 
+     * indicate FPU task switch, rather than synchronously trapping to Xen.
+     */
+    if ( prev_p->flags & PF_USEDFPU )
+    {
+	if ( cpu_has_fxsr )
+            asm volatile( "fxsave %0 ; fnclex"
+                          : "=m" (prev_p->thread.i387.fxsave) );
+	else
+            asm volatile( "fnsave %0 ; fwait"
+                          : "=m" (prev_p->thread.i387.fsave) );
+	prev_p->flags &= ~PF_USEDFPU;
+        mcl->op      = __HYPERVISOR_fpu_taskswitch;
+        mcl->args[0] = 1;
+        mcl++;
+    }
+
+    mcl->op      = __HYPERVISOR_stack_switch;
+    mcl->args[0] = __KERNEL_DS;
+    mcl->args[1] = next->esp0;
+    mcl++;
+
+    if ( prev_p->thread.io_pl != next->io_pl ) 
+    {
+        op.cmd             = PHYSDEVOP_SET_IOPL;
+	op.u.set_iopl.iopl = next->io_pl;
+        mcl->op      = __HYPERVISOR_physdev_op;
+        mcl->args[0] = (unsigned long)&op;
+        mcl++;
+    }
+
+    (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
+
+    /*
+     * Restore %fs and %gs.
+     */
+    loadsegment(fs, next->fs);
+    loadsegment(gs, next->gs);
+
+    /*
+     * Now maybe reload the debug registers
+     */
+    if ( next->debugreg[7] != 0 )
+    {
+        HYPERVISOR_set_debugreg(0, next->debugreg[0]);
+        HYPERVISOR_set_debugreg(1, next->debugreg[1]);
+        HYPERVISOR_set_debugreg(2, next->debugreg[2]);
+        HYPERVISOR_set_debugreg(3, next->debugreg[3]);
+        /* no 4 and 5 */
+        HYPERVISOR_set_debugreg(6, next->debugreg[6]);
+        HYPERVISOR_set_debugreg(7, next->debugreg[7]);
+    }
+}
+
+asmlinkage int sys_fork(struct pt_regs regs)
+{
+    return do_fork(SIGCHLD, regs.esp, &regs, 0);
+}
+
+asmlinkage int sys_clone(struct pt_regs regs)
+{
+    unsigned long clone_flags;
+    unsigned long newsp;
+
+    clone_flags = regs.ebx;
+    newsp = regs.ecx;
+    if (!newsp)
+        newsp = regs.esp;
+    return do_fork(clone_flags, newsp, &regs, 0);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage int sys_vfork(struct pt_regs regs)
+{
+    return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0);
+}
+
+/*
+ * sys_execve() executes a new program.
+ */
+asmlinkage int sys_execve(struct pt_regs regs)
+{
+    int error;
+    char * filename;
+
+    filename = getname((char *) regs.ebx);
+    error = PTR_ERR(filename);
+    if (IS_ERR(filename))
+        goto out;
+    error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs);
+    if (error == 0)
+        current->ptrace &= ~PT_DTRACE;
+    putname(filename);
+ out:
+    return error;
+}
+
+/*
+ * These bracket the sleeping functions..
+ */
+extern void scheduling_functions_start_here(void);
+extern void scheduling_functions_end_here(void);
+#define first_sched	((unsigned long) scheduling_functions_start_here)
+#define last_sched	((unsigned long) scheduling_functions_end_here)
+
+unsigned long get_wchan(struct task_struct *p)
+{
+    unsigned long ebp, esp, eip;
+    unsigned long stack_page;
+    int count = 0;
+    if (!p || p == current || p->state == TASK_RUNNING)
+        return 0;
+    stack_page = (unsigned long)p;
+    esp = p->thread.esp;
+    if (!stack_page || esp < stack_page || esp > 8188+stack_page)
+        return 0;
+    /* include/asm-i386/system.h:switch_to() pushes ebp last. */
+    ebp = *(unsigned long *) esp;
+    do {
+        if (ebp < stack_page || ebp > 8184+stack_page)
+            return 0;
+        eip = *(unsigned long *) (ebp+4);
+        if (eip < first_sched || eip >= last_sched)
+            return eip;
+        ebp = *(unsigned long *) ebp;
+    } while (count++ < 16);
+    return 0;
+}
+#undef last_sched
+#undef first_sched
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/setup.c b/linux-2.4-xen-sparse/arch/xen/kernel/setup.c
new file mode 100644
index 0000000000..230e0bb66a
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/setup.c
@@ -0,0 +1,1213 @@
+/*
+ *  linux/arch/i386/kernel/setup.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#define __KERNEL_SYSCALLS__
+static int errno;
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/apm_bios.h>
+#ifdef CONFIG_BLK_DEV_RAM
+#include <linux/blk.h>
+#endif
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/reboot.h>
+#include <asm/processor.h>
+#include <linux/console.h>
+#include <linux/module.h>
+#include <asm/mtrr.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/msr.h>
+#include <asm/desc.h>
+#include <asm/dma.h>
+#include <asm/mpspec.h>
+#include <asm/mmu_context.h>
+#include <asm/ctrl_if.h>
+#include <asm/hypervisor.h>
+#include <asm-xen/xen-public/physdev.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/tqueue.h>
+#include <net/pkt_sched.h> /* dev_(de)activate */
+
+/*
+ * Point at the empty zero page to start with. We map the real shared_info
+ * page as soon as fixmap is up and running.
+ */
+shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+
+unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
+
+/*
+ * Machine setup..
+ */
+
+char ignore_irq13;		/* set if exception 16 works */
+struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+
+unsigned long mmu_cr4_features;
+
+unsigned char * vgacon_mmap;
+
+/*
+ * Bus types ..
+ */
+#ifdef CONFIG_EISA
+int EISA_bus;
+#endif
+int MCA_bus;
+
+/* for MCA, but anyone else can use it if they want */
+unsigned int machine_id;
+unsigned int machine_submodel_id;
+unsigned int BIOS_revision;
+unsigned int mca_pentium_flag;
+
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0x10000000;
+
+/*
+ * Setup options
+ */
+struct drive_info_struct { char dummy[32]; } drive_info;
+struct screen_info screen_info;
+struct apm_info apm_info;
+struct sys_desc_table_struct {
+    unsigned short length;
+    unsigned char table[0];
+};
+
+unsigned char aux_device_present;
+
+extern int root_mountflags;
+extern char _text, _etext, _edata, _end;
+
+extern int blk_nohighio;
+
+int enable_acpi_smp_table;
+
+/* Raw start-of-day parameters from the hypervisor. */
+union xen_start_info_union xen_start_info_union;
+
+#define COMMAND_LINE_SIZE MAX_GUEST_CMDLINE
+static char command_line[COMMAND_LINE_SIZE];
+char saved_command_line[COMMAND_LINE_SIZE];
+
+/* parse_mem_cmdline()
+ * returns the value of the mem= boot param converted to pages or 0
+ */ 
+static int __init parse_mem_cmdline (char ** cmdline_p)
+{
+    char c = ' ', *to = command_line, *from = saved_command_line;
+    int len = 0;
+    unsigned long long bytes;
+    int mem_param = 0;
+
+    /* Save unparsed command line copy for /proc/cmdline */
+    memcpy(saved_command_line, xen_start_info.cmd_line, COMMAND_LINE_SIZE);
+    saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+
+    for (;;) {
+        /*
+         * "mem=nopentium" disables the 4MB page tables.
+         * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+         * to <mem>, overriding the bios size.
+         * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+         * <start> to <start>+<mem>, overriding the bios size.
+         */
+        if (c == ' ' && !memcmp(from, "mem=", 4)) {
+            if (to != command_line)
+                to--;
+            if (!memcmp(from+4, "nopentium", 9)) {
+                from += 9+4;
+            } else if (!memcmp(from+4, "exactmap", 8)) {
+                from += 8+4;
+            } else {
+                bytes = memparse(from+4, &from);
+                mem_param = bytes>>PAGE_SHIFT;
+		if (*from == '@')
+                    (void)memparse(from+1, &from);
+            }
+        }
+
+        c = *(from++);
+        if (!c)
+            break;
+        if (COMMAND_LINE_SIZE <= ++len)
+            break;
+        *(to++) = c;
+    }
+    *to = '\0';
+    *cmdline_p = command_line;
+
+    return mem_param;
+}
+
+/*
+ * Every exception-fixup table is sorted (i.e., kernel main table, and every
+ * module table. Some elements may be out of order if they reference text.init,
+ * for example. 
+ */
+static void sort_exception_table(struct exception_table_entry *start,
+                                 struct exception_table_entry *end)
+{
+    struct exception_table_entry *p, *q, tmp;
+
+    for ( p = start; p < end; p++ )
+    {
+        for ( q = p-1; q > start; q-- )
+            if ( p->insn > q->insn )
+                break;
+        if ( ++q != p )
+        {
+            tmp = *p;
+            memmove(q+1, q, (p-q)*sizeof(*p));
+            *q = tmp;
+        }
+    }
+}
+
+int xen_module_init(struct module *mod)
+{
+    sort_exception_table(mod->ex_table_start, mod->ex_table_end);
+    return 0;
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+    int i,j;
+    unsigned long bootmap_size, start_pfn, lmax_low_pfn;
+    int mem_param;  /* user specified memory size in pages */
+    int boot_pfn;   /* low pages available for bootmem */
+    physdev_op_t op;
+
+    extern void hypervisor_callback(void);
+    extern void failsafe_callback(void);
+
+    extern unsigned long cpu0_pte_quicklist[];
+    extern unsigned long cpu0_pgd_quicklist[];
+
+    extern const struct exception_table_entry __start___ex_table[];
+    extern const struct exception_table_entry __stop___ex_table[];
+
+    extern char _stext;
+
+    /* Force a quick death if the kernel panics. */
+    extern int panic_timeout;
+    if ( panic_timeout == 0 )
+        panic_timeout = 1;
+
+    /* Ensure that the kernel exception-fixup table is sorted. */
+    sort_exception_table(__start___ex_table, __stop___ex_table);
+
+#ifndef CONFIG_HIGHIO
+    blk_nohighio = 1;
+#endif
+
+    HYPERVISOR_vm_assist(
+        VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
+    HYPERVISOR_vm_assist(
+        VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
+        
+    HYPERVISOR_set_callbacks(
+        __KERNEL_CS, (unsigned long)hypervisor_callback,
+        __KERNEL_CS, (unsigned long)failsafe_callback);
+
+    boot_cpu_data.pgd_quick = cpu0_pgd_quicklist;
+    boot_cpu_data.pte_quick = cpu0_pte_quicklist;
+
+    /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
+       properly.  Setting ROOT_DEV to default to /dev/ram0 breaks initrd. */
+    ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
+    memset(&drive_info, 0, sizeof(drive_info));
+    memset(&screen_info, 0, sizeof(screen_info));
+    
+    /* This is drawn from a dump from vgacon:startup in standard Linux. */
+    screen_info.orig_video_mode = 3; 
+    screen_info.orig_video_isVGA = 1;
+    screen_info.orig_video_lines = 25;
+    screen_info.orig_video_cols = 80;
+    screen_info.orig_video_ega_bx = 3;
+    screen_info.orig_video_points = 16;
+
+    memset(&apm_info.bios, 0, sizeof(apm_info.bios));
+    aux_device_present = 0; 
+#ifdef CONFIG_BLK_DEV_RAM
+    rd_image_start = 0;
+    rd_prompt = 0;
+    rd_doload = 0;
+#endif
+
+    root_mountflags &= ~MS_RDONLY;
+    init_mm.start_code = (unsigned long) &_text;
+    init_mm.end_code = (unsigned long) &_etext;
+    init_mm.end_data = (unsigned long) &_edata;
+    init_mm.brk = (unsigned long) &_end;
+
+    /* The mem= kernel command line param overrides the detected amount
+     * of memory.   For xenolinux, if this override is larger than detected
+     * memory, then boot using only detected memory and make provisions to
+     * use all of the override value.   The hypervisor can give this
+     * domain more memory later on and it will be added to the free
+     * lists at that time.   See claim_new_pages() in
+     * arch/xen/drivers/balloon/balloon.c
+     */
+    mem_param = parse_mem_cmdline(cmdline_p);
+    if (mem_param < xen_start_info.nr_pages)
+        mem_param = xen_start_info.nr_pages;
+
+#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
+
+/*
+ * 128MB for vmalloc(), iomap(), kmap(), and fixaddr mappings.
+ */
+#define VMALLOC_RESERVE	(unsigned long)(128 << 20)
+#define MAXMEM		(unsigned long)(HYPERVISOR_VIRT_START-PAGE_OFFSET-VMALLOC_RESERVE)
+#define MAXMEM_PFN	PFN_DOWN(MAXMEM)
+#define MAX_NONPAE_PFN	(1 << 20)
+
+    /*
+     * Determine low and high memory ranges:
+     */
+    lmax_low_pfn = max_pfn = mem_param;
+    if (lmax_low_pfn > MAXMEM_PFN) {
+        lmax_low_pfn = MAXMEM_PFN;
+#ifndef CONFIG_HIGHMEM
+        /* Maximum memory usable is what is directly addressable */
+        printk(KERN_WARNING "Warning only %ldMB will be used.\n",
+               MAXMEM>>20);
+        if (max_pfn > MAX_NONPAE_PFN)
+            printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+        else
+            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+        max_pfn = lmax_low_pfn;
+#else /* !CONFIG_HIGHMEM */
+#ifndef CONFIG_X86_PAE
+        if (max_pfn > MAX_NONPAE_PFN) {
+            max_pfn = MAX_NONPAE_PFN;
+            printk(KERN_WARNING "Warning only 4GB will be used.\n");
+            printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+        }
+#endif /* !CONFIG_X86_PAE */
+#endif /* !CONFIG_HIGHMEM */
+    }
+
+#ifdef CONFIG_HIGHMEM
+    highstart_pfn = highend_pfn = max_pfn;
+    if (max_pfn > MAXMEM_PFN) {
+        highstart_pfn = MAXMEM_PFN;
+        printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+               pages_to_mb(highend_pfn - highstart_pfn));
+    }
+#endif
+
+    phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
+    cur_pgd = init_mm.pgd = (pgd_t *)xen_start_info.pt_base;
+
+    start_pfn = (__pa(xen_start_info.pt_base) >> PAGE_SHIFT) + 
+        xen_start_info.nr_pt_frames;
+
+    /*
+     * Initialize the boot-time allocator, and free up all RAM. Then reserve 
+     * space for OS image, initrd, phys->machine table, bootstrap page table,
+     * and the bootmem bitmap. 
+     * NB. There is definitely enough room for the bootmem bitmap in the
+     * bootstrap page table. We are guaranteed to get >=512kB unused 'padding'
+     * for our own use after all bootstrap elements 
+     * (see asm-xen/xen-public/xen.h).
+     */
+    boot_pfn = min((int)xen_start_info.nr_pages,lmax_low_pfn);
+    bootmap_size = init_bootmem(start_pfn,boot_pfn);
+    free_bootmem(0, PFN_PHYS(boot_pfn));
+    reserve_bootmem(__pa(&_stext), 
+                    PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1 - 
+                    __pa(&_stext));
+
+    /* init_bootmem() set the global max_low_pfn to boot_pfn.  Now max_low_pfn 
+     * can be set to the override value.
+     */
+    max_low_pfn = lmax_low_pfn;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+    if ( xen_start_info.mod_start != 0 )
+    {
+        if ( (__pa(xen_start_info.mod_start) + xen_start_info.mod_len) <= 
+             (max_low_pfn << PAGE_SHIFT) )
+        {
+            initrd_start = xen_start_info.mod_start;
+            initrd_end   = initrd_start + xen_start_info.mod_len;
+            initrd_below_start_ok = 1;
+        }
+        else
+        {
+            printk(KERN_ERR "initrd extends beyond end of memory "
+                   "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+                   __pa(xen_start_info.mod_start) + xen_start_info.mod_len,
+                   max_low_pfn << PAGE_SHIFT);
+            initrd_start = 0;
+        }
+    }
+#endif
+
+    paging_init();
+
+    /* Make sure we have a correctly sized P->M table. */
+    if ( max_pfn != xen_start_info.nr_pages )
+    {
+        phys_to_machine_mapping = alloc_bootmem_low_pages(
+            max_pfn * sizeof(unsigned long));
+        if ( max_pfn > xen_start_info.nr_pages )
+        {
+            memset(phys_to_machine_mapping, ~0,
+                   max_pfn * sizeof(unsigned long));
+            memcpy(phys_to_machine_mapping,
+                   (unsigned long *)xen_start_info.mfn_list,
+                   xen_start_info.nr_pages * sizeof(unsigned long));
+        }
+        else
+        {
+            memcpy(phys_to_machine_mapping,
+                   (unsigned long *)xen_start_info.mfn_list,
+                   max_pfn * sizeof(unsigned long));
+            if (HYPERVISOR_dom_mem_op(
+                MEMOP_decrease_reservation,
+                (unsigned long *)xen_start_info.mfn_list + max_pfn,
+                xen_start_info.nr_pages - max_pfn, 0) !=
+                (xen_start_info.nr_pages - max_pfn))
+                BUG();
+        }
+        free_bootmem(__pa(xen_start_info.mfn_list), 
+                     PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
+                                     sizeof(unsigned long))));
+    }
+
+    pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
+    for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+    {	
+        pfn_to_mfn_frame_list[j] = 
+            virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+    }
+    HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
+	virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
+
+    op.cmd             = PHYSDEVOP_SET_IOPL;
+    op.u.set_iopl.iopl = current->thread.io_pl = 1;
+    HYPERVISOR_physdev_op(&op);
+
+    if (xen_start_info.flags & SIF_INITDOMAIN )
+    {
+        if( !(xen_start_info.flags & SIF_PRIVILEGED) )
+            panic("Xen granted us console access but not privileged status");
+
+#if defined(CONFIG_VT)
+#if defined(CONFIG_VGA_CONSOLE)
+        conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+        conswitchp = &dummy_con;
+#endif
+#endif
+    }
+}
+
+static int cachesize_override __initdata = -1;
+static int __init cachesize_setup(char *str)
+{
+    get_option (&str, &cachesize_override);
+    return 1;
+}
+__setup("cachesize=", cachesize_setup);
+
+static int __init highio_setup(char *str)
+{
+    printk("i386: disabling HIGHMEM block I/O\n");
+    blk_nohighio = 1;
+    return 1;
+}
+__setup("nohighio", highio_setup);
+
+static int __init get_model_name(struct cpuinfo_x86 *c)
+{
+    unsigned int *v;
+    char *p, *q;
+
+    if (cpuid_eax(0x80000000) < 0x80000004)
+        return 0;
+
+    v = (unsigned int *) c->x86_model_id;
+    cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+    cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+    cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+    c->x86_model_id[48] = 0;
+
+    /* Intel chips right-justify this string for some dumb reason;
+       undo that brain damage */
+    p = q = &c->x86_model_id[0];
+    while ( *p == ' ' )
+        p++;
+    if ( p != q ) {
+        while ( *p )
+            *q++ = *p++;
+        while ( q <= &c->x86_model_id[48] )
+            *q++ = '\0';	/* Zero-pad the rest */
+    }
+
+    return 1;
+}
+
+
+static void __init display_cacheinfo(struct cpuinfo_x86 *c)
+{
+    unsigned int n, dummy, ecx, edx, l2size;
+
+    n = cpuid_eax(0x80000000);
+
+    if (n >= 0x80000005) {
+        cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+        printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
+               edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+        c->x86_cache_size=(ecx>>24)+(edx>>24);	
+    }
+
+    if (n < 0x80000006)	/* Some chips just has a large L1. */
+        return;
+
+    ecx = cpuid_ecx(0x80000006);
+    l2size = ecx >> 16;
+
+    /* AMD errata T13 (order #21922) */
+    if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
+        if (c->x86_model == 3 && c->x86_mask == 0)	/* Duron Rev A0 */
+            l2size = 64;
+        if (c->x86_model == 4 &&
+            (c->x86_mask==0 || c->x86_mask==1))	/* Tbird rev A1/A2 */
+            l2size = 256;
+    }
+
+    /* Intel PIII Tualatin. This comes in two flavours.
+     * One has 256kb of cache, the other 512. We have no way
+     * to determine which, so we use a boottime override
+     * for the 512kb model, and assume 256 otherwise.
+     */
+    if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) &&
+        (c->x86_model == 11) && (l2size == 0))
+        l2size = 256;
+
+    if (c->x86_vendor == X86_VENDOR_CENTAUR) {
+	/* VIA C3 CPUs (670-68F) need further shifting. */
+	if ((c->x86 == 6) &&
+	    ((c->x86_model == 7) || (c->x86_model == 8))) {
+		l2size >>= 8;
+	}
+
+	/* VIA also screwed up Nehemiah stepping 1, and made
+	   it return '65KB' instead of '64KB'
+	   - Note, it seems this may only be in engineering samples. */
+	if ((c->x86==6) && (c->x86_model==9) &&
+	    (c->x86_mask==1) && (l2size==65))
+		l2size -= 1;
+    }
+
+    /* Allow user to override all this if necessary. */
+    if (cachesize_override != -1)
+        l2size = cachesize_override;
+
+    if ( l2size == 0 )
+        return;		/* Again, no L2 cache is possible */
+
+    c->x86_cache_size = l2size;
+
+    printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+           l2size, ecx & 0xFF);
+}
+
+static void __init init_c3(struct cpuinfo_x86 *c)
+{
+    /* Test for Centaur Extended Feature Flags presence */
+    if (cpuid_eax(0xC0000000) >= 0xC0000001) {
+        /* store Centaur Extended Feature Flags as
+         * word 5 of the CPU capability bit array
+         */
+        c->x86_capability[5] = cpuid_edx(0xC0000001);
+    }
+   
+    switch (c->x86_model) {
+    case 9:	/* Nehemiah */
+    default:
+        get_model_name(c);
+        display_cacheinfo(c);
+        break;
+    }
+}
+
+static void __init init_centaur(struct cpuinfo_x86 *c)
+{
+    /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+       3DNow is IDd by bit 31 in extended CPUID (1*3231) anyway */
+    clear_bit(0*32+31, &c->x86_capability);
+  
+    switch (c->x86) {
+    case 6:
+        init_c3(c);
+        break;
+    default:
+        panic("Unsupported Centaur CPU (%i)\n", c->x86);
+    }
+}
+
+static int __init init_amd(struct cpuinfo_x86 *c)
+{
+    int r;
+
+    /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+       3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+    clear_bit(0*32+31, &c->x86_capability);
+	
+    r = get_model_name(c);
+
+    switch(c->x86)
+    {
+    case 5: /* We don't like AMD K6 */
+        panic("Unsupported AMD processor\n");
+    case 6:	/* An Athlon/Duron. We can trust the BIOS probably */
+        break;
+    }
+
+    display_cacheinfo(c);
+    return r;
+}
+
+
+static void __init init_intel(struct cpuinfo_x86 *c)
+{
+    char *p = NULL;
+    unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
+
+    if (c->cpuid_level > 1) {
+        /* supports eax=2  call */
+        int i, j, n;
+        int regs[4];
+        unsigned char *dp = (unsigned char *)regs;
+
+        /* Number of times to iterate */
+        n = cpuid_eax(2) & 0xFF;
+
+        for ( i = 0 ; i < n ; i++ ) {
+            cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+			
+            /* If bit 31 is set, this is an unknown format */
+            for ( j = 0 ; j < 3 ; j++ ) {
+                if ( regs[j] < 0 ) regs[j] = 0;
+            }
+
+            /* Byte 0 is level count, not a descriptor */
+            for ( j = 1 ; j < 16 ; j++ ) {
+                unsigned char des = dp[j];
+                unsigned char dl, dh;
+                unsigned int cs;
+
+                dh = des >> 4;
+                dl = des & 0x0F;
+
+				/* Black magic... */
+
+                switch ( dh )
+                {
+                case 0:
+                    switch ( dl ) {
+                    case 6:
+                        /* L1 I cache */
+                        l1i += 8;
+                        break;
+                    case 8:
+                        /* L1 I cache */
+                        l1i += 16;
+                        break;
+                    case 10:
+                        /* L1 D cache */
+                        l1d += 8;
+                        break;
+                    case 12:
+                        /* L1 D cache */
+                        l1d += 16;
+                        break;
+                    default:;
+                        /* TLB, or unknown */
+                    }
+                    break;
+                case 2:
+                    if ( dl ) {
+                        /* L3 cache */
+                        cs = (dl-1) << 9;
+                        l3 += cs;
+                    }
+                    break;
+                case 4:
+                    if ( c->x86 > 6 && dl ) {
+                        /* P4 family */
+                        /* L3 cache */
+                        cs = 128 << (dl-1);
+                        l3 += cs;
+                        break;
+                    }
+                    /* else same as 8 - fall through */
+                case 8:
+                    if ( dl ) {
+                        /* L2 cache */
+                        cs = 128 << (dl-1);
+                        l2 += cs;
+                    }
+                    break;
+                case 6:
+                    if (dl > 5) {
+                        /* L1 D cache */
+                        cs = 8<<(dl-6);
+                        l1d += cs;
+                    }
+                    break;
+                case 7:
+                    if ( dl >= 8 ) 
+                    {
+                        /* L2 cache */
+                        cs = 64<<(dl-8);
+                        l2 += cs;
+                    } else {
+                        /* L0 I cache, count as L1 */
+                        cs = dl ? (16 << (dl-1)) : 12;
+                        l1i += cs;
+                    }
+                    break;
+                default:
+                    /* TLB, or something else we don't know about */
+                    break;
+                }
+            }
+        }
+        if ( l1i || l1d )
+            printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n",
+                   l1i, l1d);
+        if ( l2 )
+            printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
+        if ( l3 )
+            printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+
+        /*
+         * This assumes the L3 cache is shared; it typically lives in
+         * the northbridge.  The L1 caches are included by the L2
+         * cache, and so should not be included for the purpose of
+         * SMP switching weights.
+         */
+        c->x86_cache_size = l2 ? l2 : (l1i+l1d);
+    }
+
+    /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
+    if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
+        clear_bit(X86_FEATURE_SEP, &c->x86_capability);
+	
+    /* Names for the Pentium II/Celeron processors 
+       detectable only by also checking the cache size.
+       Dixon is NOT a Celeron. */
+    if (c->x86 == 6) {
+        switch (c->x86_model) {
+        case 5:
+            if (l2 == 0)
+                p = "Celeron (Covington)";
+            if (l2 == 256)
+                p = "Mobile Pentium II (Dixon)";
+            break;
+			
+        case 6:
+            if (l2 == 128)
+                p = "Celeron (Mendocino)";
+            break;
+			
+        case 8:
+            if (l2 == 128)
+                p = "Celeron (Coppermine)";
+            break;
+        }
+    }
+
+    if ( p )
+        strcpy(c->x86_model_id, p);
+}
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+    char *v = c->x86_vendor_id;
+
+    if (!strcmp(v, "GenuineIntel"))
+        c->x86_vendor = X86_VENDOR_INTEL;
+    else if (!strcmp(v, "AuthenticAMD"))
+        c->x86_vendor = X86_VENDOR_AMD;
+    else if (!strcmp(v, "CentaurHauls"))
+        c->x86_vendor = X86_VENDOR_CENTAUR;
+    else
+        c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+struct cpu_model_info {
+    int vendor;
+    int family;
+    char *model_names[16];
+};
+
+/* Naming convention should be: <Name> [(<Codename>)] */
+/* This table only is used unless init_<vendor>() below doesn't set it; */
+/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
+static struct cpu_model_info cpu_models[] __initdata = {
+    { X86_VENDOR_INTEL,	6,
+      { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)", 
+        NULL, "Pentium II (Deschutes)", "Mobile Pentium II",
+        "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL,
+        "Pentium III (Cascades)", NULL, NULL, NULL, NULL }},
+    { X86_VENDOR_AMD,	6, /* Is this this really necessary?? */
+      { "Athlon", "Athlon",
+        "Athlon", NULL, "Athlon", NULL,
+        NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL }}
+};
+
+/* Look up CPU names by table lookup. */
+static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+{
+    struct cpu_model_info *info = cpu_models;
+    int i;
+
+    if ( c->x86_model >= 16 )
+        return NULL;	/* Range check */
+
+    for ( i = 0 ; i < sizeof(cpu_models)/sizeof(struct cpu_model_info) ; i++ ) {
+        if ( info->vendor == c->x86_vendor &&
+             info->family == c->x86 ) {
+            return info->model_names[c->x86_model];
+        }
+        info++;
+    }
+    return NULL;		/* Not found */
+}
+
+
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+    u32 f1, f2;
+
+    asm("pushfl\n\t"
+        "pushfl\n\t"
+        "popl %0\n\t"
+        "movl %0,%1\n\t"
+        "xorl %2,%0\n\t"
+        "pushl %0\n\t"
+        "popfl\n\t"
+        "pushfl\n\t"
+        "popl %0\n\t"
+        "popfl\n\t"
+        : "=&r" (f1), "=&r" (f2)
+        : "ir" (flag));
+
+    return ((f1^f2) & flag) != 0;
+}
+
+
+/* Probe for the CPUID instruction */
+static int __init have_cpuid_p(void)
+{
+    return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+
+
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+unsigned char eddnr;
+struct edd_info edd[EDDMAXNR];
+unsigned int edd_disk80_sig;
+/**
+ * copy_edd() - Copy the BIOS EDD information
+ *              from empty_zero_page into a safe place.
+ *
+ */
+static inline void copy_edd(void)
+{
+     eddnr = EDD_NR;
+     memcpy(edd, EDD_BUF, sizeof(edd));
+     edd_disk80_sig = DISK80_SIGNATURE_BUFFER;
+}
+#else
+static inline void copy_edd(void) {}
+#endif
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+    int junk, i;
+    u32 xlvl, tfms;
+
+    c->loops_per_jiffy = loops_per_jiffy;
+    c->x86_cache_size = -1;
+    c->x86_vendor = X86_VENDOR_UNKNOWN;
+    c->cpuid_level = -1;	/* CPUID not detected */
+    c->x86_model = c->x86_mask = 0;	/* So far unknown... */
+    c->x86_vendor_id[0] = '\0'; /* Unset */
+    c->x86_model_id[0] = '\0';  /* Unset */
+    memset(&c->x86_capability, 0, sizeof c->x86_capability);
+    c->hard_math = 1;
+
+    if ( !have_cpuid_p() ) {
+        panic("Processor must support CPUID\n");
+    } else {
+        /* CPU does have CPUID */
+
+        /* Get vendor name */
+        cpuid(0x00000000, &c->cpuid_level,
+              (int *)&c->x86_vendor_id[0],
+              (int *)&c->x86_vendor_id[8],
+              (int *)&c->x86_vendor_id[4]);
+		
+        get_cpu_vendor(c);
+        /* Initialize the standard set of capabilities */
+        /* Note that the vendor-specific code below might override */
+
+        /* Intel-defined flags: level 0x00000001 */
+        if ( c->cpuid_level >= 0x00000001 ) {
+                        u32 capability, excap;
+                        cpuid(0x00000001, &tfms, &junk, &excap, &capability);
+                        c->x86_capability[0] = capability;
+                        c->x86_capability[4] = excap;
+                        c->x86 = (tfms >> 8) & 15;
+                        c->x86_model = (tfms >> 4) & 15;
+                        if (c->x86 == 0xf) {
+                                c->x86 += (tfms >> 20) & 0xff;
+                                c->x86_model += ((tfms >> 16) & 0xF) << 4;
+                        }
+                        c->x86_mask = tfms & 15;
+        } else {
+            /* Have CPUID level 0 only - unheard of */
+            c->x86 = 4;
+        }
+
+        /* AMD-defined flags: level 0x80000001 */
+        xlvl = cpuid_eax(0x80000000);
+        if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+            if ( xlvl >= 0x80000001 )
+                c->x86_capability[1] = cpuid_edx(0x80000001);
+            if ( xlvl >= 0x80000004 )
+                get_model_name(c); /* Default name */
+        }
+
+        /* Transmeta-defined flags: level 0x80860001 */
+        xlvl = cpuid_eax(0x80860000);
+        if ( (xlvl & 0xffff0000) == 0x80860000 ) {
+            if (  xlvl >= 0x80860001 )
+                c->x86_capability[2] = cpuid_edx(0x80860001);
+        }
+    }
+
+    printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
+           c->x86_capability[0],
+           c->x86_capability[1],
+           c->x86_capability[2],
+           c->x86_vendor);
+
+    /*
+     * Vendor-specific initialization.  In this section we
+     * canonicalize the feature flags, meaning if there are
+     * features a certain CPU supports which CPUID doesn't
+     * tell us, CPUID claiming incorrect flags, or other bugs,
+     * we handle them here.
+     *
+     * At the end of this section, c->x86_capability better
+     * indicate the features this CPU genuinely supports!
+     */
+    switch ( c->x86_vendor ) {
+    case X86_VENDOR_AMD:
+        init_amd(c);
+        break;
+
+    case X86_VENDOR_INTEL:
+        init_intel(c);
+        break;
+
+    case X86_VENDOR_CENTAUR:
+        init_centaur(c);
+        break;
+        
+    default:
+        printk("Unsupported CPU vendor (%d) -- please report!\n",
+               c->x86_vendor);
+    }
+	
+    printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n",
+           c->x86_capability[0],
+           c->x86_capability[1],
+           c->x86_capability[2],
+           c->x86_capability[3]);
+
+
+    /* If the model name is still unset, do table lookup. */
+    if ( !c->x86_model_id[0] ) {
+        char *p;
+        p = table_lookup_model(c);
+        if ( p )
+            strcpy(c->x86_model_id, p);
+        else
+            /* Last resort... */
+            sprintf(c->x86_model_id, "%02x/%02x",
+                    c->x86_vendor, c->x86_model);
+    }
+
+    /* Now the feature flags better reflect actual CPU features! */
+
+    printk(KERN_DEBUG "CPU:     After generic, caps: %08x %08x %08x %08x\n",
+           c->x86_capability[0],
+           c->x86_capability[1],
+           c->x86_capability[2],
+           c->x86_capability[3]);
+
+    /*
+     * On SMP, boot_cpu_data holds the common feature set between
+     * all CPUs; so make sure that we indicate which features are
+     * common between the CPUs.  The first time this routine gets
+     * executed, c == &boot_cpu_data.
+     */
+    if ( c != &boot_cpu_data ) {
+        /* AND the already accumulated flags with these */
+        for ( i = 0 ; i < NCAPINTS ; i++ )
+            boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+    }
+
+    printk(KERN_DEBUG "CPU:             Common caps: %08x %08x %08x %08x\n",
+           boot_cpu_data.x86_capability[0],
+           boot_cpu_data.x86_capability[1],
+           boot_cpu_data.x86_capability[2],
+           boot_cpu_data.x86_capability[3]);
+}
+
+
+/* These need to match <asm/processor.h> */
+static char *cpu_vendor_names[] __initdata = {
+    "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise", "Transmeta" };
+
+
+void __init print_cpu_info(struct cpuinfo_x86 *c)
+{
+    char *vendor = NULL;
+
+    if (c->x86_vendor < sizeof(cpu_vendor_names)/sizeof(char *))
+        vendor = cpu_vendor_names[c->x86_vendor];
+    else if (c->cpuid_level >= 0)
+        vendor = c->x86_vendor_id;
+
+    if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
+        printk("%s ", vendor);
+
+    if (!c->x86_model_id[0])
+        printk("%d86", c->x86);
+    else
+        printk("%s", c->x86_model_id);
+
+    if (c->x86_mask || c->cpuid_level >= 0) 
+        printk(" stepping %02x\n", c->x86_mask);
+    else
+        printk("\n");
+}
+
+/*
+ *	Get CPU information for use by the procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+    /* 
+     * These flag bits must match the definitions in <asm/cpufeature.h>.
+     * NULL means this bit is undefined or reserved; either way it doesn't
+     * have meaning as far as Linux is concerned.  Note that it's important
+     * to realize there is a difference between this table and CPUID -- if
+     * applications want to get the raw CPUID data, they should access
+     * /dev/cpu/<cpu_nr>/cpuid instead.
+	 */
+    static char *x86_cap_flags[] = {
+        /* Intel-defined */
+        "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+        "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+        "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+        "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
+
+        /* AMD-defined */
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, "mp", NULL, NULL, "mmxext", NULL,
+        NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
+
+        /* Transmeta-defined */
+        "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+        /* Other (Linux-defined) */
+        "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", 
+	NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+        /* Intel-defined (#2) */
+        "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "tm2",
+        "est", NULL, "cid", NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+        /* VIA/Cyrix/Centaur-defined */
+        NULL, NULL, "xstore", NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+    };
+    struct cpuinfo_x86 *c = v;
+    int i, n = c - cpu_data;
+    int fpu_exception;
+
+#ifdef CONFIG_SMP
+    if (!(cpu_online_map & (1<<n)))
+        return 0;
+#endif
+    seq_printf(m, "processor\t: %d\n"
+               "vendor_id\t: %s\n"
+               "cpu family\t: %d\n"
+               "model\t\t: %d\n"
+               "model name\t: %s\n",
+               n,
+               c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+               c->x86,
+               c->x86_model,
+               c->x86_model_id[0] ? c->x86_model_id : "unknown");
+
+    if (c->x86_mask || c->cpuid_level >= 0)
+        seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+    else
+        seq_printf(m, "stepping\t: unknown\n");
+
+    if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) {
+        seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n",
+                   cpu_khz / 1000, (cpu_khz % 1000));
+    }
+
+    /* Cache size */
+    if (c->x86_cache_size >= 0)
+        seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+	
+	/* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */
+    fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu);
+    seq_printf(m, "fdiv_bug\t: %s\n"
+               "hlt_bug\t\t: %s\n"
+               "f00f_bug\t: %s\n"
+               "coma_bug\t: %s\n"
+               "fpu\t\t: %s\n"
+               "fpu_exception\t: %s\n"
+               "cpuid level\t: %d\n"
+               "wp\t\t: %s\n"
+               "flags\t\t:",
+               c->fdiv_bug ? "yes" : "no",
+               c->hlt_works_ok ? "no" : "yes",
+               c->f00f_bug ? "yes" : "no",
+               c->coma_bug ? "yes" : "no",
+               c->hard_math ? "yes" : "no",
+               fpu_exception ? "yes" : "no",
+               c->cpuid_level,
+               c->wp_works_ok ? "yes" : "no");
+
+    for ( i = 0 ; i < 32*NCAPINTS ; i++ )
+        if ( test_bit(i, &c->x86_capability) &&
+             x86_cap_flags[i] != NULL )
+            seq_printf(m, " %s", x86_cap_flags[i]);
+
+    seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
+               c->loops_per_jiffy/(500000/HZ),
+               (c->loops_per_jiffy/(5000/HZ)) % 100);
+    return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+    return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+}
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+    ++*pos;
+    return c_start(m, pos);
+}
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+struct seq_operations cpuinfo_op = {
+    start:	c_start,
+    next:	c_next,
+    stop:	c_stop,
+    show:	show_cpuinfo,
+};
+
+unsigned long cpu_initialized __initdata = 0;
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __init cpu_init (void)
+{
+    int nr = smp_processor_id();
+
+    if (test_and_set_bit(nr, &cpu_initialized)) {
+        printk(KERN_WARNING "CPU#%d already initialized!\n", nr);
+        for (;;) __sti();
+    }
+    printk(KERN_INFO "Initializing CPU#%d\n", nr);
+
+    /*
+     * set up and load the per-CPU TSS and LDT
+     */
+    atomic_inc(&init_mm.mm_count);
+    current->active_mm = &init_mm;
+    if(current->mm)
+        BUG();
+    enter_lazy_tlb(&init_mm, current, nr);
+
+    HYPERVISOR_stack_switch(__KERNEL_DS, current->thread.esp0);
+
+    load_LDT(&init_mm.context);
+
+    /* Force FPU initialization. */
+    current->flags &= ~PF_USEDFPU;
+    current->used_math = 0;
+    stts();
+}
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/signal.c b/linux-2.4-xen-sparse/arch/xen/kernel/signal.c
new file mode 100644
index 0000000000..6f2a2ac11d
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/signal.c
@@ -0,0 +1,717 @@
+/*
+ *  linux/arch/i386/kernel/signal.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+
+#define DEBUG_SIG 0
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset));
+
+int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from)
+{
+	if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
+		return -EFAULT;
+	if (from->si_code < 0)
+		return __copy_to_user(to, from, sizeof(siginfo_t));
+	else {
+		int err;
+
+		/* If you change siginfo_t structure, please be sure
+		   this code is fixed accordingly.
+		   It should never copy any pad contained in the structure
+		   to avoid security leaks, but must copy the generic
+		   3 ints plus the relevant union member.  */
+		err = __put_user(from->si_signo, &to->si_signo);
+		err |= __put_user(from->si_errno, &to->si_errno);
+		err |= __put_user((short)from->si_code, &to->si_code);
+		/* First 32bits of unions are always present.  */
+		err |= __put_user(from->si_pid, &to->si_pid);
+		switch (from->si_code >> 16) {
+		case __SI_FAULT >> 16:
+			break;
+		case __SI_CHLD >> 16:
+			err |= __put_user(from->si_utime, &to->si_utime);
+			err |= __put_user(from->si_stime, &to->si_stime);
+			err |= __put_user(from->si_status, &to->si_status);
+		default:
+			err |= __put_user(from->si_uid, &to->si_uid);
+			break;
+		/* case __SI_RT: This is not generated by the kernel as of now.  */
+		}
+		return err;
+	}
+}
+
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int
+sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+{
+	struct pt_regs * regs = (struct pt_regs *) &history0;
+	sigset_t saveset;
+
+	mask &= _BLOCKABLE;
+	spin_lock_irq(&current->sigmask_lock);
+	saveset = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending(current);
+	spin_unlock_irq(&current->sigmask_lock);
+
+	regs->eax = -EINTR;
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		if (do_signal(regs, &saveset))
+			return -EINTR;
+	}
+}
+
+asmlinkage int
+sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize)
+{
+	struct pt_regs * regs = (struct pt_regs *) &unewset;
+	sigset_t saveset, newset;
+
+	/* XXX: Don't preclude handling different sized sigset_t's.  */
+	if (sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (copy_from_user(&newset, unewset, sizeof(newset)))
+		return -EFAULT;
+	sigdelsetmask(&newset, ~_BLOCKABLE);
+
+	spin_lock_irq(&current->sigmask_lock);
+	saveset = current->blocked;
+	current->blocked = newset;
+	recalc_sigpending(current);
+	spin_unlock_irq(&current->sigmask_lock);
+
+	regs->eax = -EINTR;
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		if (do_signal(regs, &saveset))
+			return -EINTR;
+	}
+}
+
+asmlinkage int 
+sys_sigaction(int sig, const struct old_sigaction *act,
+	      struct old_sigaction *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		old_sigset_t mask;
+		if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+			return -EFAULT;
+		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		__get_user(mask, &act->sa_mask);
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+			return -EFAULT;
+		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+	}
+
+	return ret;
+}
+
+asmlinkage int
+sys_sigaltstack(const stack_t *uss, stack_t *uoss)
+{
+	struct pt_regs *regs = (struct pt_regs *) &uss;
+	return do_sigaltstack(uss, uoss, regs->esp);
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+struct sigframe
+{
+	char *pretcode;
+	int sig;
+	struct sigcontext sc;
+	struct _fpstate fpstate;
+	unsigned long extramask[_NSIG_WORDS-1];
+	char retcode[8];
+};
+
+struct rt_sigframe
+{
+	char *pretcode;
+	int sig;
+	struct siginfo *pinfo;
+	void *puc;
+	struct siginfo info;
+	struct ucontext uc;
+	struct _fpstate fpstate;
+	char retcode[8];
+};
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax)
+{
+	unsigned int err = 0;
+
+#define COPY(x)		err |= __get_user(regs->x, &sc->x)
+
+#define COPY_SEG(seg)							\
+	{ unsigned short tmp;						\
+	  err |= __get_user(tmp, &sc->seg);				\
+	  regs->x##seg = tmp; }
+
+#define COPY_SEG_STRICT(seg)						\
+	{ unsigned short tmp;						\
+	  err |= __get_user(tmp, &sc->seg);				\
+	  regs->x##seg = tmp|3; }
+
+#define GET_SEG(seg)							\
+	{ unsigned short tmp;						\
+	  err |= __get_user(tmp, &sc->seg);				\
+	  loadsegment(seg,tmp); }
+
+	GET_SEG(gs);
+	GET_SEG(fs);
+	COPY_SEG(es);
+	COPY_SEG(ds);
+	COPY(edi);
+	COPY(esi);
+	COPY(ebp);
+	COPY(esp);
+	COPY(ebx);
+	COPY(edx);
+	COPY(ecx);
+	COPY(eip);
+	COPY_SEG_STRICT(cs);
+	COPY_SEG_STRICT(ss);
+	
+	{
+		unsigned int tmpflags;
+		err |= __get_user(tmpflags, &sc->eflags);
+		regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
+		regs->orig_eax = -1;		/* disable syscall checks */
+	}
+
+	{
+		struct _fpstate * buf;
+		err |= __get_user(buf, &sc->fpstate);
+		if (buf) {
+			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+				goto badframe;
+			err |= restore_i387(buf);
+		}
+	}
+
+	err |= __get_user(*peax, &sc->eax);
+	return err;
+
+badframe:
+	return 1;
+}
+
+asmlinkage int sys_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *) &__unused;
+	struct sigframe *frame = (struct sigframe *)(regs->esp - 8);
+	sigset_t set;
+	int eax;
+
+	if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__get_user(set.sig[0], &frame->sc.oldmask)
+	    || (_NSIG_WORDS > 1
+		&& __copy_from_user(&set.sig[1], &frame->extramask,
+				    sizeof(frame->extramask))))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sigmask_lock);
+	current->blocked = set;
+	recalc_sigpending(current);
+	spin_unlock_irq(&current->sigmask_lock);
+	
+	if (restore_sigcontext(regs, &frame->sc, &eax))
+		goto badframe;
+	return eax;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}	
+
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *) &__unused;
+	struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4);
+	sigset_t set;
+	stack_t st;
+	int eax;
+
+	if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sigmask_lock);
+	current->blocked = set;
+	recalc_sigpending(current);
+	spin_unlock_irq(&current->sigmask_lock);
+	
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
+		goto badframe;
+
+	if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
+		goto badframe;
+	/* It is more difficult to avoid calling this function than to
+	   call it and ignore errors.  */
+	do_sigaltstack(&st, NULL, regs->esp);
+
+	return eax;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}	
+
+/*
+ * Set up a signal frame.
+ */
+
+static int
+setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
+		 struct pt_regs *regs, unsigned long mask)
+{
+	int tmp, err = 0;
+
+	tmp = 0;
+	__asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+	err |= __put_user(tmp, (unsigned int *)&sc->gs);
+	__asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+	err |= __put_user(tmp, (unsigned int *)&sc->fs);
+
+	err |= __put_user(regs->xes, (unsigned int *)&sc->es);
+	err |= __put_user(regs->xds, (unsigned int *)&sc->ds);
+	err |= __put_user(regs->edi, &sc->edi);
+	err |= __put_user(regs->esi, &sc->esi);
+	err |= __put_user(regs->ebp, &sc->ebp);
+	err |= __put_user(regs->esp, &sc->esp);
+	err |= __put_user(regs->ebx, &sc->ebx);
+	err |= __put_user(regs->edx, &sc->edx);
+	err |= __put_user(regs->ecx, &sc->ecx);
+	err |= __put_user(regs->eax, &sc->eax);
+	err |= __put_user(current->thread.trap_no, &sc->trapno);
+	err |= __put_user(current->thread.error_code, &sc->err);
+	err |= __put_user(regs->eip, &sc->eip);
+	err |= __put_user(regs->xcs, (unsigned int *)&sc->cs);
+	err |= __put_user(regs->eflags, &sc->eflags);
+	err |= __put_user(regs->esp, &sc->esp_at_signal);
+	err |= __put_user(regs->xss, (unsigned int *)&sc->ss);
+
+	tmp = save_i387(fpstate);
+	if (tmp < 0)
+	  err = 1;
+	else
+	  err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+
+	/* non-iBCS2 extensions.. */
+	err |= __put_user(mask, &sc->oldmask);
+	err |= __put_user(current->thread.cr2, &sc->cr2);
+
+	return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+	unsigned long esp;
+
+	/* Default to using normal stack */
+	esp = regs->esp;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(esp) == 0)
+			esp = current->sas_ss_sp + current->sas_ss_size;
+	}
+
+	/* This is the legacy signal stack switching. */
+	else if ((regs->xss & 0xffff) != __USER_DS &&
+		 !(ka->sa.sa_flags & SA_RESTORER) &&
+		 ka->sa.sa_restorer) {
+		esp = (unsigned long) ka->sa.sa_restorer;
+	}
+
+	return (void *)((esp - frame_size) & -8ul);
+}
+
+static void setup_frame(int sig, struct k_sigaction *ka,
+			sigset_t *set, struct pt_regs * regs)
+{
+	struct sigframe *frame;
+	int err = 0;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	err |= __put_user((current->exec_domain
+		           && current->exec_domain->signal_invmap
+		           && sig < 32
+		           ? current->exec_domain->signal_invmap[sig]
+		           : sig),
+		          &frame->sig);
+	if (err)
+		goto give_sigsegv;
+
+	err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+	if (err)
+		goto give_sigsegv;
+
+	if (_NSIG_WORDS > 1) {
+		err |= __copy_to_user(frame->extramask, &set->sig[1],
+				      sizeof(frame->extramask));
+	}
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+	} else {
+		err |= __put_user(frame->retcode, &frame->pretcode);
+		/* This is popl %eax ; movl $,%eax ; int $0x80 */
+		err |= __put_user(0xb858, (short *)(frame->retcode+0));
+		err |= __put_user(__NR_sigreturn, (int *)(frame->retcode+2));
+		err |= __put_user(0x80cd, (short *)(frame->retcode+6));
+	}
+
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up registers for signal handler */
+	regs->esp = (unsigned long) frame;
+	regs->eip = (unsigned long) ka->sa.sa_handler;
+
+	set_fs(USER_DS);
+	regs->xds = __USER_DS;
+	regs->xes = __USER_DS;
+	regs->xss = __USER_DS;
+	regs->xcs = __USER_CS;
+	regs->eflags &= ~TF_MASK;
+
+#if DEBUG_SIG
+	printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+		current->comm, current->pid, frame, regs->eip, frame->pretcode);
+#endif
+
+	return;
+
+give_sigsegv:
+	if (sig == SIGSEGV)
+		ka->sa.sa_handler = SIG_DFL;
+	force_sig(SIGSEGV, current);
+}
+
+static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			   sigset_t *set, struct pt_regs * regs)
+{
+	struct rt_sigframe *frame;
+	int err = 0;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	err |= __put_user((current->exec_domain
+		    	   && current->exec_domain->signal_invmap
+		    	   && sig < 32
+		    	   ? current->exec_domain->signal_invmap[sig]
+			   : sig),
+			  &frame->sig);
+	err |= __put_user(&frame->info, &frame->pinfo);
+	err |= __put_user(&frame->uc, &frame->puc);
+	err |= copy_siginfo_to_user(&frame->info, info);
+	if (err)
+		goto give_sigsegv;
+
+	/* Create the ucontext.  */
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->esp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+			        regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+	} else {
+		err |= __put_user(frame->retcode, &frame->pretcode);
+		/* This is movl $,%eax ; int $0x80 */
+		err |= __put_user(0xb8, (char *)(frame->retcode+0));
+		err |= __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1));
+		err |= __put_user(0x80cd, (short *)(frame->retcode+5));
+	}
+
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up registers for signal handler */
+	regs->esp = (unsigned long) frame;
+	regs->eip = (unsigned long) ka->sa.sa_handler;
+
+	set_fs(USER_DS);
+	regs->xds = __USER_DS;
+	regs->xes = __USER_DS;
+	regs->xss = __USER_DS;
+	regs->xcs = __USER_CS;
+	regs->eflags &= ~TF_MASK;
+
+#if DEBUG_SIG
+	printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+		current->comm, current->pid, frame, regs->eip, frame->pretcode);
+#endif
+
+	return;
+
+give_sigsegv:
+	if (sig == SIGSEGV)
+		ka->sa.sa_handler = SIG_DFL;
+	force_sig(SIGSEGV, current);
+}
+
+/*
+ * OK, we're invoking a handler
+ */	
+
+static void
+handle_signal(unsigned long sig, struct k_sigaction *ka,
+	      siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
+{
+	/* Are we from a system call? */
+	if (regs->orig_eax >= 0) {
+		/* If so, check system call restarting.. */
+		switch (regs->eax) {
+			case -ERESTARTNOHAND:
+				regs->eax = -EINTR;
+				break;
+
+			case -ERESTARTSYS:
+				if (!(ka->sa.sa_flags & SA_RESTART)) {
+					regs->eax = -EINTR;
+					break;
+				}
+			/* fallthrough */
+			case -ERESTARTNOINTR:
+				regs->eax = regs->orig_eax;
+				regs->eip -= 2;
+		}
+	}
+
+	/* Set up the stack frame */
+	if (ka->sa.sa_flags & SA_SIGINFO)
+		setup_rt_frame(sig, ka, info, oldset, regs);
+	else
+		setup_frame(sig, ka, oldset, regs);
+
+	if (ka->sa.sa_flags & SA_ONESHOT)
+		ka->sa.sa_handler = SIG_DFL;
+
+	if (!(ka->sa.sa_flags & SA_NODEFER)) {
+		spin_lock_irq(&current->sigmask_lock);
+		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+		sigaddset(&current->blocked,sig);
+		recalc_sigpending(current);
+		spin_unlock_irq(&current->sigmask_lock);
+	}
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
+{
+	siginfo_t info;
+	struct k_sigaction *ka;
+
+	/*
+	 * We want the common case to go fast, which
+	 * is why we may in certain cases get here from
+	 * kernel mode. Just return without doing anything
+	 * if so.
+	 */
+	if ((regs->xcs & 2) != 2)
+		return 1;
+
+	if (!oldset)
+		oldset = &current->blocked;
+
+	for (;;) {
+		unsigned long signr;
+
+		spin_lock_irq(&current->sigmask_lock);
+		signr = dequeue_signal(&current->blocked, &info);
+		spin_unlock_irq(&current->sigmask_lock);
+
+		if (!signr)
+			break;
+
+		if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
+			/* Let the debugger run.  */
+			current->exit_code = signr;
+			current->state = TASK_STOPPED;
+			notify_parent(current, SIGCHLD);
+			schedule();
+
+			/* We're back.  Did the debugger cancel the sig?  */
+			if (!(signr = current->exit_code))
+				continue;
+			current->exit_code = 0;
+
+			/* The debugger continued.  Ignore SIGSTOP.  */
+			if (signr == SIGSTOP)
+				continue;
+
+			/* Update the siginfo structure.  Is this good?  */
+			if (signr != info.si_signo) {
+				info.si_signo = signr;
+				info.si_errno = 0;
+				info.si_code = SI_USER;
+				info.si_pid = current->p_pptr->pid;
+				info.si_uid = current->p_pptr->uid;
+			}
+
+			/* If the (new) signal is now blocked, requeue it.  */
+			if (sigismember(&current->blocked, signr)) {
+				send_sig_info(signr, &info, current);
+				continue;
+			}
+		}
+
+		ka = &current->sig->action[signr-1];
+		if (ka->sa.sa_handler == SIG_IGN) {
+			if (signr != SIGCHLD)
+				continue;
+			/* Check for SIGCHLD: it's special.  */
+			while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0)
+				/* nothing */;
+			continue;
+		}
+
+		if (ka->sa.sa_handler == SIG_DFL) {
+			int exit_code = signr;
+
+			/* Init gets no signals it doesn't want.  */
+			if (current->pid == 1)
+				continue;
+
+			switch (signr) {
+			case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG:
+				continue;
+
+			case SIGTSTP: case SIGTTIN: case SIGTTOU:
+				if (is_orphaned_pgrp(current->pgrp))
+					continue;
+				/* FALLTHRU */
+
+			case SIGSTOP: {
+				struct signal_struct *sig;
+				current->state = TASK_STOPPED;
+				current->exit_code = signr;
+				sig = current->p_pptr->sig;
+				if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
+					notify_parent(current, SIGCHLD);
+				schedule();
+				continue;
+			}
+
+			case SIGQUIT: case SIGILL: case SIGTRAP:
+			case SIGABRT: case SIGFPE: case SIGSEGV:
+			case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ:
+				if (do_coredump(signr, regs))
+					exit_code |= 0x80;
+				/* FALLTHRU */
+
+			default:
+				sig_exit(signr, exit_code, &info);
+				/* NOTREACHED */
+			}
+		}
+
+		/* Reenable any watchpoints before delivering the
+		 * signal to user space. The processor register will
+		 * have been cleared if the watchpoint triggered
+		 * inside the kernel.
+		 */
+                if ( current->thread.debugreg[7] != 0 )
+                    HYPERVISOR_set_debugreg(7, current->thread.debugreg[7]);
+
+		/* Whee!  Actually deliver the signal.  */
+		handle_signal(signr, ka, &info, oldset, regs);
+		return 1;
+	}
+
+	/* Did we come from a system call? */
+	if (regs->orig_eax >= 0) {
+		/* Restart the system call - no handlers present */
+		if (regs->eax == -ERESTARTNOHAND ||
+		    regs->eax == -ERESTARTSYS ||
+		    regs->eax == -ERESTARTNOINTR) {
+			regs->eax = regs->orig_eax;
+			regs->eip -= 2;
+		}
+	}
+	return 0;
+}
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/time.c b/linux-2.4-xen-sparse/arch/xen/kernel/time.c
new file mode 100644
index 0000000000..9b3d40a08c
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/time.c
@@ -0,0 +1,721 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2002-2003 - Keir Fraser - University of Cambridge
+ ****************************************************************************
+ *
+ *        File: arch/xen/kernel/time.c
+ *      Author: Rolf Neugebauer and Keir Fraser
+ * 
+ * Description: Interface with Xen to get correct notion of time
+ */
+
+/*
+ *  linux/arch/i386/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *
+ * This file contains the PC-specific time handling details:
+ * reading the RTC at bootup, etc..
+ * 1994-07-02    Alan Modra
+ * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
+ * 1995-03-26    Markus Kuhn
+ *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
+ *      precision CMOS clock update
+ * 1996-05-03    Ingo Molnar
+ *      fixed time warps in do_[slow|fast]_gettimeoffset()
+ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
+ *  "A Kernel Model for Precision Timekeeping" by Dave Mills
+ * 1998-09-05    (Various)
+ * More robust do_fast_gettimeoffset() algorithm implemented
+ * (works with APM, Cyrix 6x86MX and Centaur C6),
+ * monotonic gettimeofday() with fast_get_timeoffset(),
+ * drift-proof precision TSC calibration on boot
+ * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
+ * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
+ * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
+ * 1998-12-16    Andrea Arcangeli
+ * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
+ * because was not accounting lost_ticks.
+ * 1998-12-24 Copyright (C) 1998  Andrea Arcangeli
+ * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ * serialize accesses to xtime/lost_ticks).
+ */
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/msr.h>
+#include <asm/delay.h>
+#include <asm/mpspec.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+#include <asm/div64.h>
+#include <asm/hypervisor.h>
+#include <asm-xen/xen-public/dom0_ops.h>
+
+#include <linux/mc146818rtc.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/sysctl.h>
+#include <linux/sysrq.h>
+
+spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+extern rwlock_t xtime_lock;
+extern unsigned long wall_jiffies;
+
+unsigned long cpu_khz; /* get this from Xen, used elsewhere */
+
+static unsigned int rdtsc_bitshift;
+static u32 st_scale_f; /* convert ticks -> usecs */
+static u32 st_scale_i; /* convert ticks -> usecs */
+
+/* These are peridically updated in shared_info, and then copied here. */
+static u32 shadow_tsc_stamp;
+static u64 shadow_system_time;
+static u32 shadow_time_version;
+static struct timeval shadow_tv;
+
+/*
+ * We use this to ensure that gettimeofday() is monotonically increasing. We
+ * only break this guarantee if the wall clock jumps backwards "a long way".
+ */
+static struct timeval last_seen_tv = {0,0};
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+/* Periodically propagate synchronised time base to the RTC and to Xen. */
+static long last_update_to_rtc, last_update_to_xen;
+#endif
+
+/* Periodically take synchronised time base from Xen, if we need it. */
+static long last_update_from_xen;   /* UTC seconds when last read Xen clock. */
+
+/* Keep track of last time we did processing/updating of jiffies and xtime. */
+static u64 processed_system_time;   /* System time (ns) at last processing. */
+
+#define NS_PER_TICK (1000000000ULL/HZ)
+
+#ifndef NSEC_PER_SEC
+#define NSEC_PER_SEC (1000000000L)
+#endif
+
+#define HANDLE_USEC_UNDERFLOW(_tv)         \
+    do {                                   \
+        while ( (_tv).tv_usec < 0 )        \
+        {                                  \
+            (_tv).tv_usec += 1000000;      \
+            (_tv).tv_sec--;                \
+        }                                  \
+    } while ( 0 )
+#define HANDLE_USEC_OVERFLOW(_tv)          \
+    do {                                   \
+        while ( (_tv).tv_usec >= 1000000 ) \
+        {                                  \
+            (_tv).tv_usec -= 1000000;      \
+            (_tv).tv_sec++;                \
+        }                                  \
+    } while ( 0 )
+static inline void __normalize_time(time_t *sec, s64 *nsec)
+{
+	while (*nsec >= NSEC_PER_SEC) {
+		(*nsec) -= NSEC_PER_SEC;
+		(*sec)++;
+	}
+	while (*nsec < 0) {
+		(*nsec) += NSEC_PER_SEC;
+		(*sec)--;
+	}
+}
+
+/* Dynamically-mapped IRQs. */
+static int time_irq, debug_irq;
+
+/* Does this guest OS track Xen time, or set its wall clock independently? */
+static int independent_wallclock = 0;
+static int __init __independent_wallclock(char *str)
+{
+    independent_wallclock = 1;
+    return 1;
+}
+__setup("independent_wallclock", __independent_wallclock);
+#define INDEPENDENT_WALLCLOCK() \
+    (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+/*
+ * In order to set the CMOS clock precisely, set_rtc_mmss has to be
+ * called 500 ms after the second nowtime has started, because when
+ * nowtime is written into the registers of the CMOS clock, it will
+ * jump to the next second precisely 500 ms later. Check the Motorola
+ * MC146818A or Dallas DS12887 data sheet for details.
+ *
+ * BUG: This routine does not handle hour overflow properly; it just
+ *      sets the minutes. Usually you'll only notice that after reboot!
+ */
+static int set_rtc_mmss(unsigned long nowtime)
+{
+    int retval = 0;
+    int real_seconds, real_minutes, cmos_minutes;
+    unsigned char save_control, save_freq_select;
+
+    /* gets recalled with irq locally disabled */
+    spin_lock(&rtc_lock);
+    save_control = CMOS_READ(RTC_CONTROL);
+    CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+
+    save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+    CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+    cmos_minutes = CMOS_READ(RTC_MINUTES);
+    if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
+        BCD_TO_BIN(cmos_minutes);
+
+    /*
+     * since we're only adjusting minutes and seconds, don't interfere with
+     * hour overflow. This avoids messing with unknown time zones but requires
+     * your RTC not to be off by more than 15 minutes
+     */
+    real_seconds = nowtime % 60;
+    real_minutes = nowtime / 60;
+    if ( ((abs(real_minutes - cmos_minutes) + 15)/30) & 1 )
+        real_minutes += 30;  /* correct for half hour time zone */
+    real_minutes %= 60;
+
+    if ( abs(real_minutes - cmos_minutes) < 30 )
+    {
+        if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
+        {
+            BIN_TO_BCD(real_seconds);
+            BIN_TO_BCD(real_minutes);
+        }
+        CMOS_WRITE(real_seconds,RTC_SECONDS);
+        CMOS_WRITE(real_minutes,RTC_MINUTES);
+    }
+    else 
+    {
+        printk(KERN_WARNING
+               "set_rtc_mmss: can't update from %d to %d\n",
+               cmos_minutes, real_minutes);
+        retval = -1;
+    }
+
+    /* The following flags have to be released exactly in this order,
+     * otherwise the DS12887 (popular MC146818A clone with integrated
+     * battery and quartz) will not reset the oscillator and will not
+     * update precisely 500 ms later. You won't find this mentioned in
+     * the Dallas Semiconductor data sheets, but who believes data
+     * sheets anyway ...                           -- Markus Kuhn
+     */
+    CMOS_WRITE(save_control, RTC_CONTROL);
+    CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+    spin_unlock(&rtc_lock);
+
+    return retval;
+}
+#endif
+
+
+/*
+ * Reads a consistent set of time-base values from Xen, into a shadow data
+ * area. Must be called with the xtime_lock held for writing.
+ */
+static void __get_time_values_from_xen(void)
+{
+    do {
+        shadow_time_version = HYPERVISOR_shared_info->time_version2;
+        rmb();
+        shadow_tv.tv_sec    = HYPERVISOR_shared_info->wc_sec;
+        shadow_tv.tv_usec   = HYPERVISOR_shared_info->wc_usec;
+        shadow_tsc_stamp    = 
+            (u32)(HYPERVISOR_shared_info->tsc_timestamp >> rdtsc_bitshift);
+        shadow_system_time  = HYPERVISOR_shared_info->system_time;
+        rmb();
+    }
+    while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 );
+}
+
+#define TIME_VALUES_UP_TO_DATE \
+ ({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
+
+
+/*
+ * Returns the system time elapsed, in ns, since the current shadow_timestamp
+ * was calculated. Must be called with the xtime_lock held for reading.
+ */
+static inline unsigned long __get_time_delta_usecs(void)
+{
+    s32      delta_tsc;
+    u32      low;
+    u64      delta, tsc;
+
+    rdtscll(tsc);
+    low = (u32)(tsc >> rdtsc_bitshift);
+    delta_tsc = (s32)(low - shadow_tsc_stamp);
+    if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
+    delta = ((u64)delta_tsc * st_scale_f);
+    delta >>= 32;
+    delta += ((u64)delta_tsc * st_scale_i);
+
+    return (unsigned long)delta;
+}
+
+
+/*
+ * Returns the current time-of-day in UTC timeval format.
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+    unsigned long flags, lost;
+    struct timeval _tv;
+    s64 nsec;
+
+ again:
+    read_lock_irqsave(&xtime_lock, flags);
+
+    _tv.tv_usec = __get_time_delta_usecs();
+    if ( (lost = (jiffies - wall_jiffies)) != 0 )
+        _tv.tv_usec += lost * (1000000 / HZ);
+    _tv.tv_sec   = xtime.tv_sec;
+    _tv.tv_usec += xtime.tv_usec;
+
+    nsec = shadow_system_time - processed_system_time;
+    __normalize_time(&_tv.tv_sec, &nsec);
+    _tv.tv_usec += (long)nsec / 1000L;
+
+    if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
+    {
+        /*
+         * We may have blocked for a long time, rendering our calculations
+         * invalid (e.g. the time delta may have overflowed). Detect that
+         * and recalculate with fresh values.
+         */
+        read_unlock_irqrestore(&xtime_lock, flags);
+        write_lock_irqsave(&xtime_lock, flags);
+        __get_time_values_from_xen();
+        write_unlock_irqrestore(&xtime_lock, flags);
+        goto again;
+    }
+
+    HANDLE_USEC_OVERFLOW(_tv);
+
+    /* Ensure that time-of-day is monotonically increasing. */
+    if ( (_tv.tv_sec < last_seen_tv.tv_sec) ||
+         ((_tv.tv_sec == last_seen_tv.tv_sec) &&
+          (_tv.tv_usec < last_seen_tv.tv_usec)) )
+        _tv = last_seen_tv;
+    last_seen_tv = _tv;
+
+    read_unlock_irqrestore(&xtime_lock, flags);
+
+    *tv = _tv;
+}
+
+
+/*
+ * Sets the current time-of-day based on passed-in UTC timeval parameter.
+ */
+void do_settimeofday(struct timeval *tv)
+{
+    struct timeval newtv;
+    s64            nsec;
+    suseconds_t    usec;
+    
+    if ( !INDEPENDENT_WALLCLOCK() )
+        return;
+    
+    write_lock_irq(&xtime_lock);
+    
+    /*
+     * Ensure we don't get blocked for a long time so that our time delta
+     * overflows. If that were to happen then our shadow time values would
+     * be stale, so we can retry with fresh ones.
+     */
+ again:
+    usec = tv->tv_usec - __get_time_delta_usecs();
+
+    nsec = shadow_system_time - processed_system_time;
+    __normalize_time(&tv->tv_sec, &nsec);
+    usec -= (long)nsec / 1000L;
+
+    if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
+    {
+        __get_time_values_from_xen();
+        goto again;
+    }
+    tv->tv_usec = usec;
+    
+    HANDLE_USEC_UNDERFLOW(*tv);
+    
+    newtv = *tv;
+    
+    tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ);
+    HANDLE_USEC_UNDERFLOW(*tv);
+
+    xtime = *tv;
+    time_adjust = 0;  /* stop active adjtime() */
+    time_status |= STA_UNSYNC;
+    time_maxerror = NTP_PHASE_LIMIT;
+    time_esterror = NTP_PHASE_LIMIT;
+
+    /* Reset all our running time counts. They make no sense now. */
+    last_seen_tv.tv_sec = 0;
+    last_update_from_xen = 0;
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+    if ( xen_start_info.flags & SIF_INITDOMAIN )
+    {
+        dom0_op_t op;
+        last_update_to_rtc = last_update_to_xen = 0;
+        op.cmd = DOM0_SETTIME;
+        op.u.settime.secs        = newtv.tv_sec;
+        op.u.settime.usecs       = newtv.tv_usec;
+        op.u.settime.system_time = shadow_system_time;
+        write_unlock_irq(&xtime_lock);
+        HYPERVISOR_dom0_op(&op);
+    }
+    else
+#endif
+    {
+        write_unlock_irq(&xtime_lock);
+    }
+}
+
+
+asmlinkage long sys_stime(int *tptr)
+{
+    int value;
+    struct timeval tv;
+
+    if ( !capable(CAP_SYS_TIME) )
+        return -EPERM;
+
+    if ( get_user(value, tptr) )
+        return -EFAULT;
+
+    tv.tv_sec  = value;
+    tv.tv_usec = 0;
+
+    do_settimeofday(&tv);
+
+    return 0;
+}
+
+
+/* Convert jiffies to system time. Call with xtime_lock held for reading. */
+static inline u64 __jiffies_to_st(unsigned long j) 
+{
+    return processed_system_time + ((j - jiffies) * NS_PER_TICK);
+}
+
+
+static inline void do_timer_interrupt(int irq, void *dev_id,
+                                      struct pt_regs *regs)
+{
+    s64 delta;
+    unsigned long ticks = 0;
+    long sec_diff;
+
+    do {
+        __get_time_values_from_xen();
+        
+        delta = (s64)(shadow_system_time + 
+                      ((s64)__get_time_delta_usecs() * 1000LL) -
+                      processed_system_time);
+    }
+    while ( !TIME_VALUES_UP_TO_DATE );
+
+    if ( unlikely(delta < 0) )
+    {
+        printk("Timer ISR: Time went backwards: %lld\n", delta);
+        return;
+    }
+
+    /* Process elapsed jiffies since last call. */
+    while ( delta >= NS_PER_TICK )
+    {
+        ticks++;
+        delta -= NS_PER_TICK;
+        processed_system_time += NS_PER_TICK;
+    }
+
+    if ( ticks != 0 )
+    {
+        do_timer_ticks(ticks);
+
+        if ( user_mode(regs) )
+            update_process_times_us(ticks, 0);
+        else
+            update_process_times_us(0, ticks);
+    }
+
+    /*
+     * Take synchronised time from Xen once a minute if we're not
+     * synchronised ourselves, and we haven't chosen to keep an independent
+     * time base.
+     */
+    if ( !INDEPENDENT_WALLCLOCK() &&
+         ((time_status & STA_UNSYNC) != 0) &&
+         (xtime.tv_sec > (last_update_from_xen + 60)) )
+    {
+        /* Adjust shadow timeval for jiffies that haven't updated xtime yet. */
+        shadow_tv.tv_usec -= (jiffies - wall_jiffies) * (1000000/HZ);
+        HANDLE_USEC_UNDERFLOW(shadow_tv);
+
+        /*
+         * Reset our running time counts if they are invalidated by a warp
+         * backwards of more than 500ms.
+         */
+        sec_diff = xtime.tv_sec - shadow_tv.tv_sec;
+        if ( unlikely(abs(sec_diff) > 1) ||
+             unlikely(((sec_diff * 1000000) + 
+                       xtime.tv_usec - shadow_tv.tv_usec) > 500000) )
+        {
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+            last_update_to_rtc = last_update_to_xen = 0;
+#endif
+            last_seen_tv.tv_sec = 0;
+        }
+
+        /* Update our unsynchronised xtime appropriately. */
+        xtime = shadow_tv;
+
+        last_update_from_xen = xtime.tv_sec;
+    }
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+    if ( (xen_start_info.flags & SIF_INITDOMAIN) &&
+         ((time_status & STA_UNSYNC) == 0) )
+    {
+        /* Send synchronised time to Xen approximately every minute. */
+        if ( xtime.tv_sec > (last_update_to_xen + 60) )
+        {
+            dom0_op_t op;
+            struct timeval tv = xtime;
+
+            tv.tv_usec += (jiffies - wall_jiffies) * (1000000/HZ);
+            HANDLE_USEC_OVERFLOW(tv);
+
+            op.cmd = DOM0_SETTIME;
+            op.u.settime.secs        = tv.tv_sec;
+            op.u.settime.usecs       = tv.tv_usec;
+            op.u.settime.system_time = shadow_system_time;
+            HYPERVISOR_dom0_op(&op);
+
+            last_update_to_xen = xtime.tv_sec;
+        }
+
+        /*
+         * If we have an externally synchronized Linux clock, then update CMOS
+         * clock accordingly every ~11 minutes. Set_rtc_mmss() has to be called
+         * as close as possible to 500 ms before the new second starts.
+         */
+        if ( (xtime.tv_sec > (last_update_to_rtc + 660)) &&
+             (xtime.tv_usec >= (500000 - ((unsigned) tick) / 2)) &&
+             (xtime.tv_usec <= (500000 + ((unsigned) tick) / 2)) )
+        {
+            if ( set_rtc_mmss(xtime.tv_sec) == 0 )
+                last_update_to_rtc = xtime.tv_sec;
+            else
+                last_update_to_rtc = xtime.tv_sec - 600;
+        }
+    }
+#endif
+}
+
+
+static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+    write_lock(&xtime_lock);
+    do_timer_interrupt(irq, NULL, regs);
+    write_unlock(&xtime_lock);
+}
+
+static struct irqaction irq_timer = {
+    timer_interrupt, 
+    SA_INTERRUPT, 
+    0, 
+    "timer", 
+    NULL, 
+    NULL
+};
+
+
+/*
+ * This function works out when the the next timer function has to be
+ * executed (by looking at the timer list) and sets the Xen one-shot
+ * domain timer to the appropriate value. This is typically called in
+ * cpu_idle() before the domain blocks.
+ * 
+ * The function returns a non-0 value on error conditions.
+ * 
+ * It must be called with interrupts disabled.
+ */
+extern spinlock_t timerlist_lock;
+int set_timeout_timer(void)
+{
+    struct timer_list *timer;
+    u64 alarm = 0;
+    int ret = 0;
+
+    spin_lock(&timerlist_lock);
+
+    /*
+     * This is safe against long blocking (since calculations are not based on 
+     * TSC deltas). It is also safe against warped system time since
+     * suspend-resume is cooperative and we would first get locked out. It is 
+     * safe against normal updates of jiffies since interrupts are off.
+     */
+    if ( (timer = next_timer_event()) != NULL )
+        alarm = __jiffies_to_st(timer->expires);
+
+    /* Tasks on the timer task queue expect to be executed on the next tick. */
+    if ( TQ_ACTIVE(tq_timer) )
+        alarm = __jiffies_to_st(jiffies + 1);
+
+    /* Failure is pretty bad, but we'd best soldier on. */
+    if ( HYPERVISOR_set_timer_op(alarm) != 0 )
+        ret = -1;
+    
+    spin_unlock(&timerlist_lock);
+
+    return ret;
+}
+
+
+/* Time debugging. */
+static void dbg_time_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    unsigned long flags, j;
+    u64 s_now, j_st;
+    struct timeval s_tv, tv;
+
+    struct timer_list *timer;
+    u64 t_st;
+
+    read_lock_irqsave(&xtime_lock, flags);
+    s_tv.tv_sec  = shadow_tv.tv_sec;
+    s_tv.tv_usec = shadow_tv.tv_usec;
+    s_now        = shadow_system_time;
+    read_unlock_irqrestore(&xtime_lock, flags);
+
+    do_gettimeofday(&tv);
+
+    j = jiffies;
+    j_st = __jiffies_to_st(j);
+
+    timer = next_timer_event();
+    t_st = __jiffies_to_st(timer->expires);
+
+    printk(KERN_ALERT "time: shadow_st=0x%X:%08X\n",
+           (u32)(s_now>>32), (u32)s_now);
+    printk(KERN_ALERT "time: wct=%lds %ldus shadow_wct=%lds %ldus\n",
+           tv.tv_sec, tv.tv_usec, s_tv.tv_sec, s_tv.tv_usec);
+    printk(KERN_ALERT "time: jiffies=%lu(0x%X:%08X) timeout=%lu(0x%X:%08X)\n",
+           jiffies,(u32)(j_st>>32), (u32)j_st,
+           timer->expires,(u32)(t_st>>32), (u32)t_st);
+    printk(KERN_ALERT "time: processed_system_time=0x%X:%08X\n",
+           (u32)(processed_system_time>>32), (u32)processed_system_time);
+
+#ifdef CONFIG_MAGIC_SYSRQ
+    handle_sysrq('t',NULL,NULL,NULL);
+#endif
+}
+
+static struct irqaction dbg_time = {
+    dbg_time_int, 
+    SA_SHIRQ, 
+    0, 
+    "timer_dbg", 
+    &dbg_time_int,
+    NULL
+};
+
+void __init time_init(void)
+{
+    unsigned long long alarm;
+    u64 __cpu_khz, __cpu_ghz, cpu_freq, scale, scale2;
+    unsigned int cpu_ghz;
+
+    __cpu_khz = __cpu_ghz = cpu_freq = HYPERVISOR_shared_info->cpu_freq;
+    do_div(__cpu_khz, 1000UL);
+    cpu_khz = (u32)__cpu_khz;
+    do_div(__cpu_ghz, 1000000000UL);
+    cpu_ghz = (unsigned int)__cpu_ghz;
+
+    printk("Xen reported: %lu.%03lu MHz processor.\n", 
+           cpu_khz / 1000, cpu_khz % 1000);
+
+    xtime.tv_sec = HYPERVISOR_shared_info->wc_sec;
+    xtime.tv_usec = HYPERVISOR_shared_info->wc_usec;
+    processed_system_time = shadow_system_time;
+
+    for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
+        continue;
+
+    scale = 1000000LL << (32 + rdtsc_bitshift);
+    do_div(scale, (u32)cpu_freq);
+
+    if ( (cpu_freq >> 32) != 0 )
+    {
+        scale2 = 1000000LL << rdtsc_bitshift;
+        do_div(scale2, (u32)(cpu_freq>>32));
+        scale += scale2;
+    }
+
+    st_scale_f = scale & 0xffffffff;
+    st_scale_i = scale >> 32;
+
+    __get_time_values_from_xen();
+    processed_system_time = shadow_system_time;
+
+    time_irq  = bind_virq_to_irq(VIRQ_TIMER);
+    debug_irq = bind_virq_to_irq(VIRQ_DEBUG);
+
+    (void)setup_irq(time_irq, &irq_timer);
+    (void)setup_irq(debug_irq, &dbg_time);
+
+    rdtscll(alarm);
+}
+
+void time_suspend(void)
+{
+}
+
+void time_resume(void)
+{
+    unsigned long flags;
+    write_lock_irqsave(&xtime_lock, flags);
+    /* Get timebases for new environment. */ 
+    __get_time_values_from_xen();
+    /* Reset our own concept of passage of system time. */
+    processed_system_time = shadow_system_time;
+    /* Accept a warp in UTC (wall-clock) time. */
+    last_seen_tv.tv_sec = 0;
+    /* Make sure we resync UTC time with Xen on next timer interrupt. */
+    last_update_from_xen = 0;
+    write_unlock_irqrestore(&xtime_lock, flags);
+}
+
+/*
+ * /proc/sys/xen: This really belongs in another file. It can stay here for
+ * now however.
+ */
+static ctl_table xen_subtable[] = {
+    {1, "independent_wallclock", &independent_wallclock,
+     sizeof(independent_wallclock), 0644, NULL, proc_dointvec},
+    {0}
+};
+static ctl_table xen_table[] = {
+    {123, "xen", NULL, 0, 0555, xen_subtable},
+    {0}
+};
+static int __init xen_sysctl_init(void)
+{
+    (void)register_sysctl_table(xen_table, 0);
+    return 0;
+}
+__initcall(xen_sysctl_init);
diff --git a/linux-2.4-xen-sparse/arch/xen/kernel/traps.c b/linux-2.4-xen-sparse/arch/xen/kernel/traps.c
new file mode 100644
index 0000000000..b87fc3804c
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/kernel/traps.c
@@ -0,0 +1,619 @@
+/*
+ *  linux/arch/i386/traps.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/debugreg.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+
+#include <asm/hypervisor.h>
+
+#include <linux/irq.h>
+#include <linux/module.h>
+
+asmlinkage int system_call(void);
+asmlinkage void lcall7(void);
+asmlinkage void lcall27(void);
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void alignment_check(void);
+asmlinkage void fixup_4gb_segment(void);
+asmlinkage void machine_check(void);
+
+int kstack_depth_to_print = 24;
+
+
+/*
+ * If the address is either in the .text section of the
+ * kernel, or in the vmalloc'ed module regions, it *may* 
+ * be the address of a calling routine
+ */
+
+#ifdef CONFIG_MODULES
+
+extern struct module *module_list;
+extern struct module kernel_module;
+
+static inline int kernel_text_address(unsigned long addr)
+{
+	int retval = 0;
+	struct module *mod;
+
+	if (addr >= (unsigned long) &_stext &&
+	    addr <= (unsigned long) &_etext)
+		return 1;
+
+	for (mod = module_list; mod != &kernel_module; mod = mod->next) {
+		/* mod_bound tests for addr being inside the vmalloc'ed
+		 * module area. Of course it'd be better to test only
+		 * for the .text subset... */
+		if (mod_bound(addr, 0, mod)) {
+			retval = 1;
+			break;
+		}
+	}
+
+	return retval;
+}
+
+#else
+
+static inline int kernel_text_address(unsigned long addr)
+{
+	return (addr >= (unsigned long) &_stext &&
+		addr <= (unsigned long) &_etext);
+}
+
+#endif
+
+void show_trace(unsigned long * stack)
+{
+	int i;
+	unsigned long addr;
+
+	if (!stack)
+		stack = (unsigned long*)&stack;
+
+	printk("Call Trace: ");
+	i = 1;
+	while (((long) stack & (THREAD_SIZE-1)) != 0) {
+		addr = *stack++;
+		if (kernel_text_address(addr)) {
+			if (i && ((i % 6) == 0))
+				printk("\n   ");
+			printk("[<%08lx>] ", addr);
+			i++;
+		}
+	}
+	printk("\n");
+}
+
+void show_trace_task(struct task_struct *tsk)
+{
+	unsigned long esp = tsk->thread.esp;
+
+	/* User space on another CPU? */
+	if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
+		return;
+	show_trace((unsigned long *)esp);
+}
+
+void show_stack(unsigned long * esp)
+{
+	unsigned long *stack;
+	int i;
+
+	// debugging aid: "show_stack(NULL);" prints the
+	// back trace for this cpu.
+
+	if(esp==NULL)
+		esp=(unsigned long*)&esp;
+
+	stack = esp;
+	for(i=0; i < kstack_depth_to_print; i++) {
+		if (((long) stack & (THREAD_SIZE-1)) == 0)
+			break;
+		if (i && ((i % 8) == 0))
+			printk("\n       ");
+		printk("%08lx ", *stack++);
+	}
+	printk("\n");
+	show_trace(esp);
+}
+
+void show_registers(struct pt_regs *regs)
+{
+	int in_kernel = 1;
+	unsigned long esp;
+	unsigned short ss;
+
+	esp = (unsigned long) (&regs->esp);
+	ss = __KERNEL_DS;
+	if (regs->xcs & 2) {
+		in_kernel = 0;
+		esp = regs->esp;
+		ss = regs->xss & 0xffff;
+	}
+	printk(KERN_ALERT "CPU:    %d\n", smp_processor_id() );
+	printk(KERN_ALERT "EIP:    %04x:[<%08lx>]    %s\n",
+	       0xffff & regs->xcs, regs->eip, print_tainted());
+	printk(KERN_ALERT "EFLAGS: %08lx\n",regs->eflags);
+	printk(KERN_ALERT "eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+		regs->eax, regs->ebx, regs->ecx, regs->edx);
+	printk(KERN_ALERT "esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+		regs->esi, regs->edi, regs->ebp, esp);
+	printk(KERN_ALERT "ds: %04x   es: %04x   ss: %04x\n",
+		regs->xds & 0xffff, regs->xes & 0xffff, ss);
+	printk(KERN_ALERT "Process %s (pid: %d, stackpage=%08lx)",
+		current->comm, current->pid, 4096+(unsigned long)current);
+	/*
+	 * When in-kernel, we also print out the stack and code at the
+	 * time of the fault..
+	 */
+	if (in_kernel) {
+
+		printk(KERN_ALERT "\nStack: ");
+		show_stack((unsigned long*)esp);
+
+#if 0
+                {
+                        int i;
+			printk(KERN_ALERT "\nCode: ");
+			if(regs->eip < PAGE_OFFSET)
+			        goto bad;
+
+			for(i=0;i<20;i++)
+			{
+			        unsigned char c;
+			        if(__get_user(c, &((unsigned char*)regs->eip)[i])) {
+bad:
+				        printk(KERN_ALERT " Bad EIP value.");
+					break;
+				}
+				printk("%02x ", c);
+			}
+		}
+#endif
+	}
+	printk(KERN_ALERT "\n");
+}	
+
+spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+	console_verbose();
+	spin_lock_irq(&die_lock);
+	bust_spinlocks(1);
+	printk("%s: %04lx\n", str, err & 0xffff);
+	show_registers(regs);
+	bust_spinlocks(0);
+	spin_unlock_irq(&die_lock);
+	do_exit(SIGSEGV);
+}
+
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+	if (!(2 & regs->xcs))
+		die(str, regs, err);
+}
+
+
+static void inline do_trap(int trapnr, int signr, char *str,
+			   struct pt_regs * regs, long error_code,
+                           siginfo_t *info)
+{
+	if (!(regs->xcs & 2))
+		goto kernel_trap;
+
+	/*trap_signal:*/ {
+		struct task_struct *tsk = current;
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_no = trapnr;
+		if (info)
+			force_sig_info(signr, info, tsk);
+		else
+			force_sig(signr, tsk);
+		return;
+	}
+
+	kernel_trap: {
+		unsigned long fixup = search_exception_table(regs->eip);
+		if (fixup)
+			regs->eip = fixup;
+		else	
+			die(str, regs, error_code);
+		return;
+	}
+}
+
+#define DO_ERROR(trapnr, signr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	do_trap(trapnr, signr, str, regs, error_code, NULL); \
+}
+
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	siginfo_t info; \
+	info.si_signo = signr; \
+	info.si_errno = 0; \
+	info.si_code = sicode; \
+	info.si_addr = (void *)siaddr; \
+	do_trap(trapnr, signr, str, regs, error_code, &info); \
+}
+
+DO_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->eip)
+DO_ERROR( 3, SIGTRAP, "int3", int3)
+DO_ERROR( 4, SIGSEGV, "overflow", overflow)
+DO_ERROR( 5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
+DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
+DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
+DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
+DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+DO_ERROR(18, SIGBUS, "machine check", machine_check)
+
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+	/*
+	 * If we trapped on an LDT access then ensure that the default_ldt is
+	 * loaded, if nothing else. We load default_ldt lazily because LDT
+	 * switching costs time and many applications don't need it.
+	 */
+	if ( unlikely((error_code & 6) == 4) )
+	{
+		unsigned long ldt;
+		__asm__ __volatile__ ( "sldt %0" : "=r" (ldt) );
+		if ( ldt == 0 )
+		{
+                    xen_set_ldt((unsigned long)&default_ldt[0], 5);
+		    return;
+		}
+	}
+
+	if (!(regs->xcs & 2))
+		goto gp_in_kernel;
+
+	current->thread.error_code = error_code;
+	current->thread.trap_no = 13;
+	force_sig(SIGSEGV, current);
+	return;
+
+gp_in_kernel:
+	{
+		unsigned long fixup;
+		fixup = search_exception_table(regs->eip);
+		if (fixup) {
+			regs->eip = fixup;
+			return;
+		}
+		die("general protection fault", regs, error_code);
+	}
+}
+
+
+asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+{
+    unsigned int condition;
+    struct task_struct *tsk = current;
+    siginfo_t info;
+
+    condition = HYPERVISOR_get_debugreg(6);
+
+    /* Mask out spurious debug traps due to lazy DR7 setting */
+    if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+        if (!tsk->thread.debugreg[7])
+            goto clear_dr7;
+    }
+
+    /* Save debug status register where ptrace can see it */
+    tsk->thread.debugreg[6] = condition;
+
+    /* Mask out spurious TF errors due to lazy TF clearing */
+    if (condition & DR_STEP) {
+        /*
+         * The TF error should be masked out only if the current
+         * process is not traced and if the TRAP flag has been set
+         * previously by a tracing process (condition detected by
+         * the PT_DTRACE flag); remember that the i386 TRAP flag
+         * can be modified by the process itself in user mode,
+         * allowing programs to debug themselves without the ptrace()
+         * interface.
+         */
+        if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
+            goto clear_TF;
+    }
+
+    /* Ok, finally something we can handle */
+    tsk->thread.trap_no = 1;
+    tsk->thread.error_code = error_code;
+    info.si_signo = SIGTRAP;
+    info.si_errno = 0;
+    info.si_code = TRAP_BRKPT;
+        
+    /* If this is a kernel mode trap, save the user PC on entry to 
+     * the kernel, that's what the debugger can make sense of.
+     */
+    info.si_addr = ((regs->xcs & 2) == 0) ? (void *)tsk->thread.eip : 
+                                            (void *)regs->eip;
+    force_sig_info(SIGTRAP, &info, tsk);
+
+    /* Disable additional traps. They'll be re-enabled when
+     * the signal is delivered.
+     */
+ clear_dr7:
+    HYPERVISOR_set_debugreg(7, 0);
+    return;
+
+ clear_TF:
+    regs->eflags &= ~TF_MASK;
+    return;
+}
+
+
+/*
+ * Note that we play around with the 'TS' bit in an attempt to get
+ * the correct behaviour even in the presence of the asynchronous
+ * IRQ13 behaviour
+ */
+void math_error(void *eip)
+{
+	struct task_struct * task;
+	siginfo_t info;
+	unsigned short cwd, swd;
+
+	/*
+	 * Save the info for the exception handler and clear the error.
+	 */
+	task = current;
+	save_init_fpu(task);
+	task->thread.trap_no = 16;
+	task->thread.error_code = 0;
+	info.si_signo = SIGFPE;
+	info.si_errno = 0;
+	info.si_code = __SI_FAULT;
+	info.si_addr = eip;
+	/*
+	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
+	 * status.  0x3f is the exception bits in these regs, 0x200 is the
+	 * C1 reg you need in case of a stack fault, 0x040 is the stack
+	 * fault bit.  We should only be taking one exception at a time,
+	 * so if this combination doesn't produce any single exception,
+	 * then we have a bad program that isn't syncronizing its FPU usage
+	 * and it will suffer the consequences since we won't be able to
+	 * fully reproduce the context of the exception
+	 */
+	cwd = get_fpu_cwd(task);
+	swd = get_fpu_swd(task);
+	switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+		case 0x000:
+		default:
+			break;
+		case 0x001: /* Invalid Op */
+		case 0x041: /* Stack Fault */
+		case 0x241: /* Stack Fault | Direction */
+			info.si_code = FPE_FLTINV;
+			break;
+		case 0x002: /* Denormalize */
+		case 0x010: /* Underflow */
+			info.si_code = FPE_FLTUND;
+			break;
+		case 0x004: /* Zero Divide */
+			info.si_code = FPE_FLTDIV;
+			break;
+		case 0x008: /* Overflow */
+			info.si_code = FPE_FLTOVF;
+			break;
+		case 0x020: /* Precision */
+			info.si_code = FPE_FLTRES;
+			break;
+	}
+	force_sig_info(SIGFPE, &info, task);
+}
+
+asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
+{
+	ignore_irq13 = 1;
+	math_error((void *)regs->eip);
+}
+
+void simd_math_error(void *eip)
+{
+	struct task_struct * task;
+	siginfo_t info;
+	unsigned short mxcsr;
+
+	/*
+	 * Save the info for the exception handler and clear the error.
+	 */
+	task = current;
+	save_init_fpu(task);
+	task->thread.trap_no = 19;
+	task->thread.error_code = 0;
+	info.si_signo = SIGFPE;
+	info.si_errno = 0;
+	info.si_code = __SI_FAULT;
+	info.si_addr = eip;
+	/*
+	 * The SIMD FPU exceptions are handled a little differently, as there
+	 * is only a single status/control register.  Thus, to determine which
+	 * unmasked exception was caught we must mask the exception mask bits
+	 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+	 */
+	mxcsr = get_fpu_mxcsr(task);
+	switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+		case 0x000:
+		default:
+			break;
+		case 0x001: /* Invalid Op */
+			info.si_code = FPE_FLTINV;
+			break;
+		case 0x002: /* Denormalize */
+		case 0x010: /* Underflow */
+			info.si_code = FPE_FLTUND;
+			break;
+		case 0x004: /* Zero Divide */
+			info.si_code = FPE_FLTDIV;
+			break;
+		case 0x008: /* Overflow */
+			info.si_code = FPE_FLTOVF;
+			break;
+		case 0x020: /* Precision */
+			info.si_code = FPE_FLTRES;
+			break;
+	}
+	force_sig_info(SIGFPE, &info, task);
+}
+
+asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
+					  long error_code)
+{
+	if (cpu_has_xmm) {
+		/* Handle SIMD FPU exceptions on PIII+ processors. */
+		ignore_irq13 = 1;
+		simd_math_error((void *)regs->eip);
+	} else {
+		die_if_kernel("cache flush denied", regs, error_code);
+		current->thread.trap_no = 19;
+		current->thread.error_code = error_code;
+		force_sig(SIGSEGV, current);
+	}
+}
+
+/*
+ *  'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ */
+asmlinkage void math_state_restore(struct pt_regs regs)
+{
+	/*
+	 * A trap in kernel mode can be ignored. It'll be the fast XOR or
+	 * copying libraries, which will correctly save/restore state and
+	 * reset the TS bit in CR0.
+	 */
+	if ( (regs.xcs & 2) == 0 )
+		return;
+
+	if (current->used_math) {
+		restore_fpu(current);
+	} else {
+		init_fpu();
+	}
+	current->flags |= PF_USEDFPU;	/* So we fnsave on switch_to() */
+}
+
+
+#define _set_gate(gate_addr,type,dpl,addr) \
+do { \
+  int __d0, __d1; \
+  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
+	"movw %4,%%dx\n\t" \
+	"movl %%eax,%0\n\t" \
+	"movl %%edx,%1" \
+	:"=m" (*((long *) (gate_addr))), \
+	 "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
+	:"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
+	 "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \
+} while (0)
+
+static void __init set_call_gate(void *a, void *addr)
+{
+	_set_gate(a,12,3,addr);
+}
+
+
+/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */
+static trap_info_t trap_table[] = {
+    {  0, 0, __KERNEL_CS, (unsigned long)divide_error                },
+    {  1, 0, __KERNEL_CS, (unsigned long)debug                       },
+    {  3, 3, __KERNEL_CS, (unsigned long)int3                        },
+    {  4, 3, __KERNEL_CS, (unsigned long)overflow                    },
+    {  5, 3, __KERNEL_CS, (unsigned long)bounds                      },
+    {  6, 0, __KERNEL_CS, (unsigned long)invalid_op                  },
+    {  7, 0, __KERNEL_CS, (unsigned long)device_not_available        },
+    {  8, 0, __KERNEL_CS, (unsigned long)double_fault                },
+    {  9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
+    { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS                 },
+    { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present         },
+    { 12, 0, __KERNEL_CS, (unsigned long)stack_segment               },
+    { 13, 0, __KERNEL_CS, (unsigned long)general_protection          },
+    { 14, 0, __KERNEL_CS, (unsigned long)page_fault                  },
+    { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment           },
+    { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error           },
+    { 17, 0, __KERNEL_CS, (unsigned long)alignment_check             },
+    { 18, 0, __KERNEL_CS, (unsigned long)machine_check               },
+    { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error      },
+    { SYSCALL_VECTOR, 
+          3, __KERNEL_CS, (unsigned long)system_call                 },
+    {  0, 0,           0, 0                           }
+};
+
+
+void __init trap_init(void)
+{
+    HYPERVISOR_set_trap_table(trap_table);    
+
+    /*
+     * The default LDT is a single-entry callgate to lcall7 for iBCS and a
+     * callgate to lcall27 for Solaris/x86 binaries.
+     */
+    clear_page(&default_ldt[0]);
+    set_call_gate(&default_ldt[0],lcall7);
+    set_call_gate(&default_ldt[4],lcall27);
+    __make_page_readonly(&default_ldt[0]);
+
+    cpu_init();
+}
diff --git a/linux-2.4-xen-sparse/arch/xen/lib/Makefile b/linux-2.4-xen-sparse/arch/xen/lib/Makefile
new file mode 100644
index 0000000000..5e00fdc135
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/lib/Makefile
@@ -0,0 +1,15 @@
+
+.S.o:
+	$(CC) $(AFLAGS) -c $< -o $*.o
+
+L_TARGET = lib.a
+
+obj-y = checksum.o old-checksum.o delay.o \
+	usercopy.o getuser.o \
+	memcpy.o strstr.o xen_proc.o
+
+obj-$(CONFIG_X86_USE_3DNOW) += mmx.o
+obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+obj-$(CONFIG_DEBUG_IOVIRT)  += iodebug.o
+
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/lib/delay.c b/linux-2.4-xen-sparse/arch/xen/lib/delay.c
new file mode 100644
index 0000000000..0035bed074
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/lib/delay.c
@@ -0,0 +1,52 @@
+/*
+ *	Precise Delay Loops for i386
+ *
+ *	Copyright (C) 1993 Linus Torvalds
+ *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ *	The __delay function must _NOT_ be inlined as its execution time
+ *	depends wildly on alignment on many x86 processors. The additional
+ *	jump magic is needed to get the timing stable on all the CPU's
+ *	we have to worry about.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_SMP
+#include <asm/smp.h>
+#endif
+
+void __delay(unsigned long loops)
+{
+	unsigned long bclock, now;
+	
+	rdtscl(bclock);
+	do
+	{
+		rep_nop();
+		rdtscl(now);
+	} while ((now-bclock) < loops);
+}
+
+inline void __const_udelay(unsigned long xloops)
+{
+	int d0;
+	__asm__("mull %0"
+		:"=d" (xloops), "=&a" (d0)
+		:"1" (xloops),"0" (current_cpu_data.loops_per_jiffy));
+        __delay(xloops * HZ);
+}
+
+void __udelay(unsigned long usecs)
+{
+	__const_udelay(usecs * 0x000010c6);  /* 2**32 / 1000000 */
+}
+
+void __ndelay(unsigned long nsecs)
+{
+	__const_udelay(nsecs * 0x00005);  /* 2**32 / 1000000000 (rounded up) */
+}
diff --git a/linux-2.4-xen-sparse/arch/xen/mm/Makefile b/linux-2.4-xen-sparse/arch/xen/mm/Makefile
new file mode 100644
index 0000000000..d0d16114b6
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/mm/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for the linux i386-specific parts of the memory manager.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definition is now in the main makefile...
+
+O_TARGET := mm.o
+
+obj-y	 := init.o fault.o extable.o pageattr.o hypervisor.o ioremap.o
+
+export-objs := pageattr.o
+
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4-xen-sparse/arch/xen/mm/fault.c b/linux-2.4-xen-sparse/arch/xen/mm/fault.c
new file mode 100644
index 0000000000..7db6463e09
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/mm/fault.c
@@ -0,0 +1,302 @@
+/*
+ *  linux/arch/i386/mm/fault.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>		/* For unblank_screen() */
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/hardirq.h>
+
+extern void die(const char *,struct pt_regs *,long);
+
+pgd_t *cur_pgd;
+
+extern spinlock_t timerlist_lock;
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out (timerlist_lock is acquired through the
+ * console unblank code)
+ */
+void bust_spinlocks(int yes)
+{
+	spin_lock_init(&timerlist_lock);
+	if (yes) {
+		oops_in_progress = 1;
+	} else {
+		int loglevel_save = console_loglevel;
+#ifdef CONFIG_VT
+		unblank_screen();
+#endif
+		oops_in_progress = 0;
+		/*
+		 * OK, the message is on the console.  Now we call printk()
+		 * without oops_in_progress set so that printk will give klogd
+		 * a poke.  Hold onto your hats...
+		 */
+		console_loglevel = 15;		/* NMI oopser may have shut the console up */
+		printk(" ");
+		console_loglevel = loglevel_save;
+	}
+}
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * error_code:
+ *	bit 0 == 0 means no page found, 1 means protection fault
+ *	bit 1 == 0 means read, 1 means write
+ *	bit 2 == 0 means kernel, 1 means user-mode
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, 
+                              unsigned long error_code,
+                              unsigned long address)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm;
+	struct vm_area_struct * vma;
+	unsigned long page;
+	unsigned long fixup;
+	int write;
+	siginfo_t info;
+
+        /* Set the "privileged fault" bit to something sane. */
+        error_code &= 3;
+        error_code |= (regs->xcs & 2) << 1;
+
+	/*
+	 * We fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 *
+	 * This verifies that the fault happens in kernel space
+	 * (error_code & 4) == 0, and that the fault was not a
+	 * protection error (error_code & 1) == 0.
+	 */
+	if (address >= TASK_SIZE && !(error_code & 5))
+		goto vmalloc_fault;
+
+	mm = tsk->mm;
+	info.si_code = SEGV_MAPERR;
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (in_interrupt() || !mm)
+		goto no_context;
+
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (error_code & 4) {
+		/*
+		 * accessing the stack below %esp is always a bug.
+		 * The "+ 32" is there due to some instructions (like
+		 * pusha) doing post-decrement on the stack and that
+		 * doesn't show up until later..
+		 */
+		if (address + 32 < regs->esp)
+			goto bad_area;
+	}
+	if (expand_stack(vma, address))
+		goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+	info.si_code = SEGV_ACCERR;
+	write = 0;
+	switch (error_code & 3) {
+		default:	/* 3: write, present */
+			/* fall through */
+		case 2:		/* write, not present */
+			if (!(vma->vm_flags & VM_WRITE))
+				goto bad_area;
+			write++;
+			break;
+		case 1:		/* read, present */
+			goto bad_area;
+		case 0:		/* read, not present */
+			if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+				goto bad_area;
+	}
+
+ survive:
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	switch (handle_mm_fault(mm, vma, address, write)) {
+	case 1:
+		tsk->min_flt++;
+		break;
+	case 2:
+		tsk->maj_flt++;
+		break;
+	case 0:
+		goto do_sigbus;
+	default:
+		goto out_of_memory;
+	}
+
+	up_read(&mm->mmap_sem);
+	return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+	up_read(&mm->mmap_sem);
+
+	/* User mode accesses just cause a SIGSEGV */
+	if (error_code & 4) {
+		tsk->thread.cr2 = address;
+		/* Kernel addresses are always protection faults */
+		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+		tsk->thread.trap_no = 14;
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		/* info.si_code has been set above */
+		info.si_addr = (void *)address;
+		force_sig_info(SIGSEGV, &info, tsk);
+		return;
+	}
+
+no_context:
+	/* Are we prepared to handle this kernel fault?  */
+	if ((fixup = search_exception_table(regs->eip)) != 0) {
+		regs->eip = fixup;
+		return;
+	}
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+	bust_spinlocks(1);
+
+	if (address < PAGE_SIZE)
+		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+	else
+		printk(KERN_ALERT "Unable to handle kernel paging request");
+	printk(" at virtual address %08lx\n",address);
+	printk(" printing eip:\n");
+	printk("%08lx\n", regs->eip);
+        page = ((unsigned long *) cur_pgd)[address >> 22];
+        printk(KERN_ALERT "*pde=%08lx(%08lx)\n", page, machine_to_phys(page));
+	if (page & 1) {
+		page &= PAGE_MASK;
+		address &= 0x003ff000;
+                page = machine_to_phys(page);
+		page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
+                printk(KERN_ALERT "*pte=%08lx(%08lx)\n", page, 
+                       machine_to_phys(page));
+	}
+	die("Oops", regs, error_code);
+	bust_spinlocks(0);
+	do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	if (tsk->pid == 1) {
+		yield();
+		goto survive;
+	}
+	up_read(&mm->mmap_sem);
+	printk("VM: killing process %s\n", tsk->comm);
+	if (error_code & 4)
+		do_exit(SIGKILL);
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	tsk->thread.cr2 = address;
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = 14;
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_ADRERR;
+	info.si_addr = (void *)address;
+	force_sig_info(SIGBUS, &info, tsk);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!(error_code & 4))
+		goto no_context;
+	return;
+
+vmalloc_fault:
+	{
+		/*
+		 * Synchronize this task's top level page-table
+		 * with the 'reference' page table.
+		 *
+		 * Do _not_ use "tsk" here. We might be inside
+		 * an interrupt in the middle of a task switch..
+		 */
+		int offset = __pgd_offset(address);
+		pgd_t *pgd, *pgd_k;
+		pmd_t *pmd, *pmd_k;
+		pte_t *pte_k;
+
+		pgd = offset + cur_pgd;
+		pgd_k = init_mm.pgd + offset;
+
+		if (!pgd_present(*pgd_k))
+			goto no_context;
+		set_pgd(pgd, *pgd_k);
+		
+		pmd = pmd_offset(pgd, address);
+		pmd_k = pmd_offset(pgd_k, address);
+		if (!pmd_present(*pmd_k))
+			goto no_context;
+		set_pmd(pmd, *pmd_k);
+
+		pte_k = pte_offset(pmd_k, address);
+		if (!pte_present(*pte_k))
+			goto no_context;
+		return;
+	}
+}
diff --git a/linux-2.4-xen-sparse/arch/xen/mm/init.c b/linux-2.4-xen-sparse/arch/xen/mm/init.c
new file mode 100644
index 0000000000..88d775bcd4
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/mm/init.c
@@ -0,0 +1,482 @@
+/*
+ *  linux/arch/i386/mm/init.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#ifdef CONFIG_BLK_DEV_INITRD
+#include <linux/blk.h>
+#endif
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/apic.h>
+#include <asm/tlb.h>
+
+/* XEN: We *cannot* use mmx_clear_page() this early. Force dumb memset(). */
+#undef clear_page
+#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE)
+
+mmu_gather_t mmu_gathers[NR_CPUS];
+unsigned long highstart_pfn, highend_pfn;
+static unsigned long totalram_pages;
+static unsigned long totalhigh_pages;
+
+int do_check_pgt_cache(int low, int high)
+{
+    int freed = 0;
+    if(pgtable_cache_size > high) {
+        do {
+            if (!QUICKLIST_EMPTY(pgd_quicklist)) {
+                free_pgd_slow(get_pgd_fast());
+                freed++;
+            }
+            if (!QUICKLIST_EMPTY(pte_quicklist)) {
+                pte_free_slow(pte_alloc_one_fast(NULL, 0));
+                freed++;
+            }
+        } while(pgtable_cache_size > low);
+    }
+    return freed;
+}
+ 
+/*
+ * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
+ * physical space so we can cache the place of the first one and move
+ * around without checking the pgd every time.
+ */
+
+#if CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+#define kmap_get_fixmap_pte(vaddr) \
+    pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+
+void __init kmap_init(void)
+{
+    unsigned long kmap_vstart;
+
+    /* cache the first kmap pte */
+    kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+    kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+    kmap_prot = PAGE_KERNEL;
+}
+#endif /* CONFIG_HIGHMEM */
+
+void show_mem(void)
+{
+    int i, total = 0, reserved = 0;
+    int shared = 0, cached = 0;
+    int highmem = 0;
+
+    printk("Mem-info:\n");
+    show_free_areas();
+    printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+    i = max_mapnr;
+    while (i-- > 0) {
+        total++;
+        if (PageHighMem(mem_map+i))
+            highmem++;
+        if (PageReserved(mem_map+i))
+            reserved++;
+        else if (PageSwapCache(mem_map+i))
+            cached++;
+        else if (page_count(mem_map+i))
+            shared += page_count(mem_map+i) - 1;
+    }
+    printk("%d pages of RAM\n", total);
+    printk("%d pages of HIGHMEM\n",highmem);
+    printk("%d reserved pages\n",reserved);
+    printk("%d pages shared\n",shared);
+    printk("%d pages swap cached\n",cached);
+    printk("%ld pages in page table cache\n",pgtable_cache_size);
+    show_buffers();
+}
+
+/* References to section boundaries */
+
+extern char _text, _etext, _edata, __bss_start, _end;
+extern char __init_begin, __init_end;
+
+static inline void set_pte_phys (unsigned long vaddr,
+                                 unsigned long phys, pgprot_t prot)
+{
+    pgd_t *pgd;
+    pmd_t *pmd;
+    pte_t *pte;
+
+    pgd = init_mm.pgd + __pgd_offset(vaddr);
+    if (pgd_none(*pgd)) {
+        printk("PAE BUG #00!\n");
+        return;
+    }
+    pmd = pmd_offset(pgd, vaddr);
+    if (pmd_none(*pmd)) {
+        printk("PAE BUG #01!\n");
+        return;
+    }
+    pte = pte_offset(pmd, vaddr);
+
+    set_pte(pte, (pte_t) { phys | pgprot_val(prot) });
+
+    /*
+     * It's enough to flush this one mapping.
+     * (PGE mappings get flushed as well)
+     */
+    __flush_tlb_one(vaddr);
+}
+
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys, 
+                  pgprot_t flags)
+{
+    unsigned long address = __fix_to_virt(idx);
+
+    if (idx >= __end_of_fixed_addresses) {
+        printk("Invalid __set_fixmap\n");
+        return;
+    }
+    set_pte_phys(address, phys, flags);
+}
+
+void clear_fixmap(enum fixed_addresses idx)
+{
+    set_pte_phys(__fix_to_virt(idx), 0, __pgprot(0));
+}
+
+static void __init fixrange_init (unsigned long start, 
+                                  unsigned long end, pgd_t *pgd_base)
+{
+    pgd_t *pgd, *kpgd;
+    pmd_t *pmd, *kpmd;
+    pte_t *pte, *kpte;
+    int i, j;
+    unsigned long vaddr;
+
+    vaddr = start;
+    i = __pgd_offset(vaddr);
+    j = __pmd_offset(vaddr);
+    pgd = pgd_base + i;
+
+    for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+#if CONFIG_X86_PAE
+        if (pgd_none(*pgd)) {
+            pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+            set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
+            if (pmd != pmd_offset(pgd, 0))
+                printk("PAE BUG #02!\n");
+        }
+        pmd = pmd_offset(pgd, vaddr);
+#else
+        pmd = (pmd_t *)pgd;
+#endif
+        for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+            if (pmd_none(*pmd)) {
+                pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+                clear_page(pte);
+                kpgd = pgd_offset_k((unsigned long)pte);
+                kpmd = pmd_offset(kpgd, (unsigned long)pte);
+                kpte = pte_offset(kpmd, (unsigned long)pte);
+                set_pte(kpte, pte_wrprotect(*kpte));
+                set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
+            }
+            vaddr += PMD_SIZE;
+        }
+        j = 0;
+    }
+}
+
+
+static void __init pagetable_init (void)
+{
+    unsigned long vaddr, end, ram_end;
+    pgd_t *kpgd, *pgd, *pgd_base;
+    int i, j, k;
+    pmd_t *kpmd, *pmd;
+    pte_t *kpte, *pte, *pte_base;
+
+    ram_end = end = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
+    if ( xen_start_info.nr_pages < max_low_pfn )
+        ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE);
+
+    pgd_base = init_mm.pgd;
+    i = __pgd_offset(PAGE_OFFSET);
+    pgd = pgd_base + i;
+
+    for (; i < PTRS_PER_PGD; pgd++, i++) {
+        vaddr = i*PGDIR_SIZE;
+        if (vaddr >= end)
+            break;
+        pmd = (pmd_t *)pgd;
+        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+            if (vaddr >= end)
+                break;
+
+            /* Filled in for us already? */
+            if ( pmd_val(*pmd) & _PAGE_PRESENT )
+                continue;
+
+            pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+            clear_page(pte_base);
+
+            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
+                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
+                if (vaddr >= ram_end)
+                    break;
+                *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
+            }
+            kpgd = pgd_offset_k((unsigned long)pte_base);
+            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
+            kpte = pte_offset(kpmd, (unsigned long)pte_base);
+            set_pte(kpte, pte_wrprotect(*kpte));
+            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
+        }
+    }
+
+    /*
+     * Fixed mappings, only the page table structure has to be
+     * created - mappings will be set by set_fixmap():
+     */
+    vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+    fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd);
+
+#if CONFIG_HIGHMEM
+    /*
+     * Permanent kmaps:
+     */
+    vaddr = PKMAP_BASE;
+    fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, init_mm.pgd);
+
+    pgd = init_mm.pgd + __pgd_offset(vaddr);
+    pmd = pmd_offset(pgd, vaddr);
+    pte = pte_offset(pmd, vaddr);
+    pkmap_page_table = pte;
+#endif
+}
+
+static void __init zone_sizes_init(void)
+{
+    unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+    unsigned int max_dma, high, low;
+
+    max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+    low = max_low_pfn;
+    high = highend_pfn;
+
+    if (low < max_dma)
+        zones_size[ZONE_DMA] = low;
+    else {
+        zones_size[ZONE_DMA] = max_dma;
+        zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+        zones_size[ZONE_HIGHMEM] = high - low;
+#endif
+    }
+    free_area_init(zones_size);
+}
+
+void __init paging_init(void)
+{
+    pagetable_init();
+
+    zone_sizes_init();
+
+    /* Switch to the real shared_info page, and clear the dummy page. */
+    set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
+    HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+    memset(empty_zero_page, 0, sizeof(empty_zero_page));
+
+#ifdef CONFIG_HIGHMEM
+    kmap_init();
+#endif
+}
+
+static inline int page_is_ram (unsigned long pagenr)
+{
+    return 1;
+}
+
+#ifdef CONFIG_HIGHMEM
+void __init one_highpage_init(struct page *page, int free_page)
+{
+    ClearPageReserved(page);
+    set_bit(PG_highmem, &page->flags);
+    atomic_set(&page->count, 1);
+    if ( free_page )
+        __free_page(page);
+    totalhigh_pages++;
+}
+#endif /* CONFIG_HIGHMEM */
+
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+    highmem_start_page = mem_map + highstart_pfn;
+    max_mapnr = num_physpages = highend_pfn;
+    num_mappedpages = max_low_pfn;
+#else
+    max_mapnr = num_mappedpages = num_physpages = max_low_pfn;
+#endif
+}
+
+static int __init free_pages_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+    int bad_ppro = 0;
+#endif
+    int reservedpages, pfn;
+
+    /* add only boot_pfn pages of low memory to free list.
+     * max_low_pfn may be sized for
+     * pages yet to be allocated from the hypervisor, or it may be set
+     * to override the xen_start_info amount of memory
+     */
+    int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn);
+
+    /* this will put all low memory onto the freelists */
+    totalram_pages += free_all_bootmem();
+    /* XEN: init and count low-mem pages outside initial allocation. */
+    for (pfn = boot_pfn; pfn < max_low_pfn; pfn++) {
+        ClearPageReserved(&mem_map[pfn]);
+        atomic_set(&mem_map[pfn].count, 1);
+        totalram_pages++;
+    }
+
+    reservedpages = 0;
+    for (pfn = 0; pfn < boot_pfn ; pfn++) {
+        /*
+         * Only count reserved RAM pages
+         */
+        if (page_is_ram(pfn) && PageReserved(mem_map+pfn))
+            reservedpages++;
+    }
+#ifdef CONFIG_HIGHMEM
+    for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
+        one_highpage_init((struct page *) (mem_map + pfn),
+                          (pfn < xen_start_info.nr_pages));
+    totalram_pages += totalhigh_pages;
+#endif
+    return reservedpages;
+}
+
+void __init mem_init(void)
+{
+    int codesize, reservedpages, datasize, initsize;
+
+    if (!mem_map)
+        BUG();
+
+#ifdef CONFIG_HIGHMEM
+    /* check that fixmap and pkmap do not overlap */
+    if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
+	printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
+	printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
+	       PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
+	BUG();
+    }
+#endif
+
+    set_max_mapnr_init();
+
+    high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+    /* clear the zero-page */
+    memset(empty_zero_page, 0, PAGE_SIZE);
+
+    reservedpages = free_pages_init();
+
+    codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+    datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
+    initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+    printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
+           (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+           max_mapnr << (PAGE_SHIFT-10),
+           codesize >> 10,
+           reservedpages << (PAGE_SHIFT-10),
+           datasize >> 10,
+           initsize >> 10,
+           (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+        );
+
+    boot_cpu_data.wp_works_ok = 1;
+}
+
+void free_initmem(void)
+{
+    unsigned long addr;
+
+    addr = (unsigned long)(&__init_begin);
+    for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+        ClearPageReserved(virt_to_page(addr));
+        set_page_count(virt_to_page(addr), 1);
+        free_page(addr);
+        totalram_pages++;
+    }
+    printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+    if (start < end)
+        printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+    for (; start < end; start += PAGE_SIZE) {
+        ClearPageReserved(virt_to_page(start));
+        set_page_count(virt_to_page(start), 1);
+        free_page(start);
+        totalram_pages++;
+    }
+}
+#endif
+
+void si_meminfo(struct sysinfo *val)
+{
+    val->totalram = max_pfn;
+    val->sharedram = 0;
+    val->freeram = nr_free_pages();
+    val->bufferram = atomic_read(&buffermem_pages);
+    val->totalhigh = max_pfn-max_low_pfn;
+    val->freehigh = nr_free_highpages();
+    val->mem_unit = PAGE_SIZE;
+    return;
+}
+
+#if defined(CONFIG_X86_PAE)
+struct kmem_cache_s *pae_pgd_cachep;
+void __init pgtable_cache_init(void)
+{
+    /*
+     * PAE pgds must be 16-byte aligned:
+	 */
+    pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0,
+                                       SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL);
+    if (!pae_pgd_cachep)
+        panic("init_pae(): Cannot alloc pae_pgd SLAB cache");
+}
+#endif /* CONFIG_X86_PAE */
diff --git a/linux-2.4-xen-sparse/arch/xen/mm/ioremap.c b/linux-2.4-xen-sparse/arch/xen/mm/ioremap.c
new file mode 100644
index 0000000000..2f3db057d9
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/mm/ioremap.c
@@ -0,0 +1,266 @@
+/*
+ * arch/xen/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ *
+ * Modifications for Xenolinux (c) 2003-2004 Keir Fraser
+ */
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+#include <asm/mmu.h>
+
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+
+/* These hacky macros avoid phys->machine translations. */
+#define __direct_pte(x) ((pte_t) { (x) } )
+#define __direct_mk_pte(page_nr,pgprot) \
+  __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
+#define direct_mk_pte_phys(physpage, pgprot) \
+  __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
+
+static inline void direct_remap_area_pte(pte_t *pte, 
+                                        unsigned long address, 
+                                        unsigned long size,
+					mmu_update_t **v)
+{
+    unsigned long end;
+
+    address &= ~PMD_MASK;
+    end = address + size;
+    if (end > PMD_SIZE)
+        end = PMD_SIZE;
+    if (address >= end)
+        BUG();
+
+    do {
+        (*v)->ptr = virt_to_machine(pte);
+        (*v)++;
+        address += PAGE_SIZE;
+        pte++;
+    } while (address && (address < end));
+}
+
+static inline int direct_remap_area_pmd(struct mm_struct *mm,
+                                        pmd_t *pmd, 
+                                        unsigned long address, 
+                                        unsigned long size,
+					mmu_update_t **v)
+{
+    unsigned long end;
+
+    address &= ~PGDIR_MASK;
+    end = address + size;
+    if (end > PGDIR_SIZE)
+        end = PGDIR_SIZE;
+    if (address >= end)
+        BUG();
+    do {
+        pte_t *pte = pte_alloc(mm, pmd, address);
+        if (!pte)
+            return -ENOMEM;
+        direct_remap_area_pte(pte, address, end - address, v);
+
+        address = (address + PMD_SIZE) & PMD_MASK;
+        pmd++;
+    } while (address && (address < end));
+    return 0;
+}
+ 
+int __direct_remap_area_pages(struct mm_struct *mm,
+			      unsigned long address, 
+			      unsigned long size, 
+			      mmu_update_t *v)
+{
+    pgd_t * dir;
+    unsigned long end = address + size;
+
+    dir = pgd_offset(mm, address);
+    flush_cache_all();
+    if (address >= end)
+        BUG();
+    spin_lock(&mm->page_table_lock);
+    do {
+        pmd_t *pmd = pmd_alloc(mm, dir, address);
+        if (!pmd)
+	    return -ENOMEM;
+        direct_remap_area_pmd(mm, pmd, address, end - address, &v);
+        address = (address + PGDIR_SIZE) & PGDIR_MASK;
+        dir++;
+
+    } while (address && (address < end));
+    spin_unlock(&mm->page_table_lock);
+    flush_tlb_all();
+    return 0;
+}
+
+
+int direct_remap_area_pages(struct mm_struct *mm,
+                            unsigned long address, 
+                            unsigned long machine_addr,
+                            unsigned long size, 
+                            pgprot_t prot,
+                            domid_t  domid)
+{
+    int i;
+    unsigned long start_address;
+#define MAX_DIRECTMAP_MMU_QUEUE 130
+    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
+
+    start_address = address;
+
+    for( i = 0; i < size; i += PAGE_SIZE )
+    {
+	if ( (v - u) == MAX_DIRECTMAP_MMU_QUEUE )
+	{
+	    /* Fill in the PTE pointers. */
+	    __direct_remap_area_pages( mm,
+				       start_address, 
+				       address-start_address, 
+				       u);
+	    
+	    if ( HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0 )
+		return -EFAULT;	    
+	    v = u;
+	    start_address = address;
+	}
+
+	/*
+         * Fill in the machine address: PTE ptr is done later by
+         * __direct_remap_area_pages(). 
+         */
+        v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
+
+        machine_addr += PAGE_SIZE;
+        address += PAGE_SIZE; 
+        v++;
+    }
+
+    if ( v != u )
+    {
+	/* get the ptep's filled in */
+	__direct_remap_area_pages(mm,
+                                  start_address, 
+                                  address-start_address, 
+                                  u);	 
+	if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) )
+	    return -EFAULT;	    
+    }
+    
+    return 0;
+}
+
+
+#endif /* CONFIG_XEN_PRIVILEGED_GUEST */
+
+
+/*
+ * Remap an arbitrary machine address space into the kernel virtual
+ * address space. Needed when a privileged instance of Xenolinux wants
+ * to access space outside its world directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void * __ioremap(unsigned long machine_addr, 
+                 unsigned long size, 
+                 unsigned long flags)
+{
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+    void * addr;
+    struct vm_struct * area;
+    unsigned long offset, last_addr;
+    pgprot_t prot;
+
+    /* Don't allow wraparound or zero size */
+    last_addr = machine_addr + size - 1;
+    if (!size || last_addr < machine_addr)
+        return NULL;
+
+    /* Mappings have to be page-aligned */
+    offset = machine_addr & ~PAGE_MASK;
+    machine_addr &= PAGE_MASK;
+    size = PAGE_ALIGN(last_addr+1) - machine_addr;
+
+    /* Ok, go for it */
+    area = get_vm_area(size, VM_IOREMAP);
+    if (!area)
+        return NULL;
+    addr = area->addr;
+    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | 
+                    _PAGE_ACCESSED | flags);
+    if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(addr), 
+                                machine_addr, size, prot, 0)) {
+        vfree(addr);
+        return NULL;
+    }
+    return (void *) (offset + (char *)addr);
+#else
+    return NULL;
+#endif
+}
+
+void iounmap(void *addr)
+{
+    vfree((void *)((unsigned long)addr & PAGE_MASK));
+}
+
+/* implementation of boot time ioremap for purpose of provising access
+to the vga console for privileged domains. Unlike boot time ioremap on 
+other architectures, ours is permanent and not reclaimed when then vmalloc
+infrastructure is started */
+
+void __init *bt_ioremap(unsigned long machine_addr, unsigned long size)
+{
+        unsigned long offset, last_addr;
+        unsigned int nrpages;
+        enum fixed_addresses idx;
+
+        /* Don't allow wraparound or zero size */
+        last_addr = machine_addr + size - 1;
+        if (!size || last_addr < machine_addr)
+                return NULL;
+
+        /*
+         * Mappings have to be page-aligned
+         */
+        offset = machine_addr & ~PAGE_MASK;
+        machine_addr &= PAGE_MASK;
+        size = PAGE_ALIGN(last_addr) - machine_addr;
+
+        /*
+         * Mappings have to fit in the FIX_BTMAP area.
+         */
+        nrpages = size >> PAGE_SHIFT;
+        if (nrpages > NR_FIX_BTMAPS)
+                return NULL;
+
+        /*
+         * Ok, go for it..
+         */
+        idx = FIX_BTMAP_BEGIN;
+        while (nrpages > 0) {
+                __set_fixmap(idx, machine_addr, PAGE_KERNEL);
+                machine_addr += PAGE_SIZE;
+                --idx;
+                --nrpages;
+        }
+
+	flush_tlb_all();
+
+        return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
+}
+
+
+#if 0 /* We don't support these functions. They shouldn't be required. */
+void __init bt_iounmap(void *addr, unsigned long size) {}
+#endif
diff --git a/linux-2.4-xen-sparse/arch/xen/vmlinux.lds b/linux-2.4-xen-sparse/arch/xen/vmlinux.lds
new file mode 100644
index 0000000000..258de1a67f
--- /dev/null
+++ b/linux-2.4-xen-sparse/arch/xen/vmlinux.lds
@@ -0,0 +1,75 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+  . = 0xC0000000 + 0x100000;
+  _text = .;			/* Text and read-only data */
+  .text : {
+	*(.text)
+	*(.fixup)
+	*(.gnu.warning)
+	} = 0x9090
+
+  _etext = .;			/* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) }
+  .kstrtab : { *(.kstrtab) }
+
+  . = ALIGN(16);		/* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) }
+  __stop___ex_table = .;
+
+  __start___ksymtab = .;	/* Kernel symbol table */
+  __ksymtab : { *(__ksymtab) }
+  __stop___ksymtab = .;
+
+  .data : {			/* Data */
+	*(.data)
+	CONSTRUCTORS
+	}
+
+  _edata = .;			/* End of data section */
+
+  . = ALIGN(8192);		/* init_task */
+  .data.init_task : { *(.data.init_task) }
+
+  . = ALIGN(4096);		/* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) }
+  .data.init : { *(.data.init) }
+  . = ALIGN(16);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : { *(.initcall.init) }
+  __initcall_end = .;
+  . = ALIGN(4096);
+  __init_end = .;
+
+  . = ALIGN(4096);
+  .data.page_aligned : { *(.data.idt) }
+
+  . = ALIGN(32);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  __bss_start = .;		/* BSS */
+  .bss : {
+	*(.bss)
+	}
+  _end = . ;
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
diff --git a/linux-2.4-xen-sparse/drivers/block/ll_rw_blk.c b/linux-2.4-xen-sparse/drivers/block/ll_rw_blk.c
new file mode 100644
index 0000000000..b1b1fef9b5
--- /dev/null
+++ b/linux-2.4-xen-sparse/drivers/block/ll_rw_blk.c
@@ -0,0 +1,1663 @@
+/*
+ *  linux/drivers/block/ll_rw_blk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
+ * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
+ * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
+ */
+
+/*
+ * This handles all read/write requests to block devices
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/config.h>
+#include <linux/locks.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+#include <linux/bootmem.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <linux/blk.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+/*
+ * MAC Floppy IWM hooks
+ */
+
+#ifdef CONFIG_MAC_FLOPPY_IWM
+extern int mac_floppy_init(void);
+#endif
+
+/*
+ * For the allocated request tables
+ */
+static kmem_cache_t *request_cachep;
+
+/*
+ * The "disk" task queue is used to start the actual requests
+ * after a plug
+ */
+DECLARE_TASK_QUEUE(tq_disk);
+
+/*
+ * Protect the request list against multiple users..
+ *
+ * With this spinlock the Linux block IO subsystem is 100% SMP threaded
+ * from the IRQ event side, and almost 100% SMP threaded from the syscall
+ * side (we still have protect against block device array operations, and
+ * the do_request() side is casually still unsafe. The kernel lock protects
+ * this part currently.).
+ *
+ * there is a fair chance that things will work just OK if these functions
+ * are called with no global kernel lock held ...
+ */
+spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
+
+/* This specifies how many sectors to read ahead on the disk. */
+
+int read_ahead[MAX_BLKDEV];
+
+/* blk_dev_struct is:
+ *	*request_fn
+ *	*current_request
+ */
+struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
+
+/*
+ * blk_size contains the size of all block-devices in units of 1024 byte
+ * sectors:
+ *
+ * blk_size[MAJOR][MINOR]
+ *
+ * if (!blk_size[MAJOR]) then no minor size checking is done.
+ */
+int * blk_size[MAX_BLKDEV];
+
+/*
+ * blksize_size contains the size of all block-devices:
+ *
+ * blksize_size[MAJOR][MINOR]
+ *
+ * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
+ */
+int * blksize_size[MAX_BLKDEV];
+
+/*
+ * hardsect_size contains the size of the hardware sector of a device.
+ *
+ * hardsect_size[MAJOR][MINOR]
+ *
+ * if (!hardsect_size[MAJOR])
+ *		then 512 bytes is assumed.
+ * else
+ *		sector_size is hardsect_size[MAJOR][MINOR]
+ * This is currently set by some scsi devices and read by the msdos fs driver.
+ * Other uses may appear later.
+ */
+int * hardsect_size[MAX_BLKDEV];
+
+/*
+ * The following tunes the read-ahead algorithm in mm/filemap.c
+ */
+int * max_readahead[MAX_BLKDEV];
+
+/*
+ * Max number of sectors per request
+ */
+int * max_sectors[MAX_BLKDEV];
+
+unsigned long blk_max_low_pfn, blk_max_pfn;
+int blk_nohighio = 0;
+
+int block_dump = 0;
+
+static struct timer_list writeback_timer;
+
+static inline int get_max_sectors(kdev_t dev)
+{
+	if (!max_sectors[MAJOR(dev)])
+		return MAX_SECTORS;
+	return max_sectors[MAJOR(dev)][MINOR(dev)];
+}
+
+static inline request_queue_t *__blk_get_queue(kdev_t dev)
+{
+	struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
+
+	if (bdev->queue)
+		return bdev->queue(dev);
+	else
+		return &blk_dev[MAJOR(dev)].request_queue;
+}
+
+request_queue_t *blk_get_queue(kdev_t dev)
+{
+	return __blk_get_queue(dev);
+}
+
+static int __blk_cleanup_queue(struct request_list *list)
+{
+	struct list_head *head = &list->free;
+	struct request *rq;
+	int i = 0;
+
+	while (!list_empty(head)) {
+		rq = list_entry(head->next, struct request, queue);
+		list_del(&rq->queue);
+		kmem_cache_free(request_cachep, rq);
+		i++;
+	};
+
+	if (i != list->count)
+		printk("request list leak!\n");
+
+	list->count = 0;
+	return i;
+}
+
+/**
+ * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
+ * @q:    the request queue to be released
+ *
+ * Description:
+ *     blk_cleanup_queue is the pair to blk_init_queue().  It should
+ *     be called when a request queue is being released; typically
+ *     when a block device is being de-registered.  Currently, its
+ *     primary task it to free all the &struct request structures that
+ *     were allocated to the queue.
+ * Caveat: 
+ *     Hopefully the low level driver will have finished any
+ *     outstanding requests first...
+ **/
+void blk_cleanup_queue(request_queue_t * q)
+{
+	int count = q->nr_requests;
+
+	count -= __blk_cleanup_queue(&q->rq);
+
+	if (count)
+		printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+	if (atomic_read(&q->nr_sectors))
+		printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
+
+	memset(q, 0, sizeof(*q));
+}
+
+/**
+ * blk_queue_headactive - indicate whether head of request queue may be active
+ * @q:       The queue which this applies to.
+ * @active:  A flag indication where the head of the queue is active.
+ *
+ * Description:
+ *    The driver for a block device may choose to leave the currently active
+ *    request on the request queue, removing it only when it has completed.
+ *    The queue handling routines assume this by default for safety reasons
+ *    and will not involve the head of the request queue in any merging or
+ *    reordering of requests when the queue is unplugged (and thus may be
+ *    working on this particular request).
+ *
+ *    If a driver removes requests from the queue before processing them, then
+ *    it may indicate that it does so, there by allowing the head of the queue
+ *    to be involved in merging and reordering.  This is done be calling
+ *    blk_queue_headactive() with an @active flag of %0.
+ *
+ *    If a driver processes several requests at once, it must remove them (or
+ *    at least all but one of them) from the request queue.
+ *
+ *    When a queue is plugged the head will be assumed to be inactive.
+ **/
+ 
+void blk_queue_headactive(request_queue_t * q, int active)
+{
+	q->head_active = active;
+}
+
+/**
+ * blk_queue_throttle_sectors - indicates you will call sector throttling funcs
+ * @q:       The queue which this applies to.
+ * @active:  A flag indication if you want sector throttling on
+ *
+ * Description:
+ * The sector throttling code allows us to put a limit on the number of
+ * sectors pending io to the disk at a given time, sending @active nonzero
+ * indicates you will call blk_started_sectors and blk_finished_sectors in
+ * addition to calling blk_started_io and blk_finished_io in order to
+ * keep track of the number of sectors in flight.
+ **/
+ 
+void blk_queue_throttle_sectors(request_queue_t * q, int active)
+{
+	q->can_throttle = active;
+}
+
+/**
+ * blk_queue_make_request - define an alternate make_request function for a device
+ * @q:  the request queue for the device to be affected
+ * @mfn: the alternate make_request function
+ *
+ * Description:
+ *    The normal way for &struct buffer_heads to be passed to a device
+ *    driver is for them to be collected into requests on a request
+ *    queue, and then to allow the device driver to select requests
+ *    off that queue when it is ready.  This works well for many block
+ *    devices. However some block devices (typically virtual devices
+ *    such as md or lvm) do not benefit from the processing on the
+ *    request queue, and are served best by having the requests passed
+ *    directly to them.  This can be achieved by providing a function
+ *    to blk_queue_make_request().
+ *
+ * Caveat:
+ *    The driver that does this *must* be able to deal appropriately
+ *    with buffers in "highmemory", either by calling bh_kmap() to get
+ *    a kernel mapping, to by calling create_bounce() to create a
+ *    buffer in normal memory.
+ **/
+
+void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
+{
+	q->make_request_fn = mfn;
+}
+
+/**
+ * blk_queue_bounce_limit - set bounce buffer limit for queue
+ * @q:  the request queue for the device
+ * @dma_addr:   bus address limit
+ *
+ * Description:
+ *    Different hardware can have different requirements as to what pages
+ *    it can do I/O directly to. A low level driver can call
+ *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
+ *    buffers for doing I/O to pages residing above @page. By default
+ *    the block layer sets this to the highest numbered "low" memory page.
+ **/
+void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
+{
+	unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
+	unsigned long mb = dma_addr >> 20;
+	static request_queue_t *old_q;
+
+	/*
+	 * keep this for debugging for now...
+	 */
+	if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) {
+		old_q = q;
+		printk("blk: queue %p, ", q);
+		if (dma_addr == BLK_BOUNCE_ANY)
+			printk("no I/O memory limit\n");
+		else
+			printk("I/O limit %luMb (mask 0x%Lx)\n", mb,
+			       (long long) dma_addr);
+	}
+
+	q->bounce_pfn = bounce_pfn;
+}
+
+
+/*
+ * can we merge the two segments, or do we need to start a new one?
+ */
+static inline int __blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
+{
+	/*
+	 * if bh and nxt are contigous and don't cross a 4g boundary, it's ok
+	 */
+	if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt))
+		return 1;
+
+	return 0;
+}
+
+int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
+{
+	return __blk_seg_merge_ok(bh, nxt);
+}
+
+static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
+{
+	if (req->nr_segments < max_segments) {
+		req->nr_segments++;
+		return 1;
+	}
+	return 0;
+}
+
+static int ll_back_merge_fn(request_queue_t *q, struct request *req, 
+			    struct buffer_head *bh, int max_segments)
+{
+	if (__blk_seg_merge_ok(req->bhtail, bh))
+		return 1;
+
+	return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_front_merge_fn(request_queue_t *q, struct request *req, 
+			     struct buffer_head *bh, int max_segments)
+{
+	if (__blk_seg_merge_ok(bh, req->bh))
+		return 1;
+
+	return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
+				struct request *next, int max_segments)
+{
+	int total_segments = req->nr_segments + next->nr_segments;
+
+	if (__blk_seg_merge_ok(req->bhtail, next->bh))
+		total_segments--;
+
+	if (total_segments > max_segments)
+		return 0;
+
+	req->nr_segments = total_segments;
+	return 1;
+}
+
+/*
+ * "plug" the device if there are no outstanding requests: this will
+ * force the transfer to start only after we have put all the requests
+ * on the list.
+ *
+ * This is called with interrupts off and no requests on the queue.
+ * (and with the request spinlock acquired)
+ */
+static void generic_plug_device(request_queue_t *q, kdev_t dev)
+{
+	/*
+	 * no need to replug device
+	 */
+	if (!list_empty(&q->queue_head) || q->plugged)
+		return;
+
+	q->plugged = 1;
+	queue_task(&q->plug_tq, &tq_disk);
+}
+
+/*
+ * remove the plug and let it rip..
+ */
+static inline void __generic_unplug_device(request_queue_t *q)
+{
+	if (q->plugged) {
+		q->plugged = 0;
+		if (!list_empty(&q->queue_head))
+			q->request_fn(q);
+	}
+}
+
+void generic_unplug_device(void *data)
+{
+	request_queue_t *q = (request_queue_t *) data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&io_request_lock, flags);
+	__generic_unplug_device(q);
+	spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/** blk_grow_request_list
+ *  @q: The &request_queue_t
+ *  @nr_requests: how many requests are desired
+ *
+ * More free requests are added to the queue's free lists, bringing
+ * the total number of requests to @nr_requests.
+ *
+ * The requests are added equally to the request queue's read
+ * and write freelists.
+ *
+ * This function can sleep.
+ *
+ * Returns the (new) number of requests which the queue has available.
+ */
+int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
+{
+	unsigned long flags;
+	/* Several broken drivers assume that this function doesn't sleep,
+	 * this causes system hangs during boot.
+	 * As a temporary fix, make the function non-blocking.
+	 */
+	spin_lock_irqsave(&io_request_lock, flags);
+	while (q->nr_requests < nr_requests) {
+		struct request *rq;
+
+		rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
+		if (rq == NULL)
+			break;
+		memset(rq, 0, sizeof(*rq));
+		rq->rq_status = RQ_INACTIVE;
+ 		list_add(&rq->queue, &q->rq.free);
+ 		q->rq.count++;
+
+		q->nr_requests++;
+	}
+
+ 	/*
+ 	 * Wakeup waiters after both one quarter of the
+ 	 * max-in-fligh queue and one quarter of the requests
+ 	 * are available again.
+ 	 */
+
+	q->batch_requests = q->nr_requests / 4;
+	if (q->batch_requests > 32)
+		q->batch_requests = 32;
+ 	q->batch_sectors = max_queue_sectors / 4;
+ 
+ 	q->max_queue_sectors = max_queue_sectors;
+ 
+ 	BUG_ON(!q->batch_sectors);
+ 	atomic_set(&q->nr_sectors, 0);
+
+	spin_unlock_irqrestore(&io_request_lock, flags);
+	return q->nr_requests;
+}
+
+static void blk_init_free_list(request_queue_t *q)
+{
+	struct sysinfo si;
+	int megs;		/* Total memory, in megabytes */
+ 	int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
+  
+ 	INIT_LIST_HEAD(&q->rq.free);
+	q->rq.count = 0;
+	q->rq.pending[READ] = q->rq.pending[WRITE] = 0;
+	q->nr_requests = 0;
+
+	si_meminfo(&si);
+	megs = si.totalram >> (20 - PAGE_SHIFT);
+ 	nr_requests = MAX_NR_REQUESTS;
+ 	if (megs < 30) {
+  		nr_requests /= 2;
+ 		max_queue_sectors /= 2;
+ 	}
+ 	/* notice early if anybody screwed the defaults */
+ 	BUG_ON(!nr_requests);
+ 	BUG_ON(!max_queue_sectors);
+ 
+ 	blk_grow_request_list(q, nr_requests, max_queue_sectors);
+
+ 	init_waitqueue_head(&q->wait_for_requests);
+
+	spin_lock_init(&q->queue_lock);
+}
+
+static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
+
+/**
+ * blk_init_queue  - prepare a request queue for use with a block device
+ * @q:    The &request_queue_t to be initialised
+ * @rfn:  The function to be called to process requests that have been
+ *        placed on the queue.
+ *
+ * Description:
+ *    If a block device wishes to use the standard request handling procedures,
+ *    which sorts requests and coalesces adjacent requests, then it must
+ *    call blk_init_queue().  The function @rfn will be called when there
+ *    are requests on the queue that need to be processed.  If the device
+ *    supports plugging, then @rfn may not be called immediately when requests
+ *    are available on the queue, but may be called at some time later instead.
+ *    Plugged queues are generally unplugged when a buffer belonging to one
+ *    of the requests on the queue is needed, or due to memory pressure.
+ *
+ *    @rfn is not required, or even expected, to remove all requests off the
+ *    queue, but only as many as it can handle at a time.  If it does leave
+ *    requests on the queue, it is responsible for arranging that the requests
+ *    get dealt with eventually.
+ *
+ *    A global spin lock $io_request_lock must be held while manipulating the
+ *    requests on the request queue.
+ *
+ *    The request on the head of the queue is by default assumed to be
+ *    potentially active, and it is not considered for re-ordering or merging
+ *    whenever the given queue is unplugged. This behaviour can be changed with
+ *    blk_queue_headactive().
+ *
+ * Note:
+ *    blk_init_queue() must be paired with a blk_cleanup_queue() call
+ *    when the block device is deactivated (such as at module unload).
+ **/
+void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
+{
+	INIT_LIST_HEAD(&q->queue_head);
+	elevator_init(&q->elevator, ELEVATOR_LINUS);
+	blk_init_free_list(q);
+	q->request_fn     	= rfn;
+	q->back_merge_fn       	= ll_back_merge_fn;
+	q->front_merge_fn      	= ll_front_merge_fn;
+	q->merge_requests_fn	= ll_merge_requests_fn;
+	q->make_request_fn	= __make_request;
+	q->plug_tq.sync		= 0;
+	q->plug_tq.routine	= &generic_unplug_device;
+	q->plug_tq.data		= q;
+	q->plugged        	= 0;
+	q->can_throttle		= 0;
+
+	/*
+	 * These booleans describe the queue properties.  We set the
+	 * default (and most common) values here.  Other drivers can
+	 * use the appropriate functions to alter the queue properties.
+	 * as appropriate.
+	 */
+	q->plug_device_fn 	= generic_plug_device;
+	q->head_active    	= 1;
+
+	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+}
+
+#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
+/*
+ * Get a free request. io_request_lock must be held and interrupts
+ * disabled on the way in.  Returns NULL if there are no free requests.
+ */
+static struct request *get_request(request_queue_t *q, int rw)
+{
+	struct request *rq = NULL;
+	struct request_list *rl = &q->rq;
+
+	if (blk_oversized_queue(q)) {
+		int rlim = q->nr_requests >> 5;
+
+		if (rlim < 4)
+			rlim = 4;
+
+		/*
+		 * if its a write, or we have more than a handful of reads
+		 * pending, bail out
+		 */
+		if ((rw == WRITE) || (rw == READ && rl->pending[READ] > rlim))
+			return NULL;
+		if (blk_oversized_queue_reads(q))
+			return NULL;
+	}
+	
+	if (!list_empty(&rl->free)) {
+		rq = blkdev_free_rq(&rl->free);
+		list_del(&rq->queue);
+		rl->count--;
+		rl->pending[rw]++;
+		rq->rq_status = RQ_ACTIVE;
+		rq->cmd = rw;
+		rq->special = NULL;
+		rq->q = q;
+	}
+
+	return rq;
+}
+
+/*
+ * Here's the request allocation design, low latency version:
+ *
+ * 1: Blocking on request exhaustion is a key part of I/O throttling.
+ * 
+ * 2: We want to be `fair' to all requesters.  We must avoid starvation, and
+ *    attempt to ensure that all requesters sleep for a similar duration.  Hence
+ *    no stealing requests when there are other processes waiting.
+ *
+ * There used to be more here, attempting to allow a process to send in a
+ * number of requests once it has woken up.  But, there's no way to 
+ * tell if a process has just been woken up, or if it is a new process
+ * coming in to steal requests from the waiters.  So, we give up and force
+ * everyone to wait fairly.
+ * 
+ * So here's what we do:
+ * 
+ *    a) A READA requester fails if free_requests < batch_requests
+ * 
+ *       We don't want READA requests to prevent sleepers from ever
+ *       waking.  Note that READA is used extremely rarely - a few
+ *       filesystems use it for directory readahead.
+ * 
+ *  When a process wants a new request:
+ * 
+ *    b) If free_requests == 0, the requester sleeps in FIFO manner, and
+ *       the queue full condition is set.  The full condition is not
+ *       cleared until there are no longer any waiters.  Once the full
+ *       condition is set, all new io must wait, hopefully for a very
+ *       short period of time.
+ * 
+ *  When a request is released:
+ * 
+ *    c) If free_requests < batch_requests, do nothing.
+ * 
+ *    d) If free_requests >= batch_requests, wake up a single waiter.
+ *
+ *   As each waiter gets a request, he wakes another waiter.  We do this
+ *   to prevent a race where an unplug might get run before a request makes
+ *   it's way onto the queue.  The result is a cascade of wakeups, so delaying
+ *   the initial wakeup until we've got batch_requests available helps avoid
+ *   wakeups where there aren't any requests available yet.
+ */
+
+static struct request *__get_request_wait(request_queue_t *q, int rw)
+{
+	register struct request *rq;
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue_exclusive(&q->wait_for_requests, &wait);
+
+	do {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		spin_lock_irq(&io_request_lock);
+		if (blk_oversized_queue(q) || q->rq.count == 0) {
+			__generic_unplug_device(q);
+			spin_unlock_irq(&io_request_lock);
+			schedule();
+			spin_lock_irq(&io_request_lock);
+		}
+		rq = get_request(q, rw);
+		spin_unlock_irq(&io_request_lock);
+	} while (rq == NULL);
+	remove_wait_queue(&q->wait_for_requests, &wait);
+	current->state = TASK_RUNNING;
+
+	return rq;
+}
+
+static void get_request_wait_wakeup(request_queue_t *q, int rw)
+{
+	/*
+	 * avoid losing an unplug if a second __get_request_wait did the
+	 * generic_unplug_device while our __get_request_wait was running
+	 * w/o the queue_lock held and w/ our request out of the queue.
+	 */	
+	if (waitqueue_active(&q->wait_for_requests))
+		wake_up(&q->wait_for_requests);
+}
+
+/* RO fail safe mechanism */
+
+static long ro_bits[MAX_BLKDEV][8];
+
+int is_read_only(kdev_t dev)
+{
+	int minor,major;
+
+	major = MAJOR(dev);
+	minor = MINOR(dev);
+	if (major < 0 || major >= MAX_BLKDEV) return 0;
+	return ro_bits[major][minor >> 5] & (1 << (minor & 31));
+}
+
+void set_device_ro(kdev_t dev,int flag)
+{
+	int minor,major;
+
+	major = MAJOR(dev);
+	minor = MINOR(dev);
+	if (major < 0 || major >= MAX_BLKDEV) return;
+	if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
+	else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
+}
+
+inline void drive_stat_acct (kdev_t dev, int rw,
+				unsigned long nr_sectors, int new_io)
+{
+	unsigned int major = MAJOR(dev);
+	unsigned int index;
+
+	index = disk_index(dev);
+	if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+		return;
+
+	kstat.dk_drive[major][index] += new_io;
+	if (rw == READ) {
+		kstat.dk_drive_rio[major][index] += new_io;
+		kstat.dk_drive_rblk[major][index] += nr_sectors;
+	} else if (rw == WRITE) {
+		kstat.dk_drive_wio[major][index] += new_io;
+		kstat.dk_drive_wblk[major][index] += nr_sectors;
+	} else
+		printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
+}
+
+#ifdef CONFIG_BLK_STATS
+/*
+ * Return up to two hd_structs on which to do IO accounting for a given
+ * request.
+ *
+ * On a partitioned device, we want to account both against the partition
+ * and against the whole disk.
+ */
+static void locate_hd_struct(struct request *req, 
+			     struct hd_struct **hd1,
+			     struct hd_struct **hd2)
+{
+	struct gendisk *gd;
+
+	*hd1 = NULL;
+	*hd2 = NULL;
+	
+	gd = get_gendisk(req->rq_dev);
+	if (gd && gd->part) {
+		/* Mask out the partition bits: account for the entire disk */
+		int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
+		int whole_minor = devnr << gd->minor_shift;
+
+		*hd1 = &gd->part[whole_minor];
+		if (whole_minor != MINOR(req->rq_dev))
+			*hd2= &gd->part[MINOR(req->rq_dev)];
+	}
+}
+
+/*
+ * Round off the performance stats on an hd_struct.
+ *
+ * The average IO queue length and utilisation statistics are maintained
+ * by observing the current state of the queue length and the amount of
+ * time it has been in this state for.
+ * Normally, that accounting is done on IO completion, but that can result
+ * in more than a second's worth of IO being accounted for within any one
+ * second, leading to >100% utilisation.  To deal with that, we do a
+ * round-off before returning the results when reading /proc/partitions,
+ * accounting immediately for all queue usage up to the current jiffies and
+ * restarting the counters again.
+ */
+void disk_round_stats(struct hd_struct *hd)
+{
+	unsigned long now = jiffies;
+	
+	hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
+	hd->last_queue_change = now;
+
+	if (hd->ios_in_flight)
+		hd->io_ticks += (now - hd->last_idle_time);
+	hd->last_idle_time = now;	
+}
+
+static inline void down_ios(struct hd_struct *hd)
+{
+	disk_round_stats(hd);	
+	--hd->ios_in_flight;
+}
+
+static inline void up_ios(struct hd_struct *hd)
+{
+	disk_round_stats(hd);
+	++hd->ios_in_flight;
+}
+
+static void account_io_start(struct hd_struct *hd, struct request *req,
+			     int merge, int sectors)
+{
+	switch (req->cmd) {
+	case READ:
+		if (merge)
+			hd->rd_merges++;
+		hd->rd_sectors += sectors;
+		break;
+	case WRITE:
+		if (merge)
+			hd->wr_merges++;
+		hd->wr_sectors += sectors;
+		break;
+	}
+	if (!merge)
+		up_ios(hd);
+}
+
+static void account_io_end(struct hd_struct *hd, struct request *req)
+{
+	unsigned long duration = jiffies - req->start_time;
+	switch (req->cmd) {
+	case READ:
+		hd->rd_ticks += duration;
+		hd->rd_ios++;
+		break;
+	case WRITE:
+		hd->wr_ticks += duration;
+		hd->wr_ios++;
+		break;
+	}
+	down_ios(hd);
+}
+
+void req_new_io(struct request *req, int merge, int sectors)
+{
+	struct hd_struct *hd1, *hd2;
+
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		account_io_start(hd1, req, merge, sectors);
+	if (hd2)
+		account_io_start(hd2, req, merge, sectors);
+}
+
+void req_merged_io(struct request *req)
+{
+	struct hd_struct *hd1, *hd2;
+
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		down_ios(hd1);
+	if (hd2)	
+		down_ios(hd2);
+}
+
+void req_finished_io(struct request *req)
+{
+	struct hd_struct *hd1, *hd2;
+
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		account_io_end(hd1, req);
+	if (hd2)	
+		account_io_end(hd2, req);
+}
+EXPORT_SYMBOL(req_finished_io);
+#endif /* CONFIG_BLK_STATS */
+
+/*
+ * add-request adds a request to the linked list.
+ * io_request_lock is held and interrupts disabled, as we muck with the
+ * request queue list.
+ *
+ * By this point, req->cmd is always either READ/WRITE, never READA,
+ * which is important for drive_stat_acct() above.
+ */
+static inline void add_request(request_queue_t * q, struct request * req,
+			       struct list_head *insert_here)
+{
+	drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
+
+	if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
+		spin_unlock_irq(&io_request_lock);
+		BUG();
+	}
+
+	/*
+	 * elevator indicated where it wants this request to be
+	 * inserted at elevator_merge time
+	 */
+	list_add(&req->queue, insert_here);
+}
+
+/*
+ * Must be called with io_request_lock held and interrupts disabled
+ */
+void blkdev_release_request(struct request *req)
+{
+	request_queue_t *q = req->q;
+
+	req->rq_status = RQ_INACTIVE;
+	req->q = NULL;
+
+	/*
+	 * Request may not have originated from ll_rw_blk. if not,
+	 * assume it has free buffers and check waiters
+	 */
+	if (q) {
+		struct request_list *rl = &q->rq;
+		int oversized_batch = 0;
+
+		if (q->can_throttle)
+			oversized_batch = blk_oversized_queue_batch(q);
+		rl->count++;
+		/*
+		 * paranoia check
+		 */
+		if (req->cmd == READ || req->cmd == WRITE)
+			rl->pending[req->cmd]--;
+		if (rl->pending[READ] > q->nr_requests)
+			printk("blk: reads: %u\n", rl->pending[READ]);
+		if (rl->pending[WRITE] > q->nr_requests)
+			printk("blk: writes: %u\n", rl->pending[WRITE]);
+		if (rl->pending[READ] + rl->pending[WRITE] > q->nr_requests)
+			printk("blk: r/w: %u + %u > %u\n", rl->pending[READ], rl->pending[WRITE], q->nr_requests);
+		list_add(&req->queue, &rl->free);
+		if (rl->count >= q->batch_requests && !oversized_batch) {
+			smp_mb();
+			if (waitqueue_active(&q->wait_for_requests))
+				wake_up(&q->wait_for_requests);
+		}
+	}
+}
+
+/*
+ * Has to be called with the request spinlock acquired
+ */
+static void attempt_merge(request_queue_t * q,
+			  struct request *req,
+			  int max_sectors,
+			  int max_segments)
+{
+	struct request *next;
+  
+	next = blkdev_next_request(req);
+	if (req->sector + req->nr_sectors != next->sector)
+		return;
+	if (req->cmd != next->cmd
+	    || req->rq_dev != next->rq_dev
+	    || req->nr_sectors + next->nr_sectors > max_sectors
+	    || next->waiting)
+		return;
+	/*
+	 * If we are not allowed to merge these requests, then
+	 * return.  If we are allowed to merge, then the count
+	 * will have been updated to the appropriate number,
+	 * and we shouldn't do it here too.
+	 */
+	if (!q->merge_requests_fn(q, req, next, max_segments))
+		return;
+
+	q->elevator.elevator_merge_req_fn(req, next);
+	
+	/* At this point we have either done a back merge
+	 * or front merge. We need the smaller start_time of
+	 * the merged requests to be the current request
+	 * for accounting purposes.
+	 */
+	if (time_after(req->start_time, next->start_time))
+		req->start_time = next->start_time;
+		
+	req->bhtail->b_reqnext = next->bh;
+	req->bhtail = next->bhtail;
+	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+	list_del(&next->queue);
+
+	/* One last thing: we have removed a request, so we now have one
+	   less expected IO to complete for accounting purposes. */
+	req_merged_io(req);
+
+	blkdev_release_request(next);
+}
+
+static inline void attempt_back_merge(request_queue_t * q,
+				      struct request *req,
+				      int max_sectors,
+				      int max_segments)
+{
+	if (&req->queue == q->queue_head.prev)
+		return;
+	attempt_merge(q, req, max_sectors, max_segments);
+}
+
+static inline void attempt_front_merge(request_queue_t * q,
+				       struct list_head * head,
+				       struct request *req,
+				       int max_sectors,
+				       int max_segments)
+{
+	struct list_head * prev;
+
+	prev = req->queue.prev;
+	if (head == prev)
+		return;
+	attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
+}
+
+static int __make_request(request_queue_t * q, int rw,
+				  struct buffer_head * bh)
+{
+	unsigned int sector, count, sync;
+	int max_segments = MAX_SEGMENTS;
+	struct request * req, *freereq = NULL;
+	int rw_ahead, max_sectors, el_ret;
+	struct list_head *head, *insert_here;
+	int latency;
+	elevator_t *elevator = &q->elevator;
+	int should_wake = 0;
+
+	count = bh->b_size >> 9;
+	sector = bh->b_rsector;
+	sync = test_and_clear_bit(BH_Sync, &bh->b_state);
+
+	rw_ahead = 0;	/* normal case; gets changed below for READA */
+	switch (rw) {
+		case READA:
+#if 0	/* bread() misinterprets failed READA attempts as IO errors on SMP */
+			rw_ahead = 1;
+#endif
+			rw = READ;	/* drop into READ */
+		case READ:
+		case WRITE:
+			latency = elevator_request_latency(elevator, rw);
+			break;
+		default:
+			BUG();
+			goto end_io;
+	}
+
+	/* We'd better have a real physical mapping!
+	   Check this bit only if the buffer was dirty and just locked
+	   down by us so at this point flushpage will block and
+	   won't clear the mapped bit under us. */
+	if (!buffer_mapped(bh))
+		BUG();
+
+	/*
+	 * Temporary solution - in 2.5 this will be done by the lowlevel
+	 * driver. Create a bounce buffer if the buffer data points into
+	 * high memory - keep the original buffer otherwise.
+	 */
+	bh = blk_queue_bounce(q, rw, bh);
+
+/* look for a free request. */
+	/*
+	 * Try to coalesce the new request with old requests
+	 */
+	max_sectors = get_max_sectors(bh->b_rdev);
+
+	req = NULL;
+	head = &q->queue_head;
+	/*
+	 * Now we acquire the request spinlock, we have to be mega careful
+	 * not to schedule or do something nonatomic
+	 */
+	spin_lock_irq(&io_request_lock);
+
+again:
+	insert_here = head->prev;
+
+	if (list_empty(head)) {
+		q->plug_device_fn(q, bh->b_rdev); /* is atomic */
+		goto get_rq;
+	} else if (q->head_active && !q->plugged)
+		head = head->next;
+
+	el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
+	switch (el_ret) {
+
+		case ELEVATOR_BACK_MERGE:
+			if (!q->back_merge_fn(q, req, bh, max_segments)) {
+				insert_here = &req->queue;
+				break;
+			}
+			req->bhtail->b_reqnext = bh;
+			req->bhtail = bh;
+			req->nr_sectors = req->hard_nr_sectors += count;
+			blk_started_io(count);
+			blk_started_sectors(req, count);
+			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+			req_new_io(req, 1, count);
+			attempt_back_merge(q, req, max_sectors, max_segments);
+			goto out;
+
+		case ELEVATOR_FRONT_MERGE:
+			if (!q->front_merge_fn(q, req, bh, max_segments)) {
+				insert_here = req->queue.prev;
+				break;
+			}
+			bh->b_reqnext = req->bh;
+			req->bh = bh;
+			/*
+			 * may not be valid, but queues not having bounce
+			 * enabled for highmem pages must not look at
+			 * ->buffer anyway
+			 */
+			req->buffer = bh->b_data;
+			req->current_nr_sectors = req->hard_cur_sectors = count;
+			req->sector = req->hard_sector = sector;
+			req->nr_sectors = req->hard_nr_sectors += count;
+			blk_started_io(count);
+			blk_started_sectors(req, count);
+			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+			req_new_io(req, 1, count);
+			attempt_front_merge(q, head, req, max_sectors, max_segments);
+			goto out;
+
+		/*
+		 * elevator says don't/can't merge. get new request
+		 */
+		case ELEVATOR_NO_MERGE:
+			/*
+			 * use elevator hints as to where to insert the
+			 * request. if no hints, just add it to the back
+			 * of the queue
+			 */
+			if (req)
+				insert_here = &req->queue;
+			break;
+
+		default:
+			printk("elevator returned crap (%d)\n", el_ret);
+			BUG();
+	}
+		
+get_rq:
+	if (freereq) {
+		req = freereq;
+		freereq = NULL;
+	} else {
+		/*
+		 * See description above __get_request_wait()
+		 */
+		if (rw_ahead) {
+			if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
+				spin_unlock_irq(&io_request_lock);
+				goto end_io;
+			}
+			req = get_request(q, rw);
+			if (req == NULL)
+				BUG();
+		} else {
+			req = get_request(q, rw);
+			if (req == NULL) {
+				spin_unlock_irq(&io_request_lock);
+				freereq = __get_request_wait(q, rw);
+				head = &q->queue_head;
+				spin_lock_irq(&io_request_lock);
+				should_wake = 1;
+				goto again;
+			}
+		}
+	}
+
+/* fill up the request-info, and add it to the queue */
+	req->elevator_sequence = latency;
+	req->cmd = rw;
+	req->errors = 0;
+	req->hard_sector = req->sector = sector;
+	req->hard_nr_sectors = req->nr_sectors = count;
+	req->current_nr_sectors = req->hard_cur_sectors = count;
+	req->nr_segments = 1; /* Always 1 for a new request. */
+	req->nr_hw_segments = 1; /* Always 1 for a new request. */
+	req->buffer = bh->b_data;
+	req->waiting = NULL;
+	req->bh = bh;
+	req->bhtail = bh;
+	req->rq_dev = bh->b_rdev;
+	req->start_time = jiffies;
+	req_new_io(req, 0, count);
+	blk_started_io(count);
+	blk_started_sectors(req, count);
+	add_request(q, req, insert_here);
+out:
+	if (freereq)
+		blkdev_release_request(freereq);
+	if (should_wake)
+		get_request_wait_wakeup(q, rw);
+	if (sync)
+		__generic_unplug_device(q);
+	spin_unlock_irq(&io_request_lock);
+	return 0;
+end_io:
+	bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+	return 0;
+}
+
+/**
+ * generic_make_request: hand a buffer head to it's device driver for I/O
+ * @rw:  READ, WRITE, or READA - what sort of I/O is desired.
+ * @bh:  The buffer head describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct buffer_head and a &rw value.  The
+ * %READ and %WRITE options are (hopefully) obvious in meaning.  The
+ * %READA value means that a read is required, but that the driver is
+ * free to fail the request if, for example, it cannot get needed
+ * resources immediately.
+ *
+ * generic_make_request() does not return any status.  The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bh->b_end_io
+ * function described (one day) else where.
+ *
+ * The caller of generic_make_request must make sure that b_page,
+ * b_addr, b_size are set to describe the memory buffer, that b_rdev
+ * and b_rsector are set to describe the device address, and the
+ * b_end_io and optionally b_private are set to describe how
+ * completion notification should be signaled.  BH_Mapped should also
+ * be set (to confirm that b_dev and b_blocknr are valid).
+ *
+ * generic_make_request and the drivers it calls may use b_reqnext,
+ * and may change b_rdev and b_rsector.  So the values of these fields
+ * should NOT be depended on after the call to generic_make_request.
+ * Because of this, the caller should record the device address
+ * information in b_dev and b_blocknr.
+ *
+ * Apart from those fields mentioned above, no other fields, and in
+ * particular, no other flags, are changed by generic_make_request or
+ * any lower level drivers.
+ * */
+void generic_make_request (int rw, struct buffer_head * bh)
+{
+	int major = MAJOR(bh->b_rdev);
+	int minorsize = 0;
+	request_queue_t *q;
+
+	if (!bh->b_end_io)
+		BUG();
+
+	/* Test device size, when known. */
+	if (blk_size[major])
+		minorsize = blk_size[major][MINOR(bh->b_rdev)];
+	if (minorsize) {
+		unsigned long maxsector = (minorsize << 1) + 1;
+		unsigned long sector = bh->b_rsector;
+		unsigned int count = bh->b_size >> 9;
+
+		if (maxsector < count || maxsector - count < sector) {
+			/* Yecch */
+			bh->b_state &= ~(1 << BH_Dirty);
+
+			/* This may well happen - the kernel calls bread()
+			   without checking the size of the device, e.g.,
+			   when mounting a device. */
+			printk(KERN_INFO
+			       "attempt to access beyond end of device\n");
+			printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
+			       kdevname(bh->b_rdev), rw,
+			       (sector + count)>>1, minorsize);
+
+			bh->b_end_io(bh, 0);
+			return;
+		}
+	}
+
+	/*
+	 * Resolve the mapping until finished. (drivers are
+	 * still free to implement/resolve their own stacking
+	 * by explicitly returning 0)
+	 */
+	/* NOTE: we don't repeat the blk_size check for each new device.
+	 * Stacking drivers are expected to know what they are doing.
+	 */
+	do {
+		q = __blk_get_queue(bh->b_rdev);
+		if (!q) {
+			printk(KERN_ERR
+			       "generic_make_request: Trying to access "
+			       "nonexistent block-device %s (%ld)\n",
+			       kdevname(bh->b_rdev), bh->b_rsector);
+			buffer_IO_error(bh);
+			break;
+		}
+	} while (q->make_request_fn(q, rw, bh));
+}
+
+
+/**
+ * submit_bh: submit a buffer_head to the block device later for I/O
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
+ * @bh: The &struct buffer_head which describes the I/O
+ *
+ * submit_bh() is very similar in purpose to generic_make_request(), and
+ * uses that function to do most of the work.
+ *
+ * The extra functionality provided by submit_bh is to determine
+ * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
+ * This is is appropriate for IO requests that come from the buffer
+ * cache and page cache which (currently) always use aligned blocks.
+ */
+void submit_bh(int rw, struct buffer_head * bh)
+{
+	int count = bh->b_size >> 9;
+
+	if (!test_bit(BH_Lock, &bh->b_state))
+		BUG();
+
+	set_bit(BH_Req, &bh->b_state);
+	set_bit(BH_Launder, &bh->b_state);
+
+	/*
+	 * First step, 'identity mapping' - RAID or LVM might
+	 * further remap this.
+	 */
+	bh->b_rdev = bh->b_dev;
+	bh->b_rsector = bh->b_blocknr * count;
+
+	get_bh(bh);
+	generic_make_request(rw, bh);
+
+	/* fix race condition with wait_on_buffer() */
+	smp_mb(); /* spin_unlock may have inclusive semantics */
+	if (waitqueue_active(&bh->b_wait))
+		wake_up(&bh->b_wait);
+
+	if (block_dump)
+		printk(KERN_DEBUG "%s: %s block %lu/%u on %s\n", current->comm, rw == WRITE ? "WRITE" : "READ", bh->b_rsector, count, kdevname(bh->b_rdev));
+
+	put_bh(bh);
+	switch (rw) {
+		case WRITE:
+			kstat.pgpgout += count;
+			break;
+		default:
+			kstat.pgpgin += count;
+			break;
+	}
+}
+
+/**
+ * ll_rw_block: low-level access to block devices
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @nr: number of &struct buffer_heads in the array
+ * @bhs: array of pointers to &struct buffer_head
+ *
+ * ll_rw_block() takes an array of pointers to &struct buffer_heads,
+ * and requests an I/O operation on them, either a %READ or a %WRITE.
+ * The third %READA option is described in the documentation for
+ * generic_make_request() which ll_rw_block() calls.
+ *
+ * This function provides extra functionality that is not in
+ * generic_make_request() that is relevant to buffers in the buffer
+ * cache or page cache.  In particular it drops any buffer that it
+ * cannot get a lock on (with the BH_Lock state bit), any buffer that
+ * appears to be clean when doing a write request, and any buffer that
+ * appears to be up-to-date when doing read request.  Further it marks
+ * as clean buffers that are processed for writing (the buffer cache
+ * wont assume that they are actually clean until the buffer gets
+ * unlocked).
+ *
+ * ll_rw_block sets b_end_io to simple completion handler that marks
+ * the buffer up-to-date (if approriate), unlocks the buffer and wakes
+ * any waiters.  As client that needs a more interesting completion
+ * routine should call submit_bh() (or generic_make_request())
+ * directly.
+ *
+ * Caveat:
+ *  All of the buffers must be for the same device, and must also be
+ *  of the current approved size for the device.  */
+
+void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
+{
+	unsigned int major;
+	int correct_size;
+	int i;
+
+	if (!nr)
+		return;
+
+	major = MAJOR(bhs[0]->b_dev);
+
+	/* Determine correct block size for this device. */
+	correct_size = get_hardsect_size(bhs[0]->b_dev);
+
+	/* Verify requested block sizes. */
+	for (i = 0; i < nr; i++) {
+		struct buffer_head *bh = bhs[i];
+		if (bh->b_size % correct_size) {
+			printk(KERN_NOTICE "ll_rw_block: device %s: "
+			       "only %d-char blocks implemented (%u)\n",
+			       kdevname(bhs[0]->b_dev),
+			       correct_size, bh->b_size);
+			goto sorry;
+		}
+	}
+
+	if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
+		printk(KERN_NOTICE "Can't write to read-only device %s\n",
+		       kdevname(bhs[0]->b_dev));
+		goto sorry;
+	}
+
+	for (i = 0; i < nr; i++) {
+		struct buffer_head *bh = bhs[i];
+
+		lock_buffer(bh);
+
+		/* We have the buffer lock */
+		atomic_inc(&bh->b_count);
+		bh->b_end_io = end_buffer_io_sync;
+
+		switch(rw) {
+		case WRITE:
+			if (!atomic_set_buffer_clean(bh))
+				/* Hmmph! Nothing to write */
+				goto end_io;
+			__mark_buffer_clean(bh);
+			break;
+
+		case READA:
+		case READ:
+			if (buffer_uptodate(bh))
+				/* Hmmph! Already have it */
+				goto end_io;
+			break;
+		default:
+			BUG();
+	end_io:
+			bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+			continue;
+		}
+
+		submit_bh(rw, bh);
+	}
+	return;
+
+sorry:
+	/* Make sure we don't get infinite dirty retries.. */
+	for (i = 0; i < nr; i++)
+		mark_buffer_clean(bhs[i]);
+}
+
+#ifdef CONFIG_STRAM_SWAP
+extern int stram_device_init (void);
+#endif
+
+static void blk_writeback_timer(unsigned long data)
+{
+	wakeup_bdflush();
+	wakeup_kupdate();
+}
+
+/**
+ * end_that_request_first - end I/O on one buffer.
+ * @req:      the request being processed
+ * @uptodate: 0 for I/O error
+ * @name:     the name printed for an I/O error
+ *
+ * Description:
+ *     Ends I/O on the first buffer attached to @req, and sets it up
+ *     for the next buffer_head (if any) in the cluster.
+ *     
+ * Return:
+ *     0 - we are done with this request, call end_that_request_last()
+ *     1 - still buffers pending for this request
+ *
+ * Caveat: 
+ *     Drivers implementing their own end_request handling must call
+ *     blk_finished_io() appropriately.
+ **/
+
+int end_that_request_first (struct request *req, int uptodate, char *name)
+{
+	struct buffer_head * bh;
+	int nsect;
+
+	req->errors = 0;
+	if (!uptodate)
+		printk("end_request: I/O error, dev %s (%s), sector %lu\n",
+			kdevname(req->rq_dev), name, req->sector);
+
+	if ((bh = req->bh) != NULL) {
+		nsect = bh->b_size >> 9;
+		blk_finished_io(nsect);
+		blk_finished_sectors(req, nsect);
+		req->bh = bh->b_reqnext;
+		bh->b_reqnext = NULL;
+		bh->b_end_io(bh, uptodate);
+		if ((bh = req->bh) != NULL) {
+			req->hard_sector += nsect;
+			req->hard_nr_sectors -= nsect;
+			req->sector = req->hard_sector;
+			req->nr_sectors = req->hard_nr_sectors;
+
+			req->current_nr_sectors = bh->b_size >> 9;
+			req->hard_cur_sectors = req->current_nr_sectors;
+			if (req->nr_sectors < req->current_nr_sectors) {
+				req->nr_sectors = req->current_nr_sectors;
+				printk("end_request: buffer-list destroyed\n");
+			}
+			req->buffer = bh->b_data;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+extern int laptop_mode;
+
+void end_that_request_last(struct request *req)
+{
+	struct completion *waiting = req->waiting;
+
+	/*
+	 * schedule the writeout of pending dirty data when the disk is idle
+	 */
+	if (laptop_mode && req->cmd == READ)
+		mod_timer(&writeback_timer, jiffies + 5 * HZ);
+
+	req_finished_io(req);
+	blkdev_release_request(req);
+	if (waiting)
+		complete(waiting);
+}
+
+int __init blk_dev_init(void)
+{
+	struct blk_dev_struct *dev;
+
+	request_cachep = kmem_cache_create("blkdev_requests",
+					   sizeof(struct request),
+					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+	if (!request_cachep)
+		panic("Can't create request pool slab cache\n");
+
+	for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
+		dev->queue = NULL;
+
+	memset(ro_bits,0,sizeof(ro_bits));
+	memset(max_readahead, 0, sizeof(max_readahead));
+	memset(max_sectors, 0, sizeof(max_sectors));
+
+	blk_max_low_pfn = max_low_pfn - 1;
+	blk_max_pfn = max_pfn - 1;
+
+	init_timer(&writeback_timer);
+	writeback_timer.function = blk_writeback_timer;
+
+#ifdef CONFIG_AMIGA_Z2RAM
+	z2_init();
+#endif
+#ifdef CONFIG_STRAM_SWAP
+	stram_device_init();
+#endif
+#ifdef CONFIG_ISP16_CDI
+	isp16_init();
+#endif
+#ifdef CONFIG_BLK_DEV_PS2
+	ps2esdi_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XD
+	xd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_MFM
+	mfm_init();
+#endif
+#ifdef CONFIG_PARIDE
+	{ extern void paride_init(void); paride_init(); };
+#endif
+#ifdef CONFIG_MAC_FLOPPY
+	swim3_init();
+#endif
+#ifdef CONFIG_BLK_DEV_SWIM_IOP
+	swimiop_init();
+#endif
+#ifdef CONFIG_AMIGA_FLOPPY
+	amiga_floppy_init();
+#endif
+#ifdef CONFIG_ATARI_FLOPPY
+	atari_floppy_init();
+#endif
+#ifdef CONFIG_BLK_DEV_FD
+	floppy_init();
+#else
+#if defined(__i386__) && !defined(CONFIG_XEN) /* Do we even need this? */
+	outb_p(0xc, 0x3f2);
+#endif
+#endif
+#ifdef CONFIG_CDU31A
+	cdu31a_init();
+#endif
+#ifdef CONFIG_ATARI_ACSI
+	acsi_init();
+#endif
+#ifdef CONFIG_MCD
+	mcd_init();
+#endif
+#ifdef CONFIG_MCDX
+	mcdx_init();
+#endif
+#ifdef CONFIG_SBPCD
+	sbpcd_init();
+#endif
+#ifdef CONFIG_AZTCD
+	aztcd_init();
+#endif
+#ifdef CONFIG_CDU535
+	sony535_init();
+#endif
+#ifdef CONFIG_GSCD
+	gscd_init();
+#endif
+#ifdef CONFIG_CM206
+	cm206_init();
+#endif
+#ifdef CONFIG_OPTCD
+	optcd_init();
+#endif
+#ifdef CONFIG_SJCD
+	sjcd_init();
+#endif
+#ifdef CONFIG_APBLOCK
+	ap_init();
+#endif
+#ifdef CONFIG_DDV
+	ddv_init();
+#endif
+#ifdef CONFIG_MDISK
+	mdisk_init();
+#endif
+#ifdef CONFIG_DASD
+	dasd_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
+	tapeblock_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XPRAM
+        xpram_init();
+#endif
+
+#ifdef CONFIG_SUN_JSFLASH
+	jsfd_init();
+#endif
+
+#if defined(CONFIG_XEN_BLKDEV_FRONTEND)
+    xlblk_init();
+#endif
+
+	return 0;
+};
+
+EXPORT_SYMBOL(io_request_lock);
+EXPORT_SYMBOL(end_that_request_first);
+EXPORT_SYMBOL(end_that_request_last);
+EXPORT_SYMBOL(blk_grow_request_list);
+EXPORT_SYMBOL(blk_init_queue);
+EXPORT_SYMBOL(blk_get_queue);
+EXPORT_SYMBOL(blk_cleanup_queue);
+EXPORT_SYMBOL(blk_queue_headactive);
+EXPORT_SYMBOL(blk_queue_throttle_sectors);
+EXPORT_SYMBOL(blk_queue_make_request);
+EXPORT_SYMBOL(generic_make_request);
+EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(generic_unplug_device);
+EXPORT_SYMBOL(blk_queue_bounce_limit);
+EXPORT_SYMBOL(blk_max_low_pfn);
+EXPORT_SYMBOL(blk_max_pfn);
+EXPORT_SYMBOL(blk_seg_merge_ok);
+EXPORT_SYMBOL(blk_nohighio);
diff --git a/linux-2.4-xen-sparse/drivers/char/Makefile b/linux-2.4-xen-sparse/drivers/char/Makefile
new file mode 100644
index 0000000000..583a17f1fa
--- /dev/null
+++ b/linux-2.4-xen-sparse/drivers/char/Makefile
@@ -0,0 +1,361 @@
+#
+# Makefile for the kernel character device drivers.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now inherited from the
+# parent makes..
+#
+
+#
+# This file contains the font map for the default (hardware) font
+#
+FONTMAPFILE = cp437.uni
+
+O_TARGET := char.o
+
+obj-y	 += mem.o tty_io.o n_tty.o tty_ioctl.o raw.o pty.o misc.o random.o
+
+# All of the (potential) objects that export symbols.
+# This list comes from 'grep -l EXPORT_SYMBOL *.[hc]'.
+
+export-objs     :=	busmouse.o console.o keyboard.o sysrq.o \
+			misc.o pty.o random.o selection.o serial.o \
+			sonypi.o tty_io.o tty_ioctl.o generic_serial.o \
+			au1000_gpio.o vac-serial.o hp_psaux.o nvram.o \
+			scx200.o fetchop.o
+
+mod-subdirs	:=	joystick ftape drm drm-4.0 pcmcia
+
+list-multi	:=	
+
+KEYMAP   =defkeymap.o
+KEYBD    =pc_keyb.o
+CONSOLE  =console.o
+SERIAL   =serial.o
+
+ifeq ($(ARCH),xen)
+  ifneq ($(CONFIG_XEN_PHYSDEV_ACCESS),y)
+    KEYBD  =
+  endif
+endif
+
+ifeq ($(ARCH),s390)
+  KEYMAP   =
+  KEYBD    =
+  CONSOLE  =
+  SERIAL   =
+endif
+
+ifeq ($(ARCH),mips)
+  ifneq ($(CONFIG_PC_KEYB),y)
+    KEYBD    =
+  endif
+  ifeq ($(CONFIG_VR41XX_KIU),y)
+    KEYMAP   =
+    KEYBD    = vr41xx_keyb.o
+  endif
+endif
+
+ifeq ($(ARCH),s390x)
+  KEYMAP   =
+  KEYBD    =
+  CONSOLE  =
+  SERIAL   =
+endif
+
+ifeq ($(ARCH),m68k)
+   ifdef CONFIG_AMIGA
+      KEYBD = amikeyb.o
+   else
+      ifndef CONFIG_MAC
+	 KEYBD =
+      endif
+   endif
+   SERIAL   =
+endif
+
+ifeq ($(ARCH),parisc)
+   ifdef CONFIG_GSC_PS2
+      KEYBD   = hp_psaux.o hp_keyb.o
+   else
+      KEYBD   =
+   endif
+   ifdef CONFIG_SERIAL_MUX
+      CONSOLE += mux.o
+   endif
+   ifdef CONFIG_PDC_CONSOLE
+      CONSOLE += pdc_console.o
+   endif
+endif
+
+ifdef CONFIG_Q40
+  KEYBD += q40_keyb.o
+  SERIAL = serial.o
+endif
+
+ifdef CONFIG_APOLLO
+  KEYBD += dn_keyb.o
+endif
+
+ifeq ($(ARCH),parisc)
+   ifdef CONFIG_GSC_PS2
+      KEYBD   = hp_psaux.o hp_keyb.o
+   else
+      KEYBD   =
+   endif
+   ifdef CONFIG_PDC_CONSOLE
+      CONSOLE += pdc_console.o
+   endif
+endif
+
+ifeq ($(ARCH),arm)
+  ifneq ($(CONFIG_PC_KEYMAP),y)
+    KEYMAP   =
+  endif
+  ifneq ($(CONFIG_PC_KEYB),y)
+    KEYBD    =
+  endif
+endif
+
+ifeq ($(ARCH),sh)
+  KEYMAP   =
+  KEYBD    =
+  CONSOLE  =
+  ifeq ($(CONFIG_SH_HP600),y)
+  KEYMAP   = defkeymap.o
+  KEYBD    = scan_keyb.o hp600_keyb.o
+  CONSOLE  = console.o
+  endif
+  ifeq ($(CONFIG_SH_DMIDA),y)
+  # DMIDA does not connect the HD64465 PS/2 keyboard port
+  # but we allow for USB keyboards to be plugged in.
+  KEYMAP   = defkeymap.o
+  KEYBD    = # hd64465_keyb.o pc_keyb.o
+  CONSOLE  = console.o
+  endif
+  ifeq ($(CONFIG_SH_EC3104),y)
+  KEYMAP   = defkeymap.o
+  KEYBD    = ec3104_keyb.o
+  CONSOLE  = console.o
+  endif
+  ifeq ($(CONFIG_SH_DREAMCAST),y)
+  KEYMAP   = defkeymap.o
+  KEYBD    =
+  CONSOLE  = console.o
+  endif
+endif
+
+ifeq ($(CONFIG_DECSTATION),y)
+  KEYMAP   =
+  KEYBD    =
+endif
+
+ifeq ($(CONFIG_BAGET_MIPS),y)
+  KEYBD    =
+  SERIAL   = vac-serial.o
+endif
+
+ifeq ($(CONFIG_NINO),y)
+  SERIAL   =
+endif
+
+ifneq ($(CONFIG_SUN_SERIAL),)
+  SERIAL   =
+endif
+
+ifeq ($(CONFIG_QTRONIX_KEYBOARD),y)
+  KEYBD    = qtronix.o
+  KEYMAP   = qtronixmap.o
+endif
+
+ifeq ($(CONFIG_DUMMY_KEYB),y)
+  KEYBD = dummy_keyb.o
+endif
+
+obj-$(CONFIG_VT) += vt.o vc_screen.o consolemap.o consolemap_deftbl.o $(CONSOLE) selection.o
+obj-$(CONFIG_SERIAL) += $(SERIAL)
+obj-$(CONFIG_PARPORT_SERIAL) += parport_serial.o
+obj-$(CONFIG_SERIAL_HCDP) += hcdp_serial.o
+obj-$(CONFIG_SERIAL_21285) += serial_21285.o
+obj-$(CONFIG_SERIAL_SA1100) += serial_sa1100.o
+obj-$(CONFIG_SERIAL_AMBA) += serial_amba.o
+obj-$(CONFIG_TS_AU1X00_ADS7846) += au1000_ts.o
+obj-$(CONFIG_SERIAL_DEC) += decserial.o
+
+ifndef CONFIG_SUN_KEYBOARD
+  obj-$(CONFIG_VT) += keyboard.o $(KEYMAP) $(KEYBD)
+else
+  obj-$(CONFIG_PCI) += keyboard.o $(KEYMAP)
+endif
+
+obj-$(CONFIG_HIL) += hp_keyb.o
+obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o
+obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o
+obj-$(CONFIG_ROCKETPORT) += rocket.o
+obj-$(CONFIG_MOXA_SMARTIO) += mxser.o
+obj-$(CONFIG_MOXA_INTELLIO) += moxa.o
+obj-$(CONFIG_DIGI) += pcxx.o
+obj-$(CONFIG_DIGIEPCA) += epca.o
+obj-$(CONFIG_CYCLADES) += cyclades.o
+obj-$(CONFIG_STALLION) += stallion.o
+obj-$(CONFIG_ISTALLION) += istallion.o
+obj-$(CONFIG_SIBYTE_SB1250_DUART) += sb1250_duart.o
+obj-$(CONFIG_COMPUTONE) += ip2.o ip2main.o
+obj-$(CONFIG_RISCOM8) += riscom8.o
+obj-$(CONFIG_ISI) += isicom.o
+obj-$(CONFIG_ESPSERIAL) += esp.o
+obj-$(CONFIG_SYNCLINK) += synclink.o
+obj-$(CONFIG_SYNCLINKMP) += synclinkmp.o
+obj-$(CONFIG_N_HDLC) += n_hdlc.o
+obj-$(CONFIG_SPECIALIX) += specialix.o
+obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o
+obj-$(CONFIG_A2232) += ser_a2232.o generic_serial.o
+obj-$(CONFIG_SX) += sx.o generic_serial.o
+obj-$(CONFIG_RIO) += rio/rio.o generic_serial.o
+obj-$(CONFIG_SH_SCI) += sh-sci.o generic_serial.o
+obj-$(CONFIG_SERIAL167) += serial167.o
+obj-$(CONFIG_MVME147_SCC) += generic_serial.o vme_scc.o
+obj-$(CONFIG_MVME162_SCC) += generic_serial.o vme_scc.o
+obj-$(CONFIG_BVME6000_SCC) += generic_serial.o vme_scc.o
+obj-$(CONFIG_HVC_CONSOLE) += hvc_console.o
+obj-$(CONFIG_SERIAL_TX3912) += generic_serial.o serial_tx3912.o
+obj-$(CONFIG_TXX927_SERIAL) += serial_txx927.o
+obj-$(CONFIG_SERIAL_TXX9) += generic_serial.o serial_txx9.o
+obj-$(CONFIG_IP22_SERIAL) += sgiserial.o
+obj-$(CONFIG_AU1X00_UART) += au1x00-serial.o
+obj-$(CONFIG_SGI_L1_SERIAL) += sn_serial.o
+
+subdir-$(CONFIG_RIO) += rio
+subdir-$(CONFIG_INPUT) += joystick
+
+obj-$(CONFIG_ATIXL_BUSMOUSE) += atixlmouse.o
+obj-$(CONFIG_LOGIBUSMOUSE) += logibusmouse.o
+obj-$(CONFIG_PRINTER) += lp.o
+obj-$(CONFIG_TIPAR) += tipar.o
+obj-$(CONFIG_OBMOUSE) += obmouse.o
+
+ifeq ($(CONFIG_INPUT),y)
+obj-y += joystick/js.o
+endif
+
+obj-$(CONFIG_FETCHOP) += fetchop.o
+obj-$(CONFIG_BUSMOUSE) += busmouse.o
+obj-$(CONFIG_DTLK) += dtlk.o
+obj-$(CONFIG_R3964) += n_r3964.o
+obj-$(CONFIG_APPLICOM) += applicom.o
+obj-$(CONFIG_SONYPI) += sonypi.o
+obj-$(CONFIG_MS_BUSMOUSE) += msbusmouse.o
+obj-$(CONFIG_82C710_MOUSE) += qpmouse.o
+obj-$(CONFIG_AMIGAMOUSE) += amigamouse.o
+obj-$(CONFIG_ATARIMOUSE) += atarimouse.o
+obj-$(CONFIG_ADBMOUSE) += adbmouse.o
+obj-$(CONFIG_PC110_PAD) += pc110pad.o
+obj-$(CONFIG_MK712_MOUSE) += mk712.o
+obj-$(CONFIG_RTC) += rtc.o
+obj-$(CONFIG_GEN_RTC) += genrtc.o
+obj-$(CONFIG_EFI_RTC) += efirtc.o
+obj-$(CONFIG_SGI_DS1286) += ds1286.o
+obj-$(CONFIG_MIPS_RTC) += mips_rtc.o
+obj-$(CONFIG_SGI_IP27_RTC) += ip27-rtc.o
+ifeq ($(CONFIG_PPC),)
+  obj-$(CONFIG_NVRAM) += nvram.o
+endif
+obj-$(CONFIG_TOSHIBA) += toshiba.o
+obj-$(CONFIG_I8K) += i8k.o
+obj-$(CONFIG_DS1620) += ds1620.o
+obj-$(CONFIG_DS1742) += ds1742.o
+obj-$(CONFIG_INTEL_RNG) += i810_rng.o
+obj-$(CONFIG_AMD_RNG) += amd768_rng.o
+obj-$(CONFIG_HW_RANDOM) += hw_random.o
+obj-$(CONFIG_AMD_PM768) += amd76x_pm.o
+obj-$(CONFIG_BRIQ_PANEL) += briq_panel.o
+
+obj-$(CONFIG_ITE_GPIO) += ite_gpio.o
+obj-$(CONFIG_AU1X00_GPIO) += au1000_gpio.o
+obj-$(CONFIG_AU1X00_USB_TTY) += au1000_usbtty.o
+obj-$(CONFIG_AU1X00_USB_RAW) += au1000_usbraw.o
+obj-$(CONFIG_COBALT_LCD) += lcd.o
+
+obj-$(CONFIG_QIC02_TAPE) += tpqic02.o
+
+subdir-$(CONFIG_FTAPE) += ftape
+subdir-$(CONFIG_DRM_OLD) += drm-4.0
+subdir-$(CONFIG_DRM_NEW) += drm
+subdir-$(CONFIG_PCMCIA) += pcmcia
+subdir-$(CONFIG_AGP) += agp
+
+ifeq ($(CONFIG_FTAPE),y)
+obj-y       += ftape/ftape.o
+endif
+
+obj-$(CONFIG_H8) += h8.o
+obj-$(CONFIG_PPDEV) += ppdev.o
+obj-$(CONFIG_DZ) += dz.o
+obj-$(CONFIG_NWBUTTON) += nwbutton.o
+obj-$(CONFIG_NWFLASH) += nwflash.o
+obj-$(CONFIG_SCx200) += scx200.o
+obj-$(CONFIG_SCx200_GPIO) += scx200_gpio.o
+
+# Only one watchdog can succeed. We probe the hardware watchdog
+# drivers first, then the softdog driver.  This means if your hardware
+# watchdog dies or is 'borrowed' for some reason the software watchdog
+# still gives you some cover.
+
+obj-$(CONFIG_PCWATCHDOG) += pcwd.o
+obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o
+obj-$(CONFIG_ADVANTECH_WDT) += advantechwdt.o
+obj-$(CONFIG_IB700_WDT) += ib700wdt.o
+obj-$(CONFIG_MIXCOMWD) += mixcomwd.o
+obj-$(CONFIG_60XX_WDT) += sbc60xxwdt.o
+obj-$(CONFIG_W83877F_WDT) += w83877f_wdt.o
+obj-$(CONFIG_SC520_WDT) += sc520_wdt.o
+obj-$(CONFIG_WDT) += wdt.o
+obj-$(CONFIG_WDTPCI) += wdt_pci.o
+obj-$(CONFIG_21285_WATCHDOG) += wdt285.o
+obj-$(CONFIG_977_WATCHDOG) += wdt977.o
+obj-$(CONFIG_I810_TCO) += i810-tco.o
+obj-$(CONFIG_MACHZ_WDT) += machzwd.o
+obj-$(CONFIG_SH_WDT) += shwdt.o
+obj-$(CONFIG_EUROTECH_WDT) += eurotechwdt.o
+obj-$(CONFIG_ALIM7101_WDT) += alim7101_wdt.o
+obj-$(CONFIG_ALIM1535_WDT) += alim1535d_wdt.o
+obj-$(CONFIG_INDYDOG) += indydog.o
+obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o
+obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o
+obj-$(CONFIG_WAFER_WDT) += wafer5823wdt.o
+obj-$(CONFIG_SOFT_WATCHDOG) += softdog.o
+obj-$(CONFIG_INDYDOG) += indydog.o
+obj-$(CONFIG_8xx_WDT) += mpc8xx_wdt.o
+
+subdir-$(CONFIG_MWAVE) += mwave
+ifeq ($(CONFIG_MWAVE),y)
+  obj-y += mwave/mwave.o
+endif
+
+subdir-$(CONFIG_IPMI_HANDLER) += ipmi
+ifeq ($(CONFIG_IPMI_HANDLER),y)
+  obj-y += ipmi/ipmi.o
+endif
+
+include $(TOPDIR)/Rules.make
+
+fastdep:
+
+conmakehash: conmakehash.c
+	$(HOSTCC) $(HOSTCFLAGS) -o conmakehash conmakehash.c
+
+consolemap_deftbl.c: $(FONTMAPFILE) conmakehash
+	./conmakehash $(FONTMAPFILE) > consolemap_deftbl.c
+
+consolemap_deftbl.o: consolemap_deftbl.c $(TOPDIR)/include/linux/types.h
+
+.DELETE_ON_ERROR:
+
+defkeymap.c: defkeymap.map
+	set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@
+
+qtronixmap.c: qtronixmap.map
+	set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@
diff --git a/linux-2.4-xen-sparse/drivers/char/mem.c b/linux-2.4-xen-sparse/drivers/char/mem.c
new file mode 100644
index 0000000000..f9ef0f54e9
--- /dev/null
+++ b/linux-2.4-xen-sparse/drivers/char/mem.c
@@ -0,0 +1,812 @@
+/*
+ *  linux/drivers/char/mem.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Added devfs support. 
+ *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
+ *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
+ *
+ *  MODIFIED FOR XEN by Keir Fraser, 10th July 2003.
+ *  Linux running on Xen has strange semantics for /dev/mem and /dev/kmem!!
+ *   1. mmap will not work on /dev/kmem
+ *   2. mmap on /dev/mem interprets the 'file offset' as a machine address
+ *      rather than a physical address.
+ *  I don't believe anyone sane mmaps /dev/kmem, but /dev/mem is mmapped
+ *  to get at memory-mapped I/O spaces (eg. the VESA X server does this).
+ *  For this to work at all we need to expect machine addresses.
+ *  Reading/writing of /dev/kmem expects kernel virtual addresses, as usual.
+ *  Reading/writing of /dev/mem expects 'physical addresses' as usual -- this
+ *  is because /dev/mem can only read/write existing kernel mappings, which
+ *  will be normal RAM, and we should present pseudo-physical layout for all
+ *  except I/O (which is the sticky case that mmap is hacked to deal with).
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/tpqic02.h>
+#include <linux/ftape.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mman.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/raw.h>
+#include <linux/tty.h>
+#include <linux/capability.h>
+#include <linux/ptrace.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+
+#ifdef CONFIG_I2C
+extern int i2c_init_all(void);
+#endif
+#ifdef CONFIG_FB
+extern void fbmem_init(void);
+#endif
+#ifdef CONFIG_PROM_CONSOLE
+extern void prom_con_init(void);
+#endif
+#ifdef CONFIG_MDA_CONSOLE
+extern void mda_console_init(void);
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
+extern void tapechar_init(void);
+#endif
+     
+static ssize_t do_write_mem(struct file * file, void *p, unsigned long realp,
+			    const char * buf, size_t count, loff_t *ppos)
+{
+	ssize_t written;
+
+	written = 0;
+#if defined(__sparc__) || defined(__mc68000__)
+	/* we don't have page 0 mapped on sparc and m68k.. */
+	if (realp < PAGE_SIZE) {
+		unsigned long sz = PAGE_SIZE-realp;
+		if (sz > count) sz = count; 
+		/* Hmm. Do something? */
+		buf+=sz;
+		p+=sz;
+		count-=sz;
+		written+=sz;
+	}
+#endif
+	if (copy_from_user(p, buf, count))
+		return -EFAULT;
+	written += count;
+	*ppos = realp + written;
+	return written;
+}
+
+
+/*
+ * This funcion reads the *physical* memory. The f_pos points directly to the 
+ * memory location. 
+ */
+static ssize_t read_mem(struct file * file, char * buf,
+			size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	unsigned long end_mem;
+	ssize_t read;
+	
+	end_mem = __pa(high_memory);
+	if (p >= end_mem)
+		return 0;
+	if (count > end_mem - p)
+		count = end_mem - p;
+	read = 0;
+#if defined(__sparc__) || defined(__mc68000__)
+	/* we don't have page 0 mapped on sparc and m68k.. */
+	if (p < PAGE_SIZE) {
+		unsigned long sz = PAGE_SIZE-p;
+		if (sz > count) 
+			sz = count; 
+		if (sz > 0) {
+			if (clear_user(buf, sz))
+				return -EFAULT;
+			buf += sz; 
+			p += sz; 
+			count -= sz; 
+			read += sz; 
+		}
+	}
+#endif
+	if (copy_to_user(buf, __va(p), count))
+		return -EFAULT;
+	read += count;
+	*ppos = p + read;
+	return read;
+}
+
+static ssize_t write_mem(struct file * file, const char * buf, 
+			 size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	unsigned long end_mem;
+
+	end_mem = __pa(high_memory);
+	if (p >= end_mem)
+		return 0;
+	if (count > end_mem - p)
+		count = end_mem - p;
+	return do_write_mem(file, __va(p), p, buf, count, ppos);
+}
+
+#ifndef pgprot_noncached
+
+/*
+ * This should probably be per-architecture in <asm/pgtable.h>
+ */
+static inline pgprot_t pgprot_noncached(pgprot_t _prot)
+{
+	unsigned long prot = pgprot_val(_prot);
+
+#if defined(__i386__) || defined(__x86_64__)
+	/* On PPro and successors, PCD alone doesn't always mean 
+	    uncached because of interactions with the MTRRs. PCD | PWT
+	    means definitely uncached. */ 
+	if (boot_cpu_data.x86 > 3)
+		prot |= _PAGE_PCD | _PAGE_PWT;
+#elif defined(__powerpc__)
+	prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+#elif defined(__mc68000__)
+#ifdef SUN3_PAGE_NOCACHE
+	if (MMU_IS_SUN3)
+		prot |= SUN3_PAGE_NOCACHE;
+	else
+#endif
+	if (MMU_IS_851 || MMU_IS_030)
+		prot |= _PAGE_NOCACHE030;
+	/* Use no-cache mode, serialized */
+	else if (MMU_IS_040 || MMU_IS_060)
+		prot = (prot & _CACHEMASK040) | _PAGE_NOCACHE_S;
+#endif
+
+	return __pgprot(prot);
+}
+
+#endif /* !pgprot_noncached */
+
+/*
+ * Architectures vary in how they handle caching for addresses 
+ * outside of main memory.
+ */
+static inline int noncached_address(unsigned long addr)
+{
+#if defined(__i386__)
+	/* 
+	 * On the PPro and successors, the MTRRs are used to set
+	 * memory types for physical addresses outside main memory, 
+	 * so blindly setting PCD or PWT on those pages is wrong.
+	 * For Pentiums and earlier, the surround logic should disable 
+	 * caching for the high addresses through the KEN pin, but
+	 * we maintain the tradition of paranoia in this code.
+	 */
+ 	return !( test_bit(X86_FEATURE_MTRR, &boot_cpu_data.x86_capability) ||
+		  test_bit(X86_FEATURE_K6_MTRR, &boot_cpu_data.x86_capability) ||
+		  test_bit(X86_FEATURE_CYRIX_ARR, &boot_cpu_data.x86_capability) ||
+		  test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) )
+	  && addr >= __pa(high_memory);
+#else
+	return addr >= __pa(high_memory);
+#endif
+}
+
+#if !defined(CONFIG_XEN)
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+{
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+
+	/*
+	 * Accessing memory above the top the kernel knows about or
+	 * through a file pointer that was marked O_SYNC will be
+	 * done non-cached.
+	 */
+	if (noncached_address(offset) || (file->f_flags & O_SYNC))
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	/* Don't try to swap out physical pages.. */
+	vma->vm_flags |= VM_RESERVED;
+
+	/*
+	 * Don't dump addresses that are not real memory to a core file.
+	 */
+	if (offset >= __pa(high_memory) || (file->f_flags & O_SYNC))
+		vma->vm_flags |= VM_IO;
+
+	if (remap_page_range(vma->vm_start, offset, vma->vm_end-vma->vm_start,
+			     vma->vm_page_prot))
+		return -EAGAIN;
+	return 0;
+}
+#elif !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+{
+	return -ENXIO;
+}
+#else
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+{
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+
+	if (!(xen_start_info.flags & SIF_PRIVILEGED))
+		return -ENXIO;
+
+	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset, 
+				vma->vm_end-vma->vm_start, vma->vm_page_prot,
+				DOMID_IO))
+		return -EAGAIN;
+	return 0;
+}
+#endif /* CONFIG_XEN */
+
+/*
+ * This function reads the *virtual* memory as seen by the kernel.
+ */
+static ssize_t read_kmem(struct file *file, char *buf, 
+			 size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	ssize_t read = 0;
+	ssize_t virtr = 0;
+	char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
+		
+	if (p < (unsigned long) high_memory) {
+		read = count;
+		if (count > (unsigned long) high_memory - p)
+			read = (unsigned long) high_memory - p;
+
+#if defined(__sparc__) || defined(__mc68000__)
+		/* we don't have page 0 mapped on sparc and m68k.. */
+		if (p < PAGE_SIZE && read > 0) {
+			size_t tmp = PAGE_SIZE - p;
+			if (tmp > read) tmp = read;
+			if (clear_user(buf, tmp))
+				return -EFAULT;
+			buf += tmp;
+			p += tmp;
+			read -= tmp;
+			count -= tmp;
+		}
+#endif
+		if (copy_to_user(buf, (char *)p, read))
+			return -EFAULT;
+		p += read;
+		buf += read;
+		count -= read;
+	}
+
+	if (count > 0) {
+		kbuf = (char *)__get_free_page(GFP_KERNEL);
+		if (!kbuf)
+			return -ENOMEM;
+		while (count > 0) {
+			int len = count;
+
+			if (len > PAGE_SIZE)
+				len = PAGE_SIZE;
+			len = vread(kbuf, (char *)p, len);
+			if (!len)
+				break;
+			if (copy_to_user(buf, kbuf, len)) {
+				free_page((unsigned long)kbuf);
+				return -EFAULT;
+			}
+			count -= len;
+			buf += len;
+			virtr += len;
+			p += len;
+		}
+		free_page((unsigned long)kbuf);
+	}
+ 	*ppos = p;
+ 	return virtr + read;
+}
+
+extern long vwrite(char *buf, char *addr, unsigned long count);
+
+/*
+ * This function writes to the *virtual* memory as seen by the kernel.
+ */
+static ssize_t write_kmem(struct file * file, const char * buf, 
+			  size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	ssize_t wrote = 0;
+	ssize_t virtr = 0;
+	char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
+
+	if (p < (unsigned long) high_memory) {
+		wrote = count;
+		if (count > (unsigned long) high_memory - p)
+			wrote = (unsigned long) high_memory - p;
+
+		wrote = do_write_mem(file, (void*)p, p, buf, wrote, ppos);
+
+		p += wrote;
+		buf += wrote;
+		count -= wrote;
+	}
+
+	if (count > 0) {
+		kbuf = (char *)__get_free_page(GFP_KERNEL);
+		if (!kbuf)
+			return -ENOMEM;
+		while (count > 0) {
+			int len = count;
+
+			if (len > PAGE_SIZE)
+				len = PAGE_SIZE;
+			if (len && copy_from_user(kbuf, buf, len)) {
+				free_page((unsigned long)kbuf);
+				return -EFAULT;
+			}
+			len = vwrite(kbuf, (char *)p, len);
+			count -= len;
+			buf += len;
+			virtr += len;
+			p += len;
+		}
+		free_page((unsigned long)kbuf);
+	}
+
+ 	*ppos = p;
+ 	return virtr + wrote;
+}
+
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+static ssize_t read_port(struct file * file, char * buf,
+			 size_t count, loff_t *ppos)
+{
+	unsigned long i = *ppos;
+	char *tmp = buf;
+
+	if (verify_area(VERIFY_WRITE,buf,count))
+		return -EFAULT; 
+	while (count-- > 0 && i < 65536) {
+		if (__put_user(inb(i),tmp) < 0) 
+			return -EFAULT;  
+		i++;
+		tmp++;
+	}
+	*ppos = i;
+	return tmp-buf;
+}
+
+static ssize_t write_port(struct file * file, const char * buf,
+			  size_t count, loff_t *ppos)
+{
+	unsigned long i = *ppos;
+	const char * tmp = buf;
+
+	if (verify_area(VERIFY_READ,buf,count))
+		return -EFAULT;
+	while (count-- > 0 && i < 65536) {
+		char c;
+		if (__get_user(c, tmp)) 
+			return -EFAULT; 
+		outb(c,i);
+		i++;
+		tmp++;
+	}
+	*ppos = i;
+	return tmp-buf;
+}
+#endif
+
+static ssize_t read_null(struct file * file, char * buf,
+			 size_t count, loff_t *ppos)
+{
+	return 0;
+}
+
+static ssize_t write_null(struct file * file, const char * buf,
+			  size_t count, loff_t *ppos)
+{
+	return count;
+}
+
+/*
+ * For fun, we are using the MMU for this.
+ */
+static inline size_t read_zero_pagealigned(char * buf, size_t size)
+{
+	struct mm_struct *mm;
+	struct vm_area_struct * vma;
+	unsigned long addr=(unsigned long)buf;
+
+	mm = current->mm;
+	/* Oops, this was forgotten before. -ben */
+	down_read(&mm->mmap_sem);
+
+	/* For private mappings, just map in zero pages. */
+	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+		unsigned long count;
+
+		if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
+			goto out_up;
+		if (vma->vm_flags & VM_SHARED)
+			break;
+		count = vma->vm_end - addr;
+		if (count > size)
+			count = size;
+
+		zap_page_range(mm, addr, count);
+        	zeromap_page_range(addr, count, PAGE_COPY);
+
+		size -= count;
+		buf += count;
+		addr += count;
+		if (size == 0)
+			goto out_up;
+	}
+
+	up_read(&mm->mmap_sem);
+	
+	/* The shared case is hard. Let's do the conventional zeroing. */ 
+	do {
+		unsigned long unwritten = clear_user(buf, PAGE_SIZE);
+		if (unwritten)
+			return size + unwritten - PAGE_SIZE;
+		if (current->need_resched)
+			schedule();
+		buf += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	} while (size);
+
+	return size;
+out_up:
+	up_read(&mm->mmap_sem);
+	return size;
+}
+
+static ssize_t read_zero(struct file * file, char * buf, 
+			 size_t count, loff_t *ppos)
+{
+	unsigned long left, unwritten, written = 0;
+
+	if (!count)
+		return 0;
+
+	if (!access_ok(VERIFY_WRITE, buf, count))
+		return -EFAULT;
+
+	left = count;
+
+	/* do we want to be clever? Arbitrary cut-off */
+	if (count >= PAGE_SIZE*4) {
+		unsigned long partial;
+
+		/* How much left of the page? */
+		partial = (PAGE_SIZE-1) & -(unsigned long) buf;
+		unwritten = clear_user(buf, partial);
+		written = partial - unwritten;
+		if (unwritten)
+			goto out;
+		left -= partial;
+		buf += partial;
+		unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
+		written += (left & PAGE_MASK) - unwritten;
+		if (unwritten)
+			goto out;
+		buf += left & PAGE_MASK;
+		left &= ~PAGE_MASK;
+	}
+	unwritten = clear_user(buf, left);
+	written += left - unwritten;
+out:
+	return written ? written : -EFAULT;
+}
+
+static int mmap_zero(struct file * file, struct vm_area_struct * vma)
+{
+	if (vma->vm_flags & VM_SHARED)
+		return shmem_zero_setup(vma);
+	if (zeromap_page_range(vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
+		return -EAGAIN;
+	return 0;
+}
+
+static ssize_t write_full(struct file * file, const char * buf,
+			  size_t count, loff_t *ppos)
+{
+	return -ENOSPC;
+}
+
+/*
+ * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
+ * can fopen() both devices with "a" now.  This was previously impossible.
+ * -- SRB.
+ */
+
+static loff_t null_lseek(struct file * file, loff_t offset, int orig)
+{
+	return file->f_pos = 0;
+}
+
+/*
+ * The memory devices use the full 32/64 bits of the offset, and so we cannot
+ * check against negative addresses: they are ok. The return value is weird,
+ * though, in that case (0).
+ *
+ * also note that seeking relative to the "end of file" isn't supported:
+ * it has no meaning, so it returns -EINVAL.
+ */
+static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
+{
+	loff_t ret;
+
+	switch (orig) {
+		case 0:
+			file->f_pos = offset;
+			ret = file->f_pos;
+			force_successful_syscall_return();
+			break;
+		case 1:
+			file->f_pos += offset;
+			ret = file->f_pos;
+			force_successful_syscall_return();
+			break;
+		default:
+			ret = -EINVAL;
+	}
+	return ret;
+}
+
+static int open_port(struct inode * inode, struct file * filp)
+{
+	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+struct page *kmem_vm_nopage(struct vm_area_struct *vma, unsigned long address, int write)
+{
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long kaddr;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	struct page *page = NULL;
+
+	/* address is user VA; convert to kernel VA of desired page */
+	kaddr = (address - vma->vm_start) + offset;
+	kaddr = VMALLOC_VMADDR(kaddr);
+
+	spin_lock(&init_mm.page_table_lock);
+
+	/* Lookup page structure for kernel VA */
+	pgd = pgd_offset(&init_mm, kaddr);
+	if (pgd_none(*pgd) || pgd_bad(*pgd))
+		goto out;
+	pmd = pmd_offset(pgd, kaddr);
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
+		goto out;
+	ptep = pte_offset(pmd, kaddr);
+	if (!ptep)
+		goto out;
+	pte = *ptep;
+	if (!pte_present(pte))
+		goto out;
+	if (write && !pte_write(pte))
+		goto out;
+	page = pte_page(pte);
+	if (!VALID_PAGE(page)) {
+		page = NULL;
+		goto out;
+	}
+
+	/* Increment reference count on page */
+	get_page(page);
+
+out:
+	spin_unlock(&init_mm.page_table_lock);
+
+	return page;
+}
+
+struct vm_operations_struct kmem_vm_ops = {
+	nopage:		kmem_vm_nopage,
+};
+
+static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+{
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long size = vma->vm_end - vma->vm_start;
+
+	/*
+	 * If the user is not attempting to mmap a high memory address then
+	 * the standard mmap_mem mechanism will work.  High memory addresses
+	 * need special handling, as remap_page_range expects a physically-
+	 * contiguous range of kernel addresses (such as obtained in kmalloc).
+	 */
+	if ((offset + size) < (unsigned long) high_memory)
+		return mmap_mem(file, vma);
+
+	/*
+	 * Accessing memory above the top the kernel knows about or
+	 * through a file pointer that was marked O_SYNC will be
+	 * done non-cached.
+	 */
+	if (noncached_address(offset) || (file->f_flags & O_SYNC))
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	/* Don't do anything here; "nopage" will fill the holes */
+	vma->vm_ops = &kmem_vm_ops;
+
+	/* Don't try to swap out physical pages.. */
+	vma->vm_flags |= VM_RESERVED;
+
+	/*
+	 * Don't dump addresses that are not real memory to a core file.
+	 */
+	vma->vm_flags |= VM_IO;
+
+	return 0;
+}
+
+#define zero_lseek	null_lseek
+#define full_lseek      null_lseek
+#define write_zero	write_null
+#define read_full       read_zero
+#define open_mem	open_port
+#define open_kmem	open_mem
+
+static struct file_operations mem_fops = {
+	llseek:		memory_lseek,
+	read:		read_mem,
+	write:		write_mem,
+	mmap:		mmap_mem,
+	open:		open_mem,
+};
+
+static struct file_operations kmem_fops = {
+	llseek:		memory_lseek,
+	read:		read_kmem,
+	write:		write_kmem,
+#if !defined(CONFIG_XEN)
+	mmap:		mmap_kmem,
+#endif
+	open:		open_kmem,
+};
+
+static struct file_operations null_fops = {
+	llseek:		null_lseek,
+	read:		read_null,
+	write:		write_null,
+};
+
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+static struct file_operations port_fops = {
+	llseek:		memory_lseek,
+	read:		read_port,
+	write:		write_port,
+	open:		open_port,
+};
+#endif
+
+static struct file_operations zero_fops = {
+	llseek:		zero_lseek,
+	read:		read_zero,
+	write:		write_zero,
+	mmap:		mmap_zero,
+};
+
+static struct file_operations full_fops = {
+	llseek:		full_lseek,
+	read:		read_full,
+	write:		write_full,
+};
+
+static int memory_open(struct inode * inode, struct file * filp)
+{
+	switch (MINOR(inode->i_rdev)) {
+		case 1:
+			filp->f_op = &mem_fops;
+			break;
+		case 2:
+			filp->f_op = &kmem_fops;
+			break;
+		case 3:
+			filp->f_op = &null_fops;
+			break;
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+		case 4:
+			filp->f_op = &port_fops;
+			break;
+#endif
+		case 5:
+			filp->f_op = &zero_fops;
+			break;
+		case 7:
+			filp->f_op = &full_fops;
+			break;
+		case 8:
+			filp->f_op = &random_fops;
+			break;
+		case 9:
+			filp->f_op = &urandom_fops;
+			break;
+		default:
+			return -ENXIO;
+	}
+	if (filp->f_op && filp->f_op->open)
+		return filp->f_op->open(inode,filp);
+	return 0;
+}
+
+void __init memory_devfs_register (void)
+{
+    /*  These are never unregistered  */
+    static const struct {
+	unsigned short minor;
+	char *name;
+	umode_t mode;
+	struct file_operations *fops;
+    } list[] = { /* list of minor devices */
+	{1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
+	{2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
+	{3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
+#if defined(CONFIG_ISA) || !defined(__mc68000__)
+	{4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
+#endif
+	{5, "zero",    S_IRUGO | S_IWUGO,           &zero_fops},
+	{7, "full",    S_IRUGO | S_IWUGO,           &full_fops},
+	{8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
+	{9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops}
+    };
+    int i;
+
+    for (i=0; i<(sizeof(list)/sizeof(*list)); i++)
+	devfs_register (NULL, list[i].name, DEVFS_FL_NONE,
+			MEM_MAJOR, list[i].minor,
+			list[i].mode | S_IFCHR,
+			list[i].fops, NULL);
+}
+
+static struct file_operations memory_fops = {
+	open:		memory_open,	/* just a selector for the real open */
+};
+
+int __init chr_dev_init(void)
+{
+	if (devfs_register_chrdev(MEM_MAJOR,"mem",&memory_fops))
+		printk("unable to get major %d for memory devs\n", MEM_MAJOR);
+	memory_devfs_register();
+	rand_initialize();
+#ifdef CONFIG_I2C
+	i2c_init_all();
+#endif
+#if defined (CONFIG_FB)
+	fbmem_init();
+#endif
+#if defined (CONFIG_PROM_CONSOLE)
+	prom_con_init();
+#endif
+#if defined (CONFIG_MDA_CONSOLE)
+	mda_console_init();
+#endif
+	tty_init();
+#ifdef CONFIG_M68K_PRINTER
+	lp_m68k_init();
+#endif
+	misc_init();
+#if CONFIG_QIC02_TAPE
+	qic02_tape_init();
+#endif
+#ifdef CONFIG_FTAPE
+	ftape_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
+	tapechar_init();
+#endif
+	return 0;
+}
+
+__initcall(chr_dev_init);
diff --git a/linux-2.4-xen-sparse/drivers/char/tty_io.c b/linux-2.4-xen-sparse/drivers/char/tty_io.c
new file mode 100644
index 0000000000..c08218e8cc
--- /dev/null
+++ b/linux-2.4-xen-sparse/drivers/char/tty_io.c
@@ -0,0 +1,2891 @@
+/*
+ *  linux/drivers/char/tty_io.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles
+ * or rs-channels. It also implements echoing, cooked mode etc.
+ *
+ * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0.
+ *
+ * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the
+ * tty_struct and tty_queue structures.  Previously there was an array
+ * of 256 tty_struct's which was statically allocated, and the
+ * tty_queue structures were allocated at boot time.  Both are now
+ * dynamically allocated only when the tty is open.
+ *
+ * Also restructured routines so that there is more of a separation
+ * between the high-level tty routines (tty_io.c and tty_ioctl.c) and
+ * the low-level tty routines (serial.c, pty.c, console.c).  This
+ * makes for cleaner and more compact code.  -TYT, 9/17/92 
+ *
+ * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines
+ * which can be dynamically activated and de-activated by the line
+ * discipline handling modules (like SLIP).
+ *
+ * NOTE: pay no attention to the line discipline code (yet); its
+ * interface is still subject to change in this version...
+ * -- TYT, 1/31/92
+ *
+ * Added functionality to the OPOST tty handling.  No delays, but all
+ * other bits should be there.
+ *	-- Nick Holloway <alfie@dcs.warwick.ac.uk>, 27th May 1993.
+ *
+ * Rewrote canonical mode and added more termios flags.
+ * 	-- julian@uhunix.uhcc.hawaii.edu (J. Cowley), 13Jan94
+ *
+ * Reorganized FASYNC support so mouse code can share it.
+ *	-- ctm@ardi.com, 9Sep95
+ *
+ * New TIOCLINUX variants added.
+ *	-- mj@k332.feld.cvut.cz, 19-Nov-95
+ * 
+ * Restrict vt switching via ioctl()
+ *      -- grif@cs.ucr.edu, 5-Dec-95
+ *
+ * Move console and virtual terminal code to more appropriate files,
+ * implement CONFIG_VT and generalize console device interface.
+ *	-- Marko Kohtala <Marko.Kohtala@hut.fi>, March 97
+ *
+ * Rewrote init_dev and release_dev to eliminate races.
+ *	-- Bill Hawes <whawes@star.net>, June 97
+ *
+ * Added devfs support.
+ *      -- C. Scott Ananian <cananian@alumni.princeton.edu>, 13-Jan-1998
+ *
+ * Added support for a Unix98-style ptmx device.
+ *      -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998
+ *
+ * Reduced memory usage for older ARM systems
+ *      -- Russell King <rmk@arm.linux.org.uk>
+ *
+ * Move do_SAK() into process context.  Less stack use in devfs functions.
+ * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <linux/devpts_fs.h>
+#include <linux/file.h>
+#include <linux/console.h>
+#include <linux/timer.h>
+#include <linux/ctype.h>
+#include <linux/kd.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+
+#include <linux/kbd_kern.h>
+#include <linux/vt_kern.h>
+#include <linux/selection.h>
+#include <linux/devfs_fs_kernel.h>
+
+#include <linux/kmod.h>
+
+#ifdef CONFIG_XEN_CONSOLE
+extern void xen_console_init(void);
+#endif
+
+#ifdef CONFIG_VT
+extern void con_init_devfs (void);
+#endif
+
+extern void disable_early_printk(void);
+
+#define CONSOLE_DEV MKDEV(TTY_MAJOR,0)
+#define TTY_DEV MKDEV(TTYAUX_MAJOR,0)
+#define SYSCONS_DEV MKDEV(TTYAUX_MAJOR,1)
+#define PTMX_DEV MKDEV(TTYAUX_MAJOR,2)
+
+#undef TTY_DEBUG_HANGUP
+
+#define TTY_PARANOIA_CHECK 1
+#define CHECK_TTY_COUNT 1
+
+struct termios tty_std_termios;		/* for the benefit of tty drivers  */
+struct tty_driver *tty_drivers;		/* linked list of tty drivers */
+
+#ifdef CONFIG_UNIX98_PTYS
+extern struct tty_driver ptm_driver[];	/* Unix98 pty masters; for /dev/ptmx */
+extern struct tty_driver pts_driver[];	/* Unix98 pty slaves;  for /dev/ptmx */
+#endif
+
+static void initialize_tty_struct(struct tty_struct *tty);
+
+static ssize_t tty_read(struct file *, char *, size_t, loff_t *);
+static ssize_t tty_write(struct file *, const char *, size_t, loff_t *);
+static unsigned int tty_poll(struct file *, poll_table *);
+static int tty_open(struct inode *, struct file *);
+static int tty_release(struct inode *, struct file *);
+int tty_ioctl(struct inode * inode, struct file * file,
+	      unsigned int cmd, unsigned long arg);
+static int tty_fasync(int fd, struct file * filp, int on);
+extern int vme_scc_init (void);
+extern long vme_scc_console_init(void);
+extern int serial167_init(void);
+extern long serial167_console_init(void);
+extern void console_8xx_init(void);
+extern void au1x00_serial_console_init(void);
+extern int rs_8xx_init(void);
+extern void mac_scc_console_init(void);
+extern void hwc_console_init(void);
+extern void hwc_tty_init(void);
+extern void con3215_init(void);
+extern void tty3215_init(void);
+extern void tub3270_con_init(void);
+extern void tub3270_init(void);
+extern void rs285_console_init(void);
+extern void sa1100_rs_console_init(void);
+extern void sgi_serial_console_init(void);
+extern void sn_sal_serial_console_init(void);
+extern void sci_console_init(void);
+extern void dec_serial_console_init(void);
+extern void tx3912_console_init(void);
+extern void tx3912_rs_init(void);
+extern void txx927_console_init(void);
+extern void txx9_rs_init(void);
+extern void txx9_serial_console_init(void);
+extern void sb1250_serial_console_init(void);
+extern void arc_console_init(void);
+extern int hvc_console_init(void);
+
+#ifndef MIN
+#define MIN(a,b)	((a) < (b) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a,b)	((a) < (b) ? (b) : (a))
+#endif
+
+static struct tty_struct *alloc_tty_struct(void)
+{
+	struct tty_struct *tty;
+
+	tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+	if (tty)
+		memset(tty, 0, sizeof(struct tty_struct));
+	return tty;
+}
+
+static inline void free_tty_struct(struct tty_struct *tty)
+{
+	kfree(tty);
+}
+
+/*
+ * This routine returns the name of tty.
+ */
+static char *
+_tty_make_name(struct tty_struct *tty, const char *name, char *buf)
+{
+	int idx = (tty)?MINOR(tty->device) - tty->driver.minor_start:0;
+
+	if (!tty) /* Hmm.  NULL pointer.  That's fun. */
+		strcpy(buf, "NULL tty");
+	else
+		sprintf(buf, name,
+			idx + tty->driver.name_base);
+		
+	return buf;
+}
+
+#define TTY_NUMBER(tty) (MINOR((tty)->device) - (tty)->driver.minor_start + \
+			 (tty)->driver.name_base)
+
+char *tty_name(struct tty_struct *tty, char *buf)
+{
+	return _tty_make_name(tty, (tty)?tty->driver.name:NULL, buf);
+}
+
+inline int tty_paranoia_check(struct tty_struct *tty, kdev_t device,
+			      const char *routine)
+{
+#ifdef TTY_PARANOIA_CHECK
+	static const char badmagic[] = KERN_WARNING
+		"Warning: bad magic number for tty struct (%s) in %s\n";
+	static const char badtty[] = KERN_WARNING
+		"Warning: null TTY for (%s) in %s\n";
+
+	if (!tty) {
+		printk(badtty, kdevname(device), routine);
+		return 1;
+	}
+	if (tty->magic != TTY_MAGIC) {
+		printk(badmagic, kdevname(device), routine);
+		return 1;
+	}
+#endif
+	return 0;
+}
+
+static int check_tty_count(struct tty_struct *tty, const char *routine)
+{
+#ifdef CHECK_TTY_COUNT
+	struct list_head *p;
+	int count = 0;
+	
+	file_list_lock();
+	for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) {
+		if(list_entry(p, struct file, f_list)->private_data == tty)
+			count++;
+	}
+	file_list_unlock();
+	if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver.subtype == PTY_TYPE_SLAVE &&
+	    tty->link && tty->link->count)
+		count++;
+	if (tty->count != count) {
+		printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) "
+				    "!= #fd's(%d) in %s\n",
+		       kdevname(tty->device), tty->count, count, routine);
+		return count;
+       }	
+#endif
+	return 0;
+}
+
+/*
+ *     This is probably overkill for real world processors but
+ *     they are not on hot paths so a little discipline won't do
+ *     any harm.
+ */    
+
+static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
+{
+	down(&tty->termios_sem);
+	tty->termios->c_line = num;
+	up(&tty->termios_sem);
+}
+
+/*
+ *     This guards the refcounted line discipline lists. The lock
+ *     must be taken with irqs off because there are hangup path
+ *     callers who will do ldisc lookups and cannot sleep.
+ */
+
+spinlock_t tty_ldisc_lock = SPIN_LOCK_UNLOCKED;
+DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait);
+struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table     */
+
+int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
+{
+
+	unsigned long flags;
+	int ret = 0;
+
+	if (disc < N_TTY || disc >= NR_LDISCS)
+		return -EINVAL;
+
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	if (new_ldisc) {
+		tty_ldiscs[disc] = *new_ldisc;
+		tty_ldiscs[disc].num = disc;
+		tty_ldiscs[disc].flags |= LDISC_FLAG_DEFINED;
+		tty_ldiscs[disc].refcount = 0;
+	} else {
+		if(tty_ldiscs[disc].refcount)
+			ret = -EBUSY;
+		else
+			tty_ldiscs[disc].flags &= ~LDISC_FLAG_DEFINED;
+        }
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	
+	return ret;
+
+}
+
+
+EXPORT_SYMBOL(tty_register_ldisc);
+
+struct tty_ldisc *tty_ldisc_get(int disc)
+{
+	unsigned long flags;
+	struct tty_ldisc *ld;
+
+	if (disc < N_TTY || disc >= NR_LDISCS)
+		return NULL;
+
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+
+	ld = &tty_ldiscs[disc];
+	/* Check the entry is defined */
+	if(ld->flags & LDISC_FLAG_DEFINED)
+		ld->refcount++;
+	else
+		ld = NULL;
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	return ld;
+}
+
+EXPORT_SYMBOL_GPL(tty_ldisc_get);
+
+void tty_ldisc_put(int disc)
+{
+	struct tty_ldisc *ld;
+	unsigned long flags;
+
+	if (disc < N_TTY || disc >= NR_LDISCS)
+		BUG();
+
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	ld = &tty_ldiscs[disc];
+	if(ld->refcount <= 0)
+		BUG();
+	ld->refcount--;
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+}
+      
+EXPORT_SYMBOL_GPL(tty_ldisc_put);
+
+void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+	tty->ldisc = *ld;
+	tty->ldisc.refcount = 0;
+}
+
+/**
+ *     tty_ldisc_try           -       internal helper
+ *     @tty: the tty
+ *
+ *     Make a single attempt to grab and bump the refcount on
+ *     the tty ldisc. Return 0 on failure or 1 on success. This is
+ *     used to implement both the waiting and non waiting versions
+ *     of tty_ldisc_ref
+ */
+
+static int tty_ldisc_try(struct tty_struct *tty)
+{
+	unsigned long flags;
+	struct tty_ldisc *ld;
+	int ret = 0;
+
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	ld = &tty->ldisc;
+	if(test_bit(TTY_LDISC, &tty->flags))
+	{
+		ld->refcount++;
+		ret = 1;
+	}
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	return ret;
+}
+
+/**
+ *     tty_ldisc_ref_wait      -       wait for the tty ldisc
+ *     @tty: tty device
+ *
+ *     Dereference the line discipline for the terminal and take a
+ *     reference to it. If the line discipline is in flux then
+ *     wait patiently until it changes.
+ *
+ *     Note: Must not be called from an IRQ/timer context. The caller
+ *     must also be careful not to hold other locks that will deadlock
+ *     against a discipline change, such as an existing ldisc reference
+ *     (which we check for)
+ */
+
+struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
+{
+	/* wait_event is a macro */
+	wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
+	return &tty->ldisc;
+}
+
+EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
+
+/**
+ *     tty_ldisc_ref           -       get the tty ldisc
+ *     @tty: tty device
+ *
+ *     Dereference the line discipline for the terminal and take a
+ *     reference to it. If the line discipline is in flux then
+ *     return NULL. Can be called from IRQ and timer functions.
+ */
+
+struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
+{
+	if(tty_ldisc_try(tty))
+		return &tty->ldisc;
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(tty_ldisc_ref);
+
+
+void tty_ldisc_deref(struct tty_ldisc *ld)
+{
+
+	unsigned long flags;
+
+	if(ld == NULL)
+		BUG();
+
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	if(ld->refcount == 0)
+		printk(KERN_EMERG "tty_ldisc_deref: no references.\n");
+	else
+		ld->refcount--;
+	if(ld->refcount == 0)
+		wake_up(&tty_ldisc_wait);
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+}
+
+EXPORT_SYMBOL_GPL(tty_ldisc_deref);
+
+/**
+  *     tty_ldisc_enable        -       allow ldisc use
+  *     @tty: terminal to activate ldisc on
+  *
+  *     Set the TTY_LDISC flag when the line discipline can be called
+  *     again. Do neccessary wakeups for existing sleepers.
+  *
+  *     Note: nobody should set this bit except via this function. Clearing
+  *     directly is allowed.
+  */
+
+static void tty_ldisc_enable(struct tty_struct *tty)
+{
+	set_bit(TTY_LDISC, &tty->flags);
+	wake_up(&tty_ldisc_wait);
+}
+
+/**
+ *     tty_set_ldisc           -       set line discipline
+ *     @tty: the terminal to set
+ *     @ldisc: the line discipline
+ *
+ *     Set the discipline of a tty line. Must be called from a process
+ *     context.
+ */
+
+static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
+{
+	int	retval = 0;
+	struct	tty_ldisc o_ldisc;
+	char buf[64];
+	unsigned long flags;
+	struct tty_ldisc *ld;
+
+	if ((ldisc < N_TTY) || (ldisc >= NR_LDISCS))
+		return -EINVAL;
+
+restart:
+
+	if (tty->ldisc.num == ldisc)
+		return 0; /* We are already in the desired discipline */
+
+	ld = tty_ldisc_get(ldisc);
+	/* Eduardo Blanco <ejbs@cs.cs.com.uy> */
+	/* Cyrus Durgin <cider@speakeasy.org> */
+	if (ld == NULL)
+	{
+		char modname [20];
+                sprintf(modname, "tty-ldisc-%d", ldisc);
+                request_module (modname);
+		ld = tty_ldisc_get(ldisc);
+	}
+	
+	if (ld == NULL)
+		return -EINVAL;
+
+	
+	o_ldisc = tty->ldisc;
+	tty_wait_until_sent(tty, 0);
+
+	/*
+	 *      Make sure we don't change while someone holds a
+	 *      reference to the line discipline. The TTY_LDISC bit
+	 *      prevents anyone taking a reference once it is clear.
+	 *      We need the lock to avoid racing reference takers.
+	 */
+
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	if(tty->ldisc.refcount)
+	{
+		/* Free the new ldisc we grabbed. Must drop the lock
+		   first. */
+		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+		tty_ldisc_put(ldisc);
+		/*
+		 * There are several reasons we may be busy, including
+		 * random momentary I/O traffic. We must therefore
+		 * retry. We could distinguish between blocking ops
+		 * and retries if we made tty_ldisc_wait() smarter. That
+		 * is up for discussion.
+		 */
+		if(wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0)
+			return -ERESTARTSYS;
+		goto restart;
+	}
+	clear_bit(TTY_LDISC, &tty->flags);
+	clear_bit(TTY_DONT_FLIP, &tty->flags);
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+
+	/*
+	 *      From this point on we know nobody has an ldisc
+	 *      usage reference, nor can they obtain one until
+	 *      we say so later on.
+	 */
+
+	/*
+	 * Wait for ->hangup_work and ->flip.work handlers to terminate
+	 */
+	run_task_queue(&tq_timer);
+	flush_scheduled_tasks();
+	
+	/* Shutdown the current discipline. */
+	if (tty->ldisc.close)
+		(tty->ldisc.close)(tty);
+
+	/* Now set up the new line discipline. */
+	tty_ldisc_assign(tty, ld);
+	tty_set_termios_ldisc(tty, ldisc);
+	if (tty->ldisc.open)
+		retval = (tty->ldisc.open)(tty);
+	if (retval < 0) {
+		tty_ldisc_put(ldisc);
+		/* There is an outstanding reference here so this is safe */
+		tty_ldisc_assign(tty, tty_ldisc_get(o_ldisc.num));
+		tty_set_termios_ldisc(tty, tty->ldisc.num);
+		if (tty->ldisc.open && (tty->ldisc.open(tty) < 0)) {
+			tty_ldisc_put(o_ldisc.num);
+			/* This driver is always present */
+			tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
+			tty_set_termios_ldisc(tty, N_TTY);
+			if (tty->ldisc.open) {
+				int r = tty->ldisc.open(tty);
+
+				if (r < 0)
+					panic("Couldn't open N_TTY ldisc for "
+					      "%s --- error %d.",
+					      tty_name(tty, buf), r);
+			}
+		}
+	}
+	/* At this point we hold a reference to the new ldisc and a
+	    reference to the old ldisc. If we ended up flipping back
+	    to the existing ldisc we have two references to it */
+
+	if (tty->ldisc.num != o_ldisc.num && tty->driver.set_ldisc)
+		tty->driver.set_ldisc(tty);
+
+	tty_ldisc_put(o_ldisc.num);
+
+	/*
+	 *      Allow ldisc referencing to occur as soon as the driver
+	 *      ldisc callback completes.
+	 */
+	tty_ldisc_enable(tty);
+
+	return retval;
+}
+
+/*
+ * This routine returns a tty driver structure, given a device number
+ */
+struct tty_driver *get_tty_driver(kdev_t device)
+{
+	int	major, minor;
+	struct tty_driver *p;
+	
+	minor = MINOR(device);
+	major = MAJOR(device);
+
+	for (p = tty_drivers; p; p = p->next) {
+		if (p->major != major)
+			continue;
+		if (minor < p->minor_start)
+			continue;
+		if (minor >= p->minor_start + p->num)
+			continue;
+		return p;
+	}
+	return NULL;
+}
+
+/*
+ * If we try to write to, or set the state of, a terminal and we're
+ * not in the foreground, send a SIGTTOU.  If the signal is blocked or
+ * ignored, go ahead and perform the operation.  (POSIX 7.2)
+ */
+int tty_check_change(struct tty_struct * tty)
+{
+	if (current->tty != tty)
+		return 0;
+	if (tty->pgrp <= 0) {
+		printk(KERN_WARNING "tty_check_change: tty->pgrp <= 0!\n");
+		return 0;
+	}
+	if (current->pgrp == tty->pgrp)
+		return 0;
+	if (is_ignored(SIGTTOU))
+		return 0;
+	if (is_orphaned_pgrp(current->pgrp))
+		return -EIO;
+	(void) kill_pg(current->pgrp,SIGTTOU,1);
+	return -ERESTARTSYS;
+}
+
+static ssize_t hung_up_tty_read(struct file * file, char * buf,
+				size_t count, loff_t *ppos)
+{
+	/* Can't seek (pread) on ttys.  */
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+	return 0;
+}
+
+static ssize_t hung_up_tty_write(struct file * file, const char * buf,
+				 size_t count, loff_t *ppos)
+{
+	/* Can't seek (pwrite) on ttys.  */
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+	return -EIO;
+}
+
+/* No kernel lock held - none needed ;) */
+static unsigned int hung_up_tty_poll(struct file * filp, poll_table * wait)
+{
+	return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM;
+}
+
+static int hung_up_tty_ioctl(struct inode * inode, struct file * file,
+			     unsigned int cmd, unsigned long arg)
+{
+	return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
+}
+
+static struct file_operations tty_fops = {
+	llseek:		no_llseek,
+	read:		tty_read,
+	write:		tty_write,
+	poll:		tty_poll,
+	ioctl:		tty_ioctl,
+	open:		tty_open,
+	release:	tty_release,
+	fasync:		tty_fasync,
+};
+
+static struct file_operations hung_up_tty_fops = {
+	llseek:		no_llseek,
+	read:		hung_up_tty_read,
+	write:		hung_up_tty_write,
+	poll:		hung_up_tty_poll,
+	ioctl:		hung_up_tty_ioctl,
+	release:	tty_release,
+};
+
+static spinlock_t redirect_lock = SPIN_LOCK_UNLOCKED;
+static struct file *redirect;
+
+/**
+  *     tty_wakeup      -       request more data
+  *     @tty: terminal
+  *
+  *     Internal and external helper for wakeups of tty. This function
+  *     informs the line discipline if present that the driver is ready\
+  *     to receive more output data.
+  */
+
+void tty_wakeup(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld;
+
+	if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) {
+		ld = tty_ldisc_ref(tty);
+		if(ld) {
+			if(ld->write_wakeup)
+				ld->write_wakeup(tty);
+			tty_ldisc_deref(ld);
+		}
+	}
+	wake_up_interruptible(&tty->write_wait);
+}
+
+/*
+ * tty_wakeup/tty_ldisc_flush are actually _GPL exports but we can't do 
+ * that in 2.4 for modutils compat reasons.
+ */
+EXPORT_SYMBOL(tty_wakeup);
+
+
+void tty_ldisc_flush(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld = tty_ldisc_ref(tty);
+	if(ld) {
+		if(ld->flush_buffer)
+			ld->flush_buffer(tty);
+		tty_ldisc_deref(ld);
+	}
+}
+
+
+/*
+ * tty_wakeup/tty_ldisc_flush are actually _GPL exports but we can't do 
+ * that in 2.4 for modutils compat reasons.
+ */
+EXPORT_SYMBOL(tty_ldisc_flush);
+
+void do_tty_hangup(void *data)
+{
+	struct tty_struct *tty = (struct tty_struct *) data;
+	struct file * cons_filp = NULL;
+	struct file *f = NULL;
+	struct task_struct *p;
+	struct list_head *l;
+	struct tty_ldisc *ld;
+	int    closecount = 0, n;
+
+	if (!tty)
+		return;
+
+	/* inuse_filps is protected by the single kernel lock */
+	lock_kernel();
+
+	spin_lock(&redirect_lock);
+	if (redirect && redirect->private_data == tty) {
+		f = redirect;
+		redirect = NULL;
+	}
+	spin_unlock(&redirect_lock);
+	
+	check_tty_count(tty, "do_tty_hangup");
+	file_list_lock();
+	for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) {
+		struct file * filp = list_entry(l, struct file, f_list);
+		if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV ||
+		    filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) {
+			cons_filp = filp;
+			continue;
+		}
+		if (filp->f_op != &tty_fops)
+			continue;
+		closecount++;
+		tty_fasync(-1, filp, 0);	/* can't block */
+		filp->f_op = &hung_up_tty_fops;
+	}
+	file_list_unlock();
+	
+	/* FIXME! What are the locking issues here? This may me overdoing things.. */
+	ld = tty_ldisc_ref(tty);
+	if(ld != NULL)	
+	{
+		if (ld->flush_buffer)
+			ld->flush_buffer(tty);
+		if (tty->driver.flush_buffer)
+			tty->driver.flush_buffer(tty);
+		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && ld->write_wakeup)
+			ld->write_wakeup(tty);
+		if (ld->hangup)
+			ld->hangup(tty);
+	}
+
+	/* FIXME: Once we trust the LDISC code better we can wait here for
+	   ldisc completion and fix the driver call race */
+
+	wake_up_interruptible(&tty->write_wait);
+	wake_up_interruptible(&tty->read_wait);
+
+	/*
+	 * Shutdown the current line discipline, and reset it to
+	 * N_TTY.
+	 */
+
+	if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS)
+	{
+		down(&tty->termios_sem);
+                *tty->termios = tty->driver.init_termios;
+		up(&tty->termios_sem);
+	}
+
+	/* Defer ldisc switch */
+	/* tty_deferred_ldisc_switch(N_TTY)
+	   This should get done automatically when the port closes and
+	   tty_release is called */
+
+	read_lock(&tasklist_lock);
+ 	for_each_task(p) {
+		if ((tty->session > 0) && (p->session == tty->session) &&
+		    p->leader) {
+			send_sig(SIGHUP,p,1);
+			send_sig(SIGCONT,p,1);
+			if (tty->pgrp > 0)
+				p->tty_old_pgrp = tty->pgrp;
+		}
+		if (p->tty == tty)
+			p->tty = NULL;
+	}
+	read_unlock(&tasklist_lock);
+
+	tty->flags = 0;
+	tty->session = 0;
+	tty->pgrp = -1;
+	tty->ctrl_status = 0;
+	/*
+	 *	If one of the devices matches a console pointer, we
+	 *	cannot just call hangup() because that will cause
+	 *	tty->count and state->count to go out of sync.
+	 *	So we just call close() the right number of times.
+	 */
+	if (cons_filp) {
+		if (tty->driver.close)
+			for (n = 0; n < closecount; n++)
+				tty->driver.close(tty, cons_filp);
+	} else if (tty->driver.hangup)
+		(tty->driver.hangup)(tty);
+
+	/* We don't want to have driver/ldisc interactions beyond
+	   the ones we did here. The driver layer expects no
+	   calls after ->hangup() from the ldisc side. However we
+	   can't yet guarantee all that */
+
+	set_bit(TTY_HUPPED, &tty->flags);
+	if(ld) {
+		tty_ldisc_enable(tty);
+		tty_ldisc_deref(ld);
+	}
+	unlock_kernel();
+	if (f)
+		fput(f);
+}
+
+void tty_hangup(struct tty_struct * tty)
+{
+#ifdef TTY_DEBUG_HANGUP
+	char	buf[64];
+	
+	printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf));
+#endif
+	schedule_task(&tty->tq_hangup);
+}
+
+void tty_vhangup(struct tty_struct * tty)
+{
+#ifdef TTY_DEBUG_HANGUP
+	char	buf[64];
+
+	printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf));
+#endif
+	do_tty_hangup((void *) tty);
+}
+
+int tty_hung_up_p(struct file * filp)
+{
+	return (filp->f_op == &hung_up_tty_fops);
+}
+
+/*
+ * This function is typically called only by the session leader, when
+ * it wants to disassociate itself from its controlling tty.
+ *
+ * It performs the following functions:
+ * 	(1)  Sends a SIGHUP and SIGCONT to the foreground process group
+ * 	(2)  Clears the tty from being controlling the session
+ * 	(3)  Clears the controlling tty for all processes in the
+ * 		session group.
+ *
+ * The argument on_exit is set to 1 if called when a process is
+ * exiting; it is 0 if called by the ioctl TIOCNOTTY.
+ */
+void disassociate_ctty(int on_exit)
+{
+	struct tty_struct *tty = current->tty;
+	struct task_struct *p;
+	int tty_pgrp = -1;
+
+	if (tty) {
+		tty_pgrp = tty->pgrp;
+		if (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY)
+			tty_vhangup(tty);
+	} else {
+		if (current->tty_old_pgrp) {
+			kill_pg(current->tty_old_pgrp, SIGHUP, on_exit);
+			kill_pg(current->tty_old_pgrp, SIGCONT, on_exit);
+		}
+		return;
+	}
+	if (tty_pgrp > 0) {
+		kill_pg(tty_pgrp, SIGHUP, on_exit);
+		if (!on_exit)
+			kill_pg(tty_pgrp, SIGCONT, on_exit);
+	}
+
+	current->tty_old_pgrp = 0;
+	tty->session = 0;
+	tty->pgrp = -1;
+
+	read_lock(&tasklist_lock);
+	for_each_task(p)
+	  	if (p->session == current->session)
+			p->tty = NULL;
+	read_unlock(&tasklist_lock);
+}
+
+void stop_tty(struct tty_struct *tty)
+{
+	if (tty->stopped)
+		return;
+	tty->stopped = 1;
+	if (tty->link && tty->link->packet) {
+		tty->ctrl_status &= ~TIOCPKT_START;
+		tty->ctrl_status |= TIOCPKT_STOP;
+		wake_up_interruptible(&tty->link->read_wait);
+	}
+	if (tty->driver.stop)
+		(tty->driver.stop)(tty);
+}
+
+void start_tty(struct tty_struct *tty)
+{
+	if (!tty->stopped || tty->flow_stopped)
+		return;
+	tty->stopped = 0;
+	if (tty->link && tty->link->packet) {
+		tty->ctrl_status &= ~TIOCPKT_STOP;
+		tty->ctrl_status |= TIOCPKT_START;
+		wake_up_interruptible(&tty->link->read_wait);
+	}
+	if (tty->driver.start)
+		(tty->driver.start)(tty);
+	/* If we have a running line discipline it may need kicking */
+	tty_wakeup(tty);
+}
+
+static ssize_t tty_read(struct file * file, char * buf, size_t count, 
+			loff_t *ppos)
+{
+	int i;
+	struct tty_struct * tty;
+	struct inode *inode;
+	struct tty_ldisc *ld;
+
+	/* Can't seek (pread) on ttys.  */
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+
+	tty = (struct tty_struct *)file->private_data;
+	inode = file->f_dentry->d_inode;
+	if (tty_paranoia_check(tty, inode->i_rdev, "tty_read"))
+		return -EIO;
+	if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
+		return -EIO;
+
+	/* This check not only needs to be done before reading, but also
+	   whenever read_chan() gets woken up after sleeping, so I've
+	   moved it to there.  This should only be done for the N_TTY
+	   line discipline, anyway.  Same goes for write_chan(). -- jlc. */
+#if 0
+	if ((inode->i_rdev != CONSOLE_DEV) && /* don't stop on /dev/console */
+	    (tty->pgrp > 0) &&
+	    (current->tty == tty) &&
+	    (tty->pgrp != current->pgrp))
+		if (is_ignored(SIGTTIN) || is_orphaned_pgrp(current->pgrp))
+			return -EIO;
+		else {
+			(void) kill_pg(current->pgrp, SIGTTIN, 1);
+			return -ERESTARTSYS;
+		}
+#endif
+	/* We want to wait for the line discipline to sort out in this
+	   situation */
+	ld = tty_ldisc_ref_wait(tty);
+	lock_kernel();
+	if (ld->read)
+		i = (ld->read)(tty,file,buf,count);
+	else
+		i = -EIO;
+	tty_ldisc_deref(ld);
+	unlock_kernel();
+	if (i > 0)
+		inode->i_atime = CURRENT_TIME;
+	return i;
+}
+
+/*
+ * Split writes up in sane blocksizes to avoid
+ * denial-of-service type attacks
+ */
+static inline ssize_t do_tty_write(
+	ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t),
+	struct tty_struct *tty,
+	struct file *file,
+	const unsigned char *buf,
+	size_t count)
+{
+	ssize_t ret = 0, written = 0;
+	
+	if (file->f_flags & O_NONBLOCK) {
+		if (down_trylock(&tty->atomic_write))
+			return -EAGAIN;
+	}
+	else {
+		if (down_interruptible(&tty->atomic_write))
+			return -ERESTARTSYS;
+	}
+	if ( test_bit(TTY_NO_WRITE_SPLIT, &tty->flags) ) {
+		lock_kernel();
+		written = write(tty, file, buf, count);
+		unlock_kernel();
+	} else {
+		for (;;) {
+			unsigned long size = MAX(PAGE_SIZE*2,16384);
+			if (size > count)
+				size = count;
+			lock_kernel();
+			ret = write(tty, file, buf, size);
+			unlock_kernel();
+			if (ret <= 0)
+				break;
+			written += ret;
+			buf += ret;
+			count -= ret;
+			if (!count)
+				break;
+			ret = -ERESTARTSYS;
+			if (signal_pending(current))
+				break;
+			if (current->need_resched)
+				schedule();
+		}
+	}
+	if (written) {
+		file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
+		ret = written;
+	}
+	up(&tty->atomic_write);
+	return ret;
+}
+
+
+static ssize_t tty_write(struct file * file, const char * buf, size_t count,
+			 loff_t *ppos)
+{
+	int is_console;
+	struct tty_struct * tty;
+	struct inode *inode = file->f_dentry->d_inode;
+	ssize_t ret;
+	struct tty_ldisc *ld;
+
+	/* Can't seek (pwrite) on ttys.  */
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+
+	/*
+	 *      For now, we redirect writes from /dev/console as
+	 *      well as /dev/tty0.
+	 */
+	inode = file->f_dentry->d_inode;
+	is_console = (inode->i_rdev == SYSCONS_DEV ||
+		      inode->i_rdev == CONSOLE_DEV);
+
+	if (is_console) {
+		struct file *p = NULL;
+
+		spin_lock(&redirect_lock);
+		if (redirect) {
+			get_file(redirect);
+			p = redirect;
+		}
+		spin_unlock(&redirect_lock);
+
+		if (p) {
+			ssize_t res = p->f_op->write(p, buf, count, &p->f_pos);
+			fput(p);
+			return res;
+		}
+	}
+
+	tty = (struct tty_struct *)file->private_data;
+	if (tty_paranoia_check(tty, inode->i_rdev, "tty_write"))
+		return -EIO;
+	if (!tty || !tty->driver.write || (test_bit(TTY_IO_ERROR, &tty->flags)))
+		return -EIO;
+#if 0
+	if (!is_console && L_TOSTOP(tty) && (tty->pgrp > 0) &&
+	    (current->tty == tty) && (tty->pgrp != current->pgrp)) {
+		if (is_orphaned_pgrp(current->pgrp))
+			return -EIO;
+		if (!is_ignored(SIGTTOU)) {
+			(void) kill_pg(current->pgrp, SIGTTOU, 1);
+			return -ERESTARTSYS;
+		}
+	}
+#endif
+
+	ld = tty_ldisc_ref_wait(tty);
+	if (!ld->write)
+		ret = -EIO;
+	else
+		ret = do_tty_write(ld->write, tty, file,
+				(const unsigned char __user *)buf, count);
+	tty_ldisc_deref(ld);
+	return ret;
+}
+
+/* Semaphore to protect creating and releasing a tty. This is shared with
+   vt.c for deeply disgusting hack reasons */
+static DECLARE_MUTEX(tty_sem);
+
+static void down_tty_sem(int index)
+{
+	down(&tty_sem);
+}
+
+static void up_tty_sem(int index)
+{
+	up(&tty_sem);
+}
+
+static void release_mem(struct tty_struct *tty, int idx);
+
+/*
+ * WSH 06/09/97: Rewritten to remove races and properly clean up after a
+ * failed open.  The new code protects the open with a semaphore, so it's
+ * really quite straightforward.  The semaphore locking can probably be
+ * relaxed for the (most common) case of reopening a tty.
+ */
+static int init_dev(kdev_t device, struct tty_struct **ret_tty)
+{
+	struct tty_struct *tty, *o_tty;
+	struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
+	struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
+	struct tty_driver *driver;	
+	int retval=0;
+	int idx;
+
+	driver = get_tty_driver(device);
+	if (!driver)
+		return -ENODEV;
+
+	idx = MINOR(device) - driver->minor_start;
+
+	/* 
+	 * Check whether we need to acquire the tty semaphore to avoid
+	 * race conditions.  For now, play it safe.
+	 */
+	down_tty_sem(idx);
+
+	/* check whether we're reopening an existing tty */
+	tty = driver->table[idx];
+	if (tty) goto fast_track;
+
+	/*
+	 * First time open is complex, especially for PTY devices.
+	 * This code guarantees that either everything succeeds and the
+	 * TTY is ready for operation, or else the table slots are vacated
+	 * and the allocated memory released.  (Except that the termios 
+	 * and locked termios may be retained.)
+	 */
+
+	o_tty = NULL;
+	tp = o_tp = NULL;
+	ltp = o_ltp = NULL;
+
+	tty = alloc_tty_struct();
+	if(!tty)
+		goto fail_no_mem;
+	initialize_tty_struct(tty);
+	tty->device = device;
+	tty->driver = *driver;
+
+	tp_loc = &driver->termios[idx];
+	if (!*tp_loc) {
+		tp = (struct termios *) kmalloc(sizeof(struct termios),
+						GFP_KERNEL);
+		if (!tp)
+			goto free_mem_out;
+		*tp = driver->init_termios;
+	}
+
+	ltp_loc = &driver->termios_locked[idx];
+	if (!*ltp_loc) {
+		ltp = (struct termios *) kmalloc(sizeof(struct termios),
+						 GFP_KERNEL);
+		if (!ltp)
+			goto free_mem_out;
+		memset(ltp, 0, sizeof(struct termios));
+	}
+
+	if (driver->type == TTY_DRIVER_TYPE_PTY) {
+		o_tty = alloc_tty_struct();
+		if (!o_tty)
+			goto free_mem_out;
+		initialize_tty_struct(o_tty);
+		o_tty->device = (kdev_t) MKDEV(driver->other->major,
+					driver->other->minor_start + idx);
+		o_tty->driver = *driver->other;
+
+		o_tp_loc  = &driver->other->termios[idx];
+		if (!*o_tp_loc) {
+			o_tp = (struct termios *)
+				kmalloc(sizeof(struct termios), GFP_KERNEL);
+			if (!o_tp)
+				goto free_mem_out;
+			*o_tp = driver->other->init_termios;
+		}
+
+		o_ltp_loc = &driver->other->termios_locked[idx];
+		if (!*o_ltp_loc) {
+			o_ltp = (struct termios *)
+				kmalloc(sizeof(struct termios), GFP_KERNEL);
+			if (!o_ltp)
+				goto free_mem_out;
+			memset(o_ltp, 0, sizeof(struct termios));
+		}
+
+		/*
+		 * Everything allocated ... set up the o_tty structure.
+		 */
+		driver->other->table[idx] = o_tty;
+		if (!*o_tp_loc)
+			*o_tp_loc = o_tp;
+		if (!*o_ltp_loc)
+			*o_ltp_loc = o_ltp;
+		o_tty->termios = *o_tp_loc;
+		o_tty->termios_locked = *o_ltp_loc;
+		(*driver->other->refcount)++;
+		if (driver->subtype == PTY_TYPE_MASTER)
+			o_tty->count++;
+
+		/* Establish the links in both directions */
+		tty->link   = o_tty;
+		o_tty->link = tty;
+	}
+
+	/* 
+	 * All structures have been allocated, so now we install them.
+	 * Failures after this point use release_mem to clean up, so 
+	 * there's no need to null out the local pointers.
+	 */
+	driver->table[idx] = tty;
+	
+	if (!*tp_loc)
+		*tp_loc = tp;
+	if (!*ltp_loc)
+		*ltp_loc = ltp;
+	tty->termios = *tp_loc;
+	tty->termios_locked = *ltp_loc;
+	(*driver->refcount)++;
+	tty->count++;
+
+	/* 
+	 * Structures all installed ... call the ldisc open routines.
+	 * If we fail here just call release_mem to clean up.  No need
+	 * to decrement the use counts, as release_mem doesn't care.
+	 */
+	if (tty->ldisc.open) {
+		retval = (tty->ldisc.open)(tty);
+		if (retval)
+			goto release_mem_out;
+	}
+	if (o_tty && o_tty->ldisc.open) {
+		retval = (o_tty->ldisc.open)(o_tty);
+		if (retval) {
+			if (tty->ldisc.close)
+				(tty->ldisc.close)(tty);
+			goto release_mem_out;
+		}
+		set_bit(TTY_LDISC, &o_tty->flags);
+		tty_ldisc_enable(o_tty);
+	}
+	tty_ldisc_enable(tty);
+	goto success;
+
+	/*
+	 * This fast open can be used if the tty is already open.
+	 * No memory is allocated, and the only failures are from
+	 * attempting to open a closing tty or attempting multiple
+	 * opens on a pty master.
+	 */
+fast_track:
+	if (test_bit(TTY_CLOSING, &tty->flags)) {
+		retval = -EIO;
+		goto end_init;
+	}
+	if (driver->type == TTY_DRIVER_TYPE_PTY &&
+	    driver->subtype == PTY_TYPE_MASTER) {
+		/*
+		 * special case for PTY masters: only one open permitted, 
+		 * and the slave side open count is incremented as well.
+		 */
+		if (tty->count) {
+			retval = -EIO;
+			goto end_init;
+		}
+		tty->link->count++;
+	}
+	tty->count++;
+	tty->driver = *driver; /* N.B. why do this every time?? */
+	/* FIXME */
+	if(!test_bit(TTY_LDISC, &tty->flags))
+		printk(KERN_ERR "init_dev but no ldisc\n");
+success:
+	*ret_tty = tty;
+	
+	/* All paths come through here to release the semaphore */
+end_init:
+	up_tty_sem(idx);
+	return retval;
+
+	/* Release locally allocated memory ... nothing placed in slots */
+free_mem_out:
+	if (o_tp)
+		kfree(o_tp);
+	if (o_tty)
+		free_tty_struct(o_tty);
+	if (ltp)
+		kfree(ltp);
+	if (tp)
+		kfree(tp);
+	free_tty_struct(tty);
+
+fail_no_mem:
+	retval = -ENOMEM;
+	goto end_init;
+
+	/* call the tty release_mem routine to clean out this slot */
+release_mem_out:
+	printk(KERN_INFO "init_dev: ldisc open failed, "
+			 "clearing slot %d\n", idx);
+	release_mem(tty, idx);
+	goto end_init;
+}
+
+/*
+ * Releases memory associated with a tty structure, and clears out the
+ * driver table slots.
+ */
+static void release_mem(struct tty_struct *tty, int idx)
+{
+	struct tty_struct *o_tty;
+	struct termios *tp;
+
+	if ((o_tty = tty->link) != NULL) {
+		o_tty->driver.table[idx] = NULL;
+		if (o_tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) {
+			tp = o_tty->driver.termios[idx];
+			o_tty->driver.termios[idx] = NULL;
+			kfree(tp);
+		}
+		o_tty->magic = 0;
+		(*o_tty->driver.refcount)--;
+		list_del_init(&o_tty->tty_files);
+		free_tty_struct(o_tty);
+	}
+
+	tty->driver.table[idx] = NULL;
+	if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) {
+		tp = tty->driver.termios[idx];
+		tty->driver.termios[idx] = NULL;
+		kfree(tp);
+	}
+	tty->magic = 0;
+	(*tty->driver.refcount)--;
+	list_del_init(&tty->tty_files);
+	free_tty_struct(tty);
+}
+
+/*
+ * Even releasing the tty structures is a tricky business.. We have
+ * to be very careful that the structures are all released at the
+ * same time, as interrupts might otherwise get the wrong pointers.
+ *
+ * WSH 09/09/97: rewritten to avoid some nasty race conditions that could
+ * lead to double frees or releasing memory still in use.
+ */
+static void release_dev(struct file * filp)
+{
+	struct tty_struct *tty, *o_tty;
+	int	pty_master, tty_closing, o_tty_closing, do_sleep;
+	int	idx;
+	char	buf[64];
+	unsigned long flags;
+	
+	tty = (struct tty_struct *)filp->private_data;
+	if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "release_dev"))
+		return;
+
+	check_tty_count(tty, "release_dev");
+
+	tty_fasync(-1, filp, 0);
+
+	idx = MINOR(tty->device) - tty->driver.minor_start;
+	pty_master = (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+		      tty->driver.subtype == PTY_TYPE_MASTER);
+	o_tty = tty->link;
+
+#ifdef TTY_PARANOIA_CHECK
+	if (idx < 0 || idx >= tty->driver.num) {
+		printk(KERN_DEBUG "release_dev: bad idx when trying to "
+				  "free (%s)\n", kdevname(tty->device));
+		return;
+	}
+	if (tty != tty->driver.table[idx]) {
+		printk(KERN_DEBUG "release_dev: driver.table[%d] not tty "
+				  "for (%s)\n", idx, kdevname(tty->device));
+		return;
+	}
+	if (tty->termios != tty->driver.termios[idx]) {
+		printk(KERN_DEBUG "release_dev: driver.termios[%d] not termios "
+		       "for (%s)\n",
+		       idx, kdevname(tty->device));
+		return;
+	}
+	if (tty->termios_locked != tty->driver.termios_locked[idx]) {
+		printk(KERN_DEBUG "release_dev: driver.termios_locked[%d] not "
+		       "termios_locked for (%s)\n",
+		       idx, kdevname(tty->device));
+		return;
+	}
+#endif
+
+#ifdef TTY_DEBUG_HANGUP
+	printk(KERN_DEBUG "release_dev of %s (tty count=%d)...",
+	       tty_name(tty, buf), tty->count);
+#endif
+
+#ifdef TTY_PARANOIA_CHECK
+	if (tty->driver.other) {
+		if (o_tty != tty->driver.other->table[idx]) {
+			printk(KERN_DEBUG "release_dev: other->table[%d] "
+					  "not o_tty for (%s)\n",
+			       idx, kdevname(tty->device));
+			return;
+		}
+		if (o_tty->termios != tty->driver.other->termios[idx]) {
+			printk(KERN_DEBUG "release_dev: other->termios[%d] "
+					  "not o_termios for (%s)\n",
+			       idx, kdevname(tty->device));
+			return;
+		}
+		if (o_tty->termios_locked != 
+		      tty->driver.other->termios_locked[idx]) {
+			printk(KERN_DEBUG "release_dev: other->termios_locked["
+					  "%d] not o_termios_locked for (%s)\n",
+			       idx, kdevname(tty->device));
+			return;
+		}
+		if (o_tty->link != tty) {
+			printk(KERN_DEBUG "release_dev: bad pty pointers\n");
+			return;
+		}
+	}
+#endif
+
+	if (tty->driver.close)
+		tty->driver.close(tty, filp);
+
+	/*
+	 * Sanity check: if tty->count is going to zero, there shouldn't be
+	 * any waiters on tty->read_wait or tty->write_wait.  We test the
+	 * wait queues and kick everyone out _before_ actually starting to
+	 * close.  This ensures that we won't block while releasing the tty
+	 * structure.
+	 *
+	 * The test for the o_tty closing is necessary, since the master and
+	 * slave sides may close in any order.  If the slave side closes out
+	 * first, its count will be one, since the master side holds an open.
+	 * Thus this test wouldn't be triggered at the time the slave closes,
+	 * so we do it now.
+	 *
+	 * Note that it's possible for the tty to be opened again while we're
+	 * flushing out waiters.  By recalculating the closing flags before
+	 * each iteration we avoid any problems.
+	 */
+	while (1) {
+		tty_closing = tty->count <= 1;
+		o_tty_closing = o_tty &&
+			(o_tty->count <= (pty_master ? 1 : 0));
+		do_sleep = 0;
+
+		if (tty_closing) {
+			if (waitqueue_active(&tty->read_wait)) {
+				wake_up(&tty->read_wait);
+				do_sleep++;
+			}
+			if (waitqueue_active(&tty->write_wait)) {
+				wake_up(&tty->write_wait);
+				do_sleep++;
+			}
+		}
+		if (o_tty_closing) {
+			if (waitqueue_active(&o_tty->read_wait)) {
+				wake_up(&o_tty->read_wait);
+				do_sleep++;
+			}
+			if (waitqueue_active(&o_tty->write_wait)) {
+				wake_up(&o_tty->write_wait);
+				do_sleep++;
+			}
+		}
+		if (!do_sleep)
+			break;
+
+		printk(KERN_WARNING "release_dev: %s: read/write wait queue "
+				    "active!\n", tty_name(tty, buf));
+		schedule();
+	}	
+
+	/*
+	 * The closing flags are now consistent with the open counts on 
+	 * both sides, and we've completed the last operation that could 
+	 * block, so it's safe to proceed with closing.
+	 */
+	if (pty_master) {
+		if (--o_tty->count < 0) {
+			printk(KERN_WARNING "release_dev: bad pty slave count "
+					    "(%d) for %s\n",
+			       o_tty->count, tty_name(o_tty, buf));
+			o_tty->count = 0;
+		}
+	}
+	if (--tty->count < 0) {
+		printk(KERN_WARNING "release_dev: bad tty->count (%d) for %s\n",
+		       tty->count, tty_name(tty, buf));
+		tty->count = 0;
+	}
+
+	/*
+	 * We've decremented tty->count, so we should zero out
+	 * filp->private_data, to break the link between the tty and
+	 * the file descriptor.  Otherwise if filp_close() blocks before
+	 * the file descriptor is removed from the inuse_filp
+	 * list, check_tty_count() could observe a discrepancy and
+	 * printk a warning message to the user.
+	 */
+	filp->private_data = 0;
+
+	/*
+	 * Perform some housekeeping before deciding whether to return.
+	 *
+	 * Set the TTY_CLOSING flag if this was the last open.  In the
+	 * case of a pty we may have to wait around for the other side
+	 * to close, and TTY_CLOSING makes sure we can't be reopened.
+	 */
+	if(tty_closing)
+		set_bit(TTY_CLOSING, &tty->flags);
+	if(o_tty_closing)
+		set_bit(TTY_CLOSING, &o_tty->flags);
+
+	/*
+	 * If _either_ side is closing, make sure there aren't any
+	 * processes that still think tty or o_tty is their controlling
+	 * tty.
+	 */
+	if (tty_closing || o_tty_closing) {
+		struct task_struct *p;
+
+		read_lock(&tasklist_lock);
+		for_each_task(p) {
+			if (p->tty == tty || (o_tty && p->tty == o_tty))
+				p->tty = NULL;
+		}
+		read_unlock(&tasklist_lock);
+	}
+
+	/* check whether both sides are closing ... */
+	if (!tty_closing || (o_tty && !o_tty_closing))
+		return;
+	
+#ifdef TTY_DEBUG_HANGUP
+	printk(KERN_DEBUG "freeing tty structure...");
+#endif
+
+	/*
+	 * Prevent flush_to_ldisc() from rescheduling the work for later.  Then
+	 * kill any delayed work. As this is the final close it does not
+	 * race with the set_ldisc code path.
+	 */
+	clear_bit(TTY_LDISC, &tty->flags);
+	clear_bit(TTY_DONT_FLIP, &tty->flags);
+
+	/*
+	 * Wait for ->hangup_work and ->flip.work handlers to terminate
+	 */
+
+	run_task_queue(&tq_timer);
+	flush_scheduled_tasks();
+
+	/*
+	 * Wait for any short term users (we know they are just driver
+         * side waiters as the file is closing so user count on the file
+         * side is zero.
+	 */
+
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	while(tty->ldisc.refcount)
+	{
+		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+		wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
+		spin_lock_irqsave(&tty_ldisc_lock, flags);
+	}
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+
+	/*
+	 * Shutdown the current line discipline, and reset it to N_TTY.
+	 * N.B. why reset ldisc when we're releasing the memory??
+	 * FIXME: this MUST get fixed for the new reflocking
+	 */
+	if (tty->ldisc.close)
+		(tty->ldisc.close)(tty);
+	tty_ldisc_put(tty->ldisc.num);
+
+	/*
+	 *      Switch the line discipline back
+	 */
+	tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
+	tty_set_termios_ldisc(tty,N_TTY); 
+
+	if (o_tty) {
+		/* FIXME: could o_tty be in setldisc here ? */
+		clear_bit(TTY_LDISC, &o_tty->flags);
+		if (o_tty->ldisc.close)
+			(o_tty->ldisc.close)(o_tty);
+		tty_ldisc_put(o_tty->ldisc.num);
+		tty_ldisc_assign(o_tty, tty_ldisc_get(N_TTY));
+		tty_set_termios_ldisc(o_tty,N_TTY);
+	}
+
+	/* 
+	 * The release_mem function takes care of the details of clearing
+	 * the slots and preserving the termios structure.
+	 */
+	release_mem(tty, idx);
+}
+
+/*
+ * tty_open and tty_release keep up the tty count that contains the
+ * number of opens done on a tty. We cannot use the inode-count, as
+ * different inodes might point to the same tty.
+ *
+ * Open-counting is needed for pty masters, as well as for keeping
+ * track of serial lines: DTR is dropped when the last close happens.
+ * (This is not done solely through tty->count, now.  - Ted 1/27/92)
+ *
+ * The termios state of a pty is reset on first open so that
+ * settings don't persist across reuse.
+ */
+static int tty_open(struct inode * inode, struct file * filp)
+{
+	struct tty_struct *tty;
+	int noctty, retval;
+	kdev_t device;
+	unsigned short saved_flags;
+	char	buf[64];
+
+	saved_flags = filp->f_flags;
+retry_open:
+	noctty = filp->f_flags & O_NOCTTY;
+	device = inode->i_rdev;
+	if (device == TTY_DEV) {
+		if (!current->tty)
+			return -ENXIO;
+		device = current->tty->device;
+		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
+		/* noctty = 1; */
+	}
+#ifdef CONFIG_VT
+	if (device == CONSOLE_DEV) {
+		extern int fg_console;
+		device = MKDEV(TTY_MAJOR, fg_console + 1);
+		noctty = 1;
+	}
+#endif
+	if (device == SYSCONS_DEV) {
+		struct console *c = console_drivers;
+		while(c && !c->device)
+			c = c->next;
+		if (!c)
+                        return -ENODEV;
+                device = c->device(c);
+		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/console block */
+		noctty = 1;
+	}
+
+	if (device == PTMX_DEV) {
+#ifdef CONFIG_UNIX98_PTYS
+
+		/* find a free pty. */
+		int major, minor;
+		struct tty_driver *driver;
+
+		/* find a device that is not in use. */
+		retval = -1;
+		for ( major = 0 ; major < UNIX98_NR_MAJORS ; major++ ) {
+			driver = &ptm_driver[major];
+			for (minor = driver->minor_start ;
+			     minor < driver->minor_start + driver->num ;
+			     minor++) {
+				device = MKDEV(driver->major, minor);
+				if (!init_dev(device, &tty)) goto ptmx_found; /* ok! */
+			}
+		}
+		return -EIO; /* no free ptys */
+	ptmx_found:
+		set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
+		minor -= driver->minor_start;
+		devpts_pty_new(driver->other->name_base + minor, MKDEV(driver->other->major, minor + driver->other->minor_start));
+		tty_register_devfs(&pts_driver[major], DEVFS_FL_DEFAULT,
+				   pts_driver[major].minor_start + minor);
+		noctty = 1;
+		goto init_dev_done;
+
+#else   /* CONFIG_UNIX_98_PTYS */
+
+		return -ENODEV;
+
+#endif  /* CONFIG_UNIX_98_PTYS */
+	}
+
+	retval = init_dev(device, &tty);
+	if (retval)
+		return retval;
+
+#ifdef CONFIG_UNIX98_PTYS
+init_dev_done:
+#endif
+	filp->private_data = tty;
+	file_move(filp, &tty->tty_files);
+	check_tty_count(tty, "tty_open");
+	if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver.subtype == PTY_TYPE_MASTER)
+		noctty = 1;
+#ifdef TTY_DEBUG_HANGUP
+	printk(KERN_DEBUG "opening %s...", tty_name(tty, buf));
+#endif
+	if (tty->driver.open)
+		retval = tty->driver.open(tty, filp);
+	else
+		retval = -ENODEV;
+	filp->f_flags = saved_flags;
+
+	if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) && !suser())
+		retval = -EBUSY;
+
+	if (retval) {
+#ifdef TTY_DEBUG_HANGUP
+		printk(KERN_DEBUG "error %d in opening %s...", retval,
+		       tty_name(tty, buf));
+#endif
+
+		release_dev(filp);
+		if (retval != -ERESTARTSYS)
+			return retval;
+		if (signal_pending(current))
+			return retval;
+		schedule();
+		/*
+		 * Need to reset f_op in case a hangup happened.
+		 */
+		filp->f_op = &tty_fops;
+		goto retry_open;
+	}
+	if (!noctty &&
+	    current->leader &&
+	    !current->tty &&
+	    tty->session == 0) {
+	    	task_lock(current);
+		current->tty = tty;
+		task_unlock(current);
+		current->tty_old_pgrp = 0;
+		tty->session = current->session;
+		tty->pgrp = current->pgrp;
+	}
+	if ((tty->driver.type == TTY_DRIVER_TYPE_SERIAL) &&
+	    (tty->driver.subtype == SERIAL_TYPE_CALLOUT) &&
+	    (tty->count == 1)) {
+		static int nr_warns;
+		if (nr_warns < 5) {
+			printk(KERN_WARNING "tty_io.c: "
+				"process %d (%s) used obsolete /dev/%s - "
+				"update software to use /dev/ttyS%d\n",
+				current->pid, current->comm,
+				tty_name(tty, buf), TTY_NUMBER(tty));
+			nr_warns++;
+		}
+	}
+	return 0;
+}
+
+static int tty_release(struct inode * inode, struct file * filp)
+{
+	lock_kernel();
+	release_dev(filp);
+	unlock_kernel();
+	return 0;
+}
+
+/* No kernel lock held - fine */
+static unsigned int tty_poll(struct file * filp, poll_table * wait)
+{
+	struct tty_struct * tty;
+	struct tty_ldisc *ld;
+	int ret = 0;
+
+	tty = (struct tty_struct *)filp->private_data;
+	if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_poll"))
+		return 0;
+
+	ld = tty_ldisc_ref_wait(tty);
+	if (ld->poll)
+		ret = (ld->poll)(tty, filp, wait);
+	tty_ldisc_deref(ld);
+	return ret;
+}
+
+static int tty_fasync(int fd, struct file * filp, int on)
+{
+	struct tty_struct * tty;
+	int retval;
+
+	tty = (struct tty_struct *)filp->private_data;
+	if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_fasync"))
+		return 0;
+	
+	retval = fasync_helper(fd, filp, on, &tty->fasync);
+	if (retval <= 0)
+		return retval;
+
+	if (on) {
+		if (!waitqueue_active(&tty->read_wait))
+			tty->minimum_to_wake = 1;
+		if (filp->f_owner.pid == 0) {
+			filp->f_owner.pid = (-tty->pgrp) ? : current->pid;
+			filp->f_owner.uid = current->uid;
+			filp->f_owner.euid = current->euid;
+		}
+	} else {
+		if (!tty->fasync && !waitqueue_active(&tty->read_wait))
+			tty->minimum_to_wake = N_TTY_BUF_SIZE;
+	}
+	return 0;
+}
+
+static int tiocsti(struct tty_struct *tty, char * arg)
+{
+	char ch, mbz = 0;
+	struct tty_ldisc *ld;
+
+	if ((current->tty != tty) && !suser())
+		return -EPERM;
+	if (get_user(ch, arg))
+		return -EFAULT;
+	ld = tty_ldisc_ref_wait(tty);
+	ld->receive_buf(tty, &ch, &mbz, 1);
+	tty_ldisc_deref(ld);
+	return 0;
+}
+
+static int tiocgwinsz(struct tty_struct *tty, struct winsize * arg)
+{
+	if (copy_to_user(arg, &tty->winsize, sizeof(*arg)))
+		return -EFAULT;
+	return 0;
+}
+
+static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
+	struct winsize * arg)
+{
+	struct winsize tmp_ws;
+
+	if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
+		return -EFAULT;
+	if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg)))
+		return 0;
+	if (tty->pgrp > 0)
+		kill_pg(tty->pgrp, SIGWINCH, 1);
+	if ((real_tty->pgrp != tty->pgrp) && (real_tty->pgrp > 0))
+		kill_pg(real_tty->pgrp, SIGWINCH, 1);
+	tty->winsize = tmp_ws;
+	real_tty->winsize = tmp_ws;
+	return 0;
+}
+
+static int tioccons(struct inode *inode, struct file *file)
+{
+	if (inode->i_rdev == SYSCONS_DEV ||
+	    inode->i_rdev == CONSOLE_DEV) {
+		struct file *f;
+		if (!suser())
+			return -EPERM;
+		spin_lock(&redirect_lock);
+		f = redirect;
+		redirect = NULL;
+		spin_unlock(&redirect_lock);
+		if (f)
+			fput(f);
+		return 0;
+	}
+	spin_lock(&redirect_lock);
+	if (redirect) {
+		spin_unlock(&redirect_lock);
+		return -EBUSY;
+	}
+	get_file(file);
+	redirect = file;
+	spin_unlock(&redirect_lock);
+	return 0;
+}
+
+
+static int fionbio(struct file *file, int *arg)
+{
+	int nonblock;
+
+	if (get_user(nonblock, arg))
+		return -EFAULT;
+
+	if (nonblock)
+		file->f_flags |= O_NONBLOCK;
+	else
+		file->f_flags &= ~O_NONBLOCK;
+	return 0;
+}
+
+static int tiocsctty(struct tty_struct *tty, int arg)
+{
+	if (current->leader &&
+	    (current->session == tty->session))
+		return 0;
+	/*
+	 * The process must be a session leader and
+	 * not have a controlling tty already.
+	 */
+	if (!current->leader || current->tty)
+		return -EPERM;
+	if (tty->session > 0) {
+		/*
+		 * This tty is already the controlling
+		 * tty for another session group!
+		 */
+		if ((arg == 1) && suser()) {
+			/*
+			 * Steal it away
+			 */
+			struct task_struct *p;
+
+			read_lock(&tasklist_lock);
+			for_each_task(p)
+				if (p->tty == tty)
+					p->tty = NULL;
+			read_unlock(&tasklist_lock);
+		} else
+			return -EPERM;
+	}
+	task_lock(current);
+	current->tty = tty;
+	task_unlock(current);
+	current->tty_old_pgrp = 0;
+	tty->session = current->session;
+	tty->pgrp = current->pgrp;
+	return 0;
+}
+
+static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+	/*
+	 * (tty == real_tty) is a cheap way of
+	 * testing if the tty is NOT a master pty.
+	 */
+	if (tty == real_tty && current->tty != real_tty)
+		return -ENOTTY;
+	return put_user(real_tty->pgrp, arg);
+}
+
+static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+	pid_t pgrp;
+	int retval = tty_check_change(real_tty);
+
+	if (retval == -EIO)
+		return -ENOTTY;
+	if (retval)
+		return retval;
+	if (!current->tty ||
+	    (current->tty != real_tty) ||
+	    (real_tty->session != current->session))
+		return -ENOTTY;
+	if (get_user(pgrp, (pid_t *) arg))
+		return -EFAULT;
+	if (pgrp < 0)
+		return -EINVAL;
+	if (session_of_pgrp(pgrp) != current->session)
+		return -EPERM;
+	real_tty->pgrp = pgrp;
+	return 0;
+}
+
+static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+	/*
+	 * (tty == real_tty) is a cheap way of
+	 * testing if the tty is NOT a master pty.
+	*/
+	if (tty == real_tty && current->tty != real_tty)
+		return -ENOTTY;
+	if (real_tty->session <= 0)
+		return -ENOTTY;
+	return put_user(real_tty->session, arg);
+}
+
+static int tiocttygstruct(struct tty_struct *tty, struct tty_struct *arg)
+{
+	if (copy_to_user(arg, tty, sizeof(*arg)))
+		return -EFAULT;
+	return 0;
+}
+
+static int tiocsetd(struct tty_struct *tty, int *arg)
+{
+	int ldisc;
+
+	if (get_user(ldisc, arg))
+		return -EFAULT;
+	return tty_set_ldisc(tty, ldisc);
+}
+
+static int send_break(struct tty_struct *tty, int duration)
+{
+	tty->driver.break_ctl(tty, -1);
+	if (!signal_pending(current)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(duration);
+	}
+	tty->driver.break_ctl(tty, 0);
+	if (signal_pending(current))
+		return -EINTR;
+	return 0;
+}
+
+static int tty_generic_brk(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg)
+{
+	if (cmd == TCSBRK && arg) 
+	{
+		/* tcdrain case */
+		int retval = tty_check_change(tty);
+		if (retval)
+			return retval;
+		tty_wait_until_sent(tty, 0);
+		if (signal_pending(current))
+			return -EINTR;
+	}
+	return 0;
+}
+
+/*
+ * Split this up, as gcc can choke on it otherwise..
+ */
+int tty_ioctl(struct inode * inode, struct file * file,
+	      unsigned int cmd, unsigned long arg)
+{
+	struct tty_struct *tty, *real_tty;
+	int retval;
+	struct tty_ldisc *ld;
+	
+	tty = (struct tty_struct *)file->private_data;
+	if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl"))
+		return -EINVAL;
+
+	real_tty = tty;
+	if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver.subtype == PTY_TYPE_MASTER)
+		real_tty = tty->link;
+
+	/*
+	 * Break handling by driver
+	 */
+	if (!tty->driver.break_ctl) {
+		switch(cmd) {
+		case TIOCSBRK:
+		case TIOCCBRK:
+			if (tty->driver.ioctl)
+				return tty->driver.ioctl(tty, file, cmd, arg);
+			return -EINVAL;
+			
+		/* These two ioctl's always return success; even if */
+		/* the driver doesn't support them. */
+		case TCSBRK:
+		case TCSBRKP:
+			retval = -ENOIOCTLCMD;
+			if (tty->driver.ioctl)
+				retval = tty->driver.ioctl(tty, file, cmd, arg);
+			/* Not driver handled */
+			if (retval == -ENOIOCTLCMD)
+				retval = tty_generic_brk(tty, file, cmd, arg);
+			return retval;
+		}
+	}
+
+	/*
+	 * Factor out some common prep work
+	 */
+	switch (cmd) {
+	case TIOCSETD:
+	case TIOCSBRK:
+	case TIOCCBRK:
+	case TCSBRK:
+	case TCSBRKP:			
+		retval = tty_check_change(tty);
+		if (retval)
+			return retval;
+		if (cmd != TIOCCBRK) {
+			tty_wait_until_sent(tty, 0);
+			if (signal_pending(current))
+				return -EINTR;
+		}
+		break;
+	}
+
+	switch (cmd) {
+		case TIOCSTI:
+			return tiocsti(tty, (char *)arg);
+		case TIOCGWINSZ:
+			return tiocgwinsz(tty, (struct winsize *) arg);
+		case TIOCSWINSZ:
+			return tiocswinsz(tty, real_tty, (struct winsize *) arg);
+		case TIOCCONS:
+			return real_tty!=tty ? -EINVAL : tioccons(inode, file);
+		case FIONBIO:
+			return fionbio(file, (int *) arg);
+		case TIOCEXCL:
+			set_bit(TTY_EXCLUSIVE, &tty->flags);
+			return 0;
+		case TIOCNXCL:
+			clear_bit(TTY_EXCLUSIVE, &tty->flags);
+			return 0;
+		case TIOCNOTTY:
+			if (current->tty != tty)
+				return -ENOTTY;
+			if (current->leader)
+				disassociate_ctty(0);
+			task_lock(current);
+			current->tty = NULL;
+			task_unlock(current);
+			return 0;
+		case TIOCSCTTY:
+			return tiocsctty(tty, arg);
+		case TIOCGPGRP:
+			return tiocgpgrp(tty, real_tty, (pid_t *) arg);
+		case TIOCSPGRP:
+			return tiocspgrp(tty, real_tty, (pid_t *) arg);
+		case TIOCGSID:
+			return tiocgsid(tty, real_tty, (pid_t *) arg);
+		case TIOCGETD:
+			/* FIXME: check this is ok */
+			return put_user(tty->ldisc.num, (int *) arg);
+		case TIOCSETD:
+			return tiocsetd(tty, (int *) arg);
+#ifdef CONFIG_VT
+		case TIOCLINUX:
+			return tioclinux(tty, arg);
+#endif
+		case TIOCTTYGSTRUCT:
+			return tiocttygstruct(tty, (struct tty_struct *) arg);
+
+		/*
+		 * Break handling
+		 */
+		case TIOCSBRK:	/* Turn break on, unconditionally */
+			tty->driver.break_ctl(tty, -1);
+			return 0;
+			
+		case TIOCCBRK:	/* Turn break off, unconditionally */
+			tty->driver.break_ctl(tty, 0);
+			return 0;
+		case TCSBRK:   /* SVID version: non-zero arg --> no break */
+			/*
+			 * XXX is the above comment correct, or the
+			 * code below correct?  Is this ioctl used at
+			 * all by anyone?
+			 */
+			if (!arg)
+				return send_break(tty, HZ/4);
+			return 0;
+		case TCSBRKP:	/* support for POSIX tcsendbreak() */	
+			return send_break(tty, arg ? arg*(HZ/10) : HZ/4);
+	}
+	if (tty->driver.ioctl) {
+		retval = (tty->driver.ioctl)(tty, file, cmd, arg);
+		if (retval != -ENOIOCTLCMD)
+			return retval;
+	}
+	ld = tty_ldisc_ref_wait(tty);
+	retval = -EINVAL;
+	if (ld->ioctl) {
+		retval = ld->ioctl(tty, file, cmd, arg);
+		if (retval == -ENOIOCTLCMD)
+			retval = -EINVAL;
+	}
+	tty_ldisc_deref(ld);
+	return retval;
+}
+
+
+/*
+ * This implements the "Secure Attention Key" ---  the idea is to
+ * prevent trojan horses by killing all processes associated with this
+ * tty when the user hits the "Secure Attention Key".  Required for
+ * super-paranoid applications --- see the Orange Book for more details.
+ * 
+ * This code could be nicer; ideally it should send a HUP, wait a few
+ * seconds, then send a INT, and then a KILL signal.  But you then
+ * have to coordinate with the init process, since all processes associated
+ * with the current tty must be dead before the new getty is allowed
+ * to spawn.
+ *
+ * Now, if it would be correct ;-/ The current code has a nasty hole -
+ * it doesn't catch files in flight. We may send the descriptor to ourselves
+ * via AF_UNIX socket, close it and later fetch from socket. FIXME.
+ *
+ * Nasty bug: do_SAK is being called in interrupt context.  This can
+ * deadlock.  We punt it up to process context.  AKPM - 16Mar2001
+ */
+static void __do_SAK(void *arg)
+{
+#ifdef TTY_SOFT_SAK
+	tty_hangup(tty);
+#else
+	struct tty_struct *tty = arg;
+	struct task_struct *p;
+	int session;
+	int		i;
+	struct file	*filp;
+	struct tty_ldisc *disc;
+	
+	if (!tty)
+		return;
+	session  = tty->session;
+	/* We don't want an ldisc switch during this */
+	disc = tty_ldisc_ref(tty);
+	if (disc && disc->flush_buffer)
+		disc->flush_buffer(tty);
+	tty_ldisc_deref(disc);
+
+	if (tty->driver.flush_buffer)
+		tty->driver.flush_buffer(tty);
+
+	read_lock(&tasklist_lock);
+	for_each_task(p) {
+		if ((p->tty == tty) ||
+		    ((session > 0) && (p->session == session))) {
+			send_sig(SIGKILL, p, 1);
+			continue;
+		}
+		task_lock(p);
+		if (p->files) {
+			read_lock(&p->files->file_lock);
+			for (i=0; i < p->files->max_fds; i++) {
+				filp = fcheck_files(p->files, i);
+				if (filp && (filp->f_op == &tty_fops) &&
+				    (filp->private_data == tty)) {
+					send_sig(SIGKILL, p, 1);
+					break;
+				}
+			}
+			read_unlock(&p->files->file_lock);
+		}
+		task_unlock(p);
+	}
+	read_unlock(&tasklist_lock);
+#endif
+}
+
+/*
+ * The tq handling here is a little racy - tty->SAK_tq may already be queued.
+ * But there's no mechanism to fix that without futzing with tqueue_lock.
+ * Fortunately we don't need to worry, because if ->SAK_tq is already queued,
+ * the values which we write to it will be identical to the values which it
+ * already has. --akpm
+ */
+void do_SAK(struct tty_struct *tty)
+{
+	if (!tty)
+		return;
+	PREPARE_TQUEUE(&tty->SAK_tq, __do_SAK, tty);
+	schedule_task(&tty->SAK_tq);
+}
+
+/*
+ * This routine is called out of the software interrupt to flush data
+ * from the flip buffer to the line discipline.
+ */
+static void flush_to_ldisc(void *private_)
+{
+	struct tty_struct *tty = (struct tty_struct *) private_;
+	unsigned char	*cp;
+	char		*fp;
+	int		count;
+	unsigned long 	flags;
+	struct tty_ldisc *disc;
+
+	disc = tty_ldisc_ref(tty);
+	if (disc == NULL)       /*  !TTY_LDISC */
+		return;
+
+	if (test_bit(TTY_DONT_FLIP, &tty->flags)) {
+		queue_task(&tty->flip.tqueue, &tq_timer);
+		goto out;
+	}
+	if (tty->flip.buf_num) {
+		cp = tty->flip.char_buf + TTY_FLIPBUF_SIZE;
+		fp = tty->flip.flag_buf + TTY_FLIPBUF_SIZE;
+		tty->flip.buf_num = 0;
+
+		save_flags(flags); cli();
+		tty->flip.char_buf_ptr = tty->flip.char_buf;
+		tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+	} else {
+		cp = tty->flip.char_buf;
+		fp = tty->flip.flag_buf;
+		tty->flip.buf_num = 1;
+
+		save_flags(flags); cli();
+		tty->flip.char_buf_ptr = tty->flip.char_buf + TTY_FLIPBUF_SIZE;
+		tty->flip.flag_buf_ptr = tty->flip.flag_buf + TTY_FLIPBUF_SIZE;
+	}
+	count = tty->flip.count;
+	tty->flip.count = 0;
+	restore_flags(flags);
+	
+	disc->receive_buf(tty, cp, fp, count);
+out:
+	tty_ldisc_deref(disc);
+}
+
+/*
+ *     Call the ldisc flush directly from a driver. This function may
+ *     return an error and need retrying by the user.
+ */
+
+int tty_push_data(struct tty_struct *tty, unsigned char *cp, unsigned char *fp, int count)
+{
+	int ret = 0;
+	struct tty_ldisc *disc;
+	
+	disc = tty_ldisc_ref(tty);
+	if(test_bit(TTY_DONT_FLIP, &tty->flags))
+		ret = -EAGAIN;
+	else if(disc == NULL)
+		ret = -EIO;
+	else
+		disc->receive_buf(tty, cp, fp, count);
+	tty_ldisc_deref(disc);
+	return ret;
+
+}
+
+/*
+ * Routine which returns the baud rate of the tty
+ *
+ * Note that the baud_table needs to be kept in sync with the
+ * include/asm/termbits.h file.
+ */
+static int baud_table[] = {
+	0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800,
+	9600, 19200, 38400, 57600, 115200, 230400, 460800,
+#ifdef __sparc__
+	76800, 153600, 307200, 614400, 921600
+#else
+	500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000,
+	2500000, 3000000, 3500000, 4000000
+#endif
+};
+
+static int n_baud_table = sizeof(baud_table)/sizeof(int);
+
+int tty_get_baud_rate(struct tty_struct *tty)
+{
+	unsigned int cflag, i;
+
+	cflag = tty->termios->c_cflag;
+
+	i = cflag & CBAUD;
+	if (i & CBAUDEX) {
+		i &= ~CBAUDEX;
+		if (i < 1 || i+15 >= n_baud_table) 
+			tty->termios->c_cflag &= ~CBAUDEX;
+		else
+			i += 15;
+	}
+	if (i==15 && tty->alt_speed) {
+		if (!tty->warned) {
+			printk(KERN_WARNING "Use of setserial/setrocket to "
+					    "set SPD_* flags is deprecated\n");
+			tty->warned = 1;
+		}
+		return(tty->alt_speed);
+	}
+	
+	return baud_table[i];
+}
+
+void tty_flip_buffer_push(struct tty_struct *tty)
+{
+	if (tty->low_latency)
+		flush_to_ldisc((void *) tty);
+	else
+		queue_task(&tty->flip.tqueue, &tq_timer);
+}
+
+/*
+ * This subroutine initializes a tty structure.
+ */
+static void initialize_tty_struct(struct tty_struct *tty)
+{
+	memset(tty, 0, sizeof(struct tty_struct));
+	tty->magic = TTY_MAGIC;
+	tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
+	tty->pgrp = -1;
+	tty->flip.char_buf_ptr = tty->flip.char_buf;
+	tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+	tty->flip.tqueue.routine = flush_to_ldisc;
+	tty->flip.tqueue.data = tty;
+	init_MUTEX(&tty->flip.pty_sem);
+	init_MUTEX(&tty->termios_sem);
+	init_waitqueue_head(&tty->write_wait);
+	init_waitqueue_head(&tty->read_wait);
+	tty->tq_hangup.routine = do_tty_hangup;
+	tty->tq_hangup.data = tty;
+	sema_init(&tty->atomic_read, 1);
+	sema_init(&tty->atomic_write, 1);
+	spin_lock_init(&tty->read_lock);
+	INIT_LIST_HEAD(&tty->tty_files);
+	INIT_TQUEUE(&tty->SAK_tq, 0, 0);
+}
+
+/*
+ * The default put_char routine if the driver did not define one.
+ */
+void tty_default_put_char(struct tty_struct *tty, unsigned char ch)
+{
+	tty->driver.write(tty, 0, &ch, 1);
+}
+
+/*
+ * Register a tty device described by <driver>, with minor number <minor>.
+ */
+void tty_register_devfs (struct tty_driver *driver, unsigned int flags, unsigned minor)
+{
+#ifdef CONFIG_DEVFS_FS
+	umode_t mode = S_IFCHR | S_IRUSR | S_IWUSR;
+	kdev_t device = MKDEV (driver->major, minor);
+	int idx = minor - driver->minor_start;
+	char buf[32];
+
+	switch (device) {
+		case TTY_DEV:
+		case PTMX_DEV:
+			mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+			break;
+		default:
+			if (driver->major == PTY_MASTER_MAJOR)
+				mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+			break;
+	}
+	if ( (minor <  driver->minor_start) || 
+	     (minor >= driver->minor_start + driver->num) ) {
+		printk(KERN_ERR "Attempt to register invalid minor number "
+		       "with devfs (%d:%d).\n", (int)driver->major,(int)minor);
+		return;
+	}
+#  ifdef CONFIG_UNIX98_PTYS
+	if ( (driver->major >= UNIX98_PTY_SLAVE_MAJOR) &&
+	     (driver->major < UNIX98_PTY_SLAVE_MAJOR + UNIX98_NR_MAJORS) )
+		flags |= DEVFS_FL_CURRENT_OWNER;
+#  endif
+	sprintf(buf, driver->name, idx + driver->name_base);
+	devfs_register (NULL, buf, flags | DEVFS_FL_DEFAULT,
+			driver->major, minor, mode, &tty_fops, NULL);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+void tty_unregister_devfs (struct tty_driver *driver, unsigned minor)
+{
+#ifdef CONFIG_DEVFS_FS
+	void * handle;
+	int idx = minor - driver->minor_start;
+	char buf[32];
+
+	sprintf(buf, driver->name, idx + driver->name_base);
+	handle = devfs_find_handle (NULL, buf, driver->major, minor,
+				    DEVFS_SPECIAL_CHR, 0);
+	devfs_unregister (handle);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+EXPORT_SYMBOL(tty_register_devfs);
+EXPORT_SYMBOL(tty_unregister_devfs);
+
+/*
+ * Called by a tty driver to register itself.
+ */
+int tty_register_driver(struct tty_driver *driver)
+{
+	int error;
+        int i;
+
+	if (driver->flags & TTY_DRIVER_INSTALLED)
+		return 0;
+
+	error = devfs_register_chrdev(driver->major, driver->name, &tty_fops);
+	if (error < 0)
+		return error;
+	else if(driver->major == 0)
+		driver->major = error;
+
+	if (!driver->put_char)
+		driver->put_char = tty_default_put_char;
+	
+	driver->prev = 0;
+	driver->next = tty_drivers;
+	if (tty_drivers) tty_drivers->prev = driver;
+	tty_drivers = driver;
+	
+	if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
+		for(i = 0; i < driver->num; i++)
+		    tty_register_devfs(driver, 0, driver->minor_start + i);
+	}
+	proc_tty_register_driver(driver);
+	return error;
+}
+
+/*
+ * Called by a tty driver to unregister itself.
+ */
+int tty_unregister_driver(struct tty_driver *driver)
+{
+	int	retval;
+	struct tty_driver *p;
+	int	i, found = 0;
+	struct termios *tp;
+	const char *othername = NULL;
+	
+	if (*driver->refcount)
+		return -EBUSY;
+
+	for (p = tty_drivers; p; p = p->next) {
+		if (p == driver)
+			found++;
+		else if (p->major == driver->major)
+			othername = p->name;
+	}
+	
+	if (!found)
+		return -ENOENT;
+
+	if (othername == NULL) {
+		retval = devfs_unregister_chrdev(driver->major, driver->name);
+		if (retval)
+			return retval;
+	} else
+		devfs_register_chrdev(driver->major, othername, &tty_fops);
+
+	if (driver->prev)
+		driver->prev->next = driver->next;
+	else
+		tty_drivers = driver->next;
+	
+	if (driver->next)
+		driver->next->prev = driver->prev;
+
+	/*
+	 * Free the termios and termios_locked structures because
+	 * we don't want to get memory leaks when modular tty
+	 * drivers are removed from the kernel.
+	 */
+	for (i = 0; i < driver->num; i++) {
+		tp = driver->termios[i];
+		if (tp) {
+			driver->termios[i] = NULL;
+			kfree(tp);
+		}
+		tp = driver->termios_locked[i];
+		if (tp) {
+			driver->termios_locked[i] = NULL;
+			kfree(tp);
+		}
+		tty_unregister_devfs(driver, driver->minor_start + i);
+	}
+	proc_tty_unregister_driver(driver);
+	return 0;
+}
+
+
+/*
+ * Initialize the console device. This is called *early*, so
+ * we can't necessarily depend on lots of kernel help here.
+ * Just do some early initializations, and do the complex setup
+ * later.
+ */
+void __init console_init(void)
+{
+	/* Setup the default TTY line discipline. */
+	memset(tty_ldiscs, 0, NR_LDISCS*sizeof(struct tty_ldisc));
+	(void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY);
+
+	/*
+	 * Set up the standard termios.  Individual tty drivers may 
+	 * deviate from this; this is used as a template.
+	 */
+	memset(&tty_std_termios, 0, sizeof(struct termios));
+	memcpy(tty_std_termios.c_cc, INIT_C_CC, NCCS);
+	tty_std_termios.c_iflag = ICRNL | IXON;
+	tty_std_termios.c_oflag = OPOST | ONLCR;
+	tty_std_termios.c_cflag = B38400 | CS8 | CREAD | HUPCL;
+	tty_std_termios.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
+		ECHOCTL | ECHOKE | IEXTEN;
+
+	/*
+	 * set up the console device so that later boot sequences can 
+	 * inform about problems etc..
+	 */
+#ifdef CONFIG_EARLY_PRINTK
+	disable_early_printk(); 
+#endif
+
+#ifdef CONFIG_XEN_CONSOLE
+        xen_console_init();
+#endif
+
+#ifdef CONFIG_VT
+	con_init();
+#endif
+#ifdef CONFIG_AU1X00_SERIAL_CONSOLE
+	au1x00_serial_console_init();
+#endif
+#ifdef CONFIG_SERIAL_CONSOLE
+#if (defined(CONFIG_8xx) || defined(CONFIG_CPM2))
+	console_8xx_init();
+#elif defined(CONFIG_MAC_SERIAL) && defined(CONFIG_SERIAL)
+	if (_machine == _MACH_Pmac)
+ 		mac_scc_console_init();
+	else
+		serial_console_init();
+#elif defined(CONFIG_MAC_SERIAL)
+ 	mac_scc_console_init();
+#elif defined(CONFIG_PARISC)
+	pdc_console_init();
+#elif defined(CONFIG_SERIAL)
+	serial_console_init();
+#endif /* CONFIG_8xx */
+#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC)
+	vme_scc_console_init();
+#endif
+#if defined(CONFIG_SERIAL167)
+	serial167_console_init();
+#endif
+#if defined(CONFIG_SH_SCI)
+	sci_console_init();
+#endif
+#endif
+#ifdef CONFIG_SERIAL_DEC_CONSOLE
+	dec_serial_console_init();
+#endif
+#ifdef CONFIG_TN3270_CONSOLE
+	tub3270_con_init();
+#endif
+#ifdef CONFIG_TN3215
+	con3215_init();
+#endif
+#ifdef CONFIG_HWC
+        hwc_console_init();
+#endif
+#ifdef CONFIG_STDIO_CONSOLE
+	stdio_console_init();
+#endif
+#ifdef CONFIG_SERIAL_21285_CONSOLE
+	rs285_console_init();
+#endif
+#ifdef CONFIG_SERIAL_SA1100_CONSOLE
+	sa1100_rs_console_init();
+#endif
+#ifdef CONFIG_ARC_CONSOLE
+	arc_console_init();
+#endif
+#ifdef CONFIG_SERIAL_AMBA_CONSOLE
+	ambauart_console_init();
+#endif
+#ifdef CONFIG_SERIAL_TX3912_CONSOLE
+	tx3912_console_init();
+#endif
+#ifdef CONFIG_TXX927_SERIAL_CONSOLE
+	txx927_console_init();
+#endif
+#ifdef CONFIG_SERIAL_TXX9_CONSOLE
+	txx9_serial_console_init();
+#endif
+#ifdef CONFIG_SIBYTE_SB1250_DUART_CONSOLE
+	sb1250_serial_console_init();
+#endif
+#ifdef CONFIG_IP22_SERIAL
+	sgi_serial_console_init();
+#endif
+}
+
+static struct tty_driver dev_tty_driver, dev_syscons_driver;
+#ifdef CONFIG_UNIX98_PTYS
+static struct tty_driver dev_ptmx_driver;
+#endif
+#ifdef CONFIG_HVC_CONSOLE
+	hvc_console_init();
+#endif
+#ifdef CONFIG_VT
+static struct tty_driver dev_console_driver;
+#endif
+
+/*
+ * Ok, now we can initialize the rest of the tty devices and can count
+ * on memory allocations, interrupts etc..
+ */
+void __init tty_init(void)
+{
+	/*
+	 * dev_tty_driver and dev_console_driver are actually magic
+	 * devices which get redirected at open time.  Nevertheless,
+	 * we register them so that register_chrdev is called
+	 * appropriately.
+	 */
+	memset(&dev_tty_driver, 0, sizeof(struct tty_driver));
+	dev_tty_driver.magic = TTY_DRIVER_MAGIC;
+	dev_tty_driver.driver_name = "/dev/tty";
+	dev_tty_driver.name = dev_tty_driver.driver_name + 5;
+	dev_tty_driver.name_base = 0;
+	dev_tty_driver.major = TTYAUX_MAJOR;
+	dev_tty_driver.minor_start = 0;
+	dev_tty_driver.num = 1;
+	dev_tty_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+	dev_tty_driver.subtype = SYSTEM_TYPE_TTY;
+	
+	if (tty_register_driver(&dev_tty_driver))
+		panic("Couldn't register /dev/tty driver\n");
+
+	dev_syscons_driver = dev_tty_driver;
+	dev_syscons_driver.driver_name = "/dev/console";
+	dev_syscons_driver.name = dev_syscons_driver.driver_name + 5;
+	dev_syscons_driver.major = TTYAUX_MAJOR;
+	dev_syscons_driver.minor_start = 1;
+	dev_syscons_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+	dev_syscons_driver.subtype = SYSTEM_TYPE_SYSCONS;
+
+	if (tty_register_driver(&dev_syscons_driver))
+		panic("Couldn't register /dev/console driver\n");
+
+	/* console calls tty_register_driver() before kmalloc() works.
+	 * Thus, we can't devfs_register() then.  Do so now, instead. 
+	 */
+#ifdef CONFIG_VT
+	con_init_devfs();
+#endif
+
+#ifdef CONFIG_UNIX98_PTYS
+	dev_ptmx_driver = dev_tty_driver;
+	dev_ptmx_driver.driver_name = "/dev/ptmx";
+	dev_ptmx_driver.name = dev_ptmx_driver.driver_name + 5;
+	dev_ptmx_driver.major= MAJOR(PTMX_DEV);
+	dev_ptmx_driver.minor_start = MINOR(PTMX_DEV);
+	dev_ptmx_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+	dev_ptmx_driver.subtype = SYSTEM_TYPE_SYSPTMX;
+
+	if (tty_register_driver(&dev_ptmx_driver))
+		panic("Couldn't register /dev/ptmx driver\n");
+#endif
+	
+#ifdef CONFIG_VT
+	dev_console_driver = dev_tty_driver;
+	dev_console_driver.driver_name = "/dev/vc/0";
+	dev_console_driver.name = dev_console_driver.driver_name + 5;
+	dev_console_driver.major = TTY_MAJOR;
+	dev_console_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+	dev_console_driver.subtype = SYSTEM_TYPE_CONSOLE;
+
+	if (tty_register_driver(&dev_console_driver))
+		panic("Couldn't register /dev/tty0 driver\n");
+
+	kbd_init();
+#endif
+
+#ifdef CONFIG_SGI_L1_SERIAL_CONSOLE
+	if (ia64_platform_is("sn2")) {
+		sn_sal_serial_console_init();
+		return; /* only one console right now for SN2 */
+	}
+#endif
+#ifdef CONFIG_ESPSERIAL  /* init ESP before rs, so rs doesn't see the port */
+	espserial_init();
+#endif
+#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC)
+	vme_scc_init();
+#endif
+#ifdef CONFIG_SERIAL_TX3912
+	tx3912_rs_init();
+#endif
+#ifdef CONFIG_ROCKETPORT
+	rp_init();
+#endif
+#ifdef CONFIG_SERIAL167
+	serial167_init();
+#endif
+#ifdef CONFIG_CYCLADES
+	cy_init();
+#endif
+#ifdef CONFIG_STALLION
+	stl_init();
+#endif
+#ifdef CONFIG_ISTALLION
+	stli_init();
+#endif
+#ifdef CONFIG_DIGI
+	pcxe_init();
+#endif
+#ifdef CONFIG_DIGIEPCA
+	pc_init();
+#endif
+#ifdef CONFIG_SPECIALIX
+	specialix_init();
+#endif
+#if (defined(CONFIG_8xx) || defined(CONFIG_CPM2))
+	rs_8xx_init();
+#endif /* CONFIG_8xx */
+	pty_init();
+#ifdef CONFIG_MOXA_SMARTIO
+	mxser_init();
+#endif	
+#ifdef CONFIG_MOXA_INTELLIO
+	moxa_init();
+#endif	
+#ifdef CONFIG_VT
+	vcs_init();
+#endif
+#ifdef CONFIG_TN3270
+	tub3270_init();
+#endif
+#ifdef CONFIG_TN3215
+	tty3215_init();
+#endif
+#ifdef CONFIG_HWC
+	hwc_tty_init();
+#endif
+#ifdef CONFIG_A2232
+	a2232board_init();
+#endif
+}
diff --git a/linux-2.4-xen-sparse/drivers/scsi/aic7xxx/Makefile b/linux-2.4-xen-sparse/drivers/scsi/aic7xxx/Makefile
new file mode 100644
index 0000000000..16ac7f1a6d
--- /dev/null
+++ b/linux-2.4-xen-sparse/drivers/scsi/aic7xxx/Makefile
@@ -0,0 +1,97 @@
+#
+# drivers/scsi/aic7xxx/Makefile
+#
+# Makefile for the Linux aic7xxx SCSI driver.
+#
+
+O_TARGET := aic7xxx_drv.o
+
+list-multi	:= aic7xxx.o aic79xx.o
+
+obj-$(CONFIG_SCSI_AIC7XXX)	+= aic7xxx.o
+ifeq ($(CONFIG_PCI),y)
+obj-$(CONFIG_SCSI_AIC79XX)	+= aic79xx.o
+endif
+
+EXTRA_CFLAGS += -I$(TOPDIR)/drivers/scsi -Werror
+#EXTRA_CFLAGS += -g
+
+# Platform Specific Files
+obj-aic7xxx = aic7xxx_osm.o aic7xxx_proc.o
+
+# Core Files
+obj-aic7xxx += aic7xxx_core.o aic7xxx_93cx6.o
+ifeq ($(CONFIG_AIC7XXX_REG_PRETTY_PRINT),y)
+obj-aic7xxx += aic7xxx_reg_print.o
+endif
+
+#EISA Specific Files
+AIC7XXX_EISA_ARCH = $(filter i386 alpha xen,$(ARCH))
+ifneq ($(AIC7XXX_EISA_ARCH),)
+obj-aic7xxx += aic7770.o
+# Platform Specific EISA Files
+obj-aic7xxx += aic7770_osm.o
+endif
+
+#PCI Specific Files
+ifeq ($(CONFIG_PCI),y)
+obj-aic7xxx += aic7xxx_pci.o
+# Platform Specific PCI Files
+obj-aic7xxx += aic7xxx_osm_pci.o
+endif
+
+# Platform Specific U320 Files
+obj-aic79xx = aic79xx_osm.o aic79xx_proc.o aic79xx_osm_pci.o
+# Core Files
+obj-aic79xx += aic79xx_core.o aic79xx_pci.o
+ifeq ($(CONFIG_AIC79XX_REG_PRETTY_PRINT),y)
+obj-aic79xx += aic79xx_reg_print.o
+endif
+
+# Override our module desitnation
+MOD_DESTDIR = $(shell cd .. && $(CONFIG_SHELL) $(TOPDIR)/scripts/pathdown.sh)
+
+include $(TOPDIR)/Rules.make
+
+aic7xxx_core.o: aic7xxx_seq.h
+$(obj-aic7xxx): aic7xxx_reg.h
+aic7xxx.o: aic7xxx_seq.h aic7xxx_reg.h $(obj-aic7xxx)
+	$(LD) $(LD_RFLAG) -r -o $@ $(obj-aic7xxx)
+
+aic79xx_core.o: aic79xx_seq.h
+$(obj-aic79xx): aic79xx_reg.h
+aic79xx.o: aic79xx_seq.h aic79xx_reg.h $(obj-aic79xx)
+	$(LD) $(LD_RFLAG) -r -o $@ $(obj-aic79xx)
+
+ifeq ($(CONFIG_AIC7XXX_BUILD_FIRMWARE),y)
+aic7xxx_gen = aic7xxx_seq.h aic7xxx_reg.h
+ifeq ($(CONFIG_AIC7XXX_REG_PRETTY_PRINT),y)
+aic7xxx_gen += aic7xxx_reg_print.c
+aic7xxx_asm_cmd = aicasm/aicasm -I. -r aic7xxx_reg.h		\
+		 -p aic7xxx_reg_print.c -i aic7xxx_osm.h	\
+		 -o aic7xxx_seq.h aic7xxx.seq
+else
+aic7xxx_asm_cmd = aicasm/aicasm -I. -r aic7xxx_reg.h		\
+		 -o aic7xxx_seq.h aic7xxx.seq
+endif
+$(aic7xxx_gen): aic7xxx.seq aic7xxx.reg aicasm/aicasm
+	$(aic7xxx_asm_cmd)
+endif
+
+ifeq ($(CONFIG_AIC79XX_BUILD_FIRMWARE),y)
+aic79xx_gen = aic79xx_seq.h aic79xx_reg.h
+ifeq ($(CONFIG_AIC79XX_REG_PRETTY_PRINT),y)
+aic79xx_gen += aic79xx_reg_print.c
+aic79xx_asm_cmd = aicasm/aicasm -I. -r aic79xx_reg.h		\
+		 -p aic79xx_reg_print.c -i aic79xx_osm.h	\
+		 -o aic79xx_seq.h aic79xx.seq
+else
+aic79xx_asm_cmd = aicasm/aicasm -I. -r aic79xx_reg.h \
+		 -o aic79xx_seq.h aic79xx.seq
+endif
+$(aic79xx_gen): aic79xx.seq aic79xx.reg aicasm/aicasm
+	$(aic79xx_asm_cmd)
+endif
+
+aicasm/aicasm: aicasm/*.[chyl]
+	$(MAKE) -C aicasm
diff --git a/linux-2.4-xen-sparse/include/asm-xen/bugs.h b/linux-2.4-xen-sparse/include/asm-xen/bugs.h
new file mode 100644
index 0000000000..c46b6a0b15
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/bugs.h
@@ -0,0 +1,53 @@
+/*
+ *  include/asm-i386/bugs.h
+ *
+ *  Copyright (C) 1994  Linus Torvalds
+ *
+ *  Cyrix stuff, June 1998 by:
+ *	- Rafael R. Reilova (moved everything from head.S),
+ *        <rreilova@ececs.uc.edu>
+ *	- Channing Corn (tests & fixes),
+ *	- Andrew D. Balsa (code cleanup).
+ *
+ *  Pentium III FXSR, SSE support
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ *	void check_bugs(void);
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+
+
+static void __init check_fpu(void)
+{
+    boot_cpu_data.fdiv_bug = 0;
+}
+
+static void __init check_hlt(void)
+{
+    boot_cpu_data.hlt_works_ok = 1;
+}
+
+static void __init check_bugs(void)
+{
+	extern void __init boot_init_fpu(void);
+
+	identify_cpu(&boot_cpu_data);
+	boot_init_fpu();
+#ifndef CONFIG_SMP
+	printk("CPU: ");
+	print_cpu_info(&boot_cpu_data);
+#endif
+	check_fpu();
+	check_hlt();
+    system_utsname.machine[1] = '0' + 
+        (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+}
diff --git a/linux-2.4-xen-sparse/include/asm-xen/desc.h b/linux-2.4-xen-sparse/include/asm-xen/desc.h
new file mode 100644
index 0000000000..b59b998d95
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/desc.h
@@ -0,0 +1,37 @@
+#ifndef __ARCH_DESC_H
+#define __ARCH_DESC_H
+
+#include <asm/ldt.h>
+
+#ifndef __ASSEMBLY__
+
+struct desc_struct {
+	unsigned long a,b;
+};
+
+struct Xgt_desc_struct {
+	unsigned short size;
+	unsigned long address __attribute__((packed));
+};
+
+extern struct desc_struct default_ldt[];
+
+static inline void clear_LDT(void)
+{
+    xen_set_ldt(0, 0);
+}
+
+static inline void load_LDT(mm_context_t *pc)
+{
+    void *segments = pc->ldt;
+    int count = pc->size;
+    
+    if ( count == 0 )
+        segments = NULL;
+    
+    xen_set_ldt((unsigned long)segments, count);               
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ARCH_DESC_H__ */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/fixmap.h b/linux-2.4-xen-sparse/include/asm-xen/fixmap.h
new file mode 100644
index 0000000000..255ac4a468
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/fixmap.h
@@ -0,0 +1,107 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <asm-xen/gnttab.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#endif
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special  addresses
+ * from the end of virtual memory (0xfffff000) backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages. (or larger if used with an increment
+ * highger than 1) use fixmap_set(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+
+enum fixed_addresses {
+#ifdef CONFIG_HIGHMEM
+	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
+	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+	FIX_BLKRING_BASE,
+	FIX_NETRING0_BASE,
+	FIX_NETRING1_BASE,
+	FIX_NETRING2_BASE,
+	FIX_NETRING3_BASE,
+        FIX_SHARED_INFO,
+	FIX_GNTTAB_BEGIN,
+    FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1,
+#ifdef CONFIG_VGA_CONSOLE
+#define NR_FIX_BTMAPS   32  /* 128KB For the Dom0 VGA Console A0000-C0000 */
+#else
+#define NR_FIX_BTMAPS   1   /* in case anyone wants it in future... */
+#endif
+        FIX_BTMAP_END,
+        FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
+	/* our bt_ioremap is permanent, unlike other architectures */
+	
+	__end_of_permanent_fixed_addresses,
+	__end_of_fixed_addresses = __end_of_permanent_fixed_addresses
+};
+
+extern void __set_fixmap (enum fixed_addresses idx,
+					unsigned long phys, pgprot_t flags);
+
+#define set_fixmap(idx, phys) \
+		__set_fixmap(idx, phys, PAGE_KERNEL)
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+		__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+
+extern void clear_fixmap(enum fixed_addresses idx);
+
+/*
+ * used by vmalloc.c.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap, and leave one page empty
+ * at the top of mem..
+ */
+#define FIXADDR_TOP   (HYPERVISOR_VIRT_START - 2*PAGE_SIZE)
+#define __FIXADDR_SIZE        (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
+
+#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without tranlation, we catch the bug with a NULL-deference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(unsigned int idx)
+{
+        return __fix_to_virt(idx);
+}
+
+#endif
diff --git a/linux-2.4-xen-sparse/include/asm-xen/highmem.h b/linux-2.4-xen-sparse/include/asm-xen/highmem.h
new file mode 100644
index 0000000000..25ef32882c
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/highmem.h
@@ -0,0 +1,132 @@
+/*
+ * highmem.h: virtual kernel memory mappings for high memory
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ *		      Gerhard.Wichert@pdb.siemens.de
+ *
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with 
+ * up to 16 Terabyte physical memory. With current x86 CPUs
+ * we now support up to 64 Gigabytes physical RAM.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <asm/kmap_types.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+#define HIGHMEM_DEBUG 1
+#else
+#define HIGHMEM_DEBUG 0
+#endif
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *kmap_pte;
+extern pgprot_t kmap_prot;
+extern pte_t *pkmap_page_table;
+
+extern void kmap_init(void) __init;
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ */
+#define PKMAP_BASE (HYPERVISOR_VIRT_START - (1<<23))
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+#define LAST_PKMAP_MASK (LAST_PKMAP-1)
+#define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+extern void * FASTCALL(kmap_high(struct page *page, int nonblocking));
+extern void FASTCALL(kunmap_high(struct page *page));
+
+#define kmap(page) __kmap(page, 0)
+#define kmap_nonblock(page) __kmap(page, 1)
+
+static inline void *__kmap(struct page *page, int nonblocking)
+{
+	if (in_interrupt())
+		out_of_line_bug();
+	if (page < highmem_start_page)
+		return page_address(page);
+	return kmap_high(page, nonblocking);
+}
+
+static inline void kunmap(struct page *page)
+{
+	if (in_interrupt())
+		out_of_line_bug();
+	if (page < highmem_start_page)
+		return;
+	kunmap_high(page);
+}
+
+/*
+ * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
+ * gives a more generic (and caching) interface. But kmap_atomic can
+ * be used in IRQ contexts, so in some (very limited) cases we need
+ * it.
+ */
+static inline void *kmap_atomic(struct page *page, enum km_type type)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	if (page < highmem_start_page)
+		return page_address(page);
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#if HIGHMEM_DEBUG
+	if (!pte_none(*(kmap_pte-idx)))
+		out_of_line_bug();
+#endif
+	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+	__flush_tlb_one(vaddr);
+
+	return (void*) vaddr;
+}
+
+static inline void kunmap_atomic(void *kvaddr, enum km_type type)
+{
+#if HIGHMEM_DEBUG
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+	if (vaddr < FIXADDR_START) // FIXME
+		return;
+
+	if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
+		out_of_line_bug();
+
+	/*
+	 * force other mappings to Oops if they'll try to access
+	 * this pte without first remap it
+	 */
+	pte_clear(kmap_pte-idx);
+	__flush_tlb_one(vaddr);
+#endif
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_HIGHMEM_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/hw_irq.h b/linux-2.4-xen-sparse/include/asm-xen/hw_irq.h
new file mode 100644
index 0000000000..d99d15bd24
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/hw_irq.h
@@ -0,0 +1,61 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/*
+ *	linux/include/asm/hw_irq.h
+ *
+ *	(C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ */
+
+#include <linux/config.h>
+#include <linux/smp.h>
+#include <asm/atomic.h>
+#include <asm/irq.h>
+
+#define SYSCALL_VECTOR		0x80
+
+extern int irq_vector[NR_IRQS];
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+extern char _stext, _etext;
+
+extern unsigned long prof_cpu_mask;
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+/*
+ * x86 profiling function, SMP safe. We might want to do this in
+ * assembly totally?
+ */
+static inline void x86_do_profile (unsigned long eip)
+{
+        if (!prof_buffer)
+                return;
+
+        /*
+         * Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
+         * (default is all CPUs.)
+         */
+        if (!((1<<smp_processor_id()) & prof_cpu_mask))
+                return;
+
+        eip -= (unsigned long) &_stext;
+        eip >>= prof_shift;
+        /*
+         * Don't ignore out-of-bounds EIP values silently,
+         * put them into the last histogram slot, so if
+         * present, they will show up as a sharp peak.
+         */
+        if (eip > prof_len-1)
+                eip = prof_len-1;
+        atomic_inc((atomic_t *)&prof_buffer[eip]);
+}
+
+static inline void hw_resend_irq(struct hw_interrupt_type *h,
+                                 unsigned int i)
+{}
+
+#endif /* _ASM_HW_IRQ_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/io.h b/linux-2.4-xen-sparse/include/asm-xen/io.h
new file mode 100644
index 0000000000..3ea2752635
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/io.h
@@ -0,0 +1,457 @@
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#include <linux/config.h>
+
+/*
+ * This file contains the definitions for the x86 IO instructions
+ * inb/inw/inl/outb/outw/outl and the "string versions" of the same
+ * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
+ * versions of the single-IO instructions (inb_p/inw_p/..).
+ *
+ * This file is not meant to be obfuscating: it's just complicated
+ * to (a) handle it all in a way that makes gcc able to optimize it
+ * as well as possible and (b) trying to avoid writing the same thing
+ * over and over again with slight variations and possibly making a
+ * mistake somewhere.
+ */
+
+/*
+ * Thanks to James van Artsdalen for a better timing-fix than
+ * the two short jumps: using outb's to a nonexistent port seems
+ * to guarantee better timings even on fast machines.
+ *
+ * On the other hand, I'd like to be sure of a non-existent port:
+ * I feel a bit unsafe about using 0x80 (should be safe, though)
+ *
+ *		Linus
+ */
+
+ /*
+  *  Bit simplified and optimized by Jan Hubicka
+  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
+  *
+  *  isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
+  *  isa_read[wl] and isa_write[wl] fixed
+  *  - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+  */
+
+#define IO_SPACE_LIMIT 0xffff
+
+#define XQUAD_PORTIO_BASE 0xfe400000
+#define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
+#define XQUAD_PORTIO_LEN  0x80000  /* Only remapping first 2 quads */
+
+#ifdef __KERNEL__
+
+#include <linux/vmalloc.h>
+
+/*
+ * Temporary debugging check to catch old code using
+ * unmapped ISA addresses. Will be removed in 2.4.
+ */
+#if CONFIG_DEBUG_IOVIRT
+  extern void *__io_virt_debug(unsigned long x, const char *file, int line);
+  extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line);
+  #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__)
+//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__)
+#else
+  #define __io_virt(x) ((void *)(x))
+//#define __io_phys(x) __pa(x)
+#endif
+
+/**
+ *	virt_to_phys	-	map virtual addresses to physical
+ *	@address: address to remap
+ *
+ *	The returned physical address is the physical (CPU) mapping for
+ *	the memory address given. It is only valid to use this function on
+ *	addresses directly mapped or allocated via kmalloc. 
+ *
+ *	This function does not give bus mappings for DMA transfers. In
+ *	almost all conceivable cases a device driver should not be using
+ *	this function
+ */
+ 
+static inline unsigned long virt_to_phys(volatile void * address)
+{
+	return __pa(address);
+}
+
+/**
+ *	phys_to_virt	-	map physical address to virtual
+ *	@address: address to remap
+ *
+ *	The returned virtual address is a current CPU mapping for
+ *	the memory address given. It is only valid to use this function on
+ *	addresses that have a kernel mapping
+ *
+ *	This function does not handle bus mappings for DMA transfers. In
+ *	almost all conceivable cases a device driver should not be using
+ *	this function
+ */
+
+static inline void * phys_to_virt(unsigned long address)
+{
+	return __va(address);
+}
+
+/*
+ * We define page_to_phys 'incorrectly' because it is used when merging blkdev 
+ * requests, and the correct thing to do there is to use machine addresses.
+ */
+#define page_to_phys(_x) phys_to_machine(((_x) - mem_map) << PAGE_SHIFT)
+
+extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
+
+/**
+ *	ioremap		-	map bus memory into CPU space
+ *	@offset:	bus address of the memory
+ *	@size:		size of the resource to map
+ *
+ *	ioremap performs a platform specific sequence of operations to
+ *	make bus memory CPU accessible via the readb/readw/readl/writeb/
+ *	writew/writel functions and the other mmio helpers. The returned
+ *	address is not guaranteed to be usable directly as a virtual
+ *	address. 
+ */
+ 
+static inline void * ioremap (unsigned long offset, unsigned long size)
+{
+	return __ioremap(offset, size, 0);
+}
+
+/**
+ *	ioremap_nocache		-	map bus memory into CPU space
+ *	@offset:	bus address of the memory
+ *	@size:		size of the resource to map
+ *
+ *	ioremap_nocache performs a platform specific sequence of operations to
+ *	make bus memory CPU accessible via the readb/readw/readl/writeb/
+ *	writew/writel functions and the other mmio helpers. The returned
+ *	address is not guaranteed to be usable directly as a virtual
+ *	address. 
+ *
+ *	This version of ioremap ensures that the memory is marked uncachable
+ *	on the CPU as well as honouring existing caching rules from things like
+ *	the PCI bus. Note that there are other caches and buffers on many 
+ *	busses. In paticular driver authors should read up on PCI writes
+ *
+ *	It's useful if some control registers are in such an area and
+ *	write combining or read caching is not desirable:
+ */
+ 
+static inline void * ioremap_nocache (unsigned long offset, unsigned long size)
+{
+        return __ioremap(offset, size, _PAGE_PCD);
+}
+
+extern void iounmap(void *addr);
+
+/*
+ * bt_ioremap() and bt_iounmap() are for temporary early boot-time
+ * mappings, before the real ioremap() is functional.
+ * A boot-time mapping is currently limited to at most 16 pages.
+ */
+extern void *bt_ioremap(unsigned long offset, unsigned long size);
+extern void bt_iounmap(void *addr, unsigned long size);
+
+#define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x))
+#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
+#define page_to_bus(_x) phys_to_machine(((_x) - mem_map) << PAGE_SHIFT)
+#define bus_to_phys(_x) machine_to_phys(_x)
+#define bus_to_page(_x) (mem_map + (bus_to_phys(_x) >> PAGE_SHIFT))
+
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the x86 architecture, we just read/write the
+ * memory location directly.
+ */
+
+#define readb(addr) (*(volatile unsigned char *) __io_virt(addr))
+#define readw(addr) (*(volatile unsigned short *) __io_virt(addr))
+#define readl(addr) (*(volatile unsigned int *) __io_virt(addr))
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b))
+#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b))
+#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b))
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+#define memset_io(a,b,c)	__memset(__io_virt(a),(b),(c))
+#define memcpy_fromio(a,b,c)	__memcpy((a),__io_virt(b),(c))
+#define memcpy_toio(a,b,c)	__memcpy(__io_virt(a),(b),(c))
+
+/*
+ * ISA space is 'always mapped' on a typical x86 system, no need to
+ * explicitly ioremap() it. The fact that the ISA IO space is mapped
+ * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
+ * are physical addresses. The following constant pointer can be
+ * used as the IO-area pointer (it can be iounmapped as well, so the
+ * analogy with PCI is quite large):
+ */
+#define __ISA_IO_base ((char *)(PAGE_OFFSET))
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+#define isa_memset_io(a,b,c)		memset_io(__ISA_IO_base + (a),(b),(c))
+#define isa_memcpy_fromio(a,b,c)	memcpy_fromio((a),__ISA_IO_base + (b),(c))
+#define isa_memcpy_toio(a,b,c)		memcpy_toio(__ISA_IO_base + (a),(b),(c))
+
+
+/*
+ * Again, i386 does not require mem IO specific function.
+ */
+
+#define eth_io_copy_and_sum(a,b,c,d)		eth_copy_and_sum((a),__io_virt(b),(c),(d))
+#define isa_eth_io_copy_and_sum(a,b,c,d)	eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d))
+
+/**
+ *	check_signature		-	find BIOS signatures
+ *	@io_addr: mmio address to check 
+ *	@signature:  signature block
+ *	@length: length of signature
+ *
+ *	Perform a signature comparison with the mmio address io_addr. This
+ *	address should have been obtained by ioremap.
+ *	Returns 1 on a match.
+ */
+ 
+static inline int check_signature(unsigned long io_addr,
+	const unsigned char *signature, int length)
+{
+	int retval = 0;
+	do {
+		if (readb(io_addr) != *signature)
+			goto out;
+		io_addr++;
+		signature++;
+		length--;
+	} while (length);
+	retval = 1;
+out:
+	return retval;
+}
+
+/**
+ *	isa_check_signature		-	find BIOS signatures
+ *	@io_addr: mmio address to check 
+ *	@signature:  signature block
+ *	@length: length of signature
+ *
+ *	Perform a signature comparison with the ISA mmio address io_addr.
+ *	Returns 1 on a match.
+ *
+ *	This function is deprecated. New drivers should use ioremap and
+ *	check_signature.
+ */
+ 
+
+static inline int isa_check_signature(unsigned long io_addr,
+	const unsigned char *signature, int length)
+{
+	int retval = 0;
+	do {
+		if (isa_readb(io_addr) != *signature)
+			goto out;
+		io_addr++;
+		signature++;
+		length--;
+	} while (length);
+	retval = 1;
+out:
+	return retval;
+}
+
+/*
+ *	Cache management
+ *
+ *	This needed for two cases
+ *	1. Out of order aware processors
+ *	2. Accidentally out of order processors (PPro errata #51)
+ */
+ 
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+static inline void flush_write_buffers(void)
+{
+	__asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
+}
+
+#define dma_cache_inv(_start,_size)		flush_write_buffers()
+#define dma_cache_wback(_start,_size)		flush_write_buffers()
+#define dma_cache_wback_inv(_start,_size)	flush_write_buffers()
+
+#else
+
+/* Nothing to do */
+
+#define dma_cache_inv(_start,_size)		do { } while (0)
+#define dma_cache_wback(_start,_size)		do { } while (0)
+#define dma_cache_wback_inv(_start,_size)	do { } while (0)
+#define flush_write_buffers()
+
+#endif
+
+#endif /* __KERNEL__ */
+
+#ifdef SLOW_IO_BY_JUMPING
+#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
+#elif defined(__UNSAFE_IO__)
+#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+#else
+#define __SLOW_DOWN_IO "\n1: outb %%al,$0x80\n"           \
+                       "2:\n"                             \
+                       ".section __ex_table,\"a\"\n\t"    \
+                       ".align 4\n\t"                     \
+                       ".long 1b,2b\n"                    \
+                       ".previous"
+#endif
+
+#ifdef REALLY_SLOW_IO
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
+#else
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+#endif
+
+#ifdef CONFIG_MULTIQUAD
+extern void *xquad_portio;    /* Where the IO area was mapped */
+#endif /* CONFIG_MULTIQUAD */
+
+/*
+ * Talk about misusing macros..
+ */
+#define __OUT1(s,x) \
+static inline void out##s(unsigned x value, unsigned short port) {
+
+#ifdef __UNSAFE_IO__
+#define __OUT2(s,s1,s2) \
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+#else
+#define __OUT2(s,s1,s2)                                  \
+__asm__ __volatile__ ("1: out" #s " %" s1 "0,%" s2 "1\n" \
+                      "2:\n"                             \
+                      ".section __ex_table,\"a\"\n\t"    \
+                      ".align 4\n\t"                     \
+                      ".long 1b,2b\n"                    \
+                      ".previous"
+#endif
+
+#if defined (CONFIG_MULTIQUAD) && !defined(STANDALONE)
+#define __OUTQ(s,ss,x)    /* Do the equivalent of the portio op on quads */ \
+static inline void out##ss(unsigned x value, unsigned short port) { \
+	if (xquad_portio) \
+		write##s(value, (unsigned long) xquad_portio + port); \
+	else               /* We're still in early boot, running on quad 0 */ \
+		out##ss##_local(value, port); \
+} \
+static inline void out##ss##_quad(unsigned x value, unsigned short port, int quad) { \
+	if (xquad_portio) \
+		write##s(value, (unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\
+			+ port); \
+}
+
+#define __INQ(s,ss)       /* Do the equivalent of the portio op on quads */ \
+static inline RETURN_TYPE in##ss(unsigned short port) { \
+	if (xquad_portio) \
+		return read##s((unsigned long) xquad_portio + port); \
+	else               /* We're still in early boot, running on quad 0 */ \
+		return in##ss##_local(port); \
+} \
+static inline RETURN_TYPE in##ss##_quad(unsigned short port, int quad) { \
+	if (xquad_portio) \
+		return read##s((unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\
+			+ port); \
+	else\
+		return 0;\
+}
+#endif /* CONFIG_MULTIQUAD && !STANDALONE */
+
+#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE)
+#define __OUT(s,s1,x) \
+__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} 
+#else
+/* Make the default portio routines operate on quad 0 */
+#define __OUT(s,s1,x) \
+__OUT1(s##_local,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p_local,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUTQ(s,s,x) \
+__OUTQ(s,s##_p,x) 
+#endif /* !CONFIG_MULTIQUAD || STANDALONE */
+
+#define __IN1(s) \
+static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
+
+#ifdef __UNSAFE_IO__
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+#else
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("1: in" #s " %" s2 "1,%" s1 "0\n" \
+                      "2:\n"                            \
+                      ".section .fixup,\"ax\"\n"        \
+                      "3: mov" #s " $~0,%" s1 "0\n\t"   \
+                      "jmp 2b\n"                        \
+                      ".previous\n"                     \
+                      ".section __ex_table,\"a\"\n\t"   \
+                      ".align 4\n\t"                    \
+                      ".long 1b,3b\n"                   \
+                      ".previous"
+#endif
+
+#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE)
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } 
+#else
+/* Make the default portio routines operate on quad 0 */
+#define __IN(s,s1,i...) \
+__IN1(s##_local) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p_local) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__INQ(s,s) \
+__INQ(s,s##_p) 
+#endif /* !CONFIG_MULTIQUAD || STANDALONE */
+
+#define __INS(s) \
+static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; ins" #s \
+: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define __OUTS(s) \
+static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; outs" #s \
+: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define RETURN_TYPE unsigned char
+__IN(b,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned short
+__IN(w,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned int
+__IN(l,"")
+#undef RETURN_TYPE
+
+__OUT(b,"b",char)
+__OUT(w,"w",short)
+__OUT(l,,int)
+
+__INS(b)
+__INS(w)
+__INS(l)
+
+__OUTS(b)
+__OUTS(w)
+__OUTS(l)
+
+#endif
diff --git a/linux-2.4-xen-sparse/include/asm-xen/irq.h b/linux-2.4-xen-sparse/include/asm-xen/irq.h
new file mode 100644
index 0000000000..836629fe92
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/irq.h
@@ -0,0 +1,65 @@
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+/*
+ *	linux/include/asm/irq.h
+ *
+ *	(C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ *	IRQ/IPI changes taken from work by Thomas Radke
+ *	<tomsoft@informatik.tu-chemnitz.de>
+ */
+
+#include <linux/config.h>
+#include <asm/hypervisor.h>
+#include <asm/ptrace.h>
+
+/*
+ * The flat IRQ space is divided into two regions:
+ *  1. A one-to-one mapping of real physical IRQs. This space is only used
+ *     if we have physical device-access privilege. This region is at the 
+ *     start of the IRQ space so that existing device drivers do not need
+ *     to be modified to translate physical IRQ numbers into our IRQ space.
+ *  3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
+ *     are bound using the provided bind/unbind functions.
+ */
+
+#define PIRQ_BASE   0
+#define NR_PIRQS  128
+
+#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
+#define NR_DYNIRQS  128
+
+#define NR_IRQS   (NR_PIRQS + NR_DYNIRQS)
+
+#define pirq_to_irq(_x)   ((_x) + PIRQ_BASE)
+#define irq_to_pirq(_x)   ((_x) - PIRQ_BASE)
+
+#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE)
+#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE)
+
+/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */
+extern int  bind_virq_to_irq(int virq);
+extern void unbind_virq_from_irq(int virq);
+extern int  bind_evtchn_to_irq(int evtchn);
+extern void unbind_evtchn_from_irq(int evtchn);
+
+static __inline__ int irq_cannonicalize(int irq)
+{
+    return (irq == 2) ? 9 : irq;
+}
+
+extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
+extern void enable_irq(unsigned int);
+
+extern void irq_suspend(void);
+extern void irq_resume(void);
+
+
+#define CPU_MASK_NONE 0
+
+/* XXX SMH: no-op for compat w/ 2.6 shared files */ 
+#define irq_ctx_init(cpu) do { ; } while (0)
+
+#endif /* _ASM_IRQ_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/keyboard.h b/linux-2.4-xen-sparse/include/asm-xen/keyboard.h
new file mode 100644
index 0000000000..81c49a6908
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/keyboard.h
@@ -0,0 +1,74 @@
+/*
+ *  linux/include/asm-i386/keyboard.h
+ *
+ *  Created 3 Nov 1996 by Geert Uytterhoeven
+ */
+
+/*
+ *  This file contains the i386 architecture specific keyboard definitions
+ */
+
+#ifndef _I386_KEYBOARD_H
+#define _I386_KEYBOARD_H
+
+#ifdef __KERNEL__
+
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/kd.h>
+#include <linux/pm.h>
+#include <asm/io.h>
+
+#define KEYBOARD_IRQ			1
+#define DISABLE_KBD_DURING_INTERRUPTS	0
+
+extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode);
+extern int pckbd_getkeycode(unsigned int scancode);
+extern int pckbd_translate(unsigned char scancode, unsigned char *keycode,
+			   char raw_mode);
+extern char pckbd_unexpected_up(unsigned char keycode);
+extern void pckbd_leds(unsigned char leds);
+extern void pckbd_init_hw(void);
+extern int pckbd_pm_resume(struct pm_dev *, pm_request_t, void *);
+extern pm_callback pm_kbd_request_override;
+extern unsigned char pckbd_sysrq_xlate[128];
+
+#define kbd_setkeycode		pckbd_setkeycode
+#define kbd_getkeycode		pckbd_getkeycode
+#define kbd_translate		pckbd_translate
+#define kbd_unexpected_up	pckbd_unexpected_up
+#define kbd_leds		pckbd_leds
+#define kbd_init_hw		pckbd_init_hw
+#define kbd_sysrq_xlate		pckbd_sysrq_xlate
+
+#define SYSRQ_KEY 0x54
+
+#define kbd_controller_present() (xen_start_info.flags & SIF_INITDOMAIN)
+
+/* resource allocation */
+#define kbd_request_region()
+#define kbd_request_irq(handler) request_irq(KEYBOARD_IRQ, handler, 0, \
+                                             "keyboard", NULL)
+
+/* How to access the keyboard macros on this platform.  */
+#define kbd_read_input() inb(KBD_DATA_REG)
+#define kbd_read_status() inb(KBD_STATUS_REG)
+#define kbd_write_output(val) outb(val, KBD_DATA_REG)
+#define kbd_write_command(val) outb(val, KBD_CNTL_REG)
+
+/* Some stoneage hardware needs delays after some operations.  */
+#define kbd_pause() do { } while(0)
+
+/*
+ * Machine specific bits for the PS/2 driver
+ */
+
+#define AUX_IRQ 12
+
+#define aux_request_irq(hand, dev_id)					\
+	request_irq(AUX_IRQ, hand, SA_SHIRQ, "PS/2 Mouse", dev_id)
+
+#define aux_free_irq(dev_id) free_irq(AUX_IRQ, dev_id)
+
+#endif /* __KERNEL__ */
+#endif /* _I386_KEYBOARD_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/mmu_context.h b/linux-2.4-xen-sparse/include/asm-xen/mmu_context.h
new file mode 100644
index 0000000000..74004c8d46
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/mmu_context.h
@@ -0,0 +1,59 @@
+#ifndef __I386_MMU_CONTEXT_H
+#define __I386_MMU_CONTEXT_H
+
+#include <linux/config.h>
+#include <asm/desc.h>
+#include <asm/atomic.h>
+#include <asm/pgalloc.h>
+
+/*
+ * hooks to add arch specific data into the mm struct.
+ * Note that destroy_context is called even if init_new_context
+ * fails.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+void destroy_context(struct mm_struct *mm);
+
+#ifdef CONFIG_SMP
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
+{
+	if(cpu_tlbstate[cpu].state == TLBSTATE_OK)
+		cpu_tlbstate[cpu].state = TLBSTATE_LAZY;	
+}
+#else
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
+{
+}
+#endif
+
+extern pgd_t *cur_pgd;
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
+{
+	struct mmuext_op _op[2], *op = _op;
+	if (prev != next) {
+		/* stop flush ipis for the previous mm */
+		clear_bit(cpu, &prev->cpu_vm_mask);
+		/* Re-load page tables */
+		cur_pgd = next->pgd;
+		op->cmd = MMUEXT_NEW_BASEPTR;
+		op->mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
+		op++;
+		/* load_LDT, if either the previous or next thread
+		 * has a non-default LDT.
+		 */
+		if (next->context.size+prev->context.size) {
+			op->cmd = MMUEXT_SET_LDT;
+			op->linear_addr = (unsigned long)next->context.ldt;
+			op->nr_ents     = next->context.size;
+			op++;
+		}
+		BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
+	}
+}
+
+#define activate_mm(prev, next)	\
+	switch_mm((prev),(next),NULL,smp_processor_id())
+
+#endif
diff --git a/linux-2.4-xen-sparse/include/asm-xen/module.h b/linux-2.4-xen-sparse/include/asm-xen/module.h
new file mode 100644
index 0000000000..17cd5ff07c
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/module.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_I386_MODULE_H
+#define _ASM_I386_MODULE_H
+/*
+ * This file contains the i386 architecture specific module code.
+ */
+
+extern int xen_module_init(struct module *mod);
+
+#define module_map(x)		vmalloc(x)
+#define module_unmap(x)		vfree(x)
+#define module_arch_init(x)	xen_module_init(x)
+#define arch_init_modules(x)	do { } while (0)
+
+#endif /* _ASM_I386_MODULE_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/page.h b/linux-2.4-xen-sparse/include/asm-xen/page.h
new file mode 100644
index 0000000000..901d9acfc5
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/page.h
@@ -0,0 +1,178 @@
+#ifndef _I386_PAGE_H
+#define _I386_PAGE_H
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT	12
+#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <asm/types.h>
+#include <asm-xen/xen-public/xen.h>
+
+#ifdef CONFIG_XEN_SCRUB_PAGES
+#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
+#else
+#define scrub_pages(_p,_n) ((void)0)
+#endif
+
+#ifdef CONFIG_X86_USE_3DNOW
+
+#include <asm/mmx.h>
+
+#define clear_page(page)	mmx_clear_page((void *)(page))
+#define copy_page(to,from)	mmx_copy_page(to,from)
+
+#else
+
+/*
+ *	On older X86 processors its not a win to use MMX here it seems.
+ *	Maybe the K6-III ?
+ */
+ 
+#define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
+#define copy_page(to,from)	memcpy((void *)(to), (void *)(from), PAGE_SIZE)
+
+#endif
+
+#define clear_user_page(page, vaddr)	clear_page(page)
+#define copy_user_page(to, from, vaddr)	copy_page(to, from)
+
+/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+extern unsigned int *phys_to_machine_mapping;
+#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)]))
+#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)]))
+static inline unsigned long phys_to_machine(unsigned long phys)
+{
+    unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+    machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
+    return machine;
+}
+static inline unsigned long machine_to_phys(unsigned long machine)
+{
+    unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+    phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
+    return phys;
+}
+
+/*
+ * These are used to make use of C type-checking..
+ */
+#if CONFIG_X86_PAE
+typedef struct { unsigned long pte_low, pte_high; } pte_t;
+typedef struct { unsigned long long pmd; } pmd_t;
+typedef struct { unsigned long long pgd; } pgd_t;
+#define pte_val(x)	((x).pte_low | ((unsigned long long)(x).pte_high << 32))
+#else
+typedef struct { unsigned long pte_low; } pte_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pgd; } pgd_t;
+static inline unsigned long pte_val(pte_t x)
+{
+    unsigned long ret = x.pte_low;
+    if ( (ret & 1) ) ret = machine_to_phys(ret);
+    return ret;
+}
+#define pte_val_ma(x)   ((x).pte_low)
+#endif
+#define PTE_MASK	PAGE_MASK
+
+typedef struct { unsigned long pgprot; } pgprot_t;
+
+static inline unsigned long pmd_val(pmd_t x)
+{
+    unsigned long ret = x.pmd;
+    if ( ret ) ret = machine_to_phys(ret) | 1;
+    return ret;
+}
+#define pmd_val_ma(x)   ((x).pmd)
+#define pgd_val(x)	({ BUG(); (unsigned long)0; })
+#define pgprot_val(x)	((x).pgprot)
+
+#define __pte(x) ({ unsigned long _x = (x); \
+    (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pte_ma(x)     ((pte_t) { (x) } )
+#define __pmd(x) ({ unsigned long _x = (x); \
+    (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); })
+#define __pgd(x) ({ BUG(); (pgprot_t) { 0 }; })
+#define __pgprot(x)	((pgprot_t) { (x) } )
+
+#endif /* !__ASSEMBLY__ */
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+/*
+ * This handles the memory map.. We could make this a config
+ * option, but too many people screw it up, and too few need
+ * it.
+ *
+ * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 950MB. 
+ *
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
+ * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ */
+
+#define __PAGE_OFFSET		(0xC0000000)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Tell the user there is some problem. Beep too, so we can
+ * see^H^H^Hhear bugs in early bootup as well!
+ * The offending file and line are encoded after the "officially
+ * undefined" opcode for parsing in the trap handler.
+ */
+
+#if 1	/* Set to zero for a slightly smaller kernel */
+#define BUG()				\
+ __asm__ __volatile__(	"ud2\n"		\
+			"\t.word %c0\n"	\
+			"\t.long %c1\n"	\
+			 : : "i" (__LINE__), "i" (__FILE__))
+#else
+#define BUG() __asm__ __volatile__("ud2\n")
+#endif
+
+#define PAGE_BUG(page) do { \
+	BUG(); \
+} while (0)
+
+/* Pure 2^n version of get_order */
+static __inline__ int get_order(unsigned long size)
+{
+	int order;
+
+	size = (size-1) >> (PAGE_SHIFT-1);
+	order = -1;
+	do {
+		size >>= 1;
+		order++;
+	} while (size);
+	return order;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
+#define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
+#define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define virt_to_page(kaddr)	(mem_map + (__pa(kaddr) >> PAGE_SHIFT))
+#define VALID_PAGE(page)	((page - mem_map) < max_mapnr)
+
+#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+/* VIRT <-> MACHINE conversion */
+#define virt_to_machine(_a) (phys_to_machine(__pa(_a)))
+#define machine_to_virt(_m) (__va(machine_to_phys(_m)))
+
+#endif /* __KERNEL__ */
+
+#endif /* _I386_PAGE_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/pci.h b/linux-2.4-xen-sparse/include/asm-xen/pci.h
new file mode 100644
index 0000000000..74ae5ba8b1
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/pci.h
@@ -0,0 +1,283 @@
+#ifndef __i386_PCI_H
+#define __i386_PCI_H
+
+#include <linux/config.h>
+
+#ifdef __KERNEL__
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+   already-configured bus numbers - to be used for buggy BIOSes
+   or architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+extern unsigned int pcibios_assign_all_busses(void);
+#else
+#define pcibios_assign_all_busses()	0
+#endif
+#define pcibios_scan_all_fns()		0
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO		0x1000
+#define PCIBIOS_MIN_MEM		(pci_mem_start)
+
+void pcibios_config_init(void);
+struct pci_bus * pcibios_scan_root(int bus);
+extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value);
+extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value);
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq);
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+/* Dynamic DMA mapping stuff.
+ * i386 has everything mapped statically.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/scatterlist.h>
+#include <linux/string.h>
+#include <asm/io.h>
+
+struct pci_dev;
+
+/* The networking and block device layers use this boolean for bounce
+ * buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS	(0)
+
+/* Allocate and map kernel buffer using consistent mode DMA for a device.
+ * hwdev should be valid struct pci_dev pointer for PCI devices,
+ * NULL for PCI-like buses (ISA, EISA).
+ * Returns non-NULL cpu-view pointer to the buffer if successful and
+ * sets *dma_addrp to the pci side dma address as well, else *dma_addrp
+ * is undefined.
+ */
+extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+				  dma_addr_t *dma_handle);
+
+/* Free and unmap a consistent DMA buffer.
+ * cpu_addr is what was returned from pci_alloc_consistent,
+ * size must be the same as what as passed into pci_alloc_consistent,
+ * and likewise dma_addr must be the same as what *dma_addrp was set to.
+ *
+ * References to the memory and mappings associated with cpu_addr/dma_addr
+ * past this call are illegal.
+ */
+extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+				void *vaddr, dma_addr_t dma_handle);
+
+/* Map a single buffer of the indicated size for DMA in streaming mode.
+ * The 32-bit bus address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory
+ * until either pci_unmap_single or pci_dma_sync_single is performed.
+ */
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
+					size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		out_of_line_bug();
+	flush_write_buffers();
+	return virt_to_bus(ptr);
+}
+
+/* Unmap a single streaming mode DMA translation.  The dma_addr and size
+ * must match what was provided for in a previous pci_map_single call.  All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guarenteed to see
+ * whatever the device wrote there.
+ */
+static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
+				    size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		out_of_line_bug();
+	/* Nothing to do */
+}
+
+/*
+ * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical
+ * to pci_map_single, but takes a struct page instead of a virtual address
+ */
+static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
+				      unsigned long offset, size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		out_of_line_bug();
+
+	return page_to_bus(page) + offset;
+}
+
+static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
+				  size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		out_of_line_bug();
+	/* Nothing to do */
+}
+
+/* pci_unmap_{page,single} is a nop so... */
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
+#define pci_unmap_addr(PTR, ADDR_NAME)		(0)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)	do { } while (0)
+#define pci_unmap_len(PTR, LEN_NAME)		(0)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)	do { } while (0)
+
+/* Map a set of buffers described by scatterlist in streaming
+ * mode for DMA.  This is the scather-gather version of the
+ * above pci_map_single interface.  Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length.  They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ *       DMA address/length pairs than there are SG table elements.
+ *       (for example via virtual mapping capabilities)
+ *       The routine returns the number of addr/length pairs actually
+ *       used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+			     int nents, int direction)
+{
+	int i;
+
+	if (direction == PCI_DMA_NONE)
+		out_of_line_bug();
+ 
+ 	/*
+ 	 * temporary 2.4 hack
+ 	 */
+ 	for (i = 0; i < nents; i++ ) {
+ 		if (sg[i].address && sg[i].page)
+ 			out_of_line_bug();
+ 		else if (!sg[i].address && !sg[i].page)
+ 			out_of_line_bug();
+ 
+ 		if (sg[i].address)
+ 			sg[i].dma_address = virt_to_bus(sg[i].address);
+ 		else
+ 			sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
+ 	}
+ 
+	flush_write_buffers();
+	return nents;
+}
+
+/* Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+				int nents, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		out_of_line_bug();
+	/* Nothing to do */
+}
+
+/* Make physical memory consistent for a single
+ * streaming mode DMA translation after a transfer.
+ *
+ * If you perform a pci_map_single() but wish to interrogate the
+ * buffer using the cpu, yet do not wish to teardown the PCI dma
+ * mapping, you must call this function before doing so.  At the
+ * next point you give the PCI dma address back to the card, the
+ * device again owns the buffer.
+ */
+static inline void pci_dma_sync_single(struct pci_dev *hwdev,
+				       dma_addr_t dma_handle,
+				       size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		out_of_line_bug();
+	flush_write_buffers();
+}
+
+/* Make physical memory consistent for a set of streaming
+ * mode DMA translations after a transfer.
+ *
+ * The same as pci_dma_sync_single but for a scatter-gather list,
+ * same rules and usage.
+ */
+static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
+				   struct scatterlist *sg,
+				   int nelems, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		out_of_line_bug();
+	flush_write_buffers();
+}
+
+/* Return whether the given PCI device DMA address mask can
+ * be supported properly.  For example, if your device can
+ * only drive the low 24-bits during PCI bus mastering, then
+ * you would pass 0x00ffffff as the mask to this function.
+ */
+static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
+{
+        /*
+         * we fall back to GFP_DMA when the mask isn't all 1s,
+         * so we can't guarantee allocations that must be
+         * within a tighter range than GFP_DMA..
+         */
+        if(mask < 0x00ffffff)
+                return 0;
+
+	return 1;
+}
+
+/* This is always fine. */
+#define pci_dac_dma_supported(pci_dev, mask)	(1)
+
+static __inline__ dma64_addr_t
+pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
+{
+	return ((dma64_addr_t) page_to_bus(page) +
+		(dma64_addr_t) offset);
+}
+
+static __inline__ struct page *
+pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+	return bus_to_page(dma_addr);
+}
+
+static __inline__ unsigned long
+pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+	return (dma_addr & ~PAGE_MASK);
+}
+
+static __inline__ void
+pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+	flush_write_buffers();
+}
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg)	((sg)->dma_address)
+#define sg_dma_len(sg)		((sg)->length)
+
+/* Return the index of the PCI controller for device. */
+static inline int pci_controller_num(struct pci_dev *dev)
+{
+	return 0;
+}
+
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			       enum pci_mmap_state mmap_state, int write_combine);
+
+#endif /* __KERNEL__ */
+
+#endif /* __i386_PCI_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/pgalloc.h b/linux-2.4-xen-sparse/include/asm-xen/pgalloc.h
new file mode 100644
index 0000000000..3f8f388774
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/pgalloc.h
@@ -0,0 +1,280 @@
+#ifndef _I386_PGALLOC_H
+#define _I386_PGALLOC_H
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/hypervisor.h>
+#include <linux/threads.h>
+
+/*
+ * Quick lists are aligned so that least significant bits of array pointer
+ * are all zero when list is empty, and all one when list is full.
+ */
+#define QUICKLIST_ENTRIES 256
+#define QUICKLIST_EMPTY(_l) !((unsigned long)(_l) & ((QUICKLIST_ENTRIES*4)-1))
+#define QUICKLIST_FULL(_l)  QUICKLIST_EMPTY((_l)+1)
+#define pgd_quicklist (current_cpu_data.pgd_quick)
+#define pmd_quicklist (current_cpu_data.pmd_quick)
+#define pte_quicklist (current_cpu_data.pte_quick)
+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
+
+#define pmd_populate(mm, pmd, pte) 		  \
+ do {                                             \
+  set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));   \
+ } while ( 0 )
+
+/*
+ * Allocate and free page tables.
+ */
+
+#if defined (CONFIG_X86_PAE)
+
+#error "no PAE support as yet"
+
+/*
+ * We can't include <linux/slab.h> here, thus these uglinesses.
+ */
+struct kmem_cache_s;
+
+extern struct kmem_cache_s *pae_pgd_cachep;
+extern void *kmem_cache_alloc(struct kmem_cache_s *, int);
+extern void kmem_cache_free(struct kmem_cache_s *, void *);
+
+
+static inline pgd_t *get_pgd_slow(void)
+{
+	int i;
+	pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL);
+
+	if (pgd) {
+		for (i = 0; i < USER_PTRS_PER_PGD; i++) {
+			unsigned long pmd = __get_free_page(GFP_KERNEL);
+			if (!pmd)
+				goto out_oom;
+			clear_page(pmd);
+			set_pgd(pgd + i, __pgd(1 + __pa(pmd)));
+		}
+		memcpy(pgd + USER_PTRS_PER_PGD,
+			init_mm.pgd + USER_PTRS_PER_PGD,
+			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+	}
+	return pgd;
+out_oom:
+	for (i--; i >= 0; i--)
+		free_page((unsigned long)__va(pgd_val(pgd[i])-1));
+	kmem_cache_free(pae_pgd_cachep, pgd);
+	return NULL;
+}
+
+#else
+
+static inline pgd_t *get_pgd_slow(void)
+{
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+
+	if (pgd) {
+		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+		memcpy(pgd + USER_PTRS_PER_PGD,
+			init_mm.pgd + USER_PTRS_PER_PGD,
+			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+		__make_page_readonly(pgd);
+		xen_pgd_pin(__pa(pgd));
+	}
+	return pgd;
+}
+
+#endif /* CONFIG_X86_PAE */
+
+static inline pgd_t *get_pgd_fast(void)
+{
+	unsigned long ret;
+
+	if ( !QUICKLIST_EMPTY(pgd_quicklist) ) {
+		ret = *(--pgd_quicklist);
+		pgtable_cache_size--;
+
+	} else
+		ret = (unsigned long)get_pgd_slow();
+	return (pgd_t *)ret;
+}
+
+static inline void free_pgd_slow(pgd_t *pgd)
+{
+#if defined(CONFIG_X86_PAE)
+#error
+	int i;
+
+	for (i = 0; i < USER_PTRS_PER_PGD; i++)
+		free_page((unsigned long)__va(pgd_val(pgd[i])-1));
+	kmem_cache_free(pae_pgd_cachep, pgd);
+#else
+	xen_pgd_unpin(__pa(pgd));
+	__make_page_writable(pgd);
+	free_page((unsigned long)pgd);
+#endif
+}
+
+static inline void free_pgd_fast(pgd_t *pgd)
+{
+        if ( !QUICKLIST_FULL(pgd_quicklist) ) {
+                *(pgd_quicklist++) = (unsigned long)pgd;
+                pgtable_cache_size++;
+        } else
+                free_pgd_slow(pgd);
+}
+
+static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+    pte_t *pte;
+
+    pte = (pte_t *) __get_free_page(GFP_KERNEL);
+    if (pte)
+    {
+        clear_page(pte);
+        __make_page_readonly(pte);
+        xen_pte_pin(__pa(pte));
+    }
+    return pte;
+
+}
+
+static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
+					unsigned long address)
+{
+    unsigned long ret = 0;
+    if ( !QUICKLIST_EMPTY(pte_quicklist) ) {
+        ret = *(--pte_quicklist);
+        pgtable_cache_size--;
+    }
+    return (pte_t *)ret;
+}
+
+static __inline__ void pte_free_slow(pte_t *pte)
+{
+    xen_pte_unpin(__pa(pte));
+    __make_page_writable(pte);
+    free_page((unsigned long)pte);
+}
+
+static inline void pte_free_fast(pte_t *pte)
+{
+    if ( !QUICKLIST_FULL(pte_quicklist) ) {
+        *(pte_quicklist++) = (unsigned long)pte;
+        pgtable_cache_size++;
+    } else
+        pte_free_slow(pte);
+}
+
+#define pte_free(pte)		pte_free_fast(pte)
+#define pgd_free(pgd)		free_pgd_fast(pgd)
+#define pgd_alloc(mm)		get_pgd_fast()
+
+/*
+ * allocating and freeing a pmd is trivial: the 1-entry pmd is
+ * inside the pgd, so has no extra memory associated with it.
+ * (In the PAE case we free the pmds as part of the pgd.)
+ */
+
+#define pmd_alloc_one_fast(mm, addr)	({ BUG(); ((pmd_t *)1); })
+#define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
+#define pmd_free_slow(x)		do { } while (0)
+#define pmd_free_fast(x)		do { } while (0)
+#define pmd_free(x)			do { } while (0)
+#define pgd_populate(mm, pmd, pte)	BUG()
+
+extern int do_check_pgt_cache(int, int);
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(mm, start, end) flushes a range of pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * ..but the i386 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	if (mm == current->active_mm) xen_tlb_flush();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+	unsigned long addr)
+{
+	if (vma->vm_mm == current->active_mm) xen_invlpg(addr);
+}
+
+static inline void flush_tlb_range(struct mm_struct *mm,
+	unsigned long start, unsigned long end)
+{
+	if (mm == current->active_mm) xen_tlb_flush();
+}
+
+#else
+#error no kernel SMP support yet...
+#include <asm/smp.h>
+
+#define local_flush_tlb() \
+	__flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+
+#define flush_tlb()	flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end)
+{
+	flush_tlb_mm(mm);
+}
+
+#define TLBSTATE_OK	1
+#define TLBSTATE_LAZY	2
+
+struct tlb_state
+{
+	struct mm_struct *active_mm;
+	int state;
+} ____cacheline_aligned;
+extern struct tlb_state cpu_tlbstate[NR_CPUS];
+
+#endif /* CONFIG_SMP */
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+				      unsigned long start, unsigned long end)
+{
+    /* i386 does not keep any page table caches in TLB */
+}
+
+/*
+ * NB. The 'domid' field should be zero if mapping I/O space (non RAM).
+ * Otherwise it identifies the owner of the memory that is being mapped.
+ */
+extern int direct_remap_area_pages(struct mm_struct *mm,
+                                   unsigned long address, 
+                                   unsigned long machine_addr,
+                                   unsigned long size, 
+                                   pgprot_t prot,
+                                   domid_t  domid);
+
+extern int __direct_remap_area_pages(struct mm_struct *mm,
+				     unsigned long address, 
+				     unsigned long size, 
+				     mmu_update_t *v);
+
+
+
+#endif /* _I386_PGALLOC_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/pgtable-2level.h b/linux-2.4-xen-sparse/include/asm-xen/pgtable-2level.h
new file mode 100644
index 0000000000..70f8356fb1
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/pgtable-2level.h
@@ -0,0 +1,97 @@
+#ifndef _I386_PGTABLE_2LEVEL_H
+#define _I386_PGTABLE_2LEVEL_H
+
+/*
+ * traditional i386 two-level paging structure:
+ */
+
+#define PGDIR_SHIFT	22
+#define PTRS_PER_PGD	1024
+
+/*
+ * the i386 is two-level, so we don't really have any
+ * PMD directory physically.
+ */
+#define PMD_SHIFT	22
+#define PTRS_PER_PMD	1
+
+#define PTRS_PER_PTE	1024
+
+#define pte_ERROR(e) \
+	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
+#define pmd_ERROR(e) \
+	printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ */
+static inline int pgd_none(pgd_t pgd)		{ return 0; }
+static inline int pgd_bad(pgd_t pgd)		{ return 0; }
+static inline int pgd_present(pgd_t pgd)	{ return 1; }
+#define pgd_clear(xp)				do { } while (0)
+
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_atomic(pteptr, pteval) (*(pteptr) = pteval)
+
+/*
+ * (pmds are folded into pgds so this doesnt get actually called,
+ * but the define is needed for a generic inline function.)
+ */
+#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
+#define set_pgd(pgdptr, pgdval) ((void)0)
+
+#define pgd_page(pgd) \
+((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+
+static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+{
+	return (pmd_t *) dir;
+}
+
+#define ptep_get_and_clear(xp)	__pte_ma(xchg(&(xp)->pte_low, 0))
+#define pte_same(a, b)		((a).pte_low == (b).pte_low)
+
+/*                                 
+ * We detect special mappings in one of two ways:
+ *  1. If the MFN is an I/O page then Xen will set the m2p entry
+ *     to be outside our maximum possible pseudophys range.
+ *  2. If the MFN belongs to a different domain then we will certainly
+ *     not have MFN in our p2m table. Conversely, if the page is ours,
+ *     then we'll have p2m(m2p(MFN))==MFN.
+ * If we detect a special mapping then it doesn't have a 'struct page'.
+ * We force !VALID_PAGE() by returning an out-of-range pointer.
+ *
+ * NB. These checks require that, for any MFN that is not in our reservation,
+ * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
+ * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN.
+ * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
+ * 
+ * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
+ *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
+ *      require. In all the cases we care about, the high bit gets shifted out
+ *      (e.g., phys_to_machine()) so behaviour there is correct.
+ */
+#define INVALID_P2M_ENTRY (~0U)
+#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
+#define pte_page(_pte)                                        \
+({                                                            \
+    unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT;         \
+    unsigned long pfn = mfn_to_pfn(mfn);                      \
+    if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) )     \
+        pfn = max_mapnr; /* specia: force !VALID_PAGE() */    \
+    &mem_map[pfn];                                            \
+})
+
+#define pte_none(x)		(!(x).pte_low)
+#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
+
+#endif /* _I386_PGTABLE_2LEVEL_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/pgtable.h b/linux-2.4-xen-sparse/include/asm-xen/pgtable.h
new file mode 100644
index 0000000000..19947a9aae
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/pgtable.h
@@ -0,0 +1,371 @@
+#ifndef _I386_PGTABLE_H
+#define _I386_PGTABLE_H
+
+#include <linux/config.h>
+
+/*
+ * The Linux memory management assumes a three-level page table setup. On
+ * the i386, we use that, but "fold" the mid level into the top-level page
+ * table, so that we physically have the same two-level page table as the
+ * i386 mmu expects.
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the i386 page table tree.
+ */
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/hypervisor.h>
+#include <linux/threads.h>
+#include <asm/fixmap.h>
+
+#ifndef _I386_BITOPS_H
+#include <asm/bitops.h>
+#endif
+
+#define swapper_pg_dir 0
+extern void paging_init(void);
+
+/* Caches aren't brain-dead on the intel. */
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_range(mm, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr)		do { } while (0)
+#define flush_page_to_ram(page)			do { } while (0)
+#define flush_dcache_page(page)			do { } while (0)
+#define flush_icache_range(start, end)		do { } while (0)
+#define flush_icache_page(vma,pg)		do { } while (0)
+#define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
+
+extern unsigned long pgkern_mask;
+
+#define __flush_tlb() xen_tlb_flush()
+#define __flush_tlb_global() __flush_tlb()
+#define __flush_tlb_all() __flush_tlb_global()
+#define __flush_tlb_one(addr) xen_invlpg(addr)
+#define __flush_tlb_single(addr) xen_invlpg(addr)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[1024];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ * implements both the traditional 2-level x86 page tables and the
+ * newer 3-level PAE-mode page tables.
+ */
+#ifndef __ASSEMBLY__
+#if CONFIG_X86_PAE
+# include <asm/pgtable-3level.h>
+
+/*
+ * Need to initialise the X86 PAE caches
+ */
+extern void pgtable_cache_init(void);
+
+#else
+# include <asm/pgtable-2level.h>
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()	do { } while (0)
+
+#endif
+#endif
+
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
+#define FIRST_USER_PGD_NR	0
+
+#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
+
+#define TWOLEVEL_PGDIR_SHIFT	22
+#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
+#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
+
+
+#ifndef __ASSEMBLY__
+/* 4MB is just a nice "safety zone". Also, we align to a fresh pde. */
+#define VMALLOC_OFFSET	(4*1024*1024)
+extern void * high_memory;
+#define VMALLOC_START	(((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \
+						~(VMALLOC_OFFSET-1))
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+#if CONFIG_HIGHMEM
+# define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
+#else
+# define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
+#endif
+
+#define _PAGE_BIT_PRESENT	0
+#define _PAGE_BIT_RW		1
+#define _PAGE_BIT_USER		2
+#define _PAGE_BIT_PWT		3
+#define _PAGE_BIT_PCD		4
+#define _PAGE_BIT_ACCESSED	5
+#define _PAGE_BIT_DIRTY		6
+#define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
+
+#define _PAGE_PRESENT	0x001
+#define _PAGE_RW	0x002
+#define _PAGE_USER	0x004
+#define _PAGE_PWT	0x008
+#define _PAGE_PCD	0x010
+#define _PAGE_ACCESSED	0x020
+#define _PAGE_DIRTY	0x040
+#define _PAGE_PSE	0x080	/* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_GLOBAL	0x100	/* Global TLB entry PPro+ */
+
+#define _PAGE_PROTNONE	0x080	/* If not present */
+
+#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+
+#define __PAGE_KERNEL \
+	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_NOCACHE \
+	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_RO \
+	(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+#if 0
+#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL)
+#else
+#define MAKE_GLOBAL(x) __pgprot(x)
+#endif
+
+#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+
+/*
+ * The i386 can't do page protection for execute, and considers that
+ * the same are read. Also, write permissions imply read permissions.
+ * This is the closest we can get..
+ */
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_COPY
+#define __P011	PAGE_COPY
+#define __P100	PAGE_READONLY
+#define __P101	PAGE_READONLY
+#define __P110	PAGE_COPY
+#define __P111	PAGE_COPY
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED
+#define __S100	PAGE_READONLY
+#define __S101	PAGE_READONLY
+#define __S110	PAGE_SHARED
+#define __S111	PAGE_SHARED
+
+#define pte_present(x)	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
+#define pte_clear(xp)	do { set_pte(xp, __pte(0)); } while (0)
+
+#define pmd_none(x)	(!pmd_val(x))
+/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
+   can temporarily clear it. */
+#define pmd_present(x)	(pmd_val(x))
+#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
+#define pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
+
+
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_read(pte_t pte)		{ return (pte).pte_low & _PAGE_USER; }
+static inline int pte_exec(pte_t pte)		{ return (pte).pte_low & _PAGE_USER; }
+static inline int pte_dirty(pte_t pte)		{ return (pte).pte_low & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)		{ return (pte).pte_low & _PAGE_ACCESSED; }
+static inline int pte_write(pte_t pte)		{ return (pte).pte_low & _PAGE_RW; }
+
+static inline pte_t pte_rdprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_exprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_mkclean(pte_t pte)	{ (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte)	{ (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_wrprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_RW; return pte; }
+static inline pte_t pte_mkread(pte_t pte)	{ (pte).pte_low |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkexec(pte_t pte)	{ (pte).pte_low |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte)	{ (pte).pte_low |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte)	{ (pte).pte_low |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
+
+static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+{
+    if (!pte_dirty(*ptep))
+        return 0;
+    return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
+}
+
+static inline int ptep_test_and_clear_young(pte_t *ptep)
+{
+    if (!pte_young(*ptep))
+        return 0;
+    return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
+}
+
+static inline void ptep_set_wrprotect(pte_t *ptep)
+{
+    if (pte_write(*ptep))
+        clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
+}
+
+static inline void ptep_mkdirty(pte_t *ptep)
+{
+    if (!pte_dirty(*ptep))
+        set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
+}
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot)	__mk_pte((page) - mem_map, (pgprot))
+
+/* This takes a physical page address that is used by the remapping functions */
+#define mk_pte_phys(physpage, pgprot)	__mk_pte((physpage) >> PAGE_SHIFT, pgprot)
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte.pte_low &= _PAGE_CHG_MASK;
+	pte.pte_low |= pgprot_val(newprot);
+	return pte;
+}
+
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
+
+#define pmd_page(pmd) \
+((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+/* to find an entry in a page-table-directory. */
+#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+#define __pgd_offset(address) pgd_index(address)
+
+#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+#define __pmd_offset(address) \
+		(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+/* Find an entry in the third-level page table.. */
+#define __pte_offset(address) \
+		((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
+			__pte_offset(address))
+
+/*
+ * The i386 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+#define update_mmu_cache(vma,address,pte) do { } while (0)
+
+/* Encode and de-code a swap entry */
+#define SWP_TYPE(x)			(((x).val >> 1) & 0x3f)
+#define SWP_OFFSET(x)			((x).val >> 8)
+#define SWP_ENTRY(type, offset)		((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
+#define pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
+#define swp_entry_to_pte(x)		((pte_t) { (x).val })
+
+struct page;
+int change_page_attr(struct page *, int, pgprot_t prot);
+
+static inline void __make_page_readonly(void *va)
+{
+    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+    set_pte(pte, pte_wrprotect(*pte));
+}
+
+static inline void __make_page_writable(void *va)
+{
+    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+    set_pte(pte, pte_mkwrite(*pte));
+}
+
+static inline void make_page_readonly(void *va)
+{
+    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+    set_pte(pte, pte_wrprotect(*pte));
+    if ( (unsigned long)va >= VMALLOC_START )
+        __make_page_readonly(machine_to_virt(
+            *(unsigned long *)pte&PAGE_MASK));
+}
+
+static inline void make_page_writable(void *va)
+{
+    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+    set_pte(pte, pte_mkwrite(*pte));
+    if ( (unsigned long)va >= VMALLOC_START )
+        __make_page_writable(machine_to_virt(
+            *(unsigned long *)pte&PAGE_MASK));
+}
+
+static inline void make_pages_readonly(void *va, unsigned int nr)
+{
+    while ( nr-- != 0 )
+    {
+        make_page_readonly(va);
+        va = (void *)((unsigned long)va + PAGE_SIZE);
+    }
+}
+
+static inline void make_pages_writable(void *va, unsigned int nr)
+{
+    while ( nr-- != 0 )
+    {
+        make_page_writable(va);
+        va = (void *)((unsigned long)va + PAGE_SIZE);
+    }
+}
+
+static inline unsigned long arbitrary_virt_to_machine(void *va)
+{
+    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+    unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK;
+    return pa | ((unsigned long)va & (PAGE_SIZE-1));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+#define PageSkip(page)		(0)
+#define kern_addr_valid(addr)	(1)
+
+#define io_remap_page_range remap_page_range
+
+#endif /* _I386_PGTABLE_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/processor.h b/linux-2.4-xen-sparse/include/asm-xen/processor.h
new file mode 100644
index 0000000000..9036704e23
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/processor.h
@@ -0,0 +1,483 @@
+/*
+ * include/asm-i386/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef __ASM_I386_PROCESSOR_H
+#define __ASM_I386_PROCESSOR_H
+
+#include <asm/vm86.h>
+#include <asm/math_emu.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/sigcontext.h>
+#include <asm/cpufeature.h>
+#include <linux/cache.h>
+#include <linux/config.h>
+#include <linux/threads.h>
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
+
+/*
+ *  CPU type and hardware bug flags. Kept separately for each CPU.
+ *  Members of this structure are referenced in head.S, so think twice
+ *  before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+	__u8	x86;		/* CPU family */
+	__u8	x86_vendor;	/* CPU vendor */
+	__u8	x86_model;
+	__u8	x86_mask;
+	char	wp_works_ok;	/* It doesn't on 386's */
+	char	hlt_works_ok;	/* Problems on some 486Dx4's and old 386's */
+	char	hard_math;
+	char	rfu;
+       	int	cpuid_level;	/* Maximum supported CPUID level, -1=no CPUID */
+	__u32	x86_capability[NCAPINTS];
+	char	x86_vendor_id[16];
+	char	x86_model_id[64];
+	int 	x86_cache_size;  /* in KB - valid for CPUS which support this
+				    call  */
+	int	fdiv_bug;
+	int	f00f_bug;
+	int	coma_bug;
+	unsigned long loops_per_jiffy;
+	unsigned long *pgd_quick;
+	unsigned long *pmd_quick;
+	unsigned long *pte_quick;
+	unsigned long pgtable_cache_sz;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_NSC 8
+#define X86_VENDOR_SIS 9
+#define X86_VENDOR_UNKNOWN 0xff
+
+/*
+ * capabilities of CPUs
+ */
+
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct tss_struct init_tss[NR_CPUS];
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+extern char ignore_irq13;
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF	0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF	0x00000010 /* Auxillary carry Flag */
+#define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF	0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF	0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF	0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF	0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF	0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL	0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT	0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF	0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM	0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC	0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF	0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP	0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID	0x00200000 /* CPUID detection flag */
+
+/*
+ * Generic CPUID function
+ */
+static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+{
+	__asm__("cpuid"
+		: "=a" (*eax),
+		  "=b" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (op));
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+	unsigned int eax;
+
+	__asm__("cpuid"
+		: "=a" (eax)
+		: "0" (op)
+		: "bx", "cx", "dx");
+	return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+	unsigned int eax, ebx;
+
+	__asm__("cpuid"
+		: "=a" (eax), "=b" (ebx)
+		: "0" (op)
+		: "cx", "dx" );
+	return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+	unsigned int eax, ecx;
+
+	__asm__("cpuid"
+		: "=a" (eax), "=c" (ecx)
+		: "0" (op)
+		: "bx", "dx" );
+	return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+	unsigned int eax, edx;
+
+	__asm__("cpuid"
+		: "=a" (eax), "=d" (edx)
+		: "0" (op)
+		: "bx", "cx");
+	return edx;
+}
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME		0x0001	/* enable vm86 extensions */
+#define X86_CR4_PVI		0x0002	/* virtual interrupts flag enable */
+#define X86_CR4_TSD		0x0004	/* disable time stamp at ipl 3 */
+#define X86_CR4_DE		0x0008	/* enable debugging extensions */
+#define X86_CR4_PSE		0x0010	/* enable page size extensions */
+#define X86_CR4_PAE		0x0020	/* enable physical address extensions */
+#define X86_CR4_MCE		0x0040	/* Machine check enable */
+#define X86_CR4_PGE		0x0080	/* enable global pages */
+#define X86_CR4_PCE		0x0100	/* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR		0x0200	/* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT	0x0400	/* enable unmasked SSE exceptions */
+
+#define load_cr3(pgdir) \
+	asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir)));
+
+extern unsigned long mmu_cr4_features;
+
+#include <asm/hypervisor.h>
+
+static inline void set_in_cr4 (unsigned long mask)
+{
+    BUG();
+}
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+    BUG();
+}
+
+/*
+ *      Cyrix CPU configuration register indexes
+ */
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+/*
+ *      Cyrix CPU indexed register access macros
+ */
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86(reg, data) do { \
+	outb((reg), 0x22); \
+	outb((data), 0x23); \
+} while (0)
+
+/*
+ * Bus types (default is ISA, but people can check others with these..)
+ */
+#ifdef CONFIG_EISA
+extern int EISA_bus;
+#else
+#define EISA_bus (0)
+#endif
+extern int MCA_bus;
+
+/* from system description table in BIOS.  Mostly for MCA use, but
+others may find it useful. */
+extern unsigned int machine_id;
+extern unsigned int machine_submodel_id;
+extern unsigned int BIOS_revision;
+extern unsigned int mca_pentium_flag;
+
+/*
+ * User space process size: 3GB (default).
+ */
+#define TASK_SIZE	(PAGE_OFFSET)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE	(TASK_SIZE / 3)
+
+/*
+ * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
+ */
+#define IO_BITMAP_SIZE	32
+#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4)
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+struct i387_fsave_struct {
+	long	cwd;
+	long	swd;
+	long	twd;
+	long	fip;
+	long	fcs;
+	long	foo;
+	long	fos;
+	long	st_space[20];	/* 8*10 bytes for each FP-reg = 80 bytes */
+	long	status;		/* software status information */
+};
+
+struct i387_fxsave_struct {
+	unsigned short	cwd;
+	unsigned short	swd;
+	unsigned short	twd;
+	unsigned short	fop;
+	long	fip;
+	long	fcs;
+	long	foo;
+	long	fos;
+	long	mxcsr;
+	long	reserved;
+	long	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
+	long	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
+	long	padding[56];
+} __attribute__ ((aligned (16)));
+
+struct i387_soft_struct {
+	long	cwd;
+	long	swd;
+	long	twd;
+	long	fip;
+	long	fcs;
+	long	foo;
+	long	fos;
+	long	st_space[20];	/* 8*10 bytes for each FP-reg = 80 bytes */
+	unsigned char	ftop, changed, lookahead, no_update, rm, alimit;
+	struct info	*info;
+	unsigned long	entry_eip;
+};
+
+union i387_union {
+	struct i387_fsave_struct	fsave;
+	struct i387_fxsave_struct	fxsave;
+	struct i387_soft_struct soft;
+};
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+struct tss_struct {
+	unsigned short	back_link,__blh;
+	unsigned long	esp0;
+	unsigned short	ss0,__ss0h;
+	unsigned long	esp1;
+	unsigned short	ss1,__ss1h;
+	unsigned long	esp2;
+	unsigned short	ss2,__ss2h;
+	unsigned long	__cr3;
+	unsigned long	eip;
+	unsigned long	eflags;
+	unsigned long	eax,ecx,edx,ebx;
+	unsigned long	esp;
+	unsigned long	ebp;
+	unsigned long	esi;
+	unsigned long	edi;
+	unsigned short	es, __esh;
+	unsigned short	cs, __csh;
+	unsigned short	ss, __ssh;
+	unsigned short	ds, __dsh;
+	unsigned short	fs, __fsh;
+	unsigned short	gs, __gsh;
+	unsigned short	ldt, __ldth;
+	unsigned short	trace, bitmap;
+	unsigned long	io_bitmap[IO_BITMAP_SIZE+1];
+	/*
+	 * pads the TSS to be cacheline-aligned (size is 0x100)
+	 */
+	unsigned long __cacheline_filler[5];
+};
+
+struct thread_struct {
+	unsigned long	esp0;
+	unsigned long	eip;
+	unsigned long	esp;
+	unsigned long	fs;
+	unsigned long	gs;
+	unsigned int	io_pl;
+/* Hardware debugging registers */
+	unsigned long	debugreg[8];  /* %%db0-7 debug registers */
+/* fault info */
+	unsigned long	cr2, trap_no, error_code;
+/* floating point info */
+	union i387_union	i387;
+/* virtual 86 mode info */
+	struct vm86_struct	* vm86_info;
+	unsigned long		screen_bitmap;
+	unsigned long		v86flags, v86mask, saved_esp0;
+};
+
+#define INIT_THREAD  { sizeof(init_stack) + (long) &init_stack, \
+                       0, 0, 0, 0, 0, 0, {0}, 0, 0, 0, {{0}}, 0, 0, 0, 0, 0 }
+
+#define INIT_TSS  {						\
+	0,0, /* back_link, __blh */				\
+	sizeof(init_stack) + (long) &init_stack, /* esp0 */	\
+	__KERNEL_DS, 0, /* ss0 */				\
+	0,0,0,0,0,0, /* stack1, stack2 */			\
+	0, /* cr3 */						\
+	0,0, /* eip,eflags */					\
+	0,0,0,0, /* eax,ecx,edx,ebx */				\
+	0,0,0,0, /* esp,ebp,esi,edi */				\
+	0,0,0,0,0,0, /* es,cs,ss */				\
+	0,0,0,0,0,0, /* ds,fs,gs */				\
+	0,0, /* ldt */						\
+	0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */		\
+	{~0, } /* ioperm */					\
+}
+
+#define start_thread(regs, new_eip, new_esp) do {		\
+	__asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0));	\
+	set_fs(USER_DS);					\
+	regs->xds = __USER_DS;					\
+	regs->xes = __USER_DS;					\
+	regs->xss = __USER_DS;					\
+	regs->xcs = __USER_CS;					\
+	regs->eip = new_eip;					\
+	regs->esp = new_esp;					\
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+/* Copy and release all segment info associated with a VM
+ * Unusable due to lack of error handling, use {init_new,destroy}_context
+ * instead.
+ */
+static inline void copy_segments(struct task_struct *p, struct mm_struct * mm) { }
+static inline void release_segments(struct mm_struct * mm) { }
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+static inline unsigned long thread_saved_pc(struct thread_struct *t)
+{
+	return ((unsigned long *)t->esp)[3];
+}
+
+unsigned long get_wchan(struct task_struct *p);
+#define KSTK_EIP(tsk)	(((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+#define KSTK_ESP(tsk)	(((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+
+#define THREAD_SIZE (2*PAGE_SIZE)
+#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+#define get_task_struct(tsk)      atomic_inc(&virt_to_page(tsk)->count)
+
+#define init_task	(init_task_union.task)
+#define init_stack	(init_task_union.stack)
+
+struct microcode {
+	unsigned int hdrver;
+	unsigned int rev;
+	unsigned int date;
+	unsigned int sig;
+	unsigned int cksum;
+	unsigned int ldrver;
+	unsigned int pf;
+	unsigned int reserved[5];
+	unsigned int bits[500];
+};
+
+/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
+#define MICROCODE_IOCFREE	_IO('6',0)
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+	__asm__ __volatile__("rep;nop" ::: "memory");
+}
+
+#define cpu_relax()	rep_nop()
+
+/* Prefetch instructions for Pentium III and AMD Athlon */
+#if defined(CONFIG_MPENTIUMIII) || defined (CONFIG_MPENTIUM4)
+
+#define ARCH_HAS_PREFETCH
+extern inline void prefetch(const void *x)
+{
+	__asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
+}
+
+#elif CONFIG_X86_USE_3DNOW
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+extern inline void prefetch(const void *x)
+{
+	 __asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
+}
+
+extern inline void prefetchw(const void *x)
+{
+	 __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
+}
+#define spin_lock_prefetch(x)	prefetchw(x)
+
+#endif
+
+#endif /* __ASM_I386_PROCESSOR_H */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/queues.h b/linux-2.4-xen-sparse/include/asm-xen/queues.h
new file mode 100644
index 0000000000..dd527603ce
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/queues.h
@@ -0,0 +1,20 @@
+
+/* Work-queue emulation over task queues. Pretty simple. */
+
+#ifndef __QUEUES_H__
+#define __QUEUES_H__
+
+#include <linux/version.h>
+#include <linux/list.h>
+#include <linux/tqueue.h>
+
+#define DECLARE_TQUEUE(_name, _fn, _arg) \
+    struct tq_struct _name = { LIST_HEAD_INIT((_name).list), 0, _fn, _arg }
+#define DECLARE_WORK(_name, _fn, _arg) DECLARE_TQUEUE(_name, _fn, _arg)
+
+#define work_struct tq_struct
+#define INIT_WORK(_work, _fn, _arg) INIT_TQUEUE(_work, _fn, _arg)
+
+#define schedule_work(_w) schedule_task(_w)
+
+#endif /* __QUEUES_H__ */
diff --git a/linux-2.4-xen-sparse/include/asm-xen/segment.h b/linux-2.4-xen-sparse/include/asm-xen/segment.h
new file mode 100644
index 0000000000..5b3a4392f3
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/segment.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_SEGMENT_H
+#define _ASM_SEGMENT_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#endif
+#include <asm-xen/xen-public/xen.h>
+
+#define __KERNEL_CS	FLAT_RING1_CS
+#define __KERNEL_DS	FLAT_RING1_DS
+
+#define __USER_CS	FLAT_RING3_CS
+#define __USER_DS	FLAT_RING3_DS
+
+#endif
diff --git a/linux-2.4-xen-sparse/include/asm-xen/smp.h b/linux-2.4-xen-sparse/include/asm-xen/smp.h
new file mode 100644
index 0000000000..025a93a890
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/smp.h
@@ -0,0 +1,102 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+/*
+ * We need the APIC definitions automatically as part of 'smp.h'
+ */
+#ifndef __ASSEMBLY__
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/ptrace.h>
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#ifndef __ASSEMBLY__
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
+#include <asm/io_apic.h>
+#endif
+#include <asm/apic.h>
+#endif
+#endif
+
+#ifdef CONFIG_SMP
+#ifndef __ASSEMBLY__
+
+/*
+ * Private routines/data
+ */
+ 
+extern void smp_alloc_memory(void);
+extern unsigned long phys_cpu_present_map;
+extern unsigned long cpu_online_map;
+extern volatile unsigned long smp_invalidate_needed;
+extern int pic_mode;
+extern int smp_num_siblings;
+extern int cpu_sibling_map[];
+
+extern void smp_flush_tlb(void);
+extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
+extern void fastcall smp_send_reschedule(int cpu);
+extern void smp_invalidate_rcv(void);		/* Process an NMI */
+extern void (*mtrr_hook) (void);
+extern void zap_low_mappings (void);
+
+/*
+ * On x86 all CPUs are mapped 1:1 to the APIC space.
+ * This simplifies scheduling and IPI sending and
+ * compresses data structures.
+ */
+static inline int cpu_logical_map(int cpu)
+{
+	return cpu;
+}
+static inline int cpu_number_map(int cpu)
+{
+	return cpu;
+}
+
+/*
+ * Some lowlevel functions might want to know about
+ * the real APIC ID <-> CPU # mapping.
+ */
+#define MAX_APICID 256
+extern volatile int cpu_to_physical_apicid[NR_CPUS];
+extern volatile int physical_apicid_to_cpu[MAX_APICID];
+extern volatile int cpu_to_logical_apicid[NR_CPUS];
+extern volatile int logical_apicid_to_cpu[MAX_APICID];
+
+/*
+ * General functions that each host system must provide.
+ */
+ 
+extern void smp_boot_cpus(void);
+extern void smp_store_cpu_info(int id);		/* Store per CPU info (like the initial udelay numbers */
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+
+#define smp_processor_id() (current->processor)
+
+#endif /* !__ASSEMBLY__ */
+
+#define NO_PROC_ID		0xFF		/* No processor magic marker */
+
+/*
+ *	This magic constant controls our willingness to transfer
+ *	a process across CPUs. Such a transfer incurs misses on the L1
+ *	cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
+ *	gut feeling is this will vary by board in value. For a board
+ *	with separate L2 cache it probably depends also on the RSS, and
+ *	for a board with shared L2 cache it ought to decay fast as other
+ *	processes are run.
+ */
+ 
+#define PROC_CHANGE_PENALTY	15		/* Schedule penalty */
+
+#endif
+#endif
diff --git a/linux-2.4-xen-sparse/include/asm-xen/system.h b/linux-2.4-xen-sparse/include/asm-xen/system.h
new file mode 100644
index 0000000000..f694674233
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/system.h
@@ -0,0 +1,424 @@
+#ifndef __ASM_SYSTEM_H
+#define __ASM_SYSTEM_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <asm/synch_bitops.h>
+#include <asm/segment.h>
+#include <asm/hypervisor.h>
+#include <asm/evtchn.h>
+
+#ifdef __KERNEL__
+
+struct task_struct;
+extern void FASTCALL(__switch_to(struct task_struct *prev, 
+                                 struct task_struct *next));
+
+#define prepare_to_switch()                                             \
+do {                                                                    \
+    struct thread_struct *__t = &current->thread;                       \
+    __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (*(int *)&__t->fs) );  \
+    __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (*(int *)&__t->gs) );  \
+} while (0)
+#define switch_to(prev,next,last) do {					\
+	asm volatile("pushl %%esi\n\t"					\
+		     "pushl %%edi\n\t"					\
+		     "pushl %%ebp\n\t"					\
+		     "movl %%esp,%0\n\t"	/* save ESP */		\
+		     "movl %3,%%esp\n\t"	/* restore ESP */	\
+		     "movl $1f,%1\n\t"		/* save EIP */		\
+		     "pushl %4\n\t"		/* restore EIP */	\
+		     "jmp __switch_to\n"				\
+		     "1:\t"						\
+		     "popl %%ebp\n\t"					\
+		     "popl %%edi\n\t"					\
+		     "popl %%esi\n\t"					\
+		     :"=m" (prev->thread.esp),"=m" (prev->thread.eip),	\
+		      "=b" (last)					\
+		     :"m" (next->thread.esp),"m" (next->thread.eip),	\
+		      "a" (prev), "d" (next),				\
+		      "b" (prev));					\
+} while (0)
+
+#define _set_base(addr,base) do { unsigned long __pr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+	"rorl $16,%%edx\n\t" \
+	"movb %%dl,%2\n\t" \
+	"movb %%dh,%3" \
+	:"=&d" (__pr) \
+	:"m" (*((addr)+2)), \
+	 "m" (*((addr)+4)), \
+	 "m" (*((addr)+7)), \
+         "0" (base) \
+        ); } while(0)
+
+#define _set_limit(addr,limit) do { unsigned long __lr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+	"rorl $16,%%edx\n\t" \
+	"movb %2,%%dh\n\t" \
+	"andb $0xf0,%%dh\n\t" \
+	"orb %%dh,%%dl\n\t" \
+	"movb %%dl,%2" \
+	:"=&d" (__lr) \
+	:"m" (*(addr)), \
+	 "m" (*((addr)+6)), \
+	 "0" (limit) \
+        ); } while(0)
+
+#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
+#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 )
+
+static inline unsigned long _get_base(char * addr)
+{
+	unsigned long __base;
+	__asm__("movb %3,%%dh\n\t"
+		"movb %2,%%dl\n\t"
+		"shll $16,%%edx\n\t"
+		"movw %1,%%dx"
+		:"=&d" (__base)
+		:"m" (*((addr)+2)),
+		 "m" (*((addr)+4)),
+		 "m" (*((addr)+7)));
+	return __base;
+}
+
+#define get_base(ldt) _get_base( ((char *)&(ldt)) )
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg,value)			\
+	asm volatile("\n"			\
+		"1:\t"				\
+		"movl %0,%%" #seg "\n"		\
+		"2:\n"				\
+		".section .fixup,\"ax\"\n"	\
+		"3:\t"				\
+		"pushl $0\n\t"			\
+		"popl %%" #seg "\n\t"		\
+		"jmp 2b\n"			\
+		".previous\n"			\
+		".section __ex_table,\"a\"\n\t"	\
+		".align 4\n\t"			\
+		".long 1b,3b\n"			\
+		".previous"			\
+		: :"m" (*(unsigned int *)&(value)))
+
+/* NB. 'clts' is done for us by Xen during virtual trap. */
+#define clts() ((void)0)
+#define stts() (HYPERVISOR_fpu_taskswitch(1))
+
+#endif	/* __KERNEL__ */
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ *
+ * Taken from 2.6 for Xen.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+	__asm__("bsfl %1,%0"
+		:"=r" (word)
+		:"rm" (word));
+	return word;
+}
+
+static inline unsigned long get_limit(unsigned long segment)
+{
+	unsigned long __limit;
+	__asm__("lsll %1,%0"
+		:"=r" (__limit):"r" (segment));
+	return __limit+1;
+}
+
+#define nop() __asm__ __volatile__ ("nop")
+
+#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+
+#define tas(ptr) (xchg((ptr),1))
+
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((struct __xchg_dummy *)(x))
+
+
+/*
+ * The semantics of XCHGCMP8B are a bit strange, this is why
+ * there is a loop and the loading of %%eax and %%edx has to
+ * be inside. This inlines well in most cases, the cached
+ * cost is around ~38 cycles. (in the future we might want
+ * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
+ * might have an implicit FPU-save as a cost, so it's not
+ * clear which path to go.)
+ *
+ * chmxchg8b must be used with the lock prefix here to allow
+ * the instruction to be executed atomically, see page 3-102
+ * of the instruction set reference 24319102.pdf. We need
+ * the reader side to see the coherent 64bit value.
+ */
+static inline void __set_64bit (unsigned long long * ptr,
+		unsigned int low, unsigned int high)
+{
+	__asm__ __volatile__ (
+		"\n1:\t"
+		"movl (%0), %%eax\n\t"
+		"movl 4(%0), %%edx\n\t"
+		"lock cmpxchg8b (%0)\n\t"
+		"jnz 1b"
+		: /* no outputs */
+		:	"D"(ptr),
+			"b"(low),
+			"c"(high)
+		:	"ax","dx","memory");
+}
+
+static inline void __set_64bit_constant (unsigned long long *ptr,
+						 unsigned long long value)
+{
+	__set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
+}
+#define ll_low(x)	*(((unsigned int*)&(x))+0)
+#define ll_high(x)	*(((unsigned int*)&(x))+1)
+
+static inline void __set_64bit_var (unsigned long long *ptr,
+			 unsigned long long value)
+{
+	__set_64bit(ptr,ll_low(value), ll_high(value));
+}
+
+#define set_64bit(ptr,value) \
+(__builtin_constant_p(value) ? \
+ __set_64bit_constant(ptr, value) : \
+ __set_64bit_var(ptr, value) )
+
+#define _set_64bit(ptr,value) \
+(__builtin_constant_p(value) ? \
+ __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
+ __set_64bit(ptr, ll_low(value), ll_high(value)) )
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+ * Note 2: xchg has side effect, so that attribute volatile is necessary,
+ *	  but generally the primitive is invalid, *ptr is output argument. --ANK
+ */
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+{
+	switch (size) {
+		case 1:
+			__asm__ __volatile__("xchgb %b0,%1"
+				:"=q" (x)
+				:"m" (*__xg(ptr)), "0" (x)
+				:"memory");
+			break;
+		case 2:
+			__asm__ __volatile__("xchgw %w0,%1"
+				:"=r" (x)
+				:"m" (*__xg(ptr)), "0" (x)
+				:"memory");
+			break;
+		case 4:
+			__asm__ __volatile__("xchgl %0,%1"
+				:"=r" (x)
+				:"m" (*__xg(ptr)), "0" (x)
+				:"memory");
+			break;
+	}
+	return x;
+}
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#ifdef CONFIG_X86_CMPXCHG
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	unsigned long prev;
+	switch (size) {
+	case 1:
+		__asm__ __volatile__("lock cmpxchgb %b1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
+				     : "memory");
+		return prev;
+	case 2:
+		__asm__ __volatile__("lock cmpxchgw %w1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
+				     : "memory");
+		return prev;
+	case 4:
+		__asm__ __volatile__("lock cmpxchgl %1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
+				     : "memory");
+		return prev;
+	}
+	return old;
+}
+
+#define cmpxchg(ptr,o,n)\
+	((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+					(unsigned long)(n),sizeof(*(ptr))))
+    
+#else
+/* Compiling for a 386 proper.	Is it worth implementing via cli/sti?  */
+#endif
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ *
+ * For now, "wmb()" doesn't actually do anything, as all
+ * Intel CPU's follow what Intel calls a *Processor Order*,
+ * in which all writes are seen in the program order even
+ * outside the CPU.
+ *
+ * I expect future Intel CPU's to have a weaker ordering,
+ * but I'd also expect them to finally get their act together
+ * and add some real memory barriers if so.
+ *
+ * Some non intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+ 
+#define mb() 	__asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define rmb()	mb()
+
+#ifdef CONFIG_X86_OOSTORE
+#define wmb() 	__asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#else
+#define wmb()	__asm__ __volatile__ ("": : :"memory")
+#endif
+
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#define smp_rmb()	rmb()
+#define smp_wmb()	wmb()
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#else
+#define smp_mb()	barrier()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
+
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+
+#define safe_halt()             ((void)0)
+
+/* 
+ * The use of 'barrier' in the following reflects their use as local-lock
+ * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following
+ * critical operations are executed. All critical operatiosn must complete
+ * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also
+ * includes these barriers, for example.
+ */
+
+#define __cli()                                                               \
+do {                                                                          \
+    HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1;              \
+    barrier();                                                                \
+} while (0)
+
+#define __sti()                                                               \
+do {                                                                          \
+    shared_info_t *_shared = HYPERVISOR_shared_info;                          \
+    barrier();                                                                \
+    _shared->vcpu_data[0].evtchn_upcall_mask = 0;                             \
+    barrier(); /* unmask then check (avoid races) */                          \
+    if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) )              \
+        force_evtchn_callback();                                              \
+} while (0)
+
+#define __save_flags(x)                                                       \
+do {                                                                          \
+    (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask;            \
+} while (0)
+
+#define __restore_flags(x)                                                    \
+do {                                                                          \
+    shared_info_t *_shared = HYPERVISOR_shared_info;                          \
+    barrier();                                                                \
+    if ( (_shared->vcpu_data[0].evtchn_upcall_mask = x) == 0 ) {              \
+        barrier(); /* unmask then check (avoid races) */                      \
+        if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) )          \
+            force_evtchn_callback();                                          \
+    }                                                                         \
+} while (0)
+
+#define __save_and_cli(x)                                                     \
+do {                                                                          \
+    (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask;            \
+    HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1;              \
+    barrier();                                                                \
+} while (0)
+
+#define __save_and_sti(x)                                                     \
+do {                                                                          \
+    shared_info_t *_shared = HYPERVISOR_shared_info;                          \
+    barrier();                                                                \
+    (x) = _shared->vcpu_data[0].evtchn_upcall_mask;                           \
+    _shared->vcpu_data[0].evtchn_upcall_mask = 0;                             \
+    barrier(); /* unmask then check (avoid races) */                          \
+    if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) )              \
+        force_evtchn_callback();                                              \
+} while (0)
+
+#define local_irq_save(x)       __save_and_cli(x)
+#define local_irq_set(x)        __save_and_sti(x)
+#define local_irq_restore(x)    __restore_flags(x)
+#define local_irq_disable()     __cli()
+#define local_irq_enable()      __sti()
+
+
+#ifdef CONFIG_SMP
+#error no SMP
+extern void __global_cli(void);
+extern void __global_sti(void);
+extern unsigned long __global_save_flags(void);
+extern void __global_restore_flags(unsigned long);
+#define cli() __global_cli()
+#define sti() __global_sti()
+#define save_flags(x) ((x)=__global_save_flags())
+#define restore_flags(x) __global_restore_flags(x)
+#define save_and_cli(x) do { save_flags(x); cli(); } while(0);
+#define save_and_sti(x) do { save_flags(x); sti(); } while(0);
+
+#else
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+#define save_and_cli(x) __save_and_cli(x)
+#define save_and_sti(x) __save_and_sti(x)
+
+#endif
+
+/*
+ * disable hlt during certain critical i/o operations
+ */
+#define HAVE_DISABLE_HLT
+void disable_hlt(void);
+void enable_hlt(void);
+
+extern unsigned long dmi_broken;
+extern int is_sony_vaio_laptop;
+
+#define BROKEN_ACPI_Sx		0x0001
+#define BROKEN_INIT_AFTER_S1	0x0002
+#define BROKEN_PNP_BIOS		0x0004
+
+#endif
diff --git a/linux-2.4-xen-sparse/include/asm-xen/vga.h b/linux-2.4-xen-sparse/include/asm-xen/vga.h
new file mode 100644
index 0000000000..d0624cf480
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/vga.h
@@ -0,0 +1,42 @@
+/*
+ *	Access to VGA videoram
+ *
+ *	(c) 1998 Martin Mares <mj@ucw.cz>
+ */
+
+#ifndef _LINUX_ASM_VGA_H_
+#define _LINUX_ASM_VGA_H_
+
+#include <asm/io.h>
+
+extern unsigned char *vgacon_mmap;
+
+static unsigned long VGA_MAP_MEM(unsigned long x)
+{
+    if( vgacon_mmap == NULL )
+    {
+        /* This is our first time in this function. This whole thing
+           is a rather grim hack. We know we're going to get asked 
+           to map a 32KB region between 0xb0000 and 0xb8000 because
+           that's what VGAs are. We used the boot time permanent 
+           fixed map region, and map it to machine pages.
+        */
+        if( x != 0xb8000 )
+            panic("Argghh! VGA Console is weird. 1:%08lx\n",x);
+
+        vgacon_mmap = (unsigned char*) bt_ioremap( 0xa0000, 128*1024 );
+        return (unsigned long) (vgacon_mmap+x-0xa0000);
+    }
+    else
+    {
+        if( x != 0xc0000 && x != 0xa0000 ) /* vidmem_end or charmap fonts */
+            panic("Argghh! VGA Console is weird. 2:%08lx\n",x);  
+	return (unsigned long) (vgacon_mmap+x-0xa0000);
+    }
+    return 0;
+}
+
+static inline unsigned char vga_readb(unsigned char * x) { return (*(x)); }
+static inline void vga_writeb(unsigned char x, unsigned char *y) { *(y) = (x); }
+
+#endif
diff --git a/linux-2.4-xen-sparse/include/asm-xen/xor.h b/linux-2.4-xen-sparse/include/asm-xen/xor.h
new file mode 100644
index 0000000000..9e6cca8a8a
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/asm-xen/xor.h
@@ -0,0 +1,879 @@
+/*
+ * include/asm-i386/xor.h
+ *
+ * Optimized RAID-5 checksumming functions for MMX and SSE.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * High-speed RAID5 checksumming functions utilizing MMX instructions.
+ * Copyright (C) 1998 Ingo Molnar.
+ */
+
+#define FPU_SAVE							\
+  do {									\
+	if (!(current->flags & PF_USEDFPU))				\
+		clts();							\
+	__asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0]));	\
+  } while (0)
+
+#define FPU_RESTORE							\
+  do {									\
+	__asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0]));		\
+	if (!(current->flags & PF_USEDFPU))				\
+		stts();							\
+  } while (0)
+
+#define LD(x,y)		"       movq   8*("#x")(%1), %%mm"#y"   ;\n"
+#define ST(x,y)		"       movq %%mm"#y",   8*("#x")(%1)   ;\n"
+#define XO1(x,y)	"       pxor   8*("#x")(%2), %%mm"#y"   ;\n"
+#define XO2(x,y)	"       pxor   8*("#x")(%3), %%mm"#y"   ;\n"
+#define XO3(x,y)	"       pxor   8*("#x")(%4), %%mm"#y"   ;\n"
+#define XO4(x,y)	"       pxor   8*("#x")(%5), %%mm"#y"   ;\n"
+
+
+static void
+xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	unsigned long lines = bytes >> 7;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	LD(i,0)					\
+		LD(i+1,1)			\
+			LD(i+2,2)		\
+				LD(i+3,3)	\
+	XO1(i,0)				\
+	ST(i,0)					\
+		XO1(i+1,1)			\
+		ST(i+1,1)			\
+			XO1(i+2,2)		\
+			ST(i+2,2)		\
+				XO1(i+3,3)	\
+				ST(i+3,3)
+
+	" .align 32			;\n"
+  	" 1:                            ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	"       addl $128, %1         ;\n"
+	"       addl $128, %2         ;\n"
+	"       decl %0               ;\n"
+	"       jnz 1b                ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2)
+	:
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	      unsigned long *p3)
+{
+	unsigned long lines = bytes >> 7;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	LD(i,0)					\
+		LD(i+1,1)			\
+			LD(i+2,2)		\
+				LD(i+3,3)	\
+	XO1(i,0)				\
+		XO1(i+1,1)			\
+			XO1(i+2,2)		\
+				XO1(i+3,3)	\
+	XO2(i,0)				\
+	ST(i,0)					\
+		XO2(i+1,1)			\
+		ST(i+1,1)			\
+			XO2(i+2,2)		\
+			ST(i+2,2)		\
+				XO2(i+3,3)	\
+				ST(i+3,3)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	"       addl $128, %1         ;\n"
+	"       addl $128, %2         ;\n"
+	"       addl $128, %3         ;\n"
+	"       decl %0               ;\n"
+	"       jnz 1b                ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3)
+	:
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	      unsigned long *p3, unsigned long *p4)
+{
+	unsigned long lines = bytes >> 7;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	LD(i,0)					\
+		LD(i+1,1)			\
+			LD(i+2,2)		\
+				LD(i+3,3)	\
+	XO1(i,0)				\
+		XO1(i+1,1)			\
+			XO1(i+2,2)		\
+				XO1(i+3,3)	\
+	XO2(i,0)				\
+		XO2(i+1,1)			\
+			XO2(i+2,2)		\
+				XO2(i+3,3)	\
+	XO3(i,0)				\
+	ST(i,0)					\
+		XO3(i+1,1)			\
+		ST(i+1,1)			\
+			XO3(i+2,2)		\
+			ST(i+2,2)		\
+				XO3(i+3,3)	\
+				ST(i+3,3)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	"       addl $128, %1         ;\n"
+	"       addl $128, %2         ;\n"
+	"       addl $128, %3         ;\n"
+	"       addl $128, %4         ;\n"
+	"       decl %0               ;\n"
+	"       jnz 1b                ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
+	:
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+
+static void
+xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	unsigned long lines = bytes >> 7;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	/* need to save/restore p4/p5 manually otherwise gcc's 10 argument
+	   limit gets exceeded (+ counts as two arguments) */
+	__asm__ __volatile__ (
+		"  pushl %4\n"
+		"  pushl %5\n"
+#undef BLOCK
+#define BLOCK(i) \
+	LD(i,0)					\
+		LD(i+1,1)			\
+			LD(i+2,2)		\
+				LD(i+3,3)	\
+	XO1(i,0)				\
+		XO1(i+1,1)			\
+			XO1(i+2,2)		\
+				XO1(i+3,3)	\
+	XO2(i,0)				\
+		XO2(i+1,1)			\
+			XO2(i+2,2)		\
+				XO2(i+3,3)	\
+	XO3(i,0)				\
+		XO3(i+1,1)			\
+			XO3(i+2,2)		\
+				XO3(i+3,3)	\
+	XO4(i,0)				\
+	ST(i,0)					\
+		XO4(i+1,1)			\
+		ST(i+1,1)			\
+			XO4(i+2,2)		\
+			ST(i+2,2)		\
+				XO4(i+3,3)	\
+				ST(i+3,3)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	"       addl $128, %1         ;\n"
+	"       addl $128, %2         ;\n"
+	"       addl $128, %3         ;\n"
+	"       addl $128, %4         ;\n"
+	"       addl $128, %5         ;\n"
+	"       decl %0               ;\n"
+	"       jnz 1b                ;\n"
+	"	popl %5\n"
+	"	popl %4\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3)
+	: "r" (p4), "r" (p5) 
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+#undef LD
+#undef XO1
+#undef XO2
+#undef XO3
+#undef XO4
+#undef ST
+#undef BLOCK
+
+static void
+xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	unsigned long lines = bytes >> 6;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+	" .align 32	             ;\n"
+	" 1:                         ;\n"
+	"       movq   (%1), %%mm0   ;\n"
+	"       movq  8(%1), %%mm1   ;\n"
+	"       pxor   (%2), %%mm0   ;\n"
+	"       movq 16(%1), %%mm2   ;\n"
+	"       movq %%mm0,   (%1)   ;\n"
+	"       pxor  8(%2), %%mm1   ;\n"
+	"       movq 24(%1), %%mm3   ;\n"
+	"       movq %%mm1,  8(%1)   ;\n"
+	"       pxor 16(%2), %%mm2   ;\n"
+	"       movq 32(%1), %%mm4   ;\n"
+	"       movq %%mm2, 16(%1)   ;\n"
+	"       pxor 24(%2), %%mm3   ;\n"
+	"       movq 40(%1), %%mm5   ;\n"
+	"       movq %%mm3, 24(%1)   ;\n"
+	"       pxor 32(%2), %%mm4   ;\n"
+	"       movq 48(%1), %%mm6   ;\n"
+	"       movq %%mm4, 32(%1)   ;\n"
+	"       pxor 40(%2), %%mm5   ;\n"
+	"       movq 56(%1), %%mm7   ;\n"
+	"       movq %%mm5, 40(%1)   ;\n"
+	"       pxor 48(%2), %%mm6   ;\n"
+	"       pxor 56(%2), %%mm7   ;\n"
+	"       movq %%mm6, 48(%1)   ;\n"
+	"       movq %%mm7, 56(%1)   ;\n"
+	
+	"       addl $64, %1         ;\n"
+	"       addl $64, %2         ;\n"
+	"       decl %0              ;\n"
+	"       jnz 1b               ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2)
+	:
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	     unsigned long *p3)
+{
+	unsigned long lines = bytes >> 6;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+	" .align 32,0x90             ;\n"
+	" 1:                         ;\n"
+	"       movq   (%1), %%mm0   ;\n"
+	"       movq  8(%1), %%mm1   ;\n"
+	"       pxor   (%2), %%mm0   ;\n"
+	"       movq 16(%1), %%mm2   ;\n"
+	"       pxor  8(%2), %%mm1   ;\n"
+	"       pxor   (%3), %%mm0   ;\n"
+	"       pxor 16(%2), %%mm2   ;\n"
+	"       movq %%mm0,   (%1)   ;\n"
+	"       pxor  8(%3), %%mm1   ;\n"
+	"       pxor 16(%3), %%mm2   ;\n"
+	"       movq 24(%1), %%mm3   ;\n"
+	"       movq %%mm1,  8(%1)   ;\n"
+	"       movq 32(%1), %%mm4   ;\n"
+	"       movq 40(%1), %%mm5   ;\n"
+	"       pxor 24(%2), %%mm3   ;\n"
+	"       movq %%mm2, 16(%1)   ;\n"
+	"       pxor 32(%2), %%mm4   ;\n"
+	"       pxor 24(%3), %%mm3   ;\n"
+	"       pxor 40(%2), %%mm5   ;\n"
+	"       movq %%mm3, 24(%1)   ;\n"
+	"       pxor 32(%3), %%mm4   ;\n"
+	"       pxor 40(%3), %%mm5   ;\n"
+	"       movq 48(%1), %%mm6   ;\n"
+	"       movq %%mm4, 32(%1)   ;\n"
+	"       movq 56(%1), %%mm7   ;\n"
+	"       pxor 48(%2), %%mm6   ;\n"
+	"       movq %%mm5, 40(%1)   ;\n"
+	"       pxor 56(%2), %%mm7   ;\n"
+	"       pxor 48(%3), %%mm6   ;\n"
+	"       pxor 56(%3), %%mm7   ;\n"
+	"       movq %%mm6, 48(%1)   ;\n"
+	"       movq %%mm7, 56(%1)   ;\n"
+      
+	"       addl $64, %1         ;\n"
+	"       addl $64, %2         ;\n"
+	"       addl $64, %3         ;\n"
+	"       decl %0              ;\n"
+	"       jnz 1b               ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3)
+	:
+	: "memory" );
+
+	FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	     unsigned long *p3, unsigned long *p4)
+{
+	unsigned long lines = bytes >> 6;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+	" .align 32,0x90             ;\n"
+	" 1:                         ;\n"
+	"       movq   (%1), %%mm0   ;\n"
+	"       movq  8(%1), %%mm1   ;\n"
+	"       pxor   (%2), %%mm0   ;\n"
+	"       movq 16(%1), %%mm2   ;\n"
+	"       pxor  8(%2), %%mm1   ;\n"
+	"       pxor   (%3), %%mm0   ;\n"
+	"       pxor 16(%2), %%mm2   ;\n"
+	"       pxor  8(%3), %%mm1   ;\n"
+	"       pxor   (%4), %%mm0   ;\n"
+	"       movq 24(%1), %%mm3   ;\n"
+	"       pxor 16(%3), %%mm2   ;\n"
+	"       pxor  8(%4), %%mm1   ;\n"
+	"       movq %%mm0,   (%1)   ;\n"
+	"       movq 32(%1), %%mm4   ;\n"
+	"       pxor 24(%2), %%mm3   ;\n"
+	"       pxor 16(%4), %%mm2   ;\n"
+	"       movq %%mm1,  8(%1)   ;\n"
+	"       movq 40(%1), %%mm5   ;\n"
+	"       pxor 32(%2), %%mm4   ;\n"
+	"       pxor 24(%3), %%mm3   ;\n"
+	"       movq %%mm2, 16(%1)   ;\n"
+	"       pxor 40(%2), %%mm5   ;\n"
+	"       pxor 32(%3), %%mm4   ;\n"
+	"       pxor 24(%4), %%mm3   ;\n"
+	"       movq %%mm3, 24(%1)   ;\n"
+	"       movq 56(%1), %%mm7   ;\n"
+	"       movq 48(%1), %%mm6   ;\n"
+	"       pxor 40(%3), %%mm5   ;\n"
+	"       pxor 32(%4), %%mm4   ;\n"
+	"       pxor 48(%2), %%mm6   ;\n"
+	"       movq %%mm4, 32(%1)   ;\n"
+	"       pxor 56(%2), %%mm7   ;\n"
+	"       pxor 40(%4), %%mm5   ;\n"
+	"       pxor 48(%3), %%mm6   ;\n"
+	"       pxor 56(%3), %%mm7   ;\n"
+	"       movq %%mm5, 40(%1)   ;\n"
+	"       pxor 48(%4), %%mm6   ;\n"
+	"       pxor 56(%4), %%mm7   ;\n"
+	"       movq %%mm6, 48(%1)   ;\n"
+	"       movq %%mm7, 56(%1)   ;\n"
+      
+	"       addl $64, %1         ;\n"
+	"       addl $64, %2         ;\n"
+	"       addl $64, %3         ;\n"
+	"       addl $64, %4         ;\n"
+	"       decl %0              ;\n"
+	"       jnz 1b               ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
+	:
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	unsigned long lines = bytes >> 6;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	/* need to save p4/p5 manually to not exceed gcc's 10 argument limit */
+	__asm__ __volatile__ (
+	"	pushl %4\n"
+	"	pushl %5\n"        	
+	" .align 32,0x90             ;\n"
+	" 1:                         ;\n"
+	"       movq   (%1), %%mm0   ;\n"
+	"       movq  8(%1), %%mm1   ;\n"
+	"       pxor   (%2), %%mm0   ;\n"
+	"       pxor  8(%2), %%mm1   ;\n"
+	"       movq 16(%1), %%mm2   ;\n"
+	"       pxor   (%3), %%mm0   ;\n"
+	"       pxor  8(%3), %%mm1   ;\n"
+	"       pxor 16(%2), %%mm2   ;\n"
+	"       pxor   (%4), %%mm0   ;\n"
+	"       pxor  8(%4), %%mm1   ;\n"
+	"       pxor 16(%3), %%mm2   ;\n"
+	"       movq 24(%1), %%mm3   ;\n"
+	"       pxor   (%5), %%mm0   ;\n"
+	"       pxor  8(%5), %%mm1   ;\n"
+	"       movq %%mm0,   (%1)   ;\n"
+	"       pxor 16(%4), %%mm2   ;\n"
+	"       pxor 24(%2), %%mm3   ;\n"
+	"       movq %%mm1,  8(%1)   ;\n"
+	"       pxor 16(%5), %%mm2   ;\n"
+	"       pxor 24(%3), %%mm3   ;\n"
+	"       movq 32(%1), %%mm4   ;\n"
+	"       movq %%mm2, 16(%1)   ;\n"
+	"       pxor 24(%4), %%mm3   ;\n"
+	"       pxor 32(%2), %%mm4   ;\n"
+	"       movq 40(%1), %%mm5   ;\n"
+	"       pxor 24(%5), %%mm3   ;\n"
+	"       pxor 32(%3), %%mm4   ;\n"
+	"       pxor 40(%2), %%mm5   ;\n"
+	"       movq %%mm3, 24(%1)   ;\n"
+	"       pxor 32(%4), %%mm4   ;\n"
+	"       pxor 40(%3), %%mm5   ;\n"
+	"       movq 48(%1), %%mm6   ;\n"
+	"       movq 56(%1), %%mm7   ;\n"
+	"       pxor 32(%5), %%mm4   ;\n"
+	"       pxor 40(%4), %%mm5   ;\n"
+	"       pxor 48(%2), %%mm6   ;\n"
+	"       pxor 56(%2), %%mm7   ;\n"
+	"       movq %%mm4, 32(%1)   ;\n"
+	"       pxor 48(%3), %%mm6   ;\n"
+	"       pxor 56(%3), %%mm7   ;\n"
+	"       pxor 40(%5), %%mm5   ;\n"
+	"       pxor 48(%4), %%mm6   ;\n"
+	"       pxor 56(%4), %%mm7   ;\n"
+	"       movq %%mm5, 40(%1)   ;\n"
+	"       pxor 48(%5), %%mm6   ;\n"
+	"       pxor 56(%5), %%mm7   ;\n"
+	"       movq %%mm6, 48(%1)   ;\n"
+	"       movq %%mm7, 56(%1)   ;\n"
+      
+	"       addl $64, %1         ;\n"
+	"       addl $64, %2         ;\n"
+	"       addl $64, %3         ;\n"
+	"       addl $64, %4         ;\n"
+	"       addl $64, %5         ;\n"
+	"       decl %0              ;\n"
+	"       jnz 1b               ;\n"
+	"	popl %5\n"
+	"	popl %4\n"
+	: "+g" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3)
+	: "r" (p4), "r" (p5)
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static struct xor_block_template xor_block_pII_mmx = {
+	name: "pII_mmx",
+	do_2: xor_pII_mmx_2,
+	do_3: xor_pII_mmx_3,
+	do_4: xor_pII_mmx_4,
+	do_5: xor_pII_mmx_5,
+};
+
+static struct xor_block_template xor_block_p5_mmx = {
+	name: "p5_mmx",
+	do_2: xor_p5_mmx_2,
+	do_3: xor_p5_mmx_3,
+	do_4: xor_p5_mmx_4,
+	do_5: xor_p5_mmx_5,
+};
+
+#undef FPU_SAVE
+#undef FPU_RESTORE
+
+/*
+ * Cache avoiding checksumming functions utilizing KNI instructions
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+#define XMMS_SAVE				\
+	if (!(current->flags & PF_USEDFPU))	\
+		clts();				\
+	__asm__ __volatile__ ( 			\
+		"movups %%xmm0,(%1)	;\n\t"	\
+		"movups %%xmm1,0x10(%1)	;\n\t"	\
+		"movups %%xmm2,0x20(%1)	;\n\t"	\
+		"movups %%xmm3,0x30(%1)	;\n\t"	\
+		: "=&r" (cr0)			\
+		: "r" (xmm_save) 		\
+		: "memory")
+
+#define XMMS_RESTORE				\
+	__asm__ __volatile__ ( 			\
+		"sfence			;\n\t"	\
+		"movups (%1),%%xmm0	;\n\t"	\
+		"movups 0x10(%1),%%xmm1	;\n\t"	\
+		"movups 0x20(%1),%%xmm2	;\n\t"	\
+		"movups 0x30(%1),%%xmm3	;\n\t"	\
+		:				\
+		: "r" (cr0), "r" (xmm_save)	\
+		: "memory");			\
+	if (!(current->flags & PF_USEDFPU))	\
+		stts()
+
+#define ALIGN16 __attribute__((aligned(16)))
+
+#define OFFS(x)		"16*("#x")"
+#define PF_OFFS(x)	"256+16*("#x")"
+#define	PF0(x)		"	prefetchnta "PF_OFFS(x)"(%1)		;\n"
+#define LD(x,y)		"       movaps   "OFFS(x)"(%1), %%xmm"#y"	;\n"
+#define ST(x,y)		"       movaps %%xmm"#y",   "OFFS(x)"(%1)	;\n"
+#define PF1(x)		"	prefetchnta "PF_OFFS(x)"(%2)		;\n"
+#define PF2(x)		"	prefetchnta "PF_OFFS(x)"(%3)		;\n"
+#define PF3(x)		"	prefetchnta "PF_OFFS(x)"(%4)		;\n"
+#define PF4(x)		"	prefetchnta "PF_OFFS(x)"(%5)		;\n"
+#define PF5(x)		"	prefetchnta "PF_OFFS(x)"(%6)		;\n"
+#define XO1(x,y)	"       xorps   "OFFS(x)"(%2), %%xmm"#y"	;\n"
+#define XO2(x,y)	"       xorps   "OFFS(x)"(%3), %%xmm"#y"	;\n"
+#define XO3(x,y)	"       xorps   "OFFS(x)"(%4), %%xmm"#y"	;\n"
+#define XO4(x,y)	"       xorps   "OFFS(x)"(%5), %%xmm"#y"	;\n"
+#define XO5(x,y)	"       xorps   "OFFS(x)"(%6), %%xmm"#y"	;\n"
+
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+        unsigned long lines = bytes >> 8;
+	char xmm_save[16*4] ALIGN16;
+	int cr0;
+
+	XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+		LD(i,0)					\
+			LD(i+1,1)			\
+		PF1(i)					\
+				PF1(i+2)		\
+				LD(i+2,2)		\
+					LD(i+3,3)	\
+		PF0(i+4)				\
+				PF0(i+6)		\
+		XO1(i,0)				\
+			XO1(i+1,1)			\
+				XO1(i+2,2)		\
+					XO1(i+3,3)	\
+		ST(i,0)					\
+			ST(i+1,1)			\
+				ST(i+2,2)		\
+					ST(i+3,3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+        " 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+        "       addl $256, %1           ;\n"
+        "       addl $256, %2           ;\n"
+        "       decl %0                 ;\n"
+        "       jnz 1b                  ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2)
+	:
+        : "memory");
+
+	XMMS_RESTORE;
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3)
+{
+        unsigned long lines = bytes >> 8;
+	char xmm_save[16*4] ALIGN16;
+	int cr0;
+
+	XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+		PF1(i)					\
+				PF1(i+2)		\
+		LD(i,0)					\
+			LD(i+1,1)			\
+				LD(i+2,2)		\
+					LD(i+3,3)	\
+		PF2(i)					\
+				PF2(i+2)		\
+		PF0(i+4)				\
+				PF0(i+6)		\
+		XO1(i,0)				\
+			XO1(i+1,1)			\
+				XO1(i+2,2)		\
+					XO1(i+3,3)	\
+		XO2(i,0)				\
+			XO2(i+1,1)			\
+				XO2(i+2,2)		\
+					XO2(i+3,3)	\
+		ST(i,0)					\
+			ST(i+1,1)			\
+				ST(i+2,2)		\
+					ST(i+3,3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+        " 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+        "       addl $256, %1           ;\n"
+        "       addl $256, %2           ;\n"
+        "       addl $256, %3           ;\n"
+        "       decl %0                 ;\n"
+        "       jnz 1b                  ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r"(p2), "+r"(p3)
+	:
+        : "memory" );
+
+	XMMS_RESTORE;
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4)
+{
+        unsigned long lines = bytes >> 8;
+	char xmm_save[16*4] ALIGN16;
+	int cr0;
+
+	XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+		PF1(i)					\
+				PF1(i+2)		\
+		LD(i,0)					\
+			LD(i+1,1)			\
+				LD(i+2,2)		\
+					LD(i+3,3)	\
+		PF2(i)					\
+				PF2(i+2)		\
+		XO1(i,0)				\
+			XO1(i+1,1)			\
+				XO1(i+2,2)		\
+					XO1(i+3,3)	\
+		PF3(i)					\
+				PF3(i+2)		\
+		PF0(i+4)				\
+				PF0(i+6)		\
+		XO2(i,0)				\
+			XO2(i+1,1)			\
+				XO2(i+2,2)		\
+					XO2(i+3,3)	\
+		XO3(i,0)				\
+			XO3(i+1,1)			\
+				XO3(i+2,2)		\
+					XO3(i+3,3)	\
+		ST(i,0)					\
+			ST(i+1,1)			\
+				ST(i+2,2)		\
+					ST(i+3,3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+        " 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+        "       addl $256, %1           ;\n"
+        "       addl $256, %2           ;\n"
+        "       addl $256, %3           ;\n"
+        "       addl $256, %4           ;\n"
+        "       decl %0                 ;\n"
+        "       jnz 1b                  ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
+	:
+        : "memory" );
+
+	XMMS_RESTORE;
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+        unsigned long lines = bytes >> 8;
+	char xmm_save[16*4] ALIGN16;
+	int cr0;
+
+	XMMS_SAVE;
+
+	/* need to save p4/p5 manually to not exceed gcc's 10 argument limit */
+        __asm__ __volatile__ (
+		" pushl %4\n"
+		" pushl %5\n"
+#undef BLOCK
+#define BLOCK(i) \
+		PF1(i)					\
+				PF1(i+2)		\
+		LD(i,0)					\
+			LD(i+1,1)			\
+				LD(i+2,2)		\
+					LD(i+3,3)	\
+		PF2(i)					\
+				PF2(i+2)		\
+		XO1(i,0)				\
+			XO1(i+1,1)			\
+				XO1(i+2,2)		\
+					XO1(i+3,3)	\
+		PF3(i)					\
+				PF3(i+2)		\
+		XO2(i,0)				\
+			XO2(i+1,1)			\
+				XO2(i+2,2)		\
+					XO2(i+3,3)	\
+		PF4(i)					\
+				PF4(i+2)		\
+		PF0(i+4)				\
+				PF0(i+6)		\
+		XO3(i,0)				\
+			XO3(i+1,1)			\
+				XO3(i+2,2)		\
+					XO3(i+3,3)	\
+		XO4(i,0)				\
+			XO4(i+1,1)			\
+				XO4(i+2,2)		\
+					XO4(i+3,3)	\
+		ST(i,0)					\
+			ST(i+1,1)			\
+				ST(i+2,2)		\
+					ST(i+3,3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+        " 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+        "       addl $256, %1           ;\n"
+        "       addl $256, %2           ;\n"
+        "       addl $256, %3           ;\n"
+        "       addl $256, %4           ;\n"
+        "       addl $256, %5           ;\n"
+        "       decl %0                 ;\n"
+        "       jnz 1b                  ;\n"
+	"	popl %5\n"	
+	"	popl %4\n"	
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3)
+	: "r" (p4), "r" (p5)
+	: "memory");
+
+	XMMS_RESTORE;
+}
+
+static struct xor_block_template xor_block_pIII_sse = {
+        name: "pIII_sse",
+        do_2: xor_sse_2,
+        do_3: xor_sse_3,
+        do_4: xor_sse_4,
+        do_5: xor_sse_5,
+};
+
+/* Also try the generic routines.  */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES				\
+	do {						\
+		xor_speed(&xor_block_8regs);		\
+		xor_speed(&xor_block_32regs);		\
+	        if (cpu_has_xmm)			\
+			xor_speed(&xor_block_pIII_sse);	\
+	        if (md_cpu_has_mmx()) {			\
+	                xor_speed(&xor_block_pII_mmx);	\
+	                xor_speed(&xor_block_p5_mmx);	\
+	        }					\
+	} while (0)
+
+/* We force the use of the SSE xor block because it can write around L2.
+   We may also be able to load into the L1 only depending on how the cpu
+   deals with a load to a line that is being prefetched.  */
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+	(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
diff --git a/linux-2.4-xen-sparse/include/linux/blk.h b/linux-2.4-xen-sparse/include/linux/blk.h
new file mode 100644
index 0000000000..d032dff7ba
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/linux/blk.h
@@ -0,0 +1,409 @@
+#ifndef _BLK_H
+#define _BLK_H
+
+#include <linux/blkdev.h>
+#include <linux/locks.h>
+#include <linux/config.h>
+#include <linux/spinlock.h>
+
+/*
+ * Spinlock for protecting the request queue which
+ * is mucked around with in interrupts on potentially
+ * multiple CPU's..
+ */
+extern spinlock_t io_request_lock;
+
+/*
+ * Initialization functions.
+ */
+extern int isp16_init(void);
+extern int cdu31a_init(void);
+extern int acsi_init(void);
+extern int mcd_init(void);
+extern int mcdx_init(void);
+extern int sbpcd_init(void);
+extern int aztcd_init(void);
+extern int sony535_init(void);
+extern int gscd_init(void);
+extern int cm206_init(void);
+extern int optcd_init(void);
+extern int sjcd_init(void);
+extern int cdi_init(void);
+extern int hd_init(void);
+extern int ide_init(void);
+extern int xd_init(void);
+extern int mfm_init(void);
+extern int loop_init(void);
+extern int md_init(void);
+extern int ap_init(void);
+extern int ddv_init(void);
+extern int z2_init(void);
+extern int swim3_init(void);
+extern int swimiop_init(void);
+extern int amiga_floppy_init(void);
+extern int atari_floppy_init(void);
+extern int ez_init(void);
+extern int bpcd_init(void);
+extern int ps2esdi_init(void);
+extern int jsfd_init(void);
+extern int viodasd_init(void);
+extern int viocd_init(void);
+
+#if defined(CONFIG_ARCH_S390)
+extern int dasd_init(void);
+extern int xpram_init(void);
+extern int tapeblock_init(void);
+#endif /* CONFIG_ARCH_S390 */
+
+#if defined(CONFIG_XEN)
+extern int xlblk_init(void);
+#endif /* CONFIG_XEN */
+
+extern void set_device_ro(kdev_t dev,int flag);
+void add_blkdev_randomness(int major);
+
+extern int floppy_init(void);
+extern int rd_doload;		/* 1 = load ramdisk, 0 = don't load */
+extern int rd_prompt;		/* 1 = prompt for ramdisk, 0 = don't prompt */
+extern int rd_image_start;	/* starting block # of image */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
+
+extern unsigned long initrd_start,initrd_end;
+extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */
+void initrd_init(void);
+
+#endif
+
+		 
+/*
+ * end_request() and friends. Must be called with the request queue spinlock
+ * acquired. All functions called within end_request() _must_be_ atomic.
+ *
+ * Several drivers define their own end_request and call
+ * end_that_request_first() and end_that_request_last()
+ * for parts of the original function. This prevents
+ * code duplication in drivers.
+ */
+
+static inline void blkdev_dequeue_request(struct request * req)
+{
+	list_del(&req->queue);
+}
+
+int end_that_request_first(struct request *req, int uptodate, char *name);
+void end_that_request_last(struct request *req);
+
+#if defined(MAJOR_NR) || defined(IDE_DRIVER)
+
+#undef DEVICE_ON
+#undef DEVICE_OFF
+
+/*
+ * Add entries as needed.
+ */
+
+#ifdef IDE_DRIVER
+
+#define DEVICE_NR(device)	(MINOR(device) >> PARTN_BITS)
+#define DEVICE_NAME "ide"
+
+#elif (MAJOR_NR == RAMDISK_MAJOR)
+
+/* ram disk */
+#define DEVICE_NAME "ramdisk"
+#define DEVICE_NR(device) (MINOR(device))
+#define DEVICE_NO_RANDOM
+
+#elif (MAJOR_NR == Z2RAM_MAJOR)
+
+/* Zorro II Ram */
+#define DEVICE_NAME "Z2RAM"
+#define DEVICE_REQUEST do_z2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == FLOPPY_MAJOR)
+
+static void floppy_off(unsigned int nr);
+
+#define DEVICE_NAME "floppy"
+#define DEVICE_INTR do_floppy
+#define DEVICE_REQUEST do_fd_request
+#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 ))
+#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device))
+
+#elif (MAJOR_NR == HD_MAJOR)
+
+/* Hard disk:  timeout is 6 seconds. */
+#define DEVICE_NAME "hard disk"
+#define DEVICE_INTR do_hd
+#define TIMEOUT_VALUE (6*HZ)
+#define DEVICE_REQUEST do_hd_request
+#define DEVICE_NR(device) (MINOR(device)>>6)
+
+#elif (SCSI_DISK_MAJOR(MAJOR_NR))
+
+#define DEVICE_NAME "scsidisk"
+#define TIMEOUT_VALUE (2*HZ)
+#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4))
+
+/* Kludge to use the same number for both char and block major numbers */
+#elif  (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER)
+
+#define DEVICE_NAME "Multiple devices driver"
+#define DEVICE_REQUEST do_md_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SCSI_TAPE_MAJOR)
+
+#define DEVICE_NAME "scsitape"
+#define DEVICE_INTR do_st  
+#define DEVICE_NR(device) (MINOR(device) & 0x7f)
+
+#elif (MAJOR_NR == OSST_MAJOR)
+
+#define DEVICE_NAME "onstream" 
+#define DEVICE_INTR do_osst
+#define DEVICE_NR(device) (MINOR(device) & 0x7f) 
+#define DEVICE_ON(device) 
+#define DEVICE_OFF(device) 
+
+#elif (MAJOR_NR == SCSI_CDROM_MAJOR)
+
+#define DEVICE_NAME "CD-ROM"
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == XT_DISK_MAJOR)
+
+#define DEVICE_NAME "xt disk"
+#define DEVICE_REQUEST do_xd_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == PS2ESDI_MAJOR)
+
+#define DEVICE_NAME "PS/2 ESDI"
+#define DEVICE_REQUEST do_ps2esdi_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == CDU31A_CDROM_MAJOR)
+
+#define DEVICE_NAME "CDU31A"
+#define DEVICE_REQUEST do_cdu31a_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE))
+
+#define DEVICE_NAME "ACSI"
+#define DEVICE_INTR do_acsi
+#define DEVICE_REQUEST do_acsi_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcd */
+#define DEVICE_REQUEST do_mcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcdx */
+#define DEVICE_REQUEST do_mcdx_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #1"
+#define DEVICE_REQUEST do_sbpcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #2"
+#define DEVICE_REQUEST do_sbpcd2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #3"
+#define DEVICE_REQUEST do_sbpcd3_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #4"
+#define DEVICE_REQUEST do_sbpcd4_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == AZTECH_CDROM_MAJOR)
+
+#define DEVICE_NAME "Aztech CD-ROM"
+#define DEVICE_REQUEST do_aztcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CDU535_CDROM_MAJOR)
+
+#define DEVICE_NAME "SONY-CDU535"
+#define DEVICE_INTR do_cdu535
+#define DEVICE_REQUEST do_cdu535_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR)
+
+#define DEVICE_NAME "Goldstar R420"
+#define DEVICE_REQUEST do_gscd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CM206_CDROM_MAJOR)
+#define DEVICE_NAME "Philips/LMS CD-ROM cm206"
+#define DEVICE_REQUEST do_cm206_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == OPTICS_CDROM_MAJOR)
+
+#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM"
+#define DEVICE_REQUEST do_optcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SANYO_CDROM_MAJOR)
+
+#define DEVICE_NAME "Sanyo H94A CD-ROM"
+#define DEVICE_REQUEST do_sjcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == APBLOCK_MAJOR)
+
+#define DEVICE_NAME "apblock"
+#define DEVICE_REQUEST ap_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DDV_MAJOR)
+
+#define DEVICE_NAME "ddv"
+#define DEVICE_REQUEST ddv_request
+#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS)
+
+#elif (MAJOR_NR == MFM_ACORN_MAJOR)
+
+#define DEVICE_NAME "mfm disk"
+#define DEVICE_INTR do_mfm
+#define DEVICE_REQUEST do_mfm_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == NBD_MAJOR)
+
+#define DEVICE_NAME "nbd"
+#define DEVICE_REQUEST do_nbd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MDISK_MAJOR)
+
+#define DEVICE_NAME "mdisk"
+#define DEVICE_REQUEST mdisk_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DASD_MAJOR)
+
+#define DEVICE_NAME "dasd"
+#define DEVICE_REQUEST do_dasd_request
+#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS)
+
+#elif (MAJOR_NR == I2O_MAJOR)
+
+#define DEVICE_NAME "I2O block"
+#define DEVICE_REQUEST i2ob_request
+#define DEVICE_NR(device) (MINOR(device)>>4)
+
+#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR)
+
+#define DEVICE_NAME "ida"
+#define TIMEOUT_VALUE (25*HZ)
+#define DEVICE_REQUEST do_ida_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#endif /* MAJOR_NR == whatever */
+
+/* provide DEVICE_xxx defaults, if not explicitly defined
+ * above in the MAJOR_NR==xxx if-elif tree */
+#ifndef DEVICE_ON
+#define DEVICE_ON(device) do {} while (0)
+#endif
+#ifndef DEVICE_OFF
+#define DEVICE_OFF(device) do {} while (0)
+#endif
+
+#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR)
+#if !defined(IDE_DRIVER)
+
+#ifndef CURRENT
+#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+#ifndef QUEUE_EMPTY
+#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+
+#ifndef DEVICE_NAME
+#define DEVICE_NAME "unknown"
+#endif
+
+#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev)
+
+#ifdef DEVICE_INTR
+static void (*DEVICE_INTR)(void) = NULL;
+#endif
+
+#define SET_INTR(x) (DEVICE_INTR = (x))
+
+#ifdef DEVICE_REQUEST
+static void (DEVICE_REQUEST)(request_queue_t *);
+#endif 
+  
+#ifdef DEVICE_INTR
+#define CLEAR_INTR SET_INTR(NULL)
+#else
+#define CLEAR_INTR
+#endif
+
+#define INIT_REQUEST \
+	if (QUEUE_EMPTY) {\
+		CLEAR_INTR; \
+		return; \
+	} \
+	if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \
+		panic(DEVICE_NAME ": request list destroyed"); \
+	if (CURRENT->bh) { \
+		if (!buffer_locked(CURRENT->bh)) \
+			panic(DEVICE_NAME ": block not locked"); \
+	}
+
+#endif /* !defined(IDE_DRIVER) */
+
+
+#ifndef LOCAL_END_REQUEST	/* If we have our own end_request, we do not want to include this mess */
+
+#if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR)
+
+static inline void end_request(int uptodate) {
+	struct request *req = CURRENT;
+
+	if (end_that_request_first(req, uptodate, DEVICE_NAME))
+		return;
+
+#ifndef DEVICE_NO_RANDOM
+	add_blkdev_randomness(MAJOR(req->rq_dev));
+#endif
+	DEVICE_OFF(req->rq_dev);
+	blkdev_dequeue_request(req);
+	end_that_request_last(req);
+}
+
+#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */
+#endif /* LOCAL_END_REQUEST */
+
+#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */
+#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */
+
+#endif /* _BLK_H */
diff --git a/linux-2.4-xen-sparse/include/linux/highmem.h b/linux-2.4-xen-sparse/include/linux/highmem.h
new file mode 100644
index 0000000000..7dab94f5eb
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/linux/highmem.h
@@ -0,0 +1,137 @@
+#ifndef _LINUX_HIGHMEM_H
+#define _LINUX_HIGHMEM_H
+
+#include <linux/config.h>
+#include <asm/pgalloc.h>
+
+#ifdef CONFIG_HIGHMEM
+
+extern struct page *highmem_start_page;
+
+#include <asm/highmem.h>
+
+/* declarations for linux/mm/highmem.c */
+unsigned int nr_free_highpages(void);
+void kmap_flush_unused(void);
+
+extern struct buffer_head *create_bounce(int rw, struct buffer_head * bh_orig);
+
+static inline char *bh_kmap(struct buffer_head *bh)
+{
+	return kmap(bh->b_page) + bh_offset(bh);
+}
+
+static inline void bh_kunmap(struct buffer_head *bh)
+{
+	kunmap(bh->b_page);
+}
+
+/*
+ * remember to add offset! and never ever reenable interrupts between a
+ * bh_kmap_irq and bh_kunmap_irq!!
+ */
+static inline char *bh_kmap_irq(struct buffer_head *bh, unsigned long *flags)
+{
+	unsigned long addr;
+
+	__save_flags(*flags);
+
+	/*
+	 * could be low
+	 */
+	if (!PageHighMem(bh->b_page))
+		return bh->b_data;
+
+	/*
+	 * it's a highmem page
+	 */
+	__cli();
+	addr = (unsigned long) kmap_atomic(bh->b_page, KM_BH_IRQ);
+
+	if (addr & ~PAGE_MASK)
+		BUG();
+
+	return (char *) addr + bh_offset(bh);
+}
+
+static inline void bh_kunmap_irq(char *buffer, unsigned long *flags)
+{
+	unsigned long ptr = (unsigned long) buffer & PAGE_MASK;
+
+	kunmap_atomic((void *) ptr, KM_BH_IRQ);
+	__restore_flags(*flags);
+}
+
+#else /* CONFIG_HIGHMEM */
+
+static inline unsigned int nr_free_highpages(void) { return 0; }
+static inline void kmap_flush_unused(void) { }
+
+static inline void *kmap(struct page *page) { return page_address(page); }
+
+#define kunmap(page) do { } while (0)
+
+#define kmap_atomic(page,idx)		kmap(page)
+#define kunmap_atomic(page,idx)		kunmap(page)
+
+#define bh_kmap(bh)			((bh)->b_data)
+#define bh_kunmap(bh)			do { } while (0)
+#define kmap_nonblock(page)            kmap(page)
+#define bh_kmap_irq(bh, flags)		((bh)->b_data)
+#define bh_kunmap_irq(bh, flags)	do { *(flags) = 0; } while (0)
+
+#endif /* CONFIG_HIGHMEM */
+
+/* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */
+static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *addr = kmap_atomic(page, KM_USER0);
+	clear_user_page(addr, vaddr);
+	kunmap_atomic(addr, KM_USER0);
+}
+
+static inline void clear_highpage(struct page *page)
+{
+	clear_page(kmap(page));
+	kunmap(page);
+}
+
+/*
+ * Same but also flushes aliased cache contents to RAM.
+ */
+static inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size)
+{
+	char *kaddr;
+
+	if (offset + size > PAGE_SIZE)
+		out_of_line_bug();
+	kaddr = kmap(page);
+	memset(kaddr + offset, 0, size);
+	flush_dcache_page(page);
+	flush_page_to_ram(page);
+	kunmap(page);
+}
+
+static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr)
+{
+	char *vfrom, *vto;
+
+	vfrom = kmap_atomic(from, KM_USER0);
+	vto = kmap_atomic(to, KM_USER1);
+	copy_user_page(vto, vfrom, vaddr);
+	kunmap_atomic(vfrom, KM_USER0);
+	kunmap_atomic(vto, KM_USER1);
+}
+
+static inline void copy_highpage(struct page *to, struct page *from)
+{
+	char *vfrom, *vto;
+
+	vfrom = kmap_atomic(from, KM_USER0);
+	vto = kmap_atomic(to, KM_USER1);
+	copy_page(vto, vfrom);
+	kunmap_atomic(vfrom, KM_USER0);
+	kunmap_atomic(vto, KM_USER1);
+}
+
+#endif /* _LINUX_HIGHMEM_H */
diff --git a/linux-2.4-xen-sparse/include/linux/irq.h b/linux-2.4-xen-sparse/include/linux/irq.h
new file mode 100644
index 0000000000..7052672c2b
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/linux/irq.h
@@ -0,0 +1,80 @@
+#ifndef __irq_h
+#define __irq_h
+
+/*
+ * Please do not include this file in generic code.  There is currently
+ * no requirement for any architecture to implement anything held
+ * within this file.
+ *
+ * Thanks. --rmk
+ */
+
+#include <linux/config.h>
+
+#if !defined(CONFIG_ARCH_S390)
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+
+#include <asm/irq.h>
+#include <asm/ptrace.h>
+
+/*
+ * IRQ line status.
+ */
+#define IRQ_INPROGRESS	1	/* IRQ handler active - do not enter! */
+#define IRQ_DISABLED	2	/* IRQ disabled - do not enter! */
+#define IRQ_PENDING	4	/* IRQ pending - replay on enable */
+#define IRQ_REPLAY	8	/* IRQ has been replayed but not acked yet */
+#define IRQ_AUTODETECT	16	/* IRQ is being autodetected */
+#define IRQ_WAITING	32	/* IRQ not yet seen - for autodetection */
+#define IRQ_LEVEL	64	/* IRQ level triggered */
+#define IRQ_MASKED	128	/* IRQ masked - shouldn't be seen again */
+#define IRQ_PER_CPU	256	/* IRQ is per CPU */
+
+/*
+ * Interrupt controller descriptor. This is all we need
+ * to describe about the low-level hardware. 
+ */
+struct hw_interrupt_type {
+	const char * typename;
+	unsigned int (*startup)(unsigned int irq);
+	void (*shutdown)(unsigned int irq);
+	void (*enable)(unsigned int irq);
+	void (*disable)(unsigned int irq);
+	void (*ack)(unsigned int irq);
+	void (*end)(unsigned int irq);
+	void (*set_affinity)(unsigned int irq, unsigned long mask);
+};
+
+typedef struct hw_interrupt_type  hw_irq_controller;
+
+/*
+ * This is the "IRQ descriptor", which contains various information
+ * about the irq, including what kind of hardware handling it has,
+ * whether it is disabled etc etc.
+ *
+ * Pad this out to 32 bytes for cache and indexing reasons.
+ */
+typedef struct {
+	unsigned int status;		/* IRQ status */
+	hw_irq_controller *handler;
+	struct irqaction *action;	/* IRQ action list */
+	unsigned int depth;		/* nested irq disables */
+	spinlock_t lock;
+} ____cacheline_aligned irq_desc_t;
+
+extern irq_desc_t irq_desc [NR_IRQS];
+
+#include <asm/hw_irq.h> /* the arch dependent stuff */
+
+extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+extern int setup_irq(unsigned int , struct irqaction * );
+extern int teardown_irq(unsigned int , struct irqaction * );
+
+extern hw_irq_controller no_irq_type;  /* needed in every arch ? */
+extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
+
+#endif
+
+#endif /* __irq_h */
diff --git a/linux-2.4-xen-sparse/include/linux/mm.h b/linux-2.4-xen-sparse/include/linux/mm.h
new file mode 100644
index 0000000000..10b81190c2
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/linux/mm.h
@@ -0,0 +1,727 @@
+#ifndef _LINUX_MM_H
+#define _LINUX_MM_H
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/mmzone.h>
+#include <linux/swap.h>
+#include <linux/rbtree.h>
+
+extern unsigned long max_mapnr;
+extern unsigned long num_physpages;
+extern unsigned long num_mappedpages;
+extern void * high_memory;
+extern int page_cluster;
+/* The inactive_clean lists are per zone. */
+extern struct list_head active_list;
+extern struct list_head inactive_list;
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/atomic.h>
+
+/*
+ * Linux kernel virtual memory manager primitives.
+ * The idea being to have a "virtual" mm in the same way
+ * we have a virtual fs - giving a cleaner interface to the
+ * mm details, and allowing different kinds of memory mappings
+ * (from shared memory to executable loading to arbitrary
+ * mmap() functions).
+ */
+
+/*
+ * This struct defines a memory VMM memory area. There is one of these
+ * per VM-area/task.  A VM area is any part of the process virtual memory
+ * space that has a special rule for the page-fault handlers (ie a shared
+ * library, the executable area etc).
+ */
+struct vm_area_struct {
+	struct mm_struct * vm_mm;	/* The address space we belong to. */
+	unsigned long vm_start;		/* Our start address within vm_mm. */
+	unsigned long vm_end;		/* The first byte after our end address
+					   within vm_mm. */
+
+	/* linked list of VM areas per task, sorted by address */
+	struct vm_area_struct *vm_next;
+
+	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
+	unsigned long vm_flags;		/* Flags, listed below. */
+
+	rb_node_t vm_rb;
+
+	/*
+	 * For areas with an address space and backing store,
+	 * one of the address_space->i_mmap{,shared} lists,
+	 * for shm areas, the list of attaches, otherwise unused.
+	 */
+	struct vm_area_struct *vm_next_share;
+	struct vm_area_struct **vm_pprev_share;
+
+	/* Function pointers to deal with this struct. */
+	struct vm_operations_struct * vm_ops;
+
+	/* Information about our backing store: */
+	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
+					   units, *not* PAGE_CACHE_SIZE */
+	struct file * vm_file;		/* File we map to (can be NULL). */
+	unsigned long vm_raend;		/* XXX: put full readahead info here. */
+	void * vm_private_data;		/* was vm_pte (shared mem) */
+};
+
+/*
+ * vm_flags..
+ */
+#define VM_READ		0x00000001	/* currently active flags */
+#define VM_WRITE	0x00000002
+#define VM_EXEC		0x00000004
+#define VM_SHARED	0x00000008
+
+#define VM_MAYREAD	0x00000010	/* limits for mprotect() etc */
+#define VM_MAYWRITE	0x00000020
+#define VM_MAYEXEC	0x00000040
+#define VM_MAYSHARE	0x00000080
+
+#define VM_GROWSDOWN	0x00000100	/* general info on the segment */
+#define VM_GROWSUP	0x00000200
+#define VM_SHM		0x00000400	/* shared memory area, don't swap out */
+#define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
+
+#define VM_EXECUTABLE	0x00001000
+#define VM_LOCKED	0x00002000
+#define VM_IO           0x00004000	/* Memory mapped I/O or similar */
+
+					/* Used by sys_madvise() */
+#define VM_SEQ_READ	0x00008000	/* App will access data sequentially */
+#define VM_RAND_READ	0x00010000	/* App will not benefit from clustered reads */
+
+#define VM_DONTCOPY	0x00020000      /* Do not copy this vma on fork */
+#define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
+#define VM_RESERVED	0x00080000	/* Don't unmap it from swap_out */
+
+#ifndef VM_STACK_FLAGS
+#define VM_STACK_FLAGS	0x00000177
+#endif
+
+#define VM_READHINTMASK			(VM_SEQ_READ | VM_RAND_READ)
+#define VM_ClearReadHint(v)		(v)->vm_flags &= ~VM_READHINTMASK
+#define VM_NormalReadHint(v)		(!((v)->vm_flags & VM_READHINTMASK))
+#define VM_SequentialReadHint(v)	((v)->vm_flags & VM_SEQ_READ)
+#define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
+
+/* read ahead limits */
+extern int vm_min_readahead;
+extern int vm_max_readahead;
+
+/*
+ * mapping from the currently active vm_flags protection bits (the
+ * low four bits) to a page protection mask..
+ */
+extern pgprot_t protection_map[16];
+
+
+/*
+ * These are the virtual MM functions - opening of an area, closing and
+ * unmapping it (needed to keep files on disk up-to-date etc), pointer
+ * to the functions called when a no-page or a wp-page exception occurs. 
+ */
+struct vm_operations_struct {
+	void (*open)(struct vm_area_struct * area);
+	void (*close)(struct vm_area_struct * area);
+	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int unused);
+};
+
+/*
+ * Each physical page in the system has a struct page associated with
+ * it to keep track of whatever it is we are using the page for at the
+ * moment. Note that we have no way to track which tasks are using
+ * a page.
+ *
+ * Try to keep the most commonly accessed fields in single cache lines
+ * here (16 bytes or greater).  This ordering should be particularly
+ * beneficial on 32-bit processors.
+ *
+ * The first line is data used in page cache lookup, the second line
+ * is used for linear searches (eg. clock algorithm scans). 
+ *
+ * TODO: make this structure smaller, it could be as small as 32 bytes.
+ */
+typedef struct page {
+	struct list_head list;		/* ->mapping has some page lists. */
+	struct address_space *mapping;	/* The inode (or ...) we belong to. */
+	unsigned long index;		/* Our offset within mapping. */
+	struct page *next_hash;		/* Next page sharing our hash bucket in
+					   the pagecache hash table. */
+	atomic_t count;			/* Usage count, see below. */
+	unsigned long flags;		/* atomic flags, some possibly
+					   updated asynchronously */
+	struct list_head lru;		/* Pageout list, eg. active_list;
+					   protected by pagemap_lru_lock !! */
+	struct page **pprev_hash;	/* Complement to *next_hash. */
+	struct buffer_head * buffers;	/* Buffer maps us to a disk block. */
+
+	/*
+	 * On machines where all RAM is mapped into kernel address space,
+	 * we can simply calculate the virtual address. On machines with
+	 * highmem some memory is mapped into kernel virtual memory
+	 * dynamically, so we need a place to store that address.
+	 * Note that this field could be 16 bits on x86 ... ;)
+	 *
+	 * Architectures with slow multiplication can define
+	 * WANT_PAGE_VIRTUAL in asm/page.h
+	 */
+#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
+	void *virtual;			/* Kernel virtual address (NULL if
+					   not kmapped, ie. highmem) */
+#endif /* CONFIG_HIGMEM || WANT_PAGE_VIRTUAL */
+} mem_map_t;
+
+/*
+ * Methods to modify the page usage count.
+ *
+ * What counts for a page usage:
+ * - cache mapping   (page->mapping)
+ * - disk mapping    (page->buffers)
+ * - page mapped in a task's page tables, each mapping
+ *   is counted separately
+ *
+ * Also, many kernel routines increase the page count before a critical
+ * routine so they can be sure the page doesn't go away from under them.
+ */
+#define get_page(p)		atomic_inc(&(p)->count)
+#define put_page(p)		__free_page(p)
+#define put_page_testzero(p) 	atomic_dec_and_test(&(p)->count)
+#define page_count(p)		atomic_read(&(p)->count)
+#define set_page_count(p,v) 	atomic_set(&(p)->count, v)
+
+static inline struct page *nth_page(struct page *page, int n)
+{
+	return page + n;
+}
+
+/*
+ * Various page->flags bits:
+ *
+ * PG_reserved is set for special pages, which can never be swapped
+ * out. Some of them might not even exist (eg empty_bad_page)...
+ *
+ * Multiple processes may "see" the same page. E.g. for untouched
+ * mappings of /dev/null, all processes see the same page full of
+ * zeroes, and text pages of executables and shared libraries have
+ * only one copy in memory, at most, normally.
+ *
+ * For the non-reserved pages, page->count denotes a reference count.
+ *   page->count == 0 means the page is free.
+ *   page->count == 1 means the page is used for exactly one purpose
+ *   (e.g. a private data page of one process).
+ *
+ * A page may be used for kmalloc() or anyone else who does a
+ * __get_free_page(). In this case the page->count is at least 1, and
+ * all other fields are unused but should be 0 or NULL. The
+ * management of this page is the responsibility of the one who uses
+ * it.
+ *
+ * The other pages (we may call them "process pages") are completely
+ * managed by the Linux memory manager: I/O, buffers, swapping etc.
+ * The following discussion applies only to them.
+ *
+ * A page may belong to an inode's memory mapping. In this case,
+ * page->mapping is the pointer to the inode, and page->index is the
+ * file offset of the page, in units of PAGE_CACHE_SIZE.
+ *
+ * A page may have buffers allocated to it. In this case,
+ * page->buffers is a circular list of these buffer heads. Else,
+ * page->buffers == NULL.
+ *
+ * For pages belonging to inodes, the page->count is the number of
+ * attaches, plus 1 if buffers are allocated to the page, plus one
+ * for the page cache itself.
+ *
+ * All pages belonging to an inode are in these doubly linked lists:
+ * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages;
+ * using the page->list list_head. These fields are also used for
+ * freelist managemet (when page->count==0).
+ *
+ * There is also a hash table mapping (mapping,index) to the page
+ * in memory if present. The lists for this hash table use the fields
+ * page->next_hash and page->pprev_hash.
+ *
+ * All process pages can do I/O:
+ * - inode pages may need to be read from disk,
+ * - inode pages which have been modified and are MAP_SHARED may need
+ *   to be written to disk,
+ * - private pages which have been modified may need to be swapped out
+ *   to swap space and (later) to be read back into memory.
+ * During disk I/O, PG_locked is used. This bit is set before I/O
+ * and reset when I/O completes. page_waitqueue(page) is a wait queue of all
+ * tasks waiting for the I/O on this page to complete.
+ * PG_uptodate tells whether the page's contents is valid.
+ * When a read completes, the page becomes uptodate, unless a disk I/O
+ * error happened.
+ *
+ * For choosing which pages to swap out, inode pages carry a
+ * PG_referenced bit, which is set any time the system accesses
+ * that page through the (mapping,index) hash table. This referenced
+ * bit, together with the referenced bit in the page tables, is used
+ * to manipulate page->age and move the page across the active,
+ * inactive_dirty and inactive_clean lists.
+ *
+ * Note that the referenced bit, the page->lru list_head and the
+ * active, inactive_dirty and inactive_clean lists are protected by
+ * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit!
+ *
+ * PG_skip is used on sparc/sparc64 architectures to "skip" certain
+ * parts of the address space.
+ *
+ * PG_error is set to indicate that an I/O error occurred on this page.
+ *
+ * PG_arch_1 is an architecture specific page state bit.  The generic
+ * code guarantees that this bit is cleared for a page when it first
+ * is entered into the page cache.
+ *
+ * PG_highmem pages are not permanently mapped into the kernel virtual
+ * address space, they need to be kmapped separately for doing IO on
+ * the pages. The struct page (these bits with information) are always
+ * mapped into kernel address space...
+ */
+#define PG_locked		 0	/* Page is locked. Don't touch. */
+#define PG_error		 1
+#define PG_referenced		 2
+#define PG_uptodate		 3
+#define PG_dirty		 4
+#define PG_unused		 5
+#define PG_lru			 6
+#define PG_active		 7
+#define PG_slab			 8
+#define PG_skip			10
+#define PG_highmem		11
+#define PG_checked		12	/* kill me in 2.5.<early>. */
+#define PG_arch_1		13
+#define PG_reserved		14
+#define PG_launder		15	/* written out by VM pressure.. */
+#define PG_fs_1			16	/* Filesystem specific */
+#define PG_foreign		21	/* Page belongs to foreign allocator */
+
+#ifndef arch_set_page_uptodate
+#define arch_set_page_uptodate(page)
+#endif
+
+/* Make it prettier to test the above... */
+#define UnlockPage(page)	unlock_page(page)
+#define Page_Uptodate(page)	test_bit(PG_uptodate, &(page)->flags)
+#ifndef SetPageUptodate
+#define SetPageUptodate(page)	set_bit(PG_uptodate, &(page)->flags)
+#endif
+#define ClearPageUptodate(page)	clear_bit(PG_uptodate, &(page)->flags)
+#define PageDirty(page)		test_bit(PG_dirty, &(page)->flags)
+#define SetPageDirty(page)	set_bit(PG_dirty, &(page)->flags)
+#define ClearPageDirty(page)	clear_bit(PG_dirty, &(page)->flags)
+#define PageLocked(page)	test_bit(PG_locked, &(page)->flags)
+#define LockPage(page)		set_bit(PG_locked, &(page)->flags)
+#define TryLockPage(page)	test_and_set_bit(PG_locked, &(page)->flags)
+#define PageChecked(page)	test_bit(PG_checked, &(page)->flags)
+#define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
+#define ClearPageChecked(page)	clear_bit(PG_checked, &(page)->flags)
+#define PageLaunder(page)	test_bit(PG_launder, &(page)->flags)
+#define SetPageLaunder(page)	set_bit(PG_launder, &(page)->flags)
+#define ClearPageLaunder(page)	clear_bit(PG_launder, &(page)->flags)
+#define ClearPageArch1(page)	clear_bit(PG_arch_1, &(page)->flags)
+
+/* A foreign page uses a custom destructor rather than the buddy allocator. */
+#ifdef CONFIG_FOREIGN_PAGES
+#define PageForeign(page)	test_bit(PG_foreign, &(page)->flags)
+#define SetPageForeign(page, dtor) do {		\
+	set_bit(PG_foreign, &(page)->flags);	\
+	(page)->mapping = (void *)dtor;		\
+} while (0)
+#define ClearPageForeign(page) do {		\
+	clear_bit(PG_foreign, &(page)->flags);	\
+	(page)->mapping = NULL;			\
+} while (0)
+#define PageForeignDestructor(page)	\
+	( (void (*) (struct page *)) (page)->mapping )
+#else
+#define PageForeign(page)	0
+#define PageForeignDestructor(page)	void
+#endif
+
+/*
+ * The zone field is never updated after free_area_init_core()
+ * sets it, so none of the operations on it need to be atomic.
+ */
+#define NODE_SHIFT 4
+#define ZONE_SHIFT (BITS_PER_LONG - 8)
+
+struct zone_struct;
+extern struct zone_struct *zone_table[];
+
+static inline zone_t *page_zone(struct page *page)
+{
+	return zone_table[page->flags >> ZONE_SHIFT];
+}
+
+static inline void set_page_zone(struct page *page, unsigned long zone_num)
+{
+	page->flags &= ~(~0UL << ZONE_SHIFT);
+	page->flags |= zone_num << ZONE_SHIFT;
+}
+
+/*
+ * In order to avoid #ifdefs within C code itself, we define
+ * set_page_address to a noop for non-highmem machines, where
+ * the field isn't useful.
+ * The same is true for page_address() in arch-dependent code.
+ */
+#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
+
+#define set_page_address(page, address)			\
+	do {						\
+		(page)->virtual = (address);		\
+	} while(0)
+
+#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
+#define set_page_address(page, address)  do { } while(0)
+#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
+
+/*
+ * Permanent address of a page. Obviously must never be
+ * called on a highmem page.
+ */
+#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
+
+#define page_address(page) ((page)->virtual)
+
+#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
+
+#define page_address(page)						\
+	__va( (((page) - page_zone(page)->zone_mem_map) << PAGE_SHIFT)	\
+			+ page_zone(page)->zone_start_paddr)
+
+#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
+
+extern void FASTCALL(set_page_dirty(struct page *));
+
+/*
+ * The first mb is necessary to safely close the critical section opened by the
+ * TryLockPage(), the second mb is necessary to enforce ordering between
+ * the clear_bit and the read of the waitqueue (to avoid SMP races with a
+ * parallel wait_on_page).
+ */
+#define PageError(page)		test_bit(PG_error, &(page)->flags)
+#define SetPageError(page)	set_bit(PG_error, &(page)->flags)
+#define ClearPageError(page)	clear_bit(PG_error, &(page)->flags)
+#define PageReferenced(page)	test_bit(PG_referenced, &(page)->flags)
+#define SetPageReferenced(page)	set_bit(PG_referenced, &(page)->flags)
+#define ClearPageReferenced(page)	clear_bit(PG_referenced, &(page)->flags)
+#define PageTestandClearReferenced(page)	test_and_clear_bit(PG_referenced, &(page)->flags)
+#define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
+#define PageSetSlab(page)	set_bit(PG_slab, &(page)->flags)
+#define PageClearSlab(page)	clear_bit(PG_slab, &(page)->flags)
+#define PageReserved(page)	test_bit(PG_reserved, &(page)->flags)
+
+#define PageActive(page)	test_bit(PG_active, &(page)->flags)
+#define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
+#define ClearPageActive(page)	clear_bit(PG_active, &(page)->flags)
+
+#define PageLRU(page)		test_bit(PG_lru, &(page)->flags)
+#define TestSetPageLRU(page)	test_and_set_bit(PG_lru, &(page)->flags)
+#define TestClearPageLRU(page)	test_and_clear_bit(PG_lru, &(page)->flags)
+
+#ifdef CONFIG_HIGHMEM
+#define PageHighMem(page)		test_bit(PG_highmem, &(page)->flags)
+#else
+#define PageHighMem(page)		0 /* needed to optimize away at compile time */
+#endif
+
+#define SetPageReserved(page)		set_bit(PG_reserved, &(page)->flags)
+#define ClearPageReserved(page)		clear_bit(PG_reserved, &(page)->flags)
+
+/*
+ * Error return values for the *_nopage functions
+ */
+#define NOPAGE_SIGBUS	(NULL)
+#define NOPAGE_OOM	((struct page *) (-1))
+
+/* The array of struct pages */
+extern mem_map_t * mem_map;
+
+/*
+ * There is only one page-allocator function, and two main namespaces to
+ * it. The alloc_page*() variants return 'struct page *' and as such
+ * can allocate highmem pages, the *get*page*() variants return
+ * virtual kernel addresses to the allocated page(s).
+ */
+extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order));
+extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist));
+extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order);
+
+static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
+{
+	/*
+	 * Gets optimized away by the compiler.
+	 */
+	if (order >= MAX_ORDER)
+		return NULL;
+	return _alloc_pages(gfp_mask, order);
+}
+
+#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
+
+extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
+extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
+
+#define __get_free_page(gfp_mask) \
+		__get_free_pages((gfp_mask),0)
+
+#define __get_dma_pages(gfp_mask, order) \
+		__get_free_pages((gfp_mask) | GFP_DMA,(order))
+
+/*
+ * The old interface name will be removed in 2.5:
+ */
+#define get_free_page get_zeroed_page
+
+/*
+ * There is only one 'core' page-freeing function.
+ */
+extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
+extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
+
+#define __free_page(page) __free_pages((page), 0)
+#define free_page(addr) free_pages((addr),0)
+
+extern void show_free_areas(void);
+extern void show_free_areas_node(pg_data_t *pgdat);
+
+extern void clear_page_tables(struct mm_struct *, unsigned long, int);
+
+extern int fail_writepage(struct page *);
+struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused);
+struct file *shmem_file_setup(char * name, loff_t size);
+extern void shmem_lock(struct file * file, int lock);
+extern int shmem_zero_setup(struct vm_area_struct *);
+
+extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
+extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
+extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
+extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
+
+extern int vmtruncate(struct inode * inode, loff_t offset);
+extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
+extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
+extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
+extern int make_pages_present(unsigned long addr, unsigned long end);
+extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
+extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
+extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
+extern int ptrace_attach(struct task_struct *tsk);
+extern int ptrace_detach(struct task_struct *, unsigned int);
+extern void ptrace_disable(struct task_struct *);
+extern int ptrace_check_attach(struct task_struct *task, int kill);
+
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
+		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+
+/*
+ * On a two-level page table, this ends up being trivial. Thus the
+ * inlining and the symmetry break with pte_alloc() that does all
+ * of this out-of-line.
+ */
+static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+	if (pgd_none(*pgd))
+		return __pmd_alloc(mm, pgd, address);
+	return pmd_offset(pgd, address);
+}
+
+extern int pgt_cache_water[2];
+extern int check_pgt_cache(void);
+
+extern void free_area_init(unsigned long * zones_size);
+extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap,
+	unsigned long * zones_size, unsigned long zone_start_paddr, 
+	unsigned long *zholes_size);
+extern void mem_init(void);
+extern void show_mem(void);
+extern void si_meminfo(struct sysinfo * val);
+extern void swapin_readahead(swp_entry_t);
+
+extern struct address_space swapper_space;
+#define PageSwapCache(page) ((page)->mapping == &swapper_space)
+
+static inline int is_page_cache_freeable(struct page * page)
+{
+	return page_count(page) - !!page->buffers == 1;
+}
+
+extern int FASTCALL(can_share_swap_page(struct page *));
+extern int FASTCALL(remove_exclusive_swap_page(struct page *));
+
+extern void __free_pte(pte_t);
+
+/* mmap.c */
+extern void lock_vma_mappings(struct vm_area_struct *);
+extern void unlock_vma_mappings(struct vm_area_struct *);
+extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
+extern void __insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
+extern void build_mmap_rb(struct mm_struct *);
+extern void exit_mmap(struct mm_struct *);
+
+extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+
+extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+	unsigned long len, unsigned long prot,
+	unsigned long flag, unsigned long pgoff);
+
+static inline unsigned long do_mmap(struct file *file, unsigned long addr,
+	unsigned long len, unsigned long prot,
+	unsigned long flag, unsigned long offset)
+{
+	unsigned long ret = -EINVAL;
+	if ((offset + PAGE_ALIGN(len)) < offset)
+		goto out;
+	if (!(offset & ~PAGE_MASK))
+		ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
+out:
+	return ret;
+}
+
+extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+
+extern unsigned long do_brk(unsigned long, unsigned long);
+
+static inline void __vma_unlink(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev)
+{
+	prev->vm_next = vma->vm_next;
+	rb_erase(&vma->vm_rb, &mm->mm_rb);
+	if (mm->mmap_cache == vma)
+		mm->mmap_cache = prev;
+}
+
+static inline int can_vma_merge(struct vm_area_struct * vma, unsigned long vm_flags)
+{
+	if (!vma->vm_file && vma->vm_flags == vm_flags)
+		return 1;
+	else
+		return 0;
+}
+
+struct zone_t;
+/* filemap.c */
+extern void remove_inode_page(struct page *);
+extern unsigned long page_unuse(struct page *);
+extern void truncate_inode_pages(struct address_space *, loff_t);
+
+/* generic vm_area_ops exported for stackable file systems */
+extern int filemap_sync(struct vm_area_struct *, unsigned long,	size_t, unsigned int);
+extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int);
+
+/*
+ * GFP bitmasks..
+ */
+/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */
+#define __GFP_DMA	0x01
+#define __GFP_HIGHMEM	0x02
+
+/* Action modifiers - doesn't change the zoning */
+#define __GFP_WAIT	0x10	/* Can wait and reschedule? */
+#define __GFP_HIGH	0x20	/* Should access emergency pools? */
+#define __GFP_IO	0x40	/* Can start low memory physical IO? */
+#define __GFP_HIGHIO	0x80	/* Can start high mem physical IO? */
+#define __GFP_FS	0x100	/* Can call down to low-level FS? */
+
+#define GFP_NOHIGHIO	(__GFP_HIGH | __GFP_WAIT | __GFP_IO)
+#define GFP_NOIO	(__GFP_HIGH | __GFP_WAIT)
+#define GFP_NOFS	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
+#define GFP_ATOMIC	(__GFP_HIGH)
+#define GFP_USER	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_HIGHUSER	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
+#define GFP_KERNEL	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_NFS		(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_KSWAPD	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+
+/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
+   platforms, used as appropriate on others */
+
+#define GFP_DMA		__GFP_DMA
+
+static inline unsigned int pf_gfp_mask(unsigned int gfp_mask)
+{
+	/* avoid all memory balancing I/O methods if this task cannot block on I/O */
+	if (current->flags & PF_NOIO)
+		gfp_mask &= ~(__GFP_IO | __GFP_HIGHIO | __GFP_FS);
+
+	return gfp_mask;
+}
+	
+/* vma is the first one with  address < vma->vm_end,
+ * and even  address < vma->vm_start. Have to extend vma. */
+static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
+{
+	unsigned long grow;
+
+	/*
+	 * vma->vm_start/vm_end cannot change under us because the caller
+	 * is required to hold the mmap_sem in read mode.  We need the
+	 * page_table_lock lock to serialize against concurrent expand_stacks.
+	 */
+	address &= PAGE_MASK;
+ 	spin_lock(&vma->vm_mm->page_table_lock);
+
+	/* already expanded while we were spinning? */
+	if (vma->vm_start <= address) {
+		spin_unlock(&vma->vm_mm->page_table_lock);
+		return 0;
+	}
+
+	grow = (vma->vm_start - address) >> PAGE_SHIFT;
+	if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
+	    ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) {
+		spin_unlock(&vma->vm_mm->page_table_lock);
+		return -ENOMEM;
+	}
+
+	if ((vma->vm_flags & VM_LOCKED) &&
+      	    ((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_MEMLOCK].rlim_cur) {
+		spin_unlock(&vma->vm_mm->page_table_lock);
+		return -ENOMEM;
+	}
+
+
+	vma->vm_start = address;
+	vma->vm_pgoff -= grow;
+	vma->vm_mm->total_vm += grow;
+	if (vma->vm_flags & VM_LOCKED)
+		vma->vm_mm->locked_vm += grow;
+	spin_unlock(&vma->vm_mm->page_table_lock);
+	return 0;
+}
+
+/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
+extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
+extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
+					     struct vm_area_struct **pprev);
+
+/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
+   NULL if none.  Assume start_addr < end_addr. */
+static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
+{
+	struct vm_area_struct * vma = find_vma(mm,start_addr);
+
+	if (vma && end_addr <= vma->vm_start)
+		vma = NULL;
+	return vma;
+}
+
+extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
+
+extern struct page * vmalloc_to_page(void *addr);
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/linux-2.4-xen-sparse/include/linux/sched.h b/linux-2.4-xen-sparse/include/linux/sched.h
new file mode 100644
index 0000000000..9e318d7ed6
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/linux/sched.h
@@ -0,0 +1,971 @@
+#ifndef _LINUX_SCHED_H
+#define _LINUX_SCHED_H
+
+#include <asm/param.h>	/* for HZ */
+
+extern unsigned long event;
+
+#include <linux/config.h>
+#include <linux/binfmts.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/times.h>
+#include <linux/timex.h>
+#include <linux/rbtree.h>
+
+#include <asm/system.h>
+#include <asm/semaphore.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/mmu.h>
+
+#include <linux/smp.h>
+#include <linux/tty.h>
+#include <linux/sem.h>
+#include <linux/signal.h>
+#include <linux/securebits.h>
+#include <linux/fs_struct.h>
+
+struct exec_domain;
+
+/*
+ * cloning flags:
+ */
+#define CSIGNAL		0x000000ff	/* signal mask to be sent at exit */
+#define CLONE_VM	0x00000100	/* set if VM shared between processes */
+#define CLONE_FS	0x00000200	/* set if fs info shared between processes */
+#define CLONE_FILES	0x00000400	/* set if open files shared between processes */
+#define CLONE_SIGHAND	0x00000800	/* set if signal handlers and blocked signals shared */
+#define CLONE_PID	0x00001000	/* set if pid shared */
+#define CLONE_PTRACE	0x00002000	/* set if we want to let tracing continue on the child too */
+#define CLONE_VFORK	0x00004000	/* set if the parent wants the child to wake it up on mm_release */
+#define CLONE_PARENT	0x00008000	/* set if we want to have the same parent as the cloner */
+#define CLONE_THREAD	0x00010000	/* Same thread group? */
+#define CLONE_NEWNS	0x00020000	/* New namespace group? */
+
+#define CLONE_SIGNAL	(CLONE_SIGHAND | CLONE_THREAD)
+
+/*
+ * These are the constant used to fake the fixed-point load-average
+ * counting. Some notes:
+ *  - 11 bit fractions expand to 22 bits by the multiplies: this gives
+ *    a load-average precision of 10 bits integer + 11 bits fractional
+ *  - if you want to count load-averages more often, you need more
+ *    precision, or rounding will get you. With 2-second counting freq,
+ *    the EXP_n values would be 1981, 2034 and 2043 if still using only
+ *    11 bit fractions.
+ */
+extern unsigned long avenrun[];		/* Load averages */
+
+#define FSHIFT		11		/* nr of bits of precision */
+#define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
+#define LOAD_FREQ	(5*HZ)		/* 5 sec intervals */
+#define EXP_1		1884		/* 1/exp(5sec/1min) as fixed-point */
+#define EXP_5		2014		/* 1/exp(5sec/5min) */
+#define EXP_15		2037		/* 1/exp(5sec/15min) */
+
+#define CALC_LOAD(load,exp,n) \
+	load *= exp; \
+	load += n*(FIXED_1-exp); \
+	load >>= FSHIFT;
+
+#define CT_TO_SECS(x)	((x) / HZ)
+#define CT_TO_USECS(x)	(((x) % HZ) * 1000000/HZ)
+
+extern int nr_running, nr_threads;
+extern int last_pid;
+
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/param.h>
+#include <linux/resource.h>
+#ifdef __KERNEL__
+#include <linux/timer.h>
+#endif
+
+#include <asm/processor.h>
+
+#define TASK_RUNNING		0
+#define TASK_INTERRUPTIBLE	1
+#define TASK_UNINTERRUPTIBLE	2
+#define TASK_ZOMBIE		4
+#define TASK_STOPPED		8
+
+#define __set_task_state(tsk, state_value)		\
+	do { (tsk)->state = (state_value); } while (0)
+#define set_task_state(tsk, state_value)		\
+	set_mb((tsk)->state, (state_value))
+
+#define __set_current_state(state_value)			\
+	do { current->state = (state_value); } while (0)
+#define set_current_state(state_value)		\
+	set_mb(current->state, (state_value))
+
+/*
+ * Scheduling policies
+ */
+#define SCHED_OTHER		0
+#define SCHED_FIFO		1
+#define SCHED_RR		2
+
+/*
+ * This is an additional bit set when we want to
+ * yield the CPU for one re-schedule..
+ */
+#define SCHED_YIELD		0x10
+
+struct sched_param {
+	int sched_priority;
+};
+
+struct completion;
+
+#ifdef __KERNEL__
+
+#include <linux/spinlock.h>
+
+/*
+ * This serializes "schedule()" and also protects
+ * the run-queue from deletions/modifications (but
+ * _adding_ to the beginning of the run-queue has
+ * a separate lock).
+ */
+extern rwlock_t tasklist_lock;
+extern spinlock_t runqueue_lock;
+extern spinlock_t mmlist_lock;
+
+extern void sched_init(void);
+extern void init_idle(void);
+extern void show_state(void);
+extern void cpu_init (void);
+extern void trap_init(void);
+extern void update_process_times(int user);
+#ifdef CONFIG_NO_IDLE_HZ
+extern void update_process_times_us(int user, int system);
+#endif
+extern void update_one_process(struct task_struct *p, unsigned long user,
+			       unsigned long system, int cpu);
+
+#define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
+extern signed long FASTCALL(schedule_timeout(signed long timeout));
+asmlinkage void schedule(void);
+
+extern int schedule_task(struct tq_struct *task);
+extern void flush_scheduled_tasks(void);
+extern int start_context_thread(void);
+extern int current_is_keventd(void);
+
+#if CONFIG_SMP
+extern void set_cpus_allowed(struct task_struct *p, unsigned long new_mask);
+#else
+# define set_cpus_allowed(p, new_mask) do { } while (0)
+#endif
+
+/*
+ * The default fd array needs to be at least BITS_PER_LONG,
+ * as this is the granularity returned by copy_fdset().
+ */
+#define NR_OPEN_DEFAULT BITS_PER_LONG
+
+struct namespace;
+/*
+ * Open file table structure
+ */
+struct files_struct {
+	atomic_t count;
+	rwlock_t file_lock;	/* Protects all the below members.  Nests inside tsk->alloc_lock */
+	int max_fds;
+	int max_fdset;
+	int next_fd;
+	struct file ** fd;	/* current fd array */
+	fd_set *close_on_exec;
+	fd_set *open_fds;
+	fd_set close_on_exec_init;
+	fd_set open_fds_init;
+	struct file * fd_array[NR_OPEN_DEFAULT];
+};
+
+#define INIT_FILES \
+{ 							\
+	count:		ATOMIC_INIT(1), 		\
+	file_lock:	RW_LOCK_UNLOCKED, 		\
+	max_fds:	NR_OPEN_DEFAULT, 		\
+	max_fdset:	__FD_SETSIZE, 			\
+	next_fd:	0, 				\
+	fd:		&init_files.fd_array[0], 	\
+	close_on_exec:	&init_files.close_on_exec_init, \
+	open_fds:	&init_files.open_fds_init, 	\
+	close_on_exec_init: { { 0, } }, 		\
+	open_fds_init:	{ { 0, } }, 			\
+	fd_array:	{ NULL, } 			\
+}
+
+/* Maximum number of active map areas.. This is a random (large) number */
+#define DEFAULT_MAX_MAP_COUNT	(65536)
+
+extern int max_map_count;
+
+struct mm_struct {
+	struct vm_area_struct * mmap;		/* list of VMAs */
+	rb_root_t mm_rb;
+	struct vm_area_struct * mmap_cache;	/* last find_vma result */
+	pgd_t * pgd;
+	atomic_t mm_users;			/* How many users with user space? */
+	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
+	int map_count;				/* number of VMAs */
+	struct rw_semaphore mmap_sem;
+	spinlock_t page_table_lock;		/* Protects task page tables and mm->rss */
+
+	struct list_head mmlist;		/* List of all active mm's.  These are globally strung
+						 * together off init_mm.mmlist, and are protected
+						 * by mmlist_lock
+						 */
+
+	unsigned long start_code, end_code, start_data, end_data;
+	unsigned long start_brk, brk, start_stack;
+	unsigned long arg_start, arg_end, env_start, env_end;
+	unsigned long rss, total_vm, locked_vm;
+	unsigned long def_flags;
+	unsigned long cpu_vm_mask;
+	unsigned long swap_address;
+
+	unsigned dumpable:1;
+
+	/* Architecture-specific MM context */
+	mm_context_t context;
+};
+
+extern int mmlist_nr;
+
+#define INIT_MM(name) \
+{			 				\
+	mm_rb:		RB_ROOT,			\
+	pgd:		swapper_pg_dir, 		\
+	mm_users:	ATOMIC_INIT(2), 		\
+	mm_count:	ATOMIC_INIT(1), 		\
+	mmap_sem:	__RWSEM_INITIALIZER(name.mmap_sem), \
+	page_table_lock: SPIN_LOCK_UNLOCKED, 		\
+	mmlist:		LIST_HEAD_INIT(name.mmlist),	\
+}
+
+struct signal_struct {
+	atomic_t		count;
+	struct k_sigaction	action[_NSIG];
+	spinlock_t		siglock;
+};
+
+
+#define INIT_SIGNALS {	\
+	count:		ATOMIC_INIT(1), 		\
+	action:		{ {{0,}}, }, 			\
+	siglock:	SPIN_LOCK_UNLOCKED 		\
+}
+
+/*
+ * Some day this will be a full-fledged user tracking system..
+ */
+struct user_struct {
+	atomic_t __count;	/* reference count */
+	atomic_t processes;	/* How many processes does this user have? */
+	atomic_t files;		/* How many open files does this user have? */
+
+	/* Hash table maintenance information */
+	struct user_struct *next, **pprev;
+	uid_t uid;
+};
+
+#define get_current_user() ({ 				\
+	struct user_struct *__tmp_user = current->user;	\
+	atomic_inc(&__tmp_user->__count);		\
+	__tmp_user; })
+
+extern struct user_struct root_user;
+#define INIT_USER (&root_user)
+
+struct task_struct {
+	/*
+	 * offsets of these are hardcoded elsewhere - touch with care
+	 */
+	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
+	unsigned long flags;	/* per process flags, defined below */
+	int sigpending;
+	mm_segment_t addr_limit;	/* thread address space:
+					 	0-0xBFFFFFFF for user-thead
+						0-0xFFFFFFFF for kernel-thread
+					 */
+	struct exec_domain *exec_domain;
+	volatile long need_resched;
+	unsigned long ptrace;
+
+	int lock_depth;		/* Lock depth */
+
+/*
+ * offset 32 begins here on 32-bit platforms. We keep
+ * all fields in a single cacheline that are needed for
+ * the goodness() loop in schedule().
+ */
+	long counter;
+	long nice;
+	unsigned long policy;
+	struct mm_struct *mm;
+	int processor;
+	/*
+	 * cpus_runnable is ~0 if the process is not running on any
+	 * CPU. It's (1 << cpu) if it's running on a CPU. This mask
+	 * is updated under the runqueue lock.
+	 *
+	 * To determine whether a process might run on a CPU, this
+	 * mask is AND-ed with cpus_allowed.
+	 */
+	unsigned long cpus_runnable, cpus_allowed;
+	/*
+	 * (only the 'next' pointer fits into the cacheline, but
+	 * that's just fine.)
+	 */
+	struct list_head run_list;
+	unsigned long sleep_time;
+
+	struct task_struct *next_task, *prev_task;
+	struct mm_struct *active_mm;
+	struct list_head local_pages;
+	unsigned int allocation_order, nr_local_pages;
+
+/* task state */
+	struct linux_binfmt *binfmt;
+	int exit_code, exit_signal;
+	int pdeath_signal;  /*  The signal sent when the parent dies  */
+	/* ??? */
+	unsigned long personality;
+	int did_exec:1;
+	unsigned task_dumpable:1;
+	pid_t pid;
+	pid_t pgrp;
+	pid_t tty_old_pgrp;
+	pid_t session;
+	pid_t tgid;
+	/* boolean value for session group leader */
+	int leader;
+	/* 
+	 * pointers to (original) parent process, youngest child, younger sibling,
+	 * older sibling, respectively.  (p->father can be replaced with 
+	 * p->p_pptr->pid)
+	 */
+	struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
+	struct list_head thread_group;
+
+	/* PID hash table linkage. */
+	struct task_struct *pidhash_next;
+	struct task_struct **pidhash_pprev;
+
+	wait_queue_head_t wait_chldexit;	/* for wait4() */
+	struct completion *vfork_done;		/* for vfork() */
+	unsigned long rt_priority;
+	unsigned long it_real_value, it_prof_value, it_virt_value;
+	unsigned long it_real_incr, it_prof_incr, it_virt_incr;
+	struct timer_list real_timer;
+	struct tms times;
+	unsigned long start_time;
+	long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS];
+/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
+	unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
+	int swappable:1;
+/* process credentials */
+	uid_t uid,euid,suid,fsuid;
+	gid_t gid,egid,sgid,fsgid;
+	int ngroups;
+	gid_t	groups[NGROUPS];
+	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted;
+	int keep_capabilities:1;
+	struct user_struct *user;
+/* limits */
+	struct rlimit rlim[RLIM_NLIMITS];
+	unsigned short used_math;
+	char comm[16];
+/* file system info */
+	int link_count, total_link_count;
+	struct tty_struct *tty; /* NULL if no tty */
+	unsigned int locks; /* How many file locks are being held */
+/* ipc stuff */
+	struct sem_undo *semundo;
+	struct sem_queue *semsleeping;
+/* CPU-specific state of this task */
+	struct thread_struct thread;
+/* filesystem information */
+	struct fs_struct *fs;
+/* open file information */
+	struct files_struct *files;
+/* namespace */
+	struct namespace *namespace;
+/* signal handlers */
+	spinlock_t sigmask_lock;	/* Protects signal and blocked */
+	struct signal_struct *sig;
+
+	sigset_t blocked;
+	struct sigpending pending;
+
+	unsigned long sas_ss_sp;
+	size_t sas_ss_size;
+	int (*notifier)(void *priv);
+	void *notifier_data;
+	sigset_t *notifier_mask;
+	
+/* Thread group tracking */
+   	u32 parent_exec_id;
+   	u32 self_exec_id;
+/* Protection of (de-)allocation: mm, files, fs, tty */
+	spinlock_t alloc_lock;
+
+/* journalling filesystem info */
+	void *journal_info;
+};
+
+/*
+ * Per process flags
+ */
+#define PF_ALIGNWARN	0x00000001	/* Print alignment warning msgs */
+					/* Not implemented yet, only for 486*/
+#define PF_STARTING	0x00000002	/* being created */
+#define PF_EXITING	0x00000004	/* getting shut down */
+#define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
+#define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
+#define PF_DUMPCORE	0x00000200	/* dumped core */
+#define PF_SIGNALED	0x00000400	/* killed by a signal */
+#define PF_MEMALLOC	0x00000800	/* Allocating memory */
+#define PF_MEMDIE      0x00001000       /* Killed for out-of-memory */
+#define PF_FREE_PAGES	0x00002000	/* per process page freeing */
+#define PF_NOIO		0x00004000	/* avoid generating further I/O */
+#define PF_FSTRANS	0x00008000	/* inside a filesystem transaction */
+
+#define PF_USEDFPU	0x00100000	/* task used FPU this quantum (SMP) */
+
+/*
+ * Ptrace flags
+ */
+
+#define PT_PTRACED	0x00000001
+#define PT_TRACESYS	0x00000002
+#define PT_DTRACE	0x00000004	/* delayed trace (used on m68k, i386) */
+#define PT_TRACESYSGOOD	0x00000008
+#define PT_PTRACE_CAP	0x00000010	/* ptracer can follow suid-exec */
+
+#define is_dumpable(tsk)    ((tsk)->task_dumpable && (tsk)->mm && (tsk)->mm->dumpable)
+
+/*
+ * Limit the stack by to some sane default: root can always
+ * increase this limit if needed..  8MB seems reasonable.
+ */
+#define _STK_LIM	(8*1024*1024)
+
+#define DEF_COUNTER	(10*HZ/100)	/* 100 ms time slice */
+#define MAX_COUNTER	(20*HZ/100)
+#define DEF_NICE	(0)
+
+extern void yield(void);
+
+/*
+ * The default (Linux) execution domain.
+ */
+extern struct exec_domain	default_exec_domain;
+
+/*
+ *  INIT_TASK is used to set up the first task table, touch at
+ * your own risk!. Base=0, limit=0x1fffff (=2MB)
+ */
+#define INIT_TASK(tsk)	\
+{									\
+    state:		0,						\
+    flags:		0,						\
+    sigpending:		0,						\
+    addr_limit:		KERNEL_DS,					\
+    exec_domain:	&default_exec_domain,				\
+    lock_depth:		-1,						\
+    counter:		DEF_COUNTER,					\
+    nice:		DEF_NICE,					\
+    policy:		SCHED_OTHER,					\
+    mm:			NULL,						\
+    active_mm:		&init_mm,					\
+    cpus_runnable:	~0UL,						\
+    cpus_allowed:	~0UL,						\
+    run_list:		LIST_HEAD_INIT(tsk.run_list),			\
+    next_task:		&tsk,						\
+    prev_task:		&tsk,						\
+    p_opptr:		&tsk,						\
+    p_pptr:		&tsk,						\
+    thread_group:	LIST_HEAD_INIT(tsk.thread_group),		\
+    wait_chldexit:	__WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\
+    real_timer:		{						\
+	function:		it_real_fn				\
+    },									\
+    cap_effective:	CAP_INIT_EFF_SET,				\
+    cap_inheritable:	CAP_INIT_INH_SET,				\
+    cap_permitted:	CAP_FULL_SET,					\
+    keep_capabilities:	0,						\
+    rlim:		INIT_RLIMITS,					\
+    user:		INIT_USER,					\
+    comm:		"swapper",					\
+    thread:		INIT_THREAD,					\
+    fs:			&init_fs,					\
+    files:		&init_files,					\
+    sigmask_lock:	SPIN_LOCK_UNLOCKED,				\
+    sig:		&init_signals,					\
+    pending:		{ NULL, &tsk.pending.head, {{0}}},		\
+    blocked:		{{0}},						\
+    alloc_lock:		SPIN_LOCK_UNLOCKED,				\
+    journal_info:	NULL,						\
+}
+
+
+#ifndef INIT_TASK_SIZE
+# define INIT_TASK_SIZE	2048*sizeof(long)
+#endif
+
+union task_union {
+	struct task_struct task;
+	unsigned long stack[INIT_TASK_SIZE/sizeof(long)];
+};
+
+extern union task_union init_task_union;
+
+extern struct   mm_struct init_mm;
+extern struct task_struct *init_tasks[NR_CPUS];
+
+/* PID hashing. (shouldnt this be dynamic?) */
+#define PIDHASH_SZ (4096 >> 2)
+extern struct task_struct *pidhash[PIDHASH_SZ];
+
+#define pid_hashfn(x)	((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
+
+static inline void hash_pid(struct task_struct *p)
+{
+	struct task_struct **htable = &pidhash[pid_hashfn(p->pid)];
+
+	if((p->pidhash_next = *htable) != NULL)
+		(*htable)->pidhash_pprev = &p->pidhash_next;
+	*htable = p;
+	p->pidhash_pprev = htable;
+}
+
+static inline void unhash_pid(struct task_struct *p)
+{
+	if(p->pidhash_next)
+		p->pidhash_next->pidhash_pprev = p->pidhash_pprev;
+	*p->pidhash_pprev = p->pidhash_next;
+}
+
+static inline struct task_struct *find_task_by_pid(int pid)
+{
+	struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)];
+
+	for(p = *htable; p && p->pid != pid; p = p->pidhash_next)
+		;
+
+	return p;
+}
+
+#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL)
+
+static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu)
+{
+	tsk->processor = cpu;
+	tsk->cpus_runnable = 1UL << cpu;
+}
+
+static inline void task_release_cpu(struct task_struct *tsk)
+{
+	tsk->cpus_runnable = ~0UL;
+}
+
+/* per-UID process charging. */
+extern struct user_struct * alloc_uid(uid_t);
+extern void free_uid(struct user_struct *);
+extern void switch_uid(struct user_struct *);
+
+#include <asm/current.h>
+
+extern unsigned long volatile jiffies;
+extern unsigned long itimer_ticks;
+extern unsigned long itimer_next;
+extern struct timeval xtime;
+extern void do_timer(struct pt_regs *);
+#ifdef CONFIG_NO_IDLE_HZ
+extern void do_timer_ticks(int ticks);
+#endif
+
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+#define CURRENT_TIME (xtime.tv_sec)
+
+extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr));
+extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
+extern void FASTCALL(sleep_on(wait_queue_head_t *q));
+extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q,
+				      signed long timeout));
+extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q));
+extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q,
+						    signed long timeout));
+extern int FASTCALL(wake_up_process(struct task_struct * tsk));
+
+#define wake_up(x)			__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
+#define wake_up_nr(x, nr)		__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
+#define wake_up_all(x)			__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
+#define wake_up_sync(x)			__wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
+#define wake_up_sync_nr(x, nr)		__wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
+#define wake_up_interruptible(x)	__wake_up((x),TASK_INTERRUPTIBLE, 1)
+#define wake_up_interruptible_nr(x, nr)	__wake_up((x),TASK_INTERRUPTIBLE, nr)
+#define wake_up_interruptible_all(x)	__wake_up((x),TASK_INTERRUPTIBLE, 0)
+#define wake_up_interruptible_sync(x)	__wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
+#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE,  nr)
+asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
+
+extern int in_group_p(gid_t);
+extern int in_egroup_p(gid_t);
+
+extern void proc_caches_init(void);
+extern void flush_signals(struct task_struct *);
+extern void flush_signal_handlers(struct task_struct *);
+extern void sig_exit(int, int, struct siginfo *);
+extern int dequeue_signal(sigset_t *, siginfo_t *);
+extern void block_all_signals(int (*notifier)(void *priv), void *priv,
+			      sigset_t *mask);
+extern void unblock_all_signals(void);
+extern int send_sig_info(int, struct siginfo *, struct task_struct *);
+extern int force_sig_info(int, struct siginfo *, struct task_struct *);
+extern int kill_pg_info(int, struct siginfo *, pid_t);
+extern int kill_sl_info(int, struct siginfo *, pid_t);
+extern int kill_proc_info(int, struct siginfo *, pid_t);
+extern void notify_parent(struct task_struct *, int);
+extern void do_notify_parent(struct task_struct *, int);
+extern void force_sig(int, struct task_struct *);
+extern int send_sig(int, struct task_struct *, int);
+extern int kill_pg(pid_t, int, int);
+extern int kill_sl(pid_t, int, int);
+extern int kill_proc(pid_t, int, int);
+extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *);
+extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long);
+
+static inline int signal_pending(struct task_struct *p)
+{
+	return (p->sigpending != 0);
+}
+
+/*
+ * Re-calculate pending state from the set of locally pending
+ * signals, globally pending signals, and blocked signals.
+ */
+static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
+{
+	unsigned long ready;
+	long i;
+
+	switch (_NSIG_WORDS) {
+	default:
+		for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
+			ready |= signal->sig[i] &~ blocked->sig[i];
+		break;
+
+	case 4: ready  = signal->sig[3] &~ blocked->sig[3];
+		ready |= signal->sig[2] &~ blocked->sig[2];
+		ready |= signal->sig[1] &~ blocked->sig[1];
+		ready |= signal->sig[0] &~ blocked->sig[0];
+		break;
+
+	case 2: ready  = signal->sig[1] &~ blocked->sig[1];
+		ready |= signal->sig[0] &~ blocked->sig[0];
+		break;
+
+	case 1: ready  = signal->sig[0] &~ blocked->sig[0];
+	}
+	return ready !=	0;
+}
+
+/* Reevaluate whether the task has signals pending delivery.
+   This is required every time the blocked sigset_t changes.
+   All callers should have t->sigmask_lock.  */
+
+static inline void recalc_sigpending(struct task_struct *t)
+{
+	t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked);
+}
+
+/* True if we are on the alternate signal stack.  */
+
+static inline int on_sig_stack(unsigned long sp)
+{
+	return (sp - current->sas_ss_sp < current->sas_ss_size);
+}
+
+static inline int sas_ss_flags(unsigned long sp)
+{
+	return (current->sas_ss_size == 0 ? SS_DISABLE
+		: on_sig_stack(sp) ? SS_ONSTACK : 0);
+}
+
+extern int request_irq(unsigned int,
+		       void (*handler)(int, void *, struct pt_regs *),
+		       unsigned long, const char *, void *);
+extern void free_irq(unsigned int, void *);
+
+/*
+ * This has now become a routine instead of a macro, it sets a flag if
+ * it returns true (to do BSD-style accounting where the process is flagged
+ * if it uses root privs). The implication of this is that you should do
+ * normal permissions checks first, and check suser() last.
+ *
+ * [Dec 1997 -- Chris Evans]
+ * For correctness, the above considerations need to be extended to
+ * fsuser(). This is done, along with moving fsuser() checks to be
+ * last.
+ *
+ * These will be removed, but in the mean time, when the SECURE_NOROOT 
+ * flag is set, uids don't grant privilege.
+ */
+static inline int suser(void)
+{
+	if (!issecure(SECURE_NOROOT) && current->euid == 0) { 
+		current->flags |= PF_SUPERPRIV;
+		return 1;
+	}
+	return 0;
+}
+
+static inline int fsuser(void)
+{
+	if (!issecure(SECURE_NOROOT) && current->fsuid == 0) {
+		current->flags |= PF_SUPERPRIV;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * capable() checks for a particular capability.  
+ * New privilege checks should use this interface, rather than suser() or
+ * fsuser(). See include/linux/capability.h for defined capabilities.
+ */
+
+static inline int capable(int cap)
+{
+#if 1 /* ok now */
+	if (cap_raised(current->cap_effective, cap))
+#else
+	if (cap_is_fs_cap(cap) ? current->fsuid == 0 : current->euid == 0)
+#endif
+	{
+		current->flags |= PF_SUPERPRIV;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Routines for handling mm_structs
+ */
+extern struct mm_struct * mm_alloc(void);
+
+extern struct mm_struct * start_lazy_tlb(void);
+extern void end_lazy_tlb(struct mm_struct *mm);
+
+/* mmdrop drops the mm and the page tables */
+extern void FASTCALL(__mmdrop(struct mm_struct *));
+static inline void mmdrop(struct mm_struct * mm)
+{
+	if (atomic_dec_and_test(&mm->mm_count))
+		__mmdrop(mm);
+}
+
+/* mmput gets rid of the mappings and all user-space */
+extern void mmput(struct mm_struct *);
+/* Remove the current tasks stale references to the old mm_struct */
+extern void mm_release(void);
+
+/*
+ * Routines for handling the fd arrays
+ */
+extern struct file ** alloc_fd_array(int);
+extern int expand_fd_array(struct files_struct *, int nr);
+extern void free_fd_array(struct file **, int);
+
+extern fd_set *alloc_fdset(int);
+extern int expand_fdset(struct files_struct *, int nr);
+extern void free_fdset(fd_set *, int);
+
+extern int  copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
+extern void flush_thread(void);
+extern void exit_thread(void);
+
+extern void exit_mm(struct task_struct *);
+extern void exit_files(struct task_struct *);
+extern void exit_sighand(struct task_struct *);
+
+extern void reparent_to_init(void);
+extern void daemonize(void);
+
+extern int do_execve(char *, char **, char **, struct pt_regs *);
+extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long);
+
+extern void set_task_comm(struct task_struct *tsk, char *from);
+extern void get_task_comm(char *to, struct task_struct *tsk);
+
+extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
+extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
+extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
+
+extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+#define __wait_event(wq, condition) 					\
+do {									\
+	wait_queue_t __wait;						\
+	init_waitqueue_entry(&__wait, current);				\
+									\
+	add_wait_queue(&wq, &__wait);					\
+	for (;;) {							\
+		set_current_state(TASK_UNINTERRUPTIBLE);		\
+		if (condition)						\
+			break;						\
+		schedule();						\
+	}								\
+	current->state = TASK_RUNNING;					\
+	remove_wait_queue(&wq, &__wait);				\
+} while (0)
+
+#define wait_event(wq, condition) 					\
+do {									\
+	if (condition)	 						\
+		break;							\
+	__wait_event(wq, condition);					\
+} while (0)
+
+#define __wait_event_interruptible(wq, condition, ret)			\
+do {									\
+	wait_queue_t __wait;						\
+	init_waitqueue_entry(&__wait, current);				\
+									\
+	add_wait_queue(&wq, &__wait);					\
+	for (;;) {							\
+		set_current_state(TASK_INTERRUPTIBLE);			\
+		if (condition)						\
+			break;						\
+		if (!signal_pending(current)) {				\
+			schedule();					\
+			continue;					\
+		}							\
+		ret = -ERESTARTSYS;					\
+		break;							\
+	}								\
+	current->state = TASK_RUNNING;					\
+	remove_wait_queue(&wq, &__wait);				\
+} while (0)
+	
+#define wait_event_interruptible(wq, condition)				\
+({									\
+	int __ret = 0;							\
+	if (!(condition))						\
+		__wait_event_interruptible(wq, condition, __ret);	\
+	__ret;								\
+})
+
+#define REMOVE_LINKS(p) do { \
+	(p)->next_task->prev_task = (p)->prev_task; \
+	(p)->prev_task->next_task = (p)->next_task; \
+	if ((p)->p_osptr) \
+		(p)->p_osptr->p_ysptr = (p)->p_ysptr; \
+	if ((p)->p_ysptr) \
+		(p)->p_ysptr->p_osptr = (p)->p_osptr; \
+	else \
+		(p)->p_pptr->p_cptr = (p)->p_osptr; \
+	} while (0)
+
+#define SET_LINKS(p) do { \
+	(p)->next_task = &init_task; \
+	(p)->prev_task = init_task.prev_task; \
+	init_task.prev_task->next_task = (p); \
+	init_task.prev_task = (p); \
+	(p)->p_ysptr = NULL; \
+	if (((p)->p_osptr = (p)->p_pptr->p_cptr) != NULL) \
+		(p)->p_osptr->p_ysptr = p; \
+	(p)->p_pptr->p_cptr = p; \
+	} while (0)
+
+#define for_each_task(p) \
+	for (p = &init_task ; (p = p->next_task) != &init_task ; )
+
+#define for_each_thread(task) \
+	for (task = next_thread(current) ; task != current ; task = next_thread(task))
+
+#define next_thread(p) \
+	list_entry((p)->thread_group.next, struct task_struct, thread_group)
+
+#define thread_group_leader(p)	(p->pid == p->tgid)
+
+static inline void del_from_runqueue(struct task_struct * p)
+{
+	nr_running--;
+	p->sleep_time = jiffies;
+	list_del(&p->run_list);
+	p->run_list.next = NULL;
+}
+
+static inline int task_on_runqueue(struct task_struct *p)
+{
+	return (p->run_list.next != NULL);
+}
+
+static inline void unhash_process(struct task_struct *p)
+{
+	if (task_on_runqueue(p))
+		out_of_line_bug();
+	write_lock_irq(&tasklist_lock);
+	nr_threads--;
+	unhash_pid(p);
+	REMOVE_LINKS(p);
+	list_del(&p->thread_group);
+	write_unlock_irq(&tasklist_lock);
+}
+
+/* Protects ->fs, ->files, ->mm, and synchronises with wait4().  Nests inside tasklist_lock */
+static inline void task_lock(struct task_struct *p)
+{
+	spin_lock(&p->alloc_lock);
+}
+
+static inline void task_unlock(struct task_struct *p)
+{
+	spin_unlock(&p->alloc_lock);
+}
+
+/* write full pathname into buffer and return start of pathname */
+static inline char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+				char *buf, int buflen)
+{
+	char *res;
+	struct vfsmount *rootmnt;
+	struct dentry *root;
+	read_lock(&current->fs->lock);
+	rootmnt = mntget(current->fs->rootmnt);
+	root = dget(current->fs->root);
+	read_unlock(&current->fs->lock);
+	spin_lock(&dcache_lock);
+	res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
+	spin_unlock(&dcache_lock);
+	dput(root);
+	mntput(rootmnt);
+	return res;
+}
+
+static inline int need_resched(void)
+{
+	return (unlikely(current->need_resched));
+}
+
+extern void __cond_resched(void);
+static inline void cond_resched(void)
+{
+	if (need_resched())
+		__cond_resched();
+}
+
+#endif /* __KERNEL__ */
+#endif
diff --git a/linux-2.4-xen-sparse/include/linux/skbuff.h b/linux-2.4-xen-sparse/include/linux/skbuff.h
new file mode 100644
index 0000000000..c9a0e3b5e1
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/linux/skbuff.h
@@ -0,0 +1,1181 @@
+/*
+ *	Definitions for the 'struct sk_buff' memory handlers.
+ *
+ *	Authors:
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *		Florian La Roche, <rzsfl@rz.uni-sb.de>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+ 
+#ifndef _LINUX_SKBUFF_H
+#define _LINUX_SKBUFF_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/cache.h>
+
+#include <asm/atomic.h>
+#include <asm/types.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+
+#define HAVE_ALLOC_SKB		/* For the drivers to know */
+#define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
+#define SLAB_SKB 		/* Slabified skbuffs 	   */
+
+#define CHECKSUM_NONE 0
+#define CHECKSUM_HW 1
+#define CHECKSUM_UNNECESSARY 2
+
+#define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
+#define SKB_MAX_ORDER(X,ORDER)	(((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1))
+#define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X),0))
+#define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0,2))
+
+/* A. Checksumming of received packets by device.
+ *
+ *	NONE: device failed to checksum this packet.
+ *		skb->csum is undefined.
+ *
+ *	UNNECESSARY: device parsed packet and wouldbe verified checksum.
+ *		skb->csum is undefined.
+ *	      It is bad option, but, unfortunately, many of vendors do this.
+ *	      Apparently with secret goal to sell you new device, when you
+ *	      will add new protocol to your host. F.e. IPv6. 8)
+ *
+ *	HW: the most generic way. Device supplied checksum of _all_
+ *	    the packet as seen by netif_rx in skb->csum.
+ *	    NOTE: Even if device supports only some protocols, but
+ *	    is able to produce some skb->csum, it MUST use HW,
+ *	    not UNNECESSARY.
+ *
+ * B. Checksumming on output.
+ *
+ *	NONE: skb is checksummed by protocol or csum is not required.
+ *
+ *	HW: device is required to csum packet as seen by hard_start_xmit
+ *	from skb->h.raw to the end and to record the checksum
+ *	at skb->h.raw+skb->csum.
+ *
+ *	Device must show its capabilities in dev->features, set
+ *	at device setup time.
+ *	NETIF_F_HW_CSUM	- it is clever device, it is able to checksum
+ *			  everything.
+ *	NETIF_F_NO_CSUM - loopback or reliable single hop media.
+ *	NETIF_F_IP_CSUM - device is dumb. It is able to csum only
+ *			  TCP/UDP over IPv4. Sigh. Vendors like this
+ *			  way by an unknown reason. Though, see comment above
+ *			  about CHECKSUM_UNNECESSARY. 8)
+ *
+ *	Any questions? No questions, good. 		--ANK
+ */
+
+#ifdef __i386__
+#define NET_CALLER(arg) (*(((void**)&arg)-1))
+#else
+#define NET_CALLER(arg) __builtin_return_address(0)
+#endif
+
+#ifdef CONFIG_NETFILTER
+struct nf_conntrack {
+	atomic_t use;
+	void (*destroy)(struct nf_conntrack *);
+};
+
+struct nf_ct_info {
+	struct nf_conntrack *master;
+};
+#endif
+
+struct sk_buff_head {
+	/* These two members must be first. */
+	struct sk_buff	* next;
+	struct sk_buff	* prev;
+
+	__u32		qlen;
+	spinlock_t	lock;
+};
+
+struct sk_buff;
+
+#define MAX_SKB_FRAGS 6
+
+typedef struct skb_frag_struct skb_frag_t;
+
+struct skb_frag_struct
+{
+	struct page *page;
+	__u16 page_offset;
+	__u16 size;
+};
+
+/* This data is invariant across clones and lives at
+ * the end of the header data, ie. at skb->end.
+ */
+struct skb_shared_info {
+	atomic_t	dataref;
+	unsigned int	nr_frags;
+	struct sk_buff	*frag_list;
+	skb_frag_t	frags[MAX_SKB_FRAGS];
+};
+
+struct sk_buff {
+	/* These two members must be first. */
+	struct sk_buff	* next;			/* Next buffer in list 				*/
+	struct sk_buff	* prev;			/* Previous buffer in list 			*/
+
+	struct sk_buff_head * list;		/* List we are on				*/
+	struct sock	*sk;			/* Socket we are owned by 			*/
+	struct timeval	stamp;			/* Time we arrived				*/
+	struct net_device	*dev;		/* Device we arrived on/are leaving by		*/
+	struct net_device	*real_dev;	/* For support of point to point protocols 
+						   (e.g. 802.3ad) over bonding, we must save the
+						   physical device that got the packet before
+						   replacing skb->dev with the virtual device.  */
+
+	/* Transport layer header */
+	union
+	{
+		struct tcphdr	*th;
+		struct udphdr	*uh;
+		struct icmphdr	*icmph;
+		struct igmphdr	*igmph;
+		struct iphdr	*ipiph;
+		struct spxhdr	*spxh;
+		unsigned char	*raw;
+	} h;
+
+	/* Network layer header */
+	union
+	{
+		struct iphdr	*iph;
+		struct ipv6hdr	*ipv6h;
+		struct arphdr	*arph;
+		struct ipxhdr	*ipxh;
+		unsigned char	*raw;
+	} nh;
+  
+	/* Link layer header */
+	union 
+	{	
+	  	struct ethhdr	*ethernet;
+	  	unsigned char 	*raw;
+	} mac;
+
+	struct  dst_entry *dst;
+
+	/* 
+	 * This is the control buffer. It is free to use for every
+	 * layer. Please put your private variables there. If you
+	 * want to keep them across layers you have to do a skb_clone()
+	 * first. This is owned by whoever has the skb queued ATM.
+	 */ 
+	char		cb[48];	 
+
+	unsigned int 	len;			/* Length of actual data			*/
+ 	unsigned int 	data_len;
+	unsigned int	csum;			/* Checksum 					*/
+	unsigned char 	__unused,		/* Dead field, may be reused			*/
+			cloned, 		/* head may be cloned (check refcnt to be sure). */
+  			pkt_type,		/* Packet class					*/
+  			ip_summed;		/* Driver fed us an IP checksum			*/
+	__u32		priority;		/* Packet queueing priority			*/
+	atomic_t	users;			/* User count - see datagram.c,tcp.c 		*/
+	unsigned short	protocol;		/* Packet protocol from driver. 		*/
+	unsigned short	security;		/* Security level of packet			*/
+	unsigned int	truesize;		/* Buffer size 					*/
+
+	unsigned char	*head;			/* Head of buffer 				*/
+	unsigned char	*data;			/* Data head pointer				*/
+	unsigned char	*tail;			/* Tail pointer					*/
+	unsigned char 	*end;			/* End pointer					*/
+
+	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
+#ifdef CONFIG_NETFILTER
+	/* Can be used for communication between hooks. */
+        unsigned long	nfmark;
+	/* Cache info */
+	__u32		nfcache;
+	/* Associated connection, if any */
+	struct nf_ct_info *nfct;
+#ifdef CONFIG_NETFILTER_DEBUG
+        unsigned int nf_debug;
+#endif
+#endif /*CONFIG_NETFILTER*/
+
+#if defined(CONFIG_HIPPI)
+	union{
+		__u32	ifield;
+	} private;
+#endif
+
+#ifdef CONFIG_NET_SCHED
+       __u32           tc_index;               /* traffic control index */
+#endif
+};
+
+#ifdef __KERNEL__
+/*
+ *	Handling routines are only of interest to the kernel
+ */
+#include <linux/slab.h>
+
+#include <asm/system.h>
+
+extern void			__kfree_skb(struct sk_buff *skb);
+extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
+extern struct sk_buff *		alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, int priority);
+extern void			kfree_skbmem(struct sk_buff *skb);
+extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
+extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
+extern struct sk_buff *		pskb_copy(struct sk_buff *skb, int gfp_mask);
+extern int			pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
+extern struct sk_buff *		skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
+extern struct sk_buff *		skb_copy_expand(const struct sk_buff *skb, 
+						int newheadroom,
+						int newtailroom,
+						int priority);
+extern struct sk_buff *		skb_pad(struct sk_buff *skb, int pad);
+#define dev_kfree_skb(a)	kfree_skb(a)
+extern void	skb_over_panic(struct sk_buff *skb, int len, void *here);
+extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
+
+/* Internal */
+#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
+
+/**
+ *	skb_queue_empty - check if a queue is empty
+ *	@list: queue head
+ *
+ *	Returns true if the queue is empty, false otherwise.
+ */
+ 
+static inline int skb_queue_empty(struct sk_buff_head *list)
+{
+	return (list->next == (struct sk_buff *) list);
+}
+
+/**
+ *	skb_get - reference buffer
+ *	@skb: buffer to reference
+ *
+ *	Makes another reference to a socket buffer and returns a pointer
+ *	to the buffer.
+ */
+ 
+static inline struct sk_buff *skb_get(struct sk_buff *skb)
+{
+	atomic_inc(&skb->users);
+	return skb;
+}
+
+/*
+ * If users==1, we are the only owner and are can avoid redundant
+ * atomic change.
+ */
+ 
+/**
+ *	kfree_skb - free an sk_buff
+ *	@skb: buffer to free
+ *
+ *	Drop a reference to the buffer and free it if the usage count has
+ *	hit zero.
+ */
+ 
+static inline void kfree_skb(struct sk_buff *skb)
+{
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+	__kfree_skb(skb);
+}
+
+/**
+ *	skb_cloned - is the buffer a clone
+ *	@skb: buffer to check
+ *
+ *	Returns true if the buffer was generated with skb_clone() and is
+ *	one of multiple shared copies of the buffer. Cloned buffers are
+ *	shared data so must not be written to under normal circumstances.
+ */
+
+static inline int skb_cloned(struct sk_buff *skb)
+{
+	return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
+}
+
+/**
+ *	skb_shared - is the buffer shared
+ *	@skb: buffer to check
+ *
+ *	Returns true if more than one person has a reference to this
+ *	buffer.
+ */
+ 
+static inline int skb_shared(struct sk_buff *skb)
+{
+	return (atomic_read(&skb->users) != 1);
+}
+
+/** 
+ *	skb_share_check - check if buffer is shared and if so clone it
+ *	@skb: buffer to check
+ *	@pri: priority for memory allocation
+ *	
+ *	If the buffer is shared the buffer is cloned and the old copy
+ *	drops a reference. A new clone with a single reference is returned.
+ *	If the buffer is not shared the original buffer is returned. When
+ *	being called from interrupt status or with spinlocks held pri must
+ *	be GFP_ATOMIC.
+ *
+ *	NULL is returned on a memory allocation failure.
+ */
+ 
+static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
+{
+	if (skb_shared(skb)) {
+		struct sk_buff *nskb;
+		nskb = skb_clone(skb, pri);
+		kfree_skb(skb);
+		return nskb;
+	}
+	return skb;
+}
+
+
+/*
+ *	Copy shared buffers into a new sk_buff. We effectively do COW on
+ *	packets to handle cases where we have a local reader and forward
+ *	and a couple of other messy ones. The normal one is tcpdumping
+ *	a packet thats being forwarded.
+ */
+ 
+/**
+ *	skb_unshare - make a copy of a shared buffer
+ *	@skb: buffer to check
+ *	@pri: priority for memory allocation
+ *
+ *	If the socket buffer is a clone then this function creates a new
+ *	copy of the data, drops a reference count on the old copy and returns
+ *	the new copy with the reference count at 1. If the buffer is not a clone
+ *	the original buffer is returned. When called with a spinlock held or
+ *	from interrupt state @pri must be %GFP_ATOMIC
+ *
+ *	%NULL is returned on a memory allocation failure.
+ */
+ 
+static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
+{
+	struct sk_buff *nskb;
+	if(!skb_cloned(skb))
+		return skb;
+	nskb=skb_copy(skb, pri);
+	kfree_skb(skb);		/* Free our shared copy */
+	return nskb;
+}
+
+/**
+ *	skb_peek
+ *	@list_: list to peek at
+ *
+ *	Peek an &sk_buff. Unlike most other operations you _MUST_
+ *	be careful with this one. A peek leaves the buffer on the
+ *	list and someone else may run off with it. You must hold
+ *	the appropriate locks or have a private queue to do this.
+ *
+ *	Returns %NULL for an empty list or a pointer to the head element.
+ *	The reference count is not incremented and the reference is therefore
+ *	volatile. Use with caution.
+ */
+ 
+static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
+{
+	struct sk_buff *list = ((struct sk_buff *)list_)->next;
+	if (list == (struct sk_buff *)list_)
+		list = NULL;
+	return list;
+}
+
+/**
+ *	skb_peek_tail
+ *	@list_: list to peek at
+ *
+ *	Peek an &sk_buff. Unlike most other operations you _MUST_
+ *	be careful with this one. A peek leaves the buffer on the
+ *	list and someone else may run off with it. You must hold
+ *	the appropriate locks or have a private queue to do this.
+ *
+ *	Returns %NULL for an empty list or a pointer to the tail element.
+ *	The reference count is not incremented and the reference is therefore
+ *	volatile. Use with caution.
+ */
+
+static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
+{
+	struct sk_buff *list = ((struct sk_buff *)list_)->prev;
+	if (list == (struct sk_buff *)list_)
+		list = NULL;
+	return list;
+}
+
+/**
+ *	skb_queue_len	- get queue length
+ *	@list_: list to measure
+ *
+ *	Return the length of an &sk_buff queue. 
+ */
+ 
+static inline __u32 skb_queue_len(struct sk_buff_head *list_)
+{
+	return(list_->qlen);
+}
+
+static inline void skb_queue_head_init(struct sk_buff_head *list)
+{
+	spin_lock_init(&list->lock);
+	list->prev = (struct sk_buff *)list;
+	list->next = (struct sk_buff *)list;
+	list->qlen = 0;
+}
+
+/*
+ *	Insert an sk_buff at the start of a list.
+ *
+ *	The "__skb_xxxx()" functions are the non-atomic ones that
+ *	can only be called with interrupts disabled.
+ */
+
+/**
+ *	__skb_queue_head - queue a buffer at the list head
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Queue a buffer at the start of a list. This function takes no locks
+ *	and you must therefore hold required locks before calling it.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */	
+ 
+static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	struct sk_buff *prev, *next;
+
+	newsk->list = list;
+	list->qlen++;
+	prev = (struct sk_buff *)list;
+	next = prev->next;
+	newsk->next = next;
+	newsk->prev = prev;
+	next->prev = newsk;
+	prev->next = newsk;
+}
+
+
+/**
+ *	skb_queue_head - queue a buffer at the list head
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Queue a buffer at the start of the list. This function takes the
+ *	list lock and can be used safely with other locking &sk_buff functions
+ *	safely.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */	
+
+static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_queue_head(list, newsk);
+	spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ *	__skb_queue_tail - queue a buffer at the list tail
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Queue a buffer at the end of a list. This function takes no locks
+ *	and you must therefore hold required locks before calling it.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */	
+ 
+
+static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	struct sk_buff *prev, *next;
+
+	newsk->list = list;
+	list->qlen++;
+	next = (struct sk_buff *)list;
+	prev = next->prev;
+	newsk->next = next;
+	newsk->prev = prev;
+	next->prev = newsk;
+	prev->next = newsk;
+}
+
+/**
+ *	skb_queue_tail - queue a buffer at the list tail
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Queue a buffer at the tail of the list. This function takes the
+ *	list lock and can be used safely with other locking &sk_buff functions
+ *	safely.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */	
+
+static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_queue_tail(list, newsk);
+	spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ *	__skb_dequeue - remove from the head of the queue
+ *	@list: list to dequeue from
+ *
+ *	Remove the head of the list. This function does not take any locks
+ *	so must be used with appropriate locks held only. The head item is
+ *	returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
+{
+	struct sk_buff *next, *prev, *result;
+
+	prev = (struct sk_buff *) list;
+	next = prev->next;
+	result = NULL;
+	if (next != prev) {
+		result = next;
+		next = next->next;
+		list->qlen--;
+		next->prev = prev;
+		prev->next = next;
+		result->next = NULL;
+		result->prev = NULL;
+		result->list = NULL;
+	}
+	return result;
+}
+
+/**
+ *	skb_dequeue - remove from the head of the queue
+ *	@list: list to dequeue from
+ *
+ *	Remove the head of the list. The list lock is taken so the function
+ *	may be used safely with other locking list functions. The head item is
+ *	returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
+{
+	unsigned long flags;
+	struct sk_buff *result;
+
+	spin_lock_irqsave(&list->lock, flags);
+	result = __skb_dequeue(list);
+	spin_unlock_irqrestore(&list->lock, flags);
+	return result;
+}
+
+/*
+ *	Insert a packet on a list.
+ */
+
+static inline void __skb_insert(struct sk_buff *newsk,
+	struct sk_buff * prev, struct sk_buff *next,
+	struct sk_buff_head * list)
+{
+	newsk->next = next;
+	newsk->prev = prev;
+	next->prev = newsk;
+	prev->next = newsk;
+	newsk->list = list;
+	list->qlen++;
+}
+
+/**
+ *	skb_insert	-	insert a buffer
+ *	@old: buffer to insert before
+ *	@newsk: buffer to insert
+ *
+ *	Place a packet before a given packet in a list. The list locks are taken
+ *	and this function is atomic with respect to other list locked calls
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+
+static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&old->list->lock, flags);
+	__skb_insert(newsk, old->prev, old, old->list);
+	spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+/*
+ *	Place a packet after a given packet in a list.
+ */
+
+static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+	__skb_insert(newsk, old, old->next, old->list);
+}
+
+/**
+ *	skb_append	-	append a buffer
+ *	@old: buffer to insert after
+ *	@newsk: buffer to insert
+ *
+ *	Place a packet after a given packet in a list. The list locks are taken
+ *	and this function is atomic with respect to other list locked calls.
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+
+
+static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&old->list->lock, flags);
+	__skb_append(old, newsk);
+	spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+/*
+ * remove sk_buff from list. _Must_ be called atomically, and with
+ * the list known..
+ */
+ 
+static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
+{
+	struct sk_buff * next, * prev;
+
+	list->qlen--;
+	next = skb->next;
+	prev = skb->prev;
+	skb->next = NULL;
+	skb->prev = NULL;
+	skb->list = NULL;
+	next->prev = prev;
+	prev->next = next;
+}
+
+/**
+ *	skb_unlink	-	remove a buffer from a list
+ *	@skb: buffer to remove
+ *
+ *	Place a packet after a given packet in a list. The list locks are taken
+ *	and this function is atomic with respect to other list locked calls
+ *	
+ *	Works even without knowing the list it is sitting on, which can be 
+ *	handy at times. It also means that THE LIST MUST EXIST when you 
+ *	unlink. Thus a list must have its contents unlinked before it is
+ *	destroyed.
+ */
+
+static inline void skb_unlink(struct sk_buff *skb)
+{
+	struct sk_buff_head *list = skb->list;
+
+	if(list) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&list->lock, flags);
+		if(skb->list == list)
+			__skb_unlink(skb, skb->list);
+		spin_unlock_irqrestore(&list->lock, flags);
+	}
+}
+
+/* XXX: more streamlined implementation */
+
+/**
+ *	__skb_dequeue_tail - remove from the tail of the queue
+ *	@list: list to dequeue from
+ *
+ *	Remove the tail of the list. This function does not take any locks
+ *	so must be used with appropriate locks held only. The tail item is
+ *	returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
+{
+	struct sk_buff *skb = skb_peek_tail(list); 
+	if (skb)
+		__skb_unlink(skb, list);
+	return skb;
+}
+
+/**
+ *	skb_dequeue - remove from the head of the queue
+ *	@list: list to dequeue from
+ *
+ *	Remove the head of the list. The list lock is taken so the function
+ *	may be used safely with other locking list functions. The tail item is
+ *	returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
+{
+	unsigned long flags;
+	struct sk_buff *result;
+
+	spin_lock_irqsave(&list->lock, flags);
+	result = __skb_dequeue_tail(list);
+	spin_unlock_irqrestore(&list->lock, flags);
+	return result;
+}
+
+static inline int skb_is_nonlinear(const struct sk_buff *skb)
+{
+	return skb->data_len;
+}
+
+static inline unsigned int skb_headlen(const struct sk_buff *skb)
+{
+	return skb->len - skb->data_len;
+}
+
+#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0)
+#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0)
+#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0)
+
+/*
+ *	Add data to an sk_buff
+ */
+ 
+static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
+{
+	unsigned char *tmp=skb->tail;
+	SKB_LINEAR_ASSERT(skb);
+	skb->tail+=len;
+	skb->len+=len;
+	return tmp;
+}
+
+/**
+ *	skb_put - add data to a buffer
+ *	@skb: buffer to use 
+ *	@len: amount of data to add
+ *
+ *	This function extends the used data area of the buffer. If this would
+ *	exceed the total buffer size the kernel will panic. A pointer to the
+ *	first byte of the extra data is returned.
+ */
+ 
+static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+{
+	unsigned char *tmp=skb->tail;
+	SKB_LINEAR_ASSERT(skb);
+	skb->tail+=len;
+	skb->len+=len;
+	if(skb->tail>skb->end) {
+		skb_over_panic(skb, len, current_text_addr());
+	}
+	return tmp;
+}
+
+static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
+{
+	skb->data-=len;
+	skb->len+=len;
+	return skb->data;
+}
+
+/**
+ *	skb_push - add data to the start of a buffer
+ *	@skb: buffer to use 
+ *	@len: amount of data to add
+ *
+ *	This function extends the used data area of the buffer at the buffer
+ *	start. If this would exceed the total buffer headroom the kernel will
+ *	panic. A pointer to the first byte of the extra data is returned.
+ */
+
+static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+{
+	skb->data-=len;
+	skb->len+=len;
+	if(skb->data<skb->head) {
+		skb_under_panic(skb, len, current_text_addr());
+	}
+	return skb->data;
+}
+
+static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
+{
+	skb->len-=len;
+	if (skb->len < skb->data_len)
+		out_of_line_bug();
+	return 	skb->data+=len;
+}
+
+/**
+ *	skb_pull - remove data from the start of a buffer
+ *	@skb: buffer to use 
+ *	@len: amount of data to remove
+ *
+ *	This function removes data from the start of a buffer, returning
+ *	the memory to the headroom. A pointer to the next data in the buffer
+ *	is returned. Once the data has been pulled future pushes will overwrite
+ *	the old data.
+ */
+
+static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
+{	
+	if (len > skb->len)
+		return NULL;
+	return __skb_pull(skb,len);
+}
+
+extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
+
+static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+	if (len > skb_headlen(skb) &&
+	    __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
+		return NULL;
+	skb->len -= len;
+	return 	skb->data += len;
+}
+
+static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
+{	
+	if (len > skb->len)
+		return NULL;
+	return __pskb_pull(skb,len);
+}
+
+static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
+{
+	if (len <= skb_headlen(skb))
+		return 1;
+	if (len > skb->len)
+		return 0;
+	return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
+}
+
+/**
+ *	skb_headroom - bytes at buffer head
+ *	@skb: buffer to check
+ *
+ *	Return the number of bytes of free space at the head of an &sk_buff.
+ */
+ 
+static inline int skb_headroom(const struct sk_buff *skb)
+{
+	return skb->data-skb->head;
+}
+
+/**
+ *	skb_tailroom - bytes at buffer end
+ *	@skb: buffer to check
+ *
+ *	Return the number of bytes of free space at the tail of an sk_buff
+ */
+
+static inline int skb_tailroom(const struct sk_buff *skb)
+{
+	return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
+}
+
+/**
+ *	skb_reserve - adjust headroom
+ *	@skb: buffer to alter
+ *	@len: bytes to move
+ *
+ *	Increase the headroom of an empty &sk_buff by reducing the tail
+ *	room. This is only allowed for an empty buffer.
+ */
+
+static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
+{
+	skb->data+=len;
+	skb->tail+=len;
+}
+
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
+
+static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+{
+	if (!skb->data_len) {
+		skb->len = len;
+		skb->tail = skb->data+len;
+	} else {
+		___pskb_trim(skb, len, 0);
+	}
+}
+
+/**
+ *	skb_trim - remove end from a buffer
+ *	@skb: buffer to alter
+ *	@len: new length
+ *
+ *	Cut the length of a buffer down by removing data from the tail. If
+ *	the buffer is already under the length specified it is not modified.
+ */
+
+static inline void skb_trim(struct sk_buff *skb, unsigned int len)
+{
+	if (skb->len > len) {
+		__skb_trim(skb, len);
+	}
+}
+
+
+static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+	if (!skb->data_len) {
+		skb->len = len;
+		skb->tail = skb->data+len;
+		return 0;
+	} else {
+		return ___pskb_trim(skb, len, 1);
+	}
+}
+
+static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+	if (len < skb->len)
+		return __pskb_trim(skb, len);
+	return 0;
+}
+
+/**
+ *	skb_orphan - orphan a buffer
+ *	@skb: buffer to orphan
+ *
+ *	If a buffer currently has an owner then we call the owner's
+ *	destructor function and make the @skb unowned. The buffer continues
+ *	to exist but is no longer charged to its former owner.
+ */
+
+
+static inline void skb_orphan(struct sk_buff *skb)
+{
+	if (skb->destructor)
+		skb->destructor(skb);
+	skb->destructor = NULL;
+	skb->sk = NULL;
+}
+
+/**
+ *	skb_purge - empty a list
+ *	@list: list to empty
+ *
+ *	Delete all buffers on an &sk_buff list. Each buffer is removed from
+ *	the list and one reference dropped. This function takes the list
+ *	lock and is atomic with respect to other list locking functions.
+ */
+
+
+static inline void skb_queue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+	while ((skb=skb_dequeue(list))!=NULL)
+		kfree_skb(skb);
+}
+
+/**
+ *	__skb_purge - empty a list
+ *	@list: list to empty
+ *
+ *	Delete all buffers on an &sk_buff list. Each buffer is removed from
+ *	the list and one reference dropped. This function does not take the
+ *	list lock and the caller must hold the relevant locks to use it.
+ */
+
+
+static inline void __skb_queue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+	while ((skb=__skb_dequeue(list))!=NULL)
+		kfree_skb(skb);
+}
+
+/**
+ *	__dev_alloc_skb - allocate an skbuff for sending
+ *	@length: length to allocate
+ *	@gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ *	Allocate a new &sk_buff and assign it a usage count of one. The
+ *	buffer has unspecified headroom built in. Users should allocate
+ *	the headroom they think they need without accounting for the
+ *	built in space. The built in space is used for optimisations.
+ *
+ *	%NULL is returned in there is no free memory.
+ */
+#ifndef CONFIG_XEN 
+static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
+					      int gfp_mask)
+{
+	struct sk_buff *skb = alloc_skb(length+16, gfp_mask);
+	if (skb)
+		skb_reserve(skb,16);
+	return skb;
+}
+#else
+extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask);
+#endif
+
+/**
+ *	dev_alloc_skb - allocate an skbuff for sending
+ *	@length: length to allocate
+ *
+ *	Allocate a new &sk_buff and assign it a usage count of one. The
+ *	buffer has unspecified headroom built in. Users should allocate
+ *	the headroom they think they need without accounting for the
+ *	built in space. The built in space is used for optimisations.
+ *
+ *	%NULL is returned in there is no free memory. Although this function
+ *	allocates memory it can be called from an interrupt.
+ */
+ 
+static inline struct sk_buff *dev_alloc_skb(unsigned int length)
+{
+	return __dev_alloc_skb(length, GFP_ATOMIC);
+}
+
+/**
+ *	skb_cow - copy header of skb when it is required
+ *	@skb: buffer to cow
+ *	@headroom: needed headroom
+ *
+ *	If the skb passed lacks sufficient headroom or its data part
+ *	is shared, data is reallocated. If reallocation fails, an error
+ *	is returned and original skb is not changed.
+ *
+ *	The result is skb with writable area skb->head...skb->tail
+ *	and at least @headroom of space at head.
+ */
+
+static inline int
+skb_cow(struct sk_buff *skb, unsigned int headroom)
+{
+	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+
+	if (delta < 0)
+		delta = 0;
+
+	if (delta || skb_cloned(skb))
+		return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
+	return 0;
+}
+
+/**
+ *	skb_padto	- pad an skbuff up to a minimal size
+ *	@skb: buffer to pad
+ *	@len: minimal length
+ *
+ *	Pads up a buffer to ensure the trailing bytes exist and are
+ *	blanked. If the buffer already contains sufficient data it
+ *	is untouched. Returns the buffer, which may be a replacement
+ *	for the original, or NULL for out of memory - in which case
+ *	the original buffer is still freed.
+ */
+ 
+static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len)
+{
+	unsigned int size = skb->len;
+	if(likely(size >= len))
+		return skb;
+	return skb_pad(skb, len-size);
+}
+
+/**
+ *	skb_linearize - convert paged skb to linear one
+ *	@skb: buffer to linarize
+ *	@gfp: allocation mode
+ *
+ *	If there is no free memory -ENOMEM is returned, otherwise zero
+ *	is returned and the old skb data released.  */
+int skb_linearize(struct sk_buff *skb, int gfp);
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+#ifdef CONFIG_HIGHMEM
+	if (in_irq())
+		out_of_line_bug();
+
+	local_bh_disable();
+#endif
+	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
+#ifdef CONFIG_HIGHMEM
+	local_bh_enable();
+#endif
+}
+
+#define skb_queue_walk(queue, skb) \
+		for (skb = (queue)->next;			\
+		     (skb != (struct sk_buff *)(queue));	\
+		     skb=skb->next)
+
+
+extern struct sk_buff *		skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
+extern unsigned int		datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait);
+extern int			skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
+extern int			skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size);
+extern int			skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
+extern int			skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov);
+extern void			skb_free_datagram(struct sock * sk, struct sk_buff *skb);
+
+extern unsigned int		skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
+extern int			skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
+extern unsigned int		skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
+extern void			skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
+
+extern void skb_init(void);
+extern void skb_add_mtu(int mtu);
+
+#ifdef CONFIG_NETFILTER
+static inline void
+nf_conntrack_put(struct nf_ct_info *nfct)
+{
+	if (nfct && atomic_dec_and_test(&nfct->master->use))
+		nfct->master->destroy(nfct->master);
+}
+static inline void
+nf_conntrack_get(struct nf_ct_info *nfct)
+{
+	if (nfct)
+		atomic_inc(&nfct->master->use);
+}
+static inline void
+nf_reset(struct sk_buff *skb)
+{
+	nf_conntrack_put(skb->nfct);
+	skb->nfct = NULL;
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 0;
+#endif
+}
+#else /* CONFIG_NETFILTER */
+static inline void nf_reset(struct sk_buff *skb) {}
+#endif /* CONFIG_NETFILTER */
+
+#endif	/* __KERNEL__ */
+#endif	/* _LINUX_SKBUFF_H */
diff --git a/linux-2.4-xen-sparse/include/linux/timer.h b/linux-2.4-xen-sparse/include/linux/timer.h
new file mode 100644
index 0000000000..238083218f
--- /dev/null
+++ b/linux-2.4-xen-sparse/include/linux/timer.h
@@ -0,0 +1,77 @@
+#ifndef _LINUX_TIMER_H
+#define _LINUX_TIMER_H
+
+#include <linux/config.h>
+#include <linux/list.h>
+
+/*
+ * In Linux 2.4, static timers have been removed from the kernel.
+ * Timers may be dynamically created and destroyed, and should be initialized
+ * by a call to init_timer() upon creation.
+ *
+ * The "data" field enables use of a common timeout function for several
+ * timeouts. You can use this field to distinguish between the different
+ * invocations.
+ */
+struct timer_list {
+	struct list_head list;
+	unsigned long expires;
+	unsigned long data;
+	void (*function)(unsigned long);
+};
+
+extern void add_timer(struct timer_list * timer);
+extern int del_timer(struct timer_list * timer);
+#ifdef CONFIG_NO_IDLE_HZ
+extern struct timer_list *next_timer_event(void);
+#endif
+
+#ifdef CONFIG_SMP
+extern int del_timer_sync(struct timer_list * timer);
+extern void sync_timers(void);
+#else
+#define del_timer_sync(t)	del_timer(t)
+#define sync_timers()		do { } while (0)
+#endif
+
+/*
+ * mod_timer is a more efficient way to update the expire field of an
+ * active timer (if the timer is inactive it will be activated)
+ * mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a).
+ * If the timer is known to be not pending (ie, in the handler), mod_timer
+ * is less efficient than a->expires = b; add_timer(a).
+ */
+int mod_timer(struct timer_list *timer, unsigned long expires);
+
+extern void it_real_fn(unsigned long);
+
+static inline void init_timer(struct timer_list * timer)
+{
+	timer->list.next = timer->list.prev = NULL;
+}
+
+static inline int timer_pending (const struct timer_list * timer)
+{
+	return timer->list.next != NULL;
+}
+
+/*
+ *	These inlines deal with timer wrapping correctly. You are 
+ *	strongly encouraged to use them
+ *	1. Because people otherwise forget
+ *	2. Because if the timer wrap changes in future you wont have to
+ *	   alter your driver code.
+ *
+ * time_after(a,b) returns true if the time a is after time b.
+ *
+ * Do this with "<0" and ">=0" to only test the sign of the result. A
+ * good compiler would generate better code (and a really good compiler
+ * wouldn't care). Gcc is currently neither.
+ */
+#define time_after(a,b)		((long)(b) - (long)(a) < 0)
+#define time_before(a,b)	time_after(b,a)
+
+#define time_after_eq(a,b)	((long)(a) - (long)(b) >= 0)
+#define time_before_eq(a,b)	time_after_eq(b,a)
+
+#endif
diff --git a/linux-2.4-xen-sparse/kernel/time.c b/linux-2.4-xen-sparse/kernel/time.c
new file mode 100644
index 0000000000..b4f8b55e8a
--- /dev/null
+++ b/linux-2.4-xen-sparse/kernel/time.c
@@ -0,0 +1,415 @@
+/*
+ *  linux/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  This file contains the interface functions for the various
+ *  time related system calls: time, stime, gettimeofday, settimeofday,
+ *			       adjtime
+ */
+/*
+ * Modification history kernel/time.c
+ * 
+ * 1993-09-02    Philip Gladstone
+ *      Created file with time related functions from sched.c and adjtimex() 
+ * 1993-10-08    Torsten Duwe
+ *      adjtime interface update and CMOS clock write code
+ * 1995-08-13    Torsten Duwe
+ *      kernel PLL updated to 1994-12-13 specs (rfc-1589)
+ * 1999-01-16    Ulrich Windl
+ *	Introduced error checking for many cases in adjtimex().
+ *	Updated NTP code according to technical memorandum Jan '96
+ *	"A Kernel Model for Precision Timekeeping" by Dave Mills
+ *	Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
+ *	(Even though the technical memorandum forbids it)
+ */
+
+#include <linux/mm.h>
+#include <linux/timex.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+
+/* 
+ * The timezone where the local system is located.  Used as a default by some
+ * programs who obtain this value by using gettimeofday.
+ */
+struct timezone sys_tz;
+
+/* The xtime_lock is not only serializing the xtime read/writes but it's also
+   serializing all accesses to the global NTP variables now. */
+extern rwlock_t xtime_lock;
+
+#if !defined(__alpha__) && !defined(__ia64__)
+
+/*
+ * sys_time() can be implemented in user-level using
+ * sys_gettimeofday().  Is this for backwards compatibility?  If so,
+ * why not move it into the appropriate arch directory (for those
+ * architectures that need it).
+ *
+ * XXX This function is NOT 64-bit clean!
+ */
+asmlinkage long sys_time(int * tloc)
+{
+	struct timeval now; 
+	int i; 
+
+	do_gettimeofday(&now);
+	i = now.tv_sec;
+	if (tloc) {
+		if (put_user(i,tloc))
+			i = -EFAULT;
+	}
+	return i;
+}
+
+#if !defined(CONFIG_XEN)
+
+/*
+ * sys_stime() can be implemented in user-level using
+ * sys_settimeofday().  Is this for backwards compatibility?  If so,
+ * why not move it into the appropriate arch directory (for those
+ * architectures that need it).
+ */
+ 
+asmlinkage long sys_stime(int * tptr)
+{
+	int value;
+
+	if (!capable(CAP_SYS_TIME))
+		return -EPERM;
+	if (get_user(value, tptr))
+		return -EFAULT;
+	write_lock_irq(&xtime_lock);
+	vxtime_lock();
+	xtime.tv_sec = value;
+	xtime.tv_usec = 0;
+	vxtime_unlock();
+	time_adjust = 0;	/* stop active adjtime() */
+	time_status |= STA_UNSYNC;
+	time_maxerror = NTP_PHASE_LIMIT;
+	time_esterror = NTP_PHASE_LIMIT;
+	write_unlock_irq(&xtime_lock);
+	return 0;
+}
+
+#endif
+
+#endif
+
+asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	if (tv) {
+		struct timeval ktv;
+		do_gettimeofday(&ktv);
+		if (copy_to_user(tv, &ktv, sizeof(ktv)))
+			return -EFAULT;
+	}
+	if (tz) {
+		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+/*
+ * Adjust the time obtained from the CMOS to be UTC time instead of
+ * local time.
+ * 
+ * This is ugly, but preferable to the alternatives.  Otherwise we
+ * would either need to write a program to do it in /etc/rc (and risk
+ * confusion if the program gets run more than once; it would also be 
+ * hard to make the program warp the clock precisely n hours)  or
+ * compile in the timezone information into the kernel.  Bad, bad....
+ *
+ *              				- TYT, 1992-01-01
+ *
+ * The best thing to do is to keep the CMOS clock in universal time (UTC)
+ * as real UNIX machines always do it. This avoids all headaches about
+ * daylight saving times and warping kernel clocks.
+ */
+inline static void warp_clock(void)
+{
+	write_lock_irq(&xtime_lock);
+	vxtime_lock();
+	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
+	vxtime_unlock();
+	write_unlock_irq(&xtime_lock);
+}
+
+/*
+ * In case for some reason the CMOS clock has not already been running
+ * in UTC, but in some local time: The first time we set the timezone,
+ * we will warp the clock so that it is ticking UTC time instead of
+ * local time. Presumably, if someone is setting the timezone then we
+ * are running in an environment where the programs understand about
+ * timezones. This should be done at boot time in the /etc/rc script,
+ * as soon as possible, so that the clock can be set right. Otherwise,
+ * various programs will get confused when the clock gets warped.
+ */
+
+int do_sys_settimeofday(struct timeval *tv, struct timezone *tz)
+{
+	static int firsttime = 1;
+
+	if (!capable(CAP_SYS_TIME))
+		return -EPERM;
+		
+	if (tz) {
+		/* SMP safe, global irq locking makes it work. */
+		sys_tz = *tz;
+		if (firsttime) {
+			firsttime = 0;
+			if (!tv)
+				warp_clock();
+		}
+	}
+	if (tv)
+	{
+		/* SMP safe, again the code in arch/foo/time.c should
+		 * globally block out interrupts when it runs.
+		 */
+		do_settimeofday(tv);
+	}
+	return 0;
+}
+
+asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz)
+{
+	struct timeval	new_tv;
+	struct timezone new_tz;
+
+	if (tv) {
+		if (copy_from_user(&new_tv, tv, sizeof(*tv)))
+			return -EFAULT;
+	}
+	if (tz) {
+		if (copy_from_user(&new_tz, tz, sizeof(*tz)))
+			return -EFAULT;
+	}
+
+	return do_sys_settimeofday(tv ? &new_tv : NULL, tz ? &new_tz : NULL);
+}
+
+long pps_offset;		/* pps time offset (us) */
+long pps_jitter = MAXTIME;	/* time dispersion (jitter) (us) */
+
+long pps_freq;			/* frequency offset (scaled ppm) */
+long pps_stabil = MAXFREQ;	/* frequency dispersion (scaled ppm) */
+
+long pps_valid = PPS_VALID;	/* pps signal watchdog counter */
+
+int pps_shift = PPS_SHIFT;	/* interval duration (s) (shift) */
+
+long pps_jitcnt;		/* jitter limit exceeded */
+long pps_calcnt;		/* calibration intervals */
+long pps_errcnt;		/* calibration errors */
+long pps_stbcnt;		/* stability limit exceeded */
+
+/* hook for a loadable hardpps kernel module */
+void (*hardpps_ptr)(struct timeval *);
+
+/* adjtimex mainly allows reading (and writing, if superuser) of
+ * kernel time-keeping variables. used by xntpd.
+ */
+int do_adjtimex(struct timex *txc)
+{
+        long ltemp, mtemp, save_adjust;
+	int result;
+
+	/* In order to modify anything, you gotta be super-user! */
+	if (txc->modes && !capable(CAP_SYS_TIME))
+		return -EPERM;
+		
+	/* Now we validate the data before disabling interrupts */
+
+	if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
+	  /* singleshot must not be used with any other mode bits */
+		if (txc->modes != ADJ_OFFSET_SINGLESHOT)
+			return -EINVAL;
+
+	if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
+	  /* adjustment Offset limited to +- .512 seconds */
+		if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
+			return -EINVAL;	
+
+	/* if the quartz is off by more than 10% something is VERY wrong ! */
+	if (txc->modes & ADJ_TICK)
+		if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ)
+			return -EINVAL;
+
+	write_lock_irq(&xtime_lock);
+	result = time_state;	/* mostly `TIME_OK' */
+
+	/* Save for later - semantics of adjtime is to return old value */
+	save_adjust = time_adjust;
+
+#if 0	/* STA_CLOCKERR is never set yet */
+	time_status &= ~STA_CLOCKERR;		/* reset STA_CLOCKERR */
+#endif
+	/* If there are input parameters, then process them */
+	if (txc->modes)
+	{
+	    if (txc->modes & ADJ_STATUS)	/* only set allowed bits */
+		time_status =  (txc->status & ~STA_RONLY) |
+			      (time_status & STA_RONLY);
+
+	    if (txc->modes & ADJ_FREQUENCY) {	/* p. 22 */
+		if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
+		    result = -EINVAL;
+		    goto leave;
+		}
+		time_freq = txc->freq - pps_freq;
+	    }
+
+	    if (txc->modes & ADJ_MAXERROR) {
+		if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
+		    result = -EINVAL;
+		    goto leave;
+		}
+		time_maxerror = txc->maxerror;
+	    }
+
+	    if (txc->modes & ADJ_ESTERROR) {
+		if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
+		    result = -EINVAL;
+		    goto leave;
+		}
+		time_esterror = txc->esterror;
+	    }
+
+	    if (txc->modes & ADJ_TIMECONST) {	/* p. 24 */
+		if (txc->constant < 0) {	/* NTP v4 uses values > 6 */
+		    result = -EINVAL;
+		    goto leave;
+		}
+		time_constant = txc->constant;
+	    }
+
+	    if (txc->modes & ADJ_OFFSET) {	/* values checked earlier */
+		if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
+		    /* adjtime() is independent from ntp_adjtime() */
+		    time_adjust = txc->offset;
+		}
+		else if ( time_status & (STA_PLL | STA_PPSTIME) ) {
+		    ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
+		            (STA_PPSTIME | STA_PPSSIGNAL) ?
+		            pps_offset : txc->offset;
+
+		    /*
+		     * Scale the phase adjustment and
+		     * clamp to the operating range.
+		     */
+		    if (ltemp > MAXPHASE)
+		        time_offset = MAXPHASE << SHIFT_UPDATE;
+		    else if (ltemp < -MAXPHASE)
+			time_offset = -(MAXPHASE << SHIFT_UPDATE);
+		    else
+		        time_offset = ltemp << SHIFT_UPDATE;
+
+		    /*
+		     * Select whether the frequency is to be controlled
+		     * and in which mode (PLL or FLL). Clamp to the operating
+		     * range. Ugly multiply/divide should be replaced someday.
+		     */
+
+		    if (time_status & STA_FREQHOLD || time_reftime == 0)
+		        time_reftime = xtime.tv_sec;
+		    mtemp = xtime.tv_sec - time_reftime;
+		    time_reftime = xtime.tv_sec;
+		    if (time_status & STA_FLL) {
+		        if (mtemp >= MINSEC) {
+			    ltemp = (time_offset / mtemp) << (SHIFT_USEC -
+							      SHIFT_UPDATE);
+			    if (ltemp < 0)
+			        time_freq -= -ltemp >> SHIFT_KH;
+			    else
+			        time_freq += ltemp >> SHIFT_KH;
+			} else /* calibration interval too short (p. 12) */
+				result = TIME_ERROR;
+		    } else {	/* PLL mode */
+		        if (mtemp < MAXSEC) {
+			    ltemp *= mtemp;
+			    if (ltemp < 0)
+			        time_freq -= -ltemp >> (time_constant +
+							time_constant +
+							SHIFT_KF - SHIFT_USEC);
+			    else
+			        time_freq += ltemp >> (time_constant +
+						       time_constant +
+						       SHIFT_KF - SHIFT_USEC);
+			} else /* calibration interval too long (p. 12) */
+				result = TIME_ERROR;
+		    }
+		    if (time_freq > time_tolerance)
+		        time_freq = time_tolerance;
+		    else if (time_freq < -time_tolerance)
+		        time_freq = -time_tolerance;
+		} /* STA_PLL || STA_PPSTIME */
+	    } /* txc->modes & ADJ_OFFSET */
+	    if (txc->modes & ADJ_TICK) {
+		/* if the quartz is off by more than 10% something is
+		   VERY wrong ! */
+		if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) {
+		    result = -EINVAL;
+		    goto leave;
+		}
+		tick = txc->tick;
+	    }
+	} /* txc->modes */
+leave:	if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
+	    || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
+		&& (time_status & STA_PPSSIGNAL) == 0)
+	    /* p. 24, (b) */
+	    || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
+		== (STA_PPSTIME|STA_PPSJITTER))
+	    /* p. 24, (c) */
+	    || ((time_status & STA_PPSFREQ) != 0
+		&& (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
+	    /* p. 24, (d) */
+		result = TIME_ERROR;
+	
+	if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
+	    txc->offset	   = save_adjust;
+	else {
+	    if (time_offset < 0)
+		txc->offset = -(-time_offset >> SHIFT_UPDATE);
+	    else
+		txc->offset = time_offset >> SHIFT_UPDATE;
+	}
+	txc->freq	   = time_freq + pps_freq;
+	txc->maxerror	   = time_maxerror;
+	txc->esterror	   = time_esterror;
+	txc->status	   = time_status;
+	txc->constant	   = time_constant;
+	txc->precision	   = time_precision;
+	txc->tolerance	   = time_tolerance;
+	txc->tick	   = tick;
+	txc->ppsfreq	   = pps_freq;
+	txc->jitter	   = pps_jitter >> PPS_AVG;
+	txc->shift	   = pps_shift;
+	txc->stabil	   = pps_stabil;
+	txc->jitcnt	   = pps_jitcnt;
+	txc->calcnt	   = pps_calcnt;
+	txc->errcnt	   = pps_errcnt;
+	txc->stbcnt	   = pps_stbcnt;
+	write_unlock_irq(&xtime_lock);
+	do_gettimeofday(&txc->time);
+	return(result);
+}
+
+asmlinkage long sys_adjtimex(struct timex *txc_p)
+{
+	struct timex txc;		/* Local copy of parameter */
+	int ret;
+
+	/* Copy the user data space into the kernel copy
+	 * structure. But bear in mind that the structures
+	 * may change
+	 */
+	if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
+		return -EFAULT;
+	ret = do_adjtimex(&txc);
+	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+}
diff --git a/linux-2.4-xen-sparse/kernel/timer.c b/linux-2.4-xen-sparse/kernel/timer.c
new file mode 100644
index 0000000000..567794ab26
--- /dev/null
+++ b/linux-2.4-xen-sparse/kernel/timer.c
@@ -0,0 +1,968 @@
+/*
+ *  linux/kernel/timer.c
+ *
+ *  Kernel internal timers, kernel timekeeping, basic process system calls
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
+ *
+ *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
+ *              "A Kernel Model for Precision Timekeeping" by Dave Mills
+ *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ *              serialize accesses to xtime/lost_ticks).
+ *                              Copyright (C) 1998  Andrea Arcangeli
+ *  1999-03-10  Improved NTP compatibility by Ulrich Windl
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/timex.h>
+#include <linux/delay.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * Timekeeping variables
+ */
+
+long tick = (1000000 + HZ/2) / HZ;	/* timer interrupt period */
+
+/* The current time */
+struct timeval xtime __attribute__ ((aligned (16)));
+
+/* Don't completely fail for HZ > 500.  */
+int tickadj = 500/HZ ? : 1;		/* microsecs */
+
+DECLARE_TASK_QUEUE(tq_timer);
+DECLARE_TASK_QUEUE(tq_immediate);
+
+/*
+ * phase-lock loop variables
+ */
+/* TIME_ERROR prevents overwriting the CMOS clock */
+int time_state = TIME_OK;		/* clock synchronization status	*/
+int time_status = STA_UNSYNC;		/* clock status bits		*/
+long time_offset;			/* time adjustment (us)		*/
+long time_constant = 2;			/* pll time constant		*/
+long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
+long time_precision = 1;		/* clock precision (us)		*/
+long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
+long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
+long time_phase;			/* phase offset (scaled us)	*/
+long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
+					/* frequency offset (scaled ppm)*/
+long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
+long time_reftime;			/* time at last adjustment (s)	*/
+
+long time_adjust;
+long time_adjust_step;
+
+unsigned long event;
+
+extern int do_setitimer(int, struct itimerval *, struct itimerval *);
+
+unsigned long volatile jiffies;
+
+unsigned int * prof_buffer;
+unsigned long prof_len;
+unsigned long prof_shift;
+
+/*
+ * Event timer code
+ */
+#define TVN_BITS 6
+#define TVR_BITS 8
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
+struct timer_vec {
+	int index;
+	struct list_head vec[TVN_SIZE];
+};
+
+struct timer_vec_root {
+	int index;
+	struct list_head vec[TVR_SIZE];
+};
+
+static struct timer_vec tv5;
+static struct timer_vec tv4;
+static struct timer_vec tv3;
+static struct timer_vec tv2;
+static struct timer_vec_root tv1;
+
+static struct timer_vec * const tvecs[] = {
+	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+};
+
+static struct list_head * run_timer_list_running;
+
+#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
+
+void init_timervecs (void)
+{
+	int i;
+
+	for (i = 0; i < TVN_SIZE; i++) {
+		INIT_LIST_HEAD(tv5.vec + i);
+		INIT_LIST_HEAD(tv4.vec + i);
+		INIT_LIST_HEAD(tv3.vec + i);
+		INIT_LIST_HEAD(tv2.vec + i);
+	}
+	for (i = 0; i < TVR_SIZE; i++)
+		INIT_LIST_HEAD(tv1.vec + i);
+}
+
+static unsigned long timer_jiffies;
+
+static inline void internal_add_timer(struct timer_list *timer)
+{
+	/*
+	 * must be cli-ed when calling this
+	 */
+	unsigned long expires = timer->expires;
+	unsigned long idx = expires - timer_jiffies;
+	struct list_head * vec;
+
+	if (run_timer_list_running)
+		vec = run_timer_list_running;
+	else if (idx < TVR_SIZE) {
+		int i = expires & TVR_MASK;
+		vec = tv1.vec + i;
+	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+		int i = (expires >> TVR_BITS) & TVN_MASK;
+		vec = tv2.vec + i;
+	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+		vec =  tv3.vec + i;
+	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+		vec = tv4.vec + i;
+	} else if ((signed long) idx < 0) {
+		/* can happen if you add a timer with expires == jiffies,
+		 * or you set a timer to go off in the past
+		 */
+		vec = tv1.vec + tv1.index;
+	} else if (idx <= 0xffffffffUL) {
+		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+		vec = tv5.vec + i;
+	} else {
+		/* Can only get here on architectures with 64-bit jiffies */
+		INIT_LIST_HEAD(&timer->list);
+		return;
+	}
+	/*
+	 * Timers are FIFO!
+	 */
+	list_add(&timer->list, vec->prev);
+}
+
+/* Initialize both explicitly - let's try to have them in the same cache line */
+spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_SMP
+volatile struct timer_list * volatile running_timer;
+#define timer_enter(t) do { running_timer = t; mb(); } while (0)
+#define timer_exit() do { running_timer = NULL; } while (0)
+#define timer_is_running(t) (running_timer == t)
+#define timer_synchronize(t) while (timer_is_running(t)) barrier()
+#else
+#define timer_enter(t)		do { } while (0)
+#define timer_exit()		do { } while (0)
+#endif
+
+void add_timer(struct timer_list *timer)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&timerlist_lock, flags);
+	if (timer_pending(timer))
+		goto bug;
+	internal_add_timer(timer);
+	spin_unlock_irqrestore(&timerlist_lock, flags);
+	return;
+bug:
+	spin_unlock_irqrestore(&timerlist_lock, flags);
+	printk("bug: kernel timer added twice at %p.\n",
+			__builtin_return_address(0));
+}
+
+static inline int detach_timer (struct timer_list *timer)
+{
+	if (!timer_pending(timer))
+		return 0;
+	list_del(&timer->list);
+	return 1;
+}
+
+int mod_timer(struct timer_list *timer, unsigned long expires)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&timerlist_lock, flags);
+	timer->expires = expires;
+	ret = detach_timer(timer);
+	internal_add_timer(timer);
+	spin_unlock_irqrestore(&timerlist_lock, flags);
+	return ret;
+}
+
+int del_timer(struct timer_list * timer)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&timerlist_lock, flags);
+	ret = detach_timer(timer);
+	timer->list.next = timer->list.prev = NULL;
+	spin_unlock_irqrestore(&timerlist_lock, flags);
+	return ret;
+}
+
+#ifdef CONFIG_SMP
+void sync_timers(void)
+{
+	spin_unlock_wait(&global_bh_lock);
+}
+
+/*
+ * SMP specific function to delete periodic timer.
+ * Caller must disable by some means restarting the timer
+ * for new. Upon exit the timer is not queued and handler is not running
+ * on any CPU. It returns number of times, which timer was deleted
+ * (for reference counting).
+ */
+
+int del_timer_sync(struct timer_list * timer)
+{
+	int ret = 0;
+
+	for (;;) {
+		unsigned long flags;
+		int running;
+
+		spin_lock_irqsave(&timerlist_lock, flags);
+		ret += detach_timer(timer);
+		timer->list.next = timer->list.prev = 0;
+		running = timer_is_running(timer);
+		spin_unlock_irqrestore(&timerlist_lock, flags);
+
+		if (!running)
+			break;
+
+		timer_synchronize(timer);
+	}
+
+	return ret;
+}
+#endif
+
+
+static inline void cascade_timers(struct timer_vec *tv)
+{
+	/* cascade all the timers from tv up one level */
+	struct list_head *head, *curr, *next;
+
+	head = tv->vec + tv->index;
+	curr = head->next;
+	/*
+	 * We are removing _all_ timers from the list, so we don't  have to
+	 * detach them individually, just clear the list afterwards.
+	 */
+	while (curr != head) {
+		struct timer_list *tmp;
+
+		tmp = list_entry(curr, struct timer_list, list);
+		next = curr->next;
+		list_del(curr); // not needed
+		internal_add_timer(tmp);
+		curr = next;
+	}
+	INIT_LIST_HEAD(head);
+	tv->index = (tv->index + 1) & TVN_MASK;
+}
+
+static inline void run_timer_list(void)
+{
+	spin_lock_irq(&timerlist_lock);
+	while ((long)(jiffies - timer_jiffies) >= 0) {
+		LIST_HEAD(queued);
+		struct list_head *head, *curr;
+		if (!tv1.index) {
+			int n = 1;
+			do {
+				cascade_timers(tvecs[n]);
+			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
+		}
+		run_timer_list_running = &queued;
+repeat:
+		head = tv1.vec + tv1.index;
+		curr = head->next;
+		if (curr != head) {
+			struct timer_list *timer;
+			void (*fn)(unsigned long);
+			unsigned long data;
+
+			timer = list_entry(curr, struct timer_list, list);
+ 			fn = timer->function;
+ 			data= timer->data;
+
+			detach_timer(timer);
+			timer->list.next = timer->list.prev = NULL;
+			timer_enter(timer);
+			spin_unlock_irq(&timerlist_lock);
+			fn(data);
+			spin_lock_irq(&timerlist_lock);
+			timer_exit();
+			goto repeat;
+		}
+		run_timer_list_running = NULL;
+		++timer_jiffies; 
+		tv1.index = (tv1.index + 1) & TVR_MASK;
+
+		curr = queued.next;
+		while (curr != &queued) {
+			struct timer_list *timer;
+
+			timer = list_entry(curr, struct timer_list, list);
+			curr = curr->next;
+			internal_add_timer(timer);
+		}			
+	}
+	spin_unlock_irq(&timerlist_lock);
+}
+
+#ifdef CONFIG_NO_IDLE_HZ
+/*
+ * Find out when the next timer event is due to happen. This
+ * is used on S/390 to stop all activity when all cpus are idle.
+ * And in XenoLinux to achieve the same.
+ * The timerlist_lock must be acquired before calling this function.
+ */
+struct timer_list *next_timer_event(void)
+{
+	struct timer_list *nte, *tmp;
+	struct list_head *lst;
+	int i, j;
+
+	/* Look for the next timer event in tv1. */
+	i = 0;
+	j = tvecs[0]->index;
+	do {
+		struct list_head *head = tvecs[0]->vec + j;
+		if (!list_empty(head)) {
+			nte = list_entry(head->next, struct timer_list, list);
+			goto found;
+		}
+		j = (j + 1) & TVR_MASK;
+	} while (j != tv1.index);
+
+	/* No event found in tv1. Check tv2-tv5. */
+	for (i = 1; i < NOOF_TVECS; i++) {
+		j = tvecs[i]->index;
+		do {
+			nte = NULL;
+			list_for_each(lst, tvecs[i]->vec + j) {
+				tmp = list_entry(lst, struct timer_list, list);
+				if (nte == NULL ||
+				    time_before(tmp->expires, nte->expires))
+					nte = tmp;
+			}
+			if (nte)
+				goto found;
+			j = (j + 1) & TVN_MASK;
+		} while (j != tvecs[i]->index);
+	}
+	return NULL;
+found:
+	/* Found timer event in tvecs[i]->vec[j] */
+	if (j < tvecs[i]->index && i < NOOF_TVECS-1) {
+		/* 
+		 * The search wrapped. We need to look at the next list
+		 * from tvecs[i+1] that would cascade into tvecs[i].
+		 */
+		list_for_each(lst, tvecs[i+1]->vec+tvecs[i+1]->index) {
+			tmp = list_entry(lst, struct timer_list, list);
+			if (time_before(tmp->expires, nte->expires))
+				nte = tmp;
+		}
+	}
+	return nte;
+}
+#endif
+
+spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
+
+void tqueue_bh(void)
+{
+	run_task_queue(&tq_timer);
+}
+
+void immediate_bh(void)
+{
+	run_task_queue(&tq_immediate);
+}
+
+/*
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ */
+static void second_overflow(void)
+{
+    long ltemp;
+
+    /* Bump the maxerror field */
+    time_maxerror += time_tolerance >> SHIFT_USEC;
+    if ( time_maxerror > NTP_PHASE_LIMIT ) {
+	time_maxerror = NTP_PHASE_LIMIT;
+	time_status |= STA_UNSYNC;
+    }
+
+    /*
+     * Leap second processing. If in leap-insert state at
+     * the end of the day, the system clock is set back one
+     * second; if in leap-delete state, the system clock is
+     * set ahead one second. The microtime() routine or
+     * external clock driver will insure that reported time
+     * is always monotonic. The ugly divides should be
+     * replaced.
+     */
+    switch (time_state) {
+
+    case TIME_OK:
+	if (time_status & STA_INS)
+	    time_state = TIME_INS;
+	else if (time_status & STA_DEL)
+	    time_state = TIME_DEL;
+	break;
+
+    case TIME_INS:
+	if (xtime.tv_sec % 86400 == 0) {
+	    xtime.tv_sec--;
+	    time_state = TIME_OOP;
+	    printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
+	}
+	break;
+
+    case TIME_DEL:
+	if ((xtime.tv_sec + 1) % 86400 == 0) {
+	    xtime.tv_sec++;
+	    time_state = TIME_WAIT;
+	    printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
+	}
+	break;
+
+    case TIME_OOP:
+	time_state = TIME_WAIT;
+	break;
+
+    case TIME_WAIT:
+	if (!(time_status & (STA_INS | STA_DEL)))
+	    time_state = TIME_OK;
+    }
+
+    /*
+     * Compute the phase adjustment for the next second. In
+     * PLL mode, the offset is reduced by a fixed factor
+     * times the time constant. In FLL mode the offset is
+     * used directly. In either mode, the maximum phase
+     * adjustment for each second is clamped so as to spread
+     * the adjustment over not more than the number of
+     * seconds between updates.
+     */
+    if (time_offset < 0) {
+	ltemp = -time_offset;
+	if (!(time_status & STA_FLL))
+	    ltemp >>= SHIFT_KG + time_constant;
+	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+	time_offset += ltemp;
+	time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+    } else {
+	ltemp = time_offset;
+	if (!(time_status & STA_FLL))
+	    ltemp >>= SHIFT_KG + time_constant;
+	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+	time_offset -= ltemp;
+	time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+    }
+
+    /*
+     * Compute the frequency estimate and additional phase
+     * adjustment due to frequency error for the next
+     * second. When the PPS signal is engaged, gnaw on the
+     * watchdog counter and update the frequency computed by
+     * the pll and the PPS signal.
+     */
+    pps_valid++;
+    if (pps_valid == PPS_VALID) {	/* PPS signal lost */
+	pps_jitter = MAXTIME;
+	pps_stabil = MAXFREQ;
+	time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
+			 STA_PPSWANDER | STA_PPSERROR);
+    }
+    ltemp = time_freq + pps_freq;
+    if (ltemp < 0)
+	time_adj -= -ltemp >>
+	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+    else
+	time_adj += ltemp >>
+	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+
+#if HZ == 100
+    /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
+     * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
+     */
+    if (time_adj < 0)
+	time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
+    else
+	time_adj += (time_adj >> 2) + (time_adj >> 5);
+#endif
+}
+
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+	if ( (time_adjust_step = time_adjust) != 0 ) {
+	    /* We are doing an adjtime thing. 
+	     *
+	     * Prepare time_adjust_step to be within bounds.
+	     * Note that a positive time_adjust means we want the clock
+	     * to run faster.
+	     *
+	     * Limit the amount of the step to be in the range
+	     * -tickadj .. +tickadj
+	     */
+	     if (time_adjust > tickadj)
+		time_adjust_step = tickadj;
+	     else if (time_adjust < -tickadj)
+		time_adjust_step = -tickadj;
+	     
+	    /* Reduce by this step the amount of time left  */
+	    time_adjust -= time_adjust_step;
+	}
+	xtime.tv_usec += tick + time_adjust_step;
+	/*
+	 * Advance the phase, once it gets to one microsecond, then
+	 * advance the tick more.
+	 */
+	time_phase += time_adj;
+	if (time_phase <= -FINEUSEC) {
+		long ltemp = -time_phase >> SHIFT_SCALE;
+		time_phase += ltemp << SHIFT_SCALE;
+		xtime.tv_usec -= ltemp;
+	}
+	else if (time_phase >= FINEUSEC) {
+		long ltemp = time_phase >> SHIFT_SCALE;
+		time_phase -= ltemp << SHIFT_SCALE;
+		xtime.tv_usec += ltemp;
+	}
+}
+
+/*
+ * Using a loop looks inefficient, but "ticks" is
+ * usually just one (we shouldn't be losing ticks,
+ * we're doing this this way mainly for interrupt
+ * latency reasons, not because we think we'll
+ * have lots of lost timer ticks
+ */
+static void update_wall_time(unsigned long ticks)
+{
+	do {
+		ticks--;
+		update_wall_time_one_tick();
+	} while (ticks);
+
+	while (xtime.tv_usec >= 1000000) {
+	    xtime.tv_usec -= 1000000;
+	    xtime.tv_sec++;
+	    second_overflow();
+	}
+}
+
+static inline void do_process_times(struct task_struct *p,
+	unsigned long user, unsigned long system)
+{
+	unsigned long psecs;
+
+	psecs = (p->times.tms_utime += user);
+	psecs += (p->times.tms_stime += system);
+	if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
+		/* Send SIGXCPU every second.. */
+		if (!(psecs % HZ))
+			send_sig(SIGXCPU, p, 1);
+		/* and SIGKILL when we go over max.. */
+		if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
+			send_sig(SIGKILL, p, 1);
+	}
+}
+
+static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
+{
+	unsigned long it_virt = p->it_virt_value;
+
+	if (it_virt) {
+		it_virt -= ticks;
+		if (!it_virt) {
+			it_virt = p->it_virt_incr;
+			send_sig(SIGVTALRM, p, 1);
+		}
+		p->it_virt_value = it_virt;
+	}
+}
+
+static inline void do_it_prof(struct task_struct *p)
+{
+	unsigned long it_prof = p->it_prof_value;
+
+	if (it_prof) {
+		if (--it_prof == 0) {
+			it_prof = p->it_prof_incr;
+			send_sig(SIGPROF, p, 1);
+		}
+		p->it_prof_value = it_prof;
+	}
+}
+
+void update_one_process(struct task_struct *p, unsigned long user,
+			unsigned long system, int cpu)
+{
+	p->per_cpu_utime[cpu] += user;
+	p->per_cpu_stime[cpu] += system;
+	do_process_times(p, user, system);
+	do_it_virt(p, user);
+	do_it_prof(p);
+}	
+
+/*
+ * Called from the timer interrupt handler to charge one tick to the current 
+ * process.  user_tick is 1 if the tick is user time, 0 for system.
+ */
+void update_process_times(int user_tick)
+{
+	struct task_struct *p = current;
+	int cpu = smp_processor_id(), system = user_tick ^ 1;
+
+	update_one_process(p, user_tick, system, cpu);
+	if (p->pid) {
+		if (--p->counter <= 0) {
+			p->counter = 0;
+			/*
+			 * SCHED_FIFO is priority preemption, so this is 
+			 * not the place to decide whether to reschedule a
+			 * SCHED_FIFO task or not - Bhavesh Davda
+			 */
+			if (p->policy != SCHED_FIFO) {
+				p->need_resched = 1;
+			}
+		}
+		if (p->nice > 0)
+			kstat.per_cpu_nice[cpu] += user_tick;
+		else
+			kstat.per_cpu_user[cpu] += user_tick;
+		kstat.per_cpu_system[cpu] += system;
+	} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
+		kstat.per_cpu_system[cpu] += system;
+}
+
+/*
+ * Called from the timer interrupt handler to charge a couple of ticks
+ * to the current process.
+ */
+void update_process_times_us(int user_ticks, int system_ticks)
+{
+	struct task_struct *p = current;
+	int cpu = smp_processor_id();
+
+	update_one_process(p, user_ticks, system_ticks, cpu);
+	if (p->pid) {
+		p->counter -= user_ticks + system_ticks;
+		if (p->counter <= 0) {
+			p->counter = 0;
+			p->need_resched = 1;
+		}
+		if (p->nice > 0)
+			kstat.per_cpu_nice[cpu] += user_ticks;
+		else
+			kstat.per_cpu_user[cpu] += user_ticks;
+		kstat.per_cpu_system[cpu] += system_ticks;
+	} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
+		kstat.per_cpu_system[cpu] += system_ticks;
+}
+
+/*
+ * Nr of active tasks - counted in fixed-point numbers
+ */
+static unsigned long count_active_tasks(void)
+{
+	struct task_struct *p;
+	unsigned long nr = 0;
+
+	read_lock(&tasklist_lock);
+	for_each_task(p) {
+		if ((p->state == TASK_RUNNING ||
+		     (p->state & TASK_UNINTERRUPTIBLE)))
+			nr += FIXED_1;
+	}
+	read_unlock(&tasklist_lock);
+	return nr;
+}
+
+/*
+ * Hmm.. Changed this, as the GNU make sources (load.c) seems to
+ * imply that avenrun[] is the standard name for this kind of thing.
+ * Nothing else seems to be standardized: the fractional size etc
+ * all seem to differ on different machines.
+ */
+unsigned long avenrun[3];
+
+static inline void calc_load(unsigned long ticks)
+{
+	unsigned long active_tasks; /* fixed-point */
+	static int count = LOAD_FREQ;
+
+	count -= ticks;
+	while (count < 0) {
+		count += LOAD_FREQ;
+		active_tasks = count_active_tasks();
+		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
+		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
+		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+	}
+}
+
+/* jiffies at the most recent update of wall time */
+unsigned long wall_jiffies;
+
+/*
+ * This spinlock protect us from races in SMP while playing with xtime. -arca
+ */
+rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
+
+static inline void update_times(void)
+{
+	unsigned long ticks;
+
+	/*
+	 * update_times() is run from the raw timer_bh handler so we
+	 * just know that the irqs are locally enabled and so we don't
+	 * need to save/restore the flags of the local CPU here. -arca
+	 */
+	write_lock_irq(&xtime_lock);
+	vxtime_lock();
+
+	ticks = jiffies - wall_jiffies;
+	if (ticks) {
+		wall_jiffies += ticks;
+		update_wall_time(ticks);
+	}
+	vxtime_unlock();
+	write_unlock_irq(&xtime_lock);
+	calc_load(ticks);
+}
+
+void timer_bh(void)
+{
+	update_times();
+	run_timer_list();
+}
+
+void do_timer(struct pt_regs *regs)
+{
+	(*(unsigned long *)&jiffies)++;
+#ifndef CONFIG_SMP
+	/* SMP process accounting uses the local APIC timer */
+
+	update_process_times(user_mode(regs));
+#endif
+	mark_bh(TIMER_BH);
+	if (TQ_ACTIVE(tq_timer))
+		mark_bh(TQUEUE_BH);
+}
+
+void do_timer_ticks(int ticks)
+{
+	(*(unsigned long *)&jiffies) += ticks;
+	mark_bh(TIMER_BH);
+	if (TQ_ACTIVE(tq_timer))
+		mark_bh(TQUEUE_BH);
+}
+
+#if !defined(__alpha__) && !defined(__ia64__)
+
+/*
+ * For backwards compatibility?  This can be done in libc so Alpha
+ * and all newer ports shouldn't need it.
+ */
+asmlinkage unsigned long sys_alarm(unsigned int seconds)
+{
+	struct itimerval it_new, it_old;
+	unsigned int oldalarm;
+
+	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+	it_new.it_value.tv_sec = seconds;
+	it_new.it_value.tv_usec = 0;
+	do_setitimer(ITIMER_REAL, &it_new, &it_old);
+	oldalarm = it_old.it_value.tv_sec;
+	/* ehhh.. We can't return 0 if we have an alarm pending.. */
+	/* And we'd better return too much than too little anyway */
+	if (it_old.it_value.tv_usec)
+		oldalarm++;
+	return oldalarm;
+}
+
+#endif
+
+#ifndef __alpha__
+
+/*
+ * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
+ * should be moved into arch/i386 instead?
+ */
+
+/**
+ * sys_getpid - return the thread group id of the current process
+ *
+ * Note, despite the name, this returns the tgid not the pid.  The tgid and
+ * the pid are identical unless CLONE_THREAD was specified on clone() in
+ * which case the tgid is the same in all threads of the same group.
+ *
+ * This is SMP safe as current->tgid does not change.
+ */
+asmlinkage long sys_getpid(void)
+{
+	return current->tgid;
+}
+
+/*
+ * This is not strictly SMP safe: p_opptr could change
+ * from under us. However, rather than getting any lock
+ * we can use an optimistic algorithm: get the parent
+ * pid, and go back and check that the parent is still
+ * the same. If it has changed (which is extremely unlikely
+ * indeed), we just try again..
+ *
+ * NOTE! This depends on the fact that even if we _do_
+ * get an old value of "parent", we can happily dereference
+ * the pointer: we just can't necessarily trust the result
+ * until we know that the parent pointer is valid.
+ *
+ * The "mb()" macro is a memory barrier - a synchronizing
+ * event. It also makes sure that gcc doesn't optimize
+ * away the necessary memory references.. The barrier doesn't
+ * have to have all that strong semantics: on x86 we don't
+ * really require a synchronizing instruction, for example.
+ * The barrier is more important for code generation than
+ * for any real memory ordering semantics (even if there is
+ * a small window for a race, using the old pointer is
+ * harmless for a while).
+ */
+asmlinkage long sys_getppid(void)
+{
+	int pid;
+	struct task_struct * me = current;
+	struct task_struct * parent;
+
+	parent = me->p_opptr;
+	for (;;) {
+		pid = parent->pid;
+#if CONFIG_SMP
+{
+		struct task_struct *old = parent;
+		mb();
+		parent = me->p_opptr;
+		if (old != parent)
+			continue;
+}
+#endif
+		break;
+	}
+	return pid;
+}
+
+asmlinkage long sys_getuid(void)
+{
+	/* Only we change this so SMP safe */
+	return current->uid;
+}
+
+asmlinkage long sys_geteuid(void)
+{
+	/* Only we change this so SMP safe */
+	return current->euid;
+}
+
+asmlinkage long sys_getgid(void)
+{
+	/* Only we change this so SMP safe */
+	return current->gid;
+}
+
+asmlinkage long sys_getegid(void)
+{
+	/* Only we change this so SMP safe */
+	return  current->egid;
+}
+
+#endif
+
+/* Thread ID - the internal kernel "pid" */
+asmlinkage long sys_gettid(void)
+{
+	return current->pid;
+}
+
+asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
+{
+	struct timespec t;
+	unsigned long expire;
+
+	if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
+		return -EFAULT;
+
+	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
+		return -EINVAL;
+
+
+	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
+	    current->policy != SCHED_OTHER)
+	{
+		/*
+		 * Short delay requests up to 2 ms will be handled with
+		 * high precision by a busy wait for all real-time processes.
+		 *
+		 * Its important on SMP not to do this holding locks.
+		 */
+		udelay((t.tv_nsec + 999) / 1000);
+		return 0;
+	}
+
+	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
+
+	current->state = TASK_INTERRUPTIBLE;
+	expire = schedule_timeout(expire);
+
+	if (expire) {
+		if (rmtp) {
+			jiffies_to_timespec(expire, &t);
+			if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
+				return -EFAULT;
+		}
+		return -EINTR;
+	}
+	return 0;
+}
+
diff --git a/linux-2.4-xen-sparse/mkbuildtree b/linux-2.4-xen-sparse/mkbuildtree
new file mode 100755
index 0000000000..714d85e69f
--- /dev/null
+++ b/linux-2.4-xen-sparse/mkbuildtree
@@ -0,0 +1,292 @@
+#!/bin/bash
+
+# mkbuildtree <build tree>
+#
+# Creates symbolic links in <build tree> for the sparse tree
+# in the current directory.
+
+# Script to determine the relative path between two directories.
+# Copyright (c) D. J. Hawkey Jr. 2002
+# Fixed for Xen project by K. Fraser in 2003.  
+abs_to_rel ()
+{
+	local CWD SRCPATH
+                
+	if [ "$1" != "/" -a "${1##*[^/]}" = "/" ]; then
+		SRCPATH=${1%?}
+	else
+		SRCPATH=$1
+	fi
+	if [ "$2" != "/" -a "${2##*[^/]}" = "/" ]; then
+		DESTPATH=${2%?}
+	else
+		DESTPATH=$2
+	fi
+
+	CWD=$PWD
+	[ "${1%%[^/]*}" != "/" ] && cd $1 && SRCPATH=$PWD
+	[ "${2%%[^/]*}" != "/" ] && cd $2 && DESTPATH=$PWD
+	[ "$CWD" != "$PWD" ] && cd $CWD
+
+	BASEPATH=$SRCPATH
+
+	[ "$SRCPATH" = "$DESTPATH" ] && DESTPATH="." && return
+	[ "$SRCPATH" = "/" ] && DESTPATH=${DESTPATH#?} && return
+
+	while [ "$BASEPATH/" != "${DESTPATH%${DESTPATH#$BASEPATH/}}" ]; do
+          BASEPATH=${BASEPATH%/*}
+	done
+
+	SRCPATH=${SRCPATH#$BASEPATH}
+        DESTPATH=${DESTPATH#$BASEPATH}
+        DESTPATH=${DESTPATH#?}
+	while [ -n "$SRCPATH" ]; do
+		SRCPATH=${SRCPATH%/*}
+		DESTPATH="../$DESTPATH"
+	done
+
+	[ -z "$BASEPATH" ] && BASEPATH="/"
+	[ "${DESTPATH##*[^/]}" = "/" ] && DESTPATH=${DESTPATH%?}
+}
+
+# relative_lndir <target_dir>
+# Creates a tree of symlinks in the current working directory that mirror
+# real files in <target_dir>. <target_dir> should be relative to the current
+# working directory. Symlinks in <target_dir> are ignored. Source-control files
+# are ignored.
+relative_lndir ()
+{
+  local SYMLINK_DIR REAL_DIR pref i j
+  SYMLINK_DIR=$PWD
+  REAL_DIR=$1
+  (
+  cd $REAL_DIR
+  for i in `find . -type d | grep -v SCCS`; do
+    [ -d $SYMLINK_DIR/$i ] || mkdir -p $SYMLINK_DIR/$i
+    (
+    cd $i
+    pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'`
+    for j in `find . -maxdepth 1 -type f -o -type l`; do
+      ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j
+    done
+    )
+  done
+  )
+}
+
+[ "$1" == "" ] && { echo "Syntax: $0 <linux tree to xenify>"; exit 1; }
+
+# Get absolute path to the destination directory
+pushd . >/dev/null
+cd ${1} || { echo "cannot cd to ${1}"; exit 1; }
+AD=$PWD
+popd >/dev/null
+  
+# Get absolute path to the source directory
+AS=`pwd`
+
+# Get path to source, relative to destination
+abs_to_rel ${AD} ${AS}
+RS=$DESTPATH
+
+# Remove old copies of files and directories at the destination
+for i in `find . -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done
+
+# We now work from the destination directory
+cd ${AD} || { echo "cannot cd to ${AD}"; exit 1; }
+
+# Remove old symlinks
+for i in `find . -type l`; do rm -f $i; done
+
+# Create symlinks of files and directories which exist in the sparse source
+relative_lndir ${RS}
+rm -f mkbuildtree
+
+set ${RS}/../linux-2.6.*-xen-sparse
+[ "$1" == "${RS}/../linux-2.6.*-xen-parse" ] && { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6.*-xen-sparse"; exit 1; }
+LINUX_26="$1"
+
+
+# Create links to the shared definitions of the Xen interfaces.
+rm -rf ${AD}/include/asm-xen/xen-public
+mkdir  ${AD}/include/asm-xen/xen-public
+cd     ${AD}/include/asm-xen/xen-public
+relative_lndir ../../../${RS}/../xen/include/public
+
+# Create a link to the shared definitions for the control interface
+cd ${AD}/include/asm-xen
+
+## Symlinks for files:
+## - which are identical in the i386 and xen-i386 architecture-dependent
+##   subdirectories.
+## - which are identical in the Linux 2.6 and Linux 2.4 ports.
+
+cd ${AD}/include/asm-xen
+ln -sf ../asm-i386/a.out.h 
+ln -sf ../asm-i386/apicdef.h 
+ln -sf ../asm-i386/apic.h 
+ln -sf ../asm-i386/atomic.h 
+ln -sf ../asm-i386/bitops.h 
+ln -sf ../asm-i386/boot.h 
+ln -sf ../asm-i386/byteorder.h 
+ln -sf ../asm-i386/cache.h 
+ln -sf ../asm-i386/checksum.h 
+ln -sf ../asm-i386/cpufeature.h 
+ln -sf ../asm-i386/current.h 
+ln -sf ../asm-i386/debugreg.h 
+ln -sf ../asm-i386/delay.h 
+ln -sf ../asm-i386/div64.h 
+ln -sf ../asm-i386/dma.h 
+ln -sf ../asm-i386/elf.h 
+ln -sf ../asm-i386/errno.h 
+ln -sf ../asm-i386/fcntl.h 
+ln -sf ../asm-i386/floppy.h 
+ln -sf ../asm-i386/hardirq.h 
+ln -sf ../asm-i386/hdreg.h 
+ln -sf ../asm-i386/i387.h 
+ln -sf ../asm-i386/ide.h 
+ln -sf ../asm-i386/init.h
+ln -sf ../asm-i386/io_apic.h
+ln -sf ../asm-i386/ioctl.h
+ln -sf ../asm-i386/ioctls.h
+ln -sf ../asm-i386/ipcbuf.h
+ln -sf ../asm-i386/ipc.h
+ln -sf ../asm-i386/kmap_types.h
+ln -sf ../asm-i386/ldt.h 
+ln -sf ../asm-i386/linux_logo.h
+ln -sf ../asm-i386/locks.h 
+ln -sf ../asm-i386/math_emu.h
+ln -sf ../asm-i386/mc146818rtc.h
+ln -sf ../asm-i386/mca_dma.h 
+ln -sf ../asm-i386/mman.h 
+ln -sf ../asm-i386/mmu.h 
+ln -sf ../asm-i386/mmx.h 
+ln -sf ../asm-i386/mpspec.h 
+ln -sf ../asm-i386/msgbuf.h 
+ln -sf ../asm-i386/msr.h 
+ln -sf ../asm-i386/mtrr.h 
+ln -sf ../asm-i386/namei.h 
+ln -sf ../asm-i386/param.h 
+ln -sf ../asm-i386/parport.h 
+ln -sf ../asm-i386/pgtable-3level.h 
+ln -sf ../asm-i386/poll.h 
+ln -sf ../asm-i386/posix_types.h 
+ln -sf ../asm-i386/ptrace.h 
+ln -sf ../asm-i386/resource.h 
+ln -sf ../asm-i386/rwlock.h 
+ln -sf ../asm-i386/rwsem.h 
+ln -sf ../asm-i386/scatterlist.h
+ln -sf ../asm-i386/semaphore.h 
+ln -sf ../asm-i386/sembuf.h 
+ln -sf ../asm-i386/serial.h 
+ln -sf ../asm-i386/setup.h 
+ln -sf ../asm-i386/shmbuf.h 
+ln -sf ../asm-i386/shmparam.h 
+ln -sf ../asm-i386/sigcontext.h 
+ln -sf ../asm-i386/siginfo.h 
+ln -sf ../asm-i386/signal.h 
+ln -sf ../asm-i386/smplock.h 
+ln -sf ../asm-i386/socket.h 
+ln -sf ../asm-i386/sockios.h 
+ln -sf ../asm-i386/softirq.h 
+ln -sf ../asm-i386/spinlock.h 
+ln -sf ../asm-i386/statfs.h 
+ln -sf ../asm-i386/stat.h 
+ln -sf ../asm-i386/string-486.h 
+ln -sf ../asm-i386/string.h 
+ln -sf ../asm-i386/termbits.h 
+ln -sf ../asm-i386/termios.h 
+ln -sf ../asm-i386/timex.h 
+ln -sf ../asm-i386/tlb.h 
+ln -sf ../asm-i386/types.h 
+ln -sf ../asm-i386/uaccess.h 
+ln -sf ../asm-i386/ucontext.h 
+ln -sf ../asm-i386/unaligned.h
+ln -sf ../asm-i386/unistd.h 
+ln -sf ../asm-i386/user.h 
+ln -sf ../asm-i386/vm86.h 
+ln -sf ../../${LINUX_26}/include/asm-xen/balloon.h
+ln -sf ../../${LINUX_26}/include/asm-xen/ctrl_if.h
+ln -sf ../../${LINUX_26}/include/asm-xen/evtchn.h
+ln -sf ../../${LINUX_26}/include/asm-xen/gnttab.h
+ln -sf ../../${LINUX_26}/include/asm-xen/hypervisor.h
+ln -sf ../../${LINUX_26}/include/asm-xen/xen_proc.h
+ln -sf ../../${LINUX_26}/include/asm-xen/asm-i386/synch_bitops.h
+ln -sf ../../${LINUX_26}/include/asm-xen/asm-i386/hypercall.h
+
+mkdir -p linux-public && cd linux-public
+ln -sf ../../../${LINUX_26}/include/asm-xen/linux-public/privcmd.h
+ln -sf ../../../${LINUX_26}/include/asm-xen/linux-public/suspend.h
+
+cd ${AD}/arch/xen/kernel
+ln -sf ../../i386/kernel/i387.c
+ln -sf ../../i386/kernel/init_task.c
+ln -sf ../../i386/kernel/pci-i386.c
+ln -sf ../../i386/kernel/pci-i386.h
+ln -sf ../../i386/kernel/ptrace.c
+ln -sf ../../i386/kernel/semaphore.c 
+ln -sf ../../i386/kernel/sys_i386.c 
+ln -sf ../../../${LINUX_26}/arch/xen/kernel/ctrl_if.c
+ln -sf ../../../${LINUX_26}/arch/xen/kernel/evtchn.c
+ln -sf ../../../${LINUX_26}/arch/xen/kernel/fixup.c
+ln -sf ../../../${LINUX_26}/arch/xen/kernel/gnttab.c
+ln -sf ../../../${LINUX_26}/arch/xen/kernel/reboot.c
+ln -sf ../../../${LINUX_26}/arch/xen/kernel/skbuff.c
+
+cd ${AD}/arch/xen/lib
+ln -sf ../../i386/lib/checksum.S 
+ln -sf ../../i386/lib/dec_and_lock.c 
+ln -sf ../../i386/lib/getuser.S 
+ln -sf ../../i386/lib/iodebug.c 
+ln -sf ../../i386/lib/memcpy.c 
+ln -sf ../../i386/lib/mmx.c
+ln -sf ../../i386/lib/old-checksum.c 
+ln -sf ../../i386/lib/strstr.c 
+ln -sf ../../i386/lib/usercopy.c 
+ln -sf ../../../${LINUX_26}/arch/xen/kernel/xen_proc.c
+
+cd ${AD}/arch/xen/mm
+ln -sf ../../i386/mm/extable.c 
+ln -sf ../../i386/mm/pageattr.c 
+ln -sf ../../../${LINUX_26}/arch/xen/i386/mm/hypervisor.c
+
+cd ${AD}/arch/xen/drivers/balloon
+ln -sf ../../../../${LINUX_26}/drivers/xen/balloon/balloon.c
+
+cd ${AD}/arch/xen/drivers/console
+ln -sf ../../../../${LINUX_26}/drivers/xen/console/console.c 
+
+cd ${AD}/arch/xen/drivers/dom0
+ln -sf ../../../../${LINUX_26}/drivers/xen/privcmd/privcmd.c core.c
+
+cd ${AD}/arch/xen/drivers/evtchn
+ln -sf ../../../../${LINUX_26}/drivers/xen/evtchn/evtchn.c
+
+cd ${AD}/arch/xen/drivers/netif/frontend
+ln -sf ../../../../../${LINUX_26}/drivers/xen/netfront/netfront.c main.c
+
+cd ${AD}/arch/xen/drivers/netif/backend
+ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/common.h
+ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/control.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/interface.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/netback.c main.c
+
+cd ${AD}/arch/xen/drivers/blkif/backend
+ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/common.h
+ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/blkback.c main.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/control.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/interface.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/vbd.c
+
+cd ${AD}/arch/xen/drivers/blkif/frontend
+ln -sf ../../../../../${LINUX_26}/drivers/xen/blkfront/blkfront.c
+
+cd ${AD}/arch/xen/drivers/usbif/frontend
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbfront/usbfront.c main.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbfront/xhci.h
+
+cd ${AD}/arch/xen/drivers/usbif/backend
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/common.h
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/control.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/interface.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/usbback.c main.c
diff --git a/linux-2.4-xen-sparse/mm/highmem.c b/linux-2.4-xen-sparse/mm/highmem.c
new file mode 100644
index 0000000000..f8182820ac
--- /dev/null
+++ b/linux-2.4-xen-sparse/mm/highmem.c
@@ -0,0 +1,461 @@
+/*
+ * High memory handling common code and variables.
+ *
+ * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
+ *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
+ *
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with
+ * 64-bit physical space. With current x86 CPUs this
+ * means up to 64 Gigabytes physical RAM.
+ *
+ * Rewrote high memory support to move the page cache into
+ * high memory. Implemented permanent (schedulable) kmaps
+ * based on Linus' idea.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+
+/*
+ * Virtual_count is not a pure "count".
+ *  0 means that it is not mapped, and has not been mapped
+ *    since a TLB flush - it is usable.
+ *  1 means that there are no users, but it has been mapped
+ *    since the last TLB flush - so we can't use it.
+ *  n means that there are (n-1) current users of it.
+ */
+static int pkmap_count[LAST_PKMAP];
+static unsigned int last_pkmap_nr;
+static spinlock_cacheline_t kmap_lock_cacheline = {SPIN_LOCK_UNLOCKED};
+#define kmap_lock  kmap_lock_cacheline.lock
+
+pte_t * pkmap_page_table;
+
+static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
+
+static void flush_all_zero_pkmaps(void)
+{
+	int i;
+
+	flush_cache_all();
+
+	for (i = 0; i < LAST_PKMAP; i++) {
+		struct page *page;
+
+		/*
+		 * zero means we don't have anything to do,
+		 * >1 means that it is still in use. Only
+		 * a count of 1 means that it is free but
+		 * needs to be unmapped
+		 */
+		if (pkmap_count[i] != 1)
+			continue;
+		pkmap_count[i] = 0;
+
+		/* sanity check */
+		if (pte_none(pkmap_page_table[i]))
+			BUG();
+
+		/*
+		 * Don't need an atomic fetch-and-clear op here;
+		 * no-one has the page mapped, and cannot get at
+		 * its virtual address (and hence PTE) without first
+		 * getting the kmap_lock (which is held here).
+		 * So no dangers, even with speculative execution.
+		 */
+		page = pte_page(pkmap_page_table[i]);
+		pte_clear(&pkmap_page_table[i]);
+
+		page->virtual = NULL;
+	}
+	flush_tlb_all();
+}
+
+static inline unsigned long map_new_virtual(struct page *page, int nonblocking)
+{
+	unsigned long vaddr;
+	int count;
+
+start:
+	count = LAST_PKMAP;
+	/* Find an empty entry */
+	for (;;) {
+		last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
+		if (!last_pkmap_nr) {
+			flush_all_zero_pkmaps();
+			count = LAST_PKMAP;
+		}
+		if (!pkmap_count[last_pkmap_nr])
+			break;	/* Found a usable entry */
+		if (--count)
+			continue;
+
+		if (nonblocking)
+			return 0;
+
+		/*
+		 * Sleep for somebody else to unmap their entries
+		 */
+		{
+			DECLARE_WAITQUEUE(wait, current);
+
+			current->state = TASK_UNINTERRUPTIBLE;
+			add_wait_queue(&pkmap_map_wait, &wait);
+			spin_unlock(&kmap_lock);
+			schedule();
+			remove_wait_queue(&pkmap_map_wait, &wait);
+			spin_lock(&kmap_lock);
+
+			/* Somebody else might have mapped it while we slept */
+			if (page->virtual)
+				return (unsigned long) page->virtual;
+
+			/* Re-start */
+			goto start;
+		}
+	}
+	vaddr = PKMAP_ADDR(last_pkmap_nr);
+	set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
+
+	pkmap_count[last_pkmap_nr] = 1;
+	page->virtual = (void *) vaddr;
+
+	return vaddr;
+}
+
+void kmap_flush_unused(void)
+{
+	spin_lock(&kmap_lock);
+	flush_all_zero_pkmaps();
+	spin_unlock(&kmap_lock);
+}
+
+void fastcall *kmap_high(struct page *page, int nonblocking)
+{
+	unsigned long vaddr;
+
+	/*
+	 * For highmem pages, we can't trust "virtual" until
+	 * after we have the lock.
+	 *
+	 * We cannot call this from interrupts, as it may block
+	 */
+	spin_lock(&kmap_lock);
+	vaddr = (unsigned long) page->virtual;
+	if (!vaddr) {
+		vaddr = map_new_virtual(page, nonblocking);
+		if (!vaddr)
+			goto out;
+	}
+	pkmap_count[PKMAP_NR(vaddr)]++;
+	if (pkmap_count[PKMAP_NR(vaddr)] < 2)
+		BUG();
+ out:
+	spin_unlock(&kmap_lock);
+	return (void*) vaddr;
+}
+
+void fastcall kunmap_high(struct page *page)
+{
+	unsigned long vaddr;
+	unsigned long nr;
+	int need_wakeup;
+
+	spin_lock(&kmap_lock);
+	vaddr = (unsigned long) page->virtual;
+	if (!vaddr)
+		BUG();
+	nr = PKMAP_NR(vaddr);
+
+	/*
+	 * A count must never go down to zero
+	 * without a TLB flush!
+	 */
+	need_wakeup = 0;
+	switch (--pkmap_count[nr]) {
+	case 0:
+		BUG();
+	case 1:
+		/*
+		 * Avoid an unnecessary wake_up() function call.
+		 * The common case is pkmap_count[] == 1, but
+		 * no waiters.
+		 * The tasks queued in the wait-queue are guarded
+		 * by both the lock in the wait-queue-head and by
+		 * the kmap_lock.  As the kmap_lock is held here,
+		 * no need for the wait-queue-head's lock.  Simply
+		 * test if the queue is empty.
+		 */
+		need_wakeup = waitqueue_active(&pkmap_map_wait);
+	}
+	spin_unlock(&kmap_lock);
+
+	/* do wake-up, if needed, race-free outside of the spin lock */
+	if (need_wakeup)
+		wake_up(&pkmap_map_wait);
+}
+
+#define POOL_SIZE 32
+
+/*
+ * This lock gets no contention at all, normally.
+ */
+static spinlock_t emergency_lock = SPIN_LOCK_UNLOCKED;
+
+int nr_emergency_pages;
+static LIST_HEAD(emergency_pages);
+
+int nr_emergency_bhs;
+static LIST_HEAD(emergency_bhs);
+
+/*
+ * Simple bounce buffer support for highmem pages.
+ * This will be moved to the block layer in 2.5.
+ */
+
+static inline void copy_from_high_bh (struct buffer_head *to,
+			 struct buffer_head *from)
+{
+	struct page *p_from;
+	char *vfrom;
+
+	p_from = from->b_page;
+
+	vfrom = kmap_atomic(p_from, KM_USER0);
+	memcpy(to->b_data, vfrom + bh_offset(from), to->b_size);
+	kunmap_atomic(vfrom, KM_USER0);
+}
+
+static inline void copy_to_high_bh_irq (struct buffer_head *to,
+			 struct buffer_head *from)
+{
+	struct page *p_to;
+	char *vto;
+	unsigned long flags;
+
+	p_to = to->b_page;
+	__save_flags(flags);
+	__cli();
+	vto = kmap_atomic(p_to, KM_BOUNCE_READ);
+	memcpy(vto + bh_offset(to), from->b_data, to->b_size);
+	kunmap_atomic(vto, KM_BOUNCE_READ);
+	__restore_flags(flags);
+}
+
+static inline void bounce_end_io (struct buffer_head *bh, int uptodate)
+{
+	struct page *page;
+	struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
+	unsigned long flags;
+
+	bh_orig->b_end_io(bh_orig, uptodate);
+
+	page = bh->b_page;
+
+	spin_lock_irqsave(&emergency_lock, flags);
+	if (nr_emergency_pages >= POOL_SIZE)
+		__free_page(page);
+	else {
+		/*
+		 * We are abusing page->list to manage
+		 * the highmem emergency pool:
+		 */
+		list_add(&page->list, &emergency_pages);
+		nr_emergency_pages++;
+	}
+	
+	if (nr_emergency_bhs >= POOL_SIZE) {
+#ifdef HIGHMEM_DEBUG
+		/* Don't clobber the constructed slab cache */
+		init_waitqueue_head(&bh->b_wait);
+#endif
+		kmem_cache_free(bh_cachep, bh);
+	} else {
+		/*
+		 * Ditto in the bh case, here we abuse b_inode_buffers:
+		 */
+		list_add(&bh->b_inode_buffers, &emergency_bhs);
+		nr_emergency_bhs++;
+	}
+	spin_unlock_irqrestore(&emergency_lock, flags);
+}
+
+static __init int init_emergency_pool(void)
+{
+	struct sysinfo i;
+        si_meminfo(&i);
+        si_swapinfo(&i);
+        
+        if (!i.totalhigh)
+        	return 0;
+
+	spin_lock_irq(&emergency_lock);
+	while (nr_emergency_pages < POOL_SIZE) {
+		struct page * page = alloc_page(GFP_ATOMIC);
+		if (!page) {
+			printk("couldn't refill highmem emergency pages");
+			break;
+		}
+		list_add(&page->list, &emergency_pages);
+		nr_emergency_pages++;
+	}
+	while (nr_emergency_bhs < POOL_SIZE) {
+		struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC);
+		if (!bh) {
+			printk("couldn't refill highmem emergency bhs");
+			break;
+		}
+		list_add(&bh->b_inode_buffers, &emergency_bhs);
+		nr_emergency_bhs++;
+	}
+	spin_unlock_irq(&emergency_lock);
+	printk("allocated %d pages and %d bhs reserved for the highmem bounces\n",
+	       nr_emergency_pages, nr_emergency_bhs);
+
+	return 0;
+}
+
+__initcall(init_emergency_pool);
+
+static void bounce_end_io_write (struct buffer_head *bh, int uptodate)
+{
+	bounce_end_io(bh, uptodate);
+}
+
+static void bounce_end_io_read (struct buffer_head *bh, int uptodate)
+{
+	struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
+
+	if (uptodate)
+		copy_to_high_bh_irq(bh_orig, bh);
+	bounce_end_io(bh, uptodate);
+}
+
+struct page *alloc_bounce_page (void)
+{
+	struct list_head *tmp;
+	struct page *page;
+
+	page = alloc_page(GFP_NOHIGHIO);
+	if (page)
+		return page;
+	/*
+	 * No luck. First, kick the VM so it doesn't idle around while
+	 * we are using up our emergency rations.
+	 */
+	wakeup_bdflush();
+
+repeat_alloc:
+	/*
+	 * Try to allocate from the emergency pool.
+	 */
+	tmp = &emergency_pages;
+	spin_lock_irq(&emergency_lock);
+	if (!list_empty(tmp)) {
+		page = list_entry(tmp->next, struct page, list);
+		list_del(tmp->next);
+		nr_emergency_pages--;
+	}
+	spin_unlock_irq(&emergency_lock);
+	if (page)
+		return page;
+
+	/* we need to wait I/O completion */
+	run_task_queue(&tq_disk);
+
+	yield();
+	goto repeat_alloc;
+}
+
+struct buffer_head *alloc_bounce_bh (void)
+{
+	struct list_head *tmp;
+	struct buffer_head *bh;
+
+	bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO);
+	if (bh)
+		return bh;
+	/*
+	 * No luck. First, kick the VM so it doesn't idle around while
+	 * we are using up our emergency rations.
+	 */
+	wakeup_bdflush();
+
+repeat_alloc:
+	/*
+	 * Try to allocate from the emergency pool.
+	 */
+	tmp = &emergency_bhs;
+	spin_lock_irq(&emergency_lock);
+	if (!list_empty(tmp)) {
+		bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers);
+		list_del(tmp->next);
+		nr_emergency_bhs--;
+	}
+	spin_unlock_irq(&emergency_lock);
+	if (bh)
+		return bh;
+
+	/* we need to wait I/O completion */
+	run_task_queue(&tq_disk);
+
+	yield();
+	goto repeat_alloc;
+}
+
+struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig)
+{
+	struct page *page;
+	struct buffer_head *bh;
+
+	if (!PageHighMem(bh_orig->b_page))
+		return bh_orig;
+
+	bh = alloc_bounce_bh();
+	/*
+	 * This is wasteful for 1k buffers, but this is a stopgap measure
+	 * and we are being ineffective anyway. This approach simplifies
+	 * things immensly. On boxes with more than 4GB RAM this should
+	 * not be an issue anyway.
+	 */
+	page = alloc_bounce_page();
+
+	set_bh_page(bh, page, 0);
+
+	bh->b_next = NULL;
+	bh->b_blocknr = bh_orig->b_blocknr;
+	bh->b_size = bh_orig->b_size;
+	bh->b_list = -1;
+	bh->b_dev = bh_orig->b_dev;
+	bh->b_count = bh_orig->b_count;
+	bh->b_rdev = bh_orig->b_rdev;
+	bh->b_state = bh_orig->b_state;
+#ifdef HIGHMEM_DEBUG
+	bh->b_flushtime = jiffies;
+	bh->b_next_free = NULL;
+	bh->b_prev_free = NULL;
+	/* bh->b_this_page */
+	bh->b_reqnext = NULL;
+	bh->b_pprev = NULL;
+#endif
+	/* bh->b_page */
+	if (rw == WRITE) {
+		bh->b_end_io = bounce_end_io_write;
+		copy_from_high_bh(bh, bh_orig);
+	} else
+		bh->b_end_io = bounce_end_io_read;
+	bh->b_private = (void *)bh_orig;
+	bh->b_rsector = bh_orig->b_rsector;
+#ifdef HIGHMEM_DEBUG
+	memset(&bh->b_wait, -1, sizeof(bh->b_wait));
+#endif
+
+	return bh;
+}
+
diff --git a/linux-2.4-xen-sparse/mm/memory.c b/linux-2.4-xen-sparse/mm/memory.c
new file mode 100644
index 0000000000..6b7c807fd3
--- /dev/null
+++ b/linux-2.4-xen-sparse/mm/memory.c
@@ -0,0 +1,1534 @@
+/*
+ *  linux/mm/memory.c
+ *
+ *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+/*
+ * demand-loading started 01.12.91 - seems it is high on the list of
+ * things wanted, and it should be easy to implement. - Linus
+ */
+
+/*
+ * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
+ * pages started 02.12.91, seems to work. - Linus.
+ *
+ * Tested sharing by executing about 30 /bin/sh: under the old kernel it
+ * would have taken more than the 6M I have free, but it worked well as
+ * far as I could see.
+ *
+ * Also corrected some "invalidate()"s - I wasn't doing enough of them.
+ */
+
+/*
+ * Real VM (paging to/from disk) started 18.12.91. Much more work and
+ * thought has to go into this. Oh, well..
+ * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
+ *		Found it. Everything seems to work now.
+ * 20.12.91  -  Ok, making the swap-device changeable like the root.
+ */
+
+/*
+ * 05.04.94  -  Multi-page memory management added for v1.1.
+ * 		Idea by Alex Bligh (alex@cconcepts.co.uk)
+ *
+ * 16.07.99  -  Support of BIGMEM added by Gerhard Wichert, Siemens AG
+ *		(Gerhard.Wichert@pdb.siemens.de)
+ */
+
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/swap.h>
+#include <linux/smp_lock.h>
+#include <linux/swapctl.h>
+#include <linux/iobuf.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/module.h>
+
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+
+unsigned long max_mapnr;
+unsigned long num_physpages;
+unsigned long num_mappedpages;
+void * high_memory;
+struct page *highmem_start_page;
+
+/*
+ * We special-case the C-O-W ZERO_PAGE, because it's such
+ * a common occurrence (no need to read the page to know
+ * that it's zero - better for the cache and memory subsystem).
+ */
+static inline void copy_cow_page(struct page * from, struct page * to, unsigned long address)
+{
+	if (from == ZERO_PAGE(address)) {
+		clear_user_highpage(to, address);
+		return;
+	}
+	copy_user_highpage(to, from, address);
+}
+
+mem_map_t * mem_map;
+
+/*
+ * Called by TLB shootdown 
+ */
+void __free_pte(pte_t pte)
+{
+	struct page *page = pte_page(pte);
+	if ((!VALID_PAGE(page)) || PageReserved(page))
+		return;
+	if (pte_dirty(pte))
+		set_page_dirty(page);		
+	free_page_and_swap_cache(page);
+}
+
+
+/*
+ * Note: this doesn't free the actual pages themselves. That
+ * has been handled earlier when unmapping all the memory regions.
+ */
+static inline void free_one_pmd(pmd_t * dir)
+{
+	pte_t * pte;
+
+	if (pmd_none(*dir))
+		return;
+	if (pmd_bad(*dir)) {
+		pmd_ERROR(*dir);
+		pmd_clear(dir);
+		return;
+	}
+	pte = pte_offset(dir, 0);
+	pmd_clear(dir);
+	pte_free(pte);
+}
+
+static inline void free_one_pgd(pgd_t * dir)
+{
+	int j;
+	pmd_t * pmd;
+
+	if (pgd_none(*dir))
+		return;
+	if (pgd_bad(*dir)) {
+		pgd_ERROR(*dir);
+		pgd_clear(dir);
+		return;
+	}
+	pmd = pmd_offset(dir, 0);
+	pgd_clear(dir);
+	for (j = 0; j < PTRS_PER_PMD ; j++) {
+		prefetchw(pmd+j+(PREFETCH_STRIDE/16));
+		free_one_pmd(pmd+j);
+	}
+	pmd_free(pmd);
+}
+
+/* Low and high watermarks for page table cache.
+   The system should try to have pgt_water[0] <= cache elements <= pgt_water[1]
+ */
+int pgt_cache_water[2] = { 25, 50 };
+
+/* Returns the number of pages freed */
+int check_pgt_cache(void)
+{
+	return do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]);
+}
+
+
+/*
+ * This function clears all user-level page tables of a process - this
+ * is needed by execve(), so that old pages aren't in the way.
+ */
+void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
+{
+	pgd_t * page_dir = mm->pgd;
+
+	spin_lock(&mm->page_table_lock);
+	page_dir += first;
+	do {
+		free_one_pgd(page_dir);
+		page_dir++;
+	} while (--nr);
+	spin_unlock(&mm->page_table_lock);
+
+	/* keep the page table cache within bounds */
+	check_pgt_cache();
+}
+
+#define PTE_TABLE_MASK	((PTRS_PER_PTE-1) * sizeof(pte_t))
+#define PMD_TABLE_MASK	((PTRS_PER_PMD-1) * sizeof(pmd_t))
+
+/*
+ * copy one vm_area from one task to the other. Assumes the page tables
+ * already present in the new task to be cleared in the whole range
+ * covered by this vma.
+ *
+ * 08Jan98 Merged into one routine from several inline routines to reduce
+ *         variable count and make things faster. -jj
+ *
+ * dst->page_table_lock is held on entry and exit,
+ * but may be dropped within pmd_alloc() and pte_alloc().
+ */
+int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
+			struct vm_area_struct *vma)
+{
+	pgd_t * src_pgd, * dst_pgd;
+	unsigned long address = vma->vm_start;
+	unsigned long end = vma->vm_end;
+	unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+
+	src_pgd = pgd_offset(src, address)-1;
+	dst_pgd = pgd_offset(dst, address)-1;
+
+	for (;;) {
+		pmd_t * src_pmd, * dst_pmd;
+
+		src_pgd++; dst_pgd++;
+		
+		/* copy_pmd_range */
+		
+		if (pgd_none(*src_pgd))
+			goto skip_copy_pmd_range;
+		if (pgd_bad(*src_pgd)) {
+			pgd_ERROR(*src_pgd);
+			pgd_clear(src_pgd);
+skip_copy_pmd_range:	address = (address + PGDIR_SIZE) & PGDIR_MASK;
+			if (!address || (address >= end))
+				goto out;
+			continue;
+		}
+
+		src_pmd = pmd_offset(src_pgd, address);
+		dst_pmd = pmd_alloc(dst, dst_pgd, address);
+		if (!dst_pmd)
+			goto nomem;
+
+		do {
+			pte_t * src_pte, * dst_pte;
+		
+			/* copy_pte_range */
+		
+			if (pmd_none(*src_pmd))
+				goto skip_copy_pte_range;
+			if (pmd_bad(*src_pmd)) {
+				pmd_ERROR(*src_pmd);
+				pmd_clear(src_pmd);
+skip_copy_pte_range:		address = (address + PMD_SIZE) & PMD_MASK;
+				if (address >= end)
+					goto out;
+				goto cont_copy_pmd_range;
+			}
+
+			src_pte = pte_offset(src_pmd, address);
+			dst_pte = pte_alloc(dst, dst_pmd, address);
+			if (!dst_pte)
+				goto nomem;
+
+			spin_lock(&src->page_table_lock);			
+			do {
+				pte_t pte = *src_pte;
+				struct page *ptepage;
+				
+				/* copy_one_pte */
+
+				if (pte_none(pte))
+					goto cont_copy_pte_range_noset;
+				if (!pte_present(pte)) {
+					swap_duplicate(pte_to_swp_entry(pte));
+					goto cont_copy_pte_range;
+				}
+				ptepage = pte_page(pte);
+				if ((!VALID_PAGE(ptepage)) || 
+				    PageReserved(ptepage))
+					goto cont_copy_pte_range;
+
+				/* If it's a COW mapping, write protect it both in the parent and the child */
+				if (cow && pte_write(pte)) {
+					ptep_set_wrprotect(src_pte);
+					pte = *src_pte;
+				}
+
+				/* If it's a shared mapping, mark it clean in the child */
+				if (vma->vm_flags & VM_SHARED)
+					pte = pte_mkclean(pte);
+				pte = pte_mkold(pte);
+				get_page(ptepage);
+				dst->rss++;
+
+cont_copy_pte_range:		set_pte(dst_pte, pte);
+cont_copy_pte_range_noset:	address += PAGE_SIZE;
+				if (address >= end)
+					goto out_unlock;
+				src_pte++;
+				dst_pte++;
+			} while ((unsigned long)src_pte & PTE_TABLE_MASK);
+			spin_unlock(&src->page_table_lock);
+		
+cont_copy_pmd_range:	src_pmd++;
+			dst_pmd++;
+		} while ((unsigned long)src_pmd & PMD_TABLE_MASK);
+	}
+out_unlock:
+	spin_unlock(&src->page_table_lock);
+out:
+	return 0;
+nomem:
+	return -ENOMEM;
+}
+
+/*
+ * Return indicates whether a page was freed so caller can adjust rss
+ */
+static inline void forget_pte(pte_t page)
+{
+	if (!pte_none(page)) {
+		printk("forget_pte: old mapping existed!\n");
+		BUG();
+	}
+}
+
+static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
+{
+	unsigned long offset;
+	pte_t * ptep;
+	int freed = 0;
+
+	if (pmd_none(*pmd))
+		return 0;
+	if (pmd_bad(*pmd)) {
+		pmd_ERROR(*pmd);
+		pmd_clear(pmd);
+		return 0;
+	}
+	ptep = pte_offset(pmd, address);
+	offset = address & ~PMD_MASK;
+	if (offset + size > PMD_SIZE)
+		size = PMD_SIZE - offset;
+	size &= PAGE_MASK;
+	for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
+		pte_t pte = *ptep;
+		if (pte_none(pte))
+			continue;
+		if (pte_present(pte)) {
+			struct page *page = pte_page(pte);
+			if (VALID_PAGE(page) && !PageReserved(page))
+				freed ++;
+			/* This will eventually call __free_pte on the pte. */
+			tlb_remove_page(tlb, ptep, address + offset);
+		} else {
+			free_swap_and_cache(pte_to_swp_entry(pte));
+			pte_clear(ptep);
+		}
+	}
+
+	return freed;
+}
+
+static inline int zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
+{
+	pmd_t * pmd;
+	unsigned long end;
+	int freed;
+
+	if (pgd_none(*dir))
+		return 0;
+	if (pgd_bad(*dir)) {
+		pgd_ERROR(*dir);
+		pgd_clear(dir);
+		return 0;
+	}
+	pmd = pmd_offset(dir, address);
+	end = address + size;
+	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
+		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+	freed = 0;
+	do {
+		freed += zap_pte_range(tlb, pmd, address, end - address);
+		address = (address + PMD_SIZE) & PMD_MASK; 
+		pmd++;
+	} while (address < end);
+	return freed;
+}
+
+/*
+ * remove user pages in a given range.
+ */
+void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
+{
+	mmu_gather_t *tlb;
+	pgd_t * dir;
+	unsigned long start = address, end = address + size;
+	int freed = 0;
+
+	dir = pgd_offset(mm, address);
+
+	/*
+	 * This is a long-lived spinlock. That's fine.
+	 * There's no contention, because the page table
+	 * lock only protects against kswapd anyway, and
+	 * even if kswapd happened to be looking at this
+	 * process we _want_ it to get stuck.
+	 */
+	if (address >= end)
+		BUG();
+	spin_lock(&mm->page_table_lock);
+	flush_cache_range(mm, address, end);
+	tlb = tlb_gather_mmu(mm);
+
+	do {
+		freed += zap_pmd_range(tlb, dir, address, end - address);
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
+	} while (address && (address < end));
+
+	/* this will flush any remaining tlb entries */
+	tlb_finish_mmu(tlb, start, end);
+
+	/*
+	 * Update rss for the mm_struct (not necessarily current->mm)
+	 * Notice that rss is an unsigned long.
+	 */
+	if (mm->rss > freed)
+		mm->rss -= freed;
+	else
+		mm->rss = 0;
+	spin_unlock(&mm->page_table_lock);
+}
+
+/*
+ * Do a quick page-table lookup for a single page. 
+ */
+static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) 
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+
+	pgd = pgd_offset(mm, address);
+	if (pgd_none(*pgd) || pgd_bad(*pgd))
+		goto out;
+
+	pmd = pmd_offset(pgd, address);
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
+		goto out;
+
+	ptep = pte_offset(pmd, address);
+	if (!ptep)
+		goto out;
+
+	pte = *ptep;
+	if (pte_present(pte)) {
+		if (!write ||
+		    (pte_write(pte) && pte_dirty(pte)))
+			return pte_page(pte);
+	}
+
+out:
+	return 0;
+}
+
+/* 
+ * Given a physical address, is there a useful struct page pointing to
+ * it?  This may become more complex in the future if we start dealing
+ * with IO-aperture pages in kiobufs.
+ */
+
+static inline struct page * get_page_map(struct page *page)
+{
+	if (!VALID_PAGE(page))
+		return 0;
+	return page;
+}
+
+/*
+ * Please read Documentation/cachetlb.txt before using this function,
+ * accessing foreign memory spaces can cause cache coherency problems.
+ *
+ * Accessing a VM_IO area is even more dangerous, therefore the function
+ * fails if pages is != NULL and a VM_IO area is found.
+ */
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
+		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas)
+{
+	int i;
+	unsigned int flags;
+
+	/*
+	 * Require read or write permissions.
+	 * If 'force' is set, we only require the "MAY" flags.
+	 */
+	flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+	flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+	i = 0;
+
+	do {
+		struct vm_area_struct *	vma;
+
+		vma = find_extend_vma(mm, start);
+
+		if ( !vma || (pages && vma->vm_flags & VM_IO) || !(flags & vma->vm_flags) )
+			return i ? : -EFAULT;
+
+		spin_lock(&mm->page_table_lock);
+		do {
+			struct page *map;
+			while (!(map = follow_page(mm, start, write))) {
+				spin_unlock(&mm->page_table_lock);
+				switch (handle_mm_fault(mm, vma, start, write)) {
+				case 1:
+					tsk->min_flt++;
+					break;
+				case 2:
+					tsk->maj_flt++;
+					break;
+				case 0:
+					if (i) return i;
+					return -EFAULT;
+				default:
+					if (i) return i;
+					return -ENOMEM;
+				}
+				spin_lock(&mm->page_table_lock);
+			}
+			if (pages) {
+				pages[i] = get_page_map(map);
+				/* FIXME: call the correct function,
+				 * depending on the type of the found page
+				 */
+				if (!pages[i] || PageReserved(pages[i])) {
+					if (pages[i] != ZERO_PAGE(start))
+						goto bad_page;
+				} else
+					page_cache_get(pages[i]);
+			}
+			if (vmas)
+				vmas[i] = vma;
+			i++;
+			start += PAGE_SIZE;
+			len--;
+		} while(len && start < vma->vm_end);
+		spin_unlock(&mm->page_table_lock);
+	} while(len);
+out:
+	return i;
+
+	/*
+	 * We found an invalid page in the VMA.  Release all we have
+	 * so far and fail.
+	 */
+bad_page:
+	spin_unlock(&mm->page_table_lock);
+	while (i--)
+		page_cache_release(pages[i]);
+	i = -EFAULT;
+	goto out;
+}
+
+EXPORT_SYMBOL(get_user_pages);
+
+/*
+ * Force in an entire range of pages from the current process's user VA,
+ * and pin them in physical memory.  
+ */
+#define dprintk(x...)
+
+int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
+{
+	int pgcount, err;
+	struct mm_struct *	mm;
+	
+	/* Make sure the iobuf is not already mapped somewhere. */
+	if (iobuf->nr_pages)
+		return -EINVAL;
+
+	mm = current->mm;
+	dprintk ("map_user_kiobuf: begin\n");
+	
+	pgcount = (va + len + PAGE_SIZE - 1)/PAGE_SIZE - va/PAGE_SIZE;
+	/* mapping 0 bytes is not permitted */
+	if (!pgcount) BUG();
+	err = expand_kiobuf(iobuf, pgcount);
+	if (err)
+		return err;
+
+	iobuf->locked = 0;
+	iobuf->offset = va & (PAGE_SIZE-1);
+	iobuf->length = len;
+	
+	/* Try to fault in all of the necessary pages */
+	down_read(&mm->mmap_sem);
+	/* rw==READ means read from disk, write into memory area */
+	err = get_user_pages(current, mm, va, pgcount,
+			(rw==READ), 0, iobuf->maplist, NULL);
+	up_read(&mm->mmap_sem);
+	if (err < 0) {
+		unmap_kiobuf(iobuf);
+		dprintk ("map_user_kiobuf: end %d\n", err);
+		return err;
+	}
+	iobuf->nr_pages = err;
+	while (pgcount--) {
+		/* FIXME: flush superflous for rw==READ,
+		 * probably wrong function for rw==WRITE
+		 */
+		flush_dcache_page(iobuf->maplist[pgcount]);
+	}
+	dprintk ("map_user_kiobuf: end OK\n");
+	return 0;
+}
+
+/*
+ * Mark all of the pages in a kiobuf as dirty 
+ *
+ * We need to be able to deal with short reads from disk: if an IO error
+ * occurs, the number of bytes read into memory may be less than the
+ * size of the kiobuf, so we have to stop marking pages dirty once the
+ * requested byte count has been reached.
+ *
+ * Must be called from process context - set_page_dirty() takes VFS locks.
+ */
+
+void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes)
+{
+	int index, offset, remaining;
+	struct page *page;
+	
+	index = iobuf->offset >> PAGE_SHIFT;
+	offset = iobuf->offset & ~PAGE_MASK;
+	remaining = bytes;
+	if (remaining > iobuf->length)
+		remaining = iobuf->length;
+	
+	while (remaining > 0 && index < iobuf->nr_pages) {
+		page = iobuf->maplist[index];
+		
+		if (!PageReserved(page))
+			set_page_dirty(page);
+
+		remaining -= (PAGE_SIZE - offset);
+		offset = 0;
+		index++;
+	}
+}
+
+/*
+ * Unmap all of the pages referenced by a kiobuf.  We release the pages,
+ * and unlock them if they were locked. 
+ */
+
+void unmap_kiobuf (struct kiobuf *iobuf) 
+{
+	int i;
+	struct page *map;
+	
+	for (i = 0; i < iobuf->nr_pages; i++) {
+		map = iobuf->maplist[i];
+		if (map) {
+			if (iobuf->locked)
+				UnlockPage(map);
+			/* FIXME: cache flush missing for rw==READ
+			 * FIXME: call the correct reference counting function
+			 */
+			page_cache_release(map);
+		}
+	}
+	
+	iobuf->nr_pages = 0;
+	iobuf->locked = 0;
+}
+
+
+/*
+ * Lock down all of the pages of a kiovec for IO.
+ *
+ * If any page is mapped twice in the kiovec, we return the error -EINVAL.
+ *
+ * The optional wait parameter causes the lock call to block until all
+ * pages can be locked if set.  If wait==0, the lock operation is
+ * aborted if any locked pages are found and -EAGAIN is returned.
+ */
+
+int lock_kiovec(int nr, struct kiobuf *iovec[], int wait)
+{
+	struct kiobuf *iobuf;
+	int i, j;
+	struct page *page, **ppage;
+	int doublepage = 0;
+	int repeat = 0;
+	
+ repeat:
+	
+	for (i = 0; i < nr; i++) {
+		iobuf = iovec[i];
+
+		if (iobuf->locked)
+			continue;
+
+		ppage = iobuf->maplist;
+		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
+			page = *ppage;
+			if (!page)
+				continue;
+			
+			if (TryLockPage(page)) {
+				while (j--) {
+					struct page *tmp = *--ppage;
+					if (tmp)
+						UnlockPage(tmp);
+				}
+				goto retry;
+			}
+		}
+		iobuf->locked = 1;
+	}
+
+	return 0;
+	
+ retry:
+	
+	/* 
+	 * We couldn't lock one of the pages.  Undo the locking so far,
+	 * wait on the page we got to, and try again.  
+	 */
+	
+	unlock_kiovec(nr, iovec);
+	if (!wait)
+		return -EAGAIN;
+	
+	/* 
+	 * Did the release also unlock the page we got stuck on?
+	 */
+	if (!PageLocked(page)) {
+		/* 
+		 * If so, we may well have the page mapped twice
+		 * in the IO address range.  Bad news.  Of
+		 * course, it _might_ just be a coincidence,
+		 * but if it happens more than once, chances
+		 * are we have a double-mapped page. 
+		 */
+		if (++doublepage >= 3) 
+			return -EINVAL;
+		
+		/* Try again...  */
+		wait_on_page(page);
+	}
+	
+	if (++repeat < 16)
+		goto repeat;
+	return -EAGAIN;
+}
+
+/*
+ * Unlock all of the pages of a kiovec after IO.
+ */
+
+int unlock_kiovec(int nr, struct kiobuf *iovec[])
+{
+	struct kiobuf *iobuf;
+	int i, j;
+	struct page *page, **ppage;
+	
+	for (i = 0; i < nr; i++) {
+		iobuf = iovec[i];
+
+		if (!iobuf->locked)
+			continue;
+		iobuf->locked = 0;
+		
+		ppage = iobuf->maplist;
+		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
+			page = *ppage;
+			if (!page)
+				continue;
+			UnlockPage(page);
+		}
+	}
+	return 0;
+}
+
+static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
+                                     unsigned long size, pgprot_t prot)
+{
+	unsigned long end;
+
+	address &= ~PMD_MASK;
+	end = address + size;
+	if (end > PMD_SIZE)
+		end = PMD_SIZE;
+	do {
+		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
+		pte_t oldpage = ptep_get_and_clear(pte);
+		set_pte(pte, zero_pte);
+		forget_pte(oldpage);
+		address += PAGE_SIZE;
+		pte++;
+	} while (address && (address < end));
+}
+
+static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
+                                    unsigned long size, pgprot_t prot)
+{
+	unsigned long end;
+
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+	do {
+		pte_t * pte = pte_alloc(mm, pmd, address);
+		if (!pte)
+			return -ENOMEM;
+		zeromap_pte_range(pte, address, end - address, prot);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && (address < end));
+	return 0;
+}
+
+int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
+{
+	int error = 0;
+	pgd_t * dir;
+	unsigned long beg = address;
+	unsigned long end = address + size;
+	struct mm_struct *mm = current->mm;
+
+	dir = pgd_offset(mm, address);
+	flush_cache_range(mm, beg, end);
+	if (address >= end)
+		BUG();
+
+	spin_lock(&mm->page_table_lock);
+	do {
+		pmd_t *pmd = pmd_alloc(mm, dir, address);
+		error = -ENOMEM;
+		if (!pmd)
+			break;
+		error = zeromap_pmd_range(mm, pmd, address, end - address, prot);
+		if (error)
+			break;
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
+	} while (address && (address < end));
+	spin_unlock(&mm->page_table_lock);
+	flush_tlb_range(mm, beg, end);
+	return error;
+}
+
+/*
+ * maps a range of physical memory into the requested pages. the old
+ * mappings are removed. any references to nonexistent pages results
+ * in null mappings (currently treated as "copy-on-access")
+ */
+static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
+	unsigned long phys_addr, pgprot_t prot)
+{
+	unsigned long end;
+
+	address &= ~PMD_MASK;
+	end = address + size;
+	if (end > PMD_SIZE)
+		end = PMD_SIZE;
+	do {
+		struct page *page;
+		pte_t oldpage;
+		oldpage = ptep_get_and_clear(pte);
+
+		page = virt_to_page(__va(phys_addr));
+		if ((!VALID_PAGE(page)) || PageReserved(page))
+ 			set_pte(pte, mk_pte_phys(phys_addr, prot));
+		forget_pte(oldpage);
+		address += PAGE_SIZE;
+		phys_addr += PAGE_SIZE;
+		pte++;
+	} while (address && (address < end));
+}
+
+static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
+	unsigned long phys_addr, pgprot_t prot)
+{
+	unsigned long end;
+
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+	phys_addr -= address;
+	do {
+		pte_t * pte = pte_alloc(mm, pmd, address);
+		if (!pte)
+			return -ENOMEM;
+		remap_pte_range(pte, address, end - address, address + phys_addr, prot);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && (address < end));
+	return 0;
+}
+
+/*  Note: this is only safe if the mm semaphore is held when called. */
+int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot)
+{
+	int error = 0;
+	pgd_t * dir;
+	unsigned long beg = from;
+	unsigned long end = from + size;
+	struct mm_struct *mm = current->mm;
+
+	phys_addr -= from;
+	dir = pgd_offset(mm, from);
+	flush_cache_range(mm, beg, end);
+	if (from >= end)
+		BUG();
+
+	spin_lock(&mm->page_table_lock);
+	do {
+		pmd_t *pmd = pmd_alloc(mm, dir, from);
+		error = -ENOMEM;
+		if (!pmd)
+			break;
+		error = remap_pmd_range(mm, pmd, from, end - from, phys_addr + from, prot);
+		if (error)
+			break;
+		from = (from + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
+	} while (from && (from < end));
+	spin_unlock(&mm->page_table_lock);
+	flush_tlb_range(mm, beg, end);
+	return error;
+}
+
+/*
+ * Establish a new mapping:
+ *  - flush the old one
+ *  - update the page tables
+ *  - inform the TLB about the new one
+ *
+ * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
+ */
+static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry)
+{
+#ifdef CONFIG_XEN
+	if ( likely(vma->vm_mm == current->mm) ) {
+		HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG|UVMF_LOCAL);
+	} else {
+		set_pte(page_table, entry);
+		flush_tlb_page(vma, address);
+	}
+#else
+	set_pte(page_table, entry);
+	flush_tlb_page(vma, address);
+#endif
+	update_mmu_cache(vma, address, entry);
+}
+
+/*
+ * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
+ */
+static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, 
+		pte_t *page_table)
+{
+	flush_page_to_ram(new_page);
+	flush_cache_page(vma, address);
+	establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
+}
+
+/*
+ * This routine handles present pages, when users try to write
+ * to a shared page. It is done by copying the page to a new address
+ * and decrementing the shared-page counter for the old page.
+ *
+ * Goto-purists beware: the only reason for goto's here is that it results
+ * in better assembly code.. The "default" path will see no jumps at all.
+ *
+ * Note that this routine assumes that the protection checks have been
+ * done by the caller (the low-level page fault routine in most cases).
+ * Thus we can safely just mark it writable once we've done any necessary
+ * COW.
+ *
+ * We also mark the page dirty at this point even though the page will
+ * change only once the write actually happens. This avoids a few races,
+ * and potentially makes it more efficient.
+ *
+ * We hold the mm semaphore and the page_table_lock on entry and exit
+ * with the page_table_lock released.
+ */
+static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
+	unsigned long address, pte_t *page_table, pte_t pte)
+{
+	struct page *old_page, *new_page;
+
+	old_page = pte_page(pte);
+	if (!VALID_PAGE(old_page))
+		goto bad_wp_page;
+
+	if (!TryLockPage(old_page)) {
+		int reuse = can_share_swap_page(old_page);
+		unlock_page(old_page);
+		if (reuse) {
+			flush_cache_page(vma, address);
+			establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
+			spin_unlock(&mm->page_table_lock);
+			return 1;	/* Minor fault */
+		}
+	}
+
+	/*
+	 * Ok, we need to copy. Oh, well..
+	 */
+	page_cache_get(old_page);
+	spin_unlock(&mm->page_table_lock);
+
+	new_page = alloc_page(GFP_HIGHUSER);
+	if (!new_page)
+		goto no_mem;
+	copy_cow_page(old_page,new_page,address);
+
+	/*
+	 * Re-check the pte - we dropped the lock
+	 */
+	spin_lock(&mm->page_table_lock);
+	if (pte_same(*page_table, pte)) {
+		if (PageReserved(old_page))
+			++mm->rss;
+		break_cow(vma, new_page, address, page_table);
+		if (vm_anon_lru)
+			lru_cache_add(new_page);
+
+		/* Free the old page.. */
+		new_page = old_page;
+	}
+	spin_unlock(&mm->page_table_lock);
+	page_cache_release(new_page);
+	page_cache_release(old_page);
+	return 1;	/* Minor fault */
+
+bad_wp_page:
+	spin_unlock(&mm->page_table_lock);
+	printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
+	return -1;
+no_mem:
+	page_cache_release(old_page);
+	return -1;
+}
+
+static void vmtruncate_list(struct vm_area_struct *mpnt, unsigned long pgoff)
+{
+	do {
+		struct mm_struct *mm = mpnt->vm_mm;
+		unsigned long start = mpnt->vm_start;
+		unsigned long end = mpnt->vm_end;
+		unsigned long len = end - start;
+		unsigned long diff;
+
+		/* mapping wholly truncated? */
+		if (mpnt->vm_pgoff >= pgoff) {
+			zap_page_range(mm, start, len);
+			continue;
+		}
+
+		/* mapping wholly unaffected? */
+		len = len >> PAGE_SHIFT;
+		diff = pgoff - mpnt->vm_pgoff;
+		if (diff >= len)
+			continue;
+
+		/* Ok, partially affected.. */
+		start += diff << PAGE_SHIFT;
+		len = (len - diff) << PAGE_SHIFT;
+		zap_page_range(mm, start, len);
+	} while ((mpnt = mpnt->vm_next_share) != NULL);
+}
+
+/*
+ * Handle all mappings that got truncated by a "truncate()"
+ * system call.
+ *
+ * NOTE! We have to be ready to update the memory sharing
+ * between the file and the memory map for a potential last
+ * incomplete page.  Ugly, but necessary.
+ */
+int vmtruncate(struct inode * inode, loff_t offset)
+{
+	unsigned long pgoff;
+	struct address_space *mapping = inode->i_mapping;
+	unsigned long limit;
+
+	if (inode->i_size < offset)
+		goto do_expand;
+	inode->i_size = offset;
+	spin_lock(&mapping->i_shared_lock);
+	if (!mapping->i_mmap && !mapping->i_mmap_shared)
+		goto out_unlock;
+
+	pgoff = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (mapping->i_mmap != NULL)
+		vmtruncate_list(mapping->i_mmap, pgoff);
+	if (mapping->i_mmap_shared != NULL)
+		vmtruncate_list(mapping->i_mmap_shared, pgoff);
+
+out_unlock:
+	spin_unlock(&mapping->i_shared_lock);
+	truncate_inode_pages(mapping, offset);
+	goto out_truncate;
+
+do_expand:
+	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+	if (limit != RLIM_INFINITY && offset > limit)
+		goto out_sig;
+	if (offset > inode->i_sb->s_maxbytes)
+		goto out;
+	inode->i_size = offset;
+
+out_truncate:
+	if (inode->i_op && inode->i_op->truncate) {
+		lock_kernel();
+		inode->i_op->truncate(inode);
+		unlock_kernel();
+	}
+	return 0;
+out_sig:
+	send_sig(SIGXFSZ, current, 0);
+out:
+	return -EFBIG;
+}
+
+/* 
+ * Primitive swap readahead code. We simply read an aligned block of
+ * (1 << page_cluster) entries in the swap area. This method is chosen
+ * because it doesn't cost us any seek time.  We also make sure to queue
+ * the 'original' request together with the readahead ones...  
+ */
+void swapin_readahead(swp_entry_t entry)
+{
+	int i, num;
+	struct page *new_page;
+	unsigned long offset;
+
+	/*
+	 * Get the number of handles we should do readahead io to.
+	 */
+	num = valid_swaphandles(entry, &offset);
+	for (i = 0; i < num; offset++, i++) {
+		/* Ok, do the async read-ahead now */
+		new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset));
+		if (!new_page)
+			break;
+		page_cache_release(new_page);
+	}
+	return;
+}
+
+/*
+ * We hold the mm semaphore and the page_table_lock on entry and
+ * should release the pagetable lock on exit..
+ */
+static int do_swap_page(struct mm_struct * mm,
+	struct vm_area_struct * vma, unsigned long address,
+	pte_t * page_table, pte_t orig_pte, int write_access)
+{
+	struct page *page;
+	swp_entry_t entry = pte_to_swp_entry(orig_pte);
+	pte_t pte;
+	int ret = 1;
+
+	spin_unlock(&mm->page_table_lock);
+	page = lookup_swap_cache(entry);
+	if (!page) {
+		swapin_readahead(entry);
+		page = read_swap_cache_async(entry);
+		if (!page) {
+			/*
+			 * Back out if somebody else faulted in this pte while
+			 * we released the page table lock.
+			 */
+			int retval;
+			spin_lock(&mm->page_table_lock);
+			retval = pte_same(*page_table, orig_pte) ? -1 : 1;
+			spin_unlock(&mm->page_table_lock);
+			return retval;
+		}
+
+		/* Had to read the page from swap area: Major fault */
+		ret = 2;
+	}
+
+	mark_page_accessed(page);
+
+	lock_page(page);
+
+	/*
+	 * Back out if somebody else faulted in this pte while we
+	 * released the page table lock.
+	 */
+	spin_lock(&mm->page_table_lock);
+	if (!pte_same(*page_table, orig_pte)) {
+		spin_unlock(&mm->page_table_lock);
+		unlock_page(page);
+		page_cache_release(page);
+		return 1;
+	}
+
+	/* The page isn't present yet, go ahead with the fault. */
+		
+	swap_free(entry);
+	if (vm_swap_full())
+		remove_exclusive_swap_page(page);
+
+	mm->rss++;
+	pte = mk_pte(page, vma->vm_page_prot);
+	if (write_access && can_share_swap_page(page))
+		pte = pte_mkdirty(pte_mkwrite(pte));
+	unlock_page(page);
+
+	flush_page_to_ram(page);
+	flush_icache_page(vma, page);
+#ifdef CONFIG_XEN
+	if ( likely(vma->vm_mm == current->mm) )
+		HYPERVISOR_update_va_mapping(address, pte, 0);
+	else
+		set_pte(page_table, pte);
+#else
+	set_pte(page_table, pte);
+#endif
+
+	/* No need to invalidate - it was non-present before */
+	update_mmu_cache(vma, address, pte);
+	spin_unlock(&mm->page_table_lock);
+	return ret;
+}
+
+/*
+ * We are called with the MM semaphore and page_table_lock
+ * spinlock held to protect against concurrent faults in
+ * multithreaded programs. 
+ */
+static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
+{
+	pte_t entry;
+
+	/* Read-only mapping of ZERO_PAGE. */
+	entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
+
+	/* ..except if it's a write access */
+	if (write_access) {
+		struct page *page;
+
+		/* Allocate our own private page. */
+		spin_unlock(&mm->page_table_lock);
+
+		page = alloc_page(GFP_HIGHUSER);
+		if (!page)
+			goto no_mem;
+		clear_user_highpage(page, addr);
+
+		spin_lock(&mm->page_table_lock);
+		if (!pte_none(*page_table)) {
+			page_cache_release(page);
+			spin_unlock(&mm->page_table_lock);
+			return 1;
+		}
+		mm->rss++;
+		flush_page_to_ram(page);
+		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
+		if (vm_anon_lru)
+			lru_cache_add(page);
+		mark_page_accessed(page);
+	}
+
+#ifdef CONFIG_XEN
+	if ( likely(vma->vm_mm == current->mm) )
+		HYPERVISOR_update_va_mapping(addr, entry, 0);
+	else
+		set_pte(page_table, entry);
+#else
+	set_pte(page_table, entry);
+#endif
+
+	/* No need to invalidate - it was non-present before */
+	update_mmu_cache(vma, addr, entry);
+	spin_unlock(&mm->page_table_lock);
+	return 1;	/* Minor fault */
+
+no_mem:
+	return -1;
+}
+
+/*
+ * do_no_page() tries to create a new page mapping. It aggressively
+ * tries to share with existing pages, but makes a separate copy if
+ * the "write_access" parameter is true in order to avoid the next
+ * page fault.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+ *
+ * This is called with the MM semaphore held and the page table
+ * spinlock held. Exit with the spinlock released.
+ */
+static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
+	unsigned long address, int write_access, pte_t *page_table)
+{
+	struct page * new_page;
+	pte_t entry;
+
+	if (!vma->vm_ops || !vma->vm_ops->nopage)
+		return do_anonymous_page(mm, vma, page_table, write_access, address);
+	spin_unlock(&mm->page_table_lock);
+
+	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0);
+
+	if (new_page == NULL)	/* no page was available -- SIGBUS */
+		return 0;
+	if (new_page == NOPAGE_OOM)
+		return -1;
+
+	/*
+	 * Should we do an early C-O-W break?
+	 */
+	if (write_access && !(vma->vm_flags & VM_SHARED)) {
+		struct page * page = alloc_page(GFP_HIGHUSER);
+		if (!page) {
+			page_cache_release(new_page);
+			return -1;
+		}
+		copy_user_highpage(page, new_page, address);
+		page_cache_release(new_page);
+		if (vm_anon_lru)
+			lru_cache_add(page);
+		new_page = page;
+	}
+
+	spin_lock(&mm->page_table_lock);
+	/*
+	 * This silly early PAGE_DIRTY setting removes a race
+	 * due to the bad i386 page protection. But it's valid
+	 * for other architectures too.
+	 *
+	 * Note that if write_access is true, we either now have
+	 * an exclusive copy of the page, or this is a shared mapping,
+	 * so we can make it writable and dirty to avoid having to
+	 * handle that later.
+	 */
+	/* Only go through if we didn't race with anybody else... */
+	if (pte_none(*page_table)) {
+		if (!PageReserved(new_page))
+			++mm->rss;
+		flush_page_to_ram(new_page);
+		flush_icache_page(vma, new_page);
+		entry = mk_pte(new_page, vma->vm_page_prot);
+		if (write_access)
+			entry = pte_mkwrite(pte_mkdirty(entry));
+#ifdef CONFIG_XEN
+		if ( likely(vma->vm_mm == current->mm) )
+			HYPERVISOR_update_va_mapping(address, entry, 0);
+		else
+			set_pte(page_table, entry);
+#else
+		set_pte(page_table, entry);
+#endif
+	} else {
+		/* One of our sibling threads was faster, back out. */
+		page_cache_release(new_page);
+		spin_unlock(&mm->page_table_lock);
+		return 1;
+	}
+
+	/* no need to invalidate: a not-present page shouldn't be cached */
+	update_mmu_cache(vma, address, entry);
+	spin_unlock(&mm->page_table_lock);
+	return 2;	/* Major fault */
+}
+
+/*
+ * These routines also need to handle stuff like marking pages dirty
+ * and/or accessed for architectures that don't do it in hardware (most
+ * RISC architectures).  The early dirtying is also good on the i386.
+ *
+ * There is also a hook called "update_mmu_cache()" that architectures
+ * with external mmu caches can use to update those (ie the Sparc or
+ * PowerPC hashed page tables that act as extended TLBs).
+ *
+ * Note the "page_table_lock". It is to protect against kswapd removing
+ * pages from under us. Note that kswapd only ever _removes_ pages, never
+ * adds them. As such, once we have noticed that the page is not present,
+ * we can drop the lock early.
+ *
+ * The adding of pages is protected by the MM semaphore (which we hold),
+ * so we don't need to worry about a page being suddenly been added into
+ * our VM.
+ *
+ * We enter with the pagetable spinlock held, we are supposed to
+ * release it when done.
+ */
+static inline int handle_pte_fault(struct mm_struct *mm,
+	struct vm_area_struct * vma, unsigned long address,
+	int write_access, pte_t * pte)
+{
+	pte_t entry;
+
+	entry = *pte;
+	if (!pte_present(entry)) {
+		/*
+		 * If it truly wasn't present, we know that kswapd
+		 * and the PTE updates will not touch it later. So
+		 * drop the lock.
+		 */
+		if (pte_none(entry))
+			return do_no_page(mm, vma, address, write_access, pte);
+		return do_swap_page(mm, vma, address, pte, entry, write_access);
+	}
+
+	if (write_access) {
+		if (!pte_write(entry))
+			return do_wp_page(mm, vma, address, pte, entry);
+
+		entry = pte_mkdirty(entry);
+	}
+	entry = pte_mkyoung(entry);
+	establish_pte(vma, address, pte, entry);
+	spin_unlock(&mm->page_table_lock);
+	return 1;
+}
+
+/*
+ * By the time we get here, we already hold the mm semaphore
+ */
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+	unsigned long address, int write_access)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+
+	current->state = TASK_RUNNING;
+	pgd = pgd_offset(mm, address);
+
+	/*
+	 * We need the page table lock to synchronize with kswapd
+	 * and the SMP-safe atomic PTE updates.
+	 */
+	spin_lock(&mm->page_table_lock);
+	pmd = pmd_alloc(mm, pgd, address);
+
+	if (pmd) {
+		pte_t * pte = pte_alloc(mm, pmd, address);
+		if (pte)
+			return handle_pte_fault(mm, vma, address, write_access, pte);
+	}
+	spin_unlock(&mm->page_table_lock);
+	return -1;
+}
+
+/*
+ * Allocate page middle directory.
+ *
+ * We've already handled the fast-path in-line, and we own the
+ * page table lock.
+ *
+ * On a two-level page table, this ends up actually being entirely
+ * optimized away.
+ */
+pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+	pmd_t *new;
+
+	/* "fast" allocation can happen without dropping the lock.. */
+	new = pmd_alloc_one_fast(mm, address);
+	if (!new) {
+		spin_unlock(&mm->page_table_lock);
+		new = pmd_alloc_one(mm, address);
+		spin_lock(&mm->page_table_lock);
+		if (!new)
+			return NULL;
+
+		/*
+		 * Because we dropped the lock, we should re-check the
+		 * entry, as somebody else could have populated it..
+		 */
+		if (!pgd_none(*pgd)) {
+			pmd_free(new);
+			check_pgt_cache();
+			goto out;
+		}
+	}
+	pgd_populate(mm, pgd, new);
+out:
+	return pmd_offset(pgd, address);
+}
+
+/*
+ * Allocate the page table directory.
+ *
+ * We've already handled the fast-path in-line, and we own the
+ * page table lock.
+ */
+pte_t fastcall *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+{
+	if (pmd_none(*pmd)) {
+		pte_t *new;
+
+		/* "fast" allocation can happen without dropping the lock.. */
+		new = pte_alloc_one_fast(mm, address);
+		if (!new) {
+			spin_unlock(&mm->page_table_lock);
+			new = pte_alloc_one(mm, address);
+			spin_lock(&mm->page_table_lock);
+			if (!new)
+				return NULL;
+
+			/*
+			 * Because we dropped the lock, we should re-check the
+			 * entry, as somebody else could have populated it..
+			 */
+			if (!pmd_none(*pmd)) {
+				pte_free(new);
+				check_pgt_cache();
+				goto out;
+			}
+		}
+		pmd_populate(mm, pmd, new);
+	}
+out:
+	return pte_offset(pmd, address);
+}
+
+int make_pages_present(unsigned long addr, unsigned long end)
+{
+	int ret, len, write;
+	struct vm_area_struct * vma;
+
+	vma = find_vma(current->mm, addr);
+	write = (vma->vm_flags & VM_WRITE) != 0;
+	if (addr >= end)
+		BUG();
+	if (end > vma->vm_end)
+		BUG();
+	len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
+	ret = get_user_pages(current, current->mm, addr,
+			len, write, 0, NULL, NULL);
+	return ret == len ? 0 : -1;
+}
+
+struct page * vmalloc_to_page(void * vmalloc_addr)
+{
+	unsigned long addr = (unsigned long) vmalloc_addr;
+	struct page *page = NULL;
+	pmd_t *pmd;
+	pte_t *pte;
+	pgd_t *pgd;
+	
+	pgd = pgd_offset_k(addr);
+	if (!pgd_none(*pgd)) {
+		pmd = pmd_offset(pgd, addr);
+		if (!pmd_none(*pmd)) {
+			pte = pte_offset(pmd, addr);
+			if (pte_present(*pte)) {
+				page = pte_page(*pte);
+			}
+		}
+	}
+	return page;
+}
diff --git a/linux-2.4-xen-sparse/mm/mprotect.c b/linux-2.4-xen-sparse/mm/mprotect.c
new file mode 100644
index 0000000000..9d2ed3c24c
--- /dev/null
+++ b/linux-2.4-xen-sparse/mm/mprotect.c
@@ -0,0 +1,344 @@
+/*
+ *	linux/mm/mprotect.c
+ *
+ *  (C) Copyright 1994 Linus Torvalds
+ */
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/shm.h>
+#include <linux/mman.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+static inline void change_pte_range(pmd_t * pmd, unsigned long address,
+	unsigned long size, pgprot_t newprot)
+{
+	pte_t * pte;
+	unsigned long end;
+
+	if (pmd_none(*pmd))
+		return;
+	if (pmd_bad(*pmd)) {
+		pmd_ERROR(*pmd);
+		pmd_clear(pmd);
+		return;
+	}
+	pte = pte_offset(pmd, address);
+	address &= ~PMD_MASK;
+	end = address + size;
+	if (end > PMD_SIZE)
+		end = PMD_SIZE;
+	do {
+		if (pte_present(*pte)) {
+			pte_t entry;
+
+			/* Avoid an SMP race with hardware updated dirty/clean
+			 * bits by wiping the pte and then setting the new pte
+			 * into place.
+			 */
+			entry = ptep_get_and_clear(pte);
+			set_pte(pte, pte_modify(entry, newprot));
+		}
+		address += PAGE_SIZE;
+		pte++;
+	} while (address && (address < end));
+}
+
+static inline void change_pmd_range(pgd_t * pgd, unsigned long address,
+	unsigned long size, pgprot_t newprot)
+{
+	pmd_t * pmd;
+	unsigned long end;
+
+	if (pgd_none(*pgd))
+		return;
+	if (pgd_bad(*pgd)) {
+		pgd_ERROR(*pgd);
+		pgd_clear(pgd);
+		return;
+	}
+	pmd = pmd_offset(pgd, address);
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+	do {
+		change_pte_range(pmd, address, end - address, newprot);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && (address < end));
+}
+
+static void change_protection(unsigned long start, unsigned long end, pgprot_t newprot)
+{
+	pgd_t *dir;
+	unsigned long beg = start;
+
+	dir = pgd_offset(current->mm, start);
+	flush_cache_range(current->mm, beg, end);
+	if (start >= end)
+		BUG();
+	spin_lock(&current->mm->page_table_lock);
+	do {
+		change_pmd_range(dir, start, end - start, newprot);
+		start = (start + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
+	} while (start && (start < end));
+	spin_unlock(&current->mm->page_table_lock);
+	flush_tlb_range(current->mm, beg, end);
+	return;
+}
+
+static inline int mprotect_fixup_all(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
+	int newflags, pgprot_t prot)
+{
+	struct vm_area_struct * prev = *pprev;
+	struct mm_struct * mm = vma->vm_mm;
+
+	if (prev && prev->vm_end == vma->vm_start && can_vma_merge(prev, newflags) &&
+	    !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+		spin_lock(&mm->page_table_lock);
+		prev->vm_end = vma->vm_end;
+		__vma_unlink(mm, vma, prev);
+		spin_unlock(&mm->page_table_lock);
+
+		kmem_cache_free(vm_area_cachep, vma);
+		mm->map_count--;
+
+		return 0;
+	}
+
+	spin_lock(&mm->page_table_lock);
+	vma->vm_flags = newflags;
+	vma->vm_page_prot = prot;
+	spin_unlock(&mm->page_table_lock);
+
+	*pprev = vma;
+
+	return 0;
+}
+
+static inline int mprotect_fixup_start(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
+	unsigned long end,
+	int newflags, pgprot_t prot)
+{
+	struct vm_area_struct * n, * prev = *pprev;
+
+	*pprev = vma;
+
+	if (prev && prev->vm_end == vma->vm_start && can_vma_merge(prev, newflags) &&
+	    !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+		spin_lock(&vma->vm_mm->page_table_lock);
+		prev->vm_end = end;
+		vma->vm_start = end;
+		spin_unlock(&vma->vm_mm->page_table_lock);
+
+		return 0;
+	}
+	n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!n)
+		return -ENOMEM;
+	*n = *vma;
+	n->vm_end = end;
+	n->vm_flags = newflags;
+	n->vm_raend = 0;
+	n->vm_page_prot = prot;
+	if (n->vm_file)
+		get_file(n->vm_file);
+	if (n->vm_ops && n->vm_ops->open)
+		n->vm_ops->open(n);
+	vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT;
+	lock_vma_mappings(vma);
+	spin_lock(&vma->vm_mm->page_table_lock);
+	vma->vm_start = end;
+	__insert_vm_struct(current->mm, n);
+	spin_unlock(&vma->vm_mm->page_table_lock);
+	unlock_vma_mappings(vma);
+
+	return 0;
+}
+
+static inline int mprotect_fixup_end(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
+	unsigned long start,
+	int newflags, pgprot_t prot)
+{
+	struct vm_area_struct * n;
+
+	n = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	if (!n)
+		return -ENOMEM;
+	*n = *vma;
+	n->vm_start = start;
+	n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT;
+	n->vm_flags = newflags;
+	n->vm_raend = 0;
+	n->vm_page_prot = prot;
+	if (n->vm_file)
+		get_file(n->vm_file);
+	if (n->vm_ops && n->vm_ops->open)
+		n->vm_ops->open(n);
+	lock_vma_mappings(vma);
+	spin_lock(&vma->vm_mm->page_table_lock);
+	vma->vm_end = start;
+	__insert_vm_struct(current->mm, n);
+	spin_unlock(&vma->vm_mm->page_table_lock);
+	unlock_vma_mappings(vma);
+
+	*pprev = n;
+
+	return 0;
+}
+
+static inline int mprotect_fixup_middle(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
+	unsigned long start, unsigned long end,
+	int newflags, pgprot_t prot)
+{
+	struct vm_area_struct * left, * right;
+
+	left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!left)
+		return -ENOMEM;
+	right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!right) {
+		kmem_cache_free(vm_area_cachep, left);
+		return -ENOMEM;
+	}
+	*left = *vma;
+	*right = *vma;
+	left->vm_end = start;
+	right->vm_start = end;
+	right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
+	left->vm_raend = 0;
+	right->vm_raend = 0;
+	if (vma->vm_file)
+		atomic_add(2,&vma->vm_file->f_count);
+	if (vma->vm_ops && vma->vm_ops->open) {
+		vma->vm_ops->open(left);
+		vma->vm_ops->open(right);
+	}
+	vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
+	vma->vm_raend = 0;
+	vma->vm_page_prot = prot;
+	lock_vma_mappings(vma);
+	spin_lock(&vma->vm_mm->page_table_lock);
+	vma->vm_start = start;
+	vma->vm_end = end;
+	vma->vm_flags = newflags;
+	__insert_vm_struct(current->mm, left);
+	__insert_vm_struct(current->mm, right);
+	spin_unlock(&vma->vm_mm->page_table_lock);
+	unlock_vma_mappings(vma);
+
+	*pprev = right;
+
+	return 0;
+}
+
+static int mprotect_fixup(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
+	unsigned long start, unsigned long end, unsigned int newflags)
+{
+	pgprot_t newprot;
+	int error;
+
+	if (newflags == vma->vm_flags) {
+		*pprev = vma;
+		return 0;
+	}
+	newprot = protection_map[newflags & 0xf];
+	if (start == vma->vm_start) {
+		if (end == vma->vm_end)
+			error = mprotect_fixup_all(vma, pprev, newflags, newprot);
+		else
+			error = mprotect_fixup_start(vma, pprev, end, newflags, newprot);
+	} else if (end == vma->vm_end)
+		error = mprotect_fixup_end(vma, pprev, start, newflags, newprot);
+	else
+		error = mprotect_fixup_middle(vma, pprev, start, end, newflags, newprot);
+
+	if (error)
+		return error;
+
+	change_protection(start, end, newprot);
+	return 0;
+}
+
+asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot)
+{
+	unsigned long nstart, end, tmp;
+	struct vm_area_struct * vma, * next, * prev;
+	int error = -EINVAL;
+
+	if (start & ~PAGE_MASK)
+		return -EINVAL;
+	len = PAGE_ALIGN(len);
+	end = start + len;
+	if (end < start)
+		return -ENOMEM;
+	if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC))
+		return -EINVAL;
+	if (end == start)
+		return 0;
+
+	down_write(&current->mm->mmap_sem);
+
+	vma = find_vma_prev(current->mm, start, &prev);
+	error = -ENOMEM;
+	if (!vma || vma->vm_start > start)
+		goto out;
+
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+	/* mprotect() unsupported for I/O mappings in Xenolinux. */
+	error = -EINVAL;
+	if (vma->vm_flags & VM_IO)
+		goto out;
+#endif
+
+	for (nstart = start ; ; ) {
+		unsigned int newflags;
+		int last = 0;
+
+		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
+
+		newflags = prot | (vma->vm_flags & ~(PROT_READ | PROT_WRITE | PROT_EXEC));
+		if ((newflags & ~(newflags >> 4)) & 0xf) {
+			error = -EACCES;
+			goto out;
+		}
+
+		if (vma->vm_end > end) {
+			error = mprotect_fixup(vma, &prev, nstart, end, newflags);
+			goto out;
+		}
+		if (vma->vm_end == end)
+			last = 1;
+
+		tmp = vma->vm_end;
+		next = vma->vm_next;
+		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
+		if (error)
+			goto out;
+		if (last)
+			break;
+		nstart = tmp;
+		vma = next;
+		if (!vma || vma->vm_start != nstart) {
+			error = -ENOMEM;
+			goto out;
+		}
+	}
+	if (next && prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags) &&
+	    !prev->vm_file && !(prev->vm_flags & VM_SHARED)) {
+		spin_lock(&prev->vm_mm->page_table_lock);
+		prev->vm_end = next->vm_end;
+		__vma_unlink(prev->vm_mm, next, prev);
+		spin_unlock(&prev->vm_mm->page_table_lock);
+
+		kmem_cache_free(vm_area_cachep, next);
+		prev->vm_mm->map_count--;
+	}
+out:
+	up_write(&current->mm->mmap_sem);
+	return error;
+}
diff --git a/linux-2.4-xen-sparse/mm/mremap.c b/linux-2.4-xen-sparse/mm/mremap.c
new file mode 100644
index 0000000000..475c308b1b
--- /dev/null
+++ b/linux-2.4-xen-sparse/mm/mremap.c
@@ -0,0 +1,390 @@
+/*
+ *	linux/mm/remap.c
+ *
+ *	(C) Copyright 1996 Linus Torvalds
+ */
+
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/shm.h>
+#include <linux/mman.h>
+#include <linux/swap.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+
+extern int vm_enough_memory(long pages);
+
+static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t * pgd;
+	pmd_t * pmd;
+	pte_t * pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_none(*pgd))
+		goto end;
+	if (pgd_bad(*pgd)) {
+		pgd_ERROR(*pgd);
+		pgd_clear(pgd);
+		goto end;
+	}
+
+	pmd = pmd_offset(pgd, addr);
+	if (pmd_none(*pmd))
+		goto end;
+	if (pmd_bad(*pmd)) {
+		pmd_ERROR(*pmd);
+		pmd_clear(pmd);
+		goto end;
+	}
+
+	pte = pte_offset(pmd, addr);
+	if (pte_none(*pte))
+		pte = NULL;
+end:
+	return pte;
+}
+
+static inline pte_t *alloc_one_pte(struct mm_struct *mm, unsigned long addr)
+{
+	pmd_t * pmd;
+	pte_t * pte = NULL;
+
+	pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr);
+	if (pmd)
+		pte = pte_alloc(mm, pmd, addr);
+	return pte;
+}
+
+static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst)
+{
+	int error = 0;
+	pte_t pte;
+
+	if (!pte_none(*src)) {
+		pte = ptep_get_and_clear(src);
+		if (!dst) {
+			/* No dest?  We must put it back. */
+			dst = src;
+			error++;
+		}
+		set_pte(dst, pte);
+	}
+	return error;
+}
+
+static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned long new_addr)
+{
+	int error = 0;
+	pte_t * src, * dst;
+
+	spin_lock(&mm->page_table_lock);
+	src = get_one_pte(mm, old_addr);
+	if (src) {
+		dst = alloc_one_pte(mm, new_addr);
+		src = get_one_pte(mm, old_addr);
+		if (src) 
+			error = copy_one_pte(mm, src, dst);
+	}
+	spin_unlock(&mm->page_table_lock);
+	return error;
+}
+
+static int move_page_tables(struct mm_struct * mm,
+	unsigned long new_addr, unsigned long old_addr, unsigned long len)
+{
+	unsigned long offset = len;
+
+	flush_cache_range(mm, old_addr, old_addr + len);
+
+	/*
+	 * This is not the clever way to do this, but we're taking the
+	 * easy way out on the assumption that most remappings will be
+	 * only a few pages.. This also makes error recovery easier.
+	 */
+	while (offset) {
+		offset -= PAGE_SIZE;
+		if (move_one_page(mm, old_addr + offset, new_addr + offset))
+			goto oops_we_failed;
+	}
+	flush_tlb_range(mm, old_addr, old_addr + len);
+	return 0;
+
+	/*
+	 * Ok, the move failed because we didn't have enough pages for
+	 * the new page table tree. This is unlikely, but we have to
+	 * take the possibility into account. In that case we just move
+	 * all the pages back (this will work, because we still have
+	 * the old page tables)
+	 */
+oops_we_failed:
+	flush_cache_range(mm, new_addr, new_addr + len);
+	while ((offset += PAGE_SIZE) < len)
+		move_one_page(mm, new_addr + offset, old_addr + offset);
+	zap_page_range(mm, new_addr, len);
+	return -1;
+}
+
+static inline unsigned long move_vma(struct vm_area_struct * vma,
+	unsigned long addr, unsigned long old_len, unsigned long new_len,
+	unsigned long new_addr)
+{
+	struct mm_struct * mm = vma->vm_mm;
+	struct vm_area_struct * new_vma, * next, * prev;
+	int allocated_vma;
+
+	new_vma = NULL;
+	next = find_vma_prev(mm, new_addr, &prev);
+	if (next) {
+		if (prev && prev->vm_end == new_addr &&
+		    can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+			spin_lock(&mm->page_table_lock);
+			prev->vm_end = new_addr + new_len;
+			spin_unlock(&mm->page_table_lock);
+			new_vma = prev;
+			if (next != prev->vm_next)
+				BUG();
+			if (prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags)) {
+				spin_lock(&mm->page_table_lock);
+				prev->vm_end = next->vm_end;
+				__vma_unlink(mm, next, prev);
+				spin_unlock(&mm->page_table_lock);
+
+				mm->map_count--;
+				kmem_cache_free(vm_area_cachep, next);
+			}
+		} else if (next->vm_start == new_addr + new_len &&
+			   can_vma_merge(next, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+			spin_lock(&mm->page_table_lock);
+			next->vm_start = new_addr;
+			spin_unlock(&mm->page_table_lock);
+			new_vma = next;
+		}
+	} else {
+		prev = find_vma(mm, new_addr-1);
+		if (prev && prev->vm_end == new_addr &&
+		    can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+			spin_lock(&mm->page_table_lock);
+			prev->vm_end = new_addr + new_len;
+			spin_unlock(&mm->page_table_lock);
+			new_vma = prev;
+		}
+	}
+
+	allocated_vma = 0;
+	if (!new_vma) {
+		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+		if (!new_vma)
+			goto out;
+		allocated_vma = 1;
+	}
+
+	if (!move_page_tables(current->mm, new_addr, addr, old_len)) {
+		unsigned long vm_locked = vma->vm_flags & VM_LOCKED;
+
+		if (allocated_vma) {
+			*new_vma = *vma;
+			new_vma->vm_start = new_addr;
+			new_vma->vm_end = new_addr+new_len;
+			new_vma->vm_pgoff += (addr-vma->vm_start) >> PAGE_SHIFT;
+			new_vma->vm_raend = 0;
+			if (new_vma->vm_file)
+				get_file(new_vma->vm_file);
+			if (new_vma->vm_ops && new_vma->vm_ops->open)
+				new_vma->vm_ops->open(new_vma);
+			insert_vm_struct(current->mm, new_vma);
+		}
+
+		/* XXX: possible errors masked, mapping might remain */
+		do_munmap(current->mm, addr, old_len);
+
+		current->mm->total_vm += new_len >> PAGE_SHIFT;
+		if (vm_locked) {
+			current->mm->locked_vm += new_len >> PAGE_SHIFT;
+			if (new_len > old_len)
+				make_pages_present(new_addr + old_len,
+						   new_addr + new_len);
+		}
+		return new_addr;
+	}
+	if (allocated_vma)
+		kmem_cache_free(vm_area_cachep, new_vma);
+ out:
+	return -ENOMEM;
+}
+
+/*
+ * Expand (or shrink) an existing mapping, potentially moving it at the
+ * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
+ *
+ * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
+ * This option implies MREMAP_MAYMOVE.
+ */
+unsigned long do_mremap(unsigned long addr,
+	unsigned long old_len, unsigned long new_len,
+	unsigned long flags, unsigned long new_addr)
+{
+	struct vm_area_struct *vma;
+	unsigned long ret = -EINVAL;
+
+	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
+		goto out;
+
+	if (addr & ~PAGE_MASK)
+		goto out;
+
+	old_len = PAGE_ALIGN(old_len);
+	new_len = PAGE_ALIGN(new_len);
+
+	if (old_len > TASK_SIZE || addr > TASK_SIZE - old_len)
+		goto out;
+
+	if (addr >= TASK_SIZE)
+		goto out;
+
+	/* new_addr is only valid if MREMAP_FIXED is specified */
+	if (flags & MREMAP_FIXED) {
+		if (new_addr & ~PAGE_MASK)
+			goto out;
+		if (!(flags & MREMAP_MAYMOVE))
+			goto out;
+
+		if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
+			goto out;
+
+		if (new_addr >= TASK_SIZE)
+			goto out;
+
+		/*
+		 * Allow new_len == 0 only if new_addr == addr
+		 * to preserve truncation in place (that was working
+		 * safe and some app may depend on it).
+		 */
+		if (unlikely(!new_len && new_addr != addr))
+			goto out;
+
+		/* Check if the location we're moving into overlaps the
+		 * old location at all, and fail if it does.
+		 */
+		if ((new_addr <= addr) && (new_addr+new_len) > addr)
+			goto out;
+
+		if ((addr <= new_addr) && (addr+old_len) > new_addr)
+			goto out;
+
+		ret = do_munmap(current->mm, new_addr, new_len);
+		if (ret && new_len)
+			goto out;
+	}
+
+	/*
+	 * Always allow a shrinking remap: that just unmaps
+	 * the unnecessary pages..
+	 */
+	if (old_len >= new_len) {
+		ret = do_munmap(current->mm, addr+new_len, old_len - new_len);
+		if (ret && old_len != new_len)
+			goto out;
+		ret = addr;
+		if (!(flags & MREMAP_FIXED) || (new_addr == addr))
+			goto out;
+	}
+
+	/*
+	 * Ok, we need to grow..  or relocate.
+	 */
+	ret = -EFAULT;
+	vma = find_vma(current->mm, addr);
+	if (!vma || vma->vm_start > addr)
+		goto out;
+	/* We can't remap across vm area boundaries */
+	if (old_len > vma->vm_end - addr)
+		goto out;
+	if (vma->vm_flags & VM_DONTEXPAND) {
+		if (new_len > old_len)
+			goto out;
+	}
+	if (vma->vm_flags & VM_LOCKED) {
+		unsigned long locked = current->mm->locked_vm << PAGE_SHIFT;
+		locked += new_len - old_len;
+		ret = -EAGAIN;
+		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
+			goto out;
+	}
+	ret = -ENOMEM;
+	if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len)
+	    > current->rlim[RLIMIT_AS].rlim_cur)
+		goto out;
+	/* Private writable mapping? Check memory availability.. */
+	if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
+	    !(flags & MAP_NORESERVE)				 &&
+	    !vm_enough_memory((new_len - old_len) >> PAGE_SHIFT))
+		goto out;
+
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+	/* mremap() unsupported for I/O mappings in Xenolinux. */
+	ret = -EINVAL;
+	if (vma->vm_flags & VM_IO)
+		goto out;
+#endif
+
+	/* old_len exactly to the end of the area..
+	 * And we're not relocating the area.
+	 */
+	if (old_len == vma->vm_end - addr &&
+	    !((flags & MREMAP_FIXED) && (addr != new_addr)) &&
+	    (old_len != new_len || !(flags & MREMAP_MAYMOVE))) {
+		unsigned long max_addr = TASK_SIZE;
+		if (vma->vm_next)
+			max_addr = vma->vm_next->vm_start;
+		/* can we just expand the current mapping? */
+		if (max_addr - addr >= new_len) {
+			int pages = (new_len - old_len) >> PAGE_SHIFT;
+			spin_lock(&vma->vm_mm->page_table_lock);
+			vma->vm_end = addr + new_len;
+			spin_unlock(&vma->vm_mm->page_table_lock);
+			current->mm->total_vm += pages;
+			if (vma->vm_flags & VM_LOCKED) {
+				current->mm->locked_vm += pages;
+				make_pages_present(addr + old_len,
+						   addr + new_len);
+			}
+			ret = addr;
+			goto out;
+		}
+	}
+
+	/*
+	 * We weren't able to just expand or shrink the area,
+	 * we need to create a new one and move it..
+	 */
+	ret = -ENOMEM;
+	if (flags & MREMAP_MAYMOVE) {
+		if (!(flags & MREMAP_FIXED)) {
+			unsigned long map_flags = 0;
+			if (vma->vm_flags & VM_SHARED)
+				map_flags |= MAP_SHARED;
+
+			new_addr = get_unmapped_area(vma->vm_file, 0, new_len, vma->vm_pgoff, map_flags);
+			ret = new_addr;
+			if (new_addr & ~PAGE_MASK)
+				goto out;
+		}
+		ret = move_vma(vma, addr, old_len, new_len, new_addr);
+	}
+out:
+	return ret;
+}
+
+asmlinkage unsigned long sys_mremap(unsigned long addr,
+	unsigned long old_len, unsigned long new_len,
+	unsigned long flags, unsigned long new_addr)
+{
+	unsigned long ret;
+
+	down_write(&current->mm->mmap_sem);
+	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
+	up_write(&current->mm->mmap_sem);
+	return ret;
+}
diff --git a/linux-2.4-xen-sparse/mm/page_alloc.c b/linux-2.4-xen-sparse/mm/page_alloc.c
new file mode 100644
index 0000000000..928433b8df
--- /dev/null
+++ b/linux-2.4-xen-sparse/mm/page_alloc.c
@@ -0,0 +1,972 @@
+/*
+ *  linux/mm/page_alloc.c
+ *
+ *  Manages the free list, the system allocates free pages here.
+ *  Note that kmalloc() lives in slab.c
+ *
+ *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *  Swap reorganised 29.12.95, Stephen Tweedie
+ *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ *  Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
+ *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
+ *  Zone balancing, Kanoj Sarcar, SGI, Jan 2000
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swapctl.h>
+#include <linux/interrupt.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+int nr_swap_pages;
+int nr_active_pages;
+int nr_inactive_pages;
+LIST_HEAD(inactive_list);
+LIST_HEAD(active_list);
+pg_data_t *pgdat_list;
+
+/*
+ *
+ * The zone_table array is used to look up the address of the
+ * struct zone corresponding to a given zone number (ZONE_DMA,
+ * ZONE_NORMAL, or ZONE_HIGHMEM).
+ */
+zone_t *zone_table[MAX_NR_ZONES*MAX_NR_NODES];
+EXPORT_SYMBOL(zone_table);
+
+static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
+static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, };
+static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, };
+static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, };
+static int lower_zone_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };
+
+int vm_gfp_debug = 0;
+
+static void FASTCALL(__free_pages_ok (struct page *page, unsigned int order));
+
+static spinlock_t free_pages_ok_no_irq_lock = SPIN_LOCK_UNLOCKED;
+struct page * free_pages_ok_no_irq_head;
+
+static void do_free_pages_ok_no_irq(void * arg)
+{
+       struct page * page, * __page;
+
+       spin_lock_irq(&free_pages_ok_no_irq_lock);
+
+       page = free_pages_ok_no_irq_head;
+       free_pages_ok_no_irq_head = NULL;
+
+       spin_unlock_irq(&free_pages_ok_no_irq_lock);
+
+       while (page) {
+               __page = page;
+               page = page->next_hash;
+               __free_pages_ok(__page, __page->index);
+       }
+}
+
+static struct tq_struct free_pages_ok_no_irq_task = {
+       .routine        = do_free_pages_ok_no_irq,
+};
+
+
+/*
+ * Temporary debugging check.
+ */
+#define BAD_RANGE(zone, page)						\
+(									\
+	(((page) - mem_map) >= ((zone)->zone_start_mapnr+(zone)->size))	\
+	|| (((page) - mem_map) < (zone)->zone_start_mapnr)		\
+	|| ((zone) != page_zone(page))					\
+)
+
+/*
+ * Freeing function for a buddy system allocator.
+ * Contrary to prior comments, this is *NOT* hairy, and there
+ * is no reason for anyone not to understand it.
+ *
+ * The concept of a buddy system is to maintain direct-mapped tables
+ * (containing bit values) for memory blocks of various "orders".
+ * The bottom level table contains the map for the smallest allocatable
+ * units of memory (here, pages), and each level above it describes
+ * pairs of units from the levels below, hence, "buddies".
+ * At a high level, all that happens here is marking the table entry
+ * at the bottom level available, and propagating the changes upward
+ * as necessary, plus some accounting needed to play nicely with other
+ * parts of the VM system.
+ * At each level, we keep one bit for each pair of blocks, which
+ * is set to 1 iff only one of the pair is allocated.  So when we
+ * are allocating or freeing one, we can derive the state of the
+ * other.  That is, if we allocate a small block, and both were   
+ * free, the remainder of the region must be split into blocks.   
+ * If a block is freed, and its buddy is also free, then this
+ * triggers coalescing into a block of larger size.            
+ *
+ * -- wli
+ */
+
+static void fastcall __free_pages_ok (struct page *page, unsigned int order)
+{
+	unsigned long index, page_idx, mask, flags;
+	free_area_t *area;
+	struct page *base;
+	zone_t *zone;
+
+	if (PageForeign(page))
+		return (PageForeignDestructor(page))(page);
+
+	/*
+	 * Yes, think what happens when other parts of the kernel take 
+	 * a reference to a page in order to pin it for io. -ben
+	 */
+	if (PageLRU(page)) {
+		if (unlikely(in_interrupt())) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&free_pages_ok_no_irq_lock, flags);
+			page->next_hash = free_pages_ok_no_irq_head;
+			free_pages_ok_no_irq_head = page;
+			page->index = order;
+	
+			spin_unlock_irqrestore(&free_pages_ok_no_irq_lock, flags);
+	
+			schedule_task(&free_pages_ok_no_irq_task);
+			return;
+		}
+		
+		lru_cache_del(page);
+	}
+
+	if (page->buffers)
+		BUG();
+	if (page->mapping)
+		BUG();
+	if (!VALID_PAGE(page))
+		BUG();
+	if (PageLocked(page))
+		BUG();
+	if (PageActive(page))
+		BUG();
+	ClearPageReferenced(page);
+	ClearPageDirty(page);
+
+	if (current->flags & PF_FREE_PAGES)
+		goto local_freelist;
+ back_local_freelist:
+
+	zone = page_zone(page);
+
+	mask = (~0UL) << order;
+	base = zone->zone_mem_map;
+	page_idx = page - base;
+	if (page_idx & ~mask)
+		BUG();
+	index = page_idx >> (1 + order);
+
+	area = zone->free_area + order;
+
+	spin_lock_irqsave(&zone->lock, flags);
+
+	zone->free_pages -= mask;
+
+	while (mask + (1 << (MAX_ORDER-1))) {
+		struct page *buddy1, *buddy2;
+
+		if (area >= zone->free_area + MAX_ORDER)
+			BUG();
+		if (!__test_and_change_bit(index, area->map))
+			/*
+			 * the buddy page is still allocated.
+			 */
+			break;
+		/*
+		 * Move the buddy up one level.
+		 * This code is taking advantage of the identity:
+		 * 	-mask = 1+~mask
+		 */
+		buddy1 = base + (page_idx ^ -mask);
+		buddy2 = base + page_idx;
+		if (BAD_RANGE(zone,buddy1))
+			BUG();
+		if (BAD_RANGE(zone,buddy2))
+			BUG();
+
+		list_del(&buddy1->list);
+		mask <<= 1;
+		area++;
+		index >>= 1;
+		page_idx &= mask;
+	}
+	list_add(&(base + page_idx)->list, &area->free_list);
+
+	spin_unlock_irqrestore(&zone->lock, flags);
+	return;
+
+ local_freelist:
+	if (current->nr_local_pages)
+		goto back_local_freelist;
+	if (in_interrupt())
+		goto back_local_freelist;		
+
+	list_add(&page->list, &current->local_pages);
+	page->index = order;
+	current->nr_local_pages++;
+}
+
+#define MARK_USED(index, order, area) \
+	__change_bit((index) >> (1+(order)), (area)->map)
+
+static inline struct page * expand (zone_t *zone, struct page *page,
+	 unsigned long index, int low, int high, free_area_t * area)
+{
+	unsigned long size = 1 << high;
+
+	while (high > low) {
+		if (BAD_RANGE(zone,page))
+			BUG();
+		area--;
+		high--;
+		size >>= 1;
+		list_add(&(page)->list, &(area)->free_list);
+		MARK_USED(index, high, area);
+		index += size;
+		page += size;
+	}
+	if (BAD_RANGE(zone,page))
+		BUG();
+	return page;
+}
+
+static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order));
+static struct page * fastcall rmqueue(zone_t *zone, unsigned int order)
+{
+	free_area_t * area = zone->free_area + order;
+	unsigned int curr_order = order;
+	struct list_head *head, *curr;
+	unsigned long flags;
+	struct page *page;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	do {
+		head = &area->free_list;
+		curr = head->next;
+
+		if (curr != head) {
+			unsigned int index;
+
+			page = list_entry(curr, struct page, list);
+			if (BAD_RANGE(zone,page))
+				BUG();
+			list_del(curr);
+			index = page - zone->zone_mem_map;
+			if (curr_order != MAX_ORDER-1)
+				MARK_USED(index, curr_order, area);
+			zone->free_pages -= 1UL << order;
+
+			page = expand(zone, page, index, order, curr_order, area);
+			spin_unlock_irqrestore(&zone->lock, flags);
+
+			set_page_count(page, 1);
+			if (BAD_RANGE(zone,page))
+				BUG();
+			if (PageLRU(page))
+				BUG();
+			if (PageActive(page))
+				BUG();
+			return page;	
+		}
+		curr_order++;
+		area++;
+	} while (curr_order < MAX_ORDER);
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return NULL;
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+struct page * fastcall _alloc_pages(unsigned int gfp_mask, unsigned int order)
+{
+	return __alloc_pages(gfp_mask, order,
+		contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
+}
+#endif
+
+static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *));
+static struct page * fastcall balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
+{
+	struct page * page = NULL;
+	int __freed;
+
+	if (in_interrupt())
+		BUG();
+
+	current->allocation_order = order;
+	current->flags |= PF_MEMALLOC | PF_FREE_PAGES;
+
+	__freed = try_to_free_pages_zone(classzone, gfp_mask);
+
+	current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);
+
+	if (current->nr_local_pages) {
+		struct list_head * entry, * local_pages;
+		struct page * tmp;
+		int nr_pages;
+
+		local_pages = &current->local_pages;
+
+		if (likely(__freed)) {
+			/* pick from the last inserted so we're lifo */
+			entry = local_pages->next;
+			do {
+				tmp = list_entry(entry, struct page, list);
+				if (tmp->index == order && memclass(page_zone(tmp), classzone)) {
+					list_del(entry);
+					current->nr_local_pages--;
+					set_page_count(tmp, 1);
+					page = tmp;
+
+					if (page->buffers)
+						BUG();
+					if (page->mapping)
+						BUG();
+					if (!VALID_PAGE(page))
+						BUG();
+					if (PageLocked(page))
+						BUG();
+					if (PageLRU(page))
+						BUG();
+					if (PageActive(page))
+						BUG();
+					if (PageDirty(page))
+						BUG();
+
+					break;
+				}
+			} while ((entry = entry->next) != local_pages);
+		}
+
+		nr_pages = current->nr_local_pages;
+		/* free in reverse order so that the global order will be lifo */
+		while ((entry = local_pages->prev) != local_pages) {
+			list_del(entry);
+			tmp = list_entry(entry, struct page, list);
+			__free_pages_ok(tmp, tmp->index);
+			if (!nr_pages--)
+				BUG();
+		}
+		current->nr_local_pages = 0;
+	}
+
+	*freed = __freed;
+	return page;
+}
+
+static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order)
+{
+	long free = zone->free_pages - (1UL << order);
+	return free >= 0 ? free : 0;
+}
+
+/*
+ * This is the 'heart' of the zoned buddy allocator:
+ */
+struct page * fastcall __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
+{
+	zone_t **zone, * classzone;
+	struct page * page;
+	int freed, class_idx;
+
+	zone = zonelist->zones;
+	classzone = *zone;
+	class_idx = zone_idx(classzone);
+
+	for (;;) {
+		zone_t *z = *(zone++);
+		if (!z)
+			break;
+
+		if (zone_free_pages(z, order) > z->watermarks[class_idx].low) {
+			page = rmqueue(z, order);
+			if (page)
+				return page;
+		}
+	}
+
+	classzone->need_balance = 1;
+	mb();
+	if (waitqueue_active(&kswapd_wait))
+		wake_up_interruptible(&kswapd_wait);
+
+	zone = zonelist->zones;
+	for (;;) {
+		unsigned long min;
+		zone_t *z = *(zone++);
+		if (!z)
+			break;
+
+		min = z->watermarks[class_idx].min;
+		if (!(gfp_mask & __GFP_WAIT))
+			min >>= 2;
+		if (zone_free_pages(z, order) > min) {
+			page = rmqueue(z, order);
+			if (page)
+				return page;
+		}
+	}
+
+	/* here we're in the low on memory slow path */
+
+	if ((current->flags & PF_MEMALLOC) && 
+			(!in_interrupt() || (current->flags & PF_MEMDIE))) {
+		zone = zonelist->zones;
+		for (;;) {
+			zone_t *z = *(zone++);
+			if (!z)
+				break;
+
+			page = rmqueue(z, order);
+			if (page)
+				return page;
+		}
+		return NULL;
+	}
+
+	/* Atomic allocations - we can't balance anything */
+	if (!(gfp_mask & __GFP_WAIT))
+		goto out;
+
+ rebalance:
+	page = balance_classzone(classzone, gfp_mask, order, &freed);
+	if (page)
+		return page;
+
+	zone = zonelist->zones;
+	if (likely(freed)) {
+		for (;;) {
+			zone_t *z = *(zone++);
+			if (!z)
+				break;
+
+			if (zone_free_pages(z, order) > z->watermarks[class_idx].min) {
+				page = rmqueue(z, order);
+				if (page)
+					return page;
+			}
+		}
+		goto rebalance;
+	} else {
+		/* 
+		 * Check that no other task is been killed meanwhile,
+		 * in such a case we can succeed the allocation.
+		 */
+		for (;;) {
+			zone_t *z = *(zone++);
+			if (!z)
+				break;
+
+			if (zone_free_pages(z, order) > z->watermarks[class_idx].high) {
+				page = rmqueue(z, order);
+				if (page)
+					return page;
+			}
+		}
+	}
+
+ out:
+	printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n",
+	       order, gfp_mask, !!(current->flags & PF_MEMALLOC));
+	if (unlikely(vm_gfp_debug))
+		dump_stack();
+	return NULL;
+}
+
+/*
+ * Common helper functions.
+ */
+fastcall unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
+{
+	struct page * page;
+
+	page = alloc_pages(gfp_mask, order);
+	if (!page)
+		return 0;
+	return (unsigned long) page_address(page);
+}
+
+fastcall unsigned long get_zeroed_page(unsigned int gfp_mask)
+{
+	struct page * page;
+
+	page = alloc_pages(gfp_mask, 0);
+	if (page) {
+		void *address = page_address(page);
+		clear_page(address);
+		return (unsigned long) address;
+	}
+	return 0;
+}
+
+fastcall void __free_pages(struct page *page, unsigned int order)
+{
+	if (!PageReserved(page) && put_page_testzero(page))
+		__free_pages_ok(page, order);
+}
+
+fastcall void free_pages(unsigned long addr, unsigned int order)
+{
+	if (addr != 0)
+		__free_pages(virt_to_page(addr), order);
+}
+
+/*
+ * Total amount of free (allocatable) RAM:
+ */
+unsigned int nr_free_pages (void)
+{
+	unsigned int sum = 0;
+	zone_t *zone;
+
+	for_each_zone(zone)
+		sum += zone->free_pages;
+
+	return sum;
+}
+
+/*
+ * Amount of free RAM allocatable as buffer memory:
+ */
+unsigned int nr_free_buffer_pages (void)
+{
+	pg_data_t *pgdat;
+	unsigned int sum = 0;
+	zonelist_t *zonelist;
+	zone_t **zonep, *zone;
+
+	for_each_pgdat(pgdat) {
+		int class_idx;
+		zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
+		zonep = zonelist->zones;
+		zone = *zonep;
+		class_idx = zone_idx(zone);
+
+		sum += zone->nr_cache_pages;
+		for (; zone; zone = *zonep++) {
+			int free = zone->free_pages - zone->watermarks[class_idx].high;
+			if (free <= 0)
+				continue;
+			sum += free;
+		}
+	}
+
+	return sum;
+}
+
+#if CONFIG_HIGHMEM
+unsigned int nr_free_highpages (void)
+{
+	pg_data_t *pgdat;
+	unsigned int pages = 0;
+
+	for_each_pgdat(pgdat)
+		pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+
+	return pages;
+}
+
+unsigned int freeable_lowmem(void)
+{
+	unsigned int pages = 0;
+	pg_data_t *pgdat;
+
+	for_each_pgdat(pgdat) {
+		pages += pgdat->node_zones[ZONE_DMA].free_pages;
+		pages += pgdat->node_zones[ZONE_DMA].nr_active_pages;
+		pages += pgdat->node_zones[ZONE_DMA].nr_inactive_pages;
+		pages += pgdat->node_zones[ZONE_NORMAL].free_pages;
+		pages += pgdat->node_zones[ZONE_NORMAL].nr_active_pages;
+		pages += pgdat->node_zones[ZONE_NORMAL].nr_inactive_pages;
+	}
+
+	return pages;
+}
+#endif
+
+#define K(x) ((x) << (PAGE_SHIFT-10))
+
+/*
+ * Show free area list (used inside shift_scroll-lock stuff)
+ * We also calculate the percentage fragmentation. We do this by counting the
+ * memory on each free list with the exception of the first item on the list.
+ */
+void show_free_areas_core(pg_data_t *pgdat)
+{
+ 	unsigned int order;
+	unsigned type;
+	pg_data_t *tmpdat = pgdat;
+
+	printk("Free pages:      %6dkB (%6dkB HighMem)\n",
+		K(nr_free_pages()),
+		K(nr_free_highpages()));
+
+	while (tmpdat) {
+		zone_t *zone;
+		for (zone = tmpdat->node_zones;
+			       	zone < tmpdat->node_zones + MAX_NR_ZONES; zone++)
+			printk("Zone:%s freepages:%6lukB\n", 
+					zone->name,
+					K(zone->free_pages));
+			
+		tmpdat = tmpdat->node_next;
+	}
+
+	printk("( Active: %d, inactive: %d, free: %d )\n",
+	       nr_active_pages,
+	       nr_inactive_pages,
+	       nr_free_pages());
+
+	for (type = 0; type < MAX_NR_ZONES; type++) {
+		struct list_head *head, *curr;
+		zone_t *zone = pgdat->node_zones + type;
+ 		unsigned long nr, total, flags;
+
+		total = 0;
+		if (zone->size) {
+			spin_lock_irqsave(&zone->lock, flags);
+		 	for (order = 0; order < MAX_ORDER; order++) {
+				head = &(zone->free_area + order)->free_list;
+				curr = head;
+				nr = 0;
+				for (;;) {
+					if ((curr = curr->next) == head)
+						break;
+					nr++;
+				}
+				total += nr * (1 << order);
+				printk("%lu*%lukB ", nr, K(1UL) << order);
+			}
+			spin_unlock_irqrestore(&zone->lock, flags);
+		}
+		printk("= %lukB)\n", K(total));
+	}
+
+#ifdef SWAP_CACHE_INFO
+	show_swap_cache_info();
+#endif	
+}
+
+void show_free_areas(void)
+{
+	show_free_areas_core(pgdat_list);
+}
+
+/*
+ * Builds allocation fallback zone lists.
+ */
+static inline void build_zonelists(pg_data_t *pgdat)
+{
+	int i, j, k;
+
+	for (i = 0; i <= GFP_ZONEMASK; i++) {
+		zonelist_t *zonelist;
+		zone_t *zone;
+
+		zonelist = pgdat->node_zonelists + i;
+		memset(zonelist, 0, sizeof(*zonelist));
+
+		j = 0;
+		k = ZONE_NORMAL;
+		if (i & __GFP_HIGHMEM)
+			k = ZONE_HIGHMEM;
+		if (i & __GFP_DMA)
+			k = ZONE_DMA;
+
+		switch (k) {
+			default:
+				BUG();
+			/*
+			 * fallthrough:
+			 */
+			case ZONE_HIGHMEM:
+				zone = pgdat->node_zones + ZONE_HIGHMEM;
+				if (zone->size) {
+#ifndef CONFIG_HIGHMEM
+					BUG();
+#endif
+					zonelist->zones[j++] = zone;
+				}
+			case ZONE_NORMAL:
+				zone = pgdat->node_zones + ZONE_NORMAL;
+				if (zone->size)
+					zonelist->zones[j++] = zone;
+			case ZONE_DMA:
+				zone = pgdat->node_zones + ZONE_DMA;
+				if (zone->size)
+					zonelist->zones[j++] = zone;
+		}
+		zonelist->zones[j++] = NULL;
+	} 
+}
+
+/*
+ * Helper functions to size the waitqueue hash table.
+ * Essentially these want to choose hash table sizes sufficiently
+ * large so that collisions trying to wait on pages are rare.
+ * But in fact, the number of active page waitqueues on typical
+ * systems is ridiculously low, less than 200. So this is even
+ * conservative, even though it seems large.
+ *
+ * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
+ * waitqueues, i.e. the size of the waitq table given the number of pages.
+ */
+#define PAGES_PER_WAITQUEUE	256
+
+static inline unsigned long wait_table_size(unsigned long pages)
+{
+	unsigned long size = 1;
+
+	pages /= PAGES_PER_WAITQUEUE;
+
+	while (size < pages)
+		size <<= 1;
+
+	/*
+	 * Once we have dozens or even hundreds of threads sleeping
+	 * on IO we've got bigger problems than wait queue collision.
+	 * Limit the size of the wait table to a reasonable size.
+	 */
+	size = min(size, 4096UL);
+
+	return size;
+}
+
+/*
+ * This is an integer logarithm so that shifts can be used later
+ * to extract the more random high bits from the multiplicative
+ * hash function before the remainder is taken.
+ */
+static inline unsigned long wait_table_bits(unsigned long size)
+{
+	return ffz(~size);
+}
+
+#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
+
+/*
+ * Set up the zone data structures:
+ *   - mark all pages reserved
+ *   - mark all memory queues empty
+ *   - clear the memory bitmaps
+ */
+void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
+	unsigned long *zones_size, unsigned long zone_start_paddr, 
+	unsigned long *zholes_size, struct page *lmem_map)
+{
+	unsigned long i, j;
+	unsigned long map_size;
+	unsigned long totalpages, offset, realtotalpages;
+	const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
+
+	if (zone_start_paddr & ~PAGE_MASK)
+		BUG();
+
+	totalpages = 0;
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		unsigned long size = zones_size[i];
+		totalpages += size;
+	}
+	realtotalpages = totalpages;
+	if (zholes_size)
+		for (i = 0; i < MAX_NR_ZONES; i++)
+			realtotalpages -= zholes_size[i];
+			
+	printk("On node %d totalpages: %lu\n", nid, realtotalpages);
+
+	/*
+	 * Some architectures (with lots of mem and discontinous memory
+	 * maps) have to search for a good mem_map area:
+	 * For discontigmem, the conceptual mem map array starts from 
+	 * PAGE_OFFSET, we need to align the actual array onto a mem map 
+	 * boundary, so that MAP_NR works.
+	 */
+	map_size = (totalpages + 1)*sizeof(struct page);
+	if (lmem_map == (struct page *)0) {
+		lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size);
+		lmem_map = (struct page *)(PAGE_OFFSET + 
+			MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET));
+	}
+	*gmap = pgdat->node_mem_map = lmem_map;
+	pgdat->node_size = totalpages;
+	pgdat->node_start_paddr = zone_start_paddr;
+	pgdat->node_start_mapnr = (lmem_map - mem_map);
+	pgdat->nr_zones = 0;
+
+	offset = lmem_map - mem_map;	
+	for (j = 0; j < MAX_NR_ZONES; j++) {
+		zone_t *zone = pgdat->node_zones + j;
+		unsigned long mask;
+		unsigned long size, realsize;
+		int idx;
+
+		zone_table[nid * MAX_NR_ZONES + j] = zone;
+		realsize = size = zones_size[j];
+		if (zholes_size)
+			realsize -= zholes_size[j];
+
+		printk("zone(%lu): %lu pages.\n", j, size);
+		zone->size = size;
+		zone->realsize = realsize;
+		zone->name = zone_names[j];
+		zone->lock = SPIN_LOCK_UNLOCKED;
+		zone->zone_pgdat = pgdat;
+		zone->free_pages = 0;
+		zone->need_balance = 0;
+		 zone->nr_active_pages = zone->nr_inactive_pages = 0;
+
+
+		if (!size)
+			continue;
+
+		/*
+		 * The per-page waitqueue mechanism uses hashed waitqueues
+		 * per zone.
+		 */
+		zone->wait_table_size = wait_table_size(size);
+		zone->wait_table_shift =
+			BITS_PER_LONG - wait_table_bits(zone->wait_table_size);
+		zone->wait_table = (wait_queue_head_t *)
+			alloc_bootmem_node(pgdat, zone->wait_table_size
+						* sizeof(wait_queue_head_t));
+
+		for(i = 0; i < zone->wait_table_size; ++i)
+			init_waitqueue_head(zone->wait_table + i);
+
+		pgdat->nr_zones = j+1;
+
+		mask = (realsize / zone_balance_ratio[j]);
+		if (mask < zone_balance_min[j])
+			mask = zone_balance_min[j];
+		else if (mask > zone_balance_max[j])
+			mask = zone_balance_max[j];
+		zone->watermarks[j].min = mask;
+		zone->watermarks[j].low = mask*2;
+		zone->watermarks[j].high = mask*3;
+		/* now set the watermarks of the lower zones in the "j" classzone */
+		for (idx = j-1; idx >= 0; idx--) {
+			zone_t * lower_zone = pgdat->node_zones + idx;
+			unsigned long lower_zone_reserve;
+			if (!lower_zone->size)
+				continue;
+
+			mask = lower_zone->watermarks[idx].min;
+			lower_zone->watermarks[j].min = mask;
+			lower_zone->watermarks[j].low = mask*2;
+			lower_zone->watermarks[j].high = mask*3;
+
+			/* now the brainer part */
+			lower_zone_reserve = realsize / lower_zone_reserve_ratio[idx];
+			lower_zone->watermarks[j].min += lower_zone_reserve;
+			lower_zone->watermarks[j].low += lower_zone_reserve;
+			lower_zone->watermarks[j].high += lower_zone_reserve;
+
+			realsize += lower_zone->realsize;
+		}
+
+		zone->zone_mem_map = mem_map + offset;
+		zone->zone_start_mapnr = offset;
+		zone->zone_start_paddr = zone_start_paddr;
+
+		if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1))
+			printk("BUG: wrong zone alignment, it will crash\n");
+
+		/*
+		 * Initially all pages are reserved - free ones are freed
+		 * up by free_all_bootmem() once the early boot process is
+		 * done. Non-atomic initialization, single-pass.
+		 */
+		for (i = 0; i < size; i++) {
+			struct page *page = mem_map + offset + i;
+			set_page_zone(page, nid * MAX_NR_ZONES + j);
+			set_page_count(page, 0);
+			SetPageReserved(page);
+			INIT_LIST_HEAD(&page->list);
+			if (j != ZONE_HIGHMEM)
+				set_page_address(page, __va(zone_start_paddr));
+			zone_start_paddr += PAGE_SIZE;
+		}
+
+		offset += size;
+		for (i = 0; ; i++) {
+			unsigned long bitmap_size;
+
+			INIT_LIST_HEAD(&zone->free_area[i].free_list);
+			if (i == MAX_ORDER-1) {
+				zone->free_area[i].map = NULL;
+				break;
+			}
+
+			/*
+			 * Page buddy system uses "index >> (i+1)",
+			 * where "index" is at most "size-1".
+			 *
+			 * The extra "+3" is to round down to byte
+			 * size (8 bits per byte assumption). Thus
+			 * we get "(size-1) >> (i+4)" as the last byte
+			 * we can access.
+			 *
+			 * The "+1" is because we want to round the
+			 * byte allocation up rather than down. So
+			 * we should have had a "+7" before we shifted
+			 * down by three. Also, we have to add one as
+			 * we actually _use_ the last bit (it's [0,n]
+			 * inclusive, not [0,n[).
+			 *
+			 * So we actually had +7+1 before we shift
+			 * down by 3. But (n+8) >> 3 == (n >> 3) + 1
+			 * (modulo overflows, which we do not have).
+			 *
+			 * Finally, we LONG_ALIGN because all bitmap
+			 * operations are on longs.
+			 */
+			bitmap_size = (size-1) >> (i+4);
+			bitmap_size = LONG_ALIGN(bitmap_size+1);
+			zone->free_area[i].map = 
+			  (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
+		}
+	}
+	build_zonelists(pgdat);
+}
+
+void __init free_area_init(unsigned long *zones_size)
+{
+	free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0);
+}
+
+static int __init setup_mem_frac(char *str)
+{
+	int j = 0;
+
+	while (get_option(&str, &zone_balance_ratio[j++]) == 2);
+	printk("setup_mem_frac: ");
+	for (j = 0; j < MAX_NR_ZONES; j++) printk("%d  ", zone_balance_ratio[j]);
+	printk("\n");
+	return 1;
+}
+
+__setup("memfrac=", setup_mem_frac);
+
+static int __init setup_lower_zone_reserve(char *str)
+{
+	int j = 0;
+
+	while (get_option(&str, &lower_zone_reserve_ratio[j++]) == 2);
+	printk("setup_lower_zone_reserve: ");
+	for (j = 0; j < MAX_NR_ZONES-1; j++) printk("%d  ", lower_zone_reserve_ratio[j]);
+	printk("\n");
+	return 1;
+}
+
+__setup("lower_zone_reserve=", setup_lower_zone_reserve);
diff --git a/linux-2.4-xen-sparse/net/core/skbuff.c b/linux-2.4-xen-sparse/net/core/skbuff.c
new file mode 100644
index 0000000000..ea9af64a48
--- /dev/null
+++ b/linux-2.4-xen-sparse/net/core/skbuff.c
@@ -0,0 +1,1309 @@
+/*
+ *	Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
+ *			Florian La Roche <rzsfl@rz.uni-sb.de>
+ *
+ *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
+ *
+ *	Fixes:	
+ *		Alan Cox	:	Fixed the worst of the load balancer bugs.
+ *		Dave Platt	:	Interrupt stacking fix.
+ *	Richard Kooijman	:	Timestamp fixes.
+ *		Alan Cox	:	Changed buffer format.
+ *		Alan Cox	:	destructor hook for AF_UNIX etc.
+ *		Linus Torvalds	:	Better skb_clone.
+ *		Alan Cox	:	Added skb_copy.
+ *		Alan Cox	:	Added all the changed routines Linus
+ *					only put in the headers
+ *		Ray VanTassle	:	Fixed --skb->lock in free
+ *		Alan Cox	:	skb_copy copy arp field
+ *		Andi Kleen	:	slabified it.
+ *
+ *	NOTE:
+ *		The __skb_ routines should be called with interrupts 
+ *	disabled, or you better be *real* sure that the operation is atomic 
+ *	with respect to whatever list is being frobbed (e.g. via lock_sock()
+ *	or via disabling bottom half handlers, etc).
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *	The functions in this file will not compile correctly with gcc 2.4.x
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+#include <net/protocol.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+int sysctl_hot_list_len = 128;
+
+static kmem_cache_t *skbuff_head_cache;
+
+static union {
+	struct sk_buff_head	list;
+	char			pad[SMP_CACHE_BYTES];
+} skb_head_pool[NR_CPUS];
+
+/*
+ *	Keep out-of-line to prevent kernel bloat.
+ *	__builtin_return_address is not used because it is not always
+ *	reliable. 
+ */
+
+/**
+ *	skb_over_panic	- 	private function
+ *	@skb: buffer
+ *	@sz: size
+ *	@here: address
+ *
+ *	Out of line support code for skb_put(). Not user callable.
+ */
+ 
+void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+	printk("skput:over: %p:%d put:%d dev:%s", 
+		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+	BUG();
+}
+
+/**
+ *	skb_under_panic	- 	private function
+ *	@skb: buffer
+ *	@sz: size
+ *	@here: address
+ *
+ *	Out of line support code for skb_push(). Not user callable.
+ */
+ 
+
+void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+        printk("skput:under: %p:%d put:%d dev:%s",
+                here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+	BUG();
+}
+
+static __inline__ struct sk_buff *skb_head_from_pool(void)
+{
+	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
+
+	if (skb_queue_len(list)) {
+		struct sk_buff *skb;
+		unsigned long flags;
+
+		local_irq_save(flags);
+		skb = __skb_dequeue(list);
+		local_irq_restore(flags);
+		return skb;
+	}
+	return NULL;
+}
+
+static __inline__ void skb_head_to_pool(struct sk_buff *skb)
+{
+	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
+
+	if (skb_queue_len(list) < sysctl_hot_list_len) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		__skb_queue_head(list, skb);
+		local_irq_restore(flags);
+
+		return;
+	}
+	kmem_cache_free(skbuff_head_cache, skb);
+}
+
+
+/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
+ *	'private' fields and also do memory statistics to find all the
+ *	[BEEP] leaks.
+ * 
+ */
+
+/**
+ *	alloc_skb	-	allocate a network buffer
+ *	@size: size to allocate
+ *	@gfp_mask: allocation mask
+ *
+ *	Allocate a new &sk_buff. The returned buffer has no headroom and a
+ *	tail room of size bytes. The object has a reference count of one.
+ *	The return is the buffer. On a failure the return is %NULL.
+ *
+ *	Buffers may only be allocated from interrupts using a @gfp_mask of
+ *	%GFP_ATOMIC.
+ */
+ 
+struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
+{
+	struct sk_buff *skb;
+	u8 *data;
+
+	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
+		static int count = 0;
+		if (++count < 5) {
+			printk(KERN_ERR "alloc_skb called nonatomically "
+			       "from interrupt %p\n", NET_CALLER(size));
+ 			BUG();
+		}
+		gfp_mask &= ~__GFP_WAIT;
+	}
+
+	/* Get the HEAD */
+	skb = skb_head_from_pool();
+	if (skb == NULL) {
+		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+		if (skb == NULL)
+			goto nohead;
+	}
+
+	/* Get the DATA. Size must match skb_add_mtu(). */
+	size = SKB_DATA_ALIGN(size);
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (data == NULL)
+		goto nodata;
+
+	/* XXX: does not include slab overhead */ 
+	skb->truesize = size + sizeof(struct sk_buff);
+
+	/* Load the data pointers. */
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end = data + size;
+
+	/* Set up other state */
+	skb->len = 0;
+	skb->cloned = 0;
+	skb->data_len = 0;
+
+	atomic_set(&skb->users, 1); 
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags = 0;
+	skb_shinfo(skb)->frag_list = NULL;
+	return skb;
+
+nodata:
+	skb_head_to_pool(skb);
+nohead:
+	return NULL;
+}
+
+/**
+ *	alloc_skb_from_cache	-	allocate a network buffer
+ *	@cp: kmem_cache from which to allocate the data area
+ *           (object size must be big enough for @size bytes + skb overheads)
+ *	@size: size to allocate
+ *	@gfp_mask: allocation mask
+ *
+ *	Allocate a new &sk_buff. The returned buffer has no headroom and a
+ *	tail room of size bytes. The object has a reference count of one.
+ *	The return is the buffer. On a failure the return is %NULL.
+ *
+ *	Buffers may only be allocated from interrupts using a @gfp_mask of
+ *	%GFP_ATOMIC.
+ */
+ 
+struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
+				     unsigned int size, int gfp_mask)
+{
+	struct sk_buff *skb;
+	u8 *data;
+
+	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
+		static int count = 0;
+		if (++count < 5) {
+			printk(KERN_ERR "alloc_skb called nonatomically "
+			       "from interrupt %p\n", NET_CALLER(size));
+ 			BUG();
+		}
+		gfp_mask &= ~__GFP_WAIT;
+	}
+
+	/* Get the HEAD */
+	skb = skb_head_from_pool();
+	if (skb == NULL) {
+		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+		if (skb == NULL)
+			goto nohead;
+	}
+
+	/* Get the DATA. */
+	size = SKB_DATA_ALIGN(size);
+	data = kmem_cache_alloc(cp, gfp_mask);
+	if (data == NULL)
+		goto nodata;
+
+	/* XXX: does not include slab overhead */ 
+	skb->truesize = size + sizeof(struct sk_buff);
+
+	/* Load the data pointers. */
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end = data + size;
+
+	/* Set up other state */
+	skb->len = 0;
+	skb->cloned = 0;
+	skb->data_len = 0;
+
+	atomic_set(&skb->users, 1); 
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags = 0;
+	skb_shinfo(skb)->frag_list = NULL;
+	return skb;
+
+nodata:
+	skb_head_to_pool(skb);
+nohead:
+	return NULL;
+}
+
+
+/*
+ *	Slab constructor for a skb head. 
+ */ 
+static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
+				  unsigned long flags)
+{
+	struct sk_buff *skb = p;
+
+	skb->next = NULL;
+	skb->prev = NULL;
+	skb->list = NULL;
+	skb->sk = NULL;
+	skb->stamp.tv_sec=0;	/* No idea about time */
+	skb->dev = NULL;
+	skb->real_dev = NULL;
+	skb->dst = NULL;
+	memset(skb->cb, 0, sizeof(skb->cb));
+	skb->pkt_type = PACKET_HOST;	/* Default type */
+	skb->ip_summed = 0;
+	skb->priority = 0;
+	skb->security = 0;	/* By default packets are insecure */
+	skb->destructor = NULL;
+
+#ifdef CONFIG_NETFILTER
+	skb->nfmark = skb->nfcache = 0;
+	skb->nfct = NULL;
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 0;
+#endif
+#endif
+#ifdef CONFIG_NET_SCHED
+	skb->tc_index = 0;
+#endif
+}
+
+static void skb_drop_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+	skb_shinfo(skb)->frag_list = NULL;
+
+	do {
+		struct sk_buff *this = list;
+		list = list->next;
+		kfree_skb(this);
+	} while (list);
+}
+
+static void skb_clone_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *list;
+
+	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
+		skb_get(list);
+}
+
+static void skb_release_data(struct sk_buff *skb)
+{
+	if (!skb->cloned ||
+	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
+		if (skb_shinfo(skb)->nr_frags) {
+			int i;
+			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+				put_page(skb_shinfo(skb)->frags[i].page);
+		}
+
+		if (skb_shinfo(skb)->frag_list)
+			skb_drop_fraglist(skb);
+
+		kfree(skb->head);
+	}
+}
+
+/*
+ *	Free an skbuff by memory without cleaning the state. 
+ */
+void kfree_skbmem(struct sk_buff *skb)
+{
+	skb_release_data(skb);
+	skb_head_to_pool(skb);
+}
+
+/**
+ *	__kfree_skb - private function 
+ *	@skb: buffer
+ *
+ *	Free an sk_buff. Release anything attached to the buffer. 
+ *	Clean the state. This is an internal helper function. Users should
+ *	always call kfree_skb
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+	if (skb->list) {
+	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
+		       "on a list (from %p).\n", NET_CALLER(skb));
+		BUG();
+	}
+
+	dst_release(skb->dst);
+	if(skb->destructor) {
+		if (in_irq()) {
+			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
+				NET_CALLER(skb));
+		}
+		skb->destructor(skb);
+	}
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+#endif
+	skb_headerinit(skb, NULL, 0);  /* clean state */
+	kfree_skbmem(skb);
+}
+
+/**
+ *	skb_clone	-	duplicate an sk_buff
+ *	@skb: buffer to clone
+ *	@gfp_mask: allocation priority
+ *
+ *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
+ *	copies share the same packet data but not structure. The new
+ *	buffer has a reference count of 1. If the allocation fails the 
+ *	function returns %NULL otherwise the new buffer is returned.
+ *	
+ *	If this function is called from an interrupt gfp_mask() must be
+ *	%GFP_ATOMIC.
+ */
+
+struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+{
+	struct sk_buff *n;
+
+	n = skb_head_from_pool();
+	if (!n) {
+		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+		if (!n)
+			return NULL;
+	}
+
+#define C(x) n->x = skb->x
+
+	n->next = n->prev = NULL;
+	n->list = NULL;
+	n->sk = NULL;
+	C(stamp);
+	C(dev);
+	C(real_dev);
+	C(h);
+	C(nh);
+	C(mac);
+	C(dst);
+	dst_clone(n->dst);
+	memcpy(n->cb, skb->cb, sizeof(skb->cb));
+	C(len);
+	C(data_len);
+	C(csum);
+	n->cloned = 1;
+	C(pkt_type);
+	C(ip_summed);
+	C(priority);
+	atomic_set(&n->users, 1);
+	C(protocol);
+	C(security);
+	C(truesize);
+	C(head);
+	C(data);
+	C(tail);
+	C(end);
+	n->destructor = NULL;
+#ifdef CONFIG_NETFILTER
+	C(nfmark);
+	C(nfcache);
+	C(nfct);
+#ifdef CONFIG_NETFILTER_DEBUG
+	C(nf_debug);
+#endif
+#endif /*CONFIG_NETFILTER*/
+#if defined(CONFIG_HIPPI)
+	C(private);
+#endif
+#ifdef CONFIG_NET_SCHED
+	C(tc_index);
+#endif
+
+	atomic_inc(&(skb_shinfo(skb)->dataref));
+	skb->cloned = 1;
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_get(skb->nfct);
+#endif
+	return n;
+}
+
+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+	/*
+	 *	Shift between the two data areas in bytes
+	 */
+	unsigned long offset = new->data - old->data;
+
+	new->list=NULL;
+	new->sk=NULL;
+	new->dev=old->dev;
+	new->real_dev=old->real_dev;
+	new->priority=old->priority;
+	new->protocol=old->protocol;
+	new->dst=dst_clone(old->dst);
+	new->h.raw=old->h.raw+offset;
+	new->nh.raw=old->nh.raw+offset;
+	new->mac.raw=old->mac.raw+offset;
+	memcpy(new->cb, old->cb, sizeof(old->cb));
+	atomic_set(&new->users, 1);
+	new->pkt_type=old->pkt_type;
+	new->stamp=old->stamp;
+	new->destructor = NULL;
+	new->security=old->security;
+#ifdef CONFIG_NETFILTER
+	new->nfmark=old->nfmark;
+	new->nfcache=old->nfcache;
+	new->nfct=old->nfct;
+	nf_conntrack_get(new->nfct);
+#ifdef CONFIG_NETFILTER_DEBUG
+	new->nf_debug=old->nf_debug;
+#endif
+#endif
+#ifdef CONFIG_NET_SCHED
+	new->tc_index = old->tc_index;
+#endif
+}
+
+/**
+ *	skb_copy	-	create private copy of an sk_buff
+ *	@skb: buffer to copy
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and its data. This is used when the
+ *	caller wishes to modify the data and needs a private copy of the 
+ *	data to alter. Returns %NULL on failure or the pointer to the buffer
+ *	on success. The returned buffer has a reference count of 1.
+ *
+ *	As by-product this function converts non-linear &sk_buff to linear
+ *	one, so that &sk_buff becomes completely private and caller is allowed
+ *	to modify all the data of returned buffer. This means that this
+ *	function is not recommended for use in circumstances when only
+ *	header is going to be modified. Use pskb_copy() instead.
+ */
+ 
+struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+{
+	struct sk_buff *n;
+	int headerlen = skb->data-skb->head;
+
+	/*
+	 *	Allocate the copy buffer
+	 */
+	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+	if(n==NULL)
+		return NULL;
+
+	/* Set the data pointer */
+	skb_reserve(n,headerlen);
+	/* Set the tail pointer and length */
+	skb_put(n,skb->len);
+	n->csum = skb->csum;
+	n->ip_summed = skb->ip_summed;
+
+	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
+		BUG();
+
+	copy_skb_header(n, skb);
+
+	return n;
+}
+
+/* Keep head the same: replace data */
+int skb_linearize(struct sk_buff *skb, int gfp_mask)
+{
+	unsigned int size;
+	u8 *data;
+	long offset;
+	int headerlen = skb->data - skb->head;
+	int expand = (skb->tail+skb->data_len) - skb->end;
+
+	if (skb_shared(skb))
+		BUG();
+
+	if (expand <= 0)
+		expand = 0;
+
+	size = (skb->end - skb->head + expand);
+	size = SKB_DATA_ALIGN(size);
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (data == NULL)
+		return -ENOMEM;
+
+	/* Copy entire thing */
+	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
+		BUG();
+
+	/* Offset between the two in bytes */
+	offset = data - skb->head;
+
+	/* Free old data. */
+	skb_release_data(skb);
+
+	skb->head = data;
+	skb->end  = data + size;
+
+	/* Set up new pointers */
+	skb->h.raw += offset;
+	skb->nh.raw += offset;
+	skb->mac.raw += offset;
+	skb->tail += offset;
+	skb->data += offset;
+
+	/* Set up shinfo */
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags = 0;
+	skb_shinfo(skb)->frag_list = NULL;
+
+	/* We are no longer a clone, even if we were. */
+	skb->cloned = 0;
+
+	skb->tail += skb->data_len;
+	skb->data_len = 0;
+	return 0;
+}
+
+
+/**
+ *	pskb_copy	-	create copy of an sk_buff with private head.
+ *	@skb: buffer to copy
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and part of its data, located
+ *	in header. Fragmented data remain shared. This is used when
+ *	the caller wishes to modify only header of &sk_buff and needs
+ *	private copy of the header to alter. Returns %NULL on failure
+ *	or the pointer to the buffer on success.
+ *	The returned buffer has a reference count of 1.
+ */
+
+struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+{
+	struct sk_buff *n;
+
+	/*
+	 *	Allocate the copy buffer
+	 */
+	n=alloc_skb(skb->end - skb->head, gfp_mask);
+	if(n==NULL)
+		return NULL;
+
+	/* Set the data pointer */
+	skb_reserve(n,skb->data-skb->head);
+	/* Set the tail pointer and length */
+	skb_put(n,skb_headlen(skb));
+	/* Copy the bytes */
+	memcpy(n->data, skb->data, n->len);
+	n->csum = skb->csum;
+	n->ip_summed = skb->ip_summed;
+
+	n->data_len = skb->data_len;
+	n->len = skb->len;
+
+	if (skb_shinfo(skb)->nr_frags) {
+		int i;
+
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
+			get_page(skb_shinfo(n)->frags[i].page);
+		}
+		skb_shinfo(n)->nr_frags = i;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
+		skb_clone_fraglist(n);
+	}
+
+	copy_skb_header(n, skb);
+
+	return n;
+}
+
+/**
+ *	pskb_expand_head - reallocate header of &sk_buff
+ *	@skb: buffer to reallocate
+ *	@nhead: room to add at head
+ *	@ntail: room to add at tail
+ *	@gfp_mask: allocation priority
+ *
+ *	Expands (or creates identical copy, if &nhead and &ntail are zero)
+ *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
+ *	reference count of 1. Returns zero in the case of success or error,
+ *	if expansion failed. In the last case, &sk_buff is not changed.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+{
+	int i;
+	u8 *data;
+	int size = nhead + (skb->end - skb->head) + ntail;
+	long off;
+
+	if (skb_shared(skb))
+		BUG();
+
+	size = SKB_DATA_ALIGN(size);
+
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (data == NULL)
+		goto nodata;
+
+	/* Copy only real data... and, alas, header. This should be
+	 * optimized for the cases when header is void. */
+	memcpy(data+nhead, skb->head, skb->tail-skb->head);
+	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
+
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
+		get_page(skb_shinfo(skb)->frags[i].page);
+
+	if (skb_shinfo(skb)->frag_list)
+		skb_clone_fraglist(skb);
+
+	skb_release_data(skb);
+
+	off = (data+nhead) - skb->head;
+
+	skb->head = data;
+	skb->end  = data+size;
+
+	skb->data += off;
+	skb->tail += off;
+	skb->mac.raw += off;
+	skb->h.raw += off;
+	skb->nh.raw += off;
+	skb->cloned = 0;
+	atomic_set(&skb_shinfo(skb)->dataref, 1);
+	return 0;
+
+nodata:
+	return -ENOMEM;
+}
+
+/* Make private copy of skb with writable head and some headroom */
+
+struct sk_buff *
+skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
+{
+	struct sk_buff *skb2;
+	int delta = headroom - skb_headroom(skb);
+
+	if (delta <= 0)
+		return pskb_copy(skb, GFP_ATOMIC);
+
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (skb2 == NULL ||
+	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
+		return skb2;
+
+	kfree_skb(skb2);
+	return NULL;
+}
+
+
+/**
+ *	skb_copy_expand	-	copy and expand sk_buff
+ *	@skb: buffer to copy
+ *	@newheadroom: new free bytes at head
+ *	@newtailroom: new free bytes at tail
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and its data and while doing so 
+ *	allocate additional space.
+ *
+ *	This is used when the caller wishes to modify the data and needs a 
+ *	private copy of the data to alter as well as more space for new fields.
+ *	Returns %NULL on failure or the pointer to the buffer
+ *	on success. The returned buffer has a reference count of 1.
+ *
+ *	You must pass %GFP_ATOMIC as the allocation priority if this function
+ *	is called from an interrupt.
+ */
+ 
+
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+				int newheadroom,
+				int newtailroom,
+				int gfp_mask)
+{
+	struct sk_buff *n;
+
+	/*
+	 *	Allocate the copy buffer
+	 */
+ 	 
+	n=alloc_skb(newheadroom + skb->len + newtailroom,
+		    gfp_mask);
+	if(n==NULL)
+		return NULL;
+
+	skb_reserve(n,newheadroom);
+
+	/* Set the tail pointer and length */
+	skb_put(n,skb->len);
+
+	/* Copy the data only. */
+	if (skb_copy_bits(skb, 0, n->data, skb->len))
+		BUG();
+
+	copy_skb_header(n, skb);
+	return n;
+}
+
+/**
+ *	skb_pad			-	zero pad the tail of an skb
+ *	@skb: buffer to pad
+ *	@pad: space to pad
+ *
+ *	Ensure that a buffer is followed by a padding area that is zero
+ *	filled. Used by network drivers which may DMA or transfer data
+ *	beyond the buffer end onto the wire.
+ *
+ *	May return NULL in out of memory cases.
+ */
+ 
+struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
+{
+	struct sk_buff *nskb;
+	
+	/* If the skbuff is non linear tailroom is always zero.. */
+	if(skb_tailroom(skb) >= pad)
+	{
+		memset(skb->data+skb->len, 0, pad);
+		return skb;
+	}
+	
+	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
+	kfree_skb(skb);
+	if(nskb)
+		memset(nskb->data+nskb->len, 0, pad);
+	return nskb;
+}	
+ 
+/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
+ * If realloc==0 and trimming is impossible without change of data,
+ * it is BUG().
+ */
+
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+{
+	int offset = skb_headlen(skb);
+	int nfrags = skb_shinfo(skb)->nr_frags;
+	int i;
+
+	for (i=0; i<nfrags; i++) {
+		int end = offset + skb_shinfo(skb)->frags[i].size;
+		if (end > len) {
+			if (skb_cloned(skb)) {
+				if (!realloc)
+					BUG();
+				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+					return -ENOMEM;
+			}
+			if (len <= offset) {
+				put_page(skb_shinfo(skb)->frags[i].page);
+				skb_shinfo(skb)->nr_frags--;
+			} else {
+				skb_shinfo(skb)->frags[i].size = len-offset;
+			}
+		}
+		offset = end;
+	}
+
+	if (offset < len) {
+		skb->data_len -= skb->len - len;
+		skb->len = len;
+	} else {
+		if (len <= skb_headlen(skb)) {
+			skb->len = len;
+			skb->data_len = 0;
+			skb->tail = skb->data + len;
+			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+				skb_drop_fraglist(skb);
+		} else {
+			skb->data_len -= skb->len - len;
+			skb->len = len;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	__pskb_pull_tail - advance tail of skb header 
+ *	@skb: buffer to reallocate
+ *	@delta: number of bytes to advance tail
+ *
+ *	The function makes a sense only on a fragmented &sk_buff,
+ *	it expands header moving its tail forward and copying necessary
+ *	data from fragmented part.
+ *
+ *	&sk_buff MUST have reference count of 1.
+ *
+ *	Returns %NULL (and &sk_buff does not change) if pull failed
+ *	or value of new tail of skb in the case of success.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+/* Moves tail of skb head forward, copying data from fragmented part,
+ * when it is necessary.
+ * 1. It may fail due to malloc failure.
+ * 2. It may change skb pointers.
+ *
+ * It is pretty complicated. Luckily, it is called only in exceptional cases.
+ */
+unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
+{
+	int i, k, eat;
+
+	/* If skb has not enough free space at tail, get new one
+	 * plus 128 bytes for future expansions. If we have enough
+	 * room at tail, reallocate without expansion only if skb is cloned.
+	 */
+	eat = (skb->tail+delta) - skb->end;
+
+	if (eat > 0 || skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
+			return NULL;
+	}
+
+	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+		BUG();
+
+	/* Optimization: no fragments, no reasons to preestimate
+	 * size of pulled pages. Superb.
+	 */
+	if (skb_shinfo(skb)->frag_list == NULL)
+		goto pull_pages;
+
+	/* Estimate size of pulled pages. */
+	eat = delta;
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size >= eat)
+			goto pull_pages;
+		eat -= skb_shinfo(skb)->frags[i].size;
+	}
+
+	/* If we need update frag list, we are in troubles.
+	 * Certainly, it possible to add an offset to skb data,
+	 * but taking into account that pulling is expected to
+	 * be very rare operation, it is worth to fight against
+	 * further bloating skb head and crucify ourselves here instead.
+	 * Pure masohism, indeed. 8)8)
+	 */
+	if (eat) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+		struct sk_buff *clone = NULL;
+		struct sk_buff *insp = NULL;
+
+		do {
+			if (list == NULL)
+				BUG();
+
+			if (list->len <= eat) {
+				/* Eaten as whole. */
+				eat -= list->len;
+				list = list->next;
+				insp = list;
+			} else {
+				/* Eaten partially. */
+
+				if (skb_shared(list)) {
+					/* Sucks! We need to fork list. :-( */
+					clone = skb_clone(list, GFP_ATOMIC);
+					if (clone == NULL)
+						return NULL;
+					insp = list->next;
+					list = clone;
+				} else {
+					/* This may be pulled without
+					 * problems. */
+					insp = list;
+				}
+				if (pskb_pull(list, eat) == NULL) {
+					if (clone)
+						kfree_skb(clone);
+					return NULL;
+				}
+				break;
+			}
+		} while (eat);
+
+		/* Free pulled out fragments. */
+		while ((list = skb_shinfo(skb)->frag_list) != insp) {
+			skb_shinfo(skb)->frag_list = list->next;
+			kfree_skb(list);
+		}
+		/* And insert new clone at head. */
+		if (clone) {
+			clone->next = list;
+			skb_shinfo(skb)->frag_list = clone;
+		}
+	}
+	/* Success! Now we may commit changes to skb data. */
+
+pull_pages:
+	eat = delta;
+	k = 0;
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size <= eat) {
+			put_page(skb_shinfo(skb)->frags[i].page);
+			eat -= skb_shinfo(skb)->frags[i].size;
+		} else {
+			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+			if (eat) {
+				skb_shinfo(skb)->frags[k].page_offset += eat;
+				skb_shinfo(skb)->frags[k].size -= eat;
+				eat = 0;
+			}
+			k++;
+		}
+	}
+	skb_shinfo(skb)->nr_frags = k;
+
+	skb->tail += delta;
+	skb->data_len -= delta;
+
+	return skb->tail;
+}
+
+/* Copy some data bits from skb to kernel buffer. */
+
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+	int i, copy;
+	int start = skb->len - skb->data_len;
+
+	if (offset > (int)skb->len-len)
+		goto fault;
+
+	/* Copy header. */
+	if ((copy = start-offset) > 0) {
+		if (copy > len)
+			copy = len;
+		memcpy(to, skb->data + offset, copy);
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset+len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end-offset) > 0) {
+			u8 *vaddr;
+
+			if (copy > len)
+				copy = len;
+
+			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
+			       offset-start, copy);
+			kunmap_skb_frag(vaddr);
+
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list;
+
+		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset+len);
+
+			end = start + list->len;
+			if ((copy = end-offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (skb_copy_bits(list, offset-start, to, copy))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+				to += copy;
+			}
+			start = end;
+		}
+	}
+	if (len == 0)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+/* Checksum skb data. */
+
+unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
+{
+	int i, copy;
+	int start = skb->len - skb->data_len;
+	int pos = 0;
+
+	/* Checksum header. */
+	if ((copy = start-offset) > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial(skb->data+offset, copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		pos = copy;
+	}
+
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset+len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end-offset) > 0) {
+			unsigned int csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial(vaddr + frag->page_offset +
+					     offset-start, copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			pos += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list;
+
+		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset+len);
+
+			end = start + list->len;
+			if ((copy = end-offset) > 0) {
+				unsigned int csum2;
+				if (copy > len)
+					copy = len;
+				csum2 = skb_checksum(list, offset-start, copy, 0);
+				csum = csum_block_add(csum, csum2, pos);
+				if ((len -= copy) == 0)
+					return csum;
+				offset += copy;
+				pos += copy;
+			}
+			start = end;
+		}
+	}
+	if (len == 0)
+		return csum;
+
+	BUG();
+	return csum;
+}
+
+/* Both of above in one bottle. */
+
+unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
+{
+	int i, copy;
+	int start = skb->len - skb->data_len;
+	int pos = 0;
+
+	/* Copy header. */
+	if ((copy = start-offset) > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		to += copy;
+		pos = copy;
+	}
+
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset+len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end-offset) > 0) {
+			unsigned int csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
+						      offset-start, to, copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			to += copy;
+			pos += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list;
+
+		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
+			unsigned int csum2;
+			int end;
+
+			BUG_TRAP(start <= offset+len);
+
+			end = start + list->len;
+			if ((copy = end-offset) > 0) {
+				if (copy > len)
+					copy = len;
+				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
+				csum = csum_block_add(csum, csum2, pos);
+				if ((len -= copy) == 0)
+					return csum;
+				offset += copy;
+				to += copy;
+				pos += copy;
+			}
+			start = end;
+		}
+	}
+	if (len == 0)
+		return csum;
+
+	BUG();
+	return csum;
+}
+
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
+{
+	unsigned int csum;
+	long csstart;
+
+	if (skb->ip_summed == CHECKSUM_HW)
+		csstart = skb->h.raw - skb->data;
+	else
+		csstart = skb->len - skb->data_len;
+
+	if (csstart > skb->len - skb->data_len)
+		BUG();
+
+	memcpy(to, skb->data, csstart);
+
+	csum = 0;
+	if (csstart != skb->len)
+		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
+				skb->len-csstart, 0);
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		long csstuff = csstart + skb->csum;
+
+		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
+	}
+}
+
+#if 0
+/* 
+ * 	Tune the memory allocator for a new MTU size.
+ */
+void skb_add_mtu(int mtu)
+{
+	/* Must match allocation in alloc_skb */
+	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
+
+	kmem_add_cache_size(mtu);
+}
+#endif
+
+void __init skb_init(void)
+{
+	int i;
+
+	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+					      sizeof(struct sk_buff),
+					      0,
+					      SLAB_HWCACHE_ALIGN,
+					      skb_headerinit, NULL);
+	if (!skbuff_head_cache)
+		panic("cannot create skbuff cache");
+
+	for (i=0; i<NR_CPUS; i++)
+		skb_queue_head_init(&skb_head_pool[i].list);
+}
author	vh249@kneesaa.uk.xensource.com <vh249@kneesaa.uk.xensource.com>	2005-07-11 09:29:56 -0500
committer	vh249@kneesaa.uk.xensource.com <vh249@kneesaa.uk.xensource.com>	2005-07-11 09:29:56 -0500
commit	105077619922d8c782b74491afd1b406dc654fa7 (patch)
tree	88e84b9d5206ea2039b20c3eeb3730727cf64d2c /linux-2.4-xen-sparse
parent	a81ff1692529ed28d67c4fbc78ffe54f9ecf2278 (diff)
download	xen-105077619922d8c782b74491afd1b406dc654fa7.tar.gz xen-105077619922d8c782b74491afd1b406dc654fa7.tar.bz2 xen-105077619922d8c782b74491afd1b406dc654fa7.zip