diff options
author | iap10@freefall.cl.cam.ac.uk <iap10@freefall.cl.cam.ac.uk> | 2005-03-21 07:58:08 +0000 |
---|---|---|
committer | iap10@freefall.cl.cam.ac.uk <iap10@freefall.cl.cam.ac.uk> | 2005-03-21 07:58:08 +0000 |
commit | d73b5730fbb7f3d0fd7fcd9a9b6e36d71d33ade0 (patch) | |
tree | bc7051351d4d09c13c29b247ee34c3f22ec28a3f /freebsd-5.3-xen-sparse | |
parent | a280a68e6317b8d274296935eee67d12788beeb4 (diff) | |
download | xen-d73b5730fbb7f3d0fd7fcd9a9b6e36d71d33ade0.tar.gz xen-d73b5730fbb7f3d0fd7fcd9a9b6e36d71d33ade0.tar.bz2 xen-d73b5730fbb7f3d0fd7fcd9a9b6e36d71d33ade0.zip |
bitkeeper revision 1.1159.272.3 (423e7e90uxPqdRoA4EvOUikif-yhXA)
Check-in of the sparse tree for FreeBSD 5.3 (version 050317)
This currently supports running as a domU.
- to create freebsd-5.3-xenU run fbsdxensetup from anywhere in the tree
- once created go to freebsd-5.3-xenU on a FreeBSD 5.3 machine, run
xenfbsd_kernel_build - you'll find kernel and kernel.debug under
i386-xen/compile/XENCONF
See http://www.fsmware.com/xenofreebsd/5.3/xenbsdsetup.txt
Thanks to NetApp for their contributions in support of the FreeBSD port to Xen
.
Signed-off-by: Kip Macy <kip.macy@gmail.com>
Signed-off-by: ian.pratt@cl.cam.ac.uk
Diffstat (limited to 'freebsd-5.3-xen-sparse')
72 files changed, 32094 insertions, 0 deletions
diff --git a/freebsd-5.3-xen-sparse/conf/Makefile.i386-xen b/freebsd-5.3-xen-sparse/conf/Makefile.i386-xen new file mode 100644 index 0000000000..80e1cdd35c --- /dev/null +++ b/freebsd-5.3-xen-sparse/conf/Makefile.i386-xen @@ -0,0 +1,51 @@ +# Makefile.i386 -- with config changes. +# Copyright 1990 W. Jolitz +# from: @(#)Makefile.i386 7.1 5/10/91 +# $FreeBSD: src/sys/conf/Makefile.i386,v 1.259 2003/04/15 21:29:11 phk Exp $ +# +# Makefile for FreeBSD +# +# This makefile is constructed from a machine description: +# config machineid +# Most changes should be made in the machine description +# /sys/i386/conf/``machineid'' +# after which you should do +# config machineid +# Generic makefile changes should be made in +# /sys/conf/Makefile.i386 +# after which config should be rerun for all machines. +# + +# Which version of config(8) is required. +%VERSREQ= 500013 + +STD8X16FONT?= iso + + + +.if !defined(S) +.if exists(./@/.) +S= ./@ +.else +S= ../../.. +.endif +.endif +.include "$S/conf/kern.pre.mk" +M= i386-xen +MKMODULESENV+= MACHINE=i386-xen +INCLUDES+= -I../../include/xen-public +%BEFORE_DEPEND + +%OBJS + +%FILES.c + +%FILES.s + +%FILES.m + +%CLEAN + +%RULES + +.include "$S/conf/kern.post.mk" diff --git a/freebsd-5.3-xen-sparse/conf/files.i386-xen b/freebsd-5.3-xen-sparse/conf/files.i386-xen new file mode 100644 index 0000000000..189378d469 --- /dev/null +++ b/freebsd-5.3-xen-sparse/conf/files.i386-xen @@ -0,0 +1,294 @@ +# This file tells config what files go into building a kernel, +# files marked standard are always included. +# +# $FreeBSD: src/sys/conf/files.i386,v 1.457 2003/12/03 23:06:30 imp Exp $ +# +# The long compile-with and dependency lines are required because of +# limitations in config: backslash-newline doesn't work in strings, and +# dependency lines other than the first are silently ignored. 
+# +linux_genassym.o optional compat_linux \ + dependency "$S/i386/linux/linux_genassym.c" \ + compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ + no-obj no-implicit-rule \ + clean "linux_genassym.o" +# +linux_assym.h optional compat_linux \ + dependency "$S/kern/genassym.sh linux_genassym.o" \ + compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \ + no-obj no-implicit-rule before-depend \ + clean "linux_assym.h" +# +svr4_genassym.o optional compat_svr4 \ + dependency "$S/i386/svr4/svr4_genassym.c" \ + compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ + no-obj no-implicit-rule \ + clean "svr4_genassym.o" +# +svr4_assym.h optional compat_svr4 \ + dependency "$S/kern/genassym.sh svr4_genassym.o" \ + compile-with "sh $S/kern/genassym.sh svr4_genassym.o > ${.TARGET}" \ + no-obj no-implicit-rule before-depend \ + clean "svr4_assym.h" +# +font.h optional sc_dflt_font \ + compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ + no-obj no-implicit-rule before-depend \ + clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" +# +atkbdmap.h optional atkbd_dflt_keymap \ + compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ + no-obj no-implicit-rule before-depend \ + clean "atkbdmap.h" +# +ukbdmap.h optional ukbd_dflt_keymap \ + compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static 
accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ + no-obj no-implicit-rule before-depend \ + clean "ukbdmap.h" +# +msysosak.o optional fla \ + dependency "$S/contrib/dev/fla/i386/msysosak.o.uu" \ + compile-with "uudecode < $S/contrib/dev/fla/i386/msysosak.o.uu" \ + no-implicit-rule +# +trlld.o optional oltr \ + dependency "$S/contrib/dev/oltr/i386-elf.trlld.o.uu" \ + compile-with "uudecode < $S/contrib/dev/oltr/i386-elf.trlld.o.uu" \ + no-implicit-rule +# +hal.o optional ath_hal \ + dependency "$S/contrib/dev/ath/freebsd/i386-elf.hal.o.uu" \ + compile-with "uudecode < $S/contrib/dev/ath/freebsd/i386-elf.hal.o.uu" \ + no-implicit-rule +# +# +compat/linux/linux_file.c optional compat_linux +compat/linux/linux_getcwd.c optional compat_linux +compat/linux/linux_ioctl.c optional compat_linux +compat/linux/linux_ipc.c optional compat_linux +compat/linux/linux_mib.c optional compat_linux +compat/linux/linux_misc.c optional compat_linux +compat/linux/linux_signal.c optional compat_linux +compat/linux/linux_socket.c optional compat_linux +compat/linux/linux_stats.c optional compat_linux +compat/linux/linux_sysctl.c optional compat_linux +compat/linux/linux_uid16.c optional compat_linux +compat/linux/linux_util.c optional compat_linux +compat/pecoff/imgact_pecoff.c optional pecoff_support +compat/svr4/imgact_svr4.c optional compat_svr4 +compat/svr4/svr4_fcntl.c optional compat_svr4 +compat/svr4/svr4_filio.c optional compat_svr4 +compat/svr4/svr4_ioctl.c optional compat_svr4 +compat/svr4/svr4_ipc.c optional compat_svr4 +compat/svr4/svr4_misc.c optional compat_svr4 +compat/svr4/svr4_resource.c optional compat_svr4 +compat/svr4/svr4_signal.c optional compat_svr4 +compat/svr4/svr4_socket.c optional compat_svr4 +compat/svr4/svr4_sockio.c optional compat_svr4 +compat/svr4/svr4_stat.c optional compat_svr4 +compat/svr4/svr4_stream.c optional compat_svr4 +compat/svr4/svr4_syscallnames.c optional compat_svr4 +compat/svr4/svr4_sysent.c optional compat_svr4 
+compat/svr4/svr4_sysvec.c optional compat_svr4 +compat/svr4/svr4_termios.c optional compat_svr4 +compat/svr4/svr4_ttold.c optional compat_svr4 +contrib/dev/fla/fla.c optional fla +contrib/dev/oltr/if_oltr.c optional oltr +contrib/dev/oltr/trlldbm.c optional oltr +contrib/dev/oltr/trlldhm.c optional oltr +contrib/dev/oltr/trlldmac.c optional oltr +bf_enc.o optional ipsec ipsec_esp \ + dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ + compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ + no-implicit-rule +crypto/des/arch/i386/des_enc.S optional ipsec ipsec_esp +crypto/des/des_ecb.c optional netsmbcrypto +crypto/des/arch/i386/des_enc.S optional netsmbcrypto +crypto/des/des_setkey.c optional netsmbcrypto +bf_enc.o optional crypto \ + dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ + compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ + no-implicit-rule +crypto/des/arch/i386/des_enc.S optional crypto +crypto/des/des_ecb.c optional crypto +crypto/des/des_setkey.c optional crypto +dev/ar/if_ar.c optional ar +dev/ar/if_ar_pci.c optional ar pci +dev/cx/csigma.c optional cx +dev/cx/cxddk.c optional cx +dev/cx/if_cx.c optional cx +dev/dgb/dgb.c count dgb +dev/fb/fb.c optional fb +dev/fb/fb.c optional vga +dev/fb/splash.c optional splash +dev/fb/vga.c optional vga +dev/kbd/atkbd.c optional atkbd +dev/kbd/atkbdc.c optional atkbdc +dev/kbd/kbd.c optional atkbd +dev/kbd/kbd.c optional kbd +dev/kbd/kbd.c optional sc +dev/kbd/kbd.c optional ukbd +dev/kbd/kbd.c optional vt +dev/mem/memutil.c standard +dev/random/nehemiah.c standard +dev/ppc/ppc.c optional ppc +dev/ppc/ppc_puc.c optional ppc puc pci +dev/sio/sio.c optional sio +dev/sio/sio_isa.c optional sio isa +dev/syscons/schistory.c optional sc +dev/syscons/scmouse.c optional 
sc +dev/syscons/scterm.c optional sc +dev/syscons/scterm-dumb.c optional sc +dev/syscons/scterm-sc.c optional sc +dev/syscons/scvesactl.c optional sc vga vesa +dev/syscons/scvgarndr.c optional sc vga +dev/syscons/scvidctl.c optional sc +dev/syscons/scvtb.c optional sc +dev/syscons/syscons.c optional sc +dev/syscons/sysmouse.c optional sc +dev/uart/uart_cpu_i386.c optional uart +geom/geom_bsd.c standard +geom/geom_bsd_enc.c standard +geom/geom_mbr.c standard +geom/geom_mbr_enc.c standard +i386/acpica/OsdEnvironment.c optional acpi +i386/acpica/acpi_machdep.c optional acpi +i386/acpica/acpi_wakeup.c optional acpi +acpi_wakecode.h optional acpi \ + dependency "$S/i386/acpica/acpi_wakecode.S" \ + compile-with "${MAKE} -f $S/i386/acpica/Makefile MAKESRCPATH=$S/i386/acpica" \ + no-obj no-implicit-rule before-depend \ + clean "acpi_wakecode.h acpi_wakecode.o acpi_wakecode.bin" +# +i386/acpica/madt.c optional acpi apic +i386/bios/mca_machdep.c optional mca +i386/bios/smapi.c optional smapi +i386/bios/smapi_bios.S optional smapi +i386/bios/smbios.c optional smbios +i386/bios/vpd.c optional vpd +i386/i386/apic_vector.s optional apic +i386/i386/atomic.c standard \ + compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" +i386/i386/autoconf.c standard +i386/i386/busdma_machdep.c standard +i386-xen/i386-xen/critical.c standard +i386/i386/db_disasm.c optional ddb +i386-xen/i386-xen/db_interface.c optional ddb +i386/i386/db_trace.c optional ddb +i386/i386/i386-gdbstub.c optional ddb +i386/i386/dump_machdep.c standard +i386/i386/elf_machdep.c standard +i386-xen/i386-xen/exception.s standard +i386-xen/i386-xen/i686_mem.c standard +i386/i386/identcpu.c standard +i386/i386/in_cksum.c optional inet +i386-xen/i386-xen/initcpu.c standard +i386-xen/i386-xen/intr_machdep.c standard +i386-xen/i386-xen/io_apic.c optional apic +i386/i386/legacy.c standard +i386-xen/i386-xen/locore.s standard no-obj +i386-xen/i386-xen/machdep.c standard +i386/i386/mem.c 
standard +i386-xen/i386-xen/mp_clock.c optional smp +i386-xen/i386-xen/mp_machdep.c optional smp +i386/i386/mpboot.s optional smp +i386-xen/i386-xen/mptable.c optional apic +i386-xen/i386-xen/local_apic.c optional apic +i386/i386/mptable_pci.c optional apic pci +i386/i386/nexus.c standard +i386/i386/uio_machdep.c standard +i386/i386/perfmon.c optional perfmon +i386/i386/perfmon.c optional perfmon profiling-routine +i386-xen/i386-xen/pmap.c standard +i386-xen/i386-xen/support.s standard +i386-xen/i386-xen/swtch.s standard +i386-xen/i386-xen/sys_machdep.c standard +i386-xen/i386-xen/trap.c standard +i386/i386/tsc.c standard +i386-xen/i386-xen/vm_machdep.c standard +i386-xen/i386-xen/clock.c standard + +# xen specific arch-dep files +i386-xen/i386-xen/hypervisor.c standard +i386-xen/i386-xen/xen_machdep.c standard +i386-xen/i386-xen/xen_bus.c standard +i386-xen/i386-xen/evtchn.c standard +i386-xen/i386-xen/ctrl_if.c standard + + +i386/isa/asc.c count asc +i386/isa/ctx.c optional ctx +i386/isa/cy.c count cy +i386/isa/elink.c optional ep +i386/isa/elink.c optional ie +i386/isa/gpib.c optional gp +i386/isa/gsc.c count gsc +i386/isa/istallion.c optional stli nowerror +i386/isa/loran.c optional loran +i386/isa/mse.c optional mse +i386/isa/nmi.c standard + +# drivers +i386-xen/xen/misc/npx.c optional npx +i386-xen/xen/misc/evtchn_dev.c standard +i386-xen/xen/char/console.c standard +i386-xen/xen/netfront/xn_netfront.c standard +i386-xen/xen/blkfront/xb_blkfront.c standard + + + +i386/isa/pcf.c optional pcf +i386/isa/pcvt/pcvt_drv.c optional vt +i386/isa/pcvt/pcvt_ext.c optional vt +i386/isa/pcvt/pcvt_kbd.c optional vt +i386/isa/pcvt/pcvt_out.c optional vt +i386/isa/pcvt/pcvt_sup.c optional vt +i386/isa/pcvt/pcvt_vtf.c optional vt +i386/isa/pmtimer.c optional pmtimer +i386/isa/prof_machdep.c optional profiling-routine +i386/isa/spic.c optional spic +i386/isa/spigot.c count spigot +i386/isa/spkr.c optional speaker +i386/isa/stallion.c optional stl nowerror +i386/isa/vesa.c 
optional vga vesa +i386/isa/wt.c count wt +i386/linux/imgact_linux.c optional compat_linux +i386/linux/linux_dummy.c optional compat_linux +i386/linux/linux_locore.s optional compat_linux \ + dependency "linux_assym.h" +i386/linux/linux_machdep.c optional compat_linux +i386/linux/linux_ptrace.c optional compat_linux +i386/linux/linux_sysent.c optional compat_linux +i386/linux/linux_sysvec.c optional compat_linux +i386/pci/pci_cfgreg.c optional pci +i386/pci/pci_bus.c optional pci +i386/svr4/svr4_locore.s optional compat_svr4 \ + dependency "svr4_assym.h" \ + warning "COMPAT_SVR4 is broken and should be avoided" +i386/svr4/svr4_machdep.c optional compat_svr4 +isa/atkbd_isa.c optional atkbd +isa/atkbdc_isa.c optional atkbdc +isa/fd.c optional fdc +isa/psm.c optional psm +isa/syscons_isa.c optional sc +isa/vga_isa.c optional vga +kern/imgact_aout.c optional compat_aout +kern/imgact_gzip.c optional gzip +libkern/divdi3.c standard +libkern/moddi3.c standard +libkern/qdivrem.c standard +libkern/ucmpdi2.c standard +libkern/udivdi3.c standard +libkern/umoddi3.c standard +libkern/flsl.c standard +libkern/ffsl.c standard + +pci/cy_pci.c optional cy pci +pci/agp_intel.c optional agp +pci/agp_via.c optional agp +pci/agp_sis.c optional agp +pci/agp_ali.c optional agp +pci/agp_amd.c optional agp +pci/agp_i810.c optional agp +pci/agp_nvidia.c optional agp + diff --git a/freebsd-5.3-xen-sparse/conf/ldscript.i386-xen b/freebsd-5.3-xen-sparse/conf/ldscript.i386-xen new file mode 100644 index 0000000000..65cbc852da --- /dev/null +++ b/freebsd-5.3-xen-sparse/conf/ldscript.i386-xen @@ -0,0 +1,134 @@ +/* $FreeBSD: src/sys/conf/ldscript.i386,v 1.9 2003/12/03 07:40:03 phk Exp $ */ +OUTPUT_FORMAT("elf32-i386-freebsd", "elf32-i386-freebsd", "elf32-i386-freebsd") +OUTPUT_ARCH(i386) +ENTRY(btext) +SEARCH_DIR(/usr/lib); +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + . 
= kernbase + SIZEOF_HEADERS; + .interp : { *(.interp) } + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .rel.text : + { *(.rel.text) *(.rel.gnu.linkonce.t*) } + .rela.text : + { *(.rela.text) *(.rela.gnu.linkonce.t*) } + .rel.data : + { *(.rel.data) *(.rel.gnu.linkonce.d*) } + .rela.data : + { *(.rela.data) *(.rela.gnu.linkonce.d*) } + .rel.rodata : + { *(.rel.rodata) *(.rel.gnu.linkonce.r*) } + .rela.rodata : + { *(.rela.rodata) *(.rela.gnu.linkonce.r*) } + .rel.got : { *(.rel.got) } + .rela.got : { *(.rela.got) } + .rel.ctors : { *(.rel.ctors) } + .rela.ctors : { *(.rela.ctors) } + .rel.dtors : { *(.rel.dtors) } + .rela.dtors : { *(.rela.dtors) } + .rel.init : { *(.rel.init) } + .rela.init : { *(.rela.init) } + .rel.fini : { *(.rel.fini) } + .rela.fini : { *(.rela.fini) } + .rel.bss : { *(.rel.bss) } + .rela.bss : { *(.rela.bss) } + .rel.plt : { *(.rel.plt) } + .rela.plt : { *(.rela.plt) } + .init : { *(.init) } =0x9090 + .plt : { *(.plt) } + .text : + { + *(.text) + *(.stub) + /* .gnu.warning sections are handled specially by elf32.em. */ + *(.gnu.warning) + *(.gnu.linkonce.t*) + } =0x9090 + _etext = .; + PROVIDE (etext = .); + .fini : { *(.fini) } =0x9090 + .rodata : { *(.rodata) *(.gnu.linkonce.r*) } + .rodata1 : { *(.rodata1) } + /* Adjust the address for the data segment. We want to adjust up to + the same address within the page on the next page up. */ + . = ALIGN(0x1000) + (. & (0x1000 - 1)) ; + .data : + { + *(.data) + *(.gnu.linkonce.d*) + CONSTRUCTORS + } + .data1 : { *(.data1) } + . 
= ALIGN(32 / 8); + _start_ctors = .; + PROVIDE (start_ctors = .); + .ctors : + { + *(.ctors) + } + _stop_ctors = .; + PROVIDE (stop_ctors = .); + .dtors : + { + *(.dtors) + } + .got : { *(.got.plt) *(.got) } + .dynamic : { *(.dynamic) } + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : { *(.sdata) } + _edata = .; + PROVIDE (edata = .); + __bss_start = .; + .sbss : { *(.sbss) *(.scommon) } + .bss : + { + *(.dynbss) + *(.bss) + *(COMMON) + } + . = ALIGN(32 / 8); + _end = . ; + PROVIDE (end = .); + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* These must appear regardless of . 
*/ +} diff --git a/freebsd-5.3-xen-sparse/conf/options.i386-xen b/freebsd-5.3-xen-sparse/conf/options.i386-xen new file mode 100644 index 0000000000..6bbc509087 --- /dev/null +++ b/freebsd-5.3-xen-sparse/conf/options.i386-xen @@ -0,0 +1,162 @@ +# $FreeBSD: src/sys/conf/options.i386,v 1.204 2003/12/03 23:06:30 imp Exp $ +# Options specific to the i386 platform kernels + +AUTO_EOI_1 opt_auto_eoi.h +AUTO_EOI_2 opt_auto_eoi.h +BROKEN_KEYBOARD_RESET opt_reset.h +COMPAT_OLDISA +I586_PMC_GUPROF opt_i586_guprof.h +MAXMEM +MPTABLE_FORCE_HTT +NO_MIXED_MODE +PERFMON +DISABLE_PSE opt_pmap.h +DISABLE_PG_G opt_pmap.h +PMAP_SHPGPERPROC opt_pmap.h +PPC_PROBE_CHIPSET opt_ppc.h +PPC_DEBUG opt_ppc.h +POWERFAIL_NMI opt_trap.h +MP_WATCHDOG opt_mp_watchdog.h + + + +# Options for emulators. These should only be used at config time, so +# they are handled like options for static filesystems +# (see src/sys/conf/options), except for broken debugging options. +COMPAT_AOUT opt_dontuse.h +IBCS2 opt_dontuse.h +COMPAT_LINUX opt_dontuse.h +COMPAT_SVR4 opt_dontuse.h +DEBUG_SVR4 opt_svr4.h +PECOFF_SUPPORT opt_dontuse.h +PECOFF_DEBUG opt_pecoff.h + +# Change KVM size. Changes things all over the kernel. +KVA_PAGES opt_global.h +XEN opt_global.h +XENDEV opt_xen.h +NOXENDEBUG opt_xen.h +# Physical address extensions and support for >4G ram. As above. 
+PAE opt_global.h + +CLK_CALIBRATION_LOOP opt_clock.h +CLK_USE_I8254_CALIBRATION opt_clock.h +CLK_USE_TSC_CALIBRATION opt_clock.h +TIMER_FREQ opt_clock.h + +CPU_ATHLON_SSE_HACK opt_cpu.h +CPU_BLUELIGHTNING_3X opt_cpu.h +CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h +CPU_BTB_EN opt_cpu.h +CPU_CYRIX_NO_LOCK opt_cpu.h +CPU_DIRECT_MAPPED_CACHE opt_cpu.h +CPU_DISABLE_5X86_LSSER opt_cpu.h +CPU_DISABLE_CMPXCHG opt_global.h # XXX global, unlike other CPU_* +CPU_DISABLE_SSE opt_cpu.h +CPU_ELAN opt_cpu.h +CPU_ELAN_XTAL opt_cpu.h +CPU_ELAN_PPS opt_cpu.h +CPU_ENABLE_SSE opt_cpu.h +CPU_FASTER_5X86_FPU opt_cpu.h +CPU_GEODE opt_cpu.h +CPU_I486_ON_386 opt_cpu.h +CPU_IORT opt_cpu.h +CPU_L2_LATENCY opt_cpu.h +CPU_LOOP_EN opt_cpu.h +CPU_PPRO2CELERON opt_cpu.h +CPU_RSTK_EN opt_cpu.h +CPU_SOEKRIS opt_cpu.h +CPU_SUSP_HLT opt_cpu.h +CPU_UPGRADE_HW_CACHE opt_cpu.h +CPU_WT_ALLOC opt_cpu.h +CYRIX_CACHE_REALLY_WORKS opt_cpu.h +CYRIX_CACHE_WORKS opt_cpu.h +NO_F00F_HACK opt_cpu.h +NO_MEMORY_HOLE opt_cpu.h + +# The CPU type affects the endian conversion functions all over the kernel. 
+I386_CPU opt_global.h +I486_CPU opt_global.h +I586_CPU opt_global.h +I686_CPU opt_global.h + +VGA_ALT_SEQACCESS opt_vga.h +VGA_DEBUG opt_vga.h +VGA_NO_FONT_LOADING opt_vga.h +VGA_NO_MODE_CHANGE opt_vga.h +VGA_SLOW_IOACCESS opt_vga.h +VGA_WIDTH90 opt_vga.h + +VESA +VESA_DEBUG opt_vesa.h + +PSM_HOOKRESUME opt_psm.h +PSM_RESETAFTERSUSPEND opt_psm.h +PSM_DEBUG opt_psm.h + +ATKBD_DFLT_KEYMAP opt_atkbd.h + +# pcvt(4) has a bunch of options +FAT_CURSOR opt_pcvt.h +XSERVER opt_pcvt.h +PCVT_24LINESDEF opt_pcvt.h +PCVT_CTRL_ALT_DEL opt_pcvt.h +PCVT_META_ESC opt_pcvt.h +PCVT_NSCREENS opt_pcvt.h +PCVT_PRETTYSCRNS opt_pcvt.h +PCVT_SCANSET opt_pcvt.h +PCVT_SCREENSAVER opt_pcvt.h +PCVT_USEKBDSEC opt_pcvt.h +PCVT_VT220KEYB opt_pcvt.h +PCVT_GREENSAVER opt_pcvt.h + +# Video spigot +SPIGOT_UNSECURE opt_spigot.h + +# Enables NETGRAPH support for Cronyx adapters +NETGRAPH_CRONYX opt_ng_cronyx.h + +# ------------------------------- +# isdn4bsd: passive ISA cards +# ------------------------------- +TEL_S0_8 opt_i4b.h +TEL_S0_16 opt_i4b.h +TEL_S0_16_3 opt_i4b.h +AVM_A1 opt_i4b.h +USR_STI opt_i4b.h +ITKIX1 opt_i4b.h +ELSA_PCC16 opt_i4b.h +# ------------------------------- +# isdn4bsd: passive ISA PnP cards +# ------------------------------- +CRTX_S0_P opt_i4b.h +DRN_NGO opt_i4b.h +TEL_S0_16_3_P opt_i4b.h +SEDLBAUER opt_i4b.h +DYNALINK opt_i4b.h +ASUSCOM_IPAC opt_i4b.h +ELSA_QS1ISA opt_i4b.h +SIEMENS_ISURF2 opt_i4b.h +EICON_DIVA opt_i4b.h +COMPAQ_M610 opt_i4b.h +# ------------------------------- +# isdn4bsd: passive PCI cards +# ------------------------------- +ELSA_QS1PCI opt_i4b.h +# ------------------------------- +# isdn4bsd: misc options +# ------------------------------- +# temporary workaround for SMP machines +I4B_SMP_WORKAROUND opt_i4b.h +# enable VJ compression code for ipr i/f +IPR_VJ opt_i4b.h +IPR_LOG opt_i4b.h + +# Device options +DEV_ACPI opt_acpi.h +DEV_APIC opt_apic.h +DEV_NPX opt_npx.h + +# ------------------------------- +# EOF +# ------------------------------- diff 
--git a/freebsd-5.3-xen-sparse/fbsdxensetup b/freebsd-5.3-xen-sparse/fbsdxensetup new file mode 100644 index 0000000000..3d024c370e --- /dev/null +++ b/freebsd-5.3-xen-sparse/fbsdxensetup @@ -0,0 +1,39 @@ +#!/bin/csh -f + +setenv XENROOT `bk root` +rm -rf $XENROOT/fbsdtmp $XENROOT/freebsd-5.3-xenU +mkdir -p $XENROOT/fbsdtmp +cd $XENROOT/fbsdtmp +echo "step 1" +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.aa +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ab +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ac +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ad +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ae +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.af +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ag +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ah +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ai +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.aj +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ak +wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.al +mkdir -p foo +cat ssys.?? 
| tar --unlink -xpzf - -C foo/ +mkdir -p $XENROOT/freebsd-5.3-xenU +mv foo/sys/* $XENROOT/freebsd-5.3-xenU +cd $XENROOT +rm -rf $XENROOT/fbsdtmp +echo "step 2" +mkdir -p $XENROOT/freebsd-5.3-xenU/i386-xen/include +cd $XENROOT/freebsd-5.3-xenU/i386-xen/include/ +foreach file (../../i386/include/*) + ln -s $file +end +echo "step 3" +cd $XENROOT/freebsd-5.3-xen-sparse +echo "step 4" +./mkbuildtree ../freebsd-5.3-xenU +echo "step 5" +cd $XENROOT/freebsd-5.3-xenU/i386-xen/include +ln -s $XENROOT/xen/include/public xen-public +echo "done" diff --git a/freebsd-5.3-xen-sparse/i386-xen/Makefile b/freebsd-5.3-xen-sparse/i386-xen/Makefile new file mode 100644 index 0000000000..f33c7a5af6 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/Makefile @@ -0,0 +1,40 @@ +# $FreeBSD: src/sys/i386/Makefile,v 1.11 2002/06/21 06:18:02 mckusick Exp $ +# @(#)Makefile 8.1 (Berkeley) 6/11/93 + +# Makefile for i386 links, tags file + +# SYS is normally set in Make.tags.inc +# SYS=/sys +SYS=/nsys + +TAGDIR= i386 + +.include "../kern/Make.tags.inc" + +all: + @echo "make links or tags only" + +# Directories in which to place i386 tags links +DI386= apm i386 ibcs2 include isa linux + +links:: + -for i in ${COMMDIR1}; do \ + (cd $$i && { rm -f tags; ln -s ../${TAGDIR}/tags tags; }) done + -for i in ${COMMDIR2}; do \ + (cd $$i && { rm -f tags; ln -s ../../${TAGDIR}/tags tags; }) done + -for i in ${DI386}; do \ + (cd $$i && { rm -f tags; ln -s ../tags tags; }) done + +SI386= ${SYS}/i386/apm/*.[ch] \ + ${SYS}/i386/i386/*.[ch] ${SYS}/i386/ibcs2/*.[ch] \ + ${SYS}/i386/include/*.[ch] ${SYS}/i386/isa/*.[ch] \ + ${SYS}/i386/linux/*.[ch] +AI386= ${SYS}/i386/i386/*.s + +tags:: + -ctags -wdt ${COMM} ${SI386} + egrep "^ENTRY(.*)|^ALTENTRY(.*)" ${AI386} | \ + sed "s;\([^:]*\):\([^(]*\)(\([^, )]*\)\(.*\);\3 \1 /^\2(\3\4$$/;" \ + >> tags + sort -o tags tags + chmod 444 tags diff --git a/freebsd-5.3-xen-sparse/i386-xen/compile/.cvsignore b/freebsd-5.3-xen-sparse/i386-xen/compile/.cvsignore new file mode 100644 
index 0000000000..232298edb1 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/compile/.cvsignore @@ -0,0 +1 @@ +[A-Za-z0-9]* diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC b/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC new file mode 100644 index 0000000000..6a70639bda --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC @@ -0,0 +1,273 @@ +# +# GENERIC -- Generic kernel configuration file for FreeBSD/i386 +# +# For more information on this file, please read the handbook section on +# Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest information. +# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. +# +# $FreeBSD: src/sys/i386/conf/GENERIC,v 1.394.2.3 2004/01/26 19:42:11 nectar Exp $ + +machine i386 +cpu I486_CPU +cpu I586_CPU +cpu I686_CPU +ident GENERIC + +#To statically compile in device wiring instead of /boot/device.hints +#hints "GENERIC.hints" #Default places to look for devices. 
+ +#makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols + +options SCHED_4BSD #4BSD scheduler +options INET #InterNETworking +options INET6 #IPv6 communications protocols +options FFS #Berkeley Fast Filesystem +options SOFTUPDATES #Enable FFS soft updates support +options UFS_ACL #Support for access control lists +options UFS_DIRHASH #Improve performance on big directories +options MD_ROOT #MD is a potential root device +options NFSCLIENT #Network Filesystem Client +options NFSSERVER #Network Filesystem Server +options NFS_ROOT #NFS usable as /, requires NFSCLIENT +options MSDOSFS #MSDOS Filesystem +options CD9660 #ISO 9660 Filesystem +options PROCFS #Process filesystem (requires PSEUDOFS) +options PSEUDOFS #Pseudo-filesystem framework +options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] +options COMPAT_FREEBSD4 #Compatible with FreeBSD4 +options SCSI_DELAY=15000 #Delay (in ms) before probing SCSI +options KTRACE #ktrace(1) support +options SYSVSHM #SYSV-style shared memory +options SYSVMSG #SYSV-style message queues +options SYSVSEM #SYSV-style semaphores +options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions +options KBD_INSTALL_CDEV # install a CDEV entry in /dev +options AHC_REG_PRETTY_PRINT # Print register bitfields in debug + # output. Adds ~128k to driver. +options AHD_REG_PRETTY_PRINT # Print register bitfields in debug + # output. Adds ~215k to driver. 
+options PFIL_HOOKS # pfil(9) framework + +# Debugging for use in -current +#options DDB #Enable the kernel debugger +#options INVARIANTS #Enable calls of extra sanity checking +options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS +#options WITNESS #Enable checks to detect deadlocks and cycles +#options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed + +# To make an SMP kernel, the next two are needed +options SMP # Symmetric MultiProcessor Kernel +device apic # I/O APIC + +device isa +device eisa +device pci + +# Floppy drives +device fdc + +# ATA and ATAPI devices +device ata +device atadisk # ATA disk drives +device ataraid # ATA RAID drives +device atapicd # ATAPI CDROM drives +device atapifd # ATAPI floppy drives +device atapist # ATAPI tape drives +options ATA_STATIC_ID #Static device numbering + +# SCSI Controllers +device ahb # EISA AHA1742 family +device ahc # AHA2940 and onboard AIC7xxx devices +device ahd # AHA39320/29320 and onboard AIC79xx devices +device amd # AMD 53C974 (Tekram DC-390(T)) +device isp # Qlogic family +device mpt # LSI-Logic MPT-Fusion +#device ncr # NCR/Symbios Logic +device sym # NCR/Symbios Logic (newer chipsets + those of `ncr') +device trm # Tekram DC395U/UW/F DC315U adapters + +device adv # Advansys SCSI adapters +device adw # Advansys wide SCSI adapters +device aha # Adaptec 154x SCSI adapters +device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60. 
+device bt # Buslogic/Mylex MultiMaster SCSI adapters + +device ncv # NCR 53C500 +device nsp # Workbit Ninja SCSI-3 +device stg # TMC 18C30/18C50 + +# SCSI peripherals +device scbus # SCSI bus (required for SCSI) +device ch # SCSI media changers +device da # Direct Access (disks) +device sa # Sequential Access (tape etc) +device cd # CD +device pass # Passthrough device (direct SCSI access) +device ses # SCSI Environmental Services (and SAF-TE) + +# RAID controllers interfaced to the SCSI subsystem +device amr # AMI MegaRAID +device asr # DPT SmartRAID V, VI and Adaptec SCSI RAID +device ciss # Compaq Smart RAID 5* +device dpt # DPT Smartcache III, IV - See NOTES for options +device iir # Intel Integrated RAID +device ips # IBM (Adaptec) ServeRAID +device mly # Mylex AcceleRAID/eXtremeRAID + +# RAID controllers +device aac # Adaptec FSA RAID +device aacp # SCSI passthrough for aac (requires CAM) +device ida # Compaq Smart RAID +device mlx # Mylex DAC960 family +device pst # Promise Supertrak SX6000 +device twe # 3ware ATA RAID + +# atkbdc0 controls both the keyboard and the PS/2 mouse +device atkbdc # AT keyboard controller +device atkbd # AT keyboard +device psm # PS/2 mouse + +device vga # VGA video card driver + +device splash # Splash screen and screen saver support + +# syscons is the default console driver, resembling an SCO console +device sc + +# Enable this for the pcvt (VT220 compatible) console driver +#device vt +#options XSERVER # support for X server on a vt console +#options FAT_CURSOR # start with block cursor + +device agp # support several AGP chipsets + +# Floating point support - do not disable. +device npx + +# Power management support (see NOTES for more options) +#device apm +# Add suspend/resume support for the i8254. 
+device pmtimer + +# PCCARD (PCMCIA) support +# Pcmcia and cardbus bridge support +device cbb # cardbus (yenta) bridge +#device pcic # ExCA ISA and PCI bridges +device pccard # PC Card (16-bit) bus +device cardbus # CardBus (32-bit) bus + +# Serial (COM) ports +device sio # 8250, 16[45]50 based serial ports + +# Parallel port +device ppc +device ppbus # Parallel port bus (required) +device lpt # Printer +device plip # TCP/IP over parallel +device ppi # Parallel port interface device +#device vpo # Requires scbus and da + +# If you've got a "dumb" serial or parallel PCI card that is +# supported by the puc(4) glue driver, uncomment the following +# line to enable it (connects to the sio and/or ppc drivers): +#device puc + +# PCI Ethernet NICs. +device de # DEC/Intel DC21x4x (``Tulip'') +device em # Intel PRO/1000 adapter Gigabit Ethernet Card +device txp # 3Com 3cR990 (``Typhoon'') +device vx # 3Com 3c590, 3c595 (``Vortex'') + +# PCI Ethernet NICs that use the common MII bus controller code. +# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! +device miibus # MII bus support +device bfe # Broadcom BCM440x 10/100 ethernet +device bge # Broadcom BCM570xx Gigabit Ethernet +device dc # DEC/Intel 21143 and various workalikes +device fxp # Intel EtherExpress PRO/100B (82557, 82558) +device pcn # AMD Am79C97x PCI 10/100 (precedence over 'lnc') +device re # RealTek 8139C+/8169/8169S/8110S +device rl # RealTek 8129/8139 +device sf # Adaptec AIC-6915 (``Starfire'') +device sis # Silicon Integrated Systems SiS 900/SiS 7016 +device sk # SysKonnect SK-984x and SK-982x gigabit ethernet +device ste # Sundance ST201 (D-Link DFE-550TX) +device ti # Alteon Networks Tigon I/II gigabit ethernet +device tl # Texas Instruments ThunderLAN +device tx # SMC EtherPower II (83c170 ``EPIC'') +device vr # VIA Rhine, Rhine II +device wb # Winbond W89C840F +device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') + +# ISA Ethernet NICs. pccard nics included. 
+device cs # Crystal Semiconductor CS89x0 NIC +# 'device ed' requires 'device miibus' +device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards +device ex # Intel EtherExpress Pro/10 and Pro/10+ +device ep # Etherlink III based cards +device fe # Fujitsu MB8696x based cards +device ie # EtherExpress 8/16, 3C507, StarLAN 10 etc. +device lnc # NE2100, NE32-VL Lance Ethernet cards +device sn # SMC's 9000 series of ethernet chips +device xe # Xircom pccard ethernet + +# ISA devices that use the old ISA shims +#device le + +# Wireless NIC cards +device wlan # 802.11 support +device an # Aironet 4500/4800 802.11 wireless NICs. +device awi # BayStack 660 and others +device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. +#device wl # Older non 802.11 Wavelan wireless NIC. + +# Pseudo devices - the number indicates how many units to allocate. +device random # Entropy device +device loop # Network loopback +device ether # Ethernet support +device sl # Kernel SLIP +device ppp # Kernel PPP +device tun # Packet tunnel. +device pty # Pseudo-ttys (telnet etc) +device md # Memory "disks" +device gif # IPv6 and IPv4 tunneling +device faith # IPv6-to-IPv4 relaying (translation) + +# The `bpf' device enables the Berkeley Packet Filter. +# Be aware of the administrative consequences of enabling this! 
+device bpf # Berkeley packet filter + +# USB support +device uhci # UHCI PCI->USB interface +device ohci # OHCI PCI->USB interface +device usb # USB Bus (required) +#device udbp # USB Double Bulk Pipe devices +device ugen # Generic +device uhid # "Human Interface Devices" +device ukbd # Keyboard +device ulpt # Printer +device umass # Disks/Mass storage - Requires scbus and da +device ums # Mouse +device urio # Diamond Rio 500 MP3 player +device uscanner # Scanners +# USB Ethernet, requires mii +device aue # ADMtek USB ethernet +device axe # ASIX Electronics USB ethernet +device cue # CATC USB ethernet +device kue # Kawasaki LSI USB ethernet + +# FireWire support +device firewire # FireWire bus code +device sbp # SCSI over FireWire (Requires scbus and da) +device fwe # Ethernet over FireWire (non-standard!) diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints b/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints new file mode 100644 index 0000000000..c02274871b --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints @@ -0,0 +1,93 @@ +# $FreeBSD: src/sys/i386/conf/GENERIC.hints,v 1.11 2002/12/05 22:49:47 jhb Exp $ +hint.fdc.0.at="isa" +hint.fdc.0.port="0x3F0" +hint.fdc.0.irq="6" +hint.fdc.0.drq="2" +hint.fd.0.at="fdc0" +hint.fd.0.drive="0" +hint.fd.1.at="fdc0" +hint.fd.1.drive="1" +hint.ata.0.at="isa" +hint.ata.0.port="0x1F0" +hint.ata.0.irq="14" +hint.ata.1.at="isa" +hint.ata.1.port="0x170" +hint.ata.1.irq="15" +hint.adv.0.at="isa" +hint.adv.0.disabled="1" +hint.bt.0.at="isa" +hint.bt.0.disabled="1" +hint.aha.0.at="isa" +hint.aha.0.disabled="1" +hint.aic.0.at="isa" +hint.aic.0.disabled="1" +hint.atkbdc.0.at="isa" +hint.atkbdc.0.port="0x060" +hint.atkbd.0.at="atkbdc" +hint.atkbd.0.irq="1" +hint.atkbd.0.flags="0x1" +hint.psm.0.at="atkbdc" +hint.psm.0.irq="12" +hint.vga.0.at="isa" +hint.sc.0.at="isa" +hint.sc.0.flags="0x100" +hint.vt.0.at="isa" +hint.vt.0.disabled="1" +hint.apm.0.disabled="1" +hint.apm.0.flags="0x20" +hint.pcic.0.at="isa" +# 
hint.pcic.0.irq="10" # Default to polling +hint.pcic.0.port="0x3e0" +hint.pcic.0.maddr="0xd0000" +hint.pcic.1.at="isa" +hint.pcic.1.irq="11" +hint.pcic.1.port="0x3e2" +hint.pcic.1.maddr="0xd4000" +hint.pcic.1.disabled="1" +hint.sio.0.at="isa" +hint.sio.0.port="0x3F8" +hint.sio.0.flags="0x10" +hint.sio.0.irq="4" +hint.sio.1.at="isa" +hint.sio.1.port="0x2F8" +hint.sio.1.irq="3" +hint.sio.2.at="isa" +hint.sio.2.disabled="1" +hint.sio.2.port="0x3E8" +hint.sio.2.irq="5" +hint.sio.3.at="isa" +hint.sio.3.disabled="1" +hint.sio.3.port="0x2E8" +hint.sio.3.irq="9" +hint.ppc.0.at="isa" +hint.ppc.0.irq="7" +hint.ed.0.at="isa" +hint.ed.0.disabled="1" +hint.ed.0.port="0x280" +hint.ed.0.irq="10" +hint.ed.0.maddr="0xd8000" +hint.cs.0.at="isa" +hint.cs.0.disabled="1" +hint.cs.0.port="0x300" +hint.sn.0.at="isa" +hint.sn.0.disabled="1" +hint.sn.0.port="0x300" +hint.sn.0.irq="10" +hint.ie.0.at="isa" +hint.ie.0.disabled="1" +hint.ie.0.port="0x300" +hint.ie.0.irq="10" +hint.ie.0.maddr="0xd0000" +hint.fe.0.at="isa" +hint.fe.0.disabled="1" +hint.fe.0.port="0x300" +hint.le.0.at="isa" +hint.le.0.disabled="1" +hint.le.0.port="0x300" +hint.le.0.irq="5" +hint.le.0.maddr="0xd0000" +hint.lnc.0.at="isa" +hint.lnc.0.disabled="1" +hint.lnc.0.port="0x280" +hint.lnc.0.irq="10" +hint.lnc.0.drq="0" diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/Makefile b/freebsd-5.3-xen-sparse/i386-xen/conf/Makefile new file mode 100644 index 0000000000..0284f84e82 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/conf/Makefile @@ -0,0 +1,3 @@ +# $FreeBSD: src/sys/i386/conf/Makefile,v 1.9 2003/02/26 23:36:58 ru Exp $ + +.include "${.CURDIR}/../../conf/makeLINT.mk" diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/NOTES b/freebsd-5.3-xen-sparse/i386-xen/conf/NOTES new file mode 100644 index 0000000000..b01c607dfa --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/conf/NOTES @@ -0,0 +1,1115 @@ +# +# NOTES -- Lines that can be cut/pasted into kernel and hints configs. 
+# +# This file contains machine dependent kernel configuration notes. For +# machine independent notes, look in /sys/conf/NOTES. +# +# $FreeBSD: src/sys/i386/conf/NOTES,v 1.1108 2003/12/04 19:57:56 phk Exp $ +# + +# +# This directive is mandatory; it defines the architecture to be +# configured for; in this case, the 386 family based IBM-PC and +# compatibles. +# +machine i386 + +# +# We want LINT to cover profiling as well +profile 2 + + +##################################################################### +# SMP OPTIONS: +# +# The apic device enables the use of the I/O APIC for interrupt delivery. +# The apic device can be used in both UP and SMP kernels, but is required +# for SMP kernels. Thus, the apic device is not strictly an SMP option, +# but it is a prerequisite for SMP. +# +# Notes: +# +# Be sure to disable 'cpu I386_CPU' for SMP kernels. +# +# By default, mixed mode is used to route IRQ0 from the AT timer via +# the 8259A master PIC through the ExtINT pin on the first I/O APIC. +# This can be disabled via the NO_MIXED_MODE option. In that case, +# IRQ0 will be routed via an intpin on the first I/O APIC. Not all +# motherboards hook IRQ0 up to the first I/O APIC even though their +# MP table or MADT may claim to do so. That is why mixed mode is +# enabled by default. +# +# HTT CPUs should only be used if they are enabled in the BIOS. For +# the ACPI case, ACPI only correctly tells us about any HTT CPUs if +# they are enabled. However, most HTT systems do not list HTT CPUs +# in the MP Table if they are enabled, thus we guess at the HTT CPUs +# for the MP Table case. However, we shouldn't try to guess and use +# these CPUs if HTTT is disabled. Thus, HTT guessing is only enabled +# for the MP Table if the user explicitly asks for it via the +# MPTABLE_FORCE_HTT option. Do NOT use this option if you have HTT +# disabled in your BIOS. 
+# + +# Mandatory: +device apic # I/O apic + +# Optional: +options MPTABLE_FORCE_HTT # Enable HTT CPUs with the MP Table +options NO_MIXED_MODE # Disable use of mixed mode + + +##################################################################### +# CPU OPTIONS + +# +# You must specify at least one CPU (the one you intend to run on); +# deleting the specification for CPUs you don't need to use may make +# parts of the system run faster. +# I386_CPU is mutually exclusive with the other CPU types. +# +#cpu I386_CPU +cpu I486_CPU +cpu I586_CPU # aka Pentium(tm) +cpu I686_CPU # aka Pentium Pro(tm) + +# +# Options for CPU features. +# +# CPU_ATHLON_SSE_HACK tries to enable SSE instructions when the BIOS has +# forgotten to enable them. +# +# CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM +# BlueLightning CPU. It works only with Cyrix FPU, and this option +# should not be used with Intel FPU. +# +# CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning +# CPU if CPU supports it. The default is double-clock mode on +# BlueLightning CPU box. +# +# CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1). +# +# CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct +# mapped mode. Default is 2-way set associative mode. +# +# CPU_CYRIX_NO_LOCK enables weak locking for the entire address space +# of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1. +# Otherwise, the NO_LOCK bit of CCR1 is cleared. (NOTE 3) +# +# CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables +# reorder). This option should not be used if you use memory mapped +# I/O device(s). +# +# CPU_ELAN enables support for AMDs ElanSC520 CPU. +# CPU_ELAN_XTAL sets the clock crystal frequency in Hz +# CPU_ELAN_PPS enables precision timestamp code. +# +# CPU_SOEKRIS enables support www.soekris.com hardware. +# +# CPU_ENABLE_SSE enables SSE/MMX2 instructions support. This is default +# on I686_CPU and above. 
+# CPU_DISABLE_SSE explicitly prevent I686_CPU from turning on SSE. +# +# CPU_FASTER_5X86_FPU enables faster FPU exception handler. +# +# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products +# for i386 machines. +# +# CPU_IORT defines I/O clock delay time (NOTE 1). Default values of +# I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7,respectively +# (no clock delay). +# +# CPU_L2_LATENCY specifed the L2 cache latency value. This option is used +# only when CPU_PPRO2CELERON is defined and Mendocino Celeron is detected. +# The default value is 5. +# +# CPU_LOOP_EN prevents flushing the prefetch buffer if the destination +# of a jump is already present in the prefetch buffer on Cyrix 5x86(NOTE +# 1). +# +# CPU_PPRO2CELERON enables L2 cache of Mendocino Celeron CPUs. This option +# is useful when you use Socket 8 to Socket 370 converter, because most Pentium +# Pro BIOSs do not enable L2 cache of Mendocino Celeron CPUs. +# +# CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1). +# +# CPU_SUSP_HLT enables suspend on HALT. If this option is set, CPU +# enters suspend mode following execution of HALT instruction. +# +# CPU_UPGRADE_HW_CACHE eliminates unneeded cache flush instruction(s). +# +# CPU_WT_ALLOC enables write allocation on Cyrix 6x86/6x86MX and AMD +# K5/K6/K6-2 cpus. +# +# CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache +# flush at hold state. +# +# CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs +# without cache flush at hold state, and (2) write-back CPU cache on +# Cyrix 6x86 whose revision < 2.7 (NOTE 2). +# +# NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY +# Pentiums) from locking up when a LOCK CMPXCHG8B instruction is +# executed. This option is only needed if I586_CPU is also defined, +# and should be included for any non-Pentium CPU that defines it. 
+# +# NO_MEMORY_HOLE is an optimisation for systems with AMD K6 processors +# which indicates that the 15-16MB range is *definitely* not being +# occupied by an ISA memory hole. +# +# CPU_DISABLE_CMPXCHG disables the CMPXCHG instruction on > i386 IA32 +# machines. VmWare seems to emulate this instruction poorly, causing +# the guest OS to run very slowly. Enabling this with a SMP kernel +# will cause the kernel to be unusable. +# +# NOTE 1: The options, CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT, +# CPU_LOOP_EN and CPU_RSTK_EN should not be used because of CPU bugs. +# These options may crash your system. +# +# NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled +# in write-through mode when revision < 2.7. If revision of Cyrix +# 6x86 >= 2.7, CPU cache is always enabled in write-back mode. +# +# NOTE 3: This option may cause failures for software that requires +# locked cycles in order to operate correctly. +# +options CPU_ATHLON_SSE_HACK +options CPU_BLUELIGHTNING_FPU_OP_CACHE +options CPU_BLUELIGHTNING_3X +options CPU_BTB_EN +options CPU_DIRECT_MAPPED_CACHE +options CPU_DISABLE_5X86_LSSER +options CPU_ELAN +options CPU_SOEKRIS +options CPU_ELAN_XTAL=32768000 +options CPU_ELAN_PPS +options CPU_ENABLE_SSE +#options CPU_DISABLE_SSE +options CPU_FASTER_5X86_FPU +options CPU_I486_ON_386 +options CPU_IORT +options CPU_L2_LATENCY=5 +options CPU_LOOP_EN +options CPU_PPRO2CELERON +options CPU_RSTK_EN +options CPU_SUSP_HLT +options CPU_UPGRADE_HW_CACHE +options CPU_WT_ALLOC +options CYRIX_CACHE_WORKS +options CYRIX_CACHE_REALLY_WORKS +#options NO_F00F_HACK +options CPU_DISABLE_CMPXCHG + +# Debug options +options NPX_DEBUG # enable npx debugging (FPU/math emu) + #new math emulator + +# +# PERFMON causes the driver for Pentium/Pentium Pro performance counters +# to be compiled. See perfmon(4) for more information. 
+# +options PERFMON + + +##################################################################### +# NETWORKING OPTIONS + +# +# DEVICE_POLLING adds support for mixed interrupt-polling handling +# of network device drivers, which has significant benefits in terms +# of robustness to overloads and responsivity, as well as permitting +# accurate scheduling of the CPU time between kernel network processing +# and other activities. The drawback is a moderate (up to 1/HZ seconds) +# potential increase in response times. +# It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING +# to achieve smoother behaviour. +# Additionally, you can enable/disable polling at runtime with the +# sysctl variable kern.polling.enable (defaults off), and select +# the CPU fraction reserved to userland with the sysctl variable +# kern.polling.user_frac (default 50, range 0..100). +# +# Only the "dc" "fxp" and "sis" devices support this mode of operation at +# the time of this writing. + +options DEVICE_POLLING + + +##################################################################### +# CLOCK OPTIONS + +# The following options are used for debugging clock behavior only, and +# should not be used for production systems. +# +# CLK_CALIBRATION_LOOP will run the clock calibration loop at startup +# until the user presses a key. + +options CLK_CALIBRATION_LOOP + +# The following two options measure the frequency of the corresponding +# clock relative to the RTC (onboard mc146818a). + +options CLK_USE_I8254_CALIBRATION +options CLK_USE_TSC_CALIBRATION + + +##################################################################### +# MISCELLANEOUS DEVICES AND OPTIONS + +device speaker #Play IBM BASIC-style noises out your speaker +hint.speaker.0.at="isa" +hint.speaker.0.port="0x61" +device gzip #Exec gzipped a.out's. REQUIRES COMPAT_AOUT! 
+device apm_saver # Requires APM
+
+
+#####################################################################
+# HARDWARE BUS CONFIGURATION
+
+#
+# ISA bus
+#
+device isa
+
+#
+# Options for `isa':
+#
+# AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A
+# interrupt controller. This saves about 0.7-1.25 usec for each interrupt.
+# This option breaks suspend/resume on some portables.
+#
+# AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A
+# interrupt controller. This saves about 0.7-1.25 usec for each interrupt.
+# Automatic EOI is documented not to work for the slave with the
+# original i8259A, but it works for some clones and some integrated
+# versions.
+#
+# MAXMEM specifies the amount of RAM on the machine; if this is not
+# specified, FreeBSD will first read the amount of memory from the CMOS
+# RAM, so the amount of memory will initially be limited to 64MB or 16MB
+# depending on the BIOS. If the BIOS reports 64MB, a memory probe will
+# then attempt to detect the installed amount of RAM. If this probe
+# fails to detect >64MB RAM you will have to use the MAXMEM option.
+# The amount is in kilobytes, so for a machine with 128MB of RAM, it would
+# be 131072 (128 * 1024).
+#
+# BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to
+# reset the CPU for reboot. This is needed on some systems with broken
+# keyboard controllers.
+
+options COMPAT_OLDISA #Use ISA shims and glue for old drivers
+options AUTO_EOI_1
+#options AUTO_EOI_2
+
+options MAXMEM=(128*1024)
+#options BROKEN_KEYBOARD_RESET
+
+#
+# EISA bus
+#
+# The EISA bus device is `eisa'. It provides auto-detection and
+# configuration support for all devices on the EISA bus.
+
+device eisa
+
+# By default, only 10 EISA slots are probed, since the slot numbers
+# above clash with the configuration address space of the PCI subsystem,
+# and the EISA probe is not very smart about this. 
This is sufficient +# for most machines, but in particular the HP NetServer LC series comes +# with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11, +# thus you need to bump this figure to 12 for them. +options EISA_SLOTS=12 + +# +# MCA bus: +# +# The MCA bus device is `mca'. It provides auto-detection and +# configuration support for all devices on the MCA bus. +# No hints are required for MCA. + +device mca + +# +# PCI bus & PCI options: +# +device pci + +# +# AGP GART support +device agp + + +##################################################################### +# HARDWARE DEVICE CONFIGURATION + +# +# Mandatory devices: +# + +# To include support for VGA VESA video modes +options VESA + +# Turn on extra debugging checks and output for VESA support. +options VESA_DEBUG + +# The pcvt console driver (vt220 compatible). +device vt +hint.vt.0.at="isa" +options XSERVER # support for running an X server on vt +options FAT_CURSOR # start with block cursor +# This PCVT option is for keyboards such as those used on really old ThinkPads +options PCVT_SCANSET=2 +# Other PCVT options are documented in pcvt(4). +options PCVT_24LINESDEF +options PCVT_CTRL_ALT_DEL +options PCVT_META_ESC +options PCVT_NSCREENS=9 +options PCVT_PRETTYSCRNS +options PCVT_SCREENSAVER +options PCVT_USEKBDSEC +options PCVT_VT220KEYB +options PCVT_GREENSAVER + +# +# The Numeric Processing eXtension driver. In addition to this, you +# may configure a math emulator (see above). If your machine has a +# hardware FPU and the kernel configuration includes the npx device +# *and* a math emulator compiled into the kernel, the hardware FPU +# will be used, unless it is found to be broken or unless "flags" to +# npx0 includes "0x08", which requests preference for the emulator. +device npx +hint.npx.0.flags="0x0" +hint.npx.0.irq="13" + +# +# `flags' for npx0: +# 0x01 don't use the npx registers to optimize bcopy. +# 0x02 don't use the npx registers to optimize bzero. 
+# 0x04 don't use the npx registers to optimize copyin or copyout. +# 0x08 use emulator even if hardware FPU is available. +# The npx registers are normally used to optimize copying and zeroing when +# all of the following conditions are satisfied: +# I586_CPU is an option +# the cpu is an i586 (perhaps not a Pentium) +# the probe for npx0 succeeds +# INT 16 exception handling works. +# Then copying and zeroing using the npx registers is normally 30-100% faster. +# The flags can be used to control cases where it doesn't work or is slower. +# Setting them at boot time using userconfig works right (the optimizations +# are not used until later in the bootstrap when npx0 is attached). +# Flag 0x08 automatically disables the i586 optimized routines. +# + +# +# Optional devices: +# + +# 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create +# the /dev/3dfx0 device to work with glide implementations. This should get +# linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as +# the tdfx DRI module from XFree86 and is completely unrelated. +# +# To enable Linuxulator support, one must also include COMPAT_LINUX in the +# config as well, or you will not have the dependencies. The other option +# is to load both as modules. + +device tdfx # Enable 3Dfx Voodoo support +options TDFX_LINUX # Enable Linuxulator support + +# +# ACPI support using the Intel ACPI Component Architecture reference +# implementation. +# +# ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer +# kernel environment variables to select initial debugging levels for the +# Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER +# defined when it is built). +# +# ACPI_MAX_THREADS sets the number of task threads started. +# +# ACPI_NO_SEMAPHORES makes the AcpiOs*Semaphore routines a no-op. +# +# ACPICA_PEDANTIC enables strict checking of AML. 
Our default is to +# relax these checks to allow code generated by the Microsoft compiler +# to still execute. +# +# Note that building ACPI into the kernel is deprecated; the module is +# normally loaded automatically by the loader. +# +device acpi +options ACPI_DEBUG +options ACPI_MAX_THREADS=1 +#!options ACPI_NO_SEMAPHORES +#!options ACPICA_PEDANTIC + +# DRM options: +# mgadrm: AGP Matrox G200, G400, G450, G550 +# r128drm: ATI Rage 128 +# radeondrm: ATI Radeon up to 9000/9100 +# sisdrm: SiS 300/305,540,630 +# tdfxdrm: 3dfx Voodoo 3/4/5 and Banshee +# DRM_DEBUG: include debug printfs, very slow +# +# mga requires AGP in the kernel, and it is recommended +# for AGP r128 and radeon cards. + +device mgadrm +device "r128drm" +device radeondrm +device sisdrm +device tdfxdrm + +options DRM_DEBUG + +# M-systems DiskOnchip products see src/sys/contrib/dev/fla/README +device fla +hint.fla.0.at="isa" + +# +# mse: Logitech and ATI InPort bus mouse ports + +device mse +hint.mse.0.at="isa" +hint.mse.0.port="0x23c" +hint.mse.0.irq="5" + +# +# Network interfaces: +# + +# ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver +# (requires sppp) +# ath: Atheros a/b/g WiFi adapters (requires ath_hal and wlan) +# cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) +# ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 +# HP PC Lan+, various PC Card devices (refer to etc/defauls/pccard.conf) +# (requires miibus) +# el: 3Com 3C501 (slow!) +# ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210; +# Intel EtherExpress +# le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, +# DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) +# lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL, AMD Am7990 and +# Am79C960) +# oltr: Olicom ISA token-ring adapters OC-3115, OC-3117, OC-3118 and OC-3133 +# (no hints needed). 
+# Olicom PCI token-ring adapters OC-3136, OC-3137, OC-3139, OC-3140, +# OC-3141, OC-3540, OC-3250 +# rdp: RealTek RTL 8002-based pocket ethernet adapters +# sbni: Granch SBNI12-xx ISA and PCI adapters +# sr: RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp) +# wl: Lucent Wavelan (ISA card only). + +# Order for ISA/EISA devices is important here + +device ar +hint.ar.0.at="isa" +hint.ar.0.port="0x300" +hint.ar.0.irq="10" +hint.ar.0.maddr="0xd0000" +device cx +hint.cx.0.at="isa" +hint.cx.0.port="0x240" +hint.cx.0.irq="15" +hint.cx.0.drq="7" +device ed +#options ED_NO_MIIBUS # Disable ed miibus support +hint.ed.0.at="isa" +hint.ed.0.port="0x280" +hint.ed.0.irq="5" +hint.ed.0.maddr="0xd8000" +device el 1 +hint.el.0.at="isa" +hint.el.0.port="0x300" +hint.el.0.irq="9" +device ie # Hints only required for Starlan +hint.ie.2.at="isa" +hint.ie.2.port="0x300" +hint.ie.2.irq="5" +hint.ie.2.maddr="0xd0000" +device le 1 +hint.le.0.at="isa" +hint.le.0.port="0x300" +hint.le.0.irq="5" +hint.le.0.maddr="0xd0000" +device lnc +hint.lnc.0.at="isa" +hint.lnc.0.port="0x280" +hint.lnc.0.irq="10" +hint.lnc.0.drq="0" +device rdp 1 +hint.rdp.0.at="isa" +hint.rdp.0.port="0x378" +hint.rdp.0.irq="7" +hint.rdp.0.flags="2" +device sbni +hint.sbni.0.at="isa" +hint.sbni.0.port="0x210" +hint.sbni.0.irq="0xefdead" +hint.sbni.0.flags="0" +device sr +hint.sr.0.at="isa" +hint.sr.0.port="0x300" +hint.sr.0.irq="5" +hint.sr.0.maddr="0xd0000" +device oltr +hint.oltr.0.at="isa" +device wl +hint.wl.0.at="isa" +hint.wl.0.port="0x300" +options WLCACHE # enables the signal-strength cache +options WLDEBUG # enables verbose debugging output + +device ath +device ath_hal # Atheros HAL (includes binary component) +#device wlan # 802.11 layer + +# +# ATA raid adapters +# +device pst + +# +# SCSI host adapters: +# +# ncv: NCR 53C500 based SCSI host adapters. +# nsp: Workbit Ninja SCSI-3 based PC Card SCSI host adapters. +# stg: TMC 18C30, 18C50 based SCSI host adapters. 
+
+device ncv
+device nsp
+device stg
+hint.stg.0.at="isa"
+hint.stg.0.port="0x140"
+hint.stg.0.irq="11"
+
+#
+# Adaptec FSA RAID controllers, including integrated DELL controllers,
+# the Dell PERC 2/QC and the HP NetRAID-4M
+device aac
+device aacp # SCSI Passthrough interface (optional, CAM required)
+
+#
+# IBM (now Adaptec) ServeRAID controllers
+device ips
+
+#
+# SafeNet crypto driver: can be moved to the MI NOTES as soon as
+# it's tested on a big-endian machine
+#
+device safe # SafeNet 1141
+options SAFE_DEBUG # enable debugging support: hw.safe.debug
+options SAFE_RNDTEST # enable rndtest support
+
+#####################################################################
+
+#
+# Miscellaneous hardware:
+#
+# wt: Wangtek and Archive QIC-02/QIC-36 tape drives
+# ctx: Cortex-I frame grabber
+# apm: Laptop Advanced Power Management (experimental)
+# pmtimer: Timer device driver for power management events (APM or ACPI)
+# spigot: The Creative Labs Video Spigot video-acquisition board
+# dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!)
+# digi: Digiboard driver
+# gp: National Instruments AT-GPIB and AT-GPIB/TNT board, PCMCIA-GPIB
+# asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey
+# gsc: Genius GS-4500 hand scanner.
+# spic: Sony Programmable I/O controller (VAIO notebooks)
+# stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based)
+# stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent)
+
+# Notes on APM
+# The flags takes the following meaning for apm0:
+# 0x0020 Statclock is broken.
+# If apm is omitted, some systems require sysctl kern.timecounter.method=1
+# for correct timekeeping.
+
+# Notes on the spigot:
+# The video spigot is at 0xad6. This port address can not be changed.
+# The irq values may only be 10, 11, or 15
+# I/O memory is an 8kb region. Possible values are:
+# 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff
+# The start address must be on an even boundary. 
+# Add the following option if you want to allow non-root users to be able +# to access the spigot. This option is not secure because it allows users +# direct access to the I/O page. +# options SPIGOT_UNSECURE + +# Notes on the Specialix SI/XIO driver: +# The host card is memory, not IO mapped. +# The Rev 1 host cards use a 64K chunk, on a 32K boundary. +# The Rev 2 host cards use a 32K chunk, on a 32K boundary. +# The cards can use an IRQ of 11, 12 or 15. + +# Notes on the Sony Programmable I/O controller +# This is a temporary driver that should someday be replaced by something +# that hooks into the ACPI layer. The device is hooked to the PIIX4's +# General Device 10 decoder, which means you have to fiddle with PCI +# registers to map it in, even though it is otherwise treated here as +# an ISA device. At the moment, the driver polls, although the device +# is capable of generating interrupts. It largely undocumented. +# The port location in the hint is where you WANT the device to be +# mapped. 0x10a0 seems to be traditional. At the moment the jogdial +# is the only thing truly supported, but aparently a fair percentage +# of the Vaio extra features are controlled by this device. + +# Notes on the Stallion stl and stli drivers: +# See src/i386/isa/README.stl for complete instructions. +# This is version 0.0.5alpha, unsupported by Stallion. +# The stl driver has a secondary IO port hard coded at 0x280. You need +# to change src/i386/isa/stallion.c if you reconfigure this on the boards. 
+# The "flags" and "msize" settings on the stli driver depend on the board: +# EasyConnection 8/64 ISA: flags 23 msize 0x1000 +# EasyConnection 8/64 EISA: flags 24 msize 0x10000 +# EasyConnection 8/64 MCA: flags 25 msize 0x1000 +# ONboard ISA: flags 4 msize 0x10000 +# ONboard EISA: flags 7 msize 0x10000 +# ONboard MCA: flags 3 msize 0x10000 +# Brumby: flags 2 msize 0x4000 +# Stallion: flags 1 msize 0x10000 + +# Notes on the Digiboard PC/Xi and PC/Xe series driver +# +# The NDGBPORTS option specifies the number of ports controlled by the +# dgb(4) driver. The default value is 16 ports per device. +# +# The following flag values have special meanings in dgb: +# 0x01 - alternate layout of pins +# 0x02 - use the windowed PC/Xe in 64K mode + +device wt 1 +hint.wt.0.at="isa" +hint.wt.0.port="0x300" +hint.wt.0.irq="5" +hint.wt.0.drq="1" +device ctx +hint.ctx.0.at="isa" +hint.ctx.0.port="0x230" +hint.ctx.0.maddr="0xd0000" +device spigot 1 +hint.spigot.0.at="isa" +hint.spigot.0.port="0xad6" +hint.spigot.0.irq="15" +hint.spigot.0.maddr="0xee000" +device apm +hint.apm.0.flags="0x20" +device pmtimer # Adjust system timer at wakeup time +device gp +hint.gp.0.at="isa" +hint.gp.0.port="0x2c0" +device gsc 1 +hint.gsc.0.at="isa" +hint.gsc.0.port="0x270" +hint.gsc.0.drq="3" +device dgb 1 +options NDGBPORTS=17 +hint.dgb.0.at="isa" +hint.dgb.0.port="0x220" +hint.dgb.0.maddr="0xfc000" +device digi +hint.digi.0.at="isa" +hint.digi.0.port="0x104" +hint.digi.0.maddr="0xd0000" +# BIOS & FEP/OS components of device digi. 
+device digi_CX +device digi_CX_PCI +device digi_EPCX +device digi_EPCX_PCI +device digi_Xe +device digi_Xem +device digi_Xr +device asc 1 +hint.asc.0.at="isa" +hint.asc.0.port="0x3EB" +hint.asc.0.drq="3" +hint.asc.0.irq="10" +device spic +hint.spic.0.at="isa" +hint.spic.0.port="0x10a0" +device stl +hint.stl.0.at="isa" +hint.stl.0.port="0x2a0" +hint.stl.0.irq="10" +device stli +hint.stli.0.at="isa" +hint.stli.0.port="0x2a0" +hint.stli.0.maddr="0xcc000" +hint.stli.0.flags="23" +hint.stli.0.msize="0x1000" +# You are unlikely to have the hardware for loran <phk@FreeBSD.org> +device loran +hint.loran.0.at="isa" +hint.loran.0.irq="5" +# HOT1 Xilinx 6200 card (http://www.vcc.com/) +device xrpu + +# +# Laptop/Notebook options: +# +# See also: +# apm under `Miscellaneous hardware' +# above. + +# For older notebooks that signal a powerfail condition (external +# power supply dropped, or battery state low) by issuing an NMI: + +options POWERFAIL_NMI # make it beep instead of panicing + +# +# I2C Bus +# +# Philips i2c bus support is provided by the `iicbus' device. +# +# Supported interfaces: +# pcf Philips PCF8584 ISA-bus controller +# +device pcf +hint.pcf.0.at="isa" +hint.pcf.0.port="0x320" +hint.pcf.0.irq="5" + +#--------------------------------------------------------------------------- +# ISDN4BSD +# +# See /usr/share/examples/isdn/ROADMAP for an introduction to isdn4bsd. 
+# +# i4b passive ISDN cards support contains the following hardware drivers: +# +# isic - Siemens/Infineon ISDN ISAC/HSCX/IPAC chipset driver +# iwic - Winbond W6692 PCI bus ISDN S/T interface controller +# ifpi - AVM Fritz!Card PCI driver +# ifpi2 - AVM Fritz!Card PCI version 2 driver +# ihfc - Cologne Chip HFC ISA/ISA-PnP chipset driver +# ifpnp - AVM Fritz!Card PnP driver +# itjc - Siemens ISAC / TJNet Tiger300/320 chipset +# +# i4b active ISDN cards support contains the following hardware drivers: +# +# iavc - AVM B1 PCI, AVM B1 ISA, AVM T1 +# +# Note that the ``options'' (if given) and ``device'' lines must BOTH +# be uncommented to enable support for a given card ! +# +# In addition to a hardware driver (and probably an option) the mandatory +# ISDN protocol stack devices and the mandatory support device must be +# enabled as well as one or more devices from the optional devices section. +# +#--------------------------------------------------------------------------- +# isic driver (Siemens/Infineon chipsets) +# +device isic +# +# ISA bus non-PnP Cards: +# ---------------------- +# +# Teles S0/8 or Niccy 1008 +options TEL_S0_8 +hint.isic.0.at="isa" +hint.isic.0.maddr="0xd0000" +hint.isic.0.irq="5" +hint.isic.0.flags="1" +# +# Teles S0/16 or Creatix ISDN-S0 or Niccy 1016 +options TEL_S0_16 +hint.isic.0.at="isa" +hint.isic.0.port="0xd80" +hint.isic.0.maddr="0xd0000" +hint.isic.0.irq="5" +hint.isic.0.flags="2" +# +# Teles S0/16.3 +options TEL_S0_16_3 +hint.isic.0.at="isa" +hint.isic.0.port="0xd80" +hint.isic.0.irq="5" +hint.isic.0.flags="3" +# +# AVM A1 or AVM Fritz!Card +options AVM_A1 +hint.isic.0.at="isa" +hint.isic.0.port="0x340" +hint.isic.0.irq="5" +hint.isic.0.flags="4" +# +# USRobotics Sportster ISDN TA intern +options USR_STI +hint.isic.0.at="isa" +hint.isic.0.port="0x268" +hint.isic.0.irq="5" +hint.isic.0.flags="7" +# +# ITK ix1 Micro ( < V.3, non-PnP version ) +options ITKIX1 +hint.isic.0.at="isa" +hint.isic.0.port="0x398" +hint.isic.0.irq="10" 
+hint.isic.0.flags="18" +# +# ELSA PCC-16 +options ELSA_PCC16 +hint.isic.0.at="isa" +hint.isic.0.port="0x360" +hint.isic.0.irq="10" +hint.isic.0.flags="20" +# +# ISA bus PnP Cards: +# ------------------ +# +# Teles S0/16.3 PnP +options TEL_S0_16_3_P +# +# Creatix ISDN-S0 P&P +options CRTX_S0_P +# +# Dr. Neuhaus Niccy Go@ +options DRN_NGO +# +# Sedlbauer Win Speed +options SEDLBAUER +# +# Dynalink IS64PH +options DYNALINK +# +# ELSA QuickStep 1000pro ISA +options ELSA_QS1ISA +# +# Siemens I-Surf 2.0 +options SIEMENS_ISURF2 +# +# Asuscom ISDNlink 128K ISA +options ASUSCOM_IPAC +# +# Eicon Diehl DIVA 2.0 and 2.02 +options EICON_DIVA +# +# Compaq Microcom 610 ISDN card (Compaq series PSB2222I) +options COMPAQ_M610 +# +# PCI bus Cards: +# -------------- +# +# Cyclades Cyclom-Y PCI serial driver +device cy 1 +options CY_PCI_FASTINTR # Use with cy_pci unless irq is shared +hint.cy.0.at="isa" +hint.cy.0.irq="10" +hint.cy.0.maddr="0xd4000" +hint.cy.0.msize="0x2000" +# +#--------------------------------------------------------------------------- +# ELSA MicroLink ISDN/PCI (same as ELSA QuickStep 1000pro PCI) +options ELSA_QS1PCI +# +# +#--------------------------------------------------------------------------- +# ifpnp driver for AVM Fritz!Card PnP +# +# AVM Fritz!Card PnP +device ifpnp +# +#--------------------------------------------------------------------------- +# ihfc driver for Cologne Chip ISA chipsets (experimental!) 
+# +# Teles 16.3c ISA PnP +# AcerISDN P10 ISA PnP +# TELEINT ISDN SPEED No.1 +device ihfc +# +#--------------------------------------------------------------------------- +# ifpi driver for AVM Fritz!Card PCI +# +# AVM Fritz!Card PCI +device ifpi +# +#--------------------------------------------------------------------------- +# ifpi2 driver for AVM Fritz!Card PCI version 2 +# +# AVM Fritz!Card PCI version 2 +device "ifpi2" +# +#--------------------------------------------------------------------------- +# iwic driver for Winbond W6692 chipset +# +# ASUSCOM P-IN100-ST-D (and other Winbond W6692 based cards) +device iwic +# +#--------------------------------------------------------------------------- +# itjc driver for Simens ISAC / TJNet Tiger300/320 chipset +# +# Traverse Technologies NETjet-S +# Teles PCI-TJ +device itjc +# +#--------------------------------------------------------------------------- +# iavc driver (AVM active cards, needs i4bcapi driver!) +# +device iavc +# +# AVM B1 ISA bus (PnP mode not supported!) 
+# ---------------------------------------- +hint.iavc.0.at="isa" +hint.iavc.0.port="0x150" +hint.iavc.0.irq="5" +# +#--------------------------------------------------------------------------- +# ISDN Protocol Stack - mandatory for all hardware drivers +# +# Q.921 / layer 2 - i4b passive cards D channel handling +device "i4bq921" +# +# Q.931 / layer 3 - i4b passive cards D channel handling +device "i4bq931" +# +# layer 4 - i4b common passive and active card handling +device "i4b" +# +#--------------------------------------------------------------------------- +# ISDN devices - mandatory for all hardware drivers +# +# userland driver to do ISDN tracing (for passive cards only) +device "i4btrc" 4 +# +# userland driver to control the whole thing +device "i4bctl" +# +#--------------------------------------------------------------------------- +# ISDN devices - optional +# +# userland driver for access to raw B channel +device "i4brbch" 4 +# +# userland driver for telephony +device "i4btel" 2 +# +# network driver for IP over raw HDLC ISDN +device "i4bipr" 4 +# enable VJ header compression detection for ipr i/f +options IPR_VJ +# enable logging of the first n IP packets to isdnd (n=32 here) +options IPR_LOG=32 +# +# network driver for sync PPP over ISDN; requires an equivalent +# number of sppp device to be configured +device "i4bisppp" 4 +# +# B-channel interface to the netgraph subsystem +device "i4bing" 2 +# +# CAPI driver needed for active ISDN cards (see iavc driver above) +device "i4bcapi" +# +#--------------------------------------------------------------------------- + +# +# Set the number of PV entries per process. Increasing this can +# stop panics related to heavy use of shared memory. However, that can +# (combined with large amounts of physical memory) cause panics at +# boot time due the kernel running out of VM space. 
+# +# If you're tweaking this, you might also want to increase the sysctls +# "vm.v_free_min", "vm.v_free_reserved", and "vm.v_free_target". +# +# The value below is the one more than the default. +# +options PMAP_SHPGPERPROC=201 + +# +# Change the size of the kernel virtual address space. Due to +# constraints in loader(8) on i386, this must be a multiple of 4. +# 256 = 1 GB of kernel address space. Increasing this also causes +# a reduction of the address space in user processes. 512 splits +# the 4GB cpu address space in half (2GB user, 2GB kernel). +# +options KVA_PAGES=260 + + +##################################################################### +# ABI Emulation + +# Enable iBCS2 runtime support for SCO and ISC binaries +options IBCS2 + +# Emulate spx device for client side of SVR3 local X interface +options SPX_HACK + +# Enable Linux ABI emulation +options COMPAT_LINUX + +# Enable i386 a.out binary support +options COMPAT_AOUT + +# Enable the linux-like proc filesystem support (requires COMPAT_LINUX +# and PSEUDOFS) +options LINPROCFS + +# +# SysVR4 ABI emulation +# +# The svr4 ABI emulator can be statically compiled into the kernel or loaded as +# a KLD module. +# The STREAMS network emulation code can also be compiled statically or as a +# module. If loaded as a module, it must be loaded before the svr4 module +# (the /usr/sbin/svr4 script does this for you). If compiling statically, +# the `streams' device must be configured into any kernel which also +# specifies COMPAT_SVR4. It is possible to have a statically-configured +# STREAMS device and a dynamically loadable svr4 emulator; the /usr/sbin/svr4 +# script understands that it doesn't need to load the `streams' module under +# those circumstances. +# Caveat: At this time, `options KTRACE' is required for the svr4 emulator +# (whether static or dynamic). 
+# +options COMPAT_SVR4 # build emulator statically +options DEBUG_SVR4 # enable verbose debugging +device streams # STREAMS network driver (required for svr4). + + +##################################################################### +# VM OPTIONS + +# Disable the 4 MByte page PSE CPU feature. The PSE feature allows the +# kernel to use a 4 MByte pages to map the kernel instead of 4k pages. +# This saves on the amount of memory needed for page tables needed to +# map the kernel. You should only disable this feature as a temporary +# workaround if you are having problems with it enabled. +# +#options DISABLE_PSE + +# Disable the global pages PGE CPU feature. The PGE feature allows pages +# to be marked with the PG_G bit. TLB entries for these pages are not +# flushed from the cache when %cr3 is reloaded. This can make context +# switches less expensive. You should only disable this feature as a +# temporary workaround if you are having problems with it enabled. +# +#options DISABLE_PG_G + +# KSTACK_PAGES is the number of memory pages to assign to the kernel +# stack of each thread. + +options KSTACK_PAGES=3 + +##################################################################### + +# More undocumented options for linting. +# Note that documenting these are not considered an affront. 
+ +options FB_INSTALL_CDEV # install a CDEV entry in /dev + +# PECOFF module (Win32 Execution Format) +options PECOFF_SUPPORT +options PECOFF_DEBUG + +options ENABLE_ALART +options I4B_SMP_WORKAROUND +options I586_PMC_GUPROF=0x70000 +options KBDIO_DEBUG=2 +options KBD_MAXRETRY=4 +options KBD_MAXWAIT=6 +options KBD_RESETDELAY=201 + +options PSM_DEBUG=1 + +options TIMER_FREQ=((14318182+6)/12) + +options VM_KMEM_SIZE +options VM_KMEM_SIZE_MAX +options VM_KMEM_SIZE_SCALE diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD b/freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD new file mode 100644 index 0000000000..2d13fbe2b5 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD @@ -0,0 +1,17 @@ +# +# OLDCARD -- Generic kernel configuration file for FreeBSD/i386 +# using the OLDCARD pccard system. +# +# $FreeBSD: src/sys/i386/conf/OLDCARD,v 1.18 2003/02/15 02:39:13 ru Exp $ + +include GENERIC + +ident OLDCARD + +# PCCARD (PCMCIA) support +nodevice cbb # cardbus (yenta) bridge +#nodevice pcic # ExCA ISA and PCI bridges +nodevice pccard # PC Card (16-bit) bus +nodevice cardbus # CardBus (32-bit) bus +device card 1 # pccard bus +device pcic # PCMCIA bridge diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/PAE b/freebsd-5.3-xen-sparse/i386-xen/conf/PAE new file mode 100644 index 0000000000..98d4f2c252 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/conf/PAE @@ -0,0 +1,99 @@ +# +# PAE -- Generic kernel configuration file for FreeBSD/i386 PAE +# +# $FreeBSD: src/sys/i386/conf/PAE,v 1.8 2003/11/03 22:49:19 jhb Exp $ + +include GENERIC + +ident PAE-GENERIC + +# To make a PAE kernel, the next option is needed +options PAE # Physical Address Extensions Kernel + +# Compile acpi in statically since the module isn't built properly. Most +# machines which support large amounts of memory require acpi. +device acpi + +# Don't build modules with this kernel config, since they are not built with +# the correct options headers. 
+makeoptions NO_MODULES=yes + +# What follows is a list of drivers that are normally in GENERIC, but either +# don't work or are untested with PAE. Be very careful before enabling any +# of these drivers. Drivers which use DMA and don't handle 64 bit physical +# address properly may cause data corruption when used in a machine with more +# than 4 gigabytes of memory. + +nodevice ahb +nodevice amd +nodevice isp +nodevice sym +nodevice trm + +nodevice adv +nodevice adw +nodevice aha +nodevice aic +nodevice bt + +nodevice ncv +nodevice nsp +nodevice stg + +nodevice asr +nodevice dpt +nodevice iir +nodevice mly + +nodevice amr +nodevice ida +nodevice mlx +nodevice pst + +nodevice agp + +nodevice de +nodevice txp +nodevice vx + +nodevice dc +nodevice pcn +nodevice rl +nodevice sf +nodevice sis +nodevice ste +nodevice tl +nodevice tx +nodevice vr +nodevice wb + +nodevice cs +nodevice ed +nodevice ex +nodevice ep +nodevice fe +nodevice ie +nodevice lnc +nodevice sn +nodevice xe + +nodevice wlan +nodevice an +nodevice awi +nodevice wi + +nodevice uhci +nodevice ohci +nodevice usb +nodevice ugen +nodevice uhid +nodevice ukbd +nodevice ulpt +nodevice umass +nodevice ums +nodevice urio +nodevice uscanner +nodevice aue +nodevice axe +nodevice cue +nodevice kue diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF b/freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF new file mode 100644 index 0000000000..4214b1c59b --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF @@ -0,0 +1,137 @@ +# +# GENERIC -- Generic kernel configuration file for FreeBSD/i386 +# +# For more information on this file, please read the handbook section on +# Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest 
information. +# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. +# +# $FreeBSD: src/sys/i386/conf/GENERIC,v 1.394.2.3 2004/01/26 19:42:11 nectar Exp $ + +machine i386-xen +cpu I686_CPU +ident XEN + +#To statically compile in device wiring instead of /boot/device.hints +#hints "GENERIC.hints" #Default places to look for devices. + +makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols + +options SCHED_4BSD #4BSD scheduler +options INET #InterNETworking +options INET6 #IPv6 communications protocols +options FFS #Berkeley Fast Filesystem +options SOFTUPDATES #Enable FFS soft updates support +options UFS_ACL #Support for access control lists +options UFS_DIRHASH #Improve performance on big directories +options MD_ROOT #MD is a potential root device +options NFSCLIENT #Network Filesystem Client +options NFSSERVER #Network Filesystem Server +# options NFS_ROOT #NFS usable as /, requires NFSCLIENT +#options MSDOSFS #MSDOS Filesystem +#options CD9660 #ISO 9660 Filesystem +options PROCFS #Process filesystem (requires PSEUDOFS) +options PSEUDOFS #Pseudo-filesystem framework +options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] 
+options COMPAT_FREEBSD4 #Compatible with FreeBSD4 +options SCSI_DELAY=15000 #Delay (in ms) before probing SCSI +options KTRACE #ktrace(1) support +options SYSVSHM #SYSV-style shared memory +options SYSVMSG #SYSV-style message queues +options SYSVSEM #SYSV-style semaphores +options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions +options KBD_INSTALL_CDEV # install a CDEV entry in /dev +options CPU_DISABLE_SSE # don't turn on SSE framework with Xen +#options PFIL_HOOKS # pfil(9) framework + +# Debugging for use in -current +options KDB #Enable the kernel debugger +options INVARIANTS #Enable calls of extra sanity checking +options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS +#options WITNESS #Enable checks to detect deadlocks and cycles +#options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed + +# To make an SMP kernel, the next two are needed +#options SMP # Symmetric MultiProcessor Kernel +#device apic # I/O APIC + +# SCSI peripherals +device scbus # SCSI bus (required for SCSI) +#device ch # SCSI media changers +device da # Direct Access (disks) +#device sa # Sequential Access (tape etc) +#device cd # CD +device pass # Passthrough device (direct SCSI access) +#device ses # SCSI Environmental Services (and SAF-TE) + +# atkbdc0 controls both the keyboard and the PS/2 mouse +#device atkbdc # AT keyboard controller +#device atkbd # AT keyboard +#device psm # PS/2 mouse + +# device vga # VGA video card driver + +#device splash # Splash screen and screen saver support + +# syscons is the default console driver, resembling an SCO console +#device sc + +# Enable this for the pcvt (VT220 compatible) console driver +#device vt +#options XSERVER # support for X server on a vt console +#options FAT_CURSOR # start with block cursor + +#device agp # support several AGP chipsets + +# Floating point support - do not disable. 
+device npx + +# Serial (COM) ports +#device sio # 8250, 16[45]50 based serial ports + +# Parallel port +#device ppc +#device ppbus # Parallel port bus (required) +#device lpt # Printer +#device plip # TCP/IP over parallel +#device ppi # Parallel port interface device +#device vpo # Requires scbus and da + +# If you've got a "dumb" serial or parallel PCI card that is +# supported by the puc(4) glue driver, uncomment the following +# line to enable it (connects to the sio and/or ppc drivers): +#device puc + + +# Pseudo devices - the number indicates how many units to allocate. +device random # Entropy device +device loop # Network loopback +device ether # Ethernet support +device tun # Packet tunnel. +device pty # Pseudo-ttys (telnet etc) +device md # Memory "disks" +device gif # IPv6 and IPv4 tunneling +device faith # IPv6-to-IPv4 relaying (translation) + +# The `bpf' device enables the Berkeley Packet Filter. +# Be aware of the administrative consequences of enabling this! +device bpf # Berkeley packet filter + +#options BOOTP +options XEN +options MCLSHIFT=12 # this has to be enabled for Xen as we can only have one cluster per page +options MSIZE=256 +options DIAGNOSTIC +options MAXMEM=(256*1024) +options NOXENDEBUG=1 # Turn off Debugging printfs + diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk b/freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk new file mode 100644 index 0000000000..e8cc6b67de --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk @@ -0,0 +1,116 @@ +#! /usr/bin/awk -f +# +# This is a transition aid. It extracts old-style configuration information +# from a config file and writes an equivalent device.hints file to stdout. +# You can use that with loader(8) or statically compile it in with the +# 'hints' directive. See how GENERIC and GENERIC.hints fit together for +# a static example. You should use loader(8) if at all possible. 
+# +# $FreeBSD: src/sys/i386/conf/gethints.awk,v 1.2 2002/07/26 03:52:30 peter Exp $ + +# skip commented lines, empty lines and not "device" lines +/^[ \t]*#/ || /^[ \t]*$/ || !/[ \t]*device/ { next; } + +# input format : +# device <name><unit> at <controler>[?] [key [val]]... +# possible keys are : +# disable, port #, irq #, drq #, drive #, iomem #, iosiz #, +# flags #, bus #, target #, unit #. +# output format : +# hint.<name>.<unit>.<key>=<val> +# mapped keys are : +# iomem -> maddr, iosiz -> msize. +{ + gsub ("#.*", ""); # delete comments + gsub ("\"", ""); # and double-quotes + nameunit = $2; # <name><unit> + at = $3; # at + controler = $4; # <controler>[?] + rest = 5; # optional keys begin at indice 5 + if (at != "at" || controler == "") + next; # skip devices w/o controlers + name = nameunit; + sub ("[0-9]*$", "", name); # get the name + unit = nameunit; + sub ("^" name, "", unit); # and the unit + sub ("\?$", "", controler); + printf "hint.%s.%s.at=\"%s\"\n", name, unit, controler; + # for each keys, if any ? + for (key = $rest; rest <= NF; key = $(++rest)) { + # skip auto-detect keys (the one w/ a ?) + if (key == "port?" || key == "drq?" || key == "irq?" || \ + key == "iomem?" 
|| key == "iosiz?") + continue; + # disable has no value, so, give it one + if (key == "disable") { + printf "hint.%s.%s.disabled=\"1\"\n", name, unit; + continue; + } + # recognized keys + if (key == "port" || key == "irq" || key == "drq" || \ + key == "drive" || key == "iomem" || key == "iosiz" || \ + key == "flags" || key == "bus" || key == "target" || \ + key == "unit") { + val = $(++rest); + if (val == "?") # has above + continue; + if (key == "port") { + # map port macros to static values + sub ("IO_AHA0", "0x330", val); + sub ("IO_AHA1", "0x334", val); + sub ("IO_ASC1", "0x3EB", val); + sub ("IO_ASC2", "0x22B", val); + sub ("IO_ASC3", "0x26B", val); + sub ("IO_ASC4", "0x2AB", val); + sub ("IO_ASC5", "0x2EB", val); + sub ("IO_ASC6", "0x32B", val); + sub ("IO_ASC7", "0x36B", val); + sub ("IO_ASC8", "0x3AB", val); + sub ("IO_BT0", "0x330", val); + sub ("IO_BT1", "0x334", val); + sub ("IO_CGA", "0x3D0", val); + sub ("IO_COM1", "0x3F8", val); + sub ("IO_COM2", "0x2F8", val); + sub ("IO_COM3", "0x3E8", val); + sub ("IO_COM4", "0x2E8", val); + sub ("IO_DMA1", "0x000", val); + sub ("IO_DMA2", "0x0C0", val); + sub ("IO_DMAPG", "0x080", val); + sub ("IO_FD1", "0x3F0", val); + sub ("IO_FD2", "0x370", val); + sub ("IO_GAME", "0x201", val); + sub ("IO_GSC1", "0x270", val); + sub ("IO_GSC2", "0x2E0", val); + sub ("IO_GSC3", "0x370", val); + sub ("IO_GSC4", "0x3E0", val); + sub ("IO_ICU1", "0x020", val); + sub ("IO_ICU2", "0x0A0", val); + sub ("IO_KBD", "0x060", val); + sub ("IO_LPT1", "0x378", val); + sub ("IO_LPT2", "0x278", val); + sub ("IO_LPT3", "0x3BC", val); + sub ("IO_MDA", "0x3B0", val); + sub ("IO_NMI", "0x070", val); + sub ("IO_NPX", "0x0F0", val); + sub ("IO_PMP1", "0x026", val); + sub ("IO_PMP2", "0x178", val); + sub ("IO_PPI", "0x061", val); + sub ("IO_RTC", "0x070", val); + sub ("IO_TIMER1", "0x040", val); + sub ("IO_TIMER2", "0x048", val); + sub ("IO_UHA0", "0x330", val); + sub ("IO_VGA", "0x3C0", val); + sub ("IO_WD1", "0x1F0", val); + sub ("IO_WD2", 
"0x170", val); + } else { + # map key names + sub ("iomem", "maddr", key); + sub ("iosiz", "msize", key); + } + printf "hint.%s.%s.%s=\"%s\"\n", name, unit, key, val; + continue; + } + printf ("unrecognized config token '%s:%s' on line %s\n", + rest, key, NR); # > "/dev/stderr"; + } +} diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c new file mode 100644 index 0000000000..393e091986 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c @@ -0,0 +1,511 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz and Don Ahn. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/isa/clock.c,v 1.207 2003/11/13 10:02:12 phk Exp $"); + +/* #define DELAYDEBUG */ +/* + * Routines to handle clock hardware. + */ + +/* + * inittodr, settodr and support routines written + * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at> + * + * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94 + */ + +#include "opt_clock.h" +#include "opt_isa.h" +#include "opt_mca.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/time.h> +#include <sys/timetc.h> +#include <sys/kernel.h> +#include <sys/limits.h> +#include <sys/sysctl.h> +#include <sys/cons.h> +#include <sys/power.h> + +#include <machine/clock.h> +#include <machine/cputypes.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> +#include <machine/md_var.h> +#include <machine/psl.h> +#if defined(SMP) +#include <machine/smp.h> +#endif +#include <machine/specialreg.h> + +#include <i386/isa/icu.h> +#include <i386/isa/isa.h> +#include <isa/rtc.h> +#include <i386/isa/timerreg.h> + +/* XEN specific defines */ +#include <machine/xen_intr.h> + +/* + * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we + * can use a simple formula for leap years. + */ +#define LEAPYEAR(y) (((u_int)(y) % 4 == 0) ? 
1 : 0) +#define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) + +int adjkerntz; /* local offset from GMT in seconds */ +int clkintr_pending; +int disable_rtc_set = 1; /* disable resettodr() if != 0 */ +int pscnt = 1; +int psdiv = 1; +int statclock_disable; +#ifndef TIMER_FREQ +#define TIMER_FREQ 1193182 +#endif +u_int timer_freq = TIMER_FREQ; + +static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; + +/* Values for timerX_state: */ +#define RELEASED 0 +#define RELEASE_PENDING 1 +#define ACQUIRED 2 +#define ACQUIRE_PENDING 3 + +/* Cached *multiplier* to convert TSC counts to microseconds. + * (see the equation below). + * Equal to 2^32 * (1 / (clocks per usec) ). + * Initialized in time_init. + */ +static unsigned long fast_gettimeoffset_quotient; + +/* These are peridically updated in shared_info, and then copied here. */ +static uint32_t shadow_tsc_stamp; +static uint64_t shadow_system_time; +static uint32_t shadow_time_version; +static struct timeval shadow_tv; + +static uint64_t processed_system_time;/* System time (ns) at last processing. */ + +#define NS_PER_TICK (1000000000ULL/hz) + +/* convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_mhz * 10^6)) + * ns = cycles * (10^3 / cpu_mhz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
+ */ +static unsigned long cyc2ns_scale; +#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ + +static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +{ + cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; +} + +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; +} + +/* + * Reads a consistent set of time-base values from Xen, into a shadow data + * area. Must be called with the xtime_lock held for writing. + */ +static void __get_time_values_from_xen(void) +{ + shared_info_t *s = HYPERVISOR_shared_info; + + do { + shadow_time_version = s->time_version2; + rmb(); + shadow_tv.tv_sec = s->wc_sec; + shadow_tv.tv_usec = s->wc_usec; + shadow_tsc_stamp = (uint32_t)s->tsc_timestamp; + shadow_system_time = s->system_time; + rmb(); + } + while (shadow_time_version != s->time_version1); +} + +#define TIME_VALUES_UP_TO_DATE \ + (shadow_time_version == HYPERVISOR_shared_info->time_version2) + +static void (*timer_func)(struct clockframe *frame) = hardclock; + +static unsigned xen_get_offset(void); +static unsigned xen_get_timecount(struct timecounter *tc); + +static struct timecounter xen_timecounter = { + xen_get_timecount, /* get_timecount */ + 0, /* no poll_pps */ + ~0u, /* counter_mask */ + 0, /* frequency */ + "ixen", /* name */ + 0 /* quality */ +}; + + +static void +clkintr(struct clockframe *frame) +{ + int64_t delta; + long ticks = 0; + + + do { + __get_time_values_from_xen(); + delta = (int64_t)(shadow_system_time + + xen_get_offset() * 1000 - + processed_system_time); + } while (!TIME_VALUES_UP_TO_DATE); + + if (unlikely(delta < 0)) { + printk("Timer ISR: Time went backwards: %lld\n", delta); + return; + } + + /* Process elapsed ticks since last call. 
*/ + while ( delta >= NS_PER_TICK ) + { + ticks++; + delta -= NS_PER_TICK; + processed_system_time += NS_PER_TICK; + } + + if (ticks > 0) { + if (frame) + timer_func(frame); +#ifdef SMP + if (timer_func == hardclock && frame) + forward_hardclock(); +#endif + } +} + +#include "opt_ddb.h" +static uint32_t +getit(void) +{ + __get_time_values_from_xen(); + return shadow_tsc_stamp; +} + +/* + * Wait "n" microseconds. + * Relies on timer 1 counting down from (timer_freq / hz) + * Note: timer had better have been programmed before this is first used! + */ +void +DELAY(int n) +{ + int delta, ticks_left; + uint32_t tick, prev_tick; +#ifdef DELAYDEBUG + int getit_calls = 1; + int n1; + static int state = 0; + + if (state == 0) { + state = 1; + for (n1 = 1; n1 <= 10000000; n1 *= 10) + DELAY(n1); + state = 2; + } + if (state == 1) + printf("DELAY(%d)...", n); +#endif + /* + * Read the counter first, so that the rest of the setup overhead is + * counted. Guess the initial overhead is 20 usec (on most systems it + * takes about 1.5 usec for each of the i/o's in getit(). The loop + * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The + * multiplications and divisions to scale the count take a while). + * + * However, if ddb is active then use a fake counter since reading + * the i8254 counter involves acquiring a lock. ddb must not go + * locking for many reasons, but it calls here for at least atkbd + * input. + */ + prev_tick = getit(); + + n -= 0; /* XXX actually guess no initial overhead */ + /* + * Calculate (n * (timer_freq / 1e6)) without using floating point + * and without any avoidable overflows. + */ + if (n <= 0) + ticks_left = 0; + else if (n < 256) + /* + * Use fixed point to avoid a slow division by 1000000. + * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. + * 2^15 is the first power of 2 that gives exact results + * for n between 0 and 256. 
+ */ + ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; + else + /* + * Don't bother using fixed point, although gcc-2.7.2 + * generates particularly poor code for the long long + * division, since even the slow way will complete long + * before the delay is up (unless we're interrupted). + */ + ticks_left = ((u_int)n * (long long)timer_freq + 999999) + / 1000000; + + while (ticks_left > 0) { + tick = getit(); +#ifdef DELAYDEBUG + ++getit_calls; +#endif + delta = tick - prev_tick; + prev_tick = tick; + if (delta < 0) { + /* + * Guard against timer0_max_count being wrong. + * This shouldn't happen in normal operation, + * but it may happen if set_timer_freq() is + * traced. + */ + /* delta += timer0_max_count; ??? */ + if (delta < 0) + delta = 0; + } + ticks_left -= delta; + } +#ifdef DELAYDEBUG + if (state == 1) + printf(" %d calls to getit() at %d usec each\n", + getit_calls, (n + 5) / getit_calls); +#endif +} + + +int +sysbeep(int pitch, int period) +{ + return (0); +} + +/* + * Restore all the timers non-atomically (XXX: should be atomically). + * + * This function is called from pmtimer_resume() to restore all the timers. + * This should not be necessary, but there are broken laptops that do not + * restore all the timers on resume. + */ +void +timer_restore(void) +{ + /* Get timebases for new environment. */ + __get_time_values_from_xen(); + + /* Reset our own concept of passage of system time. 
*/ + processed_system_time = shadow_system_time; +} + +void +startrtclock() +{ + unsigned long long alarm; + uint64_t __cpu_khz; + uint32_t cpu_khz; + + __cpu_khz = HYPERVISOR_shared_info->cpu_freq; + __cpu_khz /= 1000; + cpu_khz = (uint32_t)__cpu_khz; + printk("Xen reported: %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + + /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz = + (2^32 * 1 / (clocks/us)) */ + { + unsigned long eax=0, edx=1000; + __asm__("divl %2" + :"=a" (fast_gettimeoffset_quotient), "=d" (edx) + :"r" (cpu_khz), + "0" (eax), "1" (edx)); + } + + set_cyc2ns_scale(cpu_khz/1000); + timer_freq = tsc_freq = xen_timecounter.tc_frequency = cpu_khz * 1000; + tc_init(&xen_timecounter); + + + rdtscll(alarm); +} + +/* + * Initialize the time of day register, based on the time base which is, e.g. + * from a filesystem. + */ +void +inittodr(time_t base) +{ + int s, y; + struct timespec ts; + + s = splclock(); + if (base) { + ts.tv_sec = base; + ts.tv_nsec = 0; + tc_setclock(&ts); + } + + y = time_second - shadow_tv.tv_sec; + if (y <= -2 || y >= 2) { + /* badly off, adjust it */ + ts.tv_sec = shadow_tv.tv_sec; + ts.tv_nsec = shadow_tv.tv_usec * 1000; + tc_setclock(&ts); + } + splx(s); +} + +/* + * Write system time back to RTC. Not supported for guest domains. + */ +void +resettodr() +{ +} + + +/* + * Start clocks running. + */ +void +cpu_initclocks() +{ + int diag; + int time_irq = bind_virq_to_irq(VIRQ_TIMER); + + if ((diag = intr_add_handler("clk", time_irq, + (driver_intr_t *)clkintr, NULL, + INTR_TYPE_CLK | INTR_FAST, NULL))) { + panic("failed to register clock interrupt: %d\n", diag); + } + + /* should fast clock be enabled ? 
*/ + + /* initialize xen values */ + __get_time_values_from_xen(); + processed_system_time = shadow_system_time; +} + +void +cpu_startprofclock(void) +{ + + printf("cpu_startprofclock: profiling clock is not supported\n"); +} + +void +cpu_stopprofclock(void) +{ + + printf("cpu_stopprofclock: profiling clock is not supported\n"); +} + +static uint32_t +xen_get_timecount(struct timecounter *tc) +{ + __get_time_values_from_xen(); + return shadow_tsc_stamp; +} + +/* + * Track behavior of cur_timer->get_offset() functionality in timer_tsc.c + */ +#undef rdtsc +#define rdtsc(low,high) \ + __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) + +static uint32_t +xen_get_offset(void) +{ + register unsigned long eax, edx; + + /* Read the Time Stamp Counter */ + + rdtsc(eax,edx); + + /* .. relative to previous jiffy (32 bits is enough) */ + eax -= shadow_tsc_stamp; + + /* + * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient + * = (tsc_low delta) * (usecs_per_clock) + * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) + * + * Using a mull instead of a divl saves up to 31 clock cycles + * in the critical path. + */ + + __asm__("mull %2" + :"=a" (eax), "=d" (edx) + :"rm" (fast_gettimeoffset_quotient), + "0" (eax)); + + /* our adjusted time offset in microseconds */ + return edx; +} + +void +idle_block(void) +{ + if (HYPERVISOR_set_timer_op(processed_system_time + NS_PER_TICK) == 0) + HYPERVISOR_block(); +} diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c new file mode 100644 index 0000000000..ce388fa048 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c @@ -0,0 +1,46 @@ +/*- + * Copyright (c) 2002 Matthew Dillon. All Rights Reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/critical.c,v 1.12 2003/11/03 21:06:54 jhb Exp $"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <machine/critical.h> +#include <machine/psl.h> + +/* + * cpu_critical_fork_exit() - cleanup after fork + * + * Enable interrupts in the saved copy of eflags. 
+ */ +void +cpu_critical_fork_exit(void) +{ + curthread->td_md.md_savecrit = 0; +} diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c new file mode 100644 index 0000000000..8e8ce9fde7 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c @@ -0,0 +1,476 @@ +/****************************************************************************** + * ctrl_if.c + * + * Management functions for special interface to the domain controller. + * + * Copyright (c) 2004, K A Fraser + * Copyright (c) 2004, K M Macy + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/uio.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/selinfo.h> +#include <sys/poll.h> +#include <sys/conf.h> +#include <sys/fcntl.h> +#include <sys/ioccom.h> +#include <sys/taskqueue.h> + + +#include <machine/cpufunc.h> +#include <machine/intr_machdep.h> +#include <machine/xen-os.h> +#include <machine/xen_intr.h> +#include <machine/bus.h> +#include <sys/rman.h> +#include <machine/resource.h> +#include <machine/synch_bitops.h> + + +#include <machine/hypervisor-ifs.h> + +#include <machine/ctrl_if.h> +#include <machine/evtchn.h> + +/* + * Only used by initial domain which must create its own control-interface + * event channel. This value is picked up by the user-space domain controller + * via an ioctl. + */ +int initdom_ctrlif_domcontroller_port = -1; + +static int ctrl_if_evtchn; +static int ctrl_if_irq; +static struct mtx ctrl_if_lock; +static int * ctrl_if_wchan = &ctrl_if_evtchn; + + +static CONTROL_RING_IDX ctrl_if_tx_resp_cons; +static CONTROL_RING_IDX ctrl_if_rx_req_cons; + +/* Incoming message requests. */ + /* Primary message type -> message handler. */ +static ctrl_msg_handler_t ctrl_if_rxmsg_handler[256]; + /* Primary message type -> callback in process context? 
*/ +static unsigned long ctrl_if_rxmsg_blocking_context[256/sizeof(unsigned long)]; + /* Queue up messages to be handled in process context. */ +static ctrl_msg_t ctrl_if_rxmsg_deferred[CONTROL_RING_SIZE]; +static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_prod; +static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_cons; + +/* Incoming message responses: message identifier -> message handler/id. */ +static struct { + ctrl_msg_handler_t fn; + unsigned long id; +} ctrl_if_txmsg_id_mapping[CONTROL_RING_SIZE]; + +/* + * FreeBSD task queues don't allow you to requeue an already executing task. + * Since ctrl_if_interrupt clears the TX_FULL condition and schedules any + * waiting tasks, which themselves may need to schedule a new task + * (due to new a TX_FULL condition), we ping-pong between these A/B task queues. + * The interrupt runs anything on the current queue and moves the index so that + * future schedulings occur on the next queue. We should never get into a + * situation where there is a task scheduleded on both the A & B queues. + */ +TASKQUEUE_DECLARE(ctrl_if_txA); +TASKQUEUE_DEFINE(ctrl_if_txA, NULL, NULL, {}); +TASKQUEUE_DECLARE(ctrl_if_txB); +TASKQUEUE_DEFINE(ctrl_if_txB, NULL, NULL, {}); +struct taskqueue **taskqueue_ctrl_if_tx[2] = { &taskqueue_ctrl_if_txA, + &taskqueue_ctrl_if_txB }; +int ctrl_if_idx; + +static struct task ctrl_if_rx_tasklet; +static struct task ctrl_if_tx_tasklet; + /* Passed to schedule_task(). 
*/ +static struct task ctrl_if_rxmsg_deferred_task; + + + +#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048)) +#define TX_FULL(_c) \ + (((_c)->tx_req_prod - ctrl_if_tx_resp_cons) == CONTROL_RING_SIZE) + +static void +ctrl_if_notify_controller(void) +{ + notify_via_evtchn(ctrl_if_evtchn); +} + +static void +ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id) +{ + msg->length = 0; + ctrl_if_send_response(msg); +} + +static void +__ctrl_if_tx_tasklet(void *context __unused, int pending __unused) +{ + control_if_t *ctrl_if = get_ctrl_if(); + ctrl_msg_t *msg; + int was_full = TX_FULL(ctrl_if); + + while ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod ) + { + msg = &ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if_tx_resp_cons)]; + + /* Execute the callback handler, if one was specified. */ + if ( msg->id != 0xFF ) + { + (*ctrl_if_txmsg_id_mapping[msg->id].fn)( + msg, ctrl_if_txmsg_id_mapping[msg->id].id); + smp_mb(); /* Execute, /then/ free. */ + ctrl_if_txmsg_id_mapping[msg->id].fn = NULL; + } + + /* + * Step over the message in the ring /after/ finishing reading it. As + * soon as the index is updated then the message may get blown away. 
+ */ + smp_mb(); + ctrl_if_tx_resp_cons++; + } + + if ( was_full && !TX_FULL(ctrl_if) ) + { + wakeup(ctrl_if_wchan); + + /* bump idx so future enqueues will occur on the next taskq + * process any currently pending tasks + */ + ctrl_if_idx++; + taskqueue_run(*taskqueue_ctrl_if_tx[(ctrl_if_idx-1) & 1]); + } +} + +static void +__ctrl_if_rxmsg_deferred_task(void *context __unused, int pending __unused) +{ + ctrl_msg_t *msg; + + while ( ctrl_if_rxmsg_deferred_cons != ctrl_if_rxmsg_deferred_prod ) + { + msg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX( + ctrl_if_rxmsg_deferred_cons++)]; + (*ctrl_if_rxmsg_handler[msg->type])(msg, 0); + } +} + +static void +__ctrl_if_rx_tasklet(void *context __unused, int pending __unused) +{ + control_if_t *ctrl_if = get_ctrl_if(); + ctrl_msg_t msg, *pmsg; + + while ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod ) + { + pmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons++)]; + memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg)); + if ( msg.length != 0 ) + memcpy(msg.msg, pmsg->msg, msg.length); + if ( test_bit(msg.type, &ctrl_if_rxmsg_blocking_context) ) + { + pmsg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX( + ctrl_if_rxmsg_deferred_prod++)]; + memcpy(pmsg, &msg, offsetof(ctrl_msg_t, msg) + msg.length); + taskqueue_enqueue(taskqueue_thread, &ctrl_if_rxmsg_deferred_task); + } + else + { + (*ctrl_if_rxmsg_handler[msg.type])(&msg, 0); + } + } +} + +static void +ctrl_if_interrupt(void *ctrl_sc) +/* (int irq, void *dev_id, struct pt_regs *regs) */ +{ + control_if_t *ctrl_if = get_ctrl_if(); + + if ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod ) + taskqueue_enqueue(taskqueue_swi, &ctrl_if_tx_tasklet); + + + if ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod ) + taskqueue_enqueue(taskqueue_swi, &ctrl_if_rx_tasklet); +} + +int +ctrl_if_send_message_noblock( + ctrl_msg_t *msg, + ctrl_msg_handler_t hnd, + unsigned long id) +{ + control_if_t *ctrl_if = get_ctrl_if(); + unsigned long flags; + int i; + + mtx_lock_irqsave(&ctrl_if_lock, flags); + + 
if ( TX_FULL(ctrl_if) ) + { + mtx_unlock_irqrestore(&ctrl_if_lock, flags); + return EAGAIN; + } + + msg->id = 0xFF; + if ( hnd != NULL ) + { + for ( i = 0; ctrl_if_txmsg_id_mapping[i].fn != NULL; i++ ) + continue; + ctrl_if_txmsg_id_mapping[i].fn = hnd; + ctrl_if_txmsg_id_mapping[i].id = id; + msg->id = i; + } + + memcpy(&ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if->tx_req_prod)], + msg, sizeof(*msg)); + wmb(); /* Write the message before letting the controller peek at it. */ + ctrl_if->tx_req_prod++; + + mtx_unlock_irqrestore(&ctrl_if_lock, flags); + + ctrl_if_notify_controller(); + + return 0; +} + +int +ctrl_if_send_message_block( + ctrl_msg_t *msg, + ctrl_msg_handler_t hnd, + unsigned long id, + long wait_state) +{ + int rc, sst = 0; + + /* Fast path. */ + if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN ) + return rc; + + + for ( ; ; ) + { + + if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN ) + break; + + if ( sst != 0) + return EINTR; + + sst = tsleep(ctrl_if_wchan, PWAIT|PCATCH, "ctlrwt", 10); + } + + return rc; +} + +int +ctrl_if_enqueue_space_callback(struct task *task) +{ + control_if_t *ctrl_if = get_ctrl_if(); + + /* Fast path. */ + if ( !TX_FULL(ctrl_if) ) + return 0; + + (void)taskqueue_enqueue(*taskqueue_ctrl_if_tx[(ctrl_if_idx & 1)], task); + + /* + * We may race execution of the task queue, so return re-checked status. If + * the task is not executed despite the ring being non-full then we will + * certainly return 'not full'. + */ + smp_mb(); + return TX_FULL(ctrl_if); +} + +void +ctrl_if_send_response(ctrl_msg_t *msg) +{ + control_if_t *ctrl_if = get_ctrl_if(); + unsigned long flags; + ctrl_msg_t *dmsg; + + /* + * NB. The response may the original request message, modified in-place. + * In this situation we may have src==dst, so no copying is required. 
+ */ + mtx_lock_irqsave(&ctrl_if_lock, flags); + dmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if->rx_resp_prod)]; + if ( dmsg != msg ) + memcpy(dmsg, msg, sizeof(*msg)); + wmb(); /* Write the message before letting the controller peek at it. */ + ctrl_if->rx_resp_prod++; + mtx_unlock_irqrestore(&ctrl_if_lock, flags); + + ctrl_if_notify_controller(); +} + +int +ctrl_if_register_receiver( + uint8_t type, + ctrl_msg_handler_t hnd, + unsigned int flags) +{ + unsigned long _flags; + int inuse; + + mtx_lock_irqsave(&ctrl_if_lock, _flags); + + inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler); + + if ( inuse ) + { + printk("Receiver %p already established for control " + "messages of type %d.\n", ctrl_if_rxmsg_handler[type], type); + } + else + { + ctrl_if_rxmsg_handler[type] = hnd; + clear_bit(type, &ctrl_if_rxmsg_blocking_context); + if ( flags == CALLBACK_IN_BLOCKING_CONTEXT ) + { + set_bit(type, &ctrl_if_rxmsg_blocking_context); + } + } + + mtx_unlock_irqrestore(&ctrl_if_lock, _flags); + + return !inuse; +} + +void +ctrl_if_unregister_receiver(uint8_t type, ctrl_msg_handler_t hnd) +{ + unsigned long flags; + + mtx_lock_irqsave(&ctrl_if_lock, flags); + + if ( ctrl_if_rxmsg_handler[type] != hnd ) + printk("Receiver %p is not registered for control " + "messages of type %d.\n", hnd, type); + else + ctrl_if_rxmsg_handler[type] = ctrl_if_rxmsg_default_handler; + + mtx_unlock_irqrestore(&ctrl_if_lock, flags); + + /* Ensure that @hnd will not be executed after this function returns. */ + /* XXX need rx_tasklet_lock -- can cheat for now?*/ +#ifdef notyet + tasklet_unlock_wait(&ctrl_if_rx_tasklet); +#endif +} + +void +ctrl_if_suspend(void) +{ + /* I'm not sure what the equivalent is - we aren't going to support suspend + * yet anyway + */ +#ifdef notyet + free_irq(ctrl_if_irq, NULL); +#endif + unbind_evtchn_from_irq(ctrl_if_evtchn); +} + +/** Reset the control interface progress pointers. + * Marks the queues empty if 'clear' non-zero. 
+ */ +static void +ctrl_if_reset(int clear) +{ + control_if_t *ctrl_if = get_ctrl_if(); + + if (clear) { + *ctrl_if = (control_if_t){}; + } + + ctrl_if_tx_resp_cons = ctrl_if->tx_resp_prod; + ctrl_if_rx_req_cons = ctrl_if->rx_resp_prod; +} + + +void +ctrl_if_resume(void) +{ + if ( xen_start_info->flags & SIF_INITDOMAIN ) + { + /* + * The initial domain must create its own domain-controller link. + * The controller is probably not running at this point, but will + * pick up its end of the event channel from + */ + evtchn_op_t op; + op.cmd = EVTCHNOP_bind_interdomain; + op.u.bind_interdomain.dom1 = DOMID_SELF; + op.u.bind_interdomain.dom2 = DOMID_SELF; + op.u.bind_interdomain.port1 = 0; + op.u.bind_interdomain.port2 = 0; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + panic("event_channel_op failed\n"); + xen_start_info->domain_controller_evtchn = op.u.bind_interdomain.port1; + initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2; + } + + ctrl_if_reset(0); + + ctrl_if_evtchn = xen_start_info->domain_controller_evtchn; + ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn); + + /* + * I have not taken the time to determine what the interrupt thread priorities + * correspond to - this interface is used for network and disk, network would + * seem higher priority, hence I'm using it + */ + + intr_add_handler("ctrl-if", ctrl_if_irq, (driver_intr_t*)ctrl_if_interrupt, + NULL, INTR_TYPE_NET | INTR_MPSAFE, NULL); +} + +static void +ctrl_if_init(void *dummy __unused) +{ + int i; + + for ( i = 0; i < 256; i++ ) + ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler; + + mtx_init(&ctrl_if_lock, "ctrlif", NULL, MTX_SPIN | MTX_NOWITNESS); + + TASK_INIT(&ctrl_if_tx_tasklet, 0, __ctrl_if_tx_tasklet, NULL); + + TASK_INIT(&ctrl_if_rx_tasklet, 0, __ctrl_if_rx_tasklet, NULL); + + TASK_INIT(&ctrl_if_rxmsg_deferred_task, 0, __ctrl_if_rxmsg_deferred_task, NULL); + + ctrl_if_reset(1); + ctrl_if_resume(); +} + +/* + * !! The following are DANGEROUS FUNCTIONS !! 
+ * Use with care [for example, see xencons_force_flush()]. + */ + +int +ctrl_if_transmitter_empty(void) +{ + return (get_ctrl_if()->tx_req_prod == ctrl_if_tx_resp_cons); +} + +void +ctrl_if_discard_responses(void) +{ + ctrl_if_tx_resp_cons = get_ctrl_if()->tx_resp_prod; +} + +SYSINIT(ctrl_if_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, ctrl_if_init, NULL); diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c new file mode 100644 index 0000000000..57aa4e2ef4 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c @@ -0,0 +1,209 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/db_interface.c,v 1.77 2003/11/08 03:01:26 alc Exp $"); + +/* + * Interface to new debugger. 
+ */ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/reboot.h> +#include <sys/cons.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/smp.h> + +#include <machine/cpu.h> +#ifdef SMP +#include <machine/smptests.h> /** CPUSTOP_ON_DDBBREAK */ +#endif + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <ddb/ddb.h> + +#include <machine/setjmp.h> +#include <machine/xenfunc.h> + + +static jmp_buf *db_nofault = 0; +extern jmp_buf db_jmpbuf; + +extern void gdb_handle_exception(db_regs_t *, int, int); + +int db_active; +db_regs_t ddb_regs; + +static __inline u_short +rss(void) +{ + u_short ss; +#ifdef __GNUC__ + __asm __volatile("mov %%ss,%0" : "=r" (ss)); +#else + ss = 0; /* XXXX Fix for other compilers. */ +#endif + return ss; +} + +/* + * kdb_trap - field a TRACE or BPT trap + */ +int +kdb_trap(int type, int code, struct i386_saved_state *regs) +{ + volatile int ddb_mode = !(boothowto & RB_GDB); + + disable_intr(); + + if (ddb_mode) { + /* we can't do much as a guest domain except print a + * backtrace and die gracefuly. The reason is that we + * can't get character input to make this work. + */ + db_active = 1; + db_print_backtrace(); + db_printf("************ Domain shutting down ************\n"); + HYPERVISOR_shutdown(); + } else { + Debugger("kdb_trap"); + } + return (1); +} + +/* + * Read bytes from kernel address space for debugger. + */ +void +db_read_bytes(vm_offset_t addr, size_t size, char *data) +{ + char *src; + + db_nofault = &db_jmpbuf; + + src = (char *)addr; + while (size-- > 0) + *data++ = *src++; + + db_nofault = 0; +} + +/* + * Write bytes to kernel address space for debugger. 
+ */ +void +db_write_bytes(vm_offset_t addr, size_t size, char *data) +{ + char *dst; + + pt_entry_t *ptep0 = NULL; + pt_entry_t oldmap0 = 0; + vm_offset_t addr1; + pt_entry_t *ptep1 = NULL; + pt_entry_t oldmap1 = 0; + + db_nofault = &db_jmpbuf; + + if (addr > trunc_page((vm_offset_t)btext) - size && + addr < round_page((vm_offset_t)etext)) { + + ptep0 = pmap_pte(kernel_pmap, addr); + oldmap0 = *ptep0; + *ptep0 |= PG_RW; + + /* Map another page if the data crosses a page boundary. */ + if ((*ptep0 & PG_PS) == 0) { + addr1 = trunc_page(addr + size - 1); + if (trunc_page(addr) != addr1) { + ptep1 = pmap_pte(kernel_pmap, addr1); + oldmap1 = *ptep1; + *ptep1 |= PG_RW; + } + } else { + addr1 = trunc_4mpage(addr + size - 1); + if (trunc_4mpage(addr) != addr1) { + ptep1 = pmap_pte(kernel_pmap, addr1); + oldmap1 = *ptep1; + *ptep1 |= PG_RW; + } + } + + invltlb(); + } + + dst = (char *)addr; + + while (size-- > 0) + *dst++ = *data++; + + db_nofault = 0; + + if (ptep0) { + *ptep0 = oldmap0; + + if (ptep1) + *ptep1 = oldmap1; + + invltlb(); + } +} + +/* + * XXX + * Move this to machdep.c and allow it to be called if any debugger is + * installed. + */ +void +Debugger(const char *msg) +{ + static volatile u_int in_Debugger; + + /* + * XXX + * Do nothing if the console is in graphics mode. This is + * OK if the call is for the debugger hotkey but not if the call + * is a weak form of panicing. 
+ */ + if (cons_unavail && !(boothowto & RB_GDB)) + return; + + if (atomic_cmpset_acq_int(&in_Debugger, 0, 1)) { + db_printf("Debugger(\"%s\")\n", msg); + breakpoint(); + atomic_store_rel_int(&in_Debugger, 0); + } +} + +void +db_show_mdpcpu(struct pcpu *pc) +{ + + db_printf("APIC ID = %d\n", pc->pc_apic_id); + db_printf("currentldt = 0x%x\n", pc->pc_currentldt); +} diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c new file mode 100644 index 0000000000..635a3bfe4e --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c @@ -0,0 +1,580 @@ +/****************************************************************************** + * evtchn.c + * + * Communication via Xen event channels. + * + * Copyright (c) 2002-2004, K A Fraser + */ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> + +#include <machine/cpufunc.h> +#include <machine/intr_machdep.h> +#include <machine/xen-os.h> +#include <machine/xen_intr.h> +#include <machine/synch_bitops.h> +#include <machine/evtchn.h> +#include <machine/hypervisor.h> +#include <machine/hypervisor-ifs.h> + + +static struct mtx irq_mapping_update_lock; + +#define TODO printf("%s: not implemented!\n", __func__) + +/* IRQ <-> event-channel mappings. */ +static int evtchn_to_irq[NR_EVENT_CHANNELS]; +static int irq_to_evtchn[NR_IRQS]; + +/* IRQ <-> VIRQ mapping. */ +static int virq_to_irq[NR_VIRQS]; + +/* Reference counts for bindings to IRQs. */ +static int irq_bindcount[NR_IRQS]; + +#define VALID_EVTCHN(_chn) ((_chn) != -1) + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. 
+ */ +void force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0); +} + +void +evtchn_do_upcall(struct intrframe *frame) +{ + unsigned long l1, l2; + unsigned int l1i, l2i, port; + int irq, owned; + unsigned long flags; + shared_info_t *s = HYPERVISOR_shared_info; + + local_irq_save(flags); + + while ( s->vcpu_data[0].evtchn_upcall_pending ) + { + s->vcpu_data[0].evtchn_upcall_pending = 0; + /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ + l1 = xen_xchg(&s->evtchn_pending_sel, 0); + while ( (l1i = ffs(l1)) != 0 ) + { + l1i--; + l1 &= ~(1 << l1i); + + l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i]; + while ( (l2i = ffs(l2)) != 0 ) + { + l2i--; + l2 &= ~(1 << l2i); + + port = (l1i << 5) + l2i; + if ((owned = mtx_owned(&sched_lock)) != 0) + mtx_unlock_spin_flags(&sched_lock, MTX_QUIET); + if ( (irq = evtchn_to_irq[port]) != -1 ) { + struct intsrc *isrc = intr_lookup_source(irq); + intr_execute_handlers(isrc, frame); + + } else { + evtchn_device_upcall(port); + } + if (owned) + mtx_lock_spin_flags(&sched_lock, MTX_QUIET); + } + } + } + + local_irq_restore(flags); + +} + + +static int +find_unbound_irq(void) +{ + int irq; + + for ( irq = 0; irq < NR_IRQS; irq++ ) + if ( irq_bindcount[irq] == 0 ) + break; + + if ( irq == NR_IRQS ) + panic("No available IRQ to bind to: increase NR_IRQS!\n"); + + return irq; +} + +int +bind_virq_to_irq(int virq) +{ + evtchn_op_t op; + int evtchn, irq; + + mtx_lock(&irq_mapping_update_lock); + + if ( (irq = virq_to_irq[virq]) == -1 ) + { + op.cmd = EVTCHNOP_bind_virq; + op.u.bind_virq.virq = virq; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + panic("Failed to bind virtual IRQ %d\n", virq); + evtchn = op.u.bind_virq.port; + + irq = find_unbound_irq(); + evtchn_to_irq[evtchn] = irq; + irq_to_evtchn[irq] = evtchn; + + virq_to_irq[virq] = irq; + } + + irq_bindcount[irq]++; + + mtx_unlock(&irq_mapping_update_lock); + + return irq; +} + +void +unbind_virq_from_irq(int virq) +{ + evtchn_op_t op; + int irq = 
virq_to_irq[virq]; + int evtchn = irq_to_evtchn[irq]; + + mtx_lock(&irq_mapping_update_lock); + + if ( --irq_bindcount[irq] == 0 ) + { + op.cmd = EVTCHNOP_close; + op.u.close.dom = DOMID_SELF; + op.u.close.port = evtchn; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + panic("Failed to unbind virtual IRQ %d\n", virq); + + evtchn_to_irq[evtchn] = -1; + irq_to_evtchn[irq] = -1; + virq_to_irq[virq] = -1; + } + + mtx_unlock(&irq_mapping_update_lock); +} + +int +bind_evtchn_to_irq(int evtchn) +{ + int irq; + + mtx_lock(&irq_mapping_update_lock); + + if ( (irq = evtchn_to_irq[evtchn]) == -1 ) + { + irq = find_unbound_irq(); + evtchn_to_irq[evtchn] = irq; + irq_to_evtchn[irq] = evtchn; + } + + irq_bindcount[irq]++; + + mtx_unlock(&irq_mapping_update_lock); + + return irq; +} + +void +unbind_evtchn_from_irq(int evtchn) +{ + int irq = evtchn_to_irq[evtchn]; + + mtx_lock(&irq_mapping_update_lock); + + if ( --irq_bindcount[irq] == 0 ) + { + evtchn_to_irq[evtchn] = -1; + irq_to_evtchn[irq] = -1; + } + + mtx_unlock(&irq_mapping_update_lock); +} + + +/* + * Interface to generic handling in intr_machdep.c + */ + + +/*------------ interrupt handling --------------------------------------*/ +#define TODO printf("%s: not implemented!\n", __func__) + + struct mtx xenpic_lock; + +struct xenpic_intsrc { + struct intsrc xp_intsrc; + uint8_t xp_vector; + boolean_t xp_masked; +}; + +struct xenpic { + struct pic xp_pic; /* this MUST be first */ + uint16_t xp_numintr; + struct xenpic_intsrc xp_pins[0]; +}; + +static void xenpic_enable_dynirq_source(struct intsrc *isrc); +static void xenpic_disable_dynirq_source(struct intsrc *isrc, int); +static void xenpic_eoi_source(struct intsrc *isrc); +static void xenpic_enable_dynirq_intr(struct intsrc *isrc); +static int xenpic_vector(struct intsrc *isrc); +static int xenpic_source_pending(struct intsrc *isrc); +static void xenpic_suspend(struct intsrc *isrc); +static void xenpic_resume(struct intsrc *isrc); + + +struct pic xenpic_template = { + 
xenpic_enable_dynirq_source, + xenpic_disable_dynirq_source, + xenpic_eoi_source, + xenpic_enable_dynirq_intr, + xenpic_vector, + xenpic_source_pending, + xenpic_suspend, + xenpic_resume +}; + + +void +xenpic_enable_dynirq_source(struct intsrc *isrc) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + + if (xp->xp_masked) { + irq = xenpic_vector(isrc); + unmask_evtchn(irq_to_evtchn[irq]); + xp->xp_masked = FALSE; + } +} + +static void +xenpic_disable_dynirq_source(struct intsrc *isrc, int foo) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + + if (!xp->xp_masked) { + irq = xenpic_vector(isrc); + mask_evtchn(irq_to_evtchn[irq]); + xp->xp_masked = TRUE; + } + +} + +static void +xenpic_enable_dynirq_intr(struct intsrc *isrc) +{ + unsigned int irq; + + irq = xenpic_vector(isrc); + unmask_evtchn(irq_to_evtchn[irq]); +} + +static void +xenpic_eoi_source(struct intsrc *isrc) +{ + unsigned int irq = xenpic_vector(isrc); + clear_evtchn(irq_to_evtchn[irq]); +} + +static int +xenpic_vector(struct intsrc *isrc) +{ + struct xenpic_intsrc *pin = (struct xenpic_intsrc *)isrc; + return (pin->xp_vector); +} + +static int +xenpic_source_pending(struct intsrc *isrc) +{ + TODO; + return 0; +} + +static void +xenpic_suspend(struct intsrc *isrc) +{ + TODO; +} + +static void +xenpic_resume(struct intsrc *isrc) +{ + TODO; +} + +#ifdef CONFIG_PHYSDEV +/* required for support of physical devices */ +static inline void +pirq_unmask_notify(int pirq) +{ + physdev_op_t op; + if ( unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0])) ) + { + op.cmd = PHYSDEVOP_IRQ_UNMASK_NOTIFY; + (void)HYPERVISOR_physdev_op(&op); + } +} + +static inline void +pirq_query_unmask(int pirq) +{ + physdev_op_t op; + op.cmd = PHYSDEVOP_IRQ_STATUS_QUERY; + op.u.irq_status_query.irq = pirq; + (void)HYPERVISOR_physdev_op(&op); + clear_bit(pirq, &pirq_needs_unmask_notify[0]); + if ( op.u.irq_status_query.flags & 
PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY ) + set_bit(pirq, &pirq_needs_unmask_notify[0]); +} + +/* + * On startup, if there is no action associated with the IRQ then we are + * probing. In this case we should not share with others as it will confuse us. + */ +#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL) + +static unsigned int startup_pirq(unsigned int irq) +{ + evtchn_op_t op; + int evtchn; + + op.cmd = EVTCHNOP_bind_pirq; + op.u.bind_pirq.pirq = irq; + /* NB. We are happy to share unless we are probing. */ + op.u.bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + { + if ( !probing_irq(irq) ) /* Some failures are expected when probing. */ + printk(KERN_INFO "Failed to obtain physical IRQ %d\n", irq); + return 0; + } + evtchn = op.u.bind_pirq.port; + + pirq_query_unmask(irq_to_pirq(irq)); + + evtchn_to_irq[evtchn] = irq; + irq_to_evtchn[irq] = evtchn; + + unmask_evtchn(evtchn); + pirq_unmask_notify(irq_to_pirq(irq)); + + return 0; +} + +static void shutdown_pirq(unsigned int irq) +{ + evtchn_op_t op; + int evtchn = irq_to_evtchn[irq]; + + if ( !VALID_EVTCHN(evtchn) ) + return; + + mask_evtchn(evtchn); + + op.cmd = EVTCHNOP_close; + op.u.close.dom = DOMID_SELF; + op.u.close.port = evtchn; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + panic("Failed to unbind physical IRQ %d\n", irq); + + evtchn_to_irq[evtchn] = -1; + irq_to_evtchn[irq] = -1; +} + +static void enable_pirq(unsigned int irq) +{ + int evtchn = irq_to_evtchn[irq]; + if ( !VALID_EVTCHN(evtchn) ) + return; + unmask_evtchn(evtchn); + pirq_unmask_notify(irq_to_pirq(irq)); +} + +static void disable_pirq(unsigned int irq) +{ + int evtchn = irq_to_evtchn[irq]; + if ( !VALID_EVTCHN(evtchn) ) + return; + mask_evtchn(evtchn); +} + +static void ack_pirq(unsigned int irq) +{ + int evtchn = irq_to_evtchn[irq]; + if ( !VALID_EVTCHN(evtchn) ) + return; + mask_evtchn(evtchn); + clear_evtchn(evtchn); +} + +static void end_pirq(unsigned int irq) +{ + 
int evtchn = irq_to_evtchn[irq]; + if ( !VALID_EVTCHN(evtchn) ) + return; + if ( !(irq_desc[irq].status & IRQ_DISABLED) ) + { + unmask_evtchn(evtchn); + pirq_unmask_notify(irq_to_pirq(irq)); + } +} + +static struct hw_interrupt_type pirq_type = { + "Phys-irq", + startup_pirq, + shutdown_pirq, + enable_pirq, + disable_pirq, + ack_pirq, + end_pirq, + NULL +}; +#endif + + +static void +misdirect_interrupt(void *sc) +{ +} + +void irq_suspend(void) +{ + int virq, irq, evtchn; + + /* Unbind VIRQs from event channels. */ + for ( virq = 0; virq < NR_VIRQS; virq++ ) + { + if ( (irq = virq_to_irq[virq]) == -1 ) + continue; + evtchn = irq_to_evtchn[irq]; + + /* Mark the event channel as unused in our table. */ + evtchn_to_irq[evtchn] = -1; + irq_to_evtchn[irq] = -1; + } + + /* + * We should now be unbound from all event channels. Stale bindings to + * PIRQs and/or inter-domain event channels will cause us to barf here. + */ + for ( evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++ ) + if ( evtchn_to_irq[evtchn] != -1 ) + panic("Suspend attempted while bound to evtchn %d.\n", evtchn); +} + + +void irq_resume(void) +{ + evtchn_op_t op; + int virq, irq, evtchn; + + for ( evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++ ) + mask_evtchn(evtchn); /* New event-channel space is not 'live' yet. */ + + for ( virq = 0; virq < NR_VIRQS; virq++ ) + { + if ( (irq = virq_to_irq[virq]) == -1 ) + continue; + + /* Get a new binding from Xen. */ + op.cmd = EVTCHNOP_bind_virq; + op.u.bind_virq.virq = virq; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + panic("Failed to bind virtual IRQ %d\n", virq); + evtchn = op.u.bind_virq.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_to_evtchn[irq] = evtchn; + + /* Ready for use. 
*/ + unmask_evtchn(evtchn); + } +} + +static void +evtchn_init(void *dummy __unused) +{ + int i; + struct xenpic *xp; + struct xenpic_intsrc *pin; + + /* + * xenpic_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be + * under witness. + */ + mtx_init(&irq_mapping_update_lock, "xp", NULL, MTX_DEF); + + /* No VIRQ -> IRQ mappings. */ + for ( i = 0; i < NR_VIRQS; i++ ) + virq_to_irq[i] = -1; + + /* No event-channel -> IRQ mappings. */ + for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) + { + evtchn_to_irq[i] = -1; + mask_evtchn(i); /* No event channels are 'live' right now. */ + } + + /* No IRQ -> event-channel mappings. */ + for ( i = 0; i < NR_IRQS; i++ ) + irq_to_evtchn[i] = -1; + + xp = malloc(sizeof(struct xenpic) + NR_DYNIRQS*sizeof(struct xenpic_intsrc), M_DEVBUF, M_WAITOK); + xp->xp_pic = xenpic_template; + xp->xp_numintr = NR_DYNIRQS; + bzero(xp->xp_pins, sizeof(struct xenpic_intsrc) * NR_DYNIRQS); + + for ( i = 0, pin = xp->xp_pins; i < NR_DYNIRQS; i++, pin++ ) + { + /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ + irq_bindcount[dynirq_to_irq(i)] = 0; + + pin->xp_intsrc.is_pic = (struct pic *)xp; + pin->xp_vector = i; + intr_register_source(&pin->xp_intsrc); + } + /* We don't currently have any support for physical devices in XenoFreeBSD + * so leaving this out for the moment for the sake of expediency. + */ +#ifdef notyet + for ( i = 0; i < NR_PIRQS; i++ ) + { + /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. 
*/ + irq_bindcount[pirq_to_irq(i)] = 1; + + irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED; + irq_desc[pirq_to_irq(i)].action = 0; + irq_desc[pirq_to_irq(i)].depth = 1; + irq_desc[pirq_to_irq(i)].handler = &pirq_type; + } + +#endif + (void) intr_add_handler("xb_mis", bind_virq_to_irq(VIRQ_MISDIRECT), + (driver_intr_t *)misdirect_interrupt, + NULL, INTR_TYPE_MISC, NULL); +} + +SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL); diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s new file mode 100644 index 0000000000..4adb61a350 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s @@ -0,0 +1,428 @@ +/*- + * Copyright (c) 1989, 1990 William F. Jolitz. + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/i386/exception.s,v 1.106 2003/11/03 22:08:52 jhb Exp $ + */ + +#include "opt_npx.h" + +#include <machine/asmacros.h> +#include <machine/psl.h> +#include <machine/trap.h> + +#include "assym.s" + +#define SEL_RPL_MASK 0x0002 +/* Offsets into shared_info_t. */ +#define evtchn_upcall_pending /* 0 */ +#define evtchn_upcall_mask 1 +#define XEN_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg) +#define XEN_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg) +#define XEN_TEST_PENDING(reg) testb $0x1,evtchn_upcall_pending(reg) + + +#define POPA \ + popl %edi; \ + popl %esi; \ + popl %ebp; \ + popl %ebx; \ + popl %ebx; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + + .text + +/*****************************************************************************/ +/* Trap handling */ +/*****************************************************************************/ +/* + * Trap and fault vector routines. + * + * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on + * the stack that mostly looks like an interrupt, but does not disable + * interrupts. A few of the traps we are use are interrupt gates, + * SDT_SYS386IGT, which are nearly the same thing except interrupts are + * disabled on entry. + * + * The cpu will push a certain amount of state onto the kernel stack for + * the current process. The amount of state depends on the type of trap + * and whether the trap crossed rings or not. 
See i386/include/frame.h. + * At the very least the current EFLAGS (status register, which includes + * the interrupt disable state prior to the trap), the code segment register, + * and the return instruction pointer are pushed by the cpu. The cpu + * will also push an 'error' code for certain traps. We push a dummy + * error code for those traps where the cpu doesn't in order to maintain + * a consistent frame. We also push a contrived 'trap number'. + * + * The cpu does not push the general registers, we must do that, and we + * must restore them prior to calling 'iret'. The cpu adjusts the %cs and + * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we + * must load them with appropriate values for supervisor mode operation. + */ + +MCOUNT_LABEL(user) +MCOUNT_LABEL(btrap) + +IDTVEC(div) + pushl $0; pushl $0; TRAP(T_DIVIDE) +IDTVEC(dbg) + pushl $0; pushl $0; TRAP(T_TRCTRAP) +IDTVEC(nmi) + pushl $0; pushl $0; TRAP(T_NMI) +IDTVEC(bpt) + pushl $0; pushl $0; TRAP(T_BPTFLT) +IDTVEC(ofl) + pushl $0; pushl $0; TRAP(T_OFLOW) +IDTVEC(bnd) + pushl $0; pushl $0; TRAP(T_BOUND) +IDTVEC(ill) + pushl $0; pushl $0; TRAP(T_PRIVINFLT) +IDTVEC(dna) + pushl $0; pushl $0; TRAP(T_DNA) +IDTVEC(fpusegm) + pushl $0; pushl $0; TRAP(T_FPOPFLT) +IDTVEC(tss) + pushl $0; TRAP(T_TSSFLT) +IDTVEC(missing) + pushl $0; TRAP(T_SEGNPFLT) +IDTVEC(stk) + pushl $0; TRAP(T_STKFLT) +IDTVEC(prot) + pushl $0; TRAP(T_PROTFLT) +IDTVEC(page) + TRAP(T_PAGEFLT) +IDTVEC(mchk) + pushl $0; pushl $0; TRAP(T_MCHK) +IDTVEC(rsvd) + pushl $0; pushl $0; TRAP(T_RESERVED) +IDTVEC(fpu) + pushl $0; pushl $0; TRAP(T_ARITHTRAP) +IDTVEC(align) + pushl $0; TRAP(T_ALIGNFLT) + +IDTVEC(xmm) + pushl $0; pushl $0; TRAP(T_XMMFLT) + +IDTVEC(hypervisor_callback) + pushl $T_HYPCALLBACK; pushl %eax; TRAP(T_HYPCALLBACK) + +hypervisor_callback_pending: + movl $T_HYPCALLBACK,TF_TRAPNO(%esp) + movl $T_HYPCALLBACK,TF_ERR(%esp) + jmp 11f + + /* + * alltraps entry point. 
Interrupts are enabled if this was a trap + * gate (TGT), else disabled if this was an interrupt gate (IGT). + * Note that int0x80_syscall is a trap gate. Only page faults + * use an interrupt gate. + */ + + SUPERALIGN_TEXT + .globl alltraps + .type alltraps,@function +alltraps: + cld + pushal + pushl %ds + pushl %es + pushl %fs +alltraps_with_regs_pushed: + movl $KDSEL,%eax + movl %eax,%ds + movl %eax,%es + movl $KPSEL,%eax + movl %eax,%fs + FAKE_MCOUNT(TF_EIP(%esp)) +calltrap: + movl TF_EIP(%esp),%eax + cmpl $scrit,%eax + jb 11f + cmpl $ecrit,%eax + jb critical_region_fixup +11: call trap + + /* + * Return via doreti to handle ASTs. + */ + MEXITCOUNT + jmp doreti + +/* + * SYSCALL CALL GATE (old entry point for a.out binaries) + * + * The intersegment call has been set up to specify one dummy parameter. + * + * This leaves a place to put eflags so that the call frame can be + * converted to a trap frame. Note that the eflags is (semi-)bogusly + * pushed into (what will be) tf_err and then copied later into the + * final spot. It has to be done this way because esp can't be just + * temporarily altered for the pushfl - an interrupt might come in + * and clobber the saved cs/eip. + */ + SUPERALIGN_TEXT +IDTVEC(lcall_syscall) + pushfl /* save eflags */ + popl 8(%esp) /* shuffle into tf_eflags */ + pushl $7 /* sizeof "lcall 7,0" */ + subl $4,%esp /* skip over tf_trapno */ + pushal + pushl %ds + pushl %es + pushl %fs + movl $KDSEL,%eax /* switch to kernel segments */ + movl %eax,%ds + movl %eax,%es + movl $KPSEL,%eax + movl %eax,%fs + FAKE_MCOUNT(TF_EIP(%esp)) + call syscall + MEXITCOUNT + jmp doreti + +/* + * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) + * + * Even though the name says 'int0x80', this is actually a TGT (trap gate) + * rather then an IGT (interrupt gate). Thus interrupts are enabled on + * entry just as they are for a normal syscall. 
+ */ + SUPERALIGN_TEXT +IDTVEC(int0x80_syscall) + pushl $2 /* sizeof "int 0x80" */ + pushl $0xCAFE + pushl $0xDEAD + pushal + pushl %ds + pushl %es + pushl %fs + movl $KDSEL,%eax /* switch to kernel segments */ + movl %eax,%ds + movl %eax,%es + movl $KPSEL,%eax + movl %eax,%fs + FAKE_MCOUNT(TF_EIP(%esp)) + call syscall + MEXITCOUNT + jmp doreti + +ENTRY(fork_trampoline) + pushl %esp /* trapframe pointer */ + pushl %ebx /* arg1 */ + pushl %esi /* function */ + call fork_exit + addl $12,%esp + /* cut from syscall */ + + /* + * Return via doreti to handle ASTs. + */ + MEXITCOUNT + jmp doreti + + +/* +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until weve done all processing. HOWEVER, we must enable events before +# popping the stack frame (cant be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so wed +# like to avoid the possibility. +# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. +*/ + + +/* + * void doreti(struct trapframe) + * + * Handle return from interrupts, traps and syscalls. + */ + .text + SUPERALIGN_TEXT + .globl doreti + .type doreti,@function +doreti: + FAKE_MCOUNT(bintr) /* init "from" bintr -> doreti */ +doreti_next: + testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */ + jz doreti_exit /* #can't handle ASTs now if not */ + +doreti_ast: + /* + * Check for ASTs atomically with returning. Disabling CPU + * interrupts provides sufficient locking even in the SMP case, + * since we will be informed of any new ASTs by an IPI. 
+ */ + + movl HYPERVISOR_shared_info,%esi + XEN_BLOCK_EVENTS(%esi) + movl PCPU(CURTHREAD),%eax + testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax) + je doreti_exit + XEN_UNBLOCK_EVENTS(%esi) + pushl %esp /* pass a pointer to the trapframe */ + call ast + add $4,%esp + jmp doreti_ast + +doreti_exit: + /* + * doreti_exit: pop registers, iret. + * + * The segment register pop is a special case, since it may + * fault if (for example) a sigreturn specifies bad segment + * registers. The fault is handled in trap.c. + */ + + movl HYPERVISOR_shared_info,%esi + XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks (sti) + + .globl scrit +scrit: + XEN_TEST_PENDING(%esi) + jnz hypervisor_callback_pending /* More to go */ + MEXITCOUNT + + .globl doreti_popl_fs +doreti_popl_fs: + popl %fs + .globl doreti_popl_es +doreti_popl_es: + popl %es + .globl doreti_popl_ds +doreti_popl_ds: + popl %ds + POPA + addl $12,%esp + .globl doreti_iret +doreti_iret: + iret + .globl ecrit +ecrit: + + /* + * doreti_iret_fault and friends. Alternative return code for + * the case where we get a fault in the doreti_exit code + * above. trap() (i386/i386/trap.c) catches this specific + * case, sends the process a signal and continues in the + * corresponding place in the code below. + */ + ALIGN_TEXT + .globl doreti_iret_fault +doreti_iret_fault: + subl $12,%esp + pushal + pushl %ds + .globl doreti_popl_ds_fault +doreti_popl_ds_fault: + pushl %es + .globl doreti_popl_es_fault +doreti_popl_es_fault: + pushl %fs + .globl doreti_popl_fs_fault +doreti_popl_fs_fault: + movl $0,TF_ERR(%esp) /* XXX should be the error code */ + movl $T_PROTFLT,TF_TRAPNO(%esp) + jmp alltraps_with_regs_pushed + + + + +/* +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so how many saved +# registers are in each frame. 
We do this quickly using the lookup table +# 'critical_fixup_table'. For each byte offset in the critical region, it +# provides the number of bytes which have already been popped from the +# interrupted stack frame. +*/ + +.globl critical_region_fixup +critical_region_fixup: + addl $critical_fixup_table-scrit,%eax + movzbl (%eax),%eax # %eax contains num bytes popped + movl %esp,%esi + add %eax,%esi # %esi points at end of src region + movl %esp,%edi + add $0x44,%edi # %edi points at end of dst region + movl %eax,%ecx + shr $2,%ecx # convert bytes to words + je 16f # skip loop if nothing to copy +15: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi + movl (%esi),%eax + movl %eax,(%edi) + loop 15b +16: movl %edi,%esp # final %edi is top of merged stack + jmp hypervisor_callback_pending + + +critical_fixup_table: +.byte 0x0,0x0,0x0 #testb $0x1,(%esi) +.byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea +.byte 0x0,0x0 #pop %fs +.byte 0x04 #pop %es +.byte 0x08 #pop %ds +.byte 0x0c #pop %edi +.byte 0x10 #pop %esi +.byte 0x14 #pop %ebp +.byte 0x18 #pop %ebx +.byte 0x1c #pop %ebx +.byte 0x20 #pop %edx +.byte 0x24 #pop %ecx +.byte 0x28 #pop %eax +.byte 0x2c,0x2c,0x2c #add $0xc,%esp +.byte 0x38 #iret + + +/* # Hypervisor uses this for application faults while it executes.*/ +ENTRY(failsafe_callback) + pushal + call xen_failsafe_handler +/*# call install_safe_pf_handler */ + movl 32(%esp),%ebx +1: movl %ebx,%ds + movl 36(%esp),%ebx +2: movl %ebx,%es + movl 40(%esp),%ebx +3: movl %ebx,%fs + movl 44(%esp),%ebx +4: movl %ebx,%gs +/*# call install_normal_pf_handler */ + popal + addl $16,%esp + iret + + diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c new file mode 100644 index 0000000000..1e9df732c7 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c @@ -0,0 +1,234 @@ +/*- + * Copyright (c) 1982, 1990 The Regents of the University of California. + * All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/genassym.c,v 1.146 2003/11/12 18:14:34 jhb Exp $"); + +#include "opt_apic.h" +#include "opt_compat.h" +#include "opt_kstack_pages.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/assym.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/errno.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/socket.h> +#include <sys/resourcevar.h> +#include <sys/ucontext.h> +#include <sys/user.h> +#include <machine/bootinfo.h> +#include <machine/tss.h> +#include <sys/vmmeter.h> +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <sys/user.h> +#include <sys/proc.h> +#include <net/if.h> +#include <netinet/in.h> +#include <nfs/nfsproto.h> +#include <nfs/rpcv2.h> +#include <nfsclient/nfs.h> +#include <nfsclient/nfsdiskless.h> +#ifdef DEV_APIC +#include <machine/apicreg.h> +#endif +#include <machine/cpu.h> +#include <machine/sigframe.h> +#include <machine/proc.h> + +ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); +ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); +ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); +ASSYM(P_SFLAG, offsetof(struct proc, p_sflag)); +ASSYM(P_UAREA, offsetof(struct proc, p_uarea)); + +ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); +ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); +ASSYM(TD_PROC, offsetof(struct thread, td_proc)); +ASSYM(TD_MD, offsetof(struct thread, td_md)); + +ASSYM(P_MD, offsetof(struct proc, p_md)); +ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); + +ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); +ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); + +ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); +ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); +ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); +/* ASSYM(UPAGES, UPAGES);*/ +ASSYM(UAREA_PAGES, UAREA_PAGES); +ASSYM(KSTACK_PAGES, KSTACK_PAGES); 
+ASSYM(PAGE_SIZE, PAGE_SIZE); +ASSYM(NPTEPG, NPTEPG); +ASSYM(NPDEPG, NPDEPG); +ASSYM(NPDEPTD, NPDEPTD); +ASSYM(NPGPTD, NPGPTD); +ASSYM(PDESIZE, sizeof(pd_entry_t)); +ASSYM(PTESIZE, sizeof(pt_entry_t)); +ASSYM(PDESHIFT, PDESHIFT); +ASSYM(PTESHIFT, PTESHIFT); +ASSYM(PAGE_SHIFT, PAGE_SHIFT); +ASSYM(PAGE_MASK, PAGE_MASK); +ASSYM(PDRSHIFT, PDRSHIFT); +ASSYM(PDRMASK, PDRMASK); +ASSYM(USRSTACK, USRSTACK); +ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); +ASSYM(KERNBASE, KERNBASE); +ASSYM(KERNLOAD, KERNLOAD); +ASSYM(MCLBYTES, MCLBYTES); +ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3)); +ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi)); +ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi)); +ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp)); +ASSYM(PCB_ESP, offsetof(struct pcb, pcb_esp)); +ASSYM(PCB_EBX, offsetof(struct pcb, pcb_ebx)); +ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip)); +ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0)); + +ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs)); +ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0)); +ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1)); +ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); +ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); +ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); +ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); +ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl)); +ASSYM(PCB_DBREGS, PCB_DBREGS); +ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); + +ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); +ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); +ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); +ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu)); +ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); +ASSYM(PCB_SWITCHOUT, offsetof(struct pcb, pcb_switchout)); + +ASSYM(PCB_SIZE, sizeof(struct pcb)); + +ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); +ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); +ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); +ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags)); +ASSYM(TF_EIP, 
offsetof(struct trapframe, tf_eip)); +ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); +#ifdef COMPAT_43 +ASSYM(SIGF_SC, offsetof(struct osigframe, sf_siginfo.si_sc)); +#endif +ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); +#ifdef COMPAT_FREEBSD4 +ASSYM(SIGF_UC4, offsetof(struct sigframe4, sf_uc)); +#endif +#ifdef COMPAT_43 +ASSYM(SC_PS, offsetof(struct osigcontext, sc_ps)); +ASSYM(SC_FS, offsetof(struct osigcontext, sc_fs)); +ASSYM(SC_GS, offsetof(struct osigcontext, sc_gs)); +ASSYM(SC_TRAPNO, offsetof(struct osigcontext, sc_trapno)); +#endif +#ifdef COMPAT_FREEBSD4 +ASSYM(UC4_EFLAGS, offsetof(struct ucontext4, uc_mcontext.mc_eflags)); +ASSYM(UC4_GS, offsetof(struct ucontext4, uc_mcontext.mc_gs)); +#endif +ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags)); +ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs)); +ASSYM(ENOENT, ENOENT); +ASSYM(EFAULT, EFAULT); +ASSYM(ENAMETOOLONG, ENAMETOOLONG); +ASSYM(MAXCOMLEN, MAXCOMLEN); +ASSYM(MAXPATHLEN, MAXPATHLEN); +ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo)); +ASSYM(BI_VERSION, offsetof(struct bootinfo, bi_version)); +ASSYM(BI_KERNELNAME, offsetof(struct bootinfo, bi_kernelname)); +ASSYM(BI_NFS_DISKLESS, offsetof(struct bootinfo, bi_nfs_diskless)); +ASSYM(BI_ENDCOMMON, offsetof(struct bootinfo, bi_endcommon)); +ASSYM(NFSDISKLESS_SIZE, sizeof(struct nfs_diskless)); +ASSYM(BI_SIZE, offsetof(struct bootinfo, bi_size)); +ASSYM(BI_SYMTAB, offsetof(struct bootinfo, bi_symtab)); +ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab)); +ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); +ASSYM(PC_SIZEOF, sizeof(struct pcpu)); +ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); +ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); +ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread)); +ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); +ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); +ASSYM(PC_COMMON_TSS, offsetof(struct pcpu, pc_common_tss)); 
+ASSYM(PC_COMMON_TSSD, offsetof(struct pcpu, pc_common_tssd)); +ASSYM(PC_TSS_GDT, offsetof(struct pcpu, pc_tss_gdt)); +ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt)); +ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); +ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); +ASSYM(PC_TRAP_NESTING, offsetof(struct pcpu, pc_trap_nesting)); + +ASSYM(PC_CR3, offsetof(struct pcpu, pc_pdir)); + +#ifdef DEV_APIC +ASSYM(LA_VER, offsetof(struct LAPIC, version)); +ASSYM(LA_TPR, offsetof(struct LAPIC, tpr)); +ASSYM(LA_EOI, offsetof(struct LAPIC, eoi)); +ASSYM(LA_SVR, offsetof(struct LAPIC, svr)); +ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo)); +ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi)); +ASSYM(LA_ISR, offsetof(struct LAPIC, isr0)); +#endif + +ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); +ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); +ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL)); + +ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL)); +ASSYM(GPROC0_SEL, GPROC0_SEL); + +ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock)); +ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse)); + +#ifdef PC98 +#include <machine/bus.h> + +ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base)); +ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat)); +#endif + +ASSYM(HYPERVISOR_STACK_SWITCH, __HYPERVISOR_stack_switch); diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c new file mode 100644 index 0000000000..df9568c7d1 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c @@ -0,0 +1,107 @@ +/****************************************************************************** + * hypervisor.c + * + * Communication to/from hypervisor. 
+ * + * Copyright (c) 2002-2003, K A Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIEAS OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include <machine/xen-os.h> +#include <machine/hypervisor.h> +#include <machine/xenvar.h> +#include <machine/multicall.h> + +/* XXX need to verify what the caller save registers are on x86 KMM */ +#define CALLER_SAVE __asm__("pushal; ") +#define CALLER_RESTORE __asm__("popal;") + + +/* ni == non-inline - these are only intended for use from assembler + * no reason to have them in a header - + * + */ +void ni_queue_multicall0(unsigned long op); +void ni_queue_multicall1(unsigned long op, unsigned long arg1); +void ni_queue_multicall2(unsigned long op, unsigned long arg1, + unsigned long arg2); +void ni_queue_multicall3(unsigned long op, unsigned long arg1, + unsigned long arg2, unsigned long arg3); +void ni_queue_multicall4(unsigned long op, unsigned long arg1, + unsigned long arg2, unsigned long arg4, + unsigned long arg5); + +void ni_execute_multicall_list(void); + +multicall_entry_t multicall_list[MAX_MULTICALL_ENTS]; +int nr_multicall_ents = 0; + + +void +ni_queue_multicall0(unsigned long op) +{ + CALLER_SAVE; + queue_multicall0(op); + CALLER_RESTORE; +} + +void +ni_queue_multicall1(unsigned long op, unsigned long arg1) +{ + CALLER_SAVE; + queue_multicall1(op, arg1); + CALLER_RESTORE; +} + +void +ni_queue_multicall2(unsigned long op, unsigned long arg1, + unsigned long arg2) +{ + CALLER_SAVE; + queue_multicall2(op, arg1, arg2); + CALLER_RESTORE; +} + +void +ni_queue_multicall3(unsigned long op, unsigned long arg1, + unsigned long arg2, unsigned long arg3) +{ + CALLER_SAVE; + queue_multicall3(op, arg1, arg2, arg3); + CALLER_RESTORE; +} + +void +ni_queue_multicall4(unsigned long op, unsigned long arg1, + unsigned long arg2, unsigned long arg3, + unsigned long arg4) +{ + CALLER_SAVE; + queue_multicall4(op, arg1, arg2, arg3, arg4); + CALLER_RESTORE; +} + +void +ni_execute_multicall_list(void) +{ + CALLER_SAVE; + execute_multicall_list(); + CALLER_RESTORE; +} diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c 
b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c new file mode 100644 index 0000000000..fe21232f7a --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c @@ -0,0 +1,626 @@ +/*- + * Copyright (c) 1999 Michael Smith <msmith@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/i686_mem.c,v 1.23 2003/10/21 18:28:34 silby Exp $"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/memrange.h> +#include <sys/smp.h> +#include <sys/sysctl.h> + +#include <machine/md_var.h> +#include <machine/specialreg.h> + +/* + * i686 memory range operations + * + * This code will probably be impenetrable without reference to the + * Intel Pentium Pro documentation. + */ + +static char *mem_owner_bios = "BIOS"; + +#define MR686_FIXMTRR (1<<0) + +#define mrwithin(mr, a) \ + (((a) >= (mr)->mr_base) && ((a) < ((mr)->mr_base + (mr)->mr_len))) +#define mroverlap(mra, mrb) \ + (mrwithin(mra, mrb->mr_base) || mrwithin(mrb, mra->mr_base)) + +#define mrvalid(base, len) \ + ((!(base & ((1 << 12) - 1))) && /* base is multiple of 4k */ \ + ((len) >= (1 << 12)) && /* length is >= 4k */ \ + powerof2((len)) && /* ... and power of two */ \ + !((base) & ((len) - 1))) /* range is not discontiuous */ + +#define mrcopyflags(curr, new) (((curr) & ~MDF_ATTRMASK) | ((new) & MDF_ATTRMASK)) + +static int mtrrs_disabled; +TUNABLE_INT("machdep.disable_mtrrs", &mtrrs_disabled); +SYSCTL_INT(_machdep, OID_AUTO, disable_mtrrs, CTLFLAG_RDTUN, + &mtrrs_disabled, 0, "Disable i686 MTRRs."); + +static void i686_mrinit(struct mem_range_softc *sc); +static int i686_mrset(struct mem_range_softc *sc, + struct mem_range_desc *mrd, + int *arg); +static void i686_mrAPinit(struct mem_range_softc *sc); + +static struct mem_range_ops i686_mrops = { + i686_mrinit, + i686_mrset, + i686_mrAPinit +}; + +/* XXX for AP startup hook */ +static u_int64_t mtrrcap, mtrrdef; + +static struct mem_range_desc *mem_range_match(struct mem_range_softc *sc, + struct mem_range_desc *mrd); +static void i686_mrfetch(struct mem_range_softc *sc); +static int i686_mtrrtype(int flags); +#if 0 +static int i686_mrt2mtrr(int flags, int oldval); +#endif +static int i686_mtrrconflict(int flag1, int 
flag2); +static void i686_mrstore(struct mem_range_softc *sc); +static void i686_mrstoreone(void *arg); +static struct mem_range_desc *i686_mtrrfixsearch(struct mem_range_softc *sc, + u_int64_t addr); +static int i686_mrsetlow(struct mem_range_softc *sc, + struct mem_range_desc *mrd, + int *arg); +static int i686_mrsetvariable(struct mem_range_softc *sc, + struct mem_range_desc *mrd, + int *arg); + +/* i686 MTRR type to memory range type conversion */ +static int i686_mtrrtomrt[] = { + MDF_UNCACHEABLE, + MDF_WRITECOMBINE, + MDF_UNKNOWN, + MDF_UNKNOWN, + MDF_WRITETHROUGH, + MDF_WRITEPROTECT, + MDF_WRITEBACK +}; + +#define MTRRTOMRTLEN (sizeof(i686_mtrrtomrt) / sizeof(i686_mtrrtomrt[0])) + +static int +i686_mtrr2mrt(int val) { + if (val < 0 || val >= MTRRTOMRTLEN) + return MDF_UNKNOWN; + return i686_mtrrtomrt[val]; +} + +/* + * i686 MTRR conflicts. Writeback and uncachable may overlap. + */ +static int +i686_mtrrconflict(int flag1, int flag2) { + flag1 &= MDF_ATTRMASK; + flag2 &= MDF_ATTRMASK; + if (flag1 == flag2 || + (flag1 == MDF_WRITEBACK && flag2 == MDF_UNCACHEABLE) || + (flag2 == MDF_WRITEBACK && flag1 == MDF_UNCACHEABLE)) + return 0; + return 1; +} + +/* + * Look for an exactly-matching range. + */ +static struct mem_range_desc * +mem_range_match(struct mem_range_softc *sc, struct mem_range_desc *mrd) +{ + struct mem_range_desc *cand; + int i; + + for (i = 0, cand = sc->mr_desc; i < sc->mr_ndesc; i++, cand++) + if ((cand->mr_base == mrd->mr_base) && + (cand->mr_len == mrd->mr_len)) + return(cand); + return(NULL); +} + +/* + * Fetch the current mtrr settings from the current CPU (assumed to all + * be in sync in the SMP case). Note that if we are here, we assume + * that MTRRs are enabled, and we may or may not have fixed MTRRs. 
+ */ +static void +i686_mrfetch(struct mem_range_softc *sc) +{ + struct mem_range_desc *mrd; + u_int64_t msrv; + int i, j, msr; + + mrd = sc->mr_desc; + + /* Get fixed-range MTRRs */ + if (sc->mr_cap & MR686_FIXMTRR) { + msr = MSR_MTRR64kBase; + for (i = 0; i < (MTRR_N64K / 8); i++, msr++) { + msrv = rdmsr(msr); + for (j = 0; j < 8; j++, mrd++) { + mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | + i686_mtrr2mrt(msrv & 0xff) | + MDF_ACTIVE; + if (mrd->mr_owner[0] == 0) + strcpy(mrd->mr_owner, mem_owner_bios); + msrv = msrv >> 8; + } + } + msr = MSR_MTRR16kBase; + for (i = 0; i < (MTRR_N16K / 8); i++, msr++) { + msrv = rdmsr(msr); + for (j = 0; j < 8; j++, mrd++) { + mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | + i686_mtrr2mrt(msrv & 0xff) | + MDF_ACTIVE; + if (mrd->mr_owner[0] == 0) + strcpy(mrd->mr_owner, mem_owner_bios); + msrv = msrv >> 8; + } + } + msr = MSR_MTRR4kBase; + for (i = 0; i < (MTRR_N4K / 8); i++, msr++) { + msrv = rdmsr(msr); + for (j = 0; j < 8; j++, mrd++) { + mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | + i686_mtrr2mrt(msrv & 0xff) | + MDF_ACTIVE; + if (mrd->mr_owner[0] == 0) + strcpy(mrd->mr_owner, mem_owner_bios); + msrv = msrv >> 8; + } + } + } + + /* Get remainder which must be variable MTRRs */ + msr = MSR_MTRRVarBase; + for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) { + msrv = rdmsr(msr); + mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | + i686_mtrr2mrt(msrv & 0xff); + mrd->mr_base = msrv & 0x0000000ffffff000LL; + msrv = rdmsr(msr + 1); + mrd->mr_flags = (msrv & 0x800) ? + (mrd->mr_flags | MDF_ACTIVE) : + (mrd->mr_flags & ~MDF_ACTIVE); + /* Compute the range from the mask. Ick. 
*/ + mrd->mr_len = (~(msrv & 0x0000000ffffff000LL) & 0x0000000fffffffffLL) + 1; + if (!mrvalid(mrd->mr_base, mrd->mr_len)) + mrd->mr_flags |= MDF_BOGUS; + /* If unclaimed and active, must be the BIOS */ + if ((mrd->mr_flags & MDF_ACTIVE) && (mrd->mr_owner[0] == 0)) + strcpy(mrd->mr_owner, mem_owner_bios); + } +} + +/* + * Return the MTRR memory type matching a region's flags + */ +static int +i686_mtrrtype(int flags) +{ + int i; + + flags &= MDF_ATTRMASK; + + for (i = 0; i < MTRRTOMRTLEN; i++) { + if (i686_mtrrtomrt[i] == MDF_UNKNOWN) + continue; + if (flags == i686_mtrrtomrt[i]) + return(i); + } + return(-1); +} +#if 0 +static int +i686_mrt2mtrr(int flags, int oldval) +{ + int val; + + if ((val = i686_mtrrtype(flags)) == -1) + return oldval & 0xff; + return val & 0xff; +} +#endif +/* + * Update running CPU(s) MTRRs to match the ranges in the descriptor + * list. + * + * XXX Must be called with interrupts enabled. + */ +static void +i686_mrstore(struct mem_range_softc *sc) +{ +#ifdef SMP + /* + * We should use ipi_all_but_self() to call other CPUs into a + * locking gate, then call a target function to do this work. + * The "proper" solution involves a generalised locking gate + * implementation, not ready yet. + */ + smp_rendezvous(NULL, i686_mrstoreone, NULL, (void *)sc); +#else + disable_intr(); /* disable interrupts */ + i686_mrstoreone((void *)sc); + enable_intr(); +#endif +} + +/* + * Update the current CPU's MTRRs with those represented in the + * descriptor list. Note that we do this wholesale rather than + * just stuffing one entry; this is simpler (but slower, of course). 
+ */ +static void +i686_mrstoreone(void *arg) +{ +#if 0 + struct mem_range_softc *sc = (struct mem_range_softc *)arg; + struct mem_range_desc *mrd; + u_int64_t omsrv, msrv; + int i, j, msr; + u_int cr4save; + + mrd = sc->mr_desc; + + cr4save = rcr4(); /* save cr4 */ + if (cr4save & CR4_PGE) + load_cr4(cr4save & ~CR4_PGE); + load_cr0((rcr0() & ~CR0_NW) | CR0_CD); /* disable caches (CD = 1, NW = 0) */ + wbinvd(); /* flush caches, TLBs */ + wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) & ~0x800); /* disable MTRRs (E = 0) */ + + /* Set fixed-range MTRRs */ + if (sc->mr_cap & MR686_FIXMTRR) { + msr = MSR_MTRR64kBase; + for (i = 0; i < (MTRR_N64K / 8); i++, msr++) { + msrv = 0; + omsrv = rdmsr(msr); + for (j = 7; j >= 0; j--) { + msrv = msrv << 8; + msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8)); + } + wrmsr(msr, msrv); + mrd += 8; + } + msr = MSR_MTRR16kBase; + for (i = 0; i < (MTRR_N16K / 8); i++, msr++) { + msrv = 0; + omsrv = rdmsr(msr); + for (j = 7; j >= 0; j--) { + msrv = msrv << 8; + msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8)); + } + wrmsr(msr, msrv); + mrd += 8; + } + msr = MSR_MTRR4kBase; + for (i = 0; i < (MTRR_N4K / 8); i++, msr++) { + msrv = 0; + omsrv = rdmsr(msr); + for (j = 7; j >= 0; j--) { + msrv = msrv << 8; + msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8)); + } + wrmsr(msr, msrv); + mrd += 8; + } + } + + /* Set remainder which must be variable MTRRs */ + msr = MSR_MTRRVarBase; + for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) { + /* base/type register */ + omsrv = rdmsr(msr); + if (mrd->mr_flags & MDF_ACTIVE) { + msrv = mrd->mr_base & 0x0000000ffffff000LL; + msrv |= i686_mrt2mtrr(mrd->mr_flags, omsrv); + } else { + msrv = 0; + } + wrmsr(msr, msrv); + + /* mask/active register */ + if (mrd->mr_flags & MDF_ACTIVE) { + msrv = 0x800 | (~(mrd->mr_len - 1) & 0x0000000ffffff000LL); + } else { + msrv = 0; + } + wrmsr(msr + 1, msrv); + } + wbinvd(); /* flush caches, TLBs */ + wrmsr(MSR_MTRRdefType, 
rdmsr(MSR_MTRRdefType) | 0x800); /* restore MTRR state */ + load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* enable caches CD = 0 and NW = 0 */ + load_cr4(cr4save); /* restore cr4 */ +#endif +} + +/* + * Hunt for the fixed MTRR referencing (addr) + */ +static struct mem_range_desc * +i686_mtrrfixsearch(struct mem_range_softc *sc, u_int64_t addr) +{ + struct mem_range_desc *mrd; + int i; + + for (i = 0, mrd = sc->mr_desc; i < (MTRR_N64K + MTRR_N16K + MTRR_N4K); i++, mrd++) + if ((addr >= mrd->mr_base) && (addr < (mrd->mr_base + mrd->mr_len))) + return(mrd); + return(NULL); +} + +/* + * Try to satisfy the given range request by manipulating the fixed MTRRs that + * cover low memory. + * + * Note that we try to be generous here; we'll bloat the range out to the + * next higher/lower boundary to avoid the consumer having to know too much + * about the mechanisms here. + * + * XXX note that this will have to be updated when we start supporting "busy" ranges. + */ +static int +i686_mrsetlow(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg) +{ + struct mem_range_desc *first_md, *last_md, *curr_md; + + /* range check */ + if (((first_md = i686_mtrrfixsearch(sc, mrd->mr_base)) == NULL) || + ((last_md = i686_mtrrfixsearch(sc, mrd->mr_base + mrd->mr_len - 1)) == NULL)) + return(EINVAL); + + /* check we aren't doing something risky */ + if (!(mrd->mr_flags & MDF_FORCE)) + for (curr_md = first_md; curr_md <= last_md; curr_md++) { + if ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN) + return (EACCES); + } + + /* set flags, clear set-by-firmware flag */ + for (curr_md = first_md; curr_md <= last_md; curr_md++) { + curr_md->mr_flags = mrcopyflags(curr_md->mr_flags & ~MDF_FIRMWARE, mrd->mr_flags); + bcopy(mrd->mr_owner, curr_md->mr_owner, sizeof(mrd->mr_owner)); + } + + return(0); +} + + +/* + * Modify/add a variable MTRR to satisfy the request. + * + * XXX needs to be updated to properly support "busy" ranges. 
+ */ +static int +i686_mrsetvariable(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg) +{ + struct mem_range_desc *curr_md, *free_md; + int i; + + /* + * Scan the currently active variable descriptors, look for + * one we exactly match (straight takeover) and for possible + * accidental overlaps. + * Keep track of the first empty variable descriptor in case we + * can't perform a takeover. + */ + i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0; + curr_md = sc->mr_desc + i; + free_md = NULL; + for (; i < sc->mr_ndesc; i++, curr_md++) { + if (curr_md->mr_flags & MDF_ACTIVE) { + /* exact match? */ + if ((curr_md->mr_base == mrd->mr_base) && + (curr_md->mr_len == mrd->mr_len)) { + /* whoops, owned by someone */ + if (curr_md->mr_flags & MDF_BUSY) + return(EBUSY); + /* check we aren't doing something risky */ + if (!(mrd->mr_flags & MDF_FORCE) && + ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN)) + return (EACCES); + /* Ok, just hijack this entry */ + free_md = curr_md; + break; + } + /* non-exact overlap ? */ + if (mroverlap(curr_md, mrd)) { + /* between conflicting region types? */ + if (i686_mtrrconflict(curr_md->mr_flags, mrd->mr_flags)) + return(EINVAL); + } + } else if (free_md == NULL) { + free_md = curr_md; + } + } + /* got somewhere to put it? */ + if (free_md == NULL) + return(ENOSPC); + + /* Set up new descriptor */ + free_md->mr_base = mrd->mr_base; + free_md->mr_len = mrd->mr_len; + free_md->mr_flags = mrcopyflags(MDF_ACTIVE, mrd->mr_flags); + bcopy(mrd->mr_owner, free_md->mr_owner, sizeof(mrd->mr_owner)); + return(0); +} + +/* + * Handle requests to set memory range attributes by manipulating MTRRs. 
+ * + */ +static int +i686_mrset(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg) +{ + struct mem_range_desc *targ; + int error = 0; + + switch(*arg) { + case MEMRANGE_SET_UPDATE: + /* make sure that what's being asked for is even possible at all */ + if (!mrvalid(mrd->mr_base, mrd->mr_len) || + i686_mtrrtype(mrd->mr_flags) == -1) + return(EINVAL); + +#define FIXTOP ((MTRR_N64K * 0x10000) + (MTRR_N16K * 0x4000) + (MTRR_N4K * 0x1000)) + + /* are the "low memory" conditions applicable? */ + if ((sc->mr_cap & MR686_FIXMTRR) && + ((mrd->mr_base + mrd->mr_len) <= FIXTOP)) { + if ((error = i686_mrsetlow(sc, mrd, arg)) != 0) + return(error); + } else { + /* it's time to play with variable MTRRs */ + if ((error = i686_mrsetvariable(sc, mrd, arg)) != 0) + return(error); + } + break; + + case MEMRANGE_SET_REMOVE: + if ((targ = mem_range_match(sc, mrd)) == NULL) + return(ENOENT); + if (targ->mr_flags & MDF_FIXACTIVE) + return(EPERM); + if (targ->mr_flags & MDF_BUSY) + return(EBUSY); + targ->mr_flags &= ~MDF_ACTIVE; + targ->mr_owner[0] = 0; + break; + + default: + return(EOPNOTSUPP); + } + + /* update the hardware */ + i686_mrstore(sc); + i686_mrfetch(sc); /* refetch to see where we're at */ + return(0); +} + +/* + * Work out how many ranges we support, initialise storage for them, + * fetch the initial settings. 
+ */ +static void +i686_mrinit(struct mem_range_softc *sc) +{ + struct mem_range_desc *mrd; + int nmdesc = 0; + int i; + + /* XXX */ + return; + + mtrrcap = rdmsr(MSR_MTRRcap); + mtrrdef = rdmsr(MSR_MTRRdefType); + + /* For now, bail out if MTRRs are not enabled */ + if (!(mtrrdef & 0x800)) { + if (bootverbose) + printf("CPU supports MTRRs but not enabled\n"); + return; + } + nmdesc = mtrrcap & 0xff; + printf("Pentium Pro MTRR support enabled\n"); + + /* If fixed MTRRs supported and enabled */ + if ((mtrrcap & 0x100) && (mtrrdef & 0x400)) { + sc->mr_cap = MR686_FIXMTRR; + nmdesc += MTRR_N64K + MTRR_N16K + MTRR_N4K; + } + + sc->mr_desc = + (struct mem_range_desc *)malloc(nmdesc * sizeof(struct mem_range_desc), + M_MEMDESC, M_WAITOK | M_ZERO); + sc->mr_ndesc = nmdesc; + + mrd = sc->mr_desc; + + /* Populate the fixed MTRR entries' base/length */ + if (sc->mr_cap & MR686_FIXMTRR) { + for (i = 0; i < MTRR_N64K; i++, mrd++) { + mrd->mr_base = i * 0x10000; + mrd->mr_len = 0x10000; + mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE; + } + for (i = 0; i < MTRR_N16K; i++, mrd++) { + mrd->mr_base = i * 0x4000 + 0x80000; + mrd->mr_len = 0x4000; + mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE; + } + for (i = 0; i < MTRR_N4K; i++, mrd++) { + mrd->mr_base = i * 0x1000 + 0xc0000; + mrd->mr_len = 0x1000; + mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE; + } + } + + /* + * Get current settings, anything set now is considered to have + * been set by the firmware. (XXX has something already played here?) + */ + i686_mrfetch(sc); + mrd = sc->mr_desc; + for (i = 0; i < sc->mr_ndesc; i++, mrd++) { + if (mrd->mr_flags & MDF_ACTIVE) + mrd->mr_flags |= MDF_FIRMWARE; + } +} + +/* + * Initialise MTRRs on an AP after the BSP has run the init code. 
+ */ +static void +i686_mrAPinit(struct mem_range_softc *sc) +{ + i686_mrstoreone((void *)sc); /* set MTRRs to match BSP */ + wrmsr(MSR_MTRRdefType, mtrrdef); /* set MTRR behaviour to match BSP */ +} + +static void +i686_mem_drvinit(void *unused) +{ + /* Try for i686 MTRRs */ + if (!mtrrs_disabled && (cpu_feature & CPUID_MTRR) && + ((cpu_id & 0xf00) == 0x600 || (cpu_id & 0xf00) == 0xf00) && + ((strcmp(cpu_vendor, "GenuineIntel") == 0) || + (strcmp(cpu_vendor, "AuthenticAMD") == 0))) { + mem_range_softc.mr_op = &i686_mrops; + } +} + +SYSINIT(i686memdev,SI_SUB_DRIVERS,SI_ORDER_FIRST,i686_mem_drvinit,NULL) diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c new file mode 100644 index 0000000000..0852fb98aa --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c @@ -0,0 +1,889 @@ +/*- + * Copyright (c) KATO Takenori, 1997, 1998. + * + * All rights reserved. Unpublished rights reserved under the copyright + * laws of Japan. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer as + * the first lines of this file unmodified. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/initcpu.c,v 1.49 2003/11/10 15:48:30 jhb Exp $"); + +#include "opt_cpu.h" + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/sysctl.h> + +#include <machine/cputypes.h> +#include <machine/md_var.h> +#include <machine/specialreg.h> + +#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) +#define CPU_ENABLE_SSE +#endif +#if defined(CPU_DISABLE_SSE) +#undef CPU_ENABLE_SSE +#endif + +void initializecpu(void); +#if defined(I586_CPU) && defined(CPU_WT_ALLOC) +void enable_K5_wt_alloc(void); +void enable_K6_wt_alloc(void); +void enable_K6_2_wt_alloc(void); +#endif + +#ifdef I486_CPU +static void init_5x86(void); +static void init_bluelightning(void); +static void init_486dlc(void); +static void init_cy486dx(void); +#ifdef CPU_I486_ON_386 +static void init_i486_on_386(void); +#endif +static void init_6x86(void); +#endif /* I486_CPU */ + +#ifdef I686_CPU +static void init_6x86MX(void); +static void init_ppro(void); +static void init_mendocino(void); +#endif + +static int hw_instruction_sse; +SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, + &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); + +/* Must *NOT* be BSS or locore will bzero these after setting them */ +int cpu = 0; /* Are we 386, 386sx, 486, etc? 
*/ +u_int cpu_feature = 0; /* Feature flags */ +u_int cpu_high = 0; /* Highest arg to CPUID */ +u_int cpu_id = 0; /* Stepping ID */ +u_int cpu_procinfo = 0; /* HyperThreading Info / Brand Index / CLFUSH */ +char cpu_vendor[20] = ""; /* CPU Origin code */ + +#ifdef CPU_ENABLE_SSE +u_int cpu_fxsr; /* SSE enabled */ +#endif + +#ifdef I486_CPU +/* + * IBM Blue Lightning + */ +static void +init_bluelightning(void) +{ +#if 0 + u_long eflags; + +#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) + need_post_dma_flush = 1; +#endif + + eflags = read_eflags(); + disable_intr(); + + load_cr0(rcr0() | CR0_CD | CR0_NW); + invd(); + +#ifdef CPU_BLUELIGHTNING_FPU_OP_CACHE + wrmsr(0x1000, 0x9c92LL); /* FP operand can be cacheable on Cyrix FPU */ +#else + wrmsr(0x1000, 0x1c92LL); /* Intel FPU */ +#endif + /* Enables 13MB and 0-640KB cache. */ + wrmsr(0x1001, (0xd0LL << 32) | 0x3ff); +#ifdef CPU_BLUELIGHTNING_3X + wrmsr(0x1002, 0x04000000LL); /* Enables triple-clock mode. */ +#else + wrmsr(0x1002, 0x03000000LL); /* Enables double-clock mode. */ +#endif + + /* Enable caching in CR0. */ + load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ + invd(); + write_eflags(eflags); +#endif +} + +/* + * Cyrix 486SLC/DLC/SR/DR series + */ +static void +init_486dlc(void) +{ + u_long eflags; + u_char ccr0; + + eflags = read_eflags(); + disable_intr(); + invd(); + + ccr0 = read_cyrix_reg(CCR0); +#ifndef CYRIX_CACHE_WORKS + ccr0 |= CCR0_NC1 | CCR0_BARB; + write_cyrix_reg(CCR0, ccr0); + invd(); +#else + ccr0 &= ~CCR0_NC0; +#ifndef CYRIX_CACHE_REALLY_WORKS + ccr0 |= CCR0_NC1 | CCR0_BARB; +#else + ccr0 |= CCR0_NC1; +#endif +#ifdef CPU_DIRECT_MAPPED_CACHE + ccr0 |= CCR0_CO; /* Direct mapped mode. */ +#endif + write_cyrix_reg(CCR0, ccr0); + + /* Clear non-cacheable region. 
*/ + write_cyrix_reg(NCR1+2, NCR_SIZE_0K); + write_cyrix_reg(NCR2+2, NCR_SIZE_0K); + write_cyrix_reg(NCR3+2, NCR_SIZE_0K); + write_cyrix_reg(NCR4+2, NCR_SIZE_0K); + + write_cyrix_reg(0, 0); /* dummy write */ + + /* Enable caching in CR0. */ + load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ + invd(); +#endif /* !CYRIX_CACHE_WORKS */ + write_eflags(eflags); +} + + +/* + * Cyrix 486S/DX series + */ +static void +init_cy486dx(void) +{ + u_long eflags; + u_char ccr2; + + eflags = read_eflags(); + disable_intr(); + invd(); + + ccr2 = read_cyrix_reg(CCR2); +#ifdef CPU_SUSP_HLT + ccr2 |= CCR2_SUSP_HLT; +#endif + +#ifdef PC98 + /* Enables WB cache interface pin and Lock NW bit in CR0. */ + ccr2 |= CCR2_WB | CCR2_LOCK_NW; + /* Unlock NW bit in CR0. */ + write_cyrix_reg(CCR2, ccr2 & ~CCR2_LOCK_NW); + load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */ +#endif + + write_cyrix_reg(CCR2, ccr2); + write_eflags(eflags); +} + + +/* + * Cyrix 5x86 + */ +static void +init_5x86(void) +{ + u_long eflags; + u_char ccr2, ccr3, ccr4, pcr0; + + eflags = read_eflags(); + disable_intr(); + + load_cr0(rcr0() | CR0_CD | CR0_NW); + wbinvd(); + + (void)read_cyrix_reg(CCR3); /* dummy */ + + /* Initialize CCR2. */ + ccr2 = read_cyrix_reg(CCR2); + ccr2 |= CCR2_WB; +#ifdef CPU_SUSP_HLT + ccr2 |= CCR2_SUSP_HLT; +#else + ccr2 &= ~CCR2_SUSP_HLT; +#endif + ccr2 |= CCR2_WT1; + write_cyrix_reg(CCR2, ccr2); + + /* Initialize CCR4. */ + ccr3 = read_cyrix_reg(CCR3); + write_cyrix_reg(CCR3, CCR3_MAPEN0); + + ccr4 = read_cyrix_reg(CCR4); + ccr4 |= CCR4_DTE; + ccr4 |= CCR4_MEM; +#ifdef CPU_FASTER_5X86_FPU + ccr4 |= CCR4_FASTFPE; +#else + ccr4 &= ~CCR4_FASTFPE; +#endif + ccr4 &= ~CCR4_IOMASK; + /******************************************************************** + * WARNING: The "BIOS Writers Guide" mentions that I/O recovery time + * should be 0 for errata fix. 
+ ********************************************************************/ +#ifdef CPU_IORT + ccr4 |= CPU_IORT & CCR4_IOMASK; +#endif + write_cyrix_reg(CCR4, ccr4); + + /* Initialize PCR0. */ + /**************************************************************** + * WARNING: RSTK_EN and LOOP_EN could make your system unstable. + * BTB_EN might make your system unstable. + ****************************************************************/ + pcr0 = read_cyrix_reg(PCR0); +#ifdef CPU_RSTK_EN + pcr0 |= PCR0_RSTK; +#else + pcr0 &= ~PCR0_RSTK; +#endif +#ifdef CPU_BTB_EN + pcr0 |= PCR0_BTB; +#else + pcr0 &= ~PCR0_BTB; +#endif +#ifdef CPU_LOOP_EN + pcr0 |= PCR0_LOOP; +#else + pcr0 &= ~PCR0_LOOP; +#endif + + /**************************************************************** + * WARNING: if you use a memory mapped I/O device, don't use + * DISABLE_5X86_LSSER option, which may reorder memory mapped + * I/O access. + * IF YOUR MOTHERBOARD HAS PCI BUS, DON'T DISABLE LSSER. + ****************************************************************/ +#ifdef CPU_DISABLE_5X86_LSSER + pcr0 &= ~PCR0_LSSER; +#else + pcr0 |= PCR0_LSSER; +#endif + write_cyrix_reg(PCR0, pcr0); + + /* Restore CCR3. */ + write_cyrix_reg(CCR3, ccr3); + + (void)read_cyrix_reg(0x80); /* dummy */ + + /* Unlock NW bit in CR0. */ + write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); + load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */ + /* Lock NW bit in CR0. */ + write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); + + write_eflags(eflags); +} + +#ifdef CPU_I486_ON_386 +/* + * There are i486 based upgrade products for i386 machines. + * In this case, BIOS doesn't enables CPU cache. 
+ */ +static void +init_i486_on_386(void) +{ + u_long eflags; + +#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) + need_post_dma_flush = 1; +#endif + + eflags = read_eflags(); + disable_intr(); + + load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0, NW = 0 */ + + write_eflags(eflags); +} +#endif + +/* + * Cyrix 6x86 + * + * XXX - What should I do here? Please let me know. + */ +static void +init_6x86(void) +{ + u_long eflags; + u_char ccr3, ccr4; + + eflags = read_eflags(); + disable_intr(); + + load_cr0(rcr0() | CR0_CD | CR0_NW); + wbinvd(); + + /* Initialize CCR0. */ + write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1); + + /* Initialize CCR1. */ +#ifdef CPU_CYRIX_NO_LOCK + write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK); +#else + write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK); +#endif + + /* Initialize CCR2. */ +#ifdef CPU_SUSP_HLT + write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT); +#else + write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT); +#endif + + ccr3 = read_cyrix_reg(CCR3); + write_cyrix_reg(CCR3, CCR3_MAPEN0); + + /* Initialize CCR4. */ + ccr4 = read_cyrix_reg(CCR4); + ccr4 |= CCR4_DTE; + ccr4 &= ~CCR4_IOMASK; +#ifdef CPU_IORT + write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK)); +#else + write_cyrix_reg(CCR4, ccr4 | 7); +#endif + + /* Initialize CCR5. */ +#ifdef CPU_WT_ALLOC + write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC); +#endif + + /* Restore CCR3. */ + write_cyrix_reg(CCR3, ccr3); + + /* Unlock NW bit in CR0. */ + write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); + + /* + * Earlier revision of the 6x86 CPU could crash the system if + * L1 cache is in write-back mode. + */ + if ((cyrix_did & 0xff00) > 0x1600) + load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ + else { + /* Revision 2.6 and lower. 
*/ +#ifdef CYRIX_CACHE_REALLY_WORKS + load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ +#else + load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0 and NW = 1 */ +#endif + } + + /* Lock NW bit in CR0. */ + write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); + + write_eflags(eflags); +} +#endif /* I486_CPU */ + +#ifdef I686_CPU +/* + * Cyrix 6x86MX (code-named M2) + * + * XXX - What should I do here? Please let me know. + */ +static void +init_6x86MX(void) +{ +#if 0 + u_long eflags; + u_char ccr3, ccr4; + + eflags = read_eflags(); + disable_intr(); + + load_cr0(rcr0() | CR0_CD | CR0_NW); + wbinvd(); + + /* Initialize CCR0. */ + write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1); + + /* Initialize CCR1. */ +#ifdef CPU_CYRIX_NO_LOCK + write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK); +#else + write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK); +#endif + + /* Initialize CCR2. */ +#ifdef CPU_SUSP_HLT + write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT); +#else + write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT); +#endif + + ccr3 = read_cyrix_reg(CCR3); + write_cyrix_reg(CCR3, CCR3_MAPEN0); + + /* Initialize CCR4. */ + ccr4 = read_cyrix_reg(CCR4); + ccr4 &= ~CCR4_IOMASK; +#ifdef CPU_IORT + write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK)); +#else + write_cyrix_reg(CCR4, ccr4 | 7); +#endif + + /* Initialize CCR5. */ +#ifdef CPU_WT_ALLOC + write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC); +#endif + + /* Restore CCR3. */ + write_cyrix_reg(CCR3, ccr3); + + /* Unlock NW bit in CR0. */ + write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); + + load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ + + /* Lock NW bit in CR0. */ + write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); + + write_eflags(eflags); +#endif +} + +static void +init_ppro(void) +{ + u_int64_t apicbase; + + /* + * Local APIC should be disabled if it is not going to be used. 
+ */ + apicbase = rdmsr(MSR_APICBASE); + apicbase &= ~APICBASE_ENABLED; + wrmsr(MSR_APICBASE, apicbase); +} + +/* + * Initialize BBL_CR_CTL3 (Control register 3: used to configure the + * L2 cache). + */ +static void +init_mendocino(void) +{ +#ifdef CPU_PPRO2CELERON + u_long eflags; + u_int64_t bbl_cr_ctl3; + + eflags = read_eflags(); + disable_intr(); + + load_cr0(rcr0() | CR0_CD | CR0_NW); + wbinvd(); + + bbl_cr_ctl3 = rdmsr(MSR_BBL_CR_CTL3); + + /* If the L2 cache is configured, do nothing. */ + if (!(bbl_cr_ctl3 & 1)) { + bbl_cr_ctl3 = 0x134052bLL; + + /* Set L2 Cache Latency (Default: 5). */ +#ifdef CPU_CELERON_L2_LATENCY +#if CPU_L2_LATENCY > 15 +#error invalid CPU_L2_LATENCY. +#endif + bbl_cr_ctl3 |= CPU_L2_LATENCY << 1; +#else + bbl_cr_ctl3 |= 5 << 1; +#endif + wrmsr(MSR_BBL_CR_CTL3, bbl_cr_ctl3); + } + + load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); + write_eflags(eflags); +#endif /* CPU_PPRO2CELERON */ +} + +#endif /* I686_CPU */ + +/* + * Initialize CR4 (Control register 4) to enable SSE instructions. 
+ */ +void +enable_sse(void) +{ +#ifdef XEN + return; +#endif +#if defined(CPU_ENABLE_SSE) + if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { + load_cr4(rcr4() | CR4_FXSR | CR4_XMM); + cpu_fxsr = hw_instruction_sse = 1; + } +#endif +} + +void +initializecpu(void) +{ + + switch (cpu) { +#ifdef I486_CPU + case CPU_BLUE: + init_bluelightning(); + break; + case CPU_486DLC: + init_486dlc(); + break; + case CPU_CY486DX: + init_cy486dx(); + break; + case CPU_M1SC: + init_5x86(); + break; +#ifdef CPU_I486_ON_386 + case CPU_486: + init_i486_on_386(); + break; +#endif + case CPU_M1: + init_6x86(); + break; +#endif /* I486_CPU */ +#ifdef I686_CPU + case CPU_M2: + init_6x86MX(); + break; + case CPU_686: + if (strcmp(cpu_vendor, "GenuineIntel") == 0) { + switch (cpu_id & 0xff0) { + case 0x610: + init_ppro(); + break; + case 0x660: + init_mendocino(); + break; + } + } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { +#if defined(I686_CPU) && defined(CPU_ATHLON_SSE_HACK) + /* + * Sometimes the BIOS doesn't enable SSE instructions. + * According to AMD document 20734, the mobile + * Duron, the (mobile) Athlon 4 and the Athlon MP + * support SSE. These correspond to cpu_id 0x66X + * or 0x67X. + */ + if ((cpu_feature & CPUID_XMM) == 0 && + ((cpu_id & ~0xf) == 0x660 || + (cpu_id & ~0xf) == 0x670 || + (cpu_id & ~0xf) == 0x680)) { + u_int regs[4]; + wrmsr(0xC0010015, rdmsr(0xC0010015) & ~0x08000); + do_cpuid(1, regs); + cpu_feature = regs[3]; + } +#endif + } + break; +#endif + default: + break; + } + enable_sse(); + +#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) + /* + * OS should flush L1 cache by itself because no PC-98 supports + * non-Intel CPUs. Use wbinvd instruction before DMA transfer + * when need_pre_dma_flush = 1, use invd instruction after DMA + * transfer when need_post_dma_flush = 1. If your CPU upgrade + * product supports hardware cache control, you can add the + * CPU_UPGRADE_HW_CACHE option in your kernel configuration file. 
+ * This option eliminates unneeded cache flush instruction(s). + */ + if (strcmp(cpu_vendor, "CyrixInstead") == 0) { + switch (cpu) { +#ifdef I486_CPU + case CPU_486DLC: + need_post_dma_flush = 1; + break; + case CPU_M1SC: + need_pre_dma_flush = 1; + break; + case CPU_CY486DX: + need_pre_dma_flush = 1; +#ifdef CPU_I486_ON_386 + need_post_dma_flush = 1; +#endif + break; +#endif + default: + break; + } + } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { + switch (cpu_id & 0xFF0) { + case 0x470: /* Enhanced Am486DX2 WB */ + case 0x490: /* Enhanced Am486DX4 WB */ + case 0x4F0: /* Am5x86 WB */ + need_pre_dma_flush = 1; + break; + } + } else if (strcmp(cpu_vendor, "IBM") == 0) { + need_post_dma_flush = 1; + } else { +#ifdef CPU_I486_ON_386 + need_pre_dma_flush = 1; +#endif + } +#endif /* PC98 && !CPU_UPGRADE_HW_CACHE */ +} + +#if defined(I586_CPU) && defined(CPU_WT_ALLOC) +/* + * Enable write allocate feature of AMD processors. + * Following two functions require the Maxmem variable being set. + */ +void +enable_K5_wt_alloc(void) +{ + u_int64_t msr; + register_t savecrit; + + /* + * Write allocate is supported only on models 1, 2, and 3, with + * a stepping of 4 or greater. + */ + if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) { + savecrit = intr_disable(); + msr = rdmsr(0x83); /* HWCR */ + wrmsr(0x83, msr & !(0x10)); + + /* + * We have to tell the chip where the top of memory is, + * since video cards could have frame bufferes there, + * memory-mapped I/O could be there, etc. + */ + if(Maxmem > 0) + msr = Maxmem / 16; + else + msr = 0; + msr |= AMD_WT_ALLOC_TME | AMD_WT_ALLOC_FRE; +#ifdef PC98 + if (!(inb(0x43b) & 4)) { + wrmsr(0x86, 0x0ff00f0); + msr |= AMD_WT_ALLOC_PRE; + } +#else + /* + * There is no way to know wheter 15-16M hole exists or not. + * Therefore, we disable write allocate for this range. 
+ */ + wrmsr(0x86, 0x0ff00f0); + msr |= AMD_WT_ALLOC_PRE; +#endif + wrmsr(0x85, msr); + + msr=rdmsr(0x83); + wrmsr(0x83, msr|0x10); /* enable write allocate */ + intr_restore(savecrit); + } +} + +void +enable_K6_wt_alloc(void) +{ + quad_t size; + u_int64_t whcr; + u_long eflags; + + eflags = read_eflags(); + disable_intr(); + wbinvd(); + +#ifdef CPU_DISABLE_CACHE + /* + * Certain K6-2 box becomes unstable when write allocation is + * enabled. + */ + /* + * The AMD-K6 processer provides the 64-bit Test Register 12(TR12), + * but only the Cache Inhibit(CI) (bit 3 of TR12) is suppported. + * All other bits in TR12 have no effect on the processer's operation. + * The I/O Trap Restart function (bit 9 of TR12) is always enabled + * on the AMD-K6. + */ + wrmsr(0x0000000e, (u_int64_t)0x0008); +#endif + /* Don't assume that memory size is aligned with 4M. */ + if (Maxmem > 0) + size = ((Maxmem >> 8) + 3) >> 2; + else + size = 0; + + /* Limit is 508M bytes. */ + if (size > 0x7f) + size = 0x7f; + whcr = (rdmsr(0xc0000082) & ~(0x7fLL << 1)) | (size << 1); + +#if defined(PC98) || defined(NO_MEMORY_HOLE) + if (whcr & (0x7fLL << 1)) { +#ifdef PC98 + /* + * If bit 2 of port 0x43b is 0, disable wrte allocate for the + * 15-16M range. + */ + if (!(inb(0x43b) & 4)) + whcr &= ~0x0001LL; + else +#endif + whcr |= 0x0001LL; + } +#else + /* + * There is no way to know wheter 15-16M hole exists or not. + * Therefore, we disable write allocate for this range. + */ + whcr &= ~0x0001LL; +#endif + wrmsr(0x0c0000082, whcr); + + write_eflags(eflags); +} + +void +enable_K6_2_wt_alloc(void) +{ + quad_t size; + u_int64_t whcr; + u_long eflags; + + eflags = read_eflags(); + disable_intr(); + wbinvd(); + +#ifdef CPU_DISABLE_CACHE + /* + * Certain K6-2 box becomes unstable when write allocation is + * enabled. + */ + /* + * The AMD-K6 processer provides the 64-bit Test Register 12(TR12), + * but only the Cache Inhibit(CI) (bit 3 of TR12) is suppported. 
+ * All other bits in TR12 have no effect on the processer's operation. + * The I/O Trap Restart function (bit 9 of TR12) is always enabled + * on the AMD-K6. + */ + wrmsr(0x0000000e, (u_int64_t)0x0008); +#endif + /* Don't assume that memory size is aligned with 4M. */ + if (Maxmem > 0) + size = ((Maxmem >> 8) + 3) >> 2; + else + size = 0; + + /* Limit is 4092M bytes. */ + if (size > 0x3fff) + size = 0x3ff; + whcr = (rdmsr(0xc0000082) & ~(0x3ffLL << 22)) | (size << 22); + +#if defined(PC98) || defined(NO_MEMORY_HOLE) + if (whcr & (0x3ffLL << 22)) { +#ifdef PC98 + /* + * If bit 2 of port 0x43b is 0, disable wrte allocate for the + * 15-16M range. + */ + if (!(inb(0x43b) & 4)) + whcr &= ~(1LL << 16); + else +#endif + whcr |= 1LL << 16; + } +#else + /* + * There is no way to know wheter 15-16M hole exists or not. + * Therefore, we disable write allocate for this range. + */ + whcr &= ~(1LL << 16); +#endif + wrmsr(0x0c0000082, whcr); + + write_eflags(eflags); +} +#endif /* I585_CPU && CPU_WT_ALLOC */ + +#include "opt_ddb.h" +#ifdef DDB +#include <ddb/ddb.h> +#if 0 +DB_SHOW_COMMAND(cyrixreg, cyrixreg) +{ + u_long eflags; + u_int cr0; + u_char ccr1, ccr2, ccr3; + u_char ccr0 = 0, ccr4 = 0, ccr5 = 0, pcr0 = 0; + + cr0 = rcr0(); + if (strcmp(cpu_vendor,"CyrixInstead") == 0) { + eflags = read_eflags(); + disable_intr(); + + + if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) { + ccr0 = read_cyrix_reg(CCR0); + } + ccr1 = read_cyrix_reg(CCR1); + ccr2 = read_cyrix_reg(CCR2); + ccr3 = read_cyrix_reg(CCR3); + if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) { + write_cyrix_reg(CCR3, CCR3_MAPEN0); + ccr4 = read_cyrix_reg(CCR4); + if ((cpu == CPU_M1) || (cpu == CPU_M2)) + ccr5 = read_cyrix_reg(CCR5); + else + pcr0 = read_cyrix_reg(PCR0); + write_cyrix_reg(CCR3, ccr3); /* Restore CCR3. 
*/ + } + write_eflags(eflags); + + if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) + printf("CCR0=%x, ", (u_int)ccr0); + + printf("CCR1=%x, CCR2=%x, CCR3=%x", + (u_int)ccr1, (u_int)ccr2, (u_int)ccr3); + if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) { + printf(", CCR4=%x, ", (u_int)ccr4); + if (cpu == CPU_M1SC) + printf("PCR0=%x\n", pcr0); + else + printf("CCR5=%x\n", ccr5); + } + } + printf("CR0=%x\n", cr0); +} +#endif +#endif /* DDB */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c new file mode 100644 index 0000000000..6ab354a00c --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c @@ -0,0 +1,326 @@ +/*- + * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/i386/intr_machdep.c,v 1.4 2003/11/17 06:10:14 peter Exp $ + */ + +/* + * Machine dependent interrupt code for i386. For the i386, we have to + * deal with different PICs. Thus, we use the passed in vector to lookup + * an interrupt source associated with that vector. The interrupt source + * describes which PIC the source belongs to and includes methods to handle + * that source. + */ + +#include "opt_ddb.h" + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/interrupt.h> +#include <sys/lock.h> +#include <sys/ktr.h> +#include <sys/kernel.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <machine/clock.h> +#include <machine/intr_machdep.h> +#ifdef DDB +#include <ddb/ddb.h> +#endif + +#define MAX_STRAY_LOG 5 + +typedef void (*mask_fn)(uintptr_t vector); + +static int intrcnt_index; +static struct intsrc *interrupt_sources[NUM_IO_INTS]; +static struct mtx intr_table_lock; + +static void intr_init(void *__dummy); +static void intrcnt_setname(const char *name, int index); +static void intrcnt_updatename(struct intsrc *is); +static void intrcnt_register(struct intsrc *is); + +/* + * Register a new interrupt source with the global interrupt system. + * The global interrupts need to be disabled when this function is + * called. 
+ */ +int +intr_register_source(struct intsrc *isrc) +{ + int error, vector; + + vector = isrc->is_pic->pic_vector(isrc); + if (interrupt_sources[vector] != NULL) + return (EEXIST); + error = ithread_create(&isrc->is_ithread, (uintptr_t)isrc, 0, + (mask_fn)isrc->is_pic->pic_disable_source, + (mask_fn)isrc->is_pic->pic_enable_source, "irq%d:", vector); + if (error) + return (error); + mtx_lock_spin(&intr_table_lock); + if (interrupt_sources[vector] != NULL) { + mtx_unlock_spin(&intr_table_lock); + ithread_destroy(isrc->is_ithread); + return (EEXIST); + } + intrcnt_register(isrc); + interrupt_sources[vector] = isrc; + mtx_unlock_spin(&intr_table_lock); + return (0); +} + +struct intsrc * +intr_lookup_source(int vector) +{ + + return (interrupt_sources[vector]); +} + +int +intr_add_handler(const char *name, int vector, driver_intr_t handler, + void *arg, enum intr_type flags, void **cookiep) +{ + struct intsrc *isrc; + int error; + + isrc = intr_lookup_source(vector); + if (isrc == NULL) + return (EINVAL); + + error = ithread_add_handler(isrc->is_ithread, name, handler, arg, + ithread_priority(flags), flags, cookiep); + if (error == 0) { + intrcnt_updatename(isrc); + isrc->is_pic->pic_enable_intr(isrc); + isrc->is_pic->pic_enable_source(isrc); + } + return (error); +} + +int +intr_remove_handler(void *cookie) +{ + int error; + + error = ithread_remove_handler(cookie); +#ifdef XXX + if (error == 0) + intrcnt_updatename(/* XXX */); +#endif + return (error); +} + +int +intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol) +{ + struct intsrc *isrc; + + isrc = intr_lookup_source(vector); + if (isrc == NULL) + return (EINVAL); + return (isrc->is_pic->pic_config_intr(isrc, trig, pol)); +} + +void +intr_execute_handlers(struct intsrc *isrc, struct intrframe *iframe) +{ + struct thread *td; + struct ithd *it; + struct intrhand *ih; + int error, vector; + + td = curthread; + td->td_intr_nesting_level++; + + /* + * We count software interrupts when we 
process them. The + * code here follows previous practice, but there's an + * argument for counting hardware interrupts when they're + * processed too. + */ + atomic_add_long(isrc->is_count, 1); + atomic_add_int(&cnt.v_intr, 1); + + it = isrc->is_ithread; + if (it == NULL) + ih = NULL; + else + ih = TAILQ_FIRST(&it->it_handlers); + + /* + * XXX: We assume that IRQ 0 is only used for the ISA timer + * device (clk). + */ + vector = isrc->is_pic->pic_vector(isrc); + if (vector == 0) + clkintr_pending = 1; + + + if (ih != NULL && ih->ih_flags & IH_FAST) { + /* + * Execute fast interrupt handlers directly. + * To support clock handlers, if a handler registers + * with a NULL argument, then we pass it a pointer to + * a trapframe as its argument. + */ + critical_enter(); + TAILQ_FOREACH(ih, &it->it_handlers, ih_next) { + MPASS(ih->ih_flags & IH_FAST); + CTR3(KTR_INTR, "%s: executing handler %p(%p)", + __func__, ih->ih_handler, + ih->ih_argument == NULL ? iframe : + ih->ih_argument); + if (ih->ih_argument == NULL) + ih->ih_handler(iframe); + else + ih->ih_handler(ih->ih_argument); + } + isrc->is_pic->pic_eoi_source(isrc); + error = 0; + /* XXX */ + td->td_pflags &= ~TDP_OWEPREEMPT; + critical_exit(); + } else { + /* + * For stray and threaded interrupts, we mask and EOI the + * source. 
+ */ + isrc->is_pic->pic_disable_source(isrc, PIC_EOI); + if (ih == NULL) + error = EINVAL; + else + error = ithread_schedule(it); + isrc->is_pic->pic_eoi_source(isrc); + } + + if (error == EINVAL) { + atomic_add_long(isrc->is_straycount, 1); + if (*isrc->is_straycount < MAX_STRAY_LOG) + log(LOG_ERR, "stray irq%d\n", vector); + else if (*isrc->is_straycount == MAX_STRAY_LOG) + log(LOG_CRIT, + "too many stray irq %d's: not logging anymore\n", + vector); + } + td->td_intr_nesting_level--; + +} + +void +intr_resume(void) +{ + struct intsrc **isrc; + int i; + + mtx_lock_spin(&intr_table_lock); + for (i = 0, isrc = interrupt_sources; i < NUM_IO_INTS; i++, isrc++) + if (*isrc != NULL && (*isrc)->is_pic->pic_resume != NULL) + (*isrc)->is_pic->pic_resume(*isrc); + mtx_unlock_spin(&intr_table_lock); +} + +void +intr_suspend(void) +{ + struct intsrc **isrc; + int i; + + mtx_lock_spin(&intr_table_lock); + for (i = 0, isrc = interrupt_sources; i < NUM_IO_INTS; i++, isrc++) + if (*isrc != NULL && (*isrc)->is_pic->pic_suspend != NULL) + (*isrc)->is_pic->pic_suspend(*isrc); + mtx_unlock_spin(&intr_table_lock); +} + +static void +intrcnt_setname(const char *name, int index) +{ + + snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s", + MAXCOMLEN, name); +} + +static void +intrcnt_updatename(struct intsrc *is) +{ + + intrcnt_setname(is->is_ithread->it_td->td_proc->p_comm, is->is_index); +} + +static void +intrcnt_register(struct intsrc *is) +{ + char straystr[MAXCOMLEN + 1]; + + /* mtx_assert(&intr_table_lock, MA_OWNED); */ + KASSERT(is->is_ithread != NULL, ("%s: isrc with no ithread", __func__)); + is->is_index = intrcnt_index; + intrcnt_index += 2; + snprintf(straystr, MAXCOMLEN + 1, "stray irq%d", + is->is_pic->pic_vector(is)); + intrcnt_updatename(is); + is->is_count = &intrcnt[is->is_index]; + intrcnt_setname(straystr, is->is_index + 1); + is->is_straycount = &intrcnt[is->is_index + 1]; +} + +static void +intr_init(void *dummy __unused) +{ + + 
intrcnt_setname("???", 0); + intrcnt_index = 1; + mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN); +} +SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL) + +#ifdef DDB +/* + * Dump data about interrupt handlers + */ +DB_SHOW_COMMAND(irqs, db_show_irqs) +{ + struct intsrc **isrc; + int i, quit, verbose; + + quit = 0; + if (strcmp(modif, "v") == 0) + verbose = 1; + else + verbose = 0; + isrc = interrupt_sources; + db_setup_paging(db_simple_pager, &quit, DB_LINES_PER_PAGE); + for (i = 0; i < NUM_IO_INTS && !quit; i++, isrc++) + if (*isrc != NULL) + db_dump_ithread((*isrc)->is_ithread, verbose); +} +#endif diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c new file mode 100644 index 0000000000..9892a998b2 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c @@ -0,0 +1,850 @@ +/*- + * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/io_apic.c,v 1.14 2004/08/02 15:31:10 scottl Exp $"); + +#include "opt_isa.h" +#include "opt_no_mixed_mode.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/lock.h> +#include <sys/mutex.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/apicreg.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> +#include <machine/apicvar.h> +#include <machine/segments.h> + +#define IOAPIC_ISA_INTS 16 +#define IOAPIC_MEM_REGION 32 +#define IOAPIC_REDTBL_LO(i) (IOAPIC_REDTBL + (i) * 2) +#define IOAPIC_REDTBL_HI(i) (IOAPIC_REDTBL_LO(i) + 1) + +#define VECTOR_EXTINT 252 +#define VECTOR_NMI 253 +#define VECTOR_SMI 254 +#define VECTOR_DISABLED 255 + +#define DEST_NONE -1 +#define DEST_EXTINT -2 + +#define TODO printf("%s: not implemented!\n", __func__) + +MALLOC_DEFINE(M_IOAPIC, "I/O APIC", "I/O APIC structures"); + +/* + * New interrupt support code.. + * + * XXX: we really should have the interrupt cookie passed up from new-bus + * just be a int pin, and not map 1:1 to interrupt vector number but should + * use INTR_TYPE_FOO to set priority bands for device classes and do all the + * magic remapping of intpin to vector in here. For now we just cheat as on + * ia64 and map intpin X to vector NRSVIDT + X. 
Note that we assume that the + * first IO APIC has ISA interrupts on pins 1-15. Not sure how you are + * really supposed to figure out which IO APIC in a system with multiple IO + * APIC's actually has the ISA interrupts routed to it. As far as interrupt + * pin numbers, we use the ACPI System Interrupt number model where each + * IO APIC has a contiguous chunk of the System Interrupt address space. + */ + +/* + * Direct the ExtINT pin on the first I/O APIC to a logical cluster of + * CPUs rather than a physical destination of just the BSP. + * + * Note: This is disabled by default as test systems seem to croak with it + * enabled. +#define ENABLE_EXTINT_LOGICAL_DESTINATION + */ + +struct ioapic_intsrc { + struct intsrc io_intsrc; + u_int io_intpin:8; + u_int io_vector:8; + u_int io_activehi:1; + u_int io_edgetrigger:1; + u_int io_masked:1; + int io_dest:5; + int io_bus:4; +}; + +struct ioapic { + struct pic io_pic; + u_int io_id:8; /* logical ID */ + u_int io_apic_id:4; + u_int io_intbase:8; /* System Interrupt base */ + u_int io_numintr:8; + volatile ioapic_t *io_addr; /* XXX: should use bus_space */ + STAILQ_ENTRY(ioapic) io_next; + struct ioapic_intsrc io_pins[0]; +}; + +static u_int ioapic_read(volatile ioapic_t *apic, int reg); +static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val); +static const char *ioapic_bus_string(int bus_type); +static void ioapic_print_vector(struct ioapic_intsrc *intpin); +static void ioapic_enable_source(struct intsrc *isrc); +static void ioapic_disable_source(struct intsrc *isrc, int eoi); +static void ioapic_eoi_source(struct intsrc *isrc); +static void ioapic_enable_intr(struct intsrc *isrc); +static int ioapic_vector(struct intsrc *isrc); +static int ioapic_source_pending(struct intsrc *isrc); +static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol); +static void ioapic_suspend(struct intsrc *isrc); +static void ioapic_resume(struct intsrc *isrc); +static void 
ioapic_program_destination(struct ioapic_intsrc *intpin); +static void ioapic_program_intpin(struct ioapic_intsrc *intpin); +static void ioapic_setup_mixed_mode(struct ioapic_intsrc *intpin); + +static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); +struct pic ioapic_template = { ioapic_enable_source, ioapic_disable_source, + ioapic_eoi_source, ioapic_enable_intr, + ioapic_vector, ioapic_source_pending, + ioapic_suspend, ioapic_resume, + ioapic_config_intr }; + +static int bsp_id, current_cluster, logical_clusters, next_ioapic_base; +static u_int mixed_mode_enabled, next_id, program_logical_dest; +#ifdef NO_MIXED_MODE +static int mixed_mode_active = 0; +#else +static int mixed_mode_active = 1; +#endif +TUNABLE_INT("hw.apic.mixed_mode", &mixed_mode_active); + +static __inline void +_ioapic_eoi_source(struct intsrc *isrc) +{ + lapic_eoi(); +} + +static u_int +ioapic_read(volatile ioapic_t *apic, int reg) +{ + + mtx_assert(&icu_lock, MA_OWNED); + apic->ioregsel = reg; + return (apic->iowin); +} + +static void +ioapic_write(volatile ioapic_t *apic, int reg, u_int val) +{ + + mtx_assert(&icu_lock, MA_OWNED); + apic->ioregsel = reg; + apic->iowin = val; +} + +static const char * +ioapic_bus_string(int bus_type) +{ + + switch (bus_type) { + case APIC_BUS_ISA: + return ("ISA"); + case APIC_BUS_EISA: + return ("EISA"); + case APIC_BUS_PCI: + return ("PCI"); + default: + return ("unknown"); + } +} + +static void +ioapic_print_vector(struct ioapic_intsrc *intpin) +{ + + switch (intpin->io_vector) { + case VECTOR_DISABLED: + printf("disabled"); + break; + case VECTOR_EXTINT: + printf("ExtINT"); + break; + case VECTOR_NMI: + printf("NMI"); + break; + case VECTOR_SMI: + printf("SMI"); + break; + default: + printf("%s IRQ %u", ioapic_bus_string(intpin->io_bus), + intpin->io_vector); + } +} + +static void +ioapic_enable_source(struct intsrc *isrc) +{ + struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; + struct ioapic *io = (struct ioapic 
*)isrc->is_pic; + uint32_t flags; + + mtx_lock_spin(&icu_lock); + if (intpin->io_masked) { + flags = ioapic_read(io->io_addr, + IOAPIC_REDTBL_LO(intpin->io_intpin)); + flags &= ~(IOART_INTMASK); + ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), + flags); + intpin->io_masked = 0; + } + mtx_unlock_spin(&icu_lock); +} + +static void +ioapic_disable_source(struct intsrc *isrc, int eoi) +{ + struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; + struct ioapic *io = (struct ioapic *)isrc->is_pic; + uint32_t flags; + + mtx_lock_spin(&icu_lock); + if (!intpin->io_masked && !intpin->io_edgetrigger) { + flags = ioapic_read(io->io_addr, + IOAPIC_REDTBL_LO(intpin->io_intpin)); + flags |= IOART_INTMSET; + ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), + flags); + intpin->io_masked = 1; + } + + if (eoi == PIC_EOI) + _ioapic_eoi_source(isrc); + + mtx_unlock_spin(&icu_lock); +} + +static void +ioapic_eoi_source(struct intsrc *isrc) +{ + + _ioapic_eoi_source(isrc); +} + +/* + * Completely program an intpin based on the data in its interrupt source + * structure. + */ +static void +ioapic_program_intpin(struct ioapic_intsrc *intpin) +{ + struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic; + uint32_t low, high, value; + + /* + * For pins routed via mixed mode or disabled, just ensure that + * they are masked. + */ + if (intpin->io_dest == DEST_EXTINT || + intpin->io_vector == VECTOR_DISABLED) { + low = ioapic_read(io->io_addr, + IOAPIC_REDTBL_LO(intpin->io_intpin)); + if ((low & IOART_INTMASK) == IOART_INTMCLR) + ioapic_write(io->io_addr, + IOAPIC_REDTBL_LO(intpin->io_intpin), + low | IOART_INTMSET); + return; + } + + /* Set the destination. */ + if (intpin->io_dest == DEST_NONE) { + low = IOART_DESTPHY; + high = bsp_id << APIC_ID_SHIFT; + } else { + low = IOART_DESTLOG; + high = (intpin->io_dest << APIC_ID_CLUSTER_SHIFT | + APIC_ID_CLUSTER_ID) << APIC_ID_SHIFT; + } + + /* Program the rest of the low word. 
*/ + if (intpin->io_edgetrigger) + low |= IOART_TRGREDG; + else + low |= IOART_TRGRLVL; + if (intpin->io_activehi) + low |= IOART_INTAHI; + else + low |= IOART_INTALO; + if (intpin->io_masked) + low |= IOART_INTMSET; + switch (intpin->io_vector) { + case VECTOR_EXTINT: + KASSERT(intpin->io_edgetrigger, + ("EXTINT not edge triggered")); + low |= IOART_DELEXINT; + break; + case VECTOR_NMI: + KASSERT(intpin->io_edgetrigger, + ("NMI not edge triggered")); + low |= IOART_DELNMI; + break; + case VECTOR_SMI: + KASSERT(intpin->io_edgetrigger, + ("SMI not edge triggered")); + low |= IOART_DELSMI; + break; + default: + low |= IOART_DELLOPRI | apic_irq_to_idt(intpin->io_vector); + } + + /* Write the values to the APIC. */ + mtx_lock_spin(&icu_lock); + ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low); + value = ioapic_read(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin)); + value &= ~IOART_DEST; + value |= high; + ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), value); + mtx_unlock_spin(&icu_lock); +} + +/* + * Program an individual intpin's logical destination. + */ +static void +ioapic_program_destination(struct ioapic_intsrc *intpin) +{ + struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic; + + KASSERT(intpin->io_dest != DEST_NONE, + ("intpin not assigned to a cluster")); + KASSERT(intpin->io_dest != DEST_EXTINT, + ("intpin routed via ExtINT")); + if (bootverbose) { + printf("ioapic%u: routing intpin %u (", io->io_id, + intpin->io_intpin); + ioapic_print_vector(intpin); + printf(") to cluster %u\n", intpin->io_dest); + } + ioapic_program_intpin(intpin); +} + +static void +ioapic_assign_cluster(struct ioapic_intsrc *intpin) +{ + + /* + * Assign this intpin to a logical APIC cluster in a + * round-robin fashion. We don't actually use the logical + * destination for this intpin until after all the CPU's + * have been started so that we don't end up with interrupts + * that don't go anywhere. 
Another alternative might be to + * start up the CPU's earlier so that they can handle interrupts + * sooner. + */ + intpin->io_dest = current_cluster; + current_cluster++; + if (current_cluster >= logical_clusters) + current_cluster = 0; + if (program_logical_dest) + ioapic_program_destination(intpin); +} + +static void +ioapic_enable_intr(struct intsrc *isrc) +{ + struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; + + KASSERT(intpin->io_dest != DEST_EXTINT, + ("ExtINT pin trying to use ioapic enable_intr method")); + if (intpin->io_dest == DEST_NONE) { + ioapic_assign_cluster(intpin); + lapic_enable_intr(intpin->io_vector); + } +} + +static int +ioapic_vector(struct intsrc *isrc) +{ + struct ioapic_intsrc *pin; + + pin = (struct ioapic_intsrc *)isrc; + return (pin->io_vector); +} + +static int +ioapic_source_pending(struct intsrc *isrc) +{ + struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; + + return (lapic_intr_pending(intpin->io_vector)); +} + +static int +ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol) +{ + struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; + struct ioapic *io = (struct ioapic *)isrc->is_pic; + int changed; + + KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM), + ("%s: Conforming trigger or polarity\n", __func__)); + + /* + * EISA interrupts always use active high polarity, so don't allow + * them to be set to active low. + * + * XXX: Should we write to the ELCR if the trigger mode changes for + * an EISA IRQ? + */ + if (intpin->io_bus == APIC_BUS_EISA) + pol = INTR_POLARITY_HIGH; + changed = 0; + if (intpin->io_edgetrigger != (trig == INTR_TRIGGER_EDGE)) { + if (bootverbose) + printf("ioapic%u: Changing trigger for pin %u to %s\n", + io->io_id, intpin->io_intpin, + trig == INTR_TRIGGER_EDGE ? 
"edge" : "level"); + intpin->io_edgetrigger = (trig == INTR_TRIGGER_EDGE); + changed++; + } + if (intpin->io_activehi != (pol == INTR_POLARITY_HIGH)) { + if (bootverbose) + printf("ioapic%u: Changing polarity for pin %u to %s\n", + io->io_id, intpin->io_intpin, + pol == INTR_POLARITY_HIGH ? "high" : "low"); + intpin->io_activehi = (pol == INTR_POLARITY_HIGH); + changed++; + } + if (changed) + ioapic_program_intpin(intpin); + return (0); +} + +static void +ioapic_suspend(struct intsrc *isrc) +{ + + TODO; +} + +static void +ioapic_resume(struct intsrc *isrc) +{ + + ioapic_program_intpin((struct ioapic_intsrc *)isrc); +} + +/* + * APIC enumerators call this function to indicate that the 8259A AT PICs + * are available and that mixed mode can be used. + */ +void +ioapic_enable_mixed_mode(void) +{ + + mixed_mode_enabled = 1; +} + +/* + * Allocate and return a logical cluster ID. Note that the first time + * this is called, it returns cluster 0. ioapic_enable_intr() treats + * the two cases of logical_clusters == 0 and logical_clusters == 1 the + * same: one cluster of ID 0 exists. The logical_clusters == 0 case is + * for UP kernels, which should never call this function. + */ +int +ioapic_next_logical_cluster(void) +{ + + if (logical_clusters >= APIC_MAX_CLUSTER) + panic("WARNING: Local APIC cluster IDs exhausted!"); + return (logical_clusters++); +} + +/* + * Create a plain I/O APIC object. 
+ */ +void * +ioapic_create(uintptr_t addr, int32_t apic_id, int intbase) +{ + struct ioapic *io; + struct ioapic_intsrc *intpin; + volatile ioapic_t *apic; + u_int numintr, i; + uint32_t value; + + apic = (ioapic_t *)pmap_mapdev(addr, IOAPIC_MEM_REGION); + mtx_lock_spin(&icu_lock); + numintr = ((ioapic_read(apic, IOAPIC_VER) & IOART_VER_MAXREDIR) >> + MAXREDIRSHIFT) + 1; + mtx_unlock_spin(&icu_lock); + io = malloc(sizeof(struct ioapic) + + numintr * sizeof(struct ioapic_intsrc), M_IOAPIC, M_WAITOK); + io->io_pic = ioapic_template; + mtx_lock_spin(&icu_lock); + io->io_id = next_id++; + io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; + if (apic_id != -1 && io->io_apic_id != apic_id) { + ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT); + mtx_unlock_spin(&icu_lock); + io->io_apic_id = apic_id; + printf("ioapic%u: Changing APIC ID to %d\n", io->io_id, + apic_id); + } else + mtx_unlock_spin(&icu_lock); + if (intbase == -1) { + intbase = next_ioapic_base; + printf("ioapic%u: Assuming intbase of %d\n", io->io_id, + intbase); + } else if (intbase != next_ioapic_base) + printf("ioapic%u: WARNING: intbase %d != expected base %d\n", + io->io_id, intbase, next_ioapic_base); + io->io_intbase = intbase; + next_ioapic_base = intbase + numintr; + io->io_numintr = numintr; + io->io_addr = apic; + + /* + * Initialize pins. Start off with interrupts disabled. Default + * to active-hi and edge-triggered for ISA interrupts and active-lo + * and level-triggered for all others. + */ + bzero(io->io_pins, sizeof(struct ioapic_intsrc) * numintr); + mtx_lock_spin(&icu_lock); + for (i = 0, intpin = io->io_pins; i < numintr; i++, intpin++) { + intpin->io_intsrc.is_pic = (struct pic *)io; + intpin->io_intpin = i; + intpin->io_vector = intbase + i; + + /* + * Assume that pin 0 on the first I/O APIC is an ExtINT pin + * and that pins 1-15 are ISA interrupts. Assume that all + * other pins are PCI interrupts. 
+ */ + if (intpin->io_vector == 0) + ioapic_set_extint(io, i); + else if (intpin->io_vector < IOAPIC_ISA_INTS) { + intpin->io_bus = APIC_BUS_ISA; + intpin->io_activehi = 1; + intpin->io_edgetrigger = 1; + intpin->io_masked = 1; + } else { + intpin->io_bus = APIC_BUS_PCI; + intpin->io_activehi = 0; + intpin->io_edgetrigger = 0; + intpin->io_masked = 1; + } + + /* + * Route interrupts to the BSP by default using physical + * addressing. Vectored interrupts get readdressed using + * logical IDs to CPU clusters when they are enabled. + */ + intpin->io_dest = DEST_NONE; + if (bootverbose && intpin->io_vector != VECTOR_DISABLED) { + printf("ioapic%u: intpin %d -> ", io->io_id, i); + ioapic_print_vector(intpin); + printf(" (%s, %s)\n", intpin->io_edgetrigger ? + "edge" : "level", intpin->io_activehi ? "high" : + "low"); + } + value = ioapic_read(apic, IOAPIC_REDTBL_LO(i)); + ioapic_write(apic, IOAPIC_REDTBL_LO(i), value | IOART_INTMSET); + } + mtx_unlock_spin(&icu_lock); + + return (io); +} + +int +ioapic_get_vector(void *cookie, u_int pin) +{ + struct ioapic *io; + + io = (struct ioapic *)cookie; + if (pin >= io->io_numintr) + return (-1); + return (io->io_pins[pin].io_vector); +} + +int +ioapic_disable_pin(void *cookie, u_int pin) +{ + struct ioapic *io; + + io = (struct ioapic *)cookie; + if (pin >= io->io_numintr) + return (EINVAL); + if (io->io_pins[pin].io_vector == VECTOR_DISABLED) + return (EINVAL); + io->io_pins[pin].io_vector = VECTOR_DISABLED; + if (bootverbose) + printf("ioapic%u: intpin %d disabled\n", io->io_id, pin); + return (0); +} + +int +ioapic_remap_vector(void *cookie, u_int pin, int vector) +{ + struct ioapic *io; + + io = (struct ioapic *)cookie; + if (pin >= io->io_numintr || vector < 0) + return (EINVAL); + if (io->io_pins[pin].io_vector >= NUM_IO_INTS) + return (EINVAL); + io->io_pins[pin].io_vector = vector; + if (bootverbose) + printf("ioapic%u: Routing IRQ %d -> intpin %d\n", io->io_id, + vector, pin); + return (0); +} + +int 
+ioapic_set_bus(void *cookie, u_int pin, int bus_type) +{ + struct ioapic *io; + + if (bus_type < 0 || bus_type > APIC_BUS_MAX) + return (EINVAL); + io = (struct ioapic *)cookie; + if (pin >= io->io_numintr) + return (EINVAL); + if (io->io_pins[pin].io_vector >= NUM_IO_INTS) + return (EINVAL); + io->io_pins[pin].io_bus = bus_type; + if (bootverbose) + printf("ioapic%u: intpin %d bus %s\n", io->io_id, pin, + ioapic_bus_string(bus_type)); + return (0); +} + +int +ioapic_set_nmi(void *cookie, u_int pin) +{ + struct ioapic *io; + + io = (struct ioapic *)cookie; + if (pin >= io->io_numintr) + return (EINVAL); + if (io->io_pins[pin].io_vector == VECTOR_NMI) + return (0); + if (io->io_pins[pin].io_vector >= NUM_IO_INTS) + return (EINVAL); + io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; + io->io_pins[pin].io_vector = VECTOR_NMI; + io->io_pins[pin].io_masked = 0; + io->io_pins[pin].io_edgetrigger = 1; + io->io_pins[pin].io_activehi = 1; + if (bootverbose) + printf("ioapic%u: Routing NMI -> intpin %d\n", + io->io_id, pin); + return (0); +} + +int +ioapic_set_smi(void *cookie, u_int pin) +{ + struct ioapic *io; + + io = (struct ioapic *)cookie; + if (pin >= io->io_numintr) + return (EINVAL); + if (io->io_pins[pin].io_vector == VECTOR_SMI) + return (0); + if (io->io_pins[pin].io_vector >= NUM_IO_INTS) + return (EINVAL); + io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; + io->io_pins[pin].io_vector = VECTOR_SMI; + io->io_pins[pin].io_masked = 0; + io->io_pins[pin].io_edgetrigger = 1; + io->io_pins[pin].io_activehi = 1; + if (bootverbose) + printf("ioapic%u: Routing SMI -> intpin %d\n", + io->io_id, pin); + return (0); +} + +int +ioapic_set_extint(void *cookie, u_int pin) +{ + struct ioapic *io; + + io = (struct ioapic *)cookie; + if (pin >= io->io_numintr) + return (EINVAL); + if (io->io_pins[pin].io_vector == VECTOR_EXTINT) + return (0); + if (io->io_pins[pin].io_vector >= NUM_IO_INTS) + return (EINVAL); + io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; + io->io_pins[pin].io_vector = 
VECTOR_EXTINT; + + /* Enable this pin if mixed mode is available and active. */ + if (mixed_mode_enabled && mixed_mode_active) + io->io_pins[pin].io_masked = 0; + else + io->io_pins[pin].io_masked = 1; + io->io_pins[pin].io_edgetrigger = 1; + io->io_pins[pin].io_activehi = 1; + if (bootverbose) + printf("ioapic%u: Routing external 8259A's -> intpin %d\n", + io->io_id, pin); + return (0); +} + +int +ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) +{ + struct ioapic *io; + + io = (struct ioapic *)cookie; + if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM) + return (EINVAL); + if (io->io_pins[pin].io_vector >= NUM_IO_INTS) + return (EINVAL); + io->io_pins[pin].io_activehi = (pol == INTR_POLARITY_HIGH); + if (bootverbose) + printf("ioapic%u: intpin %d polarity: %s\n", io->io_id, pin, + pol == INTR_POLARITY_HIGH ? "high" : "low"); + return (0); +} + +int +ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) +{ + struct ioapic *io; + + io = (struct ioapic *)cookie; + if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM) + return (EINVAL); + if (io->io_pins[pin].io_vector >= NUM_IO_INTS) + return (EINVAL); + io->io_pins[pin].io_edgetrigger = (trigger == INTR_TRIGGER_EDGE); + if (bootverbose) + printf("ioapic%u: intpin %d trigger: %s\n", io->io_id, pin, + trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); + return (0); +} + +/* + * Register a complete I/O APIC object with the interrupt subsystem. 
+ */ +void +ioapic_register(void *cookie) +{ + struct ioapic_intsrc *pin; + struct ioapic *io; + volatile ioapic_t *apic; + uint32_t flags; + int i; + + io = (struct ioapic *)cookie; + apic = io->io_addr; + mtx_lock_spin(&icu_lock); + flags = ioapic_read(apic, IOAPIC_VER) & IOART_VER_VERSION; + STAILQ_INSERT_TAIL(&ioapic_list, io, io_next); + mtx_unlock_spin(&icu_lock); + printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n", + io->io_id, flags >> 4, flags & 0xf, io->io_intbase, + io->io_intbase + io->io_numintr - 1); + bsp_id = PCPU_GET(apic_id); + for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) { + /* + * Finish initializing the pins by programming the vectors + * and delivery mode. + */ + if (pin->io_vector == VECTOR_DISABLED) + continue; + ioapic_program_intpin(pin); + if (pin->io_vector >= NUM_IO_INTS) + continue; + /* + * Route IRQ0 via the 8259A using mixed mode if mixed mode + * is available and turned on. + */ + if (pin->io_vector == 0 && mixed_mode_active && + mixed_mode_enabled) + ioapic_setup_mixed_mode(pin); + else + intr_register_source(&pin->io_intsrc); + } +} + +/* + * Program all the intpins to use logical destinations once the AP's + * have been launched. + */ +static void +ioapic_set_logical_destinations(void *arg __unused) +{ + struct ioapic *io; + int i; + + program_logical_dest = 1; + STAILQ_FOREACH(io, &ioapic_list, io_next) + for (i = 0; i < io->io_numintr; i++) + if (io->io_pins[i].io_dest != DEST_NONE && + io->io_pins[i].io_dest != DEST_EXTINT) + ioapic_program_destination(&io->io_pins[i]); +} +SYSINIT(ioapic_destinations, SI_SUB_SMP, SI_ORDER_SECOND, + ioapic_set_logical_destinations, NULL) + +/* + * Support for mixed-mode interrupt sources. These sources route an ISA + * IRQ through the 8259A's via the ExtINT on pin 0 of the I/O APIC that + * routes the ISA interrupts. We just ignore the intpins that use this + * mode and allow the atpic driver to register its interrupt source for + * that IRQ instead. 
+ */ + +static void +ioapic_setup_mixed_mode(struct ioapic_intsrc *intpin) +{ + struct ioapic_intsrc *extint; + struct ioapic *io; + + /* + * Mark the associated I/O APIC intpin as being delivered via + * ExtINT and enable the ExtINT pin on the I/O APIC if needed. + */ + intpin->io_dest = DEST_EXTINT; + io = (struct ioapic *)intpin->io_intsrc.is_pic; + extint = &io->io_pins[0]; + if (extint->io_vector != VECTOR_EXTINT) + panic("Can't find ExtINT pin to route through!"); +#ifdef ENABLE_EXTINT_LOGICAL_DESTINATION + if (extint->io_dest == DEST_NONE) + ioapic_assign_cluster(extint); +#endif +} diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c new file mode 100644 index 0000000000..8fb7f9f12e --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c @@ -0,0 +1,762 @@ +/*- + * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> + * Copyright (c) 1996, by Steve Passe + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Local APIC support on Pentium and later processors. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/local_apic.c,v 1.9 2004/07/14 18:12:15 jhb Exp $"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/pcpu.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/apicreg.h> +#include <machine/cputypes.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> +#include <machine/apicvar.h> +#include <machine/md_var.h> +#include <machine/smp.h> +#include <machine/specialreg.h> + +/* + * We can handle up to 60 APICs via our logical cluster IDs, but currently + * the physical IDs on Intel processors up to the Pentium 4 are limited to + * 16. + */ +#define MAX_APICID 16 + +/* Sanity checks on IDT vectors. */ +CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS <= APIC_LOCAL_INTS); +CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); + +/* + * Support for local APICs. Local APICs manage interrupts on each + * individual processor as opposed to I/O APICs which receive interrupts + * from I/O devices and then forward them on to the local APICs. + * + * Local APICs can also send interrupts to each other thus providing the + * mechanism for IPIs. 
+ */ + +struct lvt { + u_int lvt_edgetrigger:1; + u_int lvt_activehi:1; + u_int lvt_masked:1; + u_int lvt_active:1; + u_int lvt_mode:16; + u_int lvt_vector:8; +}; + +struct lapic { + struct lvt la_lvts[LVT_MAX + 1]; + u_int la_id:8; + u_int la_cluster:4; + u_int la_cluster_id:2; + u_int la_present:1; +} static lapics[MAX_APICID]; + +/* XXX: should thermal be an NMI? */ + +/* Global defaults for local APIC LVT entries. */ +static struct lvt lvts[LVT_MAX + 1] = { + { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ + { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ + { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Timer: needs a vector */ + { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Error: needs a vector */ + { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* PMC */ + { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Thermal: needs a vector */ +}; + +static inthand_t *ioint_handlers[] = { + NULL, /* 0 - 31 */ + IDTVEC(apic_isr1), /* 32 - 63 */ + IDTVEC(apic_isr2), /* 64 - 95 */ + IDTVEC(apic_isr3), /* 96 - 127 */ + IDTVEC(apic_isr4), /* 128 - 159 */ + IDTVEC(apic_isr5), /* 160 - 191 */ + IDTVEC(apic_isr6), /* 192 - 223 */ + IDTVEC(apic_isr7), /* 224 - 255 */ +}; + +volatile lapic_t *lapic; + +static uint32_t +lvt_mode(struct lapic *la, u_int pin, uint32_t value) +{ + struct lvt *lvt; + + KASSERT(pin <= LVT_MAX, ("%s: pin %u out of range", __func__, pin)); + if (la->la_lvts[pin].lvt_active) + lvt = &la->la_lvts[pin]; + else + lvt = &lvts[pin]; + + value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | + APIC_LVT_VECTOR); + if (lvt->lvt_edgetrigger == 0) + value |= APIC_LVT_TM; + if (lvt->lvt_activehi == 0) + value |= APIC_LVT_IIPP_INTALO; + if (lvt->lvt_masked) + value |= APIC_LVT_M; + value |= lvt->lvt_mode; + switch (lvt->lvt_mode) { + case APIC_LVT_DM_NMI: + case APIC_LVT_DM_SMI: + case APIC_LVT_DM_INIT: + case APIC_LVT_DM_EXTINT: + if (!lvt->lvt_edgetrigger) { + printf("lapic%u: Forcing LINT%u to edge trigger\n", + la->la_id, pin); + value |= APIC_LVT_TM; + } 
+ /* Use a vector of 0. */ + break; + case APIC_LVT_DM_FIXED: +#if 0 + value |= lvt->lvt_vector; +#else + panic("Fixed LINT pins not supported"); +#endif + break; + default: + panic("bad APIC LVT delivery mode: %#x\n", value); + } + return (value); +} + +/* + * Map the local APIC and setup necessary interrupt vectors. + */ +void +lapic_init(uintptr_t addr) +{ + u_int32_t value; + + /* Map the local APIC and setup the spurious interrupt handler. */ + KASSERT(trunc_page(addr) == addr, + ("local APIC not aligned on a page boundary")); + lapic = (lapic_t *)pmap_mapdev(addr, sizeof(lapic_t)); + setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + + /* Perform basic initialization of the BSP's local APIC. */ + value = lapic->svr; + value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); + value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT); + lapic->svr = value; + + /* Set BSP's per-CPU local APIC ID. */ + PCPU_SET(apic_id, lapic_id()); + + /* XXX: timer/error/thermal interrupts */ +} + +/* + * Create a local APIC instance. + */ +void +lapic_create(u_int apic_id, int boot_cpu) +{ + int i; + + if (apic_id >= MAX_APICID) { + printf("APIC: Ignoring local APIC with ID %d\n", apic_id); + if (boot_cpu) + panic("Can't ignore BSP"); + return; + } + KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", + apic_id)); + + /* + * Assume no local LVT overrides and a cluster of 0 and + * intra-cluster ID of 0. 
+ */ + lapics[apic_id].la_present = 1; + lapics[apic_id].la_id = apic_id; + for (i = 0; i < LVT_MAX; i++) { + lapics[apic_id].la_lvts[i] = lvts[i]; + lapics[apic_id].la_lvts[i].lvt_active = 0; + } + +#ifdef SMP + cpu_add(apic_id, boot_cpu); +#endif +} + +/* + * Dump contents of local APIC registers + */ +void +lapic_dump(const char* str) +{ + + printf("cpu%d %s:\n", PCPU_GET(cpuid), str); + printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n", + lapic->id, lapic->version, lapic->ldr, lapic->dfr); + printf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", + lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr); +} + +void +lapic_enable_intr(u_int irq) +{ + u_int vector; + + vector = apic_irq_to_idt(irq); + KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); + KASSERT(ioint_handlers[vector / 32] != NULL, + ("No ISR handler for IRQ %u", irq)); + setidt(vector, ioint_handlers[vector / 32], SDT_SYS386IGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); +} + +void +lapic_setup(void) +{ + struct lapic *la; + u_int32_t value, maxlvt; + register_t eflags; + + la = &lapics[lapic_id()]; + KASSERT(la->la_present, ("missing APIC structure")); + eflags = intr_disable(); + maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; + + /* Program LINT[01] LVT entries. */ + lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0); + lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1); + + /* XXX: more LVT entries */ + + /* Clear the TPR. */ + value = lapic->tpr; + value &= ~APIC_TPR_PRIO; + lapic->tpr = value; + + /* Use the cluster model for logical IDs. */ + value = lapic->dfr; + value &= ~APIC_DFR_MODEL_MASK; + value |= APIC_DFR_MODEL_CLUSTER; + lapic->dfr = value; + + /* Set this APIC's logical ID. */ + value = lapic->ldr; + value &= ~APIC_ID_MASK; + value |= (la->la_cluster << APIC_ID_CLUSTER_SHIFT | + 1 << la->la_cluster_id) << APIC_ID_SHIFT; + lapic->ldr = value; + + /* Setup spurious vector and enable the local APIC. 
*/ + value = lapic->svr; + value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); + value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT); + lapic->svr = value; + intr_restore(eflags); +} + +void +lapic_disable(void) +{ + uint32_t value; + + /* Software disable the local APIC. */ + value = lapic->svr; + value &= ~APIC_SVR_SWEN; + lapic->svr = value; +} + +int +lapic_id(void) +{ + + KASSERT(lapic != NULL, ("local APIC is not mapped")); + return (lapic->id >> APIC_ID_SHIFT); +} + +int +lapic_intr_pending(u_int vector) +{ + volatile u_int32_t *irr; + + /* + * The IRR registers are an array of 128-bit registers each of + * which only describes 32 interrupts in the low 32 bits.. Thus, + * we divide the vector by 32 to get the 128-bit index. We then + * multiply that index by 4 to get the equivalent index from + * treating the IRR as an array of 32-bit registers. Finally, we + * modulus the vector by 32 to determine the individual bit to + * test. + */ + irr = &lapic->irr0; + return (irr[(vector / 32) * 4] & 1 << (vector % 32)); +} + +void +lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) +{ + struct lapic *la; + + KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", + __func__, apic_id)); + KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", + __func__, cluster)); + KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, + ("%s: intra cluster id %u too big", __func__, cluster_id)); + la = &lapics[apic_id]; + la->la_cluster = cluster; + la->la_cluster_id = cluster_id; +} + +int +lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) +{ + + if (pin > LVT_MAX) + return (EINVAL); + if (apic_id == APIC_ID_ALL) { + lvts[pin].lvt_masked = masked; + if (bootverbose) + printf("lapic:"); + } else { + KASSERT(lapics[apic_id].la_present, + ("%s: missing APIC %u", __func__, apic_id)); + lapics[apic_id].la_lvts[pin].lvt_masked = masked; + lapics[apic_id].la_lvts[pin].lvt_active = 1; + if (bootverbose) + printf("lapic%u:", apic_id); + } + if 
(bootverbose) + printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked"); + return (0); +} + +int +lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) +{ + struct lvt *lvt; + + if (pin > LVT_MAX) + return (EINVAL); + if (apic_id == APIC_ID_ALL) { + lvt = &lvts[pin]; + if (bootverbose) + printf("lapic:"); + } else { + KASSERT(lapics[apic_id].la_present, + ("%s: missing APIC %u", __func__, apic_id)); + lvt = &lapics[apic_id].la_lvts[pin]; + lvt->lvt_active = 1; + if (bootverbose) + printf("lapic%u:", apic_id); + } + lvt->lvt_mode = mode; + switch (mode) { + case APIC_LVT_DM_NMI: + case APIC_LVT_DM_SMI: + case APIC_LVT_DM_INIT: + case APIC_LVT_DM_EXTINT: + lvt->lvt_edgetrigger = 1; + lvt->lvt_activehi = 1; + if (mode == APIC_LVT_DM_EXTINT) + lvt->lvt_masked = 1; + else + lvt->lvt_masked = 0; + break; + default: + panic("Unsupported delivery mode: 0x%x\n", mode); + } + if (bootverbose) { + printf(" Routing "); + switch (mode) { + case APIC_LVT_DM_NMI: + printf("NMI"); + break; + case APIC_LVT_DM_SMI: + printf("SMI"); + break; + case APIC_LVT_DM_INIT: + printf("INIT"); + break; + case APIC_LVT_DM_EXTINT: + printf("ExtINT"); + break; + } + printf(" -> LINT%u\n", pin); + } + return (0); +} + +int +lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) +{ + + if (pin > LVT_MAX || pol == INTR_POLARITY_CONFORM) + return (EINVAL); + if (apic_id == APIC_ID_ALL) { + lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); + if (bootverbose) + printf("lapic:"); + } else { + KASSERT(lapics[apic_id].la_present, + ("%s: missing APIC %u", __func__, apic_id)); + lapics[apic_id].la_lvts[pin].lvt_active = 1; + lapics[apic_id].la_lvts[pin].lvt_activehi = + (pol == INTR_POLARITY_HIGH); + if (bootverbose) + printf("lapic%u:", apic_id); + } + if (bootverbose) + printf(" LINT%u polarity: active-%s\n", pin, + pol == INTR_POLARITY_HIGH ? 
"high" : "low"); + return (0); +} + +int +lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger) +{ + + if (pin > LVT_MAX || trigger == INTR_TRIGGER_CONFORM) + return (EINVAL); + if (apic_id == APIC_ID_ALL) { + lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); + if (bootverbose) + printf("lapic:"); + } else { + KASSERT(lapics[apic_id].la_present, + ("%s: missing APIC %u", __func__, apic_id)); + lapics[apic_id].la_lvts[pin].lvt_edgetrigger = + (trigger == INTR_TRIGGER_EDGE); + lapics[apic_id].la_lvts[pin].lvt_active = 1; + if (bootverbose) + printf("lapic%u:", apic_id); + } + if (bootverbose) + printf(" LINT%u trigger: %s\n", pin, + trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); + return (0); +} + +void +lapic_eoi(void) +{ + + lapic->eoi = 0; +} + +void +lapic_handle_intr(struct intrframe frame) +{ + struct intsrc *isrc; + + if (frame.if_vec == -1) + panic("Couldn't get vector from ISR!"); + isrc = intr_lookup_source(apic_idt_to_irq(frame.if_vec)); + intr_execute_handlers(isrc, &frame); +} + +/* Translate between IDT vectors and IRQ vectors. */ +u_int +apic_irq_to_idt(u_int irq) +{ + u_int vector; + + KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); + vector = irq + APIC_IO_INTS; + if (vector >= IDT_SYSCALL) + vector++; + return (vector); +} + +u_int +apic_idt_to_irq(u_int vector) +{ + + KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && + vector <= APIC_IO_INTS + NUM_IO_INTS, + ("Vector %u does not map to an IRQ line", vector)); + if (vector > IDT_SYSCALL) + vector--; + return (vector - APIC_IO_INTS); +} + +/* + * APIC probing support code. This includes code to manage enumerators. 
+ */ + +static SLIST_HEAD(, apic_enumerator) enumerators = + SLIST_HEAD_INITIALIZER(enumerators); +static struct apic_enumerator *best_enum; + +void +apic_register_enumerator(struct apic_enumerator *enumerator) +{ +#ifdef INVARIANTS + struct apic_enumerator *apic_enum; + + SLIST_FOREACH(apic_enum, &enumerators, apic_next) { + if (apic_enum == enumerator) + panic("%s: Duplicate register of %s", __func__, + enumerator->apic_name); + } +#endif + SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); +} + +/* + * Probe the APIC enumerators, enumerate CPUs, and initialize the + * local APIC. + */ +static void +apic_init(void *dummy __unused) +{ + struct apic_enumerator *enumerator; + uint64_t apic_base; + int retval, best; + + /* We only support built in local APICs. */ + if (!(cpu_feature & CPUID_APIC)) + return; + + /* Don't probe if APIC mode is disabled. */ + if (resource_disabled("apic", 0)) + return; + + /* First, probe all the enumerators to find the best match. */ + best_enum = NULL; + best = 0; + SLIST_FOREACH(enumerator, &enumerators, apic_next) { + retval = enumerator->apic_probe(); + if (retval > 0) + continue; + if (best_enum == NULL || best < retval) { + best_enum = enumerator; + best = retval; + } + } + if (best_enum == NULL) { + if (bootverbose) + printf("APIC: Could not find any APICs.\n"); + return; + } + + if (bootverbose) + printf("APIC: Using the %s enumerator.\n", + best_enum->apic_name); + + /* + * To work around an errata, we disable the local APIC on some + * CPUs during early startup. We need to turn the local APIC back + * on on such CPUs now. + */ + if (cpu == CPU_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && + (cpu_id & 0xff0) == 0x610) { + apic_base = rdmsr(MSR_APICBASE); + apic_base |= APICBASE_ENABLED; + wrmsr(MSR_APICBASE, apic_base); + } + + /* Second, probe the CPU's in the system. 
*/ + retval = best_enum->apic_probe_cpus(); + if (retval != 0) + printf("%s: Failed to probe CPUs: returned %d\n", + best_enum->apic_name, retval); + + /* Third, initialize the local APIC. */ + retval = best_enum->apic_setup_local(); + if (retval != 0) + printf("%s: Failed to setup the local APIC: returned %d\n", + best_enum->apic_name, retval); +#ifdef SMP + /* Last, setup the cpu topology now that we have probed CPUs */ + mp_topology(); +#endif +} +SYSINIT(apic_init, SI_SUB_CPU, SI_ORDER_FIRST, apic_init, NULL) + +/* + * Setup the I/O APICs. + */ +static void +apic_setup_io(void *dummy __unused) +{ + int retval; + + if (best_enum == NULL) + return; + retval = best_enum->apic_setup_io(); + if (retval != 0) + printf("%s: Failed to setup I/O APICs: returned %d\n", + best_enum->apic_name, retval); + + /* + * Finish setting up the local APIC on the BSP once we know how to + * properly program the LINT pins. + */ + lapic_setup(); + if (bootverbose) + lapic_dump("BSP"); +} +SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL) + +#ifdef SMP +/* + * Inter Processor Interrupt functions. The lapic_ipi_*() functions are + * private the sys/i386 code. The public interface for the rest of the + * kernel is defined in mp_machdep.c. + */ + +int +lapic_ipi_wait(int delay) +{ + int x, incr; + + /* + * Wait delay loops for IPI to be sent. This is highly bogus + * since this is sensitive to CPU clock speed. If delay is + * -1, we wait forever. + */ + if (delay == -1) { + incr = 0; + delay = 1; + } else + incr = 1; + for (x = 0; x < delay; x += incr) { + if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) + return (1); + ia32_pause(); + } + return (0); +} + +void +lapic_ipi_raw(register_t icrlo, u_int dest) +{ + register_t value, eflags; + + /* XXX: Need more sanity checking of icrlo? 
*/ + KASSERT(lapic != NULL, ("%s called too early", __func__)); + KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, + ("%s: invalid dest field", __func__)); + KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, + ("%s: reserved bits set in ICR LO register", __func__)); + + /* Set destination in ICR HI register if it is being used. */ + eflags = intr_disable(); + if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { + value = lapic->icr_hi; + value &= ~APIC_ID_MASK; + value |= dest << APIC_ID_SHIFT; + lapic->icr_hi = value; + } + + /* Program the contents of the IPI and dispatch it. */ + value = lapic->icr_lo; + value &= APIC_ICRLO_RESV_MASK; + value |= icrlo; + lapic->icr_lo = value; + intr_restore(eflags); +} + +#define BEFORE_SPIN 1000000 +#ifdef DETECT_DEADLOCK +#define AFTER_SPIN 1000 +#endif + +void +lapic_ipi_vectored(u_int vector, int dest) +{ + register_t icrlo, destfield; + + KASSERT((vector & ~APIC_VECTOR_MASK) == 0, + ("%s: invalid vector %d", __func__, vector)); + + icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY | + APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE; + destfield = 0; + switch (dest) { + case APIC_IPI_DEST_SELF: + icrlo |= APIC_DEST_SELF; + break; + case APIC_IPI_DEST_ALL: + icrlo |= APIC_DEST_ALLISELF; + break; + case APIC_IPI_DEST_OTHERS: + icrlo |= APIC_DEST_ALLESELF; + break; + default: + KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, + ("%s: invalid destination 0x%x", __func__, dest)); + destfield = dest; + } + + /* Wait for an earlier IPI to finish. */ + if (!lapic_ipi_wait(BEFORE_SPIN)) + panic("APIC: Previous IPI is stuck"); + + lapic_ipi_raw(icrlo, destfield); + +#ifdef DETECT_DEADLOCK + /* Wait for IPI to be delivered. */ + if (!lapic_ipi_wait(AFTER_SPIN)) { +#ifdef needsattention + /* + * XXX FIXME: + * + * The above function waits for the message to actually be + * delivered. 
It breaks out after an arbitrary timeout + * since the message should eventually be delivered (at + * least in theory) and that if it wasn't we would catch + * the failure with the check above when the next IPI is + * sent. + * + * We could skiip this wait entirely, EXCEPT it probably + * protects us from other routines that assume that the + * message was delivered and acted upon when this function + * returns. + */ + printf("APIC: IPI might be stuck\n"); +#else /* !needsattention */ + /* Wait until mesage is sent without a timeout. */ + while (lapic->icr_lo & APIC_DELSTAT_PEND) + ia32_pause(); +#endif /* needsattention */ + } +#endif /* DETECT_DEADLOCK */ +} +#endif /* SMP */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s new file mode 100644 index 0000000000..5146169162 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s @@ -0,0 +1,949 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 + * $FreeBSD: src/sys/i386/i386/locore.s,v 1.181 2003/11/03 21:53:37 jhb Exp $ + * + * originally from: locore.s, by William F. Jolitz + * + * Substantially rewritten by David Greenman, Rod Grimes, + * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp + * and many others. + */ + +#include "opt_bootp.h" +#include "opt_compat.h" +#include "opt_nfsroot.h" +#include "opt_pmap.h" + +#include <sys/syscall.h> +#include <sys/reboot.h> + +#include <machine/asmacros.h> +#include <machine/cputypes.h> +#include <machine/psl.h> +#include <machine/pmap.h> +#include <machine/specialreg.h> + +#include "assym.s" + +.section __xen_guest + .asciz "LOADER=generic,GUEST_VER=5.2.1,XEN_VER=2.0,BSD_SYMTAB" + + +/* + * XXX + * + * Note: This version greatly munged to avoid various assembler errors + * that may be fixed in newer versions of gas. Perhaps newer versions + * will have more pleasant appearance. + */ + +/* + * PTmap is recursive pagemap at top of virtual address space. + * Within PTmap, the page directory can be found (third indirection). 
+ */ + .globl PTmap,PTD,PTDpde + .set PTmap,(PTDPTDI << PDRSHIFT) + .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) + .set PTDpde,PTD + (PTDPTDI * PDESIZE) + +#ifdef SMP +/* + * Define layout of per-cpu address space. + * This is "constructed" in locore.s on the BSP and in mp_machdep.c + * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! + */ + .globl SMP_prvspace + .set SMP_prvspace,(MPPTDI << PDRSHIFT) +#endif /* SMP */ + +/* + * Compiled KERNBASE location and the kernel load address + */ + .globl kernbase + .set kernbase,KERNBASE + .globl kernload + .set kernload,KERNLOAD + +/* + * Globals + */ + .data + ALIGN_DATA /* just to be sure */ + + .space 0x2000 /* space for tmpstk - temporary stack */ +tmpstk: + + .globl bootinfo +bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ + + .globl KERNend +KERNend: .long 0 /* phys addr end of kernel (just after bss) */ +physfree: .long 0 /* phys addr of next free page */ + +#ifdef SMP + .globl cpu0prvpage +cpu0pp: .long 0 /* phys addr cpu0 private pg */ +cpu0prvpage: .long 0 /* relocated version */ + + .globl SMPpt +SMPptpa: .long 0 /* phys addr SMP page table */ +SMPpt: .long 0 /* relocated version */ +#endif /* SMP */ + + .globl IdlePTD +IdlePTD: .long 0 /* phys addr of kernel PTD */ + + + .globl KPTphys +KPTphys: .long 0 /* phys addr of kernel page tables */ + + .globl proc0uarea, proc0kstack +proc0uarea: .long 0 /* address of proc 0 uarea space */ +proc0kstack: .long 0 /* address of proc 0 kstack space */ +p0upa: .long 0 /* phys addr of proc0's UAREA */ +p0kpa: .long 0 /* phys addr of proc0's STACK */ + +#ifdef PC98 + .globl pc98_system_parameter +pc98_system_parameter: + .space 0x240 +#endif + +/********************************************************************** + * + * Some handy macros + * + */ + +#define R(foo) ((foo)) + +#define ALLOCPAGES(foo) \ + movl R(physfree), %esi ; \ + movl $((foo)*PAGE_SIZE), %eax ; \ + addl %esi, %eax ; \ + movl %eax, R(physfree) ; \ + movl %esi, %edi ; \ + movl 
$((foo)*PAGE_SIZE),%ecx ; \ + xorl %eax,%eax ; \ + cld ; \ + rep ; \ + stosb + +/* + * fillkpt + * eax = page frame address + * ebx = index into page table + * ecx = how many pages to map + * base = base address of page dir/table + * prot = protection bits + */ +#define fillkpt(base, prot) \ + shll $PTESHIFT,%ebx ; \ + addl base,%ebx ; \ + orl $PG_V,%eax ; \ + orl prot,%eax ; \ +1: movl %eax,(%ebx) ; \ + addl $PAGE_SIZE,%eax ; /* increment physical address */ \ + addl $PTESIZE,%ebx ; /* next pte */ \ + loop 1b + +/* + * fillkptphys(prot) + * eax = physical address + * ecx = how many pages to map + * prot = protection bits + */ +#define fillkptphys(prot) \ + movl %eax, %ebx ; \ + shrl $PAGE_SHIFT, %ebx ; \ + fillkpt(R(KPTphys), prot) + + .text +/********************************************************************** + * + * This is where the bootblocks start us, set the ball rolling... + * + */ +NON_GPROF_ENTRY(btext) + pushl %esi + call initvalues + popl %esi + call identify_cpu + movl proc0kstack,%eax + leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp + xorl %ebp,%ebp /* mark end of frames */ + movl IdlePTD,%esi + movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) + call init386 + call mi_startup + int $3 + + +#ifdef PC98 + /* save SYSTEM PARAMETER for resume (NS/T or other) */ + movl $0xa1400,%esi + movl $R(pc98_system_parameter),%edi + movl $0x0240,%ecx + cld + rep + movsb +#else /* IBM-PC */ +/* Tell the bios to warmboot next time */ + movw $0x1234,0x472 +#endif /* PC98 */ + +/* Set up a real frame in case the double return in newboot is executed. */ + pushl %ebp + movl %esp, %ebp + +/* Don't trust what the BIOS gives for eflags. */ + pushl $PSL_KERNEL + popfl + +/* + * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap + * to set %cs, %ds, %es and %ss. + */ + mov %ds, %ax + mov %ax, %fs + mov %ax, %gs + +/* + * Clear the bss. Not all boot programs do it, and it is our job anyway. 
+ * + * XXX we don't check that there is memory for our bss and page tables + * before using it. + * + * Note: we must be careful to not overwrite an active gdt or idt. They + * inactive from now until we switch to new ones, since we don't load any + * more segment registers or permit interrupts until after the switch. + */ + movl $R(end),%ecx + movl $R(edata),%edi + subl %edi,%ecx + xorl %eax,%eax + cld + rep + stosb + + call recover_bootinfo + +/* Get onto a stack that we can trust. */ +/* + * XXX this step is delayed in case recover_bootinfo needs to return via + * the old stack, but it need not be, since recover_bootinfo actually + * returns via the old frame. + */ + movl $R(tmpstk),%esp + +#ifdef PC98 + /* pc98_machine_type & M_EPSON_PC98 */ + testb $0x02,R(pc98_system_parameter)+220 + jz 3f + /* epson_machine_id <= 0x0b */ + cmpb $0x0b,R(pc98_system_parameter)+224 + ja 3f + + /* count up memory */ + movl $0x100000,%eax /* next, talley remaining memory */ + movl $0xFFF-0x100,%ecx +1: movl 0(%eax),%ebx /* save location to check */ + movl $0xa55a5aa5,0(%eax) /* write test pattern */ + cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */ + jne 2f + movl %ebx,0(%eax) /* restore memory */ + addl $PAGE_SIZE,%eax + loop 1b +2: subl $0x100000,%eax + shrl $17,%eax + movb %al,R(pc98_system_parameter)+1 +3: + + movw R(pc98_system_parameter+0x86),%ax + movw %ax,R(cpu_id) +#endif + + call identify_cpu + call create_pagetables + +/* + * If the CPU has support for VME, turn it on. + */ + testl $CPUID_VME, R(cpu_feature) + jz 1f + movl %cr4, %eax + orl $CR4_VME, %eax + movl %eax, %cr4 +1: + +/* Now enable paging */ + movl R(IdlePTD), %eax + movl %eax,%cr3 /* load ptd addr into mmu */ + movl %cr0,%eax /* get control word */ + orl $CR0_PE|CR0_PG,%eax /* enable paging */ + movl %eax,%cr0 /* and let's page NOW! 
*/ + + pushl $begin /* jump to high virtualized address */ + ret + +/* now running relocated at KERNBASE where the system is linked to run */ +begin: + /* set up bootstrap stack */ + movl proc0kstack,%eax /* location of in-kernel stack */ + /* bootstrap stack end location */ + leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp + + xorl %ebp,%ebp /* mark end of frames */ + +#ifdef PAE + movl IdlePDPT,%esi +#else + movl IdlePTD,%esi +#endif + movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) + + pushl physfree /* value of first for init386(first) */ + call init386 /* wire 386 chip for unix operation */ + + /* + * Clean up the stack in a way that db_numargs() understands, so + * that backtraces in ddb don't underrun the stack. Traps for + * inaccessible memory are more fatal than usual this early. + */ + addl $4,%esp + + call mi_startup /* autoconfiguration, mountroot etc */ + /* NOTREACHED */ + addl $0,%esp /* for db_numargs() again */ + +/* + * Signal trampoline, copied to top of user stack + */ +NON_GPROF_ENTRY(sigcode) + calll *SIGF_HANDLER(%esp) + leal SIGF_UC(%esp),%eax /* get ucontext */ + pushl %eax + testl $PSL_VM,UC_EFLAGS(%eax) + jne 1f + movl UC_GS(%eax),%gs /* restore %gs */ +1: + movl $SYS_sigreturn,%eax + pushl %eax /* junk to fake return addr. */ + int $0x80 /* enter kernel with args */ + /* on stack */ +1: + jmp 1b + +#ifdef COMPAT_FREEBSD4 + ALIGN_TEXT +freebsd4_sigcode: + calll *SIGF_HANDLER(%esp) + leal SIGF_UC4(%esp),%eax /* get ucontext */ + pushl %eax + testl $PSL_VM,UC4_EFLAGS(%eax) + jne 1f + movl UC4_GS(%eax),%gs /* restore %gs */ +1: + movl $344,%eax /* 4.x SYS_sigreturn */ + pushl %eax /* junk to fake return addr. 
*/ + int $0x80 /* enter kernel with args */ + /* on stack */ +1: + jmp 1b +#endif + +#ifdef COMPAT_43 + ALIGN_TEXT +osigcode: + call *SIGF_HANDLER(%esp) /* call signal handler */ + lea SIGF_SC(%esp),%eax /* get sigcontext */ + pushl %eax + testl $PSL_VM,SC_PS(%eax) + jne 9f + movl SC_GS(%eax),%gs /* restore %gs */ +9: + movl $103,%eax /* 3.x SYS_sigreturn */ + pushl %eax /* junk to fake return addr. */ + int $0x80 /* enter kernel with args */ +0: jmp 0b +#endif /* COMPAT_43 */ + + ALIGN_TEXT +esigcode: + + .data + .globl szsigcode +szsigcode: + .long esigcode-sigcode +#ifdef COMPAT_FREEBSD4 + .globl szfreebsd4_sigcode +szfreebsd4_sigcode: + .long esigcode-freebsd4_sigcode +#endif +#ifdef COMPAT_43 + .globl szosigcode +szosigcode: + .long esigcode-osigcode +#endif + .text + +/********************************************************************** + * + * Recover the bootinfo passed to us from the boot program + * + */ +recover_bootinfo: + /* + * This code is called in different ways depending on what loaded + * and started the kernel. This is used to detect how we get the + * arguments from the other code and what we do with them. + * + * Old disk boot blocks: + * (*btext)(howto, bootdev, cyloffset, esym); + * [return address == 0, and can NOT be returned to] + * [cyloffset was not supported by the FreeBSD boot code + * and always passed in as 0] + * [esym is also known as total in the boot code, and + * was never properly supported by the FreeBSD boot code] + * + * Old diskless netboot code: + * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); + * [return address != 0, and can NOT be returned to] + * If we are being booted by this code it will NOT work, + * so we are just going to halt if we find this case. 
+ * + * New uniform boot code: + * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) + * [return address != 0, and can be returned to] + * + * There may seem to be a lot of wasted arguments in here, but + * that is so the newer boot code can still load very old kernels + * and old boot code can load new kernels. + */ + + /* + * The old style disk boot blocks fake a frame on the stack and + * did an lret to get here. The frame on the stack has a return + * address of 0. + */ + cmpl $0,4(%ebp) + je olddiskboot + + /* + * We have some form of return address, so this is either the + * old diskless netboot code, or the new uniform code. That can + * be detected by looking at the 5th argument, if it is 0 + * we are being booted by the new uniform boot code. + */ + cmpl $0,24(%ebp) + je newboot + + /* + * Seems we have been loaded by the old diskless boot code, we + * don't stand a chance of running as the diskless structure + * changed considerably between the two, so just halt. + */ + hlt + + /* + * We have been loaded by the new uniform boot code. + * Let's check the bootinfo version, and if we do not understand + * it we return to the loader with a status of 1 to indicate this error + */ +newboot: + movl 28(%ebp),%ebx /* &bootinfo.version */ + movl BI_VERSION(%ebx),%eax + cmpl $1,%eax /* We only understand version 1 */ + je 1f + movl $1,%eax /* Return status */ + leave + /* + * XXX this returns to our caller's caller (as is required) since + * we didn't set up a frame and our caller did. + */ + ret + +1: + /* + * If we have a kernelname copy it in + */ + movl BI_KERNELNAME(%ebx),%esi + cmpl $0,%esi + je 2f /* No kernelname */ + movl $MAXPATHLEN,%ecx /* Brute force!!! */ + movl $R(kernelname),%edi + cmpb $'/',(%esi) /* Make sure it starts with a slash */ + je 1f + movb $'/',(%edi) + incl %edi + decl %ecx +1: + cld + rep + movsb + +2: + /* + * Determine the size of the boot loader's copy of the bootinfo + * struct. 
This is impossible to do properly because old versions + * of the struct don't contain a size field and there are 2 old + * versions with the same version number. + */ + movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ + testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ + je got_bi_size /* no, sizeless version */ + movl BI_SIZE(%ebx),%ecx +got_bi_size: + + /* + * Copy the common part of the bootinfo struct + */ + movl %ebx,%esi + movl $R(bootinfo),%edi + cmpl $BOOTINFO_SIZE,%ecx + jbe got_common_bi_size + movl $BOOTINFO_SIZE,%ecx +got_common_bi_size: + cld + rep + movsb + +#ifdef NFS_ROOT +#ifndef BOOTP_NFSV3 + /* + * If we have a nfs_diskless structure copy it in + */ + movl BI_NFS_DISKLESS(%ebx),%esi + cmpl $0,%esi + je olddiskboot + movl $R(nfs_diskless),%edi + movl $NFSDISKLESS_SIZE,%ecx + cld + rep + movsb + movl $R(nfs_diskless_valid),%edi + movl $1,(%edi) +#endif +#endif + + /* + * The old style disk boot. + * (*btext)(howto, bootdev, cyloffset, esym); + * Note that the newer boot code just falls into here to pick + * up howto and bootdev, cyloffset and esym are no longer used + */ +olddiskboot: + movl 8(%ebp),%eax + movl %eax,R(boothowto) + movl 12(%ebp),%eax + movl %eax,R(bootdev) + + ret + + +/********************************************************************** + * + * Identify the CPU and initialize anything special about it + * + */ +identify_cpu: + + /* Try to toggle alignment check flag ; does not exist on 386. */ + pushfl + popl %eax + movl %eax,%ecx + orl $PSL_AC,%eax + pushl %eax + popfl + pushfl + popl %eax + xorl %ecx,%eax + andl $PSL_AC,%eax + pushl %ecx + popfl + + testl %eax,%eax + jnz try486 + + /* NexGen CPU does not have aligment check flag. 
*/ + pushfl + movl $0x5555, %eax + xorl %edx, %edx + movl $2, %ecx + clc + divl %ecx + jz trynexgen + popfl + movl $CPU_386,R(cpu) + jmp 3f + +trynexgen: + popfl + movl $CPU_NX586,R(cpu) + movl $0x4778654e,R(cpu_vendor) # store vendor string + movl $0x72446e65,R(cpu_vendor+4) + movl $0x6e657669,R(cpu_vendor+8) + movl $0,R(cpu_vendor+12) + jmp 3f + +try486: /* Try to toggle identification flag ; does not exist on early 486s. */ + pushfl + popl %eax + movl %eax,%ecx + xorl $PSL_ID,%eax + pushl %eax + popfl + pushfl + popl %eax + xorl %ecx,%eax + andl $PSL_ID,%eax + pushl %ecx + popfl + + testl %eax,%eax + jnz trycpuid + movl $CPU_486,R(cpu) + + /* + * Check Cyrix CPU + * Cyrix CPUs do not change the undefined flags following + * execution of the divide instruction which divides 5 by 2. + * + * Note: CPUID is enabled on M2, so it passes another way. + */ + pushfl + movl $0x5555, %eax + xorl %edx, %edx + movl $2, %ecx + clc + divl %ecx + jnc trycyrix + popfl + jmp 3f /* You may use Intel CPU. */ + +trycyrix: + popfl + /* + * IBM Bluelighting CPU also doesn't change the undefined flags. + * Because IBM doesn't disclose the information for Bluelighting + * CPU, we couldn't distinguish it from Cyrix's (including IBM + * brand of Cyrix CPUs). + */ + movl $0x69727943,R(cpu_vendor) # store vendor string + movl $0x736e4978,R(cpu_vendor+4) + movl $0x64616574,R(cpu_vendor+8) + jmp 3f + +trycpuid: /* Use the `cpuid' instruction. */ + xorl %eax,%eax + cpuid # cpuid 0 + movl %eax,R(cpu_high) # highest capability + movl %ebx,R(cpu_vendor) # store vendor string + movl %edx,R(cpu_vendor+4) + movl %ecx,R(cpu_vendor+8) + movb $0,R(cpu_vendor+12) + + movl $1,%eax + cpuid # cpuid 1 + movl %eax,R(cpu_id) # store cpu_id + movl %ebx,R(cpu_procinfo) # store cpu_procinfo + movl %edx,R(cpu_feature) # store cpu_feature + rorl $8,%eax # extract family type + andl $15,%eax + cmpl $5,%eax + jae 1f + + /* less than Pentium ; must be 486 */ + movl $CPU_486,R(cpu) + jmp 3f +1: + /* a Pentium? 
*/ + cmpl $5,%eax + jne 2f + movl $CPU_586,R(cpu) + jmp 3f +2: + /* Greater than Pentium...call it a Pentium Pro */ + movl $CPU_686,R(cpu) +3: + ret + +/********************************************************************** + * + * Create the first page directory and its page tables. + * + */ + +create_pagetables: + +/* Find end of kernel image (rounded up to a page boundary). */ + movl $R(_end),%esi + +/* Include symbols, if any. */ + movl R(bootinfo+BI_ESYMTAB),%edi + testl %edi,%edi + je over_symalloc + movl %edi,%esi + movl $KERNBASE,%edi + addl %edi,R(bootinfo+BI_SYMTAB) + addl %edi,R(bootinfo+BI_ESYMTAB) +over_symalloc: + +/* If we are told where the end of the kernel space is, believe it. */ + movl R(bootinfo+BI_KERNEND),%edi + testl %edi,%edi + je no_kernend + movl %edi,%esi +no_kernend: + + addl $PDRMASK,%esi /* Play conservative for now, and */ + andl $~PDRMASK,%esi /* ... wrap to next 4M. */ + movl %esi,R(KERNend) /* save end of kernel */ + movl %esi,R(physfree) /* next free page is at end of kernel */ + +/* Allocate Kernel Page Tables */ + ALLOCPAGES(NKPT) + movl %esi,R(KPTphys) + +/* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif + ALLOCPAGES(NPGPTD) + movl %esi,R(IdlePTD) + +/* Allocate UPAGES */ + ALLOCPAGES(UAREA_PAGES) + movl %esi,R(p0upa) + addl $KERNBASE, %esi + movl %esi, R(proc0uarea) + + ALLOCPAGES(KSTACK_PAGES) + movl %esi,R(p0kpa) + addl $KERNBASE, %esi + movl %esi, R(proc0kstack) +#if 0 + ALLOCPAGES(1) /* vm86/bios stack */ + movl %esi,R(vm86phystk) + + ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ + movl %esi,R(vm86pa) + addl $KERNBASE, %esi + movl %esi, R(vm86paddr) +#endif +#ifdef SMP +/* Allocate cpu0's private data page */ + ALLOCPAGES(1) + movl %esi,R(cpu0pp) + addl $KERNBASE, %esi + movl %esi, R(cpu0prvpage) /* relocated to KVM space */ + +/* Allocate SMP page table page */ + ALLOCPAGES(1) + movl %esi,R(SMPptpa) + addl $KERNBASE, %esi + movl 
%esi, R(SMPpt) /* relocated to KVM space */ +#endif /* SMP */ + +/* Map page zero read-write so bios32 calls can use it */ + xorl %eax, %eax + movl $PG_RW,%edx + movl $1,%ecx + fillkptphys(%edx) + +/* Map read-only from page 1 to the beginning of the kernel text section */ + movl $PAGE_SIZE, %eax + xorl %edx,%edx + movl $R(btext),%ecx + addl $PAGE_MASK,%ecx + subl %eax,%ecx + shrl $PAGE_SHIFT,%ecx + fillkptphys(%edx) + +/* + * Enable PSE and PGE. + */ +#ifndef DISABLE_PSE + testl $CPUID_PSE, R(cpu_feature) + jz 1f + movl $PG_PS, R(pseflag) + movl %cr4, %eax + orl $CR4_PSE, %eax + movl %eax, %cr4 +1: +#endif +#ifndef DISABLE_PG_G + testl $CPUID_PGE, R(cpu_feature) + jz 2f + movl $PG_G, R(pgeflag) + movl %cr4, %eax + orl $CR4_PGE, %eax + movl %eax, %cr4 +2: +#endif + +/* + * Write page tables for the kernel starting at btext and + * until the end. Make sure to map read+write. We do this even + * if we've enabled PSE above, we'll just switch the corresponding kernel + * PDEs before we turn on paging. + * + * XXX: We waste some pages here in the PSE case! DON'T BLINDLY REMOVE + * THIS! SMP needs the page table to be there to map the kernel P==V. + */ + movl $R(btext),%eax + addl $PAGE_MASK, %eax + andl $~PAGE_MASK, %eax + movl $PG_RW,%edx + movl R(KERNend),%ecx + subl %eax,%ecx + shrl $PAGE_SHIFT,%ecx + fillkptphys(%edx) + +/* Map page directory. */ + movl R(IdlePTD), %eax + movl $NPGPTD, %ecx + fillkptphys($PG_RW) + +/* Map proc0's UPAGES in the physical way ... */ + movl R(p0upa), %eax + movl $(UAREA_PAGES), %ecx + fillkptphys($PG_RW) + +/* Map proc0's KSTACK in the physical way ... 
*/ + movl R(p0kpa), %eax + movl $(KSTACK_PAGES), %ecx + fillkptphys($PG_RW) + +/* Map ISA hole */ + movl $ISA_HOLE_START, %eax + movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx + fillkptphys($PG_RW) +#if 0 +/* Map space for the vm86 region */ + movl R(vm86phystk), %eax + movl $4, %ecx + fillkptphys($PG_RW) + +/* Map page 0 into the vm86 page table */ + movl $0, %eax + movl $0, %ebx + movl $1, %ecx + fillkpt(R(vm86pa), $PG_RW|PG_U) + +/* ...likewise for the ISA hole */ + movl $ISA_HOLE_START, %eax + movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx + movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx + fillkpt(R(vm86pa), $PG_RW|PG_U) +#endif +#ifdef SMP +/* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */ + movl R(cpu0pp), %eax + movl $1, %ecx + fillkptphys($PG_RW) + +/* Map SMP page table page into global kmem FWIW */ + movl R(SMPptpa), %eax + movl $1, %ecx + fillkptphys($PG_RW) + +/* Map the private page into the SMP page table */ + movl R(cpu0pp), %eax + movl $0, %ebx /* pte offset = 0 */ + movl $1, %ecx /* one private page coming right up */ + fillkpt(R(SMPptpa), $PG_RW) + +/* ... and put the page table table in the pde. */ + movl R(SMPptpa), %eax + movl $MPPTDI, %ebx + movl $1, %ecx + fillkpt(R(IdlePTD), $PG_RW) + +/* Fakeup VA for the local apic to allow early traps. */ + ALLOCPAGES(1) + movl %esi, %eax + movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ + movl $1, %ecx /* one private pt coming right up */ + fillkpt(R(SMPptpa), $PG_RW) +#endif /* SMP */ + +/* install a pde for temporary double map of bottom of VA */ + movl R(KPTphys), %eax + xorl %ebx, %ebx + movl $NKPT, %ecx + fillkpt(R(IdlePTD), $PG_RW) + +/* + * For the non-PSE case, install PDEs for PTs covering the kernel. + * For the PSE case, do the same, but clobber the ones corresponding + * to the kernel (from btext to KERNend) with 4M ('PS') PDEs immediately + * after. 
+ */ + movl R(KPTphys), %eax + movl $KPTDI, %ebx + movl $NKPT, %ecx + fillkpt(R(IdlePTD), $PG_RW) + cmpl $0,R(pseflag) + je done_pde + + movl R(KERNend), %ecx + movl $KERNLOAD, %eax + subl %eax, %ecx + shrl $PDRSHIFT, %ecx + movl $(KPTDI+(KERNLOAD/(1 << PDRSHIFT))), %ebx + shll $PDESHIFT, %ebx + addl R(IdlePTD), %ebx + orl $(PG_V|PG_RW|PG_PS), %eax +1: movl %eax, (%ebx) + addl $(1 << PDRSHIFT), %eax + addl $PDESIZE, %ebx + loop 1b + +done_pde: +/* install a pde recursively mapping page directory as a page table */ + movl R(IdlePTD), %eax + movl $PTDPTDI, %ebx + movl $NPGPTD,%ecx + fillkpt(R(IdlePTD), $PG_RW) + +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif + + ret diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c new file mode 100644 index 0000000000..ea813b897c --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c @@ -0,0 +1,2396 @@ +/*- + * Copyright (c) 1992 Terrence R. Lambert. + * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.584 2003/12/03 21:12:09 jhb Exp $"); + +#include "opt_apic.h" +#include "opt_atalk.h" +#include "opt_compat.h" +#include "opt_cpu.h" +#include "opt_ddb.h" +#include "opt_inet.h" +#include "opt_ipx.h" +#include "opt_isa.h" +#include "opt_kstack_pages.h" +#include "opt_maxmem.h" +#include "opt_msgbuf.h" +#include "opt_npx.h" +#include "opt_perfmon.h" +#include "opt_xen.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysproto.h> +#include <sys/signalvar.h> +#include <sys/imgact.h> +#include <sys/kdb.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/linker.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/memrange.h> +#include <sys/mutex.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/reboot.h> +#include <sys/callout.h> +#include <sys/msgbuf.h> +#include <sys/sched.h> 
+#include <sys/sysent.h> +#include <sys/sysctl.h> +#include <sys/ucontext.h> +#include <sys/vmmeter.h> +#include <sys/bus.h> +#include <sys/eventhandler.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <vm/vm_pager.h> +#include <vm/vm_extern.h> + +#include <sys/user.h> +#include <sys/exec.h> +#include <sys/cons.h> + +#ifdef DDB +#ifndef KDB +#error KDB must be enabled in order for DDB to work! +#endif +#include <ddb/ddb.h> +#include <ddb/db_sym.h> +#endif + +#include <net/netisr.h> + +#include <machine/cpu.h> +#include <machine/cputypes.h> +#include <machine/reg.h> +#include <machine/clock.h> +#include <machine/specialreg.h> +#include <machine/bootinfo.h> +#include <machine/intr_machdep.h> +#include <machine/md_var.h> +#include <machine/pc/bios.h> +#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */ +#include <machine/proc.h> +#ifdef PERFMON +#include <machine/perfmon.h> +#endif +#ifdef SMP +#include <machine/privatespace.h> +#include <machine/smp.h> +#endif + +#ifdef DEV_ISA +#include <i386/isa/icu.h> +#endif + +#include <isa/rtc.h> +#include <sys/ptrace.h> +#include <machine/sigframe.h> + + +/* XEN includes */ +#include <machine/hypervisor-ifs.h> +#include <machine/xen-os.h> +#include <machine/hypervisor.h> +#include <machine/xenfunc.h> +#include <machine/xenvar.h> +#include <machine/xen_intr.h> + +void Xhypervisor_callback(void); +void failsafe_callback(void); + +/***************/ + + +/* Sanity check for __curthread() */ +CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); + +extern void init386(void); +extern void dblfault_handler(void); + +extern void printcpuinfo(void); /* XXX header file */ +extern void finishidentcpu(void); +extern void panicifcpuunsupported(void); +extern void initializecpu(void); +void initvalues(start_info_t *startinfo); + +#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) +#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) 
& ~PSL_USERCHANGE) == 0) + +#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) +#define CPU_ENABLE_SSE +#endif +#if defined(CPU_DISABLE_SSE) +#undef CPU_ENABLE_SSE +#endif + +static void cpu_startup(void *); +static void fpstate_drop(struct thread *td); +static void get_fpcontext(struct thread *td, mcontext_t *mcp); +static int set_fpcontext(struct thread *td, const mcontext_t *mcp); +#ifdef CPU_ENABLE_SSE +static void set_fpregs_xmm(struct save87 *, struct savexmm *); +static void fill_fpregs_xmm(struct savexmm *, struct save87 *); +#endif /* CPU_ENABLE_SSE */ +SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) + +#ifdef DDB +extern vm_offset_t ksym_start, ksym_end; +#endif + +int _udatasel, _ucodesel; +u_int basemem; + +start_info_t *xen_start_info; +unsigned long *xen_phys_machine; +int xendebug_flags; +int init_first = 0; +int cold = 1; + +#ifdef COMPAT_43 +static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); +#endif +#ifdef COMPAT_FREEBSD4 +static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask, + u_long code); +#endif + +long Maxmem = 0; + +vm_paddr_t phys_avail[10]; + +/* must be 2 less so 0 0 can signal end of chunks */ +#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) + +struct kva_md_info kmi; + +static struct trapframe proc0_tf; +#ifndef SMP +static struct pcpu __pcpu; +#endif + +static void +map_range(void *physptr, unsigned long physptrindex, + unsigned long physindex, int count, unsigned int flags) { + int i; + unsigned long pte, ppa; + for (i = 0; i < count; i++) { + pte = ((unsigned long)physptr) + (physptrindex << 2) + (i << 2); + ppa = (PTOM(physindex + i) << PAGE_SHIFT) | flags | PG_V | PG_A; + xpq_queue_pt_update((pt_entry_t *)pte, ppa); + } + mcl_flush_queue(); +} + +struct mem_range_softc mem_range_softc; + +static void +cpu_startup(void *dummy) +{ + /* + * Good {morning,afternoon,evening,night}. 
+ */ + /* XXX need to write clock driver */ + startrtclock(); + + printcpuinfo(); + panicifcpuunsupported(); +#ifdef PERFMON + perfmon_init(); +#endif + printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), + ptoa((uintmax_t)Maxmem) / 1048576); + /* + * Display any holes after the first chunk of extended memory. + */ + if (bootverbose) { + int indx; + + printf("Physical memory chunk(s):\n"); + for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { + vm_paddr_t size; + + size = phys_avail[indx + 1] - phys_avail[indx]; + printf( + "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", + (uintmax_t)phys_avail[indx], + (uintmax_t)phys_avail[indx + 1] - 1, + (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); + } + } + + vm_ksubmap_init(&kmi); + + printf("avail memory = %ju (%ju MB)\n", + ptoa((uintmax_t)cnt.v_free_count), + ptoa((uintmax_t)cnt.v_free_count) / 1048576); + + /* + * Set up buffers, so they can be used to read disk labels. + */ + bufinit(); + vm_pager_bufferinit(); + + cpu_setregs(); + +} + +/* + * Send an interrupt to process. + * + * Stack is set up to allow sigcode stored + * at top to call routine, followed by kcall + * to sigreturn routine below. After sigreturn + * resets the signal mask, the stack, and the + * frame pointer, it returns to the user + * specified pc, psl. + */ +#ifdef COMPAT_43 +static void +osendsig(catcher, sig, mask, code) + sig_t catcher; + int sig; + sigset_t *mask; + u_long code; +{ + struct osigframe sf, *fp; + struct proc *p; + struct thread *td; + struct sigacts *psp; + struct trapframe *regs; + int oonstack; + + td = curthread; + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + psp = p->p_sigacts; + mtx_assert(&psp->ps_mtx, MA_OWNED); + regs = td->td_frame; + oonstack = sigonstack(regs->tf_esp); + + /* Allocate space for the signal handler context. 
*/ + if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && + SIGISMEMBER(psp->ps_sigonstack, sig)) { + fp = (struct osigframe *)(td->td_sigstk.ss_sp + + td->td_sigstk.ss_size - sizeof(struct osigframe)); +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) + td->td_sigstk.ss_flags |= SS_ONSTACK; +#endif + } else + fp = (struct osigframe *)regs->tf_esp - 1; + + /* Translate the signal if appropriate. */ + if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) + sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; + + /* Build the argument list for the signal handler. */ + sf.sf_signum = sig; + sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; + if (SIGISMEMBER(psp->ps_siginfo, sig)) { + /* Signal handler installed with SA_SIGINFO. */ + sf.sf_arg2 = (register_t)&fp->sf_siginfo; + sf.sf_siginfo.si_signo = sig; + sf.sf_siginfo.si_code = code; + sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; + } else { + /* Old FreeBSD-style arguments. */ + sf.sf_arg2 = code; + sf.sf_addr = regs->tf_err; + sf.sf_ahu.sf_handler = catcher; + } + mtx_unlock(&psp->ps_mtx); + PROC_UNLOCK(p); + + /* Save most if not all of trap frame. */ + sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; + sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; + sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; + sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; + sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; + sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; + sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; + sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; + sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; + sf.sf_siginfo.si_sc.sc_es = regs->tf_es; + sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; + sf.sf_siginfo.si_sc.sc_gs = rgs(); + sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; + + /* Build the signal context to be used by osigreturn(). */ + sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 
1 : 0; + SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); + sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; + sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; + sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; + sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; + sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; + sf.sf_siginfo.si_sc.sc_err = regs->tf_err; + + /* + * Copy the sigframe out to the user's stack. + */ + if (copyout(&sf, fp, sizeof(*fp)) != 0) { +#ifdef DEBUG + printf("process %ld has trashed its stack\n", (long)p->p_pid); +#endif + PROC_LOCK(p); + sigexit(td, SIGILL); + } + + regs->tf_esp = (int)fp; + regs->tf_eip = PS_STRINGS - szosigcode; + regs->tf_eflags &= ~PSL_T; + regs->tf_cs = _ucodesel; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _udatasel; + load_gs(_udatasel); + regs->tf_ss = _udatasel; + PROC_LOCK(p); + mtx_lock(&psp->ps_mtx); +} +#endif /* COMPAT_43 */ + +#ifdef COMPAT_FREEBSD4 +static void +freebsd4_sendsig(catcher, sig, mask, code) + sig_t catcher; + int sig; + sigset_t *mask; + u_long code; +{ + struct sigframe4 sf, *sfp; + struct proc *p; + struct thread *td; + struct sigacts *psp; + struct trapframe *regs; + int oonstack; + + td = curthread; + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + psp = p->p_sigacts; + mtx_assert(&psp->ps_mtx, MA_OWNED); + regs = td->td_frame; + oonstack = sigonstack(regs->tf_esp); + + /* Save user context. */ + bzero(&sf, sizeof(sf)); + sf.sf_uc.uc_sigmask = *mask; + sf.sf_uc.uc_stack = td->td_sigstk; + sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) + ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; + sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; + sf.sf_uc.uc_mcontext.mc_gs = rgs(); + bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); + + /* Allocate space for the signal handler context. 
*/ + if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && + SIGISMEMBER(psp->ps_sigonstack, sig)) { + sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + + td->td_sigstk.ss_size - sizeof(struct sigframe4)); +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) + td->td_sigstk.ss_flags |= SS_ONSTACK; +#endif + } else + sfp = (struct sigframe4 *)regs->tf_esp - 1; + + /* Translate the signal if appropriate. */ + if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) + sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; + + /* Build the argument list for the signal handler. */ + sf.sf_signum = sig; + sf.sf_ucontext = (register_t)&sfp->sf_uc; + if (SIGISMEMBER(psp->ps_siginfo, sig)) { + /* Signal handler installed with SA_SIGINFO. */ + sf.sf_siginfo = (register_t)&sfp->sf_si; + sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; + + /* Fill in POSIX parts */ + sf.sf_si.si_signo = sig; + sf.sf_si.si_code = code; + sf.sf_si.si_addr = (void *)regs->tf_err; + } else { + /* Old FreeBSD-style arguments. */ + sf.sf_siginfo = code; + sf.sf_addr = regs->tf_err; + sf.sf_ahu.sf_handler = catcher; + } + mtx_unlock(&psp->ps_mtx); + PROC_UNLOCK(p); + + /* + * Copy the sigframe out to the user's stack. 
+ */ + if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { +#ifdef DEBUG + printf("process %ld has trashed its stack\n", (long)p->p_pid); +#endif + PROC_LOCK(p); + sigexit(td, SIGILL); + } + + regs->tf_esp = (int)sfp; + regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; + regs->tf_eflags &= ~PSL_T; + regs->tf_cs = _ucodesel; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _udatasel; + regs->tf_ss = _udatasel; + PROC_LOCK(p); + mtx_lock(&psp->ps_mtx); +} +#endif /* COMPAT_FREEBSD4 */ + +void +sendsig(catcher, sig, mask, code) + sig_t catcher; + int sig; + sigset_t *mask; + u_long code; +{ + struct sigframe sf, *sfp; + struct proc *p; + struct thread *td; + struct sigacts *psp; + char *sp; + struct trapframe *regs; + int oonstack; + + td = curthread; + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + psp = p->p_sigacts; + mtx_assert(&psp->ps_mtx, MA_OWNED); +#ifdef COMPAT_FREEBSD4 + if (SIGISMEMBER(psp->ps_freebsd4, sig)) { + freebsd4_sendsig(catcher, sig, mask, code); + return; + } +#endif +#ifdef COMPAT_43 + if (SIGISMEMBER(psp->ps_osigset, sig)) { + osendsig(catcher, sig, mask, code); + return; + } +#endif + regs = td->td_frame; + oonstack = sigonstack(regs->tf_esp); + + /* Save user context. */ + bzero(&sf, sizeof(sf)); + sf.sf_uc.uc_sigmask = *mask; + sf.sf_uc.uc_stack = td->td_sigstk; + sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) + ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; + sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; + sf.sf_uc.uc_mcontext.mc_gs = rgs(); + bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); + sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ + get_fpcontext(td, &sf.sf_uc.uc_mcontext); + fpstate_drop(td); + + /* Allocate space for the signal handler context. 
*/ + if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && + SIGISMEMBER(psp->ps_sigonstack, sig)) { + sp = td->td_sigstk.ss_sp + + td->td_sigstk.ss_size - sizeof(struct sigframe); +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) + td->td_sigstk.ss_flags |= SS_ONSTACK; +#endif + } else + sp = (char *)regs->tf_esp - sizeof(struct sigframe); + /* Align to 16 bytes. */ + sfp = (struct sigframe *)((unsigned int)sp & ~0xF); + + /* Translate the signal if appropriate. */ + if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) + sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; + + /* Build the argument list for the signal handler. */ + sf.sf_signum = sig; + sf.sf_ucontext = (register_t)&sfp->sf_uc; + if (SIGISMEMBER(psp->ps_siginfo, sig)) { + /* Signal handler installed with SA_SIGINFO. */ + sf.sf_siginfo = (register_t)&sfp->sf_si; + sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; + + /* Fill in POSIX parts */ + sf.sf_si.si_signo = sig; + sf.sf_si.si_code = code; + sf.sf_si.si_addr = (void *)regs->tf_err; + } else { + /* Old FreeBSD-style arguments. */ + sf.sf_siginfo = code; + sf.sf_addr = regs->tf_err; + sf.sf_ahu.sf_handler = catcher; + } + mtx_unlock(&psp->ps_mtx); + PROC_UNLOCK(p); + /* + * Copy the sigframe out to the user's stack. 
+ */ + if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { +#ifdef DEBUG + printf("process %ld has trashed its stack\n", (long)p->p_pid); +#endif + PROC_LOCK(p); + sigexit(td, SIGILL); + } + + regs->tf_esp = (int)sfp; + regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); + regs->tf_eflags &= ~PSL_T; + regs->tf_cs = _ucodesel; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _udatasel; + regs->tf_ss = _udatasel; + PROC_LOCK(p); + mtx_lock(&psp->ps_mtx); +} + +/* + * Build siginfo_t for SA thread + */ +void +cpu_thread_siginfo(int sig, u_long code, siginfo_t *si) +{ + struct proc *p; + struct thread *td; + + td = curthread; + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + + bzero(si, sizeof(*si)); + si->si_signo = sig; + si->si_code = code; + si->si_addr = (void *)td->td_frame->tf_err; + /* XXXKSE fill other fields */ +} + +/* + * System call to cleanup state after a signal + * has been taken. Reset signal mask and + * stack state from context left by sendsig (above). + * Return to previous pc and psl as specified by + * context left by sendsig. Check carefully to + * make sure that the user has not modified the + * state to gain improper privileges. + * + * MPSAFE + */ +#ifdef COMPAT_43 +int +osigreturn(td, uap) + struct thread *td; + struct osigreturn_args /* { + struct osigcontext *sigcntxp; + } */ *uap; +{ + struct osigcontext sc; + struct trapframe *regs; + struct osigcontext *scp; + struct proc *p = td->td_proc; + int eflags, error; + + regs = td->td_frame; + error = copyin(uap->sigcntxp, &sc, sizeof(sc)); + if (error != 0) + return (error); + scp = ≻ + eflags = scp->sc_ps; + /* + * Don't allow users to change privileged or reserved flags. + */ + /* + * XXX do allow users to change the privileged flag PSL_RF. + * The cpu sets PSL_RF in tf_eflags for faults. Debuggers + * should sometimes set it there too. 
tf_eflags is kept in + * the signal context during signal handling and there is no + * other place to remember it, so the PSL_RF bit may be + * corrupted by the signal handler without us knowing. + * Corruption of the PSL_RF bit at worst causes one more or + * one less debugger trap, so allowing it is fairly harmless. + */ + if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { + return (EINVAL); + } + + /* + * Don't allow users to load a valid privileged %cs. Let the + * hardware check for invalid selectors, excess privilege in + * other selectors, invalid %eip's and invalid %esp's. + */ + if (!CS_SECURE(scp->sc_cs)) { + trapsignal(td, SIGBUS, T_PROTFLT); + return (EINVAL); + } + regs->tf_ds = scp->sc_ds; + regs->tf_es = scp->sc_es; + regs->tf_fs = scp->sc_fs; + + /* Restore remaining registers. */ + regs->tf_eax = scp->sc_eax; + regs->tf_ebx = scp->sc_ebx; + regs->tf_ecx = scp->sc_ecx; + regs->tf_edx = scp->sc_edx; + regs->tf_esi = scp->sc_esi; + regs->tf_edi = scp->sc_edi; + regs->tf_cs = scp->sc_cs; + regs->tf_ss = scp->sc_ss; + regs->tf_isp = scp->sc_isp; + regs->tf_ebp = scp->sc_fp; + regs->tf_esp = scp->sc_sp; + regs->tf_eip = scp->sc_pc; + regs->tf_eflags = eflags; + + PROC_LOCK(p); +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) + if (scp->sc_onstack & 1) + td->td_sigstk.ss_flags |= SS_ONSTACK; + else + td->td_sigstk.ss_flags &= ~SS_ONSTACK; +#endif + SIGSETOLD(td->td_sigmask, scp->sc_mask); + SIG_CANTMASK(td->td_sigmask); + signotify(td); + PROC_UNLOCK(p); + return (EJUSTRETURN); +} +#endif /* COMPAT_43 */ + +#ifdef COMPAT_FREEBSD4 +/* + * MPSAFE + */ +int +freebsd4_sigreturn(td, uap) + struct thread *td; + struct freebsd4_sigreturn_args /* { + const ucontext4 *sigcntxp; + } */ *uap; +{ + struct ucontext4 uc; + struct proc *p = td->td_proc; + struct trapframe *regs; + const struct ucontext4 *ucp; + int cs, eflags, error; + + error = copyin(uap->sigcntxp, &uc, sizeof(uc)); + if (error != 0) + return (error); + ucp = &uc; + regs = td->td_frame; + 
eflags = ucp->uc_mcontext.mc_eflags; + /* + * Don't allow users to change privileged or reserved flags. + */ + /* + * XXX do allow users to change the privileged flag PSL_RF. + * The cpu sets PSL_RF in tf_eflags for faults. Debuggers + * should sometimes set it there too. tf_eflags is kept in + * the signal context during signal handling and there is no + * other place to remember it, so the PSL_RF bit may be + * corrupted by the signal handler without us knowing. + * Corruption of the PSL_RF bit at worst causes one more or + * one less debugger trap, so allowing it is fairly harmless. + */ + if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { + printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); + return (EINVAL); + } + + /* + * Don't allow users to load a valid privileged %cs. Let the + * hardware check for invalid selectors, excess privilege in + * other selectors, invalid %eip's and invalid %esp's. + */ + cs = ucp->uc_mcontext.mc_cs; + if (!CS_SECURE(cs)) { + printf("freebsd4_sigreturn: cs = 0x%x\n", cs); + trapsignal(td, SIGBUS, T_PROTFLT); + return (EINVAL); + } + + bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); + + PROC_LOCK(p); +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) + if (ucp->uc_mcontext.mc_onstack & 1) + td->td_sigstk.ss_flags |= SS_ONSTACK; + else + td->td_sigstk.ss_flags &= ~SS_ONSTACK; +#endif + + td->td_sigmask = ucp->uc_sigmask; + SIG_CANTMASK(td->td_sigmask); + signotify(td); + PROC_UNLOCK(p); + return (EJUSTRETURN); +} +#endif /* COMPAT_FREEBSD4 */ + +/* + * MPSAFE + */ +int +sigreturn(td, uap) + struct thread *td; + struct sigreturn_args /* { + const __ucontext *sigcntxp; + } */ *uap; +{ + ucontext_t uc; + struct proc *p = td->td_proc; + struct trapframe *regs; + const ucontext_t *ucp; + int cs, eflags, error, ret; + + error = copyin(uap->sigcntxp, &uc, sizeof(uc)); + if (error != 0) + return (error); + ucp = &uc; + regs = td->td_frame; + eflags = ucp->uc_mcontext.mc_eflags; + /* + * Don't allow users to change 
privileged or reserved flags. + */ + /* + * XXX do allow users to change the privileged flag PSL_RF. + * The cpu sets PSL_RF in tf_eflags for faults. Debuggers + * should sometimes set it there too. tf_eflags is kept in + * the signal context during signal handling and there is no + * other place to remember it, so the PSL_RF bit may be + * corrupted by the signal handler without us knowing. + * Corruption of the PSL_RF bit at worst causes one more or + * one less debugger trap, so allowing it is fairly harmless. + */ +#if 0 + if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { + __asm__("int $0x3"); + printf("sigreturn: eflags = 0x%x\n", eflags); + return (EINVAL); + } +#endif + /* + * Don't allow users to load a valid privileged %cs. Let the + * hardware check for invalid selectors, excess privilege in + * other selectors, invalid %eip's and invalid %esp's. + */ + cs = ucp->uc_mcontext.mc_cs; + if (!CS_SECURE(cs)) { + __asm__("int $0x3"); + printf("sigreturn: cs = 0x%x\n", cs); + trapsignal(td, SIGBUS, T_PROTFLT); + return (EINVAL); + } + + ret = set_fpcontext(td, &ucp->uc_mcontext); + if (ret != 0) + return (ret); + bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); + PROC_LOCK(p); +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) + if (ucp->uc_mcontext.mc_onstack & 1) + td->td_sigstk.ss_flags |= SS_ONSTACK; + else + td->td_sigstk.ss_flags &= ~SS_ONSTACK; +#endif + + td->td_sigmask = ucp->uc_sigmask; + SIG_CANTMASK(td->td_sigmask); + signotify(td); + PROC_UNLOCK(p); + return (EJUSTRETURN); +} + +/* + * Machine dependent boot() routine + * + * I haven't seen anything to put here yet + * Possibly some stuff might be grafted back here from boot() + */ +void +cpu_boot(int howto) +{ +} + +/* + * Shutdown the CPU as much as possible + */ +void +cpu_halt(void) +{ + HYPERVISOR_shutdown(); +} + +/* + * Hook to idle the CPU when possible. 
In the SMP case we default to + * off because a halted cpu will not currently pick up a new thread in the + * run queue until the next timer tick. If turned on this will result in + * approximately a 4.2% loss in real time performance in buildworld tests + * (but improves user and sys times oddly enough), and saves approximately + * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). + * + * XXX we need to have a cpu mask of idle cpus and generate an IPI or + * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. + * Then we can have our cake and eat it too. + * + * XXX I'm turning it on for SMP as well by default for now. It seems to + * help lock contention somewhat, and this is critical for HTT. -Peter + */ +static int cpu_idle_hlt = 1; +SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, + &cpu_idle_hlt, 0, "Idle loop HLT enable"); + +static void +cpu_idle_default(void) +{ +#if 0 + /* + * we must absolutely guarentee that hlt is the + * absolute next instruction after sti or we + * introduce a timing window. + */ + __asm __volatile("sti; hlt"); +#endif + idle_block(); + enable_intr(); +} + +/* + * Note that we have to be careful here to avoid a race between checking + * sched_runnable() and actually halting. If we don't do this, we may waste + * the time between calling hlt and the next interrupt even though there + * is a runnable process. + */ +void +cpu_idle(void) +{ + +#ifdef SMP + if (mp_grab_cpu_hlt()) + return; +#endif + + if (cpu_idle_hlt) { + disable_intr(); + if (sched_runnable()) + enable_intr(); + else + (*cpu_idle_hook)(); + } +} + +/* Other subsystems (e.g., ACPI) can hook this later. 
*/ +void (*cpu_idle_hook)(void) = cpu_idle_default; + +/* + * Clear registers on exec + */ +void +exec_setregs(td, entry, stack, ps_strings) + struct thread *td; + u_long entry; + u_long stack; + u_long ps_strings; +{ + struct trapframe *regs = td->td_frame; + struct pcb *pcb = td->td_pcb; + + /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ + pcb->pcb_gs = _udatasel; + load_gs(_udatasel); + + if (td->td_proc->p_md.md_ldt) + user_ldt_free(td); + + bzero((char *)regs, sizeof(struct trapframe)); + regs->tf_eip = entry; + regs->tf_esp = stack; + regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); + regs->tf_ss = _udatasel; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _udatasel; + regs->tf_cs = _ucodesel; + + /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ + regs->tf_ebx = ps_strings; + + /* + * Reset the hardware debug registers if they were in use. + * They won't have any meaning for the newly exec'd process. + */ + if (pcb->pcb_flags & PCB_DBREGS) { + pcb->pcb_dr0 = 0; + pcb->pcb_dr1 = 0; + pcb->pcb_dr2 = 0; + pcb->pcb_dr3 = 0; + pcb->pcb_dr6 = 0; + pcb->pcb_dr7 = 0; + if (pcb == PCPU_GET(curpcb)) { + /* + * Clear the debug registers on the running + * CPU, otherwise they will end up affecting + * the next process we switch to. + */ + reset_dbregs(); + } + pcb->pcb_flags &= ~PCB_DBREGS; + } + + /* + * Initialize the math emulator (if any) for the current process. + * Actually, just clear the bit that says that the emulator has + * been initialized. Initialization is delayed until the process + * traps to the emulator (if it is done at all) mainly because + * emulators don't provide an entry point for initialization. + */ + td->td_pcb->pcb_flags &= ~FP_SOFTFP; + + /* Initialize the npx (if any) for the current process. */ + /* + * XXX the above load_cr0() also initializes it and is a layering + * violation if NPX is configured. 
It drops the npx partially + * and this would be fatal if we were interrupted now, and decided + * to force the state to the pcb, and checked the invariant + * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL). + * ALL of this can happen except the check. The check used to + * happen and be fatal later when we didn't complete the drop + * before returning to user mode. This should be fixed properly + * soon. + */ + fpstate_drop(td); + + /* + * XXX - Linux emulator + * Make sure sure edx is 0x0 on entry. Linux binaries depend + * on it. + */ + td->td_retval[1] = 0; +} + +void +cpu_setregs(void) +{ + /* nothing for Xen to do */ +} + +static int +sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) +{ + int error; + error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, + req); + if (!error && req->newptr) + resettodr(); + return (error); +} + +SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, + &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); + +SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, + CTLFLAG_RW, &disable_rtc_set, 0, ""); + +SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, + CTLFLAG_RD, &bootinfo, bootinfo, ""); + +u_long bootdev; /* not a dev_t - encoding is different */ +SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, + CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); + +/* + * Initialize 386 and configure to run kernel + */ + +/* + * Initialize segments & interrupt table + */ + +int _default_ldt; +union descriptor *gdt; /* global descriptor table */ +static struct gate_descriptor idt0[NIDT]; +struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ +union descriptor *ldt; /* local descriptor table */ +struct region_descriptor r_idt; /* table descriptors */ + +int private_tss; /* flag indicating private tss */ + +#if defined(I586_CPU) && !defined(NO_F00F_HACK) +extern int has_f00f_bug; +#endif + +static struct i386tss dblfault_tss; +static char 
dblfault_stack[PAGE_SIZE]; + +extern struct user *proc0uarea; +extern vm_offset_t proc0kstack; + + +/* software prototypes -- in more palatable form */ +struct soft_segment_descriptor gdt_segs[] = { +/* GNULL_SEL 0 Null Descriptor */ +{ 0x0, /* segment base address */ + 0x0, /* length */ + 0, /* segment type */ + SEL_KPL, /* segment descriptor priority level */ + 0, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, +/* GCODE_SEL 1 Code Descriptor for kernel */ +{ 0x0, /* segment base address */ + 0x0, /* length - all address space */ + 0, /* segment type */ + 0, /* segment descriptor priority level */ + 0, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, + +/* GDATA_SEL 2 Data Descriptor for kernel */ +{ 0x0, /* segment base address */ + 0x0, /* length - all address space */ + 0, /* segment type */ + 0, /* segment descriptor priority level */ + 0, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, + +/* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ +{ 0x0, /* segment base address */ + 0xfffff, /* length - all address space */ + SDT_MEMRWA, /* segment type */ + SEL_KPL, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 1, /* default 32 vs 16 bit size */ + 1 /* limit granularity (byte/page units)*/ }, +#if 0 +/* GPROC0_SEL 4 Proc 0 Tss Descriptor */ +{ + 0x0, /* segment base address */ + sizeof(struct i386tss)-1,/* length */ + SDT_SYS386TSS, /* segment type */ + 0, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 0, /* unused - default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, +/* GLDT_SEL 5 LDT Descriptor */ +{ (int) ldt, /* segment base address */ + sizeof(ldt)-1, /* length - all address space */ + SDT_SYSLDT, /* segment type */ 
+ SEL_UPL, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 0, /* unused - default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, +/* GUSERLDT_SEL 6 User LDT Descriptor per process */ +{ (int) ldt, /* segment base address */ + (512 * sizeof(union descriptor)-1), /* length */ + SDT_SYSLDT, /* segment type */ + 0, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 0, /* unused - default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, +/* GTGATE_SEL 7 Null Descriptor - Placeholder */ +{ 0x0, /* segment base address */ + 0x0, /* length - all address space */ + 0, /* segment type */ + 0, /* segment descriptor priority level */ + 0, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, +/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ +{ 0x400, /* segment base address */ + 0xfffff, /* length */ + SDT_MEMRWA, /* segment type */ + 0, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 1, /* default 32 vs 16 bit size */ + 1 /* limit granularity (byte/page units)*/ }, +/* GPANIC_SEL 9 Panic Tss Descriptor */ +{ (int) &dblfault_tss, /* segment base address */ + sizeof(struct i386tss)-1,/* length - all address space */ + SDT_SYS386TSS, /* segment type */ + 0, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 0, /* unused - default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, +/* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ +{ 0, /* segment base address (overwritten) */ + 0xfffff, /* length */ + SDT_MEMERA, /* segment type */ + 0, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 1 /* limit granularity (byte/page units)*/ }, +/* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit 
Code) */ +{ 0, /* segment base address (overwritten) */ + 0xfffff, /* length */ + SDT_MEMERA, /* segment type */ + 0, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 1 /* limit granularity (byte/page units)*/ }, +/* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ +{ 0, /* segment base address (overwritten) */ + 0xfffff, /* length */ + SDT_MEMRWA, /* segment type */ + 0, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 1, /* default 32 vs 16 bit size */ + 1 /* limit granularity (byte/page units)*/ }, +/* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ +{ 0, /* segment base address (overwritten) */ + 0xfffff, /* length */ + SDT_MEMRWA, /* segment type */ + 0, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 1 /* limit granularity (byte/page units)*/ }, +/* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ +{ 0, /* segment base address (overwritten) */ + 0xfffff, /* length */ + SDT_MEMRWA, /* segment type */ + 0, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 1 /* limit granularity (byte/page units)*/ }, +#endif +}; + +static struct soft_segment_descriptor ldt_segs[] = { + /* Null Descriptor - overwritten by call gate */ +{ 0x0, /* segment base address */ + 0x0, /* length - all address space */ + 0, /* segment type */ + 0, /* segment descriptor priority level */ + 0, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, + /* Null Descriptor - overwritten by call gate */ +{ 0x0, /* segment base address */ + 0x0, /* length - all address space */ + 0, /* segment type */ + 0, /* segment descriptor priority level */ + 0, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 0 /* limit granularity 
(byte/page units)*/ }, + /* Null Descriptor - overwritten by call gate */ +{ 0x0, /* segment base address */ + 0x0, /* length - all address space */ + 0, /* segment type */ + 0, /* segment descriptor priority level */ + 0, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, + /* Code Descriptor for user */ +{ 0x0, /* segment base address */ + 0xfffff, /* length - all address space */ + SDT_MEMERA, /* segment type */ + SEL_UPL, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 1, /* default 32 vs 16 bit size */ + 1 /* limit granularity (byte/page units)*/ }, + /* Null Descriptor - overwritten by call gate */ +{ 0x0, /* segment base address */ + 0x0, /* length - all address space */ + 0, /* segment type */ + 0, /* segment descriptor priority level */ + 0, /* segment descriptor present */ + 0, 0, + 0, /* default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, + /* Data Descriptor for user */ +{ 0x0, /* segment base address */ + 0xfffff, /* length - all address space */ + SDT_MEMRWA, /* segment type */ + SEL_UPL, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, 0, + 1, /* default 32 vs 16 bit size */ + 1 /* limit granularity (byte/page units)*/ }, +}; + +struct proc_ldt default_proc_ldt; + +void +setidt(idx, func, typ, dpl, selec) + int idx; + inthand_t *func; + int typ; + int dpl; + int selec; +{ + struct gate_descriptor *ip; + + ip = idt + idx; + ip->gd_looffset = (int)func; + ip->gd_selector = selec; + ip->gd_stkcpy = 0; + ip->gd_xx = 0; + ip->gd_type = typ; + ip->gd_dpl = dpl; + ip->gd_p = 1; + ip->gd_hioffset = ((int)func)>>16 ; +} + +#define IDTVEC(name) __CONCAT(X,name) + +extern inthand_t + IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), + IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), + IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), + IDTVEC(page), 
IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), + IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); + +#ifdef DDB +/* + * Display the index and function name of any IDT entries that don't use + * the default 'rsvd' entry point. + */ +DB_SHOW_COMMAND(idt, db_show_idt) +{ + struct gate_descriptor *ip; + int idx, quit; + uintptr_t func; + + ip = idt; + db_setup_paging(db_simple_pager, &quit, DB_LINES_PER_PAGE); + for (idx = 0, quit = 0; idx < NIDT; idx++) { + func = (ip->gd_hioffset << 16 | ip->gd_looffset); + if (func != (uintptr_t)&IDTVEC(rsvd)) { + db_printf("%3d\t", idx); + db_printsym(func, DB_STGY_PROC); + db_printf("\n"); + } + ip++; + } +} +#endif + +void +sdtossd(sd, ssd) + struct segment_descriptor *sd; + struct soft_segment_descriptor *ssd; +{ + ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; + ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; + ssd->ssd_type = sd->sd_type; + ssd->ssd_dpl = sd->sd_dpl; + ssd->ssd_p = sd->sd_p; + ssd->ssd_def32 = sd->sd_def32; + ssd->ssd_gran = sd->sd_gran; +} + +#define PHYSMAP_SIZE (2 * 8) + +/* + * Populate the (physmap) array with base/bound pairs describing the + * available physical memory in the system, then test this memory and + * build the phys_avail array describing the actually-available memory. + * + * If we cannot accurately determine the physical memory map, then use + * value from the 0xE801 call, and failing that, the RTC. + * + * Total memory size may be set by the kernel environment variable + * hw.physmem or the compile-time define MAXMEM. + * + * XXX first should be vm_paddr_t. 
+ */ +static void +getmemsize(void) +{ + int i; + printf("start_info %p\n", xen_start_info); + printf("start_info->nr_pages %ld\n", xen_start_info->nr_pages); + Maxmem = xen_start_info->nr_pages - init_first; + /* call pmap initialization to make new kernel address space */ + pmap_bootstrap((init_first)<< PAGE_SHIFT, 0); + for (i = 0; i < 10; i++) + phys_avail[i] = 0; +#ifdef MAXMEM + if (MAXMEM/4 < Maxmem) + Maxmem = MAXMEM/4; +#endif + physmem = Maxmem; + avail_end = ptoa(Maxmem) - round_page(MSGBUF_SIZE); + phys_avail[0] = init_first << PAGE_SHIFT; + phys_avail[1] = avail_end; +} + +extern pt_entry_t *KPTphys; +extern int kernbase; +pteinfo_t *pteinfo_list; +unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0)); + +/* Linux infection */ +#define PAGE_OFFSET KERNBASE +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +void +initvalues(start_info_t *startinfo) +{ + int i; + xen_start_info = startinfo; + xen_phys_machine = (unsigned long *)startinfo->mfn_list; + unsigned long tmpindex = ((__pa(xen_start_info->pt_base) >> PAGE_SHIFT) + xen_start_info->nr_pt_frames) + 3 /* number of pages allocated after the pts + 1*/; + xendebug_flags = 0xffffffff; + /* pre-zero unused mapped pages */ + bzero((char *)(KERNBASE + (tmpindex << PAGE_SHIFT)), (1024 - tmpindex)*PAGE_SIZE); + + KPTphys = (pt_entry_t *)xpmap_ptom(__pa(startinfo->pt_base + PAGE_SIZE)); + IdlePTD = (pd_entry_t *)xpmap_ptom(__pa(startinfo->pt_base)); + XENPRINTF("IdlePTD %p\n", IdlePTD); + XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx " + "mod_start: 0x%lx mod_len: 0x%lx\n", + xen_start_info->nr_pages, xen_start_info->shared_info, + xen_start_info->flags, xen_start_info->pt_base, + xen_start_info->mod_start, xen_start_info->mod_len); + + /* Map proc0's UPAGES */ + proc0uarea = (struct user *)(KERNBASE + (tmpindex << PAGE_SHIFT)); + tmpindex += UAREA_PAGES; + + /* Map proc0's KSTACK */ + proc0kstack = KERNBASE + 
(tmpindex << PAGE_SHIFT); + tmpindex += KSTACK_PAGES; + + /* allocate page for gdt */ + gdt = (union descriptor *)(KERNBASE + (tmpindex << PAGE_SHIFT)); + tmpindex++; + + /* allocate page for ldt */ + ldt = (union descriptor *)(KERNBASE + (tmpindex << PAGE_SHIFT)); + tmpindex++; + +#ifdef PMAP_DEBUG + pteinfo_list = (pteinfo_t *)(KERNBASE + (tmpindex << PAGE_SHIFT)); + tmpindex += ((xen_start_info->nr_pages >> 10) + 1)*(1 + XPQ_CALL_DEPTH*XPQ_CALL_COUNT); + + if (tmpindex > 980) + __asm__("int3"); +#endif + /* unmap remaining pages from initial 4MB chunk */ + for (i = tmpindex; i%1024 != 0; i++) + PT_CLEAR(KERNBASE + (i << PAGE_SHIFT), TRUE); + + /* allocate remainder of NKPT pages */ + map_range(IdlePTD, KPTDI + 1, tmpindex, NKPT-1, PG_U | PG_M | PG_RW); + tmpindex += NKPT-1; + map_range(IdlePTD, PTDPTDI, __pa(xen_start_info->pt_base) >> PAGE_SHIFT, 1, 0); + + xpq_queue_pt_update(KPTphys + tmpindex, xen_start_info->shared_info | PG_A | PG_V | PG_RW); + HYPERVISOR_shared_info = (shared_info_t *)(KERNBASE + (tmpindex << PAGE_SHIFT)); + tmpindex++; + + mcl_flush_queue(); + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine; + HYPERVISOR_shared_info->arch.mfn_to_pfn_start = (unsigned long)xen_machine_phys; + + init_first = tmpindex; + +} + +void +init386(void) +{ + int gsel_tss, metadata_missing, off, x, error; + struct pcpu *pc; + trap_info_t trap_table[] = { + { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, + { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, + { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, + { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, + /* This is UPL on Linux and KPL on BSD */ + { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, + { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, + { 7, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, + /* + * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) 
&IDTVEC(XXX)}, + * no handler for double fault + */ + { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, + {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, + {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, + {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, + {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, + {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, + {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)}, + {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, + {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)}, + {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, + {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, + {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, + { 0, 0, 0, 0 } + }; + proc0.p_uarea = proc0uarea; + thread0.td_kstack = proc0kstack; + thread0.td_pcb = (struct pcb *) + (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + + /* + * This may be done better later if it gets more high level + * components in it. If so just link td->td_proc here. 
+ */ + proc_linkup(&proc0, &ksegrp0, &thread0); + + metadata_missing = 0; + if (xen_start_info->mod_start) + preload_metadata = (caddr_t)xen_start_info->mod_start; + else + metadata_missing = 1; + + /* XXX - temporary hack */ + preload_metadata = (caddr_t)0; + /* XXX */ + + if (envmode == 1) + kern_envp = static_env; + else if ((caddr_t)xen_start_info->cmd_line) + kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line); + + boothowto |= xen_boothowto(kern_envp); + + if (boothowto & RB_GDB_PAUSE) + __asm__("int $0x3;"); + + /* Init basic tunables, hz etc */ + init_param1(); + /* + * make gdt memory segments, the code segment goes up to end of the + * page with etext in it, the data segment goes to the end of + * the address space + */ +#if 0 + /* + * XEN occupies the upper 64MB of virtual address space + * At its base it manages an array mapping machine page frames + * to physical page frames - hence we need to be able to + * access 4GB - (64MB - 4MB + 64k) + */ + gdt_segs[GCODE_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16))); + gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16))); +#endif +#ifdef SMP + pc = &SMP_prvspace[0].pcpu; + gdt_segs[GPRIV_SEL].ssd_limit = + atop(sizeof(struct privatespace) - 1); +#else + pc = &__pcpu; + gdt_segs[GPRIV_SEL].ssd_limit = + atop(sizeof(struct pcpu) - 1); +#endif + gdt_segs[GPRIV_SEL].ssd_base = (int) pc; + gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; + for (x = 0; x < NGDT; x++) + ssdtosd(&gdt_segs[x], &gdt[x].sd); + /* re-map GDT read-only */ + { + unsigned long gdtindex = (((unsigned long)gdt - KERNBASE) >> PAGE_SHIFT); + unsigned long gdtphys = PTOM(gdtindex); + map_range(KPTphys, gdtindex, gdtindex, 1, 0); + mcl_flush_queue(); + if (HYPERVISOR_set_gdt(&gdtphys, LAST_RESERVED_GDT_ENTRY + 1)) { + panic("set_gdt failed\n"); + } + lgdt_finish(); + } + + if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) { + panic("set_trap_table failed - error %d\n", error); + } 
+ if ((error = HYPERVISOR_set_fast_trap(0x80)) != 0) { + panic("set_fast_trap failed - error %d\n", error); + } + HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback, + GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); + + + + pcpu_init(pc, 0, sizeof(struct pcpu)); + PCPU_SET(prvspace, pc); + PCPU_SET(curthread, &thread0); + PCPU_SET(curpcb, thread0.td_pcb); + PCPU_SET(trap_nesting, 0); + PCPU_SET(pdir, (unsigned long)IdlePTD); + /* + * Initialize mutexes. + * + */ + mutex_init(); + + /* make ldt memory segments */ + /* + * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it + * should be spelled ...MAX_USER... + */ + ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); + ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); + for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) + ssdtosd(&ldt_segs[x], &ldt[x].sd); + default_proc_ldt.ldt_base = (caddr_t)ldt; + default_proc_ldt.ldt_len = 6; + _default_ldt = (int)&default_proc_ldt; + PCPU_SET(currentldt, _default_ldt); + { + unsigned long ldtindex = (((unsigned long)ldt - KERNBASE) >> PAGE_SHIFT); + map_range(KPTphys, ldtindex, ldtindex, 1, 0); + mcl_flush_queue(); + xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0])); + } + + /* + * Initialize the console before we print anything out. + */ + cninit(); + if (metadata_missing) + printf("WARNING: loader(8) metadata is missing!\n"); + +#ifdef DDB + ksym_start = bootinfo.bi_symtab; + ksym_end = bootinfo.bi_esymtab; +#endif + kdb_init(); +#ifdef KDB + if (boothowto & RB_KDB) + kdb_enter("Boot flags requested debugger"); +#endif + + finishidentcpu(); /* Final stage of CPU initialization */ + setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + initializecpu(); /* Initialize CPU registers */ + + /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ + /* Note: -16 is so we can grow the trapframe if we came from vm86 */ + PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); + PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); + gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); + private_tss = 0; + PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); + PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); + PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); + HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), PCPU_GET(common_tss.tss_esp0)); + + dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = + dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; + dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = + dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); + + dblfault_tss.tss_cr3 = (int)IdlePTD; + dblfault_tss.tss_eip = (int)dblfault_handler; + dblfault_tss.tss_eflags = PSL_KERNEL; + dblfault_tss.tss_ds = dblfault_tss.tss_es = + dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); + dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); + dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); + dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); + + getmemsize(); + init_param2(physmem); + /* now running on new page tables, configured,and u/iom is accessible */ + /* Map the message buffer. */ + for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) + pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); + PT_UPDATES_FLUSH(); + + /* safe to enable xen page queue locking */ + xpq_init(); + + msgbufinit(msgbufp, MSGBUF_SIZE); + /* XXX KMM I don't think we need call gates */ +#if 0 + printf("modify ldt\n"); + /* make a call gate to reenter kernel with */ + gdp = &ldt[LSYS5CALLS_SEL].gd; + + x = (int) &IDTVEC(lcall_syscall); + gdp->gd_looffset = x; + gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); + gdp->gd_stkcpy = 1; + gdp->gd_type = SDT_SYS386CGT; + gdp->gd_dpl = SEL_UPL; + gdp->gd_p = 1; + gdp->gd_hioffset = x >> 16; + + /* XXX does this work? 
*/ + ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; + ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; +#endif + /* transfer to user mode */ + + _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); + _udatasel = LSEL(LUDATA_SEL, SEL_UPL); + + /* setup proc 0's pcb */ + thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ + thread0.td_pcb->pcb_cr3 = (int)IdlePTD; + thread0.td_pcb->pcb_ext = 0; + thread0.td_frame = &proc0_tf; +} + +void +cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) +{ + + pcpu->pc_acpi_id = 0xffffffff; +} + +/* + * Construct a PCB from a trapframe. This is called from kdb_trap() where + * we want to start a backtrace from the function that caused us to enter + * the debugger. We have the context in the trapframe, but base the trace + * on the PCB. The PCB doesn't have to be perfect, as long as it contains + * enough for a backtrace. + */ +void +makectx(struct trapframe *tf, struct pcb *pcb) +{ + + pcb->pcb_edi = tf->tf_edi; + pcb->pcb_esi = tf->tf_esi; + pcb->pcb_ebp = tf->tf_ebp; + pcb->pcb_ebx = tf->tf_ebx; + pcb->pcb_eip = tf->tf_eip; + pcb->pcb_esp = (ISPL(tf->tf_cs)) ? 
tf->tf_esp : (int)(tf + 1) - 8; +} + +int +ptrace_set_pc(struct thread *td, u_long addr) +{ + + td->td_frame->tf_eip = addr; + return (0); +} + +int +ptrace_single_step(struct thread *td) +{ + td->td_frame->tf_eflags |= PSL_T; + return (0); +} + +int +ptrace_clear_single_step(struct thread *td) +{ + td->td_frame->tf_eflags &= ~PSL_T; + return (0); +} + +int +fill_regs(struct thread *td, struct reg *regs) +{ + struct pcb *pcb; + struct trapframe *tp; + + tp = td->td_frame; + regs->r_fs = tp->tf_fs; + regs->r_es = tp->tf_es; + regs->r_ds = tp->tf_ds; + regs->r_edi = tp->tf_edi; + regs->r_esi = tp->tf_esi; + regs->r_ebp = tp->tf_ebp; + regs->r_ebx = tp->tf_ebx; + regs->r_edx = tp->tf_edx; + regs->r_ecx = tp->tf_ecx; + regs->r_eax = tp->tf_eax; + regs->r_eip = tp->tf_eip; + regs->r_cs = tp->tf_cs; + regs->r_eflags = tp->tf_eflags; + regs->r_esp = tp->tf_esp; + regs->r_ss = tp->tf_ss; + pcb = td->td_pcb; + regs->r_gs = pcb->pcb_gs; + return (0); +} + +int +set_regs(struct thread *td, struct reg *regs) +{ + struct pcb *pcb; + struct trapframe *tp; + + tp = td->td_frame; + if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || + !CS_SECURE(regs->r_cs)) + return (EINVAL); + tp->tf_fs = regs->r_fs; + tp->tf_es = regs->r_es; + tp->tf_ds = regs->r_ds; + tp->tf_edi = regs->r_edi; + tp->tf_esi = regs->r_esi; + tp->tf_ebp = regs->r_ebp; + tp->tf_ebx = regs->r_ebx; + tp->tf_edx = regs->r_edx; + tp->tf_ecx = regs->r_ecx; + tp->tf_eax = regs->r_eax; + tp->tf_eip = regs->r_eip; + tp->tf_cs = regs->r_cs; + tp->tf_eflags = regs->r_eflags; + tp->tf_esp = regs->r_esp; + tp->tf_ss = regs->r_ss; + pcb = td->td_pcb; + pcb->pcb_gs = regs->r_gs; + return (0); +} + +#ifdef CPU_ENABLE_SSE +static void +fill_fpregs_xmm(sv_xmm, sv_87) + struct savexmm *sv_xmm; + struct save87 *sv_87; +{ + register struct env87 *penv_87 = &sv_87->sv_env; + register struct envxmm *penv_xmm = &sv_xmm->sv_env; + int i; + + bzero(sv_87, sizeof(*sv_87)); + + /* FPU control/status */ + penv_87->en_cw = penv_xmm->en_cw; + 
penv_87->en_sw = penv_xmm->en_sw; + penv_87->en_tw = penv_xmm->en_tw; + penv_87->en_fip = penv_xmm->en_fip; + penv_87->en_fcs = penv_xmm->en_fcs; + penv_87->en_opcode = penv_xmm->en_opcode; + penv_87->en_foo = penv_xmm->en_foo; + penv_87->en_fos = penv_xmm->en_fos; + + /* FPU registers */ + for (i = 0; i < 8; ++i) + sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; +} + +static void +set_fpregs_xmm(sv_87, sv_xmm) + struct save87 *sv_87; + struct savexmm *sv_xmm; +{ + register struct env87 *penv_87 = &sv_87->sv_env; + register struct envxmm *penv_xmm = &sv_xmm->sv_env; + int i; + + /* FPU control/status */ + penv_xmm->en_cw = penv_87->en_cw; + penv_xmm->en_sw = penv_87->en_sw; + penv_xmm->en_tw = penv_87->en_tw; + penv_xmm->en_fip = penv_87->en_fip; + penv_xmm->en_fcs = penv_87->en_fcs; + penv_xmm->en_opcode = penv_87->en_opcode; + penv_xmm->en_foo = penv_87->en_foo; + penv_xmm->en_fos = penv_87->en_fos; + + /* FPU registers */ + for (i = 0; i < 8; ++i) + sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; +} +#endif /* CPU_ENABLE_SSE */ + +int +fill_fpregs(struct thread *td, struct fpreg *fpregs) +{ +#ifdef CPU_ENABLE_SSE + if (cpu_fxsr) { + fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, + (struct save87 *)fpregs); + return (0); + } +#endif /* CPU_ENABLE_SSE */ + bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); + return (0); +} + +int +set_fpregs(struct thread *td, struct fpreg *fpregs) +{ +#ifdef CPU_ENABLE_SSE + if (cpu_fxsr) { + set_fpregs_xmm((struct save87 *)fpregs, + &td->td_pcb->pcb_save.sv_xmm); + return (0); + } +#endif /* CPU_ENABLE_SSE */ + bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); + return (0); +} + +/* + * Get machine context. 
+ */ +int +get_mcontext(struct thread *td, mcontext_t *mcp, int flags) +{ + struct trapframe *tp; + + tp = td->td_frame; + + PROC_LOCK(curthread->td_proc); + mcp->mc_onstack = sigonstack(tp->tf_esp); + PROC_UNLOCK(curthread->td_proc); + mcp->mc_gs = td->td_pcb->pcb_gs; + mcp->mc_fs = tp->tf_fs; + mcp->mc_es = tp->tf_es; + mcp->mc_ds = tp->tf_ds; + mcp->mc_edi = tp->tf_edi; + mcp->mc_esi = tp->tf_esi; + mcp->mc_ebp = tp->tf_ebp; + mcp->mc_isp = tp->tf_isp; + if (flags & GET_MC_CLEAR_RET) { + mcp->mc_eax = 0; + mcp->mc_edx = 0; + } else { + mcp->mc_eax = tp->tf_eax; + mcp->mc_edx = tp->tf_edx; + } + mcp->mc_ebx = tp->tf_ebx; + mcp->mc_ecx = tp->tf_ecx; + mcp->mc_eip = tp->tf_eip; + mcp->mc_cs = tp->tf_cs; + mcp->mc_eflags = tp->tf_eflags; + mcp->mc_esp = tp->tf_esp; + mcp->mc_ss = tp->tf_ss; + mcp->mc_len = sizeof(*mcp); + get_fpcontext(td, mcp); + return (0); +} + +/* + * Set machine context. + * + * However, we don't set any but the user modifiable flags, and we won't + * touch the cs selector. 
+ */ +int +set_mcontext(struct thread *td, const mcontext_t *mcp) +{ + struct trapframe *tp; + int eflags, ret; + + tp = td->td_frame; + if (mcp->mc_len != sizeof(*mcp)) + return (EINVAL); + eflags = (mcp->mc_eflags & PSL_USERCHANGE) | + (tp->tf_eflags & ~PSL_USERCHANGE); + if ((ret = set_fpcontext(td, mcp)) == 0) { + tp->tf_fs = mcp->mc_fs; + tp->tf_es = mcp->mc_es; + tp->tf_ds = mcp->mc_ds; + tp->tf_edi = mcp->mc_edi; + tp->tf_esi = mcp->mc_esi; + tp->tf_ebp = mcp->mc_ebp; + tp->tf_ebx = mcp->mc_ebx; + tp->tf_edx = mcp->mc_edx; + tp->tf_ecx = mcp->mc_ecx; + tp->tf_eax = mcp->mc_eax; + tp->tf_eip = mcp->mc_eip; + tp->tf_eflags = eflags; + tp->tf_esp = mcp->mc_esp; + tp->tf_ss = mcp->mc_ss; + td->td_pcb->pcb_gs = mcp->mc_gs; + ret = 0; + } + return (ret); +} + +static void +get_fpcontext(struct thread *td, mcontext_t *mcp) +{ +#ifndef DEV_NPX + mcp->mc_fpformat = _MC_FPFMT_NODEV; + mcp->mc_ownedfp = _MC_FPOWNED_NONE; +#else + union savefpu *addr; + + /* + * XXX mc_fpstate might be misaligned, since its declaration is not + * unportabilized using __attribute__((aligned(16))) like the + * declaration of struct savemm, and anyway, alignment doesn't work + * for auto variables since we don't use gcc's pessimal stack + * alignment. Work around this by abusing the spare fields after + * mcp->mc_fpstate. + * + * XXX unpessimize most cases by only aligning when fxsave might be + * called, although this requires knowing too much about + * npxgetregs()'s internals. 
+ */ + addr = (union savefpu *)&mcp->mc_fpstate; + if (td == PCPU_GET(fpcurthread) && +#ifdef CPU_ENABLE_SSE + cpu_fxsr && +#endif + ((uintptr_t)(void *)addr & 0xF)) { + do + addr = (void *)((char *)addr + 4); + while ((uintptr_t)(void *)addr & 0xF); + } + mcp->mc_ownedfp = npxgetregs(td, addr); + if (addr != (union savefpu *)&mcp->mc_fpstate) { + bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); + bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); + } + mcp->mc_fpformat = npxformat(); +#endif +} + +static int +set_fpcontext(struct thread *td, const mcontext_t *mcp) +{ + union savefpu *addr; + + if (mcp->mc_fpformat == _MC_FPFMT_NODEV) + return (0); + else if (mcp->mc_fpformat != _MC_FPFMT_387 && + mcp->mc_fpformat != _MC_FPFMT_XMM) + return (EINVAL); + else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) + /* We don't care what state is left in the FPU or PCB. */ + fpstate_drop(td); + else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || + mcp->mc_ownedfp == _MC_FPOWNED_PCB) { + /* XXX align as above. */ + addr = (union savefpu *)&mcp->mc_fpstate; + if (td == PCPU_GET(fpcurthread) && +#ifdef CPU_ENABLE_SSE + cpu_fxsr && +#endif + ((uintptr_t)(void *)addr & 0xF)) { + do + addr = (void *)((char *)addr + 4); + while ((uintptr_t)(void *)addr & 0xF); + bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); + } +#ifdef DEV_NPX + /* + * XXX we violate the dubious requirement that npxsetregs() + * be called with interrupts disabled. + */ + npxsetregs(td, addr); +#endif + /* + * Don't bother putting things back where they were in the + * misaligned case, since we know that the caller won't use + * them again. + */ + } else + return (EINVAL); + return (0); +} + +static void +fpstate_drop(struct thread *td) +{ + register_t s; + + s = intr_disable(); +#ifdef DEV_NPX + if (PCPU_GET(fpcurthread) == td) + npxdrop(); +#endif + /* + * XXX force a full drop of the npx. The above only drops it if we + * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. 
+ * + * XXX I don't much like npxgetregs()'s semantics of doing a full + * drop. Dropping only to the pcb matches fnsave's behaviour. + * We only need to drop to !PCB_INITDONE in sendsig(). But + * sendsig() is the only caller of npxgetregs()... perhaps we just + * have too many layers. + */ + curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; + intr_restore(s); +} + +int +fill_dbregs(struct thread *td, struct dbreg *dbregs) +{ + struct pcb *pcb; + + if (td == NULL) { + dbregs->dr[0] = rdr0(); + dbregs->dr[1] = rdr1(); + dbregs->dr[2] = rdr2(); + dbregs->dr[3] = rdr3(); + dbregs->dr[4] = rdr4(); + dbregs->dr[5] = rdr5(); + dbregs->dr[6] = rdr6(); + dbregs->dr[7] = rdr7(); + } else { + pcb = td->td_pcb; + dbregs->dr[0] = pcb->pcb_dr0; + dbregs->dr[1] = pcb->pcb_dr1; + dbregs->dr[2] = pcb->pcb_dr2; + dbregs->dr[3] = pcb->pcb_dr3; + dbregs->dr[4] = 0; + dbregs->dr[5] = 0; + dbregs->dr[6] = pcb->pcb_dr6; + dbregs->dr[7] = pcb->pcb_dr7; + } + return (0); +} + +int +set_dbregs(struct thread *td, struct dbreg *dbregs) +{ + struct pcb *pcb; + int i; + u_int32_t mask1, mask2; + + if (td == NULL) { + load_dr0(dbregs->dr[0]); + load_dr1(dbregs->dr[1]); + load_dr2(dbregs->dr[2]); + load_dr3(dbregs->dr[3]); + load_dr4(dbregs->dr[4]); + load_dr5(dbregs->dr[5]); + load_dr6(dbregs->dr[6]); + load_dr7(dbregs->dr[7]); + } else { + /* + * Don't let an illegal value for dr7 get set. Specifically, + * check for undefined settings. Setting these bit patterns + * result in undefined behaviour and can lead to an unexpected + * TRCTRAP. + */ + for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; + i++, mask1 <<= 2, mask2 <<= 2) + if ((dbregs->dr[7] & mask1) == mask2) + return (EINVAL); + + pcb = td->td_pcb; + + /* + * Don't let a process set a breakpoint that is not within the + * process's address space. If a process could do this, it + * could halt the system by setting a breakpoint in the kernel + * (if ddb was enabled). 
Thus, we need to check to make sure + * that no breakpoints are being enabled for addresses outside + * process's address space, unless, perhaps, we were called by + * uid 0. + * + * XXX - what about when the watched area of the user's + * address space is written into from within the kernel + * ... wouldn't that still cause a breakpoint to be generated + * from within kernel mode? + */ + + if (suser(td) != 0) { + if (dbregs->dr[7] & 0x3) { + /* dr0 is enabled */ + if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr[7] & (0x3<<2)) { + /* dr1 is enabled */ + if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr[7] & (0x3<<4)) { + /* dr2 is enabled */ + if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr[7] & (0x3<<6)) { + /* dr3 is enabled */ + if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) + return (EINVAL); + } + } + + pcb->pcb_dr0 = dbregs->dr[0]; + pcb->pcb_dr1 = dbregs->dr[1]; + pcb->pcb_dr2 = dbregs->dr[2]; + pcb->pcb_dr3 = dbregs->dr[3]; + pcb->pcb_dr6 = dbregs->dr[6]; + pcb->pcb_dr7 = dbregs->dr[7]; + + pcb->pcb_flags |= PCB_DBREGS; + } + + return (0); +} + +/* + * Return > 0 if a hardware breakpoint has been hit, and the + * breakpoint was in user space. Return 0, otherwise. 
+ */ +int +user_dbreg_trap(void) +{ + u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ + u_int32_t bp; /* breakpoint bits extracted from dr6 */ + int nbp; /* number of breakpoints that triggered */ + caddr_t addr[4]; /* breakpoint addresses */ + int i; + + dr7 = rdr7(); + if ((dr7 & 0x000000ff) == 0) { + /* + * all GE and LE bits in the dr7 register are zero, + * thus the trap couldn't have been caused by the + * hardware debug registers + */ + return 0; + } + + nbp = 0; + dr6 = rdr6(); + bp = dr6 & 0x0000000f; + + if (!bp) { + /* + * None of the breakpoint bits are set meaning this + * trap was not caused by any of the debug registers + */ + return 0; + } + + /* + * at least one of the breakpoints were hit, check to see + * which ones and if any of them are user space addresses + */ + + if (bp & 0x01) { + addr[nbp++] = (caddr_t)rdr0(); + } + if (bp & 0x02) { + addr[nbp++] = (caddr_t)rdr1(); + } + if (bp & 0x04) { + addr[nbp++] = (caddr_t)rdr2(); + } + if (bp & 0x08) { + addr[nbp++] = (caddr_t)rdr3(); + } + + for (i=0; i<nbp; i++) { + if (addr[i] < + (caddr_t)VM_MAXUSER_ADDRESS) { + /* + * addr[i] is in user space + */ + return nbp; + } + } + + /* + * None of the breakpoints are in user space. + */ + return 0; +} + +#ifndef DEV_APIC +#include <machine/apicvar.h> + +/* + * Provide stub functions so that the MADT APIC enumerator in the acpi + * kernel module will link against a kernel without 'device apic'. + * + * XXX - This is a gross hack. 
+ */ +void +apic_register_enumerator(struct apic_enumerator *enumerator) +{ +} + +void * +ioapic_create(uintptr_t addr, int32_t id, int intbase) +{ + return (NULL); +} + +int +ioapic_disable_pin(void *cookie, u_int pin) +{ + return (ENXIO); +} + +int +ioapic_get_vector(void *cookie, u_int pin) +{ + return (-1); +} + +void +ioapic_register(void *cookie) +{ +} + +int +ioapic_remap_vector(void *cookie, u_int pin, int vector) +{ + return (ENXIO); +} + +int +ioapic_set_extint(void *cookie, u_int pin) +{ + return (ENXIO); +} + +int +ioapic_set_nmi(void *cookie, u_int pin) +{ + return (ENXIO); +} + +int +ioapic_set_polarity(void *cookie, u_int pin,enum intr_polarity pol ) +{ + return (ENXIO); +} + +int +ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger ) +{ + return (ENXIO); +} + +void +lapic_create(u_int apic_id, int boot_cpu) +{ +} + +void +lapic_init(uintptr_t addr) +{ +} + +int +lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode) +{ + return (ENXIO); +} + +int +lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) +{ + return (ENXIO); +} + +int +lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) +{ + return (ENXIO); +} +#endif + +#ifdef KDB + +/* + * Provide inb() and outb() as functions. They are normally only + * available as macros calling inlined functions, thus cannot be + * called from the debugger. + * + * The actual code is stolen from <machine/cpufunc.h>, and de-inlined. + */ + +#undef inb +#undef outb + +/* silence compiler warnings */ +u_char inb(u_int); +void outb(u_int, u_char); + +u_char +inb(u_int port) +{ + u_char data; + /* + * We use %%dx and not %1 here because i/o is done at %dx and not at + * %edx, while gcc generates inferior code (movw instead of movl) + * if we tell it to load (u_short) port. 
+ */ + __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); + return (data); +} + +void +outb(u_int port, u_char data) +{ + u_char al; + /* + * Use an unnecessary assignment to help gcc's register allocator. + * This make a large difference for gcc-1.40 and a tiny difference + * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for + * best results. gcc-2.6.0 can't handle this. + */ + al = data; + __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); +} + +#endif /* KDB */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c new file mode 100644 index 0000000000..af07002ebb --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c @@ -0,0 +1,150 @@ +/*- + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you + * can do whatever you want with this stuff. If we meet some day, and you think + * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp + * ---------------------------------------------------------------------------- + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/mp_clock.c,v 1.19 2004/05/30 20:34:57 phk Exp $"); + +/*- + * Just when we thought life were beautiful, reality pops its grim face over + * the edge again: + * + * ] 20. ACPI Timer Errata + * ] + * ] Problem: The power management timer may return improper result when + * ] read. Although the timer value settles properly after incrementing, + * ] while incrementing there is a 3nS window every 69.8nS where the + * ] timer value is indeterminate (a 4.2% chance that the data will be + * ] incorrect when read). As a result, the ACPI free running count up + * ] timer specification is violated due to erroneous reads. 
Implication: + * ] System hangs due to the "inaccuracy" of the timer when used by + * ] software for time critical events and delays. + * ] + * ] Workaround: Read the register twice and compare. + * ] Status: This will not be fixed in the PIIX4 or PIIX4E. + * + * The counter is in other words not latched to the PCI bus clock when + * read. Notice the workaround isn't: We need to read until we have + * three monotonic samples and then use the middle one, otherwise we are + * not protected against the fact that the bits can be wrong in two + * directions. If we only cared about monosity two reads would be enough. + */ + +/* #include "opt_bus.h" */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/timetc.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/sysctl.h> +#include <sys/bus.h> + +#include <dev/pci/pcireg.h> +#include <dev/pci/pcivar.h> + +static unsigned piix_get_timecount(struct timecounter *tc); + +static u_int32_t piix_timecounter_address; +static u_int piix_freq = 14318182/4; + +static struct timecounter piix_timecounter = { + piix_get_timecount, /* get_timecount */ + 0, /* no poll_pps */ + 0xffffff, /* counter_mask */ + 0, /* frequency */ + "PIIX" /* name */ +}; + + +static int +sysctl_machdep_piix_freq(SYSCTL_HANDLER_ARGS) +{ + int error; + u_int freq; + + if (piix_timecounter.tc_frequency == 0) + return (EOPNOTSUPP); + freq = piix_freq; + error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); + if (error == 0 && req->newptr != NULL) { + piix_freq = freq; + piix_timecounter.tc_frequency = piix_freq; + } + return (error); +} + +SYSCTL_PROC(_machdep, OID_AUTO, piix_freq, CTLTYPE_INT | CTLFLAG_RW, + 0, sizeof(u_int), sysctl_machdep_piix_freq, "I", ""); + +static unsigned +piix_get_timecount(struct timecounter *tc) +{ + unsigned u1, u2, u3; + + u2 = inl(piix_timecounter_address); + u3 = inl(piix_timecounter_address); + do { + u1 = u2; + u2 = u3; + u3 = inl(piix_timecounter_address); + } while (u1 > u2 || u2 > u3); + 
return (u2); +} + +static int +piix_probe(device_t dev) +{ + u_int32_t d; + + if (devclass_get_device(devclass_find("acpi"), 0) != NULL) + return (ENXIO); + switch (pci_get_devid(dev)) { + case 0x71138086: + device_set_desc(dev, "PIIX Timecounter"); + break; + default: + return (ENXIO); + } + + d = pci_read_config(dev, PCIR_COMMAND, 2); + if (!(d & PCIM_CMD_PORTEN)) { + device_printf(dev, "PIIX I/O space not mapped\n"); + return (ENXIO); + } + return (0); +} + +static int +piix_attach(device_t dev) +{ + u_int32_t d; + + d = pci_read_config(dev, 0x40, 4); + piix_timecounter_address = (d & 0xffc0) + 8; + piix_timecounter.tc_frequency = piix_freq; + tc_init(&piix_timecounter); + return (0); +} + +static device_method_t piix_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, piix_probe), + DEVMETHOD(device_attach, piix_attach), + { 0, 0 } +}; + +static driver_t piix_driver = { + "piix", + piix_methods, + 1, +}; + +static devclass_t piix_devclass; + +DRIVER_MODULE(piix, pci, piix_driver, piix_devclass, 0, 0); diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c new file mode 100644 index 0000000000..b975c9e491 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c @@ -0,0 +1,1315 @@ +/*- + * Copyright (c) 1996, by Steve Passe + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.235.2.3 2004/09/24 15:02:33 rik Exp $"); + +#include "opt_apic.h" +#include "opt_cpu.h" +#include "opt_kstack_pages.h" +#include "opt_mp_watchdog.h" + +#if !defined(lint) +#if !defined(SMP) +#error How did you get here? +#endif + +#if defined(I386_CPU) && !defined(COMPILING_LINT) +#error SMP not supported with I386_CPU +#endif +#if 0 +#ifndef DEV_APIC +#error The apic device is required for SMP, add "device apic" to your config file. 
+#endif +#endif +#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) +#error SMP not supported with CPU_DISABLE_CMPXCHG +#endif +#endif /* not lint */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/cons.h> /* cngetc() */ +#ifdef GPROF +#include <sys/gmon.h> +#endif +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/memrange.h> +#include <sys/mutex.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/smp.h> +#include <sys/sysctl.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> +#include <vm/vm_kern.h> +#include <vm/vm_extern.h> + +#include <machine/apicreg.h> +#include <machine/clock.h> +#include <machine/md_var.h> +#include <machine/mp_watchdog.h> +#include <machine/pcb.h> +#include <machine/smp.h> +#include <machine/smptests.h> /** COUNT_XINVLTLB_HITS */ +#include <machine/specialreg.h> +#include <machine/privatespace.h> + +#include <machine/xenfunc.h> + +#define WARMBOOT_TARGET 0 +#define WARMBOOT_OFF (KERNBASE + 0x0467) +#define WARMBOOT_SEG (KERNBASE + 0x0469) + +#define CMOS_REG (0x70) +#define CMOS_DATA (0x71) +#define BIOS_RESET (0x0f) +#define BIOS_WARM (0x0a) + +/* + * this code MUST be enabled here and in mpboot.s. + * it follows the very early stages of AP boot by placing values in CMOS ram. + * it NORMALLY will never be needed and thus the primitive method for enabling. 
+ * +#define CHECK_POINTS + */ + +#if defined(CHECK_POINTS) && !defined(PC98) +#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) +#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) + +#define CHECK_INIT(D); \ + CHECK_WRITE(0x34, (D)); \ + CHECK_WRITE(0x35, (D)); \ + CHECK_WRITE(0x36, (D)); \ + CHECK_WRITE(0x37, (D)); \ + CHECK_WRITE(0x38, (D)); \ + CHECK_WRITE(0x39, (D)); + +#define CHECK_PRINT(S); \ + printf("%s: %d, %d, %d, %d, %d, %d\n", \ + (S), \ + CHECK_READ(0x34), \ + CHECK_READ(0x35), \ + CHECK_READ(0x36), \ + CHECK_READ(0x37), \ + CHECK_READ(0x38), \ + CHECK_READ(0x39)); + +#else /* CHECK_POINTS */ + +#define CHECK_INIT(D) +#define CHECK_PRINT(S) +#define CHECK_WRITE(A, D) + +#endif /* CHECK_POINTS */ + +/* + * Values to send to the POST hardware. + */ +#define MP_BOOTADDRESS_POST 0x10 +#define MP_PROBE_POST 0x11 +#define MPTABLE_PASS1_POST 0x12 + +#define MP_START_POST 0x13 +#define MP_ENABLE_POST 0x14 +#define MPTABLE_PASS2_POST 0x15 + +#define START_ALL_APS_POST 0x16 +#define INSTALL_AP_TRAMP_POST 0x17 +#define START_AP_POST 0x18 + +#define MP_ANNOUNCE_POST 0x19 + +/* lock region used by kernel profiling */ +int mcount_lock; + +/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ +int current_postcode; + +int mp_naps; /* # of Applications processors */ +int boot_cpu_id = -1; /* designated BSP */ +extern int nkpt; + +/* + * CPU topology map datastructures for HTT. + */ +static struct cpu_group mp_groups[MAXCPU]; +static struct cpu_top mp_top; + +/* AP uses this during bootstrap. Do not staticize. */ +char *bootSTK; +static int bootAP; + +/* Hotwire a 0->4MB V==P mapping */ +extern pt_entry_t *KPTphys; + +/* SMP page table page */ +extern pt_entry_t *SMPpt; + +struct pcb stoppcbs[MAXCPU]; + +/* Variables needed for SMP tlb shootdown. */ +vm_offset_t smp_tlb_addr1; +vm_offset_t smp_tlb_addr2; +volatile int smp_tlb_wait; + +/* + * Local data and functions. 
+ */ + +static u_int logical_cpus; + +/* used to hold the AP's until we are ready to release them */ +static struct mtx ap_boot_mtx; + +/* Set to 1 once we're ready to let the APs out of the pen. */ +static volatile int aps_ready = 0; + +/* + * Store data from cpu_add() until later in the boot when we actually setup + * the APs. + */ +struct cpu_info { + int cpu_present:1; + int cpu_bsp:1; +} static cpu_info[MAXCPU]; +static int cpu_apic_ids[MAXCPU]; + +static u_int boot_address; + +static void set_logical_apic_ids(void); +static int start_all_aps(void); +static void install_ap_tramp(void); +static int start_ap(int apic_id); +static void release_aps(void *dummy); + +static int hlt_logical_cpus; +static struct sysctl_ctx_list logical_cpu_clist; + +static void +mem_range_AP_init(void) +{ + if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) + mem_range_softc.mr_op->initAP(&mem_range_softc); +} + +void +mp_topology(void) +{ + struct cpu_group *group; + int logical_cpus; + int apic_id; + int groups; + int cpu; + + /* Build the smp_topology map. */ + /* Nothing to do if there is no HTT support. */ + if ((cpu_feature & CPUID_HTT) == 0) + return; + logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; + if (logical_cpus <= 1) + return; + group = &mp_groups[0]; + groups = 1; + for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) { + if (!cpu_info[apic_id].cpu_present) + continue; + /* + * If the current group has members and we're not a logical + * cpu, create a new group. + */ + if (group->cg_count != 0 && (apic_id % logical_cpus) == 0) { + group++; + groups++; + } + group->cg_count++; + group->cg_mask |= 1 << cpu; + cpu++; + } + + mp_top.ct_count = groups; + mp_top.ct_group = mp_groups; + smp_topology = &mp_top; +} + + +/* + * Calculate usable address in base memory for AP trampoline code. 
+ */ +u_int +mp_bootaddress(u_int basemem) +{ + POSTCODE(MP_BOOTADDRESS_POST); + + boot_address = trunc_page(basemem); /* round down to 4k boundary */ + if ((basemem - boot_address) < bootMP_size) + boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ + + return boot_address; +} + +void +cpu_add(u_int apic_id, char boot_cpu) +{ + + if (apic_id >= MAXCPU) { + printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n", + apic_id, MAXCPU - 1); + return; + } + KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", + apic_id)); + cpu_info[apic_id].cpu_present = 1; + if (boot_cpu) { + KASSERT(boot_cpu_id == -1, + ("CPU %d claims to be BSP, but CPU %d already is", apic_id, + boot_cpu_id)); + boot_cpu_id = apic_id; + cpu_info[apic_id].cpu_bsp = 1; + } + mp_ncpus++; + if (bootverbose) + printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : + "AP"); + +} + +void +cpu_mp_setmaxid(void) +{ + + mp_maxid = MAXCPU - 1; +} + +int +cpu_mp_probe(void) +{ + + /* + * Always record BSP in CPU map so that the mbuf init code works + * correctly. + */ + all_cpus = 1; + if (mp_ncpus == 0) { + /* + * No CPUs were found, so this must be a UP system. Setup + * the variables to represent a system with a single CPU + * with an id of 0. + */ + mp_ncpus = 1; + return (0); + } + + /* At least one CPU was found. */ + if (mp_ncpus == 1) { + /* + * One CPU was found, so this must be a UP system with + * an I/O APIC. + */ + return (0); + } + + /* At least two CPUs were found. */ + return (1); +} + +/* + * Initialize the IPI handlers and start up the AP's. + */ +void +cpu_mp_start(void) +{ + int i; + + POSTCODE(MP_START_POST); + + /* Initialize the logical ID to APIC ID table. 
*/ + for (i = 0; i < MAXCPU; i++) + cpu_apic_ids[i] = -1; + + /* Install an inter-CPU IPI for TLB invalidation */ + setidt(IPI_INVLTLB, IDTVEC(invltlb), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_INVLPG, IDTVEC(invlpg), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_INVLRNG, IDTVEC(invlrng), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install an inter-CPU IPI for forwarding hardclock() */ + setidt(IPI_HARDCLOCK, IDTVEC(hardclock), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install an inter-CPU IPI for forwarding statclock() */ + setidt(IPI_STATCLOCK, IDTVEC(statclock), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install an inter-CPU IPI for lazy pmap release */ + setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install an inter-CPU IPI for all-CPU rendezvous */ + setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install an inter-CPU IPI for forcing an additional software trap */ + setidt(IPI_AST, IDTVEC(cpuast), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install an inter-CPU IPI for CPU stop/restart */ + setidt(IPI_STOP, IDTVEC(cpustop), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + + /* Set boot_cpu_id if needed. */ + if (boot_cpu_id == -1) { + boot_cpu_id = PCPU_GET(apic_id); + cpu_info[boot_cpu_id].cpu_bsp = 1; + } else + KASSERT(boot_cpu_id == PCPU_GET(apic_id), + ("BSP's APIC ID doesn't match boot_cpu_id")); + cpu_apic_ids[0] = boot_cpu_id; + + /* Start each Application Processor */ + start_all_aps(); + + /* Setup the initial logical CPUs info. */ + logical_cpus = logical_cpus_mask = 0; + if (cpu_feature & CPUID_HTT) + logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; + + set_logical_apic_ids(); +} + + +/* + * Print various information about the SMP system hardware and setup. 
+ */ +void +cpu_mp_announce(void) +{ + int i, x; + + POSTCODE(MP_ANNOUNCE_POST); + + /* List CPUs */ + printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); + for (i = 1, x = 0; x < MAXCPU; x++) { + if (cpu_info[x].cpu_present && !cpu_info[x].cpu_bsp) { + KASSERT(i < mp_ncpus, + ("mp_ncpus and actual cpus are out of whack")); + printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); + } + } +} + +/* + * AP CPU's call this to initialize themselves. + */ +void +init_secondary(void) +{ + int gsel_tss; + int x, myid; +#if 0 + u_int cr0; +#endif + /* bootAP is set in start_ap() to our ID. */ + myid = bootAP; + gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid]; + gdt_segs[GPROC0_SEL].ssd_base = + (int) &SMP_prvspace[myid].pcpu.pc_common_tss; + SMP_prvspace[myid].pcpu.pc_prvspace = + &SMP_prvspace[myid].pcpu; + + for (x = 0; x < NGDT; x++) { + ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); + } + +#if 0 + r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; + r_gdt.rd_base = (int) &gdt[myid * NGDT]; + lgdt(&r_gdt); /* does magic intra-segment return */ + + lidt(&r_idt); + lldt(_default_ldt); +#endif + PCPU_SET(currentldt, _default_ldt); + + gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); + gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; + PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ + PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); + PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); + PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); + PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); +#if 0 + ltr(gsel_tss); + + /* + * Set to a known state: + * Set by mpboot.s: CR0_PG, CR0_PE + * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM + */ + cr0 = rcr0(); + cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); + load_cr0(cr0); +#endif + CHECK_WRITE(0x38, 5); + + /* Disable local APIC just to be sure. */ + lapic_disable(); + + /* signal our startup to the BSP. */ + mp_naps++; + CHECK_WRITE(0x39, 6); + + /* Spin until the BSP releases the AP's. 
*/ + while (!aps_ready) + ia32_pause(); + + /* BSP may have changed PTD while we were waiting */ + invltlb(); + pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); + +#if defined(I586_CPU) && !defined(NO_F00F_HACK) + lidt(&r_idt); +#endif + + /* set up CPU registers and state */ + cpu_setregs(); + + /* set up FPU state on the AP */ + npxinit(__INITIAL_NPXCW__); + + /* set up SSE registers */ + enable_sse(); + + /* A quick check from sanity claus */ + if (PCPU_GET(apic_id) != lapic_id()) { + printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); + printf("SMP: actual apic_id = %d\n", lapic_id()); + printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); + printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]); + panic("cpuid mismatch! boom!!"); + } + + mtx_lock_spin(&ap_boot_mtx); + + /* Init local apic for irq's */ + lapic_setup(); + + /* Set memory range attributes for this CPU to match the BSP */ + mem_range_AP_init(); + + smp_cpus++; + + CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); + printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + + /* Determine if we are a logical CPU. */ + if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) + logical_cpus_mask |= PCPU_GET(cpumask); + + /* Build our map of 'other' CPUs. 
*/ + PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + + if (bootverbose) + lapic_dump("AP"); + + if (smp_cpus == mp_ncpus) { + /* enable IPI's, tlb shootdown, freezes etc */ + atomic_store_rel_int(&smp_started, 1); + smp_active = 1; /* historic */ + } + + mtx_unlock_spin(&ap_boot_mtx); + + /* wait until all the AP's are up */ + while (smp_started == 0) + ia32_pause(); + + /* ok, now grab sched_lock and enter the scheduler */ + mtx_lock_spin(&sched_lock); + + binuptime(PCPU_PTR(switchtime)); + PCPU_SET(switchticks, ticks); + + cpu_throw(NULL, choosethread()); /* doesn't return */ + + panic("scheduler returned us to %s", __func__); + /* NOTREACHED */ +} + +/******************************************************************* + * local functions and data + */ + +/* + * Set the APIC logical IDs. + * + * We want to cluster logical CPU's within the same APIC ID cluster. + * Since logical CPU's are aligned simply filling in the clusters in + * APIC ID order works fine. Note that this does not try to balance + * the number of CPU's in each cluster. (XXX?) + */ +static void +set_logical_apic_ids(void) +{ + u_int apic_id, cluster, cluster_id; + + /* Force us to allocate cluster 0 at the start. 
*/ + cluster = -1; + cluster_id = APIC_MAX_INTRACLUSTER_ID; + for (apic_id = 0; apic_id < MAXCPU; apic_id++) { + if (!cpu_info[apic_id].cpu_present) + continue; + if (cluster_id == APIC_MAX_INTRACLUSTER_ID) { + cluster = ioapic_next_logical_cluster(); + cluster_id = 0; + } else + cluster_id++; + if (bootverbose) + printf("APIC ID: physical %u, logical %u:%u\n", + apic_id, cluster, cluster_id); + lapic_set_logical_id(apic_id, cluster, cluster_id); + } +} + +/* + * start each AP in our list + */ +static int +start_all_aps(void) +{ +#ifndef PC98 + u_char mpbiosreason; +#endif + u_long mpbioswarmvec; + struct pcpu *pc; + char *stack; + uintptr_t kptbase; + int i, pg, apic_id, cpu; + + POSTCODE(START_ALL_APS_POST); + + mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); + + /* install the AP 1st level boot code */ + install_ap_tramp(); + + /* save the current value of the warm-start vector */ + mpbioswarmvec = *((u_long *) WARMBOOT_OFF); +#ifndef PC98 + outb(CMOS_REG, BIOS_RESET); + mpbiosreason = inb(CMOS_DATA); +#endif + + /* set up temporary P==V mapping for AP boot */ + /* XXX this is a hack, we should boot the AP on its own stack/PTD */ + kptbase = (uintptr_t)(void *)KPTphys; + for (i = 0; i < NKPT; i++) + PTD[i] = (pd_entry_t)(PG_V | PG_RW | + ((kptbase + i * PAGE_SIZE) & PG_FRAME)); + invltlb(); + + /* start each AP */ + for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) { + if (!cpu_info[apic_id].cpu_present || + cpu_info[apic_id].cpu_bsp) + continue; + cpu++; + + /* save APIC ID for this logical ID */ + cpu_apic_ids[cpu] = apic_id; + + /* first page of AP's private space */ + pg = cpu * i386_btop(sizeof(struct privatespace)); + + /* allocate a new private data page */ + pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE); + + /* wire it into the private page table page */ + SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc)); + + /* allocate and set up an idle stack data page */ + stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* 
XXXKSE */ + for (i = 0; i < KSTACK_PAGES; i++) + SMPpt[pg + 1 + i] = (pt_entry_t) + (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); + + /* prime data page for it to use */ + pcpu_init(pc, cpu, sizeof(struct pcpu)); + pc->pc_apic_id = apic_id; + + /* setup a vector to our boot code */ + *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; + *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); +#ifndef PC98 + outb(CMOS_REG, BIOS_RESET); + outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ +#endif + + bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES * + PAGE_SIZE]; + bootAP = cpu; + + /* attempt to start the Application Processor */ + CHECK_INIT(99); /* setup checkpoints */ + if (!start_ap(apic_id)) { + printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); + CHECK_PRINT("trace"); /* show checkpoints */ + /* better panic as the AP may be running loose */ + printf("panic y/n? [y] "); + if (cngetc() != 'n') + panic("bye-bye"); + } + CHECK_PRINT("trace"); /* show checkpoints */ + + all_cpus |= (1 << cpu); /* record AP in CPU map */ + } + + /* build our map of 'other' CPUs */ + PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + + /* restore the warmstart vector */ + *(u_long *) WARMBOOT_OFF = mpbioswarmvec; +#ifndef PC98 + outb(CMOS_REG, BIOS_RESET); + outb(CMOS_DATA, mpbiosreason); +#endif + + /* + * Set up the idle context for the BSP. Similar to above except + * that some was done by locore, some by pmap.c and some is implicit + * because the BSP is cpu#0 and the page is initially zero and also + * because we can refer to variables by name on the BSP.. 
+ */ + + /* Allocate and setup BSP idle stack */ + stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); + for (i = 0; i < KSTACK_PAGES; i++) + SMPpt[1 + i] = (pt_entry_t) + (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); + + for (i = 0; i < NKPT; i++) + PTD[i] = 0; + pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); + + /* number of APs actually started */ + return mp_naps; +} + +/* + * load the 1st level AP boot code into base memory. + */ + +/* targets for relocation */ +extern void bigJump(void); +extern void bootCodeSeg(void); +extern void bootDataSeg(void); +extern void MPentry(void); +extern u_int MP_GDT; +extern u_int mp_gdtbase; + +static void +install_ap_tramp(void) +{ + int x; + int size = *(int *) ((u_long) & bootMP_size); + vm_offset_t va = boot_address + KERNBASE; + u_char *src = (u_char *) ((u_long) bootMP); + u_char *dst = (u_char *) va; + u_int boot_base = (u_int) bootMP; + u_int8_t *dst8; + u_int16_t *dst16; + u_int32_t *dst32; + + POSTCODE(INSTALL_AP_TRAMP_POST); + + KASSERT (size <= PAGE_SIZE, + ("'size' do not fit into PAGE_SIZE, as expected.")); + pmap_kenter(va, boot_address); + pmap_invalidate_page (kernel_pmap, va); + for (x = 0; x < size; ++x) + *dst++ = *src++; + + /* + * modify addresses in code we just moved to basemem. unfortunately we + * need fairly detailed info about mpboot.s for this to work. changes + * to mpboot.s might require changes here. 
+ */ + + /* boot code is located in KERNEL space */ + dst = (u_char *) va; + + /* modify the lgdt arg */ + dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); + *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); + + /* modify the ljmp target for MPentry() */ + dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); + *dst32 = ((u_int) MPentry - KERNBASE); + + /* modify the target for boot code segment */ + dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); + dst8 = (u_int8_t *) (dst16 + 1); + *dst16 = (u_int) boot_address & 0xffff; + *dst8 = ((u_int) boot_address >> 16) & 0xff; + + /* modify the target for boot data segment */ + dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); + dst8 = (u_int8_t *) (dst16 + 1); + *dst16 = (u_int) boot_address & 0xffff; + *dst8 = ((u_int) boot_address >> 16) & 0xff; +} + +/* + * This function starts the AP (application processor) identified + * by the APIC ID 'physicalCpu'. It does quite a "song and dance" + * to accomplish this. This is necessary because of the nuances + * of the different hardware we might encounter. It isn't pretty, + * but it seems to work. + */ +static int +start_ap(int apic_id) +{ + int vector, ms; + int cpus; + + POSTCODE(START_AP_POST); + + /* calculate the vector */ + vector = (boot_address >> 12) & 0xff; + + /* used as a watchpoint to signal AP startup */ + cpus = mp_naps; + + /* + * first we do an INIT/RESET IPI this INIT IPI might be run, reseting + * and running the target CPU. OR this INIT IPI might be latched (P5 + * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be + * ignored. 
+ */ + + /* do an INIT IPI: assert RESET */ + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); + + /* wait for pending status end */ + lapic_ipi_wait(-1); + + /* do an INIT IPI: deassert RESET */ + lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0); + + /* wait for pending status end */ + DELAY(10000); /* wait ~10mS */ + lapic_ipi_wait(-1); + + /* + * next we do a STARTUP IPI: the previous INIT IPI might still be + * latched, (P5 bug) this 1st STARTUP would then terminate + * immediately, and the previously started INIT IPI would continue. OR + * the previous INIT IPI has already run. and this STARTUP IPI will + * run. OR the previous INIT IPI was ignored. and this STARTUP IPI + * will run. + */ + + /* do a STARTUP IPI */ + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | + vector, apic_id); + lapic_ipi_wait(-1); + DELAY(200); /* wait ~200uS */ + + /* + * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF + * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR + * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is + * recognized after hardware RESET or INIT IPI. + */ + + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | + vector, apic_id); + lapic_ipi_wait(-1); + DELAY(200); /* wait ~200uS */ + + /* Wait up to 5 seconds for it to start. 
*/ + for (ms = 0; ms < 5000; ms++) { + if (mp_naps > cpus) + return 1; /* return SUCCESS */ + DELAY(1000); + } + return 0; /* return FAILURE */ +} + +#ifdef COUNT_XINVLTLB_HITS +u_int xhits_gbl[MAXCPU]; +u_int xhits_pg[MAXCPU]; +u_int xhits_rng[MAXCPU]; +SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, + sizeof(xhits_gbl), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, + sizeof(xhits_pg), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, + sizeof(xhits_rng), "IU", ""); + +u_int ipi_global; +u_int ipi_page; +u_int ipi_range; +u_int ipi_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, + 0, ""); + +u_int ipi_masked_global; +u_int ipi_masked_page; +u_int ipi_masked_range; +u_int ipi_masked_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, + &ipi_masked_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, + &ipi_masked_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, + &ipi_masked_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, + &ipi_masked_range_size, 0, ""); +#endif /* COUNT_XINVLTLB_HITS */ + +/* + * Flush the TLB on all other CPU's + */ +static void +smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) +{ + u_int ncpu; + + ncpu = mp_ncpus - 1; /* does not shootdown self */ + if (ncpu < 1) + return; /* no other cpus */ + mtx_assert(&smp_rv_mtx, MA_OWNED); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + atomic_store_rel_int(&smp_tlb_wait, 0); + ipi_all_but_self(vector); + while (smp_tlb_wait < ncpu) + 
ia32_pause(); +} + +/* + * This is about as magic as it gets. fortune(1) has got similar code + * for reversing bits in a word. Who thinks up this stuff?? + * + * Yes, it does appear to be consistently faster than: + * while (i = ffs(m)) { + * m >>= i; + * bits++; + * } + * and + * while (lsb = (m & -m)) { // This is magic too + * m &= ~lsb; // or: m ^= lsb + * bits++; + * } + * Both of these latter forms do some very strange things on gcc-3.1 with + * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2. + * There is probably an SSE or MMX popcnt instruction. + * + * I wonder if this should be in libkern? + * + * XXX Stop the presses! Another one: + * static __inline u_int32_t + * popcnt1(u_int32_t v) + * { + * v -= ((v >> 1) & 0x55555555); + * v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + * v = (v + (v >> 4)) & 0x0F0F0F0F; + * return (v * 0x01010101) >> 24; + * } + * The downside is that it has a multiply. With a pentium3 with + * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use + * an imull, and in that case it is faster. In most other cases + * it appears slightly slower. 
+ * + * Another variant (also from fortune): + * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255) + * #define BX_(x) ((x) - (((x)>>1)&0x77777777) \ + * - (((x)>>2)&0x33333333) \ + * - (((x)>>3)&0x11111111)) + */ +static __inline u_int32_t +popcnt(u_int32_t m) +{ + + m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1); + m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2); + m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4); + m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8); + m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16); + return m; +} + +static void +smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +{ + int ncpu, othercpus; + + othercpus = mp_ncpus - 1; + if (mask == (u_int)-1) { + ncpu = othercpus; + if (ncpu < 1) + return; + } else { + mask &= ~PCPU_GET(cpumask); + if (mask == 0) + return; + ncpu = popcnt(mask); + if (ncpu > othercpus) { + /* XXX this should be a panic offence */ + printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", + ncpu, othercpus); + ncpu = othercpus; + } + /* XXX should be a panic, implied by mask == 0 above */ + if (ncpu < 1) + return; + } + mtx_assert(&smp_rv_mtx, MA_OWNED); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + atomic_store_rel_int(&smp_tlb_wait, 0); + if (mask == (u_int)-1) + ipi_all_but_self(vector); + else + ipi_selected(mask, vector); + while (smp_tlb_wait < ncpu) + ia32_pause(); +} + +void +smp_invltlb(void) +{ + if (smp_started) { + smp_tlb_shootdown(IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; +#endif + } +} + +void +smp_invlpg(vm_offset_t addr) +{ + if (smp_started) { + smp_tlb_shootdown(IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_page++; +#endif + } +} + +void +smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) +{ + if (smp_started) { + smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +} + +void +smp_masked_invltlb(u_int 
mask) +{ + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_global++; +#endif + } +} + +void +smp_masked_invlpg(u_int mask, vm_offset_t addr) +{ + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_page++; +#endif + } +} + +void +smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) +{ + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_range++; + ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +} + + +/* + * For statclock, we send an IPI to all CPU's to have them call this + * function. + */ +void +forwarded_statclock(struct clockframe frame) +{ + struct thread *td; + + CTR0(KTR_SMP, "forwarded_statclock"); + td = curthread; + td->td_intr_nesting_level++; + if (profprocs != 0) + profclock(&frame); + if (pscnt == psdiv) + statclock(&frame); + td->td_intr_nesting_level--; +} + +void +forward_statclock(void) +{ + int map; + + CTR0(KTR_SMP, "forward_statclock"); + + if (!smp_started || cold || panicstr) + return; + + map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask); + if (map != 0) + ipi_selected(map, IPI_STATCLOCK); +} + +/* + * For each hardclock(), we send an IPI to all other CPU's to have them + * execute this function. It would be nice to reduce contention on + * sched_lock if we could simply peek at the CPU to determine the user/kernel + * state and call hardclock_process() on the CPU receiving the clock interrupt + * and then just use a simple IPI to handle any ast's if needed. 
+ */ +void +forwarded_hardclock(struct clockframe frame) +{ + struct thread *td; + + CTR0(KTR_SMP, "forwarded_hardclock"); + td = curthread; + td->td_intr_nesting_level++; + hardclock_process(&frame); + td->td_intr_nesting_level--; +} + +void +forward_hardclock(void) +{ + u_int map; + + CTR0(KTR_SMP, "forward_hardclock"); + + if (!smp_started || cold || panicstr) + return; + + map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask); + if (map != 0) + ipi_selected(map, IPI_HARDCLOCK); +} + +/* + * send an IPI to a set of cpus. + */ +void +ipi_selected(u_int32_t cpus, u_int ipi) +{ + int cpu; + + CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); + while ((cpu = ffs(cpus)) != 0) { + cpu--; + KASSERT(cpu_apic_ids[cpu] != -1, + ("IPI to non-existent CPU %d", cpu)); + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); + cpus &= ~(1 << cpu); + } +} + +/* + * send an IPI INTerrupt containing 'vector' to all CPUs, including myself + */ +void +ipi_all(u_int ipi) +{ + + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); + lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL); +} + +/* + * send an IPI to all CPUs EXCEPT myself + */ +void +ipi_all_but_self(u_int ipi) +{ + + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); + lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); +} + +/* + * send an IPI to myself + */ +void +ipi_self(u_int ipi) +{ + + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); + lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF); +} + +/* + * This is called once the rest of the system is up and running and we're + * ready to let the AP's out of the pen. 
+ */ +static void +release_aps(void *dummy __unused) +{ + + if (mp_ncpus == 1) + return; + mtx_lock_spin(&sched_lock); + atomic_store_rel_int(&aps_ready, 1); + while (smp_started == 0) + ia32_pause(); + mtx_unlock_spin(&sched_lock); +} +SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); + +static int +sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) +{ + u_int mask; + int error; + + mask = hlt_cpus_mask; + error = sysctl_handle_int(oidp, &mask, 0, req); + if (error || !req->newptr) + return (error); + + if (logical_cpus_mask != 0 && + (mask & logical_cpus_mask) == logical_cpus_mask) + hlt_logical_cpus = 1; + else + hlt_logical_cpus = 0; + + if ((mask & all_cpus) == all_cpus) + mask &= ~(1<<0); + hlt_cpus_mask = mask; + return (error); +} +SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, + 0, 0, sysctl_hlt_cpus, "IU", + "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); + +static int +sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) +{ + int disable, error; + + disable = hlt_logical_cpus; + error = sysctl_handle_int(oidp, &disable, 0, req); + if (error || !req->newptr) + return (error); + + if (disable) + hlt_cpus_mask |= logical_cpus_mask; + else + hlt_cpus_mask &= ~logical_cpus_mask; + + if ((hlt_cpus_mask & all_cpus) == all_cpus) + hlt_cpus_mask &= ~(1<<0); + + hlt_logical_cpus = disable; + return (error); +} + +static void +cpu_hlt_setup(void *dummy __unused) +{ + + if (logical_cpus_mask != 0) { + TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", + &hlt_logical_cpus); + sysctl_ctx_init(&logical_cpu_clist); + SYSCTL_ADD_PROC(&logical_cpu_clist, + SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, + "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0, + sysctl_hlt_logical_cpus, "IU", ""); + SYSCTL_ADD_UINT(&logical_cpu_clist, + SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, + "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD, + &logical_cpus_mask, 0, ""); + + if (hlt_logical_cpus) + hlt_cpus_mask |= logical_cpus_mask; + } +} +SYSINIT(cpu_hlt, SI_SUB_SMP, 
SI_ORDER_ANY, cpu_hlt_setup, NULL); + +int +mp_grab_cpu_hlt(void) +{ + u_int mask = PCPU_GET(cpumask); +#ifdef MP_WATCHDOG + u_int cpuid = PCPU_GET(cpuid); +#endif + int retval; + +#ifdef MP_WATCHDOG + ap_watchdog(cpuid); +#endif + + retval = mask & hlt_cpus_mask; + while (mask & hlt_cpus_mask) + __asm __volatile("sti; hlt" : : : "memory"); + return (retval); +} diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c new file mode 100644 index 0000000000..2f0aff0055 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c @@ -0,0 +1,974 @@ +/*- + * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> + * Copyright (c) 1996, by Steve Passe + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/mptable.c,v 1.235.2.1 2004/09/28 16:24:09 jhb Exp $"); + +#include "opt_mptable_force_htt.h" +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/malloc.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> + +#include <machine/apicreg.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> +#include <machine/apicvar.h> +#include <machine/md_var.h> +#include <machine/mptable.h> +#include <machine/specialreg.h> + +#include <dev/pci/pcivar.h> + +/* string defined by the Intel MP Spec as identifying the MP table */ +#define MP_SIG 0x5f504d5f /* _MP_ */ + +#define NAPICID 32 /* Max number of APIC's */ + +#ifdef PC98 +#define BIOS_BASE (0xe8000) +#define BIOS_SIZE (0x18000) +#else +#define BIOS_BASE (0xf0000) +#define BIOS_SIZE (0x10000) +#endif +#define BIOS_COUNT (BIOS_SIZE/4) + +typedef void mptable_entry_handler(u_char *entry, void *arg); + +static basetable_entry basetable_entry_types[] = +{ + {0, 20, "Processor"}, + {1, 8, "Bus"}, + {2, 8, "I/O APIC"}, + {3, 8, "I/O INT"}, + {4, 8, "Local INT"} +}; + +typedef struct BUSDATA { + u_char bus_id; + enum busTypes bus_type; +} bus_datum; + +typedef struct INTDATA { + u_char int_type; + u_short int_flags; + u_char src_bus_id; + u_char src_bus_irq; + u_char dst_apic_id; + u_char dst_apic_int; + u_char int_vector; +} io_int, local_int; + +typedef struct BUSTYPENAME { + u_char type; + char name[7]; +} bus_type_name; + +/* From MP spec v1.4, table 4-8. 
*/ +static bus_type_name bus_type_table[] = +{ + {UNKNOWN_BUSTYPE, "CBUS "}, + {UNKNOWN_BUSTYPE, "CBUSII"}, + {EISA, "EISA "}, + {UNKNOWN_BUSTYPE, "FUTURE"}, + {UNKNOWN_BUSTYPE, "INTERN"}, + {ISA, "ISA "}, + {UNKNOWN_BUSTYPE, "MBI "}, + {UNKNOWN_BUSTYPE, "MBII "}, + {MCA, "MCA "}, + {UNKNOWN_BUSTYPE, "MPI "}, + {UNKNOWN_BUSTYPE, "MPSA "}, + {UNKNOWN_BUSTYPE, "NUBUS "}, + {PCI, "PCI "}, + {UNKNOWN_BUSTYPE, "PCMCIA"}, + {UNKNOWN_BUSTYPE, "TC "}, + {UNKNOWN_BUSTYPE, "VL "}, + {UNKNOWN_BUSTYPE, "VME "}, + {UNKNOWN_BUSTYPE, "XPRESS"} +}; + +/* From MP spec v1.4, table 5-1. */ +static int default_data[7][5] = +{ +/* nbus, id0, type0, id1, type1 */ + {1, 0, ISA, 255, NOBUS}, + {1, 0, EISA, 255, NOBUS}, + {1, 0, EISA, 255, NOBUS}, + {1, 0, MCA, 255, NOBUS}, + {2, 0, ISA, 1, PCI}, + {2, 0, EISA, 1, PCI}, + {2, 0, MCA, 1, PCI} +}; + +struct pci_probe_table_args { + u_char bus; + u_char found; +}; + +struct pci_route_interrupt_args { + u_char bus; /* Source bus. */ + u_char irq; /* Source slot:pin. */ + int vector; /* Return value. 
*/ +}; + +static mpfps_t mpfps; +static mpcth_t mpct; +static void *ioapics[NAPICID]; +static bus_datum *busses; +static int mptable_nioapics, mptable_nbusses, mptable_maxbusid; +static int pci0 = -1; + +MALLOC_DEFINE(M_MPTABLE, "MP Table", "MP Table Items"); + +static enum intr_polarity conforming_polarity(u_char src_bus, + u_char src_bus_irq); +static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq); +static enum intr_polarity intentry_polarity(int_entry_ptr intr); +static enum intr_trigger intentry_trigger(int_entry_ptr intr); +static int lookup_bus_type(char *name); +static void mptable_count_items(void); +static void mptable_count_items_handler(u_char *entry, void *arg); +#ifdef MPTABLE_FORCE_HTT +static void mptable_hyperthread_fixup(u_int id_mask); +#endif +static void mptable_parse_apics_and_busses(void); +static void mptable_parse_apics_and_busses_handler(u_char *entry, + void *arg); +static void mptable_parse_ints(void); +static void mptable_parse_ints_handler(u_char *entry, void *arg); +static void mptable_parse_io_int(int_entry_ptr intr); +static void mptable_parse_local_int(int_entry_ptr intr); +static void mptable_pci_probe_table_handler(u_char *entry, void *arg); +static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg); +static void mptable_pci_setup(void); +static int mptable_probe(void); +static int mptable_probe_cpus(void); +static void mptable_probe_cpus_handler(u_char *entry, void *arg __unused); +static void mptable_register(void *dummy); +static int mptable_setup_local(void); +static int mptable_setup_io(void); +static void mptable_walk_table(mptable_entry_handler *handler, void *arg); +static int search_for_sig(u_int32_t target, int count); + +static struct apic_enumerator mptable_enumerator = { + "MPTable", + mptable_probe, + mptable_probe_cpus, + mptable_setup_local, + mptable_setup_io +}; + +/* + * look for the MP spec signature + */ + +static int +search_for_sig(u_int32_t target, int count) +{ + 
int x; + u_int32_t *addr = (u_int32_t *) (KERNBASE + target); + + for (x = 0; x < count; x += 4) + if (addr[x] == MP_SIG) + /* make array index a byte index */ + return (target + (x * sizeof(u_int32_t))); + return (-1); +} + +static int +lookup_bus_type(char *name) +{ + int x; + + for (x = 0; x < MAX_BUSTYPE; ++x) + if (strncmp(bus_type_table[x].name, name, 6) == 0) + return (bus_type_table[x].type); + + return (UNKNOWN_BUSTYPE); +} + +/* + * Look for an Intel MP spec table (ie, SMP capable hardware). + */ +static int +mptable_probe(void) +{ + int x; + u_long segment; + u_int32_t target; + + /* see if EBDA exists */ + if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) { + /* search first 1K of EBDA */ + target = (u_int32_t) (segment << 4); + if ((x = search_for_sig(target, 1024 / 4)) >= 0) + goto found; + } else { + /* last 1K of base memory, effective 'top of base' passed in */ + target = (u_int32_t) ((basemem * 1024) - 0x400); + if ((x = search_for_sig(target, 1024 / 4)) >= 0) + goto found; + } + + /* search the BIOS */ + target = (u_int32_t) BIOS_BASE; + if ((x = search_for_sig(target, BIOS_COUNT)) >= 0) + goto found; + + /* nothing found */ + return (ENXIO); + +found: + mpfps = (mpfps_t)(KERNBASE + x); + + /* Map in the configuration table if it exists. 
*/ + if (mpfps->config_type != 0) + mpct = NULL; + else { + if ((uintptr_t)mpfps->pap >= 1024 * 1024) { + printf("%s: Unable to map MP Configuration Table\n", + __func__); + return (ENXIO); + } + mpct = (mpcth_t)(KERNBASE + (uintptr_t)mpfps->pap); + if (mpct->base_table_length + (uintptr_t)mpfps->pap >= + 1024 * 1024) { + printf("%s: Unable to map end of MP Config Table\n", + __func__); + return (ENXIO); + } + if (mpct->signature[0] != 'P' || mpct->signature[1] != 'C' || + mpct->signature[2] != 'M' || mpct->signature[3] != 'P') { + printf("%s: MP Config Table has bad signature: %c%c%c%c\n", + __func__, mpct->signature[0], mpct->signature[1], + mpct->signature[2], mpct->signature[3]); + return (ENXIO); + } + if (bootverbose) + printf( + "MP Configuration Table version 1.%d found at %p\n", + mpct->spec_rev, mpct); + } + + return (-100); +} + +/* + * Run through the MP table enumerating CPUs. + */ +static int +mptable_probe_cpus(void) +{ + u_int cpu_mask; + + /* Is this a pre-defined config? */ + if (mpfps->config_type != 0) { + lapic_create(0, 1); + lapic_create(1, 0); + } else { + cpu_mask = 0; + mptable_walk_table(mptable_probe_cpus_handler, &cpu_mask); +#ifdef MPTABLE_FORCE_HTT + mptable_hyperthread_fixup(cpu_mask); +#endif + } + return (0); +} + +/* + * Initialize the local APIC on the BSP. + */ +static int +mptable_setup_local(void) +{ + + /* Is this a pre-defined config? */ + printf("MPTable: <"); + if (mpfps->config_type != 0) { + lapic_init(DEFAULT_APIC_BASE); + printf("Preset Config %d", mpfps->config_type); + } else { + lapic_init((uintptr_t)mpct->apic_address); + printf("%.*s %.*s", (int)sizeof(mpct->oem_id), mpct->oem_id, + (int)sizeof(mpct->product_id), mpct->product_id); + } + printf(">\n"); + return (0); +} + +/* + * Run through the MP table enumerating I/O APICs. + */ +static int +mptable_setup_io(void) +{ + int i; + u_char byte; + + /* First, we count individual items and allocate arrays. 
*/ + mptable_count_items(); + busses = malloc((mptable_maxbusid + 1) * sizeof(bus_datum), M_MPTABLE, + M_WAITOK); + for (i = 0; i <= mptable_maxbusid; i++) + busses[i].bus_type = NOBUS; + + /* Second, we run through adding I/O APIC's and busses. */ + ioapic_enable_mixed_mode(); + mptable_parse_apics_and_busses(); + + /* Third, we run through the table tweaking interrupt sources. */ + mptable_parse_ints(); + + /* Fourth, we register all the I/O APIC's. */ + for (i = 0; i < NAPICID; i++) + if (ioapics[i] != NULL) + ioapic_register(ioapics[i]); + + /* Fifth, we setup data structures to handle PCI interrupt routing. */ + mptable_pci_setup(); + + /* Finally, we throw the switch to enable the I/O APIC's. */ + if (mpfps->mpfb2 & MPFB2_IMCR_PRESENT) { + outb(0x22, 0x70); /* select IMCR */ + byte = inb(0x23); /* current contents */ + byte |= 0x01; /* mask external INTR */ + outb(0x23, byte); /* disconnect 8259s/NMI */ + } + + return (0); +} + +static void +mptable_register(void *dummy __unused) +{ + + apic_register_enumerator(&mptable_enumerator); +} +SYSINIT(mptable_register, SI_SUB_CPU - 1, SI_ORDER_FIRST, mptable_register, + NULL) + +/* + * Call the handler routine for each entry in the MP config table. 
+ */ +static void +mptable_walk_table(mptable_entry_handler *handler, void *arg) +{ + u_int i; + u_char *entry; + + entry = (u_char *)(mpct + 1); + for (i = 0; i < mpct->entry_count; i++) { + switch (*entry) { + case MPCT_ENTRY_PROCESSOR: + case MPCT_ENTRY_IOAPIC: + case MPCT_ENTRY_BUS: + case MPCT_ENTRY_INT: + case MPCT_ENTRY_LOCAL_INT: + break; + default: + panic("%s: Unknown MP Config Entry %d\n", __func__, + (int)*entry); + } + handler(entry, arg); + entry += basetable_entry_types[*entry].length; + } +} + +static void +mptable_probe_cpus_handler(u_char *entry, void *arg) +{ + proc_entry_ptr proc; + u_int *cpu_mask; + + switch (*entry) { + case MPCT_ENTRY_PROCESSOR: + proc = (proc_entry_ptr)entry; + if (proc->cpu_flags & PROCENTRY_FLAG_EN) { + lapic_create(proc->apic_id, proc->cpu_flags & + PROCENTRY_FLAG_BP); + cpu_mask = (u_int *)arg; + *cpu_mask |= (1 << proc->apic_id); + } + break; + } +} + +static void +mptable_count_items_handler(u_char *entry, void *arg __unused) +{ + io_apic_entry_ptr apic; + bus_entry_ptr bus; + + switch (*entry) { + case MPCT_ENTRY_BUS: + bus = (bus_entry_ptr)entry; + mptable_nbusses++; + if (bus->bus_id > mptable_maxbusid) + mptable_maxbusid = bus->bus_id; + break; + case MPCT_ENTRY_IOAPIC: + apic = (io_apic_entry_ptr)entry; + if (apic->apic_flags & IOAPICENTRY_FLAG_EN) + mptable_nioapics++; + break; + } +} + +/* + * Count items in the table. + */ +static void +mptable_count_items(void) +{ + + /* Is this a pre-defined config? */ + if (mpfps->config_type != 0) { + mptable_nioapics = 1; + switch (mpfps->config_type) { + case 1: + case 2: + case 3: + case 4: + mptable_nbusses = 1; + break; + case 5: + case 6: + case 7: + mptable_nbusses = 2; + break; + default: + panic("Unknown pre-defined MP Table config type %d", + mpfps->config_type); + } + mptable_maxbusid = mptable_nbusses - 1; + } else + mptable_walk_table(mptable_count_items_handler, NULL); +} + +/* + * Add a bus or I/O APIC from an entry in the table. 
+ */ +static void +mptable_parse_apics_and_busses_handler(u_char *entry, void *arg __unused) +{ + io_apic_entry_ptr apic; + bus_entry_ptr bus; + enum busTypes bus_type; + int i; + + + switch (*entry) { + case MPCT_ENTRY_BUS: + bus = (bus_entry_ptr)entry; + bus_type = lookup_bus_type(bus->bus_type); + if (bus_type == UNKNOWN_BUSTYPE) { + printf("MPTable: Unknown bus %d type \"", bus->bus_id); + for (i = 0; i < 6; i++) + printf("%c", bus->bus_type[i]); + printf("\"\n"); + } + busses[bus->bus_id].bus_id = bus->bus_id; + busses[bus->bus_id].bus_type = bus_type; + break; + case MPCT_ENTRY_IOAPIC: + apic = (io_apic_entry_ptr)entry; + if (!(apic->apic_flags & IOAPICENTRY_FLAG_EN)) + break; + if (apic->apic_id >= NAPICID) + panic("%s: I/O APIC ID %d too high", __func__, + apic->apic_id); + if (ioapics[apic->apic_id] != NULL) + panic("%s: Double APIC ID %d", __func__, + apic->apic_id); + ioapics[apic->apic_id] = ioapic_create( + (uintptr_t)apic->apic_address, apic->apic_id, -1); + break; + default: + break; + } +} + +/* + * Enumerate I/O APIC's and busses. + */ +static void +mptable_parse_apics_and_busses(void) +{ + + /* Is this a pre-defined config? */ + if (mpfps->config_type != 0) { + ioapics[0] = ioapic_create(DEFAULT_IO_APIC_BASE, 2, 0); + busses[0].bus_id = 0; + busses[0].bus_type = default_data[mpfps->config_type][2]; + if (mptable_nbusses > 1) { + busses[1].bus_id = 1; + busses[1].bus_type = + default_data[mpfps->config_type][4]; + } + } else + mptable_walk_table(mptable_parse_apics_and_busses_handler, + NULL); +} + +/* + * Determine conforming polarity for a given bus type. 
+ */ +static enum intr_polarity +conforming_polarity(u_char src_bus, u_char src_bus_irq) +{ + + KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); + switch (busses[src_bus].bus_type) { + case ISA: + case EISA: + return (INTR_POLARITY_HIGH); + case PCI: + return (INTR_POLARITY_LOW); + default: + panic("%s: unknown bus type %d", __func__, + busses[src_bus].bus_type); + } +} + +/* + * Determine conforming trigger for a given bus type. + */ +static enum intr_trigger +conforming_trigger(u_char src_bus, u_char src_bus_irq) +{ + + KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); + switch (busses[src_bus].bus_type) { + case ISA: + return (INTR_TRIGGER_EDGE); + case PCI: + return (INTR_TRIGGER_LEVEL); +#if !defined(PC98) && !defined(XEN) + case EISA: + KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq)); + return (elcr_read_trigger(src_bus_irq)); +#endif + default: + panic("%s: unknown bus type %d", __func__, + busses[src_bus].bus_type); + } +} + +static enum intr_polarity +intentry_polarity(int_entry_ptr intr) +{ + + switch (intr->int_flags & INTENTRY_FLAGS_POLARITY) { + case INTENTRY_FLAGS_POLARITY_CONFORM: + return (conforming_polarity(intr->src_bus_id, + intr->src_bus_irq)); + case INTENTRY_FLAGS_POLARITY_ACTIVEHI: + return (INTR_POLARITY_HIGH); + case INTENTRY_FLAGS_POLARITY_ACTIVELO: + return (INTR_POLARITY_LOW); + default: + panic("Bogus interrupt flags"); + } +} + +static enum intr_trigger +intentry_trigger(int_entry_ptr intr) +{ + + switch (intr->int_flags & INTENTRY_FLAGS_TRIGGER) { + case INTENTRY_FLAGS_TRIGGER_CONFORM: + return (conforming_trigger(intr->src_bus_id, + intr->src_bus_irq)); + case INTENTRY_FLAGS_TRIGGER_EDGE: + return (INTR_TRIGGER_EDGE); + case INTENTRY_FLAGS_TRIGGER_LEVEL: + return (INTR_TRIGGER_LEVEL); + default: + panic("Bogus interrupt flags"); + } +} + +/* + * Parse an interrupt entry for an I/O interrupt routed to a pin on an I/O APIC. 
+ */ +static void +mptable_parse_io_int(int_entry_ptr intr) +{ + void *ioapic; + u_int pin; + + if (intr->dst_apic_id == 0xff) { + printf("MPTable: Ignoring global interrupt entry for pin %d\n", + intr->dst_apic_int); + return; + } + if (intr->dst_apic_id >= NAPICID) { + printf("MPTable: Ignoring interrupt entry for ioapic%d\n", + intr->dst_apic_id); + return; + } + ioapic = ioapics[intr->dst_apic_id]; + if (ioapic == NULL) { + printf( + "MPTable: Ignoring interrupt entry for missing ioapic%d\n", + intr->dst_apic_id); + return; + } + pin = intr->dst_apic_int; + switch (intr->int_type) { + case INTENTRY_TYPE_INT: + switch (busses[intr->src_bus_id].bus_type) { + case NOBUS: + panic("interrupt from missing bus"); + case ISA: + case EISA: + if (busses[intr->src_bus_id].bus_type == ISA) + ioapic_set_bus(ioapic, pin, APIC_BUS_ISA); + else + ioapic_set_bus(ioapic, pin, APIC_BUS_EISA); + if (intr->src_bus_irq == pin) + break; + ioapic_remap_vector(ioapic, pin, intr->src_bus_irq); + if (ioapic_get_vector(ioapic, intr->src_bus_irq) == + intr->src_bus_irq) + ioapic_disable_pin(ioapic, intr->src_bus_irq); + break; + case PCI: + ioapic_set_bus(ioapic, pin, APIC_BUS_PCI); + break; + default: + ioapic_set_bus(ioapic, pin, APIC_BUS_UNKNOWN); + break; + } + break; + case INTENTRY_TYPE_NMI: + ioapic_set_nmi(ioapic, pin); + break; + case INTENTRY_TYPE_SMI: + ioapic_set_smi(ioapic, pin); + break; + case INTENTRY_TYPE_EXTINT: + ioapic_set_extint(ioapic, pin); + break; + default: + panic("%s: invalid interrupt entry type %d\n", __func__, + intr->int_type); + } + if (intr->int_type == INTENTRY_TYPE_INT || + (intr->int_flags & INTENTRY_FLAGS_TRIGGER) != + INTENTRY_FLAGS_TRIGGER_CONFORM) + ioapic_set_triggermode(ioapic, pin, intentry_trigger(intr)); + if (intr->int_type == INTENTRY_TYPE_INT || + (intr->int_flags & INTENTRY_FLAGS_POLARITY) != + INTENTRY_FLAGS_POLARITY_CONFORM) + ioapic_set_polarity(ioapic, pin, intentry_polarity(intr)); +} + +/* + * Parse an interrupt entry for a local APIC 
LVT pin. + */ +static void +mptable_parse_local_int(int_entry_ptr intr) +{ + u_int apic_id, pin; + + if (intr->dst_apic_id == 0xff) + apic_id = APIC_ID_ALL; + else + apic_id = intr->dst_apic_id; + if (intr->dst_apic_int == 0) + pin = LVT_LINT0; + else + pin = LVT_LINT1; + switch (intr->int_type) { + case INTENTRY_TYPE_INT: +#if 1 + printf( + "MPTable: Ignoring vectored local interrupt for LINTIN%d vector %d\n", + intr->dst_apic_int, intr->src_bus_irq); + return; +#else + lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_FIXED); + break; +#endif + case INTENTRY_TYPE_NMI: + lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI); + break; + case INTENTRY_TYPE_SMI: + lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_SMI); + break; + case INTENTRY_TYPE_EXTINT: + lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_EXTINT); + break; + default: + panic("%s: invalid interrupt entry type %d\n", __func__, + intr->int_type); + } + if ((intr->int_flags & INTENTRY_FLAGS_TRIGGER) != + INTENTRY_FLAGS_TRIGGER_CONFORM) + lapic_set_lvt_triggermode(apic_id, pin, + intentry_trigger(intr)); + if ((intr->int_flags & INTENTRY_FLAGS_POLARITY) != + INTENTRY_FLAGS_POLARITY_CONFORM) + lapic_set_lvt_polarity(apic_id, pin, intentry_polarity(intr)); +} + +/* + * Parse interrupt entries. + */ +static void +mptable_parse_ints_handler(u_char *entry, void *arg __unused) +{ + int_entry_ptr intr; + + intr = (int_entry_ptr)entry; + switch (*entry) { + case MPCT_ENTRY_INT: + mptable_parse_io_int(intr); + break; + case MPCT_ENTRY_LOCAL_INT: + mptable_parse_local_int(intr); + break; + } +} + +/* + * Configure the interrupt pins + */ +static void +mptable_parse_ints(void) +{ + + /* Is this a pre-defined config? */ + if (mpfps->config_type != 0) { + /* Configure LINT pins. */ + lapic_set_lvt_mode(APIC_ID_ALL, LVT_LINT0, APIC_LVT_DM_EXTINT); + lapic_set_lvt_mode(APIC_ID_ALL, LVT_LINT1, APIC_LVT_DM_NMI); + + /* Configure I/O APIC pins. 
*/ + if (mpfps->config_type != 7) + ioapic_set_extint(ioapics[0], 0); + else + ioapic_disable_pin(ioapics[0], 0); + if (mpfps->config_type != 2) + ioapic_remap_vector(ioapics[0], 2, 0); + else + ioapic_disable_pin(ioapics[0], 2); + if (mpfps->config_type == 2) + ioapic_disable_pin(ioapics[0], 13); + } else + mptable_walk_table(mptable_parse_ints_handler, NULL); +} + +#ifdef MPTABLE_FORCE_HTT +/* + * Perform a hyperthreading "fix-up" to enumerate any logical CPU's + * that aren't already listed in the table. + * + * XXX: We assume that all of the physical CPUs in the + * system have the same number of logical CPUs. + * + * XXX: We assume that APIC ID's are allocated such that + * the APIC ID's for a physical processor are aligned + * with the number of logical CPU's in the processor. + */ +static void +mptable_hyperthread_fixup(u_int id_mask) +{ + u_int i, id, logical_cpus; + + /* Nothing to do if there is no HTT support. */ + if ((cpu_feature & CPUID_HTT) == 0) + return; + logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; + if (logical_cpus <= 1) + return; + + /* + * For each APIC ID of a CPU that is set in the mask, + * scan the other candidate APIC ID's for this + * physical processor. If any of those ID's are + * already in the table, then kill the fixup. + */ + for (id = 0; id < NAPICID; id++) { + if ((id_mask & 1 << id) == 0) + continue; + /* First, make sure we are on a logical_cpus boundary. */ + if (id % logical_cpus != 0) + return; + for (i = id + 1; i < id + logical_cpus; i++) + if ((id_mask & 1 << i) != 0) + return; + } + + /* + * Ok, the ID's checked out, so perform the fixup by + * adding the logical CPUs. 
+ */ + while ((id = ffs(id_mask)) != 0) { + id--; + for (i = id + 1; i < id + logical_cpus; i++) { + if (bootverbose) + printf( + "MPTable: Adding logical CPU %d from main CPU %d\n", + i, id); + lapic_create(i, 0); + } + id_mask &= ~(1 << id); + } +} +#endif /* MPTABLE_FORCE_HTT */ + +/* + * Support code for routing PCI interrupts using the MP Table. + */ +static void +mptable_pci_setup(void) +{ + int i; + + /* + * Find the first pci bus and call it 0. Panic if pci0 is not + * bus zero and there are multiple PCI busses. + */ + for (i = 0; i <= mptable_maxbusid; i++) + if (busses[i].bus_type == PCI) { + if (pci0 == -1) + pci0 = i; + else if (pci0 != 0) + panic( + "MPTable contains multiple PCI busses but no PCI bus 0"); + } +} + +static void +mptable_pci_probe_table_handler(u_char *entry, void *arg) +{ + struct pci_probe_table_args *args; + int_entry_ptr intr; + + if (*entry != MPCT_ENTRY_INT) + return; + intr = (int_entry_ptr)entry; + args = (struct pci_probe_table_args *)arg; + KASSERT(args->bus <= mptable_maxbusid, + ("bus %d is too big", args->bus)); + KASSERT(busses[args->bus].bus_type == PCI, ("probing for non-PCI bus")); + if (intr->src_bus_id == args->bus) + args->found = 1; +} + +int +mptable_pci_probe_table(int bus) +{ + struct pci_probe_table_args args; + + if (bus < 0) + return (EINVAL); + if (pci0 == -1 || pci0 + bus > mptable_maxbusid) + return (ENXIO); + if (busses[pci0 + bus].bus_type != PCI) + return (ENXIO); + args.bus = pci0 + bus; + args.found = 0; + mptable_walk_table(mptable_pci_probe_table_handler, &args); + if (args.found == 0) + return (ENXIO); + return (0); +} + +static void +mptable_pci_route_interrupt_handler(u_char *entry, void *arg) +{ + struct pci_route_interrupt_args *args; + int_entry_ptr intr; + int vector; + + if (*entry != MPCT_ENTRY_INT) + return; + intr = (int_entry_ptr)entry; + args = (struct pci_route_interrupt_args *)arg; + if (intr->src_bus_id != args->bus || intr->src_bus_irq != args->irq) + return; + + /* Make sure the 
APIC maps to a known APIC. */ + KASSERT(ioapics[intr->dst_apic_id] != NULL, + ("No I/O APIC %d to route interrupt to", intr->dst_apic_id)); + + /* + * Look up the vector for this APIC / pin combination. If we + * have previously matched an entry for this PCI IRQ but it + * has the same vector as this entry, just return. Otherwise, + * we use the vector for this APIC / pin combination. + */ + vector = ioapic_get_vector(ioapics[intr->dst_apic_id], + intr->dst_apic_int); + if (args->vector == vector) + return; + KASSERT(args->vector == -1, + ("Multiple IRQs for PCI interrupt %d.%d.INT%c: %d and %d\n", + args->bus, args->irq >> 2, 'A' + (args->irq & 0x3), args->vector, + vector)); + args->vector = vector; +} + +int +mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin) +{ + struct pci_route_interrupt_args args; + int slot; + + /* Like ACPI, pin numbers are 0-3, not 1-4. */ + pin--; + KASSERT(pci0 != -1, ("do not know how to route PCI interrupts")); + args.bus = pci_get_bus(dev) + pci0; + slot = pci_get_slot(dev); + + /* + * PCI interrupt entries in the MP Table encode both the slot and + * pin into the IRQ with the pin being the two least significant + * bits, the slot being the next five bits, and the most significant + * bit being reserved. + */ + args.irq = slot << 2 | pin; + args.vector = -1; + mptable_walk_table(mptable_pci_route_interrupt_handler, &args); + if (args.vector < 0) { + device_printf(pcib, "unable to route slot %d INT%c\n", slot, + 'A' + pin); + return (PCI_INVALID_IRQ); + } + if (bootverbose) + device_printf(pcib, "slot %d INT%c routed to irq %d\n", slot, + 'A' + pin, args.vector); + return (args.vector); +} diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c new file mode 100644 index 0000000000..ee61e80ed9 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c @@ -0,0 +1,3381 @@ +/*- + * Copyright (c) 1991 Regents of the University of California. 
+ * All rights reserved. + * Copyright (c) 1994 John S. Dyson + * All rights reserved. + * Copyright (c) 1994 David Greenman + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 + */ +/*- + * Copyright (c) 2003 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Jake Burkholder, + * Safeport Network Services, and Network Associates Laboratories, the + * Security Research Division of Network Associates, Inc. under + * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA + * CHATS research program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/pmap.c,v 1.494.2.6 2004/10/10 19:08:00 alc Exp $"); + +/* + * Manages physical address maps. + * XEN NOTES: page table entries (pt_entry_t) and + * page directory entries (pd_entry_t) contain machine + * addresses and not physical addresses. Use PT_GET() before + * dereferencing these structures to convert them into a + * physical address. Use the PT_SET_VA operations to commit + * page changes back to XEN. PT_SET_VA_MA should be used with + * great care! + * + * + * In addition to hardware address maps, this + * module is called upon to provide software-use-only + * maps which may or may not be stored in the same + * form as hardware maps. These pseudo-maps are + * used to store intermediate results from copy + * operations to and from address spaces. + * + * Since the information managed by this module is + * also stored by the logical address mapping module, + * this module may throw away valid virtual-to-physical + * mappings at almost any time. However, invalidations + * of virtual-to-physical mappings must be done as + * requested. + * + * In order to cope with hardware architectures which + * make virtual-to-physical map invalidates expensive, + * this module may delay invalidate or reduced protection + * operations until such time as they are actually + * necessary. 
This module is given full information as + * to which processors are currently using which maps, + * and to when physical maps must be made correct. + */ + +#include "opt_cpu.h" +#include "opt_pmap.h" +#include "opt_msgbuf.h" +#include "opt_kstack_pages.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/msgbuf.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/sx.h> +#include <sys/user.h> +#include <sys/vmmeter.h> +#include <sys/sched.h> +#include <sys/sysctl.h> +#ifdef SMP +#include <sys/smp.h> +#endif + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_extern.h> +#include <vm/vm_pageout.h> +#include <vm/vm_pager.h> +#include <vm/uma.h> + +#include <machine/cpu.h> +#include <machine/cputypes.h> +#include <machine/md_var.h> +#include <machine/specialreg.h> +#ifdef SMP +#include <machine/smp.h> +#endif + +#include <machine/xenfunc.h> + +#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) +#define CPU_ENABLE_SSE +#endif +#if defined(CPU_DISABLE_SSE) +#undef CPU_ENABLE_SSE +#endif + +#ifndef PMAP_SHPGPERPROC +#define PMAP_SHPGPERPROC 200 +#endif + +#if defined(DIAGNOSTIC) +#define PMAP_DIAGNOSTIC +#endif + +#define MINPV 2048 + +#if !defined(PMAP_DIAGNOSTIC) +#define PMAP_INLINE __inline +#else +#define PMAP_INLINE +#endif + +/* + * Get PDEs and PTEs for user/kernel address space + */ +#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) +#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) + +#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) +#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) +#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) +#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) +#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) + +#if 0 +#define pmap_pte_set_w(pte, v) ((v) ? 
atomic_set_int((u_int *)(pte), PG_W) : \ + atomic_clear_int((u_int *)(pte), PG_W)) +#else +#define pmap_pte_set_w(pte, v) { \ + if (v) \ + PT_SET_VA_MA(pte, *pte | PG_W, TRUE); \ + else \ + PT_SET_VA_MA(pte, *pte & ~PG_W, TRUE); \ +} +#endif + +struct pmap kernel_pmap_store; +LIST_HEAD(pmaplist, pmap); +static struct pmaplist allpmaps; +static struct mtx allpmaps_lock; + +vm_paddr_t avail_end; /* PA of last available physical page */ +vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ +vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ +static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ +int pgeflag = 0; /* PG_G or-in */ +int pseflag = 0; /* PG_PS or-in */ + +static int nkpt; +vm_offset_t kernel_vm_end; +extern u_int32_t KERNend; + +#ifdef PAE +static uma_zone_t pdptzone; +#endif + +/* + * Data for the pv entry allocation mechanism + */ +static uma_zone_t pvzone; +static struct vm_object pvzone_obj; +static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; +int pmap_pagedaemon_waken; + +/* + * All those kernel PT submaps that BSD is so fond of + */ +pt_entry_t *CMAP1 = 0; +static pt_entry_t *CMAP2, *CMAP3; +caddr_t CADDR1 = 0, ptvmmap = 0; +static caddr_t CADDR2, CADDR3; +static struct mtx CMAPCADDR12_lock; +struct msgbuf *msgbufp = 0; + +/* + * Crashdump maps. 
+ */ +static caddr_t crashdumpmap; + +#ifdef SMP +extern pt_entry_t *SMPpt; +#endif +static pt_entry_t *PMAP1 = 0, *PMAP2; +static pt_entry_t *PADDR1 = 0, *PADDR2; +#ifdef SMP +static int PMAP1cpu; +static int PMAP1changedcpu; +SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, + &PMAP1changedcpu, 0, + "Number of times pmap_pte_quick changed CPU with same PMAP1"); +#endif +static int PMAP1changed; +SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, + &PMAP1changed, 0, + "Number of times pmap_pte_quick changed PMAP1"); +static int PMAP1unchanged; +SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, + &PMAP1unchanged, 0, + "Number of times pmap_pte_quick didn't change PMAP1"); +static struct mtx PMAP2mutex; + +static PMAP_INLINE void free_pv_entry(pv_entry_t pv); +static pv_entry_t get_pv_entry(void); +static void pmap_clear_ptes(vm_page_t m, int bit); + +static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); +static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); +static int pmap_remove_entry(struct pmap *pmap, vm_page_t m, + vm_offset_t va); +static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); + +static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); + +static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); +static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m); +static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); +static void pmap_pte_release(pt_entry_t *pte); +static int pmap_unuse_pt(pmap_t, vm_offset_t); +static vm_offset_t pmap_kmem_choose(vm_offset_t addr); +#ifdef PAE +static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#endif + +CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); +CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); + +#ifndef DEBUG +#define DEBUG +#endif +#ifdef PMAP_DEBUG +static void pmap_dec_ref(unsigned long ma); +static void pmap_mark_privileged(unsigned long pa); +static void 
pmap_mark_unprivileged(unsigned long pa); +static void pmap_dec_ref_page(vm_page_t m); +int pmap_pid_dump(int pid); +#endif +/* + * Move the kernel virtual free pointer to the next + * 4MB. This is used to help improve performance + * by using a large (4MB) page for much of the kernel + * (.text, .data, .bss) + */ +static vm_offset_t +pmap_kmem_choose(vm_offset_t addr) +{ + vm_offset_t newaddr = addr; + +#ifndef DISABLE_PSE + if (cpu_feature & CPUID_PSE) + newaddr = (addr + PDRMASK) & ~PDRMASK; +#endif + return newaddr; +} + +/* + * Bootstrap the system enough to run with virtual memory. + * + * On the i386 this is called after mapping has already been enabled + * and just syncs the pmap module with what has already been done. + * [We can't call it easily with mapping off since the kernel is not + * mapped with PA == VA, hence we would have to relocate every address + * from the linked base (virtual) address "KERNBASE" to the actual + * (physical) address starting relative to 0] + */ +void +pmap_bootstrap(firstaddr, loadaddr) + vm_paddr_t firstaddr; + vm_paddr_t loadaddr; +{ + vm_offset_t va; + pt_entry_t *pte, *unused; + int i; + + /* + * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too + * large. It should instead be correctly calculated in locore.s and + * not based on 'first' (which is a physical address, not a virtual + * address, for the start of unused physical memory). The kernel + * page tables are NOT double mapped and thus should not be included + * in this calculation. + */ + virtual_avail = (vm_offset_t) KERNBASE + firstaddr; + virtual_avail = pmap_kmem_choose(virtual_avail); + + virtual_end = VM_MAX_KERNEL_ADDRESS; + + /* + * Initialize the kernel pmap (which is statically allocated). 
+ */ + PMAP_LOCK_INIT(kernel_pmap); + kernel_pmap->pm_pdir = (pd_entry_t *) xen_start_info->pt_base; +#ifdef PAE + kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); +#endif + kernel_pmap->pm_active = -1; /* don't allow deactivation */ + TAILQ_INIT(&kernel_pmap->pm_pvlist); + LIST_INIT(&allpmaps); + mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); + mtx_lock_spin(&allpmaps_lock); + LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); + mtx_unlock_spin(&allpmaps_lock); + nkpt = NKPT; + + /* + * Reserve some special page table entries/VA space for temporary + * mapping of pages. + */ +#define SYSMAP(c, p, v, n) \ + v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); + + va = virtual_avail; + pte = vtopte(va); + + /* + * CMAP1/CMAP2 are used for zeroing and copying pages. + * CMAP3 is used for the idle process page zeroing. + */ + SYSMAP(caddr_t, CMAP1, CADDR1, 1) + SYSMAP(caddr_t, CMAP2, CADDR2, 1) + SYSMAP(caddr_t, CMAP3, CADDR3, 1) + PT_CLEAR_VA(CMAP3, TRUE); + + mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF); + + /* + * Crashdump maps. + */ + SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) + + /* + * ptvmmap is used for reading arbitrary physical pages via /dev/mem. + */ + SYSMAP(caddr_t, unused, ptvmmap, 1) + + /* + * msgbufp is used to map the system message buffer. + */ + SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE))) + + /* + * ptemap is used for pmap_pte_quick + */ + SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); + SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1); + + mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); + + virtual_avail = va; + PT_CLEAR_VA(CMAP1, FALSE); + PT_CLEAR_VA(CMAP2, FALSE); + + for (i = 0; i < NKPT; i++) + PT_CLEAR_VA(&PTD[i], FALSE); + PT_UPDATES_FLUSH(); +#ifdef XEN_UNNEEDED + /* Turn on PG_G on kernel page(s) */ + pmap_set_pg(); +#endif +} + +/* + * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on. 
+ */ +void +pmap_set_pg(void) +{ + pd_entry_t pdir; + pt_entry_t *pte; + vm_offset_t va, endva; + int i; + + if (pgeflag == 0) + return; + panic("this won't work"); + i = KERNLOAD/NBPDR; + endva = KERNBASE + KERNend; + + if (pseflag) { + va = KERNBASE + KERNLOAD; + while (va < endva) { + pdir = kernel_pmap->pm_pdir[KPTDI+i]; + pdir |= pgeflag; + kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir; + invltlb(); /* Play it safe, invltlb() every time */ + i++; + va += NBPDR; + } + } else { + va = (vm_offset_t)btext; + while (va < endva) { + pte = vtopte(va); + if (*pte) + *pte |= pgeflag; + invltlb(); /* Play it safe, invltlb() every time */ + va += PAGE_SIZE; + } + } +} + +#ifdef PAE + +static MALLOC_DEFINE(M_PMAPPDPT, "pmap", "pmap pdpt"); + +static void * +pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (contigmalloc(PAGE_SIZE, M_PMAPPDPT, 0, 0x0ULL, 0xffffffffULL, + 1, 0)); +} +#endif + +/* + * Initialize the pmap module. + * Called by vm_init, to initialize any structures that the pmap + * system needs to map virtual memory. + * pmap_init has been enhanced to support in a fairly consistant + * way, discontiguous physical memory. + */ +void +pmap_init(void) +{ + int i; + + /* + * Allocate memory for random pmap data structures. Includes the + * pv_head_table. + */ + + for(i = 0; i < vm_page_array_size; i++) { + vm_page_t m; + + m = &vm_page_array[i]; + TAILQ_INIT(&m->md.pv_list); + m->md.pv_list_count = 0; + } + + /* + * init the pv free list + */ + pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); + uma_prealloc(pvzone, MINPV); + +#ifdef PAE + pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, + NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, + UMA_ZONE_VM | UMA_ZONE_NOFREE); + uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); +#endif + + /* + * Now it is safe to enable pv_table recording. 
+ */ + pmap_initialized = TRUE; +} + +/* + * Initialize the address space (zone) for the pv_entries. Set a + * high water mark so that the system can recover from excessive + * numbers of pv entries. + */ +void +pmap_init2() +{ + int shpgperproc = PMAP_SHPGPERPROC; + + TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); + pv_entry_max = shpgperproc * maxproc + vm_page_array_size; + TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); + pv_entry_high_water = 9 * (pv_entry_max / 10); + uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); +} + + +/*************************************************** + * Low level helper routines..... + ***************************************************/ + +#if defined(PMAP_DIAGNOSTIC) + +/* + * This code checks for non-writeable/modified pages. + * This should be an invalid condition. + */ +static int +pmap_nw_modified(pt_entry_t ptea) +{ + int pte; + + pte = (int) ptea; + + if ((pte & (PG_M|PG_RW)) == PG_M) + return 1; + else + return 0; +} +#endif + + +/* + * this routine defines the region(s) of memory that should + * not be tested for the modified bit. + */ +static PMAP_INLINE int +pmap_track_modified(vm_offset_t va) +{ + if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) + return 1; + else + return 0; +} + +#ifdef I386_CPU +/* + * i386 only has "invalidate everything" and no SMP to worry about. + */ +PMAP_INLINE void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ + + if (pmap == kernel_pmap || pmap->pm_active) + invltlb(); +} + +PMAP_INLINE void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + + if (pmap == kernel_pmap || pmap->pm_active) + invltlb(); +} + +PMAP_INLINE void +pmap_invalidate_all(pmap_t pmap) +{ + + if (pmap == kernel_pmap || pmap->pm_active) + invltlb(); +} +#else /* !I386_CPU */ +#ifdef SMP +/* + * For SMP, these functions have to use the IPI mechanism for coherence. 
+ */ +void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ + u_int cpumask; + u_int other_cpus; + + if (smp_started) { + if (!(read_eflags() & PSL_I)) + panic("%s: interrupts disabled", __func__); + mtx_lock_spin(&smp_rv_mtx); + } else + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. + * XXX we may need to hold schedlock to get a coherent pm_active + * XXX critical sections disable interrupts again + */ + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + invlpg(va); + smp_invlpg(va); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + invlpg(va); + if (pmap->pm_active & other_cpus) + smp_masked_invlpg(pmap->pm_active & other_cpus, va); + } + if (smp_started) + mtx_unlock_spin(&smp_rv_mtx); + else + critical_exit(); +} + +void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + u_int cpumask; + u_int other_cpus; + vm_offset_t addr; + + if (smp_started) { + if (!(read_eflags() & PSL_I)) + panic("%s: interrupts disabled", __func__); + mtx_lock_spin(&smp_rv_mtx); + } else + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. 
+ * XXX we may need to hold schedlock to get a coherent pm_active + * XXX critical sections disable interrupts again + */ + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + smp_invlpg_range(sva, eva); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + if (pmap->pm_active & other_cpus) + smp_masked_invlpg_range(pmap->pm_active & other_cpus, + sva, eva); + } + if (smp_started) + mtx_unlock_spin(&smp_rv_mtx); + else + critical_exit(); +} + +void +pmap_invalidate_all(pmap_t pmap) +{ + u_int cpumask; + u_int other_cpus; + + if (smp_started) { + if (!(read_eflags() & PSL_I)) + panic("%s: interrupts disabled", __func__); + mtx_lock_spin(&smp_rv_mtx); + } else + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. + * XXX we may need to hold schedlock to get a coherent pm_active + * XXX critical sections disable interrupts again + */ + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + invltlb(); + smp_invltlb(); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + invltlb(); + if (pmap->pm_active & other_cpus) + smp_masked_invltlb(pmap->pm_active & other_cpus); + } + if (smp_started) + mtx_unlock_spin(&smp_rv_mtx); + else + critical_exit(); +} +#else /* !SMP */ +/* + * Normal, non-SMP, 486+ invalidation functions. + * We inline these within pmap.c for speed. 
+ */ +PMAP_INLINE void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ + + if (pmap == kernel_pmap || pmap->pm_active) + invlpg(va); + PT_UPDATES_FLUSH(); + +} + +PMAP_INLINE void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + vm_offset_t addr; + + if (pmap == kernel_pmap || pmap->pm_active) + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + PT_UPDATES_FLUSH(); + +} + +PMAP_INLINE void +pmap_invalidate_all(pmap_t pmap) +{ + + if (pmap == kernel_pmap || pmap->pm_active) + invltlb(); +} +#endif /* !SMP */ +#endif /* !I386_CPU */ + +/* + * Are we current address space or kernel? N.B. We return FALSE when + * a pmap's page table is in use because a kernel thread is borrowing + * it. The borrowed page table can change spontaneously, making any + * dependence on its continued use subject to a race condition. + */ +static __inline int +pmap_is_current(pmap_t pmap) +{ + + return (pmap == kernel_pmap || + (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && + (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); +} + +/* + * If the given pmap is not the current or kernel pmap, the returned pte must + * be released by passing it to pmap_pte_release(). + */ +pt_entry_t * +pmap_pte(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t tmppf, newpf; + pd_entry_t *pde; + + pde = pmap_pde(pmap, va); + if (*pde & PG_PS) + return (pde); + if (*pde != 0) { + /* are we current address space or kernel? */ + if (pmap_is_current(pmap)) + return (vtopte(va)); + mtx_lock(&PMAP2mutex); + newpf = PT_GET(pde) & PG_FRAME; + tmppf = PT_GET(PMAP2) & PG_FRAME; + if (tmppf != newpf) { + PT_SET_VA(PMAP2, newpf | PG_V | PG_A, FALSE); + pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); + } + return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); + } + return (0); +} + +/* + * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte + * being NULL. 
+ */ +static __inline void +pmap_pte_release(pt_entry_t *pte) +{ + + if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) + mtx_unlock(&PMAP2mutex); +} + +static __inline void +invlcaddr(void *caddr) +{ +#ifdef I386_CPU + invltlb(); +#else + invlpg((u_int)caddr); +#endif + PT_UPDATES_FLUSH(); +} + +/* + * Super fast pmap_pte routine best used when scanning + * the pv lists. This eliminates many coarse-grained + * invltlb calls. Note that many of the pv list + * scans are across different pmaps. It is very wasteful + * to do an entire invltlb for checking a single mapping. + * + * If the given pmap is not the current pmap, vm_page_queue_mtx + * must be held and curthread pinned to a CPU. + */ +static pt_entry_t * +pmap_pte_quick(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t tmppf, newpf; + pd_entry_t *pde; + + pde = pmap_pde(pmap, va); + if (*pde & PG_PS) + return (pde); + if (*pde != 0) { + /* are we current address space or kernel? */ + if (pmap_is_current(pmap)) + return (vtopte(va)); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); + newpf = PT_GET(pde) & PG_FRAME; + tmppf = PT_GET(PMAP1) & PG_FRAME; + if (tmppf != newpf) { + PT_SET_VA(PMAP1, newpf | PG_V | PG_A, TRUE); +#ifdef SMP + PMAP1cpu = PCPU_GET(cpuid); +#endif + invlcaddr(PADDR1); + PMAP1changed++; + } else +#ifdef SMP + if (PMAP1cpu != PCPU_GET(cpuid)) { + PMAP1cpu = PCPU_GET(cpuid); + invlcaddr(PADDR1); + PMAP1changedcpu++; + } else +#endif + PMAP1unchanged++; + return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); + } + return (0); +} + +/* + * Routine: pmap_extract + * Function: + * Extract the physical page address associated + * with the given map/virtual_address pair. 
+ */ +vm_paddr_t +pmap_extract(pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t rtval; + pt_entry_t *pte; + pd_entry_t pde; + + rtval = 0; + PMAP_LOCK(pmap); + pde = PT_GET(&pmap->pm_pdir[va >> PDRSHIFT]); + if (pde != 0) { + if ((pde & PG_PS) != 0) { + rtval = (pde & ~PDRMASK) | (va & PDRMASK); + PMAP_UNLOCK(pmap); + return rtval; + } + pte = pmap_pte(pmap, va); + rtval = (PT_GET(pte) & PG_FRAME) | (va & PAGE_MASK); + pmap_pte_release(pte); + } + PMAP_UNLOCK(pmap); + return (rtval); +} + +/* + * Routine: pmap_extract_and_hold + * Function: + * Atomically extract and hold the physical page + * with the given pmap and virtual address pair + * if that mapping permits the given protection. + */ +vm_page_t +pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) +{ + pd_entry_t pde; + pt_entry_t pte; + vm_page_t m; + + m = NULL; + vm_page_lock_queues(); + PMAP_LOCK(pmap); + pde = PT_GET(pmap_pde(pmap, va)); + if (pde != 0) { + if (pde & PG_PS) { + panic("4MB pages not currently supported"); + if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) | + (va & PDRMASK)); + vm_page_hold(m); + } + } else { + sched_pin(); + pte = PT_GET(pmap_pte_quick(pmap, va)); + if (pte != 0 && + ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { + m = PHYS_TO_VM_PAGE(pte & PG_FRAME); + vm_page_hold(m); + } + sched_unpin(); + } + } + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); + return (m); +} + +/*************************************************** + * Low level mapping routines..... + ***************************************************/ + +/* + * Add a wired page to the kva. + * Note: not SMP coherent. + */ +PMAP_INLINE void +pmap_kenter(vm_offset_t va, vm_paddr_t pa) +{ + PT_SET(va, pa | PG_RW | PG_V | pgeflag, TRUE); +} + +/* + * Remove a page from the kernel pagetables. + * Note: not SMP coherent. 
+ */ +PMAP_INLINE void +pmap_kremove(vm_offset_t va) +{ + PT_CLEAR(va, TRUE); +} + +/* + * Used to map a range of physical addresses into kernel + * virtual address space. + * + * The value passed in '*virt' is a suggested virtual address for + * the mapping. Architectures which can support a direct-mapped + * physical to virtual region can return the appropriate address + * within that region, leaving '*virt' unchanged. Other + * architectures should map the pages starting at '*virt' and + * update '*virt' with the first usable address after the mapped + * region. + */ +vm_offset_t +pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) +{ + vm_offset_t va, sva; + pt_entry_t *pte; + + va = sva = *virt; + while (start < end) { + pte = vtopte(va); + PT_SET_VA(pte, start | PG_RW | PG_V | pgeflag, FALSE); + va += PAGE_SIZE; + start += PAGE_SIZE; + } + /* invalidate will flush the update queue */ + pmap_invalidate_range(kernel_pmap, sva, va); + *virt = va; + return (sva); +} + + +/* + * Add a list of wired pages to the kva + * this routine is only used for temporary + * kernel mappings that do not need to have + * page modification or references recorded. + * Note that old mappings are simply written + * over. The page *must* be wired. + * Note: SMP coherent. Uses a ranged shootdown IPI. + */ +void +pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) +{ + vm_offset_t va; + + va = sva; + while (count-- > 0) { + PT_SET(va, VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag, + FALSE); + va += PAGE_SIZE; + m++; + } + /* invalidate will flush the update queue */ + pmap_invalidate_range(kernel_pmap, sva, va); +} + +/* + * This routine tears out page mappings from the + * kernel -- it is meant only for temporary mappings. + * Note: SMP coherent. Uses a ranged shootdown IPI. 
+ */ +void +pmap_qremove(vm_offset_t sva, int count) +{ + vm_offset_t va; + + va = sva; + while (count-- > 0) { + PT_CLEAR(va, FALSE); + va += PAGE_SIZE; + } + /* invalidate will flush the update queue */ + pmap_invalidate_range(kernel_pmap, sva, va); +} + +/*************************************************** + * Page table page management routines..... + ***************************************************/ + +/* + * This routine unholds page table pages, and if the hold count + * drops to zero, then it decrements the wire count. + */ +static PMAP_INLINE int +pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +{ + + --m->wire_count; + if (m->wire_count == 0) + return _pmap_unwire_pte_hold(pmap, m); + else + return 0; +} + +static int +_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +{ + vm_offset_t pteva; + /* + * unmap the page table page + */ + xpq_queue_unpin_table(pmap->pm_pdir[m->pindex]); + PT_CLEAR_VA(&pmap->pm_pdir[m->pindex], TRUE); + --pmap->pm_stats.resident_count; + + /* + * Do an invltlb to make the invalidated mapping + * take effect immediately. + */ + pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); + pmap_invalidate_page(pmap, pteva); + + vm_page_free_zero(m); + atomic_subtract_int(&cnt.v_wire_count, 1); + return 1; +} + +/* + * After removing a page table entry, this routine is used to + * conditionally free the page, and manage the hold/wire counts. 
+ */ +static int +pmap_unuse_pt(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t ptepde; + vm_page_t mpte; + + if (va >= VM_MAXUSER_ADDRESS) + return 0; + ptepde = PT_GET(pmap_pde(pmap, va)); + mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); + return pmap_unwire_pte_hold(pmap, mpte); +} + +void +pmap_pinit0(pmap) + struct pmap *pmap; +{ + + PMAP_LOCK_INIT(pmap); + pmap->pm_pdir = (pd_entry_t *)(xen_start_info->pt_base); +#ifdef PAE + pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); +#endif + pmap->pm_active = 0; + PCPU_SET(curpmap, pmap); + TAILQ_INIT(&pmap->pm_pvlist); + bzero(&pmap->pm_stats, sizeof pmap->pm_stats); + mtx_lock_spin(&allpmaps_lock); + LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); + mtx_unlock_spin(&allpmaps_lock); +} + +/* + * Initialize a preallocated and zeroed pmap structure, + * such as one in a vmspace structure. + */ +void +pmap_pinit(struct pmap *pmap) +{ + vm_page_t m, ptdpg[NPGPTD]; + vm_paddr_t ma; + static int color; + int i; + + PMAP_LOCK_INIT(pmap); + + /* + * No need to allocate page table space yet but we do need a valid + * page directory table. 
+ */ + if (pmap->pm_pdir == NULL) { + pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map, + NBPTD); +#ifdef PAE + pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); + KASSERT(((vm_offset_t)pmap->pm_pdpt & + ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, + ("pmap_pinit: pdpt misaligned")); + KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), + ("pmap_pinit: pdpt above 4g")); +#endif + } + + /* + * allocate the page directory page(s) + */ + for (i = 0; i < NPGPTD;) { + m = vm_page_alloc(NULL, color++, + VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO); + if (m == NULL) + VM_WAIT; + else { + pmap_zero_page(m); + ptdpg[i++] = m; + } + } + + pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); + + for (i = 0; i < NPGPTD; i++) { + if ((ptdpg[i]->flags & PG_ZERO) == 0) + bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); + } + + mtx_lock_spin(&allpmaps_lock); + LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); + mtx_unlock_spin(&allpmaps_lock); + /* Wire in kernel global address entries. */ + /* XXX copies current process, does not fill in MPPTDI */ + bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); +#ifdef SMP + pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; +#endif + + /* install self-referential address mapping entry(s) */ + for (i = 0; i < NPGPTD; i++) { + ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i])); + pmap->pm_pdir[PTDPTDI + i] = ma | PG_V | PG_A; +#ifdef PAE + pmap->pm_pdpt[i] = ma | PG_V; +#endif +#ifndef PAE + PT_SET_MA(pmap->pm_pdir, ma | PG_V | PG_A, TRUE); +#else + panic("FIX ME!"); +#endif + xpq_queue_pin_table(ma, XPQ_PIN_L2_TABLE); + } + + pmap->pm_active = 0; + TAILQ_INIT(&pmap->pm_pvlist); + bzero(&pmap->pm_stats, sizeof pmap->pm_stats); +} + +/* + * this routine is called if the page table page is not + * mapped correctly. 
+ */ +static vm_page_t +_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) +{ + vm_paddr_t ptepa; + vm_page_t m; + + KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || + (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, + ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); + + /* + * Allocate a page table page. + */ + if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { + if (flags & M_WAITOK) { + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + VM_WAIT; + vm_page_lock_queues(); + PMAP_LOCK(pmap); + } + + /* + * Indicate the need to retry. While waiting, the page table + * page may have been allocated. + */ + return (NULL); + } + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + + /* + * Map the pagetable page into the process address space, if + * it isn't already there. + */ + + pmap->pm_stats.resident_count++; + + ptepa = VM_PAGE_TO_PHYS(m); + xpq_queue_pin_table(xpmap_ptom(ptepa), XPQ_PIN_L1_TABLE); + PT_SET_VA(&pmap->pm_pdir[ptepindex], + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE); + + return m; +} + +static vm_page_t +pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) +{ + unsigned ptepindex; + pd_entry_t ptepa; + vm_page_t m; + + KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || + (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, + ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; +retry: + /* + * Get the page directory entry + */ + ptepa = PT_GET(&pmap->pm_pdir[ptepindex]); + + /* + * This supports switching from a 4MB page to a + * normal 4K page. + */ + if (ptepa & PG_PS) { + pmap->pm_pdir[ptepindex] = 0; + ptepa = 0; + pmap_invalidate_all(kernel_pmap); + } + + /* + * If the page table page is mapped, we just increment the + * hold count, and activate it. 
+ */ + if (ptepa) { + m = PHYS_TO_VM_PAGE(ptepa); + m->wire_count++; + } else { + /* + * Here if the pte page isn't mapped, or if it has + * been deallocated. + */ + m = _pmap_allocpte(pmap, ptepindex, flags); + if (m == NULL && (flags & M_WAITOK)) + goto retry; + } + return (m); +} + + +/*************************************************** +* Pmap allocation/deallocation routines. + ***************************************************/ + +#ifdef SMP +/* + * Deal with a SMP shootdown of other users of the pmap that we are + * trying to dispose of. This can be a bit hairy. + */ +static u_int *lazymask; +static u_int lazyptd; +static volatile u_int lazywait; + +void pmap_lazyfix_action(void); + +void +pmap_lazyfix_action(void) +{ + u_int mymask = PCPU_GET(cpumask); + + if (PCPU_GET(curpcb)->pcb_cr3 == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + atomic_clear_int(lazymask, mymask); + atomic_store_rel_int(&lazywait, 1); +} + +static void +pmap_lazyfix_self(u_int mymask) +{ + + if (PCPU_GET(curpcb)->pcb_cr3 == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + atomic_clear_int(lazymask, mymask); +} + + +static void +pmap_lazyfix(pmap_t pmap) +{ + u_int mymask = PCPU_GET(cpumask); + u_int mask; + register u_int spins; + + while ((mask = pmap->pm_active) != 0) { + spins = 50000000; + mask = mask & -mask; /* Find least significant set bit */ + mtx_lock_spin(&smp_rv_mtx); +#ifdef PAE + lazyptd = vtophys(pmap->pm_pdpt); +#else + lazyptd = vtophys(pmap->pm_pdir); +#endif + if (mask == mymask) { + lazymask = &pmap->pm_active; + pmap_lazyfix_self(mymask); + } else { + atomic_store_rel_int((u_int *)&lazymask, + (u_int)&pmap->pm_active); + atomic_store_rel_int(&lazywait, 0); + ipi_selected(mask, IPI_LAZYPMAP); + while (lazywait == 0) { + ia32_pause(); + if (--spins == 0) + break; + } + } + mtx_unlock_spin(&smp_rv_mtx); + if (spins == 0) + printf("pmap_lazyfix: spun for 50000000\n"); + } +} + +#else /* SMP */ + +/* + * Cleaning up on uniprocessor is easy. 
For various reasons, we're + * unlikely to have to even execute this code, including the fact + * that the cleanup is deferred until the parent does a wait(2), which + * means that another userland process has run. + */ +static void +pmap_lazyfix(pmap_t pmap) +{ + u_int cr3; + + cr3 = vtophys(pmap->pm_pdir); + if (cr3 == PCPU_GET(curpcb)->pcb_cr3) { + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + pmap->pm_active &= ~(PCPU_GET(cpumask)); + } +} +#endif /* SMP */ + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by pmap_pinit is being released. + * Should only be called if the map contains no valid mappings. + */ +void +pmap_release(pmap_t pmap) +{ + vm_page_t m, ptdpg[NPGPTD]; + vm_paddr_t ma; + int i; + + KASSERT(pmap->pm_stats.resident_count == 0, + ("pmap_release: pmap resident count %ld != 0", + pmap->pm_stats.resident_count)); + + pmap_lazyfix(pmap); + mtx_lock_spin(&allpmaps_lock); + LIST_REMOVE(pmap, pm_list); + mtx_unlock_spin(&allpmaps_lock); + + for (i = 0; i < NPGPTD; i++) + ptdpg[i] = PHYS_TO_VM_PAGE(PT_GET(&pmap->pm_pdir[PTDPTDI + i])); + + for (i = 0; i < nkpt + NPGPTD; i++) + PT_CLEAR_VA(&pmap->pm_pdir[PTDPTDI + i], FALSE); + + bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * + sizeof(*pmap->pm_pdir)); +#ifdef SMP + PT_CLEAR_VA(&pmap->pm_pdir[MPPTDI], FALSE); +#endif + PT_UPDATES_FLUSH(); + pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); + + vm_page_lock_queues(); + for (i = 0; i < NPGPTD; i++) { + m = ptdpg[i]; + + ma = xpmap_ptom(VM_PAGE_TO_PHYS(m)); + xpq_queue_unpin_table(ma); + pmap_zero_page(m); +#ifdef PAE + KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), + ("pmap_release: got wrong ptd page")); +#endif + m->wire_count--; + atomic_subtract_int(&cnt.v_wire_count, 1); + + vm_page_free_zero(m); + } + vm_page_unlock_queues(); + PMAP_LOCK_DESTROY(pmap); +} + +static int +kvm_size(SYSCTL_HANDLER_ARGS) +{ + unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; + + return 
sysctl_handle_long(oidp, &ksize, 0, req); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, + 0, 0, kvm_size, "IU", "Size of KVM"); + +static int +kvm_free(SYSCTL_HANDLER_ARGS) +{ + unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; + + return sysctl_handle_long(oidp, &kfree, 0, req); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, + 0, 0, kvm_free, "IU", "Amount of KVM free"); + +/* + * grow the number of kernel page table entries, if needed + */ +void +pmap_growkernel(vm_offset_t addr) +{ + struct pmap *pmap; + vm_paddr_t ptppaddr; + vm_page_t nkpg; + pd_entry_t newpdir; + pt_entry_t *pde; + + mtx_assert(&kernel_map->system_mtx, MA_OWNED); + if (kernel_vm_end == 0) { + kernel_vm_end = KERNBASE; + nkpt = 0; + while (pdir_pde(PTD, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + nkpt++; + } + } + addr = roundup2(addr, PAGE_SIZE * NPTEPG); + while (kernel_vm_end < addr) { + if (pdir_pde(PTD, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + continue; + } + + /* + * This index is bogus, but out of the way + */ + nkpg = vm_page_alloc(NULL, nkpt, + VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); + if (!nkpg) + panic("pmap_growkernel: no memory to grow kernel"); + + nkpt++; + + pmap_zero_page(nkpg); + ptppaddr = VM_PAGE_TO_PHYS(nkpg); + newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); + PT_SET_VA(&pdir_pde(PTD, kernel_vm_end), newpdir, TRUE); + + mtx_lock_spin(&allpmaps_lock); + LIST_FOREACH(pmap, &allpmaps, pm_list) { + pde = pmap_pde(pmap, kernel_vm_end); + PT_SET_VA(pde, newpdir, FALSE); + } + PT_UPDATES_FLUSH(); + mtx_unlock_spin(&allpmaps_lock); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + } +} + + +/*************************************************** + * page management routines. 
+ ***************************************************/ + +/* + * free the pv_entry back to the free list + */ +static PMAP_INLINE void +free_pv_entry(pv_entry_t pv) +{ + pv_entry_count--; + uma_zfree(pvzone, pv); +} + +/* + * get a new pv_entry, allocating a block from the system + * when needed. + * the memory allocation is performed bypassing the malloc code + * because of the possibility of allocations at interrupt time. + */ +static pv_entry_t +get_pv_entry(void) +{ + pv_entry_count++; + if (pv_entry_high_water && + (pv_entry_count > pv_entry_high_water) && + (pmap_pagedaemon_waken == 0)) { + pmap_pagedaemon_waken = 1; + wakeup (&vm_pages_needed); + } + return uma_zalloc(pvzone, M_NOWAIT); +} + + +static int +pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) +{ + pv_entry_t pv; + int rtval; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if (m->md.pv_list_count < pmap->pm_stats.resident_count) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + if (pmap == pv->pv_pmap && va == pv->pv_va) + break; + } + } else { + TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { + if (va == pv->pv_va) + break; + } + } + + rtval = 0; + if (pv) { + rtval = pmap_unuse_pt(pmap, va); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count--; + if (TAILQ_FIRST(&m->md.pv_list) == NULL) + vm_page_flag_clear(m, PG_WRITEABLE); + + TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); + free_pv_entry(pv); + } + + return rtval; +} + +/* + * Create a pv entry for page at pa for + * (pmap, va). 
+ */ +static void +pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pv_entry_t pv; + pv = get_pv_entry(); + pv->pv_va = va; + pv->pv_pmap = pmap; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count++; +} + +/* + * pmap_remove_pte: do the things to unmap a page in a process + */ +static int +pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) +{ + pt_entry_t oldpte; + vm_page_t m; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + oldpte = pte_load_clear(ptq); + if (oldpte & PG_W) + pmap->pm_stats.wired_count -= 1; + /* + * Machines that don't support invlpg, also don't support + * PG_G. + */ + if (oldpte & PG_G) + pmap_invalidate_page(kernel_pmap, va); + pmap->pm_stats.resident_count -= 1; + if (oldpte & PG_MANAGED) { + m = PHYS_TO_VM_PAGE(oldpte); + if (oldpte & PG_M) { +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified((pt_entry_t) oldpte)) { + printf( + "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", + va, oldpte); + } +#endif + if (pmap_track_modified(va)) + vm_page_dirty(m); + } + if (oldpte & PG_A) + vm_page_flag_set(m, PG_REFERENCED); + return pmap_remove_entry(pmap, m, va); + } else { + return pmap_unuse_pt(pmap, va); + } +} + +/* + * Remove a single page from a process address space + */ +static void +pmap_remove_page(pmap_t pmap, vm_offset_t va) +{ + pt_entry_t *pte; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) + return; + pmap_remove_pte(pmap, pte, va); + pmap_invalidate_page(pmap, va); +} + +/* + * Remove the given range of addresses from the specified map. + * + * It is assumed that the start and end are properly + * rounded to the page size. 
+ */ +void +pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + vm_offset_t pdnxt; + pd_entry_t ptpaddr; + pt_entry_t *pte; + int anyvalid; + + /* + * Perform an unsynchronized read. This is, however, safe. + */ + if (pmap->pm_stats.resident_count == 0) + return; + + anyvalid = 0; + + vm_page_lock_queues(); + sched_pin(); + PMAP_LOCK(pmap); + + /* + * special handling of removing one page. a very + * common operation and easy to short circuit some + * code. + */ + if ((sva + PAGE_SIZE == eva) && + ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { + pmap_remove_page(pmap, sva); + goto out; + } + + for (; sva < eva; sva = pdnxt) { + unsigned pdirindex; + + /* + * Calculate index for next page table. + */ + pdnxt = (sva + NBPDR) & ~PDRMASK; + if (pmap->pm_stats.resident_count == 0) + break; + + pdirindex = sva >> PDRSHIFT; + ptpaddr = PT_GET(&pmap->pm_pdir[pdirindex]); + + /* + * Weed out invalid mappings. Note: we assume that the page + * directory table is always allocated, and in kernel virtual. + */ + if (ptpaddr == 0) + continue; + + /* + * Check for large page. + */ + if ((ptpaddr & PG_PS) != 0) { + PT_CLEAR_VA(pmap->pm_pdir[pdirindex], TRUE); + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + anyvalid = 1; + continue; + } + + /* + * Limit our scan to either the end of the va represented + * by the current page table page, or to the end of the + * range being removed. + */ + if (pdnxt > eva) + pdnxt = eva; + + for (; sva != pdnxt; sva += PAGE_SIZE) { + if ((pte = pmap_pte_quick(pmap, sva)) == NULL || + *pte == 0) + continue; + anyvalid = 1; + if (pmap_remove_pte(pmap, pte, sva)) + break; + } + } +out: + sched_unpin(); + vm_page_unlock_queues(); + if (anyvalid) + pmap_invalidate_all(pmap); + PMAP_UNLOCK(pmap); +} + +/* + * Routine: pmap_remove_all + * Function: + * Removes this physical page from + * all physical maps in which it resides. + * Reflects back modify bits to the pager. 
+ * + * Notes: + * Original versions of this routine were very + * inefficient because they iteratively called + * pmap_remove (slow...) + */ + +void +pmap_remove_all(vm_page_t m) +{ + pv_entry_t pv; + pt_entry_t *pte, tpte; + +#if defined(PMAP_DIAGNOSTIC) + /* + * XXX This makes pmap_remove_all() illegal for non-managed pages! + */ + if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { + panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", + VM_PAGE_TO_PHYS(m)); + } +#endif + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + sched_pin(); + while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + PMAP_LOCK(pv->pv_pmap); + pv->pv_pmap->pm_stats.resident_count--; + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + tpte = pte_load_clear(pte); + if (tpte & PG_W) + pv->pv_pmap->pm_stats.wired_count--; + if (tpte & PG_A) + vm_page_flag_set(m, PG_REFERENCED); + + /* + * Update the vm_page_t clean and reference bits. + */ + if (tpte & PG_M) { +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified((pt_entry_t) tpte)) { + printf( + "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", + pv->pv_va, tpte); + } +#endif + if (pmap_track_modified(pv->pv_va)) + vm_page_dirty(m); + } + pmap_invalidate_page(pv->pv_pmap, pv->pv_va); + TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count--; + pmap_unuse_pt(pv->pv_pmap, pv->pv_va); + PMAP_UNLOCK(pv->pv_pmap); + free_pv_entry(pv); + } + vm_page_flag_clear(m, PG_WRITEABLE); + sched_unpin(); +} + +/* + * Set the physical protection on the + * specified range of this map as requested. 
+ */ +void +pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +{ + vm_offset_t pdnxt; + pd_entry_t ptpaddr; + int anychanged; + + if ((prot & VM_PROT_READ) == VM_PROT_NONE) { + pmap_remove(pmap, sva, eva); + return; + } + + if (prot & VM_PROT_WRITE) + return; + + anychanged = 0; + + vm_page_lock_queues(); + sched_pin(); + PMAP_LOCK(pmap); + for (; sva < eva; sva = pdnxt) { + unsigned obits, pbits, pdirindex; + + pdnxt = (sva + NBPDR) & ~PDRMASK; + + pdirindex = sva >> PDRSHIFT; + ptpaddr = PT_GET(&pmap->pm_pdir[pdirindex]); + + /* + * Weed out invalid mappings. Note: we assume that the page + * directory table is always allocated, and in kernel virtual. + */ + if (ptpaddr == 0) + continue; + + /* + * Check for large page. + */ + if ((ptpaddr & PG_PS) != 0) { + pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + anychanged = 1; + continue; + } + + if (pdnxt > eva) + pdnxt = eva; + + for (; sva != pdnxt; sva += PAGE_SIZE) { + pt_entry_t *pte; + vm_page_t m; + + if ((pte = pmap_pte_quick(pmap, sva)) == NULL) + continue; +#ifdef notyet +retry: +#endif + /* + * Regardless of whether a pte is 32 or 64 bits in + * size, PG_RW, PG_A, and PG_M are among the least + * significant 32 bits. 
+ */ + obits = pbits = PT_GET(pte); + if (pbits & PG_MANAGED) { + m = NULL; + if (pbits & PG_A) { + m = PHYS_TO_VM_PAGE(pbits); + vm_page_flag_set(m, PG_REFERENCED); + pbits &= ~PG_A; + } + if ((pbits & PG_M) != 0 && + pmap_track_modified(sva)) { + if (m == NULL) + m = PHYS_TO_VM_PAGE(pbits); + vm_page_dirty(m); + } + } + + pbits &= ~(PG_RW | PG_M); + + if (pbits != obits) { +#ifdef notyet + if (!atomic_cmpset_int((u_int *)pte, obits, + pbits)) + goto retry; +#endif + PT_SET_VA(pte, pbits, FALSE); + anychanged = 1; + } + } + } + sched_unpin(); + vm_page_unlock_queues(); + if (anychanged) + pmap_invalidate_all(pmap); + PMAP_UNLOCK(pmap); +} + +/* + * Insert the given physical page (p) at + * the specified virtual address (v) in the + * target physical map with the protection requested. + * + * If specified, the page will be wired down, meaning + * that the related pte can not be reclaimed. + * + * NB: This is the only routine which MAY NOT lazy-evaluate + * or lose information. That is, this routine must actually + * insert this page into the given map NOW. + */ +void +pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, + boolean_t wired) +{ + vm_paddr_t pa; + register pt_entry_t *pte; + vm_paddr_t opa; + pt_entry_t origpte, newpte; + vm_page_t mpte, om; + + va &= PG_FRAME; +#ifdef PMAP_DIAGNOSTIC + if (va > VM_MAX_KERNEL_ADDRESS) + panic("pmap_enter: toobig"); + if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) + panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); +#endif + + mpte = NULL; + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + sched_pin(); + + /* + * In the case that a page table page is not + * resident, we are creating it here. 
+ */ + if (va < VM_MAXUSER_ADDRESS) { + mpte = pmap_allocpte(pmap, va, M_WAITOK); + } +#if 0 && defined(PMAP_DIAGNOSTIC) + else { + pd_entry_t *pdeaddr = pmap_pde(pmap, va); + origpte = PT_GET(pdeaddr); + if ((origpte & PG_V) == 0) { + panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", + pmap->pm_pdir[PTDPTDI], origpte, va); + } + } +#endif + + pte = pmap_pte_quick(pmap, va); + + /* + * Page Directory table entry not valid, we need a new PT page + */ + if (pte == NULL) { + panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n", + (uintmax_t)pmap->pm_pdir[PTDPTDI], va); + } + + pa = VM_PAGE_TO_PHYS(m); + om = NULL; + origpte = PT_GET(pte); + opa = origpte & PG_FRAME; + + if (origpte & PG_PS) { + /* + * Yes, I know this will truncate upper address bits for PAE, + * but I'm actually more interested in the lower bits + */ + printf("pmap_enter: va %p, pte %p, origpte %p\n", + (void *)va, (void *)pte, (void *)(uintptr_t)origpte); + panic("pmap_enter: attempted pmap_enter on 4MB page"); + } + + /* + * Mapping has not changed, must be protection or wiring change. + */ + if (origpte && (opa == pa)) { + /* + * Wiring change, just update stats. We don't worry about + * wiring PT pages as they remain resident as long as there + * are valid mappings in them. Hence, if a user page is wired, + * the PT page will be also. + */ + if (wired && ((origpte & PG_W) == 0)) + pmap->pm_stats.wired_count++; + else if (!wired && (origpte & PG_W)) + pmap->pm_stats.wired_count--; + +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified((pt_entry_t) origpte)) { + printf( + "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", + va, origpte); + } +#endif + + /* + * Remove extra pte reference + */ + if (mpte) + mpte->wire_count--; + + /* + * We might be turning off write access to the page, + * so we go ahead and sense modify status. 
+ */ + if (origpte & PG_MANAGED) { + om = m; + pa |= PG_MANAGED; + } + goto validate; + } + /* + * Mapping has changed, invalidate old range and fall through to + * handle validating new mapping. + */ + if (opa) { + int err; + if (origpte & PG_W) + pmap->pm_stats.wired_count--; + if (origpte & PG_MANAGED) { + om = PHYS_TO_VM_PAGE(opa); + err = pmap_remove_entry(pmap, om, va); + } else + err = pmap_unuse_pt(pmap, va); + if (err) + panic("pmap_enter: pte vanished, va: 0x%x", va); + } else + pmap->pm_stats.resident_count++; + + /* + * Enter on the PV list if part of our managed memory. Note that we + * raise IPL while manipulating pv_table since pmap_enter can be + * called at interrupt time. + */ + if (pmap_initialized && + (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { + pmap_insert_entry(pmap, va, m); + pa |= PG_MANAGED; + } + + /* + * Increment counters + */ + if (wired) + pmap->pm_stats.wired_count++; + +validate: + /* + * Now validate mapping with desired protection/wiring. + */ + newpte = (pt_entry_t)(pa | PG_V); + if ((prot & VM_PROT_WRITE) != 0) + newpte |= PG_RW; + if (wired) + newpte |= PG_W; + if (va < VM_MAXUSER_ADDRESS) + newpte |= PG_U; + if (pmap == kernel_pmap) + newpte |= pgeflag; + + /* + * if the mapping or permission bits are different, we need + * to update the pte. + */ + if ((origpte & ~(PG_M|PG_A)) != newpte) { + if (origpte & PG_MANAGED) { + origpte = PT_GET(pte); + PT_SET_VA(pte, newpte | PG_A, TRUE); + if ((origpte & PG_M) && pmap_track_modified(va)) + vm_page_dirty(om); + if (origpte & PG_A) + vm_page_flag_set(om, PG_REFERENCED); + } else + PT_SET_VA(pte, newpte | PG_A, TRUE); + if (origpte) { + pmap_invalidate_page(pmap, va); + } + } + sched_unpin(); + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +/* + * this code makes some *MAJOR* assumptions: + * 1. Current pmap & pmap exists. + * 2. Not wired. + * 3. Read access. + * 4. No page table pages. + * 5. Tlbflush is deferred to calling procedure. + * 6. Page IS managed. 
+ * but is *MUCH* faster than pmap_enter... + */ + +vm_page_t +pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) +{ + pt_entry_t *pte; + vm_paddr_t pa; + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ + if (va < VM_MAXUSER_ADDRESS) { + unsigned ptepindex; + pd_entry_t ptepa; + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; + if (mpte && (mpte->pindex == ptepindex)) { + mpte->wire_count++; + } else { +retry: + /* + * Get the page directory entry + */ + ptepa = PT_GET(&pmap->pm_pdir[ptepindex]); + + /* + * If the page table page is mapped, we just increment + * the hold count, and activate it. + */ + if (ptepa) { + if (ptepa & PG_PS) + panic("pmap_enter_quick: unexpected mapping into 4MB page"); + mpte = PHYS_TO_VM_PAGE(ptepa); + mpte->wire_count++; + } else { + mpte = _pmap_allocpte(pmap, ptepindex, + M_WAITOK); + if (mpte == NULL) + goto retry; + } + } + } else { + mpte = NULL; + } + + /* + * This call to vtopte makes the assumption that we are + * entering the page into the current pmap. In order to support + * quick entry into any pmap, one would likely use pmap_pte_quick. + * But that isn't as quick as vtopte. + */ + pte = vtopte(va); + if (PT_GET(pte)) { + if (mpte != NULL) { + pmap_unwire_pte_hold(pmap, mpte); + mpte = NULL; + } + goto out; + } + + /* + * Enter on the PV list if part of our managed memory. Note that we + * raise IPL while manipulating pv_table since pmap_enter can be + * called at interrupt time. 
+ */ + if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) + pmap_insert_entry(pmap, va, m); + + /* + * Increment counters + */ + pmap->pm_stats.resident_count++; + + pa = VM_PAGE_TO_PHYS(m); + + /* + * Now validate mapping with RO protection + */ + if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) + PT_SET(va, pa | PG_V | PG_U, TRUE); + else + PT_SET(va, pa | PG_V | PG_U | PG_MANAGED, TRUE); +out: + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); + return mpte; +} + +/* + * Make a temporary mapping for a physical address. This is only intended + * to be used for panic dumps. + */ +void * +pmap_kenter_temporary(vm_paddr_t pa, int i) +{ + vm_offset_t va; + + va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); + pmap_kenter(va, pa); +#ifndef I386_CPU + invlpg(va); +#else + invltlb(); +#endif + return ((void *)crashdumpmap); +} + +/* + * This code maps large physical mmap regions into the + * processor address space. Note that some shortcuts + * are taken, but the code works. + */ +void +pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, + vm_object_t object, vm_pindex_t pindex, + vm_size_t size) +{ + vm_page_t p; + + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + KASSERT(object->type == OBJT_DEVICE, + ("pmap_object_init_pt: non-device object")); + if (pseflag && + ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { + int i; + vm_page_t m[1]; + unsigned int ptepindex; + int npdes; + pd_entry_t ptepa; + + PMAP_LOCK(pmap); + if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) + goto out; + PMAP_UNLOCK(pmap); +retry: + p = vm_page_lookup(object, pindex); + if (p != NULL) { + vm_page_lock_queues(); + if (vm_page_sleep_if_busy(p, FALSE, "init4p")) + goto retry; + } else { + p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); + if (p == NULL) + return; + m[0] = p; + + if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { + vm_page_lock_queues(); + vm_page_free(p); + vm_page_unlock_queues(); + return; + } + + p = vm_page_lookup(object, pindex); + vm_page_lock_queues(); + 
vm_page_wakeup(p); + } + vm_page_unlock_queues(); + + ptepa = VM_PAGE_TO_PHYS(p); + if (ptepa & (NBPDR - 1)) + return; + + p->valid = VM_PAGE_BITS_ALL; + + PMAP_LOCK(pmap); + pmap->pm_stats.resident_count += size >> PAGE_SHIFT; + npdes = size >> PDRSHIFT; + for(i = 0; i < npdes; i++) { + PT_SET_VA(&pmap->pm_pdir[ptepindex], + ptepa | PG_U | PG_RW | PG_V | PG_PS, FALSE); + ptepa += NBPDR; + ptepindex += 1; + } + pmap_invalidate_all(pmap); +out: + PMAP_UNLOCK(pmap); + } +} + +void +pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len) +{ + int i, npages = round_page(len) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + pt_entry_t *pte; + pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); + PT_SET_MA(va + i*PAGE_SIZE, *pte & ~(PG_RW|PG_M), FALSE); + PMAP_MARK_PRIV(xpmap_mtop(*pte)); + pmap_pte_release(pte); + } + PT_UPDATES_FLUSH(); +} + +void +pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len) +{ + int i, npages = round_page(len) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + pt_entry_t *pte; + pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); + PMAP_MARK_UNPRIV(xpmap_mtop(*pte)); + PT_SET_MA(va + i*PAGE_SIZE, *pte | (PG_RW|PG_M), FALSE); + pmap_pte_release(pte); + } + PT_UPDATES_FLUSH(); +} + +/* + * Routine: pmap_change_wiring + * Function: Change the wiring attribute for a map/virtual-address + * pair. + * In/out conditions: + * The mapping must already exist in the pmap. + */ +void +pmap_change_wiring(pmap, va, wired) + register pmap_t pmap; + vm_offset_t va; + boolean_t wired; +{ + register pt_entry_t *pte; + + PMAP_LOCK(pmap); + pte = pmap_pte(pmap, va); + + if (wired && !pmap_pte_w(pte)) + pmap->pm_stats.wired_count++; + else if (!wired && pmap_pte_w(pte)) + pmap->pm_stats.wired_count--; + + /* + * Wiring is not a hardware characteristic so there is no need to + * invalidate TLB. 
+ */ + pmap_pte_set_w(pte, wired); + pmap_pte_release(pte); + PMAP_UNLOCK(pmap); +} + + + +/* + * Copy the range specified by src_addr/len + * from the source map to the range dst_addr/len + * in the destination map. + * + * This routine is only advisory and need not do anything. + */ + +void +pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, + vm_offset_t src_addr) +{ + vm_offset_t addr; + vm_offset_t end_addr = src_addr + len; + vm_offset_t pdnxt; + vm_page_t m; + + if (dst_addr != src_addr) + return; + + if (!pmap_is_current(src_pmap)) + return; + + vm_page_lock_queues(); + if (dst_pmap < src_pmap) { + PMAP_LOCK(dst_pmap); + PMAP_LOCK(src_pmap); + } else { + PMAP_LOCK(src_pmap); + PMAP_LOCK(dst_pmap); + } + sched_pin(); + for (addr = src_addr; addr < end_addr; addr = pdnxt) { + pt_entry_t *src_pte, *dst_pte; + vm_page_t dstmpte, srcmpte; + pd_entry_t srcptepaddr; + unsigned ptepindex; + + if (addr >= UPT_MIN_ADDRESS) + panic("pmap_copy: invalid to pmap_copy page tables"); + + /* + * Don't let optional prefaulting of pages make us go + * way below the low water mark of free pages or way + * above high water mark of used pv entries. 
+ */ + if (cnt.v_free_count < cnt.v_free_reserved || + pv_entry_count > pv_entry_high_water) + break; + + pdnxt = (addr + NBPDR) & ~PDRMASK; + ptepindex = addr >> PDRSHIFT; + + srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]); + if (srcptepaddr == 0) + continue; + + if (srcptepaddr & PG_PS) { + if (dst_pmap->pm_pdir[ptepindex] == 0) { + PT_SET_VA(&dst_pmap->pm_pdir[ptepindex], srcptepaddr, TRUE); + dst_pmap->pm_stats.resident_count += + NBPDR / PAGE_SIZE; + } + continue; + } + + srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); + if (srcmpte->wire_count == 0) + panic("pmap_copy: source page table page is unused"); + + if (pdnxt > end_addr) + pdnxt = end_addr; + + src_pte = vtopte(addr); + while (addr < pdnxt) { + pt_entry_t ptetemp; + ptetemp = PT_GET(src_pte); + /* + * we only virtual copy managed pages + */ + if ((ptetemp & PG_MANAGED) != 0) { + /* + * We have to check after allocpte for the + * pte still being around... allocpte can + * block. + */ + dstmpte = pmap_allocpte(dst_pmap, addr, + M_NOWAIT); + if (dstmpte == NULL) + break; + dst_pte = pmap_pte_quick(dst_pmap, addr); + if (*dst_pte == 0) { + /* + * Clear the modified and + * accessed (referenced) bits + * during the copy. 
+ */ + m = PHYS_TO_VM_PAGE(ptetemp); + PT_SET_VA(dst_pte, ptetemp & ~(PG_M | PG_A), FALSE); + dst_pmap->pm_stats.resident_count++; + pmap_insert_entry(dst_pmap, addr, m); + } else + pmap_unwire_pte_hold(dst_pmap, dstmpte); + if (dstmpte->wire_count >= srcmpte->wire_count) + break; + } + addr += PAGE_SIZE; + src_pte++; + } + } + PT_UPDATES_FLUSH(); + sched_unpin(); + vm_page_unlock_queues(); + PMAP_UNLOCK(src_pmap); + PMAP_UNLOCK(dst_pmap); +} + +static __inline void +pagezero(void *page) +{ +#if defined(I686_CPU) + if (cpu_class == CPUCLASS_686) { +#if defined(CPU_ENABLE_SSE) + if (cpu_feature & CPUID_SSE2) + sse2_pagezero(page); + else +#endif + i686_pagezero(page); + } else +#endif + bzero(page, PAGE_SIZE); +} + +/* + * pmap_zero_page zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. + */ +void +pmap_zero_page(vm_page_t m) +{ + + mtx_lock(&CMAPCADDR12_lock); + if (*CMAP2) + panic("pmap_zero_page: CMAP2 busy"); + sched_pin(); + PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, FALSE); + invlcaddr(CADDR2); + pagezero(CADDR2); + PT_CLEAR_VA(CMAP2, TRUE); + sched_unpin(); + mtx_unlock(&CMAPCADDR12_lock); +} + +/* + * pmap_zero_page_area zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. + * + * off and size may not cover an area beyond a single hardware page. + */ +void +pmap_zero_page_area(vm_page_t m, int off, int size) +{ + + mtx_lock(&CMAPCADDR12_lock); + if (*CMAP2) + panic("pmap_zero_page: CMAP2 busy"); + sched_pin(); + PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, FALSE); + invlcaddr(CADDR2); + if (off == 0 && size == PAGE_SIZE) + pagezero(CADDR2); + else + bzero((char *)CADDR2 + off, size); + PT_CLEAR_VA(CMAP2, TRUE); + sched_unpin(); + mtx_unlock(&CMAPCADDR12_lock); +} + +/* + * pmap_zero_page_idle zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. 
This + * is intended to be called from the vm_pagezero process only and + * outside of Giant. + */ +void +pmap_zero_page_idle(vm_page_t m) +{ + + if (*CMAP3) + panic("pmap_zero_page: CMAP3 busy"); + sched_pin(); + PT_SET_VA(CMAP3, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, TRUE); + invlcaddr(CADDR3); + pagezero(CADDR3); + PT_CLEAR_VA(CMAP3, TRUE); + sched_unpin(); +} + +/* + * pmap_copy_page copies the specified (machine independent) + * page by mapping the page into virtual memory and using + * bcopy to copy the page, one machine dependent page at a + * time. + */ +void +pmap_copy_page(vm_page_t src, vm_page_t dst) +{ + + mtx_lock(&CMAPCADDR12_lock); + if (*CMAP1) + panic("pmap_copy_page: CMAP1 busy"); + if (*CMAP2) + panic("pmap_copy_page: CMAP2 busy"); + sched_pin(); +#ifdef I386_CPU + invltlb(); +#else + invlpg((u_int)CADDR1); + invlpg((u_int)CADDR2); +#endif + PT_SET_VA(CMAP1, PG_V | VM_PAGE_TO_PHYS(src) | PG_A, FALSE); + PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M, TRUE); + + bcopy(CADDR1, CADDR2, PAGE_SIZE); + PT_CLEAR_VA(CMAP1, FALSE); + PT_CLEAR_VA(CMAP2, TRUE); + sched_unpin(); + mtx_unlock(&CMAPCADDR12_lock); +} + +/* + * Returns true if the pmap's pv is one of the first + * 16 pvs linked to from this page. This count may + * be changed upwards or downwards in the future; it + * is only necessary that true be returned for a small + * subset of pmaps for proper page aging. + */ +boolean_t +pmap_page_exists_quick(pmap, m) + pmap_t pmap; + vm_page_t m; +{ + pv_entry_t pv; + int loops = 0; + + if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) + return FALSE; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + if (pv->pv_pmap == pmap) { + return TRUE; + } + loops++; + if (loops >= 16) + break; + } + return (FALSE); +} + +#define PMAP_REMOVE_PAGES_CURPROC_ONLY +/* + * Remove all pages from specified address space + * this aids process exit speeds. 
Also, this code + * is special cased for current process only, but + * can have the more generic (and slightly slower) + * mode enabled. This is much faster than pmap_remove + * in the case of running down an entire address space. + */ +void +pmap_remove_pages(pmap, sva, eva) + pmap_t pmap; + vm_offset_t sva, eva; +{ + pt_entry_t *pte, tpte; + vm_page_t m; + pv_entry_t pv, npv; + +#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY + if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { + printf("warning: pmap_remove_pages called with non-current pmap\n"); + return; + } +#endif + vm_page_lock_queues(); + PMAP_LOCK(pmap); + sched_pin(); + + for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { + if (pv->pv_va >= eva || pv->pv_va < sva) { + npv = TAILQ_NEXT(pv, pv_plist); + continue; + } + +#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY + pte = vtopte(pv->pv_va); +#else + pte = pmap_pte_quick(pmap, pv->pv_va); +#endif + tpte = PT_GET(pte); + + if (tpte == 0) { + printf("TPTE at %p IS ZERO @ VA %08x\n", + pte, pv->pv_va); + panic("bad pte"); + } + +/* + * We cannot remove wired pages from a process' mapping at this time + */ + if (tpte & PG_W) { + npv = TAILQ_NEXT(pv, pv_plist); + continue; + } + + m = PHYS_TO_VM_PAGE(tpte); + KASSERT(m->phys_addr == (tpte & PG_FRAME), + ("vm_page_t %p phys_addr mismatch %016jx %016jx", + m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); + + KASSERT(m < &vm_page_array[vm_page_array_size], + ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); + + pmap->pm_stats.resident_count--; + + pte_clear(pte); + + /* + * Update the vm_page_t clean and reference bits. 
+ */ + if (tpte & PG_M) { + vm_page_dirty(m); + } + + npv = TAILQ_NEXT(pv, pv_plist); + TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); + + m->md.pv_list_count--; + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_WRITEABLE); + + pmap_unuse_pt(pmap, pv->pv_va); + free_pv_entry(pv); + } + sched_unpin(); + pmap_invalidate_all(pmap); + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); +} + +/* + * pmap_is_modified: + * + * Return whether or not the specified physical page was modified + * in any physical maps. + */ +boolean_t +pmap_is_modified(vm_page_t m) +{ + pv_entry_t pv; + pt_entry_t *pte; + boolean_t rv; + + rv = FALSE; + if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) + return (rv); + + sched_pin(); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + /* + * if the bit being tested is the modified bit, then + * mark clean_map and ptes as never + * modified. + */ + if (!pmap_track_modified(pv->pv_va)) + continue; +#if defined(PMAP_DIAGNOSTIC) + if (!pv->pv_pmap) { + printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); + continue; + } +#endif + PMAP_LOCK(pv->pv_pmap); + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + rv = (*pte & PG_M) != 0; + PMAP_UNLOCK(pv->pv_pmap); + if (rv) + break; + } + sched_unpin(); + return (rv); +} + +/* + * pmap_is_prefaultable: + * + * Return whether or not the specified virtual address is elgible + * for prefault. + */ +boolean_t +pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) +{ + pt_entry_t *pte; + boolean_t rv; + + rv = FALSE; + + return (rv); + PMAP_LOCK(pmap); + if (pmap_pde(pmap, addr)) { + pte = vtopte(addr); + rv = *pte == 0; + } + PMAP_UNLOCK(pmap); + return (rv); +} + +/* + * Clear the given bit in each of the given page's ptes. The bit is + * expressed as a 32-bit mask. Consequently, if the pte is 64 bits in + * size, only a bit within the least significant 32 can be cleared. 
+ */ +static __inline void +pmap_clear_ptes(vm_page_t m, int bit) +{ + register pv_entry_t pv; + pt_entry_t pbits, *pte; + + if (!pmap_initialized || (m->flags & PG_FICTITIOUS) || + (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) + return; + + sched_pin(); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + /* + * Loop over all current mappings setting/clearing as appropos If + * setting RO do we need to clear the VAC? + */ + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + /* + * don't write protect pager mappings + */ + if (bit == PG_RW) { + if (!pmap_track_modified(pv->pv_va)) + continue; + } + +#if defined(PMAP_DIAGNOSTIC) + if (!pv->pv_pmap) { + printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); + continue; + } +#endif + + PMAP_LOCK(pv->pv_pmap); + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); +#ifdef notyet +retry: +#endif + pbits = PT_GET(pte); + if (pbits & bit) { + if (bit == PG_RW) { + /* + * Regardless of whether a pte is 32 or 64 bits + * in size, PG_RW and PG_M are among the least + * significant 32 bits. + */ +#ifdef notyet + if (!atomic_cmpset_int((u_int *)pte, pbits, + pbits & ~(PG_RW | PG_M))) + goto retry; +#endif + PT_SET_VA(pte, pbits & ~(PG_M|PG_RW), TRUE); + + + if (pbits & PG_M) { + vm_page_dirty(m); + } + } else { +#ifdef notyet + atomic_clear_int((u_int *)pte, bit); +#endif + /* XXX */ + PT_SET_VA(pte, pbits & ~bit, TRUE); + } + pmap_invalidate_page(pv->pv_pmap, pv->pv_va); + } + PMAP_UNLOCK(pv->pv_pmap); + } + if (bit == PG_RW) + vm_page_flag_clear(m, PG_WRITEABLE); + sched_unpin(); +} + +/* + * pmap_page_protect: + * + * Lower the permission for all mappings to a given page. + */ +void +pmap_page_protect(vm_page_t m, vm_prot_t prot) +{ + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { + pmap_clear_ptes(m, PG_RW); + } else { + pmap_remove_all(m); + } + } +} + +/* + * pmap_ts_referenced: + * + * Return a count of reference bits for a page, clearing those bits. 
+ * It is not necessary for every reference bit to be cleared, but it + * is necessary that 0 only be returned when there are truly no + * reference bits set. + * + * XXX: The exact number of bits to check and clear is a matter that + * should be tested and standardized at some point in the future for + * optimal aging of shared pages. + */ +int +pmap_ts_referenced(vm_page_t m) +{ + register pv_entry_t pv, pvf, pvn; + pt_entry_t *pte; + pt_entry_t v; + int rtval = 0; + + if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) + return (rtval); + + sched_pin(); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + + pvf = pv; + + do { + pvn = TAILQ_NEXT(pv, pv_list); + + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + + if (!pmap_track_modified(pv->pv_va)) + continue; + + PMAP_LOCK(pv->pv_pmap); + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + + if (pte && ((v = PT_GET(pte)) & PG_A) != 0) { +#ifdef notyet + atomic_clear_int((u_int *)pte, PG_A); +#endif + PT_SET_VA(pte, v & ~PG_A, FALSE); + pmap_invalidate_page(pv->pv_pmap, pv->pv_va); + + rtval++; + if (rtval > 4) { + PMAP_UNLOCK(pv->pv_pmap); + break; + } + } + PMAP_UNLOCK(pv->pv_pmap); + } while ((pv = pvn) != NULL && pv != pvf); + } + sched_unpin(); + + return (rtval); +} + +/* + * Clear the modify bits on the specified physical page. + */ +void +pmap_clear_modify(vm_page_t m) +{ + pmap_clear_ptes(m, PG_M); +} + +/* + * pmap_clear_reference: + * + * Clear the reference bit on the specified physical page. + */ +void +pmap_clear_reference(vm_page_t m) +{ + pmap_clear_ptes(m, PG_A); +} + +/* + * Miscellaneous support routines follow + */ + +/* + * Map a set of physical memory pages into the kernel virtual + * address space. Return a pointer to where it is mapped. This + * routine is intended to be used for mapping device memory, + * NOT real memory. 
+ */ +void * +pmap_mapdev(pa, size) + vm_paddr_t pa; + vm_size_t size; +{ + vm_offset_t va, tmpva, offset; + + offset = pa & PAGE_MASK; + size = roundup(offset + size, PAGE_SIZE); + pa = pa & PG_FRAME; + + if (pa < KERNLOAD && pa + size <= KERNLOAD) + va = KERNBASE + pa; + else + va = kmem_alloc_nofault(kernel_map, size); + if (!va) + panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); + + for (tmpva = va; size > 0; ) { + PT_SET(tmpva, pa | PG_RW | PG_V | pgeflag, FALSE); + size -= PAGE_SIZE; + tmpva += PAGE_SIZE; + pa += PAGE_SIZE; + } + pmap_invalidate_range(kernel_pmap, va, tmpva); + return ((void *)(va + offset)); +} + +void +pmap_unmapdev(va, size) + vm_offset_t va; + vm_size_t size; +{ + vm_offset_t base, offset, tmpva; + panic("unused"); + if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) + return; + base = va & PG_FRAME; + offset = va & PAGE_MASK; + size = roundup(offset + size, PAGE_SIZE); + for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) + PT_CLEAR(tmpva, FALSE); + pmap_invalidate_range(kernel_pmap, va, tmpva); + kmem_free(kernel_map, base, size); +} + +/* + * perform the pmap work for mincore + */ +int +pmap_mincore(pmap, addr) + pmap_t pmap; + vm_offset_t addr; +{ + pt_entry_t *ptep, pte; + vm_page_t m; + int val = 0; + + PMAP_LOCK(pmap); + ptep = pmap_pte(pmap, addr); + pte = (ptep != NULL) ? 
PT_GET(ptep) : 0; + pmap_pte_release(ptep); + PMAP_UNLOCK(pmap); + + if (pte != 0) { + vm_paddr_t pa; + + val = MINCORE_INCORE; + if ((pte & PG_MANAGED) == 0) + return val; + + pa = pte & PG_FRAME; + + m = PHYS_TO_VM_PAGE(pa); + + /* + * Modified by us + */ + if (pte & PG_M) + val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; + else { + /* + * Modified by someone else + */ + vm_page_lock_queues(); + if (m->dirty || pmap_is_modified(m)) + val |= MINCORE_MODIFIED_OTHER; + vm_page_unlock_queues(); + } + /* + * Referenced by us + */ + if (pte & PG_A) + val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; + else { + /* + * Referenced by someone else + */ + vm_page_lock_queues(); + if ((m->flags & PG_REFERENCED) || + pmap_ts_referenced(m)) { + val |= MINCORE_REFERENCED_OTHER; + vm_page_flag_set(m, PG_REFERENCED); + } + vm_page_unlock_queues(); + } + } + return val; +} + +void +pmap_activate(struct thread *td) +{ + struct proc *p = td->td_proc; + pmap_t pmap, oldpmap; + u_int32_t cr3; + + critical_enter(); + pmap = vmspace_pmap(td->td_proc->p_vmspace); + oldpmap = PCPU_GET(curpmap); +#if defined(SMP) + atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); + atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); +#else + oldpmap->pm_active &= ~1; + pmap->pm_active |= 1; +#endif +#ifdef PAE + cr3 = vtophys(pmap->pm_pdpt); +#else + cr3 = vtophys(pmap->pm_pdir); +#endif + /* XXXKSE this is wrong. + * pmap_activate is for the current thread on the current cpu + */ + if (p->p_flag & P_SA) { + /* Make sure all other cr3 entries are updated. */ + /* what if they are running? 
XXXKSE (maybe abort them) */ + FOREACH_THREAD_IN_PROC(p, td) { + td->td_pcb->pcb_cr3 = cr3; + } + } else { + td->td_pcb->pcb_cr3 = cr3; + } + load_cr3(cr3); + PCPU_SET(curpmap, pmap); + critical_exit(); +} + +vm_offset_t +pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) +{ + + if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { + return addr; + } + + addr = (addr + PDRMASK) & ~PDRMASK; + return addr; +} + + +#if defined(PMAP_DEBUG) +extern int init_first; +void +pmap_ref(pt_entry_t *pte, unsigned long ma) +{ + int ind, i, count; + unsigned long ebp_prev, eip_prev, oma = 0; + unsigned long pa = xpmap_mtop(ma); + + /* are we to the point where mappings are set up? */ + if (!init_first) + return; + + ind = pa >> PAGE_SHIFT; + /* privileged? */ + if ((pa & PG_RW) && pteinfo_list[ind].pt_ref & (1 << 31)) + BKPT; + + /* is MA already mapped ? */ + oma = *pte; + + /* old reference being lost */ + if (oma && (oma & PG_RW) && ((oma & PG_FRAME) != (ma & PG_FRAME))) + pmap_dec_ref(oma); + + /* ignore RO mappings - unless were downgrading */ + if (!(ma & PG_RW)) { + /* downgrading mapping - lose reference */ + if (((oma & PG_FRAME) == (ma & PG_FRAME)) && + (oma & PG_RW)) + pmap_dec_ref(ma); + return; + } + + if (pteinfo_list[ind].pt_ref < 0) + BKPT; + + + /* same address and not upgrading the mapping */ + if (((oma & PG_FRAME) == (ma & PG_FRAME)) && + (oma & PG_RW)) + return; + + count = pteinfo_list[ind].pt_ref; + __asm__("movl %%ebp, %0" : "=r" (ebp_prev)); + for (i = 0; i < XPQ_CALL_DEPTH && ebp_prev > KERNBASE; i++) { + __asm__("movl 4(%1), %0" : "=r" (eip_prev) : "r" (ebp_prev)); + pteinfo_list[ind].pt_eip[count%XPQ_CALL_COUNT][i] = eip_prev; + __asm__("movl (%1), %0" : "=r" (ebp_prev) : "r" (ebp_prev)); + } + + pteinfo_list[ind].pt_ref++; + +} + +void +pmap_dec_ref(unsigned long ma) +{ + unsigned long pa; + int ind, count; + + if (!ma) BKPT; + + pa = xpmap_mtop(ma); + + ind = pa >> PAGE_SHIFT; + if (pteinfo_list[ind].pt_ref & (1 << 31)) 
BKPT; + + count = pteinfo_list[ind].pt_ref & ~(1 << 31); + if (count < 1) { + printk("ma: %lx has ref count of 0\n", ma); + BKPT; + } + pteinfo_list[ind].pt_ref = (--count | (pteinfo_list[ind].pt_ref & (1 << 31))); + +} + +void +pmap_dec_ref_page(vm_page_t m) +{ + unsigned long *pt; + int i; + mtx_lock(&CMAPCADDR12_lock); + if (*CMAP2) + panic("pmap_zero_page: CMAP2 busy"); + sched_pin(); + PT_SET_VA(CMAP2, PG_V | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, FALSE); + invlcaddr(CADDR2); + pt = (unsigned long *)CADDR2; + for (i = 0; i < 1024; i++) + if (pt[i] & PG_RW) + pmap_dec_ref(xpmap_ptom(pt[i])); + PT_CLEAR_VA(CMAP2, TRUE); + sched_unpin(); + mtx_unlock(&CMAPCADDR12_lock); +} + +void +pmap_mark_privileged(unsigned long pa) +{ + int ind = pa >> PAGE_SHIFT; + + if (pteinfo_list[ind].pt_ref & (1 << 31)) BKPT; + if ((pteinfo_list[ind].pt_ref & ~(1 << 31)) > 0) BKPT; + + pteinfo_list[ind].pt_ref |= (1 << 31); + +} + +void +pmap_mark_unprivileged(unsigned long pa) +{ + int ind = pa >> PAGE_SHIFT; + + if (pteinfo_list[ind].pt_ref != (1 << 31)) BKPT; + + pteinfo_list[ind].pt_ref &= ~(1 << 31); + +} + + +int +pmap_pid_dump(int pid) +{ + pmap_t pmap; + struct proc *p; + int npte = 0; + int index; + + sx_slock(&allproc_lock); + LIST_FOREACH(p, &allproc, p_list) { + if (p->p_pid != pid) + continue; + + if (p->p_vmspace) { + int i,j; + index = 0; + pmap = vmspace_pmap(p->p_vmspace); + for (i = 0; i < NPDEPTD; i++) { + pd_entry_t *pde; + pt_entry_t *pte; + vm_offset_t base = i << PDRSHIFT; + + pde = &pmap->pm_pdir[i]; + if (pde && pmap_pde_v(pde)) { + for (j = 0; j < NPTEPG; j++) { + vm_offset_t va = base + (j << PAGE_SHIFT); + if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { + if (index) { + index = 0; + printf("\n"); + } + sx_sunlock(&allproc_lock); + return npte; + } + pte = pmap_pte(pmap, va); + if (pte && pmap_pte_v(pte)) { + pt_entry_t pa; + vm_page_t m; + pa = PT_GET(pte); + m = PHYS_TO_VM_PAGE(pa); + printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", + va, pa, m->hold_count, 
m->wire_count, m->flags); + npte++; + index++; + if (index >= 2) { + index = 0; + printf("\n"); + } else { + printf(" "); + } + } + } + } + } + } + } + sx_sunlock(&allproc_lock); + return npte; +} +#endif /* PMAP_DEBUG */ + +#if defined(DEBUG) + +static void pads(pmap_t pm); +void pmap_pvdump(vm_offset_t pa); + +/* print address space of pmap*/ +static void +pads(pm) + pmap_t pm; +{ + int i, j; + vm_paddr_t va; + pt_entry_t *ptep; + + if (pm == kernel_pmap) + return; + for (i = 0; i < NPDEPTD; i++) + if (pm->pm_pdir[i]) + for (j = 0; j < NPTEPG; j++) { + va = (i << PDRSHIFT) + (j << PAGE_SHIFT); + if (pm == kernel_pmap && va < KERNBASE) + continue; + if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) + continue; + ptep = pmap_pte(pm, va); + if (pmap_pte_v(ptep)) + printf("%x:%x ", va, *ptep); + }; + +} + +void +pmap_pvdump(pa) + vm_paddr_t pa; +{ + pv_entry_t pv; + vm_page_t m; + + printf("pa %x", pa); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); + pads(pv->pv_pmap); + } + printf(" "); +} +#endif diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s new file mode 100644 index 0000000000..deb4a94859 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s @@ -0,0 +1,1553 @@ +/*- + * Copyright (c) 1993 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: src/sys/i386/i386/support.s,v 1.100 2003/11/03 21:28:54 jhb Exp $ + */ + +#include "opt_npx.h" + +#include <machine/asmacros.h> +#include <machine/cputypes.h> +#include <machine/intr_machdep.h> +#include <machine/pmap.h> +#include <machine/specialreg.h> + +#include "assym.s" + +#define IDXSHIFT 10 + + .data + .globl bcopy_vector +bcopy_vector: + .long generic_bcopy + .globl bzero_vector +bzero_vector: + .long generic_bzero + .globl copyin_vector +copyin_vector: + .long generic_copyin + .globl copyout_vector +copyout_vector: + .long generic_copyout +#if defined(I586_CPU) && defined(DEV_NPX) +kernel_fpu_lock: + .byte 0xfe + .space 3 +#endif + ALIGN_DATA + .globl intrcnt, eintrcnt +intrcnt: + .space INTRCNT_COUNT * 4 +eintrcnt: + + .globl intrnames, eintrnames +intrnames: + .space INTRCNT_COUNT * (MAXCOMLEN + 1) +eintrnames: + + .text + +/* + * bcopy family + * void bzero(void *buf, u_int len) + */ + +ENTRY(bzero) + MEXITCOUNT + jmp *bzero_vector + +ENTRY(generic_bzero) + pushl %edi + movl 8(%esp),%edi + movl 12(%esp),%ecx + xorl %eax,%eax + shrl $2,%ecx + cld + rep + stosl + movl 12(%esp),%ecx + andl $3,%ecx + rep + stosb + popl %edi + ret + +#ifdef I486_CPU +ENTRY(i486_bzero) + movl 4(%esp),%edx + movl 8(%esp),%ecx + xorl %eax,%eax +/* + * do 64 byte chunks first + * + * XXX this is probably over-unrolled at least for DX2's + */ +2: + cmpl $64,%ecx + jb 3f + movl %eax,(%edx) + movl %eax,4(%edx) + movl %eax,8(%edx) + movl %eax,12(%edx) + movl %eax,16(%edx) + movl %eax,20(%edx) + movl %eax,24(%edx) + movl %eax,28(%edx) + movl %eax,32(%edx) + movl %eax,36(%edx) + movl %eax,40(%edx) + movl %eax,44(%edx) + movl %eax,48(%edx) + movl %eax,52(%edx) + movl %eax,56(%edx) + movl %eax,60(%edx) + addl $64,%edx + subl $64,%ecx + jnz 2b + ret + +/* + * do 16 byte chunks + */ + SUPERALIGN_TEXT +3: + cmpl $16,%ecx + jb 4f + movl %eax,(%edx) + movl %eax,4(%edx) + movl %eax,8(%edx) + movl %eax,12(%edx) + addl $16,%edx + subl $16,%ecx + jnz 3b + ret + +/* + * do 4 
byte chunks + */ + SUPERALIGN_TEXT +4: + cmpl $4,%ecx + jb 5f + movl %eax,(%edx) + addl $4,%edx + subl $4,%ecx + jnz 4b + ret + +/* + * do 1 byte chunks + * a jump table seems to be faster than a loop or more range reductions + * + * XXX need a const section for non-text + */ + .data +jtab: + .long do0 + .long do1 + .long do2 + .long do3 + + .text + SUPERALIGN_TEXT +5: + jmp *jtab(,%ecx,4) + + SUPERALIGN_TEXT +do3: + movw %ax,(%edx) + movb %al,2(%edx) + ret + + SUPERALIGN_TEXT +do2: + movw %ax,(%edx) + ret + + SUPERALIGN_TEXT +do1: + movb %al,(%edx) + ret + + SUPERALIGN_TEXT +do0: + ret +#endif + +#if defined(I586_CPU) && defined(DEV_NPX) +ENTRY(i586_bzero) + movl 4(%esp),%edx + movl 8(%esp),%ecx + + /* + * The FPU register method is twice as fast as the integer register + * method unless the target is in the L1 cache and we pre-allocate a + * cache line for it (then the integer register method is 4-5 times + * faster). However, we never pre-allocate cache lines, since that + * would make the integer method 25% or more slower for the common + * case when the target isn't in either the L1 cache or the L2 cache. + * Thus we normally use the FPU register method unless the overhead + * would be too large. + */ + cmpl $256,%ecx /* empirical; clts, fninit, smsw cost a lot */ + jb intreg_i586_bzero + + /* + * The FPU registers may belong to an application or to fastmove() + * or to another invocation of bcopy() or ourself in a higher level + * interrupt or trap handler. Preserving the registers is + * complicated since we avoid it if possible at all levels. We + * want to localize the complications even when that increases them. + * Here the extra work involves preserving CR0_TS in TS. + * `fpcurthread != NULL' is supposed to be the condition that all the + * FPU resources belong to an application, but fpcurthread and CR0_TS + * aren't set atomically enough for this condition to work in + * interrupt handlers. 
+ * + * Case 1: FPU registers belong to the application: we must preserve + * the registers if we use them, so we only use the FPU register + * method if the target size is large enough to amortize the extra + * overhead for preserving them. CR0_TS must be preserved although + * it is very likely to end up as set. + * + * Case 2: FPU registers belong to fastmove(): fastmove() currently + * makes the registers look like they belong to an application so + * that cpu_switch() and savectx() don't have to know about it, so + * this case reduces to case 1. + * + * Case 3: FPU registers belong to the kernel: don't use the FPU + * register method. This case is unlikely, and supporting it would + * be more complicated and might take too much stack. + * + * Case 4: FPU registers don't belong to anyone: the FPU registers + * don't need to be preserved, so we always use the FPU register + * method. CR0_TS must be preserved although it is very likely to + * always end up as clear. + */ + cmpl $0,PCPU(FPCURTHREAD) + je i586_bz1 + + /* + * XXX don't use the FPU for cases 1 and 2, since preemptive + * scheduling of ithreads broke these cases. Note that we can + * no longer get here from an interrupt handler, since the + * context sitch to the interrupt handler will have saved the + * FPU state. + */ + jmp intreg_i586_bzero + + cmpl $256+184,%ecx /* empirical; not quite 2*108 more */ + jb intreg_i586_bzero + sarb $1,kernel_fpu_lock + jc intreg_i586_bzero + smsw %ax + clts + subl $108,%esp + fnsave 0(%esp) + jmp i586_bz2 + +i586_bz1: + sarb $1,kernel_fpu_lock + jc intreg_i586_bzero + smsw %ax + clts + fninit /* XXX should avoid needing this */ +i586_bz2: + fldz + + /* + * Align to an 8 byte boundary (misalignment in the main loop would + * cost a factor of >= 2). Avoid jumps (at little cost if it is + * already aligned) by always zeroing 8 bytes and using the part up + * to the _next_ alignment position. 
+ */ + fstl 0(%edx) + addl %edx,%ecx /* part of %ecx -= new_%edx - %edx */ + addl $8,%edx + andl $~7,%edx + subl %edx,%ecx + + /* + * Similarly align `len' to a multiple of 8. + */ + fstl -8(%edx,%ecx) + decl %ecx + andl $~7,%ecx + + /* + * This wouldn't be any faster if it were unrolled, since the loop + * control instructions are much faster than the fstl and/or done + * in parallel with it so their overhead is insignificant. + */ +fpureg_i586_bzero_loop: + fstl 0(%edx) + addl $8,%edx + subl $8,%ecx + cmpl $8,%ecx + jae fpureg_i586_bzero_loop + + cmpl $0,PCPU(FPCURTHREAD) + je i586_bz3 + + /* XXX check that the condition for cases 1-2 stayed false. */ +i586_bzero_oops: + int $3 + jmp i586_bzero_oops + + frstor 0(%esp) + addl $108,%esp + lmsw %ax + movb $0xfe,kernel_fpu_lock + ret + +i586_bz3: + fstp %st(0) + lmsw %ax + movb $0xfe,kernel_fpu_lock + ret + +intreg_i586_bzero: + /* + * `rep stos' seems to be the best method in practice for small + * counts. Fancy methods usually take too long to start up due + * to cache and BTB misses. 
+ */ + pushl %edi + movl %edx,%edi + xorl %eax,%eax + shrl $2,%ecx + cld + rep + stosl + movl 12(%esp),%ecx + andl $3,%ecx + jne 1f + popl %edi + ret + +1: + rep + stosb + popl %edi + ret +#endif /* I586_CPU && defined(DEV_NPX) */ + +ENTRY(sse2_pagezero) + pushl %ebx + movl 8(%esp),%ecx + movl %ecx,%eax + addl $4096,%eax + xor %ebx,%ebx +1: + movnti %ebx,(%ecx) + addl $4,%ecx + cmpl %ecx,%eax + jne 1b + sfence + popl %ebx + ret + +ENTRY(i686_pagezero) + pushl %edi + pushl %ebx + + movl 12(%esp), %edi + movl $1024, %ecx + cld + + ALIGN_TEXT +1: + xorl %eax, %eax + repe + scasl + jnz 2f + + popl %ebx + popl %edi + ret + + ALIGN_TEXT + +2: + incl %ecx + subl $4, %edi + + movl %ecx, %edx + cmpl $16, %ecx + + jge 3f + + movl %edi, %ebx + andl $0x3f, %ebx + shrl %ebx + shrl %ebx + movl $16, %ecx + subl %ebx, %ecx + +3: + subl %ecx, %edx + rep + stosl + + movl %edx, %ecx + testl %edx, %edx + jnz 1b + + popl %ebx + popl %edi + ret + +/* fillw(pat, base, cnt) */ +ENTRY(fillw) + pushl %edi + movl 8(%esp),%eax + movl 12(%esp),%edi + movl 16(%esp),%ecx + cld + rep + stosw + popl %edi + ret + +ENTRY(bcopyb) + pushl %esi + pushl %edi + movl 12(%esp),%esi + movl 16(%esp),%edi + movl 20(%esp),%ecx + movl %edi,%eax + subl %esi,%eax + cmpl %ecx,%eax /* overlapping && src < dst? */ + jb 1f + cld /* nope, copy forwards */ + rep + movsb + popl %edi + popl %esi + ret + + ALIGN_TEXT +1: + addl %ecx,%edi /* copy backwards. */ + addl %ecx,%esi + decl %edi + decl %esi + std + rep + movsb + popl %edi + popl %esi + cld + ret + +ENTRY(bcopy) + MEXITCOUNT + jmp *bcopy_vector + +/* + * generic_bcopy(src, dst, cnt) + * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 + */ +ENTRY(generic_bcopy) + pushl %esi + pushl %edi + movl 12(%esp),%esi + movl 16(%esp),%edi + movl 20(%esp),%ecx + + movl %edi,%eax + subl %esi,%eax + cmpl %ecx,%eax /* overlapping && src < dst? 
*/ + jb 1f + + shrl $2,%ecx /* copy by 32-bit words */ + cld /* nope, copy forwards */ + rep + movsl + movl 20(%esp),%ecx + andl $3,%ecx /* any bytes left? */ + rep + movsb + popl %edi + popl %esi + ret + + ALIGN_TEXT +1: + addl %ecx,%edi /* copy backwards */ + addl %ecx,%esi + decl %edi + decl %esi + andl $3,%ecx /* any fractional bytes? */ + std + rep + movsb + movl 20(%esp),%ecx /* copy remainder by 32-bit words */ + shrl $2,%ecx + subl $3,%esi + subl $3,%edi + rep + movsl + popl %edi + popl %esi + cld + ret + +#if defined(I586_CPU) && defined(DEV_NPX) +ENTRY(i586_bcopy) + pushl %esi + pushl %edi + movl 12(%esp),%esi + movl 16(%esp),%edi + movl 20(%esp),%ecx + + movl %edi,%eax + subl %esi,%eax + cmpl %ecx,%eax /* overlapping && src < dst? */ + jb 1f + + cmpl $1024,%ecx + jb small_i586_bcopy + + sarb $1,kernel_fpu_lock + jc small_i586_bcopy + cmpl $0,PCPU(FPCURTHREAD) + je i586_bc1 + + /* XXX turn off handling of cases 1-2, as above. */ + movb $0xfe,kernel_fpu_lock + jmp small_i586_bcopy + + smsw %dx + clts + subl $108,%esp + fnsave 0(%esp) + jmp 4f + +i586_bc1: + smsw %dx + clts + fninit /* XXX should avoid needing this */ + + ALIGN_TEXT +4: + pushl %ecx +#define DCACHE_SIZE 8192 + cmpl $(DCACHE_SIZE-512)/2,%ecx + jbe 2f + movl $(DCACHE_SIZE-512)/2,%ecx +2: + subl %ecx,0(%esp) + cmpl $256,%ecx + jb 5f /* XXX should prefetch if %ecx >= 32 */ + pushl %esi + pushl %ecx + ALIGN_TEXT +3: + movl 0(%esi),%eax + movl 32(%esi),%eax + movl 64(%esi),%eax + movl 96(%esi),%eax + movl 128(%esi),%eax + movl 160(%esi),%eax + movl 192(%esi),%eax + movl 224(%esi),%eax + addl $256,%esi + subl $256,%ecx + cmpl $256,%ecx + jae 3b + popl %ecx + popl %esi +5: + ALIGN_TEXT +large_i586_bcopy_loop: + fildq 0(%esi) + fildq 8(%esi) + fildq 16(%esi) + fildq 24(%esi) + fildq 32(%esi) + fildq 40(%esi) + fildq 48(%esi) + fildq 56(%esi) + fistpq 56(%edi) + fistpq 48(%edi) + fistpq 40(%edi) + fistpq 32(%edi) + fistpq 24(%edi) + fistpq 16(%edi) + fistpq 8(%edi) + fistpq 0(%edi) + addl $64,%esi + 
addl $64,%edi + subl $64,%ecx + cmpl $64,%ecx + jae large_i586_bcopy_loop + popl %eax + addl %eax,%ecx + cmpl $64,%ecx + jae 4b + + cmpl $0,PCPU(FPCURTHREAD) + je i586_bc2 + + /* XXX check that the condition for cases 1-2 stayed false. */ +i586_bcopy_oops: + int $3 + jmp i586_bcopy_oops + + frstor 0(%esp) + addl $108,%esp +i586_bc2: + lmsw %dx + movb $0xfe,kernel_fpu_lock + +/* + * This is a duplicate of the main part of generic_bcopy. See the comments + * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and + * would mess up high resolution profiling. + */ + ALIGN_TEXT +small_i586_bcopy: + shrl $2,%ecx + cld + rep + movsl + movl 20(%esp),%ecx + andl $3,%ecx + rep + movsb + popl %edi + popl %esi + ret + + ALIGN_TEXT +1: + addl %ecx,%edi + addl %ecx,%esi + decl %edi + decl %esi + andl $3,%ecx + std + rep + movsb + movl 20(%esp),%ecx + shrl $2,%ecx + subl $3,%esi + subl $3,%edi + rep + movsl + popl %edi + popl %esi + cld + ret +#endif /* I586_CPU && defined(DEV_NPX) */ + +/* + * Note: memcpy does not support overlapping copies + */ +ENTRY(memcpy) + pushl %edi + pushl %esi + movl 12(%esp),%edi + movl 16(%esp),%esi + movl 20(%esp),%ecx + movl %edi,%eax + shrl $2,%ecx /* copy by 32-bit words */ + cld /* nope, copy forwards */ + rep + movsl + movl 20(%esp),%ecx + andl $3,%ecx /* any bytes left? */ + rep + movsb + popl %esi + popl %edi + ret + + +/*****************************************************************************/ +/* copyout and fubyte family */ +/*****************************************************************************/ +/* + * Access user memory from inside the kernel. These routines and possibly + * the math- and DOS emulators should be the only places that do this. + * + * We have to access the memory with user's permissions, so use a segment + * selector with RPL 3. 
For writes to user space we have to additionally + * check the PTE for write permission, because the 386 does not check + * write permissions when we are executing with EPL 0. The 486 does check + * this if the WP bit is set in CR0, so we can use a simpler version here. + * + * These routines set curpcb->onfault for the time they execute. When a + * protection violation occurs inside the functions, the trap handler + * returns to *curpcb->onfault instead of the function. + */ + +/* + * copyout(from_kernel, to_user, len) - MP SAFE (if not I386_CPU) + */ +ENTRY(copyout) + MEXITCOUNT + jmp *copyout_vector + +ENTRY(generic_copyout) + movl PCPU(CURPCB),%eax + movl $copyout_fault,PCB_ONFAULT(%eax) + pushl %esi + pushl %edi + pushl %ebx + movl 16(%esp),%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + testl %ebx,%ebx /* anything to do? */ + jz done_copyout + + /* + * Check explicitly for non-user addresses. If 486 write protection + * is being used, this check is essential because we are in kernel + * mode so the h/w does not provide any protection against writing + * kernel addresses. + */ + + /* + * First, prevent address wrapping. + */ + movl %edi,%eax + addl %ebx,%eax + jc copyout_fault +/* + * XXX STOP USING VM_MAXUSER_ADDRESS. + * It is an end address, not a max, so every time it is used correctly it + * looks like there is an off by one error, and of course it caused an off + * by one error in several places. 
+ */ + cmpl $VM_MAXUSER_ADDRESS,%eax + ja copyout_fault + + /* bcopy(%esi, %edi, %ebx) */ + movl %ebx,%ecx + +#if defined(I586_CPU) && defined(DEV_NPX) + ALIGN_TEXT +slow_copyout: +#endif + shrl $2,%ecx + cld + rep + movsl + movb %bl,%cl + andb $3,%cl + rep + movsb + +done_copyout: + popl %ebx + popl %edi + popl %esi + xorl %eax,%eax + movl PCPU(CURPCB),%edx + movl %eax,PCB_ONFAULT(%edx) + ret + + ALIGN_TEXT +copyout_fault: + popl %ebx + popl %edi + popl %esi + movl PCPU(CURPCB),%edx + movl $0,PCB_ONFAULT(%edx) + movl $EFAULT,%eax + ret + +#if defined(I586_CPU) && defined(DEV_NPX) +ENTRY(i586_copyout) + /* + * Duplicated from generic_copyout. Could be done a bit better. + */ + movl PCPU(CURPCB),%eax + movl $copyout_fault,PCB_ONFAULT(%eax) + pushl %esi + pushl %edi + pushl %ebx + movl 16(%esp),%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + testl %ebx,%ebx /* anything to do? */ + jz done_copyout + + /* + * Check explicitly for non-user addresses. If 486 write protection + * is being used, this check is essential because we are in kernel + * mode so the h/w does not provide any protection against writing + * kernel addresses. + */ + + /* + * First, prevent address wrapping. + */ + movl %edi,%eax + addl %ebx,%eax + jc copyout_fault +/* + * XXX STOP USING VM_MAXUSER_ADDRESS. + * It is an end address, not a max, so every time it is used correctly it + * looks like there is an off by one error, and of course it caused an off + * by one error in several places. + */ + cmpl $VM_MAXUSER_ADDRESS,%eax + ja copyout_fault + + /* bcopy(%esi, %edi, %ebx) */ +3: + movl %ebx,%ecx + /* + * End of duplicated code. 
+ */ + + cmpl $1024,%ecx + jb slow_copyout + + pushl %ecx + call fastmove + addl $4,%esp + jmp done_copyout +#endif /* I586_CPU && defined(DEV_NPX) */ + +/* + * copyin(from_user, to_kernel, len) - MP SAFE + */ +ENTRY(copyin) + MEXITCOUNT + jmp *copyin_vector + +ENTRY(generic_copyin) + movl PCPU(CURPCB),%eax + movl $copyin_fault,PCB_ONFAULT(%eax) + pushl %esi + pushl %edi + movl 12(%esp),%esi /* caddr_t from */ + movl 16(%esp),%edi /* caddr_t to */ + movl 20(%esp),%ecx /* size_t len */ + + /* + * make sure address is valid + */ + movl %esi,%edx + addl %ecx,%edx + jc copyin_fault + cmpl $VM_MAXUSER_ADDRESS,%edx + ja copyin_fault + +#if defined(I586_CPU) && defined(DEV_NPX) + ALIGN_TEXT +slow_copyin: +#endif + movb %cl,%al + shrl $2,%ecx /* copy longword-wise */ + cld + rep + movsl + movb %al,%cl + andb $3,%cl /* copy remaining bytes */ + rep + movsb + +#if defined(I586_CPU) && defined(DEV_NPX) + ALIGN_TEXT +done_copyin: +#endif + popl %edi + popl %esi + xorl %eax,%eax + movl PCPU(CURPCB),%edx + movl %eax,PCB_ONFAULT(%edx) + ret + + ALIGN_TEXT +copyin_fault: + popl %edi + popl %esi + movl PCPU(CURPCB),%edx + movl $0,PCB_ONFAULT(%edx) + movl $EFAULT,%eax + ret + +#if defined(I586_CPU) && defined(DEV_NPX) +ENTRY(i586_copyin) + /* + * Duplicated from generic_copyin. Could be done a bit better. + */ + movl PCPU(CURPCB),%eax + movl $copyin_fault,PCB_ONFAULT(%eax) + pushl %esi + pushl %edi + movl 12(%esp),%esi /* caddr_t from */ + movl 16(%esp),%edi /* caddr_t to */ + movl 20(%esp),%ecx /* size_t len */ + + /* + * make sure address is valid + */ + movl %esi,%edx + addl %ecx,%edx + jc copyin_fault + cmpl $VM_MAXUSER_ADDRESS,%edx + ja copyin_fault + /* + * End of duplicated code. 
+ */ + + cmpl $1024,%ecx + jb slow_copyin + + pushl %ebx /* XXX prepare for fastmove_fault */ + pushl %ecx + call fastmove + addl $8,%esp + jmp done_copyin +#endif /* I586_CPU && defined(DEV_NPX) */ + +#if defined(I586_CPU) && defined(DEV_NPX) +/* fastmove(src, dst, len) + src in %esi + dst in %edi + len in %ecx XXX changed to on stack for profiling + uses %eax and %edx for tmp. storage + */ +/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */ +ENTRY(fastmove) + pushl %ebp + movl %esp,%ebp + subl $PCB_SAVEFPU_SIZE+3*4,%esp + + movl 8(%ebp),%ecx + cmpl $63,%ecx + jbe fastmove_tail + + testl $7,%esi /* check if src addr is multiple of 8 */ + jnz fastmove_tail + + testl $7,%edi /* check if dst addr is multiple of 8 */ + jnz fastmove_tail + + /* XXX grab FPU context atomically. */ + call ni_cli + +/* if (fpcurthread != NULL) { */ + cmpl $0,PCPU(FPCURTHREAD) + je 6f +/* fnsave(&curpcb->pcb_savefpu); */ + movl PCPU(CURPCB),%eax + fnsave PCB_SAVEFPU(%eax) +/* FPCURTHREAD = NULL; */ + movl $0,PCPU(FPCURTHREAD) +/* } */ +6: +/* now we own the FPU. */ + +/* + * The process' FP state is saved in the pcb, but if we get + * switched, the cpu_switch() will store our FP state in the + * pcb. It should be possible to avoid all the copying for + * this, e.g., by setting a flag to tell cpu_switch() to + * save the state somewhere else. + */ +/* tmp = curpcb->pcb_savefpu; */ + movl %ecx,-12(%ebp) + movl %esi,-8(%ebp) + movl %edi,-4(%ebp) + movl %esp,%edi + movl PCPU(CURPCB),%esi + addl $PCB_SAVEFPU,%esi + cld + movl $PCB_SAVEFPU_SIZE>>2,%ecx + rep + movsl + movl -12(%ebp),%ecx + movl -8(%ebp),%esi + movl -4(%ebp),%edi +/* stop_emulating(); */ + clts +/* fpcurthread = curthread; */ + movl PCPU(CURTHREAD),%eax + movl %eax,PCPU(FPCURTHREAD) + movl PCPU(CURPCB),%eax + + /* XXX end of atomic FPU context grab. 
*/ + call ni_sti + + movl $fastmove_fault,PCB_ONFAULT(%eax) +4: + movl %ecx,-12(%ebp) + cmpl $1792,%ecx + jbe 2f + movl $1792,%ecx +2: + subl %ecx,-12(%ebp) + cmpl $256,%ecx + jb 5f + movl %ecx,-8(%ebp) + movl %esi,-4(%ebp) + ALIGN_TEXT +3: + movl 0(%esi),%eax + movl 32(%esi),%eax + movl 64(%esi),%eax + movl 96(%esi),%eax + movl 128(%esi),%eax + movl 160(%esi),%eax + movl 192(%esi),%eax + movl 224(%esi),%eax + addl $256,%esi + subl $256,%ecx + cmpl $256,%ecx + jae 3b + movl -8(%ebp),%ecx + movl -4(%ebp),%esi +5: + ALIGN_TEXT +fastmove_loop: + fildq 0(%esi) + fildq 8(%esi) + fildq 16(%esi) + fildq 24(%esi) + fildq 32(%esi) + fildq 40(%esi) + fildq 48(%esi) + fildq 56(%esi) + fistpq 56(%edi) + fistpq 48(%edi) + fistpq 40(%edi) + fistpq 32(%edi) + fistpq 24(%edi) + fistpq 16(%edi) + fistpq 8(%edi) + fistpq 0(%edi) + addl $-64,%ecx + addl $64,%esi + addl $64,%edi + cmpl $63,%ecx + ja fastmove_loop + movl -12(%ebp),%eax + addl %eax,%ecx + cmpl $64,%ecx + jae 4b + + /* XXX ungrab FPU context atomically. */ + call ni_cli + +/* curpcb->pcb_savefpu = tmp; */ + movl %ecx,-12(%ebp) + movl %esi,-8(%ebp) + movl %edi,-4(%ebp) + movl PCPU(CURPCB),%edi + addl $PCB_SAVEFPU,%edi + movl %esp,%esi + cld + movl $PCB_SAVEFPU_SIZE>>2,%ecx + rep + movsl + movl -12(%ebp),%ecx + movl -8(%ebp),%esi + movl -4(%ebp),%edi + +/* start_emulating(); */ + smsw %ax + orb $CR0_TS,%al + lmsw %ax +/* fpcurthread = NULL; */ + movl $0,PCPU(FPCURTHREAD) + + /* XXX end of atomic FPU context ungrab. */ + call ni_sti + + ALIGN_TEXT +fastmove_tail: + movl PCPU(CURPCB),%eax + movl $fastmove_tail_fault,PCB_ONFAULT(%eax) + + movb %cl,%al + shrl $2,%ecx /* copy longword-wise */ + cld + rep + movsl + movb %al,%cl + andb $3,%cl /* copy remaining bytes */ + rep + movsb + + movl %ebp,%esp + popl %ebp + ret + + ALIGN_TEXT +fastmove_fault: + /* XXX ungrab FPU context atomically. 
*/ + call ni_cli + + movl PCPU(CURPCB),%edi + addl $PCB_SAVEFPU,%edi + movl %esp,%esi + cld + movl $PCB_SAVEFPU_SIZE>>2,%ecx + rep + movsl + + smsw %ax + orb $CR0_TS,%al + lmsw %ax + movl $0,PCPU(FPCURTHREAD) + + /* XXX end of atomic FPU context ungrab. */ + call ni_sti + +fastmove_tail_fault: + movl %ebp,%esp + popl %ebp + addl $8,%esp + popl %ebx + popl %edi + popl %esi + movl PCPU(CURPCB),%edx + movl $0,PCB_ONFAULT(%edx) + movl $EFAULT,%eax + ret +#endif /* I586_CPU && defined(DEV_NPX) */ + +/* + * casuptr. Compare and set user pointer. Returns -1 or the current value. + */ +ENTRY(casuptr) + movl PCPU(CURPCB),%ecx + movl $fusufault,PCB_ONFAULT(%ecx) + movl 4(%esp),%edx /* dst */ + movl 8(%esp),%eax /* old */ + movl 12(%esp),%ecx /* new */ + + cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ + ja fusufault + +#ifdef SMP + lock +#endif + cmpxchgl %ecx, (%edx) /* Compare and set. */ + + /* + * The old value is in %eax. If the store succeeded it will be the + * value we expected (old) from before the store, otherwise it will + * be the current value. + */ + + movl PCPU(CURPCB),%ecx + movl $fusufault,PCB_ONFAULT(%ecx) + movl $0,PCB_ONFAULT(%ecx) + ret + +/* + * fu{byte,sword,word} - MP SAFE + * + * Fetch a byte (sword, word) from user memory + */ +ENTRY(fuword) + movl PCPU(CURPCB),%ecx + movl $fusufault,PCB_ONFAULT(%ecx) + movl 4(%esp),%edx /* from */ + + cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ + ja fusufault + + movl (%edx),%eax + movl $0,PCB_ONFAULT(%ecx) + ret + +ENTRY(fuword32) + jmp fuword + +/* + * These two routines are called from the profiling code, potentially + * at interrupt time. If they fail, that's okay, good things will + * happen later. Fail all the time for now - until the trap code is + * able to deal with this. 
+ */ +ALTENTRY(suswintr) +ENTRY(fuswintr) + movl $-1,%eax + ret + +/* + * fuword16 - MP SAFE + */ +ENTRY(fuword16) + movl PCPU(CURPCB),%ecx + movl $fusufault,PCB_ONFAULT(%ecx) + movl 4(%esp),%edx + + cmpl $VM_MAXUSER_ADDRESS-2,%edx + ja fusufault + + movzwl (%edx),%eax + movl $0,PCB_ONFAULT(%ecx) + ret + +/* + * fubyte - MP SAFE + */ +ENTRY(fubyte) + movl PCPU(CURPCB),%ecx + movl $fusufault,PCB_ONFAULT(%ecx) + movl 4(%esp),%edx + + cmpl $VM_MAXUSER_ADDRESS-1,%edx + ja fusufault + + movzbl (%edx),%eax + movl $0,PCB_ONFAULT(%ecx) + ret + + ALIGN_TEXT +fusufault: + movl PCPU(CURPCB),%ecx + xorl %eax,%eax + movl %eax,PCB_ONFAULT(%ecx) + decl %eax + ret + +/* + * su{byte,sword,word} - MP SAFE (if not I386_CPU) + * + * Write a byte (word, longword) to user memory + */ +ENTRY(suword) + movl PCPU(CURPCB),%ecx + movl $fusufault,PCB_ONFAULT(%ecx) + movl 4(%esp),%edx + + cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ + ja fusufault + + movl 8(%esp),%eax + movl %eax,(%edx) + xorl %eax,%eax + movl PCPU(CURPCB),%ecx + movl %eax,PCB_ONFAULT(%ecx) + ret + +ENTRY(suword32) + jmp suword + +/* + * suword16 - MP SAFE (if not I386_CPU) + */ +ENTRY(suword16) + movl PCPU(CURPCB),%ecx + movl $fusufault,PCB_ONFAULT(%ecx) + movl 4(%esp),%edx + + cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ + ja fusufault + + movw 8(%esp),%ax + movw %ax,(%edx) + xorl %eax,%eax + movl PCPU(CURPCB),%ecx /* restore trashed register */ + movl %eax,PCB_ONFAULT(%ecx) + ret + +/* + * subyte - MP SAFE (if not I386_CPU) + */ +ENTRY(subyte) + movl PCPU(CURPCB),%ecx + movl $fusufault,PCB_ONFAULT(%ecx) + movl 4(%esp),%edx + + cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ + ja fusufault + + movb 8(%esp),%al + movb %al,(%edx) + xorl %eax,%eax + movl PCPU(CURPCB),%ecx /* restore trashed register */ + movl %eax,PCB_ONFAULT(%ecx) + ret + +/* + * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE + * + * copy a string from from to to, stop when a 0 character is reached. 
+ * return ENAMETOOLONG if string is longer than maxlen, and + * EFAULT on protection violations. If lencopied is non-zero, + * return the actual length in *lencopied. + */ +ENTRY(copyinstr) + pushl %esi + pushl %edi + movl PCPU(CURPCB),%ecx + movl $cpystrflt,PCB_ONFAULT(%ecx) + + movl 12(%esp),%esi /* %esi = from */ + movl 16(%esp),%edi /* %edi = to */ + movl 20(%esp),%edx /* %edx = maxlen */ + + movl $VM_MAXUSER_ADDRESS,%eax + + /* make sure 'from' is within bounds */ + subl %esi,%eax + jbe cpystrflt + + /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ + cmpl %edx,%eax + jae 1f + movl %eax,%edx + movl %eax,20(%esp) +1: + incl %edx + cld + +2: + decl %edx + jz 3f + + lodsb + stosb + orb %al,%al + jnz 2b + + /* Success -- 0 byte reached */ + decl %edx + xorl %eax,%eax + jmp cpystrflt_x +3: + /* edx is zero - return ENAMETOOLONG or EFAULT */ + cmpl $VM_MAXUSER_ADDRESS,%esi + jae cpystrflt +4: + movl $ENAMETOOLONG,%eax + jmp cpystrflt_x + +cpystrflt: + movl $EFAULT,%eax + +cpystrflt_x: + /* set *lencopied and return %eax */ + movl PCPU(CURPCB),%ecx + movl $0,PCB_ONFAULT(%ecx) + movl 20(%esp),%ecx + subl %edx,%ecx + movl 24(%esp),%edx + testl %edx,%edx + jz 1f + movl %ecx,(%edx) +1: + popl %edi + popl %esi + ret + + +/* + * copystr(from, to, maxlen, int *lencopied) - MP SAFE + */ +ENTRY(copystr) + pushl %esi + pushl %edi + + movl 12(%esp),%esi /* %esi = from */ + movl 16(%esp),%edi /* %edi = to */ + movl 20(%esp),%edx /* %edx = maxlen */ + incl %edx + cld +1: + decl %edx + jz 4f + lodsb + stosb + orb %al,%al + jnz 1b + + /* Success -- 0 byte reached */ + decl %edx + xorl %eax,%eax + jmp 6f +4: + /* edx is zero -- return ENAMETOOLONG */ + movl $ENAMETOOLONG,%eax + +6: + /* set *lencopied and return %eax */ + movl 20(%esp),%ecx + subl %edx,%ecx + movl 24(%esp),%edx + testl %edx,%edx + jz 7f + movl %ecx,(%edx) +7: + popl %edi + popl %esi + ret + +ENTRY(bcmp) + pushl %edi + pushl %esi + movl 12(%esp),%edi + movl 16(%esp),%esi + movl 20(%esp),%edx + xorl %eax,%eax + + 
movl %edx,%ecx + shrl $2,%ecx + cld /* compare forwards */ + repe + cmpsl + jne 1f + + movl %edx,%ecx + andl $3,%ecx + repe + cmpsb + je 2f +1: + incl %eax +2: + popl %esi + popl %edi + ret + + +/* + * Handling of special 386 registers and descriptor tables etc + */ +/* void lgdt(struct region_descriptor *rdp); */ +ENTRY(lgdt_finish) +#if 0 + /* reload the descriptor table */ + movl 4(%esp),%eax + lgdt (%eax) +#endif + /* flush the prefetch q */ + jmp 1f + nop +1: + /* reload "stale" selectors */ + movl $KDSEL,%eax + movl %eax,%ds + movl %eax,%es + movl %eax,%gs + movl %eax,%ss + movl $KPSEL,%eax + movl %eax,%fs + + /* reload code selector by turning return into intersegmental return */ + movl (%esp),%eax + pushl %eax + movl $KCSEL,4(%esp) + lret + +/* ssdtosd(*ssdp,*sdp) */ +ENTRY(ssdtosd) + pushl %ebx + movl 8(%esp),%ecx + movl 8(%ecx),%ebx + shll $16,%ebx + movl (%ecx),%edx + roll $16,%edx + movb %dh,%bl + movb %dl,%bh + rorl $8,%ebx + movl 4(%ecx),%eax + movw %ax,%dx + andl $0xf0000,%eax + orl %eax,%ebx + movl 12(%esp),%ecx + movl %edx,(%ecx) + movl %ebx,4(%ecx) + popl %ebx + ret + +/* void reset_dbregs() */ +ENTRY(reset_dbregs) + movl $0,%eax + movl %eax,%dr7 /* disable all breapoints first */ + movl %eax,%dr0 + movl %eax,%dr1 + movl %eax,%dr2 + movl %eax,%dr3 + movl %eax,%dr6 + ret + +/*****************************************************************************/ +/* setjump, longjump */ +/*****************************************************************************/ + +ENTRY(setjmp) + movl 4(%esp),%eax + movl %ebx,(%eax) /* save ebx */ + movl %esp,4(%eax) /* save esp */ + movl %ebp,8(%eax) /* save ebp */ + movl %esi,12(%eax) /* save esi */ + movl %edi,16(%eax) /* save edi */ + movl (%esp),%edx /* get rta */ + movl %edx,20(%eax) /* save eip */ + xorl %eax,%eax /* return(0); */ + ret + +ENTRY(longjmp) + movl 4(%esp),%eax + movl (%eax),%ebx /* restore ebx */ + movl 4(%eax),%esp /* restore esp */ + movl 8(%eax),%ebp /* restore ebp */ + movl 12(%eax),%esi /* 
restore esi */ + movl 16(%eax),%edi /* restore edi */ + movl 20(%eax),%edx /* get rta */ + movl %edx,(%esp) /* put in return frame */ + xorl %eax,%eax /* return(1); */ + incl %eax + ret + +/* + * Support for BB-profiling (gcc -a). The kernbb program will extract + * the data from the kernel. + */ + + .data + ALIGN_DATA + .globl bbhead +bbhead: + .long 0 + + .text +NON_GPROF_ENTRY(__bb_init_func) + movl 4(%esp),%eax + movl $1,(%eax) + movl bbhead,%edx + movl %edx,16(%eax) + movl %eax,bbhead + NON_GPROF_RET diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s new file mode 100644 index 0000000000..f468c429bd --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s @@ -0,0 +1,445 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.143 2003/09/30 08:11:35 jeff Exp $ + */ + +#include "opt_npx.h" + +#include <machine/asmacros.h> + +#include "assym.s" + + +/*****************************************************************************/ +/* Scheduling */ +/*****************************************************************************/ + + .text + +/* + * cpu_throw() + * + * This is the second half of cpu_swtch(). It is used when the current + * thread is either a dummy or slated to die, and we no longer care + * about its state. This is only a slight optimization and is probably + * not worth it anymore. Note that we need to clear the pm_active bits so + * we do need the old proc if it still exists. + * 0(%esp) = ret + * 4(%esp) = oldtd + * 8(%esp) = newtd + */ +ENTRY(cpu_throw) + movl PCPU(CPUID), %esi + movl 4(%esp),%ecx /* Old thread */ + testl %ecx,%ecx /* no thread? 
*/ + jz 1f + /* release bit from old pm_active */ + movl PCPU(CURPMAP), %ebx +#ifdef SMP + lock +#endif + btrl %esi, PM_ACTIVE(%ebx) /* clear old */ +1: + movl 8(%esp),%ecx /* New thread */ + movl TD_PCB(%ecx),%edx + movl PCB_CR3(%edx),%eax + + movl %eax,PCPU(CR3) /* new address space */ + + pushl %ecx + pushl %edx + pushl %esi + pushl %eax + call load_cr3 + addl $4,%esp + popl %esi + popl %edx + popl %ecx + + /* set bit in new pm_active */ + movl TD_PROC(%ecx),%eax + movl P_VMSPACE(%eax), %ebx + addl $VM_PMAP, %ebx + movl %ebx, PCPU(CURPMAP) +#ifdef SMP + lock +#endif + btsl %esi, PM_ACTIVE(%ebx) /* set new */ + jmp sw1 + +/* + * cpu_switch(old, new) + * + * Save the current thread state, then select the next thread to run + * and load its state. + * 0(%esp) = ret + * 4(%esp) = oldtd + * 8(%esp) = newtd + */ +ENTRY(cpu_switch) + + /* Switch to new thread. First, save context. */ + movl 4(%esp),%ecx + +#ifdef INVARIANTS + testl %ecx,%ecx /* no thread? */ + jz badsw2 /* no, panic */ +#endif + + movl TD_PCB(%ecx),%edx + + movl (%esp),%eax /* Hardware registers */ + movl %eax,PCB_EIP(%edx) + movl %ebx,PCB_EBX(%edx) + movl %esp,PCB_ESP(%edx) + movl %ebp,PCB_EBP(%edx) + movl %esi,PCB_ESI(%edx) + movl %edi,PCB_EDI(%edx) + movl %gs,PCB_GS(%edx) +#if 0 + pushfl /* PSL */ + popl PCB_PSL(%edx) +#endif + /* Check to see if we need to call a switchout function. */ + movl PCB_SWITCHOUT(%edx),%eax + cmpl $0, %eax + je 1f + call *%eax +1: + /* Test if debug registers should be saved. */ + testl $PCB_DBREGS,PCB_FLAGS(%edx) + jz 1f /* no, skip over */ + movl %dr7,%eax /* yes, do the save */ + movl %eax,PCB_DR7(%edx) + andl $0x0000fc00, %eax /* disable all watchpoints */ + movl %eax,%dr7 + movl %dr6,%eax + movl %eax,PCB_DR6(%edx) + movl %dr3,%eax + movl %eax,PCB_DR3(%edx) + movl %dr2,%eax + movl %eax,PCB_DR2(%edx) + movl %dr1,%eax + movl %eax,PCB_DR1(%edx) + movl %dr0,%eax + movl %eax,PCB_DR0(%edx) +1: + +#ifdef DEV_NPX + /* have we used fp, and need a save? 
*/ + cmpl %ecx,PCPU(FPCURTHREAD) + jne 1f + addl $PCB_SAVEFPU,%edx /* h/w bugs make saving complicated */ + pushl %edx + call npxsave /* do it in a big C function */ + popl %eax +1: +#endif + + + /* Save is done. Now fire up new thread. Leave old vmspace. */ + movl %ecx,%edi + movl 8(%esp),%ecx /* New thread */ +#ifdef INVARIANTS + testl %ecx,%ecx /* no thread? */ + jz badsw3 /* no, panic */ +#endif + movl TD_PCB(%ecx),%edx + movl PCPU(CPUID), %esi + + /* switch address space */ + movl PCB_CR3(%edx),%eax + + cmpl %eax,IdlePTD /* Kernel address space? */ + + je sw1 + /* XXX optimize later KMM */ +#if 0 + movl %cr3,%ebx /* The same address space? */ +#else + movl PCPU(CR3),%ebx +#endif + cmpl %ebx,%eax + je sw1 + + movl %eax,PCPU(CR3) /* new address space */ + + pushl %edx + pushl %ecx + pushl %esi + pushl %eax + call load_cr3 /* inform xen of the switch */ + addl $4,%esp + popl %esi + popl %ecx + popl %edx + + /* Release bit from old pmap->pm_active */ + movl PCPU(CURPMAP), %ebx + +#ifdef SMP + lock +#endif + btrl %esi, PM_ACTIVE(%ebx) /* clear old */ + /* Set bit in new pmap->pm_active */ + movl TD_PROC(%ecx),%eax /* newproc */ + movl P_VMSPACE(%eax), %ebx + addl $VM_PMAP, %ebx + movl %ebx, PCPU(CURPMAP) +#ifdef SMP + lock +#endif + btsl %esi, PM_ACTIVE(%ebx) /* set new */ +sw1: + +#if 0 + + /* only one task selector under Xen */ + /* + * At this point, we've switched address spaces and are ready + * to load up the rest of the next context. + */ + cmpl $0, PCB_EXT(%edx) /* has pcb extension? */ + je 1f /* If not, use the default */ + btsl %esi, private_tss /* mark use of private tss */ + movl PCB_EXT(%edx), %edi /* new tss descriptor */ + jmp 2f /* Load it up */ + +1: /* + * Use the common default TSS instead of our own. + * Set our stack pointer into the TSS, it's set to just + * below the PCB. 
In C, common_tss.tss_esp0 = &pcb - 16; + */ + leal -16(%edx), %ebx /* leave space for vm86 */ + movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0 + + /* + * Test this CPU's bit in the bitmap to see if this + * CPU was using a private TSS. + */ + btrl %esi, private_tss /* Already using the common? */ + jae 3f /* if so, skip reloading */ + PCPU_ADDR(COMMON_TSSD, %edi) +2: + /* Move correct tss descriptor into GDT slot, then reload tr. */ + movl PCPU(TSS_GDT), %ebx /* entry in GDT */ + movl 0(%edi), %eax + movl %eax, 0(%ebx) + movl 4(%edi), %eax + movl %eax, 4(%ebx) + + movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ + ltr %si +#endif /* !XEN */ +3: + /* notify Xen of task switch */ + pushl %edx /* &pcb is the new stack base */ + pushl $KDSEL + pushl $HYPERVISOR_STACK_SWITCH + call ni_queue_multicall2 + addl $12,%esp + /* XXX handle DOM0 IOPL case here (KMM) */ + /* we currently don't support running FreeBSD */ + /* in DOM0 so we can skip for now */ + + call ni_execute_multicall_list + + /* Restore context. */ + movl PCB_EBX(%edx),%ebx + movl PCB_ESP(%edx),%esp + movl PCB_EBP(%edx),%ebp + movl PCB_ESI(%edx),%esi + movl PCB_EDI(%edx),%edi + movl PCB_EIP(%edx),%eax + movl %eax,(%esp) +#if 0 + pushl PCB_PSL(%edx) + popfl +#endif + movl %edx, PCPU(CURPCB) + movl %ecx, PCPU(CURTHREAD) /* into next thread */ + + /* + * Determine the LDT to use and load it if is the default one and + * that is not the current one. + */ + movl TD_PROC(%ecx),%eax + cmpl $0,P_MD+MD_LDT(%eax) + jnz 1f + movl _default_ldt,%eax + cmpl PCPU(CURRENTLDT),%eax + je 2f + pushl %edx + pushl %eax + xorl %eax,%eax + movl %eax,%gs + call i386_reset_ldt + popl %eax + popl %edx + + movl %eax,PCPU(CURRENTLDT) + jmp 2f +1: + /* Load the LDT when it is not the default one. */ + pushl %edx /* Preserve pointer to pcb. */ + addl $P_MD,%eax /* Pointer to mdproc is arg. */ + pushl %eax + call set_user_ldt + addl $4,%esp + popl %edx +2: + /* This must be done after loading the user LDT. 
*/ + .globl cpu_switch_load_gs +cpu_switch_load_gs: + movl PCB_GS(%edx),%gs + + /* XXX evidently setting debug registers needs to be + * routed through Xen - this appears to work - so I + * am leaving it as it is for now - (KMM) + */ + + /* Test if debug registers should be restored. */ + testl $PCB_DBREGS,PCB_FLAGS(%edx) + jz 1f + + /* + * Restore debug registers. The special code for dr7 is to + * preserve the current values of its reserved bits. + */ + movl PCB_DR6(%edx),%eax + movl %eax,%dr6 + movl PCB_DR3(%edx),%eax + movl %eax,%dr3 + movl PCB_DR2(%edx),%eax + movl %eax,%dr2 + movl PCB_DR1(%edx),%eax + movl %eax,%dr1 + movl PCB_DR0(%edx),%eax + movl %eax,%dr0 + movl %dr7,%eax + andl $0x0000fc00,%eax + movl PCB_DR7(%edx),%ecx + andl $~0x0000fc00,%ecx + orl %ecx,%eax + movl %eax,%dr7 +1: + ret + +#ifdef INVARIANTS +badsw1: + pushal + pushl $sw0_1 + call panic +sw0_1: .asciz "cpu_throw: no newthread supplied" + +badsw2: + pushal + pushl $sw0_2 + call panic +sw0_2: .asciz "cpu_switch: no curthread supplied" + +badsw3: + pushal + pushl $sw0_3 + call panic +sw0_3: .asciz "cpu_switch: no newthread supplied" +#endif + +/* + * savectx(pcb) + * Update pcb, saving current processor state. + */ +ENTRY(savectx) + /* Fetch PCB. */ + movl 4(%esp),%ecx + + /* Save caller's return address. Child won't execute this routine. */ + movl (%esp),%eax + movl %eax,PCB_EIP(%ecx) + +#if 0 + movl %cr3,%eax +#else + movl PCPU(CR3),%eax +#endif + movl %eax,PCB_CR3(%ecx) + + movl %ebx,PCB_EBX(%ecx) + movl %esp,PCB_ESP(%ecx) + movl %ebp,PCB_EBP(%ecx) + movl %esi,PCB_ESI(%ecx) + movl %edi,PCB_EDI(%ecx) + movl %gs,PCB_GS(%ecx) +#if 0 + pushfl + popl PCB_PSL(%ecx) +#endif +#ifdef DEV_NPX + /* + * If fpcurthread == NULL, then the npx h/w state is irrelevant and the + * state had better already be in the pcb. This is true for forks + * but not for dumps (the old book-keeping with FP flags in the pcb + * always lost for dumps because the dump pcb has 0 flags). 
+ * + * If fpcurthread != NULL, then we have to save the npx h/w state to + * fpcurthread's pcb and copy it to the requested pcb, or save to the + * requested pcb and reload. Copying is easier because we would + * have to handle h/w bugs for reloading. We used to lose the + * parent's npx state for forks by forgetting to reload. + */ + pushfl + call ni_cli + movl PCPU(FPCURTHREAD),%eax + testl %eax,%eax + je 1f + + pushl %ecx + movl TD_PCB(%eax),%eax + leal PCB_SAVEFPU(%eax),%eax + pushl %eax + pushl %eax + call npxsave + addl $4,%esp + popl %eax + popl %ecx + + pushl $PCB_SAVEFPU_SIZE + leal PCB_SAVEFPU(%ecx),%ecx + pushl %ecx + pushl %eax + call bcopy + addl $12,%esp +1: + popfl +#endif /* DEV_NPX */ + + ret diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw new file mode 100644 index 0000000000..014c6442ad --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw @@ -0,0 +1,75 @@ +# @(#)symbols.raw 7.6 (Berkeley) 5/8/91 +# +# $FreeBSD: src/sys/i386/i386/symbols.raw,v 1.15 1999/08/28 00:43:51 peter Exp $ +# + + +#gdb + _IdlePTD + _PTD + _panicstr + _atdevbase +# _version +#dmesg + _msgbufp +# _msgbuf +#iostat + _tk_nin + _tk_nout + _cp_time +# _io_info +#ps + _nswap + _maxslp + _ccpu + _fscale + _avail_start + _avail_end +#pstat +# _cons + _nswap + _swapblist +# _swaplist +#vmstat + _cp_time +# _rate +# _total +# _sum +# _rectime +# _pgintime + _boottime +#w + _swapdev + _nswap + _averunnable + _boottime +#netstat + _mbstat + _ipstat + _tcb + _tcpstat + _udb + _udpstat +# _rawcb + _ifnet +# _rthost +# _rtnet + _icmpstat + _filehead + _nfiles +# _rthashsize +# _radix_node_head +#routed + _ifnet +#rwho + _boottime +#savecore + _dumpdev + _dumplo + _time_second + _version + _dumpsize + _panicstr + _dumpmag +#deprecated +# _avenrun diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c new file mode 100644 index 
0000000000..8f85c128ba --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c @@ -0,0 +1,703 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.91 2003/09/07 05:23:28 davidxu Exp $"); + +#include "opt_kstack_pages.h" +#include "opt_mac.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/lock.h> +#include <sys/mac.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/smp.h> +#include <sys/sysproto.h> +#include <sys/user.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_extern.h> + +#include <machine/cpu.h> +#include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */ +#include <machine/proc.h> +#include <machine/sysarch.h> +#include <machine/xenfunc.h> + +#include <vm/vm_kern.h> /* for kernel_map */ + +#define MAX_LD 8192 +#define LD_PER_PAGE 512 +#define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1)) +#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3) + +void i386_reset_ldt(struct proc_ldt *pldt); + +static int i386_get_ldt(struct thread *, char *); +static int i386_set_ldt(struct thread *, char *); +static int i386_set_ldt_data(struct thread *, int start, int num, + union descriptor *descs); +static int i386_ldt_grow(struct thread *td, int len); +static int i386_get_ioperm(struct thread *, char *); +static int i386_set_ioperm(struct thread *, char *); +#ifdef SMP +static void set_user_ldt_rv(struct thread *); +#endif + +#ifndef _SYS_SYSPROTO_H_ +struct sysarch_args { + int op; + char *parms; +}; +#endif + +int +sysarch(td, uap) + struct thread *td; + register struct sysarch_args *uap; +{ + int error; + + mtx_lock(&Giant); + switch(uap->op) { + case I386_GET_LDT: + error = i386_get_ldt(td, uap->parms); + break; + + case I386_SET_LDT: + error = i386_set_ldt(td, uap->parms); + break; + case I386_GET_IOPERM: + error = i386_get_ioperm(td, uap->parms); + break; + case I386_SET_IOPERM: + error = i386_set_ioperm(td, uap->parms); + break; +#if 0 + case I386_VM86: + 
error = vm86_sysarch(td, uap->parms); + break; +#endif + default: + error = EINVAL; + break; + } + mtx_unlock(&Giant); + return (error); +} + +int +i386_extend_pcb(struct thread *td) +{ + int i, offset; + u_long *addr; + struct pcb_ext *ext; + struct soft_segment_descriptor ssd = { + 0, /* segment base address (overwritten) */ + ctob(IOPAGES + 1) - 1, /* length */ + SDT_SYS386TSS, /* segment type */ + 0, /* priority level */ + 1, /* descriptor present */ + 0, 0, + 0, /* default 32 size */ + 0 /* granularity */ + }; + + if (td->td_proc->p_flag & P_SA) + return (EINVAL); /* XXXKSE */ +/* XXXKSE All the code below only works in 1:1 needs changing */ + ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1)); + if (ext == 0) + return (ENOMEM); + bzero(ext, sizeof(struct pcb_ext)); + /* -16 is so we can convert a trapframe into vm86trapframe inplace */ + ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) - + sizeof(struct pcb) - 16; + ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); + /* + * The last byte of the i/o map must be followed by an 0xff byte. + * We arbitrarily allocate 16 bytes here, to keep the starting + * address on a doubleword boundary. 
+ */ + offset = PAGE_SIZE - 16; + ext->ext_tss.tss_ioopt = + (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16; + ext->ext_iomap = (caddr_t)ext + offset; + ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32; + + addr = (u_long *)ext->ext_vm86.vm86_intmap; + for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++) + *addr++ = ~0; + + ssd.ssd_base = (unsigned)&ext->ext_tss; + ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext); + ssdtosd(&ssd, &ext->ext_tssd); + + KASSERT(td->td_proc == curthread->td_proc, ("giving TSS to !curproc")); + KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!")); + mtx_lock_spin(&sched_lock); + td->td_pcb->pcb_ext = ext; + + /* switch to the new TSS after syscall completes */ + td->td_flags |= TDF_NEEDRESCHED; + mtx_unlock_spin(&sched_lock); + + return 0; +} + +static int +i386_set_ioperm(td, args) + struct thread *td; + char *args; +{ + int i, error; + struct i386_ioperm_args ua; + char *iomap; + + if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0) + return (error); + +#ifdef MAC + if ((error = mac_check_sysarch_ioperm(td->td_ucred)) != 0) + return (error); +#endif + if ((error = suser(td)) != 0) + return (error); + if ((error = securelevel_gt(td->td_ucred, 0)) != 0) + return (error); + /* + * XXX + * While this is restricted to root, we should probably figure out + * whether any other driver is using this i/o address, as so not to + * cause confusion. This probably requires a global 'usage registry'. 
+ */ + + if (td->td_pcb->pcb_ext == 0) + if ((error = i386_extend_pcb(td)) != 0) + return (error); + iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; + + if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY) + return (EINVAL); + + for (i = ua.start; i < ua.start + ua.length; i++) { + if (ua.enable) + iomap[i >> 3] &= ~(1 << (i & 7)); + else + iomap[i >> 3] |= (1 << (i & 7)); + } + return (error); +} + +static int +i386_get_ioperm(td, args) + struct thread *td; + char *args; +{ + int i, state, error; + struct i386_ioperm_args ua; + char *iomap; + + if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0) + return (error); + if (ua.start >= IOPAGES * PAGE_SIZE * NBBY) + return (EINVAL); + + if (td->td_pcb->pcb_ext == 0) { + ua.length = 0; + goto done; + } + + iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; + + i = ua.start; + state = (iomap[i >> 3] >> (i & 7)) & 1; + ua.enable = !state; + ua.length = 1; + + for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { + if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) + break; + ua.length++; + } + +done: + error = copyout(&ua, args, sizeof(struct i386_ioperm_args)); + return (error); +} + +/* + * Update the GDT entry pointing to the LDT to point to the LDT of the + * current process. + * + * This must be called with sched_lock held. Unfortunately, we can't use a + * mtx_assert() here because cpu_switch() calls this function after changing + * curproc but before sched_lock's owner is updated in mi_switch(). + */ +void +set_user_ldt(struct mdproc *mdp) +{ + struct proc_ldt *pldt; + pldt = mdp->md_ldt; + i386_reset_ldt(pldt); + PCPU_SET(currentldt, (int)pldt); + +} + +#ifdef SMP +static void +set_user_ldt_rv(struct thread *td) +{ + + if (td->td_proc != curthread->td_proc) + return; + + set_user_ldt(&td->td_proc->p_md); +} +#endif + +/* + * Must be called with either sched_lock free or held but not recursed. + * If it does not return NULL, it will return with it owned. 
+ */ +struct proc_ldt * +user_ldt_alloc(struct mdproc *mdp, int len) +{ + struct proc_ldt *pldt,*new_ldt; + + + if (mtx_owned(&sched_lock)) + mtx_unlock_spin(&sched_lock); + mtx_assert(&sched_lock, MA_NOTOWNED); + MALLOC(new_ldt, struct proc_ldt *, sizeof(struct proc_ldt), + M_SUBPROC, M_WAITOK); + + new_ldt->ldt_len = len = NEW_MAX_LD(len); + new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, + round_page(len * sizeof(union descriptor))); + if (new_ldt->ldt_base == NULL) { + FREE(new_ldt, M_SUBPROC); + return NULL; + } + new_ldt->ldt_refcnt = 1; + new_ldt->ldt_active = 0; + + mtx_lock_spin(&sched_lock); + + if ((pldt = mdp->md_ldt)) { + if (len > pldt->ldt_len) + len = pldt->ldt_len; + bcopy(pldt->ldt_base, new_ldt->ldt_base, + len * sizeof(union descriptor)); + } else { + bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE); + } + pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base, + new_ldt->ldt_len*sizeof(union descriptor)); + return new_ldt; +} + +/* + * Must be called either with sched_lock free or held but not recursed. + * If md_ldt is not NULL, it will return with sched_lock released. 
+ */ +void +user_ldt_free(struct thread *td) +{ + struct mdproc *mdp = &td->td_proc->p_md; + struct proc_ldt *pldt = mdp->md_ldt; + if (pldt == NULL) + return; + + if (!mtx_owned(&sched_lock)) + mtx_lock_spin(&sched_lock); + mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); + if (td == PCPU_GET(curthread)) { + PCPU_SET(currentldt, _default_ldt); + i386_reset_ldt((struct proc_ldt *)_default_ldt); + } + + mdp->md_ldt = NULL; + if (--pldt->ldt_refcnt == 0) { + mtx_unlock_spin(&sched_lock); + + pmap_map_readwrite(kernel_pmap,(vm_offset_t) pldt->ldt_base, + pldt->ldt_len*sizeof(union descriptor)); + kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base, + pldt->ldt_len * sizeof(union descriptor)); + FREE(pldt, M_SUBPROC); + } else + mtx_unlock_spin(&sched_lock); +} + +void +i386_reset_ldt(struct proc_ldt *pldt) +{ + xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len); +} + +static int +i386_get_ldt(td, args) + struct thread *td; + char *args; +{ + int error = 0; + struct proc_ldt *pldt = td->td_proc->p_md.md_ldt; + int nldt, num; + union descriptor *lp; + struct i386_ldt_args ua, *uap = &ua; + + if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0) + return(error); + +#ifdef DEBUG + printf("i386_get_ldt: start=%d num=%d descs=%p\n", + uap->start, uap->num, (void *)uap->descs); +#endif + + /* verify range of LDTs exist */ + if ((uap->start < 0) || (uap->num <= 0)) + return(EINVAL); + + if (pldt) { + nldt = pldt->ldt_len; + num = min(uap->num, nldt); + lp = &((union descriptor *)(pldt->ldt_base))[uap->start]; + } else { + nldt = sizeof(ldt)/sizeof(ldt[0]); + num = min(uap->num, nldt); + lp = &ldt[uap->start]; + } + if (uap->start + num > nldt) + return(EINVAL); + + error = copyout(lp, uap->descs, num * sizeof(union descriptor)); + if (!error) + td->td_retval[0] = num; + + return(error); +} + +static int ldt_warnings; +#define NUM_LDT_WARNINGS 10 + +static int +i386_set_ldt(struct thread *td, char *args) +{ + int error = 0, i; + int largest_ld; + struct 
mdproc *mdp = &td->td_proc->p_md; + struct proc_ldt *pldt = 0; + struct i386_ldt_args ua, *uap = &ua; + union descriptor *descs, *dp; + int descs_size; + + if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0) + return(error); +#ifdef DEBUG + printf("i386_set_ldt: start=%d num=%d descs=%p\n", + uap->start, uap->num, (void *)uap->descs); + +#endif + + if (uap->descs == NULL) { + /* Free descriptors */ + if (uap->start == 0 && uap->num == 0) { + /* + * Treat this as a special case, so userland needn't + * know magic number NLDT. + */ + uap->start = NLDT; + uap->num = MAX_LD - NLDT; + } + if (uap->start <= LUDATA_SEL || uap->num <= 0) + return (EINVAL); + mtx_lock_spin(&sched_lock); + pldt = mdp->md_ldt; + if (pldt == NULL || uap->start >= pldt->ldt_len) { + mtx_unlock_spin(&sched_lock); + return (0); + } + largest_ld = uap->start + uap->num; + if (largest_ld > pldt->ldt_len) + largest_ld = pldt->ldt_len; + i = largest_ld - uap->start; + bzero(&((union descriptor *)(pldt->ldt_base))[uap->start], + sizeof(union descriptor) * i); + mtx_unlock_spin(&sched_lock); + return (0); + } + + if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) { + /* complain a for a while if using old methods */ + if (ldt_warnings++ < NUM_LDT_WARNINGS) { + printf("Warning: pid %d used static ldt allocation.\n", + td->td_proc->p_pid); + printf("See the i386_set_ldt man page for more info\n"); + } + /* verify range of descriptors to modify */ + largest_ld = uap->start + uap->num; + if (uap->start >= MAX_LD || + uap->num < 0 || largest_ld > MAX_LD) { + return (EINVAL); + } + } + + descs_size = uap->num * sizeof(union descriptor); + descs = (union descriptor *)kmem_alloc(kernel_map, descs_size); + if (descs == NULL) + return (ENOMEM); + error = copyin(uap->descs, descs, descs_size); + if (error) { + kmem_free(kernel_map, (vm_offset_t)descs, descs_size); + return (error); + } + + /* Check descriptors for access violations */ + for (i = 0; i < uap->num; i++) { + dp = &descs[i]; + + 
switch (dp->sd.sd_type) { + case SDT_SYSNULL: /* system null */ + dp->sd.sd_p = 0; + break; + case SDT_SYS286TSS: /* system 286 TSS available */ + case SDT_SYSLDT: /* system local descriptor table */ + case SDT_SYS286BSY: /* system 286 TSS busy */ + case SDT_SYSTASKGT: /* system task gate */ + case SDT_SYS286IGT: /* system 286 interrupt gate */ + case SDT_SYS286TGT: /* system 286 trap gate */ + case SDT_SYSNULL2: /* undefined by Intel */ + case SDT_SYS386TSS: /* system 386 TSS available */ + case SDT_SYSNULL3: /* undefined by Intel */ + case SDT_SYS386BSY: /* system 386 TSS busy */ + case SDT_SYSNULL4: /* undefined by Intel */ + case SDT_SYS386IGT: /* system 386 interrupt gate */ + case SDT_SYS386TGT: /* system 386 trap gate */ + case SDT_SYS286CGT: /* system 286 call gate */ + case SDT_SYS386CGT: /* system 386 call gate */ + /* I can't think of any reason to allow a user proc + * to create a segment of these types. They are + * for OS use only. + */ + kmem_free(kernel_map, (vm_offset_t)descs, descs_size); + return (EACCES); + /*NOTREACHED*/ + + /* memory segment types */ + case SDT_MEMEC: /* memory execute only conforming */ + case SDT_MEMEAC: /* memory execute only accessed conforming */ + case SDT_MEMERC: /* memory execute read conforming */ + case SDT_MEMERAC: /* memory execute read accessed conforming */ + /* Must be "present" if executable and conforming. 
*/ + if (dp->sd.sd_p == 0) { + kmem_free(kernel_map, (vm_offset_t)descs, + descs_size); + return (EACCES); + } + break; + case SDT_MEMRO: /* memory read only */ + case SDT_MEMROA: /* memory read only accessed */ + case SDT_MEMRW: /* memory read write */ + case SDT_MEMRWA: /* memory read write accessed */ + case SDT_MEMROD: /* memory read only expand dwn limit */ + case SDT_MEMRODA: /* memory read only expand dwn lim accessed */ + case SDT_MEMRWD: /* memory read write expand dwn limit */ + case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */ + case SDT_MEME: /* memory execute only */ + case SDT_MEMEA: /* memory execute only accessed */ + case SDT_MEMER: /* memory execute read */ + case SDT_MEMERA: /* memory execute read accessed */ + break; + default: + kmem_free(kernel_map, (vm_offset_t)descs, descs_size); + return(EINVAL); + /*NOTREACHED*/ + } + + /* Only user (ring-3) descriptors may be present. */ + if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL)) { + kmem_free(kernel_map, (vm_offset_t)descs, descs_size); + return (EACCES); + } + } + + if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) { + /* Allocate a free slot */ + pldt = mdp->md_ldt; + if (pldt == NULL) { + load_gs(0); + error = i386_ldt_grow(td, NLDT+1); + if (error) { + kmem_free(kernel_map, (vm_offset_t)descs, + descs_size); + return (error); + } + pldt = mdp->md_ldt; + } +again: + mtx_lock_spin(&sched_lock); + /* + * start scanning a bit up to leave room for NVidia and + * Wine, which still user the "Blat" method of allocation. 
+ */ + dp = &((union descriptor *)(pldt->ldt_base))[NLDT]; + for (i = NLDT; i < pldt->ldt_len; ++i) { + if (dp->sd.sd_type == SDT_SYSNULL) + break; + dp++; + } + if (i >= pldt->ldt_len) { + mtx_unlock_spin(&sched_lock); + error = i386_ldt_grow(td, pldt->ldt_len+1); + if (error) { + kmem_free(kernel_map, (vm_offset_t)descs, + descs_size); + return (error); + } + goto again; + } + uap->start = i; + error = i386_set_ldt_data(td, i, 1, descs); + mtx_unlock_spin(&sched_lock); + } else { + largest_ld = uap->start + uap->num; + error = i386_ldt_grow(td, largest_ld); + if (error == 0) { + mtx_lock_spin(&sched_lock); + error = i386_set_ldt_data(td, uap->start, uap->num, + descs); + mtx_unlock_spin(&sched_lock); + } + } + kmem_free(kernel_map, (vm_offset_t)descs, descs_size); + if (error == 0) + td->td_retval[0] = uap->start; + return (error); +} +typedef struct uint64_lohi { + unsigned long lo; + unsigned long hi; +} uint64_lohi; + +static int +i386_set_ldt_data(struct thread *td, int start, int num, + union descriptor *descs) +{ + struct mdproc *mdp = &td->td_proc->p_md; + struct proc_ldt *pldt = mdp->md_ldt; + int i, error; + + mtx_assert(&sched_lock, MA_OWNED); + + /* Fill in range */ + for (i = 0; i < num; i++) { + error = HYPERVISOR_update_descriptor(vtomach(&((union descriptor *)(pldt->ldt_base))[start + i]), ((uint64_lohi *)descs)[i].lo, ((uint64_lohi *)descs)[i].hi); + if (error) + panic("failed to update ldt: %d", error); + } + return (0); +} + +static int +i386_ldt_grow(struct thread *td, int len) +{ + struct mdproc *mdp = &td->td_proc->p_md; + struct proc_ldt *pldt; + caddr_t old_ldt_base; + int old_ldt_len; + + if (len > MAX_LD) + return (ENOMEM); + if (len < NLDT+1) + len = NLDT+1; + pldt = mdp->md_ldt; + /* allocate user ldt */ + if (!pldt || len > pldt->ldt_len) { + struct proc_ldt *new_ldt = user_ldt_alloc(mdp, len); + if (new_ldt == NULL) + return (ENOMEM); + pldt = mdp->md_ldt; + /* sched_lock was held by user_ldt_alloc */ + if (pldt) { + if 
(new_ldt->ldt_len > pldt->ldt_len) { + old_ldt_base = pldt->ldt_base; + old_ldt_len = pldt->ldt_len; + pldt->ldt_sd = new_ldt->ldt_sd; + pldt->ldt_base = new_ldt->ldt_base; + pldt->ldt_len = new_ldt->ldt_len; + mtx_unlock_spin(&sched_lock); + pmap_map_readwrite(kernel_pmap, + (vm_offset_t)old_ldt_base, + old_ldt_len * sizeof(union descriptor)); + kmem_free(kernel_map, (vm_offset_t)old_ldt_base, + old_ldt_len * sizeof(union descriptor)); + FREE(new_ldt, M_SUBPROC); + mtx_lock_spin(&sched_lock); + } else { + /* + * If other threads already did the work, + * do nothing + */ + mtx_unlock_spin(&sched_lock); + pmap_map_readwrite(kernel_pmap, + (vm_offset_t)new_ldt->ldt_base, + new_ldt->ldt_len * sizeof(union descriptor)); + kmem_free(kernel_map, + (vm_offset_t)new_ldt->ldt_base, + new_ldt->ldt_len * sizeof(union descriptor)); + FREE(new_ldt, M_SUBPROC); + return (0); + } + } else { + mdp->md_ldt = pldt = new_ldt; + } +#ifdef SMP + mtx_unlock_spin(&sched_lock); + /* signal other cpus to reload ldt */ + smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, + NULL, td); +#else + set_user_ldt(mdp); + mtx_unlock_spin(&sched_lock); +#endif + } + return (0); +} diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c new file mode 100644 index 0000000000..a74986ed18 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c @@ -0,0 +1,1006 @@ +/*- + * Copyright (C) 1994, David Greenman + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the University of Utah, and William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/trap.c,v 1.260 2003/11/03 21:53:37 jhb Exp $"); + +/* + * 386 Trap and System call handling + */ + +#include "opt_clock.h" +#include "opt_cpu.h" +#include "opt_isa.h" +#include "opt_ktrace.h" +#include "opt_npx.h" +#include "opt_trap.h" + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/pioctl.h> +#include <sys/ptrace.h> +#include <sys/kdb.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/resourcevar.h> +#include <sys/signalvar.h> +#include <sys/syscall.h> +#include <sys/sysctl.h> +#include <sys/sysent.h> +#include <sys/uio.h> +#include <sys/vmmeter.h> +#ifdef KTRACE +#include <sys/ktrace.h> +#endif + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> +#include <vm/vm_page.h> +#include <vm/vm_extern.h> + +#include <machine/cpu.h> +#include <machine/intr_machdep.h> +#include <machine/md_var.h> +#include <machine/pcb.h> +#ifdef SMP +#include <machine/smp.h> +#endif +#include <machine/tss.h> +#ifdef POWERFAIL_NMI +#include <sys/syslog.h> +#include <machine/clock.h> +#endif + + +#include <machine/xenfunc.h> +#include <machine/hypervisor.h> +#include <machine/xenvar.h> +#include <machine/hypervisor-ifs.h> + + +extern void trap(struct trapframe frame); +extern void syscall(struct trapframe frame); + +static int trap_pfault(struct trapframe *, int, vm_offset_t); +static void trap_fatal(struct trapframe *, vm_offset_t); +void dblfault_handler(void); + +extern inthand_t IDTVEC(lcall_syscall); + +#define MAX_TRAP_MSG 28 +static char *trap_msg[] = { + "", /* 0 unused */ + "privileged instruction fault", /* 1 T_PRIVINFLT */ + "", /* 2 unused */ + "breakpoint instruction fault", /* 3 T_BPTFLT */ + "", /* 4 unused */ + "", /* 5 unused */ + "arithmetic trap", /* 6 T_ARITHTRAP */ + 
"", /* 7 unused */ + "", /* 8 unused */ + "general protection fault", /* 9 T_PROTFLT */ + "trace trap", /* 10 T_TRCTRAP */ + "", /* 11 unused */ + "page fault", /* 12 T_PAGEFLT */ + "", /* 13 unused */ + "alignment fault", /* 14 T_ALIGNFLT */ + "", /* 15 unused */ + "", /* 16 unused */ + "hypervisor callback", /* 17 T_HYPCALLBACK */ + "integer divide fault", /* 18 T_DIVIDE */ + "non-maskable interrupt trap", /* 19 T_NMI */ + "overflow trap", /* 20 T_OFLOW */ + "FPU bounds check fault", /* 21 T_BOUND */ + "FPU device not available", /* 22 T_DNA */ + "double fault", /* 23 T_DOUBLEFLT */ + "FPU operand fetch fault", /* 24 T_FPOPFLT */ + "invalid TSS fault", /* 25 T_TSSFLT */ + "segment not present fault", /* 26 T_SEGNPFLT */ + "stack fault", /* 27 T_STKFLT */ + "machine check trap", /* 28 T_MCHK */ +}; + +#if defined(I586_CPU) && !defined(NO_F00F_HACK) +extern int has_f00f_bug; +#endif + +#ifdef KDB +static int kdb_on_nmi = 1; +SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW, + &kdb_on_nmi, 0, "Go to KDB on NMI"); +#endif +static int panic_on_nmi = 1; +SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, + &panic_on_nmi, 0, "Panic on NMI"); + +#ifdef WITNESS +extern char *syscallnames[]; +#endif + +#ifdef DEVICE_POLLING +extern u_int32_t poll_in_trap; +extern int ether_poll(int count); +#endif /* DEVICE_POLLING */ + + +/* + * Exception, fault, and trap interface to the FreeBSD kernel. + * This common code is called from assembly language IDT gate entry + * routines that prepare a suitable stack frame, and restore this + * frame after the exception has been processed. 
+ */ + +void +trap(struct trapframe frame) +{ + struct thread *td = curthread; + struct proc *p = td->td_proc; + u_int sticks = 0; + int i = 0, ucode = 0, type, code; + vm_offset_t eva; +#ifdef STACK_DEBUGGING + int nesting, current_sp; + static int prev_csp = 0, prev_ssp = 0; + nesting = PCPU_GET(trap_nesting); +#endif + +#ifdef POWERFAIL_NMI + static int lastalert = 0; +#endif + + atomic_add_int(&cnt.v_trap, 1); + type = frame.tf_trapno; +#ifdef KDB + if (kdb_active) { + kdb_reenter(); + goto out; + } +#endif + + eva = 0; + code = frame.tf_err; + + if (type == T_HYPCALLBACK) { + evtchn_do_upcall((struct intrframe *)&frame); + if (ISPL(frame.tf_cs) == SEL_KPL) + goto out; + goto userout; + } else if (type == 0) + panic("invalid trap type/code %d/%d\n",type, code); + + + if (type == T_PAGEFLT) { + /* + * For some Cyrix CPUs, %cr2 is clobbered by + * interrupts. This problem is worked around by using + * an interrupt gate for the pagefault handler. We + * are finally ready to read %cr2 and then must + * reenable interrupts. + * + * If we get a page fault while in a critical section, then + * it is most likely a fatal kernel page fault. The kernel + * is already going to panic trying to get a sleep lock to + * do the VM lookup, so just consider it a fatal trap so the + * kernel can print out a useful trap message and even get + * to the debugger. 
+ */ + eva = frame.tf_cr2; + + if (td->td_critnest != 0) + trap_fatal(&frame, eva); + } + +#ifdef DEVICE_POLLING + if (poll_in_trap) + ether_poll(poll_in_trap); +#endif /* DEVICE_POLLING */ + + if ((ISPL(frame.tf_cs) == SEL_UPL) + || ((frame.tf_eflags & PSL_VM) && + !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) { + /* user trap */ + + sticks = td->td_sticks; + td->td_frame = &frame; + if (td->td_ucred != p->p_ucred) + cred_update_thread(td); + + switch (type) { + case T_PRIVINFLT: /* privileged instruction fault */ + ucode = type; + i = SIGILL; + break; + + case T_BPTFLT: /* bpt instruction fault */ + case T_TRCTRAP: /* trace trap */ + enable_intr(); + frame.tf_eflags &= ~PSL_T; + i = SIGTRAP; + break; + + case T_ARITHTRAP: /* arithmetic trap */ +#ifdef DEV_NPX + ucode = npxtrap(); + if (ucode == -1) + goto userout; +#else + ucode = code; +#endif + i = SIGFPE; + break; + + case T_PROTFLT: /* general protection fault */ + case T_STKFLT: /* stack fault */ + case T_SEGNPFLT: /* segment not present fault */ + case T_TSSFLT: /* invalid TSS fault */ + case T_DOUBLEFLT: /* double fault */ + default: + ucode = code + BUS_SEGM_FAULT ; + printf("unexpected trap type/code %d/%d\n",type, code); /* XXX temporary */ + + i = SIGBUS; + break; + + case T_PAGEFLT: /* page fault */ + if (td->td_pflags & TDP_SA) + thread_user_enter(td); + + i = trap_pfault(&frame, TRUE, eva); +#if defined(I586_CPU) && !defined(NO_F00F_HACK) + if (i == -2) { + /* + * The f00f hack workaround has triggered, so + * treat the fault as an illegal instruction + * (T_PRIVINFLT) instead of a page fault. + */ + type = frame.tf_trapno = T_PRIVINFLT; + + /* Proceed as in that case. 
*/ + ucode = type; + i = SIGILL; + break; + } +#endif + if (i == -1) + goto userout; + if (i == 0) + goto user; + + ucode = T_PAGEFLT; + break; + + case T_DIVIDE: /* integer divide fault */ + ucode = FPE_INTDIV; + i = SIGFPE; + break; + +#ifdef DEV_ISA + case T_NMI: +#ifdef POWERFAIL_NMI +#ifndef TIMER_FREQ +# define TIMER_FREQ 1193182 +#endif + mtx_lock(&Giant); + if (time_second - lastalert > 10) { + log(LOG_WARNING, "NMI: power fail\n"); + sysbeep(TIMER_FREQ/880, hz); + lastalert = time_second; + } + mtx_unlock(&Giant); + goto userout; +#else /* !POWERFAIL_NMI */ + /* machine/parity/power fail/"kitchen sink" faults */ + /* XXX Giant */ + if (isa_nmi(code) == 0) { +#ifdef KDB + /* + * NMI can be hooked up to a pushbutton + * for debugging. + */ + if (kdb_on_nmi) { + printf ("NMI ... going to debugger\n"); + kdb_trap (type, 0, &frame); + } +#endif /* KDB */ + goto userout; + } else if (panic_on_nmi) + panic("NMI indicates hardware failure"); + break; +#endif /* POWERFAIL_NMI */ +#endif /* DEV_ISA */ + + case T_OFLOW: /* integer overflow fault */ + ucode = FPE_INTOVF; + i = SIGFPE; + break; + + case T_BOUND: /* bounds check fault */ + ucode = FPE_FLTSUB; + i = SIGFPE; + break; + + case T_DNA: +#ifdef DEV_NPX + /* transparent fault (due to context switch "late") */ + if (npxdna()) + goto userout; +#endif + i = SIGFPE; + ucode = FPE_FPU_NP_TRAP; + break; + + case T_FPOPFLT: /* FPU operand fetch fault */ + ucode = T_FPOPFLT; + i = SIGILL; + break; + + case T_XMMFLT: /* SIMD floating-point exception */ + ucode = 0; /* XXX */ + i = SIGFPE; + break; + } + } else { + /* kernel trap */ + + KASSERT(cold || td->td_ucred != NULL, + ("kernel trap doesn't have ucred")); + switch (type) { + case T_PAGEFLT: /* page fault */ + (void) trap_pfault(&frame, FALSE, eva); + goto out; + + case T_DNA: +#ifdef DEV_NPX + /* + * The kernel is apparently using npx for copying. + * XXX this should be fatal unless the kernel has + * registered such use. 
+ */ + if (npxdna()) + goto out; +#endif + break; + + /* + * The following two traps can happen in + * vm86 mode, and, if so, we want to handle + * them specially. + */ + case T_PROTFLT: /* general protection fault */ + case T_STKFLT: /* stack fault */ +#if 0 + if (frame.tf_eflags & PSL_VM) { + i = vm86_emulate((struct vm86frame *)&frame); + if (i != 0) + /* + * returns to original process + */ + vm86_trap((struct vm86frame *)&frame); + goto out; + } +#endif + if (type == T_STKFLT) + break; + + /* FALL THROUGH */ + + case T_SEGNPFLT: /* segment not present fault */ + if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL) + break; + + /* + * Invalid %fs's and %gs's can be created using + * procfs or PT_SETREGS or by invalidating the + * underlying LDT entry. This causes a fault + * in kernel mode when the kernel attempts to + * switch contexts. Lose the bad context + * (XXX) so that we can continue, and generate + * a signal. + */ + if (frame.tf_eip == (int)cpu_switch_load_gs) { + PCPU_GET(curpcb)->pcb_gs = 0; +#if 0 + PROC_LOCK(p); + psignal(p, SIGBUS); + PROC_UNLOCK(p); +#endif + goto out; + } + + if (td->td_intr_nesting_level != 0) + break; + + /* + * Invalid segment selectors and out of bounds + * %eip's and %esp's can be set up in user mode. + * This causes a fault in kernel mode when the + * kernel tries to return to user mode. We want + * to get this fault so that we can fix the + * problem here and not have to check all the + * selectors and pointers when the user changes + * them. 
+ */ + if (frame.tf_eip == (int)doreti_iret) { + frame.tf_eip = (int)doreti_iret_fault; + goto out; + } + if (frame.tf_eip == (int)doreti_popl_ds) { + frame.tf_eip = (int)doreti_popl_ds_fault; + goto out; + } + if (frame.tf_eip == (int)doreti_popl_es) { + frame.tf_eip = (int)doreti_popl_es_fault; + goto out; + } + if (frame.tf_eip == (int)doreti_popl_fs) { + frame.tf_eip = (int)doreti_popl_fs_fault; + goto out; + } + if (PCPU_GET(curpcb)->pcb_onfault != NULL) { + frame.tf_eip = + (int)PCPU_GET(curpcb)->pcb_onfault; + goto out; + } + break; + + case T_TSSFLT: + /* + * PSL_NT can be set in user mode and isn't cleared + * automatically when the kernel is entered. This + * causes a TSS fault when the kernel attempts to + * `iret' because the TSS link is uninitialized. We + * want to get this fault so that we can fix the + * problem here and not every time the kernel is + * entered. + */ + if (frame.tf_eflags & PSL_NT) { + frame.tf_eflags &= ~PSL_NT; + goto out; + } + break; + + case T_TRCTRAP: /* trace trap */ + if (frame.tf_eip == (int)IDTVEC(lcall_syscall)) { + /* + * We've just entered system mode via the + * syscall lcall. Continue single stepping + * silently until the syscall handler has + * saved the flags. + */ + goto out; + } + if (frame.tf_eip == (int)IDTVEC(lcall_syscall) + 1) { + /* + * The syscall handler has now saved the + * flags. Stop single stepping it. + */ + frame.tf_eflags &= ~PSL_T; + goto out; + } + /* + * Ignore debug register trace traps due to + * accesses in the user's address space, which + * can happen under several conditions such as + * if a user sets a watchpoint on a buffer and + * then passes that buffer to a system call. + * We still want to get TRCTRAPS for addresses + * in kernel space because that is useful when + * debugging the kernel. 
+ */ + /* XXX Giant */ + if (user_dbreg_trap() && + !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) { + /* + * Reset breakpoint bits because the + * processor doesn't + */ + load_dr6(rdr6() & 0xfffffff0); + goto out; + } + /* + * FALLTHROUGH (TRCTRAP kernel mode, kernel address) + */ + case T_BPTFLT: + /* + * If KDB is enabled, let it handle the debugger trap. + * Otherwise, debugger traps "can't happen". + */ +#ifdef KDB + /* XXX Giant */ + if (kdb_trap (type, 0, &frame)) + goto out; +#endif + break; + +#ifdef DEV_ISA + case T_NMI: +#ifdef POWERFAIL_NMI + mtx_lock(&Giant); + if (time_second - lastalert > 10) { + log(LOG_WARNING, "NMI: power fail\n"); + sysbeep(TIMER_FREQ/880, hz); + lastalert = time_second; + } + mtx_unlock(&Giant); + goto out; +#else /* !POWERFAIL_NMI */ + /* XXX Giant */ + /* machine/parity/power fail/"kitchen sink" faults */ + if (isa_nmi(code) == 0) { +#ifdef KDB + /* + * NMI can be hooked up to a pushbutton + * for debugging. + */ + if (kdb_on_nmi) { + printf ("NMI ... going to debugger\n"); + kdb_trap (type, 0, &frame); + } +#endif /* KDB */ + goto out; + } else if (panic_on_nmi == 0) + goto out; + /* FALLTHROUGH */ +#endif /* POWERFAIL_NMI */ +#endif /* DEV_ISA */ + } + + trap_fatal(&frame, eva); + goto out; + } + + /* Translate fault for emulators (e.g. 
Linux) */ + if (*p->p_sysent->sv_transtrap) + i = (*p->p_sysent->sv_transtrap)(i, type); + + trapsignal(td, i, ucode); + +#if 1 /* DEBUG */ + if (type <= MAX_TRAP_MSG) { + uprintf("fatal process exception: %s", + trap_msg[type]); + if ((type == T_PAGEFLT) || (type == T_PROTFLT)) + uprintf(", fault VA = 0x%lx", (u_long)eva); + uprintf("\n"); + } +#endif + +user: + userret(td, &frame, sticks); + mtx_assert(&Giant, MA_NOTOWNED); +userout: +out: +#ifdef STACK_DEBUGGING + PCPU_SET(trap_nesting, nesting); +#endif + return; +} + +static int +trap_pfault(frame, usermode, eva) + struct trapframe *frame; + int usermode; + vm_offset_t eva; +{ + vm_offset_t va; + struct vmspace *vm = NULL; + vm_map_t map = 0; + int rv = 0; + vm_prot_t ftype; + struct thread *td = curthread; + struct proc *p = td->td_proc; + + va = trunc_page(eva); + if (va >= KERNBASE) { + /* + * Don't allow user-mode faults in kernel address space. + * An exception: if the faulting address is the invalid + * instruction entry in the IDT, then the Intel Pentium + * F00F bug workaround was triggered, and we need to + * treat it is as an illegal instruction, and not a page + * fault. + */ +#if defined(I586_CPU) && !defined(NO_F00F_HACK) + if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) + return -2; +#endif + if (usermode) + goto nogo; + + map = kernel_map; + } else { + /* + * This is a fault on non-kernel virtual memory. + * vm is initialized above to NULL. If curproc is NULL + * or curproc->p_vmspace is NULL the fault is fatal. + */ + if (p != NULL) + vm = p->p_vmspace; + + if (vm == NULL) + goto nogo; + + map = &vm->vm_map; + } + + if (frame->tf_err & PGEX_W) + ftype = VM_PROT_WRITE; + else + ftype = VM_PROT_READ; + + if (map != kernel_map) { + /* + * Keep swapout from messing with us during this + * critical time. + */ + PROC_LOCK(p); + ++p->p_lock; + PROC_UNLOCK(p); + + /* Fault in the user page: */ + rv = vm_fault(map, va, ftype, + (ftype & VM_PROT_WRITE) ? 
VM_FAULT_DIRTY + : VM_FAULT_NORMAL); + + PROC_LOCK(p); + --p->p_lock; + PROC_UNLOCK(p); + } else { + /* + * Don't have to worry about process locking or stacks in the + * kernel. + */ + rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + } + if (rv == KERN_SUCCESS) + return (0); +nogo: + if (!usermode) { + if (td->td_intr_nesting_level == 0 && + PCPU_GET(curpcb)->pcb_onfault != NULL) { + frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault; + return (0); + } + trap_fatal(frame, eva); + return (-1); + } + + /* kludge to pass faulting virtual address to sendsig */ + frame->tf_err = eva; + + return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); +} + +static void +trap_fatal(struct trapframe *frame, vm_offset_t eva) +{ + int code, type, ss, esp; + struct soft_segment_descriptor softseg; + + code = frame->tf_err; + type = frame->tf_trapno; +#if 0 + XENPRINTF("trying to read gdt\n"); + sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); + XENPRINTF("read gdt\n"); +#endif + if (type <= MAX_TRAP_MSG) + printf("\n\nFatal trap %d: %s while in %s mode\n", + type, trap_msg[type], + frame->tf_eflags & PSL_VM ? "vm86" : + ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); +#ifdef SMP + /* two separate prints in case of a trap on an unmapped page */ + printf("cpuid = %d; ", PCPU_GET(cpuid)); + printf("apic id = %02x\n", PCPU_GET(apic_id)); +#endif + if (type == T_PAGEFLT) { + printf("fault virtual address = 0x%x\n", eva); + printf("fault code = %s %s, %s\n", + code & PGEX_U ? "user" : "supervisor", + code & PGEX_W ? "write" : "read", + code & PGEX_P ? 
"protection violation" : "page not present"); + } + printf("instruction pointer = 0x%x:0x%x\n", + frame->tf_cs & 0xffff, frame->tf_eip); + if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) { + ss = frame->tf_ss & 0xffff; + esp = frame->tf_esp; + } else { + ss = GSEL(GDATA_SEL, SEL_KPL); + esp = (int)&frame->tf_esp; + } + printf("stack pointer = 0x%x:0x%x\n", ss, esp); + printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); + printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", + softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); + printf(" = DPL %d, pres %d, def32 %d, gran %d\n", + softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, + softseg.ssd_gran); + printf("processor eflags = "); + if (frame->tf_eflags & PSL_T) + printf("trace trap, "); + if (frame->tf_eflags & PSL_I) + printf("interrupt enabled, "); + if (frame->tf_eflags & PSL_NT) + printf("nested task, "); + if (frame->tf_eflags & PSL_RF) + printf("resume, "); + if (frame->tf_eflags & PSL_VM) + printf("vm86, "); + printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); + printf("current process = "); + if (curproc) { + printf("%lu (%s)\n", + (u_long)curproc->p_pid, curproc->p_comm ? + curproc->p_comm : ""); + } else { + printf("Idle\n"); + } + /* XXX */ + +#ifdef KDB + if (kdb_trap(type, 0, frame)) + return; +#endif + printf("trap number = %d\n", type); + if (type <= MAX_TRAP_MSG) + panic("%s", trap_msg[type]); + else + panic("unknown/reserved trap"); +} + +/* + * Double fault handler. Called when a fault occurs while writing + * a frame for a trap/exception onto the stack. This usually occurs + * when the stack overflows (such is the case with infinite recursion, + * for example). + * + * XXX Note that the current PTD gets replaced by IdlePTD when the + * task switch occurs. This means that the stack that was active at + * the time of the double fault is not available at <kstack> unless + * the machine was idle when the double fault occurred. 
The downside + * of this is that "trace <ebp>" in ddb won't work. + */ +void +dblfault_handler() +{ + printf("\nFatal double fault:\n"); + printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip)); + printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp)); + printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp)); +#ifdef SMP + /* two separate prints in case of a trap on an unmapped page */ + printf("cpuid = %d; ", PCPU_GET(cpuid)); + printf("apic id = %02x\n", PCPU_GET(apic_id)); +#endif + panic("double fault"); +} + +/* + * syscall - system call request C handler + * + * A system call is essentially treated as a trap. + */ +void +syscall(frame) + struct trapframe frame; +{ + caddr_t params; + struct sysent *callp; + struct thread *td = curthread; + struct proc *p = td->td_proc; + register_t orig_tf_eflags; + u_int sticks; + int error; + int narg; + int args[8]; + u_int code; + + /* + * note: PCPU_LAZY_INC() can only be used if we can afford + * occassional inaccuracy in the count. + */ + PCPU_LAZY_INC(cnt.v_syscall); + +#ifdef DIAGNOSTIC + if (ISPL(frame.tf_cs) != SEL_UPL) { + mtx_lock(&Giant); /* try to stabilize the system XXX */ + panic("syscall"); + /* NOT REACHED */ + mtx_unlock(&Giant); + } +#endif + + sticks = td->td_sticks; + td->td_frame = &frame; + if (td->td_ucred != p->p_ucred) + cred_update_thread(td); + if (p->p_flag & P_SA) + thread_user_enter(td); + params = (caddr_t)frame.tf_esp + sizeof(int); + code = frame.tf_eax; + orig_tf_eflags = frame.tf_eflags; + + if (p->p_sysent->sv_prepsyscall) { + /* + * The prep code is MP aware. + */ + (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); + } else { + /* + * Need to check if this is a 32 bit or 64 bit syscall. + * fuword is MP aware. + */ + if (code == SYS_syscall) { + /* + * Code is first argument, followed by actual args. 
+ */ + code = fuword(params); + params += sizeof(int); + } else if (code == SYS___syscall) { + /* + * Like syscall, but code is a quad, so as to maintain + * quad alignment for the rest of the arguments. + */ + code = fuword(params); + params += sizeof(quad_t); + } + } + + if (p->p_sysent->sv_mask) + code &= p->p_sysent->sv_mask; + + if (code >= p->p_sysent->sv_size) + callp = &p->p_sysent->sv_table[0]; + else + callp = &p->p_sysent->sv_table[code]; + + narg = callp->sy_narg & SYF_ARGMASK; + + /* + * copyin and the ktrsyscall()/ktrsysret() code is MP-aware + */ + if (params != NULL && narg != 0) + error = copyin(params, (caddr_t)args, + (u_int)(narg * sizeof(int))); + else + error = 0; + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) + ktrsyscall(code, narg, args); +#endif + CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td, + td->td_proc->p_pid, td->td_proc->p_comm, code); + + /* + * Try to run the syscall without Giant if the syscall + * is MP safe. + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) + mtx_lock(&Giant); + + if (error == 0) { + td->td_retval[0] = 0; + td->td_retval[1] = frame.tf_edx; + + STOPEVENT(p, S_SCE, narg); + + PTRACESTOP_SC(p, td, S_PT_SCE); + + error = (*callp->sy_call)(td, args); + } + + switch (error) { + case 0: + frame.tf_eax = td->td_retval[0]; + frame.tf_edx = td->td_retval[1]; + frame.tf_eflags &= ~PSL_C; + break; + + case ERESTART: + /* + * Reconstruct pc, assuming lcall $X,y is 7 bytes, + * int 0x80 is 2 bytes. We saved this in tf_err. + */ + frame.tf_eip -= frame.tf_err; + break; + + case EJUSTRETURN: + break; + + default: + if (p->p_sysent->sv_errsize) { + if (error >= p->p_sysent->sv_errsize) + error = -1; /* XXX */ + else + error = p->p_sysent->sv_errtbl[error]; + } + frame.tf_eax = error; + frame.tf_eflags |= PSL_C; + break; + } + + /* + * Release Giant if we previously set it. + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) + mtx_unlock(&Giant); + + /* + * Traced syscall. 
+ */ + if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { + frame.tf_eflags &= ~PSL_T; + trapsignal(td, SIGTRAP, 0); + } + + /* + * Handle reschedule and other end-of-syscall issues + */ + userret(td, &frame, sticks); + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSRET)) + ktrsysret(code, error, td->td_retval[0]); +#endif + + /* + * This works because errno is findable through the + * register set. If we ever support an emulation where this + * is not the case, this code will need to be revisited. + */ + STOPEVENT(p, S_SCX, code); + + PTRACESTOP_SC(p, td, S_PT_SCX); + + WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", + (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"); + mtx_assert(&sched_lock, MA_NOTOWNED); + mtx_assert(&Giant, MA_NOTOWNED); +} + diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c new file mode 100644 index 0000000000..cff67833f7 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c @@ -0,0 +1,618 @@ +/*- + * Copyright (c) 1982, 1986 The Regents of the University of California. + * Copyright (c) 1989, 1990 William Jolitz + * Copyright (c) 1994 John Dyson + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department, and William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 + * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.219 2003/11/17 18:22:24 alc Exp $"); + +#include "opt_npx.h" +#ifdef PC98 +#include "opt_pc98.h" +#endif +#include "opt_reset.h" +#include "opt_cpu.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/kse.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/sf_buf.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/unistd.h> +#include <sys/user.h> +#include <sys/vnode.h> +#include <sys/vmmeter.h> + +#include <machine/cpu.h> +#include <machine/cputypes.h> +#include <machine/md_var.h> +#include <machine/pcb.h> +#include <machine/pcb_ext.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <vm/vm_param.h> + +#ifdef PC98 +#include <pc98/pc98/pc98.h> +#else +#include <i386/isa/isa.h> +#endif + +#ifndef NSFBUFS +#define NSFBUFS (512 + maxusers * 16) +#endif + +#include <machine/xenfunc.h> + +#ifdef SMP +static void cpu_reset_proxy(void); +static u_int cpu_reset_proxyid; +static volatile u_int cpu_reset_proxy_active; +#endif +static void sf_buf_init(void *arg); +SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) + +LIST_HEAD(sf_head, sf_buf); + +/* + * A hash table of active sendfile(2) buffers + */ +static TAILQ_HEAD(, sf_buf) sf_buf_freelist; + + +static struct sf_head *sf_buf_active; +static u_long sf_buf_hashmask; + + +#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask) + +static u_int sf_buf_alloc_want; + +/* + * A lock used to synchronize access to the hash table and free list + */ +static struct mtx sf_buf_lock; + +extern int _ucodesel, _udatasel; + +/* + * Finish a fork 
operation, with process p2 nearly set up. + * Copy and update the pcb, set up the stack so that the child + * ready to run and return to user mode. + */ +void +cpu_fork(struct thread *td1, + struct proc *p2, + struct thread *td2, + int flags) +{ + register struct proc *p1; + struct pcb *pcb2; + struct mdproc *mdp2; +#ifdef DEV_NPX + register_t savecrit; +#endif + + p1 = td1->td_proc; + if ((flags & RFPROC) == 0) { + if ((flags & RFMEM) == 0) { + /* unshare user LDT */ + struct mdproc *mdp1 = &p1->p_md; + struct proc_ldt *pldt = mdp1->md_ldt; + if (pldt && pldt->ldt_refcnt > 1) { + pldt = user_ldt_alloc(mdp1, pldt->ldt_len); + if (pldt == NULL) + panic("could not copy LDT"); + mdp1->md_ldt = pldt; + set_user_ldt(mdp1); + user_ldt_free(td1); + } + } + return; + } + + /* Ensure that p1's pcb is up to date. */ +#ifdef DEV_NPX + if (td1 == curthread) + td1->td_pcb->pcb_gs = rgs(); + savecrit = intr_disable(); + if (PCPU_GET(fpcurthread) == td1) + npxsave(&td1->td_pcb->pcb_save); + intr_restore(savecrit); +#endif + + /* Point the pcb to the top of the stack */ + pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; + td2->td_pcb = pcb2; + + /* Copy p1's pcb */ + bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); + + /* Point mdproc and then copy over td1's contents */ + mdp2 = &p2->p_md; + bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); + + /* + * Create a new fresh stack for the new process. + * Copy the trap frame for the return to user mode as if from a + * syscall. This copies most of the user mode register values. + */ + td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb) - 1; + bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); + + td2->td_frame->tf_eax = 0; /* Child returns zero */ + td2->td_frame->tf_eflags &= ~PSL_C; /* success */ + td2->td_frame->tf_edx = 1; + /* + * Set registers for trampoline to user mode. Leave space for the + * return address on stack. These are the kernel mode register values. 
+ */ + pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); + pcb2->pcb_edi = 0; + pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ + pcb2->pcb_ebp = 0; + pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); + pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ + pcb2->pcb_eip = (int)fork_trampoline; + pcb2->pcb_psl = PSL_KERNEL; /* ints disabled */ + pcb2->pcb_gs = rgs(); + /*- + * pcb2->pcb_dr*: cloned above. + * pcb2->pcb_savefpu: cloned above. + * pcb2->pcb_flags: cloned above. + * pcb2->pcb_onfault: cloned above (always NULL here?). + * pcb2->pcb_gs: cloned above. + * pcb2->pcb_ext: cleared below. + */ + + /* + * XXX don't copy the i/o pages. this should probably be fixed. + */ + pcb2->pcb_ext = 0; + + /* Copy the LDT, if necessary. */ + mtx_lock_spin(&sched_lock); + + if (mdp2->md_ldt != 0) { + if (flags & RFMEM) { + mdp2->md_ldt->ldt_refcnt++; + } else { + mdp2->md_ldt = user_ldt_alloc(mdp2, + mdp2->md_ldt->ldt_len); + if (mdp2->md_ldt == NULL) + panic("could not copy LDT"); + } + } + mtx_unlock_spin(&sched_lock); + + /* + * Now, cpu_switch() can schedule the new process. + * pcb_esp is loaded pointing to the cpu_switch() stack frame + * containing the return address when exiting cpu_switch. + * This will normally be to fork_trampoline(), which will have + * %ebx loaded with the new proc's pointer. fork_trampoline() + * will set up a stack to call fork_return(p, frame); to complete + * the return to user-mode. + */ +} + +/* + * Intercept the return address from a freshly forked process that has NOT + * been scheduled yet. + * + * This is needed to make kernel threads stay in kernel mode. 
 */
void
cpu_set_fork_handler(td, func, arg)
	struct thread *td;
	void (*func)(void *);
	void *arg;
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:	func(arg, frame);
	 */
	td->td_pcb->pcb_esi = (int) func;	/* function */
	td->td_pcb->pcb_ebx = (int) arg;	/* first arg */
}

/*
 * Release machine-dependent per-process state (private LDT, hardware
 * debug registers) when a process exits.
 */
void
cpu_exit(struct thread *td)
{
	struct mdproc *mdp;
	struct pcb *pcb = td->td_pcb;

	/* Reset pc->pcb_gs and %gs before possibly invalidating it. */
	mdp = &td->td_proc->p_md;
	if (mdp->md_ldt) {
		td->td_pcb->pcb_gs = _udatasel;
		load_gs(_udatasel);
		user_ldt_free(td);
	}
	if (pcb->pcb_flags & PCB_DBREGS) {
		/* disable all hardware breakpoints */
		reset_dbregs();
		pcb->pcb_flags &= ~PCB_DBREGS;
	}
}

/* Per-thread teardown: drop FPU ownership and hardware breakpoints. */
void
cpu_thread_exit(struct thread *td)
{
	struct pcb *pcb = td->td_pcb;
#ifdef DEV_NPX
	if (td == PCPU_GET(fpcurthread))
		npxdrop();
#endif
	if (pcb->pcb_flags & PCB_DBREGS) {
		/* disable all hardware breakpoints */
		reset_dbregs();
		pcb->pcb_flags &= ~PCB_DBREGS;
	}
}

/* Free the per-thread TSS/io-permission extension pages, if allocated. */
void
cpu_thread_clean(struct thread *td)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	if (pcb->pcb_ext != 0) {
		/* XXXKSE  XXXSMP  not SMP SAFE.. what locks do we have? */
		/* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */
		/*
		 * XXX do we need to move the TSS off the allocated pages
		 * before freeing them?  (not done here)
		 */
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
		    ctob(IOPAGES + 1));
		pcb->pcb_ext = 0;
	}
}

/* Nothing machine-dependent to do when a thread's stack is swapped in. */
void
cpu_thread_swapin(struct thread *td)
{
}

/* Nothing machine-dependent to do when a thread's stack is swapped out. */
void
cpu_thread_swapout(struct thread *td)
{
}

/*
 * Lay out the pcb and trap frame at the top of a newly allocated kernel
 * stack.  The 16-byte gap below the pcb leaves room for the vm86 segment
 * registers pushed by the trap code.
 */
void
cpu_thread_setup(struct thread *td)
{

	td->td_pcb =
	    (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1;
	td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
	td->td_pcb->pcb_ext = NULL;
}

/*
 * Initialize machine state (pcb and trap frame) for a new thread about to
 * upcall.
 * Put enough state in the new thread's PCB to get it to go back to
 * userret(), where we can intercept it again to set the return (upcall)
 * address and stack, along with those from upcalls that are from other
 * sources such as those generated in thread_userret() itself.
 */
void
cpu_set_upcall(struct thread *td, struct thread *td0)
{
	struct pcb *pcb2;

	/* Point the pcb to the top of the stack. */
	pcb2 = td->td_pcb;

	/*
	 * Copy the upcall pcb.  This loads kernel regs.
	 * Those not loaded individually below get their default
	 * values here.
	 *
	 * XXXKSE It might be a good idea to simply skip this as
	 * the values of the other registers may be unimportant.
	 * This would remove any requirement for knowing the KSE
	 * at this time (see the matching comment below for
	 * more analysis) (need a good safe default).
	 */
	bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
	pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE);

	/*
	 * Create a new fresh stack for the new thread.
	 * Don't forget to set this stack value into whatever supplies
	 * the address for the fault handlers.
	 * The contexts are filled in at the time we actually DO the
	 * upcall as only then do we know which KSE we got.
	 */
	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register
	 * values.
	 */
#ifdef PAE
	pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt);
#else
	pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir);
#endif
	pcb2->pcb_edi = 0;
	pcb2->pcb_esi = (int)fork_return;		    /* trampoline arg */
	pcb2->pcb_ebp = 0;
	pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */
	pcb2->pcb_ebx = (int)td;			    /* trampoline arg */
	pcb2->pcb_eip = (int)fork_trampoline;
	pcb2->pcb_psl &= ~(PSL_I);	/* interrupts must be disabled */
	pcb2->pcb_gs = rgs();
	/*
	 * If we didn't copy the pcb, we'd need to do the following registers:
	 * pcb2->pcb_dr*:	cloned above.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above.
	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
	 * pcb2->pcb_gs:	cloned above.  XXXKSE ???
	 * pcb2->pcb_ext:	cleared below.
	 */
	pcb2->pcb_ext = NULL;
}

/*
 * Set that machine state for performing an upcall that has to
 * be done in thread_userret() so that those upcalls generated
 * in thread_userret() itself can be done as well.
 */
void
cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku)
{

	/*
	 * Do any extra cleaning that needs to be done.
	 * The thread may have optional components
	 * that are not present in a fresh thread.
	 * This may be a recycled thread so make it look
	 * as though it's newly allocated.
	 */
	cpu_thread_clean(td);

	/*
	 * Set the trap frame to point at the beginning of the uts
	 * function.
	 */
	td->td_frame->tf_ebp = 0;
	td->td_frame->tf_esp =
	    (int)ku->ku_stack.ss_sp + ku->ku_stack.ss_size - 16;
	td->td_frame->tf_eip = (int)ku->ku_func;

	/*
	 * Pass the address of the mailbox for this kse to the uts
	 * function as a parameter on the stack.
	 */
	suword((void *)(td->td_frame->tf_esp + sizeof(void *)),
	    (int)ku->ku_mailbox);
}

/*
 * Convert kernel VA to physical address.  Panics on an unmapped page
 * rather than returning 0, since callers expect a valid frame.
 */
vm_paddr_t
kvtop(void *addr)
{
	vm_paddr_t pa;

	pa = pmap_kextract((vm_offset_t)addr);
	if (pa == 0)
		panic("kvtop: zero page frame");
	return (pa);
}

/*
 * Force reset the processor by invalidating the entire address space!
 */

#ifdef SMP
static void
cpu_reset_proxy()
{

	cpu_reset_proxy_active = 1;
	while (cpu_reset_proxy_active == 1)
		;	/* Wait for other cpu to see that we've started */
	stop_cpus((1<<cpu_reset_proxyid));
	printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
	DELAY(1000000);
	cpu_reset_real();
}
#endif

/* Under Xen a "reset" is simply a shutdown hypercall to the hypervisor. */
void
cpu_reset()
{
	HYPERVISOR_shutdown();
}


/*
 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
 */
static void
sf_buf_init(void *arg)
{
	struct sf_buf *sf_bufs;
	vm_offset_t sf_base;
	int i;

	nsfbufs = NSFBUFS;
	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);

	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
	TAILQ_INIT(&sf_buf_freelist);
	sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
	/* NOTE(review): malloc() result is not checked; M_NOWAIT can fail. */
	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
	    M_NOWAIT | M_ZERO);
	for (i = 0; i < nsfbufs; i++) {
		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
	}
	sf_buf_alloc_want = 0;
	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
}

/*
 * Get an sf_buf from the freelist.  Will block if none are available.
 */
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int pri)
{
	struct sf_head *hash_list;
	struct sf_buf *sf;
	int error;

	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
	mtx_lock(&sf_buf_lock);
	/* Reuse an existing mapping of this page if one is active. */
	LIST_FOREACH(sf, hash_list, list_entry) {
		if (sf->m == m) {
			sf->ref_count++;
			if (sf->ref_count == 1) {
				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
				nsfbufsused++;
				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
			}
			goto done;
		}
	}
	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
		sf_buf_alloc_want++;
		mbstat.sf_allocwait++;
		error = msleep(&sf_buf_freelist, &sf_buf_lock, PVM | pri,
		    "sfbufa", 0);
		sf_buf_alloc_want--;

		/*
		 * If we got a signal, don't risk going back to sleep.
		 * (sf is NULL here, so the caller gets NULL back.)
		 */
		if (error)
			goto done;
	}
	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
	if (sf->m != NULL)
		LIST_REMOVE(sf, list_entry);
	LIST_INSERT_HEAD(hash_list, sf, list_entry);
	sf->ref_count = 1;
	sf->m = m;
	nsfbufsused++;
	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
	pmap_qenter(sf->kva, &sf->m, 1);
done:
	mtx_unlock(&sf_buf_lock);
	return (sf);
}

/*
 * Detach mapped page and release resources back to the system.
 */
void
sf_buf_free(struct sf_buf *sf)
{
	mtx_lock(&sf_buf_lock);
	sf->ref_count--;
	if (sf->ref_count == 0) {
		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
		nsfbufsused--;
		/*
		 * XEN only: tear down the mapping eagerly so the page's
		 * machine frame can be returned to the hypervisor.
		 */
		pmap_qremove(sf->kva, 1);
		sf->m = NULL;
		LIST_REMOVE(sf, list_entry);
		/* ----- */
		if (sf_buf_alloc_want > 0)
			wakeup_one(&sf_buf_freelist);
	}
	mtx_unlock(&sf_buf_lock);
}

/*
 * Software interrupt handler for queued VM system processing.
 */
void
swi_vm(void *dummy)
{
	if (busdma_swi_pending != 0)
		busdma_swi();
}

/*
 * Tell whether this address is in some physical memory region.
 * Currently used by the kernel coredump code in order to avoid
 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
 * or other unpredictable behaviour.
 */

int
is_physical_memory(vm_paddr_t addr)
{

#ifdef DEV_ISA
	/* The ISA ``memory hole''. */
	if (addr >= 0xa0000 && addr < 0x100000)
		return 0;
#endif

	/*
	 * stuff other tests for known memory-mapped devices (PCI?)
	 * here
	 */

	return 1;
}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c
new file mode 100644
index 0000000000..96f6ca086b
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c
@@ -0,0 +1,238 @@

/*
 * xen_bus.c: newbus glue for the Xen virtual "system" bus.  Child
 * devices (virtual disk, network, console) attach below this bus and
 * have their resources tracked in a per-child resource_list.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <machine/bus.h>
#include <sys/rman.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <machine/resource.h>

#include <machine/xen-os.h>
#include <machine/hypervisor.h>
#include <machine/xen_intr.h>

static MALLOC_DEFINE(M_XENDEV, "xenintrdrv", "xen system device");

/* Per-child ivars: just the resource list. */
struct xenbus_device {
	struct resource_list xen_resources;
};

#define DEVTOXEN(dev)	((struct xenbus_device *)device_get_ivars(dev))

static void xenbus_identify(driver_t *, device_t);
static int xenbus_probe(device_t);
static int xenbus_attach(device_t);
static int xenbus_print_child(device_t, device_t);
static device_t xenbus_add_child(device_t bus, int order, const char *name,
    int unit);
static struct resource *xenbus_alloc_resource(device_t, device_t, int, int *,
    u_long, u_long, u_long, u_int);
static int xenbus_release_resource(device_t, device_t, int, int,
    struct resource *);
static int xenbus_set_resource(device_t, device_t, int, int, u_long, u_long);
static int xenbus_get_resource(device_t, device_t, int, int, u_long *,
    u_long *);
static void xenbus_delete_resource(device_t, device_t, int, int);


static device_method_t xenbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	xenbus_identify),
	DEVMETHOD(device_probe,		xenbus_probe),
	DEVMETHOD(device_attach,	xenbus_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	xenbus_print_child),
	DEVMETHOD(bus_add_child,	xenbus_add_child),
	DEVMETHOD(bus_read_ivar,	bus_generic_read_ivar),
	DEVMETHOD(bus_write_ivar,	bus_generic_write_ivar),
	DEVMETHOD(bus_set_resource,	xenbus_set_resource),
	DEVMETHOD(bus_get_resource,	xenbus_get_resource),
	DEVMETHOD(bus_alloc_resource,	xenbus_alloc_resource),
	DEVMETHOD(bus_release_resource,	xenbus_release_resource),
	DEVMETHOD(bus_delete_resource,	xenbus_delete_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),

	{ 0, 0 }
};


static driver_t xenbus_driver = {
	"xenbus",
	xenbus_methods,
	1,		/* no softc */
};
static devclass_t xenbus_devclass;
static device_t xenbus_dev;
static boolean_t xenbus_probe_delay = TRUE; /* delay child probes */

DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0);

static void
xenbus_identify(driver_t *driver, device_t parent)
{

	/*
	 * Add child device with order of 0 so it gets probed
	 * first
	 */
	xenbus_dev = BUS_ADD_CHILD(parent, 0, "xenbus", 0);
	if (xenbus_dev == NULL)
		panic("xenbus: could not attach");
}

static int
xenbus_probe(device_t dev)
{
	device_set_desc(dev, "xen system");
	device_quiet(dev);
	return (0);
}

static int
xenbus_attach(device_t dev)
{
	/*
	 * First, let our child driver's identify any child devices that
	 * they can find.  Once that is done attach any devices that we
	 * found.  (Skipped until xenbus_init() clears xenbus_probe_delay
	 * late in boot.)
	 */
	if (!xenbus_probe_delay) {
		bus_generic_probe(dev);
		bus_generic_attach(dev);
	}

	return 0;
}


static int
xenbus_print_all_resources(device_t dev)
{
	struct xenbus_device *xdev = device_get_ivars(dev);
	struct resource_list *rl = &xdev->xen_resources;
	int retval = 0;

	if (SLIST_FIRST(rl))
		retval += printf(" at");

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");

	return retval;
}


static int
xenbus_print_child(device_t bus, device_t child)
{
	int retval = 0;

	retval += bus_print_child_header(bus, child);
	retval += xenbus_print_all_resources(child);
	retval += printf(" on motherboard\n");	/* XXX "motherboard", ick */

	return (retval);
}

static device_t
xenbus_add_child(device_t bus, int order, const char *name, int unit)
{
	device_t child;
	struct xenbus_device *xendev;

	xendev = malloc(sizeof(struct xenbus_device), M_XENDEV,
	    M_NOWAIT | M_ZERO);
	if (!xendev)
		return(0);
	resource_list_init(&xendev->xen_resources);

	child = device_add_child_ordered(bus, order, name, unit);

	/* should we free this in xenbus_child_detached? */
	device_set_ivars(child, xendev);

	return(child);
}

static struct resource *
xenbus_alloc_resource(device_t bus, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct xenbus_device *xendev = DEVTOXEN(child);
	struct resource_list *rl = &xendev->xen_resources;

	return (resource_list_alloc(rl, bus, child, type, rid, start, end,
	    count, flags));
}


static int
xenbus_release_resource(device_t bus, device_t child, int type, int rid,
    struct resource *r)
{
	struct xenbus_device *xendev = DEVTOXEN(child);
	struct resource_list *rl = &xendev->xen_resources;

	return (resource_list_release(rl, bus, child, type, rid, r));
}

static int
xenbus_set_resource(device_t dev, device_t child, int type, int rid,
    u_long start, u_long count)
{
	struct xenbus_device *xendev = DEVTOXEN(child);
	struct resource_list *rl = &xendev->xen_resources;

	resource_list_add(rl, type, rid, start, start + count - 1, count);
	return(0);
}

static int
xenbus_get_resource(device_t dev, device_t child, int type, int rid,
    u_long *startp, u_long *countp)
{
	struct xenbus_device *xendev = DEVTOXEN(child);
	struct resource_list *rl = &xendev->xen_resources;
	struct resource_list_entry *rle;

	rle = resource_list_find(rl, type, rid);
	if (!rle)
		return(ENOENT);
	if (startp)
		*startp = rle->start;
	if (countp)
		*countp = rle->count;
	return(0);
}

static void
xenbus_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct xenbus_device *xendev = DEVTOXEN(child);
	struct resource_list *rl = &xendev->xen_resources;

	resource_list_delete(rl, type, rid);
}

/*
 * Run at SI_SUB_PSEUDO: child drivers are registered by now, so allow
 * probing and (re)attach the bus to pick them up.
 */
static void
xenbus_init(void *unused)
{
	xenbus_probe_delay = FALSE;
	xenbus_attach(xenbus_dev);
}
SYSINIT(xenbusdev, SI_SUB_PSEUDO, SI_ORDER_FIRST, xenbus_init, NULL);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
new file mode 100644
index 0000000000..dd24a206b1 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c @@ -0,0 +1,687 @@ +/* $NetBSD:$ */ + +/* + * + * Copyright (c) 2004 Christian Limpach. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Christian Limpach. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include <sys/cdefs.h> + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mount.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/reboot.h> + + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <machine/stdarg.h> +#include <machine/xenfunc.h> +#include <machine/xenpmap.h> +#include <machine/vmparam.h> +#include <machine/cpu.h> +#include <machine/xenvar.h> + +#include <sys/socket.h> +#include <sys/sockio.h> +#include <net/if.h> +#include <net/if_dl.h> +#include <net/if_types.h> +#include <net/if_var.h> +#include <net/ethernet.h> +#include <netinet/in.h> +#include <sys/mbuf.h> +#include <nfs/rpcv2.h> +#include <nfsclient/krpc.h> +#include <nfs/nfsproto.h> + + +shared_info_t *HYPERVISOR_shared_info; + +void ni_cli(void); +void ni_sti(void); +#ifdef NFS_ROOT + +static int +xdr_opaque_decode(struct mbuf **mptr, u_char *buf, int len) +{ + struct mbuf *m; + int alignedlen; + + m = *mptr; + alignedlen = ( len + 3 ) & ~3; + + if (m->m_len < alignedlen) { + m = m_pullup(m, alignedlen); + if (m == NULL) { + *mptr = NULL; + return EBADRPC; + } + } + bcopy(mtod(m, u_char *), buf, len); + m_adj(m, alignedlen); + *mptr = m; + return 0; +} + + +static int +getdec(char **ptr) +{ + char *p; + int ret; + + p = *ptr; + ret = 0; + if ((*p < '0') || (*p > '9')) + return -1; + while ((*p >= '0') && (*p <= '9')) { + ret = ret * 10 + (*p - '0'); + p++; + } + *ptr = p; + return ret; +} + +int +setinaddr(struct sockaddr_in *addr, char *ipstr) +{ + unsigned int ip; + int val; + + ip = 0; + if (((val = getdec(&ipstr)) < 0) || (val > 255)) + return 1; + ip = val << 24; + if (*ipstr != '.') + return 1; + ipstr++; + if (((val = getdec(&ipstr)) < 0) || (val > 255)) + return 1; + ip |= (val << 16); + if (*ipstr != '.') + return 1; + ipstr++; + if (((val = getdec(&ipstr)) < 0) || (val > 255)) + return 1; + ip |= (val << 8); + if (*ipstr != '.') + return 1; + ipstr++; + if (((val = getdec(&ipstr)) < 0) || (val > 255)) + return 1; + ip |= val; + + 
addr->sin_addr.s_addr = htonl(ip); + addr->sin_len = sizeof(struct sockaddr_in); + addr->sin_family = AF_INET; + + return 0; +} + +static int +hwaddr_to_sockaddr(char *ev, struct sockaddr_dl *sa) +{ + char *cp; + u_int32_t a[6]; + int count; + + bzero(sa, sizeof(*sa)); + sa->sdl_len = sizeof(*sa); + sa->sdl_family = AF_LINK; + sa->sdl_type = IFT_ETHER; + sa->sdl_alen = ETHER_ADDR_LEN; + if ((cp = getenv(ev)) == NULL) + return (1); + count = sscanf(cp, "%x:%x:%x:%x:%x:%x", + &a[0], &a[1], &a[2], &a[3], &a[4], &a[5]); + freeenv(cp); + if (count != 6) + return (1); + sa->sdl_data[0] = a[0]; + sa->sdl_data[1] = a[1]; + sa->sdl_data[2] = a[2]; + sa->sdl_data[3] = a[3]; + sa->sdl_data[4] = a[4]; + sa->sdl_data[5] = a[5]; + return (0); +} +extern int in_control(struct socket *so, u_long cmd, + caddr_t data, struct ifnet *ifp, + struct thread *td); + +static int +xen_setnetwork(void) +{ + int error = 0; + struct ifaddr *ifa; + struct ifnet *ifp; + struct sockaddr_dl *sdl, ourdl; + + if (sizeof(struct sockaddr) != sizeof(struct sockaddr_in)) + panic("sizes not equal\n"); + + if (hwaddr_to_sockaddr("boot.netif.hwaddr", &ourdl)) { + printf("nfs_diskless: no hardware address\n"); + return -1; + } + + + ifa = NULL; + IFNET_RLOCK(); + TAILQ_FOREACH(ifp, &ifnet, if_link) { + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if ((ifa->ifa_addr->sa_family == AF_LINK) && + (sdl = ((struct sockaddr_dl *)ifa->ifa_addr))) { + if ((sdl->sdl_type == ourdl.sdl_type) && + (sdl->sdl_alen == ourdl.sdl_alen) && + !bcmp(sdl->sdl_data + sdl->sdl_nlen, + ourdl.sdl_data + ourdl.sdl_nlen, + sdl->sdl_alen)) { + IFNET_RUNLOCK(); + goto match_done; + } + } + } + } + IFNET_RUNLOCK(); + printf("nfs_diskless: no interface\n"); + return -1; /* no matching interface */ + match_done: + + if (getenv("boot.netif.ip") && getenv("boot.netif.gateway") && + getenv("boot.netif.netmask")) { + struct ifaliasreq ifra; + char *ip; + + bzero(&ifra, sizeof(ifra)); + strcpy(ifra.ifra_name, "xn0"); + ip = 
getenv("boot.netif.ip"); + setinaddr((struct sockaddr_in *)&(ifra.ifra_addr), ip); + printf("setting ip to %s\n", ip); + ip = getenv("boot.netif.netmask"); + setinaddr((struct sockaddr_in *)&ifra.ifra_mask, ip); + setinaddr((struct sockaddr_in *)&ifra.ifra_broadaddr, "255.255.255.255"); + + + if ((error = in_control(NULL, SIOCAIFADDR, (caddr_t) &ifra, ifp, curthread))) + printf("couldn't set interface address %d\n", error); +#if 0 + if ((error = xn_ioctl(ifp, SIOCSIFNETMASK, (caddr_t)&ifa))) + printf("couldn't set interface netmask %d\n", error); +#endif + } + return error; +} + +int +xen_setnfshandle(void) +{ + char *path, *ip; + u_char fhp[NFSX_V2FH]; + int error = 0; + struct sockaddr_in sin_local, *sin ; + struct mbuf *m; + + if ((error = xen_setnetwork())) + return error; + + sin = &sin_local; + + path = getenv("boot.nfsroot.path"); + ip = getenv("boot.nfsroot.server"); + + /* we aren't configured for NFS root */ + if (!path || !ip) + return 0; + + error = setinaddr(sin, ip); + if (error) { + printf("invalid ip address %s\n", ip); + return error; + } + + error = krpc_portmap(sin, RPCPROG_MNT, RPCMNT_VER1, + &sin->sin_port, curthread); + if (error) { + printf("failed to find port number for mountd\n"); + return error; + } + m = xdr_string_encode(path, strlen(path)); + + /* Do RPC to mountd */ + error = krpc_call(sin, RPCPROG_MNT, RPCMNT_VER1, + RPCMNT_MOUNT, &m, NULL, curthread); + if (error) { + printf("call to mountd failed\n"); + return error; + } + + if (xdr_opaque_decode(&m, fhp, NFSX_V2FH) != 0) { + printf("failed to decode nfs file handle\n"); + return error; + } + + setenv("boot.nfsroot.nfshandle", fhp); + + return 0; +} +#endif +void +ni_cli(void) +{ + __asm__("pushl %edx;" + "pushl %eax;" + ); + __cli(); + __asm__("popl %eax;" + "popl %edx;" + ); +} + + +void +ni_sti(void) +{ + __asm__("pushl %edx;" + "pushl %esi;" + "pushl %eax;" + ); + __sti(); + __asm__("popl %eax;" + "popl %esi;" + "popl %edx;" + ); +} + +/* + * Modify the cmd_line by converting 
',' to NULLs so that it is in a format + * suitable for the static env vars. + */ +char * +xen_setbootenv(char *cmd_line) +{ + char *cmd_line_next; + + for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;); + return cmd_line; +} + +static struct +{ + const char *ev; + int mask; +} howto_names[] = { + {"boot_askname", RB_ASKNAME}, + {"boot_cdrom", RB_CDROM}, + {"boot_userconfig", RB_CONFIG}, + {"boot_ddb", RB_KDB}, + {"boot_gdb", RB_GDB}, + {"boot_gdb_pause", RB_GDB_PAUSE}, + {"boot_single", RB_SINGLE}, + {"boot_verbose", RB_VERBOSE}, + {"boot_multicons", RB_MULTIPLE}, + {"boot_serial", RB_SERIAL}, + {NULL, 0} +}; + +int +xen_boothowto(char *envp) +{ + int i, howto = 0; + + /* get equivalents from the environment */ + for (i = 0; howto_names[i].ev != NULL; i++) + if (getenv(howto_names[i].ev) != NULL) + howto |= howto_names[i].mask; + return howto; +} + +#define PRINTK_BUFSIZE 1024 +void +printk(const char *fmt, ...) +{ + __va_list ap; + int ret; + static char buf[PRINTK_BUFSIZE]; + + va_start(ap, fmt); + ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap); + va_end(ap); + buf[ret] = 0; + (void)HYPERVISOR_console_write(buf, ret); +} + +#define XPQUEUE_SIZE 2048 + +typedef struct xpq_queue { + uint32_t ptr; + uint32_t val; +} xpq_queue_t; + +#define MCLQUEUE_SIZE 512 +static multicall_entry_t mcl_queue[MCLQUEUE_SIZE]; +static int mcl_idx = 0; + +static xpq_queue_t xpq_queue[XPQUEUE_SIZE]; +static boolean_t xpq_initialized; +static struct mtx update_lock; +static int xpq_idx = 0; + +/* + * Don't attempt to lock until after lock & memory initialization + */ +#define XPQ_LOCK(lock, flags) \ + if (likely(xpq_initialized)) \ + mtx_lock_irqsave(lock, flags) +#define XPQ_UNLOCK(lock, flags) \ + if (likely(xpq_initialized)) \ + mtx_unlock_irqrestore(lock, flags) + +void +xpq_init(void) +{ + xpq_initialized = TRUE; + mtx_init(&update_lock, "mmu", "MMU LOCK", MTX_SPIN); +} + +static __inline void +_xpq_flush_queue(void) +{ + int _xpq_idx = xpq_idx; + int error, i; 
+ + xpq_idx = 0; + /* Make sure index is cleared first to avoid double updates. */ + error = HYPERVISOR_mmu_update((mmu_update_t *)xpq_queue, _xpq_idx, + NULL); + + if (__predict_false(error < 0)) { + for (i = 0; i < _xpq_idx; i++) + printk("val: %x ptr: %p\n", xpq_queue[i].val, xpq_queue[i].ptr); + panic("Failed to execute MMU updates: %d", error); + } + +} +static void +xpq_flush_queue(void) +{ + unsigned long flags = 0; + + XPQ_LOCK(&update_lock, flags); + if (xpq_idx != 0) _xpq_flush_queue(); + XPQ_UNLOCK(&update_lock, flags); +} + +static __inline void +_mcl_flush_queue(void) +{ + int _mcl_idx = mcl_idx; + mcl_idx = 0; + (void)HYPERVISOR_multicall(mcl_queue, _mcl_idx); +} + +void +mcl_flush_queue(void) +{ + unsigned long flags = 0; + + XPQ_LOCK(&update_lock, flags); + if (__predict_true(mcl_idx != 0)) _mcl_flush_queue(); + XPQ_UNLOCK(&update_lock, flags); + /* XXX: until we can remove the pervasive + * __HYPERVISOR_update_va_mapping calls, we have 2 queues. In order + * to ensure that they never get out of sync, only 1 flush interface + * is provided. 
	 */
	xpq_flush_queue();
}


/* Bump the MMU-update queue index, flushing when the queue fills. */
static __inline void
xpq_increment_idx(void)
{
	xpq_idx++;
	if (__predict_false(xpq_idx == XPQUEUE_SIZE))
		xpq_flush_queue();
}

/* Bump the multicall queue index, flushing when the queue fills. */
static __inline void
mcl_increment_idx(void)
{
	mcl_idx++;
	if (__predict_false(mcl_idx == MCLQUEUE_SIZE))
		mcl_flush_queue();
}

/* Queue a single-page TLB invalidation for va. */
void
xpq_queue_invlpg(vm_offset_t va)
{
	unsigned long flags = 0;

	XPQ_LOCK(&update_lock, flags);
	xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
	xpq_increment_idx();
	XPQ_UNLOCK(&update_lock, flags);
}

/* Switch page directories (the Xen replacement for writing %cr3). */
void
load_cr3(uint32_t val)
{
	xpq_queue_pt_switch(val);
	xpq_flush_queue();
}

/*
 * Install a new LDT via the hypervisor.
 * NOTE(review): this calls _xpq_flush_queue() WITHOUT taking
 * update_lock, unlike xen_machphys_update() below -- confirm whether
 * callers guarantee exclusion here.
 */
void
xen_set_ldt(vm_offset_t base, uint32_t entries)
{
	xpq_queue_set_ldt(base, entries);
	_xpq_flush_queue();
}

/* Record an mfn->pfn translation in the machine-to-physical table. */
void
xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
	unsigned long flags = 0;
	XPQ_LOCK(&update_lock, flags);
	xpq_queue[xpq_idx].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
	xpq_queue[xpq_idx].val = pfn;
	xpq_increment_idx();
	_xpq_flush_queue();
	XPQ_UNLOCK(&update_lock, flags);
}

/* Queue a PTE write (machine address of the PTE, new value). */
void
xpq_queue_pt_update(pt_entry_t *ptr, pt_entry_t val)
{
	unsigned long flags = 0;

	XPQ_LOCK(&update_lock, flags);
	xpq_queue[xpq_idx].ptr = (uint32_t)ptr;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
	XPQ_UNLOCK(&update_lock, flags);
}

/* Queue an update_va_mapping multicall for va -> machine address ma. */
void
mcl_queue_pt_update(vm_offset_t va, vm_paddr_t ma)
{
#if 0
	printf("setting va %x to ma %x\n", va, ma);
#endif
	unsigned long flags = 0;
	XPQ_LOCK(&update_lock, flags);
	mcl_queue[mcl_idx].op = __HYPERVISOR_update_va_mapping;
	mcl_queue[mcl_idx].args[0] = (unsigned long)(va >> PAGE_SHIFT);
	mcl_queue[mcl_idx].args[1] = (unsigned long)ma;
	mcl_queue[mcl_idx].args[2] = 0;
	mcl_increment_idx();
	XPQ_UNLOCK(&update_lock, flags);
}



/* Queue a page-table base switch to the pdir at physical address val. */
void
xpq_queue_pt_switch(uint32_t val)
{
	unsigned long flags = 0;
	vm_paddr_t ma = xpmap_ptom(val) & PG_FRAME;

	XPQ_LOCK(&update_lock, flags);
	xpq_queue[xpq_idx].ptr = ma | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
	xpq_increment_idx();
	XPQ_UNLOCK(&update_lock, flags);
}


/* Queue a request to pin pa as an L1 or L2 page table. */
void
xpq_queue_pin_table(uint32_t pa, int type)
{
	unsigned long flags = 0;
	XPQ_LOCK(&update_lock, flags);
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	switch (type) {
	case XPQ_PIN_L1_TABLE:
		xpq_queue[xpq_idx].val = MMUEXT_PIN_L1_TABLE;
		break;
	case XPQ_PIN_L2_TABLE:
		xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
		break;
	}
	xpq_increment_idx();
	XPQ_UNLOCK(&update_lock, flags);
}

/* Queue a request to unpin the page table at pa. */
void
xpq_queue_unpin_table(uint32_t pa)
{
	unsigned long flags = 0;

	XPQ_LOCK(&update_lock, flags);
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
	xpq_increment_idx();
	XPQ_UNLOCK(&update_lock, flags);
}

/* Queue an LDT base/size change (va must be page aligned). */
void
xpq_queue_set_ldt(vm_offset_t va, uint32_t entries)
{
	unsigned long flags = 0;

	XPQ_LOCK(&update_lock, flags);
	KASSERT(va == (va & PG_FRAME), ("ldt not page aligned"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
	xpq_queue[xpq_idx].val = MMUEXT_SET_LDT |
	    (entries << MMUEXT_CMD_SHIFT);
	xpq_increment_idx();
	XPQ_UNLOCK(&update_lock, flags);
}

/* Queue a full TLB flush. */
void
xpq_queue_tlb_flush()
{
	unsigned long flags = 0;

	XPQ_LOCK(&update_lock, flags);

	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
	xpq_increment_idx();
	XPQ_UNLOCK(&update_lock, flags);
}


/********** CODE WORTH KEEPING ABOVE HERE *****************/

void xen_failsafe_handler(void);

/* Entry point Xen invokes on a failsafe callback; treat as fatal. */
void
xen_failsafe_handler(void)
{

	panic("xen_failsafe_handler called!\n");
}


/*
 * Install a GDT/LDT descriptor through the hypervisor (descriptor
 * tables are read-only to the guest under Xen).
 */
void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
	vm_paddr_t pa;
	pt_entry_t *ptp;
	uint32_t raw[2];

	bcopy(entry, raw, 2*sizeof(int32_t));
	ptp = vtopte((vm_offset_t)table);
	pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
	if (HYPERVISOR_update_descriptor(pa, raw[0],
	    raw[1]))
		panic("HYPERVISOR_update_descriptor failed\n");
}



#if defined(XENDEBUG)
/* Debug-only: dump the valid entries of one page-table page. */
static void
xpmap_dump_pt(pt_entry_t *ptp, int p)
{
	pt_entry_t pte;
	int j;
	int bufpos;

	pte = xpmap_ptom((uint32_t)ptp - KERNTEXTOFF);
	PRINTK(("%03x: %p(%p) %08x\n", p, ptp, (void *)pte, p << PDRSHIFT));

	bufpos = 0;
	for (j = 0; j < PTES_PER_PTP; j++) {
		if ((ptp[j] & PG_V) == 0)
			continue;
		pte = ptp[j] /* & PG_FRAME */;
		bufpos += sprintf(XBUF + bufpos, "%x:%03x:%08x ",
		    p, j, pte);
		if (bufpos > 70) {
			int k;
			sprintf(XBUF + bufpos, "\n");
			PRINTK((XBUF));
			bufpos = 0;
			/* crude busy-wait so console output can drain */
			for (k = 0; k < 1000000; k++);
		}
	}
	if (bufpos) {
		PRINTK((XBUF));
		bufpos = 0;
	}
}
#endif


diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h b/freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h
new file mode 100644
index 0000000000..fadc3a4a26
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h
@@ -0,0 +1,601 @@
/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4.
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/include/cpufunc.h,v 1.135 2003/08/06 18:21:27 bde Exp $ + */ + +/* + * Functions to provide access to special i386 instructions. + * This in included in sys/systm.h, and that file should be + * used in preference to this. 
+ */ + +#ifndef _MACHINE_CPUFUNC_H_ +#define _MACHINE_CPUFUNC_H_ + +#include <sys/cdefs.h> +#include <machine/psl.h> +#define NO_EXCHANGE +#include <machine/xen-os.h> +#include <machine/evtchn.h> +#include <machine/xenvar.h> +struct thread; +struct region_descriptor; + +__BEGIN_DECLS +#define readb(va) (*(volatile u_int8_t *) (va)) +#define readw(va) (*(volatile u_int16_t *) (va)) +#define readl(va) (*(volatile u_int32_t *) (va)) + +#define writeb(va, d) (*(volatile u_int8_t *) (va) = (d)) +#define writew(va, d) (*(volatile u_int16_t *) (va) = (d)) +#define writel(va, d) (*(volatile u_int32_t *) (va) = (d)) + +static __inline u_int +read_eflags(void) +{ + u_int ef; + __asm __volatile("pushfl; popl %0" : "=r" (ef)); + return (ef); +} + +static __inline void +write_eflags(u_int ef) +{ + __asm __volatile("pushl %0; popfl" : : "r" (ef)); +} +#ifdef __GNUC__ + +static __inline void +breakpoint(void) +{ + __asm __volatile("int $3"); +} + +static __inline u_int +bsfl(u_int mask) +{ + u_int result; + + __asm __volatile("bsfl %1,%0" : "=r" (result) : "rm" (mask)); + return (result); +} + +static __inline u_int +bsrl(u_int mask) +{ + u_int result; + + __asm __volatile("bsrl %1,%0" : "=r" (result) : "rm" (mask)); + return (result); +} +static __inline void +disable_intr(void) +{ + __cli(); +} +static __inline void +do_cpuid(u_int ax, u_int *p) +{ + __asm __volatile("cpuid" + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); +} + +static __inline void +enable_intr(void) +{ + __sti(); +} + + +#define HAVE_INLINE_FFS + +static __inline int +ffs(int mask) +{ + /* + * Note that gcc-2's builtin ffs would be used if we didn't declare + * this inline or turn off the builtin. The builtin is faster but + * broken in gcc-2.4.5 and slower but working in gcc-2.5 and later + * versions. + */ + return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1); +} + +#define HAVE_INLINE_FLS + +static __inline int +fls(int mask) +{ + return (mask == 0 ? 
mask : (int)bsrl((u_int)mask) + 1); +} + +static __inline void +halt(void) +{ + __asm __volatile("hlt"); +} + +#if __GNUC__ < 2 + +#define inb(port) inbv(port) +#define outb(port, data) outbv(port, data) + +#else /* __GNUC >= 2 */ + +/* + * The following complications are to get around gcc not having a + * constraint letter for the range 0..255. We still put "d" in the + * constraint because "i" isn't a valid constraint when the port + * isn't constant. This only matters for -O0 because otherwise + * the non-working version gets optimized away. + * + * Use an expression-statement instead of a conditional expression + * because gcc-2.6.0 would promote the operands of the conditional + * and produce poor code for "if ((inb(var) & const1) == const2)". + * + * The unnecessary test `(port) < 0x10000' is to generate a warning if + * the `port' has type u_short or smaller. Such types are pessimal. + * This actually only works for signed types. The range check is + * careful to avoid generating warnings. + */ +#define inb(port) __extension__ ({ \ + u_char _data; \ + if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \ + && (port) < 0x10000) \ + _data = inbc(port); \ + else \ + _data = inbv(port); \ + _data; }) + +#define outb(port, data) ( \ + __builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \ + && (port) < 0x10000 \ + ? outbc(port, data) : outbv(port, data)) + +static __inline u_char +inbc(u_int port) +{ + u_char data; + + __asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port))); + return (data); +} + +static __inline void +outbc(u_int port, u_char data) +{ + __asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port))); +} + +#endif /* __GNUC <= 2 */ + +static __inline u_char +inbv(u_int port) +{ + u_char data; + /* + * We use %%dx and not %1 here because i/o is done at %dx and not at + * %edx, while gcc generates inferior code (movw instead of movl) + * if we tell it to load (u_short) port. 
+ */ + __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); + return (data); +} + +static __inline u_int +inl(u_int port) +{ + u_int data; + + __asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port)); + return (data); +} + +static __inline void +insb(u_int port, void *addr, size_t cnt) +{ + __asm __volatile("cld; rep; insb" + : "+D" (addr), "+c" (cnt) + : "d" (port) + : "memory"); +} + +static __inline void +insw(u_int port, void *addr, size_t cnt) +{ + __asm __volatile("cld; rep; insw" + : "+D" (addr), "+c" (cnt) + : "d" (port) + : "memory"); +} + +static __inline void +insl(u_int port, void *addr, size_t cnt) +{ + __asm __volatile("cld; rep; insl" + : "+D" (addr), "+c" (cnt) + : "d" (port) + : "memory"); +} + +static __inline void +invd(void) +{ + __asm __volatile("invd"); +} + +static __inline u_short +inw(u_int port) +{ + u_short data; + + __asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port)); + return (data); +} + +static __inline void +outbv(u_int port, u_char data) +{ + u_char al; + /* + * Use an unnecessary assignment to help gcc's register allocator. + * This make a large difference for gcc-1.40 and a tiny difference + * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for + * best results. gcc-2.6.0 can't handle this. + */ + al = data; + __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); +} + +static __inline void +outl(u_int port, u_int data) +{ + /* + * outl() and outw() aren't used much so we haven't looked at + * possible micro-optimizations such as the unnecessary + * assignment for them. 
+ */ + __asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port)); +} + +static __inline void +outsb(u_int port, const void *addr, size_t cnt) +{ + __asm __volatile("cld; rep; outsb" + : "+S" (addr), "+c" (cnt) + : "d" (port)); +} + +static __inline void +outsw(u_int port, const void *addr, size_t cnt) +{ + __asm __volatile("cld; rep; outsw" + : "+S" (addr), "+c" (cnt) + : "d" (port)); +} + +static __inline void +outsl(u_int port, const void *addr, size_t cnt) +{ + __asm __volatile("cld; rep; outsl" + : "+S" (addr), "+c" (cnt) + : "d" (port)); +} + +static __inline void +outw(u_int port, u_short data) +{ + __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port)); +} + +static __inline void +ia32_pause(void) +{ + __asm __volatile("pause"); +} + +static __inline u_int64_t +rdmsr(u_int msr) +{ + u_int64_t rv; + + __asm __volatile("rdmsr" : "=A" (rv) : "c" (msr)); + return (rv); +} + +static __inline u_int64_t +rdpmc(u_int pmc) +{ + u_int64_t rv; + + __asm __volatile("rdpmc" : "=A" (rv) : "c" (pmc)); + return (rv); +} + +static __inline u_int64_t +rdtsc(void) +{ + u_int64_t rv; + + __asm __volatile("rdtsc" : "=A" (rv)); + return (rv); +} + +static __inline void +wbinvd(void) +{ + __asm __volatile("wbinvd"); +} + +static __inline void +wrmsr(u_int msr, u_int64_t newval) +{ + __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); +} + +static __inline u_int +rfs(void) +{ + u_int sel; + __asm __volatile("movl %%fs,%0" : "=rm" (sel)); + return (sel); +} + +static __inline u_int +rgs(void) +{ + u_int sel; + __asm __volatile("movl %%gs,%0" : "=rm" (sel)); + return (sel); +} + +static __inline void +load_fs(u_int sel) +{ + __asm __volatile("movl %0,%%fs" : : "rm" (sel)); +} + +static __inline void +load_gs(u_int sel) +{ + __asm __volatile("movl %0,%%gs" : : "rm" (sel)); +} + +/* void lidt(struct region_descriptor *addr); */ +static __inline void +lidt(struct region_descriptor *addr) +{ + __asm __volatile("lidt (%0)" : : "r" (addr)); +} + +static __inline u_int +rdr0(void) 
+{ + u_int data; + __asm __volatile("movl %%dr0,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr0(u_int dr0) +{ + __asm __volatile("movl %0,%%dr0" : : "r" (dr0)); +} + +static __inline u_int +rdr1(void) +{ + u_int data; + __asm __volatile("movl %%dr1,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr1(u_int dr1) +{ + __asm __volatile("movl %0,%%dr1" : : "r" (dr1)); +} + +static __inline u_int +rdr2(void) +{ + u_int data; + __asm __volatile("movl %%dr2,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr2(u_int dr2) +{ + __asm __volatile("movl %0,%%dr2" : : "r" (dr2)); +} + +static __inline u_int +rdr3(void) +{ + u_int data; + __asm __volatile("movl %%dr3,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr3(u_int dr3) +{ + __asm __volatile("movl %0,%%dr3" : : "r" (dr3)); +} + +static __inline u_int +rdr4(void) +{ + u_int data; + __asm __volatile("movl %%dr4,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr4(u_int dr4) +{ + __asm __volatile("movl %0,%%dr4" : : "r" (dr4)); +} + +static __inline u_int +rdr5(void) +{ + u_int data; + __asm __volatile("movl %%dr5,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr5(u_int dr5) +{ + __asm __volatile("movl %0,%%dr5" : : "r" (dr5)); +} + +static __inline u_int +rdr6(void) +{ + u_int data; + __asm __volatile("movl %%dr6,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr6(u_int dr6) +{ + __asm __volatile("movl %0,%%dr6" : : "r" (dr6)); +} + +static __inline u_int +rdr7(void) +{ + u_int data; + __asm __volatile("movl %%dr7,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_dr7(u_int dr7) +{ + __asm __volatile("movl %0,%%dr7" : : "r" (dr7)); +} + +static __inline register_t +intr_disable(void) +{ + register_t eflags; + + __save_and_cli(eflags); + return (eflags); +} + +static __inline void +intr_restore(register_t eflags) +{ + __restore_flags(eflags); +} + 
+#else /* !__GNUC__ */ + +int breakpoint(void); +u_int bsfl(u_int mask); +u_int bsrl(u_int mask); +void cpu_invlpg(u_int addr); +void cpu_invlpg_range(u_int start, u_int end); +void disable_intr(void); +void do_cpuid(u_int ax, u_int *p); +void enable_intr(void); +void halt(void); +u_char inb(u_int port); +u_int inl(u_int port); +void insb(u_int port, void *addr, size_t cnt); +void insl(u_int port, void *addr, size_t cnt); +void insw(u_int port, void *addr, size_t cnt); +void invd(void); +void invlpg(u_int addr); +void invlpg_range(u_int start, u_int end); +void invltlb(void); +u_short inw(u_int port); +void load_cr3(u_int cr3); +void load_cr4(u_int cr4); +void load_fs(u_int sel); +void load_gs(u_int sel); +struct region_descriptor; +void lidt(struct region_descriptor *addr); +void ltr(u_short sel); +void outb(u_int port, u_char data); +void outl(u_int port, u_int data); +void outsb(u_int port, void *addr, size_t cnt); +void outsl(u_int port, void *addr, size_t cnt); +void outsw(u_int port, void *addr, size_t cnt); +void outw(u_int port, u_short data); +void ia32_pause(void); +u_int rcr2(void); +u_int rcr3(void); +u_int rcr4(void); +u_int rfs(void); +u_int rgs(void); +u_int64_t rdmsr(u_int msr); +u_int64_t rdpmc(u_int pmc); +u_int64_t rdtsc(void); +u_int read_eflags(void); +void wbinvd(void); +void write_eflags(u_int ef); +void wrmsr(u_int msr, u_int64_t newval); +u_int rdr0(void); +void load_dr0(u_int dr0); +u_int rdr1(void); +void load_dr1(u_int dr1); +u_int rdr2(void); +void load_dr2(u_int dr2); +u_int rdr3(void); +void load_dr3(u_int dr3); +u_int rdr4(void); +void load_dr4(u_int dr4); +u_int rdr5(void); +void load_dr5(u_int dr5); +u_int rdr6(void); +void load_dr6(u_int dr6); +u_int rdr7(void); +void load_dr7(u_int dr7); +register_t intr_disable(void); +void intr_restore(register_t ef); + +#endif /* __GNUC__ */ + +void reset_dbregs(void); + +__END_DECLS + +#endif /* !_MACHINE_CPUFUNC_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h 
b/freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h new file mode 100644 index 0000000000..1ccd49d448 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h @@ -0,0 +1,120 @@ +/****************************************************************************** + * ctrl_if.h + * + * Management functions for special interface to the domain controller. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __I386_XENO__CTRL_IF_H__ +#define __I386_XENO__CTRL_IF_H__ + +#include <sys/taskqueue.h> +#include <machine/hypervisor.h> + + +typedef control_msg_t ctrl_msg_t; + +/* + * Callback function type. Called for asynchronous processing of received + * request messages, and responses to previously-transmitted request messages. + * The parameters are (@msg, @id). + * @msg: Original request/response message (not a copy). The message can be + * modified in-place by the handler (e.g., a response callback can + * turn a request message into a response message in place). The message + * is no longer accessible after the callback handler returns -- if the + * message is required to persist for longer then it must be copied. + * @id: (Response callbacks only) The 'id' that was specified when the + * original request message was queued for transmission. + */ +typedef void (*ctrl_msg_handler_t)(ctrl_msg_t *, unsigned long); + +/* + * Send @msg to the domain controller. Execute @hnd when a response is + * received, passing the response message and the specified @id. This + * operation will not block: it will return -EAGAIN if there is no space. + * Notes: + * 1. The @msg is copied if it is transmitted and so can be freed after this + * function returns. + * 2. If @hnd is NULL then no callback is executed. + */ +int ctrl_if_send_message_noblock( + ctrl_msg_t *msg, + ctrl_msg_handler_t hnd, + unsigned long id); + +/* + * Send @msg to the domain controller. Execute @hnd when a response is + * received, passing the response message and the specified @id. 
This + * operation will block until the message is sent, or a signal is received + * for the calling process (unless @wait_state is TASK_UNINTERRUPTIBLE). + * Notes: + * 1. The @msg is copied if it is transmitted and so can be freed after this + * function returns. + * 2. If @hnd is NULL then no callback is executed. + */ +int ctrl_if_send_message_block( + ctrl_msg_t *msg, + ctrl_msg_handler_t hnd, + unsigned long id, + long wait_state); + +/* + * Request a callback when there is /possibly/ space to immediately send a + * message to the domain controller. This function returns 0 if there is + * already space to trasnmit a message --- in this case the callback task /may/ + * still be executed. If this function returns 1 then the callback /will/ be + * executed when space becomes available. + */ +int ctrl_if_enqueue_space_callback(struct task *task); + +/* + * Send a response (@msg) to a message from the domain controller. This will + * never block. + * Notes: + * 1. The @msg is copied and so can be freed after this function returns. + * 2. The @msg may be the original request message, modified in-place. + */ +void ctrl_if_send_response(ctrl_msg_t *msg); + +/* + * Register a receiver for typed messages from the domain controller. The + * handler (@hnd) is called for every received message of specified @type. + * Returns TRUE (non-zero) if the handler was successfully registered. + * If CALLBACK_IN_BLOCKING CONTEXT is specified in @flags then callbacks will + * occur in a context in which it is safe to yield (i.e., process context). + */ +#define CALLBACK_IN_BLOCKING_CONTEXT 1 +int ctrl_if_register_receiver( + uint8_t type, + ctrl_msg_handler_t hnd, + unsigned int flags); + +/* + * Unregister a receiver for typed messages from the domain controller. The + * handler (@hnd) will not be executed after this function returns. + */ +void ctrl_if_unregister_receiver(uint8_t type, ctrl_msg_handler_t hnd); + +/* Suspend/resume notifications. 
*/ +void ctrl_if_suspend(void); +void ctrl_if_resume(void); + + +/* + * Returns TRUE if there are no outstanding message requests at the domain + * controller. This can be used to ensure that messages have really flushed + * through when it is not possible to use the response-callback interface. + * WARNING: If other subsystems are using the control interface then this + * function might never return TRUE! + */ +int ctrl_if_transmitter_empty(void); /* !! DANGEROUS FUNCTION !! */ + +/* + * Manually discard response messages from the domain controller. + * WARNING: This is usually done automatically -- this function should only + * be called when normal interrupt mechanisms are disabled! + */ +void ctrl_if_discard_responses(void); /* !! DANGEROUS FUNCTION !! */ + +#endif /* __ASM_XEN__CONTROL_IF_H__ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h b/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h new file mode 100644 index 0000000000..3e962e3014 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h @@ -0,0 +1,92 @@ +/****************************************************************************** + * evtchn.h + * + * Communication via Xen event channels. + * Also definitions for the device that demuxes notifications to userspace. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __ASM_EVTCHN_H__ +#define __ASM_EVTCHN_H__ + +#include <machine/hypervisor.h> +#include <machine/synch_bitops.h> +#include <machine/hypervisor-ifs.h> + +/* + * LOW-LEVEL DEFINITIONS + */ + +/* Force a proper event-channel callback from Xen. */ +void force_evtchn_callback(void); + +/* Entry point for notifications into Linux subsystems. */ +void evtchn_do_upcall(struct intrframe *frame); + +/* Entry point for notifications into the userland character device. 
*/ +void evtchn_device_upcall(int port); + +static inline void +mask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_set_bit(port, &s->evtchn_mask[0]); +} + +static inline void +unmask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + + synch_clear_bit(port, &s->evtchn_mask[0]); + + /* + * The following is basically the equivalent of 'hw_resend_irq'. Just like + * a real IO-APIC we 'lose the interrupt edge' if the channel is masked. + */ + if ( synch_test_bit (port, &s->evtchn_pending[0]) && + !synch_test_and_set_bit(port>>5, &s->evtchn_pending_sel) ) + { + s->vcpu_data[0].evtchn_upcall_pending = 1; + if ( !s->vcpu_data[0].evtchn_upcall_mask ) + force_evtchn_callback(); + } +} + +static inline void +clear_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_clear_bit(port, &s->evtchn_pending[0]); +} + +static inline void +notify_via_evtchn(int port) +{ + evtchn_op_t op; + op.cmd = EVTCHNOP_send; + op.u.send.local_port = port; + (void)HYPERVISOR_event_channel_op(&op); +} + +/* + * CHARACTER-DEVICE DEFINITIONS + */ + +#define PORT_NORMAL 0x0000 +#define PORT_EXCEPTION 0x8000 +#define PORTIDX_MASK 0x7fff + +/* /dev/xen/evtchn resides at device number major=10, minor=200 */ +#define EVTCHN_MINOR 200 + +/* /dev/xen/evtchn ioctls: */ +/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ +#define EVTCHN_RESET _IO('E', 1) +/* EVTCHN_BIND: Bind to the specified event-channel port. */ +#define EVTCHN_BIND _IO('E', 2) +/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ +#define EVTCHN_UNBIND _IO('E', 3) + +#endif /* __ASM_EVTCHN_H__ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/frame.h b/freebsd-5.3-xen-sparse/i386-xen/include/frame.h new file mode 100644 index 0000000000..a6572d85a9 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/frame.h @@ -0,0 +1,129 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)frame.h 5.2 (Berkeley) 1/18/91 + * $FreeBSD: src/sys/i386/include/frame.h,v 1.23 2003/07/22 08:11:15 peter Exp $ + */ + +#ifndef _MACHINE_FRAME_H_ +#define _MACHINE_FRAME_H_ 1 + +/* + * System stack frames. + */ + +/* + * Exception/Trap Stack Frame + */ + +struct trapframe { + int tf_fs; + int tf_es; + int tf_ds; + int tf_edi; + int tf_esi; + int tf_ebp; + int tf_isp; + int tf_ebx; + int tf_edx; + int tf_ecx; + int tf_eax; + int tf_trapno; + int tf_cr2; + /* below portion defined in 386 hardware */ + int tf_err; + int tf_eip; + int tf_cs; + int tf_eflags; + /* below only when crossing rings (e.g. user to kernel) */ + int tf_esp; + int tf_ss; +}; + +/* Interrupt stack frame */ + +struct intrframe { + int if_fs; + int if_es; + int if_ds; + int if_edi; + int if_esi; + int if_ebp; + int :32; + int if_ebx; + int if_edx; + int if_ecx; + int if_eax; + int :32; /* for compat with trap frame - trapno */ + int if_vec; /* cr2 in trap frame */ + int :32; /* for compat with trap frame - err */ + /* below portion defined in 386 hardware */ + int if_eip; + int if_cs; + int if_eflags; + /* below only when crossing rings (e.g. user to kernel) */ + int if_esp; + int if_ss; +}; + +/* frame of clock (same as interrupt frame) */ + +struct clockframe { + int cf_fs; + int cf_es; + int cf_ds; + int cf_edi; + int cf_esi; + int cf_ebp; + int :32; + int cf_ebx; + int cf_edx; + int cf_ecx; + int cf_eax; + int :32; /* for compat with trap frame - trapno */ + int cf_vec; /* cr2 in trap frame */ + int :32; /* for compat with trap frame - err */ + /* below portion defined in 386 hardware */ + int cf_eip; + int cf_cs; + int cf_eflags; + /* below only when crossing rings (e.g. 
user to kernel) */ + int cf_esp; + int cf_ss; +}; + +#define INTR_TO_TRAPFRAME(frame) ((struct trapframe *)&(frame)->if_fs) + +#endif /* _MACHINE_FRAME_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h new file mode 100644 index 0000000000..4f75d27a9a --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h @@ -0,0 +1,36 @@ +#ifndef _HYPERVISOR_IFS_H_ +#define _HYPERVISOR_IFS_H_ + +#define s8 int8_t +#define s16 int16_t +#define s32 int32_t +#define s64 int64_t + +#define u8 uint8_t +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +#include <machine/xen-public/xen.h> +#include <machine/xen-public/io/domain_controller.h> +#include <machine/xen-public/io/netif.h> +#include <machine/xen-public/io/blkif.h> +#include <machine/xen-public/dom0_ops.h> +#include <machine/xen-public/event_channel.h> +#include <machine/xen-public/sched_ctl.h> +#include <machine/xen-public/physdev.h> +#undef blkif_sector_t /* XXX pre-processor didn't do the */ +#define blkif_sector_t uint64_t /* right thing */ + +#undef s8 +#undef s16 +#undef s32 +#undef s64 + +#undef u8 +#undef u16 +#undef u32 +#undef u64 + + +#endif diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h new file mode 100644 index 0000000000..95ee85f352 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h @@ -0,0 +1,355 @@ +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002, K A Fraser + */ + +#ifndef __HYPERVISOR_H__ +#define __HYPERVISOR_H__ + + +#include <machine/hypervisor-ifs.h> +#include <machine/frame.h> +#include "opt_xen.h" + +extern start_info_t *xen_start_info; + +/* arch/xen/mm/hypervisor.c */ +/* + * NB. ptr values should be PHYSICAL, not MACHINE. 
'vals' should be already + * be MACHINE addresses. + */ + + +void MULTICALL_flush_page_update_queue(void); + +#ifdef CONFIG_XEN_PHYSDEV_ACCESS +/* Allocate a contiguous empty region of low memory. Return virtual start. */ +unsigned long allocate_empty_lowmem_region(unsigned long pages); +/* Deallocate a contiguous region of low memory. Return it to the allocator. */ +void deallocate_lowmem_region(unsigned long vstart, unsigned long pages); +#endif + +typedef struct { unsigned long pte_low, pte_high; } pte_t; + +/* + * Assembler stubs for hyper-calls. + */ + +static inline int HYPERVISOR_set_trap_table(trap_info_t *table) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table), + "b" (table) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_mmu_update(mmu_update_t *req, + int count, + int *success_count) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_mmu_update), + "b" (req), "c" (count), "d" (success_count) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_gdt), + "b" (frame_list), "c" (entries) : "memory" ); + + + return ret; +} + +static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_stack_switch), + "b" (ss), "c" (esp) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks), + "b" (event_selector), "c" (event_address), + "d" (failsafe_selector), "S" (failsafe_address) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_fpu_taskswitch(void) 
+{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_yield(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_sched_op), + "b" (SCHEDOP_yield) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_block(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_sched_op), + "b" (SCHEDOP_block) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_shutdown(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_sched_op), + "b" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)) + : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_reboot(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_sched_op), + "b" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)) + : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_suspend(unsigned long srec) +{ + int ret; + /* NB. On suspend, control software expects a suspend record in %esi. 
*/ + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_sched_op), + "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), + "S" (srec) : "memory" ); + + return ret; +} + +static inline long HYPERVISOR_set_timer_op(uint64_t timeout) +{ + int ret; + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_timer_op), + "b" (timeout_hi), "c" (timeout_lo) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op) +{ + int ret; + dom0_op->interface_version = DOM0_INTERFACE_VERSION; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_dom0_op), + "b" (dom0_op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg), + "b" (reg), "c" (value) : "memory" ); + + return ret; +} + +static inline unsigned long HYPERVISOR_get_debugreg(int reg) +{ + unsigned long ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg), + "b" (reg) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_update_descriptor( + unsigned long pa, unsigned long word1, unsigned long word2) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_update_descriptor), + "b" (pa), "c" (word1), "d" (word2) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_set_fast_trap(int idx) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_fast_trap), + "b" (idx) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_dom_mem_op(unsigned int op, + unsigned long *pages, + unsigned long nr_pages) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op), + "b" (op), "c" (pages), "d" 
(nr_pages) : "memory" ); + return ret; +} + +static inline int HYPERVISOR_multicall(void *call_list, int nr_calls) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_multicall), + "b" (call_list), "c" (nr_calls) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_update_va_mapping( + unsigned long page_nr, pte_t new_val, unsigned long flags) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping), + "b" (page_nr), "c" ((new_val).pte_low), "d" (flags): + "memory" ); + /* XXX */ +#if 0 + if ( unlikely(ret < 0) ) + panic("Failed update VA mapping: %08lx, %08lx, %08lx", + page_nr, (new_val).pte_low, flags); +#endif + return ret; +} + +static inline int HYPERVISOR_event_channel_op(void *op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_event_channel_op), + "b" (op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_xen_version(int cmd) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_xen_version), + "b" (cmd) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_console_io(int cmd, int count, char *str) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_console_io), + "b" (cmd), "c" (count), "d" (str) : "memory" ); + + return ret; +} + +static __inline int HYPERVISOR_console_write(char *str, int count) +{ + return HYPERVISOR_console_io(CONSOLEIO_write, count, str); +} + +static inline int HYPERVISOR_physdev_op(void *physdev_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_physdev_op), + "b" (physdev_op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_update_va_mapping_otherdomain( + unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping_otherdomain), + "b" 
(page_nr), "c" ((new_val).pte_low), "d" (flags), "S" (domid) : + "memory" ); + + return ret; +} + +static inline int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_vm_assist), + "b" (cmd), "c" (type) : "memory" ); + + return ret; +} + +#endif /* __HYPERVISOR_H__ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/md_var.h b/freebsd-5.3-xen-sparse/i386-xen/include/md_var.h new file mode 100644 index 0000000000..5822a1e3d1 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/md_var.h @@ -0,0 +1,108 @@ +/*- + * Copyright (c) 1995 Bruce D. Evans. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/include/md_var.h,v 1.66 2003/11/03 22:37:28 jhb Exp $ + */ + +#ifndef _MACHINE_MD_VAR_H_ +#define _MACHINE_MD_VAR_H_ + +/* + * Miscellaneous machine-dependent declarations. + */ + +extern void (*bcopy_vector)(const void *from, void *to, size_t len); +extern void (*bzero_vector)(void *buf, size_t len); +extern int (*copyin_vector)(const void *udaddr, void *kaddr, size_t len); +extern int (*copyout_vector)(const void *kaddr, void *udaddr, size_t len); + +extern long Maxmem; +extern u_int atdevbase; /* offset in virtual memory of ISA io mem */ +extern u_int basemem; /* PA of original top of base memory */ +extern int busdma_swi_pending; +extern u_int cpu_exthigh; +extern u_int cpu_feature; +extern u_int cpu_fxsr; +extern u_int cpu_high; +extern u_int cpu_id; +extern u_int cpu_procinfo; +extern char cpu_vendor[]; +extern u_int cyrix_did; +extern uint16_t *elan_mmcr; +extern char kstack[]; +#ifdef PC98 +extern int need_pre_dma_flush; +extern int need_post_dma_flush; +#endif +extern char sigcode[]; +extern int szsigcode; +#ifdef COMPAT_FREEBSD4 +extern int szfreebsd4_sigcode; +#endif +#ifdef COMPAT_43 +extern int szosigcode; +#endif + +typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); +struct thread; +struct reg; +struct fpreg; +struct dbreg; + +void bcopyb(const void *from, void *to, size_t len); +void busdma_swi(void); +void cpu_setregs(void); +void cpu_switch_load_gs(void) 
__asm(__STRING(cpu_switch_load_gs)); +void doreti_iret(void) __asm(__STRING(doreti_iret)); +void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); +void doreti_popl_ds(void) __asm(__STRING(doreti_popl_ds)); +void doreti_popl_ds_fault(void) __asm(__STRING(doreti_popl_ds_fault)); +void doreti_popl_es(void) __asm(__STRING(doreti_popl_es)); +void doreti_popl_es_fault(void) __asm(__STRING(doreti_popl_es_fault)); +void doreti_popl_fs(void) __asm(__STRING(doreti_popl_fs)); +void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault)); +void scrit(void) __asm(__STRING(scrit)); +void ecrit(void) __asm(__STRING(ecrit)); +void critical_region_fixup(void) __asm(__STRING(critical_region_fixup)); +void enable_sse(void); +void fillw(int /*u_short*/ pat, void *base, size_t cnt); +void i486_bzero(void *buf, size_t len); +void i586_bcopy(const void *from, void *to, size_t len); +void i586_bzero(void *buf, size_t len); +int i586_copyin(const void *udaddr, void *kaddr, size_t len); +int i586_copyout(const void *kaddr, void *udaddr, size_t len); +void i686_pagezero(void *addr); +void sse2_pagezero(void *addr); +void init_AMD_Elan_sc520(void); +int is_physical_memory(vm_offset_t addr); +int isa_nmi(int cd); +vm_paddr_t kvtop(void *addr); +void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec); +int user_dbreg_trap(void); + +#endif /* !_MACHINE_MD_VAR_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/multicall.h b/freebsd-5.3-xen-sparse/i386-xen/include/multicall.h new file mode 100644 index 0000000000..30de865ee2 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/multicall.h @@ -0,0 +1,98 @@ +/****************************************************************************** + * multicall.h + */ + +#ifndef __MULTICALL_H__ +#define __MULTICALL_H__ + +#include <machine/hypervisor.h> +#define MAX_MULTICALL_ENTS 8 +extern multicall_entry_t multicall_list[]; +extern int nr_multicall_ents; + +static inline void 
execute_multicall_list(void) +{ + if ( unlikely(nr_multicall_ents == 0) ) return; + (void)HYPERVISOR_multicall(multicall_list, nr_multicall_ents); + nr_multicall_ents = 0; +} + + +static inline void handle_edge(void) +{ + if (unlikely(nr_multicall_ents == MAX_MULTICALL_ENTS)) + execute_multicall_list(); +} + +static inline void queue_multicall0(unsigned long op) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + nr_multicall_ents = i+1; + handle_edge(); +} + +static inline void queue_multicall1(unsigned long op, unsigned long arg1) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + multicall_list[i].args[0] = arg1; + nr_multicall_ents = i+1; + handle_edge(); +} + +static inline void queue_multicall2( + unsigned long op, unsigned long arg1, unsigned long arg2) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + multicall_list[i].args[0] = arg1; + multicall_list[i].args[1] = arg2; + nr_multicall_ents = i+1; + handle_edge(); +} + +static inline void queue_multicall3( + unsigned long op, unsigned long arg1, unsigned long arg2, + unsigned long arg3) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + multicall_list[i].args[0] = arg1; + multicall_list[i].args[1] = arg2; + multicall_list[i].args[2] = arg3; + nr_multicall_ents = i+1; + handle_edge(); +} + +static inline void queue_multicall4( + unsigned long op, unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + multicall_list[i].args[0] = arg1; + multicall_list[i].args[1] = arg2; + multicall_list[i].args[2] = arg3; + multicall_list[i].args[3] = arg4; + nr_multicall_ents = i+1; + handle_edge(); +} + +static inline void queue_multicall5( + unsigned long op, unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + multicall_list[i].args[0] = arg1; + multicall_list[i].args[1] = 
arg2; + multicall_list[i].args[2] = arg3; + multicall_list[i].args[3] = arg4; + multicall_list[i].args[4] = arg5; + nr_multicall_ents = i+1; + handle_edge(); +} + + +#endif /* __MULTICALL_H__ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/param.h b/freebsd-5.3-xen-sparse/i386-xen/include/param.h new file mode 100644 index 0000000000..a45fdd67c3 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/param.h @@ -0,0 +1,146 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)param.h 5.8 (Berkeley) 6/28/91 + * $FreeBSD: src/sys/i386/include/param.h,v 1.69 2003/06/14 23:23:53 alc Exp $ + */ + +/* + * Machine dependent constants for Intel 386. + */ + +/* + * Round p (pointer or byte index) up to a correctly-aligned value + * for all data types (int, long, ...). The result is unsigned int + * and must be cast to any desired pointer type. + */ +#ifndef _ALIGNBYTES +#define _ALIGNBYTES (sizeof(int) - 1) +#endif +#ifndef _ALIGN +#define _ALIGN(p) (((unsigned)(p) + _ALIGNBYTES) & ~_ALIGNBYTES) +#endif + +#ifndef _MACHINE +#define _MACHINE i386-xen +#endif +#ifndef _MACHINE_ARCH +#define _MACHINE_ARCH i386-xen +#endif + +#ifndef _NO_NAMESPACE_POLLUTION + +#ifndef _MACHINE_PARAM_H_ +#define _MACHINE_PARAM_H_ + +#ifndef MACHINE +#define MACHINE "i386" +#endif +#ifndef MACHINE_ARCH +#define MACHINE_ARCH "i386" +#endif +#define MID_MACHINE MID_I386 + +#ifdef SMP +#define MAXCPU 16 +#else +#define MAXCPU 1 +#endif /* SMP */ + +#define ALIGNBYTES _ALIGNBYTES +#define ALIGN(p) _ALIGN(p) + +#define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */ +#define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */ +#define PAGE_MASK (PAGE_SIZE-1) +#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) + +#ifdef PAE +#define NPGPTD 4 +#define PDRSHIFT 21 /* LOG2(NBPDR) */ +#else +#define NPGPTD 1 +#define PDRSHIFT 22 /* LOG2(NBPDR) */ +#endif + +#define NBPTD (NPGPTD<<PAGE_SHIFT) +#define NPDEPTD (NBPTD/(sizeof (pd_entry_t))) +#define NPDEPG 
(PAGE_SIZE/(sizeof (pd_entry_t))) +#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */ +#define PDRMASK (NBPDR-1) + +#define IOPAGES 2 /* pages of i/o permission bitmap */ + +#ifndef KSTACK_PAGES +#define KSTACK_PAGES 2 /* Includes pcb! */ +#endif +#define KSTACK_GUARD_PAGES 1 /* pages of kstack guard; 0 disables */ +#define UAREA_PAGES 1 /* holds struct user WITHOUT PCB (see def.) */ + +/* + * Ceiling on amount of swblock kva space, can be changed via + * the kern.maxswzone /boot/loader.conf variable. + */ +#ifndef VM_SWZONE_SIZE_MAX +#define VM_SWZONE_SIZE_MAX (32 * 1024 * 1024) +#endif + +/* + * Ceiling on size of buffer cache (really only effects write queueing, + * the VM page cache is not effected), can be changed via + * the kern.maxbcache /boot/loader.conf variable. + */ +#ifndef VM_BCACHE_SIZE_MAX +#define VM_BCACHE_SIZE_MAX (200 * 1024 * 1024) +#endif + +/* + * Mach derived conversion macros + */ +#define trunc_page(x) ((x) & ~PAGE_MASK) +#define round_page(x) (((x) + PAGE_MASK) & ~PAGE_MASK) +#define trunc_4mpage(x) ((x) & ~PDRMASK) +#define round_4mpage(x) ((((x)) + PDRMASK) & ~PDRMASK) + +#define atop(x) ((x) >> PAGE_SHIFT) +#define ptoa(x) ((x) << PAGE_SHIFT) + +#define i386_btop(x) ((x) >> PAGE_SHIFT) +#define i386_ptob(x) ((x) << PAGE_SHIFT) + +#define pgtok(x) ((x) * (PAGE_SIZE / 1024)) + +#endif /* !_MACHINE_PARAM_H_ */ +#endif /* !_NO_NAMESPACE_POLLUTION */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/pcb.h b/freebsd-5.3-xen-sparse/i386-xen/include/pcb.h new file mode 100644 index 0000000000..ff68761540 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcb.h @@ -0,0 +1,96 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)pcb.h 5.10 (Berkeley) 5/12/91 + * $FreeBSD: src/sys/i386/include/pcb.h,v 1.50 2003/09/30 08:11:36 jeff Exp $ + */ + +#ifndef _I386_PCB_H_ +#define _I386_PCB_H_ + +/* + * Intel 386 process control block + */ +#include <machine/npx.h> + +struct pcb { + int pcb_cr3; + int pcb_edi; + int pcb_esi; + int pcb_ebp; + int pcb_esp; + int pcb_eax; + int pcb_ebx; + int pcb_ecx; + int pcb_edx; + int pcb_eip; + + int pcb_dr0; + int pcb_dr1; + int pcb_dr2; + int pcb_dr3; + int pcb_dr6; + int pcb_dr7; + + union savefpu pcb_save; + u_int pcb_flags; +#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ +#define PCB_DBREGS 0x02 /* process using debug registers */ +#define PCB_NPXTRAP 0x04 /* npx trap pending */ +#define PCB_NPXINITDONE 0x08 /* fpu state is initialized */ +#define PCB_VM86CALL 0x10 /* in vm86 call */ + + caddr_t pcb_onfault; /* copyin/out fault recovery */ + int pcb_cs; + int pcb_ds; + int pcb_ss; + int pcb_es; + int pcb_gs; + int pcb_fs; + struct pcb_ext *pcb_ext; /* optional pcb extension */ + int pcb_psl; /* process status long */ + void (*pcb_switchout)(void); /* Special switchout function. */ + u_long __pcb_spare[2]; /* adjust to avoid core dump size changes */ +}; + +#ifdef _KERNEL +struct trapframe; + +void makectx(struct trapframe *, struct pcb *); + +void savectx(struct pcb *); +#endif + +#endif /* _I386_PCB_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h new file mode 100644 index 0000000000..80a675cd4a --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h @@ -0,0 +1,173 @@ +/*- + * Copyright (c) Peter Wemm <peter@netplex.com.au> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/include/pcpu.h,v 1.41 2003/11/20 23:23:22 peter Exp $ + */ + +#ifndef _MACHINE_PCPU_H_ +#define _MACHINE_PCPU_H_ + +#ifdef _KERNEL + +#include <machine/segments.h> +#include <machine/tss.h> + +/* + * The SMP parts are setup in pmap.c and locore.s for the BSP, and + * mp_machdep.c sets up the data for the AP's to "see" when they awake. 
+ * The reason for doing it via a struct is so that an array of pointers + * to each CPU's data can be set up for things like "check curproc on all + * other processors" + */ +#define PCPU_MD_FIELDS \ + struct pcpu *pc_prvspace; /* Self-reference */ \ + struct pmap *pc_curpmap; \ + struct i386tss pc_common_tss; \ + struct segment_descriptor pc_common_tssd; \ + struct segment_descriptor *pc_tss_gdt; \ + int pc_currentldt; \ + u_int pc_acpi_id; \ + u_int pc_apic_id; \ + u_int pc_faultaddr; \ + u_int pc_trap_nesting; \ + u_int pc_pdir + +#if defined(lint) + +extern struct pcpu *pcpup; + +#define PCPU_GET(member) (pcpup->pc_ ## member) +#define PCPU_PTR(member) (&pcpup->pc_ ## member) +#define PCPU_SET(member,value) (pcpup->pc_ ## member = (value)) + +#elif defined(__GNUC__) + +/* + * Evaluates to the byte offset of the per-cpu variable name. + */ +#define __pcpu_offset(name) \ + __offsetof(struct pcpu, name) + +/* + * Evaluates to the type of the per-cpu variable name. + */ +#define __pcpu_type(name) \ + __typeof(((struct pcpu *)0)->name) + +/* + * Evaluates to the address of the per-cpu variable name. + */ +#define __PCPU_PTR(name) __extension__ ({ \ + __pcpu_type(name) *__p; \ + \ + __asm __volatile("movl %%fs:%1,%0; addl %2,%0" \ + : "=r" (__p) \ + : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \ + "i" (__pcpu_offset(name))); \ + \ + __p; \ +}) + +/* + * Evaluates to the value of the per-cpu variable name. 
+ */ +#define __PCPU_GET(name) __extension__ ({ \ + __pcpu_type(name) __result; \ + \ + if (sizeof(__result) == 1) { \ + u_char __b; \ + __asm __volatile("movb %%fs:%1,%0" \ + : "=r" (__b) \ + : "m" (*(u_char *)(__pcpu_offset(name)))); \ + __result = *(__pcpu_type(name) *)(void *)&__b; \ + } else if (sizeof(__result) == 2) { \ + u_short __w; \ + __asm __volatile("movw %%fs:%1,%0" \ + : "=r" (__w) \ + : "m" (*(u_short *)(__pcpu_offset(name)))); \ + __result = *(__pcpu_type(name) *)(void *)&__w; \ + } else if (sizeof(__result) == 4) { \ + u_int __i; \ + __asm __volatile("movl %%fs:%1,%0" \ + : "=r" (__i) \ + : "m" (*(u_int *)(__pcpu_offset(name)))); \ + __result = *(__pcpu_type(name) *)(void *)&__i; \ + } else { \ + __result = *__PCPU_PTR(name); \ + } \ + \ + __result; \ +}) + +/* + * Sets the value of the per-cpu variable name to value val. + */ +#define __PCPU_SET(name, val) { \ + __pcpu_type(name) __val = (val); \ + \ + if (sizeof(__val) == 1) { \ + u_char __b; \ + __b = *(u_char *)&__val; \ + __asm __volatile("movb %1,%%fs:%0" \ + : "=m" (*(u_char *)(__pcpu_offset(name))) \ + : "r" (__b)); \ + } else if (sizeof(__val) == 2) { \ + u_short __w; \ + __w = *(u_short *)&__val; \ + __asm __volatile("movw %1,%%fs:%0" \ + : "=m" (*(u_short *)(__pcpu_offset(name))) \ + : "r" (__w)); \ + } else if (sizeof(__val) == 4) { \ + u_int __i; \ + __i = *(u_int *)&__val; \ + __asm __volatile("movl %1,%%fs:%0" \ + : "=m" (*(u_int *)(__pcpu_offset(name))) \ + : "r" (__i)); \ + } else { \ + *__PCPU_PTR(name) = __val; \ + } \ +} + +#define PCPU_GET(member) __PCPU_GET(pc_ ## member) +#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) +#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) + +static __inline struct thread * +__curthread(void) +{ + struct thread *td; + + __asm __volatile("movl %%fs:0,%0" : "=r" (td)); + return (td); +} +#define curthread (__curthread()) + +#else +#error gcc or lint is required to use this file +#endif + +#endif /* _KERNEL */ + +#endif /* ! 
_MACHINE_PCPU_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h new file mode 100644 index 0000000000..9e838b9bd4 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h @@ -0,0 +1,355 @@ +/* + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Derived from hp300 version by Mike Hibler, this version by William + * Jolitz uses a recursive map [a pde points to the page directory] to + * map the page tables using the pagetables themselves. This is done to + * reduce the impact on kernel virtual memory for lots of sparse address + * space, and to reduce the cost of memory to each process. + * + * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 + * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 + * $FreeBSD: src/sys/i386/include/pmap.h,v 1.103 2003/11/08 03:01:26 alc Exp $ + */ + +#ifndef _MACHINE_PMAP_H_ +#define _MACHINE_PMAP_H_ + +/* + * Page-directory and page-table entires follow this format, with a few + * of the fields not present here and there, depending on a lot of things. 
+ */ + /* ---- Intel Nomenclature ---- */ +#define PG_V 0x001 /* P Valid */ +#define PG_RW 0x002 /* R/W Read/Write */ +#define PG_U 0x004 /* U/S User/Supervisor */ +#define PG_NC_PWT 0x008 /* PWT Write through */ +#define PG_NC_PCD 0x010 /* PCD Cache disable */ +#define PG_A 0x020 /* A Accessed */ +#define PG_M 0x040 /* D Dirty */ +#define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ +#define PG_G 0x100 /* G Global */ +#define PG_AVAIL1 0x200 /* / Available for system */ +#define PG_AVAIL2 0x400 /* < programmers use */ +#define PG_AVAIL3 0x800 /* \ */ + + +/* Our various interpretations of the above */ +#define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ +#define PG_MANAGED PG_AVAIL2 +#define PG_FRAME (~((vm_paddr_t)PAGE_MASK)) +#define PG_PROT (PG_RW|PG_U) /* all protection bits . */ +#define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ + +#define PG_KERNEL (PG_V | PG_RW | PG_M | PG_A) +#define PG_KERNEL_NC (PG_KERNEL | PG_N) +#define PG_KERNEL_RO (PG_VALID | PG_M | PG_A) + +/* + * Page Protection Exception bits + */ + +#define PGEX_P 0x01 /* Protection violation vs. not present */ +#define PGEX_W 0x02 /* during a Write cycle */ +#define PGEX_U 0x04 /* access from User mode (UPL) */ +#define XEN_PAGES 16 + +/* + * Size of Kernel address space. This is the number of page table pages + * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. + * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). + */ + +#ifndef KVA_PAGES +#ifdef PAE +#define KVA_PAGES 512 +#else +#define KVA_PAGES 256 +#endif +#endif + +/* + * Pte related macros + */ +#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT))) + +#ifndef NKPT +#ifdef PAE +#define NKPT 120 /* actual number of kernel page tables */ +#else +#define NKPT 30 /* actual number of kernel page tables */ +#endif +#endif + +/* + * XEN NOTE: Xen consumes 64MB of memory, so subtract that from the number + * of page available to the kernel virutal address space. 
+ */ +#ifndef NKPDE +#ifdef SMP +#define NKPDE (KVA_PAGES - 1 - XEN_PAGES) /* number of page tables/pde's */ +#else +#define NKPDE (KVA_PAGES - XEN_PAGES) /* number of page tables/pde's */ +#endif +#endif + +/* + * The *PTDI values control the layout of virtual memory + * + * XXX This works for now, but I am not real happy with it, I'll fix it + * right after I fix locore.s and the magic 28K hole + * + * SMP_PRIVPAGES: The per-cpu address space is 0xff80000 -> 0xffbfffff + */ + +/* + * XEN NOTE: We need to shift down the start of KVA by 64MB to account for + * Xen using the upper 64MB. + * + * The layout of VA for XenoBSD is: + * | USER | PTDPTDI | KVA | XEN | + * | 0x00000000 | 0xbfc00000 | 0xc0000000 | 0xfc000000 - 0xffffffff| + * + * Normally it is just: + * | USER | PTDPTDI | KVA | + * | 0x00000000 | 0xbfc00000 | 0xc0000000 - 0xffffffff | + */ + +#ifdef SMP +#define MPPTDI (NPDEPTD-1) /* per cpu ptd entry */ +#define KPTDI (MPPTDI-NKPDE-XEN_PAGES /* start of kernel virtual pde's */ +#else +#define KPTDI (NPDEPTD-NKPDE-XEN_PAGES) /* start of kernel virtual pde's */ +#endif /* SMP */ + +#define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to ptd! */ + +/* + * XXX doesn't really belong here I guess... + */ +#define ISA_HOLE_START 0xa0000 +#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START) + +#ifndef LOCORE + +#include <sys/queue.h> +#include <sys/_lock.h> +#include <sys/_mutex.h> + + +typedef uint32_t pd_entry_t; +typedef uint32_t pt_entry_t; + +#define PTESHIFT (2) +#define PDESHIFT (2) + + +/* + * Address of current and alternate address space page table maps + * and directories. + */ +#ifdef _KERNEL +extern pt_entry_t PTmap[]; +extern pd_entry_t PTD[]; +extern pd_entry_t PTDpde[]; + +extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ + +#include <machine/xen-os.h> +#include <machine/xenvar.h> +#include <machine/xenpmap.h> + + +/* + * virtual address to page table entry and + * to physical address. 
Likewise for alternate address space. + * Note: these work recursively, thus vtopte of a pte will give + * the corresponding pde that in turn maps it. + */ +#define vtopte(va) (PTmap + i386_btop(va)) + +/* + * Given a virtual address, return the machine address of its PTE + * + */ +#define vtoptema(va) pmap_kextract_ma((vm_offset_t) vtopte(va)) + +/* + * Routine: pmap_kextract/pmap_kextract_ma + * Function: + * Extract the physical/machine page address associated + * kernel virtual address. + */ + +static __inline vm_paddr_t +pmap_kextract_ma(vm_offset_t va) +{ + vm_paddr_t ma; + if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) { + ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1)); + } else { + ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK); + } + return ma; +} + +static __inline vm_paddr_t +pmap_kextract(vm_offset_t va) +{ + return xpmap_mtop(pmap_kextract_ma(va)); +} + +#define vtophys(va) pmap_kextract(((vm_offset_t) (va))) +#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va))) + +static __inline pt_entry_t +pte_load_clear(pt_entry_t *ptep) +{ + pt_entry_t r; + + r = PT_GET(ptep); + PT_CLEAR_VA(ptep, TRUE); + return (r); +} +static __inline pt_entry_t +pte_load_store(pt_entry_t *ptep, pt_entry_t v) +{ + pt_entry_t r; + r = PT_GET(ptep); + PT_SET_VA_MA(ptep, v, TRUE); + return (r); +} + +#define pte_store(ptep, pte) PT_SET_VA_MA(ptep, pte, TRUE); +#define pte_clear(pte) PT_CLEAR_VA(pte, TRUE); + + +#endif /* _KERNEL */ + +/* + * Pmap stuff + */ +struct pv_entry; + +struct md_page { + int pv_list_count; + TAILQ_HEAD(,pv_entry) pv_list; +}; + +struct pmap { + struct mtx pm_mtx; + pd_entry_t *pm_pdir; /* KVA of page directory */ + TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ + u_int pm_active; /* active on cpus */ + struct pmap_statistics pm_stats; /* pmap statistics */ + LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ +}; + + +typedef struct pmap *pmap_t; + +#ifdef _KERNEL +extern struct pmap kernel_pmap_store; +#define kernel_pmap 
(&kernel_pmap_store) + +#define PMAP_LOCK(pmap)mtx_lock(&(pmap)->pm_mtx) +#define PMAP_LOCK_ASSERT(pmap, type) \ +mtx_assert(&(pmap)->pm_mtx, (type)) +#define PMAP_LOCK_DESTROY(pmap)mtx_destroy(&(pmap)->pm_mtx) +#define PMAP_LOCK_INIT(pmap)mtx_init(&(pmap)->pm_mtx, "pmap", \ + NULL, MTX_DEF | MTX_DUPOK) +#define PMAP_LOCKED(pmap)mtx_owned(&(pmap)->pm_mtx) +#define PMAP_MTX(pmap)(&(pmap)->pm_mtx) +#define PMAP_TRYLOCK(pmap)mtx_trylock(&(pmap)->pm_mtx) +#define PMAP_UNLOCK(pmap)mtx_unlock(&(pmap)->pm_mtx) + +#endif + +/* + * For each vm_page_t, there is a list of all currently valid virtual + * mappings of that page. An entry is a pv_entry_t, the list is pv_table. + */ +typedef struct pv_entry { + pmap_t pv_pmap; /* pmap where mapping lies */ + vm_offset_t pv_va; /* virtual address for mapping */ + TAILQ_ENTRY(pv_entry) pv_list; + TAILQ_ENTRY(pv_entry) pv_plist; +} *pv_entry_t; + +#ifdef _KERNEL + +#define NPPROVMTRR 8 +#define PPRO_VMTRRphysBase0 0x200 +#define PPRO_VMTRRphysMask0 0x201 +struct ppro_vmtrr { + u_int64_t base, mask; +}; +extern struct ppro_vmtrr PPro_vmtrr[NPPROVMTRR]; + +extern caddr_t CADDR1; +extern pt_entry_t *CMAP1; +extern vm_paddr_t avail_end; +extern vm_paddr_t phys_avail[]; +extern int pseflag; +extern int pgeflag; +extern char *ptvmmap; /* poor name! 
*/ +extern vm_offset_t virtual_avail; +extern vm_offset_t virtual_end; + +#define pmap_page_is_mapped(m)(!TAILQ_EMPTY(&(m)->md.pv_list)) + +void pmap_bootstrap(vm_paddr_t, vm_paddr_t); +void pmap_kenter(vm_offset_t va, vm_paddr_t pa); +void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa); +void *pmap_kenter_temporary(vm_paddr_t pa, int i); +void pmap_kremove(vm_offset_t); +void *pmap_mapdev(vm_paddr_t, vm_size_t); +void pmap_unmapdev(vm_offset_t, vm_size_t); +pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2; +void pmap_set_pg(void); +void pmap_invalidate_page(pmap_t, vm_offset_t); +void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); +void pmap_invalidate_all(pmap_t); + +void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len); +void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len); + + +#endif /* _KERNEL */ + +#endif /* !LOCORE */ + +#endif /* !_MACHINE_PMAP_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/segments.h b/freebsd-5.3-xen-sparse/i386-xen/include/segments.h new file mode 100644 index 0000000000..85cc20c1f5 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/segments.h @@ -0,0 +1,260 @@ +/*- + * Copyright (c) 1989, 1990 William F. Jolitz + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)segments.h 7.1 (Berkeley) 5/9/91 + * $FreeBSD: src/sys/i386/include/segments.h,v 1.36 2003/11/03 21:12:04 jhb Exp $ + */ + +#ifndef _MACHINE_SEGMENTS_H_ +#define _MACHINE_SEGMENTS_H_ + +/* + * 386 Segmentation Data Structures and definitions + * William F. 
Jolitz (william@ernie.berkeley.edu) 6/20/1989 + */ + +/* + * Selectors + */ + +#define ISPL(s) ((s)&3) /* what is the priority level of a selector */ +#define SEL_KPL 1 /* kernel priority level */ +#define SEL_UPL 3 /* user priority level */ +#define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */ +#define SEL_LDT 4 /* local descriptor table */ +#define IDXSEL(s) (((s)>>3) & 0x1fff) /* index of selector */ +#define LSEL(s,r) (((s)<<3) | SEL_LDT | r) /* a local selector */ +#define GSEL(s,r) (((s)<<3) | r) /* a global selector */ + +/* + * Memory and System segment descriptors + */ +struct segment_descriptor { + unsigned sd_lolimit:16 ; /* segment extent (lsb) */ + unsigned sd_lobase:24 __packed; /* segment base address (lsb) */ + unsigned sd_type:5 ; /* segment type */ + unsigned sd_dpl:2 ; /* segment descriptor priority level */ + unsigned sd_p:1 ; /* segment descriptor present */ + unsigned sd_hilimit:4 ; /* segment extent (msb) */ + unsigned sd_xx:2 ; /* unused */ + unsigned sd_def32:1 ; /* default 32 vs 16 bit size */ + unsigned sd_gran:1 ; /* limit granularity (byte/page units)*/ + unsigned sd_hibase:8 ; /* segment base address (msb) */ +} ; + +/* + * Gate descriptors (e.g. 
indirect descriptors) + */ +struct gate_descriptor { + unsigned gd_looffset:16 ; /* gate offset (lsb) */ + unsigned gd_selector:16 ; /* gate segment selector */ + unsigned gd_stkcpy:5 ; /* number of stack wds to cpy */ + unsigned gd_xx:3 ; /* unused */ + unsigned gd_type:5 ; /* segment type */ + unsigned gd_dpl:2 ; /* segment descriptor priority level */ + unsigned gd_p:1 ; /* segment descriptor present */ + unsigned gd_hioffset:16 ; /* gate offset (msb) */ +} ; + +/* + * Generic descriptor + */ +union descriptor { + struct segment_descriptor sd; + struct gate_descriptor gd; +}; + + /* system segments and gate types */ +#define SDT_SYSNULL 0 /* system null */ +#define SDT_SYS286TSS 1 /* system 286 TSS available */ +#define SDT_SYSLDT 2 /* system local descriptor table */ +#define SDT_SYS286BSY 3 /* system 286 TSS busy */ +#define SDT_SYS286CGT 4 /* system 286 call gate */ +#define SDT_SYSTASKGT 5 /* system task gate */ +#define SDT_SYS286IGT 6 /* system 286 interrupt gate */ +#define SDT_SYS286TGT 7 /* system 286 trap gate */ +#define SDT_SYSNULL2 8 /* system null again */ +#define SDT_SYS386TSS 9 /* system 386 TSS available */ +#define SDT_SYSNULL3 10 /* system null again */ +#define SDT_SYS386BSY 11 /* system 386 TSS busy */ +#define SDT_SYS386CGT 12 /* system 386 call gate */ +#define SDT_SYSNULL4 13 /* system null again */ +#define SDT_SYS386IGT 14 /* system 386 interrupt gate */ +#define SDT_SYS386TGT 15 /* system 386 trap gate */ + + /* memory segment types */ +#define SDT_MEMRO 16 /* memory read only */ +#define SDT_MEMROA 17 /* memory read only accessed */ +#define SDT_MEMRW 18 /* memory read write */ +#define SDT_MEMRWA 19 /* memory read write accessed */ +#define SDT_MEMROD 20 /* memory read only expand dwn limit */ +#define SDT_MEMRODA 21 /* memory read only expand dwn limit accessed */ +#define SDT_MEMRWD 22 /* memory read write expand dwn limit */ +#define SDT_MEMRWDA 23 /* memory read write expand dwn limit accessed */ +#define SDT_MEME 24 /* memory 
execute only */ +#define SDT_MEMEA 25 /* memory execute only accessed */ +#define SDT_MEMER 26 /* memory execute read */ +#define SDT_MEMERA 27 /* memory execute read accessed */ +#define SDT_MEMEC 28 /* memory execute only conforming */ +#define SDT_MEMEAC 29 /* memory execute only accessed conforming */ +#define SDT_MEMERC 30 /* memory execute read conforming */ +#define SDT_MEMERAC 31 /* memory execute read accessed conforming */ + +/* + * Software definitions are in this convenient format, + * which are translated into inconvenient segment descriptors + * when needed to be used by the 386 hardware + */ + +struct soft_segment_descriptor { + unsigned ssd_base ; /* segment base address */ + unsigned ssd_limit ; /* segment extent */ + unsigned ssd_type:5 ; /* segment type */ + unsigned ssd_dpl:2 ; /* segment descriptor priority level */ + unsigned ssd_p:1 ; /* segment descriptor present */ + unsigned ssd_xx:4 ; /* unused */ + unsigned ssd_xx1:2 ; /* unused */ + unsigned ssd_def32:1 ; /* default 32 vs 16 bit size */ + unsigned ssd_gran:1 ; /* limit granularity (byte/page units)*/ +}; + +/* + * region descriptors, used to load gdt/idt tables before segments yet exist. 
+ */ +struct region_descriptor { + unsigned rd_limit:16; /* segment extent */ + unsigned rd_base:32 __packed; /* base address */ +}; + +/* + * Segment Protection Exception code bits + */ + +#define SEGEX_EXT 0x01 /* recursive or externally induced */ +#define SEGEX_IDT 0x02 /* interrupt descriptor table */ +#define SEGEX_TI 0x04 /* local descriptor table */ + /* other bits are affected descriptor index */ +#define SEGEX_IDX(s) (((s)>>3)&0x1fff) + +/* + * Size of IDT table + */ + +#define NIDT 256 /* 32 reserved, 0x80 syscall, most are h/w */ +#define NRSVIDT 32 /* reserved entries for cpu exceptions */ + +/* + * Entries in the Interrupt Descriptor Table (IDT) + */ +#define IDT_DE 0 /* #DE: Divide Error */ +#define IDT_DB 1 /* #DB: Debug */ +#define IDT_NMI 2 /* Nonmaskable External Interrupt */ +#define IDT_BP 3 /* #BP: Breakpoint */ +#define IDT_OF 4 /* #OF: Overflow */ +#define IDT_BR 5 /* #BR: Bound Range Exceeded */ +#define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */ +#define IDT_NM 7 /* #NM: No Math Coprocessor */ +#define IDT_DF 8 /* #DF: Double Fault */ +#define IDT_FPUGP 9 /* Coprocessor Segment Overrun */ +#define IDT_TS 10 /* #TS: Invalid TSS */ +#define IDT_NP 11 /* #NP: Segment Not Present */ +#define IDT_SS 12 /* #SS: Stack Segment Fault */ +#define IDT_GP 13 /* #GP: General Protection Fault */ +#define IDT_PF 14 /* #PF: Page Fault */ +#define IDT_MF 16 /* #MF: FPU Floating-Point Error */ +#define IDT_AC 17 /* #AC: Alignment Check */ +#define IDT_MC 18 /* #MC: Machine Check */ +#define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ +#define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. 
*/ +#define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ + +/* + * Entries in the Global Descriptor Table (GDT) + */ +#define GNULL_SEL 0 /* Null Descriptor */ +#if 0 +#define GCODE_SEL 1 /* Kernel Code Descriptor */ +#define GDATA_SEL 2 /* Kernel Data Descriptor */ +#else +#define GCODE_SEL (__KERNEL_CS >> 3) /* Kernel Code Descriptor */ +#define GDATA_SEL (__KERNEL_DS >> 3) /* Kernel Data Descriptor */ +#endif +#define GPRIV_SEL 3 /* SMP Per-Processor Private Data */ +#define GPROC0_SEL 4 /* Task state process slot zero and up */ +#define GLDT_SEL 5 /* LDT - eventually one per process */ +#define GUSERLDT_SEL 6 /* User LDT */ +#define GTGATE_SEL 7 /* Process task switch gate */ +#define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be entry 8) */ +#define GPANIC_SEL 9 /* Task state to consider panic from */ +#define GBIOSCODE32_SEL 10 /* BIOS interface (32bit Code) */ +#define GBIOSCODE16_SEL 11 /* BIOS interface (16bit Code) */ +#define GBIOSDATA_SEL 12 /* BIOS interface (Data) */ +#define GBIOSUTIL_SEL 13 /* BIOS interface (Utility) */ +#define GBIOSARGS_SEL 14 /* BIOS interface (Arguments) */ + +#define NGDT 4 + +/* + * Entries in the Local Descriptor Table (LDT) + */ +#define LSYS5CALLS_SEL 0 /* forced by intel BCS */ +#define LSYS5SIGR_SEL 1 +#define L43BSDCALLS_SEL 2 /* notyet */ +#define LUCODE_SEL 3 +#define LSOL26CALLS_SEL 4 /* Solaris >= 2.6 system call gate */ +#define LUDATA_SEL 5 +/* separate stack, es,fs,gs sels ? 
*/ +/* #define LPOSIXCALLS_SEL 5*/ /* notyet */ +#define LBSDICALLS_SEL 16 /* BSDI system call gate */ +#define NLDT (LBSDICALLS_SEL + 1) + +#ifdef _KERNEL +extern int _default_ldt; +extern union descriptor *gdt; +extern struct soft_segment_descriptor gdt_segs[]; +extern struct gate_descriptor *idt; +extern union descriptor *ldt; +extern struct region_descriptor r_gdt, r_idt; + +void lgdt(struct region_descriptor *rdp); +void lgdt_finish(void); +void sdtossd(struct segment_descriptor *sdp, + struct soft_segment_descriptor *ssdp); +void ssdtosd(struct soft_segment_descriptor *ssdp, + struct segment_descriptor *sdp); +#endif /* _KERNEL */ + +#endif /* !_MACHINE_SEGMENTS_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h b/freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h new file mode 100644 index 0000000000..31ec3d3468 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h @@ -0,0 +1,82 @@ +#ifndef __XEN_SYNCH_BITOPS_H__ +#define __XEN_SYNCH_BITOPS_H__ + +/* + * Copyright 1992, Linus Torvalds. + * Heavily modified to provide guaranteed strong synchronisation + * when communicating with Xen or other guest OSes running on other CPUs. 
+ */ + + +#define ADDR (*(volatile long *) addr) + +static __inline__ void synch_set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btsl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ void synch_clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btrl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ void synch_change_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btcl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btsl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btrl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__ ( + "lock btcl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_const_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & + (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int synch_var_test_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "btl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) ); + return oldbit; +} + +#define synch_test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? 
\ + synch_const_test_bit((nr),(addr)) : \ + synch_var_test_bit((nr),(addr))) + +#endif /* __XEN_SYNCH_BITOPS_H__ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/trap.h b/freebsd-5.3-xen-sparse/i386-xen/include/trap.h new file mode 100644 index 0000000000..c61beb90aa --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/trap.h @@ -0,0 +1,111 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)trap.h 5.4 (Berkeley) 5/9/91 + * $FreeBSD: src/sys/i386/include/trap.h,v 1.13 2001/07/12 06:32:51 peter Exp $ + */ + +#ifndef _MACHINE_TRAP_H_ +#define _MACHINE_TRAP_H_ + +/* + * Trap type values + * also known in trap.c for name strings + */ + +#define T_PRIVINFLT 1 /* privileged instruction */ +#define T_BPTFLT 3 /* breakpoint instruction */ +#define T_ARITHTRAP 6 /* arithmetic trap */ +#define T_PROTFLT 9 /* protection fault */ +#define T_TRCTRAP 10 /* debug exception (sic) */ +#define T_PAGEFLT 12 /* page fault */ +#define T_ALIGNFLT 14 /* alignment fault */ + +#define T_NESTED 16 +#define T_HYPCALLBACK 17 /* hypervisor callback */ + + +#define T_DIVIDE 18 /* integer divide fault */ +#define T_NMI 19 /* non-maskable trap */ +#define T_OFLOW 20 /* overflow trap */ +#define T_BOUND 21 /* bound instruction fault */ +#define T_DNA 22 /* device not available fault */ +#define T_DOUBLEFLT 23 /* double fault */ +#define T_FPOPFLT 24 /* fp coprocessor operand fetch fault */ +#define T_TSSFLT 25 /* invalid tss fault */ +#define T_SEGNPFLT 26 /* segment not present fault */ +#define T_STKFLT 27 /* stack fault */ +#define T_MCHK 28 /* machine check trap */ +#define T_XMMFLT 29 /* SIMD floating-point exception */ +#define T_RESERVED 30 /* reserved (unknown) */ + +/* XXX most of the following codes aren't used, but could be. 
*/ + +/* definitions for <sys/signal.h> */ +#define ILL_RESAD_FAULT T_RESADFLT +#define ILL_PRIVIN_FAULT T_PRIVINFLT +#define ILL_RESOP_FAULT T_RESOPFLT +#define ILL_ALIGN_FAULT T_ALIGNFLT +#define ILL_FPOP_FAULT T_FPOPFLT /* coprocessor operand fault */ + +/* portable macros for SIGFPE/ARITHTRAP */ +#define FPE_INTOVF 1 /* integer overflow */ +#define FPE_INTDIV 2 /* integer divide by zero */ +#define FPE_FLTDIV 3 /* floating point divide by zero */ +#define FPE_FLTOVF 4 /* floating point overflow */ +#define FPE_FLTUND 5 /* floating point underflow */ +#define FPE_FLTRES 6 /* floating point inexact result */ +#define FPE_FLTINV 7 /* invalid floating point operation */ +#define FPE_FLTSUB 8 /* subscript out of range */ + +/* old FreeBSD macros, deprecated */ +#define FPE_INTOVF_TRAP 0x1 /* integer overflow */ +#define FPE_INTDIV_TRAP 0x2 /* integer divide by zero */ +#define FPE_FLTDIV_TRAP 0x3 /* floating/decimal divide by zero */ +#define FPE_FLTOVF_TRAP 0x4 /* floating overflow */ +#define FPE_FLTUND_TRAP 0x5 /* floating underflow */ +#define FPE_FPU_NP_TRAP 0x6 /* floating point unit not present */ +#define FPE_SUBRNG_TRAP 0x7 /* subrange out of bounds */ + +/* codes for SIGBUS */ +#define BUS_PAGE_FAULT T_PAGEFLT /* page fault protection base */ +#define BUS_SEGNP_FAULT T_SEGNPFLT /* segment not present */ +#define BUS_STK_FAULT T_STKFLT /* stack segment */ +#define BUS_SEGM_FAULT T_RESERVED /* segment protection base */ + +/* Trap's coming from user mode */ +#define T_USER 0x100 + +#endif /* !_MACHINE_TRAP_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h b/freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h new file mode 100644 index 0000000000..eda584b62e --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 1999 Marcel Moolenaar + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/include/ucontext.h,v 1.10 2002/12/02 19:58:55 deischen Exp $ + */ + +#ifndef _MACHINE_UCONTEXT_H_ +#define _MACHINE_UCONTEXT_H_ + +typedef struct __mcontext { + /* + * The first 20 fields must match the definition of + * sigcontext. So that we can support sigcontext + * and ucontext_t at the same time. + */ + int mc_onstack; /* XXX - sigcontext compat. 
*/ + int mc_gs; /* machine state (struct trapframe) */ + int mc_fs; + int mc_es; + int mc_ds; + int mc_edi; + int mc_esi; + int mc_ebp; + int mc_isp; + int mc_ebx; + int mc_edx; + int mc_ecx; + int mc_eax; + int mc_trapno; + int mc_cr2; + int mc_err; + int mc_eip; + int mc_cs; + int mc_eflags; + int mc_esp; + int mc_ss; + + int mc_len; /* sizeof(mcontext_t) */ +#define _MC_FPFMT_NODEV 0x10000 /* device not present or configured */ +#define _MC_FPFMT_387 0x10001 +#define _MC_FPFMT_XMM 0x10002 + int mc_fpformat; +#define _MC_FPOWNED_NONE 0x20000 /* FP state not used */ +#define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */ +#define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */ + int mc_ownedfp; + /* + * See <machine/npx.h> for the internals of mc_fpstate[]. + */ + int mc_fpstate[128] __aligned(16); + int mc_spare2[8]; +} mcontext_t; + +#if defined(_KERNEL) && defined(COMPAT_FREEBSD4) +struct mcontext4 { + int mc_onstack; /* XXX - sigcontext compat. */ + int mc_gs; /* machine state (struct trapframe) */ + int mc_fs; + int mc_es; + int mc_ds; + int mc_edi; + int mc_esi; + int mc_ebp; + int mc_isp; + int mc_ebx; + int mc_edx; + int mc_ecx; + int mc_eax; + int mc_trapno; + int mc_err; + int mc_eip; + int mc_cs; + int mc_eflags; + int mc_esp; /* machine state */ + int mc_ss; + int mc_fpregs[28]; /* env87 + fpacc87 + u_long */ + int __spare__[17]; +}; +#endif + +#endif /* !_MACHINE_UCONTEXT_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h b/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h new file mode 100644 index 0000000000..7fa9af3c68 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h @@ -0,0 +1,141 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 1994 John S. Dyson + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 + * $FreeBSD: src/sys/i386/include/vmparam.h,v 1.37 2003/10/01 23:46:08 peter Exp $ + */ + + +#ifndef _MACHINE_VMPARAM_H_ +#define _MACHINE_VMPARAM_H_ 1 + +/* + * Machine dependent constants for 386. 
+ */ + +#define VM_PROT_READ_IS_EXEC /* if you can read -- then you can exec */ + +/* + * Virtual memory related constants, all in bytes + */ +#define MAXTSIZ (128UL*1024*1024) /* max text size */ +#ifndef DFLDSIZ +#define DFLDSIZ (128UL*1024*1024) /* initial data size limit */ +#endif +#ifndef MAXDSIZ +#define MAXDSIZ (512UL*1024*1024) /* max data size */ +#endif +#ifndef DFLSSIZ +#define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */ +#endif +#ifndef MAXSSIZ +#define MAXSSIZ (64UL*1024*1024) /* max stack size */ +#endif +#ifndef SGROWSIZ +#define SGROWSIZ (128UL*1024) /* amount to grow stack */ +#endif + +#define USRTEXT (1*PAGE_SIZE) /* base of user text XXX bogus */ + +/* + * The time for a process to be blocked before being very swappable. + * This is a number of seconds which the system takes as being a non-trivial + * amount of real time. You probably shouldn't change this; + * it is used in subtle ways (fractions and multiples of it are, that is, like + * half of a ``long time'', almost a long time, etc.) + * It is related to human patience and other factors which don't really + * change over time. + */ +#define MAXSLP 20 + + +/* + * Kernel physical load address. + */ +#ifndef KERNLOAD +#define KERNLOAD (1 << PDRSHIFT) +#endif + +/* + * Virtual addresses of things. Derived from the page directory and + * page table indexes from pmap.h for precision. 
+ * Because of the page that is both a PD and PT, it looks a little + * messy at times, but hey, we'll do anything to save a page :-) + */ + +#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1) +#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI) + +#define KERNBASE VADDR(KPTDI, 0) + +#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) +#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0) + +#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI-1, 0) + +#define USRSTACK VM_MAXUSER_ADDRESS + +#define VM_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) +#define VM_MIN_ADDRESS ((vm_offset_t)0) + +/* virtual sizes (bytes) for various kernel submaps */ +#ifndef VM_KMEM_SIZE +#define VM_KMEM_SIZE (12 * 1024 * 1024) +#endif + +/* + * How many physical pages per KVA page allocated. + * min(max(VM_KMEM_SIZE, Physical memory/VM_KMEM_SIZE_SCALE), VM_KMEM_SIZE_MAX) + * is the total KVA space allocated for kmem_map. + */ +#ifndef VM_KMEM_SIZE_SCALE +#define VM_KMEM_SIZE_SCALE (3) +#endif + +/* + * Ceiling on amount of kmem_map kva space. + */ +#ifndef VM_KMEM_SIZE_MAX +#define VM_KMEM_SIZE_MAX (320 * 1024 * 1024) +#endif + +/* initial pagein size of beginning of executable file */ +#ifndef VM_INITIAL_PAGEIN +#define VM_INITIAL_PAGEIN 16 +#endif + +#endif /* _MACHINE_VMPARAM_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h b/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h new file mode 100644 index 0000000000..e483fc535c --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h @@ -0,0 +1,293 @@ +/****************************************************************************** + * os.h + * + * random collection of macros and definition + */ + +#ifndef _OS_H_ +#define _OS_H_ + +#ifndef NULL +#define NULL (void *)0 +#endif + +/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented + a mechanism by which the user can annotate likely branch directions and + expect the blocks to be reordered appropriately. 
Define __builtin_expect + to nothing for earlier compilers. */ + +#if __GNUC__ == 2 && __GNUC_MINOR__ < 96 +#define __builtin_expect(x, expected_value) (x) +#endif + + + +/* + * These are the segment descriptors provided for us by the hypervisor. + * For now, these are hardwired -- guest OSes cannot update the GDT + * or LDT. + * + * It shouldn't be hard to support descriptor-table frobbing -- let me + * know if the BSD or XP ports require flexibility here. + */ + + +/* + * these are also defined in hypervisor-if.h but can't be pulled in as + * they are used in start of day assembly. Need to clean up the .h files + * a bit more... + */ + +#ifndef FLAT_RING1_CS +#define FLAT_RING1_CS 0x0819 +#define FLAT_RING1_DS 0x0821 +#define FLAT_RING3_CS 0x082b +#define FLAT_RING3_DS 0x0833 +#endif + +#define __KERNEL_CS FLAT_RING1_CS +#define __KERNEL_DS FLAT_RING1_DS + +/* Everything below this point is not included by assembler (.S) files. */ +#ifndef __ASSEMBLY__ +#include <sys/types.h> + +#include <machine/hypervisor-ifs.h> +void printk(const char *fmt, ...); + +/* some function prototypes */ +void trap_init(void); + + +/* + * STI/CLI equivalents. These basically set and clear the virtual + * event_enable flag in teh shared_info structure. Note that when + * the enable bit is set, there may be pending events to be handled. + * We may therefore call into do_hypervisor_callback() directly. 
+ */ +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + +#define __cli() \ +do { \ + HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + +#define __sti() \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + barrier(); \ + _shared->vcpu_data[0].evtchn_upcall_mask = 0; \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ +} while (0) + +#define __save_flags(x) \ +do { \ + (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \ +} while (0) + +#define __restore_flags(x) \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + barrier(); \ + if ( (_shared->vcpu_data[0].evtchn_upcall_mask = (x)) == 0 ) { \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ + } \ +} while (0) + +#define __save_and_cli(x) \ +do { \ + (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \ + HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + +#define __save_and_sti(x) \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + barrier(); \ + (x) = _shared->vcpu_data[0].evtchn_upcall_mask; \ + _shared->vcpu_data[0].evtchn_upcall_mask = 0; \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ +} while (0) + +#ifdef SMP +/* extra macros need for the SMP case */ +#error "global_irq_* not defined" +#endif + +#define cli() __cli() +#define sti() __sti() +#define save_flags(x) __save_flags(x) +#define restore_flags(x) __restore_flags(x) +#define save_and_cli(x) __save_and_cli(x) +#define save_and_sti(x) __save_and_sti(x) + +#define local_irq_save(x) __save_and_cli(x) +#define local_irq_set(x) __save_and_sti(x) +#define local_irq_restore(x) 
__restore_flags(x) +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() + +#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));} +#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); } + +#define mb() +#define rmb() +#define smp_mb() +#define wmb() + + + +/* This is a barrier for the compiler only, NOT the processor! */ +#define barrier() __asm__ __volatile__("": : :"memory") + +#define LOCK_PREFIX "" +#define LOCK "" +#define ADDR (*(volatile long *) addr) +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. + */ +typedef struct { volatile int counter; } atomic_t; + + + +#define xen_xchg(ptr,v) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((volatile struct __xchg_dummy *)(x)) +static __inline unsigned long __xchg(unsigned long x, volatile void * ptr, + int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. 
+ */ +static __inline__ int test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int constant_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int variable_test_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static __inline__ void clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. 
Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_inc(atomic_t *v) +{ + __asm__ __volatile__( + LOCK "incl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + + +#endif /* !__ASSEMBLY__ */ + +#endif /* _OS_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h b/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h new file mode 100644 index 0000000000..e35eafa5d2 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h @@ -0,0 +1,50 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */ +#ifndef _XEN_INTR_H_ +#define _XEN_INTR_H_ + +/* +* The flat IRQ space is divided into two regions: +* 1. A one-to-one mapping of real physical IRQs. This space is only used +* if we have physical device-access privilege. This region is at the +* start of the IRQ space so that existing device drivers do not need +* to be modified to translate physical IRQ numbers into our IRQ space. +* 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These +* are bound using the provided bind/unbind functions. +*/ + +#define PIRQ_BASE 0 +#define NR_PIRQS 128 + +#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) +#define NR_DYNIRQS 128 + +#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) + +#define pirq_to_irq(_x) ((_x) + PIRQ_BASE) +#define irq_to_pirq(_x) ((_x) - PIRQ_BASE) + +#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE) +#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE) + +/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */ +extern int bind_virq_to_irq(int virq); +extern void unbind_virq_from_irq(int virq); +extern int bind_evtchn_to_irq(int evtchn); +extern void unbind_evtchn_from_irq(int evtchn); + +static __inline__ int irq_cannonicalize(int irq) +{ + return (irq == 2) ? 
9 : irq; +} + +extern void disable_irq(unsigned int); +extern void disable_irq_nosync(unsigned int); +extern void enable_irq(unsigned int); + +extern void irq_suspend(void); +extern void irq_resume(void); + +extern void idle_block(void); + + +#endif /* _XEN_INTR_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h new file mode 100644 index 0000000000..93ffd7853a --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h @@ -0,0 +1,85 @@ +/* $NetBSD:$ */ + +/* + * + * Copyright (c) 2004 Christian Limpach. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Christian Limpach. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _XEN_XENFUNC_H_ +#define _XEN_XENFUNC_H_ + +#include <machine/xen-os.h> +#include <machine/hypervisor.h> +#include <machine/xenpmap.h> +#include <machine/segments.h> +#include <sys/pcpu.h> +#define BKPT __asm__("int3"); +#define XPQ_CALL_DEPTH 5 +#define XPQ_CALL_COUNT 2 +#define PG_PRIV PG_AVAIL3 +typedef struct { + unsigned long pt_ref; + unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH]; +} pteinfo_t; + +extern pteinfo_t *pteinfo_list; +#ifdef XENDEBUG_LOW +#define __PRINTK(x) printk x +#else +#define __PRINTK(x) +#endif + +char *xen_setbootenv(char *cmd_line); +int xen_boothowto(char *envp); +void load_cr3(uint32_t val); +void xen_set_ldt(vm_offset_t, uint32_t); +void xen_machphys_update(unsigned long, unsigned long); +void xen_update_descriptor(union descriptor *, union descriptor *); +void lldt(u_short sel); +/* + * Invalidate a patricular VA on all cpus + * + * N.B. Made these global for external loadable modules to reference. 
+ */ +static __inline void +invlpg(u_int addr) +{ + xpq_queue_invlpg(addr); +} + +static __inline void +invltlb(void) +{ + xpq_queue_tlb_flush(); + mcl_flush_queue(); +} + + +#endif /* _XEN_XENFUNC_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h new file mode 100644 index 0000000000..f445096228 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h @@ -0,0 +1,132 @@ +/* $NetBSD:$ */ + +/* + * + * Copyright (c) 2004 Christian Limpach. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Christian Limpach. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _XEN_XENPMAP_H_ +#define _XEN_XENPMAP_H_ +#include <machine/xenvar.h> +void xpq_physbcopy(const unsigned long *, unsigned long, size_t); +void xpq_queue_invlpg(vm_offset_t); +void xpq_queue_pt_update(pt_entry_t *, pt_entry_t); +void xpq_queue_pt_switch(uint32_t); +void xpq_queue_set_ldt(vm_offset_t, uint32_t); +void xpq_queue_tlb_flush(void); +void xpq_queue_pin_table(uint32_t, int); +void xpq_queue_unpin_table(uint32_t); +void xpq_record(unsigned long, unsigned long); +void mcl_queue_pt_update(vm_offset_t, vm_offset_t); +void mcl_flush_queue(void); +void pmap_ref(pt_entry_t *pte, unsigned long ma); + + +#ifdef PMAP_DEBUG +#define PMAP_REF pmap_ref +#define PMAP_DEC_REF_PAGE pmap_dec_ref_page +#define PMAP_MARK_PRIV pmap_mark_privileged +#define PMAP_MARK_UNPRIV pmap_mark_unprivileged +#else +#define PMAP_MARK_PRIV(a) +#define PMAP_MARK_UNPRIV(a) +#define PMAP_REF(a, b) +#define PMAP_DEC_REF_PAGE(a) +#endif + +#define ALWAYS_SYNC 0 + +#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ + +#define XPQ_PIN_L1_TABLE 1 +#define XPQ_PIN_L2_TABLE 2 + +#define PT_GET(_ptp) \ + (pmap_valid_entry(*(_ptp)) ? 
xpmap_mtop(*(_ptp)) : *(_ptp)) +#define PT_SET_VA(_ptp,_npte,sync) do { \ + PMAP_REF((_ptp), xpmap_ptom(_npte)); \ + xpq_queue_pt_update((pt_entry_t *)vtomach((_ptp)), \ + xpmap_ptom((_npte))); \ + if (sync || ALWAYS_SYNC) \ + mcl_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ + PMAP_REF((_ptp), (_npte)); \ + xpq_queue_pt_update((pt_entry_t *)vtomach((_ptp)), (_npte)); \ + if (sync || ALWAYS_SYNC) \ + mcl_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PT_CLEAR_VA(_ptp, sync) do { \ + PMAP_REF((pt_entry_t *)(_ptp), 0); \ + xpq_queue_pt_update((pt_entry_t *)vtomach(_ptp), 0); \ + if (sync || ALWAYS_SYNC) \ + mcl_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PT_CLEAR(_ptp, sync) do { \ + PMAP_REF((pt_entry_t *)(vtopte(_ptp)), 0); \ + mcl_queue_pt_update((unsigned long)_ptp, 0); \ + if (sync || ALWAYS_SYNC) \ + mcl_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PT_SET_MA(_va,_ma,sync) do { \ + PMAP_REF(vtopte((unsigned long)_va), (_ma)); \ + mcl_queue_pt_update((vm_offset_t )(_va), (_ma)); \ + if (sync || ALWAYS_SYNC) \ + mcl_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PT_SET(_va,_pa,sync) do { \ + PMAP_REF((pt_entry_t *)(vtopte(_va)), xpmap_ptom(_pa)); \ + mcl_queue_pt_update((vm_offset_t)(_va), \ + xpmap_ptom((_pa))); \ + if (sync || ALWAYS_SYNC) \ + mcl_flush_queue(); \ +} while (/*CONSTCOND*/0) + + + +#define PT_UPDATES_FLUSH() do { \ + mcl_flush_queue(); \ +} while (/*CONSTCOND*/0) + + +static __inline uint32_t +xpmap_mtop(uint32_t mpa) +{ + return (((xen_machine_phys[(mpa >> PAGE_SHIFT)]) << PAGE_SHIFT) + | (mpa & ~PG_FRAME)); +} + +static __inline vm_paddr_t +xpmap_ptom(uint32_t ppa) +{ + return phystomach(ppa) | (ppa & ~PG_FRAME); +} + +#endif /* _XEN_XENPMAP_H_ */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h new file mode 100644 index 0000000000..5a3d3acb0b --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h @@ 
-0,0 +1,30 @@ +#ifndef XENVAR_H_ +#define XENVAR_H_ + +#define XBOOTUP 0x1 +#define XPMAP 0x2 +extern int xendebug_flags; +#ifndef NOXENDEBUG +#define XENPRINTF printk +#else +#define XENPRINTF(x...) +#endif +extern unsigned long *xen_phys_machine; +#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__) +#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__) +#define TRACE_DEBUG(argflags, _f, _a...) \ +if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a); + +extern unsigned long *xen_machine_phys; +#define PTOM(i) (((unsigned long *)xen_phys_machine)[i]) +#define phystomach(pa) ((((unsigned long *)xen_phys_machine)[(pa >> PAGE_SHIFT)]) << PAGE_SHIFT) +void xpq_init(void); + +struct sockaddr_in; + +int xen_setnfshandle(void); +int setinaddr(struct sockaddr_in *addr, char *ipstr); + +#define RB_GDB_PAUSE RB_RESERVED1 + +#endif diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c b/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c new file mode 100644 index 0000000000..66c80f3ece --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c @@ -0,0 +1,925 @@ +/*- + * All rights reserved. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * XenoBSD block device driver + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <sys/bio.h> +#include <sys/bus.h> +#include <sys/conf.h> + +#include <machine/bus.h> +#include <sys/rman.h> +#include <machine/resource.h> +#include <machine/intr_machdep.h> +#include <machine/vmparam.h> + +#include <machine/hypervisor.h> +#include <machine/hypervisor-ifs.h> +#include <machine/xen-os.h> +#include <machine/xen_intr.h> +#include <machine/evtchn.h> + +#include <geom/geom_disk.h> +#include <machine/ctrl_if.h> +#include <machine/xenfunc.h> + +/* prototypes */ +struct xb_softc; +static void xb_startio(struct xb_softc *sc); +static void xb_vbdinit(void); +static void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp); +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id); + +struct xb_softc { + device_t xb_dev; + struct disk xb_disk; /* disk params */ + struct bio_queue_head xb_bioq; /* sort queue */ + struct resource *xb_irq; + void *xb_resp_handler; + int xb_unit; + int xb_flags; +#define XB_OPEN (1<<0) /* drive is open (can't shut down) */ +}; + +/* Control whether runtime update of vbds is enabled. 
*/ +#define ENABLE_VBD_UPDATE 1 + +#if ENABLE_VBD_UPDATE +static void vbd_update(void); +#else +static void vbd_update(void){}; +#endif + +#define BLKIF_STATE_CLOSED 0 +#define BLKIF_STATE_DISCONNECTED 1 +#define BLKIF_STATE_CONNECTED 2 + +static char *blkif_state_name[] = { + [BLKIF_STATE_CLOSED] = "closed", + [BLKIF_STATE_DISCONNECTED] = "disconnected", + [BLKIF_STATE_CONNECTED] = "connected", +}; + +static char * blkif_status_name[] = { + [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", + [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", + [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", + [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", +}; + +#define WPRINTK(fmt, args...) printk("[XEN] " fmt, ##args) + +static int blkif_handle; +static unsigned int blkif_state = BLKIF_STATE_CLOSED; +static unsigned int blkif_evtchn; +static unsigned int blkif_irq; + +static int blkif_control_rsp_valid; +static blkif_response_t blkif_control_rsp; + +static unsigned long xb_rec_ring_free; +blkif_request_t xb_rec_ring[BLKIF_RING_SIZE]; /* shadow recovery ring */ + +/* XXX move to xb_vbd.c when VBD update support is added */ +#define MAX_VBDS 64 +static vdisk_t xb_diskinfo[MAX_VBDS]; +static int xb_ndisks; + +#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ +#define XBD_SECTOR_SHFT 9 + +static unsigned int xb_kick_pending; + +static struct mtx blkif_io_lock; + +static blkif_ring_t *xb_blk_ring; +static BLKIF_RING_IDX xb_resp_cons; /* Response consumer for comms ring. */ +static BLKIF_RING_IDX xb_req_prod; /* Private request producer */ + +static int xb_recovery = 0; /* "Recovery in progress" flag. Protected + * by the blkif_io_lock */ + +/* We plug the I/O ring if the driver is suspended or if the ring is full. 
*/ +#define BLKIF_RING_FULL (((xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE) || \ + (blkif_state != BLKIF_STATE_CONNECTED)) + +void blkif_completion(blkif_request_t *req); +void xb_response_intr(void *); + +/* XXX: This isn't supported in FreeBSD, so ignore it for now. */ +#define TASK_UNINTERRUPTIBLE 0 + +static inline int +GET_ID_FROM_FREELIST( void ) +{ + unsigned long free = xb_rec_ring_free; + + KASSERT(free <= BLKIF_RING_SIZE, ("free %lu > BLKIF_RING_SIZE", free)); + + xb_rec_ring_free = xb_rec_ring[free].id; + + xb_rec_ring[free].id = 0x0fffffee; /* debug */ + + return free; +} + +static inline void +ADD_ID_TO_FREELIST( unsigned long id ) +{ + xb_rec_ring[id].id = xb_rec_ring_free; + xb_rec_ring_free = id; +} + +static inline void translate_req_to_pfn(blkif_request_t *xreq, + blkif_request_t *req) +{ + int i; + + xreq->operation = req->operation; + xreq->nr_segments = req->nr_segments; + xreq->device = req->device; + /* preserve id */ + xreq->sector_number = req->sector_number; + + for ( i = 0; i < req->nr_segments; i++ ){ + xreq->frame_and_sects[i] = xpmap_mtop(req->frame_and_sects[i]); + } +} + +static inline void translate_req_to_mfn(blkif_request_t *xreq, + blkif_request_t *req) +{ + int i; + + xreq->operation = req->operation; + xreq->nr_segments = req->nr_segments; + xreq->device = req->device; + xreq->id = req->id; /* copy id (unlike above) */ + xreq->sector_number = req->sector_number; + + for ( i = 0; i < req->nr_segments; i++ ){ + xreq->frame_and_sects[i] = xpmap_ptom(req->frame_and_sects[i]); + } +} + + +static inline void flush_requests(void) +{ + xb_blk_ring->req_prod = xb_req_prod; + notify_via_evtchn(blkif_evtchn); +} + + +#if ENABLE_VBD_UPDATE +static void vbd_update() +{ + XENPRINTF(">\n"); + XENPRINTF("<\n"); +} +#endif /* ENABLE_VBD_UPDATE */ + +void +xb_response_intr(void *xsc) +{ + struct xb_softc *sc = NULL; + struct bio *bp; + blkif_response_t *bret; + BLKIF_RING_IDX i, rp; + unsigned long flags; + + if (blkif_state == 
BLKIF_STATE_CLOSED) + return; + + mtx_lock_irqsave(&blkif_io_lock, flags); + + if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) || + unlikely(xb_recovery) ) { + mtx_unlock_irqrestore(&blkif_io_lock, flags); + return; + } + + rp = xb_blk_ring->resp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + /* sometimes we seem to lose i/o. stay in the interrupt handler while + * there is stuff to process: continually recheck the response producer. + */ + for ( i = xb_resp_cons; i != (rp = xb_blk_ring->resp_prod); i++ ) { + unsigned long id; + bret = &xb_blk_ring->ring[MASK_BLKIF_IDX(i)].resp; + + id = bret->id; + bp = (struct bio *)xb_rec_ring[id].id; + + blkif_completion(&xb_rec_ring[id]); + + ADD_ID_TO_FREELIST(id); /* overwrites req */ + + switch ( bret->operation ) { + case BLKIF_OP_READ: + /* had an unaligned buffer that needs to be copied */ + if (bp->bio_driver1) + bcopy(bp->bio_data, bp->bio_driver1, bp->bio_bcount); + case BLKIF_OP_WRITE: + + /* free the copy buffer */ + if (bp->bio_driver1) { + free(bp->bio_data, M_DEVBUF); + bp->bio_data = bp->bio_driver1; + bp->bio_driver1 = NULL; + } + + if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) { + XENPRINTF("Bad return from blkdev data request: %x\n", + bret->status); + bp->bio_flags |= BIO_ERROR; + } + + sc = (struct xb_softc *)bp->bio_disk->d_drv1; + + if (bp->bio_flags & BIO_ERROR) + bp->bio_error = EIO; + else + bp->bio_resid = 0; + + biodone(bp); + break; + case BLKIF_OP_PROBE: + memcpy(&blkif_control_rsp, bret, sizeof(*bret)); + blkif_control_rsp_valid = 1; + break; + default: + panic("received invalid operation"); + break; + } + } + + xb_resp_cons = i; + + if (sc && xb_kick_pending) { + xb_kick_pending = FALSE; + xb_startio(sc); + } + + mtx_unlock_irqrestore(&blkif_io_lock, flags); +} + +static int +xb_open(struct disk *dp) +{ + struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; + + if (sc == NULL) { + printk("xb%d: not found", sc->xb_unit); + return (ENXIO); + } + + /* block dev not active */ 
+ if (blkif_state != BLKIF_STATE_CONNECTED) { + printk("xb%d: bad state: %dn", sc->xb_unit, blkif_state); + return(ENXIO); + } + + sc->xb_flags |= XB_OPEN; + return (0); +} + +static int +xb_close(struct disk *dp) +{ + struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; + + if (sc == NULL) + return (ENXIO); + sc->xb_flags &= ~XB_OPEN; + return (0); +} + +static int +xb_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) +{ + struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; + + TRACE_ENTER; + + if (sc == NULL) + return (ENXIO); + + return (ENOTTY); +} + +/* + * Dequeue buffers and place them in the shared communication ring. + * Return when no more requests can be accepted or all buffers have + * been queued. + * + * Signal XEN once the ring has been filled out. + */ +static void +xb_startio(struct xb_softc *sc) +{ + struct bio *bp; + unsigned long buffer_ma; + blkif_request_t *req; + int s, queued = 0; + unsigned long id; + unsigned int fsect, lsect; + + + if (unlikely(blkif_state != BLKIF_STATE_CONNECTED)) + return; + + s = splbio(); + + for (bp = bioq_first(&sc->xb_bioq); + bp && !BLKIF_RING_FULL; + xb_req_prod++, queued++, bp = bioq_first(&sc->xb_bioq)) { + + /* Check if the buffer is properly aligned */ + if ((vm_offset_t)bp->bio_data & PAGE_MASK) { + int align = (bp->bio_bcount < PAGE_SIZE/2) ? 
XBD_SECTOR_SIZE : + PAGE_SIZE; + caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, + M_WAITOK); + caddr_t alignbuf = (char *)roundup2((u_long)newbuf, align); + + /* save a copy of the current buffer */ + bp->bio_driver1 = bp->bio_data; + + /* Copy the data for a write */ + if (bp->bio_cmd == BIO_WRITE) + bcopy(bp->bio_data, alignbuf, bp->bio_bcount); + bp->bio_data = alignbuf; + } + + bioq_remove(&sc->xb_bioq, bp); + buffer_ma = vtomach(bp->bio_data); + fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; + lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1; + + KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0, + ("XEN buffer must be sector aligned")); + KASSERT(lsect <= 7, + ("XEN disk driver data cannot cross a page boundary")); + + buffer_ma &= ~PAGE_MASK; + + /* Fill out a communications ring structure. */ + req = &xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req; + id = GET_ID_FROM_FREELIST(); + xb_rec_ring[id].id= (unsigned long)bp; + + req->id = id; + req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : + BLKIF_OP_WRITE; + + req->sector_number= (blkif_sector_t)bp->bio_pblkno; + req->device = xb_diskinfo[sc->xb_unit].device; + + req->nr_segments = 1; /* not doing scatter/gather since buffer + * chaining is not supported. + */ + /* + * upper bits represent the machine address of the buffer and the + * lower bits is the number of sectors to be read/written. + */ + req->frame_and_sects[0] = buffer_ma | (fsect << 3) | lsect; + + /* Keep a private copy so we can reissue requests when recovering. */ + translate_req_to_pfn( &xb_rec_ring[id], req); + + } + + if (BLKIF_RING_FULL) + xb_kick_pending = TRUE; + + if (queued != 0) + flush_requests(); + splx(s); +} + +/* + * Read/write routine for a buffer. Finds the proper unit, place it on + * the sortq and kick the controller. + */ +static void +xb_strategy(struct bio *bp) +{ + struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; + int s; + + /* bogus disk? 
*/ + if (sc == NULL) { + bp->bio_error = EINVAL; + bp->bio_flags |= BIO_ERROR; + goto bad; + } + + s = splbio(); + /* + * Place it in the queue of disk activities for this disk + */ + bioq_disksort(&sc->xb_bioq, bp); + splx(s); + + xb_startio(sc); + return; + + bad: + /* + * Correctly set the bio to indicate a failed tranfer. + */ + bp->bio_resid = bp->bio_bcount; + biodone(bp); + return; +} + + +static int +xb_create(int unit) +{ + struct xb_softc *sc; + int error = 0; + + sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK); + sc->xb_unit = unit; + + memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); + sc->xb_disk.d_unit = unit; + sc->xb_disk.d_open = xb_open; + sc->xb_disk.d_close = xb_close; + sc->xb_disk.d_ioctl = xb_ioctl; + sc->xb_disk.d_strategy = xb_strategy; + sc->xb_disk.d_name = "xbd"; + sc->xb_disk.d_drv1 = sc; + sc->xb_disk.d_sectorsize = XBD_SECTOR_SIZE; + sc->xb_disk.d_mediasize = xb_diskinfo[sc->xb_unit].capacity + << XBD_SECTOR_SHFT; +#if 0 + sc->xb_disk.d_maxsize = DFLTPHYS; +#else /* XXX: xen can't handle large single i/o requests */ + sc->xb_disk.d_maxsize = 4096; +#endif + + XENPRINTF("attaching device 0x%x unit %d capacity %llu\n", + xb_diskinfo[sc->xb_unit].device, sc->xb_unit, + sc->xb_disk.d_mediasize); + + disk_create(&sc->xb_disk, DISK_VERSION_00); + bioq_init(&sc->xb_bioq); + + return error; +} + +/* XXX move to xb_vbd.c when vbd update support is added */ +static void +xb_vbdinit(void) +{ + int i; + blkif_request_t req; + blkif_response_t rsp; + vdisk_t *buf; + + TRACE_ENTER; + + buf = (vdisk_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); + + /* Probe for disk information. 
*/ + memset(&req, 0, sizeof(req)); + req.operation = BLKIF_OP_PROBE; + req.nr_segments = 1; + req.frame_and_sects[0] = vtomach(buf) | 7; + blkif_control_send(&req, &rsp); + + if ( rsp.status <= 0 ) { + printk("xb_identify: Could not identify disks (%d)\n", rsp.status); + free(buf, M_DEVBUF); + return; + } + + if ((xb_ndisks = rsp.status) > MAX_VBDS) + xb_ndisks = MAX_VBDS; + + memcpy(xb_diskinfo, buf, xb_ndisks * sizeof(vdisk_t)); + + for (i = 0; i < xb_ndisks; i++) + xb_create(i); + + free(buf, M_DEVBUF); +} + + +/***************************** COMMON CODE *******************************/ + +void +blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) +{ + unsigned long flags, id; + + retry: + while ( (xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE ) { + tsleep( req, PWAIT | PCATCH, "blkif", hz); + } + + mtx_lock_irqsave(&blkif_io_lock, flags); + if ( (xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE ) + { + mtx_unlock_irqrestore(&blkif_io_lock, flags); + goto retry; + } + + xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req = *req; + + id = GET_ID_FROM_FREELIST(); + xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req.id = id; + xb_rec_ring[id].id = (unsigned long) req; + + translate_req_to_pfn( &xb_rec_ring[id], req ); + + xb_req_prod++; + flush_requests(); + + mtx_unlock_irqrestore(&blkif_io_lock, flags); + + while ( !blkif_control_rsp_valid ) + { + tsleep( &blkif_control_rsp_valid, PWAIT | PCATCH, "blkif", hz); + } + + memcpy(rsp, &blkif_control_rsp, sizeof(*rsp)); + blkif_control_rsp_valid = 0; +} + + +/* Send a driver status notification to the domain controller. */ +static void +send_driver_status(int ok) +{ + ctrl_msg_t cmsg = { + .type = CMSG_BLKIF_FE, + .subtype = CMSG_BLKIF_FE_DRIVER_STATUS, + .length = sizeof(blkif_fe_driver_status_t), + }; + blkif_fe_driver_status_t *msg = (void*)cmsg.msg; + + msg->status = (ok ? 
BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN); + + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); +} + +/* Tell the controller to bring up the interface. */ +static void +blkif_send_interface_connect(void) +{ + ctrl_msg_t cmsg = { + .type = CMSG_BLKIF_FE, + .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT, + .length = sizeof(blkif_fe_interface_connect_t), + }; + blkif_fe_interface_connect_t *msg = (void*)cmsg.msg; + + msg->handle = 0; + msg->shmem_frame = (vtomach(xb_blk_ring) >> PAGE_SHIFT); + + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); +} + +static void +blkif_free(void) +{ + + unsigned long flags; + + printk("[XEN] Recovering virtual block device driver\n"); + + /* Prevent new requests being issued until we fix things up. */ + mtx_lock_irqsave(&blkif_io_lock, flags); + xb_recovery = 1; + blkif_state = BLKIF_STATE_DISCONNECTED; + mtx_unlock_irqrestore(&blkif_io_lock, flags); + + /* Free resources associated with old device channel. */ + if (xb_blk_ring) { + free(xb_blk_ring, M_DEVBUF); + xb_blk_ring = NULL; + } + /* free_irq(blkif_irq, NULL);*/ + blkif_irq = 0; + + unbind_evtchn_from_irq(blkif_evtchn); + blkif_evtchn = 0; +} + +static void +blkif_close(void) +{ +} + +/* Move from CLOSED to DISCONNECTED state. */ +static void +blkif_disconnect(void) +{ + if (xb_blk_ring) free(xb_blk_ring, M_DEVBUF); + xb_blk_ring = (blkif_ring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); + xb_blk_ring->req_prod = xb_blk_ring->resp_prod = 0; + xb_resp_cons = xb_req_prod = 0; + blkif_state = BLKIF_STATE_DISCONNECTED; + blkif_send_interface_connect(); +} + +static void +blkif_reset(void) +{ + printk("[XEN] Recovering virtual block device driver\n"); + blkif_free(); + blkif_disconnect(); +} + +static void +blkif_recover(void) +{ + + int i; + + /* Hmm, requests might be re-ordered when we re-issue them. + * This will need to be fixed once we have barriers */ + + /* Stage 1 : Find active and move to safety. 
*/ + for ( i = 0; i < BLKIF_RING_SIZE; i++ ) { + if ( xb_rec_ring[i].id >= KERNBASE ) { + translate_req_to_mfn( + &xb_blk_ring->ring[xb_req_prod].req, &xb_rec_ring[i]); + xb_req_prod++; + } + } + + printk("blkfront: recovered %d descriptors\n",xb_req_prod); + + /* Stage 2 : Set up shadow list. */ + for ( i = 0; i < xb_req_prod; i++ ) { + xb_rec_ring[i].id = xb_blk_ring->ring[i].req.id; + xb_blk_ring->ring[i].req.id = i; + translate_req_to_pfn(&xb_rec_ring[i], &xb_blk_ring->ring[i].req); + } + + /* Stage 3 : Set up free list. */ + for ( ; i < BLKIF_RING_SIZE; i++ ){ + xb_rec_ring[i].id = i+1; + } + xb_rec_ring_free = xb_req_prod; + xb_rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff; + + /* xb_blk_ring->req_prod will be set when we flush_requests().*/ + wmb(); + + /* Switch off recovery mode, using a memory barrier to ensure that + * it's seen before we flush requests - we don't want to miss any + * interrupts. */ + xb_recovery = 0; + wmb(); + + /* Kicks things back into life. */ + flush_requests(); + + /* Now safe to left other peope use interface. */ + blkif_state = BLKIF_STATE_CONNECTED; +} + +static void +blkif_connect(blkif_fe_interface_status_t *status) +{ + int err = 0; + + blkif_evtchn = status->evtchn; + blkif_irq = bind_evtchn_to_irq(blkif_evtchn); + + err = intr_add_handler("xbd", blkif_irq, + (driver_intr_t *)xb_response_intr, NULL, + INTR_TYPE_BIO | INTR_MPSAFE, NULL); + if(err){ + printk("[XEN] blkfront request_irq failed (err=%d)\n", err); + return; + } + + if ( xb_recovery ) { + blkif_recover(); + } else { + /* Probe for discs attached to the interface. */ + xb_vbdinit(); + + /* XXX: transition state after probe */ + blkif_state = BLKIF_STATE_CONNECTED; + } + + /* Kick pending requests. 
*/ +#if 0 /* XXX: figure out sortq logic */ + mtx_lock_irq(&blkif_io_lock); + kick_pending_request_queues(); + mtx_unlock_irq(&blkif_io_lock); +#endif +} + +static void +unexpected(blkif_fe_interface_status_t *status) +{ + WPRINTK(" Unexpected blkif status %s in state %s\n", + blkif_status_name[status->status], + blkif_state_name[blkif_state]); +} + +static void +blkif_status(blkif_fe_interface_status_t *status) +{ + if (status->handle != blkif_handle) { + WPRINTK(" Invalid blkif: handle=%u", status->handle); + return; + } + + switch (status->status) { + + case BLKIF_INTERFACE_STATUS_CLOSED: + switch(blkif_state){ + case BLKIF_STATE_CLOSED: + unexpected(status); + break; + case BLKIF_STATE_DISCONNECTED: + case BLKIF_STATE_CONNECTED: + unexpected(status); + blkif_close(); + break; + } + break; + + case BLKIF_INTERFACE_STATUS_DISCONNECTED: + switch(blkif_state){ + case BLKIF_STATE_CLOSED: + blkif_disconnect(); + break; + case BLKIF_STATE_DISCONNECTED: + case BLKIF_STATE_CONNECTED: + unexpected(status); + blkif_reset(); + break; + } + break; + + case BLKIF_INTERFACE_STATUS_CONNECTED: + switch(blkif_state){ + case BLKIF_STATE_CLOSED: + unexpected(status); + blkif_disconnect(); + blkif_connect(status); + break; + case BLKIF_STATE_DISCONNECTED: + blkif_connect(status); + break; + case BLKIF_STATE_CONNECTED: + unexpected(status); + blkif_connect(status); + break; + } + break; + + case BLKIF_INTERFACE_STATUS_CHANGED: + switch(blkif_state){ + case BLKIF_STATE_CLOSED: + case BLKIF_STATE_DISCONNECTED: + unexpected(status); + break; + case BLKIF_STATE_CONNECTED: + vbd_update(); + break; + } + break; + + default: + WPRINTK("Invalid blkif status: %d\n", status->status); + break; + } +} + + +static void +blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) +{ + switch ( msg->subtype ) + { + case CMSG_BLKIF_FE_INTERFACE_STATUS: + if ( msg->length != sizeof(blkif_fe_interface_status_t) ) + goto parse_error; + blkif_status((blkif_fe_interface_status_t *) + &msg->msg[0]); + break; + 
default: + goto parse_error; + } + + ctrl_if_send_response(msg); + return; + + parse_error: + msg->length = 0; + ctrl_if_send_response(msg); +} + +static int +wait_for_blkif(void) +{ + int err = 0; + int i; + send_driver_status(1); + + /* + * We should read 'nr_interfaces' from response message and wait + * for notifications before proceeding. For now we assume that we + * will be notified of exactly one interface. + */ + for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*hz); i++ ) + { + tsleep(&blkif_state, PWAIT | PCATCH, "blkif", hz); + } + + if (blkif_state != BLKIF_STATE_CONNECTED){ + printk("[XEN] Timeout connecting block device driver!\n"); + err = -ENOSYS; + } + return err; +} + + +static void +xb_init(void *unused) +{ + int i; + + printk("[XEN] Initialising virtual block device driver\n"); + + xb_rec_ring_free = 0; + for (i = 0; i < BLKIF_RING_SIZE; i++) { + xb_rec_ring[i].id = i+1; + } + xb_rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff; + + (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, 0); + + wait_for_blkif(); +} + +#if 0 /* XXX not yet */ +void +blkdev_suspend(void) +{ +} + +void +blkdev_resume(void) +{ + send_driver_status(1); +} +#endif + +/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */ + +void +blkif_completion(blkif_request_t *req) +{ + int i; + + switch ( req->operation ) + { + case BLKIF_OP_READ: + for ( i = 0; i < req->nr_segments; i++ ) + { + unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT; + unsigned long mfn = xen_phys_machine[pfn]; + xen_machphys_update(mfn, pfn); + } + break; + } + +} +MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_SPIN); +SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_ANY, xb_init, NULL) diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c b/freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c new file mode 100644 index 0000000000..7ea8e3eb4f --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c @@ -0,0 +1,536 @@ +#include <sys/cdefs.h> + + 
+#include <sys/param.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/consio.h> +#include <sys/proc.h> +#include <sys/uio.h> +#include <sys/tty.h> +#include <sys/systm.h> +#include <sys/taskqueue.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <machine/stdarg.h> +#include <machine/xen-os.h> +#include <machine/hypervisor.h> +#include <machine/ctrl_if.h> +#include <sys/cons.h> + +#include "opt_ddb.h" +#ifdef DDB +#include <ddb/ddb.h> +#endif + +static char driver_name[] = "xc"; +devclass_t xc_devclass; +static void xcstart (struct tty *); +static int xcparam (struct tty *, struct termios *); +static void xcstop (struct tty *, int); +static void xc_timeout(void *); +static void xencons_tx_flush_task_routine(void *,int ); +static void __xencons_tx_flush(void); +static void xencons_rx(ctrl_msg_t *msg,unsigned long id); +static boolean_t xcons_putc(int c); + +/* switch console so that shutdown can occur gracefully */ +static void xc_shutdown(void *arg, int howto); +static int xc_mute; + +void xcons_force_flush(void); + +static cn_probe_t xccnprobe; +static cn_init_t xccninit; +static cn_getc_t xccngetc; +static cn_putc_t xccnputc; +static cn_checkc_t xccncheckc; + +#define XC_POLLTIME (hz/10) + +CONS_DRIVER(xc, xccnprobe, xccninit, NULL, xccngetc, + xccncheckc, xccnputc, NULL); + +static int xen_console_up; +static boolean_t xc_tx_task_queued; +static boolean_t xc_start_needed; +static struct callout xc_callout; +struct mtx cn_mtx; + +#define RBUF_SIZE 1024 +#define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1)) +#define WBUF_SIZE 4096 +#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) +static char wbuf[WBUF_SIZE]; +static char rbuf[RBUF_SIZE]; +static int rc, rp; +static int cnsl_evt_reg; +static unsigned int wc, wp; /* write_cons, write_prod */ +static struct task xencons_tx_flush_task = { {NULL},0,0,&xencons_tx_flush_task_routine,NULL }; + + +#define CDEV_MAJOR 12 +#define XCUNIT(x) (minor(x)) +#define ISTTYOPEN(tp) ((tp) && 
((tp)->t_state & TS_ISOPEN)) +#define CN_LOCK_INIT(x, _name) \ + mtx_init(&x, _name, _name, MTX_SPIN) +#define CN_LOCK(l, f) mtx_lock_irqsave(&(l), (f)) +#define CN_UNLOCK(l, f) mtx_unlock_irqrestore(&(l), (f)) +#define CN_LOCK_ASSERT(x) mtx_assert(&x, MA_OWNED) +#define CN_LOCK_DESTROY(x) mtx_destroy(&x) + + +static struct tty *xccons; + +struct xc_softc { + int xc_unit; + struct cdev *xc_dev; +}; + + +static d_open_t xcopen; +static d_close_t xcclose; +static d_ioctl_t xcioctl; + +static struct cdevsw xc_cdevsw = { + /* version */ D_VERSION_00, + /* maj */ CDEV_MAJOR, + /* flags */ D_TTY | D_NEEDGIANT, + /* name */ driver_name, + + /* open */ xcopen, + /* fdopen */ 0, + /* close */ xcclose, + /* read */ ttyread, + /* write */ ttywrite, + /* ioctl */ xcioctl, + /* poll */ ttypoll, + /* mmap */ 0, + /* strategy */ 0, + /* dump */ 0, + /* kqfilter */ ttykqfilter +}; + +static void +xccnprobe(struct consdev *cp) +{ + cp->cn_pri = CN_REMOTE; + cp->cn_tp = xccons; + sprintf(cp->cn_name, "%s0", driver_name); +} + + +static void +xccninit(struct consdev *cp) +{ + CN_LOCK_INIT(cn_mtx,"XCONS LOCK"); + +} +int +xccngetc(struct consdev *dev) +{ + int c; + if (xc_mute) + return 0; + do { + if ((c = xccncheckc(dev)) == -1) { + /* polling without sleeping in Xen doesn't work well. + * Sleeping gives other things like clock a chance to + * run + */ + tsleep(&cn_mtx, PWAIT | PCATCH, "console sleep", + XC_POLLTIME); + } + } while( c == -1 ); + return c; +} + +int +xccncheckc(struct consdev *dev) +{ + int ret = (xc_mute ? 
0 : -1); + int flags; + CN_LOCK(cn_mtx, flags); + if ( (rp - rc) ){ + /* we need to return only one char */ + ret = (int)rbuf[RBUF_MASK(rc)]; + rc++; + } + CN_UNLOCK(cn_mtx, flags); + return(ret); +} + +static void +xccnputc(struct consdev *dev, int c) +{ + int flags; + CN_LOCK(cn_mtx, flags); + xcons_putc(c); + CN_UNLOCK(cn_mtx, flags); +} + +static boolean_t +xcons_putc(int c) +{ + int force_flush = xc_mute || +#ifdef DDB + db_active || +#endif + panicstr; /* we're not gonna recover, so force + * flush + */ + + if ( (wp-wc) < (WBUF_SIZE-1) ){ + if ( (wbuf[WBUF_MASK(wp++)] = c) == '\n' ) { + wbuf[WBUF_MASK(wp++)] = '\r'; + if (force_flush) + xcons_force_flush(); + } + } else if (force_flush) { + xcons_force_flush(); + + } + if (cnsl_evt_reg) + __xencons_tx_flush(); + + /* inform start path that we're pretty full */ + return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE; +} + +static void +xc_identify(driver_t *driver, device_t parent) +{ + device_t child; + child = BUS_ADD_CHILD(parent, 0, driver_name, 0); + device_set_driver(child, driver); + device_set_desc(child, "Xen Console"); +} + +static int +xc_probe(device_t dev) +{ + struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev); + + sc->xc_unit = device_get_unit(dev); + return (0); +} + +static int +xc_attach(device_t dev) +{ + struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev); + + sc->xc_dev = make_dev(&xc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "xc%r", 0); + xccons = ttymalloc(NULL); + + sc->xc_dev->si_drv1 = (void *)sc; + sc->xc_dev->si_tty = xccons; + + xccons->t_oproc = xcstart; + xccons->t_param = xcparam; + xccons->t_stop = xcstop; + xccons->t_dev = sc->xc_dev; + + callout_init(&xc_callout, 0); + + /* Ensure that we don't attach before the event channel is able to receive + * a registration. The XenBus code delays the probe/attach order until + * this has occurred. 
+ */ + (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0); + cnsl_evt_reg = 1; + + callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons); + + /* register handler to flush console on shutdown */ + if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown, + NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) + printf("xencons: shutdown event registration failed!\n"); + + return (0); +} + +/* + * return 0 for all console input, force flush all output. + */ +static void +xc_shutdown(void *arg, int howto) +{ + xc_mute = 1; + xcons_force_flush(); + +} + +static void +xencons_rx(ctrl_msg_t *msg,unsigned long id) +{ + int i, flags; + struct tty *tp = xccons; + + CN_LOCK(cn_mtx, flags); + for ( i = 0; i < msg->length; i++ ) { + if ( xen_console_up ) + (*linesw[tp->t_line]->l_rint)(msg->msg[i], tp); + else + rbuf[RBUF_MASK(rp++)] = msg->msg[i]; + } + CN_UNLOCK(cn_mtx, flags); + msg->length = 0; + ctrl_if_send_response(msg); +} + +static void +__xencons_tx_flush(void) +{ + int sz, work_done = 0; + ctrl_msg_t msg; + + while ( wc != wp ) + { + sz = wp - wc; + if ( sz > sizeof(msg.msg) ) + sz = sizeof(msg.msg); + if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) ) + sz = WBUF_SIZE - WBUF_MASK(wc); + + msg.type = CMSG_CONSOLE; + msg.subtype = CMSG_CONSOLE_DATA; + msg.length = sz; + memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz); + + if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ){ + wc += sz; + } + else if (xc_tx_task_queued) { + /* avoid the extra enqueue check if we know we're already queued */ + break; + } else if (ctrl_if_enqueue_space_callback(&xencons_tx_flush_task)) { + xc_tx_task_queued = TRUE; + break; + } + + work_done = 1; + } + + if ( work_done && xen_console_up ) + ttwakeup(xccons); +} +static void +xencons_tx_flush_task_routine(void * data, int arg) +{ + int flags; + CN_LOCK(cn_mtx, flags); + xc_tx_task_queued = FALSE; + __xencons_tx_flush(); + CN_UNLOCK(cn_mtx, flags); +} + +int +xcopen(struct cdev *dev, int flag, int mode, struct thread *td) +{ + struct xc_softc *sc; + 
int unit = XCUNIT(dev); + struct tty *tp; + int s, error; + + sc = (struct xc_softc *)device_get_softc( + devclass_get_device(xc_devclass, unit)); + if (sc == NULL) + return (ENXIO); + + tp = dev->si_tty; + s = spltty(); + if (!ISTTYOPEN(tp)) { + tp->t_state |= TS_CARR_ON; + ttychars(tp); + tp->t_iflag = TTYDEF_IFLAG; + tp->t_oflag = TTYDEF_OFLAG; + tp->t_cflag = TTYDEF_CFLAG|CLOCAL; + tp->t_lflag = TTYDEF_LFLAG; + tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; + xcparam(tp, &tp->t_termios); + ttsetwater(tp); + } else if (tp->t_state & TS_XCLUDE && suser(td)) { + splx(s); + return (EBUSY); + } + splx(s); + + xen_console_up = 1; + + error = (*linesw[tp->t_line]->l_open)(dev, tp); + + return error; +} + +int +xcclose(struct cdev *dev, int flag, int mode, struct thread *td) +{ + struct tty *tp = dev->si_tty; + + if (tp == NULL) + return (0); + xen_console_up = 0; + + spltty(); + (*linesw[tp->t_line]->l_close)(tp, flag); + tty_close(tp); + spl0(); + return (0); +} + + +int +xcioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) +{ + struct tty *tp = dev->si_tty; + int error; + + error = (*linesw[tp->t_line]->l_ioctl)(tp, cmd, data, flag, td); + if (error != ENOIOCTL) + return (error); + error = ttioctl(tp, cmd, data, flag); + if (error != ENOIOCTL) + return (error); + return (ENOTTY); +} + +static inline int +__xencons_put_char(int ch) +{ + char _ch = (char)ch; + if ( (wp - wc) == WBUF_SIZE ) + return 0; + wbuf[WBUF_MASK(wp++)] = _ch; + return 1; +} + + +static void +xcstart(struct tty *tp) +{ + int flags; + int s; + boolean_t cons_full = FALSE; + + s = spltty(); + CN_LOCK(cn_mtx, flags); + if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { + ttwwakeup(tp); + CN_UNLOCK(cn_mtx, flags); + return; + } + + tp->t_state |= TS_BUSY; + while (tp->t_outq.c_cc != 0 && !cons_full) + cons_full = xcons_putc(getc(&tp->t_outq)); + + /* if the console is close to full leave our state as busy */ + if (!cons_full) { + tp->t_state &= ~TS_BUSY; + ttwwakeup(tp); + } else 
{ + /* let the timeout kick us in a bit */ + xc_start_needed = TRUE; + } + CN_UNLOCK(cn_mtx, flags); + splx(s); +} + +static void +xcstop(struct tty *tp, int flag) +{ + + if (tp->t_state & TS_BUSY) { + if ((tp->t_state & TS_TTSTOP) == 0) { + tp->t_state |= TS_FLUSH; + } + } +} + +static void +xc_timeout(void *v) +{ + struct tty *tp; + int c; + + tp = (struct tty *)v; + + while ((c = xccncheckc(NULL)) != -1) { + if (tp->t_state & TS_ISOPEN) { + (*linesw[tp->t_line]->l_rint)(c, tp); + } + } + + if (xc_start_needed) { + xc_start_needed = FALSE; + xcstart(tp); + } + + callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp); +} + +/* + * Set line parameters. + */ +int +xcparam(struct tty *tp, struct termios *t) +{ + tp->t_ispeed = t->c_ispeed; + tp->t_ospeed = t->c_ospeed; + tp->t_cflag = t->c_cflag; + return (0); +} + + +static device_method_t xc_methods[] = { + DEVMETHOD(device_identify, xc_identify), + DEVMETHOD(device_probe, xc_probe), + DEVMETHOD(device_attach, xc_attach), + {0, 0} +}; + +static driver_t xc_driver = { + driver_name, + xc_methods, + sizeof(struct xc_softc), +}; + +/*** Forcibly flush console data before dying. ***/ +void +xcons_force_flush(void) +{ + ctrl_msg_t msg; + int sz; + + /* + * We use dangerous control-interface functions that require a quiescent + * system and no interrupts. Try to ensure this with a global cli(). + */ + cli(); + + /* Spin until console data is flushed through to the domain controller. */ + while ( (wc != wp) && !ctrl_if_transmitter_empty() ) + { + /* Interrupts are disabled -- we must manually reap responses. 
*/ + ctrl_if_discard_responses(); + + if ( (sz = wp - wc) == 0 ) + continue; + if ( sz > sizeof(msg.msg) ) + sz = sizeof(msg.msg); + if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) ) + sz = WBUF_SIZE - WBUF_MASK(wc); + + msg.type = CMSG_CONSOLE; + msg.subtype = CMSG_CONSOLE_DATA; + msg.length = sz; + memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz); + + if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ) + wc += sz; + } +} + +DRIVER_MODULE(xc, xenbus, xc_driver, xc_devclass, 0, 0); diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c b/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c new file mode 100644 index 0000000000..de379b6bf9 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c @@ -0,0 +1,410 @@ +/****************************************************************************** + * evtchn.c + * + * Xenolinux driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004, K A Fraser + */ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/uio.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/selinfo.h> +#include <sys/poll.h> +#include <sys/conf.h> +#include <sys/fcntl.h> +#include <sys/ioccom.h> + +#include <machine/cpufunc.h> +#include <machine/intr_machdep.h> +#include <machine/xen-os.h> +#include <machine/xen_intr.h> +#include <machine/bus.h> +#include <sys/rman.h> +#include <machine/resource.h> +#include <machine/synch_bitops.h> + +#include <machine/hypervisor.h> + + +typedef struct evtchn_sotfc { + + struct selinfo ev_rsel; +} evtchn_softc_t; + + +#ifdef linuxcrap +/* NB. This must be shared amongst drivers if more things go in /dev/xen */ +static devfs_handle_t xen_dev_dir; +#endif + +/* Only one process may open /dev/xen/evtchn at any time. */ +static unsigned long evtchn_dev_inuse; + +/* Notification ring, accessed via /dev/xen/evtchn. 
*/ +#define RING_SIZE 2048 /* 2048 16-bit entries */ +#define RING_MASK(_i) ((_i)&(RING_SIZE-1)) +static uint16_t *ring; +static unsigned int ring_cons, ring_prod, ring_overflow; + +/* Which ports is user-space bound to? */ +static uint32_t bound_ports[32]; + +/* Unique address for processes to sleep on */ +static void *evtchn_waddr = ˚ + +static struct mtx lock, upcall_lock; + +static d_read_t evtchn_read; +static d_write_t evtchn_write; +static d_ioctl_t evtchn_ioctl; +static d_poll_t evtchn_poll; +static d_open_t evtchn_open; +static d_close_t evtchn_close; + + +void +evtchn_device_upcall(int port) +{ + mtx_lock(&upcall_lock); + + mask_evtchn(port); + clear_evtchn(port); + + if ( ring != NULL ) { + if ( (ring_prod - ring_cons) < RING_SIZE ) { + ring[RING_MASK(ring_prod)] = (uint16_t)port; + if ( ring_cons == ring_prod++ ) { + wakeup(evtchn_waddr); + } + } + else { + ring_overflow = 1; + } + } + + mtx_unlock(&upcall_lock); +} + +static void +__evtchn_reset_buffer_ring(void) +{ + /* Initialise the ring to empty. Clear errors. */ + ring_cons = ring_prod = ring_overflow = 0; +} + +static int +evtchn_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc; + unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0; + count = uio->uio_resid; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) + { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + for ( ; ; ) { + if ( (c = ring_cons) != (p = ring_prod) ) + break; + + if ( ring_overflow ) { + rc = EFBIG; + goto out; + } + + if (sst != 0) { + rc = EINTR; + goto out; + } + + /* PCATCH == check for signals before and after sleeping + * PWAIT == priority of waiting on resource + */ + sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10); + } + + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. 
*/ + if ( ((c ^ p) & RING_SIZE) != 0 ) { + bytes1 = (RING_SIZE - RING_MASK(c)) * sizeof(uint16_t); + bytes2 = RING_MASK(p) * sizeof(uint16_t); + } + else { + bytes1 = (p - c) * sizeof(uint16_t); + bytes2 = 0; + } + + /* Truncate chunks according to caller's maximum byte count. */ + if ( bytes1 > count ) { + bytes1 = count; + bytes2 = 0; + } + else if ( (bytes1 + bytes2) > count ) { + bytes2 = count - bytes1; + } + + if ( uiomove(&ring[RING_MASK(c)], bytes1, uio) || + ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio))) + /* keeping this around as its replacement is not equivalent + * copyout(&ring[0], &buf[bytes1], bytes2) + */ + { + rc = EFAULT; + goto out; + } + + ring_cons += (bytes1 + bytes2) / sizeof(uint16_t); + + rc = bytes1 + bytes2; + + out: + + return rc; +} + +static int +evtchn_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc, i, count; + + count = uio->uio_resid; + + uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); + + + if ( kbuf == NULL ) + return ENOMEM; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + if ( uiomove(kbuf, count, uio) != 0 ) { + rc = EFAULT; + goto out; + } + + mtx_lock_spin(&lock); + for ( i = 0; i < (count/2); i++ ) + if ( test_bit(kbuf[i], &bound_ports[0]) ) + unmask_evtchn(kbuf[i]); + mtx_unlock_spin(&lock); + + rc = count; + + out: + free(kbuf, M_DEVBUF); + return rc; +} + +static int +evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, + int mode, struct thread *td __unused) +{ + int rc = 0; + + mtx_lock_spin(&lock); + + switch ( cmd ) + { + case EVTCHN_RESET: + __evtchn_reset_buffer_ring(); + break; + case EVTCHN_BIND: + if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) ) + unmask_evtchn((int)arg); + else + rc = EINVAL; + break; + case EVTCHN_UNBIND: + if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) ) + mask_evtchn((int)arg); + else + rc = EINVAL; + break; + default: + rc 
= ENOSYS; + break; + } + + mtx_unlock_spin(&lock); + + return rc; +} + +static int +evtchn_poll(struct cdev *dev, int poll_events, struct thread *td) +{ + + evtchn_softc_t *sc; + unsigned int mask = POLLOUT | POLLWRNORM; + + sc = dev->si_drv1; + + if ( ring_cons != ring_prod ) + mask |= POLLIN | POLLRDNORM; + else if ( ring_overflow ) + mask = POLLERR; + else + selrecord(td, &sc->ev_rsel); + + + return mask; +} + + +static int +evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td) +{ + uint16_t *_ring; + + if (flag & O_NONBLOCK) + return EBUSY; + + if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) ) + return EBUSY; + + if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL ) + return ENOMEM; + + mtx_lock_spin(&lock); + ring = _ring; + __evtchn_reset_buffer_ring(); + mtx_unlock_spin(&lock); + + + return 0; +} + +static int +evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused) +{ + int i; + + mtx_lock_spin(&lock); + if (ring != NULL) { + free(ring, M_DEVBUF); + ring = NULL; + } + for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) + if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) + mask_evtchn(i); + mtx_unlock_spin(&lock); + + evtchn_dev_inuse = 0; + + return 0; +} + + + +/* XXX wild assed guess as to a safe major number */ +#define EVTCHN_MAJOR 140 + +static struct cdevsw evtchn_devsw = { + d_version: D_VERSION_00, + d_open: evtchn_open, + d_close: evtchn_close, + d_read: evtchn_read, + d_write: evtchn_write, + d_ioctl: evtchn_ioctl, + d_poll: evtchn_poll, + d_name: "evtchn", + d_maj: EVTCHN_MAJOR, + d_flags: 0, +}; + + +/* XXX - if this device is ever supposed to support use by more than one process + * this global static will have to go away + */ +static struct cdev *evtchn_dev; + + + +static int +evtchn_init(void *dummy __unused) +{ + /* XXX I believe we don't need these leaving them here for now until we + * have some semblance of it working + */ +#if 0 + devfs_handle_t symlink_handle; + int err, pos; + char 
link_dest[64]; +#endif + mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF); + + /* (DEVFS) create '/dev/misc/evtchn'. */ + evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn"); + + mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS); + + evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); + bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); + + /* XXX I don't think we need any of this rubbish */ +#if 0 + if ( err != 0 ) + { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + /* (DEVFS) create directory '/dev/xen'. */ + xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); + + /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ + pos = devfs_generate_path(evtchn_miscdev.devfs_handle, + &link_dest[3], + sizeof(link_dest) - 3); + if ( pos >= 0 ) + strncpy(&link_dest[pos], "../", 3); + /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ + (void)devfs_mk_symlink(xen_dev_dir, + "evtchn", + DEVFS_FL_DEFAULT, + &link_dest[pos], + &symlink_handle, + NULL); + + /* (DEVFS) automatically destroy the symlink with its destination. */ + devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); +#endif + printk("Event-channel device installed.\n"); + + return 0; +} + + +SYSINIT(evtchn_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_init, NULL); + + +#if 0 + +static void cleanup_module(void) +{ + destroy_dev(evtchn_dev); +; +} + +module_init(init_module); +module_exit(cleanup_module); +#endif diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c b/freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c new file mode 100644 index 0000000000..604aec78c1 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c @@ -0,0 +1,1109 @@ +/*- + * Copyright (c) 1990 William Jolitz. + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/i386/isa/npx.c,v 1.144 2003/11/03 21:53:38 jhb Exp $"); + +#include "opt_cpu.h" +#include "opt_debug_npx.h" +#include "opt_isa.h" +#include "opt_npx.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <machine/bus.h> +#include <sys/rman.h> +#ifdef NPX_DEBUG +#include <sys/syslog.h> +#endif +#include <sys/signalvar.h> +#include <sys/user.h> + +#include <machine/asmacros.h> +#include <machine/cputypes.h> +#include <machine/frame.h> +#include <machine/md_var.h> +#include <machine/pcb.h> +#include <machine/psl.h> +#include <machine/clock.h> +#include <machine/resource.h> +#include <machine/specialreg.h> +#include <machine/segments.h> +#include <machine/ucontext.h> + +#include <machine/multicall.h> + +#include <i386/isa/icu.h> +#ifdef PC98 +#include <pc98/pc98/pc98.h> +#else +#include <i386/isa/isa.h> +#endif +#include <machine/intr_machdep.h> +#ifdef DEV_ISA +#include <isa/isavar.h> +#endif + +#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) +#define CPU_ENABLE_SSE +#endif +#if defined(CPU_DISABLE_SSE) +#undef CPU_ENABLE_SSE +#endif + +/* + * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. + */ + +/* Configuration flags. 
*/ +#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0) +#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1) +#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2) + +#if defined(__GNUC__) && !defined(lint) + +#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) +#define fnclex() __asm("fnclex") +#define fninit() __asm("fninit") +#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) +#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) +#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) +#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop") +#define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) +#ifdef CPU_ENABLE_SSE +#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) +#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) +#endif +#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ + : : "n" (CR0_TS) : "ax") +#define stop_emulating() __asm("clts") + +#else /* not __GNUC__ */ + +void fldcw(caddr_t addr); +void fnclex(void); +void fninit(void); +void fnsave(caddr_t addr); +void fnstcw(caddr_t addr); +void fnstsw(caddr_t addr); +void fp_divide_by_0(void); +void frstor(caddr_t addr); +#ifdef CPU_ENABLE_SSE +void fxsave(caddr_t addr); +void fxrstor(caddr_t addr); +#endif +void start_emulating(void); +void stop_emulating(void); + +#endif /* __GNUC__ */ + +#ifdef CPU_ENABLE_SSE +#define GET_FPU_CW(thread) \ + (cpu_fxsr ? \ + (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_cw : \ + (thread)->td_pcb->pcb_save.sv_87.sv_env.en_cw) +#define GET_FPU_SW(thread) \ + (cpu_fxsr ? 
\ + (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \ + (thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw) +#else /* CPU_ENABLE_SSE */ +#define GET_FPU_CW(thread) \ + (thread->td_pcb->pcb_save.sv_87.sv_env.en_cw) +#define GET_FPU_SW(thread) \ + (thread->td_pcb->pcb_save.sv_87.sv_env.en_sw) +#endif /* CPU_ENABLE_SSE */ + +typedef u_char bool_t; + +static void fpusave(union savefpu *); +static void fpurstor(union savefpu *); +static int npx_attach(device_t dev); +static void npx_identify(driver_t *driver, device_t parent); +#if 0 +static void npx_intr(void *); +#endif +static int npx_probe(device_t dev); +#ifdef I586_CPU_XXX +static long timezero(const char *funcname, + void (*func)(void *buf, size_t len)); +#endif /* I586_CPU */ + +int hw_float; /* XXX currently just alias for npx_exists */ + +SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, + CTLFLAG_RD, &hw_float, 0, + "Floatingpoint instructions executed in hardware"); +#if 0 +static volatile u_int npx_intrs_while_probing; +#endif +static union savefpu npx_cleanstate; +static bool_t npx_cleanstate_ready; +static bool_t npx_ex16; +static bool_t npx_exists; +static bool_t npx_irq13; + +alias_for_inthand_t probetrap; +#if 0 +__asm(" \n\ + .text \n\ + .p2align 2,0x90 \n\ + .type " __XSTRING(CNAME(probetrap)) ",@function \n\ +" __XSTRING(CNAME(probetrap)) ": \n\ + ss \n\ + incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ + fnclex \n\ + iret \n\ +"); +#endif +/* + * Identify routine. Create a connection point on our parent for probing. + */ +static void +npx_identify(driver, parent) + driver_t *driver; + device_t parent; +{ + device_t child; + + child = BUS_ADD_CHILD(parent, 0, "npx", 0); + if (child == NULL) + panic("npx_identify"); +} +#if 0 +/* + * Do minimal handling of npx interrupts to convert them to traps. 
+ */ +static void +npx_intr(dummy) + void *dummy; +{ + struct thread *td; + + npx_intrs_while_probing++; + + /* + * The BUSY# latch must be cleared in all cases so that the next + * unmasked npx exception causes an interrupt. + */ +#ifdef PC98 + outb(0xf8, 0); +#else + outb(0xf0, 0); +#endif + + /* + * fpcurthread is normally non-null here. In that case, schedule an + * AST to finish the exception handling in the correct context + * (this interrupt may occur after the thread has entered the + * kernel via a syscall or an interrupt). Otherwise, the npx + * state of the thread that caused this interrupt must have been + * pushed to the thread's pcb, and clearing of the busy latch + * above has finished the (essentially null) handling of this + * interrupt. Control will eventually return to the instruction + * that caused it and it will repeat. We will eventually (usually + * soon) win the race to handle the interrupt properly. + */ + td = PCPU_GET(fpcurthread); + if (td != NULL) { + td->td_pcb->pcb_flags |= PCB_NPXTRAP; + mtx_lock_spin(&sched_lock); + td->td_flags |= TDF_ASTPENDING; + mtx_unlock_spin(&sched_lock); + } +} +#endif + +static int +npx_probe(device_t dev) +{ + + return 1; +} + +#if 0 +/* + * Probe routine. Initialize cr0 to give correct behaviour for [f]wait + * whether the device exists or not (XXX should be elsewhere). Set flags + * to tell npxattach() what to do. Modify device struct if npx doesn't + * need to use interrupts. Return 0 if device exists. 
+ */ +static int +npx_probe(device_t dev) +{ + struct gate_descriptor save_idt_npxtrap; + struct resource *ioport_res, *irq_res; + void *irq_cookie; + int ioport_rid, irq_num, irq_rid; + u_short control; + u_short status; + + save_idt_npxtrap = idt[IDT_MF]; + setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + ioport_rid = 0; + ioport_res = bus_alloc_resource(dev, SYS_RES_IOPORT, &ioport_rid, + IO_NPX, IO_NPX, IO_NPXSIZE, RF_ACTIVE); + if (ioport_res == NULL) + panic("npx: can't get ports"); +#ifdef PC98 + if (resource_int_value("npx", 0, "irq", &irq_num) != 0) + irq_num = 8; +#else + if (resource_int_value("npx", 0, "irq", &irq_num) != 0) + irq_num = 13; +#endif + irq_rid = 0; + irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &irq_rid, irq_num, + irq_num, 1, RF_ACTIVE); + if (irq_res == NULL) + panic("npx: can't get IRQ"); + if (bus_setup_intr(dev, irq_res, INTR_TYPE_MISC | INTR_FAST, npx_intr, + NULL, &irq_cookie) != 0) + panic("npx: can't create intr"); + + /* + * Partially reset the coprocessor, if any. Some BIOS's don't reset + * it after a warm boot. + */ +#ifdef PC98 + outb(0xf8,0); +#else + outb(0xf1, 0); /* full reset on some systems, NOP on others */ + outb(0xf0, 0); /* clear BUSY# latch */ +#endif + /* + * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT + * instructions. We must set the CR0_MP bit and use the CR0_TS + * bit to control the trap, because setting the CR0_EM bit does + * not cause WAIT instructions to trap. It's important to trap + * WAIT instructions - otherwise the "wait" variants of no-wait + * control instructions would degenerate to the "no-wait" variants + * after FP context switches but work correctly otherwise. It's + * particularly important to trap WAITs when there is no NPX - + * otherwise the "wait" variants would always degenerate. + * + * Try setting CR0_NE to get correct error reporting on 486DX's. + * Setting it should fail or do nothing on lesser processors. 
+ */ + load_cr0(rcr0() | CR0_MP | CR0_NE); + /* + * But don't trap while we're probing. + */ + stop_emulating(); + /* + * Finish resetting the coprocessor, if any. If there is an error + * pending, then we may get a bogus IRQ13, but npx_intr() will handle + * it OK. Bogus halts have never been observed, but we enabled + * IRQ13 and cleared the BUSY# latch early to handle them anyway. + */ + fninit(); + + device_set_desc(dev, "math processor"); + + /* + * Don't use fwait here because it might hang. + * Don't use fnop here because it usually hangs if there is no FPU. + */ + DELAY(1000); /* wait for any IRQ13 */ +#ifdef DIAGNOSTIC + if (npx_intrs_while_probing != 0) + printf("fninit caused %u bogus npx interrupt(s)\n", + npx_intrs_while_probing); + if (npx_traps_while_probing != 0) + printf("fninit caused %u bogus npx trap(s)\n", + npx_traps_while_probing); +#endif + /* + * Check for a status of mostly zero. + */ + status = 0x5a5a; + fnstsw(&status); + if ((status & 0xb8ff) == 0) { + /* + * Good, now check for a proper control word. + */ + control = 0x5a5a; + fnstcw(&control); + if ((control & 0x1f3f) == 0x033f) { + hw_float = npx_exists = 1; + /* + * We have an npx, now divide by 0 to see if exception + * 16 works. + */ + control &= ~(1 << 2); /* enable divide by 0 trap */ + fldcw(&control); +#ifdef FPU_ERROR_BROKEN + /* + * FPU error signal doesn't work on some CPU + * accelerator board. + */ + npx_ex16 = 1; + return (0); +#endif + npx_traps_while_probing = npx_intrs_while_probing = 0; + fp_divide_by_0(); + if (npx_traps_while_probing != 0) { + /* + * Good, exception 16 works. + */ + npx_ex16 = 1; + goto no_irq13; + } + if (npx_intrs_while_probing != 0) { + /* + * Bad, we are stuck with IRQ13. + */ + npx_irq13 = 1; + idt[IDT_MF] = save_idt_npxtrap; +#ifdef SMP + if (mp_ncpus > 1) + panic("npx0 cannot use IRQ 13 on an SMP system"); +#endif + return (0); + } + /* + * Worse, even IRQ13 is broken. Use emulator. 
+ */ + } + } + /* + * Probe failed, but we want to get to npxattach to initialize the + * emulator and say that it has been installed. XXX handle devices + * that aren't really devices better. + */ +#ifdef SMP + if (mp_ncpus > 1) + panic("npx0 cannot be emulated on an SMP system"); +#endif + /* FALLTHROUGH */ +no_irq13: + idt[IDT_MF] = save_idt_npxtrap; + bus_teardown_intr(dev, irq_res, irq_cookie); + + /* + * XXX hack around brokenness of bus_teardown_intr(). If we left the + * irq active then we would get it instead of exception 16. + */ + { + struct intsrc *isrc; + + isrc = intr_lookup_source(irq_num); + isrc->is_pic->pic_disable_source(isrc); + } + + bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res); + bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res); + return (0); +} +#endif + +/* + * Attach routine - announce which it is, and wire into system + */ +static int +npx_attach(device_t dev) +{ + int flags; + register_t s; + + if (resource_int_value("npx", 0, "flags", &flags) != 0) + flags = 0; + + if (flags) + device_printf(dev, "flags 0x%x ", flags); + if (npx_irq13) { + device_printf(dev, "using IRQ 13 interface\n"); + } else { + if (npx_ex16) + device_printf(dev, "INT 16 interface\n"); + else + device_printf(dev, "WARNING: no FPU!\n"); + } + npxinit(__INITIAL_NPXCW__); + + if (npx_cleanstate_ready == 0) { + s = intr_disable(); + stop_emulating(); + fpusave(&npx_cleanstate); + start_emulating(); + npx_cleanstate_ready = 1; + intr_restore(s); + } +#ifdef I586_CPU_XXX + if (cpu_class == CPUCLASS_586 && npx_ex16 && npx_exists && + timezero("i586_bzero()", i586_bzero) < + timezero("bzero()", bzero) * 4 / 5) { + if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY)) + bcopy_vector = i586_bcopy; + if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BZERO)) + bzero_vector = i586_bzero; + if (!(flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) { + copyin_vector = i586_copyin; + copyout_vector = i586_copyout; + } + } +#endif + + return (0); /* XXX unused */ +} + +/* + * 
Initialize floating point unit. + */ +void +npxinit(control) + u_short control; +{ + static union savefpu dummy; + register_t savecrit; + + if (!npx_exists) + return; + /* + * fninit has the same h/w bugs as fnsave. Use the detoxified + * fnsave to throw away any junk in the fpu. npxsave() initializes + * the fpu and sets fpcurthread = NULL as important side effects. + */ + savecrit = intr_disable(); + npxsave(&dummy); + stop_emulating(); +#ifdef CPU_ENABLE_SSE + /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ + if (cpu_fxsr) + fninit(); +#endif + fldcw(&control); + start_emulating(); + intr_restore(savecrit); +} + +/* + * Free coprocessor (if we have it). + */ +void +npxexit(td) + struct thread *td; +{ + register_t savecrit; + + savecrit = intr_disable(); + if (curthread == PCPU_GET(fpcurthread)) + npxsave(&PCPU_GET(curpcb)->pcb_save); + intr_restore(savecrit); +#ifdef NPX_DEBUG + if (npx_exists) { + u_int masked_exceptions; + + masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; + /* + * Log exceptions that would have trapped with the old + * control word (overflow, divide by 0, and invalid operand). + */ + if (masked_exceptions & 0x0d) + log(LOG_ERR, + "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", + td->td_proc->p_pid, td->td_proc->p_comm, + masked_exceptions); + } +#endif +} + +int +npxformat() +{ + + if (!npx_exists) + return (_MC_FPFMT_NODEV); +#ifdef CPU_ENABLE_SSE + if (cpu_fxsr) + return (_MC_FPFMT_XMM); +#endif + return (_MC_FPFMT_387); +} + +/* + * The following mechanism is used to ensure that the FPE_... value + * that is passed as a trapcode to the signal handler of the user + * process does not have more than one bit set. + * + * Multiple bits may be set if the user process modifies the control + * word while a status word bit is already set. 
While this is a sign + * of bad coding, we have no choise than to narrow them down to one + * bit, since we must not send a trapcode that is not exactly one of + * the FPE_ macros. + * + * The mechanism has a static table with 127 entries. Each combination + * of the 7 FPU status word exception bits directly translates to a + * position in this table, where a single FPE_... value is stored. + * This FPE_... value stored there is considered the "most important" + * of the exception bits and will be sent as the signal code. The + * precedence of the bits is based upon Intel Document "Numerical + * Applications", Chapter "Special Computational Situations". + * + * The macro to choose one of these values does these steps: 1) Throw + * away status word bits that cannot be masked. 2) Throw away the bits + * currently masked in the control word, assuming the user isn't + * interested in them anymore. 3) Reinsert status word bit 7 (stack + * fault) if it is set, which cannot be masked but must be presered. + * 4) Use the remaining bits to point into the trapcode table. + * + * The 6 maskable bits in order of their preference, as stated in the + * above referenced Intel manual: + * 1 Invalid operation (FP_X_INV) + * 1a Stack underflow + * 1b Stack overflow + * 1c Operand of unsupported format + * 1d SNaN operand. 
+ * 2 QNaN operand (not an exception, irrelavant here) + * 3 Any other invalid-operation not mentioned above or zero divide + * (FP_X_INV, FP_X_DZ) + * 4 Denormal operand (FP_X_DNML) + * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) + * 6 Inexact result (FP_X_IMP) + */ +static char fpetable[128] = { + 0, + FPE_FLTINV, /* 1 - INV */ + FPE_FLTUND, /* 2 - DNML */ + FPE_FLTINV, /* 3 - INV | DNML */ + FPE_FLTDIV, /* 4 - DZ */ + FPE_FLTINV, /* 5 - INV | DZ */ + FPE_FLTDIV, /* 6 - DNML | DZ */ + FPE_FLTINV, /* 7 - INV | DNML | DZ */ + FPE_FLTOVF, /* 8 - OFL */ + FPE_FLTINV, /* 9 - INV | OFL */ + FPE_FLTUND, /* A - DNML | OFL */ + FPE_FLTINV, /* B - INV | DNML | OFL */ + FPE_FLTDIV, /* C - DZ | OFL */ + FPE_FLTINV, /* D - INV | DZ | OFL */ + FPE_FLTDIV, /* E - DNML | DZ | OFL */ + FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ + FPE_FLTUND, /* 10 - UFL */ + FPE_FLTINV, /* 11 - INV | UFL */ + FPE_FLTUND, /* 12 - DNML | UFL */ + FPE_FLTINV, /* 13 - INV | DNML | UFL */ + FPE_FLTDIV, /* 14 - DZ | UFL */ + FPE_FLTINV, /* 15 - INV | DZ | UFL */ + FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ + FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ + FPE_FLTOVF, /* 18 - OFL | UFL */ + FPE_FLTINV, /* 19 - INV | OFL | UFL */ + FPE_FLTUND, /* 1A - DNML | OFL | UFL */ + FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ + FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ + FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ + FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ + FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ + FPE_FLTRES, /* 20 - IMP */ + FPE_FLTINV, /* 21 - INV | IMP */ + FPE_FLTUND, /* 22 - DNML | IMP */ + FPE_FLTINV, /* 23 - INV | DNML | IMP */ + FPE_FLTDIV, /* 24 - DZ | IMP */ + FPE_FLTINV, /* 25 - INV | DZ | IMP */ + FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ + FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ + FPE_FLTOVF, /* 28 - OFL | IMP */ + FPE_FLTINV, /* 29 - INV | OFL | IMP */ + FPE_FLTUND, /* 2A - DNML | OFL | IMP */ + FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ + FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ + 
FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ + FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ + FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ + FPE_FLTUND, /* 30 - UFL | IMP */ + FPE_FLTINV, /* 31 - INV | UFL | IMP */ + FPE_FLTUND, /* 32 - DNML | UFL | IMP */ + FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ + FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ + FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ + FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ + FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ + FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ + FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ + FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ + FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ + FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ + FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ + FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ + FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ + FPE_FLTSUB, /* 40 - STK */ + FPE_FLTSUB, /* 41 - INV | STK */ + FPE_FLTUND, /* 42 - DNML | STK */ + FPE_FLTSUB, /* 43 - INV | DNML | STK */ + FPE_FLTDIV, /* 44 - DZ | STK */ + FPE_FLTSUB, /* 45 - INV | DZ | STK */ + FPE_FLTDIV, /* 46 - DNML | DZ | STK */ + FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ + FPE_FLTOVF, /* 48 - OFL | STK */ + FPE_FLTSUB, /* 49 - INV | OFL | STK */ + FPE_FLTUND, /* 4A - DNML | OFL | STK */ + FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ + FPE_FLTDIV, /* 4C - DZ | OFL | STK */ + FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ + FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ + FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ + FPE_FLTUND, /* 50 - UFL | STK */ + FPE_FLTSUB, /* 51 - INV | UFL | STK */ + FPE_FLTUND, /* 52 - DNML | UFL | STK */ + FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ + FPE_FLTDIV, /* 54 - DZ | UFL | STK */ + FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ + FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ + FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ + FPE_FLTOVF, /* 58 - OFL | UFL | STK */ + FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ + 
FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ + FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ + FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ + FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ + FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ + FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ + FPE_FLTRES, /* 60 - IMP | STK */ + FPE_FLTSUB, /* 61 - INV | IMP | STK */ + FPE_FLTUND, /* 62 - DNML | IMP | STK */ + FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ + FPE_FLTDIV, /* 64 - DZ | IMP | STK */ + FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ + FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ + FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ + FPE_FLTOVF, /* 68 - OFL | IMP | STK */ + FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ + FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ + FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ + FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ + FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ + FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ + FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ + FPE_FLTUND, /* 70 - UFL | IMP | STK */ + FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ + FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ + FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ + FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ + FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ + FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ + FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ + FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ + FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ + FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ + FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ + FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ + FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ + FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ + FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ +}; + +/* + * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. 
+ * + * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now + * depend on longjmp() restoring a usable state. Restoring the state + * or examining it might fail if we didn't clear exceptions. + * + * The error code chosen will be one of the FPE_... macros. It will be + * sent as the second argument to old BSD-style signal handlers and as + * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers. + * + * XXX the FP state is not preserved across signal handlers. So signal + * handlers cannot afford to do FP unless they preserve the state or + * longjmp() out. Both preserving the state and longjmp()ing may be + * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable + * solution for signals other than SIGFPE. + */ +int +npxtrap() +{ + register_t savecrit; + u_short control, status; + + if (!npx_exists) { + printf("npxtrap: fpcurthread = %p, curthread = %p, npx_exists = %d\n", + PCPU_GET(fpcurthread), curthread, npx_exists); + panic("npxtrap from nowhere"); + } + savecrit = intr_disable(); + + /* + * Interrupt handling (for another interrupt) may have pushed the + * state to memory. Fetch the relevant parts of the state from + * wherever they are. + */ + if (PCPU_GET(fpcurthread) != curthread) { + control = GET_FPU_CW(curthread); + status = GET_FPU_SW(curthread); + } else { + fnstcw(&control); + fnstsw(&status); + } + + if (PCPU_GET(fpcurthread) == curthread) + fnclex(); + intr_restore(savecrit); + return (fpetable[status & ((~control & 0x3f) | 0x40)]); +} + +/* + * Implement device not available (DNA) exception + * + * It would be better to switch FP context here (if curthread != fpcurthread) + * and not necessarily for every context switch, but it is too hard to + * access foreign pcb's. 
+ */ + +static int err_count = 0; + +int +npxdna() +{ + struct pcb *pcb; + register_t s; + u_short control; + + if (!npx_exists) + return (0); + if (PCPU_GET(fpcurthread) == curthread) { + printf("npxdna: fpcurthread == curthread %d times\n", + ++err_count); + stop_emulating(); + return (1); + } + if (PCPU_GET(fpcurthread) != NULL) { + printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", + PCPU_GET(fpcurthread), + PCPU_GET(fpcurthread)->td_proc->p_pid, + curthread, curthread->td_proc->p_pid); + panic("npxdna"); + } + s = intr_disable(); + stop_emulating(); + /* + * Record new context early in case frstor causes an IRQ13. + */ + PCPU_SET(fpcurthread, curthread); + pcb = PCPU_GET(curpcb); + + if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { + /* + * This is the first time this thread has used the FPU or + * the PCB doesn't contain a clean FPU state. Explicitly + * initialize the FPU and load the default control word. + */ + fninit(); + control = __INITIAL_NPXCW__; + fldcw(&control); + pcb->pcb_flags |= PCB_NPXINITDONE; + } else { + /* + * The following frstor may cause an IRQ13 when the state + * being restored has a pending error. The error will + * appear to have been triggered by the current (npx) user + * instruction even when that instruction is a no-wait + * instruction that should not trigger an error (e.g., + * fnclex). On at least one 486 system all of the no-wait + * instructions are broken the same as frstor, so our + * treatment does not amplify the breakage. On at least + * one 386/Cyrix 387 system, fnclex works correctly while + * frstor and fnsave are broken, so our treatment breaks + * fnclex if it is the first FPU instruction after a context + * switch. + */ + fpurstor(&pcb->pcb_save); + } + intr_restore(s); + + return (1); +} + +/* + * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx + * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by + * no-wait npx instructions. 
See the Intel application note AP-578 for + * details. This doesn't cause any additional complications here. IRQ13's + * are inherently asynchronous unless the CPU is frozen to deliver them -- + * one that started in userland may be delivered many instructions later, + * after the process has entered the kernel. It may even be delivered after + * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in + * the same way as a very-late-arriving non-spurious IRQ13 from user mode: + * it is normally ignored at first because we set fpcurthread to NULL; it is + * normally retriggered in npxdna() after return to user mode. + * + * npxsave() must be called with interrupts disabled, so that it clears + * fpcurthread atomically with saving the state. We require callers to do the + * disabling, since most callers need to disable interrupts anyway to call + * npxsave() atomically with checking fpcurthread. + * + * A previous version of npxsave() went to great lengths to excecute fnsave + * with interrupts enabled in case executing it froze the CPU. This case + * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply + * spurious freezes. + */ +void +npxsave(addr) + union savefpu *addr; +{ + + stop_emulating(); + fpusave(addr); + + start_emulating(); + PCPU_SET(fpcurthread, NULL); + queue_multicall0(__HYPERVISOR_fpu_taskswitch); +} + +/* + * This should be called with interrupts disabled and only when the owning + * FPU thread is non-null. + */ +void +npxdrop() +{ + struct thread *td; + + td = PCPU_GET(fpcurthread); + PCPU_SET(fpcurthread, NULL); + td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; + start_emulating(); +} + +/* + * Get the state of the FPU without dropping ownership (if possible). + * It returns the FPU ownership status. 
+ */ +int +npxgetregs(td, addr) + struct thread *td; + union savefpu *addr; +{ + register_t s; + + if (!npx_exists) + return (_MC_FPOWNED_NONE); + + if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) { + if (npx_cleanstate_ready) + bcopy(&npx_cleanstate, addr, sizeof(npx_cleanstate)); + else + bzero(addr, sizeof(*addr)); + return (_MC_FPOWNED_NONE); + } + s = intr_disable(); + if (td == PCPU_GET(fpcurthread)) { + fpusave(addr); +#ifdef CPU_ENABLE_SSE + if (!cpu_fxsr) +#endif + /* + * fnsave initializes the FPU and destroys whatever + * context it contains. Make sure the FPU owner + * starts with a clean state next time. + */ + npxdrop(); + intr_restore(s); + return (_MC_FPOWNED_FPU); + } else { + intr_restore(s); + bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); + return (_MC_FPOWNED_PCB); + } +} + +/* + * Set the state of the FPU. + */ +void +npxsetregs(td, addr) + struct thread *td; + union savefpu *addr; +{ + register_t s; + + if (!npx_exists) + return; + + s = intr_disable(); + if (td == PCPU_GET(fpcurthread)) { + fpurstor(addr); + intr_restore(s); + } else { + intr_restore(s); + bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); + } + curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE; +} + +static void +fpusave(addr) + union savefpu *addr; +{ + +#ifdef CPU_ENABLE_SSE + if (cpu_fxsr) + fxsave(addr); + else +#endif + fnsave(addr); +} + +static void +fpurstor(addr) + union savefpu *addr; +{ + +#ifdef CPU_ENABLE_SSE + if (cpu_fxsr) + fxrstor(addr); + else +#endif + frstor(addr); +} + +#ifdef I586_CPU_XXX +static long +timezero(funcname, func) + const char *funcname; + void (*func)(void *buf, size_t len); + +{ + void *buf; +#define BUFSIZE 1048576 + long usec; + struct timeval finish, start; + + buf = malloc(BUFSIZE, M_TEMP, M_NOWAIT); + if (buf == NULL) + return (BUFSIZE); + microtime(&start); + (*func)(buf, BUFSIZE); + microtime(&finish); + usec = 1000000 * (finish.tv_sec - start.tv_sec) + + finish.tv_usec - start.tv_usec; + if (usec <= 0) + usec = 1; + if 
(bootverbose) + printf("%s bandwidth = %u kBps\n", funcname, + (u_int32_t)(((BUFSIZE >> 10) * 1000000) / usec)); + free(buf, M_TEMP); + return (usec); +} +#endif /* I586_CPU */ + +static device_method_t npx_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, npx_identify), + DEVMETHOD(device_probe, npx_probe), + DEVMETHOD(device_attach, npx_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + { 0, 0 } +}; + +static driver_t npx_driver = { + "npx", + npx_methods, + 1, /* no softc */ +}; + +static devclass_t npx_devclass; +DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); + +#ifdef DEV_ISA +/* + * We prefer to attach to the root nexus so that the usual case (exception 16) + * doesn't describe the processor as being `on isa'. + */ +DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); + +/* + * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. 
+ */ +static struct isa_pnp_id npxisa_ids[] = { + { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ + { 0 } +}; + +static int +npxisa_probe(device_t dev) +{ + int result; + if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { + device_quiet(dev); + } + return(result); +} + +static int +npxisa_attach(device_t dev) +{ + return (0); +} + +static device_method_t npxisa_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, npxisa_probe), + DEVMETHOD(device_attach, npxisa_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + { 0, 0 } +}; + +static driver_t npxisa_driver = { + "npxisa", + npxisa_methods, + 1, /* no softc */ +}; + +static devclass_t npxisa_devclass; + +DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); +#ifndef PC98 +DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); +#endif +#endif /* DEV_ISA */ diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c new file mode 100644 index 0000000000..e25f218eb3 --- /dev/null +++ b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c @@ -0,0 +1,1436 @@ +/* + * + * Copyright (c) 2004 Kip Macy + * All rights reserved. + * + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opt_nfsroot.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sockio.h> +#include <sys/mbuf.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/socket.h> +#include <sys/queue.h> + +#include <net/if.h> +#include <net/if_arp.h> +#include <net/ethernet.h> +#include <net/if_dl.h> +#include <net/if_media.h> + +#include <net/bpf.h> + +#include <net/if_types.h> +#include <net/if_vlan_var.h> + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/clock.h> /* for DELAY */ +#include <machine/bus_memio.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <machine/frame.h> + + +#include <sys/bus.h> +#include <sys/rman.h> + +#include <machine/intr_machdep.h> + +#include <machine/xen-os.h> +#include <machine/hypervisor.h> +#include <machine/hypervisor-ifs.h> +#include <machine/xen_intr.h> +#include <machine/evtchn.h> +#include <machine/ctrl_if.h> + +struct xn_softc; +static void xn_txeof(struct xn_softc *); +static void xn_rxeof(struct xn_softc *); +static void xn_alloc_rx_buffers(struct xn_softc *); + +static void xn_tick_locked(struct xn_softc *); +static void xn_tick(void *); + +static void xn_intr(void *); +static void xn_start_locked(struct ifnet *); +static void xn_start(struct ifnet *); +static int xn_ioctl(struct ifnet *, u_long, caddr_t); +static void xn_ifinit_locked(struct xn_softc *); +static void 
xn_ifinit(void *); +static void xn_stop(struct xn_softc *); +#ifdef notyet +static void xn_watchdog(struct ifnet *); +#endif +/* Xenolinux helper functions */ +static void network_connect(struct xn_softc *, netif_fe_interface_status_t *); +static void create_netdev(int handle, struct xn_softc **); +static void netif_ctrlif_rx(ctrl_msg_t *,unsigned long); + +static void xn_free_rx_ring(struct xn_softc *); + +static void xn_free_tx_ring(struct xn_softc *); + + + +/* XXX: This isn't supported in FreeBSD, so ignore it for now. */ +#define TASK_UNINTERRUPTIBLE 0 +#define INVALID_P2M_ENTRY (~0UL) + +/* + * If the backend driver is pipelining transmit requests then we can be very + * aggressive in avoiding new-packet notifications -- only need to send a + * notification if there are no outstanding unreceived responses. + * If the backend may be buffering our transmit buffers for any reason then we + * are rather more conservative. + */ +#ifdef CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER +#define TX_TEST_IDX resp_prod /* aggressive: any outstanding responses? */ +#else +#define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */ +#endif + +/* + * Mbuf pointers. We need these to keep track of the virtual addresses + * of our mbuf chains since we can only convert from virtual to physical, + * not the other way around. The size must track the free index arrays. 
+ */ +struct xn_chain_data { + struct mbuf *xn_tx_chain[NETIF_TX_RING_SIZE+1]; + struct mbuf *xn_rx_chain[NETIF_RX_RING_SIZE+1]; +}; + +struct xn_softc { + struct arpcom arpcom; /* interface info */ + device_t xn_dev; + SLIST_ENTRY(xn_softc) xn_links; + struct mtx xn_mtx; + void *xn_intrhand; + struct resource *xn_res; + u_int8_t xn_ifno; /* interface number */ + struct xn_chain_data xn_cdata; /* mbufs */ + + netif_tx_interface_t *xn_tx_if; + netif_rx_interface_t *xn_rx_if; + + int xn_if_flags; + int xn_txcnt; + int xn_rxbufcnt; + struct callout xn_stat_ch; + unsigned int xn_irq; + unsigned int xn_evtchn; + + + /* What is the status of our connection to the remote backend? */ +#define BEST_CLOSED 0 +#define BEST_DISCONNECTED 1 +#define BEST_CONNECTED 2 + unsigned int xn_backend_state; + + /* Is this interface open or closed (down or up)? */ +#define UST_CLOSED 0 +#define UST_OPEN 1 + unsigned int xn_user_state; + + /* Receive-ring batched refills. */ +#define RX_MIN_TARGET 64 /* XXX: larger than linux. was causing packet + * loss at the default of 8. 
+ */ +#define RX_MAX_TARGET NETIF_RX_RING_SIZE + int xn_rx_target; /* number to allocate */ + struct mbuf *xn_rx_batch; /* head of the batch queue */ + struct mbuf *xn_rx_batchtail; + int xn_rx_batchlen; /* how many queued */ + + int xn_rx_resp_cons; + int xn_tx_resp_cons; + unsigned short xn_rx_free_idxs[NETIF_RX_RING_SIZE+1]; + unsigned short xn_tx_free_idxs[NETIF_RX_RING_SIZE+1]; +}; + +static unsigned long xn_rx_pfns[NETIF_RX_RING_SIZE]; +static multicall_entry_t xn_rx_mcl[NETIF_RX_RING_SIZE+1]; +static mmu_update_t xn_rx_mmu[NETIF_RX_RING_SIZE]; + +static SLIST_HEAD(, xn_softc) xn_dev_list = + SLIST_HEAD_INITIALIZER(xn_dev_list); + +#define XN_LOCK_INIT(_sc, _name) \ + mtx_init(&(_sc)->xn_mtx, _name, MTX_NETWORK_LOCK, MTX_DEF) +#define XN_LOCK(_sc) mtx_lock(&(_sc)->xn_mtx) +#define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->xn_mtx, MA_OWNED) +#define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->xn_mtx) +#define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->xn_mtx) + +/* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ +#define ADD_ID_TO_FREELIST(_list, _id) \ + (_list)[(_id)] = (_list)[0]; \ + (_list)[0] = (_id); +#define GET_ID_FROM_FREELIST(_list) \ + ({ unsigned short _id = (_list)[0]; \ + (_list)[0] = (_list)[_id]; \ + (unsigned short)_id; }) +#define FREELIST_EMPTY(_list, _maxid) \ + ((_list)[0] == (_maxid+1)) + +static char *status_name[] = { + [NETIF_INTERFACE_STATUS_CLOSED] = "closed", + [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", + [NETIF_INTERFACE_STATUS_CONNECTED] = "connected", + [NETIF_INTERFACE_STATUS_CHANGED] = "changed", +}; + +static char *be_state_name[] = { + [BEST_CLOSED] = "closed", + [BEST_DISCONNECTED] = "disconnected", + [BEST_CONNECTED] = "connected", +}; + +#define IPRINTK(fmt, args...) \ + printk("[XEN] " fmt, ##args) +#define WPRINTK(fmt, args...) 
\ + printk("[XEN] " fmt, ##args) + +static struct xn_softc * +find_sc_by_handle(unsigned int handle) +{ + struct xn_softc *sc; + SLIST_FOREACH(sc, &xn_dev_list, xn_links) + { + if ( sc->xn_ifno == handle ) + return sc; + } + return NULL; +} + +/** Network interface info. */ +struct netif_ctrl { + /** Number of interfaces. */ + int interface_n; + /** Number of connected interfaces. */ + int connected_n; + /** Error code. */ + int err; + int up; +}; + +static struct netif_ctrl netctrl; + +static void +netctrl_init(void) +{ + /* + * netctrl is already in bss, why are we setting it? + */ + memset(&netctrl, 0, sizeof(netctrl)); + netctrl.up = NETIF_DRIVER_STATUS_DOWN; +} + +/** Get or set a network interface error. + */ +static int +netctrl_err(int err) +{ + if ( (err < 0) && !netctrl.err ) + netctrl.err = err; + return netctrl.err; +} + +/** Test if all network interfaces are connected. + * + * @return 1 if all connected, 0 if not, negative error code otherwise + */ +static int +netctrl_connected(void) +{ + int ok; + + if (netctrl.err) + ok = netctrl.err; + else if (netctrl.up == NETIF_DRIVER_STATUS_UP) + ok = (netctrl.connected_n == netctrl.interface_n); + else + ok = 0; + + return ok; +} + +/** Count the connected network interfaces. + * + * @return connected count + */ +static int +netctrl_connected_count(void) +{ + + struct xn_softc *sc; + unsigned int connected; + + connected = 0; + + SLIST_FOREACH(sc, &xn_dev_list, xn_links) + { + if ( sc->xn_backend_state == BEST_CONNECTED ) + connected++; + } + + netctrl.connected_n = connected; + XENPRINTF("> connected_n=%d interface_n=%d\n", + netctrl.connected_n, netctrl.interface_n); + return connected; +} + +static __inline struct mbuf* +makembuf (struct mbuf *buf) +{ + struct mbuf *m = NULL; + + MGETHDR (m, M_DONTWAIT, MT_DATA); + + if (! 
m) + return 0; + + M_MOVE_PKTHDR(m, buf); + + MCLGET (m, M_DONTWAIT); + + m->m_pkthdr.len = buf->m_pkthdr.len; + m->m_len = buf->m_len; + m_copydata(buf, 0, buf->m_pkthdr.len, mtod(m,caddr_t) ); + m->m_ext.ext_args = (vm_paddr_t *)vtophys(mtod(m,caddr_t)); + + return m; +} + + + +static void +xn_free_rx_ring(struct xn_softc *sc) +{ +#if 0 + int i; + + for (i = 0; i < NETIF_RX_RING_SIZE; i++) { + if (sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)] != NULL) { + m_freem(sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)]); + sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)] = NULL; + } + } + + sc->xn_rx_resp_cons = 0; + sc->xn_rx_if->req_prod = 0; + sc->xn_rx_if->event = sc->xn_rx_resp_cons ; +#endif +} + +static void +xn_free_tx_ring(struct xn_softc *sc) +{ +#if 0 + int i; + + for (i = 0; i < NETIF_TX_RING_SIZE; i++) { + if (sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)] != NULL) { + m_freem(sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)]); + sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)] = NULL; + } + } + + return; +#endif +} + +static void +xn_alloc_rx_buffers(struct xn_softc *sc) +{ + unsigned short id; + struct mbuf *m_new, *next; + int i, batch_target; + NETIF_RING_IDX req_prod = sc->xn_rx_if->req_prod; + + if (unlikely(sc->xn_backend_state != BEST_CONNECTED) ) + return; + + /* + * Allocate skbuffs greedily, even though we batch updates to the + * receive ring. This creates a less bursty demand on the memory allocator, + * so should reduce the chance of failed allocation requests both for + * ourself and for other kernel subsystems. 
+ */ + batch_target = sc->xn_rx_target - (req_prod - sc->xn_rx_resp_cons); + for ( i = sc->xn_rx_batchlen; i < batch_target; i++, sc->xn_rx_batchlen++) { + MGETHDR(m_new, M_DONTWAIT, MT_DATA); + if (m_new == NULL) + break; + + MCLGET(m_new, M_DONTWAIT); + if (!(m_new->m_flags & M_EXT)) { + m_freem(m_new); + break; + } + m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; + + /* queue the mbufs allocated */ + if (!sc->xn_rx_batch) + sc->xn_rx_batch = m_new; + + if (sc->xn_rx_batchtail) + sc->xn_rx_batchtail->m_next = m_new; + sc->xn_rx_batchtail = m_new; + } + + /* Is the batch large enough to be worthwhile? */ + if ( i < (sc->xn_rx_target/2) ) + return; + + for (i = 0, m_new = sc->xn_rx_batch; m_new; + i++, sc->xn_rx_batchlen--, m_new = next) { + + next = m_new->m_next; + m_new->m_next = NULL; + + m_new->m_ext.ext_args = (vm_paddr_t *)vtophys(m_new->m_ext.ext_buf); + + id = GET_ID_FROM_FREELIST(sc->xn_rx_free_idxs); + KASSERT(id != 0, ("alloc_rx_buffers: found free receive index of 0\n")); + sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(id)] = m_new; + + sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id; + + xn_rx_pfns[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; + + /* Remove this page from pseudo phys map before passing back to Xen. */ + xen_phys_machine[((unsigned long)m_new->m_ext.ext_args >> PAGE_SHIFT)] + = INVALID_P2M_ENTRY; + + xn_rx_mcl[i].op = __HYPERVISOR_update_va_mapping; + xn_rx_mcl[i].args[0] = (unsigned long)mtod(m_new,vm_offset_t) + >> PAGE_SHIFT; + xn_rx_mcl[i].args[1] = 0; + xn_rx_mcl[i].args[2] = 0; + + } + + KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ + KASSERT(sc->xn_rx_batchlen == 0, ("not all mbufs processed")); + sc->xn_rx_batch = sc->xn_rx_batchtail = NULL; + + /* + * We may have allocated buffers which have entries outstanding + in the page * update queue -- make sure we flush those first! */ + PT_UPDATES_FLUSH(); + + /* After all PTEs have been zapped we blow away stale TLB entries. 
*/ + xn_rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB; + + /* Give away a batch of pages. */ + xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op; + xn_rx_mcl[i].args[0] = (unsigned long) MEMOP_decrease_reservation; + xn_rx_mcl[i].args[1] = (unsigned long)xn_rx_pfns; + xn_rx_mcl[i].args[2] = (unsigned long)i; + xn_rx_mcl[i].args[3] = 0; + xn_rx_mcl[i].args[4] = DOMID_SELF; + + /* Zap PTEs and give away pages in one big multicall. */ + (void)HYPERVISOR_multicall(xn_rx_mcl, i+1); + + /* Check return status of HYPERVISOR_dom_mem_op(). */ + if ( xn_rx_mcl[i].args[5] != i ) + panic("Unable to reduce memory reservation\n"); + + /* Above is a suitable barrier to ensure backend will see requests. */ + sc->xn_rx_if->req_prod = req_prod + i; + + /* Adjust our floating fill target if we risked running out of buffers. */ + if ( ((req_prod - sc->xn_rx_if->resp_prod) < (sc->xn_rx_target / 4)) && + ((sc->xn_rx_target *= 2) > RX_MAX_TARGET) ) + sc->xn_rx_target = RX_MAX_TARGET; +} + +static void +xn_rxeof(struct xn_softc *sc) +{ + struct ifnet *ifp; + netif_rx_response_t *rx; + NETIF_RING_IDX i, rp; + mmu_update_t *mmu = xn_rx_mmu; + multicall_entry_t *mcl = xn_rx_mcl; + struct mbuf *tail_mbuf = NULL, *head_mbuf = NULL, *m, *next; + + XN_LOCK_ASSERT(sc); + if (sc->xn_backend_state != BEST_CONNECTED) + return; + + ifp = &sc->arpcom.ac_if; + + rp = sc->xn_rx_if->resp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + for (i = sc->xn_rx_resp_cons; i != rp; i++) { + + rx = &sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(i)].resp; + KASSERT(rx->id != 0, ("xn_rxeof: found free receive index of 0\n")); + + /* + * An error here is very odd. Usually indicates a backend bug, + * low-memory condition, or that we didn't have reservation headroom. + * Whatever - print an error and queue the id again straight away. 
+ */ + if (unlikely(rx->status <= 0)) { + printk("bad buffer on RX ring!(%d)\n", rx->status); + sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(sc->xn_rx_if->req_prod)].req.id + = rx->id; + wmb(); + sc->xn_rx_if->req_prod++; + continue; + } + + m = (struct mbuf *) + sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(rx->id)]; + if (m->m_next) + panic("mbuf is already part of a valid mbuf chain"); + ADD_ID_TO_FREELIST(sc->xn_rx_free_idxs, rx->id); + + m->m_data += (rx->addr & PAGE_MASK); + m->m_pkthdr.len = m->m_len = rx->status; + m->m_pkthdr.rcvif = ifp; + + /* Remap the page. */ + mmu->ptr = (rx->addr & ~PAGE_MASK) | MMU_MACHPHYS_UPDATE; + mmu->val = (unsigned long)m->m_ext.ext_args >> PAGE_SHIFT; + mmu++; + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = (unsigned long)m->m_data >> PAGE_SHIFT; + mcl->args[1] = (rx->addr & ~PAGE_MASK) | PG_KERNEL; + mcl->args[2] = 0; + mcl++; + + xen_phys_machine[((unsigned long)m->m_ext.ext_args >> PAGE_SHIFT)] = + (rx->addr >> PAGE_SHIFT); + + if (unlikely(!head_mbuf)) + head_mbuf = m; + + if (tail_mbuf) + tail_mbuf->m_next = m; + tail_mbuf = m; + + sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(rx->id)] = NULL; + sc->xn_rxbufcnt++; + } + + /* Do all the remapping work, and M->P updates, in one big hypercall. */ + if (likely((mcl - xn_rx_mcl) != 0)) { + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)xn_rx_mmu; + mcl->args[1] = mmu - xn_rx_mmu; + mcl->args[2] = 0; + mcl++; + (void)HYPERVISOR_multicall(xn_rx_mcl, mcl - xn_rx_mcl); + } + + + /* + * Process all the mbufs after the remapping is complete. + * Break the mbuf chain first though. + */ + for (m = head_mbuf; m; m = next) { + next = m->m_next; + m->m_next = NULL; + + ifp->if_ipackets++; + + XN_UNLOCK(sc); + + /* Pass it up. */ + (*ifp->if_input)(ifp, m); + XN_LOCK(sc); + } + + sc->xn_rx_resp_cons = i; + + /* If we get a callback with very few responses, reduce fill target. */ + /* NB. Note exponential increase, linear decrease. 
*/ + if (((sc->xn_rx_if->req_prod - sc->xn_rx_if->resp_prod) > + ((3*sc->xn_rx_target) / 4)) && (--sc->xn_rx_target < RX_MIN_TARGET)) + sc->xn_rx_target = RX_MIN_TARGET; + + xn_alloc_rx_buffers(sc); + + sc->xn_rx_if->event = i + 1; +} + +static void +xn_txeof(struct xn_softc *sc) +{ + NETIF_RING_IDX i, prod; + unsigned short id; + struct ifnet *ifp; + struct mbuf *m; + + XN_LOCK_ASSERT(sc); + + if (sc->xn_backend_state != BEST_CONNECTED) + return; + + ifp = &sc->arpcom.ac_if; + ifp->if_timer = 0; + + do { + prod = sc->xn_tx_if->resp_prod; + + for (i = sc->xn_tx_resp_cons; i != prod; i++) { + id = sc->xn_tx_if->ring[MASK_NETIF_TX_IDX(i)].resp.id; + m = sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)]; + + KASSERT(m != NULL, ("mbuf not found in xn_tx_chain")); + M_ASSERTVALID(m); + + m_freem(m); + sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)] = NULL; + ADD_ID_TO_FREELIST(sc->xn_tx_free_idxs, id); + sc->xn_txcnt--; + } + sc->xn_tx_resp_cons = prod; + + /* + * Set a new event, then check for race with update of tx_cons. Note + * that it is essential to schedule a callback, no matter how few + * buffers are pending. Even if there is space in the transmit ring, + * higher layers may be blocked because too much data is outstanding: + * in such cases notification from Xen is likely to be the only kick + * that we'll get. + */ + sc->xn_tx_if->event = + prod + ((sc->xn_tx_if->req_prod - prod) >> 1) + 1; + + mb(); + + } while (prod != sc->xn_tx_if->resp_prod); +} + +static void +xn_intr(void *xsc) +{ + struct xn_softc *sc = xsc; + struct ifnet *ifp = &sc->arpcom.ac_if; + + XN_LOCK(sc); + + /* sometimes we seem to lose packets. stay in the interrupt handler while + * there is stuff to process: continually recheck the response producer. 
+ */ + do { + xn_txeof(sc); + + if (sc->xn_rx_resp_cons != sc->xn_rx_if->resp_prod && + sc->xn_user_state == UST_OPEN) + xn_rxeof(sc); + + if (ifp->if_flags & IFF_RUNNING && ifp->if_snd.ifq_head != NULL) + xn_start_locked(ifp); + } while (sc->xn_rx_resp_cons != sc->xn_rx_if->resp_prod && + sc->xn_user_state == UST_OPEN); + + XN_UNLOCK(sc); + return; +} + +static void +xn_tick_locked(struct xn_softc *sc) +{ + XN_LOCK_ASSERT(sc); + callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); + + /* XXX placeholder for printing debug information */ + +} + + +static void +xn_tick(void *xsc) +{ + struct xn_softc *sc; + + sc = xsc; + XN_LOCK(sc); + xn_tick_locked(sc); + XN_UNLOCK(sc); + +} +static void +xn_start_locked(struct ifnet *ifp) +{ + unsigned short id; + struct mbuf *m_head, *new_m; + struct xn_softc *sc = ifp->if_softc; + netif_tx_request_t *tx; + NETIF_RING_IDX i, start; + + if (sc->xn_backend_state != BEST_CONNECTED) + return; + + for (i = start = sc->xn_tx_if->req_prod; TRUE; i++, sc->xn_txcnt++) { + + IF_DEQUEUE(&ifp->if_snd, m_head); + if (m_head == NULL) + break; + + if (FREELIST_EMPTY(sc->xn_tx_free_idxs, NETIF_TX_RING_SIZE)) { + IF_PREPEND(&ifp->if_snd, m_head); + ifp->if_flags |= IFF_OACTIVE; + break; + } + + i = sc->xn_tx_if->req_prod; + + id = GET_ID_FROM_FREELIST(sc->xn_tx_free_idxs); + + /* + * Start packing the mbufs in this chain into + * the fragment pointers. Stop when we run out + * of fragments or hit the end of the mbuf chain. + */ + new_m = makembuf(m_head); + tx = &(sc->xn_tx_if->ring[MASK_NETIF_TX_IDX(i)].req); + tx->id = id; + tx->size = new_m->m_pkthdr.len; + new_m->m_next = NULL; + new_m->m_nextpkt = NULL; + + m_freem(m_head); + tx->addr = vtomach(mtod(new_m, vm_offset_t)); + + sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)] = new_m; + BPF_MTAP(ifp, new_m); + } + + sc->xn_tx_if->req_prod = i; + xn_txeof(sc); + + /* Only notify Xen if we really have to. 
*/ + if (sc->xn_tx_if->TX_TEST_IDX == start) + notify_via_evtchn(sc->xn_evtchn); + return; +} + +static void +xn_start(struct ifnet *ifp) +{ + struct xn_softc *sc; + sc = ifp->if_softc; + XN_LOCK(sc); + xn_start_locked(ifp); + XN_UNLOCK(sc); +} + + + +/* equivalent of network_open() in Linux */ +static void +xn_ifinit_locked(struct xn_softc *sc) +{ + struct ifnet *ifp; + + XN_LOCK_ASSERT(sc); + + ifp = &sc->arpcom.ac_if; + + if (ifp->if_flags & IFF_RUNNING) + return; + + xn_stop(sc); + + sc->xn_user_state = UST_OPEN; + + xn_alloc_rx_buffers(sc); + sc->xn_rx_if->event = sc->xn_rx_resp_cons + 1; + + ifp->if_flags |= IFF_RUNNING; + ifp->if_flags &= ~IFF_OACTIVE; + + callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); + +} + + +static void +xn_ifinit(void *xsc) +{ + struct xn_softc *sc = xsc; + + XN_LOCK(sc); + xn_ifinit_locked(sc); + XN_UNLOCK(sc); + +} + + +static int +xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct xn_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *) data; + int mask, error = 0; + switch(cmd) { + case SIOCSIFMTU: + /* XXX can we alter the MTU on a VN ?*/ +#ifdef notyet + if (ifr->ifr_mtu > XN_JUMBO_MTU) + error = EINVAL; + else +#endif + { + ifp->if_mtu = ifr->ifr_mtu; + ifp->if_flags &= ~IFF_RUNNING; + xn_ifinit(sc); + } + break; + case SIOCSIFFLAGS: + XN_LOCK(sc); + if (ifp->if_flags & IFF_UP) { + /* + * If only the state of the PROMISC flag changed, + * then just use the 'set promisc mode' command + * instead of reinitializing the entire NIC. Doing + * a full re-init means reloading the firmware and + * waiting for it to start up, which may take a + * second or two. 
+ */ +#ifdef notyet + /* No promiscuous mode with Xen */ + if (ifp->if_flags & IFF_RUNNING && + ifp->if_flags & IFF_PROMISC && + !(sc->xn_if_flags & IFF_PROMISC)) { + XN_SETBIT(sc, XN_RX_MODE, + XN_RXMODE_RX_PROMISC); + } else if (ifp->if_flags & IFF_RUNNING && + !(ifp->if_flags & IFF_PROMISC) && + sc->xn_if_flags & IFF_PROMISC) { + XN_CLRBIT(sc, XN_RX_MODE, + XN_RXMODE_RX_PROMISC); + } else +#endif + xn_ifinit_locked(sc); + } else { + if (ifp->if_flags & IFF_RUNNING) { + xn_stop(sc); + } + } + sc->xn_if_flags = ifp->if_flags; + XN_UNLOCK(sc); + error = 0; + break; + case SIOCSIFCAP: + mask = ifr->ifr_reqcap ^ ifp->if_capenable; + if (mask & IFCAP_HWCSUM) { + if (IFCAP_HWCSUM & ifp->if_capenable) + ifp->if_capenable &= ~IFCAP_HWCSUM; + else + ifp->if_capenable |= IFCAP_HWCSUM; + } + error = 0; + break; + case SIOCADDMULTI: + case SIOCDELMULTI: +#ifdef notyet + if (ifp->if_flags & IFF_RUNNING) { + XN_LOCK(sc); + xn_setmulti(sc); + XN_UNLOCK(sc); + error = 0; + } +#endif + /* FALLTHROUGH */ + case SIOCSIFMEDIA: + case SIOCGIFMEDIA: + error = EINVAL; + break; + default: + error = ether_ioctl(ifp, cmd, data); + } + + return (error); +} + +static void +xn_stop(struct xn_softc *sc) +{ + struct ifnet *ifp; + + XN_LOCK_ASSERT(sc); + + ifp = &sc->arpcom.ac_if; + + callout_stop(&sc->xn_stat_ch); + + xn_free_rx_ring(sc); + xn_free_tx_ring(sc); + + ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE); +} + +/* START of Xenolinux helper functions adapted to FreeBSD */ +static void +network_connect(struct xn_softc *sc, netif_fe_interface_status_t *status) +{ + struct ifnet *ifp; + int i, requeue_idx; + netif_tx_request_t *tx; + + XN_LOCK(sc); + + ifp = &sc->arpcom.ac_if; + /* first time through, setup the ifp info */ + if (ifp->if_softc == NULL) { + ifp->if_softc = sc; + if_initname(ifp, "xn", sc->xn_ifno); + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; + ifp->if_ioctl = xn_ioctl; + ifp->if_output = ether_output; + ifp->if_start = xn_start; +#ifdef notyet + ifp->if_watchdog = 
xn_watchdog; +#endif + ifp->if_init = xn_ifinit; + ifp->if_mtu = ETHERMTU; + ifp->if_snd.ifq_maxlen = NETIF_TX_RING_SIZE - 1; + +#ifdef notyet + ifp->if_hwassist = XN_CSUM_FEATURES; + ifp->if_capabilities = IFCAP_HWCSUM; + ifp->if_capenable = ifp->if_capabilities; +#endif + + ether_ifattach(ifp, sc->arpcom.ac_enaddr); + callout_init(&sc->xn_stat_ch, CALLOUT_MPSAFE); + } + + /* Recovery procedure: */ + + /* Step 1: Reinitialise variables. */ + sc->xn_rx_resp_cons = sc->xn_tx_resp_cons = 0; + sc->xn_rxbufcnt = sc->xn_txcnt = 0; + sc->xn_rx_if->event = sc->xn_tx_if->event = 1; + + /* Step 2: Rebuild the RX and TX ring contents. + * NB. We could just free the queued TX packets now but we hope + * that sending them out might do some good. We have to rebuild + * the RX ring because some of our pages are currently flipped out + * so we can't just free the RX skbs. + */ + + /* Rebuild the TX buffer freelist and the TX ring itself. + * NB. This reorders packets. We could keep more private state + * to avoid this but maybe it doesn't matter so much given the + * interface has been down. + */ + for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ ) + { + if (sc->xn_cdata.xn_tx_chain[i] != NULL) + { + struct mbuf *m = sc->xn_cdata.xn_tx_chain[i]; + + tx = &sc->xn_tx_if->ring[requeue_idx++].req; + + tx->id = i; + tx->addr = vtomach(mtod(m, vm_offset_t)); + tx->size = m->m_pkthdr.len; + sc->xn_txcnt++; + } + } + wmb(); + sc->xn_tx_if->req_prod = requeue_idx; + + /* Rebuild the RX buffer freelist and the RX ring itself. */ + for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ ) + if (sc->xn_cdata.xn_rx_chain[i] != NULL) + sc->xn_rx_if->ring[requeue_idx++].req.id = i; + wmb(); + sc->xn_rx_if->req_prod = requeue_idx; + + printk("[XEN] Netfront recovered tx=%d rxfree=%d\n", + sc->xn_tx_if->req_prod,sc->xn_rx_if->req_prod); + + + /* Step 3: All public and private state should now be sane. 
Get + * ready to start sending and receiving packets and give the driver + * domain a kick because we've probably just requeued some + * packets. + */ + sc->xn_backend_state = BEST_CONNECTED; + wmb(); + notify_via_evtchn(status->evtchn); + xn_txeof(sc); + + XN_UNLOCK(sc); +} + + +static void +vif_show(struct xn_softc *sc) +{ +#if DEBUG + if (sc) { + IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n", + sc->xn_ifno, + be_state_name[sc->xn_backend_state], + sc->xn_user_state ? "open" : "closed", + sc->xn_evtchn, + sc->xn_irq, + sc->xn_tx_if, + sc->xn_rx_if); + } else { + IPRINTK("<vif NULL>\n"); + } +#endif +} + +/* Send a connect message to xend to tell it to bring up the interface. */ +static void +send_interface_connect(struct xn_softc *sc) +{ + ctrl_msg_t cmsg = { + .type = CMSG_NETIF_FE, + .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT, + .length = sizeof(netif_fe_interface_connect_t), + }; + netif_fe_interface_connect_t *msg = (void*)cmsg.msg; + + vif_show(sc); + msg->handle = sc->xn_ifno; + msg->tx_shmem_frame = (vtomach(sc->xn_tx_if) >> PAGE_SHIFT); + msg->rx_shmem_frame = (vtomach(sc->xn_rx_if) >> PAGE_SHIFT); + + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); +} + +/* Send a driver status notification to the domain controller. */ +static int +send_driver_status(int ok) +{ + int err = 0; + ctrl_msg_t cmsg = { + .type = CMSG_NETIF_FE, + .subtype = CMSG_NETIF_FE_DRIVER_STATUS, + .length = sizeof(netif_fe_driver_status_t), + }; + netif_fe_driver_status_t *msg = (void*)cmsg.msg; + + msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN); + err = ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); + return err; +} + +/* Stop network device and free tx/rx queues and irq. + */ +static void +vif_release(struct xn_softc *sc) +{ + /* Stop old i/f to prevent errors whilst we rebuild the state. */ + XN_LOCK(sc); + /* sc->xn_backend_state = BEST_DISCONNECTED; */ + XN_UNLOCK(sc); + + /* Free resources. 
*/ + if(sc->xn_tx_if != NULL) { + unbind_evtchn_from_irq(sc->xn_evtchn); + free(sc->xn_tx_if, M_DEVBUF); + free(sc->xn_rx_if, M_DEVBUF); + sc->xn_irq = 0; + sc->xn_evtchn = 0; + sc->xn_tx_if = NULL; + sc->xn_rx_if = NULL; + } +} + +/* Release vif resources and close it down completely. + */ +static void +vif_close(struct xn_softc *sc) +{ + vif_show(sc); + WPRINTK("Unexpected netif-CLOSED message in state %s\n", + be_state_name[sc->xn_backend_state]); + vif_release(sc); + sc->xn_backend_state = BEST_CLOSED; + /* todo: take dev down and free. */ + vif_show(sc); +} + +/* Move the vif into disconnected state. + * Allocates tx/rx pages. + * Sends connect message to xend. + */ +static void +vif_disconnect(struct xn_softc *sc) +{ + if (sc->xn_tx_if) free(sc->xn_tx_if, M_DEVBUF); + if (sc->xn_rx_if) free(sc->xn_rx_if, M_DEVBUF); + + // Before this sc->xn_tx_if and sc->xn_rx_if had better be null. + sc->xn_tx_if = (netif_tx_interface_t *)malloc(PAGE_SIZE,M_DEVBUF,M_WAITOK); + sc->xn_rx_if = (netif_rx_interface_t *)malloc(PAGE_SIZE,M_DEVBUF,M_WAITOK); + memset(sc->xn_tx_if, 0, PAGE_SIZE); + memset(sc->xn_rx_if, 0, PAGE_SIZE); + sc->xn_backend_state = BEST_DISCONNECTED; + send_interface_connect(sc); + vif_show(sc); +} + +/* Begin interface recovery. + * + * NB. Whilst we're recovering, we turn the carrier state off. We + * take measures to ensure that this device isn't used for + * anything. We also stop the queue for this device. Various + * different approaches (e.g. continuing to buffer packets) have + * been tested but don't appear to improve the overall impact on + * TCP connections. + * + * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery + * is initiated by a special "RESET" message - disconnect could + * just mean we're not allowed to use this interface any more. 
+ */ +static void +vif_reset(struct xn_softc *sc) +{ + IPRINTK("Attempting to reconnect network interface: handle=%u\n", + sc->xn_ifno); + vif_release(sc); + vif_disconnect(sc); + vif_show(sc); +} + +/* Move the vif into connected state. + * Sets the mac and event channel from the message. + * Binds the irq to the event channel. + */ +static void +vif_connect( + struct xn_softc *sc, netif_fe_interface_status_t *status) +{ + memcpy(sc->arpcom.ac_enaddr, status->mac, ETHER_ADDR_LEN); + network_connect(sc, status); + + sc->xn_evtchn = status->evtchn; + sc->xn_irq = bind_evtchn_to_irq(sc->xn_evtchn); + + (void)intr_add_handler("xn", sc->xn_irq, (driver_intr_t *)xn_intr, sc, + INTR_TYPE_NET | INTR_MPSAFE, &sc->xn_intrhand); + netctrl_connected_count(); + /* vif_wake(dev); Not needed for FreeBSD */ + vif_show(sc); +} + +/** Create a network device. + * @param handle device handle + */ +static void +create_netdev(int handle, struct xn_softc **sc) +{ + int i; + + *sc = (struct xn_softc *)malloc(sizeof(**sc), M_DEVBUF, M_WAITOK); + memset(*sc, 0, sizeof(struct xn_softc)); + + (*sc)->xn_backend_state = BEST_CLOSED; + (*sc)->xn_user_state = UST_CLOSED; + (*sc)->xn_ifno = handle; + + XN_LOCK_INIT(*sc, "xnetif"); + (*sc)->xn_rx_target = RX_MIN_TARGET; + + /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ + for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ ) + (*sc)->xn_tx_free_idxs[i] = (i+1); + for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ ) + (*sc)->xn_rx_free_idxs[i] = (i+1); + + SLIST_INSERT_HEAD(&xn_dev_list, *sc, xn_links); +} + +/* Get the target interface for a status message. + * Creates the interface when it makes sense. + * The returned interface may be null when there is no error. 
+ * + * @param status status message + * @param sc return parameter for interface state + * @return 0 on success, error code otherwise + */ +static int +target_vif(netif_fe_interface_status_t *status, struct xn_softc **sc) +{ + int err = 0; + + XENPRINTF("> handle=%d\n", status->handle); + if ( status->handle < 0 ) + { + err = -EINVAL; + goto exit; + } + + if ( (*sc = find_sc_by_handle(status->handle)) != NULL ) + goto exit; + + if ( status->status == NETIF_INTERFACE_STATUS_CLOSED ) + goto exit; + if ( status->status == NETIF_INTERFACE_STATUS_CHANGED ) + goto exit; + + /* It's a new interface in a good state - create it. */ + XENPRINTF("> create device...\n"); + create_netdev(status->handle, sc); + netctrl.interface_n++; + +exit: + return err; +} + +/* Handle an interface status message. */ +static void +netif_interface_status(netif_fe_interface_status_t *status) +{ + int err = 0; + struct xn_softc *sc = NULL; + + XENPRINTF("> status=%s handle=%d\n", + status_name[status->status], status->handle); + + if ( (err = target_vif(status, &sc)) != 0 ) + { + WPRINTK("Invalid netif: handle=%u\n", status->handle); + return; + } + + if ( sc == NULL ) + { + XENPRINTF("> no vif\n"); + return; + } + + vif_show(sc); + + switch ( status->status ) + { + case NETIF_INTERFACE_STATUS_CLOSED: + switch ( sc->xn_backend_state ) + { + case BEST_CLOSED: + case BEST_DISCONNECTED: + case BEST_CONNECTED: + vif_close(sc); + break; + } + break; + + case NETIF_INTERFACE_STATUS_DISCONNECTED: + switch ( sc->xn_backend_state ) + { + case BEST_CLOSED: + vif_disconnect(sc); + break; + case BEST_DISCONNECTED: + case BEST_CONNECTED: + vif_reset(sc); + break; + } + break; + + case NETIF_INTERFACE_STATUS_CONNECTED: + switch ( sc->xn_backend_state ) + { + case BEST_CLOSED: + WPRINTK("Unexpected netif status %s in state %s\n", + status_name[status->status], + be_state_name[sc->xn_backend_state]); + vif_disconnect(sc); + vif_connect(sc, status); + break; + case BEST_DISCONNECTED: + vif_connect(sc, status); 
+ break; + } + break; + + case NETIF_INTERFACE_STATUS_CHANGED: + /* + * The domain controller is notifying us that a device has been + * added or removed. + */ + break; + + default: + WPRINTK("Invalid netif status code %d\n", status->status); + break; + } + vif_show(sc); +} + +/* + * Initialize the network control interface. + */ +static void +netif_driver_status(netif_fe_driver_status_t *status) +{ + XENPRINTF("> status=%d\n", status->status); + netctrl.up = status->status; + //netctrl.interface_n = status->max_handle; + //netctrl.connected_n = 0; + netctrl_connected_count(); +} + +/* Receive handler for control messages. */ +static void +netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) +{ + + switch ( msg->subtype ) + { + case CMSG_NETIF_FE_INTERFACE_STATUS: + if ( msg->length != sizeof(netif_fe_interface_status_t) ) + goto error; + netif_interface_status((netif_fe_interface_status_t *) + &msg->msg[0]); + break; + + case CMSG_NETIF_FE_DRIVER_STATUS: + if ( msg->length != sizeof(netif_fe_driver_status_t) ) + goto error; + netif_driver_status((netif_fe_driver_status_t *) + &msg->msg[0]); + break; + + error: + default: + msg->length = 0; + break; + } + + ctrl_if_send_response(msg); +} + +#if 1 +/* Wait for all interfaces to be connected. + * + * This works OK, but we'd like to use the probing mode (see below). + */ +static int probe_interfaces(void) +{ + int err = 0, conn = 0; + int wait_i, wait_n = 100; + + for ( wait_i = 0; wait_i < wait_n; wait_i++) + { + XENPRINTF("> wait_i=%d\n", wait_i); + conn = netctrl_connected(); + if(conn) break; + tsleep(&xn_dev_list, PWAIT | PCATCH, "netif", hz); + } + + XENPRINTF("> wait finished...\n"); + if ( conn <= 0 ) + { + err = netctrl_err(-ENETDOWN); + WPRINTK("Failed to connect all virtual interfaces: err=%d\n", err); + } + + XENPRINTF("< err=%d\n", err); + + return err; +} +#else +/* Probe for interfaces until no more are found. + * + * This is the mode we'd like to use, but at the moment it panics the kernel. 
+*/ +static int +probe_interfaces(void) +{ + int err = 0; + int wait_i, wait_n = 100; + ctrl_msg_t cmsg = { + .type = CMSG_NETIF_FE, + .subtype = CMSG_NETIF_FE_INTERFACE_STATUS, + .length = sizeof(netif_fe_interface_status_t), + }; + netif_fe_interface_status_t msg = {}; + ctrl_msg_t rmsg = {}; + netif_fe_interface_status_t *reply = (void*)rmsg.msg; + int state = TASK_UNINTERRUPTIBLE; + uint32_t query = -1; + + + netctrl.interface_n = 0; + for ( wait_i = 0; wait_i < wait_n; wait_i++ ) + { + XENPRINTF("> wait_i=%d query=%d\n", wait_i, query); + msg.handle = query; + memcpy(cmsg.msg, &msg, sizeof(msg)); + XENPRINTF("> set_current_state...\n"); + set_current_state(state); + XENPRINTF("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply); + XENPRINTF("> sending...\n"); + err = ctrl_if_send_message_and_get_response(&cmsg, &rmsg, state); + XENPRINTF("> err=%d\n", err); + if(err) goto exit; + XENPRINTF("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply); + if((int)reply->handle < 0){ + // No more interfaces. 
+ break; + } + query = -reply->handle - 2; + XENPRINTF(">netif_interface_status ...\n"); + netif_interface_status(reply); + } + + exit: + if ( err ) + { + err = netctrl_err(-ENETDOWN); + WPRINTK("Connecting virtual network interfaces failed: err=%d\n", err); + } + + XENPRINTF("< err=%d\n", err); + return err; +} + +#endif + +static void +xn_init(void *unused) +{ + + int err = 0; + + netctrl_init(); + (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx, + CALLBACK_IN_BLOCKING_CONTEXT); + + send_driver_status(1); + err = probe_interfaces(); + + if (err) + ctrl_if_unregister_receiver(CMSG_NETIF_FE, netif_ctrlif_rx); +} + +SYSINIT(xndev, SI_SUB_PSEUDO, SI_ORDER_ANY, xn_init, NULL) diff --git a/freebsd-5.3-xen-sparse/kern/kern_fork.c b/freebsd-5.3-xen-sparse/kern/kern_fork.c new file mode 100644 index 0000000000..4b38ee45b6 --- /dev/null +++ b/freebsd-5.3-xen-sparse/kern/kern_fork.c @@ -0,0 +1,846 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/sys/kern/kern_fork.c,v 1.234.2.4 2004/09/18 04:11:35 julian Exp $"); + +#include "opt_ktrace.h" +#include "opt_mac.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysproto.h> +#include <sys/eventhandler.h> +#include <sys/filedesc.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/sysctl.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/pioctl.h> +#include <sys/resourcevar.h> +#include <sys/sched.h> +#include <sys/syscall.h> +#include <sys/vmmeter.h> +#include <sys/vnode.h> +#include <sys/acct.h> +#include <sys/mac.h> +#include <sys/ktr.h> +#include <sys/ktrace.h> +#include <sys/unistd.h> +#include <sys/sx.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_extern.h> +#include <vm/uma.h> + +#include <sys/user.h> +#include <machine/critical.h> + +#ifndef _SYS_SYSPROTO_H_ 
+struct fork_args { + int dummy; +}; +#endif + +static int forksleep; /* Place for fork1() to sleep on. */ + +/* + * MPSAFE + */ +/* ARGSUSED */ +int +fork(td, uap) + struct thread *td; + struct fork_args *uap; +{ + int error; + struct proc *p2; + + error = fork1(td, RFFDG | RFPROC, 0, &p2); + if (error == 0) { + td->td_retval[0] = p2->p_pid; + td->td_retval[1] = 0; + } + return (error); +} + +/* + * MPSAFE + */ +/* ARGSUSED */ +int +vfork(td, uap) + struct thread *td; + struct vfork_args *uap; +{ + int error; + struct proc *p2; + + error = fork1(td, RFFDG | RFPROC /* | RFPPWAIT | RFMEM */, 0, &p2); + if (error == 0) { + td->td_retval[0] = p2->p_pid; + td->td_retval[1] = 0; + } + return (error); +} + +/* + * MPSAFE + */ +int +rfork(td, uap) + struct thread *td; + struct rfork_args *uap; +{ + struct proc *p2; + int error; + + /* Don't allow kernel-only flags. */ + if ((uap->flags & RFKERNELONLY) != 0) + return (EINVAL); + + error = fork1(td, uap->flags, 0, &p2); + if (error == 0) { + td->td_retval[0] = p2 ? p2->p_pid : 0; + td->td_retval[1] = 0; + } + return (error); +} + +int nprocs = 1; /* process 0 */ +int lastpid = 0; +SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, + "Last used PID"); + +/* + * Random component to lastpid generation. We mix in a random factor to make + * it a little harder to predict. We sanity check the modulus value to avoid + * doing it in critical paths. Don't let it be too small or we pointlessly + * waste randomness entropy, and don't let it be impossibly large. Using a + * modulus that is too big causes a LOT more process table scans and slows + * down fork processing as the pidchecked caching is defeated. 
+ */ +static int randompid = 0; + +static int +sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) +{ + int error, pid; + + error = sysctl_wire_old_buffer(req, sizeof(int)); + if (error != 0) + return(error); + sx_xlock(&allproc_lock); + pid = randompid; + error = sysctl_handle_int(oidp, &pid, 0, req); + if (error == 0 && req->newptr != NULL) { + if (pid < 0 || pid > PID_MAX - 100) /* out of range */ + pid = PID_MAX - 100; + else if (pid < 2) /* NOP */ + pid = 0; + else if (pid < 100) /* Make it reasonable */ + pid = 100; + randompid = pid; + } + sx_xunlock(&allproc_lock); + return (error); +} + +SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, + 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); + +int +fork1(td, flags, pages, procp) + struct thread *td; + int flags; + int pages; + struct proc **procp; +{ + struct proc *p1, *p2, *pptr; + uid_t uid; + struct proc *newproc; + int ok, trypid; + static int curfail, pidchecked = 0; + static struct timeval lastfail; + struct filedesc *fd; + struct filedesc_to_leader *fdtol; + struct thread *td2; + struct ksegrp *kg2; + struct sigacts *newsigacts; + int error; + + /* Can't copy and clear. */ + if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) + return (EINVAL); + + p1 = td->td_proc; + + /* + * Here we don't create a new process, but we divorce + * certain parts of a process from itself. + */ + if ((flags & RFPROC) == 0) { + mtx_lock(&Giant); + vm_forkproc(td, NULL, NULL, flags); + mtx_unlock(&Giant); + + /* + * Close all file descriptors. + */ + if (flags & RFCFDG) { + struct filedesc *fdtmp; + FILEDESC_LOCK(td->td_proc->p_fd); + fdtmp = fdinit(td->td_proc->p_fd); + FILEDESC_UNLOCK(td->td_proc->p_fd); + fdfree(td); + p1->p_fd = fdtmp; + } + + /* + * Unshare file descriptors (from parent). 
+ */ + if (flags & RFFDG) { + FILEDESC_LOCK(p1->p_fd); + if (p1->p_fd->fd_refcnt > 1) { + struct filedesc *newfd; + + newfd = fdcopy(td->td_proc->p_fd); + FILEDESC_UNLOCK(p1->p_fd); + fdfree(td); + p1->p_fd = newfd; + } else + FILEDESC_UNLOCK(p1->p_fd); + } + *procp = NULL; + return (0); + } + + /* + * Note 1:1 allows for forking with one thread coming out on the + * other side with the expectation that the process is about to + * exec. + */ + if (p1->p_flag & P_HADTHREADS) { + /* + * Idle the other threads for a second. + * Since the user space is copied, it must remain stable. + * In addition, all threads (from the user perspective) + * need to either be suspended or in the kernel, + * where they will try restart in the parent and will + * be aborted in the child. + */ + PROC_LOCK(p1); + if (thread_single(SINGLE_NO_EXIT)) { + /* Abort. Someone else is single threading before us. */ + PROC_UNLOCK(p1); + return (ERESTART); + } + PROC_UNLOCK(p1); + /* + * All other activity in this process + * is now suspended at the user boundary, + * (or other safe places if we think of any). + */ + } + + /* Allocate new proc. */ + newproc = uma_zalloc(proc_zone, M_WAITOK); +#ifdef MAC + mac_init_proc(newproc); +#endif + knlist_init(&newproc->p_klist, &newproc->p_mtx); + + /* We have to lock the process tree while we look for a pid. */ + sx_slock(&proctree_lock); + + /* + * Although process entries are dynamically created, we still keep + * a global limit on the maximum number we will create. Don't allow + * a nonprivileged user to use the last ten processes; don't let root + * exceed the limit. The variable nprocs is the current number of + * processes, maxproc is the limit. + */ + sx_xlock(&allproc_lock); + uid = td->td_ucred->cr_ruid; + if ((nprocs >= maxproc - 10 && + suser_cred(td->td_ucred, SUSER_RUID) != 0) || + nprocs >= maxproc) { + error = EAGAIN; + goto fail; + } + + /* + * Increment the count of procs running with this uid. 
Don't allow + * a nonprivileged user to exceed their current limit. + */ + PROC_LOCK(p1); + ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, + (uid != 0) ? lim_cur(p1, RLIMIT_NPROC) : 0); + PROC_UNLOCK(p1); + if (!ok) { + error = EAGAIN; + goto fail; + } + + /* + * Increment the nprocs resource before blocking can occur. There + * are hard-limits as to the number of processes that can run. + */ + nprocs++; + + /* + * Find an unused process ID. We remember a range of unused IDs + * ready to use (from lastpid+1 through pidchecked-1). + * + * If RFHIGHPID is set (used during system boot), do not allocate + * low-numbered pids. + */ + trypid = lastpid + 1; + if (flags & RFHIGHPID) { + if (trypid < 10) + trypid = 10; + } else { + if (randompid) + trypid += arc4random() % randompid; + } +retry: + /* + * If the process ID prototype has wrapped around, + * restart somewhat above 0, as the low-numbered procs + * tend to include daemons that don't exit. + */ + if (trypid >= PID_MAX) { + trypid = trypid % PID_MAX; + if (trypid < 100) + trypid += 100; + pidchecked = 0; + } + if (trypid >= pidchecked) { + int doingzomb = 0; + + pidchecked = PID_MAX; + /* + * Scan the active and zombie procs to check whether this pid + * is in use. Remember the lowest pid that's greater + * than trypid, so we can avoid checking for a while. 
+ */ + p2 = LIST_FIRST(&allproc); +again: + for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) { + PROC_LOCK(p2); + while (p2->p_pid == trypid || + (p2->p_pgrp != NULL && + (p2->p_pgrp->pg_id == trypid || + (p2->p_session != NULL && + p2->p_session->s_sid == trypid)))) { + trypid++; + if (trypid >= pidchecked) { + PROC_UNLOCK(p2); + goto retry; + } + } + if (p2->p_pid > trypid && pidchecked > p2->p_pid) + pidchecked = p2->p_pid; + if (p2->p_pgrp != NULL) { + if (p2->p_pgrp->pg_id > trypid && + pidchecked > p2->p_pgrp->pg_id) + pidchecked = p2->p_pgrp->pg_id; + if (p2->p_session != NULL && + p2->p_session->s_sid > trypid && + pidchecked > p2->p_session->s_sid) + pidchecked = p2->p_session->s_sid; + } + PROC_UNLOCK(p2); + } + if (!doingzomb) { + doingzomb = 1; + p2 = LIST_FIRST(&zombproc); + goto again; + } + } + sx_sunlock(&proctree_lock); + + /* + * RFHIGHPID does not mess with the lastpid counter during boot. + */ + if (flags & RFHIGHPID) + pidchecked = 0; + else + lastpid = trypid; + + p2 = newproc; + p2->p_state = PRS_NEW; /* protect against others */ + p2->p_pid = trypid; + LIST_INSERT_HEAD(&allproc, p2, p_list); + LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); + sx_xunlock(&allproc_lock); + + /* + * Malloc things while we don't hold any locks. + */ + if (flags & RFSIGSHARE) + newsigacts = NULL; + else + newsigacts = sigacts_alloc(); + + /* + * Copy filedesc. + */ + if (flags & RFCFDG) { + FILEDESC_LOCK(td->td_proc->p_fd); + fd = fdinit(td->td_proc->p_fd); + FILEDESC_UNLOCK(td->td_proc->p_fd); + fdtol = NULL; + } else if (flags & RFFDG) { + FILEDESC_LOCK(p1->p_fd); + fd = fdcopy(td->td_proc->p_fd); + FILEDESC_UNLOCK(p1->p_fd); + fdtol = NULL; + } else { + fd = fdshare(p1->p_fd); + if (p1->p_fdtol == NULL) + p1->p_fdtol = + filedesc_to_leader_alloc(NULL, + NULL, + p1->p_leader); + if ((flags & RFTHREAD) != 0) { + /* + * Shared file descriptor table and + * shared process leaders. 
+ */ + fdtol = p1->p_fdtol; + FILEDESC_LOCK(p1->p_fd); + fdtol->fdl_refcount++; + FILEDESC_UNLOCK(p1->p_fd); + } else { + /* + * Shared file descriptor table, and + * different process leaders + */ + fdtol = filedesc_to_leader_alloc(p1->p_fdtol, + p1->p_fd, + p2); + } + } + /* + * Make a proc table entry for the new process. + * Start by zeroing the section of proc that is zero-initialized, + * then copy the section that is copied directly from the parent. + */ + td2 = FIRST_THREAD_IN_PROC(p2); + kg2 = FIRST_KSEGRP_IN_PROC(p2); + + /* Allocate and switch to an alternate kstack if specified. */ + if (pages != 0) + vm_thread_new_altkstack(td2, pages); + + PROC_LOCK(p2); + PROC_LOCK(p1); + +#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start)) + + bzero(&p2->p_startzero, + (unsigned) RANGEOF(struct proc, p_startzero, p_endzero)); + bzero(&td2->td_startzero, + (unsigned) RANGEOF(struct thread, td_startzero, td_endzero)); + bzero(&kg2->kg_startzero, + (unsigned) RANGEOF(struct ksegrp, kg_startzero, kg_endzero)); + + bcopy(&p1->p_startcopy, &p2->p_startcopy, + (unsigned) RANGEOF(struct proc, p_startcopy, p_endcopy)); + bcopy(&td->td_startcopy, &td2->td_startcopy, + (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy)); + bcopy(&td->td_ksegrp->kg_startcopy, &kg2->kg_startcopy, + (unsigned) RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy)); +#undef RANGEOF + + td2->td_sigstk = td->td_sigstk; + + /* + * Duplicate sub-structures as needed. + * Increase reference counts on shared objects. + * The p_stats substruct is set in vm_forkproc. + */ + p2->p_flag = 0; + if (p1->p_flag & P_PROFIL) + startprofclock(p2); + mtx_lock_spin(&sched_lock); + p2->p_sflag = PS_INMEM; + /* + * Allow the scheduler to adjust the priority of the child and + * parent while we hold the sched_lock. 
+ */ + sched_fork(td, td2); + + mtx_unlock_spin(&sched_lock); + p2->p_ucred = crhold(td->td_ucred); + td2->td_ucred = crhold(p2->p_ucred); /* XXXKSE */ + + pargs_hold(p2->p_args); + + if (flags & RFSIGSHARE) { + p2->p_sigacts = sigacts_hold(p1->p_sigacts); + } else { + sigacts_copy(newsigacts, p1->p_sigacts); + p2->p_sigacts = newsigacts; + } + if (flags & RFLINUXTHPN) + p2->p_sigparent = SIGUSR1; + else + p2->p_sigparent = SIGCHLD; + + p2->p_textvp = p1->p_textvp; + p2->p_fd = fd; + p2->p_fdtol = fdtol; + + /* + * p_limit is copy-on-write. Bump its refcount. + */ + p2->p_limit = lim_hold(p1->p_limit); + PROC_UNLOCK(p1); + PROC_UNLOCK(p2); + + /* Bump references to the text vnode (for procfs) */ + if (p2->p_textvp) + vref(p2->p_textvp); + + /* + * Set up linkage for kernel based threading. + */ + if ((flags & RFTHREAD) != 0) { + mtx_lock(&ppeers_lock); + p2->p_peers = p1->p_peers; + p1->p_peers = p2; + p2->p_leader = p1->p_leader; + mtx_unlock(&ppeers_lock); + PROC_LOCK(p1->p_leader); + if ((p1->p_leader->p_flag & P_WEXIT) != 0) { + PROC_UNLOCK(p1->p_leader); + /* + * The task leader is exiting, so process p1 is + * going to be killed shortly. Since p1 obviously + * isn't dead yet, we know that the leader is either + * sending SIGKILL's to all the processes in this + * task or is sleeping waiting for all the peers to + * exit. We let p1 complete the fork, but we need + * to go ahead and kill the new process p2 since + * the task leader may not get a chance to send + * SIGKILL to it. We leave it on the list so that + * the task leader will wait for this new process + * to commit suicide. + */ + PROC_LOCK(p2); + psignal(p2, SIGKILL); + PROC_UNLOCK(p2); + } else + PROC_UNLOCK(p1->p_leader); + } else { + p2->p_peers = NULL; + p2->p_leader = p2; + } + + sx_xlock(&proctree_lock); + PGRP_LOCK(p1->p_pgrp); + PROC_LOCK(p2); + PROC_LOCK(p1); + + /* + * Preserve some more flags in subprocess. P_PROFIL has already + * been preserved. 
+ */ + p2->p_flag |= p1->p_flag & P_SUGID; + td2->td_pflags |= td->td_pflags & TDP_ALTSTACK; + SESS_LOCK(p1->p_session); + if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) + p2->p_flag |= P_CONTROLT; + SESS_UNLOCK(p1->p_session); + if (flags & RFPPWAIT) + p2->p_flag |= P_PPWAIT; + + p2->p_pgrp = p1->p_pgrp; + LIST_INSERT_AFTER(p1, p2, p_pglist); + PGRP_UNLOCK(p1->p_pgrp); + LIST_INIT(&p2->p_children); + + callout_init(&p2->p_itcallout, CALLOUT_MPSAFE); + +#ifdef KTRACE + /* + * Copy traceflag and tracefile if enabled. + */ + mtx_lock(&ktrace_mtx); + KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode")); + if (p1->p_traceflag & KTRFAC_INHERIT) { + p2->p_traceflag = p1->p_traceflag; + if ((p2->p_tracevp = p1->p_tracevp) != NULL) { + VREF(p2->p_tracevp); + KASSERT(p1->p_tracecred != NULL, + ("ktrace vnode with no cred")); + p2->p_tracecred = crhold(p1->p_tracecred); + } + } + mtx_unlock(&ktrace_mtx); +#endif + + /* + * If PF_FORK is set, the child process inherits the + * procfs ioctl flags from its parent. + */ + if (p1->p_pfsflags & PF_FORK) { + p2->p_stops = p1->p_stops; + p2->p_pfsflags = p1->p_pfsflags; + } + + /* + * This begins the section where we must prevent the parent + * from being swapped. + */ + _PHOLD(p1); + PROC_UNLOCK(p1); + + /* + * Attach the new process to its parent. + * + * If RFNOWAIT is set, the newly created process becomes a child + * of init. This effectively disassociates the child from the + * parent. + */ + if (flags & RFNOWAIT) + pptr = initproc; + else + pptr = p1; + p2->p_pptr = pptr; + LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); + sx_xunlock(&proctree_lock); + + /* Inform accounting that we have forked. */ + p2->p_acflag = AFORK; + PROC_UNLOCK(p2); + + /* + * Finish creating the child process. It will return via a different + * execution path later. 
(ie: directly into user mode) + */ + mtx_lock(&Giant); + vm_forkproc(td, p2, td2, flags); + + if (flags == (RFFDG | RFPROC)) { + cnt.v_forks++; + cnt.v_forkpages += p2->p_vmspace->vm_dsize + + p2->p_vmspace->vm_ssize; + } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) { + cnt.v_vforks++; + cnt.v_vforkpages += p2->p_vmspace->vm_dsize + + p2->p_vmspace->vm_ssize; + } else if (p1 == &proc0) { + cnt.v_kthreads++; + cnt.v_kthreadpages += p2->p_vmspace->vm_dsize + + p2->p_vmspace->vm_ssize; + } else { + cnt.v_rforks++; + cnt.v_rforkpages += p2->p_vmspace->vm_dsize + + p2->p_vmspace->vm_ssize; + } + mtx_unlock(&Giant); + + /* + * Both processes are set up, now check if any loadable modules want + * to adjust anything. + * What if they have an error? XXX + */ + EVENTHANDLER_INVOKE(process_fork, p1, p2, flags); + + /* + * Set the child start time and mark the process as being complete. + */ + microuptime(&p2->p_stats->p_start); + mtx_lock_spin(&sched_lock); + p2->p_state = PRS_NORMAL; + + /* + * If RFSTOPPED not requested, make child runnable and add to + * run queue. + */ + if ((flags & RFSTOPPED) == 0) { + TD_SET_CAN_RUN(td2); + setrunqueue(td2, SRQ_BORING); + } + mtx_unlock_spin(&sched_lock); + + /* + * Now can be swapped. + */ + PROC_LOCK(p1); + _PRELE(p1); + + /* + * Tell any interested parties about the new process. + */ + KNOTE_LOCKED(&p1->p_klist, NOTE_FORK | p2->p_pid); + + PROC_UNLOCK(p1); + + /* + * Preserve synchronization semantics of vfork. If waiting for + * child to exec or exit, set P_PPWAIT on child, and sleep on our + * proc (in case of exit). + */ + PROC_LOCK(p2); + while (p2->p_flag & P_PPWAIT) + msleep(p1, &p2->p_mtx, PWAIT, "ppwait", 0); + PROC_UNLOCK(p2); + + /* + * If other threads are waiting, let them continue now. + */ + if (p1->p_flag & P_HADTHREADS) { + PROC_LOCK(p1); + thread_single_end(); + PROC_UNLOCK(p1); + } + + /* + * Return child proc pointer to parent. 
+ */ + *procp = p2; + return (0); +fail: + sx_sunlock(&proctree_lock); + if (ppsratecheck(&lastfail, &curfail, 1)) + printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n", + uid); + sx_xunlock(&allproc_lock); +#ifdef MAC + mac_destroy_proc(newproc); +#endif + uma_zfree(proc_zone, newproc); + if (p1->p_flag & P_HADTHREADS) { + PROC_LOCK(p1); + thread_single_end(); + PROC_UNLOCK(p1); + } + tsleep(&forksleep, PUSER, "fork", hz / 2); + return (error); +} + +/* + * Handle the return of a child process from fork1(). This function + * is called from the MD fork_trampoline() entry point. + */ +void +fork_exit(callout, arg, frame) + void (*callout)(void *, struct trapframe *); + void *arg; + struct trapframe *frame; +{ + struct proc *p; + struct thread *td; + + /* + * Finish setting up thread glue so that it begins execution in a + * non-nested critical section with sched_lock held but not recursed. + */ + td = curthread; + p = td->td_proc; + td->td_oncpu = PCPU_GET(cpuid); + KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new")); + + sched_lock.mtx_lock = (uintptr_t)td; + mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); + cpu_critical_fork_exit(); + CTR4(KTR_PROC, "fork_exit: new thread %p (kse %p, pid %d, %s)", + td, td->td_sched, p->p_pid, p->p_comm); + + /* + * Processes normally resume in mi_switch() after being + * cpu_switch()'ed to, but when children start up they arrive here + * instead, so we must do much the same things as mi_switch() would. + */ + + if ((td = PCPU_GET(deadthread))) { + PCPU_SET(deadthread, NULL); + thread_stash(td); + } + td = curthread; + mtx_unlock_spin(&sched_lock); + + /* + * cpu_set_fork_handler intercepts this function call to + * have this call a non-return function to stay in kernel mode. + * initproc has its own fork handler, but it does return. 
+ */ + KASSERT(callout != NULL, ("NULL callout in fork_exit")); + callout(arg, frame); + + /* + * Check if a kernel thread misbehaved and returned from its main + * function. + */ + PROC_LOCK(p); + if (p->p_flag & P_KTHREAD) { + PROC_UNLOCK(p); + printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n", + p->p_comm, p->p_pid); + kthread_exit(0); + } + PROC_UNLOCK(p); + mtx_assert(&Giant, MA_NOTOWNED); +} + +/* + * Simplified back end of syscall(), used when returning from fork() + * directly into user mode. Giant is not held on entry, and must not + * be held on return. This function is passed in to fork_exit() as the + * first parameter and is called when returning to a new userland process. + */ +void +fork_return(td, frame) + struct thread *td; + struct trapframe *frame; +{ + + userret(td, frame, 0); +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSRET)) + ktrsysret(SYS_fork, 0, 0); +#endif + mtx_assert(&Giant, MA_NOTOWNED); +} diff --git a/freebsd-5.3-xen-sparse/mkbuildtree b/freebsd-5.3-xen-sparse/mkbuildtree new file mode 100644 index 0000000000..ce4c91d431 --- /dev/null +++ b/freebsd-5.3-xen-sparse/mkbuildtree @@ -0,0 +1,119 @@ +#!/bin/bash + +# mkbuildtree <build tree> +# +# Creates symbolic links in <build tree> for the sparse tree +# in the current directory. + +# Script to determine the relative path between two directories. +# Copyright (c) D. J. Hawkey Jr. 2002 +# Fixed for Xen project by K. Fraser in 2003. +abs_to_rel () +{ + local CWD SRCPATH + + if [ "$1" != "/" -a "${1##*[^/]}" = "/" ]; then + SRCPATH=${1%?} + else + SRCPATH=$1 + fi + if [ "$2" != "/" -a "${2##*[^/]}" = "/" ]; then + DESTPATH=${2%?} + else + DESTPATH=$2 + fi + + CWD=$PWD + [ "${1%%[^/]*}" != "/" ] && cd $1 && SRCPATH=$PWD + [ "${2%%[^/]*}" != "/" ] && cd $2 && DESTPATH=$PWD + [ "$CWD" != "$PWD" ] && cd $CWD + + BASEPATH=$SRCPATH + + [ "$SRCPATH" = "$DESTPATH" ] && DESTPATH="." 
&& return + [ "$SRCPATH" = "/" ] && DESTPATH=${DESTPATH#?} && return + + while [ "$BASEPATH/" != "${DESTPATH%${DESTPATH#$BASEPATH/}}" ]; do + BASEPATH=${BASEPATH%/*} + done + + SRCPATH=${SRCPATH#$BASEPATH} + DESTPATH=${DESTPATH#$BASEPATH} + DESTPATH=${DESTPATH#?} + while [ -n "$SRCPATH" ]; do + SRCPATH=${SRCPATH%/*} + DESTPATH="../$DESTPATH" + done + + [ -z "$BASEPATH" ] && BASEPATH="/" + [ "${DESTPATH##*[^/]}" = "/" ] && DESTPATH=${DESTPATH%?} +} + +# relative_lndir <target_dir> +# Creates a tree of symlinks in the current working directory that mirror +# real files in <target_dir>. <target_dir> should be relative to the current +# working directory. Symlinks in <target_dir> are ignored. Source-control files +# are ignored. +relative_lndir () +{ + local SYMLINK_DIR REAL_DIR pref i j + SYMLINK_DIR=$PWD + REAL_DIR=$1 + ( + cd $REAL_DIR + for i in `find . -type d | grep -v SCCS`; do + [ -d $SYMLINK_DIR/$i ] || mkdir -p $SYMLINK_DIR/$i + ( + cd $i + pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'` + for j in `find . -type f -o -type l -maxdepth 1`; do + ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j + done + ) + done + ) +} + +[ "$1" == "" ] && { echo "Syntax: $0 <linux tree to xenify>"; exit 1; } + +# Get absolute path to the destination directory +pushd . 
>/dev/null +cd ${1} +AD=$PWD +popd >/dev/null + +# Get absolute path to the source directory +AS=`pwd` + +# Get name of sparse directory +SDN=$(basename $AS) + +# Get path to source, relative to destination +abs_to_rel ${AD} ${AS} +RS=$DESTPATH + +# Remove old copies of files and directories at the destination +for i in `find sys -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done + +# We now work from the destination directory +cd ${AD} + +# Remove old symlinks +find sys -type l | while read f +do + case $(readlink $f) in + */$SDN/*) + rm -f $f + ;; + esac +done + +if [ -f ${AD}/BUILDING ]; then + # Create symlinks of files and directories which exist in the sparse source + (cd sys && relative_lndir ../${RS}/sys) +else + # Create symlinks of files and directories which exist in the sparse source + relative_lndir ${RS} + rm -f mkbuildtree +fi + diff --git a/freebsd-5.3-xen-sparse/xenfbsd_kernel_build b/freebsd-5.3-xen-sparse/xenfbsd_kernel_build new file mode 100644 index 0000000000..dc2c927c06 --- /dev/null +++ b/freebsd-5.3-xen-sparse/xenfbsd_kernel_build @@ -0,0 +1,7 @@ +#!/bin/csh -f +cd i386-xen/conf +config XENCONF +cd ../compile/XENCONF +make kernel-clean +ln -s ../../include/xen-public/io/ring.h +make kernel-depend; make -j4 kernel |