author    iap10@freefall.cl.cam.ac.uk <iap10@freefall.cl.cam.ac.uk>  2005-03-21 07:58:08 +0000
committer iap10@freefall.cl.cam.ac.uk <iap10@freefall.cl.cam.ac.uk>  2005-03-21 07:58:08 +0000
commit    d73b5730fbb7f3d0fd7fcd9a9b6e36d71d33ade0 (patch)
tree      bc7051351d4d09c13c29b247ee34c3f22ec28a3f /freebsd-5.3-xen-sparse
parent    a280a68e6317b8d274296935eee67d12788beeb4 (diff)
bitkeeper revision 1.1159.272.3 (423e7e90uxPqdRoA4EvOUikif-yhXA)
Check-in of the sparse tree for FreeBSD 5.3 (version 050317). This currently supports running as a domU.

- To create freebsd-5.3-xenU, run fbsdxensetup from anywhere in the tree.
- Once created, go to freebsd-5.3-xenU on a FreeBSD 5.3 machine and run xenfbsd_kernel_build.
- You'll find kernel and kernel.debug under i386-xen/compile/XENCONF.

See http://www.fsmware.com/xenofreebsd/5.3/xenbsdsetup.txt

Thanks to NetApp for their contributions in support of the FreeBSD port to Xen.

Signed-off-by: Kip Macy <kip.macy@gmail.com>
Signed-off-by: ian.pratt@cl.cam.ac.uk
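As a concrete sketch of that workflow (illustrative only; it assumes a FreeBSD 5.3 host with this tree checked out, and does not reproduce the exact contents of the xenfbsd_kernel_build script):

    freebsd-5.3-xen-sparse/fbsdxensetup    # fetch the 5.3 sys sources and create freebsd-5.3-xenU
    cd freebsd-5.3-xenU
    ./xenfbsd_kernel_build                 # configure and build the XENCONF domU kernel
    ls i386-xen/compile/XENCONF            # kernel and kernel.debug end up here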
Diffstat (limited to 'freebsd-5.3-xen-sparse')
-rw-r--r--  freebsd-5.3-xen-sparse/conf/Makefile.i386-xen | 51
-rw-r--r--  freebsd-5.3-xen-sparse/conf/files.i386-xen | 294
-rw-r--r--  freebsd-5.3-xen-sparse/conf/ldscript.i386-xen | 134
-rw-r--r--  freebsd-5.3-xen-sparse/conf/options.i386-xen | 162
-rw-r--r--  freebsd-5.3-xen-sparse/fbsdxensetup | 39
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/Makefile | 40
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/compile/.cvsignore | 1
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC | 273
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints | 93
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/conf/Makefile | 3
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/conf/NOTES | 1115
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD | 17
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/conf/PAE | 99
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF | 137
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk | 116
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c | 511
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c | 46
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c | 476
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c | 209
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c | 580
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s | 428
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c | 234
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c | 107
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c | 626
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c | 889
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c | 326
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c | 850
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c | 762
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s | 949
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c | 2396
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c | 150
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c | 1315
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c | 974
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c | 3381
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s | 1553
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s | 445
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw | 75
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c | 703
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c | 1006
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c | 618
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c | 238
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c | 687
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h | 601
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h | 120
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h | 92
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/frame.h | 129
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h | 36
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h | 355
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/md_var.h | 108
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/multicall.h | 98
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/param.h | 146
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/pcb.h | 96
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h | 173
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/pmap.h | 355
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/segments.h | 260
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h | 82
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/trap.h | 111
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h | 105
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h | 141
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h | 293
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h | 50
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h | 85
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h | 132
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h | 30
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c | 925
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c | 536
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c | 410
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c | 1109
-rw-r--r--  freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c | 1436
-rw-r--r--  freebsd-5.3-xen-sparse/kern/kern_fork.c | 846
-rw-r--r--  freebsd-5.3-xen-sparse/mkbuildtree | 119
-rw-r--r--  freebsd-5.3-xen-sparse/xenfbsd_kernel_build | 7
72 files changed, 32094 insertions, 0 deletions
diff --git a/freebsd-5.3-xen-sparse/conf/Makefile.i386-xen b/freebsd-5.3-xen-sparse/conf/Makefile.i386-xen
new file mode 100644
index 0000000000..80e1cdd35c
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/conf/Makefile.i386-xen
@@ -0,0 +1,51 @@
+# Makefile.i386 -- with config changes.
+# Copyright 1990 W. Jolitz
+# from: @(#)Makefile.i386 7.1 5/10/91
+# $FreeBSD: src/sys/conf/Makefile.i386,v 1.259 2003/04/15 21:29:11 phk Exp $
+#
+# Makefile for FreeBSD
+#
+# This makefile is constructed from a machine description:
+# config machineid
+# Most changes should be made in the machine description
+# /sys/i386/conf/``machineid''
+# after which you should do
+# config machineid
+# Generic makefile changes should be made in
+# /sys/conf/Makefile.i386
+# after which config should be rerun for all machines.
+#
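+# As an illustrative example only (this tree's xenfbsd_kernel_build script
+# wraps a similar sequence for the XENCONF configuration):
+#	cd i386-xen/conf
+#	config XENCONF
+#	cd ../compile/XENCONF
+#	make depend && make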
+
+# Which version of config(8) is required.
+%VERSREQ= 500013
+
+STD8X16FONT?= iso
+
+
+
+.if !defined(S)
+.if exists(./@/.)
+S= ./@
+.else
+S= ../../..
+.endif
+.endif
+.include "$S/conf/kern.pre.mk"
+M= i386-xen
+MKMODULESENV+= MACHINE=i386-xen
+INCLUDES+= -I../../include/xen-public
+%BEFORE_DEPEND
+
+%OBJS
+
+%FILES.c
+
+%FILES.s
+
+%FILES.m
+
+%CLEAN
+
+%RULES
+
+.include "$S/conf/kern.post.mk"
diff --git a/freebsd-5.3-xen-sparse/conf/files.i386-xen b/freebsd-5.3-xen-sparse/conf/files.i386-xen
new file mode 100644
index 0000000000..189378d469
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/conf/files.i386-xen
@@ -0,0 +1,294 @@
+# This file tells config what files go into building a kernel;
+# files marked standard are always included.
+#
+# $FreeBSD: src/sys/conf/files.i386,v 1.457 2003/12/03 23:06:30 imp Exp $
+#
+# The long compile-with and dependency lines are required because of
+# limitations in config: backslash-newline doesn't work in strings, and
+# dependency lines other than the first are silently ignored.
+#
+linux_genassym.o optional compat_linux \
+ dependency "$S/i386/linux/linux_genassym.c" \
+ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \
+ no-obj no-implicit-rule \
+ clean "linux_genassym.o"
+#
+linux_assym.h optional compat_linux \
+ dependency "$S/kern/genassym.sh linux_genassym.o" \
+ compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \
+ no-obj no-implicit-rule before-depend \
+ clean "linux_assym.h"
+#
+svr4_genassym.o optional compat_svr4 \
+ dependency "$S/i386/svr4/svr4_genassym.c" \
+ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \
+ no-obj no-implicit-rule \
+ clean "svr4_genassym.o"
+#
+svr4_assym.h optional compat_svr4 \
+ dependency "$S/kern/genassym.sh svr4_genassym.o" \
+ compile-with "sh $S/kern/genassym.sh svr4_genassym.o > ${.TARGET}" \
+ no-obj no-implicit-rule before-depend \
+ clean "svr4_assym.h"
+#
+font.h optional sc_dflt_font \
+ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \
+ no-obj no-implicit-rule before-depend \
+ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8"
+#
+atkbdmap.h optional atkbd_dflt_keymap \
+ compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \
+ no-obj no-implicit-rule before-depend \
+ clean "atkbdmap.h"
+#
+ukbdmap.h optional ukbd_dflt_keymap \
+ compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \
+ no-obj no-implicit-rule before-depend \
+ clean "ukbdmap.h"
+#
+msysosak.o optional fla \
+ dependency "$S/contrib/dev/fla/i386/msysosak.o.uu" \
+ compile-with "uudecode < $S/contrib/dev/fla/i386/msysosak.o.uu" \
+ no-implicit-rule
+#
+trlld.o optional oltr \
+ dependency "$S/contrib/dev/oltr/i386-elf.trlld.o.uu" \
+ compile-with "uudecode < $S/contrib/dev/oltr/i386-elf.trlld.o.uu" \
+ no-implicit-rule
+#
+hal.o optional ath_hal \
+ dependency "$S/contrib/dev/ath/freebsd/i386-elf.hal.o.uu" \
+ compile-with "uudecode < $S/contrib/dev/ath/freebsd/i386-elf.hal.o.uu" \
+ no-implicit-rule
+#
+#
+compat/linux/linux_file.c optional compat_linux
+compat/linux/linux_getcwd.c optional compat_linux
+compat/linux/linux_ioctl.c optional compat_linux
+compat/linux/linux_ipc.c optional compat_linux
+compat/linux/linux_mib.c optional compat_linux
+compat/linux/linux_misc.c optional compat_linux
+compat/linux/linux_signal.c optional compat_linux
+compat/linux/linux_socket.c optional compat_linux
+compat/linux/linux_stats.c optional compat_linux
+compat/linux/linux_sysctl.c optional compat_linux
+compat/linux/linux_uid16.c optional compat_linux
+compat/linux/linux_util.c optional compat_linux
+compat/pecoff/imgact_pecoff.c optional pecoff_support
+compat/svr4/imgact_svr4.c optional compat_svr4
+compat/svr4/svr4_fcntl.c optional compat_svr4
+compat/svr4/svr4_filio.c optional compat_svr4
+compat/svr4/svr4_ioctl.c optional compat_svr4
+compat/svr4/svr4_ipc.c optional compat_svr4
+compat/svr4/svr4_misc.c optional compat_svr4
+compat/svr4/svr4_resource.c optional compat_svr4
+compat/svr4/svr4_signal.c optional compat_svr4
+compat/svr4/svr4_socket.c optional compat_svr4
+compat/svr4/svr4_sockio.c optional compat_svr4
+compat/svr4/svr4_stat.c optional compat_svr4
+compat/svr4/svr4_stream.c optional compat_svr4
+compat/svr4/svr4_syscallnames.c optional compat_svr4
+compat/svr4/svr4_sysent.c optional compat_svr4
+compat/svr4/svr4_sysvec.c optional compat_svr4
+compat/svr4/svr4_termios.c optional compat_svr4
+compat/svr4/svr4_ttold.c optional compat_svr4
+contrib/dev/fla/fla.c optional fla
+contrib/dev/oltr/if_oltr.c optional oltr
+contrib/dev/oltr/trlldbm.c optional oltr
+contrib/dev/oltr/trlldhm.c optional oltr
+contrib/dev/oltr/trlldmac.c optional oltr
+bf_enc.o optional ipsec ipsec_esp \
+ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \
+ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \
+ no-implicit-rule
+crypto/des/arch/i386/des_enc.S optional ipsec ipsec_esp
+crypto/des/des_ecb.c optional netsmbcrypto
+crypto/des/arch/i386/des_enc.S optional netsmbcrypto
+crypto/des/des_setkey.c optional netsmbcrypto
+bf_enc.o optional crypto \
+ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \
+ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \
+ no-implicit-rule
+crypto/des/arch/i386/des_enc.S optional crypto
+crypto/des/des_ecb.c optional crypto
+crypto/des/des_setkey.c optional crypto
+dev/ar/if_ar.c optional ar
+dev/ar/if_ar_pci.c optional ar pci
+dev/cx/csigma.c optional cx
+dev/cx/cxddk.c optional cx
+dev/cx/if_cx.c optional cx
+dev/dgb/dgb.c count dgb
+dev/fb/fb.c optional fb
+dev/fb/fb.c optional vga
+dev/fb/splash.c optional splash
+dev/fb/vga.c optional vga
+dev/kbd/atkbd.c optional atkbd
+dev/kbd/atkbdc.c optional atkbdc
+dev/kbd/kbd.c optional atkbd
+dev/kbd/kbd.c optional kbd
+dev/kbd/kbd.c optional sc
+dev/kbd/kbd.c optional ukbd
+dev/kbd/kbd.c optional vt
+dev/mem/memutil.c standard
+dev/random/nehemiah.c standard
+dev/ppc/ppc.c optional ppc
+dev/ppc/ppc_puc.c optional ppc puc pci
+dev/sio/sio.c optional sio
+dev/sio/sio_isa.c optional sio isa
+dev/syscons/schistory.c optional sc
+dev/syscons/scmouse.c optional sc
+dev/syscons/scterm.c optional sc
+dev/syscons/scterm-dumb.c optional sc
+dev/syscons/scterm-sc.c optional sc
+dev/syscons/scvesactl.c optional sc vga vesa
+dev/syscons/scvgarndr.c optional sc vga
+dev/syscons/scvidctl.c optional sc
+dev/syscons/scvtb.c optional sc
+dev/syscons/syscons.c optional sc
+dev/syscons/sysmouse.c optional sc
+dev/uart/uart_cpu_i386.c optional uart
+geom/geom_bsd.c standard
+geom/geom_bsd_enc.c standard
+geom/geom_mbr.c standard
+geom/geom_mbr_enc.c standard
+i386/acpica/OsdEnvironment.c optional acpi
+i386/acpica/acpi_machdep.c optional acpi
+i386/acpica/acpi_wakeup.c optional acpi
+acpi_wakecode.h optional acpi \
+ dependency "$S/i386/acpica/acpi_wakecode.S" \
+ compile-with "${MAKE} -f $S/i386/acpica/Makefile MAKESRCPATH=$S/i386/acpica" \
+ no-obj no-implicit-rule before-depend \
+ clean "acpi_wakecode.h acpi_wakecode.o acpi_wakecode.bin"
+#
+i386/acpica/madt.c optional acpi apic
+i386/bios/mca_machdep.c optional mca
+i386/bios/smapi.c optional smapi
+i386/bios/smapi_bios.S optional smapi
+i386/bios/smbios.c optional smbios
+i386/bios/vpd.c optional vpd
+i386/i386/apic_vector.s optional apic
+i386/i386/atomic.c standard \
+ compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}"
+i386/i386/autoconf.c standard
+i386/i386/busdma_machdep.c standard
+i386-xen/i386-xen/critical.c standard
+i386/i386/db_disasm.c optional ddb
+i386-xen/i386-xen/db_interface.c optional ddb
+i386/i386/db_trace.c optional ddb
+i386/i386/i386-gdbstub.c optional ddb
+i386/i386/dump_machdep.c standard
+i386/i386/elf_machdep.c standard
+i386-xen/i386-xen/exception.s standard
+i386-xen/i386-xen/i686_mem.c standard
+i386/i386/identcpu.c standard
+i386/i386/in_cksum.c optional inet
+i386-xen/i386-xen/initcpu.c standard
+i386-xen/i386-xen/intr_machdep.c standard
+i386-xen/i386-xen/io_apic.c optional apic
+i386/i386/legacy.c standard
+i386-xen/i386-xen/locore.s standard no-obj
+i386-xen/i386-xen/machdep.c standard
+i386/i386/mem.c standard
+i386-xen/i386-xen/mp_clock.c optional smp
+i386-xen/i386-xen/mp_machdep.c optional smp
+i386/i386/mpboot.s optional smp
+i386-xen/i386-xen/mptable.c optional apic
+i386-xen/i386-xen/local_apic.c optional apic
+i386/i386/mptable_pci.c optional apic pci
+i386/i386/nexus.c standard
+i386/i386/uio_machdep.c standard
+i386/i386/perfmon.c optional perfmon
+i386/i386/perfmon.c optional perfmon profiling-routine
+i386-xen/i386-xen/pmap.c standard
+i386-xen/i386-xen/support.s standard
+i386-xen/i386-xen/swtch.s standard
+i386-xen/i386-xen/sys_machdep.c standard
+i386-xen/i386-xen/trap.c standard
+i386/i386/tsc.c standard
+i386-xen/i386-xen/vm_machdep.c standard
+i386-xen/i386-xen/clock.c standard
+
+# xen specific arch-dep files
+i386-xen/i386-xen/hypervisor.c standard
+i386-xen/i386-xen/xen_machdep.c standard
+i386-xen/i386-xen/xen_bus.c standard
+i386-xen/i386-xen/evtchn.c standard
+i386-xen/i386-xen/ctrl_if.c standard
+
+
+i386/isa/asc.c count asc
+i386/isa/ctx.c optional ctx
+i386/isa/cy.c count cy
+i386/isa/elink.c optional ep
+i386/isa/elink.c optional ie
+i386/isa/gpib.c optional gp
+i386/isa/gsc.c count gsc
+i386/isa/istallion.c optional stli nowerror
+i386/isa/loran.c optional loran
+i386/isa/mse.c optional mse
+i386/isa/nmi.c standard
+
+# drivers
+i386-xen/xen/misc/npx.c optional npx
+i386-xen/xen/misc/evtchn_dev.c standard
+i386-xen/xen/char/console.c standard
+i386-xen/xen/netfront/xn_netfront.c standard
+i386-xen/xen/blkfront/xb_blkfront.c standard
+
+
+
+i386/isa/pcf.c optional pcf
+i386/isa/pcvt/pcvt_drv.c optional vt
+i386/isa/pcvt/pcvt_ext.c optional vt
+i386/isa/pcvt/pcvt_kbd.c optional vt
+i386/isa/pcvt/pcvt_out.c optional vt
+i386/isa/pcvt/pcvt_sup.c optional vt
+i386/isa/pcvt/pcvt_vtf.c optional vt
+i386/isa/pmtimer.c optional pmtimer
+i386/isa/prof_machdep.c optional profiling-routine
+i386/isa/spic.c optional spic
+i386/isa/spigot.c count spigot
+i386/isa/spkr.c optional speaker
+i386/isa/stallion.c optional stl nowerror
+i386/isa/vesa.c optional vga vesa
+i386/isa/wt.c count wt
+i386/linux/imgact_linux.c optional compat_linux
+i386/linux/linux_dummy.c optional compat_linux
+i386/linux/linux_locore.s optional compat_linux \
+ dependency "linux_assym.h"
+i386/linux/linux_machdep.c optional compat_linux
+i386/linux/linux_ptrace.c optional compat_linux
+i386/linux/linux_sysent.c optional compat_linux
+i386/linux/linux_sysvec.c optional compat_linux
+i386/pci/pci_cfgreg.c optional pci
+i386/pci/pci_bus.c optional pci
+i386/svr4/svr4_locore.s optional compat_svr4 \
+ dependency "svr4_assym.h" \
+ warning "COMPAT_SVR4 is broken and should be avoided"
+i386/svr4/svr4_machdep.c optional compat_svr4
+isa/atkbd_isa.c optional atkbd
+isa/atkbdc_isa.c optional atkbdc
+isa/fd.c optional fdc
+isa/psm.c optional psm
+isa/syscons_isa.c optional sc
+isa/vga_isa.c optional vga
+kern/imgact_aout.c optional compat_aout
+kern/imgact_gzip.c optional gzip
+libkern/divdi3.c standard
+libkern/moddi3.c standard
+libkern/qdivrem.c standard
+libkern/ucmpdi2.c standard
+libkern/udivdi3.c standard
+libkern/umoddi3.c standard
+libkern/flsl.c standard
+libkern/ffsl.c standard
+
+pci/cy_pci.c optional cy pci
+pci/agp_intel.c optional agp
+pci/agp_via.c optional agp
+pci/agp_sis.c optional agp
+pci/agp_ali.c optional agp
+pci/agp_amd.c optional agp
+pci/agp_i810.c optional agp
+pci/agp_nvidia.c optional agp
+
diff --git a/freebsd-5.3-xen-sparse/conf/ldscript.i386-xen b/freebsd-5.3-xen-sparse/conf/ldscript.i386-xen
new file mode 100644
index 0000000000..65cbc852da
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/conf/ldscript.i386-xen
@@ -0,0 +1,134 @@
+/* $FreeBSD: src/sys/conf/ldscript.i386,v 1.9 2003/12/03 07:40:03 phk Exp $ */
+OUTPUT_FORMAT("elf32-i386-freebsd", "elf32-i386-freebsd", "elf32-i386-freebsd")
+OUTPUT_ARCH(i386)
+ENTRY(btext)
+SEARCH_DIR(/usr/lib);
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = kernbase + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .hash : { *(.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.text :
+ { *(.rel.text) *(.rel.gnu.linkonce.t*) }
+ .rela.text :
+ { *(.rela.text) *(.rela.gnu.linkonce.t*) }
+ .rel.data :
+ { *(.rel.data) *(.rel.gnu.linkonce.d*) }
+ .rela.data :
+ { *(.rela.data) *(.rela.gnu.linkonce.d*) }
+ .rel.rodata :
+ { *(.rel.rodata) *(.rel.gnu.linkonce.r*) }
+ .rela.rodata :
+ { *(.rela.rodata) *(.rela.gnu.linkonce.r*) }
+ .rel.got : { *(.rel.got) }
+ .rela.got : { *(.rela.got) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rel.init : { *(.rel.init) }
+ .rela.init : { *(.rela.init) }
+ .rel.fini : { *(.rel.fini) }
+ .rela.fini : { *(.rela.fini) }
+ .rel.bss : { *(.rel.bss) }
+ .rela.bss : { *(.rela.bss) }
+ .rel.plt : { *(.rel.plt) }
+ .rela.plt : { *(.rela.plt) }
+ .init : { *(.init) } =0x9090
+ .plt : { *(.plt) }
+ .text :
+ {
+ *(.text)
+ *(.stub)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ *(.gnu.linkonce.t*)
+ } =0x9090
+ _etext = .;
+ PROVIDE (etext = .);
+ .fini : { *(.fini) } =0x9090
+ .rodata : { *(.rodata) *(.gnu.linkonce.r*) }
+ .rodata1 : { *(.rodata1) }
+ /* Adjust the address for the data segment. We want to adjust up to
+ the same address within the page on the next page up. */
+ . = ALIGN(0x1000) + (. & (0x1000 - 1)) ;
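+ /* Worked example added for clarity (not in the original script): if
+    . were 0xc0103456, ALIGN(0x1000) yields 0xc0104000 and (. & 0xfff)
+    restores the in-page offset 0x456, so . becomes 0xc0104456 -- the
+    same offset within the next page up. */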
+ .data :
+ {
+ *(.data)
+ *(.gnu.linkonce.d*)
+ CONSTRUCTORS
+ }
+ .data1 : { *(.data1) }
+ . = ALIGN(32 / 8);
+ _start_ctors = .;
+ PROVIDE (start_ctors = .);
+ .ctors :
+ {
+ *(.ctors)
+ }
+ _stop_ctors = .;
+ PROVIDE (stop_ctors = .);
+ .dtors :
+ {
+ *(.dtors)
+ }
+ .got : { *(.got.plt) *(.got) }
+ .dynamic : { *(.dynamic) }
+ /* We want the small data sections together, so single-instruction offsets
+ can access them all, and initialized data all before uninitialized, so
+ we can shorten the on-disk segment size. */
+ .sdata : { *(.sdata) }
+ _edata = .;
+ PROVIDE (edata = .);
+ __bss_start = .;
+ .sbss : { *(.sbss) *(.scommon) }
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ }
+ . = ALIGN(32 / 8);
+ _end = . ;
+ PROVIDE (end = .);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* These must appear regardless of . */
+}
diff --git a/freebsd-5.3-xen-sparse/conf/options.i386-xen b/freebsd-5.3-xen-sparse/conf/options.i386-xen
new file mode 100644
index 0000000000..6bbc509087
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/conf/options.i386-xen
@@ -0,0 +1,162 @@
+# $FreeBSD: src/sys/conf/options.i386,v 1.204 2003/12/03 23:06:30 imp Exp $
+# Options specific to the i386 platform kernels
+
+AUTO_EOI_1 opt_auto_eoi.h
+AUTO_EOI_2 opt_auto_eoi.h
+BROKEN_KEYBOARD_RESET opt_reset.h
+COMPAT_OLDISA
+I586_PMC_GUPROF opt_i586_guprof.h
+MAXMEM
+MPTABLE_FORCE_HTT
+NO_MIXED_MODE
+PERFMON
+DISABLE_PSE opt_pmap.h
+DISABLE_PG_G opt_pmap.h
+PMAP_SHPGPERPROC opt_pmap.h
+PPC_PROBE_CHIPSET opt_ppc.h
+PPC_DEBUG opt_ppc.h
+POWERFAIL_NMI opt_trap.h
+MP_WATCHDOG opt_mp_watchdog.h
+
+
+
+# Options for emulators. These should only be used at config time, so
+# they are handled like options for static filesystems
+# (see src/sys/conf/options), except for broken debugging options.
+COMPAT_AOUT opt_dontuse.h
+IBCS2 opt_dontuse.h
+COMPAT_LINUX opt_dontuse.h
+COMPAT_SVR4 opt_dontuse.h
+DEBUG_SVR4 opt_svr4.h
+PECOFF_SUPPORT opt_dontuse.h
+PECOFF_DEBUG opt_pecoff.h
+
+# Change KVM size. Changes things all over the kernel.
+KVA_PAGES opt_global.h
+XEN opt_global.h
+XENDEV opt_xen.h
+NOXENDEBUG opt_xen.h
+# Physical address extensions and support for >4G ram. As above.
+PAE opt_global.h
+
+CLK_CALIBRATION_LOOP opt_clock.h
+CLK_USE_I8254_CALIBRATION opt_clock.h
+CLK_USE_TSC_CALIBRATION opt_clock.h
+TIMER_FREQ opt_clock.h
+
+CPU_ATHLON_SSE_HACK opt_cpu.h
+CPU_BLUELIGHTNING_3X opt_cpu.h
+CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h
+CPU_BTB_EN opt_cpu.h
+CPU_CYRIX_NO_LOCK opt_cpu.h
+CPU_DIRECT_MAPPED_CACHE opt_cpu.h
+CPU_DISABLE_5X86_LSSER opt_cpu.h
+CPU_DISABLE_CMPXCHG opt_global.h # XXX global, unlike other CPU_*
+CPU_DISABLE_SSE opt_cpu.h
+CPU_ELAN opt_cpu.h
+CPU_ELAN_XTAL opt_cpu.h
+CPU_ELAN_PPS opt_cpu.h
+CPU_ENABLE_SSE opt_cpu.h
+CPU_FASTER_5X86_FPU opt_cpu.h
+CPU_GEODE opt_cpu.h
+CPU_I486_ON_386 opt_cpu.h
+CPU_IORT opt_cpu.h
+CPU_L2_LATENCY opt_cpu.h
+CPU_LOOP_EN opt_cpu.h
+CPU_PPRO2CELERON opt_cpu.h
+CPU_RSTK_EN opt_cpu.h
+CPU_SOEKRIS opt_cpu.h
+CPU_SUSP_HLT opt_cpu.h
+CPU_UPGRADE_HW_CACHE opt_cpu.h
+CPU_WT_ALLOC opt_cpu.h
+CYRIX_CACHE_REALLY_WORKS opt_cpu.h
+CYRIX_CACHE_WORKS opt_cpu.h
+NO_F00F_HACK opt_cpu.h
+NO_MEMORY_HOLE opt_cpu.h
+
+# The CPU type affects the endian conversion functions all over the kernel.
+I386_CPU opt_global.h
+I486_CPU opt_global.h
+I586_CPU opt_global.h
+I686_CPU opt_global.h
+
+VGA_ALT_SEQACCESS opt_vga.h
+VGA_DEBUG opt_vga.h
+VGA_NO_FONT_LOADING opt_vga.h
+VGA_NO_MODE_CHANGE opt_vga.h
+VGA_SLOW_IOACCESS opt_vga.h
+VGA_WIDTH90 opt_vga.h
+
+VESA
+VESA_DEBUG opt_vesa.h
+
+PSM_HOOKRESUME opt_psm.h
+PSM_RESETAFTERSUSPEND opt_psm.h
+PSM_DEBUG opt_psm.h
+
+ATKBD_DFLT_KEYMAP opt_atkbd.h
+
+# pcvt(4) has a bunch of options
+FAT_CURSOR opt_pcvt.h
+XSERVER opt_pcvt.h
+PCVT_24LINESDEF opt_pcvt.h
+PCVT_CTRL_ALT_DEL opt_pcvt.h
+PCVT_META_ESC opt_pcvt.h
+PCVT_NSCREENS opt_pcvt.h
+PCVT_PRETTYSCRNS opt_pcvt.h
+PCVT_SCANSET opt_pcvt.h
+PCVT_SCREENSAVER opt_pcvt.h
+PCVT_USEKBDSEC opt_pcvt.h
+PCVT_VT220KEYB opt_pcvt.h
+PCVT_GREENSAVER opt_pcvt.h
+
+# Video spigot
+SPIGOT_UNSECURE opt_spigot.h
+
+# Enables NETGRAPH support for Cronyx adapters
+NETGRAPH_CRONYX opt_ng_cronyx.h
+
+# -------------------------------
+# isdn4bsd: passive ISA cards
+# -------------------------------
+TEL_S0_8 opt_i4b.h
+TEL_S0_16 opt_i4b.h
+TEL_S0_16_3 opt_i4b.h
+AVM_A1 opt_i4b.h
+USR_STI opt_i4b.h
+ITKIX1 opt_i4b.h
+ELSA_PCC16 opt_i4b.h
+# -------------------------------
+# isdn4bsd: passive ISA PnP cards
+# -------------------------------
+CRTX_S0_P opt_i4b.h
+DRN_NGO opt_i4b.h
+TEL_S0_16_3_P opt_i4b.h
+SEDLBAUER opt_i4b.h
+DYNALINK opt_i4b.h
+ASUSCOM_IPAC opt_i4b.h
+ELSA_QS1ISA opt_i4b.h
+SIEMENS_ISURF2 opt_i4b.h
+EICON_DIVA opt_i4b.h
+COMPAQ_M610 opt_i4b.h
+# -------------------------------
+# isdn4bsd: passive PCI cards
+# -------------------------------
+ELSA_QS1PCI opt_i4b.h
+# -------------------------------
+# isdn4bsd: misc options
+# -------------------------------
+# temporary workaround for SMP machines
+I4B_SMP_WORKAROUND opt_i4b.h
+# enable VJ compression code for ipr i/f
+IPR_VJ opt_i4b.h
+IPR_LOG opt_i4b.h
+
+# Device options
+DEV_ACPI opt_acpi.h
+DEV_APIC opt_apic.h
+DEV_NPX opt_npx.h
+
+# -------------------------------
+# EOF
+# -------------------------------
diff --git a/freebsd-5.3-xen-sparse/fbsdxensetup b/freebsd-5.3-xen-sparse/fbsdxensetup
new file mode 100644
index 0000000000..3d024c370e
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/fbsdxensetup
@@ -0,0 +1,39 @@
+#!/bin/csh -f
+
+setenv XENROOT `bk root`
+rm -rf $XENROOT/fbsdtmp $XENROOT/freebsd-5.3-xenU
+mkdir -p $XENROOT/fbsdtmp
+cd $XENROOT/fbsdtmp
+echo "step 1"
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.aa
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ab
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ac
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ad
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ae
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.af
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ag
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ah
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ai
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.aj
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ak
+wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.al
+mkdir -p foo
+cat ssys.?? | tar --unlink -xpzf - -C foo/
+mkdir -p $XENROOT/freebsd-5.3-xenU
+mv foo/sys/* $XENROOT/freebsd-5.3-xenU
+cd $XENROOT
+rm -rf $XENROOT/fbsdtmp
+echo "step 2"
+mkdir -p $XENROOT/freebsd-5.3-xenU/i386-xen/include
+cd $XENROOT/freebsd-5.3-xenU/i386-xen/include/
+foreach file (../../i386/include/*)
+ ln -s $file
+end
+echo "step 3"
+cd $XENROOT/freebsd-5.3-xen-sparse
+echo "step 4"
+./mkbuildtree ../freebsd-5.3-xenU
+echo "step 5"
+cd $XENROOT/freebsd-5.3-xenU/i386-xen/include
+ln -s $XENROOT/xen/include/public xen-public
+echo "done"
diff --git a/freebsd-5.3-xen-sparse/i386-xen/Makefile b/freebsd-5.3-xen-sparse/i386-xen/Makefile
new file mode 100644
index 0000000000..f33c7a5af6
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/Makefile
@@ -0,0 +1,40 @@
+# $FreeBSD: src/sys/i386/Makefile,v 1.11 2002/06/21 06:18:02 mckusick Exp $
+# @(#)Makefile 8.1 (Berkeley) 6/11/93
+
+# Makefile for i386 links, tags file
+
+# SYS is normally set in Make.tags.inc
+# SYS=/sys
+SYS=/nsys
+
+TAGDIR= i386
+
+.include "../kern/Make.tags.inc"
+
+all:
+ @echo "make links or tags only"
+
+# Directories in which to place i386 tags links
+DI386= apm i386 ibcs2 include isa linux
+
+links::
+ -for i in ${COMMDIR1}; do \
+ (cd $$i && { rm -f tags; ln -s ../${TAGDIR}/tags tags; }) done
+ -for i in ${COMMDIR2}; do \
+ (cd $$i && { rm -f tags; ln -s ../../${TAGDIR}/tags tags; }) done
+ -for i in ${DI386}; do \
+ (cd $$i && { rm -f tags; ln -s ../tags tags; }) done
+
+SI386= ${SYS}/i386/apm/*.[ch] \
+ ${SYS}/i386/i386/*.[ch] ${SYS}/i386/ibcs2/*.[ch] \
+ ${SYS}/i386/include/*.[ch] ${SYS}/i386/isa/*.[ch] \
+ ${SYS}/i386/linux/*.[ch]
+AI386= ${SYS}/i386/i386/*.s
+
+tags::
+ -ctags -wdt ${COMM} ${SI386}
+ egrep "^ENTRY(.*)|^ALTENTRY(.*)" ${AI386} | \
+ sed "s;\([^:]*\):\([^(]*\)(\([^, )]*\)\(.*\);\3 \1 /^\2(\3\4$$/;" \
+ >> tags
+ sort -o tags tags
+ chmod 444 tags
diff --git a/freebsd-5.3-xen-sparse/i386-xen/compile/.cvsignore b/freebsd-5.3-xen-sparse/i386-xen/compile/.cvsignore
new file mode 100644
index 0000000000..232298edb1
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/compile/.cvsignore
@@ -0,0 +1 @@
+[A-Za-z0-9]*
diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC b/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC
new file mode 100644
index 0000000000..6a70639bda
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC
@@ -0,0 +1,273 @@
+#
+# GENERIC -- Generic kernel configuration file for FreeBSD/i386
+#
+# For more information on this file, please read the handbook section on
+# Kernel Configuration Files:
+#
+# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
+#
+# The handbook is also available locally in /usr/share/doc/handbook
+# if you've installed the doc distribution, otherwise always see the
+# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
+# latest information.
+#
+# An exhaustive list of options and more detailed explanations of the
+# device lines is also present in the ../../conf/NOTES and NOTES files.
+# If you are in doubt as to the purpose or necessity of a line, check first
+# in NOTES.
+#
+# $FreeBSD: src/sys/i386/conf/GENERIC,v 1.394.2.3 2004/01/26 19:42:11 nectar Exp $
+
+machine i386
+cpu I486_CPU
+cpu I586_CPU
+cpu I686_CPU
+ident GENERIC
+
+#To statically compile in device wiring instead of /boot/device.hints
+#hints "GENERIC.hints" #Default places to look for devices.
+
+#makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols
+
+options SCHED_4BSD #4BSD scheduler
+options INET #InterNETworking
+options INET6 #IPv6 communications protocols
+options FFS #Berkeley Fast Filesystem
+options SOFTUPDATES #Enable FFS soft updates support
+options UFS_ACL #Support for access control lists
+options UFS_DIRHASH #Improve performance on big directories
+options MD_ROOT #MD is a potential root device
+options NFSCLIENT #Network Filesystem Client
+options NFSSERVER #Network Filesystem Server
+options NFS_ROOT #NFS usable as /, requires NFSCLIENT
+options MSDOSFS #MSDOS Filesystem
+options CD9660 #ISO 9660 Filesystem
+options PROCFS #Process filesystem (requires PSEUDOFS)
+options PSEUDOFS #Pseudo-filesystem framework
+options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!]
+options COMPAT_FREEBSD4 #Compatible with FreeBSD4
+options SCSI_DELAY=15000 #Delay (in ms) before probing SCSI
+options KTRACE #ktrace(1) support
+options SYSVSHM #SYSV-style shared memory
+options SYSVMSG #SYSV-style message queues
+options SYSVSEM #SYSV-style semaphores
+options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions
+options KBD_INSTALL_CDEV # install a CDEV entry in /dev
+options AHC_REG_PRETTY_PRINT # Print register bitfields in debug
+ # output. Adds ~128k to driver.
+options AHD_REG_PRETTY_PRINT # Print register bitfields in debug
+ # output. Adds ~215k to driver.
+options PFIL_HOOKS # pfil(9) framework
+
+# Debugging for use in -current
+#options DDB #Enable the kernel debugger
+#options INVARIANTS #Enable calls of extra sanity checking
+options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS
+#options WITNESS #Enable checks to detect deadlocks and cycles
+#options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed
+
+# To make an SMP kernel, the next two are needed
+options SMP # Symmetric MultiProcessor Kernel
+device apic # I/O APIC
+
+device isa
+device eisa
+device pci
+
+# Floppy drives
+device fdc
+
+# ATA and ATAPI devices
+device ata
+device atadisk # ATA disk drives
+device ataraid # ATA RAID drives
+device atapicd # ATAPI CDROM drives
+device atapifd # ATAPI floppy drives
+device atapist # ATAPI tape drives
+options ATA_STATIC_ID #Static device numbering
+
+# SCSI Controllers
+device ahb # EISA AHA1742 family
+device ahc # AHA2940 and onboard AIC7xxx devices
+device ahd # AHA39320/29320 and onboard AIC79xx devices
+device amd # AMD 53C974 (Tekram DC-390(T))
+device isp # Qlogic family
+device mpt # LSI-Logic MPT-Fusion
+#device ncr # NCR/Symbios Logic
+device sym # NCR/Symbios Logic (newer chipsets + those of `ncr')
+device trm # Tekram DC395U/UW/F DC315U adapters
+
+device adv # Advansys SCSI adapters
+device adw # Advansys wide SCSI adapters
+device aha # Adaptec 154x SCSI adapters
+device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60.
+device bt # Buslogic/Mylex MultiMaster SCSI adapters
+
+device ncv # NCR 53C500
+device nsp # Workbit Ninja SCSI-3
+device stg # TMC 18C30/18C50
+
+# SCSI peripherals
+device scbus # SCSI bus (required for SCSI)
+device ch # SCSI media changers
+device da # Direct Access (disks)
+device sa # Sequential Access (tape etc)
+device cd # CD
+device pass # Passthrough device (direct SCSI access)
+device ses # SCSI Environmental Services (and SAF-TE)
+
+# RAID controllers interfaced to the SCSI subsystem
+device amr # AMI MegaRAID
+device asr # DPT SmartRAID V, VI and Adaptec SCSI RAID
+device ciss # Compaq Smart RAID 5*
+device dpt # DPT Smartcache III, IV - See NOTES for options
+device iir # Intel Integrated RAID
+device ips # IBM (Adaptec) ServeRAID
+device mly # Mylex AcceleRAID/eXtremeRAID
+
+# RAID controllers
+device aac # Adaptec FSA RAID
+device aacp # SCSI passthrough for aac (requires CAM)
+device ida # Compaq Smart RAID
+device mlx # Mylex DAC960 family
+device pst # Promise Supertrak SX6000
+device twe # 3ware ATA RAID
+
+# atkbdc0 controls both the keyboard and the PS/2 mouse
+device atkbdc # AT keyboard controller
+device atkbd # AT keyboard
+device psm # PS/2 mouse
+
+device vga # VGA video card driver
+
+device splash # Splash screen and screen saver support
+
+# syscons is the default console driver, resembling an SCO console
+device sc
+
+# Enable this for the pcvt (VT220 compatible) console driver
+#device vt
+#options XSERVER # support for X server on a vt console
+#options FAT_CURSOR # start with block cursor
+
+device agp # support several AGP chipsets
+
+# Floating point support - do not disable.
+device npx
+
+# Power management support (see NOTES for more options)
+#device apm
+# Add suspend/resume support for the i8254.
+device pmtimer
+
+# PCCARD (PCMCIA) support
+# Pcmcia and cardbus bridge support
+device cbb # cardbus (yenta) bridge
+#device pcic # ExCA ISA and PCI bridges
+device pccard # PC Card (16-bit) bus
+device cardbus # CardBus (32-bit) bus
+
+# Serial (COM) ports
+device sio # 8250, 16[45]50 based serial ports
+
+# Parallel port
+device ppc
+device ppbus # Parallel port bus (required)
+device lpt # Printer
+device plip # TCP/IP over parallel
+device ppi # Parallel port interface device
+#device vpo # Requires scbus and da
+
+# If you've got a "dumb" serial or parallel PCI card that is
+# supported by the puc(4) glue driver, uncomment the following
+# line to enable it (connects to the sio and/or ppc drivers):
+#device puc
+
+# PCI Ethernet NICs.
+device de # DEC/Intel DC21x4x (``Tulip'')
+device em # Intel PRO/1000 adapter Gigabit Ethernet Card
+device txp # 3Com 3cR990 (``Typhoon'')
+device vx # 3Com 3c590, 3c595 (``Vortex'')
+
+# PCI Ethernet NICs that use the common MII bus controller code.
+# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
+device miibus # MII bus support
+device bfe # Broadcom BCM440x 10/100 ethernet
+device bge # Broadcom BCM570xx Gigabit Ethernet
+device dc # DEC/Intel 21143 and various workalikes
+device fxp # Intel EtherExpress PRO/100B (82557, 82558)
+device pcn # AMD Am79C97x PCI 10/100 (precedence over 'lnc')
+device re # RealTek 8139C+/8169/8169S/8110S
+device rl # RealTek 8129/8139
+device sf # Adaptec AIC-6915 (``Starfire'')
+device sis # Silicon Integrated Systems SiS 900/SiS 7016
+device sk # SysKonnect SK-984x and SK-982x gigabit ethernet
+device ste # Sundance ST201 (D-Link DFE-550TX)
+device ti # Alteon Networks Tigon I/II gigabit ethernet
+device tl # Texas Instruments ThunderLAN
+device tx # SMC EtherPower II (83c170 ``EPIC'')
+device vr # VIA Rhine, Rhine II
+device wb # Winbond W89C840F
+device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'')
+
+# ISA Ethernet NICs. pccard nics included.
+device cs # Crystal Semiconductor CS89x0 NIC
+# 'device ed' requires 'device miibus'
+device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards
+device ex # Intel EtherExpress Pro/10 and Pro/10+
+device ep # Etherlink III based cards
+device fe # Fujitsu MB8696x based cards
+device ie # EtherExpress 8/16, 3C507, StarLAN 10 etc.
+device lnc # NE2100, NE32-VL Lance Ethernet cards
+device sn # SMC's 9000 series of ethernet chips
+device xe # Xircom pccard ethernet
+
+# ISA devices that use the old ISA shims
+#device le
+
+# Wireless NIC cards
+device wlan # 802.11 support
+device an # Aironet 4500/4800 802.11 wireless NICs.
+device awi # BayStack 660 and others
+device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs.
+#device wl # Older non 802.11 Wavelan wireless NIC.
+
+# Pseudo devices - the number indicates how many units to allocate.
+device random # Entropy device
+device loop # Network loopback
+device ether # Ethernet support
+device sl # Kernel SLIP
+device ppp # Kernel PPP
+device tun # Packet tunnel.
+device pty # Pseudo-ttys (telnet etc)
+device md # Memory "disks"
+device gif # IPv6 and IPv4 tunneling
+device faith # IPv6-to-IPv4 relaying (translation)
+
+# The `bpf' device enables the Berkeley Packet Filter.
+# Be aware of the administrative consequences of enabling this!
+device bpf # Berkeley packet filter
+
+# USB support
+device uhci # UHCI PCI->USB interface
+device ohci # OHCI PCI->USB interface
+device usb # USB Bus (required)
+#device udbp # USB Double Bulk Pipe devices
+device ugen # Generic
+device uhid # "Human Interface Devices"
+device ukbd # Keyboard
+device ulpt # Printer
+device umass # Disks/Mass storage - Requires scbus and da
+device ums # Mouse
+device urio # Diamond Rio 500 MP3 player
+device uscanner # Scanners
+# USB Ethernet, requires mii
+device aue # ADMtek USB ethernet
+device axe # ASIX Electronics USB ethernet
+device cue # CATC USB ethernet
+device kue # Kawasaki LSI USB ethernet
+
+# FireWire support
+device firewire # FireWire bus code
+device sbp # SCSI over FireWire (Requires scbus and da)
+device fwe # Ethernet over FireWire (non-standard!)
diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints b/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints
new file mode 100644
index 0000000000..c02274871b
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints
@@ -0,0 +1,93 @@
+# $FreeBSD: src/sys/i386/conf/GENERIC.hints,v 1.11 2002/12/05 22:49:47 jhb Exp $
+hint.fdc.0.at="isa"
+hint.fdc.0.port="0x3F0"
+hint.fdc.0.irq="6"
+hint.fdc.0.drq="2"
+hint.fd.0.at="fdc0"
+hint.fd.0.drive="0"
+hint.fd.1.at="fdc0"
+hint.fd.1.drive="1"
+hint.ata.0.at="isa"
+hint.ata.0.port="0x1F0"
+hint.ata.0.irq="14"
+hint.ata.1.at="isa"
+hint.ata.1.port="0x170"
+hint.ata.1.irq="15"
+hint.adv.0.at="isa"
+hint.adv.0.disabled="1"
+hint.bt.0.at="isa"
+hint.bt.0.disabled="1"
+hint.aha.0.at="isa"
+hint.aha.0.disabled="1"
+hint.aic.0.at="isa"
+hint.aic.0.disabled="1"
+hint.atkbdc.0.at="isa"
+hint.atkbdc.0.port="0x060"
+hint.atkbd.0.at="atkbdc"
+hint.atkbd.0.irq="1"
+hint.atkbd.0.flags="0x1"
+hint.psm.0.at="atkbdc"
+hint.psm.0.irq="12"
+hint.vga.0.at="isa"
+hint.sc.0.at="isa"
+hint.sc.0.flags="0x100"
+hint.vt.0.at="isa"
+hint.vt.0.disabled="1"
+hint.apm.0.disabled="1"
+hint.apm.0.flags="0x20"
+hint.pcic.0.at="isa"
+# hint.pcic.0.irq="10" # Default to polling
+hint.pcic.0.port="0x3e0"
+hint.pcic.0.maddr="0xd0000"
+hint.pcic.1.at="isa"
+hint.pcic.1.irq="11"
+hint.pcic.1.port="0x3e2"
+hint.pcic.1.maddr="0xd4000"
+hint.pcic.1.disabled="1"
+hint.sio.0.at="isa"
+hint.sio.0.port="0x3F8"
+hint.sio.0.flags="0x10"
+hint.sio.0.irq="4"
+hint.sio.1.at="isa"
+hint.sio.1.port="0x2F8"
+hint.sio.1.irq="3"
+hint.sio.2.at="isa"
+hint.sio.2.disabled="1"
+hint.sio.2.port="0x3E8"
+hint.sio.2.irq="5"
+hint.sio.3.at="isa"
+hint.sio.3.disabled="1"
+hint.sio.3.port="0x2E8"
+hint.sio.3.irq="9"
+hint.ppc.0.at="isa"
+hint.ppc.0.irq="7"
+hint.ed.0.at="isa"
+hint.ed.0.disabled="1"
+hint.ed.0.port="0x280"
+hint.ed.0.irq="10"
+hint.ed.0.maddr="0xd8000"
+hint.cs.0.at="isa"
+hint.cs.0.disabled="1"
+hint.cs.0.port="0x300"
+hint.sn.0.at="isa"
+hint.sn.0.disabled="1"
+hint.sn.0.port="0x300"
+hint.sn.0.irq="10"
+hint.ie.0.at="isa"
+hint.ie.0.disabled="1"
+hint.ie.0.port="0x300"
+hint.ie.0.irq="10"
+hint.ie.0.maddr="0xd0000"
+hint.fe.0.at="isa"
+hint.fe.0.disabled="1"
+hint.fe.0.port="0x300"
+hint.le.0.at="isa"
+hint.le.0.disabled="1"
+hint.le.0.port="0x300"
+hint.le.0.irq="5"
+hint.le.0.maddr="0xd0000"
+hint.lnc.0.at="isa"
+hint.lnc.0.disabled="1"
+hint.lnc.0.port="0x280"
+hint.lnc.0.irq="10"
+hint.lnc.0.drq="0"
diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/Makefile b/freebsd-5.3-xen-sparse/i386-xen/conf/Makefile
new file mode 100644
index 0000000000..0284f84e82
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/conf/Makefile
@@ -0,0 +1,3 @@
+# $FreeBSD: src/sys/i386/conf/Makefile,v 1.9 2003/02/26 23:36:58 ru Exp $
+
+.include "${.CURDIR}/../../conf/makeLINT.mk"
diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/NOTES b/freebsd-5.3-xen-sparse/i386-xen/conf/NOTES
new file mode 100644
index 0000000000..b01c607dfa
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/conf/NOTES
@@ -0,0 +1,1115 @@
+#
+# NOTES -- Lines that can be cut/pasted into kernel and hints configs.
+#
+# This file contains machine dependent kernel configuration notes. For
+# machine independent notes, look in /sys/conf/NOTES.
+#
+# $FreeBSD: src/sys/i386/conf/NOTES,v 1.1108 2003/12/04 19:57:56 phk Exp $
+#
+
+#
+# This directive is mandatory; it defines the architecture to be
+# configured for; in this case, the 386 family based IBM-PC and
+# compatibles.
+#
+machine i386
+
+#
+# We want LINT to cover profiling as well
+profile 2
+
+
+#####################################################################
+# SMP OPTIONS:
+#
+# The apic device enables the use of the I/O APIC for interrupt delivery.
+# The apic device can be used in both UP and SMP kernels, but is required
+# for SMP kernels. Thus, the apic device is not strictly an SMP option,
+# but it is a prerequisite for SMP.
+#
+# Notes:
+#
+# Be sure to disable 'cpu I386_CPU' for SMP kernels.
+#
+# By default, mixed mode is used to route IRQ0 from the AT timer via
+# the 8259A master PIC through the ExtINT pin on the first I/O APIC.
+# This can be disabled via the NO_MIXED_MODE option. In that case,
+# IRQ0 will be routed via an intpin on the first I/O APIC. Not all
+# motherboards hook IRQ0 up to the first I/O APIC even though their
+# MP table or MADT may claim to do so. That is why mixed mode is
+# enabled by default.
+#
+# HTT CPUs should only be used if they are enabled in the BIOS. For
+# the ACPI case, ACPI only correctly tells us about any HTT CPUs if
+# they are enabled. However, most HTT systems do not list HTT CPUs
+# in the MP Table if they are enabled, thus we guess at the HTT CPUs
+# for the MP Table case. However, we shouldn't try to guess and use
+# these CPUs if HTT is disabled. Thus, HTT guessing is only enabled
+# for the MP Table if the user explicitly asks for it via the
+# MPTABLE_FORCE_HTT option. Do NOT use this option if you have HTT
+# disabled in your BIOS.
+#
+
+# Mandatory:
+device apic # I/O apic
+
+# Optional:
+options MPTABLE_FORCE_HTT # Enable HTT CPUs with the MP Table
+options NO_MIXED_MODE # Disable use of mixed mode
+
+
+#####################################################################
+# CPU OPTIONS
+
+#
+# You must specify at least one CPU (the one you intend to run on);
+# deleting the specification for CPUs you don't need to use may make
+# parts of the system run faster.
+# I386_CPU is mutually exclusive with the other CPU types.
+#
+#cpu I386_CPU
+cpu I486_CPU
+cpu I586_CPU # aka Pentium(tm)
+cpu I686_CPU # aka Pentium Pro(tm)
+
+#
+# Options for CPU features.
+#
+# CPU_ATHLON_SSE_HACK tries to enable SSE instructions when the BIOS has
+# forgotten to enable them.
+#
+# CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM
+# BlueLightning CPU. It works only with Cyrix FPU, and this option
+# should not be used with Intel FPU.
+#
+# CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning
+# CPU if CPU supports it. The default is double-clock mode on
+# BlueLightning CPU box.
+#
+# CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1).
+#
+# CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct
+# mapped mode. Default is 2-way set associative mode.
+#
+# CPU_CYRIX_NO_LOCK enables weak locking for the entire address space
+# of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1.
+# Otherwise, the NO_LOCK bit of CCR1 is cleared. (NOTE 3)
+#
+# CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables
+# reorder). This option should not be used if you use memory mapped
+# I/O device(s).
+#
+# CPU_ELAN enables support for AMD's ElanSC520 CPU.
+# CPU_ELAN_XTAL sets the clock crystal frequency in Hz
+# CPU_ELAN_PPS enables precision timestamp code.
+#
+# CPU_SOEKRIS enables support for www.soekris.com hardware.
+#
+# CPU_ENABLE_SSE enables SSE/MMX2 instruction support. This is the default
+# on I686_CPU and above.
+# CPU_DISABLE_SSE explicitly prevents I686_CPU from turning on SSE.
+#
+# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
+#
+# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
+# for i386 machines.
+#
+# CPU_IORT defines I/O clock delay time (NOTE 1). Default values of
+# I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7, respectively
+# (no clock delay).
+#
+# CPU_L2_LATENCY specifies the L2 cache latency value. This option is used
+# only when CPU_PPRO2CELERON is defined and Mendocino Celeron is detected.
+# The default value is 5.
+#
+# CPU_LOOP_EN prevents flushing the prefetch buffer if the destination
+# of a jump is already present in the prefetch buffer on Cyrix 5x86
+# (NOTE 1).
+#
+# CPU_PPRO2CELERON enables L2 cache of Mendocino Celeron CPUs. This option
+# is useful when you use Socket 8 to Socket 370 converter, because most Pentium
+# Pro BIOSs do not enable L2 cache of Mendocino Celeron CPUs.
+#
+# CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1).
+#
+# CPU_SUSP_HLT enables suspend on HALT. If this option is set, CPU
+# enters suspend mode following execution of HALT instruction.
+#
+# CPU_UPGRADE_HW_CACHE eliminates unneeded cache flush instruction(s).
+#
+# CPU_WT_ALLOC enables write allocation on Cyrix 6x86/6x86MX and AMD
+# K5/K6/K6-2 cpus.
+#
+# CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache
+# flush at hold state.
+#
+# CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs
+# without cache flush at hold state, and (2) write-back CPU cache on
+# Cyrix 6x86 whose revision < 2.7 (NOTE 2).
+#
+# NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY
+# Pentiums) from locking up when a LOCK CMPXCHG8B instruction is
+# executed. This option is only needed if I586_CPU is also defined,
+# and should be included for any non-Pentium CPU that defines it.
+#
+# NO_MEMORY_HOLE is an optimisation for systems with AMD K6 processors
+# which indicates that the 15-16MB range is *definitely* not being
+# occupied by an ISA memory hole.
+#
+# CPU_DISABLE_CMPXCHG disables the CMPXCHG instruction on > i386 IA32
+# machines. VmWare seems to emulate this instruction poorly, causing
+# the guest OS to run very slowly. Enabling this with an SMP kernel
+# will cause the kernel to be unusable.
+#
+# NOTE 1: The options CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT and
+# CPU_RSTK_EN should not be used because of CPU bugs.
+# These options may crash your system.
+#
+# NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled
+# in write-through mode when revision < 2.7. If revision of Cyrix
+# 6x86 >= 2.7, CPU cache is always enabled in write-back mode.
+#
+# NOTE 3: This option may cause failures for software that requires
+# locked cycles in order to operate correctly.
+#
+options CPU_ATHLON_SSE_HACK
+options CPU_BLUELIGHTNING_FPU_OP_CACHE
+options CPU_BLUELIGHTNING_3X
+options CPU_BTB_EN
+options CPU_DIRECT_MAPPED_CACHE
+options CPU_DISABLE_5X86_LSSER
+options CPU_ELAN
+options CPU_SOEKRIS
+options CPU_ELAN_XTAL=32768000
+options CPU_ELAN_PPS
+options CPU_ENABLE_SSE
+#options CPU_DISABLE_SSE
+options CPU_FASTER_5X86_FPU
+options CPU_I486_ON_386
+options CPU_IORT
+options CPU_L2_LATENCY=5
+options CPU_LOOP_EN
+options CPU_PPRO2CELERON
+options CPU_RSTK_EN
+options CPU_SUSP_HLT
+options CPU_UPGRADE_HW_CACHE
+options CPU_WT_ALLOC
+options CYRIX_CACHE_WORKS
+options CYRIX_CACHE_REALLY_WORKS
+#options NO_F00F_HACK
+options CPU_DISABLE_CMPXCHG
+
+# Debug options
+options NPX_DEBUG # enable npx debugging (FPU/math emu)
+ #new math emulator
+
+#
+# PERFMON causes the driver for Pentium/Pentium Pro performance counters
+# to be compiled. See perfmon(4) for more information.
+#
+options PERFMON
+
+
+#####################################################################
+# NETWORKING OPTIONS
+
+#
+# DEVICE_POLLING adds support for mixed interrupt-polling handling
+# of network device drivers, which has significant benefits in terms
+# of robustness to overloads and responsivity, as well as permitting
+# accurate scheduling of the CPU time between kernel network processing
+# and other activities. The drawback is a moderate (up to 1/HZ seconds)
+# potential increase in response times.
+# It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING
+# to achieve smoother behaviour.
+# Additionally, you can enable/disable polling at runtime with the
+# sysctl variable kern.polling.enable (defaults off), and select
+# the CPU fraction reserved to userland with the sysctl variable
+# kern.polling.user_frac (default 50, range 0..100).
+#
+# Only the "dc" "fxp" and "sis" devices support this mode of operation at
+# the time of this writing.
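+# A hypothetical runtime session (not part of the original NOTES text):
+#	sysctl kern.polling.enable=1	# enable polling at runtime
+#	sysctl kern.polling.user_frac=50	# CPU share reserved for userland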
+
+options DEVICE_POLLING
+
+
+#####################################################################
+# CLOCK OPTIONS
+
+# The following options are used for debugging clock behavior only, and
+# should not be used for production systems.
+#
+# CLK_CALIBRATION_LOOP will run the clock calibration loop at startup
+# until the user presses a key.
+
+options CLK_CALIBRATION_LOOP
+
+# The following two options measure the frequency of the corresponding
+# clock relative to the RTC (onboard mc146818a).
+
+options CLK_USE_I8254_CALIBRATION
+options CLK_USE_TSC_CALIBRATION
+
+
+#####################################################################
+# MISCELLANEOUS DEVICES AND OPTIONS
+
+device speaker #Play IBM BASIC-style noises out your speaker
+hint.speaker.0.at="isa"
+hint.speaker.0.port="0x61"
+device gzip #Exec gzipped a.out's. REQUIRES COMPAT_AOUT!
+device apm_saver # Requires APM
+
+
+#####################################################################
+# HARDWARE BUS CONFIGURATION
+
+#
+# ISA bus
+#
+device isa
+
+#
+# Options for `isa':
+#
+# AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A
+# interrupt controller. This saves about 0.7-1.25 usec for each interrupt.
+# This option breaks suspend/resume on some portables.
+#
+# AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A
+# interrupt controller. This saves about 0.7-1.25 usec for each interrupt.
+# Automatic EOI is documented not to work for the slave with the
+# original i8259A, but it works for some clones and some integrated
+# versions.
+#
+# MAXMEM specifies the amount of RAM on the machine; if this is not
+# specified, FreeBSD will first read the amount of memory from the CMOS
+# RAM, so the amount of memory will initially be limited to 64MB or 16MB
+# depending on the BIOS. If the BIOS reports 64MB, a memory probe will
+# then attempt to detect the installed amount of RAM. If this probe
+# fails to detect >64MB RAM you will have to use the MAXMEM option.
+# The amount is in kilobytes, so for a machine with 128MB of RAM, it would
+# be 131072 (128 * 1024).
+#
+# BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to
+# reset the CPU for reboot. This is needed on some systems with broken
+# keyboard controllers.
+
+options COMPAT_OLDISA #Use ISA shims and glue for old drivers
+options AUTO_EOI_1
+#options AUTO_EOI_2
+
+options MAXMEM=(128*1024)
+#options BROKEN_KEYBOARD_RESET
+
+#
+# EISA bus
+#
+# The EISA bus device is `eisa'. It provides auto-detection and
+# configuration support for all devices on the EISA bus.
+
+device eisa
+
+# By default, only 10 EISA slots are probed, since the slot numbers
+# above clash with the configuration address space of the PCI subsystem,
+# and the EISA probe is not very smart about this. This is sufficient
+# for most machines, but in particular the HP NetServer LC series comes
+# with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11,
+# thus you need to bump this figure to 12 for them.
+options EISA_SLOTS=12
+
+#
+# MCA bus:
+#
+# The MCA bus device is `mca'. It provides auto-detection and
+# configuration support for all devices on the MCA bus.
+# No hints are required for MCA.
+
+device mca
+
+#
+# PCI bus & PCI options:
+#
+device pci
+
+#
+# AGP GART support
+device agp
+
+
+#####################################################################
+# HARDWARE DEVICE CONFIGURATION
+
+#
+# Mandatory devices:
+#
+
+# To include support for VGA VESA video modes
+options VESA
+
+# Turn on extra debugging checks and output for VESA support.
+options VESA_DEBUG
+
+# The pcvt console driver (vt220 compatible).
+device vt
+hint.vt.0.at="isa"
+options XSERVER # support for running an X server on vt
+options FAT_CURSOR # start with block cursor
+# This PCVT option is for keyboards such as those used on really old ThinkPads
+options PCVT_SCANSET=2
+# Other PCVT options are documented in pcvt(4).
+options PCVT_24LINESDEF
+options PCVT_CTRL_ALT_DEL
+options PCVT_META_ESC
+options PCVT_NSCREENS=9
+options PCVT_PRETTYSCRNS
+options PCVT_SCREENSAVER
+options PCVT_USEKBDSEC
+options PCVT_VT220KEYB
+options PCVT_GREENSAVER
+
+#
+# The Numeric Processing eXtension driver. In addition to this, you
+# may configure a math emulator (see above). If your machine has a
+# hardware FPU and the kernel configuration includes the npx device
+# *and* a math emulator compiled into the kernel, the hardware FPU
+# will be used, unless it is found to be broken or unless "flags" to
+# npx0 includes "0x08", which requests preference for the emulator.
+device npx
+hint.npx.0.flags="0x0"
+hint.npx.0.irq="13"
+
+#
+# `flags' for npx0:
+# 0x01 don't use the npx registers to optimize bcopy.
+# 0x02 don't use the npx registers to optimize bzero.
+# 0x04 don't use the npx registers to optimize copyin or copyout.
+# 0x08 use emulator even if hardware FPU is available.
+# The npx registers are normally used to optimize copying and zeroing when
+# all of the following conditions are satisfied:
+# I586_CPU is an option
+# the cpu is an i586 (perhaps not a Pentium)
+# the probe for npx0 succeeds
+# INT 16 exception handling works.
+# Then copying and zeroing using the npx registers is normally 30-100% faster.
+# The flags can be used to control cases where it doesn't work or is slower.
+# Setting them at boot time using userconfig works right (the optimizations
+# are not used until later in the bootstrap when npx0 is attached).
+# Flag 0x08 automatically disables the i586 optimized routines.
+#
+
+#
+# Optional devices:
+#
+
+# 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create
+# the /dev/3dfx0 device to work with glide implementations. This should get
+# linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as
+# the tdfx DRI module from XFree86 and is completely unrelated.
+#
+# To enable Linuxulator support, one must also include COMPAT_LINUX in the
+# config as well, or you will not have the dependencies. The other option
+# is to load both as modules.
+
+device tdfx # Enable 3Dfx Voodoo support
+options TDFX_LINUX # Enable Linuxulator support
+
+#
+# ACPI support using the Intel ACPI Component Architecture reference
+# implementation.
+#
+# ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer
+# kernel environment variables to select initial debugging levels for the
+# Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER
+# defined when it is built).
+#
+# ACPI_MAX_THREADS sets the number of task threads started.
+#
+# ACPI_NO_SEMAPHORES makes the AcpiOs*Semaphore routines a no-op.
+#
+# ACPICA_PEDANTIC enables strict checking of AML. Our default is to
+# relax these checks to allow code generated by the Microsoft compiler
+# to still execute.
+#
+# Note that building ACPI into the kernel is deprecated; the module is
+# normally loaded automatically by the loader.
+#
+device acpi
+options ACPI_DEBUG
+options ACPI_MAX_THREADS=1
+#!options ACPI_NO_SEMAPHORES
+#!options ACPICA_PEDANTIC
+
+# DRM options:
+# mgadrm: AGP Matrox G200, G400, G450, G550
+# r128drm: ATI Rage 128
+# radeondrm: ATI Radeon up to 9000/9100
+# sisdrm: SiS 300/305,540,630
+# tdfxdrm: 3dfx Voodoo 3/4/5 and Banshee
+# DRM_DEBUG: include debug printfs, very slow
+#
+# mga requires AGP in the kernel, and it is recommended
+# for AGP r128 and radeon cards.
+
+device mgadrm
+device "r128drm"
+device radeondrm
+device sisdrm
+device tdfxdrm
+
+options DRM_DEBUG
+
+# M-systems DiskOnchip products see src/sys/contrib/dev/fla/README
+device fla
+hint.fla.0.at="isa"
+
+#
+# mse: Logitech and ATI InPort bus mouse ports
+
+device mse
+hint.mse.0.at="isa"
+hint.mse.0.port="0x23c"
+hint.mse.0.irq="5"
+
+#
+# Network interfaces:
+#
+
+# ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver
+# (requires sppp)
+# ath: Atheros a/b/g WiFi adapters (requires ath_hal and wlan)
+# cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing)
+# ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503
+# HP PC Lan+, various PC Card devices (refer to etc/defaults/pccard.conf)
+# (requires miibus)
+# el: 3Com 3C501 (slow!)
+# ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210;
+# Intel EtherExpress
+# le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100,
+# DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422)
+# lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL, AMD Am7990 and
+# Am79C960)
+# oltr: Olicom ISA token-ring adapters OC-3115, OC-3117, OC-3118 and OC-3133
+# (no hints needed).
+# Olicom PCI token-ring adapters OC-3136, OC-3137, OC-3139, OC-3140,
+# OC-3141, OC-3540, OC-3250
+# rdp: RealTek RTL 8002-based pocket ethernet adapters
+# sbni: Granch SBNI12-xx ISA and PCI adapters
+# sr: RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp)
+# wl: Lucent Wavelan (ISA card only).
+
+# Order for ISA/EISA devices is important here
+
+device ar
+hint.ar.0.at="isa"
+hint.ar.0.port="0x300"
+hint.ar.0.irq="10"
+hint.ar.0.maddr="0xd0000"
+device cx
+hint.cx.0.at="isa"
+hint.cx.0.port="0x240"
+hint.cx.0.irq="15"
+hint.cx.0.drq="7"
+device ed
+#options ED_NO_MIIBUS # Disable ed miibus support
+hint.ed.0.at="isa"
+hint.ed.0.port="0x280"
+hint.ed.0.irq="5"
+hint.ed.0.maddr="0xd8000"
+device el 1
+hint.el.0.at="isa"
+hint.el.0.port="0x300"
+hint.el.0.irq="9"
+device ie # Hints only required for Starlan
+hint.ie.2.at="isa"
+hint.ie.2.port="0x300"
+hint.ie.2.irq="5"
+hint.ie.2.maddr="0xd0000"
+device le 1
+hint.le.0.at="isa"
+hint.le.0.port="0x300"
+hint.le.0.irq="5"
+hint.le.0.maddr="0xd0000"
+device lnc
+hint.lnc.0.at="isa"
+hint.lnc.0.port="0x280"
+hint.lnc.0.irq="10"
+hint.lnc.0.drq="0"
+device rdp 1
+hint.rdp.0.at="isa"
+hint.rdp.0.port="0x378"
+hint.rdp.0.irq="7"
+hint.rdp.0.flags="2"
+device sbni
+hint.sbni.0.at="isa"
+hint.sbni.0.port="0x210"
+hint.sbni.0.irq="0xefdead"
+hint.sbni.0.flags="0"
+device sr
+hint.sr.0.at="isa"
+hint.sr.0.port="0x300"
+hint.sr.0.irq="5"
+hint.sr.0.maddr="0xd0000"
+device oltr
+hint.oltr.0.at="isa"
+device wl
+hint.wl.0.at="isa"
+hint.wl.0.port="0x300"
+options WLCACHE # enables the signal-strength cache
+options WLDEBUG # enables verbose debugging output
+
+device ath
+device ath_hal # Atheros HAL (includes binary component)
+#device wlan # 802.11 layer
+
+#
+# ATA raid adapters
+#
+device pst
+
+#
+# SCSI host adapters:
+#
+# ncv: NCR 53C500 based SCSI host adapters.
+# nsp: Workbit Ninja SCSI-3 based PC Card SCSI host adapters.
+# stg: TMC 18C30, 18C50 based SCSI host adapters.
+
+device ncv
+device nsp
+device stg
+hint.stg.0.at="isa"
+hint.stg.0.port="0x140"
+hint.stg.0.irq="11"
+
+#
+# Adaptec FSA RAID controllers, including integrated DELL controllers,
+# the Dell PERC 2/QC and the HP NetRAID-4M
+device aac
+device aacp # SCSI Passthrough interface (optional, CAM required)
+
+#
+# IBM (now Adaptec) ServeRAID controllers
+device ips
+
+#
+# SafeNet crypto driver: can be moved to the MI NOTES as soon as
+# it's tested on a big-endian machine
+#
+device safe # SafeNet 1141
+options SAFE_DEBUG # enable debugging support: hw.safe.debug
+options SAFE_RNDTEST # enable rndtest support
+
+#####################################################################
+
+#
+# Miscellaneous hardware:
+#
+# wt: Wangtek and Archive QIC-02/QIC-36 tape drives
+# ctx: Cortex-I frame grabber
+# apm: Laptop Advanced Power Management (experimental)
+# pmtimer: Timer device driver for power management events (APM or ACPI)
+# spigot: The Creative Labs Video Spigot video-acquisition board
+# dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!)
+# digi: Digiboard driver
+# gp: National Instruments AT-GPIB and AT-GPIB/TNT board, PCMCIA-GPIB
+# asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey
+# gsc: Genius GS-4500 hand scanner.
+# spic: Sony Programmable I/O controller (VAIO notebooks)
+# stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based)
+# stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent)
+
+# Notes on APM
+# The flags takes the following meaning for apm0:
+# 0x0020 Statclock is broken.
+# If apm is omitted, some systems require sysctl kern.timecounter.method=1
+# for correct timekeeping.
+
+# Notes on the spigot:
+# The video spigot is at 0xad6. This port address can not be changed.
+# The irq values may only be 10, 11, or 15
+# I/O memory is an 8kb region. Possible values are:
+# 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff
+# The start address must be on an even boundary.
+# Add the following option if you want to allow non-root users to be able
+# to access the spigot. This option is not secure because it allows users
+# direct access to the I/O page.
+# options SPIGOT_UNSECURE
+
+# Notes on the Specialix SI/XIO driver:
+# The host card is memory mapped, not IO mapped.
+# The Rev 1 host cards use a 64K chunk, on a 32K boundary.
+# The Rev 2 host cards use a 32K chunk, on a 32K boundary.
+# The cards can use an IRQ of 11, 12 or 15.
+
+# Notes on the Sony Programmable I/O controller
+# This is a temporary driver that should someday be replaced by something
+# that hooks into the ACPI layer. The device is hooked to the PIIX4's
+# General Device 10 decoder, which means you have to fiddle with PCI
+# registers to map it in, even though it is otherwise treated here as
+# an ISA device. At the moment, the driver polls, although the device
+# is capable of generating interrupts. It is largely undocumented.
+# The port location in the hint is where you WANT the device to be
+# mapped. 0x10a0 seems to be traditional. At the moment the jogdial
+# is the only thing truly supported, but apparently a fair percentage
+# of the Vaio extra features are controlled by this device.
+
+# Notes on the Stallion stl and stli drivers:
+# See src/i386/isa/README.stl for complete instructions.
+# This is version 0.0.5alpha, unsupported by Stallion.
+# The stl driver has a secondary IO port hard coded at 0x280. You need
+# to change src/i386/isa/stallion.c if you reconfigure this on the boards.
+# The "flags" and "msize" settings on the stli driver depend on the board:
+# EasyConnection 8/64 ISA: flags 23 msize 0x1000
+# EasyConnection 8/64 EISA: flags 24 msize 0x10000
+# EasyConnection 8/64 MCA: flags 25 msize 0x1000
+# ONboard ISA: flags 4 msize 0x10000
+# ONboard EISA: flags 7 msize 0x10000
+# ONboard MCA: flags 3 msize 0x10000
+# Brumby: flags 2 msize 0x4000
+# Stallion: flags 1 msize 0x10000
+
+# Notes on the Digiboard PC/Xi and PC/Xe series driver
+#
+# The NDGBPORTS option specifies the number of ports controlled by the
+# dgb(4) driver. The default value is 16 ports per device.
+#
+# The following flag values have special meanings in dgb:
+# 0x01 - alternate layout of pins
+# 0x02 - use the windowed PC/Xe in 64K mode
+
+device wt 1
+hint.wt.0.at="isa"
+hint.wt.0.port="0x300"
+hint.wt.0.irq="5"
+hint.wt.0.drq="1"
+device ctx
+hint.ctx.0.at="isa"
+hint.ctx.0.port="0x230"
+hint.ctx.0.maddr="0xd0000"
+device spigot 1
+hint.spigot.0.at="isa"
+hint.spigot.0.port="0xad6"
+hint.spigot.0.irq="15"
+hint.spigot.0.maddr="0xee000"
+device apm
+hint.apm.0.flags="0x20"
+device pmtimer # Adjust system timer at wakeup time
+device gp
+hint.gp.0.at="isa"
+hint.gp.0.port="0x2c0"
+device gsc 1
+hint.gsc.0.at="isa"
+hint.gsc.0.port="0x270"
+hint.gsc.0.drq="3"
+device dgb 1
+options NDGBPORTS=17
+hint.dgb.0.at="isa"
+hint.dgb.0.port="0x220"
+hint.dgb.0.maddr="0xfc000"
+device digi
+hint.digi.0.at="isa"
+hint.digi.0.port="0x104"
+hint.digi.0.maddr="0xd0000"
+# BIOS & FEP/OS components of device digi.
+device digi_CX
+device digi_CX_PCI
+device digi_EPCX
+device digi_EPCX_PCI
+device digi_Xe
+device digi_Xem
+device digi_Xr
+device asc 1
+hint.asc.0.at="isa"
+hint.asc.0.port="0x3EB"
+hint.asc.0.drq="3"
+hint.asc.0.irq="10"
+device spic
+hint.spic.0.at="isa"
+hint.spic.0.port="0x10a0"
+device stl
+hint.stl.0.at="isa"
+hint.stl.0.port="0x2a0"
+hint.stl.0.irq="10"
+device stli
+hint.stli.0.at="isa"
+hint.stli.0.port="0x2a0"
+hint.stli.0.maddr="0xcc000"
+hint.stli.0.flags="23"
+hint.stli.0.msize="0x1000"
+# You are unlikely to have the hardware for loran <phk@FreeBSD.org>
+device loran
+hint.loran.0.at="isa"
+hint.loran.0.irq="5"
+# HOT1 Xilinx 6200 card (http://www.vcc.com/)
+device xrpu
+
+#
+# Laptop/Notebook options:
+#
+# See also:
+# apm under `Miscellaneous hardware'
+# above.
+
+# For older notebooks that signal a powerfail condition (external
+# power supply dropped, or battery state low) by issuing an NMI:
+
+options POWERFAIL_NMI # make it beep instead of panicking
+
+#
+# I2C Bus
+#
+# Philips i2c bus support is provided by the `iicbus' device.
+#
+# Supported interfaces:
+# pcf Philips PCF8584 ISA-bus controller
+#
+device pcf
+hint.pcf.0.at="isa"
+hint.pcf.0.port="0x320"
+hint.pcf.0.irq="5"
+
+#---------------------------------------------------------------------------
+# ISDN4BSD
+#
+# See /usr/share/examples/isdn/ROADMAP for an introduction to isdn4bsd.
+#
+# i4b passive ISDN cards support contains the following hardware drivers:
+#
+# isic - Siemens/Infineon ISDN ISAC/HSCX/IPAC chipset driver
+# iwic - Winbond W6692 PCI bus ISDN S/T interface controller
+# ifpi - AVM Fritz!Card PCI driver
+# ifpi2 - AVM Fritz!Card PCI version 2 driver
+# ihfc - Cologne Chip HFC ISA/ISA-PnP chipset driver
+# ifpnp - AVM Fritz!Card PnP driver
+# itjc - Siemens ISAC / TJNet Tiger300/320 chipset
+#
+# i4b active ISDN cards support contains the following hardware drivers:
+#
+# iavc - AVM B1 PCI, AVM B1 ISA, AVM T1
+#
+# Note that the ``options'' (if given) and ``device'' lines must BOTH
+# be uncommented to enable support for a given card!
+#
+# In addition to a hardware driver (and probably an option) the mandatory
+# ISDN protocol stack devices and the mandatory support device must be
+# enabled as well as one or more devices from the optional devices section.
+#
+#---------------------------------------------------------------------------
+# isic driver (Siemens/Infineon chipsets)
+#
+device isic
+#
+# ISA bus non-PnP Cards:
+# ----------------------
+#
+# Teles S0/8 or Niccy 1008
+options TEL_S0_8
+hint.isic.0.at="isa"
+hint.isic.0.maddr="0xd0000"
+hint.isic.0.irq="5"
+hint.isic.0.flags="1"
+#
+# Teles S0/16 or Creatix ISDN-S0 or Niccy 1016
+options TEL_S0_16
+hint.isic.0.at="isa"
+hint.isic.0.port="0xd80"
+hint.isic.0.maddr="0xd0000"
+hint.isic.0.irq="5"
+hint.isic.0.flags="2"
+#
+# Teles S0/16.3
+options TEL_S0_16_3
+hint.isic.0.at="isa"
+hint.isic.0.port="0xd80"
+hint.isic.0.irq="5"
+hint.isic.0.flags="3"
+#
+# AVM A1 or AVM Fritz!Card
+options AVM_A1
+hint.isic.0.at="isa"
+hint.isic.0.port="0x340"
+hint.isic.0.irq="5"
+hint.isic.0.flags="4"
+#
+# USRobotics Sportster ISDN TA intern
+options USR_STI
+hint.isic.0.at="isa"
+hint.isic.0.port="0x268"
+hint.isic.0.irq="5"
+hint.isic.0.flags="7"
+#
+# ITK ix1 Micro ( < V.3, non-PnP version )
+options ITKIX1
+hint.isic.0.at="isa"
+hint.isic.0.port="0x398"
+hint.isic.0.irq="10"
+hint.isic.0.flags="18"
+#
+# ELSA PCC-16
+options ELSA_PCC16
+hint.isic.0.at="isa"
+hint.isic.0.port="0x360"
+hint.isic.0.irq="10"
+hint.isic.0.flags="20"
+#
+# ISA bus PnP Cards:
+# ------------------
+#
+# Teles S0/16.3 PnP
+options TEL_S0_16_3_P
+#
+# Creatix ISDN-S0 P&P
+options CRTX_S0_P
+#
+# Dr. Neuhaus Niccy Go@
+options DRN_NGO
+#
+# Sedlbauer Win Speed
+options SEDLBAUER
+#
+# Dynalink IS64PH
+options DYNALINK
+#
+# ELSA QuickStep 1000pro ISA
+options ELSA_QS1ISA
+#
+# Siemens I-Surf 2.0
+options SIEMENS_ISURF2
+#
+# Asuscom ISDNlink 128K ISA
+options ASUSCOM_IPAC
+#
+# Eicon Diehl DIVA 2.0 and 2.02
+options EICON_DIVA
+#
+# Compaq Microcom 610 ISDN card (Compaq series PSB2222I)
+options COMPAQ_M610
+#
+# PCI bus Cards:
+# --------------
+#
+# Cyclades Cyclom-Y PCI serial driver
+device cy 1
+options CY_PCI_FASTINTR # Use with cy_pci unless irq is shared
+hint.cy.0.at="isa"
+hint.cy.0.irq="10"
+hint.cy.0.maddr="0xd4000"
+hint.cy.0.msize="0x2000"
+#
+#---------------------------------------------------------------------------
+# ELSA MicroLink ISDN/PCI (same as ELSA QuickStep 1000pro PCI)
+options ELSA_QS1PCI
+#
+#
+#---------------------------------------------------------------------------
+# ifpnp driver for AVM Fritz!Card PnP
+#
+# AVM Fritz!Card PnP
+device ifpnp
+#
+#---------------------------------------------------------------------------
+# ihfc driver for Cologne Chip ISA chipsets (experimental!)
+#
+# Teles 16.3c ISA PnP
+# AcerISDN P10 ISA PnP
+# TELEINT ISDN SPEED No.1
+device ihfc
+#
+#---------------------------------------------------------------------------
+# ifpi driver for AVM Fritz!Card PCI
+#
+# AVM Fritz!Card PCI
+device ifpi
+#
+#---------------------------------------------------------------------------
+# ifpi2 driver for AVM Fritz!Card PCI version 2
+#
+# AVM Fritz!Card PCI version 2
+device "ifpi2"
+#
+#---------------------------------------------------------------------------
+# iwic driver for Winbond W6692 chipset
+#
+# ASUSCOM P-IN100-ST-D (and other Winbond W6692 based cards)
+device iwic
+#
+#---------------------------------------------------------------------------
+# itjc driver for Siemens ISAC / TJNet Tiger300/320 chipset
+#
+# Traverse Technologies NETjet-S
+# Teles PCI-TJ
+device itjc
+#
+#---------------------------------------------------------------------------
+# iavc driver (AVM active cards, needs i4bcapi driver!)
+#
+device iavc
+#
+# AVM B1 ISA bus (PnP mode not supported!)
+# ----------------------------------------
+hint.iavc.0.at="isa"
+hint.iavc.0.port="0x150"
+hint.iavc.0.irq="5"
+#
+#---------------------------------------------------------------------------
+# ISDN Protocol Stack - mandatory for all hardware drivers
+#
+# Q.921 / layer 2 - i4b passive cards D channel handling
+device "i4bq921"
+#
+# Q.931 / layer 3 - i4b passive cards D channel handling
+device "i4bq931"
+#
+# layer 4 - i4b common passive and active card handling
+device "i4b"
+#
+#---------------------------------------------------------------------------
+# ISDN devices - mandatory for all hardware drivers
+#
+# userland driver to do ISDN tracing (for passive cards only)
+device "i4btrc" 4
+#
+# userland driver to control the whole thing
+device "i4bctl"
+#
+#---------------------------------------------------------------------------
+# ISDN devices - optional
+#
+# userland driver for access to raw B channel
+device "i4brbch" 4
+#
+# userland driver for telephony
+device "i4btel" 2
+#
+# network driver for IP over raw HDLC ISDN
+device "i4bipr" 4
+# enable VJ header compression detection for ipr i/f
+options IPR_VJ
+# enable logging of the first n IP packets to isdnd (n=32 here)
+options IPR_LOG=32
+#
+# network driver for sync PPP over ISDN; requires an equivalent
+# number of sppp device to be configured
+device "i4bisppp" 4
+#
+# B-channel interface to the netgraph subsystem
+device "i4bing" 2
+#
+# CAPI driver needed for active ISDN cards (see iavc driver above)
+device "i4bcapi"
+#
+#---------------------------------------------------------------------------
+
+#
+# Set the number of PV entries per process. Increasing this can
+# stop panics related to heavy use of shared memory. However, that can
+# (combined with large amounts of physical memory) cause panics at
+# boot time due to the kernel running out of VM space.
+#
+# If you're tweaking this, you might also want to increase the sysctls
+# "vm.v_free_min", "vm.v_free_reserved", and "vm.v_free_target".
+#
+# The value below is one more than the default (200).
+#
+options PMAP_SHPGPERPROC=201
+
+#
+# Change the size of the kernel virtual address space. Due to
+# constraints in loader(8) on i386, this must be a multiple of 4.
+# 256 = 1 GB of kernel address space. Increasing this also causes
+# a reduction of the address space in user processes. 512 splits
+# the 4GB cpu address space in half (2GB user, 2GB kernel).
+#
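+# Each KVA_PAGES unit is one 4MB page directory entry, so as a worked
+# example the value below yields 260 * 4MB = 1040MB of kernel address space.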
+options KVA_PAGES=260
+
+
+#####################################################################
+# ABI Emulation
+
+# Enable iBCS2 runtime support for SCO and ISC binaries
+options IBCS2
+
+# Emulate spx device for client side of SVR3 local X interface
+options SPX_HACK
+
+# Enable Linux ABI emulation
+options COMPAT_LINUX
+
+# Enable i386 a.out binary support
+options COMPAT_AOUT
+
+# Enable the linux-like proc filesystem support (requires COMPAT_LINUX
+# and PSEUDOFS)
+options LINPROCFS
+
+#
+# SysVR4 ABI emulation
+#
+# The svr4 ABI emulator can be statically compiled into the kernel or loaded as
+# a KLD module.
+# The STREAMS network emulation code can also be compiled statically or as a
+# module. If loaded as a module, it must be loaded before the svr4 module
+# (the /usr/sbin/svr4 script does this for you). If compiling statically,
+# the `streams' device must be configured into any kernel which also
+# specifies COMPAT_SVR4. It is possible to have a statically-configured
+# STREAMS device and a dynamically loadable svr4 emulator; the /usr/sbin/svr4
+# script understands that it doesn't need to load the `streams' module under
+# those circumstances.
+# Caveat: At this time, `options KTRACE' is required for the svr4 emulator
+# (whether static or dynamic).
+#
+options COMPAT_SVR4 # build emulator statically
+options DEBUG_SVR4 # enable verbose debugging
+device streams # STREAMS network driver (required for svr4).
+
+
+#####################################################################
+# VM OPTIONS
+
+# Disable the 4 MByte page PSE CPU feature. The PSE feature allows the
+# kernel to use a 4 MByte pages to map the kernel instead of 4k pages.
+# This saves on the amount of memory needed for page tables needed to
+# map the kernel. You should only disable this feature as a temporary
+# workaround if you are having problems with it enabled.
+#
+#options DISABLE_PSE
+
+# Disable the global pages PGE CPU feature. The PGE feature allows pages
+# to be marked with the PG_G bit. TLB entries for these pages are not
+# flushed from the cache when %cr3 is reloaded. This can make context
+# switches less expensive. You should only disable this feature as a
+# temporary workaround if you are having problems with it enabled.
+#
+#options DISABLE_PG_G
+
+# KSTACK_PAGES is the number of memory pages to assign to the kernel
+# stack of each thread.
+
+options KSTACK_PAGES=3
+
+#####################################################################
+
+# More undocumented options for linting.
+# Note that documenting these is not considered an affront.
+
+options FB_INSTALL_CDEV # install a CDEV entry in /dev
+
+# PECOFF module (Win32 Execution Format)
+options PECOFF_SUPPORT
+options PECOFF_DEBUG
+
+options ENABLE_ALART
+options I4B_SMP_WORKAROUND
+options I586_PMC_GUPROF=0x70000
+options KBDIO_DEBUG=2
+options KBD_MAXRETRY=4
+options KBD_MAXWAIT=6
+options KBD_RESETDELAY=201
+
+options PSM_DEBUG=1
+
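+# The i8254 input clock is the 14.31818MHz NTSC colour-burst crystal
+# divided by 12; the +6 below merely rounds the integer division to the
+# nearest Hz, yielding the usual 1193182.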
+options TIMER_FREQ=((14318182+6)/12)
+
+options VM_KMEM_SIZE
+options VM_KMEM_SIZE_MAX
+options VM_KMEM_SIZE_SCALE
diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD b/freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD
new file mode 100644
index 0000000000..2d13fbe2b5
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD
@@ -0,0 +1,17 @@
+#
+# OLDCARD -- Generic kernel configuration file for FreeBSD/i386
+# using the OLDCARD pccard system.
+#
+# $FreeBSD: src/sys/i386/conf/OLDCARD,v 1.18 2003/02/15 02:39:13 ru Exp $
+
+include GENERIC
+
+ident OLDCARD
+
+# PCCARD (PCMCIA) support
+nodevice cbb # cardbus (yenta) bridge
+#nodevice pcic # ExCA ISA and PCI bridges
+nodevice pccard # PC Card (16-bit) bus
+nodevice cardbus # CardBus (32-bit) bus
+device card 1 # pccard bus
+device pcic # PCMCIA bridge
diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/PAE b/freebsd-5.3-xen-sparse/i386-xen/conf/PAE
new file mode 100644
index 0000000000..98d4f2c252
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/conf/PAE
@@ -0,0 +1,99 @@
+#
+# PAE -- Generic kernel configuration file for FreeBSD/i386 PAE
+#
+# $FreeBSD: src/sys/i386/conf/PAE,v 1.8 2003/11/03 22:49:19 jhb Exp $
+
+include GENERIC
+
+ident PAE-GENERIC
+
+# To make a PAE kernel, the next option is needed
+options PAE # Physical Address Extensions Kernel
+
+# Compile acpi in statically since the module isn't built properly. Most
+# machines which support large amounts of memory require acpi.
+device acpi
+
+# Don't build modules with this kernel config, since they are not built with
+# the correct options headers.
+makeoptions NO_MODULES=yes
+
+# What follows is a list of drivers that are normally in GENERIC, but either
+# don't work or are untested with PAE. Be very careful before enabling any
+# of these drivers. Drivers which use DMA and don't handle 64-bit physical
+# addresses properly may cause data corruption when used in a machine with more
+# than 4 gigabytes of memory.
+
+nodevice ahb
+nodevice amd
+nodevice isp
+nodevice sym
+nodevice trm
+
+nodevice adv
+nodevice adw
+nodevice aha
+nodevice aic
+nodevice bt
+
+nodevice ncv
+nodevice nsp
+nodevice stg
+
+nodevice asr
+nodevice dpt
+nodevice iir
+nodevice mly
+
+nodevice amr
+nodevice ida
+nodevice mlx
+nodevice pst
+
+nodevice agp
+
+nodevice de
+nodevice txp
+nodevice vx
+
+nodevice dc
+nodevice pcn
+nodevice rl
+nodevice sf
+nodevice sis
+nodevice ste
+nodevice tl
+nodevice tx
+nodevice vr
+nodevice wb
+
+nodevice cs
+nodevice ed
+nodevice ex
+nodevice ep
+nodevice fe
+nodevice ie
+nodevice lnc
+nodevice sn
+nodevice xe
+
+nodevice wlan
+nodevice an
+nodevice awi
+nodevice wi
+
+nodevice uhci
+nodevice ohci
+nodevice usb
+nodevice ugen
+nodevice uhid
+nodevice ukbd
+nodevice ulpt
+nodevice umass
+nodevice ums
+nodevice urio
+nodevice uscanner
+nodevice aue
+nodevice axe
+nodevice cue
+nodevice kue
diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF b/freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF
new file mode 100644
index 0000000000..4214b1c59b
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF
@@ -0,0 +1,137 @@
+#
+# GENERIC -- Generic kernel configuration file for FreeBSD/i386
+#
+# For more information on this file, please read the handbook section on
+# Kernel Configuration Files:
+#
+# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
+#
+# The handbook is also available locally in /usr/share/doc/handbook
+# if you've installed the doc distribution, otherwise always see the
+# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
+# latest information.
+#
+# An exhaustive list of options and more detailed explanations of the
+# device lines is also present in the ../../conf/NOTES and NOTES files.
+# If you are in doubt as to the purpose or necessity of a line, check first
+# in NOTES.
+#
+# $FreeBSD: src/sys/i386/conf/GENERIC,v 1.394.2.3 2004/01/26 19:42:11 nectar Exp $
+
+machine i386-xen
+cpu I686_CPU
+ident XEN
+
+#To statically compile in device wiring instead of /boot/device.hints
+#hints "GENERIC.hints" #Default places to look for devices.
+
+makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols
+
+options SCHED_4BSD #4BSD scheduler
+options INET #InterNETworking
+options INET6 #IPv6 communications protocols
+options FFS #Berkeley Fast Filesystem
+options SOFTUPDATES #Enable FFS soft updates support
+options UFS_ACL #Support for access control lists
+options UFS_DIRHASH #Improve performance on big directories
+options MD_ROOT #MD is a potential root device
+options NFSCLIENT #Network Filesystem Client
+options NFSSERVER #Network Filesystem Server
+# options NFS_ROOT #NFS usable as /, requires NFSCLIENT
+#options MSDOSFS #MSDOS Filesystem
+#options CD9660 #ISO 9660 Filesystem
+options PROCFS #Process filesystem (requires PSEUDOFS)
+options PSEUDOFS #Pseudo-filesystem framework
+options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!]
+options COMPAT_FREEBSD4 #Compatible with FreeBSD4
+options SCSI_DELAY=15000 #Delay (in ms) before probing SCSI
+options KTRACE #ktrace(1) support
+options SYSVSHM #SYSV-style shared memory
+options SYSVMSG #SYSV-style message queues
+options SYSVSEM #SYSV-style semaphores
+options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions
+options KBD_INSTALL_CDEV # install a CDEV entry in /dev
+options CPU_DISABLE_SSE # don't turn on SSE framework with Xen
+#options PFIL_HOOKS # pfil(9) framework
+
+# Debugging for use in -current
+options KDB #Enable the kernel debugger
+options INVARIANTS #Enable calls of extra sanity checking
+options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS
+#options WITNESS #Enable checks to detect deadlocks and cycles
+#options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed
+
+# To make an SMP kernel, the next two are needed
+#options SMP # Symmetric MultiProcessor Kernel
+#device apic # I/O APIC
+
+# SCSI peripherals
+device scbus # SCSI bus (required for SCSI)
+#device ch # SCSI media changers
+device da # Direct Access (disks)
+#device sa # Sequential Access (tape etc)
+#device cd # CD
+device pass # Passthrough device (direct SCSI access)
+#device ses # SCSI Environmental Services (and SAF-TE)
+
+# atkbdc0 controls both the keyboard and the PS/2 mouse
+#device atkbdc # AT keyboard controller
+#device atkbd # AT keyboard
+#device psm # PS/2 mouse
+
+# device vga # VGA video card driver
+
+#device splash # Splash screen and screen saver support
+
+# syscons is the default console driver, resembling an SCO console
+#device sc
+
+# Enable this for the pcvt (VT220 compatible) console driver
+#device vt
+#options XSERVER # support for X server on a vt console
+#options FAT_CURSOR # start with block cursor
+
+#device agp # support several AGP chipsets
+
+# Floating point support - do not disable.
+device npx
+
+# Serial (COM) ports
+#device sio # 8250, 16[45]50 based serial ports
+
+# Parallel port
+#device ppc
+#device ppbus # Parallel port bus (required)
+#device lpt # Printer
+#device plip # TCP/IP over parallel
+#device ppi # Parallel port interface device
+#device vpo # Requires scbus and da
+
+# If you've got a "dumb" serial or parallel PCI card that is
+# supported by the puc(4) glue driver, uncomment the following
+# line to enable it (connects to the sio and/or ppc drivers):
+#device puc
+
+
+# Pseudo devices - the number indicates how many units to allocate.
+device random # Entropy device
+device loop # Network loopback
+device ether # Ethernet support
+device tun # Packet tunnel.
+device pty # Pseudo-ttys (telnet etc)
+device md # Memory "disks"
+device gif # IPv6 and IPv4 tunneling
+device faith # IPv6-to-IPv4 relaying (translation)
+
+# The `bpf' device enables the Berkeley Packet Filter.
+# Be aware of the administrative consequences of enabling this!
+device bpf # Berkeley packet filter
+
+#options BOOTP
+options XEN
+options MCLSHIFT=12 # this has to be enabled for Xen as we can only have one cluster per page
+options MSIZE=256
+options DIAGNOSTIC
+options MAXMEM=(256*1024)
+options NOXENDEBUG=1 # Turn off Debugging printfs
+
diff --git a/freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk b/freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk
new file mode 100644
index 0000000000..e8cc6b67de
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk
@@ -0,0 +1,116 @@
+#! /usr/bin/awk -f
+#
+# This is a transition aid. It extracts old-style configuration information
+# from a config file and writes an equivalent device.hints file to stdout.
+# You can use that with loader(8) or statically compile it in with the
+# 'hints' directive. See how GENERIC and GENERIC.hints fit together for
+# a static example. You should use loader(8) if at all possible.
+#
+# $FreeBSD: src/sys/i386/conf/gethints.awk,v 1.2 2002/07/26 03:52:30 peter Exp $
+
+# skip comment lines, empty lines, and non-"device" lines
+/^[ \t]*#/ || /^[ \t]*$/ || !/[ \t]*device/ { next; }
+
+# input format:
+# device <name><unit> at <controller>[?] [key [val]]...
+# possible keys are:
+# disable, port #, irq #, drq #, drive #, iomem #, iosiz #,
+# flags #, bus #, target #, unit #.
+# output format:
+# hint.<name>.<unit>.<key>=<val>
+# mapped keys are:
+# iomem -> maddr, iosiz -> msize.
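+# Example (hypothetical input line):
+# device sio0 at isa? port IO_COM1 irq 4 flags 0x10
+# produces:
+# hint.sio.0.at="isa"
+# hint.sio.0.port="0x3F8"
+# hint.sio.0.irq="4"
+# hint.sio.0.flags="0x10"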
+{
+ gsub ("#.*", ""); # delete comments
+ gsub ("\"", ""); # and double-quotes
+ nameunit = $2; # <name><unit>
+ at = $3; # at
+ controller = $4; # <controller>[?]
+ rest = 5; # optional keys begin at index 5
+ if (at != "at" || controller == "")
+ next; # skip devices w/o controllers
+ name = nameunit;
+ sub ("[0-9]*$", "", name); # get the name
+ unit = nameunit;
+ sub ("^" name, "", unit); # and the unit
+ sub ("\?$", "", controller);
+ printf "hint.%s.%s.at=\"%s\"\n", name, unit, controller;
+ # for each key, if any
+ for (key = $rest; rest <= NF; key = $(++rest)) {
+ # skip auto-detect keys (the ones w/ a ?)
+ if (key == "port?" || key == "drq?" || key == "irq?" || \
+ key == "iomem?" || key == "iosiz?")
+ continue;
+ # disable has no value, so, give it one
+ if (key == "disable") {
+ printf "hint.%s.%s.disabled=\"1\"\n", name, unit;
+ continue;
+ }
+ # recognized keys
+ if (key == "port" || key == "irq" || key == "drq" || \
+ key == "drive" || key == "iomem" || key == "iosiz" || \
+ key == "flags" || key == "bus" || key == "target" || \
+ key == "unit") {
+ val = $(++rest);
+ if (val == "?") # as above
+ continue;
+ if (key == "port") {
+ # map port macros to static values
+ sub ("IO_AHA0", "0x330", val);
+ sub ("IO_AHA1", "0x334", val);
+ sub ("IO_ASC1", "0x3EB", val);
+ sub ("IO_ASC2", "0x22B", val);
+ sub ("IO_ASC3", "0x26B", val);
+ sub ("IO_ASC4", "0x2AB", val);
+ sub ("IO_ASC5", "0x2EB", val);
+ sub ("IO_ASC6", "0x32B", val);
+ sub ("IO_ASC7", "0x36B", val);
+ sub ("IO_ASC8", "0x3AB", val);
+ sub ("IO_BT0", "0x330", val);
+ sub ("IO_BT1", "0x334", val);
+ sub ("IO_CGA", "0x3D0", val);
+ sub ("IO_COM1", "0x3F8", val);
+ sub ("IO_COM2", "0x2F8", val);
+ sub ("IO_COM3", "0x3E8", val);
+ sub ("IO_COM4", "0x2E8", val);
+ sub ("IO_DMA1", "0x000", val);
+ sub ("IO_DMA2", "0x0C0", val);
+ sub ("IO_DMAPG", "0x080", val);
+ sub ("IO_FD1", "0x3F0", val);
+ sub ("IO_FD2", "0x370", val);
+ sub ("IO_GAME", "0x201", val);
+ sub ("IO_GSC1", "0x270", val);
+ sub ("IO_GSC2", "0x2E0", val);
+ sub ("IO_GSC3", "0x370", val);
+ sub ("IO_GSC4", "0x3E0", val);
+ sub ("IO_ICU1", "0x020", val);
+ sub ("IO_ICU2", "0x0A0", val);
+ sub ("IO_KBD", "0x060", val);
+ sub ("IO_LPT1", "0x378", val);
+ sub ("IO_LPT2", "0x278", val);
+ sub ("IO_LPT3", "0x3BC", val);
+ sub ("IO_MDA", "0x3B0", val);
+ sub ("IO_NMI", "0x070", val);
+ sub ("IO_NPX", "0x0F0", val);
+ sub ("IO_PMP1", "0x026", val);
+ sub ("IO_PMP2", "0x178", val);
+ sub ("IO_PPI", "0x061", val);
+ sub ("IO_RTC", "0x070", val);
+ sub ("IO_TIMER1", "0x040", val);
+ sub ("IO_TIMER2", "0x048", val);
+ sub ("IO_UHA0", "0x330", val);
+ sub ("IO_VGA", "0x3C0", val);
+ sub ("IO_WD1", "0x1F0", val);
+ sub ("IO_WD2", "0x170", val);
+ } else {
+ # map key names
+ sub ("iomem", "maddr", key);
+ sub ("iosiz", "msize", key);
+ }
+ printf "hint.%s.%s.%s=\"%s\"\n", name, unit, key, val;
+ continue;
+ }
+ printf ("unrecognized config token '%s:%s' on line %s\n",
+ rest, key, NR); # > "/dev/stderr";
+ }
+}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c
new file mode 100644
index 0000000000..393e091986
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c
@@ -0,0 +1,511 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz and Don Ahn.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)clock.c 7.2 (Berkeley) 5/12/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/isa/clock.c,v 1.207 2003/11/13 10:02:12 phk Exp $");
+
+/* #define DELAYDEBUG */
+/*
+ * Routines to handle clock hardware.
+ */
+
+/*
+ * inittodr, settodr and support routines written
+ * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
+ *
+ * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
+ */
+
+#include "opt_clock.h"
+#include "opt_isa.h"
+#include "opt_mca.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/timetc.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/sysctl.h>
+#include <sys/cons.h>
+#include <sys/power.h>
+
+#include <machine/clock.h>
+#include <machine/cputypes.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/psl.h>
+#if defined(SMP)
+#include <machine/smp.h>
+#endif
+#include <machine/specialreg.h>
+
+#include <i386/isa/icu.h>
+#include <i386/isa/isa.h>
+#include <isa/rtc.h>
+#include <i386/isa/timerreg.h>
+
+/* XEN specific defines */
+#include <machine/xen_intr.h>
+
+/*
+ * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
+ * can use a simple formula for leap years.
+ */
+#define LEAPYEAR(y) (((u_int)(y) % 4 == 0) ? 1 : 0)
+#define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31)
+
+int adjkerntz; /* local offset from GMT in seconds */
+int clkintr_pending;
+int disable_rtc_set = 1; /* disable resettodr() if != 0 */
+int pscnt = 1;
+int psdiv = 1;
+int statclock_disable;
+#ifndef TIMER_FREQ
+#define TIMER_FREQ 1193182
+#endif
+u_int timer_freq = TIMER_FREQ;
+
+static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
+
+/* Values for timerX_state: */
+#define RELEASED 0
+#define RELEASE_PENDING 1
+#define ACQUIRED 2
+#define ACQUIRE_PENDING 3
+
+/* Cached *multiplier* to convert TSC counts to microseconds.
+ * (see the equation below).
+ * Equal to 2^32 * (1 / (clocks per usec) ).
+ * Initialized in time_init.
+ */
+static unsigned long fast_gettimeoffset_quotient;
+
+/* These are periodically updated in shared_info, and then copied here. */
+static uint32_t shadow_tsc_stamp;
+static uint64_t shadow_system_time;
+static uint32_t shadow_time_version;
+static struct timeval shadow_tv;
+
+static uint64_t processed_system_time; /* System time (ns) at last processing. */
+
+#define NS_PER_TICK (1000000000ULL/hz)
+
+/* convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_mhz * 10^6))
+ * ns = cycles * (10^3 / cpu_mhz)
+ *
+ * Then we use scaling math (suggested by george@mvista.com) to get:
+ * ns = cycles * (10^3 * SC / cpu_mhz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ * -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+static unsigned long cyc2ns_scale;
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+{
+ cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+/*
+ * Reads a consistent set of time-base values from Xen, into a shadow data
+ * area. Must be called with the xtime_lock held for writing.
+ */
+static void __get_time_values_from_xen(void)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+
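+ /*
+ * Xen updates the two version fields on either side of rewriting the
+ * time fields, so they are equal only while no update is in progress;
+ * the loop below retries until a consistent snapshot has been read.
+ */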
+ do {
+ shadow_time_version = s->time_version2;
+ rmb();
+ shadow_tv.tv_sec = s->wc_sec;
+ shadow_tv.tv_usec = s->wc_usec;
+ shadow_tsc_stamp = (uint32_t)s->tsc_timestamp;
+ shadow_system_time = s->system_time;
+ rmb();
+ }
+ while (shadow_time_version != s->time_version1);
+}
+
+#define TIME_VALUES_UP_TO_DATE \
+ (shadow_time_version == HYPERVISOR_shared_info->time_version2)
+
+static void (*timer_func)(struct clockframe *frame) = hardclock;
+
+static unsigned xen_get_offset(void);
+static unsigned xen_get_timecount(struct timecounter *tc);
+
+static struct timecounter xen_timecounter = {
+ xen_get_timecount, /* get_timecount */
+ 0, /* no poll_pps */
+ ~0u, /* counter_mask */
+ 0, /* frequency */
+ "ixen", /* name */
+ 0 /* quality */
+};
+
+
+static void
+clkintr(struct clockframe *frame)
+{
+ int64_t delta;
+ long ticks = 0;
+
+
+ do {
+ __get_time_values_from_xen();
+ delta = (int64_t)(shadow_system_time +
+ xen_get_offset() * 1000 -
+ processed_system_time);
+ } while (!TIME_VALUES_UP_TO_DATE);
+
+ if (unlikely(delta < 0)) {
+ printk("Timer ISR: Time went backwards: %lld\n", delta);
+ return;
+ }
+
+ /* Process elapsed ticks since last call. */
+ while ( delta >= NS_PER_TICK )
+ {
+ ticks++;
+ delta -= NS_PER_TICK;
+ processed_system_time += NS_PER_TICK;
+ }
+
+ if (ticks > 0) {
+ if (frame)
+ timer_func(frame);
+#ifdef SMP
+ if (timer_func == hardclock && frame)
+ forward_hardclock();
+#endif
+ }
+}
+
+#include "opt_ddb.h"
+static uint32_t
+getit(void)
+{
+ __get_time_values_from_xen();
+ return shadow_tsc_stamp;
+}
+
+/*
+ * Wait "n" microseconds.
+ * Relies on timer 1 counting down from (timer_freq / hz)
+ * Note: timer had better have been programmed before this is first used!
+ */
+void
+DELAY(int n)
+{
+ int delta, ticks_left;
+ uint32_t tick, prev_tick;
+#ifdef DELAYDEBUG
+ int getit_calls = 1;
+ int n1;
+ static int state = 0;
+
+ if (state == 0) {
+ state = 1;
+ for (n1 = 1; n1 <= 10000000; n1 *= 10)
+ DELAY(n1);
+ state = 2;
+ }
+ if (state == 1)
+ printf("DELAY(%d)...", n);
+#endif
+ /*
+ * Read the counter first, so that the rest of the setup overhead is
+ * counted. Guess the initial overhead is 20 usec (on most systems it
+ * takes about 1.5 usec for each of the i/o's in getit(). The loop
+ * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The
+ * multiplications and divisions to scale the count take a while).
+ *
+ * However, if ddb is active then use a fake counter since reading
+ * the i8254 counter involves acquiring a lock. ddb must not go
+ * locking for many reasons, but it calls here for at least atkbd
+ * input.
+ */
+ prev_tick = getit();
+
+ n -= 0; /* XXX actually guess no initial overhead */
+ /*
+ * Calculate (n * (timer_freq / 1e6)) without using floating point
+ * and without any avoidable overflows.
+ */
+ if (n <= 0)
+ ticks_left = 0;
+ else if (n < 256)
+ /*
+ * Use fixed point to avoid a slow division by 1000000.
+ * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
+ * 2^15 is the first power of 2 that gives exact results
+ * for n between 0 and 256.
+ */
+ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+ else
+ /*
+ * Don't bother using fixed point, although gcc-2.7.2
+ * generates particularly poor code for the long long
+ * division, since even the slow way will complete long
+ * before the delay is up (unless we're interrupted).
+ */
+ ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+ / 1000000;
+
+ while (ticks_left > 0) {
+ tick = getit();
+#ifdef DELAYDEBUG
+ ++getit_calls;
+#endif
+ delta = tick - prev_tick;
+ prev_tick = tick;
+ if (delta < 0) {
+ /*
+ * Guard against timer0_max_count being wrong.
+ * This shouldn't happen in normal operation,
+ * but it may happen if set_timer_freq() is
+ * traced.
+ */
+ /* delta += timer0_max_count; ??? */
+ if (delta < 0)
+ delta = 0;
+ }
+ ticks_left -= delta;
+ }
+#ifdef DELAYDEBUG
+ if (state == 1)
+ printf(" %d calls to getit() at %d usec each\n",
+ getit_calls, (n + 5) / getit_calls);
+#endif
+}
+
+
+int
+sysbeep(int pitch, int period)
+{
+ return (0);
+}
+
+/*
+ * Restore all the timers non-atomically (XXX: should be atomically).
+ *
+ * This function is called from pmtimer_resume() to restore all the timers.
+ * This should not be necessary, but there are broken laptops that do not
+ * restore all the timers on resume.
+ */
+void
+timer_restore(void)
+{
+ /* Get timebases for new environment. */
+ __get_time_values_from_xen();
+
+ /* Reset our own concept of passage of system time. */
+ processed_system_time = shadow_system_time;
+}
+
+void
+startrtclock()
+{
+ unsigned long long alarm;
+ uint64_t __cpu_khz;
+ uint32_t cpu_khz;
+
+ __cpu_khz = HYPERVISOR_shared_info->cpu_freq;
+ __cpu_khz /= 1000;
+ cpu_khz = (uint32_t)__cpu_khz;
+ printk("Xen reported: %lu.%03lu MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
+
+ /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
+ (2^32 * 1 / (clocks/us)) */
+ {
+ unsigned long eax=0, edx=1000;
+ __asm__("divl %2"
+ :"=a" (fast_gettimeoffset_quotient), "=d" (edx)
+ :"r" (cpu_khz),
+ "0" (eax), "1" (edx));
+ }
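+ /* i.e. fast_gettimeoffset_quotient = (1000 << 32) / cpu_khz; the divl
+ * divides the 64-bit value edx:eax directly by cpu_khz. */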
+
+ set_cyc2ns_scale(cpu_khz/1000);
+ timer_freq = tsc_freq = xen_timecounter.tc_frequency = cpu_khz * 1000;
+ tc_init(&xen_timecounter);
+
+
+ rdtscll(alarm);
+}
+
+/*
+ * Initialize the time of day register, based on the time base which is, e.g.
+ * from a filesystem.
+ */
+void
+inittodr(time_t base)
+{
+ int s, y;
+ struct timespec ts;
+
+ s = splclock();
+ if (base) {
+ ts.tv_sec = base;
+ ts.tv_nsec = 0;
+ tc_setclock(&ts);
+ }
+
+ y = time_second - shadow_tv.tv_sec;
+ if (y <= -2 || y >= 2) {
+ /* badly off, adjust it */
+ ts.tv_sec = shadow_tv.tv_sec;
+ ts.tv_nsec = shadow_tv.tv_usec * 1000;
+ tc_setclock(&ts);
+ }
+ splx(s);
+}
+
+/*
+ * Write system time back to RTC. Not supported for guest domains.
+ */
+void
+resettodr()
+{
+}
+
+
+/*
+ * Start clocks running.
+ */
+void
+cpu_initclocks()
+{
+ int diag;
+ int time_irq = bind_virq_to_irq(VIRQ_TIMER);
+
+ if ((diag = intr_add_handler("clk", time_irq,
+ (driver_intr_t *)clkintr, NULL,
+ INTR_TYPE_CLK | INTR_FAST, NULL))) {
+ panic("failed to register clock interrupt: %d\n", diag);
+ }
+
+ /* should fast clock be enabled ? */
+
+ /* initialize xen values */
+ __get_time_values_from_xen();
+ processed_system_time = shadow_system_time;
+}
+
+void
+cpu_startprofclock(void)
+{
+
+ printf("cpu_startprofclock: profiling clock is not supported\n");
+}
+
+void
+cpu_stopprofclock(void)
+{
+
+ printf("cpu_stopprofclock: profiling clock is not supported\n");
+}
+
+static uint32_t
+xen_get_timecount(struct timecounter *tc)
+{
+ __get_time_values_from_xen();
+ return shadow_tsc_stamp;
+}
+
+/*
+ * Track behavior of cur_timer->get_offset() functionality in timer_tsc.c
+ */
+#undef rdtsc
+#define rdtsc(low,high) \
+ __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+
+static uint32_t
+xen_get_offset(void)
+{
+ register unsigned long eax, edx;
+
+ /* Read the Time Stamp Counter */
+
+ rdtsc(eax,edx);
+
+ /* .. relative to previous jiffy (32 bits is enough) */
+ eax -= shadow_tsc_stamp;
+
+ /*
+ * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
+ * = (tsc_low delta) * (usecs_per_clock)
+ * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
+ *
+ * Using a mull instead of a divl saves up to 31 clock cycles
+ * in the critical path.
+ */
+
+ __asm__("mull %2"
+ :"=a" (eax), "=d" (edx)
+ :"rm" (fast_gettimeoffset_quotient),
+ "0" (eax));
+
+ /* our adjusted time offset in microseconds */
+ return edx;
+}
+
+void
+idle_block(void)
+{
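+ /* Request a Xen wakeup one tick into the future, then block the
+ * (virtual) CPU until an event arrives. */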
+ if (HYPERVISOR_set_timer_op(processed_system_time + NS_PER_TICK) == 0)
+ HYPERVISOR_block();
+}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c
new file mode 100644
index 0000000000..ce388fa048
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2002 Matthew Dillon. All Rights Reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/critical.c,v 1.12 2003/11/03 21:06:54 jhb Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <machine/critical.h>
+#include <machine/psl.h>
+
+/*
+ * cpu_critical_fork_exit() - cleanup after fork
+ *
+ * Enable interrupts in the saved copy of eflags.
+ */
+void
+cpu_critical_fork_exit(void)
+{
+ curthread->td_md.md_savecrit = 0;
+}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c
new file mode 100644
index 0000000000..8e8ce9fde7
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c
@@ -0,0 +1,476 @@
+/******************************************************************************
+ * ctrl_if.c
+ *
+ * Management functions for special interface to the domain controller.
+ *
+ * Copyright (c) 2004, K A Fraser
+ * Copyright (c) 2004, K M Macy
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/selinfo.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+#include <sys/taskqueue.h>
+
+
+#include <machine/cpufunc.h>
+#include <machine/intr_machdep.h>
+#include <machine/xen-os.h>
+#include <machine/xen_intr.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <machine/resource.h>
+#include <machine/synch_bitops.h>
+
+
+#include <machine/hypervisor-ifs.h>
+
+#include <machine/ctrl_if.h>
+#include <machine/evtchn.h>
+
+/*
+ * Only used by initial domain which must create its own control-interface
+ * event channel. This value is picked up by the user-space domain controller
+ * via an ioctl.
+ */
+int initdom_ctrlif_domcontroller_port = -1;
+
+static int ctrl_if_evtchn;
+static int ctrl_if_irq;
+static struct mtx ctrl_if_lock;
+static int * ctrl_if_wchan = &ctrl_if_evtchn;
+
+
+static CONTROL_RING_IDX ctrl_if_tx_resp_cons;
+static CONTROL_RING_IDX ctrl_if_rx_req_cons;
+
+/* Incoming message requests. */
+ /* Primary message type -> message handler. */
+static ctrl_msg_handler_t ctrl_if_rxmsg_handler[256];
+ /* Primary message type -> callback in process context? */
+static unsigned long ctrl_if_rxmsg_blocking_context[256/sizeof(unsigned long)];
+ /* Queue up messages to be handled in process context. */
+static ctrl_msg_t ctrl_if_rxmsg_deferred[CONTROL_RING_SIZE];
+static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_prod;
+static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_cons;
+
+/* Incoming message responses: message identifier -> message handler/id. */
+static struct {
+ ctrl_msg_handler_t fn;
+ unsigned long id;
+} ctrl_if_txmsg_id_mapping[CONTROL_RING_SIZE];
+
+/*
+ * FreeBSD task queues don't allow you to requeue an already executing task.
+ * Since ctrl_if_interrupt clears the TX_FULL condition and schedules any
+ * waiting tasks, which themselves may need to schedule a new task
+ * (due to a new TX_FULL condition), we ping-pong between these A/B task queues.
+ * The interrupt runs anything on the current queue and moves the index so that
+ * future schedulings occur on the next queue. We should never get into a
+ * situation where there is a task scheduled on both the A & B queues.
+ */
+TASKQUEUE_DECLARE(ctrl_if_txA);
+TASKQUEUE_DEFINE(ctrl_if_txA, NULL, NULL, {});
+TASKQUEUE_DECLARE(ctrl_if_txB);
+TASKQUEUE_DEFINE(ctrl_if_txB, NULL, NULL, {});
+struct taskqueue **taskqueue_ctrl_if_tx[2] = { &taskqueue_ctrl_if_txA,
+ &taskqueue_ctrl_if_txB };
+int ctrl_if_idx;
+
+static struct task ctrl_if_rx_tasklet;
+static struct task ctrl_if_tx_tasklet;
+ /* Passed to schedule_task(). */
+static struct task ctrl_if_rxmsg_deferred_task;
+
+
+
+#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048))
+#define TX_FULL(_c) \
+ (((_c)->tx_req_prod - ctrl_if_tx_resp_cons) == CONTROL_RING_SIZE)
+
+static void
+ctrl_if_notify_controller(void)
+{
+ notify_via_evtchn(ctrl_if_evtchn);
+}
+
+static void
+ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id)
+{
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+static void
+__ctrl_if_tx_tasklet(void *context __unused, int pending __unused)
+{
+ control_if_t *ctrl_if = get_ctrl_if();
+ ctrl_msg_t *msg;
+ int was_full = TX_FULL(ctrl_if);
+
+ while ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod )
+ {
+ msg = &ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if_tx_resp_cons)];
+
+ /* Execute the callback handler, if one was specified. */
+ if ( msg->id != 0xFF )
+ {
+ (*ctrl_if_txmsg_id_mapping[msg->id].fn)(
+ msg, ctrl_if_txmsg_id_mapping[msg->id].id);
+ smp_mb(); /* Execute, /then/ free. */
+ ctrl_if_txmsg_id_mapping[msg->id].fn = NULL;
+ }
+
+ /*
+ * Step over the message in the ring /after/ finishing reading it. As
+ * soon as the index is updated then the message may get blown away.
+ */
+ smp_mb();
+ ctrl_if_tx_resp_cons++;
+ }
+
+ if ( was_full && !TX_FULL(ctrl_if) )
+ {
+ wakeup(ctrl_if_wchan);
+
+ /* bump idx so future enqueues will occur on the next taskq
+ * process any currently pending tasks
+ */
+ ctrl_if_idx++;
+ taskqueue_run(*taskqueue_ctrl_if_tx[(ctrl_if_idx-1) & 1]);
+ }
+}
+
+static void
+__ctrl_if_rxmsg_deferred_task(void *context __unused, int pending __unused)
+{
+ ctrl_msg_t *msg;
+
+ while ( ctrl_if_rxmsg_deferred_cons != ctrl_if_rxmsg_deferred_prod )
+ {
+ msg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
+ ctrl_if_rxmsg_deferred_cons++)];
+ (*ctrl_if_rxmsg_handler[msg->type])(msg, 0);
+ }
+}
+
+static void
+__ctrl_if_rx_tasklet(void *context __unused, int pending __unused)
+{
+ control_if_t *ctrl_if = get_ctrl_if();
+ ctrl_msg_t msg, *pmsg;
+
+ while ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod )
+ {
+ pmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons++)];
+ memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg));
+ if ( msg.length != 0 )
+ memcpy(msg.msg, pmsg->msg, msg.length);
+ if ( test_bit(msg.type, &ctrl_if_rxmsg_blocking_context) )
+ {
+ pmsg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
+ ctrl_if_rxmsg_deferred_prod++)];
+ memcpy(pmsg, &msg, offsetof(ctrl_msg_t, msg) + msg.length);
+ taskqueue_enqueue(taskqueue_thread, &ctrl_if_rxmsg_deferred_task);
+ }
+ else
+ {
+ (*ctrl_if_rxmsg_handler[msg.type])(&msg, 0);
+ }
+ }
+}
+
+static void
+ctrl_if_interrupt(void *ctrl_sc)
+/* (int irq, void *dev_id, struct pt_regs *regs) */
+{
+ control_if_t *ctrl_if = get_ctrl_if();
+
+ if ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod )
+ taskqueue_enqueue(taskqueue_swi, &ctrl_if_tx_tasklet);
+
+
+ if ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod )
+ taskqueue_enqueue(taskqueue_swi, &ctrl_if_rx_tasklet);
+}
+
+int
+ctrl_if_send_message_noblock(
+ ctrl_msg_t *msg,
+ ctrl_msg_handler_t hnd,
+ unsigned long id)
+{
+ control_if_t *ctrl_if = get_ctrl_if();
+ unsigned long flags;
+ int i;
+
+ mtx_lock_irqsave(&ctrl_if_lock, flags);
+
+ if ( TX_FULL(ctrl_if) )
+ {
+ mtx_unlock_irqrestore(&ctrl_if_lock, flags);
+ return EAGAIN;
+ }
+
+ msg->id = 0xFF;
+ if ( hnd != NULL )
+ {
+ for ( i = 0; ctrl_if_txmsg_id_mapping[i].fn != NULL; i++ )
+ continue;
+ ctrl_if_txmsg_id_mapping[i].fn = hnd;
+ ctrl_if_txmsg_id_mapping[i].id = id;
+ msg->id = i;
+ }
+
+ memcpy(&ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if->tx_req_prod)],
+ msg, sizeof(*msg));
+ wmb(); /* Write the message before letting the controller peek at it. */
+ ctrl_if->tx_req_prod++;
+
+ mtx_unlock_irqrestore(&ctrl_if_lock, flags);
+
+ ctrl_if_notify_controller();
+
+ return 0;
+}
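+/*
+ * Note on the id allocation above: the scan for a free slot relies on
+ * TX_FULL() having been checked first -- with the mapping table
+ * presumably sized to CONTROL_RING_SIZE entries, at most that many
+ * messages can be in flight, so a slot with fn == NULL is always found.
+ * The id 0xFF is reserved to mean "no response handler".
+ */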
+
+int
+ctrl_if_send_message_block(
+ ctrl_msg_t *msg,
+ ctrl_msg_handler_t hnd,
+ unsigned long id,
+ long wait_state)
+{
+ int rc, sst = 0;
+
+ /* Fast path. */
+ if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN )
+ return rc;
+
+ for ( ; ; )
+ {
+ if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN )
+ break;
+
+ if ( sst != 0)
+ return EINTR;
+
+ sst = tsleep(ctrl_if_wchan, PWAIT|PCATCH, "ctlrwt", 10);
+ }
+
+ return rc;
+}
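+/*
+ * Usage sketch (illustrative only, not part of this file): build a
+ * message and send it, sleeping until ring space is available.
+ * CMSG_SHUTDOWN is an assumed message type, for example only:
+ *
+ *	ctrl_msg_t m;
+ *	memset(&m, 0, sizeof(m));
+ *	m.type = CMSG_SHUTDOWN;
+ *	(void)ctrl_if_send_message_block(&m, NULL, 0, PWAIT);
+ *
+ * Note that wait_state is accepted for interface compatibility but the
+ * implementation above always sleeps with PWAIT|PCATCH.
+ */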
+
+int
+ctrl_if_enqueue_space_callback(struct task *task)
+{
+ control_if_t *ctrl_if = get_ctrl_if();
+
+ /* Fast path. */
+ if ( !TX_FULL(ctrl_if) )
+ return 0;
+
+ (void)taskqueue_enqueue(*taskqueue_ctrl_if_tx[(ctrl_if_idx & 1)], task);
+
+ /*
+ * We may race execution of the task queue, so return re-checked status. If
+ * the task is not executed despite the ring being non-full then we will
+ * certainly return 'not full'.
+ */
+ smp_mb();
+ return TX_FULL(ctrl_if);
+}
+
+void
+ctrl_if_send_response(ctrl_msg_t *msg)
+{
+ control_if_t *ctrl_if = get_ctrl_if();
+ unsigned long flags;
+ ctrl_msg_t *dmsg;
+
+ /*
+ * NB. The response may be the original request message, modified in-place.
+ * In this situation we may have src==dst, so no copying is required.
+ */
+ mtx_lock_irqsave(&ctrl_if_lock, flags);
+ dmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if->rx_resp_prod)];
+ if ( dmsg != msg )
+ memcpy(dmsg, msg, sizeof(*msg));
+ wmb(); /* Write the message before letting the controller peek at it. */
+ ctrl_if->rx_resp_prod++;
+ mtx_unlock_irqrestore(&ctrl_if_lock, flags);
+
+ ctrl_if_notify_controller();
+}
+
+int
+ctrl_if_register_receiver(
+ uint8_t type,
+ ctrl_msg_handler_t hnd,
+ unsigned int flags)
+{
+ unsigned long _flags;
+ int inuse;
+
+ mtx_lock_irqsave(&ctrl_if_lock, _flags);
+
+ inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler);
+
+ if ( inuse )
+ {
+ printk("Receiver %p already established for control "
+ "messages of type %d.\n", ctrl_if_rxmsg_handler[type], type);
+ }
+ else
+ {
+ ctrl_if_rxmsg_handler[type] = hnd;
+ clear_bit(type, &ctrl_if_rxmsg_blocking_context);
+ if ( flags == CALLBACK_IN_BLOCKING_CONTEXT )
+ {
+ set_bit(type, &ctrl_if_rxmsg_blocking_context);
+ }
+ }
+
+ mtx_unlock_irqrestore(&ctrl_if_lock, _flags);
+
+ return !inuse;
+}
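+/*
+ * Example (illustrative only): a frontend driver claiming a message type
+ * so that its handler runs in a blocking (taskqueue) context. The names
+ * CMSG_BLKIF_FE and blkif_ctrlif_rx are assumed from a block frontend;
+ * only the calling convention is being shown:
+ *
+ *	(void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
+ *	    CALLBACK_IN_BLOCKING_CONTEXT);
+ */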
+
+void
+ctrl_if_unregister_receiver(uint8_t type, ctrl_msg_handler_t hnd)
+{
+ unsigned long flags;
+
+ mtx_lock_irqsave(&ctrl_if_lock, flags);
+
+ if ( ctrl_if_rxmsg_handler[type] != hnd )
+ printk("Receiver %p is not registered for control "
+ "messages of type %d.\n", hnd, type);
+ else
+ ctrl_if_rxmsg_handler[type] = ctrl_if_rxmsg_default_handler;
+
+ mtx_unlock_irqrestore(&ctrl_if_lock, flags);
+
+ /* Ensure that @hnd will not be executed after this function returns. */
+ /* XXX need rx_tasklet_lock -- can cheat for now?*/
+#ifdef notyet
+ tasklet_unlock_wait(&ctrl_if_rx_tasklet);
+#endif
+}
+
+void
+ctrl_if_suspend(void)
+{
+ /*
+ * It is not clear what the FreeBSD equivalent of free_irq() is; we
+ * are not going to support suspend yet anyway.
+ */
+#ifdef notyet
+ free_irq(ctrl_if_irq, NULL);
+#endif
+ unbind_evtchn_from_irq(ctrl_if_evtchn);
+}
+
+/** Reset the control interface progress pointers.
+ * Marks the queues empty if 'clear' is non-zero.
+ */
+static void
+ctrl_if_reset(int clear)
+{
+ control_if_t *ctrl_if = get_ctrl_if();
+
+ if (clear) {
+ *ctrl_if = (control_if_t){};
+ }
+
+ ctrl_if_tx_resp_cons = ctrl_if->tx_resp_prod;
+ ctrl_if_rx_req_cons = ctrl_if->rx_resp_prod;
+}
+
+
+void
+ctrl_if_resume(void)
+{
+ if ( xen_start_info->flags & SIF_INITDOMAIN )
+ {
+ /*
+ * The initial domain must create its own domain-controller link.
+ * The controller is probably not running at this point, but will
+ * pick up its end of the event channel from
+ */
+ evtchn_op_t op;
+ op.cmd = EVTCHNOP_bind_interdomain;
+ op.u.bind_interdomain.dom1 = DOMID_SELF;
+ op.u.bind_interdomain.dom2 = DOMID_SELF;
+ op.u.bind_interdomain.port1 = 0;
+ op.u.bind_interdomain.port2 = 0;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("event_channel_op failed\n");
+ xen_start_info->domain_controller_evtchn = op.u.bind_interdomain.port1;
+ initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2;
+ }
+
+ ctrl_if_reset(0);
+
+ ctrl_if_evtchn = xen_start_info->domain_controller_evtchn;
+ ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn);
+
+ /*
+ * I have not taken the time to determine what the interrupt thread
+ * priorities correspond to. This interface carries both network and
+ * disk traffic; network seems the higher priority of the two, hence
+ * INTR_TYPE_NET below.
+ */
+
+ intr_add_handler("ctrl-if", ctrl_if_irq, (driver_intr_t*)ctrl_if_interrupt,
+ NULL, INTR_TYPE_NET | INTR_MPSAFE, NULL);
+}
+
+static void
+ctrl_if_init(void *dummy __unused)
+{
+ int i;
+
+ for ( i = 0; i < 256; i++ )
+ ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler;
+
+ mtx_init(&ctrl_if_lock, "ctrlif", NULL, MTX_SPIN | MTX_NOWITNESS);
+
+ TASK_INIT(&ctrl_if_tx_tasklet, 0, __ctrl_if_tx_tasklet, NULL);
+
+ TASK_INIT(&ctrl_if_rx_tasklet, 0, __ctrl_if_rx_tasklet, NULL);
+
+ TASK_INIT(&ctrl_if_rxmsg_deferred_task, 0, __ctrl_if_rxmsg_deferred_task, NULL);
+
+ ctrl_if_reset(1);
+ ctrl_if_resume();
+}
+
+/*
+ * !! The following are DANGEROUS FUNCTIONS !!
+ * Use with care [for example, see xencons_force_flush()].
+ */
+
+int
+ctrl_if_transmitter_empty(void)
+{
+ return (get_ctrl_if()->tx_req_prod == ctrl_if_tx_resp_cons);
+}
+
+void
+ctrl_if_discard_responses(void)
+{
+ ctrl_if_tx_resp_cons = get_ctrl_if()->tx_resp_prod;
+}
+
+SYSINIT(ctrl_if_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, ctrl_if_init, NULL);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c
new file mode 100644
index 0000000000..57aa4e2ef4
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c
@@ -0,0 +1,209 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/db_interface.c,v 1.77 2003/11/08 03:01:26 alc Exp $");
+
+/*
+ * Interface to new debugger.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/reboot.h>
+#include <sys/cons.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+
+#include <machine/cpu.h>
+#ifdef SMP
+#include <machine/smptests.h> /** CPUSTOP_ON_DDBBREAK */
+#endif
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <ddb/ddb.h>
+
+#include <machine/setjmp.h>
+#include <machine/xenfunc.h>
+
+
+static jmp_buf *db_nofault = 0;
+extern jmp_buf db_jmpbuf;
+
+extern void gdb_handle_exception(db_regs_t *, int, int);
+
+int db_active;
+db_regs_t ddb_regs;
+
+static __inline u_short
+rss(void)
+{
+ u_short ss;
+#ifdef __GNUC__
+ __asm __volatile("mov %%ss,%0" : "=r" (ss));
+#else
+ ss = 0; /* XXXX Fix for other compilers. */
+#endif
+ return ss;
+}
+
+/*
+ * kdb_trap - field a TRACE or BPT trap
+ */
+int
+kdb_trap(int type, int code, struct i386_saved_state *regs)
+{
+ volatile int ddb_mode = !(boothowto & RB_GDB);
+
+ disable_intr();
+
+ if (ddb_mode) {
+ /* we can't do much as a guest domain except print a
+ * backtrace and die gracefully. The reason is that we
+ * can't get character input to make this work.
+ */
+ db_active = 1;
+ db_print_backtrace();
+ db_printf("************ Domain shutting down ************\n");
+ HYPERVISOR_shutdown();
+ } else {
+ Debugger("kdb_trap");
+ }
+ return (1);
+}
+
+/*
+ * Read bytes from kernel address space for debugger.
+ */
+void
+db_read_bytes(vm_offset_t addr, size_t size, char *data)
+{
+ char *src;
+
+ db_nofault = &db_jmpbuf;
+
+ src = (char *)addr;
+ while (size-- > 0)
+ *data++ = *src++;
+
+ db_nofault = 0;
+}
+
+/*
+ * Write bytes to kernel address space for debugger.
+ */
+void
+db_write_bytes(vm_offset_t addr, size_t size, char *data)
+{
+ char *dst;
+
+ pt_entry_t *ptep0 = NULL;
+ pt_entry_t oldmap0 = 0;
+ vm_offset_t addr1;
+ pt_entry_t *ptep1 = NULL;
+ pt_entry_t oldmap1 = 0;
+
+ db_nofault = &db_jmpbuf;
+
+ if (addr > trunc_page((vm_offset_t)btext) - size &&
+ addr < round_page((vm_offset_t)etext)) {
+
+ ptep0 = pmap_pte(kernel_pmap, addr);
+ oldmap0 = *ptep0;
+ *ptep0 |= PG_RW;
+
+ /* Map another page if the data crosses a page boundary. */
+ if ((*ptep0 & PG_PS) == 0) {
+ addr1 = trunc_page(addr + size - 1);
+ if (trunc_page(addr) != addr1) {
+ ptep1 = pmap_pte(kernel_pmap, addr1);
+ oldmap1 = *ptep1;
+ *ptep1 |= PG_RW;
+ }
+ } else {
+ addr1 = trunc_4mpage(addr + size - 1);
+ if (trunc_4mpage(addr) != addr1) {
+ ptep1 = pmap_pte(kernel_pmap, addr1);
+ oldmap1 = *ptep1;
+ *ptep1 |= PG_RW;
+ }
+ }
+
+ invltlb();
+ }
+
+ dst = (char *)addr;
+
+ while (size-- > 0)
+ *dst++ = *data++;
+
+ db_nofault = 0;
+
+ if (ptep0) {
+ *ptep0 = oldmap0;
+
+ if (ptep1)
+ *ptep1 = oldmap1;
+
+ invltlb();
+ }
+}
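+/*
+ * Summary of the above: kernel text is normally mapped read-only, so the
+ * PTE(s) backing the destination are temporarily given PG_RW (two pages
+ * when the write crosses a page or 4M-superpage boundary), the TLB is
+ * flushed, the write is performed, and the saved PTEs are restored.
+ */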
+
+/*
+ * XXX
+ * Move this to machdep.c and allow it to be called if any debugger is
+ * installed.
+ */
+void
+Debugger(const char *msg)
+{
+ static volatile u_int in_Debugger;
+
+ /*
+ * XXX
+ * Do nothing if the console is in graphics mode. This is
+ * OK if the call is for the debugger hotkey but not if the call
+ * is a weak form of panicking.
+ */
+ if (cons_unavail && !(boothowto & RB_GDB))
+ return;
+
+ if (atomic_cmpset_acq_int(&in_Debugger, 0, 1)) {
+ db_printf("Debugger(\"%s\")\n", msg);
+ breakpoint();
+ atomic_store_rel_int(&in_Debugger, 0);
+ }
+}
+
+void
+db_show_mdpcpu(struct pcpu *pc)
+{
+
+ db_printf("APIC ID = %d\n", pc->pc_apic_id);
+ db_printf("currentldt = 0x%x\n", pc->pc_currentldt);
+}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
new file mode 100644
index 0000000000..635a3bfe4e
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
@@ -0,0 +1,580 @@
+/******************************************************************************
+ * evtchn.c
+ *
+ * Communication via Xen event channels.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <machine/cpufunc.h>
+#include <machine/intr_machdep.h>
+#include <machine/xen-os.h>
+#include <machine/xen_intr.h>
+#include <machine/synch_bitops.h>
+#include <machine/evtchn.h>
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+
+
+static struct mtx irq_mapping_update_lock;
+
+#define TODO printf("%s: not implemented!\n", __func__)
+
+/* IRQ <-> event-channel mappings. */
+static int evtchn_to_irq[NR_EVENT_CHANNELS];
+static int irq_to_evtchn[NR_IRQS];
+
+/* IRQ <-> VIRQ mapping. */
+static int virq_to_irq[NR_VIRQS];
+
+/* Reference counts for bindings to IRQs. */
+static int irq_bindcount[NR_IRQS];
+
+#define VALID_EVTCHN(_chn) ((_chn) != -1)
+
+/*
+ * Force a proper event-channel callback from Xen after clearing the
+ * callback mask. We do this in a very simple manner, by making a call
+ * down into Xen. The pending flag will be checked by Xen on return.
+ */
+void force_evtchn_callback(void)
+{
+ (void)HYPERVISOR_xen_version(0);
+}
+
+void
+evtchn_do_upcall(struct intrframe *frame)
+{
+ unsigned long l1, l2;
+ unsigned int l1i, l2i, port;
+ int irq, owned;
+ unsigned long flags;
+ shared_info_t *s = HYPERVISOR_shared_info;
+
+ local_irq_save(flags);
+
+ while ( s->vcpu_data[0].evtchn_upcall_pending )
+ {
+ s->vcpu_data[0].evtchn_upcall_pending = 0;
+ /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
+ l1 = xen_xchg(&s->evtchn_pending_sel, 0);
+ while ( (l1i = ffs(l1)) != 0 )
+ {
+ l1i--;
+ l1 &= ~(1 << l1i);
+
+ l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i];
+ while ( (l2i = ffs(l2)) != 0 )
+ {
+ l2i--;
+ l2 &= ~(1 << l2i);
+
+ port = (l1i << 5) + l2i;
+ if ((owned = mtx_owned(&sched_lock)) != 0)
+ mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
+ if ( (irq = evtchn_to_irq[port]) != -1 ) {
+ struct intsrc *isrc = intr_lookup_source(irq);
+ intr_execute_handlers(isrc, frame);
+
+ } else {
+ evtchn_device_upcall(port);
+ }
+ if (owned)
+ mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
+ }
+ }
+ }
+
+ local_irq_restore(flags);
+
+}
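+/*
+ * The scan above decodes Xen's two-level pending bitmap: each set bit
+ * l1i in evtchn_pending_sel selects the 32-bit word evtchn_pending[l1i],
+ * and each set, unmasked bit l2i within that word names event-channel
+ * port (l1i << 5) + l2i. The sched_lock dance is presumably there so
+ * that handlers do not run with sched_lock held by the interrupted
+ * context.
+ */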
+
+
+static int
+find_unbound_irq(void)
+{
+ int irq;
+
+ for ( irq = 0; irq < NR_IRQS; irq++ )
+ if ( irq_bindcount[irq] == 0 )
+ break;
+
+ if ( irq == NR_IRQS )
+ panic("No available IRQ to bind to: increase NR_IRQS!\n");
+
+ return irq;
+}
+
+int
+bind_virq_to_irq(int virq)
+{
+ evtchn_op_t op;
+ int evtchn, irq;
+
+ mtx_lock(&irq_mapping_update_lock);
+
+ if ( (irq = virq_to_irq[virq]) == -1 )
+ {
+ op.cmd = EVTCHNOP_bind_virq;
+ op.u.bind_virq.virq = virq;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to bind virtual IRQ %d\n", virq);
+ evtchn = op.u.bind_virq.port;
+
+ irq = find_unbound_irq();
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ virq_to_irq[virq] = irq;
+ }
+
+ irq_bindcount[irq]++;
+
+ mtx_unlock(&irq_mapping_update_lock);
+
+ return irq;
+}
+
+void
+unbind_virq_from_irq(int virq)
+{
+ evtchn_op_t op;
+ int irq = virq_to_irq[virq];
+ int evtchn = irq_to_evtchn[irq];
+
+ mtx_lock(&irq_mapping_update_lock);
+
+ if ( --irq_bindcount[irq] == 0 )
+ {
+ op.cmd = EVTCHNOP_close;
+ op.u.close.dom = DOMID_SELF;
+ op.u.close.port = evtchn;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to unbind virtual IRQ %d\n", virq);
+
+ evtchn_to_irq[evtchn] = -1;
+ irq_to_evtchn[irq] = -1;
+ virq_to_irq[virq] = -1;
+ }
+
+ mtx_unlock(&irq_mapping_update_lock);
+}
+
+int
+bind_evtchn_to_irq(int evtchn)
+{
+ int irq;
+
+ mtx_lock(&irq_mapping_update_lock);
+
+ if ( (irq = evtchn_to_irq[evtchn]) == -1 )
+ {
+ irq = find_unbound_irq();
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+ }
+
+ irq_bindcount[irq]++;
+
+ mtx_unlock(&irq_mapping_update_lock);
+
+ return irq;
+}
+
+void
+unbind_evtchn_from_irq(int evtchn)
+{
+ int irq = evtchn_to_irq[evtchn];
+
+ mtx_lock(&irq_mapping_update_lock);
+
+ if ( --irq_bindcount[irq] == 0 )
+ {
+ evtchn_to_irq[evtchn] = -1;
+ irq_to_evtchn[irq] = -1;
+ }
+
+ mtx_unlock(&irq_mapping_update_lock);
+}
+
+
+/*
+ * Interface to generic handling in intr_machdep.c
+ */
+
+
+/*------------ interrupt handling --------------------------------------*/
+
+struct mtx xenpic_lock;
+
+struct xenpic_intsrc {
+ struct intsrc xp_intsrc;
+ uint8_t xp_vector;
+ boolean_t xp_masked;
+};
+
+struct xenpic {
+ struct pic xp_pic; /* this MUST be first */
+ uint16_t xp_numintr;
+ struct xenpic_intsrc xp_pins[0];
+};
+
+static void xenpic_enable_dynirq_source(struct intsrc *isrc);
+static void xenpic_disable_dynirq_source(struct intsrc *isrc, int);
+static void xenpic_eoi_source(struct intsrc *isrc);
+static void xenpic_enable_dynirq_intr(struct intsrc *isrc);
+static int xenpic_vector(struct intsrc *isrc);
+static int xenpic_source_pending(struct intsrc *isrc);
+static void xenpic_suspend(struct intsrc *isrc);
+static void xenpic_resume(struct intsrc *isrc);
+
+
+struct pic xenpic_template = {
+ xenpic_enable_dynirq_source,
+ xenpic_disable_dynirq_source,
+ xenpic_eoi_source,
+ xenpic_enable_dynirq_intr,
+ xenpic_vector,
+ xenpic_source_pending,
+ xenpic_suspend,
+ xenpic_resume
+};
+
+
+static void
+xenpic_enable_dynirq_source(struct intsrc *isrc)
+{
+ unsigned int irq;
+ struct xenpic_intsrc *xp;
+
+ xp = (struct xenpic_intsrc *)isrc;
+
+ if (xp->xp_masked) {
+ irq = xenpic_vector(isrc);
+ unmask_evtchn(irq_to_evtchn[irq]);
+ xp->xp_masked = FALSE;
+ }
+}
+
+static void
+xenpic_disable_dynirq_source(struct intsrc *isrc, int foo)
+{
+ unsigned int irq;
+ struct xenpic_intsrc *xp;
+
+ xp = (struct xenpic_intsrc *)isrc;
+
+ if (!xp->xp_masked) {
+ irq = xenpic_vector(isrc);
+ mask_evtchn(irq_to_evtchn[irq]);
+ xp->xp_masked = TRUE;
+ }
+
+}
+
+static void
+xenpic_enable_dynirq_intr(struct intsrc *isrc)
+{
+ unsigned int irq;
+
+ irq = xenpic_vector(isrc);
+ unmask_evtchn(irq_to_evtchn[irq]);
+}
+
+static void
+xenpic_eoi_source(struct intsrc *isrc)
+{
+ unsigned int irq = xenpic_vector(isrc);
+ clear_evtchn(irq_to_evtchn[irq]);
+}
+
+static int
+xenpic_vector(struct intsrc *isrc)
+{
+ struct xenpic_intsrc *pin = (struct xenpic_intsrc *)isrc;
+ return (pin->xp_vector);
+}
+
+static int
+xenpic_source_pending(struct intsrc *isrc)
+{
+ TODO;
+ return 0;
+}
+
+static void
+xenpic_suspend(struct intsrc *isrc)
+{
+ TODO;
+}
+
+static void
+xenpic_resume(struct intsrc *isrc)
+{
+ TODO;
+}
+
+#ifdef CONFIG_PHYSDEV
+/* required for support of physical devices */
+static inline void
+pirq_unmask_notify(int pirq)
+{
+ physdev_op_t op;
+ if ( unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0])) )
+ {
+ op.cmd = PHYSDEVOP_IRQ_UNMASK_NOTIFY;
+ (void)HYPERVISOR_physdev_op(&op);
+ }
+}
+
+static inline void
+pirq_query_unmask(int pirq)
+{
+ physdev_op_t op;
+ op.cmd = PHYSDEVOP_IRQ_STATUS_QUERY;
+ op.u.irq_status_query.irq = pirq;
+ (void)HYPERVISOR_physdev_op(&op);
+ clear_bit(pirq, &pirq_needs_unmask_notify[0]);
+ if ( op.u.irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY )
+ set_bit(pirq, &pirq_needs_unmask_notify[0]);
+}
+
+/*
+ * On startup, if there is no action associated with the IRQ then we are
+ * probing. In this case we should not share with others as it will confuse us.
+ */
+#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL)
+
+static unsigned int startup_pirq(unsigned int irq)
+{
+ evtchn_op_t op;
+ int evtchn;
+
+ op.cmd = EVTCHNOP_bind_pirq;
+ op.u.bind_pirq.pirq = irq;
+ /* NB. We are happy to share unless we are probing. */
+ op.u.bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ {
+ if ( !probing_irq(irq) ) /* Some failures are expected when probing. */
+ printk(KERN_INFO "Failed to obtain physical IRQ %d\n", irq);
+ return 0;
+ }
+ evtchn = op.u.bind_pirq.port;
+
+ pirq_query_unmask(irq_to_pirq(irq));
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ unmask_evtchn(evtchn);
+ pirq_unmask_notify(irq_to_pirq(irq));
+
+ return 0;
+}
+
+static void shutdown_pirq(unsigned int irq)
+{
+ evtchn_op_t op;
+ int evtchn = irq_to_evtchn[irq];
+
+ if ( !VALID_EVTCHN(evtchn) )
+ return;
+
+ mask_evtchn(evtchn);
+
+ op.cmd = EVTCHNOP_close;
+ op.u.close.dom = DOMID_SELF;
+ op.u.close.port = evtchn;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to unbind physical IRQ %d\n", irq);
+
+ evtchn_to_irq[evtchn] = -1;
+ irq_to_evtchn[irq] = -1;
+}
+
+static void enable_pirq(unsigned int irq)
+{
+ int evtchn = irq_to_evtchn[irq];
+ if ( !VALID_EVTCHN(evtchn) )
+ return;
+ unmask_evtchn(evtchn);
+ pirq_unmask_notify(irq_to_pirq(irq));
+}
+
+static void disable_pirq(unsigned int irq)
+{
+ int evtchn = irq_to_evtchn[irq];
+ if ( !VALID_EVTCHN(evtchn) )
+ return;
+ mask_evtchn(evtchn);
+}
+
+static void ack_pirq(unsigned int irq)
+{
+ int evtchn = irq_to_evtchn[irq];
+ if ( !VALID_EVTCHN(evtchn) )
+ return;
+ mask_evtchn(evtchn);
+ clear_evtchn(evtchn);
+}
+
+static void end_pirq(unsigned int irq)
+{
+ int evtchn = irq_to_evtchn[irq];
+ if ( !VALID_EVTCHN(evtchn) )
+ return;
+ if ( !(irq_desc[irq].status & IRQ_DISABLED) )
+ {
+ unmask_evtchn(evtchn);
+ pirq_unmask_notify(irq_to_pirq(irq));
+ }
+}
+
+static struct hw_interrupt_type pirq_type = {
+ "Phys-irq",
+ startup_pirq,
+ shutdown_pirq,
+ enable_pirq,
+ disable_pirq,
+ ack_pirq,
+ end_pirq,
+ NULL
+};
+#endif
+
+
+static void
+misdirect_interrupt(void *sc)
+{
+}
+
+void irq_suspend(void)
+{
+ int virq, irq, evtchn;
+
+ /* Unbind VIRQs from event channels. */
+ for ( virq = 0; virq < NR_VIRQS; virq++ )
+ {
+ if ( (irq = virq_to_irq[virq]) == -1 )
+ continue;
+ evtchn = irq_to_evtchn[irq];
+
+ /* Mark the event channel as unused in our table. */
+ evtchn_to_irq[evtchn] = -1;
+ irq_to_evtchn[irq] = -1;
+ }
+
+ /*
+ * We should now be unbound from all event channels. Stale bindings to
+ * PIRQs and/or inter-domain event channels will cause us to barf here.
+ */
+ for ( evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++ )
+ if ( evtchn_to_irq[evtchn] != -1 )
+ panic("Suspend attempted while bound to evtchn %d.\n", evtchn);
+}
+
+
+void irq_resume(void)
+{
+ evtchn_op_t op;
+ int virq, irq, evtchn;
+
+ for ( evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++ )
+ mask_evtchn(evtchn); /* New event-channel space is not 'live' yet. */
+
+ for ( virq = 0; virq < NR_VIRQS; virq++ )
+ {
+ if ( (irq = virq_to_irq[virq]) == -1 )
+ continue;
+
+ /* Get a new binding from Xen. */
+ op.cmd = EVTCHNOP_bind_virq;
+ op.u.bind_virq.virq = virq;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to bind virtual IRQ %d\n", virq);
+ evtchn = op.u.bind_virq.port;
+
+ /* Record the new mapping. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ /* Ready for use. */
+ unmask_evtchn(evtchn);
+ }
+}
+
+static void
+evtchn_init(void *dummy __unused)
+{
+ int i;
+ struct xenpic *xp;
+ struct xenpic_intsrc *pin;
+
+ /*
+ * xenpic_lock: in order to allow an interrupt to occur in a critical
+ * section, to set pcpu->ipending (etc...) properly, we
+ * must be able to get the icu lock, so it can't be
+ * under witness.
+ */
+ mtx_init(&irq_mapping_update_lock, "xp", NULL, MTX_DEF);
+
+ /* No VIRQ -> IRQ mappings. */
+ for ( i = 0; i < NR_VIRQS; i++ )
+ virq_to_irq[i] = -1;
+
+ /* No event-channel -> IRQ mappings. */
+ for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
+ {
+ evtchn_to_irq[i] = -1;
+ mask_evtchn(i); /* No event channels are 'live' right now. */
+ }
+
+ /* No IRQ -> event-channel mappings. */
+ for ( i = 0; i < NR_IRQS; i++ )
+ irq_to_evtchn[i] = -1;
+
+ xp = malloc(sizeof(struct xenpic) + NR_DYNIRQS*sizeof(struct xenpic_intsrc), M_DEVBUF, M_WAITOK);
+ xp->xp_pic = xenpic_template;
+ xp->xp_numintr = NR_DYNIRQS;
+ bzero(xp->xp_pins, sizeof(struct xenpic_intsrc) * NR_DYNIRQS);
+
+ for ( i = 0, pin = xp->xp_pins; i < NR_DYNIRQS; i++, pin++ )
+ {
+ /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
+ irq_bindcount[dynirq_to_irq(i)] = 0;
+
+ pin->xp_intsrc.is_pic = (struct pic *)xp;
+ pin->xp_vector = i;
+ intr_register_source(&pin->xp_intsrc);
+ }
+ /* We don't currently have any support for physical devices in XenoFreeBSD
+ * so leaving this out for the moment for the sake of expediency.
+ */
+#ifdef notyet
+ for ( i = 0; i < NR_PIRQS; i++ )
+ {
+ /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
+ irq_bindcount[pirq_to_irq(i)] = 1;
+
+ irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED;
+ irq_desc[pirq_to_irq(i)].action = 0;
+ irq_desc[pirq_to_irq(i)].depth = 1;
+ irq_desc[pirq_to_irq(i)].handler = &pirq_type;
+ }
+
+#endif
+ (void) intr_add_handler("xb_mis", bind_virq_to_irq(VIRQ_MISDIRECT),
+ (driver_intr_t *)misdirect_interrupt,
+ NULL, INTR_TYPE_MISC, NULL);
+}
+
+SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s
new file mode 100644
index 0000000000..4adb61a350
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s
@@ -0,0 +1,428 @@
+/*-
+ * Copyright (c) 1989, 1990 William F. Jolitz.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/i386/exception.s,v 1.106 2003/11/03 22:08:52 jhb Exp $
+ */
+
+#include "opt_npx.h"
+
+#include <machine/asmacros.h>
+#include <machine/psl.h>
+#include <machine/trap.h>
+
+#include "assym.s"
+
+#define SEL_RPL_MASK 0x0002
+/* Offsets into shared_info_t. */
+#define evtchn_upcall_pending /* 0 */
+#define evtchn_upcall_mask 1
+#define XEN_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
+#define XEN_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
+#define XEN_TEST_PENDING(reg) testb $0x1,evtchn_upcall_pending(reg)
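+/*
+ * These byte offsets index the per-VCPU data at the start of
+ * shared_info_t: byte 0 is evtchn_upcall_pending and byte 1 is
+ * evtchn_upcall_mask. Setting the mask byte is the guest's analogue of
+ * cli; clearing it re-enables event delivery, as sti would.
+ */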
+
+
+#define POPA \
+ popl %edi; \
+ popl %esi; \
+ popl %ebp; \
+ popl %ebx; \
+ popl %ebx; \
+ popl %edx; \
+ popl %ecx; \
+ popl %eax;
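+/*
+ * POPA unwinds a pushal frame by hand; the first of the two "popl %ebx"
+ * lines merely discards the slot holding the saved %esp, and the second
+ * restores the real %ebx, which is why %ebx appears twice.
+ */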
+
+ .text
+
+/*****************************************************************************/
+/* Trap handling */
+/*****************************************************************************/
+/*
+ * Trap and fault vector routines.
+ *
+ * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
+ * the stack that mostly looks like an interrupt, but does not disable
+ * interrupts. A few of the traps we are use are interrupt gates,
+ * SDT_SYS386IGT, which are nearly the same thing except interrupts are
+ * disabled on entry.
+ *
+ * The cpu will push a certain amount of state onto the kernel stack for
+ * the current process. The amount of state depends on the type of trap
+ * and whether the trap crossed rings or not. See i386/include/frame.h.
+ * At the very least the current EFLAGS (status register, which includes
+ * the interrupt disable state prior to the trap), the code segment register,
+ * and the return instruction pointer are pushed by the cpu. The cpu
+ * will also push an 'error' code for certain traps. We push a dummy
+ * error code for those traps where the cpu doesn't in order to maintain
+ * a consistent frame. We also push a contrived 'trap number'.
+ *
+ * The cpu does not push the general registers, we must do that, and we
+ * must restore them prior to calling 'iret'. The cpu adjusts the %cs and
+ * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
+ * must load them with appropriate values for supervisor mode operation.
+ */
+
+MCOUNT_LABEL(user)
+MCOUNT_LABEL(btrap)
+
+IDTVEC(div)
+ pushl $0; pushl $0; TRAP(T_DIVIDE)
+IDTVEC(dbg)
+ pushl $0; pushl $0; TRAP(T_TRCTRAP)
+IDTVEC(nmi)
+ pushl $0; pushl $0; TRAP(T_NMI)
+IDTVEC(bpt)
+ pushl $0; pushl $0; TRAP(T_BPTFLT)
+IDTVEC(ofl)
+ pushl $0; pushl $0; TRAP(T_OFLOW)
+IDTVEC(bnd)
+ pushl $0; pushl $0; TRAP(T_BOUND)
+IDTVEC(ill)
+ pushl $0; pushl $0; TRAP(T_PRIVINFLT)
+IDTVEC(dna)
+ pushl $0; pushl $0; TRAP(T_DNA)
+IDTVEC(fpusegm)
+ pushl $0; pushl $0; TRAP(T_FPOPFLT)
+IDTVEC(tss)
+ pushl $0; TRAP(T_TSSFLT)
+IDTVEC(missing)
+ pushl $0; TRAP(T_SEGNPFLT)
+IDTVEC(stk)
+ pushl $0; TRAP(T_STKFLT)
+IDTVEC(prot)
+ pushl $0; TRAP(T_PROTFLT)
+IDTVEC(page)
+ TRAP(T_PAGEFLT)
+IDTVEC(mchk)
+ pushl $0; pushl $0; TRAP(T_MCHK)
+IDTVEC(rsvd)
+ pushl $0; pushl $0; TRAP(T_RESERVED)
+IDTVEC(fpu)
+ pushl $0; pushl $0; TRAP(T_ARITHTRAP)
+IDTVEC(align)
+ pushl $0; TRAP(T_ALIGNFLT)
+
+IDTVEC(xmm)
+ pushl $0; pushl $0; TRAP(T_XMMFLT)
+
+IDTVEC(hypervisor_callback)
+ pushl $T_HYPCALLBACK; pushl %eax; TRAP(T_HYPCALLBACK)
+
+hypervisor_callback_pending:
+ movl $T_HYPCALLBACK,TF_TRAPNO(%esp)
+ movl $T_HYPCALLBACK,TF_ERR(%esp)
+ jmp 11f
+
+ /*
+ * alltraps entry point. Interrupts are enabled if this was a trap
+ * gate (TGT), else disabled if this was an interrupt gate (IGT).
+ * Note that int0x80_syscall is a trap gate. Only page faults
+ * use an interrupt gate.
+ */
+
+ SUPERALIGN_TEXT
+ .globl alltraps
+ .type alltraps,@function
+alltraps:
+ cld
+ pushal
+ pushl %ds
+ pushl %es
+ pushl %fs
+alltraps_with_regs_pushed:
+ movl $KDSEL,%eax
+ movl %eax,%ds
+ movl %eax,%es
+ movl $KPSEL,%eax
+ movl %eax,%fs
+ FAKE_MCOUNT(TF_EIP(%esp))
+calltrap:
+ movl TF_EIP(%esp),%eax
+ cmpl $scrit,%eax
+ jb 11f
+ cmpl $ecrit,%eax
+ jb critical_region_fixup
+11: call trap
+
+ /*
+ * Return via doreti to handle ASTs.
+ */
+ MEXITCOUNT
+ jmp doreti
+
+/*
+ * SYSCALL CALL GATE (old entry point for a.out binaries)
+ *
+ * The intersegment call has been set up to specify one dummy parameter.
+ *
+ * This leaves a place to put eflags so that the call frame can be
+ * converted to a trap frame. Note that the eflags is (semi-)bogusly
+ * pushed into (what will be) tf_err and then copied later into the
+ * final spot. It has to be done this way because esp can't be just
+ * temporarily altered for the pushfl - an interrupt might come in
+ * and clobber the saved cs/eip.
+ */
+ SUPERALIGN_TEXT
+IDTVEC(lcall_syscall)
+ pushfl /* save eflags */
+ popl 8(%esp) /* shuffle into tf_eflags */
+ pushl $7 /* sizeof "lcall 7,0" */
+ subl $4,%esp /* skip over tf_trapno */
+ pushal
+ pushl %ds
+ pushl %es
+ pushl %fs
+ movl $KDSEL,%eax /* switch to kernel segments */
+ movl %eax,%ds
+ movl %eax,%es
+ movl $KPSEL,%eax
+ movl %eax,%fs
+ FAKE_MCOUNT(TF_EIP(%esp))
+ call syscall
+ MEXITCOUNT
+ jmp doreti
+
+/*
+ * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
+ *
+ * Even though the name says 'int0x80', this is actually a TGT (trap gate)
+ * rather then an IGT (interrupt gate). Thus interrupts are enabled on
+ * entry just as they are for a normal syscall.
+ */
+ SUPERALIGN_TEXT
+IDTVEC(int0x80_syscall)
+ pushl $2 /* sizeof "int 0x80" */
+ pushl $0xCAFE
+ pushl $0xDEAD
+ pushal
+ pushl %ds
+ pushl %es
+ pushl %fs
+ movl $KDSEL,%eax /* switch to kernel segments */
+ movl %eax,%ds
+ movl %eax,%es
+ movl $KPSEL,%eax
+ movl %eax,%fs
+ FAKE_MCOUNT(TF_EIP(%esp))
+ call syscall
+ MEXITCOUNT
+ jmp doreti
+
+ENTRY(fork_trampoline)
+ pushl %esp /* trapframe pointer */
+ pushl %ebx /* arg1 */
+ pushl %esi /* function */
+ call fork_exit
+ addl $12,%esp
+ /* cut from syscall */
+
+ /*
+ * Return via doreti to handle ASTs.
+ */
+ MEXITCOUNT
+ jmp doreti
+
+
+/*
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+*/
+
+
+/*
+ * void doreti(struct trapframe)
+ *
+ * Handle return from interrupts, traps and syscalls.
+ */
+ .text
+ SUPERALIGN_TEXT
+ .globl doreti
+ .type doreti,@function
+doreti:
+ FAKE_MCOUNT(bintr) /* init "from" bintr -> doreti */
+doreti_next:
+ testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
+ jz doreti_exit /* can't handle ASTs now if not */
+
+doreti_ast:
+ /*
+ * Check for ASTs atomically with returning. Disabling CPU
+ * interrupts provides sufficient locking even in the SMP case,
+ * since we will be informed of any new ASTs by an IPI.
+ */
+
+ movl HYPERVISOR_shared_info,%esi
+ XEN_BLOCK_EVENTS(%esi)
+ movl PCPU(CURTHREAD),%eax
+ testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax)
+ je doreti_exit
+ XEN_UNBLOCK_EVENTS(%esi)
+ pushl %esp /* pass a pointer to the trapframe */
+ call ast
+ add $4,%esp
+ jmp doreti_ast
+
+doreti_exit:
+ /*
+ * doreti_exit: pop registers, iret.
+ *
+ * The segment register pop is a special case, since it may
+ * fault if (for example) a sigreturn specifies bad segment
+ * registers. The fault is handled in trap.c.
+ */
+
+ movl HYPERVISOR_shared_info,%esi
+ XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks (sti)
+
+ .globl scrit
+scrit:
+ XEN_TEST_PENDING(%esi)
+ jnz hypervisor_callback_pending /* More to go */
+ MEXITCOUNT
+
+ .globl doreti_popl_fs
+doreti_popl_fs:
+ popl %fs
+ .globl doreti_popl_es
+doreti_popl_es:
+ popl %es
+ .globl doreti_popl_ds
+doreti_popl_ds:
+ popl %ds
+ POPA
+ addl $12,%esp
+ .globl doreti_iret
+doreti_iret:
+ iret
+ .globl ecrit
+ecrit:
+
+ /*
+ * doreti_iret_fault and friends. Alternative return code for
+ * the case where we get a fault in the doreti_exit code
+ * above. trap() (i386/i386/trap.c) catches this specific
+ * case, sends the process a signal and continues in the
+ * corresponding place in the code below.
+ */
+ ALIGN_TEXT
+ .globl doreti_iret_fault
+doreti_iret_fault:
+ subl $12,%esp
+ pushal
+ pushl %ds
+ .globl doreti_popl_ds_fault
+doreti_popl_ds_fault:
+ pushl %es
+ .globl doreti_popl_es_fault
+doreti_popl_es_fault:
+ pushl %fs
+ .globl doreti_popl_fs_fault
+doreti_popl_fs_fault:
+ movl $0,TF_ERR(%esp) /* XXX should be the error code */
+ movl $T_PROTFLT,TF_TRAPNO(%esp)
+ jmp alltraps_with_regs_pushed
+
+
+
+
+/*
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
+*/
+
+.globl critical_region_fixup
+critical_region_fixup:
+ addl $critical_fixup_table-scrit,%eax
+ movzbl (%eax),%eax # %eax contains num bytes popped
+ movl %esp,%esi
+ add %eax,%esi # %esi points at end of src region
+ movl %esp,%edi
+ add $0x44,%edi # %edi points at end of dst region
+ movl %eax,%ecx
+ shr $2,%ecx # convert bytes to words
+ je 16f # skip loop if nothing to copy
+15: subl $4,%esi # pre-decrementing copy loop
+ subl $4,%edi
+ movl (%esi),%eax
+ movl %eax,(%edi)
+ loop 15b
+16: movl %edi,%esp # final %edi is top of merged stack
+ jmp hypervisor_callback_pending
+
+
+critical_fixup_table:
+.byte 0x0,0x0,0x0 #testb $0x1,(%esi)
+.byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea
+.byte 0x0,0x0 #pop %fs
+.byte 0x04 #pop %es
+.byte 0x08 #pop %ds
+.byte 0x0c #pop %edi
+.byte 0x10 #pop %esi
+.byte 0x14 #pop %ebp
+.byte 0x18 #pop %ebx
+.byte 0x1c #pop %ebx
+.byte 0x20 #pop %edx
+.byte 0x24 #pop %ecx
+.byte 0x28 #pop %eax
+.byte 0x2c,0x2c,0x2c #add $0xc,%esp
+.byte 0x38 #iret
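+/*
+ * Each byte above corresponds to one byte of the scrit..ecrit sequence
+ * (the instruction it falls within is shown in the trailing comment) and
+ * gives how many bytes the interrupted activation had already popped
+ * from its frame at that point.
+ */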
+
+
+/* # Hypervisor uses this for application faults while it executes.*/
+ENTRY(failsafe_callback)
+ pushal
+ call xen_failsafe_handler
+/*# call install_safe_pf_handler */
+ movl 32(%esp),%ebx
+1: movl %ebx,%ds
+ movl 36(%esp),%ebx
+2: movl %ebx,%es
+ movl 40(%esp),%ebx
+3: movl %ebx,%fs
+ movl 44(%esp),%ebx
+4: movl %ebx,%gs
+/*# call install_normal_pf_handler */
+ popal
+ addl $16,%esp
+ iret
+
+
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c
new file mode 100644
index 0000000000..1e9df732c7
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c
@@ -0,0 +1,234 @@
+/*-
+ * Copyright (c) 1982, 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/genassym.c,v 1.146 2003/11/12 18:14:34 jhb Exp $");
+
+#include "opt_apic.h"
+#include "opt_compat.h"
+#include "opt_kstack_pages.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/assym.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/socket.h>
+#include <sys/resourcevar.h>
+#include <sys/ucontext.h>
+#include <sys/user.h>
+#include <machine/bootinfo.h>
+#include <machine/tss.h>
+#include <sys/vmmeter.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <sys/user.h>
+#include <sys/proc.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <nfs/nfsproto.h>
+#include <nfs/rpcv2.h>
+#include <nfsclient/nfs.h>
+#include <nfsclient/nfsdiskless.h>
+#ifdef DEV_APIC
+#include <machine/apicreg.h>
+#endif
+#include <machine/cpu.h>
+#include <machine/sigframe.h>
+#include <machine/proc.h>
+
+ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
+ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
+ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
+ASSYM(P_SFLAG, offsetof(struct proc, p_sflag));
+ASSYM(P_UAREA, offsetof(struct proc, p_uarea));
+
+ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
+ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
+ASSYM(TD_PROC, offsetof(struct thread, td_proc));
+ASSYM(TD_MD, offsetof(struct thread, td_md));
+
+ASSYM(P_MD, offsetof(struct proc, p_md));
+ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
+
+ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
+ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
+
+ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap));
+ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall));
+ASSYM(V_INTR, offsetof(struct vmmeter, v_intr));
+/* ASSYM(UPAGES, UPAGES);*/
+ASSYM(UAREA_PAGES, UAREA_PAGES);
+ASSYM(KSTACK_PAGES, KSTACK_PAGES);
+ASSYM(PAGE_SIZE, PAGE_SIZE);
+ASSYM(NPTEPG, NPTEPG);
+ASSYM(NPDEPG, NPDEPG);
+ASSYM(NPDEPTD, NPDEPTD);
+ASSYM(NPGPTD, NPGPTD);
+ASSYM(PDESIZE, sizeof(pd_entry_t));
+ASSYM(PTESIZE, sizeof(pt_entry_t));
+ASSYM(PDESHIFT, PDESHIFT);
+ASSYM(PTESHIFT, PTESHIFT);
+ASSYM(PAGE_SHIFT, PAGE_SHIFT);
+ASSYM(PAGE_MASK, PAGE_MASK);
+ASSYM(PDRSHIFT, PDRSHIFT);
+ASSYM(PDRMASK, PDRMASK);
+ASSYM(USRSTACK, USRSTACK);
+ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
+ASSYM(KERNBASE, KERNBASE);
+ASSYM(KERNLOAD, KERNLOAD);
+ASSYM(MCLBYTES, MCLBYTES);
+ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
+ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi));
+ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi));
+ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp));
+ASSYM(PCB_ESP, offsetof(struct pcb, pcb_esp));
+ASSYM(PCB_EBX, offsetof(struct pcb, pcb_ebx));
+ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip));
+ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0));
+
+ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs));
+ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0));
+ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1));
+ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2));
+ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
+ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
+ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
+ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl));
+ASSYM(PCB_DBREGS, PCB_DBREGS);
+ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
+
+ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
+ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
+ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
+ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
+ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
+ASSYM(PCB_SWITCHOUT, offsetof(struct pcb, pcb_switchout));
+
+ASSYM(PCB_SIZE, sizeof(struct pcb));
+
+ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno));
+ASSYM(TF_ERR, offsetof(struct trapframe, tf_err));
+ASSYM(TF_CS, offsetof(struct trapframe, tf_cs));
+ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags));
+ASSYM(TF_EIP, offsetof(struct trapframe, tf_eip));
+ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler));
+#ifdef COMPAT_43
+ASSYM(SIGF_SC, offsetof(struct osigframe, sf_siginfo.si_sc));
+#endif
+ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc));
+#ifdef COMPAT_FREEBSD4
+ASSYM(SIGF_UC4, offsetof(struct sigframe4, sf_uc));
+#endif
+#ifdef COMPAT_43
+ASSYM(SC_PS, offsetof(struct osigcontext, sc_ps));
+ASSYM(SC_FS, offsetof(struct osigcontext, sc_fs));
+ASSYM(SC_GS, offsetof(struct osigcontext, sc_gs));
+ASSYM(SC_TRAPNO, offsetof(struct osigcontext, sc_trapno));
+#endif
+#ifdef COMPAT_FREEBSD4
+ASSYM(UC4_EFLAGS, offsetof(struct ucontext4, uc_mcontext.mc_eflags));
+ASSYM(UC4_GS, offsetof(struct ucontext4, uc_mcontext.mc_gs));
+#endif
+ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags));
+ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs));
+ASSYM(ENOENT, ENOENT);
+ASSYM(EFAULT, EFAULT);
+ASSYM(ENAMETOOLONG, ENAMETOOLONG);
+ASSYM(MAXCOMLEN, MAXCOMLEN);
+ASSYM(MAXPATHLEN, MAXPATHLEN);
+ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo));
+ASSYM(BI_VERSION, offsetof(struct bootinfo, bi_version));
+ASSYM(BI_KERNELNAME, offsetof(struct bootinfo, bi_kernelname));
+ASSYM(BI_NFS_DISKLESS, offsetof(struct bootinfo, bi_nfs_diskless));
+ASSYM(BI_ENDCOMMON, offsetof(struct bootinfo, bi_endcommon));
+ASSYM(NFSDISKLESS_SIZE, sizeof(struct nfs_diskless));
+ASSYM(BI_SIZE, offsetof(struct bootinfo, bi_size));
+ASSYM(BI_SYMTAB, offsetof(struct bootinfo, bi_symtab));
+ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab));
+ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend));
+ASSYM(PC_SIZEOF, sizeof(struct pcpu));
+ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace));
+ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
+ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread));
+ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread));
+ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
+ASSYM(PC_COMMON_TSS, offsetof(struct pcpu, pc_common_tss));
+ASSYM(PC_COMMON_TSSD, offsetof(struct pcpu, pc_common_tssd));
+ASSYM(PC_TSS_GDT, offsetof(struct pcpu, pc_tss_gdt));
+ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt));
+ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
+ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
+ASSYM(PC_TRAP_NESTING, offsetof(struct pcpu, pc_trap_nesting));
+
+ASSYM(PC_CR3, offsetof(struct pcpu, pc_pdir));
+
+#ifdef DEV_APIC
+ASSYM(LA_VER, offsetof(struct LAPIC, version));
+ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
+ASSYM(LA_EOI, offsetof(struct LAPIC, eoi));
+ASSYM(LA_SVR, offsetof(struct LAPIC, svr));
+ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo));
+ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi));
+ASSYM(LA_ISR, offsetof(struct LAPIC, isr0));
+#endif
+
+ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL));
+ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
+ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL));
+
+ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
+ASSYM(GPROC0_SEL, GPROC0_SEL);
+
+ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
+ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse));
+
+#ifdef PC98
+#include <machine/bus.h>
+
+ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base));
+ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat));
+#endif
+
+ASSYM(HYPERVISOR_STACK_SWITCH, __HYPERVISOR_stack_switch);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c
new file mode 100644
index 0000000000..df9568c7d1
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c
@@ -0,0 +1,107 @@
+/******************************************************************************
+ * hypervisor.c
+ *
+ * Communication to/from hypervisor.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/xenvar.h>
+#include <machine/multicall.h>
+
+/* XXX need to verify what the caller save registers are on x86 KMM */
+#define CALLER_SAVE __asm__("pushal; ")
+#define CALLER_RESTORE __asm__("popal;")
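+/*
+ * Note (unverified, per the XXX above): pushal/popal save and restore
+ * only the eight general-purpose registers; %eflags and the segment
+ * registers are untouched.
+ */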
+
+
+/*
+ * ni == non-inline. These are only intended for use from assembler,
+ * so there is no reason to have them in a header.
+ */
+void ni_queue_multicall0(unsigned long op);
+void ni_queue_multicall1(unsigned long op, unsigned long arg1);
+void ni_queue_multicall2(unsigned long op, unsigned long arg1,
+ unsigned long arg2);
+void ni_queue_multicall3(unsigned long op, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3);
+void ni_queue_multicall4(unsigned long op, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3,
+ unsigned long arg4);
+
+void ni_execute_multicall_list(void);
+
+multicall_entry_t multicall_list[MAX_MULTICALL_ENTS];
+int nr_multicall_ents = 0;
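+/*
+ * The queue_multicallN() helpers (from <machine/multicall.h>) append
+ * entries to multicall_list; execute_multicall_list() then presumably
+ * flushes the batch with a single HYPERVISOR_multicall hypercall,
+ * amortizing the cost of entering the hypervisor.
+ */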
+
+
+void
+ni_queue_multicall0(unsigned long op)
+{
+ CALLER_SAVE;
+ queue_multicall0(op);
+ CALLER_RESTORE;
+}
+
+void
+ni_queue_multicall1(unsigned long op, unsigned long arg1)
+{
+ CALLER_SAVE;
+ queue_multicall1(op, arg1);
+ CALLER_RESTORE;
+}
+
+void
+ni_queue_multicall2(unsigned long op, unsigned long arg1,
+ unsigned long arg2)
+{
+ CALLER_SAVE;
+ queue_multicall2(op, arg1, arg2);
+ CALLER_RESTORE;
+}
+
+void
+ni_queue_multicall3(unsigned long op, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3)
+{
+ CALLER_SAVE;
+ queue_multicall3(op, arg1, arg2, arg3);
+ CALLER_RESTORE;
+}
+
+void
+ni_queue_multicall4(unsigned long op, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3,
+ unsigned long arg4)
+{
+ CALLER_SAVE;
+ queue_multicall4(op, arg1, arg2, arg3, arg4);
+ CALLER_RESTORE;
+}
+
+void
+ni_execute_multicall_list(void)
+{
+ CALLER_SAVE;
+ execute_multicall_list();
+ CALLER_RESTORE;
+}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c
new file mode 100644
index 0000000000..fe21232f7a
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c
@@ -0,0 +1,626 @@
+/*-
+ * Copyright (c) 1999 Michael Smith <msmith@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/i686_mem.c,v 1.23 2003/10/21 18:28:34 silby Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+/*
+ * i686 memory range operations
+ *
+ * This code will probably be impenetrable without reference to the
+ * Intel Pentium Pro documentation.
+ */
+
+static char *mem_owner_bios = "BIOS";
+
+#define MR686_FIXMTRR (1<<0)
+
+#define mrwithin(mr, a) \
+ (((a) >= (mr)->mr_base) && ((a) < ((mr)->mr_base + (mr)->mr_len)))
+#define mroverlap(mra, mrb) \
+ (mrwithin(mra, mrb->mr_base) || mrwithin(mrb, mra->mr_base))
+
+#define mrvalid(base, len) \
+ ((!(base & ((1 << 12) - 1))) && /* base is multiple of 4k */ \
+ ((len) >= (1 << 12)) && /* length is >= 4k */ \
+ powerof2((len)) && /* ... and power of two */ \
+ !((base) & ((len) - 1))) /* range is not discontinuous */
+
+#define mrcopyflags(curr, new) (((curr) & ~MDF_ATTRMASK) | ((new) & MDF_ATTRMASK))
+
+static int mtrrs_disabled;
+TUNABLE_INT("machdep.disable_mtrrs", &mtrrs_disabled);
+SYSCTL_INT(_machdep, OID_AUTO, disable_mtrrs, CTLFLAG_RDTUN,
+ &mtrrs_disabled, 0, "Disable i686 MTRRs.");
+
+static void i686_mrinit(struct mem_range_softc *sc);
+static int i686_mrset(struct mem_range_softc *sc,
+ struct mem_range_desc *mrd,
+ int *arg);
+static void i686_mrAPinit(struct mem_range_softc *sc);
+
+static struct mem_range_ops i686_mrops = {
+ i686_mrinit,
+ i686_mrset,
+ i686_mrAPinit
+};
+
+/* XXX for AP startup hook */
+static u_int64_t mtrrcap, mtrrdef;
+
+static struct mem_range_desc *mem_range_match(struct mem_range_softc *sc,
+ struct mem_range_desc *mrd);
+static void i686_mrfetch(struct mem_range_softc *sc);
+static int i686_mtrrtype(int flags);
+#if 0
+static int i686_mrt2mtrr(int flags, int oldval);
+#endif
+static int i686_mtrrconflict(int flag1, int flag2);
+static void i686_mrstore(struct mem_range_softc *sc);
+static void i686_mrstoreone(void *arg);
+static struct mem_range_desc *i686_mtrrfixsearch(struct mem_range_softc *sc,
+ u_int64_t addr);
+static int i686_mrsetlow(struct mem_range_softc *sc,
+ struct mem_range_desc *mrd,
+ int *arg);
+static int i686_mrsetvariable(struct mem_range_softc *sc,
+ struct mem_range_desc *mrd,
+ int *arg);
+
+/* i686 MTRR type to memory range type conversion */
+static int i686_mtrrtomrt[] = {
+ MDF_UNCACHEABLE,
+ MDF_WRITECOMBINE,
+ MDF_UNKNOWN,
+ MDF_UNKNOWN,
+ MDF_WRITETHROUGH,
+ MDF_WRITEPROTECT,
+ MDF_WRITEBACK
+};
+
+#define MTRRTOMRTLEN (sizeof(i686_mtrrtomrt) / sizeof(i686_mtrrtomrt[0]))
+
+static int
+i686_mtrr2mrt(int val) {
+ if (val < 0 || val >= MTRRTOMRTLEN)
+ return MDF_UNKNOWN;
+ return i686_mtrrtomrt[val];
+}
+
+/*
+ * i686 MTRR conflicts. Writeback and uncacheable may overlap.
+ */
+static int
+i686_mtrrconflict(int flag1, int flag2) {
+ flag1 &= MDF_ATTRMASK;
+ flag2 &= MDF_ATTRMASK;
+ if (flag1 == flag2 ||
+ (flag1 == MDF_WRITEBACK && flag2 == MDF_UNCACHEABLE) ||
+ (flag2 == MDF_WRITEBACK && flag1 == MDF_UNCACHEABLE))
+ return 0;
+ return 1;
+}
+
+/*
+ * Look for an exactly-matching range.
+ */
+static struct mem_range_desc *
+mem_range_match(struct mem_range_softc *sc, struct mem_range_desc *mrd)
+{
+ struct mem_range_desc *cand;
+ int i;
+
+ for (i = 0, cand = sc->mr_desc; i < sc->mr_ndesc; i++, cand++)
+ if ((cand->mr_base == mrd->mr_base) &&
+ (cand->mr_len == mrd->mr_len))
+ return(cand);
+ return(NULL);
+}
+
+/*
+ * Fetch the current mtrr settings from the current CPU (assumed to all
+ * be in sync in the SMP case). Note that if we are here, we assume
+ * that MTRRs are enabled, and we may or may not have fixed MTRRs.
+ */
+static void
+i686_mrfetch(struct mem_range_softc *sc)
+{
+ struct mem_range_desc *mrd;
+ u_int64_t msrv;
+ int i, j, msr;
+
+ mrd = sc->mr_desc;
+
+ /* Get fixed-range MTRRs */
+ if (sc->mr_cap & MR686_FIXMTRR) {
+ msr = MSR_MTRR64kBase;
+ for (i = 0; i < (MTRR_N64K / 8); i++, msr++) {
+ msrv = rdmsr(msr);
+ for (j = 0; j < 8; j++, mrd++) {
+ mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) |
+ i686_mtrr2mrt(msrv & 0xff) |
+ MDF_ACTIVE;
+ if (mrd->mr_owner[0] == 0)
+ strcpy(mrd->mr_owner, mem_owner_bios);
+ msrv = msrv >> 8;
+ }
+ }
+ msr = MSR_MTRR16kBase;
+ for (i = 0; i < (MTRR_N16K / 8); i++, msr++) {
+ msrv = rdmsr(msr);
+ for (j = 0; j < 8; j++, mrd++) {
+ mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) |
+ i686_mtrr2mrt(msrv & 0xff) |
+ MDF_ACTIVE;
+ if (mrd->mr_owner[0] == 0)
+ strcpy(mrd->mr_owner, mem_owner_bios);
+ msrv = msrv >> 8;
+ }
+ }
+ msr = MSR_MTRR4kBase;
+ for (i = 0; i < (MTRR_N4K / 8); i++, msr++) {
+ msrv = rdmsr(msr);
+ for (j = 0; j < 8; j++, mrd++) {
+ mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) |
+ i686_mtrr2mrt(msrv & 0xff) |
+ MDF_ACTIVE;
+ if (mrd->mr_owner[0] == 0)
+ strcpy(mrd->mr_owner, mem_owner_bios);
+ msrv = msrv >> 8;
+ }
+ }
+ }
+
+ /* Get remainder which must be variable MTRRs */
+ msr = MSR_MTRRVarBase;
+ for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) {
+ msrv = rdmsr(msr);
+ mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) |
+ i686_mtrr2mrt(msrv & 0xff);
+ mrd->mr_base = msrv & 0x0000000ffffff000LL;
+ msrv = rdmsr(msr + 1);
+ mrd->mr_flags = (msrv & 0x800) ?
+ (mrd->mr_flags | MDF_ACTIVE) :
+ (mrd->mr_flags & ~MDF_ACTIVE);
+ /* Compute the range from the mask. Ick. */
+ mrd->mr_len = (~(msrv & 0x0000000ffffff000LL) & 0x0000000fffffffffLL) + 1;
+ if (!mrvalid(mrd->mr_base, mrd->mr_len))
+ mrd->mr_flags |= MDF_BOGUS;
+ /* If unclaimed and active, must be the BIOS */
+ if ((mrd->mr_flags & MDF_ACTIVE) && (mrd->mr_owner[0] == 0))
+ strcpy(mrd->mr_owner, mem_owner_bios);
+ }
+}
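+
+/*
+ * A minimal sketch (illustrative only, not part of the driver) of the
+ * mask-to-length decode used above: the mask MSR carries the alignment
+ * mask of a power-of-two sized range, so inverting it within the 36-bit
+ * physical address space and adding one yields the length.
+ */
+#if 0
+static u_int64_t
+mtrr_mask_to_len(u_int64_t mask_msrv)
+{
+	/* e.g. a mask of 0x0000000fff000000 describes a 16MB range:
+	   (~0xfff000000 & 0xfffffffff) + 1 == 0x1000000 */
+	return ((~(mask_msrv & 0x0000000ffffff000LL) &
+	    0x0000000fffffffffLL) + 1);
+}
+#endif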
+
+/*
+ * Return the MTRR memory type matching a region's flags
+ */
+static int
+i686_mtrrtype(int flags)
+{
+ int i;
+
+ flags &= MDF_ATTRMASK;
+
+ for (i = 0; i < MTRRTOMRTLEN; i++) {
+ if (i686_mtrrtomrt[i] == MDF_UNKNOWN)
+ continue;
+ if (flags == i686_mtrrtomrt[i])
+ return(i);
+ }
+ return(-1);
+}
+#if 0
+static int
+i686_mrt2mtrr(int flags, int oldval)
+{
+ int val;
+
+ if ((val = i686_mtrrtype(flags)) == -1)
+ return oldval & 0xff;
+ return val & 0xff;
+}
+#endif
+/*
+ * Update running CPU(s) MTRRs to match the ranges in the descriptor
+ * list.
+ *
+ * XXX Must be called with interrupts enabled.
+ */
+static void
+i686_mrstore(struct mem_range_softc *sc)
+{
+#ifdef SMP
+ /*
+ * We should use ipi_all_but_self() to call other CPUs into a
+ * locking gate, then call a target function to do this work.
+ * The "proper" solution involves a generalised locking gate
+ * implementation, not ready yet.
+ */
+ smp_rendezvous(NULL, i686_mrstoreone, NULL, (void *)sc);
+#else
+ disable_intr(); /* disable interrupts */
+ i686_mrstoreone((void *)sc);
+ enable_intr();
+#endif
+}
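+
+/*
+ * smp_rendezvous() above runs i686_mrstoreone() on all CPUs simultaneously,
+ * with interrupts disabled on each, so every CPU's MTRRs are rewritten
+ * consistently before any of them resumes normal work.
+ */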
+
+/*
+ * Update the current CPU's MTRRs with those represented in the
+ * descriptor list. Note that we do this wholesale rather than
+ * just stuffing one entry; this is simpler (but slower, of course).
+ */
+static void
+i686_mrstoreone(void *arg)
+{
+#if 0
+ struct mem_range_softc *sc = (struct mem_range_softc *)arg;
+ struct mem_range_desc *mrd;
+ u_int64_t omsrv, msrv;
+ int i, j, msr;
+ u_int cr4save;
+
+ mrd = sc->mr_desc;
+
+ cr4save = rcr4(); /* save cr4 */
+ if (cr4save & CR4_PGE)
+ load_cr4(cr4save & ~CR4_PGE);
+ load_cr0((rcr0() & ~CR0_NW) | CR0_CD); /* disable caches (CD = 1, NW = 0) */
+ wbinvd(); /* flush caches, TLBs */
+ wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) & ~0x800); /* disable MTRRs (E = 0) */
+
+ /* Set fixed-range MTRRs */
+ if (sc->mr_cap & MR686_FIXMTRR) {
+ msr = MSR_MTRR64kBase;
+ for (i = 0; i < (MTRR_N64K / 8); i++, msr++) {
+ msrv = 0;
+ omsrv = rdmsr(msr);
+ for (j = 7; j >= 0; j--) {
+ msrv = msrv << 8;
+ msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8));
+ }
+ wrmsr(msr, msrv);
+ mrd += 8;
+ }
+ msr = MSR_MTRR16kBase;
+ for (i = 0; i < (MTRR_N16K / 8); i++, msr++) {
+ msrv = 0;
+ omsrv = rdmsr(msr);
+ for (j = 7; j >= 0; j--) {
+ msrv = msrv << 8;
+ msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8));
+ }
+ wrmsr(msr, msrv);
+ mrd += 8;
+ }
+ msr = MSR_MTRR4kBase;
+ for (i = 0; i < (MTRR_N4K / 8); i++, msr++) {
+ msrv = 0;
+ omsrv = rdmsr(msr);
+ for (j = 7; j >= 0; j--) {
+ msrv = msrv << 8;
+ msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8));
+ }
+ wrmsr(msr, msrv);
+ mrd += 8;
+ }
+ }
+
+ /* Set remainder which must be variable MTRRs */
+ msr = MSR_MTRRVarBase;
+ for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) {
+ /* base/type register */
+ omsrv = rdmsr(msr);
+ if (mrd->mr_flags & MDF_ACTIVE) {
+ msrv = mrd->mr_base & 0x0000000ffffff000LL;
+ msrv |= i686_mrt2mtrr(mrd->mr_flags, omsrv);
+ } else {
+ msrv = 0;
+ }
+ wrmsr(msr, msrv);
+
+ /* mask/active register */
+ if (mrd->mr_flags & MDF_ACTIVE) {
+ msrv = 0x800 | (~(mrd->mr_len - 1) & 0x0000000ffffff000LL);
+ } else {
+ msrv = 0;
+ }
+ wrmsr(msr + 1, msrv);
+ }
+ wbinvd(); /* flush caches, TLBs */
+ wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) | 0x800); /* restore MTRR state */
+ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* enable caches CD = 0 and NW = 0 */
+ load_cr4(cr4save); /* restore cr4 */
+#endif
+}
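+
+/*
+ * For reference, the (currently "#if 0"'d) body above follows the
+ * documented MTRR modification protocol: save CR4 and clear CR4.PGE to
+ * drop global TLB entries, enter no-fill cache mode (CR0.CD = 1, NW = 0),
+ * wbinvd(), disable the MTRRs via MTRRdefType.E, write the new fixed and
+ * variable ranges, wbinvd() again, re-enable the MTRRs, and finally
+ * restore CR0 caching and CR4.
+ */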
+
+/*
+ * Hunt for the fixed MTRR referencing (addr)
+ */
+static struct mem_range_desc *
+i686_mtrrfixsearch(struct mem_range_softc *sc, u_int64_t addr)
+{
+ struct mem_range_desc *mrd;
+ int i;
+
+ for (i = 0, mrd = sc->mr_desc; i < (MTRR_N64K + MTRR_N16K + MTRR_N4K); i++, mrd++)
+ if ((addr >= mrd->mr_base) && (addr < (mrd->mr_base + mrd->mr_len)))
+ return(mrd);
+ return(NULL);
+}
+
+/*
+ * Try to satisfy the given range request by manipulating the fixed MTRRs that
+ * cover low memory.
+ *
+ * Note that we try to be generous here; we'll bloat the range out to the
+ * next higher/lower boundary to avoid the consumer having to know too much
+ * about the mechanisms here.
+ *
+ * XXX note that this will have to be updated when we start supporting "busy" ranges.
+ */
+static int
+i686_mrsetlow(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg)
+{
+ struct mem_range_desc *first_md, *last_md, *curr_md;
+
+ /* range check */
+ if (((first_md = i686_mtrrfixsearch(sc, mrd->mr_base)) == NULL) ||
+ ((last_md = i686_mtrrfixsearch(sc, mrd->mr_base + mrd->mr_len - 1)) == NULL))
+ return(EINVAL);
+
+ /* check we aren't doing something risky */
+ if (!(mrd->mr_flags & MDF_FORCE))
+ for (curr_md = first_md; curr_md <= last_md; curr_md++) {
+ if ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN)
+ return (EACCES);
+ }
+
+ /* set flags, clear set-by-firmware flag */
+ for (curr_md = first_md; curr_md <= last_md; curr_md++) {
+ curr_md->mr_flags = mrcopyflags(curr_md->mr_flags & ~MDF_FIRMWARE, mrd->mr_flags);
+ bcopy(mrd->mr_owner, curr_md->mr_owner, sizeof(mrd->mr_owner));
+ }
+
+ return(0);
+}
+
+/*
+ * Modify/add a variable MTRR to satisfy the request.
+ *
+ * XXX needs to be updated to properly support "busy" ranges.
+ */
+static int
+i686_mrsetvariable(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg)
+{
+ struct mem_range_desc *curr_md, *free_md;
+ int i;
+
+ /*
+ * Scan the currently active variable descriptors, look for
+ * one we exactly match (straight takeover) and for possible
+ * accidental overlaps.
+ * Keep track of the first empty variable descriptor in case we
+ * can't perform a takeover.
+ */
+ i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0;
+ curr_md = sc->mr_desc + i;
+ free_md = NULL;
+ for (; i < sc->mr_ndesc; i++, curr_md++) {
+ if (curr_md->mr_flags & MDF_ACTIVE) {
+ /* exact match? */
+ if ((curr_md->mr_base == mrd->mr_base) &&
+ (curr_md->mr_len == mrd->mr_len)) {
+ /* whoops, owned by someone */
+ if (curr_md->mr_flags & MDF_BUSY)
+ return(EBUSY);
+ /* check we aren't doing something risky */
+ if (!(mrd->mr_flags & MDF_FORCE) &&
+ ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN))
+ return (EACCES);
+ /* Ok, just hijack this entry */
+ free_md = curr_md;
+ break;
+ }
+ /* non-exact overlap ? */
+ if (mroverlap(curr_md, mrd)) {
+ /* between conflicting region types? */
+ if (i686_mtrrconflict(curr_md->mr_flags, mrd->mr_flags))
+ return(EINVAL);
+ }
+ } else if (free_md == NULL) {
+ free_md = curr_md;
+ }
+ }
+ /* got somewhere to put it? */
+ if (free_md == NULL)
+ return(ENOSPC);
+
+ /* Set up new descriptor */
+ free_md->mr_base = mrd->mr_base;
+ free_md->mr_len = mrd->mr_len;
+ free_md->mr_flags = mrcopyflags(MDF_ACTIVE, mrd->mr_flags);
+ bcopy(mrd->mr_owner, free_md->mr_owner, sizeof(mrd->mr_owner));
+ return(0);
+}
+
+/*
+ * Handle requests to set memory range attributes by manipulating MTRRs.
+ */
+static int
+i686_mrset(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg)
+{
+ struct mem_range_desc *targ;
+ int error = 0;
+
+ switch(*arg) {
+ case MEMRANGE_SET_UPDATE:
+ /* make sure that what's being asked for is even possible at all */
+ if (!mrvalid(mrd->mr_base, mrd->mr_len) ||
+ i686_mtrrtype(mrd->mr_flags) == -1)
+ return(EINVAL);
+
+#define FIXTOP ((MTRR_N64K * 0x10000) + (MTRR_N16K * 0x4000) + (MTRR_N4K * 0x1000))
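+
+/*
+ * With the usual constants (MTRR_N64K = 8, MTRR_N16K = 16, MTRR_N4K = 64)
+ * FIXTOP works out to (8 * 64K) + (16 * 16K) + (64 * 4K) = 0x100000,
+ * i.e. the fixed MTRRs cover exactly the first 1MB of physical memory.
+ */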
+
+ /* are the "low memory" conditions applicable? */
+ if ((sc->mr_cap & MR686_FIXMTRR) &&
+ ((mrd->mr_base + mrd->mr_len) <= FIXTOP)) {
+ if ((error = i686_mrsetlow(sc, mrd, arg)) != 0)
+ return(error);
+ } else {
+ /* it's time to play with variable MTRRs */
+ if ((error = i686_mrsetvariable(sc, mrd, arg)) != 0)
+ return(error);
+ }
+ break;
+
+ case MEMRANGE_SET_REMOVE:
+ if ((targ = mem_range_match(sc, mrd)) == NULL)
+ return(ENOENT);
+ if (targ->mr_flags & MDF_FIXACTIVE)
+ return(EPERM);
+ if (targ->mr_flags & MDF_BUSY)
+ return(EBUSY);
+ targ->mr_flags &= ~MDF_ACTIVE;
+ targ->mr_owner[0] = 0;
+ break;
+
+ default:
+ return(EOPNOTSUPP);
+ }
+
+ /* update the hardware */
+ i686_mrstore(sc);
+ i686_mrfetch(sc); /* refetch to see where we're at */
+ return(0);
+}
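+
+/*
+ * Illustrative only: consumers do not call i686_mrset() directly but go
+ * through the machine-independent mem_range_attr_set() wrapper.  A sketch
+ * of setting up a write-combining aperture (the "fb" names are
+ * hypothetical):
+ */
+#if 0
+	struct mem_range_desc mrd;
+	int act = MEMRANGE_SET_UPDATE;
+	int error;
+
+	mrd.mr_base = fb_base;			/* hypothetical aperture base */
+	mrd.mr_len = fb_len;			/* hypothetical aperture length */
+	mrd.mr_flags = MDF_WRITECOMBINE;
+	strcpy(mrd.mr_owner, "fb");
+	error = mem_range_attr_set(&mrd, &act);
+#endif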
+
+/*
+ * Work out how many ranges we support, initialise storage for them,
+ * fetch the initial settings.
+ */
+static void
+i686_mrinit(struct mem_range_softc *sc)
+{
+ struct mem_range_desc *mrd;
+ int nmdesc = 0;
+ int i;
+
+ /* XXX */
+ return;
+
+ mtrrcap = rdmsr(MSR_MTRRcap);
+ mtrrdef = rdmsr(MSR_MTRRdefType);
+
+ /* For now, bail out if MTRRs are not enabled */
+ if (!(mtrrdef & 0x800)) {
+ if (bootverbose)
+ printf("CPU supports MTRRs but not enabled\n");
+ return;
+ }
+ nmdesc = mtrrcap & 0xff;
+ printf("Pentium Pro MTRR support enabled\n");
+
+ /* If fixed MTRRs supported and enabled */
+ if ((mtrrcap & 0x100) && (mtrrdef & 0x400)) {
+ sc->mr_cap = MR686_FIXMTRR;
+ nmdesc += MTRR_N64K + MTRR_N16K + MTRR_N4K;
+ }
+
+ sc->mr_desc =
+ (struct mem_range_desc *)malloc(nmdesc * sizeof(struct mem_range_desc),
+ M_MEMDESC, M_WAITOK | M_ZERO);
+ sc->mr_ndesc = nmdesc;
+
+ mrd = sc->mr_desc;
+
+ /* Populate the fixed MTRR entries' base/length */
+ if (sc->mr_cap & MR686_FIXMTRR) {
+ for (i = 0; i < MTRR_N64K; i++, mrd++) {
+ mrd->mr_base = i * 0x10000;
+ mrd->mr_len = 0x10000;
+ mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE;
+ }
+ for (i = 0; i < MTRR_N16K; i++, mrd++) {
+ mrd->mr_base = i * 0x4000 + 0x80000;
+ mrd->mr_len = 0x4000;
+ mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE;
+ }
+ for (i = 0; i < MTRR_N4K; i++, mrd++) {
+ mrd->mr_base = i * 0x1000 + 0xc0000;
+ mrd->mr_len = 0x1000;
+ mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE;
+ }
+ }
+
+ /*
+ * Get current settings, anything set now is considered to have
+ * been set by the firmware. (XXX has something already played here?)
+ */
+ i686_mrfetch(sc);
+ mrd = sc->mr_desc;
+ for (i = 0; i < sc->mr_ndesc; i++, mrd++) {
+ if (mrd->mr_flags & MDF_ACTIVE)
+ mrd->mr_flags |= MDF_FIRMWARE;
+ }
+}
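+
+/*
+ * By way of example: a CPU reporting MTRRcap.VCNT == 8, with fixed-range
+ * MTRRs supported and enabled, ends up with nmdesc = 8 + 8 + 16 + 64 = 96
+ * descriptors: 88 fixed-range entries followed by 8 variable-range ones.
+ */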
+
+/*
+ * Initialise MTRRs on an AP after the BSP has run the init code.
+ */
+static void
+i686_mrAPinit(struct mem_range_softc *sc)
+{
+ i686_mrstoreone((void *)sc); /* set MTRRs to match BSP */
+ wrmsr(MSR_MTRRdefType, mtrrdef); /* set MTRR behaviour to match BSP */
+}
+
+static void
+i686_mem_drvinit(void *unused)
+{
+ /* Try for i686 MTRRs */
+ if (!mtrrs_disabled && (cpu_feature & CPUID_MTRR) &&
+ ((cpu_id & 0xf00) == 0x600 || (cpu_id & 0xf00) == 0xf00) &&
+ ((strcmp(cpu_vendor, "GenuineIntel") == 0) ||
+ (strcmp(cpu_vendor, "AuthenticAMD") == 0))) {
+ mem_range_softc.mr_op = &i686_mrops;
+ }
+}
+
+SYSINIT(i686memdev,SI_SUB_DRIVERS,SI_ORDER_FIRST,i686_mem_drvinit,NULL)
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c
new file mode 100644
index 0000000000..0852fb98aa
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c
@@ -0,0 +1,889 @@
+/*-
+ * Copyright (c) KATO Takenori, 1997, 1998.
+ *
+ * All rights reserved. Unpublished rights reserved under the copyright
+ * laws of Japan.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer as
+ * the first lines of this file unmodified.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/initcpu.c,v 1.49 2003/11/10 15:48:30 jhb Exp $");
+
+#include "opt_cpu.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+
+#include <machine/cputypes.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
+#define CPU_ENABLE_SSE
+#endif
+#if defined(CPU_DISABLE_SSE)
+#undef CPU_ENABLE_SSE
+#endif
+
+void initializecpu(void);
+#if defined(I586_CPU) && defined(CPU_WT_ALLOC)
+void enable_K5_wt_alloc(void);
+void enable_K6_wt_alloc(void);
+void enable_K6_2_wt_alloc(void);
+#endif
+
+#ifdef I486_CPU
+static void init_5x86(void);
+static void init_bluelightning(void);
+static void init_486dlc(void);
+static void init_cy486dx(void);
+#ifdef CPU_I486_ON_386
+static void init_i486_on_386(void);
+#endif
+static void init_6x86(void);
+#endif /* I486_CPU */
+
+#ifdef I686_CPU
+static void init_6x86MX(void);
+static void init_ppro(void);
+static void init_mendocino(void);
+#endif
+
+static int hw_instruction_sse;
+SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
+ &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");
+
+/* Must *NOT* be BSS or locore will bzero these after setting them */
+int cpu = 0; /* Are we 386, 386sx, 486, etc? */
+u_int cpu_feature = 0; /* Feature flags */
+u_int cpu_high = 0; /* Highest arg to CPUID */
+u_int cpu_id = 0; /* Stepping ID */
+u_int cpu_procinfo = 0; /* HyperThreading Info / Brand Index / CLFUSH */
+char cpu_vendor[20] = ""; /* CPU Origin code */
+
+#ifdef CPU_ENABLE_SSE
+u_int cpu_fxsr; /* SSE enabled */
+#endif
+
+#ifdef I486_CPU
+/*
+ * IBM Blue Lightning
+ */
+static void
+init_bluelightning(void)
+{
+#if 0
+ u_long eflags;
+
+#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
+ need_post_dma_flush = 1;
+#endif
+
+ eflags = read_eflags();
+ disable_intr();
+
+ load_cr0(rcr0() | CR0_CD | CR0_NW);
+ invd();
+
+#ifdef CPU_BLUELIGHTNING_FPU_OP_CACHE
+ wrmsr(0x1000, 0x9c92LL); /* FP operand can be cacheable on Cyrix FPU */
+#else
+ wrmsr(0x1000, 0x1c92LL); /* Intel FPU */
+#endif
+ /* Enables 13MB and 0-640KB cache. */
+ wrmsr(0x1001, (0xd0LL << 32) | 0x3ff);
+#ifdef CPU_BLUELIGHTNING_3X
+ wrmsr(0x1002, 0x04000000LL); /* Enables triple-clock mode. */
+#else
+ wrmsr(0x1002, 0x03000000LL); /* Enables double-clock mode. */
+#endif
+
+ /* Enable caching in CR0. */
+ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */
+ invd();
+ write_eflags(eflags);
+#endif
+}
+
+/*
+ * Cyrix 486SLC/DLC/SR/DR series
+ */
+static void
+init_486dlc(void)
+{
+ u_long eflags;
+ u_char ccr0;
+
+ eflags = read_eflags();
+ disable_intr();
+ invd();
+
+ ccr0 = read_cyrix_reg(CCR0);
+#ifndef CYRIX_CACHE_WORKS
+ ccr0 |= CCR0_NC1 | CCR0_BARB;
+ write_cyrix_reg(CCR0, ccr0);
+ invd();
+#else
+ ccr0 &= ~CCR0_NC0;
+#ifndef CYRIX_CACHE_REALLY_WORKS
+ ccr0 |= CCR0_NC1 | CCR0_BARB;
+#else
+ ccr0 |= CCR0_NC1;
+#endif
+#ifdef CPU_DIRECT_MAPPED_CACHE
+ ccr0 |= CCR0_CO; /* Direct mapped mode. */
+#endif
+ write_cyrix_reg(CCR0, ccr0);
+
+ /* Clear non-cacheable region. */
+ write_cyrix_reg(NCR1+2, NCR_SIZE_0K);
+ write_cyrix_reg(NCR2+2, NCR_SIZE_0K);
+ write_cyrix_reg(NCR3+2, NCR_SIZE_0K);
+ write_cyrix_reg(NCR4+2, NCR_SIZE_0K);
+
+ write_cyrix_reg(0, 0); /* dummy write */
+
+ /* Enable caching in CR0. */
+ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */
+ invd();
+#endif /* !CYRIX_CACHE_WORKS */
+ write_eflags(eflags);
+}
+
+
+/*
+ * Cyrix 486S/DX series
+ */
+static void
+init_cy486dx(void)
+{
+ u_long eflags;
+ u_char ccr2;
+
+ eflags = read_eflags();
+ disable_intr();
+ invd();
+
+ ccr2 = read_cyrix_reg(CCR2);
+#ifdef CPU_SUSP_HLT
+ ccr2 |= CCR2_SUSP_HLT;
+#endif
+
+#ifdef PC98
+ /* Enables WB cache interface pin and Lock NW bit in CR0. */
+ ccr2 |= CCR2_WB | CCR2_LOCK_NW;
+ /* Unlock NW bit in CR0. */
+ write_cyrix_reg(CCR2, ccr2 & ~CCR2_LOCK_NW);
+ load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */
+#endif
+
+ write_cyrix_reg(CCR2, ccr2);
+ write_eflags(eflags);
+}
+
+
+/*
+ * Cyrix 5x86
+ */
+static void
+init_5x86(void)
+{
+ u_long eflags;
+ u_char ccr2, ccr3, ccr4, pcr0;
+
+ eflags = read_eflags();
+ disable_intr();
+
+ load_cr0(rcr0() | CR0_CD | CR0_NW);
+ wbinvd();
+
+ (void)read_cyrix_reg(CCR3); /* dummy */
+
+ /* Initialize CCR2. */
+ ccr2 = read_cyrix_reg(CCR2);
+ ccr2 |= CCR2_WB;
+#ifdef CPU_SUSP_HLT
+ ccr2 |= CCR2_SUSP_HLT;
+#else
+ ccr2 &= ~CCR2_SUSP_HLT;
+#endif
+ ccr2 |= CCR2_WT1;
+ write_cyrix_reg(CCR2, ccr2);
+
+ /* Initialize CCR4. */
+ ccr3 = read_cyrix_reg(CCR3);
+ write_cyrix_reg(CCR3, CCR3_MAPEN0);
+
+ ccr4 = read_cyrix_reg(CCR4);
+ ccr4 |= CCR4_DTE;
+ ccr4 |= CCR4_MEM;
+#ifdef CPU_FASTER_5X86_FPU
+ ccr4 |= CCR4_FASTFPE;
+#else
+ ccr4 &= ~CCR4_FASTFPE;
+#endif
+ ccr4 &= ~CCR4_IOMASK;
+ /********************************************************************
+ * WARNING: The "BIOS Writers Guide" mentions that I/O recovery time
+ * should be 0 for errata fix.
+ ********************************************************************/
+#ifdef CPU_IORT
+ ccr4 |= CPU_IORT & CCR4_IOMASK;
+#endif
+ write_cyrix_reg(CCR4, ccr4);
+
+ /* Initialize PCR0. */
+ /****************************************************************
+ * WARNING: RSTK_EN and LOOP_EN could make your system unstable.
+ * BTB_EN might make your system unstable.
+ ****************************************************************/
+ pcr0 = read_cyrix_reg(PCR0);
+#ifdef CPU_RSTK_EN
+ pcr0 |= PCR0_RSTK;
+#else
+ pcr0 &= ~PCR0_RSTK;
+#endif
+#ifdef CPU_BTB_EN
+ pcr0 |= PCR0_BTB;
+#else
+ pcr0 &= ~PCR0_BTB;
+#endif
+#ifdef CPU_LOOP_EN
+ pcr0 |= PCR0_LOOP;
+#else
+ pcr0 &= ~PCR0_LOOP;
+#endif
+
+ /****************************************************************
+ * WARNING: if you use a memory mapped I/O device, don't use
+ * DISABLE_5X86_LSSER option, which may reorder memory mapped
+ * I/O access.
+ * IF YOUR MOTHERBOARD HAS PCI BUS, DON'T DISABLE LSSER.
+ ****************************************************************/
+#ifdef CPU_DISABLE_5X86_LSSER
+ pcr0 &= ~PCR0_LSSER;
+#else
+ pcr0 |= PCR0_LSSER;
+#endif
+ write_cyrix_reg(PCR0, pcr0);
+
+ /* Restore CCR3. */
+ write_cyrix_reg(CCR3, ccr3);
+
+ (void)read_cyrix_reg(0x80); /* dummy */
+
+ /* Unlock NW bit in CR0. */
+ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW);
+ load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */
+ /* Lock NW bit in CR0. */
+ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW);
+
+ write_eflags(eflags);
+}
+
+#ifdef CPU_I486_ON_386
+/*
+ * There are i486-based upgrade products for i386 machines.
+ * In this case, the BIOS doesn't enable the CPU cache.
+ */
+static void
+init_i486_on_386(void)
+{
+ u_long eflags;
+
+#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
+ need_post_dma_flush = 1;
+#endif
+
+ eflags = read_eflags();
+ disable_intr();
+
+ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0, NW = 0 */
+
+ write_eflags(eflags);
+}
+#endif
+
+/*
+ * Cyrix 6x86
+ *
+ * XXX - What should I do here? Please let me know.
+ */
+static void
+init_6x86(void)
+{
+ u_long eflags;
+ u_char ccr3, ccr4;
+
+ eflags = read_eflags();
+ disable_intr();
+
+ load_cr0(rcr0() | CR0_CD | CR0_NW);
+ wbinvd();
+
+ /* Initialize CCR0. */
+ write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1);
+
+ /* Initialize CCR1. */
+#ifdef CPU_CYRIX_NO_LOCK
+ write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK);
+#else
+ write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK);
+#endif
+
+ /* Initialize CCR2. */
+#ifdef CPU_SUSP_HLT
+ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT);
+#else
+ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT);
+#endif
+
+ ccr3 = read_cyrix_reg(CCR3);
+ write_cyrix_reg(CCR3, CCR3_MAPEN0);
+
+ /* Initialize CCR4. */
+ ccr4 = read_cyrix_reg(CCR4);
+ ccr4 |= CCR4_DTE;
+ ccr4 &= ~CCR4_IOMASK;
+#ifdef CPU_IORT
+ write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK));
+#else
+ write_cyrix_reg(CCR4, ccr4 | 7);
+#endif
+
+ /* Initialize CCR5. */
+#ifdef CPU_WT_ALLOC
+ write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC);
+#endif
+
+ /* Restore CCR3. */
+ write_cyrix_reg(CCR3, ccr3);
+
+ /* Unlock NW bit in CR0. */
+ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW);
+
+ /*
+	 * Earlier revisions of the 6x86 CPU could crash the system if
+	 * the L1 cache is in write-back mode.
+ */
+ if ((cyrix_did & 0xff00) > 0x1600)
+ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */
+ else {
+ /* Revision 2.6 and lower. */
+#ifdef CYRIX_CACHE_REALLY_WORKS
+ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */
+#else
+ load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0 and NW = 1 */
+#endif
+ }
+
+ /* Lock NW bit in CR0. */
+ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW);
+
+ write_eflags(eflags);
+}
+#endif /* I486_CPU */
+
+#ifdef I686_CPU
+/*
+ * Cyrix 6x86MX (code-named M2)
+ *
+ * XXX - What should I do here? Please let me know.
+ */
+static void
+init_6x86MX(void)
+{
+#if 0
+ u_long eflags;
+ u_char ccr3, ccr4;
+
+ eflags = read_eflags();
+ disable_intr();
+
+ load_cr0(rcr0() | CR0_CD | CR0_NW);
+ wbinvd();
+
+ /* Initialize CCR0. */
+ write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1);
+
+ /* Initialize CCR1. */
+#ifdef CPU_CYRIX_NO_LOCK
+ write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK);
+#else
+ write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK);
+#endif
+
+ /* Initialize CCR2. */
+#ifdef CPU_SUSP_HLT
+ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT);
+#else
+ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT);
+#endif
+
+ ccr3 = read_cyrix_reg(CCR3);
+ write_cyrix_reg(CCR3, CCR3_MAPEN0);
+
+ /* Initialize CCR4. */
+ ccr4 = read_cyrix_reg(CCR4);
+ ccr4 &= ~CCR4_IOMASK;
+#ifdef CPU_IORT
+ write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK));
+#else
+ write_cyrix_reg(CCR4, ccr4 | 7);
+#endif
+
+ /* Initialize CCR5. */
+#ifdef CPU_WT_ALLOC
+ write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC);
+#endif
+
+ /* Restore CCR3. */
+ write_cyrix_reg(CCR3, ccr3);
+
+ /* Unlock NW bit in CR0. */
+ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW);
+
+ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */
+
+ /* Lock NW bit in CR0. */
+ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW);
+
+ write_eflags(eflags);
+#endif
+}
+
+static void
+init_ppro(void)
+{
+ u_int64_t apicbase;
+
+ /*
+ * Local APIC should be disabled if it is not going to be used.
+ */
+ apicbase = rdmsr(MSR_APICBASE);
+ apicbase &= ~APICBASE_ENABLED;
+ wrmsr(MSR_APICBASE, apicbase);
+}
+
+/*
+ * Initialize BBL_CR_CTL3 (Control register 3: used to configure the
+ * L2 cache).
+ */
+static void
+init_mendocino(void)
+{
+#ifdef CPU_PPRO2CELERON
+ u_long eflags;
+ u_int64_t bbl_cr_ctl3;
+
+ eflags = read_eflags();
+ disable_intr();
+
+ load_cr0(rcr0() | CR0_CD | CR0_NW);
+ wbinvd();
+
+ bbl_cr_ctl3 = rdmsr(MSR_BBL_CR_CTL3);
+
+ /* If the L2 cache is configured, do nothing. */
+ if (!(bbl_cr_ctl3 & 1)) {
+ bbl_cr_ctl3 = 0x134052bLL;
+
+ /* Set L2 Cache Latency (Default: 5). */
+#ifdef CPU_CELERON_L2_LATENCY
+#if CPU_L2_LATENCY > 15
+#error invalid CPU_L2_LATENCY.
+#endif
+ bbl_cr_ctl3 |= CPU_L2_LATENCY << 1;
+#else
+ bbl_cr_ctl3 |= 5 << 1;
+#endif
+ wrmsr(MSR_BBL_CR_CTL3, bbl_cr_ctl3);
+ }
+
+ load_cr0(rcr0() & ~(CR0_CD | CR0_NW));
+ write_eflags(eflags);
+#endif /* CPU_PPRO2CELERON */
+}
+
+#endif /* I686_CPU */
+
+/*
+ * Initialize CR4 (Control register 4) to enable SSE instructions.
+ */
+void
+enable_sse(void)
+{
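+	/*
+	 * Assumption (not stated in the original sources): a Xen guest
+	 * kernel is not permitted to modify CR4 itself; the hypervisor
+	 * owns it, so SSE setup is skipped entirely when built for XEN.
+	 */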
+#ifdef XEN
+ return;
+#endif
+#if defined(CPU_ENABLE_SSE)
+ if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
+ load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
+ cpu_fxsr = hw_instruction_sse = 1;
+ }
+#endif
+}
+
+void
+initializecpu(void)
+{
+
+ switch (cpu) {
+#ifdef I486_CPU
+ case CPU_BLUE:
+ init_bluelightning();
+ break;
+ case CPU_486DLC:
+ init_486dlc();
+ break;
+ case CPU_CY486DX:
+ init_cy486dx();
+ break;
+ case CPU_M1SC:
+ init_5x86();
+ break;
+#ifdef CPU_I486_ON_386
+ case CPU_486:
+ init_i486_on_386();
+ break;
+#endif
+ case CPU_M1:
+ init_6x86();
+ break;
+#endif /* I486_CPU */
+#ifdef I686_CPU
+ case CPU_M2:
+ init_6x86MX();
+ break;
+ case CPU_686:
+ if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
+ switch (cpu_id & 0xff0) {
+ case 0x610:
+ init_ppro();
+ break;
+ case 0x660:
+ init_mendocino();
+ break;
+ }
+ } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+#if defined(I686_CPU) && defined(CPU_ATHLON_SSE_HACK)
+ /*
+ * Sometimes the BIOS doesn't enable SSE instructions.
+ * According to AMD document 20734, the mobile
+ * Duron, the (mobile) Athlon 4 and the Athlon MP
+ * support SSE. These correspond to cpu_id 0x66X
+ * or 0x67X.
+ */
+ if ((cpu_feature & CPUID_XMM) == 0 &&
+ ((cpu_id & ~0xf) == 0x660 ||
+ (cpu_id & ~0xf) == 0x670 ||
+ (cpu_id & ~0xf) == 0x680)) {
+ u_int regs[4];
+ wrmsr(0xC0010015, rdmsr(0xC0010015) & ~0x08000);
+ do_cpuid(1, regs);
+ cpu_feature = regs[3];
+ }
+#endif
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+ enable_sse();
+
+#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
+ /*
+ * OS should flush L1 cache by itself because no PC-98 supports
+ * non-Intel CPUs. Use wbinvd instruction before DMA transfer
+ * when need_pre_dma_flush = 1, use invd instruction after DMA
+ * transfer when need_post_dma_flush = 1. If your CPU upgrade
+ * product supports hardware cache control, you can add the
+ * CPU_UPGRADE_HW_CACHE option in your kernel configuration file.
+ * This option eliminates unneeded cache flush instruction(s).
+ */
+ if (strcmp(cpu_vendor, "CyrixInstead") == 0) {
+ switch (cpu) {
+#ifdef I486_CPU
+ case CPU_486DLC:
+ need_post_dma_flush = 1;
+ break;
+ case CPU_M1SC:
+ need_pre_dma_flush = 1;
+ break;
+ case CPU_CY486DX:
+ need_pre_dma_flush = 1;
+#ifdef CPU_I486_ON_386
+ need_post_dma_flush = 1;
+#endif
+ break;
+#endif
+ default:
+ break;
+ }
+ } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+ switch (cpu_id & 0xFF0) {
+ case 0x470: /* Enhanced Am486DX2 WB */
+ case 0x490: /* Enhanced Am486DX4 WB */
+ case 0x4F0: /* Am5x86 WB */
+ need_pre_dma_flush = 1;
+ break;
+ }
+ } else if (strcmp(cpu_vendor, "IBM") == 0) {
+ need_post_dma_flush = 1;
+ } else {
+#ifdef CPU_I486_ON_386
+ need_pre_dma_flush = 1;
+#endif
+ }
+#endif /* PC98 && !CPU_UPGRADE_HW_CACHE */
+}
+
+#if defined(I586_CPU) && defined(CPU_WT_ALLOC)
+/*
+ * Enable the write allocate feature of AMD processors.
+ * The following two functions require the Maxmem variable to be set.
+ */
+void
+enable_K5_wt_alloc(void)
+{
+ u_int64_t msr;
+ register_t savecrit;
+
+ /*
+ * Write allocate is supported only on models 1, 2, and 3, with
+ * a stepping of 4 or greater.
+ */
+ if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) {
+ savecrit = intr_disable();
+ msr = rdmsr(0x83); /* HWCR */
+		wrmsr(0x83, msr & ~0x10);	/* clear the write allocate
+						   enable bit (bitwise ~, not
+						   logical !) */
+
+ /*
+ * We have to tell the chip where the top of memory is,
+		 * since video cards could have frame buffers there,
+ * memory-mapped I/O could be there, etc.
+ */
+ if(Maxmem > 0)
+ msr = Maxmem / 16;
+ else
+ msr = 0;
+ msr |= AMD_WT_ALLOC_TME | AMD_WT_ALLOC_FRE;
+#ifdef PC98
+ if (!(inb(0x43b) & 4)) {
+ wrmsr(0x86, 0x0ff00f0);
+ msr |= AMD_WT_ALLOC_PRE;
+ }
+#else
+ /*
+		 * There is no way to know whether the 15-16M hole exists or not.
+ * Therefore, we disable write allocate for this range.
+ */
+ wrmsr(0x86, 0x0ff00f0);
+ msr |= AMD_WT_ALLOC_PRE;
+#endif
+ wrmsr(0x85, msr);
+
+		msr = rdmsr(0x83);
+		wrmsr(0x83, msr | 0x10);	/* enable write allocate */
+ intr_restore(savecrit);
+ }
+}
+
+void
+enable_K6_wt_alloc(void)
+{
+ quad_t size;
+ u_int64_t whcr;
+ u_long eflags;
+
+ eflags = read_eflags();
+ disable_intr();
+ wbinvd();
+
+#ifdef CPU_DISABLE_CACHE
+	/*
+	 * Certain K6-2 boxes become unstable when write allocation is
+	 * enabled.
+	 *
+	 * The AMD-K6 processor provides the 64-bit Test Register 12 (TR12),
+	 * but only the Cache Inhibit (CI) bit (bit 3 of TR12) is supported.
+	 * All other bits in TR12 have no effect on the processor's operation.
+	 * The I/O Trap Restart function (bit 9 of TR12) is always enabled
+	 * on the AMD-K6.
+	 */
+ wrmsr(0x0000000e, (u_int64_t)0x0008);
+#endif
+ /* Don't assume that memory size is aligned with 4M. */
+ if (Maxmem > 0)
+ size = ((Maxmem >> 8) + 3) >> 2;
+ else
+ size = 0;
+
+ /* Limit is 508M bytes. */
+ if (size > 0x7f)
+ size = 0x7f;
+ whcr = (rdmsr(0xc0000082) & ~(0x7fLL << 1)) | (size << 1);
+
+#if defined(PC98) || defined(NO_MEMORY_HOLE)
+ if (whcr & (0x7fLL << 1)) {
+#ifdef PC98
+ /*
+		 * If bit 2 of port 0x43b is 0, disable write allocate for the
+ * 15-16M range.
+ */
+ if (!(inb(0x43b) & 4))
+ whcr &= ~0x0001LL;
+ else
+#endif
+ whcr |= 0x0001LL;
+ }
+#else
+ /*
+	 * There is no way to know whether the 15-16M hole exists or not.
+ * Therefore, we disable write allocate for this range.
+ */
+ whcr &= ~0x0001LL;
+#endif
+ wrmsr(0x0c0000082, whcr);
+
+ write_eflags(eflags);
+}
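+
+/*
+ * A worked example of the size computation above, assuming Maxmem counts
+ * 4KB pages: on a 62MB machine Maxmem = 15872, Maxmem >> 8 = 62 (MB), and
+ * ((62 + 3) >> 2) = 16, so WHCR's limit field is programmed in 4MB units,
+ * rounded up (here 64MB).
+ */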
+
+void
+enable_K6_2_wt_alloc(void)
+{
+ quad_t size;
+ u_int64_t whcr;
+ u_long eflags;
+
+ eflags = read_eflags();
+ disable_intr();
+ wbinvd();
+
+#ifdef CPU_DISABLE_CACHE
+	/*
+	 * Certain K6-2 boxes become unstable when write allocation is
+	 * enabled.
+	 *
+	 * The AMD-K6 processor provides the 64-bit Test Register 12 (TR12),
+	 * but only the Cache Inhibit (CI) bit (bit 3 of TR12) is supported.
+	 * All other bits in TR12 have no effect on the processor's operation.
+	 * The I/O Trap Restart function (bit 9 of TR12) is always enabled
+	 * on the AMD-K6.
+	 */
+ wrmsr(0x0000000e, (u_int64_t)0x0008);
+#endif
+ /* Don't assume that memory size is aligned with 4M. */
+ if (Maxmem > 0)
+ size = ((Maxmem >> 8) + 3) >> 2;
+ else
+ size = 0;
+
+	/* Limit is 4092M bytes (0x3ff 4MB units). */
+	if (size > 0x3ff)
+		size = 0x3ff;
+ whcr = (rdmsr(0xc0000082) & ~(0x3ffLL << 22)) | (size << 22);
+
+#if defined(PC98) || defined(NO_MEMORY_HOLE)
+ if (whcr & (0x3ffLL << 22)) {
+#ifdef PC98
+ /*
+		 * If bit 2 of port 0x43b is 0, disable write allocate for the
+ * 15-16M range.
+ */
+ if (!(inb(0x43b) & 4))
+ whcr &= ~(1LL << 16);
+ else
+#endif
+ whcr |= 1LL << 16;
+ }
+#else
+ /*
+	 * There is no way to know whether the 15-16M hole exists or not.
+ * Therefore, we disable write allocate for this range.
+ */
+ whcr &= ~(1LL << 16);
+#endif
+ wrmsr(0x0c0000082, whcr);
+
+ write_eflags(eflags);
+}
+#endif /* I586_CPU && CPU_WT_ALLOC */
+
+#include "opt_ddb.h"
+#ifdef DDB
+#include <ddb/ddb.h>
+#if 0
+DB_SHOW_COMMAND(cyrixreg, cyrixreg)
+{
+ u_long eflags;
+ u_int cr0;
+ u_char ccr1, ccr2, ccr3;
+ u_char ccr0 = 0, ccr4 = 0, ccr5 = 0, pcr0 = 0;
+
+ cr0 = rcr0();
+ if (strcmp(cpu_vendor,"CyrixInstead") == 0) {
+ eflags = read_eflags();
+ disable_intr();
+
+ if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) {
+ ccr0 = read_cyrix_reg(CCR0);
+ }
+ ccr1 = read_cyrix_reg(CCR1);
+ ccr2 = read_cyrix_reg(CCR2);
+ ccr3 = read_cyrix_reg(CCR3);
+ if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) {
+ write_cyrix_reg(CCR3, CCR3_MAPEN0);
+ ccr4 = read_cyrix_reg(CCR4);
+ if ((cpu == CPU_M1) || (cpu == CPU_M2))
+ ccr5 = read_cyrix_reg(CCR5);
+ else
+ pcr0 = read_cyrix_reg(PCR0);
+ write_cyrix_reg(CCR3, ccr3); /* Restore CCR3. */
+ }
+ write_eflags(eflags);
+
+ if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX))
+ printf("CCR0=%x, ", (u_int)ccr0);
+
+ printf("CCR1=%x, CCR2=%x, CCR3=%x",
+ (u_int)ccr1, (u_int)ccr2, (u_int)ccr3);
+ if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) {
+ printf(", CCR4=%x, ", (u_int)ccr4);
+ if (cpu == CPU_M1SC)
+ printf("PCR0=%x\n", pcr0);
+ else
+ printf("CCR5=%x\n", ccr5);
+ }
+ }
+ printf("CR0=%x\n", cr0);
+}
+#endif
+#endif /* DDB */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c
new file mode 100644
index 0000000000..6ab354a00c
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c
@@ -0,0 +1,326 @@
+/*-
+ * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/i386/intr_machdep.c,v 1.4 2003/11/17 06:10:14 peter Exp $
+ */
+
+/*
+ * Machine dependent interrupt code for i386. For the i386, we have to
+ * deal with different PICs. Thus, we use the passed in vector to lookup
+ * an interrupt source associated with that vector. The interrupt source
+ * describes which PIC the source belongs to and includes methods to handle
+ * that source.
+ */
+
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/lock.h>
+#include <sys/ktr.h>
+#include <sys/kernel.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+#include <machine/clock.h>
+#include <machine/intr_machdep.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
+#define MAX_STRAY_LOG 5
+
+typedef void (*mask_fn)(uintptr_t vector);
+
+static int intrcnt_index;
+static struct intsrc *interrupt_sources[NUM_IO_INTS];
+static struct mtx intr_table_lock;
+
+static void intr_init(void *__dummy);
+static void intrcnt_setname(const char *name, int index);
+static void intrcnt_updatename(struct intsrc *is);
+static void intrcnt_register(struct intsrc *is);
+
+/*
+ * Register a new interrupt source with the global interrupt system.
+ * The global interrupts need to be disabled when this function is
+ * called.
+ */
+int
+intr_register_source(struct intsrc *isrc)
+{
+ int error, vector;
+
+ vector = isrc->is_pic->pic_vector(isrc);
+ if (interrupt_sources[vector] != NULL)
+ return (EEXIST);
+ error = ithread_create(&isrc->is_ithread, (uintptr_t)isrc, 0,
+ (mask_fn)isrc->is_pic->pic_disable_source,
+ (mask_fn)isrc->is_pic->pic_enable_source, "irq%d:", vector);
+ if (error)
+ return (error);
+ mtx_lock_spin(&intr_table_lock);
+ if (interrupt_sources[vector] != NULL) {
+ mtx_unlock_spin(&intr_table_lock);
+ ithread_destroy(isrc->is_ithread);
+ return (EEXIST);
+ }
+ intrcnt_register(isrc);
+ interrupt_sources[vector] = isrc;
+ mtx_unlock_spin(&intr_table_lock);
+ return (0);
+}
+
+struct intsrc *
+intr_lookup_source(int vector)
+{
+
+ return (interrupt_sources[vector]);
+}
+
+int
+intr_add_handler(const char *name, int vector, driver_intr_t handler,
+ void *arg, enum intr_type flags, void **cookiep)
+{
+ struct intsrc *isrc;
+ int error;
+
+ isrc = intr_lookup_source(vector);
+ if (isrc == NULL)
+ return (EINVAL);
+
+ error = ithread_add_handler(isrc->is_ithread, name, handler, arg,
+ ithread_priority(flags), flags, cookiep);
+ if (error == 0) {
+ intrcnt_updatename(isrc);
+ isrc->is_pic->pic_enable_intr(isrc);
+ isrc->is_pic->pic_enable_source(isrc);
+ }
+ return (error);
+}
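+
+/*
+ * Illustrative only: a driver with an interrupt on, say, vector 5 would
+ * register a handler roughly as follows (the "foo" names are
+ * hypothetical):
+ *
+ *	void *cookie;
+ *
+ *	error = intr_add_handler("foo", 5, foo_intr, sc,
+ *	    INTR_TYPE_MISC | INTR_MPSAFE, &cookie);
+ *
+ * and later tear it down with intr_remove_handler(cookie).
+ */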
+
+int
+intr_remove_handler(void *cookie)
+{
+ int error;
+
+ error = ithread_remove_handler(cookie);
+#ifdef XXX
+ if (error == 0)
+ intrcnt_updatename(/* XXX */);
+#endif
+ return (error);
+}
+
+int
+intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol)
+{
+ struct intsrc *isrc;
+
+ isrc = intr_lookup_source(vector);
+ if (isrc == NULL)
+ return (EINVAL);
+ return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
+}
+
+void
+intr_execute_handlers(struct intsrc *isrc, struct intrframe *iframe)
+{
+ struct thread *td;
+ struct ithd *it;
+ struct intrhand *ih;
+ int error, vector;
+
+ td = curthread;
+ td->td_intr_nesting_level++;
+
+ /*
+ * We count software interrupts when we process them. The
+ * code here follows previous practice, but there's an
+ * argument for counting hardware interrupts when they're
+ * processed too.
+ */
+ atomic_add_long(isrc->is_count, 1);
+ atomic_add_int(&cnt.v_intr, 1);
+
+ it = isrc->is_ithread;
+ if (it == NULL)
+ ih = NULL;
+ else
+ ih = TAILQ_FIRST(&it->it_handlers);
+
+ /*
+ * XXX: We assume that IRQ 0 is only used for the ISA timer
+ * device (clk).
+ */
+ vector = isrc->is_pic->pic_vector(isrc);
+ if (vector == 0)
+ clkintr_pending = 1;
+
+ if (ih != NULL && ih->ih_flags & IH_FAST) {
+ /*
+ * Execute fast interrupt handlers directly.
+ * To support clock handlers, if a handler registers
+ * with a NULL argument, then we pass it a pointer to
+ * a trapframe as its argument.
+ */
+ critical_enter();
+ TAILQ_FOREACH(ih, &it->it_handlers, ih_next) {
+ MPASS(ih->ih_flags & IH_FAST);
+ CTR3(KTR_INTR, "%s: executing handler %p(%p)",
+ __func__, ih->ih_handler,
+ ih->ih_argument == NULL ? iframe :
+ ih->ih_argument);
+ if (ih->ih_argument == NULL)
+ ih->ih_handler(iframe);
+ else
+ ih->ih_handler(ih->ih_argument);
+ }
+ isrc->is_pic->pic_eoi_source(isrc);
+ error = 0;
+ /* XXX */
+ td->td_pflags &= ~TDP_OWEPREEMPT;
+ critical_exit();
+ } else {
+ /*
+ * For stray and threaded interrupts, we mask and EOI the
+ * source.
+ */
+ isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
+ if (ih == NULL)
+ error = EINVAL;
+ else
+ error = ithread_schedule(it);
+ isrc->is_pic->pic_eoi_source(isrc);
+ }
+
+ if (error == EINVAL) {
+ atomic_add_long(isrc->is_straycount, 1);
+ if (*isrc->is_straycount < MAX_STRAY_LOG)
+ log(LOG_ERR, "stray irq%d\n", vector);
+ else if (*isrc->is_straycount == MAX_STRAY_LOG)
+ log(LOG_CRIT,
+ "too many stray irq %d's: not logging anymore\n",
+ vector);
+ }
+ td->td_intr_nesting_level--;
+
+}
+
+void
+intr_resume(void)
+{
+ struct intsrc **isrc;
+ int i;
+
+ mtx_lock_spin(&intr_table_lock);
+ for (i = 0, isrc = interrupt_sources; i < NUM_IO_INTS; i++, isrc++)
+ if (*isrc != NULL && (*isrc)->is_pic->pic_resume != NULL)
+ (*isrc)->is_pic->pic_resume(*isrc);
+ mtx_unlock_spin(&intr_table_lock);
+}
+
+void
+intr_suspend(void)
+{
+ struct intsrc **isrc;
+ int i;
+
+ mtx_lock_spin(&intr_table_lock);
+ for (i = 0, isrc = interrupt_sources; i < NUM_IO_INTS; i++, isrc++)
+ if (*isrc != NULL && (*isrc)->is_pic->pic_suspend != NULL)
+ (*isrc)->is_pic->pic_suspend(*isrc);
+ mtx_unlock_spin(&intr_table_lock);
+}
+
+static void
+intrcnt_setname(const char *name, int index)
+{
+
+ snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s",
+ MAXCOMLEN, name);
+}
+
+static void
+intrcnt_updatename(struct intsrc *is)
+{
+
+ intrcnt_setname(is->is_ithread->it_td->td_proc->p_comm, is->is_index);
+}
+
+static void
+intrcnt_register(struct intsrc *is)
+{
+ char straystr[MAXCOMLEN + 1];
+
+ /* mtx_assert(&intr_table_lock, MA_OWNED); */
+ KASSERT(is->is_ithread != NULL, ("%s: isrc with no ithread", __func__));
+ is->is_index = intrcnt_index;
+ intrcnt_index += 2;
+ snprintf(straystr, MAXCOMLEN + 1, "stray irq%d",
+ is->is_pic->pic_vector(is));
+ intrcnt_updatename(is);
+ is->is_count = &intrcnt[is->is_index];
+ intrcnt_setname(straystr, is->is_index + 1);
+ is->is_straycount = &intrcnt[is->is_index + 1];
+}
+
+static void
+intr_init(void *dummy __unused)
+{
+
+ intrcnt_setname("???", 0);
+ intrcnt_index = 1;
+ mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN);
+}
+SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL)
+
+#ifdef DDB
+/*
+ * Dump data about interrupt handlers
+ */
+DB_SHOW_COMMAND(irqs, db_show_irqs)
+{
+ struct intsrc **isrc;
+ int i, quit, verbose;
+
+ quit = 0;
+ if (strcmp(modif, "v") == 0)
+ verbose = 1;
+ else
+ verbose = 0;
+ isrc = interrupt_sources;
+ db_setup_paging(db_simple_pager, &quit, DB_LINES_PER_PAGE);
+ for (i = 0; i < NUM_IO_INTS && !quit; i++, isrc++)
+ if (*isrc != NULL)
+ db_dump_ithread((*isrc)->is_ithread, verbose);
+}
+#endif
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c
new file mode 100644
index 0000000000..9892a998b2
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c
@@ -0,0 +1,850 @@
+/*-
+ * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/io_apic.c,v 1.14 2004/08/02 15:31:10 scottl Exp $");
+
+#include "opt_isa.h"
+#include "opt_no_mixed_mode.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/apicreg.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <machine/segments.h>
+
+#define IOAPIC_ISA_INTS 16
+#define IOAPIC_MEM_REGION 32
+#define IOAPIC_REDTBL_LO(i) (IOAPIC_REDTBL + (i) * 2)
+#define IOAPIC_REDTBL_HI(i) (IOAPIC_REDTBL_LO(i) + 1)
+
+#define VECTOR_EXTINT 252
+#define VECTOR_NMI 253
+#define VECTOR_SMI 254
+#define VECTOR_DISABLED 255
+
+#define DEST_NONE -1
+#define DEST_EXTINT -2
+
+#define TODO printf("%s: not implemented!\n", __func__)
+
+MALLOC_DEFINE(M_IOAPIC, "I/O APIC", "I/O APIC structures");
+
+/*
+ * New interrupt support code.
+ *
+ * XXX: we really should have the interrupt cookie passed up from new-bus
+ * just be an int pin, and not map 1:1 to interrupt vector numbers, but
+ * should use INTR_TYPE_FOO to set priority bands for device classes and do
+ * all the magic remapping of intpin to vector in here.  For now we just
+ * cheat as on ia64 and map intpin X to vector NRSVIDT + X.  Note that we
+ * assume that the first IO APIC has ISA interrupts on pins 1-15.  It is not
+ * clear how you are really supposed to figure out which IO APIC, in a
+ * system with multiple IO APICs, actually has the ISA interrupts routed to
+ * it.  As far as interrupt pin numbers go, we use the ACPI System Interrupt
+ * number model, where each IO APIC has a contiguous chunk of the System
+ * Interrupt address space.
+ */
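+
+/*
+ * Concretely (illustrative numbers only): with two 24-pin I/O APICs, the
+ * first gets intbase 0 and owns System Interrupts 0-23 and the second gets
+ * intbase 24 and owns 24-47, so a pin's vector is simply intbase + pin.
+ */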
+
+/*
+ * Direct the ExtINT pin on the first I/O APIC to a logical cluster of
+ * CPUs rather than a physical destination of just the BSP.
+ *
+ * Note: This is disabled by default as test systems seem to croak with it
+ * enabled.
+#define ENABLE_EXTINT_LOGICAL_DESTINATION
+ */
+
+struct ioapic_intsrc {
+ struct intsrc io_intsrc;
+ u_int io_intpin:8;
+ u_int io_vector:8;
+ u_int io_activehi:1;
+ u_int io_edgetrigger:1;
+ u_int io_masked:1;
+ int io_dest:5;
+ int io_bus:4;
+};
+
+struct ioapic {
+ struct pic io_pic;
+ u_int io_id:8; /* logical ID */
+ u_int io_apic_id:4;
+ u_int io_intbase:8; /* System Interrupt base */
+ u_int io_numintr:8;
+ volatile ioapic_t *io_addr; /* XXX: should use bus_space */
+ STAILQ_ENTRY(ioapic) io_next;
+ struct ioapic_intsrc io_pins[0];
+};
+
+static u_int ioapic_read(volatile ioapic_t *apic, int reg);
+static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val);
+static const char *ioapic_bus_string(int bus_type);
+static void ioapic_print_vector(struct ioapic_intsrc *intpin);
+static void ioapic_enable_source(struct intsrc *isrc);
+static void ioapic_disable_source(struct intsrc *isrc, int eoi);
+static void ioapic_eoi_source(struct intsrc *isrc);
+static void ioapic_enable_intr(struct intsrc *isrc);
+static int ioapic_vector(struct intsrc *isrc);
+static int ioapic_source_pending(struct intsrc *isrc);
+static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
+ enum intr_polarity pol);
+static void ioapic_suspend(struct intsrc *isrc);
+static void ioapic_resume(struct intsrc *isrc);
+static void ioapic_program_destination(struct ioapic_intsrc *intpin);
+static void ioapic_program_intpin(struct ioapic_intsrc *intpin);
+static void ioapic_setup_mixed_mode(struct ioapic_intsrc *intpin);
+
+static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list);
+struct pic ioapic_template = { ioapic_enable_source, ioapic_disable_source,
+ ioapic_eoi_source, ioapic_enable_intr,
+ ioapic_vector, ioapic_source_pending,
+ ioapic_suspend, ioapic_resume,
+ ioapic_config_intr };
+
+static int bsp_id, current_cluster, logical_clusters, next_ioapic_base;
+static u_int mixed_mode_enabled, next_id, program_logical_dest;
+#ifdef NO_MIXED_MODE
+static int mixed_mode_active = 0;
+#else
+static int mixed_mode_active = 1;
+#endif
+TUNABLE_INT("hw.apic.mixed_mode", &mixed_mode_active);
+
+static __inline void
+_ioapic_eoi_source(struct intsrc *isrc)
+{
+ lapic_eoi();
+}
+
+static u_int
+ioapic_read(volatile ioapic_t *apic, int reg)
+{
+
+ mtx_assert(&icu_lock, MA_OWNED);
+ apic->ioregsel = reg;
+ return (apic->iowin);
+}
+
+static void
+ioapic_write(volatile ioapic_t *apic, int reg, u_int val)
+{
+
+ mtx_assert(&icu_lock, MA_OWNED);
+ apic->ioregsel = reg;
+ apic->iowin = val;
+}
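+
+/*
+ * Every redirection-table update below follows the same indirect pattern:
+ * select a register through ioregsel, then access it through iowin, all
+ * under icu_lock.  A sketch (illustrative only) of masking a pin:
+ */
+#if 0
+	mtx_lock_spin(&icu_lock);
+	low = ioapic_read(apic, IOAPIC_REDTBL_LO(pin));
+	ioapic_write(apic, IOAPIC_REDTBL_LO(pin), low | IOART_INTMSET);
+	mtx_unlock_spin(&icu_lock);
+#endif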
+
+static const char *
+ioapic_bus_string(int bus_type)
+{
+
+ switch (bus_type) {
+ case APIC_BUS_ISA:
+ return ("ISA");
+ case APIC_BUS_EISA:
+ return ("EISA");
+ case APIC_BUS_PCI:
+ return ("PCI");
+ default:
+ return ("unknown");
+ }
+}
+
+static void
+ioapic_print_vector(struct ioapic_intsrc *intpin)
+{
+
+ switch (intpin->io_vector) {
+ case VECTOR_DISABLED:
+ printf("disabled");
+ break;
+ case VECTOR_EXTINT:
+ printf("ExtINT");
+ break;
+ case VECTOR_NMI:
+ printf("NMI");
+ break;
+ case VECTOR_SMI:
+ printf("SMI");
+ break;
+ default:
+ printf("%s IRQ %u", ioapic_bus_string(intpin->io_bus),
+ intpin->io_vector);
+ }
+}
+
+static void
+ioapic_enable_source(struct intsrc *isrc)
+{
+ struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
+ struct ioapic *io = (struct ioapic *)isrc->is_pic;
+ uint32_t flags;
+
+ mtx_lock_spin(&icu_lock);
+ if (intpin->io_masked) {
+ flags = ioapic_read(io->io_addr,
+ IOAPIC_REDTBL_LO(intpin->io_intpin));
+ flags &= ~(IOART_INTMASK);
+ ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin),
+ flags);
+ intpin->io_masked = 0;
+ }
+ mtx_unlock_spin(&icu_lock);
+}
+
+static void
+ioapic_disable_source(struct intsrc *isrc, int eoi)
+{
+ struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
+ struct ioapic *io = (struct ioapic *)isrc->is_pic;
+ uint32_t flags;
+
+ mtx_lock_spin(&icu_lock);
+ if (!intpin->io_masked && !intpin->io_edgetrigger) {
+ flags = ioapic_read(io->io_addr,
+ IOAPIC_REDTBL_LO(intpin->io_intpin));
+ flags |= IOART_INTMSET;
+ ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin),
+ flags);
+ intpin->io_masked = 1;
+ }
+
+ if (eoi == PIC_EOI)
+ _ioapic_eoi_source(isrc);
+
+ mtx_unlock_spin(&icu_lock);
+}
+
+static void
+ioapic_eoi_source(struct intsrc *isrc)
+{
+
+ _ioapic_eoi_source(isrc);
+}
+
+/*
+ * Completely program an intpin based on the data in its interrupt source
+ * structure.
+ */
+static void
+ioapic_program_intpin(struct ioapic_intsrc *intpin)
+{
+ struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic;
+ uint32_t low, high, value;
+
+ /*
+ * For pins routed via mixed mode or disabled, just ensure that
+ * they are masked.
+ */
+ if (intpin->io_dest == DEST_EXTINT ||
+ intpin->io_vector == VECTOR_DISABLED) {
+ low = ioapic_read(io->io_addr,
+ IOAPIC_REDTBL_LO(intpin->io_intpin));
+ if ((low & IOART_INTMASK) == IOART_INTMCLR)
+ ioapic_write(io->io_addr,
+ IOAPIC_REDTBL_LO(intpin->io_intpin),
+ low | IOART_INTMSET);
+ return;
+ }
+
+ /* Set the destination. */
+ if (intpin->io_dest == DEST_NONE) {
+ low = IOART_DESTPHY;
+ high = bsp_id << APIC_ID_SHIFT;
+ } else {
+ low = IOART_DESTLOG;
+ high = (intpin->io_dest << APIC_ID_CLUSTER_SHIFT |
+ APIC_ID_CLUSTER_ID) << APIC_ID_SHIFT;
+ }
+
+ /* Program the rest of the low word. */
+ if (intpin->io_edgetrigger)
+ low |= IOART_TRGREDG;
+ else
+ low |= IOART_TRGRLVL;
+ if (intpin->io_activehi)
+ low |= IOART_INTAHI;
+ else
+ low |= IOART_INTALO;
+ if (intpin->io_masked)
+ low |= IOART_INTMSET;
+ switch (intpin->io_vector) {
+ case VECTOR_EXTINT:
+ KASSERT(intpin->io_edgetrigger,
+ ("EXTINT not edge triggered"));
+ low |= IOART_DELEXINT;
+ break;
+ case VECTOR_NMI:
+ KASSERT(intpin->io_edgetrigger,
+ ("NMI not edge triggered"));
+ low |= IOART_DELNMI;
+ break;
+ case VECTOR_SMI:
+ KASSERT(intpin->io_edgetrigger,
+ ("SMI not edge triggered"));
+ low |= IOART_DELSMI;
+ break;
+ default:
+ low |= IOART_DELLOPRI | apic_irq_to_idt(intpin->io_vector);
+ }
+
+ /* Write the values to the APIC. */
+ mtx_lock_spin(&icu_lock);
+ ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low);
+ value = ioapic_read(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin));
+ value &= ~IOART_DEST;
+ value |= high;
+ ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), value);
+ mtx_unlock_spin(&icu_lock);
+}
+
+/*
+ * Program an individual intpin's logical destination.
+ */
+static void
+ioapic_program_destination(struct ioapic_intsrc *intpin)
+{
+ struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic;
+
+ KASSERT(intpin->io_dest != DEST_NONE,
+ ("intpin not assigned to a cluster"));
+ KASSERT(intpin->io_dest != DEST_EXTINT,
+ ("intpin routed via ExtINT"));
+ if (bootverbose) {
+ printf("ioapic%u: routing intpin %u (", io->io_id,
+ intpin->io_intpin);
+ ioapic_print_vector(intpin);
+ printf(") to cluster %u\n", intpin->io_dest);
+ }
+ ioapic_program_intpin(intpin);
+}
+
+static void
+ioapic_assign_cluster(struct ioapic_intsrc *intpin)
+{
+
+ /*
+ * Assign this intpin to a logical APIC cluster in a
+ * round-robin fashion. We don't actually use the logical
+ * destination for this intpin until after all the CPU's
+ * have been started so that we don't end up with interrupts
+ * that don't go anywhere. Another alternative might be to
+ * start up the CPU's earlier so that they can handle interrupts
+ * sooner.
+ */
+ intpin->io_dest = current_cluster;
+ current_cluster++;
+ if (current_cluster >= logical_clusters)
+ current_cluster = 0;
+ if (program_logical_dest)
+ ioapic_program_destination(intpin);
+}
+
+static void
+ioapic_enable_intr(struct intsrc *isrc)
+{
+ struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
+
+ KASSERT(intpin->io_dest != DEST_EXTINT,
+ ("ExtINT pin trying to use ioapic enable_intr method"));
+ if (intpin->io_dest == DEST_NONE) {
+ ioapic_assign_cluster(intpin);
+ lapic_enable_intr(intpin->io_vector);
+ }
+}
+
+static int
+ioapic_vector(struct intsrc *isrc)
+{
+ struct ioapic_intsrc *pin;
+
+ pin = (struct ioapic_intsrc *)isrc;
+ return (pin->io_vector);
+}
+
+static int
+ioapic_source_pending(struct intsrc *isrc)
+{
+ struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
+
+ return (lapic_intr_pending(intpin->io_vector));
+}
+
+static int
+ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
+ enum intr_polarity pol)
+{
+ struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
+ struct ioapic *io = (struct ioapic *)isrc->is_pic;
+ int changed;
+
+ KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM),
+ ("%s: Conforming trigger or polarity\n", __func__));
+
+ /*
+ * EISA interrupts always use active high polarity, so don't allow
+ * them to be set to active low.
+ *
+ * XXX: Should we write to the ELCR if the trigger mode changes for
+ * an EISA IRQ?
+ */
+ if (intpin->io_bus == APIC_BUS_EISA)
+ pol = INTR_POLARITY_HIGH;
+ changed = 0;
+ if (intpin->io_edgetrigger != (trig == INTR_TRIGGER_EDGE)) {
+ if (bootverbose)
+ printf("ioapic%u: Changing trigger for pin %u to %s\n",
+ io->io_id, intpin->io_intpin,
+ trig == INTR_TRIGGER_EDGE ? "edge" : "level");
+ intpin->io_edgetrigger = (trig == INTR_TRIGGER_EDGE);
+ changed++;
+ }
+ if (intpin->io_activehi != (pol == INTR_POLARITY_HIGH)) {
+ if (bootverbose)
+ printf("ioapic%u: Changing polarity for pin %u to %s\n",
+ io->io_id, intpin->io_intpin,
+ pol == INTR_POLARITY_HIGH ? "high" : "low");
+ intpin->io_activehi = (pol == INTR_POLARITY_HIGH);
+ changed++;
+ }
+ if (changed)
+ ioapic_program_intpin(intpin);
+ return (0);
+}
+
+static void
+ioapic_suspend(struct intsrc *isrc)
+{
+
+ TODO;
+}
+
+static void
+ioapic_resume(struct intsrc *isrc)
+{
+
+ ioapic_program_intpin((struct ioapic_intsrc *)isrc);
+}
+
+/*
+ * APIC enumerators call this function to indicate that the 8259A AT PICs
+ * are available and that mixed mode can be used.
+ */
+void
+ioapic_enable_mixed_mode(void)
+{
+
+ mixed_mode_enabled = 1;
+}
+
+/*
+ * Allocate and return a logical cluster ID. Note that the first time
+ * this is called, it returns cluster 0. ioapic_enable_intr() treats
+ * the two cases of logical_clusters == 0 and logical_clusters == 1 the
+ * same: one cluster of ID 0 exists. The logical_clusters == 0 case is
+ * for UP kernels, which should never call this function.
+ */
+int
+ioapic_next_logical_cluster(void)
+{
+
+ if (logical_clusters >= APIC_MAX_CLUSTER)
+ panic("WARNING: Local APIC cluster IDs exhausted!");
+ return (logical_clusters++);
+}
+
+/*
+ * Create a plain I/O APIC object.
+ */
+void *
+ioapic_create(uintptr_t addr, int32_t apic_id, int intbase)
+{
+ struct ioapic *io;
+ struct ioapic_intsrc *intpin;
+ volatile ioapic_t *apic;
+ u_int numintr, i;
+ uint32_t value;
+
+ apic = (ioapic_t *)pmap_mapdev(addr, IOAPIC_MEM_REGION);
+ mtx_lock_spin(&icu_lock);
+ numintr = ((ioapic_read(apic, IOAPIC_VER) & IOART_VER_MAXREDIR) >>
+ MAXREDIRSHIFT) + 1;
+ mtx_unlock_spin(&icu_lock);
+ io = malloc(sizeof(struct ioapic) +
+ numintr * sizeof(struct ioapic_intsrc), M_IOAPIC, M_WAITOK);
+ io->io_pic = ioapic_template;
+ mtx_lock_spin(&icu_lock);
+ io->io_id = next_id++;
+ io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT;
+ if (apic_id != -1 && io->io_apic_id != apic_id) {
+ ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT);
+ mtx_unlock_spin(&icu_lock);
+ io->io_apic_id = apic_id;
+ printf("ioapic%u: Changing APIC ID to %d\n", io->io_id,
+ apic_id);
+ } else
+ mtx_unlock_spin(&icu_lock);
+ if (intbase == -1) {
+ intbase = next_ioapic_base;
+ printf("ioapic%u: Assuming intbase of %d\n", io->io_id,
+ intbase);
+ } else if (intbase != next_ioapic_base)
+ printf("ioapic%u: WARNING: intbase %d != expected base %d\n",
+ io->io_id, intbase, next_ioapic_base);
+ io->io_intbase = intbase;
+ next_ioapic_base = intbase + numintr;
+ io->io_numintr = numintr;
+ io->io_addr = apic;
+
+ /*
+ * Initialize pins. Start off with interrupts disabled. Default
+ * to active-hi and edge-triggered for ISA interrupts and active-lo
+ * and level-triggered for all others.
+ */
+ bzero(io->io_pins, sizeof(struct ioapic_intsrc) * numintr);
+ mtx_lock_spin(&icu_lock);
+ for (i = 0, intpin = io->io_pins; i < numintr; i++, intpin++) {
+ intpin->io_intsrc.is_pic = (struct pic *)io;
+ intpin->io_intpin = i;
+ intpin->io_vector = intbase + i;
+
+ /*
+ * Assume that pin 0 on the first I/O APIC is an ExtINT pin
+ * and that pins 1-15 are ISA interrupts. Assume that all
+ * other pins are PCI interrupts.
+ */
+ if (intpin->io_vector == 0)
+ ioapic_set_extint(io, i);
+ else if (intpin->io_vector < IOAPIC_ISA_INTS) {
+ intpin->io_bus = APIC_BUS_ISA;
+ intpin->io_activehi = 1;
+ intpin->io_edgetrigger = 1;
+ intpin->io_masked = 1;
+ } else {
+ intpin->io_bus = APIC_BUS_PCI;
+ intpin->io_activehi = 0;
+ intpin->io_edgetrigger = 0;
+ intpin->io_masked = 1;
+ }
+
+ /*
+ * Route interrupts to the BSP by default using physical
+ * addressing. Vectored interrupts get readdressed using
+ * logical IDs to CPU clusters when they are enabled.
+ */
+ intpin->io_dest = DEST_NONE;
+ if (bootverbose && intpin->io_vector != VECTOR_DISABLED) {
+ printf("ioapic%u: intpin %d -> ", io->io_id, i);
+ ioapic_print_vector(intpin);
+ printf(" (%s, %s)\n", intpin->io_edgetrigger ?
+ "edge" : "level", intpin->io_activehi ? "high" :
+ "low");
+ }
+ value = ioapic_read(apic, IOAPIC_REDTBL_LO(i));
+ ioapic_write(apic, IOAPIC_REDTBL_LO(i), value | IOART_INTMSET);
+ }
+ mtx_unlock_spin(&icu_lock);
+
+ return (io);
+}
+
+int
+ioapic_get_vector(void *cookie, u_int pin)
+{
+ struct ioapic *io;
+
+ io = (struct ioapic *)cookie;
+ if (pin >= io->io_numintr)
+ return (-1);
+ return (io->io_pins[pin].io_vector);
+}
+
+int
+ioapic_disable_pin(void *cookie, u_int pin)
+{
+ struct ioapic *io;
+
+ io = (struct ioapic *)cookie;
+ if (pin >= io->io_numintr)
+ return (EINVAL);
+ if (io->io_pins[pin].io_vector == VECTOR_DISABLED)
+ return (EINVAL);
+ io->io_pins[pin].io_vector = VECTOR_DISABLED;
+ if (bootverbose)
+ printf("ioapic%u: intpin %d disabled\n", io->io_id, pin);
+ return (0);
+}
+
+int
+ioapic_remap_vector(void *cookie, u_int pin, int vector)
+{
+ struct ioapic *io;
+
+ io = (struct ioapic *)cookie;
+ if (pin >= io->io_numintr || vector < 0)
+ return (EINVAL);
+ if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
+ return (EINVAL);
+ io->io_pins[pin].io_vector = vector;
+ if (bootverbose)
+ printf("ioapic%u: Routing IRQ %d -> intpin %d\n", io->io_id,
+ vector, pin);
+ return (0);
+}
+
+int
+ioapic_set_bus(void *cookie, u_int pin, int bus_type)
+{
+ struct ioapic *io;
+
+ if (bus_type < 0 || bus_type > APIC_BUS_MAX)
+ return (EINVAL);
+ io = (struct ioapic *)cookie;
+ if (pin >= io->io_numintr)
+ return (EINVAL);
+ if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
+ return (EINVAL);
+ io->io_pins[pin].io_bus = bus_type;
+ if (bootverbose)
+ printf("ioapic%u: intpin %d bus %s\n", io->io_id, pin,
+ ioapic_bus_string(bus_type));
+ return (0);
+}
+
+int
+ioapic_set_nmi(void *cookie, u_int pin)
+{
+ struct ioapic *io;
+
+ io = (struct ioapic *)cookie;
+ if (pin >= io->io_numintr)
+ return (EINVAL);
+ if (io->io_pins[pin].io_vector == VECTOR_NMI)
+ return (0);
+ if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
+ return (EINVAL);
+ io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN;
+ io->io_pins[pin].io_vector = VECTOR_NMI;
+ io->io_pins[pin].io_masked = 0;
+ io->io_pins[pin].io_edgetrigger = 1;
+ io->io_pins[pin].io_activehi = 1;
+ if (bootverbose)
+ printf("ioapic%u: Routing NMI -> intpin %d\n",
+ io->io_id, pin);
+ return (0);
+}
+
+int
+ioapic_set_smi(void *cookie, u_int pin)
+{
+ struct ioapic *io;
+
+ io = (struct ioapic *)cookie;
+ if (pin >= io->io_numintr)
+ return (EINVAL);
+ if (io->io_pins[pin].io_vector == VECTOR_SMI)
+ return (0);
+ if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
+ return (EINVAL);
+ io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN;
+ io->io_pins[pin].io_vector = VECTOR_SMI;
+ io->io_pins[pin].io_masked = 0;
+ io->io_pins[pin].io_edgetrigger = 1;
+ io->io_pins[pin].io_activehi = 1;
+ if (bootverbose)
+ printf("ioapic%u: Routing SMI -> intpin %d\n",
+ io->io_id, pin);
+ return (0);
+}
+
+int
+ioapic_set_extint(void *cookie, u_int pin)
+{
+ struct ioapic *io;
+
+ io = (struct ioapic *)cookie;
+ if (pin >= io->io_numintr)
+ return (EINVAL);
+ if (io->io_pins[pin].io_vector == VECTOR_EXTINT)
+ return (0);
+ if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
+ return (EINVAL);
+ io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN;
+ io->io_pins[pin].io_vector = VECTOR_EXTINT;
+
+ /* Enable this pin if mixed mode is available and active. */
+ if (mixed_mode_enabled && mixed_mode_active)
+ io->io_pins[pin].io_masked = 0;
+ else
+ io->io_pins[pin].io_masked = 1;
+ io->io_pins[pin].io_edgetrigger = 1;
+ io->io_pins[pin].io_activehi = 1;
+ if (bootverbose)
+ printf("ioapic%u: Routing external 8259A's -> intpin %d\n",
+ io->io_id, pin);
+ return (0);
+}
+
+int
+ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol)
+{
+ struct ioapic *io;
+
+ io = (struct ioapic *)cookie;
+ if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM)
+ return (EINVAL);
+ if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
+ return (EINVAL);
+ io->io_pins[pin].io_activehi = (pol == INTR_POLARITY_HIGH);
+ if (bootverbose)
+ printf("ioapic%u: intpin %d polarity: %s\n", io->io_id, pin,
+ pol == INTR_POLARITY_HIGH ? "high" : "low");
+ return (0);
+}
+
+int
+ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger)
+{
+ struct ioapic *io;
+
+ io = (struct ioapic *)cookie;
+ if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM)
+ return (EINVAL);
+ if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
+ return (EINVAL);
+ io->io_pins[pin].io_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
+ if (bootverbose)
+ printf("ioapic%u: intpin %d trigger: %s\n", io->io_id, pin,
+ trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
+ return (0);
+}
+
+/*
+ * Register a complete I/O APIC object with the interrupt subsystem.
+ */
+void
+ioapic_register(void *cookie)
+{
+ struct ioapic_intsrc *pin;
+ struct ioapic *io;
+ volatile ioapic_t *apic;
+ uint32_t flags;
+ int i;
+
+ io = (struct ioapic *)cookie;
+ apic = io->io_addr;
+ mtx_lock_spin(&icu_lock);
+ flags = ioapic_read(apic, IOAPIC_VER) & IOART_VER_VERSION;
+ STAILQ_INSERT_TAIL(&ioapic_list, io, io_next);
+ mtx_unlock_spin(&icu_lock);
+ printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n",
+ io->io_id, flags >> 4, flags & 0xf, io->io_intbase,
+ io->io_intbase + io->io_numintr - 1);
+ bsp_id = PCPU_GET(apic_id);
+ for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) {
+ /*
+ * Finish initializing the pins by programming the vectors
+ * and delivery mode.
+ */
+ if (pin->io_vector == VECTOR_DISABLED)
+ continue;
+ ioapic_program_intpin(pin);
+ if (pin->io_vector >= NUM_IO_INTS)
+ continue;
+ /*
+ * Route IRQ0 via the 8259A using mixed mode if mixed mode
+ * is available and turned on.
+ */
+ if (pin->io_vector == 0 && mixed_mode_active &&
+ mixed_mode_enabled)
+ ioapic_setup_mixed_mode(pin);
+ else
+ intr_register_source(&pin->io_intsrc);
+ }
+}
+
+/*
+ * Program all the intpins to use logical destinations once the APs
+ * have been launched.
+ */
+static void
+ioapic_set_logical_destinations(void *arg __unused)
+{
+ struct ioapic *io;
+ int i;
+
+ program_logical_dest = 1;
+ STAILQ_FOREACH(io, &ioapic_list, io_next)
+ for (i = 0; i < io->io_numintr; i++)
+ if (io->io_pins[i].io_dest != DEST_NONE &&
+ io->io_pins[i].io_dest != DEST_EXTINT)
+ ioapic_program_destination(&io->io_pins[i]);
+}
+SYSINIT(ioapic_destinations, SI_SUB_SMP, SI_ORDER_SECOND,
+ ioapic_set_logical_destinations, NULL)
+
+/*
+ * Support for mixed-mode interrupt sources. These sources route an ISA
+ * IRQ through the 8259As via the ExtINT on pin 0 of the I/O APIC that
+ * routes the ISA interrupts. We just ignore the intpins that use this
+ * mode and allow the atpic driver to register its interrupt source for
+ * that IRQ instead.
+ */
+
+static void
+ioapic_setup_mixed_mode(struct ioapic_intsrc *intpin)
+{
+ struct ioapic_intsrc *extint;
+ struct ioapic *io;
+
+ /*
+ * Mark the associated I/O APIC intpin as being delivered via
+ * ExtINT and enable the ExtINT pin on the I/O APIC if needed.
+ */
+ intpin->io_dest = DEST_EXTINT;
+ io = (struct ioapic *)intpin->io_intsrc.is_pic;
+ extint = &io->io_pins[0];
+ if (extint->io_vector != VECTOR_EXTINT)
+ panic("Can't find ExtINT pin to route through!");
+#ifdef ENABLE_EXTINT_LOGICAL_DESTINATION
+ if (extint->io_dest == DEST_NONE)
+ ioapic_assign_cluster(extint);
+#endif
+}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c
new file mode 100644
index 0000000000..8fb7f9f12e
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c
@@ -0,0 +1,762 @@
+/*-
+ * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
+ * Copyright (c) 1996, by Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Local APIC support on Pentium and later processors.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/local_apic.c,v 1.9 2004/07/14 18:12:15 jhb Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/pcpu.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/apicreg.h>
+#include <machine/cputypes.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <machine/md_var.h>
+#include <machine/smp.h>
+#include <machine/specialreg.h>
+
+/*
+ * We can handle up to 60 APICs via our logical cluster IDs, but currently
+ * the physical IDs on Intel processors up to the Pentium 4 are limited to
+ * 16.
+ */
+#define MAX_APICID 16
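+
+/*
+ * (A worked example of the "60" above, assuming the usual apicvar.h
+ * values of APIC_MAX_CLUSTER == 14 and APIC_MAX_INTRACLUSTER_ID == 3:
+ * 15 clusters of 4 local APICs each gives 15 * 4 == 60 logical IDs.)
+ */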
+
+/* Sanity checks on IDT vectors. */
+CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS <= APIC_LOCAL_INTS);
+CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);
+
+/*
+ * Support for local APICs. Local APICs manage interrupts on each
+ * individual processor as opposed to I/O APICs which receive interrupts
+ * from I/O devices and then forward them on to the local APICs.
+ *
+ * Local APICs can also send interrupts to each other thus providing the
+ * mechanism for IPIs.
+ */
+
+struct lvt {
+ u_int lvt_edgetrigger:1;
+ u_int lvt_activehi:1;
+ u_int lvt_masked:1;
+ u_int lvt_active:1;
+ u_int lvt_mode:16;
+ u_int lvt_vector:8;
+};
+
+struct lapic {
+ struct lvt la_lvts[LVT_MAX + 1];
+ u_int la_id:8;
+ u_int la_cluster:4;
+ u_int la_cluster_id:2;
+ u_int la_present:1;
+} static lapics[MAX_APICID];
+
+/* XXX: should thermal be an NMI? */
+
+/* Global defaults for local APIC LVT entries. */
+static struct lvt lvts[LVT_MAX + 1] = {
+ { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */
+ { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */
+ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Timer: needs a vector */
+ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Error: needs a vector */
+ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* PMC */
+ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Thermal: needs a vector */
+};
+
+static inthand_t *ioint_handlers[] = {
+ NULL, /* 0 - 31 */
+ IDTVEC(apic_isr1), /* 32 - 63 */
+ IDTVEC(apic_isr2), /* 64 - 95 */
+ IDTVEC(apic_isr3), /* 96 - 127 */
+ IDTVEC(apic_isr4), /* 128 - 159 */
+ IDTVEC(apic_isr5), /* 160 - 191 */
+ IDTVEC(apic_isr6), /* 192 - 223 */
+ IDTVEC(apic_isr7), /* 224 - 255 */
+};
+
+volatile lapic_t *lapic;
+
+static uint32_t
+lvt_mode(struct lapic *la, u_int pin, uint32_t value)
+{
+ struct lvt *lvt;
+
+ KASSERT(pin <= LVT_MAX, ("%s: pin %u out of range", __func__, pin));
+ if (la->la_lvts[pin].lvt_active)
+ lvt = &la->la_lvts[pin];
+ else
+ lvt = &lvts[pin];
+
+ value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
+ APIC_LVT_VECTOR);
+ if (lvt->lvt_edgetrigger == 0)
+ value |= APIC_LVT_TM;
+ if (lvt->lvt_activehi == 0)
+ value |= APIC_LVT_IIPP_INTALO;
+ if (lvt->lvt_masked)
+ value |= APIC_LVT_M;
+ value |= lvt->lvt_mode;
+ switch (lvt->lvt_mode) {
+ case APIC_LVT_DM_NMI:
+ case APIC_LVT_DM_SMI:
+ case APIC_LVT_DM_INIT:
+ case APIC_LVT_DM_EXTINT:
+ if (!lvt->lvt_edgetrigger) {
+ printf("lapic%u: Forcing LINT%u to edge trigger\n",
+ la->la_id, pin);
+ value |= APIC_LVT_TM;
+ }
+ /* Use a vector of 0. */
+ break;
+ case APIC_LVT_DM_FIXED:
+#if 0
+ value |= lvt->lvt_vector;
+#else
+ panic("Fixed LINT pins not supported");
+#endif
+ break;
+ default:
+ panic("bad APIC LVT delivery mode: %#x\n", value);
+ }
+ return (value);
+}
+
+/*
+ * Map the local APIC and setup necessary interrupt vectors.
+ */
+void
+lapic_init(uintptr_t addr)
+{
+ u_int32_t value;
+
+ /* Map the local APIC and setup the spurious interrupt handler. */
+ KASSERT(trunc_page(addr) == addr,
+ ("local APIC not aligned on a page boundary"));
+ lapic = (lapic_t *)pmap_mapdev(addr, sizeof(lapic_t));
+ setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+
+ /* Perform basic initialization of the BSP's local APIC. */
+ value = lapic->svr;
+ value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
+ value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT);
+ lapic->svr = value;
+
+ /* Set BSP's per-CPU local APIC ID. */
+ PCPU_SET(apic_id, lapic_id());
+
+ /* XXX: timer/error/thermal interrupts */
+}
+
+/*
+ * Create a local APIC instance.
+ */
+void
+lapic_create(u_int apic_id, int boot_cpu)
+{
+ int i;
+
+ if (apic_id >= MAX_APICID) {
+ printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
+ if (boot_cpu)
+ panic("Can't ignore BSP");
+ return;
+ }
+ KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
+ apic_id));
+
+ /*
+ * Assume no local LVT overrides and a cluster of 0 and
+ * intra-cluster ID of 0.
+ */
+ lapics[apic_id].la_present = 1;
+ lapics[apic_id].la_id = apic_id;
+ for (i = 0; i < LVT_MAX; i++) {
+ lapics[apic_id].la_lvts[i] = lvts[i];
+ lapics[apic_id].la_lvts[i].lvt_active = 0;
+ }
+
+#ifdef SMP
+ cpu_add(apic_id, boot_cpu);
+#endif
+}
+
+/*
+ * Dump contents of local APIC registers
+ */
+void
+lapic_dump(const char* str)
+{
+
+ printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
+ printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n",
+ lapic->id, lapic->version, lapic->ldr, lapic->dfr);
+ printf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
+ lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
+}
+
+void
+lapic_enable_intr(u_int irq)
+{
+ u_int vector;
+
+ vector = apic_irq_to_idt(irq);
+ KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
+ KASSERT(ioint_handlers[vector / 32] != NULL,
+ ("No ISR handler for IRQ %u", irq));
+ setidt(vector, ioint_handlers[vector / 32], SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+}
+
+void
+lapic_setup(void)
+{
+ struct lapic *la;
+ u_int32_t value, maxlvt;
+ register_t eflags;
+
+ la = &lapics[lapic_id()];
+ KASSERT(la->la_present, ("missing APIC structure"));
+ eflags = intr_disable();
+ maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
+
+ /* Program LINT[01] LVT entries. */
+ lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0);
+ lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1);
+
+ /* XXX: more LVT entries */
+
+ /* Clear the TPR. */
+ value = lapic->tpr;
+ value &= ~APIC_TPR_PRIO;
+ lapic->tpr = value;
+
+ /* Use the cluster model for logical IDs. */
+ value = lapic->dfr;
+ value &= ~APIC_DFR_MODEL_MASK;
+ value |= APIC_DFR_MODEL_CLUSTER;
+ lapic->dfr = value;
+
+ /* Set this APIC's logical ID. */
+ value = lapic->ldr;
+ value &= ~APIC_ID_MASK;
+ value |= (la->la_cluster << APIC_ID_CLUSTER_SHIFT |
+ 1 << la->la_cluster_id) << APIC_ID_SHIFT;
+ lapic->ldr = value;
+
+ /* Setup spurious vector and enable the local APIC. */
+ value = lapic->svr;
+ value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
+ value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT);
+ lapic->svr = value;
+ intr_restore(eflags);
+}
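+
+#if 0
+/*
+ * A minimal sketch (not compiled) of the logical ID computation above,
+ * assuming the usual apicreg.h values APIC_ID_CLUSTER_SHIFT == 4 and
+ * APIC_ID_SHIFT == 24.  For example, cluster 2 with intra-cluster ID 1
+ * yields ((2 << 4) | (1 << 1)) << 24 == 0x22000000.
+ */
+static uint32_t
+ldr_sketch(u_int cluster, u_int cluster_id)
+{
+
+	return ((cluster << 4 | 1 << cluster_id) << 24);
+}
+#endif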
+
+void
+lapic_disable(void)
+{
+ uint32_t value;
+
+ /* Software disable the local APIC. */
+ value = lapic->svr;
+ value &= ~APIC_SVR_SWEN;
+ lapic->svr = value;
+}
+
+int
+lapic_id(void)
+{
+
+ KASSERT(lapic != NULL, ("local APIC is not mapped"));
+ return (lapic->id >> APIC_ID_SHIFT);
+}
+
+int
+lapic_intr_pending(u_int vector)
+{
+ volatile u_int32_t *irr;
+
+ /*
+ * The IRR registers are an array of 128-bit registers each of
+	 * which only describes 32 interrupts in its low 32 bits.  Thus,
+	 * we divide the vector by 32 to get the 128-bit index.  We then
+	 * multiply that index by 4 to get the equivalent index from
+	 * treating the IRR as an array of 32-bit registers.  Finally, we
+	 * take the vector modulo 32 to determine the individual bit to
+ * test.
+ */
+ irr = &lapic->irr0;
+ return (irr[(vector / 32) * 4] & 1 << (vector % 32));
+}
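+
+#if 0
+/*
+ * A minimal sketch (not compiled) of the index math above: e.g. vector
+ * 0x41 (65) lives in the third 128-bit IRR register, i.e. 32-bit word
+ * (65 / 32) * 4 == 8 counting from irr0, bit 65 % 32 == 1.
+ */
+static void
+irr_locate(u_int vector, u_int *word, u_int *bit)
+{
+
+	*word = (vector / 32) * 4;	/* 32-bit word offset from irr0 */
+	*bit = vector % 32;		/* bit within that word */
+}
+#endif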
+
+void
+lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
+{
+ struct lapic *la;
+
+ KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
+ __func__, apic_id));
+ KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
+ __func__, cluster));
+ KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
+ ("%s: intra cluster id %u too big", __func__, cluster_id));
+ la = &lapics[apic_id];
+ la->la_cluster = cluster;
+ la->la_cluster_id = cluster_id;
+}
+
+int
+lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
+{
+
+ if (pin > LVT_MAX)
+ return (EINVAL);
+ if (apic_id == APIC_ID_ALL) {
+ lvts[pin].lvt_masked = masked;
+ if (bootverbose)
+ printf("lapic:");
+ } else {
+ KASSERT(lapics[apic_id].la_present,
+ ("%s: missing APIC %u", __func__, apic_id));
+ lapics[apic_id].la_lvts[pin].lvt_masked = masked;
+ lapics[apic_id].la_lvts[pin].lvt_active = 1;
+ if (bootverbose)
+ printf("lapic%u:", apic_id);
+ }
+ if (bootverbose)
+ printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
+ return (0);
+}
+
+int
+lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
+{
+ struct lvt *lvt;
+
+ if (pin > LVT_MAX)
+ return (EINVAL);
+ if (apic_id == APIC_ID_ALL) {
+ lvt = &lvts[pin];
+ if (bootverbose)
+ printf("lapic:");
+ } else {
+ KASSERT(lapics[apic_id].la_present,
+ ("%s: missing APIC %u", __func__, apic_id));
+ lvt = &lapics[apic_id].la_lvts[pin];
+ lvt->lvt_active = 1;
+ if (bootverbose)
+ printf("lapic%u:", apic_id);
+ }
+ lvt->lvt_mode = mode;
+ switch (mode) {
+ case APIC_LVT_DM_NMI:
+ case APIC_LVT_DM_SMI:
+ case APIC_LVT_DM_INIT:
+ case APIC_LVT_DM_EXTINT:
+ lvt->lvt_edgetrigger = 1;
+ lvt->lvt_activehi = 1;
+ if (mode == APIC_LVT_DM_EXTINT)
+ lvt->lvt_masked = 1;
+ else
+ lvt->lvt_masked = 0;
+ break;
+ default:
+ panic("Unsupported delivery mode: 0x%x\n", mode);
+ }
+ if (bootverbose) {
+ printf(" Routing ");
+ switch (mode) {
+ case APIC_LVT_DM_NMI:
+ printf("NMI");
+ break;
+ case APIC_LVT_DM_SMI:
+ printf("SMI");
+ break;
+ case APIC_LVT_DM_INIT:
+ printf("INIT");
+ break;
+ case APIC_LVT_DM_EXTINT:
+ printf("ExtINT");
+ break;
+ }
+ printf(" -> LINT%u\n", pin);
+ }
+ return (0);
+}
+
+int
+lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
+{
+
+ if (pin > LVT_MAX || pol == INTR_POLARITY_CONFORM)
+ return (EINVAL);
+ if (apic_id == APIC_ID_ALL) {
+ lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
+ if (bootverbose)
+ printf("lapic:");
+ } else {
+ KASSERT(lapics[apic_id].la_present,
+ ("%s: missing APIC %u", __func__, apic_id));
+ lapics[apic_id].la_lvts[pin].lvt_active = 1;
+ lapics[apic_id].la_lvts[pin].lvt_activehi =
+ (pol == INTR_POLARITY_HIGH);
+ if (bootverbose)
+ printf("lapic%u:", apic_id);
+ }
+ if (bootverbose)
+ printf(" LINT%u polarity: active-%s\n", pin,
+ pol == INTR_POLARITY_HIGH ? "high" : "low");
+ return (0);
+}
+
+int
+lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger)
+{
+
+ if (pin > LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
+ return (EINVAL);
+ if (apic_id == APIC_ID_ALL) {
+ lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
+ if (bootverbose)
+ printf("lapic:");
+ } else {
+ KASSERT(lapics[apic_id].la_present,
+ ("%s: missing APIC %u", __func__, apic_id));
+ lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
+ (trigger == INTR_TRIGGER_EDGE);
+ lapics[apic_id].la_lvts[pin].lvt_active = 1;
+ if (bootverbose)
+ printf("lapic%u:", apic_id);
+ }
+ if (bootverbose)
+ printf(" LINT%u trigger: %s\n", pin,
+ trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
+ return (0);
+}
+
+void
+lapic_eoi(void)
+{
+
+ lapic->eoi = 0;
+}
+
+void
+lapic_handle_intr(struct intrframe frame)
+{
+ struct intsrc *isrc;
+
+ if (frame.if_vec == -1)
+ panic("Couldn't get vector from ISR!");
+ isrc = intr_lookup_source(apic_idt_to_irq(frame.if_vec));
+ intr_execute_handlers(isrc, &frame);
+}
+
+/* Translate between IDT vectors and IRQ vectors. */
+u_int
+apic_irq_to_idt(u_int irq)
+{
+ u_int vector;
+
+ KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
+ vector = irq + APIC_IO_INTS;
+ if (vector >= IDT_SYSCALL)
+ vector++;
+ return (vector);
+}
+
+u_int
+apic_idt_to_irq(u_int vector)
+{
+
+ KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
+ vector <= APIC_IO_INTS + NUM_IO_INTS,
+ ("Vector %u does not map to an IRQ line", vector));
+ if (vector > IDT_SYSCALL)
+ vector--;
+ return (vector - APIC_IO_INTS);
+}
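+
+#if 0
+/*
+ * A worked example (not compiled) of the translation above, taking
+ * APIC_IO_INTS == 48 and IDT_SYSCALL == 0x80 purely for illustration:
+ * IRQ 0 maps to IDT vector 48, and any IRQ whose raw vector would land
+ * at or above 0x80 is bumped up by one so the syscall gate is skipped.
+ */
+static u_int
+irq_to_idt_sketch(u_int irq)
+{
+	u_int vector = irq + 48;	/* APIC_IO_INTS, assumed */
+
+	if (vector >= 0x80)		/* IDT_SYSCALL, assumed */
+		vector++;
+	return (vector);
+}
+#endif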
+
+/*
+ * APIC probing support code. This includes code to manage enumerators.
+ */
+
+static SLIST_HEAD(, apic_enumerator) enumerators =
+ SLIST_HEAD_INITIALIZER(enumerators);
+static struct apic_enumerator *best_enum;
+
+void
+apic_register_enumerator(struct apic_enumerator *enumerator)
+{
+#ifdef INVARIANTS
+ struct apic_enumerator *apic_enum;
+
+ SLIST_FOREACH(apic_enum, &enumerators, apic_next) {
+ if (apic_enum == enumerator)
+ panic("%s: Duplicate register of %s", __func__,
+ enumerator->apic_name);
+ }
+#endif
+ SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
+}
+
+/*
+ * Probe the APIC enumerators, enumerate CPUs, and initialize the
+ * local APIC.
+ */
+static void
+apic_init(void *dummy __unused)
+{
+ struct apic_enumerator *enumerator;
+ uint64_t apic_base;
+ int retval, best;
+
+	/* We only support built-in local APICs. */
+ if (!(cpu_feature & CPUID_APIC))
+ return;
+
+ /* Don't probe if APIC mode is disabled. */
+ if (resource_disabled("apic", 0))
+ return;
+
+ /* First, probe all the enumerators to find the best match. */
+ best_enum = NULL;
+ best = 0;
+ SLIST_FOREACH(enumerator, &enumerators, apic_next) {
+ retval = enumerator->apic_probe();
+ if (retval > 0)
+ continue;
+ if (best_enum == NULL || best < retval) {
+ best_enum = enumerator;
+ best = retval;
+ }
+ }
+ if (best_enum == NULL) {
+ if (bootverbose)
+ printf("APIC: Could not find any APICs.\n");
+ return;
+ }
+
+ if (bootverbose)
+ printf("APIC: Using the %s enumerator.\n",
+ best_enum->apic_name);
+
+ /*
+	 * To work around an erratum, we disable the local APIC on some
+	 * CPUs during early startup.  We need to turn the local APIC back
+	 * on for such CPUs now.
+ */
+ if (cpu == CPU_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 &&
+ (cpu_id & 0xff0) == 0x610) {
+ apic_base = rdmsr(MSR_APICBASE);
+ apic_base |= APICBASE_ENABLED;
+ wrmsr(MSR_APICBASE, apic_base);
+ }
+
+	/* Second, probe the CPUs in the system. */
+ retval = best_enum->apic_probe_cpus();
+ if (retval != 0)
+ printf("%s: Failed to probe CPUs: returned %d\n",
+ best_enum->apic_name, retval);
+
+ /* Third, initialize the local APIC. */
+ retval = best_enum->apic_setup_local();
+ if (retval != 0)
+ printf("%s: Failed to setup the local APIC: returned %d\n",
+ best_enum->apic_name, retval);
+#ifdef SMP
+	/* Last, set up the CPU topology now that we have probed the CPUs. */
+ mp_topology();
+#endif
+}
+SYSINIT(apic_init, SI_SUB_CPU, SI_ORDER_FIRST, apic_init, NULL)
+
+/*
+ * Setup the I/O APICs.
+ */
+static void
+apic_setup_io(void *dummy __unused)
+{
+ int retval;
+
+ if (best_enum == NULL)
+ return;
+ retval = best_enum->apic_setup_io();
+ if (retval != 0)
+ printf("%s: Failed to setup I/O APICs: returned %d\n",
+ best_enum->apic_name, retval);
+
+ /*
+ * Finish setting up the local APIC on the BSP once we know how to
+ * properly program the LINT pins.
+ */
+ lapic_setup();
+ if (bootverbose)
+ lapic_dump("BSP");
+}
+SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL)
+
+#ifdef SMP
+/*
+ * Inter-Processor Interrupt functions.  The lapic_ipi_*() functions are
+ * private to the sys/i386 code.  The public interface for the rest of the
+ * kernel is defined in mp_machdep.c.
+ */
+
+int
+lapic_ipi_wait(int delay)
+{
+ int x, incr;
+
+ /*
+ * Wait delay loops for IPI to be sent. This is highly bogus
+ * since this is sensitive to CPU clock speed. If delay is
+ * -1, we wait forever.
+ */
+ if (delay == -1) {
+ incr = 0;
+ delay = 1;
+ } else
+ incr = 1;
+ for (x = 0; x < delay; x += incr) {
+ if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
+ return (1);
+ ia32_pause();
+ }
+ return (0);
+}
+
+void
+lapic_ipi_raw(register_t icrlo, u_int dest)
+{
+ register_t value, eflags;
+
+ /* XXX: Need more sanity checking of icrlo? */
+ KASSERT(lapic != NULL, ("%s called too early", __func__));
+ KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
+ ("%s: invalid dest field", __func__));
+ KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
+ ("%s: reserved bits set in ICR LO register", __func__));
+
+ /* Set destination in ICR HI register if it is being used. */
+ eflags = intr_disable();
+ if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
+ value = lapic->icr_hi;
+ value &= ~APIC_ID_MASK;
+ value |= dest << APIC_ID_SHIFT;
+ lapic->icr_hi = value;
+ }
+
+ /* Program the contents of the IPI and dispatch it. */
+ value = lapic->icr_lo;
+ value &= APIC_ICRLO_RESV_MASK;
+ value |= icrlo;
+ lapic->icr_lo = value;
+ intr_restore(eflags);
+}
+
+#define BEFORE_SPIN 1000000
+#ifdef DETECT_DEADLOCK
+#define AFTER_SPIN 1000
+#endif
+
+void
+lapic_ipi_vectored(u_int vector, int dest)
+{
+ register_t icrlo, destfield;
+
+ KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
+ ("%s: invalid vector %d", __func__, vector));
+
+ icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY |
+ APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE;
+ destfield = 0;
+ switch (dest) {
+ case APIC_IPI_DEST_SELF:
+ icrlo |= APIC_DEST_SELF;
+ break;
+ case APIC_IPI_DEST_ALL:
+ icrlo |= APIC_DEST_ALLISELF;
+ break;
+ case APIC_IPI_DEST_OTHERS:
+ icrlo |= APIC_DEST_ALLESELF;
+ break;
+ default:
+ KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
+ ("%s: invalid destination 0x%x", __func__, dest));
+ destfield = dest;
+ }
+
+ /* Wait for an earlier IPI to finish. */
+ if (!lapic_ipi_wait(BEFORE_SPIN))
+ panic("APIC: Previous IPI is stuck");
+
+ lapic_ipi_raw(icrlo, destfield);
+
+#ifdef DETECT_DEADLOCK
+ /* Wait for IPI to be delivered. */
+ if (!lapic_ipi_wait(AFTER_SPIN)) {
+#ifdef needsattention
+ /*
+ * XXX FIXME:
+ *
+	 * The above function waits for the message to actually be
+	 * delivered.  It breaks out after an arbitrary timeout
+	 * since the message should eventually be delivered (at
+	 * least in theory); if it wasn't, we would catch the
+	 * failure with the check above when the next IPI is
+	 * sent.
+	 *
+	 * We could skip this wait entirely, EXCEPT it probably
+ * protects us from other routines that assume that the
+ * message was delivered and acted upon when this function
+ * returns.
+ */
+ printf("APIC: IPI might be stuck\n");
+#else /* !needsattention */
+		/* Wait, with no timeout, until the message is sent. */
+ while (lapic->icr_lo & APIC_DELSTAT_PEND)
+ ia32_pause();
+#endif /* needsattention */
+ }
+#endif /* DETECT_DEADLOCK */
+}
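+
+/*
+ * A hypothetical caller would broadcast a fixed-vector IPI to every
+ * other CPU with, e.g., lapic_ipi_vectored(IPI_AST, APIC_IPI_DEST_OTHERS),
+ * or target a single CPU by passing its physical APIC ID as dest.
+ */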
+#endif /* SMP */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s
new file mode 100644
index 0000000000..5146169162
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s
@@ -0,0 +1,949 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)locore.s 7.3 (Berkeley) 5/13/91
+ * $FreeBSD: src/sys/i386/i386/locore.s,v 1.181 2003/11/03 21:53:37 jhb Exp $
+ *
+ * originally from: locore.s, by William F. Jolitz
+ *
+ * Substantially rewritten by David Greenman, Rod Grimes,
+ * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp
+ * and many others.
+ */
+
+#include "opt_bootp.h"
+#include "opt_compat.h"
+#include "opt_nfsroot.h"
+#include "opt_pmap.h"
+
+#include <sys/syscall.h>
+#include <sys/reboot.h>
+
+#include <machine/asmacros.h>
+#include <machine/cputypes.h>
+#include <machine/psl.h>
+#include <machine/pmap.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+.section __xen_guest
+ .asciz "LOADER=generic,GUEST_VER=5.2.1,XEN_VER=2.0,BSD_SYMTAB"
+
+
+/*
+ * XXX
+ *
+ * Note: This version was greatly munged to avoid various assembler errors
+ * that may be fixed in newer versions of gas.  Perhaps newer versions
+ * will have a more pleasant appearance.
+ */
+
+/*
+ * PTmap is the recursive pagemap at the top of the virtual address space.
+ * Within PTmap, the page directory can be found (third indirection).
+ */
+ .globl PTmap,PTD,PTDpde
+ .set PTmap,(PTDPTDI << PDRSHIFT)
+ .set PTD,PTmap + (PTDPTDI * PAGE_SIZE)
+ .set PTDpde,PTD + (PTDPTDI * PDESIZE)
+
+#ifdef SMP
+/*
+ * Define layout of per-cpu address space.
+ * This is "constructed" in locore.s on the BSP and in mp_machdep.c
+ * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST!
+ */
+ .globl SMP_prvspace
+ .set SMP_prvspace,(MPPTDI << PDRSHIFT)
+#endif /* SMP */
+
+/*
+ * Compiled KERNBASE location and the kernel load address
+ */
+ .globl kernbase
+ .set kernbase,KERNBASE
+ .globl kernload
+ .set kernload,KERNLOAD
+
+/*
+ * Globals
+ */
+ .data
+ ALIGN_DATA /* just to be sure */
+
+ .space 0x2000 /* space for tmpstk - temporary stack */
+tmpstk:
+
+ .globl bootinfo
+bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
+
+ .globl KERNend
+KERNend: .long 0 /* phys addr end of kernel (just after bss) */
+physfree: .long 0 /* phys addr of next free page */
+
+#ifdef SMP
+ .globl cpu0prvpage
+cpu0pp: .long 0 /* phys addr cpu0 private pg */
+cpu0prvpage: .long 0 /* relocated version */
+
+ .globl SMPpt
+SMPptpa: .long 0 /* phys addr SMP page table */
+SMPpt: .long 0 /* relocated version */
+#endif /* SMP */
+
+ .globl IdlePTD
+IdlePTD: .long 0 /* phys addr of kernel PTD */
+
+
+ .globl KPTphys
+KPTphys: .long 0 /* phys addr of kernel page tables */
+
+ .globl proc0uarea, proc0kstack
+proc0uarea: .long 0 /* address of proc 0 uarea space */
+proc0kstack: .long 0 /* address of proc 0 kstack space */
+p0upa: .long 0 /* phys addr of proc0's UAREA */
+p0kpa: .long 0 /* phys addr of proc0's STACK */
+
+#ifdef PC98
+ .globl pc98_system_parameter
+pc98_system_parameter:
+ .space 0x240
+#endif
+
+/**********************************************************************
+ *
+ * Some handy macros
+ *
+ */
+
+#define R(foo) ((foo))
+
+#define ALLOCPAGES(foo) \
+ movl R(physfree), %esi ; \
+ movl $((foo)*PAGE_SIZE), %eax ; \
+ addl %esi, %eax ; \
+ movl %eax, R(physfree) ; \
+ movl %esi, %edi ; \
+ movl $((foo)*PAGE_SIZE),%ecx ; \
+ xorl %eax,%eax ; \
+ cld ; \
+ rep ; \
+ stosb
+
+/*
+ * fillkpt
+ * eax = page frame address
+ * ebx = index into page table
+ * ecx = how many pages to map
+ * base = base address of page dir/table
+ * prot = protection bits
+ */
+#define fillkpt(base, prot) \
+ shll $PTESHIFT,%ebx ; \
+ addl base,%ebx ; \
+ orl $PG_V,%eax ; \
+ orl prot,%eax ; \
+1: movl %eax,(%ebx) ; \
+ addl $PAGE_SIZE,%eax ; /* increment physical address */ \
+ addl $PTESIZE,%ebx ; /* next pte */ \
+ loop 1b
+
+/*
+ * fillkptphys(prot)
+ * eax = physical address
+ * ecx = how many pages to map
+ * prot = protection bits
+ */
+#define fillkptphys(prot) \
+ movl %eax, %ebx ; \
+ shrl $PAGE_SHIFT, %ebx ; \
+ fillkpt(R(KPTphys), prot)
+
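+/*
+ * For reference, a rough C rendering of the fillkpt() loop above
+ * (illustrative only; pt_entry_t, PAGE_SIZE and PG_V as in the i386
+ * pmap headers):
+ *
+ *	void
+ *	fillkpt_c(pt_entry_t *base, u_int idx, u_int pa, u_int n, u_int prot)
+ *	{
+ *		while (n--) {
+ *			base[idx++] = pa | PG_V | prot;
+ *			pa += PAGE_SIZE;
+ *		}
+ *	}
+ */
+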
+ .text
+/**********************************************************************
+ *
+ * This is where the bootblocks start us, set the ball rolling...
+ *
+ */
+NON_GPROF_ENTRY(btext)
+ pushl %esi
+ call initvalues
+ popl %esi
+ call identify_cpu
+ movl proc0kstack,%eax
+ leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
+ xorl %ebp,%ebp /* mark end of frames */
+ movl IdlePTD,%esi
+ movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
+ call init386
+ call mi_startup
+ int $3
+
+
+#ifdef PC98
+ /* save SYSTEM PARAMETER for resume (NS/T or other) */
+ movl $0xa1400,%esi
+ movl $R(pc98_system_parameter),%edi
+ movl $0x0240,%ecx
+ cld
+ rep
+ movsb
+#else /* IBM-PC */
+/* Tell the bios to warmboot next time */
+ movw $0x1234,0x472
+#endif /* PC98 */
+
+/* Set up a real frame in case the double return in newboot is executed. */
+ pushl %ebp
+ movl %esp, %ebp
+
+/* Don't trust what the BIOS gives for eflags. */
+ pushl $PSL_KERNEL
+ popfl
+
+/*
+ * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap
+ * to set %cs, %ds, %es and %ss.
+ */
+ mov %ds, %ax
+ mov %ax, %fs
+ mov %ax, %gs
+
+/*
+ * Clear the bss. Not all boot programs do it, and it is our job anyway.
+ *
+ * XXX we don't check that there is memory for our bss and page tables
+ * before using it.
+ *
+ * Note: we must be careful not to overwrite an active gdt or idt.  They
+ * are inactive from now until we switch to new ones, since we don't load
+ * any more segment registers or permit interrupts until after the switch.
+ */
+ movl $R(end),%ecx
+ movl $R(edata),%edi
+ subl %edi,%ecx
+ xorl %eax,%eax
+ cld
+ rep
+ stosb
+
+ call recover_bootinfo
+
+/* Get onto a stack that we can trust. */
+/*
+ * XXX this step is delayed in case recover_bootinfo needs to return via
+ * the old stack, but it need not be, since recover_bootinfo actually
+ * returns via the old frame.
+ */
+ movl $R(tmpstk),%esp
+
+#ifdef PC98
+ /* pc98_machine_type & M_EPSON_PC98 */
+ testb $0x02,R(pc98_system_parameter)+220
+ jz 3f
+ /* epson_machine_id <= 0x0b */
+ cmpb $0x0b,R(pc98_system_parameter)+224
+ ja 3f
+
+ /* count up memory */
+	movl $0x100000,%eax	/* next, tally remaining memory */
+ movl $0xFFF-0x100,%ecx
+1: movl 0(%eax),%ebx /* save location to check */
+ movl $0xa55a5aa5,0(%eax) /* write test pattern */
+ cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */
+ jne 2f
+ movl %ebx,0(%eax) /* restore memory */
+ addl $PAGE_SIZE,%eax
+ loop 1b
+2: subl $0x100000,%eax
+ shrl $17,%eax
+ movb %al,R(pc98_system_parameter)+1
+3:
+
+ movw R(pc98_system_parameter+0x86),%ax
+ movw %ax,R(cpu_id)
+#endif
+
+ call identify_cpu
+ call create_pagetables
+
+/*
+ * If the CPU has support for VME, turn it on.
+ */
+ testl $CPUID_VME, R(cpu_feature)
+ jz 1f
+ movl %cr4, %eax
+ orl $CR4_VME, %eax
+ movl %eax, %cr4
+1:
+
+/* Now enable paging */
+ movl R(IdlePTD), %eax
+ movl %eax,%cr3 /* load ptd addr into mmu */
+ movl %cr0,%eax /* get control word */
+ orl $CR0_PE|CR0_PG,%eax /* enable paging */
+ movl %eax,%cr0 /* and let's page NOW! */
+
+ pushl $begin /* jump to high virtualized address */
+ ret
+
+/* now running relocated at KERNBASE where the system is linked to run */
+begin:
+ /* set up bootstrap stack */
+ movl proc0kstack,%eax /* location of in-kernel stack */
+ /* bootstrap stack end location */
+ leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
+
+ xorl %ebp,%ebp /* mark end of frames */
+
+#ifdef PAE
+ movl IdlePDPT,%esi
+#else
+ movl IdlePTD,%esi
+#endif
+ movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
+
+ pushl physfree /* value of first for init386(first) */
+ call init386 /* wire 386 chip for unix operation */
+
+ /*
+ * Clean up the stack in a way that db_numargs() understands, so
+ * that backtraces in ddb don't underrun the stack. Traps for
+ * inaccessible memory are more fatal than usual this early.
+ */
+ addl $4,%esp
+
+ call mi_startup /* autoconfiguration, mountroot etc */
+ /* NOTREACHED */
+ addl $0,%esp /* for db_numargs() again */
+
+/*
+ * Signal trampoline, copied to top of user stack
+ */
+NON_GPROF_ENTRY(sigcode)
+ calll *SIGF_HANDLER(%esp)
+ leal SIGF_UC(%esp),%eax /* get ucontext */
+ pushl %eax
+ testl $PSL_VM,UC_EFLAGS(%eax)
+ jne 1f
+ movl UC_GS(%eax),%gs /* restore %gs */
+1:
+ movl $SYS_sigreturn,%eax
+ pushl %eax /* junk to fake return addr. */
+ int $0x80 /* enter kernel with args */
+ /* on stack */
+1:
+ jmp 1b
+
+#ifdef COMPAT_FREEBSD4
+ ALIGN_TEXT
+freebsd4_sigcode:
+ calll *SIGF_HANDLER(%esp)
+ leal SIGF_UC4(%esp),%eax /* get ucontext */
+ pushl %eax
+ testl $PSL_VM,UC4_EFLAGS(%eax)
+ jne 1f
+ movl UC4_GS(%eax),%gs /* restore %gs */
+1:
+ movl $344,%eax /* 4.x SYS_sigreturn */
+ pushl %eax /* junk to fake return addr. */
+ int $0x80 /* enter kernel with args */
+ /* on stack */
+1:
+ jmp 1b
+#endif
+
+#ifdef COMPAT_43
+ ALIGN_TEXT
+osigcode:
+ call *SIGF_HANDLER(%esp) /* call signal handler */
+ lea SIGF_SC(%esp),%eax /* get sigcontext */
+ pushl %eax
+ testl $PSL_VM,SC_PS(%eax)
+ jne 9f
+ movl SC_GS(%eax),%gs /* restore %gs */
+9:
+ movl $103,%eax /* 3.x SYS_sigreturn */
+ pushl %eax /* junk to fake return addr. */
+ int $0x80 /* enter kernel with args */
+0: jmp 0b
+#endif /* COMPAT_43 */
+
+ ALIGN_TEXT
+esigcode:
+
+ .data
+ .globl szsigcode
+szsigcode:
+ .long esigcode-sigcode
+#ifdef COMPAT_FREEBSD4
+ .globl szfreebsd4_sigcode
+szfreebsd4_sigcode:
+ .long esigcode-freebsd4_sigcode
+#endif
+#ifdef COMPAT_43
+ .globl szosigcode
+szosigcode:
+ .long esigcode-osigcode
+#endif
+ .text
+
+/**********************************************************************
+ *
+ * Recover the bootinfo passed to us from the boot program
+ *
+ */
+recover_bootinfo:
+ /*
+ * This code is called in different ways depending on what loaded
+ * and started the kernel. This is used to detect how we get the
+ * arguments from the other code and what we do with them.
+ *
+ * Old disk boot blocks:
+ * (*btext)(howto, bootdev, cyloffset, esym);
+ * [return address == 0, and can NOT be returned to]
+ * [cyloffset was not supported by the FreeBSD boot code
+ * and always passed in as 0]
+ * [esym is also known as total in the boot code, and
+ * was never properly supported by the FreeBSD boot code]
+ *
+ * Old diskless netboot code:
+ * (*btext)(0,0,0,0,&nfsdiskless,0,0,0);
+ * [return address != 0, and can NOT be returned to]
+ * If we are being booted by this code it will NOT work,
+ * so we are just going to halt if we find this case.
+ *
+ * New uniform boot code:
+ * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo)
+ * [return address != 0, and can be returned to]
+ *
+ * There may seem to be a lot of wasted arguments in here, but
+ * that is so the newer boot code can still load very old kernels
+ * and old boot code can load new kernels.
+ */
+
+ /*
+	 * The old style disk boot blocks faked a frame on the stack and
+	 * did an lret to get here.  The frame on the stack has a return
+ * address of 0.
+ */
+ cmpl $0,4(%ebp)
+ je olddiskboot
+
+ /*
+ * We have some form of return address, so this is either the
+ * old diskless netboot code, or the new uniform code. That can
+	 * old diskless netboot code or the new uniform code.  That can
+	 * be detected by looking at the 5th argument: if it is 0,
+	 * we are being booted by the new uniform boot code.
+ cmpl $0,24(%ebp)
+ je newboot
+
+ /*
+	 * It seems we have been loaded by the old diskless boot code; we
+	 * don't stand a chance of running, as the diskless structure
+	 * changed considerably between the two, so just halt.
+ */
+ hlt
+
+ /*
+ * We have been loaded by the new uniform boot code.
+ * Let's check the bootinfo version, and if we do not understand
+	 * it, we return to the loader with a status of 1 to indicate this error.
+ */
+newboot:
+ movl 28(%ebp),%ebx /* &bootinfo.version */
+ movl BI_VERSION(%ebx),%eax
+ cmpl $1,%eax /* We only understand version 1 */
+ je 1f
+ movl $1,%eax /* Return status */
+ leave
+ /*
+ * XXX this returns to our caller's caller (as is required) since
+ * we didn't set up a frame and our caller did.
+ */
+ ret
+
+1:
+ /*
+	 * If we have a kernelname, copy it in.
+ */
+ movl BI_KERNELNAME(%ebx),%esi
+ cmpl $0,%esi
+ je 2f /* No kernelname */
+ movl $MAXPATHLEN,%ecx /* Brute force!!! */
+ movl $R(kernelname),%edi
+ cmpb $'/',(%esi) /* Make sure it starts with a slash */
+ je 1f
+ movb $'/',(%edi)
+ incl %edi
+ decl %ecx
+1:
+ cld
+ rep
+ movsb
+
+2:
+ /*
+ * Determine the size of the boot loader's copy of the bootinfo
+ * struct. This is impossible to do properly because old versions
+ * of the struct don't contain a size field and there are 2 old
+ * versions with the same version number.
+ */
+ movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */
+ testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */
+ je got_bi_size /* no, sizeless version */
+ movl BI_SIZE(%ebx),%ecx
+got_bi_size:
+
+ /*
+ * Copy the common part of the bootinfo struct
+ */
+ movl %ebx,%esi
+ movl $R(bootinfo),%edi
+ cmpl $BOOTINFO_SIZE,%ecx
+ jbe got_common_bi_size
+ movl $BOOTINFO_SIZE,%ecx
+got_common_bi_size:
+ cld
+ rep
+ movsb
+
+#ifdef NFS_ROOT
+#ifndef BOOTP_NFSV3
+ /*
+	 * If we have an nfs_diskless structure, copy it in.
+ */
+ movl BI_NFS_DISKLESS(%ebx),%esi
+ cmpl $0,%esi
+ je olddiskboot
+ movl $R(nfs_diskless),%edi
+ movl $NFSDISKLESS_SIZE,%ecx
+ cld
+ rep
+ movsb
+ movl $R(nfs_diskless_valid),%edi
+ movl $1,(%edi)
+#endif
+#endif
+
+ /*
+ * The old style disk boot.
+ * (*btext)(howto, bootdev, cyloffset, esym);
+ * Note that the newer boot code just falls into here to pick
+	 * up howto and bootdev; cyloffset and esym are no longer used.
+ */
+olddiskboot:
+ movl 8(%ebp),%eax
+ movl %eax,R(boothowto)
+ movl 12(%ebp),%eax
+ movl %eax,R(bootdev)
+
+ ret
+
+
+/**********************************************************************
+ *
+ * Identify the CPU and initialize anything special about it
+ *
+ */
+identify_cpu:
+
+	/* Try to toggle the alignment check flag; it does not exist on the 386. */
+ pushfl
+ popl %eax
+ movl %eax,%ecx
+ orl $PSL_AC,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ xorl %ecx,%eax
+ andl $PSL_AC,%eax
+ pushl %ecx
+ popfl
+
+ testl %eax,%eax
+ jnz try486
+
+	/* The NexGen CPU does not have an alignment check flag. */
+ pushfl
+ movl $0x5555, %eax
+ xorl %edx, %edx
+ movl $2, %ecx
+ clc
+ divl %ecx
+ jz trynexgen
+ popfl
+ movl $CPU_386,R(cpu)
+ jmp 3f
+
+trynexgen:
+ popfl
+ movl $CPU_NX586,R(cpu)
+ movl $0x4778654e,R(cpu_vendor) # store vendor string
+ movl $0x72446e65,R(cpu_vendor+4)
+ movl $0x6e657669,R(cpu_vendor+8)
+ movl $0,R(cpu_vendor+12)
+ jmp 3f
+
+try486:	/* Try to toggle the identification flag; it does not exist on early 486s. */
+ pushfl
+ popl %eax
+ movl %eax,%ecx
+ xorl $PSL_ID,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ xorl %ecx,%eax
+ andl $PSL_ID,%eax
+ pushl %ecx
+ popfl
+
+ testl %eax,%eax
+ jnz trycpuid
+ movl $CPU_486,R(cpu)
+
+ /*
+	 * Check for a Cyrix CPU.
+	 * Cyrix CPUs do not change the undefined flags following
+	 * execution of the divide instruction which divides 5 by 2.
+	 *
+	 * Note: CPUID is enabled on the M2, so it is detected another way.
+ */
+ pushfl
+ movl $0x5555, %eax
+ xorl %edx, %edx
+ movl $2, %ecx
+ clc
+ divl %ecx
+ jnc trycyrix
+ popfl
+	jmp 3f		/* Probably an Intel CPU. */
+
+trycyrix:
+ popfl
+ /*
+	 * The IBM Blue Lightning CPU also doesn't change the undefined
+	 * flags.  Because IBM doesn't disclose the information for the
+	 * Blue Lightning CPU, we can't distinguish it from Cyrix CPUs
+	 * (including the IBM-branded Cyrix CPUs).
+ */
+ movl $0x69727943,R(cpu_vendor) # store vendor string
+ movl $0x736e4978,R(cpu_vendor+4)
+ movl $0x64616574,R(cpu_vendor+8)
+ jmp 3f
+
+trycpuid: /* Use the `cpuid' instruction. */
+ xorl %eax,%eax
+ cpuid # cpuid 0
+ movl %eax,R(cpu_high) # highest capability
+ movl %ebx,R(cpu_vendor) # store vendor string
+ movl %edx,R(cpu_vendor+4)
+ movl %ecx,R(cpu_vendor+8)
+ movb $0,R(cpu_vendor+12)
+
+ movl $1,%eax
+ cpuid # cpuid 1
+ movl %eax,R(cpu_id) # store cpu_id
+ movl %ebx,R(cpu_procinfo) # store cpu_procinfo
+ movl %edx,R(cpu_feature) # store cpu_feature
+ rorl $8,%eax # extract family type
+ andl $15,%eax
+ cmpl $5,%eax
+ jae 1f
+
+	/* less than a Pentium; must be a 486 */
+ movl $CPU_486,R(cpu)
+ jmp 3f
+1:
+ /* a Pentium? */
+ cmpl $5,%eax
+ jne 2f
+ movl $CPU_586,R(cpu)
+ jmp 3f
+2:
+ /* Greater than Pentium...call it a Pentium Pro */
+ movl $CPU_686,R(cpu)
+3:
+ ret
+
+/**********************************************************************
+ *
+ * Create the first page directory and its page tables.
+ *
+ */
+
+create_pagetables:
+
+/* Find end of kernel image (rounded up to a page boundary). */
+ movl $R(_end),%esi
+
+/* Include symbols, if any. */
+ movl R(bootinfo+BI_ESYMTAB),%edi
+ testl %edi,%edi
+ je over_symalloc
+ movl %edi,%esi
+ movl $KERNBASE,%edi
+ addl %edi,R(bootinfo+BI_SYMTAB)
+ addl %edi,R(bootinfo+BI_ESYMTAB)
+over_symalloc:
+
+/* If we are told where the end of the kernel space is, believe it. */
+ movl R(bootinfo+BI_KERNEND),%edi
+ testl %edi,%edi
+ je no_kernend
+ movl %edi,%esi
+no_kernend:
+
+	addl $PDRMASK,%esi		/* Play it conservative for now, and */
+	andl $~PDRMASK,%esi		/* ... round up to the next 4M. */
+ movl %esi,R(KERNend) /* save end of kernel */
+ movl %esi,R(physfree) /* next free page is at end of kernel */
+
+/* Allocate Kernel Page Tables */
+ ALLOCPAGES(NKPT)
+ movl %esi,R(KPTphys)
+
+/* Allocate Page Table Directory */
+#ifdef PAE
+ /* XXX only need 32 bytes (easier for now) */
+ ALLOCPAGES(1)
+ movl %esi,R(IdlePDPT)
+#endif
+ ALLOCPAGES(NPGPTD)
+ movl %esi,R(IdlePTD)
+
+/* Allocate UPAGES */
+ ALLOCPAGES(UAREA_PAGES)
+ movl %esi,R(p0upa)
+ addl $KERNBASE, %esi
+ movl %esi, R(proc0uarea)
+
+ ALLOCPAGES(KSTACK_PAGES)
+ movl %esi,R(p0kpa)
+ addl $KERNBASE, %esi
+ movl %esi, R(proc0kstack)
+#if 0
+ ALLOCPAGES(1) /* vm86/bios stack */
+ movl %esi,R(vm86phystk)
+
+ ALLOCPAGES(3) /* pgtable + ext + IOPAGES */
+ movl %esi,R(vm86pa)
+ addl $KERNBASE, %esi
+ movl %esi, R(vm86paddr)
+#endif
+#ifdef SMP
+/* Allocate cpu0's private data page */
+ ALLOCPAGES(1)
+ movl %esi,R(cpu0pp)
+ addl $KERNBASE, %esi
+ movl %esi, R(cpu0prvpage) /* relocated to KVM space */
+
+/* Allocate SMP page table page */
+ ALLOCPAGES(1)
+ movl %esi,R(SMPptpa)
+ addl $KERNBASE, %esi
+ movl %esi, R(SMPpt) /* relocated to KVM space */
+#endif /* SMP */
+
+/* Map page zero read-write so bios32 calls can use it */
+ xorl %eax, %eax
+ movl $PG_RW,%edx
+ movl $1,%ecx
+ fillkptphys(%edx)
+
+/* Map read-only from page 1 to the beginning of the kernel text section */
+ movl $PAGE_SIZE, %eax
+ xorl %edx,%edx
+ movl $R(btext),%ecx
+ addl $PAGE_MASK,%ecx
+ subl %eax,%ecx
+ shrl $PAGE_SHIFT,%ecx
+ fillkptphys(%edx)
+
+/*
+ * Enable PSE and PGE.
+ */
+#ifndef DISABLE_PSE
+ testl $CPUID_PSE, R(cpu_feature)
+ jz 1f
+ movl $PG_PS, R(pseflag)
+ movl %cr4, %eax
+ orl $CR4_PSE, %eax
+ movl %eax, %cr4
+1:
+#endif
+#ifndef DISABLE_PG_G
+ testl $CPUID_PGE, R(cpu_feature)
+ jz 2f
+ movl $PG_G, R(pgeflag)
+ movl %cr4, %eax
+ orl $CR4_PGE, %eax
+ movl %eax, %cr4
+2:
+#endif
+
+/*
+ * Write page tables for the kernel starting at btext and running
+ * until the end.  Make sure to map them read+write.  We do this even
+ * if we've enabled PSE above; we'll just switch the corresponding kernel
+ * PDEs before we turn on paging.
+ *
+ * XXX: We waste some pages here in the PSE case! DON'T BLINDLY REMOVE
+ * THIS! SMP needs the page table to be there to map the kernel P==V.
+ */
+ movl $R(btext),%eax
+ addl $PAGE_MASK, %eax
+ andl $~PAGE_MASK, %eax
+ movl $PG_RW,%edx
+ movl R(KERNend),%ecx
+ subl %eax,%ecx
+ shrl $PAGE_SHIFT,%ecx
+ fillkptphys(%edx)
+
+/* Map page directory. */
+ movl R(IdlePTD), %eax
+ movl $NPGPTD, %ecx
+ fillkptphys($PG_RW)
+
+/* Map proc0's UPAGES in the physical way ... */
+ movl R(p0upa), %eax
+ movl $(UAREA_PAGES), %ecx
+ fillkptphys($PG_RW)
+
+/* Map proc0's KSTACK in the physical way ... */
+ movl R(p0kpa), %eax
+ movl $(KSTACK_PAGES), %ecx
+ fillkptphys($PG_RW)
+
+/* Map ISA hole */
+ movl $ISA_HOLE_START, %eax
+ movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
+ fillkptphys($PG_RW)
+#if 0
+/* Map space for the vm86 region */
+ movl R(vm86phystk), %eax
+ movl $4, %ecx
+ fillkptphys($PG_RW)
+
+/* Map page 0 into the vm86 page table */
+ movl $0, %eax
+ movl $0, %ebx
+ movl $1, %ecx
+ fillkpt(R(vm86pa), $PG_RW|PG_U)
+
+/* ...likewise for the ISA hole */
+ movl $ISA_HOLE_START, %eax
+ movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx
+ movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
+ fillkpt(R(vm86pa), $PG_RW|PG_U)
+#endif
+#ifdef SMP
+/* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */
+ movl R(cpu0pp), %eax
+ movl $1, %ecx
+ fillkptphys($PG_RW)
+
+/* Map SMP page table page into global kmem FWIW */
+ movl R(SMPptpa), %eax
+ movl $1, %ecx
+ fillkptphys($PG_RW)
+
+/* Map the private page into the SMP page table */
+ movl R(cpu0pp), %eax
+ movl $0, %ebx /* pte offset = 0 */
+ movl $1, %ecx /* one private page coming right up */
+ fillkpt(R(SMPptpa), $PG_RW)
+
+/* ... and put the page table page in the pde. */
+ movl R(SMPptpa), %eax
+ movl $MPPTDI, %ebx
+ movl $1, %ecx
+ fillkpt(R(IdlePTD), $PG_RW)
+
+/* Fake up a VA for the local APIC to allow early traps. */
+ ALLOCPAGES(1)
+ movl %esi, %eax
+	movl $(NPTEPG-1), %ebx	/* pte offset = NPTEPG-1 */
+ movl $1, %ecx /* one private pt coming right up */
+ fillkpt(R(SMPptpa), $PG_RW)
+#endif /* SMP */
+
+/* install a pde for temporary double map of bottom of VA */
+ movl R(KPTphys), %eax
+ xorl %ebx, %ebx
+ movl $NKPT, %ecx
+ fillkpt(R(IdlePTD), $PG_RW)
+
+/*
+ * For the non-PSE case, install PDEs for PTs covering the kernel.
+ * For the PSE case, do the same, but clobber the ones corresponding
+ * to the kernel (from btext to KERNend) with 4M ('PS') PDEs immediately
+ * after.
+ */
+ movl R(KPTphys), %eax
+ movl $KPTDI, %ebx
+ movl $NKPT, %ecx
+ fillkpt(R(IdlePTD), $PG_RW)
+ cmpl $0,R(pseflag)
+ je done_pde
+
+ movl R(KERNend), %ecx
+ movl $KERNLOAD, %eax
+ subl %eax, %ecx
+ shrl $PDRSHIFT, %ecx
+ movl $(KPTDI+(KERNLOAD/(1 << PDRSHIFT))), %ebx
+ shll $PDESHIFT, %ebx
+ addl R(IdlePTD), %ebx
+ orl $(PG_V|PG_RW|PG_PS), %eax
+1: movl %eax, (%ebx)
+ addl $(1 << PDRSHIFT), %eax
+ addl $PDESIZE, %ebx
+ loop 1b
+
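+/*
+ * Roughly, in C (illustrative only), the loop above does:
+ *
+ *	pde[KPTDI + KERNLOAD/(1 << PDRSHIFT) + i] =
+ *	    (KERNLOAD + (i << PDRSHIFT)) | PG_V | PG_RW | PG_PS;
+ *
+ * for each 4M superpage from KERNLOAD up to KERNend.
+ */
+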
+done_pde:
+/* install a pde recursively mapping page directory as a page table */
+ movl R(IdlePTD), %eax
+ movl $PTDPTDI, %ebx
+ movl $NPGPTD,%ecx
+ fillkpt(R(IdlePTD), $PG_RW)
+
+#ifdef PAE
+ movl R(IdlePTD), %eax
+ xorl %ebx, %ebx
+ movl $NPGPTD, %ecx
+ fillkpt(R(IdlePDPT), $0x0)
+#endif
+
+ ret
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c
new file mode 100644
index 0000000000..ea813b897c
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c
@@ -0,0 +1,2396 @@
+/*-
+ * Copyright (c) 1992 Terrence R. Lambert.
+ * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.584 2003/12/03 21:12:09 jhb Exp $");
+
+#include "opt_apic.h"
+#include "opt_atalk.h"
+#include "opt_compat.h"
+#include "opt_cpu.h"
+#include "opt_ddb.h"
+#include "opt_inet.h"
+#include "opt_ipx.h"
+#include "opt_isa.h"
+#include "opt_kstack_pages.h"
+#include "opt_maxmem.h"
+#include "opt_msgbuf.h"
+#include "opt_npx.h"
+#include "opt_perfmon.h"
+#include "opt_xen.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/signalvar.h>
+#include <sys/imgact.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/linker.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/reboot.h>
+#include <sys/callout.h>
+#include <sys/msgbuf.h>
+#include <sys/sched.h>
+#include <sys/sysent.h>
+#include <sys/sysctl.h>
+#include <sys/ucontext.h>
+#include <sys/vmmeter.h>
+#include <sys/bus.h>
+#include <sys/eventhandler.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_extern.h>
+
+#include <sys/user.h>
+#include <sys/exec.h>
+#include <sys/cons.h>
+
+#ifdef DDB
+#ifndef KDB
+#error KDB must be enabled in order for DDB to work!
+#endif
+#include <ddb/ddb.h>
+#include <ddb/db_sym.h>
+#endif
+
+#include <net/netisr.h>
+
+#include <machine/cpu.h>
+#include <machine/cputypes.h>
+#include <machine/reg.h>
+#include <machine/clock.h>
+#include <machine/specialreg.h>
+#include <machine/bootinfo.h>
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/pc/bios.h>
+#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
+#include <machine/proc.h>
+#ifdef PERFMON
+#include <machine/perfmon.h>
+#endif
+#ifdef SMP
+#include <machine/privatespace.h>
+#include <machine/smp.h>
+#endif
+
+#ifdef DEV_ISA
+#include <i386/isa/icu.h>
+#endif
+
+#include <isa/rtc.h>
+#include <sys/ptrace.h>
+#include <machine/sigframe.h>
+
+
+/* XEN includes */
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/xenfunc.h>
+#include <machine/xenvar.h>
+#include <machine/xen_intr.h>
+
+void Xhypervisor_callback(void);
+void failsafe_callback(void);
+
+/***************/
+
+
+/* Sanity check for __curthread() */
+CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
+
+extern void init386(void);
+extern void dblfault_handler(void);
+
+extern void printcpuinfo(void); /* XXX header file */
+extern void finishidentcpu(void);
+extern void panicifcpuunsupported(void);
+extern void initializecpu(void);
+void initvalues(start_info_t *startinfo);
+
+#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
+#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
+
+#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
+#define CPU_ENABLE_SSE
+#endif
+#if defined(CPU_DISABLE_SSE)
+#undef CPU_ENABLE_SSE
+#endif
+
+static void cpu_startup(void *);
+static void fpstate_drop(struct thread *td);
+static void get_fpcontext(struct thread *td, mcontext_t *mcp);
+static int set_fpcontext(struct thread *td, const mcontext_t *mcp);
+#ifdef CPU_ENABLE_SSE
+static void set_fpregs_xmm(struct save87 *, struct savexmm *);
+static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
+#endif /* CPU_ENABLE_SSE */
+SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
+
+#ifdef DDB
+extern vm_offset_t ksym_start, ksym_end;
+#endif
+
+int _udatasel, _ucodesel;
+u_int basemem;
+
+start_info_t *xen_start_info;
+unsigned long *xen_phys_machine;
+int xendebug_flags;
+int init_first = 0;
+int cold = 1;
+
+#ifdef COMPAT_43
+static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code);
+#endif
+#ifdef COMPAT_FREEBSD4
+static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask,
+ u_long code);
+#endif
+
+long Maxmem = 0;
+
+vm_paddr_t phys_avail[10];
+
+/* Must be 2 less so that a terminating 0/0 pair can signal the end of chunks. */
+#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
+
+struct kva_md_info kmi;
+
+static struct trapframe proc0_tf;
+#ifndef SMP
+static struct pcpu __pcpu;
+#endif
+
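+/*
+ * Queue page-table updates mapping `count' consecutive pseudo-physical
+ * pages starting at physindex into the 4-byte PTE slots beginning at
+ * physptrindex of the page-table page at physptr, then flush the queued
+ * requests to the hypervisor.  PTOM() translates a pseudo-physical frame
+ * number into the machine frame number Xen expects.
+ */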
+static void
+map_range(void *physptr, unsigned long physptrindex,
+ unsigned long physindex, int count, unsigned int flags) {
+ int i;
+ unsigned long pte, ppa;
+ for (i = 0; i < count; i++) {
+ pte = ((unsigned long)physptr) + (physptrindex << 2) + (i << 2);
+ ppa = (PTOM(physindex + i) << PAGE_SHIFT) | flags | PG_V | PG_A;
+ xpq_queue_pt_update((pt_entry_t *)pte, ppa);
+ }
+ mcl_flush_queue();
+}
+
+struct mem_range_softc mem_range_softc;
+
+static void
+cpu_startup(void *dummy)
+{
+ /*
+ * Good {morning,afternoon,evening,night}.
+ */
+ /* XXX need to write clock driver */
+ startrtclock();
+
+ printcpuinfo();
+ panicifcpuunsupported();
+#ifdef PERFMON
+ perfmon_init();
+#endif
+ printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem),
+ ptoa((uintmax_t)Maxmem) / 1048576);
+ /*
+ * Display any holes after the first chunk of extended memory.
+ */
+ if (bootverbose) {
+ int indx;
+
+ printf("Physical memory chunk(s):\n");
+ for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
+ vm_paddr_t size;
+
+ size = phys_avail[indx + 1] - phys_avail[indx];
+ printf(
+ "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
+ (uintmax_t)phys_avail[indx],
+ (uintmax_t)phys_avail[indx + 1] - 1,
+ (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
+ }
+ }
+
+ vm_ksubmap_init(&kmi);
+
+ printf("avail memory = %ju (%ju MB)\n",
+ ptoa((uintmax_t)cnt.v_free_count),
+ ptoa((uintmax_t)cnt.v_free_count) / 1048576);
+
+ /*
+ * Set up buffers, so they can be used to read disk labels.
+ */
+ bufinit();
+ vm_pager_bufferinit();
+
+ cpu_setregs();
+
+}
+
+/*
+ * Send a signal to a process.
+ *
+ * The stack is set up so that the sigcode stored
+ * at the top calls the handler, followed by a kcall
+ * to the sigreturn routine below.  After sigreturn
+ * resets the signal mask, the stack, and the
+ * frame pointer, it returns to the
+ * user-specified pc and psl.
+ */
+#ifdef COMPAT_43
+static void
+osendsig(catcher, sig, mask, code)
+ sig_t catcher;
+ int sig;
+ sigset_t *mask;
+ u_long code;
+{
+ struct osigframe sf, *fp;
+ struct proc *p;
+ struct thread *td;
+ struct sigacts *psp;
+ struct trapframe *regs;
+ int oonstack;
+
+ td = curthread;
+ p = td->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ psp = p->p_sigacts;
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+ regs = td->td_frame;
+ oonstack = sigonstack(regs->tf_esp);
+
+ /* Allocate space for the signal handler context. */
+ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
+ SIGISMEMBER(psp->ps_sigonstack, sig)) {
+ fp = (struct osigframe *)(td->td_sigstk.ss_sp +
+ td->td_sigstk.ss_size - sizeof(struct osigframe));
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+#endif
+ } else
+ fp = (struct osigframe *)regs->tf_esp - 1;
+
+ /* Translate the signal if appropriate. */
+ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
+ sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+
+ /* Build the argument list for the signal handler. */
+ sf.sf_signum = sig;
+ sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
+ if (SIGISMEMBER(psp->ps_siginfo, sig)) {
+ /* Signal handler installed with SA_SIGINFO. */
+ sf.sf_arg2 = (register_t)&fp->sf_siginfo;
+ sf.sf_siginfo.si_signo = sig;
+ sf.sf_siginfo.si_code = code;
+ sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
+ } else {
+ /* Old FreeBSD-style arguments. */
+ sf.sf_arg2 = code;
+ sf.sf_addr = regs->tf_err;
+ sf.sf_ahu.sf_handler = catcher;
+ }
+ mtx_unlock(&psp->ps_mtx);
+ PROC_UNLOCK(p);
+
+ /* Save most if not all of trap frame. */
+ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
+ sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
+ sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
+ sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
+ sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
+ sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
+ sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
+ sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
+ sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
+ sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
+ sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
+ sf.sf_siginfo.si_sc.sc_gs = rgs();
+ sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
+
+ /* Build the signal context to be used by osigreturn(). */
+ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
+ SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
+ sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
+ sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
+ sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
+ sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
+ sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
+ sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
+
+ /*
+ * Copy the sigframe out to the user's stack.
+ */
+ if (copyout(&sf, fp, sizeof(*fp)) != 0) {
+#ifdef DEBUG
+ printf("process %ld has trashed its stack\n", (long)p->p_pid);
+#endif
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ regs->tf_esp = (int)fp;
+ regs->tf_eip = PS_STRINGS - szosigcode;
+ regs->tf_eflags &= ~PSL_T;
+ regs->tf_cs = _ucodesel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _udatasel;
+ load_gs(_udatasel);
+ regs->tf_ss = _udatasel;
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
+#endif /* COMPAT_43 */
+
+#ifdef COMPAT_FREEBSD4
+static void
+freebsd4_sendsig(catcher, sig, mask, code)
+ sig_t catcher;
+ int sig;
+ sigset_t *mask;
+ u_long code;
+{
+ struct sigframe4 sf, *sfp;
+ struct proc *p;
+ struct thread *td;
+ struct sigacts *psp;
+ struct trapframe *regs;
+ int oonstack;
+
+ td = curthread;
+ p = td->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ psp = p->p_sigacts;
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+ regs = td->td_frame;
+ oonstack = sigonstack(regs->tf_esp);
+
+ /* Save user context. */
+ bzero(&sf, sizeof(sf));
+ sf.sf_uc.uc_sigmask = *mask;
+ sf.sf_uc.uc_stack = td->td_sigstk;
+ sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
+ ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
+ sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
+ sf.sf_uc.uc_mcontext.mc_gs = rgs();
+ bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
+
+ /* Allocate space for the signal handler context. */
+ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
+ SIGISMEMBER(psp->ps_sigonstack, sig)) {
+ sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp +
+ td->td_sigstk.ss_size - sizeof(struct sigframe4));
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+#endif
+ } else
+ sfp = (struct sigframe4 *)regs->tf_esp - 1;
+
+ /* Translate the signal if appropriate. */
+ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
+ sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+
+ /* Build the argument list for the signal handler. */
+ sf.sf_signum = sig;
+ sf.sf_ucontext = (register_t)&sfp->sf_uc;
+ if (SIGISMEMBER(psp->ps_siginfo, sig)) {
+ /* Signal handler installed with SA_SIGINFO. */
+ sf.sf_siginfo = (register_t)&sfp->sf_si;
+ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
+
+ /* Fill in POSIX parts */
+ sf.sf_si.si_signo = sig;
+ sf.sf_si.si_code = code;
+ sf.sf_si.si_addr = (void *)regs->tf_err;
+ } else {
+ /* Old FreeBSD-style arguments. */
+ sf.sf_siginfo = code;
+ sf.sf_addr = regs->tf_err;
+ sf.sf_ahu.sf_handler = catcher;
+ }
+ mtx_unlock(&psp->ps_mtx);
+ PROC_UNLOCK(p);
+
+ /*
+ * Copy the sigframe out to the user's stack.
+ */
+ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
+#ifdef DEBUG
+ printf("process %ld has trashed its stack\n", (long)p->p_pid);
+#endif
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ regs->tf_esp = (int)sfp;
+ regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode;
+ regs->tf_eflags &= ~PSL_T;
+ regs->tf_cs = _ucodesel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _udatasel;
+ regs->tf_ss = _udatasel;
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
+#endif /* COMPAT_FREEBSD4 */
+
+void
+sendsig(catcher, sig, mask, code)
+ sig_t catcher;
+ int sig;
+ sigset_t *mask;
+ u_long code;
+{
+ struct sigframe sf, *sfp;
+ struct proc *p;
+ struct thread *td;
+ struct sigacts *psp;
+ char *sp;
+ struct trapframe *regs;
+ int oonstack;
+
+ td = curthread;
+ p = td->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ psp = p->p_sigacts;
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+#ifdef COMPAT_FREEBSD4
+ if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
+ freebsd4_sendsig(catcher, sig, mask, code);
+ return;
+ }
+#endif
+#ifdef COMPAT_43
+ if (SIGISMEMBER(psp->ps_osigset, sig)) {
+ osendsig(catcher, sig, mask, code);
+ return;
+ }
+#endif
+ regs = td->td_frame;
+ oonstack = sigonstack(regs->tf_esp);
+
+ /* Save user context. */
+ bzero(&sf, sizeof(sf));
+ sf.sf_uc.uc_sigmask = *mask;
+ sf.sf_uc.uc_stack = td->td_sigstk;
+ sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
+ ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
+ sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
+ sf.sf_uc.uc_mcontext.mc_gs = rgs();
+ bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
+ sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
+ get_fpcontext(td, &sf.sf_uc.uc_mcontext);
+ fpstate_drop(td);
+
+ /* Allocate space for the signal handler context. */
+ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
+ SIGISMEMBER(psp->ps_sigonstack, sig)) {
+ sp = td->td_sigstk.ss_sp +
+ td->td_sigstk.ss_size - sizeof(struct sigframe);
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+#endif
+ } else
+ sp = (char *)regs->tf_esp - sizeof(struct sigframe);
+ /* Align to 16 bytes. */
+ sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
+
+ /* Translate the signal if appropriate. */
+ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
+ sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+
+ /* Build the argument list for the signal handler. */
+ sf.sf_signum = sig;
+ sf.sf_ucontext = (register_t)&sfp->sf_uc;
+ if (SIGISMEMBER(psp->ps_siginfo, sig)) {
+ /* Signal handler installed with SA_SIGINFO. */
+ sf.sf_siginfo = (register_t)&sfp->sf_si;
+ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
+
+ /* Fill in POSIX parts */
+ sf.sf_si.si_signo = sig;
+ sf.sf_si.si_code = code;
+ sf.sf_si.si_addr = (void *)regs->tf_err;
+ } else {
+ /* Old FreeBSD-style arguments. */
+ sf.sf_siginfo = code;
+ sf.sf_addr = regs->tf_err;
+ sf.sf_ahu.sf_handler = catcher;
+ }
+ mtx_unlock(&psp->ps_mtx);
+ PROC_UNLOCK(p);
+ /*
+ * Copy the sigframe out to the user's stack.
+ */
+ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
+#ifdef DEBUG
+ printf("process %ld has trashed its stack\n", (long)p->p_pid);
+#endif
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ regs->tf_esp = (int)sfp;
+ regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
+ regs->tf_eflags &= ~PSL_T;
+ regs->tf_cs = _ucodesel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _udatasel;
+ regs->tf_ss = _udatasel;
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
+
+/*
+ * Build siginfo_t for SA thread
+ */
+void
+cpu_thread_siginfo(int sig, u_long code, siginfo_t *si)
+{
+ struct proc *p;
+ struct thread *td;
+
+ td = curthread;
+ p = td->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+
+ bzero(si, sizeof(*si));
+ si->si_signo = sig;
+ si->si_code = code;
+ si->si_addr = (void *)td->td_frame->tf_err;
+ /* XXXKSE fill other fields */
+}
+
+/*
+ * System call to clean up state after a signal
+ * has been taken. Reset signal mask and
+ * stack state from context left by sendsig (above).
+ * Return to previous pc and psl as specified by
+ * context left by sendsig. Check carefully to
+ * make sure that the user has not modified the
+ * state to gain improper privileges.
+ *
+ * MPSAFE
+ */
+#ifdef COMPAT_43
+int
+osigreturn(td, uap)
+ struct thread *td;
+ struct osigreturn_args /* {
+ struct osigcontext *sigcntxp;
+ } */ *uap;
+{
+ struct osigcontext sc;
+ struct trapframe *regs;
+ struct osigcontext *scp;
+ struct proc *p = td->td_proc;
+ int eflags, error;
+
+ regs = td->td_frame;
+ error = copyin(uap->sigcntxp, &sc, sizeof(sc));
+ if (error != 0)
+ return (error);
+ scp = &sc;
+ eflags = scp->sc_ps;
+ /*
+ * Don't allow users to change privileged or reserved flags.
+ */
+ /*
+ * XXX do allow users to change the privileged flag PSL_RF.
+ * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
+ * should sometimes set it there too. tf_eflags is kept in
+ * the signal context during signal handling and there is no
+ * other place to remember it, so the PSL_RF bit may be
+ * corrupted by the signal handler without us knowing.
+ * Corruption of the PSL_RF bit at worst causes one more or
+ * one less debugger trap, so allowing it is fairly harmless.
+ */
+ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+ return (EINVAL);
+ }
+
+ /*
+ * Don't allow users to load a valid privileged %cs. Let the
+ * hardware check for invalid selectors, excess privilege in
+ * other selectors, invalid %eip's and invalid %esp's.
+ */
+ if (!CS_SECURE(scp->sc_cs)) {
+ trapsignal(td, SIGBUS, T_PROTFLT);
+ return (EINVAL);
+ }
+ regs->tf_ds = scp->sc_ds;
+ regs->tf_es = scp->sc_es;
+ regs->tf_fs = scp->sc_fs;
+
+ /* Restore remaining registers. */
+ regs->tf_eax = scp->sc_eax;
+ regs->tf_ebx = scp->sc_ebx;
+ regs->tf_ecx = scp->sc_ecx;
+ regs->tf_edx = scp->sc_edx;
+ regs->tf_esi = scp->sc_esi;
+ regs->tf_edi = scp->sc_edi;
+ regs->tf_cs = scp->sc_cs;
+ regs->tf_ss = scp->sc_ss;
+ regs->tf_isp = scp->sc_isp;
+ regs->tf_ebp = scp->sc_fp;
+ regs->tf_esp = scp->sc_sp;
+ regs->tf_eip = scp->sc_pc;
+ regs->tf_eflags = eflags;
+
+ PROC_LOCK(p);
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ if (scp->sc_onstack & 1)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+ else
+ td->td_sigstk.ss_flags &= ~SS_ONSTACK;
+#endif
+ SIGSETOLD(td->td_sigmask, scp->sc_mask);
+ SIG_CANTMASK(td->td_sigmask);
+ signotify(td);
+ PROC_UNLOCK(p);
+ return (EJUSTRETURN);
+}
+#endif /* COMPAT_43 */
+
+#ifdef COMPAT_FREEBSD4
+/*
+ * MPSAFE
+ */
+int
+freebsd4_sigreturn(td, uap)
+ struct thread *td;
+ struct freebsd4_sigreturn_args /* {
+ const ucontext4 *sigcntxp;
+ } */ *uap;
+{
+ struct ucontext4 uc;
+ struct proc *p = td->td_proc;
+ struct trapframe *regs;
+ const struct ucontext4 *ucp;
+ int cs, eflags, error;
+
+ error = copyin(uap->sigcntxp, &uc, sizeof(uc));
+ if (error != 0)
+ return (error);
+ ucp = &uc;
+ regs = td->td_frame;
+ eflags = ucp->uc_mcontext.mc_eflags;
+ /*
+ * Don't allow users to change privileged or reserved flags.
+ */
+ /*
+ * XXX do allow users to change the privileged flag PSL_RF.
+ * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
+ * should sometimes set it there too. tf_eflags is kept in
+ * the signal context during signal handling and there is no
+ * other place to remember it, so the PSL_RF bit may be
+ * corrupted by the signal handler without us knowing.
+ * Corruption of the PSL_RF bit at worst causes one more or
+ * one less debugger trap, so allowing it is fairly harmless.
+ */
+ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+ printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags);
+ return (EINVAL);
+ }
+
+ /*
+ * Don't allow users to load a valid privileged %cs. Let the
+ * hardware check for invalid selectors, excess privilege in
+ * other selectors, invalid %eip's and invalid %esp's.
+ */
+ cs = ucp->uc_mcontext.mc_cs;
+ if (!CS_SECURE(cs)) {
+ printf("freebsd4_sigreturn: cs = 0x%x\n", cs);
+ trapsignal(td, SIGBUS, T_PROTFLT);
+ return (EINVAL);
+ }
+
+ bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
+
+ PROC_LOCK(p);
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ if (ucp->uc_mcontext.mc_onstack & 1)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+ else
+ td->td_sigstk.ss_flags &= ~SS_ONSTACK;
+#endif
+
+ td->td_sigmask = ucp->uc_sigmask;
+ SIG_CANTMASK(td->td_sigmask);
+ signotify(td);
+ PROC_UNLOCK(p);
+ return (EJUSTRETURN);
+}
+#endif /* COMPAT_FREEBSD4 */
+
+/*
+ * MPSAFE
+ */
+int
+sigreturn(td, uap)
+ struct thread *td;
+ struct sigreturn_args /* {
+ const __ucontext *sigcntxp;
+ } */ *uap;
+{
+ ucontext_t uc;
+ struct proc *p = td->td_proc;
+ struct trapframe *regs;
+ const ucontext_t *ucp;
+ int cs, eflags, error, ret;
+
+ error = copyin(uap->sigcntxp, &uc, sizeof(uc));
+ if (error != 0)
+ return (error);
+ ucp = &uc;
+ regs = td->td_frame;
+ eflags = ucp->uc_mcontext.mc_eflags;
+ /*
+ * Don't allow users to change privileged or reserved flags.
+ */
+ /*
+ * XXX do allow users to change the privileged flag PSL_RF.
+ * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
+ * should sometimes set it there too. tf_eflags is kept in
+ * the signal context during signal handling and there is no
+ * other place to remember it, so the PSL_RF bit may be
+ * corrupted by the signal handler without us knowing.
+ * Corruption of the PSL_RF bit at worst causes one more or
+ * one less debugger trap, so allowing it is fairly harmless.
+ */
+#if 0
+ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+ __asm__("int $0x3");
+ printf("sigreturn: eflags = 0x%x\n", eflags);
+ return (EINVAL);
+ }
+#endif
+ /*
+ * Don't allow users to load a valid privileged %cs. Let the
+ * hardware check for invalid selectors, excess privilege in
+ * other selectors, invalid %eip's and invalid %esp's.
+ */
+ cs = ucp->uc_mcontext.mc_cs;
+ if (!CS_SECURE(cs)) {
+ __asm__("int $0x3");
+ printf("sigreturn: cs = 0x%x\n", cs);
+ trapsignal(td, SIGBUS, T_PROTFLT);
+ return (EINVAL);
+ }
+
+ ret = set_fpcontext(td, &ucp->uc_mcontext);
+ if (ret != 0)
+ return (ret);
+ bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
+ PROC_LOCK(p);
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+ if (ucp->uc_mcontext.mc_onstack & 1)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+ else
+ td->td_sigstk.ss_flags &= ~SS_ONSTACK;
+#endif
+
+ td->td_sigmask = ucp->uc_sigmask;
+ SIG_CANTMASK(td->td_sigmask);
+ signotify(td);
+ PROC_UNLOCK(p);
+ return (EJUSTRETURN);
+}
+
+/*
+ * Machine dependent boot() routine
+ *
+ * Nothing to do here yet; some functionality may eventually be
+ * grafted back here from boot().
+ */
+void
+cpu_boot(int howto)
+{
+}
+
+/*
+ * Shutdown the CPU as much as possible
+ */
+void
+cpu_halt(void)
+{
+ HYPERVISOR_shutdown();
+}
+
+/*
+ * Hook to idle the CPU when possible. In the SMP case we default to
+ * off because a halted cpu will not currently pick up a new thread in the
+ * run queue until the next timer tick. If turned on this will result in
+ * approximately a 4.2% loss in real time performance in buildworld tests
+ * (but improves user and sys times oddly enough), and saves approximately
+ * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3).
+ *
+ * XXX we need to have a cpu mask of idle cpus and generate an IPI or
+ * otherwise generate some sort of interrupt to wake up cpus sitting in HLT.
+ * Then we can have our cake and eat it too.
+ *
+ * XXX I'm turning it on for SMP as well by default for now. It seems to
+ * help lock contention somewhat, and this is critical for HTT. -Peter
+ */
+static int cpu_idle_hlt = 1;
+SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
+ &cpu_idle_hlt, 0, "Idle loop HLT enable");
+
+static void
+cpu_idle_default(void)
+{
+#if 0
+ /*
+	 * We must guarantee that hlt is the very next
+	 * instruction after sti, or we introduce a
+	 * timing window.
+ */
+ __asm __volatile("sti; hlt");
+#endif
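+	/*
+	 * Under Xen we cannot execute hlt directly; idle_block() blocks
+	 * the domain in the hypervisor until the next event arrives.
+	 */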
+ idle_block();
+ enable_intr();
+}
+
+/*
+ * Note that we have to be careful here to avoid a race between checking
+ * sched_runnable() and actually halting. If we don't do this, we may waste
+ * the time between calling hlt and the next interrupt even though there
+ * is a runnable process.
+ */
+void
+cpu_idle(void)
+{
+
+#ifdef SMP
+ if (mp_grab_cpu_hlt())
+ return;
+#endif
+
+ if (cpu_idle_hlt) {
+ disable_intr();
+ if (sched_runnable())
+ enable_intr();
+ else
+ (*cpu_idle_hook)();
+ }
+}
+
+/* Other subsystems (e.g., ACPI) can hook this later. */
+void (*cpu_idle_hook)(void) = cpu_idle_default;
+
+/*
+ * Clear registers on exec
+ */
+void
+exec_setregs(td, entry, stack, ps_strings)
+ struct thread *td;
+ u_long entry;
+ u_long stack;
+ u_long ps_strings;
+{
+ struct trapframe *regs = td->td_frame;
+ struct pcb *pcb = td->td_pcb;
+
+	/* Reset pcb->pcb_gs and %gs before possibly invalidating it. */
+ pcb->pcb_gs = _udatasel;
+ load_gs(_udatasel);
+
+ if (td->td_proc->p_md.md_ldt)
+ user_ldt_free(td);
+
+ bzero((char *)regs, sizeof(struct trapframe));
+ regs->tf_eip = entry;
+ regs->tf_esp = stack;
+ regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
+ regs->tf_ss = _udatasel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _udatasel;
+ regs->tf_cs = _ucodesel;
+
+ /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */
+ regs->tf_ebx = ps_strings;
+
+ /*
+ * Reset the hardware debug registers if they were in use.
+ * They won't have any meaning for the newly exec'd process.
+ */
+ if (pcb->pcb_flags & PCB_DBREGS) {
+ pcb->pcb_dr0 = 0;
+ pcb->pcb_dr1 = 0;
+ pcb->pcb_dr2 = 0;
+ pcb->pcb_dr3 = 0;
+ pcb->pcb_dr6 = 0;
+ pcb->pcb_dr7 = 0;
+ if (pcb == PCPU_GET(curpcb)) {
+ /*
+ * Clear the debug registers on the running
+ * CPU, otherwise they will end up affecting
+ * the next process we switch to.
+ */
+ reset_dbregs();
+ }
+ pcb->pcb_flags &= ~PCB_DBREGS;
+ }
+
+ /*
+ * Initialize the math emulator (if any) for the current process.
+ * Actually, just clear the bit that says that the emulator has
+ * been initialized. Initialization is delayed until the process
+ * traps to the emulator (if it is done at all) mainly because
+ * emulators don't provide an entry point for initialization.
+ */
+ td->td_pcb->pcb_flags &= ~FP_SOFTFP;
+
+ /* Initialize the npx (if any) for the current process. */
+ /*
+ * XXX the above load_cr0() also initializes it and is a layering
+ * violation if NPX is configured. It drops the npx partially
+ * and this would be fatal if we were interrupted now, and decided
+ * to force the state to the pcb, and checked the invariant
+ * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL).
+ * ALL of this can happen except the check. The check used to
+ * happen and be fatal later when we didn't complete the drop
+ * before returning to user mode. This should be fixed properly
+ * soon.
+ */
+ fpstate_drop(td);
+
+ /*
+ * XXX - Linux emulator
+	 * Make sure %edx is 0x0 on entry.  Linux binaries depend
+ * on it.
+ */
+ td->td_retval[1] = 0;
+}
+
+void
+cpu_setregs(void)
+{
+ /* nothing for Xen to do */
+}
+
+static int
+sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
+ req);
+ if (!error && req->newptr)
+ resettodr();
+ return (error);
+}
+
+SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
+ &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
+
+SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
+ CTLFLAG_RW, &disable_rtc_set, 0, "");
+
+SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
+ CTLFLAG_RD, &bootinfo, bootinfo, "");
+
+u_long bootdev; /* not a dev_t - encoding is different */
+SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
+ CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");
+
+/*
+ * Initialize 386 and configure to run kernel
+ */
+
+/*
+ * Initialize segments & interrupt table
+ */
+
+int _default_ldt;
+union descriptor *gdt; /* global descriptor table */
+static struct gate_descriptor idt0[NIDT];
+struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
+union descriptor *ldt; /* local descriptor table */
+struct region_descriptor r_idt; /* table descriptors */
+
+int private_tss; /* flag indicating private tss */
+
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+extern int has_f00f_bug;
+#endif
+
+static struct i386tss dblfault_tss;
+static char dblfault_stack[PAGE_SIZE];
+
+extern struct user *proc0uarea;
+extern vm_offset_t proc0kstack;
+
+
+/* software prototypes -- in more palatable form */
+struct soft_segment_descriptor gdt_segs[] = {
+/* GNULL_SEL 0 Null Descriptor */
+{ 0x0, /* segment base address */
+ 0x0, /* length */
+ 0, /* segment type */
+ SEL_KPL, /* segment descriptor priority level */
+ 0, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+/* GCODE_SEL 1 Code Descriptor for kernel */
+{ 0x0, /* segment base address */
+ 0x0, /* length - all address space */
+ 0, /* segment type */
+ 0, /* segment descriptor priority level */
+ 0, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+
+/* GDATA_SEL 2 Data Descriptor for kernel */
+{ 0x0, /* segment base address */
+ 0x0, /* length - all address space */
+ 0, /* segment type */
+ 0, /* segment descriptor priority level */
+ 0, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+
+/* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */
+{ 0x0, /* segment base address */
+ 0xfffff, /* length - all address space */
+ SDT_MEMRWA, /* segment type */
+ SEL_KPL, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 1, /* default 32 vs 16 bit size */
+ 1 /* limit granularity (byte/page units)*/ },
+#if 0
+/* GPROC0_SEL 4 Proc 0 Tss Descriptor */
+{
+ 0x0, /* segment base address */
+ sizeof(struct i386tss)-1,/* length */
+ SDT_SYS386TSS, /* segment type */
+ 0, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 0, /* unused - default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+/* GLDT_SEL 5 LDT Descriptor */
+{ (int) ldt, /* segment base address */
+ sizeof(ldt)-1, /* length - all address space */
+ SDT_SYSLDT, /* segment type */
+ SEL_UPL, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 0, /* unused - default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+/* GUSERLDT_SEL 6 User LDT Descriptor per process */
+{ (int) ldt, /* segment base address */
+ (512 * sizeof(union descriptor)-1), /* length */
+ SDT_SYSLDT, /* segment type */
+ 0, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 0, /* unused - default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+/* GTGATE_SEL 7 Null Descriptor - Placeholder */
+{ 0x0, /* segment base address */
+ 0x0, /* length - all address space */
+ 0, /* segment type */
+ 0, /* segment descriptor priority level */
+ 0, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
+{ 0x400, /* segment base address */
+ 0xfffff, /* length */
+ SDT_MEMRWA, /* segment type */
+ 0, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 1, /* default 32 vs 16 bit size */
+ 1 /* limit granularity (byte/page units)*/ },
+/* GPANIC_SEL 9 Panic Tss Descriptor */
+{ (int) &dblfault_tss, /* segment base address */
+ sizeof(struct i386tss)-1,/* length - all address space */
+ SDT_SYS386TSS, /* segment type */
+ 0, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 0, /* unused - default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+/* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
+{ 0, /* segment base address (overwritten) */
+ 0xfffff, /* length */
+ SDT_MEMERA, /* segment type */
+ 0, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 1 /* limit granularity (byte/page units)*/ },
+/* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
+{ 0, /* segment base address (overwritten) */
+ 0xfffff, /* length */
+ SDT_MEMERA, /* segment type */
+ 0, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 1 /* limit granularity (byte/page units)*/ },
+/* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
+{ 0, /* segment base address (overwritten) */
+ 0xfffff, /* length */
+ SDT_MEMRWA, /* segment type */
+ 0, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 1, /* default 32 vs 16 bit size */
+ 1 /* limit granularity (byte/page units)*/ },
+/* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
+{ 0, /* segment base address (overwritten) */
+ 0xfffff, /* length */
+ SDT_MEMRWA, /* segment type */
+ 0, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 1 /* limit granularity (byte/page units)*/ },
+/* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
+{ 0, /* segment base address (overwritten) */
+ 0xfffff, /* length */
+ SDT_MEMRWA, /* segment type */
+ 0, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 1 /* limit granularity (byte/page units)*/ },
+#endif
+};
+
+static struct soft_segment_descriptor ldt_segs[] = {
+ /* Null Descriptor - overwritten by call gate */
+{ 0x0, /* segment base address */
+ 0x0, /* length - all address space */
+ 0, /* segment type */
+ 0, /* segment descriptor priority level */
+ 0, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+ /* Null Descriptor - overwritten by call gate */
+{ 0x0, /* segment base address */
+ 0x0, /* length - all address space */
+ 0, /* segment type */
+ 0, /* segment descriptor priority level */
+ 0, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+ /* Null Descriptor - overwritten by call gate */
+{ 0x0, /* segment base address */
+ 0x0, /* length - all address space */
+ 0, /* segment type */
+ 0, /* segment descriptor priority level */
+ 0, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+ /* Code Descriptor for user */
+{ 0x0, /* segment base address */
+ 0xfffff, /* length - all address space */
+ SDT_MEMERA, /* segment type */
+ SEL_UPL, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 1, /* default 32 vs 16 bit size */
+ 1 /* limit granularity (byte/page units)*/ },
+ /* Null Descriptor - overwritten by call gate */
+{ 0x0, /* segment base address */
+ 0x0, /* length - all address space */
+ 0, /* segment type */
+ 0, /* segment descriptor priority level */
+ 0, /* segment descriptor present */
+ 0, 0,
+ 0, /* default 32 vs 16 bit size */
+ 0 /* limit granularity (byte/page units)*/ },
+ /* Data Descriptor for user */
+{ 0x0, /* segment base address */
+ 0xfffff, /* length - all address space */
+ SDT_MEMRWA, /* segment type */
+ SEL_UPL, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, 0,
+ 1, /* default 32 vs 16 bit size */
+ 1 /* limit granularity (byte/page units)*/ },
+};
+
+struct proc_ldt default_proc_ldt;
+
+void
+setidt(idx, func, typ, dpl, selec)
+ int idx;
+ inthand_t *func;
+ int typ;
+ int dpl;
+ int selec;
+{
+ struct gate_descriptor *ip;
+
+ ip = idt + idx;
+ ip->gd_looffset = (int)func;
+ ip->gd_selector = selec;
+ ip->gd_stkcpy = 0;
+ ip->gd_xx = 0;
+ ip->gd_type = typ;
+ ip->gd_dpl = dpl;
+ ip->gd_p = 1;
+	ip->gd_hioffset = ((int)func) >> 16;
+}
+
+#define IDTVEC(name) __CONCAT(X,name)
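+/*
+ * IDTVEC(name) expands to Xname, matching the interrupt entry points
+ * defined in exception.s.
+ */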
+
+extern inthand_t
+ IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
+ IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
+ IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
+ IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
+ IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
+
+#ifdef DDB
+/*
+ * Display the index and function name of any IDT entries that don't use
+ * the default 'rsvd' entry point.
+ */
+DB_SHOW_COMMAND(idt, db_show_idt)
+{
+ struct gate_descriptor *ip;
+ int idx, quit;
+ uintptr_t func;
+
+ ip = idt;
+ db_setup_paging(db_simple_pager, &quit, DB_LINES_PER_PAGE);
+ for (idx = 0, quit = 0; idx < NIDT; idx++) {
+ func = (ip->gd_hioffset << 16 | ip->gd_looffset);
+ if (func != (uintptr_t)&IDTVEC(rsvd)) {
+ db_printf("%3d\t", idx);
+ db_printsym(func, DB_STGY_PROC);
+ db_printf("\n");
+ }
+ ip++;
+ }
+}
+#endif
+
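+/*
+ * Convert a hardware segment descriptor to the software form by
+ * reassembling the split base and limit fields.
+ */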
+void
+sdtossd(sd, ssd)
+ struct segment_descriptor *sd;
+ struct soft_segment_descriptor *ssd;
+{
+ ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase;
+ ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
+ ssd->ssd_type = sd->sd_type;
+ ssd->ssd_dpl = sd->sd_dpl;
+ ssd->ssd_p = sd->sd_p;
+ ssd->ssd_def32 = sd->sd_def32;
+ ssd->ssd_gran = sd->sd_gran;
+}
+
+#define PHYSMAP_SIZE (2 * 8)
+
+/*
+ * Populate the (physmap) array with base/bound pairs describing the
+ * available physical memory in the system, then test this memory and
+ * build the phys_avail array describing the actually-available memory.
+ *
+ * If we cannot accurately determine the physical memory map, then use
+ * value from the 0xE801 call, and failing that, the RTC.
+ *
+ * Total memory size may be set by the kernel environment variable
+ * hw.physmem or the compile-time define MAXMEM.
+ *
+ * XXX first should be vm_paddr_t.
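+ *
+ * In this Xen port the memory size comes directly from
+ * xen_start_info->nr_pages, so the BIOS 0xE801/RTC fallbacks described
+ * above are not used.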
+ */
+static void
+getmemsize(void)
+{
+ int i;
+ printf("start_info %p\n", xen_start_info);
+ printf("start_info->nr_pages %ld\n", xen_start_info->nr_pages);
+ Maxmem = xen_start_info->nr_pages - init_first;
+ /* call pmap initialization to make new kernel address space */
+ pmap_bootstrap((init_first)<< PAGE_SHIFT, 0);
+ for (i = 0; i < 10; i++)
+ phys_avail[i] = 0;
+#ifdef MAXMEM
+ if (MAXMEM/4 < Maxmem)
+ Maxmem = MAXMEM/4;
+#endif
+ physmem = Maxmem;
+ avail_end = ptoa(Maxmem) - round_page(MSGBUF_SIZE);
+ phys_avail[0] = init_first << PAGE_SHIFT;
+ phys_avail[1] = avail_end;
+}
+
+extern pt_entry_t *KPTphys;
+extern int kernbase;
+pteinfo_t *pteinfo_list;
+unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0));
+
+/* Linux infection */
+#define PAGE_OFFSET KERNBASE
+#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
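+/*
+ * __pa() converts a kernel virtual address to a pseudo-physical address
+ * by subtracting KERNBASE; PFN_UP() rounds a byte address up to the next
+ * page frame number.
+ */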
+void
+initvalues(start_info_t *startinfo)
+{
+ int i;
+ xen_start_info = startinfo;
+ xen_phys_machine = (unsigned long *)startinfo->mfn_list;
+	unsigned long tmpindex = ((__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
+	    xen_start_info->nr_pt_frames) + 3; /* 3 = pages allocated after the pts, plus 1 */
+ xendebug_flags = 0xffffffff;
+ /* pre-zero unused mapped pages */
+ bzero((char *)(KERNBASE + (tmpindex << PAGE_SHIFT)), (1024 - tmpindex)*PAGE_SIZE);
+
+ KPTphys = (pt_entry_t *)xpmap_ptom(__pa(startinfo->pt_base + PAGE_SIZE));
+ IdlePTD = (pd_entry_t *)xpmap_ptom(__pa(startinfo->pt_base));
+ XENPRINTF("IdlePTD %p\n", IdlePTD);
+ XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
+ "mod_start: 0x%lx mod_len: 0x%lx\n",
+ xen_start_info->nr_pages, xen_start_info->shared_info,
+ xen_start_info->flags, xen_start_info->pt_base,
+ xen_start_info->mod_start, xen_start_info->mod_len);
+
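+	/*
+	 * tmpindex walks the page frames Xen mapped for us beyond the
+	 * page tables; each allocation below claims the next page(s)
+	 * and advances the index.
+	 */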
+ /* Map proc0's UPAGES */
+ proc0uarea = (struct user *)(KERNBASE + (tmpindex << PAGE_SHIFT));
+ tmpindex += UAREA_PAGES;
+
+ /* Map proc0's KSTACK */
+ proc0kstack = KERNBASE + (tmpindex << PAGE_SHIFT);
+ tmpindex += KSTACK_PAGES;
+
+ /* allocate page for gdt */
+ gdt = (union descriptor *)(KERNBASE + (tmpindex << PAGE_SHIFT));
+ tmpindex++;
+
+ /* allocate page for ldt */
+ ldt = (union descriptor *)(KERNBASE + (tmpindex << PAGE_SHIFT));
+ tmpindex++;
+
+#ifdef PMAP_DEBUG
+ pteinfo_list = (pteinfo_t *)(KERNBASE + (tmpindex << PAGE_SHIFT));
+ tmpindex += ((xen_start_info->nr_pages >> 10) + 1)*(1 + XPQ_CALL_DEPTH*XPQ_CALL_COUNT);
+
+ if (tmpindex > 980)
+ __asm__("int3");
+#endif
+ /* unmap remaining pages from initial 4MB chunk */
+ for (i = tmpindex; i%1024 != 0; i++)
+ PT_CLEAR(KERNBASE + (i << PAGE_SHIFT), TRUE);
+
+ /* allocate remainder of NKPT pages */
+ map_range(IdlePTD, KPTDI + 1, tmpindex, NKPT-1, PG_U | PG_M | PG_RW);
+ tmpindex += NKPT-1;
+ map_range(IdlePTD, PTDPTDI, __pa(xen_start_info->pt_base) >> PAGE_SHIFT, 1, 0);
+
+ xpq_queue_pt_update(KPTphys + tmpindex, xen_start_info->shared_info | PG_A | PG_V | PG_RW);
+ HYPERVISOR_shared_info = (shared_info_t *)(KERNBASE + (tmpindex << PAGE_SHIFT));
+ tmpindex++;
+
+ mcl_flush_queue();
+ HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine;
+ HYPERVISOR_shared_info->arch.mfn_to_pfn_start = (unsigned long)xen_machine_phys;
+
+ init_first = tmpindex;
+
+}
+
+void
+init386(void)
+{
+ int gsel_tss, metadata_missing, off, x, error;
+ struct pcpu *pc;
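+	/*
+	 * Under Xen the IDT is not loaded directly with lidt; instead this
+	 * table of (vector, allowed privilege, code selector, handler)
+	 * entries is registered via HYPERVISOR_set_trap_table() below.
+	 */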
+ trap_info_t trap_table[] = {
+ { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
+ { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
+ { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
+ { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
+ /* This is UPL on Linux and KPL on BSD */
+ { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
+ { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
+ { 7, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
+ /*
+ * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
+ * no handler for double fault
+ */
+ { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
+ {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
+ {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
+ {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
+ {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
+ {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
+ {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
+ {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
+ {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
+ {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
+ {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
+ {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
+ { 0, 0, 0, 0 }
+ };
+ proc0.p_uarea = proc0uarea;
+ thread0.td_kstack = proc0kstack;
+ thread0.td_pcb = (struct pcb *)
+ (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
+
+ /*
+	 * This may be done better later if it gets more high-level
+	 * components in it.  If so, just link td->td_proc here.
+ */
+ proc_linkup(&proc0, &ksegrp0, &thread0);
+
+ metadata_missing = 0;
+ if (xen_start_info->mod_start)
+ preload_metadata = (caddr_t)xen_start_info->mod_start;
+ else
+ metadata_missing = 1;
+
+ /* XXX - temporary hack */
+ preload_metadata = (caddr_t)0;
+ /* XXX */
+
+ if (envmode == 1)
+ kern_envp = static_env;
+ else if ((caddr_t)xen_start_info->cmd_line)
+ kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line);
+
+ boothowto |= xen_boothowto(kern_envp);
+
+ if (boothowto & RB_GDB_PAUSE)
+ __asm__("int $0x3;");
+
+ /* Init basic tunables, hz etc */
+ init_param1();
+ /*
+	 * Make GDT memory segments.  The code segment goes up to the end of
+	 * the page containing etext; the data segment goes to the end of
+	 * the address space.
+ */
+#if 0
+ /*
+ * XEN occupies the upper 64MB of virtual address space
+ * At its base it manages an array mapping machine page frames
+ * to physical page frames - hence we need to be able to
+ * access 4GB - (64MB - 4MB + 64k)
+ */
+ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16)));
+ gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16)));
+#endif
+#ifdef SMP
+ pc = &SMP_prvspace[0].pcpu;
+ gdt_segs[GPRIV_SEL].ssd_limit =
+ atop(sizeof(struct privatespace) - 1);
+#else
+ pc = &__pcpu;
+ gdt_segs[GPRIV_SEL].ssd_limit =
+ atop(sizeof(struct pcpu) - 1);
+#endif
+ gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
+ gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
+ for (x = 0; x < NGDT; x++)
+ ssdtosd(&gdt_segs[x], &gdt[x].sd);
+ /* re-map GDT read-only */
+ {
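+		/*
+		 * Xen requires descriptor-table pages to be mapped
+		 * read-only before they are registered with the
+		 * hypervisor, which validates and takes ownership of
+		 * them; hence the remapping below before
+		 * HYPERVISOR_set_gdt().
+		 */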
+ unsigned long gdtindex = (((unsigned long)gdt - KERNBASE) >> PAGE_SHIFT);
+ unsigned long gdtphys = PTOM(gdtindex);
+ map_range(KPTphys, gdtindex, gdtindex, 1, 0);
+ mcl_flush_queue();
+ if (HYPERVISOR_set_gdt(&gdtphys, LAST_RESERVED_GDT_ENTRY + 1)) {
+ panic("set_gdt failed\n");
+ }
+ lgdt_finish();
+ }
+
+ if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) {
+ panic("set_trap_table failed - error %d\n", error);
+ }
+ if ((error = HYPERVISOR_set_fast_trap(0x80)) != 0) {
+ panic("set_fast_trap failed - error %d\n", error);
+ }
+ HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback,
+ GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
+
+ pcpu_init(pc, 0, sizeof(struct pcpu));
+ PCPU_SET(prvspace, pc);
+ PCPU_SET(curthread, &thread0);
+ PCPU_SET(curpcb, thread0.td_pcb);
+ PCPU_SET(trap_nesting, 0);
+ PCPU_SET(pdir, (unsigned long)IdlePTD);
+ /*
+ * Initialize mutexes.
+ *
+ */
+ mutex_init();
+
+ /* make ldt memory segments */
+ /*
+ * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it
+ * should be spelled ...MAX_USER...
+ */
+ ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
+ ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
+ for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
+ ssdtosd(&ldt_segs[x], &ldt[x].sd);
+ default_proc_ldt.ldt_base = (caddr_t)ldt;
+ default_proc_ldt.ldt_len = 6;
+ _default_ldt = (int)&default_proc_ldt;
+ PCPU_SET(currentldt, _default_ldt);
+ {
+ unsigned long ldtindex = (((unsigned long)ldt - KERNBASE) >> PAGE_SHIFT);
+ map_range(KPTphys, ldtindex, ldtindex, 1, 0);
+ mcl_flush_queue();
+ xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));
+ }
+
+ /*
+ * Initialize the console before we print anything out.
+ */
+ cninit();
+ if (metadata_missing)
+ printf("WARNING: loader(8) metadata is missing!\n");
+
+#ifdef DDB
+ ksym_start = bootinfo.bi_symtab;
+ ksym_end = bootinfo.bi_esymtab;
+#endif
+ kdb_init();
+#ifdef KDB
+ if (boothowto & RB_KDB)
+ kdb_enter("Boot flags requested debugger");
+#endif
+
+ finishidentcpu(); /* Final stage of CPU initialization */
+ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ initializecpu(); /* Initialize CPU registers */
+
+ /* make an initial tss so cpu can get interrupt stack on syscall! */
+ /* Note: -16 is so we can grow the trapframe if we came from vm86 */
+ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
+ KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
+ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+ gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+ private_tss = 0;
+ PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
+ PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
+ PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
+ HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), PCPU_GET(common_tss.tss_esp0));
+
+ dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
+ dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
+ dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
+ dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
+
+ dblfault_tss.tss_cr3 = (int)IdlePTD;
+ dblfault_tss.tss_eip = (int)dblfault_handler;
+ dblfault_tss.tss_eflags = PSL_KERNEL;
+ dblfault_tss.tss_ds = dblfault_tss.tss_es =
+ dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
+ dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
+ dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
+ dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+
+ getmemsize();
+ init_param2(physmem);
+	/* Now running on new page tables, configured, and u/iom is accessible. */
+ /* Map the message buffer. */
+ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
+ pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
+ PT_UPDATES_FLUSH();
+
+ /* safe to enable xen page queue locking */
+ xpq_init();
+
+ msgbufinit(msgbufp, MSGBUF_SIZE);
+ /* XXX KMM I don't think we need call gates */
+#if 0
+ printf("modify ldt\n");
+ /* make a call gate to reenter kernel with */
+ gdp = &ldt[LSYS5CALLS_SEL].gd;
+
+ x = (int) &IDTVEC(lcall_syscall);
+ gdp->gd_looffset = x;
+ gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
+ gdp->gd_stkcpy = 1;
+ gdp->gd_type = SDT_SYS386CGT;
+ gdp->gd_dpl = SEL_UPL;
+ gdp->gd_p = 1;
+ gdp->gd_hioffset = x >> 16;
+
+ /* XXX does this work? */
+ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
+ ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
+#endif
+ /* transfer to user mode */
+
+ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
+ _udatasel = LSEL(LUDATA_SEL, SEL_UPL);
+
+ /* setup proc 0's pcb */
+ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
+ thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
+ thread0.td_pcb->pcb_ext = 0;
+ thread0.td_frame = &proc0_tf;
+}
+
+void
+cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
+{
+
+ pcpu->pc_acpi_id = 0xffffffff;
+}
+
+/*
+ * Construct a PCB from a trapframe. This is called from kdb_trap() where
+ * we want to start a backtrace from the function that caused us to enter
+ * the debugger. We have the context in the trapframe, but base the trace
+ * on the PCB. The PCB doesn't have to be perfect, as long as it contains
+ * enough for a backtrace.
+ */
+void
+makectx(struct trapframe *tf, struct pcb *pcb)
+{
+
+ pcb->pcb_edi = tf->tf_edi;
+ pcb->pcb_esi = tf->tf_esi;
+ pcb->pcb_ebp = tf->tf_ebp;
+ pcb->pcb_ebx = tf->tf_ebx;
+ pcb->pcb_eip = tf->tf_eip;
+ pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
+}
+
+int
+ptrace_set_pc(struct thread *td, u_long addr)
+{
+
+ td->td_frame->tf_eip = addr;
+ return (0);
+}
+
+int
+ptrace_single_step(struct thread *td)
+{
+ td->td_frame->tf_eflags |= PSL_T;
+ return (0);
+}
+
+int
+ptrace_clear_single_step(struct thread *td)
+{
+ td->td_frame->tf_eflags &= ~PSL_T;
+ return (0);
+}
+
+int
+fill_regs(struct thread *td, struct reg *regs)
+{
+ struct pcb *pcb;
+ struct trapframe *tp;
+
+ tp = td->td_frame;
+ regs->r_fs = tp->tf_fs;
+ regs->r_es = tp->tf_es;
+ regs->r_ds = tp->tf_ds;
+ regs->r_edi = tp->tf_edi;
+ regs->r_esi = tp->tf_esi;
+ regs->r_ebp = tp->tf_ebp;
+ regs->r_ebx = tp->tf_ebx;
+ regs->r_edx = tp->tf_edx;
+ regs->r_ecx = tp->tf_ecx;
+ regs->r_eax = tp->tf_eax;
+ regs->r_eip = tp->tf_eip;
+ regs->r_cs = tp->tf_cs;
+ regs->r_eflags = tp->tf_eflags;
+ regs->r_esp = tp->tf_esp;
+ regs->r_ss = tp->tf_ss;
+ pcb = td->td_pcb;
+ regs->r_gs = pcb->pcb_gs;
+ return (0);
+}
+
+int
+set_regs(struct thread *td, struct reg *regs)
+{
+ struct pcb *pcb;
+ struct trapframe *tp;
+
+ tp = td->td_frame;
+ if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
+ !CS_SECURE(regs->r_cs))
+ return (EINVAL);
+ tp->tf_fs = regs->r_fs;
+ tp->tf_es = regs->r_es;
+ tp->tf_ds = regs->r_ds;
+ tp->tf_edi = regs->r_edi;
+ tp->tf_esi = regs->r_esi;
+ tp->tf_ebp = regs->r_ebp;
+ tp->tf_ebx = regs->r_ebx;
+ tp->tf_edx = regs->r_edx;
+ tp->tf_ecx = regs->r_ecx;
+ tp->tf_eax = regs->r_eax;
+ tp->tf_eip = regs->r_eip;
+ tp->tf_cs = regs->r_cs;
+ tp->tf_eflags = regs->r_eflags;
+ tp->tf_esp = regs->r_esp;
+ tp->tf_ss = regs->r_ss;
+ pcb = td->td_pcb;
+ pcb->pcb_gs = regs->r_gs;
+ return (0);
+}
+
+#ifdef CPU_ENABLE_SSE
+static void
+fill_fpregs_xmm(sv_xmm, sv_87)
+ struct savexmm *sv_xmm;
+ struct save87 *sv_87;
+{
+ register struct env87 *penv_87 = &sv_87->sv_env;
+ register struct envxmm *penv_xmm = &sv_xmm->sv_env;
+ int i;
+
+ bzero(sv_87, sizeof(*sv_87));
+
+ /* FPU control/status */
+ penv_87->en_cw = penv_xmm->en_cw;
+ penv_87->en_sw = penv_xmm->en_sw;
+ penv_87->en_tw = penv_xmm->en_tw;
+ penv_87->en_fip = penv_xmm->en_fip;
+ penv_87->en_fcs = penv_xmm->en_fcs;
+ penv_87->en_opcode = penv_xmm->en_opcode;
+ penv_87->en_foo = penv_xmm->en_foo;
+ penv_87->en_fos = penv_xmm->en_fos;
+
+ /* FPU registers */
+ for (i = 0; i < 8; ++i)
+ sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
+}
+
+static void
+set_fpregs_xmm(sv_87, sv_xmm)
+ struct save87 *sv_87;
+ struct savexmm *sv_xmm;
+{
+ register struct env87 *penv_87 = &sv_87->sv_env;
+ register struct envxmm *penv_xmm = &sv_xmm->sv_env;
+ int i;
+
+ /* FPU control/status */
+ penv_xmm->en_cw = penv_87->en_cw;
+ penv_xmm->en_sw = penv_87->en_sw;
+ penv_xmm->en_tw = penv_87->en_tw;
+ penv_xmm->en_fip = penv_87->en_fip;
+ penv_xmm->en_fcs = penv_87->en_fcs;
+ penv_xmm->en_opcode = penv_87->en_opcode;
+ penv_xmm->en_foo = penv_87->en_foo;
+ penv_xmm->en_fos = penv_87->en_fos;
+
+ /* FPU registers */
+ for (i = 0; i < 8; ++i)
+ sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
+}
+#endif /* CPU_ENABLE_SSE */
+
+int
+fill_fpregs(struct thread *td, struct fpreg *fpregs)
+{
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr) {
+ fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm,
+ (struct save87 *)fpregs);
+ return (0);
+ }
+#endif /* CPU_ENABLE_SSE */
+ bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
+ return (0);
+}
+
+int
+set_fpregs(struct thread *td, struct fpreg *fpregs)
+{
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr) {
+ set_fpregs_xmm((struct save87 *)fpregs,
+ &td->td_pcb->pcb_save.sv_xmm);
+ return (0);
+ }
+#endif /* CPU_ENABLE_SSE */
+ bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs);
+ return (0);
+}
+
+/*
+ * Get machine context.
+ */
+int
+get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
+{
+ struct trapframe *tp;
+
+ tp = td->td_frame;
+
+ PROC_LOCK(curthread->td_proc);
+ mcp->mc_onstack = sigonstack(tp->tf_esp);
+ PROC_UNLOCK(curthread->td_proc);
+ mcp->mc_gs = td->td_pcb->pcb_gs;
+ mcp->mc_fs = tp->tf_fs;
+ mcp->mc_es = tp->tf_es;
+ mcp->mc_ds = tp->tf_ds;
+ mcp->mc_edi = tp->tf_edi;
+ mcp->mc_esi = tp->tf_esi;
+ mcp->mc_ebp = tp->tf_ebp;
+ mcp->mc_isp = tp->tf_isp;
+ if (flags & GET_MC_CLEAR_RET) {
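+		/* Zero the syscall return registers in the saved context. */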
+ mcp->mc_eax = 0;
+ mcp->mc_edx = 0;
+ } else {
+ mcp->mc_eax = tp->tf_eax;
+ mcp->mc_edx = tp->tf_edx;
+ }
+ mcp->mc_ebx = tp->tf_ebx;
+ mcp->mc_ecx = tp->tf_ecx;
+ mcp->mc_eip = tp->tf_eip;
+ mcp->mc_cs = tp->tf_cs;
+ mcp->mc_eflags = tp->tf_eflags;
+ mcp->mc_esp = tp->tf_esp;
+ mcp->mc_ss = tp->tf_ss;
+ mcp->mc_len = sizeof(*mcp);
+ get_fpcontext(td, mcp);
+ return (0);
+}
+
+/*
+ * Set machine context.
+ *
+ * However, we don't set any but the user modifiable flags, and we won't
+ * touch the cs selector.
+ */
+int
+set_mcontext(struct thread *td, const mcontext_t *mcp)
+{
+ struct trapframe *tp;
+ int eflags, ret;
+
+ tp = td->td_frame;
+ if (mcp->mc_len != sizeof(*mcp))
+ return (EINVAL);
+ eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
+ (tp->tf_eflags & ~PSL_USERCHANGE);
+ if ((ret = set_fpcontext(td, mcp)) == 0) {
+ tp->tf_fs = mcp->mc_fs;
+ tp->tf_es = mcp->mc_es;
+ tp->tf_ds = mcp->mc_ds;
+ tp->tf_edi = mcp->mc_edi;
+ tp->tf_esi = mcp->mc_esi;
+ tp->tf_ebp = mcp->mc_ebp;
+ tp->tf_ebx = mcp->mc_ebx;
+ tp->tf_edx = mcp->mc_edx;
+ tp->tf_ecx = mcp->mc_ecx;
+ tp->tf_eax = mcp->mc_eax;
+ tp->tf_eip = mcp->mc_eip;
+ tp->tf_eflags = eflags;
+ tp->tf_esp = mcp->mc_esp;
+ tp->tf_ss = mcp->mc_ss;
+ td->td_pcb->pcb_gs = mcp->mc_gs;
+ ret = 0;
+ }
+ return (ret);
+}
+
+static void
+get_fpcontext(struct thread *td, mcontext_t *mcp)
+{
+#ifndef DEV_NPX
+ mcp->mc_fpformat = _MC_FPFMT_NODEV;
+ mcp->mc_ownedfp = _MC_FPOWNED_NONE;
+#else
+ union savefpu *addr;
+
+ /*
+ * XXX mc_fpstate might be misaligned, since its declaration is not
+ * unportabilized using __attribute__((aligned(16))) like the
+ * declaration of struct savemm, and anyway, alignment doesn't work
+ * for auto variables since we don't use gcc's pessimal stack
+ * alignment. Work around this by abusing the spare fields after
+ * mcp->mc_fpstate.
+ *
+ * XXX unpessimize most cases by only aligning when fxsave might be
+ * called, although this requires knowing too much about
+ * npxgetregs()'s internals.
+ */
+ addr = (union savefpu *)&mcp->mc_fpstate;
+ if (td == PCPU_GET(fpcurthread) &&
+#ifdef CPU_ENABLE_SSE
+ cpu_fxsr &&
+#endif
+ ((uintptr_t)(void *)addr & 0xF)) {
+ do
+ addr = (void *)((char *)addr + 4);
+ while ((uintptr_t)(void *)addr & 0xF);
+ }
+ mcp->mc_ownedfp = npxgetregs(td, addr);
+ if (addr != (union savefpu *)&mcp->mc_fpstate) {
+ bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
+ bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2));
+ }
+ mcp->mc_fpformat = npxformat();
+#endif
+}
+
+static int
+set_fpcontext(struct thread *td, const mcontext_t *mcp)
+{
+ union savefpu *addr;
+
+ if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
+ return (0);
+ else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
+ mcp->mc_fpformat != _MC_FPFMT_XMM)
+ return (EINVAL);
+ else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
+ /* We don't care what state is left in the FPU or PCB. */
+ fpstate_drop(td);
+ else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
+ mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
+ /* XXX align as above. */
+ addr = (union savefpu *)&mcp->mc_fpstate;
+ if (td == PCPU_GET(fpcurthread) &&
+#ifdef CPU_ENABLE_SSE
+ cpu_fxsr &&
+#endif
+ ((uintptr_t)(void *)addr & 0xF)) {
+ do
+ addr = (void *)((char *)addr + 4);
+ while ((uintptr_t)(void *)addr & 0xF);
+ bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate));
+ }
+#ifdef DEV_NPX
+ /*
+ * XXX we violate the dubious requirement that npxsetregs()
+ * be called with interrupts disabled.
+ */
+ npxsetregs(td, addr);
+#endif
+ /*
+ * Don't bother putting things back where they were in the
+ * misaligned case, since we know that the caller won't use
+ * them again.
+ */
+ } else
+ return (EINVAL);
+ return (0);
+}
+
+static void
+fpstate_drop(struct thread *td)
+{
+ register_t s;
+
+ s = intr_disable();
+#ifdef DEV_NPX
+ if (PCPU_GET(fpcurthread) == td)
+ npxdrop();
+#endif
+ /*
+ * XXX force a full drop of the npx. The above only drops it if we
+ * owned it. npxgetregs() has the same bug in the !cpu_fxsr case.
+ *
+ * XXX I don't much like npxgetregs()'s semantics of doing a full
+ * drop. Dropping only to the pcb matches fnsave's behaviour.
+ * We only need to drop to !PCB_INITDONE in sendsig(). But
+ * sendsig() is the only caller of npxgetregs()... perhaps we just
+ * have too many layers.
+ */
+ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
+ intr_restore(s);
+}
+
+int
+fill_dbregs(struct thread *td, struct dbreg *dbregs)
+{
+ struct pcb *pcb;
+
+ if (td == NULL) {
+ dbregs->dr[0] = rdr0();
+ dbregs->dr[1] = rdr1();
+ dbregs->dr[2] = rdr2();
+ dbregs->dr[3] = rdr3();
+ dbregs->dr[4] = rdr4();
+ dbregs->dr[5] = rdr5();
+ dbregs->dr[6] = rdr6();
+ dbregs->dr[7] = rdr7();
+ } else {
+ pcb = td->td_pcb;
+ dbregs->dr[0] = pcb->pcb_dr0;
+ dbregs->dr[1] = pcb->pcb_dr1;
+ dbregs->dr[2] = pcb->pcb_dr2;
+ dbregs->dr[3] = pcb->pcb_dr3;
+ dbregs->dr[4] = 0;
+ dbregs->dr[5] = 0;
+ dbregs->dr[6] = pcb->pcb_dr6;
+ dbregs->dr[7] = pcb->pcb_dr7;
+ }
+ return (0);
+}
+
+int
+set_dbregs(struct thread *td, struct dbreg *dbregs)
+{
+ struct pcb *pcb;
+ int i;
+ u_int32_t mask1, mask2;
+
+ if (td == NULL) {
+ load_dr0(dbregs->dr[0]);
+ load_dr1(dbregs->dr[1]);
+ load_dr2(dbregs->dr[2]);
+ load_dr3(dbregs->dr[3]);
+ load_dr4(dbregs->dr[4]);
+ load_dr5(dbregs->dr[5]);
+ load_dr6(dbregs->dr[6]);
+ load_dr7(dbregs->dr[7]);
+ } else {
+ /*
+ * Don't let an illegal value for dr7 get set. Specifically,
+ * check for undefined settings. Setting these bit patterns
+		 * results in undefined behaviour and can lead to an unexpected
+ * TRCTRAP.
+ */
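+		/*
+		 * (The check below walks the eight 2-bit R/W and LEN
+		 * fields in dr7 bits 16-31; the pattern 10b is undefined
+		 * for a LEN field and selects I/O breakpoints in an R/W
+		 * field, which is only valid with CR4.DE set.)
+		 */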
+ for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8;
+ i++, mask1 <<= 2, mask2 <<= 2)
+ if ((dbregs->dr[7] & mask1) == mask2)
+ return (EINVAL);
+
+ pcb = td->td_pcb;
+
+ /*
+ * Don't let a process set a breakpoint that is not within the
+ * process's address space. If a process could do this, it
+ * could halt the system by setting a breakpoint in the kernel
+ * (if ddb was enabled). Thus, we need to check to make sure
+ * that no breakpoints are being enabled for addresses outside
+ * process's address space, unless, perhaps, we were called by
+ * uid 0.
+ *
+ * XXX - what about when the watched area of the user's
+ * address space is written into from within the kernel
+ * ... wouldn't that still cause a breakpoint to be generated
+ * from within kernel mode?
+ */
+
+ if (suser(td) != 0) {
+ if (dbregs->dr[7] & 0x3) {
+ /* dr0 is enabled */
+ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ }
+
+ if (dbregs->dr[7] & (0x3<<2)) {
+ /* dr1 is enabled */
+ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ }
+
+ if (dbregs->dr[7] & (0x3<<4)) {
+ /* dr2 is enabled */
+ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ }
+
+ if (dbregs->dr[7] & (0x3<<6)) {
+ /* dr3 is enabled */
+ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ }
+ }
+
+ pcb->pcb_dr0 = dbregs->dr[0];
+ pcb->pcb_dr1 = dbregs->dr[1];
+ pcb->pcb_dr2 = dbregs->dr[2];
+ pcb->pcb_dr3 = dbregs->dr[3];
+ pcb->pcb_dr6 = dbregs->dr[6];
+ pcb->pcb_dr7 = dbregs->dr[7];
+
+ pcb->pcb_flags |= PCB_DBREGS;
+ }
+
+ return (0);
+}
+
+/*
+ * Return > 0 if a hardware breakpoint has been hit, and the
+ * breakpoint was in user space. Return 0, otherwise.
+ */
+int
+user_dbreg_trap(void)
+{
+ u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
+ u_int32_t bp; /* breakpoint bits extracted from dr6 */
+ int nbp; /* number of breakpoints that triggered */
+ caddr_t addr[4]; /* breakpoint addresses */
+ int i;
+
+ dr7 = rdr7();
+ if ((dr7 & 0x000000ff) == 0) {
+ /*
+		 * all global and local enable bits in dr7 are zero,
+ * thus the trap couldn't have been caused by the
+ * hardware debug registers
+ */
+ return 0;
+ }
+
+ nbp = 0;
+ dr6 = rdr6();
+ bp = dr6 & 0x0000000f;
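+	/*
+	 * dr6 bits 0-3 (B0-B3) record which breakpoints fired; e.g. a
+	 * dr6 value of 0x5 means both dr0 and dr2 triggered.
+	 */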
+
+ if (!bp) {
+ /*
+		 * None of the breakpoint bits are set, meaning this
+		 * trap was not caused by any of the debug registers.
+ */
+ return 0;
+ }
+
+ /*
+	 * At least one of the breakpoints was hit; check to see
+	 * which ones, and whether any of them are user space addresses.
+ */
+
+ if (bp & 0x01) {
+ addr[nbp++] = (caddr_t)rdr0();
+ }
+ if (bp & 0x02) {
+ addr[nbp++] = (caddr_t)rdr1();
+ }
+ if (bp & 0x04) {
+ addr[nbp++] = (caddr_t)rdr2();
+ }
+ if (bp & 0x08) {
+ addr[nbp++] = (caddr_t)rdr3();
+ }
+
+	for (i = 0; i < nbp; i++) {
+		if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
+ /*
+ * addr[i] is in user space
+ */
+ return nbp;
+ }
+ }
+
+ /*
+ * None of the breakpoints are in user space.
+ */
+ return 0;
+}
+
+#ifndef DEV_APIC
+#include <machine/apicvar.h>
+
+/*
+ * Provide stub functions so that the MADT APIC enumerator in the acpi
+ * kernel module will link against a kernel without 'device apic'.
+ *
+ * XXX - This is a gross hack.
+ */
+void
+apic_register_enumerator(struct apic_enumerator *enumerator)
+{
+}
+
+void *
+ioapic_create(uintptr_t addr, int32_t id, int intbase)
+{
+ return (NULL);
+}
+
+int
+ioapic_disable_pin(void *cookie, u_int pin)
+{
+ return (ENXIO);
+}
+
+int
+ioapic_get_vector(void *cookie, u_int pin)
+{
+ return (-1);
+}
+
+void
+ioapic_register(void *cookie)
+{
+}
+
+int
+ioapic_remap_vector(void *cookie, u_int pin, int vector)
+{
+ return (ENXIO);
+}
+
+int
+ioapic_set_extint(void *cookie, u_int pin)
+{
+ return (ENXIO);
+}
+
+int
+ioapic_set_nmi(void *cookie, u_int pin)
+{
+ return (ENXIO);
+}
+
+int
+ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol)
+{
+ return (ENXIO);
+}
+
+int
+ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger)
+{
+ return (ENXIO);
+}
+
+void
+lapic_create(u_int apic_id, int boot_cpu)
+{
+}
+
+void
+lapic_init(uintptr_t addr)
+{
+}
+
+int
+lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode)
+{
+ return (ENXIO);
+}
+
+int
+lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol)
+{
+ return (ENXIO);
+}
+
+int
+lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger)
+{
+ return (ENXIO);
+}
+#endif
+
+#ifdef KDB
+
+/*
+ * Provide inb() and outb() as functions. They are normally only
+ * available as macros calling inlined functions, thus cannot be
+ * called from the debugger.
+ *
+ * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
+ */
+
+#undef inb
+#undef outb
+
+/* silence compiler warnings */
+u_char inb(u_int);
+void outb(u_int, u_char);
+
+u_char
+inb(u_int port)
+{
+ u_char data;
+ /*
+ * We use %%dx and not %1 here because i/o is done at %dx and not at
+ * %edx, while gcc generates inferior code (movw instead of movl)
+ * if we tell it to load (u_short) port.
+ */
+ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
+ return (data);
+}
+
+void
+outb(u_int port, u_char data)
+{
+ u_char al;
+ /*
+ * Use an unnecessary assignment to help gcc's register allocator.
+	 * This makes a large difference for gcc-1.40 and a tiny difference
+ * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
+ * best results. gcc-2.6.0 can't handle this.
+ */
+ al = data;
+ __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
+}
+
+#endif /* KDB */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c
new file mode 100644
index 0000000000..af07002ebb
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c
@@ -0,0 +1,150 @@
+/*-
+ * ----------------------------------------------------------------------------
+ * "THE BEER-WARE LICENSE" (Revision 42):
+ * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
+ * can do whatever you want with this stuff. If we meet some day, and you think
+ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
+ * ----------------------------------------------------------------------------
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/mp_clock.c,v 1.19 2004/05/30 20:34:57 phk Exp $");
+
+/*-
+ * Just when we thought life was beautiful, reality pops its grim face over
+ * the edge again:
+ *
+ * ] 20. ACPI Timer Errata
+ * ]
+ * ] Problem: The power management timer may return improper result when
+ * ] read. Although the timer value settles properly after incrementing,
+ * ] while incrementing there is a 3nS window every 69.8nS where the
+ * ] timer value is indeterminate (a 4.2% chance that the data will be
+ * ] incorrect when read). As a result, the ACPI free running count up
+ * ] timer specification is violated due to erroneous reads. Implication:
+ * ] System hangs due to the "inaccuracy" of the timer when used by
+ * ] software for time critical events and delays.
+ * ]
+ * ] Workaround: Read the register twice and compare.
+ * ] Status: This will not be fixed in the PIIX4 or PIIX4E.
+ *
+ * The counter is in other words not latched to the PCI bus clock when
+ * read. Notice the workaround isn't: We need to read until we have
+ * three monotonic samples and then use the middle one, otherwise we are
+ * not protected against the fact that the bits can be wrong in two
+ * directions. If we only cared about monotonicity, two reads would be enough.
+ */
+
+/* #include "opt_bus.h" */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/timetc.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/bus.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+static unsigned piix_get_timecount(struct timecounter *tc);
+
+static u_int32_t piix_timecounter_address;
+static u_int piix_freq = 14318182/4;
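+/*
+ * 14318182/4 Hz (= 3579545 Hz) is the ACPI power-management timer
+ * rate (the 14.31818 MHz ISA clock divided by four); the
+ * machdep.piix_freq sysctl below allows retuning it at runtime,
+ * e.g. "sysctl machdep.piix_freq=3579545".
+ */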
+
+static struct timecounter piix_timecounter = {
+ piix_get_timecount, /* get_timecount */
+ 0, /* no poll_pps */
+ 0xffffff, /* counter_mask */
+ 0, /* frequency */
+ "PIIX" /* name */
+};
+
+
+static int
+sysctl_machdep_piix_freq(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ u_int freq;
+
+ if (piix_timecounter.tc_frequency == 0)
+ return (EOPNOTSUPP);
+ freq = piix_freq;
+ error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
+ if (error == 0 && req->newptr != NULL) {
+ piix_freq = freq;
+ piix_timecounter.tc_frequency = piix_freq;
+ }
+ return (error);
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, piix_freq, CTLTYPE_INT | CTLFLAG_RW,
+ 0, sizeof(u_int), sysctl_machdep_piix_freq, "I", "");
+
+static unsigned
+piix_get_timecount(struct timecounter *tc)
+{
+ unsigned u1, u2, u3;
+
+ u2 = inl(piix_timecounter_address);
+ u3 = inl(piix_timecounter_address);
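+	/*
+	 * Slide a window of three reads until u1 <= u2 <= u3; the middle
+	 * sample then cannot be a glitched value in either direction
+	 * (see the errata discussion above).
+	 */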
+ do {
+ u1 = u2;
+ u2 = u3;
+ u3 = inl(piix_timecounter_address);
+ } while (u1 > u2 || u2 > u3);
+ return (u2);
+}
+
+static int
+piix_probe(device_t dev)
+{
+ u_int32_t d;
+
+ if (devclass_get_device(devclass_find("acpi"), 0) != NULL)
+ return (ENXIO);
+ switch (pci_get_devid(dev)) {
+ case 0x71138086:
+ device_set_desc(dev, "PIIX Timecounter");
+ break;
+ default:
+ return (ENXIO);
+ }
+
+ d = pci_read_config(dev, PCIR_COMMAND, 2);
+ if (!(d & PCIM_CMD_PORTEN)) {
+ device_printf(dev, "PIIX I/O space not mapped\n");
+ return (ENXIO);
+ }
+ return (0);
+}
+
+static int
+piix_attach(device_t dev)
+{
+ u_int32_t d;
+
+ d = pci_read_config(dev, 0x40, 4);
+ piix_timecounter_address = (d & 0xffc0) + 8;
+ piix_timecounter.tc_frequency = piix_freq;
+ tc_init(&piix_timecounter);
+ return (0);
+}
+
+static device_method_t piix_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, piix_probe),
+ DEVMETHOD(device_attach, piix_attach),
+ { 0, 0 }
+};
+
+static driver_t piix_driver = {
+ "piix",
+ piix_methods,
+ 1,
+};
+
+static devclass_t piix_devclass;
+
+DRIVER_MODULE(piix, pci, piix_driver, piix_devclass, 0, 0);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c
new file mode 100644
index 0000000000..b975c9e491
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c
@@ -0,0 +1,1315 @@
+/*-
+ * Copyright (c) 1996, by Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.235.2.3 2004/09/24 15:02:33 rik Exp $");
+
+#include "opt_apic.h"
+#include "opt_cpu.h"
+#include "opt_kstack_pages.h"
+#include "opt_mp_watchdog.h"
+
+#if !defined(lint)
+#if !defined(SMP)
+#error How did you get here?
+#endif
+
+#if defined(I386_CPU) && !defined(COMPILING_LINT)
+#error SMP not supported with I386_CPU
+#endif
+#if 0
+#ifndef DEV_APIC
+#error The apic device is required for SMP, add "device apic" to your config file.
+#endif
+#endif
+#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
+#error SMP not supported with CPU_DISABLE_CMPXCHG
+#endif
+#endif /* not lint */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cons.h> /* cngetc() */
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+
+#include <machine/apicreg.h>
+#include <machine/clock.h>
+#include <machine/md_var.h>
+#include <machine/mp_watchdog.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+#include <machine/smptests.h> /** COUNT_XINVLTLB_HITS */
+#include <machine/specialreg.h>
+#include <machine/privatespace.h>
+
+#include <machine/xenfunc.h>
+
+#define WARMBOOT_TARGET 0
+#define WARMBOOT_OFF (KERNBASE + 0x0467)
+#define WARMBOOT_SEG (KERNBASE + 0x0469)
+
+#define CMOS_REG (0x70)
+#define CMOS_DATA (0x71)
+#define BIOS_RESET (0x0f)
+#define BIOS_WARM (0x0a)
+
+/*
+ * this code MUST be enabled here and in mpboot.s.
+ * it follows the very early stages of AP boot by placing values in CMOS ram.
+ * it NORMALLY will never be needed and thus the primitive method for enabling.
+ *
+#define CHECK_POINTS
+ */
+
+#if defined(CHECK_POINTS) && !defined(PC98)
+#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA))
+#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
+
+#define CHECK_INIT(D); \
+ CHECK_WRITE(0x34, (D)); \
+ CHECK_WRITE(0x35, (D)); \
+ CHECK_WRITE(0x36, (D)); \
+ CHECK_WRITE(0x37, (D)); \
+ CHECK_WRITE(0x38, (D)); \
+ CHECK_WRITE(0x39, (D));
+
+#define CHECK_PRINT(S); \
+ printf("%s: %d, %d, %d, %d, %d, %d\n", \
+ (S), \
+ CHECK_READ(0x34), \
+ CHECK_READ(0x35), \
+ CHECK_READ(0x36), \
+ CHECK_READ(0x37), \
+ CHECK_READ(0x38), \
+ CHECK_READ(0x39));
+
+#else /* CHECK_POINTS */
+
+#define CHECK_INIT(D)
+#define CHECK_PRINT(S)
+#define CHECK_WRITE(A, D)
+
+#endif /* CHECK_POINTS */
+
+/*
+ * Values to send to the POST hardware.
+ */
+#define MP_BOOTADDRESS_POST 0x10
+#define MP_PROBE_POST 0x11
+#define MPTABLE_PASS1_POST 0x12
+
+#define MP_START_POST 0x13
+#define MP_ENABLE_POST 0x14
+#define MPTABLE_PASS2_POST 0x15
+
+#define START_ALL_APS_POST 0x16
+#define INSTALL_AP_TRAMP_POST 0x17
+#define START_AP_POST 0x18
+
+#define MP_ANNOUNCE_POST 0x19
+
+/* lock region used by kernel profiling */
+int mcount_lock;
+
+/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
+int current_postcode;
+
+int	mp_naps;		/* # of Application Processors */
+int boot_cpu_id = -1; /* designated BSP */
+extern int nkpt;
+
+/*
+ * CPU topology map data structures for HTT.
+ */
+static struct cpu_group mp_groups[MAXCPU];
+static struct cpu_top mp_top;
+
+/* AP uses this during bootstrap. Do not staticize. */
+char *bootSTK;
+static int bootAP;
+
+/* Hotwire a 0->4MB V==P mapping */
+extern pt_entry_t *KPTphys;
+
+/* SMP page table page */
+extern pt_entry_t *SMPpt;
+
+struct pcb stoppcbs[MAXCPU];
+
+/* Variables needed for SMP tlb shootdown. */
+vm_offset_t smp_tlb_addr1;
+vm_offset_t smp_tlb_addr2;
+volatile int smp_tlb_wait;
+
+/*
+ * Local data and functions.
+ */
+
+static u_int logical_cpus;
+
+/* used to hold the AP's until we are ready to release them */
+static struct mtx ap_boot_mtx;
+
+/* Set to 1 once we're ready to let the APs out of the pen. */
+static volatile int aps_ready = 0;
+
+/*
+ * Store data from cpu_add() until later in the boot when we actually setup
+ * the APs.
+ */
+struct cpu_info {
+ int cpu_present:1;
+ int cpu_bsp:1;
+} static cpu_info[MAXCPU];
+static int cpu_apic_ids[MAXCPU];
+
+static u_int boot_address;
+
+static void set_logical_apic_ids(void);
+static int start_all_aps(void);
+static void install_ap_tramp(void);
+static int start_ap(int apic_id);
+static void release_aps(void *dummy);
+
+static int hlt_logical_cpus;
+static struct sysctl_ctx_list logical_cpu_clist;
+
+static void
+mem_range_AP_init(void)
+{
+ if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
+ mem_range_softc.mr_op->initAP(&mem_range_softc);
+}
+
+void
+mp_topology(void)
+{
+ struct cpu_group *group;
+ int logical_cpus;
+ int apic_id;
+ int groups;
+ int cpu;
+
+ /* Build the smp_topology map. */
+ /* Nothing to do if there is no HTT support. */
+ if ((cpu_feature & CPUID_HTT) == 0)
+ return;
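+	/* CPUID leaf 1, EBX bits 16-23: logical CPUs per physical package. */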
+ logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
+ if (logical_cpus <= 1)
+ return;
+ group = &mp_groups[0];
+ groups = 1;
+ for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
+ if (!cpu_info[apic_id].cpu_present)
+ continue;
+ /*
+ * If the current group has members and we're not a logical
+ * cpu, create a new group.
+ */
+ if (group->cg_count != 0 && (apic_id % logical_cpus) == 0) {
+ group++;
+ groups++;
+ }
+ group->cg_count++;
+ group->cg_mask |= 1 << cpu;
+ cpu++;
+ }
+
+ mp_top.ct_count = groups;
+ mp_top.ct_group = mp_groups;
+ smp_topology = &mp_top;
+}
+
+
+/*
+ * Calculate usable address in base memory for AP trampoline code.
+ */
+u_int
+mp_bootaddress(u_int basemem)
+{
+ POSTCODE(MP_BOOTADDRESS_POST);
+
+ boot_address = trunc_page(basemem); /* round down to 4k boundary */
+ if ((basemem - boot_address) < bootMP_size)
+ boot_address -= PAGE_SIZE; /* not enough, lower by 4k */
+
+ return boot_address;
+}
+
+void
+cpu_add(u_int apic_id, char boot_cpu)
+{
+
+ if (apic_id >= MAXCPU) {
+ printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n",
+ apic_id, MAXCPU - 1);
+ return;
+ }
+ KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
+ apic_id));
+ cpu_info[apic_id].cpu_present = 1;
+ if (boot_cpu) {
+ KASSERT(boot_cpu_id == -1,
+ ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
+ boot_cpu_id));
+ boot_cpu_id = apic_id;
+ cpu_info[apic_id].cpu_bsp = 1;
+ }
+ mp_ncpus++;
+ if (bootverbose)
+ printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
+ "AP");
+
+}
+
+void
+cpu_mp_setmaxid(void)
+{
+
+ mp_maxid = MAXCPU - 1;
+}
+
+int
+cpu_mp_probe(void)
+{
+
+ /*
+ * Always record BSP in CPU map so that the mbuf init code works
+ * correctly.
+ */
+ all_cpus = 1;
+ if (mp_ncpus == 0) {
+ /*
+ * No CPUs were found, so this must be a UP system. Setup
+ * the variables to represent a system with a single CPU
+ * with an id of 0.
+ */
+ mp_ncpus = 1;
+ return (0);
+ }
+
+ /* At least one CPU was found. */
+ if (mp_ncpus == 1) {
+ /*
+ * One CPU was found, so this must be a UP system with
+ * an I/O APIC.
+ */
+ return (0);
+ }
+
+ /* At least two CPUs were found. */
+ return (1);
+}
+
+/*
+ * Initialize the IPI handlers and start up the AP's.
+ */
+void
+cpu_mp_start(void)
+{
+ int i;
+
+ POSTCODE(MP_START_POST);
+
+ /* Initialize the logical ID to APIC ID table. */
+ for (i = 0; i < MAXCPU; i++)
+ cpu_apic_ids[i] = -1;
+
+ /* Install an inter-CPU IPI for TLB invalidation */
+ setidt(IPI_INVLTLB, IDTVEC(invltlb),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IPI_INVLPG, IDTVEC(invlpg),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IPI_INVLRNG, IDTVEC(invlrng),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+ /* Install an inter-CPU IPI for forwarding hardclock() */
+ setidt(IPI_HARDCLOCK, IDTVEC(hardclock),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+ /* Install an inter-CPU IPI for forwarding statclock() */
+ setidt(IPI_STATCLOCK, IDTVEC(statclock),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+ /* Install an inter-CPU IPI for lazy pmap release */
+ setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+ /* Install an inter-CPU IPI for all-CPU rendezvous */
+ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+ /* Install an inter-CPU IPI for forcing an additional software trap */
+ setidt(IPI_AST, IDTVEC(cpuast),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+ /* Install an inter-CPU IPI for CPU stop/restart */
+ setidt(IPI_STOP, IDTVEC(cpustop),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+
+ /* Set boot_cpu_id if needed. */
+ if (boot_cpu_id == -1) {
+ boot_cpu_id = PCPU_GET(apic_id);
+ cpu_info[boot_cpu_id].cpu_bsp = 1;
+ } else
+ KASSERT(boot_cpu_id == PCPU_GET(apic_id),
+ ("BSP's APIC ID doesn't match boot_cpu_id"));
+ cpu_apic_ids[0] = boot_cpu_id;
+
+ /* Start each Application Processor */
+ start_all_aps();
+
+ /* Setup the initial logical CPUs info. */
+ logical_cpus = logical_cpus_mask = 0;
+ if (cpu_feature & CPUID_HTT)
+ logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
+
+ set_logical_apic_ids();
+}
+
+
+/*
+ * Print various information about the SMP system hardware and setup.
+ */
+void
+cpu_mp_announce(void)
+{
+ int i, x;
+
+ POSTCODE(MP_ANNOUNCE_POST);
+
+ /* List CPUs */
+ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
+ for (i = 1, x = 0; x < MAXCPU; x++) {
+ if (cpu_info[x].cpu_present && !cpu_info[x].cpu_bsp) {
+ KASSERT(i < mp_ncpus,
+ ("mp_ncpus and actual cpus are out of whack"));
+ printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
+ }
+ }
+}
+
+/*
+ * AP CPU's call this to initialize themselves.
+ */
+void
+init_secondary(void)
+{
+ int gsel_tss;
+ int x, myid;
+#if 0
+ u_int cr0;
+#endif
+ /* bootAP is set in start_ap() to our ID. */
+ myid = bootAP;
+ gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
+ gdt_segs[GPROC0_SEL].ssd_base =
+ (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
+ SMP_prvspace[myid].pcpu.pc_prvspace =
+ &SMP_prvspace[myid].pcpu;
+
+ for (x = 0; x < NGDT; x++) {
+ ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
+ }
+
+#if 0
+ r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
+ r_gdt.rd_base = (int) &gdt[myid * NGDT];
+ lgdt(&r_gdt); /* does magic intra-segment return */
+
+ lidt(&r_idt);
+ lldt(_default_ldt);
+#endif
+ PCPU_SET(currentldt, _default_ldt);
+
+ gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+ gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
+ PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
+ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+ PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
+ PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
+ PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
+#if 0
+ ltr(gsel_tss);
+
+ /*
+ * Set to a known state:
+ * Set by mpboot.s: CR0_PG, CR0_PE
+ * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
+ */
+ cr0 = rcr0();
+ cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
+ load_cr0(cr0);
+#endif
+ CHECK_WRITE(0x38, 5);
+
+ /* Disable local APIC just to be sure. */
+ lapic_disable();
+
+ /* signal our startup to the BSP. */
+ mp_naps++;
+ CHECK_WRITE(0x39, 6);
+
+ /* Spin until the BSP releases the AP's. */
+ while (!aps_ready)
+ ia32_pause();
+
+ /* BSP may have changed PTD while we were waiting */
+ invltlb();
+ pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
+
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+ lidt(&r_idt);
+#endif
+
+ /* set up CPU registers and state */
+ cpu_setregs();
+
+ /* set up FPU state on the AP */
+ npxinit(__INITIAL_NPXCW__);
+
+ /* set up SSE registers */
+ enable_sse();
+
+ /* A quick check from sanity claus */
+ if (PCPU_GET(apic_id) != lapic_id()) {
+ printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
+ printf("SMP: actual apic_id = %d\n", lapic_id());
+ printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
+ printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
+ panic("cpuid mismatch! boom!!");
+ }
+
+ mtx_lock_spin(&ap_boot_mtx);
+
+ /* Init local apic for irq's */
+ lapic_setup();
+
+ /* Set memory range attributes for this CPU to match the BSP */
+ mem_range_AP_init();
+
+ smp_cpus++;
+
+ CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
+ printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
+
+ /* Determine if we are a logical CPU. */
+ if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
+ logical_cpus_mask |= PCPU_GET(cpumask);
+
+ /* Build our map of 'other' CPUs. */
+ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+
+ if (bootverbose)
+ lapic_dump("AP");
+
+ if (smp_cpus == mp_ncpus) {
+ /* enable IPI's, tlb shootdown, freezes etc */
+ atomic_store_rel_int(&smp_started, 1);
+ smp_active = 1; /* historic */
+ }
+
+ mtx_unlock_spin(&ap_boot_mtx);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ia32_pause();
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ mtx_lock_spin(&sched_lock);
+
+ binuptime(PCPU_PTR(switchtime));
+ PCPU_SET(switchticks, ticks);
+
+ cpu_throw(NULL, choosethread()); /* doesn't return */
+
+ panic("scheduler returned us to %s", __func__);
+ /* NOTREACHED */
+}
+
+/*******************************************************************
+ * local functions and data
+ */
+
+/*
+ * Set the APIC logical IDs.
+ *
+ * We want to cluster logical CPU's within the same APIC ID cluster.
+ * Since logical CPU's are aligned, simply filling in the clusters in
+ * APIC ID order works fine. Note that this does not try to balance
+ * the number of CPU's in each cluster. (XXX?)
+ */
+static void
+set_logical_apic_ids(void)
+{
+ u_int apic_id, cluster, cluster_id;
+
+ /* Force us to allocate cluster 0 at the start. */
+ cluster = -1;
+ cluster_id = APIC_MAX_INTRACLUSTER_ID;
+ for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
+ if (!cpu_info[apic_id].cpu_present)
+ continue;
+ if (cluster_id == APIC_MAX_INTRACLUSTER_ID) {
+ cluster = ioapic_next_logical_cluster();
+ cluster_id = 0;
+ } else
+ cluster_id++;
+ if (bootverbose)
+ printf("APIC ID: physical %u, logical %u:%u\n",
+ apic_id, cluster, cluster_id);
+ lapic_set_logical_id(apic_id, cluster, cluster_id);
+ }
+}
+
+/*
+ * start each AP in our list
+ */
+static int
+start_all_aps(void)
+{
+#ifndef PC98
+ u_char mpbiosreason;
+#endif
+ u_long mpbioswarmvec;
+ struct pcpu *pc;
+ char *stack;
+ uintptr_t kptbase;
+ int i, pg, apic_id, cpu;
+
+ POSTCODE(START_ALL_APS_POST);
+
+ mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
+
+ /* install the AP 1st level boot code */
+ install_ap_tramp();
+
+ /* save the current value of the warm-start vector */
+ mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
+#ifndef PC98
+ outb(CMOS_REG, BIOS_RESET);
+ mpbiosreason = inb(CMOS_DATA);
+#endif
+
+ /* set up temporary P==V mapping for AP boot */
+ /* XXX this is a hack, we should boot the AP on its own stack/PTD */
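+	/*
+	 * The identity mapping lets each AP keep running at its physical
+	 * boot address immediately after it enables paging, until the
+	 * trampoline jumps to the kernel's high virtual address.
+	 */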
+ kptbase = (uintptr_t)(void *)KPTphys;
+ for (i = 0; i < NKPT; i++)
+ PTD[i] = (pd_entry_t)(PG_V | PG_RW |
+ ((kptbase + i * PAGE_SIZE) & PG_FRAME));
+ invltlb();
+
+ /* start each AP */
+ for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
+ if (!cpu_info[apic_id].cpu_present ||
+ cpu_info[apic_id].cpu_bsp)
+ continue;
+ cpu++;
+
+ /* save APIC ID for this logical ID */
+ cpu_apic_ids[cpu] = apic_id;
+
+ /* first page of AP's private space */
+ pg = cpu * i386_btop(sizeof(struct privatespace));
+
+ /* allocate a new private data page */
+ pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
+
+ /* wire it into the private page table page */
+ SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
+
+ /* allocate and set up an idle stack data page */
+ stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
+ for (i = 0; i < KSTACK_PAGES; i++)
+ SMPpt[pg + 1 + i] = (pt_entry_t)
+ (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
+
+ /* prime data page for it to use */
+ pcpu_init(pc, cpu, sizeof(struct pcpu));
+ pc->pc_apic_id = apic_id;
+
+ /* setup a vector to our boot code */
+ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
+ *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
+#ifndef PC98
+ outb(CMOS_REG, BIOS_RESET);
+ outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
+#endif
+
+ bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
+ PAGE_SIZE];
+ bootAP = cpu;
+
+ /* attempt to start the Application Processor */
+ CHECK_INIT(99); /* setup checkpoints */
+ if (!start_ap(apic_id)) {
+ printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
+ CHECK_PRINT("trace"); /* show checkpoints */
+ /* better panic as the AP may be running loose */
+ printf("panic y/n? [y] ");
+ if (cngetc() != 'n')
+ panic("bye-bye");
+ }
+ CHECK_PRINT("trace"); /* show checkpoints */
+
+ all_cpus |= (1 << cpu); /* record AP in CPU map */
+ }
+
+ /* build our map of 'other' CPUs */
+ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+
+ /* restore the warmstart vector */
+ *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
+#ifndef PC98
+ outb(CMOS_REG, BIOS_RESET);
+ outb(CMOS_DATA, mpbiosreason);
+#endif
+
+ /*
+	 * Set up the idle context for the BSP. This is similar to the
+	 * above, except that some of it was done by locore, some by
+	 * pmap.c, and some is implicit because the BSP is cpu#0 and the
+	 * page is initially zero, and because we can refer to variables
+	 * by name on the BSP.
+ */
+
+ /* Allocate and setup BSP idle stack */
+ stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
+ for (i = 0; i < KSTACK_PAGES; i++)
+ SMPpt[1 + i] = (pt_entry_t)
+ (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
+
+ for (i = 0; i < NKPT; i++)
+ PTD[i] = 0;
+ pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
+
+ /* number of APs actually started */
+ return mp_naps;
+}
+
+/*
+ * load the 1st level AP boot code into base memory.
+ */
+
+/* targets for relocation */
+extern void bigJump(void);
+extern void bootCodeSeg(void);
+extern void bootDataSeg(void);
+extern void MPentry(void);
+extern u_int MP_GDT;
+extern u_int mp_gdtbase;
+
+static void
+install_ap_tramp(void)
+{
+ int x;
+ int size = *(int *) ((u_long) & bootMP_size);
+ vm_offset_t va = boot_address + KERNBASE;
+ u_char *src = (u_char *) ((u_long) bootMP);
+ u_char *dst = (u_char *) va;
+ u_int boot_base = (u_int) bootMP;
+ u_int8_t *dst8;
+ u_int16_t *dst16;
+ u_int32_t *dst32;
+
+ POSTCODE(INSTALL_AP_TRAMP_POST);
+
+	KASSERT(size <= PAGE_SIZE,
+	    ("'size' does not fit into PAGE_SIZE, as expected."));
+	pmap_kenter(va, boot_address);
+	pmap_invalidate_page(kernel_pmap, va);
+ for (x = 0; x < size; ++x)
+ *dst++ = *src++;
+
+ /*
+ * modify addresses in code we just moved to basemem. unfortunately we
+ * need fairly detailed info about mpboot.s for this to work. changes
+ * to mpboot.s might require changes here.
+ */
+
+ /* boot code is located in KERNEL space */
+ dst = (u_char *) va;
+
+ /* modify the lgdt arg */
+ dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
+ *dst32 = boot_address + ((u_int) & MP_GDT - boot_base);
+
+ /* modify the ljmp target for MPentry() */
+ dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
+ *dst32 = ((u_int) MPentry - KERNBASE);
+
+ /* modify the target for boot code segment */
+ dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
+ dst8 = (u_int8_t *) (dst16 + 1);
+ *dst16 = (u_int) boot_address & 0xffff;
+ *dst8 = ((u_int) boot_address >> 16) & 0xff;
+
+ /* modify the target for boot data segment */
+ dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
+ dst8 = (u_int8_t *) (dst16 + 1);
+ *dst16 = (u_int) boot_address & 0xffff;
+ *dst8 = ((u_int) boot_address >> 16) & 0xff;
+}
+
+/*
+ * This function starts the AP (application processor) identified
+ * This function starts the AP (application processor) identified
+ * by the APIC ID 'apic_id'. It does quite a "song and dance"
+ * of the different hardware we might encounter. It isn't pretty,
+ * but it seems to work.
+ */
+static int
+start_ap(int apic_id)
+{
+ int vector, ms;
+ int cpus;
+
+ POSTCODE(START_AP_POST);
+
+ /* calculate the vector */
+ vector = (boot_address >> 12) & 0xff;
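+	/*
+	 * (The STARTUP IPI vector is the physical page number of the
+	 * trampoline, so boot_address must be page-aligned below 1MB.)
+	 */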
+
+ /* used as a watchpoint to signal AP startup */
+ cpus = mp_naps;
+
+ /*
+	 * First we do an INIT/RESET IPI. This INIT IPI might be run,
+	 * resetting and running the target CPU; OR it might be latched
+	 * (P5 bug), leaving the CPU waiting for the STARTUP IPI; OR it
+	 * might simply be ignored.
+ */
+
+ /* do an INIT IPI: assert RESET */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
+
+ /* wait for pending status end */
+ lapic_ipi_wait(-1);
+
+ /* do an INIT IPI: deassert RESET */
+ lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
+
+ /* wait for pending status end */
+ DELAY(10000); /* wait ~10mS */
+ lapic_ipi_wait(-1);
+
+ /*
+	 * Next we do a STARTUP IPI: the previous INIT IPI might still be
+	 * latched (P5 bug), in which case this 1st STARTUP terminates
+	 * immediately and the previously started INIT IPI continues; OR
+	 * the previous INIT IPI has already run and this STARTUP IPI
+	 * will run; OR the previous INIT IPI was ignored and this
+	 * STARTUP IPI will run.
+ */
+
+ /* do a STARTUP IPI */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ lapic_ipi_wait(-1);
+ DELAY(200); /* wait ~200uS */
+
+ /*
+	 * Finally we do a 2nd STARTUP IPI: it should run IF the previous
+	 * STARTUP IPI was cancelled by a latched INIT IPI; otherwise it
+	 * will be ignored, as only ONE STARTUP IPI is recognized after a
+	 * hardware RESET or INIT IPI.
+ */
+
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ lapic_ipi_wait(-1);
+ DELAY(200); /* wait ~200uS */
+
+ /* Wait up to 5 seconds for it to start. */
+ for (ms = 0; ms < 5000; ms++) {
+ if (mp_naps > cpus)
+ return 1; /* return SUCCESS */
+ DELAY(1000);
+ }
+ return 0; /* return FAILURE */
+}
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
+ 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif /* COUNT_XINVLTLB_HITS */
+
+/*
+ * Flush the TLB on all other CPU's
+ */
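+/*
+ * Protocol sketch: with smp_rv_mtx held, publish the target range in
+ * smp_tlb_addr1/smp_tlb_addr2, zero smp_tlb_wait, raise the IPI, then
+ * spin until every other CPU has bumped smp_tlb_wait from its handler.
+ */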
+static void
+smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+{
+ u_int ncpu;
+
+	ncpu = mp_ncpus - 1;	/* does not shoot down self */
+ if (ncpu < 1)
+ return; /* no other cpus */
+ mtx_assert(&smp_rv_mtx, MA_OWNED);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ atomic_store_rel_int(&smp_tlb_wait, 0);
+ ipi_all_but_self(vector);
+ while (smp_tlb_wait < ncpu)
+ ia32_pause();
+}
+
+/*
+ * This is about as magic as it gets. fortune(1) has got similar code
+ * for reversing bits in a word. Who thinks up this stuff??
+ *
+ * Yes, it does appear to be consistently faster than:
+ * while (i = ffs(m)) {
+ * m >>= i;
+ * bits++;
+ * }
+ * and
+ * while (lsb = (m & -m)) { // This is magic too
+ * m &= ~lsb; // or: m ^= lsb
+ * bits++;
+ * }
+ * Both of these latter forms do some very strange things on gcc-3.1 with
+ * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
+ * There is probably an SSE or MMX popcnt instruction.
+ *
+ * I wonder if this should be in libkern?
+ *
+ * XXX Stop the presses! Another one:
+ * static __inline u_int32_t
+ * popcnt1(u_int32_t v)
+ * {
+ * v -= ((v >> 1) & 0x55555555);
+ * v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
+ * v = (v + (v >> 4)) & 0x0F0F0F0F;
+ * return (v * 0x01010101) >> 24;
+ * }
+ * The downside is that it has a multiply. With a pentium3 with
+ * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
+ * an imull, and in that case it is faster. In most other cases
+ * it appears slightly slower.
+ *
+ * Another variant (also from fortune):
+ * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
+ * #define BX_(x) ((x) - (((x)>>1)&0x77777777) \
+ * - (((x)>>2)&0x33333333) \
+ * - (((x)>>3)&0x11111111))
+ */
+static __inline u_int32_t
+popcnt(u_int32_t m)
+{
+
+ m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
+ m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
+ m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
+ m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
+ m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
+ return m;
+}
+
+static void
+smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+{
+ int ncpu, othercpus;
+
+ othercpus = mp_ncpus - 1;
+ if (mask == (u_int)-1) {
+ ncpu = othercpus;
+ if (ncpu < 1)
+ return;
+ } else {
+ mask &= ~PCPU_GET(cpumask);
+ if (mask == 0)
+ return;
+ ncpu = popcnt(mask);
+ if (ncpu > othercpus) {
+ /* XXX this should be a panic offence */
+ printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
+ ncpu, othercpus);
+ ncpu = othercpus;
+ }
+ /* XXX should be a panic, implied by mask == 0 above */
+ if (ncpu < 1)
+ return;
+ }
+ mtx_assert(&smp_rv_mtx, MA_OWNED);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ atomic_store_rel_int(&smp_tlb_wait, 0);
+ if (mask == (u_int)-1)
+ ipi_all_but_self(vector);
+ else
+ ipi_selected(mask, vector);
+ while (smp_tlb_wait < ncpu)
+ ia32_pause();
+}
+
+void
+smp_invltlb(void)
+{
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+ }
+}
+
+void
+smp_invlpg(vm_offset_t addr)
+{
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+ }
+}
+
+void
+smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
+{
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+}
+
+void
+smp_masked_invltlb(u_int mask)
+{
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_global++;
+#endif
+ }
+}
+
+void
+smp_masked_invlpg(u_int mask, vm_offset_t addr)
+{
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_page++;
+#endif
+ }
+}
+
+void
+smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
+{
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_range++;
+ ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+}
+
+
+/*
+ * For statclock, we send an IPI to all CPU's to have them call this
+ * function.
+ */
+void
+forwarded_statclock(struct clockframe frame)
+{
+ struct thread *td;
+
+ CTR0(KTR_SMP, "forwarded_statclock");
+ td = curthread;
+ td->td_intr_nesting_level++;
+ if (profprocs != 0)
+ profclock(&frame);
+ if (pscnt == psdiv)
+ statclock(&frame);
+ td->td_intr_nesting_level--;
+}
+
+void
+forward_statclock(void)
+{
+ int map;
+
+ CTR0(KTR_SMP, "forward_statclock");
+
+ if (!smp_started || cold || panicstr)
+ return;
+
+ map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
+ if (map != 0)
+ ipi_selected(map, IPI_STATCLOCK);
+}
+
+/*
+ * For each hardclock(), we send an IPI to all other CPU's to have them
+ * execute this function. It would be nice to reduce contention on
+ * sched_lock if we could simply peek at the CPU to determine the user/kernel
+ * state and call hardclock_process() on the CPU receiving the clock interrupt
+ * and then just use a simple IPI to handle any ast's if needed.
+ */
+void
+forwarded_hardclock(struct clockframe frame)
+{
+ struct thread *td;
+
+ CTR0(KTR_SMP, "forwarded_hardclock");
+ td = curthread;
+ td->td_intr_nesting_level++;
+ hardclock_process(&frame);
+ td->td_intr_nesting_level--;
+}
+
+void
+forward_hardclock(void)
+{
+ u_int map;
+
+ CTR0(KTR_SMP, "forward_hardclock");
+
+ if (!smp_started || cold || panicstr)
+ return;
+
+ map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
+ if (map != 0)
+ ipi_selected(map, IPI_HARDCLOCK);
+}
+
+/*
+ * send an IPI to a set of cpus.
+ */
+void
+ipi_selected(u_int32_t cpus, u_int ipi)
+{
+ int cpu;
+
+ CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
+ while ((cpu = ffs(cpus)) != 0) {
+ cpu--;
+ KASSERT(cpu_apic_ids[cpu] != -1,
+ ("IPI to non-existent CPU %d", cpu));
+ lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+ cpus &= ~(1 << cpu);
+ }
+}
+
+/*
+ * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
+ */
+void
+ipi_all(u_int ipi)
+{
+
+ CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+ lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
+}
+
+/*
+ * send an IPI to all CPUs EXCEPT myself
+ */
+void
+ipi_all_but_self(u_int ipi)
+{
+
+ CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+ lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+}
+
+/*
+ * send an IPI to myself
+ */
+void
+ipi_self(u_int ipi)
+{
+
+ CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+ lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
+}
+
+/*
+ * This is called once the rest of the system is up and running and we're
+ * ready to let the AP's out of the pen.
+ */
+static void
+release_aps(void *dummy __unused)
+{
+
+ if (mp_ncpus == 1)
+ return;
+ mtx_lock_spin(&sched_lock);
+ atomic_store_rel_int(&aps_ready, 1);
+ while (smp_started == 0)
+ ia32_pause();
+ mtx_unlock_spin(&sched_lock);
+}
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
+
+static int
+sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
+{
+ u_int mask;
+ int error;
+
+ mask = hlt_cpus_mask;
+ error = sysctl_handle_int(oidp, &mask, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ if (logical_cpus_mask != 0 &&
+ (mask & logical_cpus_mask) == logical_cpus_mask)
+ hlt_logical_cpus = 1;
+ else
+ hlt_logical_cpus = 0;
+
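+	/* Never allow every CPU to be halted; always keep CPU 0 running. */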
+ if ((mask & all_cpus) == all_cpus)
+ mask &= ~(1<<0);
+ hlt_cpus_mask = mask;
+ return (error);
+}
+SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
+ 0, 0, sysctl_hlt_cpus, "IU",
+ "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2.");
+
+static int
+sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
+{
+ int disable, error;
+
+ disable = hlt_logical_cpus;
+ error = sysctl_handle_int(oidp, &disable, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ if (disable)
+ hlt_cpus_mask |= logical_cpus_mask;
+ else
+ hlt_cpus_mask &= ~logical_cpus_mask;
+
+ if ((hlt_cpus_mask & all_cpus) == all_cpus)
+ hlt_cpus_mask &= ~(1<<0);
+
+ hlt_logical_cpus = disable;
+ return (error);
+}
+
+static void
+cpu_hlt_setup(void *dummy __unused)
+{
+
+ if (logical_cpus_mask != 0) {
+ TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
+ &hlt_logical_cpus);
+ sysctl_ctx_init(&logical_cpu_clist);
+ SYSCTL_ADD_PROC(&logical_cpu_clist,
+ SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
+ "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
+ sysctl_hlt_logical_cpus, "IU", "");
+ SYSCTL_ADD_UINT(&logical_cpu_clist,
+ SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
+ "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
+ &logical_cpus_mask, 0, "");
+
+ if (hlt_logical_cpus)
+ hlt_cpus_mask |= logical_cpus_mask;
+ }
+}
+SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
+
+int
+mp_grab_cpu_hlt(void)
+{
+ u_int mask = PCPU_GET(cpumask);
+#ifdef MP_WATCHDOG
+ u_int cpuid = PCPU_GET(cpuid);
+#endif
+ int retval;
+
+#ifdef MP_WATCHDOG
+ ap_watchdog(cpuid);
+#endif
+
+ retval = mask & hlt_cpus_mask;
+ while (mask & hlt_cpus_mask)
+ __asm __volatile("sti; hlt" : : : "memory");
+ return (retval);
+}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c
new file mode 100644
index 0000000000..2f0aff0055
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c
@@ -0,0 +1,974 @@
+/*-
+ * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
+ * Copyright (c) 1996, by Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/mptable.c,v 1.235.2.1 2004/09/28 16:24:09 jhb Exp $");
+
+#include "opt_mptable_force_htt.h"
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include <machine/apicreg.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <machine/md_var.h>
+#include <machine/mptable.h>
+#include <machine/specialreg.h>
+
+#include <dev/pci/pcivar.h>
+
+/* string defined by the Intel MP Spec as identifying the MP table */
+#define MP_SIG 0x5f504d5f /* _MP_ */
+
+#define NAPICID 32 /* Max number of APIC's */
+
+#ifdef PC98
+#define BIOS_BASE (0xe8000)
+#define BIOS_SIZE (0x18000)
+#else
+#define BIOS_BASE (0xf0000)
+#define BIOS_SIZE (0x10000)
+#endif
+#define BIOS_COUNT (BIOS_SIZE/4)
+
+typedef void mptable_entry_handler(u_char *entry, void *arg);
+
+static basetable_entry basetable_entry_types[] =
+{
+ {0, 20, "Processor"},
+ {1, 8, "Bus"},
+ {2, 8, "I/O APIC"},
+ {3, 8, "I/O INT"},
+ {4, 8, "Local INT"}
+};
+
+typedef struct BUSDATA {
+ u_char bus_id;
+ enum busTypes bus_type;
+} bus_datum;
+
+typedef struct INTDATA {
+ u_char int_type;
+ u_short int_flags;
+ u_char src_bus_id;
+ u_char src_bus_irq;
+ u_char dst_apic_id;
+ u_char dst_apic_int;
+ u_char int_vector;
+} io_int, local_int;
+
+typedef struct BUSTYPENAME {
+ u_char type;
+ char name[7];
+} bus_type_name;
+
+/* From MP spec v1.4, table 4-8. */
+static bus_type_name bus_type_table[] =
+{
+ {UNKNOWN_BUSTYPE, "CBUS "},
+ {UNKNOWN_BUSTYPE, "CBUSII"},
+ {EISA, "EISA "},
+ {UNKNOWN_BUSTYPE, "FUTURE"},
+ {UNKNOWN_BUSTYPE, "INTERN"},
+ {ISA, "ISA "},
+ {UNKNOWN_BUSTYPE, "MBI "},
+ {UNKNOWN_BUSTYPE, "MBII "},
+ {MCA, "MCA "},
+ {UNKNOWN_BUSTYPE, "MPI "},
+ {UNKNOWN_BUSTYPE, "MPSA "},
+ {UNKNOWN_BUSTYPE, "NUBUS "},
+ {PCI, "PCI "},
+ {UNKNOWN_BUSTYPE, "PCMCIA"},
+ {UNKNOWN_BUSTYPE, "TC "},
+ {UNKNOWN_BUSTYPE, "VL "},
+ {UNKNOWN_BUSTYPE, "VME "},
+ {UNKNOWN_BUSTYPE, "XPRESS"}
+};
+
+/* From MP spec v1.4, table 5-1. */
+static int default_data[7][5] =
+{
+/* nbus, id0, type0, id1, type1 */
+ {1, 0, ISA, 255, NOBUS},
+ {1, 0, EISA, 255, NOBUS},
+ {1, 0, EISA, 255, NOBUS},
+ {1, 0, MCA, 255, NOBUS},
+ {2, 0, ISA, 1, PCI},
+ {2, 0, EISA, 1, PCI},
+ {2, 0, MCA, 1, PCI}
+};
+
+struct pci_probe_table_args {
+ u_char bus;
+ u_char found;
+};
+
+struct pci_route_interrupt_args {
+ u_char bus; /* Source bus. */
+ u_char irq; /* Source slot:pin. */
+ int vector; /* Return value. */
+};
+
+static mpfps_t mpfps;
+static mpcth_t mpct;
+static void *ioapics[NAPICID];
+static bus_datum *busses;
+static int mptable_nioapics, mptable_nbusses, mptable_maxbusid;
+static int pci0 = -1;
+
+MALLOC_DEFINE(M_MPTABLE, "MP Table", "MP Table Items");
+
+static enum intr_polarity conforming_polarity(u_char src_bus,
+ u_char src_bus_irq);
+static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq);
+static enum intr_polarity intentry_polarity(int_entry_ptr intr);
+static enum intr_trigger intentry_trigger(int_entry_ptr intr);
+static int lookup_bus_type(char *name);
+static void mptable_count_items(void);
+static void mptable_count_items_handler(u_char *entry, void *arg);
+#ifdef MPTABLE_FORCE_HTT
+static void mptable_hyperthread_fixup(u_int id_mask);
+#endif
+static void mptable_parse_apics_and_busses(void);
+static void mptable_parse_apics_and_busses_handler(u_char *entry,
+ void *arg);
+static void mptable_parse_ints(void);
+static void mptable_parse_ints_handler(u_char *entry, void *arg);
+static void mptable_parse_io_int(int_entry_ptr intr);
+static void mptable_parse_local_int(int_entry_ptr intr);
+static void mptable_pci_probe_table_handler(u_char *entry, void *arg);
+static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg);
+static void mptable_pci_setup(void);
+static int mptable_probe(void);
+static int mptable_probe_cpus(void);
+static void mptable_probe_cpus_handler(u_char *entry, void *arg __unused);
+static void mptable_register(void *dummy);
+static int mptable_setup_local(void);
+static int mptable_setup_io(void);
+static void mptable_walk_table(mptable_entry_handler *handler, void *arg);
+static int search_for_sig(u_int32_t target, int count);
+
+static struct apic_enumerator mptable_enumerator = {
+ "MPTable",
+ mptable_probe,
+ mptable_probe_cpus,
+ mptable_setup_local,
+ mptable_setup_io
+};
+
+/*
+ * look for the MP spec signature
+ */
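+/*
+ * Per the MP spec the floating pointer structure sits on a 16-byte
+ * boundary, which is why the scan advances four 32-bit words at a time.
+ */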
+
+static int
+search_for_sig(u_int32_t target, int count)
+{
+ int x;
+ u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
+
+ for (x = 0; x < count; x += 4)
+ if (addr[x] == MP_SIG)
+ /* make array index a byte index */
+ return (target + (x * sizeof(u_int32_t)));
+ return (-1);
+}
+
+static int
+lookup_bus_type(char *name)
+{
+ int x;
+
+ for (x = 0; x < MAX_BUSTYPE; ++x)
+ if (strncmp(bus_type_table[x].name, name, 6) == 0)
+ return (bus_type_table[x].type);
+
+ return (UNKNOWN_BUSTYPE);
+}
+
+/*
+ * Look for an Intel MP spec table (ie, SMP capable hardware).
+ */
+static int
+mptable_probe(void)
+{
+ int x;
+ u_long segment;
+ u_int32_t target;
+
+ /* see if EBDA exists */
+ if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
+ /* search first 1K of EBDA */
+ target = (u_int32_t) (segment << 4);
+ if ((x = search_for_sig(target, 1024 / 4)) >= 0)
+ goto found;
+ } else {
+ /* last 1K of base memory, effective 'top of base' passed in */
+ target = (u_int32_t) ((basemem * 1024) - 0x400);
+ if ((x = search_for_sig(target, 1024 / 4)) >= 0)
+ goto found;
+ }
+
+ /* search the BIOS */
+ target = (u_int32_t) BIOS_BASE;
+ if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
+ goto found;
+
+ /* nothing found */
+ return (ENXIO);
+
+found:
+ mpfps = (mpfps_t)(KERNBASE + x);
+
+ /* Map in the configuration table if it exists. */
+ if (mpfps->config_type != 0)
+ mpct = NULL;
+ else {
+ if ((uintptr_t)mpfps->pap >= 1024 * 1024) {
+ printf("%s: Unable to map MP Configuration Table\n",
+ __func__);
+ return (ENXIO);
+ }
+ mpct = (mpcth_t)(KERNBASE + (uintptr_t)mpfps->pap);
+ if (mpct->base_table_length + (uintptr_t)mpfps->pap >=
+ 1024 * 1024) {
+ printf("%s: Unable to map end of MP Config Table\n",
+ __func__);
+ return (ENXIO);
+ }
+ if (mpct->signature[0] != 'P' || mpct->signature[1] != 'C' ||
+ mpct->signature[2] != 'M' || mpct->signature[3] != 'P') {
+ printf("%s: MP Config Table has bad signature: %c%c%c%c\n",
+ __func__, mpct->signature[0], mpct->signature[1],
+ mpct->signature[2], mpct->signature[3]);
+ return (ENXIO);
+ }
+ if (bootverbose)
+ printf(
+ "MP Configuration Table version 1.%d found at %p\n",
+ mpct->spec_rev, mpct);
+ }
+
+ return (-100);
+}
+
+/*
+ * Run through the MP table enumerating CPUs.
+ */
+static int
+mptable_probe_cpus(void)
+{
+ u_int cpu_mask;
+
+ /* Is this a pre-defined config? */
+ if (mpfps->config_type != 0) {
+ lapic_create(0, 1);
+ lapic_create(1, 0);
+ } else {
+ cpu_mask = 0;
+ mptable_walk_table(mptable_probe_cpus_handler, &cpu_mask);
+#ifdef MPTABLE_FORCE_HTT
+ mptable_hyperthread_fixup(cpu_mask);
+#endif
+ }
+ return (0);
+}
+
+/*
+ * Initialize the local APIC on the BSP.
+ */
+static int
+mptable_setup_local(void)
+{
+
+ /* Is this a pre-defined config? */
+ printf("MPTable: <");
+ if (mpfps->config_type != 0) {
+ lapic_init(DEFAULT_APIC_BASE);
+ printf("Preset Config %d", mpfps->config_type);
+ } else {
+ lapic_init((uintptr_t)mpct->apic_address);
+ printf("%.*s %.*s", (int)sizeof(mpct->oem_id), mpct->oem_id,
+ (int)sizeof(mpct->product_id), mpct->product_id);
+ }
+ printf(">\n");
+ return (0);
+}
+
+/*
+ * Run through the MP table enumerating I/O APICs.
+ */
+static int
+mptable_setup_io(void)
+{
+ int i;
+ u_char byte;
+
+ /* First, we count individual items and allocate arrays. */
+ mptable_count_items();
+ busses = malloc((mptable_maxbusid + 1) * sizeof(bus_datum), M_MPTABLE,
+ M_WAITOK);
+ for (i = 0; i <= mptable_maxbusid; i++)
+ busses[i].bus_type = NOBUS;
+
+ /* Second, we run through adding I/O APIC's and busses. */
+ ioapic_enable_mixed_mode();
+ mptable_parse_apics_and_busses();
+
+ /* Third, we run through the table tweaking interrupt sources. */
+ mptable_parse_ints();
+
+ /* Fourth, we register all the I/O APIC's. */
+ for (i = 0; i < NAPICID; i++)
+ if (ioapics[i] != NULL)
+ ioapic_register(ioapics[i]);
+
+ /* Fifth, we setup data structures to handle PCI interrupt routing. */
+ mptable_pci_setup();
+
+ /* Finally, we throw the switch to enable the I/O APIC's. */
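+	/*
+	 * Setting IMCR bit 0 moves the system from PIC mode, where the
+	 * 8259's INTR line is wired straight to the BSP, to symmetric
+	 * I/O mode, where interrupts are delivered via the I/O APIC's.
+	 */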
+ if (mpfps->mpfb2 & MPFB2_IMCR_PRESENT) {
+ outb(0x22, 0x70); /* select IMCR */
+ byte = inb(0x23); /* current contents */
+ byte |= 0x01; /* mask external INTR */
+ outb(0x23, byte); /* disconnect 8259s/NMI */
+ }
+
+ return (0);
+}
+
+static void
+mptable_register(void *dummy __unused)
+{
+
+ apic_register_enumerator(&mptable_enumerator);
+}
+SYSINIT(mptable_register, SI_SUB_CPU - 1, SI_ORDER_FIRST, mptable_register,
+ NULL)
+
+/*
+ * Call the handler routine for each entry in the MP config table.
+ */
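+/*
+ * Entries are variable length (20 bytes for a processor entry, 8 for
+ * the rest); the first byte encodes the type and indexes
+ * basetable_entry_types[] for the stride.
+ */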
+static void
+mptable_walk_table(mptable_entry_handler *handler, void *arg)
+{
+ u_int i;
+ u_char *entry;
+
+ entry = (u_char *)(mpct + 1);
+ for (i = 0; i < mpct->entry_count; i++) {
+ switch (*entry) {
+ case MPCT_ENTRY_PROCESSOR:
+ case MPCT_ENTRY_IOAPIC:
+ case MPCT_ENTRY_BUS:
+ case MPCT_ENTRY_INT:
+ case MPCT_ENTRY_LOCAL_INT:
+ break;
+ default:
+ panic("%s: Unknown MP Config Entry %d\n", __func__,
+ (int)*entry);
+ }
+ handler(entry, arg);
+ entry += basetable_entry_types[*entry].length;
+ }
+}
+
+static void
+mptable_probe_cpus_handler(u_char *entry, void *arg)
+{
+ proc_entry_ptr proc;
+ u_int *cpu_mask;
+
+ switch (*entry) {
+ case MPCT_ENTRY_PROCESSOR:
+ proc = (proc_entry_ptr)entry;
+ if (proc->cpu_flags & PROCENTRY_FLAG_EN) {
+ lapic_create(proc->apic_id, proc->cpu_flags &
+ PROCENTRY_FLAG_BP);
+ cpu_mask = (u_int *)arg;
+ *cpu_mask |= (1 << proc->apic_id);
+ }
+ break;
+ }
+}
+
+static void
+mptable_count_items_handler(u_char *entry, void *arg __unused)
+{
+ io_apic_entry_ptr apic;
+ bus_entry_ptr bus;
+
+ switch (*entry) {
+ case MPCT_ENTRY_BUS:
+ bus = (bus_entry_ptr)entry;
+ mptable_nbusses++;
+ if (bus->bus_id > mptable_maxbusid)
+ mptable_maxbusid = bus->bus_id;
+ break;
+ case MPCT_ENTRY_IOAPIC:
+ apic = (io_apic_entry_ptr)entry;
+ if (apic->apic_flags & IOAPICENTRY_FLAG_EN)
+ mptable_nioapics++;
+ break;
+ }
+}
+
+/*
+ * Count items in the table.
+ */
+static void
+mptable_count_items(void)
+{
+
+ /* Is this a pre-defined config? */
+ if (mpfps->config_type != 0) {
+ mptable_nioapics = 1;
+ switch (mpfps->config_type) {
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ mptable_nbusses = 1;
+ break;
+ case 5:
+ case 6:
+ case 7:
+ mptable_nbusses = 2;
+ break;
+ default:
+ panic("Unknown pre-defined MP Table config type %d",
+ mpfps->config_type);
+ }
+ mptable_maxbusid = mptable_nbusses - 1;
+ } else
+ mptable_walk_table(mptable_count_items_handler, NULL);
+}
+
+/*
+ * Add a bus or I/O APIC from an entry in the table.
+ */
+static void
+mptable_parse_apics_and_busses_handler(u_char *entry, void *arg __unused)
+{
+ io_apic_entry_ptr apic;
+ bus_entry_ptr bus;
+ enum busTypes bus_type;
+ int i;
+
+ switch (*entry) {
+ case MPCT_ENTRY_BUS:
+ bus = (bus_entry_ptr)entry;
+ bus_type = lookup_bus_type(bus->bus_type);
+ if (bus_type == UNKNOWN_BUSTYPE) {
+ printf("MPTable: Unknown bus %d type \"", bus->bus_id);
+ for (i = 0; i < 6; i++)
+ printf("%c", bus->bus_type[i]);
+ printf("\"\n");
+ }
+ busses[bus->bus_id].bus_id = bus->bus_id;
+ busses[bus->bus_id].bus_type = bus_type;
+ break;
+ case MPCT_ENTRY_IOAPIC:
+ apic = (io_apic_entry_ptr)entry;
+ if (!(apic->apic_flags & IOAPICENTRY_FLAG_EN))
+ break;
+ if (apic->apic_id >= NAPICID)
+ panic("%s: I/O APIC ID %d too high", __func__,
+ apic->apic_id);
+ if (ioapics[apic->apic_id] != NULL)
+ panic("%s: Double APIC ID %d", __func__,
+ apic->apic_id);
+ ioapics[apic->apic_id] = ioapic_create(
+ (uintptr_t)apic->apic_address, apic->apic_id, -1);
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * Enumerate I/O APIC's and busses.
+ */
+static void
+mptable_parse_apics_and_busses(void)
+{
+
+ /* Is this a pre-defined config? */
+ if (mpfps->config_type != 0) {
+ ioapics[0] = ioapic_create(DEFAULT_IO_APIC_BASE, 2, 0);
+ busses[0].bus_id = 0;
+ busses[0].bus_type = default_data[mpfps->config_type][2];
+ if (mptable_nbusses > 1) {
+ busses[1].bus_id = 1;
+ busses[1].bus_type =
+ default_data[mpfps->config_type][4];
+ }
+ } else
+ mptable_walk_table(mptable_parse_apics_and_busses_handler,
+ NULL);
+}
+
+/*
+ * Determine conforming polarity for a given bus type.
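+ * ISA and EISA interrupts conform to active-high polarity, PCI
+ * interrupts to active-low.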
+ */
+static enum intr_polarity
+conforming_polarity(u_char src_bus, u_char src_bus_irq)
+{
+
+ KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus));
+ switch (busses[src_bus].bus_type) {
+ case ISA:
+ case EISA:
+ return (INTR_POLARITY_HIGH);
+ case PCI:
+ return (INTR_POLARITY_LOW);
+ default:
+ panic("%s: unknown bus type %d", __func__,
+ busses[src_bus].bus_type);
+ }
+}
+
+/*
+ * Determine conforming trigger for a given bus type.
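+ * ISA conforms to edge trigger and PCI to level trigger; EISA is
+ * read from the ELCR where that support is compiled in.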
+ */
+static enum intr_trigger
+conforming_trigger(u_char src_bus, u_char src_bus_irq)
+{
+
+ KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus));
+ switch (busses[src_bus].bus_type) {
+ case ISA:
+ return (INTR_TRIGGER_EDGE);
+ case PCI:
+ return (INTR_TRIGGER_LEVEL);
+#if !defined(PC98) && !defined(XEN)
+ case EISA:
+ KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq));
+ return (elcr_read_trigger(src_bus_irq));
+#endif
+ default:
+ panic("%s: unknown bus type %d", __func__,
+ busses[src_bus].bus_type);
+ }
+}
+
+static enum intr_polarity
+intentry_polarity(int_entry_ptr intr)
+{
+
+ switch (intr->int_flags & INTENTRY_FLAGS_POLARITY) {
+ case INTENTRY_FLAGS_POLARITY_CONFORM:
+ return (conforming_polarity(intr->src_bus_id,
+ intr->src_bus_irq));
+ case INTENTRY_FLAGS_POLARITY_ACTIVEHI:
+ return (INTR_POLARITY_HIGH);
+ case INTENTRY_FLAGS_POLARITY_ACTIVELO:
+ return (INTR_POLARITY_LOW);
+ default:
+ panic("Bogus interrupt flags");
+ }
+}
+
+static enum intr_trigger
+intentry_trigger(int_entry_ptr intr)
+{
+
+ switch (intr->int_flags & INTENTRY_FLAGS_TRIGGER) {
+ case INTENTRY_FLAGS_TRIGGER_CONFORM:
+ return (conforming_trigger(intr->src_bus_id,
+ intr->src_bus_irq));
+ case INTENTRY_FLAGS_TRIGGER_EDGE:
+ return (INTR_TRIGGER_EDGE);
+ case INTENTRY_FLAGS_TRIGGER_LEVEL:
+ return (INTR_TRIGGER_LEVEL);
+ default:
+ panic("Bogus interrupt flags");
+ }
+}
+
+/*
+ * Parse an interrupt entry for an I/O interrupt routed to a pin on an I/O APIC.
+ */
+static void
+mptable_parse_io_int(int_entry_ptr intr)
+{
+ void *ioapic;
+ u_int pin;
+
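+	/*
+	 * A destination APIC ID of 0xff denotes a signal wired to the
+	 * matching pin on all I/O APICs, which we do not support.
+	 */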
+ if (intr->dst_apic_id == 0xff) {
+ printf("MPTable: Ignoring global interrupt entry for pin %d\n",
+ intr->dst_apic_int);
+ return;
+ }
+ if (intr->dst_apic_id >= NAPICID) {
+ printf("MPTable: Ignoring interrupt entry for ioapic%d\n",
+ intr->dst_apic_id);
+ return;
+ }
+ ioapic = ioapics[intr->dst_apic_id];
+ if (ioapic == NULL) {
+ printf(
+ "MPTable: Ignoring interrupt entry for missing ioapic%d\n",
+ intr->dst_apic_id);
+ return;
+ }
+ pin = intr->dst_apic_int;
+ switch (intr->int_type) {
+ case INTENTRY_TYPE_INT:
+ switch (busses[intr->src_bus_id].bus_type) {
+ case NOBUS:
+ panic("interrupt from missing bus");
+ case ISA:
+ case EISA:
+ if (busses[intr->src_bus_id].bus_type == ISA)
+ ioapic_set_bus(ioapic, pin, APIC_BUS_ISA);
+ else
+ ioapic_set_bus(ioapic, pin, APIC_BUS_EISA);
+ if (intr->src_bus_irq == pin)
+ break;
+ ioapic_remap_vector(ioapic, pin, intr->src_bus_irq);
+ if (ioapic_get_vector(ioapic, intr->src_bus_irq) ==
+ intr->src_bus_irq)
+ ioapic_disable_pin(ioapic, intr->src_bus_irq);
+ break;
+ case PCI:
+ ioapic_set_bus(ioapic, pin, APIC_BUS_PCI);
+ break;
+ default:
+ ioapic_set_bus(ioapic, pin, APIC_BUS_UNKNOWN);
+ break;
+ }
+ break;
+ case INTENTRY_TYPE_NMI:
+ ioapic_set_nmi(ioapic, pin);
+ break;
+ case INTENTRY_TYPE_SMI:
+ ioapic_set_smi(ioapic, pin);
+ break;
+ case INTENTRY_TYPE_EXTINT:
+ ioapic_set_extint(ioapic, pin);
+ break;
+ default:
+ panic("%s: invalid interrupt entry type %d\n", __func__,
+ intr->int_type);
+ }
+ if (intr->int_type == INTENTRY_TYPE_INT ||
+ (intr->int_flags & INTENTRY_FLAGS_TRIGGER) !=
+ INTENTRY_FLAGS_TRIGGER_CONFORM)
+ ioapic_set_triggermode(ioapic, pin, intentry_trigger(intr));
+ if (intr->int_type == INTENTRY_TYPE_INT ||
+ (intr->int_flags & INTENTRY_FLAGS_POLARITY) !=
+ INTENTRY_FLAGS_POLARITY_CONFORM)
+ ioapic_set_polarity(ioapic, pin, intentry_polarity(intr));
+}
+
+/*
+ * Parse an interrupt entry for a local APIC LVT pin.
+ */
+static void
+mptable_parse_local_int(int_entry_ptr intr)
+{
+ u_int apic_id, pin;
+
+ if (intr->dst_apic_id == 0xff)
+ apic_id = APIC_ID_ALL;
+ else
+ apic_id = intr->dst_apic_id;
+ if (intr->dst_apic_int == 0)
+ pin = LVT_LINT0;
+ else
+ pin = LVT_LINT1;
+ switch (intr->int_type) {
+ case INTENTRY_TYPE_INT:
+#if 1
+ printf(
+ "MPTable: Ignoring vectored local interrupt for LINTIN%d vector %d\n",
+ intr->dst_apic_int, intr->src_bus_irq);
+ return;
+#else
+ lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_FIXED);
+ break;
+#endif
+ case INTENTRY_TYPE_NMI:
+ lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI);
+ break;
+ case INTENTRY_TYPE_SMI:
+ lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_SMI);
+ break;
+ case INTENTRY_TYPE_EXTINT:
+ lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_EXTINT);
+ break;
+ default:
+ panic("%s: invalid interrupt entry type %d\n", __func__,
+ intr->int_type);
+ }
+ if ((intr->int_flags & INTENTRY_FLAGS_TRIGGER) !=
+ INTENTRY_FLAGS_TRIGGER_CONFORM)
+ lapic_set_lvt_triggermode(apic_id, pin,
+ intentry_trigger(intr));
+ if ((intr->int_flags & INTENTRY_FLAGS_POLARITY) !=
+ INTENTRY_FLAGS_POLARITY_CONFORM)
+ lapic_set_lvt_polarity(apic_id, pin, intentry_polarity(intr));
+}
+
+/*
+ * Parse interrupt entries.
+ */
+static void
+mptable_parse_ints_handler(u_char *entry, void *arg __unused)
+{
+ int_entry_ptr intr;
+
+ intr = (int_entry_ptr)entry;
+ switch (*entry) {
+ case MPCT_ENTRY_INT:
+ mptable_parse_io_int(intr);
+ break;
+ case MPCT_ENTRY_LOCAL_INT:
+ mptable_parse_local_int(intr);
+ break;
+ }
+}
+
+/*
+ * Configure the interrupt pins
+ */
+static void
+mptable_parse_ints(void)
+{
+
+ /* Is this a pre-defined config? */
+ if (mpfps->config_type != 0) {
+ /* Configure LINT pins. */
+ lapic_set_lvt_mode(APIC_ID_ALL, LVT_LINT0, APIC_LVT_DM_EXTINT);
+ lapic_set_lvt_mode(APIC_ID_ALL, LVT_LINT1, APIC_LVT_DM_NMI);
+
+ /* Configure I/O APIC pins. */
+ if (mpfps->config_type != 7)
+ ioapic_set_extint(ioapics[0], 0);
+ else
+ ioapic_disable_pin(ioapics[0], 0);
+ if (mpfps->config_type != 2)
+ ioapic_remap_vector(ioapics[0], 2, 0);
+ else
+ ioapic_disable_pin(ioapics[0], 2);
+ if (mpfps->config_type == 2)
+ ioapic_disable_pin(ioapics[0], 13);
+ } else
+ mptable_walk_table(mptable_parse_ints_handler, NULL);
+}
+
+#ifdef MPTABLE_FORCE_HTT
+/*
+ * Perform a hyperthreading "fix-up" to enumerate any logical CPU's
+ * that aren't already listed in the table.
+ *
+ * XXX: We assume that all of the physical CPUs in the
+ * system have the same number of logical CPUs.
+ *
+ * XXX: We assume that APIC ID's are allocated such that
+ * the APIC ID's for a physical processor are aligned
+ * with the number of logical CPU's in the processor.
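+ *
+ * For example, with 2 logical CPUs per package, a CPU listed at
+ * APIC ID 4 is assumed to own IDs 4 and 5; if only ID 4 appears
+ * in the table, ID 5 is created below as a logical CPU.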
+ */
+static void
+mptable_hyperthread_fixup(u_int id_mask)
+{
+ u_int i, id, logical_cpus;
+
+ /* Nothing to do if there is no HTT support. */
+ if ((cpu_feature & CPUID_HTT) == 0)
+ return;
+ logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
+ if (logical_cpus <= 1)
+ return;
+
+ /*
+ * For each APIC ID of a CPU that is set in the mask,
+ * scan the other candidate APIC ID's for this
+ * physical processor. If any of those ID's are
+ * already in the table, then kill the fixup.
+ */
+ for (id = 0; id < NAPICID; id++) {
+ if ((id_mask & 1 << id) == 0)
+ continue;
+ /* First, make sure we are on a logical_cpus boundary. */
+ if (id % logical_cpus != 0)
+ return;
+ for (i = id + 1; i < id + logical_cpus; i++)
+ if ((id_mask & 1 << i) != 0)
+ return;
+ }
+
+ /*
+ * Ok, the ID's checked out, so perform the fixup by
+ * adding the logical CPUs.
+ */
+ while ((id = ffs(id_mask)) != 0) {
+ id--;
+ for (i = id + 1; i < id + logical_cpus; i++) {
+ if (bootverbose)
+ printf(
+ "MPTable: Adding logical CPU %d from main CPU %d\n",
+ i, id);
+ lapic_create(i, 0);
+ }
+ id_mask &= ~(1 << id);
+ }
+}
+#endif /* MPTABLE_FORCE_HTT */
+
+/*
+ * Support code for routing PCI interrupts using the MP Table.
+ */
+static void
+mptable_pci_setup(void)
+{
+ int i;
+
+ /*
+ * Find the first pci bus and call it 0. Panic if pci0 is not
+ * bus zero and there are multiple PCI busses.
+ */
+ for (i = 0; i <= mptable_maxbusid; i++)
+ if (busses[i].bus_type == PCI) {
+ if (pci0 == -1)
+ pci0 = i;
+ else if (pci0 != 0)
+ panic(
+ "MPTable contains multiple PCI busses but no PCI bus 0");
+ }
+}
+
+static void
+mptable_pci_probe_table_handler(u_char *entry, void *arg)
+{
+ struct pci_probe_table_args *args;
+ int_entry_ptr intr;
+
+ if (*entry != MPCT_ENTRY_INT)
+ return;
+ intr = (int_entry_ptr)entry;
+ args = (struct pci_probe_table_args *)arg;
+ KASSERT(args->bus <= mptable_maxbusid,
+ ("bus %d is too big", args->bus));
+ KASSERT(busses[args->bus].bus_type == PCI, ("probing for non-PCI bus"));
+ if (intr->src_bus_id == args->bus)
+ args->found = 1;
+}
+
+int
+mptable_pci_probe_table(int bus)
+{
+ struct pci_probe_table_args args;
+
+ if (bus < 0)
+ return (EINVAL);
+ if (pci0 == -1 || pci0 + bus > mptable_maxbusid)
+ return (ENXIO);
+ if (busses[pci0 + bus].bus_type != PCI)
+ return (ENXIO);
+ args.bus = pci0 + bus;
+ args.found = 0;
+ mptable_walk_table(mptable_pci_probe_table_handler, &args);
+ if (args.found == 0)
+ return (ENXIO);
+ return (0);
+}
+
+static void
+mptable_pci_route_interrupt_handler(u_char *entry, void *arg)
+{
+ struct pci_route_interrupt_args *args;
+ int_entry_ptr intr;
+ int vector;
+
+ if (*entry != MPCT_ENTRY_INT)
+ return;
+ intr = (int_entry_ptr)entry;
+ args = (struct pci_route_interrupt_args *)arg;
+ if (intr->src_bus_id != args->bus || intr->src_bus_irq != args->irq)
+ return;
+
+ /* Make sure the APIC maps to a known APIC. */
+ KASSERT(ioapics[intr->dst_apic_id] != NULL,
+ ("No I/O APIC %d to route interrupt to", intr->dst_apic_id));
+
+ /*
+ * Look up the vector for this APIC / pin combination. If we
+ * have previously matched an entry for this PCI IRQ but it
+ * has the same vector as this entry, just return. Otherwise,
+ * we use the vector for this APIC / pin combination.
+ */
+ vector = ioapic_get_vector(ioapics[intr->dst_apic_id],
+ intr->dst_apic_int);
+ if (args->vector == vector)
+ return;
+ KASSERT(args->vector == -1,
+ ("Multiple IRQs for PCI interrupt %d.%d.INT%c: %d and %d\n",
+ args->bus, args->irq >> 2, 'A' + (args->irq & 0x3), args->vector,
+ vector));
+ args->vector = vector;
+}
+
+int
+mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+ struct pci_route_interrupt_args args;
+ int slot;
+
+ /* Like ACPI, pin numbers are 0-3, not 1-4. */
+ pin--;
+ KASSERT(pci0 != -1, ("do not know how to route PCI interrupts"));
+ args.bus = pci_get_bus(dev) + pci0;
+ slot = pci_get_slot(dev);
+
+ /*
+ * PCI interrupt entries in the MP Table encode both the slot and
+ * pin into the IRQ with the pin being the two least significant
+ * bits, the slot being the next five bits, and the most significant
+ * bit being reserved.
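+	 *
+	 * For example, slot 3 INTB (pin 1) encodes as (3 << 2) | 1 = 13.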
+ */
+ args.irq = slot << 2 | pin;
+ args.vector = -1;
+ mptable_walk_table(mptable_pci_route_interrupt_handler, &args);
+ if (args.vector < 0) {
+ device_printf(pcib, "unable to route slot %d INT%c\n", slot,
+ 'A' + pin);
+ return (PCI_INVALID_IRQ);
+ }
+ if (bootverbose)
+ device_printf(pcib, "slot %d INT%c routed to irq %d\n", slot,
+ 'A' + pin, args.vector);
+ return (args.vector);
+}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c
new file mode 100644
index 0000000000..ee61e80ed9
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c
@@ -0,0 +1,3381 @@
+/*-
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ * Copyright (c) 1994 David Greenman
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department and William Jolitz of UUNET Technologies Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
+ */
+/*-
+ * Copyright (c) 2003 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Jake Burkholder,
+ * Safeport Network Services, and Network Associates Laboratories, the
+ * Security Research Division of Network Associates, Inc. under
+ * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
+ * CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/pmap.c,v 1.494.2.6 2004/10/10 19:08:00 alc Exp $");
+
+/*
+ * Manages physical address maps.
+ * XEN NOTES: page table entries (pt_entry_t) and
+ * page directory entries (pd_entry_t) contain machine
+ * addresses and not physical addresses. Use PT_GET() before
+ * dereferencing these structures to convert them into a
+ * physical address. Use the PT_SET_VA operations to commit
+ * page changes back to XEN. PT_SET_VA_MA should be used with
+ * great care!
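+ *
+ * A typical access pattern, as used later in this file:
+ *
+ *	newpf = PT_GET(pde) & PG_FRAME;      machine -> physical
+ *	PT_SET_VA(pte, newpf | PG_V, TRUE);  commit via hypervisor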
+ *
+ *
+ * In addition to hardware address maps, this
+ * module is called upon to provide software-use-only
+ * maps which may or may not be stored in the same
+ * form as hardware maps. These pseudo-maps are
+ * used to store intermediate results from copy
+ * operations to and from address spaces.
+ *
+ * Since the information managed by this module is
+ * also stored by the logical address mapping module,
+ * this module may throw away valid virtual-to-physical
+ * mappings at almost any time. However, invalidations
+ * of virtual-to-physical mappings must be done as
+ * requested.
+ *
+ * In order to cope with hardware architectures which
+ * make virtual-to-physical map invalidates expensive,
+ * this module may delay invalidate or reduced protection
+ * operations until such time as they are actually
+ * necessary. This module is given full information as
+ * to which processors are currently using which maps,
+ * and to when physical maps must be made correct.
+ */
+
+#include "opt_cpu.h"
+#include "opt_pmap.h"
+#include "opt_msgbuf.h"
+#include "opt_kstack_pages.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/msgbuf.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sx.h>
+#include <sys/user.h>
+#include <sys/vmmeter.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#ifdef SMP
+#include <sys/smp.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
+#include <vm/uma.h>
+
+#include <machine/cpu.h>
+#include <machine/cputypes.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
+
+#include <machine/xenfunc.h>
+
+#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
+#define CPU_ENABLE_SSE
+#endif
+#if defined(CPU_DISABLE_SSE)
+#undef CPU_ENABLE_SSE
+#endif
+
+#ifndef PMAP_SHPGPERPROC
+#define PMAP_SHPGPERPROC 200
+#endif
+
+#if defined(DIAGNOSTIC)
+#define PMAP_DIAGNOSTIC
+#endif
+
+#define MINPV 2048
+
+#if !defined(PMAP_DIAGNOSTIC)
+#define PMAP_INLINE __inline
+#else
+#define PMAP_INLINE
+#endif
+
+/*
+ * Get PDEs and PTEs for user/kernel address space
+ */
+#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
+#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
+
+#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0)
+#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0)
+#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0)
+#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0)
+#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0)
+
+#if 0
+#define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
+ atomic_clear_int((u_int *)(pte), PG_W))
+#else
+#define pmap_pte_set_w(pte, v) { \
+ if (v) \
+ PT_SET_VA_MA(pte, *pte | PG_W, TRUE); \
+ else \
+ PT_SET_VA_MA(pte, *pte & ~PG_W, TRUE); \
+}
+#endif
+
+struct pmap kernel_pmap_store;
+LIST_HEAD(pmaplist, pmap);
+static struct pmaplist allpmaps;
+static struct mtx allpmaps_lock;
+
+vm_paddr_t avail_end; /* PA of last available physical page */
+vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
+vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
+static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */
+int pgeflag = 0; /* PG_G or-in */
+int pseflag = 0; /* PG_PS or-in */
+
+static int nkpt;
+vm_offset_t kernel_vm_end;
+extern u_int32_t KERNend;
+
+#ifdef PAE
+static uma_zone_t pdptzone;
+#endif
+
+/*
+ * Data for the pv entry allocation mechanism
+ */
+static uma_zone_t pvzone;
+static struct vm_object pvzone_obj;
+static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
+int pmap_pagedaemon_waken;
+
+/*
+ * All those kernel PT submaps that BSD is so fond of
+ */
+pt_entry_t *CMAP1 = 0;
+static pt_entry_t *CMAP2, *CMAP3;
+caddr_t CADDR1 = 0, ptvmmap = 0;
+static caddr_t CADDR2, CADDR3;
+static struct mtx CMAPCADDR12_lock;
+struct msgbuf *msgbufp = 0;
+
+/*
+ * Crashdump maps.
+ */
+static caddr_t crashdumpmap;
+
+#ifdef SMP
+extern pt_entry_t *SMPpt;
+#endif
+static pt_entry_t *PMAP1 = 0, *PMAP2;
+static pt_entry_t *PADDR1 = 0, *PADDR2;
+#ifdef SMP
+static int PMAP1cpu;
+static int PMAP1changedcpu;
+SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
+ &PMAP1changedcpu, 0,
+ "Number of times pmap_pte_quick changed CPU with same PMAP1");
+#endif
+static int PMAP1changed;
+SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
+ &PMAP1changed, 0,
+ "Number of times pmap_pte_quick changed PMAP1");
+static int PMAP1unchanged;
+SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
+ &PMAP1unchanged, 0,
+ "Number of times pmap_pte_quick didn't change PMAP1");
+static struct mtx PMAP2mutex;
+
+static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
+static pv_entry_t get_pv_entry(void);
+static void pmap_clear_ptes(vm_page_t m, int bit);
+
+static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
+static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
+static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
+ vm_offset_t va);
+static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
+
+static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
+
+static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
+static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m);
+static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
+static void pmap_pte_release(pt_entry_t *pte);
+static int pmap_unuse_pt(pmap_t, vm_offset_t);
+static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
+#ifdef PAE
+static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+#endif
+
+CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
+CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
+
+#ifndef DEBUG
+#define DEBUG
+#endif
+#ifdef PMAP_DEBUG
+static void pmap_dec_ref(unsigned long ma);
+static void pmap_mark_privileged(unsigned long pa);
+static void pmap_mark_unprivileged(unsigned long pa);
+static void pmap_dec_ref_page(vm_page_t m);
+int pmap_pid_dump(int pid);
+#endif
+/*
+ * Move the kernel virtual free pointer to the next
+ * 4MB. This is used to help improve performance
+ * by using a large (4MB) page for much of the kernel
+ * (.text, .data, .bss)
+ */
+static vm_offset_t
+pmap_kmem_choose(vm_offset_t addr)
+{
+ vm_offset_t newaddr = addr;
+
+#ifndef DISABLE_PSE
+ if (cpu_feature & CPUID_PSE)
+ newaddr = (addr + PDRMASK) & ~PDRMASK;
+#endif
+ return newaddr;
+}
+
+/*
+ * Bootstrap the system enough to run with virtual memory.
+ *
+ * On the i386 this is called after mapping has already been enabled
+ * and just syncs the pmap module with what has already been done.
+ * [We can't call it easily with mapping off since the kernel is not
+ * mapped with PA == VA, hence we would have to relocate every address
+ * from the linked base (virtual) address "KERNBASE" to the actual
+ * (physical) address starting relative to 0]
+ */
+void
+pmap_bootstrap(firstaddr, loadaddr)
+ vm_paddr_t firstaddr;
+ vm_paddr_t loadaddr;
+{
+ vm_offset_t va;
+ pt_entry_t *pte, *unused;
+ int i;
+
+ /*
+ * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
+ * large. It should instead be correctly calculated in locore.s and
+ * not based on 'firstaddr' (which is a physical address, not a virtual
+ * address, for the start of unused physical memory). The kernel
+ * page tables are NOT double mapped and thus should not be included
+ * in this calculation.
+ */
+ virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
+ virtual_avail = pmap_kmem_choose(virtual_avail);
+
+ virtual_end = VM_MAX_KERNEL_ADDRESS;
+
+ /*
+ * Initialize the kernel pmap (which is statically allocated).
+ */
+ PMAP_LOCK_INIT(kernel_pmap);
+ kernel_pmap->pm_pdir = (pd_entry_t *) xen_start_info->pt_base;
+#ifdef PAE
+ kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
+#endif
+ kernel_pmap->pm_active = -1; /* don't allow deactivation */
+ TAILQ_INIT(&kernel_pmap->pm_pvlist);
+ LIST_INIT(&allpmaps);
+ mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
+ mtx_lock_spin(&allpmaps_lock);
+ LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
+ mtx_unlock_spin(&allpmaps_lock);
+ nkpt = NKPT;
+
+ /*
+ * Reserve some special page table entries/VA space for temporary
+ * mapping of pages.
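+	 *
+	 * SYSMAP(c, p, v, n) hands out n pages of KVA starting at va:
+	 * it records the first PTE in p and the address (cast to type
+	 * c) in v, then advances the va/pte cursors past them.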
+ */
+#define SYSMAP(c, p, v, n) \
+ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
+
+ va = virtual_avail;
+ pte = vtopte(va);
+
+ /*
+ * CMAP1/CMAP2 are used for zeroing and copying pages.
+ * CMAP3 is used for the idle process page zeroing.
+ */
+ SYSMAP(caddr_t, CMAP1, CADDR1, 1)
+ SYSMAP(caddr_t, CMAP2, CADDR2, 1)
+ SYSMAP(caddr_t, CMAP3, CADDR3, 1)
+ PT_CLEAR_VA(CMAP3, TRUE);
+
+ mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF);
+
+ /*
+ * Crashdump maps.
+ */
+ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
+
+ /*
+ * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
+ */
+ SYSMAP(caddr_t, unused, ptvmmap, 1)
+
+ /*
+ * msgbufp is used to map the system message buffer.
+ */
+ SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))
+
+ /*
+ * ptemap is used for pmap_pte_quick
+ */
+ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
+ SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
+
+ mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
+
+ virtual_avail = va;
+ PT_CLEAR_VA(CMAP1, FALSE);
+ PT_CLEAR_VA(CMAP2, FALSE);
+
+ for (i = 0; i < NKPT; i++)
+ PT_CLEAR_VA(&PTD[i], FALSE);
+ PT_UPDATES_FLUSH();
+#ifdef XEN_UNNEEDED
+ /* Turn on PG_G on kernel page(s) */
+ pmap_set_pg();
+#endif
+}
+
+/*
+ * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on.
+ */
+void
+pmap_set_pg(void)
+{
+ pd_entry_t pdir;
+ pt_entry_t *pte;
+ vm_offset_t va, endva;
+ int i;
+
+ if (pgeflag == 0)
+ return;
+ panic("this won't work");
+ i = KERNLOAD/NBPDR;
+ endva = KERNBASE + KERNend;
+
+ if (pseflag) {
+ va = KERNBASE + KERNLOAD;
+ while (va < endva) {
+ pdir = kernel_pmap->pm_pdir[KPTDI+i];
+ pdir |= pgeflag;
+ kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir;
+ invltlb(); /* Play it safe, invltlb() every time */
+ i++;
+ va += NBPDR;
+ }
+ } else {
+ va = (vm_offset_t)btext;
+ while (va < endva) {
+ pte = vtopte(va);
+ if (*pte)
+ *pte |= pgeflag;
+ invltlb(); /* Play it safe, invltlb() every time */
+ va += PAGE_SIZE;
+ }
+ }
+}
+
+#ifdef PAE
+
+static MALLOC_DEFINE(M_PMAPPDPT, "pmap", "pmap pdpt");
+
+static void *
+pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+{
+ *flags = UMA_SLAB_PRIV;
+ return (contigmalloc(PAGE_SIZE, M_PMAPPDPT, 0, 0x0ULL, 0xffffffffULL,
+ 1, 0));
+}
+#endif
+
+/*
+ * Initialize the pmap module.
+ * Called by vm_init, to initialize any structures that the pmap
+ * system needs to map virtual memory.
+ * pmap_init has been enhanced to support, in a fairly consistent
+ * way, discontiguous physical memory.
+ */
+void
+pmap_init(void)
+{
+ int i;
+
+ /*
+ * Allocate memory for random pmap data structures. Includes the
+ * pv_head_table.
+ */
+
+	for (i = 0; i < vm_page_array_size; i++) {
+ vm_page_t m;
+
+ m = &vm_page_array[i];
+ TAILQ_INIT(&m->md.pv_list);
+ m->md.pv_list_count = 0;
+ }
+
+ /*
+ * init the pv free list
+ */
+ pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+ uma_prealloc(pvzone, MINPV);
+
+#ifdef PAE
+ pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
+ NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
+ UMA_ZONE_VM | UMA_ZONE_NOFREE);
+ uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
+#endif
+
+ /*
+ * Now it is safe to enable pv_table recording.
+ */
+ pmap_initialized = TRUE;
+}
+
+/*
+ * Initialize the address space (zone) for the pv_entries. Set a
+ * high water mark so that the system can recover from excessive
+ * numbers of pv entries.
+ */
+void
+pmap_init2()
+{
+ int shpgperproc = PMAP_SHPGPERPROC;
+
+ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
+ pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
+ TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
+ pv_entry_high_water = 9 * (pv_entry_max / 10);
+ uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
+}
+
+
+/***************************************************
+ * Low level helper routines.....
+ ***************************************************/
+
+#if defined(PMAP_DIAGNOSTIC)
+
+/*
+ * This code checks for non-writeable/modified pages.
+ * This should be an invalid condition.
+ */
+static int
+pmap_nw_modified(pt_entry_t ptea)
+{
+ int pte;
+
+ pte = (int) ptea;
+
+ if ((pte & (PG_M|PG_RW)) == PG_M)
+ return 1;
+ else
+ return 0;
+}
+#endif
+
+
+/*
+ * this routine defines the region(s) of memory that should
+ * not be tested for the modified bit.
+ */
+static PMAP_INLINE int
+pmap_track_modified(vm_offset_t va)
+{
+ if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
+ return 1;
+ else
+ return 0;
+}
+
+#ifdef I386_CPU
+/*
+ * i386 only has "invalidate everything" and no SMP to worry about.
+ */
+PMAP_INLINE void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+
+ if (pmap == kernel_pmap || pmap->pm_active)
+ invltlb();
+}
+
+PMAP_INLINE void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+
+ if (pmap == kernel_pmap || pmap->pm_active)
+ invltlb();
+}
+
+PMAP_INLINE void
+pmap_invalidate_all(pmap_t pmap)
+{
+
+ if (pmap == kernel_pmap || pmap->pm_active)
+ invltlb();
+}
+#else /* !I386_CPU */
+#ifdef SMP
+/*
+ * For SMP, these functions have to use the IPI mechanism for coherence.
+ */
+void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+ u_int cpumask;
+ u_int other_cpus;
+
+ if (smp_started) {
+ if (!(read_eflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_rv_mtx);
+ } else
+ critical_enter();
+ /*
+ * We need to disable interrupt preemption but MUST NOT have
+ * interrupts disabled here.
+ * XXX we may need to hold schedlock to get a coherent pm_active
+ * XXX critical sections disable interrupts again
+ */
+ if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ invlpg(va);
+ smp_invlpg(va);
+ } else {
+ cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
+ if (pmap->pm_active & cpumask)
+ invlpg(va);
+ if (pmap->pm_active & other_cpus)
+ smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+ }
+ if (smp_started)
+ mtx_unlock_spin(&smp_rv_mtx);
+ else
+ critical_exit();
+}
+
+void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ u_int cpumask;
+ u_int other_cpus;
+ vm_offset_t addr;
+
+ if (smp_started) {
+ if (!(read_eflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_rv_mtx);
+ } else
+ critical_enter();
+ /*
+ * We need to disable interrupt preemption but MUST NOT have
+ * interrupts disabled here.
+ * XXX we may need to hold schedlock to get a coherent pm_active
+ * XXX critical sections disable interrupts again
+ */
+ if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ smp_invlpg_range(sva, eva);
+ } else {
+ cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
+ if (pmap->pm_active & cpumask)
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ if (pmap->pm_active & other_cpus)
+ smp_masked_invlpg_range(pmap->pm_active & other_cpus,
+ sva, eva);
+ }
+ if (smp_started)
+ mtx_unlock_spin(&smp_rv_mtx);
+ else
+ critical_exit();
+}
+
+void
+pmap_invalidate_all(pmap_t pmap)
+{
+ u_int cpumask;
+ u_int other_cpus;
+
+ if (smp_started) {
+ if (!(read_eflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_rv_mtx);
+ } else
+ critical_enter();
+ /*
+ * We need to disable interrupt preemption but MUST NOT have
+ * interrupts disabled here.
+ * XXX we may need to hold schedlock to get a coherent pm_active
+ * XXX critical sections disable interrupts again
+ */
+ if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ invltlb();
+ smp_invltlb();
+ } else {
+ cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
+ if (pmap->pm_active & cpumask)
+ invltlb();
+ if (pmap->pm_active & other_cpus)
+ smp_masked_invltlb(pmap->pm_active & other_cpus);
+ }
+ if (smp_started)
+ mtx_unlock_spin(&smp_rv_mtx);
+ else
+ critical_exit();
+}
+#else /* !SMP */
+/*
+ * Normal, non-SMP, 486+ invalidation functions.
+ * We inline these within pmap.c for speed.
+ */
+PMAP_INLINE void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+
+ if (pmap == kernel_pmap || pmap->pm_active)
+ invlpg(va);
+ PT_UPDATES_FLUSH();
+
+}
+
+PMAP_INLINE void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ vm_offset_t addr;
+
+ if (pmap == kernel_pmap || pmap->pm_active)
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ PT_UPDATES_FLUSH();
+
+}
+
+PMAP_INLINE void
+pmap_invalidate_all(pmap_t pmap)
+{
+
+ if (pmap == kernel_pmap || pmap->pm_active)
+ invltlb();
+}
+#endif /* !SMP */
+#endif /* !I386_CPU */
+
+/*
+ * Are we current address space or kernel? N.B. We return FALSE when
+ * a pmap's page table is in use because a kernel thread is borrowing
+ * it. The borrowed page table can change spontaneously, making any
+ * dependence on its continued use subject to a race condition.
+ */
+static __inline int
+pmap_is_current(pmap_t pmap)
+{
+
+ return (pmap == kernel_pmap ||
+ (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
+ (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
+}
+
+/*
+ * If the given pmap is not the current or kernel pmap, the returned pte must
+ * be released by passing it to pmap_pte_release().
+ */
+pt_entry_t *
+pmap_pte(pmap_t pmap, vm_offset_t va)
+{
+ pd_entry_t tmppf, newpf;
+ pd_entry_t *pde;
+
+ pde = pmap_pde(pmap, va);
+ if (*pde & PG_PS)
+ return (pde);
+ if (*pde != 0) {
+ /* are we current address space or kernel? */
+ if (pmap_is_current(pmap))
+ return (vtopte(va));
+ mtx_lock(&PMAP2mutex);
+ newpf = PT_GET(pde) & PG_FRAME;
+ tmppf = PT_GET(PMAP2) & PG_FRAME;
+ if (tmppf != newpf) {
+ PT_SET_VA(PMAP2, newpf | PG_V | PG_A, FALSE);
+ pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
+ }
+ return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
+ }
+ return (0);
+}
+
+/*
+ * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte
+ * being NULL.
+ */
+static __inline void
+pmap_pte_release(pt_entry_t *pte)
+{
+
+ if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2)
+ mtx_unlock(&PMAP2mutex);
+}
+
+static __inline void
+invlcaddr(void *caddr)
+{
+#ifdef I386_CPU
+ invltlb();
+#else
+ invlpg((u_int)caddr);
+#endif
+ PT_UPDATES_FLUSH();
+}
+
+/*
+ * Super fast pmap_pte routine best used when scanning
+ * the pv lists. This eliminates many coarse-grained
+ * invltlb calls. Note that many of the pv list
+ * scans are across different pmaps. It is very wasteful
+ * to do an entire invltlb for checking a single mapping.
+ *
+ * If the given pmap is not the current pmap, vm_page_queue_mtx
+ * must be held and curthread pinned to a CPU.
+ */
+static pt_entry_t *
+pmap_pte_quick(pmap_t pmap, vm_offset_t va)
+{
+ pd_entry_t tmppf, newpf;
+ pd_entry_t *pde;
+
+ pde = pmap_pde(pmap, va);
+ if (*pde & PG_PS)
+ return (pde);
+ if (*pde != 0) {
+ /* are we current address space or kernel? */
+ if (pmap_is_current(pmap))
+ return (vtopte(va));
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
+ newpf = PT_GET(pde) & PG_FRAME;
+ tmppf = PT_GET(PMAP1) & PG_FRAME;
+ if (tmppf != newpf) {
+ PT_SET_VA(PMAP1, newpf | PG_V | PG_A, TRUE);
+#ifdef SMP
+ PMAP1cpu = PCPU_GET(cpuid);
+#endif
+ invlcaddr(PADDR1);
+ PMAP1changed++;
+ } else
+#ifdef SMP
+ if (PMAP1cpu != PCPU_GET(cpuid)) {
+ PMAP1cpu = PCPU_GET(cpuid);
+ invlcaddr(PADDR1);
+ PMAP1changedcpu++;
+ } else
+#endif
+ PMAP1unchanged++;
+ return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
+ }
+ return (0);
+}
+
+/*
+ * Routine: pmap_extract
+ * Function:
+ * Extract the physical page address associated
+ * with the given map/virtual_address pair.
+ */
+vm_paddr_t
+pmap_extract(pmap_t pmap, vm_offset_t va)
+{
+ vm_paddr_t rtval;
+ pt_entry_t *pte;
+ pd_entry_t pde;
+
+ rtval = 0;
+ PMAP_LOCK(pmap);
+ pde = PT_GET(&pmap->pm_pdir[va >> PDRSHIFT]);
+ if (pde != 0) {
+ if ((pde & PG_PS) != 0) {
+ rtval = (pde & ~PDRMASK) | (va & PDRMASK);
+ PMAP_UNLOCK(pmap);
+ return rtval;
+ }
+ pte = pmap_pte(pmap, va);
+ rtval = (PT_GET(pte) & PG_FRAME) | (va & PAGE_MASK);
+ pmap_pte_release(pte);
+ }
+ PMAP_UNLOCK(pmap);
+ return (rtval);
+}
+
+/*
+ * Routine: pmap_extract_and_hold
+ * Function:
+ * Atomically extract and hold the physical page
+ * with the given pmap and virtual address pair
+ * if that mapping permits the given protection.
+ */
+vm_page_t
+pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
+{
+ pd_entry_t pde;
+ pt_entry_t pte;
+ vm_page_t m;
+
+ m = NULL;
+ vm_page_lock_queues();
+ PMAP_LOCK(pmap);
+ pde = PT_GET(pmap_pde(pmap, va));
+ if (pde != 0) {
+ if (pde & PG_PS) {
+ panic("4MB pages not currently supported");
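+			/*
+			 * NOTREACHED: the 4MB-page lookup below cannot
+			 * execute after the panic above.
+			 */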
+ if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
+ m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) |
+ (va & PDRMASK));
+ vm_page_hold(m);
+ }
+ } else {
+ sched_pin();
+ pte = PT_GET(pmap_pte_quick(pmap, va));
+ if (pte != 0 &&
+ ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
+ m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
+ vm_page_hold(m);
+ }
+ sched_unpin();
+ }
+ }
+ vm_page_unlock_queues();
+ PMAP_UNLOCK(pmap);
+ return (m);
+}
+
+/***************************************************
+ * Low level mapping routines.....
+ ***************************************************/
+
+/*
+ * Add a wired page to the kva.
+ * Note: not SMP coherent.
+ */
+PMAP_INLINE void
+pmap_kenter(vm_offset_t va, vm_paddr_t pa)
+{
+ PT_SET(va, pa | PG_RW | PG_V | pgeflag, TRUE);
+}
+
+/*
+ * Remove a page from the kernel pagetables.
+ * Note: not SMP coherent.
+ */
+PMAP_INLINE void
+pmap_kremove(vm_offset_t va)
+{
+ PT_CLEAR(va, TRUE);
+}
+
+/*
+ * Used to map a range of physical addresses into kernel
+ * virtual address space.
+ *
+ * The value passed in '*virt' is a suggested virtual address for
+ * the mapping. Architectures which can support a direct-mapped
+ * physical to virtual region can return the appropriate address
+ * within that region, leaving '*virt' unchanged. Other
+ * architectures should map the pages starting at '*virt' and
+ * update '*virt' with the first usable address after the mapped
+ * region.
+ */
+vm_offset_t
+pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
+{
+ vm_offset_t va, sva;
+ pt_entry_t *pte;
+
+ va = sva = *virt;
+ while (start < end) {
+ pte = vtopte(va);
+ PT_SET_VA(pte, start | PG_RW | PG_V | pgeflag, FALSE);
+ va += PAGE_SIZE;
+ start += PAGE_SIZE;
+ }
+ /* invalidate will flush the update queue */
+ pmap_invalidate_range(kernel_pmap, sva, va);
+ *virt = va;
+ return (sva);
+}
+
+
+/*
+ * Add a list of wired pages to the kva.
+ * This routine is only used for temporary
+ * kernel mappings that do not need to have
+ * page modification or references recorded.
+ * Note that old mappings are simply written
+ * over. The page *must* be wired.
+ * Note: SMP coherent. Uses a ranged shootdown IPI.
+ */
+void
+pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
+{
+ vm_offset_t va;
+
+ va = sva;
+ while (count-- > 0) {
+ PT_SET(va, VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag,
+ FALSE);
+ va += PAGE_SIZE;
+ m++;
+ }
+ /* invalidate will flush the update queue */
+ pmap_invalidate_range(kernel_pmap, sva, va);
+}
+
+/*
+ * This routine tears out page mappings from the
+ * kernel -- it is meant only for temporary mappings.
+ * Note: SMP coherent. Uses a ranged shootdown IPI.
+ */
+void
+pmap_qremove(vm_offset_t sva, int count)
+{
+ vm_offset_t va;
+
+ va = sva;
+ while (count-- > 0) {
+ PT_CLEAR(va, FALSE);
+ va += PAGE_SIZE;
+ }
+ /* invalidate will flush the update queue */
+ pmap_invalidate_range(kernel_pmap, sva, va);
+}
+
+/***************************************************
+ * Page table page management routines.....
+ ***************************************************/
+
+/*
+ * This routine unholds page table pages, and if the hold count
+ * drops to zero, then it decrements the wire count.
+ */
+static PMAP_INLINE int
+pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
+{
+
+ --m->wire_count;
+ if (m->wire_count == 0)
+ return _pmap_unwire_pte_hold(pmap, m);
+ else
+ return 0;
+}
+
+static int
+_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
+{
+ vm_offset_t pteva;
+ /*
+ * unmap the page table page
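+	 * Unpinning it first tells Xen to stop treating the page as a
+	 * page-table page, so it can be freed and reused as ordinary
+	 * memory.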
+ */
+ xpq_queue_unpin_table(pmap->pm_pdir[m->pindex]);
+ PT_CLEAR_VA(&pmap->pm_pdir[m->pindex], TRUE);
+ --pmap->pm_stats.resident_count;
+
+ /*
+ * Do an invltlb to make the invalidated mapping
+ * take effect immediately.
+ */
+ pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
+ pmap_invalidate_page(pmap, pteva);
+
+ vm_page_free_zero(m);
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+ return 1;
+}
+
+/*
+ * After removing a page table entry, this routine is used to
+ * conditionally free the page, and manage the hold/wire counts.
+ */
+static int
+pmap_unuse_pt(pmap_t pmap, vm_offset_t va)
+{
+ pd_entry_t ptepde;
+ vm_page_t mpte;
+
+ if (va >= VM_MAXUSER_ADDRESS)
+ return 0;
+ ptepde = PT_GET(pmap_pde(pmap, va));
+ mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
+ return pmap_unwire_pte_hold(pmap, mpte);
+}
+
+void
+pmap_pinit0(pmap)
+ struct pmap *pmap;
+{
+
+ PMAP_LOCK_INIT(pmap);
+ pmap->pm_pdir = (pd_entry_t *)(xen_start_info->pt_base);
+#ifdef PAE
+ pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
+#endif
+ pmap->pm_active = 0;
+ PCPU_SET(curpmap, pmap);
+ TAILQ_INIT(&pmap->pm_pvlist);
+ bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+ mtx_lock_spin(&allpmaps_lock);
+ LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
+ mtx_unlock_spin(&allpmaps_lock);
+}
+
+/*
+ * Initialize a preallocated and zeroed pmap structure,
+ * such as one in a vmspace structure.
+ */
+void
+pmap_pinit(struct pmap *pmap)
+{
+ vm_page_t m, ptdpg[NPGPTD];
+ vm_paddr_t ma;
+ static int color;
+ int i;
+
+ PMAP_LOCK_INIT(pmap);
+
+ /*
+ * No need to allocate page table space yet but we do need a valid
+ * page directory table.
+ */
+ if (pmap->pm_pdir == NULL) {
+ pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
+ NBPTD);
+#ifdef PAE
+ pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
+ KASSERT(((vm_offset_t)pmap->pm_pdpt &
+ ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
+ ("pmap_pinit: pdpt misaligned"));
+ KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
+ ("pmap_pinit: pdpt above 4g"));
+#endif
+ }
+
+ /*
+ * allocate the page directory page(s)
+ */
+ for (i = 0; i < NPGPTD;) {
+ m = vm_page_alloc(NULL, color++,
+ VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+ VM_ALLOC_ZERO);
+ if (m == NULL)
+ VM_WAIT;
+ else {
+ pmap_zero_page(m);
+ ptdpg[i++] = m;
+ }
+ }
+
+ pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
+
+ for (i = 0; i < NPGPTD; i++) {
+ if ((ptdpg[i]->flags & PG_ZERO) == 0)
+ bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
+ }
+
+ mtx_lock_spin(&allpmaps_lock);
+ LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
+ mtx_unlock_spin(&allpmaps_lock);
+ /* Wire in kernel global address entries. */
+ /* XXX copies current process, does not fill in MPPTDI */
+ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
+#ifdef SMP
+ pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
+#endif
+
+ /* install self-referential address mapping entry(s) */
+ for (i = 0; i < NPGPTD; i++) {
+ ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
+ pmap->pm_pdir[PTDPTDI + i] = ma | PG_V | PG_A;
+#ifdef PAE
+ pmap->pm_pdpt[i] = ma | PG_V;
+#endif
+#ifndef PAE
+ PT_SET_MA(pmap->pm_pdir, ma | PG_V | PG_A, TRUE);
+#else
+ panic("FIX ME!");
+#endif
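+		/*
+		 * Pinning asks Xen to validate and write-protect the
+		 * page as an L2 table so it may be used as a page
+		 * directory.
+		 */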
+ xpq_queue_pin_table(ma, XPQ_PIN_L2_TABLE);
+ }
+
+ pmap->pm_active = 0;
+ TAILQ_INIT(&pmap->pm_pvlist);
+ bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+}
+
+/*
+ * this routine is called if the page table page is not
+ * mapped correctly.
+ */
+static vm_page_t
+_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
+{
+ vm_paddr_t ptepa;
+ vm_page_t m;
+
+ KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
+ (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
+ ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
+
+ /*
+ * Allocate a page table page.
+ */
+ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+ if (flags & M_WAITOK) {
+ PMAP_UNLOCK(pmap);
+ vm_page_unlock_queues();
+ VM_WAIT;
+ vm_page_lock_queues();
+ PMAP_LOCK(pmap);
+ }
+
+ /*
+ * Indicate the need to retry. While waiting, the page table
+ * page may have been allocated.
+ */
+ return (NULL);
+ }
+ if ((m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+
+ /*
+ * Map the pagetable page into the process address space, if
+ * it isn't already there.
+ */
+
+ pmap->pm_stats.resident_count++;
+
+ ptepa = VM_PAGE_TO_PHYS(m);
+ xpq_queue_pin_table(xpmap_ptom(ptepa), XPQ_PIN_L1_TABLE);
+ PT_SET_VA(&pmap->pm_pdir[ptepindex],
+ (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
+
+ return m;
+}
+
+static vm_page_t
+pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
+{
+ unsigned ptepindex;
+ pd_entry_t ptepa;
+ vm_page_t m;
+
+ KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
+ (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
+ ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
+
+ /*
+ * Calculate pagetable page index
+ */
+ ptepindex = va >> PDRSHIFT;
+retry:
+ /*
+ * Get the page directory entry
+ */
+ ptepa = PT_GET(&pmap->pm_pdir[ptepindex]);
+
+ /*
+ * This supports switching from a 4MB page to a
+ * normal 4K page.
+ */
+ if (ptepa & PG_PS) {
+ pmap->pm_pdir[ptepindex] = 0;
+ ptepa = 0;
+ pmap_invalidate_all(kernel_pmap);
+ }
+
+ /*
+ * If the page table page is mapped, we just increment the
+ * hold count, and activate it.
+ */
+ if (ptepa) {
+ m = PHYS_TO_VM_PAGE(ptepa);
+ m->wire_count++;
+ } else {
+ /*
+ * Here if the pte page isn't mapped, or if it has
+ * been deallocated.
+ */
+ m = _pmap_allocpte(pmap, ptepindex, flags);
+ if (m == NULL && (flags & M_WAITOK))
+ goto retry;
+ }
+ return (m);
+}
+
+
+/***************************************************
+ * Pmap allocation/deallocation routines.
+ ***************************************************/
+
+#ifdef SMP
+/*
+ * Deal with a SMP shootdown of other users of the pmap that we are
+ * trying to dispose of. This can be a bit hairy.
+ */
+static u_int *lazymask;
+static u_int lazyptd;
+static volatile u_int lazywait;
+
+void pmap_lazyfix_action(void);
+
+void
+pmap_lazyfix_action(void)
+{
+ u_int mymask = PCPU_GET(cpumask);
+
+ if (PCPU_GET(curpcb)->pcb_cr3 == lazyptd)
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ atomic_clear_int(lazymask, mymask);
+ atomic_store_rel_int(&lazywait, 1);
+}
+
+static void
+pmap_lazyfix_self(u_int mymask)
+{
+
+ if (PCPU_GET(curpcb)->pcb_cr3 == lazyptd)
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ atomic_clear_int(lazymask, mymask);
+}
+
+
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+ u_int mymask = PCPU_GET(cpumask);
+ u_int mask;
+ register u_int spins;
+
+ while ((mask = pmap->pm_active) != 0) {
+ spins = 50000000;
+ mask = mask & -mask; /* Find least significant set bit */
+ mtx_lock_spin(&smp_rv_mtx);
+#ifdef PAE
+ lazyptd = vtophys(pmap->pm_pdpt);
+#else
+ lazyptd = vtophys(pmap->pm_pdir);
+#endif
+ if (mask == mymask) {
+ lazymask = &pmap->pm_active;
+ pmap_lazyfix_self(mymask);
+ } else {
+ atomic_store_rel_int((u_int *)&lazymask,
+ (u_int)&pmap->pm_active);
+ atomic_store_rel_int(&lazywait, 0);
+ ipi_selected(mask, IPI_LAZYPMAP);
+ while (lazywait == 0) {
+ ia32_pause();
+ if (--spins == 0)
+ break;
+ }
+ }
+ mtx_unlock_spin(&smp_rv_mtx);
+ if (spins == 0)
+ printf("pmap_lazyfix: spun for 50000000\n");
+ }
+}
+
+#else /* SMP */
+
+/*
+ * Cleaning up on uniprocessor is easy. For various reasons, we're
+ * unlikely to have to even execute this code, including the fact
+ * that the cleanup is deferred until the parent does a wait(2), which
+ * means that another userland process has run.
+ */
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+ u_int cr3;
+
+ cr3 = vtophys(pmap->pm_pdir);
+ if (cr3 == PCPU_GET(curpcb)->pcb_cr3) {
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ pmap->pm_active &= ~(PCPU_GET(cpumask));
+ }
+}
+#endif /* SMP */
+
+/*
+ * Release any resources held by the given physical map.
+ * Called when a pmap initialized by pmap_pinit is being released.
+ * Should only be called if the map contains no valid mappings.
+ */
+void
+pmap_release(pmap_t pmap)
+{
+ vm_page_t m, ptdpg[NPGPTD];
+ vm_paddr_t ma;
+ int i;
+
+ KASSERT(pmap->pm_stats.resident_count == 0,
+ ("pmap_release: pmap resident count %ld != 0",
+ pmap->pm_stats.resident_count));
+
+ pmap_lazyfix(pmap);
+ mtx_lock_spin(&allpmaps_lock);
+ LIST_REMOVE(pmap, pm_list);
+ mtx_unlock_spin(&allpmaps_lock);
+
+ for (i = 0; i < NPGPTD; i++)
+ ptdpg[i] = PHYS_TO_VM_PAGE(PT_GET(&pmap->pm_pdir[PTDPTDI + i]));
+
+ for (i = 0; i < nkpt + NPGPTD; i++)
+ PT_CLEAR_VA(&pmap->pm_pdir[PTDPTDI + i], FALSE);
+
+ bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
+ sizeof(*pmap->pm_pdir));
+#ifdef SMP
+ PT_CLEAR_VA(&pmap->pm_pdir[MPPTDI], FALSE);
+#endif
+ PT_UPDATES_FLUSH();
+ pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
+
+ vm_page_lock_queues();
+ for (i = 0; i < NPGPTD; i++) {
+ m = ptdpg[i];
+
+ ma = xpmap_ptom(VM_PAGE_TO_PHYS(m));
+ xpq_queue_unpin_table(ma);
+ pmap_zero_page(m);
+#ifdef PAE
+ KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
+ ("pmap_release: got wrong ptd page"));
+#endif
+ m->wire_count--;
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+
+ vm_page_free_zero(m);
+ }
+ vm_page_unlock_queues();
+ PMAP_LOCK_DESTROY(pmap);
+}
+
+static int
+kvm_size(SYSCTL_HANDLER_ARGS)
+{
+ unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
+
+ return sysctl_handle_long(oidp, &ksize, 0, req);
+}
+SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
+ 0, 0, kvm_size, "IU", "Size of KVM");
+
+static int
+kvm_free(SYSCTL_HANDLER_ARGS)
+{
+ unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
+
+ return sysctl_handle_long(oidp, &kfree, 0, req);
+}
+SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
+ 0, 0, kvm_free, "IU", "Amount of KVM free");
+
+/*
+ * grow the number of kernel page table entries, if needed
+ */
+void
+pmap_growkernel(vm_offset_t addr)
+{
+ struct pmap *pmap;
+ vm_paddr_t ptppaddr;
+ vm_page_t nkpg;
+ pd_entry_t newpdir;
+ pt_entry_t *pde;
+
+ mtx_assert(&kernel_map->system_mtx, MA_OWNED);
+ if (kernel_vm_end == 0) {
+ kernel_vm_end = KERNBASE;
+ nkpt = 0;
+ while (pdir_pde(PTD, kernel_vm_end)) {
+ kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+ nkpt++;
+ }
+ }
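+	/*
+	 * Each page-table page maps NPTEPG * PAGE_SIZE bytes of KVA
+	 * (4MB here, 2MB under PAE), so grow in those units.
+	 */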
+ addr = roundup2(addr, PAGE_SIZE * NPTEPG);
+ while (kernel_vm_end < addr) {
+ if (pdir_pde(PTD, kernel_vm_end)) {
+ kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+ continue;
+ }
+
+ /*
+ * This index is bogus, but out of the way
+ */
+ nkpg = vm_page_alloc(NULL, nkpt,
+ VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
+ if (!nkpg)
+ panic("pmap_growkernel: no memory to grow kernel");
+
+ nkpt++;
+
+ pmap_zero_page(nkpg);
+ ptppaddr = VM_PAGE_TO_PHYS(nkpg);
+ newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
+ PT_SET_VA(&pdir_pde(PTD, kernel_vm_end), newpdir, TRUE);
+
+ mtx_lock_spin(&allpmaps_lock);
+ LIST_FOREACH(pmap, &allpmaps, pm_list) {
+ pde = pmap_pde(pmap, kernel_vm_end);
+ PT_SET_VA(pde, newpdir, FALSE);
+ }
+ PT_UPDATES_FLUSH();
+ mtx_unlock_spin(&allpmaps_lock);
+ kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+ }
+}
+
+
+/***************************************************
+ * page management routines.
+ ***************************************************/
+
+/*
+ * free the pv_entry back to the free list
+ */
+static PMAP_INLINE void
+free_pv_entry(pv_entry_t pv)
+{
+ pv_entry_count--;
+ uma_zfree(pvzone, pv);
+}
+
+/*
+ * get a new pv_entry, allocating a block from the system
+ * when needed.
+ * the memory allocation is performed bypassing the malloc code
+ * because of the possibility of allocations at interrupt time.
+ */
+static pv_entry_t
+get_pv_entry(void)
+{
+ pv_entry_count++;
+ if (pv_entry_high_water &&
+ (pv_entry_count > pv_entry_high_water) &&
+ (pmap_pagedaemon_waken == 0)) {
+ pmap_pagedaemon_waken = 1;
+ wakeup (&vm_pages_needed);
+ }
+ return uma_zalloc(pvzone, M_NOWAIT);
+}
+
+
+static int
+pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
+{
+ pv_entry_t pv;
+ int rtval;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ if (pmap == pv->pv_pmap && va == pv->pv_va)
+ break;
+ }
+ } else {
+ TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
+ if (va == pv->pv_va)
+ break;
+ }
+ }
+
+ rtval = 0;
+ if (pv) {
+ rtval = pmap_unuse_pt(pmap, va);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ m->md.pv_list_count--;
+ if (TAILQ_FIRST(&m->md.pv_list) == NULL)
+ vm_page_flag_clear(m, PG_WRITEABLE);
+
+ TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
+ free_pv_entry(pv);
+ }
+
+ return rtval;
+}
+
+/*
+ * Create a pv entry for page at pa for
+ * (pmap, va).
+ */
+static void
+pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+ pv_entry_t pv;
+ pv = get_pv_entry();
+ pv->pv_va = va;
+ pv->pv_pmap = pmap;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ m->md.pv_list_count++;
+}
+
+/*
+ * pmap_remove_pte: do the things to unmap a page in a process
+ */
+static int
+pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
+{
+ pt_entry_t oldpte;
+ vm_page_t m;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ oldpte = pte_load_clear(ptq);
+ if (oldpte & PG_W)
+ pmap->pm_stats.wired_count -= 1;
+ /*
+ * Machines that don't support invlpg, also don't support
+ * PG_G.
+ */
+ if (oldpte & PG_G)
+ pmap_invalidate_page(kernel_pmap, va);
+ pmap->pm_stats.resident_count -= 1;
+ if (oldpte & PG_MANAGED) {
+ m = PHYS_TO_VM_PAGE(oldpte);
+ if (oldpte & PG_M) {
+#if defined(PMAP_DIAGNOSTIC)
+ if (pmap_nw_modified((pt_entry_t) oldpte)) {
+ printf(
+ "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
+ va, oldpte);
+ }
+#endif
+ if (pmap_track_modified(va))
+ vm_page_dirty(m);
+ }
+ if (oldpte & PG_A)
+ vm_page_flag_set(m, PG_REFERENCED);
+ return pmap_remove_entry(pmap, m, va);
+ } else {
+ return pmap_unuse_pt(pmap, va);
+ }
+}
+
+/*
+ * Remove a single page from a process address space
+ */
+static void
+pmap_remove_page(pmap_t pmap, vm_offset_t va)
+{
+ pt_entry_t *pte;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0)
+ return;
+ pmap_remove_pte(pmap, pte, va);
+ pmap_invalidate_page(pmap, va);
+}
+
+/*
+ * Remove the given range of addresses from the specified map.
+ *
+ * It is assumed that the start and end are properly
+ * rounded to the page size.
+ */
+void
+pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ vm_offset_t pdnxt;
+ pd_entry_t ptpaddr;
+ pt_entry_t *pte;
+ int anyvalid;
+
+ /*
+ * Perform an unsynchronized read. This is, however, safe.
+ */
+ if (pmap->pm_stats.resident_count == 0)
+ return;
+
+ anyvalid = 0;
+
+ vm_page_lock_queues();
+ sched_pin();
+ PMAP_LOCK(pmap);
+
+ /*
+ * Special handling for removing a single page: a very
+ * common operation for which it is easy to short-circuit
+ * some code.
+ */
+ if ((sva + PAGE_SIZE == eva) &&
+ ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
+ pmap_remove_page(pmap, sva);
+ goto out;
+ }
+
+ for (; sva < eva; sva = pdnxt) {
+ unsigned pdirindex;
+
+ /*
+ * Calculate index for next page table.
+ */
+ pdnxt = (sva + NBPDR) & ~PDRMASK;
+ if (pmap->pm_stats.resident_count == 0)
+ break;
+
+ pdirindex = sva >> PDRSHIFT;
+ ptpaddr = PT_GET(&pmap->pm_pdir[pdirindex]);
+
+ /*
+ * Weed out invalid mappings. Note: we assume that the page
+ * directory table is always allocated, and in kernel virtual.
+ */
+ if (ptpaddr == 0)
+ continue;
+
+ /*
+ * Check for large page.
+ */
+ if ((ptpaddr & PG_PS) != 0) {
+ PT_CLEAR_VA(pmap->pm_pdir[pdirindex], TRUE);
+ pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+ anyvalid = 1;
+ continue;
+ }
+
+ /*
+ * Limit our scan to either the end of the va represented
+ * by the current page table page, or to the end of the
+ * range being removed.
+ */
+ if (pdnxt > eva)
+ pdnxt = eva;
+
+ for (; sva != pdnxt; sva += PAGE_SIZE) {
+ if ((pte = pmap_pte_quick(pmap, sva)) == NULL ||
+ *pte == 0)
+ continue;
+ anyvalid = 1;
+ if (pmap_remove_pte(pmap, pte, sva))
+ break;
+ }
+ }
+out:
+ sched_unpin();
+ vm_page_unlock_queues();
+ if (anyvalid)
+ pmap_invalidate_all(pmap);
+ PMAP_UNLOCK(pmap);
+}
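+
+/*
+ * Usage sketch (illustrative only, compiled out): per the contract
+ * above, callers pass page-rounded bounds.  The helper name is
+ * hypothetical.
+ */
+#if 0
+static void
+remove_one_page_example(pmap_t pmap, vm_offset_t va)
+{
+ va = trunc_page(va);
+ /* a single-page range takes the short-circuit path above */
+ pmap_remove(pmap, va, va + PAGE_SIZE);
+}
+#endif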
+
+/*
+ * Routine: pmap_remove_all
+ * Function:
+ * Removes this physical page from
+ * all physical maps in which it resides.
+ * Reflects back modify bits to the pager.
+ *
+ * Notes:
+ * Original versions of this routine were very
+ * inefficient because they iteratively called
+ * pmap_remove (slow...)
+ */
+
+void
+pmap_remove_all(vm_page_t m)
+{
+ pv_entry_t pv;
+ pt_entry_t *pte, tpte;
+
+#if defined(PMAP_DIAGNOSTIC)
+ /*
+ * XXX This makes pmap_remove_all() illegal for non-managed pages!
+ */
+ if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
+ panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x",
+ VM_PAGE_TO_PHYS(m));
+ }
+#endif
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ sched_pin();
+ while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
+ PMAP_LOCK(pv->pv_pmap);
+ pv->pv_pmap->pm_stats.resident_count--;
+ pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+ tpte = pte_load_clear(pte);
+ if (tpte & PG_W)
+ pv->pv_pmap->pm_stats.wired_count--;
+ if (tpte & PG_A)
+ vm_page_flag_set(m, PG_REFERENCED);
+
+ /*
+ * Update the vm_page_t clean and reference bits.
+ */
+ if (tpte & PG_M) {
+#if defined(PMAP_DIAGNOSTIC)
+ if (pmap_nw_modified((pt_entry_t) tpte)) {
+ printf(
+ "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
+ pv->pv_va, tpte);
+ }
+#endif
+ if (pmap_track_modified(pv->pv_va))
+ vm_page_dirty(m);
+ }
+ pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+ TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ m->md.pv_list_count--;
+ pmap_unuse_pt(pv->pv_pmap, pv->pv_va);
+ PMAP_UNLOCK(pv->pv_pmap);
+ free_pv_entry(pv);
+ }
+ vm_page_flag_clear(m, PG_WRITEABLE);
+ sched_unpin();
+}
+
+/*
+ * Set the physical protection on the
+ * specified range of this map as requested.
+ */
+void
+pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
+{
+ vm_offset_t pdnxt;
+ pd_entry_t ptpaddr;
+ int anychanged;
+
+ if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
+ pmap_remove(pmap, sva, eva);
+ return;
+ }
+
+ if (prot & VM_PROT_WRITE)
+ return;
+
+ anychanged = 0;
+
+ vm_page_lock_queues();
+ sched_pin();
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = pdnxt) {
+ unsigned obits, pbits, pdirindex;
+
+ pdnxt = (sva + NBPDR) & ~PDRMASK;
+
+ pdirindex = sva >> PDRSHIFT;
+ ptpaddr = PT_GET(&pmap->pm_pdir[pdirindex]);
+
+ /*
+ * Weed out invalid mappings. Note: we assume that the page
+ * directory table is always allocated, and in kernel virtual.
+ */
+ if (ptpaddr == 0)
+ continue;
+
+ /*
+ * Check for large page.
+ */
+ if ((ptpaddr & PG_PS) != 0) {
+ pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
+ pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+ anychanged = 1;
+ continue;
+ }
+
+ if (pdnxt > eva)
+ pdnxt = eva;
+
+ for (; sva != pdnxt; sva += PAGE_SIZE) {
+ pt_entry_t *pte;
+ vm_page_t m;
+
+ if ((pte = pmap_pte_quick(pmap, sva)) == NULL)
+ continue;
+#ifdef notyet
+retry:
+#endif
+ /*
+ * Regardless of whether a pte is 32 or 64 bits in
+ * size, PG_RW, PG_A, and PG_M are among the least
+ * significant 32 bits.
+ */
+ obits = pbits = PT_GET(pte);
+ if (pbits & PG_MANAGED) {
+ m = NULL;
+ if (pbits & PG_A) {
+ m = PHYS_TO_VM_PAGE(pbits);
+ vm_page_flag_set(m, PG_REFERENCED);
+ pbits &= ~PG_A;
+ }
+ if ((pbits & PG_M) != 0 &&
+ pmap_track_modified(sva)) {
+ if (m == NULL)
+ m = PHYS_TO_VM_PAGE(pbits);
+ vm_page_dirty(m);
+ }
+ }
+
+ pbits &= ~(PG_RW | PG_M);
+
+ if (pbits != obits) {
+#ifdef notyet
+ if (!atomic_cmpset_int((u_int *)pte, obits,
+ pbits))
+ goto retry;
+#endif
+ PT_SET_VA(pte, pbits, FALSE);
+ anychanged = 1;
+ }
+ }
+ }
+ sched_unpin();
+ vm_page_unlock_queues();
+ if (anychanged)
+ pmap_invalidate_all(pmap);
+ PMAP_UNLOCK(pmap);
+}
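+
+/*
+ * Usage sketch (illustrative only, compiled out): write-protecting a
+ * page-rounded range; dropping read access degenerates to pmap_remove,
+ * as the check at the top of pmap_protect shows.
+ */
+#if 0
+static void
+protect_example(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ pmap_protect(pmap, sva, eva, VM_PROT_READ); /* clears PG_RW and PG_M */
+ pmap_protect(pmap, sva, eva, VM_PROT_NONE); /* same as pmap_remove */
+}
+#endif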
+
+/*
+ * Insert the given physical page (p) at
+ * the specified virtual address (v) in the
+ * target physical map with the protection requested.
+ *
+ * If specified, the page will be wired down, meaning
+ * that the related pte cannot be reclaimed.
+ *
+ * NB: This is the only routine which MAY NOT lazy-evaluate
+ * or lose information. That is, this routine must actually
+ * insert this page into the given map NOW.
+ */
+void
+pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+ boolean_t wired)
+{
+ vm_paddr_t pa;
+ register pt_entry_t *pte;
+ vm_paddr_t opa;
+ pt_entry_t origpte, newpte;
+ vm_page_t mpte, om;
+
+ va &= PG_FRAME;
+#ifdef PMAP_DIAGNOSTIC
+ if (va > VM_MAX_KERNEL_ADDRESS)
+ panic("pmap_enter: toobig");
+ if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
+ panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
+#endif
+
+ mpte = NULL;
+
+ vm_page_lock_queues();
+ PMAP_LOCK(pmap);
+ sched_pin();
+
+ /*
+ * In the case that a page table page is not
+ * resident, we are creating it here.
+ */
+ if (va < VM_MAXUSER_ADDRESS) {
+ mpte = pmap_allocpte(pmap, va, M_WAITOK);
+ }
+#if 0 && defined(PMAP_DIAGNOSTIC)
+ else {
+ pd_entry_t *pdeaddr = pmap_pde(pmap, va);
+ origpte = PT_GET(pdeaddr);
+ if ((origpte & PG_V) == 0) {
+ panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
+ pmap->pm_pdir[PTDPTDI], origpte, va);
+ }
+ }
+#endif
+
+ pte = pmap_pte_quick(pmap, va);
+
+ /*
+ * Page Directory table entry not valid, we need a new PT page
+ */
+ if (pte == NULL) {
+ panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n",
+ (uintmax_t)pmap->pm_pdir[PTDPTDI], va);
+ }
+
+ pa = VM_PAGE_TO_PHYS(m);
+ om = NULL;
+ origpte = PT_GET(pte);
+ opa = origpte & PG_FRAME;
+
+ if (origpte & PG_PS) {
+ /*
+ * Yes, I know this will truncate upper address bits for PAE,
+ * but I'm actually more interested in the lower bits
+ */
+ printf("pmap_enter: va %p, pte %p, origpte %p\n",
+ (void *)va, (void *)pte, (void *)(uintptr_t)origpte);
+ panic("pmap_enter: attempted pmap_enter on 4MB page");
+ }
+
+ /*
+ * Mapping has not changed, must be protection or wiring change.
+ */
+ if (origpte && (opa == pa)) {
+ /*
+ * Wiring change, just update stats. We don't worry about
+ * wiring PT pages as they remain resident as long as there
+ * are valid mappings in them. Hence, if a user page is wired,
+ * the PT page will be also.
+ */
+ if (wired && ((origpte & PG_W) == 0))
+ pmap->pm_stats.wired_count++;
+ else if (!wired && (origpte & PG_W))
+ pmap->pm_stats.wired_count--;
+
+#if defined(PMAP_DIAGNOSTIC)
+ if (pmap_nw_modified((pt_entry_t) origpte)) {
+ printf(
+ "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
+ va, origpte);
+ }
+#endif
+
+ /*
+ * Remove extra pte reference
+ */
+ if (mpte)
+ mpte->wire_count--;
+
+ /*
+ * We might be turning off write access to the page,
+ * so we go ahead and sense modify status.
+ */
+ if (origpte & PG_MANAGED) {
+ om = m;
+ pa |= PG_MANAGED;
+ }
+ goto validate;
+ }
+ /*
+ * Mapping has changed, invalidate old range and fall through to
+ * handle validating new mapping.
+ */
+ if (opa) {
+ int err;
+ if (origpte & PG_W)
+ pmap->pm_stats.wired_count--;
+ if (origpte & PG_MANAGED) {
+ om = PHYS_TO_VM_PAGE(opa);
+ err = pmap_remove_entry(pmap, om, va);
+ } else
+ err = pmap_unuse_pt(pmap, va);
+ if (err)
+ panic("pmap_enter: pte vanished, va: 0x%x", va);
+ } else
+ pmap->pm_stats.resident_count++;
+
+ /*
+ * Enter on the PV list if part of our managed memory. Note that we
+ * raise IPL while manipulating pv_table since pmap_enter can be
+ * called at interrupt time.
+ */
+ if (pmap_initialized &&
+ (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
+ pmap_insert_entry(pmap, va, m);
+ pa |= PG_MANAGED;
+ }
+
+ /*
+ * Increment counters
+ */
+ if (wired)
+ pmap->pm_stats.wired_count++;
+
+validate:
+ /*
+ * Now validate mapping with desired protection/wiring.
+ */
+ newpte = (pt_entry_t)(pa | PG_V);
+ if ((prot & VM_PROT_WRITE) != 0)
+ newpte |= PG_RW;
+ if (wired)
+ newpte |= PG_W;
+ if (va < VM_MAXUSER_ADDRESS)
+ newpte |= PG_U;
+ if (pmap == kernel_pmap)
+ newpte |= pgeflag;
+
+ /*
+ * if the mapping or permission bits are different, we need
+ * to update the pte.
+ */
+ if ((origpte & ~(PG_M|PG_A)) != newpte) {
+ if (origpte & PG_MANAGED) {
+ origpte = PT_GET(pte);
+ PT_SET_VA(pte, newpte | PG_A, TRUE);
+ if ((origpte & PG_M) && pmap_track_modified(va))
+ vm_page_dirty(om);
+ if (origpte & PG_A)
+ vm_page_flag_set(om, PG_REFERENCED);
+ } else
+ PT_SET_VA(pte, newpte | PG_A, TRUE);
+ if (origpte) {
+ pmap_invalidate_page(pmap, va);
+ }
+ }
+ sched_unpin();
+ vm_page_unlock_queues();
+ PMAP_UNLOCK(pmap);
+}
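+
+/*
+ * Usage sketch (illustrative only, compiled out): entering one wired,
+ * writable mapping.  The helper name is hypothetical.
+ */
+#if 0
+static void
+enter_example(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+ pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE, TRUE);
+}
+#endif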
+
+/*
+ * this code makes some *MAJOR* assumptions:
+ * 1. The current pmap and the given pmap exist.
+ * 2. Not wired.
+ * 3. Read access.
+ * 4. No page table pages.
+ * 5. The TLB flush is deferred to the calling procedure.
+ * 6. Page IS managed.
+ * but is *MUCH* faster than pmap_enter...
+ */
+
+vm_page_t
+pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
+{
+ pt_entry_t *pte;
+ vm_paddr_t pa;
+
+ vm_page_lock_queues();
+ PMAP_LOCK(pmap);
+
+ /*
+ * In the case that a page table page is not
+ * resident, we are creating it here.
+ */
+ if (va < VM_MAXUSER_ADDRESS) {
+ unsigned ptepindex;
+ pd_entry_t ptepa;
+
+ /*
+ * Calculate pagetable page index
+ */
+ ptepindex = va >> PDRSHIFT;
+ if (mpte && (mpte->pindex == ptepindex)) {
+ mpte->wire_count++;
+ } else {
+retry:
+ /*
+ * Get the page directory entry
+ */
+ ptepa = PT_GET(&pmap->pm_pdir[ptepindex]);
+
+ /*
+ * If the page table page is mapped, we just increment
+ * the hold count, and activate it.
+ */
+ if (ptepa) {
+ if (ptepa & PG_PS)
+ panic("pmap_enter_quick: unexpected mapping into 4MB page");
+ mpte = PHYS_TO_VM_PAGE(ptepa);
+ mpte->wire_count++;
+ } else {
+ mpte = _pmap_allocpte(pmap, ptepindex,
+ M_WAITOK);
+ if (mpte == NULL)
+ goto retry;
+ }
+ }
+ } else {
+ mpte = NULL;
+ }
+
+ /*
+ * This call to vtopte makes the assumption that we are
+ * entering the page into the current pmap. In order to support
+ * quick entry into any pmap, one would likely use pmap_pte_quick.
+ * But that isn't as quick as vtopte.
+ */
+ pte = vtopte(va);
+ if (PT_GET(pte)) {
+ if (mpte != NULL) {
+ pmap_unwire_pte_hold(pmap, mpte);
+ mpte = NULL;
+ }
+ goto out;
+ }
+
+ /*
+ * Enter on the PV list if part of our managed memory. Note that we
+ * raise IPL while manipulating pv_table since pmap_enter can be
+ * called at interrupt time.
+ */
+ if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
+ pmap_insert_entry(pmap, va, m);
+
+ /*
+ * Increment counters
+ */
+ pmap->pm_stats.resident_count++;
+
+ pa = VM_PAGE_TO_PHYS(m);
+
+ /*
+ * Now validate mapping with RO protection
+ */
+ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
+ PT_SET(va, pa | PG_V | PG_U, TRUE);
+ else
+ PT_SET(va, pa | PG_V | PG_U | PG_MANAGED, TRUE);
+out:
+ vm_page_unlock_queues();
+ PMAP_UNLOCK(pmap);
+ return mpte;
+}
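+
+/*
+ * Usage sketch (illustrative only, compiled out): the mpte return value
+ * is threaded through consecutive calls so that the page table page
+ * looked up by one call can be reused by the next, as in a prefault
+ * loop.  Names below are hypothetical.
+ */
+#if 0
+static void
+prefault_example(pmap_t pmap, vm_offset_t va, vm_page_t *ma, int npages)
+{
+ vm_page_t mpte = NULL;
+ int i;
+
+ for (i = 0; i < npages; i++)
+ mpte = pmap_enter_quick(pmap, va + i * PAGE_SIZE, ma[i], mpte);
+}
+#endif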
+
+/*
+ * Make a temporary mapping for a physical address. This is only intended
+ * to be used for panic dumps.
+ */
+void *
+pmap_kenter_temporary(vm_paddr_t pa, int i)
+{
+ vm_offset_t va;
+
+ va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
+ pmap_kenter(va, pa);
+#ifndef I386_CPU
+ invlpg(va);
+#else
+ invltlb();
+#endif
+ return ((void *)crashdumpmap);
+}
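+
+/*
+ * Usage sketch (illustrative only, compiled out): a dump routine maps
+ * each physical page through crashdumpmap before writing it out.
+ */
+#if 0
+static void
+dump_page_example(vm_paddr_t pa)
+{
+ void *va;
+
+ va = pmap_kenter_temporary(pa, 0); /* slot 0 of crashdumpmap */
+ /* ... write PAGE_SIZE bytes at va to the dump device ... */
+}
+#endif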
+
+/*
+ * This code maps large physical mmap regions into the
+ * processor address space. Note that some shortcuts
+ * are taken, but the code works.
+ */
+void
+pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
+ vm_object_t object, vm_pindex_t pindex,
+ vm_size_t size)
+{
+ vm_page_t p;
+
+ VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+ KASSERT(object->type == OBJT_DEVICE,
+ ("pmap_object_init_pt: non-device object"));
+ if (pseflag &&
+ ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
+ int i;
+ vm_page_t m[1];
+ unsigned int ptepindex;
+ int npdes;
+ pd_entry_t ptepa;
+
+ PMAP_LOCK(pmap);
+ if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
+ goto out;
+ PMAP_UNLOCK(pmap);
+retry:
+ p = vm_page_lookup(object, pindex);
+ if (p != NULL) {
+ vm_page_lock_queues();
+ if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
+ goto retry;
+ } else {
+ p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
+ if (p == NULL)
+ return;
+ m[0] = p;
+
+ if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ return;
+ }
+
+ p = vm_page_lookup(object, pindex);
+ vm_page_lock_queues();
+ vm_page_wakeup(p);
+ }
+ vm_page_unlock_queues();
+
+ ptepa = VM_PAGE_TO_PHYS(p);
+ if (ptepa & (NBPDR - 1))
+ return;
+
+ p->valid = VM_PAGE_BITS_ALL;
+
+ PMAP_LOCK(pmap);
+ pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
+ npdes = size >> PDRSHIFT;
+ for(i = 0; i < npdes; i++) {
+ PT_SET_VA(&pmap->pm_pdir[ptepindex],
+ ptepa | PG_U | PG_RW | PG_V | PG_PS, FALSE);
+ ptepa += NBPDR;
+ ptepindex += 1;
+ }
+ pmap_invalidate_all(pmap);
+out:
+ PMAP_UNLOCK(pmap);
+ }
+}
+
+void
+pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
+{
+ int i, npages = round_page(len) >> PAGE_SHIFT;
+ for (i = 0; i < npages; i++) {
+ pt_entry_t *pte;
+ pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
+ PT_SET_MA(va + i*PAGE_SIZE, *pte & ~(PG_RW|PG_M), FALSE);
+ PMAP_MARK_PRIV(xpmap_mtop(*pte));
+ pmap_pte_release(pte);
+ }
+ PT_UPDATES_FLUSH();
+}
+
+void
+pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
+{
+ int i, npages = round_page(len) >> PAGE_SHIFT;
+ for (i = 0; i < npages; i++) {
+ pt_entry_t *pte;
+ pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
+ PMAP_MARK_UNPRIV(xpmap_mtop(*pte));
+ PT_SET_MA(va + i*PAGE_SIZE, *pte | (PG_RW|PG_M), FALSE);
+ pmap_pte_release(pte);
+ }
+ PT_UPDATES_FLUSH();
+}
+
+/*
+ * Routine: pmap_change_wiring
+ * Function: Change the wiring attribute for a map/virtual-address
+ * pair.
+ * In/out conditions:
+ * The mapping must already exist in the pmap.
+ */
+void
+pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
+{
+ pt_entry_t *pte;
+
+ PMAP_LOCK(pmap);
+ pte = pmap_pte(pmap, va);
+
+ if (wired && !pmap_pte_w(pte))
+ pmap->pm_stats.wired_count++;
+ else if (!wired && pmap_pte_w(pte))
+ pmap->pm_stats.wired_count--;
+
+ /*
+ * Wiring is not a hardware characteristic so there is no need to
+ * invalidate TLB.
+ */
+ pmap_pte_set_w(pte, wired);
+ pmap_pte_release(pte);
+ PMAP_UNLOCK(pmap);
+}
+
+
+
+/*
+ * Copy the range specified by src_addr/len
+ * from the source map to the range dst_addr/len
+ * in the destination map.
+ *
+ * This routine is only advisory and need not do anything.
+ */
+
+void
+pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
+ vm_offset_t src_addr)
+{
+ vm_offset_t addr;
+ vm_offset_t end_addr = src_addr + len;
+ vm_offset_t pdnxt;
+ vm_page_t m;
+
+ if (dst_addr != src_addr)
+ return;
+
+ if (!pmap_is_current(src_pmap))
+ return;
+
+ vm_page_lock_queues();
+ if (dst_pmap < src_pmap) {
+ PMAP_LOCK(dst_pmap);
+ PMAP_LOCK(src_pmap);
+ } else {
+ PMAP_LOCK(src_pmap);
+ PMAP_LOCK(dst_pmap);
+ }
+ sched_pin();
+ for (addr = src_addr; addr < end_addr; addr = pdnxt) {
+ pt_entry_t *src_pte, *dst_pte;
+ vm_page_t dstmpte, srcmpte;
+ pd_entry_t srcptepaddr;
+ unsigned ptepindex;
+
+ if (addr >= UPT_MIN_ADDRESS)
+ panic("pmap_copy: invalid to pmap_copy page tables");
+
+ /*
+ * Don't let optional prefaulting of pages make us go
+ * way below the low water mark of free pages or way
+ * above high water mark of used pv entries.
+ */
+ if (cnt.v_free_count < cnt.v_free_reserved ||
+ pv_entry_count > pv_entry_high_water)
+ break;
+
+ pdnxt = (addr + NBPDR) & ~PDRMASK;
+ ptepindex = addr >> PDRSHIFT;
+
+ srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
+ if (srcptepaddr == 0)
+ continue;
+
+ if (srcptepaddr & PG_PS) {
+ if (dst_pmap->pm_pdir[ptepindex] == 0) {
+ PT_SET_VA(&dst_pmap->pm_pdir[ptepindex], srcptepaddr, TRUE);
+ dst_pmap->pm_stats.resident_count +=
+ NBPDR / PAGE_SIZE;
+ }
+ continue;
+ }
+
+ srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
+ if (srcmpte->wire_count == 0)
+ panic("pmap_copy: source page table page is unused");
+
+ if (pdnxt > end_addr)
+ pdnxt = end_addr;
+
+ src_pte = vtopte(addr);
+ while (addr < pdnxt) {
+ pt_entry_t ptetemp;
+ ptetemp = PT_GET(src_pte);
+ /*
+ * We only virtual-copy managed pages.
+ */
+ if ((ptetemp & PG_MANAGED) != 0) {
+ /*
+ * We have to check after allocpte for the
+ * pte still being around... allocpte can
+ * block.
+ */
+ dstmpte = pmap_allocpte(dst_pmap, addr,
+ M_NOWAIT);
+ if (dstmpte == NULL)
+ break;
+ dst_pte = pmap_pte_quick(dst_pmap, addr);
+ if (*dst_pte == 0) {
+ /*
+ * Clear the modified and
+ * accessed (referenced) bits
+ * during the copy.
+ */
+ m = PHYS_TO_VM_PAGE(ptetemp);
+ PT_SET_VA(dst_pte, ptetemp & ~(PG_M | PG_A), FALSE);
+ dst_pmap->pm_stats.resident_count++;
+ pmap_insert_entry(dst_pmap, addr, m);
+ } else
+ pmap_unwire_pte_hold(dst_pmap, dstmpte);
+ if (dstmpte->wire_count >= srcmpte->wire_count)
+ break;
+ }
+ addr += PAGE_SIZE;
+ src_pte++;
+ }
+ }
+ PT_UPDATES_FLUSH();
+ sched_unpin();
+ vm_page_unlock_queues();
+ PMAP_UNLOCK(src_pmap);
+ PMAP_UNLOCK(dst_pmap);
+}
+
+static __inline void
+pagezero(void *page)
+{
+#if defined(I686_CPU)
+ if (cpu_class == CPUCLASS_686) {
+#if defined(CPU_ENABLE_SSE)
+ if (cpu_feature & CPUID_SSE2)
+ sse2_pagezero(page);
+ else
+#endif
+ i686_pagezero(page);
+ } else
+#endif
+ bzero(page, PAGE_SIZE);
+}
+
+/*
+ * pmap_zero_page zeros the specified hardware page by mapping
+ * the page into KVM and using bzero to clear its contents.
+ */
+void
+pmap_zero_page(vm_page_t m)
+{
+
+ mtx_lock(&CMAPCADDR12_lock);
+ if (*CMAP2)
+ panic("pmap_zero_page: CMAP2 busy");
+ sched_pin();
+ PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, FALSE);
+ invlcaddr(CADDR2);
+ pagezero(CADDR2);
+ PT_CLEAR_VA(CMAP2, TRUE);
+ sched_unpin();
+ mtx_unlock(&CMAPCADDR12_lock);
+}
+
+/*
+ * pmap_zero_page_area zeros the specified hardware page by mapping
+ * the page into KVM and using bzero to clear its contents.
+ *
+ * off and size may not cover an area beyond a single hardware page.
+ */
+void
+pmap_zero_page_area(vm_page_t m, int off, int size)
+{
+
+ mtx_lock(&CMAPCADDR12_lock);
+ if (*CMAP2)
+ panic("pmap_zero_page_area: CMAP2 busy");
+ sched_pin();
+ PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, FALSE);
+ invlcaddr(CADDR2);
+ if (off == 0 && size == PAGE_SIZE)
+ pagezero(CADDR2);
+ else
+ bzero((char *)CADDR2 + off, size);
+ PT_CLEAR_VA(CMAP2, TRUE);
+ sched_unpin();
+ mtx_unlock(&CMAPCADDR12_lock);
+}
+
+/*
+ * pmap_zero_page_idle zeros the specified hardware page by mapping
+ * the page into KVM and using bzero to clear its contents. This
+ * is intended to be called from the vm_pagezero process only and
+ * outside of Giant.
+ */
+void
+pmap_zero_page_idle(vm_page_t m)
+{
+
+ if (*CMAP3)
+ panic("pmap_zero_page_idle: CMAP3 busy");
+ sched_pin();
+ PT_SET_VA(CMAP3, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, TRUE);
+ invlcaddr(CADDR3);
+ pagezero(CADDR3);
+ PT_CLEAR_VA(CMAP3, TRUE);
+ sched_unpin();
+}
+
+/*
+ * pmap_copy_page copies the specified (machine independent)
+ * page by mapping the page into virtual memory and using
+ * bcopy to copy the page, one machine dependent page at a
+ * time.
+ */
+void
+pmap_copy_page(vm_page_t src, vm_page_t dst)
+{
+
+ mtx_lock(&CMAPCADDR12_lock);
+ if (*CMAP1)
+ panic("pmap_copy_page: CMAP1 busy");
+ if (*CMAP2)
+ panic("pmap_copy_page: CMAP2 busy");
+ sched_pin();
+#ifdef I386_CPU
+ invltlb();
+#else
+ invlpg((u_int)CADDR1);
+ invlpg((u_int)CADDR2);
+#endif
+ PT_SET_VA(CMAP1, PG_V | VM_PAGE_TO_PHYS(src) | PG_A, FALSE);
+ PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M, TRUE);
+
+ bcopy(CADDR1, CADDR2, PAGE_SIZE);
+ PT_CLEAR_VA(CMAP1, FALSE);
+ PT_CLEAR_VA(CMAP2, TRUE);
+ sched_unpin();
+ mtx_unlock(&CMAPCADDR12_lock);
+}
+
+/*
+ * Returns true if the pmap's pv is one of the first
+ * 16 pvs linked to from this page. This count may
+ * be changed upwards or downwards in the future; it
+ * is only necessary that true be returned for a small
+ * subset of pmaps for proper page aging.
+ */
+boolean_t
+pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
+{
+ pv_entry_t pv;
+ int loops = 0;
+
+ if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
+ return FALSE;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ if (pv->pv_pmap == pmap) {
+ return TRUE;
+ }
+ loops++;
+ if (loops >= 16)
+ break;
+ }
+ return (FALSE);
+}
+
+#define PMAP_REMOVE_PAGES_CURPROC_ONLY
+/*
+ * Remove all pages from the specified address space;
+ * this aids process exit speeds.  Also, this code is
+ * special-cased for the current process only, but the
+ * more generic (and slightly slower) mode can be
+ * enabled.  This is much faster than pmap_remove in
+ * the case of running down an entire address space.
+ */
+void
+pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ pt_entry_t *pte, tpte;
+ vm_page_t m;
+ pv_entry_t pv, npv;
+
+#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
+ if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
+ printf("warning: pmap_remove_pages called with non-current pmap\n");
+ return;
+ }
+#endif
+ vm_page_lock_queues();
+ PMAP_LOCK(pmap);
+ sched_pin();
+
+ for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
+ if (pv->pv_va >= eva || pv->pv_va < sva) {
+ npv = TAILQ_NEXT(pv, pv_plist);
+ continue;
+ }
+
+#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
+ pte = vtopte(pv->pv_va);
+#else
+ pte = pmap_pte_quick(pmap, pv->pv_va);
+#endif
+ tpte = PT_GET(pte);
+
+ if (tpte == 0) {
+ printf("TPTE at %p IS ZERO @ VA %08x\n",
+ pte, pv->pv_va);
+ panic("bad pte");
+ }
+
+/*
+ * We cannot remove wired pages from a process' mapping at this time
+ */
+ if (tpte & PG_W) {
+ npv = TAILQ_NEXT(pv, pv_plist);
+ continue;
+ }
+
+ m = PHYS_TO_VM_PAGE(tpte);
+ KASSERT(m->phys_addr == (tpte & PG_FRAME),
+ ("vm_page_t %p phys_addr mismatch %016jx %016jx",
+ m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));
+
+ KASSERT(m < &vm_page_array[vm_page_array_size],
+ ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));
+
+ pmap->pm_stats.resident_count--;
+
+ pte_clear(pte);
+
+ /*
+ * Update the vm_page_t clean and reference bits.
+ */
+ if (tpte & PG_M) {
+ vm_page_dirty(m);
+ }
+
+ npv = TAILQ_NEXT(pv, pv_plist);
+ TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
+
+ m->md.pv_list_count--;
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list))
+ vm_page_flag_clear(m, PG_WRITEABLE);
+
+ pmap_unuse_pt(pmap, pv->pv_va);
+ free_pv_entry(pv);
+ }
+ sched_unpin();
+ pmap_invalidate_all(pmap);
+ PMAP_UNLOCK(pmap);
+ vm_page_unlock_queues();
+}
+
+/*
+ * pmap_is_modified:
+ *
+ * Return whether or not the specified physical page was modified
+ * in any physical maps.
+ */
+boolean_t
+pmap_is_modified(vm_page_t m)
+{
+ pv_entry_t pv;
+ pt_entry_t *pte;
+ boolean_t rv;
+
+ rv = FALSE;
+ if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
+ return (rv);
+
+ sched_pin();
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ /*
+ * If the va is not tracked for modification (e.g. it
+ * falls within the clean submap), treat its ptes as
+ * never modified and skip it.
+ */
+ if (!pmap_track_modified(pv->pv_va))
+ continue;
+#if defined(PMAP_DIAGNOSTIC)
+ if (!pv->pv_pmap) {
+ printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
+ continue;
+ }
+#endif
+ PMAP_LOCK(pv->pv_pmap);
+ pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+ rv = (*pte & PG_M) != 0;
+ PMAP_UNLOCK(pv->pv_pmap);
+ if (rv)
+ break;
+ }
+ sched_unpin();
+ return (rv);
+}
+
+/*
+ * pmap_is_prefaultable:
+ *
+ * Return whether or not the specified virtual address is eligible
+ * for prefault.
+ */
+boolean_t
+pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
+{
+ pt_entry_t *pte;
+ boolean_t rv;
+
+ rv = FALSE;
+ /*
+ * XXX prefaulting is effectively disabled here: this early return
+ * makes the remainder of the function unreachable.
+ */
+ return (rv);
+ PMAP_LOCK(pmap);
+ if (pmap_pde(pmap, addr)) {
+ pte = vtopte(addr);
+ rv = *pte == 0;
+ }
+ PMAP_UNLOCK(pmap);
+ return (rv);
+}
+
+/*
+ * Clear the given bit in each of the given page's ptes. The bit is
+ * expressed as a 32-bit mask. Consequently, if the pte is 64 bits in
+ * size, only a bit within the least significant 32 can be cleared.
+ */
+static __inline void
+pmap_clear_ptes(vm_page_t m, int bit)
+{
+ register pv_entry_t pv;
+ pt_entry_t pbits, *pte;
+
+ if (!pmap_initialized || (m->flags & PG_FICTITIOUS) ||
+ (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
+ return;
+
+ sched_pin();
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ /*
+ * Loop over all current mappings, setting/clearing as
+ * appropriate.  If setting RO, do we need to clear the VAC?
+ */
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ /*
+ * don't write protect pager mappings
+ */
+ if (bit == PG_RW) {
+ if (!pmap_track_modified(pv->pv_va))
+ continue;
+ }
+
+#if defined(PMAP_DIAGNOSTIC)
+ if (!pv->pv_pmap) {
+ printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
+ continue;
+ }
+#endif
+
+ PMAP_LOCK(pv->pv_pmap);
+ pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+#ifdef notyet
+retry:
+#endif
+ pbits = PT_GET(pte);
+ if (pbits & bit) {
+ if (bit == PG_RW) {
+ /*
+ * Regardless of whether a pte is 32 or 64 bits
+ * in size, PG_RW and PG_M are among the least
+ * significant 32 bits.
+ */
+#ifdef notyet
+ if (!atomic_cmpset_int((u_int *)pte, pbits,
+ pbits & ~(PG_RW | PG_M)))
+ goto retry;
+#endif
+ PT_SET_VA(pte, pbits & ~(PG_M|PG_RW), TRUE);
+
+ if (pbits & PG_M) {
+ vm_page_dirty(m);
+ }
+ } else {
+#ifdef notyet
+ atomic_clear_int((u_int *)pte, bit);
+#endif
+ /* XXX */
+ PT_SET_VA(pte, pbits & ~bit, TRUE);
+ }
+ pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+ }
+ PMAP_UNLOCK(pv->pv_pmap);
+ }
+ if (bit == PG_RW)
+ vm_page_flag_clear(m, PG_WRITEABLE);
+ sched_unpin();
+}
+
+/*
+ * pmap_page_protect:
+ *
+ * Lower the permission for all mappings to a given page.
+ */
+void
+pmap_page_protect(vm_page_t m, vm_prot_t prot)
+{
+ if ((prot & VM_PROT_WRITE) == 0) {
+ if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
+ pmap_clear_ptes(m, PG_RW);
+ } else {
+ pmap_remove_all(m);
+ }
+ }
+}
+
+/*
+ * pmap_ts_referenced:
+ *
+ * Return a count of reference bits for a page, clearing those bits.
+ * It is not necessary for every reference bit to be cleared, but it
+ * is necessary that 0 only be returned when there are truly no
+ * reference bits set.
+ *
+ * XXX: The exact number of bits to check and clear is a matter that
+ * should be tested and standardized at some point in the future for
+ * optimal aging of shared pages.
+ */
+int
+pmap_ts_referenced(vm_page_t m)
+{
+ register pv_entry_t pv, pvf, pvn;
+ pt_entry_t *pte;
+ pt_entry_t v;
+ int rtval = 0;
+
+ if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
+ return (rtval);
+
+ sched_pin();
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
+
+ pvf = pv;
+
+ do {
+ pvn = TAILQ_NEXT(pv, pv_list);
+
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+
+ if (!pmap_track_modified(pv->pv_va))
+ continue;
+
+ PMAP_LOCK(pv->pv_pmap);
+ pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+
+ if (pte && ((v = PT_GET(pte)) & PG_A) != 0) {
+#ifdef notyet
+ atomic_clear_int((u_int *)pte, PG_A);
+#endif
+ PT_SET_VA(pte, v & ~PG_A, FALSE);
+ pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+
+ rtval++;
+ if (rtval > 4) {
+ PMAP_UNLOCK(pv->pv_pmap);
+ break;
+ }
+ }
+ PMAP_UNLOCK(pv->pv_pmap);
+ } while ((pv = pvn) != NULL && pv != pvf);
+ }
+ sched_unpin();
+
+ return (rtval);
+}
+
+/*
+ * Clear the modify bits on the specified physical page.
+ */
+void
+pmap_clear_modify(vm_page_t m)
+{
+ pmap_clear_ptes(m, PG_M);
+}
+
+/*
+ * pmap_clear_reference:
+ *
+ * Clear the reference bit on the specified physical page.
+ */
+void
+pmap_clear_reference(vm_page_t m)
+{
+ pmap_clear_ptes(m, PG_A);
+}
+
+/*
+ * Miscellaneous support routines follow
+ */
+
+/*
+ * Map a set of physical memory pages into the kernel virtual
+ * address space. Return a pointer to where it is mapped. This
+ * routine is intended to be used for mapping device memory,
+ * NOT real memory.
+ */
+void *
+pmap_mapdev(vm_paddr_t pa, vm_size_t size)
+{
+ vm_offset_t va, tmpva, offset;
+
+ offset = pa & PAGE_MASK;
+ size = roundup(offset + size, PAGE_SIZE);
+ pa = pa & PG_FRAME;
+
+ if (pa < KERNLOAD && pa + size <= KERNLOAD)
+ va = KERNBASE + pa;
+ else
+ va = kmem_alloc_nofault(kernel_map, size);
+ if (!va)
+ panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
+
+ for (tmpva = va; size > 0; ) {
+ PT_SET(tmpva, pa | PG_RW | PG_V | pgeflag, FALSE);
+ size -= PAGE_SIZE;
+ tmpva += PAGE_SIZE;
+ pa += PAGE_SIZE;
+ }
+ pmap_invalidate_range(kernel_pmap, va, tmpva);
+ return ((void *)(va + offset));
+}
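+
+/*
+ * Usage sketch (illustrative only, compiled out): mapping a device
+ * register window.  The physical address is hypothetical; only device
+ * memory, not RAM, should be mapped this way.
+ */
+#if 0
+static void *
+mapdev_example(void)
+{
+ return (pmap_mapdev(0xfee00000, PAGE_SIZE));
+}
+#endif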
+
+void
+pmap_unmapdev(vm_offset_t va, vm_size_t size)
+{
+ vm_offset_t base, offset, tmpva;
+
+ /* XXX currently unused; the panic below makes the rest unreachable. */
+ panic("unused");
+ if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
+ return;
+ base = va & PG_FRAME;
+ offset = va & PAGE_MASK;
+ size = roundup(offset + size, PAGE_SIZE);
+ for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
+ PT_CLEAR(tmpva, FALSE);
+ pmap_invalidate_range(kernel_pmap, va, tmpva);
+ kmem_free(kernel_map, base, size);
+}
+
+/*
+ * perform the pmap work for mincore
+ */
+int
+pmap_mincore(pmap_t pmap, vm_offset_t addr)
+{
+ pt_entry_t *ptep, pte;
+ vm_page_t m;
+ int val = 0;
+
+ PMAP_LOCK(pmap);
+ ptep = pmap_pte(pmap, addr);
+ pte = (ptep != NULL) ? PT_GET(ptep) : 0;
+ pmap_pte_release(ptep);
+ PMAP_UNLOCK(pmap);
+
+ if (pte != 0) {
+ vm_paddr_t pa;
+
+ val = MINCORE_INCORE;
+ if ((pte & PG_MANAGED) == 0)
+ return val;
+
+ pa = pte & PG_FRAME;
+
+ m = PHYS_TO_VM_PAGE(pa);
+
+ /*
+ * Modified by us
+ */
+ if (pte & PG_M)
+ val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
+ else {
+ /*
+ * Modified by someone else
+ */
+ vm_page_lock_queues();
+ if (m->dirty || pmap_is_modified(m))
+ val |= MINCORE_MODIFIED_OTHER;
+ vm_page_unlock_queues();
+ }
+ /*
+ * Referenced by us
+ */
+ if (pte & PG_A)
+ val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
+ else {
+ /*
+ * Referenced by someone else
+ */
+ vm_page_lock_queues();
+ if ((m->flags & PG_REFERENCED) ||
+ pmap_ts_referenced(m)) {
+ val |= MINCORE_REFERENCED_OTHER;
+ vm_page_flag_set(m, PG_REFERENCED);
+ }
+ vm_page_unlock_queues();
+ }
+ }
+ return val;
+}
+
+void
+pmap_activate(struct thread *td)
+{
+ struct proc *p = td->td_proc;
+ pmap_t pmap, oldpmap;
+ u_int32_t cr3;
+
+ critical_enter();
+ pmap = vmspace_pmap(td->td_proc->p_vmspace);
+ oldpmap = PCPU_GET(curpmap);
+#if defined(SMP)
+ atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
+ atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+#else
+ oldpmap->pm_active &= ~1;
+ pmap->pm_active |= 1;
+#endif
+#ifdef PAE
+ cr3 = vtophys(pmap->pm_pdpt);
+#else
+ cr3 = vtophys(pmap->pm_pdir);
+#endif
+ /*
+ * XXXKSE this is wrong:
+ * pmap_activate is for the current thread on the current cpu.
+ */
+ if (p->p_flag & P_SA) {
+ /* Make sure all other cr3 entries are updated. */
+ /* what if they are running? XXXKSE (maybe abort them) */
+ FOREACH_THREAD_IN_PROC(p, td) {
+ td->td_pcb->pcb_cr3 = cr3;
+ }
+ } else {
+ td->td_pcb->pcb_cr3 = cr3;
+ }
+ load_cr3(cr3);
+ PCPU_SET(curpmap, pmap);
+ critical_exit();
+}
+
+vm_offset_t
+pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
+{
+
+ if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
+ return addr;
+ }
+
+ addr = (addr + PDRMASK) & ~PDRMASK;
+ return addr;
+}
+
+
+#if defined(PMAP_DEBUG)
+extern int init_first;
+void
+pmap_ref(pt_entry_t *pte, unsigned long ma)
+{
+ int ind, i, count;
+ unsigned long ebp_prev, eip_prev, oma = 0;
+ unsigned long pa = xpmap_mtop(ma);
+
+ /* are we to the point where mappings are set up? */
+ if (!init_first)
+ return;
+
+ ind = pa >> PAGE_SHIFT;
+ /* privileged? */
+ if ((pa & PG_RW) && pteinfo_list[ind].pt_ref & (1 << 31))
+ BKPT;
+
+ /* is MA already mapped ? */
+ oma = *pte;
+
+ /* old reference being lost */
+ if (oma && (oma & PG_RW) && ((oma & PG_FRAME) != (ma & PG_FRAME)))
+ pmap_dec_ref(oma);
+
+ /* ignore RO mappings - unless we're downgrading */
+ if (!(ma & PG_RW)) {
+ /* downgrading mapping - lose reference */
+ if (((oma & PG_FRAME) == (ma & PG_FRAME)) &&
+ (oma & PG_RW))
+ pmap_dec_ref(ma);
+ return;
+ }
+
+ if (pteinfo_list[ind].pt_ref < 0)
+ BKPT;
+
+
+ /* same address and not upgrading the mapping */
+ if (((oma & PG_FRAME) == (ma & PG_FRAME)) &&
+ (oma & PG_RW))
+ return;
+
+ count = pteinfo_list[ind].pt_ref;
+ __asm__("movl %%ebp, %0" : "=r" (ebp_prev));
+ for (i = 0; i < XPQ_CALL_DEPTH && ebp_prev > KERNBASE; i++) {
+ __asm__("movl 4(%1), %0" : "=r" (eip_prev) : "r" (ebp_prev));
+ pteinfo_list[ind].pt_eip[count%XPQ_CALL_COUNT][i] = eip_prev;
+ __asm__("movl (%1), %0" : "=r" (ebp_prev) : "r" (ebp_prev));
+ }
+
+ pteinfo_list[ind].pt_ref++;
+
+}
+
+void
+pmap_dec_ref(unsigned long ma)
+{
+ unsigned long pa;
+ int ind, count;
+
+ if (!ma) BKPT;
+
+ pa = xpmap_mtop(ma);
+
+ ind = pa >> PAGE_SHIFT;
+ if (pteinfo_list[ind].pt_ref & (1 << 31)) BKPT;
+
+ count = pteinfo_list[ind].pt_ref & ~(1 << 31);
+ if (count < 1) {
+ printk("ma: %lx has ref count of 0\n", ma);
+ BKPT;
+ }
+ pteinfo_list[ind].pt_ref = (--count | (pteinfo_list[ind].pt_ref & (1 << 31)));
+
+}
+
+void
+pmap_dec_ref_page(vm_page_t m)
+{
+ unsigned long *pt;
+ int i;
+ mtx_lock(&CMAPCADDR12_lock);
+ if (*CMAP2)
+ panic("pmap_dec_ref_page: CMAP2 busy");
+ sched_pin();
+ PT_SET_VA(CMAP2, PG_V | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, FALSE);
+ invlcaddr(CADDR2);
+ pt = (unsigned long *)CADDR2;
+ for (i = 0; i < 1024; i++)
+ if (pt[i] & PG_RW)
+ pmap_dec_ref(xpmap_ptom(pt[i]));
+ PT_CLEAR_VA(CMAP2, TRUE);
+ sched_unpin();
+ mtx_unlock(&CMAPCADDR12_lock);
+}
+
+void
+pmap_mark_privileged(unsigned long pa)
+{
+ int ind = pa >> PAGE_SHIFT;
+
+ if (pteinfo_list[ind].pt_ref & (1 << 31)) BKPT;
+ if ((pteinfo_list[ind].pt_ref & ~(1 << 31)) > 0) BKPT;
+
+ pteinfo_list[ind].pt_ref |= (1 << 31);
+
+}
+
+void
+pmap_mark_unprivileged(unsigned long pa)
+{
+ int ind = pa >> PAGE_SHIFT;
+
+ if (pteinfo_list[ind].pt_ref != (1 << 31)) BKPT;
+
+ pteinfo_list[ind].pt_ref &= ~(1 << 31);
+
+}
+
+
+int
+pmap_pid_dump(int pid)
+{
+ pmap_t pmap;
+ struct proc *p;
+ int npte = 0;
+ int index;
+
+ sx_slock(&allproc_lock);
+ LIST_FOREACH(p, &allproc, p_list) {
+ if (p->p_pid != pid)
+ continue;
+
+ if (p->p_vmspace) {
+ int i,j;
+ index = 0;
+ pmap = vmspace_pmap(p->p_vmspace);
+ for (i = 0; i < NPDEPTD; i++) {
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ vm_offset_t base = i << PDRSHIFT;
+
+ pde = &pmap->pm_pdir[i];
+ if (pde && pmap_pde_v(pde)) {
+ for (j = 0; j < NPTEPG; j++) {
+ vm_offset_t va = base + (j << PAGE_SHIFT);
+ if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
+ if (index) {
+ index = 0;
+ printf("\n");
+ }
+ sx_sunlock(&allproc_lock);
+ return npte;
+ }
+ pte = pmap_pte(pmap, va);
+ if (pte && pmap_pte_v(pte)) {
+ pt_entry_t pa;
+ vm_page_t m;
+ pa = PT_GET(pte);
+ m = PHYS_TO_VM_PAGE(pa);
+ printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
+ va, pa, m->hold_count, m->wire_count, m->flags);
+ npte++;
+ index++;
+ if (index >= 2) {
+ index = 0;
+ printf("\n");
+ } else {
+ printf(" ");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ sx_sunlock(&allproc_lock);
+ return npte;
+}
+#endif /* PMAP_DEBUG */
+
+#if defined(DEBUG)
+
+static void pads(pmap_t pm);
+void pmap_pvdump(vm_offset_t pa);
+
+/* print address space of pmap */
+static void
+pads(pmap_t pm)
+{
+ int i, j;
+ vm_paddr_t va;
+ pt_entry_t *ptep;
+
+ if (pm == kernel_pmap)
+ return;
+ for (i = 0; i < NPDEPTD; i++)
+ if (pm->pm_pdir[i])
+ for (j = 0; j < NPTEPG; j++) {
+ va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
+ if (pm == kernel_pmap && va < KERNBASE)
+ continue;
+ if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
+ continue;
+ ptep = pmap_pte(pm, va);
+ if (pmap_pte_v(ptep))
+ printf("%x:%x ", va, *ptep);
+ }
+
+}
+
+void
+pmap_pvdump(vm_paddr_t pa)
+{
+ pv_entry_t pv;
+ vm_page_t m;
+
+ printf("pa %x", pa);
+ m = PHYS_TO_VM_PAGE(pa);
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
+ pads(pv->pv_pmap);
+ }
+ printf(" ");
+}
+#endif
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s
new file mode 100644
index 0000000000..deb4a94859
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s
@@ -0,0 +1,1553 @@
+/*-
+ * Copyright (c) 1993 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/i386/support.s,v 1.100 2003/11/03 21:28:54 jhb Exp $
+ */
+
+#include "opt_npx.h"
+
+#include <machine/asmacros.h>
+#include <machine/cputypes.h>
+#include <machine/intr_machdep.h>
+#include <machine/pmap.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+#define IDXSHIFT 10
+
+ .data
+ .globl bcopy_vector
+bcopy_vector:
+ .long generic_bcopy
+ .globl bzero_vector
+bzero_vector:
+ .long generic_bzero
+ .globl copyin_vector
+copyin_vector:
+ .long generic_copyin
+ .globl copyout_vector
+copyout_vector:
+ .long generic_copyout
+#if defined(I586_CPU) && defined(DEV_NPX)
+kernel_fpu_lock:
+ .byte 0xfe
+ .space 3
+#endif
+ ALIGN_DATA
+ .globl intrcnt, eintrcnt
+intrcnt:
+ .space INTRCNT_COUNT * 4
+eintrcnt:
+
+ .globl intrnames, eintrnames
+intrnames:
+ .space INTRCNT_COUNT * (MAXCOMLEN + 1)
+eintrnames:
+
+ .text
+
+/*
+ * bcopy family
+ * void bzero(void *buf, u_int len)
+ */
+
+ENTRY(bzero)
+ MEXITCOUNT
+ jmp *bzero_vector
+
+ENTRY(generic_bzero)
+ pushl %edi
+ movl 8(%esp),%edi
+ movl 12(%esp),%ecx
+ xorl %eax,%eax
+ shrl $2,%ecx
+ cld
+ rep
+ stosl
+ movl 12(%esp),%ecx
+ andl $3,%ecx
+ rep
+ stosb
+ popl %edi
+ ret
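+
+/*
+ * C equivalent of generic_bzero above (illustrative only): "rep stosl"
+ * zeroes len/4 32-bit words, then "rep stosb" mops up the remaining
+ * 0-3 bytes.
+ *
+ * void generic_bzero(void *buf, u_int len)
+ * {
+ * u_int32_t *p = buf;
+ * u_int i;
+ *
+ * for (i = 0; i < len / 4; i++)
+ * *p++ = 0;
+ * for (i = 0; i < (len & 3); i++)
+ * ((u_int8_t *)p)[i] = 0;
+ * }
+ */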
+
+#ifdef I486_CPU
+ENTRY(i486_bzero)
+ movl 4(%esp),%edx
+ movl 8(%esp),%ecx
+ xorl %eax,%eax
+/*
+ * do 64 byte chunks first
+ *
+ * XXX this is probably over-unrolled at least for DX2's
+ */
+2:
+ cmpl $64,%ecx
+ jb 3f
+ movl %eax,(%edx)
+ movl %eax,4(%edx)
+ movl %eax,8(%edx)
+ movl %eax,12(%edx)
+ movl %eax,16(%edx)
+ movl %eax,20(%edx)
+ movl %eax,24(%edx)
+ movl %eax,28(%edx)
+ movl %eax,32(%edx)
+ movl %eax,36(%edx)
+ movl %eax,40(%edx)
+ movl %eax,44(%edx)
+ movl %eax,48(%edx)
+ movl %eax,52(%edx)
+ movl %eax,56(%edx)
+ movl %eax,60(%edx)
+ addl $64,%edx
+ subl $64,%ecx
+ jnz 2b
+ ret
+
+/*
+ * do 16 byte chunks
+ */
+ SUPERALIGN_TEXT
+3:
+ cmpl $16,%ecx
+ jb 4f
+ movl %eax,(%edx)
+ movl %eax,4(%edx)
+ movl %eax,8(%edx)
+ movl %eax,12(%edx)
+ addl $16,%edx
+ subl $16,%ecx
+ jnz 3b
+ ret
+
+/*
+ * do 4 byte chunks
+ */
+ SUPERALIGN_TEXT
+4:
+ cmpl $4,%ecx
+ jb 5f
+ movl %eax,(%edx)
+ addl $4,%edx
+ subl $4,%ecx
+ jnz 4b
+ ret
+
+/*
+ * do 1 byte chunks
+ * a jump table seems to be faster than a loop or more range reductions
+ *
+ * XXX need a const section for non-text
+ */
+ .data
+jtab:
+ .long do0
+ .long do1
+ .long do2
+ .long do3
+
+ .text
+ SUPERALIGN_TEXT
+5:
+ jmp *jtab(,%ecx,4)
+
+ SUPERALIGN_TEXT
+do3:
+ movw %ax,(%edx)
+ movb %al,2(%edx)
+ ret
+
+ SUPERALIGN_TEXT
+do2:
+ movw %ax,(%edx)
+ ret
+
+ SUPERALIGN_TEXT
+do1:
+ movb %al,(%edx)
+ ret
+
+ SUPERALIGN_TEXT
+do0:
+ ret
+#endif
+
+#if defined(I586_CPU) && defined(DEV_NPX)
+ENTRY(i586_bzero)
+ movl 4(%esp),%edx
+ movl 8(%esp),%ecx
+
+ /*
+ * The FPU register method is twice as fast as the integer register
+ * method unless the target is in the L1 cache and we pre-allocate a
+ * cache line for it (then the integer register method is 4-5 times
+ * faster). However, we never pre-allocate cache lines, since that
+ * would make the integer method 25% or more slower for the common
+ * case when the target isn't in either the L1 cache or the L2 cache.
+ * Thus we normally use the FPU register method unless the overhead
+ * would be too large.
+ */
+ cmpl $256,%ecx /* empirical; clts, fninit, smsw cost a lot */
+ jb intreg_i586_bzero
+
+ /*
+ * The FPU registers may belong to an application or to fastmove()
+ * or to another invocation of bcopy() or ourself in a higher level
+ * interrupt or trap handler. Preserving the registers is
+ * complicated since we avoid it if possible at all levels. We
+ * want to localize the complications even when that increases them.
+ * Here the extra work involves preserving CR0_TS in TS.
+ * `fpcurthread != NULL' is supposed to be the condition that all the
+ * FPU resources belong to an application, but fpcurthread and CR0_TS
+ * aren't set atomically enough for this condition to work in
+ * interrupt handlers.
+ *
+ * Case 1: FPU registers belong to the application: we must preserve
+ * the registers if we use them, so we only use the FPU register
+ * method if the target size is large enough to amortize the extra
+ * overhead for preserving them. CR0_TS must be preserved although
+ * it is very likely to end up as set.
+ *
+ * Case 2: FPU registers belong to fastmove(): fastmove() currently
+ * makes the registers look like they belong to an application so
+ * that cpu_switch() and savectx() don't have to know about it, so
+ * this case reduces to case 1.
+ *
+ * Case 3: FPU registers belong to the kernel: don't use the FPU
+ * register method. This case is unlikely, and supporting it would
+ * be more complicated and might take too much stack.
+ *
+ * Case 4: FPU registers don't belong to anyone: the FPU registers
+ * don't need to be preserved, so we always use the FPU register
+ * method. CR0_TS must be preserved although it is very likely to
+ * always end up as clear.
+ */
+ cmpl $0,PCPU(FPCURTHREAD)
+ je i586_bz1
+
+ /*
+ * XXX don't use the FPU for cases 1 and 2, since preemptive
+ * scheduling of ithreads broke these cases. Note that we can
+ * no longer get here from an interrupt handler, since the
+ * context sitch to the interrupt handler will have saved the
+ * FPU state.
+ */
+ jmp intreg_i586_bzero
+
+ cmpl $256+184,%ecx /* empirical; not quite 2*108 more */
+ jb intreg_i586_bzero
+ sarb $1,kernel_fpu_lock
+ jc intreg_i586_bzero
+ smsw %ax
+ clts
+ subl $108,%esp
+ fnsave 0(%esp)
+ jmp i586_bz2
+
+i586_bz1:
+ sarb $1,kernel_fpu_lock
+ jc intreg_i586_bzero
+ smsw %ax
+ clts
+ fninit /* XXX should avoid needing this */
+i586_bz2:
+ fldz
+
+ /*
+ * Align to an 8 byte boundary (misalignment in the main loop would
+ * cost a factor of >= 2). Avoid jumps (at little cost if it is
+ * already aligned) by always zeroing 8 bytes and using the part up
+ * to the _next_ alignment position.
+ */
+ fstl 0(%edx)
+ addl %edx,%ecx /* part of %ecx -= new_%edx - %edx */
+ addl $8,%edx
+ andl $~7,%edx
+ subl %edx,%ecx
+
+ /*
+ * Similarly align `len' to a multiple of 8.
+ */
+ fstl -8(%edx,%ecx)
+ decl %ecx
+ andl $~7,%ecx
+
+ /*
+ * This wouldn't be any faster if it were unrolled, since the loop
+ * control instructions are much faster than the fstl and/or done
+ * in parallel with it so their overhead is insignificant.
+ */
+fpureg_i586_bzero_loop:
+ fstl 0(%edx)
+ addl $8,%edx
+ subl $8,%ecx
+ cmpl $8,%ecx
+ jae fpureg_i586_bzero_loop
+
+ cmpl $0,PCPU(FPCURTHREAD)
+ je i586_bz3
+
+ /* XXX check that the condition for cases 1-2 stayed false. */
+i586_bzero_oops:
+ int $3
+ jmp i586_bzero_oops
+
+ frstor 0(%esp)
+ addl $108,%esp
+ lmsw %ax
+ movb $0xfe,kernel_fpu_lock
+ ret
+
+i586_bz3:
+ fstp %st(0)
+ lmsw %ax
+ movb $0xfe,kernel_fpu_lock
+ ret
+
+intreg_i586_bzero:
+ /*
+ * `rep stos' seems to be the best method in practice for small
+ * counts. Fancy methods usually take too long to start up due
+ * to cache and BTB misses.
+ */
+ pushl %edi
+ movl %edx,%edi
+ xorl %eax,%eax
+ shrl $2,%ecx
+ cld
+ rep
+ stosl
+ movl 12(%esp),%ecx
+ andl $3,%ecx
+ jne 1f
+ popl %edi
+ ret
+
+1:
+ rep
+ stosb
+ popl %edi
+ ret
+#endif /* I586_CPU && defined(DEV_NPX) */
+
+ENTRY(sse2_pagezero)
+ pushl %ebx
+ movl 8(%esp),%ecx
+ movl %ecx,%eax
+ addl $4096,%eax
+ xor %ebx,%ebx
+1:
+ movnti %ebx,(%ecx)
+ addl $4,%ecx
+ cmpl %ecx,%eax
+ jne 1b
+ sfence
+ popl %ebx
+ ret
+
+ENTRY(i686_pagezero)
+ pushl %edi
+ pushl %ebx
+
+ movl 12(%esp), %edi
+ movl $1024, %ecx
+ cld
+
+ ALIGN_TEXT
+1:
+ xorl %eax, %eax
+ repe
+ scasl
+ jnz 2f
+
+ popl %ebx
+ popl %edi
+ ret
+
+ ALIGN_TEXT
+
+2:
+ incl %ecx
+ subl $4, %edi
+
+ movl %ecx, %edx
+ cmpl $16, %ecx
+
+ jge 3f
+
+ movl %edi, %ebx
+ andl $0x3f, %ebx
+ shrl %ebx
+ shrl %ebx
+ movl $16, %ecx
+ subl %ebx, %ecx
+
+3:
+ subl %ecx, %edx
+ rep
+ stosl
+
+ movl %edx, %ecx
+ testl %edx, %edx
+ jnz 1b
+
+ popl %ebx
+ popl %edi
+ ret
+
+/* fillw(pat, base, cnt) */
+ENTRY(fillw)
+ pushl %edi
+ movl 8(%esp),%eax
+ movl 12(%esp),%edi
+ movl 16(%esp),%ecx
+ cld
+ rep
+ stosw
+ popl %edi
+ ret
+
+ENTRY(bcopyb)
+ pushl %esi
+ pushl %edi
+ movl 12(%esp),%esi
+ movl 16(%esp),%edi
+ movl 20(%esp),%ecx
+ movl %edi,%eax
+ subl %esi,%eax
+ cmpl %ecx,%eax /* overlapping && src < dst? */
+ jb 1f
+ cld /* nope, copy forwards */
+ rep
+ movsb
+ popl %edi
+ popl %esi
+ ret
+
+ ALIGN_TEXT
+1:
+ addl %ecx,%edi /* copy backwards. */
+ addl %ecx,%esi
+ decl %edi
+ decl %esi
+ std
+ rep
+ movsb
+ popl %edi
+ popl %esi
+ cld
+ ret
+
+ENTRY(bcopy)
+ MEXITCOUNT
+ jmp *bcopy_vector
+
+/*
+ * generic_bcopy(src, dst, cnt)
+ * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+ */
+ENTRY(generic_bcopy)
+ pushl %esi
+ pushl %edi
+ movl 12(%esp),%esi
+ movl 16(%esp),%edi
+ movl 20(%esp),%ecx
+
+ movl %edi,%eax
+ subl %esi,%eax
+ cmpl %ecx,%eax /* overlapping && src < dst? */
+ jb 1f
+
+ shrl $2,%ecx /* copy by 32-bit words */
+ cld /* nope, copy forwards */
+ rep
+ movsl
+ movl 20(%esp),%ecx
+ andl $3,%ecx /* any bytes left? */
+ rep
+ movsb
+ popl %edi
+ popl %esi
+ ret
+
+ ALIGN_TEXT
+1:
+ addl %ecx,%edi /* copy backwards */
+ addl %ecx,%esi
+ decl %edi
+ decl %esi
+ andl $3,%ecx /* any fractional bytes? */
+ std
+ rep
+ movsb
+ movl 20(%esp),%ecx /* copy remainder by 32-bit words */
+ shrl $2,%ecx
+ subl $3,%esi
+ subl $3,%edi
+ rep
+ movsl
+ popl %edi
+ popl %esi
+ cld
+ ret
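+
+/*
+ * C equivalent of the overlap test above (illustrative only):
+ *
+ * if ((u_int)dst - (u_int)src < len)
+ * copy backwards from the high end;
+ * else
+ * copy forwards, 32-bit words then leftover bytes;
+ *
+ * The single unsigned compare is true exactly when dst lies inside
+ * [src, src + len), the only case that requires a backwards copy.
+ */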
+
+#if defined(I586_CPU) && defined(DEV_NPX)
+ENTRY(i586_bcopy)
+ pushl %esi
+ pushl %edi
+ movl 12(%esp),%esi
+ movl 16(%esp),%edi
+ movl 20(%esp),%ecx
+
+ movl %edi,%eax
+ subl %esi,%eax
+ cmpl %ecx,%eax /* overlapping && src < dst? */
+ jb 1f
+
+ cmpl $1024,%ecx
+ jb small_i586_bcopy
+
+ sarb $1,kernel_fpu_lock
+ jc small_i586_bcopy
+ cmpl $0,PCPU(FPCURTHREAD)
+ je i586_bc1
+
+ /* XXX turn off handling of cases 1-2, as above. */
+ movb $0xfe,kernel_fpu_lock
+ jmp small_i586_bcopy
+
+ smsw %dx
+ clts
+ subl $108,%esp
+ fnsave 0(%esp)
+ jmp 4f
+
+i586_bc1:
+ smsw %dx
+ clts
+ fninit /* XXX should avoid needing this */
+
+ ALIGN_TEXT
+4:
+ pushl %ecx
+#define DCACHE_SIZE 8192
+ cmpl $(DCACHE_SIZE-512)/2,%ecx
+ jbe 2f
+ movl $(DCACHE_SIZE-512)/2,%ecx
+2:
+ subl %ecx,0(%esp)
+ cmpl $256,%ecx
+ jb 5f /* XXX should prefetch if %ecx >= 32 */
+ pushl %esi
+ pushl %ecx
+ ALIGN_TEXT
+3:
+ movl 0(%esi),%eax
+ movl 32(%esi),%eax
+ movl 64(%esi),%eax
+ movl 96(%esi),%eax
+ movl 128(%esi),%eax
+ movl 160(%esi),%eax
+ movl 192(%esi),%eax
+ movl 224(%esi),%eax
+ addl $256,%esi
+ subl $256,%ecx
+ cmpl $256,%ecx
+ jae 3b
+ popl %ecx
+ popl %esi
+5:
+ ALIGN_TEXT
+large_i586_bcopy_loop:
+ fildq 0(%esi)
+ fildq 8(%esi)
+ fildq 16(%esi)
+ fildq 24(%esi)
+ fildq 32(%esi)
+ fildq 40(%esi)
+ fildq 48(%esi)
+ fildq 56(%esi)
+ fistpq 56(%edi)
+ fistpq 48(%edi)
+ fistpq 40(%edi)
+ fistpq 32(%edi)
+ fistpq 24(%edi)
+ fistpq 16(%edi)
+ fistpq 8(%edi)
+ fistpq 0(%edi)
+ addl $64,%esi
+ addl $64,%edi
+ subl $64,%ecx
+ cmpl $64,%ecx
+ jae large_i586_bcopy_loop
+ popl %eax
+ addl %eax,%ecx
+ cmpl $64,%ecx
+ jae 4b
+
+ cmpl $0,PCPU(FPCURTHREAD)
+ je i586_bc2
+
+ /* XXX check that the condition for cases 1-2 stayed false. */
+i586_bcopy_oops:
+ int $3
+ jmp i586_bcopy_oops
+
+ frstor 0(%esp)
+ addl $108,%esp
+i586_bc2:
+ lmsw %dx
+ movb $0xfe,kernel_fpu_lock
+
+/*
+ * This is a duplicate of the main part of generic_bcopy. See the comments
+ * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and
+ * would mess up high resolution profiling.
+ */
+ ALIGN_TEXT
+small_i586_bcopy:
+ shrl $2,%ecx
+ cld
+ rep
+ movsl
+ movl 20(%esp),%ecx
+ andl $3,%ecx
+ rep
+ movsb
+ popl %edi
+ popl %esi
+ ret
+
+ ALIGN_TEXT
+1:
+ addl %ecx,%edi
+ addl %ecx,%esi
+ decl %edi
+ decl %esi
+ andl $3,%ecx
+ std
+ rep
+ movsb
+ movl 20(%esp),%ecx
+ shrl $2,%ecx
+ subl $3,%esi
+ subl $3,%edi
+ rep
+ movsl
+ popl %edi
+ popl %esi
+ cld
+ ret
+#endif /* I586_CPU && defined(DEV_NPX) */
+
+/*
+ * Note: memcpy does not support overlapping copies
+ */
+ENTRY(memcpy)
+ pushl %edi
+ pushl %esi
+ movl 12(%esp),%edi
+ movl 16(%esp),%esi
+ movl 20(%esp),%ecx
+ movl %edi,%eax
+ shrl $2,%ecx /* copy by 32-bit words */
+ cld /* always copy forwards; no overlap check here */
+ rep
+ movsl
+ movl 20(%esp),%ecx
+ andl $3,%ecx /* any bytes left? */
+ rep
+ movsb
+ popl %esi
+ popl %edi
+ ret
+
+
+/*****************************************************************************/
+/* copyout and fubyte family */
+/*****************************************************************************/
+/*
+ * Access user memory from inside the kernel. These routines and possibly
+ * the math- and DOS emulators should be the only places that do this.
+ *
+ * We have to access the memory with user's permissions, so use a segment
+ * selector with RPL 3. For writes to user space we have to additionally
+ * check the PTE for write permission, because the 386 does not check
+ * write permissions when we are executing with EPL 0. The 486 does check
+ * this if the WP bit is set in CR0, so we can use a simpler version here.
+ *
+ * These routines set curpcb->onfault for the time they execute. When a
+ * protection violation occurs inside the functions, the trap handler
+ * returns to *curpcb->onfault instead of the function.
+ */
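+
+/*
+ * C sketch of the onfault protocol described above (illustrative
+ * only): the trap handler resumes at *curpcb->pcb_onfault, so the
+ * copy routine reports EFAULT instead of panicking.  "fault_label"
+ * stands in for the per-routine handler (e.g. copyout_fault below).
+ *
+ * curpcb->pcb_onfault = fault_label;
+ * ... touch user memory; a protection violation resumes at
+ * fault_label, which clears pcb_onfault and returns EFAULT ...
+ * curpcb->pcb_onfault = NULL;
+ * return (0);
+ */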
+
+/*
+ * copyout(from_kernel, to_user, len) - MP SAFE (if not I386_CPU)
+ */
+ENTRY(copyout)
+ MEXITCOUNT
+ jmp *copyout_vector
+
+ENTRY(generic_copyout)
+ movl PCPU(CURPCB),%eax
+ movl $copyout_fault,PCB_ONFAULT(%eax)
+ pushl %esi
+ pushl %edi
+ pushl %ebx
+ movl 16(%esp),%esi
+ movl 20(%esp),%edi
+ movl 24(%esp),%ebx
+ testl %ebx,%ebx /* anything to do? */
+ jz done_copyout
+
+ /*
+ * Check explicitly for non-user addresses. If 486 write protection
+ * is being used, this check is essential because we are in kernel
+ * mode so the h/w does not provide any protection against writing
+ * kernel addresses.
+ */
+
+ /*
+ * First, prevent address wrapping.
+ */
+ movl %edi,%eax
+ addl %ebx,%eax
+ jc copyout_fault
+/*
+ * XXX STOP USING VM_MAXUSER_ADDRESS.
+ * It is an end address, not a max, so every time it is used correctly it
+ * looks like there is an off by one error, and of course it caused an off
+ * by one error in several places.
+ */
+ cmpl $VM_MAXUSER_ADDRESS,%eax
+ ja copyout_fault
+
+ /* bcopy(%esi, %edi, %ebx) */
+ movl %ebx,%ecx
+
+#if defined(I586_CPU) && defined(DEV_NPX)
+ ALIGN_TEXT
+slow_copyout:
+#endif
+ shrl $2,%ecx
+ cld
+ rep
+ movsl
+ movb %bl,%cl
+ andb $3,%cl
+ rep
+ movsb
+
+done_copyout:
+ popl %ebx
+ popl %edi
+ popl %esi
+ xorl %eax,%eax
+ movl PCPU(CURPCB),%edx
+ movl %eax,PCB_ONFAULT(%edx)
+ ret
+
+ ALIGN_TEXT
+copyout_fault:
+ popl %ebx
+ popl %edi
+ popl %esi
+ movl PCPU(CURPCB),%edx
+ movl $0,PCB_ONFAULT(%edx)
+ movl $EFAULT,%eax
+ ret
+
+#if defined(I586_CPU) && defined(DEV_NPX)
+ENTRY(i586_copyout)
+ /*
+ * Duplicated from generic_copyout. Could be done a bit better.
+ */
+ movl PCPU(CURPCB),%eax
+ movl $copyout_fault,PCB_ONFAULT(%eax)
+ pushl %esi
+ pushl %edi
+ pushl %ebx
+ movl 16(%esp),%esi
+ movl 20(%esp),%edi
+ movl 24(%esp),%ebx
+ testl %ebx,%ebx /* anything to do? */
+ jz done_copyout
+
+ /*
+ * Check explicitly for non-user addresses. If 486 write protection
+ * is being used, this check is essential because we are in kernel
+ * mode so the h/w does not provide any protection against writing
+ * kernel addresses.
+ */
+
+ /*
+ * First, prevent address wrapping.
+ */
+ movl %edi,%eax
+ addl %ebx,%eax
+ jc copyout_fault
+/*
+ * XXX STOP USING VM_MAXUSER_ADDRESS.
+ * It is an end address, not a max, so every time it is used correctly it
+ * looks like there is an off by one error, and of course it caused an off
+ * by one error in several places.
+ */
+ cmpl $VM_MAXUSER_ADDRESS,%eax
+ ja copyout_fault
+
+ /* bcopy(%esi, %edi, %ebx) */
+3:
+ movl %ebx,%ecx
+ /*
+ * End of duplicated code.
+ */
+
+ cmpl $1024,%ecx
+ jb slow_copyout
+
+ pushl %ecx
+ call fastmove
+ addl $4,%esp
+ jmp done_copyout
+#endif /* I586_CPU && defined(DEV_NPX) */
+
+/*
+ * copyin(from_user, to_kernel, len) - MP SAFE
+ */
+ENTRY(copyin)
+ MEXITCOUNT
+ jmp *copyin_vector
+
+ENTRY(generic_copyin)
+ movl PCPU(CURPCB),%eax
+ movl $copyin_fault,PCB_ONFAULT(%eax)
+ pushl %esi
+ pushl %edi
+ movl 12(%esp),%esi /* caddr_t from */
+ movl 16(%esp),%edi /* caddr_t to */
+ movl 20(%esp),%ecx /* size_t len */
+
+ /*
+ * make sure address is valid
+ */
+ movl %esi,%edx
+ addl %ecx,%edx
+ jc copyin_fault
+ cmpl $VM_MAXUSER_ADDRESS,%edx
+ ja copyin_fault
+
+#if defined(I586_CPU) && defined(DEV_NPX)
+ ALIGN_TEXT
+slow_copyin:
+#endif
+ movb %cl,%al
+ shrl $2,%ecx /* copy longword-wise */
+ cld
+ rep
+ movsl
+ movb %al,%cl
+ andb $3,%cl /* copy remaining bytes */
+ rep
+ movsb
+
+#if defined(I586_CPU) && defined(DEV_NPX)
+ ALIGN_TEXT
+done_copyin:
+#endif
+ popl %edi
+ popl %esi
+ xorl %eax,%eax
+ movl PCPU(CURPCB),%edx
+ movl %eax,PCB_ONFAULT(%edx)
+ ret
+
+ ALIGN_TEXT
+copyin_fault:
+ popl %edi
+ popl %esi
+ movl PCPU(CURPCB),%edx
+ movl $0,PCB_ONFAULT(%edx)
+ movl $EFAULT,%eax
+ ret
+
+#if defined(I586_CPU) && defined(DEV_NPX)
+ENTRY(i586_copyin)
+ /*
+ * Duplicated from generic_copyin. Could be done a bit better.
+ */
+ movl PCPU(CURPCB),%eax
+ movl $copyin_fault,PCB_ONFAULT(%eax)
+ pushl %esi
+ pushl %edi
+ movl 12(%esp),%esi /* caddr_t from */
+ movl 16(%esp),%edi /* caddr_t to */
+ movl 20(%esp),%ecx /* size_t len */
+
+ /*
+ * make sure address is valid
+ */
+ movl %esi,%edx
+ addl %ecx,%edx
+ jc copyin_fault
+ cmpl $VM_MAXUSER_ADDRESS,%edx
+ ja copyin_fault
+ /*
+ * End of duplicated code.
+ */
+
+ cmpl $1024,%ecx
+ jb slow_copyin
+
+ pushl %ebx /* XXX prepare for fastmove_fault */
+ pushl %ecx
+ call fastmove
+ addl $8,%esp
+ jmp done_copyin
+#endif /* I586_CPU && defined(DEV_NPX) */
+
+#if defined(I586_CPU) && defined(DEV_NPX)
+/* fastmove(src, dst, len)
+ src in %esi
+ dst in %edi
+ len in %ecx XXX changed to on stack for profiling
+ uses %eax and %edx for tmp. storage
+ */
+/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */
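+/*
+ * Illustrative note (not in the original): for large, 8-byte-aligned
+ * copies fastmove moves data through the FPU with fildq/fistpq pairs,
+ * 8 bytes per instruction, which outperforms rep movsl on Pentium-class
+ * CPUs. That is why the routine must first save any live NPX context
+ * and take ownership of the FPU below.
+ */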
+ENTRY(fastmove)
+ pushl %ebp
+ movl %esp,%ebp
+ subl $PCB_SAVEFPU_SIZE+3*4,%esp
+
+ movl 8(%ebp),%ecx
+ cmpl $63,%ecx
+ jbe fastmove_tail
+
+ testl $7,%esi /* check if src addr is multiple of 8 */
+ jnz fastmove_tail
+
+ testl $7,%edi /* check if dst addr is multiple of 8 */
+ jnz fastmove_tail
+
+ /* XXX grab FPU context atomically. */
+ call ni_cli
+
+/* if (fpcurthread != NULL) { */
+ cmpl $0,PCPU(FPCURTHREAD)
+ je 6f
+/* fnsave(&curpcb->pcb_savefpu); */
+ movl PCPU(CURPCB),%eax
+ fnsave PCB_SAVEFPU(%eax)
+/* FPCURTHREAD = NULL; */
+ movl $0,PCPU(FPCURTHREAD)
+/* } */
+6:
+/* now we own the FPU. */
+
+/*
+ * The process' FP state is saved in the pcb, but if we get
+ * switched, the cpu_switch() will store our FP state in the
+ * pcb. It should be possible to avoid all the copying for
+ * this, e.g., by setting a flag to tell cpu_switch() to
+ * save the state somewhere else.
+ */
+/* tmp = curpcb->pcb_savefpu; */
+ movl %ecx,-12(%ebp)
+ movl %esi,-8(%ebp)
+ movl %edi,-4(%ebp)
+ movl %esp,%edi
+ movl PCPU(CURPCB),%esi
+ addl $PCB_SAVEFPU,%esi
+ cld
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ rep
+ movsl
+ movl -12(%ebp),%ecx
+ movl -8(%ebp),%esi
+ movl -4(%ebp),%edi
+/* stop_emulating(); */
+ clts
+/* fpcurthread = curthread; */
+ movl PCPU(CURTHREAD),%eax
+ movl %eax,PCPU(FPCURTHREAD)
+ movl PCPU(CURPCB),%eax
+
+ /* XXX end of atomic FPU context grab. */
+ call ni_sti
+
+ movl $fastmove_fault,PCB_ONFAULT(%eax)
+4:
+ movl %ecx,-12(%ebp)
+ cmpl $1792,%ecx
+ jbe 2f
+ movl $1792,%ecx
+2:
+ subl %ecx,-12(%ebp)
+ cmpl $256,%ecx
+ jb 5f
+ movl %ecx,-8(%ebp)
+ movl %esi,-4(%ebp)
+ ALIGN_TEXT
+3:
+ movl 0(%esi),%eax
+ movl 32(%esi),%eax
+ movl 64(%esi),%eax
+ movl 96(%esi),%eax
+ movl 128(%esi),%eax
+ movl 160(%esi),%eax
+ movl 192(%esi),%eax
+ movl 224(%esi),%eax
+ addl $256,%esi
+ subl $256,%ecx
+ cmpl $256,%ecx
+ jae 3b
+ movl -8(%ebp),%ecx
+ movl -4(%ebp),%esi
+5:
+ ALIGN_TEXT
+fastmove_loop:
+ fildq 0(%esi)
+ fildq 8(%esi)
+ fildq 16(%esi)
+ fildq 24(%esi)
+ fildq 32(%esi)
+ fildq 40(%esi)
+ fildq 48(%esi)
+ fildq 56(%esi)
+ fistpq 56(%edi)
+ fistpq 48(%edi)
+ fistpq 40(%edi)
+ fistpq 32(%edi)
+ fistpq 24(%edi)
+ fistpq 16(%edi)
+ fistpq 8(%edi)
+ fistpq 0(%edi)
+ addl $-64,%ecx
+ addl $64,%esi
+ addl $64,%edi
+ cmpl $63,%ecx
+ ja fastmove_loop
+ movl -12(%ebp),%eax
+ addl %eax,%ecx
+ cmpl $64,%ecx
+ jae 4b
+
+ /* XXX ungrab FPU context atomically. */
+ call ni_cli
+
+/* curpcb->pcb_savefpu = tmp; */
+ movl %ecx,-12(%ebp)
+ movl %esi,-8(%ebp)
+ movl %edi,-4(%ebp)
+ movl PCPU(CURPCB),%edi
+ addl $PCB_SAVEFPU,%edi
+ movl %esp,%esi
+ cld
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ rep
+ movsl
+ movl -12(%ebp),%ecx
+ movl -8(%ebp),%esi
+ movl -4(%ebp),%edi
+
+/* start_emulating(); */
+ smsw %ax
+ orb $CR0_TS,%al
+ lmsw %ax
+/* fpcurthread = NULL; */
+ movl $0,PCPU(FPCURTHREAD)
+
+ /* XXX end of atomic FPU context ungrab. */
+ call ni_sti
+
+ ALIGN_TEXT
+fastmove_tail:
+ movl PCPU(CURPCB),%eax
+ movl $fastmove_tail_fault,PCB_ONFAULT(%eax)
+
+ movb %cl,%al
+ shrl $2,%ecx /* copy longword-wise */
+ cld
+ rep
+ movsl
+ movb %al,%cl
+ andb $3,%cl /* copy remaining bytes */
+ rep
+ movsb
+
+ movl %ebp,%esp
+ popl %ebp
+ ret
+
+ ALIGN_TEXT
+fastmove_fault:
+ /* XXX ungrab FPU context atomically. */
+ call ni_cli
+
+ movl PCPU(CURPCB),%edi
+ addl $PCB_SAVEFPU,%edi
+ movl %esp,%esi
+ cld
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ rep
+ movsl
+
+ smsw %ax
+ orb $CR0_TS,%al
+ lmsw %ax
+ movl $0,PCPU(FPCURTHREAD)
+
+ /* XXX end of atomic FPU context ungrab. */
+ call ni_sti
+
+fastmove_tail_fault:
+ movl %ebp,%esp
+ popl %ebp
+ addl $8,%esp
+ popl %ebx
+ popl %edi
+ popl %esi
+ movl PCPU(CURPCB),%edx
+ movl $0,PCB_ONFAULT(%edx)
+ movl $EFAULT,%eax
+ ret
+#endif /* I586_CPU && defined(DEV_NPX) */
+
+/*
+ * casuptr. Compare and set user pointer. Returns -1 or the current value.
+ */
+ENTRY(casuptr)
+ movl PCPU(CURPCB),%ecx
+ movl $fusufault,PCB_ONFAULT(%ecx)
+ movl 4(%esp),%edx /* dst */
+ movl 8(%esp),%eax /* old */
+ movl 12(%esp),%ecx /* new */
+
+ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
+ ja fusufault
+
+#ifdef SMP
+ lock
+#endif
+ cmpxchgl %ecx, (%edx) /* Compare and set. */
+
+ /*
+ * The old value is in %eax. If the store succeeded it will be the
+ * value we expected (old) from before the store, otherwise it will
+ * be the current value.
+ */
+
+ movl PCPU(CURPCB),%ecx
+ movl $fusufault,PCB_ONFAULT(%ecx)
+ movl $0,PCB_ONFAULT(%ecx)
+ ret
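+
+/*
+ * Usage sketch (illustrative only, not from this changeset): a caller
+ * compares the return value against the expected old value; -1 means the
+ * user address faulted, any other mismatch means the CAS lost a race.
+ * As the header above notes, -1 is also a possible stored value:
+ *
+ *	intptr_t v = casuptr(p, old, new);
+ *	if (v == -1)
+ *		return (EFAULT);
+ *	if (v != old)
+ *		goto retry;
+ */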
+
+/*
+ * fu{byte,sword,word} - MP SAFE
+ *
+ * Fetch a byte (sword, word) from user memory
+ */
+ENTRY(fuword)
+ movl PCPU(CURPCB),%ecx
+ movl $fusufault,PCB_ONFAULT(%ecx)
+ movl 4(%esp),%edx /* from */
+
+ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
+ ja fusufault
+
+ movl (%edx),%eax
+ movl $0,PCB_ONFAULT(%ecx)
+ ret
+
+ENTRY(fuword32)
+ jmp fuword
+
+/*
+ * These two routines are called from the profiling code, potentially
+ * at interrupt time. If they fail, that's okay, good things will
+ * happen later. Fail all the time for now - until the trap code is
+ * able to deal with this.
+ */
+ALTENTRY(suswintr)
+ENTRY(fuswintr)
+ movl $-1,%eax
+ ret
+
+/*
+ * fuword16 - MP SAFE
+ */
+ENTRY(fuword16)
+ movl PCPU(CURPCB),%ecx
+ movl $fusufault,PCB_ONFAULT(%ecx)
+ movl 4(%esp),%edx
+
+ cmpl $VM_MAXUSER_ADDRESS-2,%edx
+ ja fusufault
+
+ movzwl (%edx),%eax
+ movl $0,PCB_ONFAULT(%ecx)
+ ret
+
+/*
+ * fubyte - MP SAFE
+ */
+ENTRY(fubyte)
+ movl PCPU(CURPCB),%ecx
+ movl $fusufault,PCB_ONFAULT(%ecx)
+ movl 4(%esp),%edx
+
+ cmpl $VM_MAXUSER_ADDRESS-1,%edx
+ ja fusufault
+
+ movzbl (%edx),%eax
+ movl $0,PCB_ONFAULT(%ecx)
+ ret
+
+ ALIGN_TEXT
+fusufault:
+ movl PCPU(CURPCB),%ecx
+ xorl %eax,%eax
+ movl %eax,PCB_ONFAULT(%ecx)
+ decl %eax
+ ret
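+
+/*
+ * Note (illustrative, not in the original): fusufault clears pcb_onfault
+ * and returns -1, so for fuword() a fetched value of -1 is
+ * indistinguishable from a fault:
+ *
+ *	long v = fuword(uaddr);
+ *	if (v == -1)
+ *		...either a fault or a genuine -1 in user memory...
+ *
+ * Callers that must tell the two apart can use copyin() instead.
+ */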
+
+/*
+ * su{byte,sword,word} - MP SAFE (if not I386_CPU)
+ *
+ * Write a byte (word, longword) to user memory
+ */
+ENTRY(suword)
+ movl PCPU(CURPCB),%ecx
+ movl $fusufault,PCB_ONFAULT(%ecx)
+ movl 4(%esp),%edx
+
+ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */
+ ja fusufault
+
+ movl 8(%esp),%eax
+ movl %eax,(%edx)
+ xorl %eax,%eax
+ movl PCPU(CURPCB),%ecx
+ movl %eax,PCB_ONFAULT(%ecx)
+ ret
+
+ENTRY(suword32)
+ jmp suword
+
+/*
+ * suword16 - MP SAFE (if not I386_CPU)
+ */
+ENTRY(suword16)
+ movl PCPU(CURPCB),%ecx
+ movl $fusufault,PCB_ONFAULT(%ecx)
+ movl 4(%esp),%edx
+
+ cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */
+ ja fusufault
+
+ movw 8(%esp),%ax
+ movw %ax,(%edx)
+ xorl %eax,%eax
+ movl PCPU(CURPCB),%ecx /* restore trashed register */
+ movl %eax,PCB_ONFAULT(%ecx)
+ ret
+
+/*
+ * subyte - MP SAFE (if not I386_CPU)
+ */
+ENTRY(subyte)
+ movl PCPU(CURPCB),%ecx
+ movl $fusufault,PCB_ONFAULT(%ecx)
+ movl 4(%esp),%edx
+
+ cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */
+ ja fusufault
+
+ movb 8(%esp),%al
+ movb %al,(%edx)
+ xorl %eax,%eax
+ movl PCPU(CURPCB),%ecx /* restore trashed register */
+ movl %eax,PCB_ONFAULT(%ecx)
+ ret
+
+/*
+ * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
+ *
+ * copy a string from 'from' to 'to', stopping when a 0 character is reached.
+ * Return ENAMETOOLONG if the string is longer than maxlen, and
+ * EFAULT on protection violations. If lencopied is non-zero,
+ * return the actual length in *lencopied.
+ */
+ENTRY(copyinstr)
+ pushl %esi
+ pushl %edi
+ movl PCPU(CURPCB),%ecx
+ movl $cpystrflt,PCB_ONFAULT(%ecx)
+
+ movl 12(%esp),%esi /* %esi = from */
+ movl 16(%esp),%edi /* %edi = to */
+ movl 20(%esp),%edx /* %edx = maxlen */
+
+ movl $VM_MAXUSER_ADDRESS,%eax
+
+ /* make sure 'from' is within bounds */
+ subl %esi,%eax
+ jbe cpystrflt
+
+ /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
+ cmpl %edx,%eax
+ jae 1f
+ movl %eax,%edx
+ movl %eax,20(%esp)
+1:
+ incl %edx
+ cld
+
+2:
+ decl %edx
+ jz 3f
+
+ lodsb
+ stosb
+ orb %al,%al
+ jnz 2b
+
+ /* Success -- 0 byte reached */
+ decl %edx
+ xorl %eax,%eax
+ jmp cpystrflt_x
+3:
+ /* edx is zero - return ENAMETOOLONG or EFAULT */
+ cmpl $VM_MAXUSER_ADDRESS,%esi
+ jae cpystrflt
+4:
+ movl $ENAMETOOLONG,%eax
+ jmp cpystrflt_x
+
+cpystrflt:
+ movl $EFAULT,%eax
+
+cpystrflt_x:
+ /* set *lencopied and return %eax */
+ movl PCPU(CURPCB),%ecx
+ movl $0,PCB_ONFAULT(%ecx)
+ movl 20(%esp),%ecx
+ subl %edx,%ecx
+ movl 24(%esp),%edx
+ testl %edx,%edx
+ jz 1f
+ movl %ecx,(%edx)
+1:
+ popl %edi
+ popl %esi
+ ret
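+
+/*
+ * Usage sketch (illustrative; 'uap->path' and MAXPATHLEN stand in for a
+ * real caller): fetch a NUL-terminated string from userland, with the
+ * copied length, including the terminating NUL, reported back:
+ *
+ *	char path[MAXPATHLEN];
+ *	size_t done;
+ *	error = copyinstr(uap->path, path, sizeof(path), &done);
+ */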
+
+
+/*
+ * copystr(from, to, maxlen, int *lencopied) - MP SAFE
+ */
+ENTRY(copystr)
+ pushl %esi
+ pushl %edi
+
+ movl 12(%esp),%esi /* %esi = from */
+ movl 16(%esp),%edi /* %edi = to */
+ movl 20(%esp),%edx /* %edx = maxlen */
+ incl %edx
+ cld
+1:
+ decl %edx
+ jz 4f
+ lodsb
+ stosb
+ orb %al,%al
+ jnz 1b
+
+ /* Success -- 0 byte reached */
+ decl %edx
+ xorl %eax,%eax
+ jmp 6f
+4:
+ /* edx is zero -- return ENAMETOOLONG */
+ movl $ENAMETOOLONG,%eax
+
+6:
+ /* set *lencopied and return %eax */
+ movl 20(%esp),%ecx
+ subl %edx,%ecx
+ movl 24(%esp),%edx
+ testl %edx,%edx
+ jz 7f
+ movl %ecx,(%edx)
+7:
+ popl %edi
+ popl %esi
+ ret
+
+ENTRY(bcmp)
+ pushl %edi
+ pushl %esi
+ movl 12(%esp),%edi
+ movl 16(%esp),%esi
+ movl 20(%esp),%edx
+ xorl %eax,%eax
+
+ movl %edx,%ecx
+ shrl $2,%ecx
+ cld /* compare forwards */
+ repe
+ cmpsl
+ jne 1f
+
+ movl %edx,%ecx
+ andl $3,%ecx
+ repe
+ cmpsb
+ je 2f
+1:
+ incl %eax
+2:
+ popl %esi
+ popl %edi
+ ret
+
+
+/*
+ * Handling of special 386 registers and descriptor tables etc
+ */
+/* lgdt_finish() - reload segment selectors after the GDT has been set */
+ENTRY(lgdt_finish)
+#if 0
+ /* reload the descriptor table */
+ movl 4(%esp),%eax
+ lgdt (%eax)
+#endif
+ /* flush the prefetch q */
+ jmp 1f
+ nop
+1:
+ /* reload "stale" selectors */
+ movl $KDSEL,%eax
+ movl %eax,%ds
+ movl %eax,%es
+ movl %eax,%gs
+ movl %eax,%ss
+ movl $KPSEL,%eax
+ movl %eax,%fs
+
+ /* reload code selector by turning return into intersegmental return */
+ movl (%esp),%eax
+ pushl %eax
+ movl $KCSEL,4(%esp)
+ lret
+
+/* ssdtosd(*ssdp,*sdp) */
+ENTRY(ssdtosd)
+ pushl %ebx
+ movl 8(%esp),%ecx
+ movl 8(%ecx),%ebx
+ shll $16,%ebx
+ movl (%ecx),%edx
+ roll $16,%edx
+ movb %dh,%bl
+ movb %dl,%bh
+ rorl $8,%ebx
+ movl 4(%ecx),%eax
+ movw %ax,%dx
+ andl $0xf0000,%eax
+ orl %eax,%ebx
+ movl 12(%esp),%ecx
+ movl %edx,(%ecx)
+ movl %ebx,4(%ecx)
+ popl %ebx
+ ret
+
+/* void reset_dbregs() */
+ENTRY(reset_dbregs)
+ movl $0,%eax
+	movl	%eax,%dr7	/* disable all breakpoints first */
+ movl %eax,%dr0
+ movl %eax,%dr1
+ movl %eax,%dr2
+ movl %eax,%dr3
+ movl %eax,%dr6
+ ret
+
+/*****************************************************************************/
+/* setjump, longjump */
+/*****************************************************************************/
+
+ENTRY(setjmp)
+ movl 4(%esp),%eax
+ movl %ebx,(%eax) /* save ebx */
+ movl %esp,4(%eax) /* save esp */
+ movl %ebp,8(%eax) /* save ebp */
+ movl %esi,12(%eax) /* save esi */
+ movl %edi,16(%eax) /* save edi */
+ movl (%esp),%edx /* get rta */
+ movl %edx,20(%eax) /* save eip */
+ xorl %eax,%eax /* return(0); */
+ ret
+
+ENTRY(longjmp)
+ movl 4(%esp),%eax
+ movl (%eax),%ebx /* restore ebx */
+ movl 4(%eax),%esp /* restore esp */
+ movl 8(%eax),%ebp /* restore ebp */
+ movl 12(%eax),%esi /* restore esi */
+ movl 16(%eax),%edi /* restore edi */
+ movl 20(%eax),%edx /* get rta */
+ movl %edx,(%esp) /* put in return frame */
+ xorl %eax,%eax /* return(1); */
+ incl %eax
+ ret
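+
+/*
+ * Usage sketch (illustrative only): the pair behaves like the userland
+ * API, with a caller-supplied save area:
+ *
+ *	if (setjmp(&jb) == 0)
+ *		...normal path; a later longjmp(&jb) unwinds to here...
+ *	else
+ *		...resumed after longjmp(); setjmp appeared to return 1...
+ */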
+
+/*
+ * Support for BB-profiling (gcc -a). The kernbb program will extract
+ * the data from the kernel.
+ */
+
+ .data
+ ALIGN_DATA
+ .globl bbhead
+bbhead:
+ .long 0
+
+ .text
+NON_GPROF_ENTRY(__bb_init_func)
+ movl 4(%esp),%eax
+ movl $1,(%eax)
+ movl bbhead,%edx
+ movl %edx,16(%eax)
+ movl %eax,bbhead
+ NON_GPROF_RET
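+
+/*
+ * C equivalent (illustrative; the field names are invented): each counter
+ * block is marked initialized and pushed onto the singly linked list
+ * headed by bbhead, which kernbb later walks:
+ *
+ *	blk->initialized = 1;	(word at offset 0)
+ *	blk->next = bbhead;	(word at offset 16)
+ *	bbhead = blk;
+ */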
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s
new file mode 100644
index 0000000000..f468c429bd
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s
@@ -0,0 +1,445 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.143 2003/09/30 08:11:35 jeff Exp $
+ */
+
+#include "opt_npx.h"
+
+#include <machine/asmacros.h>
+
+#include "assym.s"
+
+
+/*****************************************************************************/
+/* Scheduling */
+/*****************************************************************************/
+
+ .text
+
+/*
+ * cpu_throw()
+ *
+ * This is the second half of cpu_switch(). It is used when the current
+ * thread is either a dummy or slated to die, and we no longer care
+ * about its state. This is only a slight optimization and is probably
+ * not worth it anymore. Note that we need to clear the pm_active bits so
+ * we do need the old proc if it still exists.
+ * 0(%esp) = ret
+ * 4(%esp) = oldtd
+ * 8(%esp) = newtd
+ */
+ENTRY(cpu_throw)
+ movl PCPU(CPUID), %esi
+ movl 4(%esp),%ecx /* Old thread */
+ testl %ecx,%ecx /* no thread? */
+ jz 1f
+ /* release bit from old pm_active */
+ movl PCPU(CURPMAP), %ebx
+#ifdef SMP
+ lock
+#endif
+ btrl %esi, PM_ACTIVE(%ebx) /* clear old */
+1:
+ movl 8(%esp),%ecx /* New thread */
+ movl TD_PCB(%ecx),%edx
+ movl PCB_CR3(%edx),%eax
+
+ movl %eax,PCPU(CR3) /* new address space */
+
+ pushl %ecx
+ pushl %edx
+ pushl %esi
+ pushl %eax
+ call load_cr3
+ addl $4,%esp
+ popl %esi
+ popl %edx
+ popl %ecx
+
+ /* set bit in new pm_active */
+ movl TD_PROC(%ecx),%eax
+ movl P_VMSPACE(%eax), %ebx
+ addl $VM_PMAP, %ebx
+ movl %ebx, PCPU(CURPMAP)
+#ifdef SMP
+ lock
+#endif
+ btsl %esi, PM_ACTIVE(%ebx) /* set new */
+ jmp sw1
+
+/*
+ * cpu_switch(old, new)
+ *
+ * Save the current thread state, then select the next thread to run
+ * and load its state.
+ * 0(%esp) = ret
+ * 4(%esp) = oldtd
+ * 8(%esp) = newtd
+ */
+ENTRY(cpu_switch)
+
+ /* Switch to new thread. First, save context. */
+ movl 4(%esp),%ecx
+
+#ifdef INVARIANTS
+ testl %ecx,%ecx /* no thread? */
+ jz badsw2 /* no, panic */
+#endif
+
+ movl TD_PCB(%ecx),%edx
+
+ movl (%esp),%eax /* Hardware registers */
+ movl %eax,PCB_EIP(%edx)
+ movl %ebx,PCB_EBX(%edx)
+ movl %esp,PCB_ESP(%edx)
+ movl %ebp,PCB_EBP(%edx)
+ movl %esi,PCB_ESI(%edx)
+ movl %edi,PCB_EDI(%edx)
+ movl %gs,PCB_GS(%edx)
+#if 0
+ pushfl /* PSL */
+ popl PCB_PSL(%edx)
+#endif
+ /* Check to see if we need to call a switchout function. */
+ movl PCB_SWITCHOUT(%edx),%eax
+ cmpl $0, %eax
+ je 1f
+ call *%eax
+1:
+ /* Test if debug registers should be saved. */
+ testl $PCB_DBREGS,PCB_FLAGS(%edx)
+ jz 1f /* no, skip over */
+ movl %dr7,%eax /* yes, do the save */
+ movl %eax,PCB_DR7(%edx)
+ andl $0x0000fc00, %eax /* disable all watchpoints */
+ movl %eax,%dr7
+ movl %dr6,%eax
+ movl %eax,PCB_DR6(%edx)
+ movl %dr3,%eax
+ movl %eax,PCB_DR3(%edx)
+ movl %dr2,%eax
+ movl %eax,PCB_DR2(%edx)
+ movl %dr1,%eax
+ movl %eax,PCB_DR1(%edx)
+ movl %dr0,%eax
+ movl %eax,PCB_DR0(%edx)
+1:
+
+#ifdef DEV_NPX
+ /* have we used fp, and need a save? */
+ cmpl %ecx,PCPU(FPCURTHREAD)
+ jne 1f
+ addl $PCB_SAVEFPU,%edx /* h/w bugs make saving complicated */
+ pushl %edx
+ call npxsave /* do it in a big C function */
+ popl %eax
+1:
+#endif
+
+ /* Save is done. Now fire up new thread. Leave old vmspace. */
+ movl %ecx,%edi
+ movl 8(%esp),%ecx /* New thread */
+#ifdef INVARIANTS
+ testl %ecx,%ecx /* no thread? */
+ jz badsw3 /* no, panic */
+#endif
+ movl TD_PCB(%ecx),%edx
+ movl PCPU(CPUID), %esi
+
+ /* switch address space */
+ movl PCB_CR3(%edx),%eax
+
+ cmpl %eax,IdlePTD /* Kernel address space? */
+	je	sw1
+ /* XXX optimize later KMM */
+#if 0
+ movl %cr3,%ebx /* The same address space? */
+#else
+ movl PCPU(CR3),%ebx
+#endif
+ cmpl %ebx,%eax
+ je sw1
+
+ movl %eax,PCPU(CR3) /* new address space */
+
+ pushl %edx
+ pushl %ecx
+ pushl %esi
+ pushl %eax
+ call load_cr3 /* inform xen of the switch */
+ addl $4,%esp
+ popl %esi
+ popl %ecx
+ popl %edx
+
+ /* Release bit from old pmap->pm_active */
+ movl PCPU(CURPMAP), %ebx
+
+#ifdef SMP
+ lock
+#endif
+ btrl %esi, PM_ACTIVE(%ebx) /* clear old */
+ /* Set bit in new pmap->pm_active */
+ movl TD_PROC(%ecx),%eax /* newproc */
+ movl P_VMSPACE(%eax), %ebx
+ addl $VM_PMAP, %ebx
+ movl %ebx, PCPU(CURPMAP)
+#ifdef SMP
+ lock
+#endif
+ btsl %esi, PM_ACTIVE(%ebx) /* set new */
+sw1:
+
+#if 0
+
+ /* only one task selector under Xen */
+ /*
+ * At this point, we've switched address spaces and are ready
+ * to load up the rest of the next context.
+ */
+ cmpl $0, PCB_EXT(%edx) /* has pcb extension? */
+ je 1f /* If not, use the default */
+ btsl %esi, private_tss /* mark use of private tss */
+ movl PCB_EXT(%edx), %edi /* new tss descriptor */
+ jmp 2f /* Load it up */
+
+1: /*
+ * Use the common default TSS instead of our own.
+ * Set our stack pointer into the TSS, it's set to just
+ * below the PCB. In C, common_tss.tss_esp0 = &pcb - 16;
+ */
+ leal -16(%edx), %ebx /* leave space for vm86 */
+ movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0
+
+ /*
+ * Test this CPU's bit in the bitmap to see if this
+ * CPU was using a private TSS.
+ */
+ btrl %esi, private_tss /* Already using the common? */
+ jae 3f /* if so, skip reloading */
+ PCPU_ADDR(COMMON_TSSD, %edi)
+2:
+ /* Move correct tss descriptor into GDT slot, then reload tr. */
+ movl PCPU(TSS_GDT), %ebx /* entry in GDT */
+ movl 0(%edi), %eax
+ movl %eax, 0(%ebx)
+ movl 4(%edi), %eax
+ movl %eax, 4(%ebx)
+
+ movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
+ ltr %si
+#endif /* !XEN */
+3:
+ /* notify Xen of task switch */
+ pushl %edx /* &pcb is the new stack base */
+ pushl $KDSEL
+ pushl $HYPERVISOR_STACK_SWITCH
+ call ni_queue_multicall2
+ addl $12,%esp
+ /* XXX handle DOM0 IOPL case here (KMM) */
+	/* we currently don't support running FreeBSD in DOM0, so skip for now */
+
+ call ni_execute_multicall_list
+
+ /* Restore context. */
+ movl PCB_EBX(%edx),%ebx
+ movl PCB_ESP(%edx),%esp
+ movl PCB_EBP(%edx),%ebp
+ movl PCB_ESI(%edx),%esi
+ movl PCB_EDI(%edx),%edi
+ movl PCB_EIP(%edx),%eax
+ movl %eax,(%esp)
+#if 0
+ pushl PCB_PSL(%edx)
+ popfl
+#endif
+ movl %edx, PCPU(CURPCB)
+ movl %ecx, PCPU(CURTHREAD) /* into next thread */
+
+ /*
+ * Determine the LDT to use and load it if is the default one and
+ * that is not the current one.
+ */
+ movl TD_PROC(%ecx),%eax
+ cmpl $0,P_MD+MD_LDT(%eax)
+ jnz 1f
+ movl _default_ldt,%eax
+ cmpl PCPU(CURRENTLDT),%eax
+ je 2f
+ pushl %edx
+ pushl %eax
+ xorl %eax,%eax
+ movl %eax,%gs
+ call i386_reset_ldt
+ popl %eax
+ popl %edx
+
+ movl %eax,PCPU(CURRENTLDT)
+ jmp 2f
+1:
+ /* Load the LDT when it is not the default one. */
+ pushl %edx /* Preserve pointer to pcb. */
+ addl $P_MD,%eax /* Pointer to mdproc is arg. */
+ pushl %eax
+ call set_user_ldt
+ addl $4,%esp
+ popl %edx
+2:
+ /* This must be done after loading the user LDT. */
+ .globl cpu_switch_load_gs
+cpu_switch_load_gs:
+ movl PCB_GS(%edx),%gs
+
+ /* XXX evidently setting debug registers needs to be
+ * routed through Xen - this appears to work - so I
+ * am leaving it as it is for now - (KMM)
+ */
+
+ /* Test if debug registers should be restored. */
+ testl $PCB_DBREGS,PCB_FLAGS(%edx)
+ jz 1f
+
+ /*
+ * Restore debug registers. The special code for dr7 is to
+ * preserve the current values of its reserved bits.
+ */
+ movl PCB_DR6(%edx),%eax
+ movl %eax,%dr6
+ movl PCB_DR3(%edx),%eax
+ movl %eax,%dr3
+ movl PCB_DR2(%edx),%eax
+ movl %eax,%dr2
+ movl PCB_DR1(%edx),%eax
+ movl %eax,%dr1
+ movl PCB_DR0(%edx),%eax
+ movl %eax,%dr0
+ movl %dr7,%eax
+ andl $0x0000fc00,%eax
+ movl PCB_DR7(%edx),%ecx
+ andl $~0x0000fc00,%ecx
+ orl %ecx,%eax
+ movl %eax,%dr7
+1:
+ ret
+
+#ifdef INVARIANTS
+badsw1:
+ pushal
+ pushl $sw0_1
+ call panic
+sw0_1: .asciz "cpu_throw: no newthread supplied"
+
+badsw2:
+ pushal
+ pushl $sw0_2
+ call panic
+sw0_2: .asciz "cpu_switch: no curthread supplied"
+
+badsw3:
+ pushal
+ pushl $sw0_3
+ call panic
+sw0_3: .asciz "cpu_switch: no newthread supplied"
+#endif
+
+/*
+ * savectx(pcb)
+ * Update pcb, saving current processor state.
+ */
+ENTRY(savectx)
+ /* Fetch PCB. */
+ movl 4(%esp),%ecx
+
+ /* Save caller's return address. Child won't execute this routine. */
+ movl (%esp),%eax
+ movl %eax,PCB_EIP(%ecx)
+
+#if 0
+ movl %cr3,%eax
+#else
+ movl PCPU(CR3),%eax
+#endif
+ movl %eax,PCB_CR3(%ecx)
+
+ movl %ebx,PCB_EBX(%ecx)
+ movl %esp,PCB_ESP(%ecx)
+ movl %ebp,PCB_EBP(%ecx)
+ movl %esi,PCB_ESI(%ecx)
+ movl %edi,PCB_EDI(%ecx)
+ movl %gs,PCB_GS(%ecx)
+#if 0
+ pushfl
+ popl PCB_PSL(%ecx)
+#endif
+#ifdef DEV_NPX
+ /*
+ * If fpcurthread == NULL, then the npx h/w state is irrelevant and the
+ * state had better already be in the pcb. This is true for forks
+ * but not for dumps (the old book-keeping with FP flags in the pcb
+ * always lost for dumps because the dump pcb has 0 flags).
+ *
+ * If fpcurthread != NULL, then we have to save the npx h/w state to
+ * fpcurthread's pcb and copy it to the requested pcb, or save to the
+ * requested pcb and reload. Copying is easier because we would
+ * have to handle h/w bugs for reloading. We used to lose the
+ * parent's npx state for forks by forgetting to reload.
+ */
+ pushfl
+ call ni_cli
+ movl PCPU(FPCURTHREAD),%eax
+ testl %eax,%eax
+ je 1f
+
+ pushl %ecx
+ movl TD_PCB(%eax),%eax
+ leal PCB_SAVEFPU(%eax),%eax
+ pushl %eax
+ pushl %eax
+ call npxsave
+ addl $4,%esp
+ popl %eax
+ popl %ecx
+
+ pushl $PCB_SAVEFPU_SIZE
+ leal PCB_SAVEFPU(%ecx),%ecx
+ pushl %ecx
+ pushl %eax
+ call bcopy
+ addl $12,%esp
+1:
+ popfl
+#endif /* DEV_NPX */
+
+ ret
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw
new file mode 100644
index 0000000000..014c6442ad
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw
@@ -0,0 +1,75 @@
+# @(#)symbols.raw 7.6 (Berkeley) 5/8/91
+#
+# $FreeBSD: src/sys/i386/i386/symbols.raw,v 1.15 1999/08/28 00:43:51 peter Exp $
+#
+
+
+#gdb
+ _IdlePTD
+ _PTD
+ _panicstr
+ _atdevbase
+# _version
+#dmesg
+ _msgbufp
+# _msgbuf
+#iostat
+ _tk_nin
+ _tk_nout
+ _cp_time
+# _io_info
+#ps
+ _nswap
+ _maxslp
+ _ccpu
+ _fscale
+ _avail_start
+ _avail_end
+#pstat
+# _cons
+ _nswap
+ _swapblist
+# _swaplist
+#vmstat
+ _cp_time
+# _rate
+# _total
+# _sum
+# _rectime
+# _pgintime
+ _boottime
+#w
+ _swapdev
+ _nswap
+ _averunnable
+ _boottime
+#netstat
+ _mbstat
+ _ipstat
+ _tcb
+ _tcpstat
+ _udb
+ _udpstat
+# _rawcb
+ _ifnet
+# _rthost
+# _rtnet
+ _icmpstat
+ _filehead
+ _nfiles
+# _rthashsize
+# _radix_node_head
+#routed
+ _ifnet
+#rwho
+ _boottime
+#savecore
+ _dumpdev
+ _dumplo
+ _time_second
+ _version
+ _dumpsize
+ _panicstr
+ _dumpmag
+#deprecated
+# _avenrun
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c
new file mode 100644
index 0000000000..8f85c128ba
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c
@@ -0,0 +1,703 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.91 2003/09/07 05:23:28 davidxu Exp $");
+
+#include "opt_kstack_pages.h"
+#include "opt_mac.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/mac.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/sysproto.h>
+#include <sys/user.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+
+#include <machine/cpu.h>
+#include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */
+#include <machine/proc.h>
+#include <machine/sysarch.h>
+#include <machine/xenfunc.h>
+
+#include <vm/vm_kern.h> /* for kernel_map */
+
+#define MAX_LD 8192
+#define LD_PER_PAGE 512
+#define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
+#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
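+
+/*
+ * Worked example (illustrative, not in the original): with LD_PER_PAGE
+ * == 512, NEW_MAX_LD(1) == 512 and NEW_MAX_LD(513) == 1024, i.e. the
+ * descriptor count is rounded up to a multiple of 512;
+ * SIZE_FROM_LARGEST_LD() converts that count to bytes at 8 bytes per
+ * descriptor.
+ */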
+
+void i386_reset_ldt(struct proc_ldt *pldt);
+
+static int i386_get_ldt(struct thread *, char *);
+static int i386_set_ldt(struct thread *, char *);
+static int i386_set_ldt_data(struct thread *, int start, int num,
+ union descriptor *descs);
+static int i386_ldt_grow(struct thread *td, int len);
+static int i386_get_ioperm(struct thread *, char *);
+static int i386_set_ioperm(struct thread *, char *);
+#ifdef SMP
+static void set_user_ldt_rv(struct thread *);
+#endif
+
+#ifndef _SYS_SYSPROTO_H_
+struct sysarch_args {
+ int op;
+ char *parms;
+};
+#endif
+
+int
+sysarch(td, uap)
+ struct thread *td;
+ register struct sysarch_args *uap;
+{
+ int error;
+
+ mtx_lock(&Giant);
+ switch(uap->op) {
+ case I386_GET_LDT:
+ error = i386_get_ldt(td, uap->parms);
+ break;
+
+ case I386_SET_LDT:
+ error = i386_set_ldt(td, uap->parms);
+ break;
+ case I386_GET_IOPERM:
+ error = i386_get_ioperm(td, uap->parms);
+ break;
+ case I386_SET_IOPERM:
+ error = i386_set_ioperm(td, uap->parms);
+ break;
+#if 0
+ case I386_VM86:
+ error = vm86_sysarch(td, uap->parms);
+ break;
+#endif
+ default:
+ error = EINVAL;
+ break;
+ }
+ mtx_unlock(&Giant);
+ return (error);
+}
+
+int
+i386_extend_pcb(struct thread *td)
+{
+ int i, offset;
+ u_long *addr;
+ struct pcb_ext *ext;
+ struct soft_segment_descriptor ssd = {
+ 0, /* segment base address (overwritten) */
+ ctob(IOPAGES + 1) - 1, /* length */
+ SDT_SYS386TSS, /* segment type */
+ 0, /* priority level */
+ 1, /* descriptor present */
+ 0, 0,
+ 0, /* default 32 size */
+ 0 /* granularity */
+ };
+
+ if (td->td_proc->p_flag & P_SA)
+ return (EINVAL); /* XXXKSE */
+/* XXXKSE All the code below only works in 1:1 needs changing */
+ ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1));
+ if (ext == 0)
+ return (ENOMEM);
+ bzero(ext, sizeof(struct pcb_ext));
+ /* -16 is so we can convert a trapframe into vm86trapframe inplace */
+ ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) -
+ sizeof(struct pcb) - 16;
+ ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
+ /*
+ * The last byte of the i/o map must be followed by an 0xff byte.
+ * We arbitrarily allocate 16 bytes here, to keep the starting
+ * address on a doubleword boundary.
+ */
+ offset = PAGE_SIZE - 16;
+ ext->ext_tss.tss_ioopt =
+ (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
+ ext->ext_iomap = (caddr_t)ext + offset;
+ ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;
+
+ addr = (u_long *)ext->ext_vm86.vm86_intmap;
+ for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
+ *addr++ = ~0;
+
+ ssd.ssd_base = (unsigned)&ext->ext_tss;
+ ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
+ ssdtosd(&ssd, &ext->ext_tssd);
+
+ KASSERT(td->td_proc == curthread->td_proc, ("giving TSS to !curproc"));
+ KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!"));
+ mtx_lock_spin(&sched_lock);
+ td->td_pcb->pcb_ext = ext;
+
+ /* switch to the new TSS after syscall completes */
+ td->td_flags |= TDF_NEEDRESCHED;
+ mtx_unlock_spin(&sched_lock);
+
+ return 0;
+}
+
+static int
+i386_set_ioperm(td, args)
+ struct thread *td;
+ char *args;
+{
+ int i, error;
+ struct i386_ioperm_args ua;
+ char *iomap;
+
+ if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
+ return (error);
+
+#ifdef MAC
+ if ((error = mac_check_sysarch_ioperm(td->td_ucred)) != 0)
+ return (error);
+#endif
+ if ((error = suser(td)) != 0)
+ return (error);
+ if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
+ return (error);
+ /*
+ * XXX
+ * While this is restricted to root, we should probably figure out
+	 * whether any other driver is using this i/o address, so as not to
+ * cause confusion. This probably requires a global 'usage registry'.
+ */
+
+ if (td->td_pcb->pcb_ext == 0)
+ if ((error = i386_extend_pcb(td)) != 0)
+ return (error);
+ iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;
+
+ if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY)
+ return (EINVAL);
+
+ for (i = ua.start; i < ua.start + ua.length; i++) {
+ if (ua.enable)
+ iomap[i >> 3] &= ~(1 << (i & 7));
+ else
+ iomap[i >> 3] |= (1 << (i & 7));
+ }
+ return (error);
+}
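+
+/*
+ * Illustrative note (not in the original): the i/o permission bitmap
+ * grants access by clearing one bit per port; e.g. enabling port 0x3f8
+ * clears bit 0 of iomap byte 0x7f:
+ *
+ *	iomap[0x3f8 >> 3] &= ~(1 << (0x3f8 & 7));
+ */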
+
+static int
+i386_get_ioperm(td, args)
+ struct thread *td;
+ char *args;
+{
+ int i, state, error;
+ struct i386_ioperm_args ua;
+ char *iomap;
+
+ if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
+ return (error);
+ if (ua.start >= IOPAGES * PAGE_SIZE * NBBY)
+ return (EINVAL);
+
+ if (td->td_pcb->pcb_ext == 0) {
+ ua.length = 0;
+ goto done;
+ }
+
+ iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;
+
+ i = ua.start;
+ state = (iomap[i >> 3] >> (i & 7)) & 1;
+ ua.enable = !state;
+ ua.length = 1;
+
+ for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
+ if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
+ break;
+ ua.length++;
+ }
+
+done:
+ error = copyout(&ua, args, sizeof(struct i386_ioperm_args));
+ return (error);
+}
+
+/*
+ * Update the GDT entry pointing to the LDT to point to the LDT of the
+ * current process.
+ *
+ * This must be called with sched_lock held. Unfortunately, we can't use a
+ * mtx_assert() here because cpu_switch() calls this function after changing
+ * curproc but before sched_lock's owner is updated in mi_switch().
+ */
+void
+set_user_ldt(struct mdproc *mdp)
+{
+ struct proc_ldt *pldt;
+ pldt = mdp->md_ldt;
+ i386_reset_ldt(pldt);
+ PCPU_SET(currentldt, (int)pldt);
+}
+
+#ifdef SMP
+static void
+set_user_ldt_rv(struct thread *td)
+{
+
+ if (td->td_proc != curthread->td_proc)
+ return;
+
+ set_user_ldt(&td->td_proc->p_md);
+}
+#endif
+
+/*
+ * Must be called with either sched_lock free or held but not recursed.
+ * If it does not return NULL, it will return with it owned.
+ */
+struct proc_ldt *
+user_ldt_alloc(struct mdproc *mdp, int len)
+{
+	struct proc_ldt *pldt, *new_ldt;
+
+ if (mtx_owned(&sched_lock))
+ mtx_unlock_spin(&sched_lock);
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+ MALLOC(new_ldt, struct proc_ldt *, sizeof(struct proc_ldt),
+ M_SUBPROC, M_WAITOK);
+
+ new_ldt->ldt_len = len = NEW_MAX_LD(len);
+ new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
+ round_page(len * sizeof(union descriptor)));
+ if (new_ldt->ldt_base == NULL) {
+ FREE(new_ldt, M_SUBPROC);
+ return NULL;
+ }
+ new_ldt->ldt_refcnt = 1;
+ new_ldt->ldt_active = 0;
+
+ mtx_lock_spin(&sched_lock);
+
+ if ((pldt = mdp->md_ldt)) {
+ if (len > pldt->ldt_len)
+ len = pldt->ldt_len;
+ bcopy(pldt->ldt_base, new_ldt->ldt_base,
+ len * sizeof(union descriptor));
+ } else {
+ bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
+ }
+ pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
+ new_ldt->ldt_len*sizeof(union descriptor));
+ return new_ldt;
+}
+
+/*
+ * Must be called either with sched_lock free or held but not recursed.
+ * If md_ldt is not NULL, it will return with sched_lock released.
+ */
+void
+user_ldt_free(struct thread *td)
+{
+ struct mdproc *mdp = &td->td_proc->p_md;
+ struct proc_ldt *pldt = mdp->md_ldt;
+ if (pldt == NULL)
+ return;
+
+ if (!mtx_owned(&sched_lock))
+ mtx_lock_spin(&sched_lock);
+ mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
+ if (td == PCPU_GET(curthread)) {
+ PCPU_SET(currentldt, _default_ldt);
+ i386_reset_ldt((struct proc_ldt *)_default_ldt);
+ }
+
+ mdp->md_ldt = NULL;
+ if (--pldt->ldt_refcnt == 0) {
+ mtx_unlock_spin(&sched_lock);
+
+ pmap_map_readwrite(kernel_pmap,(vm_offset_t) pldt->ldt_base,
+ pldt->ldt_len*sizeof(union descriptor));
+ kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base,
+ pldt->ldt_len * sizeof(union descriptor));
+ FREE(pldt, M_SUBPROC);
+ } else
+ mtx_unlock_spin(&sched_lock);
+}
+
+void
+i386_reset_ldt(struct proc_ldt *pldt)
+{
+ xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len);
+}
+
+static int
+i386_get_ldt(td, args)
+ struct thread *td;
+ char *args;
+{
+ int error = 0;
+ struct proc_ldt *pldt = td->td_proc->p_md.md_ldt;
+ int nldt, num;
+ union descriptor *lp;
+ struct i386_ldt_args ua, *uap = &ua;
+
+	if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) != 0)
+ return(error);
+
+#ifdef DEBUG
+ printf("i386_get_ldt: start=%d num=%d descs=%p\n",
+ uap->start, uap->num, (void *)uap->descs);
+#endif
+
+ /* verify range of LDTs exist */
+ if ((uap->start < 0) || (uap->num <= 0))
+ return(EINVAL);
+
+ if (pldt) {
+ nldt = pldt->ldt_len;
+ num = min(uap->num, nldt);
+ lp = &((union descriptor *)(pldt->ldt_base))[uap->start];
+ } else {
+ nldt = sizeof(ldt)/sizeof(ldt[0]);
+ num = min(uap->num, nldt);
+ lp = &ldt[uap->start];
+ }
+ if (uap->start + num > nldt)
+ return(EINVAL);
+
+ error = copyout(lp, uap->descs, num * sizeof(union descriptor));
+ if (!error)
+ td->td_retval[0] = num;
+
+ return(error);
+}
+
+static int ldt_warnings;
+#define NUM_LDT_WARNINGS 10
+
+static int
+i386_set_ldt(struct thread *td, char *args)
+{
+ int error = 0, i;
+ int largest_ld;
+ struct mdproc *mdp = &td->td_proc->p_md;
+ struct proc_ldt *pldt = 0;
+ struct i386_ldt_args ua, *uap = &ua;
+ union descriptor *descs, *dp;
+ int descs_size;
+
+	if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) != 0)
+ return(error);
+#ifdef DEBUG
+ printf("i386_set_ldt: start=%d num=%d descs=%p\n",
+ uap->start, uap->num, (void *)uap->descs);
+#endif
+
+ if (uap->descs == NULL) {
+ /* Free descriptors */
+ if (uap->start == 0 && uap->num == 0) {
+ /*
+ * Treat this as a special case, so userland needn't
+ * know magic number NLDT.
+ */
+ uap->start = NLDT;
+ uap->num = MAX_LD - NLDT;
+ }
+ if (uap->start <= LUDATA_SEL || uap->num <= 0)
+ return (EINVAL);
+ mtx_lock_spin(&sched_lock);
+ pldt = mdp->md_ldt;
+ if (pldt == NULL || uap->start >= pldt->ldt_len) {
+ mtx_unlock_spin(&sched_lock);
+ return (0);
+ }
+ largest_ld = uap->start + uap->num;
+ if (largest_ld > pldt->ldt_len)
+ largest_ld = pldt->ldt_len;
+ i = largest_ld - uap->start;
+ bzero(&((union descriptor *)(pldt->ldt_base))[uap->start],
+ sizeof(union descriptor) * i);
+ mtx_unlock_spin(&sched_lock);
+ return (0);
+ }
+
+ if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
+		/* complain for a while if using old methods */
+ if (ldt_warnings++ < NUM_LDT_WARNINGS) {
+ printf("Warning: pid %d used static ldt allocation.\n",
+ td->td_proc->p_pid);
+ printf("See the i386_set_ldt man page for more info\n");
+ }
+ /* verify range of descriptors to modify */
+ largest_ld = uap->start + uap->num;
+ if (uap->start >= MAX_LD ||
+ uap->num < 0 || largest_ld > MAX_LD) {
+ return (EINVAL);
+ }
+ }
+
+ descs_size = uap->num * sizeof(union descriptor);
+ descs = (union descriptor *)kmem_alloc(kernel_map, descs_size);
+ if (descs == NULL)
+ return (ENOMEM);
+ error = copyin(uap->descs, descs, descs_size);
+ if (error) {
+ kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
+ return (error);
+ }
+
+ /* Check descriptors for access violations */
+ for (i = 0; i < uap->num; i++) {
+ dp = &descs[i];
+
+ switch (dp->sd.sd_type) {
+ case SDT_SYSNULL: /* system null */
+ dp->sd.sd_p = 0;
+ break;
+ case SDT_SYS286TSS: /* system 286 TSS available */
+ case SDT_SYSLDT: /* system local descriptor table */
+ case SDT_SYS286BSY: /* system 286 TSS busy */
+ case SDT_SYSTASKGT: /* system task gate */
+ case SDT_SYS286IGT: /* system 286 interrupt gate */
+ case SDT_SYS286TGT: /* system 286 trap gate */
+ case SDT_SYSNULL2: /* undefined by Intel */
+ case SDT_SYS386TSS: /* system 386 TSS available */
+ case SDT_SYSNULL3: /* undefined by Intel */
+ case SDT_SYS386BSY: /* system 386 TSS busy */
+ case SDT_SYSNULL4: /* undefined by Intel */
+ case SDT_SYS386IGT: /* system 386 interrupt gate */
+ case SDT_SYS386TGT: /* system 386 trap gate */
+ case SDT_SYS286CGT: /* system 286 call gate */
+ case SDT_SYS386CGT: /* system 386 call gate */
+ /* I can't think of any reason to allow a user proc
+ * to create a segment of these types. They are
+ * for OS use only.
+ */
+ kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
+ return (EACCES);
+ /*NOTREACHED*/
+
+ /* memory segment types */
+ case SDT_MEMEC: /* memory execute only conforming */
+ case SDT_MEMEAC: /* memory execute only accessed conforming */
+ case SDT_MEMERC: /* memory execute read conforming */
+ case SDT_MEMERAC: /* memory execute read accessed conforming */
+ /* Must be "present" if executable and conforming. */
+ if (dp->sd.sd_p == 0) {
+ kmem_free(kernel_map, (vm_offset_t)descs,
+ descs_size);
+ return (EACCES);
+ }
+ break;
+ case SDT_MEMRO: /* memory read only */
+ case SDT_MEMROA: /* memory read only accessed */
+ case SDT_MEMRW: /* memory read write */
+ case SDT_MEMRWA: /* memory read write accessed */
+ case SDT_MEMROD: /* memory read only expand dwn limit */
+ case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
+ case SDT_MEMRWD: /* memory read write expand dwn limit */
+		case SDT_MEMRWDA: /* memory read write expand dwn lim accessed */
+ case SDT_MEME: /* memory execute only */
+ case SDT_MEMEA: /* memory execute only accessed */
+ case SDT_MEMER: /* memory execute read */
+ case SDT_MEMERA: /* memory execute read accessed */
+ break;
+ default:
+ kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
+ return(EINVAL);
+ /*NOTREACHED*/
+ }
+
+ /* Only user (ring-3) descriptors may be present. */
+ if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL)) {
+ kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
+ return (EACCES);
+ }
+ }
+
+ if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
+ /* Allocate a free slot */
+ pldt = mdp->md_ldt;
+ if (pldt == NULL) {
+ load_gs(0);
+ error = i386_ldt_grow(td, NLDT+1);
+ if (error) {
+ kmem_free(kernel_map, (vm_offset_t)descs,
+ descs_size);
+ return (error);
+ }
+ pldt = mdp->md_ldt;
+ }
+again:
+ mtx_lock_spin(&sched_lock);
+ /*
+ * start scanning a bit up to leave room for NVidia and
+		 * Wine, which still use the "Blat" method of allocation.
+ */
+ dp = &((union descriptor *)(pldt->ldt_base))[NLDT];
+ for (i = NLDT; i < pldt->ldt_len; ++i) {
+ if (dp->sd.sd_type == SDT_SYSNULL)
+ break;
+ dp++;
+ }
+ if (i >= pldt->ldt_len) {
+ mtx_unlock_spin(&sched_lock);
+ error = i386_ldt_grow(td, pldt->ldt_len+1);
+ if (error) {
+ kmem_free(kernel_map, (vm_offset_t)descs,
+ descs_size);
+ return (error);
+ }
+ goto again;
+ }
+ uap->start = i;
+ error = i386_set_ldt_data(td, i, 1, descs);
+ mtx_unlock_spin(&sched_lock);
+ } else {
+ largest_ld = uap->start + uap->num;
+ error = i386_ldt_grow(td, largest_ld);
+ if (error == 0) {
+ mtx_lock_spin(&sched_lock);
+ error = i386_set_ldt_data(td, uap->start, uap->num,
+ descs);
+ mtx_unlock_spin(&sched_lock);
+ }
+ }
+ kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
+ if (error == 0)
+ td->td_retval[0] = uap->start;
+ return (error);
+}
+typedef struct uint64_lohi {
+ unsigned long lo;
+ unsigned long hi;
+} uint64_lohi;
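+
+/*
+ * Illustrative note (not in the original): a union descriptor is 8 bytes
+ * and the update hypercall takes it as two 32-bit words, so descs[i] is
+ * viewed through uint64_lohi below, roughly:
+ *
+ *	d = &((union descriptor *)pldt->ldt_base)[start + i];
+ *	HYPERVISOR_update_descriptor(vtomach(d), lo, hi);
+ */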
+
+static int
+i386_set_ldt_data(struct thread *td, int start, int num,
+ union descriptor *descs)
+{
+ struct mdproc *mdp = &td->td_proc->p_md;
+ struct proc_ldt *pldt = mdp->md_ldt;
+ int i, error;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+
+ /* Fill in range */
+ for (i = 0; i < num; i++) {
+		error = HYPERVISOR_update_descriptor(
+		    vtomach(&((union descriptor *)(pldt->ldt_base))[start + i]),
+		    ((uint64_lohi *)descs)[i].lo, ((uint64_lohi *)descs)[i].hi);
+ if (error)
+ panic("failed to update ldt: %d", error);
+ }
+ return (0);
+}
+
+static int
+i386_ldt_grow(struct thread *td, int len)
+{
+ struct mdproc *mdp = &td->td_proc->p_md;
+ struct proc_ldt *pldt;
+ caddr_t old_ldt_base;
+ int old_ldt_len;
+
+ if (len > MAX_LD)
+ return (ENOMEM);
+ if (len < NLDT+1)
+ len = NLDT+1;
+ pldt = mdp->md_ldt;
+ /* allocate user ldt */
+ if (!pldt || len > pldt->ldt_len) {
+ struct proc_ldt *new_ldt = user_ldt_alloc(mdp, len);
+ if (new_ldt == NULL)
+ return (ENOMEM);
+ pldt = mdp->md_ldt;
+ /* sched_lock was held by user_ldt_alloc */
+ if (pldt) {
+ if (new_ldt->ldt_len > pldt->ldt_len) {
+ old_ldt_base = pldt->ldt_base;
+ old_ldt_len = pldt->ldt_len;
+ pldt->ldt_sd = new_ldt->ldt_sd;
+ pldt->ldt_base = new_ldt->ldt_base;
+ pldt->ldt_len = new_ldt->ldt_len;
+ mtx_unlock_spin(&sched_lock);
+ pmap_map_readwrite(kernel_pmap,
+ (vm_offset_t)old_ldt_base,
+ old_ldt_len * sizeof(union descriptor));
+ kmem_free(kernel_map, (vm_offset_t)old_ldt_base,
+ old_ldt_len * sizeof(union descriptor));
+ FREE(new_ldt, M_SUBPROC);
+ mtx_lock_spin(&sched_lock);
+ } else {
+ /*
+ * If other threads already did the work,
+ * do nothing
+ */
+ mtx_unlock_spin(&sched_lock);
+ pmap_map_readwrite(kernel_pmap,
+ (vm_offset_t)new_ldt->ldt_base,
+ new_ldt->ldt_len * sizeof(union descriptor));
+ kmem_free(kernel_map,
+ (vm_offset_t)new_ldt->ldt_base,
+ new_ldt->ldt_len * sizeof(union descriptor));
+ FREE(new_ldt, M_SUBPROC);
+ return (0);
+ }
+ } else {
+ mdp->md_ldt = pldt = new_ldt;
+ }
+#ifdef SMP
+ mtx_unlock_spin(&sched_lock);
+ /* signal other cpus to reload ldt */
+ smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv,
+ NULL, td);
+#else
+ set_user_ldt(mdp);
+ mtx_unlock_spin(&sched_lock);
+#endif
+ }
+ return (0);
+}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c
new file mode 100644
index 0000000000..a74986ed18
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c
@@ -0,0 +1,1006 @@
+/*-
+ * Copyright (C) 1994, David Greenman
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the University of Utah, and William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/trap.c,v 1.260 2003/11/03 21:53:37 jhb Exp $");
+
+/*
+ * 386 Trap and System call handling
+ */
+
+#include "opt_clock.h"
+#include "opt_cpu.h"
+#include "opt_isa.h"
+#include "opt_ktrace.h"
+#include "opt_npx.h"
+#include "opt_trap.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/pioctl.h>
+#include <sys/ptrace.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/syscall.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/uio.h>
+#include <sys/vmmeter.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+
+#include <machine/cpu.h>
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
+#include <machine/tss.h>
+#ifdef POWERFAIL_NMI
+#include <sys/syslog.h>
+#include <machine/clock.h>
+#endif
+
+
+#include <machine/xenfunc.h>
+#include <machine/hypervisor.h>
+#include <machine/xenvar.h>
+#include <machine/hypervisor-ifs.h>
+
+
+extern void trap(struct trapframe frame);
+extern void syscall(struct trapframe frame);
+
+static int trap_pfault(struct trapframe *, int, vm_offset_t);
+static void trap_fatal(struct trapframe *, vm_offset_t);
+void dblfault_handler(void);
+
+extern inthand_t IDTVEC(lcall_syscall);
+
+#define MAX_TRAP_MSG 28
+static char *trap_msg[] = {
+ "", /* 0 unused */
+ "privileged instruction fault", /* 1 T_PRIVINFLT */
+ "", /* 2 unused */
+ "breakpoint instruction fault", /* 3 T_BPTFLT */
+ "", /* 4 unused */
+ "", /* 5 unused */
+ "arithmetic trap", /* 6 T_ARITHTRAP */
+ "", /* 7 unused */
+ "", /* 8 unused */
+ "general protection fault", /* 9 T_PROTFLT */
+ "trace trap", /* 10 T_TRCTRAP */
+ "", /* 11 unused */
+ "page fault", /* 12 T_PAGEFLT */
+ "", /* 13 unused */
+ "alignment fault", /* 14 T_ALIGNFLT */
+ "", /* 15 unused */
+ "", /* 16 unused */
+ "hypervisor callback", /* 17 T_HYPCALLBACK */
+ "integer divide fault", /* 18 T_DIVIDE */
+ "non-maskable interrupt trap", /* 19 T_NMI */
+ "overflow trap", /* 20 T_OFLOW */
+ "FPU bounds check fault", /* 21 T_BOUND */
+ "FPU device not available", /* 22 T_DNA */
+ "double fault", /* 23 T_DOUBLEFLT */
+ "FPU operand fetch fault", /* 24 T_FPOPFLT */
+ "invalid TSS fault", /* 25 T_TSSFLT */
+ "segment not present fault", /* 26 T_SEGNPFLT */
+ "stack fault", /* 27 T_STKFLT */
+ "machine check trap", /* 28 T_MCHK */
+};
+
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+extern int has_f00f_bug;
+#endif
+
+#ifdef KDB
+static int kdb_on_nmi = 1;
+SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
+ &kdb_on_nmi, 0, "Go to KDB on NMI");
+#endif
+static int panic_on_nmi = 1;
+SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
+ &panic_on_nmi, 0, "Panic on NMI");
+
+#ifdef WITNESS
+extern char *syscallnames[];
+#endif
+
+#ifdef DEVICE_POLLING
+extern u_int32_t poll_in_trap;
+extern int ether_poll(int count);
+#endif /* DEVICE_POLLING */
+
+
+/*
+ * Exception, fault, and trap interface to the FreeBSD kernel.
+ * This common code is called from assembly language IDT gate entry
+ * routines that prepare a suitable stack frame, and restore this
+ * frame after the exception has been processed.
+ */
+
+void
+trap(struct trapframe frame)
+{
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ u_int sticks = 0;
+ int i = 0, ucode = 0, type, code;
+ vm_offset_t eva;
+#ifdef STACK_DEBUGGING
+ int nesting, current_sp;
+ static int prev_csp = 0, prev_ssp = 0;
+ nesting = PCPU_GET(trap_nesting);
+#endif
+
+#ifdef POWERFAIL_NMI
+ static int lastalert = 0;
+#endif
+
+ atomic_add_int(&cnt.v_trap, 1);
+ type = frame.tf_trapno;
+#ifdef KDB
+ if (kdb_active) {
+ kdb_reenter();
+ goto out;
+ }
+#endif
+
+ eva = 0;
+ code = frame.tf_err;
+
+ if (type == T_HYPCALLBACK) {
+ evtchn_do_upcall((struct intrframe *)&frame);
+ if (ISPL(frame.tf_cs) == SEL_KPL)
+ goto out;
+ goto userout;
+ } else if (type == 0)
+ panic("invalid trap type/code %d/%d\n",type, code);
+
+
+ if (type == T_PAGEFLT) {
+ /*
+ * For some Cyrix CPUs, %cr2 is clobbered by
+ * interrupts. This problem is worked around by using
+ * an interrupt gate for the pagefault handler. We
+ * are finally ready to read %cr2 and then must
+ * reenable interrupts.
+ *
+ * If we get a page fault while in a critical section, then
+ * it is most likely a fatal kernel page fault. The kernel
+ * is already going to panic trying to get a sleep lock to
+ * do the VM lookup, so just consider it a fatal trap so the
+ * kernel can print out a useful trap message and even get
+ * to the debugger.
+ */
+ eva = frame.tf_cr2;
+
+ if (td->td_critnest != 0)
+ trap_fatal(&frame, eva);
+ }
+
+#ifdef DEVICE_POLLING
+ if (poll_in_trap)
+ ether_poll(poll_in_trap);
+#endif /* DEVICE_POLLING */
+
+ if ((ISPL(frame.tf_cs) == SEL_UPL)
+ || ((frame.tf_eflags & PSL_VM) &&
+ !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) {
+ /* user trap */
+
+ sticks = td->td_sticks;
+ td->td_frame = &frame;
+ if (td->td_ucred != p->p_ucred)
+ cred_update_thread(td);
+
+ switch (type) {
+ case T_PRIVINFLT: /* privileged instruction fault */
+ ucode = type;
+ i = SIGILL;
+ break;
+
+ case T_BPTFLT: /* bpt instruction fault */
+ case T_TRCTRAP: /* trace trap */
+ enable_intr();
+ frame.tf_eflags &= ~PSL_T;
+ i = SIGTRAP;
+ break;
+
+ case T_ARITHTRAP: /* arithmetic trap */
+#ifdef DEV_NPX
+ ucode = npxtrap();
+ if (ucode == -1)
+ goto userout;
+#else
+ ucode = code;
+#endif
+ i = SIGFPE;
+ break;
+
+ case T_PROTFLT: /* general protection fault */
+ case T_STKFLT: /* stack fault */
+ case T_SEGNPFLT: /* segment not present fault */
+ case T_TSSFLT: /* invalid TSS fault */
+ case T_DOUBLEFLT: /* double fault */
+ default:
+			ucode = code + BUS_SEGM_FAULT;
+ printf("unexpected trap type/code %d/%d\n",type, code); /* XXX temporary */
+
+ i = SIGBUS;
+ break;
+
+ case T_PAGEFLT: /* page fault */
+ if (td->td_pflags & TDP_SA)
+ thread_user_enter(td);
+
+ i = trap_pfault(&frame, TRUE, eva);
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+ if (i == -2) {
+ /*
+ * The f00f hack workaround has triggered, so
+ * treat the fault as an illegal instruction
+ * (T_PRIVINFLT) instead of a page fault.
+ */
+ type = frame.tf_trapno = T_PRIVINFLT;
+
+ /* Proceed as in that case. */
+ ucode = type;
+ i = SIGILL;
+ break;
+ }
+#endif
+ if (i == -1)
+ goto userout;
+ if (i == 0)
+ goto user;
+
+ ucode = T_PAGEFLT;
+ break;
+
+ case T_DIVIDE: /* integer divide fault */
+ ucode = FPE_INTDIV;
+ i = SIGFPE;
+ break;
+
+#ifdef DEV_ISA
+ case T_NMI:
+#ifdef POWERFAIL_NMI
+#ifndef TIMER_FREQ
+# define TIMER_FREQ 1193182
+#endif
+ mtx_lock(&Giant);
+ if (time_second - lastalert > 10) {
+ log(LOG_WARNING, "NMI: power fail\n");
+ sysbeep(TIMER_FREQ/880, hz);
+ lastalert = time_second;
+ }
+ mtx_unlock(&Giant);
+ goto userout;
+#else /* !POWERFAIL_NMI */
+ /* machine/parity/power fail/"kitchen sink" faults */
+ /* XXX Giant */
+ if (isa_nmi(code) == 0) {
+#ifdef KDB
+ /*
+ * NMI can be hooked up to a pushbutton
+ * for debugging.
+ */
+ if (kdb_on_nmi) {
+ printf ("NMI ... going to debugger\n");
+ kdb_trap (type, 0, &frame);
+ }
+#endif /* KDB */
+ goto userout;
+ } else if (panic_on_nmi)
+ panic("NMI indicates hardware failure");
+ break;
+#endif /* POWERFAIL_NMI */
+#endif /* DEV_ISA */
+
+ case T_OFLOW: /* integer overflow fault */
+ ucode = FPE_INTOVF;
+ i = SIGFPE;
+ break;
+
+ case T_BOUND: /* bounds check fault */
+ ucode = FPE_FLTSUB;
+ i = SIGFPE;
+ break;
+
+ case T_DNA:
+#ifdef DEV_NPX
+ /* transparent fault (due to context switch "late") */
+ if (npxdna())
+ goto userout;
+#endif
+ i = SIGFPE;
+ ucode = FPE_FPU_NP_TRAP;
+ break;
+
+ case T_FPOPFLT: /* FPU operand fetch fault */
+ ucode = T_FPOPFLT;
+ i = SIGILL;
+ break;
+
+ case T_XMMFLT: /* SIMD floating-point exception */
+ ucode = 0; /* XXX */
+ i = SIGFPE;
+ break;
+ }
+ } else {
+ /* kernel trap */
+
+ KASSERT(cold || td->td_ucred != NULL,
+ ("kernel trap doesn't have ucred"));
+ switch (type) {
+ case T_PAGEFLT: /* page fault */
+ (void) trap_pfault(&frame, FALSE, eva);
+ goto out;
+
+ case T_DNA:
+#ifdef DEV_NPX
+ /*
+ * The kernel is apparently using npx for copying.
+ * XXX this should be fatal unless the kernel has
+ * registered such use.
+ */
+ if (npxdna())
+ goto out;
+#endif
+ break;
+
+ /*
+ * The following two traps can happen in
+ * vm86 mode, and, if so, we want to handle
+ * them specially.
+ */
+ case T_PROTFLT: /* general protection fault */
+ case T_STKFLT: /* stack fault */
+#if 0
+ if (frame.tf_eflags & PSL_VM) {
+ i = vm86_emulate((struct vm86frame *)&frame);
+ if (i != 0)
+ /*
+ * returns to original process
+ */
+ vm86_trap((struct vm86frame *)&frame);
+ goto out;
+ }
+#endif
+ if (type == T_STKFLT)
+ break;
+
+ /* FALL THROUGH */
+
+ case T_SEGNPFLT: /* segment not present fault */
+ if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)
+ break;
+
+ /*
+ * Invalid %fs's and %gs's can be created using
+ * procfs or PT_SETREGS or by invalidating the
+ * underlying LDT entry. This causes a fault
+ * in kernel mode when the kernel attempts to
+ * switch contexts. Lose the bad context
+ * (XXX) so that we can continue, and generate
+ * a signal.
+ */
+ if (frame.tf_eip == (int)cpu_switch_load_gs) {
+ PCPU_GET(curpcb)->pcb_gs = 0;
+#if 0
+ PROC_LOCK(p);
+ psignal(p, SIGBUS);
+ PROC_UNLOCK(p);
+#endif
+ goto out;
+ }
+
+ if (td->td_intr_nesting_level != 0)
+ break;
+
+ /*
+ * Invalid segment selectors and out of bounds
+ * %eip's and %esp's can be set up in user mode.
+ * This causes a fault in kernel mode when the
+ * kernel tries to return to user mode. We want
+ * to get this fault so that we can fix the
+ * problem here and not have to check all the
+ * selectors and pointers when the user changes
+ * them.
+ */
+ if (frame.tf_eip == (int)doreti_iret) {
+ frame.tf_eip = (int)doreti_iret_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_ds) {
+ frame.tf_eip = (int)doreti_popl_ds_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_es) {
+ frame.tf_eip = (int)doreti_popl_es_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_fs) {
+ frame.tf_eip = (int)doreti_popl_fs_fault;
+ goto out;
+ }
+ if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
+ frame.tf_eip =
+ (int)PCPU_GET(curpcb)->pcb_onfault;
+ goto out;
+ }
+ break;
+
+ case T_TSSFLT:
+ /*
+ * PSL_NT can be set in user mode and isn't cleared
+ * automatically when the kernel is entered. This
+ * causes a TSS fault when the kernel attempts to
+ * `iret' because the TSS link is uninitialized. We
+ * want to get this fault so that we can fix the
+ * problem here and not every time the kernel is
+ * entered.
+ */
+ if (frame.tf_eflags & PSL_NT) {
+ frame.tf_eflags &= ~PSL_NT;
+ goto out;
+ }
+ break;
+
+ case T_TRCTRAP: /* trace trap */
+ if (frame.tf_eip == (int)IDTVEC(lcall_syscall)) {
+ /*
+ * We've just entered system mode via the
+ * syscall lcall. Continue single stepping
+ * silently until the syscall handler has
+ * saved the flags.
+ */
+ goto out;
+ }
+ if (frame.tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
+ /*
+ * The syscall handler has now saved the
+ * flags. Stop single stepping it.
+ */
+ frame.tf_eflags &= ~PSL_T;
+ goto out;
+ }
+ /*
+ * Ignore debug register trace traps due to
+ * accesses in the user's address space, which
+ * can happen under several conditions such as
+ * if a user sets a watchpoint on a buffer and
+ * then passes that buffer to a system call.
+ * We still want to get TRCTRAPS for addresses
+ * in kernel space because that is useful when
+ * debugging the kernel.
+ */
+ /* XXX Giant */
+ if (user_dbreg_trap() &&
+ !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) {
+ /*
+ * Reset breakpoint bits because the
+ * processor doesn't
+ */
+ load_dr6(rdr6() & 0xfffffff0);
+ goto out;
+ }
+ /*
+ * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
+ */
+ case T_BPTFLT:
+ /*
+ * If KDB is enabled, let it handle the debugger trap.
+ * Otherwise, debugger traps "can't happen".
+ */
+#ifdef KDB
+ /* XXX Giant */
+ if (kdb_trap (type, 0, &frame))
+ goto out;
+#endif
+ break;
+
+#ifdef DEV_ISA
+ case T_NMI:
+#ifdef POWERFAIL_NMI
+ mtx_lock(&Giant);
+ if (time_second - lastalert > 10) {
+ log(LOG_WARNING, "NMI: power fail\n");
+ sysbeep(TIMER_FREQ/880, hz);
+ lastalert = time_second;
+ }
+ mtx_unlock(&Giant);
+ goto out;
+#else /* !POWERFAIL_NMI */
+ /* XXX Giant */
+ /* machine/parity/power fail/"kitchen sink" faults */
+ if (isa_nmi(code) == 0) {
+#ifdef KDB
+ /*
+ * NMI can be hooked up to a pushbutton
+ * for debugging.
+ */
+ if (kdb_on_nmi) {
+ printf ("NMI ... going to debugger\n");
+ kdb_trap (type, 0, &frame);
+ }
+#endif /* KDB */
+ goto out;
+ } else if (panic_on_nmi == 0)
+ goto out;
+ /* FALLTHROUGH */
+#endif /* POWERFAIL_NMI */
+#endif /* DEV_ISA */
+ }
+
+ trap_fatal(&frame, eva);
+ goto out;
+ }
+
+ /* Translate fault for emulators (e.g. Linux) */
+ if (*p->p_sysent->sv_transtrap)
+ i = (*p->p_sysent->sv_transtrap)(i, type);
+
+ trapsignal(td, i, ucode);
+
+#if 1 /* DEBUG */
+ if (type <= MAX_TRAP_MSG) {
+ uprintf("fatal process exception: %s",
+ trap_msg[type]);
+ if ((type == T_PAGEFLT) || (type == T_PROTFLT))
+ uprintf(", fault VA = 0x%lx", (u_long)eva);
+ uprintf("\n");
+ }
+#endif
+
+user:
+ userret(td, &frame, sticks);
+ mtx_assert(&Giant, MA_NOTOWNED);
+userout:
+out:
+#ifdef STACK_DEBUGGING
+ PCPU_SET(trap_nesting, nesting);
+#endif
+ return;
+}
+
+static int
+trap_pfault(frame, usermode, eva)
+ struct trapframe *frame;
+ int usermode;
+ vm_offset_t eva;
+{
+ vm_offset_t va;
+ struct vmspace *vm = NULL;
+ vm_map_t map = 0;
+ int rv = 0;
+ vm_prot_t ftype;
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+
+ va = trunc_page(eva);
+ if (va >= KERNBASE) {
+ /*
+ * Don't allow user-mode faults in kernel address space.
+ * An exception: if the faulting address is the invalid
+ * instruction entry in the IDT, then the Intel Pentium
+ * F00F bug workaround was triggered, and we need to
+		 * treat it as an illegal instruction, and not a page
+ * fault.
+ */
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+ if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
+ return -2;
+#endif
+ if (usermode)
+ goto nogo;
+
+ map = kernel_map;
+ } else {
+ /*
+ * This is a fault on non-kernel virtual memory.
+ * vm is initialized above to NULL. If curproc is NULL
+ * or curproc->p_vmspace is NULL the fault is fatal.
+ */
+ if (p != NULL)
+ vm = p->p_vmspace;
+
+ if (vm == NULL)
+ goto nogo;
+
+ map = &vm->vm_map;
+ }
+
+ if (frame->tf_err & PGEX_W)
+ ftype = VM_PROT_WRITE;
+ else
+ ftype = VM_PROT_READ;
+
+ if (map != kernel_map) {
+ /*
+ * Keep swapout from messing with us during this
+ * critical time.
+ */
+ PROC_LOCK(p);
+ ++p->p_lock;
+ PROC_UNLOCK(p);
+
+ /* Fault in the user page: */
+ rv = vm_fault(map, va, ftype,
+ (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
+ : VM_FAULT_NORMAL);
+
+ PROC_LOCK(p);
+ --p->p_lock;
+ PROC_UNLOCK(p);
+ } else {
+ /*
+ * Don't have to worry about process locking or stacks in the
+ * kernel.
+ */
+ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+ }
+ if (rv == KERN_SUCCESS)
+ return (0);
+nogo:
+ if (!usermode) {
+ if (td->td_intr_nesting_level == 0 &&
+ PCPU_GET(curpcb)->pcb_onfault != NULL) {
+ frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
+ return (0);
+ }
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+
+ /* kludge to pass faulting virtual address to sendsig */
+ frame->tf_err = eva;
+
+ return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+}
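+
+/*
+ * The pcb_onfault fixup taken on the nogo path above is what lets
+ * copyin()/copyout() survive bad user pointers: those primitives store
+ * a recovery address in curpcb->pcb_onfault before touching user
+ * memory, and a kernel-mode fault is then resolved by redirecting %eip
+ * to that address (which typically returns EFAULT to the caller)
+ * instead of panicking.
+ */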
+
+static void
+trap_fatal(struct trapframe *frame, vm_offset_t eva)
+{
+ int code, type, ss, esp;
+ struct soft_segment_descriptor softseg;
+
+ code = frame->tf_err;
+ type = frame->tf_trapno;
+	bzero(&softseg, sizeof(softseg));	/* printed below; the gdt read is disabled */
+#if 0
+	XENPRINTF("trying to read gdt\n");
+	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
+	XENPRINTF("read gdt\n");
+#endif
+ if (type <= MAX_TRAP_MSG)
+ printf("\n\nFatal trap %d: %s while in %s mode\n",
+ type, trap_msg[type],
+ frame->tf_eflags & PSL_VM ? "vm86" :
+ ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
+#ifdef SMP
+ /* two separate prints in case of a trap on an unmapped page */
+ printf("cpuid = %d; ", PCPU_GET(cpuid));
+ printf("apic id = %02x\n", PCPU_GET(apic_id));
+#endif
+ if (type == T_PAGEFLT) {
+ printf("fault virtual address = 0x%x\n", eva);
+ printf("fault code = %s %s, %s\n",
+ code & PGEX_U ? "user" : "supervisor",
+ code & PGEX_W ? "write" : "read",
+ code & PGEX_P ? "protection violation" : "page not present");
+ }
+ printf("instruction pointer = 0x%x:0x%x\n",
+ frame->tf_cs & 0xffff, frame->tf_eip);
+ if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
+ ss = frame->tf_ss & 0xffff;
+ esp = frame->tf_esp;
+ } else {
+ ss = GSEL(GDATA_SEL, SEL_KPL);
+ esp = (int)&frame->tf_esp;
+ }
+ printf("stack pointer = 0x%x:0x%x\n", ss, esp);
+ printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp);
+ printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n",
+ softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
+ printf(" = DPL %d, pres %d, def32 %d, gran %d\n",
+ softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
+ softseg.ssd_gran);
+ printf("processor eflags = ");
+ if (frame->tf_eflags & PSL_T)
+ printf("trace trap, ");
+ if (frame->tf_eflags & PSL_I)
+ printf("interrupt enabled, ");
+ if (frame->tf_eflags & PSL_NT)
+ printf("nested task, ");
+ if (frame->tf_eflags & PSL_RF)
+ printf("resume, ");
+ if (frame->tf_eflags & PSL_VM)
+ printf("vm86, ");
+ printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
+ printf("current process = ");
+ if (curproc) {
+ printf("%lu (%s)\n",
+ (u_long)curproc->p_pid, curproc->p_comm ?
+ curproc->p_comm : "");
+ } else {
+ printf("Idle\n");
+ }
+ /* XXX */
+
+#ifdef KDB
+ if (kdb_trap(type, 0, frame))
+ return;
+#endif
+ printf("trap number = %d\n", type);
+ if (type <= MAX_TRAP_MSG)
+ panic("%s", trap_msg[type]);
+ else
+ panic("unknown/reserved trap");
+}
+
+/*
+ * Double fault handler. Called when a fault occurs while writing
+ * a frame for a trap/exception onto the stack. This usually occurs
+ * when the stack overflows (such is the case with infinite recursion,
+ * for example).
+ *
+ * XXX Note that the current PTD gets replaced by IdlePTD when the
+ * task switch occurs. This means that the stack that was active at
+ * the time of the double fault is not available at <kstack> unless
+ * the machine was idle when the double fault occurred. The downside
+ * of this is that "trace <ebp>" in ddb won't work.
+ */
+void
+dblfault_handler()
+{
+ printf("\nFatal double fault:\n");
+ printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip));
+ printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp));
+ printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp));
+#ifdef SMP
+ /* two separate prints in case of a trap on an unmapped page */
+ printf("cpuid = %d; ", PCPU_GET(cpuid));
+ printf("apic id = %02x\n", PCPU_GET(apic_id));
+#endif
+ panic("double fault");
+}
+
+/*
+ * syscall - system call request C handler
+ *
+ * A system call is essentially treated as a trap.
+ */
+void
+syscall(frame)
+ struct trapframe frame;
+{
+ caddr_t params;
+ struct sysent *callp;
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ register_t orig_tf_eflags;
+ u_int sticks;
+ int error;
+ int narg;
+ int args[8];
+ u_int code;
+
+ /*
+ * note: PCPU_LAZY_INC() can only be used if we can afford
+	 * occasional inaccuracy in the count.
+ */
+ PCPU_LAZY_INC(cnt.v_syscall);
+
+#ifdef DIAGNOSTIC
+ if (ISPL(frame.tf_cs) != SEL_UPL) {
+ mtx_lock(&Giant); /* try to stabilize the system XXX */
+ panic("syscall");
+ /* NOT REACHED */
+ mtx_unlock(&Giant);
+ }
+#endif
+
+ sticks = td->td_sticks;
+ td->td_frame = &frame;
+ if (td->td_ucred != p->p_ucred)
+ cred_update_thread(td);
+ if (p->p_flag & P_SA)
+ thread_user_enter(td);
+ params = (caddr_t)frame.tf_esp + sizeof(int);
+ code = frame.tf_eax;
+ orig_tf_eflags = frame.tf_eflags;
+
+ if (p->p_sysent->sv_prepsyscall) {
+ /*
+ * The prep code is MP aware.
+ */
+ (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
+ } else {
+ /*
+ * Need to check if this is a 32 bit or 64 bit syscall.
+ * fuword is MP aware.
+ */
+ if (code == SYS_syscall) {
+ /*
+ * Code is first argument, followed by actual args.
+ */
+ code = fuword(params);
+ params += sizeof(int);
+ } else if (code == SYS___syscall) {
+ /*
+ * Like syscall, but code is a quad, so as to maintain
+ * quad alignment for the rest of the arguments.
+ */
+ code = fuword(params);
+ params += sizeof(quad_t);
+ }
+ }
+
+ if (p->p_sysent->sv_mask)
+ code &= p->p_sysent->sv_mask;
+
+ if (code >= p->p_sysent->sv_size)
+ callp = &p->p_sysent->sv_table[0];
+ else
+ callp = &p->p_sysent->sv_table[code];
+
+ narg = callp->sy_narg & SYF_ARGMASK;
+
+ /*
+ * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
+ */
+ if (params != NULL && narg != 0)
+ error = copyin(params, (caddr_t)args,
+ (u_int)(narg * sizeof(int)));
+ else
+ error = 0;
+
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_SYSCALL))
+ ktrsyscall(code, narg, args);
+#endif
+ CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
+ td->td_proc->p_pid, td->td_proc->p_comm, code);
+
+ /*
+ * Try to run the syscall without Giant if the syscall
+ * is MP safe.
+ */
+ if ((callp->sy_narg & SYF_MPSAFE) == 0)
+ mtx_lock(&Giant);
+
+ if (error == 0) {
+ td->td_retval[0] = 0;
+ td->td_retval[1] = frame.tf_edx;
+
+ STOPEVENT(p, S_SCE, narg);
+
+ PTRACESTOP_SC(p, td, S_PT_SCE);
+
+ error = (*callp->sy_call)(td, args);
+ }
+
+ switch (error) {
+ case 0:
+ frame.tf_eax = td->td_retval[0];
+ frame.tf_edx = td->td_retval[1];
+ frame.tf_eflags &= ~PSL_C;
+ break;
+
+ case ERESTART:
+ /*
+ * Reconstruct pc, assuming lcall $X,y is 7 bytes,
+ * int 0x80 is 2 bytes. We saved this in tf_err.
+ */
+ frame.tf_eip -= frame.tf_err;
+ break;
+
+ case EJUSTRETURN:
+ break;
+
+ default:
+ if (p->p_sysent->sv_errsize) {
+ if (error >= p->p_sysent->sv_errsize)
+ error = -1; /* XXX */
+ else
+ error = p->p_sysent->sv_errtbl[error];
+ }
+ frame.tf_eax = error;
+ frame.tf_eflags |= PSL_C;
+ break;
+ }
+
+ /*
+ * Release Giant if we previously set it.
+ */
+ if ((callp->sy_narg & SYF_MPSAFE) == 0)
+ mtx_unlock(&Giant);
+
+ /*
+ * Traced syscall.
+ */
+ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
+ frame.tf_eflags &= ~PSL_T;
+ trapsignal(td, SIGTRAP, 0);
+ }
+
+ /*
+ * Handle reschedule and other end-of-syscall issues
+ */
+ userret(td, &frame, sticks);
+
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_SYSRET))
+ ktrsysret(code, error, td->td_retval[0]);
+#endif
+
+ /*
+ * This works because errno is findable through the
+ * register set. If we ever support an emulation where this
+ * is not the case, this code will need to be revisited.
+ */
+ STOPEVENT(p, S_SCX, code);
+
+ PTRACESTOP_SC(p, td, S_PT_SCX);
+
+ WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
+ (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_NOTOWNED);
+}
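+
+/*
+ * Worked example (illustrative): for a plain INT 0x80 invocation of
+ * write(fd, buf, len), the user stack at tf_esp holds the return %eip
+ * followed by the three arguments, so params = tf_esp + sizeof(int)
+ * points at fd and the copyin() above fetches narg * sizeof(int)
+ * bytes.  For SYS_syscall the real code number is itself the first
+ * stacked word, and params is advanced past it before the copyin().
+ */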
+
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c
new file mode 100644
index 0000000000..cff67833f7
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c
@@ -0,0 +1,618 @@
+/*-
+ * Copyright (c) 1982, 1986 The Regents of the University of California.
+ * Copyright (c) 1989, 1990 William Jolitz
+ * Copyright (c) 1994 John Dyson
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department, and William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
+ * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.219 2003/11/17 18:22:24 alc Exp $");
+
+#include "opt_npx.h"
+#ifdef PC98
+#include "opt_pc98.h"
+#endif
+#include "opt_reset.h"
+#include "opt_cpu.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/kse.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sf_buf.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/unistd.h>
+#include <sys/user.h>
+#include <sys/vnode.h>
+#include <sys/vmmeter.h>
+
+#include <machine/cpu.h>
+#include <machine/cputypes.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/pcb_ext.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_param.h>
+
+#ifdef PC98
+#include <pc98/pc98/pc98.h>
+#else
+#include <i386/isa/isa.h>
+#endif
+
+#ifndef NSFBUFS
+#define NSFBUFS (512 + maxusers * 16)
+#endif
+
+#include <machine/xenfunc.h>
+
+#ifdef SMP
+static void cpu_reset_proxy(void);
+static u_int cpu_reset_proxyid;
+static volatile u_int cpu_reset_proxy_active;
+#endif
+static void sf_buf_init(void *arg);
+SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
+
+LIST_HEAD(sf_head, sf_buf);
+
+/*
+ * The free list of sendfile(2) buffers
+ */
+static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
+
+/*
+ * A hash table of active sendfile(2) buffers
+ */
+static struct sf_head *sf_buf_active;
+static u_long sf_buf_hashmask;
+
+
+#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
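+
+/*
+ * The hash key is the page's index within vm_page_array, masked down
+ * to the table size (sf_buf_hashmask is a power of two minus one from
+ * hashinit()); for example, with a mask of 511, page index 1000 falls
+ * in bucket 488.
+ */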
+
+static u_int sf_buf_alloc_want;
+
+/*
+ * A lock used to synchronize access to the hash table and free list
+ */
+static struct mtx sf_buf_lock;
+
+extern int _ucodesel, _udatasel;
+
+/*
+ * Finish a fork operation, with process p2 nearly set up.
+ * Copy and update the pcb, and set up the stack so that the child is
+ * ready to run and return to user mode.
+ */
+void
+cpu_fork(struct thread *td1,
+ struct proc *p2,
+ struct thread *td2,
+ int flags)
+{
+ register struct proc *p1;
+ struct pcb *pcb2;
+ struct mdproc *mdp2;
+#ifdef DEV_NPX
+ register_t savecrit;
+#endif
+
+ p1 = td1->td_proc;
+ if ((flags & RFPROC) == 0) {
+ if ((flags & RFMEM) == 0) {
+ /* unshare user LDT */
+ struct mdproc *mdp1 = &p1->p_md;
+ struct proc_ldt *pldt = mdp1->md_ldt;
+ if (pldt && pldt->ldt_refcnt > 1) {
+ pldt = user_ldt_alloc(mdp1, pldt->ldt_len);
+ if (pldt == NULL)
+ panic("could not copy LDT");
+ mdp1->md_ldt = pldt;
+ set_user_ldt(mdp1);
+ user_ldt_free(td1);
+ }
+ }
+ return;
+ }
+
+ /* Ensure that p1's pcb is up to date. */
+#ifdef DEV_NPX
+ if (td1 == curthread)
+ td1->td_pcb->pcb_gs = rgs();
+ savecrit = intr_disable();
+ if (PCPU_GET(fpcurthread) == td1)
+ npxsave(&td1->td_pcb->pcb_save);
+ intr_restore(savecrit);
+#endif
+
+ /* Point the pcb to the top of the stack */
+ pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1;
+ td2->td_pcb = pcb2;
+
+ /* Copy p1's pcb */
+ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
+
+ /* Point mdproc and then copy over td1's contents */
+ mdp2 = &p2->p_md;
+ bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
+
+ /*
+ * Create a new fresh stack for the new process.
+ * Copy the trap frame for the return to user mode as if from a
+ * syscall. This copies most of the user mode register values.
+ */
+ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb) - 1;
+ bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
+
+ td2->td_frame->tf_eax = 0; /* Child returns zero */
+ td2->td_frame->tf_eflags &= ~PSL_C; /* success */
+ td2->td_frame->tf_edx = 1;
+ /*
+ * Set registers for trampoline to user mode. Leave space for the
+ * return address on stack. These are the kernel mode register values.
+ */
+ pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);
+ pcb2->pcb_edi = 0;
+ pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */
+ pcb2->pcb_ebp = 0;
+ pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *);
+ pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */
+ pcb2->pcb_eip = (int)fork_trampoline;
+ pcb2->pcb_psl = PSL_KERNEL; /* ints disabled */
+ pcb2->pcb_gs = rgs();
+ /*-
+ * pcb2->pcb_dr*: cloned above.
+ * pcb2->pcb_savefpu: cloned above.
+ * pcb2->pcb_flags: cloned above.
+ * pcb2->pcb_onfault: cloned above (always NULL here?).
+ * pcb2->pcb_gs: cloned above.
+ * pcb2->pcb_ext: cleared below.
+ */
+
+ /*
+	 * XXX don't copy the i/o pages. This should probably be fixed.
+ */
+ pcb2->pcb_ext = 0;
+
+ /* Copy the LDT, if necessary. */
+ mtx_lock_spin(&sched_lock);
+
+ if (mdp2->md_ldt != 0) {
+ if (flags & RFMEM) {
+ mdp2->md_ldt->ldt_refcnt++;
+ } else {
+ mdp2->md_ldt = user_ldt_alloc(mdp2,
+ mdp2->md_ldt->ldt_len);
+ if (mdp2->md_ldt == NULL)
+ panic("could not copy LDT");
+ }
+ }
+ mtx_unlock_spin(&sched_lock);
+
+ /*
+ * Now, cpu_switch() can schedule the new process.
+ * pcb_esp is loaded pointing to the cpu_switch() stack frame
+ * containing the return address when exiting cpu_switch.
+ * This will normally be to fork_trampoline(), which will have
+ * %ebx loaded with the new proc's pointer. fork_trampoline()
+ * will set up a stack to call fork_return(p, frame); to complete
+ * the return to user-mode.
+ */
+}
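+
+/*
+ * Resulting child kernel-stack layout (descriptive sketch): the pcb
+ * sits at the very top of the kstack, the trapframe lies immediately
+ * below it, and pcb_esp points one word below the trapframe.  That
+ * word is the return slot consumed when cpu_switch() "returns" into
+ * fork_trampoline(), which finds td2 in %ebx and fork_return in %esi.
+ */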
+
+/*
+ * Intercept the return address from a freshly forked process that has NOT
+ * been scheduled yet.
+ *
+ * This is needed to make kernel threads stay in kernel mode.
+ */
+void
+cpu_set_fork_handler(td, func, arg)
+ struct thread *td;
+ void (*func)(void *);
+ void *arg;
+{
+ /*
+ * Note that the trap frame follows the args, so the function
+ * is really called like this: func(arg, frame);
+ */
+ td->td_pcb->pcb_esi = (int) func; /* function */
+ td->td_pcb->pcb_ebx = (int) arg; /* first arg */
+}
+
+void
+cpu_exit(struct thread *td)
+{
+ struct mdproc *mdp;
+ struct pcb *pcb = td->td_pcb;
+
+
+ /* Reset pc->pcb_gs and %gs before possibly invalidating it. */
+ mdp = &td->td_proc->p_md;
+ if (mdp->md_ldt) {
+ td->td_pcb->pcb_gs = _udatasel;
+ load_gs(_udatasel);
+ user_ldt_free(td);
+ }
+ if (pcb->pcb_flags & PCB_DBREGS) {
+ /* disable all hardware breakpoints */
+ reset_dbregs();
+ pcb->pcb_flags &= ~PCB_DBREGS;
+ }
+}
+
+void
+cpu_thread_exit(struct thread *td)
+{
+ struct pcb *pcb = td->td_pcb;
+#ifdef DEV_NPX
+ if (td == PCPU_GET(fpcurthread))
+ npxdrop();
+#endif
+ if (pcb->pcb_flags & PCB_DBREGS) {
+ /* disable all hardware breakpoints */
+ reset_dbregs();
+ pcb->pcb_flags &= ~PCB_DBREGS;
+ }
+}
+
+void
+cpu_thread_clean(struct thread *td)
+{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+ if (pcb->pcb_ext != 0) {
+ /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */
+ /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */
+ /*
+ * XXX do we need to move the TSS off the allocated pages
+ * before freeing them? (not done here)
+ */
+ kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
+ ctob(IOPAGES + 1));
+ pcb->pcb_ext = 0;
+ }
+}
+
+void
+cpu_thread_swapin(struct thread *td)
+{
+}
+
+void
+cpu_thread_swapout(struct thread *td)
+{
+}
+
+void
+cpu_thread_setup(struct thread *td)
+{
+
+ td->td_pcb =
+ (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1;
+ td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
+ td->td_pcb->pcb_ext = NULL;
+}
+
+/*
+ * Initialize machine state (pcb and trap frame) for a new thread about to
+ * upcall.  Put enough state in the new thread's PCB to get it back to
+ * userret(), where we can intercept it again to set the return (upcall)
+ * address and stack, along with those from upcalls that are from other sources
+ * such as those generated in thread_userret() itself.
+ */
+void
+cpu_set_upcall(struct thread *td, struct thread *td0)
+{
+ struct pcb *pcb2;
+
+ /* Point the pcb to the top of the stack. */
+ pcb2 = td->td_pcb;
+
+ /*
+ * Copy the upcall pcb. This loads kernel regs.
+ * Those not loaded individually below get their default
+ * values here.
+ *
+ * XXXKSE It might be a good idea to simply skip this as
+ * the values of the other registers may be unimportant.
+ * This would remove any requirement for knowing the KSE
+ * at this time (see the matching comment below for
+ * more analysis) (need a good safe default).
+ */
+ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
+ pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE);
+
+ /*
+ * Create a new fresh stack for the new thread.
+ * Don't forget to set this stack value into whatever supplies
+ * the address for the fault handlers.
+ * The contexts are filled in at the time we actually DO the
+ * upcall as only then do we know which KSE we got.
+ */
+ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
+
+ /*
+ * Set registers for trampoline to user mode. Leave space for the
+ * return address on stack. These are the kernel mode register values.
+ */
+#ifdef PAE
+ pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt);
+#else
+ pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir);
+#endif
+ pcb2->pcb_edi = 0;
+ pcb2->pcb_esi = (int)fork_return; /* trampoline arg */
+ pcb2->pcb_ebp = 0;
+ pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */
+ pcb2->pcb_ebx = (int)td; /* trampoline arg */
+ pcb2->pcb_eip = (int)fork_trampoline;
+ pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */
+ pcb2->pcb_gs = rgs();
+ /*
+	 * If we didn't copy the pcb, we'd need to set the following registers:
+ * pcb2->pcb_dr*: cloned above.
+ * pcb2->pcb_savefpu: cloned above.
+ * pcb2->pcb_flags: cloned above.
+ * pcb2->pcb_onfault: cloned above (always NULL here?).
+ * pcb2->pcb_gs: cloned above. XXXKSE ???
+ * pcb2->pcb_ext: cleared below.
+ */
+ pcb2->pcb_ext = NULL;
+}
+
+/*
+ * Set that machine state for performing an upcall that has to
+ * be done in thread_userret() so that those upcalls generated
+ * in thread_userret() itself can be done as well.
+ */
+void
+cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku)
+{
+
+ /*
+ * Do any extra cleaning that needs to be done.
+ * The thread may have optional components
+ * that are not present in a fresh thread.
+ * This may be a recycled thread so make it look
+ * as though it's newly allocated.
+ */
+ cpu_thread_clean(td);
+
+ /*
+ * Set the trap frame to point at the beginning of the uts
+ * function.
+ */
+ td->td_frame->tf_ebp = 0;
+ td->td_frame->tf_esp =
+ (int)ku->ku_stack.ss_sp + ku->ku_stack.ss_size - 16;
+ td->td_frame->tf_eip = (int)ku->ku_func;
+
+ /*
+ * Pass the address of the mailbox for this kse to the uts
+ * function as a parameter on the stack.
+ */
+ suword((void *)(td->td_frame->tf_esp + sizeof(void *)),
+ (int)ku->ku_mailbox);
+}
+
+/*
+ * Convert kernel VA to physical address
+ */
+vm_paddr_t
+kvtop(void *addr)
+{
+ vm_paddr_t pa;
+
+ pa = pmap_kextract((vm_offset_t)addr);
+ if (pa == 0)
+ panic("kvtop: zero page frame");
+ return (pa);
+}
+
+/*
+ * Force reset the processor by invalidating the entire address space!
+ */
+
+#ifdef SMP
+static void
+cpu_reset_proxy()
+{
+
+ cpu_reset_proxy_active = 1;
+ while (cpu_reset_proxy_active == 1)
+ ; /* Wait for other cpu to see that we've started */
+ stop_cpus((1<<cpu_reset_proxyid));
+ printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
+ DELAY(1000000);
+ cpu_reset_real();
+}
+#endif
+
+void
+cpu_reset()
+{
+ HYPERVISOR_shutdown();
+}
+
+
+/*
+ * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
+ */
+static void
+sf_buf_init(void *arg)
+{
+ struct sf_buf *sf_bufs;
+ vm_offset_t sf_base;
+ int i;
+
+ nsfbufs = NSFBUFS;
+ TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
+
+ sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
+ TAILQ_INIT(&sf_buf_freelist);
+ sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
+ sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
+ M_NOWAIT | M_ZERO);
+ for (i = 0; i < nsfbufs; i++) {
+ sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
+ TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
+ }
+ sf_buf_alloc_want = 0;
+ mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
+}
+
+/*
+ * Get an sf_buf from the freelist. Will block if none are available.
+ */
+struct sf_buf *
+sf_buf_alloc(struct vm_page *m, int pri)
+{
+ struct sf_head *hash_list;
+ struct sf_buf *sf;
+ int error;
+
+ hash_list = &sf_buf_active[SF_BUF_HASH(m)];
+ mtx_lock(&sf_buf_lock);
+ LIST_FOREACH(sf, hash_list, list_entry) {
+ if (sf->m == m) {
+ sf->ref_count++;
+ if (sf->ref_count == 1) {
+ TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
+ nsfbufsused++;
+ nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
+ }
+ goto done;
+ }
+ }
+ while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
+ sf_buf_alloc_want++;
+ mbstat.sf_allocwait++;
+ error = msleep(&sf_buf_freelist, &sf_buf_lock, PVM | pri,
+ "sfbufa", 0);
+ sf_buf_alloc_want--;
+
+ /*
+ * If we got a signal, don't risk going back to sleep.
+ */
+ if (error)
+ goto done;
+ }
+ TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
+ if (sf->m != NULL)
+ LIST_REMOVE(sf, list_entry);
+ LIST_INSERT_HEAD(hash_list, sf, list_entry);
+ sf->ref_count = 1;
+ sf->m = m;
+ nsfbufsused++;
+ nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
+ pmap_qenter(sf->kva, &sf->m, 1);
+done:
+ mtx_unlock(&sf_buf_lock);
+ return (sf);
+}
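+
+/*
+ * Usage sketch (illustrative only; "m" is a wired vm_page supplied by
+ * the caller): sendfile(2)-style I/O borrows a kernel mapping for the
+ * page and returns it when the transfer completes.
+ */
+#if 0	/* example, not compiled */
+	struct sf_buf *sf;
+
+	sf = sf_buf_alloc(m, PCATCH);	/* NULL if interrupted by a signal */
+	if (sf != NULL) {
+		/* sf->kva now maps page m in kernel virtual address space */
+		sf_buf_free(sf);
+	}
+#endif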
+
+/*
+ * Detach mapped page and release resources back to the system.
+ */
+void
+sf_buf_free(struct sf_buf *sf)
+{
+ mtx_lock(&sf_buf_lock);
+ sf->ref_count--;
+ if (sf->ref_count == 0) {
+ TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
+ nsfbufsused--;
+ /* XEN only */
+ pmap_qremove(sf->kva, 1);
+ sf->m = NULL;
+ LIST_REMOVE(sf, list_entry);
+ /* ----- */
+ if (sf_buf_alloc_want > 0)
+ wakeup_one(&sf_buf_freelist);
+ }
+ mtx_unlock(&sf_buf_lock);
+}
+
+/*
+ * Software interrupt handler for queued VM system processing.
+ */
+void
+swi_vm(void *dummy)
+{
+ if (busdma_swi_pending != 0)
+ busdma_swi();
+}
+
+/*
+ * Tell whether this address is in some physical memory region.
+ * Currently used by the kernel coredump code in order to avoid
+ * dumping the ``ISA memory hole'' which could cause indefinite hangs,
+ * or other unpredictable behaviour.
+ */
+
+int
+is_physical_memory(vm_paddr_t addr)
+{
+
+#ifdef DEV_ISA
+ /* The ISA ``memory hole''. */
+ if (addr >= 0xa0000 && addr < 0x100000)
+ return 0;
+#endif
+
+ /*
+ * stuff other tests for known memory-mapped devices (PCI?)
+ * here
+ */
+
+ return 1;
+}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c
new file mode 100644
index 0000000000..96f6ca086b
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c
@@ -0,0 +1,238 @@
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/resource.h>
+
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/xen_intr.h>
+
+static MALLOC_DEFINE(M_XENDEV, "xenintrdrv", "xen system device");
+
+struct xenbus_device {
+ struct resource_list xen_resources;
+};
+
+#define DEVTOXEN(dev) ((struct xenbus_device *)device_get_ivars(dev))
+
+static void xenbus_identify(driver_t *, device_t);
+static int xenbus_probe(device_t);
+static int xenbus_attach(device_t);
+static int xenbus_print_child(device_t, device_t);
+static device_t xenbus_add_child(device_t bus, int order, const char *name,
+ int unit);
+static struct resource *xenbus_alloc_resource(device_t, device_t, int, int *,
+ u_long, u_long, u_long, u_int);
+static int xenbus_release_resource(device_t, device_t, int, int,
+ struct resource *);
+static int xenbus_set_resource(device_t, device_t, int, int, u_long, u_long);
+static int xenbus_get_resource(device_t, device_t, int, int, u_long *, u_long *);
+static void xenbus_delete_resource(device_t, device_t, int, int);
+
+
+static device_method_t xenbus_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_identify, xenbus_identify),
+ DEVMETHOD(device_probe, xenbus_probe),
+ DEVMETHOD(device_attach, xenbus_attach),
+ DEVMETHOD(device_detach, bus_generic_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+
+ /* Bus interface */
+ DEVMETHOD(bus_print_child, xenbus_print_child),
+ DEVMETHOD(bus_add_child, xenbus_add_child),
+ DEVMETHOD(bus_read_ivar, bus_generic_read_ivar),
+ DEVMETHOD(bus_write_ivar, bus_generic_write_ivar),
+ DEVMETHOD(bus_set_resource, xenbus_set_resource),
+ DEVMETHOD(bus_get_resource, xenbus_get_resource),
+ DEVMETHOD(bus_alloc_resource, xenbus_alloc_resource),
+ DEVMETHOD(bus_release_resource, xenbus_release_resource),
+ DEVMETHOD(bus_delete_resource, xenbus_delete_resource),
+ DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
+ DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
+ DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
+ DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
+
+ { 0, 0 }
+};
+
+
+static driver_t xenbus_driver = {
+ "xenbus",
+ xenbus_methods,
+ 1, /* no softc */
+};
+static devclass_t xenbus_devclass;
+static device_t xenbus_dev;
+static boolean_t xenbus_probe_delay = TRUE; /* delay child probes */
+
+DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0);
+
+static void
+xenbus_identify(driver_t *driver, device_t parent)
+{
+
+ /*
+ * Add child device with order of 0 so it gets probed
+ * first
+ */
+ xenbus_dev = BUS_ADD_CHILD(parent, 0, "xenbus", 0);
+ if (xenbus_dev == NULL)
+ panic("xenbus: could not attach");
+}
+
+static int
+xenbus_probe(device_t dev)
+{
+ device_set_desc(dev, "xen system");
+ device_quiet(dev);
+ return (0);
+}
+
+static int
+xenbus_attach(device_t dev)
+{
+ /*
+	 * First, let our child drivers identify any child devices that
+	 * they can find. Once that is done, attach any devices that we
+	 * found.
+ */
+ if (!xenbus_probe_delay) {
+ bus_generic_probe(dev);
+ bus_generic_attach(dev);
+ }
+
+ return 0;
+}
+
+
+static int
+xenbus_print_all_resources(device_t dev)
+{
+ struct xenbus_device *xdev = device_get_ivars(dev);
+ struct resource_list *rl = &xdev->xen_resources;
+ int retval = 0;
+
+ if (SLIST_FIRST(rl))
+ retval += printf(" at");
+
+ retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
+ retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#lx");
+ retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
+
+ return retval;
+}
+
+
+static int
+xenbus_print_child(device_t bus, device_t child)
+{
+ int retval = 0;
+
+ retval += bus_print_child_header(bus, child);
+ retval += xenbus_print_all_resources(child);
+ retval += printf(" on motherboard\n"); /* XXX "motherboard", ick */
+
+ return (retval);
+}
+
+static device_t
+xenbus_add_child(device_t bus, int order, const char *name, int unit)
+{
+ device_t child;
+ struct xenbus_device *xendev;
+
+ xendev = malloc(sizeof(struct xenbus_device), M_XENDEV,
+ M_NOWAIT | M_ZERO);
+ if (!xendev)
+ return(0);
+ resource_list_init(&xendev->xen_resources);
+
+ child = device_add_child_ordered(bus, order, name, unit);
+
+ /* should we free this in xenbus_child_detached? */
+ device_set_ivars(child, xendev);
+
+ return(child);
+}
+
+static struct resource *
+xenbus_alloc_resource(device_t bus, device_t child, int type, int *rid,
+ u_long start, u_long end, u_long count, u_int flags)
+{
+ struct xenbus_device *xendev = DEVTOXEN(child);
+ struct resource_list *rl = &xendev->xen_resources;
+
+ return (resource_list_alloc(rl, bus, child, type, rid, start, end,
+ count, flags));
+}
+
+
+static int
+xenbus_release_resource(device_t bus, device_t child, int type, int rid,
+ struct resource *r)
+{
+ struct xenbus_device *xendev = DEVTOXEN(child);
+ struct resource_list *rl = &xendev->xen_resources;
+
+ return (resource_list_release(rl, bus, child, type, rid, r));
+}
+
+static int
+xenbus_set_resource(device_t dev, device_t child, int type, int rid,
+ u_long start, u_long count)
+{
+ struct xenbus_device *xendev = DEVTOXEN(child);
+ struct resource_list *rl = &xendev->xen_resources;
+
+ resource_list_add(rl, type, rid, start, start + count - 1, count);
+ return(0);
+}
+
+static int
+xenbus_get_resource(device_t dev, device_t child, int type, int rid,
+ u_long *startp, u_long *countp)
+{
+ struct xenbus_device *xendev = DEVTOXEN(child);
+ struct resource_list *rl = &xendev->xen_resources;
+ struct resource_list_entry *rle;
+
+ rle = resource_list_find(rl, type, rid);
+ if (!rle)
+ return(ENOENT);
+ if (startp)
+ *startp = rle->start;
+ if (countp)
+ *countp = rle->count;
+ return(0);
+}
+
+static void
+xenbus_delete_resource(device_t dev, device_t child, int type, int rid)
+{
+ struct xenbus_device *xendev = DEVTOXEN(child);
+ struct resource_list *rl = &xendev->xen_resources;
+
+ resource_list_delete(rl, type, rid);
+}
+
+static void
+xenbus_init(void *unused)
+{
+ xenbus_probe_delay = FALSE;
+ xenbus_attach(xenbus_dev);
+}
+SYSINIT(xenbusdev, SI_SUB_PSEUDO, SI_ORDER_FIRST, xenbus_init, NULL);
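+
+/*
+ * Usage sketch (illustrative; a hypothetical child driver): xen device
+ * drivers publish their event-channel IRQ into the resource list kept
+ * in the ivars and then allocate it through the bus methods above.
+ */
+#if 0	/* example, not compiled */
+	int rid = 0;
+	struct resource *res;
+
+	bus_set_resource(dev, SYS_RES_IRQ, rid, irq, 1);
+	res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE);
+	if (res == NULL)
+		return (ENXIO);
+#endif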
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
new file mode 100644
index 0000000000..dd24a206b1
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
@@ -0,0 +1,687 @@
+/* $NetBSD:$ */
+
+/*
+ *
+ * Copyright (c) 2004 Christian Limpach.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Christian Limpach.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/reboot.h>
+
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/stdarg.h>
+#include <machine/xenfunc.h>
+#include <machine/xenpmap.h>
+#include <machine/vmparam.h>
+#include <machine/cpu.h>
+#include <machine/xenvar.h>
+
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <sys/mbuf.h>
+#include <nfs/rpcv2.h>
+#include <nfsclient/krpc.h>
+#include <nfs/nfsproto.h>
+
+
+shared_info_t *HYPERVISOR_shared_info;
+
+void ni_cli(void);
+void ni_sti(void);
+#ifdef NFS_ROOT
+
+static int
+xdr_opaque_decode(struct mbuf **mptr, u_char *buf, int len)
+{
+ struct mbuf *m;
+ int alignedlen;
+
+ m = *mptr;
+	alignedlen = (len + 3) & ~3;
+
+ if (m->m_len < alignedlen) {
+ m = m_pullup(m, alignedlen);
+ if (m == NULL) {
+ *mptr = NULL;
+ return EBADRPC;
+ }
+ }
+ bcopy(mtod(m, u_char *), buf, len);
+ m_adj(m, alignedlen);
+ *mptr = m;
+ return 0;
+}
+
+
+static int
+getdec(char **ptr)
+{
+ char *p;
+ int ret;
+
+ p = *ptr;
+ ret = 0;
+ if ((*p < '0') || (*p > '9'))
+ return -1;
+ while ((*p >= '0') && (*p <= '9')) {
+ ret = ret * 10 + (*p - '0');
+ p++;
+ }
+ *ptr = p;
+ return ret;
+}
+
+int
+setinaddr(struct sockaddr_in *addr, char *ipstr)
+{
+ unsigned int ip;
+ int val;
+
+ ip = 0;
+ if (((val = getdec(&ipstr)) < 0) || (val > 255))
+ return 1;
+ ip = val << 24;
+ if (*ipstr != '.')
+ return 1;
+ ipstr++;
+ if (((val = getdec(&ipstr)) < 0) || (val > 255))
+ return 1;
+ ip |= (val << 16);
+ if (*ipstr != '.')
+ return 1;
+ ipstr++;
+ if (((val = getdec(&ipstr)) < 0) || (val > 255))
+ return 1;
+ ip |= (val << 8);
+ if (*ipstr != '.')
+ return 1;
+ ipstr++;
+ if (((val = getdec(&ipstr)) < 0) || (val > 255))
+ return 1;
+ ip |= val;
+
+ addr->sin_addr.s_addr = htonl(ip);
+ addr->sin_len = sizeof(struct sockaddr_in);
+ addr->sin_family = AF_INET;
+
+ return 0;
+}
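+
+/*
+ * Example (illustrative): setinaddr(&sin, "10.0.0.1") accumulates the
+ * getdec() octets into the host-order word 0x0a000001 and stores it
+ * with htonl(), returning 0; any malformed octet or separator makes it
+ * return 1 with the address only partially parsed.
+ */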
+
+static int
+hwaddr_to_sockaddr(char *ev, struct sockaddr_dl *sa)
+{
+ char *cp;
+ u_int32_t a[6];
+ int count;
+
+ bzero(sa, sizeof(*sa));
+ sa->sdl_len = sizeof(*sa);
+ sa->sdl_family = AF_LINK;
+ sa->sdl_type = IFT_ETHER;
+ sa->sdl_alen = ETHER_ADDR_LEN;
+ if ((cp = getenv(ev)) == NULL)
+ return (1);
+ count = sscanf(cp, "%x:%x:%x:%x:%x:%x",
+ &a[0], &a[1], &a[2], &a[3], &a[4], &a[5]);
+ freeenv(cp);
+ if (count != 6)
+ return (1);
+ sa->sdl_data[0] = a[0];
+ sa->sdl_data[1] = a[1];
+ sa->sdl_data[2] = a[2];
+ sa->sdl_data[3] = a[3];
+ sa->sdl_data[4] = a[4];
+ sa->sdl_data[5] = a[5];
+ return (0);
+}
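+
+/*
+ * Example (illustrative): with boot.netif.hwaddr="00:16:3e:00:01:02"
+ * in the kernel environment, the sscanf() above yields sdl_data =
+ * { 0x00, 0x16, 0x3e, 0x00, 0x01, 0x02 }, an AF_LINK address that
+ * xen_setnetwork() below matches against each interface's link-level
+ * ifaddr to find the xen network interface.
+ */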
+extern int in_control(struct socket *so, u_long cmd,
+ caddr_t data, struct ifnet *ifp,
+ struct thread *td);
+
+static int
+xen_setnetwork(void)
+{
+ int error = 0;
+ struct ifaddr *ifa;
+ struct ifnet *ifp;
+ struct sockaddr_dl *sdl, ourdl;
+
+ if (sizeof(struct sockaddr) != sizeof(struct sockaddr_in))
+ panic("sizes not equal\n");
+
+ if (hwaddr_to_sockaddr("boot.netif.hwaddr", &ourdl)) {
+ printf("nfs_diskless: no hardware address\n");
+ return -1;
+ }
+
+
+ ifa = NULL;
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifp, &ifnet, if_link) {
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if ((ifa->ifa_addr->sa_family == AF_LINK) &&
+ (sdl = ((struct sockaddr_dl *)ifa->ifa_addr))) {
+ if ((sdl->sdl_type == ourdl.sdl_type) &&
+ (sdl->sdl_alen == ourdl.sdl_alen) &&
+ !bcmp(sdl->sdl_data + sdl->sdl_nlen,
+ ourdl.sdl_data + ourdl.sdl_nlen,
+ sdl->sdl_alen)) {
+ IFNET_RUNLOCK();
+ goto match_done;
+ }
+ }
+ }
+ }
+ IFNET_RUNLOCK();
+ printf("nfs_diskless: no interface\n");
+ return -1; /* no matching interface */
+ match_done:
+
+ if (getenv("boot.netif.ip") && getenv("boot.netif.gateway") &&
+ getenv("boot.netif.netmask")) {
+ struct ifaliasreq ifra;
+ char *ip;
+
+ bzero(&ifra, sizeof(ifra));
+ strcpy(ifra.ifra_name, "xn0");
+ ip = getenv("boot.netif.ip");
+ setinaddr((struct sockaddr_in *)&(ifra.ifra_addr), ip);
+ printf("setting ip to %s\n", ip);
+ ip = getenv("boot.netif.netmask");
+ setinaddr((struct sockaddr_in *)&ifra.ifra_mask, ip);
+ setinaddr((struct sockaddr_in *)&ifra.ifra_broadaddr, "255.255.255.255");
+
+
+ if ((error = in_control(NULL, SIOCAIFADDR, (caddr_t) &ifra, ifp, curthread)))
+ printf("couldn't set interface address %d\n", error);
+#if 0
+ if ((error = xn_ioctl(ifp, SIOCSIFNETMASK, (caddr_t)&ifa)))
+ printf("couldn't set interface netmask %d\n", error);
+#endif
+ }
+ return error;
+}
+
+int
+xen_setnfshandle(void)
+{
+ char *path, *ip;
+ u_char fhp[NFSX_V2FH];
+ int error = 0;
+	struct sockaddr_in sin_local, *sin;
+ struct mbuf *m;
+
+ if ((error = xen_setnetwork()))
+ return error;
+
+ sin = &sin_local;
+
+ path = getenv("boot.nfsroot.path");
+ ip = getenv("boot.nfsroot.server");
+
+ /* we aren't configured for NFS root */
+ if (!path || !ip)
+ return 0;
+
+ error = setinaddr(sin, ip);
+ if (error) {
+ printf("invalid ip address %s\n", ip);
+ return error;
+ }
+
+ error = krpc_portmap(sin, RPCPROG_MNT, RPCMNT_VER1,
+ &sin->sin_port, curthread);
+ if (error) {
+ printf("failed to find port number for mountd\n");
+ return error;
+ }
+ m = xdr_string_encode(path, strlen(path));
+
+ /* Do RPC to mountd */
+ error = krpc_call(sin, RPCPROG_MNT, RPCMNT_VER1,
+ RPCMNT_MOUNT, &m, NULL, curthread);
+ if (error) {
+ printf("call to mountd failed\n");
+ return error;
+ }
+
+ if (xdr_opaque_decode(&m, fhp, NFSX_V2FH) != 0) {
+ printf("failed to decode nfs file handle\n");
+		return EBADRPC;	/* error is still 0 here; return a real errno */
+ }
+
+ setenv("boot.nfsroot.nfshandle", fhp);
+
+ return 0;
+}
+#endif
+void
+ni_cli(void)
+{
+ __asm__("pushl %edx;"
+ "pushl %eax;"
+ );
+ __cli();
+ __asm__("popl %eax;"
+ "popl %edx;"
+ );
+}
+
+
+void
+ni_sti(void)
+{
+ __asm__("pushl %edx;"
+ "pushl %esi;"
+ "pushl %eax;"
+ );
+ __sti();
+ __asm__("popl %eax;"
+ "popl %esi;"
+ "popl %edx;"
+ );
+}
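+
+/*
+ * The push/pop pairs above preserve scratch registers around the
+ * __cli()/__sti() event-channel sequences, so these wrappers can be
+ * called from assembly paths that keep live values in %eax, %edx and
+ * %esi (registers the compiled __cli()/__sti() bodies may clobber).
+ */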
+
+/*
+ * Modify the cmd_line by converting each ',' to a NUL so that it is in a
+ * format suitable for the static env vars.
+ * suitable for the static env vars.
+ */
+char *
+xen_setbootenv(char *cmd_line)
+{
+ char *cmd_line_next;
+
+ for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
+ return cmd_line;
+}
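+
+/*
+ * Example (illustrative): a guest command line of
+ * "boot_verbose,boot.nfsroot.server=10.0.0.2" is rewritten in place to
+ * "boot_verbose\0boot.nfsroot.server=10.0.0.2\0", the NUL-separated
+ * layout that the static kernel environment expects.
+ */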
+
+static struct
+{
+ const char *ev;
+ int mask;
+} howto_names[] = {
+ {"boot_askname", RB_ASKNAME},
+ {"boot_cdrom", RB_CDROM},
+ {"boot_userconfig", RB_CONFIG},
+ {"boot_ddb", RB_KDB},
+ {"boot_gdb", RB_GDB},
+ {"boot_gdb_pause", RB_GDB_PAUSE},
+ {"boot_single", RB_SINGLE},
+ {"boot_verbose", RB_VERBOSE},
+ {"boot_multicons", RB_MULTIPLE},
+ {"boot_serial", RB_SERIAL},
+ {NULL, 0}
+};
+
+int
+xen_boothowto(char *envp)
+{
+ int i, howto = 0;
+
+ /* get equivalents from the environment */
+ for (i = 0; howto_names[i].ev != NULL; i++)
+ if (getenv(howto_names[i].ev) != NULL)
+ howto |= howto_names[i].mask;
+ return howto;
+}
+
+#define PRINTK_BUFSIZE 1024
+void
+printk(const char *fmt, ...)
+{
+ __va_list ap;
+ int ret;
+ static char buf[PRINTK_BUFSIZE];
+
+ va_start(ap, fmt);
+ ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
+ va_end(ap);
+	if (ret > PRINTK_BUFSIZE - 1)
+		ret = PRINTK_BUFSIZE - 1; /* vsnprintf returns the untruncated length */
+	buf[ret] = 0;
+ (void)HYPERVISOR_console_write(buf, ret);
+}
+
+#define XPQUEUE_SIZE 2048
+
+typedef struct xpq_queue {
+ uint32_t ptr;
+ uint32_t val;
+} xpq_queue_t;
+
+#define MCLQUEUE_SIZE 512
+static multicall_entry_t mcl_queue[MCLQUEUE_SIZE];
+static int mcl_idx = 0;
+
+static xpq_queue_t xpq_queue[XPQUEUE_SIZE];
+static boolean_t xpq_initialized;
+static struct mtx update_lock;
+static int xpq_idx = 0;
+
+/*
+ * Don't attempt to lock until after lock & memory initialization
+ */
+#define XPQ_LOCK(lock, flags) \
+ if (likely(xpq_initialized)) \
+ mtx_lock_irqsave(lock, flags)
+#define XPQ_UNLOCK(lock, flags) \
+ if (likely(xpq_initialized)) \
+ mtx_unlock_irqrestore(lock, flags)
+
+void
+xpq_init(void)
+{
+ xpq_initialized = TRUE;
+ mtx_init(&update_lock, "mmu", "MMU LOCK", MTX_SPIN);
+}
+
+static __inline void
+_xpq_flush_queue(void)
+{
+ int _xpq_idx = xpq_idx;
+ int error, i;
+
+ xpq_idx = 0;
+ /* Make sure index is cleared first to avoid double updates. */
+ error = HYPERVISOR_mmu_update((mmu_update_t *)xpq_queue, _xpq_idx,
+ NULL);
+
+ if (__predict_false(error < 0)) {
+ for (i = 0; i < _xpq_idx; i++)
+ printk("val: %x ptr: %p\n", xpq_queue[i].val, xpq_queue[i].ptr);
+ panic("Failed to execute MMU updates: %d", error);
+ }
+
+}
+static void
+xpq_flush_queue(void)
+{
+ unsigned long flags = 0;
+
+ XPQ_LOCK(&update_lock, flags);
+ if (xpq_idx != 0) _xpq_flush_queue();
+ XPQ_UNLOCK(&update_lock, flags);
+}
+
+static __inline void
+_mcl_flush_queue(void)
+{
+ int _mcl_idx = mcl_idx;
+ mcl_idx = 0;
+ (void)HYPERVISOR_multicall(mcl_queue, _mcl_idx);
+}
+
+void
+mcl_flush_queue(void)
+{
+ unsigned long flags = 0;
+
+ XPQ_LOCK(&update_lock, flags);
+ if (__predict_true(mcl_idx != 0)) _mcl_flush_queue();
+ XPQ_UNLOCK(&update_lock, flags);
+ /* XXX: until we can remove the pervasive
+ * __HYPERVISOR_update_va_mapping calls, we have 2 queues. In order
+ * to ensure that they never get out of sync, only 1 flush interface
+ * is provided.
+ */
+ xpq_flush_queue();
+}
+
+
+static __inline void
+xpq_increment_idx(void)
+{
+ xpq_idx++;
+ if (__predict_false(xpq_idx == XPQUEUE_SIZE))
+ xpq_flush_queue();
+}
+
+static __inline void
+mcl_increment_idx(void)
+{
+ mcl_idx++;
+ if (__predict_false(mcl_idx == MCLQUEUE_SIZE))
+ mcl_flush_queue();
+}
+
+void
+xpq_queue_invlpg(vm_offset_t va)
+{
+ unsigned long flags = 0;
+
+ XPQ_LOCK(&update_lock, flags);
+ xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
+ xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
+ xpq_increment_idx();
+ XPQ_UNLOCK(&update_lock, flags);
+}
+
+void
+load_cr3(uint32_t val)
+{
+ xpq_queue_pt_switch(val);
+ xpq_flush_queue();
+}
+
+void
+xen_set_ldt(vm_offset_t base, uint32_t entries)
+{
+ xpq_queue_set_ldt(base, entries);
+ _xpq_flush_queue();
+}
+
+void
+xen_machphys_update(unsigned long mfn, unsigned long pfn)
+{
+ unsigned long flags = 0;
+ XPQ_LOCK(&update_lock, flags);
+ xpq_queue[xpq_idx].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ xpq_queue[xpq_idx].val = pfn;
+ xpq_increment_idx();
+ _xpq_flush_queue();
+ XPQ_UNLOCK(&update_lock, flags);
+}
+
+void
+xpq_queue_pt_update(pt_entry_t *ptr, pt_entry_t val)
+{
+ unsigned long flags = 0;
+
+ XPQ_LOCK(&update_lock, flags);
+ xpq_queue[xpq_idx].ptr = (uint32_t)ptr;
+ xpq_queue[xpq_idx].val = val;
+ xpq_increment_idx();
+ XPQ_UNLOCK(&update_lock, flags);
+}
+
+void
+mcl_queue_pt_update(vm_offset_t va, vm_paddr_t ma)
+{
+#if 0
+ printf("setting va %x to ma %x\n", va, ma);
+#endif
+ unsigned long flags = 0;
+ XPQ_LOCK(&update_lock, flags);
+ mcl_queue[mcl_idx].op = __HYPERVISOR_update_va_mapping;
+ mcl_queue[mcl_idx].args[0] = (unsigned long)(va >> PAGE_SHIFT);
+ mcl_queue[mcl_idx].args[1] = (unsigned long)ma;
+ mcl_queue[mcl_idx].args[2] = 0;
+ mcl_increment_idx();
+ XPQ_UNLOCK(&update_lock, flags);
+}
+
+
+
+void
+xpq_queue_pt_switch(uint32_t val)
+{
+ unsigned long flags = 0;
+ vm_paddr_t ma = xpmap_ptom(val) & PG_FRAME;
+
+ XPQ_LOCK(&update_lock, flags);
+ xpq_queue[xpq_idx].ptr = ma | MMU_EXTENDED_COMMAND;
+ xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
+ xpq_increment_idx();
+ XPQ_UNLOCK(&update_lock, flags);
+}
+
+
+void
+xpq_queue_pin_table(uint32_t pa, int type)
+{
+ unsigned long flags = 0;
+ XPQ_LOCK(&update_lock, flags);
+ xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
+ switch (type) {
+ case XPQ_PIN_L1_TABLE:
+ xpq_queue[xpq_idx].val = MMUEXT_PIN_L1_TABLE;
+ break;
+ case XPQ_PIN_L2_TABLE:
+ xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
+ break;
+ }
+ xpq_increment_idx();
+ XPQ_UNLOCK(&update_lock, flags);
+}
+
+void
+xpq_queue_unpin_table(uint32_t pa)
+{
+ unsigned long flags = 0;
+
+ XPQ_LOCK(&update_lock, flags);
+ xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
+ xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
+ xpq_increment_idx();
+ XPQ_UNLOCK(&update_lock, flags);
+}
+
+void
+xpq_queue_set_ldt(vm_offset_t va, uint32_t entries)
+{
+ unsigned long flags = 0;
+
+ XPQ_LOCK(&update_lock, flags);
+ KASSERT(va == (va & PG_FRAME), ("ldt not page aligned"));
+ xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
+ xpq_queue[xpq_idx].val = MMUEXT_SET_LDT |
+ (entries << MMUEXT_CMD_SHIFT);
+ xpq_increment_idx();
+ XPQ_UNLOCK(&update_lock, flags);
+}
+
+void
+xpq_queue_tlb_flush()
+{
+ unsigned long flags = 0;
+
+ XPQ_LOCK(&update_lock, flags);
+
+ xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
+ xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
+ xpq_increment_idx();
+ XPQ_UNLOCK(&update_lock, flags);
+}
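+
+/*
+ * Usage sketch (illustrative; va and pa are a hypothetical kernel
+ * virtual address and physical page address): callers batch their
+ * page-table writes through the queue and make them visible with a
+ * single flush.
+ */
+#if 0	/* example, not compiled */
+	pt_entry_t *pte = vtopte(va);
+
+	xpq_queue_pt_update(pte, xpmap_ptom(pa) | PG_V | PG_RW);
+	xpq_queue_invlpg(va);
+	mcl_flush_queue();	/* drains both queues via the hypervisor */
+#endif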
+
+
+/********** CODE WORTH KEEPING ABOVE HERE *****************/
+
+void xen_failsafe_handler(void);
+
+void
+xen_failsafe_handler(void)
+{
+
+ panic("xen_failsafe_handler called!\n");
+}
+
+
+void
+xen_update_descriptor(union descriptor *table, union descriptor *entry)
+{
+ vm_paddr_t pa;
+ pt_entry_t *ptp;
+ uint32_t raw[2];
+
+ bcopy(entry, raw, 2*sizeof(int32_t));
+ ptp = vtopte((vm_offset_t)table);
+ pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
+ if (HYPERVISOR_update_descriptor(pa, raw[0], raw[1]))
+ panic("HYPERVISOR_update_descriptor failed\n");
+}
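+
+/*
+ * Example (illustrative): user LDT code can install a single slot with
+ * xen_update_descriptor(&ldt[slot], &desc).  The PTE walk above is
+ * needed because HYPERVISOR_update_descriptor() takes the machine
+ * address of the descriptor slot, not a kernel virtual address.
+ */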
+
+
+
+#if defined(XENDEBUG)
+static void
+xpmap_dump_pt(pt_entry_t *ptp, int p)
+{
+ pt_entry_t pte;
+ int j;
+ int bufpos;
+
+ pte = xpmap_ptom((uint32_t)ptp - KERNTEXTOFF);
+ PRINTK(("%03x: %p(%p) %08x\n", p, ptp, (void *)pte, p << PDRSHIFT));
+
+ bufpos = 0;
+ for (j = 0; j < PTES_PER_PTP; j++) {
+ if ((ptp[j] & PG_V) == 0)
+ continue;
+ pte = ptp[j] /* & PG_FRAME */;
+ bufpos += sprintf(XBUF + bufpos, "%x:%03x:%08x ",
+ p, j, pte);
+ if (bufpos > 70) {
+ int k;
+ sprintf(XBUF + bufpos, "\n");
+ PRINTK((XBUF));
+ bufpos = 0;
+ for (k = 0; k < 1000000; k++);
+ }
+ }
+ if (bufpos) {
+ PRINTK((XBUF));
+ bufpos = 0;
+ }
+}
+#endif
+
+
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h b/freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h
new file mode 100644
index 0000000000..fadc3a4a26
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h
@@ -0,0 +1,601 @@
+/*-
+ * Copyright (c) 1993 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/include/cpufunc.h,v 1.135 2003/08/06 18:21:27 bde Exp $
+ */
+
+/*
+ * Functions to provide access to special i386 instructions.
+ * This in included in sys/systm.h, and that file should be
+ * used in preference to this.
+ */
+
+#ifndef _MACHINE_CPUFUNC_H_
+#define _MACHINE_CPUFUNC_H_
+
+#include <sys/cdefs.h>
+#include <machine/psl.h>
+#define NO_EXCHANGE
+#include <machine/xen-os.h>
+#include <machine/evtchn.h>
+#include <machine/xenvar.h>
+struct thread;
+struct region_descriptor;
+
+__BEGIN_DECLS
+#define readb(va) (*(volatile u_int8_t *) (va))
+#define readw(va) (*(volatile u_int16_t *) (va))
+#define readl(va) (*(volatile u_int32_t *) (va))
+
+#define writeb(va, d) (*(volatile u_int8_t *) (va) = (d))
+#define writew(va, d) (*(volatile u_int16_t *) (va) = (d))
+#define writel(va, d) (*(volatile u_int32_t *) (va) = (d))
+
+static __inline u_int
+read_eflags(void)
+{
+ u_int ef;
+ __asm __volatile("pushfl; popl %0" : "=r" (ef));
+ return (ef);
+}
+
+static __inline void
+write_eflags(u_int ef)
+{
+ __asm __volatile("pushl %0; popfl" : : "r" (ef));
+}
+#ifdef __GNUC__
+
+static __inline void
+breakpoint(void)
+{
+ __asm __volatile("int $3");
+}
+
+static __inline u_int
+bsfl(u_int mask)
+{
+ u_int result;
+
+ __asm __volatile("bsfl %1,%0" : "=r" (result) : "rm" (mask));
+ return (result);
+}
+
+static __inline u_int
+bsrl(u_int mask)
+{
+ u_int result;
+
+ __asm __volatile("bsrl %1,%0" : "=r" (result) : "rm" (mask));
+ return (result);
+}
+static __inline void
+disable_intr(void)
+{
+ __cli();
+}
+static __inline void
+do_cpuid(u_int ax, u_int *p)
+{
+ __asm __volatile("cpuid"
+ : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
+ : "0" (ax));
+}
+
+static __inline void
+enable_intr(void)
+{
+ __sti();
+}
+
+
+#define HAVE_INLINE_FFS
+
+static __inline int
+ffs(int mask)
+{
+ /*
+ * Note that gcc-2's builtin ffs would be used if we didn't declare
+ * this inline or turn off the builtin. The builtin is faster but
+ * broken in gcc-2.4.5 and slower but working in gcc-2.5 and later
+ * versions.
+ */
+ return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1);
+}
+
+#define HAVE_INLINE_FLS
+
+static __inline int
+fls(int mask)
+{
+ return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
+}
+
+static __inline void
+halt(void)
+{
+ __asm __volatile("hlt");
+}
+
+#if __GNUC__ < 2
+
+#define inb(port) inbv(port)
+#define outb(port, data) outbv(port, data)
+
+#else /* __GNUC__ >= 2 */
+
+/*
+ * The following complications are to get around gcc not having a
+ * constraint letter for the range 0..255. We still put "d" in the
+ * constraint because "i" isn't a valid constraint when the port
+ * isn't constant. This only matters for -O0 because otherwise
+ * the non-working version gets optimized away.
+ *
+ * Use an expression-statement instead of a conditional expression
+ * because gcc-2.6.0 would promote the operands of the conditional
+ * and produce poor code for "if ((inb(var) & const1) == const2)".
+ *
+ * The unnecessary test `(port) < 0x10000' is to generate a warning if
+ * the `port' has type u_short or smaller. Such types are pessimal.
+ * This actually only works for signed types. The range check is
+ * careful to avoid generating warnings.
+ */
+#define inb(port) __extension__ ({ \
+ u_char _data; \
+ if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \
+ && (port) < 0x10000) \
+ _data = inbc(port); \
+ else \
+ _data = inbv(port); \
+ _data; })
+
+#define outb(port, data) ( \
+ __builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \
+ && (port) < 0x10000 \
+ ? outbc(port, data) : outbv(port, data))
+
+static __inline u_char
+inbc(u_int port)
+{
+ u_char data;
+
+ __asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
+ return (data);
+}
+
+static __inline void
+outbc(u_int port, u_char data)
+{
+ __asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
+}
+
+#endif /* __GNUC__ >= 2 */
+
+static __inline u_char
+inbv(u_int port)
+{
+ u_char data;
+ /*
+ * We use %%dx and not %1 here because i/o is done at %dx and not at
+ * %edx, while gcc generates inferior code (movw instead of movl)
+ * if we tell it to load (u_short) port.
+ */
+ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
+ return (data);
+}
+
+static __inline u_int
+inl(u_int port)
+{
+ u_int data;
+
+ __asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
+ return (data);
+}
+
+static __inline void
+insb(u_int port, void *addr, size_t cnt)
+{
+ __asm __volatile("cld; rep; insb"
+ : "+D" (addr), "+c" (cnt)
+ : "d" (port)
+ : "memory");
+}
+
+static __inline void
+insw(u_int port, void *addr, size_t cnt)
+{
+ __asm __volatile("cld; rep; insw"
+ : "+D" (addr), "+c" (cnt)
+ : "d" (port)
+ : "memory");
+}
+
+static __inline void
+insl(u_int port, void *addr, size_t cnt)
+{
+ __asm __volatile("cld; rep; insl"
+ : "+D" (addr), "+c" (cnt)
+ : "d" (port)
+ : "memory");
+}
+
+static __inline void
+invd(void)
+{
+ __asm __volatile("invd");
+}
+
+static __inline u_short
+inw(u_int port)
+{
+ u_short data;
+
+ __asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
+ return (data);
+}
+
+static __inline void
+outbv(u_int port, u_char data)
+{
+ u_char al;
+ /*
+ * Use an unnecessary assignment to help gcc's register allocator.
+ * This makes a large difference for gcc-1.40 and a tiny difference
+ * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
+ * best results. gcc-2.6.0 can't handle this.
+ */
+ al = data;
+ __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
+}
+
+static __inline void
+outl(u_int port, u_int data)
+{
+ /*
+ * outl() and outw() aren't used much so we haven't looked at
+ * possible micro-optimizations such as the unnecessary
+ * assignment for them.
+ */
+ __asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
+}
+
+static __inline void
+outsb(u_int port, const void *addr, size_t cnt)
+{
+ __asm __volatile("cld; rep; outsb"
+ : "+S" (addr), "+c" (cnt)
+ : "d" (port));
+}
+
+static __inline void
+outsw(u_int port, const void *addr, size_t cnt)
+{
+ __asm __volatile("cld; rep; outsw"
+ : "+S" (addr), "+c" (cnt)
+ : "d" (port));
+}
+
+static __inline void
+outsl(u_int port, const void *addr, size_t cnt)
+{
+ __asm __volatile("cld; rep; outsl"
+ : "+S" (addr), "+c" (cnt)
+ : "d" (port));
+}
+
+static __inline void
+outw(u_int port, u_short data)
+{
+ __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
+}
+
+static __inline void
+ia32_pause(void)
+{
+ __asm __volatile("pause");
+}
+
+static __inline u_int64_t
+rdmsr(u_int msr)
+{
+ u_int64_t rv;
+
+ __asm __volatile("rdmsr" : "=A" (rv) : "c" (msr));
+ return (rv);
+}
+
+static __inline u_int64_t
+rdpmc(u_int pmc)
+{
+ u_int64_t rv;
+
+ __asm __volatile("rdpmc" : "=A" (rv) : "c" (pmc));
+ return (rv);
+}
+
+static __inline u_int64_t
+rdtsc(void)
+{
+ u_int64_t rv;
+
+ __asm __volatile("rdtsc" : "=A" (rv));
+ return (rv);
+}
+
+static __inline void
+wbinvd(void)
+{
+ __asm __volatile("wbinvd");
+}
+
+static __inline void
+wrmsr(u_int msr, u_int64_t newval)
+{
+ __asm __volatile("wrmsr" : : "A" (newval), "c" (msr));
+}
+
+static __inline u_int
+rfs(void)
+{
+ u_int sel;
+ __asm __volatile("movl %%fs,%0" : "=rm" (sel));
+ return (sel);
+}
+
+static __inline u_int
+rgs(void)
+{
+ u_int sel;
+ __asm __volatile("movl %%gs,%0" : "=rm" (sel));
+ return (sel);
+}
+
+static __inline void
+load_fs(u_int sel)
+{
+ __asm __volatile("movl %0,%%fs" : : "rm" (sel));
+}
+
+static __inline void
+load_gs(u_int sel)
+{
+ __asm __volatile("movl %0,%%gs" : : "rm" (sel));
+}
+
+/* void lidt(struct region_descriptor *addr); */
+static __inline void
+lidt(struct region_descriptor *addr)
+{
+ __asm __volatile("lidt (%0)" : : "r" (addr));
+}
+
+static __inline u_int
+rdr0(void)
+{
+ u_int data;
+ __asm __volatile("movl %%dr0,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr0(u_int dr0)
+{
+ __asm __volatile("movl %0,%%dr0" : : "r" (dr0));
+}
+
+static __inline u_int
+rdr1(void)
+{
+ u_int data;
+ __asm __volatile("movl %%dr1,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr1(u_int dr1)
+{
+ __asm __volatile("movl %0,%%dr1" : : "r" (dr1));
+}
+
+static __inline u_int
+rdr2(void)
+{
+ u_int data;
+ __asm __volatile("movl %%dr2,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr2(u_int dr2)
+{
+ __asm __volatile("movl %0,%%dr2" : : "r" (dr2));
+}
+
+static __inline u_int
+rdr3(void)
+{
+ u_int data;
+ __asm __volatile("movl %%dr3,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr3(u_int dr3)
+{
+ __asm __volatile("movl %0,%%dr3" : : "r" (dr3));
+}
+
+static __inline u_int
+rdr4(void)
+{
+ u_int data;
+ __asm __volatile("movl %%dr4,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr4(u_int dr4)
+{
+ __asm __volatile("movl %0,%%dr4" : : "r" (dr4));
+}
+
+static __inline u_int
+rdr5(void)
+{
+ u_int data;
+ __asm __volatile("movl %%dr5,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr5(u_int dr5)
+{
+ __asm __volatile("movl %0,%%dr5" : : "r" (dr5));
+}
+
+static __inline u_int
+rdr6(void)
+{
+ u_int data;
+ __asm __volatile("movl %%dr6,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr6(u_int dr6)
+{
+ __asm __volatile("movl %0,%%dr6" : : "r" (dr6));
+}
+
+static __inline u_int
+rdr7(void)
+{
+ u_int data;
+ __asm __volatile("movl %%dr7,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr7(u_int dr7)
+{
+ __asm __volatile("movl %0,%%dr7" : : "r" (dr7));
+}
+
+static __inline register_t
+intr_disable(void)
+{
+ register_t eflags;
+
+ __save_and_cli(eflags);
+ return (eflags);
+}
+
+static __inline void
+intr_restore(register_t eflags)
+{
+ __restore_flags(eflags);
+}
+
+#else /* !__GNUC__ */
+
+int breakpoint(void);
+u_int bsfl(u_int mask);
+u_int bsrl(u_int mask);
+void cpu_invlpg(u_int addr);
+void cpu_invlpg_range(u_int start, u_int end);
+void disable_intr(void);
+void do_cpuid(u_int ax, u_int *p);
+void enable_intr(void);
+void halt(void);
+u_char inb(u_int port);
+u_int inl(u_int port);
+void insb(u_int port, void *addr, size_t cnt);
+void insl(u_int port, void *addr, size_t cnt);
+void insw(u_int port, void *addr, size_t cnt);
+void invd(void);
+void invlpg(u_int addr);
+void invlpg_range(u_int start, u_int end);
+void invltlb(void);
+u_short inw(u_int port);
+void load_cr3(u_int cr3);
+void load_cr4(u_int cr4);
+void load_fs(u_int sel);
+void load_gs(u_int sel);
+struct region_descriptor;
+void lidt(struct region_descriptor *addr);
+void ltr(u_short sel);
+void outb(u_int port, u_char data);
+void outl(u_int port, u_int data);
+void outsb(u_int port, void *addr, size_t cnt);
+void outsl(u_int port, void *addr, size_t cnt);
+void outsw(u_int port, void *addr, size_t cnt);
+void outw(u_int port, u_short data);
+void ia32_pause(void);
+u_int rcr2(void);
+u_int rcr3(void);
+u_int rcr4(void);
+u_int rfs(void);
+u_int rgs(void);
+u_int64_t rdmsr(u_int msr);
+u_int64_t rdpmc(u_int pmc);
+u_int64_t rdtsc(void);
+u_int read_eflags(void);
+void wbinvd(void);
+void write_eflags(u_int ef);
+void wrmsr(u_int msr, u_int64_t newval);
+u_int rdr0(void);
+void load_dr0(u_int dr0);
+u_int rdr1(void);
+void load_dr1(u_int dr1);
+u_int rdr2(void);
+void load_dr2(u_int dr2);
+u_int rdr3(void);
+void load_dr3(u_int dr3);
+u_int rdr4(void);
+void load_dr4(u_int dr4);
+u_int rdr5(void);
+void load_dr5(u_int dr5);
+u_int rdr6(void);
+void load_dr6(u_int dr6);
+u_int rdr7(void);
+void load_dr7(u_int dr7);
+register_t intr_disable(void);
+void intr_restore(register_t ef);
+
+#endif /* __GNUC__ */
+
+void reset_dbregs(void);
+
+__END_DECLS
+
+#endif /* !_MACHINE_CPUFUNC_H_ */
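
The interrupt primitives above are the ones most callers touch. A minimal
usage sketch follows (the function and counter names are hypothetical, not
part of the patch); note that under Xen, intr_disable() masks event-channel
upcalls via __save_and_cli() rather than executing a real cli instruction.

    static int example_counter;             /* hypothetical shared state */

    static void
    example_critical_update(void)
    {
            register_t eflags;

            eflags = intr_disable();        /* mask upcalls, save prior state */
            example_counter++;              /* safe from event upcalls */
            intr_restore(eflags);           /* restore the saved mask state */
    }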
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h b/freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h
new file mode 100644
index 0000000000..1ccd49d448
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h
@@ -0,0 +1,120 @@
+/******************************************************************************
+ * ctrl_if.h
+ *
+ * Management functions for special interface to the domain controller.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __I386_XENO__CTRL_IF_H__
+#define __I386_XENO__CTRL_IF_H__
+
+#include <sys/taskqueue.h>
+#include <machine/hypervisor.h>
+
+
+typedef control_msg_t ctrl_msg_t;
+
+/*
+ * Callback function type. Called for asynchronous processing of received
+ * request messages, and responses to previously-transmitted request messages.
+ * The parameters are (@msg, @id).
+ * @msg: Original request/response message (not a copy). The message can be
+ * modified in-place by the handler (e.g., a response callback can
+ * turn a request message into a response message in place). The message
+ * is no longer accessible after the callback handler returns -- if the
+ * message is required to persist for longer, it must be copied.
+ * @id: (Response callbacks only) The 'id' that was specified when the
+ * original request message was queued for transmission.
+ */
+typedef void (*ctrl_msg_handler_t)(ctrl_msg_t *, unsigned long);
+
+/*
+ * Send @msg to the domain controller. Execute @hnd when a response is
+ * received, passing the response message and the specified @id. This
+ * operation will not block: it will return -EAGAIN if there is no space.
+ * Notes:
+ * 1. The @msg is copied if it is transmitted and so can be freed after this
+ * function returns.
+ * 2. If @hnd is NULL then no callback is executed.
+ */
+int ctrl_if_send_message_noblock(
+ ctrl_msg_t *msg,
+ ctrl_msg_handler_t hnd,
+ unsigned long id);
+
+/*
+ * Send @msg to the domain controller. Execute @hnd when a response is
+ * received, passing the response message and the specified @id. This
+ * operation will block until the message is sent, or a signal is received
+ * for the calling process (unless @wait_state is TASK_UNINTERRUPTIBLE).
+ * Notes:
+ * 1. The @msg is copied if it is transmitted and so can be freed after this
+ * function returns.
+ * 2. If @hnd is NULL then no callback is executed.
+ */
+int ctrl_if_send_message_block(
+ ctrl_msg_t *msg,
+ ctrl_msg_handler_t hnd,
+ unsigned long id,
+ long wait_state);
+
+/*
+ * Request a callback when there is /possibly/ space to immediately send a
+ * message to the domain controller. This function returns 0 if there is
+ * already space to transmit a message --- in this case the callback task /may/
+ * still be executed. If this function returns 1 then the callback /will/ be
+ * executed when space becomes available.
+ */
+int ctrl_if_enqueue_space_callback(struct task *task);
+
+/*
+ * Send a response (@msg) to a message from the domain controller. This will
+ * never block.
+ * Notes:
+ * 1. The @msg is copied and so can be freed after this function returns.
+ * 2. The @msg may be the original request message, modified in-place.
+ */
+void ctrl_if_send_response(ctrl_msg_t *msg);
+
+/*
+ * Register a receiver for typed messages from the domain controller. The
+ * handler (@hnd) is called for every received message of specified @type.
+ * Returns TRUE (non-zero) if the handler was successfully registered.
+ * If CALLBACK_IN_BLOCKING_CONTEXT is specified in @flags then callbacks will
+ * occur in a context in which it is safe to yield (i.e., process context).
+ */
+#define CALLBACK_IN_BLOCKING_CONTEXT 1
+int ctrl_if_register_receiver(
+ uint8_t type,
+ ctrl_msg_handler_t hnd,
+ unsigned int flags);
+
+/*
+ * Unregister a receiver for typed messages from the domain controller. The
+ * handler (@hnd) will not be executed after this function returns.
+ */
+void ctrl_if_unregister_receiver(uint8_t type, ctrl_msg_handler_t hnd);
+
+/* Suspend/resume notifications. */
+void ctrl_if_suspend(void);
+void ctrl_if_resume(void);
+
+
+/*
+ * Returns TRUE if there are no outstanding message requests at the domain
+ * controller. This can be used to ensure that messages have really flushed
+ * through when it is not possible to use the response-callback interface.
+ * WARNING: If other subsystems are using the control interface then this
+ * function might never return TRUE!
+ */
+int ctrl_if_transmitter_empty(void); /* !! DANGEROUS FUNCTION !! */
+
+/*
+ * Manually discard response messages from the domain controller.
+ * WARNING: This is usually done automatically -- this function should only
+ * be called when normal interrupt mechanisms are disabled!
+ */
+void ctrl_if_discard_responses(void); /* !! DANGEROUS FUNCTION !! */
+
+#endif /* __I386_XENO__CTRL_IF_H__ */
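
A hedged sketch of how a subsystem would hook this interface; CMSG_EXAMPLE
and both functions are hypothetical placeholders, not part of this header.
The handler turns the request into a response in place, which the comments
above explicitly permit.

    static void
    example_rx(ctrl_msg_t *msg, unsigned long id)
    {
            /* ... interpret and modify msg in place ... */
            ctrl_if_send_response(msg);     /* reuse the request as the reply */
    }

    static void
    example_attach(void)
    {
            /* flags == 0: callbacks may run in interrupt context */
            if (!ctrl_if_register_receiver(CMSG_EXAMPLE, example_rx, 0))
                    printf("example: message type already claimed\n");
    }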
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h b/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h
new file mode 100644
index 0000000000..3e962e3014
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h
@@ -0,0 +1,92 @@
+/******************************************************************************
+ * evtchn.h
+ *
+ * Communication via Xen event channels.
+ * Also definitions for the device that demuxes notifications to userspace.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __ASM_EVTCHN_H__
+#define __ASM_EVTCHN_H__
+
+#include <machine/hypervisor.h>
+#include <machine/synch_bitops.h>
+#include <machine/hypervisor-ifs.h>
+
+/*
+ * LOW-LEVEL DEFINITIONS
+ */
+
+/* Force a proper event-channel callback from Xen. */
+void force_evtchn_callback(void);
+
+/* Entry point for notifications into kernel subsystems. */
+void evtchn_do_upcall(struct intrframe *frame);
+
+/* Entry point for notifications into the userland character device. */
+void evtchn_device_upcall(int port);
+
+static inline void
+mask_evtchn(int port)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+ synch_set_bit(port, &s->evtchn_mask[0]);
+}
+
+static inline void
+unmask_evtchn(int port)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+
+ synch_clear_bit(port, &s->evtchn_mask[0]);
+
+ /*
+ * The following is basically the equivalent of 'hw_resend_irq'. Just like
+ * a real IO-APIC we 'lose the interrupt edge' if the channel is masked.
+ */
+ if ( synch_test_bit (port, &s->evtchn_pending[0]) &&
+ !synch_test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
+ {
+ s->vcpu_data[0].evtchn_upcall_pending = 1;
+ if ( !s->vcpu_data[0].evtchn_upcall_mask )
+ force_evtchn_callback();
+ }
+}
+
+static inline void
+clear_evtchn(int port)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+ synch_clear_bit(port, &s->evtchn_pending[0]);
+}
+
+static inline void
+notify_via_evtchn(int port)
+{
+ evtchn_op_t op;
+ op.cmd = EVTCHNOP_send;
+ op.u.send.local_port = port;
+ (void)HYPERVISOR_event_channel_op(&op);
+}
+
+/*
+ * CHARACTER-DEVICE DEFINITIONS
+ */
+
+#define PORT_NORMAL 0x0000
+#define PORT_EXCEPTION 0x8000
+#define PORTIDX_MASK 0x7fff
+
+/* /dev/xen/evtchn resides at device number major=10, minor=200 */
+#define EVTCHN_MINOR 200
+
+/* /dev/xen/evtchn ioctls: */
+/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */
+#define EVTCHN_RESET _IO('E', 1)
+/* EVTCHN_BIND: Bind to the specified event-channel port. */
+#define EVTCHN_BIND _IO('E', 2)
+/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */
+#define EVTCHN_UNBIND _IO('E', 3)
+
+#endif /* __ASM_EVTCHN_H__ */
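
The inlines above compose into a standard service sequence. A sketch, with
the device-servicing body left hypothetical:

    static void
    example_service_port(int port)
    {
            mask_evtchn(port);      /* suppress further upcalls on this port */
            clear_evtchn(port);     /* acknowledge the pending bit */
            /* ... service whatever is bound to the channel ... */
            unmask_evtchn(port);    /* forces a callback if work arrived
                                       while the port was masked */
            notify_via_evtchn(port);/* optionally kick the remote end */
    }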
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/frame.h b/freebsd-5.3-xen-sparse/i386-xen/include/frame.h
new file mode 100644
index 0000000000..a6572d85a9
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/frame.h
@@ -0,0 +1,129 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)frame.h 5.2 (Berkeley) 1/18/91
+ * $FreeBSD: src/sys/i386/include/frame.h,v 1.23 2003/07/22 08:11:15 peter Exp $
+ */
+
+#ifndef _MACHINE_FRAME_H_
+#define _MACHINE_FRAME_H_ 1
+
+/*
+ * System stack frames.
+ */
+
+/*
+ * Exception/Trap Stack Frame
+ */
+
+struct trapframe {
+ int tf_fs;
+ int tf_es;
+ int tf_ds;
+ int tf_edi;
+ int tf_esi;
+ int tf_ebp;
+ int tf_isp;
+ int tf_ebx;
+ int tf_edx;
+ int tf_ecx;
+ int tf_eax;
+ int tf_trapno;
+ int tf_cr2;
+ /* below portion defined in 386 hardware */
+ int tf_err;
+ int tf_eip;
+ int tf_cs;
+ int tf_eflags;
+ /* below only when crossing rings (e.g. user to kernel) */
+ int tf_esp;
+ int tf_ss;
+};
+
+/* Interrupt stack frame */
+
+struct intrframe {
+ int if_fs;
+ int if_es;
+ int if_ds;
+ int if_edi;
+ int if_esi;
+ int if_ebp;
+ int :32;
+ int if_ebx;
+ int if_edx;
+ int if_ecx;
+ int if_eax;
+ int :32; /* for compat with trap frame - trapno */
+ int if_vec; /* cr2 in trap frame */
+ int :32; /* for compat with trap frame - err */
+ /* below portion defined in 386 hardware */
+ int if_eip;
+ int if_cs;
+ int if_eflags;
+ /* below only when crossing rings (e.g. user to kernel) */
+ int if_esp;
+ int if_ss;
+};
+
+/* frame of clock (same as interrupt frame) */
+
+struct clockframe {
+ int cf_fs;
+ int cf_es;
+ int cf_ds;
+ int cf_edi;
+ int cf_esi;
+ int cf_ebp;
+ int :32;
+ int cf_ebx;
+ int cf_edx;
+ int cf_ecx;
+ int cf_eax;
+ int :32; /* for compat with trap frame - trapno */
+ int cf_vec; /* cr2 in trap frame */
+ int :32; /* for compat with trap frame - err */
+ /* below portion defined in 386 hardware */
+ int cf_eip;
+ int cf_cs;
+ int cf_eflags;
+ /* below only when crossing rings (e.g. user to kernel) */
+ int cf_esp;
+ int cf_ss;
+};
+
+#define INTR_TO_TRAPFRAME(frame) ((struct trapframe *)&(frame)->if_fs)
+
+#endif /* _MACHINE_FRAME_H_ */
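
Because the three layouts are field-compatible, INTR_TO_TRAPFRAME() lets
interrupt code reuse trap-frame helpers. An illustrative, hypothetical check,
keeping in mind that a Xen guest kernel runs in ring 1 rather than ring 0:

    static int
    example_frame_from_user(struct intrframe *ifp)
    {
            struct trapframe *tf = INTR_TO_TRAPFRAME(ifp);

            /* Ring 3 means user mode; the guest kernel itself is in ring 1. */
            return ((tf->tf_cs & 3) == 3);
    }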
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h
new file mode 100644
index 0000000000..4f75d27a9a
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h
@@ -0,0 +1,36 @@
+#ifndef _HYPERVISOR_IFS_H_
+#define _HYPERVISOR_IFS_H_
+
+#define s8 int8_t
+#define s16 int16_t
+#define s32 int32_t
+#define s64 int64_t
+
+#define u8 uint8_t
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#include <machine/xen-public/xen.h>
+#include <machine/xen-public/io/domain_controller.h>
+#include <machine/xen-public/io/netif.h>
+#include <machine/xen-public/io/blkif.h>
+#include <machine/xen-public/dom0_ops.h>
+#include <machine/xen-public/event_channel.h>
+#include <machine/xen-public/sched_ctl.h>
+#include <machine/xen-public/physdev.h>
+#undef blkif_sector_t /* XXX pre-processor didn't do the */
+#define blkif_sector_t uint64_t /* right thing */
+
+#undef s8
+#undef s16
+#undef s32
+#undef s64
+
+#undef u8
+#undef u16
+#undef u32
+#undef u64
+
+
+#endif
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
new file mode 100644
index 0000000000..95ee85f352
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
@@ -0,0 +1,355 @@
+/******************************************************************************
+ * hypervisor.h
+ *
+ * FreeBSD-specific hypervisor handling.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#ifndef __HYPERVISOR_H__
+#define __HYPERVISOR_H__
+
+
+#include <machine/hypervisor-ifs.h>
+#include <machine/frame.h>
+#include "opt_xen.h"
+
+extern start_info_t *xen_start_info;
+
+/* arch/xen/mm/hypervisor.c */
+/*
+ * NB. ptr values should be PHYSICAL, not MACHINE. 'vals' should already
+ * be MACHINE addresses.
+ */
+
+
+void MULTICALL_flush_page_update_queue(void);
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+/* Allocate a contiguous empty region of low memory. Return virtual start. */
+unsigned long allocate_empty_lowmem_region(unsigned long pages);
+/* Deallocate a contiguous region of low memory. Return it to the allocator. */
+void deallocate_lowmem_region(unsigned long vstart, unsigned long pages);
+#endif
+
+typedef struct { unsigned long pte_low, pte_high; } pte_t;
+
+/*
+ * Assembler stubs for hyper-calls.
+ */
+
+static inline int HYPERVISOR_set_trap_table(trap_info_t *table)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table),
+ "b" (table) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_mmu_update(mmu_update_t *req,
+ int count,
+ int *success_count)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_mmu_update),
+ "b" (req), "c" (count), "d" (success_count) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_set_gdt),
+ "b" (frame_list), "c" (entries) : "memory" );
+
+
+ return ret;
+}
+
+static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_stack_switch),
+ "b" (ss), "c" (esp) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_set_callbacks(
+ unsigned long event_selector, unsigned long event_address,
+ unsigned long failsafe_selector, unsigned long failsafe_address)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks),
+ "b" (event_selector), "c" (event_address),
+ "d" (failsafe_selector), "S" (failsafe_address) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_fpu_taskswitch(void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_yield(void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
+ "b" (SCHEDOP_yield) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_block(void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
+ "b" (SCHEDOP_block) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_shutdown(void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
+ "b" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
+ : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_reboot(void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
+ "b" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
+ : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_suspend(unsigned long srec)
+{
+ int ret;
+ /* NB. On suspend, control software expects a suspend record in %esi. */
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
+ "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
+ "S" (srec) : "memory" );
+
+ return ret;
+}
+
+static inline long HYPERVISOR_set_timer_op(uint64_t timeout)
+{
+ int ret;
+ unsigned long timeout_hi = (unsigned long)(timeout>>32);
+ unsigned long timeout_lo = (unsigned long)timeout;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_set_timer_op),
+ "b" (timeout_hi), "c" (timeout_lo) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op)
+{
+ int ret;
+ dom0_op->interface_version = DOM0_INTERFACE_VERSION;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_dom0_op),
+ "b" (dom0_op) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg),
+ "b" (reg), "c" (value) : "memory" );
+
+ return ret;
+}
+
+static inline unsigned long HYPERVISOR_get_debugreg(int reg)
+{
+ unsigned long ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg),
+ "b" (reg) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_update_descriptor(
+ unsigned long pa, unsigned long word1, unsigned long word2)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_update_descriptor),
+ "b" (pa), "c" (word1), "d" (word2) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_set_fast_trap(int idx)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_set_fast_trap),
+ "b" (idx) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_dom_mem_op(unsigned int op,
+ unsigned long *pages,
+ unsigned long nr_pages)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op),
+ "b" (op), "c" (pages), "d" (nr_pages) : "memory" );
+ return ret;
+}
+
+static inline int HYPERVISOR_multicall(void *call_list, int nr_calls)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_multicall),
+ "b" (call_list), "c" (nr_calls) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_update_va_mapping(
+ unsigned long page_nr, pte_t new_val, unsigned long flags)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping),
+ "b" (page_nr), "c" ((new_val).pte_low), "d" (flags):
+ "memory" );
+ /* XXX */
+#if 0
+ if ( unlikely(ret < 0) )
+ panic("Failed update VA mapping: %08lx, %08lx, %08lx",
+ page_nr, (new_val).pte_low, flags);
+#endif
+ return ret;
+}
+
+static inline int HYPERVISOR_event_channel_op(void *op)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_event_channel_op),
+ "b" (op) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_xen_version(int cmd)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_xen_version),
+ "b" (cmd) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_console_io(int cmd, int count, char *str)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_console_io),
+ "b" (cmd), "c" (count), "d" (str) : "memory" );
+
+ return ret;
+}
+
+static __inline int HYPERVISOR_console_write(char *str, int count)
+{
+ return HYPERVISOR_console_io(CONSOLEIO_write, count, str);
+}
+
+static inline int HYPERVISOR_physdev_op(void *physdev_op)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_physdev_op),
+ "b" (physdev_op) : "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_update_va_mapping_otherdomain(
+ unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
+ "b" (page_nr), "c" ((new_val).pte_low), "d" (flags), "S" (domid) :
+ "memory" );
+
+ return ret;
+}
+
+static inline int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_vm_assist),
+ "b" (cmd), "c" (type) : "memory" );
+
+ return ret;
+}
+
+#endif /* __HYPERVISOR_H__ */
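
Calling through these stubs is ordinary C. A minimal hypothetical sketch
that writes to the Xen console and then voluntarily yields the CPU:

    static void
    example_hello(void)
    {
            char msg[] = "FreeBSD domU: hello from a hypercall\n";

            HYPERVISOR_console_write(msg, sizeof(msg) - 1);
            (void)HYPERVISOR_yield();       /* SCHEDOP_yield via sched_op */
    }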
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/md_var.h b/freebsd-5.3-xen-sparse/i386-xen/include/md_var.h
new file mode 100644
index 0000000000..5822a1e3d1
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/md_var.h
@@ -0,0 +1,108 @@
+/*-
+ * Copyright (c) 1995 Bruce D. Evans.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/include/md_var.h,v 1.66 2003/11/03 22:37:28 jhb Exp $
+ */
+
+#ifndef _MACHINE_MD_VAR_H_
+#define _MACHINE_MD_VAR_H_
+
+/*
+ * Miscellaneous machine-dependent declarations.
+ */
+
+extern void (*bcopy_vector)(const void *from, void *to, size_t len);
+extern void (*bzero_vector)(void *buf, size_t len);
+extern int (*copyin_vector)(const void *udaddr, void *kaddr, size_t len);
+extern int (*copyout_vector)(const void *kaddr, void *udaddr, size_t len);
+
+extern long Maxmem;
+extern u_int atdevbase; /* offset in virtual memory of ISA io mem */
+extern u_int basemem; /* PA of original top of base memory */
+extern int busdma_swi_pending;
+extern u_int cpu_exthigh;
+extern u_int cpu_feature;
+extern u_int cpu_fxsr;
+extern u_int cpu_high;
+extern u_int cpu_id;
+extern u_int cpu_procinfo;
+extern char cpu_vendor[];
+extern u_int cyrix_did;
+extern uint16_t *elan_mmcr;
+extern char kstack[];
+#ifdef PC98
+extern int need_pre_dma_flush;
+extern int need_post_dma_flush;
+#endif
+extern char sigcode[];
+extern int szsigcode;
+#ifdef COMPAT_FREEBSD4
+extern int szfreebsd4_sigcode;
+#endif
+#ifdef COMPAT_43
+extern int szosigcode;
+#endif
+
+typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
+struct thread;
+struct reg;
+struct fpreg;
+struct dbreg;
+
+void bcopyb(const void *from, void *to, size_t len);
+void busdma_swi(void);
+void cpu_setregs(void);
+void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs));
+void doreti_iret(void) __asm(__STRING(doreti_iret));
+void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
+void doreti_popl_ds(void) __asm(__STRING(doreti_popl_ds));
+void doreti_popl_ds_fault(void) __asm(__STRING(doreti_popl_ds_fault));
+void doreti_popl_es(void) __asm(__STRING(doreti_popl_es));
+void doreti_popl_es_fault(void) __asm(__STRING(doreti_popl_es_fault));
+void doreti_popl_fs(void) __asm(__STRING(doreti_popl_fs));
+void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault));
+void scrit(void) __asm(__STRING(scrit));
+void ecrit(void) __asm(__STRING(ecrit));
+void critical_region_fixup(void) __asm(__STRING(critical_region_fixup));
+void enable_sse(void);
+void fillw(int /*u_short*/ pat, void *base, size_t cnt);
+void i486_bzero(void *buf, size_t len);
+void i586_bcopy(const void *from, void *to, size_t len);
+void i586_bzero(void *buf, size_t len);
+int i586_copyin(const void *udaddr, void *kaddr, size_t len);
+int i586_copyout(const void *kaddr, void *udaddr, size_t len);
+void i686_pagezero(void *addr);
+void sse2_pagezero(void *addr);
+void init_AMD_Elan_sc520(void);
+int is_physical_memory(vm_offset_t addr);
+int isa_nmi(int cd);
+vm_paddr_t kvtop(void *addr);
+void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec);
+int user_dbreg_trap(void);
+
+#endif /* !_MACHINE_MD_VAR_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/multicall.h b/freebsd-5.3-xen-sparse/i386-xen/include/multicall.h
new file mode 100644
index 0000000000..30de865ee2
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/multicall.h
@@ -0,0 +1,98 @@
+/******************************************************************************
+ * multicall.h
+ */
+
+#ifndef __MULTICALL_H__
+#define __MULTICALL_H__
+
+#include <machine/hypervisor.h>
+#define MAX_MULTICALL_ENTS 8
+extern multicall_entry_t multicall_list[];
+extern int nr_multicall_ents;
+
+static inline void execute_multicall_list(void)
+{
+ if ( unlikely(nr_multicall_ents == 0) ) return;
+ (void)HYPERVISOR_multicall(multicall_list, nr_multicall_ents);
+ nr_multicall_ents = 0;
+}
+
+
+static inline void handle_edge(void)
+{
+ if (unlikely(nr_multicall_ents == MAX_MULTICALL_ENTS))
+ execute_multicall_list();
+}
+
+static inline void queue_multicall0(unsigned long op)
+{
+ int i = nr_multicall_ents;
+ multicall_list[i].op = op;
+ nr_multicall_ents = i+1;
+ handle_edge();
+}
+
+static inline void queue_multicall1(unsigned long op, unsigned long arg1)
+{
+ int i = nr_multicall_ents;
+ multicall_list[i].op = op;
+ multicall_list[i].args[0] = arg1;
+ nr_multicall_ents = i+1;
+ handle_edge();
+}
+
+static inline void queue_multicall2(
+ unsigned long op, unsigned long arg1, unsigned long arg2)
+{
+ int i = nr_multicall_ents;
+ multicall_list[i].op = op;
+ multicall_list[i].args[0] = arg1;
+ multicall_list[i].args[1] = arg2;
+ nr_multicall_ents = i+1;
+ handle_edge();
+}
+
+static inline void queue_multicall3(
+ unsigned long op, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3)
+{
+ int i = nr_multicall_ents;
+ multicall_list[i].op = op;
+ multicall_list[i].args[0] = arg1;
+ multicall_list[i].args[1] = arg2;
+ multicall_list[i].args[2] = arg3;
+ nr_multicall_ents = i+1;
+ handle_edge();
+}
+
+static inline void queue_multicall4(
+ unsigned long op, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4)
+{
+ int i = nr_multicall_ents;
+ multicall_list[i].op = op;
+ multicall_list[i].args[0] = arg1;
+ multicall_list[i].args[1] = arg2;
+ multicall_list[i].args[2] = arg3;
+ multicall_list[i].args[3] = arg4;
+ nr_multicall_ents = i+1;
+ handle_edge();
+}
+
+static inline void queue_multicall5(
+ unsigned long op, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5)
+{
+ int i = nr_multicall_ents;
+ multicall_list[i].op = op;
+ multicall_list[i].args[0] = arg1;
+ multicall_list[i].args[1] = arg2;
+ multicall_list[i].args[2] = arg3;
+ multicall_list[i].args[3] = arg4;
+ multicall_list[i].args[4] = arg5;
+ nr_multicall_ents = i+1;
+ handle_edge();
+}
+
+
+#endif /* __MULTICALL_H__ */
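
The point of the queue is to amortize hypercall cost: callers queue several
operations and pay for a single trap. A hedged sketch in which the page
numbers, PTE value, and flags are placeholders:

    static void
    example_batch(unsigned long pg1, unsigned long pg2, unsigned long pte)
    {
            queue_multicall3(__HYPERVISOR_update_va_mapping, pg1, pte, 0);
            queue_multicall3(__HYPERVISOR_update_va_mapping, pg2, pte, 0);
            execute_multicall_list();       /* one HYPERVISOR_multicall for
                                               both; handle_edge() also
                                               flushes at 8 queued entries */
    }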
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/param.h b/freebsd-5.3-xen-sparse/i386-xen/include/param.h
new file mode 100644
index 0000000000..a45fdd67c3
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/param.h
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)param.h 5.8 (Berkeley) 6/28/91
+ * $FreeBSD: src/sys/i386/include/param.h,v 1.69 2003/06/14 23:23:53 alc Exp $
+ */
+
+/*
+ * Machine dependent constants for Intel 386.
+ */
+
+/*
+ * Round p (pointer or byte index) up to a correctly-aligned value
+ * for all data types (int, long, ...). The result is unsigned int
+ * and must be cast to any desired pointer type.
+ */
+#ifndef _ALIGNBYTES
+#define _ALIGNBYTES (sizeof(int) - 1)
+#endif
+#ifndef _ALIGN
+#define _ALIGN(p) (((unsigned)(p) + _ALIGNBYTES) & ~_ALIGNBYTES)
+#endif
+
+#ifndef _MACHINE
+#define _MACHINE i386-xen
+#endif
+#ifndef _MACHINE_ARCH
+#define _MACHINE_ARCH i386-xen
+#endif
+
+#ifndef _NO_NAMESPACE_POLLUTION
+
+#ifndef _MACHINE_PARAM_H_
+#define _MACHINE_PARAM_H_
+
+#ifndef MACHINE
+#define MACHINE "i386"
+#endif
+#ifndef MACHINE_ARCH
+#define MACHINE_ARCH "i386"
+#endif
+#define MID_MACHINE MID_I386
+
+#ifdef SMP
+#define MAXCPU 16
+#else
+#define MAXCPU 1
+#endif /* SMP */
+
+#define ALIGNBYTES _ALIGNBYTES
+#define ALIGN(p) _ALIGN(p)
+
+#define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */
+#define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */
+#define PAGE_MASK (PAGE_SIZE-1)
+#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
+
+#ifdef PAE
+#define NPGPTD 4
+#define PDRSHIFT 21 /* LOG2(NBPDR) */
+#else
+#define NPGPTD 1
+#define PDRSHIFT 22 /* LOG2(NBPDR) */
+#endif
+
+#define NBPTD (NPGPTD<<PAGE_SHIFT)
+#define NPDEPTD (NBPTD/(sizeof (pd_entry_t)))
+#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */
+#define PDRMASK (NBPDR-1)
+
+#define IOPAGES 2 /* pages of i/o permission bitmap */
+
+#ifndef KSTACK_PAGES
+#define KSTACK_PAGES 2 /* Includes pcb! */
+#endif
+#define KSTACK_GUARD_PAGES 1 /* pages of kstack guard; 0 disables */
+#define UAREA_PAGES 1 /* holds struct user WITHOUT PCB (see def.) */
+
+/*
+ * Ceiling on amount of swblock kva space, can be changed via
+ * the kern.maxswzone /boot/loader.conf variable.
+ */
+#ifndef VM_SWZONE_SIZE_MAX
+#define VM_SWZONE_SIZE_MAX (32 * 1024 * 1024)
+#endif
+
+/*
+ * Ceiling on size of buffer cache (really only affects write queueing,
+ * the VM page cache is not affected), can be changed via
+ * the kern.maxbcache /boot/loader.conf variable.
+ */
+#ifndef VM_BCACHE_SIZE_MAX
+#define VM_BCACHE_SIZE_MAX (200 * 1024 * 1024)
+#endif
+
+/*
+ * Mach derived conversion macros
+ */
+#define trunc_page(x) ((x) & ~PAGE_MASK)
+#define round_page(x) (((x) + PAGE_MASK) & ~PAGE_MASK)
+#define trunc_4mpage(x) ((x) & ~PDRMASK)
+#define round_4mpage(x) ((((x)) + PDRMASK) & ~PDRMASK)
+
+#define atop(x) ((x) >> PAGE_SHIFT)
+#define ptoa(x) ((x) << PAGE_SHIFT)
+
+#define i386_btop(x) ((x) >> PAGE_SHIFT)
+#define i386_ptob(x) ((x) << PAGE_SHIFT)
+
+#define pgtok(x) ((x) * (PAGE_SIZE / 1024))
+
+#endif /* !_MACHINE_PARAM_H_ */
+#endif /* !_NO_NAMESPACE_POLLUTION */
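
For reference, a worked example of the Mach-derived macros under the 4KB
page size defined above; the input values are illustrative arithmetic only.

    /*
     * With PAGE_SHIFT == 12 (PAGE_SIZE == 4096):
     *      trunc_page(0x1234) == 0x1000    (round down to page boundary)
     *      round_page(0x1234) == 0x2000    (round up to page boundary)
     *      atop(0x3000)       == 3         (byte address to page number)
     *      ptoa(3)            == 0x3000    (page number to byte address)
     */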
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/pcb.h b/freebsd-5.3-xen-sparse/i386-xen/include/pcb.h
new file mode 100644
index 0000000000..ff68761540
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcb.h
@@ -0,0 +1,96 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)pcb.h 5.10 (Berkeley) 5/12/91
+ * $FreeBSD: src/sys/i386/include/pcb.h,v 1.50 2003/09/30 08:11:36 jeff Exp $
+ */
+
+#ifndef _I386_PCB_H_
+#define _I386_PCB_H_
+
+/*
+ * Intel 386 process control block
+ */
+#include <machine/npx.h>
+
+struct pcb {
+ int pcb_cr3;
+ int pcb_edi;
+ int pcb_esi;
+ int pcb_ebp;
+ int pcb_esp;
+ int pcb_eax;
+ int pcb_ebx;
+ int pcb_ecx;
+ int pcb_edx;
+ int pcb_eip;
+
+ int pcb_dr0;
+ int pcb_dr1;
+ int pcb_dr2;
+ int pcb_dr3;
+ int pcb_dr6;
+ int pcb_dr7;
+
+ union savefpu pcb_save;
+ u_int pcb_flags;
+#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
+#define PCB_DBREGS 0x02 /* process using debug registers */
+#define PCB_NPXTRAP 0x04 /* npx trap pending */
+#define PCB_NPXINITDONE 0x08 /* fpu state is initialized */
+#define PCB_VM86CALL 0x10 /* in vm86 call */
+
+ caddr_t pcb_onfault; /* copyin/out fault recovery */
+ int pcb_cs;
+ int pcb_ds;
+ int pcb_ss;
+ int pcb_es;
+ int pcb_gs;
+ int pcb_fs;
+ struct pcb_ext *pcb_ext; /* optional pcb extension */
+ int pcb_psl; /* process status long */
+ void (*pcb_switchout)(void); /* Special switchout function. */
+ u_long __pcb_spare[2]; /* adjust to avoid core dump size changes */
+};
+
+#ifdef _KERNEL
+struct trapframe;
+
+void makectx(struct trapframe *, struct pcb *);
+
+void savectx(struct pcb *);
+#endif
+
+#endif /* _I386_PCB_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h
new file mode 100644
index 0000000000..80a675cd4a
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h
@@ -0,0 +1,173 @@
+/*-
+ * Copyright (c) Peter Wemm <peter@netplex.com.au>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/include/pcpu.h,v 1.41 2003/11/20 23:23:22 peter Exp $
+ */
+
+#ifndef _MACHINE_PCPU_H_
+#define _MACHINE_PCPU_H_
+
+#ifdef _KERNEL
+
+#include <machine/segments.h>
+#include <machine/tss.h>
+
+/*
+ * The SMP parts are set up in pmap.c and locore.s for the BSP, and
+ * mp_machdep.c sets up the data for the APs to "see" when they awake.
+ * The reason for doing it via a struct is so that an array of pointers
+ * to each CPU's data can be set up for things like "check curproc on all
+ * other processors".
+ */
+#define PCPU_MD_FIELDS \
+ struct pcpu *pc_prvspace; /* Self-reference */ \
+ struct pmap *pc_curpmap; \
+ struct i386tss pc_common_tss; \
+ struct segment_descriptor pc_common_tssd; \
+ struct segment_descriptor *pc_tss_gdt; \
+ int pc_currentldt; \
+ u_int pc_acpi_id; \
+ u_int pc_apic_id; \
+ u_int pc_faultaddr; \
+ u_int pc_trap_nesting; \
+ u_int pc_pdir
+
+#if defined(lint)
+
+extern struct pcpu *pcpup;
+
+#define PCPU_GET(member) (pcpup->pc_ ## member)
+#define PCPU_PTR(member) (&pcpup->pc_ ## member)
+#define PCPU_SET(member,value) (pcpup->pc_ ## member = (value))
+
+#elif defined(__GNUC__)
+
+/*
+ * Evaluates to the byte offset of the per-cpu variable name.
+ */
+#define __pcpu_offset(name) \
+ __offsetof(struct pcpu, name)
+
+/*
+ * Evaluates to the type of the per-cpu variable name.
+ */
+#define __pcpu_type(name) \
+ __typeof(((struct pcpu *)0)->name)
+
+/*
+ * Evaluates to the address of the per-cpu variable name.
+ */
+#define __PCPU_PTR(name) __extension__ ({ \
+ __pcpu_type(name) *__p; \
+ \
+ __asm __volatile("movl %%fs:%1,%0; addl %2,%0" \
+ : "=r" (__p) \
+ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \
+ "i" (__pcpu_offset(name))); \
+ \
+ __p; \
+})
+
+/*
+ * Evaluates to the value of the per-cpu variable name.
+ */
+#define __PCPU_GET(name) __extension__ ({ \
+ __pcpu_type(name) __result; \
+ \
+ if (sizeof(__result) == 1) { \
+ u_char __b; \
+ __asm __volatile("movb %%fs:%1,%0" \
+ : "=r" (__b) \
+ : "m" (*(u_char *)(__pcpu_offset(name)))); \
+ __result = *(__pcpu_type(name) *)(void *)&__b; \
+ } else if (sizeof(__result) == 2) { \
+ u_short __w; \
+ __asm __volatile("movw %%fs:%1,%0" \
+ : "=r" (__w) \
+ : "m" (*(u_short *)(__pcpu_offset(name)))); \
+ __result = *(__pcpu_type(name) *)(void *)&__w; \
+ } else if (sizeof(__result) == 4) { \
+ u_int __i; \
+ __asm __volatile("movl %%fs:%1,%0" \
+ : "=r" (__i) \
+ : "m" (*(u_int *)(__pcpu_offset(name)))); \
+ __result = *(__pcpu_type(name) *)(void *)&__i; \
+ } else { \
+ __result = *__PCPU_PTR(name); \
+ } \
+ \
+ __result; \
+})
+
+/*
+ * Sets the value of the per-cpu variable name to value val.
+ */
+#define __PCPU_SET(name, val) { \
+ __pcpu_type(name) __val = (val); \
+ \
+ if (sizeof(__val) == 1) { \
+ u_char __b; \
+ __b = *(u_char *)&__val; \
+ __asm __volatile("movb %1,%%fs:%0" \
+ : "=m" (*(u_char *)(__pcpu_offset(name))) \
+ : "r" (__b)); \
+ } else if (sizeof(__val) == 2) { \
+ u_short __w; \
+ __w = *(u_short *)&__val; \
+ __asm __volatile("movw %1,%%fs:%0" \
+ : "=m" (*(u_short *)(__pcpu_offset(name))) \
+ : "r" (__w)); \
+ } else if (sizeof(__val) == 4) { \
+ u_int __i; \
+ __i = *(u_int *)&__val; \
+ __asm __volatile("movl %1,%%fs:%0" \
+ : "=m" (*(u_int *)(__pcpu_offset(name))) \
+ : "r" (__i)); \
+ } else { \
+ *__PCPU_PTR(name) = __val; \
+ } \
+}
+
+#define PCPU_GET(member) __PCPU_GET(pc_ ## member)
+#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member)
+#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val)
+
+static __inline struct thread *
+__curthread(void)
+{
+ struct thread *td;
+
+ __asm __volatile("movl %%fs:0,%0" : "=r" (td));
+ return (td);
+}
+#define curthread (__curthread())
+
+#else
+#error gcc or lint is required to use this file
+#endif
+
+#endif /* _KERNEL */
+
+#endif /* ! _MACHINE_PCPU_H_ */
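
A hedged sketch of consuming the accessors; pc_curpmap and pc_faultaddr are
real MD fields declared above, while the wrapper functions are hypothetical:

    static struct pmap *
    example_current_pmap(void)
    {
            return (PCPU_GET(curpmap));     /* 4-byte read: movl %fs:off,%reg */
    }

    static void
    example_note_fault(u_int va)
    {
            PCPU_SET(faultaddr, va);        /* 4-byte write: movl %reg,%fs:off */
    }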
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h
new file mode 100644
index 0000000000..9e838b9bd4
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department and William Jolitz of UUNET Technologies Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Derived from hp300 version by Mike Hibler, this version by William
+ * Jolitz uses a recursive map [a pde points to the page directory] to
+ * map the page tables using the pagetables themselves. This is done to
+ * reduce the impact on kernel virtual memory for lots of sparse address
+ * space, and to reduce the cost of memory to each process.
+ *
+ * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90
+ * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91
+ * $FreeBSD: src/sys/i386/include/pmap.h,v 1.103 2003/11/08 03:01:26 alc Exp $
+ */
+
+#ifndef _MACHINE_PMAP_H_
+#define _MACHINE_PMAP_H_
+
+/*
+ * Page-directory and page-table entries follow this format, with a few
+ * of the fields not present here and there, depending on a lot of things.
+ */
+ /* ---- Intel Nomenclature ---- */
+#define PG_V 0x001 /* P Valid */
+#define PG_RW 0x002 /* R/W Read/Write */
+#define PG_U 0x004 /* U/S User/Supervisor */
+#define PG_NC_PWT 0x008 /* PWT Write through */
+#define PG_NC_PCD 0x010 /* PCD Cache disable */
+#define PG_A 0x020 /* A Accessed */
+#define PG_M 0x040 /* D Dirty */
+#define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */
+#define PG_G 0x100 /* G Global */
+#define PG_AVAIL1 0x200 /* / Available for system */
+#define PG_AVAIL2 0x400 /* < programmers use */
+#define PG_AVAIL3 0x800 /* \ */
+
+
+/* Our various interpretations of the above */
+#define PG_W PG_AVAIL1 /* "Wired" pseudoflag */
+#define PG_MANAGED PG_AVAIL2
+#define PG_FRAME (~((vm_paddr_t)PAGE_MASK))
+#define PG_PROT (PG_RW|PG_U) /* all protection bits . */
+#define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */
+
+#define PG_KERNEL (PG_V | PG_RW | PG_M | PG_A)
+#define PG_KERNEL_NC (PG_KERNEL | PG_N)
+#define PG_KERNEL_RO (PG_V | PG_M | PG_A)
+
+/*
+ * Page Protection Exception bits
+ */
+
+#define PGEX_P 0x01 /* Protection violation vs. not present */
+#define PGEX_W 0x02 /* during a Write cycle */
+#define PGEX_U 0x04 /* access from User mode (UPL) */
+#define XEN_PAGES 16
+
+/*
+ * Size of Kernel address space. This is the number of page table pages
+ * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte.
+ * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
+ */
+
+#ifndef KVA_PAGES
+#ifdef PAE
+#define KVA_PAGES 512
+#else
+#define KVA_PAGES 256
+#endif
+#endif
+
+/*
+ * Pte related macros
+ */
+#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT)))
+
+#ifndef NKPT
+#ifdef PAE
+#define NKPT 120 /* actual number of kernel page tables */
+#else
+#define NKPT 30 /* actual number of kernel page tables */
+#endif
+#endif
+
+/*
+ * XEN NOTE: Xen consumes 64MB of memory, so subtract that from the number
+ * of pages available to the kernel virtual address space.
+ */
+#ifndef NKPDE
+#ifdef SMP
+#define NKPDE (KVA_PAGES - 1 - XEN_PAGES) /* number of page tables/pde's */
+#else
+#define NKPDE (KVA_PAGES - XEN_PAGES) /* number of page tables/pde's */
+#endif
+#endif
+
+/*
+ * The *PTDI values control the layout of virtual memory
+ *
+ * XXX This works for now, but I am not real happy with it, I'll fix it
+ * right after I fix locore.s and the magic 28K hole
+ *
+ * SMP_PRIVPAGES: The per-cpu address space is 0xff800000 -> 0xffbfffff
+ */
+
+/*
+ * XEN NOTE: We need to shift down the start of KVA by 64MB to account for
+ * Xen using the upper 64MB.
+ *
+ * The layout of VA for XenoBSD is:
+ * | USER | PTDPTDI | KVA | XEN |
+ * | 0x00000000 | 0xbfc00000 | 0xc0000000 | 0xfc000000 - 0xffffffff|
+ *
+ * Normally it is just:
+ * | USER | PTDPTDI | KVA |
+ * | 0x00000000 | 0xbfc00000 | 0xc0000000 - 0xffffffff |
+ */
+
+#ifdef SMP
+#define MPPTDI (NPDEPTD-1) /* per cpu ptd entry */
+#define KPTDI (MPPTDI-NKPDE-XEN_PAGES) /* start of kernel virtual pde's */
+#else
+#define KPTDI (NPDEPTD-NKPDE-XEN_PAGES) /* start of kernel virtual pde's */
+#endif /* SMP */
+
+#define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to ptd! */
+
+/*
+ * XXX doesn't really belong here I guess...
+ */
+#define ISA_HOLE_START 0xa0000
+#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START)
+
+#ifndef LOCORE
+
+#include <sys/queue.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+
+
+typedef uint32_t pd_entry_t;
+typedef uint32_t pt_entry_t;
+
+#define PTESHIFT (2)
+#define PDESHIFT (2)
+
+
+/*
+ * Address of current and alternate address space page table maps
+ * and directories.
+ */
+#ifdef _KERNEL
+extern pt_entry_t PTmap[];
+extern pd_entry_t PTD[];
+extern pd_entry_t PTDpde[];
+
+extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
+
+#include <machine/xen-os.h>
+#include <machine/xenvar.h>
+#include <machine/xenpmap.h>
+
+
+/*
+ * virtual address to page table entry and
+ * to physical address. Likewise for alternate address space.
+ * Note: these work recursively, thus vtopte of a pte will give
+ * the corresponding pde that in turn maps it.
+ */
+#define vtopte(va) (PTmap + i386_btop(va))
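+
+/*
+ * Sketch of the recursion (illustrative only, variable names are not
+ * from this header): applying vtopte() to the address of a pte climbs
+ * one level, yielding the pde that maps it:
+ *
+ * pt_entry_t *pte = vtopte(va);
+ * pd_entry_t *pde = (pd_entry_t *)vtopte((vm_offset_t)pte);
+ */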
+
+/*
+ * Given a virtual address, return the machine address of its PTE
+ *
+ */
+#define vtoptema(va) pmap_kextract_ma((vm_offset_t) vtopte(va))
+
+/*
+ * Routine: pmap_kextract/pmap_kextract_ma
+ * Function:
+ * Extract the physical/machine page address associated
+ * with the given kernel virtual address.
+ */
+
+static __inline vm_paddr_t
+pmap_kextract_ma(vm_offset_t va)
+{
+ vm_paddr_t ma;
+ if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) {
+ ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1));
+ } else {
+ ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK);
+ }
+ return ma;
+}
+
+static __inline vm_paddr_t
+pmap_kextract(vm_offset_t va)
+{
+ return xpmap_mtop(pmap_kextract_ma(va));
+}
+
+#define vtophys(va) pmap_kextract(((vm_offset_t) (va)))
+#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va)))
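+
+/*
+ * Usage note: vtophys() yields the guest's pseudo-physical address,
+ * vtomach() the underlying machine address; under Xen only machine
+ * addresses may be written into page-table entries.
+ */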
+
+static __inline pt_entry_t
+pte_load_clear(pt_entry_t *ptep)
+{
+ pt_entry_t r;
+
+ r = PT_GET(ptep);
+ PT_CLEAR_VA(ptep, TRUE);
+ return (r);
+}
+static __inline pt_entry_t
+pte_load_store(pt_entry_t *ptep, pt_entry_t v)
+{
+ pt_entry_t r;
+ r = PT_GET(ptep);
+ PT_SET_VA_MA(ptep, v, TRUE);
+ return (r);
+}
+
+#define pte_store(ptep, pte) PT_SET_VA_MA(ptep, pte, TRUE);
+#define pte_clear(pte) PT_CLEAR_VA(pte, TRUE);
+
+
+#endif /* _KERNEL */
+
+/*
+ * Pmap stuff
+ */
+struct pv_entry;
+
+struct md_page {
+ int pv_list_count;
+ TAILQ_HEAD(,pv_entry) pv_list;
+};
+
+struct pmap {
+ struct mtx pm_mtx;
+ pd_entry_t *pm_pdir; /* KVA of page directory */
+ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */
+ u_int pm_active; /* active on cpus */
+ struct pmap_statistics pm_stats; /* pmap statistics */
+ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
+};
+
+
+typedef struct pmap *pmap_t;
+
+#ifdef _KERNEL
+extern struct pmap kernel_pmap_store;
+#define kernel_pmap (&kernel_pmap_store)
+
+#define PMAP_LOCK(pmap)         mtx_lock(&(pmap)->pm_mtx)
+#define PMAP_LOCK_ASSERT(pmap, type) \
+                                mtx_assert(&(pmap)->pm_mtx, (type))
+#define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx)
+#define PMAP_LOCK_INIT(pmap)    mtx_init(&(pmap)->pm_mtx, "pmap", \
+                                    NULL, MTX_DEF | MTX_DUPOK)
+#define PMAP_LOCKED(pmap)       mtx_owned(&(pmap)->pm_mtx)
+#define PMAP_MTX(pmap)          (&(pmap)->pm_mtx)
+#define PMAP_TRYLOCK(pmap)      mtx_trylock(&(pmap)->pm_mtx)
+#define PMAP_UNLOCK(pmap)       mtx_unlock(&(pmap)->pm_mtx)
+
+#endif
+
+/*
+ * For each vm_page_t, there is a list of all currently valid virtual
+ * mappings of that page. An entry is a pv_entry_t, the list is pv_table.
+ */
+typedef struct pv_entry {
+ pmap_t pv_pmap; /* pmap where mapping lies */
+ vm_offset_t pv_va; /* virtual address for mapping */
+ TAILQ_ENTRY(pv_entry) pv_list;
+ TAILQ_ENTRY(pv_entry) pv_plist;
+} *pv_entry_t;
+
+#ifdef _KERNEL
+
+#define NPPROVMTRR 8
+#define PPRO_VMTRRphysBase0 0x200
+#define PPRO_VMTRRphysMask0 0x201
+struct ppro_vmtrr {
+ u_int64_t base, mask;
+};
+extern struct ppro_vmtrr PPro_vmtrr[NPPROVMTRR];
+
+extern caddr_t CADDR1;
+extern pt_entry_t *CMAP1;
+extern vm_paddr_t avail_end;
+extern vm_paddr_t phys_avail[];
+extern int pseflag;
+extern int pgeflag;
+extern char *ptvmmap; /* poor name! */
+extern vm_offset_t virtual_avail;
+extern vm_offset_t virtual_end;
+
+#define pmap_page_is_mapped(m)  (!TAILQ_EMPTY(&(m)->md.pv_list))
+
+void pmap_bootstrap(vm_paddr_t, vm_paddr_t);
+void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
+void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa);
+void *pmap_kenter_temporary(vm_paddr_t pa, int i);
+void pmap_kremove(vm_offset_t);
+void *pmap_mapdev(vm_paddr_t, vm_size_t);
+void pmap_unmapdev(vm_offset_t, vm_size_t);
+pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2;
+void pmap_set_pg(void);
+void pmap_invalidate_page(pmap_t, vm_offset_t);
+void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
+void pmap_invalidate_all(pmap_t);
+
+void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len);
+void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len);
+
+
+#endif /* _KERNEL */
+
+#endif /* !LOCORE */
+
+#endif /* !_MACHINE_PMAP_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/segments.h b/freebsd-5.3-xen-sparse/i386-xen/include/segments.h
new file mode 100644
index 0000000000..85cc20c1f5
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/segments.h
@@ -0,0 +1,260 @@
+/*-
+ * Copyright (c) 1989, 1990 William F. Jolitz
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)segments.h 7.1 (Berkeley) 5/9/91
+ * $FreeBSD: src/sys/i386/include/segments.h,v 1.36 2003/11/03 21:12:04 jhb Exp $
+ */
+
+#ifndef _MACHINE_SEGMENTS_H_
+#define _MACHINE_SEGMENTS_H_
+
+/*
+ * 386 Segmentation Data Structures and definitions
+ * William F. Jolitz (william@ernie.berkeley.edu) 6/20/1989
+ */
+
+/*
+ * Selectors
+ */
+
+#define ISPL(s) ((s)&3) /* what is the priority level of a selector */
+#define SEL_KPL 1 /* kernel priority level */
+#define SEL_UPL 3 /* user priority level */
+#define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */
+#define SEL_LDT 4 /* local descriptor table */
+#define IDXSEL(s) (((s)>>3) & 0x1fff) /* index of selector */
+#define LSEL(s,r) (((s)<<3) | SEL_LDT | r) /* a local selector */
+#define GSEL(s,r) (((s)<<3) | r) /* a global selector */
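+
+/*
+ * Example (sketch): GSEL(GCODE_SEL, SEL_KPL) builds the kernel code
+ * selector; with SEL_KPL defined as 1 above, the kernel runs in ring 1
+ * under Xen rather than ring 0.
+ */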
+
+/*
+ * Memory and System segment descriptors
+ */
+struct segment_descriptor {
+ unsigned sd_lolimit:16 ; /* segment extent (lsb) */
+ unsigned sd_lobase:24 __packed; /* segment base address (lsb) */
+ unsigned sd_type:5 ; /* segment type */
+ unsigned sd_dpl:2 ; /* segment descriptor priority level */
+ unsigned sd_p:1 ; /* segment descriptor present */
+ unsigned sd_hilimit:4 ; /* segment extent (msb) */
+ unsigned sd_xx:2 ; /* unused */
+ unsigned sd_def32:1 ; /* default 32 vs 16 bit size */
+ unsigned sd_gran:1 ; /* limit granularity (byte/page units)*/
+ unsigned sd_hibase:8 ; /* segment base address (msb) */
+} ;
+
+/*
+ * Gate descriptors (e.g. indirect descriptors)
+ */
+struct gate_descriptor {
+ unsigned gd_looffset:16 ; /* gate offset (lsb) */
+ unsigned gd_selector:16 ; /* gate segment selector */
+ unsigned gd_stkcpy:5 ; /* number of stack wds to cpy */
+ unsigned gd_xx:3 ; /* unused */
+ unsigned gd_type:5 ; /* segment type */
+ unsigned gd_dpl:2 ; /* segment descriptor priority level */
+ unsigned gd_p:1 ; /* segment descriptor present */
+ unsigned gd_hioffset:16 ; /* gate offset (msb) */
+} ;
+
+/*
+ * Generic descriptor
+ */
+union descriptor {
+ struct segment_descriptor sd;
+ struct gate_descriptor gd;
+};
+
+ /* system segments and gate types */
+#define SDT_SYSNULL 0 /* system null */
+#define SDT_SYS286TSS 1 /* system 286 TSS available */
+#define SDT_SYSLDT 2 /* system local descriptor table */
+#define SDT_SYS286BSY 3 /* system 286 TSS busy */
+#define SDT_SYS286CGT 4 /* system 286 call gate */
+#define SDT_SYSTASKGT 5 /* system task gate */
+#define SDT_SYS286IGT 6 /* system 286 interrupt gate */
+#define SDT_SYS286TGT 7 /* system 286 trap gate */
+#define SDT_SYSNULL2 8 /* system null again */
+#define SDT_SYS386TSS 9 /* system 386 TSS available */
+#define SDT_SYSNULL3 10 /* system null again */
+#define SDT_SYS386BSY 11 /* system 386 TSS busy */
+#define SDT_SYS386CGT 12 /* system 386 call gate */
+#define SDT_SYSNULL4 13 /* system null again */
+#define SDT_SYS386IGT 14 /* system 386 interrupt gate */
+#define SDT_SYS386TGT 15 /* system 386 trap gate */
+
+ /* memory segment types */
+#define SDT_MEMRO 16 /* memory read only */
+#define SDT_MEMROA 17 /* memory read only accessed */
+#define SDT_MEMRW 18 /* memory read write */
+#define SDT_MEMRWA 19 /* memory read write accessed */
+#define SDT_MEMROD 20 /* memory read only expand dwn limit */
+#define SDT_MEMRODA 21 /* memory read only expand dwn limit accessed */
+#define SDT_MEMRWD 22 /* memory read write expand dwn limit */
+#define SDT_MEMRWDA 23 /* memory read write expand dwn limit accessed */
+#define SDT_MEME 24 /* memory execute only */
+#define SDT_MEMEA 25 /* memory execute only accessed */
+#define SDT_MEMER 26 /* memory execute read */
+#define SDT_MEMERA 27 /* memory execute read accessed */
+#define SDT_MEMEC 28 /* memory execute only conforming */
+#define SDT_MEMEAC 29 /* memory execute only accessed conforming */
+#define SDT_MEMERC 30 /* memory execute read conforming */
+#define SDT_MEMERAC 31 /* memory execute read accessed conforming */
+
+/*
+ * Software definitions are in this convenient format,
+ * which are translated into inconvenient segment descriptors
+ * when needed to be used by the 386 hardware
+ */
+
+struct soft_segment_descriptor {
+ unsigned ssd_base ; /* segment base address */
+ unsigned ssd_limit ; /* segment extent */
+ unsigned ssd_type:5 ; /* segment type */
+ unsigned ssd_dpl:2 ; /* segment descriptor priority level */
+ unsigned ssd_p:1 ; /* segment descriptor present */
+ unsigned ssd_xx:4 ; /* unused */
+ unsigned ssd_xx1:2 ; /* unused */
+ unsigned ssd_def32:1 ; /* default 32 vs 16 bit size */
+ unsigned ssd_gran:1 ; /* limit granularity (byte/page units)*/
+};
+
+/*
+ * region descriptors, used to load gdt/idt tables before segments yet exist.
+ */
+struct region_descriptor {
+ unsigned rd_limit:16; /* segment extent */
+ unsigned rd_base:32 __packed; /* base address */
+};
+
+/*
+ * Segment Protection Exception code bits
+ */
+
+#define SEGEX_EXT 0x01 /* recursive or externally induced */
+#define SEGEX_IDT 0x02 /* interrupt descriptor table */
+#define SEGEX_TI 0x04 /* local descriptor table */
+ /* other bits are affected descriptor index */
+#define SEGEX_IDX(s) (((s)>>3)&0x1fff)
+
+/*
+ * Size of IDT table
+ */
+
+#define NIDT 256 /* 32 reserved, 0x80 syscall, most are h/w */
+#define NRSVIDT 32 /* reserved entries for cpu exceptions */
+
+/*
+ * Entries in the Interrupt Descriptor Table (IDT)
+ */
+#define IDT_DE 0 /* #DE: Divide Error */
+#define IDT_DB 1 /* #DB: Debug */
+#define IDT_NMI 2 /* Nonmaskable External Interrupt */
+#define IDT_BP 3 /* #BP: Breakpoint */
+#define IDT_OF 4 /* #OF: Overflow */
+#define IDT_BR 5 /* #BR: Bound Range Exceeded */
+#define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */
+#define IDT_NM 7 /* #NM: No Math Coprocessor */
+#define IDT_DF 8 /* #DF: Double Fault */
+#define IDT_FPUGP 9 /* Coprocessor Segment Overrun */
+#define IDT_TS 10 /* #TS: Invalid TSS */
+#define IDT_NP 11 /* #NP: Segment Not Present */
+#define IDT_SS 12 /* #SS: Stack Segment Fault */
+#define IDT_GP 13 /* #GP: General Protection Fault */
+#define IDT_PF 14 /* #PF: Page Fault */
+#define IDT_MF 16 /* #MF: FPU Floating-Point Error */
+#define IDT_AC 17 /* #AC: Alignment Check */
+#define IDT_MC 18 /* #MC: Machine Check */
+#define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */
+#define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. */
+#define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */
+
+/*
+ * Entries in the Global Descriptor Table (GDT)
+ */
+#define GNULL_SEL 0 /* Null Descriptor */
+#if 0
+#define GCODE_SEL 1 /* Kernel Code Descriptor */
+#define GDATA_SEL 2 /* Kernel Data Descriptor */
+#else
+#define GCODE_SEL (__KERNEL_CS >> 3) /* Kernel Code Descriptor */
+#define GDATA_SEL (__KERNEL_DS >> 3) /* Kernel Data Descriptor */
+#endif
+#define GPRIV_SEL 3 /* SMP Per-Processor Private Data */
+#define GPROC0_SEL 4 /* Task state process slot zero and up */
+#define GLDT_SEL 5 /* LDT - eventually one per process */
+#define GUSERLDT_SEL 6 /* User LDT */
+#define GTGATE_SEL 7 /* Process task switch gate */
+#define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be entry 8) */
+#define GPANIC_SEL 9 /* Task state to consider panic from */
+#define GBIOSCODE32_SEL 10 /* BIOS interface (32bit Code) */
+#define GBIOSCODE16_SEL 11 /* BIOS interface (16bit Code) */
+#define GBIOSDATA_SEL 12 /* BIOS interface (Data) */
+#define GBIOSUTIL_SEL 13 /* BIOS interface (Utility) */
+#define GBIOSARGS_SEL 14 /* BIOS interface (Arguments) */
+
+#define NGDT 4
+
+/*
+ * Entries in the Local Descriptor Table (LDT)
+ */
+#define LSYS5CALLS_SEL 0 /* forced by intel BCS */
+#define LSYS5SIGR_SEL 1
+#define L43BSDCALLS_SEL 2 /* notyet */
+#define LUCODE_SEL 3
+#define LSOL26CALLS_SEL 4 /* Solaris >= 2.6 system call gate */
+#define LUDATA_SEL 5
+/* separate stack, es,fs,gs sels ? */
+/* #define LPOSIXCALLS_SEL 5*/ /* notyet */
+#define LBSDICALLS_SEL 16 /* BSDI system call gate */
+#define NLDT (LBSDICALLS_SEL + 1)
+
+#ifdef _KERNEL
+extern int _default_ldt;
+extern union descriptor *gdt;
+extern struct soft_segment_descriptor gdt_segs[];
+extern struct gate_descriptor *idt;
+extern union descriptor *ldt;
+extern struct region_descriptor r_gdt, r_idt;
+
+void lgdt(struct region_descriptor *rdp);
+void lgdt_finish(void);
+void sdtossd(struct segment_descriptor *sdp,
+ struct soft_segment_descriptor *ssdp);
+void ssdtosd(struct soft_segment_descriptor *ssdp,
+ struct segment_descriptor *sdp);
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_SEGMENTS_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h b/freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h
new file mode 100644
index 0000000000..31ec3d3468
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h
@@ -0,0 +1,82 @@
+#ifndef __XEN_SYNCH_BITOPS_H__
+#define __XEN_SYNCH_BITOPS_H__
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ * Heavily modified to provide guaranteed strong synchronisation
+ * when communicating with Xen or other guest OSes running on other CPUs.
+ */
+
+
+#define ADDR (*(volatile long *) addr)
+
+static __inline__ void synch_set_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__ (
+ "lock btsl %1,%0"
+ : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ void synch_clear_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__ (
+ "lock btrl %1,%0"
+ : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ void synch_change_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__ (
+ "lock btcl %1,%0"
+ : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+ __asm__ __volatile__ (
+ "lock btsl %2,%1\n\tsbbl %0,%0"
+ : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+ return oldbit;
+}
+
+static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+ __asm__ __volatile__ (
+ "lock btrl %2,%1\n\tsbbl %0,%0"
+ : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+ return oldbit;
+}
+
+static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__ (
+ "lock btcl %2,%1\n\tsbbl %0,%0"
+ : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+ return oldbit;
+}
+
+static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
+{
+ return ((1UL << (nr & 31)) &
+ (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int synch_var_test_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+ __asm__ __volatile__ (
+ "btl %2,%1\n\tsbbl %0,%0"
+ : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) );
+ return oldbit;
+}
+
+#define synch_test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ synch_const_test_bit((nr),(addr)) : \
+ synch_var_test_bit((nr),(addr)))
+
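+/*
+ * Usage sketch (names illustrative, not declared here): atomically
+ * consume a flag that another domain may set concurrently, e.g.
+ *
+ * if (synch_test_and_clear_bit(port, &shared->evtchn_pending[0]))
+ * handle_event(port);
+ */
+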
+#endif /* __XEN_SYNCH_BITOPS_H__ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/trap.h b/freebsd-5.3-xen-sparse/i386-xen/include/trap.h
new file mode 100644
index 0000000000..c61beb90aa
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/trap.h
@@ -0,0 +1,111 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)trap.h 5.4 (Berkeley) 5/9/91
+ * $FreeBSD: src/sys/i386/include/trap.h,v 1.13 2001/07/12 06:32:51 peter Exp $
+ */
+
+#ifndef _MACHINE_TRAP_H_
+#define _MACHINE_TRAP_H_
+
+/*
+ * Trap type values
+ * also known in trap.c for name strings
+ */
+
+#define T_PRIVINFLT 1 /* privileged instruction */
+#define T_BPTFLT 3 /* breakpoint instruction */
+#define T_ARITHTRAP 6 /* arithmetic trap */
+#define T_PROTFLT 9 /* protection fault */
+#define T_TRCTRAP 10 /* debug exception (sic) */
+#define T_PAGEFLT 12 /* page fault */
+#define T_ALIGNFLT 14 /* alignment fault */
+
+#define T_NESTED 16
+#define T_HYPCALLBACK 17 /* hypervisor callback */
+
+
+#define T_DIVIDE 18 /* integer divide fault */
+#define T_NMI 19 /* non-maskable trap */
+#define T_OFLOW 20 /* overflow trap */
+#define T_BOUND 21 /* bound instruction fault */
+#define T_DNA 22 /* device not available fault */
+#define T_DOUBLEFLT 23 /* double fault */
+#define T_FPOPFLT 24 /* fp coprocessor operand fetch fault */
+#define T_TSSFLT 25 /* invalid tss fault */
+#define T_SEGNPFLT 26 /* segment not present fault */
+#define T_STKFLT 27 /* stack fault */
+#define T_MCHK 28 /* machine check trap */
+#define T_XMMFLT 29 /* SIMD floating-point exception */
+#define T_RESERVED 30 /* reserved (unknown) */
+
+/* XXX most of the following codes aren't used, but could be. */
+
+/* definitions for <sys/signal.h> */
+#define ILL_RESAD_FAULT T_RESADFLT
+#define ILL_PRIVIN_FAULT T_PRIVINFLT
+#define ILL_RESOP_FAULT T_RESOPFLT
+#define ILL_ALIGN_FAULT T_ALIGNFLT
+#define ILL_FPOP_FAULT T_FPOPFLT /* coprocessor operand fault */
+
+/* portable macros for SIGFPE/ARITHTRAP */
+#define FPE_INTOVF 1 /* integer overflow */
+#define FPE_INTDIV 2 /* integer divide by zero */
+#define FPE_FLTDIV 3 /* floating point divide by zero */
+#define FPE_FLTOVF 4 /* floating point overflow */
+#define FPE_FLTUND 5 /* floating point underflow */
+#define FPE_FLTRES 6 /* floating point inexact result */
+#define FPE_FLTINV 7 /* invalid floating point operation */
+#define FPE_FLTSUB 8 /* subscript out of range */
+
+/* old FreeBSD macros, deprecated */
+#define FPE_INTOVF_TRAP 0x1 /* integer overflow */
+#define FPE_INTDIV_TRAP 0x2 /* integer divide by zero */
+#define FPE_FLTDIV_TRAP 0x3 /* floating/decimal divide by zero */
+#define FPE_FLTOVF_TRAP 0x4 /* floating overflow */
+#define FPE_FLTUND_TRAP 0x5 /* floating underflow */
+#define FPE_FPU_NP_TRAP 0x6 /* floating point unit not present */
+#define FPE_SUBRNG_TRAP 0x7 /* subrange out of bounds */
+
+/* codes for SIGBUS */
+#define BUS_PAGE_FAULT T_PAGEFLT /* page fault protection base */
+#define BUS_SEGNP_FAULT T_SEGNPFLT /* segment not present */
+#define BUS_STK_FAULT T_STKFLT /* stack segment */
+#define BUS_SEGM_FAULT T_RESERVED /* segment protection base */
+
+/* Trap's coming from user mode */
+#define T_USER 0x100
+
+#endif /* !_MACHINE_TRAP_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h b/freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h
new file mode 100644
index 0000000000..eda584b62e
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h
@@ -0,0 +1,105 @@
+/*-
+ * Copyright (c) 1999 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/include/ucontext.h,v 1.10 2002/12/02 19:58:55 deischen Exp $
+ */
+
+#ifndef _MACHINE_UCONTEXT_H_
+#define _MACHINE_UCONTEXT_H_
+
+typedef struct __mcontext {
+ /*
+ * The first 20 fields must match the definition of
+ * sigcontext, so that we can support sigcontext
+ * and ucontext_t at the same time.
+ */
+ int mc_onstack; /* XXX - sigcontext compat. */
+ int mc_gs; /* machine state (struct trapframe) */
+ int mc_fs;
+ int mc_es;
+ int mc_ds;
+ int mc_edi;
+ int mc_esi;
+ int mc_ebp;
+ int mc_isp;
+ int mc_ebx;
+ int mc_edx;
+ int mc_ecx;
+ int mc_eax;
+ int mc_trapno;
+ int mc_cr2;
+ int mc_err;
+ int mc_eip;
+ int mc_cs;
+ int mc_eflags;
+ int mc_esp;
+ int mc_ss;
+
+ int mc_len; /* sizeof(mcontext_t) */
+#define _MC_FPFMT_NODEV 0x10000 /* device not present or configured */
+#define _MC_FPFMT_387 0x10001
+#define _MC_FPFMT_XMM 0x10002
+ int mc_fpformat;
+#define _MC_FPOWNED_NONE 0x20000 /* FP state not used */
+#define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */
+#define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */
+ int mc_ownedfp;
+ /*
+ * See <machine/npx.h> for the internals of mc_fpstate[].
+ */
+ int mc_fpstate[128] __aligned(16);
+ int mc_spare2[8];
+} mcontext_t;
+
+#if defined(_KERNEL) && defined(COMPAT_FREEBSD4)
+struct mcontext4 {
+ int mc_onstack; /* XXX - sigcontext compat. */
+ int mc_gs; /* machine state (struct trapframe) */
+ int mc_fs;
+ int mc_es;
+ int mc_ds;
+ int mc_edi;
+ int mc_esi;
+ int mc_ebp;
+ int mc_isp;
+ int mc_ebx;
+ int mc_edx;
+ int mc_ecx;
+ int mc_eax;
+ int mc_trapno;
+ int mc_err;
+ int mc_eip;
+ int mc_cs;
+ int mc_eflags;
+ int mc_esp; /* machine state */
+ int mc_ss;
+ int mc_fpregs[28]; /* env87 + fpacc87 + u_long */
+ int __spare__[17];
+};
+#endif
+
+#endif /* !_MACHINE_UCONTEXT_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h b/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h
new file mode 100644
index 0000000000..7fa9af3c68
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h
@@ -0,0 +1,141 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91
+ * $FreeBSD: src/sys/i386/include/vmparam.h,v 1.37 2003/10/01 23:46:08 peter Exp $
+ */
+
+
+#ifndef _MACHINE_VMPARAM_H_
+#define _MACHINE_VMPARAM_H_ 1
+
+/*
+ * Machine dependent constants for 386.
+ */
+
+#define VM_PROT_READ_IS_EXEC /* if you can read -- then you can exec */
+
+/*
+ * Virtual memory related constants, all in bytes
+ */
+#define MAXTSIZ (128UL*1024*1024) /* max text size */
+#ifndef DFLDSIZ
+#define DFLDSIZ (128UL*1024*1024) /* initial data size limit */
+#endif
+#ifndef MAXDSIZ
+#define MAXDSIZ (512UL*1024*1024) /* max data size */
+#endif
+#ifndef DFLSSIZ
+#define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */
+#endif
+#ifndef MAXSSIZ
+#define MAXSSIZ (64UL*1024*1024) /* max stack size */
+#endif
+#ifndef SGROWSIZ
+#define SGROWSIZ (128UL*1024) /* amount to grow stack */
+#endif
+
+#define USRTEXT (1*PAGE_SIZE) /* base of user text XXX bogus */
+
+/*
+ * The time for a process to be blocked before being very swappable.
+ * This is a number of seconds which the system takes as being a non-trivial
+ * amount of real time. You probably shouldn't change this;
+ * it is used in subtle ways (fractions and multiples of it are, that is, like
+ * half of a ``long time'', almost a long time, etc.)
+ * It is related to human patience and other factors which don't really
+ * change over time.
+ */
+#define MAXSLP 20
+
+
+/*
+ * Kernel physical load address.
+ */
+#ifndef KERNLOAD
+#define KERNLOAD (1 << PDRSHIFT)
+#endif
+
+/*
+ * Virtual addresses of things. Derived from the page directory and
+ * page table indexes from pmap.h for precision.
+ * Because of the page that is both a PD and PT, it looks a little
+ * messy at times, but hey, we'll do anything to save a page :-)
+ */
+
+#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1)
+#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI)
+
+#define KERNBASE VADDR(KPTDI, 0)
+
+#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
+#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0)
+
+#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI-1, 0)
+
+#define USRSTACK VM_MAXUSER_ADDRESS
+
+#define VM_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
+#define VM_MIN_ADDRESS ((vm_offset_t)0)
+
+/* virtual sizes (bytes) for various kernel submaps */
+#ifndef VM_KMEM_SIZE
+#define VM_KMEM_SIZE (12 * 1024 * 1024)
+#endif
+
+/*
+ * How many physical pages per KVA page allocated.
+ * min(max(VM_KMEM_SIZE, Physical memory/VM_KMEM_SIZE_SCALE), VM_KMEM_SIZE_MAX)
+ * is the total KVA space allocated for kmem_map.
+ */
+#ifndef VM_KMEM_SIZE_SCALE
+#define VM_KMEM_SIZE_SCALE (3)
+#endif
+
+/*
+ * Ceiling on amount of kmem_map kva space.
+ */
+#ifndef VM_KMEM_SIZE_MAX
+#define VM_KMEM_SIZE_MAX (320 * 1024 * 1024)
+#endif
+
+/* initial pagein size of beginning of executable file */
+#ifndef VM_INITIAL_PAGEIN
+#define VM_INITIAL_PAGEIN 16
+#endif
+
+#endif /* _MACHINE_VMPARAM_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h b/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h
new file mode 100644
index 0000000000..e483fc535c
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h
@@ -0,0 +1,293 @@
+/******************************************************************************
+ * os.h
+ *
+ * random collection of macros and definitions
+ */
+
+#ifndef _OS_H_
+#define _OS_H_
+
+#ifndef NULL
+#define NULL (void *)0
+#endif
+
+/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented
+ a mechanism by which the user can annotate likely branch directions and
+ expect the blocks to be reordered appropriately. Define __builtin_expect
+ to nothing for earlier compilers. */
+
+#if __GNUC__ == 2 && __GNUC_MINOR__ < 96
+#define __builtin_expect(x, expected_value) (x)
+#endif
+
+
+
+/*
+ * These are the segment descriptors provided for us by the hypervisor.
+ * For now, these are hardwired -- guest OSes cannot update the GDT
+ * or LDT.
+ *
+ * It shouldn't be hard to support descriptor-table frobbing -- let me
+ * know if the BSD or XP ports require flexibility here.
+ */
+
+
+/*
+ * these are also defined in hypervisor-if.h but can't be pulled in as
+ * they are used in start of day assembly. Need to clean up the .h files
+ * a bit more...
+ */
+
+#ifndef FLAT_RING1_CS
+#define FLAT_RING1_CS 0x0819
+#define FLAT_RING1_DS 0x0821
+#define FLAT_RING3_CS 0x082b
+#define FLAT_RING3_DS 0x0833
+#endif
+
+#define __KERNEL_CS FLAT_RING1_CS
+#define __KERNEL_DS FLAT_RING1_DS
+
+/* Everything below this point is not included by assembler (.S) files. */
+#ifndef __ASSEMBLY__
+#include <sys/types.h>
+
+#include <machine/hypervisor-ifs.h>
+void printk(const char *fmt, ...);
+
+/* some function prototypes */
+void trap_init(void);
+
+
+/*
+ * STI/CLI equivalents. These basically set and clear the virtual
+ * event_enable flag in the shared_info structure. Note that when
+ * the enable bit is set, there may be pending events to be handled.
+ * We may therefore call into do_hypervisor_callback() directly.
+ */
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+
+#define __cli() \
+do { \
+ HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \
+ barrier(); \
+} while (0)
+
+#define __sti() \
+do { \
+ shared_info_t *_shared = HYPERVISOR_shared_info; \
+ barrier(); \
+ _shared->vcpu_data[0].evtchn_upcall_mask = 0; \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+} while (0)
+
+#define __save_flags(x) \
+do { \
+ (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \
+} while (0)
+
+#define __restore_flags(x) \
+do { \
+ shared_info_t *_shared = HYPERVISOR_shared_info; \
+ barrier(); \
+ if ( (_shared->vcpu_data[0].evtchn_upcall_mask = (x)) == 0 ) { \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ } \
+} while (0)
+
+#define __save_and_cli(x) \
+do { \
+ (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \
+ HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \
+ barrier(); \
+} while (0)
+
+#define __save_and_sti(x) \
+do { \
+ shared_info_t *_shared = HYPERVISOR_shared_info; \
+ barrier(); \
+ (x) = _shared->vcpu_data[0].evtchn_upcall_mask; \
+ _shared->vcpu_data[0].evtchn_upcall_mask = 0; \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+} while (0)
+
+#ifdef SMP
+/* extra macros need for the SMP case */
+#error "global_irq_* not defined"
+#endif
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+#define save_and_cli(x) __save_and_cli(x)
+#define save_and_sti(x) __save_and_sti(x)
+
+#define local_irq_save(x) __save_and_cli(x)
+#define local_irq_set(x) __save_and_sti(x)
+#define local_irq_restore(x) __restore_flags(x)
+#define local_irq_disable() __cli()
+#define local_irq_enable() __sti()
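+
+/*
+ * Typical usage sketch: bracket a critical section with the
+ * flag-saving forms, e.g.
+ *
+ * unsigned long flags;
+ * local_irq_save(flags);
+ * ... critical section ...
+ * local_irq_restore(flags);
+ *
+ * The restore re-enables event delivery and calls
+ * force_evtchn_callback() if an upcall was left pending.
+ */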
+
+#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));}
+#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); }
+
+#define mb()
+#define rmb()
+#define smp_mb()
+#define wmb()
+
+
+
+/* This is a barrier for the compiler only, NOT the processor! */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+#define LOCK_PREFIX ""
+#define LOCK ""
+#define ADDR (*(volatile long *) addr)
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+
+
+#define xen_xchg(ptr,v) \
+ ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((volatile struct __xchg_dummy *)(x))
+static __inline unsigned long __xchg(unsigned long x, volatile void * ptr,
+ int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("xchgb %b0,%1"
+ :"=q" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 2:
+ __asm__ __volatile__("xchgw %w0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 4:
+ __asm__ __volatile__("xchgl %0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ }
+ return x;
+}
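+
+/*
+ * Usage note: xen_xchg(ptr, v) dispatches on sizeof(*(ptr)) above and
+ * returns the previous value, e.g. old = xen_xchg(&word, 1) for a
+ * simple test-and-set handoff.
+ */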
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "btrl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+static __inline__ int constant_test_bit(int nr, const volatile void * addr)
+{
+ return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int variable_test_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__(
+ "btl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit)
+ :"m" (ADDR),"Ir" (nr));
+ return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered. See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void set_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btsl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered. However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void clear_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btrl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_inc(atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "incl %0"
+ :"=m" (v->counter)
+ :"m" (v->counter));
+}
+
+
+#define rdtscll(val) \
+ __asm__ __volatile__("rdtsc" : "=A" (val))
+
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _OS_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h b/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h
new file mode 100644
index 0000000000..e35eafa5d2
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h
@@ -0,0 +1,50 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */
+#ifndef _XEN_INTR_H_
+#define _XEN_INTR_H_
+
+/*
+ * The flat IRQ space is divided into two regions:
+ *  1. A one-to-one mapping of real physical IRQs. This space is only used
+ *     if we have physical device-access privilege. This region is at the
+ *     start of the IRQ space so that existing device drivers do not need
+ *     to be modified to translate physical IRQ numbers into our IRQ space.
+ *  2. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
+ *     are bound using the provided bind/unbind functions.
+ */
+
+#define PIRQ_BASE 0
+#define NR_PIRQS 128
+
+#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
+#define NR_DYNIRQS 128
+
+#define NR_IRQS (NR_PIRQS + NR_DYNIRQS)
+
+#define pirq_to_irq(_x) ((_x) + PIRQ_BASE)
+#define irq_to_pirq(_x) ((_x) - PIRQ_BASE)
+
+#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE)
+#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE)
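+
+/*
+ * Example (sketch): physical IRQ 14 maps to irq 14 via pirq_to_irq(14),
+ * while an event channel bound with bind_evtchn_to_irq() below is
+ * assigned an irq at or above DYNIRQ_BASE (128).
+ */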
+
+/* Dynamic binding of event channels and VIRQ sources to the IRQ space. */
+extern int bind_virq_to_irq(int virq);
+extern void unbind_virq_from_irq(int virq);
+extern int bind_evtchn_to_irq(int evtchn);
+extern void unbind_evtchn_from_irq(int evtchn);
+
+static __inline__ int irq_cannonicalize(int irq)
+{
+ return (irq == 2) ? 9 : irq;
+}
+
+extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
+extern void enable_irq(unsigned int);
+
+extern void irq_suspend(void);
+extern void irq_resume(void);
+
+extern void idle_block(void);
+
+
+#endif /* _XEN_INTR_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h
new file mode 100644
index 0000000000..93ffd7853a
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h
@@ -0,0 +1,85 @@
+/* $NetBSD:$ */
+
+/*
+ *
+ * Copyright (c) 2004 Christian Limpach.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Christian Limpach.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _XEN_XENFUNC_H_
+#define _XEN_XENFUNC_H_
+
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/xenpmap.h>
+#include <machine/segments.h>
+#include <sys/pcpu.h>
+#define BKPT __asm__("int3");
+#define XPQ_CALL_DEPTH 5
+#define XPQ_CALL_COUNT 2
+#define PG_PRIV PG_AVAIL3
+typedef struct {
+ unsigned long pt_ref;
+ unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH];
+} pteinfo_t;
+
+extern pteinfo_t *pteinfo_list;
+#ifdef XENDEBUG_LOW
+#define __PRINTK(x) printk x
+#else
+#define __PRINTK(x)
+#endif
+
+char *xen_setbootenv(char *cmd_line);
+int xen_boothowto(char *envp);
+void load_cr3(uint32_t val);
+void xen_set_ldt(vm_offset_t, uint32_t);
+void xen_machphys_update(unsigned long, unsigned long);
+void xen_update_descriptor(union descriptor *, union descriptor *);
+void lldt(u_short sel);
+/*
+ * Invalidate a particular VA on all cpus
+ *
+ * N.B. Made these global for external loadable modules to reference.
+ */
+static __inline void
+invlpg(u_int addr)
+{
+ xpq_queue_invlpg(addr);
+}
+
+static __inline void
+invltlb(void)
+{
+ xpq_queue_tlb_flush();
+ mcl_flush_queue();
+}
+
+
+#endif /* _XEN_XENFUNC_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h
new file mode 100644
index 0000000000..f445096228
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h
@@ -0,0 +1,132 @@
+/* $NetBSD:$ */
+
+/*
+ *
+ * Copyright (c) 2004 Christian Limpach.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Christian Limpach.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _XEN_XENPMAP_H_
+#define _XEN_XENPMAP_H_
+#include <machine/xenvar.h>
+void xpq_physbcopy(const unsigned long *, unsigned long, size_t);
+void xpq_queue_invlpg(vm_offset_t);
+void xpq_queue_pt_update(pt_entry_t *, pt_entry_t);
+void xpq_queue_pt_switch(uint32_t);
+void xpq_queue_set_ldt(vm_offset_t, uint32_t);
+void xpq_queue_tlb_flush(void);
+void xpq_queue_pin_table(uint32_t, int);
+void xpq_queue_unpin_table(uint32_t);
+void xpq_record(unsigned long, unsigned long);
+void mcl_queue_pt_update(vm_offset_t, vm_offset_t);
+void mcl_flush_queue(void);
+void pmap_ref(pt_entry_t *pte, unsigned long ma);
+
+
+#ifdef PMAP_DEBUG
+#define PMAP_REF pmap_ref
+#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
+#define PMAP_MARK_PRIV pmap_mark_privileged
+#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
+#else
+#define PMAP_MARK_PRIV(a)
+#define PMAP_MARK_UNPRIV(a)
+#define PMAP_REF(a, b)
+#define PMAP_DEC_REF_PAGE(a)
+#endif
+
+#define ALWAYS_SYNC 0
+
+#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
+
+#define XPQ_PIN_L1_TABLE 1
+#define XPQ_PIN_L2_TABLE 2
+
+#define PT_GET(_ptp) \
+ (pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : *(_ptp))
+#define PT_SET_VA(_ptp,_npte,sync) do { \
+ PMAP_REF((_ptp), xpmap_ptom(_npte)); \
+ xpq_queue_pt_update((pt_entry_t *)vtomach((_ptp)), \
+ xpmap_ptom((_npte))); \
+ if (sync || ALWAYS_SYNC) \
+ mcl_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
+ PMAP_REF((_ptp), (_npte)); \
+ xpq_queue_pt_update((pt_entry_t *)vtomach((_ptp)), (_npte)); \
+ if (sync || ALWAYS_SYNC) \
+ mcl_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PT_CLEAR_VA(_ptp, sync) do { \
+ PMAP_REF((pt_entry_t *)(_ptp), 0); \
+ xpq_queue_pt_update((pt_entry_t *)vtomach(_ptp), 0); \
+ if (sync || ALWAYS_SYNC) \
+ mcl_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PT_CLEAR(_ptp, sync) do { \
+ PMAP_REF((pt_entry_t *)(vtopte(_ptp)), 0); \
+ mcl_queue_pt_update((unsigned long)_ptp, 0); \
+ if (sync || ALWAYS_SYNC) \
+ mcl_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PT_SET_MA(_va,_ma,sync) do { \
+ PMAP_REF(vtopte((unsigned long)_va), (_ma)); \
+ mcl_queue_pt_update((vm_offset_t )(_va), (_ma)); \
+ if (sync || ALWAYS_SYNC) \
+ mcl_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PT_SET(_va,_pa,sync) do { \
+ PMAP_REF((pt_entry_t *)(vtopte(_va)), xpmap_ptom(_pa)); \
+ mcl_queue_pt_update((vm_offset_t)(_va), \
+ xpmap_ptom((_pa))); \
+ if (sync || ALWAYS_SYNC) \
+ mcl_flush_queue(); \
+} while (/*CONSTCOND*/0)
+
+
+
+#define PT_UPDATES_FLUSH() do { \
+ mcl_flush_queue(); \
+} while (/*CONSTCOND*/0)
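+
+/*
+ * Usage sketch: the queueing macros let several page-table updates be
+ * batched into a single flush, e.g.
+ *
+ * PT_SET_VA(pte1, npte1, FALSE);
+ * PT_SET_VA(pte2, npte2, FALSE);
+ * PT_UPDATES_FLUSH();
+ */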
+
+
+static __inline uint32_t
+xpmap_mtop(uint32_t mpa)
+{
+ return (((xen_machine_phys[(mpa >> PAGE_SHIFT)]) << PAGE_SHIFT)
+ | (mpa & ~PG_FRAME));
+}
+
+static __inline vm_paddr_t
+xpmap_ptom(uint32_t ppa)
+{
+ return phystomach(ppa) | (ppa & ~PG_FRAME);
+}
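+
+/*
+ * Note: both translations replace only the page-frame bits and keep the
+ * low offset/flag bits (the ~PG_FRAME part), so they can be applied to
+ * pte values as well as to plain addresses.
+ */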
+
+#endif /* _XEN_XENPMAP_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h
new file mode 100644
index 0000000000..5a3d3acb0b
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h
@@ -0,0 +1,30 @@
+#ifndef XENVAR_H_
+#define XENVAR_H_
+
+#define XBOOTUP 0x1
+#define XPMAP 0x2
+extern int xendebug_flags;
+#ifndef NOXENDEBUG
+#define XENPRINTF printk
+#else
+#define XENPRINTF(x...)
+#endif
+extern unsigned long *xen_phys_machine;
+#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__)
+#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__)
+#define TRACE_DEBUG(argflags, _f, _a...) \
+if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a);
+
+extern unsigned long *xen_machine_phys;
+#define PTOM(i) (((unsigned long *)xen_phys_machine)[i])
+#define phystomach(pa) ((((unsigned long *)xen_phys_machine)[(pa >> PAGE_SHIFT)]) << PAGE_SHIFT)
+void xpq_init(void);
+
+struct sockaddr_in;
+
+int xen_setnfshandle(void);
+int setinaddr(struct sockaddr_in *addr, char *ipstr);
+
+#define RB_GDB_PAUSE RB_RESERVED1
+
+#endif
diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c b/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c
new file mode 100644
index 0000000000..66c80f3ece
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c
@@ -0,0 +1,925 @@
+/*-
+ * All rights reserved.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * XenoBSD block device driver
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <sys/bio.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <machine/resource.h>
+#include <machine/intr_machdep.h>
+#include <machine/vmparam.h>
+
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen-os.h>
+#include <machine/xen_intr.h>
+#include <machine/evtchn.h>
+
+#include <geom/geom_disk.h>
+#include <machine/ctrl_if.h>
+#include <machine/xenfunc.h>
+
+/* prototypes */
+struct xb_softc;
+static void xb_startio(struct xb_softc *sc);
+static void xb_vbdinit(void);
+static void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
+static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
+
+struct xb_softc {
+ device_t xb_dev;
+ struct disk xb_disk; /* disk params */
+ struct bio_queue_head xb_bioq; /* sort queue */
+ struct resource *xb_irq;
+ void *xb_resp_handler;
+ int xb_unit;
+ int xb_flags;
+#define XB_OPEN (1<<0) /* drive is open (can't shut down) */
+};
+
+/* Control whether runtime update of vbds is enabled. */
+#define ENABLE_VBD_UPDATE 1
+
+#if ENABLE_VBD_UPDATE
+static void vbd_update(void);
+#else
+static void vbd_update(void) {}
+#endif
+
+#define BLKIF_STATE_CLOSED 0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED 2
+
+static char *blkif_state_name[] = {
+ [BLKIF_STATE_CLOSED] = "closed",
+ [BLKIF_STATE_DISCONNECTED] = "disconnected",
+ [BLKIF_STATE_CONNECTED] = "connected",
+};
+
+static char * blkif_status_name[] = {
+ [BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
+ [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+ [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
+ [BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
+};
+
+#define WPRINTK(fmt, args...) printk("[XEN] " fmt, ##args)
+
+static int blkif_handle;
+static unsigned int blkif_state = BLKIF_STATE_CLOSED;
+static unsigned int blkif_evtchn;
+static unsigned int blkif_irq;
+
+static int blkif_control_rsp_valid;
+static blkif_response_t blkif_control_rsp;
+
+static unsigned long xb_rec_ring_free;
+blkif_request_t xb_rec_ring[BLKIF_RING_SIZE]; /* shadow recovery ring */
+
+/* XXX move to xb_vbd.c when VBD update support is added */
+#define MAX_VBDS 64
+static vdisk_t xb_diskinfo[MAX_VBDS];
+static int xb_ndisks;
+
+#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */
+#define XBD_SECTOR_SHFT 9
+
+static unsigned int xb_kick_pending;
+
+static struct mtx blkif_io_lock;
+
+static blkif_ring_t *xb_blk_ring;
+static BLKIF_RING_IDX xb_resp_cons; /* Response consumer for comms ring. */
+static BLKIF_RING_IDX xb_req_prod; /* Private request producer */
+
+static int xb_recovery = 0; /* "Recovery in progress" flag. Protected
+ * by the blkif_io_lock */
+
+/* We plug the I/O ring if the driver is suspended or if the ring is full. */
+#define BLKIF_RING_FULL (((xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE) || \
+ (blkif_state != BLKIF_STATE_CONNECTED))
+
+void blkif_completion(blkif_request_t *req);
+void xb_response_intr(void *);
+
+/* XXX: This isn't supported in FreeBSD, so ignore it for now. */
+#define TASK_UNINTERRUPTIBLE 0
+
+static inline int
+GET_ID_FROM_FREELIST( void )
+{
+ unsigned long free = xb_rec_ring_free;
+
+ KASSERT(free <= BLKIF_RING_SIZE, ("free %lu > BLKIF_RING_SIZE", free));
+
+ xb_rec_ring_free = xb_rec_ring[free].id;
+
+ xb_rec_ring[free].id = 0x0fffffee; /* debug */
+
+ return free;
+}
+
+static inline void
+ADD_ID_TO_FREELIST( unsigned long id )
+{
+ xb_rec_ring[id].id = xb_rec_ring_free;
+ xb_rec_ring_free = id;
+}
+
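+/* The shadow entries store pseudo-physical frame numbers so that, after
+ * recovery (when the underlying machine frames may have changed), requests
+ * can be re-issued via translate_req_to_mfn() in blkif_recover(). */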
+static inline void translate_req_to_pfn(blkif_request_t *xreq,
+ blkif_request_t *req)
+{
+ int i;
+
+ xreq->operation = req->operation;
+ xreq->nr_segments = req->nr_segments;
+ xreq->device = req->device;
+	/* id is deliberately not copied: the shadow entry's id keeps the
+	 * bio pointer (or free-list link). */
+ xreq->sector_number = req->sector_number;
+
+ for ( i = 0; i < req->nr_segments; i++ ){
+ xreq->frame_and_sects[i] = xpmap_mtop(req->frame_and_sects[i]);
+ }
+}
+
+static inline void translate_req_to_mfn(blkif_request_t *xreq,
+ blkif_request_t *req)
+{
+ int i;
+
+ xreq->operation = req->operation;
+ xreq->nr_segments = req->nr_segments;
+ xreq->device = req->device;
+ xreq->id = req->id; /* copy id (unlike above) */
+ xreq->sector_number = req->sector_number;
+
+ for ( i = 0; i < req->nr_segments; i++ ){
+ xreq->frame_and_sects[i] = xpmap_ptom(req->frame_and_sects[i]);
+ }
+}
+
+
+static inline void flush_requests(void)
+{
+    /* Make sure the ring entries are published before the producer index. */
+    wmb();
+    xb_blk_ring->req_prod = xb_req_prod;
+    notify_via_evtchn(blkif_evtchn);
+}
+
+
+#if ENABLE_VBD_UPDATE
+static void vbd_update()
+{
+ XENPRINTF(">\n");
+ XENPRINTF("<\n");
+}
+#endif /* ENABLE_VBD_UPDATE */
+
+void
+xb_response_intr(void *xsc)
+{
+ struct xb_softc *sc = NULL;
+ struct bio *bp;
+ blkif_response_t *bret;
+ BLKIF_RING_IDX i, rp;
+ unsigned long flags;
+
+ if (blkif_state == BLKIF_STATE_CLOSED)
+ return;
+
+ mtx_lock_irqsave(&blkif_io_lock, flags);
+
+ if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
+ unlikely(xb_recovery) ) {
+ mtx_unlock_irqrestore(&blkif_io_lock, flags);
+ return;
+ }
+
+ rp = xb_blk_ring->resp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+    /* We sometimes seem to lose I/O: stay in the interrupt handler while
+     * there is work to do, continually re-checking the response producer.
+     */
+ for ( i = xb_resp_cons; i != (rp = xb_blk_ring->resp_prod); i++ ) {
+ unsigned long id;
+ bret = &xb_blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
+
+ id = bret->id;
+ bp = (struct bio *)xb_rec_ring[id].id;
+
+ blkif_completion(&xb_rec_ring[id]);
+
+ ADD_ID_TO_FREELIST(id); /* overwrites req */
+
+ switch ( bret->operation ) {
+ case BLKIF_OP_READ:
+ /* had an unaligned buffer that needs to be copied */
+ if (bp->bio_driver1)
+ bcopy(bp->bio_data, bp->bio_driver1, bp->bio_bcount);
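+		/* FALLTHROUGH: reads share the buffer-release and biodone()
+		 * path with writes below. */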
+ case BLKIF_OP_WRITE:
+
+ /* free the copy buffer */
+ if (bp->bio_driver1) {
+ free(bp->bio_data, M_DEVBUF);
+ bp->bio_data = bp->bio_driver1;
+ bp->bio_driver1 = NULL;
+ }
+
+ if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) {
+ XENPRINTF("Bad return from blkdev data request: %x\n",
+ bret->status);
+ bp->bio_flags |= BIO_ERROR;
+ }
+
+ sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+
+ if (bp->bio_flags & BIO_ERROR)
+ bp->bio_error = EIO;
+ else
+ bp->bio_resid = 0;
+
+ biodone(bp);
+ break;
+ case BLKIF_OP_PROBE:
+ memcpy(&blkif_control_rsp, bret, sizeof(*bret));
+ blkif_control_rsp_valid = 1;
+ break;
+ default:
+ panic("received invalid operation");
+ break;
+ }
+ }
+
+ xb_resp_cons = i;
+
+ if (sc && xb_kick_pending) {
+ xb_kick_pending = FALSE;
+ xb_startio(sc);
+ }
+
+ mtx_unlock_irqrestore(&blkif_io_lock, flags);
+}
+
+static int
+xb_open(struct disk *dp)
+{
+ struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
+
+ if (sc == NULL) {
+		printk("xb: open: disk not found\n");
+ return (ENXIO);
+ }
+
+ /* block dev not active */
+ if (blkif_state != BLKIF_STATE_CONNECTED) {
+		printk("xb%d: bad state: %d\n", sc->xb_unit, blkif_state);
+ return(ENXIO);
+ }
+
+ sc->xb_flags |= XB_OPEN;
+ return (0);
+}
+
+static int
+xb_close(struct disk *dp)
+{
+ struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
+
+ if (sc == NULL)
+ return (ENXIO);
+ sc->xb_flags &= ~XB_OPEN;
+ return (0);
+}
+
+static int
+xb_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
+{
+ struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
+
+ TRACE_ENTER;
+
+ if (sc == NULL)
+ return (ENXIO);
+
+ return (ENOTTY);
+}
+
+/*
+ * Dequeue buffers and place them in the shared communication ring.
+ * Return when no more requests can be accepted or all buffers have
+ * been queued.
+ *
+ * Signal XEN once the ring has been filled out.
+ */
+static void
+xb_startio(struct xb_softc *sc)
+{
+ struct bio *bp;
+ unsigned long buffer_ma;
+ blkif_request_t *req;
+ int s, queued = 0;
+ unsigned long id;
+ unsigned int fsect, lsect;
+
+
+ if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
+ return;
+
+ s = splbio();
+
+ for (bp = bioq_first(&sc->xb_bioq);
+ bp && !BLKIF_RING_FULL;
+ xb_req_prod++, queued++, bp = bioq_first(&sc->xb_bioq)) {
+
+ /* Check if the buffer is properly aligned */
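+		/* If not, bounce through an aligned copy; bio_driver1 saves
+		 * the original pointer and also tells the completion path to
+		 * copy back (for reads) and free the bounce buffer. */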
+ if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
+ int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
+ PAGE_SIZE;
+ caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
+ M_WAITOK);
+ caddr_t alignbuf = (char *)roundup2((u_long)newbuf, align);
+
+ /* save a copy of the current buffer */
+ bp->bio_driver1 = bp->bio_data;
+
+ /* Copy the data for a write */
+ if (bp->bio_cmd == BIO_WRITE)
+ bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
+ bp->bio_data = alignbuf;
+ }
+
+ bioq_remove(&sc->xb_bioq, bp);
+ buffer_ma = vtomach(bp->bio_data);
+ fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
+ lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
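+		/* e.g. a 1024-byte buffer at machine offset 0x400 within its
+		 * page gives fsect = 2, lsect = 3 (512-byte sectors). */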
+
+ KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
+ ("XEN buffer must be sector aligned"));
+ KASSERT(lsect <= 7,
+ ("XEN disk driver data cannot cross a page boundary"));
+
+ buffer_ma &= ~PAGE_MASK;
+
+ /* Fill out a communications ring structure. */
+ req = &xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req;
+ id = GET_ID_FROM_FREELIST();
+		xb_rec_ring[id].id = (unsigned long)bp;
+
+ req->id = id;
+ req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
+ BLKIF_OP_WRITE;
+
+		req->sector_number = (blkif_sector_t)bp->bio_pblkno;
+ req->device = xb_diskinfo[sc->xb_unit].device;
+
+ req->nr_segments = 1; /* not doing scatter/gather since buffer
+ * chaining is not supported.
+ */
+ /*
+ * upper bits represent the machine address of the buffer and the
+ * lower bits is the number of sectors to be read/written.
+ */
+ req->frame_and_sects[0] = buffer_ma | (fsect << 3) | lsect;
+
+ /* Keep a private copy so we can reissue requests when recovering. */
+ translate_req_to_pfn( &xb_rec_ring[id], req);
+
+ }
+
+ if (BLKIF_RING_FULL)
+ xb_kick_pending = TRUE;
+
+ if (queued != 0)
+ flush_requests();
+ splx(s);
+}
+
+/*
+ * Read/write routine for a buffer: find the proper unit, place the buffer
+ * on the sort queue and kick the controller.
+ */
+static void
+xb_strategy(struct bio *bp)
+{
+ struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+ int s;
+
+ /* bogus disk? */
+ if (sc == NULL) {
+ bp->bio_error = EINVAL;
+ bp->bio_flags |= BIO_ERROR;
+ goto bad;
+ }
+
+ s = splbio();
+ /*
+ * Place it in the queue of disk activities for this disk
+ */
+ bioq_disksort(&sc->xb_bioq, bp);
+ splx(s);
+
+ xb_startio(sc);
+ return;
+
+ bad:
+ /*
+     * Correctly set the bio to indicate a failed transfer.
+ */
+ bp->bio_resid = bp->bio_bcount;
+ biodone(bp);
+ return;
+}
+
+
+static int
+xb_create(int unit)
+{
+ struct xb_softc *sc;
+ int error = 0;
+
+ sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK);
+ sc->xb_unit = unit;
+
+ memset(&sc->xb_disk, 0, sizeof(sc->xb_disk));
+ sc->xb_disk.d_unit = unit;
+ sc->xb_disk.d_open = xb_open;
+ sc->xb_disk.d_close = xb_close;
+ sc->xb_disk.d_ioctl = xb_ioctl;
+ sc->xb_disk.d_strategy = xb_strategy;
+ sc->xb_disk.d_name = "xbd";
+ sc->xb_disk.d_drv1 = sc;
+ sc->xb_disk.d_sectorsize = XBD_SECTOR_SIZE;
+ sc->xb_disk.d_mediasize = xb_diskinfo[sc->xb_unit].capacity
+ << XBD_SECTOR_SHFT;
+#if 0
+ sc->xb_disk.d_maxsize = DFLTPHYS;
+#else /* XXX: xen can't handle large single i/o requests */
+ sc->xb_disk.d_maxsize = 4096;
+#endif
+
+ XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
+ xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
+ sc->xb_disk.d_mediasize);
+
+ disk_create(&sc->xb_disk, DISK_VERSION_00);
+ bioq_init(&sc->xb_bioq);
+
+ return error;
+}
+
+/* XXX move to xb_vbd.c when vbd update support is added */
+static void
+xb_vbdinit(void)
+{
+ int i;
+ blkif_request_t req;
+ blkif_response_t rsp;
+ vdisk_t *buf;
+
+ TRACE_ENTER;
+
+ buf = (vdisk_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
+
+ /* Probe for disk information. */
+ memset(&req, 0, sizeof(req));
+ req.operation = BLKIF_OP_PROBE;
+ req.nr_segments = 1;
+ req.frame_and_sects[0] = vtomach(buf) | 7;
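+    /* fsect = 0, lsect = 7: the response may fill the whole page (the
+     * same addr | (fsect << 3) | lsect encoding as data requests). */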
+ blkif_control_send(&req, &rsp);
+
+ if ( rsp.status <= 0 ) {
+	printk("xb_vbdinit: Could not identify disks (%d)\n", rsp.status);
+ free(buf, M_DEVBUF);
+ return;
+ }
+
+ if ((xb_ndisks = rsp.status) > MAX_VBDS)
+ xb_ndisks = MAX_VBDS;
+
+ memcpy(xb_diskinfo, buf, xb_ndisks * sizeof(vdisk_t));
+
+ for (i = 0; i < xb_ndisks; i++)
+ xb_create(i);
+
+ free(buf, M_DEVBUF);
+}
+
+
+/***************************** COMMON CODE *******************************/
+
+void
+blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
+{
+ unsigned long flags, id;
+
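+    /* Test for ring space, sleep while full, then re-test under the lock:
+     * another sender may have claimed the slot between the unlocked test
+     * and the send. */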
+ retry:
+ while ( (xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE ) {
+ tsleep( req, PWAIT | PCATCH, "blkif", hz);
+ }
+
+ mtx_lock_irqsave(&blkif_io_lock, flags);
+ if ( (xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE )
+ {
+ mtx_unlock_irqrestore(&blkif_io_lock, flags);
+ goto retry;
+ }
+
+ xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req = *req;
+
+ id = GET_ID_FROM_FREELIST();
+ xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req.id = id;
+ xb_rec_ring[id].id = (unsigned long) req;
+
+ translate_req_to_pfn( &xb_rec_ring[id], req );
+
+ xb_req_prod++;
+ flush_requests();
+
+ mtx_unlock_irqrestore(&blkif_io_lock, flags);
+
+ while ( !blkif_control_rsp_valid )
+ {
+ tsleep( &blkif_control_rsp_valid, PWAIT | PCATCH, "blkif", hz);
+ }
+
+ memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
+ blkif_control_rsp_valid = 0;
+}
+
+
+/* Send a driver status notification to the domain controller. */
+static void
+send_driver_status(int ok)
+{
+ ctrl_msg_t cmsg = {
+ .type = CMSG_BLKIF_FE,
+ .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
+ .length = sizeof(blkif_fe_driver_status_t),
+ };
+ blkif_fe_driver_status_t *msg = (void*)cmsg.msg;
+
+ msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);
+
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+/* Tell the controller to bring up the interface. */
+static void
+blkif_send_interface_connect(void)
+{
+ ctrl_msg_t cmsg = {
+ .type = CMSG_BLKIF_FE,
+ .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
+ .length = sizeof(blkif_fe_interface_connect_t),
+ };
+ blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
+
+ msg->handle = 0;
+ msg->shmem_frame = (vtomach(xb_blk_ring) >> PAGE_SHIFT);
+
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+static void
+blkif_free(void)
+{
+
+ unsigned long flags;
+
+ printk("[XEN] Recovering virtual block device driver\n");
+
+ /* Prevent new requests being issued until we fix things up. */
+ mtx_lock_irqsave(&blkif_io_lock, flags);
+ xb_recovery = 1;
+ blkif_state = BLKIF_STATE_DISCONNECTED;
+ mtx_unlock_irqrestore(&blkif_io_lock, flags);
+
+ /* Free resources associated with old device channel. */
+ if (xb_blk_ring) {
+ free(xb_blk_ring, M_DEVBUF);
+ xb_blk_ring = NULL;
+ }
+ /* free_irq(blkif_irq, NULL);*/
+ blkif_irq = 0;
+
+ unbind_evtchn_from_irq(blkif_evtchn);
+ blkif_evtchn = 0;
+}
+
+static void
+blkif_close(void)
+{
+}
+
+/* Move from CLOSED to DISCONNECTED state. */
+static void
+blkif_disconnect(void)
+{
+ if (xb_blk_ring) free(xb_blk_ring, M_DEVBUF);
+ xb_blk_ring = (blkif_ring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
+ xb_blk_ring->req_prod = xb_blk_ring->resp_prod = 0;
+ xb_resp_cons = xb_req_prod = 0;
+ blkif_state = BLKIF_STATE_DISCONNECTED;
+ blkif_send_interface_connect();
+}
+
+static void
+blkif_reset(void)
+{
+ printk("[XEN] Recovering virtual block device driver\n");
+ blkif_free();
+ blkif_disconnect();
+}
+
+static void
+blkif_recover(void)
+{
+
+ int i;
+
+    /* Requests might be re-ordered when we re-issue them.
+     * This will need to be fixed once we have barriers. */
+
+ /* Stage 1 : Find active and move to safety. */
+ for ( i = 0; i < BLKIF_RING_SIZE; i++ ) {
+ if ( xb_rec_ring[i].id >= KERNBASE ) {
+ translate_req_to_mfn(
+ &xb_blk_ring->ring[xb_req_prod].req, &xb_rec_ring[i]);
+ xb_req_prod++;
+ }
+ }
+
+    printk("blkfront: recovered %d descriptors\n", xb_req_prod);
+
+ /* Stage 2 : Set up shadow list. */
+ for ( i = 0; i < xb_req_prod; i++ ) {
+ xb_rec_ring[i].id = xb_blk_ring->ring[i].req.id;
+ xb_blk_ring->ring[i].req.id = i;
+ translate_req_to_pfn(&xb_rec_ring[i], &xb_blk_ring->ring[i].req);
+ }
+
+ /* Stage 3 : Set up free list. */
+ for ( ; i < BLKIF_RING_SIZE; i++ ){
+ xb_rec_ring[i].id = i+1;
+ }
+ xb_rec_ring_free = xb_req_prod;
+ xb_rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
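+    /* The terminator (like every free-list link) is well below KERNBASE,
+     * so stage 1 above never mistakes a link for a live bio pointer. */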
+
+ /* xb_blk_ring->req_prod will be set when we flush_requests().*/
+ wmb();
+
+ /* Switch off recovery mode, using a memory barrier to ensure that
+ * it's seen before we flush requests - we don't want to miss any
+ * interrupts. */
+ xb_recovery = 0;
+ wmb();
+
+ /* Kicks things back into life. */
+ flush_requests();
+
+    /* Now safe to let other people use the interface. */
+ blkif_state = BLKIF_STATE_CONNECTED;
+}
+
+static void
+blkif_connect(blkif_fe_interface_status_t *status)
+{
+ int err = 0;
+
+ blkif_evtchn = status->evtchn;
+ blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
+
+ err = intr_add_handler("xbd", blkif_irq,
+ (driver_intr_t *)xb_response_intr, NULL,
+ INTR_TYPE_BIO | INTR_MPSAFE, NULL);
+ if(err){
+ printk("[XEN] blkfront request_irq failed (err=%d)\n", err);
+ return;
+ }
+
+ if ( xb_recovery ) {
+ blkif_recover();
+ } else {
+ /* Probe for discs attached to the interface. */
+ xb_vbdinit();
+
+ /* XXX: transition state after probe */
+ blkif_state = BLKIF_STATE_CONNECTED;
+ }
+
+ /* Kick pending requests. */
+#if 0 /* XXX: figure out sortq logic */
+ mtx_lock_irq(&blkif_io_lock);
+ kick_pending_request_queues();
+ mtx_unlock_irq(&blkif_io_lock);
+#endif
+}
+
+static void
+unexpected(blkif_fe_interface_status_t *status)
+{
+ WPRINTK(" Unexpected blkif status %s in state %s\n",
+ blkif_status_name[status->status],
+ blkif_state_name[blkif_state]);
+}
+
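+/* Frontend state machine, driven by status messages from the domain
+ * controller.  Unexpected transitions are logged and, where possible,
+ * handled by resetting or reconnecting the interface. */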
+static void
+blkif_status(blkif_fe_interface_status_t *status)
+{
+ if (status->handle != blkif_handle) {
+ WPRINTK(" Invalid blkif: handle=%u", status->handle);
+ return;
+ }
+
+ switch (status->status) {
+
+ case BLKIF_INTERFACE_STATUS_CLOSED:
+ switch(blkif_state){
+ case BLKIF_STATE_CLOSED:
+ unexpected(status);
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ case BLKIF_STATE_CONNECTED:
+ unexpected(status);
+ blkif_close();
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_DISCONNECTED:
+ switch(blkif_state){
+ case BLKIF_STATE_CLOSED:
+ blkif_disconnect();
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ case BLKIF_STATE_CONNECTED:
+ unexpected(status);
+ blkif_reset();
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_CONNECTED:
+ switch(blkif_state){
+ case BLKIF_STATE_CLOSED:
+ unexpected(status);
+ blkif_disconnect();
+ blkif_connect(status);
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ blkif_connect(status);
+ break;
+ case BLKIF_STATE_CONNECTED:
+ unexpected(status);
+ blkif_connect(status);
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_CHANGED:
+ switch(blkif_state){
+ case BLKIF_STATE_CLOSED:
+ case BLKIF_STATE_DISCONNECTED:
+ unexpected(status);
+ break;
+ case BLKIF_STATE_CONNECTED:
+ vbd_update();
+ break;
+ }
+ break;
+
+ default:
+ WPRINTK("Invalid blkif status: %d\n", status->status);
+ break;
+ }
+}
+
+
+static void
+blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ switch ( msg->subtype )
+ {
+ case CMSG_BLKIF_FE_INTERFACE_STATUS:
+ if ( msg->length != sizeof(blkif_fe_interface_status_t) )
+ goto parse_error;
+ blkif_status((blkif_fe_interface_status_t *)
+ &msg->msg[0]);
+ break;
+ default:
+ goto parse_error;
+ }
+
+ ctrl_if_send_response(msg);
+ return;
+
+ parse_error:
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+static int
+wait_for_blkif(void)
+{
+ int err = 0;
+ int i;
+ send_driver_status(1);
+
+ /*
+ * We should read 'nr_interfaces' from response message and wait
+ * for notifications before proceeding. For now we assume that we
+ * will be notified of exactly one interface.
+ */
+ for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*hz); i++ )
+ {
+ tsleep(&blkif_state, PWAIT | PCATCH, "blkif", hz);
+ }
+
+ if (blkif_state != BLKIF_STATE_CONNECTED){
+ printk("[XEN] Timeout connecting block device driver!\n");
+ err = -ENOSYS;
+ }
+ return err;
+}
+
+
+static void
+xb_init(void *unused)
+{
+ int i;
+
+ printk("[XEN] Initialising virtual block device driver\n");
+
+ xb_rec_ring_free = 0;
+ for (i = 0; i < BLKIF_RING_SIZE; i++) {
+ xb_rec_ring[i].id = i+1;
+ }
+ xb_rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
+
+ (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, 0);
+
+ wait_for_blkif();
+}
+
+#if 0 /* XXX not yet */
+void
+blkdev_suspend(void)
+{
+}
+
+void
+blkdev_resume(void)
+{
+ send_driver_status(1);
+}
+#endif
+
+/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
+
+void
+blkif_completion(blkif_request_t *req)
+{
+ int i;
+
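+    /* Without grant tables, the frames backing a completed read may have
+     * been transferred to us by the backend, so re-assert the
+     * machine-to-physical mapping for each frame in the request. */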
+ switch ( req->operation )
+ {
+ case BLKIF_OP_READ:
+ for ( i = 0; i < req->nr_segments; i++ )
+ {
+ unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
+ unsigned long mfn = xen_phys_machine[pfn];
+ xen_machphys_update(mfn, pfn);
+ }
+ break;
+ }
+
+}
+MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_SPIN);
+SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_ANY, xb_init, NULL);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c b/freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c
new file mode 100644
index 0000000000..7ea8e3eb4f
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c
@@ -0,0 +1,536 @@
+#include <sys/cdefs.h>
+
+
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/consio.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/tty.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <machine/stdarg.h>
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/ctrl_if.h>
+#include <sys/cons.h>
+
+#include "opt_ddb.h"
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
+static char driver_name[] = "xc";
+devclass_t xc_devclass;
+static void xcstart (struct tty *);
+static int xcparam (struct tty *, struct termios *);
+static void xcstop (struct tty *, int);
+static void xc_timeout(void *);
+static void xencons_tx_flush_task_routine(void *, int);
+static void __xencons_tx_flush(void);
+static void xencons_rx(ctrl_msg_t *msg, unsigned long id);
+static boolean_t xcons_putc(int c);
+
+/* switch console so that shutdown can occur gracefully */
+static void xc_shutdown(void *arg, int howto);
+static int xc_mute;
+
+void xcons_force_flush(void);
+
+static cn_probe_t xccnprobe;
+static cn_init_t xccninit;
+static cn_getc_t xccngetc;
+static cn_putc_t xccnputc;
+static cn_checkc_t xccncheckc;
+
+#define XC_POLLTIME (hz/10)
+
+CONS_DRIVER(xc, xccnprobe, xccninit, NULL, xccngetc,
+ xccncheckc, xccnputc, NULL);
+
+static int xen_console_up;
+static boolean_t xc_tx_task_queued;
+static boolean_t xc_start_needed;
+static struct callout xc_callout;
+struct mtx cn_mtx;
+
+#define RBUF_SIZE 1024
+#define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1))
+#define WBUF_SIZE 4096
+#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1))
+static char wbuf[WBUF_SIZE];
+static char rbuf[RBUF_SIZE];
+static int rc, rp;
+static int cnsl_evt_reg;
+static unsigned int wc, wp; /* write_cons, write_prod */
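+/* Both buffers are power-of-two rings indexed by free-running counters:
+ * RBUF_MASK()/WBUF_MASK() map a counter into its buffer, and (wp - wc)
+ * is the number of bytes still waiting to be flushed. */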
+static struct task xencons_tx_flush_task = { {NULL}, 0, 0, &xencons_tx_flush_task_routine, NULL };
+
+
+#define CDEV_MAJOR 12
+#define XCUNIT(x) (minor(x))
+#define ISTTYOPEN(tp) ((tp) && ((tp)->t_state & TS_ISOPEN))
+#define CN_LOCK_INIT(x, _name) \
+ mtx_init(&x, _name, _name, MTX_SPIN)
+#define CN_LOCK(l, f) mtx_lock_irqsave(&(l), (f))
+#define CN_UNLOCK(l, f) mtx_unlock_irqrestore(&(l), (f))
+#define CN_LOCK_ASSERT(x) mtx_assert(&x, MA_OWNED)
+#define CN_LOCK_DESTROY(x) mtx_destroy(&x)
+
+
+static struct tty *xccons;
+
+struct xc_softc {
+ int xc_unit;
+ struct cdev *xc_dev;
+};
+
+
+static d_open_t xcopen;
+static d_close_t xcclose;
+static d_ioctl_t xcioctl;
+
+static struct cdevsw xc_cdevsw = {
+ /* version */ D_VERSION_00,
+ /* maj */ CDEV_MAJOR,
+ /* flags */ D_TTY | D_NEEDGIANT,
+ /* name */ driver_name,
+
+ /* open */ xcopen,
+ /* fdopen */ 0,
+ /* close */ xcclose,
+ /* read */ ttyread,
+ /* write */ ttywrite,
+ /* ioctl */ xcioctl,
+ /* poll */ ttypoll,
+ /* mmap */ 0,
+ /* strategy */ 0,
+ /* dump */ 0,
+ /* kqfilter */ ttykqfilter
+};
+
+static void
+xccnprobe(struct consdev *cp)
+{
+ cp->cn_pri = CN_REMOTE;
+ cp->cn_tp = xccons;
+ sprintf(cp->cn_name, "%s0", driver_name);
+}
+
+
+static void
+xccninit(struct consdev *cp)
+{
+	CN_LOCK_INIT(cn_mtx, "XCONS LOCK");
+}
+
+int
+xccngetc(struct consdev *dev)
+{
+ int c;
+ if (xc_mute)
+ return 0;
+ do {
+ if ((c = xccncheckc(dev)) == -1) {
+			/* Polling without sleeping doesn't work well under
+			 * Xen; sleeping gives other things, such as the
+			 * clock, a chance to run.
+			 */
+ tsleep(&cn_mtx, PWAIT | PCATCH, "console sleep",
+ XC_POLLTIME);
+ }
+ } while( c == -1 );
+ return c;
+}
+
+int
+xccncheckc(struct consdev *dev)
+{
+ int ret = (xc_mute ? 0 : -1);
+ int flags;
+ CN_LOCK(cn_mtx, flags);
+ if ( (rp - rc) ){
+ /* we need to return only one char */
+ ret = (int)rbuf[RBUF_MASK(rc)];
+ rc++;
+ }
+ CN_UNLOCK(cn_mtx, flags);
+ return(ret);
+}
+
+static void
+xccnputc(struct consdev *dev, int c)
+{
+ int flags;
+ CN_LOCK(cn_mtx, flags);
+ xcons_putc(c);
+ CN_UNLOCK(cn_mtx, flags);
+}
+
+static boolean_t
+xcons_putc(int c)
+{
+ int force_flush = xc_mute ||
+#ifdef DDB
+ db_active ||
+#endif
+	    panicstr;	/* we are not going to recover, so force
+			 * a flush
+			 */
+
+ if ( (wp-wc) < (WBUF_SIZE-1) ){
+ if ( (wbuf[WBUF_MASK(wp++)] = c) == '\n' ) {
+ wbuf[WBUF_MASK(wp++)] = '\r';
+ if (force_flush)
+ xcons_force_flush();
+ }
+ } else if (force_flush) {
+ xcons_force_flush();
+
+ }
+ if (cnsl_evt_reg)
+ __xencons_tx_flush();
+
+ /* inform start path that we're pretty full */
+ return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE;
+}
+
+static void
+xc_identify(driver_t *driver, device_t parent)
+{
+ device_t child;
+ child = BUS_ADD_CHILD(parent, 0, driver_name, 0);
+ device_set_driver(child, driver);
+ device_set_desc(child, "Xen Console");
+}
+
+static int
+xc_probe(device_t dev)
+{
+ struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev);
+
+ sc->xc_unit = device_get_unit(dev);
+ return (0);
+}
+
+static int
+xc_attach(device_t dev)
+{
+ struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev);
+
+ sc->xc_dev = make_dev(&xc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "xc%r", 0);
+ xccons = ttymalloc(NULL);
+
+ sc->xc_dev->si_drv1 = (void *)sc;
+ sc->xc_dev->si_tty = xccons;
+
+ xccons->t_oproc = xcstart;
+ xccons->t_param = xcparam;
+ xccons->t_stop = xcstop;
+ xccons->t_dev = sc->xc_dev;
+
+ callout_init(&xc_callout, 0);
+
+ /* Ensure that we don't attach before the event channel is able to receive
+ * a registration. The XenBus code delays the probe/attach order until
+ * this has occurred.
+ */
+ (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0);
+ cnsl_evt_reg = 1;
+
+ callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons);
+
+ /* register handler to flush console on shutdown */
+ if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown,
+ NULL, SHUTDOWN_PRI_DEFAULT)) == NULL)
+ printf("xencons: shutdown event registration failed!\n");
+
+ return (0);
+}
+
+/*
+ * return 0 for all console input, force flush all output.
+ */
+static void
+xc_shutdown(void *arg, int howto)
+{
+ xc_mute = 1;
+ xcons_force_flush();
+
+}
+
+static void
+xencons_rx(ctrl_msg_t *msg,unsigned long id)
+{
+ int i, flags;
+ struct tty *tp = xccons;
+
+ CN_LOCK(cn_mtx, flags);
+ for ( i = 0; i < msg->length; i++ ) {
+ if ( xen_console_up )
+ (*linesw[tp->t_line]->l_rint)(msg->msg[i], tp);
+ else
+ rbuf[RBUF_MASK(rp++)] = msg->msg[i];
+ }
+ CN_UNLOCK(cn_mtx, flags);
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+static void
+__xencons_tx_flush(void)
+{
+ int sz, work_done = 0;
+ ctrl_msg_t msg;
+
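+    /* Drain the write buffer in chunks bounded by both the control-message
+     * payload size and the wrap point of the circular buffer. */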
+ while ( wc != wp )
+ {
+ sz = wp - wc;
+ if ( sz > sizeof(msg.msg) )
+ sz = sizeof(msg.msg);
+ if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) )
+ sz = WBUF_SIZE - WBUF_MASK(wc);
+
+ msg.type = CMSG_CONSOLE;
+ msg.subtype = CMSG_CONSOLE_DATA;
+ msg.length = sz;
+ memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz);
+
+ if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ){
+ wc += sz;
+ }
+ else if (xc_tx_task_queued) {
+ /* avoid the extra enqueue check if we know we're already queued */
+ break;
+ } else if (ctrl_if_enqueue_space_callback(&xencons_tx_flush_task)) {
+ xc_tx_task_queued = TRUE;
+ break;
+ }
+
+ work_done = 1;
+ }
+
+ if ( work_done && xen_console_up )
+ ttwakeup(xccons);
+}
+
+static void
+xencons_tx_flush_task_routine(void *data, int arg)
+{
+ int flags;
+ CN_LOCK(cn_mtx, flags);
+ xc_tx_task_queued = FALSE;
+ __xencons_tx_flush();
+ CN_UNLOCK(cn_mtx, flags);
+}
+
+int
+xcopen(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+ struct xc_softc *sc;
+ int unit = XCUNIT(dev);
+ struct tty *tp;
+ int s, error;
+
+ sc = (struct xc_softc *)device_get_softc(
+ devclass_get_device(xc_devclass, unit));
+ if (sc == NULL)
+ return (ENXIO);
+
+ tp = dev->si_tty;
+ s = spltty();
+ if (!ISTTYOPEN(tp)) {
+ tp->t_state |= TS_CARR_ON;
+ ttychars(tp);
+ tp->t_iflag = TTYDEF_IFLAG;
+ tp->t_oflag = TTYDEF_OFLAG;
+ tp->t_cflag = TTYDEF_CFLAG|CLOCAL;
+ tp->t_lflag = TTYDEF_LFLAG;
+ tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
+ xcparam(tp, &tp->t_termios);
+ ttsetwater(tp);
+ } else if (tp->t_state & TS_XCLUDE && suser(td)) {
+ splx(s);
+ return (EBUSY);
+ }
+ splx(s);
+
+ xen_console_up = 1;
+
+ error = (*linesw[tp->t_line]->l_open)(dev, tp);
+
+ return error;
+}
+
+int
+xcclose(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+ struct tty *tp = dev->si_tty;
+
+ if (tp == NULL)
+ return (0);
+ xen_console_up = 0;
+
+ spltty();
+ (*linesw[tp->t_line]->l_close)(tp, flag);
+ tty_close(tp);
+ spl0();
+ return (0);
+}
+
+
+int
+xcioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
+{
+ struct tty *tp = dev->si_tty;
+ int error;
+
+ error = (*linesw[tp->t_line]->l_ioctl)(tp, cmd, data, flag, td);
+ if (error != ENOIOCTL)
+ return (error);
+ error = ttioctl(tp, cmd, data, flag);
+ if (error != ENOIOCTL)
+ return (error);
+ return (ENOTTY);
+}
+
+static inline int
+__xencons_put_char(int ch)
+{
+ char _ch = (char)ch;
+ if ( (wp - wc) == WBUF_SIZE )
+ return 0;
+ wbuf[WBUF_MASK(wp++)] = _ch;
+ return 1;
+}
+
+
+static void
+xcstart(struct tty *tp)
+{
+ int flags;
+ int s;
+ boolean_t cons_full = FALSE;
+
+ s = spltty();
+ CN_LOCK(cn_mtx, flags);
+ if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
+ ttwwakeup(tp);
+ CN_UNLOCK(cn_mtx, flags);
+ return;
+ }
+
+ tp->t_state |= TS_BUSY;
+ while (tp->t_outq.c_cc != 0 && !cons_full)
+ cons_full = xcons_putc(getc(&tp->t_outq));
+
+ /* if the console is close to full leave our state as busy */
+ if (!cons_full) {
+ tp->t_state &= ~TS_BUSY;
+ ttwwakeup(tp);
+ } else {
+ /* let the timeout kick us in a bit */
+ xc_start_needed = TRUE;
+ }
+ CN_UNLOCK(cn_mtx, flags);
+ splx(s);
+}
+
+static void
+xcstop(struct tty *tp, int flag)
+{
+
+ if (tp->t_state & TS_BUSY) {
+ if ((tp->t_state & TS_TTSTOP) == 0) {
+ tp->t_state |= TS_FLUSH;
+ }
+ }
+}
+
+static void
+xc_timeout(void *v)
+{
+ struct tty *tp;
+ int c;
+
+ tp = (struct tty *)v;
+
+ while ((c = xccncheckc(NULL)) != -1) {
+ if (tp->t_state & TS_ISOPEN) {
+ (*linesw[tp->t_line]->l_rint)(c, tp);
+ }
+ }
+
+ if (xc_start_needed) {
+ xc_start_needed = FALSE;
+ xcstart(tp);
+ }
+
+ callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp);
+}
+
+/*
+ * Set line parameters.
+ */
+int
+xcparam(struct tty *tp, struct termios *t)
+{
+ tp->t_ispeed = t->c_ispeed;
+ tp->t_ospeed = t->c_ospeed;
+ tp->t_cflag = t->c_cflag;
+ return (0);
+}
+
+
+static device_method_t xc_methods[] = {
+ DEVMETHOD(device_identify, xc_identify),
+ DEVMETHOD(device_probe, xc_probe),
+ DEVMETHOD(device_attach, xc_attach),
+ {0, 0}
+};
+
+static driver_t xc_driver = {
+ driver_name,
+ xc_methods,
+ sizeof(struct xc_softc),
+};
+
+/*** Forcibly flush console data before dying. ***/
+void
+xcons_force_flush(void)
+{
+ ctrl_msg_t msg;
+ int sz;
+
+ /*
+ * We use dangerous control-interface functions that require a quiescent
+ * system and no interrupts. Try to ensure this with a global cli().
+ */
+ cli();
+
+ /* Spin until console data is flushed through to the domain controller. */
+ while ( (wc != wp) && !ctrl_if_transmitter_empty() )
+ {
+ /* Interrupts are disabled -- we must manually reap responses. */
+ ctrl_if_discard_responses();
+
+ if ( (sz = wp - wc) == 0 )
+ continue;
+ if ( sz > sizeof(msg.msg) )
+ sz = sizeof(msg.msg);
+ if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) )
+ sz = WBUF_SIZE - WBUF_MASK(wc);
+
+ msg.type = CMSG_CONSOLE;
+ msg.subtype = CMSG_CONSOLE_DATA;
+ msg.length = sz;
+ memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz);
+
+ if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 )
+ wc += sz;
+ }
+}
+
+DRIVER_MODULE(xc, xenbus, xc_driver, xc_devclass, 0, 0);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c b/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c
new file mode 100644
index 0000000000..de379b6bf9
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c
@@ -0,0 +1,410 @@
+/******************************************************************************
+ * evtchn.c
+ *
+ * Xenolinux driver for receiving and demuxing event-channel signals.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/selinfo.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+
+#include <machine/cpufunc.h>
+#include <machine/intr_machdep.h>
+#include <machine/xen-os.h>
+#include <machine/xen_intr.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <machine/resource.h>
+#include <machine/synch_bitops.h>
+
+#include <machine/hypervisor.h>
+
+
+typedef struct evtchn_softc {
+	struct selinfo ev_rsel;
+} evtchn_softc_t;
+
+
+#ifdef linuxcrap
+/* NB. This must be shared amongst drivers if more things go in /dev/xen */
+static devfs_handle_t xen_dev_dir;
+#endif
+
+/* Only one process may open /dev/xen/evtchn at any time. */
+static unsigned long evtchn_dev_inuse;
+
+/* Notification ring, accessed via /dev/xen/evtchn. */
+#define RING_SIZE 2048 /* 2048 16-bit entries */
+#define RING_MASK(_i) ((_i)&(RING_SIZE-1))
+static uint16_t *ring;
+static unsigned int ring_cons, ring_prod, ring_overflow;
+
+/* Which ports is user-space bound to? */
+static uint32_t bound_ports[32];
+
+/* Unique address for processes to sleep on */
+static void *evtchn_waddr = &ring;
+
+static struct mtx lock, upcall_lock;
+
+static d_read_t evtchn_read;
+static d_write_t evtchn_write;
+static d_ioctl_t evtchn_ioctl;
+static d_poll_t evtchn_poll;
+static d_open_t evtchn_open;
+static d_close_t evtchn_close;
+
+
+void
+evtchn_device_upcall(int port)
+{
+ mtx_lock(&upcall_lock);
+
+ mask_evtchn(port);
+ clear_evtchn(port);
+
+ if ( ring != NULL ) {
+ if ( (ring_prod - ring_cons) < RING_SIZE ) {
+ ring[RING_MASK(ring_prod)] = (uint16_t)port;
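+            /* Wake readers only on the empty -> non-empty transition. */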
+ if ( ring_cons == ring_prod++ ) {
+ wakeup(evtchn_waddr);
+ }
+ }
+ else {
+ ring_overflow = 1;
+ }
+ }
+
+ mtx_unlock(&upcall_lock);
+}
+
+static void
+__evtchn_reset_buffer_ring(void)
+{
+ /* Initialise the ring to empty. Clear errors. */
+ ring_cons = ring_prod = ring_overflow = 0;
+}
+
+static int
+evtchn_read(struct cdev *dev, struct uio *uio, int ioflag)
+{
+ int rc;
+ unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0;
+ count = uio->uio_resid;
+
+ count &= ~1; /* even number of bytes */
+
+ if ( count == 0 )
+ {
+ rc = 0;
+ goto out;
+ }
+
+ if ( count > PAGE_SIZE )
+ count = PAGE_SIZE;
+
+ for ( ; ; ) {
+ if ( (c = ring_cons) != (p = ring_prod) )
+ break;
+
+ if ( ring_overflow ) {
+ rc = EFBIG;
+ goto out;
+ }
+
+ if (sst != 0) {
+ rc = EINTR;
+ goto out;
+ }
+
+ /* PCATCH == check for signals before and after sleeping
+ * PWAIT == priority of waiting on resource
+ */
+ sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10);
+ }
+
+ /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+ if ( ((c ^ p) & RING_SIZE) != 0 ) {
+ bytes1 = (RING_SIZE - RING_MASK(c)) * sizeof(uint16_t);
+ bytes2 = RING_MASK(p) * sizeof(uint16_t);
+ }
+ else {
+ bytes1 = (p - c) * sizeof(uint16_t);
+ bytes2 = 0;
+ }
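+    /* e.g. with RING_SIZE 2048: c = 2040, p = 2052 crosses the wrap, so
+     * bytes1 covers the 8 entries up to the end of the ring and bytes2 the
+     * 4 entries from its start. */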
+
+ /* Truncate chunks according to caller's maximum byte count. */
+ if ( bytes1 > count ) {
+ bytes1 = count;
+ bytes2 = 0;
+ }
+ else if ( (bytes1 + bytes2) > count ) {
+ bytes2 = count - bytes1;
+ }
+
+ if ( uiomove(&ring[RING_MASK(c)], bytes1, uio) ||
+ ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio)))
+ /* keeping this around as its replacement is not equivalent
+ * copyout(&ring[0], &buf[bytes1], bytes2)
+ */
+ {
+ rc = EFAULT;
+ goto out;
+ }
+
+ ring_cons += (bytes1 + bytes2) / sizeof(uint16_t);
+
+ rc = bytes1 + bytes2;
+
+ out:
+
+ return rc;
+}
+
+static int
+evtchn_write(struct cdev *dev, struct uio *uio, int ioflag)
+{
+ int rc, i, count;
+
+ count = uio->uio_resid;
+
+ uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
+
+
+ if ( kbuf == NULL )
+ return ENOMEM;
+
+ count &= ~1; /* even number of bytes */
+
+ if ( count == 0 ) {
+ rc = 0;
+ goto out;
+ }
+
+ if ( count > PAGE_SIZE )
+ count = PAGE_SIZE;
+
+ if ( uiomove(kbuf, count, uio) != 0 ) {
+ rc = EFAULT;
+ goto out;
+ }
+
+ mtx_lock_spin(&lock);
+ for ( i = 0; i < (count/2); i++ )
+ if ( test_bit(kbuf[i], &bound_ports[0]) )
+ unmask_evtchn(kbuf[i]);
+ mtx_unlock_spin(&lock);
+
+ rc = count;
+
+ out:
+ free(kbuf, M_DEVBUF);
+ return rc;
+}
+
+static int
+evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
+ int mode, struct thread *td __unused)
+{
+ int rc = 0;
+
+ mtx_lock_spin(&lock);
+
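+    /* BIND records the port in bound_ports and unmasks it so upcalls can
+     * reach the ring; UNBIND re-masks it.  Binding an already-bound port
+     * fails with EINVAL. */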
+ switch ( cmd )
+ {
+ case EVTCHN_RESET:
+ __evtchn_reset_buffer_ring();
+ break;
+ case EVTCHN_BIND:
+ if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) )
+ unmask_evtchn((int)arg);
+ else
+ rc = EINVAL;
+ break;
+ case EVTCHN_UNBIND:
+ if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) )
+ mask_evtchn((int)arg);
+ else
+ rc = EINVAL;
+ break;
+ default:
+ rc = ENOSYS;
+ break;
+ }
+
+ mtx_unlock_spin(&lock);
+
+ return rc;
+}
+
+static int
+evtchn_poll(struct cdev *dev, int poll_events, struct thread *td)
+{
+
+ evtchn_softc_t *sc;
+ unsigned int mask = POLLOUT | POLLWRNORM;
+
+ sc = dev->si_drv1;
+
+ if ( ring_cons != ring_prod )
+ mask |= POLLIN | POLLRDNORM;
+ else if ( ring_overflow )
+ mask = POLLERR;
+ else
+ selrecord(td, &sc->ev_rsel);
+
+
+ return mask;
+}
+
+
+static int
+evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td)
+{
+ uint16_t *_ring;
+
+ if (flag & O_NONBLOCK)
+ return EBUSY;
+
+ if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) )
+ return EBUSY;
+
+ if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL )
+ return ENOMEM;
+
+ mtx_lock_spin(&lock);
+ ring = _ring;
+ __evtchn_reset_buffer_ring();
+ mtx_unlock_spin(&lock);
+
+
+ return 0;
+}
+
+static int
+evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused)
+{
+ int i;
+
+ mtx_lock_spin(&lock);
+ if (ring != NULL) {
+ free(ring, M_DEVBUF);
+ ring = NULL;
+ }
+ for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
+ if ( synch_test_and_clear_bit(i, &bound_ports[0]) )
+ mask_evtchn(i);
+ mtx_unlock_spin(&lock);
+
+ evtchn_dev_inuse = 0;
+
+ return 0;
+}
+
+
+
+/* XXX wild assed guess as to a safe major number */
+#define EVTCHN_MAJOR 140
+
+static struct cdevsw evtchn_devsw = {
+ d_version: D_VERSION_00,
+ d_open: evtchn_open,
+ d_close: evtchn_close,
+ d_read: evtchn_read,
+ d_write: evtchn_write,
+ d_ioctl: evtchn_ioctl,
+ d_poll: evtchn_poll,
+ d_name: "evtchn",
+ d_maj: EVTCHN_MAJOR,
+ d_flags: 0,
+};
+
+
+/* XXX - if this device is ever supposed to support use by more than one process
+ * this global static will have to go away
+ */
+static struct cdev *evtchn_dev;
+
+
+
+static int
+evtchn_init(void *dummy __unused)
+{
+    /* XXX I believe we don't need these; leaving them here for now until
+     * we have some semblance of it working.
+     */
+#if 0
+ devfs_handle_t symlink_handle;
+ int err, pos;
+ char link_dest[64];
+#endif
+ mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF);
+
+ /* (DEVFS) create '/dev/misc/evtchn'. */
+ evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn");
+
+ mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS);
+
+ evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK);
+ bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t));
+
+ /* XXX I don't think we need any of this rubbish */
+#if 0
+ if ( err != 0 )
+ {
+ printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+ return err;
+ }
+
+ /* (DEVFS) create directory '/dev/xen'. */
+ xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL);
+
+ /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */
+ pos = devfs_generate_path(evtchn_miscdev.devfs_handle,
+ &link_dest[3],
+ sizeof(link_dest) - 3);
+ if ( pos >= 0 )
+ strncpy(&link_dest[pos], "../", 3);
+ /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */
+ (void)devfs_mk_symlink(xen_dev_dir,
+ "evtchn",
+ DEVFS_FL_DEFAULT,
+ &link_dest[pos],
+ &symlink_handle,
+ NULL);
+
+ /* (DEVFS) automatically destroy the symlink with its destination. */
+ devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle);
+#endif
+ printk("Event-channel device installed.\n");
+
+ return 0;
+}
+
+
+SYSINIT(evtchn_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_init, NULL);
+
+
+#if 0
+
+static void cleanup_module(void)
+{
+	destroy_dev(evtchn_dev);
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
+#endif
diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c b/freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c
new file mode 100644
index 0000000000..604aec78c1
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c
@@ -0,0 +1,1109 @@
+/*-
+ * Copyright (c) 1990 William Jolitz.
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)npx.c 7.2 (Berkeley) 5/12/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/i386/isa/npx.c,v 1.144 2003/11/03 21:53:38 jhb Exp $");
+
+#include "opt_cpu.h"
+#include "opt_debug_npx.h"
+#include "opt_isa.h"
+#include "opt_npx.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#ifdef NPX_DEBUG
+#include <sys/syslog.h>
+#endif
+#include <sys/signalvar.h>
+#include <sys/user.h>
+
+#include <machine/asmacros.h>
+#include <machine/cputypes.h>
+#include <machine/frame.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/psl.h>
+#include <machine/clock.h>
+#include <machine/resource.h>
+#include <machine/specialreg.h>
+#include <machine/segments.h>
+#include <machine/ucontext.h>
+
+#include <machine/multicall.h>
+
+#include <i386/isa/icu.h>
+#ifdef PC98
+#include <pc98/pc98/pc98.h>
+#else
+#include <i386/isa/isa.h>
+#endif
+#include <machine/intr_machdep.h>
+#ifdef DEV_ISA
+#include <isa/isavar.h>
+#endif
+
+#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
+#define CPU_ENABLE_SSE
+#endif
+#if defined(CPU_DISABLE_SSE)
+#undef CPU_ENABLE_SSE
+#endif
+
+/*
+ * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
+ */
+
+/* Configuration flags. */
+#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0)
+#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1)
+#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2)
+
+#if defined(__GNUC__) && !defined(lint)
+
+#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr)))
+#define fnclex() __asm("fnclex")
+#define fninit() __asm("fninit")
+#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr)))
+#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
+#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
+#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
+#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
+#ifdef CPU_ENABLE_SSE
+#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
+#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
+#endif
+#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
+ : : "n" (CR0_TS) : "ax")
+#define stop_emulating() __asm("clts")
+
+#else /* not __GNUC__ */
+
+void fldcw(caddr_t addr);
+void fnclex(void);
+void fninit(void);
+void fnsave(caddr_t addr);
+void fnstcw(caddr_t addr);
+void fnstsw(caddr_t addr);
+void fp_divide_by_0(void);
+void frstor(caddr_t addr);
+#ifdef CPU_ENABLE_SSE
+void fxsave(caddr_t addr);
+void fxrstor(caddr_t addr);
+#endif
+void start_emulating(void);
+void stop_emulating(void);
+
+#endif /* __GNUC__ */
+
+#ifdef CPU_ENABLE_SSE
+#define GET_FPU_CW(thread) \
+ (cpu_fxsr ? \
+ (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_cw : \
+ (thread)->td_pcb->pcb_save.sv_87.sv_env.en_cw)
+#define GET_FPU_SW(thread) \
+ (cpu_fxsr ? \
+ (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \
+ (thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw)
+#else /* CPU_ENABLE_SSE */
+#define GET_FPU_CW(thread) \
+ (thread->td_pcb->pcb_save.sv_87.sv_env.en_cw)
+#define GET_FPU_SW(thread) \
+ (thread->td_pcb->pcb_save.sv_87.sv_env.en_sw)
+#endif /* CPU_ENABLE_SSE */
+
+typedef u_char bool_t;
+
+static void fpusave(union savefpu *);
+static void fpurstor(union savefpu *);
+static int npx_attach(device_t dev);
+static void npx_identify(driver_t *driver, device_t parent);
+#if 0
+static void npx_intr(void *);
+#endif
+static int npx_probe(device_t dev);
+#ifdef I586_CPU_XXX
+static long timezero(const char *funcname,
+ void (*func)(void *buf, size_t len));
+#endif /* I586_CPU */
+
+int hw_float; /* XXX currently just alias for npx_exists */
+
+SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint,
+ CTLFLAG_RD, &hw_float, 0,
+ "Floatingpoint instructions executed in hardware");
+#if 0
+static volatile u_int npx_intrs_while_probing;
+#endif
+static union savefpu npx_cleanstate;
+static bool_t npx_cleanstate_ready;
+static bool_t npx_ex16;
+static bool_t npx_exists;
+static bool_t npx_irq13;
+
+alias_for_inthand_t probetrap;
+#if 0
+__asm(" \n\
+ .text \n\
+ .p2align 2,0x90 \n\
+ .type " __XSTRING(CNAME(probetrap)) ",@function \n\
+" __XSTRING(CNAME(probetrap)) ": \n\
+ ss \n\
+ incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\
+ fnclex \n\
+ iret \n\
+");
+#endif
+/*
+ * Identify routine. Create a connection point on our parent for probing.
+ */
+static void
+npx_identify(driver, parent)
+ driver_t *driver;
+ device_t parent;
+{
+ device_t child;
+
+ child = BUS_ADD_CHILD(parent, 0, "npx", 0);
+ if (child == NULL)
+ panic("npx_identify");
+}
+#if 0
+/*
+ * Do minimal handling of npx interrupts to convert them to traps.
+ */
+static void
+npx_intr(dummy)
+ void *dummy;
+{
+ struct thread *td;
+
+ npx_intrs_while_probing++;
+
+ /*
+ * The BUSY# latch must be cleared in all cases so that the next
+ * unmasked npx exception causes an interrupt.
+ */
+#ifdef PC98
+ outb(0xf8, 0);
+#else
+ outb(0xf0, 0);
+#endif
+
+ /*
+ * fpcurthread is normally non-null here. In that case, schedule an
+ * AST to finish the exception handling in the correct context
+ * (this interrupt may occur after the thread has entered the
+ * kernel via a syscall or an interrupt). Otherwise, the npx
+ * state of the thread that caused this interrupt must have been
+ * pushed to the thread's pcb, and clearing of the busy latch
+ * above has finished the (essentially null) handling of this
+ * interrupt. Control will eventually return to the instruction
+ * that caused it and it will repeat. We will eventually (usually
+ * soon) win the race to handle the interrupt properly.
+ */
+ td = PCPU_GET(fpcurthread);
+ if (td != NULL) {
+ td->td_pcb->pcb_flags |= PCB_NPXTRAP;
+ mtx_lock_spin(&sched_lock);
+ td->td_flags |= TDF_ASTPENDING;
+ mtx_unlock_spin(&sched_lock);
+ }
+}
+#endif
+
+static int
+npx_probe(device_t dev)
+{
+
+ return 1;
+}
+
+#if 0
+/*
+ * Probe routine. Initialize cr0 to give correct behaviour for [f]wait
+ * whether the device exists or not (XXX should be elsewhere). Set flags
+ * to tell npxattach() what to do. Modify device struct if npx doesn't
+ * need to use interrupts. Return 0 if device exists.
+ */
+static int
+npx_probe(device_t dev)
+{
+ struct gate_descriptor save_idt_npxtrap;
+ struct resource *ioport_res, *irq_res;
+ void *irq_cookie;
+ int ioport_rid, irq_num, irq_rid;
+ u_short control;
+ u_short status;
+
+ save_idt_npxtrap = idt[IDT_MF];
+ setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ ioport_rid = 0;
+ ioport_res = bus_alloc_resource(dev, SYS_RES_IOPORT, &ioport_rid,
+ IO_NPX, IO_NPX, IO_NPXSIZE, RF_ACTIVE);
+ if (ioport_res == NULL)
+ panic("npx: can't get ports");
+#ifdef PC98
+ if (resource_int_value("npx", 0, "irq", &irq_num) != 0)
+ irq_num = 8;
+#else
+ if (resource_int_value("npx", 0, "irq", &irq_num) != 0)
+ irq_num = 13;
+#endif
+ irq_rid = 0;
+ irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &irq_rid, irq_num,
+ irq_num, 1, RF_ACTIVE);
+ if (irq_res == NULL)
+ panic("npx: can't get IRQ");
+ if (bus_setup_intr(dev, irq_res, INTR_TYPE_MISC | INTR_FAST, npx_intr,
+ NULL, &irq_cookie) != 0)
+ panic("npx: can't create intr");
+
+ /*
+ * Partially reset the coprocessor, if any. Some BIOS's don't reset
+ * it after a warm boot.
+ */
+#ifdef PC98
+ outb(0xf8,0);
+#else
+ outb(0xf1, 0); /* full reset on some systems, NOP on others */
+ outb(0xf0, 0); /* clear BUSY# latch */
+#endif
+ /*
+ * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
+ * instructions. We must set the CR0_MP bit and use the CR0_TS
+ * bit to control the trap, because setting the CR0_EM bit does
+ * not cause WAIT instructions to trap. It's important to trap
+ * WAIT instructions - otherwise the "wait" variants of no-wait
+ * control instructions would degenerate to the "no-wait" variants
+ * after FP context switches but work correctly otherwise. It's
+ * particularly important to trap WAITs when there is no NPX -
+ * otherwise the "wait" variants would always degenerate.
+ *
+ * Try setting CR0_NE to get correct error reporting on 486DX's.
+ * Setting it should fail or do nothing on lesser processors.
+ */
+ load_cr0(rcr0() | CR0_MP | CR0_NE);
+ /*
+ * But don't trap while we're probing.
+ */
+ stop_emulating();
+ /*
+ * Finish resetting the coprocessor, if any. If there is an error
+ * pending, then we may get a bogus IRQ13, but npx_intr() will handle
+ * it OK. Bogus halts have never been observed, but we enabled
+ * IRQ13 and cleared the BUSY# latch early to handle them anyway.
+ */
+ fninit();
+
+ device_set_desc(dev, "math processor");
+
+ /*
+ * Don't use fwait here because it might hang.
+ * Don't use fnop here because it usually hangs if there is no FPU.
+ */
+ DELAY(1000); /* wait for any IRQ13 */
+#ifdef DIAGNOSTIC
+ if (npx_intrs_while_probing != 0)
+ printf("fninit caused %u bogus npx interrupt(s)\n",
+ npx_intrs_while_probing);
+ if (npx_traps_while_probing != 0)
+ printf("fninit caused %u bogus npx trap(s)\n",
+ npx_traps_while_probing);
+#endif
+ /*
+ * Check for a status of mostly zero.
+ */
+ status = 0x5a5a;
+ fnstsw(&status);
+ if ((status & 0xb8ff) == 0) {
+ /*
+ * Good, now check for a proper control word.
+ */
+ control = 0x5a5a;
+ fnstcw(&control);
+ if ((control & 0x1f3f) == 0x033f) {
+ hw_float = npx_exists = 1;
+ /*
+ * We have an npx, now divide by 0 to see if exception
+ * 16 works.
+ */
+ control &= ~(1 << 2); /* enable divide by 0 trap */
+ fldcw(&control);
+#ifdef FPU_ERROR_BROKEN
+ /*
+ * FPU error signal doesn't work on some CPU
+ * accelerator board.
+ */
+ npx_ex16 = 1;
+ return (0);
+#endif
+ npx_traps_while_probing = npx_intrs_while_probing = 0;
+ fp_divide_by_0();
+ if (npx_traps_while_probing != 0) {
+ /*
+ * Good, exception 16 works.
+ */
+ npx_ex16 = 1;
+ goto no_irq13;
+ }
+ if (npx_intrs_while_probing != 0) {
+ /*
+ * Bad, we are stuck with IRQ13.
+ */
+ npx_irq13 = 1;
+ idt[IDT_MF] = save_idt_npxtrap;
+#ifdef SMP
+ if (mp_ncpus > 1)
+ panic("npx0 cannot use IRQ 13 on an SMP system");
+#endif
+ return (0);
+ }
+ /*
+ * Worse, even IRQ13 is broken. Use emulator.
+ */
+ }
+ }
+ /*
+ * Probe failed, but we want to get to npxattach to initialize the
+ * emulator and say that it has been installed. XXX handle devices
+ * that aren't really devices better.
+ */
+#ifdef SMP
+ if (mp_ncpus > 1)
+ panic("npx0 cannot be emulated on an SMP system");
+#endif
+ /* FALLTHROUGH */
+no_irq13:
+ idt[IDT_MF] = save_idt_npxtrap;
+ bus_teardown_intr(dev, irq_res, irq_cookie);
+
+ /*
+ * XXX hack around brokenness of bus_teardown_intr(). If we left the
+ * irq active then we would get it instead of exception 16.
+ */
+ {
+ struct intsrc *isrc;
+
+ isrc = intr_lookup_source(irq_num);
+ isrc->is_pic->pic_disable_source(isrc);
+ }
+
+ bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res);
+ bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res);
+ return (0);
+}
+#endif
+
+/*
+ * Attach routine - announce which it is, and wire into system
+ */
+static int
+npx_attach(device_t dev)
+{
+ int flags;
+ register_t s;
+
+ if (resource_int_value("npx", 0, "flags", &flags) != 0)
+ flags = 0;
+
+ if (flags)
+ device_printf(dev, "flags 0x%x ", flags);
+ if (npx_irq13) {
+ device_printf(dev, "using IRQ 13 interface\n");
+ } else {
+ if (npx_ex16)
+ device_printf(dev, "INT 16 interface\n");
+ else
+ device_printf(dev, "WARNING: no FPU!\n");
+ }
+ npxinit(__INITIAL_NPXCW__);
+
+ if (npx_cleanstate_ready == 0) {
+ s = intr_disable();
+ stop_emulating();
+ fpusave(&npx_cleanstate);
+ start_emulating();
+ npx_cleanstate_ready = 1;
+ intr_restore(s);
+ }
+#ifdef I586_CPU_XXX
+ if (cpu_class == CPUCLASS_586 && npx_ex16 && npx_exists &&
+ timezero("i586_bzero()", i586_bzero) <
+ timezero("bzero()", bzero) * 4 / 5) {
+ if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY))
+ bcopy_vector = i586_bcopy;
+ if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BZERO))
+ bzero_vector = i586_bzero;
+ if (!(flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) {
+ copyin_vector = i586_copyin;
+ copyout_vector = i586_copyout;
+ }
+ }
+#endif
+
+ return (0); /* XXX unused */
+}
+
+/*
+ * Initialize floating point unit.
+ */
+void
+npxinit(control)
+ u_short control;
+{
+ static union savefpu dummy;
+ register_t savecrit;
+
+ if (!npx_exists)
+ return;
+ /*
+ * fninit has the same h/w bugs as fnsave. Use the detoxified
+ * fnsave to throw away any junk in the fpu. npxsave() initializes
+ * the fpu and sets fpcurthread = NULL as important side effects.
+ */
+ savecrit = intr_disable();
+ npxsave(&dummy);
+ stop_emulating();
+#ifdef CPU_ENABLE_SSE
+ /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */
+ if (cpu_fxsr)
+ fninit();
+#endif
+ fldcw(&control);
+ start_emulating();
+ intr_restore(savecrit);
+}
+
+/*
+ * Free coprocessor (if we have it).
+ */
+void
+npxexit(td)
+ struct thread *td;
+{
+ register_t savecrit;
+
+ savecrit = intr_disable();
+ if (curthread == PCPU_GET(fpcurthread))
+ npxsave(&PCPU_GET(curpcb)->pcb_save);
+ intr_restore(savecrit);
+#ifdef NPX_DEBUG
+ if (npx_exists) {
+ u_int masked_exceptions;
+
+ masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
+ /*
+ * Log exceptions that would have trapped with the old
+ * control word (overflow, divide by 0, and invalid operand).
+ */
+ if (masked_exceptions & 0x0d)
+ log(LOG_ERR,
+ "pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
+ td->td_proc->p_pid, td->td_proc->p_comm,
+ masked_exceptions);
+ }
+#endif
+}
+
+int
+npxformat()
+{
+
+ if (!npx_exists)
+ return (_MC_FPFMT_NODEV);
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr)
+ return (_MC_FPFMT_XMM);
+#endif
+ return (_MC_FPFMT_387);
+}
+
+/*
+ * The following mechanism is used to ensure that the FPE_... value
+ * that is passed as a trapcode to the signal handler of the user
+ * process does not have more than one bit set.
+ *
+ * Multiple bits may be set if the user process modifies the control
+ * word while a status word bit is already set. While this is a sign
+ * of bad coding, we have no choice but to narrow them down to one
+ * bit, since we must not send a trapcode that is not exactly one of
+ * the FPE_ macros.
+ *
+ * The mechanism has a static table with 127 entries. Each combination
+ * of the 7 FPU status word exception bits directly translates to a
+ * position in this table, where a single FPE_... value is stored.
+ * This FPE_... value stored there is considered the "most important"
+ * of the exception bits and will be sent as the signal code. The
+ * precedence of the bits is based upon Intel Document "Numerical
+ * Applications", Chapter "Special Computational Situations".
+ *
+ * The macro to choose one of these values does these steps: 1) Throw
+ * away status word bits that cannot be masked. 2) Throw away the bits
+ * currently masked in the control word, assuming the user isn't
+ * interested in them anymore. 3) Reinsert status word bit 7 (stack
+ * fault) if it is set, which cannot be masked but must be preserved.
+ * 4) Use the remaining bits to point into the trapcode table.
+ *
+ * The 6 maskable bits in order of their preference, as stated in the
+ * above referenced Intel manual:
+ * 1 Invalid operation (FP_X_INV)
+ * 1a Stack underflow
+ * 1b Stack overflow
+ * 1c Operand of unsupported format
+ * 1d SNaN operand.
+ * 2  QNaN operand (not an exception, irrelevant here)
+ * 3 Any other invalid-operation not mentioned above or zero divide
+ * (FP_X_INV, FP_X_DZ)
+ * 4 Denormal operand (FP_X_DNML)
+ * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
+ * 6 Inexact result (FP_X_IMP)
+ */
+static char fpetable[128] = {
+ 0,
+ FPE_FLTINV, /* 1 - INV */
+ FPE_FLTUND, /* 2 - DNML */
+ FPE_FLTINV, /* 3 - INV | DNML */
+ FPE_FLTDIV, /* 4 - DZ */
+ FPE_FLTINV, /* 5 - INV | DZ */
+ FPE_FLTDIV, /* 6 - DNML | DZ */
+ FPE_FLTINV, /* 7 - INV | DNML | DZ */
+ FPE_FLTOVF, /* 8 - OFL */
+ FPE_FLTINV, /* 9 - INV | OFL */
+ FPE_FLTUND, /* A - DNML | OFL */
+ FPE_FLTINV, /* B - INV | DNML | OFL */
+ FPE_FLTDIV, /* C - DZ | OFL */
+ FPE_FLTINV, /* D - INV | DZ | OFL */
+ FPE_FLTDIV, /* E - DNML | DZ | OFL */
+ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */
+ FPE_FLTUND, /* 10 - UFL */
+ FPE_FLTINV, /* 11 - INV | UFL */
+ FPE_FLTUND, /* 12 - DNML | UFL */
+ FPE_FLTINV, /* 13 - INV | DNML | UFL */
+ FPE_FLTDIV, /* 14 - DZ | UFL */
+ FPE_FLTINV, /* 15 - INV | DZ | UFL */
+ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */
+ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */
+ FPE_FLTOVF, /* 18 - OFL | UFL */
+ FPE_FLTINV, /* 19 - INV | OFL | UFL */
+ FPE_FLTUND, /* 1A - DNML | OFL | UFL */
+ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */
+ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */
+ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */
+ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */
+ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */
+ FPE_FLTRES, /* 20 - IMP */
+ FPE_FLTINV, /* 21 - INV | IMP */
+ FPE_FLTUND, /* 22 - DNML | IMP */
+ FPE_FLTINV, /* 23 - INV | DNML | IMP */
+ FPE_FLTDIV, /* 24 - DZ | IMP */
+ FPE_FLTINV, /* 25 - INV | DZ | IMP */
+ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */
+ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */
+ FPE_FLTOVF, /* 28 - OFL | IMP */
+ FPE_FLTINV, /* 29 - INV | OFL | IMP */
+ FPE_FLTUND, /* 2A - DNML | OFL | IMP */
+ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */
+ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */
+ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */
+ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */
+ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */
+ FPE_FLTUND, /* 30 - UFL | IMP */
+ FPE_FLTINV, /* 31 - INV | UFL | IMP */
+ FPE_FLTUND, /* 32 - DNML | UFL | IMP */
+ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */
+ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */
+ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */
+ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */
+ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */
+ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */
+ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */
+ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */
+ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */
+ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */
+ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */
+ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */
+ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */
+ FPE_FLTSUB, /* 40 - STK */
+ FPE_FLTSUB, /* 41 - INV | STK */
+ FPE_FLTUND, /* 42 - DNML | STK */
+ FPE_FLTSUB, /* 43 - INV | DNML | STK */
+ FPE_FLTDIV, /* 44 - DZ | STK */
+ FPE_FLTSUB, /* 45 - INV | DZ | STK */
+ FPE_FLTDIV, /* 46 - DNML | DZ | STK */
+ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */
+ FPE_FLTOVF, /* 48 - OFL | STK */
+ FPE_FLTSUB, /* 49 - INV | OFL | STK */
+ FPE_FLTUND, /* 4A - DNML | OFL | STK */
+ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */
+ FPE_FLTDIV, /* 4C - DZ | OFL | STK */
+ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */
+ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */
+ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */
+ FPE_FLTUND, /* 50 - UFL | STK */
+ FPE_FLTSUB, /* 51 - INV | UFL | STK */
+ FPE_FLTUND, /* 52 - DNML | UFL | STK */
+ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */
+ FPE_FLTDIV, /* 54 - DZ | UFL | STK */
+ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */
+ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */
+ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */
+ FPE_FLTOVF, /* 58 - OFL | UFL | STK */
+ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */
+ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */
+ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */
+ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */
+ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */
+ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */
+ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */
+ FPE_FLTRES, /* 60 - IMP | STK */
+ FPE_FLTSUB, /* 61 - INV | IMP | STK */
+ FPE_FLTUND, /* 62 - DNML | IMP | STK */
+ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */
+ FPE_FLTDIV, /* 64 - DZ | IMP | STK */
+ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */
+ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */
+ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */
+ FPE_FLTOVF, /* 68 - OFL | IMP | STK */
+ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */
+ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */
+ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */
+ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */
+ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */
+ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */
+ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */
+ FPE_FLTUND, /* 70 - UFL | IMP | STK */
+ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */
+ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */
+ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */
+ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */
+ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */
+ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */
+ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */
+ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */
+ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */
+ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */
+ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */
+ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */
+ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */
+ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */
+ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
+};
+
+/*
+ * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
+ *
+ * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now
+ * depend on longjmp() restoring a usable state. Restoring the state
+ * or examining it might fail if we didn't clear exceptions.
+ *
+ * The error code chosen will be one of the FPE_... macros. It will be
+ * sent as the second argument to old BSD-style signal handlers and as
+ * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers.
+ *
+ * XXX the FP state is not preserved across signal handlers. So signal
+ * handlers cannot afford to do FP unless they preserve the state or
+ * longjmp() out. Both preserving the state and longjmp()ing may be
+ * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable
+ * solution for signals other than SIGFPE.
+ */
+int
+npxtrap()
+{
+ register_t savecrit;
+ u_short control, status;
+
+ if (!npx_exists) {
+ printf("npxtrap: fpcurthread = %p, curthread = %p, npx_exists = %d\n",
+ PCPU_GET(fpcurthread), curthread, npx_exists);
+ panic("npxtrap from nowhere");
+ }
+ savecrit = intr_disable();
+
+ /*
+ * Interrupt handling (for another interrupt) may have pushed the
+ * state to memory. Fetch the relevant parts of the state from
+ * wherever they are.
+ */
+ if (PCPU_GET(fpcurthread) != curthread) {
+ control = GET_FPU_CW(curthread);
+ status = GET_FPU_SW(curthread);
+ } else {
+ fnstcw(&control);
+ fnstsw(&status);
+ }
+
+ if (PCPU_GET(fpcurthread) == curthread)
+ fnclex();
+ intr_restore(savecrit);
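+	/* Keep only the unmasked pending exceptions, plus the stack-fault bit. */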
+ return (fpetable[status & ((~control & 0x3f) | 0x40)]);
+}
+
+/*
+ * Implement device not available (DNA) exception
+ *
+ * It would be better to switch FP context here (if curthread != fpcurthread)
+ * and not necessarily for every context switch, but it is too hard to
+ * access foreign pcb's.
+ */
+
+static int err_count = 0;
+
+int
+npxdna()
+{
+ struct pcb *pcb;
+ register_t s;
+ u_short control;
+
+ if (!npx_exists)
+ return (0);
+ if (PCPU_GET(fpcurthread) == curthread) {
+ printf("npxdna: fpcurthread == curthread %d times\n",
+ ++err_count);
+ stop_emulating();
+ return (1);
+ }
+ if (PCPU_GET(fpcurthread) != NULL) {
+ printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
+ PCPU_GET(fpcurthread),
+ PCPU_GET(fpcurthread)->td_proc->p_pid,
+ curthread, curthread->td_proc->p_pid);
+ panic("npxdna");
+ }
+ s = intr_disable();
+ stop_emulating();
+ /*
+ * Record new context early in case frstor causes an IRQ13.
+ */
+ PCPU_SET(fpcurthread, curthread);
+ pcb = PCPU_GET(curpcb);
+
+ if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
+ /*
+ * This is the first time this thread has used the FPU or
+ * the PCB doesn't contain a clean FPU state. Explicitly
+ * initialize the FPU and load the default control word.
+ */
+ fninit();
+ control = __INITIAL_NPXCW__;
+ fldcw(&control);
+ pcb->pcb_flags |= PCB_NPXINITDONE;
+ } else {
+ /*
+ * The following frstor may cause an IRQ13 when the state
+ * being restored has a pending error. The error will
+ * appear to have been triggered by the current (npx) user
+ * instruction even when that instruction is a no-wait
+ * instruction that should not trigger an error (e.g.,
+ * fnclex). On at least one 486 system all of the no-wait
+ * instructions are broken the same as frstor, so our
+ * treatment does not amplify the breakage. On at least
+ * one 386/Cyrix 387 system, fnclex works correctly while
+ * frstor and fnsave are broken, so our treatment breaks
+ * fnclex if it is the first FPU instruction after a context
+ * switch.
+ */
+ fpurstor(&pcb->pcb_save);
+ }
+ intr_restore(s);
+
+ return (1);
+}
+
+/*
+ * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx
+ * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by
+ * no-wait npx instructions. See the Intel application note AP-578 for
+ * details. This doesn't cause any additional complications here. IRQ13's
+ * are inherently asynchronous unless the CPU is frozen to deliver them --
+ * one that started in userland may be delivered many instructions later,
+ * after the process has entered the kernel. It may even be delivered after
+ * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in
+ * the same way as a very-late-arriving non-spurious IRQ13 from user mode:
+ * it is normally ignored at first because we set fpcurthread to NULL; it is
+ * normally retriggered in npxdna() after return to user mode.
+ *
+ * npxsave() must be called with interrupts disabled, so that it clears
+ * fpcurthread atomically with saving the state. We require callers to do the
+ * disabling, since most callers need to disable interrupts anyway to call
+ * npxsave() atomically with checking fpcurthread.
+ *
+ * A previous version of npxsave() went to great lengths to execute fnsave
+ * with interrupts enabled in case executing it froze the CPU. This case
+ * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply
+ * spurious freezes.
+ */
+void
+npxsave(addr)
+ union savefpu *addr;
+{
+
+ stop_emulating();
+ fpusave(addr);
+
+ start_emulating();
+ PCPU_SET(fpcurthread, NULL);
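+	/*
+	 * A paravirtualised guest cannot set CR0.TS itself; queue an
+	 * fpu_taskswitch hypercall so Xen re-arms the DNA trap for us.
+	 */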
+ queue_multicall0(__HYPERVISOR_fpu_taskswitch);
+}
+
+/*
+ * This should be called with interrupts disabled and only when the owning
+ * FPU thread is non-null.
+ */
+void
+npxdrop()
+{
+ struct thread *td;
+
+ td = PCPU_GET(fpcurthread);
+ PCPU_SET(fpcurthread, NULL);
+ td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
+ start_emulating();
+}
+
+/*
+ * Get the state of the FPU without dropping ownership (if possible).
+ * It returns the FPU ownership status.
+ */
+int
+npxgetregs(td, addr)
+ struct thread *td;
+ union savefpu *addr;
+{
+ register_t s;
+
+ if (!npx_exists)
+ return (_MC_FPOWNED_NONE);
+
+ if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
+ if (npx_cleanstate_ready)
+ bcopy(&npx_cleanstate, addr, sizeof(npx_cleanstate));
+ else
+ bzero(addr, sizeof(*addr));
+ return (_MC_FPOWNED_NONE);
+ }
+ s = intr_disable();
+ if (td == PCPU_GET(fpcurthread)) {
+ fpusave(addr);
+#ifdef CPU_ENABLE_SSE
+ if (!cpu_fxsr)
+#endif
+ /*
+ * fnsave initializes the FPU and destroys whatever
+ * context it contains. Make sure the FPU owner
+ * starts with a clean state next time.
+ */
+ npxdrop();
+ intr_restore(s);
+ return (_MC_FPOWNED_FPU);
+ } else {
+ intr_restore(s);
+ bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr));
+ return (_MC_FPOWNED_PCB);
+ }
+}
+
+/*
+ * Set the state of the FPU.
+ */
+void
+npxsetregs(td, addr)
+ struct thread *td;
+ union savefpu *addr;
+{
+ register_t s;
+
+ if (!npx_exists)
+ return;
+
+ s = intr_disable();
+ if (td == PCPU_GET(fpcurthread)) {
+ fpurstor(addr);
+ intr_restore(s);
+ } else {
+ intr_restore(s);
+ bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr));
+ }
+ curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE;
+}
+
+static void
+fpusave(addr)
+ union savefpu *addr;
+{
+
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr)
+ fxsave(addr);
+ else
+#endif
+ fnsave(addr);
+}
+
+static void
+fpurstor(addr)
+ union savefpu *addr;
+{
+
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr)
+ fxrstor(addr);
+ else
+#endif
+ frstor(addr);
+}
+
+#ifdef I586_CPU_XXX
+static long
+timezero(funcname, func)
+ const char *funcname;
+	void (*func)(void *buf, size_t len);
+{
+ void *buf;
+#define BUFSIZE 1048576
+ long usec;
+ struct timeval finish, start;
+
+ buf = malloc(BUFSIZE, M_TEMP, M_NOWAIT);
+ if (buf == NULL)
+ return (BUFSIZE);
+ microtime(&start);
+ (*func)(buf, BUFSIZE);
+ microtime(&finish);
+ usec = 1000000 * (finish.tv_sec - start.tv_sec) +
+ finish.tv_usec - start.tv_usec;
+ if (usec <= 0)
+ usec = 1;
+ if (bootverbose)
+ printf("%s bandwidth = %u kBps\n", funcname,
+ (u_int32_t)(((BUFSIZE >> 10) * 1000000) / usec));
+ free(buf, M_TEMP);
+ return (usec);
+}
+#endif /* I586_CPU_XXX */
+
+static device_method_t npx_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_identify, npx_identify),
+ DEVMETHOD(device_probe, npx_probe),
+ DEVMETHOD(device_attach, npx_attach),
+ DEVMETHOD(device_detach, bus_generic_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+
+ { 0, 0 }
+};
+
+static driver_t npx_driver = {
+ "npx",
+ npx_methods,
+ 1, /* no softc */
+};
+
+static devclass_t npx_devclass;
+
+/*
+ * We prefer to attach to the root nexus so that the usual case (exception 16)
+ * doesn't describe the processor as being `on isa'.
+ */
+DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0);
+
+#ifdef DEV_ISA
+
+/*
+ * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
+ */
+static struct isa_pnp_id npxisa_ids[] = {
+ { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
+ { 0 }
+};
+
+static int
+npxisa_probe(device_t dev)
+{
+ int result;
+ if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) {
+ device_quiet(dev);
+ }
+ return(result);
+}
+
+static int
+npxisa_attach(device_t dev)
+{
+ return (0);
+}
+
+static device_method_t npxisa_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, npxisa_probe),
+ DEVMETHOD(device_attach, npxisa_attach),
+ DEVMETHOD(device_detach, bus_generic_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+
+ { 0, 0 }
+};
+
+static driver_t npxisa_driver = {
+ "npxisa",
+ npxisa_methods,
+ 1, /* no softc */
+};
+
+static devclass_t npxisa_devclass;
+
+DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0);
+#ifndef PC98
+DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0);
+#endif
+#endif /* DEV_ISA */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c
new file mode 100644
index 0000000000..e25f218eb3
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c
@@ -0,0 +1,1436 @@
+/*
+ *
+ * Copyright (c) 2004 Kip Macy
+ * All rights reserved.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_nfsroot.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/ethernet.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+
+#include <net/bpf.h>
+
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/clock.h> /* for DELAY */
+#include <machine/bus_memio.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/frame.h>
+
+
+#include <sys/bus.h>
+#include <sys/rman.h>
+
+#include <machine/intr_machdep.h>
+
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen_intr.h>
+#include <machine/evtchn.h>
+#include <machine/ctrl_if.h>
+
+struct xn_softc;
+static void xn_txeof(struct xn_softc *);
+static void xn_rxeof(struct xn_softc *);
+static void xn_alloc_rx_buffers(struct xn_softc *);
+
+static void xn_tick_locked(struct xn_softc *);
+static void xn_tick(void *);
+
+static void xn_intr(void *);
+static void xn_start_locked(struct ifnet *);
+static void xn_start(struct ifnet *);
+static int xn_ioctl(struct ifnet *, u_long, caddr_t);
+static void xn_ifinit_locked(struct xn_softc *);
+static void xn_ifinit(void *);
+static void xn_stop(struct xn_softc *);
+#ifdef notyet
+static void xn_watchdog(struct ifnet *);
+#endif
+/* Xenolinux helper functions */
+static void network_connect(struct xn_softc *, netif_fe_interface_status_t *);
+static void create_netdev(int handle, struct xn_softc **);
+static void netif_ctrlif_rx(ctrl_msg_t *,unsigned long);
+
+static void xn_free_rx_ring(struct xn_softc *);
+
+static void xn_free_tx_ring(struct xn_softc *);
+
+
+
+/* XXX: This isn't supported in FreeBSD, so ignore it for now. */
+#define TASK_UNINTERRUPTIBLE 0
+#define INVALID_P2M_ENTRY (~0UL)
+
+/*
+ * If the backend driver is pipelining transmit requests then we can be very
+ * aggressive in avoiding new-packet notifications -- only need to send a
+ * notification if there are no outstanding unreceived responses.
+ * If the backend may be buffering our transmit buffers for any reason then we
+ * are rather more conservative.
+ */
+#ifdef CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
+#define TX_TEST_IDX resp_prod /* aggressive: any outstanding responses? */
+#else
+#define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
+#endif
+
+/*
+ * Mbuf pointers. We need these to keep track of the virtual addresses
+ * of our mbuf chains since we can only convert from virtual to physical,
+ * not the other way around. The size must track the free index arrays.
+ */
+struct xn_chain_data {
+ struct mbuf *xn_tx_chain[NETIF_TX_RING_SIZE+1];
+ struct mbuf *xn_rx_chain[NETIF_RX_RING_SIZE+1];
+};
+
+struct xn_softc {
+ struct arpcom arpcom; /* interface info */
+ device_t xn_dev;
+ SLIST_ENTRY(xn_softc) xn_links;
+ struct mtx xn_mtx;
+ void *xn_intrhand;
+ struct resource *xn_res;
+ u_int8_t xn_ifno; /* interface number */
+ struct xn_chain_data xn_cdata; /* mbufs */
+
+ netif_tx_interface_t *xn_tx_if;
+ netif_rx_interface_t *xn_rx_if;
+
+ int xn_if_flags;
+ int xn_txcnt;
+ int xn_rxbufcnt;
+ struct callout xn_stat_ch;
+ unsigned int xn_irq;
+ unsigned int xn_evtchn;
+
+
+ /* What is the status of our connection to the remote backend? */
+#define BEST_CLOSED 0
+#define BEST_DISCONNECTED 1
+#define BEST_CONNECTED 2
+ unsigned int xn_backend_state;
+
+ /* Is this interface open or closed (down or up)? */
+#define UST_CLOSED 0
+#define UST_OPEN 1
+ unsigned int xn_user_state;
+
+ /* Receive-ring batched refills. */
+	/* XXX: larger than Linux -- the default of 8 was causing packet loss. */
+#define RX_MIN_TARGET 64
+#define RX_MAX_TARGET NETIF_RX_RING_SIZE
+ int xn_rx_target; /* number to allocate */
+ struct mbuf *xn_rx_batch; /* head of the batch queue */
+ struct mbuf *xn_rx_batchtail;
+ int xn_rx_batchlen; /* how many queued */
+
+ int xn_rx_resp_cons;
+ int xn_tx_resp_cons;
+ unsigned short xn_rx_free_idxs[NETIF_RX_RING_SIZE+1];
+ unsigned short xn_tx_free_idxs[NETIF_RX_RING_SIZE+1];
+};
+
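+/*
+ * Scratch arrays shared by every interface.  Callers serialise access via
+ * their per-softc lock, which suffices for the common single-vif case.
+ */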
+static unsigned long xn_rx_pfns[NETIF_RX_RING_SIZE];
+static multicall_entry_t xn_rx_mcl[NETIF_RX_RING_SIZE+1];
+static mmu_update_t xn_rx_mmu[NETIF_RX_RING_SIZE];
+
+static SLIST_HEAD(, xn_softc) xn_dev_list =
+ SLIST_HEAD_INITIALIZER(xn_dev_list);
+
+#define XN_LOCK_INIT(_sc, _name) \
+ mtx_init(&(_sc)->xn_mtx, _name, MTX_NETWORK_LOCK, MTX_DEF)
+#define XN_LOCK(_sc) mtx_lock(&(_sc)->xn_mtx)
+#define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->xn_mtx, MA_OWNED)
+#define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->xn_mtx)
+#define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->xn_mtx)
+
+/* Access macros for acquiring and freeing slots in xn_{tx,rx}_free_idxs[]. */
+#define ADD_ID_TO_FREELIST(_list, _id) do {	\
+	(_list)[(_id)] = (_list)[0];		\
+	(_list)[0]  = (_id);			\
+} while (0)
+#define GET_ID_FROM_FREELIST(_list) \
+ ({ unsigned short _id = (_list)[0]; \
+ (_list)[0] = (_list)[_id]; \
+ (unsigned short)_id; })
+#define FREELIST_EMPTY(_list, _maxid) \
+ ((_list)[0] == (_maxid+1))
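+/*
+ * The freelist is threaded through each index array itself: slot 0 holds
+ * the head, every free slot holds the index of the next free slot, and
+ * the value (_maxid + 1) terminates the list.  create_netdev() initialises
+ * the arrays to 1, 2, ..., SIZE+1, i.e. every ring entry starts out free.
+ */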
+
+static char *status_name[] = {
+ [NETIF_INTERFACE_STATUS_CLOSED] = "closed",
+ [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+ [NETIF_INTERFACE_STATUS_CONNECTED] = "connected",
+ [NETIF_INTERFACE_STATUS_CHANGED] = "changed",
+};
+
+static char *be_state_name[] = {
+ [BEST_CLOSED] = "closed",
+ [BEST_DISCONNECTED] = "disconnected",
+ [BEST_CONNECTED] = "connected",
+};
+
+#define IPRINTK(fmt, args...) \
+ printk("[XEN] " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+ printk("[XEN] " fmt, ##args)
+
+static struct xn_softc *
+find_sc_by_handle(unsigned int handle)
+{
+ struct xn_softc *sc;
+ SLIST_FOREACH(sc, &xn_dev_list, xn_links)
+ {
+ if ( sc->xn_ifno == handle )
+ return sc;
+ }
+ return NULL;
+}
+
+/** Network interface info. */
+struct netif_ctrl {
+ /** Number of interfaces. */
+ int interface_n;
+ /** Number of connected interfaces. */
+ int connected_n;
+ /** Error code. */
+ int err;
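+	/** Driver status (NETIF_DRIVER_STATUS_UP or _DOWN). */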
+ int up;
+};
+
+static struct netif_ctrl netctrl;
+
+static void
+netctrl_init(void)
+{
+ /*
+ * netctrl is already in bss, why are we setting it?
+ */
+ memset(&netctrl, 0, sizeof(netctrl));
+ netctrl.up = NETIF_DRIVER_STATUS_DOWN;
+}
+
+/** Get or set a network interface error.
+ */
+static int
+netctrl_err(int err)
+{
+ if ( (err < 0) && !netctrl.err )
+ netctrl.err = err;
+ return netctrl.err;
+}
+
+/** Test if all network interfaces are connected.
+ *
+ * @return 1 if all connected, 0 if not, negative error code otherwise
+ */
+static int
+netctrl_connected(void)
+{
+ int ok;
+
+ if (netctrl.err)
+ ok = netctrl.err;
+ else if (netctrl.up == NETIF_DRIVER_STATUS_UP)
+ ok = (netctrl.connected_n == netctrl.interface_n);
+ else
+ ok = 0;
+
+ return ok;
+}
+
+/** Count the connected network interfaces.
+ *
+ * @return connected count
+ */
+static int
+netctrl_connected_count(void)
+{
+
+ struct xn_softc *sc;
+ unsigned int connected;
+
+ connected = 0;
+
+ SLIST_FOREACH(sc, &xn_dev_list, xn_links)
+ {
+ if ( sc->xn_backend_state == BEST_CONNECTED )
+ connected++;
+ }
+
+ netctrl.connected_n = connected;
+ XENPRINTF("> connected_n=%d interface_n=%d\n",
+ netctrl.connected_n, netctrl.interface_n);
+ return connected;
+}
+
+/*
+ * Flatten an mbuf chain into a single cluster mbuf; the backend wants
+ * one physically contiguous buffer per transmit request.
+ */
+static __inline struct mbuf *
+makembuf(struct mbuf *buf)
+{
+	struct mbuf *m;
+	int len = buf->m_pkthdr.len;
+
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m == NULL)
+		return (NULL);
+
+	M_MOVE_PKTHDR(m, buf);
+
+	MCLGET(m, M_DONTWAIT);
+	if ((m->m_flags & M_EXT) == 0) {
+		/* No cluster available: bail rather than overrun the mbuf. */
+		m_freem(m);
+		return (NULL);
+	}
+
+	m->m_pkthdr.len = m->m_len = len;
+	m_copydata(buf, 0, len, mtod(m, caddr_t));
+	/* Remember the physical address; the ring code needs it later. */
+	m->m_ext.ext_args = (vm_paddr_t *)vtophys(mtod(m, caddr_t));
+
+	return (m);
+}
+
+
+
+static void
+xn_free_rx_ring(struct xn_softc *sc)
+{
+#if 0
+ int i;
+
+ for (i = 0; i < NETIF_RX_RING_SIZE; i++) {
+ if (sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)] != NULL) {
+ m_freem(sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)]);
+ sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)] = NULL;
+ }
+ }
+
+ sc->xn_rx_resp_cons = 0;
+ sc->xn_rx_if->req_prod = 0;
+ sc->xn_rx_if->event = sc->xn_rx_resp_cons ;
+#endif
+}
+
+static void
+xn_free_tx_ring(struct xn_softc *sc)
+{
+#if 0
+ int i;
+
+ for (i = 0; i < NETIF_TX_RING_SIZE; i++) {
+ if (sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)] != NULL) {
+ m_freem(sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)]);
+ sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)] = NULL;
+ }
+ }
+
+ return;
+#endif
+}
+
+static void
+xn_alloc_rx_buffers(struct xn_softc *sc)
+{
+ unsigned short id;
+ struct mbuf *m_new, *next;
+ int i, batch_target;
+ NETIF_RING_IDX req_prod = sc->xn_rx_if->req_prod;
+
+ if (unlikely(sc->xn_backend_state != BEST_CONNECTED) )
+ return;
+
+	/*
+	 * Allocate mbufs greedily, even though we batch updates to the
+	 * receive ring.  This creates a less bursty demand on the memory
+	 * allocator, so should reduce the chance of failed allocation
+	 * requests both for ourselves and for other kernel subsystems.
+	 */
+ batch_target = sc->xn_rx_target - (req_prod - sc->xn_rx_resp_cons);
+ for ( i = sc->xn_rx_batchlen; i < batch_target; i++, sc->xn_rx_batchlen++) {
+ MGETHDR(m_new, M_DONTWAIT, MT_DATA);
+ if (m_new == NULL)
+ break;
+
+ MCLGET(m_new, M_DONTWAIT);
+ if (!(m_new->m_flags & M_EXT)) {
+ m_freem(m_new);
+ break;
+ }
+ m_new->m_len = m_new->m_pkthdr.len = MCLBYTES;
+
+ /* queue the mbufs allocated */
+ if (!sc->xn_rx_batch)
+ sc->xn_rx_batch = m_new;
+
+ if (sc->xn_rx_batchtail)
+ sc->xn_rx_batchtail->m_next = m_new;
+ sc->xn_rx_batchtail = m_new;
+ }
+
+ /* Is the batch large enough to be worthwhile? */
+ if ( i < (sc->xn_rx_target/2) )
+ return;
+
+ for (i = 0, m_new = sc->xn_rx_batch; m_new;
+ i++, sc->xn_rx_batchlen--, m_new = next) {
+
+ next = m_new->m_next;
+ m_new->m_next = NULL;
+
+ m_new->m_ext.ext_args = (vm_paddr_t *)vtophys(m_new->m_ext.ext_buf);
+
+ id = GET_ID_FROM_FREELIST(sc->xn_rx_free_idxs);
+ KASSERT(id != 0, ("alloc_rx_buffers: found free receive index of 0\n"));
+ sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(id)] = m_new;
+
+ sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
+
+ xn_rx_pfns[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
+
+ /* Remove this page from pseudo phys map before passing back to Xen. */
+ xen_phys_machine[((unsigned long)m_new->m_ext.ext_args >> PAGE_SHIFT)]
+ = INVALID_P2M_ENTRY;
+
+ xn_rx_mcl[i].op = __HYPERVISOR_update_va_mapping;
+ xn_rx_mcl[i].args[0] = (unsigned long)mtod(m_new,vm_offset_t)
+ >> PAGE_SHIFT;
+ xn_rx_mcl[i].args[1] = 0;
+ xn_rx_mcl[i].args[2] = 0;
+
+ }
+
+ KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
+ KASSERT(sc->xn_rx_batchlen == 0, ("not all mbufs processed"));
+ sc->xn_rx_batch = sc->xn_rx_batchtail = NULL;
+
+	/*
+	 * We may have allocated buffers which have entries outstanding
+	 * in the page update queue -- make sure we flush those first!
+	 */
+ PT_UPDATES_FLUSH();
+
+ /* After all PTEs have been zapped we blow away stale TLB entries. */
+ xn_rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB;
+
+ /* Give away a batch of pages. */
+ xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
+ xn_rx_mcl[i].args[0] = (unsigned long) MEMOP_decrease_reservation;
+ xn_rx_mcl[i].args[1] = (unsigned long)xn_rx_pfns;
+ xn_rx_mcl[i].args[2] = (unsigned long)i;
+ xn_rx_mcl[i].args[3] = 0;
+ xn_rx_mcl[i].args[4] = DOMID_SELF;
+
+ /* Zap PTEs and give away pages in one big multicall. */
+ (void)HYPERVISOR_multicall(xn_rx_mcl, i+1);
+
+ /* Check return status of HYPERVISOR_dom_mem_op(). */
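+	/* (the hypervisor writes each sub-op's return value into args[5]) */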
+ if ( xn_rx_mcl[i].args[5] != i )
+ panic("Unable to reduce memory reservation\n");
+
+ /* Above is a suitable barrier to ensure backend will see requests. */
+ sc->xn_rx_if->req_prod = req_prod + i;
+
+ /* Adjust our floating fill target if we risked running out of buffers. */
+ if ( ((req_prod - sc->xn_rx_if->resp_prod) < (sc->xn_rx_target / 4)) &&
+ ((sc->xn_rx_target *= 2) > RX_MAX_TARGET) )
+ sc->xn_rx_target = RX_MAX_TARGET;
+}
+
+static void
+xn_rxeof(struct xn_softc *sc)
+{
+ struct ifnet *ifp;
+ netif_rx_response_t *rx;
+ NETIF_RING_IDX i, rp;
+ mmu_update_t *mmu = xn_rx_mmu;
+ multicall_entry_t *mcl = xn_rx_mcl;
+ struct mbuf *tail_mbuf = NULL, *head_mbuf = NULL, *m, *next;
+
+ XN_LOCK_ASSERT(sc);
+ if (sc->xn_backend_state != BEST_CONNECTED)
+ return;
+
+ ifp = &sc->arpcom.ac_if;
+
+ rp = sc->xn_rx_if->resp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for (i = sc->xn_rx_resp_cons; i != rp; i++) {
+
+ rx = &sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(i)].resp;
+ KASSERT(rx->id != 0, ("xn_rxeof: found free receive index of 0\n"));
+
+ /*
+ * An error here is very odd. Usually indicates a backend bug,
+ * low-memory condition, or that we didn't have reservation headroom.
+ * Whatever - print an error and queue the id again straight away.
+ */
+ if (unlikely(rx->status <= 0)) {
+ printk("bad buffer on RX ring!(%d)\n", rx->status);
+ sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(sc->xn_rx_if->req_prod)].req.id
+ = rx->id;
+ wmb();
+ sc->xn_rx_if->req_prod++;
+ continue;
+ }
+
+ m = (struct mbuf *)
+ sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(rx->id)];
+ if (m->m_next)
+ panic("mbuf is already part of a valid mbuf chain");
+ ADD_ID_TO_FREELIST(sc->xn_rx_free_idxs, rx->id);
+
+ m->m_data += (rx->addr & PAGE_MASK);
+ m->m_pkthdr.len = m->m_len = rx->status;
+ m->m_pkthdr.rcvif = ifp;
+
+ /* Remap the page. */
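+		/*
+		 * The mmu_update below rewrites the machine-to-physical
+		 * (M2P) entry for the newly received page; update_va_mapping
+		 * repoints the kernel VA at it, and the xen_phys_machine[]
+		 * store is the matching P2M update.
+		 */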
+ mmu->ptr = (rx->addr & ~PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+ mmu->val = (unsigned long)m->m_ext.ext_args >> PAGE_SHIFT;
+ mmu++;
+ mcl->op = __HYPERVISOR_update_va_mapping;
+ mcl->args[0] = (unsigned long)m->m_data >> PAGE_SHIFT;
+ mcl->args[1] = (rx->addr & ~PAGE_MASK) | PG_KERNEL;
+ mcl->args[2] = 0;
+ mcl++;
+
+ xen_phys_machine[((unsigned long)m->m_ext.ext_args >> PAGE_SHIFT)] =
+ (rx->addr >> PAGE_SHIFT);
+
+ if (unlikely(!head_mbuf))
+ head_mbuf = m;
+
+ if (tail_mbuf)
+ tail_mbuf->m_next = m;
+ tail_mbuf = m;
+
+ sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(rx->id)] = NULL;
+ sc->xn_rxbufcnt++;
+ }
+
+ /* Do all the remapping work, and M->P updates, in one big hypercall. */
+ if (likely((mcl - xn_rx_mcl) != 0)) {
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)xn_rx_mmu;
+ mcl->args[1] = mmu - xn_rx_mmu;
+ mcl->args[2] = 0;
+ mcl++;
+ (void)HYPERVISOR_multicall(xn_rx_mcl, mcl - xn_rx_mcl);
+ }
+
+
+ /*
+ * Process all the mbufs after the remapping is complete.
+ * Break the mbuf chain first though.
+ */
+ for (m = head_mbuf; m; m = next) {
+ next = m->m_next;
+ m->m_next = NULL;
+
+ ifp->if_ipackets++;
+
+ XN_UNLOCK(sc);
+
+ /* Pass it up. */
+ (*ifp->if_input)(ifp, m);
+ XN_LOCK(sc);
+ }
+
+ sc->xn_rx_resp_cons = i;
+
+ /* If we get a callback with very few responses, reduce fill target. */
+ /* NB. Note exponential increase, linear decrease. */
+ if (((sc->xn_rx_if->req_prod - sc->xn_rx_if->resp_prod) >
+ ((3*sc->xn_rx_target) / 4)) && (--sc->xn_rx_target < RX_MIN_TARGET))
+ sc->xn_rx_target = RX_MIN_TARGET;
+
+ xn_alloc_rx_buffers(sc);
+
+ sc->xn_rx_if->event = i + 1;
+}
+
+static void
+xn_txeof(struct xn_softc *sc)
+{
+ NETIF_RING_IDX i, prod;
+ unsigned short id;
+ struct ifnet *ifp;
+ struct mbuf *m;
+
+ XN_LOCK_ASSERT(sc);
+
+ if (sc->xn_backend_state != BEST_CONNECTED)
+ return;
+
+ ifp = &sc->arpcom.ac_if;
+ ifp->if_timer = 0;
+
+ do {
+ prod = sc->xn_tx_if->resp_prod;
+
+ for (i = sc->xn_tx_resp_cons; i != prod; i++) {
+ id = sc->xn_tx_if->ring[MASK_NETIF_TX_IDX(i)].resp.id;
+ m = sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)];
+
+ KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
+ M_ASSERTVALID(m);
+
+ m_freem(m);
+ sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)] = NULL;
+ ADD_ID_TO_FREELIST(sc->xn_tx_free_idxs, id);
+ sc->xn_txcnt--;
+ }
+ sc->xn_tx_resp_cons = prod;
+
+ /*
+ * Set a new event, then check for race with update of tx_cons. Note
+ * that it is essential to schedule a callback, no matter how few
+ * buffers are pending. Even if there is space in the transmit ring,
+ * higher layers may be blocked because too much data is outstanding:
+ * in such cases notification from Xen is likely to be the only kick
+ * that we'll get.
+ */
+ sc->xn_tx_if->event =
+ prod + ((sc->xn_tx_if->req_prod - prod) >> 1) + 1;
+
+ mb();
+
+ } while (prod != sc->xn_tx_if->resp_prod);
+}
+
+static void
+xn_intr(void *xsc)
+{
+ struct xn_softc *sc = xsc;
+ struct ifnet *ifp = &sc->arpcom.ac_if;
+
+ XN_LOCK(sc);
+
+ /* sometimes we seem to lose packets. stay in the interrupt handler while
+ * there is stuff to process: continually recheck the response producer.
+ */
+ do {
+ xn_txeof(sc);
+
+ if (sc->xn_rx_resp_cons != sc->xn_rx_if->resp_prod &&
+ sc->xn_user_state == UST_OPEN)
+ xn_rxeof(sc);
+
+ if (ifp->if_flags & IFF_RUNNING && ifp->if_snd.ifq_head != NULL)
+ xn_start_locked(ifp);
+ } while (sc->xn_rx_resp_cons != sc->xn_rx_if->resp_prod &&
+ sc->xn_user_state == UST_OPEN);
+
+ XN_UNLOCK(sc);
+ return;
+}
+
+static void
+xn_tick_locked(struct xn_softc *sc)
+{
+ XN_LOCK_ASSERT(sc);
+ callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
+
+ /* XXX placeholder for printing debug information */
+
+}
+
+
+static void
+xn_tick(void *xsc)
+{
+ struct xn_softc *sc;
+
+ sc = xsc;
+ XN_LOCK(sc);
+ xn_tick_locked(sc);
+ XN_UNLOCK(sc);
+
+}
+static void
+xn_start_locked(struct ifnet *ifp)
+{
+ unsigned short id;
+ struct mbuf *m_head, *new_m;
+ struct xn_softc *sc = ifp->if_softc;
+ netif_tx_request_t *tx;
+ NETIF_RING_IDX i, start;
+
+ if (sc->xn_backend_state != BEST_CONNECTED)
+ return;
+
+ for (i = start = sc->xn_tx_if->req_prod; TRUE; i++, sc->xn_txcnt++) {
+
+ IF_DEQUEUE(&ifp->if_snd, m_head);
+ if (m_head == NULL)
+ break;
+
+ if (FREELIST_EMPTY(sc->xn_tx_free_idxs, NETIF_TX_RING_SIZE)) {
+ IF_PREPEND(&ifp->if_snd, m_head);
+ ifp->if_flags |= IFF_OACTIVE;
+ break;
+ }
+
+		id = GET_ID_FROM_FREELIST(sc->xn_tx_free_idxs);
+
+		/*
+		 * Flatten the mbuf chain into a single contiguous cluster
+		 * that can be handed to the backend in one request.
+		 */
+		new_m = makembuf(m_head);
+		if (new_m == NULL) {
+			/* Out of mbufs; put everything back and retry later. */
+			ADD_ID_TO_FREELIST(sc->xn_tx_free_idxs, id);
+			IF_PREPEND(&ifp->if_snd, m_head);
+			ifp->if_flags |= IFF_OACTIVE;
+			break;
+		}
+		tx = &(sc->xn_tx_if->ring[MASK_NETIF_TX_IDX(i)].req);
+		tx->id = id;
+		tx->size = new_m->m_pkthdr.len;
+		new_m->m_next = NULL;
+		new_m->m_nextpkt = NULL;
+
+		m_freem(m_head);
+		tx->addr = vtomach(mtod(new_m, vm_offset_t));
+
+		sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)] = new_m;
+		BPF_MTAP(ifp, new_m);
+ }
+
+ sc->xn_tx_if->req_prod = i;
+ xn_txeof(sc);
+
+ /* Only notify Xen if we really have to. */
+ if (sc->xn_tx_if->TX_TEST_IDX == start)
+ notify_via_evtchn(sc->xn_evtchn);
+ return;
+}
+
+static void
+xn_start(struct ifnet *ifp)
+{
+ struct xn_softc *sc;
+ sc = ifp->if_softc;
+ XN_LOCK(sc);
+ xn_start_locked(ifp);
+ XN_UNLOCK(sc);
+}
+
+
+
+/* equivalent of network_open() in Linux */
+static void
+xn_ifinit_locked(struct xn_softc *sc)
+{
+ struct ifnet *ifp;
+
+ XN_LOCK_ASSERT(sc);
+
+ ifp = &sc->arpcom.ac_if;
+
+ if (ifp->if_flags & IFF_RUNNING)
+ return;
+
+ xn_stop(sc);
+
+ sc->xn_user_state = UST_OPEN;
+
+ xn_alloc_rx_buffers(sc);
+ sc->xn_rx_if->event = sc->xn_rx_resp_cons + 1;
+
+ ifp->if_flags |= IFF_RUNNING;
+ ifp->if_flags &= ~IFF_OACTIVE;
+
+ callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
+
+}
+
+
+static void
+xn_ifinit(void *xsc)
+{
+ struct xn_softc *sc = xsc;
+
+ XN_LOCK(sc);
+ xn_ifinit_locked(sc);
+ XN_UNLOCK(sc);
+
+}
+
+
+static int
+xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct xn_softc *sc = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq *) data;
+ int mask, error = 0;
+ switch(cmd) {
+ case SIOCSIFMTU:
+		/* XXX can we alter the MTU on a VN? */
+#ifdef notyet
+ if (ifr->ifr_mtu > XN_JUMBO_MTU)
+ error = EINVAL;
+ else
+#endif
+ {
+ ifp->if_mtu = ifr->ifr_mtu;
+ ifp->if_flags &= ~IFF_RUNNING;
+ xn_ifinit(sc);
+ }
+ break;
+ case SIOCSIFFLAGS:
+ XN_LOCK(sc);
+ if (ifp->if_flags & IFF_UP) {
+ /*
+ * If only the state of the PROMISC flag changed,
+ * then just use the 'set promisc mode' command
+ * instead of reinitializing the entire NIC. Doing
+ * a full re-init means reloading the firmware and
+ * waiting for it to start up, which may take a
+ * second or two.
+ */
+#ifdef notyet
+ /* No promiscuous mode with Xen */
+ if (ifp->if_flags & IFF_RUNNING &&
+ ifp->if_flags & IFF_PROMISC &&
+ !(sc->xn_if_flags & IFF_PROMISC)) {
+ XN_SETBIT(sc, XN_RX_MODE,
+ XN_RXMODE_RX_PROMISC);
+ } else if (ifp->if_flags & IFF_RUNNING &&
+ !(ifp->if_flags & IFF_PROMISC) &&
+ sc->xn_if_flags & IFF_PROMISC) {
+ XN_CLRBIT(sc, XN_RX_MODE,
+ XN_RXMODE_RX_PROMISC);
+ } else
+#endif
+ xn_ifinit_locked(sc);
+ } else {
+ if (ifp->if_flags & IFF_RUNNING) {
+ xn_stop(sc);
+ }
+ }
+ sc->xn_if_flags = ifp->if_flags;
+ XN_UNLOCK(sc);
+ error = 0;
+ break;
+ case SIOCSIFCAP:
+ mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+ if (mask & IFCAP_HWCSUM) {
+ if (IFCAP_HWCSUM & ifp->if_capenable)
+ ifp->if_capenable &= ~IFCAP_HWCSUM;
+ else
+ ifp->if_capenable |= IFCAP_HWCSUM;
+ }
+ error = 0;
+ break;
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+#ifdef notyet
+ if (ifp->if_flags & IFF_RUNNING) {
+ XN_LOCK(sc);
+ xn_setmulti(sc);
+ XN_UNLOCK(sc);
+ error = 0;
+ }
+#endif
+ /* FALLTHROUGH */
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ error = EINVAL;
+ break;
+ default:
+ error = ether_ioctl(ifp, cmd, data);
+ }
+
+ return (error);
+}
+
+static void
+xn_stop(struct xn_softc *sc)
+{
+ struct ifnet *ifp;
+
+ XN_LOCK_ASSERT(sc);
+
+ ifp = &sc->arpcom.ac_if;
+
+ callout_stop(&sc->xn_stat_ch);
+
+ xn_free_rx_ring(sc);
+ xn_free_tx_ring(sc);
+
+ ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
+}
+
+/* START of Xenolinux helper functions adapted to FreeBSD */
+static void
+network_connect(struct xn_softc *sc, netif_fe_interface_status_t *status)
+{
+ struct ifnet *ifp;
+ int i, requeue_idx;
+ netif_tx_request_t *tx;
+
+ XN_LOCK(sc);
+
+ ifp = &sc->arpcom.ac_if;
+ /* first time through, setup the ifp info */
+ if (ifp->if_softc == NULL) {
+ ifp->if_softc = sc;
+ if_initname(ifp, "xn", sc->xn_ifno);
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
+ ifp->if_ioctl = xn_ioctl;
+ ifp->if_output = ether_output;
+ ifp->if_start = xn_start;
+#ifdef notyet
+ ifp->if_watchdog = xn_watchdog;
+#endif
+ ifp->if_init = xn_ifinit;
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_snd.ifq_maxlen = NETIF_TX_RING_SIZE - 1;
+
+#ifdef notyet
+ ifp->if_hwassist = XN_CSUM_FEATURES;
+ ifp->if_capabilities = IFCAP_HWCSUM;
+ ifp->if_capenable = ifp->if_capabilities;
+#endif
+
+ ether_ifattach(ifp, sc->arpcom.ac_enaddr);
+ callout_init(&sc->xn_stat_ch, CALLOUT_MPSAFE);
+ }
+
+ /* Recovery procedure: */
+
+ /* Step 1: Reinitialise variables. */
+ sc->xn_rx_resp_cons = sc->xn_tx_resp_cons = 0;
+ sc->xn_rxbufcnt = sc->xn_txcnt = 0;
+ sc->xn_rx_if->event = sc->xn_tx_if->event = 1;
+
+ /* Step 2: Rebuild the RX and TX ring contents.
+ * NB. We could just free the queued TX packets now but we hope
+ * that sending them out might do some good. We have to rebuild
+ * the RX ring because some of our pages are currently flipped out
+ * so we can't just free the RX skbs.
+ */
+
+ /* Rebuild the TX buffer freelist and the TX ring itself.
+ * NB. This reorders packets. We could keep more private state
+ * to avoid this but maybe it doesn't matter so much given the
+ * interface has been down.
+ */
+ for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ )
+ {
+ if (sc->xn_cdata.xn_tx_chain[i] != NULL)
+ {
+ struct mbuf *m = sc->xn_cdata.xn_tx_chain[i];
+
+ tx = &sc->xn_tx_if->ring[requeue_idx++].req;
+
+ tx->id = i;
+ tx->addr = vtomach(mtod(m, vm_offset_t));
+ tx->size = m->m_pkthdr.len;
+ sc->xn_txcnt++;
+ }
+ }
+ wmb();
+ sc->xn_tx_if->req_prod = requeue_idx;
+
+ /* Rebuild the RX buffer freelist and the RX ring itself. */
+ for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ )
+ if (sc->xn_cdata.xn_rx_chain[i] != NULL)
+ sc->xn_rx_if->ring[requeue_idx++].req.id = i;
+ wmb();
+ sc->xn_rx_if->req_prod = requeue_idx;
+
+ printk("[XEN] Netfront recovered tx=%d rxfree=%d\n",
+ sc->xn_tx_if->req_prod,sc->xn_rx_if->req_prod);
+
+
+ /* Step 3: All public and private state should now be sane. Get
+ * ready to start sending and receiving packets and give the driver
+ * domain a kick because we've probably just requeued some
+ * packets.
+ */
+ sc->xn_backend_state = BEST_CONNECTED;
+ wmb();
+ notify_via_evtchn(status->evtchn);
+ xn_txeof(sc);
+
+ XN_UNLOCK(sc);
+}
+
+
+static void
+vif_show(struct xn_softc *sc)
+{
+#if DEBUG
+ if (sc) {
+ IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
+ sc->xn_ifno,
+ be_state_name[sc->xn_backend_state],
+ sc->xn_user_state ? "open" : "closed",
+ sc->xn_evtchn,
+ sc->xn_irq,
+ sc->xn_tx_if,
+ sc->xn_rx_if);
+ } else {
+ IPRINTK("<vif NULL>\n");
+ }
+#endif
+}
+
+/* Send a connect message to xend to tell it to bring up the interface. */
+static void
+send_interface_connect(struct xn_softc *sc)
+{
+ ctrl_msg_t cmsg = {
+ .type = CMSG_NETIF_FE,
+ .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT,
+ .length = sizeof(netif_fe_interface_connect_t),
+ };
+ netif_fe_interface_connect_t *msg = (void*)cmsg.msg;
+
+ vif_show(sc);
+ msg->handle = sc->xn_ifno;
+ msg->tx_shmem_frame = (vtomach(sc->xn_tx_if) >> PAGE_SHIFT);
+ msg->rx_shmem_frame = (vtomach(sc->xn_rx_if) >> PAGE_SHIFT);
+
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+/* Send a driver status notification to the domain controller. */
+static int
+send_driver_status(int ok)
+{
+ int err = 0;
+ ctrl_msg_t cmsg = {
+ .type = CMSG_NETIF_FE,
+ .subtype = CMSG_NETIF_FE_DRIVER_STATUS,
+ .length = sizeof(netif_fe_driver_status_t),
+ };
+ netif_fe_driver_status_t *msg = (void*)cmsg.msg;
+
+ msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN);
+ err = ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+ return err;
+}
+
+/* Stop network device and free tx/rx queues and irq.
+ */
+static void
+vif_release(struct xn_softc *sc)
+{
+ /* Stop old i/f to prevent errors whilst we rebuild the state. */
+ XN_LOCK(sc);
+ /* sc->xn_backend_state = BEST_DISCONNECTED; */
+ XN_UNLOCK(sc);
+
+ /* Free resources. */
+ if(sc->xn_tx_if != NULL) {
+ unbind_evtchn_from_irq(sc->xn_evtchn);
+ free(sc->xn_tx_if, M_DEVBUF);
+ free(sc->xn_rx_if, M_DEVBUF);
+ sc->xn_irq = 0;
+ sc->xn_evtchn = 0;
+ sc->xn_tx_if = NULL;
+ sc->xn_rx_if = NULL;
+ }
+}
+
+/* Release vif resources and close it down completely.
+ */
+static void
+vif_close(struct xn_softc *sc)
+{
+ vif_show(sc);
+ WPRINTK("Unexpected netif-CLOSED message in state %s\n",
+ be_state_name[sc->xn_backend_state]);
+ vif_release(sc);
+ sc->xn_backend_state = BEST_CLOSED;
+ /* todo: take dev down and free. */
+ vif_show(sc);
+}
+
+/* Move the vif into disconnected state.
+ * Allocates tx/rx pages.
+ * Sends connect message to xend.
+ */
+static void
+vif_disconnect(struct xn_softc *sc)
+{
+ if (sc->xn_tx_if) free(sc->xn_tx_if, M_DEVBUF);
+ if (sc->xn_rx_if) free(sc->xn_rx_if, M_DEVBUF);
+
+	/* Any old rings were freed just above; allocate fresh shared pages. */
+ sc->xn_tx_if = (netif_tx_interface_t *)malloc(PAGE_SIZE,M_DEVBUF,M_WAITOK);
+ sc->xn_rx_if = (netif_rx_interface_t *)malloc(PAGE_SIZE,M_DEVBUF,M_WAITOK);
+ memset(sc->xn_tx_if, 0, PAGE_SIZE);
+ memset(sc->xn_rx_if, 0, PAGE_SIZE);
+ sc->xn_backend_state = BEST_DISCONNECTED;
+ send_interface_connect(sc);
+ vif_show(sc);
+}
+
+/* Begin interface recovery.
+ *
+ * NB. Whilst we're recovering, we turn the carrier state off. We
+ * take measures to ensure that this device isn't used for
+ * anything. We also stop the queue for this device. Various
+ * different approaches (e.g. continuing to buffer packets) have
+ * been tested but don't appear to improve the overall impact on
+ * TCP connections.
+ *
+ * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
+ * is initiated by a special "RESET" message - disconnect could
+ * just mean we're not allowed to use this interface any more.
+ */
+static void
+vif_reset(struct xn_softc *sc)
+{
+ IPRINTK("Attempting to reconnect network interface: handle=%u\n",
+ sc->xn_ifno);
+ vif_release(sc);
+ vif_disconnect(sc);
+ vif_show(sc);
+}
+
+/* Move the vif into connected state.
+ * Sets the mac and event channel from the message.
+ * Binds the irq to the event channel.
+ */
+static void
+vif_connect(
+ struct xn_softc *sc, netif_fe_interface_status_t *status)
+{
+ memcpy(sc->arpcom.ac_enaddr, status->mac, ETHER_ADDR_LEN);
+ network_connect(sc, status);
+
+ sc->xn_evtchn = status->evtchn;
+ sc->xn_irq = bind_evtchn_to_irq(sc->xn_evtchn);
+
+ (void)intr_add_handler("xn", sc->xn_irq, (driver_intr_t *)xn_intr, sc,
+ INTR_TYPE_NET | INTR_MPSAFE, &sc->xn_intrhand);
+ netctrl_connected_count();
+ /* vif_wake(dev); Not needed for FreeBSD */
+ vif_show(sc);
+}
+
+/** Create a network device.
+ * @param handle device handle
+ */
+static void
+create_netdev(int handle, struct xn_softc **sc)
+{
+ int i;
+
+ *sc = (struct xn_softc *)malloc(sizeof(**sc), M_DEVBUF, M_WAITOK);
+ memset(*sc, 0, sizeof(struct xn_softc));
+
+ (*sc)->xn_backend_state = BEST_CLOSED;
+ (*sc)->xn_user_state = UST_CLOSED;
+ (*sc)->xn_ifno = handle;
+
+ XN_LOCK_INIT(*sc, "xnetif");
+ (*sc)->xn_rx_target = RX_MIN_TARGET;
+
+	/* Initialise the tx/rx free-index lists to chain every ring entry. */
+ for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ )
+ (*sc)->xn_tx_free_idxs[i] = (i+1);
+ for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ )
+ (*sc)->xn_rx_free_idxs[i] = (i+1);
+
+ SLIST_INSERT_HEAD(&xn_dev_list, *sc, xn_links);
+}
+
+/* Get the target interface for a status message.
+ * Creates the interface when it makes sense.
+ * The returned interface may be null when there is no error.
+ *
+ * @param status status message
+ * @param sc return parameter for interface state
+ * @return 0 on success, error code otherwise
+ */
+static int
+target_vif(netif_fe_interface_status_t *status, struct xn_softc **sc)
+{
+ int err = 0;
+
+ XENPRINTF("> handle=%d\n", status->handle);
+ if ( status->handle < 0 )
+ {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ if ( (*sc = find_sc_by_handle(status->handle)) != NULL )
+ goto exit;
+
+ if ( status->status == NETIF_INTERFACE_STATUS_CLOSED )
+ goto exit;
+ if ( status->status == NETIF_INTERFACE_STATUS_CHANGED )
+ goto exit;
+
+ /* It's a new interface in a good state - create it. */
+ XENPRINTF("> create device...\n");
+ create_netdev(status->handle, sc);
+ netctrl.interface_n++;
+
+exit:
+ return err;
+}
+
+/* Handle an interface status message. */
+static void
+netif_interface_status(netif_fe_interface_status_t *status)
+{
+ int err = 0;
+ struct xn_softc *sc = NULL;
+
+ XENPRINTF("> status=%s handle=%d\n",
+ status_name[status->status], status->handle);
+
+ if ( (err = target_vif(status, &sc)) != 0 )
+ {
+ WPRINTK("Invalid netif: handle=%u\n", status->handle);
+ return;
+ }
+
+ if ( sc == NULL )
+ {
+ XENPRINTF("> no vif\n");
+ return;
+ }
+
+ vif_show(sc);
+
+ switch ( status->status )
+ {
+ case NETIF_INTERFACE_STATUS_CLOSED:
+ switch ( sc->xn_backend_state )
+ {
+ case BEST_CLOSED:
+ case BEST_DISCONNECTED:
+ case BEST_CONNECTED:
+ vif_close(sc);
+ break;
+ }
+ break;
+
+ case NETIF_INTERFACE_STATUS_DISCONNECTED:
+ switch ( sc->xn_backend_state )
+ {
+ case BEST_CLOSED:
+ vif_disconnect(sc);
+ break;
+ case BEST_DISCONNECTED:
+ case BEST_CONNECTED:
+ vif_reset(sc);
+ break;
+ }
+ break;
+
+ case NETIF_INTERFACE_STATUS_CONNECTED:
+ switch ( sc->xn_backend_state )
+ {
+ case BEST_CLOSED:
+ WPRINTK("Unexpected netif status %s in state %s\n",
+ status_name[status->status],
+ be_state_name[sc->xn_backend_state]);
+ vif_disconnect(sc);
+ vif_connect(sc, status);
+ break;
+ case BEST_DISCONNECTED:
+ vif_connect(sc, status);
+ break;
+ }
+ break;
+
+ case NETIF_INTERFACE_STATUS_CHANGED:
+ /*
+ * The domain controller is notifying us that a device has been
+ * added or removed.
+ */
+ break;
+
+ default:
+ WPRINTK("Invalid netif status code %d\n", status->status);
+ break;
+ }
+ vif_show(sc);
+}
+
+/*
+ * Initialize the network control interface.
+ */
+static void
+netif_driver_status(netif_fe_driver_status_t *status)
+{
+ XENPRINTF("> status=%d\n", status->status);
+ netctrl.up = status->status;
+ //netctrl.interface_n = status->max_handle;
+ //netctrl.connected_n = 0;
+ netctrl_connected_count();
+}
+
+/* Receive handler for control messages. */
+static void
+netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+
+ switch ( msg->subtype )
+ {
+ case CMSG_NETIF_FE_INTERFACE_STATUS:
+ if ( msg->length != sizeof(netif_fe_interface_status_t) )
+ goto error;
+ netif_interface_status((netif_fe_interface_status_t *)
+ &msg->msg[0]);
+ break;
+
+ case CMSG_NETIF_FE_DRIVER_STATUS:
+ if ( msg->length != sizeof(netif_fe_driver_status_t) )
+ goto error;
+ netif_driver_status((netif_fe_driver_status_t *)
+ &msg->msg[0]);
+ break;
+
+ error:
+ default:
+ msg->length = 0;
+ break;
+ }
+
+ ctrl_if_send_response(msg);
+}
+
+#if 1
+/* Wait for all interfaces to be connected.
+ *
+ * This works OK, but we'd like to use the probing mode (see below).
+ */
+static int probe_interfaces(void)
+{
+ int err = 0, conn = 0;
+ int wait_i, wait_n = 100;
+
+ for ( wait_i = 0; wait_i < wait_n; wait_i++)
+ {
+ XENPRINTF("> wait_i=%d\n", wait_i);
+ conn = netctrl_connected();
+ if(conn) break;
+ tsleep(&xn_dev_list, PWAIT | PCATCH, "netif", hz);
+ }
+
+ XENPRINTF("> wait finished...\n");
+ if ( conn <= 0 )
+ {
+ err = netctrl_err(-ENETDOWN);
+ WPRINTK("Failed to connect all virtual interfaces: err=%d\n", err);
+ }
+
+ XENPRINTF("< err=%d\n", err);
+
+ return err;
+}
+#else
+/* Probe for interfaces until no more are found.
+ *
+ * This is the mode we'd like to use, but at the moment it panics the kernel.
+*/
+static int
+probe_interfaces(void)
+{
+ int err = 0;
+ int wait_i, wait_n = 100;
+ ctrl_msg_t cmsg = {
+ .type = CMSG_NETIF_FE,
+ .subtype = CMSG_NETIF_FE_INTERFACE_STATUS,
+ .length = sizeof(netif_fe_interface_status_t),
+ };
+ netif_fe_interface_status_t msg = {};
+ ctrl_msg_t rmsg = {};
+ netif_fe_interface_status_t *reply = (void*)rmsg.msg;
+ int state = TASK_UNINTERRUPTIBLE;
+ uint32_t query = -1;
+
+
+ netctrl.interface_n = 0;
+ for ( wait_i = 0; wait_i < wait_n; wait_i++ )
+ {
+ XENPRINTF("> wait_i=%d query=%d\n", wait_i, query);
+ msg.handle = query;
+ memcpy(cmsg.msg, &msg, sizeof(msg));
+ XENPRINTF("> set_current_state...\n");
+ set_current_state(state);
+ XENPRINTF("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
+ XENPRINTF("> sending...\n");
+ err = ctrl_if_send_message_and_get_response(&cmsg, &rmsg, state);
+ XENPRINTF("> err=%d\n", err);
+ if(err) goto exit;
+ XENPRINTF("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
+ if((int)reply->handle < 0){
+ // No more interfaces.
+ break;
+ }
+ query = -reply->handle - 2;
+ XENPRINTF(">netif_interface_status ...\n");
+ netif_interface_status(reply);
+ }
+
+ exit:
+ if ( err )
+ {
+ err = netctrl_err(-ENETDOWN);
+ WPRINTK("Connecting virtual network interfaces failed: err=%d\n", err);
+ }
+
+ XENPRINTF("< err=%d\n", err);
+ return err;
+}
+
+#endif
+
+static void
+xn_init(void *unused)
+{
+
+ int err = 0;
+
+ netctrl_init();
+ (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ send_driver_status(1);
+ err = probe_interfaces();
+
+ if (err)
+ ctrl_if_unregister_receiver(CMSG_NETIF_FE, netif_ctrlif_rx);
+}
+
+SYSINIT(xndev, SI_SUB_PSEUDO, SI_ORDER_ANY, xn_init, NULL);
diff --git a/freebsd-5.3-xen-sparse/kern/kern_fork.c b/freebsd-5.3-xen-sparse/kern/kern_fork.c
new file mode 100644
index 0000000000..4b38ee45b6
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/kern/kern_fork.c
@@ -0,0 +1,846 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/kern/kern_fork.c,v 1.234.2.4 2004/09/18 04:11:35 julian Exp $");
+
+#include "opt_ktrace.h"
+#include "opt_mac.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/eventhandler.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/sysctl.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/pioctl.h>
+#include <sys/resourcevar.h>
+#include <sys/sched.h>
+#include <sys/syscall.h>
+#include <sys/vmmeter.h>
+#include <sys/vnode.h>
+#include <sys/acct.h>
+#include <sys/mac.h>
+#include <sys/ktr.h>
+#include <sys/ktrace.h>
+#include <sys/unistd.h>
+#include <sys/sx.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/uma.h>
+
+#include <sys/user.h>
+#include <machine/critical.h>
+
+#ifndef _SYS_SYSPROTO_H_
+struct fork_args {
+ int dummy;
+};
+#endif
+
+static int forksleep; /* Place for fork1() to sleep on. */
+
+/*
+ * MPSAFE
+ */
+/* ARGSUSED */
+int
+fork(td, uap)
+ struct thread *td;
+ struct fork_args *uap;
+{
+ int error;
+ struct proc *p2;
+
+ error = fork1(td, RFFDG | RFPROC, 0, &p2);
+ if (error == 0) {
+ td->td_retval[0] = p2->p_pid;
+ td->td_retval[1] = 0;
+ }
+ return (error);
+}
+
+/*
+ * MPSAFE
+ */
+/* ARGSUSED */
+int
+vfork(td, uap)
+ struct thread *td;
+ struct vfork_args *uap;
+{
+ int error;
+ struct proc *p2;
+
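+	/*
+	 * Note that RFPPWAIT and RFMEM are disabled here, so this vfork()
+	 * behaves like a plain fork() instead of sharing the parent's
+	 * address space and blocking it until exec.
+	 */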
+ error = fork1(td, RFFDG | RFPROC /* | RFPPWAIT | RFMEM */, 0, &p2);
+ if (error == 0) {
+ td->td_retval[0] = p2->p_pid;
+ td->td_retval[1] = 0;
+ }
+ return (error);
+}
+
+/*
+ * MPSAFE
+ */
+int
+rfork(td, uap)
+ struct thread *td;
+ struct rfork_args *uap;
+{
+ struct proc *p2;
+ int error;
+
+ /* Don't allow kernel-only flags. */
+ if ((uap->flags & RFKERNELONLY) != 0)
+ return (EINVAL);
+
+ error = fork1(td, uap->flags, 0, &p2);
+ if (error == 0) {
+ td->td_retval[0] = p2 ? p2->p_pid : 0;
+ td->td_retval[1] = 0;
+ }
+ return (error);
+}
+
+int nprocs = 1; /* process 0 */
+int lastpid = 0;
+SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
+ "Last used PID");
+
+/*
+ * Random component to lastpid generation. We mix in a random factor to make
+ * it a little harder to predict. We sanity check the modulus value to avoid
+ * doing it in critical paths. Don't let it be too small or we pointlessly
+ * waste randomness entropy, and don't let it be impossibly large. Using a
+ * modulus that is too big causes a LOT more process table scans and slows
+ * down fork processing as the pidchecked caching is defeated.
+ */
+static int randompid = 0;
+
+static int
+sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
+{
+ int error, pid;
+
+ error = sysctl_wire_old_buffer(req, sizeof(int));
+ if (error != 0)
+ return(error);
+ sx_xlock(&allproc_lock);
+ pid = randompid;
+ error = sysctl_handle_int(oidp, &pid, 0, req);
+ if (error == 0 && req->newptr != NULL) {
+ if (pid < 0 || pid > PID_MAX - 100) /* out of range */
+ pid = PID_MAX - 100;
+ else if (pid < 2) /* NOP */
+ pid = 0;
+ else if (pid < 100) /* Make it reasonable */
+ pid = 100;
+ randompid = pid;
+ }
+ sx_xunlock(&allproc_lock);
+ return (error);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
+ 0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
+
+int
+fork1(td, flags, pages, procp)
+ struct thread *td;
+ int flags;
+ int pages;
+ struct proc **procp;
+{
+ struct proc *p1, *p2, *pptr;
+ uid_t uid;
+ struct proc *newproc;
+ int ok, trypid;
+ static int curfail, pidchecked = 0;
+ static struct timeval lastfail;
+ struct filedesc *fd;
+ struct filedesc_to_leader *fdtol;
+ struct thread *td2;
+ struct ksegrp *kg2;
+ struct sigacts *newsigacts;
+ int error;
+
+ /* Can't copy and clear. */
+ if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
+ return (EINVAL);
+
+ p1 = td->td_proc;
+
+ /*
+ * Here we don't create a new process, but we divorce
+ * certain parts of a process from itself.
+ */
+ if ((flags & RFPROC) == 0) {
+ mtx_lock(&Giant);
+ vm_forkproc(td, NULL, NULL, flags);
+ mtx_unlock(&Giant);
+
+ /*
+ * Close all file descriptors.
+ */
+ if (flags & RFCFDG) {
+ struct filedesc *fdtmp;
+ FILEDESC_LOCK(td->td_proc->p_fd);
+ fdtmp = fdinit(td->td_proc->p_fd);
+ FILEDESC_UNLOCK(td->td_proc->p_fd);
+ fdfree(td);
+ p1->p_fd = fdtmp;
+ }
+
+ /*
+ * Unshare file descriptors (from parent).
+ */
+ if (flags & RFFDG) {
+ FILEDESC_LOCK(p1->p_fd);
+ if (p1->p_fd->fd_refcnt > 1) {
+ struct filedesc *newfd;
+
+ newfd = fdcopy(td->td_proc->p_fd);
+ FILEDESC_UNLOCK(p1->p_fd);
+ fdfree(td);
+ p1->p_fd = newfd;
+ } else
+ FILEDESC_UNLOCK(p1->p_fd);
+ }
+ *procp = NULL;
+ return (0);
+ }
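+ /*
+ * (Illustration: a call such as rfork(RFCFDG) takes the path above,
+ * leaving the caller with a freshly initialized file descriptor table,
+ * no new child, and *procp set to NULL.)
+ */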
+
+ /*
+ * Note that the 1:1 threading model allows forking with only one thread
+ * coming out on the other side, on the expectation that the process is
+ * about to exec.
+ */
+ if (p1->p_flag & P_HADTHREADS) {
+ /*
+ * Suspend the other threads for the moment.  Since user space is
+ * copied, it must remain stable.  In addition, all threads (from
+ * the user perspective) need to be either suspended or in the
+ * kernel, where they will try to restart in the parent and will
+ * be aborted in the child.
+ */
+ PROC_LOCK(p1);
+ if (thread_single(SINGLE_NO_EXIT)) {
+ /* Abort. Someone else is single threading before us. */
+ PROC_UNLOCK(p1);
+ return (ERESTART);
+ }
+ PROC_UNLOCK(p1);
+ /*
+ * All other activity in this process
+ * is now suspended at the user boundary,
+ * (or other safe places if we think of any).
+ */
+ }
+
+ /* Allocate new proc. */
+ newproc = uma_zalloc(proc_zone, M_WAITOK);
+#ifdef MAC
+ mac_init_proc(newproc);
+#endif
+ knlist_init(&newproc->p_klist, &newproc->p_mtx);
+
+ /* We have to lock the process tree while we look for a pid. */
+ sx_slock(&proctree_lock);
+
+ /*
+ * Although process entries are dynamically created, we still keep
+ * a global limit on the maximum number we will create. Don't allow
+ * a nonprivileged user to use the last ten processes; don't let root
+ * exceed the limit. The variable nprocs is the current number of
+ * processes, maxproc is the limit.
+ */
+ sx_xlock(&allproc_lock);
+ uid = td->td_ucred->cr_ruid;
+ if ((nprocs >= maxproc - 10 &&
+ suser_cred(td->td_ucred, SUSER_RUID) != 0) ||
+ nprocs >= maxproc) {
+ error = EAGAIN;
+ goto fail;
+ }
+
+ /*
+ * Increment the count of procs running with this uid. Don't allow
+ * a nonprivileged user to exceed their current limit.
+ */
+ PROC_LOCK(p1);
+ ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
+ (uid != 0) ? lim_cur(p1, RLIMIT_NPROC) : 0);
+ PROC_UNLOCK(p1);
+ if (!ok) {
+ error = EAGAIN;
+ goto fail;
+ }
+
+ /*
+ * Increment the nprocs resource before any blocking can occur.  There
+ * are hard limits on the number of processes that can run.
+ */
+ nprocs++;
+
+ /*
+ * Find an unused process ID. We remember a range of unused IDs
+ * ready to use (from lastpid+1 through pidchecked-1).
+ *
+ * If RFHIGHPID is set (used during system boot), do not allocate
+ * low-numbered pids.
+ */
+ trypid = lastpid + 1;
+ if (flags & RFHIGHPID) {
+ if (trypid < 10)
+ trypid = 10;
+ } else {
+ if (randompid)
+ trypid += arc4random() % randompid;
+ }
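+ /*
+ * E.g. with kern.randompid set to 100, trypid starts anywhere in
+ * [lastpid + 1, lastpid + 100]; the scan below then skips over any
+ * pid, pgid or sid that is still in use.
+ */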
+retry:
+ /*
+ * If the process ID prototype has wrapped around,
+ * restart somewhat above 0, as the low-numbered procs
+ * tend to include daemons that don't exit.
+ */
+ if (trypid >= PID_MAX) {
+ trypid = trypid % PID_MAX;
+ if (trypid < 100)
+ trypid += 100;
+ pidchecked = 0;
+ }
+ if (trypid >= pidchecked) {
+ int doingzomb = 0;
+
+ pidchecked = PID_MAX;
+ /*
+ * Scan the active and zombie procs to check whether this pid
+ * is in use. Remember the lowest pid that's greater
+ * than trypid, so we can avoid checking for a while.
+ */
+ p2 = LIST_FIRST(&allproc);
+again:
+ for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
+ PROC_LOCK(p2);
+ while (p2->p_pid == trypid ||
+ (p2->p_pgrp != NULL &&
+ (p2->p_pgrp->pg_id == trypid ||
+ (p2->p_session != NULL &&
+ p2->p_session->s_sid == trypid)))) {
+ trypid++;
+ if (trypid >= pidchecked) {
+ PROC_UNLOCK(p2);
+ goto retry;
+ }
+ }
+ if (p2->p_pid > trypid && pidchecked > p2->p_pid)
+ pidchecked = p2->p_pid;
+ if (p2->p_pgrp != NULL) {
+ if (p2->p_pgrp->pg_id > trypid &&
+ pidchecked > p2->p_pgrp->pg_id)
+ pidchecked = p2->p_pgrp->pg_id;
+ if (p2->p_session != NULL &&
+ p2->p_session->s_sid > trypid &&
+ pidchecked > p2->p_session->s_sid)
+ pidchecked = p2->p_session->s_sid;
+ }
+ PROC_UNLOCK(p2);
+ }
+ if (!doingzomb) {
+ doingzomb = 1;
+ p2 = LIST_FIRST(&zombproc);
+ goto again;
+ }
+ }
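+ /*
+ * On exit from the scan, no pid, pgid or sid lies in the window
+ * [trypid, pidchecked), so subsequent forks can allocate from it
+ * without rescanning the proc lists.
+ */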
+ sx_sunlock(&proctree_lock);
+
+ /*
+ * RFHIGHPID does not mess with the lastpid counter during boot.
+ */
+ if (flags & RFHIGHPID)
+ pidchecked = 0;
+ else
+ lastpid = trypid;
+
+ p2 = newproc;
+ p2->p_state = PRS_NEW; /* protect against others */
+ p2->p_pid = trypid;
+ LIST_INSERT_HEAD(&allproc, p2, p_list);
+ LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
+ sx_xunlock(&allproc_lock);
+
+ /*
+ * Malloc things while we don't hold any locks.
+ */
+ if (flags & RFSIGSHARE)
+ newsigacts = NULL;
+ else
+ newsigacts = sigacts_alloc();
+
+ /*
+ * Copy filedesc.
+ */
+ if (flags & RFCFDG) {
+ FILEDESC_LOCK(td->td_proc->p_fd);
+ fd = fdinit(td->td_proc->p_fd);
+ FILEDESC_UNLOCK(td->td_proc->p_fd);
+ fdtol = NULL;
+ } else if (flags & RFFDG) {
+ FILEDESC_LOCK(p1->p_fd);
+ fd = fdcopy(td->td_proc->p_fd);
+ FILEDESC_UNLOCK(p1->p_fd);
+ fdtol = NULL;
+ } else {
+ fd = fdshare(p1->p_fd);
+ if (p1->p_fdtol == NULL)
+ p1->p_fdtol =
+ filedesc_to_leader_alloc(NULL,
+ NULL,
+ p1->p_leader);
+ if ((flags & RFTHREAD) != 0) {
+ /*
+ * Shared file descriptor table and
+ * shared process leaders.
+ */
+ fdtol = p1->p_fdtol;
+ FILEDESC_LOCK(p1->p_fd);
+ fdtol->fdl_refcount++;
+ FILEDESC_UNLOCK(p1->p_fd);
+ } else {
+ /*
+ * Shared file descriptor table, and
+ * different process leaders
+ */
+ fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
+ p1->p_fd,
+ p2);
+ }
+ }
+ /*
+ * Make a proc table entry for the new process.
+ * Start by zeroing the section of proc that is zero-initialized,
+ * then copy the section that is copied directly from the parent.
+ */
+ td2 = FIRST_THREAD_IN_PROC(p2);
+ kg2 = FIRST_KSEGRP_IN_PROC(p2);
+
+ /* Allocate and switch to an alternate kstack if specified. */
+ if (pages != 0)
+ vm_thread_new_altkstack(td2, pages);
+
+ PROC_LOCK(p2);
+ PROC_LOCK(p1);
+
+#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
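+/* e.g. RANGEOF(struct proc, p_startzero, p_endzero) is the number of bytes
+ * between the p_startzero and p_endzero markers within struct proc. */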
+
+ bzero(&p2->p_startzero,
+ (unsigned) RANGEOF(struct proc, p_startzero, p_endzero));
+ bzero(&td2->td_startzero,
+ (unsigned) RANGEOF(struct thread, td_startzero, td_endzero));
+ bzero(&kg2->kg_startzero,
+ (unsigned) RANGEOF(struct ksegrp, kg_startzero, kg_endzero));
+
+ bcopy(&p1->p_startcopy, &p2->p_startcopy,
+ (unsigned) RANGEOF(struct proc, p_startcopy, p_endcopy));
+ bcopy(&td->td_startcopy, &td2->td_startcopy,
+ (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
+ bcopy(&td->td_ksegrp->kg_startcopy, &kg2->kg_startcopy,
+ (unsigned) RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
+#undef RANGEOF
+
+ td2->td_sigstk = td->td_sigstk;
+
+ /*
+ * Duplicate sub-structures as needed.
+ * Increase reference counts on shared objects.
+ * The p_stats substruct is set in vm_forkproc.
+ */
+ p2->p_flag = 0;
+ if (p1->p_flag & P_PROFIL)
+ startprofclock(p2);
+ mtx_lock_spin(&sched_lock);
+ p2->p_sflag = PS_INMEM;
+ /*
+ * Allow the scheduler to adjust the priority of the child and
+ * parent while we hold the sched_lock.
+ */
+ sched_fork(td, td2);
+
+ mtx_unlock_spin(&sched_lock);
+ p2->p_ucred = crhold(td->td_ucred);
+ td2->td_ucred = crhold(p2->p_ucred); /* XXXKSE */
+
+ pargs_hold(p2->p_args);
+
+ if (flags & RFSIGSHARE) {
+ p2->p_sigacts = sigacts_hold(p1->p_sigacts);
+ } else {
+ sigacts_copy(newsigacts, p1->p_sigacts);
+ p2->p_sigacts = newsigacts;
+ }
+ if (flags & RFLINUXTHPN)
+ p2->p_sigparent = SIGUSR1;
+ else
+ p2->p_sigparent = SIGCHLD;
+
+ p2->p_textvp = p1->p_textvp;
+ p2->p_fd = fd;
+ p2->p_fdtol = fdtol;
+
+ /*
+ * p_limit is copy-on-write. Bump its refcount.
+ */
+ p2->p_limit = lim_hold(p1->p_limit);
+ PROC_UNLOCK(p1);
+ PROC_UNLOCK(p2);
+
+ /* Bump references to the text vnode (for procfs) */
+ if (p2->p_textvp)
+ vref(p2->p_textvp);
+
+ /*
+ * Set up linkage for kernel based threading.
+ */
+ if ((flags & RFTHREAD) != 0) {
+ mtx_lock(&ppeers_lock);
+ p2->p_peers = p1->p_peers;
+ p1->p_peers = p2;
+ p2->p_leader = p1->p_leader;
+ mtx_unlock(&ppeers_lock);
+ PROC_LOCK(p1->p_leader);
+ if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
+ PROC_UNLOCK(p1->p_leader);
+ /*
+ * The task leader is exiting, so process p1 is
+ * going to be killed shortly. Since p1 obviously
+ * isn't dead yet, we know that the leader is either
+ * sending SIGKILLs to all the processes in this
+ * task or is sleeping waiting for all the peers to
+ * exit. We let p1 complete the fork, but we need
+ * to go ahead and kill the new process p2 since
+ * the task leader may not get a chance to send
+ * SIGKILL to it. We leave it on the list so that
+ * the task leader will wait for this new process
+ * to commit suicide.
+ */
+ PROC_LOCK(p2);
+ psignal(p2, SIGKILL);
+ PROC_UNLOCK(p2);
+ } else
+ PROC_UNLOCK(p1->p_leader);
+ } else {
+ p2->p_peers = NULL;
+ p2->p_leader = p2;
+ }
+
+ sx_xlock(&proctree_lock);
+ PGRP_LOCK(p1->p_pgrp);
+ PROC_LOCK(p2);
+ PROC_LOCK(p1);
+
+ /*
+ * Preserve some more flags in the subprocess.  P_PROFIL has already
+ * been preserved.
+ */
+ p2->p_flag |= p1->p_flag & P_SUGID;
+ td2->td_pflags |= td->td_pflags & TDP_ALTSTACK;
+ SESS_LOCK(p1->p_session);
+ if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
+ p2->p_flag |= P_CONTROLT;
+ SESS_UNLOCK(p1->p_session);
+ if (flags & RFPPWAIT)
+ p2->p_flag |= P_PPWAIT;
+
+ p2->p_pgrp = p1->p_pgrp;
+ LIST_INSERT_AFTER(p1, p2, p_pglist);
+ PGRP_UNLOCK(p1->p_pgrp);
+ LIST_INIT(&p2->p_children);
+
+ callout_init(&p2->p_itcallout, CALLOUT_MPSAFE);
+
+#ifdef KTRACE
+ /*
+ * Copy traceflag and tracefile if enabled.
+ */
+ mtx_lock(&ktrace_mtx);
+ KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
+ if (p1->p_traceflag & KTRFAC_INHERIT) {
+ p2->p_traceflag = p1->p_traceflag;
+ if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
+ VREF(p2->p_tracevp);
+ KASSERT(p1->p_tracecred != NULL,
+ ("ktrace vnode with no cred"));
+ p2->p_tracecred = crhold(p1->p_tracecred);
+ }
+ }
+ mtx_unlock(&ktrace_mtx);
+#endif
+
+ /*
+ * If PF_FORK is set, the child process inherits the
+ * procfs ioctl flags from its parent.
+ */
+ if (p1->p_pfsflags & PF_FORK) {
+ p2->p_stops = p1->p_stops;
+ p2->p_pfsflags = p1->p_pfsflags;
+ }
+
+ /*
+ * This begins the section where we must prevent the parent
+ * from being swapped.
+ */
+ _PHOLD(p1);
+ PROC_UNLOCK(p1);
+
+ /*
+ * Attach the new process to its parent.
+ *
+ * If RFNOWAIT is set, the newly created process becomes a child
+ * of init. This effectively disassociates the child from the
+ * parent.
+ */
+ if (flags & RFNOWAIT)
+ pptr = initproc;
+ else
+ pptr = p1;
+ p2->p_pptr = pptr;
+ LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
+ sx_xunlock(&proctree_lock);
+
+ /* Inform accounting that we have forked. */
+ p2->p_acflag = AFORK;
+ PROC_UNLOCK(p2);
+
+ /*
+ * Finish creating the child process.  It will return via a different
+ * execution path later (i.e., directly into user mode).
+ */
+ mtx_lock(&Giant);
+ vm_forkproc(td, p2, td2, flags);
+
+ if (flags == (RFFDG | RFPROC)) {
+ cnt.v_forks++;
+ cnt.v_forkpages += p2->p_vmspace->vm_dsize +
+ p2->p_vmspace->vm_ssize;
+ } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
+ cnt.v_vforks++;
+ cnt.v_vforkpages += p2->p_vmspace->vm_dsize +
+ p2->p_vmspace->vm_ssize;
+ } else if (p1 == &proc0) {
+ cnt.v_kthreads++;
+ cnt.v_kthreadpages += p2->p_vmspace->vm_dsize +
+ p2->p_vmspace->vm_ssize;
+ } else {
+ cnt.v_rforks++;
+ cnt.v_rforkpages += p2->p_vmspace->vm_dsize +
+ p2->p_vmspace->vm_ssize;
+ }
+ mtx_unlock(&Giant);
+
+ /*
+ * Both processes are set up; now check if any loadable modules want
+ * to adjust anything.
+ *   What if they have an error? XXX
+ */
+ EVENTHANDLER_INVOKE(process_fork, p1, p2, flags);
+
+ /*
+ * Set the child start time and mark the process as being complete.
+ */
+ microuptime(&p2->p_stats->p_start);
+ mtx_lock_spin(&sched_lock);
+ p2->p_state = PRS_NORMAL;
+
+ /*
+ * If RFSTOPPED was not requested, make the child runnable and add
+ * it to the run queue.
+ */
+ if ((flags & RFSTOPPED) == 0) {
+ TD_SET_CAN_RUN(td2);
+ setrunqueue(td2, SRQ_BORING);
+ }
+ mtx_unlock_spin(&sched_lock);
+
+ /*
+ * The parent can now be swapped again.
+ */
+ PROC_LOCK(p1);
+ _PRELE(p1);
+
+ /*
+ * Tell any interested parties about the new process.
+ */
+ KNOTE_LOCKED(&p1->p_klist, NOTE_FORK | p2->p_pid);
+
+ PROC_UNLOCK(p1);
+
+ /*
+ * Preserve synchronization semantics of vfork. If waiting for
+ * child to exec or exit, set P_PPWAIT on child, and sleep on our
+ * proc (in case of exit).
+ */
+ PROC_LOCK(p2);
+ while (p2->p_flag & P_PPWAIT)
+ msleep(p1, &p2->p_mtx, PWAIT, "ppwait", 0);
+ PROC_UNLOCK(p2);
+
+ /*
+ * If other threads are waiting, let them continue now.
+ */
+ if (p1->p_flag & P_HADTHREADS) {
+ PROC_LOCK(p1);
+ thread_single_end();
+ PROC_UNLOCK(p1);
+ }
+
+ /*
+ * Return child proc pointer to parent.
+ */
+ *procp = p2;
+ return (0);
+fail:
+ sx_sunlock(&proctree_lock);
+ if (ppsratecheck(&lastfail, &curfail, 1))
+ printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n",
+ uid);
+ sx_xunlock(&allproc_lock);
+#ifdef MAC
+ mac_destroy_proc(newproc);
+#endif
+ uma_zfree(proc_zone, newproc);
+ if (p1->p_flag & P_HADTHREADS) {
+ PROC_LOCK(p1);
+ thread_single_end();
+ PROC_UNLOCK(p1);
+ }
+ tsleep(&forksleep, PUSER, "fork", hz / 2);
+ return (error);
+}
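+
+/*
+ * Illustrative caller (a simplified sketch of kthread_create() from
+ * kern_kthread.c): kernel threads are created via fork1() with RFMEM to
+ * share the kernel address space and RFSTOPPED so the caller can finish
+ * setup before the new process is scheduled:
+ *
+ *	error = fork1(&thread0, RFMEM | RFFDG | RFPROC | RFSTOPPED, 0, &p2);
+ *	if (error)
+ *		return (error);
+ */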
+
+/*
+ * Handle the return of a child process from fork1(). This function
+ * is called from the MD fork_trampoline() entry point.
+ */
+void
+fork_exit(callout, arg, frame)
+ void (*callout)(void *, struct trapframe *);
+ void *arg;
+ struct trapframe *frame;
+{
+ struct proc *p;
+ struct thread *td;
+
+ /*
+ * Finish setting up thread glue so that it begins execution in a
+ * non-nested critical section with sched_lock held but not recursed.
+ */
+ td = curthread;
+ p = td->td_proc;
+ td->td_oncpu = PCPU_GET(cpuid);
+ KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new"));
+
+ sched_lock.mtx_lock = (uintptr_t)td;
+ mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
+ cpu_critical_fork_exit();
+ CTR4(KTR_PROC, "fork_exit: new thread %p (kse %p, pid %d, %s)",
+ td, td->td_sched, p->p_pid, p->p_comm);
+
+ /*
+ * Processes normally resume in mi_switch() after being
+ * cpu_switch()'ed to, but when children start up they arrive here
+ * instead, so we must do much the same things as mi_switch() would.
+ */
+
+ if ((td = PCPU_GET(deadthread))) {
+ PCPU_SET(deadthread, NULL);
+ thread_stash(td);
+ }
+ td = curthread;
+ mtx_unlock_spin(&sched_lock);
+
+ /*
+ * cpu_set_fork_handler intercepts this function call so that it can
+ * call a non-returning function instead, keeping the thread in kernel
+ * mode.  initproc has its own fork handler, but that handler does
+ * return.
+ */
+ KASSERT(callout != NULL, ("NULL callout in fork_exit"));
+ callout(arg, frame);
+
+ /*
+ * Check if a kernel thread misbehaved and returned from its main
+ * function.
+ */
+ PROC_LOCK(p);
+ if (p->p_flag & P_KTHREAD) {
+ PROC_UNLOCK(p);
+ printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",
+ p->p_comm, p->p_pid);
+ kthread_exit(0);
+ }
+ PROC_UNLOCK(p);
+ mtx_assert(&Giant, MA_NOTOWNED);
+}
+
+/*
+ * Simplified back end of syscall(), used when returning from fork()
+ * directly into user mode. Giant is not held on entry, and must not
+ * be held on return. This function is passed in to fork_exit() as the
+ * first parameter and is called when returning to a new userland process.
+ */
+void
+fork_return(td, frame)
+ struct thread *td;
+ struct trapframe *frame;
+{
+
+ userret(td, frame, 0);
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_SYSRET))
+ ktrsysret(SYS_fork, 0, 0);
+#endif
+ mtx_assert(&Giant, MA_NOTOWNED);
+}
diff --git a/freebsd-5.3-xen-sparse/mkbuildtree b/freebsd-5.3-xen-sparse/mkbuildtree
new file mode 100644
index 0000000000..ce4c91d431
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/mkbuildtree
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+# mkbuildtree <build tree>
+#
+# Creates symbolic links in <build tree> for the sparse tree
+# in the current directory.
+
+# Script to determine the relative path between two directories.
+# Copyright (c) D. J. Hawkey Jr. 2002
+# Fixed for Xen project by K. Fraser in 2003.
+abs_to_rel ()
+{
+ local CWD SRCPATH
+
+ if [ "$1" != "/" -a "${1##*[^/]}" = "/" ]; then
+ SRCPATH=${1%?}
+ else
+ SRCPATH=$1
+ fi
+ if [ "$2" != "/" -a "${2##*[^/]}" = "/" ]; then
+ DESTPATH=${2%?}
+ else
+ DESTPATH=$2
+ fi
+
+ CWD=$PWD
+ [ "${1%%[^/]*}" != "/" ] && cd $1 && SRCPATH=$PWD
+ [ "${2%%[^/]*}" != "/" ] && cd $2 && DESTPATH=$PWD
+ [ "$CWD" != "$PWD" ] && cd $CWD
+
+ BASEPATH=$SRCPATH
+
+ [ "$SRCPATH" = "$DESTPATH" ] && DESTPATH="." && return
+ [ "$SRCPATH" = "/" ] && DESTPATH=${DESTPATH#?} && return
+
+ while [ "$BASEPATH/" != "${DESTPATH%${DESTPATH#$BASEPATH/}}" ]; do
+ BASEPATH=${BASEPATH%/*}
+ done
+
+ SRCPATH=${SRCPATH#$BASEPATH}
+ DESTPATH=${DESTPATH#$BASEPATH}
+ DESTPATH=${DESTPATH#?}
+ while [ -n "$SRCPATH" ]; do
+ SRCPATH=${SRCPATH%/*}
+ DESTPATH="../$DESTPATH"
+ done
+
+ [ -z "$BASEPATH" ] && BASEPATH="/"
+ [ "${DESTPATH##*[^/]}" = "/" ] && DESTPATH=${DESTPATH%?}
+}
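+# (Example: "abs_to_rel /a/b/c /a/b/d" leaves "../d" in $DESTPATH; the
+# result is passed back through the DESTPATH/BASEPATH globals.)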
+
+# relative_lndir <target_dir>
+# Creates a tree of symlinks in the current working directory that mirror
+# real files in <target_dir>. <target_dir> should be relative to the current
+# working directory. Symlinks in <target_dir> are ignored. Source-control files
+# are ignored.
+relative_lndir ()
+{
+ local SYMLINK_DIR REAL_DIR pref i j
+ SYMLINK_DIR=$PWD
+ REAL_DIR=$1
+ (
+ cd $REAL_DIR
+ for i in `find . -type d | grep -v SCCS`; do
+ [ -d $SYMLINK_DIR/$i ] || mkdir -p $SYMLINK_DIR/$i
+ (
+ cd $i
+ pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'`
+ for j in `find . -type f -o -type l -maxdepth 1`; do
+ ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j
+ done
+ )
+ done
+ )
+}
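+# (Example: "(cd sys && relative_lndir ../${RS}/sys)", as used near the end
+# of this script, mirrors the sparse sys tree into an existing checkout's
+# sys directory via relative symlinks.)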
+
+[ "$1" == "" ] && { echo "Syntax: $0 <linux tree to xenify>"; exit 1; }
+
+# Get absolute path to the destination directory
+pushd . >/dev/null
+cd ${1}
+AD=$PWD
+popd >/dev/null
+
+# Get absolute path to the source directory
+AS=`pwd`
+
+# Get name of sparse directory
+SDN=$(basename $AS)
+
+# Get path to source, relative to destination
+abs_to_rel ${AD} ${AS}
+RS=$DESTPATH
+
+# Remove old copies of files and directories at the destination
+for i in `find sys -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done
+
+# We now work from the destination directory
+cd ${AD}
+
+# Remove old symlinks
+find sys -type l | while read f
+do
+ case $(readlink $f) in
+ */$SDN/*)
+ rm -f $f
+ ;;
+ esac
+done
+
+if [ -f ${AD}/BUILDING ]; then
+ # Create symlinks of files and directories which exist in the sparse source
+ (cd sys && relative_lndir ../${RS}/sys)
+else
+ # Create symlinks of files and directories which exist in the sparse source
+ relative_lndir ${RS}
+ rm -f mkbuildtree
+fi
+
diff --git a/freebsd-5.3-xen-sparse/xenfbsd_kernel_build b/freebsd-5.3-xen-sparse/xenfbsd_kernel_build
new file mode 100644
index 0000000000..dc2c927c06
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/xenfbsd_kernel_build
@@ -0,0 +1,7 @@
+#!/bin/csh -f
+cd i386-xen/conf
+config XENCONF
+cd ../compile/XENCONF
+make kernel-clean
+ln -s ../../include/xen-public/io/ring.h
+make kernel-depend; make -j4 kernel