-rw-r--r--  docs/src/user.tex | 214
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/Makefile | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c | 13
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S | 23
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c | 19
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/mm/init.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/kernel/reboot.c | 19
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c | 4
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S | 31
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c | 11
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c | 5
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c | 15
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c | 15
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c | 8
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c | 3
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c | 5
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c | 165
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 2
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c | 3
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 17
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 16
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 19
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/console/console.c | 40
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/net_driver_util.c | 2
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/interface.c | 30
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 5
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 12
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 33
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c | 15
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c | 6
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c | 8
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c | 59
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 54
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c | 42
-rw-r--r--  linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h | 9
-rw-r--r--  linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h | 33
-rw-r--r--  linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h | 3
-rw-r--r--  linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h | 12
-rw-r--r--  linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h | 5
-rw-r--r--  linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h | 75
-rw-r--r--  linux-2.6-xen-sparse/include/asm-xen/xenbus.h | 28
-rw-r--r--  patches/linux-2.6.12/i386-mach-io-check-nmi.patch | 43
-rw-r--r--  tools/Makefile | 1
-rw-r--r--  tools/Rules.mk | 2
-rw-r--r--  tools/console/client/main.c | 4
-rw-r--r--  tools/console/daemon/io.c | 6
-rw-r--r--  tools/debugger/libxendebug/xendebug.c | 12
-rwxr-xr-x  tools/examples/network-bridge | 60
-rw-r--r--  tools/examples/xen-network-common.sh | 17
-rw-r--r--  tools/examples/xmexample.vmx | 5
-rw-r--r--  tools/firmware/vmxassist/acpi_madt.c | 63
-rw-r--r--  tools/firmware/vmxassist/vm86.h | 2
-rw-r--r--  tools/guest-headers/Makefile | 13
-rw-r--r--  tools/ioemu/hw/i8254.c | 2
-rw-r--r--  tools/ioemu/hw/i8259.c | 2
-rw-r--r--  tools/ioemu/hw/i8259_stub.c | 2
-rw-r--r--  tools/ioemu/target-i386-dm/helper2.c | 2
-rw-r--r--  tools/libxc/xc_core.c | 2
-rw-r--r--  tools/libxc/xc_domain.c | 42
-rw-r--r--  tools/libxc/xc_ia64_stubs.c | 15
-rw-r--r--  tools/libxc/xc_linux_build.c | 24
-rw-r--r--  tools/libxc/xc_linux_restore.c | 10
-rw-r--r--  tools/libxc/xc_linux_save.c | 29
-rw-r--r--  tools/libxc/xc_pagetab.c | 2
-rw-r--r--  tools/libxc/xc_ptrace.c | 13
-rw-r--r--  tools/libxc/xc_vmx_build.c | 82
-rw-r--r--  tools/libxc/xenctrl.h | 30
-rw-r--r--  tools/libxc/xenguest.h | 8
-rw-r--r--  tools/libxc/xg_private.c | 3
-rw-r--r--  tools/python/xen/lowlevel/xc/xc.c | 27
-rw-r--r--  tools/python/xen/lowlevel/xs/xs.c | 34
-rw-r--r--  tools/python/xen/xend/XendDomain.py | 2
-rw-r--r--  tools/python/xen/xend/XendDomainInfo.py | 2
-rw-r--r--  tools/python/xen/xend/image.py | 14
-rw-r--r--  tools/python/xen/xend/server/DevController.py | 35
-rw-r--r--  tools/python/xen/xm/create.py | 12
-rw-r--r--  tools/python/xen/xm/main.py | 8
-rw-r--r--  tools/tests/test_x86_emulator.c | 24
-rw-r--r--  tools/vtpm/Makefile | 13
-rw-r--r--  tools/vtpm/Rules.mk | 2
-rw-r--r--  tools/vtpm_manager/Makefile | 11
-rw-r--r--  tools/vtpm_manager/manager/vtsp.c | 217
-rw-r--r--  tools/vtpm_manager/manager/vtsp.h | 18
-rw-r--r--  tools/vtpm_manager/tcs/tcs.c | 2
-rw-r--r--  tools/xcutils/xc_save.c | 20
-rw-r--r--  tools/xenmon/Makefile | 7
-rw-r--r--  tools/xenstat/libxenstat/src/xenstat.c | 2
-rw-r--r--  tools/xenstore/xenstore_client.c | 8
-rw-r--r--  tools/xenstore/xs.c | 46
-rw-r--r--  tools/xenstore/xs.h | 24
-rw-r--r--  tools/xenstore/xs_test.c | 11
-rw-r--r--  tools/xenstore/xsls.c | 4
-rw-r--r--  tools/xentrace/xenctx.c | 4
-rw-r--r--  tools/xm-test/lib/XmTestLib/XenDomain.py | 3
-rw-r--r--  tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py | 2
-rw-r--r--  tools/xm-test/tests/network-attach/Makefile.am | 2
-rw-r--r--  tools/xm-test/tests/network/02_network_local_ping_pos.py | 2
-rw-r--r--  xen/arch/ia64/linux-xen/process-linux-xen.c | 2
-rw-r--r--  xen/arch/ia64/linux-xen/smpboot.c | 6
-rw-r--r--  xen/arch/ia64/vmx/mmio.c | 2
-rw-r--r--  xen/arch/ia64/vmx/vlsapic.c | 22
-rw-r--r--  xen/arch/ia64/vmx/vmx_init.c | 2
-rw-r--r--  xen/arch/ia64/vmx/vmx_process.c | 2
-rw-r--r--  xen/arch/ia64/vmx/vmx_support.c | 2
-rw-r--r--  xen/arch/ia64/xen/domain.c | 73
-rw-r--r--  xen/arch/ia64/xen/idle0_task.c | 31
-rw-r--r--  xen/arch/ia64/xen/process.c | 30
-rw-r--r--  xen/arch/ia64/xen/vcpu.c | 2
-rw-r--r--  xen/arch/ia64/xen/xenmisc.c | 9
-rw-r--r--  xen/arch/ia64/xen/xensetup.c | 38
-rw-r--r--  xen/arch/ia64/xen/xentime.c | 6
-rw-r--r--  xen/arch/x86/apic.c | 4
-rw-r--r--  xen/arch/x86/boot/x86_32.S | 4
-rw-r--r--  xen/arch/x86/dm/i8259.c | 2
-rw-r--r--  xen/arch/x86/dm/vmx_vioapic.c | 2
-rw-r--r--  xen/arch/x86/dom0_ops.c | 23
-rw-r--r--  xen/arch/x86/domain.c | 209
-rw-r--r--  xen/arch/x86/domain_build.c | 17
-rw-r--r--  xen/arch/x86/idle0_task.c | 27
-rw-r--r--  xen/arch/x86/io_apic.c | 44
-rw-r--r--  xen/arch/x86/irq.c | 81
-rw-r--r--  xen/arch/x86/mm.c | 80
-rw-r--r--  xen/arch/x86/nmi.c | 76
-rw-r--r--  xen/arch/x86/setup.c | 49
-rw-r--r--  xen/arch/x86/shadow.c | 23
-rw-r--r--  xen/arch/x86/shadow32.c | 41
-rw-r--r--  xen/arch/x86/shadow_public.c | 25
-rw-r--r--  xen/arch/x86/smpboot.c | 13
-rw-r--r--  xen/arch/x86/time.c | 24
-rw-r--r--  xen/arch/x86/traps.c | 85
-rw-r--r--  xen/arch/x86/vmx.c | 33
-rw-r--r--  xen/arch/x86/vmx_intercept.c | 16
-rw-r--r--  xen/arch/x86/vmx_io.c | 4
-rw-r--r--  xen/arch/x86/vmx_platform.c | 2
-rw-r--r--  xen/arch/x86/vmx_vlapic.c | 12
-rw-r--r--  xen/arch/x86/vmx_vmcs.c | 96
-rw-r--r--  xen/arch/x86/x86_32/asm-offsets.c | 4
-rw-r--r--  xen/arch/x86/x86_32/domain_page.c | 232
-rw-r--r--  xen/arch/x86/x86_32/entry.S | 71
-rw-r--r--  xen/arch/x86/x86_32/mm.c | 20
-rw-r--r--  xen/arch/x86/x86_32/traps.c | 58
-rw-r--r--  xen/arch/x86/x86_64/asm-offsets.c | 4
-rw-r--r--  xen/arch/x86/x86_64/entry.S | 32
-rw-r--r--  xen/arch/x86/x86_64/mm.c | 8
-rw-r--r--  xen/arch/x86/x86_64/traps.c | 47
-rw-r--r--  xen/common/bitmap.c | 105
-rw-r--r--  xen/common/dom0_ops.c | 44
-rw-r--r--  xen/common/domain.c | 42
-rw-r--r--  xen/common/grant_table.c | 4
-rw-r--r--  xen/common/kernel.c | 38
-rw-r--r--  xen/common/keyhandler.c | 49
-rw-r--r--  xen/common/memory.c | 3
-rw-r--r--  xen/common/page_alloc.c | 2
-rw-r--r--  xen/common/sched_bvt.c | 130
-rw-r--r--  xen/common/sched_sedf.c | 540
-rw-r--r--  xen/common/schedule.c | 203
-rw-r--r--  xen/common/timer.c (renamed from xen/common/ac_timer.c) | 156
-rw-r--r--  xen/common/vsprintf.c | 147
-rw-r--r--  xen/common/xmalloc.c | 2
-rw-r--r--  xen/drivers/char/console.c | 5
-rw-r--r--  xen/drivers/char/ns16550.c | 8
-rw-r--r--  xen/include/asm-ia64/config.h | 6
-rw-r--r--  xen/include/asm-ia64/vmx.h | 2
-rw-r--r--  xen/include/asm-ia64/vtm.h | 4
-rw-r--r--  xen/include/asm-x86/config.h | 39
-rw-r--r--  xen/include/asm-x86/domain.h | 43
-rw-r--r--  xen/include/asm-x86/mm.h | 19
-rw-r--r--  xen/include/asm-x86/nmi.h | 2
-rw-r--r--  xen/include/asm-x86/processor.h | 27
-rw-r--r--  xen/include/asm-x86/shadow.h | 12
-rw-r--r--  xen/include/asm-x86/vmx.h | 5
-rw-r--r--  xen/include/asm-x86/vmx_intercept.h | 2
-rw-r--r--  xen/include/asm-x86/vmx_platform.h | 10
-rw-r--r--  xen/include/asm-x86/vmx_vlapic.h | 6
-rw-r--r--  xen/include/asm-x86/vmx_vmcs.h | 7
-rw-r--r--  xen/include/asm-x86/vmx_vpit.h | 4
-rw-r--r--  xen/include/asm-x86/x86_emulate.h | 31
-rw-r--r--  xen/include/public/arch-ia64.h | 2
-rw-r--r--  xen/include/public/arch-x86_32.h | 12
-rw-r--r--  xen/include/public/arch-x86_64.h | 32
-rw-r--r--  xen/include/public/dom0_ops.h | 12
-rw-r--r--  xen/include/public/hvm/hvm_info_table.h | 24
-rw-r--r--  xen/include/public/hvm/ioreq.h (renamed from xen/include/public/io/ioreq.h) | 31
-rw-r--r--  xen/include/public/hvm/vmx_assist.h (renamed from xen/include/public/vmx_assist.h) | 0
-rw-r--r--  xen/include/public/nmi.h | 54
-rw-r--r--  xen/include/public/xen.h | 18
-rw-r--r--  xen/include/xen/bitmap.h | 6
-rw-r--r--  xen/include/xen/config.h | 9
-rw-r--r--  xen/include/xen/cpumask.h | 28
-rw-r--r--  xen/include/xen/domain.h | 2
-rw-r--r--  xen/include/xen/domain_page.h | 32
-rw-r--r--  xen/include/xen/lib.h | 10
-rw-r--r--  xen/include/xen/perfc_defn.h | 2
-rw-r--r--  xen/include/xen/sched-if.h | 42
-rw-r--r--  xen/include/xen/sched.h | 94
-rw-r--r--  xen/include/xen/softirq.h | 4
-rw-r--r--  xen/include/xen/timer.h (renamed from xen/include/xen/ac_timer.h) | 39
204 files changed, 3818 insertions, 2175 deletions
diff --git a/docs/src/user.tex b/docs/src/user.tex
index 6e958e6075..a27472ccce 100644
--- a/docs/src/user.tex
+++ b/docs/src/user.tex
@@ -1885,6 +1885,220 @@ mailing lists and subscription information can be found at \begin{quote}
\appendix
+\chapter{Unmodified (VMX) guest domains in Xen with Intel\textregistered Virtualization Technology (VT)}
+
+Xen supports guest domains running unmodified guest operating systems, using the Virtualization Technology (VT) available on recent Intel processors. More information about Intel Virtualization Technology, which implements Virtual Machine Extensions (VMX) in the processor, is available on the Intel website at \\
+ {\small {\tt http://www.intel.com/technology/computing/vptech}}
+
+\section{Building Xen with VT support}
+
+The following packages need to be installed in order to build Xen with VT support. Some Linux distributions do not provide these packages by default.
+
+\begin{tabular}{lp{11.0cm}}
+{\bfseries Package} & {\bfseries Description} \\
+
+dev86 & The dev86 package provides an assembler and linker for real mode 80x86 instructions. You need to have this package installed in order to build the BIOS code which runs in (virtual) real mode.
+
+If the dev86 package is not available on the x86\_64 distribution, you can install the i386 version of it. The dev86 rpm package for various distributions can be found at {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=dev86\&submit=Search}} \\
+
+LibVNCServer & The unmodified guest's VGA display, keyboard, and mouse are virtualized using the vncserver library provided by this package. You can get the sources of libvncserver from {\small {\tt http://sourceforge.net/projects/libvncserver}}. Build and install the sources on the build system to get the libvncserver library. Version 0.8pre of libvncserver currently works well with Xen.\\
+
+SDL-devel, SDL & Simple DirectMedia Layer (SDL) is another way of virtualizing the unmodified guest console. It provides an X window for the guest console.
+
+If the SDL and SDL-devel packages are not installed by default on the build system, they can be obtained from {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=SDL\&submit=Search}}
+, {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=SDL-devel\&submit=Search}} \\
+
+\end{tabular}
+
+\section{Configuration file for unmodified VMX guests}
+
+The Xen installation includes a sample configuration file, {\small {\tt /etc/xen/xmexample.vmx}}. There are comments describing all the options. In addition to the common options that are the same as those for paravirtualized guest configurations, VMX guest configurations have the following settings:
+
+\begin{tabular}{lp{11.0cm}}
+
+{\bfseries Parameter} & {\bfseries Description} \\
+
+kernel & The VMX firmware loader, {\small {\tt /usr/lib/xen/boot/vmxloader}}\\
+
+builder & The domain build function. The VMX domain uses the vmx builder.\\
+
+acpi & Enable VMX guest ACPI, default=0 (disabled)\\
+
+apic & Enable VMX guest APIC, default=0 (disabled)\\
+
+vif & Optionally defines MAC address and/or bridge for the network interfaces. Random MACs are assigned if not given. {\small {\tt type=ioemu}} means ioemu is used to virtualize the VMX NIC. If no type is specified, vbd is used, as with paravirtualized guests.\\
+
+disk & Defines the disk devices you want the domain to have access to, and what you want them accessible as. If using a physical device as the VMX guest's disk, each disk entry is of the form
+
+{\small {\tt phy:UNAME,ioemu:DEV,MODE}}
+
+where UNAME is the device, DEV is the device name the domain will see, and MODE is r for read-only, w for read-write. ioemu means the disk will be virtualized through ioemu. If ioemu is not given, the disk is exported as a vbd, as with paravirtualized guests.
+
+If using a disk image file, the entry takes the form
+
+{\small {\tt file:FILEPATH,ioemu:DEV,MODE}}
+
+If using more than one disk, there should be a comma between each disk entry. For example:
+
+{\scriptsize {\tt disk = ['file:/var/images/image1.img,ioemu:hda,w', 'file:/var/images/image2.img,ioemu:hdb,w']}}\\
+
+cdrom & Disk image for CD-ROM. The default is {\small {\tt /dev/cdrom}} for Domain0. Inside the VMX domain, the CD-ROM will be available as device {\small {\tt /dev/hdc}}. The entry can also point to an ISO file.\\
+
+boot & Boot from floppy (a), hard disk (c) or CD-ROM (d). For example, to boot from CD-ROM, the entry should be:
+
+boot='d'\\
+
+device\_model & The device emulation tool for VMX guests. This parameter should not be changed.\\
+
+sdl & Enable SDL library for graphics, default = 0 (disabled)\\
+
+vnc & Enable VNC library for graphics, default = 1 (enabled)\\
+
+vncviewer & Enable spawning of the vncviewer (only valid when vnc=1), default = 1 (enabled)
+
+If vnc=1 and vncviewer=0, the user can connect to the VMX guest manually by running vncviewer from a remote machine. For example:
+
+{\small {\tt vncviewer domain0\_IP\_address:VMX\_domain\_id}} \\
+
+ne2000 & Enable ne2000, default = 0 (disabled; use pcnet)\\
+
+serial & Enable redirection of VMX serial output to a pty device\\
+
+localtime & Set the real time clock to local time [default=0, that is, set to UTC].\\
+
+enable-audio & Enable audio support. This is under development.\\
+
+full-screen & Start in full screen. This is under development.\\
+
+nographic & Another way to redirect serial output. If enabled, neither 'sdl' nor 'vnc' will work. Not recommended.\\
+
+\end{tabular}
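+
+As an illustration, a minimal configuration combining these settings might look like the following sketch (the image path and memory size are illustrative values, not defaults):
+
+{\scriptsize {\tt kernel = '/usr/lib/xen/boot/vmxloader'\\
+builder = 'vmx'\\
+memory = 128\\
+disk = [ 'file:/var/images/guest.img,ioemu:hda,w' ]\\
+cdrom = '/dev/cdrom'\\
+boot = 'c'\\
+vnc = 1\\
+sdl = 0}}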
+
+
+\section{Creating virtual disks from scratch}
+\subsection{Using physical disks}
+If you are using a physical disk or physical disk partition, you need to install a Linux OS on the disk first. Then the boot loader should be installed in the correct place: for example, {\small {\tt /dev/sda}} for booting from the whole disk, or {\small {\tt /dev/sda1}} for booting from partition 1.
+
+\subsection{Using disk image files}
+You need to create a large empty disk image file first; then, you need to install a Linux OS onto it. There are two methods: one is to install the OS directly, using a VMX guest booted from the OS installation CD-ROM; the other is to copy an installed OS into the image. In either case, the boot loader will also need to be installed.
+
+\subsubsection*{To create the image file:}
+The image size should be big enough to accommodate the entire OS. This example assumes the size is 1G (which is probably too small for most OSes).
+
+{\small {\tt \# dd if=/dev/zero of=hd.img bs=1M count=1 seek=1023}}
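+
+Writing a single 1M block at an offset of 1023M produces a sparse file with an apparent size of 1G that initially occupies almost no disk space.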
+
+\subsubsection*{To install a Linux OS directly into an image file using a VMX guest:}
+
+Install Xen, then create a VMX guest backed by the empty image file and boot it from the installation CD-ROM. From that point on, it is just like a normal Linux OS installation. The VMX configuration file should have these two entries before the guest is created:
+
+{\small {\tt cdrom='/dev/cdrom'
+boot='d'}}
+
+If this method does not succeed, you can use the following method of copying an installed Linux OS into an image file instead.
+
+\subsubsection*{To copy an installed OS into an image file:}
+Direct installation is the easier way to partition the image and install an OS into it. But if you want a specific, already-installed OS in your disk image, then you will most likely want to use this method.
+
+\begin{enumerate}
+\item {\bfseries Install a normal Linux OS on the host machine}\\
+You can choose any way to install Linux, such as using yum to install Red Hat Linux or YAST to install Novell SuSE Linux. The rest of this example assumes the Linux OS is installed in {\small {\tt /var/guestos/}}.
+
+\item {\bfseries Make the partition table}\\
+The image file will be treated as a hard disk, so you need to create a partition table inside it. For example:
+
+{\scriptsize {\tt \# losetup /dev/loop0 hd.img\\
+\# fdisk -b 512 -C 4096 -H 16 -S 32 /dev/loop0\\
+press 'n' to add a new partition\\
+press 'p' to choose a primary partition\\
+press '1' to set the partition number\\
+press 'Enter' to accept the default value of the "First Cylinder" parameter\\
+press 'Enter' to accept the default value of the "Last Cylinder" parameter\\
+press 'w' to write the partition table and exit\\
+\# losetup -d /dev/loop0}}
+
+The geometry given to fdisk matches the 1G image created above: 4096 cylinders $\times$ 16 heads $\times$ 32 sectors $\times$ 512 bytes per sector $=$ 1G.
+
+\item {\bfseries Make the file system and install grub}\\
+{\scriptsize {\tt \# ln -s /dev/loop0 /dev/loop\\
+\# losetup /dev/loop0 hd.img\\
+\# losetup -o 16384 /dev/loop1 hd.img\\
+\# mkfs.ext3 /dev/loop1\\
+\# mount /dev/loop1 /mnt\\
+\# mkdir -p /mnt/boot/grub\\
+\# cp /boot/grub/stage* /boot/grub/e2fs\_stage1\_5 /mnt/boot/grub\\
+\# umount /mnt\\
+\# grub\\
+grub> device (hd0) /dev/loop\\
+grub> root (hd0,0)\\
+grub> setup (hd0)\\
+grub> quit\\
+\# rm /dev/loop\\
+\# losetup -d /dev/loop0\\
+\# losetup -d /dev/loop1}}
+
+The {\small {\tt losetup}} option {\small {\tt -o 16384}} skips the partition table in the image file: the first partition starts at sector 32 (given the {\small {\tt -S 32}} geometry used above), and $32 \times 512 = 16384$ bytes. We need {\small {\tt /dev/loop}} because grub expects a disk device \emph{name}, where \emph{name} represents the entire disk and \emph{name1} represents the first partition.
+
+\item {\bfseries Copy the OS files to the image}\\
+If you have Xen installed, you can easily use {\small {\tt lomount}} instead of {\small {\tt losetup}} and {\small {\tt mount}} when copying files into the partitions. {\small {\tt lomount}} just needs the partition information.
+
+{\scriptsize {\tt \# lomount -t ext3 -diskimage hd.img -partition 1 /mnt/guest\\
+\# cp -ax /var/guestos/\{root,dev,var,etc,usr,bin,sbin,lib\} /mnt/guest\\
+\# mkdir /mnt/guest/\{proc,sys,home,tmp\}}}
+
+\item {\bfseries Edit the {\small {\tt /etc/fstab}} of the guest image}\\
+The fstab should look like this:
+
+{\scriptsize {\tt \# vim /mnt/guest/etc/fstab\\
+/dev/hda1 / ext3 defaults 1 1\\
+none /dev/pts devpts gid=5,mode=620 0 0\\
+none /dev/shm tmpfs defaults 0 0\\
+none /proc proc defaults 0 0\\
+none /sys sysfs defaults 0 0}}
+
+\item {\bfseries umount the image file}\\
+{\small {\tt \# umount /mnt/guest}}
+\end{enumerate}
+
+Now the guest OS image {\small {\tt hd.img}} is ready. You can also find quickstart images at {\small {\tt http://free.oszoo.org}}, but make sure the boot loader is installed.
+
+\subsection{Install Windows into an Image File using a VMX guest}
+In order to install a Windows OS, you should keep {\small {\tt acpi=0}} in your VMX configuration file.
+
+\section{VMX Guests}
+\subsection{Editing the Xen VMX config file}
+Make a copy of the example VMX configuration file {\small {\tt /etc/xen/xmexample.vmx}} and edit the line that reads
+
+{\small {\tt disk = [ 'file:/var/images/\emph{guest.img},ioemu:hda,w' ]}}
+
+replacing \emph{guest.img} with the name of the guest OS image file you just made.
+
+\subsection{Creating VMX guests}
+Simply follow the usual method of creating the guest, using the -f parameter and providing the filename of your VMX configuration file:\\
+
+{\small {\tt \# xend start\\
+\# xm create /etc/xen/vmxguest.vmx}}
+
+In the default configuration, VNC is on and SDL is off. Therefore a VNC window will open when a VMX guest is created. If you want to use SDL to create VMX guests, set {\small {\tt sdl=1}} in your VMX configuration file. You can also turn off VNC by setting {\small {\tt vnc=0}}.
+
+\subsection{Destroying VMX guests}
+VMX guests can be destroyed in the same way as paravirtualized guests. We recommend that you type the command
+
+{\small {\tt poweroff}}
+
+in the VMX guest's console first to prevent data loss. Then execute the command
+
+{\small {\tt xm destroy \emph{vmx\_guest\_id} }}
+
+at the Domain0 console.
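+
+(The numeric id of a running guest appears in the {\small {\tt xm list}} output, alongside the domain name.)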
+
+\subsection{VMX window (X or VNC) hot keys}
+If you are running in the X environment after creating a VMX guest, an X window is created. Several hot keys are available in the window for controlling the VMX guest.
+
+{\bfseries Ctrl+Alt+2} switches from the guest's VGA window to the control window. Typing {\small {\tt help}} shows help for the control commands; for example, 'q' is the command to destroy the VMX guest.\\
+{\bfseries Ctrl+Alt+1} switches back to the VMX guest's VGA window.\\
+{\bfseries Ctrl+Alt+3} switches to the serial port output. It captures serial output from the VMX guest, and works only if the VMX guest was configured to use the serial port. \\
+
+\subsection{Save/Restore and Migration}
+VMX guests cannot currently be saved, restored, or migrated. These features are under active development.
+
%% Chapter Glossary of Terms moved to glossary.tex
\chapter{Glossary of Terms}
diff --git a/linux-2.6-xen-sparse/arch/xen/Makefile b/linux-2.6-xen-sparse/arch/xen/Makefile
index 115ec21c1e..12930e1009 100644
--- a/linux-2.6-xen-sparse/arch/xen/Makefile
+++ b/linux-2.6-xen-sparse/arch/xen/Makefile
@@ -77,8 +77,6 @@ install kernel_install:
install -m0664 .config $(INSTALL_PATH)/boot/config-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
install -m0664 System.map $(INSTALL_PATH)/boot/System.map-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(XENGUEST)$(INSTALL_SUFFIX)
- mkdir -p $(INSTALL_PATH)/usr/include/xen/linux
- install -m0644 $(srctree)/include/asm-xen/linux-public/*.h $(INSTALL_PATH)/usr/include/xen/linux
archclean:
@if [ -e arch/xen/arch ]; then $(MAKE) $(clean)=arch/xen/arch; fi;
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
index 96ee1a68ce..5e5d56cce6 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
@@ -40,8 +40,6 @@
#include <asm/fixmap.h>
#endif
-void (*pm_power_off)(void) = NULL;
-
#ifdef CONFIG_X86_64
static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
index 41808f1711..b0d0914a14 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
@@ -67,8 +67,11 @@ int mtrr_add_page(unsigned long base, unsigned long size,
op.u.add_memtype.pfn = base;
op.u.add_memtype.nr_pfns = size;
op.u.add_memtype.type = type;
- if ((error = HYPERVISOR_dom0_op(&op)))
+ error = HYPERVISOR_dom0_op(&op);
+ if (error) {
+ BUG_ON(error > 0);
return error;
+ }
if (increment)
++usage_table[op.u.add_memtype.reg];
@@ -121,8 +124,12 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
if (--usage_table[reg] < 1) {
op.cmd = DOM0_DEL_MEMTYPE;
op.u.del_memtype.handle = 0;
- op.u.add_memtype.reg = reg;
- (void)HYPERVISOR_dom0_op(&op);
+ op.u.del_memtype.reg = reg;
+ error = HYPERVISOR_dom0_op(&op);
+ if (error) {
+ BUG_ON(error > 0);
+ goto out;
+ }
}
error = reg;
out:
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
index 6522f7a249..ef23e7ca3c 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
@@ -76,7 +76,9 @@ IF_MASK = 0x00000200
DF_MASK = 0x00000400
NT_MASK = 0x00004000
VM_MASK = 0x00020000
-
+/* Pseudo-eflags. */
+NMI_MASK = 0x80000000
+
/* Offsets into shared_info_t. */
#define evtchn_upcall_pending /* 0 */
#define evtchn_upcall_mask 1
@@ -305,8 +307,8 @@ restore_all:
je ldt_ss # returning to user-space with LDT SS
#endif /* XEN */
restore_nocheck:
- testl $VM_MASK, EFLAGS(%esp)
- jnz resume_vm86
+ testl $(VM_MASK|NMI_MASK), EFLAGS(%esp)
+ jnz hypervisor_iret
movb EVENT_MASK(%esp), %al
notb %al # %al == ~saved_mask
XEN_GET_VCPU_INFO(%esi)
@@ -328,11 +330,11 @@ iret_exc:
.long 1b,iret_exc
.previous
-resume_vm86:
- XEN_UNBLOCK_EVENTS(%esi)
+hypervisor_iret:
+ andl $~NMI_MASK, EFLAGS(%esp)
RESTORE_REGS
movl %eax,(%esp)
- movl $__HYPERVISOR_switch_vm86,%eax
+ movl $__HYPERVISOR_iret,%eax
int $0x82
ud2
@@ -691,6 +693,15 @@ debug_stack_correct:
call do_debug
jmp ret_from_exception
+ENTRY(nmi)
+ pushl %eax
+ SAVE_ALL
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_nmi
+ orl $NMI_MASK, EFLAGS(%esp)
+ jmp restore_all
+
#if 0 /* XEN */
/*
* NMI is doubly nasty. It can happen _while_ we're handling
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
index 491820d8cf..cf8fbeb1ac 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
@@ -76,9 +76,7 @@ EXPORT_SYMBOL(ioremap_nocache);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(kernel_thread);
EXPORT_SYMBOL(pm_idle);
-#ifdef CONFIG_ACPI_BOOT
EXPORT_SYMBOL(pm_power_off);
-#endif
EXPORT_SYMBOL(get_cmos_time);
EXPORT_SYMBOL(cpu_khz);
EXPORT_SYMBOL(apm_info);
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c
index 0b786dbfc7..9cc425faea 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c
@@ -622,9 +622,11 @@ static int balanced_irq(void *unused)
try_to_freeze(PF_FREEZE);
if (time_after(jiffies,
prev_balance_time+balanced_irq_interval)) {
+ preempt_disable();
do_irq_balance();
prev_balance_time = jiffies;
time_remaining = balanced_irq_interval;
+ preempt_enable();
}
}
return 0;
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
index 5ad8d1b273..f2669a71a3 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
@@ -506,18 +506,11 @@ static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
static void io_check_error(unsigned char reason, struct pt_regs * regs)
{
- unsigned long i;
-
printk("NMI: IOCK error (debug interrupt?)\n");
show_registers(regs);
/* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
- i = 2000;
- while (--i) udelay(1000);
- reason &= ~8;
- outb(reason, 0x61);
+ clear_io_check_error(reason);
}
static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
@@ -648,12 +641,6 @@ fastcall void do_int3(struct pt_regs *regs, long error_code)
}
#endif
-static inline void conditional_sti(struct pt_regs *regs)
-{
- if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
- local_irq_enable();
-}
-
/*
* Our handling of the processor debug registers is non-trivial.
* We do not clear them on entry and exit from the kernel. Therefore
@@ -686,9 +673,9 @@ fastcall void do_debug(struct pt_regs * regs, long error_code)
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
SIGTRAP) == NOTIFY_STOP)
return;
-
/* It's safe to allow irq's after DR6 has been saved */
- conditional_sti(regs);
+ if (regs->eflags & X86_EFLAGS_IF)
+ local_irq_enable();
/* Mask out spurious debug traps due to lazy DR7 setting */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
index 771ea5ebfd..96797b44a6 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
@@ -65,7 +65,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
pud_t *pud;
pmd_t *pmd_table;
-
+
#ifdef CONFIG_X86_PAE
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
make_lowmem_page_readonly(pmd_table);
diff --git a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
index 311003e32c..534c040c10 100644
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
@@ -17,6 +17,13 @@
#include <linux/kthread.h>
#include <asm-xen/xencons.h>
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+#endif
+
#define SHUTDOWN_INVALID -1
#define SHUTDOWN_POWEROFF 0
#define SHUTDOWN_REBOOT 1
@@ -283,15 +290,15 @@ static void shutdown_handler(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
char *str;
- struct xenbus_transaction *xbt;
+ xenbus_transaction_t xbt;
int err;
if (shutting_down != SHUTDOWN_INVALID)
return;
again:
- xbt = xenbus_transaction_start();
- if (IS_ERR(xbt))
+ err = xenbus_transaction_start(&xbt);
+ if (err)
return;
str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
/* Ignore read errors and empty reads. */
@@ -332,12 +339,12 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
unsigned int len)
{
char sysrq_key = '\0';
- struct xenbus_transaction *xbt;
+ xenbus_transaction_t xbt;
int err;
again:
- xbt = xenbus_transaction_start();
- if (IS_ERR(xbt))
+ err = xenbus_transaction_start(&xbt);
+ if (err)
return;
if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
printk(KERN_ERR "Unable to read sysrq code in "
diff --git a/linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c b/linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c
index 8d9b1531c5..196a4e3ec3 100644
--- a/linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c
@@ -298,7 +298,7 @@ static void vcpu_hotplug(unsigned int cpu)
return;
sprintf(dir, "cpu/%d", cpu);
- err = xenbus_scanf(NULL, dir, "availability", "%s", state);
+ err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
if (err != 1) {
printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
return;
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c
index f2a7160a0e..74596e589d 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c
@@ -526,7 +526,7 @@ extern union xen_start_info_union xen_start_info_union;
unsigned long __init e820_end_of_ram(void)
{
- unsigned long max_end_pfn;
+ unsigned long max_end_pfn;
if (xen_override_max_pfn == 0) {
max_end_pfn = xen_start_info->nr_pages;
@@ -612,7 +612,7 @@ void __init parse_memopt(char *p, char **from)
{
end_user_pfn = memparse(p, from);
end_user_pfn >>= PAGE_SHIFT;
- xen_override_max_pfn = (unsigned long) end_user_pfn;
+ xen_override_max_pfn = (unsigned long) end_user_pfn;
}
/*
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
index 877a3027ee..2fbe90706c 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
@@ -57,7 +57,7 @@
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
-
+
/*
* C code is not supposed to know about undefined top of stack. Every time
* a C function with an pt_regs argument is called from the SYSCALL based
@@ -65,7 +65,7 @@
* RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
* manipulation.
*/
-
+
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
movq $__USER_CS,CS(%rsp)
@@ -121,19 +121,19 @@
.endm
/*
- * Must be consistent with the definition in arch_x86_64.h:
- * struct switch_to_user {
+ * Must be consistent with the definition in arch-x86_64.h:
+ * struct iret_context {
* u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
* };
* #define VGCF_IN_SYSCALL (1<<8)
*/
- .macro SWITCH_TO_USER flag
+ .macro HYPERVISOR_IRET flag
subq $8*4,%rsp # reuse rip, cs, rflags, rsp, ss in the stack
movq %rax,(%rsp)
movq %r11,1*8(%rsp)
movq %rcx,2*8(%rsp) # we saved %rcx upon exceptions
movq $\flag,3*8(%rsp)
- movq $__HYPERVISOR_switch_to_user,%rax
+ movq $__HYPERVISOR_iret,%rax
syscall
.endm
@@ -225,7 +225,7 @@ sysret_check:
jnz sysret_careful
XEN_UNBLOCK_EVENTS(%rsi)
RESTORE_ARGS 0,8,0
- SWITCH_TO_USER VGCF_IN_SYSCALL
+ HYPERVISOR_IRET VGCF_IN_SYSCALL
/* Handle reschedules */
/* edx: work, edi: workmask */
@@ -418,7 +418,6 @@ ENTRY(stub_rt_sigreturn)
jmp int_ret_from_sys_call
CFI_ENDPROC
-
/*
* Interrupt entry/exit.
*
@@ -479,7 +478,7 @@ kernel_mode:
orb $3,1*8(%rsp)
iretq
user_mode:
- SWITCH_TO_USER 0
+ HYPERVISOR_IRET 0
/* edi: workmask, edx: work */
retint_careful:
@@ -720,6 +719,18 @@ ENTRY(do_hypervisor_callback) # do_hyperviosr_callback(struct *pt_regs)
call evtchn_do_upcall
jmp error_exit
+#ifdef CONFIG_X86_LOCAL_APIC
+ENTRY(nmi)
+ zeroentry do_nmi_callback
+ENTRY(do_nmi_callback)
+ addq $8, %rsp
+ call do_nmi
+ RESTORE_REST
+ XEN_BLOCK_EVENTS(%rsi)
+ GET_THREAD_INFO(%rcx)
+ jmp retint_restore_args
+#endif
+
ALIGN
restore_all_enable_events:
XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
@@ -734,7 +745,7 @@ scrit: /**** START OF CRITICAL REGION ****/
orb $3,1*8(%rsp)
iretq
crit_user_mode:
- SWITCH_TO_USER 0
+ HYPERVISOR_IRET 0
14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
XEN_PUT_VCPU_INFO(%rsi)
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c
index 734b9d3082..2a7f4cee42 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c
@@ -68,13 +68,13 @@ static void __init copy_bootdata(char *real_mode_data)
static void __init setup_boot_cpu_data(void)
{
- int dummy, eax;
+ unsigned int dummy, eax;
/* get vendor info */
- cpuid(0, &boot_cpu_data.cpuid_level,
- (int *)&boot_cpu_data.x86_vendor_id[0],
- (int *)&boot_cpu_data.x86_vendor_id[8],
- (int *)&boot_cpu_data.x86_vendor_id[4]);
+ cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
+ (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
+ (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
+ (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
/* get cpu type */
cpuid(1, &eax, &dummy, &dummy,
@@ -109,7 +109,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
if (s != NULL)
setup_early_printk(s);
#endif
-
#ifdef CONFIG_DISCONTIGMEM
s = strstr(saved_command_line, "numa=");
if (s != NULL)
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c
index 4cbb1aed8f..33651e64aa 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c
@@ -255,11 +255,9 @@ static int __init enable_ioapic_setup(char *str)
return 1;
}
-
__setup("noapic", disable_ioapic_setup);
__setup("apic", enable_ioapic_setup);
-
#include <asm/pci-direct.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
@@ -1146,6 +1144,7 @@ void __apicdebuginit print_PIC(void)
v = inb(0x4d1) << 8 | inb(0x4d0);
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}
+
#endif /* 0 */
#else
@@ -1191,6 +1190,7 @@ void disable_IO_APIC(void)
* Clear the IO-APIC before rebooting:
*/
clear_IO_APIC();
+
#ifndef CONFIG_XEN
disconnect_bsp_APIC();
#endif
@@ -1202,6 +1202,7 @@ void disable_IO_APIC(void)
*
* by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
*/
+
#ifndef CONFIG_XEN
static void __init setup_ioapic_ids_from_mpc (void)
{
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
index 7a8d319d2b..ca26c1234e 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
@@ -9,18 +9,15 @@
* x86_64-specific irq controller code. (e.g. i8259.c and
* io_apic.c.)
*/
-#include <asm/uaccess.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-#include <linux/kernel_stat.h>
-/*
- * Interrupt statistics:
- */
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/io_apic.h>
atomic_t irq_err_count;
-
#ifdef CONFIG_X86_IO_APIC
#ifdef APIC_MISMATCH_DEBUG
atomic_t irq_mis_count;
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c
index 72bde0ec16..fa77040acd 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c
@@ -62,6 +62,7 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
if (reload) {
#ifdef CONFIG_SMP
cpumask_t mask;
+
preempt_disable();
#endif
make_pages_readonly(pc->ldt, (pc->size * LDT_ENTRY_SIZE) /
@@ -201,6 +202,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
struct user_desc ldt_info;
error = -EINVAL;
+
if (bytecount != sizeof(ldt_info))
goto out;
error = -EFAULT;
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
index 9e1a023282..ef7deb8938 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
@@ -62,6 +62,7 @@
#include <asm-xen/xen-public/physdev.h>
#include "setup_arch_pre.h"
#include <asm/hypervisor.h>
+#include <asm-xen/xen-public/nmi.h>
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
#define end_pfn_map end_pfn
@@ -304,7 +305,6 @@ static void __init probe_roms(void)
}
#endif
-
static __init void parse_cmdline_early (char ** cmdline_p)
{
char c = ' ', *to = command_line, *from = COMMAND_LINE;
@@ -379,6 +379,7 @@ static __init void parse_cmdline_early (char ** cmdline_p)
acpi_skip_timer_override = 1;
#endif
#endif
+
#ifndef CONFIG_XEN
if (!memcmp(from, "nolapic", 7) ||
!memcmp(from, "disableapic", 11))
@@ -391,7 +392,8 @@ static __init void parse_cmdline_early (char ** cmdline_p)
skip_ioapic_setup = 0;
ioapic_force = 1;
}
-#endif
+#endif
+
if (!memcmp(from, "mem=", 4))
parse_memopt(from+4, &from);
@@ -588,7 +590,7 @@ void __init setup_arch(char **cmdline_p)
HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_writable_pagetables);
- ARCH_SETUP
+ ARCH_SETUP
#else
ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
drive_info = DRIVE_INFO;
@@ -612,7 +614,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
#ifdef CONFIG_XEN
- init_mm.brk = start_pfn << PAGE_SHIFT;
+ init_mm.brk = start_pfn << PAGE_SHIFT;
#else
init_mm.brk = (unsigned long) &_end;
@@ -668,7 +670,6 @@ void __init setup_arch(char **cmdline_p)
reserve_ebda_region();
#endif
-
#ifdef CONFIG_SMP
/*
* But first pinch a few for the stack/trampoline stuff
@@ -790,8 +791,6 @@ void __init setup_arch(char **cmdline_p)
}
-
-
if ( ! (xen_start_info->flags & SIF_INITDOMAIN))
{
acpi_disabled = 1;
@@ -835,7 +834,7 @@ void __init setup_arch(char **cmdline_p)
* and also for regions reported as reserved by the e820.
*/
probe_roms();
- e820_reserve_resources();
+ e820_reserve_resources();
#endif
request_resource(&iomem_resource, &video_ram_resource);
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
index 56c546fec5..71cf502718 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
@@ -33,6 +33,7 @@
#ifdef CONFIG_XEN
#include <asm/hypervisor.h>
#endif
+
char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
@@ -165,7 +166,6 @@ void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
}
#endif
-
void pda_init(int cpu)
{
struct x8664_pda *pda = &cpu_pda[cpu];
@@ -175,9 +175,10 @@ void pda_init(int cpu)
#ifndef CONFIG_XEN
wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
#else
- HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
- (unsigned long)(cpu_pda + cpu));
+ HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
+ (unsigned long)(cpu_pda + cpu));
#endif
+
pda->me = pda;
pda->cpunumber = cpu;
pda->irqcount = -1;
@@ -201,6 +202,7 @@ void pda_init(int cpu)
}
switch_pt();
+
pda->irqstackptr += IRQSTACKSIZE-64;
}
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
index 0e6ee3c72b..4f565ab325 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
@@ -30,8 +30,9 @@
#include <asm/apicdef.h>
#ifdef CONFIG_XEN
#include <asm-xen/evtchn.h>
+#endif
-#else
+#ifndef CONFIG_XEN
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
index 07a8a35dcc..6229915b11 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
@@ -559,9 +559,11 @@ static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
printk("You probably have a hardware problem with your RAM chips\n");
+#if 0 /* XEN */
/* Clear and disable the memory parity error line. */
reason = (reason & 0xf) | 4;
outb(reason, 0x61);
+#endif /* XEN */
}
static void io_check_error(unsigned char reason, struct pt_regs * regs)
@@ -569,12 +571,14 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs)
printk("NMI: IOCK error (debug interrupt?)\n");
show_registers(regs);
+#if 0 /* XEN */
/* Re-enable the IOCK line, wait for a few seconds */
reason = (reason & 0xf) | 8;
outb(reason, 0x61);
mdelay(2000);
reason &= ~8;
outb(reason, 0x61);
+#endif /* XEN */
}
static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
@@ -890,7 +894,6 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
asmlinkage void math_state_restore(void)
{
struct task_struct *me = current;
-
/* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
if (!used_math())
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
index 4b3a505f98..fd611408fb 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
@@ -59,9 +59,7 @@ EXPORT_SYMBOL(disable_irq_nosync);
EXPORT_SYMBOL(probe_irq_mask);
EXPORT_SYMBOL(kernel_thread);
EXPORT_SYMBOL(pm_idle);
-#ifdef CONFIG_ACPI_BOOT
EXPORT_SYMBOL(pm_power_off);
-#endif
EXPORT_SYMBOL(get_cmos_time);
EXPORT_SYMBOL(__down_failed);
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
index 97e6ea1895..4edb3c40c5 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
@@ -153,7 +153,6 @@ void dump_pagetable(unsigned long address)
pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
preempt_enable();
pgd += pgd_index(address);
-
printk("PGD %lx ", pgd_val(*pgd));
if (bad_address(pgd)) goto bad;
if (!pgd_present(*pgd)) goto ret;
@@ -258,7 +257,6 @@ static int vmalloc_fault(unsigned long address)
pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
preempt_enable();
pgd += pgd_index(address);
-
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
return -1;
diff --git a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
index ff58486f33..d682ee1222 100644
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
@@ -40,16 +40,16 @@
#include <asm/proto.h>
#include <asm/smp.h>
+#ifndef Dprintk
+#define Dprintk(x...)
+#endif
+
extern unsigned long *contiguous_bitmap;
#if defined(CONFIG_SWIOTLB)
extern void swiotlb_init(void);
#endif
-#ifndef Dprintk
-#define Dprintk(x...)
-#endif
-
extern char _stext[];
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -200,9 +200,9 @@ static void *spp_getpage(void)
static inline pud_t *pud_offset_u(unsigned long address)
{
- pud_t *pud = level3_user_pgt;
+ pud_t *pud = level3_user_pgt;
- return pud + pud_index(address);
+ return pud + pud_index(address);
}
static void set_pte_phys(unsigned long vaddr,
@@ -215,34 +215,27 @@ static void set_pte_phys(unsigned long vaddr,
Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
- pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));
-
+ pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));
if (pgd_none(*pgd)) {
printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
return;
}
-
- pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));
-
+ pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));
if (pud_none(*pud)) {
pmd = (pmd_t *) spp_getpage();
-
- make_page_readonly(pmd);
- xen_pmd_pin(__pa(pmd));
+ make_page_readonly(pmd);
+ xen_pmd_pin(__pa(pmd));
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
if (pmd != pmd_offset(pud, 0)) {
printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
return;
}
}
-
pmd = pmd_offset(pud, vaddr);
-
if (pmd_none(*pmd)) {
pte = (pte_t *) spp_getpage();
- make_page_readonly(pte);
-
- xen_pte_pin(__pa(pte));
+ make_page_readonly(pte);
+ xen_pte_pin(__pa(pte));
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
if (pte != pte_offset_kernel(pmd, 0)) {
printk("PAGETABLE BUG #02!\n");
@@ -252,11 +245,10 @@ static void set_pte_phys(unsigned long vaddr,
new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
pte = pte_offset_kernel(pmd, vaddr);
-
if (!pte_none(*pte) &&
pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
pte_ERROR(*pte);
- set_pte(pte, new_pte);
+ set_pte(pte, new_pte);
/*
* It's enough to flush this one mapping.
@@ -284,11 +276,11 @@ static void set_pte_phys_ma(unsigned long vaddr,
if (pud_none(*pud)) {
pmd = (pmd_t *) spp_getpage();
- make_page_readonly(pmd);
- xen_pmd_pin(__pa(pmd));
+ make_page_readonly(pmd);
+ xen_pmd_pin(__pa(pmd));
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
-
+
if (pmd != pmd_offset(pud, 0)) {
printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
return;
@@ -298,8 +290,8 @@ static void set_pte_phys_ma(unsigned long vaddr,
if (pmd_none(*pmd)) {
pte = (pte_t *) spp_getpage();
- make_page_readonly(pte);
- xen_pte_pin(__pa(pte));
+ make_page_readonly(pte);
+ xen_pte_pin(__pa(pte));
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
if (pte != pte_offset_kernel(pmd, 0)) {
@@ -311,12 +303,12 @@ static void set_pte_phys_ma(unsigned long vaddr,
new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
pte = pte_offset_kernel(pmd, vaddr);
- /*
- * Note that the pte page is already RO, thus we want to use
- * xen_l1_entry_update(), not set_pte().
- */
- xen_l1_entry_update(pte,
- pfn_pte_ma(phys >> PAGE_SHIFT, prot));
+ /*
+ * Note that the pte page is already RO, thus we want to use
+ * xen_l1_entry_update(), not set_pte().
+ */
+ xen_l1_entry_update(pte,
+ pfn_pte_ma(phys >> PAGE_SHIFT, prot));
/*
* It's enough to flush this one mapping.
@@ -347,7 +339,6 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
}
}
-
/*
* At this point it only supports vsyscall area.
*/
@@ -360,18 +351,18 @@ void __set_fixmap_user (enum fixed_addresses idx, unsigned long phys, pgprot_t p
return;
}
- set_pte_phys(address, phys, prot, SET_FIXMAP_USER);
+ set_pte_phys(address, phys, prot, SET_FIXMAP_USER);
}
unsigned long __initdata table_start, tables_space;
unsigned long get_machine_pfn(unsigned long addr)
{
- pud_t* pud = pud_offset_k(addr);
- pmd_t* pmd = pmd_offset(pud, addr);
- pte_t *pte = pte_offset_kernel(pmd, addr);
-
- return pte_mfn(*pte);
+ pud_t* pud = pud_offset_k(addr);
+ pmd_t* pmd = pmd_offset(pud, addr);
+ pte_t *pte = pte_offset_kernel(pmd, addr);
+
+ return pte_mfn(*pte);
}
static __init void *alloc_static_page(unsigned long *phys)
@@ -411,12 +402,11 @@ static inline int make_readonly(unsigned long paddr)
static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
- long i, j, k;
- unsigned long paddr;
+ long i, j, k;
+ unsigned long paddr;
i = pud_index(address);
pud = pud + i;
-
for (; i < PTRS_PER_PUD; pud++, i++) {
unsigned long pmd_phys;
pmd_t *pmd;
@@ -429,38 +419,37 @@ static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned lon
}
pmd = alloc_static_page(&pmd_phys);
- early_make_page_readonly(pmd);
- xen_pmd_pin(pmd_phys);
+ early_make_page_readonly(pmd);
+ xen_pmd_pin(pmd_phys);
set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
-
for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
- unsigned long pte_phys;
- pte_t *pte, *pte_save;
+ unsigned long pte_phys;
+ pte_t *pte, *pte_save;
if (paddr >= end) {
for (; j < PTRS_PER_PMD; j++, pmd++)
set_pmd(pmd, __pmd(0));
break;
}
- pte = alloc_static_page(&pte_phys);
- pte_save = pte;
- for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) {
- if ((paddr >= end) ||
- ((paddr >> PAGE_SHIFT) >=
- xen_start_info->nr_pages)) {
- __set_pte(pte, __pte(0));
- continue;
- }
- if (make_readonly(paddr)) {
- __set_pte(pte,
- __pte(paddr | (_KERNPG_TABLE & ~_PAGE_RW)));
- continue;
- }
- __set_pte(pte, __pte(paddr | _KERNPG_TABLE));
- }
- pte = pte_save;
- early_make_page_readonly(pte);
- xen_pte_pin(pte_phys);
+ pte = alloc_static_page(&pte_phys);
+ pte_save = pte;
+ for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) {
+ if ((paddr >= end) ||
+ ((paddr >> PAGE_SHIFT) >=
+ xen_start_info->nr_pages)) {
+ __set_pte(pte, __pte(0));
+ continue;
+ }
+ if (make_readonly(paddr)) {
+ __set_pte(pte,
+ __pte(paddr | (_KERNPG_TABLE & ~_PAGE_RW)));
+ continue;
+ }
+ __set_pte(pte, __pte(paddr | _KERNPG_TABLE));
+ }
+ pte = pte_save;
+ early_make_page_readonly(pte);
+ xen_pte_pin(pte_phys);
set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
}
}
@@ -506,7 +495,7 @@ void __init xen_init_pt(void)
level3_kernel_pgt[pud_index(__START_KERNEL_map)] =
__pud(__pa_symbol(level2_kernel_pgt) |
_KERNPG_TABLE | _PAGE_USER);
- memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
+ memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
early_make_page_readonly(init_level4_pgt);
early_make_page_readonly(init_level4_user_pgt);
@@ -618,7 +607,7 @@ extern struct x8664_pda cpu_pda[NR_CPUS];
void zap_low_mappings(void)
{
- /* this is not required for Xen */
+ /* this is not required for Xen */
#if 0
swap_low_mappings();
#endif
@@ -629,11 +618,11 @@ void __init paging_init(void)
{
{
unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
- /* unsigned int max_dma; */
- /* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
- /* if (end_pfn < max_dma) */
+ /* unsigned int max_dma; */
+ /* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
+ /* if (end_pfn < max_dma) */
zones_size[ZONE_DMA] = end_pfn;
-#if 0
+#if 0
else {
zones_size[ZONE_DMA] = max_dma;
zones_size[ZONE_NORMAL] = end_pfn - max_dma;
@@ -642,16 +631,16 @@ void __init paging_init(void)
free_area_init(zones_size);
}
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+ set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
- memset(empty_zero_page, 0, sizeof(empty_zero_page));
+ memset(empty_zero_page, 0, sizeof(empty_zero_page));
init_mm.context.pinned = 1;
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
{
int i;
- /* Setup mapping of lower 1st MB */
+ /* Setup mapping of lower 1st MB */
for (i = 0; i < NR_FIX_ISAMAPS; i++)
if (xen_start_info->flags & SIF_PRIVILEGED)
set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
@@ -701,7 +690,7 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
static inline int page_is_ram (unsigned long pagenr)
{
- return 1;
+ return 1;
}
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
@@ -790,10 +779,10 @@ extern char __initdata_begin[], __initdata_end[];
void free_initmem(void)
{
#ifdef __DO_LATER__
- /*
- * Some pages can be pinned, but some are not. Unpinning such pages
- * triggers BUG().
- */
+ /*
+ * Some pages can be pinned, but some are not. Unpinning such pages
+ * triggers BUG().
+ */
unsigned long addr;
addr = (unsigned long)(&__init_begin);
@@ -801,12 +790,12 @@ void free_initmem(void)
ClearPageReserved(virt_to_page(addr));
set_page_count(virt_to_page(addr), 1);
memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
- xen_pte_unpin(__pa(addr));
- make_page_writable(__va(__pa(addr)));
- /*
- * Make pages from __PAGE_OFFSET address as well
- */
- make_page_writable((void *)addr);
+ xen_pte_unpin(__pa(addr));
+ make_page_writable(__va(__pa(addr)));
+ /*
+ * Make pages from __PAGE_OFFSET address as well
+ */
+ make_page_writable((void *)addr);
free_page(addr);
totalram_pages++;
}
@@ -856,7 +845,7 @@ int kern_addr_valid(unsigned long addr)
if (pgd_none(*pgd))
return 0;
- pud = pud_offset_k(addr);
+ pud = pud_offset_k(addr);
if (pud_none(*pud))
return 0;
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
index 5ffd8a48fa..7b6d088016 100644
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
@@ -354,7 +354,7 @@ static void watch_target(struct xenbus_watch *watch,
unsigned long long new_target;
int err;
- err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target);
+ err = xenbus_scanf(XBT_NULL, "memory", "target", "%llu", &new_target);
if (err != 1) {
/* This is ok (for domain0 at least) - so just return */
return;
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
index 9228cb5190..34d3399a8e 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
@@ -540,6 +540,9 @@ static int __init blkif_init(void)
pending_vaddrs = kmalloc(sizeof(pending_vaddrs[0]) *
mmap_pages, GFP_KERNEL);
if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
+ kfree(pending_reqs);
+ kfree(pending_grant_handles);
+ kfree(pending_vaddrs);
printk("%s: out of memory\n", __FUNCTION__);
return -1;
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
index 100e984b09..f3a0287935 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
@@ -145,7 +145,7 @@ static int blkback_probe(struct xenbus_device *dev,
if (err)
goto fail;
- err = xenbus_switch_state(dev, NULL, XenbusStateInitWait);
+ err = xenbus_switch_state(dev, XBT_NULL, XenbusStateInitWait);
if (err)
goto fail;
@@ -175,7 +175,7 @@ static void backend_changed(struct xenbus_watch *watch,
DPRINTK("");
- err = xenbus_scanf(NULL, dev->nodename, "physical-device", "%x:%x",
+ err = xenbus_scanf(XBT_NULL, dev->nodename, "physical-device", "%x:%x",
&major, &minor);
if (XENBUS_EXIST_ERR(err)) {
/* Since this watch will fire once immediately after it is
@@ -197,7 +197,7 @@ static void backend_changed(struct xenbus_watch *watch,
return;
}
- be->mode = xenbus_read(NULL, dev->nodename, "mode", NULL);
+ be->mode = xenbus_read(XBT_NULL, dev->nodename, "mode", NULL);
if (IS_ERR(be->mode)) {
err = PTR_ERR(be->mode);
be->mode = NULL;
@@ -268,7 +268,7 @@ static void frontend_changed(struct xenbus_device *dev,
break;
case XenbusStateClosing:
- xenbus_switch_state(dev, NULL, XenbusStateClosing);
+ xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing);
break;
case XenbusStateClosed:
@@ -302,7 +302,7 @@ static void maybe_connect(struct backend_info *be)
*/
static void connect(struct backend_info *be)
{
- struct xenbus_transaction *xbt;
+ xenbus_transaction_t xbt;
int err;
struct xenbus_device *dev = be->dev;
@@ -310,10 +310,9 @@ static void connect(struct backend_info *be)
/* Supply the information about the device the frontend needs */
again:
- xbt = xenbus_transaction_start();
+ err = xenbus_transaction_start(&xbt);
- if (IS_ERR(xbt)) {
- err = PTR_ERR(xbt);
+ if (err) {
xenbus_dev_fatal(dev, err, "starting transaction");
return;
}
@@ -366,7 +365,7 @@ static int connect_ring(struct backend_info *be)
DPRINTK("%s", dev->otherend);
- err = xenbus_gather(NULL, dev->otherend, "ring-ref", "%lu", &ring_ref,
+ err = xenbus_gather(XBT_NULL, dev->otherend, "ring-ref", "%lu", &ring_ref,
"event-channel", "%u", &evtchn, NULL);
if (err) {
xenbus_dev_fatal(dev, err,
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
index 2ec97d1e0b..3e81abe8e4 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -91,7 +91,7 @@ static int blkfront_probe(struct xenbus_device *dev,
struct blkfront_info *info;
/* FIXME: Use dynamic device id if this is not set. */
- err = xenbus_scanf(NULL, dev->nodename,
+ err = xenbus_scanf(XBT_NULL, dev->nodename,
"virtual-device", "%i", &vdevice);
if (err != 1) {
xenbus_dev_fatal(dev, err, "reading virtual-device");
@@ -161,7 +161,7 @@ static int talk_to_backend(struct xenbus_device *dev,
struct blkfront_info *info)
{
const char *message = NULL;
- struct xenbus_transaction *xbt;
+ xenbus_transaction_t xbt;
int err;
/* Create shared ring, alloc event channel. */
@@ -170,8 +170,8 @@ static int talk_to_backend(struct xenbus_device *dev,
goto out;
again:
- xbt = xenbus_transaction_start();
- if (IS_ERR(xbt)) {
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
xenbus_dev_fatal(dev, err, "starting transaction");
goto destroy_blkring;
}
@@ -319,7 +319,7 @@ static void connect(struct blkfront_info *info)
DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
- err = xenbus_gather(NULL, info->xbdev->otherend,
+ err = xenbus_gather(XBT_NULL, info->xbdev->otherend,
"sectors", "%lu", &sectors,
"info", "%u", &binfo,
"sector-size", "%lu", &sector_size,
@@ -338,7 +338,7 @@ static void connect(struct blkfront_info *info)
return;
}
- (void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected);
+ (void)xenbus_switch_state(info->xbdev, XBT_NULL, XenbusStateConnected);
/* Kick pending requests. */
spin_lock_irq(&blkif_io_lock);
@@ -367,7 +367,7 @@ static void blkfront_closing(struct xenbus_device *dev)
info->mi = NULL;
}
- xenbus_switch_state(dev, NULL, XenbusStateClosed);
+ xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed);
}
@@ -775,7 +775,7 @@ static void blkif_recover(struct blkfront_info *info)
kfree(copy);
- (void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected);
+ (void)xenbus_switch_state(info->xbdev, XBT_NULL, XenbusStateConnected);
/* Now safe for us to use the shared ring */
spin_lock_irq(&blkif_io_lock);
diff --git a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
index 299a4a5a59..104dd816e7 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
@@ -208,7 +208,7 @@ static struct page *blktap_nopage(struct vm_area_struct *vma,
}
struct vm_operations_struct blktap_vm_ops = {
- nopage: blktap_nopage,
+ .nopage = blktap_nopage,
};
/******************************************************************
@@ -225,7 +225,7 @@ static int blktap_open(struct inode *inode, struct file *filp)
/* Allocate the fe ring. */
sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
if (sring == NULL)
- goto fail_nomem;
+ return -ENOMEM;
SetPageReserved(virt_to_page(sring));
@@ -233,9 +233,6 @@ static int blktap_open(struct inode *inode, struct file *filp)
FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
return 0;
-
- fail_nomem:
- return -ENOMEM;
}
static int blktap_release(struct inode *inode, struct file *filp)
@@ -391,12 +388,12 @@ void blktap_kick_user(void)
}
static struct file_operations blktap_fops = {
- owner: THIS_MODULE,
- poll: blktap_poll,
- ioctl: blktap_ioctl,
- open: blktap_open,
- release: blktap_release,
- mmap: blktap_mmap,
+ .owner = THIS_MODULE,
+ .poll = blktap_poll,
+ .ioctl = blktap_ioctl,
+ .open = blktap_open,
+ .release = blktap_release,
+ .mmap = blktap_mmap,
};
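The two hunks above replace GCC's old `field: value` structure-initializer extension with standard C99 designated initializers. The semantics are identical (unnamed members are zero-initialized in both forms), but the C99 spelling is portable and is the kernel's preferred style. A minimal sketch, with hypothetical handler names:

    static struct file_operations example_fops = {
            .owner   = THIS_MODULE,
            .open    = example_open,       /* hypothetical handlers */
            .release = example_release,
    };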
diff --git a/linux-2.6-xen-sparse/drivers/xen/console/console.c b/linux-2.6-xen-sparse/drivers/xen/console/console.c
index 9183a47af8..44ae31d087 100644
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c
@@ -314,39 +314,31 @@ static void __xencons_tx_flush(void)
{
int sent, sz, work_done = 0;
- if (xen_start_info->flags & SIF_INITDOMAIN) {
- if (x_char) {
+ if (x_char) {
+ if (xen_start_info->flags & SIF_INITDOMAIN)
kcons_write_dom0(NULL, &x_char, 1);
- x_char = 0;
- work_done = 1;
- }
+ else
+ while (x_char)
+ if (xencons_ring_send(&x_char, 1) == 1)
+ break;
+ x_char = 0;
+ work_done = 1;
+ }
- while (wc != wp) {
- sz = wp - wc;
- if (sz > (wbuf_size - WBUF_MASK(wc)))
- sz = wbuf_size - WBUF_MASK(wc);
+ while (wc != wp) {
+ sz = wp - wc;
+ if (sz > (wbuf_size - WBUF_MASK(wc)))
+ sz = wbuf_size - WBUF_MASK(wc);
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
wc += sz;
- work_done = 1;
- }
- } else {
- while (x_char) {
- if (xencons_ring_send(&x_char, 1) == 1) {
- x_char = 0;
- work_done = 1;
- }
- }
-
- while (wc != wp) {
- sz = wp - wc;
- if (sz > (wbuf_size - WBUF_MASK(wc)))
- sz = wbuf_size - WBUF_MASK(wc);
+ } else {
sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
if (sent == 0)
break;
wc += sent;
- work_done = 1;
}
+ work_done = 1;
}
if (work_done && (xencons_tty != NULL)) {
diff --git a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c b/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
index c6fe4a3838..8826b80ca8 100644
--- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
+++ b/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
@@ -38,7 +38,7 @@ int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
char *s;
int i;
char *e;
- char *macstr = xenbus_read(NULL, dev->nodename, "mac", NULL);
+ char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
if (IS_ERR(macstr)) {
return PTR_ERR(macstr);
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
index 16cc929a11..a27533cf87 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
@@ -183,7 +183,7 @@ static void unmap_frontend_pages(netif_t *netif)
int netif_map(netif_t *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn)
{
- int err;
+ int err = -ENOMEM;
netif_tx_sring_t *txs;
netif_rx_sring_t *rxs;
evtchn_op_t op = {
@@ -199,25 +199,16 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref,
if (netif->tx_comms_area == NULL)
return -ENOMEM;
netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
- if (netif->rx_comms_area == NULL) {
- free_vm_area(netif->tx_comms_area);
- return -ENOMEM;
- }
+ if (netif->rx_comms_area == NULL)
+ goto err_rx;
err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
- if (err) {
- free_vm_area(netif->tx_comms_area);
- free_vm_area(netif->rx_comms_area);
- return err;
- }
+ if (err)
+ goto err_map;
err = HYPERVISOR_event_channel_op(&op);
- if (err) {
- unmap_frontend_pages(netif);
- free_vm_area(netif->tx_comms_area);
- free_vm_area(netif->rx_comms_area);
- return err;
- }
+ if (err)
+ goto err_hypervisor;
netif->evtchn = op.u.bind_interdomain.local_port;
@@ -245,6 +236,13 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref,
rtnl_unlock();
return 0;
+err_hypervisor:
+ unmap_frontend_pages(netif);
+err_map:
+ free_vm_area(netif->rx_comms_area);
+err_rx:
+ free_vm_area(netif->tx_comms_area);
+ return err;
}
static void free_netif_callback(void *arg)
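netif_map() now uses the conventional goto-based unwind: err is pre-seeded with -ENOMEM so allocation failures can share the exit path, and the labels release resources in reverse order of acquisition, so each failure point jumps to exactly the cleanup it needs. A general sketch of the idiom, with hypothetical resource helpers:

    int setup(void)
    {
            int err = -ENOMEM;
            struct res *a, *b;

            a = alloc_a();
            if (a == NULL)
                    return -ENOMEM;

            b = alloc_b();
            if (b == NULL)
                    goto err_b;

            err = register_c();
            if (err)
                    goto err_c;

            return 0;

    err_c:
            free_b(b);
    err_b:
            free_a(a);
            return err;
    }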
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
index fa106a3834..b8ed4fdfab 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
@@ -14,6 +14,7 @@
#include <asm-xen/balloon.h>
#include <asm-xen/xen-public/memory.h>
+/*#define NETBE_DEBUG_INTERRUPT*/
static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
@@ -727,6 +728,7 @@ static int make_rx_response(netif_t *netif,
return notify;
}
+#ifdef NETBE_DEBUG_INTERRUPT
static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
{
struct list_head *ent;
@@ -758,6 +760,7 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
return IRQ_HANDLED;
}
+#endif
static int __init netback_init(void)
{
@@ -794,6 +797,7 @@ static int __init netback_init(void)
netif_xenbus_init();
+#ifdef NETBE_DEBUG_INTERRUPT
(void)bind_virq_to_irqhandler(
VIRQ_DEBUG,
0,
@@ -801,6 +805,7 @@ static int __init netback_init(void)
SA_SHIRQ,
"net-be-dbg",
&netif_be_dbg);
+#endif
return 0;
}
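Wrapping the debug handler and its VIRQ_DEBUG binding in #ifdef NETBE_DEBUG_INTERRUPT means neither is compiled into production builds; uncommenting the #define at the top of the file re-enables both together, so the handler can never be left unbound or bound unintentionally.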
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
index 086c2ec5fb..9db90d2e5e 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
@@ -93,7 +93,7 @@ static int netback_probe(struct xenbus_device *dev,
if (err)
goto fail;
- err = xenbus_switch_state(dev, NULL, XenbusStateInitWait);
+ err = xenbus_switch_state(dev, XBT_NULL, XenbusStateInitWait);
if (err) {
goto fail;
}
@@ -122,7 +122,7 @@ static int netback_hotplug(struct xenbus_device *xdev, char **envp,
DPRINTK("netback_hotplug");
- val = xenbus_read(NULL, xdev->nodename, "script", NULL);
+ val = xenbus_read(XBT_NULL, xdev->nodename, "script", NULL);
if (IS_ERR(val)) {
int err = PTR_ERR(val);
xenbus_dev_fatal(xdev, err, "reading script");
@@ -160,7 +160,7 @@ static void backend_changed(struct xenbus_watch *watch,
DPRINTK("");
- err = xenbus_scanf(NULL, dev->nodename, "handle", "%li", &handle);
+ err = xenbus_scanf(XBT_NULL, dev->nodename, "handle", "%li", &handle);
if (XENBUS_EXIST_ERR(err)) {
/* Since this watch will fire once immediately after it is
registered, we expect this. Ignore it, and wait for the
@@ -212,7 +212,7 @@ static void frontend_changed(struct xenbus_device *dev,
break;
case XenbusStateClosing:
- xenbus_switch_state(dev, NULL, XenbusStateClosing);
+ xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing);
break;
case XenbusStateClosed:
@@ -256,7 +256,7 @@ static void connect(struct backend_info *be)
return;
}
- xenbus_switch_state(dev, NULL, XenbusStateConnected);
+ xenbus_switch_state(dev, XBT_NULL, XenbusStateConnected);
}
@@ -269,7 +269,7 @@ static int connect_rings(struct backend_info *be)
DPRINTK("");
- err = xenbus_gather(NULL, dev->otherend,
+ err = xenbus_gather(XBT_NULL, dev->otherend,
"tx-ring-ref", "%lu", &tx_ring_ref,
"rx-ring-ref", "%lu", &rx_ring_ref,
"event-channel", "%u", &evtchn, NULL);
diff --git a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
index eca6b4c888..f80250d55a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
@@ -212,7 +212,7 @@ static int netfront_probe(struct xenbus_device *dev,
struct netfront_info *info;
unsigned int handle;
- err = xenbus_scanf(NULL, dev->nodename, "handle", "%u", &handle);
+ err = xenbus_scanf(XBT_NULL, dev->nodename, "handle", "%u", &handle);
if (err != 1) {
xenbus_dev_fatal(dev, err, "reading handle");
return err;
@@ -260,7 +260,7 @@ static int talk_to_backend(struct xenbus_device *dev,
struct netfront_info *info)
{
const char *message;
- struct xenbus_transaction *xbt;
+ xenbus_transaction_t xbt;
int err;
err = xen_net_read_mac(dev, info->mac);
@@ -275,8 +275,8 @@ static int talk_to_backend(struct xenbus_device *dev,
goto out;
again:
- xbt = xenbus_transaction_start();
- if (IS_ERR(xbt)) {
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
xenbus_dev_fatal(dev, err, "starting transaction");
goto destroy_ring;
}
@@ -1199,7 +1199,7 @@ static void netfront_closing(struct xenbus_device *dev)
close_netdev(info);
- xenbus_switch_state(dev, NULL, XenbusStateClosed);
+ xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed);
}
@@ -1218,22 +1218,14 @@ static int netfront_remove(struct xenbus_device *dev)
static void close_netdev(struct netfront_info *info)
{
- /* Stop old i/f to prevent errors whilst we rebuild the state. */
- spin_lock_irq(&info->tx_lock);
- spin_lock(&info->rx_lock);
+ spin_lock_irq(&info->netdev->xmit_lock);
netif_stop_queue(info->netdev);
- /* info->backend_state = BEST_DISCONNECTED; */
- spin_unlock(&info->rx_lock);
- spin_unlock_irq(&info->tx_lock);
+ spin_unlock_irq(&info->netdev->xmit_lock);
#ifdef CONFIG_PROC_FS
xennet_proc_delif(info->netdev);
#endif
- if (info->irq)
- unbind_from_irqhandler(info->irq, info->netdev);
- info->evtchn = info->irq = 0;
-
del_timer_sync(&info->rx_refill_timer);
unregister_netdev(info->netdev);
@@ -1242,6 +1234,17 @@ static void close_netdev(struct netfront_info *info)
static void netif_disconnect_backend(struct netfront_info *info)
{
+ /* Stop old i/f to prevent errors whilst we rebuild the state. */
+ spin_lock_irq(&info->tx_lock);
+ spin_lock(&info->rx_lock);
+ info->backend_state = BEST_DISCONNECTED;
+ spin_unlock(&info->rx_lock);
+ spin_unlock_irq(&info->tx_lock);
+
+ if (info->irq)
+ unbind_from_irqhandler(info->irq, info->netdev);
+ info->evtchn = info->irq = 0;
+
end_access(info->tx_ring_ref, info->tx.sring);
end_access(info->rx_ring_ref, info->rx.sring);
info->tx_ring_ref = GRANT_INVALID_REF;
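The teardown split here is worth noting: close_netdev() now quiesces transmission with the net device's own xmit_lock rather than the driver's ring locks, while marking the backend BEST_DISCONNECTED, unbinding the IRQ, and releasing the shared rings all move into netif_disconnect_backend(), so everything tied to the backend connection is torn down in one place.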
diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
index c1ce6f7a61..60f0c50d1c 100644
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
@@ -89,7 +89,7 @@ static int tpmback_probe(struct xenbus_device *dev,
goto fail;
}
- err = xenbus_switch_state(dev, NULL, XenbusStateInitWait);
+ err = xenbus_switch_state(dev, XBT_NULL, XenbusStateInitWait);
if (err) {
goto fail;
}
@@ -109,7 +109,7 @@ static void backend_changed(struct xenbus_watch *watch,
= container_of(watch, struct backend_info, backend_watch);
struct xenbus_device *dev = be->dev;
- err = xenbus_scanf(NULL, dev->nodename,
+ err = xenbus_scanf(XBT_NULL, dev->nodename,
"instance","%li", &instance);
if (XENBUS_EXIST_ERR(err)) {
return;
@@ -177,7 +177,7 @@ static void frontend_changed(struct xenbus_device *dev,
break;
case XenbusStateClosing:
- xenbus_switch_state(dev, NULL, XenbusStateClosing);
+ xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing);
break;
case XenbusStateClosed:
@@ -230,15 +230,14 @@ static void maybe_connect(struct backend_info *be)
static void connect(struct backend_info *be)
{
- struct xenbus_transaction *xbt;
+ xenbus_transaction_t xbt;
int err;
struct xenbus_device *dev = be->dev;
unsigned long ready = 1;
again:
- xbt = xenbus_transaction_start();
- if (IS_ERR(xbt)) {
- err = PTR_ERR(xbt);
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
xenbus_dev_fatal(be->dev, err, "starting transaction");
return;
}
@@ -275,7 +274,7 @@ static int connect_ring(struct backend_info *be)
unsigned int evtchn;
int err;
- err = xenbus_gather(NULL, dev->otherend,
+ err = xenbus_gather(XBT_NULL, dev->otherend,
"ring-ref", "%lu", &ring_ref,
"event-channel", "%u", &evtchn, NULL);
if (err) {
diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c
index 72b1971421..bb4bbe83c0 100644
--- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c
@@ -271,7 +271,7 @@ static int talk_to_backend(struct xenbus_device *dev,
{
const char *message = NULL;
int err;
- struct xenbus_transaction *xbt;
+ xenbus_transaction_t xbt;
err = setup_tpmring(dev, info);
if (err) {
@@ -280,8 +280,8 @@ static int talk_to_backend(struct xenbus_device *dev,
}
again:
- xbt = xenbus_transaction_start();
- if (IS_ERR(xbt)) {
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
xenbus_dev_fatal(dev, err, "starting transaction");
goto destroy_tpmring;
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
index 42b974f425..303eab86c4 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
@@ -87,7 +87,7 @@ EXPORT_SYMBOL(xenbus_watch_path2);
int xenbus_switch_state(struct xenbus_device *dev,
- struct xenbus_transaction *xbt,
+ xenbus_transaction_t xbt,
XenbusState state)
{
/* We check whether the state is currently set to the given value, and
@@ -152,7 +152,7 @@ void _dev_error(struct xenbus_device *dev, int err, const char *fmt,
goto fail;
}
- if (xenbus_write(NULL, path_buffer, "error", printf_buffer) != 0) {
+ if (xenbus_write(XBT_NULL, path_buffer, "error", printf_buffer) != 0) {
printk("xenbus: failed to write error node for %s (%s)\n",
dev->nodename, printf_buffer);
goto fail;
@@ -187,7 +187,7 @@ void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
_dev_error(dev, err, fmt, ap);
va_end(ap);
- xenbus_switch_state(dev, NULL, XenbusStateClosing);
+ xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing);
}
EXPORT_SYMBOL(xenbus_dev_fatal);
@@ -223,7 +223,7 @@ XenbusState xenbus_read_driver_state(const char *path)
{
XenbusState result;
- int err = xenbus_gather(NULL, path, "state", "%d", &result, NULL);
+ int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL);
if (err)
result = XenbusStateClosed;
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
index 0731ff45f3..fc8b22453f 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
@@ -47,7 +47,7 @@
struct xenbus_dev_transaction {
struct list_head list;
- struct xenbus_transaction *handle;
+ xenbus_transaction_t handle;
};
struct xenbus_dev_data {
@@ -109,9 +109,8 @@ static ssize_t xenbus_dev_write(struct file *filp,
size_t len, loff_t *ppos)
{
struct xenbus_dev_data *u = filp->private_data;
- struct xenbus_dev_transaction *trans;
+ struct xenbus_dev_transaction *trans = NULL;
void *reply;
- int err = 0;
if ((len + u->len) > sizeof(u->u.buffer))
return -EINVAL;
@@ -135,42 +134,40 @@ static ssize_t xenbus_dev_write(struct file *filp,
case XS_MKDIR:
case XS_RM:
case XS_SET_PERMS:
+ if (u->u.msg.type == XS_TRANSACTION_START) {
+ trans = kmalloc(sizeof(*trans), GFP_KERNEL);
+ if (!trans)
+ return -ENOMEM;
+ }
+
reply = xenbus_dev_request_and_reply(&u->u.msg);
if (IS_ERR(reply)) {
- err = PTR_ERR(reply);
- } else {
- if (u->u.msg.type == XS_TRANSACTION_START) {
- trans = kmalloc(sizeof(*trans), GFP_KERNEL);
- trans->handle = (struct xenbus_transaction *)
- simple_strtoul(reply, NULL, 0);
- list_add(&trans->list, &u->transactions);
- } else if (u->u.msg.type == XS_TRANSACTION_END) {
- list_for_each_entry(trans, &u->transactions,
- list)
- if ((unsigned long)trans->handle ==
- (unsigned long)u->u.msg.tx_id)
- break;
- BUG_ON(&trans->list == &u->transactions);
- list_del(&trans->list);
- kfree(trans);
- }
- queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
- queue_reply(u, (char *)reply, u->u.msg.len);
- kfree(reply);
+ kfree(trans);
+ return PTR_ERR(reply);
}
- break;
- default:
- err = -EINVAL;
+ if (u->u.msg.type == XS_TRANSACTION_START) {
+ trans->handle = simple_strtoul(reply, NULL, 0);
+ list_add(&trans->list, &u->transactions);
+ } else if (u->u.msg.type == XS_TRANSACTION_END) {
+ list_for_each_entry(trans, &u->transactions, list)
+ if (trans->handle == u->u.msg.tx_id)
+ break;
+ BUG_ON(&trans->list == &u->transactions);
+ list_del(&trans->list);
+ kfree(trans);
+ }
+ queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
+ queue_reply(u, (char *)reply, u->u.msg.len);
+ kfree(reply);
break;
- }
- if (err == 0) {
- u->len = 0;
- err = len;
+ default:
+ return -EINVAL;
}
- return err;
+ u->len = 0;
+ return len;
}
static int xenbus_dev_open(struct inode *inode, struct file *filp)
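Two fixes are folded into this rewrite: the transaction record is now allocated before the request is sent, so an out-of-memory condition fails cleanly instead of leaving xenstored with an open transaction the kernel has no record of (the old code did not even check the kmalloc result), and the handle is stored as the plain u32 tx_id rather than a pointer-sized cast of it.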
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
index 09001a5463..6fbe1e3649 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
@@ -115,7 +115,7 @@ static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
static int read_otherend_details(struct xenbus_device *xendev,
char *id_node, char *path_node)
{
- int err = xenbus_gather(NULL, xendev->nodename,
+ int err = xenbus_gather(XBT_NULL, xendev->nodename,
id_node, "%i", &xendev->otherend_id,
path_node, NULL, &xendev->otherend,
NULL);
@@ -126,7 +126,7 @@ static int read_otherend_details(struct xenbus_device *xendev,
return err;
}
if (strlen(xendev->otherend) == 0 ||
- !xenbus_exists(NULL, xendev->otherend, "")) {
+ !xenbus_exists(XBT_NULL, xendev->otherend, "")) {
xenbus_dev_fatal(xendev, -ENOENT, "missing other end from %s",
xendev->nodename);
kfree(xendev->otherend);
@@ -200,14 +200,14 @@ static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
devid = strrchr(nodename, '/') + 1;
- err = xenbus_gather(NULL, nodename, "frontend-id", "%i", &domid,
+ err = xenbus_gather(XBT_NULL, nodename, "frontend-id", "%i", &domid,
"frontend", NULL, &frontend,
NULL);
if (err)
return err;
if (strlen(frontend) == 0)
err = -ERANGE;
- if (!err && !xenbus_exists(NULL, frontend, ""))
+ if (!err && !xenbus_exists(XBT_NULL, frontend, ""))
err = -ENOENT;
kfree(frontend);
@@ -373,7 +373,7 @@ static int xenbus_dev_probe(struct device *_dev)
return 0;
fail:
xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
- xenbus_switch_state(dev, NULL, XenbusStateClosed);
+ xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed);
return -ENODEV;
}
@@ -391,7 +391,7 @@ static int xenbus_dev_remove(struct device *_dev)
if (drv->remove)
drv->remove(dev);
- xenbus_switch_state(dev, NULL, XenbusStateClosed);
+ xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed);
return 0;
}
@@ -470,12 +470,17 @@ static int cleanup_dev(struct device *dev, void *data)
DPRINTK("%s", info->nodename);
- if (!strncmp(xendev->nodename, info->nodename, len)) {
- info->dev = xendev;
- get_device(dev);
- return 1;
- }
- return 0;
+ /* Match the info->nodename path, or any subdirectory of that path. */
+ if (strncmp(xendev->nodename, info->nodename, len))
+ return 0;
+
+ /* If the node name is longer, ensure it really is a subdirectory. */
+ if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/'))
+ return 0;
+
+ info->dev = xendev;
+ get_device(dev);
+ return 1;
}
static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
@@ -542,14 +547,6 @@ static int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
const char *nodename)
{
-#define CHECK_FAIL \
- do { \
- if (err) \
- goto fail; \
- } \
- while (0) \
-
-
int err;
struct xenbus_device *xendev;
size_t stringlen;
@@ -584,19 +581,18 @@ static int xenbus_probe_node(struct xen_bus_type *bus,
xendev->dev.release = xenbus_dev_release;
err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
- CHECK_FAIL;
+ if (err)
+ goto fail;
/* Register with generic device framework. */
err = device_register(&xendev->dev);
- CHECK_FAIL;
+ if (err)
+ goto fail;
device_create_file(&xendev->dev, &dev_attr_nodename);
device_create_file(&xendev->dev, &dev_attr_devtype);
return 0;
-
-#undef CHECK_FAIL
-
fail:
xenbus_dev_free(xendev);
return err;
@@ -652,7 +648,7 @@ static int xenbus_probe_backend(const char *type, const char *domid)
if (!nodename)
return -ENOMEM;
- dir = xenbus_directory(NULL, nodename, "", &dir_n);
+ dir = xenbus_directory(XBT_NULL, nodename, "", &dir_n);
if (IS_ERR(dir)) {
kfree(nodename);
return PTR_ERR(dir);
@@ -675,7 +671,7 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
unsigned int dir_n = 0;
int i;
- dir = xenbus_directory(NULL, bus->root, type, &dir_n);
+ dir = xenbus_directory(XBT_NULL, bus->root, type, &dir_n);
if (IS_ERR(dir))
return PTR_ERR(dir);
@@ -694,7 +690,7 @@ static int xenbus_probe_devices(struct xen_bus_type *bus)
char **dir;
unsigned int i, dir_n;
- dir = xenbus_directory(NULL, bus->root, "", &dir_n);
+ dir = xenbus_directory(XBT_NULL, bus->root, "", &dir_n);
if (IS_ERR(dir))
return PTR_ERR(dir);
@@ -740,7 +736,7 @@ static void dev_changed(const char *node, struct xen_bus_type *bus)
if (char_count(node, '/') < 2)
return;
- exists = xenbus_exists(NULL, node, "");
+ exists = xenbus_exists(XBT_NULL, node, "");
if (!exists) {
xenbus_cleanup_devices(node, &bus->bus);
return;
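The cleanup_dev() change fixes a prefix-matching bug: a bare strncmp() over len bytes would also match sibling nodes that merely share a prefix; for example (hypothetical paths), removing device/vif/1 would have matched device/vif/10 as well. Requiring the character after the prefix to be '/' restricts the match to the node itself and its true subdirectories.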
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
index 177b0d623f..fa404870f1 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
@@ -190,7 +190,7 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
}
/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */
-static void *xs_talkv(struct xenbus_transaction *t,
+static void *xs_talkv(xenbus_transaction_t t,
enum xsd_sockmsg_type type,
const struct kvec *iovec,
unsigned int num_vecs,
@@ -201,7 +201,7 @@ static void *xs_talkv(struct xenbus_transaction *t,
unsigned int i;
int err;
- msg.tx_id = (u32)(unsigned long)t;
+ msg.tx_id = t;
msg.req_id = 0;
msg.type = type;
msg.len = 0;
@@ -242,7 +242,7 @@ static void *xs_talkv(struct xenbus_transaction *t,
}
/* Simplified version of xs_talkv: single message. */
-static void *xs_single(struct xenbus_transaction *t,
+static void *xs_single(xenbus_transaction_t t,
enum xsd_sockmsg_type type,
const char *string,
unsigned int *len)
@@ -309,7 +309,7 @@ static char **split(char *strings, unsigned int len, unsigned int *num)
return ret;
}
-char **xenbus_directory(struct xenbus_transaction *t,
+char **xenbus_directory(xenbus_transaction_t t,
const char *dir, const char *node, unsigned int *num)
{
char *strings, *path;
@@ -329,7 +329,7 @@ char **xenbus_directory(struct xenbus_transaction *t,
EXPORT_SYMBOL(xenbus_directory);
/* Check if a path exists. Return 1 if it does. */
-int xenbus_exists(struct xenbus_transaction *t,
+int xenbus_exists(xenbus_transaction_t t,
const char *dir, const char *node)
{
char **d;
@@ -347,7 +347,7 @@ EXPORT_SYMBOL(xenbus_exists);
* Returns a kmalloced value: call free() on it after use.
* len indicates length in bytes.
*/
-void *xenbus_read(struct xenbus_transaction *t,
+void *xenbus_read(xenbus_transaction_t t,
const char *dir, const char *node, unsigned int *len)
{
char *path;
@@ -366,7 +366,7 @@ EXPORT_SYMBOL(xenbus_read);
/* Write the value of a single file.
* Returns -err on failure.
*/
-int xenbus_write(struct xenbus_transaction *t,
+int xenbus_write(xenbus_transaction_t t,
const char *dir, const char *node, const char *string)
{
const char *path;
@@ -389,7 +389,7 @@ int xenbus_write(struct xenbus_transaction *t,
EXPORT_SYMBOL(xenbus_write);
/* Create a new directory. */
-int xenbus_mkdir(struct xenbus_transaction *t,
+int xenbus_mkdir(xenbus_transaction_t t,
const char *dir, const char *node)
{
char *path;
@@ -406,7 +406,7 @@ int xenbus_mkdir(struct xenbus_transaction *t,
EXPORT_SYMBOL(xenbus_mkdir);
/* Destroy a file or directory (directories must be empty). */
-int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node)
+int xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node)
{
char *path;
int ret;
@@ -424,30 +424,28 @@ EXPORT_SYMBOL(xenbus_rm);
/* Start a transaction: changes by others will not be seen during this
* transaction, and changes will not be visible to others until end.
*/
-struct xenbus_transaction *xenbus_transaction_start(void)
+int xenbus_transaction_start(xenbus_transaction_t *t)
{
char *id_str;
- unsigned long id;
down_read(&xs_state.suspend_mutex);
- id_str = xs_single(NULL, XS_TRANSACTION_START, "", NULL);
+ id_str = xs_single(XBT_NULL, XS_TRANSACTION_START, "", NULL);
if (IS_ERR(id_str)) {
up_read(&xs_state.suspend_mutex);
- return (struct xenbus_transaction *)id_str;
+ return PTR_ERR(id_str);
}
- id = simple_strtoul(id_str, NULL, 0);
+ *t = simple_strtoul(id_str, NULL, 0);
kfree(id_str);
-
- return (struct xenbus_transaction *)id;
+ return 0;
}
EXPORT_SYMBOL(xenbus_transaction_start);
/* End a transaction.
* If abandon is true, transaction is discarded instead of committed.
*/
-int xenbus_transaction_end(struct xenbus_transaction *t, int abort)
+int xenbus_transaction_end(xenbus_transaction_t t, int abort)
{
char abortstr[2];
int err;
@@ -466,7 +464,7 @@ int xenbus_transaction_end(struct xenbus_transaction *t, int abort)
EXPORT_SYMBOL(xenbus_transaction_end);
/* Single read and scanf: returns -errno or num scanned. */
-int xenbus_scanf(struct xenbus_transaction *t,
+int xenbus_scanf(xenbus_transaction_t t,
const char *dir, const char *node, const char *fmt, ...)
{
va_list ap;
@@ -489,7 +487,7 @@ int xenbus_scanf(struct xenbus_transaction *t,
EXPORT_SYMBOL(xenbus_scanf);
/* Single printf and write: returns -errno or 0. */
-int xenbus_printf(struct xenbus_transaction *t,
+int xenbus_printf(xenbus_transaction_t t,
const char *dir, const char *node, const char *fmt, ...)
{
va_list ap;
@@ -515,7 +513,7 @@ int xenbus_printf(struct xenbus_transaction *t,
EXPORT_SYMBOL(xenbus_printf);
/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
-int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...)
+int xenbus_gather(xenbus_transaction_t t, const char *dir, ...)
{
va_list ap;
const char *name;
@@ -553,7 +551,7 @@ static int xs_watch(const char *path, const char *token)
iov[1].iov_base = (void *)token;
iov[1].iov_len = strlen(token) + 1;
- return xs_error(xs_talkv(NULL, XS_WATCH, iov,
+ return xs_error(xs_talkv(XBT_NULL, XS_WATCH, iov,
ARRAY_SIZE(iov), NULL));
}
@@ -566,7 +564,7 @@ static int xs_unwatch(const char *path, const char *token)
iov[1].iov_base = (char *)token;
iov[1].iov_len = strlen(token) + 1;
- return xs_error(xs_talkv(NULL, XS_UNWATCH, iov,
+ return xs_error(xs_talkv(XBT_NULL, XS_UNWATCH, iov,
ARRAY_SIZE(iov), NULL));
}
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
index 6fc4203116..04edbd3741 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
@@ -32,6 +32,7 @@
#include <asm-xen/xen-public/xen.h>
#include <asm-xen/xen-public/sched.h>
+#include <asm-xen/xen-public/nmi.h>
#define _hypercall0(type, name) \
({ \
@@ -300,6 +301,14 @@ HYPERVISOR_suspend(
SHUTDOWN_suspend, srec);
}
+static inline int
+HYPERVISOR_nmi_op(
+ unsigned long op,
+ unsigned long arg)
+{
+ return _hypercall2(int, nmi_op, op, arg);
+}
+
#endif /* __HYPERCALL_H__ */
/*
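The new wrapper reaches the nmi_op hypercall declared in the xen-public/nmi.h header included above. Its only use in this patch is callback registration during arch setup (see the setup_arch_post.h hunks below), along the lines of:

    extern void nmi(void);   /* low-level NMI entry point provided by entry.S */

    HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);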
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h
new file mode 100644
index 0000000000..14c607962b
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h
@@ -0,0 +1,33 @@
+/*
+ * include/asm-xen/asm-i386/mach-xen/mach_traps.h
+ *
+ * Machine specific NMI handling for Xen
+ */
+#ifndef _MACH_TRAPS_H
+#define _MACH_TRAPS_H
+
+#include <linux/bitops.h>
+#include <asm-xen/xen-public/nmi.h>
+
+static inline void clear_mem_error(unsigned char reason) {}
+static inline void clear_io_check_error(unsigned char reason) {}
+
+static inline unsigned char get_nmi_reason(void)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+ unsigned char reason = 0;
+
+ /* construct a value which looks like it came from
+ * port 0x61.
+ */
+ if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
+ reason |= 0x40;
+ if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
+ reason |= 0x80;
+
+ return reason;
+}
+
+static inline void reassert_nmi(void) {}
+
+#endif /* !_MACH_TRAPS_H */
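The synthesized reason byte mimics system control port B (0x61) on a PC, where bit 6 (0x40) reports an I/O channel check and bit 7 (0x80) a memory parity error. Faking that layout lets the generic i386 NMI code classify Xen-delivered NMIs without modification, which is exactly what the i386-mach-io-check-nmi.patch below relies on.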
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
index b30853d2cf..fa7d6191b1 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
@@ -29,6 +29,7 @@ void __init machine_specific_modify_cpu_capabilities(struct cpuinfo_x86 *c)
extern void hypervisor_callback(void);
extern void failsafe_callback(void);
+extern void nmi(void);
static void __init machine_specific_arch_setup(void)
{
@@ -36,5 +37,7 @@ static void __init machine_specific_arch_setup(void)
__KERNEL_CS, (unsigned long)hypervisor_callback,
__KERNEL_CS, (unsigned long)failsafe_callback);
+ HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
+
machine_specific_modify_cpu_capabilities(&boot_cpu_data);
}
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
index bb338772d0..521f004c00 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
@@ -287,9 +287,9 @@ HYPERVISOR_vcpu_op(
}
static inline int
-HYPERVISOR_switch_to_user(void)
+HYPERVISOR_iret(void)
{
- return _hypercall0(int, switch_to_user);
+ return _hypercall0(int, iret);
}
static inline int
@@ -307,6 +307,14 @@ HYPERVISOR_suspend(
SHUTDOWN_suspend, srec);
}
+static inline int
+HYPERVISOR_nmi_op(
+ unsigned long op,
+ unsigned long arg)
+{
+ return _hypercall2(int, nmi_op, op, arg);
+}
+
#endif /* __HYPERCALL_H__ */
/*
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h
index 7be26e8660..84186bbe96 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h
@@ -35,6 +35,7 @@ void __init machine_specific_modify_cpu_capabilities(struct cpuinfo_x86 *c)
extern void hypervisor_callback(void);
extern void failsafe_callback(void);
+extern void nmi(void);
static void __init machine_specific_arch_setup(void)
{
@@ -43,5 +44,9 @@ static void __init machine_specific_arch_setup(void)
(unsigned long) failsafe_callback,
(unsigned long) system_call);
+#ifdef CONFIG_X86_LOCAL_APIC
+ HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
+#endif
+
machine_specific_modify_cpu_capabilities(&boot_cpu_data);
}
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h
new file mode 100644
index 0000000000..1c5d28dad2
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h
@@ -0,0 +1,75 @@
+/*
+ * linux/include/asm-x86_64/nmi.h
+ */
+#ifndef ASM_NMI_H
+#define ASM_NMI_H
+
+#include <linux/pm.h>
+
+#include <asm-xen/xen-public/nmi.h>
+
+struct pt_regs;
+
+typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
+
+/**
+ * set_nmi_callback
+ *
+ * Set a handler for an NMI. Only one handler may be
+ * set; the handler should return 1 if it handled the NMI.
+ */
+void set_nmi_callback(nmi_callback_t callback);
+
+/**
+ * unset_nmi_callback
+ *
+ * Remove the handler previously set.
+ */
+void unset_nmi_callback(void);
+
+#ifdef CONFIG_PM
+
+/** Replace the PM callback routine for NMI. */
+struct pm_dev * set_nmi_pm_callback(pm_callback callback);
+
+/** Unset the PM callback routine back to the default. */
+void unset_nmi_pm_callback(struct pm_dev * dev);
+
+#else
+
+static inline struct pm_dev * set_nmi_pm_callback(pm_callback callback)
+{
+ return 0;
+}
+
+static inline void unset_nmi_pm_callback(struct pm_dev * dev)
+{
+}
+
+#endif /* CONFIG_PM */
+
+extern void default_do_nmi(struct pt_regs *);
+extern void die_nmi(char *str, struct pt_regs *regs);
+
+static inline unsigned char get_nmi_reason(void)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+ unsigned char reason = 0;
+
+ /* construct a value which looks like it came from
+ * port 0x61.
+ */
+ if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
+ reason |= 0x40;
+ if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
+ reason |= 0x80;
+
+ return reason;
+}
+
+extern int panic_on_timeout;
+extern int unknown_nmi_panic;
+
+extern int check_nmi_watchdog(void);
+
+#endif /* ASM_NMI_H */
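As declared above, a driver installs at most one callback and the callback reports whether it consumed the NMI. A minimal usage sketch, with a hypothetical handler:

    static int my_nmi_handler(struct pt_regs *regs, int cpu)
    {
            /* inspect state here; return 1 to claim the NMI */
            return 1;
    }

    set_nmi_callback(my_nmi_handler);
    /* ... */
    unset_nmi_callback();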
diff --git a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h
index 5c695b9388..5534fbe99c 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h
@@ -37,6 +37,8 @@
#include <asm-xen/xen-public/io/xenbus.h>
#include <asm-xen/xen-public/io/xs_wire.h>
+#define XBT_NULL 0
+
/* Register callback to watch this node. */
struct xenbus_watch
{
@@ -100,35 +102,35 @@ int xenbus_register_frontend(struct xenbus_driver *drv);
int xenbus_register_backend(struct xenbus_driver *drv);
void xenbus_unregister_driver(struct xenbus_driver *drv);
-struct xenbus_transaction;
+typedef u32 xenbus_transaction_t;
-char **xenbus_directory(struct xenbus_transaction *t,
+char **xenbus_directory(xenbus_transaction_t t,
const char *dir, const char *node, unsigned int *num);
-void *xenbus_read(struct xenbus_transaction *t,
+void *xenbus_read(xenbus_transaction_t t,
const char *dir, const char *node, unsigned int *len);
-int xenbus_write(struct xenbus_transaction *t,
+int xenbus_write(xenbus_transaction_t t,
const char *dir, const char *node, const char *string);
-int xenbus_mkdir(struct xenbus_transaction *t,
+int xenbus_mkdir(xenbus_transaction_t t,
const char *dir, const char *node);
-int xenbus_exists(struct xenbus_transaction *t,
+int xenbus_exists(xenbus_transaction_t t,
const char *dir, const char *node);
-int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node);
-struct xenbus_transaction *xenbus_transaction_start(void);
-int xenbus_transaction_end(struct xenbus_transaction *t, int abort);
+int xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node);
+int xenbus_transaction_start(xenbus_transaction_t *t);
+int xenbus_transaction_end(xenbus_transaction_t t, int abort);
/* Single read and scanf: returns -errno or num scanned if > 0. */
-int xenbus_scanf(struct xenbus_transaction *t,
+int xenbus_scanf(xenbus_transaction_t t,
const char *dir, const char *node, const char *fmt, ...)
__attribute__((format(scanf, 4, 5)));
/* Single printf and write: returns -errno or 0. */
-int xenbus_printf(struct xenbus_transaction *t,
+int xenbus_printf(xenbus_transaction_t t,
const char *dir, const char *node, const char *fmt, ...)
__attribute__((format(printf, 4, 5)));
/* Generic read function: NULL-terminated triples of name,
* sprintf-style type string, and pointer. Returns 0 or errno.*/
-int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...);
+int xenbus_gather(xenbus_transaction_t t, const char *dir, ...);
/* notifer routines for when the xenstore comes up */
int register_xenstore_notifier(struct notifier_block *nb);
@@ -194,7 +196,7 @@ int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
* XenbusStateClosing, and the error will be saved in the store.
*/
int xenbus_switch_state(struct xenbus_device *dev,
- struct xenbus_transaction *xbt,
+ xenbus_transaction_t xbt,
XenbusState new_state);
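These declarations are the heart of the interface change running through this patch: the opaque struct xenbus_transaction pointer becomes a plain u32 handle, XBT_NULL (0) means "no transaction" for one-shot calls such as xenbus_scanf(XBT_NULL, ...), and xenbus_transaction_start() now returns an errno and writes the handle through its argument instead of returning an ERR_PTR. A sketch of the resulting calling convention, mirroring the again: retry loops in the blkback/netfront hunks above (error handling illustrative):

    xenbus_transaction_t xbt;
    int err;

    again:
        err = xenbus_transaction_start(&xbt);
        if (err)
                return err;

        err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u", ring_ref);
        if (err) {
                xenbus_transaction_end(xbt, 1);    /* abort */
                return err;
        }

        err = xenbus_transaction_end(xbt, 0);      /* commit */
        if (err == -EAGAIN)
                goto again;
        return err;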
diff --git a/patches/linux-2.6.12/i386-mach-io-check-nmi.patch b/patches/linux-2.6.12/i386-mach-io-check-nmi.patch
new file mode 100644
index 0000000000..3a6048833a
--- /dev/null
+++ b/patches/linux-2.6.12/i386-mach-io-check-nmi.patch
@@ -0,0 +1,43 @@
+--- ref-linux-2.6.12/arch/i386/kernel/traps.c 2005-12-19 09:23:44.000000000 +0000
++++ linux-2.6.12-xen0/arch/i386/kernel/traps.c 2006-01-05 15:51:52.000000000 +0000
+@@ -521,18 +521,11 @@
+
+ static void io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+- unsigned long i;
+-
+ printk("NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
+ /* Re-enable the IOCK line, wait for a few seconds */
+- reason = (reason & 0xf) | 8;
+- outb(reason, 0x61);
+- i = 2000;
+- while (--i) udelay(1000);
+- reason &= ~8;
+- outb(reason, 0x61);
++ clear_io_check_error(reason);
+ }
+
+ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+--- ref-linux-2.6.12/include/asm-i386/mach-default/mach_traps.h 2005-06-17 20:48:29.000000000 +0100
++++ linux-2.6.12-xen0/include/asm-i386/mach-default/mach_traps.h 2006-01-05 15:52:33.000000000 +0000
+@@ -15,6 +15,18 @@
+ outb(reason, 0x61);
+ }
+
++static inline void clear_io_check_error(unsigned char reason)
++{
++ unsigned long i;
++
++ reason = (reason & 0xf) | 8;
++ outb(reason, 0x61);
++ i = 2000;
++ while (--i) udelay(1000);
++ reason &= ~8;
++ outb(reason, 0x61);
++}
++
+ static inline unsigned char get_nmi_reason(void)
+ {
+ return inb(0x61);
diff --git a/tools/Makefile b/tools/Makefile
index c55fb5dfb0..c2f22da959 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -12,6 +12,7 @@ SUBDIRS += firmware
SUBDIRS += security
SUBDIRS += console
SUBDIRS += xenmon
+SUBDIRS += guest-headers
ifeq ($(VTPM_TOOLS),y)
SUBDIRS += vtpm_manager
SUBDIRS += vtpm
diff --git a/tools/Rules.mk b/tools/Rules.mk
index 2a003b1d7a..4672a40888 100644
--- a/tools/Rules.mk
+++ b/tools/Rules.mk
@@ -35,6 +35,8 @@ mk-symlinks: LINUX_ROOT=$(XEN_ROOT)/linux-2.6-xen-sparse
mk-symlinks:
mkdir -p xen
( cd xen && ln -sf ../$(XEN_ROOT)/xen/include/public/*.h . )
+ mkdir -p xen/hvm
+ ( cd xen/hvm && ln -sf ../../$(XEN_ROOT)/xen/include/public/hvm/*.h . )
mkdir -p xen/io
( cd xen/io && ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . )
mkdir -p xen/linux
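The new xen/hvm symlink mirrors a header reorganization visible later in this patch: ioreq.h and vmx_assist.h move from xen/io/ and xen/ into xen/hvm/, and vmxassist's acpi_madt.c starts consuming the shared xen/hvm/hvm_info_table.h instead of a private copy of the structure.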
diff --git a/tools/console/client/main.c b/tools/console/client/main.c
index 0760f0cb9c..f500a593ad 100644
--- a/tools/console/client/main.c
+++ b/tools/console/client/main.c
@@ -220,7 +220,7 @@ int main(int argc, char **argv)
if (path == NULL)
err(ENOMEM, "realloc");
strcat(path, "/console/tty");
- str_pty = xs_read(xs, NULL, path, &len);
+ str_pty = xs_read(xs, XBT_NULL, path, &len);
/* FIXME consoled currently does not assume domain-0 doesn't have a
console which is good when we break domain-0 up. To keep us
@@ -245,7 +245,7 @@ int main(int argc, char **argv)
struct timeval tv = { 0, 500 };
select(0, NULL, NULL, NULL, &tv); /* pause briefly */
- str_pty = xs_read(xs, NULL, path, &len);
+ str_pty = xs_read(xs, XBT_NULL, path, &len);
}
if (str_pty == NULL) {
diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
index 4587c5f7db..8b187e5e82 100644
--- a/tools/console/daemon/io.c
+++ b/tools/console/daemon/io.c
@@ -174,7 +174,7 @@ static int domain_create_tty(struct domain *dom)
success = asprintf(&path, "%s/limit", dom->conspath) != -1;
if (!success)
goto out;
- data = xs_read(xs, NULL, path, &len);
+ data = xs_read(xs, XBT_NULL, path, &len);
if (data) {
dom->buffer.max_capacity = strtoul(data, 0, 0);
free(data);
@@ -184,7 +184,7 @@ static int domain_create_tty(struct domain *dom)
success = asprintf(&path, "%s/tty", dom->conspath) != -1;
if (!success)
goto out;
- success = xs_write(xs, NULL, path, slave, strlen(slave));
+ success = xs_write(xs, XBT_NULL, path, slave, strlen(slave));
free(path);
if (!success)
goto out;
@@ -214,7 +214,7 @@ int xs_gather(struct xs_handle *xs, const char *dir, ...)
char *p;
asprintf(&path, "%s/%s", dir, name);
- p = xs_read(xs, NULL, path, NULL);
+ p = xs_read(xs, XBT_NULL, path, NULL);
free(path);
if (p == NULL) {
ret = ENOENT;
diff --git a/tools/debugger/libxendebug/xendebug.c b/tools/debugger/libxendebug/xendebug.c
index cb939a7424..7a9deaa07f 100644
--- a/tools/debugger/libxendebug/xendebug.c
+++ b/tools/debugger/libxendebug/xendebug.c
@@ -119,8 +119,8 @@ xendebug_get_context (int xc_handle, uint32_t domid, uint32_t vcpu)
if ( !ctxt->valid[vcpu] )
{
- if ( (rc = xc_domain_get_vcpu_context(xc_handle, domid, vcpu,
- &ctxt->context[vcpu])) )
+ if ( (rc = xc_vcpu_getcontext(xc_handle, domid, vcpu,
+ &ctxt->context[vcpu])) )
return NULL;
ctxt->valid[vcpu] = true;
@@ -139,10 +139,10 @@ xendebug_set_context (int xc_handle, domain_context_p ctxt, uint32_t vcpu)
return -EINVAL;
op.interface_version = DOM0_INTERFACE_VERSION;
- op.cmd = DOM0_SETDOMAININFO;
- op.u.setdomaininfo.domain = ctxt->domid;
- op.u.setdomaininfo.vcpu = vcpu;
- op.u.setdomaininfo.ctxt = &ctxt->context[vcpu];
+ op.cmd = DOM0_SETVCPUCONTEXT;
+ op.u.setvcpucontext.domain = ctxt->domid;
+ op.u.setvcpucontext.vcpu = vcpu;
+ op.u.setvcpucontext.ctxt = &ctxt->context[vcpu];
if ( (rc = mlock(&ctxt->context[vcpu], sizeof(vcpu_guest_context_t))) )
return rc;
diff --git a/tools/examples/network-bridge b/tools/examples/network-bridge
index e915ea95e3..652e4aacb2 100755
--- a/tools/examples/network-bridge
+++ b/tools/examples/network-bridge
@@ -68,48 +68,19 @@ pdev="p${netdev}"
vdev="veth${vifnum}"
vif0="vif0.${vifnum}"
-legacy_mask_to_prefix() {
- mask=$1
- first=${mask%%.*}
- second=${mask#*.}
- third=${second#*.}
- fourth=${third#*.}
- second=${second%%.*}
- third=${third%%.*}
- declare -i INT FULLMASK BIT
- INT=$((((($first*256)+$second)*256+$third)*256+$fourth))
- FULLMASK=4294967295
- BIT=1
- for bit in `seq 32 -1 0`; do
- if test $FULLMASK -eq $INT; then PREFIX=$bit; return; fi
- FULLMASK=$(($FULLMASK-$BIT))
- BIT=$((BIT*2))
- done
- echo "ERROR converting netmask $mask to prefix"
- exit 1
+get_ip_info() {
+ addr_pfx=`ip addr show dev $1 | egrep '^ *inet' | sed -e 's/ *inet //' -e 's/ .*//'`
+ gateway=`ip route show dev $1 | fgrep default | sed 's/default via //'`
}
-
-parse_kernel_ip() {
- if egrep 'ip=[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:' /proc/cmdline; then
- kip=`sed -e 's!.*ip=\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\):.*!\1!' /proc/cmdline`
- kmask=`sed -e 's!.*ip=[^:]*:[^:]*:[^:]*:\([^:]*\):.*!\1!' /proc/cmdline`
- kgate=`sed -e 's!.*ip=[^:]*:[^:]*:\([^:]*\):.*!\1!' /proc/cmdline`
- fi
-}
-
+
do_ifup() {
if ! ifup $1 ; then
- if [ ${kip} ] ; then
- # use the addresses we grocked from /proc/cmdline
- if [ -z "${kmask}" ]; then
- PREFIX=32
- else
- legacy_mask_to_prefix ${kmask}
- fi
+ if [ ${addr_pfx} ] ; then
+ # use the info from get_ip_info()
ip addr flush $1
- ip addr add ${kip}/${PREFIX} dev $1
+ ip addr add ${addr_pfx} dev $1
ip link set dev $1 up
- [ ${kgate} ] && ip route add default via ${kgate}
+ [ ${gateway} ] && ip route add default via ${gateway}
fi
fi
}
@@ -171,7 +142,7 @@ transfer_routes () {
#
link_exists()
{
- if ip link show "$1" >&/dev/null
+ if ip link show "$1" >/dev/null 2>/dev/null
then
return 0
else
@@ -231,7 +202,7 @@ show_status () {
}
op_start () {
- if [ "${bridge}" == "null" ] ; then
+ if [ "${bridge}" = "null" ] ; then
return
fi
@@ -259,9 +230,8 @@ using loopback.nloopbacks=<N> on the domain 0 kernel command line.
preiftransfer ${netdev}
transfer_addrs ${netdev} ${vdev}
if ! ifdown ${netdev}; then
- # If ifdown fails, take the IP details from the kernel command
- # line.
- parse_kernel_ip
+ # If ifdown fails, remember the IP details.
+ get_ip_info ${netdev}
ip link set ${netdev} down
ip addr flush ${netdev}
fi
@@ -283,13 +253,13 @@ using loopback.nloopbacks=<N> on the domain 0 kernel command line.
transfer_routes ${netdev} ${bridge}
fi
- if [ ${antispoof} == 'yes' ] ; then
+ if [ ${antispoof} = 'yes' ] ; then
antispoofing
fi
}
op_stop () {
- if [ "${bridge}" == "null" ]; then
+ if [ "${bridge}" = "null" ]; then
return
fi
if ! link_exists "$bridge"; then
@@ -301,7 +271,7 @@ op_stop () {
mac=`ip link show ${netdev} | grep 'link\/ether' | sed -e 's/.*ether \(..:..:..:..:..:..\).*/\1/'`
transfer_addrs ${netdev} ${pdev}
if ! ifdown ${netdev}; then
- parse_kernel_ip
+ get_ip_info ${netdev}
fi
ip link set ${netdev} down arp off
ip link set ${netdev} addr fe:ff:ff:ff:ff:ff
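Besides replacing the /proc/cmdline ip= parsing with get_ip_info(), which snapshots the interface's address/prefix and default gateway from ip(8) output, this hunk fixes two portability problems: `>&/dev/null` is a bashism and becomes the POSIX `>/dev/null 2>/dev/null`, and `==` inside single-bracket tests becomes the portable `=`.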
diff --git a/tools/examples/xen-network-common.sh b/tools/examples/xen-network-common.sh
index 0973560dc8..0cd37e6e95 100644
--- a/tools/examples/xen-network-common.sh
+++ b/tools/examples/xen-network-common.sh
@@ -42,7 +42,7 @@ then
{
/sbin/ifup ${HWD_CONFIG_0} $1
}
-elif ! which ifup >&/dev/null
+elif ! which ifup >/dev/null 2>/dev/null
then
if [ -e /etc/conf.d/net ]
then
@@ -59,9 +59,18 @@ then
/etc/init.d/net.$1 stop
}
else
- logger -p "daemon.crit" -- \
- "You don't have ifup and don't seem to be running Gentoo either!"
- exit 1
+ preiftransfer()
+ {
+ true
+ }
+ ifup()
+ {
+ false
+ }
+ ifdown()
+ {
+ false
+ }
fi
else
preiftransfer()
diff --git a/tools/examples/xmexample.vmx b/tools/examples/xmexample.vmx
index 62767f67a8..cdd7863912 100644
--- a/tools/examples/xmexample.vmx
+++ b/tools/examples/xmexample.vmx
@@ -28,11 +28,14 @@ name = "ExampleVMXDomain"
#-----------------------------------------------------------------------------
# the number of cpus guest platform has, default=1
-vcpus=1
+#vcpus=1
# enable/disable vmx guest ACPI, default=0 (disabled)
#acpi=0
+# enable/disable vmx guest APIC, default=0 (disabled)
+#apic=0
+
# List of which CPUS this domain is allowed to use, default Xen picks
#cpus = "" # leave to Xen to pick
#cpus = "0" # all vcpus run on CPU0
diff --git a/tools/firmware/vmxassist/acpi_madt.c b/tools/firmware/vmxassist/acpi_madt.c
index 37e33e5e8a..8e86646067 100644
--- a/tools/firmware/vmxassist/acpi_madt.c
+++ b/tools/firmware/vmxassist/acpi_madt.c
@@ -17,34 +17,34 @@
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*/
+
#include "../acpi/acpi2_0.h"
#include "../acpi/acpi_madt.h"
+#include <xen/hvm/hvm_info_table.h>
+
#define NULL ((void*)0)
extern int puts(const char *s);
-#define HVM_INFO_PAGE 0x0009F000
-#define HVM_INFO_OFFSET 0x00000800
-
-struct hvm_info_table {
- char signature[8]; /* "HVM INFO" */
- uint32_t length;
- uint8_t checksum;
- uint8_t acpi_enabled;
- uint8_t pad[2];
- uint32_t nr_vcpus;
-};
-
static struct hvm_info_table *table = NULL;
-static int
-checksum_valid(uint8_t *ptr, int len)
+static int validate_hvm_info(struct hvm_info_table *t)
{
- uint8_t sum=0;
+ char signature[] = "HVM INFO";
+ uint8_t *ptr = (uint8_t *)t;
+ uint8_t sum = 0;
int i;
- for (i = 0; i < len; i++)
+ /* strncmp(t->signature, "HVM INFO", 8) */
+ for (i = 0; i < 8; i++) {
+ if (signature[i] != t->signature[i]) {
+ puts("Bad hvm info signature\n");
+ return 0;
+ }
+ }
+
+ for (i = 0; i < t->length; i++)
sum += ptr[i];
return (sum == 0);
@@ -55,24 +55,15 @@ static struct hvm_info_table *
get_hvm_info_table(void)
{
struct hvm_info_table *t;
- char signature[] = "HVM INFO";
int i;
if (table != NULL)
return table;
- t = (struct hvm_info_table *)(HVM_INFO_PAGE + HVM_INFO_OFFSET);
+ t = (struct hvm_info_table *)HVM_INFO_PADDR;
- /* strncmp(t->signature, "HVM INFO", 8) */
- for (i = 0; i < 8; i++) {
- if (signature[i] != t->signature[i]) {
- puts("Bad hvm info signature\n");
- return NULL;
- }
- }
-
- if (!checksum_valid((uint8_t *)t, t->length)) {
- puts("Bad hvm info checksum\n");
+ if (!validate_hvm_info(t)) {
+ puts("Bad hvm info table\n");
return NULL;
}
@@ -126,10 +117,10 @@ acpi_madt_get_madt(unsigned char *acpi_start)
return madt;
}
-static void
+static void
set_checksum(void *start, int checksum_offset, int len)
{
- unsigned char sum = 0;
+ unsigned char sum = 0;
unsigned char *ptr;
ptr = start;
@@ -141,9 +132,9 @@ set_checksum(void *start, int checksum_offset, int len)
ptr[checksum_offset] = -sum;
}
-static int
+static int
acpi_madt_set_local_apics(
- int nr_vcpu,
+ int nr_vcpu,
ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt)
{
int i;
@@ -156,14 +147,14 @@ acpi_madt_set_local_apics(
madt->LocalApic[i].Length = sizeof (ACPI_LOCAL_APIC_STRUCTURE);
madt->LocalApic[i].AcpiProcessorId = i;
madt->LocalApic[i].ApicId = i;
- madt->LocalApic[i].Flags = 1;
+ madt->LocalApic[i].Flags = 1;
}
madt->Header.Header.Length =
- sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) -
+ sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) -
(MAX_VIRT_CPUS - nr_vcpu)* sizeof(ACPI_LOCAL_APIC_STRUCTURE);
- return 0;
+ return 0;
}
#define FIELD_OFFSET(TYPE,Field) ((unsigned int)(&(((TYPE *) 0)->Field)))
@@ -185,7 +176,7 @@ int acpi_madt_update(unsigned char *acpi_start)
madt, FIELD_OFFSET(ACPI_TABLE_HEADER, Checksum),
madt->Header.Header.Length);
- return 0;
+ return 0;
}
/*
diff --git a/tools/firmware/vmxassist/vm86.h b/tools/firmware/vmxassist/vm86.h
index 2b6c64d183..d9798bce7e 100644
--- a/tools/firmware/vmxassist/vm86.h
+++ b/tools/firmware/vmxassist/vm86.h
@@ -24,7 +24,7 @@
#include <stdint.h>
#endif
-#include <xen/vmx_assist.h>
+#include <xen/hvm/vmx_assist.h>
#define NR_EXCEPTION_HANDLER 32
#define NR_INTERRUPT_HANDLERS 16
diff --git a/tools/guest-headers/Makefile b/tools/guest-headers/Makefile
new file mode 100644
index 0000000000..2033cb2caf
--- /dev/null
+++ b/tools/guest-headers/Makefile
@@ -0,0 +1,13 @@
+
+XEN_ROOT=../..
+linuxsparsetree = $(XEN_ROOT)/linux-2.6-xen-sparse
+
+all:
+
+check:
+
+install:
+ mkdir -p $(DESTDIR)/usr/include/xen/linux
+ install -m0644 $(linuxsparsetree)/include/asm-xen/linux-public/*.h $(DESTDIR)/usr/include/xen/linux
+
+clean:
diff --git a/tools/ioemu/hw/i8254.c b/tools/ioemu/hw/i8254.c
index 111a6c6144..226db96962 100644
--- a/tools/ioemu/hw/i8254.c
+++ b/tools/ioemu/hw/i8254.c
@@ -23,7 +23,7 @@
*/
#include "vl.h"
#include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
//#define DEBUG_PIT
diff --git a/tools/ioemu/hw/i8259.c b/tools/ioemu/hw/i8259.c
index 8009eea130..6385bca28f 100644
--- a/tools/ioemu/hw/i8259.c
+++ b/tools/ioemu/hw/i8259.c
@@ -23,7 +23,7 @@
*/
#include "vl.h"
#include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
/* debug PIC */
//#define DEBUG_PIC
diff --git a/tools/ioemu/hw/i8259_stub.c b/tools/ioemu/hw/i8259_stub.c
index 7dd0062803..1ca5dc9cf3 100644
--- a/tools/ioemu/hw/i8259_stub.c
+++ b/tools/ioemu/hw/i8259_stub.c
@@ -22,7 +22,7 @@
* THE SOFTWARE.
*/
#include "xenctrl.h"
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
#include <stdio.h>
#include "cpu.h"
#include "cpu-all.h"
diff --git a/tools/ioemu/target-i386-dm/helper2.c b/tools/ioemu/target-i386-dm/helper2.c
index d2a618c580..4d3088ce3b 100644
--- a/tools/ioemu/target-i386-dm/helper2.c
+++ b/tools/ioemu/target-i386-dm/helper2.c
@@ -48,7 +48,7 @@
#include <sys/ioctl.h>
#include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
#include <xen/linux/evtchn.h>
#include "cpu.h"
diff --git a/tools/libxc/xc_core.c b/tools/libxc/xc_core.c
index 207fbe8fb1..43ac87d47f 100644
--- a/tools/libxc/xc_core.c
+++ b/tools/libxc/xc_core.c
@@ -55,7 +55,7 @@ xc_domain_dumpcore(int xc_handle,
}
for (i = 0; i < info.max_vcpu_id; i++)
- if (xc_domain_get_vcpu_context(xc_handle, domid,
+ if (xc_vcpu_getcontext(xc_handle, domid,
i, &ctxt[nr_vcpus]) == 0)
nr_vcpus++;
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index b018318d62..aae3236695 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -58,16 +58,16 @@ int xc_domain_destroy(int xc_handle,
return do_dom0_op(xc_handle, &op);
}
-int xc_domain_pincpu(int xc_handle,
- uint32_t domid,
- int vcpu,
- cpumap_t cpumap)
+int xc_vcpu_setaffinity(int xc_handle,
+ uint32_t domid,
+ int vcpu,
+ cpumap_t cpumap)
{
DECLARE_DOM0_OP;
- op.cmd = DOM0_PINCPUDOMAIN;
- op.u.pincpudomain.domain = (domid_t)domid;
- op.u.pincpudomain.vcpu = vcpu;
- op.u.pincpudomain.cpumap = cpumap;
+ op.cmd = DOM0_SETVCPUAFFINITY;
+ op.u.setvcpuaffinity.domain = (domid_t)domid;
+ op.u.setvcpuaffinity.vcpu = vcpu;
+ op.u.setvcpuaffinity.cpumap = cpumap;
return do_dom0_op(xc_handle, &op);
}
@@ -155,7 +155,7 @@ int xc_domain_getinfolist(int xc_handle,
return ret;
}
-int xc_domain_get_vcpu_context(int xc_handle,
+int xc_vcpu_getcontext(int xc_handle,
uint32_t domid,
uint32_t vcpu,
vcpu_guest_context_t *ctxt)
@@ -345,10 +345,10 @@ int xc_domain_sethandle(int xc_handle, uint32_t domid,
return do_dom0_op(xc_handle, &op);
}
-int xc_domain_get_vcpu_info(int xc_handle,
- uint32_t domid,
- uint32_t vcpu,
- xc_vcpuinfo_t *info)
+int xc_vcpu_getinfo(int xc_handle,
+ uint32_t domid,
+ uint32_t vcpu,
+ xc_vcpuinfo_t *info)
{
int rc;
DECLARE_DOM0_OP;
@@ -380,18 +380,18 @@ int xc_domain_ioport_permission(int xc_handle,
return do_dom0_op(xc_handle, &op);
}
-int xc_domain_setinfo(int xc_handle,
- uint32_t domid,
- uint32_t vcpu,
- vcpu_guest_context_t *ctxt)
+int xc_vcpu_setcontext(int xc_handle,
+ uint32_t domid,
+ uint32_t vcpu,
+ vcpu_guest_context_t *ctxt)
{
dom0_op_t op;
int rc;
- op.cmd = DOM0_SETDOMAININFO;
- op.u.setdomaininfo.domain = domid;
- op.u.setdomaininfo.vcpu = vcpu;
- op.u.setdomaininfo.ctxt = ctxt;
+ op.cmd = DOM0_SETVCPUCONTEXT;
+ op.u.setvcpucontext.domain = domid;
+ op.u.setvcpucontext.vcpu = vcpu;
+ op.u.setvcpucontext.ctxt = ctxt;
if ( (rc = mlock(ctxt, sizeof(*ctxt))) != 0 )
return rc;
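The libxc renames make the per-VCPU scope explicit: xc_domain_pincpu becomes xc_vcpu_setaffinity, xc_domain_get_vcpu_context/xc_domain_setinfo become xc_vcpu_getcontext/xc_vcpu_setcontext, and xc_domain_get_vcpu_info becomes xc_vcpu_getinfo. A sketch of the renamed calls, assuming an open xc_handle and a valid domid and cpumap:

    vcpu_guest_context_t ctxt;

    if (xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt) == 0) {
            /* ... modify ctxt ... */
            xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt);
    }
    xc_vcpu_setaffinity(xc_handle, domid, 0, cpumap);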
diff --git a/tools/libxc/xc_ia64_stubs.c b/tools/libxc/xc_ia64_stubs.c
index c48f3a3044..a10992be8e 100644
--- a/tools/libxc/xc_ia64_stubs.c
+++ b/tools/libxc/xc_ia64_stubs.c
@@ -5,7 +5,7 @@
#include <stdlib.h>
#include <zlib.h>
#include "xen/arch-ia64.h"
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
/* this is a very ugly way of getting FPSR_DEFAULT. struct ia64_fpreg is
* mysteriously declared in two places: /usr/include/asm/fpu.h and
@@ -23,7 +23,8 @@ unsigned long xc_ia64_fpsr_default(void)
}
int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags)
+ uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
+ int (*suspend)(int domid))
{
PERROR("xc_linux_save not implemented\n");
return -1;
@@ -664,7 +665,7 @@ int xc_vmx_build(int xc_handle,
goto error_out;
}
- if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) ){
+ if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) ){
PERROR("Could not get vcpu context");
goto error_out;
}
@@ -688,11 +689,11 @@ int xc_vmx_build(int xc_handle,
memset( &launch_op, 0, sizeof(launch_op) );
- launch_op.u.setdomaininfo.domain = (domid_t)domid;
- launch_op.u.setdomaininfo.vcpu = 0;
- launch_op.u.setdomaininfo.ctxt = ctxt;
+ launch_op.u.setvcpucontext.domain = (domid_t)domid;
+ launch_op.u.setvcpucontext.vcpu = 0;
+ launch_op.u.setvcpucontext.ctxt = ctxt;
- launch_op.cmd = DOM0_SETDOMAININFO;
+ launch_op.cmd = DOM0_SETVCPUCONTEXT;
rc = do_dom0_op(xc_handle, &launch_op);
return rc;
diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c
index f684b7185f..5312b42bba 100644
--- a/tools/libxc/xc_linux_build.c
+++ b/tools/libxc/xc_linux_build.c
@@ -402,8 +402,11 @@ static int setup_guest(int xc_handle,
ctxt->initrd.start = 0;
ctxt->initrd.size = 0;
}
- strncpy((char *)ctxt->cmdline, cmdline, IA64_COMMAND_LINE_SIZE);
- ctxt->cmdline[IA64_COMMAND_LINE_SIZE-1] = '\0';
+ if ( cmdline != NULL )
+ {
+ strncpy((char *)ctxt->cmdline, cmdline, IA64_COMMAND_LINE_SIZE);
+ ctxt->cmdline[IA64_COMMAND_LINE_SIZE-1] = '\0';
+ }
munmap(start_info, PAGE_SIZE);
free(page_array);
@@ -693,8 +696,11 @@ static int setup_guest(int xc_handle,
start_info->mod_start = vinitrd_start;
start_info->mod_len = initrd_len;
}
- strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
- start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
+ if ( cmdline != NULL )
+ {
+ strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
+ start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
+ }
munmap(start_info, PAGE_SIZE);
/* shared_info page starts its life empty. */
@@ -794,7 +800,7 @@ int xc_linux_build(int xc_handle,
goto error_out;
}
- if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+ if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
{
PERROR("Could not get vcpu context");
goto error_out;
@@ -897,11 +903,11 @@ int xc_linux_build(int xc_handle,
memset( &launch_op, 0, sizeof(launch_op) );
- launch_op.u.setdomaininfo.domain = (domid_t)domid;
- launch_op.u.setdomaininfo.vcpu = 0;
- launch_op.u.setdomaininfo.ctxt = ctxt;
+ launch_op.u.setvcpucontext.domain = (domid_t)domid;
+ launch_op.u.setvcpucontext.vcpu = 0;
+ launch_op.u.setvcpucontext.ctxt = ctxt;
- launch_op.cmd = DOM0_SETDOMAININFO;
+ launch_op.cmd = DOM0_SETVCPUCONTEXT;
rc = xc_dom0_op(xc_handle, &launch_op);
return rc;
diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c
index 95ead19389..c91efce4b7 100644
--- a/tools/libxc/xc_linux_restore.c
+++ b/tools/libxc/xc_linux_restore.c
@@ -171,7 +171,7 @@ int xc_linux_restore(int xc_handle, int io_fd,
/* Only have to worry about vcpu 0 even for SMP */
- if (xc_domain_get_vcpu_context( xc_handle, dom, 0, &ctxt)) {
+ if (xc_vcpu_getcontext( xc_handle, dom, 0, &ctxt)) {
ERR("Could not get vcpu context");
goto out;
}
@@ -735,10 +735,10 @@ int xc_linux_restore(int xc_handle, int io_fd,
DPRINTF("Domain ready to be built.\n");
- op.cmd = DOM0_SETDOMAININFO;
- op.u.setdomaininfo.domain = (domid_t)dom;
- op.u.setdomaininfo.vcpu = 0;
- op.u.setdomaininfo.ctxt = &ctxt;
+ op.cmd = DOM0_SETVCPUCONTEXT;
+ op.u.setvcpucontext.domain = (domid_t)dom;
+ op.u.setvcpucontext.vcpu = 0;
+ op.u.setvcpucontext.ctxt = &ctxt;
rc = xc_dom0_op(xc_handle, &op);
if (rc != 0) {
diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c
index 1c32ed8a80..927cc6cbd5 100644
--- a/tools/libxc/xc_linux_save.c
+++ b/tools/libxc/xc_linux_save.c
@@ -357,21 +357,14 @@ static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn,
}
-static int suspend_and_state(int xc_handle, int io_fd, int dom,
- xc_dominfo_t *info,
+static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+ int dom, xc_dominfo_t *info,
vcpu_guest_context_t *ctxt)
{
int i = 0;
- char ans[30];
- printf("suspend\n");
- fflush(stdout);
- if (fgets(ans, sizeof(ans), stdin) == NULL) {
- ERR("failed reading suspend reply");
- return -1;
- }
- if (strncmp(ans, "done\n", 5)) {
- ERR("suspend reply incorrect: %s", ans);
+ if (!(*suspend)(dom)) {
+ ERR("Suspend request failed");
return -1;
}
@@ -382,7 +375,7 @@ static int suspend_and_state(int xc_handle, int io_fd, int dom,
return -1;
}
- if ( xc_domain_get_vcpu_context(xc_handle, dom, 0 /* XXX */, ctxt))
+ if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt))
ERR("Could not get vcpu context");
@@ -568,7 +561,7 @@ static unsigned long *xc_map_m2p(int xc_handle,
int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags)
+ uint32_t max_factor, uint32_t flags, int (*suspend)(int))
{
xc_dominfo_t info;
@@ -643,7 +636,7 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
}
/* Only have to worry about vcpu 0 even for SMP */
- if (xc_domain_get_vcpu_context(xc_handle, dom, 0, &ctxt)) {
+ if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
ERR("Could not get vcpu context");
goto out;
}
@@ -748,7 +741,7 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
last_iter = 1;
- if (suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt)) {
+ if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) {
ERR("Domain appears not to have suspended");
goto out;
}
@@ -1054,7 +1047,8 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
DPRINTF("Start last iteration\n");
last_iter = 1;
- if (suspend_and_state(xc_handle, io_fd, dom, &info, &ctxt)) {
+ if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
+ &ctxt)) {
ERR("Domain appears not to have suspended");
goto out;
}
@@ -1164,6 +1158,9 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
if (live_shinfo)
munmap(live_shinfo, PAGE_SIZE);
+ if (live_p2m_frame_list_list)
+ munmap(live_p2m_frame_list_list, PAGE_SIZE);
+
if (live_p2m_frame_list)
munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
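
With the interactive stdin/stdout handshake removed from libxc, the save
path now asks its caller to suspend the domain through a callback that
returns nonzero on success. A sketch of a caller, under the assumption
that the callback's only contract is that the domain is suspended by the
time it returns (the real callback lives in the xc_save.c change below):

    /* Hypothetical suspend callback for xc_linux_save(). */
    static int my_suspend(int domid)
    {
        /* A real implementation would signal the toolstack here and
         * wait for its acknowledgement before returning. */
        return 1;   /* nonzero reports success */
    }

    /* rc = xc_linux_save(xc_handle, io_fd, domid, max_iters,
     *                    max_factor, flags, my_suspend); */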
diff --git a/tools/libxc/xc_pagetab.c b/tools/libxc/xc_pagetab.c
index b63ea89ade..02a19b5527 100644
--- a/tools/libxc/xc_pagetab.c
+++ b/tools/libxc/xc_pagetab.c
@@ -74,7 +74,7 @@ unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
#define pt_levels 4
#endif
- if (xc_domain_get_vcpu_context(xc_handle, dom, vcpu, &ctx) != 0) {
+ if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0) {
fprintf(stderr, "failed to retreive vcpu context\n");
goto out;
}
diff --git a/tools/libxc/xc_ptrace.c b/tools/libxc/xc_ptrace.c
index 0bf365a314..ac30269cfb 100644
--- a/tools/libxc/xc_ptrace.c
+++ b/tools/libxc/xc_ptrace.c
@@ -33,7 +33,7 @@ fetch_regs(int xc_handle, int cpu, int *online)
if (online)
*online = 0;
if ( !(regs_valid & (1 << cpu)) ) {
- retval = xc_domain_get_vcpu_context(xc_handle, current_domid,
+ retval = xc_vcpu_getcontext(xc_handle, current_domid,
cpu, &ctxt[cpu]);
if ( retval )
goto done;
@@ -43,8 +43,7 @@ fetch_regs(int xc_handle, int cpu, int *online)
if ( online == NULL )
goto done;
- retval = xc_domain_get_vcpu_info(xc_handle, current_domid,
- cpu, &info);
+ retval = xc_vcpu_getinfo(xc_handle, current_domid, cpu, &info);
*online = info.online;
done:
@@ -395,7 +394,7 @@ xc_ptrace(
case PTRACE_SETREGS:
SET_XC_REGS(((struct gdb_regs *)data), ctxt[cpu].user_regs);
- retval = xc_domain_setinfo(xc_handle, current_domid, cpu, &ctxt[cpu]);
+ retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]);
if (retval)
goto error_out;
break;
@@ -405,7 +404,7 @@ xc_ptrace(
* during single-stepping - but that just seems retarded
*/
ctxt[cpu].user_regs.eflags |= PSL_T;
- retval = xc_domain_setinfo(xc_handle, current_domid, cpu, &ctxt[cpu]);
+ retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]);
if ( retval )
{
perror("dom0 op failed");
@@ -423,8 +422,8 @@ xc_ptrace(
/* Clear trace flag */
if ( ctxt[cpu].user_regs.eflags & PSL_T ) {
ctxt[cpu].user_regs.eflags &= ~PSL_T;
- retval = xc_domain_setinfo(xc_handle, current_domid,
- cpu, &ctxt[cpu]);
+ retval = xc_vcpu_setcontext(xc_handle, current_domid,
+ cpu, &ctxt[cpu]);
if ( retval ) {
perror("dom0 op failed");
goto error_out;
diff --git a/tools/libxc/xc_vmx_build.c b/tools/libxc/xc_vmx_build.c
index 7316d855b1..ee18a4f8a2 100644
--- a/tools/libxc/xc_vmx_build.c
+++ b/tools/libxc/xc_vmx_build.c
@@ -9,7 +9,8 @@
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/hvm_info_table.h>
+#include <xen/hvm/ioreq.h>
#define VMX_LOADER_ENTR_ADDR 0x00100000
@@ -33,18 +34,6 @@
#define E820_MAP_NR_OFFSET 0x000001E8
#define E820_MAP_OFFSET 0x000002D0
-#define HVM_INFO_PAGE 0x0009F000
-#define HVM_INFO_OFFSET 0x00000800
-
-struct hvm_info_table {
- char signature[8]; /* "HVM INFO" */
- uint32_t length;
- uint8_t checksum;
- uint8_t acpi_enabled;
- uint8_t pad[2];
- uint32_t nr_vcpus;
-};
-
struct e820entry {
uint64_t addr;
uint64_t size;
@@ -128,7 +117,7 @@ static unsigned char build_e820map(void *e820_page, unsigned long mem_size)
return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
}
-static void
+static void
set_hvm_info_checksum(struct hvm_info_table *t)
{
uint8_t *ptr = (uint8_t *)t, sum = 0;
@@ -148,14 +137,18 @@ set_hvm_info_checksum(struct hvm_info_table *t)
*/
static int set_hvm_info(int xc_handle, uint32_t dom,
unsigned long *pfn_list, unsigned int vcpus,
- unsigned int acpi)
+ unsigned int acpi, unsigned int apic)
{
char *va_map;
struct hvm_info_table *va_hvm;
- va_map = xc_map_foreign_range(xc_handle, dom,
- PAGE_SIZE, PROT_READ|PROT_WRITE,
- pfn_list[HVM_INFO_PAGE >> PAGE_SHIFT]);
+ va_map = xc_map_foreign_range(
+ xc_handle,
+ dom,
+ PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ pfn_list[HVM_INFO_PFN]);
+
if ( va_map == NULL )
return -1;
@@ -164,8 +157,9 @@ static int set_hvm_info(int xc_handle, uint32_t dom,
strncpy(va_hvm->signature, "HVM INFO", 8);
va_hvm->length = sizeof(struct hvm_info_table);
va_hvm->acpi_enabled = acpi;
+ va_hvm->apic_enabled = apic;
va_hvm->nr_vcpus = vcpus;
-
+
set_hvm_info_checksum(va_hvm);
munmap(va_map, PAGE_SIZE);
@@ -307,9 +301,9 @@ static int setup_guest(int xc_handle,
vcpu_guest_context_t *ctxt,
unsigned long shared_info_frame,
unsigned int control_evtchn,
- unsigned int lapic,
unsigned int vcpus,
unsigned int acpi,
+ unsigned int apic,
unsigned int store_evtchn,
unsigned long *store_mfn)
{
@@ -519,20 +513,14 @@ static int setup_guest(int xc_handle,
goto error_out;
}
- if (set_hvm_info(xc_handle, dom, page_array, vcpus, acpi)) {
+ if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) {
fprintf(stderr, "Couldn't set hvm info for VMX guest.\n");
goto error_out;
}
- *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
- if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
- goto error_out;
-
- shared_page_frame = (v_end - PAGE_SIZE) >> PAGE_SHIFT;
-
- if ((e820_page = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
- page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0)
+ if ( (e820_page = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
goto error_out;
memset(e820_page, 0, PAGE_SIZE);
e820_map_nr = build_e820map(e820_page, v_end);
@@ -547,26 +535,30 @@ static int setup_guest(int xc_handle,
munmap(e820_page, PAGE_SIZE);
/* shared_info page starts its life empty. */
- if ((shared_info = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
- shared_info_frame)) == 0)
+ if ( (shared_info = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ shared_info_frame)) == 0 )
goto error_out;
memset(shared_info, 0, sizeof(shared_info_t));
/* Mask all upcalls... */
for ( i = 0; i < MAX_VIRT_CPUS; i++ )
shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
-
munmap(shared_info, PAGE_SIZE);
/* Populate the event channel port in the shared page */
- if ((sp = (shared_iopage_t *) xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
- page_array[shared_page_frame])) == 0)
+ shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
+ if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ shared_page_frame)) == 0 )
goto error_out;
memset(sp, 0, PAGE_SIZE);
sp->sp_global.eport = control_evtchn;
munmap(sp, PAGE_SIZE);
+ *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
+ if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
+ goto error_out;
+
/* Send the page update requests down to the hypervisor. */
if ( xc_finish_mmu_updates(xc_handle, mmu) )
goto error_out;
@@ -588,7 +580,7 @@ static int setup_guest(int xc_handle,
ctxt->user_regs.eax = 0;
ctxt->user_regs.esp = 0;
ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
- ctxt->user_regs.ecx = lapic;
+ ctxt->user_regs.ecx = 0;
ctxt->user_regs.esi = 0;
ctxt->user_regs.edi = 0;
ctxt->user_regs.ebp = 0;
@@ -608,9 +600,9 @@ int xc_vmx_build(int xc_handle,
int memsize,
const char *image_name,
unsigned int control_evtchn,
- unsigned int lapic,
unsigned int vcpus,
unsigned int acpi,
+ unsigned int apic,
unsigned int store_evtchn,
unsigned long *store_mfn)
{
@@ -659,7 +651,7 @@ int xc_vmx_build(int xc_handle,
goto error_out;
}
- if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+ if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
{
PERROR("Could not get vcpu context");
goto error_out;
@@ -674,7 +666,7 @@ int xc_vmx_build(int xc_handle,
if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn,
- lapic, vcpus, acpi, store_evtchn, store_mfn) < 0)
+ vcpus, acpi, apic, store_evtchn, store_mfn) < 0)
{
ERROR("Error constructing guest OS");
goto error_out;
@@ -716,11 +708,11 @@ int xc_vmx_build(int xc_handle,
memset( &launch_op, 0, sizeof(launch_op) );
- launch_op.u.setdomaininfo.domain = (domid_t)domid;
- launch_op.u.setdomaininfo.vcpu = 0;
- launch_op.u.setdomaininfo.ctxt = ctxt;
+ launch_op.u.setvcpucontext.domain = (domid_t)domid;
+ launch_op.u.setvcpucontext.vcpu = 0;
+ launch_op.u.setvcpucontext.ctxt = ctxt;
- launch_op.cmd = DOM0_SETDOMAININFO;
+ launch_op.cmd = DOM0_SETVCPUCONTEXT;
rc = xc_dom0_op(xc_handle, &launch_op);
return rc;
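
The hvm_info_table definition now comes from <xen/hvm/hvm_info_table.h>
and carries the new apic_enabled flag; set_hvm_info_checksum() picks the
checksum byte so that the table's bytes sum to zero modulo 256. A
self-contained sketch of that invariant (the struct layout mirrors the
one removed above, with apic_enabled assumed to take one of the old pad
bytes):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    struct hvm_info_table {
        char     signature[8];        /* "HVM INFO" */
        uint32_t length;
        uint8_t  checksum;
        uint8_t  acpi_enabled;
        uint8_t  apic_enabled;        /* new in this patch */
        uint8_t  pad[1];
        uint32_t nr_vcpus;
    };

    static void set_checksum(struct hvm_info_table *t)
    {
        uint8_t *p = (uint8_t *)t, sum = 0;
        uint32_t i;

        t->checksum = 0;
        for (i = 0; i < t->length; i++)
            sum += p[i];
        t->checksum = -sum;           /* byte-sum over the table becomes 0 */
    }

    int main(void)
    {
        struct hvm_info_table t;
        uint8_t *p = (uint8_t *)&t, sum = 0;
        uint32_t i;

        memset(&t, 0, sizeof(t));
        strncpy(t.signature, "HVM INFO", 8);
        t.length = sizeof(t);
        t.acpi_enabled = 1;
        t.apic_enabled = 1;
        t.nr_vcpus = 1;
        set_checksum(&t);

        for (i = 0; i < t.length; i++)
            sum += p[i];
        assert(sum == 0);             /* the invariant the guest checks */
        return 0;
    }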
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index f558b5985e..2adf825cd4 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -181,10 +181,11 @@ int xc_domain_unpause(int xc_handle,
*/
int xc_domain_destroy(int xc_handle,
uint32_t domid);
-int xc_domain_pincpu(int xc_handle,
- uint32_t domid,
- int vcpu,
- cpumap_t cpumap);
+
+int xc_vcpu_setaffinity(int xc_handle,
+ uint32_t domid,
+ int vcpu,
+ cpumap_t cpumap);
/**
* This function will return information about one or more domains. It is
@@ -208,7 +209,7 @@ int xc_domain_getinfo(int xc_handle,
/**
- * This function will set the vcpu context for the specified domain.
+ * This function will set the execution context for the specified vcpu.
*
* @parm xc_handle a handle to an open hypervisor interface
* @parm domid the domain to set the vcpu context for
@@ -216,10 +217,10 @@ int xc_domain_getinfo(int xc_handle,
 * @parm ctxt pointer to the cpu context with the values to set
 * @return 0 on success; -1 on error
*/
-int xc_domain_setinfo(int xc_handle,
- uint32_t domid,
- uint32_t vcpu,
- vcpu_guest_context_t *ctxt);
+int xc_vcpu_setcontext(int xc_handle,
+ uint32_t domid,
+ uint32_t vcpu,
+ vcpu_guest_context_t *ctxt);
/**
* This function will return information about one or more domains, using a
* single hypercall. The domain information will be stored into the supplied
@@ -249,17 +250,16 @@ int xc_domain_getinfolist(int xc_handle,
* domain
* @return 0 on success, -1 on failure
*/
-int xc_domain_get_vcpu_context(int xc_handle,
+int xc_vcpu_getcontext(int xc_handle,
uint32_t domid,
uint32_t vcpu,
vcpu_guest_context_t *ctxt);
typedef dom0_getvcpuinfo_t xc_vcpuinfo_t;
-int xc_domain_get_vcpu_info(int xc_handle,
- uint32_t domid,
- uint32_t vcpu,
- xc_vcpuinfo_t *info);
-
+int xc_vcpu_getinfo(int xc_handle,
+ uint32_t domid,
+ uint32_t vcpu,
+ xc_vcpuinfo_t *info);
int xc_domain_setcpuweight(int xc_handle,
uint32_t domid,
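
A short sketch of the two renamed query calls declared above, assuming
an open handle and a running domain; the field names follow the uses
elsewhere in this patch (info.online in xc_ptrace.c, ctxt.user_regs in
the builders):

    #include <stdio.h>
    #include "xenctrl.h"

    static void dump_vcpu0(int xc_handle, uint32_t domid)
    {
        vcpu_guest_context_t ctxt;
        xc_vcpuinfo_t info;

        if (xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt) != 0 ||
            xc_vcpu_getinfo(xc_handle, domid, 0, &info) != 0) {
            perror("vcpu query");
            return;
        }
        printf("vcpu0: online=%u eip=%#lx\n",
               (unsigned)info.online, (unsigned long)ctxt.user_regs.eip);
    }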
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index 4d6d80af3d..5cad964f80 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -21,8 +21,10 @@
* @parm dom the id of the domain
* @return 0 on success, -1 on failure
*/
-int xc_linux_save(int xc_handle, int fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */);
+int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
+ int (*suspend)(int domid));
+
/**
* This function will restore a saved domain running Linux.
@@ -56,9 +58,9 @@ int xc_vmx_build(int xc_handle,
int memsize,
const char *image_name,
unsigned int control_evtchn,
- unsigned int lapic,
unsigned int vcpus,
unsigned int acpi,
+ unsigned int apic,
unsigned int store_evtchn,
unsigned long *store_mfn);
diff --git a/tools/libxc/xg_private.c b/tools/libxc/xg_private.c
index 355ba1400d..982d8256be 100644
--- a/tools/libxc/xg_private.c
+++ b/tools/libxc/xg_private.c
@@ -17,6 +17,9 @@ char *xc_read_kernel_image(const char *filename, unsigned long *size)
char *image = NULL;
unsigned int bytes;
+ if ( filename == NULL )
+ goto out;
+
if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
{
PERROR("Could not open kernel image");
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index 9a49ef9307..f935044b26 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -16,7 +16,7 @@
#include <netdb.h>
#include <arpa/inet.h>
-#include "xc_private.h"
+#include "xenctrl.h"
/* Needed for Python versions earlier than 2.3. */
#ifndef PyMODINIT_FUNC
@@ -135,9 +135,9 @@ static PyObject *pyxc_domain_destroy(XcObject *self, PyObject *args)
}
-static PyObject *pyxc_domain_pincpu(XcObject *self,
- PyObject *args,
- PyObject *kwds)
+static PyObject *pyxc_vcpu_setaffinity(XcObject *self,
+ PyObject *args,
+ PyObject *kwds)
{
uint32_t dom;
int vcpu = 0, i;
@@ -157,7 +157,7 @@ static PyObject *pyxc_domain_pincpu(XcObject *self,
cpumap |= (cpumap_t)1 << PyInt_AsLong(PyList_GetItem(cpulist, i));
}
- if ( xc_domain_pincpu(self->xc_handle, dom, vcpu, cpumap) != 0 )
+ if ( xc_vcpu_setaffinity(self->xc_handle, dom, vcpu, cpumap) != 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
@@ -297,7 +297,7 @@ static PyObject *pyxc_vcpu_getinfo(XcObject *self,
&dom, &vcpu) )
return NULL;
- rc = xc_domain_get_vcpu_info(self->xc_handle, dom, vcpu, &info);
+ rc = xc_vcpu_getinfo(self->xc_handle, dom, vcpu, &info);
if ( rc < 0 )
return PyErr_SetFromErrno(xc_error);
@@ -362,22 +362,23 @@ static PyObject *pyxc_vmx_build(XcObject *self,
uint32_t dom;
char *image;
int control_evtchn, store_evtchn;
+ int memsize;
int vcpus = 1;
- int lapic = 0;
int acpi = 0;
- int memsize;
+ int apic = 0;
unsigned long store_mfn = 0;
static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
- "memsize", "image", "lapic", "vcpus", "acpi",NULL };
+ "memsize", "image", "vcpus", "acpi", "apic",
+ NULL };
if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisiii", kwd_list,
&dom, &control_evtchn, &store_evtchn,
- &memsize, &image, &lapic, &vcpus,&acpi) )
+ &memsize, &image, &vcpus, &acpi, &apic) )
return NULL;
if ( xc_vmx_build(self->xc_handle, dom, memsize, image, control_evtchn,
- lapic, vcpus, acpi, store_evtchn, &store_mfn) != 0 )
+ vcpus, acpi, apic, store_evtchn, &store_mfn) != 0 )
return PyErr_SetFromErrno(xc_error);
return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
@@ -889,8 +890,8 @@ static PyMethodDef pyxc_methods[] = {
" dom [int]: Identifier of domain to be destroyed.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
- { "domain_pincpu",
- (PyCFunction)pyxc_domain_pincpu,
+ { "vcpu_setaffinity",
+ (PyCFunction)pyxc_vcpu_setaffinity,
METH_VARARGS | METH_KEYWORDS, "\n"
"Pin a VCPU to a specified set CPUs.\n"
" dom [int]: Identifier of domain to which VCPU belongs.\n"
diff --git a/tools/python/xen/lowlevel/xs/xs.c b/tools/python/xen/lowlevel/xs/xs.c
index e473a4902b..25dfaaf2a4 100644
--- a/tools/python/xen/lowlevel/xs/xs.c
+++ b/tools/python/xen/lowlevel/xs/xs.c
@@ -66,7 +66,7 @@ static PyObject *none(bool result);
static int parse_transaction_path(XsHandle *self, PyObject *args,
struct xs_handle **xh,
- struct xs_transaction_handle **th,
+ xs_transaction_t *th,
char **path);
@@ -83,7 +83,7 @@ static int parse_transaction_path(XsHandle *self, PyObject *args,
static PyObject *xspy_read(XsHandle *self, PyObject *args)
{
struct xs_handle *xh;
- struct xs_transaction_handle *th;
+ xs_transaction_t th;
char *path;
char *xsval;
@@ -120,7 +120,7 @@ static PyObject *xspy_write(XsHandle *self, PyObject *args)
{
static char *arg_spec = "sss#";
struct xs_handle *xh = xshandle(self);
- struct xs_transaction_handle *th;
+ xs_transaction_t th;
char *thstr;
char *path;
char *data;
@@ -132,7 +132,7 @@ static PyObject *xspy_write(XsHandle *self, PyObject *args)
if (!PyArg_ParseTuple(args, arg_spec, &thstr, &path, &data, &data_n))
return NULL;
- th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+ th = strtoul(thstr, NULL, 16);
Py_BEGIN_ALLOW_THREADS
result = xs_write(xh, th, path, data, data_n);
@@ -155,7 +155,7 @@ static PyObject *xspy_write(XsHandle *self, PyObject *args)
static PyObject *xspy_ls(XsHandle *self, PyObject *args)
{
struct xs_handle *xh;
- struct xs_transaction_handle *th;
+ xs_transaction_t th;
char *path;
char **xsval;
@@ -193,7 +193,7 @@ static PyObject *xspy_ls(XsHandle *self, PyObject *args)
static PyObject *xspy_mkdir(XsHandle *self, PyObject *args)
{
struct xs_handle *xh;
- struct xs_transaction_handle *th;
+ xs_transaction_t th;
char *path;
bool result;
@@ -221,7 +221,7 @@ static PyObject *xspy_mkdir(XsHandle *self, PyObject *args)
static PyObject *xspy_rm(XsHandle *self, PyObject *args)
{
struct xs_handle *xh;
- struct xs_transaction_handle *th;
+ xs_transaction_t th;
char *path;
bool result;
@@ -256,7 +256,7 @@ static PyObject *xspy_get_permissions(XsHandle *self, PyObject *args)
unsigned int perms_n = 0;
int i;
- struct xs_transaction_handle *th;
+ xs_transaction_t th;
char *thstr;
if (!xh)
@@ -264,7 +264,7 @@ static PyObject *xspy_get_permissions(XsHandle *self, PyObject *args)
if (!PyArg_ParseTuple(args, arg_spec, &thstr, &path))
return NULL;
- th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+ th = strtoul(thstr, NULL, 16);
Py_BEGIN_ALLOW_THREADS
perms = xs_get_permissions(xh, th, path, &perms_n);
@@ -312,7 +312,7 @@ static PyObject *xspy_set_permissions(XsHandle *self, PyObject *args)
int xsperms_n;
PyObject *tuple0 = NULL;
- struct xs_transaction_handle *th;
+ xs_transaction_t th;
char *thstr;
if (!xh)
@@ -320,7 +320,7 @@ static PyObject *xspy_set_permissions(XsHandle *self, PyObject *args)
if (!PyArg_ParseTuple(args, "ssO", &thstr, &path, &perms))
goto exit;
- th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+ th = strtoul(thstr, NULL, 16);
if (!PyList_Check(perms)) {
PyErr_SetString(PyExc_RuntimeError, "perms must be a list");
@@ -509,7 +509,7 @@ static PyObject *xspy_unwatch(XsHandle *self, PyObject *args)
static PyObject *xspy_transaction_start(XsHandle *self)
{
struct xs_handle *xh = xshandle(self);
- struct xs_transaction_handle *th;
+ xs_transaction_t th;
char thstr[MAX_STRLEN(unsigned long) + 1];
if (!xh)
@@ -519,7 +519,7 @@ static PyObject *xspy_transaction_start(XsHandle *self)
th = xs_transaction_start(xh);
Py_END_ALLOW_THREADS
- if (th == NULL) {
+ if (th == XBT_NULL) {
PyErr_SetFromErrno(PyExc_RuntimeError);
return NULL;
}
@@ -547,7 +547,7 @@ static PyObject *xspy_transaction_end(XsHandle *self, PyObject *args,
struct xs_handle *xh = xshandle(self);
bool result;
- struct xs_transaction_handle *th;
+ xs_transaction_t th;
char *thstr;
if (!xh)
@@ -556,7 +556,7 @@ static PyObject *xspy_transaction_end(XsHandle *self, PyObject *args,
&thstr, &abort))
return NULL;
- th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+ th = strtoul(thstr, NULL, 16);
Py_BEGIN_ALLOW_THREADS
result = xs_transaction_end(xh, th, abort);
@@ -727,7 +727,7 @@ static void remove_watch(XsHandle *self, PyObject *token)
*/
static int parse_transaction_path(XsHandle *self, PyObject *args,
struct xs_handle **xh,
- struct xs_transaction_handle **th,
+ xs_transaction_t *th,
char **path)
{
char *thstr;
@@ -740,7 +740,7 @@ static int parse_transaction_path(XsHandle *self, PyObject *args,
if (!PyArg_ParseTuple(args, "ss", &thstr, path))
return 0;
- *th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+ *th = strtoul(thstr, NULL, 16);
return 1;
}
diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py
index 592ab5fd60..e7ab105323 100644
--- a/tools/python/xen/xend/XendDomain.py
+++ b/tools/python/xen/xend/XendDomain.py
@@ -443,7 +443,7 @@ class XendDomain:
cpumap = map(lambda x: int(x),
cpumap.replace("[", "").replace("]", "").split(","))
try:
- return xc.domain_pincpu(dominfo.getDomid(), vcpu, cpumap)
+ return xc.vcpu_setaffinity(dominfo.getDomid(), vcpu, cpumap)
except Exception, ex:
raise XendError(str(ex))
diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
index 250bd12494..014a308ca1 100644
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -1179,7 +1179,7 @@ class XendDomainInfo:
for v in range(0, self.info['max_vcpu_id']+1):
# pincpu takes a list of ints
cpu = [ int( cpus[v % len(cpus)] ) ]
- xc.domain_pincpu(self.domid, v, cpu)
+ xc.vcpu_setaffinity(self.domid, v, cpu)
m = self.image.getDomainMemory(self.info['memory'] * 1024)
balloon.free(m)
diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
index 982de14bc1..e50c2b4c3c 100644
--- a/tools/python/xen/xend/image.py
+++ b/tools/python/xen/xend/image.py
@@ -209,13 +209,9 @@ class VmxImageHandler(ImageHandler):
self.dmargs += self.configVNC(imageConfig)
- self.lapic = 0
- lapic = sxp.child_value(imageConfig, 'lapic')
- if not lapic is None:
- self.lapic = int(lapic)
-
self.acpi = int(sxp.child_value(imageConfig, 'acpi', 0))
-
+ self.apic = int(sxp.child_value(imageConfig, 'apic', 0))
+
def buildDomain(self):
# Create an event channel
self.device_channel = xc.evtchn_alloc_unbound(dom=self.vm.getDomid(),
@@ -229,18 +225,18 @@ class VmxImageHandler(ImageHandler):
log.debug("control_evtchn = %d", self.device_channel)
log.debug("store_evtchn = %d", store_evtchn)
log.debug("memsize = %d", self.vm.getMemoryTarget() / 1024)
- log.debug("lapic = %d", self.lapic)
log.debug("vcpus = %d", self.vm.getVCpuCount())
log.debug("acpi = %d", self.acpi)
+ log.debug("apic = %d", self.apic)
return xc.vmx_build(dom = self.vm.getDomid(),
image = self.kernel,
control_evtchn = self.device_channel,
store_evtchn = store_evtchn,
memsize = self.vm.getMemoryTarget() / 1024,
- lapic = self.lapic,
+ vcpus = self.vm.getVCpuCount(),
acpi = self.acpi,
- vcpus = self.vm.getVCpuCount())
+ apic = self.apic)
# Return a list of cmd line args to the device models based on the
# xm config file
diff --git a/tools/python/xen/xend/server/DevController.py b/tools/python/xen/xend/server/DevController.py
index 814b9405c6..6faadeb24e 100644
--- a/tools/python/xen/xend/server/DevController.py
+++ b/tools/python/xen/xend/server/DevController.py
@@ -32,11 +32,10 @@ HOTPLUG_STATUS_ERROR = "error"
HOTPLUG_STATUS_BUSY = "busy"
Connected = 1
-Died = 2
-Error = 3
-Missing = 4
-Timeout = 5
-Busy = 6
+Error = 2
+Missing = 3
+Timeout = 4
+Busy = 5
xenbusState = {
'Unknown' : 0,
@@ -157,11 +156,6 @@ class DevController:
raise VmError("Device %s (%s) could not be connected. "
"Device not found." % (devid, self.deviceClass))
- elif status == Died:
- self.destroyDevice(devid)
- raise VmError("Device %s (%s) could not be connected. "
- "Device has died." % (devid, self.deviceClass))
-
elif status == Busy:
err = None
frontpath = self.frontendPath(devid)
@@ -408,20 +402,17 @@ class DevController:
def hotplugStatusCallback(statusPath, ev, result):
log.debug("hotplugStatusCallback %s.", statusPath)
- try:
- status = xstransact.Read(statusPath)
+ status = xstransact.Read(statusPath)
- if status is not None:
- if status == HOTPLUG_STATUS_ERROR:
- result['status'] = Error
- elif status == HOTPLUG_STATUS_BUSY:
- result['status'] = Busy
- else:
- result['status'] = Connected
+ if status is not None:
+ if status == HOTPLUG_STATUS_ERROR:
+ result['status'] = Error
+ elif status == HOTPLUG_STATUS_BUSY:
+ result['status'] = Busy
else:
- return 1
- except VmError:
- result['status'] = Died
+ result['status'] = Connected
+ else:
+ return 1
log.debug("hotplugStatusCallback %d.", result['status'])
diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
index dd97a9dc08..009c2a1a58 100644
--- a/tools/python/xen/xm/create.py
+++ b/tools/python/xen/xm/create.py
@@ -160,14 +160,14 @@ gopts.var('cpus', val='CPUS',
fn=set_int, default=None,
use="CPUS to run the domain on.")
-gopts.var('lapic', val='LAPIC',
- fn=set_int, default=0,
- use="Disable or enable local APIC of VMX domain.")
-
gopts.var('acpi', val='ACPI',
fn=set_int, default=0,
use="Disable or enable ACPI of VMX domain.")
+gopts.var('apic', val='APIC',
+ fn=set_int, default=0,
+ use="Disable or enable APIC of VMX domain.")
+
gopts.var('vcpus', val='VCPUS',
fn=set_int, default=1,
use="# of Virtual CPUS in domain.")
@@ -534,8 +534,8 @@ def configure_vmx(config_image, vals):
"""
args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb',
'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'audio',
- 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'lapic',
- 'xauthority', 'acpi' ]
+ 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'acpi', 'apic',
+ 'xauthority' ]
for a in args:
if (vals.__dict__[a]):
config_image.append([a, vals.__dict__[a]])
diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
index 56f7ce385c..b446a45acb 100644
--- a/tools/python/xen/xm/main.py
+++ b/tools/python/xen/xm/main.py
@@ -390,7 +390,6 @@ def xm_brief_list(doms):
def xm_vcpu_list(args):
- print 'Name ID VCPU CPU State Time(s) CPU Affinity'
from xen.xend.XendClient import server
if args:
@@ -401,6 +400,8 @@ def xm_vcpu_list(args):
lambda x: server.xend_domain_vcpuinfo(sxp.child_value(x, 'name')),
doms)
+ print 'Name ID VCPU CPU State Time(s) CPU Affinity'
+
for dom in dominfo:
def get_info(n):
return sxp.child_value(dom, n)
@@ -625,6 +626,8 @@ def xm_sched_sedf(args):
server.xend_domain_cpu_sedf_set(dom, *v)
def xm_info(args):
+ arg_check(args, "info", 0)
+
from xen.xend.XendClient import server
info = server.xend_node()
@@ -645,9 +648,12 @@ def xm_console(args):
def xm_top(args):
+ arg_check(args, "top", 0)
+
os.execvp('xentop', ['xentop'])
def xm_dmesg(args):
+ arg_check(args, "dmesg", 0)
gopts = Opts(use="""[-c|--clear]
diff --git a/tools/tests/test_x86_emulator.c b/tools/tests/test_x86_emulator.c
index f1c1a51583..46c98d1b1e 100644
--- a/tools/tests/test_x86_emulator.c
+++ b/tools/tests/test_x86_emulator.c
@@ -92,7 +92,7 @@ int main(int argc, char **argv)
regs.ecx = 0x12345678;
cr2 = (unsigned long)&res;
res = 0x7FFFFFFF;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x92345677) ||
(regs.eflags != 0xa94) ||
@@ -110,7 +110,7 @@ int main(int argc, char **argv)
regs.ecx = 0x12345678UL;
#endif
cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x92345677) ||
(regs.ecx != 0x8000000FUL) ||
@@ -125,7 +125,7 @@ int main(int argc, char **argv)
regs.eax = 0x92345677UL;
regs.ecx = 0xAA;
cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x923456AA) ||
(regs.eflags != 0x244) ||
@@ -141,7 +141,7 @@ int main(int argc, char **argv)
regs.eax = 0xAABBCC77UL;
regs.ecx = 0xFF;
cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x923456AA) ||
((regs.eflags&0x240) != 0x200) ||
@@ -157,7 +157,7 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x12345678) ||
(regs.eflags != 0x200) ||
@@ -174,7 +174,7 @@ int main(int argc, char **argv)
regs.eax = 0x923456AAUL;
regs.ecx = 0xDDEEFF00L;
cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0xDDEEFF00) ||
(regs.eflags != 0x244) ||
@@ -193,7 +193,7 @@ int main(int argc, char **argv)
regs.edi = (unsigned long)&res + 2;
regs.error_code = 0; /* read fault */
cr2 = regs.esi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x44554455) ||
(regs.eflags != 0x200) ||
@@ -211,7 +211,7 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)&res;
cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x2233445D) ||
((regs.eflags&0x201) != 0x201) ||
@@ -229,7 +229,7 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)cmpxchg8b_res;
cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(cmpxchg8b_res[0] != 0x9999AAAA) ||
(cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -243,7 +243,7 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)cmpxchg8b_res;
cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(cmpxchg8b_res[0] != 0x9999AAAA) ||
(cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -260,7 +260,7 @@ int main(int argc, char **argv)
regs.ecx = 0x12345678;
cr2 = (unsigned long)&res;
res = 0x82;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x82) ||
(regs.ecx != 0xFFFFFF82) ||
@@ -275,7 +275,7 @@ int main(int argc, char **argv)
regs.ecx = 0x12345678;
cr2 = (unsigned long)&res;
res = 0x1234aa82;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x1234aa82) ||
(regs.ecx != 0xaa82) ||
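
The literal 4 in every x86_emulate_memop() call gives way to a named
execution mode. To our understanding the modes are encoded as the
default address size in bytes, which is why PROT32 happens to equal the
old literal; the definitions below are an assumption for illustration,
not part of this patch:

    /* Assumed encoding: execution mode = default address size in bytes. */
    #define X86EMUL_MODE_REAL    0   /* real mode */
    #define X86EMUL_MODE_PROT16  2   /* 16-bit protected mode */
    #define X86EMUL_MODE_PROT32  4   /* 32-bit protected mode */
    #define X86EMUL_MODE_PROT64  8   /* 64-bit (long) mode */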
diff --git a/tools/vtpm/Makefile b/tools/vtpm/Makefile
index a1c8aa6438..8224a9d304 100644
--- a/tools/vtpm/Makefile
+++ b/tools/vtpm/Makefile
@@ -11,6 +11,8 @@ VTPM_DIR = vtpm
# Emulator tarball name
TPM_EMULATOR_TARFILE = tpm_emulator-0.2b.tar.gz
+GMP_HEADER = /usr/include/gmp.h
+
all: build
build: $(TPM_EMULATOR_DIR) $(VTPM_DIR) build_sub
@@ -55,5 +57,12 @@ $(VTPM_DIR): $(TPM_EMULATOR_TARFILE)
patch -p1 <../vtpm.patch
build_sub:
- $(MAKE) -C $(TPM_EMULATOR_DIR)
- $(MAKE) -C $(VTPM_DIR)
+ if [ -e $(GMP_HEADER) ]; then \
+ $(MAKE) -C $(VTPM_DIR); \
+ if [ "$(BUILD_EMULATOR)" = "y" ]; then \
+ $(MAKE) -C $(TPM_EMULATOR_DIR); \
+	fi; \
+ else \
+ echo "*** Unable to build VTPMs. libgmp could not be found."; \
+ fi
+
diff --git a/tools/vtpm/Rules.mk b/tools/vtpm/Rules.mk
index e840df141f..93b84cdaa5 100644
--- a/tools/vtpm/Rules.mk
+++ b/tools/vtpm/Rules.mk
@@ -33,5 +33,7 @@ $(OBJS): $(SRCS)
-include $(DEP_FILES)
+BUILD_EMULATOR = n
+
# Make sure these are just rules
.PHONY : all build install clean
diff --git a/tools/vtpm_manager/Makefile b/tools/vtpm_manager/Makefile
index dddfa5160b..16825a2279 100644
--- a/tools/vtpm_manager/Makefile
+++ b/tools/vtpm_manager/Makefile
@@ -4,13 +4,18 @@ XEN_ROOT = ../..
include $(XEN_ROOT)/tools/vtpm_manager/Rules.mk
SUBDIRS = crypto tcs util manager
+OPENSSL_HEADER = /usr/include/openssl/crypto.h
all: build
build:
- @set -e; for subdir in $(SUBDIRS); do \
- $(MAKE) -C $$subdir $@; \
- done
+ if [ -e $(OPENSSL_HEADER) ]; then \
+	set -e; for subdir in $(SUBDIRS); do \
+ $(MAKE) -C $$subdir $@; \
+ done; \
+ else \
+		echo "*** Cannot build vtpm_manager: OpenSSL development files missing."; \
+ fi
install: build
@set -e; for subdir in $(SUBDIRS); do \
diff --git a/tools/vtpm_manager/manager/vtsp.c b/tools/vtpm_manager/manager/vtsp.c
index 17c3335923..70a20cf616 100644
--- a/tools/vtpm_manager/manager/vtsp.c
+++ b/tools/vtpm_manager/manager/vtsp.c
@@ -144,7 +144,10 @@ TPM_RESULT VerifyAuth( /*[IN]*/ const BYTE *outParamDigestText,
if (memcmp (&hm, &(auth->HMAC), sizeof(TPM_DIGEST)) == 0) // 0 indicates equality
return (TPM_SUCCESS);
else {
- VTSP_OIAP( hContext, auth);
+ // If specified, reconnect the OIAP session.
+ // NOTE: This only works for TCS's that never have a 0 context.
+ if (hContext)
+ VTSP_OIAP( hContext, auth);
return (TPM_AUTHFAIL);
}
}
@@ -157,6 +160,10 @@ TPM_RESULT VTSP_OIAP(const TCS_CONTEXT_HANDLE hContext,
TPMTRYRETURN( TCSP_OIAP(hContext,
&auth->AuthHandle,
&auth->NonceEven) );
+
+ memset(&auth->HMAC, 0, sizeof(TPM_DIGEST));
+ auth->fContinueAuthSession = FALSE;
+
goto egress;
abort_egress:
@@ -195,6 +202,9 @@ TPM_RESULT VTSP_OSAP(const TCS_CONTEXT_HANDLE hContext,
BSG_TPM_NONCE, &nonceOddOSAP);
Crypto_HMAC(sharedSecretText, sizeof(sharedSecretText), (BYTE *) usageAuth, TPM_DIGEST_SIZE, (BYTE *) sharedSecret);
+
+ memset(&auth->HMAC, 0, sizeof(TPM_DIGEST));
+ auth->fContinueAuthSession = FALSE;
goto egress;
@@ -288,9 +298,6 @@ TPM_RESULT VTSP_TakeOwnership( const TCS_CONTEXT_HANDLE hContext,
struct pack_buf_t srkText;
- // GenerateAuth new nonceOdd
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-
//These values are accurate for an enc(AuthData).
struct pack_buf_t encOwnerAuth, encSrkAuth;
@@ -383,9 +390,6 @@ TPM_RESULT VTSP_DisablePubekRead( const TCS_CONTEXT_HANDLE hContext,
BYTE *paramText; // Digest to make Auth.
UINT32 paramTextSize;
- // Generate HMAC
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-
paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
paramTextSize = BSG_PackList(paramText, 1,
@@ -504,9 +508,6 @@ TPM_RESULT VTSP_CreateWrapKey( const TCS_CONTEXT_HANDLE hContext,
newKeyText.data = flatKey;
newKeyText.size = flatKeySize;
- // GenerateAuth new nonceOdd
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-
// Generate HMAC
paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
@@ -587,9 +588,6 @@ TPM_RESULT VTSP_LoadKey(const TCS_CONTEXT_HANDLE hContext,
// Generate Extra TCS Parameters
TPM_HANDLE phKeyHMAC;
- // Generate HMAC
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-
paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
paramTextSize = BSG_PackList(paramText, 1,
@@ -676,9 +674,6 @@ TPM_RESULT VTSP_Unbind( const TCS_CONTEXT_HANDLE hContext,
BYTE *clear_data_text;
UINT32 clear_data_size;
- // Generate HMAC
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-
struct pack_buf_t bound_data32 = {bound_data->size, bound_data->bytes};
paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
@@ -787,6 +782,196 @@ TPM_RESULT VTSP_Bind( CRYPTO_INFO *cryptoInfo,
return TPM_SUCCESS;
}
+TPM_RESULT VTSP_Seal(const TCS_CONTEXT_HANDLE hContext,
+ const TPM_KEY_HANDLE keyHandle,
+ const TPM_AUTHDATA *sealDataAuth,
+ const TPM_PCR_COMPOSITE *pcrComp,
+ const buffer_t *inData,
+ TPM_STORED_DATA *sealedData,
+ const TPM_SECRET *osapSharedSecret,
+ TCS_AUTH *auth) {
+
+ TPM_RESULT status = TPM_SUCCESS;
+ TPM_COMMAND_CODE command = TPM_ORD_Seal;
+
+ BYTE *paramText; // Digest to make Auth.
+ UINT32 paramTextSize;
+
+ // Generate PCR_Info Struct from Comp
+ TPM_PCR_INFO pcrInfo;
+ UINT32 pcrInfoSize, flatpcrSize;
+  BYTE flatpcr[3 + // PCR_Select = three 1-byte banks
+ sizeof(UINT16) + // 2 byte UINT16
+ sizeof(UINT32) + // PCR_Comp = 4 byte UINT32
+ 24 * sizeof(TPM_PCRVALUE) ]; // up to 24 PCRs
+
+ if (pcrComp != NULL) {
+ //printf("\n\tBinding to PCRs: ");
+ //for(int i = 0 ; i < pcrComp->select.sizeOfSelect ; i++)
+ //printf("%2.2x", pcrComp->select.pcrSelect[i]);
+
+ memcpy(&pcrInfo.pcrSelection, &pcrComp->select, sizeof(TPM_PCR_SELECTION));
+
+ flatpcrSize = BSG_Pack(BSG_TPM_PCR_COMPOSITE, (BYTE *) pcrComp, flatpcr);
+ Crypto_SHA1Full((BYTE *) flatpcr, flatpcrSize, (BYTE *) &(pcrInfo.digestAtRelease));
+ memset(&(pcrInfo.digestAtCreation), 0, sizeof(TPM_DIGEST));
+ pcrInfoSize = BSG_Pack(BSG_TPM_PCR_INFO, (BYTE *) &pcrInfo, flatpcr);
+ } else {
+ //printf("\n\tBinding to no PCRS.");
+ pcrInfoSize = 0;
+ }
+
+ // Calculate encUsageAuth
+ BYTE XORbuffer[sizeof(TPM_SECRET) + sizeof(TPM_NONCE)];
+ UINT32 XORbufferSize = sizeof(XORbuffer);
+ TPM_DIGEST XORKey;
+ TPM_ENCAUTH encAuth;
+
+ BSG_PackList( XORbuffer, 2,
+ BSG_TPM_SECRET, osapSharedSecret,
+ BSG_TPM_NONCE, &auth->NonceEven );
+
+ Crypto_SHA1Full(XORbuffer, XORbufferSize, (BYTE *) &XORKey);
+
+ int i;
+ for (i=0; i < TPM_DIGEST_SIZE; i++)
+ ((BYTE *) &encAuth)[i] = ((BYTE *) &XORKey)[i] ^ ((BYTE *) sealDataAuth)[i];
+
+ // Generate Extra TCS Parameters
+ UINT32 inDataSize = buffer_len(inData);
+ struct pack_buf_t inData_pack = {inDataSize, inData->bytes};
+ struct pack_buf_t pcrInfo_pack = {pcrInfoSize, flatpcr};
+
+ UINT32 sealedDataSize;
+ BYTE *flatSealedData=NULL;
+
+ paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
+
+ paramTextSize = BSG_PackList(paramText, 4,
+ BSG_TPM_COMMAND_CODE, &command,
+ BSG_TPM_ENCAUTH, &encAuth,
+ BSG_TPM_SIZE32_DATA, &pcrInfo_pack,
+ BSG_TPM_SIZE32_DATA, &inData_pack);
+
+ TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+ osapSharedSecret, auth) );
+
+ // Call TCS
+ TPMTRYRETURN( TCSP_Seal( hContext,
+ keyHandle,
+ encAuth,
+ pcrInfoSize,
+ flatpcr,
+ inDataSize,
+ inData->bytes,
+ auth,
+ &sealedDataSize,
+ &flatSealedData) );
+
+ // Unpack/return key structure
+ BSG_Unpack( BSG_TPM_STORED_DATA, flatSealedData, sealedData );
+
+ paramTextSize = BSG_PackList(paramText, 3,
+ BSG_TPM_RESULT, &status,
+ BSG_TPM_COMMAND_CODE, &command,
+ BSG_TPM_STORED_DATA, sealedData);
+
+ TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+ osapSharedSecret, auth,
+ 0) );
+
+
+ goto egress;
+
+ abort_egress:
+ egress:
+
+ if (flatSealedData)
+ TCS_FreeMemory( hContext, flatSealedData);
+
+ free(paramText);
+ return status;
+}
+
+
+TPM_RESULT VTSP_Unseal(const TCS_CONTEXT_HANDLE hContext,
+ const TPM_KEY_HANDLE keyHandle,
+ const TPM_STORED_DATA *sealedData,
+ const TPM_AUTHDATA *key_usage_auth,
+ const TPM_AUTHDATA *data_usage_auth,
+ buffer_t *outData,
+ TCS_AUTH *auth,
+ TCS_AUTH *dataAuth) {
+
+ TPM_RESULT status = TPM_SUCCESS;
+ TPM_COMMAND_CODE command = TPM_ORD_Unseal;
+
+ BYTE *paramText; // Digest to make Auth.
+ UINT32 paramTextSize;
+
+ // Generate Extra TCS Parameters
+ UINT32 sealDataSize, clearDataSize;
+ BYTE *flatSealedData= (BYTE *) malloc(sizeof(TPM_VERSION) +
+ 2 * sizeof(UINT32) +
+ sealedData->sealInfoSize +
+ sealedData->encDataSize),
+ *clearData=NULL;
+
+ sealDataSize = BSG_Pack(BSG_TPM_STORED_DATA, sealedData, flatSealedData );
+
+ paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
+
+ paramTextSize = BSG_PackList(paramText, 2,
+ BSG_TPM_COMMAND_CODE, &command,
+ BSG_TPM_STORED_DATA, sealedData);
+
+ TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+ key_usage_auth, auth) );
+
+ TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+ data_usage_auth, dataAuth) );
+ // Call TCS
+ TPMTRYRETURN( TCSP_Unseal( hContext,
+ keyHandle,
+ sealDataSize,
+ flatSealedData,
+ auth,
+ dataAuth,
+ &clearDataSize,
+ &clearData) );
+
+ // Verify Auth
+ struct pack_buf_t clearData_pack = {clearDataSize, clearData};
+
+ paramTextSize = BSG_PackList(paramText, 3,
+ BSG_TPM_RESULT, &status,
+ BSG_TPM_COMMAND_CODE, &command,
+ BSG_TPM_SIZE32_DATA, &clearData_pack);
+
+ TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+ key_usage_auth, auth,
+ hContext) );
+
+ TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+ data_usage_auth, dataAuth,
+ hContext) );
+
+ // Unpack/return key structure
+ TPMTRYRETURN( buffer_init(outData, clearDataSize, clearData) );
+
+ goto egress;
+
+ abort_egress:
+ egress:
+
+  if (clearData)
+    TCS_FreeMemory( hContext, clearData);
+
+  free(flatSealedData);
+
+ free(paramText);
+ return status;
+}
+
+
// Function Reaches into unsupported TCS command, beware.
TPM_RESULT VTSP_RawTransmit(const TCS_CONTEXT_HANDLE hContext,
const buffer_t *inbuf,
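
VTSP_Seal above derives its encrypted usage auth by XORing the seal
secret with SHA1(osapSharedSecret || NonceEven). A self-contained sketch
of just that step, using OpenSSL's SHA1 (which vtpm_manager already
links against); the fixed 20-byte sizes stand in for TPM_SECRET,
TPM_NONCE and TPM_DIGEST:

    #include <openssl/sha.h>
    #include <stdint.h>
    #include <string.h>

    #define DIGEST_SIZE 20   /* TPM_DIGEST_SIZE == SHA_DIGEST_LENGTH */

    static void make_enc_auth(const uint8_t secret[DIGEST_SIZE],     /* osapSharedSecret */
                              const uint8_t nonce_even[DIGEST_SIZE], /* auth->NonceEven */
                              const uint8_t auth_data[DIGEST_SIZE],  /* sealDataAuth */
                              uint8_t enc_auth[DIGEST_SIZE])
    {
        uint8_t buf[2 * DIGEST_SIZE], xor_key[SHA_DIGEST_LENGTH];
        int i;

        memcpy(buf, secret, DIGEST_SIZE);
        memcpy(buf + DIGEST_SIZE, nonce_even, DIGEST_SIZE);
        SHA1(buf, sizeof(buf), xor_key);              /* XORKey */

        for (i = 0; i < DIGEST_SIZE; i++)
            enc_auth[i] = xor_key[i] ^ auth_data[i];  /* pad the auth data */
    }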
diff --git a/tools/vtpm_manager/manager/vtsp.h b/tools/vtpm_manager/manager/vtsp.h
index 93f22d34e4..5baa2a377c 100644
--- a/tools/vtpm_manager/manager/vtsp.h
+++ b/tools/vtpm_manager/manager/vtsp.h
@@ -100,4 +100,22 @@ TPM_RESULT VTSP_Bind( CRYPTO_INFO *cryptoInfo,
const buffer_t *inData,
buffer_t *outData);
+TPM_RESULT VTSP_Seal(const TCS_CONTEXT_HANDLE hContext,
+ const TPM_KEY_HANDLE keyHandle,
+ const TPM_AUTHDATA *sealDataAuth,
+ const TPM_PCR_COMPOSITE *pcrComp,
+ const buffer_t *inData,
+ TPM_STORED_DATA *sealedData,
+ const TPM_SECRET *osapSharedSecret,
+ TCS_AUTH *auth);
+
+TPM_RESULT VTSP_Unseal(const TCS_CONTEXT_HANDLE hContext,
+ const TPM_KEY_HANDLE keyHandle,
+ const TPM_STORED_DATA *sealedData,
+ const TPM_AUTHDATA *key_usage_auth,
+ const TPM_AUTHDATA *data_usage_auth,
+ buffer_t *outData,
+ TCS_AUTH *auth,
+ TCS_AUTH *dataAuth);
+
#endif //_VTSP_H_
diff --git a/tools/vtpm_manager/tcs/tcs.c b/tools/vtpm_manager/tcs/tcs.c
index ad31b0c26f..376a11eb20 100644
--- a/tools/vtpm_manager/tcs/tcs.c
+++ b/tools/vtpm_manager/tcs/tcs.c
@@ -636,7 +636,7 @@ TPM_RESULT TCSP_Seal( TCS_CONTEXT_HANDLE hContext, // in
TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH;
// check input params
- if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || *SealedData == NULL)
+ if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || SealedData == NULL)
return TPM_BAD_PARAMETER;
// Convert Byte Input parameter in the input byte stream InBuf
diff --git a/tools/xcutils/xc_save.c b/tools/xcutils/xc_save.c
index eac8d1ad2b..44c4701379 100644
--- a/tools/xcutils/xc_save.c
+++ b/tools/xcutils/xc_save.c
@@ -10,10 +10,28 @@
#include <err.h>
#include <stdlib.h>
#include <stdint.h>
+#include <string.h>
#include <stdio.h>
#include <xenguest.h>
+
+/**
+ * Issue a suspend request through stdout, and receive the acknowledgement
+ * from stdin. This is handled by XendCheckpoint in the Python layer.
+ */
+static int suspend(int domid)
+{
+ char ans[30];
+
+ printf("suspend\n");
+ fflush(stdout);
+
+ return (fgets(ans, sizeof(ans), stdin) != NULL &&
+ !strncmp(ans, "done\n", 5));
+}
+
+
int
main(int argc, char **argv)
{
@@ -29,5 +47,5 @@ main(int argc, char **argv)
max_f = atoi(argv[5]);
flags = atoi(argv[6]);
- return xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags);
+ return xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
}
diff --git a/tools/xenmon/Makefile b/tools/xenmon/Makefile
index 7fdf786445..6ff8391f9c 100644
--- a/tools/xenmon/Makefile
+++ b/tools/xenmon/Makefile
@@ -13,12 +13,9 @@
INSTALL = install
INSTALL_PROG = $(INSTALL) -m0755
INSTALL_DIR = $(INSTALL) -d -m0755
-INSTALL_DATA = $(INSTALL) -m064
+INSTALL_DATA = $(INSTALL) -m0644
-prefix=/usr/local
-mandir=$(prefix)/share/man
-man1dir=$(mandir)/man1
-sbindir=$(prefix)/sbin
+sbindir=/usr/sbin
XEN_ROOT=../..
include $(XEN_ROOT)/tools/Rules.mk
diff --git a/tools/xenstat/libxenstat/src/xenstat.c b/tools/xenstat/libxenstat/src/xenstat.c
index 921c7d29db..e8a6928dac 100644
--- a/tools/xenstat/libxenstat/src/xenstat.c
+++ b/tools/xenstat/libxenstat/src/xenstat.c
@@ -705,7 +705,7 @@ static char *xenstat_get_domain_name(xenstat_handle *handle, unsigned int domain
snprintf(path, sizeof(path),"/local/domain/%i/name", domain_id);
- name = xs_read(handle->xshandle, NULL, path, NULL);
+ name = xs_read(handle->xshandle, XBT_NULL, path, NULL);
if (name == NULL)
name = strdup(" ");
diff --git a/tools/xenstore/xenstore_client.c b/tools/xenstore/xenstore_client.c
index f8e1f5c893..07809e6c83 100644
--- a/tools/xenstore/xenstore_client.c
+++ b/tools/xenstore/xenstore_client.c
@@ -66,7 +66,7 @@ usage(const char *progname)
#if defined(CLIENT_rm)
static int
-do_rm(char *path, struct xs_handle *xsh, struct xs_transaction_handle *xth)
+do_rm(char *path, struct xs_handle *xsh, xs_transaction_t xth)
{
if (xs_rm(xsh, xth, path)) {
return 0;
@@ -81,7 +81,7 @@ do_rm(char *path, struct xs_handle *xsh, struct xs_transaction_handle *xth)
static int
perform(int optind, int argc, char **argv, struct xs_handle *xsh,
- struct xs_transaction_handle *xth, int prefix, int tidy)
+ xs_transaction_t xth, int prefix, int tidy)
{
while (optind < argc) {
#if defined(CLIENT_read)
@@ -179,7 +179,7 @@ int
main(int argc, char **argv)
{
struct xs_handle *xsh;
- struct xs_transaction_handle *xth;
+ xs_transaction_t xth;
int ret = 0, socket = 0;
int prefix = 0;
int tidy = 0;
@@ -243,7 +243,7 @@ main(int argc, char **argv)
again:
xth = xs_transaction_start(xsh);
- if (xth == NULL)
+ if (xth == XBT_NULL)
errx(1, "couldn't start transaction");
ret = perform(optind, argc, argv, xsh, xth, prefix, tidy);
diff --git a/tools/xenstore/xs.c b/tools/xenstore/xs.c
index 5cd4201e20..f7a576119f 100644
--- a/tools/xenstore/xs.c
+++ b/tools/xenstore/xs.c
@@ -292,7 +292,7 @@ static void *read_reply(
}
/* Send message to xs, get malloc'ed reply. NULL and set errno on error. */
-static void *xs_talkv(struct xs_handle *h, struct xs_transaction_handle *t,
+static void *xs_talkv(struct xs_handle *h, xs_transaction_t t,
enum xsd_sockmsg_type type,
const struct iovec *iovec,
unsigned int num_vecs,
@@ -304,7 +304,7 @@ static void *xs_talkv(struct xs_handle *h, struct xs_transaction_handle *t,
unsigned int i;
struct sigaction ignorepipe, oldact;
- msg.tx_id = (uint32_t)(unsigned long)t;
+ msg.tx_id = t;
msg.req_id = 0;
msg.type = type;
msg.len = 0;
@@ -368,7 +368,7 @@ static void free_no_errno(void *p)
}
/* Simplified version of xs_talkv: single message. */
-static void *xs_single(struct xs_handle *h, struct xs_transaction_handle *t,
+static void *xs_single(struct xs_handle *h, xs_transaction_t t,
enum xsd_sockmsg_type type,
const char *string,
unsigned int *len)
@@ -388,7 +388,7 @@ static bool xs_bool(char *reply)
return true;
}
-char **xs_directory(struct xs_handle *h, struct xs_transaction_handle *t,
+char **xs_directory(struct xs_handle *h, xs_transaction_t t,
const char *path, unsigned int *num)
{
char *strings, *p, **ret;
@@ -420,7 +420,7 @@ char **xs_directory(struct xs_handle *h, struct xs_transaction_handle *t,
* Returns a malloced value: call free() on it after use.
* len indicates length in bytes, not including the nul.
*/
-void *xs_read(struct xs_handle *h, struct xs_transaction_handle *t,
+void *xs_read(struct xs_handle *h, xs_transaction_t t,
const char *path, unsigned int *len)
{
return xs_single(h, t, XS_READ, path, len);
@@ -429,7 +429,7 @@ void *xs_read(struct xs_handle *h, struct xs_transaction_handle *t,
/* Write the value of a single file.
* Returns false on failure.
*/
-bool xs_write(struct xs_handle *h, struct xs_transaction_handle *t,
+bool xs_write(struct xs_handle *h, xs_transaction_t t,
const char *path, const void *data, unsigned int len)
{
struct iovec iovec[2];
@@ -446,7 +446,7 @@ bool xs_write(struct xs_handle *h, struct xs_transaction_handle *t,
/* Create a new directory.
* Returns false on failure, or success if it already exists.
*/
-bool xs_mkdir(struct xs_handle *h, struct xs_transaction_handle *t,
+bool xs_mkdir(struct xs_handle *h, xs_transaction_t t,
const char *path)
{
return xs_bool(xs_single(h, t, XS_MKDIR, path, NULL));
@@ -455,7 +455,7 @@ bool xs_mkdir(struct xs_handle *h, struct xs_transaction_handle *t,
/* Destroy a file or directory (directories must be empty).
* Returns false on failure, or success if it doesn't exist.
*/
-bool xs_rm(struct xs_handle *h, struct xs_transaction_handle *t,
+bool xs_rm(struct xs_handle *h, xs_transaction_t t,
const char *path)
{
return xs_bool(xs_single(h, t, XS_RM, path, NULL));
@@ -465,7 +465,7 @@ bool xs_rm(struct xs_handle *h, struct xs_transaction_handle *t,
* Returns malloced array, or NULL: call free() after use.
*/
struct xs_permissions *xs_get_permissions(struct xs_handle *h,
- struct xs_transaction_handle *t,
+ xs_transaction_t t,
const char *path, unsigned int *num)
{
char *strings;
@@ -499,7 +499,7 @@ struct xs_permissions *xs_get_permissions(struct xs_handle *h,
* Returns false on failure.
*/
bool xs_set_permissions(struct xs_handle *h,
- struct xs_transaction_handle *t,
+ xs_transaction_t t,
const char *path,
struct xs_permissions *perms,
unsigned int num_perms)
@@ -560,7 +560,7 @@ bool xs_watch(struct xs_handle *h, const char *path, const char *token)
iov[1].iov_base = (void *)token;
iov[1].iov_len = strlen(token) + 1;
- return xs_bool(xs_talkv(h, NULL, XS_WATCH, iov,
+ return xs_bool(xs_talkv(h, XBT_NULL, XS_WATCH, iov,
ARRAY_SIZE(iov), NULL));
}
@@ -627,28 +627,28 @@ bool xs_unwatch(struct xs_handle *h, const char *path, const char *token)
iov[1].iov_base = (char *)token;
iov[1].iov_len = strlen(token) + 1;
- return xs_bool(xs_talkv(h, NULL, XS_UNWATCH, iov,
+ return xs_bool(xs_talkv(h, XBT_NULL, XS_UNWATCH, iov,
ARRAY_SIZE(iov), NULL));
}
/* Start a transaction: changes by others will not be seen during this
* transaction, and changes will not be visible to others until end.
* You can only have one transaction at any time.
- * Returns NULL on failure.
+ * Returns XBT_NULL on failure.
*/
-struct xs_transaction_handle *xs_transaction_start(struct xs_handle *h)
+xs_transaction_t xs_transaction_start(struct xs_handle *h)
{
char *id_str;
- unsigned long id;
+ xs_transaction_t id;
- id_str = xs_single(h, NULL, XS_TRANSACTION_START, "", NULL);
+ id_str = xs_single(h, XBT_NULL, XS_TRANSACTION_START, "", NULL);
if (id_str == NULL)
- return NULL;
+ return XBT_NULL;
id = strtoul(id_str, NULL, 0);
free(id_str);
- return (struct xs_transaction_handle *)id;
+ return id;
}
/* End a transaction.
@@ -656,7 +656,7 @@ struct xs_transaction_handle *xs_transaction_start(struct xs_handle *h)
* Returns false on failure, which indicates an error: transactions will
* not fail spuriously.
*/
-bool xs_transaction_end(struct xs_handle *h, struct xs_transaction_handle *t,
+bool xs_transaction_end(struct xs_handle *h, xs_transaction_t t,
bool abort)
{
char abortstr[2];
@@ -693,7 +693,7 @@ bool xs_introduce_domain(struct xs_handle *h,
iov[2].iov_base = eventchn_str;
iov[2].iov_len = strlen(eventchn_str) + 1;
- return xs_bool(xs_talkv(h, NULL, XS_INTRODUCE, iov,
+ return xs_bool(xs_talkv(h, XBT_NULL, XS_INTRODUCE, iov,
ARRAY_SIZE(iov), NULL));
}
@@ -705,7 +705,7 @@ static void * single_with_domid(struct xs_handle *h,
sprintf(domid_str, "%u", domid);
- return xs_single(h, NULL, type, domid_str, NULL);
+ return xs_single(h, XBT_NULL, type, domid_str, NULL);
}
bool xs_release_domain(struct xs_handle *h, unsigned int domid)
@@ -719,7 +719,7 @@ char *xs_get_domain_path(struct xs_handle *h, unsigned int domid)
sprintf(domid_str, "%u", domid);
- return xs_single(h, NULL, XS_GET_DOMAIN_PATH, domid_str, NULL);
+ return xs_single(h, XBT_NULL, XS_GET_DOMAIN_PATH, domid_str, NULL);
}
bool xs_is_domain_introduced(struct xs_handle *h, unsigned int domid)
@@ -739,7 +739,7 @@ char *xs_debug_command(struct xs_handle *h, const char *cmd,
iov[1].iov_base = data;
iov[1].iov_len = len;
- return xs_talkv(h, NULL, XS_DEBUG, iov,
+ return xs_talkv(h, XBT_NULL, XS_DEBUG, iov,
ARRAY_SIZE(iov), NULL);
}
diff --git a/tools/xenstore/xs.h b/tools/xenstore/xs.h
index e476082531..cabf9d0711 100644
--- a/tools/xenstore/xs.h
+++ b/tools/xenstore/xs.h
@@ -22,8 +22,10 @@
#include <xs_lib.h>
+#define XBT_NULL 0
+
struct xs_handle;
-struct xs_transaction_handle;
+typedef uint32_t xs_transaction_t;
/* On failure, these routines set errno. */
@@ -45,45 +47,45 @@ void xs_daemon_close(struct xs_handle *);
* Returns a malloced array: call free() on it after use.
* Num indicates size.
*/
-char **xs_directory(struct xs_handle *h, struct xs_transaction_handle *t,
+char **xs_directory(struct xs_handle *h, xs_transaction_t t,
const char *path, unsigned int *num);
/* Get the value of a single file, nul terminated.
* Returns a malloced value: call free() on it after use.
* len indicates length in bytes, not including terminator.
*/
-void *xs_read(struct xs_handle *h, struct xs_transaction_handle *t,
+void *xs_read(struct xs_handle *h, xs_transaction_t t,
const char *path, unsigned int *len);
/* Write the value of a single file.
* Returns false on failure.
*/
-bool xs_write(struct xs_handle *h, struct xs_transaction_handle *t,
+bool xs_write(struct xs_handle *h, xs_transaction_t t,
const char *path, const void *data, unsigned int len);
/* Create a new directory.
* Returns false on failure, or success if it already exists.
*/
-bool xs_mkdir(struct xs_handle *h, struct xs_transaction_handle *t,
+bool xs_mkdir(struct xs_handle *h, xs_transaction_t t,
const char *path);
/* Destroy a file or directory (and children).
- * Returns false on failure, or success if it doesn't exist.
+ * Returns false on failure, or if it doesn't exist.
*/
-bool xs_rm(struct xs_handle *h, struct xs_transaction_handle *t,
+bool xs_rm(struct xs_handle *h, xs_transaction_t t,
const char *path);
/* Get permissions of node (first element is owner, first perms is "other").
* Returns malloced array, or NULL: call free() after use.
*/
struct xs_permissions *xs_get_permissions(struct xs_handle *h,
- struct xs_transaction_handle *t,
+ xs_transaction_t t,
const char *path, unsigned int *num);
/* Set permissions of node (must be owner).
* Returns false on failure.
*/
-bool xs_set_permissions(struct xs_handle *h, struct xs_transaction_handle *t,
+bool xs_set_permissions(struct xs_handle *h, xs_transaction_t t,
const char *path, struct xs_permissions *perms,
unsigned int num_perms);
@@ -113,14 +115,14 @@ bool xs_unwatch(struct xs_handle *h, const char *path, const char *token);
* You can only have one transaction at any time.
- * Returns NULL on failure.
+ * Returns XBT_NULL on failure.
*/
-struct xs_transaction_handle *xs_transaction_start(struct xs_handle *h);
+xs_transaction_t xs_transaction_start(struct xs_handle *h);
/* End a transaction.
- * If abandon is true, transaction is discarded instead of committed.
+ * If abort is true, transaction is discarded instead of committed.
* Returns false on failure: if errno == EAGAIN, you have to restart
* transaction.
*/
-bool xs_transaction_end(struct xs_handle *h, struct xs_transaction_handle *t,
+bool xs_transaction_end(struct xs_handle *h, xs_transaction_t t,
bool abort);
/* Introduce a new domain.
diff --git a/tools/xenstore/xs_test.c b/tools/xenstore/xs_test.c
index 9c2ae92568..8b1a5ebf1a 100644
--- a/tools/xenstore/xs_test.c
+++ b/tools/xenstore/xs_test.c
@@ -37,12 +37,13 @@
#include <sys/time.h>
#include "utils.h"
#include "xs_lib.h"
+#include "xs.h"
#include "list.h"
#define XSTEST
static struct xs_handle *handles[10] = { NULL };
-static struct xs_transaction_handle *txh[10] = { NULL };
+static xs_transaction_t txh[10] = { XBT_NULL };
static unsigned int timeout_ms = 500;
static bool timeout_suppressed = true;
@@ -492,7 +493,7 @@ static void do_unwatch(unsigned int handle, const char *node, const char *token)
static void do_start(unsigned int handle)
{
txh[handle] = xs_transaction_start(handles[handle]);
- if (txh[handle] == NULL)
+ if (txh[handle] == XBT_NULL)
failed(handle);
}
@@ -500,7 +501,7 @@ static void do_end(unsigned int handle, bool abort)
{
if (!xs_transaction_end(handles[handle], txh[handle], abort))
failed(handle);
- txh[handle] = NULL;
+ txh[handle] = XBT_NULL;
}
static void do_introduce(unsigned int handle,
@@ -534,7 +535,7 @@ static void do_introduce(unsigned int handle,
*(uint16_t *)((void *)interface + 36) = atoi(eventchn);
if (!xs_introduce_domain(handles[handle], atoi(domid),
- atol(mfn), atoi(eventchn), path)) {
+ atol(mfn), atoi(eventchn))) {
failed(handle);
munmap(interface, getpagesize());
return;
@@ -718,7 +719,7 @@ static void do_command(unsigned int default_handle, char *line)
else if (streq(command, "close")) {
xs_daemon_close(handles[handle]);
handles[handle] = NULL;
- txh[handle] = NULL;
+ txh[handle] = XBT_NULL;
} else if (streq(command, "start"))
do_start(handle);
else if (streq(command, "commit"))
diff --git a/tools/xenstore/xsls.c b/tools/xenstore/xsls.c
index 781fc6d7a8..7f3fa4f7f1 100644
--- a/tools/xenstore/xsls.c
+++ b/tools/xenstore/xsls.c
@@ -11,7 +11,7 @@ void print_dir(struct xs_handle *h, char *path, int cur_depth)
int i;
unsigned int num, len;
- e = xs_directory(h, NULL, path, &num);
+ e = xs_directory(h, XBT_NULL, path, &num);
if (e == NULL)
err(1, "xs_directory (%s)", path);
@@ -22,7 +22,7 @@ void print_dir(struct xs_handle *h, char *path, int cur_depth)
sprintf(newpath, "%s%s%s", path,
path[strlen(path)-1] == '/' ? "" : "/",
e[i]);
- val = xs_read(h, NULL, newpath, &len);
+ val = xs_read(h, XBT_NULL, newpath, &len);
if (val == NULL)
printf(":\n");
else if ((unsigned)len > (151 - strlen(e[i])))
diff --git a/tools/xentrace/xenctx.c b/tools/xentrace/xenctx.c
index 32adccde7e..29c6969b06 100644
--- a/tools/xentrace/xenctx.c
+++ b/tools/xentrace/xenctx.c
@@ -380,10 +380,10 @@ void dump_ctx(int vcpu)
exit(-1);
}
- ret = xc_domain_get_vcpu_context(xc_handle, domid, vcpu, &ctx);
+ ret = xc_vcpu_getcontext(xc_handle, domid, vcpu, &ctx);
if (ret < 0) {
xc_domain_unpause(xc_handle, domid);
- perror("xc_domain_get_vcpu_context");
+ perror("xc_vcpu_getcontext");
exit(-1);
}
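
As a hedged illustration of the libxc rename above (the argument types are assumed from contemporary libxc and are not shown in this hunk):

    #include <stdio.h>
    #include <xenctrl.h>

    /* Fetch a VCPU's register state under the new name. */
    static int fetch_ctx(int xc_handle, uint32_t domid, uint32_t vcpu,
                         vcpu_guest_context_t *ctx)
    {
        if (xc_vcpu_getcontext(xc_handle, domid, vcpu, ctx) < 0) {
            perror("xc_vcpu_getcontext");
            return -1;
        }
        return 0;
    }
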
diff --git a/tools/xm-test/lib/XmTestLib/XenDomain.py b/tools/xm-test/lib/XmTestLib/XenDomain.py
index 5838f4693e..49e111e68b 100644
--- a/tools/xm-test/lib/XmTestLib/XenDomain.py
+++ b/tools/xm-test/lib/XmTestLib/XenDomain.py
@@ -72,7 +72,8 @@ ParavirtDefaults = {"memory" : 64,
}
VmxDefaults = {"memory" : 64,
"vcpus" : 1,
- "nics" : 0,
+ "acpi" : 0,
+ "apic" : 0,
"disk" : ["file:%s/disk.img,ioemu:%s,w" %
(getRdPath(), BLOCK_ROOT_DEV)],
"kernel" : "/usr/lib/xen/boot/vmxloader",
diff --git a/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py b/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py
index bc41be984e..7f1c7cd453 100644
--- a/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py
+++ b/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py
@@ -46,6 +46,8 @@ if not checkBlockList(domain):
if not checkXmLongList(domain):
FAIL("xm long list does not show that hda1 was attached")
+time.sleep(2)
+
s, o = traceCommand("xm block-detach %s hda1" % domain.getName())
if s != 0:
FAIL("block-detach failed")
diff --git a/tools/xm-test/tests/network-attach/Makefile.am b/tools/xm-test/tests/network-attach/Makefile.am
index d7e041c810..4edbb7db68 100644
--- a/tools/xm-test/tests/network-attach/Makefile.am
+++ b/tools/xm-test/tests/network-attach/Makefile.am
@@ -6,7 +6,7 @@ TESTS = 01_network_attach_pos.test \
03_network_attach_detach_multiple_pos.test \
04_network_attach_baddomain_neg.test
-XFAIL_TESTS = 03_network_attach_detach_multiple_pos.test
+XFAIL_TESTS =
EXTRA_DIST = $(TESTS) $(XFAIL_TESTS) network_utils.py
diff --git a/tools/xm-test/tests/network/02_network_local_ping_pos.py b/tools/xm-test/tests/network/02_network_local_ping_pos.py
index fc1dd9abe1..b1952cf0f5 100644
--- a/tools/xm-test/tests/network/02_network_local_ping_pos.py
+++ b/tools/xm-test/tests/network/02_network_local_ping_pos.py
@@ -50,7 +50,7 @@ try:
console.sendInput("bhs")
# Bring up the "lo" interface.
- console.runCmd("ifconfig lo up")
+ console.runCmd("ifconfig lo 127.0.0.1")
console.runCmd("ifconfig eth0 inet "+ip+" netmask "+mask+" up")
diff --git a/xen/arch/ia64/linux-xen/process-linux-xen.c b/xen/arch/ia64/linux-xen/process-linux-xen.c
index b02187ad8c..0f7b403dca 100644
--- a/xen/arch/ia64/linux-xen/process-linux-xen.c
+++ b/xen/arch/ia64/linux-xen/process-linux-xen.c
@@ -241,7 +241,7 @@ static inline void play_dead(void)
max_xtp();
local_irq_disable();
- idle_task_exit();
+ idle_domain_exit();
ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
/*
* The above is a point of no-return, the processor is
diff --git a/xen/arch/ia64/linux-xen/smpboot.c b/xen/arch/ia64/linux-xen/smpboot.c
index 89f6829648..c6970ffad9 100644
--- a/xen/arch/ia64/linux-xen/smpboot.c
+++ b/xen/arch/ia64/linux-xen/smpboot.c
@@ -482,10 +482,8 @@ do_rest:
struct vcpu *v;
void *stack;
- if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
- panic("failed 'createdomain' for CPU %d", cpu);
- set_bit(_DOMF_idle_domain, &idle->domain_flags);
- v = idle->vcpu[0];
+ v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu);
+ BUG_ON(v == NULL);
printf ("do_boot_cpu: cpu=%d, domain=%p, vcpu=%p\n", cpu, idle, v);
diff --git a/xen/arch/ia64/vmx/mmio.c b/xen/arch/ia64/vmx/mmio.c
index b3668acb81..342fa87621 100644
--- a/xen/arch/ia64/vmx/mmio.c
+++ b/xen/arch/ia64/vmx/mmio.c
@@ -29,7 +29,7 @@
#include <asm/vmx_vcpu.h>
#include <asm/privop.h>
#include <asm/types.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/mm.h>
#include <asm/vmx.h>
diff --git a/xen/arch/ia64/vmx/vlsapic.c b/xen/arch/ia64/vmx/vlsapic.c
index 032d3633b1..93dabc168b 100644
--- a/xen/arch/ia64/vmx/vlsapic.c
+++ b/xen/arch/ia64/vmx/vlsapic.c
@@ -119,7 +119,7 @@ void vtm_init(VCPU *vcpu)
itc_freq = local_cpu_data->itc_freq;
vtm->cfg_max_jump=itc_freq*MAX_JUMP_STEP/1000;
vtm->cfg_min_grun=itc_freq*MIN_GUEST_RUNNING_TIME/1000;
- init_ac_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, 0);
+ init_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, 0);
vtm_reset(vcpu);
}
@@ -163,20 +163,20 @@ void vtm_set_itv(VCPU *vcpu)
local_irq_save(spsr);
itv = VCPU(vcpu, itv);
if ( ITV_IRQ_MASK(itv) )
- rem_ac_timer(&vtm->vtm_timer);
+ stop_timer(&vtm->vtm_timer);
vtm_interruption_update(vcpu, vtm);
local_irq_restore(spsr);
}
/*
- * Update interrupt or hook the vtm ac_timer for fire
+ * Update the interrupt, or hook the vtm timer to fire.
* At this point vtm_timer should be removed if itv is masked.
*/
/* Interrupt must be disabled at this point */
extern u64 cycle_to_ns(u64 cyle);
-#define TIMER_SLOP (50*1000) /* ns */ /* copy from ac_timer.c */
+#define TIMER_SLOP (50*1000) /* ns */ /* copy from timer.c */
void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm)
{
uint64_t cur_itc,vitm,vitv;
@@ -198,7 +198,7 @@ void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm)
if ( diff_last >= 0 ) {
// interrupt already fired.
- rem_ac_timer(&vtm->vtm_timer);
+ stop_timer(&vtm->vtm_timer);
}
else if ( diff_now >= 0 ) {
// ITV is fired.
@@ -207,30 +207,30 @@ void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm)
/* Both last_itc & cur_itc < itm, wait for fire condition */
else {
expires = NOW() + cycle_to_ns(0-diff_now) + TIMER_SLOP;
- set_ac_timer(&vtm->vtm_timer, expires);
+ set_timer(&vtm->vtm_timer, expires);
}
local_irq_restore(spsr);
}
/*
* Action for vtm when the domain is scheduled out.
- * Remove the ac_timer for vtm.
+ * Remove the timer for vtm.
*/
void vtm_domain_out(VCPU *vcpu)
{
- if(!is_idle_task(vcpu->domain))
- rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
+ if(!is_idle_domain(vcpu->domain))
+ stop_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
}
/*
* Action for vtm when the domain is scheduled in.
- * Fire vtm IRQ or add the ac_timer for vtm.
+ * Fire vtm IRQ or add the timer for vtm.
*/
void vtm_domain_in(VCPU *vcpu)
{
vtime_t *vtm;
- if(!is_idle_task(vcpu->domain)) {
+ if(!is_idle_domain(vcpu->domain)) {
vtm=&(vcpu->arch.arch_vmx.vtm);
vtm_interruption_update(vcpu, vtm);
}
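
The rename running through this file is mechanical: the generic Xen timer loses its ac_ prefix (init_timer/set_timer/stop_timer, TIMER_SOFTIRQ). A minimal sketch of the renamed interface, assuming only the signatures already visible in this patch:

    static struct timer demo_timer;

    static void demo_fn(void *data)
    {
        /* Runs from TIMER_SOFTIRQ context on the CPU passed to init_timer(). */
    }

    static void demo_init(void *data)
    {
        init_timer(&demo_timer, demo_fn, data, smp_processor_id());
        set_timer(&demo_timer, NOW() + MILLISECS(10));  /* one-shot arm */
        /* ... */
        stop_timer(&demo_timer);  /* formerly rem_ac_timer(); safe if inactive */
    }
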
diff --git a/xen/arch/ia64/vmx/vmx_init.c b/xen/arch/ia64/vmx/vmx_init.c
index 0920b8c14c..788b7bc1a0 100644
--- a/xen/arch/ia64/vmx/vmx_init.c
+++ b/xen/arch/ia64/vmx/vmx_init.c
@@ -42,7 +42,7 @@
#include <xen/lib.h>
#include <asm/vmmu.h>
#include <public/arch-ia64.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/vmx_phy_mode.h>
#include <asm/processor.h>
#include <asm/vmx.h>
diff --git a/xen/arch/ia64/vmx/vmx_process.c b/xen/arch/ia64/vmx/vmx_process.c
index a5fdcf87b3..99e4770091 100644
--- a/xen/arch/ia64/vmx/vmx_process.c
+++ b/xen/arch/ia64/vmx/vmx_process.c
@@ -231,7 +231,7 @@ void leave_hypervisor_tail(struct pt_regs *regs)
struct domain *d = current->domain;
struct vcpu *v = current;
// FIXME: Will this work properly if doing an RFI???
- if (!is_idle_task(d) ) { // always comes from guest
+ if (!is_idle_domain(d) ) { // always comes from guest
extern void vmx_dorfirfi(void);
struct pt_regs *user_regs = vcpu_regs(current);
if (local_softirq_pending())
diff --git a/xen/arch/ia64/vmx/vmx_support.c b/xen/arch/ia64/vmx/vmx_support.c
index 19ea7be6de..801eba6cf8 100644
--- a/xen/arch/ia64/vmx/vmx_support.c
+++ b/xen/arch/ia64/vmx/vmx_support.c
@@ -21,7 +21,7 @@
*/
#include <xen/config.h>
#include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/vmx.h>
#include <asm/vmx_vcpu.h>
diff --git a/xen/arch/ia64/xen/domain.c b/xen/arch/ia64/xen/domain.c
index 8c9dbada27..99b1646614 100644
--- a/xen/arch/ia64/xen/domain.c
+++ b/xen/arch/ia64/xen/domain.c
@@ -46,7 +46,7 @@
#include <asm/vmx_vcpu.h>
#include <asm/vmx_vpd.h>
#include <asm/pal.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#define CONFIG_DOMAIN0_CONTIGUOUS
unsigned long dom0_start = -1L;
@@ -87,7 +87,6 @@ static void continue_cpu_idle_loop(void)
int cpu = smp_processor_id();
for ( ; ; )
{
- printf ("idle%dD\n", cpu);
#ifdef IA64
// __IRQ_STAT(cpu, idle_timestamp) = jiffies
#else
@@ -146,15 +145,26 @@ struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
{
struct vcpu *v;
+ /* Keep idle vcpu0 statically allocated at compile time, because
+ * some code inherited from Linux still requires it early in boot.
+ */
+ if (is_idle_domain(d) && !vcpu_id)
+ return idle_vcpu[0];
+
if ((v = alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER)) == NULL)
return NULL;
memset(v, 0, sizeof(*v));
- memcpy(&v->arch, &idle0_vcpu.arch, sizeof(v->arch));
- v->arch.privregs =
+ memcpy(&v->arch, &idle_vcpu[0]->arch, sizeof(v->arch));
+
+ if (!is_idle_domain(d)) {
+ v->arch.privregs =
alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
+ BUG_ON(v->arch.privregs == NULL);
+ memset(v->arch.privregs, 0, PAGE_SIZE);
+ }
+
printf("arch_vcpu_info=%p\n", v->arch.privregs);
- memset(v->arch.privregs, 0, PAGE_SIZE);
return v;
}
@@ -191,6 +201,14 @@ int arch_do_createdomain(struct vcpu *v)
memset(ti, 0, sizeof(struct thread_info));
init_switch_stack(v);
+ // the following will eventually need to be negotiated dynamically
+ d->xen_vastart = XEN_START_ADDR;
+ d->xen_vaend = XEN_END_ADDR;
+ d->shared_info_va = SHAREDINFO_ADDR;
+
+ if (is_idle_vcpu(v))
+ return 0;
+
d->shared_info = (void *)alloc_xenheap_page();
if (!d->shared_info) {
printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
@@ -200,12 +218,7 @@ int arch_do_createdomain(struct vcpu *v)
if (v == d->vcpu[0])
memset(&d->shared_info->evtchn_mask[0], 0xff,
sizeof(d->shared_info->evtchn_mask));
-#if 0
- d->vcpu[0].arch.privregs =
- alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
- printf("arch_vcpu_info=%p\n", d->vcpu[0].arch.privregs);
- memset(d->vcpu.arch.privregs, 0, PAGE_SIZE);
-#endif
+
v->vcpu_info = &(d->shared_info->vcpu_info[0]);
d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
@@ -227,28 +240,21 @@ int arch_do_createdomain(struct vcpu *v)
BUG();
v->arch.starting_rid = d->arch.starting_rid;
v->arch.ending_rid = d->arch.ending_rid;
- // the following will eventually need to be negotiated dynamically
- d->xen_vastart = XEN_START_ADDR;
- d->xen_vaend = XEN_END_ADDR;
- d->shared_info_va = SHAREDINFO_ADDR;
d->arch.breakimm = 0x1000;
v->arch.breakimm = d->arch.breakimm;
d->arch.sys_pgnr = 0;
- if (d->domain_id != IDLE_DOMAIN_ID) {
- d->arch.mm = xmalloc(struct mm_struct);
- if (unlikely(!d->arch.mm)) {
- printk("Can't allocate mm_struct for domain %d\n",d->domain_id);
- return -ENOMEM;
- }
- memset(d->arch.mm, 0, sizeof(*d->arch.mm));
- d->arch.mm->pgd = pgd_alloc(d->arch.mm);
- if (unlikely(!d->arch.mm->pgd)) {
- printk("Can't allocate pgd for domain %d\n",d->domain_id);
- return -ENOMEM;
- }
- } else
- d->arch.mm = NULL;
+ d->arch.mm = xmalloc(struct mm_struct);
+ if (unlikely(!d->arch.mm)) {
+ printk("Can't allocate mm_struct for domain %d\n",d->domain_id);
+ return -ENOMEM;
+ }
+ memset(d->arch.mm, 0, sizeof(*d->arch.mm));
+ d->arch.mm->pgd = pgd_alloc(d->arch.mm);
+ if (unlikely(!d->arch.mm->pgd)) {
+ printk("Can't allocate pgd for domain %d\n",d->domain_id);
+ return -ENOMEM;
+ }
printf ("arch_do_create_domain: domain=%p\n", d);
return 0;
@@ -1070,15 +1076,6 @@ void domain_pend_keyboard_interrupt(int irq)
vcpu_pend_interrupt(dom0->vcpu[0],irq);
}
-void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
-{
- if ( v->processor == newcpu )
- return;
-
- set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
- v->processor = newcpu;
-}
-
void sync_vcpu_execstate(struct vcpu *v)
{
ia64_save_fpu(v->arch._thread.fph);
diff --git a/xen/arch/ia64/xen/idle0_task.c b/xen/arch/ia64/xen/idle0_task.c
index bfb49f7591..bc7aeed28a 100644
--- a/xen/arch/ia64/xen/idle0_task.c
+++ b/xen/arch/ia64/xen/idle0_task.c
@@ -11,31 +11,15 @@
.mmlist = LIST_HEAD_INIT(name.mmlist), \
}
-#define IDLE0_EXEC_DOMAIN(_ed,_d) \
+#define IDLE_VCPU(_v) \
{ \
processor: 0, \
- mm: 0, \
- thread: INIT_THREAD, \
- domain: (_d) \
-}
-
-#define IDLE0_DOMAIN(_t) \
-{ \
- domain_id: IDLE_DOMAIN_ID, \
- domain_flags:DOMF_idle_domain, \
- refcnt: ATOMIC_INIT(1) \
+ domain: 0 \
}
struct mm_struct init_mm = INIT_MM(init_mm);
EXPORT_SYMBOL(init_mm);
-struct domain idle0_domain = IDLE0_DOMAIN(idle0_domain);
-#if 0
-struct vcpu idle0_vcpu = IDLE0_EXEC_DOMAIN(idle0_vcpu,
- &idle0_domain);
-#endif
-
-
/*
* Initial task structure.
*
@@ -44,15 +28,12 @@ struct vcpu idle0_vcpu = IDLE0_EXEC_DOMAIN(idle0_vcpu,
*/
union {
struct {
- struct domain task;
+ struct vcpu task;
} s;
unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
-} init_task_mem asm ("init_task") __attribute__((section(".data.init_task")));
-// = {{
- ;
-//.task = IDLE0_EXEC_DOMAIN(init_task_mem.s.task,&idle0_domain),
-//};
-//};
+} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
+ .task = IDLE_VCPU(init_task_mem.s.task)
+}};
EXPORT_SYMBOL(init_task);
diff --git a/xen/arch/ia64/xen/process.c b/xen/arch/ia64/xen/process.c
index ad5be5ba81..e1da875cdb 100644
--- a/xen/arch/ia64/xen/process.c
+++ b/xen/arch/ia64/xen/process.c
@@ -65,26 +65,16 @@ long do_iopl(domid_t domain, unsigned int new_io_pl)
extern struct schedule_data schedule_data[NR_CPUS];
-void schedule_tail(struct vcpu *next)
+void schedule_tail(struct vcpu *prev)
{
- unsigned long rr7;
- //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
- //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
-
- // TG: Real HACK FIXME.
- // This is currently necessary because when a new domain is started,
- // the context_switch function of xen/common/schedule.c(__enter_scheduler)
- // never returns. Therefore, the lock must be released.
- // schedule_tail is only called when a domain is started.
- spin_unlock_irq(&schedule_data[current->processor].schedule_lock);
-
- /* rr7 will be postponed to last point when resuming back to guest */
- if(VMX_DOMAIN(current)){
- vmx_load_all_rr(current);
- }else{
- load_region_regs(current);
- vcpu_load_kernel_regs(current);
- }
+ context_saved(prev);
+
+ if (VMX_DOMAIN(current)) {
+ vmx_load_all_rr(current);
+ } else {
+ load_region_regs(current);
+ vcpu_load_kernel_regs(current);
+ }
}
void tdpfoo(void) { }
@@ -252,7 +242,7 @@ void deliver_pending_interrupt(struct pt_regs *regs)
struct domain *d = current->domain;
struct vcpu *v = current;
// FIXME: Will this work properly if doing an RFI???
- if (!is_idle_task(d) && user_mode(regs)) {
+ if (!is_idle_domain(d) && user_mode(regs)) {
//vcpu_poke_timer(v);
if (vcpu_deliverable_interrupts(v))
reflect_extint(regs);
diff --git a/xen/arch/ia64/xen/vcpu.c b/xen/arch/ia64/xen/vcpu.c
index 2d62bdf86e..4e56524dd9 100644
--- a/xen/arch/ia64/xen/vcpu.c
+++ b/xen/arch/ia64/xen/vcpu.c
@@ -1085,7 +1085,7 @@ void vcpu_set_next_timer(VCPU *vcpu)
/* gloss over the wraparound problem for now... we know it exists
* but it doesn't matter right now */
- if (is_idle_task(vcpu->domain)) {
+ if (is_idle_domain(vcpu->domain)) {
// printf("****** vcpu_set_next_timer called during idle!!\n");
vcpu_safe_set_itm(s);
return;
diff --git a/xen/arch/ia64/xen/xenmisc.c b/xen/arch/ia64/xen/xenmisc.c
index cb8349e1d3..eaddbee41c 100644
--- a/xen/arch/ia64/xen/xenmisc.c
+++ b/xen/arch/ia64/xen/xenmisc.c
@@ -75,7 +75,7 @@ struct pt_regs *guest_cpu_user_regs(void) { return vcpu_regs(current); }
void raise_actimer_softirq(void)
{
- raise_softirq(AC_TIMER_SOFTIRQ);
+ raise_softirq(TIMER_SOFTIRQ);
}
unsigned long
@@ -320,18 +320,15 @@ if (!i--) { printk("+",id); i = 1000000; }
ia64_set_iva(&ia64_ivt);
ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
VHPT_ENABLED);
- if (!is_idle_task(current->domain)) {
+ if (!is_idle_domain(current->domain)) {
load_region_regs(current);
vcpu_load_kernel_regs(current);
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
}
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
}
-}
-void context_switch_finalise(struct vcpu *next)
-{
- /* nothing to do */
+ context_saved(prev);
}
void continue_running(struct vcpu *same)
diff --git a/xen/arch/ia64/xen/xensetup.c b/xen/arch/ia64/xen/xensetup.c
index 79d33cdc53..1b9dc72b75 100644
--- a/xen/arch/ia64/xen/xensetup.c
+++ b/xen/arch/ia64/xen/xensetup.c
@@ -27,7 +27,7 @@ unsigned long xenheap_phys_end;
char saved_command_line[COMMAND_LINE_SIZE];
-struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
+struct vcpu *idle_vcpu[NR_CPUS];
cpumask_t cpu_present_map;
@@ -157,16 +157,12 @@ void start_kernel(void)
unsigned long dom0_memory_start, dom0_memory_size;
unsigned long dom0_initrd_start, dom0_initrd_size;
unsigned long initial_images_start, initial_images_end;
+ struct domain *idle_domain;
running_on_sim = is_platform_hp_ski();
/* Kernel may be relocated by EFI loader */
xen_pstart = ia64_tpa(KERNEL_START);
- /* Must do this early -- e.g., spinlocks rely on get_current(). */
- //set_current(&idle0_vcpu);
- ia64_r13 = (void *)&idle0_vcpu;
- idle0_vcpu.domain = &idle0_domain;
-
early_setup_arch(&cmdline);
/* We initialise the serial devices very early so we can get debugging. */
@@ -282,18 +278,22 @@ void start_kernel(void)
(xenheap_phys_end-__pa(heap_start)) >> 20,
(xenheap_phys_end-__pa(heap_start)) >> 10);
+printk("About to call scheduler_init()\n");
+ scheduler_init();
+ idle_vcpu[0] = (struct vcpu*) ia64_r13;
+ idle_domain = do_createdomain(IDLE_DOMAIN_ID, 0);
+ BUG_ON(idle_domain == NULL);
+
late_setup_arch(&cmdline);
setup_per_cpu_areas();
mem_init();
-printk("About to call scheduler_init()\n");
- scheduler_init();
local_irq_disable();
init_IRQ ();
printk("About to call init_xen_time()\n");
init_xen_time(); /* initialise the time */
-printk("About to call ac_timer_init()\n");
- ac_timer_init();
+printk("About to call timer_init()\n");
+ timer_init();
#ifdef CONFIG_XEN_CONSOLE_INPUT /* CONFIG_SERIAL_8250_CONSOLE=n in dom0! */
initialize_keytable();
@@ -309,14 +309,10 @@ printk("About to call ac_timer_init()\n");
}
smp_prepare_cpus(max_cpus);
-
/* We aren't hotplug-capable yet. */
- //BUG_ON(!cpus_empty(cpu_present_map));
for_each_cpu ( i )
cpu_set(i, cpu_present_map);
- //BUG_ON(!local_irq_is_enabled());
-
/* Enable IRQ to receive IPI (needed for ITC sync). */
local_irq_enable();
@@ -345,12 +341,7 @@ printk("About to call sort_main_extable()\n");
/* Create initial domain 0. */
printk("About to call do_createdomain()\n");
dom0 = do_createdomain(0, 0);
- init_task.domain = &idle0_domain;
- init_task.processor = 0;
-// init_task.mm = &init_mm;
- init_task.domain->arch.mm = &init_mm;
-// init_task.thread = INIT_THREAD;
- //arch_do_createdomain(current);
+
#ifdef CLONE_DOMAIN0
{
int i;
@@ -383,8 +374,7 @@ printk("About to call do_createdomain()\n");
panic("Could not set up DOM0 guest OS\n");
/* PIN domain0 on CPU 0. */
- dom0->vcpu[0]->cpumap=1;
- set_bit(_VCPUF_cpu_pinned, &dom0->vcpu[0]->vcpu_flags);
+ dom0->vcpu[0]->cpu_affinity = cpumask_of_cpu(0);
#ifdef CLONE_DOMAIN0
{
@@ -433,8 +423,8 @@ printk("About to call init_trace_bufs()\n");
local_irq_enable();
- printf("About to call schedulers_start dom0=%p, idle0_dom=%p\n",
- dom0, &idle0_domain);
+ printf("About to call schedulers_start dom0=%p, idle_dom=%p\n",
+ dom0, &idle_domain);
schedulers_start();
domain_unpause_by_systemcontroller(dom0);
diff --git a/xen/arch/ia64/xen/xentime.c b/xen/arch/ia64/xen/xentime.c
index 1b15fb12c7..f407509f3c 100644
--- a/xen/arch/ia64/xen/xentime.c
+++ b/xen/arch/ia64/xen/xentime.c
@@ -127,7 +127,7 @@ xen_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
vcpu_wake(dom0->vcpu[0]);
}
}
- if (!is_idle_task(current->domain)) {
+ if (!is_idle_domain(current->domain)) {
if (vcpu_timer_expired(current)) {
vcpu_pend_timer(current);
// ensure another timer interrupt happens even if domain doesn't
@@ -196,7 +196,7 @@ xen_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
//#endif
/* double check, in case we got hit by a (slow) PMI: */
} while (time_after_eq(ia64_get_itc(), new_itm));
- raise_softirq(AC_TIMER_SOFTIRQ);
+ raise_softirq(TIMER_SOFTIRQ);
return IRQ_HANDLED;
}
@@ -235,7 +235,7 @@ int __init init_xen_time()
return 0;
}
-int reprogram_ac_timer(s_time_t timeout)
+int reprogram_timer(s_time_t timeout)
{
struct vcpu *v = current;
s_time_t expire;
diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
index 1a3d5f591e..7eb6000b37 100644
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -870,7 +870,7 @@ void enable_APIC_timer(void)
* returns 1 on success
* returns 0 if the timeout value is too small or in the past.
*/
-int reprogram_ac_timer(s_time_t timeout)
+int reprogram_timer(s_time_t timeout)
{
s_time_t now;
s_time_t expire;
@@ -931,7 +931,7 @@ void smp_apic_timer_interrupt(struct cpu_user_regs * regs)
{
ack_APIC_irq();
perfc_incrc(apic_timer);
- raise_softirq(AC_TIMER_SOFTIRQ);
+ raise_softirq(TIMER_SOFTIRQ);
}
/*
diff --git a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S
index b98e1c72bc..5534b2621b 100644
--- a/xen/arch/x86/boot/x86_32.S
+++ b/xen/arch/x86/boot/x86_32.S
@@ -100,7 +100,7 @@ __start:
1: stosl /* low mappings cover as much physmem as possible */
add $4,%edi
add $(1<<L2_PAGETABLE_SHIFT),%eax
- cmp $__HYPERVISOR_VIRT_START+0xe3,%eax
+ cmp $HYPERVISOR_VIRT_START+0xe3,%eax
jne 1b
#else
/* Initialize low and high mappings of all memory with 4MB pages */
@@ -113,7 +113,7 @@ __start:
jne 1b
1: stosl /* low mappings cover as much physmem as possible */
add $(1<<L2_PAGETABLE_SHIFT),%eax
- cmp $__HYPERVISOR_VIRT_START+0xe3,%eax
+ cmp $HYPERVISOR_VIRT_START+0xe3,%eax
jne 1b
#endif
diff --git a/xen/arch/x86/dm/i8259.c b/xen/arch/x86/dm/i8259.c
index c0d735dc52..8a27835e9f 100644
--- a/xen/arch/x86/dm/i8259.c
+++ b/xen/arch/x86/dm/i8259.c
@@ -29,7 +29,7 @@
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/vmx.h>
#include <asm/vmx_vpic.h>
#include <asm/current.h>
diff --git a/xen/arch/x86/dm/vmx_vioapic.c b/xen/arch/x86/dm/vmx_vioapic.c
index 769eb59f22..201788e858 100644
--- a/xen/arch/x86/dm/vmx_vioapic.c
+++ b/xen/arch/x86/dm/vmx_vioapic.c
@@ -37,7 +37,7 @@
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/vmx.h>
#include <asm/vmx_vpic.h>
#include <asm/current.h>
diff --git a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c
index 5a4f493ce0..1ee7efd37b 100644
--- a/xen/arch/x86/dom0_ops.c
+++ b/xen/arch/x86/dom0_ops.c
@@ -36,13 +36,13 @@ static unsigned long msr_hi;
static void write_msr_for(void *unused)
{
- if ( ((1 << current->processor) & msr_cpu_mask) )
+ if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
(void)wrmsr_user(msr_addr, msr_lo, msr_hi);
}
static void read_msr_for(void *unused)
{
- if ( ((1 << current->processor) & msr_cpu_mask) )
+ if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
(void)rdmsr_user(msr_addr, msr_lo, msr_hi);
}
@@ -103,12 +103,27 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
op->u.add_memtype.nr_pfns,
op->u.add_memtype.type,
1);
+ if (ret > 0)
+ {
+ (void)__put_user(0, &u_dom0_op->u.add_memtype.handle);
+ (void)__put_user(ret, &u_dom0_op->u.add_memtype.reg);
+ ret = 0;
+ }
}
break;
case DOM0_DEL_MEMTYPE:
{
- ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
+ if (op->u.del_memtype.handle == 0
+ /* mtrr/main.c otherwise does a lookup */
+ && (int)op->u.del_memtype.reg >= 0)
+ {
+ ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
+ if (ret > 0)
+ ret = 0;
+ }
+ else
+ ret = -EINVAL;
}
break;
@@ -179,7 +194,7 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
ret = 0;
if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
- ret = -EFAULT;
+ ret = -EFAULT;
}
break;
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index d905f9dfbf..19c29d084c 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -46,17 +46,16 @@ boolean_param("noreboot", opt_noreboot);
struct percpu_ctxt {
struct vcpu *curr_vcpu;
- unsigned int context_not_finalised;
unsigned int dirty_segment_mask;
} __cacheline_aligned;
static struct percpu_ctxt percpu_ctxt[NR_CPUS];
-static void continue_idle_task(struct vcpu *v)
+static void continue_idle_domain(struct vcpu *v)
{
reset_stack_and_jump(idle_loop);
}
-static void continue_nonidle_task(struct vcpu *v)
+static void continue_nonidle_domain(struct vcpu *v)
{
reset_stack_and_jump(ret_from_intr);
}
@@ -92,10 +91,9 @@ void startup_cpu_idle_loop(void)
{
struct vcpu *v = current;
- ASSERT(is_idle_task(v->domain));
- percpu_ctxt[smp_processor_id()].curr_vcpu = v;
- cpu_set(smp_processor_id(), v->domain->cpumask);
- v->arch.schedule_tail = continue_idle_task;
+ ASSERT(is_idle_vcpu(v));
+ cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
+ cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);
reset_stack_and_jump(idle_loop);
}
@@ -217,14 +215,20 @@ struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
memset(v, 0, sizeof(*v));
- memcpy(&v->arch, &idle0_vcpu.arch, sizeof(v->arch));
+ memcpy(&v->arch, &idle_vcpu[0]->arch, sizeof(v->arch));
v->arch.flags = TF_kernel_mode;
+ if ( is_idle_domain(d) )
+ {
+ percpu_ctxt[vcpu_id].curr_vcpu = v;
+ v->arch.schedule_tail = continue_idle_domain;
+ }
+
if ( (v->vcpu_id = vcpu_id) != 0 )
{
v->arch.schedule_tail = d->vcpu[0]->arch.schedule_tail;
v->arch.perdomain_ptes =
- d->arch.mm_perdomain_pt + (vcpu_id << PDPT_VCPU_SHIFT);
+ d->arch.mm_perdomain_pt + (vcpu_id << GDT_LDT_VCPU_SHIFT);
}
return v;
@@ -259,32 +263,11 @@ int arch_do_createdomain(struct vcpu *v)
int i;
#endif
- if ( is_idle_task(d) )
- return 0;
-
- d->arch.ioport_caps =
- rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
- if ( d->arch.ioport_caps == NULL )
- return -ENOMEM;
-
- if ( (d->shared_info = alloc_xenheap_page()) == NULL )
- return -ENOMEM;
-
- if ( (rc = ptwr_init(d)) != 0 )
- {
- free_xenheap_page(d->shared_info);
- return rc;
- }
-
- v->arch.schedule_tail = continue_nonidle_task;
-
- memset(d->shared_info, 0, PAGE_SIZE);
- v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id];
- v->cpumap = CPUMAP_RUNANYWHERE;
- SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
-
pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
+ if ( d->arch.mm_perdomain_pt == NULL )
+ goto fail_nomem;
+
memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
v->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
@@ -297,49 +280,73 @@ int arch_do_createdomain(struct vcpu *v)
*/
gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
- d->arch.mm_perdomain_pt[
- (vcpuid << PDPT_VCPU_SHIFT) + FIRST_RESERVED_GDT_PAGE] = gdt_l1e;
+ d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
v->arch.guest_vtable = __linear_l2_table;
v->arch.shadow_vtable = __shadow_linear_l2_table;
-#ifdef __x86_64__
+#if defined(__i386__)
+
+ mapcache_init(d);
+
+#else /* __x86_64__ */
+
v->arch.guest_vl3table = __linear_l3_table;
v->arch.guest_vl4table = __linear_l4_table;
d->arch.mm_perdomain_l2 = alloc_xenheap_page();
+ d->arch.mm_perdomain_l3 = alloc_xenheap_page();
+ if ( (d->arch.mm_perdomain_l2 == NULL) ||
+ (d->arch.mm_perdomain_l3 == NULL) )
+ goto fail_nomem;
+
memset(d->arch.mm_perdomain_l2, 0, PAGE_SIZE);
for ( i = 0; i < (1 << pdpt_order); i++ )
d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
__PAGE_HYPERVISOR);
- d->arch.mm_perdomain_l3 = alloc_xenheap_page();
memset(d->arch.mm_perdomain_l3, 0, PAGE_SIZE);
d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
__PAGE_HYPERVISOR);
-#endif
+
+#endif /* __x86_64__ */
shadow_lock_init(d);
INIT_LIST_HEAD(&d->arch.free_shadow_frames);
- return 0;
-}
+ if ( !is_idle_domain(d) )
+ {
+ d->arch.ioport_caps =
+ rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+ if ( d->arch.ioport_caps == NULL )
+ goto fail_nomem;
-void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
-{
- if ( v->processor == newcpu )
- return;
+ if ( (d->shared_info = alloc_xenheap_page()) == NULL )
+ goto fail_nomem;
- set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
- v->processor = newcpu;
+ if ( (rc = ptwr_init(d)) != 0 )
+ goto fail_nomem;
- if ( VMX_DOMAIN(v) )
- {
- __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
- v->arch.schedule_tail = arch_vmx_do_relaunch;
+ memset(d->shared_info, 0, PAGE_SIZE);
+ v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id];
+ SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
+
+ v->arch.schedule_tail = continue_nonidle_domain;
}
+
+ return 0;
+
+ fail_nomem:
+ free_xenheap_page(d->shared_info);
+#ifdef __x86_64__
+ free_xenheap_page(d->arch.mm_perdomain_l2);
+ free_xenheap_page(d->arch.mm_perdomain_l3);
+#endif
+ free_xenheap_pages(d->arch.mm_perdomain_pt, pdpt_order);
+ return -ENOMEM;
}
/* This is called by arch_final_setup_guest and do_boot_vcpu */
@@ -473,14 +480,6 @@ void new_thread(struct vcpu *d,
#ifdef __x86_64__
-void toggle_guest_mode(struct vcpu *v)
-{
- v->arch.flags ^= TF_kernel_mode;
- __asm__ __volatile__ ( "swapgs" );
- update_pagetables(v);
- write_ptbase(v);
-}
-
#define loadsegment(seg,value) ({ \
int __r = 1; \
__asm__ __volatile__ ( \
@@ -650,35 +649,6 @@ static void save_segments(struct vcpu *v)
percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
}
-long do_switch_to_user(void)
-{
- struct cpu_user_regs *regs = guest_cpu_user_regs();
- struct switch_to_user stu;
- struct vcpu *v = current;
-
- if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
- unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
- return -EFAULT;
-
- toggle_guest_mode(v);
-
- regs->rip = stu.rip;
- regs->cs = stu.cs | 3; /* force guest privilege */
- regs->rflags = (stu.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
- regs->rsp = stu.rsp;
- regs->ss = stu.ss | 3; /* force guest privilege */
-
- if ( !(stu.flags & VGCF_IN_SYSCALL) )
- {
- regs->entry_vector = 0;
- regs->r11 = stu.r11;
- regs->rcx = stu.rcx;
- }
-
- /* Saved %rax gets written back to regs->rax in entry.S. */
- return stu.rax;
-}
-
#define switch_kernel_stack(_n,_c) ((void)0)
#elif defined(__i386__)
@@ -705,7 +675,10 @@ static void __context_switch(void)
struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
struct vcpu *n = current;
- if ( !is_idle_task(p->domain) )
+ ASSERT(p != n);
+ ASSERT(cpus_empty(n->vcpu_dirty_cpumask));
+
+ if ( !is_idle_vcpu(p) )
{
memcpy(&p->arch.guest_context.user_regs,
stack_regs,
@@ -714,7 +687,7 @@ static void __context_switch(void)
save_segments(p);
}
- if ( !is_idle_task(n->domain) )
+ if ( !is_idle_vcpu(n) )
{
memcpy(stack_regs,
&n->arch.guest_context.user_regs,
@@ -740,7 +713,8 @@ static void __context_switch(void)
}
if ( p->domain != n->domain )
- cpu_set(cpu, n->domain->cpumask);
+ cpu_set(cpu, n->domain->domain_dirty_cpumask);
+ cpu_set(cpu, n->vcpu_dirty_cpumask);
write_ptbase(n);
@@ -753,7 +727,8 @@ static void __context_switch(void)
}
if ( p->domain != n->domain )
- cpu_clear(cpu, p->domain->cpumask);
+ cpu_clear(cpu, p->domain->domain_dirty_cpumask);
+ cpu_clear(cpu, p->vcpu_dirty_cpumask);
percpu_ctxt[cpu].curr_vcpu = n;
}
@@ -762,29 +737,32 @@ static void __context_switch(void)
void context_switch(struct vcpu *prev, struct vcpu *next)
{
unsigned int cpu = smp_processor_id();
+ cpumask_t dirty_mask = next->vcpu_dirty_cpumask;
- ASSERT(!local_irq_is_enabled());
-
- set_current(next);
+ ASSERT(local_irq_is_enabled());
- if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
+ /* Allow at most one CPU at a time to be dirty. */
+ ASSERT(cpus_weight(dirty_mask) <= 1);
+ if ( unlikely(!cpu_isset(cpu, dirty_mask) && !cpus_empty(dirty_mask)) )
{
- __context_switch();
- percpu_ctxt[cpu].context_not_finalised = 1;
+ /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
+ flush_tlb_mask(dirty_mask);
}
-}
-void context_switch_finalise(struct vcpu *next)
-{
- unsigned int cpu = smp_processor_id();
+ local_irq_disable();
- ASSERT(local_irq_is_enabled());
+ set_current(next);
- if ( percpu_ctxt[cpu].context_not_finalised )
+ if ( (percpu_ctxt[cpu].curr_vcpu == next) || is_idle_vcpu(next) )
+ {
+ local_irq_enable();
+ }
+ else
{
- percpu_ctxt[cpu].context_not_finalised = 0;
+ __context_switch();
- BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
+ /* Re-enable interrupts before restoring state which may fault. */
+ local_irq_enable();
if ( VMX_DOMAIN(next) )
{
@@ -798,6 +776,8 @@ void context_switch_finalise(struct vcpu *next)
}
}
+ context_saved(prev);
+
schedule_tail(next);
BUG();
}
@@ -827,20 +807,11 @@ int __sync_lazy_execstate(void)
void sync_vcpu_execstate(struct vcpu *v)
{
- unsigned int cpu = v->processor;
-
- if ( !cpu_isset(cpu, v->domain->cpumask) )
- return;
-
- if ( cpu == smp_processor_id() )
- {
+ if ( cpu_isset(smp_processor_id(), v->vcpu_dirty_cpumask) )
(void)__sync_lazy_execstate();
- }
- else
- {
- /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
- flush_tlb_mask(cpumask_of_cpu(cpu));
- }
+
+ /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
+ flush_tlb_mask(v->vcpu_dirty_cpumask);
}
unsigned long __hypercall_create_continuation(
@@ -966,7 +937,7 @@ void domain_relinquish_resources(struct domain *d)
struct vcpu *v;
unsigned long pfn;
- BUG_ON(!cpus_empty(d->cpumask));
+ BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
ptwr_destroy(d);
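
The domain.c rework above collapses the old context_switch()/context_switch_finalise() pair into one function, keyed off two dirty masks: v->vcpu_dirty_cpumask (CPUs that may still hold v's lazy state) and d->domain_dirty_cpumask (the union over the domain's VCPUs). A hedged restatement of the invariant, in the shape of the new sync_vcpu_execstate():

    /* A VCPU's lazy state may live on at most one CPU at a time. */
    static void ensure_synced(struct vcpu *v)
    {
        cpumask_t dirty = v->vcpu_dirty_cpumask;

        ASSERT(cpus_weight(dirty) <= 1);

        if ( cpu_isset(smp_processor_id(), dirty) )
            (void)__sync_lazy_execstate();  /* flush our own lazy state */
        else
            flush_tlb_mask(dirty);  /* flush IPI makes the remote CPU sync */
    }
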
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index d08f2c12fb..84d84a66cf 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -366,27 +366,20 @@ int construct_dom0(struct domain *d,
l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
}
- {
- unsigned long va;
- for (va = PERDOMAIN_VIRT_START; va < PERDOMAIN_VIRT_END;
- va += (1 << L2_PAGETABLE_SHIFT)) {
- l2tab[va >> L2_PAGETABLE_SHIFT] =
- l2e_from_paddr(__pa(d->arch.mm_perdomain_pt) +
- (va-PERDOMAIN_VIRT_START),
- __PAGE_HYPERVISOR);
- }
- }
v->arch.guest_table = mk_pagetable((unsigned long)l3start);
#else
l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
- l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
v->arch.guest_table = mk_pagetable((unsigned long)l2start);
#endif
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ l2tab[l2_linear_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
+
l2tab += l2_linear_offset(dsi.v_start);
mfn = alloc_spfn;
for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
diff --git a/xen/arch/x86/idle0_task.c b/xen/arch/x86/idle0_task.c
deleted file mode 100644
index b876c619ef..0000000000
--- a/xen/arch/x86/idle0_task.c
+++ /dev/null
@@ -1,27 +0,0 @@
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/desc.h>
-
-struct domain idle0_domain = {
- domain_id: IDLE_DOMAIN_ID,
- domain_flags:DOMF_idle_domain,
- refcnt: ATOMIC_INIT(1)
-};
-
-struct vcpu idle0_vcpu = {
- processor: 0,
- domain: &idle0_domain
-};
-
-struct tss_struct init_tss[NR_CPUS];
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
index 7dd6bd590a..841bd10a03 100644
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -1807,3 +1807,47 @@ int ioapic_guest_write(int apicid, int address, u32 val)
return 0;
}
+
+void dump_ioapic_irq_info(void)
+{
+ struct irq_pin_list *entry;
+ struct IO_APIC_route_entry rte;
+ unsigned int irq, pin, printed = 0;
+ unsigned long flags;
+
+ for ( irq = 0; irq < NR_IRQS; irq++ )
+ {
+ entry = &irq_2_pin[irq];
+ if ( entry->pin == -1 )
+ continue;
+
+ if ( !printed++ )
+ printk("IO-APIC interrupt information:\n");
+
+ printk(" IRQ%3d Vec%3d:\n", irq, irq_to_vector(irq));
+
+ for ( ; ; )
+ {
+ pin = entry->pin;
+
+ printk(" Apic 0x%02x, Pin %2d: ", entry->apic, pin);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&rte) + 0) = io_apic_read(entry->apic, 0x10 + 2 * pin);
+ *(((int *)&rte) + 1) = io_apic_read(entry->apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk("vector=%u, delivery_mode=%u, dest_mode=%s, "
+ "delivery_status=%d, polarity=%d, irr=%d, "
+ "trigger=%s, mask=%d\n",
+ rte.vector, rte.delivery_mode,
+ rte.dest_mode ? "logical" : "physical",
+ rte.delivery_status, rte.polarity, rte.irr,
+ rte.trigger ? "level" : "edge", rte.mask);
+
+ if ( entry->next == 0 )
+ break;
+ entry = &irq_2_pin[entry->next];
+ }
+ }
+}
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index a1aee360c3..d81d8749a6 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -12,6 +12,7 @@
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/sched.h>
+#include <xen/keyhandler.h>
#include <asm/current.h>
#include <asm/smpboot.h>
@@ -198,15 +199,21 @@ int pirq_guest_unmask(struct domain *d)
int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
{
- unsigned int vector = irq_to_vector(irq);
- irq_desc_t *desc = &irq_desc[vector];
+ unsigned int vector;
+ irq_desc_t *desc;
irq_guest_action_t *action;
unsigned long flags;
int rc = 0;
cpumask_t cpumask = CPU_MASK_NONE;
+ if ( (irq < 0) || (irq >= NR_IRQS) )
+ return -EINVAL;
+
+ vector = irq_to_vector(irq);
if ( vector == 0 )
- return -EBUSY;
+ return -EINVAL;
+
+ desc = &irq_desc[vector];
spin_lock_irqsave(&desc->lock, flags);
@@ -305,3 +312,71 @@ int pirq_guest_unbind(struct domain *d, int irq)
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
+
+extern void dump_ioapic_irq_info(void);
+
+static void dump_irqs(unsigned char key)
+{
+ int i, irq, vector;
+ irq_desc_t *desc;
+ irq_guest_action_t *action;
+ struct domain *d;
+ unsigned long flags;
+
+ printk("Guest interrupt information:\n");
+
+ for ( irq = 0; irq < NR_IRQS; irq++ )
+ {
+ vector = irq_to_vector(irq);
+ if ( vector == 0 )
+ continue;
+
+ desc = &irq_desc[vector];
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ if ( desc->status & IRQ_GUEST )
+ {
+ action = (irq_guest_action_t *)desc->action;
+
+ printk(" IRQ%3d Vec%3d: type=%-15s status=%08x "
+ "in-flight=%d domain-list=",
+ irq, vector, desc->handler->typename,
+ desc->status, action->in_flight);
+
+ for ( i = 0; i < action->nr_guests; i++ )
+ {
+ d = action->guest[i];
+ printk("%u(%c%c%c%c)",
+ d->domain_id,
+ (test_bit(d->pirq_to_evtchn[irq],
+ &d->shared_info->evtchn_pending[0]) ?
+ 'P' : '-'),
+ (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_LONG,
+ &d->shared_info->vcpu_info[0].
+ evtchn_pending_sel) ?
+ 'S' : '-'),
+ (test_bit(d->pirq_to_evtchn[irq],
+ &d->shared_info->evtchn_mask[0]) ?
+ 'M' : '-'),
+ (test_bit(irq, &d->pirq_mask) ?
+ 'M' : '-'));
+ if ( i != (action->nr_guests - 1) )
+ printk(",");
+ }
+
+ printk("\n");
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+
+ dump_ioapic_irq_info();
+}
+
+static int __init setup_dump_irqs(void)
+{
+ register_keyhandler('i', dump_irqs, "dump interrupt bindings");
+ return 0;
+}
+__initcall(setup_dump_irqs);
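
Registering a debug key is now a two-liner via the same interface; a sketch under the obvious assumptions (the letter 'y' is hypothetical and must not collide with an existing binding):

    #include <xen/keyhandler.h>

    static void dump_example(unsigned char key)
    {
        printk("'%c' pressed on CPU %d\n", key, smp_processor_id());
    }

    static int __init setup_dump_example(void)
    {
        register_keyhandler('y', dump_example, "print an example message");
        return 0;
    }
    __initcall(setup_dump_example);
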
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 683c4b7534..79da37d3ea 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -297,7 +297,6 @@ int map_ldt_shadow_page(unsigned int off)
#if defined(__x86_64__)
/* If in user mode, switch to kernel mode just to read LDT mapping. */
- extern void toggle_guest_mode(struct vcpu *);
int user_mode = !(v->arch.flags & TF_kernel_mode);
#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
#elif defined(__i386__)
@@ -841,10 +840,11 @@ static int alloc_l2_table(struct pfn_info *page, unsigned long type)
L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
- pl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
- l2e_from_page(
- virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt),
- __PAGE_HYPERVISOR);
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(
+ virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
#endif
unmap_domain_page(pl2e);
@@ -1457,7 +1457,8 @@ int get_page_type(struct pfn_info *page, unsigned long type)
* was GDT/LDT) but those circumstances should be
* very rare.
*/
- cpumask_t mask = page_get_owner(page)->cpumask;
+ cpumask_t mask =
+ page_get_owner(page)->domain_dirty_cpumask;
tlbflush_filter(mask, page->tlbflush_timestamp);
if ( unlikely(!cpus_empty(mask)) )
@@ -1619,7 +1620,7 @@ static void process_deferred_ops(unsigned int cpu)
if ( shadow_mode_enabled(d) )
shadow_sync_all(d);
if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
- flush_tlb_mask(d->cpumask);
+ flush_tlb_mask(d->domain_dirty_cpumask);
else
local_flush_tlb();
}
@@ -1691,7 +1692,7 @@ static inline cpumask_t vcpumask_to_pcpumask(
struct domain *d, unsigned long vmask)
{
unsigned int vcpu_id;
- cpumask_t pmask;
+ cpumask_t pmask = CPU_MASK_NONE;
struct vcpu *v;
while ( vmask != 0 )
@@ -1700,7 +1701,7 @@ static inline cpumask_t vcpumask_to_pcpumask(
vmask &= ~(1UL << vcpu_id);
if ( (vcpu_id < MAX_VIRT_CPUS) &&
((v = d->vcpu[vcpu_id]) != NULL) )
- cpu_set(v->processor, pmask);
+ cpus_or(pmask, pmask, v->vcpu_dirty_cpumask);
}
return pmask;
@@ -1869,7 +1870,6 @@ int do_mmuext_op(
break;
}
pmask = vcpumask_to_pcpumask(d, vmask);
- cpus_and(pmask, pmask, d->cpumask);
if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
flush_tlb_mask(pmask);
else
@@ -1878,11 +1878,11 @@ int do_mmuext_op(
}
case MMUEXT_TLB_FLUSH_ALL:
- flush_tlb_mask(d->cpumask);
+ flush_tlb_mask(d->domain_dirty_cpumask);
break;
case MMUEXT_INVLPG_ALL:
- flush_tlb_one_mask(d->cpumask, op.arg1.linear_addr);
+ flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr);
break;
case MMUEXT_FLUSH_CACHE:
@@ -2497,7 +2497,7 @@ int do_update_va_mapping(unsigned long va, u64 val64,
l1_pgentry_t val = l1e_from_intpte(val64);
struct vcpu *v = current;
struct domain *d = v->domain;
- unsigned int cpu = v->processor;
+ unsigned int cpu = smp_processor_id();
unsigned long vmask, bmap_ptr;
cpumask_t pmask;
int rc = 0;
@@ -2548,13 +2548,12 @@ int do_update_va_mapping(unsigned long va, u64 val64,
local_flush_tlb();
break;
case UVMF_ALL:
- flush_tlb_mask(d->cpumask);
+ flush_tlb_mask(d->domain_dirty_cpumask);
break;
default:
if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
rc = -EFAULT;
pmask = vcpumask_to_pcpumask(d, vmask);
- cpus_and(pmask, pmask, d->cpumask);
flush_tlb_mask(pmask);
break;
}
@@ -2569,13 +2568,12 @@ int do_update_va_mapping(unsigned long va, u64 val64,
local_flush_tlb_one(va);
break;
case UVMF_ALL:
- flush_tlb_one_mask(d->cpumask, va);
+ flush_tlb_one_mask(d->domain_dirty_cpumask, va);
break;
default:
if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
rc = -EFAULT;
pmask = vcpumask_to_pcpumask(d, vmask);
- cpus_and(pmask, pmask, d->cpumask);
flush_tlb_one_mask(pmask, va);
break;
}
@@ -2972,7 +2970,6 @@ void ptwr_flush(struct domain *d, const int which)
#ifdef CONFIG_X86_64
struct vcpu *v = current;
- extern void toggle_guest_mode(struct vcpu *);
int user_mode = !(v->arch.flags & TF_kernel_mode);
#endif
@@ -3002,7 +2999,7 @@ void ptwr_flush(struct domain *d, const int which)
BUG();
}
PTWR_PRINTK("[%c] disconnected_l1va at %p is %"PRIpte"\n",
- PTWR_PRINT_WHICH, ptep, pte.l1);
+ PTWR_PRINT_WHICH, ptep, l1e_get_intpte(pte));
l1e_remove_flags(pte, _PAGE_RW);
/* Write-protect the p.t. page in the guest page table. */
@@ -3018,20 +3015,33 @@ void ptwr_flush(struct domain *d, const int which)
/* Ensure that there are no stale writable mappings in any TLB. */
/* NB. INVLPG is a serialising instruction: flushes pending updates. */
- flush_tlb_one_mask(d->cpumask, l1va);
+ flush_tlb_one_mask(d->domain_dirty_cpumask, l1va);
PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
- PTWR_PRINT_WHICH, ptep, pte.l1);
+ PTWR_PRINT_WHICH, ptep, l1e_get_intpte(pte));
/*
* STEP 2. Validate any modified PTEs.
*/
- pl1e = d->arch.ptwr[which].pl1e;
- modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
- unmap_domain_page(pl1e);
- perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
- ptwr_eip_stat_update(d->arch.ptwr[which].eip, d->domain_id, modified);
- d->arch.ptwr[which].prev_nr_updates = modified;
+ if ( likely(d == current->domain) )
+ {
+ pl1e = map_domain_page(l1e_get_pfn(pte));
+ modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
+ unmap_domain_page(pl1e);
+ perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
+ ptwr_eip_stat_update(d->arch.ptwr[which].eip, d->domain_id, modified);
+ d->arch.ptwr[which].prev_nr_updates = modified;
+ }
+ else
+ {
+ /*
+ * Must make a temporary global mapping: we may be running in the
+ * wrong address space and so have no access to our own mapcache.
+ */
+ pl1e = map_domain_page_global(l1e_get_pfn(pte));
+ modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
+ unmap_domain_page_global(pl1e);
+ }
/*
* STEP 3. Reattach the L1 p.t. page into the current address space.
@@ -3209,7 +3219,7 @@ int ptwr_do_page_fault(struct domain *d, unsigned long addr,
{
unsigned long pfn;
struct pfn_info *page;
- l1_pgentry_t pte;
+ l1_pgentry_t *pl1e, pte;
l2_pgentry_t *pl2e, l2e;
int which, flags;
unsigned long l2_idx;
@@ -3342,15 +3352,14 @@ int ptwr_do_page_fault(struct domain *d, unsigned long addr,
if ( which == PTWR_PT_ACTIVE )
{
l2e_remove_flags(*pl2e, _PAGE_PRESENT);
- flush_tlb_mask(d->cpumask);
+ flush_tlb_mask(d->domain_dirty_cpumask);
}
/* Temporarily map the L1 page, and make a copy of it. */
- d->arch.ptwr[which].pl1e = map_domain_page(pfn);
- memcpy(d->arch.ptwr[which].page,
- d->arch.ptwr[which].pl1e,
- L1_PAGETABLE_ENTRIES * sizeof(l1_pgentry_t));
-
+ pl1e = map_domain_page(pfn);
+ memcpy(d->arch.ptwr[which].page, pl1e, PAGE_SIZE);
+ unmap_domain_page(pl1e);
+
/* Finally, make the p.t. page writable by the guest OS. */
l1e_add_flags(pte, _PAGE_RW);
if ( unlikely(__put_user(pte.l1,
@@ -3359,7 +3368,6 @@ int ptwr_do_page_fault(struct domain *d, unsigned long addr,
MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *)
&linear_pg_table[l1_linear_offset(addr)]);
/* Toss the writable pagetable state and crash. */
- unmap_domain_page(d->arch.ptwr[which].pl1e);
d->arch.ptwr[which].l1va = 0;
domain_crash(d);
return 0;
@@ -3369,7 +3377,7 @@ int ptwr_do_page_fault(struct domain *d, unsigned long addr,
emulate:
if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
- &ptwr_mem_emulator, BITS_PER_LONG/8) )
+ &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
return 0;
perfc_incrc(ptwr_emulations);
return EXCRET_fault_fixed;
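
The split introduced in ptwr_flush() above is the general rule for the mapping API: map_domain_page() goes through the current domain's per-CPU mapcache and is only valid in the current address space, while map_domain_page_global() stays valid across context switches. A hedged sketch of choosing between them:

    /* Count present L1 entries in a page-table page. When acting on a
     * foreign domain we may be in the wrong address space, so take a
     * global mapping, as ptwr_flush() does in that case. */
    static unsigned int count_present(unsigned long pfn, int foreign)
    {
        l1_pgentry_t *pl1e = foreign ? map_domain_page_global(pfn)
                                     : map_domain_page(pfn);
        unsigned int i, n = 0;

        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
            if ( l1e_get_flags(pl1e[i]) & _PAGE_PRESENT )
                n++;

        if ( foreign )
            unmap_domain_page_global(pl1e);
        else
            unmap_domain_page(pl1e);
        return n;
    }
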
diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
index b63036ac54..96c55572cd 100644
--- a/xen/arch/x86/nmi.c
+++ b/xen/arch/x86/nmi.c
@@ -23,18 +23,20 @@
#include <xen/sched.h>
#include <xen/console.h>
#include <xen/smp.h>
+#include <xen/keyhandler.h>
#include <asm/current.h>
#include <asm/mc146818rtc.h>
#include <asm/msr.h>
#include <asm/mpspec.h>
#include <asm/debugger.h>
#include <asm/div64.h>
+#include <asm/apic.h>
unsigned int nmi_watchdog = NMI_NONE;
static unsigned int nmi_hz = HZ;
static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
static unsigned int nmi_p4_cccr_val;
-static struct ac_timer nmi_timer[NR_CPUS];
+static struct timer nmi_timer[NR_CPUS];
static unsigned int nmi_timer_ticks[NR_CPUS];
/*
@@ -132,7 +134,7 @@ static void nmi_timer_fn(void *unused)
{
int cpu = smp_processor_id();
nmi_timer_ticks[cpu]++;
- set_ac_timer(&nmi_timer[cpu], NOW() + MILLISECS(1000));
+ set_timer(&nmi_timer[cpu], NOW() + MILLISECS(1000));
}
static void disable_lapic_nmi_watchdog(void)
@@ -308,8 +310,6 @@ static int __pminit setup_p4_watchdog(void)
void __pminit setup_apic_nmi_watchdog(void)
{
- int cpu = smp_processor_id();
-
if (!nmi_watchdog)
return;
@@ -344,49 +344,37 @@ void __pminit setup_apic_nmi_watchdog(void)
lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
nmi_active = 1;
-
- init_ac_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu);
}
static unsigned int
last_irq_sums [NR_CPUS],
alert_counter [NR_CPUS];
-static spinlock_t watchdog_lock = SPIN_LOCK_UNLOCKED;
-static unsigned int watchdog_disable_count = 1;
-static unsigned int watchdog_on;
+static atomic_t watchdog_disable_count = ATOMIC_INIT(1);
void watchdog_disable(void)
{
- unsigned long flags;
-
- spin_lock_irqsave(&watchdog_lock, flags);
-
- if ( watchdog_disable_count++ == 0 )
- watchdog_on = 0;
-
- spin_unlock_irqrestore(&watchdog_lock, flags);
+ atomic_inc(&watchdog_disable_count);
}
void watchdog_enable(void)
{
- unsigned int cpu;
- unsigned long flags;
+ static unsigned long heartbeat_initialised;
+ unsigned int cpu;
- spin_lock_irqsave(&watchdog_lock, flags);
+ if ( !atomic_dec_and_test(&watchdog_disable_count) ||
+ test_and_set_bit(0, &heartbeat_initialised) )
+ return;
- if ( --watchdog_disable_count == 0 )
+ /*
+ * Activate periodic heartbeats. We cannot do this earlier during
+ * setup because the timer infrastructure is not available.
+ */
+ for_each_online_cpu ( cpu )
{
- watchdog_on = 1;
- /*
- * Ensure periodic heartbeats are active. We cannot do this earlier
- * during setup because the timer infrastructure is not available.
- */
- for_each_online_cpu ( cpu )
- set_ac_timer(&nmi_timer[cpu], NOW());
+ init_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu);
+ set_timer(&nmi_timer[cpu], NOW());
}
-
- spin_unlock_irqrestore(&watchdog_lock, flags);
}
void nmi_watchdog_tick(struct cpu_user_regs * regs)
@@ -395,7 +383,7 @@ void nmi_watchdog_tick(struct cpu_user_regs * regs)
sum = nmi_timer_ticks[cpu];
- if ( (last_irq_sums[cpu] == sum) && watchdog_on )
+ if ( (last_irq_sums[cpu] == sum) && !atomic_read(&watchdog_disable_count) )
{
/*
* Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds)
@@ -440,3 +428,29 @@ void nmi_watchdog_tick(struct cpu_user_regs * regs)
write_watchdog_counter(NULL);
}
}
+
+/*
+ * For some reason the destination shorthand for self is not valid
+ * when used with the NMI delivery mode. This is documented in Tables
+ * 8-3 and 8-4 in IA32 Reference Manual Volume 3. We send the IPI to
+ * our own APIC ID explicitly which is valid.
+ */
+static void do_nmi_trigger(unsigned char key)
+{
+ u32 id = apic_read(APIC_ID);
+
+ printk("Triggering NMI on APIC ID %x\n", id);
+
+ local_irq_disable();
+ apic_wait_icr_idle();
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+ apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_INT_ASSERT);
+ local_irq_enable();
+}
+
+static __init int register_nmi_trigger(void)
+{
+ register_keyhandler('n', do_nmi_trigger, "trigger an NMI");
+ return 0;
+}
+__initcall(register_nmi_trigger);
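
The watchdog rework above replaces the lock-protected flag pair with a single atomic count, initialised to 1 so the watchdog starts disabled; the per-CPU heartbeat timers are initialised lazily on the first enable. The gating test used by nmi_watchdog_tick() then reduces to a sketch like:

    static atomic_t watchdog_disable_count = ATOMIC_INIT(1);

    /* Enabled exactly when every disable has been paired with an enable. */
    static int watchdog_active(void)
    {
        return atomic_read(&watchdog_disable_count) == 0;
    }
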
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index f27806f8f6..39bf4a523d 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -81,6 +81,10 @@ extern void early_time_init(void);
extern void initialize_keytable(void);
extern void early_cpu_init(void);
+struct tss_struct init_tss[NR_CPUS];
+
+struct vcpu *idle_vcpu[NR_CPUS];
+
extern unsigned long cpu0_stack[];
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
@@ -92,8 +96,6 @@ unsigned long mmu_cr4_features = X86_CR4_PSE;
#endif
EXPORT_SYMBOL(mmu_cr4_features);
-struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
-
int acpi_disabled;
int acpi_force;
@@ -144,8 +146,8 @@ static struct e820entry e820_raw[E820MAX];
void __init __start_xen(multiboot_info_t *mbi)
{
- unsigned long vgdt, gdt_pfn;
char *cmdline;
+ struct domain *idle_domain;
unsigned long _initrd_start = 0, _initrd_len = 0;
unsigned int initrdidx = 1;
module_t *mod = (module_t *)__va(mbi->mods_addr);
@@ -163,9 +165,8 @@ void __init __start_xen(multiboot_info_t *mbi)
if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
cmdline_parse(__va(mbi->cmdline));
- /* Must do this early -- e.g., spinlocks rely on get_current(). */
- set_current(&idle0_vcpu);
- set_processor_id(0);
+ set_current((struct vcpu *)0xfffff000); /* debug sanity */
+ set_processor_id(0); /* needed early, for smp_processor_id() */
smp_prepare_boot_cpu();
@@ -343,6 +344,12 @@ void __init __start_xen(multiboot_info_t *mbi)
BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
BUG_ON(sizeof(vcpu_info_t) != 64);
+ /* __foo are defined in public headers. Check they match internal defs. */
+ BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
+#ifdef HYPERVISOR_VIRT_END
+ BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
+#endif
+
init_frametable();
end_boot_allocator();
@@ -376,6 +383,14 @@ void __init __start_xen(multiboot_info_t *mbi)
early_cpu_init();
+ scheduler_init();
+
+ idle_domain = do_createdomain(IDLE_DOMAIN_ID, 0);
+ BUG_ON(idle_domain == NULL);
+
+ set_current(idle_domain->vcpu[0]);
+ idle_vcpu[0] = current;
+
paging_init();
/* Unmap the first page of CPU0's stack. */
@@ -388,21 +403,6 @@ void __init __start_xen(multiboot_info_t *mbi)
sort_exception_tables();
- if ( arch_do_createdomain(current) != 0 )
- BUG();
-
- /*
- * Map default GDT into its final positions in the idle page table. As
- * noted in arch_do_createdomain(), we must map for every possible VCPU#.
- */
- vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
- gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- {
- map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
- vgdt += 1 << PDPT_VCPU_VA_SHIFT;
- }
-
find_smp_config();
smp_alloc_memory();
@@ -423,14 +423,12 @@ void __init __start_xen(multiboot_info_t *mbi)
trap_init();
- ac_timer_init();
+ timer_init();
early_time_init();
arch_init_memory();
- scheduler_init();
-
identify_cpu(&boot_cpu_data);
if ( cpu_has_fxsr )
set_in_cr4(X86_CR4_OSFXSR);
@@ -480,7 +478,8 @@ void __init __start_xen(multiboot_info_t *mbi)
schedulers_start();
- watchdog_enable();
+ if ( opt_watchdog )
+ watchdog_enable();
shadow_mode_init();
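
Taken together, the setup.c hunks reorder early boot: the scheduler comes up before paging so that a real idle domain (and its vcpu 0) exists early enough for the idle page tables to carry proper per-domain mappings, and the GDT mapping work moves out of __start_xen() entirely. Condensed from the hunks above, with unchanged steps elided as comments:

    set_current((struct vcpu *)0xfffff000);  /* poison until idle vcpu exists */
    /* ... memory map, frametable, boot allocator ... */
    scheduler_init();                        /* moved before paging_init()   */
    idle_domain = do_createdomain(IDLE_DOMAIN_ID, 0);
    set_current(idle_domain->vcpu[0]);       /* replaces static idle0_vcpu   */
    idle_vcpu[0] = current;
    paging_init();                           /* can now map per-domain area  */
    /* ... */
    timer_init();                            /* renamed from ac_timer_init() */
    /* ... */
    if ( opt_watchdog )                      /* watchdog is now opt-in       */
        watchdog_enable();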
diff --git a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c
index 41b76842fd..b2fd143452 100644
--- a/xen/arch/x86/shadow.c
+++ b/xen/arch/x86/shadow.c
@@ -469,6 +469,7 @@ static unsigned long shadow_l2_table(
{
unsigned long smfn;
l2_pgentry_t *spl2e;
+ int i;
SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
@@ -503,9 +504,11 @@ static unsigned long shadow_l2_table(
spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
- spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
- l2e_from_paddr(__pa(page_get_owner(pfn_to_page(gmfn))->arch.mm_perdomain_pt),
- __PAGE_HYPERVISOR);
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ spl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(page_get_owner(pfn_to_page(gmfn))->
+ arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
if ( shadow_mode_translate(d) ) // NB: not external
{
@@ -1800,7 +1803,7 @@ static void sync_all(struct domain *d)
}
/* Other VCPUs mustn't use the revoked writable mappings. */
- other_vcpus_mask = d->cpumask;
+ other_vcpus_mask = d->domain_dirty_cpumask;
cpu_clear(smp_processor_id(), other_vcpus_mask);
flush_tlb_mask(other_vcpus_mask);
@@ -2150,8 +2153,8 @@ static void shadow_update_pagetables(struct vcpu *v)
if ( max_mode & (SHM_enable | SHM_external) )
{
if ( likely(v->arch.guest_vtable != NULL) )
- unmap_domain_page(v->arch.guest_vtable);
- v->arch.guest_vtable = map_domain_page(gmfn);
+ unmap_domain_page_global(v->arch.guest_vtable);
+ v->arch.guest_vtable = map_domain_page_global(gmfn);
}
/*
@@ -2187,8 +2190,8 @@ static void shadow_update_pagetables(struct vcpu *v)
)
{
if ( v->arch.shadow_vtable )
- unmap_domain_page(v->arch.shadow_vtable);
- v->arch.shadow_vtable = map_domain_page(smfn);
+ unmap_domain_page_global(v->arch.shadow_vtable);
+ v->arch.shadow_vtable = map_domain_page_global(smfn);
}
#if CONFIG_PAGING_LEVELS == 2
@@ -2204,8 +2207,8 @@ static void shadow_update_pagetables(struct vcpu *v)
if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
if ( v->arch.hl2_vtable )
- unmap_domain_page(v->arch.hl2_vtable);
- v->arch.hl2_vtable = map_domain_page(hl2mfn);
+ unmap_domain_page_global(v->arch.hl2_vtable);
+ v->arch.hl2_vtable = map_domain_page_global(hl2mfn);
}
/*
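
This loop is the shadow-code side of a wider layout change: the per-domain region now spans PDPT_L2_ENTRIES consecutive L2 slots, one per page of mm_perdomain_pt, instead of a single slot. A sketch of the widened area, inferred from this patch (the mapcache offset comes from mapcache_init() in domain_page.c below):

    /* PERDOMAIN_VIRT_START, one L2 slot per mm_perdomain_pt page:
     *   [0 .. GDT_LDT_MBYTES)   per-vcpu GDT/LDT mappings, demand-faulted
     *                           (see handle_gdt_ldt_mapping_fault below)
     *   [GDT_LDT_MBYTES .. )    per-domain mapcache L1 tables:
     *       l1tab = mm_perdomain_pt + (GDT_LDT_MBYTES << (20 - PAGE_SHIFT))
     */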
diff --git a/xen/arch/x86/shadow32.c b/xen/arch/x86/shadow32.c
index 872c73f545..eb09ea92c5 100644
--- a/xen/arch/x86/shadow32.c
+++ b/xen/arch/x86/shadow32.c
@@ -726,6 +726,7 @@ static void alloc_monitor_pagetable(struct vcpu *v)
l2_pgentry_t *mpl2e;
struct pfn_info *mmfn_info;
struct domain *d = v->domain;
+ int i;
ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
@@ -733,16 +734,17 @@ static void alloc_monitor_pagetable(struct vcpu *v)
ASSERT(mmfn_info != NULL);
mmfn = page_to_pfn(mmfn_info);
- mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
+ mpl2e = (l2_pgentry_t *)map_domain_page_global(mmfn);
memset(mpl2e, 0, PAGE_SIZE);
memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
&idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
- mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
- l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
- __PAGE_HYPERVISOR);
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ mpl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
// map the phys_to_machine map into the Read-Only MPT space for this domain
mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
@@ -794,7 +796,7 @@ void free_monitor_pagetable(struct vcpu *v)
* Then free monitor_table.
*/
mfn = pagetable_get_pfn(v->arch.monitor_table);
- unmap_domain_page(v->arch.monitor_vtable);
+ unmap_domain_page_global(v->arch.monitor_vtable);
free_domheap_page(pfn_to_page(mfn));
v->arch.monitor_table = mk_pagetable(0);
@@ -929,7 +931,7 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode)
if ( v->arch.guest_vtable &&
(v->arch.guest_vtable != __linear_l2_table) )
{
- unmap_domain_page(v->arch.guest_vtable);
+ unmap_domain_page_global(v->arch.guest_vtable);
}
if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
v->arch.guest_vtable = __linear_l2_table;
@@ -942,7 +944,7 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode)
if ( v->arch.shadow_vtable &&
(v->arch.shadow_vtable != __shadow_linear_l2_table) )
{
- unmap_domain_page(v->arch.shadow_vtable);
+ unmap_domain_page_global(v->arch.shadow_vtable);
}
if ( !(mode & SHM_external) )
v->arch.shadow_vtable = __shadow_linear_l2_table;
@@ -955,7 +957,7 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode)
if ( v->arch.hl2_vtable &&
(v->arch.hl2_vtable != __linear_hl2_table) )
{
- unmap_domain_page(v->arch.hl2_vtable);
+ unmap_domain_page_global(v->arch.hl2_vtable);
}
if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
v->arch.hl2_vtable = __linear_hl2_table;
@@ -1508,6 +1510,7 @@ static unsigned long shadow_l2_table(
{
unsigned long smfn;
l2_pgentry_t *spl2e;
+ int i;
SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
@@ -1542,9 +1545,11 @@ static unsigned long shadow_l2_table(
spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
- spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
- l2e_from_paddr(__pa(page_get_owner(pfn_to_page(gmfn))->arch.mm_perdomain_pt),
- __PAGE_HYPERVISOR);
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ spl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(page_get_owner(pfn_to_page(gmfn))->
+ arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
if ( shadow_mode_translate(d) ) // NB: not external
{
@@ -2586,7 +2591,7 @@ void __shadow_sync_all(struct domain *d)
}
/* Other VCPUs mustn't use the revoked writable mappings. */
- other_vcpus_mask = d->cpumask;
+ other_vcpus_mask = d->domain_dirty_cpumask;
cpu_clear(smp_processor_id(), other_vcpus_mask);
flush_tlb_mask(other_vcpus_mask);
@@ -2906,8 +2911,8 @@ void __update_pagetables(struct vcpu *v)
if ( max_mode & (SHM_enable | SHM_external) )
{
if ( likely(v->arch.guest_vtable != NULL) )
- unmap_domain_page(v->arch.guest_vtable);
- v->arch.guest_vtable = map_domain_page(gmfn);
+ unmap_domain_page_global(v->arch.guest_vtable);
+ v->arch.guest_vtable = map_domain_page_global(gmfn);
}
/*
@@ -2932,8 +2937,8 @@ void __update_pagetables(struct vcpu *v)
if ( max_mode == SHM_external )
{
if ( v->arch.shadow_vtable )
- unmap_domain_page(v->arch.shadow_vtable);
- v->arch.shadow_vtable = map_domain_page(smfn);
+ unmap_domain_page_global(v->arch.shadow_vtable);
+ v->arch.shadow_vtable = map_domain_page_global(smfn);
}
/*
@@ -2948,8 +2953,8 @@ void __update_pagetables(struct vcpu *v)
if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
if ( v->arch.hl2_vtable )
- unmap_domain_page(v->arch.hl2_vtable);
- v->arch.hl2_vtable = map_domain_page(hl2mfn);
+ unmap_domain_page_global(v->arch.hl2_vtable);
+ v->arch.hl2_vtable = map_domain_page_global(hl2mfn);
}
/*
diff --git a/xen/arch/x86/shadow_public.c b/xen/arch/x86/shadow_public.c
index 931a31f83f..bb376bb737 100644
--- a/xen/arch/x86/shadow_public.c
+++ b/xen/arch/x86/shadow_public.c
@@ -151,6 +151,8 @@ free_shadow_fl1_table(struct domain *d, unsigned long smfn)
for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
put_page_from_l1e(pl1e[i], d);
+
+ unmap_domain_page(pl1e);
}
/*
@@ -254,6 +256,7 @@ static pagetable_t page_table_convert(struct domain *d)
pae_l3 = map_domain_page(pagetable_get_pfn(d->arch.phys_table));
for (i = 0; i < PDP_ENTRIES; i++)
l3[i] = l3e_from_pfn(l3e_get_pfn(pae_l3[i]), __PAGE_HYPERVISOR);
+ unmap_domain_page(pae_l3);
unmap_domain_page(l4);
unmap_domain_page(l3);
@@ -275,7 +278,7 @@ static void alloc_monitor_pagetable(struct vcpu *v)
ASSERT( mmfn_info );
mmfn = page_to_pfn(mmfn_info);
- mpl4e = (l4_pgentry_t *) map_domain_page(mmfn);
+ mpl4e = (l4_pgentry_t *) map_domain_page_global(mmfn);
memcpy(mpl4e, &idle_pg_table[0], PAGE_SIZE);
mpl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
@@ -298,7 +301,7 @@ void free_monitor_pagetable(struct vcpu *v)
* free monitor_table.
*/
mfn = pagetable_get_pfn(v->arch.monitor_table);
- unmap_domain_page(v->arch.monitor_vtable);
+ unmap_domain_page_global(v->arch.monitor_vtable);
free_domheap_page(pfn_to_page(mfn));
v->arch.monitor_table = mk_pagetable(0);
@@ -325,6 +328,7 @@ static void alloc_monitor_pagetable(struct vcpu *v)
l2_pgentry_t *mpl2e;
struct pfn_info *mmfn_info;
struct domain *d = v->domain;
+ int i;
ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
@@ -332,16 +336,17 @@ static void alloc_monitor_pagetable(struct vcpu *v)
ASSERT(mmfn_info != NULL);
mmfn = page_to_pfn(mmfn_info);
- mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
+ mpl2e = (l2_pgentry_t *)map_domain_page_global(mmfn);
memset(mpl2e, 0, PAGE_SIZE);
memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
&idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
- mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
- l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
- __PAGE_HYPERVISOR);
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ mpl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
// map the phys_to_machine map into the Read-Only MPT space for this domain
mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
@@ -393,7 +398,7 @@ void free_monitor_pagetable(struct vcpu *v)
* Then free monitor_table.
*/
mfn = pagetable_get_pfn(v->arch.monitor_table);
- unmap_domain_page(v->arch.monitor_vtable);
+ unmap_domain_page_global(v->arch.monitor_vtable);
free_domheap_page(pfn_to_page(mfn));
v->arch.monitor_table = mk_pagetable(0);
@@ -977,7 +982,7 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode)
if ( v->arch.guest_vtable &&
(v->arch.guest_vtable != __linear_l2_table) )
{
- unmap_domain_page(v->arch.guest_vtable);
+ unmap_domain_page_global(v->arch.guest_vtable);
}
if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
v->arch.guest_vtable = __linear_l2_table;
@@ -990,7 +995,7 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode)
if ( v->arch.shadow_vtable &&
(v->arch.shadow_vtable != __shadow_linear_l2_table) )
{
- unmap_domain_page(v->arch.shadow_vtable);
+ unmap_domain_page_global(v->arch.shadow_vtable);
}
if ( !(mode & SHM_external) && d->arch.ops->guest_paging_levels == 2)
v->arch.shadow_vtable = __shadow_linear_l2_table;
@@ -1004,7 +1009,7 @@ int __shadow_mode_enable(struct domain *d, unsigned int mode)
if ( v->arch.hl2_vtable &&
(v->arch.hl2_vtable != __linear_hl2_table) )
{
- unmap_domain_page(v->arch.hl2_vtable);
+ unmap_domain_page_global(v->arch.hl2_vtable);
}
if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
v->arch.hl2_vtable = __linear_hl2_table;
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 30ca4864b2..b3cc714bcd 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -435,7 +435,7 @@ void __init start_secondary(void *unused)
extern void percpu_traps_init(void);
- set_current(idle_task[cpu]);
+ set_current(idle_vcpu[cpu]);
set_processor_id(cpu);
percpu_traps_init();
@@ -761,7 +761,6 @@ static int __init do_boot_cpu(int apicid)
* Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
*/
{
- struct domain *idle;
struct vcpu *v;
unsigned long boot_error;
int timeout, cpu;
@@ -770,14 +769,10 @@ static int __init do_boot_cpu(int apicid)
cpu = ++cpucount;
- if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
- panic("failed 'createdomain' for CPU %d", cpu);
+ v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu);
+ BUG_ON(v == NULL);
- v = idle_task[cpu] = idle->vcpu[0];
-
- set_bit(_DOMF_idle_domain, &idle->domain_flags);
-
- v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+ v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
/* start_eip had better be page-aligned! */
start_eip = setup_trampoline();
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index 7e7c40fca1..1bd15c6702 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -17,7 +17,7 @@
#include <xen/config.h>
#include <xen/init.h>
#include <xen/time.h>
-#include <xen/ac_timer.h>
+#include <xen/timer.h>
#include <xen/smp.h>
#include <xen/irq.h>
#include <xen/softirq.h>
@@ -56,7 +56,7 @@ struct cpu_time {
s_time_t stime_local_stamp;
s_time_t stime_master_stamp;
struct time_scale tsc_scale;
- struct ac_timer calibration_timer;
+ struct timer calibration_timer;
} __cacheline_aligned;
static struct cpu_time cpu_time[NR_CPUS];
@@ -163,7 +163,7 @@ void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
/* Rough hack to allow accurate timers to sort-of-work with no APIC. */
if ( !cpu_has_apic )
- raise_softirq(AC_TIMER_SOFTIRQ);
+ raise_softirq(TIMER_SOFTIRQ);
if ( using_pit )
pit_overflow();
@@ -342,7 +342,7 @@ static void init_pit(void)
/* Protected by platform_timer_lock. */
static u64 hpet_counter64, hpet_overflow_period;
static u32 hpet_stamp;
-static struct ac_timer hpet_overflow_timer;
+static struct timer hpet_overflow_timer;
static void hpet_overflow(void *unused)
{
@@ -354,7 +354,7 @@ static void hpet_overflow(void *unused)
hpet_stamp = counter;
spin_unlock_irq(&platform_timer_lock);
- set_ac_timer(&hpet_overflow_timer, NOW() + hpet_overflow_period);
+ set_timer(&hpet_overflow_timer, NOW() + hpet_overflow_period);
}
static u64 read_hpet_count(void)
@@ -430,7 +430,7 @@ static int init_hpet(void)
(void)do_div(hpet_overflow_period, (u32)hpet_rate);
}
- init_ac_timer(&hpet_overflow_timer, hpet_overflow, NULL, 0);
+ init_timer(&hpet_overflow_timer, hpet_overflow, NULL, 0);
hpet_overflow(NULL);
platform_timer_stamp = hpet_counter64;
@@ -459,7 +459,7 @@ int use_cyclone;
/* Protected by platform_timer_lock. */
static u64 cyclone_counter64;
static u32 cyclone_stamp;
-static struct ac_timer cyclone_overflow_timer;
+static struct timer cyclone_overflow_timer;
static volatile u32 *cyclone_timer; /* Cyclone MPMC0 register */
static void cyclone_overflow(void *unused)
@@ -472,7 +472,7 @@ static void cyclone_overflow(void *unused)
cyclone_stamp = counter;
spin_unlock_irq(&platform_timer_lock);
- set_ac_timer(&cyclone_overflow_timer, NOW() + MILLISECS(20000));
+ set_timer(&cyclone_overflow_timer, NOW() + MILLISECS(20000));
}
static u64 read_cyclone_count(void)
@@ -510,7 +510,7 @@ static int init_cyclone(void)
read_platform_count = read_cyclone_count;
- init_ac_timer(&cyclone_overflow_timer, cyclone_overflow, NULL, 0);
+ init_timer(&cyclone_overflow_timer, cyclone_overflow, NULL, 0);
cyclone_overflow(NULL);
platform_timer_stamp = cyclone_counter64;
set_time_scale(&platform_timer_scale, CYCLONE_TIMER_FREQ);
@@ -876,7 +876,7 @@ static void local_time_calibration(void *unused)
cpu_time[cpu].stime_master_stamp = curr_master_stime;
out:
- set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
+ set_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
if ( cpu == 0 )
platform_time_calibration();
@@ -896,9 +896,9 @@ void init_percpu_time(void)
cpu_time[cpu].stime_master_stamp = now;
cpu_time[cpu].stime_local_stamp = now;
- init_ac_timer(&cpu_time[cpu].calibration_timer,
+ init_timer(&cpu_time[cpu].calibration_timer,
local_time_calibration, NULL, cpu);
- set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
+ set_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
}
/* Late init function (after all CPUs are booted). */
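
Everything in time.c is a mechanical rename of the old "ac_timer" (accurate-timer) interface; the hunks above, and those in the vmx files below, follow this mapping:

    /* old interface        -> new interface
     * struct ac_timer      -> struct timer
     * init_ac_timer()      -> init_timer()
     * set_ac_timer()       -> set_timer()
     * rem_ac_timer()       -> stop_timer()   (kill_timer() additionally
     *                         retires a timer for good, as in
     *                         vmx_relinquish_resources() below)
     * active_ac_timer()    -> active_timer()
     * AC_TIMER_SOFTIRQ     -> TIMER_SOFTIRQ
     */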
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index a4be3db3b3..0a7280fb70 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -130,9 +130,19 @@ unsigned long kernel_text_end(void)
static void show_guest_stack(struct cpu_user_regs *regs)
{
int i;
- unsigned long *stack = (unsigned long *)regs->esp, addr;
+ unsigned long *stack, addr;
- printk("Guest stack trace from "__OP"sp=%p:\n ", stack);
+ if ( VM86_MODE(regs) )
+ {
+ stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff));
+ printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n ",
+ regs->ss, (uint16_t)(regs->esp & 0xffff));
+ }
+ else
+ {
+ stack = (unsigned long *)regs->esp;
+ printk("Guest stack trace from "__OP"sp=%p:\n ", stack);
+ }
for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
{
@@ -427,7 +437,7 @@ void propagate_page_fault(unsigned long addr, u16 error_code)
tb->flags |= TBF_INTERRUPT;
}
-static int handle_perdomain_mapping_fault(
+static int handle_gdt_ldt_mapping_fault(
unsigned long offset, struct cpu_user_regs *regs)
{
extern int map_ldt_shadow_page(unsigned int);
@@ -437,14 +447,14 @@ static int handle_perdomain_mapping_fault(
int ret;
/* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
- unsigned int is_ldt_area = (offset >> (PDPT_VCPU_VA_SHIFT-1)) & 1;
- unsigned int vcpu_area = (offset >> PDPT_VCPU_VA_SHIFT);
+ unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
+ unsigned int vcpu_area = (offset >> GDT_LDT_VCPU_VA_SHIFT);
/* Should never fault in another vcpu's area. */
BUG_ON(vcpu_area != current->vcpu_id);
/* Byte offset within the gdt/ldt sub-area. */
- offset &= (1UL << (PDPT_VCPU_VA_SHIFT-1)) - 1UL;
+ offset &= (1UL << (GDT_LDT_VCPU_VA_SHIFT-1)) - 1UL;
if ( likely(is_ldt_area) )
{
@@ -490,9 +500,9 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
{
if ( shadow_mode_external(d) && GUEST_CONTEXT(v, regs) )
return shadow_fault(addr, regs);
- if ( (addr >= PERDOMAIN_VIRT_START) && (addr < PERDOMAIN_VIRT_END) )
- return handle_perdomain_mapping_fault(
- addr - PERDOMAIN_VIRT_START, regs);
+ if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
+ return handle_gdt_ldt_mapping_fault(
+ addr - GDT_LDT_VIRT_START, regs);
}
else if ( unlikely(shadow_mode_enabled(d)) )
{
@@ -596,7 +606,6 @@ static inline int guest_io_okay(
u16 x;
#if defined(__x86_64__)
/* If in user mode, switch to kernel mode just to read I/O bitmap. */
- extern void toggle_guest_mode(struct vcpu *);
int user_mode = !(v->arch.flags & TF_kernel_mode);
#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
#elif defined(__i386__)
@@ -964,16 +973,26 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
case 0x30: /* WRMSR */
/* Ignore the instruction if unprivileged. */
if ( !IS_PRIV(v->domain) )
- DPRINTK("Non-priv domain attempted WRMSR(%p,%08lx,%08lx).\n",
- _p(regs->ecx), (long)regs->eax, (long)regs->edx);
+ {
+ u32 l, h;
+ if ( (rdmsr_user(regs->ecx, l, h) != 0) ||
+ (regs->ecx != MSR_EFER) ||
+ (regs->eax != l) || (regs->edx != h) )
+ DPRINTK("Non-priv domain attempted WRMSR %p from "
+ "%08x:%08x to %08lx:%08lx.\n",
+ _p(regs->ecx), h, l, (long)regs->edx, (long)regs->eax);
+ }
else if ( wrmsr_user(regs->ecx, regs->eax, regs->edx) )
goto fail;
break;
case 0x32: /* RDMSR */
if ( !IS_PRIV(v->domain) )
- DPRINTK("Non-priv domain attempted RDMSR(%p,%08lx,%08lx).\n",
- _p(regs->ecx), (long)regs->eax, (long)regs->edx);
+ {
+ if ( regs->ecx != MSR_EFER )
+ DPRINTK("Non-priv domain attempted RDMSR %p.\n",
+ _p(regs->ecx));
+ }
/* Everyone can read the MSR space. */
if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) )
goto fail;
@@ -1080,26 +1099,23 @@ asmlinkage int do_general_protection(struct cpu_user_regs *regs)
return 0;
}
+static void nmi_softirq(void)
+{
+ /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
+ evtchn_notify(dom0->vcpu[0]);
+}
-/* Defer dom0 notification to softirq context (unsafe in NMI context). */
-static unsigned long nmi_dom0_softirq_reason;
-#define NMI_DOM0_PARITY_ERR 0
-#define NMI_DOM0_IO_ERR 1
-#define NMI_DOM0_UNKNOWN 2
-
-static void nmi_dom0_softirq(void)
+static void nmi_dom0_report(unsigned int reason_idx)
{
- if ( dom0 == NULL )
- return;
+ struct domain *d;
- if ( test_and_clear_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason) )
- send_guest_virq(dom0->vcpu[0], VIRQ_PARITY_ERR);
+ if ( (d = dom0) == NULL )
+ return;
- if ( test_and_clear_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason) )
- send_guest_virq(dom0->vcpu[0], VIRQ_IO_ERR);
+ set_bit(reason_idx, &d->shared_info->arch.nmi_reason);
- if ( test_and_clear_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason) )
- send_guest_virq(dom0->vcpu[0], VIRQ_NMI);
+ if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) )
+ raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
}
asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
@@ -1107,8 +1123,7 @@ asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
switch ( opt_nmi[0] )
{
case 'd': /* 'dom0' */
- set_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason);
- raise_softirq(NMI_DOM0_SOFTIRQ);
+ nmi_dom0_report(_XEN_NMIREASON_parity_error);
case 'i': /* 'ignore' */
break;
default: /* 'fatal' */
@@ -1127,8 +1142,7 @@ asmlinkage void io_check_error(struct cpu_user_regs *regs)
switch ( opt_nmi[0] )
{
case 'd': /* 'dom0' */
- set_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason);
- raise_softirq(NMI_DOM0_SOFTIRQ);
+ nmi_dom0_report(_XEN_NMIREASON_io_error);
case 'i': /* 'ignore' */
break;
default: /* 'fatal' */
@@ -1147,8 +1161,7 @@ static void unknown_nmi_error(unsigned char reason)
switch ( opt_nmi[0] )
{
case 'd': /* 'dom0' */
- set_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason);
- raise_softirq(NMI_DOM0_SOFTIRQ);
+ nmi_dom0_report(_XEN_NMIREASON_unknown);
case 'i': /* 'ignore' */
break;
default: /* 'fatal' */
@@ -1347,7 +1360,7 @@ void __init trap_init(void)
cpu_init();
- open_softirq(NMI_DOM0_SOFTIRQ, nmi_dom0_softirq);
+ open_softirq(NMI_SOFTIRQ, nmi_softirq);
}
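
The reporting scheme replaces three per-reason softirq bits with one architectural reason word (nmi_reason in the shared info) plus a pending/masked flag pair on vcpu 0: nmi_dom0_report() records the reason and posts _VCPUF_nmi_pending; the exit path (entry.S hunks below) delivers the callback only if it can atomically set _VCPUF_nmi_masked; do_iret() clears the mask again. A small user-space simulation of that handshake, with plain bit helpers standing in for Xen's (illustrative only):

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned long vcpu_flags;
    enum { NMI_PENDING, NMI_MASKED };

    static bool test_and_set(int b)
    { bool o = vcpu_flags >> b & 1; vcpu_flags |= 1UL << b; return o; }
    static bool test_and_clear(int b)
    { bool o = vcpu_flags >> b & 1; vcpu_flags &= ~(1UL << b); return o; }

    static void exit_to_guest(void)           /* mirrors process_nmi */
    {
        if (!test_and_clear(NMI_PENDING))
            return;
        if (test_and_set(NMI_MASKED)) {       /* still inside a callback: */
            test_and_set(NMI_PENDING);        /* re-post, retry after iret */
            return;
        }
        puts("bounce NMI callback into guest");
    }

    static void guest_iret(void) { test_and_clear(NMI_MASKED); }

    int main(void)
    {
        test_and_set(NMI_PENDING); exit_to_guest();  /* delivered      */
        test_and_set(NMI_PENDING); exit_to_guest();  /* masked: held   */
        guest_iret(); exit_to_guest();               /* delivered now  */
        return 0;
    }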
diff --git a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c
index 3cb18be4c2..6d6fa51764 100644
--- a/xen/arch/x86/vmx.c
+++ b/xen/arch/x86/vmx.c
@@ -42,7 +42,7 @@
#include <asm/shadow_64.h>
#endif
#include <public/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/vmx_vpic.h>
#include <asm/vmx_vlapic.h>
@@ -53,7 +53,7 @@ unsigned int opt_vmx_debug_level = 0;
integer_param("vmx_debug", opt_vmx_debug_level);
static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
+#define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
static int vmx_switch_on;
@@ -66,11 +66,6 @@ void vmx_final_setup_guest(struct vcpu *v)
struct domain *d = v->domain;
struct vcpu *vc;
- d->arch.vmx_platform.lapic_enable = v->arch.guest_context.user_regs.ecx;
- v->arch.guest_context.user_regs.ecx = 0;
- VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "lapic enable is %d.\n",
- d->arch.vmx_platform.lapic_enable);
-
/* Initialize monitor page table */
for_each_vcpu(d, vc)
vc->arch.monitor_table = mk_pagetable(0);
@@ -95,7 +90,7 @@ void vmx_final_setup_guest(struct vcpu *v)
void vmx_relinquish_resources(struct vcpu *v)
{
struct vmx_virpit *vpit;
-
+
if ( !VMX_DOMAIN(v) )
return;
@@ -103,19 +98,18 @@ void vmx_relinquish_resources(struct vcpu *v)
/* unmap IO shared page */
struct domain *d = v->domain;
if ( d->arch.vmx_platform.shared_page_va )
- unmap_domain_page((void *)d->arch.vmx_platform.shared_page_va);
+ unmap_domain_page_global(
+ (void *)d->arch.vmx_platform.shared_page_va);
}
destroy_vmcs(&v->arch.arch_vmx);
free_monitor_pagetable(v);
vpit = &v->domain->arch.vmx_platform.vmx_pit;
- if ( active_ac_timer(&(vpit->pit_timer)) )
- rem_ac_timer(&vpit->pit_timer);
- if ( active_ac_timer(&v->arch.arch_vmx.hlt_timer) )
- rem_ac_timer(&v->arch.arch_vmx.hlt_timer);
+ kill_timer(&vpit->pit_timer);
+ kill_timer(&v->arch.arch_vmx.hlt_timer);
if ( vmx_apic_support(v->domain) && (VLAPIC(v) != NULL) )
{
- rem_ac_timer(&VLAPIC(v)->vlapic_timer);
+ kill_timer(&VLAPIC(v)->vlapic_timer);
xfree(VLAPIC(v));
}
}
@@ -1604,7 +1598,7 @@ void vmx_vmexit_do_hlt(void)
next_wakeup = next_pit;
}
if ( next_wakeup != - 1 )
- set_ac_timer(&current->arch.arch_vmx.hlt_timer, next_wakeup);
+ set_timer(&current->arch.arch_vmx.hlt_timer, next_wakeup);
do_block();
}
@@ -1955,9 +1949,12 @@ asmlinkage void load_cr2(void)
asmlinkage void trace_vmentry (void)
{
- TRACE_5D(TRC_VMENTRY,trace_values[current->processor][0],
- trace_values[current->processor][1],trace_values[current->processor][2],
- trace_values[current->processor][3],trace_values[current->processor][4]);
+ TRACE_5D(TRC_VMENTRY,
+ trace_values[smp_processor_id()][0],
+ trace_values[smp_processor_id()][1],
+ trace_values[smp_processor_id()][2],
+ trace_values[smp_processor_id()][3],
+ trace_values[smp_processor_id()][4]);
TRACE_VMEXIT(0,9);
TRACE_VMEXIT(1,9);
TRACE_VMEXIT(2,9);
diff --git a/xen/arch/x86/vmx_intercept.c b/xen/arch/x86/vmx_intercept.c
index 8bac8a8e5c..419960842c 100644
--- a/xen/arch/x86/vmx_intercept.c
+++ b/xen/arch/x86/vmx_intercept.c
@@ -24,7 +24,7 @@
#include <asm/vmx_vpit.h>
#include <asm/vmx_intercept.h>
#include <asm/vmx_vlapic.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
@@ -356,19 +356,19 @@ static void pit_timer_fn(void *data)
vpit->pending_intr_nr++;
if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
vpit->scheduled += vpit->period;
- set_ac_timer(&vpit->pit_timer, vpit->scheduled);
+ set_timer(&vpit->pit_timer, vpit->scheduled);
}
}
void pickup_deactive_ticks(struct vmx_virpit *vpit)
{
- if ( !active_ac_timer(&(vpit->pit_timer)) ) {
+ if ( !active_timer(&(vpit->pit_timer)) ) {
/* pick up missed timer tick */
missed_ticks(vpit);
vpit->scheduled += vpit->period;
- set_ac_timer(&vpit->pit_timer, vpit->scheduled);
+ set_timer(&vpit->pit_timer, vpit->scheduled);
}
}
@@ -385,14 +385,14 @@ void vmx_hooks_assist(struct vcpu *v)
/* load init count*/
if (p->state == STATE_IORESP_HOOK) {
/* set up actimer, handle re-init */
- if ( active_ac_timer(&(vpit->pit_timer)) ) {
+ if ( active_timer(&(vpit->pit_timer)) ) {
VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT: guest reset PIT with channel %lx!\n", (unsigned long) ((p->u.data >> 24) & 0x3) );
- rem_ac_timer(&(vpit->pit_timer));
+ stop_timer(&(vpit->pit_timer));
reinit = 1;
}
else {
- init_ac_timer(&vpit->pit_timer, pit_timer_fn, v, v->processor);
+ init_timer(&vpit->pit_timer, pit_timer_fn, v, v->processor);
}
/* init count for this channel */
@@ -431,7 +431,7 @@ void vmx_hooks_assist(struct vcpu *v)
}
vpit->scheduled = NOW() + vpit->period;
- set_ac_timer(&vpit->pit_timer, vpit->scheduled);
+ set_timer(&vpit->pit_timer, vpit->scheduled);
/*restore the state*/
p->state = STATE_IORESP_READY;
diff --git a/xen/arch/x86/vmx_io.c b/xen/arch/x86/vmx_io.c
index b7689228bf..c979a8d741 100644
--- a/xen/arch/x86/vmx_io.c
+++ b/xen/arch/x86/vmx_io.c
@@ -37,7 +37,7 @@
#include <asm/shadow.h>
#include <asm/vmx_vpic.h>
#include <asm/vmx_vlapic.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#ifdef CONFIG_VMX
#if defined (__i386__)
@@ -819,7 +819,7 @@ interrupt_post_injection(struct vcpu * v, int vector, int type)
if ( !vpit->first_injected ) {
vpit->pending_intr_nr = 0;
vpit->scheduled = NOW() + vpit->period;
- set_ac_timer(&vpit->pit_timer, vpit->scheduled);
+ set_timer(&vpit->pit_timer, vpit->scheduled);
vpit->first_injected = 1;
} else {
vpit->pending_intr_nr--;
diff --git a/xen/arch/x86/vmx_platform.c b/xen/arch/x86/vmx_platform.c
index 2ee14c65ec..45d1e0052b 100644
--- a/xen/arch/x86/vmx_platform.c
+++ b/xen/arch/x86/vmx_platform.c
@@ -27,7 +27,7 @@
#include <xen/trace.h>
#include <asm/vmx.h>
#include <asm/vmx_platform.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <xen/lib.h>
#include <xen/sched.h>
diff --git a/xen/arch/x86/vmx_vlapic.c b/xen/arch/x86/vmx_vlapic.c
index fa1dc2118d..d487f9739e 100644
--- a/xen/arch/x86/vmx_vlapic.c
+++ b/xen/arch/x86/vmx_vlapic.c
@@ -32,7 +32,7 @@
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#ifdef CONFIG_VMX
@@ -62,7 +62,7 @@ int vlapic_find_highest_irr(struct vlapic *vlapic)
int vmx_apic_support(struct domain *d)
{
- return d->arch.vmx_platform.lapic_enable;
+ return d->arch.vmx_platform.apic_enabled;
}
s_time_t get_apictime_scheduled(struct vcpu *v)
@@ -391,7 +391,7 @@ static void vlapic_begin_timer(struct vlapic *vlapic)
(262144 / get_apic_bus_scale()) * vlapic->timer_divide_counter;
vlapic->vlapic_timer.expires = cur + offset;
- set_ac_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires );
+ set_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires );
VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_begin_timer: "
"bus_scale %x now %08x%08x expire %08x%08x "
@@ -739,7 +739,7 @@ static void vlapic_write(struct vcpu *v, unsigned long address,
case APIC_TMICT:
if (vlapic_timer_active(vlapic))
- rem_ac_timer(&(vlapic->vlapic_timer));
+ stop_timer(&(vlapic->vlapic_timer));
vlapic->timer_initial = val;
vlapic->timer_current = val;
@@ -846,7 +846,7 @@ void vlapic_timer_fn(void *data)
vlapic->timer_current = vlapic->timer_initial;
offset = vlapic->timer_current * (262144/get_apic_bus_scale()) * vlapic->timer_divide_counter;
vlapic->vlapic_timer.expires = NOW() + offset;
- set_ac_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires);
+ set_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires);
}else {
vlapic->timer_current = 0;
}
@@ -986,7 +986,7 @@ static int vlapic_reset(struct vlapic *vlapic)
vmx_vioapic_add_lapic(vlapic, v);
- init_ac_timer(&vlapic->vlapic_timer,
+ init_timer(&vlapic->vlapic_timer,
vlapic_timer_fn, vlapic, v->processor);
#ifdef VLAPIC_NO_BIOS
diff --git a/xen/arch/x86/vmx_vmcs.c b/xen/arch/x86/vmx_vmcs.c
index 17eb2caad3..9b7c9d41d2 100644
--- a/xen/arch/x86/vmx_vmcs.c
+++ b/xen/arch/x86/vmx_vmcs.c
@@ -32,7 +32,7 @@
#include <asm/flushtlb.h>
#include <xen/event.h>
#include <xen/kernel.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/hvm_info_table.h>
#if CONFIG_PAGING_LEVELS >= 4
#include <asm/shadow_64.h>
#endif
@@ -193,7 +193,7 @@ static void vmx_map_io_shared_page(struct domain *d)
domain_crash_synchronous();
}
- p = map_domain_page(mpfn);
+ p = map_domain_page_global(mpfn);
if (p == NULL) {
printk("Can not map io request shared page for VMX domain.\n");
domain_crash_synchronous();
@@ -206,35 +206,55 @@ static void vmx_map_io_shared_page(struct domain *d)
&d->shared_info->evtchn_mask[0]);
}
-#define VCPU_NR_PAGE 0x0009F000
-#define VCPU_NR_OFFSET 0x00000800
-#define VCPU_MAGIC 0x76637075 /* "vcpu" */
+static int validate_hvm_info(struct hvm_info_table *t)
+{
+ char signature[] = "HVM INFO";
+ uint8_t *ptr = (uint8_t *)t;
+ uint8_t sum = 0;
+ int i;
+
+ /* strncmp(t->signature, "HVM INFO", 8) */
+ for ( i = 0; i < 8; i++ ) {
+ if ( signature[i] != t->signature[i] ) {
+ printk("Bad hvm info signature\n");
+ return 0;
+ }
+ }
+
+ for ( i = 0; i < t->length; i++ )
+ sum += ptr[i];
-static void vmx_set_vcpu_nr(struct domain *d)
+ return (sum == 0);
+}
+
+static void vmx_get_hvm_info(struct domain *d)
{
unsigned char *p;
unsigned long mpfn;
- unsigned int *vcpus;
+ struct hvm_info_table *t;
- mpfn = get_mfn_from_pfn(VCPU_NR_PAGE >> PAGE_SHIFT);
- if (mpfn == INVALID_MFN) {
- printk("Can not get vcpu number page mfn for VMX domain.\n");
+ mpfn = get_mfn_from_pfn(HVM_INFO_PFN);
+ if ( mpfn == INVALID_MFN ) {
+ printk("Cannot get hvm info page mfn for VMX domain.\n");
domain_crash_synchronous();
}
p = map_domain_page(mpfn);
- if (p == NULL) {
- printk("Can not map vcpu number page for VMX domain.\n");
+ if ( p == NULL ) {
+ printk("Cannot map hvm info page for VMX domain.\n");
domain_crash_synchronous();
}
- vcpus = (unsigned int *)(p + VCPU_NR_OFFSET);
- if (vcpus[0] != VCPU_MAGIC) {
- printk("Bad vcpus magic, set vcpu number to 1 by default.\n");
- d->arch.vmx_platform.nr_vcpu = 1;
- }
+ t = (struct hvm_info_table *)(p + HVM_INFO_OFFSET);
- d->arch.vmx_platform.nr_vcpu = vcpus[1];
+ if ( validate_hvm_info(t) ) {
+ d->arch.vmx_platform.nr_vcpus = t->nr_vcpus;
+ d->arch.vmx_platform.apic_enabled = t->apic_enabled;
+ } else {
+ printk("Bad hvm info table\n");
+ d->arch.vmx_platform.nr_vcpus = 1;
+ d->arch.vmx_platform.apic_enabled = 0;
+ }
unmap_domain_page(p);
}
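
validate_hvm_info() accepts the table only when all t->length bytes sum to zero modulo 256, so the builder must store the two's complement of the running sum in a checksum field. A standalone sketch of both sides, with a hypothetical layout standing in for the real struct hvm_info_table:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct info {                /* hypothetical stand-in layout */
        char     signature[8];   /* "HVM INFO" */
        uint32_t length;
        uint8_t  checksum;
        uint8_t  nr_vcpus;
    };

    static uint8_t byte_sum(const void *p, uint32_t n)
    {
        const uint8_t *b = p;
        uint8_t s = 0;
        while (n--)
            s += *b++;
        return s;
    }

    int main(void)
    {
        struct info t;
        memset(&t, 0, sizeof(t));            /* padding bytes sum in too */
        memcpy(t.signature, "HVM INFO", 8);
        t.length   = sizeof(t);
        t.nr_vcpus = 2;
        t.checksum = -byte_sum(&t, t.length);                 /* builder side */
        printf("valid = %d\n", byte_sum(&t, t.length) == 0);  /* Xen side */
        return 0;
    }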
@@ -244,10 +264,10 @@ static void vmx_setup_platform(struct domain* d)
struct vmx_platform *platform;
vmx_map_io_shared_page(d);
- vmx_set_vcpu_nr(d);
+ vmx_get_hvm_info(d);
platform = &d->arch.vmx_platform;
- pic_init(&platform->vmx_pic, pic_irq_request,
+ pic_init(&platform->vmx_pic, pic_irq_request,
&platform->interrupt_request);
register_pic_io_hook();
@@ -321,7 +341,7 @@ static void vmx_do_launch(struct vcpu *v)
vlapic_init(v);
vmx_set_host_env(v);
- init_ac_timer(&v->arch.arch_vmx.hlt_timer, hlt_timer_fn, v, v->processor);
+ init_timer(&v->arch.arch_vmx.hlt_timer, hlt_timer_fn, v, v->processor);
error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
error |= __vmwrite(GUEST_LDTR_BASE, 0);
@@ -335,6 +355,8 @@ static void vmx_do_launch(struct vcpu *v)
__vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
v->arch.schedule_tail = arch_vmx_do_resume;
+ v->arch.arch_vmx.launch_cpu = smp_processor_id();
+
/* init guest tsc to start from 0 */
rdtscll(host_tsc);
v->arch.arch_vmx.tsc_offset = 0 - host_tsc;
@@ -617,11 +639,21 @@ void vm_resume_fail(unsigned long eflags)
void arch_vmx_do_resume(struct vcpu *v)
{
- u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
- load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
- vmx_do_resume(v);
- reset_stack_and_jump(vmx_asm_do_resume);
+ if ( v->arch.arch_vmx.launch_cpu == smp_processor_id() )
+ {
+ load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
+ vmx_do_resume(v);
+ reset_stack_and_jump(vmx_asm_do_resume);
+ }
+ else
+ {
+ __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
+ load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
+ vmx_do_resume(v);
+ vmx_set_host_env(v);
+ v->arch.arch_vmx.launch_cpu = smp_processor_id();
+ reset_stack_and_jump(vmx_asm_do_relaunch);
+ }
}
void arch_vmx_do_launch(struct vcpu *v)
@@ -643,18 +675,6 @@ void arch_vmx_do_launch(struct vcpu *v)
reset_stack_and_jump(vmx_asm_do_launch);
}
-void arch_vmx_do_relaunch(struct vcpu *v)
-{
- u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
- load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
- vmx_do_resume(v);
- vmx_set_host_env(v);
- v->arch.schedule_tail = arch_vmx_do_resume;
-
- reset_stack_and_jump(vmx_asm_do_relaunch);
-}
-
#endif /* CONFIG_VMX */
/*
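
arch_vmx_do_resume() now keys off launch_cpu, the physical CPU that last loaded this VMCS. A VMCS is bound to the processor it was last loaded on, so after a vcpu migrates it must be flushed with VMCLEAR before it can be made current elsewhere, the per-CPU host state must be rewritten, and the guest re-entered via the full VMLAUNCH path; resuming on the same CPU skips all of that. In outline (condensed from the function above):

    if ( v->arch.arch_vmx.launch_cpu == smp_processor_id() )
    {
        load_vmcs(...);               /* VMCS already bound here: VMRESUME */
        reset_stack_and_jump(vmx_asm_do_resume);
    }
    else
    {
        __vmpclear(...);              /* flush state out of the old CPU   */
        load_vmcs(...);               /* bind the VMCS to this CPU        */
        vmx_set_host_env(v);          /* host fields are per-CPU          */
        v->arch.arch_vmx.launch_cpu = smp_processor_id();
        reset_stack_and_jump(vmx_asm_do_relaunch);  /* VMLAUNCH path */
    }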
diff --git a/xen/arch/x86/x86_32/asm-offsets.c b/xen/arch/x86/x86_32/asm-offsets.c
index 3a5c3ef9f8..42bef57240 100644
--- a/xen/arch/x86/x86_32/asm-offsets.c
+++ b/xen/arch/x86/x86_32/asm-offsets.c
@@ -65,6 +65,10 @@ void __dummy__(void)
arch.guest_context.kernel_ss);
OFFSET(VCPU_kernel_sp, struct vcpu,
arch.guest_context.kernel_sp);
+ OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
+ OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+ DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
+ DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
BLANK();
OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
diff --git a/xen/arch/x86/x86_32/domain_page.c b/xen/arch/x86/x86_32/domain_page.c
index f7c194b775..222e813693 100644
--- a/xen/arch/x86/x86_32/domain_page.c
+++ b/xen/arch/x86/x86_32/domain_page.c
@@ -1,14 +1,9 @@
/******************************************************************************
* domain_page.h
*
- * Allow temporary mapping of domain pages. Based on ideas from the
- * Linux PKMAP code -- the copyrights and credits are retained below.
- */
-
-/*
- * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
- * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de *
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ * Allow temporary mapping of domain pages.
+ *
+ * Copyright (c) 2003-2006, Keir Fraser <keir@xensource.com>
*/
#include <xen/config.h>
@@ -20,80 +15,203 @@
#include <asm/flushtlb.h>
#include <asm/hardirq.h>
-#define MAPCACHE_ORDER 10
-#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
-
-l1_pgentry_t *mapcache;
-static unsigned int map_idx, epoch, shadow_epoch[NR_CPUS];
-static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
-
-/* Use a spare PTE bit to mark entries ready for recycling. */
-#define READY_FOR_TLB_FLUSH (1<<10)
-
-static void flush_all_ready_maps(void)
-{
- l1_pgentry_t *cache = mapcache;
- unsigned int i;
-
- for ( i = 0; i < MAPCACHE_ENTRIES; i++ )
- if ( (l1e_get_flags(cache[i]) & READY_FOR_TLB_FLUSH) )
- cache[i] = l1e_empty();
-}
-
-void *map_domain_pages(unsigned long pfn, unsigned int order)
+void *map_domain_page(unsigned long pfn)
{
unsigned long va;
- unsigned int idx, i, flags, cpu = smp_processor_id();
- l1_pgentry_t *cache = mapcache;
-#ifndef NDEBUG
- unsigned int flush_count = 0;
-#endif
+ unsigned int idx, i, vcpu = current->vcpu_id;
+ struct domain *d;
+ struct mapcache *cache;
+ struct vcpu_maphash_entry *hashent;
ASSERT(!in_irq());
+
perfc_incrc(map_domain_page_count);
- spin_lock(&map_lock);
+ /* If we are the idle domain, ensure that we run on our own page tables. */
+ d = current->domain;
+ if ( unlikely(is_idle_domain(d)) )
+ __sync_lazy_execstate();
- /* Has some other CPU caused a wrap? We must flush if so. */
- if ( epoch != shadow_epoch[cpu] )
+ cache = &d->arch.mapcache;
+
+ hashent = &cache->vcpu_maphash[vcpu].hash[MAPHASH_HASHFN(pfn)];
+ if ( hashent->pfn == pfn )
{
- perfc_incrc(domain_page_tlb_flush);
- local_flush_tlb();
- shadow_epoch[cpu] = epoch;
+ idx = hashent->idx;
+ hashent->refcnt++;
+ ASSERT(hashent->refcnt != 0);
+ ASSERT(l1e_get_pfn(cache->l1tab[idx]) == pfn);
+ goto out;
}
- do {
- idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
- if ( unlikely(idx == 0) )
+ spin_lock(&cache->lock);
+
+ /* Has some other CPU caused a wrap? We must flush if so. */
+ if ( unlikely(cache->epoch != cache->shadow_epoch[vcpu]) )
+ {
+ cache->shadow_epoch[vcpu] = cache->epoch;
+ if ( NEED_FLUSH(tlbflush_time[smp_processor_id()],
+ cache->tlbflush_timestamp) )
{
- ASSERT(flush_count++ == 0);
- flush_all_ready_maps();
perfc_incrc(domain_page_tlb_flush);
local_flush_tlb();
- shadow_epoch[cpu] = ++epoch;
+ }
+ }
+
+ idx = find_next_zero_bit(cache->inuse, MAPCACHE_ENTRIES, cache->cursor);
+ if ( unlikely(idx >= MAPCACHE_ENTRIES) )
+ {
+ /* /First/, clean the garbage map and update the inuse list. */
+ for ( i = 0; i < ARRAY_SIZE(cache->garbage); i++ )
+ {
+ unsigned long x = xchg(&cache->garbage[i], 0);
+ cache->inuse[i] &= ~x;
}
- flags = 0;
- for ( i = 0; i < (1U << order); i++ )
- flags |= l1e_get_flags(cache[idx+i]);
+ /* /Second/, flush TLBs. */
+ perfc_incrc(domain_page_tlb_flush);
+ local_flush_tlb();
+ cache->shadow_epoch[vcpu] = ++cache->epoch;
+ cache->tlbflush_timestamp = tlbflush_current_time();
+
+ idx = find_first_zero_bit(cache->inuse, MAPCACHE_ENTRIES);
+ ASSERT(idx < MAPCACHE_ENTRIES);
}
- while ( flags & _PAGE_PRESENT );
- for ( i = 0; i < (1U << order); i++ )
- cache[idx+i] = l1e_from_pfn(pfn+i, __PAGE_HYPERVISOR);
+ set_bit(idx, cache->inuse);
+ cache->cursor = idx + 1;
+
+ spin_unlock(&cache->lock);
- spin_unlock(&map_lock);
+ cache->l1tab[idx] = l1e_from_pfn(pfn, __PAGE_HYPERVISOR);
+ out:
va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT);
return (void *)va;
}
-void unmap_domain_pages(void *va, unsigned int order)
+void unmap_domain_page(void *va)
{
- unsigned int idx, i;
+ unsigned int idx;
+ struct mapcache *cache = &current->domain->arch.mapcache;
+ unsigned long pfn;
+ struct vcpu_maphash_entry *hashent;
+
+ ASSERT(!in_irq());
+
ASSERT((void *)MAPCACHE_VIRT_START <= va);
ASSERT(va < (void *)MAPCACHE_VIRT_END);
+
idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
- for ( i = 0; i < (1U << order); i++ )
- l1e_add_flags(mapcache[idx+i], READY_FOR_TLB_FLUSH);
+ pfn = l1e_get_pfn(cache->l1tab[idx]);
+ hashent = &cache->vcpu_maphash[current->vcpu_id].hash[MAPHASH_HASHFN(pfn)];
+
+ if ( hashent->idx == idx )
+ {
+ ASSERT(hashent->pfn == pfn);
+ ASSERT(hashent->refcnt != 0);
+ hashent->refcnt--;
+ }
+ else if ( hashent->refcnt == 0 )
+ {
+ if ( hashent->idx != MAPHASHENT_NOTINUSE )
+ {
+ /* /First/, zap the PTE. */
+ ASSERT(l1e_get_pfn(cache->l1tab[hashent->idx]) == hashent->pfn);
+ cache->l1tab[hashent->idx] = l1e_empty();
+ /* /Second/, mark as garbage. */
+ set_bit(hashent->idx, cache->garbage);
+ }
+
+ /* Add newly-freed mapping to the maphash. */
+ hashent->pfn = pfn;
+ hashent->idx = idx;
+ }
+ else
+ {
+ /* /First/, zap the PTE. */
+ cache->l1tab[idx] = l1e_empty();
+ /* /Second/, mark as garbage. */
+ set_bit(idx, cache->garbage);
+ }
+}
+
+void mapcache_init(struct domain *d)
+{
+ unsigned int i, j;
+
+ d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
+ (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
+ spin_lock_init(&d->arch.mapcache.lock);
+
+ /* Mark all maphash entries as not in use. */
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ for ( j = 0; j < MAPHASH_ENTRIES; j++ )
+ d->arch.mapcache.vcpu_maphash[i].hash[j].idx =
+ MAPHASHENT_NOTINUSE;
+}
+
+#define GLOBALMAP_BITS (IOREMAP_MBYTES << (20 - PAGE_SHIFT))
+static unsigned long inuse[BITS_TO_LONGS(GLOBALMAP_BITS)];
+static unsigned long garbage[BITS_TO_LONGS(GLOBALMAP_BITS)];
+static unsigned int inuse_cursor;
+static spinlock_t globalmap_lock = SPIN_LOCK_UNLOCKED;
+
+void *map_domain_page_global(unsigned long pfn)
+{
+ l2_pgentry_t *pl2e;
+ l1_pgentry_t *pl1e;
+ unsigned int idx, i;
+ unsigned long va;
+
+ ASSERT(!in_irq() && local_irq_is_enabled());
+
+ spin_lock(&globalmap_lock);
+
+ idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
+ va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
+ if ( unlikely(va >= FIXADDR_START) )
+ {
+ /* /First/, clean the garbage map and update the inuse list. */
+ for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
+ {
+ unsigned long x = xchg(&garbage[i], 0);
+ inuse[i] &= ~x;
+ }
+
+ /* /Second/, flush all TLBs to get rid of stale garbage mappings. */
+ flush_tlb_all();
+
+ idx = find_first_zero_bit(inuse, GLOBALMAP_BITS);
+ va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
+ ASSERT(va < FIXADDR_START);
+ }
+
+ set_bit(idx, inuse);
+ inuse_cursor = idx + 1;
+
+ spin_unlock(&globalmap_lock);
+
+ pl2e = virt_to_xen_l2e(va);
+ pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(va);
+ *pl1e = l1e_from_pfn(pfn, __PAGE_HYPERVISOR);
+
+ return (void *)va;
+}
+
+void unmap_domain_page_global(void *va)
+{
+ unsigned long __va = (unsigned long)va;
+ l2_pgentry_t *pl2e;
+ l1_pgentry_t *pl1e;
+ unsigned int idx;
+
+ /* /First/, we zap the PTE. */
+ pl2e = virt_to_xen_l2e(__va);
+ pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(__va);
+ *pl1e = l1e_empty();
+
+ /* /Second/, we add to the garbage map. */
+ idx = (__va - IOREMAP_VIRT_START) >> PAGE_SHIFT;
+ set_bit(idx, garbage);
}
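
The per-domain mapcache above and this global map share one allocation discipline: a cursor walks an inuse bitmap; unmap only marks a garbage bitmap; and when the scan wraps, garbage is folded back into inuse and the TLB flushed once, amortising a single flush over a whole window of map/unmap pairs. A self-contained model of that discipline, shrunk to an 8-slot cache with stand-in helpers:

    #include <stdio.h>

    #define ENTRIES 8
    static unsigned char inuse, garbage;   /* one bit per slot */
    static unsigned int cursor;

    static void flush_tlb(void) { puts("TLB flush"); }

    static int alloc_slot(void)
    {
        unsigned int i;
        for ( i = cursor; i < ENTRIES; i++ )
            if ( !(inuse & (1u << i)) )
                goto found;
        inuse &= ~garbage;                 /* fold garbage back in ...  */
        garbage = 0;
        flush_tlb();                       /* ... behind a single flush */
        for ( i = 0; i < ENTRIES; i++ )
            if ( !(inuse & (1u << i)) )
                goto found;
        return -1;                         /* genuinely full */
    found:
        inuse |= 1u << i;
        cursor = i + 1;
        return (int)i;
    }

    static void free_slot(int i) { garbage |= 1u << i; }  /* defer flush */

    int main(void)
    {
        int n;
        for ( n = 0; n < 20; n++ )         /* churn: one flush per 8 allocs */
            free_slot(alloc_slot());
        return 0;
    }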
diff --git a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S
index b890103160..e178d7383e 100644
--- a/xen/arch/x86/x86_32/entry.S
+++ b/xen/arch/x86/x86_32/entry.S
@@ -326,7 +326,9 @@ test_all_events:
shl $IRQSTAT_shift,%eax
test %ecx,irq_stat(%eax,1)
jnz process_softirqs
-/*test_guest_events:*/
+ btr $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+ jc process_nmi
+test_guest_events:
movl VCPU_vcpu_info(%ebx),%eax
testb $0xFF,VCPUINFO_upcall_mask(%eax)
jnz restore_all_guest
@@ -348,7 +350,24 @@ process_softirqs:
sti
call do_softirq
jmp test_all_events
-
+
+ ALIGN
+process_nmi:
+ movl VCPU_nmi_addr(%ebx),%eax
+ test %eax,%eax
+ jz test_all_events
+ bts $_VCPUF_nmi_masked,VCPU_flags(%ebx)
+ jc 1f
+ sti
+ leal VCPU_trap_bounce(%ebx),%edx
+ movl %eax,TRAPBOUNCE_eip(%edx)
+ movw $FLAT_KERNEL_CS,TRAPBOUNCE_cs(%edx)
+ movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
+ call create_bounce_frame
+ jmp test_all_events
+1: bts $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+ jmp test_guest_events
+
/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */
/* {EIP, CS, EFLAGS, [ESP, SS]} */
/* %edx == trap_bounce, %ebx == struct vcpu */
@@ -620,9 +639,7 @@ ENTRY(nmi)
jne defer_nmi
continue_nmi:
- movl $(__HYPERVISOR_DS),%edx
- movl %edx,%ds
- movl %edx,%es
+ SET_XEN_SEGMENTS(d)
movl %esp,%edx
pushl %edx
call do_nmi
@@ -660,42 +677,6 @@ do_arch_sched_op:
movl %eax,UREGS_eax(%ecx)
jmp do_sched_op
-do_switch_vm86:
- # Reset the stack pointer
- GET_GUEST_REGS(%ecx)
- movl %ecx,%esp
-
- # GS:ESI == Ring-1 stack activation
- movl UREGS_esp(%esp),%esi
-VFLT1: mov UREGS_ss(%esp),%gs
-
- # ES:EDI == Ring-0 stack activation
- leal UREGS_eip(%esp),%edi
-
- # Restore the hypercall-number-clobbered EAX on our stack frame
-VFLT2: movl %gs:(%esi),%eax
- movl %eax,UREGS_eax(%esp)
- addl $4,%esi
-
- # Copy the VM86 activation from the ring-1 stack to the ring-0 stack
- movl $(UREGS_user_sizeof-UREGS_eip)/4,%ecx
-VFLT3: movl %gs:(%esi),%eax
- stosl
- addl $4,%esi
- loop VFLT3
-
- # Fix up EFLAGS: IOPL=0, IF=1, VM=1
- andl $~X86_EFLAGS_IOPL,UREGS_eflags(%esp)
- orl $X86_EFLAGS_IF|X86_EFLAGS_VM,UREGS_eflags(%esp)
-
- jmp test_all_events
-
-.section __ex_table,"a"
- .long VFLT1,domain_crash_synchronous
- .long VFLT2,domain_crash_synchronous
- .long VFLT3,domain_crash_synchronous
-.previous
-
.data
ENTRY(exception_table)
@@ -744,11 +725,12 @@ ENTRY(hypercall_table)
.long do_grant_table_op /* 20 */
.long do_vm_assist
.long do_update_va_mapping_otherdomain
- .long do_switch_vm86
+ .long do_iret
.long do_vcpu_op
.long do_ni_hypercall /* 25 */
.long do_mmuext_op
- .long do_acm_op /* 27 */
+ .long do_acm_op
+ .long do_nmi_op
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
@@ -777,11 +759,12 @@ ENTRY(hypercall_args_table)
.byte 3 /* do_grant_table_op */ /* 20 */
.byte 2 /* do_vm_assist */
.byte 5 /* do_update_va_mapping_otherdomain */
- .byte 0 /* do_switch_vm86 */
+ .byte 0 /* do_iret */
.byte 3 /* do_vcpu_op */
.byte 0 /* do_ni_hypercall */ /* 25 */
.byte 4 /* do_mmuext_op */
.byte 1 /* do_acm_op */
+ .byte 2 /* do_nmi_op */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c
index 4be333f4cf..95def3f2b4 100644
--- a/xen/arch/x86/x86_32/mm.c
+++ b/xen/arch/x86/x86_32/mm.c
@@ -29,8 +29,6 @@
#include <asm/fixmap.h>
#include <public/memory.h>
-extern l1_pgentry_t *mapcache;
-
unsigned int PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
@@ -68,7 +66,7 @@ void __init paging_init(void)
void *ioremap_pt;
unsigned long v;
struct pfn_info *pg;
- int i, mapcache_order;
+ int i;
#ifdef CONFIG_X86_PAE
printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
@@ -76,7 +74,7 @@ void __init paging_init(void)
printk("PAE disabled.\n");
#endif
- idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+ idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
if ( cpu_has_pge )
{
@@ -121,14 +119,12 @@ void __init paging_init(void)
l2e_from_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR);
}
- /* Set up mapping cache for domain pages. */
- mapcache_order = get_order_from_bytes(
- MAPCACHE_MBYTES << (20 - PAGETABLE_ORDER));
- mapcache = alloc_xenheap_pages(mapcache_order);
- memset(mapcache, 0, PAGE_SIZE << mapcache_order);
- for ( i = 0; i < (MAPCACHE_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
- idle_pg_table_l2[l2_linear_offset(MAPCACHE_VIRT_START) + i] =
- l2e_from_page(virt_to_page(mapcache) + i, __PAGE_HYPERVISOR);
+ /* Install per-domain mappings for idle domain. */
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ idle_pg_table_l2[l2_linear_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(idle_vcpu[0]->domain->
+ arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
}
void __init zap_low_mappings(l2_pgentry_t *base)
diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c
index cb2b7b9eaa..95b69a14bd 100644
--- a/xen/arch/x86/x86_32/traps.c
+++ b/xen/arch/x86/x86_32/traps.c
@@ -157,6 +157,64 @@ asmlinkage void do_double_fault(void)
__asm__ __volatile__ ( "hlt" );
}
+asmlinkage unsigned long do_iret(void)
+{
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+ u32 eflags;
+
+ /* Check worst-case stack frame for overlap with Xen protected area. */
+ if ( unlikely(!access_ok(regs->esp, 40)) )
+ domain_crash_synchronous();
+
+ /* Pop and restore EAX (clobbered by hypercall). */
+ if ( unlikely(__copy_from_user(&regs->eax, (void __user *)regs->esp, 4)) )
+ domain_crash_synchronous();
+ regs->esp += 4;
+
+ /* Pop and restore CS and EIP. */
+ if ( unlikely(__copy_from_user(&regs->eip, (void __user *)regs->esp, 8)) )
+ domain_crash_synchronous();
+ regs->esp += 8;
+
+ /*
+ * Pop, fix up and restore EFLAGS. We fix up in a local staging area
+ * to avoid firing the BUG_ON(IOPL) check in arch_getdomaininfo_ctxt.
+ */
+ if ( unlikely(__copy_from_user(&eflags, (void __user *)regs->esp, 4)) )
+ domain_crash_synchronous();
+ regs->esp += 4;
+ regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
+
+ if ( VM86_MODE(regs) )
+ {
+ /* Return to VM86 mode: pop and restore ESP,SS,ES,DS,FS and GS. */
+ if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 24) )
+ domain_crash_synchronous();
+ }
+ else if ( unlikely(RING_0(regs)) )
+ {
+ domain_crash_synchronous();
+ }
+ else if ( !RING_1(regs) )
+ {
+ /* Return to ring 2/3: pop and restore ESP and SS. */
+ if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 8) )
+ domain_crash_synchronous();
+ }
+
+ /* No longer in NMI context. */
+ clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
+
+ /* Restore upcall mask from saved value. */
+ current->vcpu_info->evtchn_upcall_mask = regs->saved_upcall_mask;
+
+ /*
+ * The hypercall exit path will overwrite EAX with this return
+ * value.
+ */
+ return regs->eax;
+}
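
Reading the pops in order gives the frame a guest must push (lowest address first) before invoking the new iret hypercall; this layout is inferred from the code above, not quoted from a public header. The 40 bytes checked by access_ok() are the VM86 worst case:

    /* Inferred x86_32 iret frame, lowest address first: */
    struct iret_frame_32 {
        uint32_t eax;        /* restored; the hypercall clobbered it    */
        uint32_t eip;
        uint32_t cs;         /* low 16 bits CS; byte 2 lands in
                                regs->saved_upcall_mask via the 8-byte
                                copy into &regs->eip                    */
        uint32_t eflags;     /* IOPL stripped, IF forced on by Xen      */
        uint32_t esp, ss;    /* popped only for ring 2/3 or VM86 return */
        uint32_t es, ds, fs, gs;             /* popped only for VM86    */
    };  /* 4 + 8 + 4 + 24 = 40 bytes worst case, matching access_ok() */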
+
BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
asmlinkage void smp_deferred_nmi(struct cpu_user_regs regs)
{
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index c7a3e6025c..0aa20ccabb 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -65,6 +65,10 @@ void __dummy__(void)
arch.guest_context.syscall_callback_eip);
OFFSET(VCPU_kernel_sp, struct vcpu,
arch.guest_context.kernel_sp);
+ OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
+ OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+ DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
+ DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
BLANK();
OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 3c5c344a1a..88fe273bab 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -171,7 +171,9 @@ test_all_events:
leaq irq_stat(%rip),%rcx
testl $~0,(%rcx,%rax,1)
jnz process_softirqs
-/*test_guest_events:*/
+ btr $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+ jc process_nmi
+test_guest_events:
movq VCPU_vcpu_info(%rbx),%rax
testb $0xFF,VCPUINFO_upcall_mask(%rax)
jnz restore_all_guest
@@ -322,6 +324,23 @@ process_softirqs:
call do_softirq
jmp test_all_events
+ ALIGN
+/* %rbx: struct vcpu */
+process_nmi:
+ movq VCPU_nmi_addr(%rbx),%rax
+ test %rax,%rax
+ jz test_all_events
+ bts $_VCPUF_nmi_masked,VCPU_flags(%rbx)
+ jc 1f
+ sti
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ movq %rax,TRAPBOUNCE_eip(%rdx)
+ movw $(TBF_INTERRUPT|TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
+ call create_bounce_frame
+ jmp test_all_events
+1: bts $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+ jmp test_guest_events
+
/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK: */
/* { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS } */
/* %rdx: trap_bounce, %rbx: struct vcpu */
@@ -339,6 +358,9 @@ create_bounce_frame:
1: /* In kernel context already: push new frame at existing %rsp. */
movq UREGS_rsp+8(%rsp),%rsi
andb $0xfc,UREGS_cs+8(%rsp) # Indicate kernel context to guest.
+ testw $(TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
+ jz 2f
+ orb $0x01,UREGS_cs+8(%rsp)
2: andq $~0xf,%rsi # Stack frames are 16-byte aligned.
movq $HYPERVISOR_VIRT_START,%rax
cmpq %rax,%rsi
@@ -569,7 +591,7 @@ ENTRY(nmi)
SAVE_ALL
movq %rsp,%rdi
call do_nmi
- jmp restore_all_xen
+ jmp ret_from_intr
do_arch_sched_op:
# Ensure we return success even if we return via schedule_tail()
@@ -626,11 +648,12 @@ ENTRY(hypercall_table)
.quad do_grant_table_op /* 20 */
.quad do_vm_assist
.quad do_update_va_mapping_otherdomain
- .quad do_switch_to_user
+ .quad do_iret
.quad do_vcpu_op
.quad do_set_segment_base /* 25 */
.quad do_mmuext_op
.quad do_acm_op
+ .quad do_nmi_op
.rept NR_hypercalls-((.-hypercall_table)/8)
.quad do_ni_hypercall
.endr
@@ -659,11 +682,12 @@ ENTRY(hypercall_args_table)
.byte 3 /* do_grant_table_op */ /* 20 */
.byte 2 /* do_vm_assist */
.byte 4 /* do_update_va_mapping_otherdomain */
- .byte 0 /* do_switch_to_user */
+ .byte 0 /* do_iret */
.byte 3 /* do_vcpu_op */
.byte 2 /* do_set_segment_base */ /* 25 */
.byte 4 /* do_mmuext_op */
.byte 1 /* do_acm_op */
+ .byte 2 /* do_nmi_op */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 08e0f88bb8..085fb4d22e 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -80,7 +80,7 @@ void __init paging_init(void)
l2_pgentry_t *l2_ro_mpt;
struct pfn_info *pg;
- idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+ idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
/* Create user-accessible L2 directory to map the MPT for guests. */
l3_ro_mpt = alloc_xenheap_page();
@@ -119,6 +119,12 @@ void __init paging_init(void)
/* Set up linear page table mapping. */
idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR);
+
+ /* Install per-domain mappings for idle domain. */
+ idle_pg_table[l4_table_offset(PERDOMAIN_VIRT_START)] =
+ l4e_from_page(
+ virt_to_page(idle_vcpu[0]->domain->arch.mm_perdomain_l3),
+ __PAGE_HYPERVISOR);
}
void __init zap_low_mappings(void)
diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
index 4f7c822ef8..9756c54589 100644
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -12,6 +12,7 @@
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/msr.h>
+#include <asm/shadow.h>
#include <asm/vmx.h>
void show_registers(struct cpu_user_regs *regs)
@@ -113,6 +114,52 @@ asmlinkage void do_double_fault(struct cpu_user_regs *regs)
__asm__ __volatile__ ( "hlt" );
}
+void toggle_guest_mode(struct vcpu *v)
+{
+ v->arch.flags ^= TF_kernel_mode;
+ __asm__ __volatile__ ( "swapgs" );
+ update_pagetables(v);
+ write_ptbase(v);
+}
+
+long do_iret(void)
+{
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+ struct iret_context iret_saved;
+ struct vcpu *v = current;
+
+ if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp,
+ sizeof(iret_saved))) )
+ domain_crash_synchronous();
+
+ /* Returning to user mode? */
+ if ( (iret_saved.cs & 3) == 3 )
+ {
+ if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
+ return -EFAULT;
+ toggle_guest_mode(v);
+ }
+
+ regs->rip = iret_saved.rip;
+ regs->cs = iret_saved.cs | 3; /* force guest privilege */
+ regs->rflags = (iret_saved.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
+ regs->rsp = iret_saved.rsp;
+ regs->ss = iret_saved.ss | 3; /* force guest privilege */
+
+ if ( !(iret_saved.flags & VGCF_IN_SYSCALL) )
+ {
+ regs->entry_vector = 0;
+ regs->r11 = iret_saved.r11;
+ regs->rcx = iret_saved.rcx;
+ }
+
+ /* No longer in NMI context. */
+ clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
+
+ /* Saved %rax gets written back to regs->rax in entry.S. */
+ return iret_saved.rax;
+}
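
The x86_64 counterpart consumes a struct iret_context at the guest %rsp. From the field accesses above, the frame carries rax, r11, rcx, flags, rip, cs, rflags, rsp and ss; that ordering matches the public header of this era, but treat the sketch as inferred:

    struct iret_context {
        /* Top of stack (%rsp at the point of the hypercall). */
        uint64_t rax;        /* written back as the hypercall's return */
        uint64_t r11, rcx;   /* skipped when VGCF_IN_SYSCALL is set    */
        uint64_t flags;      /* e.g. VGCF_IN_SYSCALL                   */
        uint64_t rip, cs, rflags, rsp, ss;
    };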
+
asmlinkage void syscall_enter(void);
void __init percpu_traps_init(void)
{
diff --git a/xen/common/bitmap.c b/xen/common/bitmap.c
index d931eca83c..ea2da85c77 100644
--- a/xen/common/bitmap.c
+++ b/xen/common/bitmap.c
@@ -282,6 +282,111 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
#endif
EXPORT_SYMBOL(__bitmap_weight);
+/*
+ * Bitmap printing & parsing functions: first version by Bill Irwin,
+ * second version by Paul Jackson, third by Joe Korty.
+ */
+
+#define CHUNKSZ 32
+#define nbits_to_hold_value(val) fls(val)
+#define roundup_power2(val,modulus) (((val) + (modulus) - 1) & ~((modulus) - 1))
+#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
+#define BASEDEC 10 /* fancier cpuset lists input in decimal */
+
+/**
+ * bitmap_scnprintf - convert bitmap to an ASCII hex string.
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Exactly @nmaskbits bits are displayed. Hex digits are grouped into
+ * comma-separated sets of eight digits per set.
+ */
+int bitmap_scnprintf(char *buf, unsigned int buflen,
+ const unsigned long *maskp, int nmaskbits)
+{
+ int i, word, bit, len = 0;
+ unsigned long val;
+ const char *sep = "";
+ int chunksz;
+ u32 chunkmask;
+
+ chunksz = nmaskbits & (CHUNKSZ - 1);
+ if (chunksz == 0)
+ chunksz = CHUNKSZ;
+
+ i = roundup_power2(nmaskbits, CHUNKSZ) - CHUNKSZ;
+ for (; i >= 0; i -= CHUNKSZ) {
+ chunkmask = ((1ULL << chunksz) - 1);
+ word = i / BITS_PER_LONG;
+ bit = i % BITS_PER_LONG;
+ val = (maskp[word] >> bit) & chunkmask;
+ len += scnprintf(buf+len, buflen-len, "%s%0*lx", sep,
+ (chunksz+3)/4, val);
+ chunksz = CHUNKSZ;
+ sep = ",";
+ }
+ return len;
+}
+EXPORT_SYMBOL(bitmap_scnprintf);
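
A minimal usage sketch for the hex formatter (values illustrative):

    unsigned long mask[1] = { 0x0000ffffUL };   /* low 16 bits set */
    char buf[16];
    int n = bitmap_scnprintf(buf, sizeof(buf), mask, 32);
    /* buf == "0000ffff", n == 8: one 8-hex-digit chunk covers 32 bits */
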
+
+/*
+ * bscnl_emit(buf, buflen, rbot, rtop, len)
+ *
+ * Helper routine for bitmap_scnlistprintf(). Write decimal number
+ * or range to buf, suppressing output past buf+buflen, with optional
+ * comma-prefix. Return len of what would be written to buf, if it
+ * all fit.
+ */
+static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len)
+{
+ if (len > 0)
+ len += scnprintf(buf + len, buflen - len, ",");
+ if (rbot == rtop)
+ len += scnprintf(buf + len, buflen - len, "%d", rbot);
+ else
+ len += scnprintf(buf + len, buflen - len, "%d-%d", rbot, rtop);
+ return len;
+}
+
+/**
+ * bitmap_scnlistprintf - convert bitmap to list format ASCII string
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Output format is a comma-separated list of decimal numbers and
+ * ranges. Consecutively set bits are shown as two hyphen-separated
+ * decimal numbers, the smallest and largest bit numbers set in
+ * the range. Output format is compatible with the format
+ * accepted as input by bitmap_parselist().
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing '\0', as
+ * per ISO C99.
+ */
+int bitmap_scnlistprintf(char *buf, unsigned int buflen,
+ const unsigned long *maskp, int nmaskbits)
+{
+ int len = 0;
+ /* current bit is 'cur', most recently seen range is [rbot, rtop] */
+ int cur, rbot, rtop;
+
+ rbot = cur = find_first_bit(maskp, nmaskbits);
+ while (cur < nmaskbits) {
+ rtop = cur;
+ cur = find_next_bit(maskp, nmaskbits, cur+1);
+ if (cur >= nmaskbits || cur > rtop + 1) {
+ len = bscnl_emit(buf, buflen, rbot, rtop, len);
+ rbot = cur;
+ }
+ }
+ return len;
+}
+EXPORT_SYMBOL(bitmap_scnlistprintf);
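
The list formatter pairs with it, on the same pattern (illustrative; cpulist_scnprintf(), used by the keyhandler changes below, appears to be a thin wrapper over this routine):

    unsigned long mask[1] = { 0x3bUL };   /* bits 0,1,3,4,5 set */
    char buf[32];
    bitmap_scnlistprintf(buf, sizeof(buf), mask, 32);
    /* buf == "0-1,3-5" */
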
+
/**
* bitmap_find_free_region - find a contiguous aligned mem region
* @bitmap: an array of unsigned longs corresponding to the bitmap
diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c
index 97cd571376..8e7754d84a 100644
--- a/xen/common/dom0_ops.c
+++ b/xen/common/dom0_ops.c
@@ -110,13 +110,13 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
switch ( op->cmd )
{
- case DOM0_SETDOMAININFO:
+ case DOM0_SETVCPUCONTEXT:
{
- struct domain *d = find_domain_by_id(op->u.setdomaininfo.domain);
+ struct domain *d = find_domain_by_id(op->u.setvcpucontext.domain);
ret = -ESRCH;
if ( d != NULL )
{
- ret = set_info_guest(d, &op->u.setdomaininfo);
+ ret = set_info_guest(d, &op->u.setvcpucontext);
put_domain(d);
}
}
@@ -284,11 +284,12 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
}
break;
- case DOM0_PINCPUDOMAIN:
+ case DOM0_SETVCPUAFFINITY:
{
- domid_t dom = op->u.pincpudomain.domain;
+ domid_t dom = op->u.setvcpuaffinity.domain;
struct domain *d = find_domain_by_id(dom);
struct vcpu *v;
+ cpumask_t new_affinity;
if ( d == NULL )
{
@@ -296,15 +297,15 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
break;
}
- if ( (op->u.pincpudomain.vcpu >= MAX_VIRT_CPUS) ||
- !d->vcpu[op->u.pincpudomain.vcpu] )
+ if ( (op->u.setvcpuaffinity.vcpu >= MAX_VIRT_CPUS) ||
+ !d->vcpu[op->u.setvcpuaffinity.vcpu] )
{
ret = -EINVAL;
put_domain(d);
break;
}
- v = d->vcpu[op->u.pincpudomain.vcpu];
+ v = d->vcpu[op->u.setvcpuaffinity.vcpu];
if ( v == NULL )
{
ret = -ESRCH;
@@ -319,22 +320,13 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
break;
}
- v->cpumap = op->u.pincpudomain.cpumap;
+ new_affinity = v->cpu_affinity;
+ memcpy(cpus_addr(new_affinity),
+ &op->u.setvcpuaffinity.cpumap,
+ min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
+ (int)sizeof(op->u.setvcpuaffinity.cpumap)));
- if ( v->cpumap == CPUMAP_RUNANYWHERE )
- {
- clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
- }
- else
- {
- /* pick a new cpu from the usable map */
- int new_cpu;
- new_cpu = (int)find_first_set_bit(v->cpumap) % num_online_cpus();
- vcpu_pause(v);
- vcpu_migrate_cpu(v, new_cpu);
- set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
- vcpu_unpause(v);
- }
+ ret = vcpu_set_affinity(v, &new_affinity);
put_domain(d);
}
@@ -506,7 +498,11 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
op->u.getvcpuinfo.running = test_bit(_VCPUF_running, &v->vcpu_flags);
op->u.getvcpuinfo.cpu_time = v->cpu_time;
op->u.getvcpuinfo.cpu = v->processor;
- op->u.getvcpuinfo.cpumap = v->cpumap;
+ op->u.getvcpuinfo.cpumap = 0;
+ memcpy(&op->u.getvcpuinfo.cpumap,
+ cpus_addr(v->cpu_affinity),
+ min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
+ (int)sizeof(op->u.getvcpuinfo.cpumap)));
ret = 0;
if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
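
A hedged caller-side sketch of the renamed affinity op (field names taken from the hunk above; interface-version setup, privcmd plumbing and error handling omitted):

    /* Pin vcpu 1 of domain 3 to physical CPUs 0 and 2. */
    dom0_op_t op;
    memset(&op, 0, sizeof(op));
    op.cmd = DOM0_SETVCPUAFFINITY;
    op.u.setvcpuaffinity.domain = 3;
    op.u.setvcpuaffinity.vcpu   = 1;
    op.u.setvcpuaffinity.cpumap = 0x5;   /* bit 0 | bit 2 */
    /* reaches do_dom0_op() above via the dom0-op hypercall */

Note the memcpy() clamp in the hunk: only min(sizeof(cpumask_t), sizeof(cpumap)) bytes are copied, and since new_affinity is seeded from v->cpu_affinity, CPUs beyond the 64-bit tool-side cpumap simply keep their previous affinity bits.
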
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 0f206d8e1c..34f2ed8678 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -46,12 +46,10 @@ struct domain *do_createdomain(domid_t dom_id, unsigned int cpu)
INIT_LIST_HEAD(&d->page_list);
INIT_LIST_HEAD(&d->xenpage_list);
- if ( d->domain_id == IDLE_DOMAIN_ID )
- set_bit(_DOMF_idle_domain, &d->domain_flags);
- else
+ if ( !is_idle_domain(d) )
set_bit(_DOMF_ctrl_pause, &d->domain_flags);
- if ( !is_idle_task(d) &&
+ if ( !is_idle_domain(d) &&
((evtchn_init(d) != 0) || (grant_table_create(d) != 0)) )
goto fail1;
@@ -68,7 +66,7 @@ struct domain *do_createdomain(domid_t dom_id, unsigned int cpu)
(arch_do_createdomain(v) != 0) )
goto fail3;
- if ( !is_idle_task(d) )
+ if ( !is_idle_domain(d) )
{
write_lock(&domlist_lock);
pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
@@ -173,20 +171,23 @@ static void domain_shutdown_finalise(void)
BUG_ON(d == NULL);
BUG_ON(d == current->domain);
- BUG_ON(!test_bit(_DOMF_shuttingdown, &d->domain_flags));
- BUG_ON(test_bit(_DOMF_shutdown, &d->domain_flags));
+
+ LOCK_BIGLOCK(d);
/* Make sure that every vcpu is descheduled before we finalise. */
for_each_vcpu ( d, v )
vcpu_sleep_sync(v);
- BUG_ON(!cpus_empty(d->cpumask));
+ BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
sync_pagetable_state(d);
- set_bit(_DOMF_shutdown, &d->domain_flags);
- clear_bit(_DOMF_shuttingdown, &d->domain_flags);
+ /* Don't set DOMF_shutdown until execution contexts are sync'ed. */
+ if ( !test_and_set_bit(_DOMF_shutdown, &d->domain_flags) )
+ send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
- send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
+ UNLOCK_BIGLOCK(d);
+
+ put_domain(d);
}
static __init int domain_shutdown_finaliser_init(void)
@@ -222,16 +223,17 @@ void domain_shutdown(struct domain *d, u8 reason)
/* Mark the domain as shutting down. */
d->shutdown_code = reason;
- if ( !test_and_set_bit(_DOMF_shuttingdown, &d->domain_flags) )
- {
- /* This vcpu won the race to finalise the shutdown. */
- domain_shuttingdown[smp_processor_id()] = d;
- raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
- }
/* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
for_each_vcpu ( d, v )
+ {
+ atomic_inc(&v->pausecnt);
vcpu_sleep_nosync(v);
+ }
+
+ get_knownalive_domain(d);
+ domain_shuttingdown[smp_processor_id()] = d;
+ raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
}
@@ -357,11 +359,11 @@ void domain_unpause_by_systemcontroller(struct domain *d)
* of domains other than domain 0. ie. the domains that are being built by
* the userspace dom0 domain builder.
*/
-int set_info_guest(struct domain *d, dom0_setdomaininfo_t *setdomaininfo)
+int set_info_guest(struct domain *d, dom0_setvcpucontext_t *setvcpucontext)
{
int rc = 0;
struct vcpu_guest_context *c = NULL;
- unsigned long vcpu = setdomaininfo->vcpu;
+ unsigned long vcpu = setvcpucontext->vcpu;
struct vcpu *v;
if ( (vcpu >= MAX_VIRT_CPUS) || ((v = d->vcpu[vcpu]) == NULL) )
@@ -374,7 +376,7 @@ int set_info_guest(struct domain *d, dom0_setdomaininfo_t *setdomaininfo)
return -ENOMEM;
rc = -EFAULT;
- if ( copy_from_user(c, setdomaininfo->ctxt, sizeof(*c)) == 0 )
+ if ( copy_from_user(c, setvcpucontext->ctxt, sizeof(*c)) == 0 )
rc = arch_set_info_guest(v, c);
xfree(c);
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 4622007c6a..93312868b2 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -312,8 +312,6 @@ __gnttab_map_grant_ref(
if ( !act->pin )
clear_bit(_GTF_reading, &sha->flags);
- spin_unlock(&rd->grant_table->lock);
-
unlock_out:
spin_unlock(&rd->grant_table->lock);
(void)__put_user(rc, &uop->status);
@@ -471,7 +469,7 @@ gnttab_unmap_grant_ref(
for ( i = 0; i < count; i++ )
(void)__gnttab_unmap_grant_ref(&uop[i]);
- flush_tlb_mask(current->domain->cpumask);
+ flush_tlb_mask(current->domain->domain_dirty_cpumask);
return 0;
}
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index 9975ee639d..f1d74c731a 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -11,6 +11,7 @@
#include <xen/compile.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <public/nmi.h>
#include <public/version.h>
void cmdline_parse(char *cmdline)
@@ -148,6 +149,43 @@ long do_xen_version(int cmd, void *arg)
return -ENOSYS;
}
+long do_nmi_op(unsigned int cmd, void *arg)
+{
+ struct vcpu *v = current;
+ struct domain *d = current->domain;
+ long rc = 0;
+
+ switch ( cmd )
+ {
+ case XENNMI_register_callback:
+ if ( (d->domain_id != 0) || (v->vcpu_id != 0) )
+ {
+ rc = -EINVAL;
+ }
+ else
+ {
+ v->nmi_addr = (unsigned long)arg;
+#ifdef CONFIG_X86
+ /*
+ * If no handler was registered we can 'lose the NMI edge'.
+ * Re-assert it now.
+ */
+ if ( d->shared_info->arch.nmi_reason != 0 )
+ set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
+#endif
+ }
+ break;
+ case XENNMI_unregister_callback:
+ v->nmi_addr = 0;
+ break;
+ default:
+ rc = -ENOSYS;
+ break;
+ }
+
+ return rc;
+}
+
long do_vm_assist(unsigned int cmd, unsigned int type)
{
return vm_assist(current->domain, cmd, type);
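
A guest-side registration sketch (HYPERVISOR_nmi_op() is assumed to be the stub added alongside this change in the sparse tree's hypercall headers; nmi_entry is a hypothetical guest trampoline):

    extern void nmi_entry(void);   /* guest NMI entry point (hypothetical) */

    static int guest_register_nmi(void)
    {
        /* Only dom0's vcpu0 may register; the hypervisor re-asserts a
         * pending edge if nmi_reason was already non-zero (see above). */
        return HYPERVISOR_nmi_op(XENNMI_register_callback, (void *)nmi_entry);
    }
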
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index c78e104747..5fc1e6b04c 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -97,13 +97,22 @@ static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
machine_restart(NULL);
}
-static void do_task_queues(unsigned char key)
+static void cpuset_print(char *set, int size, cpumask_t mask)
+{
+ *set++ = '{';
+ set += cpulist_scnprintf(set, size-2, mask);
+ *set++ = '}';
+ *set++ = '\0';
+}
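
Illustratively, the helper above renders an affinity mask as a braced range list:

    char buf[100];
    cpuset_print(buf, sizeof(buf), v->cpu_affinity);
    /* e.g. buf == "{0-3,7}" for CPUs 0-3 and 7 */
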
+
+static void dump_domains(unsigned char key)
{
struct domain *d;
struct vcpu *v;
s_time_t now = NOW();
+ char cpuset[100];
- printk("'%c' pressed -> dumping task queues (now=0x%X:%08X)\n", key,
+ printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key,
(u32)(now>>32), (u32)now);
read_lock(&domlist_lock);
@@ -111,9 +120,11 @@ static void do_task_queues(unsigned char key)
for_each_domain ( d )
{
printk("General information for domain %u:\n", d->domain_id);
- printk(" flags=%lx refcnt=%d nr_pages=%d xenheap_pages=%d\n",
+ cpuset_print(cpuset, sizeof(cpuset), d->domain_dirty_cpumask);
+ printk(" flags=%lx refcnt=%d nr_pages=%d xenheap_pages=%d "
+ "dirty_cpus=%s\n",
d->domain_flags, atomic_read(&d->refcnt),
- d->tot_pages, d->xenheap_pages);
+ d->tot_pages, d->xenheap_pages, cpuset);
printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
"%02x%02x-%02x%02x%02x%02x%02x%02x\n",
d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
@@ -129,12 +140,16 @@ static void do_task_queues(unsigned char key)
d->domain_id);
for_each_vcpu ( d, v ) {
printk(" VCPU%d: CPU%d [has=%c] flags=%lx "
- "upcall_pend = %02x, upcall_mask = %02x\n",
+ "upcall_pend = %02x, upcall_mask = %02x ",
v->vcpu_id, v->processor,
test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F',
v->vcpu_flags,
v->vcpu_info->evtchn_upcall_pending,
v->vcpu_info->evtchn_upcall_mask);
+ cpuset_print(cpuset, sizeof(cpuset), v->vcpu_dirty_cpumask);
+ printk("dirty_cpus=%s ", cpuset);
+ cpuset_print(cpuset, sizeof(cpuset), v->cpu_affinity);
+ printk("cpu_affinity=%s\n", cpuset);
printk(" Notifying guest (virq %d, port %d, stat %d/%d/%d)\n",
VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG],
test_bit(v->virq_to_evtchn[VIRQ_DEBUG],
@@ -170,6 +185,27 @@ void do_debug_key(unsigned char key, struct cpu_user_regs *regs)
bit. */
}
+void do_nmi_stats(unsigned char key)
+{
+ int i;
+ struct domain *d;
+ struct vcpu *v;
+ printk("CPU\tNMI\n");
+ for_each_cpu(i)
+ printk("%3d\t%3d\n", i, nmi_count(i));
+
+ if ((d = dom0) == NULL)
+ return;
+ if ((v = d->vcpu[0]) == NULL)
+ return;
+ if (v->vcpu_flags & (VCPUF_nmi_pending|VCPUF_nmi_masked))
+ printk("dom0 vpu0: NMI %s%s\n",
+ v->vcpu_flags & VCPUF_nmi_pending ? "pending " : "",
+ v->vcpu_flags & VCPUF_nmi_masked ? "masked " : "");
+ else
+ printk("dom0 vcpu0: NMI neither pending nor masked\n");
+}
+
#ifndef NDEBUG
void debugtrace_key(unsigned char key)
{
@@ -193,11 +229,12 @@ void initialize_keytable(void)
register_keyhandler(
'L', reset_sched_histo, "reset sched latency histogram");
register_keyhandler(
- 'q', do_task_queues, "dump task queues + guest state");
+ 'q', dump_domains, "dump domain (and guest debug) info");
register_keyhandler(
'r', dump_runq, "dump run queues");
register_irq_keyhandler(
'R', halt_machine, "reboot machine");
+ register_keyhandler('N', do_nmi_stats, "NMI statistics");
#ifndef NDEBUG
register_keyhandler(
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 847fcbb0cc..ddad691d29 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -38,10 +38,7 @@ increase_reservation(
if ( (extent_order != 0) &&
!multipage_allocation_permitted(current->domain) )
- {
- DPRINTK("Only I/O-capable domains may allocate multi-page extents.\n");
return 0;
- }
for ( i = 0; i < nr_extents; i++ )
{
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 1890db8e81..a3eaf6ab0a 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -615,7 +615,7 @@ void free_domheap_pages(struct pfn_info *pg, unsigned int order)
shadow_drop_references(d, &pg[i]);
ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
pg[i].tlbflush_timestamp = tlbflush_current_time();
- pg[i].u.free.cpumask = d->cpumask;
+ pg[i].u.free.cpumask = d->domain_dirty_cpumask;
list_del(&pg[i].list);
}
diff --git a/xen/common/sched_bvt.c b/xen/common/sched_bvt.c
index 37e2f1b7ec..9996e8bfac 100644
--- a/xen/common/sched_bvt.c
+++ b/xen/common/sched_bvt.c
@@ -20,7 +20,7 @@
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
-#include <xen/ac_timer.h>
+#include <xen/timer.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
@@ -31,7 +31,8 @@ struct bvt_vcpu_info
struct list_head run_list; /* runqueue list pointers */
u32 avt; /* actual virtual time */
u32 evt; /* effective virtual time */
- struct vcpu *vcpu;
+ int migrated; /* migrated to a new CPU */
+ struct vcpu *vcpu;
struct bvt_dom_info *inf;
};
@@ -44,9 +45,9 @@ struct bvt_dom_info
limits*/
s32 warp_value; /* virtual time warp */
s_time_t warpl; /* warp limit */
- struct ac_timer warp_timer; /* deals with warpl */
+ struct timer warp_timer; /* deals with warpl */
s_time_t warpu; /* unwarp time requirement */
- struct ac_timer unwarp_timer; /* deals with warpu */
+ struct timer unwarp_timer; /* deals with warpu */
struct bvt_vcpu_info vcpu_inf[MAX_VIRT_CPUS];
};
@@ -97,9 +98,9 @@ static inline int __task_on_runqueue(struct vcpu *d)
static void warp_timer_fn(void *data)
{
struct bvt_dom_info *inf = data;
- unsigned int cpu = inf->domain->vcpu[0]->processor;
-
- spin_lock_irq(&schedule_data[cpu].schedule_lock);
+ struct vcpu *v = inf->domain->vcpu[0];
+
+ vcpu_schedule_lock_irq(v);
inf->warp = 0;
@@ -107,28 +108,28 @@ static void warp_timer_fn(void *data)
if ( inf->warpu == 0 )
{
inf->warpback = 0;
- cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+ cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
}
- set_ac_timer(&inf->unwarp_timer, NOW() + inf->warpu);
+ set_timer(&inf->unwarp_timer, NOW() + inf->warpu);
- spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+ vcpu_schedule_unlock_irq(v);
}
static void unwarp_timer_fn(void *data)
{
struct bvt_dom_info *inf = data;
- unsigned int cpu = inf->domain->vcpu[0]->processor;
+ struct vcpu *v = inf->domain->vcpu[0];
- spin_lock_irq(&schedule_data[cpu].schedule_lock);
+ vcpu_schedule_lock_irq(v);
if ( inf->warpback )
{
inf->warp = 1;
- cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+ cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
}
- spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+ vcpu_schedule_unlock_irq(v);
}
static inline u32 calc_avt(struct vcpu *d, s_time_t now)
@@ -167,6 +168,7 @@ static inline u32 calc_evt(struct vcpu *d, u32 avt)
static int bvt_alloc_task(struct vcpu *v)
{
struct domain *d = v->domain;
+ struct bvt_dom_info *inf;
if ( (d->sched_priv == NULL) )
{
@@ -175,10 +177,27 @@ static int bvt_alloc_task(struct vcpu *v)
memset(d->sched_priv, 0, sizeof(struct bvt_dom_info));
}
- v->sched_priv = &BVT_INFO(d)->vcpu_inf[v->vcpu_id];
+ inf = BVT_INFO(d);
- BVT_INFO(d)->vcpu_inf[v->vcpu_id].inf = BVT_INFO(d);
- BVT_INFO(d)->vcpu_inf[v->vcpu_id].vcpu = v;
+ v->sched_priv = &inf->vcpu_inf[v->vcpu_id];
+
+ inf->vcpu_inf[v->vcpu_id].inf = BVT_INFO(d);
+ inf->vcpu_inf[v->vcpu_id].vcpu = v;
+
+ if ( v->vcpu_id == 0 )
+ {
+ inf->mcu_advance = MCU_ADVANCE;
+ inf->domain = v->domain;
+ inf->warpback = 0;
+ /* Set some default values here. */
+ inf->warp = 0;
+ inf->warp_value = 0;
+ inf->warpl = MILLISECS(2000);
+ inf->warpu = MILLISECS(1000);
+ /* Initialise the warp timers. */
+ init_timer(&inf->warp_timer, warp_timer_fn, inf, v->processor);
+ init_timer(&inf->unwarp_timer, unwarp_timer_fn, inf, v->processor);
+ }
return 0;
}
@@ -188,10 +207,7 @@ static int bvt_alloc_task(struct vcpu *v)
*/
static void bvt_add_task(struct vcpu *v)
{
- struct bvt_dom_info *inf = BVT_INFO(v->domain);
struct bvt_vcpu_info *einf = EBVT_INFO(v);
- ASSERT(inf != NULL);
- ASSERT(v != NULL);
/* Allocate per-CPU context if this is the first domain to be added. */
if ( CPU_INFO(v->processor) == NULL )
@@ -202,24 +218,7 @@ static void bvt_add_task(struct vcpu *v)
CPU_SVT(v->processor) = 0;
}
- if ( v->vcpu_id == 0 )
- {
- inf->mcu_advance = MCU_ADVANCE;
- inf->domain = v->domain;
- inf->warpback = 0;
- /* Set some default values here. */
- inf->warp = 0;
- inf->warp_value = 0;
- inf->warpl = MILLISECS(2000);
- inf->warpu = MILLISECS(1000);
- /* Initialise the warp timers. */
- init_ac_timer(&inf->warp_timer, warp_timer_fn, inf, v->processor);
- init_ac_timer(&inf->unwarp_timer, unwarp_timer_fn, inf, v->processor);
- }
-
- einf->vcpu = v;
-
- if ( is_idle_task(v->domain) )
+ if ( is_idle_vcpu(v) )
{
einf->avt = einf->evt = ~0U;
BUG_ON(__task_on_runqueue(v));
@@ -250,9 +249,11 @@ static void bvt_wake(struct vcpu *v)
/* Set the BVT parameters. AVT should always be updated
if CPU migration occurred.*/
- if ( einf->avt < CPU_SVT(cpu) ||
- unlikely(test_bit(_VCPUF_cpu_migrated, &v->vcpu_flags)) )
+ if ( (einf->avt < CPU_SVT(cpu)) || einf->migrated )
+ {
einf->avt = CPU_SVT(cpu);
+ einf->migrated = 0;
+ }
/* Deal with warping here. */
einf->evt = calc_evt(v, einf->avt);
@@ -265,29 +266,51 @@ static void bvt_wake(struct vcpu *v)
((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
ctx_allow;
- if ( is_idle_task(curr->domain) || (einf->evt <= curr_evt) )
+ if ( is_idle_vcpu(curr) || (einf->evt <= curr_evt) )
cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
else if ( schedule_data[cpu].s_timer.expires > r_time )
- set_ac_timer(&schedule_data[cpu].s_timer, r_time);
+ set_timer(&schedule_data[cpu].s_timer, r_time);
}
static void bvt_sleep(struct vcpu *v)
{
- if ( test_bit(_VCPUF_running, &v->vcpu_flags) )
+ if ( schedule_data[v->processor].curr == v )
cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
else if ( __task_on_runqueue(v) )
__del_from_runqueue(v);
}
+
+static int bvt_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+ if ( v == current )
+ return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
+
+ vcpu_pause(v);
+ v->cpu_affinity = *affinity;
+ v->processor = first_cpu(v->cpu_affinity);
+ EBVT_INFO(v)->migrated = 1;
+ vcpu_unpause(v);
+
+ return 0;
+}
+
+
/**
* bvt_free_task - free BVT private structures for a task
* @d: task
*/
static void bvt_free_task(struct domain *d)
{
- ASSERT(d->sched_priv != NULL);
- xfree(d->sched_priv);
+ struct bvt_dom_info *inf = BVT_INFO(d);
+
+ ASSERT(inf != NULL);
+
+ kill_timer(&inf->warp_timer);
+ kill_timer(&inf->unwarp_timer);
+
+ xfree(inf);
}
/* Control the scheduler. */
@@ -336,10 +359,10 @@ static int bvt_adjdom(
inf->warpu = MILLISECS(warpu);
/* If the unwarp timer set up it needs to be removed */
- rem_ac_timer(&inf->unwarp_timer);
+ stop_timer(&inf->unwarp_timer);
/* If we stop warping the warp timer needs to be removed */
if ( !warpback )
- rem_ac_timer(&inf->warp_timer);
+ stop_timer(&inf->warp_timer);
}
else if ( cmd->direction == SCHED_INFO_GET )
{
@@ -380,17 +403,17 @@ static struct task_slice bvt_do_schedule(s_time_t now)
ASSERT(prev_einf != NULL);
ASSERT(__task_on_runqueue(prev));
- if ( likely(!is_idle_task(prev->domain)) )
+ if ( likely(!is_idle_vcpu(prev)) )
{
prev_einf->avt = calc_avt(prev, now);
prev_einf->evt = calc_evt(prev, prev_einf->avt);
if(prev_inf->warpback && prev_inf->warpl > 0)
- rem_ac_timer(&prev_inf->warp_timer);
+ stop_timer(&prev_inf->warp_timer);
__del_from_runqueue(prev);
- if ( domain_runnable(prev) )
+ if ( vcpu_runnable(prev) )
__add_to_runqueue_tail(prev);
}
@@ -436,7 +459,7 @@ static struct task_slice bvt_do_schedule(s_time_t now)
}
if ( next_einf->inf->warp && next_einf->inf->warpl > 0 )
- set_ac_timer(&next_einf->inf->warp_timer, now + next_einf->inf->warpl);
+ set_timer(&next_einf->inf->warp_timer, now + next_einf->inf->warpl);
/* Extract the domain pointers from the dom infos */
next = next_einf->vcpu;
@@ -471,13 +494,13 @@ static struct task_slice bvt_do_schedule(s_time_t now)
}
/* work out time for next run through scheduler */
- if ( is_idle_task(next->domain) )
+ if ( is_idle_vcpu(next) )
{
r_time = ctx_allow;
goto sched_done;
}
- if ( (next_prime == NULL) || is_idle_task(next_prime->domain) )
+ if ( (next_prime == NULL) || is_idle_vcpu(next_prime) )
{
/* We have only one runnable task besides the idle task. */
r_time = 10 * ctx_allow; /* RN: random constant */
@@ -557,6 +580,7 @@ struct scheduler sched_bvt_def = {
.dump_cpu_state = bvt_dump_cpu_state,
.sleep = bvt_sleep,
.wake = bvt_wake,
+ .set_affinity = bvt_set_affinity
};
/*
diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
index 9ae98f129e..45fdac3073 100644
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -9,7 +9,7 @@
#include <xen/sched.h>
#include <xen/sched-if.h>
#include <public/sched_ctl.h>
-#include <xen/ac_timer.h>
+#include <xen/timer.h>
#include <xen/softirq.h>
#include <xen/time.h>
@@ -325,22 +325,30 @@ DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs)
list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
}
+
/* Allocates memory for per domain private scheduling data*/
-static int sedf_alloc_task(struct vcpu *d) {
- PRINT(2,"sedf_alloc_task was called, domain-id %i.%i\n",d->domain->domain_id,
- d->vcpu_id);
- if (d->domain->sched_priv == NULL) {
- if ((d->domain->sched_priv =
- xmalloc(struct sedf_dom_info)) == NULL )
+static int sedf_alloc_task(struct vcpu *d)
+{
+ PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n",
+ d->domain->domain_id, d->vcpu_id);
+
+ if ( d->domain->sched_priv == NULL )
+ {
+ d->domain->sched_priv = xmalloc(struct sedf_dom_info);
+ if ( d->domain->sched_priv == NULL )
return -1;
memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
}
- if ((d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
+
+ if ( (d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
return -1;
+
memset(d->sched_priv, 0, sizeof(struct sedf_vcpu_info));
+
return 0;
}
+
/* Setup the sedf_dom_info */
static void sedf_add_task(struct vcpu *d)
{
@@ -363,14 +371,17 @@ static void sedf_add_task(struct vcpu *d)
INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q));
}
- if (d->domain->domain_id==0) {
+ if ( d->domain->domain_id == 0 )
+ {
/*set dom0 to something useful to boot the machine*/
inf->period = MILLISECS(20);
inf->slice = MILLISECS(15);
inf->latency = 0;
inf->deadl_abs = 0;
inf->status = EXTRA_AWARE | SEDF_ASLEEP;
- } else {
+ }
+ else
+ {
/*other domains run in best effort mode*/
inf->period = WEIGHT_PERIOD;
inf->slice = 0;
@@ -379,14 +390,18 @@ static void sedf_add_task(struct vcpu *d)
inf->status = EXTRA_AWARE | SEDF_ASLEEP;
inf->extraweight = 1;
}
+
inf->period_orig = inf->period; inf->slice_orig = inf->slice;
INIT_LIST_HEAD(&(inf->list));
INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
- if (!is_idle_task(d->domain)) {
+ if ( !is_idle_vcpu(d) )
+ {
extraq_check(d);
- } else {
+ }
+ else
+ {
EDOM_INFO(d)->deadl_abs = 0;
EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
}
@@ -396,19 +411,28 @@ static void sedf_add_task(struct vcpu *d)
static void sedf_free_task(struct domain *d)
{
int i;
+
PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
+
ASSERT(d->sched_priv != NULL);
xfree(d->sched_priv);
- for (i = 0; i < MAX_VIRT_CPUS; i++)
- if ( d->vcpu[i] ) {
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ {
+ if ( d->vcpu[i] )
+ {
ASSERT(d->vcpu[i]->sched_priv != NULL);
xfree(d->vcpu[i]->sched_priv);
}
+ }
}
-/* handles the rescheduling, bookkeeping of domains running in their realtime-time :)*/
-static inline void desched_edf_dom (s_time_t now, struct vcpu* d) {
+/*
+ * Handles the rescheduling & bookkeeping of domains running in their
+ * guaranteed timeslice.
+ */
+static void desched_edf_dom(s_time_t now, struct vcpu* d)
+{
struct sedf_vcpu_info* inf = EDOM_INFO(d);
/*current domain is running in real time mode*/
@@ -418,27 +442,30 @@ static inline void desched_edf_dom (s_time_t now, struct vcpu* d) {
/*scheduling decisions, which don't remove the running domain
from the runq*/
- if ((inf->cputime < inf->slice) && sedf_runnable(d))
+ if ( (inf->cputime < inf->slice) && sedf_runnable(d) )
return;
__del_from_queue(d);
/*manage bookkeeping (i.e. calculate next deadline,
memorize overrun-time of slice) of finished domains*/
- if (inf->cputime >= inf->slice) {
+ if ( inf->cputime >= inf->slice )
+ {
inf->cputime -= inf->slice;
- if (inf->period < inf->period_orig) {
+ if ( inf->period < inf->period_orig )
+ {
/*this domain runs in latency scaling or burst mode*/
#if (UNBLOCK == UNBLOCK_BURST)
/*if we are running in burst scaling wait for two periods
before scaling periods up again*/
- if (now - inf->unblock_abs >= 2 * inf->period)
+ if ( (now - inf->unblock_abs) >= (2 * inf->period) )
#endif
{
inf->period *= 2; inf->slice *= 2;
- if ((inf->period > inf->period_orig) ||
- (inf->slice > inf->slice_orig)) {
+ if ( (inf->period > inf->period_orig) ||
+ (inf->slice > inf->slice_orig) )
+ {
/*reset slice & period*/
inf->period = inf->period_orig;
inf->slice = inf->slice_orig;
@@ -450,36 +477,46 @@ static inline void desched_edf_dom (s_time_t now, struct vcpu* d) {
}
/*add a runnable domain to the waitqueue*/
- if (sedf_runnable(d))
+ if ( sedf_runnable(d) )
+ {
__add_to_waitqueue_sort(d);
- else {
+ }
+ else
+ {
/*we have a blocked realtime task -> remove it from exqs too*/
#if (EXTRA > EXTRA_OFF)
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
+ if ( extraq_on(d, EXTRA_PEN_Q) )
+ extraq_del(d, EXTRA_PEN_Q);
#endif
- if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
+ if ( extraq_on(d, EXTRA_UTIL_Q) )
+ extraq_del(d, EXTRA_UTIL_Q);
#endif
}
+
ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
sedf_runnable(d)));
}
+
/* Update all elements on the queues */
-static inline void update_queues(s_time_t now, struct list_head* runq,
- struct list_head* waitq) {
- struct list_head *cur,*tmp;
+static void update_queues(
+ s_time_t now, struct list_head *runq, struct list_head *waitq)
+{
+ struct list_head *cur, *tmp;
struct sedf_vcpu_info *curinf;
PRINT(3,"Updating waitq..\n");
+
/*check for the first elements of the waitqueue, whether their
next period has already started*/
list_for_each_safe(cur, tmp, waitq) {
curinf = list_entry(cur, struct sedf_vcpu_info, list);
PRINT(4,"\tLooking @ dom %i.%i\n",
curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
- if (PERIOD_BEGIN(curinf) <= now) {
+ if ( PERIOD_BEGIN(curinf) <= now )
+ {
__del_from_queue(curinf->vcpu);
__add_to_runqueue_sort(curinf->vcpu);
}
@@ -488,13 +525,16 @@ static inline void update_queues(s_time_t now, struct list_head* runq,
}
PRINT(3,"Updating runq..\n");
+
/*process the runq, find domains that are on
the runqueue which shouldn't be there*/
list_for_each_safe(cur, tmp, runq) {
curinf = list_entry(cur,struct sedf_vcpu_info,list);
PRINT(4,"\tLooking @ dom %i.%i\n",
curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
- if (unlikely(curinf->slice == 0)) {
+
+ if ( unlikely(curinf->slice == 0) )
+ {
/*ignore domains with empty slice*/
PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
curinf->vcpu->domain->domain_id,
@@ -504,7 +544,8 @@ static inline void update_queues(s_time_t now, struct list_head* runq,
/*move them to their next period*/
curinf->deadl_abs += curinf->period;
/*ensure that the start of the next period is in the future*/
- if (unlikely(PERIOD_BEGIN(curinf) < now)) {
+ if ( unlikely(PERIOD_BEGIN(curinf) < now) )
+ {
curinf->deadl_abs +=
(DIV_UP(now - PERIOD_BEGIN(curinf),
curinf->period)) * curinf->period;
@@ -513,8 +554,10 @@ static inline void update_queues(s_time_t now, struct list_head* runq,
__add_to_waitqueue_sort(curinf->vcpu);
continue;
}
- if (unlikely((curinf->deadl_abs < now) ||
- (curinf->cputime > curinf->slice))) {
+
+ if ( unlikely((curinf->deadl_abs < now) ||
+ (curinf->cputime > curinf->slice)) )
+ {
/*we missed the deadline or the slice was
already finished... might happen because
of dom_adj.*/
@@ -550,6 +593,7 @@ static inline void update_queues(s_time_t now, struct list_head* runq,
PRINT(3,"done updating the queues\n");
}
+
#if (EXTRA > EXTRA_OFF)
/* removes a domain from the head of the according extraQ and
requeues it at a specified position:
@@ -557,9 +601,10 @@ static inline void update_queues(s_time_t now, struct list_head* runq,
weighted ext.: insert in sorted list by score
if the domain is blocked / has regained its short-block-loss
time it is not put on any queue */
-static inline void desched_extra_dom(s_time_t now, struct vcpu* d) {
+static void desched_extra_dom(s_time_t now, struct vcpu* d)
+{
struct sedf_vcpu_info *inf = EDOM_INFO(d);
- int i = extra_get_cur_q(inf);
+ int i = extra_get_cur_q(inf);
#if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
unsigned long oldscore;
@@ -575,14 +620,15 @@ static inline void desched_extra_dom(s_time_t now, struct vcpu* d) {
extraq_del(d, i);
#if (EXTRA == EXTRA_ROUNDR)
- if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
+ if ( sedf_runnable(d) && (inf->status & EXTRA_AWARE) )
/*add to the tail if it is runnable => round-robin*/
extraq_add_tail(d, EXTRA_UTIL_Q);
#elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
/*update the score*/
- oldscore = inf->score[i];
+ oldscore = inf->score[i];
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (i == EXTRA_PEN_Q) {
+ if ( i == EXTRA_PEN_Q )
+ {
/*domain was running in L0 extraq*/
/*reduce block lost, probably more sophistication here!*/
/*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
@@ -605,12 +651,13 @@ static inline void desched_extra_dom(s_time_t now, struct vcpu* d) {
inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
inf->short_block_lost_tot;
oldscore = 0;
- } else
+ }
+ else
#endif
{
/*domain was running in L1 extraq => score is inverse of
utilization and is used somewhat incremental!*/
- if (!inf->extraweight)
+ if ( !inf->extraweight )
/*NB: use fixed point arithmetic with 10 bits*/
inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
inf->slice;
@@ -619,24 +666,32 @@ static inline void desched_extra_dom(s_time_t now, struct vcpu* d) {
full (ie 100%) utilization is equivalent to 128 extraweight*/
inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
}
+
check_extra_queues:
/* Adding a runnable domain to the right queue and removing blocked ones*/
- if (sedf_runnable(d)) {
+ if ( sedf_runnable(d) )
+ {
/*add according to score: weighted round robin*/
if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
extraq_add_sort_update(d, i, oldscore);
}
- else {
+ else
+ {
/*remove this blocked domain from the waitq!*/
__del_from_queue(d);
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
/*make sure that we remove a blocked domain from the other
extraq too*/
- if (i == EXTRA_PEN_Q) {
- if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
- } else {
- if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
+ if ( i == EXTRA_PEN_Q )
+ {
+ if ( extraq_on(d, EXTRA_UTIL_Q) )
+ extraq_del(d, EXTRA_UTIL_Q);
+ }
+ else
+ {
+ if ( extraq_on(d, EXTRA_PEN_Q) )
+ extraq_del(d, EXTRA_PEN_Q);
}
#endif
}
@@ -647,16 +702,21 @@ static inline void desched_extra_dom(s_time_t now, struct vcpu* d) {
}
#endif
-static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
- s_time_t end_xt, struct list_head *extraq[], int cpu) {
+
+static struct task_slice sedf_do_extra_schedule(
+ s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
+{
struct task_slice ret;
struct sedf_vcpu_info *runinf;
ASSERT(end_xt > now);
+
/* Enough time left to use for extratime? */
- if (end_xt - now < EXTRA_QUANTUM)
+ if ( end_xt - now < EXTRA_QUANTUM )
goto return_idle;
+
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (!list_empty(extraq[EXTRA_PEN_Q])) {
+ if ( !list_empty(extraq[EXTRA_PEN_Q]) )
+ {
/*we still have elements on the level 0 extraq
=> let those run first!*/
runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
@@ -667,9 +727,12 @@ static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
#ifdef SEDF_STATS
runinf->pen_extra_slices++;
#endif
- } else
+ }
+ else
#endif
- if (!list_empty(extraq[EXTRA_UTIL_Q])) {
+ {
+ if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
+ {
/*use elements from the normal extraqueue*/
runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
struct sedf_vcpu_info,
@@ -680,6 +743,7 @@ static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
}
else
goto return_idle;
+ }
ASSERT(ret.time > 0);
ASSERT(sedf_runnable(ret.task));
@@ -692,6 +756,8 @@ static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
ASSERT(sedf_runnable(ret.task));
return ret;
}
+
+
/* Main scheduling function
Reasons for calling this function are:
-timeslice for the current period used up
@@ -699,7 +765,7 @@ static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
-and various others ;) in general: determine which domain to run next*/
static struct task_slice sedf_do_schedule(s_time_t now)
{
- int cpu = current->processor;
+ int cpu = smp_processor_id();
struct list_head *runq = RUNQ(cpu);
struct list_head *waitq = WAITQ(cpu);
#if (EXTRA > EXTRA_OFF)
@@ -711,20 +777,21 @@ static struct task_slice sedf_do_schedule(s_time_t now)
struct task_slice ret;
/*idle tasks don't need any of the following stuff*/
- if (is_idle_task(current->domain))
+ if ( is_idle_vcpu(current) )
goto check_waitq;
/* create local state of the status of the domain, in order to avoid
inconsistent state during scheduling decisions, because data for
- domain_runnable is not protected by the scheduling lock!*/
- if(!domain_runnable(current))
+ vcpu_runnable is not protected by the scheduling lock!*/
+ if ( !vcpu_runnable(current) )
inf->status |= SEDF_ASLEEP;
- if (inf->status & SEDF_ASLEEP)
+ if ( inf->status & SEDF_ASLEEP )
inf->block_abs = now;
#if (EXTRA > EXTRA_OFF)
- if (unlikely(extra_runs(inf))) {
+ if ( unlikely(extra_runs(inf)) )
+ {
/*special treatment of domains running in extra time*/
desched_extra_dom(now, current);
}
@@ -739,10 +806,12 @@ static struct task_slice sedf_do_schedule(s_time_t now)
/*now simply pick the first domain from the runqueue, which has the
earliest deadline, because the list is sorted*/
- if (!list_empty(runq)) {
+ if ( !list_empty(runq) )
+ {
runinf = list_entry(runq->next,struct sedf_vcpu_info,list);
ret.task = runinf->vcpu;
- if (!list_empty(waitq)) {
+ if ( !list_empty(waitq) )
+ {
waitinf = list_entry(waitq->next,
struct sedf_vcpu_info,list);
/*rerun scheduler, when scheduled domain reaches its
@@ -751,14 +820,16 @@ static struct task_slice sedf_do_schedule(s_time_t now)
ret.time = MIN(now + runinf->slice - runinf->cputime,
PERIOD_BEGIN(waitinf)) - now;
}
- else {
+ else
+ {
ret.time = runinf->slice - runinf->cputime;
}
CHECK(ret.time > 0);
goto sched_done;
}
- if (!list_empty(waitq)) {
+ if ( !list_empty(waitq) )
+ {
waitinf = list_entry(waitq->next,struct sedf_vcpu_info, list);
/*we could not find any suitable domain
=> look for domains that are aware of extratime*/
@@ -771,7 +842,8 @@ static struct task_slice sedf_do_schedule(s_time_t now)
#endif
CHECK(ret.time > 0);
}
- else {
+ else
+ {
/*this could probably never happen, but one never knows...*/
/*it can... imagine a second CPU, which is pure scifi ATM,
but one never knows ;)*/
@@ -782,11 +854,13 @@ static struct task_slice sedf_do_schedule(s_time_t now)
sched_done:
/*TODO: Do something USEFUL when this happens and find out, why it
still can happen!!!*/
- if (ret.time<0) {
+ if ( ret.time < 0)
+ {
printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
ret.time);
ret.time = EXTRA_QUANTUM;
}
+
EDOM_INFO(ret.task)->sched_start_abs = now;
CHECK(ret.time > 0);
ASSERT(sedf_runnable(ret.task));
@@ -794,31 +868,37 @@ static struct task_slice sedf_do_schedule(s_time_t now)
return ret;
}
-static void sedf_sleep(struct vcpu *d) {
- PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
+
+static void sedf_sleep(struct vcpu *d)
+{
+ PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
+ d->domain->domain_id, d->vcpu_id);
- if (is_idle_task(d->domain))
+ if ( is_idle_vcpu(d) )
return;
EDOM_INFO(d)->status |= SEDF_ASLEEP;
- if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
+ if ( schedule_data[d->processor].curr == d )
+ {
cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
- else {
+ else
+ {
if ( __task_on_queue(d) )
__del_from_queue(d);
#if (EXTRA > EXTRA_OFF)
- if (extraq_on(d, EXTRA_UTIL_Q))
+ if ( extraq_on(d, EXTRA_UTIL_Q) )
extraq_del(d, EXTRA_UTIL_Q);
#endif
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (extraq_on(d, EXTRA_PEN_Q))
+ if ( extraq_on(d, EXTRA_PEN_Q) )
extraq_del(d, EXTRA_PEN_Q);
#endif
}
}
+
/* This function wakes up a domain, i.e. moves it into the waitqueue
* things to mention are: admission control is taking place nowhere at
* the moment, so we can't be sure, whether it is safe to wake the domain
@@ -890,17 +970,21 @@ static void sedf_sleep(struct vcpu *d) {
* -either behaviour can lead to missed deadlines in other domains as
* opposed to approaches 1,2a,2b
*/
-static inline void unblock_short_vcons
-(struct sedf_vcpu_info* inf, s_time_t now) {
+#if (UNBLOCK <= UNBLOCK_SHORT_RESUME)
+static void unblock_short_vcons(struct sedf_vcpu_info* inf, s_time_t now)
+{
inf->deadl_abs += inf->period;
inf->cputime = 0;
}
+#endif
-static inline void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
+#if (UNBLOCK == UNBLOCK_SHORT_RESUME)
+static void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
{
/*treat blocked time as consumed by the domain*/
inf->cputime += now - inf->block_abs;
- if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+ if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
+ {
/*we don't have a reasonable amount of time in
our slice left :( => start in next period!*/
unblock_short_vcons(inf, now);
@@ -910,8 +994,11 @@ static inline void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
inf->short_cont++;
#endif
}
-static inline void unblock_short_extra_support (struct sedf_vcpu_info* inf,
- s_time_t now) {
+#endif
+
+static void unblock_short_extra_support(
+ struct sedf_vcpu_info* inf, s_time_t now)
+{
/*this unblocking scheme tries to support the domain by assigning it
a priority in extratime distribution according to the loss of time
in this slice due to blocking*/
@@ -919,26 +1006,29 @@ static inline void unblock_short_extra_support (struct sedf_vcpu_info* inf,
/*no more realtime execution in this period!*/
inf->deadl_abs += inf->period;
- if (likely(inf->block_abs)) {
+ if ( likely(inf->block_abs) )
+ {
//treat blocked time as consumed by the domain*/
/*inf->cputime += now - inf->block_abs;*/
/*penalty is time the domain would have
had if it continued to run */
pen = (inf->slice - inf->cputime);
- if (pen < 0) pen = 0;
+ if ( pen < 0 )
+ pen = 0;
/*accumulate all penalties over the periods*/
/*inf->short_block_lost_tot += pen;*/
/*set penalty to the current value*/
inf->short_block_lost_tot = pen;
/*not sure which one is better.. but seems to work well...*/
- if (inf->short_block_lost_tot) {
+ if ( inf->short_block_lost_tot )
+ {
inf->score[0] = (inf->period << 10) /
inf->short_block_lost_tot;
#ifdef SEDF_STATS
inf->pen_extra_blocks++;
#endif
- if (extraq_on(inf->vcpu, EXTRA_PEN_Q))
+ if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
/*remove domain for possible resorting!*/
extraq_del(inf->vcpu, EXTRA_PEN_Q);
else
@@ -951,36 +1041,53 @@ static inline void unblock_short_extra_support (struct sedf_vcpu_info* inf,
extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
}
}
+
/*give it a fresh slice in the next period!*/
inf->cputime = 0;
}
-static inline void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
+
+
+#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
+static void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
{
/* align to next future period */
inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
* inf->period;
inf->cputime = 0;
}
+#endif
+
-static inline void unblock_long_cons_a (struct sedf_vcpu_info* inf,
- s_time_t now) {
+#if 0
+static void unblock_long_cons_a (struct sedf_vcpu_info* inf, s_time_t now)
+{
/*treat the time the domain was blocked in the
- CURRENT period as consumed by the domain*/
+ CURRENT period as consumed by the domain*/
inf->cputime = (now - inf->deadl_abs) % inf->period;
- if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+ if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
+ {
/*we don't have a reasonable amount of time in our slice
left :( => start in next period!*/
unblock_long_vcons(inf, now);
}
}
-static inline void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now) {
+#endif
+
+
+static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
+{
/*Conservative 2b*/
/*Treat the unblocking time as a start of a new period */
inf->deadl_abs = now + inf->period;
inf->cputime = 0;
}
-static inline void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now) {
- if (likely(inf->latency)) {
+
+
+#if (UNBLOCK == UNBLOCK_ATROPOS)
+static void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now)
+{
+ if ( likely(inf->latency) )
+ {
/*scale the slice and period according to the latency hint*/
/*reduce period temporarily to the latency hint*/
inf->period = inf->latency;
@@ -993,18 +1100,24 @@ static inline void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now)
inf->deadl_abs = now + inf->period;
inf->cputime = 0;
}
- else {
+ else
+ {
/*we don't have a latency hint.. use some other technique*/
unblock_long_cons_b(inf, now);
}
}
+#endif
+
+
+#if (UNBLOCK == UNBLOCK_BURST)
/*a new idea of dealing with short blocks: burst period scaling*/
-static inline void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
+static void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
{
/*treat blocked time as consumed by the domain*/
inf->cputime += now - inf->block_abs;
- if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
+ if ( (inf->cputime + EXTRA_QUANTUM) <= inf->slice )
+ {
/*if we can still use some time in the current slice
then use it!*/
#ifdef SEDF_STATS
@@ -1012,10 +1125,12 @@ static inline void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
inf->short_cont++;
#endif
}
- else {
+ else
+ {
/*we don't have a reasonable amount of time in
our slice left => switch to burst mode*/
- if (likely(inf->unblock_abs)) {
+ if ( likely(inf->unblock_abs) )
+ {
/*set the period-length to the current blocking
interval, possible enhancements: average over last
blocking intervals, user-specified minimum,...*/
@@ -1030,17 +1145,23 @@ static inline void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
/*set new (shorter) deadline*/
inf->deadl_abs += inf->period;
}
- else {
+ else
+ {
/*in case we haven't unblocked before
start in next period!*/
inf->cputime=0;
inf->deadl_abs += inf->period;
}
}
+
inf->unblock_abs = now;
}
-static inline void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now) {
- if (unlikely(inf->latency && (inf->period > inf->latency))) {
+
+
+static void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now)
+{
+ if ( unlikely(inf->latency && (inf->period > inf->latency)) )
+ {
/*scale the slice and period according to the latency hint*/
inf->period = inf->latency;
/*check for overflows on multiplication*/
@@ -1052,23 +1173,28 @@ static inline void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now)
inf->deadl_abs = now + inf->period;
inf->cputime = 0;
}
- else {
+ else
+ {
/*we don't have a latency hint.. or we are currently in
"burst mode": use some other technique
NB: this should be in fact the normal way of operation,
when we are in sync with the device!*/
unblock_long_cons_b(inf, now);
}
+
inf->unblock_abs = now;
}
+#endif /* UNBLOCK == UNBLOCK_BURST */
+
#define DOMAIN_EDF 1
#define DOMAIN_EXTRA_PEN 2
#define DOMAIN_EXTRA_UTIL 3
#define DOMAIN_IDLE 4
-static inline int get_run_type(struct vcpu* d) {
+static inline int get_run_type(struct vcpu* d)
+{
struct sedf_vcpu_info* inf = EDOM_INFO(d);
- if (is_idle_task(d->domain))
+ if (is_idle_vcpu(d))
return DOMAIN_IDLE;
if (inf->status & EXTRA_RUN_PEN)
return DOMAIN_EXTRA_PEN;
@@ -1076,6 +1202,8 @@ static inline int get_run_type(struct vcpu* d) {
return DOMAIN_EXTRA_UTIL;
return DOMAIN_EDF;
}
+
+
/*Compares two domains in the relation of whether the one is allowed to
interrupt the others execution.
It returns true (!=0) if a switch to the other domain is good.
@@ -1085,8 +1213,10 @@ static inline int get_run_type(struct vcpu* d) {
In the same class priorities are assigned as following:
EDF: early deadline > late deadline
L0 extra-time: lower score > higher score*/
-static inline int should_switch(struct vcpu* cur,
- struct vcpu* other, s_time_t now) {
+static inline int should_switch(struct vcpu *cur,
+ struct vcpu *other,
+ s_time_t now)
+{
struct sedf_vcpu_info *cur_inf, *other_inf;
cur_inf = EDOM_INFO(cur);
other_inf = EDOM_INFO(other);
@@ -1119,41 +1249,51 @@ static inline int should_switch(struct vcpu* cur,
}
return 1;
}
-void sedf_wake(struct vcpu *d) {
+
+void sedf_wake(struct vcpu *d)
+{
s_time_t now = NOW();
struct sedf_vcpu_info* inf = EDOM_INFO(d);
PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
d->vcpu_id);
- if (unlikely(is_idle_task(d->domain)))
+ if ( unlikely(is_idle_vcpu(d)) )
return;
- if ( unlikely(__task_on_queue(d)) ) {
+ if ( unlikely(__task_on_queue(d)) )
+ {
PRINT(3,"\tdomain %i.%i is already in some queue\n",
d->domain->domain_id, d->vcpu_id);
return;
}
+
ASSERT(!sedf_runnable(d));
inf->status &= ~SEDF_ASLEEP;
ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
ASSERT(!extraq_on(d, EXTRA_PEN_Q));
- if (unlikely(inf->deadl_abs == 0))
+ if ( unlikely(inf->deadl_abs == 0) )
+ {
/*initial setup of the deadline*/
inf->deadl_abs = now + inf->slice;
+ }
- PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
- "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
- inf->period, now);
+ PRINT(3, "waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
+ "now= %"PRIu64")\n",
+ d->domain->domain_id, d->vcpu_id, inf->deadl_abs, inf->period, now);
+
#ifdef SEDF_STATS
inf->block_tot++;
#endif
- if (unlikely(now < PERIOD_BEGIN(inf))) {
+
+ if ( unlikely(now < PERIOD_BEGIN(inf)) )
+ {
PRINT(4,"extratime unblock\n");
/* unblocking in extra-time! */
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (inf->status & EXTRA_WANT_PEN_Q) {
+ if ( inf->status & EXTRA_WANT_PEN_Q )
+ {
/*we have a domain that wants compensation
for block penalty and did just block in
its compensation time. Give it another
@@ -1163,8 +1303,10 @@ void sedf_wake(struct vcpu *d) {
#endif
extraq_check_add_unblocked(d, 0);
}
- else {
- if (now < inf->deadl_abs) {
+ else
+ {
+ if ( now < inf->deadl_abs )
+ {
PRINT(4,"short unblocking\n");
/*short blocking*/
#ifdef SEDF_STATS
@@ -1182,7 +1324,8 @@ void sedf_wake(struct vcpu *d) {
extraq_check_add_unblocked(d, 1);
}
- else {
+ else
+ {
PRINT(4,"long unblocking\n");
/*long unblocking*/
#ifdef SEDF_STATS
@@ -1197,7 +1340,6 @@ void sedf_wake(struct vcpu *d) {
unblock_long_cons_c(inf, now);
#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
unblock_long_cons_b(inf, now);
- /*unblock_short_cons_c(inf, now);*/
#elif (UNBLOCK == UNBLOCK_BURST)
unblock_long_burst(inf, now);
#endif
@@ -1205,26 +1347,33 @@ void sedf_wake(struct vcpu *d) {
extraq_check_add_unblocked(d, 1);
}
}
- PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
- "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
+
+ PRINT(3, "woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
+ "now= %"PRIu64")\n",
+ d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
inf->period, now);
- if (PERIOD_BEGIN(inf) > now) {
+
+ if ( PERIOD_BEGIN(inf) > now )
+ {
__add_to_waitqueue_sort(d);
PRINT(3,"added to waitq\n");
}
- else {
+ else
+ {
__add_to_runqueue_sort(d);
PRINT(3,"added to runq\n");
}
#ifdef SEDF_STATS
/*do some statistics here...*/
- if (inf->block_abs != 0) {
+ if ( inf->block_abs != 0 )
+ {
inf->block_time_tot += now - inf->block_abs;
inf->penalty_time_tot +=
PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
}
#endif
+
/*sanity check: make sure each extra-aware domain IS on the util-q!*/
ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
ASSERT(__task_on_queue(d));
@@ -1234,27 +1383,48 @@ void sedf_wake(struct vcpu *d) {
ASSERT(d->processor >= 0);
ASSERT(d->processor < NR_CPUS);
ASSERT(schedule_data[d->processor].curr);
- if (should_switch(schedule_data[d->processor].curr, d, now))
+
+ if ( should_switch(schedule_data[d->processor].curr, d, now) )
cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
-/*Print a lot of use-{full, less} information about a domains in the system*/
-static void sedf_dump_domain(struct vcpu *d) {
+
+static int sedf_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+ if ( v == current )
+ return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
+
+ vcpu_pause(v);
+ v->cpu_affinity = *affinity;
+ v->processor = first_cpu(v->cpu_affinity);
+ vcpu_unpause(v);
+
+ return 0;
+}
+
+
+/* Print a lot of useful information about a domain in the system */
+static void sedf_dump_domain(struct vcpu *d)
+{
printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
- printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i xtr(%s)=%"PRIu64" ew=%hu",
+ printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64
+ " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
- EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
+ EDOM_INFO(d)->weight, d->cpu_time,
+ EDOM_INFO(d)->score[EXTRA_UTIL_Q],
(EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
- if (d->cpu_time !=0)
+
+ if ( d->cpu_time != 0 )
printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
/ d->cpu_time);
+
#ifdef SEDF_STATS
- if (EDOM_INFO(d)->block_time_tot!=0)
+ if ( EDOM_INFO(d)->block_time_tot != 0 )
printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
EDOM_INFO(d)->block_time_tot);
- if (EDOM_INFO(d)->block_tot!=0)
+ if ( EDOM_INFO(d)->block_tot != 0 )
printf("\n blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
"shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
@@ -1271,7 +1441,8 @@ static void sedf_dump_domain(struct vcpu *d) {
printf("\n");
}
-/*dumps all domains on hte specified cpu*/
+
+/* Dumps all domains on the specified cpu */
static void sedf_dump_cpu_state(int i)
{
struct list_head *list, *queue, *tmp;
@@ -1284,7 +1455,8 @@ static void sedf_dump_cpu_state(int i)
queue = RUNQ(i);
printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
(unsigned long) queue->next, (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
+ list_for_each_safe ( list, tmp, queue )
+ {
printk("%3d: ",loop++);
d_inf = list_entry(list, struct sedf_vcpu_info, list);
sedf_dump_domain(d_inf->vcpu);
@@ -1293,7 +1465,8 @@ static void sedf_dump_cpu_state(int i)
queue = WAITQ(i); loop = 0;
printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
(unsigned long) queue->next, (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
+ list_for_each_safe ( list, tmp, queue )
+ {
printk("%3d: ",loop++);
d_inf = list_entry(list, struct sedf_vcpu_info, list);
sedf_dump_domain(d_inf->vcpu);
@@ -1303,7 +1476,8 @@ static void sedf_dump_cpu_state(int i)
printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
(unsigned long)queue, (unsigned long) queue->next,
(unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
+ list_for_each_safe ( list, tmp, queue )
+ {
d_inf = list_entry(list, struct sedf_vcpu_info,
extralist[EXTRA_PEN_Q]);
printk("%3d: ",loop++);
@@ -1314,7 +1488,8 @@ static void sedf_dump_cpu_state(int i)
printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
(unsigned long)queue, (unsigned long) queue->next,
(unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
+ list_for_each_safe ( list, tmp, queue )
+ {
d_inf = list_entry(list, struct sedf_vcpu_info,
extralist[EXTRA_UTIL_Q]);
printk("%3d: ",loop++);
@@ -1323,69 +1498,93 @@ static void sedf_dump_cpu_state(int i)
loop = 0;
printk("\nnot on Q\n");
- for_each_domain(d)
- for_each_vcpu(d, ed)
+
+ for_each_domain ( d )
{
- if (!__task_on_queue(ed) && (ed->processor == i)) {
- printk("%3d: ",loop++);
- sedf_dump_domain(ed);
+ for_each_vcpu(d, ed)
+ {
+ if ( !__task_on_queue(ed) && (ed->processor == i) )
+ {
+ printk("%3d: ",loop++);
+ sedf_dump_domain(ed);
+ }
}
}
}
-/*Adjusts periods and slices of the domains accordingly to their weights*/
-static inline int sedf_adjust_weights(struct sched_adjdom_cmd *cmd) {
+
+
+/* Adjusts periods and slices of the domains according to their weights. */
+static int sedf_adjust_weights(struct sched_adjdom_cmd *cmd)
+{
struct vcpu *p;
struct domain *d;
int sumw[NR_CPUS];
s_time_t sumt[NR_CPUS];
int cpu;
- for (cpu=0; cpu < NR_CPUS; cpu++) {
+ for ( cpu = 0; cpu < NR_CPUS; cpu++ )
+ {
sumw[cpu] = 0;
sumt[cpu] = 0;
}
- /*sum up all weights*/
- for_each_domain(d)
- for_each_vcpu(d, p) {
- if (EDOM_INFO(p)->weight)
- sumw[p->processor] += EDOM_INFO(p)->weight;
- else {
- /*don't modify domains who don't have a weight, but sum
- up the time they need, projected to a WEIGHT_PERIOD,
- so that this time is not given to the weight-driven
- domains*/
- /*check for overflows*/
- ASSERT((WEIGHT_PERIOD < ULONG_MAX)
- && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
- sumt[p->processor] +=
- (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
- EDOM_INFO(p)->period_orig;
+
+ /* sum up all weights */
+ for_each_domain( d )
+ {
+ for_each_vcpu( d, p )
+ {
+ if ( EDOM_INFO(p)->weight )
+ {
+ sumw[p->processor] += EDOM_INFO(p)->weight;
+ }
+ else
+ {
+ /*don't modify domains that don't have a weight, but sum
+ up the time they need, projected to a WEIGHT_PERIOD,
+ so that this time is not given to the weight-driven
+ domains*/
+ /*check for overflows*/
+ ASSERT((WEIGHT_PERIOD < ULONG_MAX)
+ && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
+ sumt[p->processor] +=
+ (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
+ EDOM_INFO(p)->period_orig;
+ }
}
}
- /*adjust all slices (and periods) to the new weight*/
- for_each_domain(d)
- for_each_vcpu(d, p) {
- if (EDOM_INFO(p)->weight) {
- EDOM_INFO(p)->period_orig =
- EDOM_INFO(p)->period = WEIGHT_PERIOD;
- EDOM_INFO(p)->slice_orig =
- EDOM_INFO(p)->slice =
- (EDOM_INFO(p)->weight *
- (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) /
- sumw[p->processor];
+
+ /* adjust all slices (and periods) to the new weight */
+ for_each_domain( d )
+ {
+ for_each_vcpu ( d, p )
+ {
+ if ( EDOM_INFO(p)->weight )
+ {
+ EDOM_INFO(p)->period_orig =
+ EDOM_INFO(p)->period = WEIGHT_PERIOD;
+ EDOM_INFO(p)->slice_orig =
+ EDOM_INFO(p)->slice =
+ (EDOM_INFO(p)->weight *
+ (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) /
+ sumw[p->processor];
+ }
}
}
+
return 0;
}
+
/* set or fetch domain scheduling parameters */
-static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd) {
+static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd)
+{
struct vcpu *v;
PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
"new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
+
if ( cmd->direction == SCHED_INFO_PUT )
{
/*check for sane parameters*/
@@ -1458,6 +1657,7 @@ struct scheduler sched_sedf_def = {
.sleep = sedf_sleep,
.wake = sedf_wake,
.adjdom = sedf_adjdom,
+ .set_affinity = sedf_set_affinity
};
/*
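For concreteness, the weight arithmetic in sedf_adjust_weights() works out as follows (illustrative numbers, not taken from the patch):

/* Two weighted VCPUs on one CPU with weights 2 and 1, and no unweighted
 * domains, give sumw[cpu] == 3 and sumt[cpu] == 0. Each VCPU then gets:
 *   period = WEIGHT_PERIOD
 *   slice  = weight * (WEIGHT_PERIOD - WEIGHT_SAFETY - 0) / 3
 * i.e. 2/3 and 1/3 of the usable period. Any unweighted domain would
 * first have its projected time subtracted via sumt[cpu]. */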
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index d74b3c8370..f6668d3593 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -21,7 +21,7 @@
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
-#include <xen/ac_timer.h>
+#include <xen/timer.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
@@ -71,7 +71,7 @@ static struct scheduler ops;
: (typeof(ops.fn(__VA_ARGS__)))0 )
/* Per-CPU periodic timer sends an event to the currently-executing domain. */
-static struct ac_timer t_timer[NR_CPUS];
+static struct timer t_timer[NR_CPUS];
void free_domain(struct domain *d)
{
@@ -100,7 +100,9 @@ struct vcpu *alloc_vcpu(
v->vcpu_id = vcpu_id;
v->processor = cpu_id;
atomic_set(&v->pausecnt, 0);
- v->cpumap = CPUMAP_RUNANYWHERE;
+
+ v->cpu_affinity = is_idle_domain(d) ?
+ cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
d->vcpu[vcpu_id] = v;
@@ -138,12 +140,10 @@ struct domain *alloc_domain(void)
*/
void sched_add_domain(struct vcpu *v)
{
- struct domain *d = v->domain;
-
/* Initialise the per-domain timer. */
- init_ac_timer(&v->timer, dom_timer_fn, v, v->processor);
+ init_timer(&v->timer, dom_timer_fn, v, v->processor);
- if ( is_idle_task(d) )
+ if ( is_idle_vcpu(v) )
{
schedule_data[v->processor].curr = v;
schedule_data[v->processor].idle = v;
@@ -151,12 +151,12 @@ void sched_add_domain(struct vcpu *v)
}
SCHED_OP(add_task, v);
- TRACE_2D(TRC_SCHED_DOM_ADD, d->domain_id, v->vcpu_id);
+ TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
}
void sched_rem_domain(struct vcpu *v)
{
- rem_ac_timer(&v->timer);
+ kill_timer(&v->timer);
SCHED_OP(rem_task, v);
TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
}
@@ -165,26 +165,19 @@ void vcpu_sleep_nosync(struct vcpu *v)
{
unsigned long flags;
- spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
- if ( likely(!domain_runnable(v)) )
+ vcpu_schedule_lock_irqsave(v, flags);
+ if ( likely(!vcpu_runnable(v)) )
SCHED_OP(sleep, v);
- spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
+ vcpu_schedule_unlock_irqrestore(v, flags);
TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
-}
+}
void vcpu_sleep_sync(struct vcpu *v)
{
vcpu_sleep_nosync(v);
- /*
- * We can be sure that the VCPU is finally descheduled after the running
- * flag is cleared and the scheduler lock is released. We also check that
- * the domain continues to be unrunnable, in case someone else wakes it.
- */
- while ( !domain_runnable(v) &&
- (test_bit(_VCPUF_running, &v->vcpu_flags) ||
- spin_is_locked(&schedule_data[v->processor].schedule_lock)) )
+ while ( !vcpu_runnable(v) && test_bit(_VCPUF_running, &v->vcpu_flags) )
cpu_relax();
sync_vcpu_execstate(v);
@@ -194,18 +187,28 @@ void vcpu_wake(struct vcpu *v)
{
unsigned long flags;
- spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
- if ( likely(domain_runnable(v)) )
+ vcpu_schedule_lock_irqsave(v, flags);
+ if ( likely(vcpu_runnable(v)) )
{
SCHED_OP(wake, v);
v->wokenup = NOW();
}
- clear_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
- spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
+ vcpu_schedule_unlock_irqrestore(v, flags);
TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
}
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+ cpumask_t online_affinity;
+
+ cpus_and(online_affinity, *affinity, cpu_online_map);
+ if ( cpus_empty(online_affinity) )
+ return -EINVAL;
+
+ return SCHED_OP(set_affinity, v, affinity);
+}
+
/* Block the currently-executing domain until a pertinent event occurs. */
long do_block(void)
{
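The new vcpu_set_affinity() rejects a mask containing no online CPU before delegating to the per-scheduler hook; a hypothetical caller (the VCPU v and the choice of CPU 2 are illustrative) might be:

cpumask_t mask = CPU_MASK_NONE;
cpu_set(2, &mask);                        /* request physical CPU 2 only */
if ( vcpu_set_affinity(v, &mask) == -EINVAL )
    printk("no online CPU in requested mask; affinity unchanged\n");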
@@ -275,9 +278,9 @@ long do_set_timer_op(s_time_t timeout)
struct vcpu *v = current;
if ( timeout == 0 )
- rem_ac_timer(&v->timer);
+ stop_timer(&v->timer);
else
- set_ac_timer(&v->timer, timeout);
+ set_timer(&v->timer, timeout);
return 0;
}
@@ -304,63 +307,42 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd)
{
struct domain *d;
struct vcpu *v;
- int cpu;
-#if NR_CPUS <=32
- unsigned long have_lock;
- #else
- unsigned long long have_lock;
-#endif
- int succ;
-
- #define __set_cpu_bit(cpu, data) data |= ((typeof(data))1)<<cpu
- #define __get_cpu_bit(cpu, data) (data & ((typeof(data))1)<<cpu)
- #define __clear_cpu_bits(data) data = ((typeof(data))0)
- if ( cmd->sched_id != ops.sched_id )
- return -EINVAL;
-
- if ( cmd->direction != SCHED_INFO_PUT && cmd->direction != SCHED_INFO_GET )
+ if ( (cmd->sched_id != ops.sched_id) ||
+ ((cmd->direction != SCHED_INFO_PUT) &&
+ (cmd->direction != SCHED_INFO_GET)) )
return -EINVAL;
d = find_domain_by_id(cmd->domain);
if ( d == NULL )
return -ESRCH;
- /* acquire locks on all CPUs on which vcpus of this domain run */
- do {
- succ = 0;
- __clear_cpu_bits(have_lock);
- for_each_vcpu(d, v) {
- cpu = v->processor;
- if (!__get_cpu_bit(cpu, have_lock)) {
- /* if we don't have a lock on this CPU: acquire it*/
- if (spin_trylock(&schedule_data[cpu].schedule_lock)) {
- /*we have this lock!*/
- __set_cpu_bit(cpu, have_lock);
- succ = 1;
- } else {
- /*we didn,t get this lock -> free all other locks too!*/
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (__get_cpu_bit(cpu, have_lock))
- spin_unlock(&schedule_data[cpu].schedule_lock);
- /* and start from the beginning! */
- succ = 0;
- /* leave the "for_each_domain_loop" */
- break;
- }
- }
- }
- } while ( !succ );
+    /*
+     * Most VCPUs we can simply pause. The current VCPU cannot pause itself
+     * (vcpu_pause() waits for its target to be descheduled), so for it we
+     * take the local schedule_lock instead, to guard against concurrent
+     * updates.
+     */
+ for_each_vcpu ( d, v )
+ {
+ if ( v == current )
+ vcpu_schedule_lock_irq(v);
+ else
+ vcpu_pause(v);
+ }
SCHED_OP(adjdom, d, cmd);
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (__get_cpu_bit(cpu, have_lock))
- spin_unlock(&schedule_data[cpu].schedule_lock);
- __clear_cpu_bits(have_lock);
-
TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
+
+ for_each_vcpu ( d, v )
+ {
+ if ( v == current )
+ vcpu_schedule_unlock_irq(v);
+ else
+ vcpu_unpause(v);
+ }
+
put_domain(d);
+
return 0;
}
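The reason the current VCPU is special deserves spelling out (a clarifying sketch, not code from the patch):

/* vcpu_pause() spins until its target is fully descheduled, so a VCPU
 * adjusting its own domain could never pause itself without deadlocking.
 * Holding the local schedule_lock gives equivalent mutual exclusion for
 * that one VCPU while all of its siblings are paused. */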
@@ -371,22 +353,20 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd)
*/
static void __enter_scheduler(void)
{
- struct vcpu *prev = current, *next = NULL;
- int cpu = prev->processor;
- s_time_t now;
+ struct vcpu *prev = current, *next = NULL;
+ int cpu = smp_processor_id();
+ s_time_t now = NOW();
struct task_slice next_slice;
s32 r_time; /* time for new dom to run */
+ ASSERT(!in_irq());
+
perfc_incrc(sched_run);
-
- spin_lock_irq(&schedule_data[cpu].schedule_lock);
- now = NOW();
+ spin_lock_irq(&schedule_data[cpu].schedule_lock);
- rem_ac_timer(&schedule_data[cpu].s_timer);
+ stop_timer(&schedule_data[cpu].s_timer);
- ASSERT(!in_irq());
-
prev->cpu_time += now - prev->lastschd;
/* get policy-specific decision on scheduling... */
@@ -394,12 +374,12 @@ static void __enter_scheduler(void)
r_time = next_slice.time;
next = next_slice.task;
-
+
schedule_data[cpu].curr = next;
next->lastschd = now;
- set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
+ set_timer(&schedule_data[cpu].s_timer, now + r_time);
if ( unlikely(prev == next) )
{
@@ -412,11 +392,6 @@ static void __enter_scheduler(void)
TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
next->domain->domain_id, now - next->wokenup, r_time);
- clear_bit(_VCPUF_running, &prev->vcpu_flags);
- set_bit(_VCPUF_running, &next->vcpu_flags);
-
- perfc_incrc(sched_ctx);
-
/*
* Logic of wokenup field in domain struct:
* Used to calculate "waiting time", which is the time that a domain
@@ -425,10 +400,10 @@ static void __enter_scheduler(void)
* also set here then a preempted runnable domain will get a screwed up
* "waiting time" value next time it is scheduled.
*/
- prev->wokenup = NOW();
+ prev->wokenup = now;
#if defined(WAKE_HISTO)
- if ( !is_idle_task(next->domain) && next->wokenup )
+ if ( !is_idle_vcpu(next) && next->wokenup )
{
ulong diff = (ulong)(now - next->wokenup);
diff /= (ulong)MILLISECS(1);
@@ -438,7 +413,7 @@ static void __enter_scheduler(void)
next->wokenup = (s_time_t)0;
#elif defined(BLOCKTIME_HISTO)
prev->lastdeschd = now;
- if ( !is_idle_task(next->domain) )
+ if ( !is_idle_vcpu(next) )
{
ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
if (diff <= BUCKETS-2) schedule_data[cpu].hist[diff]++;
@@ -446,10 +421,16 @@ static void __enter_scheduler(void)
}
#endif
+ set_bit(_VCPUF_running, &next->vcpu_flags);
+
+ spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+ perfc_incrc(sched_ctx);
+
prev->sleep_tick = schedule_data[cpu].tick;
/* Ensure that the domain has an up-to-date time base. */
- if ( !is_idle_task(next->domain) )
+ if ( !is_idle_vcpu(next) )
{
update_dom_time(next);
if ( next->sleep_tick != schedule_data[cpu].tick )
@@ -461,17 +442,6 @@ static void __enter_scheduler(void)
next->domain->domain_id, next->vcpu_id);
context_switch(prev, next);
-
- spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
- context_switch_finalise(next);
-}
-
-/* No locking needed -- pointer comparison is safe :-) */
-int idle_cpu(int cpu)
-{
- struct vcpu *p = schedule_data[cpu].curr;
- return p == idle_task[cpu];
}
@@ -492,12 +462,12 @@ static void s_timer_fn(void *unused)
/* Periodic tick timer: send timer event to current domain */
static void t_timer_fn(void *unused)
{
- struct vcpu *v = current;
- unsigned int cpu = v->processor;
+ struct vcpu *v = current;
+ unsigned int cpu = smp_processor_id();
schedule_data[cpu].tick++;
- if ( !is_idle_task(v->domain) )
+ if ( !is_idle_vcpu(v) )
{
update_dom_time(v);
send_guest_virq(v, VIRQ_TIMER);
@@ -505,7 +475,7 @@ static void t_timer_fn(void *unused)
page_scrub_schedule_work();
- set_ac_timer(&t_timer[cpu], NOW() + MILLISECS(10));
+ set_timer(&t_timer[cpu], NOW() + MILLISECS(10));
}
/* Domain timer function, sends a virtual timer interrupt to domain */
@@ -527,13 +497,10 @@ void __init scheduler_init(void)
for ( i = 0; i < NR_CPUS; i++ )
{
spin_lock_init(&schedule_data[i].schedule_lock);
- init_ac_timer(&schedule_data[i].s_timer, s_timer_fn, NULL, i);
- init_ac_timer(&t_timer[i], t_timer_fn, NULL, i);
+ init_timer(&schedule_data[i].s_timer, s_timer_fn, NULL, i);
+ init_timer(&t_timer[i], t_timer_fn, NULL, i);
}
- schedule_data[0].curr = idle_task[0];
- schedule_data[0].idle = idle_task[0];
-
for ( i = 0; schedulers[i] != NULL; i++ )
{
ops = *schedulers[i];
@@ -546,10 +513,16 @@ void __init scheduler_init(void)
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
- rc = SCHED_OP(alloc_task, idle_task[0]);
- BUG_ON(rc < 0);
+ if ( idle_vcpu[0] != NULL )
+ {
+ schedule_data[0].curr = idle_vcpu[0];
+ schedule_data[0].idle = idle_vcpu[0];
+
+ rc = SCHED_OP(alloc_task, idle_vcpu[0]);
+ BUG_ON(rc < 0);
- sched_add_domain(idle_task[0]);
+ sched_add_domain(idle_vcpu[0]);
+ }
}
/*
diff --git a/xen/common/ac_timer.c b/xen/common/timer.c
index 1bf443ac46..55f39bfa98 100644
--- a/xen/common/ac_timer.c
+++ b/xen/common/timer.c
@@ -1,5 +1,5 @@
/******************************************************************************
- * ac_timer.c
+ * timer.c
*
* Copyright (c) 2002-2003 Rolf Neugebauer
* Copyright (c) 2002-2005 K A Fraser
@@ -15,7 +15,7 @@
#include <xen/perfc.h>
#include <xen/time.h>
#include <xen/softirq.h>
-#include <xen/ac_timer.h>
+#include <xen/timer.h>
#include <xen/keyhandler.h>
#include <asm/system.h>
#include <asm/desc.h>
@@ -26,15 +26,15 @@
*/
#define TIMER_SLOP (50*1000) /* ns */
-struct ac_timers {
- spinlock_t lock;
- struct ac_timer **heap;
- unsigned int softirqs;
+struct timers {
+ spinlock_t lock;
+ struct timer **heap;
+ struct timer *running;
} __cacheline_aligned;
-struct ac_timers ac_timers[NR_CPUS];
+struct timers timers[NR_CPUS];
-extern int reprogram_ac_timer(s_time_t timeout);
+extern int reprogram_timer(s_time_t timeout);
/****************************************************************************
* HEAP OPERATIONS.
@@ -47,10 +47,10 @@ extern int reprogram_ac_timer(s_time_t timeout);
#define SET_HEAP_LIMIT(_h,_v) (((u16 *)(_h))[1] = (u16)(_v))
/* Sink down element @pos of @heap. */
-static void down_heap(struct ac_timer **heap, int pos)
+static void down_heap(struct timer **heap, int pos)
{
int sz = GET_HEAP_SIZE(heap), nxt;
- struct ac_timer *t = heap[pos];
+ struct timer *t = heap[pos];
while ( (nxt = (pos << 1)) <= sz )
{
@@ -68,9 +68,9 @@ static void down_heap(struct ac_timer **heap, int pos)
}
/* Float element @pos up @heap. */
-static void up_heap(struct ac_timer **heap, int pos)
+static void up_heap(struct timer **heap, int pos)
{
- struct ac_timer *t = heap[pos];
+ struct timer *t = heap[pos];
while ( (pos > 1) && (t->expires < heap[pos>>1]->expires) )
{
@@ -85,7 +85,7 @@ static void up_heap(struct ac_timer **heap, int pos)
/* Delete @t from @heap. Return TRUE if new top of heap. */
-static int remove_entry(struct ac_timer **heap, struct ac_timer *t)
+static int remove_entry(struct timer **heap, struct timer *t)
{
int sz = GET_HEAP_SIZE(heap);
int pos = t->heap_offset;
@@ -114,9 +114,9 @@ static int remove_entry(struct ac_timer **heap, struct ac_timer *t)
/* Add new entry @t to @heap. Return TRUE if new top of heap. */
-static int add_entry(struct ac_timer ***pheap, struct ac_timer *t)
+static int add_entry(struct timer ***pheap, struct timer *t)
{
- struct ac_timer **heap = *pheap;
+ struct timer **heap = *pheap;
int sz = GET_HEAP_SIZE(heap);
/* Copy the heap if it is full. */
@@ -125,7 +125,7 @@ static int add_entry(struct ac_timer ***pheap, struct ac_timer *t)
/* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
int old_limit = GET_HEAP_LIMIT(heap);
int new_limit = ((old_limit + 1) << 4) - 1;
- heap = xmalloc_array(struct ac_timer *, new_limit + 1);
+ heap = xmalloc_array(struct timer *, new_limit + 1);
BUG_ON(heap == NULL);
memcpy(heap, *pheap, (old_limit + 1) * sizeof(*heap));
SET_HEAP_LIMIT(heap, new_limit);
@@ -146,61 +146,80 @@ static int add_entry(struct ac_timer ***pheap, struct ac_timer *t)
* TIMER OPERATIONS.
*/
-static inline void __add_ac_timer(struct ac_timer *timer)
+static inline void __add_timer(struct timer *timer)
{
int cpu = timer->cpu;
- if ( add_entry(&ac_timers[cpu].heap, timer) )
- cpu_raise_softirq(cpu, AC_TIMER_SOFTIRQ);
+ if ( add_entry(&timers[cpu].heap, timer) )
+ cpu_raise_softirq(cpu, TIMER_SOFTIRQ);
}
-static inline void __rem_ac_timer(struct ac_timer *timer)
+static inline void __stop_timer(struct timer *timer)
{
int cpu = timer->cpu;
- if ( remove_entry(ac_timers[cpu].heap, timer) )
- cpu_raise_softirq(cpu, AC_TIMER_SOFTIRQ);
+ if ( remove_entry(timers[cpu].heap, timer) )
+ cpu_raise_softirq(cpu, TIMER_SOFTIRQ);
}
-void set_ac_timer(struct ac_timer *timer, s_time_t expires)
+void set_timer(struct timer *timer, s_time_t expires)
{
int cpu = timer->cpu;
unsigned long flags;
- spin_lock_irqsave(&ac_timers[cpu].lock, flags);
- ASSERT(timer != NULL);
- if ( active_ac_timer(timer) )
- __rem_ac_timer(timer);
+ spin_lock_irqsave(&timers[cpu].lock, flags);
+ if ( active_timer(timer) )
+ __stop_timer(timer);
timer->expires = expires;
- __add_ac_timer(timer);
- spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ if ( likely(!timer->killed) )
+ __add_timer(timer);
+ spin_unlock_irqrestore(&timers[cpu].lock, flags);
}
-void rem_ac_timer(struct ac_timer *timer)
+void stop_timer(struct timer *timer)
{
int cpu = timer->cpu;
unsigned long flags;
- spin_lock_irqsave(&ac_timers[cpu].lock, flags);
- ASSERT(timer != NULL);
- if ( active_ac_timer(timer) )
- __rem_ac_timer(timer);
- spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ spin_lock_irqsave(&timers[cpu].lock, flags);
+ if ( active_timer(timer) )
+ __stop_timer(timer);
+ spin_unlock_irqrestore(&timers[cpu].lock, flags);
}
-static void ac_timer_softirq_action(void)
+void kill_timer(struct timer *timer)
{
- int cpu = smp_processor_id();
- struct ac_timer *t, **heap;
- s_time_t now;
- void (*fn)(void *);
+ int cpu = timer->cpu;
+ unsigned long flags;
+
+ BUG_ON(timers[cpu].running == timer);
+
+ spin_lock_irqsave(&timers[cpu].lock, flags);
+ if ( active_timer(timer) )
+ __stop_timer(timer);
+ timer->killed = 1;
+ spin_unlock_irqrestore(&timers[cpu].lock, flags);
+
+ for_each_online_cpu ( cpu )
+ while ( timers[cpu].running == timer )
+ cpu_relax();
+}
- spin_lock_irq(&ac_timers[cpu].lock);
+
+static void timer_softirq_action(void)
+{
+ int cpu = smp_processor_id();
+ struct timer *t, **heap;
+ s_time_t now;
+ void (*fn)(void *);
+ void *data;
+
+ spin_lock_irq(&timers[cpu].lock);
do {
- heap = ac_timers[cpu].heap;
+ heap = timers[cpu].heap;
now = NOW();
while ( (GET_HEAP_SIZE(heap) != 0) &&
@@ -208,56 +227,59 @@ static void ac_timer_softirq_action(void)
{
remove_entry(heap, t);
- if ( (fn = t->function) != NULL )
- {
- void *data = t->data;
- spin_unlock_irq(&ac_timers[cpu].lock);
- (*fn)(data);
- spin_lock_irq(&ac_timers[cpu].lock);
- }
+ timers[cpu].running = t;
+
+ fn = t->function;
+ data = t->data;
+
+ spin_unlock_irq(&timers[cpu].lock);
+ (*fn)(data);
+ spin_lock_irq(&timers[cpu].lock);
/* Heap may have grown while the lock was released. */
- heap = ac_timers[cpu].heap;
+ heap = timers[cpu].heap;
}
+
+ timers[cpu].running = NULL;
}
- while ( !reprogram_ac_timer(GET_HEAP_SIZE(heap) ? heap[1]->expires : 0) );
+ while ( !reprogram_timer(GET_HEAP_SIZE(heap) ? heap[1]->expires : 0) );
- spin_unlock_irq(&ac_timers[cpu].lock);
+ spin_unlock_irq(&timers[cpu].lock);
}
static void dump_timerq(unsigned char key)
{
- struct ac_timer *t;
- unsigned long flags;
- s_time_t now = NOW();
- int i, j;
+ struct timer *t;
+ unsigned long flags;
+ s_time_t now = NOW();
+ int i, j;
- printk("Dumping ac_timer queues: NOW=0x%08X%08X\n",
+ printk("Dumping timer queues: NOW=0x%08X%08X\n",
(u32)(now>>32), (u32)now);
for_each_online_cpu( i )
{
printk("CPU[%02d] ", i);
- spin_lock_irqsave(&ac_timers[i].lock, flags);
- for ( j = 1; j <= GET_HEAP_SIZE(ac_timers[i].heap); j++ )
+ spin_lock_irqsave(&timers[i].lock, flags);
+ for ( j = 1; j <= GET_HEAP_SIZE(timers[i].heap); j++ )
{
- t = ac_timers[i].heap[j];
+ t = timers[i].heap[j];
printk (" %d : %p ex=0x%08X%08X %p\n",
j, t, (u32)(t->expires>>32), (u32)t->expires, t->data);
}
- spin_unlock_irqrestore(&ac_timers[i].lock, flags);
+ spin_unlock_irqrestore(&timers[i].lock, flags);
printk("\n");
}
}
-void __init ac_timer_init(void)
+void __init timer_init(void)
{
- static struct ac_timer *dummy_heap;
+ static struct timer *dummy_heap;
int i;
- open_softirq(AC_TIMER_SOFTIRQ, ac_timer_softirq_action);
+ open_softirq(TIMER_SOFTIRQ, timer_softirq_action);
/*
* All CPUs initially share an empty dummy heap. Only those CPUs that
@@ -268,11 +290,11 @@ void __init ac_timer_init(void)
for ( i = 0; i < NR_CPUS; i++ )
{
- spin_lock_init(&ac_timers[i].lock);
- ac_timers[i].heap = &dummy_heap;
+ spin_lock_init(&timers[i].lock);
+ timers[i].heap = &dummy_heap;
}
- register_keyhandler('a', dump_timerq, "dump ac_timer queues");
+ register_keyhandler('a', dump_timerq, "dump timer queues");
}
/*
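Under the renamed API, stop_timer() merely deactivates a timer, while the new kill_timer() marks it dead and waits for any in-flight handler to finish. A usage sketch under those semantics (the callback and CPU binding are illustrative):

static struct timer demo_timer;

static void demo_fn(void *data)
{
    /* Runs in TIMER_SOFTIRQ context on the timer's bound CPU. */
}

static void demo(void)
{
    init_timer(&demo_timer, demo_fn, NULL, 0);      /* bind to CPU 0 */
    set_timer(&demo_timer, NOW() + MILLISECS(10));  /* arm (or re-arm) */
    stop_timer(&demo_timer);                        /* deactivate; re-arming is allowed */
    kill_timer(&demo_timer);                        /* final: later set_timer() is a no-op */
}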
diff --git a/xen/common/vsprintf.c b/xen/common/vsprintf.c
index 55d538563d..da82029195 100644
--- a/xen/common/vsprintf.c
+++ b/xen/common/vsprintf.c
@@ -12,11 +12,15 @@
/*
* Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
* - changed to provide snprintf and vsnprintf functions
+ * So Feb 1 16:51:32 CET 2004 Juergen Quade <quade@hsnr.de>
+ * - scnprintf and vscnprintf
*/
#include <stdarg.h>
#include <xen/ctype.h>
#include <xen/lib.h>
+#include <asm/div64.h>
+#include <asm/page.h>
/**
* simple_strtoul - convert a string to an unsigned long
@@ -33,11 +37,14 @@ unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
if (*cp == '0') {
base = 8;
cp++;
- if ((*cp == 'x') && isxdigit(cp[1])) {
+ if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
cp++;
base = 16;
}
}
+ } else if (base == 16) {
+ if (cp[0] == '0' && toupper(cp[1]) == 'X')
+ cp += 2;
}
while (isxdigit(*cp) &&
(value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
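With the added branch, an explicit base of 16 now skips a leading 0x/0X prefix instead of halting at the 'x'; for example (values illustrative):

unsigned long a = simple_strtoul("0x1a", NULL, 0);   /* 26, base auto-detected */
unsigned long b = simple_strtoul("0x1a", NULL, 16);  /* 26; previously returned 0 */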
@@ -49,6 +56,8 @@ unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
return result;
}
+EXPORT_SYMBOL(simple_strtoul);
+
/**
* simple_strtol - convert a string to a signed long
* @cp: The start of the string
@@ -62,6 +71,8 @@ long simple_strtol(const char *cp,char **endp,unsigned int base)
return simple_strtoul(cp,endp,base);
}
+EXPORT_SYMBOL(simple_strtol);
+
/**
* simple_strtoull - convert a string to an unsigned long long
* @cp: The start of the string
@@ -77,11 +88,14 @@ unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base)
if (*cp == '0') {
base = 8;
cp++;
- if ((*cp == 'x') && isxdigit(cp[1])) {
+ if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
cp++;
base = 16;
}
}
+ } else if (base == 16) {
+ if (cp[0] == '0' && toupper(cp[1]) == 'X')
+ cp += 2;
}
while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp)
? toupper(*cp) : *cp)-'A'+10) < base) {
@@ -93,6 +107,8 @@ unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base)
return result;
}
+EXPORT_SYMBOL(simple_strtoull);
+
/**
* simple_strtoll - convert a string to a signed long long
* @cp: The start of the string
@@ -123,25 +139,25 @@ static int skip_atoi(const char **s)
#define SPECIAL 32 /* 0x */
#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
-static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
+static char * number(char * buf, char * end, unsigned long long num, int base, int size, int precision, int type)
{
char c,sign,tmp[66];
const char *digits;
- const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
- const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ static const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+ static const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
int i;
digits = (type & LARGE) ? large_digits : small_digits;
if (type & LEFT)
type &= ~ZEROPAD;
if (base < 2 || base > 36)
- return buf;
+ return NULL;
c = (type & ZEROPAD) ? '0' : ' ';
sign = 0;
if (type & SIGN) {
- if (num < 0) {
+ if ((signed long long) num < 0) {
sign = '-';
- num = -num;
+ num = - (signed long long) num;
size--;
} else if (type & PLUS) {
sign = '+';
@@ -160,6 +176,9 @@ static char * number(char * buf, char * end, long long num, int base, int size,
i = 0;
if (num == 0)
tmp[i++]='0';
+ else while (num != 0)
+ tmp[i++] = digits[do_div(num,base)];
+#if 0
else
{
/* XXX KAF: force unsigned mod and div. */
@@ -167,6 +186,7 @@ static char * number(char * buf, char * end, long long num, int base, int size,
unsigned int base2=(unsigned int)base;
while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; }
}
+#endif
if (i > precision)
precision = i;
size -= precision;
@@ -222,14 +242,22 @@ static char * number(char * buf, char * end, long long num, int base, int size,
}
/**
-* vsnprintf - Format a string and place it in a buffer
-* @buf: The buffer to place the result into
-* @size: The size of the buffer, including the trailing null space
-* @fmt: The format string to use
-* @args: Arguments for the format string
-*
-* Call this function if you are already dealing with a va_list.
-* You probably want snprintf instead.
+ * vsnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The return value is the number of characters which would
+ * be generated for the given input, excluding the trailing
+ * '\0', as per ISO C99. If you want to have the exact
+ * number of characters written into @buf as return value
+ * (not including the trailing '\0'), use vscnprintf. If the
+ * return is greater than or equal to @size, the resulting
+ * string is truncated.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want snprintf instead.
*/
int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
{
@@ -248,6 +276,9 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
/* 'z' support added 23/7/1999 S.H. */
/* 'z' changed to 'Z' --davidm 1/25/99 */
+ /* Reject out-of-range values early */
+ BUG_ON((int)size < 0);
+
str = buf;
end = buf + size - 1;
@@ -307,7 +338,8 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
/* get the conversion qualifier */
qualifier = -1;
- if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+ *fmt =='Z' || *fmt == 'z') {
qualifier = *fmt;
++fmt;
if (qualifier == 'l' && *fmt == 'l') {
@@ -315,10 +347,6 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
++fmt;
}
}
- if (*fmt == 'q') {
- qualifier = 'L';
- ++fmt;
- }
/* default base */
base = 10;
@@ -345,7 +373,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
case 's':
s = va_arg(args, char *);
- if (!s)
+ if ((unsigned long)s < PAGE_SIZE)
s = "<NULL>";
len = strnlen(s, precision);
@@ -386,7 +414,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
if (qualifier == 'l') {
long * ip = va_arg(args, long *);
*ip = (str - buf);
- } else if (qualifier == 'Z') {
+ } else if (qualifier == 'Z' || qualifier == 'z') {
size_t * ip = va_arg(args, size_t *);
*ip = (str - buf);
} else {
@@ -437,7 +465,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
num = va_arg(args, unsigned long);
if (flags & SIGN)
num = (signed long) num;
- } else if (qualifier == 'Z') {
+ } else if (qualifier == 'Z' || qualifier == 'z') {
num = va_arg(args, size_t);
} else if (qualifier == 'h') {
num = (unsigned short) va_arg(args, int);
@@ -463,12 +491,43 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
return str-buf;
}
+EXPORT_SYMBOL(vsnprintf);
+
+/**
+ * vscnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The return value is the number of characters which have been written into
+ * the @buf not including the trailing '\0'. If @size is <= 0 the function
+ * returns 0.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want scnprintf instead.
+ */
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int i;
+
+ i=vsnprintf(buf,size,fmt,args);
+ return (i >= size) ? (size - 1) : i;
+}
+
+EXPORT_SYMBOL(vscnprintf);
+
/**
* snprintf - Format a string and place it in a buffer
* @buf: The buffer to place the result into
* @size: The size of the buffer, including the trailing null space
* @fmt: The format string to use
* @...: Arguments for the format string
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing null,
+ * as per ISO C99. If the return is greater than or equal to
+ * @size, the resulting string is truncated.
*/
int snprintf(char * buf, size_t size, const char *fmt, ...)
{
@@ -481,26 +540,61 @@ int snprintf(char * buf, size_t size, const char *fmt, ...)
return i;
}
+EXPORT_SYMBOL(snprintf);
+
+/**
+ * scnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The return value is the number of characters written into @buf not including
+ * the trailing '\0'. If @size is <= 0 the function returns 0. If the return is
+ * greater than or equal to @size, the resulting string is truncated.
+ */
+
+int scnprintf(char * buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+ return (i >= size) ? (size - 1) : i;
+}
+EXPORT_SYMBOL(scnprintf);
+
/**
* vsprintf - Format a string and place it in a buffer
* @buf: The buffer to place the result into
* @fmt: The format string to use
* @args: Arguments for the format string
*
+ * The function returns the number of characters written
+ * into @buf. Use vsnprintf or vscnprintf in order to avoid
+ * buffer overflows.
+ *
* Call this function if you are already dealing with a va_list.
* You probably want sprintf instead.
*/
int vsprintf(char *buf, const char *fmt, va_list args)
{
- return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args);
+ return vsnprintf(buf, INT_MAX, fmt, args);
}
+EXPORT_SYMBOL(vsprintf);
/**
* sprintf - Format a string and place it in a buffer
* @buf: The buffer to place the result into
* @fmt: The format string to use
* @...: Arguments for the format string
+ *
+ * The function returns the number of characters written
+ * into @buf. Use snprintf or scnprintf in order to avoid
+ * buffer overflows.
*/
int sprintf(char * buf, const char *fmt, ...)
{
@@ -508,11 +602,12 @@ int sprintf(char * buf, const char *fmt, ...)
int i;
va_start(args, fmt);
- i=vsprintf(buf,fmt,args);
+ i=vsnprintf(buf, INT_MAX, fmt, args);
va_end(args);
return i;
}
+EXPORT_SYMBOL(sprintf);
/*
* Local variables:
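The practical difference between the two families is which return value is safe to reuse after truncation; a minimal sketch (buffer size illustrative):

char buf[16];
int n;

n = snprintf(buf, sizeof(buf), "%s", "a very long message");  /* 19: would-be length (C99) */
n = scnprintf(buf, sizeof(buf), "%s", "a very long message"); /* 15: characters stored */
/* Only the scnprintf result is a valid append offset: buf + n is the NUL. */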
diff --git a/xen/common/xmalloc.c b/xen/common/xmalloc.c
index 4d74baee70..24e4b6b2fe 100644
--- a/xen/common/xmalloc.c
+++ b/xen/common/xmalloc.c
@@ -30,7 +30,7 @@
#include <xen/config.h>
#include <xen/mm.h>
#include <xen/spinlock.h>
-#include <xen/ac_timer.h>
+#include <xen/timer.h>
#include <xen/cache.h>
#include <xen/prefetch.h>
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 782ec814bf..e36285b7ca 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -311,7 +311,7 @@ static void serial_rx(char c, struct cpu_user_regs *regs)
long guest_console_write(char *buffer, int count)
{
- char kbuf[128];
+ char kbuf[128], *kptr;
int kcount;
while ( count > 0 )
@@ -334,6 +334,9 @@ long guest_console_write(char *buffer, int count)
serial_puts(sercon_handle, kbuf);
+ for ( kptr = kbuf; *kptr != '\0'; kptr++ )
+ putchar_console(*kptr);
+
buffer += kcount;
count -= kcount;
}
diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c
index 8ef838d3db..9476f473a2 100644
--- a/xen/drivers/char/ns16550.c
+++ b/xen/drivers/char/ns16550.c
@@ -33,7 +33,7 @@ static struct ns16550 {
/* UART with IRQ line: interrupt-driven I/O. */
struct irqaction irqaction;
/* UART with no IRQ line: periodically-polled I/O. */
- struct ac_timer timer;
+ struct timer timer;
unsigned int timeout_ms;
} ns16550_com[2] = { { 0 } };
@@ -138,7 +138,7 @@ static void ns16550_poll(void *data)
if ( ns_read_reg(uart, LSR) & LSR_THRE )
serial_tx_interrupt(port, regs);
- set_ac_timer(&uart->timer, NOW() + MILLISECS(uart->timeout_ms));
+ set_timer(&uart->timer, NOW() + MILLISECS(uart->timeout_ms));
}
static int ns16550_tx_empty(struct serial_port *port)
@@ -214,8 +214,8 @@ static void ns16550_init_postirq(struct serial_port *port)
bits = uart->data_bits + uart->stop_bits + !!uart->parity;
uart->timeout_ms = max_t(
unsigned int, 1, (bits * port->tx_fifo_size * 1000) / uart->baud);
- init_ac_timer(&uart->timer, ns16550_poll, port, 0);
- set_ac_timer(&uart->timer, NOW() + MILLISECS(uart->timeout_ms));
+ init_timer(&uart->timer, ns16550_poll, port, 0);
+ set_timer(&uart->timer, NOW() + MILLISECS(uart->timeout_ms));
}
else
{
diff --git a/xen/include/asm-ia64/config.h b/xen/include/asm-ia64/config.h
index ed89d2996a..74ee48ac0f 100644
--- a/xen/include/asm-ia64/config.h
+++ b/xen/include/asm-ia64/config.h
@@ -141,10 +141,6 @@ struct page;
#undef alloc_task_struct
#define get_thread_info(v) alloc_thread_info(v)
-// initial task has a different name in Xen
-//#define idle0_task init_task
-#define idle0_vcpu init_task
-
// avoid redefining task_t in asm/thread_info.h
#define task_t struct domain
@@ -160,7 +156,7 @@ struct page;
#define platform_outl __ia64_outl
// FIXME: This just overrides a use in a typedef (not allowed in ia64,
-// or maybe just in older gcc's?) used in ac_timer.c but should be OK
+// or maybe just in older gcc's?) used in timer.c but should be OK
// (and indeed is probably required!) elsewhere
#undef __cacheline_aligned
#undef ____cacheline_aligned
diff --git a/xen/include/asm-ia64/vmx.h b/xen/include/asm-ia64/vmx.h
index f682b5acd1..910cc9b035 100644
--- a/xen/include/asm-ia64/vmx.h
+++ b/xen/include/asm-ia64/vmx.h
@@ -23,7 +23,7 @@
#define _ASM_IA64_VT_H
#define RR7_SWITCH_SHIFT 12 /* 4k enough */
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
extern void identify_vmx_feature(void);
extern unsigned int vmx_enabled;
diff --git a/xen/include/asm-ia64/vtm.h b/xen/include/asm-ia64/vtm.h
index 92564b9b6c..a89d9c2b21 100644
--- a/xen/include/asm-ia64/vtm.h
+++ b/xen/include/asm-ia64/vtm.h
@@ -23,7 +23,7 @@
#ifndef _VTM_H_
#define _VTM_H_
-#include <xen/ac_timer.h>
+#include <xen/timer.h>
#include <xen/types.h>
#define MAX_JUMP_STEP (5000) /* 500ms, max jump step */
@@ -46,7 +46,7 @@ typedef struct vtime {
uint64_t cfg_max_jump; // max jump within one time suspension
uint64_t cfg_min_grun; // min guest running time since last jump
// uint64_t latest_read_itc; // latest guest read ITC
- struct ac_timer vtm_timer;
+ struct timer vtm_timer;
// int triggered;
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 960e381f1f..d5d0e0acb0 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -148,7 +148,8 @@ extern unsigned long _end; /* standard ELF symbol */
#define SH_LINEAR_PT_VIRT_END (SH_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
/* Slot 260: per-domain mappings. */
#define PERDOMAIN_VIRT_START (PML4_ADDR(260))
-#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + PML4_ENTRY_BYTES)
+#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (PERDOMAIN_MBYTES<<20))
+#define PERDOMAIN_MBYTES ((unsigned long)GDT_LDT_MBYTES)
/* Slot 261: machine-to-phys conversion table (16GB). */
#define RDWR_MPT_VIRT_START (PML4_ADDR(261))
#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (16UL<<30))
@@ -195,8 +196,7 @@ extern unsigned long _end; /* standard ELF symbol */
* ------ ------
* I/O remapping area ( 4MB)
* Direct-map (1:1) area [Xen code/data/heap] (12MB)
- * map_domain_page cache ( 4MB)
- * Per-domain mappings ( 4MB)
+ * Per-domain mappings (inc. 4MB map_domain_page cache) ( 4MB)
* Shadow linear pagetable ( 4MB) ( 8MB)
* Guest linear pagetable ( 4MB) ( 8MB)
* Machine-to-physical translation table [writable] ( 4MB) (16MB)
@@ -209,7 +209,7 @@ extern unsigned long _end; /* standard ELF symbol */
#define IOREMAP_MBYTES 4
#define DIRECTMAP_MBYTES 12
#define MAPCACHE_MBYTES 4
-#define PERDOMAIN_MBYTES 4
+#define PERDOMAIN_MBYTES 8
#ifdef CONFIG_X86_PAE
# define LINEARPT_MBYTES 8
@@ -227,7 +227,7 @@ extern unsigned long _end; /* standard ELF symbol */
#define DIRECTMAP_VIRT_START (DIRECTMAP_VIRT_END - (DIRECTMAP_MBYTES<<20))
#define MAPCACHE_VIRT_END DIRECTMAP_VIRT_START
#define MAPCACHE_VIRT_START (MAPCACHE_VIRT_END - (MAPCACHE_MBYTES<<20))
-#define PERDOMAIN_VIRT_END MAPCACHE_VIRT_START
+#define PERDOMAIN_VIRT_END DIRECTMAP_VIRT_START
#define PERDOMAIN_VIRT_START (PERDOMAIN_VIRT_END - (PERDOMAIN_MBYTES<<20))
#define SH_LINEAR_PT_VIRT_END PERDOMAIN_VIRT_START
#define SH_LINEAR_PT_VIRT_START (SH_LINEAR_PT_VIRT_END - (LINEARPT_MBYTES<<20))
@@ -248,12 +248,10 @@ extern unsigned long _end; /* standard ELF symbol */
#ifdef CONFIG_X86_PAE
/* Hypervisor owns top 168MB of virtual address space. */
-# define __HYPERVISOR_VIRT_START 0xF5800000
-# define HYPERVISOR_VIRT_START (0xF5800000UL)
+#define HYPERVISOR_VIRT_START mk_unsigned_long(0xF5800000)
#else
/* Hypervisor owns top 64MB of virtual address space. */
-# define __HYPERVISOR_VIRT_START 0xFC000000
-# define HYPERVISOR_VIRT_START (0xFC000000UL)
+#define HYPERVISOR_VIRT_START mk_unsigned_long(0xFC000000)
#endif
#define L2_PAGETABLE_FIRST_XEN_SLOT \
@@ -284,14 +282,21 @@ extern unsigned long _end; /* standard ELF symbol */
extern unsigned long xenheap_phys_end; /* user-configurable */
#endif
-#define GDT_VIRT_START(ed) \
- (PERDOMAIN_VIRT_START + ((ed)->vcpu_id << PDPT_VCPU_VA_SHIFT))
-#define LDT_VIRT_START(ed) \
- (GDT_VIRT_START(ed) + (64*1024))
-
-#define PDPT_VCPU_SHIFT 5
-#define PDPT_VCPU_VA_SHIFT (PDPT_VCPU_SHIFT + PAGE_SHIFT)
-#define PDPT_L1_ENTRIES (MAX_VIRT_CPUS << PDPT_VCPU_SHIFT)
+/* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */
+#define GDT_LDT_VCPU_SHIFT 5
+#define GDT_LDT_VCPU_VA_SHIFT (GDT_LDT_VCPU_SHIFT + PAGE_SHIFT)
+#define GDT_LDT_MBYTES (MAX_VIRT_CPUS >> (20-GDT_LDT_VCPU_VA_SHIFT))
+#define GDT_LDT_VIRT_START PERDOMAIN_VIRT_START
+#define GDT_LDT_VIRT_END (GDT_LDT_VIRT_START + (GDT_LDT_MBYTES << 20))
+
+/* The address of a particular VCPU's GDT or LDT. */
+#define GDT_VIRT_START(v) \
+ (PERDOMAIN_VIRT_START + ((v)->vcpu_id << GDT_LDT_VCPU_VA_SHIFT))
+#define LDT_VIRT_START(v) \
+ (GDT_VIRT_START(v) + (64*1024))
+
+#define PDPT_L1_ENTRIES \
+ ((PERDOMAIN_VIRT_END - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
#define PDPT_L2_ENTRIES \
((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER)
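To make the new per-domain sizing concrete (assuming PAGE_SHIFT == 12 and MAX_VIRT_CPUS == 32, the values in this tree):

/* GDT_LDT_VCPU_VA_SHIFT = 5 + 12 = 17     -> 128kB of VA per VCPU
 *                                            (64kB GDT + 64kB LDT)
 * GDT_LDT_MBYTES        = 32 >> (20 - 17) -> 4MB GDT/LDT sub-area
 * PERDOMAIN_MBYTES (32-bit) == 8          -> PDPT_L1_ENTRIES = 8MB >> 12
 *                                            = 2048 L1 slots */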
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 513b7d8aff..6438757d67 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -13,6 +13,44 @@ struct trap_bounce {
unsigned long eip;
};
+#define MAPHASH_ENTRIES 8
+#define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
+#define MAPHASHENT_NOTINUSE ((u16)~0U)
+struct vcpu_maphash {
+ struct vcpu_maphash_entry {
+ unsigned long pfn;
+ uint16_t idx;
+ uint16_t refcnt;
+ } hash[MAPHASH_ENTRIES];
+} __cacheline_aligned;
+
+#define MAPCACHE_ORDER 10
+#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
+struct mapcache {
+ /* The PTEs that provide the mappings, and a cursor into the array. */
+ l1_pgentry_t *l1tab;
+ unsigned int cursor;
+
+ /* Protects map_domain_page(). */
+ spinlock_t lock;
+
+ /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
+ unsigned int epoch, shadow_epoch[MAX_VIRT_CPUS];
+ u32 tlbflush_timestamp;
+
+ /* Which mappings are in use, and which are garbage to reap next epoch? */
+ unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+ unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+
+ /* Lock-free per-VCPU hash of recently-used mappings. */
+ struct vcpu_maphash vcpu_maphash[MAX_VIRT_CPUS];
+};
+
+extern void mapcache_init(struct domain *);
+
+/* x86/64: toggle guest between kernel and user modes. */
+extern void toggle_guest_mode(struct vcpu *);
+
struct arch_domain
{
l1_pgentry_t *mm_perdomain_pt;
@@ -21,6 +59,11 @@ struct arch_domain
l3_pgentry_t *mm_perdomain_l3;
#endif
+#ifdef CONFIG_X86_32
+ /* map_domain_page() mapping cache. */
+ struct mapcache mapcache;
+#endif
+
/* Writable pagetables. */
struct ptwr_info ptwr[2];
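The per-VCPU maphash is a direct-mapped, power-of-two table keyed on the low PFN bits, which is what makes a lock-free fast path possible; roughly (a sketch, not the actual map_domain_page() code):

struct vcpu_maphash_entry *e =
    &v->domain->arch.mapcache.vcpu_maphash[v->vcpu_id].hash[MAPHASH_HASHFN(pfn)];
if ( (e->pfn == pfn) && (e->idx != MAPHASHENT_NOTINUSE) )
    e->refcnt++;   /* hit: reuse the cached slot without taking mapcache.lock */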
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index bbc01dea81..2e0937ea2d 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -309,16 +309,13 @@ struct ptwr_info {
unsigned long l1va;
/* Copy of the p.t. page, taken before guest is given write access. */
l1_pgentry_t *page;
- /* A temporary Xen mapping of the actual p.t. page. */
- l1_pgentry_t *pl1e;
/* Index in L2 page table where this L1 p.t. is always hooked. */
unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
/* Info about last ptwr update batch. */
unsigned int prev_nr_updates;
- /* Exec domain which created writable mapping. */
+ /* VCPU which created writable mapping. */
struct vcpu *vcpu;
- /* EIP of the address which took the original write fault
- used for stats collection only */
+ /* EIP of the original write fault (stats collection only). */
unsigned long eip;
};
@@ -336,11 +333,13 @@ int ptwr_do_page_fault(struct domain *, unsigned long,
int revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
void cleanup_writable_pagetable(struct domain *d);
-#define sync_pagetable_state(d) \
- do { \
- LOCK_BIGLOCK(d); \
- cleanup_writable_pagetable(d); \
- UNLOCK_BIGLOCK(d); \
+#define sync_pagetable_state(d) \
+ do { \
+ LOCK_BIGLOCK(d); \
+ /* Avoid racing with ptwr_destroy(). */ \
+ if ( !test_bit(_DOMF_dying, &(d)->domain_flags) ) \
+ cleanup_writable_pagetable(d); \
+ UNLOCK_BIGLOCK(d); \
} while ( 0 )
int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
diff --git a/xen/include/asm-x86/nmi.h b/xen/include/asm-x86/nmi.h
index 1529bbb8c7..d79b823ee2 100644
--- a/xen/include/asm-x86/nmi.h
+++ b/xen/include/asm-x86/nmi.h
@@ -2,6 +2,8 @@
#ifndef ASM_NMI_H
#define ASM_NMI_H
+#include <public/nmi.h>
+
struct cpu_user_regs;
typedef int (*nmi_callback_t)(struct cpu_user_regs *regs, int cpu);
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 5e0d60d5be..18748e3367 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -123,6 +123,7 @@
#define TBF_EXCEPTION_ERRCODE 2
#define TBF_INTERRUPT 8
#define TBF_FAILSAFE 16
+#define TBF_SLOW_IRET 32
/* 'arch_vcpu' flags values */
#define _TF_kernel_mode 0
@@ -190,7 +191,7 @@ extern void dodgy_tsc(void);
#ifdef CONFIG_X86_HT
extern void detect_ht(struct cpuinfo_x86 *c);
#else
-static inline void detect_ht(struct cpuinfo_x86 *c) {}
+static always_inline void detect_ht(struct cpuinfo_x86 *c) {}
#endif
/*
@@ -209,7 +210,7 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {}
/*
* CPUID functions returning a single datum
*/
-static inline unsigned int cpuid_eax(unsigned int op)
+static always_inline unsigned int cpuid_eax(unsigned int op)
{
unsigned int eax;
@@ -219,7 +220,7 @@ static inline unsigned int cpuid_eax(unsigned int op)
: "bx", "cx", "dx");
return eax;
}
-static inline unsigned int cpuid_ebx(unsigned int op)
+static always_inline unsigned int cpuid_ebx(unsigned int op)
{
unsigned int eax, ebx;
@@ -229,7 +230,7 @@ static inline unsigned int cpuid_ebx(unsigned int op)
: "cx", "dx" );
return ebx;
}
-static inline unsigned int cpuid_ecx(unsigned int op)
+static always_inline unsigned int cpuid_ecx(unsigned int op)
{
unsigned int eax, ecx;
@@ -239,7 +240,7 @@ static inline unsigned int cpuid_ecx(unsigned int op)
: "bx", "dx" );
return ecx;
}
-static inline unsigned int cpuid_edx(unsigned int op)
+static always_inline unsigned int cpuid_edx(unsigned int op)
{
unsigned int eax, edx;
@@ -281,7 +282,7 @@ static inline unsigned int cpuid_edx(unsigned int op)
*/
extern unsigned long mmu_cr4_features;
-static inline void set_in_cr4 (unsigned long mask)
+static always_inline void set_in_cr4 (unsigned long mask)
{
unsigned long dummy;
mmu_cr4_features |= mask;
@@ -292,7 +293,7 @@ static inline void set_in_cr4 (unsigned long mask)
: "=&r" (dummy) : "irg" (mask) );
}
-static inline void clear_in_cr4 (unsigned long mask)
+static always_inline void clear_in_cr4 (unsigned long mask)
{
unsigned long dummy;
mmu_cr4_features &= ~mask;
@@ -334,7 +335,7 @@ static inline void clear_in_cr4 (unsigned long mask)
outb((data), 0x23); \
} while (0)
-static inline void __monitor(const void *eax, unsigned long ecx,
+static always_inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
{
/* "monitor %eax,%ecx,%edx;" */
@@ -343,7 +344,7 @@ static inline void __monitor(const void *eax, unsigned long ecx,
: :"a" (eax), "c" (ecx), "d"(edx));
}
-static inline void __mwait(unsigned long eax, unsigned long ecx)
+static always_inline void __mwait(unsigned long eax, unsigned long ecx)
{
/* "mwait %eax,%ecx;" */
asm volatile(
@@ -460,7 +461,7 @@ struct extended_sigtable {
};
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
+static always_inline void rep_nop(void)
{
__asm__ __volatile__ ( "rep;nop" : : : "memory" );
}
@@ -471,7 +472,7 @@ static inline void rep_nop(void)
#ifdef CONFIG_MPENTIUMIII
#define ARCH_HAS_PREFETCH
-extern inline void prefetch(const void *x)
+extern always_inline void prefetch(const void *x)
{
__asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
}
@@ -482,12 +483,12 @@ extern inline void prefetch(const void *x)
#define ARCH_HAS_PREFETCHW
#define ARCH_HAS_SPINLOCK_PREFETCH
-extern inline void prefetch(const void *x)
+extern always_inline void prefetch(const void *x)
{
__asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
}
-extern inline void prefetchw(const void *x)
+extern always_inline void prefetchw(const void *x)
{
__asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
}
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index ae7c30079b..2e2769939e 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -341,10 +341,10 @@ extern int shadow_status_noswap;
#if SHADOW_VERBOSE_DEBUG
#define SH_LOG(_f, _a...) \
printk("DOM%uP%u: SH_LOG(%d): " _f "\n", \
- current->domain->domain_id , current->processor, __LINE__ , ## _a )
+ current->domain->domain_id , smp_processor_id(), __LINE__ , ## _a )
#define SH_VLOG(_f, _a...) \
printk("DOM%uP%u: SH_VLOG(%d): " _f "\n", \
- current->domain->domain_id, current->processor, __LINE__ , ## _a )
+ current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
#else
#define SH_LOG(_f, _a...) ((void)0)
#define SH_VLOG(_f, _a...) ((void)0)
@@ -353,7 +353,7 @@ extern int shadow_status_noswap;
#if SHADOW_VVERBOSE_DEBUG
#define SH_VVLOG(_f, _a...) \
printk("DOM%uP%u: SH_VVLOG(%d): " _f "\n", \
- current->domain->domain_id, current->processor, __LINE__ , ## _a )
+ current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
#else
#define SH_VVLOG(_f, _a...) ((void)0)
#endif
@@ -361,7 +361,7 @@ extern int shadow_status_noswap;
#if SHADOW_VVVERBOSE_DEBUG
#define SH_VVVLOG(_f, _a...) \
printk("DOM%uP%u: SH_VVVLOG(%d): " _f "\n", \
- current->domain->domain_id, current->processor, __LINE__ , ## _a )
+ current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
#else
#define SH_VVVLOG(_f, _a...) ((void)0)
#endif
@@ -369,7 +369,7 @@ extern int shadow_status_noswap;
#if FULLSHADOW_DEBUG
#define FSH_LOG(_f, _a...) \
printk("DOM%uP%u: FSH_LOG(%d): " _f "\n", \
- current->domain->domain_id, current->processor, __LINE__ , ## _a )
+ current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
#else
#define FSH_LOG(_f, _a...) ((void)0)
#endif
@@ -591,7 +591,7 @@ update_hl2e(struct vcpu *v, unsigned long va)
if ( need_flush )
{
perfc_incrc(update_hl2e_invlpg);
- flush_tlb_one_mask(v->domain->cpumask,
+ flush_tlb_one_mask(v->domain->domain_dirty_cpumask,
&linear_pg_table[l1_linear_offset(va)]);
}
}
diff --git a/xen/include/asm-x86/vmx.h b/xen/include/asm-x86/vmx.h
index 771ac68ef4..05852a88be 100644
--- a/xen/include/asm-x86/vmx.h
+++ b/xen/include/asm-x86/vmx.h
@@ -26,7 +26,7 @@
#include <asm/vmx_vmcs.h>
#include <asm/i387.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
extern int hvm_enabled;
@@ -38,7 +38,6 @@ extern void pic_irq_request(int *interrupt_request, int level);
extern void arch_vmx_do_launch(struct vcpu *);
extern void arch_vmx_do_resume(struct vcpu *);
-extern void arch_vmx_do_relaunch(struct vcpu *);
extern unsigned int cpu_rev;
@@ -506,7 +505,7 @@ static inline int vmx_reflect_exception(struct vcpu *v)
static inline unsigned int vmx_get_vcpu_nr(struct domain *d)
{
- return d->arch.vmx_platform.nr_vcpu;
+ return d->arch.vmx_platform.nr_vcpus;
}
static inline shared_iopage_t *get_sp(struct domain *d)
diff --git a/xen/include/asm-x86/vmx_intercept.h b/xen/include/asm-x86/vmx_intercept.h
index d832ecf12c..11487ebe1b 100644
--- a/xen/include/asm-x86/vmx_intercept.h
+++ b/xen/include/asm-x86/vmx_intercept.h
@@ -6,7 +6,7 @@
#include <xen/lib.h>
#include <xen/time.h>
#include <xen/errno.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#define MAX_IO_HANDLER 8
diff --git a/xen/include/asm-x86/vmx_platform.h b/xen/include/asm-x86/vmx_platform.h
index 636fbd9942..385f35d0c2 100644
--- a/xen/include/asm-x86/vmx_platform.h
+++ b/xen/include/asm-x86/vmx_platform.h
@@ -33,10 +33,10 @@
(((size_reg) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
#define operand_size(operand) \
- ((operand >> 24) & 0xFF)
+ ((operand >> 24) & 0xFF)
#define operand_index(operand) \
- ((operand >> 16) & 0xFF)
+ ((operand >> 16) & 0xFF)
/* for instruction.operand[].size */
#define BYTE 1
@@ -81,13 +81,13 @@ struct instruction {
struct vmx_platform {
unsigned long shared_page_va;
- unsigned int nr_vcpu;
- unsigned int lapic_enable;
+ unsigned int nr_vcpus;
+ unsigned int apic_enabled;
struct vmx_virpit vmx_pit;
struct vmx_io_handler vmx_io_handler;
struct vmx_virpic vmx_pic;
- struct vmx_vioapic vmx_vioapic;
+ struct vmx_vioapic vmx_vioapic;
unsigned char round_info[256];
spinlock_t round_robin_lock;
int interrupt_request;
diff --git a/xen/include/asm-x86/vmx_vlapic.h b/xen/include/asm-x86/vmx_vlapic.h
index ef33d42dab..2527800b60 100644
--- a/xen/include/asm-x86/vmx_vlapic.h
+++ b/xen/include/asm-x86/vmx_vlapic.h
@@ -21,7 +21,7 @@
#define VMX_VLAPIC_H
#include <asm/msr.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#if defined(__i386__) || defined(__x86_64__)
static inline int __fls(uint32_t word)
@@ -187,7 +187,7 @@ struct vlapic
uint32_t timer_current;
uint32_t timer_divconf;
uint32_t timer_divide_counter;
- struct ac_timer vlapic_timer;
+ struct timer vlapic_timer;
int intr_pending_count[MAX_VECTOR];
s_time_t timer_current_update;
uint32_t icr_high;
@@ -216,7 +216,7 @@ static inline int vlapic_set_irq(struct vlapic *t, uint8_t vec, uint8_t trig)
static inline int vlapic_timer_active(struct vlapic *vlapic)
{
- return active_ac_timer(&(vlapic->vlapic_timer));
+ return active_timer(&(vlapic->vlapic_timer));
}
int vlapic_find_highest_irr(struct vlapic *vlapic);
diff --git a/xen/include/asm-x86/vmx_vmcs.h b/xen/include/asm-x86/vmx_vmcs.h
index 53bde6dc5a..81b9fb6009 100644
--- a/xen/include/asm-x86/vmx_vmcs.h
+++ b/xen/include/asm-x86/vmx_vmcs.h
@@ -23,7 +23,7 @@
#include <asm/vmx_cpu.h>
#include <asm/vmx_platform.h>
#include <asm/vmx_vlapic.h>
-#include <public/vmx_assist.h>
+#include <public/hvm/vmx_assist.h>
extern int start_vmx(void);
extern void stop_vmx(void);
@@ -86,7 +86,8 @@ struct mmio_op {
#define PC_DEBUG_PORT 0x80
struct arch_vmx_struct {
- struct vmcs_struct *vmcs; /* VMCS pointer in virtual */
+ struct vmcs_struct *vmcs; /* VMCS pointer (virtual address). */
+ unsigned int launch_cpu; /* VMCS is valid on this CPU. */
unsigned long flags; /* VMCS flags */
unsigned long cpu_cr0; /* copy of guest CR0 */
unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */
@@ -99,7 +100,7 @@ struct arch_vmx_struct {
void *io_bitmap_a, *io_bitmap_b;
struct vlapic *vlapic;
u64 tsc_offset;
- struct ac_timer hlt_timer; /* hlt ins emulation wakeup timer */
+ struct timer hlt_timer; /* hlt ins emulation wakeup timer */
};
#define vmx_schedule_tail(next) \
diff --git a/xen/include/asm-x86/vmx_vpit.h b/xen/include/asm-x86/vmx_vpit.h
index 3fc86a5adf..b47e6d6512 100644
--- a/xen/include/asm-x86/vmx_vpit.h
+++ b/xen/include/asm-x86/vmx_vpit.h
@@ -6,7 +6,7 @@
#include <xen/lib.h>
#include <xen/time.h>
#include <xen/errno.h>
-#include <xen/ac_timer.h>
+#include <xen/timer.h>
#include <asm/vmx_vmcs.h>
#include <asm/vmx_vpic.h>
@@ -23,7 +23,7 @@ struct vmx_virpit {
u64 inject_point; /* the time inject virt intr */
u64 shift; /* save the value of offset - drift */
s_time_t scheduled; /* scheduled timer interrupt */
- struct ac_timer pit_timer; /* periodic timer for mode 2*/
+ struct timer pit_timer; /* periodic timer for mode 2*/
unsigned int channel; /* the pit channel, counter 0~2 */
unsigned int pending_intr_nr; /* the counter for pending timer interrupts */
u32 period; /* pit frequency in ns */
diff --git a/xen/include/asm-x86/x86_emulate.h b/xen/include/asm-x86/x86_emulate.h
index 19482c1538..d87d33f06c 100644
--- a/xen/include/asm-x86/x86_emulate.h
+++ b/xen/include/asm-x86/x86_emulate.h
@@ -18,10 +18,11 @@
* special treatment or emulation (*_emulated).
*
* The emulator assumes that an instruction accesses only one 'emulated memory'
- * location, and that this is one of its data operands. Instruction fetches and
+ * location, that this location is the given linear faulting address (cr2), and
+ * that this is one of the instruction's data operands. Instruction fetches and
* stack operations are assumed never to access emulated memory. The emulator
* automatically deduces which operand of a string-move operation is accessing
- * emulated memory, and requires that the other operand accesses normal memory.
+ * emulated memory, and assumes that the other operand accesses normal memory.
*
* NOTES:
* 1. The emulator isn't very smart about emulated vs. standard memory.
@@ -36,6 +37,7 @@
* then immediately bail.
* 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
* cmpxchg8b_emulated need support 8-byte accesses.
+ * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
*/
/* Access completed successfully: continue emulation as normal. */
#define X86EMUL_CONTINUE 0
@@ -141,22 +143,27 @@ x86_emulate_write_std(
struct cpu_user_regs;
-/* Current execution mode, passed to the emulator. */
-#define X86EMUL_MODE_REAL 0
-#define X86EMUL_MODE_PROT16 2
-#define X86EMUL_MODE_PROT32 4
-#define X86EMUL_MODE_PROT64 8
+/* Execution mode, passed to the emulator. */
+#define X86EMUL_MODE_REAL 0 /* Real mode. */
+#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
+#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
+#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
+
+/* Host execution mode. */
+#if defined(__i386__)
+#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
+#elif defined(__x86_64__)
+#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
+#endif
/*
* x86_emulate_memop: Emulate an instruction that faulted attempting to
* read/write a 'special' memory area.
* @regs: Register state at time of fault.
- * @cr2: Linear faulting address.
+ * @cr2: Linear faulting address within an emulated/special memory area.
* @ops: Interface to access special memory.
- * @mode: Current execution mode, represented by the default size of memory
- * addresses, in bytes. Valid values are 2, 4 and 8 (x86/64 only).
- * Alternatively use the appropriate X86EMUL_MODE value (which also
- * includes a value for emulating real mode).
+ * @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
+ * Returns -1 on failure, 0 on success.
*/
extern int
x86_emulate_memop(
diff --git a/xen/include/public/arch-ia64.h b/xen/include/public/arch-ia64.h
index 7eaf856174..a1013e3112 100644
--- a/xen/include/public/arch-ia64.h
+++ b/xen/include/public/arch-ia64.h
@@ -9,7 +9,7 @@
/* Maximum number of virtual CPUs in multi-processor guests. */
/* WARNING: before changing this, check that shared_info fits on a page */
-#define MAX_VIRT_CPUS 1
+#define MAX_VIRT_CPUS 4
#ifndef __ASSEMBLY__
diff --git a/xen/include/public/arch-x86_32.h b/xen/include/public/arch-x86_32.h
index 9d7b69f694..534acb6b78 100644
--- a/xen/include/public/arch-x86_32.h
+++ b/xen/include/public/arch-x86_32.h
@@ -49,10 +49,15 @@
* machine->physical mapping table starts at this address, read-only.
*/
#ifdef CONFIG_X86_PAE
-# define HYPERVISOR_VIRT_START (0xF5800000UL)
+#define __HYPERVISOR_VIRT_START 0xF5800000
#else
-# define HYPERVISOR_VIRT_START (0xFC000000UL)
+#define __HYPERVISOR_VIRT_START 0xFC000000
#endif
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#endif
+
#ifndef machine_to_phys_mapping
#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
#endif
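The mk_unsigned_long() wrapper, defined in the shared public headers, exists so the same constant works from both C and assembly; it is roughly:

#ifndef __ASSEMBLY__
#define mk_unsigned_long(x) x ## UL   /* C: append the UL suffix */
#else
#define mk_unsigned_long(x) x         /* assembly: bare constant */
#endif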
@@ -130,6 +135,7 @@ typedef struct arch_shared_info {
unsigned long max_pfn; /* max pfn that appears in table */
/* Frame containing list of mfns containing list of mfns containing p2m. */
unsigned long pfn_to_mfn_frame_list_list;
+ unsigned long nmi_reason;
} arch_shared_info_t;
typedef struct {
@@ -137,7 +143,7 @@ typedef struct {
unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
} arch_vcpu_info_t;
-#endif
+#endif /* !__ASSEMBLY__ */
#endif
diff --git a/xen/include/public/arch-x86_64.h b/xen/include/public/arch-x86_64.h
index 5e3b6a7671..c8c2bd2809 100644
--- a/xen/include/public/arch-x86_64.h
+++ b/xen/include/public/arch-x86_64.h
@@ -59,9 +59,12 @@
/* And the trap vector is... */
#define TRAP_INSTR "syscall"
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
+
#ifndef HYPERVISOR_VIRT_START
-#define HYPERVISOR_VIRT_START (0xFFFF800000000000UL)
-#define HYPERVISOR_VIRT_END (0xFFFF880000000000UL)
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
#endif
/* Maximum number of virtual CPUs in multi-processor guests. */
@@ -85,11 +88,20 @@
#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
/*
- * int HYPERVISOR_switch_to_user(void)
+ * int HYPERVISOR_iret(void)
* All arguments are on the kernel stack, in the following format.
* Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ * RING0 -> RING3 kernel mode.
+ * RING1 -> RING3 kernel mode.
+ * RING2 -> RING3 kernel mode.
+ * RING3 -> RING3 user mode.
+ * However, RING0 indicates that the guest kernel should return to itself
+ * directly with
+ * orb $3,1*8(%rsp)
+ * iretq
* If flags contains VGCF_IN_SYSCALL:
- * Restore RAX, RIP, RFLAGS, RSP.
+ * Restore RAX, RIP, RFLAGS, RSP.
* Discard R11, RCX, CS, SS.
* Otherwise:
* Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
@@ -97,10 +109,19 @@
*/
/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
#define VGCF_IN_SYSCALL (1<<8)
+struct iret_context {
+ /* Top of stack (%rsp at point of hypercall). */
+ uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+ /* Bottom of iret stack frame. */
+};
+/*
+ * For compatibility with HYPERVISOR_switch_to_user, the old name for
+ * HYPERVISOR_iret.
+ */
struct switch_to_user {
/* Top of stack (%rsp at point of hypercall). */
uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
- /* Bottom of switch_to_user stack frame. */
+ /* Bottom of iret stack frame. */
};
/*
@@ -199,6 +220,7 @@ typedef struct arch_shared_info {
unsigned long max_pfn; /* max pfn that appears in table */
/* Frame containing list of mfns containing list of mfns containing p2m. */
unsigned long pfn_to_mfn_frame_list_list;
+ unsigned long nmi_reason;
} arch_shared_info_t;
typedef struct {
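
To make the iret stack frame concrete, here is a hedged C rendering of what a guest conceptually pushes before invoking the hypercall; real kernels build this frame in assembly at the top of the stack, and the regs source here is hypothetical.

    struct iret_context frame = {
        .rax    = regs->rax,
        .r11    = regs->r11,
        .rcx    = regs->rcx,
        .flags  = VGCF_IN_SYSCALL,   /* only when exiting SYSCALL context */
        .rip    = regs->rip,
        .cs     = regs->cs,          /* RING0 here requests direct return */
        .rflags = regs->rflags,
        .rsp    = regs->rsp,
        .ss     = regs->ss,
    };
    /* With %rsp pointing at &frame, the guest issues __HYPERVISOR_iret;
     * on success the hypercall never returns. */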
diff --git a/xen/include/public/dom0_ops.h b/xen/include/public/dom0_ops.h
index a2c66be1f4..9e59e771b1 100644
--- a/xen/include/public/dom0_ops.h
+++ b/xen/include/public/dom0_ops.h
@@ -94,14 +94,14 @@ typedef struct {
xen_domain_handle_t handle;
} dom0_getdomaininfo_t;
-#define DOM0_SETDOMAININFO 13
+#define DOM0_SETVCPUCONTEXT 13
typedef struct {
/* IN variables. */
domid_t domain;
uint32_t vcpu;
/* IN/OUT parameters */
vcpu_guest_context_t *ctxt;
-} dom0_setdomaininfo_t;
+} dom0_setvcpucontext_t;
#define DOM0_MSR 15
typedef struct {
@@ -163,13 +163,13 @@ typedef struct {
/*
* Set which physical cpus a vcpu can execute on.
*/
-#define DOM0_PINCPUDOMAIN 20
+#define DOM0_SETVCPUAFFINITY 20
typedef struct {
/* IN variables. */
domid_t domain;
uint32_t vcpu;
cpumap_t cpumap;
-} dom0_pincpudomain_t;
+} dom0_setvcpuaffinity_t;
/* Get trace buffers machine base address */
#define DOM0_TBUFCONTROL 21
@@ -436,13 +436,13 @@ typedef struct {
dom0_getmemlist_t getmemlist;
dom0_schedctl_t schedctl;
dom0_adjustdom_t adjustdom;
- dom0_setdomaininfo_t setdomaininfo;
+ dom0_setvcpucontext_t setvcpucontext;
dom0_getdomaininfo_t getdomaininfo;
dom0_getpageframeinfo_t getpageframeinfo;
dom0_msr_t msr;
dom0_settime_t settime;
dom0_readconsole_t readconsole;
- dom0_pincpudomain_t pincpudomain;
+ dom0_setvcpuaffinity_t setvcpuaffinity;
dom0_tbufcontrol_t tbufcontrol;
dom0_physinfo_t physinfo;
dom0_sched_id_t sched_id;
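
A hedged sketch of using the renamed affinity op; the dom0_op_t envelope (cmd plus union u) follows the usual shape of this interface, and the hypercall plumbing is omitted.

    dom0_op_t op = { .cmd = DOM0_SETVCPUAFFINITY };

    op.u.setvcpuaffinity.domain = dom;       /* target domain id      */
    op.u.setvcpuaffinity.vcpu   = 0;         /* VCPU to pin           */
    op.u.setvcpuaffinity.cpumap = 1UL << 2;  /* may run on CPU 2 only */
    /* ... then issue the op via the usual dom0_op hypercall path ... */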
diff --git a/xen/include/public/hvm/hvm_info_table.h b/xen/include/public/hvm/hvm_info_table.h
new file mode 100644
index 0000000000..a576eb7c50
--- /dev/null
+++ b/xen/include/public/hvm/hvm_info_table.h
@@ -0,0 +1,24 @@
+/******************************************************************************
+ * hvm/hvm_info_table.h
+ *
+ * HVM parameter and information table, written into guest memory map.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+
+#define HVM_INFO_PFN 0x09F
+#define HVM_INFO_OFFSET 0x800
+#define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET)
+
+struct hvm_info_table {
+ char signature[8]; /* "HVM INFO" */
+ uint32_t length;
+ uint8_t checksum;
+ uint8_t acpi_enabled;
+ uint8_t apic_enabled;
+ uint8_t pad[1];
+ uint32_t nr_vcpus;
+};
+
+#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */
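
A hedged consumer-side sketch: firmware locating the table at HVM_INFO_PADDR might validate it as below, assuming the conventional byte-wise sum-to-zero checksum over the first length bytes.

    static int hvm_info_valid(const struct hvm_info_table *t)
    {
        const uint8_t *p = (const uint8_t *)t;
        uint8_t sum = 0;
        uint32_t i;

        if ( memcmp(t->signature, "HVM INFO", 8) != 0 )
            return 0;
        for ( i = 0; i < t->length; i++ )
            sum += p[i];        /* the checksum byte itself is included */
        return sum == 0;        /* valid iff all bytes sum to zero      */
    }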
diff --git a/xen/include/public/io/ioreq.h b/xen/include/public/hvm/ioreq.h
index 9b462f242f..308ac39c9a 100644
--- a/xen/include/public/io/ioreq.h
+++ b/xen/include/public/hvm/ioreq.h
@@ -38,21 +38,21 @@
/*
* VMExit dispatcher should cooperate with instruction decoder to
* prepare this structure and notify service OS and DM by sending
- * virq
+ * virq
*/
typedef struct {
- uint64_t addr; /* physical address */
- uint64_t size; /* size in bytes */
- uint64_t count; /* for rep prefixes */
+ uint64_t addr; /* physical address */
+ uint64_t size; /* size in bytes */
+ uint64_t count; /* for rep prefixes */
union {
- uint64_t data; /* data */
- void *pdata; /* pointer to data */
+ uint64_t data; /* data */
+ void *pdata; /* pointer to data */
} u;
uint8_t state:4;
- uint8_t pdata_valid:1; /* if 1, use pdata above */
- uint8_t dir:1; /* 1=read, 0=write */
+ uint8_t pdata_valid:1; /* if 1, use pdata above */
+ uint8_t dir:1; /* 1=read, 0=write */
uint8_t df:1;
- uint8_t type; /* I/O type */
+ uint8_t type; /* I/O type */
} ioreq_t;
#define MAX_VECTOR 256
@@ -61,16 +61,15 @@ typedef struct {
#define INTR_LEN_32 (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t)))
typedef struct {
- uint16_t pic_elcr;
- uint16_t pic_irr;
- uint16_t pic_last_irr;
- uint16_t pic_clear_irr;
- int eport; /* Event channel port */
+ uint16_t pic_elcr;
+ uint16_t pic_irr;
+ uint16_t pic_last_irr;
+ uint16_t pic_clear_irr;
+ int eport; /* Event channel port */
} global_iodata_t;
typedef struct {
- ioreq_t vp_ioreq;
- unsigned long vp_intr[INTR_LEN];
+ ioreq_t vp_ioreq;
} vcpu_iodata_t;
typedef struct {
diff --git a/xen/include/public/vmx_assist.h b/xen/include/public/hvm/vmx_assist.h
index 4826628c2f..4826628c2f 100644
--- a/xen/include/public/vmx_assist.h
+++ b/xen/include/public/hvm/vmx_assist.h
diff --git a/xen/include/public/nmi.h b/xen/include/public/nmi.h
new file mode 100644
index 0000000000..0c0c67b920
--- /dev/null
+++ b/xen/include/public/nmi.h
@@ -0,0 +1,54 @@
+/******************************************************************************
+ * nmi.h
+ *
+ * NMI callback registration and reason codes.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_NMI_H__
+#define __XEN_PUBLIC_NMI_H__
+
+/*
+ * NMI reason codes:
+ * Currently these are x86-specific, stored in arch_shared_info.nmi_reason.
+ */
+ /* I/O-check error reported via ISA port 0x61, bit 6. */
+#define _XEN_NMIREASON_io_error 0
+#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error)
+ /* Parity error reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_parity_error 1
+#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error)
+ /* Unknown hardware-generated NMI. */
+#define _XEN_NMIREASON_unknown 2
+#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown)
+
+/*
+ * long nmi_op(unsigned int cmd, void *arg)
+ * NB. All ops return zero on success, else a negative error code.
+ */
+
+/*
+ * Register NMI callback for this (calling) VCPU. Currently this only makes
+ * sense for domain 0, vcpu 0; all other callers receive -EINVAL.
+ * arg == address of callback function.
+ */
+#define XENNMI_register_callback 0
+
+/*
+ * Deregister NMI callback for this (calling) VCPU.
+ * arg == NULL.
+ */
+#define XENNMI_unregister_callback 1
+
+#endif /* __XEN_PUBLIC_NMI_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
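
A hedged guest-side sketch tying the pieces together: register a callback, then consume the reason bits latched into arch_shared_info.nmi_reason. The HYPERVISOR_nmi_op wrapper, the HYPERVISOR_shared_info pointer, the fetch-and-clear idiom and the handle_* helpers are all assumptions.

    static void nmi_callback(void)
    {
        /* Atomically fetch and clear the latched reason bits. */
        unsigned long reason =
            xchg(&HYPERVISOR_shared_info->arch.nmi_reason, 0);

        if ( reason & XEN_NMIREASON_io_error )
            handle_io_check_error();     /* hypothetical helper */
        if ( reason & XEN_NMIREASON_parity_error )
            handle_parity_error();       /* hypothetical helper */
        if ( reason & XEN_NMIREASON_unknown )
            handle_unknown_nmi();        /* hypothetical helper */
    }

    static void nmi_setup(void)          /* hypothetical init hook */
    {
        /* Only dom0/vcpu0 may register; everyone else gets -EINVAL. */
        HYPERVISOR_nmi_op(XENNMI_register_callback, (void *)nmi_callback);
    }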
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 3aa683c018..ca1d4d1d03 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -53,12 +53,14 @@
#define __HYPERVISOR_grant_table_op 20
#define __HYPERVISOR_vm_assist 21
#define __HYPERVISOR_update_va_mapping_otherdomain 22
-#define __HYPERVISOR_switch_vm86 23 /* x86/32 only */
-#define __HYPERVISOR_switch_to_user 23 /* x86/64 only */
+#define __HYPERVISOR_iret 23 /* x86 only */
+#define __HYPERVISOR_switch_vm86 23 /* x86/32 only (obsolete name) */
+#define __HYPERVISOR_switch_to_user 23 /* x86/64 only (obsolete name) */
#define __HYPERVISOR_vcpu_op 24
#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
#define __HYPERVISOR_mmuext_op 26
#define __HYPERVISOR_acm_op 27
+#define __HYPERVISOR_nmi_op 28
/*
* VIRTUAL INTERRUPTS
@@ -69,10 +71,7 @@
#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */
#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */
#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
-#define VIRQ_PARITY_ERR 4 /* (DOM0) NMI parity error (port 0x61, bit 7). */
-#define VIRQ_IO_ERR 5 /* (DOM0) NMI I/O error (port 0x61, bit 6). */
#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
-#define VIRQ_NMI 7 /* (DOM0) Unknown NMI (not from ISA port 0x61).*/
#define NR_VIRQS 8
/*
@@ -426,6 +425,15 @@ typedef uint64_t cpumap_t;
typedef uint8_t xen_domain_handle_t[16];
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+
+#else /* __ASSEMBLY__ */
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+
#endif /* !__ASSEMBLY__ */
#endif /* __XEN_PUBLIC_XEN_H__ */
diff --git a/xen/include/xen/bitmap.h b/xen/include/xen/bitmap.h
index 3703384c3d..91622645c5 100644
--- a/xen/include/xen/bitmap.h
+++ b/xen/include/xen/bitmap.h
@@ -41,6 +41,8 @@
* bitmap_weight(src, nbits) Hamming Weight: number set bits
* bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
+ * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf
+ * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf
*/
/*
@@ -93,6 +95,10 @@ extern int __bitmap_subset(const unsigned long *bitmap1,
const unsigned long *bitmap2, int bits);
extern int __bitmap_weight(const unsigned long *bitmap, int bits);
+extern int bitmap_scnprintf(char *buf, unsigned int len,
+ const unsigned long *src, int nbits);
+extern int bitmap_scnlistprintf(char *buf, unsigned int len,
+ const unsigned long *src, int nbits);
extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
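
A hedged usage sketch of the two new formatters; DECLARE_BITMAP, bitmap_zero and __set_bit are the usual companions assumed here, and the outputs shown are illustrative.

    static void bitmap_format_demo(void)     /* hypothetical */
    {
        DECLARE_BITMAP(map, 32);
        char buf[64];

        bitmap_zero(map, 32);
        __set_bit(0, map);
        __set_bit(1, map);
        __set_bit(5, map);

        bitmap_scnprintf(buf, sizeof(buf), map, 32);     /* hex:  "00000023" */
        bitmap_scnlistprintf(buf, sizeof(buf), map, 32); /* list: "0-1,5"    */
    }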
diff --git a/xen/include/xen/config.h b/xen/include/xen/config.h
index e59bb86a7c..519dd4a3e5 100644
--- a/xen/include/xen/config.h
+++ b/xen/include/xen/config.h
@@ -43,4 +43,13 @@
#define __STR(...) #__VA_ARGS__
#define STR(...) __STR(__VA_ARGS__)
+#ifndef __ASSEMBLY__
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+#else /* __ASSEMBLY__ */
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+#endif /* !__ASSEMBLY__ */
+
#endif /* __XEN_CONFIG_H__ */
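
The effect on a hypothetical constant, shown in both compilation contexts:

    #define EXAMPLE_ADDR mk_unsigned_long(0xFC000000)
    /* In C:        expands to 0xFC000000UL, a true unsigned long constant. */
    /* In assembly: expands to 0xFC000000, since gas rejects C suffixes.    */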
diff --git a/xen/include/xen/cpumask.h b/xen/include/xen/cpumask.h
index 9ccafc7999..f1e256b4c8 100644
--- a/xen/include/xen/cpumask.h
+++ b/xen/include/xen/cpumask.h
@@ -8,8 +8,8 @@
* See detailed comments in the file xen/bitmap.h describing the
* data type on which these cpumasks are based.
*
- * For details of cpumask_scnprintf() and cpumask_parse(),
- * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ * For details of cpumask_scnprintf() and cpulist_scnprintf(),
+ * see bitmap_scnprintf() and bitmap_scnlistprintf() in lib/bitmap.c.
*
* The available cpumask operations are:
*
@@ -36,8 +36,8 @@
* void cpus_shift_right(dst, src, n) Shift right
* void cpus_shift_left(dst, src, n) Shift left
*
- * int first_cpu(mask) Number lowest set bit, or >= NR_CPUS
- * int next_cpu(cpu, mask) Next cpu past 'cpu', or >= NR_CPUS
+ * int first_cpu(mask) Number lowest set bit, or NR_CPUS
+ * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS
*
* cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
* CPU_MASK_ALL Initializer - all bits set
@@ -45,7 +45,7 @@
* unsigned long *cpus_addr(mask) Array of unsigned long's in mask
*
* int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
- * int cpumask_parse(ubuf, ulen, mask) Parse ascii string as cpumask
+ * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing
*
* for_each_cpu_mask(cpu, mask) for-loop cpu over mask
*
@@ -207,13 +207,13 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
#define first_cpu(src) __first_cpu(&(src), NR_CPUS)
static inline int __first_cpu(const cpumask_t *srcp, int nbits)
{
- return find_first_bit(srcp->bits, nbits);
+ return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
}
#define next_cpu(n, src) __next_cpu((n), &(src), NR_CPUS)
static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits)
{
- return find_next_bit(srcp->bits, nbits, n+1);
+ return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
}
#define cpumask_of_cpu(cpu) \
@@ -259,7 +259,6 @@ static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits)
#define cpus_addr(src) ((src).bits)
-/*
#define cpumask_scnprintf(buf, len, src) \
__cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
static inline int __cpumask_scnprintf(char *buf, int len,
@@ -268,14 +267,13 @@ static inline int __cpumask_scnprintf(char *buf, int len,
return bitmap_scnprintf(buf, len, srcp->bits, nbits);
}
-#define cpumask_parse(ubuf, ulen, src) \
- __cpumask_parse((ubuf), (ulen), &(src), NR_CPUS)
-static inline int __cpumask_parse(const char __user *buf, int len,
- cpumask_t *dstp, int nbits)
+#define cpulist_scnprintf(buf, len, src) \
+ __cpulist_scnprintf((buf), (len), &(src), NR_CPUS)
+static inline int __cpulist_scnprintf(char *buf, int len,
+ const cpumask_t *srcp, int nbits)
{
- return bitmap_parse(buf, len, dstp->bits, nbits);
+ return bitmap_scnlistprintf(buf, len, srcp->bits, nbits);
}
-*/
#if NR_CPUS > 1
#define for_each_cpu_mask(cpu, mask) \
@@ -368,7 +366,7 @@ extern cpumask_t cpu_present_map;
for_each_cpu_mask(cpu, (mask)) \
if (cpu_online(cpu)) \
break; \
- min_t(int, NR_CPUS, cpu); \
+ cpu; \
})
#define for_each_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map)
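
A hedged sketch of the tightened sentinel: first_cpu()/next_cpu() now return exactly NR_CPUS when no bit qualifies, so callers can compare with == rather than >= (cpu_online_map is the usual global assumed here; for_each_cpu_mask remains the idiomatic shorthand).

    static void walk_online(void)            /* hypothetical */
    {
        cpumask_t mask = cpu_online_map;
        char buf[128];
        int cpu;

        for ( cpu = first_cpu(mask); cpu != NR_CPUS; cpu = next_cpu(cpu, mask) )
        {
            /* ... per-CPU work ... */
        }

        cpulist_scnprintf(buf, sizeof(buf), mask);   /* e.g. "0-3,6" */
    }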
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index 761bf87a54..4ebf0ca286 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -18,8 +18,6 @@ extern int arch_do_createdomain(struct vcpu *v);
extern int arch_set_info_guest(
struct vcpu *v, struct vcpu_guest_context *c);
-extern void vcpu_migrate_cpu(struct vcpu *v, int newcpu);
-
extern void free_perdomain_pt(struct domain *d);
extern void domain_relinquish_resources(struct domain *d);
diff --git a/xen/include/xen/domain_page.h b/xen/include/xen/domain_page.h
index ce152aece2..b73011216b 100644
--- a/xen/include/xen/domain_page.h
+++ b/xen/include/xen/domain_page.h
@@ -2,6 +2,8 @@
* domain_page.h
*
* Allow temporary mapping of domain page frames into Xen space.
+ *
+ * Copyright (c) 2003-2006, Keir Fraser <keir@xensource.com>
*/
#ifndef __XEN_DOMAIN_PAGE_H__
@@ -10,22 +12,27 @@
#include <xen/config.h>
#include <xen/mm.h>
-#define map_domain_page(pfn) map_domain_pages(pfn,0)
-#define unmap_domain_page(va) unmap_domain_pages(va,0)
-
#ifdef CONFIG_DOMAIN_PAGE
/*
- * Maps a given range of page frames, returning the mapped virtual address. The
- * pages are now accessible until a corresponding call to unmap_domain_page().
+ * Map a given page frame, returning the mapped virtual address. The page is
+ * then accessible within the current VCPU until a corresponding unmap call.
+ */
+extern void *map_domain_page(unsigned long pfn);
+
+/*
+ * Pass a VA within a page previously mapped in the context of the
+ * currently-executing VCPU via a call to map_domain_page().
*/
-extern void *map_domain_pages(unsigned long pfn, unsigned int order);
+extern void unmap_domain_page(void *va);
/*
- * Pass a VA within the first page of a range previously mapped with
- * map_omain_pages(). Those pages will then be removed from the mapping lists.
+ * Similar to the above calls, except the mapping is accessible in all
+ * address spaces (not just within the VCPU that created the mapping). Global
+ * mappings can also be unmapped from any context.
*/
-extern void unmap_domain_pages(void *va, unsigned int order);
+extern void *map_domain_page_global(unsigned long pfn);
+extern void unmap_domain_page_global(void *va);
#define DMCACHE_ENTRY_VALID 1U
#define DMCACHE_ENTRY_HELD 2U
@@ -87,8 +94,11 @@ domain_mmap_cache_destroy(struct domain_mmap_cache *cache)
#else /* !CONFIG_DOMAIN_PAGE */
-#define map_domain_pages(pfn,order) phys_to_virt((pfn)<<PAGE_SHIFT)
-#define unmap_domain_pages(va,order) ((void)((void)(va),(void)(order)))
+#define map_domain_page(pfn) phys_to_virt((pfn)<<PAGE_SHIFT)
+#define unmap_domain_page(va) ((void)(va))
+
+#define map_domain_page_global(pfn) phys_to_virt((pfn)<<PAGE_SHIFT)
+#define unmap_domain_page_global(va) ((void)(va))
struct domain_mmap_cache {
};
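
A hedged usage sketch of the reworked interface (function names hypothetical):

    static void copy_from_frame(void *dst, unsigned long pfn)
    {
        void *va = map_domain_page(pfn);   /* valid in this VCPU only */
        memcpy(dst, va, PAGE_SIZE);
        unmap_domain_page(va);
    }

    static void *make_shared_mapping(unsigned long pfn)
    {
        /* Valid in every address space until unmap_domain_page_global(). */
        return map_domain_page_global(pfn);
    }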
diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h
index 2c77b43056..01440e1533 100644
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -53,10 +53,16 @@ extern long vm_assist(struct domain *, unsigned int, unsigned int);
/* vsprintf.c */
extern int sprintf(char * buf, const char * fmt, ...)
__attribute__ ((format (printf, 2, 3)));
-extern int vsprintf(char *buf, const char *, va_list);
+extern int vsprintf(char *buf, const char *, va_list)
+ __attribute__ ((format (printf, 2, 0)));
extern int snprintf(char * buf, size_t size, const char * fmt, ...)
__attribute__ ((format (printf, 3, 4)));
-extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+ __attribute__ ((format (printf, 3, 0)));
+extern int scnprintf(char * buf, size_t size, const char * fmt, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+ __attribute__ ((format (printf, 3, 0)));
long simple_strtol(
const char *cp,char **endp, unsigned int base);
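
Assuming these declarations mirror the usual Linux semantics, scnprintf() returns the number of characters actually written rather than the would-be untruncated length, which makes incremental formatting safe; a hedged illustration:

    static void build_string(void)           /* hypothetical */
    {
        char buf[16];
        int len = 0;

        len += scnprintf(buf + len, sizeof(buf) - len, "cpus: ");
        len += scnprintf(buf + len, sizeof(buf) - len, "%d-%d", 0, 3);
        /* len never exceeds sizeof(buf) - 1, so the pointer arithmetic
         * above cannot run past the buffer; with snprintf() it could. */
    }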
diff --git a/xen/include/xen/perfc_defn.h b/xen/include/xen/perfc_defn.h
index 1e6820acad..60d123689a 100644
--- a/xen/include/xen/perfc_defn.h
+++ b/xen/include/xen/perfc_defn.h
@@ -32,7 +32,7 @@ PERFCOUNTER_CPU(ipis, "#IPIs")
PERFCOUNTER_CPU(irq_time, "cycles spent in irq handler")
PERFCOUNTER_CPU(apic_timer, "apic timer interrupts")
-PERFCOUNTER_CPU(ac_timer_max, "ac_timer max error (ns)")
+PERFCOUNTER_CPU(timer_max, "timer max error (ns)")
PERFCOUNTER_CPU(sched_irq, "sched: timer")
PERFCOUNTER_CPU(sched_run, "sched: runs through scheduler")
PERFCOUNTER_CPU(sched_ctx, "sched: context switches")
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index 58c33e8b3c..d61d5c70d3 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -13,19 +13,50 @@
struct schedule_data {
spinlock_t schedule_lock; /* spinlock protecting curr */
- struct vcpu *curr; /* current task */
- struct vcpu *idle; /* idle task for this cpu */
+ struct vcpu *curr; /* current task */
+ struct vcpu *idle; /* idle task for this cpu */
void *sched_priv;
- struct ac_timer s_timer; /* scheduling timer */
+ struct timer s_timer; /* scheduling timer */
unsigned long tick; /* current periodic 'tick' */
#ifdef BUCKETS
u32 hist[BUCKETS]; /* for scheduler latency histogram */
#endif
} __cacheline_aligned;
+extern struct schedule_data schedule_data[];
+
+static inline void vcpu_schedule_lock(struct vcpu *v)
+{
+ unsigned int cpu;
+
+ for ( ; ; )
+ {
+ cpu = v->processor;
+ spin_lock(&schedule_data[cpu].schedule_lock);
+ if ( likely(v->processor == cpu) )
+ break;
+ spin_unlock(&schedule_data[cpu].schedule_lock);
+ }
+}
+
+#define vcpu_schedule_lock_irq(v) \
+ do { local_irq_disable(); vcpu_schedule_lock(v); } while ( 0 )
+#define vcpu_schedule_lock_irqsave(v, flags) \
+ do { local_irq_save(flags); vcpu_schedule_lock(v); } while ( 0 )
+
+static inline void vcpu_schedule_unlock(struct vcpu *v)
+{
+ spin_unlock(&schedule_data[v->processor].schedule_lock);
+}
+
+#define vcpu_schedule_unlock_irq(v) \
+ do { vcpu_schedule_unlock(v); local_irq_enable(); } while ( 0 )
+#define vcpu_schedule_unlock_irqrestore(v, flags) \
+ do { vcpu_schedule_unlock(v); local_irq_restore(flags); } while ( 0 )
+
struct task_slice {
struct vcpu *task;
- s_time_t time;
+ s_time_t time;
};
struct scheduler {
@@ -39,6 +70,7 @@ struct scheduler {
void (*rem_task) (struct vcpu *);
void (*sleep) (struct vcpu *);
void (*wake) (struct vcpu *);
+ int (*set_affinity) (struct vcpu *, cpumask_t *);
struct task_slice (*do_schedule) (s_time_t);
int (*control) (struct sched_ctl_cmd *);
int (*adjdom) (struct domain *,
@@ -47,6 +79,4 @@ struct scheduler {
void (*dump_cpu_state) (int);
};
-extern struct schedule_data schedule_data[];
-
#endif /* __XEN_SCHED_IF_H__ */
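
The loop in vcpu_schedule_lock() is the usual lock-the-moving-target idiom: v->processor may change while we wait, so the lock is taken, the field re-checked, and the attempt retried on a mismatch. A hedged caller sketch (function name hypothetical):

    static void adjust_vcpu_state(struct vcpu *v)
    {
        unsigned long flags;

        vcpu_schedule_lock_irqsave(v, flags);
        /* v->processor is now stable: we hold that CPU's schedule_lock. */
        /* ... manipulate v's scheduling state ... */
        vcpu_schedule_unlock_irqrestore(v, flags);
    }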
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index c686394b7f..df7611a70b 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -9,7 +9,7 @@
#include <public/xen.h>
#include <public/dom0_ops.h>
#include <xen/time.h>
-#include <xen/ac_timer.h>
+#include <xen/timer.h>
#include <xen/grant_table.h>
#include <xen/rangeset.h>
#include <asm/domain.h>
@@ -51,8 +51,6 @@ struct evtchn
int evtchn_init(struct domain *d);
void evtchn_destroy(struct domain *d);
-#define CPUMAP_RUNANYWHERE 0xFFFFFFFF
-
struct vcpu
{
int vcpu_id;
@@ -65,7 +63,7 @@ struct vcpu
struct vcpu *next_in_list;
- struct ac_timer timer; /* one-shot timer for timeout values */
+ struct timer timer; /* one-shot timer for timeout values */
unsigned long sleep_tick; /* tick at which this vcpu started sleep */
s_time_t lastschd; /* time this domain was last scheduled */
@@ -80,7 +78,13 @@ struct vcpu
atomic_t pausecnt;
- cpumap_t cpumap; /* which cpus this domain can run on */
+ /* Bitmask of CPUs on which this VCPU may run. */
+ cpumask_t cpu_affinity;
+
+ unsigned long nmi_addr; /* NMI callback address. */
+
+ /* Bitmask of CPUs which are holding onto this VCPU's state. */
+ cpumask_t vcpu_dirty_cpumask;
struct arch_vcpu arch;
};
@@ -141,7 +145,7 @@ struct domain
struct vcpu *vcpu[MAX_VIRT_CPUS];
/* Bitmask of CPUs which are holding onto this domain's state. */
- cpumask_t cpumask;
+ cpumask_t domain_dirty_cpumask;
struct arch_domain arch;
@@ -170,12 +174,10 @@ struct domain_setup_info
char *xen_section_string;
};
-extern struct domain idle0_domain;
-extern struct vcpu idle0_vcpu;
-
-extern struct vcpu *idle_task[NR_CPUS];
+extern struct vcpu *idle_vcpu[NR_CPUS];
#define IDLE_DOMAIN_ID (0x7FFFU)
-#define is_idle_task(_d) (test_bit(_DOMF_idle_domain, &(_d)->domain_flags))
+#define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
+#define is_idle_vcpu(v) (is_idle_domain((v)->domain))
struct vcpu *alloc_vcpu(
struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
@@ -223,7 +225,7 @@ extern int construct_dom0(
unsigned long image_start, unsigned long image_len,
unsigned long initrd_start, unsigned long initrd_len,
char *cmdline);
-extern int set_info_guest(struct domain *d, dom0_setdomaininfo_t *);
+extern int set_info_guest(struct domain *d, dom0_setvcpucontext_t *);
struct domain *find_domain_by_id(domid_t dom);
extern void domain_destruct(struct domain *d);
@@ -269,37 +271,28 @@ void vcpu_sleep_sync(struct vcpu *d);
extern void sync_vcpu_execstate(struct vcpu *v);
/*
- * Called by the scheduler to switch to another VCPU. On entry, although
- * VCPUF_running is no longer asserted for @prev, its context is still running
- * on the local CPU and is not committed to memory. The local scheduler lock
- * is therefore still held, and interrupts are disabled, because the local CPU
- * is in an inconsistent state.
- *
- * The callee must ensure that the local CPU is no longer running in @prev's
- * context, and that the context is saved to memory, before returning.
- * Alternatively, if implementing lazy context switching, it suffices to ensure
- * that invoking sync_vcpu_execstate() will switch and commit @prev's state.
+ * Called by the scheduler to switch to another VCPU. This function must
+ * call context_saved(@prev) when the local CPU is no longer running in
+ * @prev's context, and that context is saved to memory. Alternatively, if
+ * implementing lazy context switching, it suffices to ensure that invoking
+ * sync_vcpu_execstate() will switch and commit @prev's state.
*/
extern void context_switch(
struct vcpu *prev,
struct vcpu *next);
/*
- * On some architectures (notably x86) it is not possible to entirely load
- * @next's context with interrupts disabled. These may implement a function to
- * finalise loading the new context after interrupts are re-enabled. This
- * function is not given @prev and is not permitted to access it.
+ * As described above, context_switch() must call this function when the
+ * local CPU is no longer running in @prev's context, and @prev's context is
+ * saved to memory. Alternatively, if implementing lazy context switching,
+ * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
*/
-extern void context_switch_finalise(
- struct vcpu *next);
+#define context_saved(prev) (clear_bit(_VCPUF_running, &(prev)->vcpu_flags))
/* Called by the scheduler to continue running the current VCPU. */
extern void continue_running(
struct vcpu *same);
-/* Is CPU 'cpu' idle right now? */
-int idle_cpu(int cpu);
-
void startup_cpu_idle_loop(void);
unsigned long __hypercall_create_continuation(
@@ -364,51 +357,44 @@ extern struct domain *domain_list;
/* Currently running on a CPU? */
#define _VCPUF_running 3
#define VCPUF_running (1UL<<_VCPUF_running)
- /* Disables auto-migration between CPUs. */
-#define _VCPUF_cpu_pinned 4
-#define VCPUF_cpu_pinned (1UL<<_VCPUF_cpu_pinned)
- /* Domain migrated between CPUs. */
-#define _VCPUF_cpu_migrated 5
-#define VCPUF_cpu_migrated (1UL<<_VCPUF_cpu_migrated)
/* Initialization completed. */
-#define _VCPUF_initialised 6
+#define _VCPUF_initialised 4
#define VCPUF_initialised (1UL<<_VCPUF_initialised)
/* VCPU is not-runnable */
-#define _VCPUF_down 7
+#define _VCPUF_down 5
#define VCPUF_down (1UL<<_VCPUF_down)
+ /* NMI callback pending for this VCPU? */
+#define _VCPUF_nmi_pending 8
+#define VCPUF_nmi_pending (1UL<<_VCPUF_nmi_pending)
+ /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
+#define _VCPUF_nmi_masked 9
+#define VCPUF_nmi_masked (1UL<<_VCPUF_nmi_masked)
/*
* Per-domain flags (domain_flags).
*/
- /* Is this one of the per-CPU idle domains? */
-#define _DOMF_idle_domain 0
-#define DOMF_idle_domain (1UL<<_DOMF_idle_domain)
/* Is this domain privileged? */
-#define _DOMF_privileged 1
+#define _DOMF_privileged 0
#define DOMF_privileged (1UL<<_DOMF_privileged)
/* Guest shut itself down for some reason. */
-#define _DOMF_shutdown 2
+#define _DOMF_shutdown 1
#define DOMF_shutdown (1UL<<_DOMF_shutdown)
- /* Guest is in process of shutting itself down (becomes DOMF_shutdown). */
-#define _DOMF_shuttingdown 3
-#define DOMF_shuttingdown (1UL<<_DOMF_shuttingdown)
/* Death rattle. */
-#define _DOMF_dying 4
+#define _DOMF_dying 2
#define DOMF_dying (1UL<<_DOMF_dying)
/* Domain is paused by controller software. */
-#define _DOMF_ctrl_pause 5
+#define _DOMF_ctrl_pause 3
#define DOMF_ctrl_pause (1UL<<_DOMF_ctrl_pause)
/* Domain is being debugged by controller software. */
-#define _DOMF_debugging 6
+#define _DOMF_debugging 4
#define DOMF_debugging (1UL<<_DOMF_debugging)
-static inline int domain_runnable(struct vcpu *v)
+static inline int vcpu_runnable(struct vcpu *v)
{
return ( (atomic_read(&v->pausecnt) == 0) &&
!(v->vcpu_flags & (VCPUF_blocked|VCPUF_down)) &&
- !(v->domain->domain_flags &
- (DOMF_shutdown|DOMF_shuttingdown|DOMF_ctrl_pause)) );
+ !(v->domain->domain_flags & (DOMF_shutdown|DOMF_ctrl_pause)) );
}
void vcpu_pause(struct vcpu *v);
@@ -419,6 +405,8 @@ void domain_pause_by_systemcontroller(struct domain *d);
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
+
static inline void vcpu_unblock(struct vcpu *v)
{
if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )
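
A hedged arch-side sketch of the new context_switch()/context_saved() contract (real implementations do considerably more):

    void context_switch(struct vcpu *prev, struct vcpu *next)
    {
        /* ... save prev's registers, switch page tables, load next ... */

        /*
         * Only once prev's state is committed to memory may the
         * scheduler run prev on another CPU, so mark that point:
         */
        context_saved(prev);

        /* ... resume execution in next's context ... */
    }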
diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
index 5f1903695b..9293b3168e 100644
--- a/xen/include/xen/softirq.h
+++ b/xen/include/xen/softirq.h
@@ -2,11 +2,11 @@
#define __XEN_SOFTIRQ_H__
/* Common softirqs come first in the following list. */
-#define AC_TIMER_SOFTIRQ 0
+#define TIMER_SOFTIRQ 0
#define SCHEDULE_SOFTIRQ 1
#define NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ 2
#define KEYPRESS_SOFTIRQ 3
-#define NMI_DOM0_SOFTIRQ 4
+#define NMI_SOFTIRQ 4
#define PAGE_SCRUB_SOFTIRQ 5
#define DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ 6
#define NR_SOFTIRQS 7
diff --git a/xen/include/xen/ac_timer.h b/xen/include/xen/timer.h
index bd5131d6e3..f53a7ed35f 100644
--- a/xen/include/xen/ac_timer.h
+++ b/xen/include/xen/timer.h
@@ -1,18 +1,18 @@
/******************************************************************************
- * ac_timer.h
+ * timer.h
*
* Copyright (c) 2002-2003 Rolf Neugebauer
* Copyright (c) 2002-2005 K A Fraser
*/
-#ifndef _AC_TIMER_H_
-#define _AC_TIMER_H_
+#ifndef _TIMER_H_
+#define _TIMER_H_
#include <xen/spinlock.h>
#include <xen/time.h>
#include <xen/string.h>
-struct ac_timer {
+struct timer {
/* System time expiry value (nanoseconds since boot). */
s_time_t expires;
/* CPU on which this timer will be installed and executed. */
@@ -22,6 +22,8 @@ struct ac_timer {
void *data;
/* Timer-heap offset. */
unsigned int heap_offset;
+ /* Has this timer been killed (cannot be activated)? */
+ int killed;
};
/*
@@ -29,17 +31,17 @@ struct ac_timer {
*/
/* Returns TRUE if the given timer is on a timer list. */
-static __inline__ int active_ac_timer(struct ac_timer *timer)
+static __inline__ int active_timer(struct timer *timer)
{
return (timer->heap_offset != 0);
}
/*
- * It initialises the static fields of the ac_timer structure.
+ * Initialise the static fields of the timer structure.
* It can be called multiple times to reinitialise a single (inactive) timer.
*/
-static __inline__ void init_ac_timer(
- struct ac_timer *timer,
+static __inline__ void init_timer(
+ struct timer *timer,
void (*function)(void *),
void *data,
unsigned int cpu)
@@ -52,23 +54,30 @@ static __inline__ void init_ac_timer(
/*
* Set the expiry time and activate a timer (which must previously have been
- * initialised by init_ac_timer).
+ * initialised by init_timer).
*/
-extern void set_ac_timer(struct ac_timer *timer, s_time_t expires);
+extern void set_timer(struct timer *timer, s_time_t expires);
/*
* Deactivate a timer (which must previously have been initialised by
- * init_ac_timer). This function has no effect if the timer is not currently
+ * init_timer). This function has no effect if the timer is not currently
* active.
*/
-extern void rem_ac_timer(struct ac_timer *timer);
+extern void stop_timer(struct timer *timer);
/*
- * Initialisation. Must be called before any other ac_timer function.
+ * Deactivate a timer and prevent it from being re-set (future calls to
+ * set_timer will silently fail). When this function returns it is guaranteed
+ * that the timer callback handler is not running on any CPU.
*/
-extern void ac_timer_init(void);
+extern void kill_timer(struct timer *timer);
-#endif /* _AC_TIMER_H_ */
+/*
+ * Initialisation. Must be called before any other timer function.
+ */
+extern void timer_init(void);
+
+#endif /* _TIMER_H_ */
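
A hedged lifecycle sketch of the renamed API; the callback body and the NOW()/MILLISECS() helpers are assumptions from the surrounding codebase.

    static struct timer my_timer;

    static void my_timer_fn(void *data)
    {
        /* ... handle expiry; re-arming via set_timer() is legal ... */
    }

    static void example_start(unsigned int cpu)
    {
        init_timer(&my_timer, my_timer_fn, NULL, cpu);
        set_timer(&my_timer, NOW() + MILLISECS(10));
    }

    static void example_teardown(void)
    {
        /* On return, the handler is guaranteed not running anywhere. */
        kill_timer(&my_timer);
    }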
/*
* Local variables: