-rw-r--r--  .hgignore  1
-rw-r--r--  docs/ChangeLog  10
-rw-r--r--  docs/src/user.tex  139
-rw-r--r--  docs/xen-api/revision-history.tex  11
-rw-r--r--  docs/xen-api/xenapi-coversheet.tex  2
-rw-r--r--  docs/xen-api/xenapi-datamodel.tex  35
-rw-r--r--  extras/mini-os/blkfront.c  21
-rw-r--r--  extras/mini-os/gnttab.c  20
-rw-r--r--  extras/mini-os/kernel.c  2
-rw-r--r--  extras/mini-os/minios.mk  4
-rw-r--r--  extras/mini-os/netfront.c  4
-rw-r--r--  tools/blktap/drivers/block-qcow.c  62
-rw-r--r--  tools/firmware/hvmloader/Makefile  15
-rw-r--r--  tools/firmware/hvmloader/hvmloader.c  2
-rw-r--r--  tools/firmware/hvmloader/smbios.c  57
-rw-r--r--  tools/firmware/hvmloader/util.c  53
-rw-r--r--  tools/firmware/hvmloader/util.h  48
-rw-r--r--  tools/include/xen-foreign/mkheader.py  4
-rw-r--r--  tools/ioemu/block-qcow.c  70
-rw-r--r--  tools/ioemu/block-qcow2.c  18
-rw-r--r--  tools/ioemu/block-raw.c  23
-rw-r--r--  tools/ioemu/block-vmdk.c  4
-rw-r--r--  tools/ioemu/block.c  36
-rw-r--r--  tools/ioemu/block_int.h  4
-rw-r--r--  tools/ioemu/hw/ide.c  62
-rw-r--r--  tools/ioemu/hw/ne2000.c  2
-rw-r--r--  tools/ioemu/hw/scsi-disk.c  8
-rw-r--r--  tools/ioemu/hw/vga.c  18
-rw-r--r--  tools/ioemu/hw/vga_int.h  1
-rw-r--r--  tools/ioemu/hw/xenfb.c  37
-rw-r--r--  tools/ioemu/sdl.c  70
-rw-r--r--  tools/ioemu/vl.h  5
-rw-r--r--  tools/ioemu/vnc.c  1
-rw-r--r--  tools/libfsimage/Rules.mk  1
-rwxr-xr-x  tools/libfsimage/check-libext2fs  4
-rw-r--r--  tools/libfsimage/common/fsimage_grub.c  48
-rw-r--r--  tools/libfsimage/common/fsimage_grub.h  2
-rw-r--r--  tools/libfsimage/common/fsimage_plugin.c  5
-rw-r--r--  tools/libxc/Makefile  9
-rw-r--r--  tools/libxc/xc_dom_bzimageloader.c  159
-rw-r--r--  tools/libxc/xc_dom_elfloader.c  2
-rw-r--r--  tools/libxen/include/xen/api/xen_acmpolicy.h  6
-rw-r--r--  tools/libxen/include/xen/api/xen_xspolicy.h  27
-rw-r--r--  tools/libxen/src/xen_xspolicy.c  18
-rw-r--r--  tools/pygrub/src/pygrub  15
-rw-r--r--  tools/python/xen/lowlevel/xc/xc.c  26
-rw-r--r--  tools/python/xen/util/xsconstants.py  8
-rw-r--r--  tools/python/xen/util/xsm/acm/acm.py  42
-rw-r--r--  tools/python/xen/xend/XendBootloader.py  13
-rw-r--r--  tools/python/xen/xend/XendDomainInfo.py  9
-rw-r--r--  tools/python/xen/xend/XendNode.py  3
-rw-r--r--  tools/python/xen/xend/XendXSPolicy.py  10
-rw-r--r--  tools/python/xen/xend/server/vfbif.py  2
-rw-r--r--  tools/python/xen/xm/XenAPI.py  1
-rw-r--r--  tools/python/xen/xm/create.py  8
-rw-r--r--  tools/python/xen/xm/messages/xen-xm.pot  7
-rw-r--r--  tools/tests/Makefile  10
-rw-r--r--  tools/tests/test_x86_emulator.c  29
-rw-r--r--  tools/tests/x86_emulate.c  13
-rw-r--r--  xen/arch/ia64/xen/dom0_ops.c  2
-rw-r--r--  xen/arch/ia64/xen/dom_fw_common.c  2
-rw-r--r--  xen/arch/ia64/xen/dom_fw_domu.c  2
-rw-r--r--  xen/arch/powerpc/sysctl.c  6
-rw-r--r--  xen/arch/x86/boot/trampoline.S  9
-rw-r--r--  xen/arch/x86/hvm/emulate.c  288
-rw-r--r--  xen/arch/x86/hvm/hvm.c  228
-rw-r--r--  xen/arch/x86/hvm/io.c  111
-rw-r--r--  xen/arch/x86/hvm/svm/emulate.c  4
-rw-r--r--  xen/arch/x86/hvm/svm/svm.c  10
-rw-r--r--  xen/arch/x86/hvm/vmx/realmode.c  4
-rw-r--r--  xen/arch/x86/hvm/vmx/vmx.c  98
-rw-r--r--  xen/arch/x86/hvm/vmx/x86_32/exits.S  4
-rw-r--r--  xen/arch/x86/hvm/vmx/x86_64/exits.S  4
-rw-r--r--  xen/arch/x86/mm.c  28
-rw-r--r--  xen/arch/x86/mm/shadow/common.c  8
-rw-r--r--  xen/arch/x86/mm/shadow/multi.c  6
-rw-r--r--  xen/arch/x86/sysctl.c  14
-rw-r--r--  xen/arch/x86/x86_emulate.c  3410
-rw-r--r--  xen/arch/x86/x86_emulate/x86_emulate.c  3429
-rw-r--r--  xen/arch/x86/x86_emulate/x86_emulate.h  401
-rw-r--r--  xen/common/domain.c  8
-rw-r--r--  xen/common/domctl.c  142
-rw-r--r--  xen/common/event_channel.c  49
-rw-r--r--  xen/common/grant_table.c  37
-rw-r--r--  xen/common/memory.c  51
-rw-r--r--  xen/include/asm-x86/hvm/hvm.h  29
-rw-r--r--  xen/include/asm-x86/hvm/io.h  9
-rw-r--r--  xen/include/asm-x86/hvm/support.h  37
-rw-r--r--  xen/include/asm-x86/hvm/vcpu.h  21
-rw-r--r--  xen/include/asm-x86/hvm/vmx/vmx.h  182
-rw-r--r--  xen/include/asm-x86/x86_emulate.h  403
-rw-r--r--  xen/include/public/arch-ia64.h  18
-rw-r--r--  xen/include/public/arch-powerpc.h  2
-rw-r--r--  xen/include/public/arch-x86/xen-x86_64.h  4
-rw-r--r--  xen/include/public/arch-x86/xen.h  2
-rw-r--r--  xen/include/public/hvm/save.h  4
-rw-r--r--  xen/include/public/io/fbif.h  29
-rw-r--r--  xen/include/public/sysctl.h  11
-rw-r--r--  xen/include/public/xsm/acm.h  1
-rw-r--r--  xen/include/xen/hvm/save.h  1
-rw-r--r--  xen/include/xsm/acm/acm_hooks.h  14
-rw-r--r--  xen/xsm/acm/acm_chinesewall_hooks.c  36
-rw-r--r--  xen/xsm/acm/acm_policy.c  3
-rw-r--r--  xen/xsm/acm/acm_simple_type_enforcement_hooks.c  2
104 files changed, 5729 insertions, 4760 deletions
diff --git a/.hgignore b/.hgignore
index 1aab698514..7c7d4a9d86 100644
--- a/.hgignore
+++ b/.hgignore
@@ -184,6 +184,7 @@
^tools/tests/blowfish\.bin$
^tools/tests/blowfish\.h$
^tools/tests/test_x86_emulator$
+^tools/tests/x86_emulate$
^tools/vnet/Make.local$
^tools/vnet/build/.*$
^tools/vnet/gc$
diff --git a/docs/ChangeLog b/docs/ChangeLog
index 99ec46fe5f..57aacc9a36 100644
--- a/docs/ChangeLog
+++ b/docs/ChangeLog
@@ -16,6 +16,16 @@ http://lists.xensource.com/archives/html/xen-devel/2008-01/msg00010.html
Xen 3.3 release
---------------
+17336: Add platform capabilities field to XEN_SYSCTL_physinfo
+http://xenbits.xensource.com/xen-unstable.hg?rev/250606290439
+
+17289: PV framebuffer dynamic resolution facility
+http://xenbits.xensource.com/xen-unstable.hg?rev/d97e61001d81
+
+The guest may send XENFB_TYPE_RESIZE if the backend VNC server
+has set feature-resize=1 in xenstore. The VNC server code sets
+feature-resize if it can handle the resize request.
+
16857: XS_SET_TARGET
http://xenbits.xensource.com/xen-unstable.hg?rev/26fc953a89bb
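For orientation, here is a minimal sketch of how a frontend might build the resize event described in the ChangeLog entry above. The event type value and field layout are assumptions for illustration, not the authoritative definitions added to fbif.h by this changeset.

#include <stdint.h>
#include <string.h>

#define XENFB_TYPE_RESIZE 3            /* assumed type value */

struct xenfb_resize {                  /* assumed field layout */
    uint8_t type;                      /* XENFB_TYPE_RESIZE */
    int32_t width;                     /* new framebuffer width  (pixels) */
    int32_t height;                    /* new framebuffer height (pixels) */
};

/* Build a resize event. A real frontend would only do this after
 * reading feature-resize=1 from the backend's xenstore directory,
 * and would then place the event on the shared event ring. */
static void xenfb_make_resize(struct xenfb_resize *evt, int w, int h)
{
    memset(evt, 0, sizeof(*evt));
    evt->type   = XENFB_TYPE_RESIZE;
    evt->width  = w;
    evt->height = h;
}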
diff --git a/docs/src/user.tex b/docs/src/user.tex
index ac8c5f8e1e..9a02101db9 100644
--- a/docs/src/user.tex
+++ b/docs/src/user.tex
@@ -1618,9 +1618,9 @@ if a virtual machine uses only half of its disk space then the file
really takes up half of the size allocated.
For example, to create a 2GB sparse file-backed virtual block device
-(actually only consumes 1KB of disk):
+(which actually consumes no disk space at all):
\begin{quote}
- \verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=1_
+ \verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=0_
\end{quote}
Make a file system in the disk file:
@@ -4306,14 +4306,22 @@ mailing lists and subscription information can be found at \begin{quote}
\appendix
-\chapter{Unmodified (VMX) guest domains in Xen with Intel\textregistered Virtualization Technology (VT)}
+\chapter{Unmodified (HVM) guest domains in Xen with hardware support for virtualization}
-Xen supports guest domains running unmodified Guest operating systems using Virtualization Technology (VT) available on recent Intel Processors. More information about the Intel Virtualization Technology implementing Virtual Machine Extensions (VMX) in the processor is available on the Intel website at \\
+Xen supports guest domains running unmodified guest operating systems using
+virtualization extensions available on recent processors. Currently processors
+featuring Intel Virtualization Technology (Intel VT) or AMD Virtualization
+(AMD-V) are supported. The technology covering both implementations is
+called HVM (for Hardware Virtual Machine) in Xen. More information about the
+virtualization extensions is available on the respective websites:
{\small {\tt http://www.intel.com/technology/computing/vptech}}
-\section{Building Xen with VT support}
-The following packages need to be installed in order to build Xen with VT support. Some Linux distributions do not provide these packages by default.
+ {\small {\tt http://www.amd.com/us-en/assets/content\_type/white\_papers\_and\_tech\_docs/24593.pdf}}
+
+\section{Building Xen with HVM support}
+
+The following packages need to be installed in order to build Xen with HVM support. Some Linux distributions do not provide these packages by default.
\begin{tabular}{lp{11.0cm}}
{\bfseries Package} & {\bfseries Description} \\
@@ -4322,70 +4330,75 @@ dev86 & The dev86 package provides an assembler and linker for real mode 80x86 i
If the dev86 package is not available on the x86\_64 distribution, you can install the i386 version of it. The dev86 rpm package for various distributions can be found at {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=dev86\&submit=Search}} \\
-LibVNCServer & The unmodified guest's VGA display, keyboard, and mouse can be virtualized by the vncserver library. You can get the sources of libvncserver from {\small {\tt http://sourceforge.net/projects/libvncserver}}. Build and install the sources on the build system to get the libvncserver library. There is a significant performance degradation in 0.8 version. The current sources in the CVS tree have fixed this degradation. So it is highly recommended to download the latest CVS sources and install them.\\
-
SDL-devel, SDL & Simple DirectMedia Layer (SDL) is another way of virtualizing the unmodified guest console. It provides an X window for the guest console.
If the SDL and SDL-devel packages are not installed by default on the build system, they can be obtained from {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=SDL\&submit=Search}}
-, {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=SDL-devel\&submit=Search}} \\
+
+
+{\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=SDL-devel\&submit=Search}} \\
\end{tabular}
-\section{Configuration file for unmodified VMX guests}
+\section{Configuration file for unmodified HVM guests}
-The Xen installation includes a sample configuration file, {\small {\tt /etc/xen/xmexample.vmx}}. There are comments describing all the options. In addition to the common options that are the same as those for paravirtualized guest configurations, VMX guest configurations have the following settings:
+The Xen installation includes a sample configuration file, {\small {\tt /etc/xen/xmexample.hvm}}. There are comments describing all the options. In addition to the common options that are the same as those for paravirtualized guest configurations, HVM guest configurations have the following settings:
\begin{tabular}{lp{11.0cm}}
{\bfseries Parameter} & {\bfseries Description} \\
-kernel & The VMX firmware loader, {\small {\tt /usr/lib/xen/boot/vmxloader}}\\
+kernel & The HVM firmware loader, {\small {\tt /usr/lib/xen/boot/hvmloader}}\\
+
+builder & The domain build function. The HVM domain uses the 'hvm' builder.\\
-builder & The domain build function. The VMX domain uses the vmx builder.\\
+acpi & Enable HVM guest ACPI, default=1 (enabled)\\
-acpi & Enable VMX guest ACPI, default=0 (disabled)\\
+apic & Enable HVM guest APIC, default=1 (enabled)\\
-apic & Enable VMX guest APIC, default=0 (disabled)\\
+pae & Enable HVM guest PAE, default=1 (enabled)\\
-pae & Enable VMX guest PAE, default=0 (disabled)\\
+hap & Enable hardware-assisted paging support, such as AMD-V's nested paging
+or Intel\textregistered VT's extended paging. If available, Xen will
+use hardware-assisted paging instead of shadow paging for this guest's memory
+management.\\
-vif & Optionally defines MAC address and/or bridge for the network interfaces. Random MACs are assigned if not given. {\small {\tt type=ioemu}} means ioemu is used to virtualize the VMX NIC. If no type is specified, vbd is used, as with paravirtualized guests.\\
+vif & Optionally defines MAC address and/or bridge for the network interfaces. Random MACs are assigned if not given. {\small {\tt type=ioemu}} means ioemu is used to virtualize the HVM NIC. If no type is specified, vbd is used, as with paravirtualized guests.\\
-disk & Defines the disk devices you want the domain to have access to, and what you want them accessible as. If using a physical device as the VMX guest's disk, each disk entry is of the form
+disk & Defines the disk devices you want the domain to have access to, and what you want them accessible as. If using a physical device as the HVM guest's disk, each disk entry is of the form
{\small {\tt phy:UNAME,ioemu:DEV,MODE,}}
-where UNAME is the device, DEV is the device name the domain will see, and MODE is r for read-only, w for read-write. ioemu means the disk will use ioemu to virtualize the VMX disk. If not adding ioemu, it uses vbd like paravirtualized guests.
+where UNAME is the host device file, DEV is the device name the domain will see, and MODE is r for read-only, w for read-write. ioemu means the disk will use ioemu to virtualize the HVM disk. If not adding ioemu, it uses vbd like paravirtualized guests.
If using disk image file, its form should be like
{\small {\tt file:FILEPATH,ioemu:DEV,MODE}}
-If using more than one disk, there should be a comma between each disk entry. For example:
+Optical devices can be emulated by appending cdrom to the device type
+
+{\small {\tt ',hdc:cdrom,r'}}
-{\scriptsize {\tt disk = ['file:/var/images/image1.img,ioemu:hda,w', 'file:/var/images/image2.img,ioemu:hdb,w']}}\\
+If using more than one disk, there should be a comma between each disk entry. For example:
-cdrom & Disk image for CD-ROM. The default is {\small {\tt /dev/cdrom}} for Domain0. Inside the VMX domain, the CD-ROM will available as device {\small {\tt /dev/hdc}}. The entry can also point to an ISO file.\\
+{\scriptsize {\tt disk = ['file:/var/images/image1.img,ioemu:hda,w', 'phy:hda1,hdb1,w', 'file:/var/images/install1.iso,hdc:cdrom,r']}}\\
-boot & Boot from floppy (a), hard disk (c) or CD-ROM (d). For example, to boot from CD-ROM, the entry should be:
+boot & Boot from floppy (a), hard disk (c) or CD-ROM (d). For example, to boot from CD-ROM and fallback to HD, the entry should be:
-boot='d'\\
+boot='dc'\\
-device\_model & The device emulation tool for VMX guests. This parameter should not be changed.\\
+device\_model & The device emulation tool for HVM guests. This parameter should not be changed.\\
sdl & Enable SDL library for graphics, default = 0 (disabled)\\
vnc & Enable VNC library for graphics, default = 1 (enabled)\\
-vncviewer & Enable spawning of the vncviewer (only valid when vnc=1), default = 1 (enabled)
+vncconsole & Enable spawning of the vncviewer (only valid when vnc=1), default = 0 (disabled)
-If vnc=1 and vncviewer=0, user can use vncviewer to manually connect VMX from remote. For example:
+If vnc=1 and vncconsole=0, the user can connect to the HVM guest manually from a remote machine using vncviewer. For example:
-{\small {\tt vncviewer domain0\_IP\_address:VMX\_domain\_id}} \\
+{\small {\tt vncviewer domain0\_IP\_address:HVM\_domain\_id}} \\
-ne2000 & Enable ne2000, default = 0 (disabled; use pcnet)\\
-
-serial & Enable redirection of VMX serial output to pty device\\
+serial & Enable redirection of HVM serial output to pty device\\
\end{tabular}
@@ -4416,9 +4429,9 @@ Details about mouse emulation are provided in section \textbf{A.4.3}.\\
localtime & Set the real time clock to local time [default=0, that is, set to UTC].\\
-enable-audio & Enable audio support. This is under development.\\
+soundhw & Enable sound card support and specify the hardware to emulate. Values can be sb16, es1370 or all. Default is none.\\
-full-screen & Start in full screen. This is under development.\\
+full-screen & Start in full screen.\\
nographic & Another way to redirect serial output. If enabled, no 'sdl' or 'vnc' can work. Not recommended.\\
@@ -4430,18 +4443,18 @@ nographic & Another way to redirect serial output. If enabled, no 'sdl' or '
If you are using a physical disk or physical disk partition, you need to install a Linux OS on the disk first. Then the boot loader should be installed in the correct place. For example {\small {\tt /dev/sda}} for booting from the whole disk, or {\small {\tt /dev/sda1}} for booting from partition 1.
\subsection{Using disk image files}
-You need to create a large empty disk image file first; then, you need to install a Linux OS onto it. There are two methods you can choose. One is directly installing it using a VMX guest while booting from the OS installation CD-ROM. The other is copying an installed OS into it. The boot loader will also need to be installed.
+You need to create a large empty disk image file first; then, you need to install a Linux OS onto it. There are two methods you can choose. One is directly installing it using an HVM guest while booting from the OS installation CD-ROM. The other is copying an installed OS into it. The boot loader will also need to be installed.
\subsubsection*{To create the image file:}
The image size should be big enough to accommodate the entire OS. This example assumes the size is 1G (which is probably too small for most OSes).
-{\small {\tt \# dd if=/dev/zero of=hd.img bs=1M count=1 seek=1023}}
+{\small {\tt \# dd if=/dev/zero of=hd.img bs=1M count=0 seek=1024}}
-\subsubsection*{To directly install Linux OS into an image file using a VMX guest:}
+\subsubsection*{To directly install Linux OS into an image file using an HVM guest:}
-Install Xen and create VMX with the original image file with booting from CD-ROM. Then it is just like a normal Linux OS installation. The VMX configuration file should have these two entries before creating:
+Install Xen and create an HVM guest that boots from CD-ROM with the image file attached. Then it is just like a normal Linux OS installation. The HVM configuration file should have a stanza for the CD-ROM as well as a boot device specification:
-{\small {\tt cdrom='/dev/cdrom'
+{\small {\tt disk=['file:/var/images/your-hd.img,hda,w', ',hdc:cdrom,r' ]
boot='d'}}
If this method does not succeed, you can choose the following method of copying an installed Linux OS into an image file.
@@ -4509,31 +4522,28 @@ none /sys sysfs defaults 0 0}}
Now, the guest OS image {\small {\tt hd.img}} is ready. You can also reference {\small {\tt http://free.oszoo.org}} for quickstart images. But make sure to install the boot loader.
-\subsection{Install Windows into an Image File using a VMX guest}
-In order to install a Windows OS, you should keep {\small {\tt acpi=0}} in your VMX configuration file.
-
-\section{VMX Guests}
-\subsection{Editing the Xen VMX config file}
-Make a copy of the example VMX configuration file {\small {\tt /etc/xen/xmeaxmple.vmx}} and edit the line that reads
+\section{HVM Guests}
+\subsection{Editing the Xen HVM config file}
+Make a copy of the example HVM configuration file {\small {\tt /etc/xen/xmexample.hvm}} and edit the line that reads
-{\small {\tt disk = [ 'file:/var/images/\emph{guest.img},ioemu:hda,w' ]}}
+{\small {\tt disk = [ 'file:/var/images/\emph{min-el3-i386.img},hda,w' ]}}
-replacing \emph{guest.img} with the name of the guest OS image file you just made.
+replacing \emph{min-el3-i386.img} with the name of the guest OS image file you just made.
-\subsection{Creating VMX guests}
-Simply follow the usual method of creating the guest, using the -f parameter and providing the filename of your VMX configuration file:\\
+\subsection{Creating HVM guests}
+Simply follow the usual method of creating the guest, providing the filename of your HVM configuration file:\\
{\small {\tt \# xend start\\
-\# xm create /etc/xen/vmxguest.vmx}}
+\# xm create /etc/xen/hvmguest.hvm}}
-In the default configuration, VNC is on and SDL is off. Therefore VNC windows will open when VMX guests are created. If you want to use SDL to create VMX guests, set {\small {\tt sdl=1}} in your VMX configuration file. You can also turn off VNC by setting {\small {\tt vnc=0}}.
+In the default configuration, VNC is on and SDL is off. Therefore VNC windows will open when HVM guests are created. If you want to use SDL to create HVM guests, set {\small {\tt sdl=1}} in your HVM configuration file. You can also turn off VNC by setting {\small {\tt vnc=0}}.
\subsection{Mouse issues, especially under VNC}
Mouse handling when using VNC is a little problematic.
The problem is that the VNC viewer provides a virtual pointer which is
located at an absolute location in the VNC window and only absolute
coordinates are provided.
-The VMX device model converts these absolute mouse coordinates
+The HVM device model converts these absolute mouse coordinates
into the relative motion deltas that are expected by the PS/2
mouse driver running in the guest.
Unfortunately,
@@ -4550,7 +4560,7 @@ there are no longer any left mouse deltas that
can be provided by the device model emulation code.)
To deal with these mouse issues there are 4 different
-mouse emulations available from the VMX device model:
+mouse emulations available from the HVM device model:
\begin{description}
\item[PS/2 mouse over the PS/2 port.]
@@ -4845,7 +4855,7 @@ vendor id
and product id
\textbf{310b}.
This device could be made available
-to the VMX guest by including the
+to the HVM guest by including the
config file entry
{\small
\begin{verbatim}
@@ -4959,7 +4969,7 @@ not the guest can see a USB mouse.}
will remove the USB mouse
driver from the Dom0 kernel
and the mouse will now be
-accessible by the VMX guest.
+accessible by the HVM guest.
Be aware that the Linux USB
hotplug system will reload
@@ -4981,26 +4991,25 @@ just to make sure it doesn't get
reloaded.
\end{description}
-\subsection{Destroy VMX guests}
-VMX guests can be destroyed in the same way as can paravirtualized guests. We recommend that you type the command
+\subsection{Destroy HVM guests}
+HVM guests can be destroyed in the same way as paravirtualized guests. We recommend that you shut down the guest using the method provided by the guest OS; for Linux, type the command
{\small {\tt poweroff}}
-in the VMX guest's console first to prevent data loss. Then execute the command
+in the HVM guest's console; for Windows, use Start -> Shutdown. This prevents
+data loss. Depending on the configuration, the guest will be destroyed
+automatically; otherwise, execute the command
{\small {\tt xm destroy \emph{vmx\_guest\_id} }}
at the Domain0 console.
-\subsection{VMX window (X or VNC) Hot Key}
-If you are running in the X environment after creating a VMX guest, an X window is created. There are several hot keys for control of the VMX guest that can be used in the window.
+\subsection{HVM window (X or VNC) Hot Key}
+If you are running in the X environment after creating an HVM guest, an X window is created. There are several hot keys for control of the HVM guest that can be used in the window.
-{\bfseries Ctrl+Alt+2} switches from guest VGA window to the control window. Typing {\small {\tt help }} shows the control commands help. For example, 'q' is the command to destroy the VMX guest.\\
-{\bfseries Ctrl+Alt+1} switches back to VMX guest's VGA.\\
-{\bfseries Ctrl+Alt+3} switches to serial port output. It captures serial output from the VMX guest. It works only if the VMX guest was configured to use the serial port. \\
-
-\subsection{Save/Restore and Migration}
-VMX guests currently cannot be saved and restored, nor migrated. These features are currently under active development.
+{\bfseries Ctrl+Alt+2} switches from guest VGA window to the control window. Typing {\small {\tt help }} shows the control commands help. For example, 'q' is the command to destroy the HVM guest.\\
+{\bfseries Ctrl+Alt+1} switches back to HVM guest's VGA.\\
+{\bfseries Ctrl+Alt+3} switches to serial port output. It captures serial output from the HVM guest. It works only if the HVM guest was configured to use the serial port. \\
\chapter{Vnets - Domain Virtual Networking}
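The corrected dd invocations in the user.tex hunks above (count=0 with a large seek) create sparse files: the file size is set without writing any data blocks. A minimal, self-contained C equivalent of that technique using ftruncate(2):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Create a 2 GiB sparse disk image, like:
 *   dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=0
 * On a sparse-capable filesystem this allocates no data blocks;
 * blocks appear only as the guest writes to them. */
int main(void)
{
    int fd = open("vm1disk", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (ftruncate(fd, 2048LL * 1024 * 1024) < 0) {
        perror("ftruncate");
        close(fd);
        return 1;
    }
    close(fd);
    return 0;
}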
diff --git a/docs/xen-api/revision-history.tex b/docs/xen-api/revision-history.tex
index d282adb106..cf4a211f62 100644
--- a/docs/xen-api/revision-history.tex
+++ b/docs/xen-api/revision-history.tex
@@ -23,12 +23,19 @@
\end{flushleft}
\end{minipage}\\
\hline
- 1.0.2 & 11th Feb. 08 & S. Berger &
+ 1.0.3 & 11th Feb. 08 & S. Berger &
\begin{minipage}[t]{7cm}
\begin{flushleft}
Added table of contents and hyperlink cross reference.
\end{flushleft}
\end{minipage}\\
\hline
+ 1.0.4 & 23rd March 08 & S. Berger &
+ \begin{minipage}[t]{7cm}
+ \begin{flushleft}
+ Added XSPolicy.can\_run
+ \end{flushleft}
+ \end{minipage}\\
+ \hline
\end{tabular}
-\end{center}
\ No newline at end of file
+\end{center}
diff --git a/docs/xen-api/xenapi-coversheet.tex b/docs/xen-api/xenapi-coversheet.tex
index c35190acfe..ab6f0e3eff 100644
--- a/docs/xen-api/xenapi-coversheet.tex
+++ b/docs/xen-api/xenapi-coversheet.tex
@@ -22,7 +22,7 @@
\newcommand{\releasestatement}{Stable Release}
%% Document revision
-\newcommand{\revstring}{API Revision 1.0.2}
+\newcommand{\revstring}{API Revision 1.0.4}
%% Document authors
\newcommand{\docauthors}{
diff --git a/docs/xen-api/xenapi-datamodel.tex b/docs/xen-api/xenapi-datamodel.tex
index 102b59e04c..a9ebaad545 100644
--- a/docs/xen-api/xenapi-datamodel.tex
+++ b/docs/xen-api/xenapi-datamodel.tex
@@ -14938,6 +14938,41 @@ Currently active instantiation flags.
\vspace{0.3cm}
\vspace{0.3cm}
\vspace{0.3cm}
+\subsubsection{RPC name:~can\_run}
+
+{\bf Overview:}
+Check whether a VM with the given security label could run on the system.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int can_run (session_id s, string security_label)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt string } & security\_label & the security label to check \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+Error code indicating whether a VM with the given security label could run.
+If zero, it can run.
+
+\vspace{0.3cm}
+
+\noindent{\bf Possible Error Codes:} {\tt SECURITY\_ERROR}
+
\subsubsection{RPC name:~get\_all}
{\bf Overview:}
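The semantics of the new can\_run RPC documented above are simple: the integer result is a security error code, with zero meaning a VM carrying the given label may run. A hypothetical caller sketch; the binding name, signature, and label format below are assumptions modelled on the libxen style, with a mock standing in for the real RPC transport so the example is self-contained:

#include <stdint.h>
#include <stdio.h>

/* Mock of an assumed xen_xspolicy_can_run() binding: returns nonzero
 * on transport success and stores the security error code in *result
 * (0 => a VM with this label may run). */
static int xen_xspolicy_can_run_mock(int64_t *result, const char *label)
{
    (void)label;      /* a real call would consult the hypervisor */
    *result = 0;
    return 1;
}

int main(void)
{
    int64_t rc;
    if (!xen_xspolicy_can_run_mock(&rc, "ACM:example-policy:red")) {
        fprintf(stderr, "RPC failed\n");
        return 1;
    }
    if (rc == 0)
        printf("VM may run\n");
    else
        printf("denied, security error %lld\n", (long long)rc);
    return 0;
}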
diff --git a/extras/mini-os/blkfront.c b/extras/mini-os/blkfront.c
index 3a6c22aceb..b227f06247 100644
--- a/extras/mini-os/blkfront.c
+++ b/extras/mini-os/blkfront.c
@@ -319,6 +319,7 @@ int blkfront_aio_poll(struct blkfront_dev *dev)
{
RING_IDX rp, cons;
struct blkif_response *rsp;
+ int more;
moretodo:
#ifdef HAVE_LIBC
@@ -334,6 +335,7 @@ moretodo:
while ((cons != rp))
{
rsp = RING_GET_RESPONSE(&dev->ring, cons);
+ nr_consumed++;
if (rsp->status != BLKIF_RSP_OKAY)
printk("block error %d for op %d\n", rsp->status, rsp->operation);
@@ -343,29 +345,30 @@ moretodo:
case BLKIF_OP_WRITE:
{
struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
+ int status = rsp->status;
int j;
for (j = 0; j < aiocbp->n; j++)
gnttab_end_access(aiocbp->gref[j]);
+ dev->ring.rsp_cons = ++cons;
/* Nota: callback frees aiocbp itself */
- aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0);
+ aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
+ if (dev->ring.rsp_cons != cons)
+ /* We reentered, we must not continue here */
+ goto out;
break;
}
- case BLKIF_OP_WRITE_BARRIER:
- case BLKIF_OP_FLUSH_DISKCACHE:
- break;
default:
printk("unrecognized block operation %d response\n", rsp->operation);
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ dev->ring.rsp_cons = ++cons;
break;
}
-
- nr_consumed++;
- ++cons;
}
- dev->ring.rsp_cons = cons;
- int more;
+out:
RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
if (more) goto moretodo;
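The subtlety in the blkfront change above is that the completion callback may re-enter the poll loop. The fix publishes the consumer index before invoking the callback and bails out if the callback moved it further. A stripped-down sketch of that pattern (the ring and callback types are simplified stand-ins):

/* Shared consumer index, advanced by whoever consumes responses. */
static unsigned rsp_cons;

static void poll_responses(unsigned rsp_prod, void (*aio_cb)(void))
{
    unsigned cons = rsp_cons;
    while (cons != rsp_prod) {
        rsp_cons = ++cons;       /* publish progress first */
        aio_cb();                /* may re-enter poll_responses() */
        if (rsp_cons != cons)    /* callback consumed further entries: */
            return;              /* our local state is stale, stop here */
    }
}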
diff --git a/extras/mini-os/gnttab.c b/extras/mini-os/gnttab.c
index 5c2dcea863..dd66b043bf 100644
--- a/extras/mini-os/gnttab.c
+++ b/extras/mini-os/gnttab.c
@@ -32,6 +32,9 @@
static grant_entry_t *gnttab_table;
static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
+#ifdef GNT_DEBUG
+static char inuse[NR_GRANT_ENTRIES];
+#endif
static __DECLARE_SEMAPHORE_GENERIC(gnttab_sem, NR_GRANT_ENTRIES);
static void
@@ -39,6 +42,10 @@ put_free_entry(grant_ref_t ref)
{
unsigned long flags;
local_irq_save(flags);
+#ifdef GNT_DEBUG
+ BUG_ON(!inuse[ref]);
+ inuse[ref] = 0;
+#endif
gnttab_list[ref] = gnttab_list[0];
gnttab_list[0] = ref;
local_irq_restore(flags);
@@ -54,6 +61,10 @@ get_free_entry(void)
local_irq_save(flags);
ref = gnttab_list[0];
gnttab_list[0] = gnttab_list[ref];
+#ifdef GNT_DEBUG
+ BUG_ON(inuse[ref]);
+ inuse[ref] = 1;
+#endif
local_irq_restore(flags);
return ref;
}
@@ -92,10 +103,12 @@ gnttab_end_access(grant_ref_t ref)
{
u16 flags, nflags;
+ BUG_ON(ref >= NR_GRANT_ENTRIES || ref < NR_RESERVED_ENTRIES);
+
nflags = gnttab_table[ref].flags;
do {
if ((flags = nflags) & (GTF_reading|GTF_writing)) {
- printk("WARNING: g.e. still in use!\n");
+ printk("WARNING: g.e. still in use! (%x)\n", flags);
return 0;
}
} while ((nflags = synch_cmpxchg(&gnttab_table[ref].flags, flags, 0)) !=
@@ -111,6 +124,8 @@ gnttab_end_transfer(grant_ref_t ref)
unsigned long frame;
u16 flags;
+ BUG_ON(ref >= NR_GRANT_ENTRIES || ref < NR_RESERVED_ENTRIES);
+
while (!((flags = gnttab_table[ref].flags) & GTF_transfer_committed)) {
if (synch_cmpxchg(&gnttab_table[ref].flags, flags, 0) == flags) {
printk("Release unused transfer grant.\n");
@@ -164,6 +179,9 @@ init_gnttab(void)
unsigned long frames[NR_GRANT_FRAMES];
int i;
+#ifdef GNT_DEBUG
+ memset(inuse, 1, sizeof(inuse));
+#endif
for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++)
put_free_entry(i);
diff --git a/extras/mini-os/kernel.c b/extras/mini-os/kernel.c
index 96535e5ec7..a8596c8471 100644
--- a/extras/mini-os/kernel.c
+++ b/extras/mini-os/kernel.c
@@ -341,7 +341,7 @@ static void kbdfront_thread(void *p)
{
struct kbdfront_dev *kbd_dev;
DEFINE_WAIT(w);
- int x = WIDTH / 2, y = HEIGHT / 2, z;
+ int x = WIDTH / 2, y = HEIGHT / 2, z = 0;
kbd_dev = init_kbdfront(NULL, 1);
if (!kbd_dev)
diff --git a/extras/mini-os/minios.mk b/extras/mini-os/minios.mk
index 078c396fc2..2282a187e2 100644
--- a/extras/mini-os/minios.mk
+++ b/extras/mini-os/minios.mk
@@ -16,6 +16,10 @@ DEF_LDFLAGS =
ifeq ($(debug),y)
DEF_CFLAGS += -g
+#DEF_CFLAGS += -DMM_DEBUG
+#DEF_CFLAGS += -DFS_DEBUG
+#DEF_CFLAGS += -DLIBC_DEBUG
+DEF_CFLAGS += -DGNT_DEBUG
else
DEF_CFLAGS += -O3
endif
diff --git a/extras/mini-os/netfront.c b/extras/mini-os/netfront.c
index 3159c30445..acdc76d994 100644
--- a/extras/mini-os/netfront.c
+++ b/extras/mini-os/netfront.c
@@ -120,6 +120,7 @@ moretodo:
if (rx->status == NETIF_RSP_NULL) continue;
int id = rx->id;
+ BUG_ON(id >= NET_TX_RING_SIZE);
buf = &dev->rx_buffers[id];
page = (unsigned char*)buf->page;
@@ -204,6 +205,7 @@ void network_tx_buf_gc(struct netfront_dev *dev)
printk("packet error\n");
id = txrsp->id;
+ BUG_ON(id >= NET_TX_RING_SIZE);
struct net_buffer* buf = &dev->tx_buffers[id];
gnttab_end_access(buf->gref);
buf->gref=GRANT_INVALID_REF;
@@ -510,6 +512,8 @@ void netfront_xmit(struct netfront_dev *dev, unsigned char* data,int len)
struct net_buffer* buf;
void* page;
+ BUG_ON(len > PAGE_SIZE);
+
down(&dev->tx_sem);
local_irq_save(flags);
diff --git a/tools/blktap/drivers/block-qcow.c b/tools/blktap/drivers/block-qcow.c
index f12fd90c4c..25838b44f9 100644
--- a/tools/blktap/drivers/block-qcow.c
+++ b/tools/blktap/drivers/block-qcow.c
@@ -76,6 +76,7 @@
#define QCOW_OFLAG_COMPRESSED (1LL << 63)
#define SPARSE_FILE 0x01
+#define EXTHDR_L1_BIG_ENDIAN 0x02
#ifndef O_BINARY
#define O_BINARY 0
@@ -147,19 +148,30 @@ static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
static uint32_t gen_cksum(char *ptr, int len)
{
+ int i;
unsigned char *md;
uint32_t ret;
md = malloc(MD5_DIGEST_LENGTH);
if(!md) return 0;
+
+ /* Convert L1 table to big endian */
+ for(i = 0; i < len / sizeof(uint64_t); i++) {
+ cpu_to_be64s(&((uint64_t*) ptr)[i]);
+ }
- if (MD5((unsigned char *)ptr, len, md) != md) {
- free(md);
- return 0;
+ /* Generate checksum */
+ if (MD5((unsigned char *)ptr, len, md) != md)
+ ret = 0;
+ else
+ memcpy(&ret, md, sizeof(uint32_t));
+
+ /* Convert L1 table back to native endianess */
+ for(i = 0; i < len / sizeof(uint64_t); i++) {
+ be64_to_cpus(&((uint64_t*) ptr)[i]);
}
- memcpy(&ret, md, sizeof(uint32_t));
free(md);
return ret;
}
@@ -354,7 +366,8 @@ static uint64_t get_cluster_offset(struct tdqcow_state *s,
int n_start, int n_end)
{
int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
- char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr;
+ char *tmp_ptr2, *l2_ptr, *l1_ptr;
+ uint64_t *tmp_ptr;
uint64_t l2_offset, *l2_table, cluster_offset, tmp;
uint32_t min_count;
int new_l2_table;
@@ -401,6 +414,11 @@ static uint64_t get_cluster_offset(struct tdqcow_state *s,
}
memcpy(tmp_ptr, l1_ptr, 4096);
+ /* Convert block to write to big endian */
+ for(i = 0; i < 4096 / sizeof(uint64_t); i++) {
+ cpu_to_be64s(&tmp_ptr[i]);
+ }
+
/*
* Issue non-asynchronous L1 write.
* For safety, we must ensure that
@@ -777,7 +795,7 @@ int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flags)
goto fail;
for(i = 0; i < s->l1_size; i++) {
- //be64_to_cpus(&s->l1_table[i]);
+ be64_to_cpus(&s->l1_table[i]);
//DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]);
if (s->l1_table[i] > final_cluster)
final_cluster = s->l1_table[i];
@@ -810,6 +828,38 @@ int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flags)
be32_to_cpus(&exthdr->xmagic);
if(exthdr->xmagic != XEN_MAGIC)
goto end_xenhdr;
+
+ /* Try to detect old tapdisk images. They have to be fixed because
+ * they don't use big endian but native endianness for the L1 table */
+ if ((exthdr->flags & EXTHDR_L1_BIG_ENDIAN) == 0) {
+
+ /*
+ The image is broken. Fix it. The L1 table has already been
+ byte-swapped, so we can write it to the image file as it is
+ currently in memory. Then swap it back to native endianess
+ for operation.
+ */
+
+ DPRINTF("qcow: Converting image to big endian L1 table\n");
+
+ lseek(fd, s->l1_table_offset, SEEK_SET);
+ if (write(fd, s->l1_table, l1_table_size) != l1_table_size) {
+ DPRINTF("qcow: Failed to write new L1 table\n");
+ goto fail;
+ }
+
+ for(i = 0;i < s->l1_size; i++) {
+ cpu_to_be64s(&s->l1_table[i]);
+ }
+
+ /* Write the big endian flag to the extended header */
+ exthdr->flags |= EXTHDR_L1_BIG_ENDIAN;
+
+ if (write(fd, buf, 512) != 512) {
+ DPRINTF("qcow: Failed to write extended header\n");
+ goto fail;
+ }
+ }
/*Finally check the L1 table cksum*/
be32_to_cpus(&exthdr->cksum);
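The gen_cksum change above fixes a portability bug: the L1 table must be checksummed in its on-disk (big-endian) representation and then restored to native order. A dependency-free sketch of that round-trip, assuming a little-endian host and using XOR as a stand-in for the MD5 used by the real code:

#include <stddef.h>
#include <stdint.h>

static uint64_t bswap64(uint64_t v)
{
    v = (v >> 32) | (v << 32);
    v = ((v & 0xffff0000ffff0000ULL) >> 16) | ((v & 0x0000ffff0000ffffULL) << 16);
    v = ((v & 0xff00ff00ff00ff00ULL) >> 8)  | ((v & 0x00ff00ff00ff00ffULL) << 8);
    return v;
}

static uint64_t cksum_l1_table(uint64_t *l1, size_t n)
{
    uint64_t sum = 0;
    size_t i;

    for (i = 0; i < n; i++)        /* to on-disk (big-endian) order */
        l1[i] = bswap64(l1[i]);
    for (i = 0; i < n; i++)        /* stand-in for MD5 over the bytes */
        sum ^= l1[i];
    for (i = 0; i < n; i++)        /* back to native order */
        l1[i] = bswap64(l1[i]);
    return sum;
}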
diff --git a/tools/firmware/hvmloader/Makefile b/tools/firmware/hvmloader/Makefile
index 0defed1264..eca4c7cbdf 100644
--- a/tools/firmware/hvmloader/Makefile
+++ b/tools/firmware/hvmloader/Makefile
@@ -42,16 +42,21 @@ OBJS = $(patsubst %.c,%.o,$(SRCS))
.PHONY: all
all: hvmloader
-hvmloader: roms.h subdirs-all $(SRCS)
- $(CC) $(CFLAGS) -c $(SRCS)
- $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(LOADADDR) -o hvmloader.tmp $(OBJS) acpi/acpi.a
+smbios.o: CFLAGS += -D__SMBIOS_DATE__="\"$(shell date +%m/%d/%Y)\""
+
+hvmloader: roms.h subdirs-all $(OBJS)
+ $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(LOADADDR) \
+ -o hvmloader.tmp $(OBJS) acpi/acpi.a
$(OBJCOPY) hvmloader.tmp hvmloader
rm -f hvmloader.tmp
-roms.h: ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../etherboot/eb-roms.h ../extboot/extboot.bin
+roms.h: ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin \
+ ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../etherboot/eb-roms.h \
+ ../extboot/extboot.bin
sh ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h
sh ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h
- sh ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h
+ sh ./mkhex vgabios_cirrusvga \
+ ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h
cat ../etherboot/eb-roms.h >> roms.h
sh ./mkhex extboot ../extboot/extboot.bin >> roms.h
diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
index 243f7e9b46..8bbcf6d6d0 100644
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -420,6 +420,8 @@ int main(void)
init_hypercalls();
+ printf("CPU speed is %u MHz\n", get_cpu_mhz());
+
printf("Writing SMBIOS tables ...\n");
smbios_sz = hvm_write_smbios_tables();
diff --git a/tools/firmware/hvmloader/smbios.c b/tools/firmware/hvmloader/smbios.c
index 07d9e7b848..f124629925 100644
--- a/tools/firmware/hvmloader/smbios.c
+++ b/tools/firmware/hvmloader/smbios.c
@@ -21,6 +21,7 @@
*/
#include <stdint.h>
+#include <xen/xen.h>
#include <xen/version.h>
#include "smbios_types.h"
#include "util.h"
@@ -246,13 +247,14 @@ smbios_entry_point_init(void *start,
int i;
struct smbios_entry_point *ep = (struct smbios_entry_point *)start;
+ memset(ep, 0, sizeof(*ep));
+
strncpy(ep->anchor_string, "_SM_", 4);
ep->length = 0x1f;
ep->smbios_major_version = 2;
ep->smbios_minor_version = 4;
ep->max_structure_size = max_structure_size;
ep->entry_point_revision = 0;
- memset(ep->formatted_area, 0, 5);
strncpy(ep->intermediate_anchor_string, "_DMI_", 5);
ep->structure_table_length = structure_table_length;
@@ -260,9 +262,6 @@ smbios_entry_point_init(void *start,
ep->number_of_structures = number_of_structures;
ep->smbios_bcd_revision = 0x24;
- ep->checksum = 0;
- ep->intermediate_checksum = 0;
-
sum = 0;
for ( i = 0; i < 0x10; i++ )
sum += ((int8_t *)start)[i];
@@ -280,22 +279,27 @@ smbios_type_0_init(void *start, const char *xen_version,
uint32_t xen_major_version, uint32_t xen_minor_version)
{
struct smbios_type_0 *p = (struct smbios_type_0 *)start;
-
+ static const char *smbios_release_date = __SMBIOS_DATE__;
+
+ memset(p, 0, sizeof(*p));
+
p->header.type = 0;
p->header.length = sizeof(struct smbios_type_0);
p->header.handle = 0;
-
+
p->vendor_str = 1;
p->version_str = 2;
p->starting_address_segment = 0xe800;
- p->release_date_str = 0;
+ p->release_date_str = 3;
p->rom_size = 0;
-
- memset(p->characteristics, 0, 8);
- p->characteristics[7] = 0x08; /* BIOS characteristics not supported */
- p->characteristics_extension_bytes[0] = 0;
- p->characteristics_extension_bytes[1] = 0;
-
+
+ /* BIOS Characteristics. */
+ p->characteristics[0] = 0x80; /* PCI is supported */
+ p->characteristics[2] = 0x08; /* EDD is supported */
+
+ /* Extended Characteristics: Enable Targeted Content Distribution. */
+ p->characteristics_extension_bytes[1] = 0x04;
+
p->major_release = (uint8_t) xen_major_version;
p->minor_release = (uint8_t) xen_minor_version;
p->embedded_controller_major = 0xff;
@@ -306,6 +310,8 @@ smbios_type_0_init(void *start, const char *xen_version,
start += strlen("Xen") + 1;
strcpy((char *)start, xen_version);
start += strlen(xen_version) + 1;
+ strcpy((char *)start, smbios_release_date);
+ start += strlen(smbios_release_date) + 1;
*((uint8_t *)start) = 0;
return start + 1;
@@ -318,6 +324,9 @@ smbios_type_1_init(void *start, const char *xen_version,
{
char uuid_str[37];
struct smbios_type_1 *p = (struct smbios_type_1 *)start;
+
+ memset(p, 0, sizeof(*p));
+
p->header.type = 1;
p->header.length = sizeof(struct smbios_type_1);
p->header.handle = 0x100;
@@ -355,6 +364,8 @@ smbios_type_3_init(void *start)
{
struct smbios_type_3 *p = (struct smbios_type_3 *)start;
+ memset(p, 0, sizeof(*p));
+
p->header.type = 3;
p->header.length = sizeof(struct smbios_type_3);
p->header.handle = 0x300;
@@ -379,12 +390,15 @@ smbios_type_3_init(void *start)
/* Type 4 -- Processor Information */
static void *
-smbios_type_4_init(void *start, unsigned int cpu_number, char *cpu_manufacturer)
+smbios_type_4_init(
+ void *start, unsigned int cpu_number, char *cpu_manufacturer)
{
char buf[80];
struct smbios_type_4 *p = (struct smbios_type_4 *)start;
uint32_t eax, ebx, ecx, edx;
+ memset(p, 0, sizeof(*p));
+
p->header.type = 4;
p->header.length = sizeof(struct smbios_type_4);
p->header.handle = 0x400 + cpu_number;
@@ -403,8 +417,7 @@ smbios_type_4_init(void *start, unsigned int cpu_number, char *cpu_manufacturer)
p->voltage = 0;
p->external_clock = 0;
- p->max_speed = 0; /* unknown */
- p->current_speed = 0; /* unknown */
+ p->max_speed = p->current_speed = get_cpu_mhz();
p->status = 0x41; /* socket populated, CPU enabled */
p->upgrade = 0x01; /* other */
@@ -431,6 +444,8 @@ smbios_type_16_init(void *start, uint32_t memsize)
{
struct smbios_type_16 *p = (struct smbios_type_16*)start;
+ memset(p, 0, sizeof(*p));
+
p->header.type = 16;
p->header.handle = 0x1000;
p->header.length = sizeof(struct smbios_type_16);
@@ -453,6 +468,8 @@ smbios_type_17_init(void *start, uint32_t memory_size_mb)
{
struct smbios_type_17 *p = (struct smbios_type_17 *)start;
+ memset(p, 0, sizeof(*p));
+
p->header.type = 17;
p->header.length = sizeof(struct smbios_type_17);
p->header.handle = 0x1100;
@@ -484,6 +501,8 @@ smbios_type_19_init(void *start, uint32_t memory_size_mb)
{
struct smbios_type_19 *p = (struct smbios_type_19 *)start;
+ memset(p, 0, sizeof(*p));
+
p->header.type = 19;
p->header.length = sizeof(struct smbios_type_19);
p->header.handle = 0x1300;
@@ -504,6 +523,8 @@ smbios_type_20_init(void *start, uint32_t memory_size_mb)
{
struct smbios_type_20 *p = (struct smbios_type_20 *)start;
+ memset(p, 0, sizeof(*p));
+
p->header.type = 20;
p->header.length = sizeof(struct smbios_type_20);
p->header.handle = 0x1400;
@@ -528,6 +549,8 @@ smbios_type_32_init(void *start)
{
struct smbios_type_32 *p = (struct smbios_type_32 *)start;
+ memset(p, 0, sizeof(*p));
+
p->header.type = 32;
p->header.length = sizeof(struct smbios_type_32);
p->header.handle = 0x2000;
@@ -545,6 +568,8 @@ smbios_type_127_init(void *start)
{
struct smbios_type_127 *p = (struct smbios_type_127 *)start;
+ memset(p, 0, sizeof(*p));
+
p->header.type = 127;
p->header.length = sizeof(struct smbios_type_127);
p->header.handle = 0x7f00;
diff --git a/tools/firmware/hvmloader/util.c b/tools/firmware/hvmloader/util.c
index cb3fd754e7..b57575fe9a 100644
--- a/tools/firmware/hvmloader/util.c
+++ b/tools/firmware/hvmloader/util.c
@@ -21,7 +21,10 @@
#include "util.h"
#include "config.h"
#include "e820.h"
+#include "hypercall.h"
#include <stdint.h>
+#include <xen/xen.h>
+#include <xen/memory.h>
#include <xen/hvm/hvm_info_table.h>
void outb(uint16_t addr, uint8_t val)
@@ -585,6 +588,56 @@ int get_apic_mode(void)
return (t ? t->apic_mode : 1);
}
+uint16_t get_cpu_mhz(void)
+{
+ struct xen_add_to_physmap xatp;
+ struct shared_info *shared_info = (struct shared_info *)0xa0000;
+ struct vcpu_time_info *info = &shared_info->vcpu_info[0].time;
+ uint64_t cpu_khz;
+ uint32_t tsc_to_nsec_mul, version;
+ int8_t tsc_shift;
+
+ static uint16_t cpu_mhz;
+ if ( cpu_mhz != 0 )
+ return cpu_mhz;
+
+ /* Map shared-info page to 0xa0000 (i.e., overlap VGA hole). */
+ xatp.domid = DOMID_SELF;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.idx = 0;
+ xatp.gpfn = (unsigned long)shared_info >> 12;
+ if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+ BUG();
+
+ /* Get a consistent snapshot of scale factor (multiplier and shift). */
+ do {
+ version = info->version;
+ rmb();
+ tsc_to_nsec_mul = info->tsc_to_system_mul;
+ tsc_shift = info->tsc_shift;
+ rmb();
+ } while ((version & 1) | (version ^ info->version));
+
+ /* Compute CPU speed in kHz. */
+ cpu_khz = 1000000ull << 32;
+ do_div(cpu_khz, tsc_to_nsec_mul);
+ if ( tsc_shift < 0 )
+ cpu_khz = cpu_khz << -tsc_shift;
+ else
+ cpu_khz = cpu_khz >> tsc_shift;
+
+ /* Get the VGA MMIO hole back by remapping shared info to scratch. */
+ xatp.domid = DOMID_SELF;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.idx = 0;
+ xatp.gpfn = 0xfffff; /* scratch pfn */
+ if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+ BUG();
+
+ cpu_mhz = (uint16_t)(((uint32_t)cpu_khz + 500) / 1000);
+ return cpu_mhz;
+}
+
/*
* Local variables:
* mode: C
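The arithmetic in get_cpu_mhz() above follows from Xen's time record: system time advances as ns = ((tsc scaled by tsc_shift) * tsc_to_system_mul) >> 32, so the TSC frequency in kHz is (10^6 * 2^32) / tsc_to_system_mul, re-adjusted by tsc_shift. An equivalent computation with plain C99 64-bit division, handy for sanity-checking the do_div-based version:

#include <stdint.h>

static uint32_t khz_from_time_info(uint32_t tsc_to_system_mul, int8_t tsc_shift)
{
    uint64_t khz = (1000000ULL << 32) / tsc_to_system_mul;

    /* Undo the shift Xen applied to the raw TSC. */
    if (tsc_shift < 0)
        khz <<= -tsc_shift;
    else
        khz >>= tsc_shift;
    return (uint32_t)khz;
}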
diff --git a/tools/firmware/hvmloader/util.h b/tools/firmware/hvmloader/util.h
index 6ce796b3cc..ac06983d9b 100644
--- a/tools/firmware/hvmloader/util.h
+++ b/tools/firmware/hvmloader/util.h
@@ -10,11 +10,11 @@
#undef NULL
#define NULL ((void*)0)
-extern void __assert_failed(char *assertion, char *file, int line)
+void __assert_failed(char *assertion, char *file, int line)
__attribute__((noreturn));
#define ASSERT(p) \
do { if (!(p)) __assert_failed(#p, __FILE__, __LINE__); } while (0)
-extern void __bug(char *file, int line) __attribute__((noreturn));
+void __bug(char *file, int line) __attribute__((noreturn));
#define BUG() __bug(__FILE__, __LINE__)
#define BUG_ON(p) do { if (p) BUG(); } while (0)
#define BUILD_BUG_ON(p) ((void)sizeof(char[1 - 2 * !!(p)]))
@@ -49,10 +49,54 @@ void pci_write(uint32_t devfn, uint32_t reg, uint32_t len, uint32_t val);
#define pci_writew(devfn, reg, val) (pci_write(devfn, reg, 2, (uint16_t)val))
#define pci_writel(devfn, reg, val) (pci_write(devfn, reg, 4, (uint32_t)val))
+/* Get CPU speed in MHz. */
+uint16_t get_cpu_mhz(void);
+
/* Do cpuid instruction, with operation 'idx' */
void cpuid(uint32_t idx, uint32_t *eax, uint32_t *ebx,
uint32_t *ecx, uint32_t *edx);
+/* Read the TSC register. */
+static inline uint64_t rdtsc(void)
+{
+ uint64_t tsc;
+ asm volatile ( "rdtsc" : "=A" (tsc) );
+ return tsc;
+}
+
+/* Relax the CPU and let the compiler know that time passes. */
+static inline void cpu_relax(void)
+{
+ asm volatile ( "rep ; nop" : : : "memory" );
+}
+
+/* Memory barriers. */
+#define barrier() asm volatile ( "" : : : "memory" )
+#define rmb() barrier()
+#define wmb() barrier()
+
+/*
+ * Divide a 64-bit dividend by a 32-bit divisor.
+ * (1) Overwrites the 64-bit dividend _in_place_ with the quotient
+ * (2) Returns the 32-bit remainder
+ */
+#define do_div(n, base) ({ \
+ unsigned long __upper, __low, __high, __mod, __base; \
+ __base = (base); \
+ asm ( "" : "=a" (__low), "=d" (__high) : "A" (n) ); \
+ __upper = __high; \
+ if ( __high ) \
+ { \
+ __upper = __high % (__base); \
+ __high = __high / (__base); \
+ } \
+ asm ( "divl %2" \
+ : "=a" (__low), "=d" (__mod) \
+ : "rm" (__base), "0" (__low), "1" (__upper) ); \
+ asm ( "" : "=A" (n) : "a" (__low), "d" (__high) ); \
+ __mod; \
+})
+
/* HVM-builder info. */
int get_vcpu_nr(void);
int get_acpi_enabled(void);
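A short usage sketch for the do_div() helper added above: the macro overwrites its 64-bit dividend with the quotient and evaluates to the 32-bit remainder. The stand-in definition below is behaviour-equivalent plain C so the example compiles anywhere; hvmloader's version uses divl inline assembly because it targets 32-bit code with no 64-bit libgcc division.

#include <stdint.h>
#include <stdio.h>

#define do_div(n, base) ({                      \
    uint32_t __rem = (uint32_t)((n) % (base));  \
    (n) /= (base);                              \
    __rem;                                      \
})

int main(void)
{
    uint64_t ns = 1000000123ULL;             /* 1 s + 123 ns */
    uint32_t rem = do_div(ns, 1000000000u);  /* ns := 1, rem := 123 */
    printf("%llu s + %u ns\n", (unsigned long long)ns, rem);
    return 0;
}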
diff --git a/tools/include/xen-foreign/mkheader.py b/tools/include/xen-foreign/mkheader.py
index ba6f98cfce..8b249e3dfc 100644
--- a/tools/include/xen-foreign/mkheader.py
+++ b/tools/include/xen-foreign/mkheader.py
@@ -37,8 +37,8 @@ inttypes["x86_64"] = {
"xen_pfn_t" : "__align8__ uint64_t",
};
header["x86_64"] = """
-#ifdef __GNUC__
-# define __DECL_REG(name) __extension__ union { uint64_t r ## name, e ## name; }
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+# define __DECL_REG(name) union { uint64_t r ## name, e ## name; }
# define __align8__ __attribute__((aligned (8)))
#else
# define __DECL_REG(name) uint64_t r ## name
diff --git a/tools/ioemu/block-qcow.c b/tools/ioemu/block-qcow.c
index 458450dc08..d7dacc75be 100644
--- a/tools/ioemu/block-qcow.c
+++ b/tools/ioemu/block-qcow.c
@@ -37,6 +37,11 @@
#define QCOW_OFLAG_COMPRESSED (1LL << 63)
+#define XEN_MAGIC (('X' << 24) | ('E' << 16) | ('N' << 8) | 0xfb)
+
+#define EXTHDR_SPARSE_FILE 0x01
+#define EXTHDR_L1_BIG_ENDIAN 0x02
+
typedef struct QCowHeader {
uint32_t magic;
uint32_t version;
@@ -50,6 +55,14 @@ typedef struct QCowHeader {
uint64_t l1_table_offset;
} QCowHeader;
+/*Extended header for Xen enhancements*/
+typedef struct QCowHeader_ext {
+ uint32_t xmagic;
+ uint32_t cksum;
+ uint32_t min_cluster_alloc;
+ uint32_t flags;
+} QCowHeader_ext;
+
#define L2_CACHE_SIZE 16
typedef struct BDRVQcowState {
@@ -137,6 +150,51 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
s->l1_size * sizeof(uint64_t))
goto fail;
+
+ /* Try to detect old tapdisk images. They have to be fixed because they
+ * don't use big endian but native endianness for the L1 table */
+ if (header.backing_file_offset == 0 && s->l1_table_offset % 4096 == 0) {
+
+ QCowHeader_ext exthdr;
+ uint64_t l1_bytes = s->l1_size * sizeof(uint64_t);
+
+ if (bdrv_pread(s->hd, sizeof(header), &exthdr, sizeof(exthdr))
+ != sizeof(exthdr))
+ goto end_xenhdr;
+
+ be32_to_cpus(&exthdr.xmagic);
+ if (exthdr.xmagic != XEN_MAGIC)
+ goto end_xenhdr;
+
+ be32_to_cpus(&exthdr.flags);
+ if (exthdr.flags & EXTHDR_L1_BIG_ENDIAN)
+ goto end_xenhdr;
+
+ /* The image is broken. Fix it. */
+ fprintf(stderr, "qcow: Converting image to big endian L1 table\n");
+
+ for(i = 0;i < s->l1_size; i++) {
+ cpu_to_be64s(&s->l1_table[i]);
+ }
+
+ if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table,
+ l1_bytes) != l1_bytes) {
+ fprintf(stderr, "qcow: Failed to write new L1 table\n");
+ goto fail;
+ }
+
+ exthdr.flags |= EXTHDR_L1_BIG_ENDIAN;
+ cpu_to_be32s(&exthdr.flags);
+
+ if (bdrv_pwrite(s->hd, sizeof(header), &exthdr, sizeof(exthdr))
+ != sizeof(exthdr)) {
+ fprintf(stderr, "qcow: Failed to write extended header\n");
+ goto fail;
+ }
+ }
+end_xenhdr:
+
+ /* L1 table is big endian now */
for(i = 0;i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
}
@@ -725,6 +783,13 @@ static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
qemu_aio_release(acb);
}
+static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVQcowState *s = bs->opaque;
+ return bdrv_aio_flush(s->hd, cb, opaque);
+}
+
static void qcow_close(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
@@ -869,10 +934,10 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
return 0;
}
-static void qcow_flush(BlockDriverState *bs)
+static int qcow_flush(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
- bdrv_flush(s->hd);
+ return bdrv_flush(s->hd);
}
static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -899,6 +964,7 @@ BlockDriver bdrv_qcow = {
.bdrv_aio_read = qcow_aio_read,
.bdrv_aio_write = qcow_aio_write,
.bdrv_aio_cancel = qcow_aio_cancel,
+ .bdrv_aio_flush = qcow_aio_flush,
.aiocb_size = sizeof(QCowAIOCB),
.bdrv_write_compressed = qcow_write_compressed,
.bdrv_get_info = qcow_get_info,
diff --git a/tools/ioemu/block-qcow2.c b/tools/ioemu/block-qcow2.c
index a5f8255389..606716bdc1 100644
--- a/tools/ioemu/block-qcow2.c
+++ b/tools/ioemu/block-qcow2.c
@@ -1007,6 +1007,13 @@ static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
qemu_aio_release(acb);
}
+static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVQcowState *s = bs->opaque;
+ return bdrv_aio_flush(s->hd, cb, opaque);
+}
+
static void qcow_close(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
@@ -1228,10 +1235,10 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
return 0;
}
-static void qcow_flush(BlockDriverState *bs)
+static int qcow_flush(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
- bdrv_flush(s->hd);
+ return bdrv_flush(s->hd);
}
static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -1886,6 +1893,8 @@ static int grow_refcount_table(BlockDriverState *bs, int min_size)
int64_t table_offset;
uint64_t data64;
uint32_t data32;
+ int old_table_size;
+ int64_t old_table_offset;
if (min_size <= s->refcount_table_size)
return 0;
@@ -1931,10 +1940,14 @@ static int grow_refcount_table(BlockDriverState *bs, int min_size)
&data32, sizeof(data32)) != sizeof(data32))
goto fail;
qemu_free(s->refcount_table);
+ old_table_offset = s->refcount_table_offset;
+ old_table_size = s->refcount_table_size;
s->refcount_table = new_table;
s->refcount_table_size = new_table_size;
+ s->refcount_table_offset = table_offset;
update_refcount(bs, table_offset, new_table_size2, 1);
+ free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
return 0;
fail:
free_clusters(bs, table_offset, new_table_size2);
@@ -2235,6 +2248,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_aio_read = qcow_aio_read,
.bdrv_aio_write = qcow_aio_write,
.bdrv_aio_cancel = qcow_aio_cancel,
+ .bdrv_aio_flush = qcow_aio_flush,
.aiocb_size = sizeof(QCowAIOCB),
.bdrv_write_compressed = qcow_write_compressed,
diff --git a/tools/ioemu/block-raw.c b/tools/ioemu/block-raw.c
index 182d2ec55e..cbc049f64e 100644
--- a/tools/ioemu/block-raw.c
+++ b/tools/ioemu/block-raw.c
@@ -496,6 +496,21 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
pacb = &acb->next;
}
}
+
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ RawAIOCB *acb;
+
+ acb = raw_aio_setup(bs, 0, NULL, 0, cb, opaque);
+ if (!acb)
+ return NULL;
+ if (aio_fsync(O_SYNC, &acb->aiocb) < 0) {
+ qemu_aio_release(acb);
+ return NULL;
+ }
+ return &acb->common;
+}
#endif
static void raw_close(BlockDriverState *bs)
@@ -600,10 +615,12 @@ static int raw_create(const char *filename, int64_t total_size,
return 0;
}
-static void raw_flush(BlockDriverState *bs)
+static int raw_flush(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
- fsync(s->fd);
+ if (fsync(s->fd))
+ return errno;
+ return 0;
}
BlockDriver bdrv_raw = {
@@ -621,6 +638,7 @@ BlockDriver bdrv_raw = {
.bdrv_aio_read = raw_aio_read,
.bdrv_aio_write = raw_aio_write,
.bdrv_aio_cancel = raw_aio_cancel,
+ .bdrv_aio_flush = raw_aio_flush,
.aiocb_size = sizeof(RawAIOCB),
#endif
.protocol_name = "file",
@@ -959,6 +977,7 @@ BlockDriver bdrv_host_device = {
.bdrv_aio_read = raw_aio_read,
.bdrv_aio_write = raw_aio_write,
.bdrv_aio_cancel = raw_aio_cancel,
+ .bdrv_aio_flush = raw_aio_flush,
.aiocb_size = sizeof(RawAIOCB),
#endif
.bdrv_pread = raw_pread,
diff --git a/tools/ioemu/block-vmdk.c b/tools/ioemu/block-vmdk.c
index 0558977abc..a8b8a35153 100644
--- a/tools/ioemu/block-vmdk.c
+++ b/tools/ioemu/block-vmdk.c
@@ -734,10 +734,10 @@ static void vmdk_close(BlockDriverState *bs)
vmdk_parent_close(s->hd);
}
-static void vmdk_flush(BlockDriverState *bs)
+static int vmdk_flush(BlockDriverState *bs)
{
BDRVVmdkState *s = bs->opaque;
- bdrv_flush(s->hd);
+ return bdrv_flush(s->hd);
}
BlockDriver bdrv_vmdk = {
diff --git a/tools/ioemu/block.c b/tools/ioemu/block.c
index 6e8f8cc737..8a730bd754 100644
--- a/tools/ioemu/block.c
+++ b/tools/ioemu/block.c
@@ -48,6 +48,8 @@ static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
int64_t sector_num, const uint8_t *buf, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
@@ -155,6 +157,8 @@ void bdrv_register(BlockDriver *bdrv)
bdrv->bdrv_read = bdrv_read_em;
bdrv->bdrv_write = bdrv_write_em;
}
+ if (!bdrv->bdrv_aio_flush)
+ bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
bdrv->next = first_drv;
first_drv = bdrv;
}
@@ -885,12 +889,14 @@ const char *bdrv_get_device_name(BlockDriverState *bs)
return bs->device_name;
}
-void bdrv_flush(BlockDriverState *bs)
+int bdrv_flush(BlockDriverState *bs)
{
- if (bs->drv->bdrv_flush)
- bs->drv->bdrv_flush(bs);
- if (bs->backing_hd)
- bdrv_flush(bs->backing_hd);
+ int ret = 0;
+ if (bs->drv->bdrv_flush)
+ ret = bs->drv->bdrv_flush(bs);
+ if (!ret && bs->backing_hd)
+ ret = bdrv_flush(bs->backing_hd);
+ return ret;
}
void bdrv_info(void)
@@ -1138,6 +1144,17 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
drv->bdrv_aio_cancel(acb);
}
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (!drv)
+ return NULL;
+
+ return drv->bdrv_aio_flush(bs, cb, opaque);
+}
+
/**************************************************************/
/* async block device emulation */
@@ -1214,6 +1231,15 @@ static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
}
#endif /* !QEMU_TOOL */
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ int ret;
+ ret = bdrv_flush(bs);
+ cb(opaque, ret);
+ return NULL;
+}
+
/**************************************************************/
/* sync block device emulation */
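The block.c change above follows a common layering pattern: the generic layer guarantees that every driver has a bdrv_aio_flush entry point by installing a synchronous emulation at registration time, so callers never test for its presence. A condensed, self-contained sketch of that wiring (types simplified):

#include <stddef.h>

typedef void completion_fn(void *opaque, int ret);

struct driver {
    int  (*flush)(struct driver *d);   /* synchronous, returns 0 or error */
    void (*aio_flush)(struct driver *d, completion_fn *cb, void *opaque);
};

/* Emulate an asynchronous flush with the synchronous one: do the work
 * inline and invoke the completion callback immediately. */
static void aio_flush_em(struct driver *d, completion_fn *cb, void *opaque)
{
    cb(opaque, d->flush(d));
}

/* At registration, fill in any missing asynchronous entry point. */
static void driver_register(struct driver *d)
{
    if (!d->aio_flush)
        d->aio_flush = aio_flush_em;
}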
diff --git a/tools/ioemu/block_int.h b/tools/ioemu/block_int.h
index c7a9d197b3..8c0318e795 100644
--- a/tools/ioemu/block_int.h
+++ b/tools/ioemu/block_int.h
@@ -36,7 +36,7 @@ struct BlockDriver {
void (*bdrv_close)(BlockDriverState *bs);
int (*bdrv_create)(const char *filename, int64_t total_sectors,
const char *backing_file, int flags);
- void (*bdrv_flush)(BlockDriverState *bs);
+ int (*bdrv_flush)(BlockDriverState *bs);
int (*bdrv_is_allocated)(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum);
int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
@@ -49,6 +49,8 @@ struct BlockDriver {
int64_t sector_num, const uint8_t *buf, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb);
+ BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
int aiocb_size;
const char *protocol_name;
diff --git a/tools/ioemu/hw/ide.c b/tools/ioemu/hw/ide.c
index 7cb0f66797..cfe3e8a5b2 100644
--- a/tools/ioemu/hw/ide.c
+++ b/tools/ioemu/hw/ide.c
@@ -751,6 +751,7 @@ static inline void ide_abort_command(IDEState *s)
static inline void ide_set_irq(IDEState *s)
{
BMDMAState *bm = s->bmdma;
+ if (!s->bs) return; /* yikes */
if (!(s->cmd & IDE_CMD_DISABLE_IRQ)) {
if (bm) {
bm->status |= BM_STATUS_INT;
@@ -916,6 +917,8 @@ static void ide_read_dma_cb(void *opaque, int ret)
int n;
int64_t sector_num;
+ if (!s->bs) return; /* yikes */
+
n = s->io_buffer_size >> 9;
sector_num = ide_get_sector(s);
if (n > 0) {
@@ -1024,6 +1027,8 @@ static void ide_write_dma_cb(void *opaque, int ret)
int n;
int64_t sector_num;
+ if (!s->bs) return; /* yikes */
+
n = s->io_buffer_size >> 9;
sector_num = ide_get_sector(s);
if (n > 0) {
@@ -1072,6 +1077,39 @@ static void ide_sector_write_dma(IDEState *s)
ide_dma_start(s, ide_write_dma_cb);
}
+static void ide_device_utterly_broken(IDEState *s) {
+ s->status |= BUSY_STAT;
+ s->bs = NULL;
+ /* This prevents all future commands from working. All of the
+ * asynchronous callbacks (and ide_set_irq, as a safety measure)
+ * check to see whether this has happened and bail if so.
+ */
+}
+
+static void ide_flush_cb(void *opaque, int ret)
+{
+ IDEState *s = opaque;
+
+ if (!s->bs) return; /* yikes */
+
+ if (ret) {
+ /* We are completely doomed. The IDE spec does not permit us
+ * to return an error from a flush except via a protocol which
+ * requires us to say where the error is and which
+ * contemplates the guest repeating the flush attempt to
+ * flush the remaining data. We can't support that
+ * because f(data)sync (which is what the block drivers use
+ * eventually) doesn't report the necessary information or
+ * give us the necessary control. So we make the disk vanish.
+ */
+ ide_device_utterly_broken(s);
+ return;
+ }
+ else
+ s->status = READY_STAT;
+ ide_set_irq(s);
+}
+
static void ide_atapi_cmd_ok(IDEState *s)
{
s->error = 0;
@@ -1298,6 +1336,8 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
IDEState *s = bm->ide_if;
int data_offset, n;
+ if (!s->bs) return; /* yikes */
+
if (ret < 0) {
ide_atapi_io_error(s, ret);
goto eot;
@@ -1703,6 +1743,8 @@ static void cdrom_change_cb(void *opaque)
IDEState *s = opaque;
int64_t nb_sectors;
+ if (!s->bs) return; /* yikes */
+
/* XXX: send interrupt too */
bdrv_get_geometry(s->bs, &nb_sectors);
s->nb_sectors = nb_sectors;
@@ -1744,6 +1786,7 @@ static void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val)
IDEState *s;
int unit, n;
int lba48 = 0;
+ int ret;
#ifdef DEBUG_IDE
printf("IDE: write addr=0x%x val=0x%02x\n", addr, val);
@@ -1806,8 +1849,8 @@ static void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val)
printf("ide: CMD=%02x\n", val);
#endif
s = ide_if->cur_drive;
- /* ignore commands to non existant slave */
- if (s != ide_if && !s->bs)
+ /* ignore commands to a non-existent device */
+ if (!s->bs)
break;
switch(val) {
@@ -1976,10 +2019,8 @@ static void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val)
break;
case WIN_FLUSH_CACHE:
case WIN_FLUSH_CACHE_EXT:
- if (s->bs)
- bdrv_flush(s->bs);
- s->status = READY_STAT;
- ide_set_irq(s);
+ s->status = BUSY_STAT;
+ bdrv_aio_flush(s->bs, ide_flush_cb, s);
break;
case WIN_IDLEIMMEDIATE:
case WIN_STANDBY:
@@ -2723,6 +2764,7 @@ static void pci_ide_save(QEMUFile* f, void *opaque)
if (s->identify_set) {
qemu_put_buffer(f, (const uint8_t *)s->identify_data, 512);
}
+ qemu_put_8s(f, &s->write_cache);
qemu_put_8s(f, &s->feature);
qemu_put_8s(f, &s->error);
qemu_put_be32s(f, &s->nsector);
@@ -2749,7 +2791,7 @@ static int pci_ide_load(QEMUFile* f, void *opaque, int version_id)
PCIIDEState *d = opaque;
int ret, i;
- if (version_id != 1)
+ if (version_id != 1 && version_id != 2)
return -EINVAL;
ret = pci_device_load(&d->dev, f);
if (ret < 0)
@@ -2780,6 +2822,8 @@ static int pci_ide_load(QEMUFile* f, void *opaque, int version_id)
if (s->identify_set) {
qemu_get_buffer(f, (uint8_t *)s->identify_data, 512);
}
+ if (version_id >= 2)
+ qemu_get_8s(f, &s->write_cache);
qemu_get_8s(f, &s->feature);
qemu_get_8s(f, &s->error);
qemu_get_be32s(f, &s->nsector);
@@ -2854,7 +2898,7 @@ void pci_piix_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn)
buffered_pio_init();
- register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d);
+ register_savevm("ide", 0, 2, pci_ide_save, pci_ide_load, d);
}
/* hd_table must contain 4 block drivers */
@@ -2895,7 +2939,7 @@ void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn)
buffered_pio_init();
- register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d);
+ register_savevm("ide", 0, 2, pci_ide_save, pci_ide_load, d);
}
/***********************************************************/
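
Adding s->write_cache to the migration record above follows the usual versioned-savevm pattern: bump the version passed to register_savevm(), write the new field unconditionally, and read it only from new-format images. A generic sketch (the v1 default shown is an assumption, not part of this patch):

    /* save side: version registered as 2, new field always written */
    qemu_put_8s(f, &s->write_cache);

    /* load side: accept v1 and v2, defaulting the field for v1 images */
    if (version_id != 1 && version_id != 2)
        return -EINVAL;
    if (version_id >= 2)
        qemu_get_8s(f, &s->write_cache);
    else
        s->write_cache = 0;   /* assumed default for old snapshots */
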
diff --git a/tools/ioemu/hw/ne2000.c b/tools/ioemu/hw/ne2000.c
index 89126a875f..977d202921 100644
--- a/tools/ioemu/hw/ne2000.c
+++ b/tools/ioemu/hw/ne2000.c
@@ -207,7 +207,7 @@ static int ne2000_buffer_full(NE2000State *s)
index = s->curpag << 8;
boundary = s->boundary << 8;
- if (index <= boundary)
+ if (index < boundary)
avail = boundary - index;
else
avail = (s->stop - s->start) - (index - boundary);
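
The one-character fix above matters because index == boundary can mark an empty receive ring (e.g. after the guest has consumed every pending packet), and with <= the device then computed zero free bytes and treated the empty ring as full. A sketch of the intended free-space rule, under the same empty-when-equal convention:

    /* Free space in a byte ring over [start, stop) where the write index
     * catching up to the read index means "empty". */
    static int ring_space(int start, int stop, int rd, int wr)
    {
        if (wr < rd)
            return rd - wr;                 /* gap up to the read index */
        return (stop - start) - (wr - rd);  /* remainder, wrapping past stop */
    }
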
diff --git a/tools/ioemu/hw/scsi-disk.c b/tools/ioemu/hw/scsi-disk.c
index 9de1fe83a5..2ad204ba26 100644
--- a/tools/ioemu/hw/scsi-disk.c
+++ b/tools/ioemu/hw/scsi-disk.c
@@ -291,6 +291,7 @@ int32_t scsi_send_command(SCSIDevice *s, uint32_t tag, uint8_t *buf, int lun)
uint8_t command;
uint8_t *outbuf;
SCSIRequest *r;
+ int ret;
command = buf[0];
r = scsi_find_request(s, tag);
@@ -496,7 +497,12 @@ int32_t scsi_send_command(SCSIDevice *s, uint32_t tag, uint8_t *buf, int lun)
break;
case 0x35:
DPRINTF("Syncronise cache (sector %d, count %d)\n", lba, len);
- bdrv_flush(s->bdrv);
+ ret = bdrv_flush(s->bdrv);
+ if (ret) {
+ DPRINTF("IO error on bdrv_flush\n");
+ scsi_command_complete(r, SENSE_HARDWARE_ERROR);
+ return 0;
+ }
break;
case 0x43:
{
diff --git a/tools/ioemu/hw/vga.c b/tools/ioemu/hw/vga.c
index 02f898fee7..9aba2222f8 100644
--- a/tools/ioemu/hw/vga.c
+++ b/tools/ioemu/hw/vga.c
@@ -1486,7 +1486,7 @@ void check_sse2(void)
static void vga_draw_graphic(VGAState *s, int full_update)
{
int y1, y, update, linesize, y_start, double_scan, mask, depth;
- int width, height, shift_control, line_offset, bwidth, changed_flag;
+ int width, height, shift_control, line_offset, bwidth, ds_depth;
ram_addr_t page0, page1;
int disp_width, multi_scan, multi_run;
uint8_t *d;
@@ -1499,13 +1499,13 @@ static void vga_draw_graphic(VGAState *s, int full_update)
s->get_resolution(s, &width, &height);
disp_width = width;
- changed_flag = 0;
+ ds_depth = s->ds->depth;
depth = s->get_bpp(s);
if (s->ds->dpy_colourdepth != NULL &&
- (s->ds->depth != depth || !s->ds->shared_buf)) {
+ (ds_depth != depth || !s->ds->shared_buf))
s->ds->dpy_colourdepth(s->ds, depth);
- changed_flag = 1;
- }
+ if (ds_depth != s->ds->depth) full_update = 1;
+
s->rgb_to_pixel =
rgb_to_pixel_dup_table[get_depth_index(s->ds)];
@@ -1569,17 +1569,18 @@ static void vga_draw_graphic(VGAState *s, int full_update)
}
vga_draw_line = vga_draw_line_table[v * NB_DEPTHS + get_depth_index(s->ds)];
- if (disp_width != s->last_width ||
+ if (s->line_offset != s->last_line_offset ||
+ disp_width != s->last_width ||
height != s->last_height) {
dpy_resize(s->ds, disp_width, height, s->line_offset);
s->last_scr_width = disp_width;
s->last_scr_height = height;
s->last_width = disp_width;
s->last_height = height;
+ s->last_line_offset = s->line_offset;
full_update = 1;
- changed_flag = 1;
}
- if (s->ds->shared_buf && (changed_flag || s->ds->data != s->vram_ptr + (s->start_addr * 4)))
+ if (s->ds->shared_buf && (full_update || s->ds->data != s->vram_ptr + (s->start_addr * 4)))
s->ds->dpy_setdata(s->ds, s->vram_ptr + (s->start_addr * 4));
if (!s->ds->shared_buf && s->cursor_invalidate)
s->cursor_invalidate(s);
@@ -2072,6 +2073,7 @@ void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base,
s->vram_offset = vga_ram_offset;
s->vram_size = vga_ram_size;
s->ds = ds;
+ ds->palette = s->last_palette;
s->get_bpp = vga_get_bpp;
s->get_offsets = vga_get_offsets;
s->get_resolution = vga_get_resolution;
diff --git a/tools/ioemu/hw/vga_int.h b/tools/ioemu/hw/vga_int.h
index 3dd9330573..a8dad45af8 100644
--- a/tools/ioemu/hw/vga_int.h
+++ b/tools/ioemu/hw/vga_int.h
@@ -129,6 +129,7 @@
uint32_t line_compare; \
uint32_t start_addr; \
uint32_t plane_updated; \
+ uint32_t last_line_offset; \
uint8_t last_cw, last_ch; \
uint32_t last_width, last_height; /* in chars or pixels */ \
uint32_t last_scr_width, last_scr_height; /* in pixels */ \
diff --git a/tools/ioemu/hw/xenfb.c b/tools/ioemu/hw/xenfb.c
index affbcaaa0d..b699552361 100644
--- a/tools/ioemu/hw/xenfb.c
+++ b/tools/ioemu/hw/xenfb.c
@@ -56,6 +56,7 @@ struct xenfb {
int depth; /* colour depth of guest framebuffer */
int width; /* pixel width of guest framebuffer */
int height; /* pixel height of guest framebuffer */
+ int offset; /* offset of the framebuffer */
int abs_pointer_wanted; /* Whether guest supports absolute pointer */
int button_state; /* Last seen pointer button state */
char protocol[64]; /* frontend protocol */
@@ -516,6 +517,18 @@ static void xenfb_on_fb_event(struct xenfb *xenfb)
}
xenfb_guest_copy(xenfb, x, y, w, h);
break;
+ case XENFB_TYPE_RESIZE:
+ xenfb->width = event->resize.width;
+ xenfb->height = event->resize.height;
+ xenfb->depth = event->resize.depth;
+ xenfb->row_stride = event->resize.stride;
+ xenfb->offset = event->resize.offset;
+ dpy_colourdepth(xenfb->ds, xenfb->depth);
+ dpy_resize(xenfb->ds, xenfb->width, xenfb->height, xenfb->row_stride);
+ if (xenfb->ds->shared_buf)
+ dpy_setdata(xenfb->ds, xenfb->pixels + xenfb->offset);
+ xenfb_invalidate(xenfb);
+ break;
}
}
xen_mb(); /* ensure we're done with ring contents */
@@ -680,6 +693,7 @@ static void xenfb_dispatch_store(void *opaque)
static int xenfb_read_frontend_fb_config(struct xenfb *xenfb) {
struct xenfb_page *fb_page;
int val;
+ int videoram;
if (xenfb_xs_scanf1(xenfb->xsh, xenfb->fb.otherend, "feature-update",
"%d", &val) < 0)
@@ -702,11 +716,31 @@ static int xenfb_read_frontend_fb_config(struct xenfb *xenfb) {
/* TODO check for consistency with the above */
xenfb->fb_len = fb_page->mem_length;
xenfb->row_stride = fb_page->line_length;
+
+ /* Protect against a hostile frontend: limit fb_len to the maximum allowed */
+ if (xenfb_xs_scanf1(xenfb->xsh, xenfb->fb.nodename, "videoram", "%d",
+ &videoram) < 0)
+ videoram = 0;
+ videoram = videoram * 1024 * 1024;
+ if (videoram && xenfb->fb_len > videoram) {
+ fprintf(stderr, "Framebuffer requested length of %zd exceeded allowed %d\n",
+ xenfb->fb_len, videoram);
+ xenfb->fb_len = videoram;
+ if (xenfb->row_stride * xenfb->height > xenfb->fb_len)
+ xenfb->height = xenfb->fb_len / xenfb->row_stride;
+ }
fprintf(stderr, "Framebuffer depth %d width %d height %d line %d\n",
fb_page->depth, fb_page->width, fb_page->height, fb_page->line_length);
if (xenfb_map_fb(xenfb, xenfb->fb.otherend_id) < 0)
return -1;
+ /* Indicate we have the frame buffer resize feature */
+ xenfb_xs_printf(xenfb->xsh, xenfb->fb.nodename, "feature-resize", "1");
+
+ /* Tell kbd pointer the screen geometry */
+ xenfb_xs_printf(xenfb->xsh, xenfb->kbd.nodename, "width", "%d", xenfb->width);
+ xenfb_xs_printf(xenfb->xsh, xenfb->kbd.nodename, "height", "%d", xenfb->height);
+
if (xenfb_switch_state(&xenfb->fb, XenbusStateConnected))
return -1;
if (xenfb_switch_state(&xenfb->kbd, XenbusStateConnected))
@@ -1074,6 +1108,7 @@ static void xenfb_mouse_event(void *opaque,
#define BLT(SRC_T,DST_T,RSB,GSB,BSB,RDB,GDB,BDB) \
for (line = y ; line < (y+h) ; line++) { \
SRC_T *src = (SRC_T *)(xenfb->pixels \
+ + xenfb->offset \
+ (line * xenfb->row_stride) \
+ (x * xenfb->depth / 8)); \
DST_T *dst = (DST_T *)(xenfb->ds->data \
@@ -1116,7 +1151,7 @@ static void xenfb_guest_copy(struct xenfb *xenfb, int x, int y, int w, int h)
if (xenfb->depth == xenfb->ds->depth) { /* Perfect match can use fast path */
for (line = y ; line < (y+h) ; line++) {
memcpy(xenfb->ds->data + (line * xenfb->ds->linesize) + (x * xenfb->ds->depth / 8),
- xenfb->pixels + (line * xenfb->row_stride) + (x * xenfb->depth / 8),
+ xenfb->pixels + xenfb->offset + (line * xenfb->row_stride) + (x * xenfb->depth / 8),
w * xenfb->depth / 8);
}
} else { /* Mismatch requires slow pixel munging */
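
The videoram clamp above is plain integer arithmetic; a worked example with hypothetical numbers:

    /* "videoram" = 8 in xenstore  ->  8 * 1024 * 1024 = 8388608 bytes.
     * A frontend advertising fb_len = 16 MB is cut back to 8388608, and
     * with row_stride = 8192 the usable height is re-derived as
     * 8388608 / 8192 = 1024 lines, so a hostile frontend cannot make
     * qemu map or copy beyond the memory the administrator allowed. */
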
diff --git a/tools/ioemu/sdl.c b/tools/ioemu/sdl.c
index aacc54f041..536e9f480f 100644
--- a/tools/ioemu/sdl.c
+++ b/tools/ioemu/sdl.c
@@ -85,19 +85,33 @@ static void opengl_setdata(DisplayState *ds, void *pixels)
glPixelStorei(GL_UNPACK_LSB_FIRST, 1);
switch (ds->depth) {
case 8:
- tex_format = GL_RGB;
- tex_type = GL_UNSIGNED_BYTE_3_3_2;
- glPixelStorei (GL_UNPACK_ALIGNMENT, 1);
+ if (ds->palette == NULL) {
+ tex_format = GL_RGB;
+ tex_type = GL_UNSIGNED_BYTE_3_3_2;
+ } else {
+ int i;
+ GLushort paletter[256], paletteg[256], paletteb[256];
+ for (i = 0; i < 256; i++) {
+ uint8_t rgb = ds->palette[i] >> 16;
+ paletter[i] = ((rgb & 0xe0) >> 5) * 65535 / 7;
+ paletteg[i] = ((rgb & 0x1c) >> 2) * 65535 / 7;
+ paletteb[i] = (rgb & 0x3) * 65535 / 3;
+ }
+ glPixelMapusv(GL_PIXEL_MAP_I_TO_R, 256, paletter);
+ glPixelMapusv(GL_PIXEL_MAP_I_TO_G, 256, paletteg);
+ glPixelMapusv(GL_PIXEL_MAP_I_TO_B, 256, paletteb);
+
+ tex_format = GL_COLOR_INDEX;
+ tex_type = GL_UNSIGNED_BYTE;
+ }
break;
case 16:
tex_format = GL_RGB;
tex_type = GL_UNSIGNED_SHORT_5_6_5;
- glPixelStorei (GL_UNPACK_ALIGNMENT, 2);
break;
case 24:
tex_format = GL_BGR;
tex_type = GL_UNSIGNED_BYTE;
- glPixelStorei (GL_UNPACK_ALIGNMENT, 1);
break;
case 32:
if (!ds->bgr) {
@@ -107,7 +121,6 @@ static void opengl_setdata(DisplayState *ds, void *pixels)
tex_format = GL_RGBA;
tex_type = GL_UNSIGNED_BYTE;
}
- glPixelStorei (GL_UNPACK_ALIGNMENT, 4);
break;
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, (ds->linesize * 8) / ds->depth);
@@ -184,6 +197,17 @@ static void sdl_setdata(DisplayState *ds, void *pixels)
return;
}
shared = SDL_CreateRGBSurfaceFrom(pixels, width, height, ds->depth, ds->linesize, rmask , gmask, bmask, amask);
+ if (ds->depth == 8 && ds->palette != NULL) {
+ SDL_Color palette[256];
+ int i;
+ for (i = 0; i < 256; i++) {
+ uint8_t rgb = ds->palette[i] >> 16;
+ palette[i].r = ((rgb & 0xe0) >> 5) * 255 / 7;
+ palette[i].g = ((rgb & 0x1c) >> 2) * 255 / 7;
+ palette[i].b = (rgb & 0x3) * 255 / 3;
+ }
+ SDL_SetColors(shared, palette, 0, 256);
+ }
ds->data = pixels;
}
@@ -210,21 +234,32 @@ static void sdl_resize(DisplayState *ds, int w, int h, int linesize)
again:
screen = SDL_SetVideoMode(w, h, 0, flags);
-#ifndef CONFIG_OPENGL
+
if (!screen) {
fprintf(stderr, "Could not open SDL display: %s\n", SDL_GetError());
+ if (opengl_enabled) {
+ /* Fallback to SDL */
+ opengl_enabled = 0;
+ ds->dpy_update = sdl_update;
+ ds->dpy_setdata = sdl_setdata;
+ sdl_resize(ds, w, h, linesize);
+ return;
+ }
exit(1);
}
- if (!screen->pixels && (flags & SDL_HWSURFACE) && (flags & SDL_FULLSCREEN)) {
- flags &= ~SDL_HWSURFACE;
- goto again;
- }
- if (!screen->pixels) {
- fprintf(stderr, "Could not open SDL display: %s\n", SDL_GetError());
- exit(1);
+ if (!opengl_enabled) {
+ if (!screen->pixels && (flags & SDL_HWSURFACE) && (flags & SDL_FULLSCREEN)) {
+ flags &= ~SDL_HWSURFACE;
+ goto again;
+ }
+
+ if (!screen->pixels) {
+ fprintf(stderr, "Could not open SDL display: %s\n", SDL_GetError());
+ exit(1);
+ }
}
-#endif
+
ds->width = w;
ds->height = h;
if (!ds->shared_buf) {
@@ -262,7 +297,10 @@ static void sdl_resize(DisplayState *ds, int w, int h, int linesize)
static void sdl_colourdepth(DisplayState *ds, int depth)
{
- if (!depth || !ds->depth) return;
+ if (!depth || !ds->depth) {
+ ds->shared_buf = 0;
+ return;
+ }
ds->shared_buf = 1;
ds->depth = depth;
ds->linesize = width * depth / 8;
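
Both palette loops above unpack the 3:3:2 colour byte kept in bits 16-23 of each ds->palette entry; a worked example for a single entry:

    /* ds->palette[i] = 0x00E40000  ->  rgb = 0xE4 (red 111, green 001, blue 00)
     * r = ((0xE4 & 0xe0) >> 5) * 255 / 7 = 7 * 255 / 7 = 255
     * g = ((0xE4 & 0x1c) >> 2) * 255 / 7 = 1 * 255 / 7 = 36
     * b =  (0xE4 & 0x03)       * 255 / 3 = 0 * 255 / 3 = 0
     * (the OpenGL path scales to 65535 instead of 255.) */
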
diff --git a/tools/ioemu/vl.h b/tools/ioemu/vl.h
index 0a90fcc29e..f3ad84bbd1 100644
--- a/tools/ioemu/vl.h
+++ b/tools/ioemu/vl.h
@@ -653,6 +653,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
void bdrv_aio_cancel(BlockDriverAIOCB *acb);
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
void qemu_aio_init(void);
void qemu_aio_poll(void);
@@ -662,7 +664,7 @@ void qemu_aio_wait(void);
void qemu_aio_wait_end(void);
/* Ensure contents are flushed to disk. */
-void bdrv_flush(BlockDriverState *bs);
+int bdrv_flush(BlockDriverState *bs);
#define BDRV_TYPE_HD 0
#define BDRV_TYPE_CDROM 1
@@ -935,6 +937,7 @@ struct DisplayState {
int width;
int height;
void *opaque;
+ uint32_t *palette;
uint64_t gui_timer_interval;
int switchbpp;
diff --git a/tools/ioemu/vnc.c b/tools/ioemu/vnc.c
index 6cfba55be9..db6c643f06 100644
--- a/tools/ioemu/vnc.c
+++ b/tools/ioemu/vnc.c
@@ -1640,6 +1640,7 @@ static void vnc_dpy_colourdepth(DisplayState *ds, int depth)
if (ds->depth == 32) return;
depth = 32;
break;
+ case 8:
case 0:
ds->shared_buf = 0;
return;
diff --git a/tools/libfsimage/Rules.mk b/tools/libfsimage/Rules.mk
index 0cc3d2199f..afc08bdaab 100644
--- a/tools/libfsimage/Rules.mk
+++ b/tools/libfsimage/Rules.mk
@@ -11,6 +11,7 @@ FSDIR-$(CONFIG_Linux) = $(LIBDIR)/fs/$(FS)
FSDIR-$(CONFIG_SunOS)-x86_64 = $(PREFIX)/lib/fs/$(FS)/64
FSDIR-$(CONFIG_SunOS)-x86_32 = $(PREFIX)/lib/fs/$(FS)/
FSDIR-$(CONFIG_SunOS) = $(FSDIR-$(CONFIG_SunOS)-$(XEN_TARGET_ARCH))
+FSDIR-$(CONFIG_NetBSD) = $(LIBDIR)/fs/$(FS)
FSDIR = $(FSDIR-y)
FSLIB = fsimage.so
diff --git a/tools/libfsimage/check-libext2fs b/tools/libfsimage/check-libext2fs
index e6a8d186fc..a86180629a 100755
--- a/tools/libfsimage/check-libext2fs
+++ b/tools/libfsimage/check-libext2fs
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
cat >ext2-test.c <<EOF
#include <ext2fs/ext2fs.h>
@@ -9,7 +9,7 @@ int main()
}
EOF
-${CC:-gcc} -o ext2-test ext2-test.c -lext2fs >/dev/null 2>&1
+${CC-gcc} -o ext2-test ext2-test.c -lext2fs >/dev/null 2>&1
if [ $? = 0 ]; then
echo ext2fs-lib
else
diff --git a/tools/libfsimage/common/fsimage_grub.c b/tools/libfsimage/common/fsimage_grub.c
index 5edb3ba05b..9ea2e35ac6 100644
--- a/tools/libfsimage/common/fsimage_grub.c
+++ b/tools/libfsimage/common/fsimage_grub.c
@@ -204,19 +204,47 @@ int
fsig_devread(fsi_file_t *ffi, unsigned int sector, unsigned int offset,
unsigned int bufsize, char *buf)
{
- uint64_t off = ffi->ff_fsi->f_off + ((uint64_t)sector * 512) + offset;
- ssize_t bytes_read = 0;
+ off_t off;
+ ssize_t ret;
+ int n, r;
+ char tmp[SECTOR_SIZE];
+
+ off = ffi->ff_fsi->f_off + ((off_t)sector * SECTOR_SIZE) + offset;
+
+ /*
+ * Make reads from a raw disk sector-aligned. This is a requirement
+ * for NetBSD. Split the read up into three parts to meet this
+ * requirement.
+ */
+
+ n = (off & (SECTOR_SIZE - 1));
+ if (n > 0) {
+ r = SECTOR_SIZE - n;
+ if (r > bufsize)
+ r = bufsize;
+ ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off - n);
+ if (ret < n + r)
+ return (0);
+ memcpy(buf, tmp + n, r);
+ buf += r;
+ bufsize -= r;
+ off += r;
+ }
- while (bufsize) {
- ssize_t ret = pread(ffi->ff_fsi->f_fd, buf + bytes_read,
- bufsize, (off_t)off);
- if (ret == -1)
+ n = (bufsize & ~(SECTOR_SIZE - 1));
+ if (n > 0) {
+ ret = pread(ffi->ff_fsi->f_fd, buf, n, off);
+ if (ret < n)
return (0);
- if (ret == 0)
+ buf += n;
+ bufsize -= n;
+ off += n;
+ }
+ if (bufsize > 0) {
+ ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off);
+ if (ret < bufsize)
return (0);
-
- bytes_read += ret;
- bufsize -= ret;
+ memcpy(buf, tmp, bufsize);
}
return (1);
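
The rewritten fsig_devread() above splits every request into an unaligned head, a sector-aligned body, and an unaligned tail. The same pattern in a self-contained sketch (helper name hypothetical):

    #include <string.h>
    #include <unistd.h>

    #define SECTOR_SIZE 512

    static ssize_t aligned_pread(int fd, char *buf, size_t len, off_t off)
    {
        char tmp[SECTOR_SIZE];
        size_t head = off & (SECTOR_SIZE - 1), body, done = 0;

        if (head) {                        /* unaligned head */
            size_t n = SECTOR_SIZE - head;
            if (n > len)
                n = len;
            if (pread(fd, tmp, SECTOR_SIZE, off - head) < (ssize_t)(head + n))
                return -1;
            memcpy(buf, tmp + head, n);
            done += n; off += n;
        }
        body = (len - done) & ~(size_t)(SECTOR_SIZE - 1);
        if (body) {                        /* sector-aligned middle */
            if (pread(fd, buf + done, body, off) != (ssize_t)body)
                return -1;
            done += body; off += body;
        }
        if (len - done) {                  /* unaligned tail */
            if (pread(fd, tmp, SECTOR_SIZE, off) < (ssize_t)(len - done))
                return -1;
            memcpy(buf + done, tmp, len - done);
            done = len;
        }
        return (ssize_t)done;
    }
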
diff --git a/tools/libfsimage/common/fsimage_grub.h b/tools/libfsimage/common/fsimage_grub.h
index 800a918655..bca481bc28 100644
--- a/tools/libfsimage/common/fsimage_grub.h
+++ b/tools/libfsimage/common/fsimage_grub.h
@@ -44,7 +44,7 @@ typedef struct fsig_plugin_ops {
} fsig_plugin_ops_t;
#define STAGE1_5
-#define FSYS_BUFLEN 0x8000
+#define FSYS_BUFLEN 0x40000
#define SECTOR_BITS 9
#define SECTOR_SIZE 0x200
diff --git a/tools/libfsimage/common/fsimage_plugin.c b/tools/libfsimage/common/fsimage_plugin.c
index d62435c8dd..5ee9d746b2 100644
--- a/tools/libfsimage/common/fsimage_plugin.c
+++ b/tools/libfsimage/common/fsimage_plugin.c
@@ -131,7 +131,10 @@ static int load_plugins(void)
int err;
int ret = -1;
-#ifdef __sun__
+#if defined(FSIMAGE_FSDIR)
+ if (fsdir == NULL)
+ fsdir = FSIMAGE_FSDIR;
+#elif defined(__sun__)
if (fsdir == NULL)
fsdir = "/usr/lib/fs";
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index b73bbfe55b..82c58f9ebf 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -46,10 +46,11 @@ GUEST_SRCS-y += libelf-tools.c libelf-loader.c
GUEST_SRCS-y += libelf-dominfo.c libelf-relocate.c
# new domain builder
-GUEST_SRCS-y += xc_dom_core.c xc_dom_boot.c
-GUEST_SRCS-y += xc_dom_elfloader.c
-GUEST_SRCS-y += xc_dom_binloader.c
-GUEST_SRCS-y += xc_dom_compat_linux.c
+GUEST_SRCS-y += xc_dom_core.c xc_dom_boot.c
+GUEST_SRCS-y += xc_dom_elfloader.c
+GUEST_SRCS-$(CONFIG_X86) += xc_dom_bzimageloader.c
+GUEST_SRCS-y += xc_dom_binloader.c
+GUEST_SRCS-y += xc_dom_compat_linux.c
GUEST_SRCS-$(CONFIG_X86) += xc_dom_x86.c
GUEST_SRCS-$(CONFIG_IA64) += xc_dom_ia64.c
diff --git a/tools/libxc/xc_dom_bzimageloader.c b/tools/libxc/xc_dom_bzimageloader.c
new file mode 100644
index 0000000000..628cd7ddb4
--- /dev/null
+++ b/tools/libxc/xc_dom_bzimageloader.c
@@ -0,0 +1,159 @@
+/*
+ * Xen domain builder -- bzImage bits
+ *
+ * Parse and load bzImage kernel images.
+ *
+ * This relies on version 2.08 of the boot protocol, in which an
+ * ELF file is embedded in the bzImage. The loader extracts this ELF
+ * image and passes it off to the standard ELF loader.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ * written 2007 by Jeremy Fitzhardinge <jeremy@xensource.com>
+ * written 2008 by Ian Campbell <ijc@hellion.org.uk>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+struct setup_header {
+ uint8_t _pad0[0x1f1]; /* skip uninteresting stuff */
+ uint8_t setup_sects;
+ uint16_t root_flags;
+ uint32_t syssize;
+ uint16_t ram_size;
+ uint16_t vid_mode;
+ uint16_t root_dev;
+ uint16_t boot_flag;
+ uint16_t jump;
+ uint32_t header;
+#define HDR_MAGIC "HdrS"
+#define HDR_MAGIC_SZ 4
+ uint16_t version;
+#define VERSION(h,l) (((h)<<8) | (l))
+ uint32_t realmode_swtch;
+ uint16_t start_sys;
+ uint16_t kernel_version;
+ uint8_t type_of_loader;
+ uint8_t loadflags;
+ uint16_t setup_move_size;
+ uint32_t code32_start;
+ uint32_t ramdisk_image;
+ uint32_t ramdisk_size;
+ uint32_t bootsect_kludge;
+ uint16_t heap_end_ptr;
+ uint16_t _pad1;
+ uint32_t cmd_line_ptr;
+ uint32_t initrd_addr_max;
+ uint32_t kernel_alignment;
+ uint8_t relocatable_kernel;
+ uint8_t _pad2[3];
+ uint32_t cmdline_size;
+ uint32_t hardware_subarch;
+ uint64_t hardware_subarch_data;
+ uint32_t payload_offset;
+ uint32_t payload_length;
+} __attribute__((packed));
+
+extern struct xc_dom_loader elf_loader;
+
+static unsigned int payload_offset(struct setup_header *hdr)
+{
+ unsigned int off;
+
+ off = (hdr->setup_sects + 1) * 512;
+ off += hdr->payload_offset;
+ return off;
+}
+
+static int check_bzimage_kernel(struct xc_dom_image *dom, int verbose)
+{
+ struct setup_header *hdr;
+
+ if ( dom->kernel_blob == NULL )
+ {
+ if ( verbose )
+ xc_dom_panic(XC_INTERNAL_ERROR, "%s: no kernel image loaded\n",
+ __FUNCTION__);
+ return -EINVAL;
+ }
+ if ( dom->kernel_size < sizeof(struct setup_header) )
+ {
+ if ( verbose )
+ xc_dom_panic(XC_INTERNAL_ERROR, "%s: kernel image too small\n",
+ __FUNCTION__);
+ return -EINVAL;
+ }
+
+ hdr = dom->kernel_blob;
+
+ if ( memcmp(&hdr->header, HDR_MAGIC, HDR_MAGIC_SZ) != 0 )
+ {
+ if ( verbose )
+ xc_dom_panic(XC_INVALID_KERNEL, "%s: kernel is not a bzImage\n",
+ __FUNCTION__);
+ return -EINVAL;
+ }
+
+ if ( hdr->version < VERSION(2,8) )
+ {
+ if ( verbose )
+ xc_dom_panic(XC_INVALID_KERNEL, "%s: boot protocol too old (%04x)\n",
+ __FUNCTION__, hdr->version);
+ return -EINVAL;
+ }
+
+ dom->kernel_blob = dom->kernel_blob + payload_offset(hdr);
+ dom->kernel_size = hdr->payload_length;
+
+ if ( xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size) == -1 )
+ {
+ if ( verbose )
+ xc_dom_panic(XC_INVALID_KERNEL, "%s: unable to decompress kernel\n",
+ __FUNCTION__);
+ return -EINVAL;
+ }
+
+ return elf_loader.probe(dom);
+}
+
+static int xc_dom_probe_bzimage_kernel(struct xc_dom_image *dom)
+{
+ return check_bzimage_kernel(dom, 0);
+}
+
+static int xc_dom_parse_bzimage_kernel(struct xc_dom_image *dom)
+{
+ return elf_loader.parser(dom);
+}
+
+static int xc_dom_load_bzimage_kernel(struct xc_dom_image *dom)
+{
+ return elf_loader.loader(dom);
+}
+
+static struct xc_dom_loader bzimage_loader = {
+ .name = "Linux bzImage",
+ .probe = xc_dom_probe_bzimage_kernel,
+ .parser = xc_dom_parse_bzimage_kernel,
+ .loader = xc_dom_load_bzimage_kernel,
+};
+
+static void __init register_loader(void)
+{
+ xc_dom_register_loader(&bzimage_loader);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
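
For a concrete sense of payload_offset(), with hypothetical header values:

    /* setup_sects = 15, hdr->payload_offset = 0x290:
     *   off = (15 + 1) * 512 + 0x290 = 8192 + 656 = 8848
     * i.e. the (typically gzipped) ELF payload begins 8848 bytes into the
     * bzImage; payload_length bytes from there are passed through
     * xc_dom_try_gunzip() and then handed to the generic ELF loader. */
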
diff --git a/tools/libxc/xc_dom_elfloader.c b/tools/libxc/xc_dom_elfloader.c
index c34c24d08a..ec9af72548 100644
--- a/tools/libxc/xc_dom_elfloader.c
+++ b/tools/libxc/xc_dom_elfloader.c
@@ -281,7 +281,7 @@ static int xc_dom_load_elf_kernel(struct xc_dom_image *dom)
/* ------------------------------------------------------------------------ */
-static struct xc_dom_loader elf_loader = {
+struct xc_dom_loader elf_loader = {
.name = "ELF-generic",
.probe = xc_dom_probe_elf_kernel,
.parser = xc_dom_parse_elf_kernel,
diff --git a/tools/libxen/include/xen/api/xen_acmpolicy.h b/tools/libxen/include/xen/api/xen_acmpolicy.h
index d0b4a500ca..43aac5810d 100644
--- a/tools/libxen/include/xen/api/xen_acmpolicy.h
+++ b/tools/libxen/include/xen/api/xen_acmpolicy.h
@@ -74,7 +74,7 @@ xen_acm_header_free(xen_acm_header *hdr);
/**
* Get the referenced policy's record.
*/
-bool
+extern bool
xen_acmpolicy_get_record(xen_session *session, xen_acmpolicy_record **result,
xen_xspolicy xspolicy);
@@ -118,14 +118,14 @@ xen_acmpolicy_get_enforced_binary(xen_session *session, char **binary,
/**
* Get the ACM ssidref of the given VM.
*/
-bool
+extern bool
xen_acmpolicy_get_VM_ssidref(xen_session *session, int64_t *result,
xen_vm vm);
/**
* Get the UUID field of the given policy.
*/
-bool
+extern bool
xen_acmpolicy_get_uuid(xen_session *session, char **result,
xen_xspolicy xspolicy);
diff --git a/tools/libxen/include/xen/api/xen_xspolicy.h b/tools/libxen/include/xen/api/xen_xspolicy.h
index b0808f4d57..f685f150a8 100644
--- a/tools/libxen/include/xen/api/xen_xspolicy.h
+++ b/tools/libxen/include/xen/api/xen_xspolicy.h
@@ -68,6 +68,8 @@ enum xs_instantiationflags {
#define XSERR_RESOURCE_ACCESS 23 + XSERR_BASE
#define XSERR_HV_OP_FAILED 24 + XSERR_BASE
#define XSERR_BOOTPOLICY_INSTALL_ERROR 25 + XSERR_BASE
+#define XSERR_VM_NOT_AUTHORIZED 26 + XSERR_BASE
+#define XSERR_VM_IN_CONFLICT 27 + XSERR_BASE
/**
@@ -179,28 +181,28 @@ typedef struct xen_xs_policystate
char *errors;
} xen_xs_policystate;
-void
+extern void
xen_xs_policystate_free(xen_xs_policystate *state);
/**
* Get the referenced policy's record.
*/
-bool
+extern bool
xen_xspolicy_get_record(xen_session *session, xen_xspolicy_record **result,
xen_xspolicy xspolicy);
/**
* Get the UUID field of the given policy.
*/
-bool
+extern bool
xen_xspolicy_get_uuid(xen_session *session, char **result,
xen_xspolicy xspolicy);
/**
 * Get a policy given its UUID
*/
-bool
+extern bool
xen_xspolicy_get_by_uuid(xen_session *session, xen_xspolicy *result,
char *uuid);
@@ -208,7 +210,7 @@ xen_xspolicy_get_by_uuid(xen_session *session, xen_xspolicy *result,
/**
* Get the types of policies supported by the system.
*/
-bool
+extern bool
xen_xspolicy_get_xstype(xen_session *session, xs_type *result);
@@ -216,13 +218,13 @@ xen_xspolicy_get_xstype(xen_session *session, xs_type *result);
* Get information about the currently managed policy.
* (The API allows only one policy to be on the system.)
*/
-bool
+extern bool
xen_xspolicy_get_xspolicy(xen_session *session, xen_xs_policystate **result);
/**
* Activate the referenced policy by loading it into the hypervisor.
*/
-bool
+extern bool
xen_xspolicy_activate_xspolicy(xen_session *session, int64_t *result,
xen_xspolicy xspolicy,
xs_instantiationflags flags);
@@ -234,7 +236,7 @@ xen_xspolicy_activate_xspolicy(xen_session *session, int64_t *result,
* on whether to load the policy immediately and whether to overwrite
* an existing policy on the system.
*/
-bool
+extern bool
xen_xspolicy_set_xspolicy(xen_session *session, xen_xs_policystate **result,
xs_type type, char *repr, int64_t flags,
bool overwrite);
@@ -248,7 +250,7 @@ xen_xspolicy_set_xspolicy(xen_session *session, xen_xs_policystate **result,
* for example fail if other domains than Domain-0 are running and have
* different labels than Domain-0.
*/
-bool
+extern bool
xen_xspolicy_reset_xspolicy(xen_session *session, xen_xs_policystate **result,
xs_type type);
@@ -281,4 +283,11 @@ extern bool
xen_xspolicy_get_resource_label(xen_session *session, char **label,
char *resource);
+/**
+ * Check whether a VM with the given VM-label could run.
+ */
+extern bool
+xen_xspolicy_can_run(xen_session *session, int64_t *result,
+ char *security_label);
+
#endif
diff --git a/tools/libxen/src/xen_xspolicy.c b/tools/libxen/src/xen_xspolicy.c
index e5f290c1a1..fe0dc255a5 100644
--- a/tools/libxen/src/xen_xspolicy.c
+++ b/tools/libxen/src/xen_xspolicy.c
@@ -343,3 +343,21 @@ xen_xspolicy_activate_xspolicy(xen_session *session,
XEN_CALL_("XSPolicy.activate_xspolicy");
return session->ok;
}
+
+
+bool
+xen_xspolicy_can_run(xen_session *session, int64_t *result,
+ char *security_label)
+{
+ abstract_value param_values[] =
+ {
+ { .type = &abstract_type_string,
+ .u.string_val = security_label }
+ };
+
+ abstract_type result_type = abstract_type_int;
+
+ *result = 0;
+ XEN_CALL_("XSPolicy.can_run");
+ return session->ok;
+}
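
A hypothetical caller of the new C binding (session setup omitted; the label string is illustrative, and rc == 0 meaning success is an assumption about the returned convention):

    int64_t rc = 0;
    if (xen_xspolicy_can_run(session, &rc, "ACM:example_policy:blue") &&
        rc == 0)
    {
        /* assumed: 0 (XSERR_SUCCESS) => the label is authorized and not
         * in conflict with any running VM, so the VM may be started */
    }
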
diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
index 51a4220647..f4aec1c500 100644
--- a/tools/pygrub/src/pygrub
+++ b/tools/pygrub/src/pygrub
@@ -240,10 +240,10 @@ class Grub:
if y > self.start_image + maxy:
break
if y == self.selected_image:
- attr = curses.A_REVERSE
- else:
- attr = 0
- self.entry_win.addstr(y + 1 - self.start_image, 2, i.title.ljust(70), attr)
+ self.entry_win.attron(curses.A_REVERSE)
+ self.entry_win.addstr(y + 1 - self.start_image, 2, i.title.ljust(70))
+ if y == self.selected_image:
+ self.entry_win.attroff(curses.A_REVERSE)
self.entry_win.refresh()
def edit_entry(self, origimg):
@@ -269,16 +269,17 @@ class Grub:
self.entry_win.box()
for idx in range(1, len(img.lines)):
# current line should be highlighted
- attr = 0
if idx == curline:
- attr = curses.A_REVERSE
+ self.entry_win.attron(curses.A_REVERSE)
# trim the line
l = img.lines[idx].ljust(70)
if len(l) > 70:
l = l[:69] + ">"
- self.entry_win.addstr(idx, 2, l, attr)
+ self.entry_win.addstr(idx, 2, l)
+ if idx == curline:
+ self.entry_win.attroff(curses.A_REVERSE)
self.entry_win.refresh()
c = self.screen.getch()
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index 6bc29d8537..1b23361142 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -762,11 +762,12 @@ static PyObject *pyxc_physinfo(XcObject *self)
{
#define MAX_CPU_ID 255
xc_physinfo_t info;
- char cpu_cap[128], *p=cpu_cap, *q=cpu_cap;
+ char cpu_cap[128], virt_caps[128], *p;
int i, j, max_cpu_id;
uint64_t free_heap;
PyObject *ret_obj, *node_to_cpu_obj, *node_to_memory_obj;
xc_cpu_to_node_t map[MAX_CPU_ID + 1];
+ const char *virtcap_names[] = { "hvm", "hvm_directio" };
set_xen_guest_handle(info.cpu_to_node, map);
info.max_cpu_id = MAX_CPU_ID;
@@ -774,17 +775,21 @@ static PyObject *pyxc_physinfo(XcObject *self)
if ( xc_physinfo(self->xc_handle, &info) != 0 )
return pyxc_error_to_exception();
- *q = 0;
+ p = cpu_cap;
+ *p = '\0';
for ( i = 0; i < sizeof(info.hw_cap)/4; i++ )
- {
p += sprintf(p, "%08x:", info.hw_cap[i]);
- if ( info.hw_cap[i] )
- q = p;
- }
- if ( q > cpu_cap )
- *(q-1) = 0;
+ *(p-1) = 0;
+
+ p = virt_caps;
+ *p = '\0';
+ for ( i = 0; i < 2; i++ )
+ if ( (info.capabilities >> i) & 1 )
+ p += sprintf(p, "%s ", virtcap_names[i]);
+ if ( p != virt_caps )
+ *(p-1) = '\0';
- ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
+ ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s:s:s}",
"nr_nodes", info.nr_nodes,
"max_cpu_id", info.max_cpu_id,
"threads_per_core", info.threads_per_core,
@@ -794,7 +799,8 @@ static PyObject *pyxc_physinfo(XcObject *self)
"free_memory", pages_to_kib(info.free_pages),
"scrub_memory", pages_to_kib(info.scrub_pages),
"cpu_khz", info.cpu_khz,
- "hw_caps", cpu_cap);
+ "hw_caps", cpu_cap,
+ "virt_caps", virt_caps);
max_cpu_id = info.max_cpu_id;
if ( max_cpu_id > MAX_CPU_ID )
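
The virt_caps loop above expands capability bits into a space-separated string, trimming the trailing space only when something was emitted:

    /* info.capabilities = 3 (bits 0 and 1)  ->  "hvm hvm_directio"
     * info.capabilities = 1                 ->  "hvm"
     * info.capabilities = 0                 ->  ""  (p == virt_caps,
     *                                            so nothing is trimmed) */
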
diff --git a/tools/python/xen/util/xsconstants.py b/tools/python/xen/util/xsconstants.py
index 856ef43aca..730d66fbf5 100644
--- a/tools/python/xen/util/xsconstants.py
+++ b/tools/python/xen/util/xsconstants.py
@@ -57,7 +57,9 @@ XSERR_POLICY_NOT_LOADED = 22 + XSERR_BASE
XSERR_RESOURCE_ACCESS = 23 + XSERR_BASE
XSERR_HV_OP_FAILED = 24 + XSERR_BASE
XSERR_BOOTPOLICY_INSTALL_ERROR = 25 + XSERR_BASE
-XSERR_LAST = 25 + XSERR_BASE ## KEEP LAST
+XSERR_VM_NOT_AUTHORIZED = 26 + XSERR_BASE
+XSERR_VM_IN_CONFLICT = 27 + XSERR_BASE
+XSERR_LAST = 27 + XSERR_BASE ## KEEP LAST
XSERR_MESSAGES = [
'',
@@ -85,7 +87,9 @@ XSERR_MESSAGES = [
'The policy is not loaded',
'Error accessing resource',
'Operation failed in hypervisor',
- 'Boot policy installation error'
+ 'Boot policy installation error',
+ 'VM is not authorized to run',
+ 'VM label conflicts with another VM'
]
def xserr2string(err):
diff --git a/tools/python/xen/util/xsm/acm/acm.py b/tools/python/xen/util/xsm/acm/acm.py
index 98b6ec1312..e8de0fa60c 100644
--- a/tools/python/xen/util/xsm/acm/acm.py
+++ b/tools/python/xen/util/xsm/acm/acm.py
@@ -68,6 +68,7 @@ policy_name_re = re.compile(".*[chwall|ste|chwall_ste].*", re.IGNORECASE)
#decision hooks known to the hypervisor
ACMHOOK_sharing = 1
ACMHOOK_authorization = 2
+ACMHOOK_conflictset = 3
#other global variables
NULL_SSIDREF = 0
@@ -373,7 +374,7 @@ def label2ssidref(labelname, policyname, typ):
else:
return (sec_ssid[0] << 16) | pri_ssid[0]
finally:
- mapfile_unlock()
+ mapfile_unlock()
def refresh_ssidref(config):
@@ -552,6 +553,18 @@ def hv_get_policy():
return rc, bin_pol
+def is_in_conflict(ssidref):
+ """ Check whether the given ssidref is in conflict with any running
+ domain.
+ """
+ decision = acm.getdecision('ssidref', str(ssidref),
+ 'ssidref', str(ssidref),
+ ACMHOOK_conflictset)
+ if decision == "DENIED":
+ return True
+ return False
+
+
def set_policy(xs_type, xml, flags, overwrite):
"""
Xend exports this function via XML-RPC
@@ -1550,6 +1563,33 @@ def get_security_label(self, xspol=None):
return label
+def check_can_run(sec_label):
+ """ Check whether a VM could run, given its vm label. A VM can run if
+ - it is authorized
+ - it is not in conflict with any running domain
+ """
+ try:
+ mapfile_lock()
+
+ if sec_label == None or sec_label == "":
+ vm_label = ACM_LABEL_UNLABELED
+ else:
+ poltype, policy, vm_label = sec_label.split(':')
+ if policy != get_active_policy_name():
+ return -xsconstants.XSERR_BAD_POLICY_NAME
+ ssidref = label2ssidref(vm_label, policy, 'dom')
+ if ssidref != xsconstants.INVALID_SSIDREF:
+ if not has_authorization(ssidref):
+ return -xsconstants.XSERR_VM_NOT_AUTHORIZED
+ if is_in_conflict(ssidref):
+ return -xsconstants.XSERR_VM_IN_CONFLICT
+ return -xsconstants.XSERR_SUCCESS
+ else:
+ return -xsconstants.XSERR_BAD_LABEL
+ finally:
+ mapfile_unlock()
+
+
__cond = threading.Condition()
__script_runner = None
__orders = []
diff --git a/tools/python/xen/xend/XendBootloader.py b/tools/python/xen/xend/XendBootloader.py
index 9c5578f36b..60e876140d 100644
--- a/tools/python/xen/xend/XendBootloader.py
+++ b/tools/python/xen/xend/XendBootloader.py
@@ -12,7 +12,7 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
-import os, select, errno, stat, signal
+import os, select, errno, stat, signal, tty
import random
import shlex
from xen.xend import sxp
@@ -43,6 +43,9 @@ def bootloader(blexec, disk, dom, quiet = False, blargs = '', kernel = '',
log.error(msg)
raise VmError(msg)
+ if os.uname()[0] == "NetBSD" and disk.startswith('/dev/'):
+ disk = disk.replace("/dev/", "/dev/r")
+
mkdir.parents("/var/run/xend/boot/", stat.S_IRWXU)
while True:
@@ -63,12 +66,8 @@ def bootloader(blexec, disk, dom, quiet = False, blargs = '', kernel = '',
# where we copy characters between the two master fds, as well as
# listening on the bootloader's fifo for the results.
- # Termios runes for very raw access to the pty master fds.
- attr = [ 0, 0, termios.CS8 | termios.CREAD | termios.CLOCAL,
- 0, 0, 0, [0] * 32 ]
-
(m1, s1) = pty.openpty()
- termios.tcsetattr(m1, termios.TCSANOW, attr)
+ tty.setraw(m1);
fcntl.fcntl(m1, fcntl.F_SETFL, os.O_NDELAY);
os.close(s1)
slavename = ptsname.ptsname(m1)
@@ -109,7 +108,7 @@ def bootloader(blexec, disk, dom, quiet = False, blargs = '', kernel = '',
# record that this domain is bootloading
dom.bootloader_pid = child
- termios.tcsetattr(m2, termios.TCSANOW, attr)
+ tty.setraw(m2);
fcntl.fcntl(m2, fcntl.F_SETFL, os.O_NDELAY);
while True:
try:
diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
index 46e738c478..5bfc9fa185 100644
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -2047,11 +2047,10 @@ class XendDomainInfo:
try:
if self.info['platform'].get('localtime', 0):
- t = time.time()
- loc = time.localtime(t)
- utc = time.gmtime(t)
- timeoffset = int(time.mktime(loc) - time.mktime(utc))
- self.info['platform']['rtc_timeoffset'] = timeoffset
+ if time.localtime(time.time())[8]:
+ self.info['platform']['rtc_timeoffset'] = -time.altzone
+ else:
+ self.info['platform']['rtc_timeoffset'] = -time.timezone
self.image = image.create(self, self.info)
diff --git a/tools/python/xen/xend/XendNode.py b/tools/python/xen/xend/XendNode.py
index 26db7f2966..f394411a9c 100644
--- a/tools/python/xen/xend/XendNode.py
+++ b/tools/python/xen/xend/XendNode.py
@@ -92,6 +92,7 @@ class XendNode:
physinfo = self.physinfo_dict()
cpu_count = physinfo['nr_cpus']
cpu_features = physinfo['hw_caps']
+ virt_caps = physinfo['virt_caps']
# If the number of CPUs don't match, we should just reinitialise
# the CPU UUIDs.
@@ -112,6 +113,7 @@ class XendNode:
self.cpus[u].update(
{ 'host' : self.uuid,
'features' : cpu_features,
+ 'virt_caps': virt_caps,
'speed' : int(float(cpuinfo[number]['cpu MHz'])),
'vendor' : cpuinfo[number]['vendor_id'],
'modelname': cpuinfo[number]['model name'],
@@ -605,6 +607,7 @@ class XendNode:
'threads_per_core',
'cpu_mhz',
'hw_caps',
+ 'virt_caps',
'total_memory',
'free_memory',
'node_to_cpu',
diff --git a/tools/python/xen/xend/XendXSPolicy.py b/tools/python/xen/xend/XendXSPolicy.py
index dff029ddde..0b6d5bc388 100644
--- a/tools/python/xen/xend/XendXSPolicy.py
+++ b/tools/python/xen/xend/XendXSPolicy.py
@@ -48,7 +48,8 @@ class XendXSPolicy(XendBase):
'rm_xsbootpolicy',
'get_resource_label',
'set_resource_label',
- 'get_labeled_resources' ]
+ 'get_labeled_resources',
+ 'can_run' ]
return XendBase.getFuncs() + funcs
getClass = classmethod(getClass)
@@ -190,6 +191,12 @@ class XendXSPolicy(XendBase):
res = security.get_resource_label_xapi(resource)
return res
+ def can_run(self, sec_label):
+ irc = security.validate_label_xapi(sec_label, 'dom')
+ if irc != xsconstants.XSERR_SUCCESS:
+ raise SecurityError(irc)
+ return security.check_can_run(sec_label)
+
get_xstype = classmethod(get_xstype)
get_xspolicy = classmethod(get_xspolicy)
set_xspolicy = classmethod(set_xspolicy)
@@ -198,6 +205,7 @@ class XendXSPolicy(XendBase):
set_resource_label = classmethod(set_resource_label)
get_resource_label = classmethod(get_resource_label)
get_labeled_resources = classmethod(get_labeled_resources)
+ can_run = classmethod(can_run)
class XendACMPolicy(XendXSPolicy):
diff --git a/tools/python/xen/xend/server/vfbif.py b/tools/python/xen/xend/server/vfbif.py
index a79f50f019..6f049d3c13 100644
--- a/tools/python/xen/xend/server/vfbif.py
+++ b/tools/python/xen/xend/server/vfbif.py
@@ -6,7 +6,7 @@ import xen.xend
import os
CONFIG_ENTRIES = ['type', 'vncdisplay', 'vnclisten', 'vncpasswd', 'vncunused',
- 'display', 'xauthority', 'keymap',
+ 'videoram', 'display', 'xauthority', 'keymap',
'uuid', 'location', 'protocol', 'opengl']
class VfbifController(DevController):
diff --git a/tools/python/xen/xm/XenAPI.py b/tools/python/xen/xm/XenAPI.py
index a3717d70f2..dd38f37902 100644
--- a/tools/python/xen/xm/XenAPI.py
+++ b/tools/python/xen/xm/XenAPI.py
@@ -64,6 +64,7 @@ errormap = {
"HANDLE_INVALID": N_("The %(1)s handle %(2)s is invalid."),
"OPERATION_NOT_ALLOWED": N_("You attempted an operation that was not allowed."),
"NETWORK_ALREADY_CONNECTED": N_("The network you specified already has a PIF attached to it, and so another one may not be attached."),
+ "SECURITY_ERROR": N_("%(2)s"),
}
translation = gettext.translation('xen-xm', fallback = True)
diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
index a4e756825f..66c19afb5c 100644
--- a/tools/python/xen/xm/create.py
+++ b/tools/python/xen/xm/create.py
@@ -500,6 +500,11 @@ gopts.var('vncunused', val='',
use="""Try to find an unused port for the VNC server.
Only valid when vnc=1.""")
+gopts.var('videoram', val='',
+ fn=set_value, default=None,
+ use="""Maximum amount of videoram PV guest can allocate
+ for frame buffer.""")
+
gopts.var('sdl', val='',
fn=set_value, default=None,
use="""Should the device model use SDL?""")
@@ -645,7 +650,8 @@ def configure_vfbs(config_devs, vals):
d['type'] = 'sdl'
for (k,v) in d.iteritems():
if not k in [ 'vnclisten', 'vncunused', 'vncdisplay', 'display',
- 'xauthority', 'type', 'vncpasswd', 'opengl' ]:
+ 'videoram', 'xauthority', 'type', 'vncpasswd',
+ 'opengl' ]:
err("configuration option %s unknown to vfbs" % k)
config.append([k,v])
if not d.has_key("keymap"):
diff --git a/tools/python/xen/xm/messages/xen-xm.pot b/tools/python/xen/xm/messages/xen-xm.pot
index 0a89ea9b27..a600a69f0d 100644
--- a/tools/python/xen/xm/messages/xen-xm.pot
+++ b/tools/python/xen/xm/messages/xen-xm.pot
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2007-04-05 14:17-0400\n"
+"POT-Creation-Date: 2008-03-31 17:40+0100\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@@ -61,3 +61,8 @@ msgid ""
"The network you specified already has a PIF attached to it, and so another "
"one may not be attached."
msgstr ""
+
+#: xen/xm/XenAPI.py:67
+#, python-format
+msgid "%(2)s"
+msgstr ""
diff --git a/tools/tests/Makefile b/tools/tests/Makefile
index 45d4294f9a..cf32ef91df 100644
--- a/tools/tests/Makefile
+++ b/tools/tests/Makefile
@@ -21,13 +21,17 @@ $(TARGET): x86_emulate.o test_x86_emulator.o
.PHONY: clean
clean:
- rm -rf $(TARGET) *.o *~ core blowfish.h blowfish.bin
+ rm -rf $(TARGET) *.o *~ core blowfish.h blowfish.bin x86_emulate
.PHONY: install
install:
-x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/x86_emulate.c
+.PHONY: x86_emulate
+x86_emulate:
+ [ -L x86_emulate ] || ln -sf $(XEN_ROOT)/xen/arch/x86/x86_emulate .
+
+x86_emulate.o: x86_emulate.c x86_emulate
$(HOSTCC) $(HOSTCFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $<
-test_x86_emulator.o: test_x86_emulator.c blowfish.h
+test_x86_emulator.o: test_x86_emulator.c blowfish.h x86_emulate
$(HOSTCC) $(HOSTCFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $<
diff --git a/tools/tests/test_x86_emulator.c b/tools/tests/test_x86_emulator.c
index fe48921b59..4ad5677dd4 100644
--- a/tools/tests/test_x86_emulator.c
+++ b/tools/tests/test_x86_emulator.c
@@ -1,20 +1,11 @@
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint64_t u64;
-typedef int8_t s8;
-typedef int16_t s16;
-typedef int32_t s32;
-typedef int64_t s64;
#include <public/xen.h>
-#include <asm-x86/x86_emulate.h>
#include <sys/mman.h>
+#include "x86_emulate/x86_emulate.h"
#include "blowfish.h"
#define MMAP_SZ 16384
@@ -38,9 +29,9 @@ static int read(
unsigned long addr = offset;
switch ( bytes )
{
- case 1: *val = *(u8 *)addr; break;
- case 2: *val = *(u16 *)addr; break;
- case 4: *val = *(u32 *)addr; break;
+ case 1: *val = *(uint8_t *)addr; break;
+ case 2: *val = *(uint16_t *)addr; break;
+ case 4: *val = *(uint32_t *)addr; break;
case 8: *val = *(unsigned long *)addr; break;
}
return X86EMUL_OKAY;
@@ -56,9 +47,9 @@ static int write(
unsigned long addr = offset;
switch ( bytes )
{
- case 1: *(u8 *)addr = (u8)val; break;
- case 2: *(u16 *)addr = (u16)val; break;
- case 4: *(u32 *)addr = (u32)val; break;
+ case 1: *(uint8_t *)addr = (uint8_t)val; break;
+ case 2: *(uint16_t *)addr = (uint16_t)val; break;
+ case 4: *(uint32_t *)addr = (uint32_t)val; break;
case 8: *(unsigned long *)addr = val; break;
}
return X86EMUL_OKAY;
@@ -75,9 +66,9 @@ static int cmpxchg(
unsigned long addr = offset;
switch ( bytes )
{
- case 1: *(u8 *)addr = (u8)new; break;
- case 2: *(u16 *)addr = (u16)new; break;
- case 4: *(u32 *)addr = (u32)new; break;
+ case 1: *(uint8_t *)addr = (uint8_t)new; break;
+ case 2: *(uint16_t *)addr = (uint16_t)new; break;
+ case 4: *(uint32_t *)addr = (uint32_t)new; break;
case 8: *(unsigned long *)addr = new; break;
}
return X86EMUL_OKAY;
diff --git a/tools/tests/x86_emulate.c b/tools/tests/x86_emulate.c
new file mode 100644
index 0000000000..d58f65a38e
--- /dev/null
+++ b/tools/tests/x86_emulate.c
@@ -0,0 +1,13 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <public/xen.h>
+
+#include "x86_emulate/x86_emulate.h"
+
+#define __emulate_fpu_insn(_op) \
+do{ rc = X86EMUL_UNHANDLEABLE; \
+ goto done; \
+} while (0)
+
+#include "x86_emulate/x86_emulate.c"
diff --git a/xen/arch/ia64/xen/dom0_ops.c b/xen/arch/ia64/xen/dom0_ops.c
index 1f42acc1c3..af4e6b555f 100644
--- a/xen/arch/ia64/xen/dom0_ops.c
+++ b/xen/arch/ia64/xen/dom0_ops.c
@@ -410,6 +410,7 @@ long arch_do_sysctl(xen_sysctl_t *op, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
xen_sysctl_physinfo_t *pi = &op->u.physinfo;
+ memset(pi, 0, sizeof(*pi));
pi->threads_per_core = cpus_weight(cpu_sibling_map[0]);
pi->cores_per_socket =
cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
@@ -419,7 +420,6 @@ long arch_do_sysctl(xen_sysctl_t *op, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
pi->free_pages = avail_domheap_pages();
pi->scrub_pages = avail_scrub_pages();
pi->cpu_khz = local_cpu_data->proc_freq / 1000;
- memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
max_array_ent = pi->max_cpu_id;
pi->max_cpu_id = last_cpu(cpu_online_map);
diff --git a/xen/arch/ia64/xen/dom_fw_common.c b/xen/arch/ia64/xen/dom_fw_common.c
index 5d9864e4a0..0460c483f3 100644
--- a/xen/arch/ia64/xen/dom_fw_common.c
+++ b/xen/arch/ia64/xen/dom_fw_common.c
@@ -20,7 +20,7 @@
#include <assert.h>
#include <inttypes.h>
-#include <xen/arch-ia64.h>
+#include <xen/xen.h>
#include <asm/bundle.h>
#include "xg_private.h"
diff --git a/xen/arch/ia64/xen/dom_fw_domu.c b/xen/arch/ia64/xen/dom_fw_domu.c
index fe8b346fee..87b3fe52c3 100644
--- a/xen/arch/ia64/xen/dom_fw_domu.c
+++ b/xen/arch/ia64/xen/dom_fw_domu.c
@@ -37,7 +37,7 @@
#include <errno.h>
#include <inttypes.h>
-#include <xen/arch-ia64.h>
+#include <xen/xen.h>
#include "xg_private.h"
#include "xc_dom.h"
diff --git a/xen/arch/powerpc/sysctl.c b/xen/arch/powerpc/sysctl.c
index b211d4f9f7..24db2a9141 100644
--- a/xen/arch/powerpc/sysctl.c
+++ b/xen/arch/powerpc/sysctl.c
@@ -41,6 +41,7 @@ long arch_do_sysctl(struct xen_sysctl *sysctl,
{
xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo;
+ memset(pi, 0, sizeof(*pi));
pi->threads_per_core =
cpus_weight(cpu_sibling_map[0]);
pi->cores_per_socket =
@@ -50,10 +51,7 @@ long arch_do_sysctl(struct xen_sysctl *sysctl,
pi->total_pages = total_pages;
pi->free_pages = avail_domheap_pages();
pi->cpu_khz = cpu_khz;
- memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
- ret = 0;
- if ( copy_to_guest(u_sysctl, sysctl, 1) )
- ret = -EFAULT;
+ ret = copy_to_guest(u_sysctl, sysctl, 1) ? -EFAULT : 0;
}
break;
diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
index 143612c91f..af4de553a8 100644
--- a/xen/arch/x86/boot/trampoline.S
+++ b/xen/arch/x86/boot/trampoline.S
@@ -156,9 +156,12 @@ trampoline_boot_cpu_entry:
sti
#if defined(__x86_64__)
- /* Declare that our target operating mode is long mode. */
- movw $0xec00,%ax # declare target operating mode
- movw $0x0002,%bx # long mode
+ /*
+ * Declare that our target operating mode is long mode.
+ * Initialise 32-bit registers since some buggy BIOSes depend on it.
+ */
+ movl $0xec00,%eax # declare target operating mode
+ movl $0x0002,%ebx # long mode
int $0x15
#endif
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 9c1a2daeb6..57065f7625 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -19,23 +19,93 @@
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
+static int hvmemul_do_io(
+ int is_mmio, paddr_t addr, unsigned long count, int size,
+ paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val)
+{
+ struct vcpu *curr = current;
+ vcpu_iodata_t *vio = get_ioreq(curr);
+ ioreq_t *p = &vio->vp_ioreq;
+
+ switch ( curr->arch.hvm_vcpu.io_state )
+ {
+ case HVMIO_none:
+ break;
+ case HVMIO_completed:
+ curr->arch.hvm_vcpu.io_state = HVMIO_none;
+ if ( val == NULL )
+ return X86EMUL_UNHANDLEABLE;
+ *val = curr->arch.hvm_vcpu.io_data;
+ return X86EMUL_OKAY;
+ default:
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ curr->arch.hvm_vcpu.io_state =
+ (val == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
+
+ if ( p->state != STATE_IOREQ_NONE )
+ gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
+ p->state);
+
+ p->dir = dir;
+ p->data_is_ptr = value_is_ptr;
+ p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
+ p->size = size;
+ p->addr = addr;
+ p->count = count;
+ p->df = df;
+ p->data = value;
+ p->io_count++;
+
+ if ( is_mmio
+ ? (hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p))
+ : hvm_portio_intercept(p) )
+ {
+ p->state = STATE_IORESP_READY;
+ hvm_io_assist();
+ if ( val != NULL )
+ *val = curr->arch.hvm_vcpu.io_data;
+ curr->arch.hvm_vcpu.io_state = HVMIO_none;
+ return X86EMUL_OKAY;
+ }
+
+ hvm_send_assist_req(curr);
+ return (val != NULL) ? X86EMUL_RETRY : X86EMUL_OKAY;
+}
+
+static int hvmemul_do_pio(
+ unsigned long port, unsigned long count, int size,
+ paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val)
+{
+ return hvmemul_do_io(0, port, count, size, value,
+ dir, df, value_is_ptr, val);
+}
+
+static int hvmemul_do_mmio(
+ paddr_t gpa, unsigned long count, int size,
+ paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val)
+{
+ return hvmemul_do_io(1, gpa, count, size, value,
+ dir, df, value_is_ptr, val);
+}
+
/*
* Convert addr from linear to physical form, valid over the range
* [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to
* the valid computed range. It is always >0 when X86EMUL_OKAY is returned.
+ * @pfec indicates the access checks to be performed during page-table walks.
*/
static int hvmemul_linear_to_phys(
unsigned long addr,
paddr_t *paddr,
unsigned int bytes_per_rep,
unsigned long *reps,
- enum hvm_access_type access_type,
+ uint32_t pfec,
struct hvm_emulate_ctxt *hvmemul_ctxt)
{
struct vcpu *curr = current;
unsigned long pfn, npfn, done, todo, i;
- struct segment_register *sreg;
- uint32_t pfec;
/* Clip repetitions to a sensible maximum. */
*reps = min_t(unsigned long, *reps, 4096);
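
The io_state field used in hvmemul_do_io() above replaces the old io_in_progress/io_completed flag pair with a small state machine; its round trip, as far as this hunk shows it:

    /* HVMIO_none --do_io, val == NULL--> HVMIO_dispatched (fire and forget)
     * HVMIO_none --do_io, val != NULL--> HVMIO_awaiting_completion
     *     do_io returns X86EMUL_RETRY; the device model answers the ioreq
     *     and the completion path (outside this excerpt) records io_data
     *     and sets HVMIO_completed.
     * HVMIO_completed --re-entered do_io--> *val = io_data, state reset
     *     to HVMIO_none, X86EMUL_OKAY.
     * Any other state on entry yields X86EMUL_UNHANDLEABLE. */
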
@@ -49,14 +119,6 @@ static int hvmemul_linear_to_phys(
*paddr = addr & ~PAGE_MASK;
- /* Gather access-type information for the page walks. */
- sreg = hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt);
- pfec = PFEC_page_present;
- if ( sreg->attr.fields.dpl == 3 )
- pfec |= PFEC_user_mode;
- if ( access_type == hvm_access_write )
- pfec |= PFEC_write_access;
-
/* Get the first PFN in the range. */
if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
{
@@ -74,18 +136,19 @@ static int hvmemul_linear_to_phys(
for ( i = 1; done < todo; i++ )
{
/* Get the next PFN in the range. */
- if ( (npfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
- {
- hvm_inject_exception(TRAP_page_fault, pfec, addr);
- return X86EMUL_EXCEPTION;
- }
+ npfn = paging_gva_to_gfn(curr, addr, &pfec);
/* Is it contiguous with the preceding PFNs? If not then we're done. */
- if ( npfn != (pfn + i) )
+ if ( (npfn == INVALID_GFN) || (npfn != (pfn + i)) )
{
done /= bytes_per_rep;
if ( done == 0 )
- return X86EMUL_UNHANDLEABLE;
+ {
+ if ( npfn != INVALID_GFN )
+ return X86EMUL_UNHANDLEABLE;
+ hvm_inject_exception(TRAP_page_fault, pfec, addr);
+ return X86EMUL_EXCEPTION;
+ }
*reps = done;
break;
}
@@ -142,7 +205,10 @@ static int __hvmemul_read(
enum hvm_access_type access_type,
struct hvm_emulate_ctxt *hvmemul_ctxt)
{
+ struct vcpu *curr = current;
unsigned long addr;
+ uint32_t pfec = PFEC_page_present;
+ paddr_t gpa;
int rc;
rc = hvmemul_virtual_to_linear(
@@ -152,41 +218,40 @@ static int __hvmemul_read(
*val = 0;
+ if ( unlikely(curr->arch.hvm_vcpu.mmio_gva == (addr & PAGE_MASK)) &&
+ curr->arch.hvm_vcpu.mmio_gva )
+ {
+ unsigned int off = addr & (PAGE_SIZE - 1);
+ if ( access_type == hvm_access_insn_fetch )
+ return X86EMUL_UNHANDLEABLE;
+ gpa = (((paddr_t)curr->arch.hvm_vcpu.mmio_gpfn << PAGE_SHIFT) | off);
+ if ( (off + bytes) <= PAGE_SIZE )
+ return hvmemul_do_mmio(gpa, 1, bytes, 0, IOREQ_READ, 0, 0, val);
+ }
+
+ if ( (seg != x86_seg_none) &&
+ (hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3) )
+ pfec |= PFEC_user_mode;
+
rc = ((access_type == hvm_access_insn_fetch) ?
- hvm_fetch_from_guest_virt(val, addr, bytes) :
- hvm_copy_from_guest_virt(val, addr, bytes));
+ hvm_fetch_from_guest_virt(val, addr, bytes, pfec) :
+ hvm_copy_from_guest_virt(val, addr, bytes, pfec));
if ( rc == HVMCOPY_bad_gva_to_gfn )
return X86EMUL_EXCEPTION;
if ( rc == HVMCOPY_bad_gfn_to_mfn )
{
- struct vcpu *curr = current;
unsigned long reps = 1;
- paddr_t gpa;
if ( access_type == hvm_access_insn_fetch )
return X86EMUL_UNHANDLEABLE;
rc = hvmemul_linear_to_phys(
- addr, &gpa, bytes, &reps, access_type, hvmemul_ctxt);
+ addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt);
if ( rc != X86EMUL_OKAY )
return rc;
- if ( curr->arch.hvm_vcpu.io_in_progress )
- return X86EMUL_UNHANDLEABLE;
-
- if ( !curr->arch.hvm_vcpu.io_completed )
- {
- curr->arch.hvm_vcpu.io_in_progress = 1;
- send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, bytes,
- 0, IOREQ_READ, 0, 0);
- }
-
- if ( !curr->arch.hvm_vcpu.io_completed )
- return X86EMUL_RETRY;
-
- *val = curr->arch.hvm_vcpu.io_data;
- curr->arch.hvm_vcpu.io_completed = 0;
+ return hvmemul_do_mmio(gpa, 1, bytes, 0, IOREQ_READ, 0, 0, val);
}
return X86EMUL_OKAY;
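
The mmio_gva test added to __hvmemul_read() above (and mirrored in hvmemul_write() below) acts as a one-entry translation cache: when the access stays inside the page recorded at fault time, the gpa is rebuilt without another page-table walk:

    /* gpa = ((paddr_t)mmio_gpfn << PAGE_SHIFT) | (addr & (PAGE_SIZE - 1));
     * valid only while (addr & PAGE_MASK) == mmio_gva and the access does
     * not cross a page boundary -- hence the (off + bytes) <= PAGE_SIZE
     * check before taking the shortcut. */
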
@@ -236,7 +301,10 @@ static int hvmemul_write(
{
struct hvm_emulate_ctxt *hvmemul_ctxt =
container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+ struct vcpu *curr = current;
unsigned long addr;
+ uint32_t pfec = PFEC_page_present | PFEC_write_access;
+ paddr_t gpa;
int rc;
rc = hvmemul_virtual_to_linear(
@@ -244,27 +312,34 @@ static int hvmemul_write(
if ( rc != X86EMUL_OKAY )
return rc;
- rc = hvm_copy_to_guest_virt(addr, &val, bytes);
+ if ( unlikely(curr->arch.hvm_vcpu.mmio_gva == (addr & PAGE_MASK)) &&
+ curr->arch.hvm_vcpu.mmio_gva )
+ {
+ unsigned int off = addr & (PAGE_SIZE - 1);
+ gpa = (((paddr_t)curr->arch.hvm_vcpu.mmio_gpfn << PAGE_SHIFT) | off);
+ if ( (off + bytes) <= PAGE_SIZE )
+ return hvmemul_do_mmio(gpa, 1, bytes, val, IOREQ_WRITE,
+ 0, 0, NULL);
+ }
+
+ if ( (seg != x86_seg_none) &&
+ (hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3) )
+ pfec |= PFEC_user_mode;
+
+ rc = hvm_copy_to_guest_virt(addr, &val, bytes, pfec);
if ( rc == HVMCOPY_bad_gva_to_gfn )
return X86EMUL_EXCEPTION;
if ( rc == HVMCOPY_bad_gfn_to_mfn )
{
- struct vcpu *curr = current;
unsigned long reps = 1;
- paddr_t gpa;
rc = hvmemul_linear_to_phys(
- addr, &gpa, bytes, &reps, hvm_access_write, hvmemul_ctxt);
+ addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt);
if ( rc != X86EMUL_OKAY )
return rc;
- if ( curr->arch.hvm_vcpu.io_in_progress )
- return X86EMUL_UNHANDLEABLE;
-
- curr->arch.hvm_vcpu.io_in_progress = 1;
- send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, bytes,
- val, IOREQ_WRITE, 0, 0);
+ return hvmemul_do_mmio(gpa, 1, bytes, val, IOREQ_WRITE, 0, 0, NULL);
}
return X86EMUL_OKAY;
@@ -292,8 +367,8 @@ static int hvmemul_rep_ins(
{
struct hvm_emulate_ctxt *hvmemul_ctxt =
container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
- struct vcpu *curr = current;
unsigned long addr;
+ uint32_t pfec = PFEC_page_present | PFEC_write_access;
paddr_t gpa;
int rc;
@@ -303,19 +378,16 @@ static int hvmemul_rep_ins(
if ( rc != X86EMUL_OKAY )
return rc;
+ if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+ pfec |= PFEC_user_mode;
+
rc = hvmemul_linear_to_phys(
- addr, &gpa, bytes_per_rep, reps, hvm_access_write, hvmemul_ctxt);
+ addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
if ( rc != X86EMUL_OKAY )
return rc;
- if ( curr->arch.hvm_vcpu.io_in_progress )
- return X86EMUL_UNHANDLEABLE;
-
- curr->arch.hvm_vcpu.io_in_progress = 1;
- send_pio_req(src_port, *reps, bytes_per_rep, gpa, IOREQ_READ,
- !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
-
- return X86EMUL_OKAY;
+ return hvmemul_do_pio(src_port, *reps, bytes_per_rep, gpa, IOREQ_READ,
+ !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
}
static int hvmemul_rep_outs(
@@ -328,8 +400,8 @@ static int hvmemul_rep_outs(
{
struct hvm_emulate_ctxt *hvmemul_ctxt =
container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
- struct vcpu *curr = current;
unsigned long addr;
+ uint32_t pfec = PFEC_page_present;
paddr_t gpa;
int rc;
@@ -339,20 +411,16 @@ static int hvmemul_rep_outs(
if ( rc != X86EMUL_OKAY )
return rc;
+ if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+ pfec |= PFEC_user_mode;
+
rc = hvmemul_linear_to_phys(
- addr, &gpa, bytes_per_rep, reps, hvm_access_read, hvmemul_ctxt);
+ addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
if ( rc != X86EMUL_OKAY )
return rc;
- if ( curr->arch.hvm_vcpu.io_in_progress )
- return X86EMUL_UNHANDLEABLE;
-
- curr->arch.hvm_vcpu.io_in_progress = 1;
- send_pio_req(dst_port, *reps, bytes_per_rep,
- gpa, IOREQ_WRITE,
- !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
-
- return X86EMUL_OKAY;
+ return hvmemul_do_pio(dst_port, *reps, bytes_per_rep, gpa, IOREQ_WRITE,
+ !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
}
static int hvmemul_rep_movs(
@@ -366,9 +434,9 @@ static int hvmemul_rep_movs(
{
struct hvm_emulate_ctxt *hvmemul_ctxt =
container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
- struct vcpu *curr = current;
unsigned long saddr, daddr;
paddr_t sgpa, dgpa;
+ uint32_t pfec = PFEC_page_present;
p2m_type_t p2mt;
int rc;
@@ -384,39 +452,32 @@ static int hvmemul_rep_movs(
if ( rc != X86EMUL_OKAY )
return rc;
+ if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+ pfec |= PFEC_user_mode;
+
rc = hvmemul_linear_to_phys(
- saddr, &sgpa, bytes_per_rep, reps, hvm_access_read, hvmemul_ctxt);
+ saddr, &sgpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
if ( rc != X86EMUL_OKAY )
return rc;
rc = hvmemul_linear_to_phys(
- daddr, &dgpa, bytes_per_rep, reps, hvm_access_write, hvmemul_ctxt);
+ daddr, &dgpa, bytes_per_rep, reps,
+ pfec | PFEC_write_access, hvmemul_ctxt);
if ( rc != X86EMUL_OKAY )
return rc;
- if ( curr->arch.hvm_vcpu.io_in_progress )
- return X86EMUL_UNHANDLEABLE;
-
(void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt);
if ( !p2m_is_ram(p2mt) )
- {
- curr->arch.hvm_vcpu.io_in_progress = 1;
- send_mmio_req(IOREQ_TYPE_COPY, sgpa, *reps, bytes_per_rep,
- dgpa, IOREQ_READ,
- !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
- }
- else
- {
- (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt);
- if ( p2m_is_ram(p2mt) )
- return X86EMUL_UNHANDLEABLE;
- curr->arch.hvm_vcpu.io_in_progress = 1;
- send_mmio_req(IOREQ_TYPE_COPY, dgpa, *reps, bytes_per_rep,
- sgpa, IOREQ_WRITE,
- !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
- }
+ return hvmemul_do_mmio(
+ sgpa, *reps, bytes_per_rep, dgpa, IOREQ_READ,
+ !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
- return X86EMUL_OKAY;
+ (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt);
+ if ( p2m_is_ram(p2mt) )
+ return X86EMUL_UNHANDLEABLE;
+ return hvmemul_do_mmio(
+ dgpa, *reps, bytes_per_rep, sgpa, IOREQ_WRITE,
+ !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
}
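The rewritten tail of hvmemul_rep_movs() flattens the old if/else: probe the source gfn first, and only when it is RAM probe the destination; RAM-to-RAM stays unhandleable. The final 1 argument (data_is_ptr) tells the device model that the paired address is a guest-physical buffer to copy through rather than an immediate value. A condensed outline, where source_p2mt, dest_p2mt and df stand for values computed in the hunk above:

/* Condensed outline of the direction choice in hvmemul_rep_movs(). */
if ( !p2m_is_ram(source_p2mt) )   /* MMIO -> RAM: read, dgpa is the buffer */
    return hvmemul_do_mmio(sgpa, *reps, bytes_per_rep,
                           dgpa, IOREQ_READ, df, 1, NULL);
if ( p2m_is_ram(dest_p2mt) )      /* RAM -> RAM: not an MMIO operation */
    return X86EMUL_UNHANDLEABLE;
return hvmemul_do_mmio(dgpa, *reps, bytes_per_rep,  /* RAM -> MMIO: write, */
                       sgpa, IOREQ_WRITE, df, 1, NULL); /* sgpa is buffer */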
static int hvmemul_read_segment(
@@ -452,24 +513,7 @@ static int hvmemul_read_io(
unsigned long *val,
struct x86_emulate_ctxt *ctxt)
{
- struct vcpu *curr = current;
-
- if ( curr->arch.hvm_vcpu.io_in_progress )
- return X86EMUL_UNHANDLEABLE;
-
- if ( !curr->arch.hvm_vcpu.io_completed )
- {
- curr->arch.hvm_vcpu.io_in_progress = 1;
- send_pio_req(port, 1, bytes, 0, IOREQ_READ, 0, 0);
- }
-
- if ( !curr->arch.hvm_vcpu.io_completed )
- return X86EMUL_RETRY;
-
- *val = curr->arch.hvm_vcpu.io_data;
- curr->arch.hvm_vcpu.io_completed = 0;
-
- return X86EMUL_OKAY;
+ return hvmemul_do_pio(port, 1, bytes, 0, IOREQ_READ, 0, 0, val);
}
static int hvmemul_write_io(
@@ -478,21 +522,7 @@ static int hvmemul_write_io(
unsigned long val,
struct x86_emulate_ctxt *ctxt)
{
- struct vcpu *curr = current;
-
- if ( port == 0xe9 )
- {
- hvm_print_line(curr, val);
- return X86EMUL_OKAY;
- }
-
- if ( curr->arch.hvm_vcpu.io_in_progress )
- return X86EMUL_UNHANDLEABLE;
-
- curr->arch.hvm_vcpu.io_in_progress = 1;
- send_pio_req(port, 1, bytes, val, IOREQ_WRITE, 0, 0);
-
- return X86EMUL_OKAY;
+ return hvmemul_do_pio(port, 1, bytes, val, IOREQ_WRITE, 0, 0, NULL);
}
static int hvmemul_read_cr(
@@ -674,7 +704,7 @@ int hvm_emulate_one(
{
struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
struct vcpu *curr = current;
- uint32_t new_intr_shadow;
+ uint32_t new_intr_shadow, pfec = PFEC_page_present;
unsigned long addr;
int rc;
@@ -691,6 +721,9 @@ int hvm_emulate_one(
hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.db ? 32 : 16;
}
+ if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+ pfec |= PFEC_user_mode;
+
hvmemul_ctxt->insn_buf_eip = regs->eip;
hvmemul_ctxt->insn_buf_bytes =
(hvm_virtual_to_linear_addr(
@@ -698,7 +731,8 @@ int hvm_emulate_one(
regs->eip, sizeof(hvmemul_ctxt->insn_buf),
hvm_access_insn_fetch, hvmemul_ctxt->ctxt.addr_size, &addr) &&
!hvm_fetch_from_guest_virt_nofault(
- hvmemul_ctxt->insn_buf, addr, sizeof(hvmemul_ctxt->insn_buf)))
+ hvmemul_ctxt->insn_buf, addr,
+ sizeof(hvmemul_ctxt->insn_buf), pfec))
? sizeof(hvmemul_ctxt->insn_buf) : 0;
hvmemul_ctxt->exn_pending = 0;
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index f1aa4fed34..961bfbf354 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -81,6 +81,58 @@ void hvm_enable(struct hvm_function_table *fns)
printk("HVM: Hardware Assisted Paging detected.\n");
}
+/*
+ * Need to re-inject a given event? We avoid re-injecting software exceptions
+ * and interrupts because the faulting/trapping instruction can simply be
+ * re-executed (neither VMX nor SVM updates RIP on a VMEXIT during
+ * INT3/INTO/INTn).
+ */
+int hvm_event_needs_reinjection(uint8_t type, uint8_t vector)
+{
+ switch ( type )
+ {
+ case X86_EVENTTYPE_EXT_INTR:
+ case X86_EVENTTYPE_NMI:
+ return 1;
+ case X86_EVENTTYPE_HW_EXCEPTION:
+ /*
+ * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly
+ * check for these vectors, as they are really SW Exceptions. SVM has
+ * not updated RIP to point after the trapping instruction (INT3/INTO).
+ */
+ return (vector != 3) && (vector != 4);
+ default:
+ /* Software exceptions/interrupts can be re-executed (e.g., INT n). */
+ break;
+ }
+ return 0;
+}
+
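A few concrete decisions that follow from the rules above (ASSERT is used purely as an illustrative harness; these lines are not part of the patch):

ASSERT(hvm_event_needs_reinjection(X86_EVENTTYPE_EXT_INTR, 0x20));    /* IRQ  */
ASSERT(hvm_event_needs_reinjection(X86_EVENTTYPE_NMI, 2));
ASSERT(hvm_event_needs_reinjection(X86_EVENTTYPE_HW_EXCEPTION, 14));  /* #PF  */
ASSERT(!hvm_event_needs_reinjection(X86_EVENTTYPE_HW_EXCEPTION, 3));  /* INT3 */
ASSERT(!hvm_event_needs_reinjection(X86_EVENTTYPE_HW_EXCEPTION, 4));  /* INTO */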
+/*
+ * Combine two hardware exceptions: @vec2 was raised during delivery of @vec1.
+ * This means we can assume that @vec2 is contributory or a page fault.
+ */
+uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2)
+{
+ /* Exception during double-fault delivery always causes a triple fault. */
+ if ( vec1 == TRAP_double_fault )
+ {
+ hvm_triple_fault();
+ return TRAP_double_fault; /* dummy return */
+ }
+
+ /* Exception during page-fault delivery always causes a double fault. */
+ if ( vec1 == TRAP_page_fault )
+ return TRAP_double_fault;
+
+ /* Discard the first exception if it is benign, or if we now have a #PF
+ * (a #PF raised while delivering a contributory exception is delivered
+ * serially; only #PF during #PF, handled above, double-faults). */
+ if ( !((1u << vec1) & 0x7c01u) || (vec2 == TRAP_page_fault) )
+ return vec2;
+
+ /* Cannot combine the exceptions: double fault. */
+ return TRAP_double_fault;
+}
+
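The mask 0x7c01u is bit 0 (#DE) plus bits 10-14 (#TS, #NP, #SS, #GP, #PF): the contributory exceptions together with page fault. Some worked combinations, derived purely from the code above (again an illustrative harness, not patch text):

ASSERT(hvm_combine_hw_exceptions(TRAP_gp_fault, TRAP_no_segment)
       == TRAP_double_fault);           /* contributory + contributory */
ASSERT(hvm_combine_hw_exceptions(TRAP_gp_fault, TRAP_page_fault)
       == TRAP_page_fault);             /* #PF after contributory: serial */
ASSERT(hvm_combine_hw_exceptions(TRAP_debug, TRAP_gp_fault)
       == TRAP_gp_fault);               /* benign first event is discarded */
ASSERT(hvm_combine_hw_exceptions(TRAP_page_fault, TRAP_gp_fault)
       == TRAP_double_fault);           /* anything during #PF delivery */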
void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
{
u64 host_tsc;
@@ -203,6 +255,30 @@ static int hvm_set_ioreq_page(
return 0;
}
+static int hvm_print_line(
+ int dir, uint32_t port, uint32_t bytes, uint32_t *val)
+{
+ struct vcpu *curr = current;
+ struct hvm_domain *hd = &curr->domain->arch.hvm_domain;
+ char c = *val;
+
+ BUG_ON(bytes != 1);
+
+ spin_lock(&hd->pbuf_lock);
+ hd->pbuf[hd->pbuf_idx++] = c;
+ if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
+ {
+ if ( c != '\n' )
+ hd->pbuf[hd->pbuf_idx++] = '\n';
+ hd->pbuf[hd->pbuf_idx] = '\0';
+ printk(XENLOG_G_DEBUG "HVM%u: %s", curr->domain->domain_id, hd->pbuf);
+ hd->pbuf_idx = 0;
+ }
+ spin_unlock(&hd->pbuf_lock);
+
+ return 1;
+}
+
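hvm_print_line() is now reached through the generic port-I/O intercept (registered on port 0xe9 in hvm_domain_initialise() below) rather than being special-cased in hvmemul_write_io(), whose 0xe9 check the emulate.c hunk above removes. The guest-visible behaviour is unchanged; a minimal guest-side sketch (hypothetical helper names) that drives it:

/* Guest code: emit a line on the HVM debug port (0xe9). */
static inline void debug_outb(unsigned char c)
{
    asm volatile ( "outb %0, $0xe9" : : "a" (c) );
}

static void debug_puts(const char *s)
{
    while ( *s != '\0' )
        debug_outb(*s++);
    debug_outb('\n');  /* '\n' flushes the per-domain buffer above */
}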
int hvm_domain_initialise(struct domain *d)
{
int rc;
@@ -237,6 +313,8 @@ int hvm_domain_initialise(struct domain *d)
hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
+ register_portio_handler(d, 0xe9, 1, hvm_print_line);
+
rc = hvm_funcs.domain_initialise(d);
if ( rc != 0 )
goto fail2;
@@ -1250,7 +1328,7 @@ void hvm_task_switch(
goto out;
}
- if ( !tr.attr.fields.g && (tr.limit < (sizeof(tss)-1)) )
+ if ( tr.limit < (sizeof(tss)-1) )
{
hvm_inject_exception(TRAP_invalid_tss, tss_sel & 0xfff8, 0);
goto out;
@@ -1358,7 +1436,7 @@ void hvm_task_switch(
if ( hvm_virtual_to_linear_addr(x86_seg_ss, &reg, regs->esp,
4, hvm_access_write, 32,
&linear_addr) )
- hvm_copy_to_guest_virt_nofault(linear_addr, &errcode, 4);
+ hvm_copy_to_guest_virt_nofault(linear_addr, &errcode, 4, 0);
}
out:
@@ -1366,60 +1444,31 @@ void hvm_task_switch(
hvm_unmap(nptss_desc);
}
-/*
- * __hvm_copy():
- * @buf = hypervisor buffer
- * @addr = guest address to copy to/from
- * @size = number of bytes to copy
- * @dir = copy *to* guest (TRUE) or *from* guest (FALSE)?
- * @virt = addr is *virtual* (TRUE) or *guest physical* (FALSE)?
- * @fetch = copy is an instruction fetch?
- * Returns number of bytes failed to copy (0 == complete success).
- */
+#define HVMCOPY_from_guest (0u<<0)
+#define HVMCOPY_to_guest (1u<<0)
+#define HVMCOPY_no_fault (0u<<1)
+#define HVMCOPY_fault (1u<<1)
+#define HVMCOPY_phys (0u<<2)
+#define HVMCOPY_virt (1u<<2)
static enum hvm_copy_result __hvm_copy(
- void *buf, paddr_t addr, int size, int dir, int virt, int fetch)
+ void *buf, paddr_t addr, int size, unsigned int flags, uint32_t pfec)
{
struct vcpu *curr = current;
unsigned long gfn, mfn;
p2m_type_t p2mt;
char *p;
- int count, todo;
- uint32_t pfec = PFEC_page_present;
+ int count, todo = size;
- /*
- * We cannot use hvm_get_segment_register() while executing in
- * vmx_realmode() as segment register state is cached. Furthermore,
- * VMREADs on every data access hurts emulation performance.
- * Hence we do not gather extra PFEC flags if CR0.PG == 0.
- */
- if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
- virt = 0;
-
- if ( virt )
- {
- struct segment_register sreg;
- hvm_get_segment_register(curr, x86_seg_ss, &sreg);
- if ( sreg.attr.fields.dpl == 3 )
- pfec |= PFEC_user_mode;
-
- if ( dir )
- pfec |= PFEC_write_access;
-
- if ( fetch )
- pfec |= PFEC_insn_fetch;
- }
-
- todo = size;
while ( todo > 0 )
{
count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
- if ( virt )
+ if ( flags & HVMCOPY_virt )
{
gfn = paging_gva_to_gfn(curr, addr, &pfec);
if ( gfn == INVALID_GFN )
{
- if ( virt == 2 ) /* 2 means generate a fault */
+ if ( flags & HVMCOPY_fault )
hvm_inject_exception(TRAP_page_fault, pfec, addr);
return HVMCOPY_bad_gva_to_gfn;
}
@@ -1437,16 +1486,18 @@ static enum hvm_copy_result __hvm_copy(
p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
- if ( dir )
+ if ( flags & HVMCOPY_to_guest )
{
- memcpy(p, buf, count); /* dir == TRUE: *to* guest */
+ memcpy(p, buf, count);
paging_mark_dirty(curr->domain, mfn);
}
else
- memcpy(buf, p, count); /* dir == FALSE: *from* guest */
+ {
+ memcpy(buf, p, count);
+ }
unmap_domain_page(p);
-
+
addr += count;
buf += count;
todo -= count;
@@ -1458,56 +1509,73 @@ static enum hvm_copy_result __hvm_copy(
enum hvm_copy_result hvm_copy_to_guest_phys(
paddr_t paddr, void *buf, int size)
{
- return __hvm_copy(buf, paddr, size, 1, 0, 0);
+ return __hvm_copy(buf, paddr, size,
+ HVMCOPY_to_guest | HVMCOPY_fault | HVMCOPY_phys,
+ 0);
}
enum hvm_copy_result hvm_copy_from_guest_phys(
void *buf, paddr_t paddr, int size)
{
- return __hvm_copy(buf, paddr, size, 0, 0, 0);
+ return __hvm_copy(buf, paddr, size,
+ HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_phys,
+ 0);
}
enum hvm_copy_result hvm_copy_to_guest_virt(
- unsigned long vaddr, void *buf, int size)
+ unsigned long vaddr, void *buf, int size, uint32_t pfec)
{
- return __hvm_copy(buf, vaddr, size, 1, 2, 0);
+ return __hvm_copy(buf, vaddr, size,
+ HVMCOPY_to_guest | HVMCOPY_fault | HVMCOPY_virt,
+ PFEC_page_present | PFEC_write_access | pfec);
}
enum hvm_copy_result hvm_copy_from_guest_virt(
- void *buf, unsigned long vaddr, int size)
+ void *buf, unsigned long vaddr, int size, uint32_t pfec)
{
- return __hvm_copy(buf, vaddr, size, 0, 2, 0);
+ return __hvm_copy(buf, vaddr, size,
+ HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
+ PFEC_page_present | pfec);
}
enum hvm_copy_result hvm_fetch_from_guest_virt(
- void *buf, unsigned long vaddr, int size)
+ void *buf, unsigned long vaddr, int size, uint32_t pfec)
{
- return __hvm_copy(buf, vaddr, size, 0, 2, hvm_nx_enabled(current));
+ if ( hvm_nx_enabled(current) )
+ pfec |= PFEC_insn_fetch;
+ return __hvm_copy(buf, vaddr, size,
+ HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
+ PFEC_page_present | pfec);
}
enum hvm_copy_result hvm_copy_to_guest_virt_nofault(
- unsigned long vaddr, void *buf, int size)
+ unsigned long vaddr, void *buf, int size, uint32_t pfec)
{
- return __hvm_copy(buf, vaddr, size, 1, 1, 0);
+ return __hvm_copy(buf, vaddr, size,
+ HVMCOPY_to_guest | HVMCOPY_no_fault | HVMCOPY_virt,
+ PFEC_page_present | PFEC_write_access | pfec);
}
enum hvm_copy_result hvm_copy_from_guest_virt_nofault(
- void *buf, unsigned long vaddr, int size)
+ void *buf, unsigned long vaddr, int size, uint32_t pfec)
{
- return __hvm_copy(buf, vaddr, size, 0, 1, 0);
+ return __hvm_copy(buf, vaddr, size,
+ HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
+ PFEC_page_present | pfec);
}
enum hvm_copy_result hvm_fetch_from_guest_virt_nofault(
- void *buf, unsigned long vaddr, int size)
+ void *buf, unsigned long vaddr, int size, uint32_t pfec)
{
- return __hvm_copy(buf, vaddr, size, 0, 1, hvm_nx_enabled(current));
+ if ( hvm_nx_enabled(current) )
+ pfec |= PFEC_insn_fetch;
+ return __hvm_copy(buf, vaddr, size,
+ HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
+ PFEC_page_present | pfec);
}
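Each public wrapper now fixes the direction, the faulting behaviour and the base PFEC bits, leaving only caller-specific bits (typically PFEC_user_mode) in the new pfec parameter. Expanding one call by hand, purely as an illustration:

/* Illustration: what a wrapper call bottoms out as inside hvm.c. */
enum hvm_copy_result res;
res = hvm_copy_from_guest_virt(buf, vaddr, size, PFEC_user_mode);
/* ... is equivalent to ... */
res = __hvm_copy(buf, vaddr, size,
                 HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
                 PFEC_page_present | PFEC_user_mode);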
DEFINE_PER_CPU(int, guest_handles_in_xen_space);
-/* Note that copy_{to,from}_user_hvm require the PTE to be writable even
- when they're only trying to read from it. The guest is expected to
- deal with this. */
unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len)
{
int rc;
@@ -1518,7 +1586,8 @@ unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len)
return 0;
}
- rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from, len);
+ rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from,
+ len, 0);
return rc ? len : 0; /* fake a copy_to_user() return code */
}
@@ -1532,28 +1601,10 @@ unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len)
return 0;
}
- rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len);
+ rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len, 0);
return rc ? len : 0; /* fake a copy_from_user() return code */
}
-/* HVM specific printbuf. Mostly used for hvmloader chit-chat. */
-void hvm_print_line(struct vcpu *v, const char c)
-{
- struct hvm_domain *hd = &v->domain->arch.hvm_domain;
-
- spin_lock(&hd->pbuf_lock);
- hd->pbuf[hd->pbuf_idx++] = c;
- if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
- {
- if ( c != '\n' )
- hd->pbuf[hd->pbuf_idx++] = '\n';
- hd->pbuf[hd->pbuf_idx] = '\0';
- printk(XENLOG_G_DEBUG "HVM%u: %s", v->domain->domain_id, hd->pbuf);
- hd->pbuf_idx = 0;
- }
- spin_unlock(&hd->pbuf_lock);
-}
-
#define bitmaskof(idx) (1U << ((idx) & 31))
void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
@@ -1655,7 +1706,7 @@ enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
static long hvm_grant_table_op(
unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count)
{
- if ( cmd != GNTTABOP_query_size )
+ if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) )
return -ENOSYS; /* all other commands need auditing */
return do_grant_table_op(cmd, uop, count);
}
@@ -2109,12 +2160,15 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
return -EINVAL;
if ( a.domid == DOMID_SELF )
+ {
d = rcu_lock_current_domain();
- else {
- d = rcu_lock_domain_by_id(a.domid);
- if ( d == NULL )
+ }
+ else
+ {
+ if ( (d = rcu_lock_domain_by_id(a.domid)) == NULL )
return -ESRCH;
- if ( !IS_PRIV_FOR(current->domain, d) ) {
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
rc = -EPERM;
goto param_fail;
}
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index 83d73a0e9e..ac1e62782a 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -123,73 +123,6 @@ int hvm_buffered_io_send(ioreq_t *p)
return 1;
}
-void send_pio_req(unsigned long port, unsigned long count, int size,
- paddr_t value, int dir, int df, int value_is_ptr)
-{
- struct vcpu *v = current;
- vcpu_iodata_t *vio = get_ioreq(v);
- ioreq_t *p = &vio->vp_ioreq;
-
- if ( p->state != STATE_IOREQ_NONE )
- gdprintk(XENLOG_WARNING,
- "WARNING: send pio with something already pending (%d)?\n",
- p->state);
-
- p->dir = dir;
- p->data_is_ptr = value_is_ptr;
- p->type = IOREQ_TYPE_PIO;
- p->size = size;
- p->addr = port;
- p->count = count;
- p->df = df;
- p->data = value;
- p->io_count++;
-
- if ( hvm_portio_intercept(p) )
- {
- p->state = STATE_IORESP_READY;
- hvm_io_assist();
- }
- else
- {
- hvm_send_assist_req(v);
- }
-}
-
-void send_mmio_req(unsigned char type, paddr_t gpa,
- unsigned long count, int size, paddr_t value,
- int dir, int df, int value_is_ptr)
-{
- struct vcpu *v = current;
- vcpu_iodata_t *vio = get_ioreq(v);
- ioreq_t *p = &vio->vp_ioreq;
-
- if ( p->state != STATE_IOREQ_NONE )
- gdprintk(XENLOG_WARNING,
- "WARNING: send mmio with something already pending (%d)?\n",
- p->state);
-
- p->dir = dir;
- p->data_is_ptr = value_is_ptr;
- p->type = type;
- p->size = size;
- p->addr = gpa;
- p->count = count;
- p->df = df;
- p->data = value;
- p->io_count++;
-
- if ( hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p) )
- {
- p->state = STATE_IORESP_READY;
- hvm_io_assist();
- }
- else
- {
- hvm_send_assist_req(v);
- }
-}
-
void send_timeoffset_req(unsigned long timeoff)
{
ioreq_t p[1];
@@ -249,6 +182,11 @@ int handle_mmio(void)
rc = hvm_emulate_one(&ctxt);
+ if ( curr->arch.hvm_vcpu.io_state == HVMIO_awaiting_completion )
+ curr->arch.hvm_vcpu.io_state = HVMIO_handle_mmio_awaiting_completion;
+ else
+ curr->arch.hvm_vcpu.mmio_gva = 0;
+
switch ( rc )
{
case X86EMUL_UNHANDLEABLE:
@@ -271,41 +209,46 @@ int handle_mmio(void)
hvm_emulate_writeback(&ctxt);
- curr->arch.hvm_vcpu.mmio_in_progress = curr->arch.hvm_vcpu.io_in_progress;
-
return 1;
}
+int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn)
+{
+ current->arch.hvm_vcpu.mmio_gva = gva & PAGE_MASK;
+ current->arch.hvm_vcpu.mmio_gpfn = gpfn;
+ return handle_mmio();
+}
+
void hvm_io_assist(void)
{
- struct vcpu *v = current;
- ioreq_t *p = &get_ioreq(v)->vp_ioreq;
+ struct vcpu *curr = current;
+ ioreq_t *p = &get_ioreq(curr)->vp_ioreq;
+ enum hvm_io_state io_state;
if ( p->state != STATE_IORESP_READY )
{
gdprintk(XENLOG_ERR, "Unexpected HVM iorequest state %d.\n", p->state);
- domain_crash(v->domain);
- goto out;
+ domain_crash_synchronous();
}
rmb(); /* see IORESP_READY /then/ read contents of ioreq */
p->state = STATE_IOREQ_NONE;
- if ( v->arch.hvm_vcpu.io_in_progress )
+ io_state = curr->arch.hvm_vcpu.io_state;
+ curr->arch.hvm_vcpu.io_state = HVMIO_none;
+
+ if ( (io_state == HVMIO_awaiting_completion) ||
+ (io_state == HVMIO_handle_mmio_awaiting_completion) )
{
- v->arch.hvm_vcpu.io_in_progress = 0;
- if ( (p->dir == IOREQ_READ) && !p->data_is_ptr )
- {
- v->arch.hvm_vcpu.io_completed = 1;
- v->arch.hvm_vcpu.io_data = p->data;
- if ( v->arch.hvm_vcpu.mmio_in_progress )
- (void)handle_mmio();
- }
+ curr->arch.hvm_vcpu.io_state = HVMIO_completed;
+ curr->arch.hvm_vcpu.io_data = p->data;
+ if ( io_state == HVMIO_handle_mmio_awaiting_completion )
+ (void)handle_mmio();
}
- out:
- vcpu_end_shutdown_deferral(v);
+ if ( p->state == STATE_IOREQ_NONE )
+ vcpu_end_shutdown_deferral(curr);
}
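Taken together with the emulate.c changes, the new io_state field forms a small state machine. A summary reconstructed from this diff alone (not patch text):

/*
 * HVMIO_none
 *   --(emulator issues a request and wants the result)-->
 *       HVMIO_awaiting_completion
 * HVMIO_awaiting_completion
 *   --(handle_mmio() unwinds before the response arrives)-->
 *       HVMIO_handle_mmio_awaiting_completion
 * HVMIO_{,handle_mmio_}awaiting_completion
 *   --(hvm_io_assist(): io_data latched)--> HVMIO_completed,
 *       with handle_mmio() re-entered in the latter case
 * HVMIO_completed
 *   --(emulator consumes io_data)--> HVMIO_none
 */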
void dpci_ioport_read(uint32_t mport, ioreq_t *p)
diff --git a/xen/arch/x86/hvm/svm/emulate.c b/xen/arch/x86/hvm/svm/emulate.c
index cdf0059c99..8d0f0fc652 100644
--- a/xen/arch/x86/hvm/svm/emulate.c
+++ b/xen/arch/x86/hvm/svm/emulate.c
@@ -32,9 +32,11 @@
static int inst_copy_from_guest(
unsigned char *buf, unsigned long guest_eip, int inst_len)
{
+ struct vmcb_struct *vmcb = current->arch.hvm_svm.vmcb;
+ uint32_t pfec = (vmcb->cpl == 3) ? PFEC_user_mode : 0;
if ( (inst_len > MAX_INST_LEN) || (inst_len <= 0) )
return 0;
- if ( hvm_fetch_from_guest_virt_nofault(buf, guest_eip, inst_len) )
+ if ( hvm_fetch_from_guest_virt_nofault(buf, guest_eip, inst_len, pfec) )
return 0;
return inst_len;
}
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 225c9893c9..be166a868c 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -725,7 +725,15 @@ static void svm_inject_exception(
{
struct vcpu *curr = current;
struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
- eventinj_t event;
+ eventinj_t event = vmcb->eventinj;
+
+ if ( unlikely(event.fields.v) &&
+ (event.fields.type == X86_EVENTTYPE_HW_EXCEPTION) )
+ {
+ trapnr = hvm_combine_hw_exceptions(event.fields.vector, trapnr);
+ if ( trapnr == TRAP_double_fault )
+ errcode = 0;
+ }
event.bytes = 0;
event.fields.v = 1;
diff --git a/xen/arch/x86/hvm/vmx/realmode.c b/xen/arch/x86/hvm/vmx/realmode.c
index 8667588883..c00e8b1e42 100644
--- a/xen/arch/x86/hvm/vmx/realmode.c
+++ b/xen/arch/x86/hvm/vmx/realmode.c
@@ -190,7 +190,7 @@ void vmx_realmode(struct cpu_user_regs *regs)
hvm_emulate_prepare(&hvmemul_ctxt, regs);
- if ( curr->arch.hvm_vcpu.io_completed )
+ if ( curr->arch.hvm_vcpu.io_state == HVMIO_completed )
realmode_emulate_one(&hvmemul_ctxt);
/* Only deliver interrupts into emulated real mode. */
@@ -203,7 +203,7 @@ void vmx_realmode(struct cpu_user_regs *regs)
while ( curr->arch.hvm_vmx.vmxemul &&
!softirq_pending(smp_processor_id()) &&
- !curr->arch.hvm_vcpu.io_in_progress )
+ (curr->arch.hvm_vcpu.io_state == HVMIO_none) )
{
/*
* Check for pending interrupts only every 16 instructions, because
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b78ca6451c..29dcb68503 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -983,6 +983,62 @@ static void vmx_flush_guest_tlbs(void)
* because VMRESUME will flush it for us. */
}
+
+
+static void __vmx_inject_exception(
+ struct vcpu *v, int trap, int type, int error_code)
+{
+ unsigned long intr_fields;
+
+ /*
+ * NB. Callers do not need to worry about clearing STI/MOV-SS blocking:
+ * "If the VM entry is injecting, there is no blocking by STI or by
+ * MOV SS following the VM entry, regardless of the contents of the
+ * interruptibility-state field [in the guest-state area before the
+ * VM entry]", PRM Vol. 3, 22.6.1 (Interruptibility State).
+ */
+
+ intr_fields = (INTR_INFO_VALID_MASK | (type<<8) | trap);
+ if ( error_code != HVM_DELIVER_NO_ERROR_CODE ) {
+ __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+ intr_fields |= INTR_INFO_DELIVER_CODE_MASK;
+ }
+
+ __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
+
+ if ( trap == TRAP_page_fault )
+ HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vcpu.guest_cr[2], error_code);
+ else
+ HVMTRACE_2D(INJ_EXC, v, trap, error_code);
+}
+
+void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code)
+{
+ unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
+
+ if ( unlikely(intr_info & INTR_INFO_VALID_MASK) &&
+ (((intr_info >> 8) & 7) == X86_EVENTTYPE_HW_EXCEPTION) )
+ {
+ trap = hvm_combine_hw_exceptions((uint8_t)intr_info, trap);
+ if ( trap == TRAP_double_fault )
+ error_code = 0;
+ }
+
+ __vmx_inject_exception(v, trap, X86_EVENTTYPE_HW_EXCEPTION, error_code);
+}
+
+void vmx_inject_extint(struct vcpu *v, int trap)
+{
+ __vmx_inject_exception(v, trap, X86_EVENTTYPE_EXT_INTR,
+ HVM_DELIVER_NO_ERROR_CODE);
+}
+
+void vmx_inject_nmi(struct vcpu *v)
+{
+ __vmx_inject_exception(v, 2, X86_EVENTTYPE_NMI,
+ HVM_DELIVER_NO_ERROR_CODE);
+}
+
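vmx_inject_hw_exception(), vmx_inject_extint() and vmx_inject_nmi() select the event type (and, for hardware exceptions, fold in any already-pending event via hvm_combine_hw_exceptions()); __vmx_inject_exception() performs the VMCS writes. Typical call sites look like this (v, vector and error_code are illustrative names):

vmx_inject_hw_exception(v, TRAP_page_fault, error_code); /* #PF with code  */
vmx_inject_extint(v, vector);                            /* external IRQ   */
vmx_inject_nmi(v);                                       /* NMI (vector 2) */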
static void vmx_inject_exception(
unsigned int trapnr, int errcode, unsigned long cr2)
{
@@ -1184,23 +1240,6 @@ static void vmx_do_cpuid(struct cpu_user_regs *regs)
regs->edx = edx;
}
-#define CASE_GET_REG_P(REG, reg) \
- case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
-
-#ifdef __i386__
-#define CASE_EXTEND_GET_REG_P
-#else
-#define CASE_EXTEND_GET_REG_P \
- CASE_GET_REG_P(R8, r8); \
- CASE_GET_REG_P(R9, r9); \
- CASE_GET_REG_P(R10, r10); \
- CASE_GET_REG_P(R11, r11); \
- CASE_GET_REG_P(R12, r12); \
- CASE_GET_REG_P(R13, r13); \
- CASE_GET_REG_P(R14, r14); \
- CASE_GET_REG_P(R15, r15)
-#endif
-
static void vmx_dr_access(unsigned long exit_qualification,
struct cpu_user_regs *regs)
{
@@ -1224,9 +1263,9 @@ static void vmx_invlpg_intercept(unsigned long vaddr)
}
#define CASE_SET_REG(REG, reg) \
- case REG_ ## REG: regs->reg = value; break
+ case VMX_CONTROL_REG_ACCESS_GPR_ ## REG: regs->reg = value; break
#define CASE_GET_REG(REG, reg) \
- case REG_ ## REG: value = regs->reg; break
+ case VMX_CONTROL_REG_ACCESS_GPR_ ## REG: value = regs->reg; break
#define CASE_EXTEND_SET_REG \
CASE_EXTEND_REG(S)
@@ -1352,26 +1391,25 @@ static int vmx_cr_access(unsigned long exit_qualification,
unsigned long value;
struct vcpu *v = current;
- switch ( exit_qualification & CONTROL_REG_ACCESS_TYPE )
+ switch ( exit_qualification & VMX_CONTROL_REG_ACCESS_TYPE )
{
- case TYPE_MOV_TO_CR:
- gp = exit_qualification & CONTROL_REG_ACCESS_REG;
- cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
+ case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR:
+ gp = exit_qualification & VMX_CONTROL_REG_ACCESS_GPR;
+ cr = exit_qualification & VMX_CONTROL_REG_ACCESS_NUM;
return mov_to_cr(gp, cr, regs);
- case TYPE_MOV_FROM_CR:
- gp = exit_qualification & CONTROL_REG_ACCESS_REG;
- cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
+ case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR:
+ gp = exit_qualification & VMX_CONTROL_REG_ACCESS_GPR;
+ cr = exit_qualification & VMX_CONTROL_REG_ACCESS_NUM;
mov_from_cr(cr, gp, regs);
break;
- case TYPE_CLTS:
+ case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
vmx_update_guest_cr(v, 0);
HVMTRACE_0D(CLTS, current);
break;
- case TYPE_LMSW:
+ case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
value = v->arch.hvm_vcpu.guest_cr[0];
- value = (value & ~0xF) |
- (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
+ value = (value & ~0xFFFF) | ((exit_qualification >> 16) & 0xFFFF);
HVMTRACE_1D(LMSW, current, value);
return !hvm_set_cr0(value);
default:
diff --git a/xen/arch/x86/hvm/vmx/x86_32/exits.S b/xen/arch/x86/hvm/vmx/x86_32/exits.S
index 70cd1dae36..11db8cfc21 100644
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S
+++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S
@@ -60,6 +60,7 @@
ALIGN
ENTRY(vmx_asm_vmexit_handler)
HVM_SAVE_ALL_NOSEGREGS
+ GET_CURRENT(%ebx)
movl $GUEST_RIP,%eax
VMREAD(UREGS_eip)
@@ -68,6 +69,9 @@ ENTRY(vmx_asm_vmexit_handler)
movl $GUEST_RFLAGS,%eax
VMREAD(UREGS_eflags)
+ movl %cr2,%eax
+ movl %eax,VCPU_hvm_guest_cr2(%ebx)
+
#ifndef NDEBUG
movw $0xbeef,%ax
movw %ax,UREGS_error_code(%esp)
diff --git a/xen/arch/x86/hvm/vmx/x86_64/exits.S b/xen/arch/x86/hvm/vmx/x86_64/exits.S
index fda4f179b1..48da4869bd 100644
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S
+++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S
@@ -76,6 +76,7 @@
ALIGN
ENTRY(vmx_asm_vmexit_handler)
HVM_SAVE_ALL_NOSEGREGS
+ GET_CURRENT(%rbx)
leaq UREGS_rip(%rsp),%rdi
movl $GUEST_RIP,%eax
@@ -86,6 +87,9 @@ ENTRY(vmx_asm_vmexit_handler)
movl $GUEST_RFLAGS,%eax
VMREAD(UREGS_eflags)
+ movq %cr2,%rax
+ movq %rax,VCPU_hvm_guest_cr2(%rbx)
+
#ifndef NDEBUG
movw $0xbeef,%ax
movw %ax,UREGS_error_code(%rsp)
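Both exit stubs now snapshot the guest's %cr2 into VCPU_hvm_guest_cr2 immediately after VMEXIT. The patch does not state a rationale, but a consistent reading is twofold: __vmx_inject_exception() above reports the fault address from v->arch.hvm_vcpu.guest_cr[2], and %cr2 must be captured before any host-side page fault can clobber it. The consumer, quoted from the vmx.c hunk above:

/* Consumer of the %cr2 snapshot, from __vmx_inject_exception(): */
if ( trap == TRAP_page_fault )
    HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vcpu.guest_cr[2], error_code);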
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index bd92ec2f32..a1220af3b3 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2114,14 +2114,14 @@ static int set_foreigndom(domid_t domid)
info->foreign = rcu_lock_domain(dom_xen);
break;
default:
- e = rcu_lock_domain_by_id(domid);
- if ( e == NULL )
+ if ( (e = rcu_lock_domain_by_id(domid)) == NULL )
{
MEM_LOG("Unknown domain '%u'", domid);
okay = 0;
break;
}
- if (!IS_PRIV_FOR(d, e)) {
+ if ( !IS_PRIV_FOR(d, e) )
+ {
MEM_LOG("Cannot set foreign dom");
okay = 0;
rcu_unlock_domain(e);
@@ -3259,12 +3259,15 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
return -EFAULT;
if ( xatp.domid == DOMID_SELF )
+ {
d = rcu_lock_current_domain();
- else {
- d = rcu_lock_domain_by_id(xatp.domid);
- if ( d == NULL )
+ }
+ else
+ {
+ if ( (d = rcu_lock_domain_by_id(xatp.domid)) == NULL )
return -ESRCH;
- if ( !IS_PRIV_FOR(current->domain, d) ) {
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
rcu_unlock_domain(d);
return -EPERM;
}
@@ -3355,12 +3358,15 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
return -EINVAL;
if ( fmap.domid == DOMID_SELF )
+ {
d = rcu_lock_current_domain();
- else {
- d = rcu_lock_domain_by_id(fmap.domid);
- if ( d == NULL )
+ }
+ else
+ {
+ if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL )
return -ESRCH;
- if ( !IS_PRIV_FOR(current->domain, d) ) {
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
rcu_unlock_domain(d);
return -EPERM;
}
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index fdb1d0e4aa..e4a04bb456 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -152,9 +152,9 @@ hvm_read(enum x86_segment seg,
*val = 0;
if ( access_type == hvm_access_insn_fetch )
- rc = hvm_fetch_from_guest_virt(val, addr, bytes);
+ rc = hvm_fetch_from_guest_virt(val, addr, bytes, 0);
else
- rc = hvm_copy_from_guest_virt(val, addr, bytes);
+ rc = hvm_copy_from_guest_virt(val, addr, bytes, 0);
switch ( rc )
{
@@ -416,7 +416,7 @@ struct x86_emulate_ops *shadow_init_emulation(
x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
hvm_access_insn_fetch, sh_ctxt, &addr) &&
!hvm_fetch_from_guest_virt_nofault(
- sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
+ sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf), 0))
? sizeof(sh_ctxt->insn_buf) : 0;
return &hvm_shadow_emulator_ops;
@@ -444,7 +444,7 @@ void shadow_continue_emulation(struct sh_emulate_ctxt *sh_ctxt,
x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
hvm_access_insn_fetch, sh_ctxt, &addr) &&
!hvm_fetch_from_guest_virt_nofault(
- sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
+ sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf), 0))
? sizeof(sh_ctxt->insn_buf) : 0;
sh_ctxt->insn_buf_eip = regs->eip;
}
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 4689d132c1..f92687c246 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -2881,7 +2881,8 @@ static int sh_page_fault(struct vcpu *v,
perfc_incr(shadow_fault_fast_mmio);
SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
reset_early_unshadow(v);
- return handle_mmio() ? EXCRET_fault_fixed : 0;
+ return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
+ ? EXCRET_fault_fixed : 0);
}
else
{
@@ -3199,7 +3200,8 @@ static int sh_page_fault(struct vcpu *v,
shadow_audit_tables(v);
reset_early_unshadow(v);
shadow_unlock(d);
- return handle_mmio() ? EXCRET_fault_fixed : 0;
+ return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
+ ? EXCRET_fault_fixed : 0);
not_a_shadow_fault:
sh_audit_gw(v, &gw);
diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
index db94bfd812..13f177a7fa 100644
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -47,18 +47,22 @@ long arch_do_sysctl(
if ( ret )
break;
+ memset(pi, 0, sizeof(*pi));
pi->threads_per_core =
cpus_weight(cpu_sibling_map[0]);
pi->cores_per_socket =
cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
pi->nr_cpus = (u32)num_online_cpus();
pi->nr_nodes = num_online_nodes();
- pi->total_pages = total_pages;
- pi->free_pages = avail_domheap_pages();
- pi->scrub_pages = avail_scrub_pages();
- pi->cpu_khz = cpu_khz;
- memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
+ pi->total_pages = total_pages;
+ pi->free_pages = avail_domheap_pages();
+ pi->scrub_pages = avail_scrub_pages();
+ pi->cpu_khz = cpu_khz;
memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
+ if ( hvm_enabled )
+ pi->capabilities |= XEN_SYSCTL_PHYSCAP_hvm;
+ if ( iommu_enabled )
+ pi->capabilities |= XEN_SYSCTL_PHYSCAP_hvm_directio;
max_array_ent = pi->max_cpu_id;
pi->max_cpu_id = last_cpu(cpu_online_map);
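The physinfo sysctl now zeroes the whole structure up front and advertises two capability bits. A hypothetical tools-side consumer (get_physinfo() is an assumed wrapper around the XEN_SYSCTL_physinfo call, not an API from this patch):

xen_sysctl_physinfo_t pi;
if ( get_physinfo(&pi) == 0 )  /* hypothetical wrapper */
{
    int has_hvm = !!(pi.capabilities & XEN_SYSCTL_PHYSCAP_hvm);
    int has_dio = !!(pi.capabilities & XEN_SYSCTL_PHYSCAP_hvm_directio);
    printf("hvm=%d hvm_directio=%d\n", has_hvm, has_dio);
}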
diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c
index a3e71c2780..f743650a5d 100644
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -1,484 +1,18 @@
/******************************************************************************
* x86_emulate.c
*
- * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ * Wrapper for generic x86 instruction decoder and emulator.
*
- * Copyright (c) 2005-2007 Keir Fraser
- * Copyright (c) 2005-2007 XenSource Inc.
+ * Copyright (c) 2008, Citrix Systems, Inc.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __XEN__
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-#include <public/xen.h>
-#else
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/lib.h>
-#include <asm/regs.h>
-#undef cmpxchg
-#endif
-#include <asm-x86/x86_emulate.h>
-
-/* Operand sizes: 8-bit operands or specified/overridden size. */
-#define ByteOp (1<<0) /* 8-bit operands. */
-/* Destination operand type. */
-#define DstBitBase (0<<1) /* Memory operand, bit string. */
-#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
-#define DstReg (2<<1) /* Register operand. */
-#define DstMem (3<<1) /* Memory operand. */
-#define DstMask (3<<1)
-/* Source operand type. */
-#define SrcNone (0<<3) /* No source operand. */
-#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
-#define SrcReg (1<<3) /* Register operand. */
-#define SrcMem (2<<3) /* Memory operand. */
-#define SrcMem16 (3<<3) /* Memory operand (16-bit). */
-#define SrcImm (4<<3) /* Immediate operand. */
-#define SrcImmByte (5<<3) /* 8-bit sign-extended immediate operand. */
-#define SrcMask (7<<3)
-/* Generic ModRM decode. */
-#define ModRM (1<<6)
-/* Destination is only written; never read. */
-#define Mov (1<<7)
-
-static uint8_t opcode_table[256] = {
- /* 0x00 - 0x07 */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
- /* 0x08 - 0x0F */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, 0,
- /* 0x10 - 0x17 */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
- /* 0x18 - 0x1F */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
- /* 0x20 - 0x27 */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
- /* 0x28 - 0x2F */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
- /* 0x30 - 0x37 */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
- /* 0x38 - 0x3F */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
- /* 0x40 - 0x4F */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0x50 - 0x5F */
- ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
- ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
- ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
- ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
- /* 0x60 - 0x67 */
- ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcMem16|ModRM|Mov,
- 0, 0, 0, 0,
- /* 0x68 - 0x6F */
- ImplicitOps|Mov, DstReg|SrcImm|ModRM|Mov,
- ImplicitOps|Mov, DstReg|SrcImmByte|ModRM|Mov,
- ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
- /* 0x70 - 0x77 */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0x78 - 0x7F */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0x80 - 0x87 */
- ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
- ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- /* 0x88 - 0x8F */
- ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov,
- ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- DstMem|SrcReg|ModRM|Mov, DstReg|SrcNone|ModRM,
- DstReg|SrcMem|ModRM|Mov, DstMem|SrcNone|ModRM|Mov,
- /* 0x90 - 0x97 */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0x98 - 0x9F */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0xA0 - 0xA7 */
- ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
- ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
- ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
- ByteOp|ImplicitOps, ImplicitOps,
- /* 0xA8 - 0xAF */
- ByteOp|DstReg|SrcImm, DstReg|SrcImm,
- ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
- ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
- ByteOp|ImplicitOps, ImplicitOps,
- /* 0xB0 - 0xB7 */
- ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
- ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
- ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
- ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
- /* 0xB8 - 0xBF */
- DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
- DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
- /* 0xC0 - 0xC7 */
- ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
- ImplicitOps, ImplicitOps,
- DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov,
- /* 0xC8 - 0xCF */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0xD0 - 0xD7 */
- ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
- ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0xD8 - 0xDF */
- 0, ImplicitOps|ModRM|Mov, 0, ImplicitOps|ModRM|Mov,
- 0, ImplicitOps|ModRM|Mov, ImplicitOps|ModRM|Mov, ImplicitOps|ModRM|Mov,
- /* 0xE0 - 0xE7 */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0xE8 - 0xEF */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0xF0 - 0xF7 */
- 0, ImplicitOps, 0, 0,
- ImplicitOps, ImplicitOps,
- ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM,
- /* 0xF8 - 0xFF */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
-};
-
-static uint8_t twobyte_table[256] = {
- /* 0x00 - 0x07 */
- 0, ImplicitOps|ModRM, 0, 0, 0, 0, ImplicitOps, 0,
- /* 0x08 - 0x0F */
- ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
- /* 0x10 - 0x17 */
- 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x18 - 0x1F */
- ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
- ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
- /* 0x20 - 0x27 */
- ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
- 0, 0, 0, 0,
- /* 0x28 - 0x2F */
- 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x30 - 0x37 */
- ImplicitOps, ImplicitOps, ImplicitOps, 0, 0, 0, 0, 0,
- /* 0x38 - 0x3F */
- 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x40 - 0x47 */
- DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- /* 0x48 - 0x4F */
- DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- /* 0x50 - 0x5F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x60 - 0x6F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x70 - 0x7F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x80 - 0x87 */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0x88 - 0x8F */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0x90 - 0x97 */
- ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
- ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
- ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
- ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
- /* 0x98 - 0x9F */
- ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
- ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
- ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
- ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
- /* 0xA0 - 0xA7 */
- ImplicitOps, ImplicitOps, ImplicitOps, DstBitBase|SrcReg|ModRM,
- DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0,
- /* 0xA8 - 0xAF */
- ImplicitOps, ImplicitOps, 0, DstBitBase|SrcReg|ModRM,
- DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstReg|SrcMem|ModRM,
- /* 0xB0 - 0xB7 */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- DstReg|SrcMem|ModRM|Mov, DstBitBase|SrcReg|ModRM,
- DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
- ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
- /* 0xB8 - 0xBF */
- 0, 0, DstBitBase|SrcImmByte|ModRM, DstBitBase|SrcReg|ModRM,
- DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
- /* 0xC0 - 0xC7 */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0,
- 0, 0, 0, ImplicitOps|ModRM,
- /* 0xC8 - 0xCF */
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
- /* 0xD0 - 0xDF */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0xE0 - 0xEF */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0xF0 - 0xFF */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/* Type, address-of, and value of an instruction's operand. */
-struct operand {
- enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
- unsigned int bytes;
- unsigned long val, orig_val;
- union {
- /* OP_REG: Pointer to register field. */
- unsigned long *reg;
- /* OP_MEM: Segment and offset. */
- struct {
- enum x86_segment seg;
- unsigned long off;
- } mem;
- };
-};
-
-/* MSRs. */
-#define MSR_TSC 0x10
-
-/* Control register flags. */
-#define CR0_PE (1<<0)
-#define CR4_TSD (1<<2)
-
-/* EFLAGS bit definitions. */
-#define EFLG_VIP (1<<20)
-#define EFLG_VIF (1<<19)
-#define EFLG_AC (1<<18)
-#define EFLG_VM (1<<17)
-#define EFLG_RF (1<<16)
-#define EFLG_NT (1<<14)
-#define EFLG_IOPL (3<<12)
-#define EFLG_OF (1<<11)
-#define EFLG_DF (1<<10)
-#define EFLG_IF (1<<9)
-#define EFLG_TF (1<<8)
-#define EFLG_SF (1<<7)
-#define EFLG_ZF (1<<6)
-#define EFLG_AF (1<<4)
-#define EFLG_PF (1<<2)
-#define EFLG_CF (1<<0)
-
-/* Exception definitions. */
-#define EXC_DE 0
-#define EXC_DB 1
-#define EXC_BP 3
-#define EXC_OF 4
-#define EXC_BR 5
-#define EXC_UD 6
-#define EXC_TS 10
-#define EXC_NP 11
-#define EXC_SS 12
-#define EXC_GP 13
-#define EXC_PF 14
-#define EXC_MF 16
-
-/*
- * Instruction emulation:
- * Most instructions are emulated directly via a fragment of inline assembly
- * code. This allows us to save/restore EFLAGS and thus very easily pick up
- * any modified flags.
+ * Authors:
+ * Keir Fraser <keir.fraser@citrix.com>
*/
-#if defined(__x86_64__)
-#define _LO32 "k" /* force 32-bit operand */
-#define _STK "%%rsp" /* stack pointer */
-#define _BYTES_PER_LONG "8"
-#elif defined(__i386__)
-#define _LO32 "" /* force 32-bit operand */
-#define _STK "%%esp" /* stack pointer */
-#define _BYTES_PER_LONG "4"
-#endif
-
-/*
- * These EFLAGS bits are restored from saved value during emulation, and
- * any changes are written back to the saved value after emulation.
- */
-#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
-
-/* Before executing instruction: restore necessary bits in EFLAGS. */
-#define _PRE_EFLAGS(_sav, _msk, _tmp) \
-/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
-"movl %"_sav",%"_LO32 _tmp"; " \
-"push %"_tmp"; " \
-"push %"_tmp"; " \
-"movl %"_msk",%"_LO32 _tmp"; " \
-"andl %"_LO32 _tmp",("_STK"); " \
-"pushf; " \
-"notl %"_LO32 _tmp"; " \
-"andl %"_LO32 _tmp",("_STK"); " \
-"andl %"_LO32 _tmp",2*"_BYTES_PER_LONG"("_STK"); " \
-"pop %"_tmp"; " \
-"orl %"_LO32 _tmp",("_STK"); " \
-"popf; " \
-"pop %"_sav"; "
-
-/* After executing instruction: write-back necessary bits in EFLAGS. */
-#define _POST_EFLAGS(_sav, _msk, _tmp) \
-/* _sav |= EFLAGS & _msk; */ \
-"pushf; " \
-"pop %"_tmp"; " \
-"andl %"_msk",%"_LO32 _tmp"; " \
-"orl %"_LO32 _tmp",%"_sav"; "
+#include <asm/x86_emulate.h>
-/* Raw emulation: instruction has two explicit operands. */
-#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy)\
-do{ unsigned long _tmp; \
- switch ( (_dst).bytes ) \
- { \
- case 2: \
- asm volatile ( \
- _PRE_EFLAGS("0","4","2") \
- _op"w %"_wx"3,%1; " \
- _POST_EFLAGS("0","4","2") \
- : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
- : _wy ((_src).val), "i" (EFLAGS_MASK), \
- "m" (_eflags), "m" ((_dst).val) ); \
- break; \
- case 4: \
- asm volatile ( \
- _PRE_EFLAGS("0","4","2") \
- _op"l %"_lx"3,%1; " \
- _POST_EFLAGS("0","4","2") \
- : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
- : _ly ((_src).val), "i" (EFLAGS_MASK), \
- "m" (_eflags), "m" ((_dst).val) ); \
- break; \
- case 8: \
- __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy); \
- break; \
- } \
-} while (0)
-#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)\
-do{ unsigned long _tmp; \
- switch ( (_dst).bytes ) \
- { \
- case 1: \
- asm volatile ( \
- _PRE_EFLAGS("0","4","2") \
- _op"b %"_bx"3,%1; " \
- _POST_EFLAGS("0","4","2") \
- : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
- : _by ((_src).val), "i" (EFLAGS_MASK), \
- "m" (_eflags), "m" ((_dst).val) ); \
- break; \
- default: \
- __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy);\
- break; \
- } \
-} while (0)
-/* Source operand is byte-sized and may be restricted to just %cl. */
-#define emulate_2op_SrcB(_op, _src, _dst, _eflags) \
- __emulate_2op(_op, _src, _dst, _eflags, \
- "b", "c", "b", "c", "b", "c", "b", "c")
-/* Source operand is byte, word, long or quad sized. */
-#define emulate_2op_SrcV(_op, _src, _dst, _eflags) \
- __emulate_2op(_op, _src, _dst, _eflags, \
- "b", "q", "w", "r", _LO32, "r", "", "r")
-/* Source operand is word, long or quad sized. */
-#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \
- __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
- "w", "r", _LO32, "r", "", "r")
-
-/* Instruction has only one explicit operand (no source operand). */
-#define emulate_1op(_op,_dst,_eflags) \
-do{ unsigned long _tmp; \
- switch ( (_dst).bytes ) \
- { \
- case 1: \
- asm volatile ( \
- _PRE_EFLAGS("0","3","2") \
- _op"b %1; " \
- _POST_EFLAGS("0","3","2") \
- : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
- : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \
- break; \
- case 2: \
- asm volatile ( \
- _PRE_EFLAGS("0","3","2") \
- _op"w %1; " \
- _POST_EFLAGS("0","3","2") \
- : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
- : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \
- break; \
- case 4: \
- asm volatile ( \
- _PRE_EFLAGS("0","3","2") \
- _op"l %1; " \
- _POST_EFLAGS("0","3","2") \
- : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
- : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \
- break; \
- case 8: \
- __emulate_1op_8byte(_op, _dst, _eflags); \
- break; \
- } \
-} while (0)
-
-/* Emulate an instruction with quadword operands (x86/64 only). */
-#if defined(__x86_64__)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \
-do{ asm volatile ( \
- _PRE_EFLAGS("0","4","2") \
- _op"q %"_qx"3,%1; " \
- _POST_EFLAGS("0","4","2") \
- : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
- : _qy ((_src).val), "i" (EFLAGS_MASK), \
- "m" (_eflags), "m" ((_dst).val) ); \
-} while (0)
-#define __emulate_1op_8byte(_op, _dst, _eflags) \
-do{ asm volatile ( \
- _PRE_EFLAGS("0","3","2") \
- _op"q %1; " \
- _POST_EFLAGS("0","3","2") \
- : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
- : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \
-} while (0)
-#elif defined(__i386__)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
-#define __emulate_1op_8byte(_op, _dst, _eflags)
-#endif /* __i386__ */
+#undef cmpxchg
-#ifdef __XEN__
#define __emulate_fpu_insn(_op) \
do{ int _exn; \
asm volatile ( \
@@ -495,2935 +29,5 @@ do{ int _exn; \
: "=r" (_exn) : "0" (0) ); \
generate_exception_if(_exn, EXC_MF, -1); \
} while (0)
-#else
-#define __emulate_fpu_insn(_op) \
-do{ rc = X86EMUL_UNHANDLEABLE; \
- goto done; \
-} while (0)
-#endif
-
-
-/* Fetch next part of the instruction being emulated. */
-#define insn_fetch_bytes(_size) \
-({ unsigned long _x, _eip = _regs.eip; \
- if ( !mode_64bit() ) _eip = (uint32_t)_eip; /* ignore upper dword */ \
- _regs.eip += (_size); /* real hardware doesn't truncate */ \
- generate_exception_if((uint8_t)(_regs.eip - ctxt->regs->eip) > 15, \
- EXC_GP, 0); \
- rc = ops->insn_fetch(x86_seg_cs, _eip, &_x, (_size), ctxt); \
- if ( rc ) goto done; \
- _x; \
-})
-#define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type)))
-
-#define truncate_word(ea, byte_width) \
-({ unsigned long __ea = (ea); \
- unsigned int _width = (byte_width); \
- ((_width == sizeof(unsigned long)) ? __ea : \
- (__ea & ((1UL << (_width << 3)) - 1))); \
-})
-#define truncate_ea(ea) truncate_word((ea), ad_bytes)
-
-#define mode_64bit() (def_ad_bytes == 8)
-
-#define fail_if(p) \
-do { \
- rc = (p) ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY; \
- if ( rc ) goto done; \
-} while (0)
-
-#define generate_exception_if(p, e, ec) \
-({ if ( (p) ) { \
- fail_if(ops->inject_hw_exception == NULL); \
- rc = ops->inject_hw_exception(e, ec, ctxt) ? : X86EMUL_EXCEPTION; \
- goto done; \
- } \
-})
-
-/*
- * Given byte has even parity (even number of 1s)? SDM Vol. 1 Sec. 3.4.3.1,
- * "Status Flags": EFLAGS.PF reflects parity of least-sig. byte of result only.
- */
-static int even_parity(uint8_t v)
-{
- asm ( "test %b0,%b0; setp %b0" : "=a" (v) : "0" (v) );
- return v;
-}
-
-/* Update address held in a register, based on addressing mode. */
-#define _register_address_increment(reg, inc, byte_width) \
-do { \
- int _inc = (inc); /* signed type ensures sign extension to long */ \
- unsigned int _width = (byte_width); \
- if ( _width == sizeof(unsigned long) ) \
- (reg) += _inc; \
- else if ( mode_64bit() ) \
- (reg) = ((reg) + _inc) & ((1UL << (_width << 3)) - 1); \
- else \
- (reg) = ((reg) & ~((1UL << (_width << 3)) - 1)) | \
- (((reg) + _inc) & ((1UL << (_width << 3)) - 1)); \
-} while (0)
-#define register_address_increment(reg, inc) \
- _register_address_increment((reg), (inc), ad_bytes)
-
-#define sp_pre_dec(dec) ({ \
- _register_address_increment(_regs.esp, -(dec), ctxt->sp_size/8); \
- truncate_word(_regs.esp, ctxt->sp_size/8); \
-})
-#define sp_post_inc(inc) ({ \
- unsigned long __esp = truncate_word(_regs.esp, ctxt->sp_size/8); \
- _register_address_increment(_regs.esp, (inc), ctxt->sp_size/8); \
- __esp; \
-})
-
-#define jmp_rel(rel) \
-do { \
- int _rel = (int)(rel); \
- _regs.eip += _rel; \
- if ( !mode_64bit() ) \
- _regs.eip = ((op_bytes == 2) \
- ? (uint16_t)_regs.eip : (uint32_t)_regs.eip); \
-} while (0)
-
-static unsigned long __get_rep_prefix(
- struct cpu_user_regs *int_regs,
- struct cpu_user_regs *ext_regs,
- int ad_bytes)
-{
- unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx :
- (ad_bytes == 4) ? (uint32_t)int_regs->ecx :
- int_regs->ecx);
-
- /* Skip the instruction if no repetitions are required. */
- if ( ecx == 0 )
- ext_regs->eip = int_regs->eip;
-
- return ecx;
-}
-
-#define get_rep_prefix() ({ \
- unsigned long max_reps = 1; \
- if ( rep_prefix ) \
- max_reps = __get_rep_prefix(&_regs, ctxt->regs, ad_bytes); \
- if ( max_reps == 0 ) \
- goto done; \
- max_reps; \
-})
-
-static void __put_rep_prefix(
- struct cpu_user_regs *int_regs,
- struct cpu_user_regs *ext_regs,
- int ad_bytes,
- unsigned long reps_completed)
-{
- unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx :
- (ad_bytes == 4) ? (uint32_t)int_regs->ecx :
- int_regs->ecx);
-
- /* Reduce counter appropriately, and repeat instruction if non-zero. */
- ecx -= reps_completed;
- if ( ecx != 0 )
- int_regs->eip = ext_regs->eip;
-
- if ( ad_bytes == 2 )
- *(uint16_t *)&int_regs->ecx = ecx;
- else if ( ad_bytes == 4 )
- int_regs->ecx = (uint32_t)ecx;
- else
- int_regs->ecx = ecx;
-}
-
-#define put_rep_prefix(reps_completed) ({ \
- if ( rep_prefix ) \
- __put_rep_prefix(&_regs, ctxt->regs, ad_bytes, reps_completed); \
-})
-
-/*
- * Unsigned multiplication with double-word result.
- * IN: Multiplicand=m[0], Multiplier=m[1]
- * OUT: Return CF/OF (overflow status); Result=m[1]:m[0]
- */
-static int mul_dbl(unsigned long m[2])
-{
- int rc;
- asm ( "mul %4; seto %b2"
- : "=a" (m[0]), "=d" (m[1]), "=q" (rc)
- : "0" (m[0]), "1" (m[1]), "2" (0) );
- return rc;
-}
-
-/*
- * Signed multiplication with double-word result.
- * IN: Multiplicand=m[0], Multiplier=m[1]
- * OUT: Return CF/OF (overflow status); Result=m[1]:m[0]
- */
-static int imul_dbl(unsigned long m[2])
-{
- int rc;
- asm ( "imul %4; seto %b2"
- : "=a" (m[0]), "=d" (m[1]), "=q" (rc)
- : "0" (m[0]), "1" (m[1]), "2" (0) );
- return rc;
-}
-
-/*
- * Unsigned division of double-word dividend.
- * IN: Dividend=u[1]:u[0], Divisor=v
- * OUT: Return 1: #DE
- * Return 0: Quotient=u[0], Remainder=u[1]
- */
-static int div_dbl(unsigned long u[2], unsigned long v)
-{
- if ( (v == 0) || (u[1] >= v) )
- return 1;
- asm ( "div %4"
- : "=a" (u[0]), "=d" (u[1])
- : "0" (u[0]), "1" (u[1]), "r" (v) );
- return 0;
-}
-
-/*
- * Signed division of double-word dividend.
- * IN: Dividend=u[1]:u[0], Divisor=v
- * OUT: Return 1: #DE
- * Return 0: Quotient=u[0], Remainder=u[1]
- * NB. We don't use idiv directly as it's moderately hard to work out
- * ahead of time whether it will #DE, which we cannot allow to happen.
- */
-static int idiv_dbl(unsigned long u[2], unsigned long v)
-{
- int negu = (long)u[1] < 0, negv = (long)v < 0;
-
- /* u = abs(u) */
- if ( negu )
- {
- u[1] = ~u[1];
- if ( (u[0] = -u[0]) == 0 )
- u[1]++;
- }
-
- /* abs(u) / abs(v) */
- if ( div_dbl(u, negv ? -v : v) )
- return 1;
-
- /* Remainder has same sign as dividend. It cannot overflow. */
- if ( negu )
- u[1] = -u[1];
-
- /* Quotient is overflowed if sign bit is set. */
- if ( negu ^ negv )
- {
- if ( (long)u[0] >= 0 )
- u[0] = -u[0];
- else if ( (u[0] << 1) != 0 ) /* == 0x80...0 is okay */
- return 1;
- }
- else if ( (long)u[0] < 0 )
- return 1;
-
- return 0;
-}
-
-static int
-test_cc(
- unsigned int condition, unsigned int flags)
-{
- int rc = 0;
-
- switch ( (condition & 15) >> 1 )
- {
- case 0: /* o */
- rc |= (flags & EFLG_OF);
- break;
- case 1: /* b/c/nae */
- rc |= (flags & EFLG_CF);
- break;
- case 2: /* z/e */
- rc |= (flags & EFLG_ZF);
- break;
- case 3: /* be/na */
- rc |= (flags & (EFLG_CF|EFLG_ZF));
- break;
- case 4: /* s */
- rc |= (flags & EFLG_SF);
- break;
- case 5: /* p/pe */
- rc |= (flags & EFLG_PF);
- break;
- case 7: /* le/ng */
- rc |= (flags & EFLG_ZF);
- /* fall through */
- case 6: /* l/nge */
- rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
- break;
- }
-
- /* Odd condition identifiers (lsb == 1) have inverted sense. */
- return (!!rc ^ (condition & 1));
-}
-
-static int
-get_cpl(
- struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
-{
- struct segment_register reg;
-
- if ( ctxt->regs->eflags & EFLG_VM )
- return 3;
-
- if ( (ops->read_segment == NULL) ||
- ops->read_segment(x86_seg_ss, &reg, ctxt) )
- return -1;
-
- return reg.attr.fields.dpl;
-}
-
-static int
-_mode_iopl(
- struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
-{
- int cpl = get_cpl(ctxt, ops);
- if ( cpl == -1 )
- return -1;
- return ((cpl >= 0) && (cpl <= ((ctxt->regs->eflags >> 12) & 3)));
-}
-
-#define mode_ring0() ({ \
- int _cpl = get_cpl(ctxt, ops); \
- fail_if(_cpl < 0); \
- (_cpl == 0); \
-})
-#define mode_iopl() ({ \
- int _iopl = _mode_iopl(ctxt, ops); \
- fail_if(_iopl < 0); \
- _iopl; \
-})
-
-static int
-in_realmode(
- struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
-{
- unsigned long cr0;
- int rc;
-
- if ( ops->read_cr == NULL )
- return 0;
-
- rc = ops->read_cr(0, &cr0, ctxt);
- return (!rc && !(cr0 & CR0_PE));
-}
-
-static int
-realmode_load_seg(
- enum x86_segment seg,
- uint16_t sel,
- struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
-{
- struct segment_register reg;
- int rc;
-
- if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
- return rc;
-
- reg.sel = sel;
- reg.base = (uint32_t)sel << 4;
-
- return ops->write_segment(seg, &reg, ctxt);
-}
-
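realmode_load_seg() captures how little a real-mode selector load involves: no descriptor fetch at all, just base = selector << 4. A worked example of the resulting linear addresses (the helper is illustrative, not part of the emulator):

    #include <assert.h>
    #include <stdint.h>

    /* Real-mode linear address: (sel << 4) + offset, as in realmode_load_seg(). */
    static uint32_t realmode_linear(uint16_t sel, uint16_t off)
    {
        return ((uint32_t)sel << 4) + off;
    }

    int main(void)
    {
        assert(realmode_linear(0xf000, 0xfff0) == 0xffff0); /* reset vector */
        assert(realmode_linear(0x1234, 0x0010) == 0x12350);
        return 0;
    }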
-static int
-protmode_load_seg(
- enum x86_segment seg,
- uint16_t sel,
- struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
-{
- struct segment_register desctab, cs, segr;
- struct { uint32_t a, b; } desc;
- unsigned long val;
- uint8_t dpl, rpl, cpl;
- int rc, fault_type = EXC_TS;
-
- /* NULL selector? */
- if ( (sel & 0xfffc) == 0 )
- {
- if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) )
- goto raise_exn;
- memset(&segr, 0, sizeof(segr));
- return ops->write_segment(seg, &segr, ctxt);
- }
-
- /* LDT descriptor must be in the GDT. */
- if ( (seg == x86_seg_ldtr) && (sel & 4) )
- goto raise_exn;
-
- if ( (rc = ops->read_segment(x86_seg_cs, &cs, ctxt)) ||
- (rc = ops->read_segment((sel & 4) ? x86_seg_ldtr : x86_seg_gdtr,
- &desctab, ctxt)) )
- return rc;
-
- /* Check against descriptor table limit. */
- if ( ((sel & 0xfff8) + 7) > desctab.limit )
- goto raise_exn;
-
- do {
- if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8),
- &val, 4, ctxt)) )
- return rc;
- desc.a = val;
- if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
- &val, 4, ctxt)) )
- return rc;
- desc.b = val;
-
- /* Segment present in memory? */
- if ( !(desc.b & (1u<<15)) )
- {
- fault_type = EXC_NP;
- goto raise_exn;
- }
-
- /* LDT descriptor is a system segment. All others are code/data. */
- if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
- goto raise_exn;
-
- dpl = (desc.b >> 13) & 3;
- rpl = sel & 3;
- cpl = cs.sel & 3;
-
- switch ( seg )
- {
- case x86_seg_cs:
- /* Code segment? */
- if ( !(desc.b & (1u<<11)) )
- goto raise_exn;
- /* Non-conforming segment: check DPL against RPL. */
-            if ( ((desc.b & (6u<<9)) != (6u<<9)) && (dpl != rpl) )
- goto raise_exn;
- break;
- case x86_seg_ss:
- /* Writable data segment? */
- if ( (desc.b & (5u<<9)) != (1u<<9) )
- goto raise_exn;
- if ( (dpl != cpl) || (dpl != rpl) )
- goto raise_exn;
- break;
- case x86_seg_ldtr:
- /* LDT system segment? */
- if ( (desc.b & (15u<<8)) != (2u<<8) )
- goto raise_exn;
- goto skip_accessed_flag;
- default:
- /* Readable code or data segment? */
- if ( (desc.b & (5u<<9)) == (4u<<9) )
- goto raise_exn;
- /* Non-conforming segment: check DPL against RPL and CPL. */
-            if ( ((desc.b & (6u<<9)) != (6u<<9)) && ((dpl < cpl) || (dpl < rpl)) )
- goto raise_exn;
- break;
- }
-
- /* Ensure Accessed flag is set. */
- rc = ((desc.b & 0x100) ? X86EMUL_OKAY :
- ops->cmpxchg(
- x86_seg_none, desctab.base + (sel & 0xfff8) + 4, desc.b,
- desc.b | 0x100, 4, ctxt));
- } while ( rc == X86EMUL_CMPXCHG_FAILED );
-
- if ( rc )
- return rc;
-
- /* Force the Accessed flag in our local copy. */
- desc.b |= 0x100;
-
- skip_accessed_flag:
- segr.base = (((desc.b << 0) & 0xff000000u) |
- ((desc.b << 16) & 0x00ff0000u) |
- ((desc.a >> 16) & 0x0000ffffu));
- segr.attr.bytes = (((desc.b >> 8) & 0x00ffu) |
- ((desc.b >> 12) & 0x0f00u));
- segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
- if ( segr.attr.fields.g )
- segr.limit = (segr.limit << 12) | 0xfffu;
- segr.sel = sel;
- return ops->write_segment(seg, &segr, ctxt);
-
- raise_exn:
- if ( ops->inject_hw_exception == NULL )
- return X86EMUL_UNHANDLEABLE;
- if ( (rc = ops->inject_hw_exception(fault_type, sel & 0xfffc, ctxt)) )
- return rc;
- return X86EMUL_EXCEPTION;
-}
-
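The base/limit/attr reassembly at skip_accessed_flag undoes the descriptor format's scattered layout. A worked example with the conventional flat 32-bit code descriptor (desc.a = 0x0000ffff, desc.b = 0x00cf9b00), using the same shifts and masks as the listing:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t a = 0x0000ffff, b = 0x00cf9b00;  /* flat 4GiB code segment */
        uint32_t base, limit;
        uint16_t attr;

        base  = ((b <<  0) & 0xff000000u) |
                ((b << 16) & 0x00ff0000u) |
                ((a >> 16) & 0x0000ffffu);
        attr  = ((b >>  8) & 0x00ffu) |
                ((b >> 12) & 0x0f00u);
        limit = (b & 0x000f0000u) | (a & 0x0000ffffu);
        if ( attr & 0x800 )                       /* G bit: 4k granularity */
            limit = (limit << 12) | 0xfffu;

        assert(base == 0 && limit == 0xffffffffu && attr == 0xc9b);
        printf("base=%#x limit=%#x attr=%#x\n", base, limit, attr);
        return 0;
    }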
-static int
-load_seg(
- enum x86_segment seg,
- uint16_t sel,
- struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
-{
- if ( (ops->read_segment == NULL) ||
- (ops->write_segment == NULL) )
- return X86EMUL_UNHANDLEABLE;
-
- if ( in_realmode(ctxt, ops) )
- return realmode_load_seg(seg, sel, ctxt, ops);
-
- return protmode_load_seg(seg, sel, ctxt, ops);
-}
-
-void *
-decode_register(
- uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
-{
- void *p;
-
- switch ( modrm_reg )
- {
- case 0: p = &regs->eax; break;
- case 1: p = &regs->ecx; break;
- case 2: p = &regs->edx; break;
- case 3: p = &regs->ebx; break;
- case 4: p = (highbyte_regs ?
- ((unsigned char *)&regs->eax + 1) :
- (unsigned char *)&regs->esp); break;
- case 5: p = (highbyte_regs ?
- ((unsigned char *)&regs->ecx + 1) :
- (unsigned char *)&regs->ebp); break;
- case 6: p = (highbyte_regs ?
- ((unsigned char *)&regs->edx + 1) :
- (unsigned char *)&regs->esi); break;
- case 7: p = (highbyte_regs ?
- ((unsigned char *)&regs->ebx + 1) :
- (unsigned char *)&regs->edi); break;
-#if defined(__x86_64__)
- case 8: p = &regs->r8; break;
- case 9: p = &regs->r9; break;
- case 10: p = &regs->r10; break;
- case 11: p = &regs->r11; break;
- case 12: p = &regs->r12; break;
- case 13: p = &regs->r13; break;
- case 14: p = &regs->r14; break;
- case 15: p = &regs->r15; break;
-#endif
- default: p = NULL; break;
- }
-
- return p;
-}
-
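For byte operands without a REX prefix, encodings 4-7 select AH/CH/DH/BH, which decode_register() models as byte 1 of the corresponding dword register. That aliasing relies on x86's little-endian register file; a sketch with a stand-in register block (fake_regs is illustrative, not the real cpu_user_regs):

    #include <assert.h>
    #include <stdint.h>

    struct fake_regs { unsigned long eax, ecx, edx, ebx; };  /* illustrative */

    int main(void)
    {
        struct fake_regs r = { .eax = 0x11223344 };

        /* Encoding 0 with byte operands is AL: byte 0 of eax... */
        uint8_t *al = (uint8_t *)&r.eax;
        /* ...and encoding 4 (no REX) is AH: byte 1 of eax, as in the helper. */
        uint8_t *ah = (uint8_t *)&r.eax + 1;

        assert(*al == 0x44 && *ah == 0x33);     /* little-endian layout */
        *ah = 0xaa;
        assert(r.eax == 0x1122aa44);            /* writes alias the parent reg */
        return 0;
    }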
-#define decode_segment_failed x86_seg_tr
-enum x86_segment
-decode_segment(
- uint8_t modrm_reg)
-{
- switch ( modrm_reg )
- {
- case 0: return x86_seg_es;
- case 1: return x86_seg_cs;
- case 2: return x86_seg_ss;
- case 3: return x86_seg_ds;
- case 4: return x86_seg_fs;
- case 5: return x86_seg_gs;
- default: break;
- }
- return decode_segment_failed;
-}
-
-int
-x86_emulate(
- struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
-{
- /* Shadow copy of register state. Committed on successful emulation. */
- struct cpu_user_regs _regs = *ctxt->regs;
-
- uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
- uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
- unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
-#define REPE_PREFIX 1
-#define REPNE_PREFIX 2
- unsigned int lock_prefix = 0, rep_prefix = 0;
- int override_seg = -1, rc = X86EMUL_OKAY;
- struct operand src, dst;
-
- /* Data operand effective address (usually computed from ModRM). */
- struct operand ea;
-
- /* Default is a memory operand relative to segment DS. */
- ea.type = OP_MEM;
- ea.mem.seg = x86_seg_ds;
- ea.mem.off = 0;
-
- ctxt->retire.byte = 0;
-
- op_bytes = def_op_bytes = ad_bytes = def_ad_bytes = ctxt->addr_size/8;
- if ( op_bytes == 8 )
- {
- op_bytes = def_op_bytes = 4;
-#ifndef __x86_64__
- return X86EMUL_UNHANDLEABLE;
-#endif
- }
-
- /* Prefix bytes. */
- for ( ; ; )
- {
- switch ( b = insn_fetch_type(uint8_t) )
- {
- case 0x66: /* operand-size override */
- op_bytes = def_op_bytes ^ 6;
- break;
- case 0x67: /* address-size override */
- ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6);
- break;
- case 0x2e: /* CS override */
- override_seg = x86_seg_cs;
- break;
- case 0x3e: /* DS override */
- override_seg = x86_seg_ds;
- break;
- case 0x26: /* ES override */
- override_seg = x86_seg_es;
- break;
- case 0x64: /* FS override */
- override_seg = x86_seg_fs;
- break;
- case 0x65: /* GS override */
- override_seg = x86_seg_gs;
- break;
- case 0x36: /* SS override */
- override_seg = x86_seg_ss;
- break;
- case 0xf0: /* LOCK */
- lock_prefix = 1;
- break;
- case 0xf2: /* REPNE/REPNZ */
- rep_prefix = REPNE_PREFIX;
- break;
- case 0xf3: /* REP/REPE/REPZ */
- rep_prefix = REPE_PREFIX;
- break;
- case 0x40 ... 0x4f: /* REX */
- if ( !mode_64bit() )
- goto done_prefixes;
- rex_prefix = b;
- continue;
- default:
- goto done_prefixes;
- }
-
- /* Any legacy prefix after a REX prefix nullifies its effect. */
- rex_prefix = 0;
- }
- done_prefixes:
-
- if ( rex_prefix & 8 ) /* REX.W */
- op_bytes = 8;
-
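The XOR-by-a-constant updates above encode "toggle between the two legal sizes": 0x66 flips op_bytes between 4 and 2, and 0x67 flips ad_bytes between 8 and 4 in long mode (where def_op_bytes has already been forced to 4) or between 4 and 2 elsewhere. The arithmetic, spelled out:

    #include <assert.h>

    int main(void)
    {
        /* 0x66: op_bytes = def_op_bytes ^ 6 toggles 4 <-> 2 (and 2 <-> 4). */
        assert((4 ^ 6) == 2 && (2 ^ 6) == 4);

        /* 0x67: ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6), i.e.
         * 8 <-> 4 in long mode and 4 <-> 2 in 32-/16-bit code. */
        assert((8 ^ 12) == 4 && (4 ^ 12) == 8);
        return 0;
    }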
- /* Opcode byte(s). */
- d = opcode_table[b];
- if ( d == 0 )
- {
- /* Two-byte opcode? */
- if ( b == 0x0f )
- {
- twobyte = 1;
- b = insn_fetch_type(uint8_t);
- d = twobyte_table[b];
- }
-
- /* Unrecognised? */
- if ( d == 0 )
- goto cannot_emulate;
- }
-
- /* Lock prefix is allowed only on RMW instructions. */
- generate_exception_if((d & Mov) && lock_prefix, EXC_GP, 0);
-
- /* ModRM and SIB bytes. */
- if ( d & ModRM )
- {
- modrm = insn_fetch_type(uint8_t);
- modrm_mod = (modrm & 0xc0) >> 6;
- modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3);
- modrm_rm = modrm & 0x07;
-
- if ( modrm_mod == 3 )
- {
- modrm_rm |= (rex_prefix & 1) << 3;
- ea.type = OP_REG;
- ea.reg = decode_register(
- modrm_rm, &_regs, (d & ByteOp) && (rex_prefix == 0));
- }
- else if ( ad_bytes == 2 )
- {
- /* 16-bit ModR/M decode. */
- switch ( modrm_rm )
- {
- case 0:
- ea.mem.off = _regs.ebx + _regs.esi;
- break;
- case 1:
- ea.mem.off = _regs.ebx + _regs.edi;
- break;
- case 2:
- ea.mem.seg = x86_seg_ss;
- ea.mem.off = _regs.ebp + _regs.esi;
- break;
- case 3:
- ea.mem.seg = x86_seg_ss;
- ea.mem.off = _regs.ebp + _regs.edi;
- break;
- case 4:
- ea.mem.off = _regs.esi;
- break;
- case 5:
- ea.mem.off = _regs.edi;
- break;
- case 6:
- if ( modrm_mod == 0 )
- break;
- ea.mem.seg = x86_seg_ss;
- ea.mem.off = _regs.ebp;
- break;
- case 7:
- ea.mem.off = _regs.ebx;
- break;
- }
- switch ( modrm_mod )
- {
- case 0:
- if ( modrm_rm == 6 )
- ea.mem.off = insn_fetch_type(int16_t);
- break;
- case 1:
- ea.mem.off += insn_fetch_type(int8_t);
- break;
- case 2:
- ea.mem.off += insn_fetch_type(int16_t);
- break;
- }
- ea.mem.off = truncate_ea(ea.mem.off);
- }
- else
- {
- /* 32/64-bit ModR/M decode. */
- if ( modrm_rm == 4 )
- {
- sib = insn_fetch_type(uint8_t);
- sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
- sib_base = (sib & 7) | ((rex_prefix << 3) & 8);
- if ( sib_index != 4 )
- ea.mem.off = *(long*)decode_register(sib_index, &_regs, 0);
- ea.mem.off <<= (sib >> 6) & 3;
- if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
- ea.mem.off += insn_fetch_type(int32_t);
- else if ( sib_base == 4 )
- {
- ea.mem.seg = x86_seg_ss;
- ea.mem.off += _regs.esp;
- if ( !twobyte && (b == 0x8f) )
-                        /* POP <rm> computes its EA with ESP already incremented. */
- ea.mem.off += ((mode_64bit() && (op_bytes == 4))
- ? 8 : op_bytes);
- }
- else if ( sib_base == 5 )
- {
- ea.mem.seg = x86_seg_ss;
- ea.mem.off += _regs.ebp;
- }
- else
- ea.mem.off += *(long*)decode_register(sib_base, &_regs, 0);
- }
- else
- {
- modrm_rm |= (rex_prefix & 1) << 3;
- ea.mem.off = *(long *)decode_register(modrm_rm, &_regs, 0);
- if ( (modrm_rm == 5) && (modrm_mod != 0) )
- ea.mem.seg = x86_seg_ss;
- }
- switch ( modrm_mod )
- {
- case 0:
- if ( (modrm_rm & 7) != 5 )
- break;
- ea.mem.off = insn_fetch_type(int32_t);
- if ( !mode_64bit() )
- break;
- /* Relative to RIP of next instruction. Argh! */
- ea.mem.off += _regs.eip;
- if ( (d & SrcMask) == SrcImm )
- ea.mem.off += (d & ByteOp) ? 1 :
- ((op_bytes == 8) ? 4 : op_bytes);
- else if ( (d & SrcMask) == SrcImmByte )
- ea.mem.off += 1;
- else if ( !twobyte && ((b & 0xfe) == 0xf6) &&
- ((modrm_reg & 7) <= 1) )
-                    /* Special case in Grp3: test has an immediate operand. */
- ea.mem.off += (d & ByteOp) ? 1
- : ((op_bytes == 8) ? 4 : op_bytes);
- else if ( twobyte && ((b & 0xf7) == 0xa4) )
- /* SHLD/SHRD with immediate byte third operand. */
- ea.mem.off++;
- break;
- case 1:
- ea.mem.off += insn_fetch_type(int8_t);
- break;
- case 2:
- ea.mem.off += insn_fetch_type(int32_t);
- break;
- }
- ea.mem.off = truncate_ea(ea.mem.off);
- }
- }
-
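Two worked effective-address computations matching the decode paths above, with illustrative register values: in 16-bit decode, modrm 0x42 (mod=01, rm=2) means SS:[bp+si+disp8]; in 32-bit decode, a SIB byte of 0xb3 (scale=2, index=esi, base=ebx) scales the index before the base and displacement are added:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* 16-bit decode: modrm = 0x42 -> mod=01, rm=2 -> SS:[bp+si+disp8]. */
        uint16_t bp = 0x1000, si = 0x0200;
        int8_t  disp8 = -0x10;
        uint16_t ea16 = (uint16_t)(bp + si + disp8);   /* truncate_ea to 16 bits */
        assert(ea16 == 0x11f0);

        /* 32-bit decode: SIB = 0xb3 -> scale=2, index=esi, base=ebx. */
        uint32_t ebx = 0x2000, esi = 0x0100;
        int8_t  disp = 0x08;                           /* mod=01: disp8 follows */
        uint32_t ea32 = (esi << 2) + ebx + disp;       /* index scaled, then base */
        assert(ea32 == 0x2408);
        return 0;
    }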
- if ( override_seg != -1 )
- ea.mem.seg = override_seg;
-
- /* Special instructions do their own operand decoding. */
- if ( (d & DstMask) == ImplicitOps )
- goto special_insn;
-
- /* Decode and fetch the source operand: register, memory or immediate. */
- switch ( d & SrcMask )
- {
- case SrcNone:
- break;
- case SrcReg:
- src.type = OP_REG;
- if ( d & ByteOp )
- {
- src.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
- src.val = *(uint8_t *)src.reg;
- src.bytes = 1;
- }
- else
- {
- src.reg = decode_register(modrm_reg, &_regs, 0);
- switch ( (src.bytes = op_bytes) )
- {
- case 2: src.val = *(uint16_t *)src.reg; break;
- case 4: src.val = *(uint32_t *)src.reg; break;
- case 8: src.val = *(uint64_t *)src.reg; break;
- }
- }
- break;
- case SrcMem16:
- ea.bytes = 2;
- goto srcmem_common;
- case SrcMem:
- ea.bytes = (d & ByteOp) ? 1 : op_bytes;
- srcmem_common:
- src = ea;
- if ( src.type == OP_REG )
- {
- switch ( src.bytes )
- {
- case 1: src.val = *(uint8_t *)src.reg; break;
- case 2: src.val = *(uint16_t *)src.reg; break;
- case 4: src.val = *(uint32_t *)src.reg; break;
- case 8: src.val = *(uint64_t *)src.reg; break;
- }
- }
- else if ( (rc = ops->read(src.mem.seg, src.mem.off,
- &src.val, src.bytes, ctxt)) )
- goto done;
- break;
- case SrcImm:
- src.type = OP_IMM;
- src.bytes = (d & ByteOp) ? 1 : op_bytes;
- if ( src.bytes == 8 ) src.bytes = 4;
- /* NB. Immediates are sign-extended as necessary. */
- switch ( src.bytes )
- {
- case 1: src.val = insn_fetch_type(int8_t); break;
- case 2: src.val = insn_fetch_type(int16_t); break;
- case 4: src.val = insn_fetch_type(int32_t); break;
- }
- break;
- case SrcImmByte:
- src.type = OP_IMM;
- src.bytes = 1;
- src.val = insn_fetch_type(int8_t);
- break;
- }
-
- /* Decode and fetch the destination operand: register or memory. */
- switch ( d & DstMask )
- {
- case DstReg:
- dst.type = OP_REG;
- if ( d & ByteOp )
- {
- dst.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
- dst.val = *(uint8_t *)dst.reg;
- dst.bytes = 1;
- }
- else
- {
- dst.reg = decode_register(modrm_reg, &_regs, 0);
- switch ( (dst.bytes = op_bytes) )
- {
- case 2: dst.val = *(uint16_t *)dst.reg; break;
- case 4: dst.val = *(uint32_t *)dst.reg; break;
- case 8: dst.val = *(uint64_t *)dst.reg; break;
- }
- }
- break;
- case DstBitBase:
- if ( ((d & SrcMask) == SrcImmByte) || (ea.type == OP_REG) )
- {
- src.val &= (op_bytes << 3) - 1;
- }
- else
- {
-            /*
-             * EA += BitOffset DIV op_bytes*8
-             * BitOffset = BitOffset MOD op_bytes*8
-             * DIV truncates towards negative infinity.
-             * MOD always produces a positive result.
-             * (A worked example follows the operand-decode switch below.)
-             */
- if ( op_bytes == 2 )
- src.val = (int16_t)src.val;
- else if ( op_bytes == 4 )
- src.val = (int32_t)src.val;
- if ( (long)src.val < 0 )
- {
- unsigned long byte_offset;
- byte_offset = op_bytes + (((-src.val-1) >> 3) & ~(op_bytes-1));
- ea.mem.off -= byte_offset;
- src.val = (byte_offset << 3) + src.val;
- }
- else
- {
- ea.mem.off += (src.val >> 3) & ~(op_bytes - 1);
- src.val &= (op_bytes << 3) - 1;
- }
- }
-        /* Becomes a normal DstMem operation from here on. */
-        d = (d & ~DstMask) | DstMem;
-        /* fall through */
- case DstMem:
- ea.bytes = (d & ByteOp) ? 1 : op_bytes;
- dst = ea;
- if ( dst.type == OP_REG )
- {
- switch ( dst.bytes )
- {
- case 1: dst.val = *(uint8_t *)dst.reg; break;
- case 2: dst.val = *(uint16_t *)dst.reg; break;
- case 4: dst.val = *(uint32_t *)dst.reg; break;
- case 8: dst.val = *(uint64_t *)dst.reg; break;
- }
- }
- else if ( !(d & Mov) ) /* optimisation - avoid slow emulated read */
- {
- if ( (rc = ops->read(dst.mem.seg, dst.mem.off,
- &dst.val, dst.bytes, ctxt)) )
- goto done;
- dst.orig_val = dst.val;
- }
- break;
- }
-
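The worked example promised in the DstBitBase comment above: normalising a negative bit offset must move the EA down by whole op_bytes-sized words while keeping the absolute bit position unchanged. A sketch mirroring that arithmetic (values are illustrative):

    #include <assert.h>

    /* Normalise a signed bit offset against op_bytes-sized words,
     * mirroring the DstBitBase arithmetic above. */
    static void normalise(long *ea, long *bitoff, unsigned long op_bytes)
    {
        long off = *bitoff;

        if ( off < 0 )
        {
            unsigned long byte_offset =
                op_bytes + (((-off - 1) >> 3) & ~(op_bytes - 1));
            *ea -= byte_offset;                    /* EA += BitOffset DIV ... */
            *bitoff = (byte_offset << 3) + off;    /* ... MOD is positive     */
        }
        else
        {
            *ea += (off >> 3) & ~(op_bytes - 1);
            *bitoff = off & ((op_bytes << 3) - 1);
        }
    }

    int main(void)
    {
        long ea = 0x1000, bit = -9;    /* e.g. "bt [0x1000], -9", op_bytes=4 */

        normalise(&ea, &bit, 4);
        assert(ea == 0x0ffc && bit == 23);            /* same absolute bit */
        assert(ea * 8 + bit == 0x1000L * 8 - 9);
        assert(bit >= 0 && bit < 32);                 /* MOD is positive   */
        return 0;
    }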
- /* LOCK prefix allowed only on instructions with memory destination. */
- generate_exception_if(lock_prefix && (dst.type != OP_MEM), EXC_GP, 0);
-
- if ( twobyte )
- goto twobyte_insn;
-
- switch ( b )
- {
- case 0x04 ... 0x05: /* add imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x00 ... 0x03: add: /* add */
- emulate_2op_SrcV("add", src, dst, _regs.eflags);
- break;
-
- case 0x0c ... 0x0d: /* or imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x08 ... 0x0b: or: /* or */
- emulate_2op_SrcV("or", src, dst, _regs.eflags);
- break;
-
- case 0x14 ... 0x15: /* adc imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x10 ... 0x13: adc: /* adc */
- emulate_2op_SrcV("adc", src, dst, _regs.eflags);
- break;
-
- case 0x1c ... 0x1d: /* sbb imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x18 ... 0x1b: sbb: /* sbb */
- emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
- break;
-
- case 0x24 ... 0x25: /* and imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x20 ... 0x23: and: /* and */
- emulate_2op_SrcV("and", src, dst, _regs.eflags);
- break;
-
- case 0x2c ... 0x2d: /* sub imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x28 ... 0x2b: sub: /* sub */
- emulate_2op_SrcV("sub", src, dst, _regs.eflags);
- break;
-
- case 0x34 ... 0x35: /* xor imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x30 ... 0x33: xor: /* xor */
- emulate_2op_SrcV("xor", src, dst, _regs.eflags);
- break;
-
- case 0x3c ... 0x3d: /* cmp imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x38 ... 0x3b: cmp: /* cmp */
- emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
- break;
-
- case 0x62: /* bound */ {
- unsigned long src_val2;
- int lb, ub, idx;
- generate_exception_if(mode_64bit() || (src.type != OP_MEM),
- EXC_UD, -1);
- if ( (rc = ops->read(src.mem.seg, src.mem.off + op_bytes,
- &src_val2, op_bytes, ctxt)) )
- goto done;
- ub = (op_bytes == 2) ? (int16_t)src_val2 : (int32_t)src_val2;
- lb = (op_bytes == 2) ? (int16_t)src.val : (int32_t)src.val;
- idx = (op_bytes == 2) ? (int16_t)dst.val : (int32_t)dst.val;
- generate_exception_if((idx < lb) || (idx > ub), EXC_BR, -1);
- dst.type = OP_NONE;
- break;
- }
-
- case 0x63: /* movsxd (x86/64) / arpl (x86/32) */
- if ( mode_64bit() )
- {
- /* movsxd */
- if ( src.type == OP_REG )
- src.val = *(int32_t *)src.reg;
- else if ( (rc = ops->read(src.mem.seg, src.mem.off,
- &src.val, 4, ctxt)) )
- goto done;
- dst.val = (int32_t)src.val;
- }
- else
- {
- /* arpl */
- uint16_t src_val = dst.val;
- dst = src;
- _regs.eflags &= ~EFLG_ZF;
- _regs.eflags |= ((src_val & 3) > (dst.val & 3)) ? EFLG_ZF : 0;
- if ( _regs.eflags & EFLG_ZF )
- dst.val = (dst.val & ~3) | (src_val & 3);
- else
- dst.type = OP_NONE;
- generate_exception_if(in_realmode(ctxt, ops), EXC_UD, -1);
- }
- break;
-
- case 0x69: /* imul imm16/32 */
- case 0x6b: /* imul imm8 */ {
- unsigned long src1; /* ModR/M source operand */
- if ( ea.type == OP_REG )
- src1 = *ea.reg;
- else if ( (rc = ops->read(ea.mem.seg, ea.mem.off,
- &src1, op_bytes, ctxt)) )
- goto done;
- _regs.eflags &= ~(EFLG_OF|EFLG_CF);
- switch ( dst.bytes )
- {
- case 2:
- dst.val = ((uint32_t)(int16_t)src.val *
- (uint32_t)(int16_t)src1);
- if ( (int16_t)dst.val != (uint32_t)dst.val )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- break;
-#ifdef __x86_64__
- case 4:
- dst.val = ((uint64_t)(int32_t)src.val *
- (uint64_t)(int32_t)src1);
- if ( (int32_t)dst.val != dst.val )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- break;
-#endif
- default: {
- unsigned long m[2] = { src.val, src1 };
- if ( imul_dbl(m) )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- dst.val = m[0];
- break;
- }
- }
- break;
- }
-
- case 0x82: /* Grp1 (x86/32 only) */
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- case 0x80: case 0x81: case 0x83: /* Grp1 */
- switch ( modrm_reg & 7 )
- {
- case 0: goto add;
- case 1: goto or;
- case 2: goto adc;
- case 3: goto sbb;
- case 4: goto and;
- case 5: goto sub;
- case 6: goto xor;
- case 7: goto cmp;
- }
- break;
-
- case 0xa8 ... 0xa9: /* test imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x84 ... 0x85: test: /* test */
- emulate_2op_SrcV("test", src, dst, _regs.eflags);
- break;
-
- case 0x86 ... 0x87: xchg: /* xchg */
- /* Write back the register source. */
- switch ( dst.bytes )
- {
- case 1: *(uint8_t *)src.reg = (uint8_t)dst.val; break;
- case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
- case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
- case 8: *src.reg = dst.val; break;
- }
- /* Write back the memory destination with implicit LOCK prefix. */
- dst.val = src.val;
- lock_prefix = 1;
- break;
-
- case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
- generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
- case 0x88 ... 0x8b: /* mov */
- dst.val = src.val;
- break;
-
- case 0x8c: /* mov Sreg,r/m */ {
- struct segment_register reg;
- enum x86_segment seg = decode_segment(modrm_reg);
- generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
- fail_if(ops->read_segment == NULL);
- if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
- goto done;
- dst.val = reg.sel;
- if ( dst.type == OP_MEM )
- dst.bytes = 2;
- break;
- }
-
- case 0x8e: /* mov r/m,Sreg */ {
- enum x86_segment seg = decode_segment(modrm_reg);
- generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
- if ( (rc = load_seg(seg, (uint16_t)src.val, ctxt, ops)) != 0 )
- goto done;
- if ( seg == x86_seg_ss )
- ctxt->retire.flags.mov_ss = 1;
- dst.type = OP_NONE;
- break;
- }
-
- case 0x8d: /* lea */
- dst.val = ea.mem.off;
- break;
-
- case 0x8f: /* pop (sole member of Grp1a) */
- generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
- /* 64-bit mode: POP defaults to a 64-bit operand. */
- if ( mode_64bit() && (dst.bytes == 4) )
- dst.bytes = 8;
- if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes),
- &dst.val, dst.bytes, ctxt)) != 0 )
- goto done;
- break;
-
- case 0xb0 ... 0xb7: /* mov imm8,r8 */
- dst.reg = decode_register(
- (b & 7) | ((rex_prefix & 1) << 3), &_regs, (rex_prefix == 0));
- dst.val = src.val;
- break;
-
- case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
- if ( dst.bytes == 8 ) /* Fetch more bytes to obtain imm64 */
- src.val = ((uint32_t)src.val |
- ((uint64_t)insn_fetch_type(uint32_t) << 32));
- dst.reg = decode_register(
- (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
- dst.val = src.val;
- break;
-
- case 0xc0 ... 0xc1: grp2: /* Grp2 */
- switch ( modrm_reg & 7 )
- {
- case 0: /* rol */
- emulate_2op_SrcB("rol", src, dst, _regs.eflags);
- break;
- case 1: /* ror */
- emulate_2op_SrcB("ror", src, dst, _regs.eflags);
- break;
- case 2: /* rcl */
- emulate_2op_SrcB("rcl", src, dst, _regs.eflags);
- break;
- case 3: /* rcr */
- emulate_2op_SrcB("rcr", src, dst, _regs.eflags);
- break;
- case 4: /* sal/shl */
- case 6: /* sal/shl */
- emulate_2op_SrcB("sal", src, dst, _regs.eflags);
- break;
- case 5: /* shr */
- emulate_2op_SrcB("shr", src, dst, _regs.eflags);
- break;
- case 7: /* sar */
- emulate_2op_SrcB("sar", src, dst, _regs.eflags);
- break;
- }
- break;
-
- case 0xc4: /* les */ {
- unsigned long sel;
- dst.val = x86_seg_es;
- les: /* dst.val identifies the segment */
- generate_exception_if(src.type != OP_MEM, EXC_UD, -1);
- if ( (rc = ops->read(src.mem.seg, src.mem.off + src.bytes,
- &sel, 2, ctxt)) != 0 )
- goto done;
- if ( (rc = load_seg(dst.val, (uint16_t)sel, ctxt, ops)) != 0 )
- goto done;
- dst.val = src.val;
- break;
- }
-
- case 0xc5: /* lds */
- dst.val = x86_seg_ds;
- goto les;
-
- case 0xd0 ... 0xd1: /* Grp2 */
- src.val = 1;
- goto grp2;
-
- case 0xd2 ... 0xd3: /* Grp2 */
- src.val = _regs.ecx;
- goto grp2;
-
- case 0xf6 ... 0xf7: /* Grp3 */
- switch ( modrm_reg & 7 )
- {
- case 0 ... 1: /* test */
- /* Special case in Grp3: test has an immediate source operand. */
- src.type = OP_IMM;
- src.bytes = (d & ByteOp) ? 1 : op_bytes;
- if ( src.bytes == 8 ) src.bytes = 4;
- switch ( src.bytes )
- {
- case 1: src.val = insn_fetch_type(int8_t); break;
- case 2: src.val = insn_fetch_type(int16_t); break;
- case 4: src.val = insn_fetch_type(int32_t); break;
- }
- goto test;
- case 2: /* not */
- dst.val = ~dst.val;
- break;
- case 3: /* neg */
- emulate_1op("neg", dst, _regs.eflags);
- break;
- case 4: /* mul */
- src = dst;
- dst.type = OP_REG;
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = *dst.reg;
- _regs.eflags &= ~(EFLG_OF|EFLG_CF);
- switch ( src.bytes )
- {
- case 1:
- dst.val = (uint8_t)dst.val;
- dst.val *= src.val;
- if ( (uint8_t)dst.val != (uint16_t)dst.val )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- dst.bytes = 2;
- break;
- case 2:
- dst.val = (uint16_t)dst.val;
- dst.val *= src.val;
- if ( (uint16_t)dst.val != (uint32_t)dst.val )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- *(uint16_t *)&_regs.edx = dst.val >> 16;
- break;
-#ifdef __x86_64__
- case 4:
- dst.val = (uint32_t)dst.val;
- dst.val *= src.val;
- if ( (uint32_t)dst.val != dst.val )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- _regs.edx = (uint32_t)(dst.val >> 32);
- break;
-#endif
- default: {
- unsigned long m[2] = { src.val, dst.val };
- if ( mul_dbl(m) )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- _regs.edx = m[1];
- dst.val = m[0];
- break;
- }
- }
- break;
- case 5: /* imul */
- src = dst;
- dst.type = OP_REG;
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = *dst.reg;
- _regs.eflags &= ~(EFLG_OF|EFLG_CF);
- switch ( src.bytes )
- {
- case 1:
- dst.val = ((uint16_t)(int8_t)src.val *
- (uint16_t)(int8_t)dst.val);
- if ( (int8_t)dst.val != (uint16_t)dst.val )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- dst.bytes = 2;
- break;
- case 2:
- dst.val = ((uint32_t)(int16_t)src.val *
- (uint32_t)(int16_t)dst.val);
- if ( (int16_t)dst.val != (uint32_t)dst.val )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- *(uint16_t *)&_regs.edx = dst.val >> 16;
- break;
-#ifdef __x86_64__
- case 4:
- dst.val = ((uint64_t)(int32_t)src.val *
- (uint64_t)(int32_t)dst.val);
- if ( (int32_t)dst.val != dst.val )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- _regs.edx = (uint32_t)(dst.val >> 32);
- break;
-#endif
- default: {
- unsigned long m[2] = { src.val, dst.val };
- if ( imul_dbl(m) )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- _regs.edx = m[1];
- dst.val = m[0];
- break;
- }
- }
- break;
- case 6: /* div */ {
- unsigned long u[2], v;
- src = dst;
- dst.type = OP_REG;
- dst.reg = (unsigned long *)&_regs.eax;
- switch ( src.bytes )
- {
- case 1:
- u[0] = (uint16_t)_regs.eax;
- u[1] = 0;
- v = (uint8_t)src.val;
- generate_exception_if(
- div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]),
- EXC_DE, -1);
- dst.val = (uint8_t)u[0];
- ((uint8_t *)&_regs.eax)[1] = u[1];
- break;
- case 2:
- u[0] = ((uint32_t)_regs.edx << 16) | (uint16_t)_regs.eax;
- u[1] = 0;
- v = (uint16_t)src.val;
- generate_exception_if(
- div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]),
- EXC_DE, -1);
- dst.val = (uint16_t)u[0];
- *(uint16_t *)&_regs.edx = u[1];
- break;
-#ifdef __x86_64__
- case 4:
- u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax;
- u[1] = 0;
- v = (uint32_t)src.val;
- generate_exception_if(
- div_dbl(u, v) || ((uint32_t)u[0] != u[0]),
- EXC_DE, -1);
- dst.val = (uint32_t)u[0];
- _regs.edx = (uint32_t)u[1];
- break;
-#endif
- default:
- u[0] = _regs.eax;
- u[1] = _regs.edx;
- v = src.val;
- generate_exception_if(div_dbl(u, v), EXC_DE, -1);
- dst.val = u[0];
- _regs.edx = u[1];
- break;
- }
- break;
- }
- case 7: /* idiv */ {
- unsigned long u[2], v;
- src = dst;
- dst.type = OP_REG;
- dst.reg = (unsigned long *)&_regs.eax;
- switch ( src.bytes )
- {
- case 1:
- u[0] = (int16_t)_regs.eax;
- u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
- v = (int8_t)src.val;
- generate_exception_if(
- idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]),
- EXC_DE, -1);
- dst.val = (int8_t)u[0];
- ((int8_t *)&_regs.eax)[1] = u[1];
- break;
- case 2:
- u[0] = (int32_t)((_regs.edx << 16) | (uint16_t)_regs.eax);
- u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
- v = (int16_t)src.val;
- generate_exception_if(
- idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]),
- EXC_DE, -1);
- dst.val = (int16_t)u[0];
- *(int16_t *)&_regs.edx = u[1];
- break;
-#ifdef __x86_64__
- case 4:
- u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax;
- u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
- v = (int32_t)src.val;
- generate_exception_if(
- idiv_dbl(u, v) || ((int32_t)u[0] != u[0]),
- EXC_DE, -1);
- dst.val = (int32_t)u[0];
- _regs.edx = (uint32_t)u[1];
- break;
-#endif
- default:
- u[0] = _regs.eax;
- u[1] = _regs.edx;
- v = src.val;
- generate_exception_if(idiv_dbl(u, v), EXC_DE, -1);
- dst.val = u[0];
- _regs.edx = u[1];
- break;
- }
- break;
- }
- default:
- goto cannot_emulate;
- }
- break;
-
- case 0xfe: /* Grp4 */
- generate_exception_if((modrm_reg & 7) >= 2, EXC_UD, -1);
- case 0xff: /* Grp5 */
- switch ( modrm_reg & 7 )
- {
- case 0: /* inc */
- emulate_1op("inc", dst, _regs.eflags);
- break;
- case 1: /* dec */
- emulate_1op("dec", dst, _regs.eflags);
- break;
- case 2: /* call (near) */
- case 4: /* jmp (near) */
- if ( (dst.bytes != 8) && mode_64bit() )
- {
- dst.bytes = op_bytes = 8;
- if ( dst.type == OP_REG )
- dst.val = *dst.reg;
- else if ( (rc = ops->read(dst.mem.seg, dst.mem.off,
- &dst.val, 8, ctxt)) != 0 )
- goto done;
- }
- src.val = _regs.eip;
- _regs.eip = dst.val;
- if ( (modrm_reg & 7) == 2 )
- goto push; /* call */
- dst.type = OP_NONE;
- break;
- case 3: /* call (far, absolute indirect) */
- case 5: /* jmp (far, absolute indirect) */ {
- unsigned long sel;
-
- generate_exception_if(dst.type != OP_MEM, EXC_UD, -1);
-
- if ( (rc = ops->read(dst.mem.seg, dst.mem.off+dst.bytes,
- &sel, 2, ctxt)) )
- goto done;
-
- if ( (modrm_reg & 7) == 3 ) /* call */
- {
- struct segment_register reg;
- fail_if(ops->read_segment == NULL);
- if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) ||
- (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
- reg.sel, op_bytes, ctxt)) ||
- (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
- _regs.eip, op_bytes, ctxt)) )
- goto done;
- }
-
- if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 )
- goto done;
- _regs.eip = dst.val;
-
- dst.type = OP_NONE;
- break;
- }
- case 6: /* push */
- /* 64-bit mode: PUSH defaults to a 64-bit operand. */
- if ( mode_64bit() && (dst.bytes == 4) )
- {
- dst.bytes = 8;
- if ( dst.type == OP_REG )
- dst.val = *dst.reg;
- else if ( (rc = ops->read(dst.mem.seg, dst.mem.off,
- &dst.val, 8, ctxt)) != 0 )
- goto done;
- }
- if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
- dst.val, dst.bytes, ctxt)) != 0 )
- goto done;
- dst.type = OP_NONE;
- break;
- case 7:
- generate_exception_if(1, EXC_UD, -1);
- default:
- goto cannot_emulate;
- }
- break;
- }
-
- writeback:
- switch ( dst.type )
- {
- case OP_REG:
- /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
- switch ( dst.bytes )
- {
- case 1: *(uint8_t *)dst.reg = (uint8_t)dst.val; break;
- case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
- case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
- case 8: *dst.reg = dst.val; break;
- }
- break;
- case OP_MEM:
- if ( !(d & Mov) && (dst.orig_val == dst.val) &&
- !ctxt->force_writeback )
- /* nothing to do */;
- else if ( lock_prefix )
- rc = ops->cmpxchg(
- dst.mem.seg, dst.mem.off, dst.orig_val,
- dst.val, dst.bytes, ctxt);
- else
- rc = ops->write(
- dst.mem.seg, dst.mem.off, dst.val, dst.bytes, ctxt);
- if ( rc != 0 )
- goto done;
- default:
- break;
- }
-
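The 4-byte register writeback above deliberately stores through the whole unsigned long: a 32-bit destination write zero-extends on x86-64, whereas 8- and 16-bit writes merge into the existing value. A sketch of the difference, assuming a little-endian 64-bit build:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        unsigned long reg = 0xffffffffffffffffUL;

        /* 1- and 2-byte writes merge into the register... */
        *(uint8_t *)&reg = 0x12;
        assert(reg == 0xffffffffffffff12UL);

        /* ...but a 4-byte write clears the upper half, matching hardware
         * ("mov $x,%eax" zero-extends into %rax). */
        reg = 0xffffffffffffffffUL;
        reg = (uint32_t)0x12345678;
        assert(reg == 0x12345678UL);
        return 0;
    }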
- /* Commit shadow register state. */
- _regs.eflags &= ~EFLG_RF;
- *ctxt->regs = _regs;
- if ( (_regs.eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
- (ops->inject_hw_exception != NULL) )
- rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
-
- done:
- return rc;
-
- special_insn:
- dst.type = OP_NONE;
-
- /*
-     * The only implicit-operand instructions that allow a LOCK prefix are
-     * CMPXCHG{8,16}B, MOV CRn and MOV DRn.
- */
- generate_exception_if(lock_prefix &&
- ((b < 0x20) || (b > 0x23)) && /* MOV CRn/DRn */
- (b != 0xc7), /* CMPXCHG{8,16}B */
- EXC_GP, 0);
-
- if ( twobyte )
- goto twobyte_special_insn;
-
- switch ( b )
- {
- case 0x06: /* push %%es */ {
- struct segment_register reg;
- src.val = x86_seg_es;
- push_seg:
- fail_if(ops->read_segment == NULL);
- if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 )
- return rc;
- /* 64-bit mode: PUSH defaults to a 64-bit operand. */
- if ( mode_64bit() && (op_bytes == 4) )
- op_bytes = 8;
- if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
- reg.sel, op_bytes, ctxt)) != 0 )
- goto done;
- break;
- }
-
- case 0x07: /* pop %%es */
- src.val = x86_seg_es;
- pop_seg:
- fail_if(ops->write_segment == NULL);
- /* 64-bit mode: POP defaults to a 64-bit operand. */
- if ( mode_64bit() && (op_bytes == 4) )
- op_bytes = 8;
- if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
- &dst.val, op_bytes, ctxt)) != 0 )
- goto done;
- if ( (rc = load_seg(src.val, (uint16_t)dst.val, ctxt, ops)) != 0 )
- return rc;
- break;
-
- case 0x0e: /* push %%cs */
- src.val = x86_seg_cs;
- goto push_seg;
-
- case 0x16: /* push %%ss */
- src.val = x86_seg_ss;
- goto push_seg;
-
- case 0x17: /* pop %%ss */
- src.val = x86_seg_ss;
- ctxt->retire.flags.mov_ss = 1;
- goto pop_seg;
-
- case 0x1e: /* push %%ds */
- src.val = x86_seg_ds;
- goto push_seg;
-
- case 0x1f: /* pop %%ds */
- src.val = x86_seg_ds;
- goto pop_seg;
-
- case 0x27: /* daa */ {
- uint8_t al = _regs.eax;
- unsigned long eflags = _regs.eflags;
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- _regs.eflags &= ~(EFLG_CF|EFLG_AF);
- if ( ((al & 0x0f) > 9) || (eflags & EFLG_AF) )
- {
- *(uint8_t *)&_regs.eax += 6;
- _regs.eflags |= EFLG_AF;
- }
- if ( (al > 0x99) || (eflags & EFLG_CF) )
- {
- *(uint8_t *)&_regs.eax += 0x60;
- _regs.eflags |= EFLG_CF;
- }
- _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF);
- _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0;
- _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0;
- _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0;
- break;
- }
-
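A worked packed-BCD case for the DAA path: adding BCD 19 and BCD 28 with a plain ADD yields 0x41 with AF set, and the +6 adjustment restores the digits. A sketch mirroring the adjustment above (only AF/CF modelled; the helper is illustrative):

    #include <assert.h>
    #include <stdint.h>

    /* DAA adjustment as in the listing: *af/*cf carry the incoming flags
     * in and the adjusted flags out. */
    static uint8_t daa(uint8_t al, int *af, int *cf)
    {
        uint8_t orig = al;
        int new_af = 0, new_cf = 0;

        if ( ((al & 0x0f) > 9) || *af )
        {
            al += 6;
            new_af = 1;
        }
        if ( (orig > 0x99) || *cf )
        {
            al += 0x60;
            new_cf = 1;
        }
        *af = new_af; *cf = new_cf;
        return al;
    }

    int main(void)
    {
        /* BCD 19 + BCD 28 = 0x19 + 0x28 = 0x41 with AF set (9+8 carried). */
        int af = 1, cf = 0;
        assert(daa(0x41, &af, &cf) == 0x47);    /* BCD 47 */
        assert(af == 1 && cf == 0);
        return 0;
    }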
- case 0x2f: /* das */ {
- uint8_t al = _regs.eax;
- unsigned long eflags = _regs.eflags;
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- _regs.eflags &= ~(EFLG_CF|EFLG_AF);
- if ( ((al & 0x0f) > 9) || (eflags & EFLG_AF) )
- {
- _regs.eflags |= EFLG_AF;
- if ( (al < 6) || (eflags & EFLG_CF) )
- _regs.eflags |= EFLG_CF;
- *(uint8_t *)&_regs.eax -= 6;
- }
- if ( (al > 0x99) || (eflags & EFLG_CF) )
- {
- *(uint8_t *)&_regs.eax -= 0x60;
- _regs.eflags |= EFLG_CF;
- }
- _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF);
- _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0;
- _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0;
- _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0;
- break;
- }
-
- case 0x37: /* aaa */
- case 0x3f: /* aas */
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- _regs.eflags &= ~EFLG_CF;
- if ( ((uint8_t)_regs.eax > 9) || (_regs.eflags & EFLG_AF) )
- {
- ((uint8_t *)&_regs.eax)[0] += (b == 0x37) ? 6 : -6;
- ((uint8_t *)&_regs.eax)[1] += (b == 0x37) ? 1 : -1;
- _regs.eflags |= EFLG_CF | EFLG_AF;
- }
- ((uint8_t *)&_regs.eax)[0] &= 0x0f;
- break;
-
- case 0x40 ... 0x4f: /* inc/dec reg */
- dst.type = OP_REG;
- dst.reg = decode_register(b & 7, &_regs, 0);
- dst.bytes = op_bytes;
- dst.val = *dst.reg;
- if ( b & 8 )
- emulate_1op("dec", dst, _regs.eflags);
- else
- emulate_1op("inc", dst, _regs.eflags);
- break;
-
- case 0x50 ... 0x57: /* push reg */
- src.val = *(unsigned long *)decode_register(
- (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
- goto push;
-
- case 0x58 ... 0x5f: /* pop reg */
- dst.type = OP_REG;
- dst.reg = decode_register(
- (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
- dst.bytes = op_bytes;
- if ( mode_64bit() && (dst.bytes == 4) )
- dst.bytes = 8;
- if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes),
- &dst.val, dst.bytes, ctxt)) != 0 )
- goto done;
- break;
-
- case 0x60: /* pusha */ {
- int i;
- unsigned long regs[] = {
- _regs.eax, _regs.ecx, _regs.edx, _regs.ebx,
- _regs.esp, _regs.ebp, _regs.esi, _regs.edi };
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- for ( i = 0; i < 8; i++ )
- if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
- regs[i], op_bytes, ctxt)) != 0 )
- goto done;
- break;
- }
-
- case 0x61: /* popa */ {
- int i;
- unsigned long dummy_esp, *regs[] = {
- (unsigned long *)&_regs.edi, (unsigned long *)&_regs.esi,
- (unsigned long *)&_regs.ebp, (unsigned long *)&dummy_esp,
- (unsigned long *)&_regs.ebx, (unsigned long *)&_regs.edx,
- (unsigned long *)&_regs.ecx, (unsigned long *)&_regs.eax };
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- for ( i = 0; i < 8; i++ )
- {
- if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
- &dst.val, op_bytes, ctxt)) != 0 )
- goto done;
- switch ( op_bytes )
- {
- case 1: *(uint8_t *)regs[i] = (uint8_t)dst.val; break;
- case 2: *(uint16_t *)regs[i] = (uint16_t)dst.val; break;
- case 4: *regs[i] = (uint32_t)dst.val; break; /* 64b: zero-ext */
- case 8: *regs[i] = dst.val; break;
- }
- }
- break;
- }
-
- case 0x68: /* push imm{16,32,64} */
- src.val = ((op_bytes == 2)
- ? (int32_t)insn_fetch_type(int16_t)
- : insn_fetch_type(int32_t));
- goto push;
-
- case 0x6a: /* push imm8 */
- src.val = insn_fetch_type(int8_t);
- push:
- d |= Mov; /* force writeback */
- dst.type = OP_MEM;
- dst.bytes = op_bytes;
- if ( mode_64bit() && (dst.bytes == 4) )
- dst.bytes = 8;
- dst.val = src.val;
- dst.mem.seg = x86_seg_ss;
- dst.mem.off = sp_pre_dec(dst.bytes);
- break;
-
- case 0x6c ... 0x6d: /* ins %dx,%es:%edi */ {
- unsigned long nr_reps = get_rep_prefix();
- dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
- dst.mem.seg = x86_seg_es;
- dst.mem.off = truncate_ea(_regs.edi);
- if ( (nr_reps > 1) && (ops->rep_ins != NULL) &&
- ((rc = ops->rep_ins((uint16_t)_regs.edx, dst.mem.seg,
- dst.mem.off, dst.bytes,
- &nr_reps, ctxt)) != X86EMUL_UNHANDLEABLE) )
- {
- if ( rc != 0 )
- goto done;
- }
- else
- {
- fail_if(ops->read_io == NULL);
- if ( (rc = ops->read_io((uint16_t)_regs.edx, dst.bytes,
- &dst.val, ctxt)) != 0 )
- goto done;
- dst.type = OP_MEM;
- nr_reps = 1;
- }
- register_address_increment(
- _regs.edi,
- nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
- put_rep_prefix(nr_reps);
- break;
- }
-
- case 0x6e ... 0x6f: /* outs %esi,%dx */ {
- unsigned long nr_reps = get_rep_prefix();
- dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
- if ( (nr_reps > 1) && (ops->rep_outs != NULL) &&
- ((rc = ops->rep_outs(ea.mem.seg, truncate_ea(_regs.esi),
- (uint16_t)_regs.edx, dst.bytes,
- &nr_reps, ctxt)) != X86EMUL_UNHANDLEABLE) )
- {
- if ( rc != 0 )
- goto done;
- }
- else
- {
- if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
- &dst.val, dst.bytes, ctxt)) != 0 )
- goto done;
- fail_if(ops->write_io == NULL);
- if ( (rc = ops->write_io((uint16_t)_regs.edx, dst.bytes,
- dst.val, ctxt)) != 0 )
- goto done;
- nr_reps = 1;
- }
- register_address_increment(
- _regs.esi,
- nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
- put_rep_prefix(nr_reps);
- break;
- }
-
- case 0x70 ... 0x7f: /* jcc (short) */ {
- int rel = insn_fetch_type(int8_t);
- if ( test_cc(b, _regs.eflags) )
- jmp_rel(rel);
- break;
- }
-
-    case 0x90: /* nop / xchg %%r8,%%rax */
-        if ( !(rex_prefix & 1) )
-            break; /* nop */
-        /* fall through: xchg %%r8,%%rax when REX.B is set */
-
- case 0x91 ... 0x97: /* xchg reg,%%rax */
- src.type = dst.type = OP_REG;
- src.bytes = dst.bytes = op_bytes;
- src.reg = (unsigned long *)&_regs.eax;
- src.val = *src.reg;
- dst.reg = decode_register(
- (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
- dst.val = *dst.reg;
- goto xchg;
-
- case 0x98: /* cbw/cwde/cdqe */
- switch ( op_bytes )
- {
- case 2: *(int16_t *)&_regs.eax = (int8_t)_regs.eax; break; /* cbw */
- case 4: _regs.eax = (uint32_t)(int16_t)_regs.eax; break; /* cwde */
- case 8: _regs.eax = (int32_t)_regs.eax; break; /* cdqe */
- }
- break;
-
- case 0x99: /* cwd/cdq/cqo */
- switch ( op_bytes )
- {
- case 2:
- *(int16_t *)&_regs.edx = ((int16_t)_regs.eax < 0) ? -1 : 0;
- break;
- case 4:
- _regs.edx = (uint32_t)(((int32_t)_regs.eax < 0) ? -1 : 0);
- break;
- case 8:
-            _regs.edx = ((int64_t)_regs.eax < 0) ? -1 : 0;
- break;
- }
- break;
-
- case 0x9a: /* call (far, absolute) */ {
- struct segment_register reg;
- uint16_t sel;
- uint32_t eip;
-
- fail_if(ops->read_segment == NULL);
- generate_exception_if(mode_64bit(), EXC_UD, -1);
-
- eip = insn_fetch_bytes(op_bytes);
- sel = insn_fetch_type(uint16_t);
-
- if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) ||
- (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
- reg.sel, op_bytes, ctxt)) ||
- (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
- _regs.eip, op_bytes, ctxt)) )
- goto done;
-
- if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 )
- goto done;
- _regs.eip = eip;
- break;
- }
-
- case 0x9b: /* wait/fwait */
- fail_if(ops->load_fpu_ctxt == NULL);
- ops->load_fpu_ctxt(ctxt);
- __emulate_fpu_insn("fwait");
- break;
-
- case 0x9c: /* pushf */
- src.val = _regs.eflags;
- goto push;
-
- case 0x9d: /* popf */ {
- uint32_t mask = EFLG_VIP | EFLG_VIF | EFLG_VM;
- if ( !mode_ring0() )
- mask |= EFLG_IOPL;
- if ( !mode_iopl() )
- mask |= EFLG_IF;
- /* 64-bit mode: POP defaults to a 64-bit operand. */
- if ( mode_64bit() && (op_bytes == 4) )
- op_bytes = 8;
- if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
- &dst.val, op_bytes, ctxt)) != 0 )
- goto done;
- if ( op_bytes == 2 )
- dst.val = (uint16_t)dst.val | (_regs.eflags & 0xffff0000u);
- dst.val &= 0x257fd5;
- _regs.eflags &= mask;
- _regs.eflags |= (uint32_t)(dst.val & ~mask) | 0x02;
- break;
- }
-
- case 0x9e: /* sahf */
- *(uint8_t *)&_regs.eflags = (((uint8_t *)&_regs.eax)[1] & 0xd7) | 0x02;
- break;
-
- case 0x9f: /* lahf */
- ((uint8_t *)&_regs.eax)[1] = (_regs.eflags & 0xd7) | 0x02;
- break;
-
- case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
- /* Source EA is not encoded via ModRM. */
- dst.type = OP_REG;
- dst.reg = (unsigned long *)&_regs.eax;
- dst.bytes = (d & ByteOp) ? 1 : op_bytes;
- if ( (rc = ops->read(ea.mem.seg, insn_fetch_bytes(ad_bytes),
- &dst.val, dst.bytes, ctxt)) != 0 )
- goto done;
- break;
-
- case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
- /* Destination EA is not encoded via ModRM. */
- dst.type = OP_MEM;
- dst.mem.seg = ea.mem.seg;
- dst.mem.off = insn_fetch_bytes(ad_bytes);
- dst.bytes = (d & ByteOp) ? 1 : op_bytes;
- dst.val = (unsigned long)_regs.eax;
- break;
-
- case 0xa4 ... 0xa5: /* movs */ {
- unsigned long nr_reps = get_rep_prefix();
- dst.bytes = (d & ByteOp) ? 1 : op_bytes;
- dst.mem.seg = x86_seg_es;
- dst.mem.off = truncate_ea(_regs.edi);
- if ( (nr_reps > 1) && (ops->rep_movs != NULL) &&
- ((rc = ops->rep_movs(ea.mem.seg, truncate_ea(_regs.esi),
- dst.mem.seg, dst.mem.off, dst.bytes,
- &nr_reps, ctxt)) != X86EMUL_UNHANDLEABLE) )
- {
- if ( rc != 0 )
- goto done;
- }
- else
- {
- if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
- &dst.val, dst.bytes, ctxt)) != 0 )
- goto done;
- dst.type = OP_MEM;
- nr_reps = 1;
- }
- register_address_increment(
- _regs.esi,
- nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
- register_address_increment(
- _regs.edi,
- nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
- put_rep_prefix(nr_reps);
- break;
- }
-
- case 0xa6 ... 0xa7: /* cmps */ {
- unsigned long next_eip = _regs.eip;
- get_rep_prefix();
- src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
- if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
- &dst.val, dst.bytes, ctxt)) ||
- (rc = ops->read(x86_seg_es, truncate_ea(_regs.edi),
- &src.val, src.bytes, ctxt)) )
- goto done;
- register_address_increment(
- _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
- register_address_increment(
- _regs.edi, (_regs.eflags & EFLG_DF) ? -src.bytes : src.bytes);
- put_rep_prefix(1);
- /* cmp: dst - src ==> src=*%%edi,dst=*%%esi ==> *%%esi - *%%edi */
- emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
- if ( ((rep_prefix == REPE_PREFIX) && !(_regs.eflags & EFLG_ZF)) ||
- ((rep_prefix == REPNE_PREFIX) && (_regs.eflags & EFLG_ZF)) )
- _regs.eip = next_eip;
- break;
- }
-
- case 0xaa ... 0xab: /* stos */ {
- /* unsigned long max_reps = */get_rep_prefix();
- dst.type = OP_MEM;
- dst.bytes = (d & ByteOp) ? 1 : op_bytes;
- dst.mem.seg = x86_seg_es;
- dst.mem.off = truncate_ea(_regs.edi);
- dst.val = _regs.eax;
- register_address_increment(
- _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
- put_rep_prefix(1);
- break;
- }
-
- case 0xac ... 0xad: /* lods */ {
- /* unsigned long max_reps = */get_rep_prefix();
- dst.type = OP_REG;
- dst.bytes = (d & ByteOp) ? 1 : op_bytes;
- dst.reg = (unsigned long *)&_regs.eax;
- if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
- &dst.val, dst.bytes, ctxt)) != 0 )
- goto done;
- register_address_increment(
- _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
- put_rep_prefix(1);
- break;
- }
-
- case 0xae ... 0xaf: /* scas */ {
- unsigned long next_eip = _regs.eip;
- get_rep_prefix();
- src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
- dst.val = _regs.eax;
- if ( (rc = ops->read(x86_seg_es, truncate_ea(_regs.edi),
- &src.val, src.bytes, ctxt)) != 0 )
- goto done;
- register_address_increment(
- _regs.edi, (_regs.eflags & EFLG_DF) ? -src.bytes : src.bytes);
- put_rep_prefix(1);
- /* cmp: dst - src ==> src=*%%edi,dst=%%eax ==> %%eax - *%%edi */
- emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
- if ( ((rep_prefix == REPE_PREFIX) && !(_regs.eflags & EFLG_ZF)) ||
- ((rep_prefix == REPNE_PREFIX) && (_regs.eflags & EFLG_ZF)) )
- _regs.eip = next_eip;
- break;
- }
-
- case 0xc2: /* ret imm16 (near) */
- case 0xc3: /* ret (near) */ {
- int offset = (b == 0xc2) ? insn_fetch_type(uint16_t) : 0;
- op_bytes = mode_64bit() ? 8 : op_bytes;
- if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes + offset),
- &dst.val, op_bytes, ctxt)) != 0 )
- goto done;
- _regs.eip = dst.val;
- break;
- }
-
- case 0xc8: /* enter imm16,imm8 */ {
- uint16_t size = insn_fetch_type(uint16_t);
- uint8_t depth = insn_fetch_type(uint8_t) & 31;
- int i;
-
- dst.type = OP_REG;
- dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
- dst.reg = (unsigned long *)&_regs.ebp;
- if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
- _regs.ebp, dst.bytes, ctxt)) )
- goto done;
- dst.val = _regs.esp;
-
- if ( depth > 0 )
- {
- for ( i = 1; i < depth; i++ )
- {
- unsigned long ebp, temp_data;
- ebp = truncate_word(_regs.ebp - i*dst.bytes, ctxt->sp_size/8);
- if ( (rc = ops->read(x86_seg_ss, ebp,
- &temp_data, dst.bytes, ctxt)) ||
- (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
- temp_data, dst.bytes, ctxt)) )
- goto done;
- }
- if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
- dst.val, dst.bytes, ctxt)) )
- goto done;
- }
-
- sp_pre_dec(size);
- break;
- }
-
- case 0xc9: /* leave */
- /* First writeback, to %%esp. */
- dst.type = OP_REG;
- dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
- dst.reg = (unsigned long *)&_regs.esp;
- dst.val = _regs.ebp;
-
- /* Flush first writeback, since there is a second. */
- switch ( dst.bytes )
- {
- case 1: *(uint8_t *)dst.reg = (uint8_t)dst.val; break;
- case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
- case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
- case 8: *dst.reg = dst.val; break;
- }
-
- /* Second writeback, to %%ebp. */
- dst.reg = (unsigned long *)&_regs.ebp;
- if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes),
- &dst.val, dst.bytes, ctxt)) )
- goto done;
- break;
-
- case 0xca: /* ret imm16 (far) */
- case 0xcb: /* ret (far) */ {
- int offset = (b == 0xca) ? insn_fetch_type(uint16_t) : 0;
- op_bytes = mode_64bit() ? 8 : op_bytes;
- if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
- &dst.val, op_bytes, ctxt)) ||
- (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes + offset),
- &src.val, op_bytes, ctxt)) ||
- (rc = load_seg(x86_seg_cs, (uint16_t)src.val, ctxt, ops)) )
- goto done;
- _regs.eip = dst.val;
- break;
- }
-
- case 0xcc: /* int3 */
- src.val = EXC_BP;
- goto swint;
-
- case 0xcd: /* int imm8 */
- src.val = insn_fetch_type(uint8_t);
- swint:
- fail_if(ops->inject_sw_interrupt == NULL);
- rc = ops->inject_sw_interrupt(src.val, _regs.eip - ctxt->regs->eip,
- ctxt) ? : X86EMUL_EXCEPTION;
- goto done;
-
- case 0xce: /* into */
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- if ( !(_regs.eflags & EFLG_OF) )
- break;
- src.val = EXC_OF;
- goto swint;
-
- case 0xcf: /* iret */ {
- unsigned long cs, eip, eflags;
- uint32_t mask = EFLG_VIP | EFLG_VIF | EFLG_VM;
- if ( !mode_ring0() )
- mask |= EFLG_IOPL;
- if ( !mode_iopl() )
- mask |= EFLG_IF;
- fail_if(!in_realmode(ctxt, ops));
- if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
- &eip, op_bytes, ctxt)) ||
- (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
- &cs, op_bytes, ctxt)) ||
- (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
- &eflags, op_bytes, ctxt)) )
- goto done;
- if ( op_bytes == 2 )
- eflags = (uint16_t)eflags | (_regs.eflags & 0xffff0000u);
- eflags &= 0x257fd5;
- _regs.eflags &= mask;
- _regs.eflags |= (uint32_t)(eflags & ~mask) | 0x02;
- _regs.eip = eip;
- if ( (rc = load_seg(x86_seg_cs, (uint16_t)cs, ctxt, ops)) != 0 )
- goto done;
- break;
- }
-
- case 0xd4: /* aam */ {
- unsigned int base = insn_fetch_type(uint8_t);
- uint8_t al = _regs.eax;
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- generate_exception_if(base == 0, EXC_DE, -1);
- *(uint16_t *)&_regs.eax = ((al / base) << 8) | (al % base);
- _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF);
- _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0;
- _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0;
- _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0;
- break;
- }
-
- case 0xd5: /* aad */ {
- unsigned int base = insn_fetch_type(uint8_t);
- uint16_t ax = _regs.eax;
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- *(uint16_t *)&_regs.eax = (uint8_t)(ax + ((ax >> 8) * base));
- _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF);
- _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0;
- _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0;
- _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0;
- break;
- }
-
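AAM and AAD are an unsigned divide and multiply by the immediate base (10 unless an unusual encoding is used): AAM splits AL into AH = AL/base, AL = AL%base, and AAD folds AH back in. A worked round-trip using the same expressions as the two cases above:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        unsigned int base = 10;

        /* aam: 79 -> AH=7, AL=9 (the listing writes ((al/base)<<8)|(al%base)). */
        uint8_t  al = 79;
        uint16_t ax = ((al / base) << 8) | (al % base);
        assert(ax == 0x0709);

        /* aad: AH=7, AL=9 -> AL = 7*10 + 9 = 79, AH cleared. */
        ax = (uint8_t)(ax + ((ax >> 8) * base));
        assert(ax == 79);
        return 0;
    }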
- case 0xd6: /* salc */
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- *(uint8_t *)&_regs.eax = (_regs.eflags & EFLG_CF) ? 0xff : 0x00;
- break;
-
- case 0xd7: /* xlat */ {
- unsigned long al = (uint8_t)_regs.eax;
- if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.ebx + al),
- &al, 1, ctxt)) != 0 )
- goto done;
- *(uint8_t *)&_regs.eax = al;
- break;
- }
-
- case 0xd9: /* FPU 0xd9 */
- fail_if(ops->load_fpu_ctxt == NULL);
- ops->load_fpu_ctxt(ctxt);
- switch ( modrm )
- {
- case 0xc0: __emulate_fpu_insn(".byte 0xd9,0xc0"); break;
- case 0xc1: __emulate_fpu_insn(".byte 0xd9,0xc1"); break;
- case 0xc2: __emulate_fpu_insn(".byte 0xd9,0xc2"); break;
- case 0xc3: __emulate_fpu_insn(".byte 0xd9,0xc3"); break;
- case 0xc4: __emulate_fpu_insn(".byte 0xd9,0xc4"); break;
- case 0xc5: __emulate_fpu_insn(".byte 0xd9,0xc5"); break;
- case 0xc6: __emulate_fpu_insn(".byte 0xd9,0xc6"); break;
- case 0xc7: __emulate_fpu_insn(".byte 0xd9,0xc7"); break;
- case 0xe0: __emulate_fpu_insn(".byte 0xd9,0xe0"); break;
- case 0xe8: __emulate_fpu_insn(".byte 0xd9,0xe8"); break;
- case 0xee: __emulate_fpu_insn(".byte 0xd9,0xee"); break;
- default:
- fail_if((modrm_reg & 7) != 7);
- fail_if(modrm >= 0xc0);
- /* fnstcw m2byte */
- ea.bytes = 2;
- dst = ea;
- asm volatile ( "fnstcw %0" : "=m" (dst.val) );
- }
- break;
-
- case 0xdb: /* FPU 0xdb */
- fail_if(ops->load_fpu_ctxt == NULL);
- ops->load_fpu_ctxt(ctxt);
- fail_if(modrm != 0xe3);
- /* fninit */
- asm volatile ( "fninit" );
- break;
-
- case 0xdd: /* FPU 0xdd */
- fail_if(ops->load_fpu_ctxt == NULL);
- ops->load_fpu_ctxt(ctxt);
- fail_if((modrm_reg & 7) != 7);
- fail_if(modrm >= 0xc0);
- /* fnstsw m2byte */
- ea.bytes = 2;
- dst = ea;
- asm volatile ( "fnstsw %0" : "=m" (dst.val) );
- break;
-
- case 0xde: /* FPU 0xde */
- fail_if(ops->load_fpu_ctxt == NULL);
- ops->load_fpu_ctxt(ctxt);
- switch ( modrm )
- {
- case 0xd9: __emulate_fpu_insn(".byte 0xde,0xd9"); break;
- case 0xf8: __emulate_fpu_insn(".byte 0xde,0xf8"); break;
- case 0xf9: __emulate_fpu_insn(".byte 0xde,0xf9"); break;
- case 0xfa: __emulate_fpu_insn(".byte 0xde,0xfa"); break;
- case 0xfb: __emulate_fpu_insn(".byte 0xde,0xfb"); break;
- case 0xfc: __emulate_fpu_insn(".byte 0xde,0xfc"); break;
- case 0xfd: __emulate_fpu_insn(".byte 0xde,0xfd"); break;
- case 0xfe: __emulate_fpu_insn(".byte 0xde,0xfe"); break;
- case 0xff: __emulate_fpu_insn(".byte 0xde,0xff"); break;
- default: goto cannot_emulate;
- }
- break;
-
- case 0xdf: /* FPU 0xdf */
- fail_if(ops->load_fpu_ctxt == NULL);
- ops->load_fpu_ctxt(ctxt);
- fail_if(modrm != 0xe0);
- /* fnstsw %ax */
- dst.bytes = 2;
- dst.type = OP_REG;
- dst.reg = (unsigned long *)&_regs.eax;
- asm volatile ( "fnstsw %0" : "=m" (dst.val) );
- break;
-
- case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
- int rel = insn_fetch_type(int8_t);
- int do_jmp = !(_regs.eflags & EFLG_ZF); /* loopnz */
- if ( b == 0xe1 )
- do_jmp = !do_jmp; /* loopz */
- else if ( b == 0xe2 )
- do_jmp = 1; /* loop */
- switch ( ad_bytes )
- {
- case 2:
- do_jmp &= --(*(uint16_t *)&_regs.ecx) != 0;
- break;
- case 4:
- do_jmp &= --(*(uint32_t *)&_regs.ecx) != 0;
- _regs.ecx = (uint32_t)_regs.ecx; /* zero extend in x86/64 mode */
- break;
- default: /* case 8: */
- do_jmp &= --_regs.ecx != 0;
- break;
- }
- if ( do_jmp )
- jmp_rel(rel);
- break;
- }
-
- case 0xe3: /* jcxz/jecxz (short) */ {
- int rel = insn_fetch_type(int8_t);
- if ( (ad_bytes == 2) ? !(uint16_t)_regs.ecx :
- (ad_bytes == 4) ? !(uint32_t)_regs.ecx : !_regs.ecx )
- jmp_rel(rel);
- break;
- }
-
- case 0xe4: /* in imm8,%al */
- case 0xe5: /* in imm8,%eax */
- case 0xe6: /* out %al,imm8 */
- case 0xe7: /* out %eax,imm8 */
- case 0xec: /* in %dx,%al */
- case 0xed: /* in %dx,%eax */
- case 0xee: /* out %al,%dx */
- case 0xef: /* out %eax,%dx */ {
- unsigned int port = ((b < 0xe8)
- ? insn_fetch_type(uint8_t)
- : (uint16_t)_regs.edx);
- op_bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
- if ( b & 2 )
- {
- /* out */
- fail_if(ops->write_io == NULL);
- rc = ops->write_io(port, op_bytes, _regs.eax, ctxt);
-
- }
- else
- {
- /* in */
- dst.type = OP_REG;
- dst.bytes = op_bytes;
- dst.reg = (unsigned long *)&_regs.eax;
- fail_if(ops->read_io == NULL);
- rc = ops->read_io(port, dst.bytes, &dst.val, ctxt);
- }
- if ( rc != 0 )
- goto done;
- break;
- }
-
- case 0xe8: /* call (near) */ {
- int rel = (((op_bytes == 2) && !mode_64bit())
- ? (int32_t)insn_fetch_type(int16_t)
- : insn_fetch_type(int32_t));
- op_bytes = mode_64bit() ? 8 : op_bytes;
- src.val = _regs.eip;
- jmp_rel(rel);
- goto push;
- }
-
- case 0xe9: /* jmp (near) */ {
- int rel = (((op_bytes == 2) && !mode_64bit())
- ? (int32_t)insn_fetch_type(int16_t)
- : insn_fetch_type(int32_t));
- jmp_rel(rel);
- break;
- }
-
- case 0xea: /* jmp (far, absolute) */ {
- uint16_t sel;
- uint32_t eip;
- generate_exception_if(mode_64bit(), EXC_UD, -1);
- eip = insn_fetch_bytes(op_bytes);
- sel = insn_fetch_type(uint16_t);
- if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 )
- goto done;
- _regs.eip = eip;
- break;
- }
-
- case 0xeb: /* jmp (short) */ {
- int rel = insn_fetch_type(int8_t);
- jmp_rel(rel);
- break;
- }
-
- case 0xf1: /* int1 (icebp) */
- src.val = EXC_DB;
- goto swint;
-
- case 0xf4: /* hlt */
- ctxt->retire.flags.hlt = 1;
- break;
-
- case 0xf5: /* cmc */
- _regs.eflags ^= EFLG_CF;
- break;
-
- case 0xf8: /* clc */
- _regs.eflags &= ~EFLG_CF;
- break;
-
- case 0xf9: /* stc */
- _regs.eflags |= EFLG_CF;
- break;
-
- case 0xfa: /* cli */
- generate_exception_if(!mode_iopl(), EXC_GP, 0);
- _regs.eflags &= ~EFLG_IF;
- break;
-
- case 0xfb: /* sti */
- generate_exception_if(!mode_iopl(), EXC_GP, 0);
- if ( !(_regs.eflags & EFLG_IF) )
- {
- _regs.eflags |= EFLG_IF;
- ctxt->retire.flags.sti = 1;
- }
- break;
-
- case 0xfc: /* cld */
- _regs.eflags &= ~EFLG_DF;
- break;
-
- case 0xfd: /* std */
- _regs.eflags |= EFLG_DF;
- break;
- }
- goto writeback;
-
- twobyte_insn:
- switch ( b )
- {
- case 0x40 ... 0x4f: /* cmovcc */
- dst.val = src.val;
- if ( !test_cc(b, _regs.eflags) )
- dst.type = OP_NONE;
- break;
-
- case 0x90 ... 0x9f: /* setcc */
- dst.val = test_cc(b, _regs.eflags);
- break;
-
- case 0xb0 ... 0xb1: /* cmpxchg */
- /* Save real source value, then compare EAX against destination. */
- src.orig_val = src.val;
- src.val = _regs.eax;
- emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
- if ( _regs.eflags & EFLG_ZF )
- {
- /* Success: write back to memory. */
- dst.val = src.orig_val;
- }
- else
- {
- /* Failure: write the value we saw to EAX. */
- dst.type = OP_REG;
- dst.reg = (unsigned long *)&_regs.eax;
- }
- break;
-
- case 0xa3: bt: /* bt */
- emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
- break;
-
- case 0xa4: /* shld imm8,r,r/m */
- case 0xa5: /* shld %%cl,r,r/m */
- case 0xac: /* shrd imm8,r,r/m */
- case 0xad: /* shrd %%cl,r,r/m */ {
- uint8_t shift, width = dst.bytes << 3;
- shift = (b & 1) ? (uint8_t)_regs.ecx : insn_fetch_type(uint8_t);
- if ( (shift &= width - 1) == 0 )
- break;
- dst.orig_val = truncate_word(dst.val, dst.bytes);
- dst.val = ((shift == width) ? src.val :
- (b & 8) ?
- /* shrd */
- ((dst.orig_val >> shift) |
- truncate_word(src.val << (width - shift), dst.bytes)) :
- /* shld */
- ((dst.orig_val << shift) |
- ((src.val >> (width - shift)) & ((1ull << shift) - 1))));
- dst.val = truncate_word(dst.val, dst.bytes);
- _regs.eflags &= ~(EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_PF|EFLG_CF);
- if ( (dst.val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
- _regs.eflags |= EFLG_CF;
- if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 )
- _regs.eflags |= EFLG_OF;
- _regs.eflags |= ((dst.val >> (width - 1)) & 1) ? EFLG_SF : 0;
- _regs.eflags |= (dst.val == 0) ? EFLG_ZF : 0;
- _regs.eflags |= even_parity(dst.val) ? EFLG_PF : 0;
- break;
- }
-
- case 0xb3: btr: /* btr */
- emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
- break;
-
- case 0xab: bts: /* bts */
- emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
- break;
-
- case 0xaf: /* imul */
- _regs.eflags &= ~(EFLG_OF|EFLG_CF);
- switch ( dst.bytes )
- {
- case 2:
- dst.val = ((uint32_t)(int16_t)src.val *
- (uint32_t)(int16_t)dst.val);
- if ( (int16_t)dst.val != (uint32_t)dst.val )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- break;
-#ifdef __x86_64__
- case 4:
- dst.val = ((uint64_t)(int32_t)src.val *
- (uint64_t)(int32_t)dst.val);
- if ( (int32_t)dst.val != dst.val )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- break;
-#endif
- default: {
- unsigned long m[2] = { src.val, dst.val };
- if ( imul_dbl(m) )
- _regs.eflags |= EFLG_OF|EFLG_CF;
- dst.val = m[0];
- break;
- }
- }
- break;
-
- case 0xb2: /* lss */
- dst.val = x86_seg_ss;
- goto les;
-
- case 0xb4: /* lfs */
- dst.val = x86_seg_fs;
- goto les;
-
- case 0xb5: /* lgs */
- dst.val = x86_seg_gs;
- goto les;
-
- case 0xb6: /* movzx rm8,r{16,32,64} */
- /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
- dst.reg = decode_register(modrm_reg, &_regs, 0);
- dst.bytes = op_bytes;
- dst.val = (uint8_t)src.val;
- break;
-
- case 0xbc: /* bsf */ {
- int zf;
- asm ( "bsf %2,%0; setz %b1"
- : "=r" (dst.val), "=q" (zf)
- : "r" (src.val), "1" (0) );
- _regs.eflags &= ~EFLG_ZF;
- _regs.eflags |= zf ? EFLG_ZF : 0;
- break;
- }
-
- case 0xbd: /* bsr */ {
- int zf;
- asm ( "bsr %2,%0; setz %b1"
- : "=r" (dst.val), "=q" (zf)
- : "r" (src.val), "1" (0) );
- _regs.eflags &= ~EFLG_ZF;
- _regs.eflags |= zf ? EFLG_ZF : 0;
- break;
- }
-
- case 0xb7: /* movzx rm16,r{16,32,64} */
- dst.val = (uint16_t)src.val;
- break;
-
- case 0xbb: btc: /* btc */
- emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
- break;
-
- case 0xba: /* Grp8 */
- switch ( modrm_reg & 7 )
- {
- case 4: goto bt;
- case 5: goto bts;
- case 6: goto btr;
- case 7: goto btc;
- default: generate_exception_if(1, EXC_UD, -1);
- }
- break;
-
- case 0xbe: /* movsx rm8,r{16,32,64} */
- /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
- dst.reg = decode_register(modrm_reg, &_regs, 0);
- dst.bytes = op_bytes;
- dst.val = (int8_t)src.val;
- break;
-
- case 0xbf: /* movsx rm16,r{16,32,64} */
- dst.val = (int16_t)src.val;
- break;
-
- case 0xc0 ... 0xc1: /* xadd */
- /* Write back the register source. */
- switch ( dst.bytes )
- {
- case 1: *(uint8_t *)src.reg = (uint8_t)dst.val; break;
- case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
- case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
- case 8: *src.reg = dst.val; break;
- }
- goto add;
- }
- goto writeback;
-
- twobyte_special_insn:
- switch ( b )
- {
- case 0x01: /* Grp7 */ {
- struct segment_register reg;
- unsigned long base, limit, cr0, cr0w;
-
- if ( modrm == 0xdf ) /* invlpga */
- {
- generate_exception_if(in_realmode(ctxt, ops), EXC_UD, -1);
- generate_exception_if(!mode_ring0(), EXC_GP, 0);
- fail_if(ops->invlpg == NULL);
- if ( (rc = ops->invlpg(x86_seg_none, truncate_ea(_regs.eax),
- ctxt)) )
- goto done;
- break;
- }
-
- switch ( modrm_reg & 7 )
- {
- case 0: /* sgdt */
- case 1: /* sidt */
- generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
- fail_if(ops->read_segment == NULL);
- if ( (rc = ops->read_segment((modrm_reg & 1) ?
- x86_seg_idtr : x86_seg_gdtr,
- &reg, ctxt)) )
- goto done;
- if ( op_bytes == 2 )
- reg.base &= 0xffffff;
- if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0,
- reg.limit, 2, ctxt)) ||
- (rc = ops->write(ea.mem.seg, ea.mem.off+2,
- reg.base, mode_64bit() ? 8 : 4, ctxt)) )
- goto done;
- break;
- case 2: /* lgdt */
- case 3: /* lidt */
- generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
- fail_if(ops->write_segment == NULL);
- memset(&reg, 0, sizeof(reg));
- if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0,
- &limit, 2, ctxt)) ||
- (rc = ops->read(ea.mem.seg, ea.mem.off+2,
- &base, mode_64bit() ? 8 : 4, ctxt)) )
- goto done;
- reg.base = base;
- reg.limit = limit;
- if ( op_bytes == 2 )
- reg.base &= 0xffffff;
- if ( (rc = ops->write_segment((modrm_reg & 1) ?
- x86_seg_idtr : x86_seg_gdtr,
- &reg, ctxt)) )
- goto done;
- break;
- case 4: /* smsw */
- ea.bytes = 2;
- dst = ea;
- fail_if(ops->read_cr == NULL);
- if ( (rc = ops->read_cr(0, &dst.val, ctxt)) )
- goto done;
- d |= Mov; /* force writeback */
- break;
- case 6: /* lmsw */
- fail_if(ops->read_cr == NULL);
- fail_if(ops->write_cr == NULL);
- if ( (rc = ops->read_cr(0, &cr0, ctxt)) )
- goto done;
- if ( ea.type == OP_REG )
- cr0w = *ea.reg;
- else if ( (rc = ops->read(ea.mem.seg, ea.mem.off,
- &cr0w, 2, ctxt)) )
- goto done;
- cr0 &= 0xffff0000;
- cr0 |= (uint16_t)cr0w;
- if ( (rc = ops->write_cr(0, cr0, ctxt)) )
- goto done;
- break;
- case 7: /* invlpg */
- generate_exception_if(!mode_ring0(), EXC_GP, 0);
- generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
- fail_if(ops->invlpg == NULL);
- if ( (rc = ops->invlpg(ea.mem.seg, ea.mem.off, ctxt)) )
- goto done;
- break;
- default:
- goto cannot_emulate;
- }
- break;
- }
-
- case 0x06: /* clts */
- generate_exception_if(!mode_ring0(), EXC_GP, 0);
- fail_if((ops->read_cr == NULL) || (ops->write_cr == NULL));
- if ( (rc = ops->read_cr(0, &dst.val, ctxt)) ||
- (rc = ops->write_cr(0, dst.val&~8, ctxt)) )
- goto done;
- break;
-
- case 0x08: /* invd */
- case 0x09: /* wbinvd */
- generate_exception_if(!mode_ring0(), EXC_GP, 0);
- fail_if(ops->wbinvd == NULL);
- if ( (rc = ops->wbinvd(ctxt)) != 0 )
- goto done;
- break;
-
- case 0x0d: /* GrpP (prefetch) */
- case 0x18: /* Grp16 (prefetch/nop) */
- case 0x19 ... 0x1f: /* nop (amd-defined) */
- break;
-
- case 0x20: /* mov cr,reg */
- case 0x21: /* mov dr,reg */
- case 0x22: /* mov reg,cr */
- case 0x23: /* mov reg,dr */
- generate_exception_if(ea.type != OP_REG, EXC_UD, -1);
- generate_exception_if(!mode_ring0(), EXC_GP, 0);
- modrm_reg |= lock_prefix << 3;
- if ( b & 2 )
- {
- /* Write to CR/DR. */
- src.val = *(unsigned long *)decode_register(modrm_rm, &_regs, 0);
- if ( !mode_64bit() )
- src.val = (uint32_t)src.val;
- rc = ((b & 1)
- ? (ops->write_dr
- ? ops->write_dr(modrm_reg, src.val, ctxt)
- : X86EMUL_UNHANDLEABLE)
- : (ops->write_cr
- ? ops->write_cr(modrm_reg, src.val, ctxt)
- : X86EMUL_UNHANDLEABLE));
- }
- else
- {
- /* Read from CR/DR. */
- dst.type = OP_REG;
- dst.bytes = mode_64bit() ? 8 : 4;
- dst.reg = decode_register(modrm_rm, &_regs, 0);
- rc = ((b & 1)
- ? (ops->read_dr
- ? ops->read_dr(modrm_reg, &dst.val, ctxt)
- : X86EMUL_UNHANDLEABLE)
- : (ops->read_cr
- ? ops->read_cr(modrm_reg, &dst.val, ctxt)
- : X86EMUL_UNHANDLEABLE));
- }
- if ( rc != 0 )
- goto done;
- break;
-
- case 0x30: /* wrmsr */ {
- uint64_t val = ((uint64_t)_regs.edx << 32) | (uint32_t)_regs.eax;
- generate_exception_if(!mode_ring0(), EXC_GP, 0);
- fail_if(ops->write_msr == NULL);
- if ( (rc = ops->write_msr((uint32_t)_regs.ecx, val, ctxt)) != 0 )
- goto done;
- break;
- }
-
- case 0x31: /* rdtsc */ {
- unsigned long cr4;
- uint64_t val;
- fail_if(ops->read_cr == NULL);
- if ( (rc = ops->read_cr(4, &cr4, ctxt)) )
- goto done;
- generate_exception_if((cr4 & CR4_TSD) && !mode_ring0(), EXC_GP, 0);
- fail_if(ops->read_msr == NULL);
- if ( (rc = ops->read_msr(MSR_TSC, &val, ctxt)) != 0 )
- goto done;
- _regs.edx = (uint32_t)(val >> 32);
- _regs.eax = (uint32_t)(val >> 0);
- break;
- }
-
- case 0x32: /* rdmsr */ {
- uint64_t val;
- generate_exception_if(!mode_ring0(), EXC_GP, 0);
- fail_if(ops->read_msr == NULL);
- if ( (rc = ops->read_msr((uint32_t)_regs.ecx, &val, ctxt)) != 0 )
- goto done;
- _regs.edx = (uint32_t)(val >> 32);
- _regs.eax = (uint32_t)(val >> 0);
- break;
- }
-
- case 0x80 ... 0x8f: /* jcc (near) */ {
- int rel = (((op_bytes == 2) && !mode_64bit())
- ? (int32_t)insn_fetch_type(int16_t)
- : insn_fetch_type(int32_t));
- if ( test_cc(b, _regs.eflags) )
- jmp_rel(rel);
- break;
- }
-
- case 0xa0: /* push %%fs */
- src.val = x86_seg_fs;
- goto push_seg;
-
- case 0xa1: /* pop %%fs */
- src.val = x86_seg_fs;
- goto pop_seg;
-
- case 0xa2: /* cpuid */ {
- unsigned int eax = _regs.eax, ebx = _regs.ebx;
- unsigned int ecx = _regs.ecx, edx = _regs.edx;
- fail_if(ops->cpuid == NULL);
- if ( (rc = ops->cpuid(&eax, &ebx, &ecx, &edx, ctxt)) != 0 )
- goto done;
- _regs.eax = eax; _regs.ebx = ebx;
- _regs.ecx = ecx; _regs.edx = edx;
- break;
- }
-
- case 0xa8: /* push %%gs */
- src.val = x86_seg_gs;
- goto push_seg;
-
- case 0xa9: /* pop %%gs */
- src.val = x86_seg_gs;
- goto pop_seg;
-
- case 0xc7: /* Grp9 (cmpxchg8b) */
-#if defined(__i386__)
- {
- unsigned long old_lo, old_hi;
- generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
- generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
- if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &old_lo, 4, ctxt)) ||
- (rc = ops->read(ea.mem.seg, ea.mem.off+4, &old_hi, 4, ctxt)) )
- goto done;
- if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
- {
- _regs.eax = old_lo;
- _regs.edx = old_hi;
- _regs.eflags &= ~EFLG_ZF;
- }
- else if ( ops->cmpxchg8b == NULL )
- {
- rc = X86EMUL_UNHANDLEABLE;
- goto done;
- }
- else
- {
- if ( (rc = ops->cmpxchg8b(ea.mem.seg, ea.mem.off, old_lo, old_hi,
- _regs.ebx, _regs.ecx, ctxt)) != 0 )
- goto done;
- _regs.eflags |= EFLG_ZF;
- }
- break;
- }
-#elif defined(__x86_64__)
- {
- unsigned long old, new;
- generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
- generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
- if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &old, 8, ctxt)) != 0 )
- goto done;
- if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
- ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
- {
- _regs.eax = (uint32_t)(old>>0);
- _regs.edx = (uint32_t)(old>>32);
- _regs.eflags &= ~EFLG_ZF;
- }
- else
- {
- new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
- if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
- new, 8, ctxt)) != 0 )
- goto done;
- _regs.eflags |= EFLG_ZF;
- }
- break;
- }
-#endif
-
- case 0xc8 ... 0xcf: /* bswap */
- dst.type = OP_REG;
- dst.reg = decode_register(
- (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
- switch ( dst.bytes = op_bytes )
- {
- default: /* case 2: */
- /* Undefined behaviour. Writes zero on all tested CPUs. */
- dst.val = 0;
- break;
- case 4:
-#ifdef __x86_64__
- asm ( "bswap %k0" : "=r" (dst.val) : "0" (*dst.reg) );
- break;
- case 8:
-#endif
- asm ( "bswap %0" : "=r" (dst.val) : "0" (*dst.reg) );
- break;
- }
- break;
- }
- goto writeback;
- cannot_emulate:
-#if 0
- gdprintk(XENLOG_DEBUG, "Instr:");
- for ( ea.mem.off = ctxt->regs->eip; ea.mem.off < _regs.eip; ea.mem.off++ )
- {
- unsigned long x;
- ops->insn_fetch(x86_seg_cs, ea.mem.off, &x, 1, ctxt);
- printk(" %02x", (uint8_t)x);
- }
- printk("\n");
-#endif
- return X86EMUL_UNHANDLEABLE;
-}
+#include "x86_emulate/x86_emulate.c"
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
new file mode 100644
index 0000000000..9983a72904
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -0,0 +1,3429 @@
+/******************************************************************************
+ * x86_emulate.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * Copyright (c) 2005-2007 Keir Fraser
+ * Copyright (c) 2005-2007 XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Operand sizes: 8-bit operands or specified/overridden size. */
+#define ByteOp (1<<0) /* 8-bit operands. */
+/* Destination operand type. */
+#define DstBitBase (0<<1) /* Memory operand, bit string. */
+#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
+#define DstReg (2<<1) /* Register operand. */
+#define DstMem (3<<1) /* Memory operand. */
+#define DstMask (3<<1)
+/* Source operand type. */
+#define SrcNone (0<<3) /* No source operand. */
+#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
+#define SrcReg (1<<3) /* Register operand. */
+#define SrcMem (2<<3) /* Memory operand. */
+#define SrcMem16 (3<<3) /* Memory operand (16-bit). */
+#define SrcImm (4<<3) /* Immediate operand. */
+#define SrcImmByte (5<<3) /* 8-bit sign-extended immediate operand. */
+#define SrcMask (7<<3)
+/* Generic ModRM decode. */
+#define ModRM (1<<6)
+/* Destination is only written; never read. */
+#define Mov (1<<7)
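+
+/*
+ * Example: opcode 0x01 (ADD r/m,r) is tagged DstMem|SrcReg|ModRM below,
+ * i.e. a ModRM byte follows, its reg field names the source register and
+ * its r/m field names the read-modify-write destination. The ByteOp
+ * variant 0x00 differs only in operand width.
+ */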
+
+static uint8_t opcode_table[256] = {
+ /* 0x00 - 0x07 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
+ /* 0x08 - 0x0F */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, 0,
+ /* 0x10 - 0x17 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
+ /* 0x18 - 0x1F */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
+ /* 0x20 - 0x27 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ /* 0x28 - 0x2F */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ /* 0x30 - 0x37 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ /* 0x38 - 0x3F */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ /* 0x40 - 0x4F */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0x50 - 0x5F */
+ ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
+ ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
+ ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
+ ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
+ /* 0x60 - 0x67 */
+ ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcMem16|ModRM|Mov,
+ 0, 0, 0, 0,
+ /* 0x68 - 0x6F */
+ ImplicitOps|Mov, DstReg|SrcImm|ModRM|Mov,
+ ImplicitOps|Mov, DstReg|SrcImmByte|ModRM|Mov,
+ ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
+ /* 0x70 - 0x77 */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0x78 - 0x7F */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0x80 - 0x87 */
+ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
+ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ /* 0x88 - 0x8F */
+ ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov,
+ ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstMem|SrcReg|ModRM|Mov, DstReg|SrcNone|ModRM,
+ DstReg|SrcMem|ModRM|Mov, DstMem|SrcNone|ModRM|Mov,
+ /* 0x90 - 0x97 */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0x98 - 0x9F */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0xA0 - 0xA7 */
+ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps, ImplicitOps,
+ /* 0xA8 - 0xAF */
+ ByteOp|DstReg|SrcImm, DstReg|SrcImm,
+ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps, ImplicitOps,
+ /* 0xB0 - 0xB7 */
+ ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
+ ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
+ ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
+ ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
+ /* 0xB8 - 0xBF */
+ DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
+ DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
+ /* 0xC0 - 0xC7 */
+ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
+ ImplicitOps, ImplicitOps,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov,
+ /* 0xC8 - 0xCF */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0xD0 - 0xD7 */
+ ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
+ ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0xD8 - 0xDF */
+ 0, ImplicitOps|ModRM|Mov, 0, ImplicitOps|ModRM|Mov,
+ 0, ImplicitOps|ModRM|Mov, ImplicitOps|ModRM|Mov, ImplicitOps|ModRM|Mov,
+ /* 0xE0 - 0xE7 */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0xE8 - 0xEF */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0xF0 - 0xF7 */
+ 0, ImplicitOps, 0, 0,
+ ImplicitOps, ImplicitOps,
+ ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM,
+ /* 0xF8 - 0xFF */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
+};
+
+static uint8_t twobyte_table[256] = {
+ /* 0x00 - 0x07 */
+ 0, ImplicitOps|ModRM, 0, 0, 0, 0, ImplicitOps, 0,
+ /* 0x08 - 0x0F */
+ ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
+ /* 0x10 - 0x17 */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x18 - 0x1F */
+ ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
+ ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
+ /* 0x20 - 0x27 */
+ ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
+ 0, 0, 0, 0,
+ /* 0x28 - 0x2F */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x30 - 0x37 */
+ ImplicitOps, ImplicitOps, ImplicitOps, 0, 0, 0, 0, 0,
+ /* 0x38 - 0x3F */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x40 - 0x47 */
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ /* 0x48 - 0x4F */
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ /* 0x50 - 0x5F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x60 - 0x6F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x70 - 0x7F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x80 - 0x87 */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0x88 - 0x8F */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0x90 - 0x97 */
+ ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
+ ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
+ ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
+ ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
+ /* 0x98 - 0x9F */
+ ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
+ ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
+ ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
+ ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
+ /* 0xA0 - 0xA7 */
+ ImplicitOps, ImplicitOps, ImplicitOps, DstBitBase|SrcReg|ModRM,
+ DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0,
+ /* 0xA8 - 0xAF */
+ ImplicitOps, ImplicitOps, 0, DstBitBase|SrcReg|ModRM,
+ DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstReg|SrcMem|ModRM,
+ /* 0xB0 - 0xB7 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ DstReg|SrcMem|ModRM|Mov, DstBitBase|SrcReg|ModRM,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
+ /* 0xB8 - 0xBF */
+ 0, 0, DstBitBase|SrcImmByte|ModRM, DstBitBase|SrcReg|ModRM,
+ DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
+ /* 0xC0 - 0xC7 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0,
+ 0, 0, 0, ImplicitOps|ModRM,
+ /* 0xC8 - 0xCF */
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ /* 0xD0 - 0xDF */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xE0 - 0xEF */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xF0 - 0xFF */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Type, address-of, and value of an instruction's operand. */
+struct operand {
+ enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
+ unsigned int bytes;
+ unsigned long val, orig_val;
+ union {
+ /* OP_REG: Pointer to register field. */
+ unsigned long *reg;
+ /* OP_MEM: Segment and offset. */
+ struct {
+ enum x86_segment seg;
+ unsigned long off;
+ } mem;
+ };
+};
+
+/* MSRs. */
+#define MSR_TSC 0x10
+
+/* Control register flags. */
+#define CR0_PE (1<<0)
+#define CR4_TSD (1<<2)
+
+/* EFLAGS bit definitions. */
+#define EFLG_VIP (1<<20)
+#define EFLG_VIF (1<<19)
+#define EFLG_AC (1<<18)
+#define EFLG_VM (1<<17)
+#define EFLG_RF (1<<16)
+#define EFLG_NT (1<<14)
+#define EFLG_IOPL (3<<12)
+#define EFLG_OF (1<<11)
+#define EFLG_DF (1<<10)
+#define EFLG_IF (1<<9)
+#define EFLG_TF (1<<8)
+#define EFLG_SF (1<<7)
+#define EFLG_ZF (1<<6)
+#define EFLG_AF (1<<4)
+#define EFLG_PF (1<<2)
+#define EFLG_CF (1<<0)
+
+/* Exception definitions. */
+#define EXC_DE 0
+#define EXC_DB 1
+#define EXC_BP 3
+#define EXC_OF 4
+#define EXC_BR 5
+#define EXC_UD 6
+#define EXC_TS 10
+#define EXC_NP 11
+#define EXC_SS 12
+#define EXC_GP 13
+#define EXC_PF 14
+#define EXC_MF 16
+
+/*
+ * Instruction emulation:
+ * Most instructions are emulated directly via a fragment of inline assembly
+ * code. This allows us to save/restore EFLAGS and thus very easily pick up
+ * any modified flags.
+ */
+
+#if defined(__x86_64__)
+#define _LO32 "k" /* force 32-bit operand */
+#define _STK "%%rsp" /* stack pointer */
+#define _BYTES_PER_LONG "8"
+#elif defined(__i386__)
+#define _LO32 "" /* force 32-bit operand */
+#define _STK "%%esp" /* stack pointer */
+#define _BYTES_PER_LONG "4"
+#endif
+
+/*
+ * These EFLAGS bits are restored from saved value during emulation, and
+ * any changes are written back to the saved value after emulation.
+ */
+#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
+
+/* Before executing instruction: restore necessary bits in EFLAGS. */
+#define _PRE_EFLAGS(_sav, _msk, _tmp) \
+/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
+"movl %"_sav",%"_LO32 _tmp"; " \
+"push %"_tmp"; " \
+"push %"_tmp"; " \
+"movl %"_msk",%"_LO32 _tmp"; " \
+"andl %"_LO32 _tmp",("_STK"); " \
+"pushf; " \
+"notl %"_LO32 _tmp"; " \
+"andl %"_LO32 _tmp",("_STK"); " \
+"andl %"_LO32 _tmp",2*"_BYTES_PER_LONG"("_STK"); " \
+"pop %"_tmp"; " \
+"orl %"_LO32 _tmp",("_STK"); " \
+"popf; " \
+"pop %"_sav"; "
+
+/* After executing instruction: write-back necessary bits in EFLAGS. */
+#define _POST_EFLAGS(_sav, _msk, _tmp) \
+/* _sav |= EFLAGS & _msk; */ \
+"pushf; " \
+"pop %"_tmp"; " \
+"andl %"_msk",%"_LO32 _tmp"; " \
+"orl %"_LO32 _tmp",%"_sav"; "
+
+/* Raw emulation: instruction has two explicit operands. */
+#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy)\
+do{ unsigned long _tmp; \
+ switch ( (_dst).bytes ) \
+ { \
+ case 2: \
+ asm volatile ( \
+ _PRE_EFLAGS("0","4","2") \
+ _op"w %"_wx"3,%1; " \
+ _POST_EFLAGS("0","4","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : _wy ((_src).val), "i" (EFLAGS_MASK), \
+ "m" (_eflags), "m" ((_dst).val) ); \
+ break; \
+ case 4: \
+ asm volatile ( \
+ _PRE_EFLAGS("0","4","2") \
+ _op"l %"_lx"3,%1; " \
+ _POST_EFLAGS("0","4","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : _ly ((_src).val), "i" (EFLAGS_MASK), \
+ "m" (_eflags), "m" ((_dst).val) ); \
+ break; \
+ case 8: \
+ __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy); \
+ break; \
+ } \
+} while (0)
+#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)\
+do{ unsigned long _tmp; \
+ switch ( (_dst).bytes ) \
+ { \
+ case 1: \
+ asm volatile ( \
+ _PRE_EFLAGS("0","4","2") \
+ _op"b %"_bx"3,%1; " \
+ _POST_EFLAGS("0","4","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : _by ((_src).val), "i" (EFLAGS_MASK), \
+ "m" (_eflags), "m" ((_dst).val) ); \
+ break; \
+ default: \
+ __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy);\
+ break; \
+ } \
+} while (0)
+/* Source operand is byte-sized and may be restricted to just %cl. */
+#define emulate_2op_SrcB(_op, _src, _dst, _eflags) \
+ __emulate_2op(_op, _src, _dst, _eflags, \
+ "b", "c", "b", "c", "b", "c", "b", "c")
+/* Source operand is byte, word, long or quad sized. */
+#define emulate_2op_SrcV(_op, _src, _dst, _eflags) \
+ __emulate_2op(_op, _src, _dst, _eflags, \
+ "b", "q", "w", "r", _LO32, "r", "", "r")
+/* Source operand is word, long or quad sized. */
+#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \
+ __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
+ "w", "r", _LO32, "r", "", "r")
+
+/* Instruction has only one explicit operand (no source operand). */
+#define emulate_1op(_op,_dst,_eflags) \
+do{ unsigned long _tmp; \
+ switch ( (_dst).bytes ) \
+ { \
+ case 1: \
+ asm volatile ( \
+ _PRE_EFLAGS("0","3","2") \
+ _op"b %1; " \
+ _POST_EFLAGS("0","3","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \
+ break; \
+ case 2: \
+ asm volatile ( \
+ _PRE_EFLAGS("0","3","2") \
+ _op"w %1; " \
+ _POST_EFLAGS("0","3","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \
+ break; \
+ case 4: \
+ asm volatile ( \
+ _PRE_EFLAGS("0","3","2") \
+ _op"l %1; " \
+ _POST_EFLAGS("0","3","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \
+ break; \
+ case 8: \
+ __emulate_1op_8byte(_op, _dst, _eflags); \
+ break; \
+ } \
+} while (0)
+
+/* Emulate an instruction with quadword operands (x86/64 only). */
+#if defined(__x86_64__)
+#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \
+do{ asm volatile ( \
+ _PRE_EFLAGS("0","4","2") \
+ _op"q %"_qx"3,%1; " \
+ _POST_EFLAGS("0","4","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : _qy ((_src).val), "i" (EFLAGS_MASK), \
+ "m" (_eflags), "m" ((_dst).val) ); \
+} while (0)
+#define __emulate_1op_8byte(_op, _dst, _eflags) \
+do{ asm volatile ( \
+ _PRE_EFLAGS("0","3","2") \
+ _op"q %1; " \
+ _POST_EFLAGS("0","3","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \
+} while (0)
+#elif defined(__i386__)
+#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
+#define __emulate_1op_8byte(_op, _dst, _eflags)
+#endif /* __i386__ */
+
+/* Fetch next part of the instruction being emulated. */
+#define insn_fetch_bytes(_size) \
+({ unsigned long _x, _eip = _regs.eip; \
+ if ( !mode_64bit() ) _eip = (uint32_t)_eip; /* ignore upper dword */ \
+ _regs.eip += (_size); /* real hardware doesn't truncate */ \
+ generate_exception_if((uint8_t)(_regs.eip - ctxt->regs->eip) > 15, \
+ EXC_GP, 0); \
+ rc = ops->insn_fetch(x86_seg_cs, _eip, &_x, (_size), ctxt); \
+ if ( rc ) goto done; \
+ _x; \
+})
+#define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type)))
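+
+/*
+ * Example: insn_fetch_type(int8_t) pulls one byte from the code stream and
+ * sign-extends it, so a displacement byte of 0xf0 yields -16. Fetching is
+ * capped at 15 bytes per instruction, matching the architectural limit.
+ */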
+
+#define truncate_word(ea, byte_width) \
+({ unsigned long __ea = (ea); \
+ unsigned int _width = (byte_width); \
+ ((_width == sizeof(unsigned long)) ? __ea : \
+ (__ea & ((1UL << (_width << 3)) - 1))); \
+})
+#define truncate_ea(ea) truncate_word((ea), ad_bytes)
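+
+/*
+ * Example: truncate_word(0x1234567890ab, 4) masks to the low 32 bits,
+ * giving 0x567890ab; at the native width (byte_width ==
+ * sizeof(unsigned long)) the value passes through unchanged.
+ */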
+
+#define mode_64bit() (def_ad_bytes == 8)
+
+#define fail_if(p) \
+do { \
+ rc = (p) ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY; \
+ if ( rc ) goto done; \
+} while (0)
+
+#define generate_exception_if(p, e, ec) \
+({ if ( (p) ) { \
+ fail_if(ops->inject_hw_exception == NULL); \
+ rc = ops->inject_hw_exception(e, ec, ctxt) ? : X86EMUL_EXCEPTION; \
+ goto done; \
+ } \
+})
+
+/*
+ * Does the given byte have even parity (an even number of 1 bits)? Per SDM
+ * Vol. 1 Sec. 3.4.3.1, "Status Flags": EFLAGS.PF reflects the parity of the
+ * least-significant byte of the result only.
+ */
+static int even_parity(uint8_t v)
+{
+ asm ( "test %b0,%b0; setp %b0" : "=a" (v) : "0" (v) );
+ return v;
+}
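+
+/* Example: even_parity(0x03) is 1 (two bits set); even_parity(0x07) is 0. */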
+
+/* Update address held in a register, based on addressing mode. */
+#define _register_address_increment(reg, inc, byte_width) \
+do { \
+ int _inc = (inc); /* signed type ensures sign extension to long */ \
+ unsigned int _width = (byte_width); \
+ if ( _width == sizeof(unsigned long) ) \
+ (reg) += _inc; \
+ else if ( mode_64bit() ) \
+ (reg) = ((reg) + _inc) & ((1UL << (_width << 3)) - 1); \
+ else \
+ (reg) = ((reg) & ~((1UL << (_width << 3)) - 1)) | \
+ (((reg) + _inc) & ((1UL << (_width << 3)) - 1)); \
+} while (0)
+#define register_address_increment(reg, inc) \
+ _register_address_increment((reg), (inc), ad_bytes)
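+
+/*
+ * Example: with ad_bytes == 2 and %si == 0xffff, an increment of 1 wraps
+ * %si to 0 while preserving the register's upper bits; in 64-bit mode a
+ * 32-bit address is instead truncated with the upper bits cleared.
+ */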
+
+#define sp_pre_dec(dec) ({ \
+ _register_address_increment(_regs.esp, -(dec), ctxt->sp_size/8); \
+ truncate_word(_regs.esp, ctxt->sp_size/8); \
+})
+#define sp_post_inc(inc) ({ \
+ unsigned long __esp = truncate_word(_regs.esp, ctxt->sp_size/8); \
+ _register_address_increment(_regs.esp, (inc), ctxt->sp_size/8); \
+ __esp; \
+})
+
+#define jmp_rel(rel) \
+do { \
+ int _rel = (int)(rel); \
+ _regs.eip += _rel; \
+ if ( !mode_64bit() ) \
+ _regs.eip = ((op_bytes == 2) \
+ ? (uint16_t)_regs.eip : (uint32_t)_regs.eip); \
+} while (0)
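+
+/*
+ * Example: a near jump with rel == 0x10, op_bytes == 2 and %ip == 0xfff8
+ * lands at 0x0008, since the result is truncated to 16 bits rather than
+ * carrying into the upper half of %eip.
+ */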
+
+static unsigned long __get_rep_prefix(
+ struct cpu_user_regs *int_regs,
+ struct cpu_user_regs *ext_regs,
+ int ad_bytes)
+{
+ unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx :
+ (ad_bytes == 4) ? (uint32_t)int_regs->ecx :
+ int_regs->ecx);
+
+ /* Skip the instruction if no repetitions are required. */
+ if ( ecx == 0 )
+ ext_regs->eip = int_regs->eip;
+
+ return ecx;
+}
+
+#define get_rep_prefix() ({ \
+ unsigned long max_reps = 1; \
+ if ( rep_prefix ) \
+ max_reps = __get_rep_prefix(&_regs, ctxt->regs, ad_bytes); \
+ if ( max_reps == 0 ) \
+ goto done; \
+ max_reps; \
+})
+
+static void __put_rep_prefix(
+ struct cpu_user_regs *int_regs,
+ struct cpu_user_regs *ext_regs,
+ int ad_bytes,
+ unsigned long reps_completed)
+{
+ unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx :
+ (ad_bytes == 4) ? (uint32_t)int_regs->ecx :
+ int_regs->ecx);
+
+ /* Reduce counter appropriately, and repeat instruction if non-zero. */
+ ecx -= reps_completed;
+ if ( ecx != 0 )
+ int_regs->eip = ext_regs->eip;
+
+ if ( ad_bytes == 2 )
+ *(uint16_t *)&int_regs->ecx = ecx;
+ else if ( ad_bytes == 4 )
+ int_regs->ecx = (uint32_t)ecx;
+ else
+ int_regs->ecx = ecx;
+}
+
+#define put_rep_prefix(reps_completed) ({ \
+ if ( rep_prefix ) \
+ __put_rep_prefix(&_regs, ctxt->regs, ad_bytes, reps_completed); \
+})
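+
+/*
+ * Example: for "rep movsb" with %ecx == 3, get_rep_prefix() yields 3; if
+ * the handler completes only n < 3 iterations, put_rep_prefix(n) subtracts
+ * n from %ecx and rewinds %eip to the start of the instruction so that it
+ * re-executes with the remaining count.
+ */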
+
+/*
+ * Unsigned multiplication with double-word result.
+ * IN: Multiplicand=m[0], Multiplier=m[1]
+ * OUT: Return CF/OF (overflow status); Result=m[1]:m[0]
+ */
+static int mul_dbl(unsigned long m[2])
+{
+ int rc;
+ asm ( "mul %4; seto %b2"
+ : "=a" (m[0]), "=d" (m[1]), "=q" (rc)
+ : "0" (m[0]), "1" (m[1]), "2" (0) );
+ return rc;
+}
+
+/*
+ * Signed multiplication with double-word result.
+ * IN: Multiplicand=m[0], Multiplier=m[1]
+ * OUT: Return CF/OF (overflow status); Result=m[1]:m[0]
+ */
+static int imul_dbl(unsigned long m[2])
+{
+ int rc;
+ asm ( "imul %4; seto %b2"
+ : "=a" (m[0]), "=d" (m[1]), "=q" (rc)
+ : "0" (m[0]), "1" (m[1]), "2" (0) );
+ return rc;
+}
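+
+/*
+ * Example: on a 32-bit build, mul_dbl() with m = { 0x80000000, 4 } leaves
+ * m = { 0, 2 } (the 64-bit product 0x200000000) and returns 1, since the
+ * high word of the result is non-zero and the host MUL sets CF/OF.
+ */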
+
+/*
+ * Unsigned division of double-word dividend.
+ * IN: Dividend=u[1]:u[0], Divisor=v
+ * OUT: Return 1: #DE
+ * Return 0: Quotient=u[0], Remainder=u[1]
+ */
+static int div_dbl(unsigned long u[2], unsigned long v)
+{
+ if ( (v == 0) || (u[1] >= v) )
+ return 1;
+ asm ( "div %4"
+ : "=a" (u[0]), "=d" (u[1])
+ : "0" (u[0]), "1" (u[1]), "r" (v) );
+ return 0;
+}
+
+/*
+ * Signed division of double-word dividend.
+ * IN: Dividend=u[1]:u[0], Divisor=v
+ * OUT: Return 1: #DE
+ * Return 0: Quotient=u[0], Remainder=u[1]
+ * NB. We don't use idiv directly as it's moderately hard to work out
+ * ahead of time whether it will #DE, which we cannot allow to happen.
+ */
+static int idiv_dbl(unsigned long u[2], unsigned long v)
+{
+ int negu = (long)u[1] < 0, negv = (long)v < 0;
+
+ /* u = abs(u) */
+ if ( negu )
+ {
+ u[1] = ~u[1];
+ if ( (u[0] = -u[0]) == 0 )
+ u[1]++;
+ }
+
+ /* abs(u) / abs(v) */
+ if ( div_dbl(u, negv ? -v : v) )
+ return 1;
+
+ /* Remainder has same sign as dividend. It cannot overflow. */
+ if ( negu )
+ u[1] = -u[1];
+
+ /* Quotient is overflowed if sign bit is set. */
+ if ( negu ^ negv )
+ {
+ if ( (long)u[0] >= 0 )
+ u[0] = -u[0];
+ else if ( (u[0] << 1) != 0 ) /* == 0x80...0 is okay */
+ return 1;
+ }
+ else if ( (long)u[0] < 0 )
+ return 1;
+
+ return 0;
+}
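+
+/*
+ * Example: idiv_dbl() with u holding the double-word value -7 and v == 2
+ * computes abs(-7)/2 = 3 remainder 1, then restores the signs: quotient
+ * u[0] == -3 and remainder u[1] == -1, matching IDIV semantics (the
+ * remainder takes the dividend's sign) with no risk of a host #DE.
+ */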
+
+static int
+test_cc(
+ unsigned int condition, unsigned int flags)
+{
+ int rc = 0;
+
+ switch ( (condition & 15) >> 1 )
+ {
+ case 0: /* o */
+ rc |= (flags & EFLG_OF);
+ break;
+ case 1: /* b/c/nae */
+ rc |= (flags & EFLG_CF);
+ break;
+ case 2: /* z/e */
+ rc |= (flags & EFLG_ZF);
+ break;
+ case 3: /* be/na */
+ rc |= (flags & (EFLG_CF|EFLG_ZF));
+ break;
+ case 4: /* s */
+ rc |= (flags & EFLG_SF);
+ break;
+ case 5: /* p/pe */
+ rc |= (flags & EFLG_PF);
+ break;
+ case 7: /* le/ng */
+ rc |= (flags & EFLG_ZF);
+ /* fall through */
+ case 6: /* l/nge */
+ rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
+ break;
+ }
+
+ /* Odd condition identifiers (lsb == 1) have inverted sense. */
+ return (!!rc ^ (condition & 1));
+}
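+
+/*
+ * Example: condition 0x5 selects "nz": pair 2 tests ZF and the set lsb
+ * inverts the sense; conditions 0xc/0xd likewise map to "l"/"ge" via the
+ * SF != OF test in case 6.
+ */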
+
+static int
+get_cpl(
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ struct segment_register reg;
+
+ if ( ctxt->regs->eflags & EFLG_VM )
+ return 3;
+
+ if ( (ops->read_segment == NULL) ||
+ ops->read_segment(x86_seg_ss, &reg, ctxt) )
+ return -1;
+
+ return reg.attr.fields.dpl;
+}
+
+static int
+_mode_iopl(
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ int cpl = get_cpl(ctxt, ops);
+ if ( cpl == -1 )
+ return -1;
+ return (cpl <= ((ctxt->regs->eflags >> 12) & 3));
+}
+
+#define mode_ring0() ({ \
+ int _cpl = get_cpl(ctxt, ops); \
+ fail_if(_cpl < 0); \
+ (_cpl == 0); \
+})
+#define mode_iopl() ({ \
+ int _iopl = _mode_iopl(ctxt, ops); \
+ fail_if(_iopl < 0); \
+ _iopl; \
+})
+
+static int ioport_access_check(
+ unsigned int first_port,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ unsigned long iobmp;
+ struct segment_register tr;
+ int rc = X86EMUL_OKAY;
+
+ if ( !(ctxt->regs->eflags & EFLG_VM) && mode_iopl() )
+ return X86EMUL_OKAY;
+
+ fail_if(ops->read_segment == NULL);
+ if ( (rc = ops->read_segment(x86_seg_tr, &tr, ctxt)) != 0 )
+ return rc;
+
+ /* Ensure that the TSS is valid and has an io-bitmap-offset field. */
+ if ( !tr.attr.fields.p ||
+ ((tr.attr.fields.type & 0xd) != 0x9) ||
+ (tr.limit < 0x67) )
+ goto raise_exception;
+
+ if ( (rc = ops->read(x86_seg_none, tr.base + 0x66, &iobmp, 2, ctxt)) )
+ return rc;
+
+ /* Ensure the TSS limit covers the two bitmap bytes spanning the first port. */
+ iobmp += first_port / 8;
+ if ( tr.limit <= iobmp )
+ goto raise_exception;
+
+ if ( (rc = ops->read(x86_seg_none, tr.base + iobmp, &iobmp, 2, ctxt)) )
+ return rc;
+ if ( (iobmp & (((1<<bytes)-1) << (first_port&7))) != 0 )
+ goto raise_exception;
+
+ done:
+ return rc;
+
+ raise_exception:
+ fail_if(ops->inject_hw_exception == NULL);
+ return ops->inject_hw_exception(EXC_GP, 0, ctxt) ? : X86EMUL_EXCEPTION;
+}
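+
+/*
+ * Example: a 2-byte access to port 0x3f9 reads the bitmap word at offset
+ * iobmp + 0x3f9/8 == iobmp + 0x7f and raises #GP if either of bits 1-2 is
+ * set, i.e. every byte of a multi-byte access must be individually
+ * permitted.
+ */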
+
+static int
+in_realmode(
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ unsigned long cr0;
+ int rc;
+
+ if ( ops->read_cr == NULL )
+ return 0;
+
+ rc = ops->read_cr(0, &cr0, ctxt);
+ return (!rc && !(cr0 & CR0_PE));
+}
+
+static int
+realmode_load_seg(
+ enum x86_segment seg,
+ uint16_t sel,
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ struct segment_register reg;
+ int rc;
+
+ if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
+ return rc;
+
+ reg.sel = sel;
+ reg.base = (uint32_t)sel << 4;
+
+ return ops->write_segment(seg, &reg, ctxt);
+}
+
+static int
+protmode_load_seg(
+ enum x86_segment seg,
+ uint16_t sel,
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ struct segment_register desctab, cs, segr;
+ struct { uint32_t a, b; } desc;
+ unsigned long val;
+ uint8_t dpl, rpl, cpl;
+ int rc, fault_type = EXC_TS;
+
+ /* NULL selector? */
+ if ( (sel & 0xfffc) == 0 )
+ {
+ if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) )
+ goto raise_exn;
+ memset(&segr, 0, sizeof(segr));
+ return ops->write_segment(seg, &segr, ctxt);
+ }
+
+ /* LDT descriptor must be in the GDT. */
+ if ( (seg == x86_seg_ldtr) && (sel & 4) )
+ goto raise_exn;
+
+ if ( (rc = ops->read_segment(x86_seg_cs, &cs, ctxt)) ||
+ (rc = ops->read_segment((sel & 4) ? x86_seg_ldtr : x86_seg_gdtr,
+ &desctab, ctxt)) )
+ return rc;
+
+ /* Check against descriptor table limit. */
+ if ( ((sel & 0xfff8) + 7) > desctab.limit )
+ goto raise_exn;
+
+ do {
+ if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8),
+ &val, 4, ctxt)) )
+ return rc;
+ desc.a = val;
+ if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
+ &val, 4, ctxt)) )
+ return rc;
+ desc.b = val;
+
+ /* Segment present in memory? */
+ if ( !(desc.b & (1u<<15)) )
+ {
+ fault_type = EXC_NP;
+ goto raise_exn;
+ }
+
+ /* LDT descriptor is a system segment. All others are code/data. */
+ if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
+ goto raise_exn;
+
+ dpl = (desc.b >> 13) & 3;
+ rpl = sel & 3;
+ cpl = cs.sel & 3;
+
+ switch ( seg )
+ {
+ case x86_seg_cs:
+ /* Code segment? */
+ if ( !(desc.b & (1u<<11)) )
+ goto raise_exn;
+ /* Non-conforming segment: check DPL against RPL. */
+ if ( ((desc.b & (6u<<9)) != 6) && (dpl != rpl) )
+ goto raise_exn;
+ break;
+ case x86_seg_ss:
+ /* Writable data segment? */
+ if ( (desc.b & (5u<<9)) != (1u<<9) )
+ goto raise_exn;
+ if ( (dpl != cpl) || (dpl != rpl) )
+ goto raise_exn;
+ break;
+ case x86_seg_ldtr:
+ /* LDT system segment? */
+ if ( (desc.b & (15u<<8)) != (2u<<8) )
+ goto raise_exn;
+ goto skip_accessed_flag;
+ default:
+ /* Readable code or data segment? */
+ if ( (desc.b & (5u<<9)) == (4u<<9) )
+ goto raise_exn;
+ /* Non-conforming segment: check DPL against RPL and CPL. */
+ if ( ((desc.b & (6u<<9)) != 6) && ((dpl < cpl) || (dpl < rpl)) )
+ goto raise_exn;
+ break;
+ }
+
+ /* Ensure Accessed flag is set. */
+ rc = ((desc.b & 0x100) ? X86EMUL_OKAY :
+ ops->cmpxchg(
+ x86_seg_none, desctab.base + (sel & 0xfff8) + 4, desc.b,
+ desc.b | 0x100, 4, ctxt));
+ } while ( rc == X86EMUL_CMPXCHG_FAILED );
+
+ if ( rc )
+ return rc;
+
+ /* Force the Accessed flag in our local copy. */
+ desc.b |= 0x100;
+
+ skip_accessed_flag:
+ segr.base = (((desc.b << 0) & 0xff000000u) |
+ ((desc.b << 16) & 0x00ff0000u) |
+ ((desc.a >> 16) & 0x0000ffffu));
+ segr.attr.bytes = (((desc.b >> 8) & 0x00ffu) |
+ ((desc.b >> 12) & 0x0f00u));
+ segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
+ if ( segr.attr.fields.g )
+ segr.limit = (segr.limit << 12) | 0xfffu;
+ segr.sel = sel;
+ return ops->write_segment(seg, &segr, ctxt);
+
+ raise_exn:
+ if ( ops->inject_hw_exception == NULL )
+ return X86EMUL_UNHANDLEABLE;
+ if ( (rc = ops->inject_hw_exception(fault_type, sel & 0xfffc, ctxt)) )
+ return rc;
+ return X86EMUL_EXCEPTION;
+}
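+
+/*
+ * Example: the flat 32-bit data descriptor 0x00cf93000000ffff unpacks
+ * above as base 0, attr 0xc93 (present, DPL 0, writable data, G and D/B
+ * set) and limit 0xfffff, which the G bit scales to 0xffffffff.
+ */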
+
+static int
+load_seg(
+ enum x86_segment seg,
+ uint16_t sel,
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ if ( (ops->read_segment == NULL) ||
+ (ops->write_segment == NULL) )
+ return X86EMUL_UNHANDLEABLE;
+
+ if ( in_realmode(ctxt, ops) )
+ return realmode_load_seg(seg, sel, ctxt, ops);
+
+ return protmode_load_seg(seg, sel, ctxt, ops);
+}
+
+void *
+decode_register(
+ uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
+{
+ void *p;
+
+ switch ( modrm_reg )
+ {
+ case 0: p = &regs->eax; break;
+ case 1: p = &regs->ecx; break;
+ case 2: p = &regs->edx; break;
+ case 3: p = &regs->ebx; break;
+ case 4: p = (highbyte_regs ?
+ ((unsigned char *)&regs->eax + 1) :
+ (unsigned char *)&regs->esp); break;
+ case 5: p = (highbyte_regs ?
+ ((unsigned char *)&regs->ecx + 1) :
+ (unsigned char *)&regs->ebp); break;
+ case 6: p = (highbyte_regs ?
+ ((unsigned char *)&regs->edx + 1) :
+ (unsigned char *)&regs->esi); break;
+ case 7: p = (highbyte_regs ?
+ ((unsigned char *)&regs->ebx + 1) :
+ (unsigned char *)&regs->edi); break;
+#if defined(__x86_64__)
+ case 8: p = &regs->r8; break;
+ case 9: p = &regs->r9; break;
+ case 10: p = &regs->r10; break;
+ case 11: p = &regs->r11; break;
+ case 12: p = &regs->r12; break;
+ case 13: p = &regs->r13; break;
+ case 14: p = &regs->r14; break;
+ case 15: p = &regs->r15; break;
+#endif
+ default: p = NULL; break;
+ }
+
+ return p;
+}
+
+#define decode_segment_failed x86_seg_tr
+enum x86_segment
+decode_segment(
+ uint8_t modrm_reg)
+{
+ switch ( modrm_reg )
+ {
+ case 0: return x86_seg_es;
+ case 1: return x86_seg_cs;
+ case 2: return x86_seg_ss;
+ case 3: return x86_seg_ds;
+ case 4: return x86_seg_fs;
+ case 5: return x86_seg_gs;
+ default: break;
+ }
+ return decode_segment_failed;
+}
+
+int
+x86_emulate(
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ /* Shadow copy of register state. Committed on successful emulation. */
+ struct cpu_user_regs _regs = *ctxt->regs;
+
+ uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
+ uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+ unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
+#define REPE_PREFIX 1
+#define REPNE_PREFIX 2
+ unsigned int lock_prefix = 0, rep_prefix = 0;
+ int override_seg = -1, rc = X86EMUL_OKAY;
+ struct operand src, dst;
+
+ /* Data operand effective address (usually computed from ModRM). */
+ struct operand ea;
+
+ /* Default is a memory operand relative to segment DS. */
+ ea.type = OP_MEM;
+ ea.mem.seg = x86_seg_ds;
+ ea.mem.off = 0;
+
+ ctxt->retire.byte = 0;
+
+ op_bytes = def_op_bytes = ad_bytes = def_ad_bytes = ctxt->addr_size/8;
+ if ( op_bytes == 8 )
+ {
+ op_bytes = def_op_bytes = 4;
+#ifndef __x86_64__
+ return X86EMUL_UNHANDLEABLE;
+#endif
+ }
+
+ /* Prefix bytes. */
+ for ( ; ; )
+ {
+ switch ( b = insn_fetch_type(uint8_t) )
+ {
+ case 0x66: /* operand-size override */
+ op_bytes = def_op_bytes ^ 6;
+ break;
+ case 0x67: /* address-size override */
+ ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6);
+ break;
+ case 0x2e: /* CS override */
+ override_seg = x86_seg_cs;
+ break;
+ case 0x3e: /* DS override */
+ override_seg = x86_seg_ds;
+ break;
+ case 0x26: /* ES override */
+ override_seg = x86_seg_es;
+ break;
+ case 0x64: /* FS override */
+ override_seg = x86_seg_fs;
+ break;
+ case 0x65: /* GS override */
+ override_seg = x86_seg_gs;
+ break;
+ case 0x36: /* SS override */
+ override_seg = x86_seg_ss;
+ break;
+ case 0xf0: /* LOCK */
+ lock_prefix = 1;
+ break;
+ case 0xf2: /* REPNE/REPNZ */
+ rep_prefix = REPNE_PREFIX;
+ break;
+ case 0xf3: /* REP/REPE/REPZ */
+ rep_prefix = REPE_PREFIX;
+ break;
+ case 0x40 ... 0x4f: /* REX */
+ if ( !mode_64bit() )
+ goto done_prefixes;
+ rex_prefix = b;
+ continue;
+ default:
+ goto done_prefixes;
+ }
+
+ /* Any legacy prefix after a REX prefix nullifies its effect. */
+ rex_prefix = 0;
+ }
+ done_prefixes:
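+ /*
+ * Example: in 64-bit mode the prefix bytes 66 48 decode as an operand-size
+ * override followed by REX.W, and REX.W wins (op_bytes becomes 8); the
+ * reverse order 48 66 nullifies the REX, leaving op_bytes == 2.
+ */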
+
+ if ( rex_prefix & 8 ) /* REX.W */
+ op_bytes = 8;
+
+ /* Opcode byte(s). */
+ d = opcode_table[b];
+ if ( d == 0 )
+ {
+ /* Two-byte opcode? */
+ if ( b == 0x0f )
+ {
+ twobyte = 1;
+ b = insn_fetch_type(uint8_t);
+ d = twobyte_table[b];
+ }
+
+ /* Unrecognised? */
+ if ( d == 0 )
+ goto cannot_emulate;
+ }
+
+ /* Lock prefix is allowed only on RMW instructions. */
+ generate_exception_if((d & Mov) && lock_prefix, EXC_GP, 0);
+
+ /* ModRM and SIB bytes. */
+ if ( d & ModRM )
+ {
+ modrm = insn_fetch_type(uint8_t);
+ modrm_mod = (modrm & 0xc0) >> 6;
+ modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3);
+ modrm_rm = modrm & 0x07;
+
+ if ( modrm_mod == 3 )
+ {
+ modrm_rm |= (rex_prefix & 1) << 3;
+ ea.type = OP_REG;
+ ea.reg = decode_register(
+ modrm_rm, &_regs, (d & ByteOp) && (rex_prefix == 0));
+ }
+ else if ( ad_bytes == 2 )
+ {
+ /* 16-bit ModR/M decode. */
+ switch ( modrm_rm )
+ {
+ case 0:
+ ea.mem.off = _regs.ebx + _regs.esi;
+ break;
+ case 1:
+ ea.mem.off = _regs.ebx + _regs.edi;
+ break;
+ case 2:
+ ea.mem.seg = x86_seg_ss;
+ ea.mem.off = _regs.ebp + _regs.esi;
+ break;
+ case 3:
+ ea.mem.seg = x86_seg_ss;
+ ea.mem.off = _regs.ebp + _regs.edi;
+ break;
+ case 4:
+ ea.mem.off = _regs.esi;
+ break;
+ case 5:
+ ea.mem.off = _regs.edi;
+ break;
+ case 6:
+ if ( modrm_mod == 0 )
+ break;
+ ea.mem.seg = x86_seg_ss;
+ ea.mem.off = _regs.ebp;
+ break;
+ case 7:
+ ea.mem.off = _regs.ebx;
+ break;
+ }
+ switch ( modrm_mod )
+ {
+ case 0:
+ if ( modrm_rm == 6 )
+ ea.mem.off = insn_fetch_type(int16_t);
+ break;
+ case 1:
+ ea.mem.off += insn_fetch_type(int8_t);
+ break;
+ case 2:
+ ea.mem.off += insn_fetch_type(int16_t);
+ break;
+ }
+ ea.mem.off = truncate_ea(ea.mem.off);
+ }
+ else
+ {
+ /* 32/64-bit ModR/M decode. */
+ if ( modrm_rm == 4 )
+ {
+ sib = insn_fetch_type(uint8_t);
+ sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
+ sib_base = (sib & 7) | ((rex_prefix << 3) & 8);
+ if ( sib_index != 4 )
+ ea.mem.off = *(long*)decode_register(sib_index, &_regs, 0);
+ ea.mem.off <<= (sib >> 6) & 3;
+ if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
+ ea.mem.off += insn_fetch_type(int32_t);
+ else if ( sib_base == 4 )
+ {
+ ea.mem.seg = x86_seg_ss;
+ ea.mem.off += _regs.esp;
+ if ( !twobyte && (b == 0x8f) )
+ /* POP <rm> computes its EA post increment. */
+ ea.mem.off += ((mode_64bit() && (op_bytes == 4))
+ ? 8 : op_bytes);
+ }
+ else if ( sib_base == 5 )
+ {
+ ea.mem.seg = x86_seg_ss;
+ ea.mem.off += _regs.ebp;
+ }
+ else
+ ea.mem.off += *(long*)decode_register(sib_base, &_regs, 0);
+ }
+ else
+ {
+ modrm_rm |= (rex_prefix & 1) << 3;
+ ea.mem.off = *(long *)decode_register(modrm_rm, &_regs, 0);
+ if ( (modrm_rm == 5) && (modrm_mod != 0) )
+ ea.mem.seg = x86_seg_ss;
+ }
+ switch ( modrm_mod )
+ {
+ case 0:
+ if ( (modrm_rm & 7) != 5 )
+ break;
+ ea.mem.off = insn_fetch_type(int32_t);
+ if ( !mode_64bit() )
+ break;
+ /* Relative to RIP of next instruction. Argh! */
+ ea.mem.off += _regs.eip;
+ if ( (d & SrcMask) == SrcImm )
+ ea.mem.off += (d & ByteOp) ? 1 :
+ ((op_bytes == 8) ? 4 : op_bytes);
+ else if ( (d & SrcMask) == SrcImmByte )
+ ea.mem.off += 1;
+ else if ( !twobyte && ((b & 0xfe) == 0xf6) &&
+ ((modrm_reg & 7) <= 1) )
+ /* Special case in Grp3: test has immediate operand. */
+ ea.mem.off += (d & ByteOp) ? 1
+ : ((op_bytes == 8) ? 4 : op_bytes);
+ else if ( twobyte && ((b & 0xf7) == 0xa4) )
+ /* SHLD/SHRD with immediate byte third operand. */
+ ea.mem.off++;
+ break;
+ case 1:
+ ea.mem.off += insn_fetch_type(int8_t);
+ break;
+ case 2:
+ ea.mem.off += insn_fetch_type(int32_t);
+ break;
+ }
+ ea.mem.off = truncate_ea(ea.mem.off);
+ }
+ }
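+
+ /*
+ * Example: in 32-bit mode the bytes 8b 44 91 08 (mov 0x8(%ecx,%edx,4),
+ * %eax) decode here as ModRM 0x44 (mod 1, reg 0, rm 4 => SIB follows),
+ * SIB 0x91 (scale 4, index %edx, base %ecx) and a disp8 of 8, leaving
+ * ea.mem.off == %ecx + 4*%edx + 8 relative to %ds.
+ */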
+
+ if ( override_seg != -1 )
+ ea.mem.seg = override_seg;
+
+ /* Special instructions do their own operand decoding. */
+ if ( (d & DstMask) == ImplicitOps )
+ goto special_insn;
+
+ /* Decode and fetch the source operand: register, memory or immediate. */
+ switch ( d & SrcMask )
+ {
+ case SrcNone:
+ break;
+ case SrcReg:
+ src.type = OP_REG;
+ if ( d & ByteOp )
+ {
+ src.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
+ src.val = *(uint8_t *)src.reg;
+ src.bytes = 1;
+ }
+ else
+ {
+ src.reg = decode_register(modrm_reg, &_regs, 0);
+ switch ( (src.bytes = op_bytes) )
+ {
+ case 2: src.val = *(uint16_t *)src.reg; break;
+ case 4: src.val = *(uint32_t *)src.reg; break;
+ case 8: src.val = *(uint64_t *)src.reg; break;
+ }
+ }
+ break;
+ case SrcMem16:
+ ea.bytes = 2;
+ goto srcmem_common;
+ case SrcMem:
+ ea.bytes = (d & ByteOp) ? 1 : op_bytes;
+ srcmem_common:
+ src = ea;
+ if ( src.type == OP_REG )
+ {
+ switch ( src.bytes )
+ {
+ case 1: src.val = *(uint8_t *)src.reg; break;
+ case 2: src.val = *(uint16_t *)src.reg; break;
+ case 4: src.val = *(uint32_t *)src.reg; break;
+ case 8: src.val = *(uint64_t *)src.reg; break;
+ }
+ }
+ else if ( (rc = ops->read(src.mem.seg, src.mem.off,
+ &src.val, src.bytes, ctxt)) )
+ goto done;
+ break;
+ case SrcImm:
+ src.type = OP_IMM;
+ src.bytes = (d & ByteOp) ? 1 : op_bytes;
+ if ( src.bytes == 8 ) src.bytes = 4;
+ /* NB. Immediates are sign-extended as necessary. */
+ switch ( src.bytes )
+ {
+ case 1: src.val = insn_fetch_type(int8_t); break;
+ case 2: src.val = insn_fetch_type(int16_t); break;
+ case 4: src.val = insn_fetch_type(int32_t); break;
+ }
+ break;
+ case SrcImmByte:
+ src.type = OP_IMM;
+ src.bytes = 1;
+ src.val = insn_fetch_type(int8_t);
+ break;
+ }
+
+ /* Decode and fetch the destination operand: register or memory. */
+ switch ( d & DstMask )
+ {
+ case DstReg:
+ dst.type = OP_REG;
+ if ( d & ByteOp )
+ {
+ dst.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
+ dst.val = *(uint8_t *)dst.reg;
+ dst.bytes = 1;
+ }
+ else
+ {
+ dst.reg = decode_register(modrm_reg, &_regs, 0);
+ switch ( (dst.bytes = op_bytes) )
+ {
+ case 2: dst.val = *(uint16_t *)dst.reg; break;
+ case 4: dst.val = *(uint32_t *)dst.reg; break;
+ case 8: dst.val = *(uint64_t *)dst.reg; break;
+ }
+ }
+ break;
+ case DstBitBase:
+ if ( ((d & SrcMask) == SrcImmByte) || (ea.type == OP_REG) )
+ {
+ src.val &= (op_bytes << 3) - 1;
+ }
+ else
+ {
+ /*
+ * EA += BitOffset DIV op_bytes*8
+ * BitOffset = BitOffset MOD op_bytes*8
+ * DIV truncates towards negative infinity.
+ * MOD always produces a positive result.
+ */
+ if ( op_bytes == 2 )
+ src.val = (int16_t)src.val;
+ else if ( op_bytes == 4 )
+ src.val = (int32_t)src.val;
+ if ( (long)src.val < 0 )
+ {
+ unsigned long byte_offset;
+ byte_offset = op_bytes + (((-src.val-1) >> 3) & ~(op_bytes-1));
+ ea.mem.off -= byte_offset;
+ src.val = (byte_offset << 3) + src.val;
+ }
+ else
+ {
+ ea.mem.off += (src.val >> 3) & ~(op_bytes - 1);
+ src.val &= (op_bytes << 3) - 1;
+ }
+ }
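+ /*
+ * Example: "bt %eax,(%ebx)" with op_bytes == 4 and %eax == 37 takes this
+ * path: ea.mem.off advances by 4 (bit 37 lives in the next dword) and
+ * src.val becomes 5, keeping the access within one aligned dword.
+ */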
+ /* Becomes a normal DstMem operation from here on. */
+ d = (d & ~DstMask) | DstMem;
+ case DstMem:
+ ea.bytes = (d & ByteOp) ? 1 : op_bytes;
+ dst = ea;
+ if ( dst.type == OP_REG )
+ {
+ switch ( dst.bytes )
+ {
+ case 1: dst.val = *(uint8_t *)dst.reg; break;
+ case 2: dst.val = *(uint16_t *)dst.reg; break;
+ case 4: dst.val = *(uint32_t *)dst.reg; break;
+ case 8: dst.val = *(uint64_t *)dst.reg; break;
+ }
+ }
+ else if ( !(d & Mov) ) /* optimisation - avoid slow emulated read */
+ {
+ if ( (rc = ops->read(dst.mem.seg, dst.mem.off,
+ &dst.val, dst.bytes, ctxt)) )
+ goto done;
+ dst.orig_val = dst.val;
+ }
+ break;
+ }
+
+ /* LOCK prefix allowed only on instructions with memory destination. */
+ generate_exception_if(lock_prefix && (dst.type != OP_MEM), EXC_GP, 0);
+
+ if ( twobyte )
+ goto twobyte_insn;
+
+ switch ( b )
+ {
+ case 0x04 ... 0x05: /* add imm,%%eax */
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = _regs.eax;
+ case 0x00 ... 0x03: add: /* add */
+ emulate_2op_SrcV("add", src, dst, _regs.eflags);
+ break;
+
+ case 0x0c ... 0x0d: /* or imm,%%eax */
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = _regs.eax;
+ case 0x08 ... 0x0b: or: /* or */
+ emulate_2op_SrcV("or", src, dst, _regs.eflags);
+ break;
+
+ case 0x14 ... 0x15: /* adc imm,%%eax */
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = _regs.eax;
+ case 0x10 ... 0x13: adc: /* adc */
+ emulate_2op_SrcV("adc", src, dst, _regs.eflags);
+ break;
+
+ case 0x1c ... 0x1d: /* sbb imm,%%eax */
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = _regs.eax;
+ case 0x18 ... 0x1b: sbb: /* sbb */
+ emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
+ break;
+
+ case 0x24 ... 0x25: /* and imm,%%eax */
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = _regs.eax;
+ case 0x20 ... 0x23: and: /* and */
+ emulate_2op_SrcV("and", src, dst, _regs.eflags);
+ break;
+
+ case 0x2c ... 0x2d: /* sub imm,%%eax */
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = _regs.eax;
+ case 0x28 ... 0x2b: sub: /* sub */
+ emulate_2op_SrcV("sub", src, dst, _regs.eflags);
+ break;
+
+ case 0x34 ... 0x35: /* xor imm,%%eax */
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = _regs.eax;
+ case 0x30 ... 0x33: xor: /* xor */
+ emulate_2op_SrcV("xor", src, dst, _regs.eflags);
+ break;
+
+ case 0x3c ... 0x3d: /* cmp imm,%%eax */
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = _regs.eax;
+ case 0x38 ... 0x3b: cmp: /* cmp */
+ emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
+ break;
+
+ case 0x62: /* bound */ {
+ unsigned long src_val2;
+ int lb, ub, idx;
+ generate_exception_if(mode_64bit() || (src.type != OP_MEM),
+ EXC_UD, -1);
+ if ( (rc = ops->read(src.mem.seg, src.mem.off + op_bytes,
+ &src_val2, op_bytes, ctxt)) )
+ goto done;
+ ub = (op_bytes == 2) ? (int16_t)src_val2 : (int32_t)src_val2;
+ lb = (op_bytes == 2) ? (int16_t)src.val : (int32_t)src.val;
+ idx = (op_bytes == 2) ? (int16_t)dst.val : (int32_t)dst.val;
+ generate_exception_if((idx < lb) || (idx > ub), EXC_BR, -1);
+ dst.type = OP_NONE;
+ break;
+ }
+
+ case 0x63: /* movsxd (x86/64) / arpl (x86/32) */
+ if ( mode_64bit() )
+ {
+ /* movsxd */
+ if ( src.type == OP_REG )
+ src.val = *(int32_t *)src.reg;
+ else if ( (rc = ops->read(src.mem.seg, src.mem.off,
+ &src.val, 4, ctxt)) )
+ goto done;
+ dst.val = (int32_t)src.val;
+ }
+ else
+ {
+ /* arpl */
+ uint16_t src_val = dst.val;
+ dst = src;
+ _regs.eflags &= ~EFLG_ZF;
+ _regs.eflags |= ((src_val & 3) > (dst.val & 3)) ? EFLG_ZF : 0;
+ if ( _regs.eflags & EFLG_ZF )
+ dst.val = (dst.val & ~3) | (src_val & 3);
+ else
+ dst.type = OP_NONE;
+ generate_exception_if(in_realmode(ctxt, ops), EXC_UD, -1);
+ }
+ break;
+
+ case 0x69: /* imul imm16/32 */
+ case 0x6b: /* imul imm8 */ {
+ unsigned long src1; /* ModR/M source operand */
+ if ( ea.type == OP_REG )
+ src1 = *ea.reg;
+ else if ( (rc = ops->read(ea.mem.seg, ea.mem.off,
+ &src1, op_bytes, ctxt)) )
+ goto done;
+ _regs.eflags &= ~(EFLG_OF|EFLG_CF);
+ switch ( dst.bytes )
+ {
+ case 2:
+ dst.val = ((uint32_t)(int16_t)src.val *
+ (uint32_t)(int16_t)src1);
+ if ( (int16_t)dst.val != (uint32_t)dst.val )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ break;
+#ifdef __x86_64__
+ case 4:
+ dst.val = ((uint64_t)(int32_t)src.val *
+ (uint64_t)(int32_t)src1);
+ if ( (int32_t)dst.val != dst.val )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ break;
+#endif
+ default: {
+ unsigned long m[2] = { src.val, src1 };
+ if ( imul_dbl(m) )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ dst.val = m[0];
+ break;
+ }
+ }
+ break;
+ }
+
+ case 0x82: /* Grp1 (x86/32 only) */
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ case 0x80: case 0x81: case 0x83: /* Grp1 */
+ switch ( modrm_reg & 7 )
+ {
+ case 0: goto add;
+ case 1: goto or;
+ case 2: goto adc;
+ case 3: goto sbb;
+ case 4: goto and;
+ case 5: goto sub;
+ case 6: goto xor;
+ case 7: goto cmp;
+ }
+ break;
+
+ case 0xa8 ... 0xa9: /* test imm,%%eax */
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = _regs.eax;
+ case 0x84 ... 0x85: test: /* test */
+ emulate_2op_SrcV("test", src, dst, _regs.eflags);
+ break;
+
+ case 0x86 ... 0x87: xchg: /* xchg */
+ /* Write back the register source. */
+ switch ( dst.bytes )
+ {
+ case 1: *(uint8_t *)src.reg = (uint8_t)dst.val; break;
+ case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
+ case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
+ case 8: *src.reg = dst.val; break;
+ }
+ /* Write back the memory destination with implicit LOCK prefix. */
+ dst.val = src.val;
+ lock_prefix = 1;
+ break;
+
+ case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
+ generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
+ case 0x88 ... 0x8b: /* mov */
+ dst.val = src.val;
+ break;
+
+ case 0x8c: /* mov Sreg,r/m */ {
+ struct segment_register reg;
+ enum x86_segment seg = decode_segment(modrm_reg);
+ generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
+ fail_if(ops->read_segment == NULL);
+ if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
+ goto done;
+ dst.val = reg.sel;
+ if ( dst.type == OP_MEM )
+ dst.bytes = 2;
+ break;
+ }
+
+ case 0x8e: /* mov r/m,Sreg */ {
+ enum x86_segment seg = decode_segment(modrm_reg);
+ generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
+ if ( (rc = load_seg(seg, (uint16_t)src.val, ctxt, ops)) != 0 )
+ goto done;
+ if ( seg == x86_seg_ss )
+ ctxt->retire.flags.mov_ss = 1;
+ dst.type = OP_NONE;
+ break;
+ }
+
+ case 0x8d: /* lea */
+ dst.val = ea.mem.off;
+ break;
+
+ case 0x8f: /* pop (sole member of Grp1a) */
+ generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
+ /* 64-bit mode: POP defaults to a 64-bit operand. */
+ if ( mode_64bit() && (dst.bytes == 4) )
+ dst.bytes = 8;
+ if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes),
+ &dst.val, dst.bytes, ctxt)) != 0 )
+ goto done;
+ break;
+
+ case 0xb0 ... 0xb7: /* mov imm8,r8 */
+ dst.reg = decode_register(
+ (b & 7) | ((rex_prefix & 1) << 3), &_regs, (rex_prefix == 0));
+ dst.val = src.val;
+ break;
+
+ case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
+ if ( dst.bytes == 8 ) /* Fetch more bytes to obtain imm64 */
+ src.val = ((uint32_t)src.val |
+ ((uint64_t)insn_fetch_type(uint32_t) << 32));
+ dst.reg = decode_register(
+ (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
+ dst.val = src.val;
+ break;
+
+ case 0xc0 ... 0xc1: grp2: /* Grp2 */
+ switch ( modrm_reg & 7 )
+ {
+ case 0: /* rol */
+ emulate_2op_SrcB("rol", src, dst, _regs.eflags);
+ break;
+ case 1: /* ror */
+ emulate_2op_SrcB("ror", src, dst, _regs.eflags);
+ break;
+ case 2: /* rcl */
+ emulate_2op_SrcB("rcl", src, dst, _regs.eflags);
+ break;
+ case 3: /* rcr */
+ emulate_2op_SrcB("rcr", src, dst, _regs.eflags);
+ break;
+ case 4: /* sal/shl */
+ case 6: /* sal/shl */
+ emulate_2op_SrcB("sal", src, dst, _regs.eflags);
+ break;
+ case 5: /* shr */
+ emulate_2op_SrcB("shr", src, dst, _regs.eflags);
+ break;
+ case 7: /* sar */
+ emulate_2op_SrcB("sar", src, dst, _regs.eflags);
+ break;
+ }
+ break;
+
+ case 0xc4: /* les */ {
+ unsigned long sel;
+ dst.val = x86_seg_es;
+ les: /* dst.val identifies the segment */
+ generate_exception_if(src.type != OP_MEM, EXC_UD, -1);
+ if ( (rc = ops->read(src.mem.seg, src.mem.off + src.bytes,
+ &sel, 2, ctxt)) != 0 )
+ goto done;
+ if ( (rc = load_seg(dst.val, (uint16_t)sel, ctxt, ops)) != 0 )
+ goto done;
+ dst.val = src.val;
+ break;
+ }
+
+ case 0xc5: /* lds */
+ dst.val = x86_seg_ds;
+ goto les;
+
+ case 0xd0 ... 0xd1: /* Grp2 */
+ src.val = 1;
+ goto grp2;
+
+ case 0xd2 ... 0xd3: /* Grp2 */
+ src.val = _regs.ecx;
+ goto grp2;
+
+ case 0xf6 ... 0xf7: /* Grp3 */
+ switch ( modrm_reg & 7 )
+ {
+ case 0 ... 1: /* test */
+ /* Special case in Grp3: test has an immediate source operand. */
+ src.type = OP_IMM;
+ src.bytes = (d & ByteOp) ? 1 : op_bytes;
+ if ( src.bytes == 8 ) src.bytes = 4;
+ switch ( src.bytes )
+ {
+ case 1: src.val = insn_fetch_type(int8_t); break;
+ case 2: src.val = insn_fetch_type(int16_t); break;
+ case 4: src.val = insn_fetch_type(int32_t); break;
+ }
+ goto test;
+ case 2: /* not */
+ dst.val = ~dst.val;
+ break;
+ case 3: /* neg */
+ emulate_1op("neg", dst, _regs.eflags);
+ break;
+ case 4: /* mul */
+ src = dst;
+ dst.type = OP_REG;
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = *dst.reg;
+ _regs.eflags &= ~(EFLG_OF|EFLG_CF);
+ switch ( src.bytes )
+ {
+ case 1:
+ dst.val = (uint8_t)dst.val;
+ dst.val *= src.val;
+ if ( (uint8_t)dst.val != (uint16_t)dst.val )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ dst.bytes = 2;
+ break;
+ case 2:
+ dst.val = (uint16_t)dst.val;
+ dst.val *= src.val;
+ if ( (uint16_t)dst.val != (uint32_t)dst.val )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ *(uint16_t *)&_regs.edx = dst.val >> 16;
+ break;
+#ifdef __x86_64__
+ case 4:
+ dst.val = (uint32_t)dst.val;
+ dst.val *= src.val;
+ if ( (uint32_t)dst.val != dst.val )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ _regs.edx = (uint32_t)(dst.val >> 32);
+ break;
+#endif
+ default: {
+ unsigned long m[2] = { src.val, dst.val };
+ if ( mul_dbl(m) )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ _regs.edx = m[1];
+ dst.val = m[0];
+ break;
+ }
+ }
+ break;
+ case 5: /* imul */
+ src = dst;
+ dst.type = OP_REG;
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.val = *dst.reg;
+ _regs.eflags &= ~(EFLG_OF|EFLG_CF);
+ switch ( src.bytes )
+ {
+ case 1:
+ dst.val = ((uint16_t)(int8_t)src.val *
+ (uint16_t)(int8_t)dst.val);
+ if ( (int8_t)dst.val != (uint16_t)dst.val )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ dst.bytes = 2;
+ break;
+ case 2:
+ dst.val = ((uint32_t)(int16_t)src.val *
+ (uint32_t)(int16_t)dst.val);
+ if ( (int16_t)dst.val != (uint32_t)dst.val )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ *(uint16_t *)&_regs.edx = dst.val >> 16;
+ break;
+#ifdef __x86_64__
+ case 4:
+ dst.val = ((uint64_t)(int32_t)src.val *
+ (uint64_t)(int32_t)dst.val);
+ if ( (int32_t)dst.val != dst.val )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ _regs.edx = (uint32_t)(dst.val >> 32);
+ break;
+#endif
+ default: {
+ unsigned long m[2] = { src.val, dst.val };
+ if ( imul_dbl(m) )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ _regs.edx = m[1];
+ dst.val = m[0];
+ break;
+ }
+ }
+ break;
+ case 6: /* div */ {
+ unsigned long u[2], v;
+ src = dst;
+ dst.type = OP_REG;
+ dst.reg = (unsigned long *)&_regs.eax;
+ switch ( src.bytes )
+ {
+ case 1:
+ u[0] = (uint16_t)_regs.eax;
+ u[1] = 0;
+ v = (uint8_t)src.val;
+ generate_exception_if(
+ div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]),
+ EXC_DE, -1);
+ dst.val = (uint8_t)u[0];
+ ((uint8_t *)&_regs.eax)[1] = u[1];
+ break;
+ case 2:
+ u[0] = ((uint32_t)_regs.edx << 16) | (uint16_t)_regs.eax;
+ u[1] = 0;
+ v = (uint16_t)src.val;
+ generate_exception_if(
+ div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]),
+ EXC_DE, -1);
+ dst.val = (uint16_t)u[0];
+ *(uint16_t *)&_regs.edx = u[1];
+ break;
+#ifdef __x86_64__
+ case 4:
+ u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax;
+ u[1] = 0;
+ v = (uint32_t)src.val;
+ generate_exception_if(
+ div_dbl(u, v) || ((uint32_t)u[0] != u[0]),
+ EXC_DE, -1);
+ dst.val = (uint32_t)u[0];
+ _regs.edx = (uint32_t)u[1];
+ break;
+#endif
+ default:
+ u[0] = _regs.eax;
+ u[1] = _regs.edx;
+ v = src.val;
+ generate_exception_if(div_dbl(u, v), EXC_DE, -1);
+ dst.val = u[0];
+ _regs.edx = u[1];
+ break;
+ }
+ break;
+ }
+ case 7: /* idiv */ {
+ unsigned long u[2], v;
+ src = dst;
+ dst.type = OP_REG;
+ dst.reg = (unsigned long *)&_regs.eax;
+ switch ( src.bytes )
+ {
+ case 1:
+ u[0] = (int16_t)_regs.eax;
+ u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
+ v = (int8_t)src.val;
+ generate_exception_if(
+ idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]),
+ EXC_DE, -1);
+ dst.val = (int8_t)u[0];
+ ((int8_t *)&_regs.eax)[1] = u[1];
+ break;
+ case 2:
+ u[0] = (int32_t)((_regs.edx << 16) | (uint16_t)_regs.eax);
+ u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
+ v = (int16_t)src.val;
+ generate_exception_if(
+ idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]),
+ EXC_DE, -1);
+ dst.val = (int16_t)u[0];
+ *(int16_t *)&_regs.edx = u[1];
+ break;
+#ifdef __x86_64__
+ case 4:
+ u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax;
+ u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
+ v = (int32_t)src.val;
+ generate_exception_if(
+ idiv_dbl(u, v) || ((int32_t)u[0] != u[0]),
+ EXC_DE, -1);
+ dst.val = (int32_t)u[0];
+ _regs.edx = (uint32_t)u[1];
+ break;
+#endif
+ default:
+ u[0] = _regs.eax;
+ u[1] = _regs.edx;
+ v = src.val;
+ generate_exception_if(idiv_dbl(u, v), EXC_DE, -1);
+ dst.val = u[0];
+ _regs.edx = u[1];
+ break;
+ }
+ break;
+ }
+ default:
+ goto cannot_emulate;
+ }
+ break;
+
+ case 0xfe: /* Grp4 */
+ generate_exception_if((modrm_reg & 7) >= 2, EXC_UD, -1);
+ case 0xff: /* Grp5 */
+ switch ( modrm_reg & 7 )
+ {
+ case 0: /* inc */
+ emulate_1op("inc", dst, _regs.eflags);
+ break;
+ case 1: /* dec */
+ emulate_1op("dec", dst, _regs.eflags);
+ break;
+ case 2: /* call (near) */
+ case 4: /* jmp (near) */
+ if ( (dst.bytes != 8) && mode_64bit() )
+ {
+ dst.bytes = op_bytes = 8;
+ if ( dst.type == OP_REG )
+ dst.val = *dst.reg;
+ else if ( (rc = ops->read(dst.mem.seg, dst.mem.off,
+ &dst.val, 8, ctxt)) != 0 )
+ goto done;
+ }
+ src.val = _regs.eip;
+ _regs.eip = dst.val;
+ if ( (modrm_reg & 7) == 2 )
+ goto push; /* call */
+ dst.type = OP_NONE;
+ break;
+ case 3: /* call (far, absolute indirect) */
+ case 5: /* jmp (far, absolute indirect) */ {
+ unsigned long sel;
+
+ generate_exception_if(dst.type != OP_MEM, EXC_UD, -1);
+
+ if ( (rc = ops->read(dst.mem.seg, dst.mem.off+dst.bytes,
+ &sel, 2, ctxt)) )
+ goto done;
+
+ if ( (modrm_reg & 7) == 3 ) /* call */
+ {
+ struct segment_register reg;
+ fail_if(ops->read_segment == NULL);
+ if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) ||
+ (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
+ reg.sel, op_bytes, ctxt)) ||
+ (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
+ _regs.eip, op_bytes, ctxt)) )
+ goto done;
+ }
+
+ if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 )
+ goto done;
+ _regs.eip = dst.val;
+
+ dst.type = OP_NONE;
+ break;
+ }
+ case 6: /* push */
+ /* 64-bit mode: PUSH defaults to a 64-bit operand. */
+ if ( mode_64bit() && (dst.bytes == 4) )
+ {
+ dst.bytes = 8;
+ if ( dst.type == OP_REG )
+ dst.val = *dst.reg;
+ else if ( (rc = ops->read(dst.mem.seg, dst.mem.off,
+ &dst.val, 8, ctxt)) != 0 )
+ goto done;
+ }
+ if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
+ dst.val, dst.bytes, ctxt)) != 0 )
+ goto done;
+ dst.type = OP_NONE;
+ break;
+ case 7:
+ generate_exception_if(1, EXC_UD, -1);
+ default:
+ goto cannot_emulate;
+ }
+ break;
+ }
+
+ writeback:
+ switch ( dst.type )
+ {
+ case OP_REG:
+ /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+ switch ( dst.bytes )
+ {
+ case 1: *(uint8_t *)dst.reg = (uint8_t)dst.val; break;
+ case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
+ case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
+ case 8: *dst.reg = dst.val; break;
+ }
+ break;
+ case OP_MEM:
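+        /*
+         * Skip the store if a read-modify-write left memory unchanged,
+         * unless the caller forces writeback. LOCKed updates commit via
+         * cmpxchg() so the whole update stays atomic.
+         */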
+ if ( !(d & Mov) && (dst.orig_val == dst.val) &&
+ !ctxt->force_writeback )
+ /* nothing to do */;
+ else if ( lock_prefix )
+ rc = ops->cmpxchg(
+ dst.mem.seg, dst.mem.off, dst.orig_val,
+ dst.val, dst.bytes, ctxt);
+ else
+ rc = ops->write(
+ dst.mem.seg, dst.mem.off, dst.val, dst.bytes, ctxt);
+ if ( rc != 0 )
+ goto done;
+ default:
+ break;
+ }
+
+ /* Commit shadow register state. */
+ _regs.eflags &= ~EFLG_RF;
+ *ctxt->regs = _regs;
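+    /* With TF set, deliver a single-step #DB trap now that state is committed. */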
+ if ( (_regs.eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
+ (ops->inject_hw_exception != NULL) )
+ rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
+
+ done:
+ return rc;
+
+ special_insn:
+ dst.type = OP_NONE;
+
+ /*
+     * The only implicit-operand instructions allowed a LOCK prefix are
+     * CMPXCHG{8,16}B and MOV to/from CRn/DRn.
+ */
+ generate_exception_if(lock_prefix &&
+ ((b < 0x20) || (b > 0x23)) && /* MOV CRn/DRn */
+ (b != 0xc7), /* CMPXCHG{8,16}B */
+ EXC_GP, 0);
+
+ if ( twobyte )
+ goto twobyte_special_insn;
+
+ switch ( b )
+ {
+ case 0x06: /* push %%es */ {
+ struct segment_register reg;
+ src.val = x86_seg_es;
+ push_seg:
+ fail_if(ops->read_segment == NULL);
+ if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 )
+ return rc;
+ /* 64-bit mode: PUSH defaults to a 64-bit operand. */
+ if ( mode_64bit() && (op_bytes == 4) )
+ op_bytes = 8;
+ if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
+ reg.sel, op_bytes, ctxt)) != 0 )
+ goto done;
+ break;
+ }
+
+ case 0x07: /* pop %%es */
+ src.val = x86_seg_es;
+ pop_seg:
+ fail_if(ops->write_segment == NULL);
+ /* 64-bit mode: POP defaults to a 64-bit operand. */
+ if ( mode_64bit() && (op_bytes == 4) )
+ op_bytes = 8;
+ if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
+ &dst.val, op_bytes, ctxt)) != 0 )
+ goto done;
+ if ( (rc = load_seg(src.val, (uint16_t)dst.val, ctxt, ops)) != 0 )
+ return rc;
+ break;
+
+ case 0x0e: /* push %%cs */
+ src.val = x86_seg_cs;
+ goto push_seg;
+
+ case 0x16: /* push %%ss */
+ src.val = x86_seg_ss;
+ goto push_seg;
+
+ case 0x17: /* pop %%ss */
+ src.val = x86_seg_ss;
+ ctxt->retire.flags.mov_ss = 1;
+ goto pop_seg;
+
+ case 0x1e: /* push %%ds */
+ src.val = x86_seg_ds;
+ goto push_seg;
+
+ case 0x1f: /* pop %%ds */
+ src.val = x86_seg_ds;
+ goto pop_seg;
+
+ case 0x27: /* daa */ {
+ uint8_t al = _regs.eax;
+ unsigned long eflags = _regs.eflags;
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ _regs.eflags &= ~(EFLG_CF|EFLG_AF);
+ if ( ((al & 0x0f) > 9) || (eflags & EFLG_AF) )
+ {
+ *(uint8_t *)&_regs.eax += 6;
+ _regs.eflags |= EFLG_AF;
+ }
+ if ( (al > 0x99) || (eflags & EFLG_CF) )
+ {
+ *(uint8_t *)&_regs.eax += 0x60;
+ _regs.eflags |= EFLG_CF;
+ }
+ _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF);
+ _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0;
+ _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0;
+ _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0;
+ break;
+ }
+
+ case 0x2f: /* das */ {
+ uint8_t al = _regs.eax;
+ unsigned long eflags = _regs.eflags;
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ _regs.eflags &= ~(EFLG_CF|EFLG_AF);
+ if ( ((al & 0x0f) > 9) || (eflags & EFLG_AF) )
+ {
+ _regs.eflags |= EFLG_AF;
+ if ( (al < 6) || (eflags & EFLG_CF) )
+ _regs.eflags |= EFLG_CF;
+ *(uint8_t *)&_regs.eax -= 6;
+ }
+ if ( (al > 0x99) || (eflags & EFLG_CF) )
+ {
+ *(uint8_t *)&_regs.eax -= 0x60;
+ _regs.eflags |= EFLG_CF;
+ }
+ _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF);
+ _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0;
+ _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0;
+ _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0;
+ break;
+ }
+
+ case 0x37: /* aaa */
+ case 0x3f: /* aas */
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ _regs.eflags &= ~EFLG_CF;
+ if ( ((uint8_t)_regs.eax > 9) || (_regs.eflags & EFLG_AF) )
+ {
+ ((uint8_t *)&_regs.eax)[0] += (b == 0x37) ? 6 : -6;
+ ((uint8_t *)&_regs.eax)[1] += (b == 0x37) ? 1 : -1;
+ _regs.eflags |= EFLG_CF | EFLG_AF;
+ }
+ ((uint8_t *)&_regs.eax)[0] &= 0x0f;
+ break;
+
+ case 0x40 ... 0x4f: /* inc/dec reg */
+ dst.type = OP_REG;
+ dst.reg = decode_register(b & 7, &_regs, 0);
+ dst.bytes = op_bytes;
+ dst.val = *dst.reg;
+ if ( b & 8 )
+ emulate_1op("dec", dst, _regs.eflags);
+ else
+ emulate_1op("inc", dst, _regs.eflags);
+ break;
+
+ case 0x50 ... 0x57: /* push reg */
+ src.val = *(unsigned long *)decode_register(
+ (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
+ goto push;
+
+ case 0x58 ... 0x5f: /* pop reg */
+ dst.type = OP_REG;
+ dst.reg = decode_register(
+ (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
+ dst.bytes = op_bytes;
+ if ( mode_64bit() && (dst.bytes == 4) )
+ dst.bytes = 8;
+ if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes),
+ &dst.val, dst.bytes, ctxt)) != 0 )
+ goto done;
+ break;
+
+ case 0x60: /* pusha */ {
+ int i;
+ unsigned long regs[] = {
+ _regs.eax, _regs.ecx, _regs.edx, _regs.ebx,
+ _regs.esp, _regs.ebp, _regs.esi, _regs.edi };
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ for ( i = 0; i < 8; i++ )
+ if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
+ regs[i], op_bytes, ctxt)) != 0 )
+ goto done;
+ break;
+ }
+
+ case 0x61: /* popa */ {
+ int i;
+ unsigned long dummy_esp, *regs[] = {
+ (unsigned long *)&_regs.edi, (unsigned long *)&_regs.esi,
+ (unsigned long *)&_regs.ebp, (unsigned long *)&dummy_esp,
+ (unsigned long *)&_regs.ebx, (unsigned long *)&_regs.edx,
+ (unsigned long *)&_regs.ecx, (unsigned long *)&_regs.eax };
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ for ( i = 0; i < 8; i++ )
+ {
+ if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
+ &dst.val, op_bytes, ctxt)) != 0 )
+ goto done;
+ switch ( op_bytes )
+ {
+ case 1: *(uint8_t *)regs[i] = (uint8_t)dst.val; break;
+ case 2: *(uint16_t *)regs[i] = (uint16_t)dst.val; break;
+ case 4: *regs[i] = (uint32_t)dst.val; break; /* 64b: zero-ext */
+ case 8: *regs[i] = dst.val; break;
+ }
+ }
+ break;
+ }
+
+ case 0x68: /* push imm{16,32,64} */
+ src.val = ((op_bytes == 2)
+ ? (int32_t)insn_fetch_type(int16_t)
+ : insn_fetch_type(int32_t));
+ goto push;
+
+ case 0x6a: /* push imm8 */
+ src.val = insn_fetch_type(int8_t);
+ push:
+ d |= Mov; /* force writeback */
+ dst.type = OP_MEM;
+ dst.bytes = op_bytes;
+ if ( mode_64bit() && (dst.bytes == 4) )
+ dst.bytes = 8;
+ dst.val = src.val;
+ dst.mem.seg = x86_seg_ss;
+ dst.mem.off = sp_pre_dec(dst.bytes);
+ break;
+
+ case 0x6c ... 0x6d: /* ins %dx,%es:%edi */ {
+ unsigned long nr_reps = get_rep_prefix();
+ unsigned int port = (uint16_t)_regs.edx;
+ dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
+ dst.mem.seg = x86_seg_es;
+ dst.mem.off = truncate_ea(_regs.edi);
+ if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 )
+ goto done;
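+        /* Prefer the batched rep_ins() hook; otherwise emulate a single
+         * iteration with read_io() and let writeback store the datum. */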
+ if ( (nr_reps > 1) && (ops->rep_ins != NULL) &&
+ ((rc = ops->rep_ins(port, dst.mem.seg, dst.mem.off, dst.bytes,
+ &nr_reps, ctxt)) != X86EMUL_UNHANDLEABLE) )
+ {
+ if ( rc != 0 )
+ goto done;
+ }
+ else
+ {
+ fail_if(ops->read_io == NULL);
+ if ( (rc = ops->read_io(port, dst.bytes, &dst.val, ctxt)) != 0 )
+ goto done;
+ dst.type = OP_MEM;
+ nr_reps = 1;
+ }
+ register_address_increment(
+ _regs.edi,
+ nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
+ put_rep_prefix(nr_reps);
+ break;
+ }
+
+ case 0x6e ... 0x6f: /* outs %esi,%dx */ {
+ unsigned long nr_reps = get_rep_prefix();
+ unsigned int port = (uint16_t)_regs.edx;
+ dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
+ if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 )
+ goto done;
+ if ( (nr_reps > 1) && (ops->rep_outs != NULL) &&
+ ((rc = ops->rep_outs(ea.mem.seg, truncate_ea(_regs.esi),
+ port, dst.bytes,
+ &nr_reps, ctxt)) != X86EMUL_UNHANDLEABLE) )
+ {
+ if ( rc != 0 )
+ goto done;
+ }
+ else
+ {
+ if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
+ &dst.val, dst.bytes, ctxt)) != 0 )
+ goto done;
+ fail_if(ops->write_io == NULL);
+ if ( (rc = ops->write_io(port, dst.bytes, dst.val, ctxt)) != 0 )
+ goto done;
+ nr_reps = 1;
+ }
+ register_address_increment(
+ _regs.esi,
+ nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
+ put_rep_prefix(nr_reps);
+ break;
+ }
+
+ case 0x70 ... 0x7f: /* jcc (short) */ {
+ int rel = insn_fetch_type(int8_t);
+ if ( test_cc(b, _regs.eflags) )
+ jmp_rel(rel);
+ break;
+ }
+
+ case 0x90: /* nop / xchg %%r8,%%rax */
+ if ( !(rex_prefix & 1) )
+ break; /* nop */
+
+ case 0x91 ... 0x97: /* xchg reg,%%rax */
+ src.type = dst.type = OP_REG;
+ src.bytes = dst.bytes = op_bytes;
+ src.reg = (unsigned long *)&_regs.eax;
+ src.val = *src.reg;
+ dst.reg = decode_register(
+ (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
+ dst.val = *dst.reg;
+ goto xchg;
+
+ case 0x98: /* cbw/cwde/cdqe */
+ switch ( op_bytes )
+ {
+ case 2: *(int16_t *)&_regs.eax = (int8_t)_regs.eax; break; /* cbw */
+ case 4: _regs.eax = (uint32_t)(int16_t)_regs.eax; break; /* cwde */
+ case 8: _regs.eax = (int32_t)_regs.eax; break; /* cdqe */
+ }
+ break;
+
+ case 0x99: /* cwd/cdq/cqo */
+ switch ( op_bytes )
+ {
+ case 2:
+ *(int16_t *)&_regs.edx = ((int16_t)_regs.eax < 0) ? -1 : 0;
+ break;
+ case 4:
+ _regs.edx = (uint32_t)(((int32_t)_regs.eax < 0) ? -1 : 0);
+ break;
+ case 8:
+            _regs.edx = ((long)_regs.eax < 0) ? -1 : 0;
+ break;
+ }
+ break;
+
+ case 0x9a: /* call (far, absolute) */ {
+ struct segment_register reg;
+ uint16_t sel;
+ uint32_t eip;
+
+ fail_if(ops->read_segment == NULL);
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+
+ eip = insn_fetch_bytes(op_bytes);
+ sel = insn_fetch_type(uint16_t);
+
+ if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) ||
+ (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
+ reg.sel, op_bytes, ctxt)) ||
+ (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
+ _regs.eip, op_bytes, ctxt)) )
+ goto done;
+
+ if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 )
+ goto done;
+ _regs.eip = eip;
+ break;
+ }
+
+ case 0x9b: /* wait/fwait */
+ fail_if(ops->load_fpu_ctxt == NULL);
+ ops->load_fpu_ctxt(ctxt);
+ __emulate_fpu_insn("fwait");
+ break;
+
+ case 0x9c: /* pushf */
+ src.val = _regs.eflags;
+ goto push;
+
+ case 0x9d: /* popf */ {
+ uint32_t mask = EFLG_VIP | EFLG_VIF | EFLG_VM;
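+        /* Accumulate the EFLAGS bits this POPF must leave unchanged. */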
+ if ( !mode_ring0() )
+ mask |= EFLG_IOPL;
+ if ( !mode_iopl() )
+ mask |= EFLG_IF;
+ /* 64-bit mode: POP defaults to a 64-bit operand. */
+ if ( mode_64bit() && (op_bytes == 4) )
+ op_bytes = 8;
+ if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
+ &dst.val, op_bytes, ctxt)) != 0 )
+ goto done;
+ if ( op_bytes == 2 )
+ dst.val = (uint16_t)dst.val | (_regs.eflags & 0xffff0000u);
+ dst.val &= 0x257fd5;
+ _regs.eflags &= mask;
+ _regs.eflags |= (uint32_t)(dst.val & ~mask) | 0x02;
+ break;
+ }
+
+ case 0x9e: /* sahf */
+ *(uint8_t *)&_regs.eflags = (((uint8_t *)&_regs.eax)[1] & 0xd7) | 0x02;
+ break;
+
+ case 0x9f: /* lahf */
+ ((uint8_t *)&_regs.eax)[1] = (_regs.eflags & 0xd7) | 0x02;
+ break;
+
+ case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
+ /* Source EA is not encoded via ModRM. */
+ dst.type = OP_REG;
+ dst.reg = (unsigned long *)&_regs.eax;
+ dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ if ( (rc = ops->read(ea.mem.seg, insn_fetch_bytes(ad_bytes),
+ &dst.val, dst.bytes, ctxt)) != 0 )
+ goto done;
+ break;
+
+ case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
+ /* Destination EA is not encoded via ModRM. */
+ dst.type = OP_MEM;
+ dst.mem.seg = ea.mem.seg;
+ dst.mem.off = insn_fetch_bytes(ad_bytes);
+ dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ dst.val = (unsigned long)_regs.eax;
+ break;
+
+ case 0xa4 ... 0xa5: /* movs */ {
+ unsigned long nr_reps = get_rep_prefix();
+ dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ dst.mem.seg = x86_seg_es;
+ dst.mem.off = truncate_ea(_regs.edi);
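+        /* As with INS: use batched rep_movs() when available, else copy one
+         * element via read() and let the writeback path complete it. */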
+ if ( (nr_reps > 1) && (ops->rep_movs != NULL) &&
+ ((rc = ops->rep_movs(ea.mem.seg, truncate_ea(_regs.esi),
+ dst.mem.seg, dst.mem.off, dst.bytes,
+ &nr_reps, ctxt)) != X86EMUL_UNHANDLEABLE) )
+ {
+ if ( rc != 0 )
+ goto done;
+ }
+ else
+ {
+ if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
+ &dst.val, dst.bytes, ctxt)) != 0 )
+ goto done;
+ dst.type = OP_MEM;
+ nr_reps = 1;
+ }
+ register_address_increment(
+ _regs.esi,
+ nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
+ register_address_increment(
+ _regs.edi,
+ nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
+ put_rep_prefix(nr_reps);
+ break;
+ }
+
+ case 0xa6 ... 0xa7: /* cmps */ {
+ unsigned long next_eip = _regs.eip;
+ get_rep_prefix();
+ src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
+ &dst.val, dst.bytes, ctxt)) ||
+ (rc = ops->read(x86_seg_es, truncate_ea(_regs.edi),
+ &src.val, src.bytes, ctxt)) )
+ goto done;
+ register_address_increment(
+ _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ register_address_increment(
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -src.bytes : src.bytes);
+ put_rep_prefix(1);
+ /* cmp: dst - src ==> src=*%%edi,dst=*%%esi ==> *%%esi - *%%edi */
+ emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
+ if ( ((rep_prefix == REPE_PREFIX) && !(_regs.eflags & EFLG_ZF)) ||
+ ((rep_prefix == REPNE_PREFIX) && (_regs.eflags & EFLG_ZF)) )
+ _regs.eip = next_eip;
+ break;
+ }
+
+ case 0xaa ... 0xab: /* stos */ {
+ /* unsigned long max_reps = */get_rep_prefix();
+ dst.type = OP_MEM;
+ dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ dst.mem.seg = x86_seg_es;
+ dst.mem.off = truncate_ea(_regs.edi);
+ dst.val = _regs.eax;
+ register_address_increment(
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ put_rep_prefix(1);
+ break;
+ }
+
+ case 0xac ... 0xad: /* lods */ {
+ /* unsigned long max_reps = */get_rep_prefix();
+ dst.type = OP_REG;
+ dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ dst.reg = (unsigned long *)&_regs.eax;
+ if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
+ &dst.val, dst.bytes, ctxt)) != 0 )
+ goto done;
+ register_address_increment(
+ _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ put_rep_prefix(1);
+ break;
+ }
+
+ case 0xae ... 0xaf: /* scas */ {
+ unsigned long next_eip = _regs.eip;
+ get_rep_prefix();
+ src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ dst.val = _regs.eax;
+ if ( (rc = ops->read(x86_seg_es, truncate_ea(_regs.edi),
+ &src.val, src.bytes, ctxt)) != 0 )
+ goto done;
+ register_address_increment(
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -src.bytes : src.bytes);
+ put_rep_prefix(1);
+ /* cmp: dst - src ==> src=*%%edi,dst=%%eax ==> %%eax - *%%edi */
+ emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
+ if ( ((rep_prefix == REPE_PREFIX) && !(_regs.eflags & EFLG_ZF)) ||
+ ((rep_prefix == REPNE_PREFIX) && (_regs.eflags & EFLG_ZF)) )
+ _regs.eip = next_eip;
+ break;
+ }
+
+ case 0xc2: /* ret imm16 (near) */
+ case 0xc3: /* ret (near) */ {
+ int offset = (b == 0xc2) ? insn_fetch_type(uint16_t) : 0;
+ op_bytes = mode_64bit() ? 8 : op_bytes;
+ if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes + offset),
+ &dst.val, op_bytes, ctxt)) != 0 )
+ goto done;
+ _regs.eip = dst.val;
+ break;
+ }
+
+ case 0xc8: /* enter imm16,imm8 */ {
+ uint16_t size = insn_fetch_type(uint16_t);
+ uint8_t depth = insn_fetch_type(uint8_t) & 31;
+ int i;
+
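+        /*
+         * ENTER: push the old frame pointer, copy up to depth-1 outer frame
+         * pointers for nested procedures, push the new frame pointer, then
+         * reserve 'size' bytes of stack.
+         */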
+ dst.type = OP_REG;
+ dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
+ dst.reg = (unsigned long *)&_regs.ebp;
+ if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
+ _regs.ebp, dst.bytes, ctxt)) )
+ goto done;
+ dst.val = _regs.esp;
+
+ if ( depth > 0 )
+ {
+ for ( i = 1; i < depth; i++ )
+ {
+ unsigned long ebp, temp_data;
+ ebp = truncate_word(_regs.ebp - i*dst.bytes, ctxt->sp_size/8);
+ if ( (rc = ops->read(x86_seg_ss, ebp,
+ &temp_data, dst.bytes, ctxt)) ||
+ (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
+ temp_data, dst.bytes, ctxt)) )
+ goto done;
+ }
+ if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
+ dst.val, dst.bytes, ctxt)) )
+ goto done;
+ }
+
+ sp_pre_dec(size);
+ break;
+ }
+
+ case 0xc9: /* leave */
+ /* First writeback, to %%esp. */
+ dst.type = OP_REG;
+ dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
+ dst.reg = (unsigned long *)&_regs.esp;
+ dst.val = _regs.ebp;
+
+ /* Flush first writeback, since there is a second. */
+ switch ( dst.bytes )
+ {
+ case 1: *(uint8_t *)dst.reg = (uint8_t)dst.val; break;
+ case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
+ case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
+ case 8: *dst.reg = dst.val; break;
+ }
+
+ /* Second writeback, to %%ebp. */
+ dst.reg = (unsigned long *)&_regs.ebp;
+ if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes),
+ &dst.val, dst.bytes, ctxt)) )
+ goto done;
+ break;
+
+ case 0xca: /* ret imm16 (far) */
+ case 0xcb: /* ret (far) */ {
+ int offset = (b == 0xca) ? insn_fetch_type(uint16_t) : 0;
+ op_bytes = mode_64bit() ? 8 : op_bytes;
+ if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
+ &dst.val, op_bytes, ctxt)) ||
+ (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes + offset),
+ &src.val, op_bytes, ctxt)) ||
+ (rc = load_seg(x86_seg_cs, (uint16_t)src.val, ctxt, ops)) )
+ goto done;
+ _regs.eip = dst.val;
+ break;
+ }
+
+ case 0xcc: /* int3 */
+ src.val = EXC_BP;
+ goto swint;
+
+ case 0xcd: /* int imm8 */
+ src.val = insn_fetch_type(uint8_t);
+ swint:
+ fail_if(ops->inject_sw_interrupt == NULL);
+ rc = ops->inject_sw_interrupt(src.val, _regs.eip - ctxt->regs->eip,
+ ctxt) ? : X86EMUL_EXCEPTION;
+ goto done;
+
+ case 0xce: /* into */
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ if ( !(_regs.eflags & EFLG_OF) )
+ break;
+ src.val = EXC_OF;
+ goto swint;
+
+ case 0xcf: /* iret */ {
+ unsigned long cs, eip, eflags;
+ uint32_t mask = EFLG_VIP | EFLG_VIF | EFLG_VM;
+ if ( !mode_ring0() )
+ mask |= EFLG_IOPL;
+ if ( !mode_iopl() )
+ mask |= EFLG_IF;
+ fail_if(!in_realmode(ctxt, ops));
+ if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
+ &eip, op_bytes, ctxt)) ||
+ (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
+ &cs, op_bytes, ctxt)) ||
+ (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes),
+ &eflags, op_bytes, ctxt)) )
+ goto done;
+ if ( op_bytes == 2 )
+ eflags = (uint16_t)eflags | (_regs.eflags & 0xffff0000u);
+ eflags &= 0x257fd5;
+ _regs.eflags &= mask;
+ _regs.eflags |= (uint32_t)(eflags & ~mask) | 0x02;
+ _regs.eip = eip;
+ if ( (rc = load_seg(x86_seg_cs, (uint16_t)cs, ctxt, ops)) != 0 )
+ goto done;
+ break;
+ }
+
+ case 0xd4: /* aam */ {
+ unsigned int base = insn_fetch_type(uint8_t);
+ uint8_t al = _regs.eax;
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ generate_exception_if(base == 0, EXC_DE, -1);
+ *(uint16_t *)&_regs.eax = ((al / base) << 8) | (al % base);
+ _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF);
+ _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0;
+ _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0;
+ _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0;
+ break;
+ }
+
+ case 0xd5: /* aad */ {
+ unsigned int base = insn_fetch_type(uint8_t);
+ uint16_t ax = _regs.eax;
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ *(uint16_t *)&_regs.eax = (uint8_t)(ax + ((ax >> 8) * base));
+ _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF);
+ _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0;
+ _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0;
+ _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0;
+ break;
+ }
+
+ case 0xd6: /* salc */
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ *(uint8_t *)&_regs.eax = (_regs.eflags & EFLG_CF) ? 0xff : 0x00;
+ break;
+
+ case 0xd7: /* xlat */ {
+ unsigned long al = (uint8_t)_regs.eax;
+ if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.ebx + al),
+ &al, 1, ctxt)) != 0 )
+ goto done;
+ *(uint8_t *)&_regs.eax = al;
+ break;
+ }
+
+ case 0xd9: /* FPU 0xd9 */
+ fail_if(ops->load_fpu_ctxt == NULL);
+ ops->load_fpu_ctxt(ctxt);
+ switch ( modrm )
+ {
+ case 0xc0: __emulate_fpu_insn(".byte 0xd9,0xc0"); break;
+ case 0xc1: __emulate_fpu_insn(".byte 0xd9,0xc1"); break;
+ case 0xc2: __emulate_fpu_insn(".byte 0xd9,0xc2"); break;
+ case 0xc3: __emulate_fpu_insn(".byte 0xd9,0xc3"); break;
+ case 0xc4: __emulate_fpu_insn(".byte 0xd9,0xc4"); break;
+ case 0xc5: __emulate_fpu_insn(".byte 0xd9,0xc5"); break;
+ case 0xc6: __emulate_fpu_insn(".byte 0xd9,0xc6"); break;
+ case 0xc7: __emulate_fpu_insn(".byte 0xd9,0xc7"); break;
+ case 0xe0: __emulate_fpu_insn(".byte 0xd9,0xe0"); break;
+ case 0xe8: __emulate_fpu_insn(".byte 0xd9,0xe8"); break;
+ case 0xee: __emulate_fpu_insn(".byte 0xd9,0xee"); break;
+ default:
+ fail_if((modrm_reg & 7) != 7);
+ fail_if(modrm >= 0xc0);
+ /* fnstcw m2byte */
+ ea.bytes = 2;
+ dst = ea;
+ asm volatile ( "fnstcw %0" : "=m" (dst.val) );
+ }
+ break;
+
+ case 0xdb: /* FPU 0xdb */
+ fail_if(ops->load_fpu_ctxt == NULL);
+ ops->load_fpu_ctxt(ctxt);
+ fail_if(modrm != 0xe3);
+ /* fninit */
+ asm volatile ( "fninit" );
+ break;
+
+ case 0xdd: /* FPU 0xdd */
+ fail_if(ops->load_fpu_ctxt == NULL);
+ ops->load_fpu_ctxt(ctxt);
+ fail_if((modrm_reg & 7) != 7);
+ fail_if(modrm >= 0xc0);
+ /* fnstsw m2byte */
+ ea.bytes = 2;
+ dst = ea;
+ asm volatile ( "fnstsw %0" : "=m" (dst.val) );
+ break;
+
+ case 0xde: /* FPU 0xde */
+ fail_if(ops->load_fpu_ctxt == NULL);
+ ops->load_fpu_ctxt(ctxt);
+ switch ( modrm )
+ {
+ case 0xd9: __emulate_fpu_insn(".byte 0xde,0xd9"); break;
+ case 0xf8: __emulate_fpu_insn(".byte 0xde,0xf8"); break;
+ case 0xf9: __emulate_fpu_insn(".byte 0xde,0xf9"); break;
+ case 0xfa: __emulate_fpu_insn(".byte 0xde,0xfa"); break;
+ case 0xfb: __emulate_fpu_insn(".byte 0xde,0xfb"); break;
+ case 0xfc: __emulate_fpu_insn(".byte 0xde,0xfc"); break;
+ case 0xfd: __emulate_fpu_insn(".byte 0xde,0xfd"); break;
+ case 0xfe: __emulate_fpu_insn(".byte 0xde,0xfe"); break;
+ case 0xff: __emulate_fpu_insn(".byte 0xde,0xff"); break;
+ default: goto cannot_emulate;
+ }
+ break;
+
+ case 0xdf: /* FPU 0xdf */
+ fail_if(ops->load_fpu_ctxt == NULL);
+ ops->load_fpu_ctxt(ctxt);
+ fail_if(modrm != 0xe0);
+ /* fnstsw %ax */
+ dst.bytes = 2;
+ dst.type = OP_REG;
+ dst.reg = (unsigned long *)&_regs.eax;
+ asm volatile ( "fnstsw %0" : "=m" (dst.val) );
+ break;
+
+ case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
+ int rel = insn_fetch_type(int8_t);
+ int do_jmp = !(_regs.eflags & EFLG_ZF); /* loopnz */
+ if ( b == 0xe1 )
+ do_jmp = !do_jmp; /* loopz */
+ else if ( b == 0xe2 )
+ do_jmp = 1; /* loop */
+ switch ( ad_bytes )
+ {
+ case 2:
+ do_jmp &= --(*(uint16_t *)&_regs.ecx) != 0;
+ break;
+ case 4:
+ do_jmp &= --(*(uint32_t *)&_regs.ecx) != 0;
+ _regs.ecx = (uint32_t)_regs.ecx; /* zero extend in x86/64 mode */
+ break;
+ default: /* case 8: */
+ do_jmp &= --_regs.ecx != 0;
+ break;
+ }
+ if ( do_jmp )
+ jmp_rel(rel);
+ break;
+ }
+
+ case 0xe3: /* jcxz/jecxz (short) */ {
+ int rel = insn_fetch_type(int8_t);
+ if ( (ad_bytes == 2) ? !(uint16_t)_regs.ecx :
+ (ad_bytes == 4) ? !(uint32_t)_regs.ecx : !_regs.ecx )
+ jmp_rel(rel);
+ break;
+ }
+
+ case 0xe4: /* in imm8,%al */
+ case 0xe5: /* in imm8,%eax */
+ case 0xe6: /* out %al,imm8 */
+ case 0xe7: /* out %eax,imm8 */
+ case 0xec: /* in %dx,%al */
+ case 0xed: /* in %dx,%eax */
+ case 0xee: /* out %al,%dx */
+ case 0xef: /* out %eax,%dx */ {
+ unsigned int port = ((b < 0xe8)
+ ? insn_fetch_type(uint8_t)
+ : (uint16_t)_regs.edx);
+ op_bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
+ if ( (rc = ioport_access_check(port, op_bytes, ctxt, ops)) != 0 )
+ goto done;
+ if ( b & 2 )
+ {
+ /* out */
+ fail_if(ops->write_io == NULL);
+            rc = ops->write_io(port, op_bytes, _regs.eax, ctxt);
+        }
+ else
+ {
+ /* in */
+ dst.type = OP_REG;
+ dst.bytes = op_bytes;
+ dst.reg = (unsigned long *)&_regs.eax;
+ fail_if(ops->read_io == NULL);
+ rc = ops->read_io(port, dst.bytes, &dst.val, ctxt);
+ }
+ if ( rc != 0 )
+ goto done;
+ break;
+ }
+
+ case 0xe8: /* call (near) */ {
+ int rel = (((op_bytes == 2) && !mode_64bit())
+ ? (int32_t)insn_fetch_type(int16_t)
+ : insn_fetch_type(int32_t));
+ op_bytes = mode_64bit() ? 8 : op_bytes;
+ src.val = _regs.eip;
+ jmp_rel(rel);
+ goto push;
+ }
+
+ case 0xe9: /* jmp (near) */ {
+ int rel = (((op_bytes == 2) && !mode_64bit())
+ ? (int32_t)insn_fetch_type(int16_t)
+ : insn_fetch_type(int32_t));
+ jmp_rel(rel);
+ break;
+ }
+
+ case 0xea: /* jmp (far, absolute) */ {
+ uint16_t sel;
+ uint32_t eip;
+ generate_exception_if(mode_64bit(), EXC_UD, -1);
+ eip = insn_fetch_bytes(op_bytes);
+ sel = insn_fetch_type(uint16_t);
+ if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 )
+ goto done;
+ _regs.eip = eip;
+ break;
+ }
+
+ case 0xeb: /* jmp (short) */ {
+ int rel = insn_fetch_type(int8_t);
+ jmp_rel(rel);
+ break;
+ }
+
+ case 0xf1: /* int1 (icebp) */
+ src.val = EXC_DB;
+ goto swint;
+
+ case 0xf4: /* hlt */
+ ctxt->retire.flags.hlt = 1;
+ break;
+
+ case 0xf5: /* cmc */
+ _regs.eflags ^= EFLG_CF;
+ break;
+
+ case 0xf8: /* clc */
+ _regs.eflags &= ~EFLG_CF;
+ break;
+
+ case 0xf9: /* stc */
+ _regs.eflags |= EFLG_CF;
+ break;
+
+ case 0xfa: /* cli */
+ generate_exception_if(!mode_iopl(), EXC_GP, 0);
+ _regs.eflags &= ~EFLG_IF;
+ break;
+
+ case 0xfb: /* sti */
+ generate_exception_if(!mode_iopl(), EXC_GP, 0);
+ if ( !(_regs.eflags & EFLG_IF) )
+ {
+ _regs.eflags |= EFLG_IF;
+ ctxt->retire.flags.sti = 1;
+ }
+ break;
+
+ case 0xfc: /* cld */
+ _regs.eflags &= ~EFLG_DF;
+ break;
+
+ case 0xfd: /* std */
+ _regs.eflags |= EFLG_DF;
+ break;
+ }
+ goto writeback;
+
+ twobyte_insn:
+ switch ( b )
+ {
+ case 0x40 ... 0x4f: /* cmovcc */
+ dst.val = src.val;
+ if ( !test_cc(b, _regs.eflags) )
+ dst.type = OP_NONE;
+ break;
+
+ case 0x90 ... 0x9f: /* setcc */
+ dst.val = test_cc(b, _regs.eflags);
+ break;
+
+ case 0xb0 ... 0xb1: /* cmpxchg */
+ /* Save real source value, then compare EAX against destination. */
+ src.orig_val = src.val;
+ src.val = _regs.eax;
+ emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
+ if ( _regs.eflags & EFLG_ZF )
+ {
+ /* Success: write back to memory. */
+ dst.val = src.orig_val;
+ }
+ else
+ {
+ /* Failure: write the value we saw to EAX. */
+ dst.type = OP_REG;
+ dst.reg = (unsigned long *)&_regs.eax;
+ }
+ break;
+
+ case 0xa3: bt: /* bt */
+ emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
+ break;
+
+ case 0xa4: /* shld imm8,r,r/m */
+ case 0xa5: /* shld %%cl,r,r/m */
+ case 0xac: /* shrd imm8,r,r/m */
+ case 0xad: /* shrd %%cl,r,r/m */ {
+ uint8_t shift, width = dst.bytes << 3;
+ shift = (b & 1) ? (uint8_t)_regs.ecx : insn_fetch_type(uint8_t);
+ if ( (shift &= width - 1) == 0 )
+ break;
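+        /* Bits vacated in dst are filled from src: shld shifts in src's top
+         * bits, shrd shifts in src's bottom bits. */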
+ dst.orig_val = truncate_word(dst.val, dst.bytes);
+ dst.val = ((shift == width) ? src.val :
+ (b & 8) ?
+ /* shrd */
+ ((dst.orig_val >> shift) |
+ truncate_word(src.val << (width - shift), dst.bytes)) :
+ /* shld */
+ ((dst.orig_val << shift) |
+ ((src.val >> (width - shift)) & ((1ull << shift) - 1))));
+ dst.val = truncate_word(dst.val, dst.bytes);
+ _regs.eflags &= ~(EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_PF|EFLG_CF);
+ if ( (dst.val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
+ _regs.eflags |= EFLG_CF;
+ if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 )
+ _regs.eflags |= EFLG_OF;
+ _regs.eflags |= ((dst.val >> (width - 1)) & 1) ? EFLG_SF : 0;
+ _regs.eflags |= (dst.val == 0) ? EFLG_ZF : 0;
+ _regs.eflags |= even_parity(dst.val) ? EFLG_PF : 0;
+ break;
+ }
+
+ case 0xb3: btr: /* btr */
+ emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
+ break;
+
+ case 0xab: bts: /* bts */
+ emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
+ break;
+
+ case 0xaf: /* imul */
+ _regs.eflags &= ~(EFLG_OF|EFLG_CF);
+ switch ( dst.bytes )
+ {
+ case 2:
+ dst.val = ((uint32_t)(int16_t)src.val *
+ (uint32_t)(int16_t)dst.val);
+ if ( (int16_t)dst.val != (uint32_t)dst.val )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ break;
+#ifdef __x86_64__
+ case 4:
+ dst.val = ((uint64_t)(int32_t)src.val *
+ (uint64_t)(int32_t)dst.val);
+ if ( (int32_t)dst.val != dst.val )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ break;
+#endif
+ default: {
+ unsigned long m[2] = { src.val, dst.val };
+ if ( imul_dbl(m) )
+ _regs.eflags |= EFLG_OF|EFLG_CF;
+ dst.val = m[0];
+ break;
+ }
+ }
+ break;
+
+ case 0xb2: /* lss */
+ dst.val = x86_seg_ss;
+ goto les;
+
+ case 0xb4: /* lfs */
+ dst.val = x86_seg_fs;
+ goto les;
+
+ case 0xb5: /* lgs */
+ dst.val = x86_seg_gs;
+ goto les;
+
+ case 0xb6: /* movzx rm8,r{16,32,64} */
+ /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
+ dst.reg = decode_register(modrm_reg, &_regs, 0);
+ dst.bytes = op_bytes;
+ dst.val = (uint8_t)src.val;
+ break;
+
+ case 0xbc: /* bsf */ {
+ int zf;
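+        /* When the source is zero, hardware leaves the destination
+         * undefined; only ZF is architecturally meaningful (bsr likewise). */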
+ asm ( "bsf %2,%0; setz %b1"
+ : "=r" (dst.val), "=q" (zf)
+ : "r" (src.val), "1" (0) );
+ _regs.eflags &= ~EFLG_ZF;
+ _regs.eflags |= zf ? EFLG_ZF : 0;
+ break;
+ }
+
+ case 0xbd: /* bsr */ {
+ int zf;
+ asm ( "bsr %2,%0; setz %b1"
+ : "=r" (dst.val), "=q" (zf)
+ : "r" (src.val), "1" (0) );
+ _regs.eflags &= ~EFLG_ZF;
+ _regs.eflags |= zf ? EFLG_ZF : 0;
+ break;
+ }
+
+ case 0xb7: /* movzx rm16,r{16,32,64} */
+ dst.val = (uint16_t)src.val;
+ break;
+
+ case 0xbb: btc: /* btc */
+ emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
+ break;
+
+ case 0xba: /* Grp8 */
+ switch ( modrm_reg & 7 )
+ {
+ case 4: goto bt;
+ case 5: goto bts;
+ case 6: goto btr;
+ case 7: goto btc;
+ default: generate_exception_if(1, EXC_UD, -1);
+ }
+ break;
+
+ case 0xbe: /* movsx rm8,r{16,32,64} */
+ /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
+ dst.reg = decode_register(modrm_reg, &_regs, 0);
+ dst.bytes = op_bytes;
+ dst.val = (int8_t)src.val;
+ break;
+
+ case 0xbf: /* movsx rm16,r{16,32,64} */
+ dst.val = (int16_t)src.val;
+ break;
+
+ case 0xc0 ... 0xc1: /* xadd */
+ /* Write back the register source. */
+ switch ( dst.bytes )
+ {
+ case 1: *(uint8_t *)src.reg = (uint8_t)dst.val; break;
+ case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
+ case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
+ case 8: *src.reg = dst.val; break;
+ }
+ goto add;
+ }
+ goto writeback;
+
+ twobyte_special_insn:
+ switch ( b )
+ {
+ case 0x01: /* Grp7 */ {
+ struct segment_register reg;
+ unsigned long base, limit, cr0, cr0w;
+
+ if ( modrm == 0xdf ) /* invlpga */
+ {
+ generate_exception_if(in_realmode(ctxt, ops), EXC_UD, -1);
+ generate_exception_if(!mode_ring0(), EXC_GP, 0);
+ fail_if(ops->invlpg == NULL);
+ if ( (rc = ops->invlpg(x86_seg_none, truncate_ea(_regs.eax),
+ ctxt)) )
+ goto done;
+ break;
+ }
+
+ switch ( modrm_reg & 7 )
+ {
+ case 0: /* sgdt */
+ case 1: /* sidt */
+ generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+ fail_if(ops->read_segment == NULL);
+ if ( (rc = ops->read_segment((modrm_reg & 1) ?
+ x86_seg_idtr : x86_seg_gdtr,
+ &reg, ctxt)) )
+ goto done;
+ if ( op_bytes == 2 )
+ reg.base &= 0xffffff;
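+            /* Memory image: 16-bit limit at offset 0, base at offset 2. */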
+ if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0,
+ reg.limit, 2, ctxt)) ||
+ (rc = ops->write(ea.mem.seg, ea.mem.off+2,
+ reg.base, mode_64bit() ? 8 : 4, ctxt)) )
+ goto done;
+ break;
+ case 2: /* lgdt */
+ case 3: /* lidt */
+ generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+ fail_if(ops->write_segment == NULL);
+ memset(&reg, 0, sizeof(reg));
+ if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0,
+ &limit, 2, ctxt)) ||
+ (rc = ops->read(ea.mem.seg, ea.mem.off+2,
+ &base, mode_64bit() ? 8 : 4, ctxt)) )
+ goto done;
+ reg.base = base;
+ reg.limit = limit;
+ if ( op_bytes == 2 )
+ reg.base &= 0xffffff;
+ if ( (rc = ops->write_segment((modrm_reg & 1) ?
+ x86_seg_idtr : x86_seg_gdtr,
+ &reg, ctxt)) )
+ goto done;
+ break;
+ case 4: /* smsw */
+ ea.bytes = 2;
+ dst = ea;
+ fail_if(ops->read_cr == NULL);
+ if ( (rc = ops->read_cr(0, &dst.val, ctxt)) )
+ goto done;
+ d |= Mov; /* force writeback */
+ break;
+ case 6: /* lmsw */
+ fail_if(ops->read_cr == NULL);
+ fail_if(ops->write_cr == NULL);
+ if ( (rc = ops->read_cr(0, &cr0, ctxt)) )
+ goto done;
+ if ( ea.type == OP_REG )
+ cr0w = *ea.reg;
+ else if ( (rc = ops->read(ea.mem.seg, ea.mem.off,
+ &cr0w, 2, ctxt)) )
+ goto done;
+ cr0 &= 0xffff0000;
+ cr0 |= (uint16_t)cr0w;
+ if ( (rc = ops->write_cr(0, cr0, ctxt)) )
+ goto done;
+ break;
+ case 7: /* invlpg */
+ generate_exception_if(!mode_ring0(), EXC_GP, 0);
+ generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+ fail_if(ops->invlpg == NULL);
+ if ( (rc = ops->invlpg(ea.mem.seg, ea.mem.off, ctxt)) )
+ goto done;
+ break;
+ default:
+ goto cannot_emulate;
+ }
+ break;
+ }
+
+ case 0x06: /* clts */
+ generate_exception_if(!mode_ring0(), EXC_GP, 0);
+ fail_if((ops->read_cr == NULL) || (ops->write_cr == NULL));
+ if ( (rc = ops->read_cr(0, &dst.val, ctxt)) ||
+ (rc = ops->write_cr(0, dst.val&~8, ctxt)) )
+ goto done;
+ break;
+
+ case 0x08: /* invd */
+ case 0x09: /* wbinvd */
+ generate_exception_if(!mode_ring0(), EXC_GP, 0);
+ fail_if(ops->wbinvd == NULL);
+ if ( (rc = ops->wbinvd(ctxt)) != 0 )
+ goto done;
+ break;
+
+ case 0x0d: /* GrpP (prefetch) */
+ case 0x18: /* Grp16 (prefetch/nop) */
+ case 0x19 ... 0x1f: /* nop (amd-defined) */
+ break;
+
+ case 0x20: /* mov cr,reg */
+ case 0x21: /* mov dr,reg */
+ case 0x22: /* mov reg,cr */
+ case 0x23: /* mov reg,dr */
+ generate_exception_if(ea.type != OP_REG, EXC_UD, -1);
+ generate_exception_if(!mode_ring0(), EXC_GP, 0);
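+        /* A LOCK prefix supplies the extra register-number bit: AMD's
+         * alternative encoding for CR8 accesses. */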
+ modrm_reg |= lock_prefix << 3;
+ if ( b & 2 )
+ {
+ /* Write to CR/DR. */
+ src.val = *(unsigned long *)decode_register(modrm_rm, &_regs, 0);
+ if ( !mode_64bit() )
+ src.val = (uint32_t)src.val;
+ rc = ((b & 1)
+ ? (ops->write_dr
+ ? ops->write_dr(modrm_reg, src.val, ctxt)
+ : X86EMUL_UNHANDLEABLE)
+ : (ops->write_cr
+ ? ops->write_cr(modrm_reg, src.val, ctxt)
+ : X86EMUL_UNHANDLEABLE));
+ }
+ else
+ {
+ /* Read from CR/DR. */
+ dst.type = OP_REG;
+ dst.bytes = mode_64bit() ? 8 : 4;
+ dst.reg = decode_register(modrm_rm, &_regs, 0);
+ rc = ((b & 1)
+ ? (ops->read_dr
+ ? ops->read_dr(modrm_reg, &dst.val, ctxt)
+ : X86EMUL_UNHANDLEABLE)
+ : (ops->read_cr
+ ? ops->read_cr(modrm_reg, &dst.val, ctxt)
+ : X86EMUL_UNHANDLEABLE));
+ }
+ if ( rc != 0 )
+ goto done;
+ break;
+
+ case 0x30: /* wrmsr */ {
+ uint64_t val = ((uint64_t)_regs.edx << 32) | (uint32_t)_regs.eax;
+ generate_exception_if(!mode_ring0(), EXC_GP, 0);
+ fail_if(ops->write_msr == NULL);
+ if ( (rc = ops->write_msr((uint32_t)_regs.ecx, val, ctxt)) != 0 )
+ goto done;
+ break;
+ }
+
+ case 0x31: /* rdtsc */ {
+ unsigned long cr4;
+ uint64_t val;
+ fail_if(ops->read_cr == NULL);
+ if ( (rc = ops->read_cr(4, &cr4, ctxt)) )
+ goto done;
+ generate_exception_if((cr4 & CR4_TSD) && !mode_ring0(), EXC_GP, 0);
+ fail_if(ops->read_msr == NULL);
+ if ( (rc = ops->read_msr(MSR_TSC, &val, ctxt)) != 0 )
+ goto done;
+ _regs.edx = (uint32_t)(val >> 32);
+ _regs.eax = (uint32_t)(val >> 0);
+ break;
+ }
+
+ case 0x32: /* rdmsr */ {
+ uint64_t val;
+ generate_exception_if(!mode_ring0(), EXC_GP, 0);
+ fail_if(ops->read_msr == NULL);
+ if ( (rc = ops->read_msr((uint32_t)_regs.ecx, &val, ctxt)) != 0 )
+ goto done;
+ _regs.edx = (uint32_t)(val >> 32);
+ _regs.eax = (uint32_t)(val >> 0);
+ break;
+ }
+
+ case 0x80 ... 0x8f: /* jcc (near) */ {
+ int rel = (((op_bytes == 2) && !mode_64bit())
+ ? (int32_t)insn_fetch_type(int16_t)
+ : insn_fetch_type(int32_t));
+ if ( test_cc(b, _regs.eflags) )
+ jmp_rel(rel);
+ break;
+ }
+
+ case 0xa0: /* push %%fs */
+ src.val = x86_seg_fs;
+ goto push_seg;
+
+ case 0xa1: /* pop %%fs */
+ src.val = x86_seg_fs;
+ goto pop_seg;
+
+ case 0xa2: /* cpuid */ {
+ unsigned int eax = _regs.eax, ebx = _regs.ebx;
+ unsigned int ecx = _regs.ecx, edx = _regs.edx;
+ fail_if(ops->cpuid == NULL);
+ if ( (rc = ops->cpuid(&eax, &ebx, &ecx, &edx, ctxt)) != 0 )
+ goto done;
+ _regs.eax = eax; _regs.ebx = ebx;
+ _regs.ecx = ecx; _regs.edx = edx;
+ break;
+ }
+
+ case 0xa8: /* push %%gs */
+ src.val = x86_seg_gs;
+ goto push_seg;
+
+ case 0xa9: /* pop %%gs */
+ src.val = x86_seg_gs;
+ goto pop_seg;
+
+ case 0xc7: /* Grp9 (cmpxchg8b) */
+#if defined(__i386__)
+ {
+ unsigned long old_lo, old_hi;
+ generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
+ generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
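+        /* Compare EDX:EAX with the 8-byte operand: on mismatch load the old
+         * value and clear ZF; on match write ECX:EBX via the cmpxchg8b()
+         * hook and set ZF. */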
+ if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &old_lo, 4, ctxt)) ||
+ (rc = ops->read(ea.mem.seg, ea.mem.off+4, &old_hi, 4, ctxt)) )
+ goto done;
+ if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
+ {
+ _regs.eax = old_lo;
+ _regs.edx = old_hi;
+ _regs.eflags &= ~EFLG_ZF;
+ }
+ else if ( ops->cmpxchg8b == NULL )
+ {
+ rc = X86EMUL_UNHANDLEABLE;
+ goto done;
+ }
+ else
+ {
+ if ( (rc = ops->cmpxchg8b(ea.mem.seg, ea.mem.off, old_lo, old_hi,
+ _regs.ebx, _regs.ecx, ctxt)) != 0 )
+ goto done;
+ _regs.eflags |= EFLG_ZF;
+ }
+ break;
+ }
+#elif defined(__x86_64__)
+ {
+ unsigned long old, new;
+ generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
+ generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+ if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &old, 8, ctxt)) != 0 )
+ goto done;
+ if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
+ ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
+ {
+ _regs.eax = (uint32_t)(old>>0);
+ _regs.edx = (uint32_t)(old>>32);
+ _regs.eflags &= ~EFLG_ZF;
+ }
+ else
+ {
+ new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
+ if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
+ new, 8, ctxt)) != 0 )
+ goto done;
+ _regs.eflags |= EFLG_ZF;
+ }
+ break;
+ }
+#endif
+
+ case 0xc8 ... 0xcf: /* bswap */
+ dst.type = OP_REG;
+ dst.reg = decode_register(
+ (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
+ switch ( dst.bytes = op_bytes )
+ {
+ default: /* case 2: */
+ /* Undefined behaviour. Writes zero on all tested CPUs. */
+ dst.val = 0;
+ break;
+ case 4:
+#ifdef __x86_64__
+ asm ( "bswap %k0" : "=r" (dst.val) : "0" (*dst.reg) );
+ break;
+ case 8:
+#endif
+ asm ( "bswap %0" : "=r" (dst.val) : "0" (*dst.reg) );
+ break;
+ }
+ break;
+ }
+ goto writeback;
+
+ cannot_emulate:
+ return X86EMUL_UNHANDLEABLE;
+}
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
new file mode 100644
index 0000000000..4ffdac75f3
--- /dev/null
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -0,0 +1,401 @@
+/******************************************************************************
+ * x86_emulate.h
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * Copyright (c) 2005-2007 Keir Fraser
+ * Copyright (c) 2005-2007 XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __X86_EMULATE_H__
+#define __X86_EMULATE_H__
+
+struct x86_emulate_ctxt;
+
+/* Comprehensive enumeration of x86 segment registers. */
+enum x86_segment {
+ /* General purpose. */
+ x86_seg_cs,
+ x86_seg_ss,
+ x86_seg_ds,
+ x86_seg_es,
+ x86_seg_fs,
+ x86_seg_gs,
+ /* System. */
+ x86_seg_tr,
+ x86_seg_ldtr,
+ x86_seg_gdtr,
+ x86_seg_idtr,
+ /*
+ * Dummy: used to emulate direct processor accesses to management
+ * structures (TSS, GDT, LDT, IDT, etc.) which use linear addressing
+ * (no segment component) and bypass usual segment- and page-level
+ * protection checks.
+ */
+ x86_seg_none
+};
+
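+/* True for the user-visible selector registers: CS, SS, DS, ES, FS and GS. */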
+#define is_x86_user_segment(seg) ((unsigned)(seg) <= x86_seg_gs)
+
+/*
+ * Attribute for segment selector. This is a copy of bit 40:47 & 52:55 of the
+ * segment descriptor. It happens to match the format of an AMD SVM VMCB.
+ */
+typedef union segment_attributes {
+ uint16_t bytes;
+ struct
+ {
+ uint16_t type:4; /* 0; Bit 40-43 */
+ uint16_t s: 1; /* 4; Bit 44 */
+ uint16_t dpl: 2; /* 5; Bit 45-46 */
+ uint16_t p: 1; /* 7; Bit 47 */
+ uint16_t avl: 1; /* 8; Bit 52 */
+ uint16_t l: 1; /* 9; Bit 53 */
+ uint16_t db: 1; /* 10; Bit 54 */
+ uint16_t g: 1; /* 11; Bit 55 */
+ } fields;
+} __attribute__ ((packed)) segment_attributes_t;
+
+/*
+ * Full state of a segment register (visible and hidden portions).
+ * Again, this happens to match the format of an AMD SVM VMCB.
+ */
+struct segment_register {
+ uint16_t sel;
+ segment_attributes_t attr;
+ uint32_t limit;
+ uint64_t base;
+} __attribute__ ((packed));
+
+/*
+ * Return codes from state-accessor functions and from x86_emulate().
+ */
+ /* Completed successfully. State modified appropriately. */
+#define X86EMUL_OKAY 0
+ /* Unhandleable access or emulation. No state modified. */
+#define X86EMUL_UNHANDLEABLE 1
+ /* Exception raised and requires delivery. */
+#define X86EMUL_EXCEPTION 2
+ /* Retry the emulation for some reason. No state modified. */
+#define X86EMUL_RETRY 3
+ /* (cmpxchg accessor): CMPXCHG failed. Maps to X86EMUL_RETRY in caller. */
+#define X86EMUL_CMPXCHG_FAILED 3
+
+/*
+ * These operations represent the instruction emulator's interface to memory.
+ *
+ * NOTES:
+ * 1. If the access fails (cannot emulate, or a standard access faults) then
+ * it is up to the memop to propagate the fault to the guest VM via
+ * some out-of-band mechanism, unknown to the emulator. The memop signals
+ * failure by returning X86EMUL_EXCEPTION to the emulator, which will
+ * then immediately bail.
+ * 2. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
+ *    the cmpxchg8b hook need support 8-byte accesses.
+ * 3. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
+ */
+struct x86_emulate_ops
+{
+ /*
+ * All functions:
+ * @ctxt: [IN ] Emulation context info as passed to the emulator.
+ * All memory-access functions:
+ * @seg: [IN ] Segment being dereferenced (specified as x86_seg_??).
+ * @offset:[IN ] Offset within segment.
+ * Read functions:
+ * @val: [OUT] Value read, zero-extended to 'ulong'.
+ * Write functions:
+ * @val: [IN ] Value to write (low-order bytes used as req'd).
+ * Variable-length access functions:
+ * @bytes: [IN ] Number of bytes to read or write.
+ */
+
+ /* read: Emulate a memory read. */
+ int (*read)(
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned long *val,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * insn_fetch: Emulate fetch from instruction byte stream.
+ * Parameters are same as for 'read'. @seg is always x86_seg_cs.
+ */
+ int (*insn_fetch)(
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned long *val,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
+
+ /* write: Emulate a memory write. */
+ int (*write)(
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned long val,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation.
+ * @old: [IN ] Value expected to be current at @addr.
+ * @new: [IN ] Value to write to @addr.
+ */
+ int (*cmpxchg)(
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned long old,
+ unsigned long new,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * cmpxchg8b: Emulate an atomic (LOCKed) CMPXCHG8B operation.
+ * @old: [IN ] Value expected to be current at @addr.
+ * @new: [IN ] Value to write to @addr.
+ * NOTES:
+ * 1. This function is only ever called when emulating a real CMPXCHG8B.
+ * 2. This function is *never* called on x86/64 systems.
+     * 3. Not defining this function (i.e., specifying NULL) is equivalent
+ * to defining a function that always returns X86EMUL_UNHANDLEABLE.
+ */
+ int (*cmpxchg8b)(
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned long old_lo,
+ unsigned long old_hi,
+ unsigned long new_lo,
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * rep_ins: Emulate INS: <src_port> -> <dst_seg:dst_offset>.
+ * @bytes_per_rep: [IN ] Bytes transferred per repetition.
+ * @reps: [IN ] Maximum repetitions to be emulated.
+ * [OUT] Number of repetitions actually emulated.
+ */
+ int (*rep_ins)(
+ uint16_t src_port,
+ enum x86_segment dst_seg,
+ unsigned long dst_offset,
+ unsigned int bytes_per_rep,
+ unsigned long *reps,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * rep_outs: Emulate OUTS: <src_seg:src_offset> -> <dst_port>.
+ * @bytes_per_rep: [IN ] Bytes transferred per repetition.
+ * @reps: [IN ] Maximum repetitions to be emulated.
+ * [OUT] Number of repetitions actually emulated.
+ */
+ int (*rep_outs)(
+ enum x86_segment src_seg,
+ unsigned long src_offset,
+ uint16_t dst_port,
+ unsigned int bytes_per_rep,
+ unsigned long *reps,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * rep_movs: Emulate MOVS: <src_seg:src_offset> -> <dst_seg:dst_offset>.
+ * @bytes_per_rep: [IN ] Bytes transferred per repetition.
+ * @reps: [IN ] Maximum repetitions to be emulated.
+ * [OUT] Number of repetitions actually emulated.
+ */
+ int (*rep_movs)(
+ enum x86_segment src_seg,
+ unsigned long src_offset,
+ enum x86_segment dst_seg,
+ unsigned long dst_offset,
+ unsigned int bytes_per_rep,
+ unsigned long *reps,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * read_segment: Emulate a read of full context of a segment register.
+ * @reg: [OUT] Contents of segment register (visible and hidden state).
+ */
+ int (*read_segment)(
+ enum x86_segment seg,
+ struct segment_register *reg,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+     * write_segment: Emulate a write of full context of a segment register.
+     * @reg: [IN ] New contents of segment register (visible and hidden state).
+ */
+ int (*write_segment)(
+ enum x86_segment seg,
+ struct segment_register *reg,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * read_io: Read from I/O port(s).
+ * @port: [IN ] Base port for access.
+ */
+ int (*read_io)(
+ unsigned int port,
+ unsigned int bytes,
+ unsigned long *val,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * write_io: Write to I/O port(s).
+ * @port: [IN ] Base port for access.
+ */
+ int (*write_io)(
+ unsigned int port,
+ unsigned int bytes,
+ unsigned long val,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * read_cr: Read from control register.
+ * @reg: [IN ] Register to read (0-15).
+ */
+ int (*read_cr)(
+ unsigned int reg,
+ unsigned long *val,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * write_cr: Write to control register.
+ * @reg: [IN ] Register to write (0-15).
+ */
+ int (*write_cr)(
+ unsigned int reg,
+ unsigned long val,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * read_dr: Read from debug register.
+ * @reg: [IN ] Register to read (0-15).
+ */
+ int (*read_dr)(
+ unsigned int reg,
+ unsigned long *val,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * write_dr: Write to debug register.
+ * @reg: [IN ] Register to write (0-15).
+ */
+ int (*write_dr)(
+ unsigned int reg,
+ unsigned long val,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * read_msr: Read from model-specific register.
+ * @reg: [IN ] Register to read.
+ */
+ int (*read_msr)(
+ unsigned long reg,
+ uint64_t *val,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+     * write_msr: Write to model-specific register.
+ * @reg: [IN ] Register to write.
+ */
+ int (*write_msr)(
+ unsigned long reg,
+ uint64_t val,
+ struct x86_emulate_ctxt *ctxt);
+
+ /* wbinvd: Write-back and invalidate cache contents. */
+ int (*wbinvd)(
+ struct x86_emulate_ctxt *ctxt);
+
+ /* cpuid: Emulate CPUID via given set of EAX-EDX inputs/outputs. */
+ int (*cpuid)(
+ unsigned int *eax,
+ unsigned int *ebx,
+ unsigned int *ecx,
+ unsigned int *edx,
+ struct x86_emulate_ctxt *ctxt);
+
+ /* inject_hw_exception: Inject the given hardware exception into the guest. */
+ int (*inject_hw_exception)(
+ uint8_t vector,
+ int32_t error_code,
+ struct x86_emulate_ctxt *ctxt);
+
+ /* inject_sw_interrupt: Inject the given software interrupt (@insn_len is the length of the injecting instruction). */
+ int (*inject_sw_interrupt)(
+ uint8_t vector,
+ uint8_t insn_len,
+ struct x86_emulate_ctxt *ctxt);
+
+ /* load_fpu_ctxt: Load emulated environment's FPU state onto processor. */
+ void (*load_fpu_ctxt)(
+ struct x86_emulate_ctxt *ctxt);
+
+ /* invlpg: Invalidate paging structures which map addressed byte. */
+ int (*invlpg)(
+ enum x86_segment seg,
+ unsigned long offset,
+ struct x86_emulate_ctxt *ctxt);
+};
+
+struct cpu_user_regs;
+
+struct x86_emulate_ctxt
+{
+ /* Register state before/after emulation. */
+ struct cpu_user_regs *regs;
+
+ /* Default address size in current execution mode (16, 32, or 64). */
+ unsigned int addr_size;
+
+ /* Stack pointer width in bits (16, 32 or 64). */
+ unsigned int sp_size;
+
+ /* Set this if writes may have side effects. */
+ uint8_t force_writeback;
+
+ /* Retirement state, set by the emulator (valid only on X86EMUL_OKAY). */
+ union {
+ struct {
+ uint8_t hlt:1; /* Instruction HLTed. */
+ uint8_t mov_ss:1; /* Instruction sets MOV-SS irq shadow. */
+ uint8_t sti:1; /* Instruction sets STI irq shadow. */
+ } flags;
+ uint8_t byte;
+ } retire;
+};
+
+/*
+ * x86_emulate: Emulate an instruction.
+ * Returns X86EMUL_OKAY on success, or another X86EMUL_* status code on failure.
+ */
+int
+x86_emulate(
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops);
+
+/*
+ * Given the 'reg' portion of a ModRM byte, and a register block, return a
+ * pointer into the block that addresses the relevant register.
+ * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
+ */
+void *
+decode_register(
+ uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
+
+#endif /* __X86_EMULATE_H__ */
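
The accessor table above is the whole contract between the emulator and its host environment. As an illustration of how a caller wires it up, here is a minimal sketch in the style of the tools/tests harness touched by this patch; flat_mem, flat_read, flat_write and emulate_one are hypothetical names, and the accessors simply treat one flat buffer as all of guest memory:

    #include <string.h>                 /* memcpy */
    #include "x86_emulate.h"            /* interface shown above */

    static uint8_t flat_mem[4096];      /* assumed flat guest "memory" */

    static int flat_read(enum x86_segment seg, unsigned long offset,
                         unsigned long *val, unsigned int bytes,
                         struct x86_emulate_ctxt *ctxt)
    {
        *val = 0;
        memcpy(val, &flat_mem[offset], bytes);
        return X86EMUL_OKAY;
    }

    static int flat_write(enum x86_segment seg, unsigned long offset,
                          unsigned long val, unsigned int bytes,
                          struct x86_emulate_ctxt *ctxt)
    {
        memcpy(&flat_mem[offset], &val, bytes);
        return X86EMUL_OKAY;
    }

    static struct x86_emulate_ops flat_ops = {
        .read       = flat_read,
        .insn_fetch = flat_read,        /* same shape; fetch from flat_mem */
        .write      = flat_write,
    };

    int emulate_one(struct cpu_user_regs *regs)
    {
        /* regs->eip must index the instruction bytes within flat_mem. */
        struct x86_emulate_ctxt ctxt = {
            .regs = regs, .addr_size = 32, .sp_size = 32,
        };
        return x86_emulate(&ctxt, &flat_ops); /* X86EMUL_OKAY on success */
    }

Hooks left NULL here are simply never exercised by such a test; a full caller must supply every hook the emulated instruction mix can reach.
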
diff --git a/xen/common/domain.c b/xen/common/domain.c
index bc9a7d9fad..76b48f4296 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -393,6 +393,8 @@ void __domain_crash_synchronous(void)
this_cpu(mc_state).flags = 0;
}
+ vcpu_end_shutdown_deferral(current);
+
for ( ; ; )
do_softirq();
}
@@ -459,10 +461,14 @@ void domain_resume(struct domain *d)
int vcpu_start_shutdown_deferral(struct vcpu *v)
{
+ if ( v->defer_shutdown )
+ return 1;
+
v->defer_shutdown = 1;
smp_mb(); /* set deferral status /then/ check for shutdown */
if ( unlikely(v->domain->is_shutting_down) )
vcpu_check_shutdown(v);
+
return v->defer_shutdown;
}
@@ -516,7 +522,7 @@ static void complete_domain_destroy(struct rcu_head *head)
if ( (v = d->vcpu[i]) != NULL )
free_vcpu_struct(v);
- if (d->target)
+ if ( d->target != NULL )
put_domain(d->target);
free_domain(d);
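
The early-return added to vcpu_start_shutdown_deferral() makes repeated calls cheap, and the existing smp_mb() is what makes the flag protocol safe: the deferral flag must be globally visible before the shutdown status is sampled. Schematically, the two sides pair up like this (the shutdown side is paraphrased for illustration and is not part of this hunk):

    /* vcpu side: advertise deferral, then re-check for a racing shutdown. */
    v->defer_shutdown = 1;
    smp_mb();                          /* store flag before loading status */
    if ( v->domain->is_shutting_down )
        vcpu_check_shutdown(v);        /* lost the race: act on it now */

    /* shutdown side (paraphrased): publish status, then sample each flag. */
    d->is_shutting_down = 1;
    smp_mb();                          /* store status before loading flags */
    for_each_vcpu ( d, v )
        if ( !v->defer_shutdown )
            vcpu_check_shutdown(v);
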
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 2660af36d8..52143dbd1d 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -182,6 +182,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
struct xen_domctl curop, *op = &curop;
static DEFINE_SPINLOCK(domctl_lock);
+ if ( !IS_PRIV(current->domain) )
+ return -EPERM;
+
if ( copy_from_guest(op, u_domctl, 1) )
return -EFAULT;
@@ -204,10 +207,6 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
if ( d == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto svc_out;
-
ret = xsm_setvcpucontext(d);
if ( ret )
goto svc_out;
@@ -259,10 +258,6 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
ret = -ESRCH;
if ( d != NULL )
{
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto pausedomain_out;
-
ret = xsm_pausedomain(d);
if ( ret )
goto pausedomain_out;
@@ -287,18 +282,16 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
if ( d == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto unpausedomain_out;
-
ret = xsm_unpausedomain(d);
if ( ret )
- goto unpausedomain_out;
+ {
+ rcu_unlock_domain(d);
+ break;
+ }
domain_unpause_by_systemcontroller(d);
- ret = 0;
-unpausedomain_out:
rcu_unlock_domain(d);
+ ret = 0;
}
break;
@@ -310,18 +303,16 @@ unpausedomain_out:
if ( d == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto resumedomain_out;
-
ret = xsm_resumedomain(d);
if ( ret )
- goto resumedomain_out;
+ {
+ rcu_unlock_domain(d);
+ break;
+ }
domain_resume(d);
- ret = 0;
-resumedomain_out:
rcu_unlock_domain(d);
+ ret = 0;
}
break;
@@ -332,10 +323,6 @@ resumedomain_out:
static domid_t rover = 0;
unsigned int domcr_flags;
- ret = -EPERM;
- if ( !IS_PRIV(current->domain) )
- break;
-
ret = -EINVAL;
if ( supervisor_mode_kernel ||
(op->u.createdomain.flags &
@@ -401,13 +388,12 @@ resumedomain_out:
if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto maxvcpu_out2;
-
ret = xsm_max_vcpus(d);
if ( ret )
- goto maxvcpu_out2;
+ {
+ rcu_unlock_domain(d);
+ break;
+ }
/* Needed, for example, to ensure writable p.t. state is synced. */
domain_pause(d);
@@ -435,7 +421,6 @@ resumedomain_out:
maxvcpu_out:
domain_unpause(d);
- maxvcpu_out2:
rcu_unlock_domain(d);
}
break;
@@ -446,9 +431,7 @@ resumedomain_out:
ret = -ESRCH;
if ( d != NULL )
{
- ret = -EPERM;
- if ( IS_PRIV_FOR(current->domain, d) )
- ret = xsm_destroydomain(d) ? : domain_kill(d);
+ ret = xsm_destroydomain(d) ? : domain_kill(d);
rcu_unlock_domain(d);
}
}
@@ -466,10 +449,6 @@ resumedomain_out:
if ( d == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto vcpuaffinity_out;
-
ret = xsm_vcpuaffinity(op->cmd, d);
if ( ret )
goto vcpuaffinity_out;
@@ -508,10 +487,6 @@ resumedomain_out:
if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto scheduler_op_out;
-
ret = xsm_scheduler(d);
if ( ret )
goto scheduler_op_out;
@@ -533,7 +508,7 @@ resumedomain_out:
rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
- if ( d->domain_id >= dom && IS_PRIV_FOR(current->domain, d))
+ if ( d->domain_id >= dom )
break;
if ( d == NULL )
@@ -568,10 +543,6 @@ resumedomain_out:
if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto getvcpucontext_out;
-
ret = xsm_getvcpucontext(d);
if ( ret )
goto getvcpucontext_out;
@@ -632,10 +603,6 @@ resumedomain_out:
if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto getvcpuinfo_out;
-
ret = xsm_getvcpuinfo(d);
if ( ret )
goto getvcpuinfo_out;
@@ -675,10 +642,6 @@ resumedomain_out:
if ( d == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto max_mem_out;
-
ret = xsm_setdomainmaxmem(d);
if ( ret )
goto max_mem_out;
@@ -695,8 +658,6 @@ resumedomain_out:
d->max_pages = new_max;
ret = 0;
}
- else
- printk("new max %ld, tot pages %d\n", new_max, d->tot_pages);
spin_unlock(&d->page_alloc_lock);
max_mem_out:
@@ -713,19 +674,17 @@ resumedomain_out:
if ( d == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto setdomainhandle_out;
-
ret = xsm_setdomainhandle(d);
if ( ret )
- goto setdomainhandle_out;
+ {
+ rcu_unlock_domain(d);
+ break;
+ }
memcpy(d->handle, op->u.setdomainhandle.handle,
sizeof(xen_domain_handle_t));
- ret = 0;
-setdomainhandle_out:
rcu_unlock_domain(d);
+ ret = 0;
}
break;
@@ -738,20 +697,18 @@ setdomainhandle_out:
if ( d == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto setdebugging_out;
-
ret = xsm_setdebugging(d);
if ( ret )
- goto setdebugging_out;
+ {
+ rcu_unlock_domain(d);
+ break;
+ }
domain_pause(d);
d->debugger_attached = !!op->u.setdebugging.enable;
domain_unpause(d); /* causes guest to latch new status */
- ret = 0;
-setdebugging_out:
rcu_unlock_domain(d);
+ ret = 0;
}
break;
@@ -769,10 +726,6 @@ setdebugging_out:
if ( d == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto irq_permission_out;
-
ret = xsm_irq_permission(d, pirq, op->u.irq_permission.allow_access);
if ( ret )
goto irq_permission_out;
@@ -802,10 +755,6 @@ setdebugging_out:
if ( d == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto iomem_permission_out;
-
ret = xsm_iomem_permission(d, mfn, op->u.iomem_permission.allow_access);
if ( ret )
goto iomem_permission_out;
@@ -829,19 +778,16 @@ setdebugging_out:
if ( d == NULL )
break;
- ret = -EPERM;
- if ( !IS_PRIV_FOR(current->domain, d) )
- goto settimeoffset_out;
-
ret = xsm_domain_settime(d);
if ( ret )
- goto settimeoffset_out;
+ {
+ rcu_unlock_domain(d);
+ break;
+ }
d->time_offset_seconds = op->u.settimeoffset.time_offset_seconds;
-
- ret = 0;
-settimeoffset_out:
rcu_unlock_domain(d);
+ ret = 0;
}
break;
@@ -854,32 +800,24 @@ settimeoffset_out:
if ( d == NULL )
break;
- ret = -EPERM;
- if (!IS_PRIV_FOR(current->domain, d))
- goto set_target_out;
-
ret = -ESRCH;
e = get_domain_by_id(op->u.set_target.target);
if ( e == NULL )
goto set_target_out;
- if ( d == e ) {
- ret = -EINVAL;
- put_domain(e);
- goto set_target_out;
- }
-
- if (!IS_PRIV_FOR(current->domain, e)) {
- ret = -EPERM;
+ ret = -EINVAL;
+ if ( (d == e) || (d->target != NULL) )
+ {
put_domain(e);
goto set_target_out;
}
+ /* Hold reference on @e until we destroy @d. */
d->target = e;
- /* and we keep the reference on e, released when destroying d */
+
ret = 0;
-set_target_out:
+ set_target_out:
rcu_unlock_domain(d);
}
break;
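
One construct in the XEN_DOMCTL_destroydomain hunk deserves a note: `xsm_destroydomain(d) ? : domain_kill(d)` uses GCC's conditional-with-omitted-middle extension, which evaluates its first operand once and yields it unless it is zero. The long-hand equivalent is:

    /* ret = xsm_destroydomain(d) ?: domain_kill(d); is shorthand for: */
    int xsm_rc = xsm_destroydomain(d);       /* XSM gets a veto first... */
    ret = xsm_rc ? xsm_rc : domain_kill(d);  /* ...only then kill the domain */
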
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 365adf4652..b385b54738 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -130,13 +130,17 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc)
long rc;
if ( dom == DOMID_SELF )
- d = current->domain;
- else {
+ {
+ d = rcu_lock_current_domain();
+ }
+ else
+ {
if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
return -ESRCH;
- if ( !IS_PRIV_FOR(current->domain, d) ) {
- rc = -EPERM;
- goto out2;
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
+ rcu_unlock_domain(d);
+ return -EPERM;
}
}
@@ -158,8 +162,6 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc)
out:
spin_unlock(&d->evtchn_lock);
-
- out2:
rcu_unlock_domain(d);
return rc;
@@ -201,7 +203,7 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
ERROR_EXIT_DOM(-EINVAL, rd);
rchn = evtchn_from_port(rd, rport);
if ( (rchn->state != ECS_UNBOUND) ||
- (rchn->u.unbound.remote_domid != ld->domain_id && !IS_PRIV_FOR(ld, rd)))
+ (rchn->u.unbound.remote_domid != ld->domain_id) )
ERROR_EXIT_DOM(-EINVAL, rd);
rc = xsm_evtchn_interdomain(ld, lchn, rd, rchn);
@@ -631,13 +633,17 @@ static long evtchn_status(evtchn_status_t *status)
long rc = 0;
if ( dom == DOMID_SELF )
- d = current->domain;
- else {
+ {
+ d = rcu_lock_current_domain();
+ }
+ else
+ {
if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
return -ESRCH;
- if ( !IS_PRIV_FOR(current->domain, d) ) {
- rc = -EPERM;
- goto out2;
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
+ rcu_unlock_domain(d);
+ return -EPERM;
}
}
@@ -690,8 +696,8 @@ static long evtchn_status(evtchn_status_t *status)
out:
spin_unlock(&d->evtchn_lock);
- out2:
rcu_unlock_domain(d);
+
return rc;
}
@@ -742,6 +748,7 @@ long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id)
out:
spin_unlock(&d->evtchn_lock);
+
return rc;
}
@@ -784,15 +791,18 @@ static long evtchn_reset(evtchn_reset_t *r)
{
domid_t dom = r->dom;
struct domain *d;
- int i;
- int rc;
+ int i, rc;
if ( dom == DOMID_SELF )
- d = current->domain;
- else {
+ {
+ d = rcu_lock_current_domain();
+ }
+ else
+ {
if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
return -ESRCH;
- if ( !IS_PRIV_FOR(current->domain, d) ) {
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
rc = -EPERM;
goto out;
}
@@ -806,6 +816,7 @@ static long evtchn_reset(evtchn_reset_t *r)
(void)__evtchn_close(d, i);
rc = 0;
+
out:
rcu_unlock_domain(d);
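
evtchn_alloc_unbound(), evtchn_status() and evtchn_reset() now all follow the same shape: acquire an RCU reference however the domain is named, so every exit path can run an unconditional rcu_unlock_domain(). Extracted as a helper (the helper itself is hypothetical; only the idiom is from this patch):

    static struct domain *get_dom_ref(domid_t dom)
    {
        struct domain *d;

        if ( dom == DOMID_SELF )
            return rcu_lock_current_domain();

        if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
            return NULL;                        /* caller returns -ESRCH */

        if ( !IS_PRIV_FOR(current->domain, d) )
        {
            rcu_unlock_domain(d);
            return NULL;                        /* caller returns -EPERM */
        }

        return d;
    }
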
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 3f69e822fe..2dbcfab8b6 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -828,32 +828,34 @@ gnttab_setup_table(
" per domain.\n",
max_nr_grant_frames);
op.status = GNTST_general_error;
- goto out;
+ goto out1;
}
dom = op.dom;
if ( dom == DOMID_SELF )
{
- d = current->domain;
+ d = rcu_lock_current_domain();
}
- else {
+ else
+ {
if ( unlikely((d = rcu_lock_domain_by_id(dom)) == NULL) )
{
gdprintk(XENLOG_INFO, "Bad domid %d.\n", dom);
op.status = GNTST_bad_domain;
- goto out;
+ goto out1;
}
- if ( unlikely(!IS_PRIV_FOR(current->domain, d)) ) {
+
+ if ( unlikely(!IS_PRIV_FOR(current->domain, d)) )
+ {
op.status = GNTST_permission_denied;
- goto setup_unlock_out2;
+ goto out2;
}
}
if ( xsm_grant_setup(current->domain, d) )
{
- rcu_unlock_domain(d);
op.status = GNTST_permission_denied;
- goto out;
+ goto out2;
}
spin_lock(&d->grant_table->lock);
@@ -867,7 +869,7 @@ gnttab_setup_table(
nr_grant_frames(d->grant_table),
max_nr_grant_frames);
op.status = GNTST_general_error;
- goto setup_unlock_out;
+ goto out3;
}
op.status = GNTST_okay;
@@ -877,13 +879,11 @@ gnttab_setup_table(
(void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
}
- setup_unlock_out:
+ out3:
spin_unlock(&d->grant_table->lock);
-
- setup_unlock_out2:
+ out2:
rcu_unlock_domain(d);
-
- out:
+ out1:
if ( unlikely(copy_to_guest(uop, &op, 1)) )
return -EFAULT;
@@ -911,16 +911,19 @@ gnttab_query_size(
dom = op.dom;
if ( dom == DOMID_SELF )
{
- d = current->domain;
+ d = rcu_lock_current_domain();
}
- else {
+ else
+ {
if ( unlikely((d = rcu_lock_domain_by_id(dom)) == NULL) )
{
gdprintk(XENLOG_INFO, "Bad domid %d.\n", dom);
op.status = GNTST_bad_domain;
goto query_out;
}
- if ( unlikely(!IS_PRIV_FOR(current->domain, d)) ) {
+
+ if ( unlikely(!IS_PRIV_FOR(current->domain, d)) )
+ {
op.status = GNTST_permission_denied;
goto query_out_unlock;
}
diff --git a/xen/common/memory.c b/xen/common/memory.c
index a3c2ad65e6..70a05d5367 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -232,12 +232,15 @@ static long translate_gpfn_list(
return -EFAULT;
if ( op.domid == DOMID_SELF )
- d = current->domain;
- else {
- d = rcu_lock_domain_by_id(op.domid);
- if ( d == NULL )
+ {
+ d = rcu_lock_current_domain();
+ }
+ else
+ {
+ if ( (d = rcu_lock_domain_by_id(op.domid)) == NULL )
return -ESRCH;
- if ( !IS_PRIV_FOR(current->domain, d) ) {
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
rcu_unlock_domain(d);
return -EPERM;
}
@@ -539,12 +542,15 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
}
if ( likely(reservation.domid == DOMID_SELF) )
- d = current->domain;
- else {
- d = rcu_lock_domain_by_id(reservation.domid);
- if ( d == NULL)
+ {
+ d = rcu_lock_current_domain();
+ }
+ else
+ {
+ if ( (d = rcu_lock_domain_by_id(reservation.domid)) == NULL )
return start_extent;
- if ( !IS_PRIV_FOR(current->domain, d) ) {
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
rcu_unlock_domain(d);
return start_extent;
}
@@ -554,8 +560,7 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
rc = xsm_memory_adjust_reservation(current->domain, d);
if ( rc )
{
- if ( reservation.domid != DOMID_SELF )
- rcu_unlock_domain(d);
+ rcu_unlock_domain(d);
return rc;
}
@@ -572,8 +577,7 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
break;
}
- if ( unlikely(reservation.domid != DOMID_SELF) )
- rcu_unlock_domain(d);
+ rcu_unlock_domain(d);
rc = args.nr_done;
@@ -599,12 +603,15 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
return -EFAULT;
if ( likely(domid == DOMID_SELF) )
- d = current->domain;
- else {
- d = rcu_lock_domain_by_id(domid);
- if ( d == NULL )
+ {
+ d = rcu_lock_current_domain();
+ }
+ else
+ {
+ if ( (d = rcu_lock_domain_by_id(domid)) == NULL )
return -ESRCH;
- if ( !IS_PRIV_FOR(current->domain, d) ) {
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
rcu_unlock_domain(d);
return -EPERM;
}
@@ -613,8 +620,7 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
rc = xsm_memory_stat_reservation(current->domain, d);
if ( rc )
{
- if ( domid != DOMID_SELF )
- rcu_unlock_domain(d);
+ rcu_unlock_domain(d);
return rc;
}
@@ -632,8 +638,7 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
break;
}
- if ( unlikely(domid != DOMID_SELF) )
- rcu_unlock_domain(d);
+ rcu_unlock_domain(d);
break;
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 9224cbe798..fe58567263 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -270,32 +270,9 @@ static inline int hvm_do_pmu_interrupt(struct cpu_user_regs *regs)
#define X86_EVENTTYPE_SW_INTERRUPT 4 /* software interrupt */
#define X86_EVENTTYPE_SW_EXCEPTION 6 /* software exception */
-/*
- * Need to re-inject a given event? We avoid re-injecting software exceptions
- * and interrupts because the faulting/trapping instruction can simply be
- * re-executed (neither VMX nor SVM update RIP when they VMEXIT during
- * INT3/INTO/INTn).
- */
-static inline int hvm_event_needs_reinjection(uint8_t type, uint8_t vector)
-{
- switch ( type )
- {
- case X86_EVENTTYPE_EXT_INTR:
- case X86_EVENTTYPE_NMI:
- return 1;
- case X86_EVENTTYPE_HW_EXCEPTION:
- /*
- * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly
- * check for these vectors, as they are really SW Exceptions. SVM has
- * not updated RIP to point after the trapping instruction (INT3/INTO).
- */
- return (vector != 3) && (vector != 4);
- default:
- /* Software exceptions/interrupts can be re-executed (e.g., INT n). */
- break;
- }
- return 0;
-}
+int hvm_event_needs_reinjection(uint8_t type, uint8_t vector);
+
+uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2);
static inline int hvm_cpu_up(void)
{
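
hvm_event_needs_reinjection() moves out of line, and hvm_combine_hw_exceptions() is introduced for the case where a second fault is raised while the first is still being delivered. Its body is not in this hunk; a plausible sketch following the architectural benign/contributory/page-fault rules (Intel SDM Table 6-5) is below. The real implementation lives in hvm.c and also escalates a fault during #DF delivery to a triple fault, which this sketch omits:

    uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2)
    {
        /* Contributory: #DE(0), #TS(10), #NP(11), #SS(12), #GP(13). */
        const uint32_t contributory = (1u <<  0) | (1u << 10) | (1u << 11) |
                                      (1u << 12) | (1u << 13);
        const uint32_t pagefault = 1u << 14;    /* #PF */

        /* A benign first exception never combines: deliver the second. */
        if ( !((1u << vec1) & (contributory | pagefault)) )
            return vec2;

        /* Contributory+contributory, or #PF followed by #PF/contributory. */
        if ( ((1u << vec2) & contributory) ||
             (((1u << vec1) & pagefault) && ((1u << vec2) & pagefault)) )
            return 8;                           /* TRAP_double_fault */

        return vec2;
    }
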
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index 8164742e01..9ef4f645a1 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -25,10 +25,9 @@
#include <public/hvm/ioreq.h>
#include <public/event_channel.h>
-#define MAX_IO_HANDLER 12
+#define MAX_IO_HANDLER 16
#define HVM_PORTIO 0
-#define HVM_MMIO 1
#define HVM_BUFFERED_IO 2
typedef unsigned long (*hvm_mmio_read_t)(struct vcpu *v,
@@ -96,14 +95,10 @@ static inline int register_buffered_io_handler(
return register_io_handler(d, addr, size, action, HVM_BUFFERED_IO);
}
-void send_mmio_req(unsigned char type, paddr_t gpa,
- unsigned long count, int size, paddr_t value,
- int dir, int df, int value_is_ptr);
-void send_pio_req(unsigned long port, unsigned long count, int size,
- paddr_t value, int dir, int df, int value_is_ptr);
void send_timeoffset_req(unsigned long timeoff);
void send_invalidate_req(void);
int handle_mmio(void);
+int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn);
void hvm_interrupt_post(struct vcpu *v, int vector, int type);
void hvm_io_assist(void);
void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h
index cbdea537fc..2ec00f2fb0 100644
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -27,12 +27,6 @@
#include <asm/regs.h>
#include <asm/processor.h>
-#ifndef NDEBUG
-#define HVM_DEBUG 1
-#else
-#define HVM_DEBUG 1
-#endif
-
static inline vcpu_iodata_t *get_ioreq(struct vcpu *v)
{
struct domain *d = v->domain;
@@ -42,17 +36,9 @@ static inline vcpu_iodata_t *get_ioreq(struct vcpu *v)
return &p->vcpu_iodata[v->vcpu_id];
}
-/* XXX these are really VMX specific */
-#define TYPE_MOV_TO_DR (0 << 4)
-#define TYPE_MOV_FROM_DR (1 << 4)
-#define TYPE_MOV_TO_CR (0 << 4)
-#define TYPE_MOV_FROM_CR (1 << 4)
-#define TYPE_CLTS (2 << 4)
-#define TYPE_LMSW (3 << 4)
-
#define HVM_DELIVER_NO_ERROR_CODE -1
-#if HVM_DEBUG
+#ifndef NDEBUG
#define DBG_LEVEL_0 (1 << 0)
#define DBG_LEVEL_1 (1 << 1)
#define DBG_LEVEL_2 (1 << 2)
@@ -99,7 +85,11 @@ enum hvm_copy_result hvm_copy_from_guest_phys(
void *buf, paddr_t paddr, int size);
/*
- * Copy to/from a guest virtual address.
+ * Copy to/from a guest virtual address. @pfec should include PFEC_user_mode
+ * if emulating a user-mode access (CPL=3). All other flags in @pfec are
+ * managed by the called function: it is therefore optional for the caller
+ * to set them.
+ *
* Returns:
* HVMCOPY_okay: Copy was entirely successful.
* HVMCOPY_bad_gfn_to_mfn: Some guest physical address did not map to
@@ -110,25 +100,22 @@ enum hvm_copy_result hvm_copy_from_guest_phys(
* for injection into the current HVM VCPU.
*/
enum hvm_copy_result hvm_copy_to_guest_virt(
- unsigned long vaddr, void *buf, int size);
+ unsigned long vaddr, void *buf, int size, uint32_t pfec);
enum hvm_copy_result hvm_copy_from_guest_virt(
- void *buf, unsigned long vaddr, int size);
+ void *buf, unsigned long vaddr, int size, uint32_t pfec);
enum hvm_copy_result hvm_fetch_from_guest_virt(
- void *buf, unsigned long vaddr, int size);
+ void *buf, unsigned long vaddr, int size, uint32_t pfec);
/*
* As above (copy to/from a guest virtual address), but no fault is generated
* when HVMCOPY_bad_gva_to_gfn is returned.
*/
enum hvm_copy_result hvm_copy_to_guest_virt_nofault(
- unsigned long vaddr, void *buf, int size);
+ unsigned long vaddr, void *buf, int size, uint32_t pfec);
enum hvm_copy_result hvm_copy_from_guest_virt_nofault(
- void *buf, unsigned long vaddr, int size);
+ void *buf, unsigned long vaddr, int size, uint32_t pfec);
enum hvm_copy_result hvm_fetch_from_guest_virt_nofault(
- void *buf, unsigned long vaddr, int size);
-
-void hvm_print_line(struct vcpu *v, const char c);
-void hlt_timer_fn(void *data);
+ void *buf, unsigned long vaddr, int size, uint32_t pfec);
#define HVM_HCALL_completed 0 /* hypercall completed - no further action */
#define HVM_HCALL_preempted 1 /* hypercall preempted - re-execute VMCALL */
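
Following the widened prototypes, a caller emulating a ring-3 data read passes just the user-mode intent and maps the copy result onto an emulator status, along these lines (surrounding variables assumed):

    enum hvm_copy_result rc;

    /* Emulated CPL-3 read: PFEC_user_mode makes the walk apply U/S checks. */
    rc = hvm_copy_from_guest_virt(buf, vaddr, bytes, PFEC_user_mode);

    if ( rc == HVMCOPY_bad_gva_to_gfn )
        return X86EMUL_EXCEPTION;       /* #PF already queued for injection */
    if ( rc != HVMCOPY_okay )
        return X86EMUL_UNHANDLEABLE;
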
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index 7439434b85..d3281d20dc 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -29,6 +29,14 @@
#define HVM_VCPU_INIT_SIPI_SIPI_STATE_NORM 0
#define HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI 1
+enum hvm_io_state {
+ HVMIO_none = 0,
+ HVMIO_dispatched,
+ HVMIO_awaiting_completion,
+ HVMIO_handle_mmio_awaiting_completion,
+ HVMIO_completed
+};
+
struct hvm_vcpu {
/* Guest control-register and EFER values, just as the guest sees them. */
unsigned long guest_cr[5];
@@ -70,10 +78,17 @@ struct hvm_vcpu {
u8 cache_mode;
/* I/O request in flight to device model. */
- bool_t mmio_in_progress;
- bool_t io_in_progress;
- bool_t io_completed;
+ enum hvm_io_state io_state;
unsigned long io_data;
+
+ /*
+ * HVM emulation:
+ * Virtual address @mmio_gva maps to MMIO physical frame @mmio_gpfn.
+ * The latter is known to be an MMIO frame (not RAM).
+ * This translation is only valid if @mmio_gva is non-zero.
+ */
+ unsigned long mmio_gva;
+ unsigned long mmio_gpfn;
};
#endif /* __ASM_X86_HVM_VCPU_H__ */
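
The mmio_gva/mmio_gpfn pair acts as a one-entry translation cache, with a zero mmio_gva doubling as the invalid marker, so fast-path code can skip a second page walk. Paired with handle_mmio_with_translation() from the io.h hunk above, the consumer looks roughly like this (a sketch; the exact call site is in the paging code):

    struct hvm_vcpu *hv = &current->arch.hvm_vcpu;

    /* Hit in the one-entry cache? */
    if ( hv->mmio_gva && (hv->mmio_gva == (gva & PAGE_MASK)) )
        return handle_mmio_with_translation(gva, hv->mmio_gpfn);

    return handle_mmio();   /* slow path: translate from scratch */
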
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 94e4168fd7..1d2f37d4d1 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -95,35 +95,32 @@ void vmx_realmode(struct cpu_user_regs *regs);
/*
* Exit Qualifications for MOV for Control Register Access
*/
-#define CONTROL_REG_ACCESS_NUM 0xf /* 3:0, number of control register */
-#define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */
-#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose register */
-#define LMSW_SOURCE_DATA (0xFFFF << 16) /* 16:31 lmsw source */
-#define REG_EAX (0 << 8)
-#define REG_ECX (1 << 8)
-#define REG_EDX (2 << 8)
-#define REG_EBX (3 << 8)
-#define REG_ESP (4 << 8)
-#define REG_EBP (5 << 8)
-#define REG_ESI (6 << 8)
-#define REG_EDI (7 << 8)
-#define REG_R8 (8 << 8)
-#define REG_R9 (9 << 8)
-#define REG_R10 (10 << 8)
-#define REG_R11 (11 << 8)
-#define REG_R12 (12 << 8)
-#define REG_R13 (13 << 8)
-#define REG_R14 (14 << 8)
-#define REG_R15 (15 << 8)
-
-/*
- * Exit Qualifications for MOV for Debug Register Access
- */
-#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug register */
-#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
-#define TYPE_MOV_TO_DR (0 << 4)
-#define TYPE_MOV_FROM_DR (1 << 4)
-#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose register */
+ /* 3:0 - control register number (CRn) */
+#define VMX_CONTROL_REG_ACCESS_NUM 0xf
+ /* 5:4 - access type (CR write, CR read, CLTS, LMSW) */
+#define VMX_CONTROL_REG_ACCESS_TYPE 0x30
+ /* 10:8 - general purpose register operand */
+#define VMX_CONTROL_REG_ACCESS_GPR 0xf00
+#define VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR (0 << 4)
+#define VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR (1 << 4)
+#define VMX_CONTROL_REG_ACCESS_TYPE_CLTS (2 << 4)
+#define VMX_CONTROL_REG_ACCESS_TYPE_LMSW (3 << 4)
+#define VMX_CONTROL_REG_ACCESS_GPR_EAX (0 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_ECX (1 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_EDX (2 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_EBX (3 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_ESP (4 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_EBP (5 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_ESI (6 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_EDI (7 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_R8 (8 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_R9 (9 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_R10 (10 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_R11 (11 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_R12 (12 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_R13 (13 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_R14 (14 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR_R15 (15 << 8)
/*
* Access Rights
@@ -155,72 +152,72 @@ void vmx_realmode(struct cpu_user_regs *regs);
static inline void __vmptrld(u64 addr)
{
- __asm__ __volatile__ ( VMPTRLD_OPCODE
- MODRM_EAX_06
- /* CF==1 or ZF==1 --> crash (ud2) */
- "ja 1f ; ud2 ; 1:\n"
- :
- : "a" (&addr)
- : "memory");
+ asm volatile ( VMPTRLD_OPCODE
+ MODRM_EAX_06
+ /* CF==1 or ZF==1 --> crash (ud2) */
+ "ja 1f ; ud2 ; 1:\n"
+ :
+ : "a" (&addr)
+ : "memory");
}
static inline void __vmptrst(u64 addr)
{
- __asm__ __volatile__ ( VMPTRST_OPCODE
- MODRM_EAX_07
- :
- : "a" (&addr)
- : "memory");
+ asm volatile ( VMPTRST_OPCODE
+ MODRM_EAX_07
+ :
+ : "a" (&addr)
+ : "memory");
}
static inline void __vmpclear(u64 addr)
{
- __asm__ __volatile__ ( VMCLEAR_OPCODE
- MODRM_EAX_06
- /* CF==1 or ZF==1 --> crash (ud2) */
- "ja 1f ; ud2 ; 1:\n"
- :
- : "a" (&addr)
- : "memory");
+ asm volatile ( VMCLEAR_OPCODE
+ MODRM_EAX_06
+ /* CF==1 or ZF==1 --> crash (ud2) */
+ "ja 1f ; ud2 ; 1:\n"
+ :
+ : "a" (&addr)
+ : "memory");
}
static inline unsigned long __vmread(unsigned long field)
{
unsigned long ecx;
- __asm__ __volatile__ ( VMREAD_OPCODE
- MODRM_EAX_ECX
- /* CF==1 or ZF==1 --> crash (ud2) */
- "ja 1f ; ud2 ; 1:\n"
- : "=c" (ecx)
- : "a" (field)
- : "memory");
+ asm volatile ( VMREAD_OPCODE
+ MODRM_EAX_ECX
+ /* CF==1 or ZF==1 --> crash (ud2) */
+ "ja 1f ; ud2 ; 1:\n"
+ : "=c" (ecx)
+ : "a" (field)
+ : "memory");
return ecx;
}
static inline void __vmwrite(unsigned long field, unsigned long value)
{
- __asm__ __volatile__ ( VMWRITE_OPCODE
- MODRM_EAX_ECX
- /* CF==1 or ZF==1 --> crash (ud2) */
- "ja 1f ; ud2 ; 1:\n"
- :
- : "a" (field) , "c" (value)
- : "memory");
+ asm volatile ( VMWRITE_OPCODE
+ MODRM_EAX_ECX
+ /* CF==1 or ZF==1 --> crash (ud2) */
+ "ja 1f ; ud2 ; 1:\n"
+ :
+ : "a" (field) , "c" (value)
+ : "memory");
}
static inline unsigned long __vmread_safe(unsigned long field, int *error)
{
unsigned long ecx;
- __asm__ __volatile__ ( VMREAD_OPCODE
- MODRM_EAX_ECX
- /* CF==1 or ZF==1 --> rc = -1 */
- "setna %b0 ; neg %0"
- : "=q" (*error), "=c" (ecx)
- : "0" (0), "a" (field)
- : "memory");
+ asm volatile ( VMREAD_OPCODE
+ MODRM_EAX_ECX
+ /* CF==1 or ZF==1 --> rc = -1 */
+ "setna %b0 ; neg %0"
+ : "=q" (*error), "=c" (ecx)
+ : "0" (0), "a" (field)
+ : "memory");
return ecx;
}
@@ -264,49 +261,8 @@ static inline int __vmxon(u64 addr)
return rc;
}
-static inline void __vmx_inject_exception(
- struct vcpu *v, int trap, int type, int error_code)
-{
- unsigned long intr_fields;
-
- /*
- * NB. Callers do not need to worry about clearing STI/MOV-SS blocking:
- * "If the VM entry is injecting, there is no blocking by STI or by
- * MOV SS following the VM entry, regardless of the contents of the
- * interruptibility-state field [in the guest-state area before the
- * VM entry]", PRM Vol. 3, 22.6.1 (Interruptibility State).
- */
-
- intr_fields = (INTR_INFO_VALID_MASK | (type<<8) | trap);
- if ( error_code != HVM_DELIVER_NO_ERROR_CODE ) {
- __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
- intr_fields |= INTR_INFO_DELIVER_CODE_MASK;
- }
-
- __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
-
- if ( trap == TRAP_page_fault )
- HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vcpu.guest_cr[2], error_code);
- else
- HVMTRACE_2D(INJ_EXC, v, trap, error_code);
-}
-
-static inline void vmx_inject_hw_exception(
- struct vcpu *v, int trap, int error_code)
-{
- __vmx_inject_exception(v, trap, X86_EVENTTYPE_HW_EXCEPTION, error_code);
-}
-
-static inline void vmx_inject_extint(struct vcpu *v, int trap)
-{
- __vmx_inject_exception(v, trap, X86_EVENTTYPE_EXT_INTR,
- HVM_DELIVER_NO_ERROR_CODE);
-}
-
-static inline void vmx_inject_nmi(struct vcpu *v)
-{
- __vmx_inject_exception(v, 2, X86_EVENTTYPE_NMI,
- HVM_DELIVER_NO_ERROR_CODE);
-}
+void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code);
+void vmx_inject_extint(struct vcpu *v, int trap);
+void vmx_inject_nmi(struct vcpu *v);
#endif /* __ASM_X86_HVM_VMX_VMX_H__ */
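
With the renamed, VMX_-prefixed constants above (which no longer collide with the SVM-flavoured TYPE_MOV_* names removed from support.h), a CR-access exit handler decodes the exit qualification like so (handler skeleton assumed):

    unsigned long q  = __vmread(EXIT_QUALIFICATION);
    unsigned int cr  = q & VMX_CONTROL_REG_ACCESS_NUM;
    unsigned int gpr = (q & VMX_CONTROL_REG_ACCESS_GPR) >> 8;

    switch ( q & VMX_CONTROL_REG_ACCESS_TYPE )
    {
    case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR:
        /* mov %<gpr>, %cr<cr> */
        break;
    case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR:
        /* mov %cr<cr>, %<gpr> */
        break;
    case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
        break;
    case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
        /* LMSW source operand is in bits 31:16 of the qualification. */
        break;
    }
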
diff --git a/xen/include/asm-x86/x86_emulate.h b/xen/include/asm-x86/x86_emulate.h
index 4ffdac75f3..d87966a22b 100644
--- a/xen/include/asm-x86/x86_emulate.h
+++ b/xen/include/asm-x86/x86_emulate.h
@@ -1,401 +1,22 @@
/******************************************************************************
* x86_emulate.h
*
- * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ * Wrapper for generic x86 instruction decoder and emulator.
*
- * Copyright (c) 2005-2007 Keir Fraser
- * Copyright (c) 2005-2007 XenSource Inc.
+ * Copyright (c) 2008, Citrix Systems, Inc.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __X86_EMULATE_H__
-#define __X86_EMULATE_H__
-
-struct x86_emulate_ctxt;
-
-/* Comprehensive enumeration of x86 segment registers. */
-enum x86_segment {
- /* General purpose. */
- x86_seg_cs,
- x86_seg_ss,
- x86_seg_ds,
- x86_seg_es,
- x86_seg_fs,
- x86_seg_gs,
- /* System. */
- x86_seg_tr,
- x86_seg_ldtr,
- x86_seg_gdtr,
- x86_seg_idtr,
- /*
- * Dummy: used to emulate direct processor accesses to management
- * structures (TSS, GDT, LDT, IDT, etc.) which use linear addressing
- * (no segment component) and bypass usual segment- and page-level
- * protection checks.
- */
- x86_seg_none
-};
-
-#define is_x86_user_segment(seg) ((unsigned)(seg) <= x86_seg_gs)
-
-/*
- * Attribute for segment selector. This is a copy of bit 40:47 & 52:55 of the
- * segment descriptor. It happens to match the format of an AMD SVM VMCB.
- */
-typedef union segment_attributes {
- uint16_t bytes;
- struct
- {
- uint16_t type:4; /* 0; Bit 40-43 */
- uint16_t s: 1; /* 4; Bit 44 */
- uint16_t dpl: 2; /* 5; Bit 45-46 */
- uint16_t p: 1; /* 7; Bit 47 */
- uint16_t avl: 1; /* 8; Bit 52 */
- uint16_t l: 1; /* 9; Bit 53 */
- uint16_t db: 1; /* 10; Bit 54 */
- uint16_t g: 1; /* 11; Bit 55 */
- } fields;
-} __attribute__ ((packed)) segment_attributes_t;
-
-/*
- * Full state of a segment register (visible and hidden portions).
- * Again, this happens to match the format of an AMD SVM VMCB.
- */
-struct segment_register {
- uint16_t sel;
- segment_attributes_t attr;
- uint32_t limit;
- uint64_t base;
-} __attribute__ ((packed));
-
-/*
- * Return codes from state-accessor functions and from x86_emulate().
+ * Authors:
+ * Keir Fraser <keir.fraser@citrix.com>
*/
- /* Completed successfully. State modified appropriately. */
-#define X86EMUL_OKAY 0
- /* Unhandleable access or emulation. No state modified. */
-#define X86EMUL_UNHANDLEABLE 1
- /* Exception raised and requires delivery. */
-#define X86EMUL_EXCEPTION 2
- /* Retry the emulation for some reason. No state modified. */
-#define X86EMUL_RETRY 3
- /* (cmpxchg accessor): CMPXCHG failed. Maps to X86EMUL_RETRY in caller. */
-#define X86EMUL_CMPXCHG_FAILED 3
-
-/*
- * These operations represent the instruction emulator's interface to memory.
- *
- * NOTES:
- * 1. If the access fails (cannot emulate, or a standard access faults) then
- * it is up to the memop to propagate the fault to the guest VM via
- * some out-of-band mechanism, unknown to the emulator. The memop signals
- * failure by returning X86EMUL_EXCEPTION to the emulator, which will
- * then immediately bail.
- * 2. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
- * cmpxchg8b_emulated need support 8-byte accesses.
- * 3. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
- */
-struct x86_emulate_ops
-{
- /*
- * All functions:
- * @ctxt: [IN ] Emulation context info as passed to the emulator.
- * All memory-access functions:
- * @seg: [IN ] Segment being dereferenced (specified as x86_seg_??).
- * @offset:[IN ] Offset within segment.
- * Read functions:
- * @val: [OUT] Value read, zero-extended to 'ulong'.
- * Write functions:
- * @val: [IN ] Value to write (low-order bytes used as req'd).
- * Variable-length access functions:
- * @bytes: [IN ] Number of bytes to read or write.
- */
-
- /* read: Emulate a memory read. */
- int (*read)(
- enum x86_segment seg,
- unsigned long offset,
- unsigned long *val,
- unsigned int bytes,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * insn_fetch: Emulate fetch from instruction byte stream.
- * Parameters are same as for 'read'. @seg is always x86_seg_cs.
- */
- int (*insn_fetch)(
- enum x86_segment seg,
- unsigned long offset,
- unsigned long *val,
- unsigned int bytes,
- struct x86_emulate_ctxt *ctxt);
-
- /* write: Emulate a memory write. */
- int (*write)(
- enum x86_segment seg,
- unsigned long offset,
- unsigned long val,
- unsigned int bytes,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation.
- * @old: [IN ] Value expected to be current at @addr.
- * @new: [IN ] Value to write to @addr.
- */
- int (*cmpxchg)(
- enum x86_segment seg,
- unsigned long offset,
- unsigned long old,
- unsigned long new,
- unsigned int bytes,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * cmpxchg8b: Emulate an atomic (LOCKed) CMPXCHG8B operation.
- * @old: [IN ] Value expected to be current at @addr.
- * @new: [IN ] Value to write to @addr.
- * NOTES:
- * 1. This function is only ever called when emulating a real CMPXCHG8B.
- * 2. This function is *never* called on x86/64 systems.
- * 2. Not defining this function (i.e., specifying NULL) is equivalent
- * to defining a function that always returns X86EMUL_UNHANDLEABLE.
- */
- int (*cmpxchg8b)(
- enum x86_segment seg,
- unsigned long offset,
- unsigned long old_lo,
- unsigned long old_hi,
- unsigned long new_lo,
- unsigned long new_hi,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * rep_ins: Emulate INS: <src_port> -> <dst_seg:dst_offset>.
- * @bytes_per_rep: [IN ] Bytes transferred per repetition.
- * @reps: [IN ] Maximum repetitions to be emulated.
- * [OUT] Number of repetitions actually emulated.
- */
- int (*rep_ins)(
- uint16_t src_port,
- enum x86_segment dst_seg,
- unsigned long dst_offset,
- unsigned int bytes_per_rep,
- unsigned long *reps,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * rep_outs: Emulate OUTS: <src_seg:src_offset> -> <dst_port>.
- * @bytes_per_rep: [IN ] Bytes transferred per repetition.
- * @reps: [IN ] Maximum repetitions to be emulated.
- * [OUT] Number of repetitions actually emulated.
- */
- int (*rep_outs)(
- enum x86_segment src_seg,
- unsigned long src_offset,
- uint16_t dst_port,
- unsigned int bytes_per_rep,
- unsigned long *reps,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * rep_movs: Emulate MOVS: <src_seg:src_offset> -> <dst_seg:dst_offset>.
- * @bytes_per_rep: [IN ] Bytes transferred per repetition.
- * @reps: [IN ] Maximum repetitions to be emulated.
- * [OUT] Number of repetitions actually emulated.
- */
- int (*rep_movs)(
- enum x86_segment src_seg,
- unsigned long src_offset,
- enum x86_segment dst_seg,
- unsigned long dst_offset,
- unsigned int bytes_per_rep,
- unsigned long *reps,
- struct x86_emulate_ctxt *ctxt);
- /*
- * read_segment: Emulate a read of full context of a segment register.
- * @reg: [OUT] Contents of segment register (visible and hidden state).
- */
- int (*read_segment)(
- enum x86_segment seg,
- struct segment_register *reg,
- struct x86_emulate_ctxt *ctxt);
+#ifndef __ASM_X86_X86_EMULATE_H__
+#define __ASM_X86_X86_EMULATE_H__
- /*
- * write_segment: Emulate a read of full context of a segment register.
- * @reg: [OUT] Contents of segment register (visible and hidden state).
- */
- int (*write_segment)(
- enum x86_segment seg,
- struct segment_register *reg,
- struct x86_emulate_ctxt *ctxt);
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <asm/regs.h>
- /*
- * read_io: Read from I/O port(s).
- * @port: [IN ] Base port for access.
- */
- int (*read_io)(
- unsigned int port,
- unsigned int bytes,
- unsigned long *val,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * write_io: Write to I/O port(s).
- * @port: [IN ] Base port for access.
- */
- int (*write_io)(
- unsigned int port,
- unsigned int bytes,
- unsigned long val,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * read_cr: Read from control register.
- * @reg: [IN ] Register to read (0-15).
- */
- int (*read_cr)(
- unsigned int reg,
- unsigned long *val,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * write_cr: Write to control register.
- * @reg: [IN ] Register to write (0-15).
- */
- int (*write_cr)(
- unsigned int reg,
- unsigned long val,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * read_dr: Read from debug register.
- * @reg: [IN ] Register to read (0-15).
- */
- int (*read_dr)(
- unsigned int reg,
- unsigned long *val,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * write_dr: Write to debug register.
- * @reg: [IN ] Register to write (0-15).
- */
- int (*write_dr)(
- unsigned int reg,
- unsigned long val,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * read_msr: Read from model-specific register.
- * @reg: [IN ] Register to read.
- */
- int (*read_msr)(
- unsigned long reg,
- uint64_t *val,
- struct x86_emulate_ctxt *ctxt);
-
- /*
- * write_dr: Write to model-specific register.
- * @reg: [IN ] Register to write.
- */
- int (*write_msr)(
- unsigned long reg,
- uint64_t val,
- struct x86_emulate_ctxt *ctxt);
-
- /* wbinvd: Write-back and invalidate cache contents. */
- int (*wbinvd)(
- struct x86_emulate_ctxt *ctxt);
-
- /* cpuid: Emulate CPUID via given set of EAX-EDX inputs/outputs. */
- int (*cpuid)(
- unsigned int *eax,
- unsigned int *ebx,
- unsigned int *ecx,
- unsigned int *edx,
- struct x86_emulate_ctxt *ctxt);
-
- /* inject_hw_exception */
- int (*inject_hw_exception)(
- uint8_t vector,
- int32_t error_code,
- struct x86_emulate_ctxt *ctxt);
-
- /* inject_sw_interrupt */
- int (*inject_sw_interrupt)(
- uint8_t vector,
- uint8_t insn_len,
- struct x86_emulate_ctxt *ctxt);
-
- /* load_fpu_ctxt: Load emulated environment's FPU state onto processor. */
- void (*load_fpu_ctxt)(
- struct x86_emulate_ctxt *ctxt);
-
- /* invlpg: Invalidate paging structures which map addressed byte. */
- int (*invlpg)(
- enum x86_segment seg,
- unsigned long offset,
- struct x86_emulate_ctxt *ctxt);
-};
-
-struct cpu_user_regs;
-
-struct x86_emulate_ctxt
-{
- /* Register state before/after emulation. */
- struct cpu_user_regs *regs;
-
- /* Default address size in current execution mode (16, 32, or 64). */
- unsigned int addr_size;
-
- /* Stack pointer width in bits (16, 32 or 64). */
- unsigned int sp_size;
-
- /* Set this if writes may have side effects. */
- uint8_t force_writeback;
-
- /* Retirement state, set by the emulator (valid only on X86EMUL_OKAY). */
- union {
- struct {
- uint8_t hlt:1; /* Instruction HLTed. */
- uint8_t mov_ss:1; /* Instruction sets MOV-SS irq shadow. */
- uint8_t sti:1; /* Instruction sets STI irq shadow. */
- } flags;
- uint8_t byte;
- } retire;
-};
-
-/*
- * x86_emulate: Emulate an instruction.
- * Returns -1 on failure, 0 on success.
- */
-int
-x86_emulate(
- struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops);
-
-/*
- * Given the 'reg' portion of a ModRM byte, and a register block, return a
- * pointer into the block that addresses the relevant register.
- * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
- */
-void *
-decode_register(
- uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
+#include "../../arch/x86/x86_emulate/x86_emulate.h"
-#endif /* __X86_EMULATE_H__ */
+#endif /* __ASM_X86_X86_EMULATE_H__ */
diff --git a/xen/include/public/arch-ia64.h b/xen/include/public/arch-ia64.h
index 76cb70fb1a..7a779d6005 100644
--- a/xen/include/public/arch-ia64.h
+++ b/xen/include/public/arch-ia64.h
@@ -23,9 +23,15 @@
*
*/
+#include "xen.h"
+
#ifndef __HYPERVISOR_IF_IA64_H__
#define __HYPERVISOR_IF_IA64_H__
+#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
+#error "Anonymous structs/unions are a GNU extension."
+#endif
+
/* Structural guest handles introduced in 0x00030201. */
#if __XEN_INTERFACE_VERSION__ >= 0x00030201
#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
@@ -68,8 +74,6 @@ typedef unsigned long xen_pfn_t;
#ifndef __ASSEMBLY__
-#define __anonymous_union __extension__ union
-
typedef unsigned long xen_ulong_t;
#ifdef __XEN_TOOLS__
@@ -123,11 +127,11 @@ struct mapped_regs {
unsigned long reserved1[29];
unsigned long vhpi;
unsigned long reserved2[95];
- __anonymous_union {
+ union {
unsigned long vgr[16];
unsigned long bank1_regs[16]; // bank1 regs (r16-r31) when bank0 active
};
- __anonymous_union {
+ union {
unsigned long vbgr[16];
unsigned long bank0_regs[16]; // bank0 regs (r16-r31) when bank1 active
};
@@ -138,7 +142,7 @@ struct mapped_regs {
unsigned long vpsr;
unsigned long vpr;
unsigned long reserved4[76];
- __anonymous_union {
+ union {
unsigned long vcr[128];
struct {
unsigned long dcr; // CR0
@@ -172,7 +176,7 @@ struct mapped_regs {
unsigned long rsv6[46];
};
};
- __anonymous_union {
+ union {
unsigned long reserved5[128];
struct {
unsigned long precover_ifs;
@@ -569,7 +573,7 @@ struct xen_ia64_boot_param {
struct xen_ia64_opt_feature {
unsigned long cmd; /* Which feature */
unsigned char on; /* Switch feature on/off */
- __anonymous_union {
+ union {
struct {
/* The page protection bit mask of the pte.
* This will be or'ed with the pte. */
diff --git a/xen/include/public/arch-powerpc.h b/xen/include/public/arch-powerpc.h
index be668baec2..d92532f3ce 100644
--- a/xen/include/public/arch-powerpc.h
+++ b/xen/include/public/arch-powerpc.h
@@ -22,6 +22,8 @@
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
+#include "xen.h"
+
#ifndef __XEN_PUBLIC_ARCH_PPC_64_H__
#define __XEN_PUBLIC_ARCH_PPC_64_H__
diff --git a/xen/include/public/arch-x86/xen-x86_64.h b/xen/include/public/arch-x86/xen-x86_64.h
index d07f03904b..1e54cf92a7 100644
--- a/xen/include/public/arch-x86/xen-x86_64.h
+++ b/xen/include/public/arch-x86/xen-x86_64.h
@@ -140,9 +140,9 @@ struct iret_context {
/* Bottom of iret stack frame. */
};
-#ifdef __GNUC__
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
-#define __DECL_REG(name) __extension__ union { \
+#define __DECL_REG(name) union { \
uint64_t r ## name, e ## name; \
uint32_t _e ## name; \
}
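
Anonymous unions remain a GNU extension, so the __GNUC__ test is tightened rather than keeping a per-use __extension__ marker; strict-ANSI builds fall through to the non-anonymous-union definition (not shown in this hunk). For reference, __DECL_REG(ax) expands to:

    union {
        uint64_t rax, eax;   /* two full-width names for the same register */
        uint32_t _eax;       /* explicit low-32-bit view */
    };
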
diff --git a/xen/include/public/arch-x86/xen.h b/xen/include/public/arch-x86/xen.h
index 0638901b1f..5f7579aab1 100644
--- a/xen/include/public/arch-x86/xen.h
+++ b/xen/include/public/arch-x86/xen.h
@@ -24,6 +24,8 @@
* Copyright (c) 2004-2006, K A Fraser
*/
+#include "../xen.h"
+
#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
#define __XEN_PUBLIC_ARCH_X86_XEN_H__
diff --git a/xen/include/public/hvm/save.h b/xen/include/public/hvm/save.h
index 3ffe182a50..d45f0c1115 100644
--- a/xen/include/public/hvm/save.h
+++ b/xen/include/public/hvm/save.h
@@ -39,6 +39,10 @@
* Internal mechanisms should be kept in Xen-private headers.
*/
+#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
+#error "Anonymous structs/unions are a GNU extension."
+#endif
+
/*
* Each entry is preceded by a descriptor giving its type and length
*/
diff --git a/xen/include/public/io/fbif.h b/xen/include/public/io/fbif.h
index 261d756113..aecd1cd810 100644
--- a/xen/include/public/io/fbif.h
+++ b/xen/include/public/io/fbif.h
@@ -50,12 +50,29 @@ struct xenfb_update
int32_t height; /* rect height */
};
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize
+{
+ uint8_t type; /* XENFB_TYPE_RESIZE */
+ int32_t width; /* width in pixels */
+ int32_t height; /* height in pixels */
+ int32_t stride; /* stride in bytes */
+ int32_t depth; /* depth in bits */
+ int32_t offset; /* offset of the framebuffer in bytes */
+};
+
#define XENFB_OUT_EVENT_SIZE 40
union xenfb_out_event
{
uint8_t type;
struct xenfb_update update;
+ struct xenfb_resize resize;
char pad[XENFB_OUT_EVENT_SIZE];
};
@@ -109,15 +126,17 @@ struct xenfb_page
* Each directory page holds PAGE_SIZE / sizeof(*pd)
* framebuffer pages, and can thus map up to PAGE_SIZE *
* PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and
- * sizeof(unsigned long) == 4, that's 4 Megs. Two directory
- * pages should be enough for a while.
+ * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs
+ * 64 bit. 256 directories give enough room for a 512 Meg
+ * framebuffer with a max resolution of 12,800x10,240. Should
+ * be enough for a while with room leftover for expansion.
*/
- unsigned long pd[2];
+ unsigned long pd[256];
};
/*
- * Wart: xenkbd needs to know resolution. Put it here until a better
- * solution is found, but don't leak it to the backend.
+ * Wart: xenkbd needs to know default resolution. Put it here until a
+ * better solution is found, but don't leak it to the backend.
*/
#ifdef __KERNEL__
#define XENFB_WIDTH 800
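
With pd[] widened to 256 directory pages, each directory maps PAGE_SIZE/8 = 512 framebuffer pages (2 MiB) on 64-bit, for 512 MiB total; a 12,800x10,240 display at 32 bpp needs 500 MiB, which is where the comment's figure comes from. A frontend that finds feature-resize set in the backend's xenstore area can then announce a mode change with the new event; filling one in might look like this (the ring-producer helper at the end is hypothetical):

    union xenfb_out_event event;

    memset(&event, 0, sizeof(event));
    event.resize.type   = XENFB_TYPE_RESIZE;
    event.resize.width  = 1024;         /* pixels */
    event.resize.height = 768;          /* pixels */
    event.resize.stride = 1024 * 4;     /* bytes per scanline */
    event.resize.depth  = 32;           /* bits per pixel */
    event.resize.offset = 0;            /* framebuffer offset in bytes */

    xenfb_send_event(dev, &event);      /* hypothetical ring helper */
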
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index f56f05777a..b66c02190c 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -84,8 +84,13 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t);
* Get physical information about the host machine
*/
#define XEN_SYSCTL_physinfo 3
+ /* (x86) The platform supports HVM guests. */
+#define _XEN_SYSCTL_PHYSCAP_hvm 0
+#define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm)
+ /* (x86) The platform supports HVM-guest direct access to I/O devices. */
+#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1
+#define XEN_SYSCTL_PHYSCAP_hvm_directio (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio)
struct xen_sysctl_physinfo {
- /* IN variables. */
uint32_t threads_per_core;
uint32_t cores_per_socket;
uint32_t nr_cpus;
@@ -96,7 +101,6 @@ struct xen_sysctl_physinfo {
uint64_aligned_t scrub_pages;
uint32_t hw_cap[8];
- /* IN/OUT variables. */
/*
* IN: maximum addressable entry in the caller-provided cpu_to_node array.
* OUT: largest cpu identifier in the system.
@@ -112,6 +116,9 @@ struct xen_sysctl_physinfo {
* elements of the array will not be written by the sysctl.
*/
XEN_GUEST_HANDLE_64(uint32) cpu_to_node;
+
+ /* XEN_SYSCTL_PHYSCAP_??? */
+ uint32_t capabilities;
};
typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t);
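
Toolstacks can read the new capability bits straight out of physinfo; a sketch against the libxc wrapper (assumed to pass the sysctl struct through unchanged):

    xen_sysctl_physinfo_t info;

    if ( xc_physinfo(xc_handle, &info) == 0 )
    {
        if ( info.capabilities & XEN_SYSCTL_PHYSCAP_hvm )
            printf("HVM guests supported\n");
        if ( info.capabilities & XEN_SYSCTL_PHYSCAP_hvm_directio )
            printf("HVM guests may directly access I/O devices\n");
    }
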
diff --git a/xen/include/public/xsm/acm.h b/xen/include/public/xsm/acm.h
index 09e7879238..a66d399c6c 100644
--- a/xen/include/public/xsm/acm.h
+++ b/xen/include/public/xsm/acm.h
@@ -102,6 +102,7 @@ typedef uint32_t ssidref_t;
#define ACMHOOK_none 0
#define ACMHOOK_sharing 1
#define ACMHOOK_authorization 2
+#define ACMHOOK_conflictset 3
/* -------security policy relevant type definitions-------- */
diff --git a/xen/include/xen/hvm/save.h b/xen/include/xen/hvm/save.h
index 2842cf0839..723369e248 100644
--- a/xen/include/xen/hvm/save.h
+++ b/xen/include/xen/hvm/save.h
@@ -18,6 +18,7 @@
#ifndef __XEN_HVM_SAVE_H__
#define __XEN_HVM_SAVE_H__
+#include <public/xen.h>
#include <public/hvm/save.h>
#include <asm/types.h>
diff --git a/xen/include/xsm/acm/acm_hooks.h b/xen/include/xsm/acm/acm_hooks.h
index 35ea144378..19e37f9e2e 100644
--- a/xen/include/xsm/acm/acm_hooks.h
+++ b/xen/include/xsm/acm/acm_hooks.h
@@ -116,6 +116,7 @@ struct acm_operations {
ssidref_t ssidref2);
int (*authorization) (ssidref_t ssidref1,
ssidref_t ssidref2);
+ int (*conflictset) (ssidref_t ssidref1);
/* determine whether the default policy is installed */
int (*is_default_policy) (void);
};
@@ -151,6 +152,8 @@ static inline int acm_sharing(ssidref_t ssidref1, ssidref_t ssidref2)
{ return 0; }
static inline int acm_authorization(ssidref_t ssidref1, ssidref_t ssidref2)
{ return 0; }
+static inline int acm_conflictset(ssidref_t ssidref1)
+{ return 0; }
static inline int acm_domain_create(struct domain *d, ssidref_t ssidref)
{ return 0; }
static inline void acm_domain_destroy(struct domain *d)
@@ -329,6 +332,17 @@ static inline int acm_authorization(ssidref_t ssidref1, ssidref_t ssidref2)
}
+static inline int acm_conflictset(ssidref_t ssidref1)
+{
+ if ((acm_primary_ops->conflictset != NULL) &&
+ acm_primary_ops->conflictset(ssidref1))
+ return ACM_ACCESS_DENIED;
+ else if ((acm_secondary_ops->conflictset != NULL) &&
+ acm_secondary_ops->conflictset(ssidref1))
+ return ACM_ACCESS_DENIED;
+ return ACM_ACCESS_PERMITTED;
+}
+
/* Return true iff buffer has an acm policy magic number. */
extern int acm_is_policy(char *buf, unsigned long len);
diff --git a/xen/xsm/acm/acm_chinesewall_hooks.c b/xen/xsm/acm/acm_chinesewall_hooks.c
index a6e2eb3949..65e60e7cb4 100644
--- a/xen/xsm/acm/acm_chinesewall_hooks.c
+++ b/xen/xsm/acm/acm_chinesewall_hooks.c
@@ -641,6 +641,41 @@ static int chwall_is_default_policy(void)
(chwall_bin_pol.max_ssidrefs == 2 ) );
}
+
+static int chwall_is_in_conflictset(ssidref_t ssidref1)
+{
+ /* Is ssidref1 in conflict with any running domain? */
+ int rc = 0;
+ int i, j;
+ ssidref_t ssid_chwall;
+
+ read_lock(&acm_bin_pol_rwlock);
+
+ ssid_chwall = GET_SSIDREF(ACM_CHINESE_WALL_POLICY, ssidref1);
+
+ if ( ssid_chwall >= 0 && ssid_chwall < chwall_bin_pol.max_ssidrefs ) {
+ for ( i = 0; i < chwall_bin_pol.max_conflictsets && rc == 0; i++ ) {
+ for ( j = 0; j < chwall_bin_pol.max_types; j++ ) {
+ if ( chwall_bin_pol.conflict_aggregate_set
+ [i * chwall_bin_pol.max_types + j] &&
+ chwall_bin_pol.ssidrefs
+ [ssid_chwall * chwall_bin_pol.max_types + j])
+ {
+ rc = 1;
+ break;
+ }
+ }
+ }
+ } else {
+ rc = 1;
+ }
+
+ read_unlock(&acm_bin_pol_rwlock);
+
+ return rc;
+}
+
+
struct acm_operations acm_chinesewall_ops = {
/* policy management services */
.init_domain_ssid = chwall_init_domain_ssid,
@@ -666,6 +701,7 @@ struct acm_operations acm_chinesewall_ops = {
/* generic domain-requested decision hooks */
.sharing = NULL,
.authorization = NULL,
+ .conflictset = chwall_is_in_conflictset,
.is_default_policy = chwall_is_default_policy,
};
diff --git a/xen/xsm/acm/acm_policy.c b/xen/xsm/acm/acm_policy.c
index a7eff56903..3f66e8ae73 100644
--- a/xen/xsm/acm/acm_policy.c
+++ b/xen/xsm/acm/acm_policy.c
@@ -446,6 +446,9 @@ acm_get_decision(ssidref_t ssidref1, ssidref_t ssidref2, u32 hook)
ret = acm_authorization(ssidref1, ssidref2);
break;
+ case ACMHOOK_conflictset:
+ ret = acm_conflictset(ssidref1);
+ break;
default:
/* deny */
break;
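
The dispatcher change makes the new hook reachable through the same acm_get_decision() entry point as the sharing and authorization hooks; a caller vetting a prospective domain label might use it as follows (call site assumed; the second ssidref is ignored for this hook):

    if ( acm_get_decision(ssidref, 0, ACMHOOK_conflictset) !=
         ACM_ACCESS_PERMITTED )
        return -EACCES;   /* label clashes with a running domain's set */
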
diff --git a/xen/xsm/acm/acm_simple_type_enforcement_hooks.c b/xen/xsm/acm/acm_simple_type_enforcement_hooks.c
index d58c49fcd4..01eae51bb2 100644
--- a/xen/xsm/acm/acm_simple_type_enforcement_hooks.c
+++ b/xen/xsm/acm/acm_simple_type_enforcement_hooks.c
@@ -899,8 +899,10 @@ struct acm_operations acm_simple_type_enforcement_ops = {
.fail_grant_map_ref = NULL,
.pre_grant_setup = ste_pre_grant_setup,
.fail_grant_setup = NULL,
+ /* generic domain-requested decision hooks */
.sharing = ste_sharing,
.authorization = ste_authorization,
+ .conflictset = NULL,
.is_default_policy = ste_is_default_policy,
};