diff options
author | awilliam@xenbuild2.aw <awilliam@xenbuild2.aw> | 2007-01-10 08:40:47 -0700 |
---|---|---|
committer | awilliam@xenbuild2.aw <awilliam@xenbuild2.aw> | 2007-01-10 08:40:47 -0700 |
commit | a77c31c882ce9be75f2c4b8e09405bd1c9fd1cb9 (patch) | |
tree | 6765cb9b976e23af888506ec7a2456528ca56fff | |
parent | 6ed3af384fe8be0982eaf8ab841b41ed578761a7 (diff) | |
parent | 5fc32d22f2975d14e23fdbdab6db2e6297a47869 (diff) | |
download | xen-a77c31c882ce9be75f2c4b8e09405bd1c9fd1cb9.tar.gz xen-a77c31c882ce9be75f2c4b8e09405bd1c9fd1cb9.tar.bz2 xen-a77c31c882ce9be75f2c4b8e09405bd1c9fd1cb9.zip |
merge with xen-unstable.hg
341 files changed, 14143 insertions, 5990 deletions
@@ -20,6 +20,7 @@ ^\.config$ ^TAGS$ ^tags$ +^build.*$ ^dist/.*$ ^docs/.*\.aux$ ^docs/.*\.dvi$ @@ -205,6 +206,7 @@ ^xen/ddb/.*$ ^xen/include/asm$ ^xen/include/asm-.*/asm-offsets\.h$ +^xen/include/compat/.*$ ^xen/include/hypervisor-ifs/arch$ ^xen/include/public/public$ ^xen/include/xen/.*\.new$ @@ -16,3 +16,4 @@ c8fdb0caa77b429cf47f9707926e83947778cb48 RELEASE-3.0.0 af0573e9e5258db0a9d28aa954dd302ddd2c2d23 3.0.2-rc d0d3fef37685be264a7f52201f8ef44c030daad3 3.0.2-branched 6ed4368b4a9e1924c983774c4b1a2b6baf8e98a6 3.0.3-branched +057f7c4dbed1c75a3fbe446d346cee04cff31497 3.0.4-branched @@ -11,6 +11,8 @@ XEN_OS ?= $(shell uname -s) CONFIG_$(XEN_OS) := y +SHELL ?= /bin/sh + # Tools to run on system hosting the build HOSTCC = gcc HOSTCFLAGS = -Wall -Werror -Wstrict-prototypes -O2 -fomit-frame-pointer diff --git a/buildconfigs/mk.linux-2.6-xen b/buildconfigs/mk.linux-2.6-xen index 9a1e47a332..272a58479d 100644 --- a/buildconfigs/mk.linux-2.6-xen +++ b/buildconfigs/mk.linux-2.6-xen @@ -3,7 +3,8 @@ LINUX_VER = 2.6.16.33 EXTRAVERSION ?= xen -LINUX_DIR = linux-$(LINUX_VER)-$(EXTRAVERSION) +LINUX_SRCDIR = linux-$(LINUX_VER)-xen +LINUX_DIR = build-linux-$(LINUX_VER)-$(EXTRAVERSION)_$(XEN_TARGET_ARCH) IMAGE_TARGET ?= vmlinuz INSTALL_BOOT_PATH ?= $(DESTDIR) @@ -23,24 +24,31 @@ build: $(LINUX_DIR)/include/linux/autoconf.h mkdir -p $(INSTALL_BOOT_PATH) $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_PATH=$(INSTALL_BOOT_PATH) install -$(LINUX_DIR)/include/linux/autoconf.h: ref-linux-$(LINUX_VER)/.valid-ref - rm -rf $(LINUX_DIR) - cp -al $(<D) $(LINUX_DIR) +$(LINUX_SRCDIR)/.valid-src: ref-linux-$(LINUX_VER)/.valid-ref + rm -rf $(LINUX_SRCDIR) + cp -al $(<D) $(LINUX_SRCDIR) # Apply arch-xen patches ( cd linux-$(LINUX_SERIES)-xen-sparse && \ - LINUX_ARCH=$(LINUX_ARCH) bash ./mkbuildtree ../$(LINUX_DIR) ) + LINUX_ARCH=$(LINUX_ARCH) bash ./mkbuildtree ../$(LINUX_SRCDIR) ) + # Patch kernel Makefile to set EXTRAVERSION + ( cd $(LINUX_SRCDIR) ; \ + sed -e 's,^EXTRAVERSION.*,&$$(XENGUEST),' \ 
+ -e 's,^KERNELRELEASE,XENGUEST := $$(shell [ -r $$(objtree)/.xenguest ] \&\& cat $$(objtree)/.xenguest)\n&,' Makefile >Mk.tmp ; \ + rm -f Makefile ; mv Mk.tmp Makefile ) + touch $@ + +$(LINUX_DIR)/include/linux/autoconf.h: $(LINUX_SRCDIR)/.valid-src + rm -rf $(LINUX_DIR) + mkdir -p $(LINUX_DIR) # Re-use config from install dir if one exits else use default config - CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \ + CONFIG_VERSION=$$(sed -ne 's/$$(XENGUEST)//; s/^EXTRAVERSION = //p' $(LINUX_SRCDIR)/Makefile); \ [ -r $(DESTDIR)/boot/config-$(LINUX_VER3)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \ cp $(DESTDIR)/boot/config-$(LINUX_VER3)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \ || sh buildconfigs/create_config.sh $(LINUX_DIR)/.config $(EXTRAVERSION) $(XEN_TARGET_ARCH) $(XEN_SYSTYPE) # See if we need to munge config to enable PAE $(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae - # Patch kernel Makefile to set EXTRAVERSION - ( cd $(LINUX_DIR) ; \ - sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \ - rm -f Makefile ; mv Mk.tmp Makefile ) - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) oldconfig + echo "-$(EXTRAVERSION)" >$(LINUX_DIR)/.xenguest + $(MAKE) -C $(LINUX_SRCDIR) ARCH=$(LINUX_ARCH) oldconfig O=$$(/bin/pwd)/$(LINUX_DIR) .PHONY: prep prep: $(LINUX_DIR)/include/linux/autoconf.h @@ -62,4 +70,5 @@ delete: .PHONY: mrpropper mrpropper: + rm -rf $(LINUX_SRCDIR) rm -f linux-$(LINUX_VER).tar.bz2 diff --git a/config/StdGNU.mk b/config/StdGNU.mk index 2b767517b8..32a37b44fa 100644 --- a/config/StdGNU.mk +++ b/config/StdGNU.mk @@ -9,6 +9,8 @@ STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy OBJDUMP = $(CROSS_COMPILE)objdump +MSGFMT = msgfmt + INSTALL = install INSTALL_DIR = $(INSTALL) -d -m0755 INSTALL_DATA = $(INSTALL) -m0644 diff --git a/config/SunOS.mk b/config/SunOS.mk index 5d548054d3..77e4b24825 100644 --- a/config/SunOS.mk +++ b/config/SunOS.mk 
@@ -9,6 +9,8 @@ STRIP = $(CROSS_COMPILE)gstrip OBJCOPY = $(CROSS_COMPILE)gobjcopy OBJDUMP = $(CROSS_COMPILE)gobjdump +MSGFMT = gmsgfmt + SHELL = bash INSTALL = ginstall diff --git a/config/x86_64.mk b/config/x86_64.mk index 99b25d8059..288878638c 100644 --- a/config/x86_64.mk +++ b/config/x86_64.mk @@ -2,6 +2,7 @@ CONFIG_X86 := y CONFIG_X86_64 := y CONFIG_X86_$(XEN_OS) := y +CONFIG_COMPAT := y CONFIG_HVM := y CONFIG_MIGRATE := y CONFIG_XCUTILS := y diff --git a/docs/xen-api/wire-protocol.tex b/docs/xen-api/wire-protocol.tex index 5b15573809..bcb228379e 100644 --- a/docs/xen-api/wire-protocol.tex +++ b/docs/xen-api/wire-protocol.tex @@ -105,11 +105,13 @@ In the case where {\tt Status} is set to {\tt Failure} then the struct contains a second element named {\tt ErrorDescription}: \begin{itemize} \item The element of the struct named {\tt ErrorDescription} contains -an array of string values. The first element of the array is an XML-RPC 32-bit {\tt i4} and represents an error code; -the remainder of the array are strings representing error parameters relating to that code. +an array of string values. The first element of the array is an error code; +the remainder of the array are strings representing error parameters relating +to that code. \end{itemize} -For example, an XML-RPC return value from the {\tt Host.ListAllVMs} function above +For example, an XML-RPC return value from the {\tt host.get\_resident\_VMs} +function above may look like this: \begin{verbatim} <struct> @@ -122,9 +124,9 @@ may look like this: <value> <array> <data> - <value>vm-id-1</value> - <value>vm-id-2</value> - <value>vm-id-3</value> + <value>81547a35-205c-a551-c577-00b982c5fe00</value> + <value>61c85a22-05da-b8a2-2e55-06b0847da503</value> + <value>1d401ec4-3c17-35a6-fc79-cee6bd9811fe</value> </data> </array> </value> @@ -147,16 +149,16 @@ We ought to support at least The XML-RPC interface is session-based; before you can make arbitrary RPC calls you must login and initiate a session. 
For example: \begin{verbatim} - session_id Session.login_with_password(string uname, string pwd) + session_id session.login_with_password(string uname, string pwd) \end{verbatim} Where {\tt uname} and {\tt password} refer to your username and password respectively, as defined by the Xen administrator. -The {\tt session\_id} returned by {\tt Session.Login} is passed to subequent -RPC calls as an authentication token. +The {\tt session\_id} returned by {\tt session.login\_with\_password} is passed +to subsequent RPC calls as an authentication token. -A session can be terminated with the {\tt Session.Logout} function: +A session can be terminated with the {\tt session.logout} function: \begin{verbatim} - void Session.Logout(session_id session) + void session.logout(session_id session) \end{verbatim} \subsection{Synchronous and Asynchronous invocation} @@ -214,10 +216,12 @@ Create a python object referencing the remote server: >>> xen = xmlrpclib.Server("http://test:4464") \end{verbatim} -Acquire a session token by logging in with a username and password (error-handling ommitted for brevity; the session token is pointed to by the key {\tt 'Value'} in the returned dictionary) +Acquire a session token by logging in with a username and password +(error-handling omitted for brevity; the session token is pointed to by the +key {\tt 'Value'} in the returned dictionary) \begin{verbatim} ->>> session = xen.Session.do_login_with_password("user", "passwd")['Value'] +>>> session = xen.session.login_with_password("user", "passwd")['Value'] \end{verbatim} When serialised, this call looks like the following: @@ -225,7 +229,7 @@ When serialised, this call looks like the following: \begin{verbatim} <?xml version='1.0'?> <methodCall> - <methodName>Session.do_login_with_password</methodName> + <methodName>session.login_with_password</methodName> <params> <param> <value><string>user</string></value> @@ -237,27 +241,34 @@ When serialised, this call looks like the following: </methodCall> 
\end{verbatim} -Next, the user may acquire a list of all the VMs known to the host: (Note the call takes the session token as the only parameter) +Next, the user may acquire a list of all the VMs known to the host: (Note the +call takes the session token as the only parameter) \begin{verbatim} ->>> all_vms = xen.VM.do_list(session)['Value'] +>>> all_vms = xen.host.get_resident_VMs(session)['Value'] >>> all_vms ['b7b92d9e-d442-4710-92a5-ab039fd7d89b', '23e1e837-abbf-4675-b077-d4007989b0cc', '2045dbc0-0734-4eea-9cb2-b8218c6b5bf2', '3202ae18-a046-4c32-9fda-e32e9631866e'] \end{verbatim} -Note the VM references are internally UUIDs. Once a reference to a VM has been acquired a lifecycle operation may be invoked: +The VM references here are UUIDs, though they may not be that simple in the +future, and you should treat them as opaque strings. Once a reference to a VM +has been acquired a lifecycle operation may be invoked: \begin{verbatim} ->>> xen.VM.do_start(session, all_vms[3], False) -{'Status': 'Failure', 'ErrorDescription': 'Operation not implemented'} +>>> xen.VM.start(session, all_vms[3], False) +{'Status': 'Failure', 'ErrorDescription': ['VM_BAD_POWER_STATE', 'Halted', 'Running']} \end{verbatim} -In this case the {\tt start} message has not been implemented and an error response has been returned. Currently these high-level errors are returned as structured data (rather than as XMLRPC faults), allowing for internationalised errors in future. Finally, here are some examples of using accessors for object fields: +In this case the {\tt start} message has been rejected, because the VM is +already running, and so an error response has been returned. These high-level +errors are returned as structured data (rather than as XML-RPC faults), +allowing them to be internationalised. 
Finally, here are some examples of +using accessors for object fields: \begin{verbatim} ->>> xen.VM.getname_label(session, all_vms[3])['Value'] +>>> xen.VM.get_name_label(session, all_vms[3])['Value'] 'SMP' ->>> xen.VM.getname_description(session, all_vms[3])['Value'] +>>> xen.VM.get_name_description(session, all_vms[3])['Value'] 'Debian for Xen' \end{verbatim} diff --git a/docs/xen-api/xenapi-datamodel.tex b/docs/xen-api/xenapi-datamodel.tex index 2facf7fec6..3d74a76302 100644 --- a/docs/xen-api/xenapi-datamodel.tex +++ b/docs/xen-api/xenapi-datamodel.tex @@ -184,8 +184,8 @@ The following enumeration types are used: \hspace{0.5cm}{\tt NX} & Execute Disable \\ \hspace{0.5cm}{\tt MMXEXT} & AMD MMX extensions \\ \hspace{0.5cm}{\tt LM} & Long Mode (x86-64) \\ -\hspace{0.5cm}{\tt 3DNOWEXT} & AMD 3DNow! extensions \\ -\hspace{0.5cm}{\tt 3DNOW} & 3DNow! \\ +\hspace{0.5cm}{\tt THREEDNOWEXT} & AMD 3DNow! extensions \\ +\hspace{0.5cm}{\tt THREEDNOW} & 3DNow! \\ \hspace{0.5cm}{\tt RECOVERY} & CPU in recovery mode \\ \hspace{0.5cm}{\tt LONGRUN} & Longrun power control \\ \hspace{0.5cm}{\tt LRTI} & LongRun table interface \\ @@ -244,18 +244,6 @@ The following enumeration types are used: \vspace{1cm} \begin{longtable}{|ll|} \hline -{\tt enum boot\_type} & \\ -\hline -\hspace{0.5cm}{\tt bios} & boot an HVM guest using an emulated BIOS \\ -\hspace{0.5cm}{\tt grub} & boot from inside the machine using grub \\ -\hspace{0.5cm}{\tt kernel\_external} & boot from an external kernel \\ -\hspace{0.5cm}{\tt kernel\_internal} & boot from a kernel inside the guest filesystem \\ -\hline -\end{longtable} - -\vspace{1cm} -\begin{longtable}{|ll|} -\hline {\tt enum vbd\_mode} & \\ \hline \hspace{0.5cm}{\tt RO} & disk is mounted read-only \\ @@ -298,6 +286,7 @@ Quals & Field & Type & Description \\ $\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ $\mathit{RO}_\mathit{ins}$ & {\tt this\_host} & host ref & Currently connected host \\ 
$\mathit{RO}_\mathit{ins}$ & {\tt this\_user} & user ref & Currently connected user \\ +$\mathit{RO}_\mathit{run}$ & {\tt last\_active} & int & Timestamp for last time session was active \\ \hline \end{longtable} \subsection{Additional RPCs associated with class: session} @@ -452,45 +441,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~create} +\subsubsection{RPC name:~get\_last\_active} {\bf Overview:} -Create a new session instance, and return its handle. +Get the last\_active field of the given session. \noindent {\bf Signature:} -\begin{verbatim} (session ref) create (session_id s, session record args)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt session record } & args & All constructor arguments \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -session ref -} - - -reference to the newly created object -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~destroy} - -{\bf Overview:} -Destroy the specified session instance. - - \noindent {\bf Signature:} -\begin{verbatim} void destroy (session_id s, session ref self)\end{verbatim} +\begin{verbatim} int get_last_active (session_id s, session ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -508,11 +465,11 @@ Destroy the specified session instance. \noindent {\bf Return Type:} {\tt -void +int } - +value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} @@ -608,7 +565,7 @@ $\mathit{RO}_\mathit{run}$ & {\tt error\_info} & string Set & if the task has f \subsubsection{RPC name:~get\_all} {\bf Overview:} -Return a list of all the tasks known to the system +Return a list of all the tasks known to the system. 
\noindent {\bf Signature:} \begin{verbatim} ((task ref) Set) get_all (session_id s)\end{verbatim} @@ -1014,70 +971,6 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~create} - -{\bf Overview:} -Create a new task instance, and return its handle. - - \noindent {\bf Signature:} -\begin{verbatim} (task ref) create (session_id s, task record args)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt task record } & args & All constructor arguments \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -task ref -} - - -reference to the newly created object -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~destroy} - -{\bf Overview:} -Destroy the specified task instance. - - \noindent {\bf Signature:} -\begin{verbatim} void destroy (session_id s, task ref self)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt task ref } & self & reference to the object \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -void -} - - - -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} \subsubsection{RPC name:~get\_by\_uuid} {\bf Overview:} @@ -1182,7 +1075,34 @@ references to objects with match names \begin{longtable}{|lllp{0.38\textwidth}|} \hline \multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf VM} \\ -\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A virtual machine (or 'guest')}} \\ +\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A virtual machine (or 'guest'). + +VM booting is controlled by setting one of the two mutually exclusive +groups: "PV", and "HVM". 
If HVM.boot is the empty string, then paravirtual +domain building and booting will be used; otherwise the VM will be loaded +as an HVM domain, and booted using an emulated BIOS. + +When paravirtual booting is in use, the PV/bootloader field indicates the +bootloader to use. It may be "pygrub", in which case the platform's +default installation of pygrub will be used, or a full path within the +control domain to some other bootloader. The other fields, PV/kernel, +PV/ramdisk, PV/args and PV/bootloader\_args will be passed to the +bootloader unmodified, and interpretation of those fields is then specific +to the bootloader itself, including the possibility that the bootloader +will ignore some or all of those given values. + +If the bootloader is pygrub, then the menu.lst is parsed if present in the +guest's filesystem, otherwise the specified kernel and ramdisk are used, or +an autodetected kernel is used if nothing is specified and autodetection is +possible. PV/args is appended to the kernel command line, no matter which +mechanism is used for finding the kernel. + +If PV/bootloader is empty but PV/kernel is specified, then the kernel and +ramdisk values will be treated as paths within the control domain. If both +PV/bootloader and PV/kernel are empty, then the behaviour is as if +PV/bootloader was specified as "pygrub". 
+ +When using HVM booting, HVM/boot specifies the order of the boot devices}} \\ \hline Quals & Field & Type & Description \\ \hline @@ -1215,18 +1135,17 @@ $\mathit{RO}_\mathit{run}$ & {\tt consoles} & (console ref) Set & virtual conso $\mathit{RO}_\mathit{run}$ & {\tt VIFs} & (VIF ref) Set & virtual network interfaces \\ $\mathit{RO}_\mathit{run}$ & {\tt VBDs} & (VBD ref) Set & virtual block devices \\ $\mathit{RO}_\mathit{run}$ & {\tt VTPMs} & (VTPM ref) Set & virtual TPMs \\ -$\mathit{RW}$ & {\tt bios/boot} & string & device to boot the guest from \\ +$\mathit{RW}$ & {\tt PV/bootloader} & string & name of or path to bootloader \\ +$\mathit{RW}$ & {\tt PV/kernel} & string & path to the kernel \\ +$\mathit{RW}$ & {\tt PV/ramdisk} & string & path to the initrd \\ +$\mathit{RW}$ & {\tt PV/args} & string & kernel command-line arguments \\ +$\mathit{RW}$ & {\tt PV/bootloader\_args} & string & miscellaneous arguments for the bootloader \\ +$\mathit{RW}$ & {\tt HVM/boot} & string & device boot order \\ $\mathit{RW}$ & {\tt platform/std\_VGA} & bool & emulate standard VGA instead of cirrus logic \\ $\mathit{RW}$ & {\tt platform/serial} & string & redirect serial port to pty \\ $\mathit{RW}$ & {\tt platform/localtime} & bool & set RTC to local time \\ $\mathit{RW}$ & {\tt platform/clock\_offset} & bool & timeshift applied to guest's clock \\ $\mathit{RW}$ & {\tt platform/enable\_audio} & bool & emulate audio \\ -$\mathit{RW}$ & {\tt builder} & string & domain builder to use \\ -$\mathit{RW}$ & {\tt boot\_method} & boot\_type & select how this machine should boot \\ -$\mathit{RW}$ & {\tt kernel/kernel} & string & path to kernel e.g. /boot/vmlinuz \\ -$\mathit{RW}$ & {\tt kernel/initrd} & string & path to the initrd e.g. 
/boot/initrd.img \\ -$\mathit{RW}$ & {\tt kernel/args} & string & extra kernel command-line arguments \\ -$\mathit{RW}$ & {\tt grub/cmdline} & string & grub command-line \\ $\mathit{RO}_\mathit{ins}$ & {\tt PCI\_bus} & string & PCI bus path for pass-through devices \\ $\mathit{RO}_\mathit{run}$ & {\tt tools\_version} & (string $\rightarrow$ string) Map & versions of installed paravirtualised drivers \\ $\mathit{RW}$ & {\tt otherConfig} & (string $\rightarrow$ string) Map & additional configuration \\ @@ -1265,8 +1184,10 @@ VM ref The ID of the newly created VM. \vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} + +\noindent{\bf Possible Error Codes:} {\tt VM\_BAD\_POWER\_STATE} + +\vspace{0.6cm} \subsubsection{RPC name:~start} {\bf Overview:} @@ -1299,8 +1220,10 @@ void \vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} + +\noindent{\bf Possible Error Codes:} {\tt VM\_BAD\_POWER\_STATE} + +\vspace{0.6cm} \subsubsection{RPC name:~pause} {\bf Overview:} @@ -1331,8 +1254,10 @@ void \vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} + +\noindent{\bf Possible Error Codes:} {\tt VM\_BAD\_POWER\_STATE} + +\vspace{0.6cm} \subsubsection{RPC name:~unpause} {\bf Overview:} @@ -1349,7 +1274,7 @@ Resume the specified VM. This can only be called when the specified VM is in the \begin{tabular}{|c|c|p{7cm}|} \hline {\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt VM ref } & vm & The VM to pause \\ \hline +{\tt VM ref } & vm & The VM to unpause \\ \hline \end{tabular} @@ -1363,8 +1288,10 @@ void \vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} + +\noindent{\bf Possible Error Codes:} {\tt VM\_BAD\_POWER\_STATE} + +\vspace{0.6cm} \subsubsection{RPC name:~clean\_shutdown} {\bf Overview:} @@ -1372,6 +1299,8 @@ Attempt to cleanly shutdown the specified VM. (Note: this may not be supported-- Once shutdown has been completed perform poweroff action specified in guest configuration. +This can only be called when the specified VM is in the Running state. 
+ \noindent {\bf Signature:} \begin{verbatim} void clean_shutdown (session_id s, VM ref vm)\end{verbatim} @@ -1397,8 +1326,10 @@ void \vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} + +\noindent{\bf Possible Error Codes:} {\tt VM\_BAD\_POWER\_STATE} + +\vspace{0.6cm} \subsubsection{RPC name:~clean\_reboot} {\bf Overview:} @@ -1406,6 +1337,8 @@ Attempt to cleanly shutdown the specified VM (Note: this may not be supported--- Once shutdown has been completed perform reboot action specified in guest configuration. +This can only be called when the specified VM is in the Running state. + \noindent {\bf Signature:} \begin{verbatim} void clean_reboot (session_id s, VM ref vm)\end{verbatim} @@ -1431,8 +1364,10 @@ void \vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} + +\noindent{\bf Possible Error Codes:} {\tt VM\_BAD\_POWER\_STATE} + +\vspace{0.6cm} \subsubsection{RPC name:~hard\_shutdown} {\bf Overview:} @@ -1500,7 +1435,7 @@ void \subsubsection{RPC name:~suspend} {\bf Overview:} -Suspend the specified VM to disk. +Suspend the specified VM to disk. This can only be called when the specified VM is in the Running state. \noindent {\bf Signature:} \begin{verbatim} void suspend (session_id s, VM ref vm)\end{verbatim} @@ -1527,12 +1462,14 @@ void \vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} + +\noindent{\bf Possible Error Codes:} {\tt VM\_BAD\_POWER\_STATE} + +\vspace{0.6cm} \subsubsection{RPC name:~resume} {\bf Overview:} -Awaken the specified VM and resume it. +Awaken the specified VM and resume it. This can only be called when the specified VM is in the Suspended state. 
\noindent {\bf Signature:} \begin{verbatim} void resume (session_id s, VM ref vm, bool start_paused)\end{verbatim} @@ -1561,8 +1498,10 @@ void \vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} + +\noindent{\bf Possible Error Codes:} {\tt VM\_BAD\_POWER\_STATE} + +\vspace{0.6cm} \subsubsection{RPC name:~get\_all} {\bf Overview:} @@ -2601,7 +2540,7 @@ void \subsubsection{RPC name:~add\_VCPUs\_features\_force\_on} {\bf Overview:} -set add message derived from field VCPUs/features/force\_on of object VM +Add the given value to the VCPUs/features/force\_on field of the given VM. If the value is already in that Set, then do nothing. \noindent {\bf Signature:} \begin{verbatim} void add_VCPUs_features_force_on (session_id s, VM ref self, cpu_feature value)\end{verbatim} @@ -2635,7 +2574,7 @@ void \subsubsection{RPC name:~remove\_VCPUs\_features\_force\_on} {\bf Overview:} -set remove message derived from field VCPUs/features/force\_on of object VM +Remove the given value from the VCPUs/features/force\_on field of the given VM. If the value is not in that Set, then do nothing. \noindent {\bf Signature:} \begin{verbatim} void remove_VCPUs_features_force_on (session_id s, VM ref self, cpu_feature value)\end{verbatim} @@ -2735,7 +2674,7 @@ void \subsubsection{RPC name:~add\_VCPUs\_features\_force\_off} {\bf Overview:} -set add message derived from field VCPUs/features/force\_off of object VM +Add the given value to the VCPUs/features/force\_off field of the given VM. If the value is already in that Set, then do nothing. \noindent {\bf Signature:} \begin{verbatim} void add_VCPUs_features_force_off (session_id s, VM ref self, cpu_feature value)\end{verbatim} @@ -2769,7 +2708,7 @@ void \subsubsection{RPC name:~remove\_VCPUs\_features\_force\_off} {\bf Overview:} -set remove message derived from field VCPUs/features/force\_off of object VM +Remove the given value from the VCPUs/features/force\_off field of the given VM. If the value is not in that Set, then do nothing. 
\noindent {\bf Signature:} \begin{verbatim} void remove_VCPUs_features_force_off (session_id s, VM ref self, cpu_feature value)\end{verbatim} @@ -3192,13 +3131,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_bios\_boot} +\subsubsection{RPC name:~get\_PV\_bootloader} {\bf Overview:} -Get the bios/boot field of the given VM. +Get the PV/bootloader field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} string get_bios_boot (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} string get_PV_bootloader (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3224,13 +3163,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_bios\_boot} +\subsubsection{RPC name:~set\_PV\_bootloader} {\bf Overview:} -Set the bios/boot field of the given VM. +Set the PV/bootloader field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} void set_bios_boot (session_id s, VM ref self, string value)\end{verbatim} +\begin{verbatim} void set_PV_bootloader (session_id s, VM ref self, string value)\end{verbatim} \noindent{\bf Arguments:} @@ -3258,79 +3197,13 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_platform\_std\_VGA} - -{\bf Overview:} -Get the platform/std\_VGA field of the given VM. - - \noindent {\bf Signature:} -\begin{verbatim} bool get_platform_std_VGA (session_id s, VM ref self)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt VM ref } & self & reference to the object \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -bool -} - - -value of the field -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~set\_platform\_std\_VGA} +\subsubsection{RPC name:~get\_PV\_kernel} {\bf Overview:} -Set the platform/std\_VGA field of the given VM. 
+Get the PV/kernel field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} void set_platform_std_VGA (session_id s, VM ref self, bool value)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt VM ref } & self & reference to the object \\ \hline - -{\tt bool } & value & New value to set \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -void -} - - - -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~get\_platform\_serial} - -{\bf Overview:} -Get the platform/serial field of the given VM. - - \noindent {\bf Signature:} -\begin{verbatim} string get_platform_serial (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} string get_PV_kernel (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3356,13 +3229,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_platform\_serial} +\subsubsection{RPC name:~set\_PV\_kernel} {\bf Overview:} -Set the platform/serial field of the given VM. +Set the PV/kernel field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} void set_platform_serial (session_id s, VM ref self, string value)\end{verbatim} +\begin{verbatim} void set_PV_kernel (session_id s, VM ref self, string value)\end{verbatim} \noindent{\bf Arguments:} @@ -3390,13 +3263,13 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_platform\_localtime} +\subsubsection{RPC name:~get\_PV\_ramdisk} {\bf Overview:} -Get the platform/localtime field of the given VM. +Get the PV/ramdisk field of the given VM. 
\noindent {\bf Signature:} -\begin{verbatim} bool get_platform_localtime (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} string get_PV_ramdisk (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3414,7 +3287,7 @@ Get the platform/localtime field of the given VM. \noindent {\bf Return Type:} {\tt -bool +string } @@ -3422,13 +3295,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_platform\_localtime} +\subsubsection{RPC name:~set\_PV\_ramdisk} {\bf Overview:} -Set the platform/localtime field of the given VM. +Set the PV/ramdisk field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} void set_platform_localtime (session_id s, VM ref self, bool value)\end{verbatim} +\begin{verbatim} void set_PV_ramdisk (session_id s, VM ref self, string value)\end{verbatim} \noindent{\bf Arguments:} @@ -3440,7 +3313,7 @@ Set the platform/localtime field of the given VM. {\bf type} & {\bf name} & {\bf description} \\ \hline {\tt VM ref } & self & reference to the object \\ \hline -{\tt bool } & value & New value to set \\ \hline +{\tt string } & value & New value to set \\ \hline \end{tabular} @@ -3456,13 +3329,13 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_platform\_clock\_offset} +\subsubsection{RPC name:~get\_PV\_args} {\bf Overview:} -Get the platform/clock\_offset field of the given VM. +Get the PV/args field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} bool get_platform_clock_offset (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} string get_PV_args (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3480,7 +3353,7 @@ Get the platform/clock\_offset field of the given VM. 
\noindent {\bf Return Type:} {\tt -bool +string } @@ -3488,13 +3361,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_platform\_clock\_offset} +\subsubsection{RPC name:~set\_PV\_args} {\bf Overview:} -Set the platform/clock\_offset field of the given VM. +Set the PV/args field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} void set_platform_clock_offset (session_id s, VM ref self, bool value)\end{verbatim} +\begin{verbatim} void set_PV_args (session_id s, VM ref self, string value)\end{verbatim} \noindent{\bf Arguments:} @@ -3506,7 +3379,7 @@ Set the platform/clock\_offset field of the given VM. {\bf type} & {\bf name} & {\bf description} \\ \hline {\tt VM ref } & self & reference to the object \\ \hline -{\tt bool } & value & New value to set \\ \hline +{\tt string } & value & New value to set \\ \hline \end{tabular} @@ -3522,13 +3395,13 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_platform\_enable\_audio} +\subsubsection{RPC name:~get\_PV\_bootloader\_args} {\bf Overview:} -Get the platform/enable\_audio field of the given VM. +Get the PV/bootloader\_args field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} bool get_platform_enable_audio (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} string get_PV_bootloader_args (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3546,7 +3419,7 @@ Get the platform/enable\_audio field of the given VM. \noindent {\bf Return Type:} {\tt -bool +string } @@ -3554,13 +3427,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_platform\_enable\_audio} +\subsubsection{RPC name:~set\_PV\_bootloader\_args} {\bf Overview:} -Set the platform/enable\_audio field of the given VM. +Set the PV/bootloader\_args field of the given VM. 
\noindent {\bf Signature:} -\begin{verbatim} void set_platform_enable_audio (session_id s, VM ref self, bool value)\end{verbatim} +\begin{verbatim} void set_PV_bootloader_args (session_id s, VM ref self, string value)\end{verbatim} \noindent{\bf Arguments:} @@ -3572,7 +3445,7 @@ Set the platform/enable\_audio field of the given VM. {\bf type} & {\bf name} & {\bf description} \\ \hline {\tt VM ref } & self & reference to the object \\ \hline -{\tt bool } & value & New value to set \\ \hline +{\tt string } & value & New value to set \\ \hline \end{tabular} @@ -3588,13 +3461,13 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_builder} +\subsubsection{RPC name:~get\_HVM\_boot} {\bf Overview:} -Get the builder field of the given VM. +Get the HVM/boot field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} string get_builder (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} string get_HVM_boot (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3620,13 +3493,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_builder} +\subsubsection{RPC name:~set\_HVM\_boot} {\bf Overview:} -Set the builder field of the given VM. +Set the HVM/boot field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} void set_builder (session_id s, VM ref self, string value)\end{verbatim} +\begin{verbatim} void set_HVM_boot (session_id s, VM ref self, string value)\end{verbatim} \noindent{\bf Arguments:} @@ -3654,13 +3527,13 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_boot\_method} +\subsubsection{RPC name:~get\_platform\_std\_VGA} {\bf Overview:} -Get the boot\_method field of the given VM. +Get the platform/std\_VGA field of the given VM. 
\noindent {\bf Signature:} -\begin{verbatim} (boot_type) get_boot_method (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} bool get_platform_std_VGA (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3678,7 +3551,7 @@ Get the boot\_method field of the given VM. \noindent {\bf Return Type:} {\tt -boot\_type +bool } @@ -3686,13 +3559,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_boot\_method} +\subsubsection{RPC name:~set\_platform\_std\_VGA} {\bf Overview:} -Set the boot\_method field of the given VM. +Set the platform/std\_VGA field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} void set_boot_method (session_id s, VM ref self, boot_type value)\end{verbatim} +\begin{verbatim} void set_platform_std_VGA (session_id s, VM ref self, bool value)\end{verbatim} \noindent{\bf Arguments:} @@ -3704,7 +3577,7 @@ Set the boot\_method field of the given VM. {\bf type} & {\bf name} & {\bf description} \\ \hline {\tt VM ref } & self & reference to the object \\ \hline -{\tt boot\_type } & value & New value to set \\ \hline +{\tt bool } & value & New value to set \\ \hline \end{tabular} @@ -3720,13 +3593,13 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_kernel\_kernel} +\subsubsection{RPC name:~get\_platform\_serial} {\bf Overview:} -Get the kernel/kernel field of the given VM. +Get the platform/serial field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} string get_kernel_kernel (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} string get_platform_serial (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3752,13 +3625,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_kernel\_kernel} +\subsubsection{RPC name:~set\_platform\_serial} {\bf Overview:} -Set the kernel/kernel field of the given VM. +Set the platform/serial field of the given VM. 
\noindent {\bf Signature:} -\begin{verbatim} void set_kernel_kernel (session_id s, VM ref self, string value)\end{verbatim} +\begin{verbatim} void set_platform_serial (session_id s, VM ref self, string value)\end{verbatim} \noindent{\bf Arguments:} @@ -3786,13 +3659,13 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_kernel\_initrd} +\subsubsection{RPC name:~get\_platform\_localtime} {\bf Overview:} -Get the kernel/initrd field of the given VM. +Get the platform/localtime field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} string get_kernel_initrd (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} bool get_platform_localtime (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3810,7 +3683,7 @@ Get the kernel/initrd field of the given VM. \noindent {\bf Return Type:} {\tt -string +bool } @@ -3818,13 +3691,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_kernel\_initrd} +\subsubsection{RPC name:~set\_platform\_localtime} {\bf Overview:} -Set the kernel/initrd field of the given VM. +Set the platform/localtime field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} void set_kernel_initrd (session_id s, VM ref self, string value)\end{verbatim} +\begin{verbatim} void set_platform_localtime (session_id s, VM ref self, bool value)\end{verbatim} \noindent{\bf Arguments:} @@ -3836,7 +3709,7 @@ Set the kernel/initrd field of the given VM. {\bf type} & {\bf name} & {\bf description} \\ \hline {\tt VM ref } & self & reference to the object \\ \hline -{\tt string } & value & New value to set \\ \hline +{\tt bool } & value & New value to set \\ \hline \end{tabular} @@ -3852,13 +3725,13 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_kernel\_args} +\subsubsection{RPC name:~get\_platform\_clock\_offset} {\bf Overview:} -Get the kernel/args field of the given VM. 
+Get the platform/clock\_offset field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} string get_kernel_args (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} bool get_platform_clock_offset (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3876,7 +3749,7 @@ Get the kernel/args field of the given VM. \noindent {\bf Return Type:} {\tt -string +bool } @@ -3884,13 +3757,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_kernel\_args} +\subsubsection{RPC name:~set\_platform\_clock\_offset} {\bf Overview:} -Set the kernel/args field of the given VM. +Set the platform/clock\_offset field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} void set_kernel_args (session_id s, VM ref self, string value)\end{verbatim} +\begin{verbatim} void set_platform_clock_offset (session_id s, VM ref self, bool value)\end{verbatim} \noindent{\bf Arguments:} @@ -3902,7 +3775,7 @@ Set the kernel/args field of the given VM. {\bf type} & {\bf name} & {\bf description} \\ \hline {\tt VM ref } & self & reference to the object \\ \hline -{\tt string } & value & New value to set \\ \hline +{\tt bool } & value & New value to set \\ \hline \end{tabular} @@ -3918,13 +3791,13 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_grub\_cmdline} +\subsubsection{RPC name:~get\_platform\_enable\_audio} {\bf Overview:} -Get the grub/cmdline field of the given VM. +Get the platform/enable\_audio field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} string get_grub_cmdline (session_id s, VM ref self)\end{verbatim} +\begin{verbatim} bool get_platform_enable_audio (session_id s, VM ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -3942,7 +3815,7 @@ Get the grub/cmdline field of the given VM. 
\noindent {\bf Return Type:} {\tt -string +bool } @@ -3950,13 +3823,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~set\_grub\_cmdline} +\subsubsection{RPC name:~set\_platform\_enable\_audio} {\bf Overview:} -Set the grub/cmdline field of the given VM. +Set the platform/enable\_audio field of the given VM. \noindent {\bf Signature:} -\begin{verbatim} void set_grub_cmdline (session_id s, VM ref self, string value)\end{verbatim} +\begin{verbatim} void set_platform_enable_audio (session_id s, VM ref self, bool value)\end{verbatim} \noindent{\bf Arguments:} @@ -3968,7 +3841,7 @@ Set the grub/cmdline field of the given VM. {\bf type} & {\bf name} & {\bf description} \\ \hline {\tt VM ref } & self & reference to the object \\ \hline -{\tt string } & value & New value to set \\ \hline +{\tt bool } & value & New value to set \\ \hline \end{tabular} @@ -10480,6 +10353,239 @@ all fields from the object \vspace{0.3cm} \vspace{1cm} +\newpage +\section{Error Handling} +When a low-level transport error occurs, or a request is malformed at the HTTP +or XML-RPC level, the server may send an XML-RPC Fault response, or the client +may simulate the same. The client must be prepared to handle these errors, +though they may be treated as fatal. On the wire, these are transmitted in a +form similar to this: + +\begin{verbatim} + <methodResponse> + <fault> + <value> + <struct> + <member> + <name>faultCode</name> + <value><int>-1</int></value> + </member> + <member> + <name>faultString</name> + <value><string>Malformed request</string></value> + </member> + </struct> + </value> + </fault> + </methodResponse> +\end{verbatim} + +All other failures are reported with a more structured error response, to +allow better automatic response to failures, proper internationalisation of +any error message, and easier debugging. 
On the wire, these are transmitted +like this: + +\begin{verbatim} + <struct> + <member> + <name>Status</name> + <value>Failure</value> + </member> + <member> + <name>ErrorDescription</name> + <value> + <array> + <data> + <value>MAP_DUPLICATE_KEY</value> + <value>Customer</value> + <value>eSpiel Inc.</value> + <value>eSpiel Incorporated</value> + </data> + </array> + </value> + </member> + </struct> +\end{verbatim} + +Note that the {\tt ErrorDescription} value is an array of string values. The +first element of the array is an error code; the remainder of the array are +strings representing error parameters relating to that code. In this case, +the client has attempted to add the mapping {\tt Customer $\rightarrow$ +eSpiel Incorporated} to a Map, but it already contains the mapping +{\tt Customer $\rightarrow$ eSpiel Inc.}, and so the request has failed. + +Each possible error code is documented in the following section. + +\subsection{Error Codes} + +\subsubsection{HOST\_CPU\_HANDLE\_INVALID} + +You gave an invalid host\_cpu handle. The host\_cpu may have recently been +deleted. The handle parameter echoes the bad value given. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}HOST_CPU_HANDLE_INVALID(handle)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{HOST\_HANDLE\_INVALID} + +You gave an invalid host handle. The host may have recently been deleted. +The handle parameter echoes the bad value given. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}HOST_HANDLE_INVALID(handle)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{INTERNAL\_ERROR} + +The server failed to handle your request, due to an internal error. The +given message may give details useful for debugging the problem.
+ +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}INTERNAL_ERROR(message)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{MAP\_DUPLICATE\_KEY} + +You tried to add a key-value pair to a map, but that key is already there. +The key, current value, and the new value that you tried to set are all +echoed. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}MAP_DUPLICATE_KEY(key, current value, new value)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{MESSAGE\_METHOD\_UNKNOWN} + +You tried to call a method that does not exist. The method name that you +used is echoed. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}MESSAGE_METHOD_UNKNOWN(method)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{MESSAGE\_PARAMETER\_COUNT\_MISMATCH} + +You tried to call a method with the incorrect number of parameters. The +fully-qualified method name that you used, and the number of received and +expected parameters are returned. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}MESSAGE_PARAMETER_COUNT_MISMATCH(method, expected, received)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{NETWORK\_ALREADY\_CONNECTED} + +You tried to create a PIF, but the network you tried to attach it to is +already attached to some other PIF, and so the creation failed. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}NETWORK_ALREADY_CONNECTED(network, connected PIF)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{SESSION\_AUTHENTICATION\_FAILED} + +The credentials given by the user are incorrect, so access has been denied, +and you have not been issued a session handle. + +\vspace{0.3cm} +No parameters. +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{SESSION\_INVALID} + +You gave an invalid session handle. It may have been invalidated by a +server restart, or timed out. 
You should get a new session handle, using +one of the session.login\_ calls. This error does not invalidate the +current connection. The handle parameter echoes the bad value given. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}SESSION_INVALID(handle)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{SR\_HANDLE\_INVALID} + +You gave an invalid SR handle. The SR may have recently been deleted. The +handle parameter echoes the bad value given. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}SR_HANDLE_INVALID(handle)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{VBD\_HANDLE\_INVALID} + +You gave an invalid VBD handle. The VBD may have recently been deleted. +The handle parameter echoes the bad value given. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}VBD_HANDLE_INVALID(handle)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{VDI\_HANDLE\_INVALID} + +You gave an invalid VDI handle. The VDI may have recently been deleted. +The handle parameter echoes the bad value given. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}VDI_HANDLE_INVALID(handle)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{VIF\_HANDLE\_INVALID} + +You gave an invalid VIF handle. The VIF may have recently been deleted. +The handle parameter echoes the bad value given. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}VIF_HANDLE_INVALID(handle)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{VM\_BAD\_POWER\_STATE} + +You attempted an operation on a VM that was not in an appropriate power +state at the time; for example, you attempted to start a VM that was +already running. The parameters returned are the VM's UUID, and the +expected and actual VM state at the time of the call. 
+ +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}VM_BAD_POWER_STATE(vm, expected, actual)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{VM\_HANDLE\_INVALID} + +You gave an invalid VM handle. The VM may have recently been deleted. The +handle parameter echoes the bad value given. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}VM_HANDLE_INVALID(handle)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + +\subsubsection{VTPM\_HANDLE\_INVALID} + +You gave an invalid VTPM handle. The VTPM may have recently been deleted. +The handle parameter echoes the bad value given. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}VTPM_HANDLE_INVALID(handle)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + + + +\newpage \section{DTD} General notes: \begin{itemize} diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c index 5c32dd0c39..7f0538c606 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c @@ -15,6 +15,7 @@ #include <linux/version.h> #include <asm/io.h> #include <xen/balloon.h> +#include <asm/swiotlb.h> #include <asm/tlbflush.h> #include <asm-i386/mach-xen/asm/swiotlb.h> #include <asm/bug.h> @@ -183,8 +184,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size, ret = (void *)vstart; if (ret != NULL) { - /* NB. Hardcode 31 address bits for now: aacraid limitation. 
*/ - if (xen_create_contiguous_region(vstart, order, 31) != 0) { + if (xen_create_contiguous_region(vstart, order, + dma_bits) != 0) { free_pages(vstart, order); return NULL; } diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c index e6724a00b1..0dbc6a412e 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c @@ -101,8 +101,24 @@ void enable_hlt(void) EXPORT_SYMBOL(enable_hlt); -/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */ -void xen_idle(void) +/* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->work.need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. + */ +static void poll_idle(void) +{ + local_irq_enable(); + + asm volatile( + "2:" + "testl %0, %1;" + "rep; nop;" + "je 2b;" + : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); +} + +static void xen_idle(void) { local_irq_disable(); @@ -152,17 +168,22 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { + void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; rmb(); + idle = pm_idle; + + if (!idle) + idle = xen_idle; if (cpu_is_offline(cpu)) play_dead(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; - xen_idle(); + idle(); } preempt_enable_no_resched(); schedule(); @@ -198,9 +219,22 @@ void cpu_idle_wait(void) } EXPORT_SYMBOL_GPL(cpu_idle_wait); -/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */ -/* Always use xen_idle() instead. 
*/ -void __devinit select_idle_routine(const struct cpuinfo_x86 *c) {} +void __devinit select_idle_routine(const struct cpuinfo_x86 *c) +{ +} + +static int __init idle_setup (char *str) +{ + if (!strncmp(str, "poll", 4)) { + printk("using polling idle threads.\n"); + pm_idle = poll_idle; + } + + boot_option_idle_override = 1; + return 1; +} + +__setup("idle=", idle_setup); void show_regs(struct pt_regs * regs) { diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c index 4fa98132f4..ac2c0da536 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c @@ -47,8 +47,8 @@ EXPORT_SYMBOL(swiotlb); */ #define IO_TLB_SHIFT 11 -/* Width of DMA addresses in the IO TLB. 30 bits is a b44 limitation. */ -#define DEFAULT_IO_TLB_DMA_BITS 30 +/* Width of DMA addresses. 30 bits is a b44 limitation. */ +#define DEFAULT_DMA_BITS 30 static int swiotlb_force; static char *iotlb_virt_start; @@ -98,14 +98,14 @@ static struct phys_addr { */ static DEFINE_SPINLOCK(io_tlb_lock); -static unsigned int io_tlb_dma_bits = DEFAULT_IO_TLB_DMA_BITS; +unsigned int dma_bits = DEFAULT_DMA_BITS; static int __init -setup_io_tlb_bits(char *str) +setup_dma_bits(char *str) { - io_tlb_dma_bits = simple_strtoul(str, NULL, 0); + dma_bits = simple_strtoul(str, NULL, 0); return 0; } -__setup("swiotlb_bits=", setup_io_tlb_bits); +__setup("dma_bits=", setup_dma_bits); static int __init setup_io_tlb_npages(char *str) @@ -167,7 +167,7 @@ swiotlb_init_with_default_size (size_t default_size) int rc = xen_create_contiguous_region( (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT), get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), - io_tlb_dma_bits); + dma_bits); BUG_ON(rc); } @@ -197,7 +197,7 @@ swiotlb_init_with_default_size (size_t default_size) bytes >> 20, (unsigned long)iotlb_virt_start, (unsigned long)iotlb_virt_start + bytes, - io_tlb_dma_bits); + dma_bits); } void @@ -665,7 +665,7 @@ 
swiotlb_dma_mapping_error(dma_addr_t dma_addr) int swiotlb_dma_supported (struct device *hwdev, u64 mask) { - return (mask >= ((1UL << io_tlb_dma_bits) - 1)); + return (mask >= ((1UL << dma_bits) - 1)); } EXPORT_SYMBOL(swiotlb_init); diff --git a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c index f491038674..ee2f87191f 100644 --- a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c +++ b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c @@ -110,7 +110,6 @@ static struct irq_routing_table * __init pirq_find_routing_table(void) if (rt) return rt; } - return NULL; } @@ -261,13 +260,13 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i */ static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { - static unsigned int pirqmap[4] = { 3, 2, 5, 1 }; + static unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; return read_config_nybble(router, 0x55, pirqmap[pirq-1]); } static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - static unsigned int pirqmap[4] = { 3, 2, 5, 1 }; + static unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; write_config_nybble(router, 0x55, pirqmap[pirq-1], irq); return 1; } diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c index 65ba83c625..8267a772b7 100644 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c @@ -119,8 +119,26 @@ void exit_idle(void) __exit_idle(); } -/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */ -void xen_idle(void) +/* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. 
+ */ +static void poll_idle(void) +{ + local_irq_enable(); + + asm volatile( + "2:" + "testl %0,%1;" + "rep; nop;" + "je 2b;" + : : + "i" (_TIF_NEED_RESCHED), + "m" (current_thread_info()->flags)); +} + +static void xen_idle(void) { local_irq_disable(); @@ -164,14 +182,18 @@ void cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { + void (*idle)(void); + if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; rmb(); - + idle = pm_idle; + if (!idle) + idle = xen_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); enter_idle(); - xen_idle(); + idle(); __exit_idle(); } @@ -210,9 +232,22 @@ void cpu_idle_wait(void) } EXPORT_SYMBOL_GPL(cpu_idle_wait); -/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */ -/* Always use xen_idle() instead. */ -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) {} +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) +{ +} + +static int __init idle_setup (char *str) +{ + if (!strncmp(str, "poll", 4)) { + printk("using polling idle threads.\n"); + pm_idle = poll_idle; + } + + boot_option_idle_override = 1; + return 1; +} + +__setup("idle=", idle_setup); /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs * regs) diff --git a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c index adf016ba90..cdbc7decd7 100644 --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c @@ -54,7 +54,6 @@ struct tpm_private { tpmif_tx_interface_t *tx; atomic_t refcnt; - unsigned int evtchn; unsigned int irq; u8 is_connected; u8 is_suspended; @@ -271,7 +270,7 @@ static void destroy_tpmring(struct tpm_private *tp) if (tp->irq) unbind_from_irqhandler(tp->irq, tp); - tp->evtchn = tp->irq = 0; + tp->irq = 0; } @@ -302,8 +301,8 @@ again: goto abort_transaction; } - err = xenbus_printf(xbt, dev->nodename, - 
"event-channel", "%u", tp->evtchn); + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + irq_to_evtchn_port(tp->irq)); if (err) { message = "writing event-channel"; goto abort_transaction; @@ -459,19 +458,15 @@ static int tpmif_connect(struct xenbus_device *dev, tp->backend_id = domid; - err = xenbus_alloc_evtchn(dev, &tp->evtchn); - if (err) - return err; - - err = bind_evtchn_to_irqhandler(tp->evtchn, - tpmif_int, SA_SAMPLE_RANDOM, "tpmif", - tp); + err = bind_listening_port_to_irqhandler( + domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); if (err <= 0) { - WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); + WPRINTK("bind_listening_port_to_irqhandler failed " + "(err=%d)\n", err); return err; } - tp->irq = err; + return 0; } @@ -656,9 +651,6 @@ static int tpm_xmit(struct tpm_private *tp, mb(); - DPRINTK("Notifying backend via event channel %d\n", - tp->evtchn); - notify_remote_via_irq(tp->irq); spin_unlock_irq(&tp->tx_lock); diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c index b621d76383..d6d3f7640a 100644 --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c @@ -446,8 +446,10 @@ static struct notifier_block xenstore_notifier; static int __init balloon_init(void) { +#ifdef CONFIG_X86 unsigned long pfn; struct page *page; +#endif if (!is_running_on_xen()) return -ENODEV; @@ -476,13 +478,15 @@ static int __init balloon_init(void) balloon_pde->write_proc = balloon_write; #endif balloon_sysfs_init(); - + +#ifdef CONFIG_X86 /* Initialise the balloon with excess memory space. 
*/ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { page = pfn_to_page(pfn); if (!PageReserved(page)) balloon_append(page); } +#endif target_watch.callback = watch_target; xenstore_notifier.notifier_call = balloon_init_watcher; diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h index 1b5b6a427e..1c51768bb7 100644 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h @@ -65,7 +65,6 @@ typedef struct blkif_st { domid_t domid; unsigned int handle; /* Physical parameters of the comms window. */ - unsigned int evtchn; unsigned int irq; /* Comms information. */ blkif_back_ring_t blk_ring; diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c index 53b4764c42..12552fb17a 100644 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c @@ -97,7 +97,6 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) { blkif_sring_t *sring; int err; - struct evtchn_bind_interdomain bind_interdomain; /* Already connected through? 
*/ if (blkif->irq) @@ -112,24 +111,18 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) return err; } - bind_interdomain.remote_dom = blkif->domid; - bind_interdomain.remote_port = evtchn; + sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); - err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - if (err) { + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + if (err < 0) + { unmap_frontend_page(blkif); free_vm_area(blkif->blk_ring_area); return err; } - - blkif->evtchn = bind_interdomain.local_port; - - sring = (blkif_sring_t *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); - - blkif->irq = bind_evtchn_to_irqhandler( - blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif); + blkif->irq = err; return 0; } diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c index 6b7100a1bc..5b07416fa2 100644 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c @@ -174,8 +174,8 @@ again: message = "writing ring-ref"; goto abort_transaction; } - err = xenbus_printf(xbt, dev->nodename, - "event-channel", "%u", info->evtchn); + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + irq_to_evtchn_port(info->irq)); if (err) { message = "writing event-channel"; goto abort_transaction; @@ -228,15 +228,11 @@ static int setup_blkring(struct xenbus_device *dev, } info->ring_ref = err; - err = xenbus_alloc_evtchn(dev, &info->evtchn); - if (err) - goto fail; - - err = bind_evtchn_to_irqhandler( - info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); + err = bind_listening_port_to_irqhandler( + dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); if (err <= 0) { xenbus_dev_fatal(dev, err, - "bind_evtchn_to_irqhandler failed"); 
+ "bind_listening_port_to_irqhandler"); goto fail; } info->irq = err; @@ -310,7 +306,7 @@ static void connect(struct blkfront_info *info) DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend); err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "sectors", "%llu", &sectors, + "sectors", "%Lu", &sectors, "info", "%u", &binfo, "sector-size", "%lu", &sector_size, NULL); @@ -775,8 +771,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) } if (info->irq) unbind_from_irqhandler(info->irq, info); - info->evtchn = info->irq = 0; - + info->irq = 0; } static void blkif_completion(struct blk_shadow *s) diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h index b86360f405..6747051cc5 100644 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h @@ -119,7 +119,7 @@ struct blkfront_info int connected; int ring_ref; blkif_front_ring_t ring; - unsigned int evtchn, irq; + unsigned int irq; struct xlbd_major_info *mi; request_queue_t *rq; struct work_struct work; diff --git a/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile b/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile index 409b07896f..f10cc4fe12 100644 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile @@ -1,3 +1,5 @@ LINUXINCLUDE += -I../xen/include/public/io -obj-y := xenbus.o interface.o blktap.o +obj-$(CONFIG_XEN_BLKDEV_TAP) := xenblktap.o + +xenblktap-y := xenbus.o interface.o blktap.o diff --git a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h index 56faca7a60..fc42c63508 100644 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h @@ -56,7 +56,6 @@ typedef struct blkif_st { domid_t domid; unsigned int handle; /* Physical parameters of the comms window. */ - unsigned int evtchn; unsigned int irq; /* Comms information.
*/ blkif_back_ring_t blk_ring; diff --git a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c index b0eccf4225..44f653ba51 100644 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c @@ -98,7 +98,6 @@ int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, { blkif_sring_t *sring; int err; - struct evtchn_bind_interdomain bind_interdomain; /* Already connected through? */ if (blkif->irq) @@ -113,24 +112,18 @@ int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, return err; } - bind_interdomain.remote_dom = blkif->domid; - bind_interdomain.remote_port = evtchn; + sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); - err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - if (err) { + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, tap_blkif_be_int, + 0, "blkif-backend", blkif); + if (err < 0) { unmap_frontend_page(blkif); free_vm_area(blkif->blk_ring_area); return err; } - - blkif->evtchn = bind_interdomain.local_port; - - sring = (blkif_sring_t *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); - - blkif->irq = bind_evtchn_to_irqhandler( - blkif->evtchn, tap_blkif_be_int, 0, "blkif-backend", blkif); + blkif->irq = err; return 0; } diff --git a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c index a39a22a6d4..1c584d3cce 100644 --- a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c +++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c @@ -121,7 +121,7 @@ int xencons_ring_init(void) !xen_start_info->console.domU.evtchn) return -ENODEV; - irq = bind_evtchn_to_irqhandler( + irq = bind_caller_port_to_irqhandler( xen_start_info->console.domU.evtchn, handle_input, 0, "xencons", NULL); if (irq < 0) { diff --git 
a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c index 445cacf785..c2c39438ff 100644 --- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c +++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c @@ -61,7 +61,14 @@ static int evtchn_to_irq[NR_EVENT_CHANNELS] = { static u32 irq_info[NR_IRQS]; /* Binding types. */ -enum { IRQT_UNBOUND, IRQT_PIRQ, IRQT_VIRQ, IRQT_IPI, IRQT_EVTCHN }; +enum { + IRQT_UNBOUND, + IRQT_PIRQ, + IRQT_VIRQ, + IRQT_IPI, + IRQT_LOCAL_PORT, + IRQT_CALLER_PORT +}; /* Constructor for packed IRQ information. */ static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn) @@ -208,38 +215,51 @@ void force_evtchn_callback(void) /* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */ EXPORT_SYMBOL(force_evtchn_callback); +static DEFINE_PER_CPU(unsigned int, upcall_count) = { 0 }; + /* NB. Interrupts are disabled on entry. */ asmlinkage void evtchn_do_upcall(struct pt_regs *regs) { unsigned long l1, l2; - unsigned int l1i, l2i, port; + unsigned int l1i, l2i, port, count; int irq, cpu = smp_processor_id(); shared_info_t *s = HYPERVISOR_shared_info; vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; - vcpu_info->evtchn_upcall_pending = 0; + do { + /* Avoid a callback storm when we reenable delivery. */ + vcpu_info->evtchn_upcall_pending = 0; + + /* Nested invocations bail immediately. */ + if (unlikely(per_cpu(upcall_count, cpu)++)) + return; #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ - /* Clear master pending flag /before/ clearing selector flag. */ - rmb(); + /* Clear master flag /before/ clearing selector flag. 
*/ + rmb(); #endif - l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); - while (l1 != 0) { - l1i = __ffs(l1); - l1 &= ~(1UL << l1i); - - while ((l2 = active_evtchns(cpu, s, l1i)) != 0) { - l2i = __ffs(l2); - - port = (l1i * BITS_PER_LONG) + l2i; - if ((irq = evtchn_to_irq[port]) != -1) - do_IRQ(irq, regs); - else { - exit_idle(); - evtchn_device_upcall(port); + l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); + while (l1 != 0) { + l1i = __ffs(l1); + l1 &= ~(1UL << l1i); + + while ((l2 = active_evtchns(cpu, s, l1i)) != 0) { + l2i = __ffs(l2); + + port = (l1i * BITS_PER_LONG) + l2i; + if ((irq = evtchn_to_irq[port]) != -1) + do_IRQ(irq, regs); + else { + exit_idle(); + evtchn_device_upcall(port); + } } } - } + + /* If there were nested callbacks then we have more to do. */ + count = per_cpu(upcall_count, cpu); + per_cpu(upcall_count, cpu) = 0; + } while (unlikely(count != 1)); } static int find_unbound_irq(void) @@ -262,18 +282,18 @@ static int find_unbound_irq(void) return -ENOSPC; } -static int bind_evtchn_to_irq(unsigned int evtchn) +static int bind_caller_port_to_irq(unsigned int caller_port) { int irq; spin_lock(&irq_mapping_update_lock); - if ((irq = evtchn_to_irq[evtchn]) == -1) { + if ((irq = evtchn_to_irq[caller_port]) == -1) { if ((irq = find_unbound_irq()) < 0) goto out; - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + evtchn_to_irq[caller_port] = irq; + irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port); } irq_bindcount[irq]++; @@ -283,6 +303,59 @@ static int bind_evtchn_to_irq(unsigned int evtchn) return irq; } +static int bind_local_port_to_irq(unsigned int local_port) +{ + int irq; + + spin_lock(&irq_mapping_update_lock); + + BUG_ON(evtchn_to_irq[local_port] != -1); + + if ((irq = find_unbound_irq()) < 0) { + struct evtchn_close close = { .port = local_port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + BUG(); + goto out; + } + + evtchn_to_irq[local_port] = irq; + irq_info[irq] = 
mk_irq_info(IRQT_LOCAL_PORT, 0, local_port); + irq_bindcount[irq]++; + + out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} + +static int bind_listening_port_to_irq(unsigned int remote_domain) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_domain; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + + return err ? : bind_local_port_to_irq(alloc_unbound.port); +} + +static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, + unsigned int remote_port) +{ + struct evtchn_bind_interdomain bind_interdomain; + int err; + + bind_interdomain.remote_dom = remote_domain; + bind_interdomain.remote_port = remote_port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + + return err ? : bind_local_port_to_irq(bind_interdomain.local_port); +} + static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) { struct evtchn_bind_virq bind_virq; @@ -357,7 +430,8 @@ static void unbind_from_irq(unsigned int irq) if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { close.port = evtchn; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + if ((type_from_irq(irq) != IRQT_CALLER_PORT) && + HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) BUG(); switch (type_from_irq(irq)) { @@ -383,17 +457,39 @@ static void unbind_from_irq(unsigned int irq) spin_unlock(&irq_mapping_update_lock); } -int bind_evtchn_to_irqhandler( - unsigned int evtchn, +int bind_caller_port_to_irqhandler( + unsigned int caller_port, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + irq = bind_caller_port_to_irq(caller_port); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} 
+EXPORT_SYMBOL_GPL(bind_caller_port_to_irqhandler); + +int bind_listening_port_to_irqhandler( + unsigned int remote_domain, irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char *devname, void *dev_id) { - unsigned int irq; - int retval; + int irq, retval; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_listening_port_to_irq(remote_domain); if (irq < 0) return irq; @@ -405,7 +501,31 @@ int bind_evtchn_to_irqhandler( return irq; } -EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); +EXPORT_SYMBOL_GPL(bind_listening_port_to_irqhandler); + +int bind_interdomain_evtchn_to_irqhandler( + unsigned int remote_domain, + unsigned int remote_port, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); int bind_virq_to_irqhandler( unsigned int virq, @@ -415,8 +535,7 @@ int bind_virq_to_irqhandler( const char *devname, void *dev_id) { - unsigned int irq; - int retval; + int irq, retval; irq = bind_virq_to_irq(virq, cpu); if (irq < 0) @@ -440,8 +559,7 @@ int bind_ipi_to_irqhandler( const char *devname, void *dev_id) { - unsigned int irq; - int retval; + int irq, retval; irq = bind_ipi_to_irq(ipi, cpu); if (irq < 0) @@ -716,6 +834,12 @@ void notify_remote_via_irq(int irq) } EXPORT_SYMBOL_GPL(notify_remote_via_irq); +int irq_to_evtchn_port(int irq) +{ + return evtchn_from_irq(irq); +} +EXPORT_SYMBOL_GPL(irq_to_evtchn_port); + void mask_evtchn(int port) { shared_info_t *s = HYPERVISOR_shared_info; diff --git a/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c b/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c index a16e9d9a22..41b1405ff7 100644 --- 
a/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c +++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c @@ -98,8 +98,8 @@ void xen_machine_kexec_setup_resources(void) err: /* * It isn't possible to free xen_phys_cpus this early in the - * boot. Since failure at this stage is unexpected and the - * amount is small we leak the memory. + * boot. Failure at this stage is unexpected and the amount of + * memory is small therefore we tolerate the potential leak. */ xen_max_nr_phys_cpus = 0; return; diff --git a/linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c b/linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c index 54f7b5c509..40846d1d09 100644 --- a/linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c +++ b/linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c @@ -56,7 +56,6 @@ struct xenfb_info struct page **pages; struct list_head mappings; /* protected by mm_lock */ - unsigned evtchn; int irq; struct xenfb_page *page; unsigned long *mfns; @@ -156,7 +155,7 @@ static void xenfb_do_update(struct xenfb_info *info, wmb(); /* ensure ring contents visible */ info->page->out_prod = prod + 1; - notify_remote_via_evtchn(info->evtchn); + notify_remote_via_irq(info->irq); } static int xenfb_queue_full(struct xenfb_info *info) @@ -429,7 +428,7 @@ static irqreturn_t xenfb_event_handler(int rq, void *dev_id, if (page->in_cons != page->in_prod) { info->page->in_cons = info->page->in_prod; - notify_remote_via_evtchn(info->evtchn); + notify_remote_via_irq(info->irq); } return IRQ_HANDLED; } @@ -618,14 +617,11 @@ static int xenfb_connect_backend(struct xenbus_device *dev, int ret; struct xenbus_transaction xbt; - ret = xenbus_alloc_evtchn(dev, &info->evtchn); - if (ret) - return ret; - ret = bind_evtchn_to_irqhandler(info->evtchn, xenfb_event_handler, - 0, "xenfb", info); + ret = bind_listening_port_to_irqhandler( + dev->otherend_id, xenfb_event_handler, 0, "xenfb", info); if (ret < 0) { - xenbus_free_evtchn(dev, info->evtchn); - xenbus_dev_fatal(dev, ret, 
"bind_evtchn_to_irqhandler"); + xenbus_dev_fatal(dev, ret, + "bind_listening_port_to_irqhandler"); return ret; } info->irq = ret; @@ -641,7 +637,7 @@ static int xenfb_connect_backend(struct xenbus_device *dev, if (ret) goto error_xenbus; ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", - info->evtchn); + irq_to_evtchn_port(info->irq)); if (ret) goto error_xenbus; ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1"); diff --git a/linux-2.6-xen-sparse/drivers/xen/fbfront/xenkbd.c b/linux-2.6-xen-sparse/drivers/xen/fbfront/xenkbd.c index 7737732e34..e4debd83d3 100644 --- a/linux-2.6-xen-sparse/drivers/xen/fbfront/xenkbd.c +++ b/linux-2.6-xen-sparse/drivers/xen/fbfront/xenkbd.c @@ -31,7 +31,6 @@ struct xenkbd_info { struct input_dev *dev; struct xenkbd_page *page; - unsigned evtchn; int irq; struct xenbus_device *xbdev; }; @@ -76,7 +75,7 @@ static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs) input_sync(info->dev); mb(); /* ensure we got ring contents */ page->in_cons = cons; - notify_remote_via_evtchn(info->evtchn); + notify_remote_via_irq(info->irq); return IRQ_HANDLED; } @@ -168,14 +167,11 @@ static int xenkbd_connect_backend(struct xenbus_device *dev, int ret; struct xenbus_transaction xbt; - ret = xenbus_alloc_evtchn(dev, &info->evtchn); - if (ret) - return ret; - ret = bind_evtchn_to_irqhandler(info->evtchn, input_handler, 0, - "xenkbd", info); + ret = bind_listening_port_to_irqhandler( + dev->otherend_id, input_handler, 0, "xenkbd", info); if (ret < 0) { - xenbus_free_evtchn(dev, info->evtchn); - xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler"); + xenbus_dev_fatal(dev, ret, + "bind_listening_port_to_irqhandler"); return ret; } info->irq = ret; @@ -191,7 +187,7 @@ static int xenkbd_connect_backend(struct xenbus_device *dev, if (ret) goto error_xenbus; ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", - info->evtchn); + irq_to_evtchn_port(info->irq)); if (ret) goto error_xenbus; ret = 
xenbus_transaction_end(xbt, 0); diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/common.h b/linux-2.6-xen-sparse/drivers/xen/netback/common.h index 367c008d3b..e0537e52ff 100644 --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h @@ -67,7 +67,6 @@ typedef struct netif_st { grant_ref_t tx_shmem_ref; grant_handle_t rx_shmem_handle; grant_ref_t rx_shmem_ref; - unsigned int evtchn; unsigned int irq; /* The shared rings and indexes. */ diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c index 9fae954bd2..4596fedce8 100644 --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c @@ -259,7 +259,6 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref, int err = -ENOMEM; netif_tx_sring_t *txs; netif_rx_sring_t *rxs; - struct evtchn_bind_interdomain bind_interdomain; /* Already connected through? */ if (netif->irq) @@ -276,18 +275,12 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref, if (err) goto err_map; - bind_interdomain.remote_dom = netif->domid; - bind_interdomain.remote_port = evtchn; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - if (err) + err = bind_interdomain_evtchn_to_irqhandler( + netif->domid, evtchn, netif_be_int, 0, + netif->dev->name, netif); + if (err < 0) goto err_hypervisor; - - netif->evtchn = bind_interdomain.local_port; - - netif->irq = bind_evtchn_to_irqhandler( - netif->evtchn, netif_be_int, 0, netif->dev->name, netif); + netif->irq = err; disable_irq(netif->irq); txs = (netif_tx_sring_t *)netif->tx_comms_area->addr; diff --git a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c index 0e585475de..448a972cc4 100644 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c @@ -153,7 +153,7 
@@ struct netfront_info { spinlock_t tx_lock; spinlock_t rx_lock; - unsigned int evtchn, irq; + unsigned int irq; unsigned int copying_receiver; /* Receive-ring batched refills. */ @@ -244,12 +244,8 @@ static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np, static int setup_device(struct xenbus_device *, struct netfront_info *); static struct net_device *create_netdev(struct xenbus_device *); -static void netfront_closing(struct xenbus_device *); - static void end_access(int, void *); static void netif_disconnect_backend(struct netfront_info *); -static int open_netdev(struct netfront_info *); -static void close_netdev(struct netfront_info *); static int network_connect(struct net_device *); static void network_tx_buf_gc(struct net_device *); @@ -293,9 +289,20 @@ static int __devinit netfront_probe(struct xenbus_device *dev, info = netdev_priv(netdev); dev->dev.driver_data = info; - err = open_netdev(info); - if (err) + err = register_netdev(info->netdev); + if (err) { + printk(KERN_WARNING "%s: register_netdev err=%d\n", + __FUNCTION__, err); + goto fail; + } + + err = xennet_sysfs_addif(info->netdev); + if (err) { + unregister_netdev(info->netdev); + printk(KERN_WARNING "%s: add sysfs failed err=%d\n", + __FUNCTION__, err); goto fail; + } return 0; @@ -305,6 +312,24 @@ static int __devinit netfront_probe(struct xenbus_device *dev, return err; } +static int __devexit netfront_remove(struct xenbus_device *dev) +{ + struct netfront_info *info = dev->dev.driver_data; + + DPRINTK("%s\n", dev->nodename); + + netif_disconnect_backend(info); + + del_timer_sync(&info->rx_refill_timer); + + xennet_sysfs_delif(info->netdev); + + unregister_netdev(info->netdev); + + free_netdev(info->netdev); + + return 0; +} /** * We are reconnecting to the backend, due to a suspend/resume, or a backend @@ -383,7 +408,8 @@ again: goto abort_transaction; } err = xenbus_printf(xbt, dev->nodename, - "event-channel", "%u", info->evtchn); + "event-channel", "%u", + 
irq_to_evtchn_port(info->irq)); if (err) { message = "writing event-channel"; goto abort_transaction; @@ -488,17 +514,15 @@ static int setup_device(struct xenbus_device *dev, struct netfront_info *info) } info->rx_ring_ref = err; - err = xenbus_alloc_evtchn(dev, &info->evtchn); - if (err) - goto fail; - memcpy(netdev->dev_addr, info->mac, ETH_ALEN); - err = bind_evtchn_to_irqhandler(info->evtchn, netif_int, - SA_SAMPLE_RANDOM, netdev->name, - netdev); + + err = bind_listening_port_to_irqhandler( + dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name, + netdev); if (err < 0) goto fail; info->irq = err; + return 0; fail: @@ -534,9 +558,7 @@ static void backend_changed(struct xenbus_device *dev, break; case XenbusStateClosing: - if (dev->state == XenbusStateClosed) - break; - netfront_closing(dev); + xenbus_frontend_closed(dev); break; } } @@ -1995,70 +2017,6 @@ inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr) } -/* ** Close down ** */ - - -/** - * Handle the change of state of the backend to Closing. We must delete our - * device-layer structures now, to ensure that writes are flushed through to - * the backend. Once is this done, we can switch to Closed in - * acknowledgement. 
- */ -static void netfront_closing(struct xenbus_device *dev) -{ - struct netfront_info *info = dev->dev.driver_data; - - DPRINTK("%s\n", dev->nodename); - - close_netdev(info); - xenbus_frontend_closed(dev); -} - - -static int __devexit netfront_remove(struct xenbus_device *dev) -{ - struct netfront_info *info = dev->dev.driver_data; - - DPRINTK("%s\n", dev->nodename); - - netif_disconnect_backend(info); - free_netdev(info->netdev); - - return 0; -} - - -static int open_netdev(struct netfront_info *info) -{ - int err; - - err = register_netdev(info->netdev); - if (err) { - printk(KERN_WARNING "%s: register_netdev err=%d\n", - __FUNCTION__, err); - return err; - } - - err = xennet_sysfs_addif(info->netdev); - if (err) { - unregister_netdev(info->netdev); - printk(KERN_WARNING "%s: add sysfs failed err=%d\n", - __FUNCTION__, err); - return err; - } - - return 0; -} - -static void close_netdev(struct netfront_info *info) -{ - del_timer_sync(&info->rx_refill_timer); - - xennet_sysfs_delif(info->netdev); - unregister_netdev(info->netdev); -} - - static void netif_disconnect_backend(struct netfront_info *info) { /* Stop old i/f to prevent errors whilst we rebuild the state. 
*/ @@ -2070,7 +2028,7 @@ static void netif_disconnect_backend(struct netfront_info *info) if (info->irq) unbind_from_irqhandler(info->irq, info->netdev); - info->evtchn = info->irq = 0; + info->irq = 0; end_access(info->tx_ring_ref, info->tx.sring); end_access(info->rx_ring_ref, info->rx.sring); diff --git a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c index 0026753381..3ebd4b1e5a 100644 --- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c @@ -71,7 +71,6 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, int remote_evtchn) { int err = 0; - int evtchn; struct vm_struct *area; dev_dbg(&pdev->xdev->dev, @@ -86,12 +85,9 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, pdev->sh_area = area; pdev->sh_info = area->addr; - err = xenbus_bind_evtchn(pdev->xdev, remote_evtchn, &evtchn); - if (err) - goto out; - - err = bind_evtchn_to_irqhandler(evtchn, pciback_handle_event, - SA_SAMPLE_RANDOM, "pciback", pdev); + err = bind_interdomain_evtchn_to_irqhandler( + pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, + SA_SAMPLE_RANDOM, "pciback", pdev); if (err < 0) { xenbus_dev_fatal(pdev->xdev, err, "Error binding event channel to IRQ"); diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h b/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h index b209b4f583..d28eb31fdd 100644 --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h @@ -30,7 +30,6 @@ typedef struct tpmif_st { unsigned int handle; /* Physical parameters of the comms window. */ - unsigned int evtchn; unsigned int irq; /* The shared rings and indexes. 
*/ diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c index 2614aa5126..a9b66db1ac 100644 --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c @@ -118,11 +118,9 @@ static void unmap_frontend_page(tpmif_t *tpmif) int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn) { int err; - struct evtchn_bind_interdomain bind_interdomain; - if (tpmif->irq) { + if (tpmif->irq) return 0; - } if ((tpmif->tx_area = alloc_vm_area(PAGE_SIZE)) == NULL) return -ENOMEM; @@ -133,24 +131,17 @@ int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn) return err; } + tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr; - bind_interdomain.remote_dom = tpmif->domid; - bind_interdomain.remote_port = evtchn; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - if (err) { + err = bind_interdomain_evtchn_to_irqhandler( + tpmif->domid, evtchn, tpmif_be_int, 0, tpmif->devname, tpmif); + if (err < 0) { unmap_frontend_page(tpmif); free_vm_area(tpmif->tx_area); return err; } + tpmif->irq = err; - tpmif->evtchn = bind_interdomain.local_port; - - tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr; - - tpmif->irq = bind_evtchn_to_irqhandler( - tpmif->evtchn, tpmif_be_int, 0, tpmif->devname, tpmif); tpmif->shmem_ref = shared_page; tpmif->active = 1; diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c index a11140bda0..f294ec1cc9 100644 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c @@ -254,28 +254,6 @@ int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port) EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); -int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port) -{ - struct evtchn_bind_interdomain bind_interdomain; 
- int err; - - bind_interdomain.remote_dom = dev->otherend_id; - bind_interdomain.remote_port = remote_port, - - err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - if (err) - xenbus_dev_fatal(dev, err, - "binding to event channel %d from domain %d", - remote_port, dev->otherend_id); - else - *port = bind_interdomain.local_port; - - return err; -} -EXPORT_SYMBOL_GPL(xenbus_bind_evtchn); - - int xenbus_free_evtchn(struct xenbus_device *dev, int port) { struct evtchn_close close; diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c index e5af661c87..b758c9f676 100644 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c @@ -196,7 +196,7 @@ int xb_init_comms(void) if (xenbus_irq) unbind_from_irqhandler(xenbus_irq, &xb_waitq); - err = bind_evtchn_to_irqhandler( + err = bind_caller_port_to_irqhandler( xen_store_evtchn, wake_waiting, 0, "xenbus", &xb_waitq); if (err <= 0) { diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h index 74a0c21bbe..bb3d7d2416 100644 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h @@ -34,6 +34,8 @@ extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address, extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); extern void swiotlb_init(void); +extern unsigned int dma_bits; + #ifdef CONFIG_SWIOTLB extern int swiotlb; #else diff --git a/linux-2.6-xen-sparse/include/asm-ia64/swiotlb.h b/linux-2.6-xen-sparse/include/asm-ia64/swiotlb.h new file mode 100644 index 0000000000..bb3d7d2416 --- /dev/null +++ b/linux-2.6-xen-sparse/include/asm-ia64/swiotlb.h @@ -0,0 +1,45 @@ +#ifndef _ASM_SWIOTLB_H +#define _ASM_SWIOTLB_H 1 + +#include <linux/config.h> + +/* SWIOTLB interface */ + +extern 
dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, + int dir); +extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_single_for_cpu(struct device *hwdev, + dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_single_for_device(struct device *hwdev, + dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, + struct scatterlist *sg, int nelems, + int dir); +extern void swiotlb_sync_sg_for_device(struct device *hwdev, + struct scatterlist *sg, int nelems, + int dir); +extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); +extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); +extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr); +extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction); +extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address, + size_t size, enum dma_data_direction direction); +extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); +extern void swiotlb_init(void); + +extern unsigned int dma_bits; + +#ifdef CONFIG_SWIOTLB +extern int swiotlb; +#else +#define swiotlb 0 +#endif + +#endif diff --git a/linux-2.6-xen-sparse/include/xen/evtchn.h b/linux-2.6-xen-sparse/include/xen/evtchn.h index 4f111ae19b..a579cdf100 100644 --- a/linux-2.6-xen-sparse/include/xen/evtchn.h +++ b/linux-2.6-xen-sparse/include/xen/evtchn.h @@ -52,22 +52,34 @@ * The IRQ argument passed to the callback handler is the same as returned * from the bind call. It may not correspond to a Linux IRQ number. * Returns IRQ or negative errno. - * UNBIND: Takes IRQ to unbind from; automatically closes the event channel. 
*/ -extern int bind_evtchn_to_irqhandler( - unsigned int evtchn, +int bind_caller_port_to_irqhandler( + unsigned int caller_port, irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char *devname, void *dev_id); -extern int bind_virq_to_irqhandler( +int bind_listening_port_to_irqhandler( + unsigned int remote_domain, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); +int bind_interdomain_evtchn_to_irqhandler( + unsigned int remote_domain, + unsigned int remote_port, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); +int bind_virq_to_irqhandler( unsigned int virq, unsigned int cpu, irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char *devname, void *dev_id); -extern int bind_ipi_to_irqhandler( +int bind_ipi_to_irqhandler( unsigned int ipi, unsigned int cpu, irqreturn_t (*handler)(int, void *, struct pt_regs *), @@ -77,21 +89,21 @@ extern int bind_ipi_to_irqhandler( /* * Common unbind function for all event sources. Takes IRQ to unbind from. - * Automatically closes the underlying event channel (even for bindings - * made with bind_evtchn_to_irqhandler()). + * Automatically closes the underlying event channel (except for bindings + * made with bind_caller_port_to_irqhandler()). */ -extern void unbind_from_irqhandler(unsigned int irq, void *dev_id); +void unbind_from_irqhandler(unsigned int irq, void *dev_id); -extern void irq_resume(void); +void irq_resume(void); /* Entry point for notifications into Linux subsystems. */ asmlinkage void evtchn_do_upcall(struct pt_regs *regs); /* Entry point for notifications into the userland character device. 
*/ -extern void evtchn_device_upcall(int port); +void evtchn_device_upcall(int port); -extern void mask_evtchn(int port); -extern void unmask_evtchn(int port); +void mask_evtchn(int port); +void unmask_evtchn(int port); static inline void clear_evtchn(int port) { @@ -106,9 +118,10 @@ static inline void notify_remote_via_evtchn(int port) } /* - * Unlike notify_remote_via_evtchn(), this is safe to use across - * save/restore. Notifications on a broken connection are silently dropped. + * Use these to access the event channel underlying the IRQ handle returned + * by bind_*_to_irqhandler(). */ -extern void notify_remote_via_irq(int irq); +void notify_remote_via_irq(int irq); +int irq_to_evtchn_port(int irq); #endif /* __ASM_EVTCHN_H__ */ diff --git a/linux-2.6-xen-sparse/include/xen/xenbus.h b/linux-2.6-xen-sparse/include/xen/xenbus.h index c7cb7eaa3a..3f525fa2dd 100644 --- a/linux-2.6-xen-sparse/include/xen/xenbus.h +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h @@ -262,14 +262,6 @@ int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); /** - * Bind to an existing interdomain event channel in another domain. Returns 0 - * on success and stores the local port in *port. On error, returns -errno, - * switches the device to XenbusStateClosing, and saves the error in XenStore. - */ -int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port); - - -/** * Free an existing event channel. Returns 0 on success or -errno on error. 
*/ int xenbus_free_evtchn(struct xenbus_device *dev, int port); diff --git a/linux-2.6-xen-sparse/kernel/kexec.c b/linux-2.6-xen-sparse/kernel/kexec.c index 9a1dbb6ae3..bb19157362 100644 --- a/linux-2.6-xen-sparse/kernel/kexec.c +++ b/linux-2.6-xen-sparse/kernel/kexec.c @@ -1012,9 +1012,11 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, goto out; } #ifdef CONFIG_XEN - result = xen_machine_kexec_load(image); - if (result) - goto out; + if (image) { + result = xen_machine_kexec_load(image); + if (result) + goto out; + } #endif /* Install the new kernel, and Uninstall the old */ image = xchg(dest_image, image); diff --git a/linux-2.6-xen-sparse/net/core/dev.c b/linux-2.6-xen-sparse/net/core/dev.c index ef92b788f0..3065f9242d 100644 --- a/linux-2.6-xen-sparse/net/core/dev.c +++ b/linux-2.6-xen-sparse/net/core/dev.c @@ -1248,14 +1248,13 @@ static int dev_gso_segment(struct sk_buff *skb) /* Verifying header integrity only. */ if (!segs) return 0; - + if (unlikely(IS_ERR(segs))) return PTR_ERR(segs); skb->next = segs; DEV_GSO_CB(skb)->destructor = skb->destructor; skb->destructor = dev_gso_skb_destructor; - return 0; } diff --git a/patches/linux-2.6.16.33/vsnprintf.patch b/patches/linux-2.6.16.33/vsnprintf.patch index 69a93fa5c5..3e32773986 100644 --- a/patches/linux-2.6.16.33/vsnprintf.patch +++ b/patches/linux-2.6.16.33/vsnprintf.patch @@ -203,7 +203,7 @@ index b07db5c..f595947 100644 + if (str < end) + *str = '\0'; + else -+ *end = '\0'; ++ end[-1] = '\0'; + } + /* the trailing null byte doesn't count towards the total */ return str-buf; diff --git a/tools/Rules.mk b/tools/Rules.mk index f4524eea7c..5dfa98017f 100644 --- a/tools/Rules.mk +++ b/tools/Rules.mk @@ -50,5 +50,7 @@ mk-symlinks-xen: ( cd xen/hvm && ln -sf ../../$(XEN_ROOT)/xen/include/public/hvm/*.h . ) mkdir -p xen/io ( cd xen/io && ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . 
) + mkdir -p xen/arch-x86 + ( cd xen/arch-x86 && ln -sf ../../$(XEN_ROOT)/xen/include/public/arch-x86/*.h . ) mk-symlinks: mk-symlinks-xen mk-symlinks-$(XEN_OS) diff --git a/tools/blktap/drivers/block-qcow.c b/tools/blktap/drivers/block-qcow.c index 48814479d6..ff8e9a0a4a 100644 --- a/tools/blktap/drivers/block-qcow.c +++ b/tools/blktap/drivers/block-qcow.c @@ -47,6 +47,11 @@ #define ASSERT(_p) ((void)0) #endif +#define ROUNDUP(l, s) \ +({ \ + (uint64_t)( \ + (l + (s - 1)) - ((l + (s - 1)) % s)); \ +}) /******AIO DEFINES******/ #define REQUEST_ASYNC_FD 1 @@ -76,9 +81,9 @@ struct pending_aio { #define QCOW_CRYPT_NONE 0x00 #define QCOW_CRYPT_AES 0x01 -#define QCOW_SPARSE_FILE 0x02 #define QCOW_OFLAG_COMPRESSED (1LL << 63) +#define SPARSE_FILE 0x01 #ifndef O_BINARY #define O_BINARY 0 @@ -418,8 +423,9 @@ static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num, static int qtruncate(int fd, off_t length, int sparse) { - int current, ret, i; - int sectors = length/DEFAULT_SECTOR_SIZE; + int ret, i; + int current = 0, rem = 0; + int sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE; struct stat st; char buf[DEFAULT_SECTOR_SIZE]; @@ -429,22 +435,45 @@ static int qtruncate(int fd, off_t length, int sparse) */ memset(buf, 0x00, DEFAULT_SECTOR_SIZE); ret = fstat(fd, &st); - if((ret == -1) || S_ISBLK(st.st_mode)) + if (ret == -1) return -1; + if (S_ISBLK(st.st_mode)) + return 0; - if(st.st_size < length) { + current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE; + rem = st.st_size % DEFAULT_SECTOR_SIZE; + + /* If we are extending this file, we write zeros to the end -- + * this tries to ensure that the extents allocated wind up being + * contiguous on disk. 
+ */ + if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) { /*We are extending the file*/ - lseek(fd, 0, SEEK_END); - for (i = 0; i < sectors; i++ ) { + if (lseek(fd, 0, SEEK_END)==-1) { + fprintf(stderr, + "Lseek EOF failed (%d), internal error\n", + errno); + return -1; + } + if (rem) { + ret = write(fd, buf, rem); + if (ret != rem) + return -1; + } + for (i = current; i < sectors; i++ ) { ret = write(fd, buf, DEFAULT_SECTOR_SIZE); if (ret != DEFAULT_SECTOR_SIZE) return -1; } - } else if(sparse && (st.st_size > length)) - ftruncate(fd, length); - - return 1; + } else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE)) + if (ftruncate(fd, sectors * DEFAULT_SECTOR_SIZE)==-1) { + fprintf(stderr, + "Ftruncate failed (%d), internal error\n", + errno); + return -1; + } + return 0; } @@ -497,7 +526,12 @@ static uint64_t get_cluster_offset(struct td_state *bs, /*Truncate file for L2 table *(initialised to zero in case we crash)*/ - qtruncate(s->fd, l2_offset + (s->l2_size * sizeof(uint64_t)), s->sparse); + if (qtruncate(s->fd, + l2_offset + (s->l2_size * sizeof(uint64_t)), + s->sparse) != 0) { + DPRINTF("ERROR truncating file\n"); + return 0; + } s->fd_end = l2_offset + (s->l2_size * sizeof(uint64_t)); /*Update the L1 table entry on disk @@ -564,8 +598,12 @@ cache_miss: (s->l2_size * sizeof(uint64_t)); cluster_offset = (cluster_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); - qtruncate(s->fd, cluster_offset + - (s->cluster_size * s->l2_size), s->sparse); + if (qtruncate(s->fd, cluster_offset + + (s->cluster_size * s->l2_size), + s->sparse) != 0) { + DPRINTF("ERROR truncating file\n"); + return 0; + } s->fd_end = cluster_offset + (s->cluster_size * s->l2_size); for (i = 0; i < s->l2_size; i++) { @@ -623,8 +661,11 @@ found: cluster_offset = (cluster_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); - qtruncate(s->fd, cluster_offset + - s->cluster_size, s->sparse); + if (qtruncate(s->fd, cluster_offset + + s->cluster_size, s->sparse)!=0) { + DPRINTF("ERROR 
truncating file\n"); + return 0; + } s->fd_end = (cluster_offset + s->cluster_size); /* if encrypted, we must initialize the cluster content which won't be written */ @@ -909,15 +950,14 @@ int tdqcow_open (struct td_state *bs, const char *name) /*Finally check the L1 table cksum*/ be32_to_cpus(&exthdr->cksum); - cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t)); - if(exthdr->cksum != cksum) { + cksum = gen_cksum((char *)s->l1_table, + s->l1_size * sizeof(uint64_t)); + if(exthdr->cksum != cksum) goto end_xenhdr; - } be32_to_cpus(&exthdr->min_cluster_alloc); be32_to_cpus(&exthdr->flags); - if (exthdr->flags & QCOW_SPARSE_FILE) - s->sparse = 1; + s->sparse = (exthdr->flags & SPARSE_FILE); s->min_cluster_alloc = exthdr->min_cluster_alloc; } @@ -1210,10 +1250,10 @@ int tdqcow_do_callbacks(struct td_state *s, int sid) } int qcow_create(const char *filename, uint64_t total_size, - const char *backing_file, int flags) + const char *backing_file, int sparse) { int fd, header_size, backing_filename_len, l1_size, i; - int shift, length, adjust, ret = 0; + int shift, length, adjust, flags = 0, ret = 0; QCowHeader header; QCowHeader_ext exthdr; char backing_filename[1024], *ptr; @@ -1305,41 +1345,41 @@ int qcow_create(const char *filename, uint64_t total_size, DPRINTF("L1 Table offset: %d, size %d\n", header_size, (int)(l1_size * sizeof(uint64_t))); - if (flags & QCOW_CRYPT_AES) { - header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); - } else { - header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); - } + header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); ptr = calloc(1, l1_size * sizeof(uint64_t)); exthdr.cksum = cpu_to_be32(gen_cksum(ptr, l1_size * sizeof(uint64_t))); printf("Created cksum: %d\n",exthdr.cksum); free(ptr); - /*adjust file length to 4 KByte boundary*/ - length = header_size + l1_size * sizeof(uint64_t); - if (length % 4096 > 0) { - length = ((length >> 12) + 1) << 12; - qtruncate(fd, length, 0); - DPRINTF("Adjusted filelength to %d for 4 " - 
"Kbyte alignment\n",length); + /*adjust file length to system page size boundary*/ + length = ROUNDUP(header_size + (l1_size * sizeof(uint64_t)), + getpagesize()); + if (qtruncate(fd, length, 0)!=0) { + DPRINTF("ERROR truncating file\n"); + return -1; } - if (!(flags & QCOW_SPARSE_FILE)) { - /*Filesize is length + l1_size * (1 << s->l2_bits) + (size*512)*/ + if (sparse == 0) { + /*Filesize is length+l1_size*(1 << s->l2_bits)+(size*512)*/ total_length = length + (l1_size * (1 << 9)) + (size * 512); - qtruncate(fd, total_length, 0); + if (qtruncate(fd, total_length, 0)!=0) { + DPRINTF("ERROR truncating file\n"); + return -1; + } printf("File truncated to length %"PRIu64"\n",total_length); - } + } else + flags = SPARSE_FILE; + exthdr.flags = cpu_to_be32(flags); /* write all the data */ lseek(fd, 0, SEEK_SET); ret += write(fd, &header, sizeof(header)); ret += write(fd, &exthdr, sizeof(exthdr)); - if (backing_file) { + if (backing_file) ret += write(fd, backing_filename, backing_filename_len); - } + lseek(fd, header_size, SEEK_SET); tmp = 0; for (i = 0;i < l1_size; i++) { @@ -1360,7 +1400,10 @@ int qcow_make_empty(struct td_state *bs) lseek(s->fd, s->l1_table_offset, SEEK_SET); if (write(s->fd, s->l1_table, l1_length) < 0) return -1; - qtruncate(s->fd, s->l1_table_offset + l1_length, s->sparse); + if (qtruncate(s->fd, s->l1_table_offset + l1_length, s->sparse)!=0) { + DPRINTF("ERROR truncating file\n"); + return -1; + } memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); diff --git a/tools/blktap/drivers/qcow-create.c b/tools/blktap/drivers/qcow-create.c index a9ea2bc257..79492f1bf5 100644 --- a/tools/blktap/drivers/qcow-create.c +++ b/tools/blktap/drivers/qcow-create.c @@ -47,15 +47,13 @@ #define DFPRINTF(_f, _a...) 
((void)0) #endif -#define QCOW_NONSPARSE_FILE 0x00 -#define QCOW_SPARSE_FILE 0x02 #define MAX_NAME_LEN 1000 void help(void) { fprintf(stderr, "Qcow-utils: v1.0.0\n"); fprintf(stderr, - "usage: qcow-create [-h help] [-p reserve] <SIZE(MB)> <FILENAME> " + "usage: qcow-create [-h help] [-r reserve] <SIZE(MB)> <FILENAME> " "[<BACKING_FILENAME>]\n"); exit(-1); } @@ -63,12 +61,12 @@ void help(void) int main(int argc, char *argv[]) { int ret = -1, c, backed = 0; - int flags = QCOW_SPARSE_FILE; + int sparse = 1; uint64_t size; char filename[MAX_NAME_LEN], bfilename[MAX_NAME_LEN]; for(;;) { - c = getopt(argc, argv, "hp"); + c = getopt(argc, argv, "hr"); if (c == -1) break; switch(c) { @@ -76,9 +74,12 @@ int main(int argc, char *argv[]) help(); exit(0); break; - case 'p': - flags = QCOW_NONSPARSE_FILE; + case 'r': + sparse = 0; break; + default: + fprintf(stderr, "Unknown option\n"); + help(); } } @@ -96,6 +97,7 @@ int main(int argc, char *argv[]) } if (optind != argc) { + /*Backing file argument*/ backed = 1; if (snprintf(bfilename, MAX_NAME_LEN, "%s",argv[optind++]) >= MAX_NAME_LEN) { @@ -106,12 +108,14 @@ int main(int argc, char *argv[]) DFPRINTF("Creating file size %llu, name %s\n",(long long unsigned)size, filename); if (!backed) - ret = qcow_create(filename,size,NULL,flags); + ret = qcow_create(filename,size,NULL,sparse); else - ret = qcow_create(filename,size,bfilename,flags); + ret = qcow_create(filename,size,bfilename,sparse); - if (ret < 0) DPRINTF("Unable to create QCOW file\n"); - else DPRINTF("QCOW file successfully created\n"); + if (ret < 0) + DPRINTF("Unable to create QCOW file\n"); + else + DPRINTF("QCOW file successfully created\n"); return 0; } diff --git a/tools/examples/Makefile b/tools/examples/Makefile index 14f34135c8..5ef741c55a 100644 --- a/tools/examples/Makefile +++ b/tools/examples/Makefile @@ -9,6 +9,7 @@ XENDOMAINS_SYSCONFIG = init.d/sysconfig.xendomains # Xen configuration dir and configs to go there. 
XEN_CONFIG_DIR = /etc/xen XEN_CONFIGS = xend-config.sxp +XEN_CONFIGS += xm-config.xml XEN_CONFIGS += xmexample1 XEN_CONFIGS += xmexample2 XEN_CONFIGS += xmexample.hvm diff --git a/tools/examples/blktap b/tools/examples/blktap index bc25b48be2..5a7ee3236b 100644 --- a/tools/examples/blktap +++ b/tools/examples/blktap @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Copyright (c) 2005, XenSource Ltd. diff --git a/tools/examples/block b/tools/examples/block index 8ec73231bf..2b6b991125 100644 --- a/tools/examples/block +++ b/tools/examples/block @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash dir=$(dirname "$0") . "$dir/block-common.sh" diff --git a/tools/examples/block-enbd b/tools/examples/block-enbd index 75c6f257aa..67faa84268 100755 --- a/tools/examples/block-enbd +++ b/tools/examples/block-enbd @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Usage: block-enbd [bind server ctl_port |unbind node] # diff --git a/tools/examples/block-nbd b/tools/examples/block-nbd index 8c5cb17b79..b29b31564a 100644 --- a/tools/examples/block-nbd +++ b/tools/examples/block-nbd @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Usage: block-nbd [bind server ctl_port |unbind node] # diff --git a/tools/examples/external-device-migrate b/tools/examples/external-device-migrate index fa52334231..c7c99afec3 100644 --- a/tools/examples/external-device-migrate +++ b/tools/examples/external-device-migrate @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Copyright (c) 2005 IBM Corporation # diff --git a/tools/examples/network-bridge b/tools/examples/network-bridge index 49b79db95b..b616a72275 100755 --- a/tools/examples/network-bridge +++ b/tools/examples/network-bridge @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # Default Xen network start/stop script. # Xend calls a network script when it starts. 
diff --git a/tools/examples/network-nat b/tools/examples/network-nat index 94b84307c0..62d2b0c259 100644 --- a/tools/examples/network-nat +++ b/tools/examples/network-nat @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # Default Xen network start/stop script when using NAT. # Xend calls a network script when it starts. diff --git a/tools/examples/network-route b/tools/examples/network-route index de535c3280..574441e334 100755 --- a/tools/examples/network-route +++ b/tools/examples/network-route @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # Default Xen network start/stop script. # Xend calls a network script when it starts. diff --git a/tools/examples/vif-bridge b/tools/examples/vif-bridge index 7008210579..0935ad2fae 100755 --- a/tools/examples/vif-bridge +++ b/tools/examples/vif-bridge @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # /etc/xen/vif-bridge # diff --git a/tools/examples/vif-common.sh b/tools/examples/vif-common.sh index 50da9a4494..ee67ee2aaa 100644 --- a/tools/examples/vif-common.sh +++ b/tools/examples/vif-common.sh @@ -64,7 +64,7 @@ then fi -function frob_iptable() +frob_iptable() { if [ "$command" == "online" ] then @@ -89,7 +89,7 @@ If you are using iptables, this may affect networking for guest domains." # to those coming from the specified networks, though we allow DHCP requests # as well. # -function handle_iptable() +handle_iptable() { # Check for a working iptables installation. Checking for the iptables # binary is not sufficient, because the user may not have the appropriate @@ -123,7 +123,7 @@ function handle_iptable() # Print the IP address currently in use at the given interface, or nothing if # the interface is not up. 
# -function ip_of() +ip_of() { ip addr show "$1" | awk "/^.*inet.*$1\$/{print \$2}" | sed -n '1 s,/.*,,p' } @@ -137,7 +137,7 @@ function ip_of() # to these scripts, or eth0 by default. This function will call fatal if no # such interface could be found. # -function dom0_ip() +dom0_ip() { local nd=${netdev:-eth0} local result=$(ip_of "$nd") diff --git a/tools/examples/vif-nat b/tools/examples/vif-nat index 29611654eb..579d7eba35 100644 --- a/tools/examples/vif-nat +++ b/tools/examples/vif-nat @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # /etc/xen/vif-nat # diff --git a/tools/examples/vif-route b/tools/examples/vif-route index 8d0fb8d76c..f5fd88ed5a 100755 --- a/tools/examples/vif-route +++ b/tools/examples/vif-route @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # /etc/xen/vif-route # diff --git a/tools/examples/vtpm b/tools/examples/vtpm index cf8b50ff50..38a4532fc2 100644 --- a/tools/examples/vtpm +++ b/tools/examples/vtpm @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash dir=$(dirname "$0") . "$dir/vtpm-hotplug-common.sh" diff --git a/tools/examples/vtpm-delete b/tools/examples/vtpm-delete index 14bfddcd94..b54a093f02 100644 --- a/tools/examples/vtpm-delete +++ b/tools/examples/vtpm-delete @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # This scripts must be called the following way: # vtpm-delete <domain name> diff --git a/tools/examples/xen-backend.agent b/tools/examples/xen-backend.agent index 3a01a2c7ea..f043854ad0 100755 --- a/tools/examples/xen-backend.agent +++ b/tools/examples/xen-backend.agent @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash PATH=/etc/xen/scripts:$PATH diff --git a/tools/examples/xen-hotplug-cleanup b/tools/examples/xen-hotplug-cleanup index 6356a91892..f7337e45bf 100644 --- a/tools/examples/xen-hotplug-cleanup +++ b/tools/examples/xen-hotplug-cleanup @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash dir=$(dirname "$0") . 
"$dir/xen-hotplug-common.sh" diff --git a/tools/examples/xm-config.xml b/tools/examples/xm-config.xml index dbd8778bae..943b74d297 100644 --- a/tools/examples/xm-config.xml +++ b/tools/examples/xm-config.xml @@ -36,8 +36,10 @@ most useful for experimenting with the Xen-API preview in Xen 3.0.4. The username and password attributes will be used to log in if Xen-API is being used. --> + <!-- <server type='Xen-API' uri='http://localhost:9363/' username='me' password='mypassword' /> + --> </xm> diff --git a/tools/examples/xmexample.hvm b/tools/examples/xmexample.hvm index 143252e65b..33ce3203d3 100644 --- a/tools/examples/xmexample.hvm +++ b/tools/examples/xmexample.hvm @@ -29,7 +29,7 @@ memory = 128 # Shadow pagetable memory for the domain, in MB. # Should be at least 2KB per MB of domain memory, plus a few MB per vcpu. -shadow_memory = 8 +# shadow_memory = 8 # A name for your domain. All domains must have different names. name = "ExampleHVMDomain" @@ -116,7 +116,7 @@ disk = [ 'file:/var/images/min-el3-i386.img,hda,w', ',hdc:cdrom,r' ] device_model = '/usr/' + arch_libdir + '/xen/bin/qemu-dm' #----------------------------------------------------------------------------- -# boot on floppy (a), hard disk (c) or CD-ROM (d) +# boot on floppy (a), hard disk (c), Network (n) or CD-ROM (d) # default: hard disk, cd-rom, floppy #boot="cda" diff --git a/tools/firmware/etherboot/README b/tools/firmware/etherboot/README new file mode 100644 index 0000000000..c520699b5c --- /dev/null +++ b/tools/firmware/etherboot/README @@ -0,0 +1,7 @@ + +This is an Etherboot option ROM for the rtl8139 NIC. It has a few +non-standard settings, just to do with timeouts and when to give up. 
+ +Rom-o-matic.net will provide this image at the following URL: + +http://rom-o-matic.net/5.4.2/build.php?version=5.4.2&F=ignore&nic=rtl8139%3Artl8139+--+%5B0x10ec%2C0x8139%5D&ofmt=Binary+ROM+Image%28.zrom%29&arch=i386&ASK_BOOT=-1&BOOT_FIRST=BOOT_NIC&BOOT_SECOND=BOOT_NOTHING&BOOT_THIRD=BOOT_NOTHING&BOOT_INDEX=0&STATIC_CLIENT_IP=&STATIC_SUBNET_MASK=&STATIC_SERVER_IP=&STATIC_GATEWAY_IP=&STATIC_BOOTFILE=&EXIT_ON_FILE_LOAD_ERROR=on&DHCP_CLIENT_ID=&DHCP_CLIENT_ID_LEN=&DHCP_CLIENT_ID_TYPE=&DHCP_USER_CLASS=&DHCP_USER_CLASS_LEN=&ALLOW_ONLY_ENCAPSULATED=on&DEFAULT_BOOTFILE=&CONGESTED=on&BACKOFF_LIMIT=7&TIMEOUT=180&TRY_FLOPPY_FIRST=0&EXIT_IF_NO_OFFER=on&TAGGED_IMAGE=on&ELF_IMAGE=on&PXE_IMAGE=on&DOWNLOAD_PROTO_TFTP=on&COMCONSOLE=0x3F8&CONSPEED=9600&COMPARM=0x03&PXE_EXPORT=on&CONFIG_PCI=on&CONFIG_ISA=on&BUILD_ID=&PCBIOS=on&A=Get+ROM diff --git a/tools/firmware/etherboot/eb-rtl8139.zrom b/tools/firmware/etherboot/eb-rtl8139.zrom Binary files differnew file mode 100644 index 0000000000..446b55fc57 --- /dev/null +++ b/tools/firmware/etherboot/eb-rtl8139.zrom diff --git a/tools/firmware/hvmloader/Makefile b/tools/firmware/hvmloader/Makefile index 763f472987..44adaa07f9 100644 --- a/tools/firmware/hvmloader/Makefile +++ b/tools/firmware/hvmloader/Makefile @@ -51,11 +51,12 @@ hvmloader: roms.h acpi/acpi.a $(SRCS) acpi/acpi.a: $(MAKE) -C acpi -roms.h: ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin +roms.h: ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin ../etherboot/eb-rtl8139.zrom sh ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h sh ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h sh ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h sh ./mkhex vmxassist ../vmxassist/vmxassist.bin >> roms.h + sh ./mkhex etherboot ../etherboot/eb-rtl8139.zrom >> roms.h .PHONY: 
clean clean: diff --git a/tools/firmware/hvmloader/acpi/Makefile b/tools/firmware/hvmloader/acpi/Makefile index 16803d78d6..c5a1283a9e 100644 --- a/tools/firmware/hvmloader/acpi/Makefile +++ b/tools/firmware/hvmloader/acpi/Makefile @@ -24,7 +24,7 @@ C_SRC = build.c dsdt.c static_tables.c H_SRC = $(wildcard *.h) OBJS = $(patsubst %.c,%.o,$(C_SRC)) -IASL_VER = acpica-unix-20050513 +IASL_VER = acpica-unix-20060707 IASL_URL = http://developer.intel.com/technology/iapc/acpi/downloads/$(IASL_VER).tar.gz # Disable PIE/SSP if GCC supports them. They can break us. diff --git a/tools/firmware/hvmloader/acpi/acpi2_0.h b/tools/firmware/hvmloader/acpi/acpi2_0.h index 5b9b9f0495..04824f5642 100644 --- a/tools/firmware/hvmloader/acpi/acpi2_0.h +++ b/tools/firmware/hvmloader/acpi/acpi2_0.h @@ -50,17 +50,18 @@ struct acpi_header { uint8_t revision; uint8_t checksum; uint8_t oem_id[6]; - uint64_t oem_table_id; + uint8_t oem_table_id[8]; uint32_t oem_revision; uint32_t creator_id; uint32_t creator_revision; }; -#define ACPI_OEM_ID {'I','N','T','E','L',' '} -#define ACPI_OEM_TABLE_ID ASCII32(' ','T','B','D') -#define ACPI_OEM_REVISION 0x00000002 -#define ACPI_CREATOR_ID 0x00 /* TBD */ -#define ACPI_CREATOR_REVISION 0x00000002 +#define ACPI_OEM_ID "Xen" +#define ACPI_OEM_TABLE_ID "HVM" +#define ACPI_OEM_REVISION 0 + +#define ACPI_CREATOR_ID ASCII32('H','V','M','L') /* HVMLoader */ +#define ACPI_CREATOR_REVISION 0 /* * ACPI 2.0 Generic Address Space definition. @@ -121,7 +122,6 @@ struct acpi_20_rsdt { struct acpi_header header; uint32_t entry[1]; }; -#define ACPI_2_0_RSDT_REVISION 0x01 /* * Extended System Description Table (XSDT). 
@@ -130,7 +130,6 @@ struct acpi_20_xsdt { struct acpi_header header; uint64_t entry[1]; }; -#define ACPI_2_0_XSDT_REVISION 0x01 /* * TCG Hardware Interface Table (TCPA) @@ -141,8 +140,6 @@ struct acpi_20_tcpa { uint32_t laml; uint64_t lasa; }; - -#define ACPI_2_0_TCPA_REVISION 0x02 #define ACPI_2_0_TCPA_LAML_SIZE (64*1024) /* @@ -202,7 +199,6 @@ struct acpi_20_fadt { struct acpi_20_generic_address x_gpe0_blk; struct acpi_20_generic_address x_gpe1_blk; }; -#define ACPI_2_0_FADT_REVISION 0x03 /* * FADT Boot Architecture Flags. @@ -254,7 +250,19 @@ struct acpi_20_madt { uint32_t flags; }; -#define ACPI_2_0_MADT_REVISION 0x01 + +/* + * HPET Description Table + */ +struct acpi_20_hpet { + struct acpi_header header; + uint32_t timer_block_id; + struct acpi_20_generic_address addr; + uint8_t hpet_number; + uint16_t min_tick; + uint8_t page_protect; +}; +#define ACPI_HPET_ADDRESS 0xFED00000UL /* * Multiple APIC Flags. @@ -325,6 +333,18 @@ struct acpi_20_madt_intsrcovr { #define ACPI_2_0_RSDT_SIGNATURE ASCII32('R','S','D','T') #define ACPI_2_0_XSDT_SIGNATURE ASCII32('X','S','D','T') #define ACPI_2_0_TCPA_SIGNATURE ASCII32('T','C','P','A') +#define ACPI_2_0_HPET_SIGNATURE ASCII32('H','P','E','T') + +/* + * Table revision numbers. 
+ */ +#define ACPI_2_0_RSDP_REVISION 0x02 +#define ACPI_2_0_FADT_REVISION 0x04 +#define ACPI_2_0_MADT_REVISION 0x02 +#define ACPI_2_0_RSDT_REVISION 0x01 +#define ACPI_2_0_XSDT_REVISION 0x01 +#define ACPI_2_0_TCPA_REVISION 0x02 +#define ACPI_2_0_HPET_REVISION 0x01 #pragma pack () diff --git a/tools/firmware/hvmloader/acpi/build.c b/tools/firmware/hvmloader/acpi/build.c index b221391807..3890bc3166 100644 --- a/tools/firmware/hvmloader/acpi/build.c +++ b/tools/firmware/hvmloader/acpi/build.c @@ -57,8 +57,8 @@ int construct_madt(struct acpi_20_madt *madt) memset(madt, 0, sizeof(*madt)); madt->header.signature = ACPI_2_0_MADT_SIGNATURE; madt->header.revision = ACPI_2_0_MADT_REVISION; - strncpy(madt->header.oem_id, "INTEL ", 6); - madt->header.oem_table_id = ACPI_OEM_TABLE_ID; + strncpy(madt->header.oem_id, ACPI_OEM_ID, 6); + strncpy(madt->header.oem_table_id, ACPI_OEM_TABLE_ID, 8); madt->header.oem_revision = ACPI_OEM_REVISION; madt->header.creator_id = ACPI_CREATOR_ID; madt->header.creator_revision = ACPI_CREATOR_REVISION; @@ -69,16 +69,28 @@ int construct_madt(struct acpi_20_madt *madt) intsrcovr = (struct acpi_20_madt_intsrcovr *)(madt + 1); for ( i = 0; i < 16; i++ ) { - if ( !(PCI_ISA_IRQ_MASK & (1U << i)) ) - continue; - - /* PCI: active-low level-triggered */ memset(intsrcovr, 0, sizeof(*intsrcovr)); intsrcovr->type = ACPI_INTERRUPT_SOURCE_OVERRIDE; intsrcovr->length = sizeof(*intsrcovr); intsrcovr->source = i; - intsrcovr->gsi = i; - intsrcovr->flags = 0xf; + + if ( i == 0 ) + { + /* ISA IRQ0 routed to IOAPIC GSI 2. */ + intsrcovr->gsi = 2; + intsrcovr->flags = 0x0; + } + else if ( PCI_ISA_IRQ_MASK & (1U << i) ) + { + /* PCI: active-low level-triggered. */ + intsrcovr->gsi = i; + intsrcovr->flags = 0xf; + } + else + { + /* No need for a INT source override structure. 
*/ + continue; + } offset += sizeof(*intsrcovr); intsrcovr++; @@ -98,7 +110,7 @@ int construct_madt(struct acpi_20_madt *madt) memset(lapic, 0, sizeof(*lapic)); lapic->type = ACPI_PROCESSOR_LOCAL_APIC; lapic->length = sizeof(*lapic); - lapic->acpi_processor_id = lapic->apic_id = i + 1; + lapic->acpi_processor_id = lapic->apic_id = LAPIC_ID(i); lapic->flags = ACPI_LOCAL_APIC_ENABLED; offset += sizeof(*lapic); lapic++; @@ -110,10 +122,33 @@ int construct_madt(struct acpi_20_madt *madt) return align16(offset); } +int construct_hpet(struct acpi_20_hpet *hpet) +{ + int offset; + + memset(hpet, 0, sizeof(*hpet)); + hpet->header.signature = ACPI_2_0_HPET_SIGNATURE; + hpet->header.revision = ACPI_2_0_HPET_REVISION; + strncpy(hpet->header.oem_id, ACPI_OEM_ID, 6); + strncpy(hpet->header.oem_table_id, ACPI_OEM_TABLE_ID, 8); + hpet->header.oem_revision = ACPI_OEM_REVISION; + hpet->header.creator_id = ACPI_CREATOR_ID; + hpet->header.creator_revision = ACPI_CREATOR_REVISION; + hpet->timer_block_id = 0x8086a201; + hpet->addr.address = ACPI_HPET_ADDRESS; + offset = sizeof(*hpet); + + hpet->header.length = offset; + set_checksum(hpet, offsetof(struct acpi_header, checksum), offset); + + return offset; +} + int construct_secondary_tables(uint8_t *buf, unsigned long *table_ptrs) { int offset = 0, nr_tables = 0; struct acpi_20_madt *madt; + struct acpi_20_hpet *hpet; struct acpi_20_tcpa *tcpa; static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001}; uint16_t *tis_hdr; @@ -126,6 +161,11 @@ int construct_secondary_tables(uint8_t *buf, unsigned long *table_ptrs) table_ptrs[nr_tables++] = (unsigned long)madt; } + /* HPET. */ + hpet = (struct acpi_20_hpet *)&buf[offset]; + offset += construct_hpet(hpet); + table_ptrs[nr_tables++] = (unsigned long)hpet; + /* TPM TCPA and SSDT. 
*/ tis_hdr = (uint16_t *)0xFED40F00; if ( (tis_hdr[0] == tis_signature[0]) && @@ -144,12 +184,11 @@ int construct_secondary_tables(uint8_t *buf, unsigned long *table_ptrs) tcpa->header.signature = ACPI_2_0_TCPA_SIGNATURE; tcpa->header.length = sizeof(*tcpa); tcpa->header.revision = ACPI_2_0_TCPA_REVISION; - strncpy(tcpa->header.oem_id, "IBM ", 6); - tcpa->header.oem_table_id = ASCII64(' ', ' ', ' ', ' ', - ' ', 'x', 'e', 'n'); - tcpa->header.oem_revision = 1; - tcpa->header.creator_id = ASCII32('I', 'B', 'M', ' '); - tcpa->header.creator_revision = 1; + strncpy(tcpa->header.oem_id, ACPI_OEM_ID, 6); + strncpy(tcpa->header.oem_table_id, ACPI_OEM_TABLE_ID, 8); + tcpa->header.oem_revision = ACPI_OEM_REVISION; + tcpa->header.creator_id = ACPI_CREATOR_ID; + tcpa->header.creator_revision = ACPI_CREATOR_REVISION; tcpa->lasa = e820_malloc( ACPI_2_0_TCPA_LAML_SIZE, E820_RESERVED, (uint32_t)~0); if ( tcpa->lasa ) diff --git a/tools/firmware/hvmloader/acpi/dsdt.asl b/tools/firmware/hvmloader/acpi/dsdt.asl index f369c64045..d47ea8d0c4 100644 --- a/tools/firmware/hvmloader/acpi/dsdt.asl +++ b/tools/firmware/hvmloader/acpi/dsdt.asl @@ -17,7 +17,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. 
*/ -DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL","int-xen", 2006) +DefinitionBlock ("DSDT.aml", "DSDT", 2, "Xen", "HVM", 0) { Name (\PMBS, 0x0C00) Name (\PMLN, 0x08) @@ -280,6 +280,22 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL","int-xen", 2006) } } + Device(HPET) { + Name(_HID, EISAID("PNP0103")) + Name(_UID, 0) + Name(_CRS, ResourceTemplate() { + DWordMemory( + ResourceConsumer, PosDecode, MinFixed, MaxFixed, + NonCacheable, ReadWrite, + 0x00000000, + 0xFED00000, + 0xFED003FF, + 0x00000000, + 0x00000400 /* 1K memory: FED00000 - FED003FF */ + ) + }) + } + Method(_PRT,0) { If(PICD) { Return(PRTA) diff --git a/tools/firmware/hvmloader/acpi/dsdt.c b/tools/firmware/hvmloader/acpi/dsdt.c index 6eb777cd05..698bd72848 100644 --- a/tools/firmware/hvmloader/acpi/dsdt.c +++ b/tools/firmware/hvmloader/acpi/dsdt.c @@ -1,22 +1,22 @@ /* * * Intel ACPI Component Architecture - * ASL Optimizing Compiler / AML Disassembler version 20050513 [Nov 16 2006] - * Copyright (C) 2000 - 2005 Intel Corporation - * Supports ACPI Specification Revision 3.0 + * ASL Optimizing Compiler version 20060707 [Dec 30 2006] + * Copyright (C) 2000 - 2006 Intel Corporation + * Supports ACPI Specification Revision 3.0a * - * Compilation of "dsdt.asl" - Wed Nov 22 18:26:19 2006 + * Compilation of "dsdt.asl" - Sat Dec 30 15:31:23 2006 * * C source code output * */ -unsigned char AmlCode[] = +unsigned char AmlCode[] = { - 0x44,0x53,0x44,0x54,0x9D,0x0D,0x00,0x00, /* 00000000 "DSDT...." */ - 0x01,0x83,0x49,0x4E,0x54,0x45,0x4C,0x00, /* 00000008 "..INTEL." */ - 0x69,0x6E,0x74,0x2D,0x78,0x65,0x6E,0x00, /* 00000010 "int-xen." */ - 0xD6,0x07,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ - 0x13,0x05,0x05,0x20,0x08,0x50,0x4D,0x42, /* 00000020 "... .PMB" */ + 0x44,0x53,0x44,0x54,0xD9,0x0D,0x00,0x00, /* 00000000 "DSDT...." */ + 0x02,0xFB,0x58,0x65,0x6E,0x00,0x00,0x00, /* 00000008 "..Xen..." */ + 0x48,0x56,0x4D,0x00,0x00,0x00,0x00,0x00, /* 00000010 "HVM....." 
*/ + 0x00,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ + 0x07,0x07,0x06,0x20,0x08,0x50,0x4D,0x42, /* 00000020 "... .PMB" */ 0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C, /* 00000028 "S....PML" */ 0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31, /* 00000030 "N...IOB1" */ 0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08, /* 00000038 "..IOL1.." */ @@ -34,7 +34,7 @@ unsigned char AmlCode[] = 0x12,0x08,0x04,0x0A,0x07,0x0A,0x07,0x00, /* 00000098 "........" */ 0x00,0x08,0x50,0x49,0x43,0x44,0x00,0x14, /* 000000A0 "..PICD.." */ 0x0C,0x5F,0x50,0x49,0x43,0x01,0x70,0x68, /* 000000A8 "._PIC.ph" */ - 0x50,0x49,0x43,0x44,0x10,0x48,0xCE,0x5F, /* 000000B0 "PICD.H._" */ + 0x50,0x49,0x43,0x44,0x10,0x44,0xD2,0x5F, /* 000000B0 "PICD.D._" */ 0x53,0x42,0x5F,0x5B,0x82,0x49,0x04,0x4D, /* 000000B8 "SB_[.I.M" */ 0x45,0x4D,0x30,0x08,0x5F,0x48,0x49,0x44, /* 000000C0 "EM0._HID" */ 0x0C,0x41,0xD0,0x0C,0x02,0x08,0x5F,0x43, /* 000000C8 ".A...._C" */ @@ -45,7 +45,7 @@ unsigned char AmlCode[] = 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 000000F0 "........" */ 0x00,0x00,0x00,0x00,0x00,0x00,0x0A,0x00, /* 000000F8 "........" */ 0x00,0x00,0x00,0x00,0x79,0x00,0x5B,0x82, /* 00000100 "....y.[." */ - 0x45,0xC9,0x50,0x43,0x49,0x30,0x08,0x5F, /* 00000108 "E.PCI0._" */ + 0x41,0xCD,0x50,0x43,0x49,0x30,0x08,0x5F, /* 00000108 "A.PCI0._" */ 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0A,0x03, /* 00000110 "HID.A..." */ 0x08,0x5F,0x55,0x49,0x44,0x00,0x08,0x5F, /* 00000118 "._UID.._" */ 0x41,0x44,0x52,0x00,0x08,0x5F,0x42,0x42, /* 00000120 "ADR.._BB" */ @@ -140,313 +140,321 @@ unsigned char AmlCode[] = 0x5F,0x53,0x52,0x53,0x01,0x8B,0x68,0x01, /* 000003E8 "_SRS..h." */ 0x49,0x52,0x51,0x31,0x82,0x49,0x52,0x51, /* 000003F0 "IRQ1.IRQ" */ 0x31,0x60,0x76,0x60,0x70,0x60,0x50,0x49, /* 000003F8 "1`v`p`PI" */ - 0x52,0x44,0x14,0x16,0x5F,0x50,0x52,0x54, /* 00000400 "RD.._PRT" */ - 0x00,0xA0,0x0A,0x50,0x49,0x43,0x44,0xA4, /* 00000408 "...PICD." 
*/ - 0x50,0x52,0x54,0x41,0xA4,0x50,0x52,0x54, /* 00000410 "PRTA.PRT" */ - 0x50,0x08,0x50,0x52,0x54,0x50,0x12,0x49, /* 00000418 "P.PRTP.I" */ - 0x36,0x3C,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 00000420 "6<......" */ - 0x01,0x00,0x00,0x4C,0x4E,0x4B,0x42,0x00, /* 00000428 "...LNKB." */ - 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x01,0x00, /* 00000430 "........" */ - 0x01,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 00000438 ".LNKC..." */ - 0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A,0x02, /* 00000440 "........" */ - 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04, /* 00000448 "LNKD...." */ - 0x0C,0xFF,0xFF,0x01,0x00,0x0A,0x03,0x4C, /* 00000450 ".......L" */ - 0x4E,0x4B,0x41,0x00,0x12,0x0D,0x04,0x0C, /* 00000458 "NKA....." */ - 0xFF,0xFF,0x02,0x00,0x00,0x4C,0x4E,0x4B, /* 00000460 ".....LNK" */ - 0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 00000468 "C......." */ - 0x02,0x00,0x01,0x4C,0x4E,0x4B,0x44,0x00, /* 00000470 "...LNKD." */ - 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x02,0x00, /* 00000478 "........" */ - 0x0A,0x02,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 00000480 "..LNKA.." */ - 0x0E,0x04,0x0C,0xFF,0xFF,0x02,0x00,0x0A, /* 00000488 "........" */ - 0x03,0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0D, /* 00000490 ".LNKB..." */ - 0x04,0x0C,0xFF,0xFF,0x03,0x00,0x00,0x4C, /* 00000498 ".......L" */ - 0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04,0x0C, /* 000004A0 "NKD....." */ - 0xFF,0xFF,0x03,0x00,0x01,0x4C,0x4E,0x4B, /* 000004A8 ".....LNK" */ - 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 000004B0 "A......." */ - 0x03,0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x42, /* 000004B8 "....LNKB" */ - 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x03, /* 000004C0 "........" */ - 0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x43,0x00, /* 000004C8 "...LNKC." */ - 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x04,0x00, /* 000004D0 "........" */ - 0x00,0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0D, /* 000004D8 ".LNKA..." */ - 0x04,0x0C,0xFF,0xFF,0x04,0x00,0x01,0x4C, /* 000004E0 ".......L" */ - 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 000004E8 "NKB....." 
*/ - 0xFF,0xFF,0x04,0x00,0x0A,0x02,0x4C,0x4E, /* 000004F0 "......LN" */ - 0x4B,0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 000004F8 "KC......" */ - 0xFF,0x04,0x00,0x0A,0x03,0x4C,0x4E,0x4B, /* 00000500 ".....LNK" */ - 0x44,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 00000508 "D......." */ - 0x05,0x00,0x00,0x4C,0x4E,0x4B,0x42,0x00, /* 00000510 "...LNKB." */ - 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x05,0x00, /* 00000518 "........" */ - 0x01,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 00000520 ".LNKC..." */ - 0x04,0x0C,0xFF,0xFF,0x05,0x00,0x0A,0x02, /* 00000528 "........" */ - 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04, /* 00000530 "LNKD...." */ - 0x0C,0xFF,0xFF,0x05,0x00,0x0A,0x03,0x4C, /* 00000538 ".......L" */ - 0x4E,0x4B,0x41,0x00,0x12,0x0D,0x04,0x0C, /* 00000540 "NKA....." */ - 0xFF,0xFF,0x06,0x00,0x00,0x4C,0x4E,0x4B, /* 00000548 ".....LNK" */ - 0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 00000550 "C......." */ - 0x06,0x00,0x01,0x4C,0x4E,0x4B,0x44,0x00, /* 00000558 "...LNKD." */ - 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x06,0x00, /* 00000560 "........" */ - 0x0A,0x02,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 00000568 "..LNKA.." */ - 0x0E,0x04,0x0C,0xFF,0xFF,0x06,0x00,0x0A, /* 00000570 "........" */ - 0x03,0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0D, /* 00000578 ".LNKB..." */ - 0x04,0x0C,0xFF,0xFF,0x07,0x00,0x00,0x4C, /* 00000580 ".......L" */ - 0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04,0x0C, /* 00000588 "NKD....." */ - 0xFF,0xFF,0x07,0x00,0x01,0x4C,0x4E,0x4B, /* 00000590 ".....LNK" */ - 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000598 "A......." */ - 0x07,0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x42, /* 000005A0 "....LNKB" */ - 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x07, /* 000005A8 "........" */ - 0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x43,0x00, /* 000005B0 "...LNKC." */ - 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x08,0x00, /* 000005B8 "........" */ - 0x00,0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0D, /* 000005C0 ".LNKA..." 
*/ - 0x04,0x0C,0xFF,0xFF,0x08,0x00,0x01,0x4C, /* 000005C8 ".......L" */ - 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 000005D0 "NKB....." */ - 0xFF,0xFF,0x08,0x00,0x0A,0x02,0x4C,0x4E, /* 000005D8 "......LN" */ - 0x4B,0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 000005E0 "KC......" */ - 0xFF,0x08,0x00,0x0A,0x03,0x4C,0x4E,0x4B, /* 000005E8 ".....LNK" */ - 0x44,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 000005F0 "D......." */ - 0x09,0x00,0x00,0x4C,0x4E,0x4B,0x42,0x00, /* 000005F8 "...LNKB." */ - 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x09,0x00, /* 00000600 "........" */ - 0x01,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 00000608 ".LNKC..." */ - 0x04,0x0C,0xFF,0xFF,0x09,0x00,0x0A,0x02, /* 00000610 "........" */ - 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04, /* 00000618 "LNKD...." */ - 0x0C,0xFF,0xFF,0x09,0x00,0x0A,0x03,0x4C, /* 00000620 ".......L" */ - 0x4E,0x4B,0x41,0x00,0x12,0x0D,0x04,0x0C, /* 00000628 "NKA....." */ - 0xFF,0xFF,0x0A,0x00,0x00,0x4C,0x4E,0x4B, /* 00000630 ".....LNK" */ - 0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 00000638 "C......." */ - 0x0A,0x00,0x01,0x4C,0x4E,0x4B,0x44,0x00, /* 00000640 "...LNKD." */ - 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x0A,0x00, /* 00000648 "........" */ - 0x0A,0x02,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 00000650 "..LNKA.." */ - 0x0E,0x04,0x0C,0xFF,0xFF,0x0A,0x00,0x0A, /* 00000658 "........" */ - 0x03,0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0D, /* 00000660 ".LNKB..." */ - 0x04,0x0C,0xFF,0xFF,0x0B,0x00,0x00,0x4C, /* 00000668 ".......L" */ - 0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04,0x0C, /* 00000670 "NKD....." */ - 0xFF,0xFF,0x0B,0x00,0x01,0x4C,0x4E,0x4B, /* 00000678 ".....LNK" */ - 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000680 "A......." */ - 0x0B,0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x42, /* 00000688 "....LNKB" */ - 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x0B, /* 00000690 "........" */ - 0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x43,0x00, /* 00000698 "...LNKC." */ - 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0C,0x00, /* 000006A0 "........" 
*/ - 0x00,0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0D, /* 000006A8 ".LNKA..." */ - 0x04,0x0C,0xFF,0xFF,0x0C,0x00,0x01,0x4C, /* 000006B0 ".......L" */ - 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 000006B8 "NKB....." */ - 0xFF,0xFF,0x0C,0x00,0x0A,0x02,0x4C,0x4E, /* 000006C0 "......LN" */ - 0x4B,0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 000006C8 "KC......" */ - 0xFF,0x0C,0x00,0x0A,0x03,0x4C,0x4E,0x4B, /* 000006D0 ".....LNK" */ - 0x44,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 000006D8 "D......." */ - 0x0D,0x00,0x00,0x4C,0x4E,0x4B,0x42,0x00, /* 000006E0 "...LNKB." */ - 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0D,0x00, /* 000006E8 "........" */ - 0x01,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 000006F0 ".LNKC..." */ - 0x04,0x0C,0xFF,0xFF,0x0D,0x00,0x0A,0x02, /* 000006F8 "........" */ - 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04, /* 00000700 "LNKD...." */ - 0x0C,0xFF,0xFF,0x0D,0x00,0x0A,0x03,0x4C, /* 00000708 ".......L" */ - 0x4E,0x4B,0x41,0x00,0x12,0x0D,0x04,0x0C, /* 00000710 "NKA....." */ - 0xFF,0xFF,0x0E,0x00,0x00,0x4C,0x4E,0x4B, /* 00000718 ".....LNK" */ - 0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 00000720 "C......." */ - 0x0E,0x00,0x01,0x4C,0x4E,0x4B,0x44,0x00, /* 00000728 "...LNKD." */ - 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x0E,0x00, /* 00000730 "........" */ - 0x0A,0x02,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 00000738 "..LNKA.." */ - 0x0E,0x04,0x0C,0xFF,0xFF,0x0E,0x00,0x0A, /* 00000740 "........" */ - 0x03,0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0D, /* 00000748 ".LNKB..." */ - 0x04,0x0C,0xFF,0xFF,0x0F,0x00,0x00,0x4C, /* 00000750 ".......L" */ - 0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04,0x0C, /* 00000758 "NKD....." */ - 0xFF,0xFF,0x0F,0x00,0x01,0x4C,0x4E,0x4B, /* 00000760 ".....LNK" */ - 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000768 "A......." */ - 0x0F,0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x42, /* 00000770 "....LNKB" */ - 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x0F, /* 00000778 "........" */ - 0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x43,0x00, /* 00000780 "...LNKC." 
*/ - 0x08,0x50,0x52,0x54,0x41,0x12,0x41,0x2F, /* 00000788 ".PRTA.A/" */ - 0x3C,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x01, /* 00000790 "<......." */ - 0x00,0x00,0x00,0x0A,0x14,0x12,0x0B,0x04, /* 00000798 "........" */ - 0x0C,0xFF,0xFF,0x01,0x00,0x01,0x00,0x0A, /* 000007A0 "........" */ - 0x15,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x01, /* 000007A8 "........" */ - 0x00,0x0A,0x02,0x00,0x0A,0x16,0x12,0x0C, /* 000007B0 "........" */ - 0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A,0x03, /* 000007B8 "........" */ - 0x00,0x0A,0x17,0x12,0x0B,0x04,0x0C,0xFF, /* 000007C0 "........" */ - 0xFF,0x02,0x00,0x00,0x00,0x0A,0x18,0x12, /* 000007C8 "........" */ - 0x0B,0x04,0x0C,0xFF,0xFF,0x02,0x00,0x01, /* 000007D0 "........" */ - 0x00,0x0A,0x19,0x12,0x0C,0x04,0x0C,0xFF, /* 000007D8 "........" */ - 0xFF,0x02,0x00,0x0A,0x02,0x00,0x0A,0x1A, /* 000007E0 "........" */ - 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x02,0x00, /* 000007E8 "........" */ - 0x0A,0x03,0x00,0x0A,0x1B,0x12,0x0B,0x04, /* 000007F0 "........" */ - 0x0C,0xFF,0xFF,0x03,0x00,0x00,0x00,0x0A, /* 000007F8 "........" */ - 0x1C,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x03, /* 00000800 "........" */ - 0x00,0x01,0x00,0x0A,0x1D,0x12,0x0C,0x04, /* 00000808 "........" */ - 0x0C,0xFF,0xFF,0x03,0x00,0x0A,0x02,0x00, /* 00000810 "........" */ - 0x0A,0x1E,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000818 "........" */ - 0x03,0x00,0x0A,0x03,0x00,0x0A,0x1F,0x12, /* 00000820 "........" */ - 0x0B,0x04,0x0C,0xFF,0xFF,0x04,0x00,0x00, /* 00000828 "........" */ - 0x00,0x0A,0x20,0x12,0x0B,0x04,0x0C,0xFF, /* 00000830 ".. ....." */ - 0xFF,0x04,0x00,0x01,0x00,0x0A,0x21,0x12, /* 00000838 "......!." */ - 0x0C,0x04,0x0C,0xFF,0xFF,0x04,0x00,0x0A, /* 00000840 "........" */ - 0x02,0x00,0x0A,0x22,0x12,0x0C,0x04,0x0C, /* 00000848 "..."...." */ - 0xFF,0xFF,0x04,0x00,0x0A,0x03,0x00,0x0A, /* 00000850 "........" */ - 0x23,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x05, /* 00000858 "#......." */ - 0x00,0x00,0x00,0x0A,0x24,0x12,0x0B,0x04, /* 00000860 "....$..." 
*/ - 0x0C,0xFF,0xFF,0x05,0x00,0x01,0x00,0x0A, /* 00000868 "........" */ - 0x25,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x05, /* 00000870 "%......." */ - 0x00,0x0A,0x02,0x00,0x0A,0x26,0x12,0x0C, /* 00000878 ".....&.." */ - 0x04,0x0C,0xFF,0xFF,0x05,0x00,0x0A,0x03, /* 00000880 "........" */ - 0x00,0x0A,0x27,0x12,0x0B,0x04,0x0C,0xFF, /* 00000888 "..'....." */ - 0xFF,0x06,0x00,0x00,0x00,0x0A,0x28,0x12, /* 00000890 "......(." */ - 0x0B,0x04,0x0C,0xFF,0xFF,0x06,0x00,0x01, /* 00000898 "........" */ - 0x00,0x0A,0x29,0x12,0x0C,0x04,0x0C,0xFF, /* 000008A0 "..)....." */ - 0xFF,0x06,0x00,0x0A,0x02,0x00,0x0A,0x2A, /* 000008A8 ".......*" */ - 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x06,0x00, /* 000008B0 "........" */ - 0x0A,0x03,0x00,0x0A,0x2B,0x12,0x0B,0x04, /* 000008B8 "....+..." */ - 0x0C,0xFF,0xFF,0x07,0x00,0x00,0x00,0x0A, /* 000008C0 "........" */ - 0x2C,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x07, /* 000008C8 ",......." */ - 0x00,0x01,0x00,0x0A,0x2D,0x12,0x0C,0x04, /* 000008D0 "....-..." */ - 0x0C,0xFF,0xFF,0x07,0x00,0x0A,0x02,0x00, /* 000008D8 "........" */ - 0x0A,0x2E,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 000008E0 "........" */ - 0x07,0x00,0x0A,0x03,0x00,0x0A,0x2F,0x12, /* 000008E8 "....../." */ - 0x0B,0x04,0x0C,0xFF,0xFF,0x08,0x00,0x00, /* 000008F0 "........" */ - 0x00,0x0A,0x11,0x12,0x0B,0x04,0x0C,0xFF, /* 000008F8 "........" */ - 0xFF,0x08,0x00,0x01,0x00,0x0A,0x12,0x12, /* 00000900 "........" */ - 0x0C,0x04,0x0C,0xFF,0xFF,0x08,0x00,0x0A, /* 00000908 "........" */ - 0x02,0x00,0x0A,0x13,0x12,0x0C,0x04,0x0C, /* 00000910 "........" */ - 0xFF,0xFF,0x08,0x00,0x0A,0x03,0x00,0x0A, /* 00000918 "........" */ - 0x14,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x09, /* 00000920 "........" */ - 0x00,0x00,0x00,0x0A,0x15,0x12,0x0B,0x04, /* 00000928 "........" */ - 0x0C,0xFF,0xFF,0x09,0x00,0x01,0x00,0x0A, /* 00000930 "........" */ - 0x16,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x09, /* 00000938 "........" */ - 0x00,0x0A,0x02,0x00,0x0A,0x17,0x12,0x0C, /* 00000940 "........" 
*/ - 0x04,0x0C,0xFF,0xFF,0x09,0x00,0x0A,0x03, /* 00000948 "........" */ - 0x00,0x0A,0x18,0x12,0x0B,0x04,0x0C,0xFF, /* 00000950 "........" */ - 0xFF,0x0A,0x00,0x00,0x00,0x0A,0x19,0x12, /* 00000958 "........" */ - 0x0B,0x04,0x0C,0xFF,0xFF,0x0A,0x00,0x01, /* 00000960 "........" */ - 0x00,0x0A,0x1A,0x12,0x0C,0x04,0x0C,0xFF, /* 00000968 "........" */ - 0xFF,0x0A,0x00,0x0A,0x02,0x00,0x0A,0x1B, /* 00000970 "........" */ - 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0A,0x00, /* 00000978 "........" */ - 0x0A,0x03,0x00,0x0A,0x1C,0x12,0x0B,0x04, /* 00000980 "........" */ - 0x0C,0xFF,0xFF,0x0B,0x00,0x00,0x00,0x0A, /* 00000988 "........" */ - 0x1D,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0B, /* 00000990 "........" */ - 0x00,0x01,0x00,0x0A,0x1E,0x12,0x0C,0x04, /* 00000998 "........" */ - 0x0C,0xFF,0xFF,0x0B,0x00,0x0A,0x02,0x00, /* 000009A0 "........" */ - 0x0A,0x1F,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 000009A8 "........" */ - 0x0B,0x00,0x0A,0x03,0x00,0x0A,0x20,0x12, /* 000009B0 "...... ." */ - 0x0B,0x04,0x0C,0xFF,0xFF,0x0C,0x00,0x00, /* 000009B8 "........" */ - 0x00,0x0A,0x21,0x12,0x0B,0x04,0x0C,0xFF, /* 000009C0 "..!....." */ - 0xFF,0x0C,0x00,0x01,0x00,0x0A,0x22,0x12, /* 000009C8 "......"." */ - 0x0C,0x04,0x0C,0xFF,0xFF,0x0C,0x00,0x0A, /* 000009D0 "........" */ - 0x02,0x00,0x0A,0x23,0x12,0x0C,0x04,0x0C, /* 000009D8 "...#...." */ - 0xFF,0xFF,0x0C,0x00,0x0A,0x03,0x00,0x0A, /* 000009E0 "........" */ - 0x24,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0D, /* 000009E8 "$......." */ - 0x00,0x00,0x00,0x0A,0x25,0x12,0x0B,0x04, /* 000009F0 "....%..." */ - 0x0C,0xFF,0xFF,0x0D,0x00,0x01,0x00,0x0A, /* 000009F8 "........" */ - 0x26,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0D, /* 00000A00 "&......." */ - 0x00,0x0A,0x02,0x00,0x0A,0x27,0x12,0x0C, /* 00000A08 ".....'.." */ - 0x04,0x0C,0xFF,0xFF,0x0D,0x00,0x0A,0x03, /* 00000A10 "........" */ - 0x00,0x0A,0x28,0x12,0x0B,0x04,0x0C,0xFF, /* 00000A18 "..(....." */ - 0xFF,0x0E,0x00,0x00,0x00,0x0A,0x29,0x12, /* 00000A20 "......)." 
*/ - 0x0B,0x04,0x0C,0xFF,0xFF,0x0E,0x00,0x01, /* 00000A28 "........" */ - 0x00,0x0A,0x2A,0x12,0x0C,0x04,0x0C,0xFF, /* 00000A30 "..*....." */ - 0xFF,0x0E,0x00,0x0A,0x02,0x00,0x0A,0x2B, /* 00000A38 ".......+" */ - 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0E,0x00, /* 00000A40 "........" */ - 0x0A,0x03,0x00,0x0A,0x2C,0x12,0x0B,0x04, /* 00000A48 "....,..." */ - 0x0C,0xFF,0xFF,0x0F,0x00,0x00,0x00,0x0A, /* 00000A50 "........" */ - 0x2D,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0F, /* 00000A58 "-......." */ - 0x00,0x01,0x00,0x0A,0x2E,0x12,0x0C,0x04, /* 00000A60 "........" */ - 0x0C,0xFF,0xFF,0x0F,0x00,0x0A,0x02,0x00, /* 00000A68 "........" */ - 0x0A,0x2F,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000A70 "./......" */ - 0x0F,0x00,0x0A,0x03,0x00,0x0A,0x10,0x5B, /* 00000A78 ".......[" */ - 0x82,0x4C,0x31,0x49,0x53,0x41,0x5F,0x08, /* 00000A80 ".L1ISA_." */ - 0x5F,0x41,0x44,0x52,0x0C,0x00,0x00,0x01, /* 00000A88 "_ADR...." */ - 0x00,0x5B,0x80,0x50,0x49,0x52,0x51,0x02, /* 00000A90 ".[.PIRQ." */ - 0x0A,0x60,0x0A,0x04,0x10,0x2E,0x5C,0x00, /* 00000A98 ".`....\." */ - 0x5B,0x81,0x29,0x5C,0x2F,0x04,0x5F,0x53, /* 00000AA0 "[.)\/._S" */ - 0x42,0x5F,0x50,0x43,0x49,0x30,0x49,0x53, /* 00000AA8 "B_PCI0IS" */ - 0x41,0x5F,0x50,0x49,0x52,0x51,0x01,0x50, /* 00000AB0 "A_PIRQ.P" */ - 0x49,0x52,0x41,0x08,0x50,0x49,0x52,0x42, /* 00000AB8 "IRA.PIRB" */ - 0x08,0x50,0x49,0x52,0x43,0x08,0x50,0x49, /* 00000AC0 ".PIRC.PI" */ - 0x52,0x44,0x08,0x5B,0x82,0x46,0x0B,0x53, /* 00000AC8 "RD.[.F.S" */ - 0x59,0x53,0x52,0x08,0x5F,0x48,0x49,0x44, /* 00000AD0 "YSR._HID" */ - 0x0C,0x41,0xD0,0x0C,0x02,0x08,0x5F,0x55, /* 00000AD8 ".A...._U" */ - 0x49,0x44,0x01,0x08,0x43,0x52,0x53,0x5F, /* 00000AE0 "ID..CRS_" */ - 0x11,0x4E,0x08,0x0A,0x8A,0x47,0x01,0x10, /* 00000AE8 ".N...G.." 
*/ - 0x00,0x10,0x00,0x00,0x10,0x47,0x01,0x22, /* 00000AF0 ".....G."" */ - 0x00,0x22,0x00,0x00,0x0C,0x47,0x01,0x30, /* 00000AF8 "."...G.0" */ - 0x00,0x30,0x00,0x00,0x10,0x47,0x01,0x44, /* 00000B00 ".0...G.D" */ - 0x00,0x44,0x00,0x00,0x1C,0x47,0x01,0x62, /* 00000B08 ".D...G.b" */ - 0x00,0x62,0x00,0x00,0x02,0x47,0x01,0x65, /* 00000B10 ".b...G.e" */ - 0x00,0x65,0x00,0x00,0x0B,0x47,0x01,0x72, /* 00000B18 ".e...G.r" */ - 0x00,0x72,0x00,0x00,0x0E,0x47,0x01,0x80, /* 00000B20 ".r...G.." */ - 0x00,0x80,0x00,0x00,0x01,0x47,0x01,0x84, /* 00000B28 ".....G.." */ - 0x00,0x84,0x00,0x00,0x03,0x47,0x01,0x88, /* 00000B30 ".....G.." */ - 0x00,0x88,0x00,0x00,0x01,0x47,0x01,0x8C, /* 00000B38 ".....G.." */ - 0x00,0x8C,0x00,0x00,0x03,0x47,0x01,0x90, /* 00000B40 ".....G.." */ - 0x00,0x90,0x00,0x00,0x10,0x47,0x01,0xA2, /* 00000B48 ".....G.." */ - 0x00,0xA2,0x00,0x00,0x1C,0x47,0x01,0xE0, /* 00000B50 ".....G.." */ - 0x00,0xE0,0x00,0x00,0x10,0x47,0x01,0xA0, /* 00000B58 ".....G.." */ - 0x08,0xA0,0x08,0x00,0x04,0x47,0x01,0xC0, /* 00000B60 ".....G.." */ - 0x0C,0xC0,0x0C,0x00,0x10,0x47,0x01,0xD0, /* 00000B68 ".....G.." */ - 0x04,0xD0,0x04,0x00,0x02,0x79,0x00,0x14, /* 00000B70 ".....y.." */ - 0x0B,0x5F,0x43,0x52,0x53,0x00,0xA4,0x43, /* 00000B78 "._CRS..C" */ - 0x52,0x53,0x5F,0x5B,0x82,0x2B,0x50,0x49, /* 00000B80 "RS_[.+PI" */ - 0x43,0x5F,0x08,0x5F,0x48,0x49,0x44,0x0B, /* 00000B88 "C_._HID." */ - 0x41,0xD0,0x08,0x5F,0x43,0x52,0x53,0x11, /* 00000B90 "A.._CRS." */ - 0x18,0x0A,0x15,0x47,0x01,0x20,0x00,0x20, /* 00000B98 "...G. . " */ - 0x00,0x01,0x02,0x47,0x01,0xA0,0x00,0xA0, /* 00000BA0 "...G...." */ - 0x00,0x01,0x02,0x22,0x04,0x00,0x79,0x00, /* 00000BA8 "..."..y." */ - 0x5B,0x82,0x47,0x05,0x44,0x4D,0x41,0x30, /* 00000BB0 "[.G.DMA0" */ - 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 00000BB8 "._HID.A." */ - 0x02,0x00,0x08,0x5F,0x43,0x52,0x53,0x11, /* 00000BC0 "..._CRS." 
*/ - 0x41,0x04,0x0A,0x3D,0x2A,0x10,0x04,0x47, /* 00000BC8 "A..=*..G" */ - 0x01,0x00,0x00,0x00,0x00,0x00,0x10,0x47, /* 00000BD0 ".......G" */ - 0x01,0x81,0x00,0x81,0x00,0x00,0x03,0x47, /* 00000BD8 ".......G" */ - 0x01,0x87,0x00,0x87,0x00,0x00,0x01,0x47, /* 00000BE0 ".......G" */ - 0x01,0x89,0x00,0x89,0x00,0x00,0x03,0x47, /* 00000BE8 ".......G" */ - 0x01,0x8F,0x00,0x8F,0x00,0x00,0x01,0x47, /* 00000BF0 ".......G" */ - 0x01,0xC0,0x00,0xC0,0x00,0x00,0x20,0x47, /* 00000BF8 "...... G" */ - 0x01,0x80,0x04,0x80,0x04,0x00,0x10,0x79, /* 00000C00 ".......y" */ - 0x00,0x5B,0x82,0x25,0x54,0x4D,0x52,0x5F, /* 00000C08 ".[.%TMR_" */ - 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 00000C10 "._HID.A." */ - 0x01,0x00,0x08,0x5F,0x43,0x52,0x53,0x11, /* 00000C18 "..._CRS." */ - 0x10,0x0A,0x0D,0x47,0x01,0x40,0x00,0x40, /* 00000C20 "...G.@.@" */ - 0x00,0x00,0x04,0x22,0x01,0x00,0x79,0x00, /* 00000C28 "..."..y." */ - 0x5B,0x82,0x25,0x52,0x54,0x43,0x5F,0x08, /* 00000C30 "[.%RTC_." */ - 0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x0B, /* 00000C38 "_HID.A.." */ - 0x00,0x08,0x5F,0x43,0x52,0x53,0x11,0x10, /* 00000C40 ".._CRS.." */ - 0x0A,0x0D,0x47,0x01,0x70,0x00,0x70,0x00, /* 00000C48 "..G.p.p." */ - 0x00,0x02,0x22,0x00,0x01,0x79,0x00,0x5B, /* 00000C50 ".."..y.[" */ - 0x82,0x22,0x53,0x50,0x4B,0x52,0x08,0x5F, /* 00000C58 "."SPKR._" */ - 0x48,0x49,0x44,0x0C,0x41,0xD0,0x08,0x00, /* 00000C60 "HID.A..." */ - 0x08,0x5F,0x43,0x52,0x53,0x11,0x0D,0x0A, /* 00000C68 "._CRS..." */ - 0x0A,0x47,0x01,0x61,0x00,0x61,0x00,0x00, /* 00000C70 ".G.a.a.." */ - 0x01,0x79,0x00,0x5B,0x82,0x31,0x50,0x53, /* 00000C78 ".y.[.1PS" */ - 0x32,0x4D,0x08,0x5F,0x48,0x49,0x44,0x0C, /* 00000C80 "2M._HID." */ - 0x41,0xD0,0x0F,0x13,0x08,0x5F,0x43,0x49, /* 00000C88 "A...._CI" */ - 0x44,0x0C,0x41,0xD0,0x0F,0x13,0x14,0x09, /* 00000C90 "D.A....." */ - 0x5F,0x53,0x54,0x41,0x00,0xA4,0x0A,0x0F, /* 00000C98 "_STA...." */ - 0x08,0x5F,0x43,0x52,0x53,0x11,0x08,0x0A, /* 00000CA0 "._CRS..." 
*/ - 0x05,0x22,0x00,0x10,0x79,0x00,0x5B,0x82, /* 00000CA8 "."..y.[." */ - 0x42,0x04,0x50,0x53,0x32,0x4B,0x08,0x5F, /* 00000CB0 "B.PS2K._" */ - 0x48,0x49,0x44,0x0C,0x41,0xD0,0x03,0x03, /* 00000CB8 "HID.A..." */ - 0x08,0x5F,0x43,0x49,0x44,0x0C,0x41,0xD0, /* 00000CC0 "._CID.A." */ - 0x03,0x0B,0x14,0x09,0x5F,0x53,0x54,0x41, /* 00000CC8 "...._STA" */ - 0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52, /* 00000CD0 "....._CR" */ - 0x53,0x11,0x18,0x0A,0x15,0x47,0x01,0x60, /* 00000CD8 "S....G.`" */ - 0x00,0x60,0x00,0x00,0x01,0x47,0x01,0x64, /* 00000CE0 ".`...G.d" */ - 0x00,0x64,0x00,0x00,0x01,0x22,0x02,0x00, /* 00000CE8 ".d...".." */ - 0x79,0x00,0x5B,0x82,0x3A,0x46,0x44,0x43, /* 00000CF0 "y.[.:FDC" */ - 0x30,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 00000CF8 "0._HID.A" */ - 0xD0,0x07,0x00,0x14,0x09,0x5F,0x53,0x54, /* 00000D00 "....._ST" */ - 0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43, /* 00000D08 "A....._C" */ - 0x52,0x53,0x11,0x1B,0x0A,0x18,0x47,0x01, /* 00000D10 "RS....G." */ - 0xF0,0x03,0xF0,0x03,0x01,0x06,0x47,0x01, /* 00000D18 "......G." */ - 0xF7,0x03,0xF7,0x03,0x01,0x01,0x22,0x40, /* 00000D20 "......"@" */ - 0x00,0x2A,0x04,0x00,0x79,0x00,0x5B,0x82, /* 00000D28 ".*..y.[." */ - 0x35,0x55,0x41,0x52,0x31,0x08,0x5F,0x48, /* 00000D30 "5UAR1._H" */ - 0x49,0x44,0x0C,0x41,0xD0,0x05,0x01,0x08, /* 00000D38 "ID.A...." */ - 0x5F,0x55,0x49,0x44,0x01,0x14,0x09,0x5F, /* 00000D40 "_UID..._" */ - 0x53,0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08, /* 00000D48 "STA....." */ - 0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,0x0D, /* 00000D50 "_CRS...." */ - 0x47,0x01,0xF8,0x03,0xF8,0x03,0x01,0x08, /* 00000D58 "G......." */ - 0x22,0x10,0x00,0x79,0x00,0x5B,0x82,0x36, /* 00000D60 ""..y.[.6" */ - 0x4C,0x54,0x50,0x31,0x08,0x5F,0x48,0x49, /* 00000D68 "LTP1._HI" */ - 0x44,0x0C,0x41,0xD0,0x04,0x00,0x08,0x5F, /* 00000D70 "D.A...._" */ - 0x55,0x49,0x44,0x0A,0x02,0x14,0x09,0x5F, /* 00000D78 "UID...._" */ - 0x53,0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08, /* 00000D80 "STA....." 
*/ - 0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,0x0D, /* 00000D88 "_CRS...." */ - 0x47,0x01,0x78,0x03,0x78,0x03,0x08,0x08, /* 00000D90 "G.x.x..." */ - 0x22,0x80,0x00,0x79,0x00, + 0x52,0x44,0x5B,0x82,0x3A,0x48,0x50,0x45, /* 00000400 "RD[.:HPE" */ + 0x54,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 00000408 "T._HID.A" */ + 0xD0,0x01,0x03,0x08,0x5F,0x55,0x49,0x44, /* 00000410 "...._UID" */ + 0x00,0x08,0x5F,0x43,0x52,0x53,0x11,0x1F, /* 00000418 ".._CRS.." */ + 0x0A,0x1C,0x87,0x17,0x00,0x00,0x0D,0x01, /* 00000420 "........" */ + 0x00,0x00,0x00,0x00,0x00,0x00,0xD0,0xFE, /* 00000428 "........" */ + 0xFF,0x03,0xD0,0xFE,0x00,0x00,0x00,0x00, /* 00000430 "........" */ + 0x00,0x04,0x00,0x00,0x79,0x00,0x14,0x16, /* 00000438 "....y..." */ + 0x5F,0x50,0x52,0x54,0x00,0xA0,0x0A,0x50, /* 00000440 "_PRT...P" */ + 0x49,0x43,0x44,0xA4,0x50,0x52,0x54,0x41, /* 00000448 "ICD.PRTA" */ + 0xA4,0x50,0x52,0x54,0x50,0x08,0x50,0x52, /* 00000450 ".PRTP.PR" */ + 0x54,0x50,0x12,0x49,0x36,0x3C,0x12,0x0D, /* 00000458 "TP.I6<.." */ + 0x04,0x0C,0xFF,0xFF,0x01,0x00,0x00,0x4C, /* 00000460 ".......L" */ + 0x4E,0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C, /* 00000468 "NKB....." */ + 0xFF,0xFF,0x01,0x00,0x01,0x4C,0x4E,0x4B, /* 00000470 ".....LNK" */ + 0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000478 "C......." */ + 0x01,0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44, /* 00000480 "....LNKD" */ + 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x01, /* 00000488 "........" */ + 0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00, /* 00000490 "...LNKA." */ + 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x02,0x00, /* 00000498 "........" */ + 0x00,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D, /* 000004A0 ".LNKC..." */ + 0x04,0x0C,0xFF,0xFF,0x02,0x00,0x01,0x4C, /* 000004A8 ".......L" */ + 0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C, /* 000004B0 "NKD....." */ + 0xFF,0xFF,0x02,0x00,0x0A,0x02,0x4C,0x4E, /* 000004B8 "......LN" */ + 0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 000004C0 "KA......" 
*/ + 0xFF,0x02,0x00,0x0A,0x03,0x4C,0x4E,0x4B, /* 000004C8 ".....LNK" */ + 0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 000004D0 "B......." */ + 0x03,0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00, /* 000004D8 "...LNKD." */ + 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x03,0x00, /* 000004E0 "........" */ + 0x01,0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E, /* 000004E8 ".LNKA..." */ + 0x04,0x0C,0xFF,0xFF,0x03,0x00,0x0A,0x02, /* 000004F0 "........" */ + 0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04, /* 000004F8 "LNKB...." */ + 0x0C,0xFF,0xFF,0x03,0x00,0x0A,0x03,0x4C, /* 00000500 ".......L" */ + 0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C, /* 00000508 "NKC....." */ + 0xFF,0xFF,0x04,0x00,0x00,0x4C,0x4E,0x4B, /* 00000510 ".....LNK" */ + 0x41,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 00000518 "A......." */ + 0x04,0x00,0x01,0x4C,0x4E,0x4B,0x42,0x00, /* 00000520 "...LNKB." */ + 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x04,0x00, /* 00000528 "........" */ + 0x0A,0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12, /* 00000530 "..LNKC.." */ + 0x0E,0x04,0x0C,0xFF,0xFF,0x04,0x00,0x0A, /* 00000538 "........" */ + 0x03,0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D, /* 00000540 ".LNKD..." */ + 0x04,0x0C,0xFF,0xFF,0x05,0x00,0x00,0x4C, /* 00000548 ".......L" */ + 0x4E,0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C, /* 00000550 "NKB....." */ + 0xFF,0xFF,0x05,0x00,0x01,0x4C,0x4E,0x4B, /* 00000558 ".....LNK" */ + 0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000560 "C......." */ + 0x05,0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44, /* 00000568 "....LNKD" */ + 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x05, /* 00000570 "........" */ + 0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00, /* 00000578 "...LNKA." */ + 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x06,0x00, /* 00000580 "........" */ + 0x00,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D, /* 00000588 ".LNKC..." */ + 0x04,0x0C,0xFF,0xFF,0x06,0x00,0x01,0x4C, /* 00000590 ".......L" */ + 0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C, /* 00000598 "NKD....." 
*/ + 0xFF,0xFF,0x06,0x00,0x0A,0x02,0x4C,0x4E, /* 000005A0 "......LN" */ + 0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 000005A8 "KA......" */ + 0xFF,0x06,0x00,0x0A,0x03,0x4C,0x4E,0x4B, /* 000005B0 ".....LNK" */ + 0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 000005B8 "B......." */ + 0x07,0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00, /* 000005C0 "...LNKD." */ + 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x07,0x00, /* 000005C8 "........" */ + 0x01,0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E, /* 000005D0 ".LNKA..." */ + 0x04,0x0C,0xFF,0xFF,0x07,0x00,0x0A,0x02, /* 000005D8 "........" */ + 0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04, /* 000005E0 "LNKB...." */ + 0x0C,0xFF,0xFF,0x07,0x00,0x0A,0x03,0x4C, /* 000005E8 ".......L" */ + 0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C, /* 000005F0 "NKC....." */ + 0xFF,0xFF,0x08,0x00,0x00,0x4C,0x4E,0x4B, /* 000005F8 ".....LNK" */ + 0x41,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 00000600 "A......." */ + 0x08,0x00,0x01,0x4C,0x4E,0x4B,0x42,0x00, /* 00000608 "...LNKB." */ + 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x08,0x00, /* 00000610 "........" */ + 0x0A,0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12, /* 00000618 "..LNKC.." */ + 0x0E,0x04,0x0C,0xFF,0xFF,0x08,0x00,0x0A, /* 00000620 "........" */ + 0x03,0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D, /* 00000628 ".LNKD..." */ + 0x04,0x0C,0xFF,0xFF,0x09,0x00,0x00,0x4C, /* 00000630 ".......L" */ + 0x4E,0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C, /* 00000638 "NKB....." */ + 0xFF,0xFF,0x09,0x00,0x01,0x4C,0x4E,0x4B, /* 00000640 ".....LNK" */ + 0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000648 "C......." */ + 0x09,0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44, /* 00000650 "....LNKD" */ + 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x09, /* 00000658 "........" */ + 0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00, /* 00000660 "...LNKA." */ + 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0A,0x00, /* 00000668 "........" */ + 0x00,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D, /* 00000670 ".LNKC..." 
*/ + 0x04,0x0C,0xFF,0xFF,0x0A,0x00,0x01,0x4C, /* 00000678 ".......L" */ + 0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C, /* 00000680 "NKD....." */ + 0xFF,0xFF,0x0A,0x00,0x0A,0x02,0x4C,0x4E, /* 00000688 "......LN" */ + 0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000690 "KA......" */ + 0xFF,0x0A,0x00,0x0A,0x03,0x4C,0x4E,0x4B, /* 00000698 ".....LNK" */ + 0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 000006A0 "B......." */ + 0x0B,0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00, /* 000006A8 "...LNKD." */ + 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0B,0x00, /* 000006B0 "........" */ + 0x01,0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E, /* 000006B8 ".LNKA..." */ + 0x04,0x0C,0xFF,0xFF,0x0B,0x00,0x0A,0x02, /* 000006C0 "........" */ + 0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04, /* 000006C8 "LNKB...." */ + 0x0C,0xFF,0xFF,0x0B,0x00,0x0A,0x03,0x4C, /* 000006D0 ".......L" */ + 0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C, /* 000006D8 "NKC....." */ + 0xFF,0xFF,0x0C,0x00,0x00,0x4C,0x4E,0x4B, /* 000006E0 ".....LNK" */ + 0x41,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 000006E8 "A......." */ + 0x0C,0x00,0x01,0x4C,0x4E,0x4B,0x42,0x00, /* 000006F0 "...LNKB." */ + 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x0C,0x00, /* 000006F8 "........" */ + 0x0A,0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12, /* 00000700 "..LNKC.." */ + 0x0E,0x04,0x0C,0xFF,0xFF,0x0C,0x00,0x0A, /* 00000708 "........" */ + 0x03,0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D, /* 00000710 ".LNKD..." */ + 0x04,0x0C,0xFF,0xFF,0x0D,0x00,0x00,0x4C, /* 00000718 ".......L" */ + 0x4E,0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C, /* 00000720 "NKB....." */ + 0xFF,0xFF,0x0D,0x00,0x01,0x4C,0x4E,0x4B, /* 00000728 ".....LNK" */ + 0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000730 "C......." */ + 0x0D,0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44, /* 00000738 "....LNKD" */ + 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x0D, /* 00000740 "........" */ + 0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00, /* 00000748 "...LNKA." */ + 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0E,0x00, /* 00000750 "........" 
*/ + 0x00,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D, /* 00000758 ".LNKC..." */ + 0x04,0x0C,0xFF,0xFF,0x0E,0x00,0x01,0x4C, /* 00000760 ".......L" */ + 0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C, /* 00000768 "NKD....." */ + 0xFF,0xFF,0x0E,0x00,0x0A,0x02,0x4C,0x4E, /* 00000770 "......LN" */ + 0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000778 "KA......" */ + 0xFF,0x0E,0x00,0x0A,0x03,0x4C,0x4E,0x4B, /* 00000780 ".....LNK" */ + 0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF, /* 00000788 "B......." */ + 0x0F,0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00, /* 00000790 "...LNKD." */ + 0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0F,0x00, /* 00000798 "........" */ + 0x01,0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E, /* 000007A0 ".LNKA..." */ + 0x04,0x0C,0xFF,0xFF,0x0F,0x00,0x0A,0x02, /* 000007A8 "........" */ + 0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04, /* 000007B0 "LNKB...." */ + 0x0C,0xFF,0xFF,0x0F,0x00,0x0A,0x03,0x4C, /* 000007B8 ".......L" */ + 0x4E,0x4B,0x43,0x00,0x08,0x50,0x52,0x54, /* 000007C0 "NKC..PRT" */ + 0x41,0x12,0x41,0x2F,0x3C,0x12,0x0B,0x04, /* 000007C8 "A.A/<..." */ + 0x0C,0xFF,0xFF,0x01,0x00,0x00,0x00,0x0A, /* 000007D0 "........" */ + 0x14,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x01, /* 000007D8 "........" */ + 0x00,0x01,0x00,0x0A,0x15,0x12,0x0C,0x04, /* 000007E0 "........" */ + 0x0C,0xFF,0xFF,0x01,0x00,0x0A,0x02,0x00, /* 000007E8 "........" */ + 0x0A,0x16,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 000007F0 "........" */ + 0x01,0x00,0x0A,0x03,0x00,0x0A,0x17,0x12, /* 000007F8 "........" */ + 0x0B,0x04,0x0C,0xFF,0xFF,0x02,0x00,0x00, /* 00000800 "........" */ + 0x00,0x0A,0x18,0x12,0x0B,0x04,0x0C,0xFF, /* 00000808 "........" */ + 0xFF,0x02,0x00,0x01,0x00,0x0A,0x19,0x12, /* 00000810 "........" */ + 0x0C,0x04,0x0C,0xFF,0xFF,0x02,0x00,0x0A, /* 00000818 "........" */ + 0x02,0x00,0x0A,0x1A,0x12,0x0C,0x04,0x0C, /* 00000820 "........" */ + 0xFF,0xFF,0x02,0x00,0x0A,0x03,0x00,0x0A, /* 00000828 "........" */ + 0x1B,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x03, /* 00000830 "........" 
*/ + 0x00,0x00,0x00,0x0A,0x1C,0x12,0x0B,0x04, /* 00000838 "........" */ + 0x0C,0xFF,0xFF,0x03,0x00,0x01,0x00,0x0A, /* 00000840 "........" */ + 0x1D,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x03, /* 00000848 "........" */ + 0x00,0x0A,0x02,0x00,0x0A,0x1E,0x12,0x0C, /* 00000850 "........" */ + 0x04,0x0C,0xFF,0xFF,0x03,0x00,0x0A,0x03, /* 00000858 "........" */ + 0x00,0x0A,0x1F,0x12,0x0B,0x04,0x0C,0xFF, /* 00000860 "........" */ + 0xFF,0x04,0x00,0x00,0x00,0x0A,0x20,0x12, /* 00000868 "...... ." */ + 0x0B,0x04,0x0C,0xFF,0xFF,0x04,0x00,0x01, /* 00000870 "........" */ + 0x00,0x0A,0x21,0x12,0x0C,0x04,0x0C,0xFF, /* 00000878 "..!....." */ + 0xFF,0x04,0x00,0x0A,0x02,0x00,0x0A,0x22, /* 00000880 "......."" */ + 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x04,0x00, /* 00000888 "........" */ + 0x0A,0x03,0x00,0x0A,0x23,0x12,0x0B,0x04, /* 00000890 "....#..." */ + 0x0C,0xFF,0xFF,0x05,0x00,0x00,0x00,0x0A, /* 00000898 "........" */ + 0x24,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x05, /* 000008A0 "$......." */ + 0x00,0x01,0x00,0x0A,0x25,0x12,0x0C,0x04, /* 000008A8 "....%..." */ + 0x0C,0xFF,0xFF,0x05,0x00,0x0A,0x02,0x00, /* 000008B0 "........" */ + 0x0A,0x26,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 000008B8 ".&......" */ + 0x05,0x00,0x0A,0x03,0x00,0x0A,0x27,0x12, /* 000008C0 "......'." */ + 0x0B,0x04,0x0C,0xFF,0xFF,0x06,0x00,0x00, /* 000008C8 "........" */ + 0x00,0x0A,0x28,0x12,0x0B,0x04,0x0C,0xFF, /* 000008D0 "..(....." */ + 0xFF,0x06,0x00,0x01,0x00,0x0A,0x29,0x12, /* 000008D8 "......)." */ + 0x0C,0x04,0x0C,0xFF,0xFF,0x06,0x00,0x0A, /* 000008E0 "........" */ + 0x02,0x00,0x0A,0x2A,0x12,0x0C,0x04,0x0C, /* 000008E8 "...*...." */ + 0xFF,0xFF,0x06,0x00,0x0A,0x03,0x00,0x0A, /* 000008F0 "........" */ + 0x2B,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x07, /* 000008F8 "+......." */ + 0x00,0x00,0x00,0x0A,0x2C,0x12,0x0B,0x04, /* 00000900 "....,..." */ + 0x0C,0xFF,0xFF,0x07,0x00,0x01,0x00,0x0A, /* 00000908 "........" */ + 0x2D,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x07, /* 00000910 "-......." 
*/ + 0x00,0x0A,0x02,0x00,0x0A,0x2E,0x12,0x0C, /* 00000918 "........" */ + 0x04,0x0C,0xFF,0xFF,0x07,0x00,0x0A,0x03, /* 00000920 "........" */ + 0x00,0x0A,0x2F,0x12,0x0B,0x04,0x0C,0xFF, /* 00000928 "../....." */ + 0xFF,0x08,0x00,0x00,0x00,0x0A,0x11,0x12, /* 00000930 "........" */ + 0x0B,0x04,0x0C,0xFF,0xFF,0x08,0x00,0x01, /* 00000938 "........" */ + 0x00,0x0A,0x12,0x12,0x0C,0x04,0x0C,0xFF, /* 00000940 "........" */ + 0xFF,0x08,0x00,0x0A,0x02,0x00,0x0A,0x13, /* 00000948 "........" */ + 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x08,0x00, /* 00000950 "........" */ + 0x0A,0x03,0x00,0x0A,0x14,0x12,0x0B,0x04, /* 00000958 "........" */ + 0x0C,0xFF,0xFF,0x09,0x00,0x00,0x00,0x0A, /* 00000960 "........" */ + 0x15,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x09, /* 00000968 "........" */ + 0x00,0x01,0x00,0x0A,0x16,0x12,0x0C,0x04, /* 00000970 "........" */ + 0x0C,0xFF,0xFF,0x09,0x00,0x0A,0x02,0x00, /* 00000978 "........" */ + 0x0A,0x17,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000980 "........" */ + 0x09,0x00,0x0A,0x03,0x00,0x0A,0x18,0x12, /* 00000988 "........" */ + 0x0B,0x04,0x0C,0xFF,0xFF,0x0A,0x00,0x00, /* 00000990 "........" */ + 0x00,0x0A,0x19,0x12,0x0B,0x04,0x0C,0xFF, /* 00000998 "........" */ + 0xFF,0x0A,0x00,0x01,0x00,0x0A,0x1A,0x12, /* 000009A0 "........" */ + 0x0C,0x04,0x0C,0xFF,0xFF,0x0A,0x00,0x0A, /* 000009A8 "........" */ + 0x02,0x00,0x0A,0x1B,0x12,0x0C,0x04,0x0C, /* 000009B0 "........" */ + 0xFF,0xFF,0x0A,0x00,0x0A,0x03,0x00,0x0A, /* 000009B8 "........" */ + 0x1C,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0B, /* 000009C0 "........" */ + 0x00,0x00,0x00,0x0A,0x1D,0x12,0x0B,0x04, /* 000009C8 "........" */ + 0x0C,0xFF,0xFF,0x0B,0x00,0x01,0x00,0x0A, /* 000009D0 "........" */ + 0x1E,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0B, /* 000009D8 "........" */ + 0x00,0x0A,0x02,0x00,0x0A,0x1F,0x12,0x0C, /* 000009E0 "........" */ + 0x04,0x0C,0xFF,0xFF,0x0B,0x00,0x0A,0x03, /* 000009E8 "........" */ + 0x00,0x0A,0x20,0x12,0x0B,0x04,0x0C,0xFF, /* 000009F0 ".. ....." 
*/ + 0xFF,0x0C,0x00,0x00,0x00,0x0A,0x21,0x12, /* 000009F8 "......!." */ + 0x0B,0x04,0x0C,0xFF,0xFF,0x0C,0x00,0x01, /* 00000A00 "........" */ + 0x00,0x0A,0x22,0x12,0x0C,0x04,0x0C,0xFF, /* 00000A08 ".."....." */ + 0xFF,0x0C,0x00,0x0A,0x02,0x00,0x0A,0x23, /* 00000A10 ".......#" */ + 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0C,0x00, /* 00000A18 "........" */ + 0x0A,0x03,0x00,0x0A,0x24,0x12,0x0B,0x04, /* 00000A20 "....$..." */ + 0x0C,0xFF,0xFF,0x0D,0x00,0x00,0x00,0x0A, /* 00000A28 "........" */ + 0x25,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0D, /* 00000A30 "%......." */ + 0x00,0x01,0x00,0x0A,0x26,0x12,0x0C,0x04, /* 00000A38 "....&..." */ + 0x0C,0xFF,0xFF,0x0D,0x00,0x0A,0x02,0x00, /* 00000A40 "........" */ + 0x0A,0x27,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000A48 ".'......" */ + 0x0D,0x00,0x0A,0x03,0x00,0x0A,0x28,0x12, /* 00000A50 "......(." */ + 0x0B,0x04,0x0C,0xFF,0xFF,0x0E,0x00,0x00, /* 00000A58 "........" */ + 0x00,0x0A,0x29,0x12,0x0B,0x04,0x0C,0xFF, /* 00000A60 "..)....." */ + 0xFF,0x0E,0x00,0x01,0x00,0x0A,0x2A,0x12, /* 00000A68 "......*." */ + 0x0C,0x04,0x0C,0xFF,0xFF,0x0E,0x00,0x0A, /* 00000A70 "........" */ + 0x02,0x00,0x0A,0x2B,0x12,0x0C,0x04,0x0C, /* 00000A78 "...+...." */ + 0xFF,0xFF,0x0E,0x00,0x0A,0x03,0x00,0x0A, /* 00000A80 "........" */ + 0x2C,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0F, /* 00000A88 ",......." */ + 0x00,0x00,0x00,0x0A,0x2D,0x12,0x0B,0x04, /* 00000A90 "....-..." */ + 0x0C,0xFF,0xFF,0x0F,0x00,0x01,0x00,0x0A, /* 00000A98 "........" */ + 0x2E,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0F, /* 00000AA0 "........" */ + 0x00,0x0A,0x02,0x00,0x0A,0x2F,0x12,0x0C, /* 00000AA8 "...../.." */ + 0x04,0x0C,0xFF,0xFF,0x0F,0x00,0x0A,0x03, /* 00000AB0 "........" */ + 0x00,0x0A,0x10,0x5B,0x82,0x4C,0x31,0x49, /* 00000AB8 "...[.L1I" */ + 0x53,0x41,0x5F,0x08,0x5F,0x41,0x44,0x52, /* 00000AC0 "SA_._ADR" */ + 0x0C,0x00,0x00,0x01,0x00,0x5B,0x80,0x50, /* 00000AC8 ".....[.P" */ + 0x49,0x52,0x51,0x02,0x0A,0x60,0x0A,0x04, /* 00000AD0 "IRQ..`.." 
*/ + 0x10,0x2E,0x5C,0x00,0x5B,0x81,0x29,0x5C, /* 00000AD8 "..\.[.)\" */ + 0x2F,0x04,0x5F,0x53,0x42,0x5F,0x50,0x43, /* 00000AE0 "/._SB_PC" */ + 0x49,0x30,0x49,0x53,0x41,0x5F,0x50,0x49, /* 00000AE8 "I0ISA_PI" */ + 0x52,0x51,0x01,0x50,0x49,0x52,0x41,0x08, /* 00000AF0 "RQ.PIRA." */ + 0x50,0x49,0x52,0x42,0x08,0x50,0x49,0x52, /* 00000AF8 "PIRB.PIR" */ + 0x43,0x08,0x50,0x49,0x52,0x44,0x08,0x5B, /* 00000B00 "C.PIRD.[" */ + 0x82,0x46,0x0B,0x53,0x59,0x53,0x52,0x08, /* 00000B08 ".F.SYSR." */ + 0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C, /* 00000B10 "_HID.A.." */ + 0x02,0x08,0x5F,0x55,0x49,0x44,0x01,0x08, /* 00000B18 ".._UID.." */ + 0x43,0x52,0x53,0x5F,0x11,0x4E,0x08,0x0A, /* 00000B20 "CRS_.N.." */ + 0x8A,0x47,0x01,0x10,0x00,0x10,0x00,0x00, /* 00000B28 ".G......" */ + 0x10,0x47,0x01,0x22,0x00,0x22,0x00,0x00, /* 00000B30 ".G.".".." */ + 0x0C,0x47,0x01,0x30,0x00,0x30,0x00,0x00, /* 00000B38 ".G.0.0.." */ + 0x10,0x47,0x01,0x44,0x00,0x44,0x00,0x00, /* 00000B40 ".G.D.D.." */ + 0x1C,0x47,0x01,0x62,0x00,0x62,0x00,0x00, /* 00000B48 ".G.b.b.." */ + 0x02,0x47,0x01,0x65,0x00,0x65,0x00,0x00, /* 00000B50 ".G.e.e.." */ + 0x0B,0x47,0x01,0x72,0x00,0x72,0x00,0x00, /* 00000B58 ".G.r.r.." */ + 0x0E,0x47,0x01,0x80,0x00,0x80,0x00,0x00, /* 00000B60 ".G......" */ + 0x01,0x47,0x01,0x84,0x00,0x84,0x00,0x00, /* 00000B68 ".G......" */ + 0x03,0x47,0x01,0x88,0x00,0x88,0x00,0x00, /* 00000B70 ".G......" */ + 0x01,0x47,0x01,0x8C,0x00,0x8C,0x00,0x00, /* 00000B78 ".G......" */ + 0x03,0x47,0x01,0x90,0x00,0x90,0x00,0x00, /* 00000B80 ".G......" */ + 0x10,0x47,0x01,0xA2,0x00,0xA2,0x00,0x00, /* 00000B88 ".G......" */ + 0x1C,0x47,0x01,0xE0,0x00,0xE0,0x00,0x00, /* 00000B90 ".G......" */ + 0x10,0x47,0x01,0xA0,0x08,0xA0,0x08,0x00, /* 00000B98 ".G......" */ + 0x04,0x47,0x01,0xC0,0x0C,0xC0,0x0C,0x00, /* 00000BA0 ".G......" */ + 0x10,0x47,0x01,0xD0,0x04,0xD0,0x04,0x00, /* 00000BA8 ".G......" 
*/ + 0x02,0x79,0x00,0x14,0x0B,0x5F,0x43,0x52, /* 00000BB0 ".y..._CR" */ + 0x53,0x00,0xA4,0x43,0x52,0x53,0x5F,0x5B, /* 00000BB8 "S..CRS_[" */ + 0x82,0x2B,0x50,0x49,0x43,0x5F,0x08,0x5F, /* 00000BC0 ".+PIC_._" */ + 0x48,0x49,0x44,0x0B,0x41,0xD0,0x08,0x5F, /* 00000BC8 "HID.A.._" */ + 0x43,0x52,0x53,0x11,0x18,0x0A,0x15,0x47, /* 00000BD0 "CRS....G" */ + 0x01,0x20,0x00,0x20,0x00,0x01,0x02,0x47, /* 00000BD8 ". . ...G" */ + 0x01,0xA0,0x00,0xA0,0x00,0x01,0x02,0x22, /* 00000BE0 "......."" */ + 0x04,0x00,0x79,0x00,0x5B,0x82,0x47,0x05, /* 00000BE8 "..y.[.G." */ + 0x44,0x4D,0x41,0x30,0x08,0x5F,0x48,0x49, /* 00000BF0 "DMA0._HI" */ + 0x44,0x0C,0x41,0xD0,0x02,0x00,0x08,0x5F, /* 00000BF8 "D.A...._" */ + 0x43,0x52,0x53,0x11,0x41,0x04,0x0A,0x3D, /* 00000C00 "CRS.A..=" */ + 0x2A,0x10,0x04,0x47,0x01,0x00,0x00,0x00, /* 00000C08 "*..G...." */ + 0x00,0x00,0x10,0x47,0x01,0x81,0x00,0x81, /* 00000C10 "...G...." */ + 0x00,0x00,0x03,0x47,0x01,0x87,0x00,0x87, /* 00000C18 "...G...." */ + 0x00,0x00,0x01,0x47,0x01,0x89,0x00,0x89, /* 00000C20 "...G...." */ + 0x00,0x00,0x03,0x47,0x01,0x8F,0x00,0x8F, /* 00000C28 "...G...." */ + 0x00,0x00,0x01,0x47,0x01,0xC0,0x00,0xC0, /* 00000C30 "...G...." */ + 0x00,0x00,0x20,0x47,0x01,0x80,0x04,0x80, /* 00000C38 ".. G...." */ + 0x04,0x00,0x10,0x79,0x00,0x5B,0x82,0x25, /* 00000C40 "...y.[.%" */ + 0x54,0x4D,0x52,0x5F,0x08,0x5F,0x48,0x49, /* 00000C48 "TMR_._HI" */ + 0x44,0x0C,0x41,0xD0,0x01,0x00,0x08,0x5F, /* 00000C50 "D.A...._" */ + 0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47, /* 00000C58 "CRS....G" */ + 0x01,0x40,0x00,0x40,0x00,0x00,0x04,0x22, /* 00000C60 ".@.@..."" */ + 0x01,0x00,0x79,0x00,0x5B,0x82,0x25,0x52, /* 00000C68 "..y.[.%R" */ + 0x54,0x43,0x5F,0x08,0x5F,0x48,0x49,0x44, /* 00000C70 "TC_._HID" */ + 0x0C,0x41,0xD0,0x0B,0x00,0x08,0x5F,0x43, /* 00000C78 ".A...._C" */ + 0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01, /* 00000C80 "RS....G." */ + 0x70,0x00,0x70,0x00,0x00,0x02,0x22,0x00, /* 00000C88 "p.p..."." 
*/ + 0x01,0x79,0x00,0x5B,0x82,0x22,0x53,0x50, /* 00000C90 ".y.[."SP" */ + 0x4B,0x52,0x08,0x5F,0x48,0x49,0x44,0x0C, /* 00000C98 "KR._HID." */ + 0x41,0xD0,0x08,0x00,0x08,0x5F,0x43,0x52, /* 00000CA0 "A...._CR" */ + 0x53,0x11,0x0D,0x0A,0x0A,0x47,0x01,0x61, /* 00000CA8 "S....G.a" */ + 0x00,0x61,0x00,0x00,0x01,0x79,0x00,0x5B, /* 00000CB0 ".a...y.[" */ + 0x82,0x31,0x50,0x53,0x32,0x4D,0x08,0x5F, /* 00000CB8 ".1PS2M._" */ + 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0F,0x13, /* 00000CC0 "HID.A..." */ + 0x08,0x5F,0x43,0x49,0x44,0x0C,0x41,0xD0, /* 00000CC8 "._CID.A." */ + 0x0F,0x13,0x14,0x09,0x5F,0x53,0x54,0x41, /* 00000CD0 "...._STA" */ + 0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52, /* 00000CD8 "....._CR" */ + 0x53,0x11,0x08,0x0A,0x05,0x22,0x00,0x10, /* 00000CE0 "S....".." */ + 0x79,0x00,0x5B,0x82,0x42,0x04,0x50,0x53, /* 00000CE8 "y.[.B.PS" */ + 0x32,0x4B,0x08,0x5F,0x48,0x49,0x44,0x0C, /* 00000CF0 "2K._HID." */ + 0x41,0xD0,0x03,0x03,0x08,0x5F,0x43,0x49, /* 00000CF8 "A...._CI" */ + 0x44,0x0C,0x41,0xD0,0x03,0x0B,0x14,0x09, /* 00000D00 "D.A....." */ + 0x5F,0x53,0x54,0x41,0x00,0xA4,0x0A,0x0F, /* 00000D08 "_STA...." */ + 0x08,0x5F,0x43,0x52,0x53,0x11,0x18,0x0A, /* 00000D10 "._CRS..." */ + 0x15,0x47,0x01,0x60,0x00,0x60,0x00,0x00, /* 00000D18 ".G.`.`.." */ + 0x01,0x47,0x01,0x64,0x00,0x64,0x00,0x00, /* 00000D20 ".G.d.d.." */ + 0x01,0x22,0x02,0x00,0x79,0x00,0x5B,0x82, /* 00000D28 "."..y.[." */ + 0x3A,0x46,0x44,0x43,0x30,0x08,0x5F,0x48, /* 00000D30 ":FDC0._H" */ + 0x49,0x44,0x0C,0x41,0xD0,0x07,0x00,0x14, /* 00000D38 "ID.A...." */ + 0x09,0x5F,0x53,0x54,0x41,0x00,0xA4,0x0A, /* 00000D40 "._STA..." */ + 0x0F,0x08,0x5F,0x43,0x52,0x53,0x11,0x1B, /* 00000D48 ".._CRS.." */ + 0x0A,0x18,0x47,0x01,0xF0,0x03,0xF0,0x03, /* 00000D50 "..G....." */ + 0x01,0x06,0x47,0x01,0xF7,0x03,0xF7,0x03, /* 00000D58 "..G....." */ + 0x01,0x01,0x22,0x40,0x00,0x2A,0x04,0x00, /* 00000D60 ".."@.*.." 
*/ + 0x79,0x00,0x5B,0x82,0x35,0x55,0x41,0x52, /* 00000D68 "y.[.5UAR" */ + 0x31,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 00000D70 "1._HID.A" */ + 0xD0,0x05,0x01,0x08,0x5F,0x55,0x49,0x44, /* 00000D78 "...._UID" */ + 0x01,0x14,0x09,0x5F,0x53,0x54,0x41,0x00, /* 00000D80 "..._STA." */ + 0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53, /* 00000D88 "...._CRS" */ + 0x11,0x10,0x0A,0x0D,0x47,0x01,0xF8,0x03, /* 00000D90 "....G..." */ + 0xF8,0x03,0x01,0x08,0x22,0x10,0x00,0x79, /* 00000D98 "...."..y" */ + 0x00,0x5B,0x82,0x36,0x4C,0x54,0x50,0x31, /* 00000DA0 ".[.6LTP1" */ + 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 00000DA8 "._HID.A." */ + 0x04,0x00,0x08,0x5F,0x55,0x49,0x44,0x0A, /* 00000DB0 "..._UID." */ + 0x02,0x14,0x09,0x5F,0x53,0x54,0x41,0x00, /* 00000DB8 "..._STA." */ + 0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53, /* 00000DC0 "...._CRS" */ + 0x11,0x10,0x0A,0x0D,0x47,0x01,0x78,0x03, /* 00000DC8 "....G.x." */ + 0x78,0x03,0x08,0x08,0x22,0x80,0x00,0x79, /* 00000DD0 "x..."..y" */ + 0x00, }; int DsdtLen=sizeof(AmlCode); diff --git a/tools/firmware/hvmloader/acpi/ssdt_tpm.asl b/tools/firmware/hvmloader/acpi/ssdt_tpm.asl index 98010a7f14..49a5e1bc9f 100644 --- a/tools/firmware/hvmloader/acpi/ssdt_tpm.asl +++ b/tools/firmware/hvmloader/acpi/ssdt_tpm.asl @@ -17,7 +17,7 @@ //* SSDT for TPM TIS Interface for Xen with Qemu device model -DefinitionBlock ("SSDT_TPM.aml", "SSDT", 1, "IBM","xen", 2006) +DefinitionBlock ("SSDT_TPM.aml", "SSDT", 2, "Xen", "HVM", 0) { Device (TPM) { Name (_HID, EisaId ("PNP0C31")) @@ -26,4 +26,4 @@ DefinitionBlock ("SSDT_TPM.aml", "SSDT", 1, "IBM","xen", 2006) Memory32Fixed (ReadWrite, 0xFED40000, 0x5000,) }) } -}
\ No newline at end of file +} diff --git a/tools/firmware/hvmloader/acpi/ssdt_tpm.h b/tools/firmware/hvmloader/acpi/ssdt_tpm.h index 9d943a3a36..2585314ceb 100644 --- a/tools/firmware/hvmloader/acpi/ssdt_tpm.h +++ b/tools/firmware/hvmloader/acpi/ssdt_tpm.h @@ -1,11 +1,11 @@ /* * * Intel ACPI Component Architecture - * ASL Optimizing Compiler version 20060707 [Sep 11 2006] + * ASL Optimizing Compiler version 20060707 [Dec 30 2006] * Copyright (C) 2000 - 2006 Intel Corporation * Supports ACPI Specification Revision 3.0a * - * Compilation of "acpi_ssdt_tpm.asl" - Mon Oct 30 11:28:27 2006 + * Compilation of "ssdt_tpm.asl" - Sat Dec 30 15:31:27 2006 * * C source code output * @@ -13,9 +13,9 @@ unsigned char AmlCode_TPM[] = { 0x53,0x53,0x44,0x54,0x4C,0x00,0x00,0x00, /* 00000000 "SSDTL..." */ - 0x01,0x6D,0x49,0x42,0x4D,0x00,0x00,0x00, /* 00000008 ".mIBM..." */ - 0x78,0x65,0x6E,0x00,0x00,0x00,0x00,0x00, /* 00000010 "xen....." */ - 0xD6,0x07,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ + 0x02,0x56,0x58,0x65,0x6E,0x00,0x00,0x00, /* 00000008 ".VXen..." */ + 0x48,0x56,0x4D,0x00,0x00,0x00,0x00,0x00, /* 00000010 "HVM....." */ + 0x00,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ 0x07,0x07,0x06,0x20,0x5B,0x82,0x26,0x54, /* 00000020 "... 
[.&T" */ 0x50,0x4D,0x5F,0x08,0x5F,0x48,0x49,0x44, /* 00000028 "PM_._HID" */ 0x0C,0x41,0xD0,0x0C,0x31,0x08,0x5F,0x43, /* 00000030 ".A..1._C" */ diff --git a/tools/firmware/hvmloader/acpi/static_tables.c b/tools/firmware/hvmloader/acpi/static_tables.c index 0890700f7a..96d7f0c331 100644 --- a/tools/firmware/hvmloader/acpi/static_tables.c +++ b/tools/firmware/hvmloader/acpi/static_tables.c @@ -118,7 +118,7 @@ struct acpi_20_xsdt Xsdt = { .signature = ACPI_2_0_XSDT_SIGNATURE, .length = sizeof(struct acpi_header), .revision = ACPI_2_0_XSDT_REVISION, - .oem_id = ACPI_OEM_ID, + .oem_id = ACPI_OEM_ID, .oem_table_id = ACPI_OEM_TABLE_ID, .oem_revision = ACPI_OEM_REVISION, .creator_id = ACPI_CREATOR_ID, @@ -130,7 +130,7 @@ struct acpi_20_xsdt Xsdt = { struct acpi_20_rsdp Rsdp = { .signature = ACPI_2_0_RSDP_SIGNATURE, .oem_id = ACPI_OEM_ID, - .revision = ACPI_OEM_REVISION, + .revision = ACPI_2_0_RSDP_REVISION, .length = sizeof(struct acpi_20_rsdp) }; diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h index 18d906c4eb..2b21c74539 100644 --- a/tools/firmware/hvmloader/config.h +++ b/tools/firmware/hvmloader/config.h @@ -2,10 +2,11 @@ #define __HVMLOADER_CONFIG_H__ #define IOAPIC_BASE_ADDRESS 0xfec00000 -#define IOAPIC_ID 0x00 +#define IOAPIC_ID 0x01 #define IOAPIC_VERSION 0x11 #define LAPIC_BASE_ADDRESS 0xfee00000 +#define LAPIC_ID(vcpu_id) ((vcpu_id) * 2) #define PCI_ISA_DEVFN 0x08 /* dev 1, fn 0 */ #define PCI_ISA_IRQ_MASK 0x0c60U /* ISA IRQs 5,6,10,11 are PCI connected */ diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c index b7729ed85d..5e06273043 100644 --- a/tools/firmware/hvmloader/hvmloader.c +++ b/tools/firmware/hvmloader/hvmloader.c @@ -34,6 +34,7 @@ /* memory map */ #define HYPERCALL_PHYSICAL_ADDRESS 0x00080000 #define VGABIOS_PHYSICAL_ADDRESS 0x000C0000 +#define ETHERBOOT_PHYSICAL_ADDRESS 0x000C8000 #define VMXASSIST_PHYSICAL_ADDRESS 0x000D0000 #define ROMBIOS_PHYSICAL_ADDRESS 0x000F0000 @@ 
-279,6 +280,27 @@ static void pci_setup(void) } } +static +int must_load_nic(void) +{ + /* If the network card is in the boot order, load the Etherboot + * option ROM. Read the boot order bytes from CMOS and check + * if any of them are 0x4. */ + uint8_t boot_order; + + /* Read CMOS register 0x3d (boot choices 0 and 1) */ + outb(0x70, 0x3d); + boot_order = inb(0x71); + if ( (boot_order & 0xf) == 0x4 || (boot_order & 0xf0) == 0x40 ) + return 1; + /* Read CMOS register 0x38 (boot choice 2 and FDD test flag) */ + outb(0x70, 0x38); + boot_order = inb(0x71); + if ( (boot_order & 0xf0) == 0x40 ) + return 1; + return 0; +} + int main(void) { int acpi_sz; @@ -312,6 +334,13 @@ int main(void) vgabios_stdvga, sizeof(vgabios_stdvga)); } + if ( must_load_nic() ) + { + printf("Loading ETHERBOOT ...\n"); + memcpy((void *)ETHERBOOT_PHYSICAL_ADDRESS, + etherboot, sizeof(etherboot)); + } + if ( get_acpi_enabled() != 0 ) { printf("Loading ACPI ...\n"); diff --git a/tools/firmware/hvmloader/mp_tables.c b/tools/firmware/hvmloader/mp_tables.c index f763d77f6b..3b0edb3c9b 100644 --- a/tools/firmware/hvmloader/mp_tables.c +++ b/tools/firmware/hvmloader/mp_tables.c @@ -222,7 +222,7 @@ void fill_mp_config_table(struct mp_config_table *mpct, int length) void fill_mp_proc_entry(struct mp_proc_entry *mppe, int vcpu_id) { mppe->type = ENTRY_TYPE_PROCESSOR; - mppe->lapic_id = vcpu_id + 1; + mppe->lapic_id = LAPIC_ID(vcpu_id); mppe->lapic_version = 0x11; mppe->cpu_flags = CPU_FLAG_ENABLED; if ( vcpu_id == 0 ) @@ -373,7 +373,7 @@ void create_mp_tables(void) { if ( i == 2 ) continue; /* skip the slave PIC connection */ fill_mp_io_intr_entry((struct mp_io_intr_entry *)p, - BUS_ID_ISA, i, IOAPIC_ID, i); + BUS_ID_ISA, i, IOAPIC_ID, (i == 0) ? 
2 : i); p += sizeof(struct mp_io_intr_entry); } diff --git a/tools/firmware/rombios/rombios.c b/tools/firmware/rombios/rombios.c index 7a14b46ac4..34f7d78062 100644 --- a/tools/firmware/rombios/rombios.c +++ b/tools/firmware/rombios/rombios.c @@ -278,7 +278,6 @@ typedef unsigned short Bit16u; typedef unsigned short bx_bool; typedef unsigned long Bit32u; -#if BX_USE_ATADRV void memsetb(seg,offset,value,count); void memcpyb(dseg,doffset,sseg,soffset,count); @@ -418,7 +417,6 @@ typedef unsigned long Bit32u; ASM_END } #endif -#endif //BX_USE_ATADRV // read_dword and write_dword functions static Bit32u read_dword(); @@ -728,6 +726,8 @@ typedef struct { // The EBDA structure should conform to // http://www.cybertrails.com/~fys/rombios.htm document // I made the ata and cdemu structs begin at 0x121 in the EBDA seg + // EBDA must be at most 768 bytes; it lives at 0x9fc00, and the boot + // device tables are at 0x9ff00 -- 0x9ffff typedef struct { unsigned char filler1[0x3D]; @@ -885,7 +885,7 @@ static void int14_function(); static void int15_function(); static void int16_function(); static void int17_function(); -static Bit32u int19_function(); +static void int19_function(); static void int1a_function(); static void int70_function(); static void int74_function(); @@ -1435,10 +1435,17 @@ void copy_e820_table() { Bit8u nr_entries = read_byte(0x9000, 0x1e8); + Bit32u base_mem; if (nr_entries > 32) nr_entries = 32; write_word(0xe000, 0x8, nr_entries); memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14); + /* Report the proper base memory size at address 0x0413: otherwise + * non-e820 code will clobber things if BASE_MEM_IN_K is bigger than + * the first e820 entry. Get the size by reading the second 64bit + * field of the first e820 slot. 
*/ + base_mem = read_dword(0x9000, 0x2d0 + 8); + write_word(0x40, 0x13, base_mem >> 10); } #endif /* HVMASSIST */ @@ -1847,28 +1854,100 @@ print_bios_banner() printf("\n"); } + +//-------------------------------------------------------------------------- +// BIOS Boot Specification 1.0.1 compatibility +// +// Very basic support for the BIOS Boot Specification, which allows expansion +// ROMs to register themselves as boot devices, instead of just stealing the +// INT 19h boot vector. +// +// This is a hack: to do it properly requires a proper PnP BIOS and we aren't +// one; we just lie to the option ROMs to make them behave correctly. +// We also don't support letting option ROMs register as bootable disk +// drives (BCVs), only as bootable devices (BEVs). +// +// http://www.phoenix.com/en/Customer+Services/White+Papers-Specs/pc+industry+specifications.htm +//-------------------------------------------------------------------------- + +/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */ +#define IPL_SEG 0x9ff0 +#define IPL_TABLE_OFFSET 0x0000 +#define IPL_TABLE_ENTRIES 8 +#define IPL_COUNT_OFFSET 0x0080 /* u16: number of valid table entries */ +#define IPL_SEQUENCE_OFFSET 0x0082 /* u16: next boot device */ + +struct ipl_entry { + Bit16u type; + Bit16u flags; + Bit32u vector; + Bit32u description; + Bit32u reserved; +}; + +static void +init_boot_vectors() +{ + struct ipl_entry e; + Bit16u count = 0; + Bit16u ss = get_SS(); + + /* Clear out the IPL table. 
*/ + memsetb(IPL_SEG, IPL_TABLE_OFFSET, 0, 0xff); + + /* Floppy drive */ + e.type = 1; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0; + memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e)); + count++; + + /* First HDD */ + e.type = 2; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0; + memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e)); + count++; + +#if BX_ELTORITO_BOOT + /* CDROM */ + e.type = 3; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0; + memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e)); + count++; +#endif + + /* Remember how many devices we have */ + write_word(IPL_SEG, IPL_COUNT_OFFSET, count); + /* Not tried booting anything yet */ + write_word(IPL_SEG, IPL_SEQUENCE_OFFSET, 0xffff); +} + +static Bit8u +get_boot_vector(i, e) +Bit16u i; struct ipl_entry *e; +{ + Bit16u count; + Bit16u ss = get_SS(); + /* Get the count of boot devices, and refuse to overrun the array */ + count = read_word(IPL_SEG, IPL_COUNT_OFFSET); + if (i >= count) return 0; + /* OK to read this device */ + memcpyb(ss, e, IPL_SEG, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e)); + return 1; +} + + //-------------------------------------------------------------------------- // print_boot_device // displays the boot device //-------------------------------------------------------------------------- -static char drivetypes[][10]={"Floppy","Hard Disk","CD-Rom"}; +static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"}; void -print_boot_device(cdboot, drive) - Bit8u cdboot; Bit16u drive; +print_boot_device(type) + Bit16u type; { - Bit8u i; - - // cdboot contains 0 if floppy/harddisk, 1 otherwise - // drive contains real/emulated boot drive - - if(cdboot)i=2; // CD-Rom - else if((drive&0x0080)==0x00)i=0; // Floppy - else if((drive&0x0080)==0x80)i=1; // Hard drive - else return; - - printf("Booting from %s...\n",drivetypes[i]); + /* NIC appears as type 0x80 
*/ + if (type == 0x80 ) type = 0x4; + if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n"); + printf("Booting from %s...\n", drivetypes[type]); } //-------------------------------------------------------------------------- @@ -1876,29 +1955,20 @@ print_boot_device(cdboot, drive) // displays the reason why boot failed //-------------------------------------------------------------------------- void -print_boot_failure(cdboot, drive, reason, lastdrive) - Bit8u cdboot; Bit8u drive; Bit8u lastdrive; +print_boot_failure(type, reason) + Bit16u type; Bit8u reason; { - Bit16u drivenum = drive&0x7f; + if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n"); - // cdboot: 1 if boot from cd, 0 otherwise - // drive : drive number - // reason: 0 signature check failed, 1 read error - // lastdrive: 1 boot drive is the last one in boot sequence - - if (cdboot) - bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s failed\n",drivetypes[2]); - else if (drive & 0x80) - bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s %d failed\n", drivetypes[1],drivenum); + printf("Boot from %s failed", drivetypes[type]); + if (type < 4) { + /* Report the reason too */ + if (reason==0) + printf(": not a bootable disk"); else - bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s %d failed\n", drivetypes[0],drivenum); - - if (lastdrive==1) { - if (reason==0) - BX_PANIC("Not a bootable disk\n"); - else - BX_PANIC("Could not read the boot disk\n"); + printf(": could not read the boot disk"); } + printf("\n"); } //-------------------------------------------------------------------------- @@ -7546,19 +7616,19 @@ int17_function(regs, ds, iret_addr) } } -// returns bootsegment in ax, drive in bl - Bit32u -int19_function(bseqnr) -Bit8u bseqnr; +void +int19_function(seq_nr) +Bit16u seq_nr; { Bit16u ebda_seg=read_word(0x0040,0x000E); - Bit16u bootseq; + Bit16u bootdev; Bit8u bootdrv; - Bit8u bootcd; Bit8u bootchk; Bit16u bootseg; + Bit16u bootip; Bit16u status; - 
Bit8u lastdrive=0; + + struct ipl_entry e; // if BX_ELTORITO_BOOT is not defined, old behavior // check bit 5 in CMOS reg 0x2d. load either 0x00 or 0x80 into DL @@ -7575,62 +7645,54 @@ Bit8u bseqnr; // 0x01 : first floppy // 0x02 : first harddrive // 0x03 : first cdrom + // 0x04 - 0x0f : PnP expansion ROMs (e.g. Etherboot) // else : boot failure // Get the boot sequence #if BX_ELTORITO_BOOT - bootseq=inb_cmos(0x3d); - bootseq|=((inb_cmos(0x38) & 0xf0) << 4); - - if (bseqnr==2) bootseq >>= 4; - if (bseqnr==3) bootseq >>= 8; - if (bootseq<0x10) lastdrive = 1; - bootdrv=0x00; bootcd=0; - switch(bootseq & 0x0f) { - case 0x01: bootdrv=0x00; bootcd=0; break; - case 0x02: bootdrv=0x80; bootcd=0; break; - case 0x03: bootdrv=0x00; bootcd=1; break; - default: return 0x00000000; - } -#else - bootseq=inb_cmos(0x2d); + bootdev = inb_cmos(0x3d); + bootdev |= ((inb_cmos(0x38) & 0xf0) << 4); + bootdev >>= 4 * seq_nr; + bootdev &= 0xf; + if (bootdev == 0) BX_PANIC("No bootable device.\n"); + + /* Translate from CMOS runes to an IPL table offset by subtracting 1 */ + bootdev -= 1; +#else + if (seq_nr ==2) BX_PANIC("No more boot devices."); + if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1)) + /* Boot from floppy if the bit is set or it's the second boot */ + bootdev = 0x00; + else + bootdev = 0x01; +#endif - if (bseqnr==2) { - bootseq ^= 0x20; - lastdrive = 1; + /* Read the boot device from the IPL table */ + if (get_boot_vector(bootdev, &e) == 0) { + BX_INFO("Invalid boot device (0x%x)\n", bootdev); + return; } - bootdrv=0x00; bootcd=0; - if((bootseq&0x20)==0) bootdrv=0x80; -#endif // BX_ELTORITO_BOOT -#if BX_ELTORITO_BOOT - // We have to boot from cd - if (bootcd != 0) { - status = cdrom_boot(); + /* Do the loading, and set up vector as a far pointer to the boot + * address, and bootdrv as the boot drive */ + print_boot_device(e.type); - // If failure - if ( (status & 0x00ff) !=0 ) { - print_cdromboot_failure(status); - print_boot_failure(bootcd, bootdrv, 1, lastdrive); - return 
0x00000000; - } + switch(e.type) { + case 0x01: /* FDD */ + case 0x02: /* HDD */ - bootseg = read_word(ebda_seg,&EbdaData->cdemu.load_segment); - bootdrv = (Bit8u)(status>>8); - } - -#endif // BX_ELTORITO_BOOT - - // We have to boot from harddisk or floppy - if (bootcd == 0) { - bootseg=0x07c0; + bootdrv = (e.type == 0x02) ? 0x80 : 0x00; + bootseg = 0x07c0; + status = 0; ASM_START push bp mov bp, sp + push ax + push bx + push cx + push dx - mov ax, #0x0000 - mov _int19_function.status + 2[bp], ax mov dl, _int19_function.bootdrv + 2[bp] mov ax, _int19_function.bootseg + 2[bp] mov es, ax ;; segment @@ -7646,43 +7708,83 @@ ASM_START mov _int19_function.status + 2[bp], ax int19_load_done: + pop dx + pop cx + pop bx + pop ax pop bp ASM_END if (status != 0) { - print_boot_failure(bootcd, bootdrv, 1, lastdrive); - return 0x00000000; + print_boot_failure(e.type, 1); + return; + } + + /* Always check the signature on a HDD boot sector; on FDD, only do + * the check if the CMOS doesn't tell us to skip it */ + if (e.type != 0x00 || !((inb_cmos(0x38) & 0x01))) { + if (read_word(bootseg,0x1fe) != 0xaa55) { + print_boot_failure(e.type, 0); + return; } } - // check signature if instructed by cmos reg 0x38, only for floppy - // bootchk = 1 : signature check disabled - // bootchk = 0 : signature check enabled - if (bootdrv != 0) bootchk = 0; - else bootchk = inb_cmos(0x38) & 0x01; + /* Canonicalize bootseg:bootip */ + bootip = (bootseg & 0x0fff) << 4; + bootseg &= 0xf000; + break; #if BX_ELTORITO_BOOT - // if boot from cd, no signature check - if (bootcd != 0) - bootchk = 1; -#endif // BX_ELTORITO_BOOT + case 0x03: /* CD-ROM */ + status = cdrom_boot(); - if (bootchk == 0) { - if (read_word(bootseg,0x1fe) != 0xaa55) { - print_boot_failure(bootcd, bootdrv, 0, lastdrive); - return 0x00000000; - } + // If failure + if ( (status & 0x00ff) !=0 ) { + print_cdromboot_failure(status); + print_boot_failure(e.type, 1); + return; } - -#if BX_ELTORITO_BOOT - // Print out the boot string - 
print_boot_device(bootcd, bootdrv); -#else // BX_ELTORITO_BOOT - print_boot_device(0, bootdrv); -#endif // BX_ELTORITO_BOOT - // return the boot segment - return (((Bit32u)bootdrv) << 16) + bootseg; + bootdrv = (Bit8u)(status>>8); + bootseg = read_word(ebda_seg,&EbdaData->cdemu.load_segment); + /* Canonicalize bootseg:bootip */ + bootip = (bootseg & 0x0fff) << 4; + bootseg &= 0xf000; + break; +#endif + + case 0x80: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */ + bootseg = e.vector >> 16; + bootip = e.vector & 0xffff; + break; + + default: return; + } + + /* Debugging info */ + printf("Booting from %x:%x\n", bootseg, bootip); + + /* Jump to the boot vector */ +ASM_START + mov bp, sp + ;; Build an iret stack frame that will take us to the boot vector. + ;; iret pops ip, then cs, then flags, so push them in the opposite order. + pushf + mov ax, _int19_function.bootseg + 0[bp] + push ax + mov ax, _int19_function.bootip + 0[bp] + push ax + ;; Set the magic number in ax and the boot drive in dl. + mov ax, #0xaa55 + mov dl, _int19_function.bootdrv + 0[bp] + ;; Zero some of the other registers. + xor bx, bx + mov ds, bx + mov es, bx + mov bp, bx + ;; Go! + iret +ASM_END } void @@ -8139,14 +8241,29 @@ int13_out: popa iret - ;---------- ;- INT18h - ;---------- -int18_handler: ;; Boot Failure routing - call _int18_panic_msg - hlt - iret +int18_handler: ;; Boot Failure recovery: try the next device. + + ;; Reset SP and SS + mov ax, #0xfffe + mov sp, ax + xor ax, ax + mov ss, ax + + ;; Get the boot sequence number out of the IPL memory + mov bx, #IPL_SEG + mov ds, bx ;; Set segment + mov bx, IPL_SEQUENCE_OFFSET ;; BX is now the sequence number + inc bx ;; ++ + mov IPL_SEQUENCE_OFFSET, bx ;; Write it back + mov ds, ax ;; and reset the segment to zero. 
+ + ;; Carry on in the INT 19h handler, using the new sequence number + push bx + + jmp int19_next_boot ;---------- ;- INT19h - @@ -8154,62 +8271,32 @@ int18_handler: ;; Boot Failure routing int19_relocated: ;; Boot function, relocated ;; int19 was beginning to be really complex, so now it - ;; just calls an C function, that does the work - ;; it returns in BL the boot drive, and in AX the boot segment - ;; the boot segment will be 0x0000 if something has failed + ;; just calls a C function that does the work push bp mov bp, sp - - ;; drop ds + + ;; Reset SS and SP + mov ax, #0xfffe + mov sp, ax xor ax, ax - mov ds, ax + mov ss, ax + + ;; Start from the first boot device (0, in AX) + mov bx, #IPL_SEG + mov ds, bx ;; Set segment to write to the IPL memory + mov IPL_SEQUENCE_OFFSET, ax ;; Save the sequence number + mov ds, ax ;; and reset the segment. - ;; 1st boot device - mov ax, #0x0001 push ax - call _int19_function - inc sp - inc sp - ;; bl contains the boot drive - ;; ax contains the boot segment or 0 if failure - test ax, ax ;; if ax is 0 try next boot device - jnz boot_setup +int19_next_boot: - ;; 2nd boot device - mov ax, #0x0002 - push ax + ;; Call the C code for the next boot device call _int19_function - inc sp - inc sp - test ax, ax ;; if ax is 0 try next boot device - jnz boot_setup - ;; 3rd boot device - mov ax, #0x0003 - push ax - call _int19_function - inc sp - inc sp - test ax, ax ;; if ax is 0 call int18 - jz int18_handler - -boot_setup: - mov dl, bl ;; set drive so guest os find it - shl eax, #0x04 ;; convert seg to ip - mov 2[bp], ax ;; set ip - - shr eax, #0x04 ;; get cs back - and ax, #0xF000 ;; remove what went in ip - mov 4[bp], ax ;; set cs - xor ax, ax - mov es, ax ;; set es to zero fixes [ 549815 ] - mov [bp], ax ;; set bp to zero - mov ax, #0xaa55 ;; set ok flag - - pop bp - iret ;; Beam me up Scotty + ;; Boot failed: invoke the boot recovery function + int #0x18 ;---------- ;- INT1Ch - @@ -9387,6 +9474,15 @@ checksum_loop: pop ax ret + 
+;; We need a copy of this string, but we are not actually a PnP BIOS, +;; so make sure it is *not* aligned, so OSes will not see it if they scan. +.align 16 + db 0 +pnp_string: + .ascii "$PnP" + + rom_scan: ;; Scan for existence of valid expansion ROMS. ;; Video ROM: from 0xC0000..0xC7FFF in 2k increments @@ -9421,9 +9517,17 @@ block_count_rounded: xor bx, bx ;; Restore DS back to 0000: mov ds, bx push ax ;; Save AX + push di ;; Save DI ;; Push addr of ROM entry point push cx ;; Push seg push #0x0003 ;; Push offset + + ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS. + ;; That should stop it grabbing INT 19h; we will use its BEV instead. + mov ax, #0xf000 + mov es, ax + lea di, pnp_string + mov bp, sp ;; Call ROM init routine using seg:off on stack db 0xff ;; call_far ss:[bp+0] db 0x5e @@ -9431,6 +9535,38 @@ block_count_rounded: cli ;; In case expansion ROM BIOS turns IF on add sp, #2 ;; Pop offset value pop cx ;; Pop seg value (restore CX) + + ;; Look at the ROM's PnP Expansion header. Properly, we're supposed + ;; to init all the ROMs and then go back and build an IPL table of + ;; all the bootable devices, but we can get away with one pass. + mov ds, cx ;; ROM base + mov bx, 0x001a ;; 0x1A is the offset into ROM header that contains... + mov ax, [bx] ;; the offset of PnP expansion header, where... + cmp ax, #0x5024 ;; we look for signature "$PnP" + jne no_bev + mov ax, 2[bx] + cmp ax, #0x506e + jne no_bev + mov ax, 0x1a[bx] ;; 0x1A is also the offset into the expansion header of... + cmp ax, #0x0000 ;; the Bootstrap Entry Vector, or zero if there is none. + je no_bev + + ;; Found a device that thinks it can boot the system. Record its BEV. 
+ mov bx, #IPL_SEG ;; Go to the segment where the IPL table lives + mov ds, bx + mov bx, IPL_COUNT_OFFSET ;; Read the number of entries so far + cmp bx, #IPL_TABLE_ENTRIES + je no_bev ;; Get out if the table is full + shl bx, #0x4 ;; Turn count into offset (entries are 16 bytes) + mov 0[bx], #0x80 ;; This entry is a BEV device + mov 6[bx], cx ;; Build a far pointer from the segment... + mov 4[bx], ax ;; and the offset + shr bx, #0x4 ;; Turn the offset back into a count + inc bx ;; We have one more entry now + mov IPL_COUNT_OFFSET, bx ;; Remember that. + +no_bev: + pop di ;; Restore DI pop ax ;; Restore AX rom_scan_increment: shl ax, #5 ;; convert 512-bytes blocks to 16-byte increments @@ -9764,6 +9900,8 @@ post_default_ints: call smbios_init #endif + call _init_boot_vectors + call rom_scan call _print_bios_banner diff --git a/tools/firmware/vmxassist/vm86.c b/tools/firmware/vmxassist/vm86.c index 7a76ad8b15..78c1da0d88 100644 --- a/tools/firmware/vmxassist/vm86.c +++ b/tools/firmware/vmxassist/vm86.c @@ -297,7 +297,7 @@ getreg32(struct regs *regs, int r) case 1: return regs->ecx; case 2: return regs->edx; case 3: return regs->ebx; - case 4: return regs->esp; + case 4: return regs->uesp; case 5: return regs->ebp; case 6: return regs->esi; case 7: return regs->edi; @@ -319,10 +319,10 @@ getreg8(struct regs *regs, int r) case 1: return regs->ecx & 0xFF; /* cl */ case 2: return regs->edx & 0xFF; /* dl */ case 3: return regs->ebx & 0xFF; /* bl */ - case 4: return (regs->esp >> 8) & 0xFF; /* ah */ - case 5: return (regs->ebp >> 8) & 0xFF; /* ch */ - case 6: return (regs->esi >> 8) & 0xFF; /* dh */ - case 7: return (regs->edi >> 8) & 0xFF; /* bh */ + case 4: return (regs->eax >> 8) & 0xFF; /* ah */ + case 5: return (regs->ecx >> 8) & 0xFF; /* ch */ + case 6: return (regs->edx >> 8) & 0xFF; /* dh */ + case 7: return (regs->ebx >> 8) & 0xFF; /* bh */ } return ~0; } @@ -335,7 +335,7 @@ setreg32(struct regs *regs, int r, unsigned v) case 1: regs->ecx = v; break; case 2: 
regs->edx = v; break; case 3: regs->ebx = v; break; - case 4: regs->esp = v; break; + case 4: regs->uesp = v; break; case 5: regs->ebp = v; break; case 6: regs->esi = v; break; case 7: regs->edi = v; break; @@ -357,10 +357,10 @@ setreg8(struct regs *regs, int r, unsigned v) case 1: regs->ecx = (regs->ecx & ~0xFF) | v; break; case 2: regs->edx = (regs->edx & ~0xFF) | v; break; case 3: regs->ebx = (regs->ebx & ~0xFF) | v; break; - case 4: regs->esp = (regs->esp & ~0xFF00) | (v << 8); break; - case 5: regs->ebp = (regs->ebp & ~0xFF00) | (v << 8); break; - case 6: regs->esi = (regs->esi & ~0xFF00) | (v << 8); break; - case 7: regs->edi = (regs->edi & ~0xFF00) | (v << 8); break; + case 4: regs->eax = (regs->eax & ~0xFF00) | (v << 8); break; + case 5: regs->ecx = (regs->ecx & ~0xFF00) | (v << 8); break; + case 6: regs->edx = (regs->edx & ~0xFF00) | (v << 8); break; + case 7: regs->ebx = (regs->ebx & ~0xFF00) | (v << 8); break; } } @@ -961,9 +961,9 @@ protected_mode(struct regs *regs) /* initialize jump environment to warp back to protected mode */ regs->uss = DATA_SELECTOR; - regs->uesp = stack_top; + regs->uesp = (unsigned long)stack_top; regs->cs = CODE_SELECTOR; - regs->eip = (unsigned) switch_to_protected_mode; + regs->eip = (unsigned long)switch_to_protected_mode; /* this should get us into 32-bit mode */ } diff --git a/tools/ioemu/hw/pc.c b/tools/ioemu/hw/pc.c index 234c4722bb..1a686515a7 100644 --- a/tools/ioemu/hw/pc.c +++ b/tools/ioemu/hw/pc.c @@ -168,6 +168,8 @@ static int get_bios_disk(char *boot_device, int index) { return 0x02; /* hard drive */ case 'd': return 0x03; /* cdrom */ + case 'n': + return 0x04; /* network */ } } return 0x00; /* no device */ diff --git a/tools/ioemu/hw/vga.c b/tools/ioemu/hw/vga.c index 6b9317f048..a8402aa75f 100644 --- a/tools/ioemu/hw/vga.c +++ b/tools/ioemu/hw/vga.c @@ -2002,7 +2002,10 @@ void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base, s->vram_shadow = (uint8_t *)((long)(s->vram_shadow + 
TARGET_PAGE_SIZE - 1) & ~(TARGET_PAGE_SIZE - 1)); - s->vram_ptr = qemu_malloc(vga_ram_size); + /* Video RAM must be 128-bit aligned for SSE optimizations later */ + s->vram_alloc = qemu_malloc(vga_ram_size + 15); + s->vram_ptr = (uint8_t *)((long)(s->vram_alloc + 15) & ~15L); + s->vram_offset = vga_ram_offset; s->vram_size = vga_ram_size; s->ds = ds; @@ -2126,7 +2129,7 @@ void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size) } if (!vga_ram_base) { - vga_ram_base = qemu_malloc(vga_ram_size); + vga_ram_base = qemu_malloc(vga_ram_size + TARGET_PAGE_SIZE + 1); if (!vga_ram_base) { fprintf(stderr, "reallocate error\n"); return NULL; @@ -2134,8 +2137,10 @@ void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size) } /* XXX lock needed? */ + old_pointer = s->vram_alloc; + s->vram_alloc = vga_ram_base; + vga_ram_base = (uint8_t *)((long)(vga_ram_base + 15) & ~15L); memcpy(vga_ram_base, s->vram_ptr, vga_ram_size); - old_pointer = s->vram_ptr; s->vram_ptr = vga_ram_base; return old_pointer; diff --git a/tools/ioemu/hw/vga_int.h b/tools/ioemu/hw/vga_int.h index 47f1574d11..f5a98a8904 100644 --- a/tools/ioemu/hw/vga_int.h +++ b/tools/ioemu/hw/vga_int.h @@ -78,6 +78,7 @@ #define VGA_MAX_HEIGHT 2048 #define VGA_STATE_COMMON \ + uint8_t *vram_alloc; \ uint8_t *vram_ptr; \ uint8_t *vram_shadow; \ unsigned long vram_offset; \ diff --git a/tools/ioemu/patches/domain-timeoffset b/tools/ioemu/patches/domain-timeoffset index 45e081381d..605fa32ed0 100644 --- a/tools/ioemu/patches/domain-timeoffset +++ b/tools/ioemu/patches/domain-timeoffset @@ -1,7 +1,7 @@ Index: ioemu/hw/mc146818rtc.c =================================================================== ---- ioemu.orig/hw/mc146818rtc.c 2006-12-08 18:36:31.000000000 +0000 -+++ ioemu/hw/mc146818rtc.c 2006-12-08 18:36:36.000000000 +0000 +--- ioemu.orig/hw/mc146818rtc.c 2006-12-20 15:21:33.000000000 +0000 ++++ ioemu/hw/mc146818rtc.c 2006-12-20 15:21:50.000000000 +0000 @@ -178,10 +178,27 @@ } } @@ -46,8 +46,8 
@@ Index: ioemu/hw/mc146818rtc.c static void rtc_copy_date(RTCState *s) Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-12-08 18:36:35.000000000 +0000 -+++ ioemu/hw/pc.c 2006-12-08 18:36:36.000000000 +0000 +--- ioemu.orig/hw/pc.c 2006-12-20 15:21:49.000000000 +0000 ++++ ioemu/hw/pc.c 2006-12-20 15:21:50.000000000 +0000 @@ -159,7 +159,7 @@ } @@ -117,8 +117,8 @@ Index: ioemu/hw/pc.c QEMUMachine pc_machine = { Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 18:36:35.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 18:36:36.000000000 +0000 +--- ioemu.orig/vl.c 2006-12-20 15:21:49.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:50.000000000 +0000 @@ -163,6 +163,8 @@ int xc_handle; @@ -162,7 +162,7 @@ Index: ioemu/vl.c } } } -@@ -6492,7 +6500,8 @@ +@@ -6484,7 +6492,8 @@ machine->init(ram_size, vga_ram_size, boot_device, ds, fd_filename, snapshot, @@ -174,8 +174,8 @@ Index: ioemu/vl.c if (usb_enabled) { Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-12-08 18:36:35.000000000 +0000 -+++ ioemu/vl.h 2006-12-08 18:36:36.000000000 +0000 +--- ioemu.orig/vl.h 2006-12-20 15:21:49.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:50.000000000 +0000 @@ -576,7 +576,7 @@ int boot_device, DisplayState *ds, const char **fd_filename, int snapshot, diff --git a/tools/ioemu/patches/fix-interrupt-routing b/tools/ioemu/patches/fix-interrupt-routing index b5d8285398..2d27d62472 100644 --- a/tools/ioemu/patches/fix-interrupt-routing +++ b/tools/ioemu/patches/fix-interrupt-routing @@ -17,23 +17,21 @@ Signed-off-by: Keir Fraser <keir@xensource.com> Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-12-08 18:21:56.000000000 +0000 -+++ ioemu/Makefile.target 2006-12-08 18:22:35.000000000 +0000 -@@ -298,7 +298,7 @@ +--- 
ioemu.orig/Makefile.target 2006-12-20 15:04:55.000000000 +0000 ++++ ioemu/Makefile.target 2006-12-20 15:08:16.000000000 +0000 +@@ -296,9 +296,9 @@ + + # qemu-dm objects ifeq ($(ARCH),ia64) - LIBOBJS=helper2.o exec-dm.o i8259-dm.o +-LIBOBJS=helper2.o exec-dm.o i8259-dm.o ++LIBOBJS=helper2.o exec-dm.o i8259-dm.o piix_pci-dm.o else -LIBOBJS=helper2.o exec-dm.o i8259-dm.o rtc-dm.o +LIBOBJS=helper2.o exec-dm.o i8259-dm.o rtc-dm.o piix_pci-dm.o endif all: $(PROGS) -@@ -360,11 +360,11 @@ - # Hardware support - VL_OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o $(AUDIODRV) - ifeq ($(ARCH),ia64) --VL_OBJS+= fdc.o mc146818rtc.o serial.o pc.o -+VL_OBJS+= fdc.o mc146818rtc.o serial.o pc.o piix_pci.o +@@ -364,7 +364,7 @@ else VL_OBJS+= fdc.o serial.o pc.o endif @@ -44,8 +42,8 @@ Index: ioemu/Makefile.target VL_OBJS+= xenstore.o Index: ioemu/target-i386-dm/i8259-dm.c =================================================================== ---- ioemu.orig/target-i386-dm/i8259-dm.c 2006-12-08 18:21:36.000000000 +0000 -+++ ioemu/target-i386-dm/i8259-dm.c 2006-12-08 18:22:35.000000000 +0000 +--- ioemu.orig/target-i386-dm/i8259-dm.c 2006-12-20 15:04:54.000000000 +0000 ++++ ioemu/target-i386-dm/i8259-dm.c 2006-12-20 15:04:55.000000000 +0000 @@ -33,7 +33,7 @@ void pic_set_irq_new(void *opaque, int irq, int level) @@ -58,7 +56,7 @@ Index: ioemu/target-i386-dm/i8259-dm.c Index: ioemu/target-i386-dm/piix_pci-dm.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/piix_pci-dm.c 2006-12-08 18:22:35.000000000 +0000 ++++ ioemu/target-i386-dm/piix_pci-dm.c 2006-12-20 15:08:13.000000000 +0000 @@ -0,0 +1,397 @@ +/* + * QEMU i440FX/PIIX3 PCI Bridge Emulation diff --git a/tools/ioemu/patches/ioemu-ia64 b/tools/ioemu/patches/ioemu-ia64 index d8afe7fe3c..863ecbd993 100644 --- a/tools/ioemu/patches/ioemu-ia64 +++ b/tools/ioemu/patches/ioemu-ia64 @@ -1,7 +1,7 @@ Index: ioemu/hw/iommu.c 
=================================================================== ---- ioemu.orig/hw/iommu.c 2006-12-08 02:02:07.000000000 +0000 -+++ ioemu/hw/iommu.c 2006-12-08 02:02:34.000000000 +0000 +--- ioemu.orig/hw/iommu.c 2006-12-20 15:04:54.000000000 +0000 ++++ ioemu/hw/iommu.c 2006-12-20 15:04:54.000000000 +0000 @@ -82,7 +82,11 @@ #define IOPTE_VALID 0x00000002 /* IOPTE is valid */ #define IOPTE_WAZ 0x00000001 /* Write as zeros */ @@ -16,8 +16,8 @@ Index: ioemu/hw/iommu.c Index: ioemu/cpu-all.h =================================================================== ---- ioemu.orig/cpu-all.h 2006-12-08 02:02:07.000000000 +0000 -+++ ioemu/cpu-all.h 2006-12-08 02:02:34.000000000 +0000 +--- ioemu.orig/cpu-all.h 2006-12-20 15:04:54.000000000 +0000 ++++ ioemu/cpu-all.h 2006-12-20 15:04:54.000000000 +0000 @@ -835,6 +835,31 @@ :"=m" (*(volatile long *)addr) :"dIr" (nr)); @@ -52,13 +52,17 @@ Index: ioemu/cpu-all.h /* memory API */ Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 02:02:28.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 02:02:34.000000000 +0000 -@@ -6137,6 +6137,11 @@ +--- ioemu.orig/vl.c 2006-12-20 15:04:54.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:12:00.000000000 +0000 +@@ -6137,6 +6137,15 @@ exit(1); } +#if defined (__ia64__) ++ /* ram_size passed from xend has added on GFW memory, ++ so we must subtract it here */ ++ ram_size -= 16 * MEM_M; ++ + if (ram_size > MMIO_START) + ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. 
*/ +#endif @@ -66,7 +70,7 @@ Index: ioemu/vl.c /* init the memory */ phys_ram_size = ram_size + vga_ram_size + bios_size; -@@ -6161,6 +6166,7 @@ +@@ -6161,6 +6170,7 @@ exit(-1); } @@ -74,41 +78,29 @@ Index: ioemu/vl.c for ( i = 0; i < tmp_nr_pages; i++) page_array[i] = i; -@@ -6185,6 +6191,48 @@ +@@ -6185,6 +6195,36 @@ free(page_array); +#elif defined(__ia64__) + -+ if (xc_ia64_get_pfn_list(xc_handle, domid, page_array, -+ IO_PAGE_START >> PAGE_SHIFT, 3) != 3) { -+ fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno); -+ exit(-1); -+ } -+ + shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, + PROT_READ|PROT_WRITE, -+ page_array[0]); -+ -+ fprintf(logfile, "shared page at pfn:%lx, mfn: %016lx\n", -+ IO_PAGE_START >> PAGE_SHIFT, page_array[0]); ++ IO_PAGE_START >> PAGE_SHIFT); + + buffered_io_page =xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, + PROT_READ|PROT_WRITE, -+ page_array[2]); -+ fprintf(logfile, "Buffered IO page at pfn:%lx, mfn: %016lx\n", -+ BUFFER_IO_PAGE_START >> PAGE_SHIFT, page_array[2]); -+ -+ if (xc_ia64_get_pfn_list(xc_handle, domid, -+ page_array, 0, nr_pages) != nr_pages) { -+ fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno); -+ exit(-1); -+ } ++ BUFFER_IO_PAGE_START >> PAGE_SHIFT); + ++ for (i = 0; i < tmp_nr_pages; i++) ++ page_array[i] = i; ++ ++ /* VTI will not use memory between 3G~4G, so we just pass a legal pfn ++ to make QEMU map continuous virtual memory space */ + if (ram_size > MMIO_START) { + for (i = 0 ; i < (MEM_G >> PAGE_SHIFT); i++) + page_array[(MMIO_START >> PAGE_SHIFT) + i] = -+ page_array[(IO_PAGE_START >> PAGE_SHIFT) + 1]; ++ (STORE_PAGE_START >> PAGE_SHIFT); + } + + phys_ram_base = xc_map_foreign_batch(xc_handle, domid, @@ -125,8 +117,8 @@ Index: ioemu/vl.c phys_ram_base = qemu_vmalloc(phys_ram_size); Index: ioemu/exec-all.h =================================================================== ---- ioemu.orig/exec-all.h 2006-12-08 02:02:07.000000000 +0000 -+++ ioemu/exec-all.h 
2006-12-08 02:02:34.000000000 +0000 +--- ioemu.orig/exec-all.h 2006-12-20 15:04:54.000000000 +0000 ++++ ioemu/exec-all.h 2006-12-20 15:04:54.000000000 +0000 @@ -462,12 +462,13 @@ } #endif @@ -146,8 +138,8 @@ Index: ioemu/exec-all.h Index: ioemu/target-i386-dm/cpu.h =================================================================== ---- ioemu.orig/target-i386-dm/cpu.h 2006-12-08 02:02:07.000000000 +0000 -+++ ioemu/target-i386-dm/cpu.h 2006-12-08 02:02:34.000000000 +0000 +--- ioemu.orig/target-i386-dm/cpu.h 2006-12-20 15:04:54.000000000 +0000 ++++ ioemu/target-i386-dm/cpu.h 2006-12-20 15:10:13.000000000 +0000 @@ -78,7 +78,11 @@ /* helper2.c */ int main_loop(void); @@ -163,7 +155,7 @@ Index: ioemu/target-i386-dm/cpu.h Index: ioemu/ia64_intrinsic.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/ia64_intrinsic.h 2006-12-08 02:02:34.000000000 +0000 ++++ ioemu/ia64_intrinsic.h 2006-12-20 15:04:54.000000000 +0000 @@ -0,0 +1,276 @@ +#ifndef IA64_INTRINSIC_H +#define IA64_INTRINSIC_H diff --git a/tools/ioemu/patches/qemu-bootorder b/tools/ioemu/patches/qemu-bootorder index e0142b4313..c03e6a1c23 100644 --- a/tools/ioemu/patches/qemu-bootorder +++ b/tools/ioemu/patches/qemu-bootorder @@ -1,7 +1,7 @@ Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 02:02:38.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 02:02:38.000000000 +0000 +--- ioemu.orig/vl.c 2006-12-20 15:12:08.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:19.000000000 +0000 @@ -125,7 +125,7 @@ struct sockaddr_in vnclisten_addr; const char* keyboard_layout = NULL; @@ -17,15 +17,17 @@ Index: ioemu/vl.c case QEMU_OPTION_boot: - boot_device = optarg[0]; - if (boot_device != 'a' && -+ boot_device = strdup(optarg); -+ if (strspn(boot_device, "acd" - #ifdef TARGET_SPARC +-#ifdef TARGET_SPARC - // Network boot - boot_device != 'n' && -+ "n" - #endif +-#endif - boot_device != 'c' 
&& boot_device != 'd') { - fprintf(stderr, "qemu: invalid boot device '%c'\n", boot_device); ++ boot_device = strdup(optarg); ++ if (strspn(boot_device, "acd" ++#if defined(TARGET_SPARC) || defined(TARGET_I386) ++ "n" ++#endif + ) != strlen(boot_device)) { + fprintf(stderr, "qemu: invalid boot device in '%s'\n", + boot_device); @@ -48,7 +50,7 @@ Index: ioemu/vl.c #endif /* !CONFIG_DM */ setvbuf(stdout, NULL, _IOLBF, 0); -@@ -6598,6 +6600,7 @@ +@@ -6590,6 +6592,7 @@ ds, fd_filename, snapshot, kernel_filename, kernel_cmdline, initrd_filename, timeoffset); @@ -58,8 +60,8 @@ Index: ioemu/vl.c if (usb_enabled) { Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-12-08 02:02:38.000000000 +0000 -+++ ioemu/vl.h 2006-12-08 02:02:38.000000000 +0000 +--- ioemu.orig/vl.h 2006-12-20 15:12:08.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:14.000000000 +0000 @@ -578,7 +578,7 @@ #ifndef QEMU_TOOL @@ -80,9 +82,9 @@ Index: ioemu/vl.h uint32_t initrd_image, uint32_t initrd_size, Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-12-08 02:02:38.000000000 +0000 -+++ ioemu/hw/pc.c 2006-12-08 02:02:38.000000000 +0000 -@@ -158,8 +158,23 @@ +--- ioemu.orig/hw/pc.c 2006-12-20 15:12:08.000000000 +0000 ++++ ioemu/hw/pc.c 2006-12-20 15:21:19.000000000 +0000 +@@ -158,8 +158,25 @@ rtc_set_memory(s, info_ofs + 8, sectors); } @@ -96,6 +98,8 @@ Index: ioemu/hw/pc.c + return 0x02; /* hard drive */ + case 'd': + return 0x03; /* cdrom */ ++ case 'n': ++ return 0x04; /* network */ + } + } + return 0x00; /* no device */ @@ -107,7 +111,7 @@ Index: ioemu/hw/pc.c { RTCState *s = rtc_state; int val; -@@ -205,21 +220,14 @@ +@@ -205,21 +222,14 @@ rtc_set_memory(s, 0x34, val); rtc_set_memory(s, 0x35, val >> 8); @@ -137,7 +141,7 @@ Index: ioemu/hw/pc.c /* floppy type */ -@@ -617,7 +625,7 @@ +@@ -617,7 +627,7 @@ #define NOBIOS 1 /* PC hardware initialisation */ @@ -146,7 +150,7 @@ 
Index: ioemu/hw/pc.c DisplayState *ds, const char **fd_filename, int snapshot, const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, time_t timeoffset, -@@ -919,7 +927,7 @@ +@@ -919,7 +929,7 @@ } } @@ -155,7 +159,7 @@ Index: ioemu/hw/pc.c DisplayState *ds, const char **fd_filename, int snapshot, const char *kernel_filename, -@@ -933,7 +941,7 @@ +@@ -933,7 +943,7 @@ initrd_filename, timeoffset, 1); } diff --git a/tools/ioemu/patches/series b/tools/ioemu/patches/series index 6d6f1f8ee6..f6022ddf8a 100644 --- a/tools/ioemu/patches/series +++ b/tools/ioemu/patches/series @@ -67,5 +67,5 @@ usb-uhci-buffer-size vnc-monitor-shift-key-processing ide-error-reporting vnc-numpad-handling -xen-mapcache -p3 +xen-mapcache usb-mouse-tablet-status-check -p3 diff --git a/tools/ioemu/patches/tpm-tis-device b/tools/ioemu/patches/tpm-tis-device index f0b4c00858..b4bcbaba91 100644 --- a/tools/ioemu/patches/tpm-tis-device +++ b/tools/ioemu/patches/tpm-tis-device @@ -22,8 +22,8 @@ Signed-off-by: Stefan Berger <stefanb@us.ibm.com> Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-12-08 18:33:48.000000000 +0000 -+++ ioemu/Makefile.target 2006-12-08 18:35:14.000000000 +0000 +--- ioemu.orig/Makefile.target 2006-12-20 15:21:55.000000000 +0000 ++++ ioemu/Makefile.target 2006-12-20 15:21:55.000000000 +0000 @@ -369,6 +369,7 @@ VL_OBJS+= piix4acpi.o VL_OBJS+= xenstore.o @@ -34,9 +34,9 @@ Index: ioemu/Makefile.target ifeq ($(TARGET_BASE_ARCH), ppc) Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-12-08 18:33:47.000000000 +0000 -+++ ioemu/hw/pc.c 2006-12-08 18:33:48.000000000 +0000 -@@ -875,6 +875,9 @@ +--- ioemu.orig/hw/pc.c 2006-12-20 15:21:54.000000000 +0000 ++++ ioemu/hw/pc.c 2006-12-20 15:21:55.000000000 +0000 +@@ -877,6 +877,9 @@ } } @@ -49,7 +49,7 @@ Index: ioemu/hw/pc.c Index: ioemu/hw/tpm_tis.c 
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/hw/tpm_tis.c 2006-12-08 18:35:25.000000000 +0000 ++++ ioemu/hw/tpm_tis.c 2006-12-20 15:21:55.000000000 +0000 @@ -0,0 +1,1120 @@ +/* + * tpm_tis.c - QEMU emulator for a 1.2 TPM with TIS interface @@ -1173,8 +1173,8 @@ Index: ioemu/hw/tpm_tis.c +} Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-12-08 18:33:48.000000000 +0000 -+++ ioemu/vl.h 2006-12-08 18:35:14.000000000 +0000 +--- ioemu.orig/vl.h 2006-12-20 15:21:55.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:55.000000000 +0000 @@ -932,6 +932,10 @@ void piix4_pm_init(PCIBus *bus, int devfn); void acpi_bios_init(void); diff --git a/tools/ioemu/patches/vnc-display-find-unused b/tools/ioemu/patches/vnc-display-find-unused index 2f7e509153..cec29ea456 100644 --- a/tools/ioemu/patches/vnc-display-find-unused +++ b/tools/ioemu/patches/vnc-display-find-unused @@ -1,7 +1,7 @@ Index: ioemu/vnc.c =================================================================== ---- ioemu.orig/vnc.c 2006-12-08 02:02:36.000000000 +0000 -+++ ioemu/vnc.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/vnc.c 2006-12-20 15:21:52.000000000 +0000 ++++ ioemu/vnc.c 2006-12-20 15:21:52.000000000 +0000 @@ -1197,7 +1197,7 @@ } } @@ -50,8 +50,8 @@ Index: ioemu/vnc.c int vnc_start_viewer(int port) Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 02:02:36.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/vl.c 2006-12-20 15:21:51.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:52.000000000 +0000 @@ -121,6 +121,7 @@ static DisplayState display_state; int nographic; @@ -104,7 +104,7 @@ Index: ioemu/vl.c } } } -@@ -6468,7 +6478,7 @@ +@@ -6460,7 +6470,7 @@ if (nographic) { dumb_display_init(ds); } else if (vnc_display != -1) { @@ -115,8 +115,8 @@ Index: 
ioemu/vl.c } else { Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-12-08 02:02:36.000000000 +0000 -+++ ioemu/vl.h 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/vl.h 2006-12-20 15:21:51.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:52.000000000 +0000 @@ -785,7 +785,7 @@ void cocoa_display_init(DisplayState *ds, int full_screen); diff --git a/tools/ioemu/patches/vnc-fixes b/tools/ioemu/patches/vnc-fixes index 0c847affa0..2a96cc46d5 100644 --- a/tools/ioemu/patches/vnc-fixes +++ b/tools/ioemu/patches/vnc-fixes @@ -1,8 +1,8 @@ Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 02:02:36.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 02:02:36.000000000 +0000 -@@ -6519,8 +6519,10 @@ +--- ioemu.orig/vl.c 2006-12-20 15:21:51.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:51.000000000 +0000 +@@ -6511,8 +6511,10 @@ } } @@ -17,8 +17,8 @@ Index: ioemu/vl.c if (use_gdbstub) { Index: ioemu/vnc.c =================================================================== ---- ioemu.orig/vnc.c 2006-12-08 02:02:36.000000000 +0000 -+++ ioemu/vnc.c 2006-12-08 02:02:36.000000000 +0000 +--- ioemu.orig/vnc.c 2006-12-20 15:21:51.000000000 +0000 ++++ ioemu/vnc.c 2006-12-20 15:21:51.000000000 +0000 @@ -3,6 +3,7 @@ * * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws> @@ -531,8 +531,8 @@ Index: ioemu/vnc.c } Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-12-08 02:02:36.000000000 +0000 -+++ ioemu/vl.h 2006-12-08 02:02:36.000000000 +0000 +--- ioemu.orig/vl.h 2006-12-20 15:21:51.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:51.000000000 +0000 @@ -319,6 +319,7 @@ int is_graphic_console(void); CharDriverState *text_console_init(DisplayState *ds); diff --git a/tools/ioemu/patches/vnc-listen-specific-interface b/tools/ioemu/patches/vnc-listen-specific-interface index a11a554a52..7375479239 
100644 --- a/tools/ioemu/patches/vnc-listen-specific-interface +++ b/tools/ioemu/patches/vnc-listen-specific-interface @@ -20,8 +20,8 @@ Signed-off-by: Daniel P. Berrange <berrange@redhat.com> Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 02:02:37.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/vl.c 2006-12-20 15:21:52.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:52.000000000 +0000 @@ -122,6 +122,7 @@ int nographic; int vncviewer; @@ -114,7 +114,7 @@ Index: ioemu/vl.c } } } -@@ -6478,7 +6493,7 @@ +@@ -6470,7 +6485,7 @@ if (nographic) { dumb_display_init(ds); } else if (vnc_display != -1) { @@ -125,8 +125,8 @@ Index: ioemu/vl.c } else { Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-12-08 02:02:37.000000000 +0000 -+++ ioemu/vl.h 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/vl.h 2006-12-20 15:21:52.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:52.000000000 +0000 @@ -37,6 +37,8 @@ #include <unistd.h> #include <fcntl.h> @@ -147,8 +147,8 @@ Index: ioemu/vl.h /* ide.c */ Index: ioemu/vnc.c =================================================================== ---- ioemu.orig/vnc.c 2006-12-08 02:02:37.000000000 +0000 -+++ ioemu/vnc.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/vnc.c 2006-12-20 15:21:52.000000000 +0000 ++++ ioemu/vnc.c 2006-12-20 15:21:52.000000000 +0000 @@ -1197,9 +1197,8 @@ } } diff --git a/tools/ioemu/patches/vnc-password b/tools/ioemu/patches/vnc-password index 1a923ddfc0..287f9a44eb 100644 --- a/tools/ioemu/patches/vnc-password +++ b/tools/ioemu/patches/vnc-password @@ -17,8 +17,8 @@ Signed-off-by: Masami Watanabe <masami.watanabe@jp.fujitsu.com> Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-12-08 18:20:53.000000000 +0000 -+++ ioemu/Makefile.target 2006-12-08 
18:20:53.000000000 +0000 +--- ioemu.orig/Makefile.target 2006-12-20 15:21:55.000000000 +0000 ++++ ioemu/Makefile.target 2006-12-20 15:21:55.000000000 +0000 @@ -407,6 +407,7 @@ VL_OBJS+=sdl.o endif @@ -39,8 +39,8 @@ Index: ioemu/Makefile.target Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 18:20:52.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 18:20:53.000000000 +0000 +--- ioemu.orig/vl.c 2006-12-20 15:21:54.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:55.000000000 +0000 @@ -171,6 +171,9 @@ char domain_name[1024] = { 'H','V', 'M', 'X', 'E', 'N', '-'}; extern int domid; @@ -59,7 +59,7 @@ Index: ioemu/vl.c #ifndef CONFIG_DM #ifdef TARGET_PPC cdrom_index = 1; -@@ -6543,6 +6547,10 @@ +@@ -6535,6 +6539,10 @@ init_ioports(); @@ -72,8 +72,8 @@ Index: ioemu/vl.c dumb_display_init(ds); Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-12-08 18:20:52.000000000 +0000 -+++ ioemu/vl.h 2006-12-08 18:20:53.000000000 +0000 +--- ioemu.orig/vl.h 2006-12-20 15:21:54.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:55.000000000 +0000 @@ -1214,6 +1214,7 @@ void xenstore_process_event(void *opaque); void xenstore_check_new_media_present(int timeout); @@ -92,8 +92,8 @@ Index: ioemu/vl.h #endif /* VL_H */ Index: ioemu/vnc.c =================================================================== ---- ioemu.orig/vnc.c 2006-12-08 18:20:52.000000000 +0000 -+++ ioemu/vnc.c 2006-12-08 18:20:53.000000000 +0000 +--- ioemu.orig/vnc.c 2006-12-20 15:21:52.000000000 +0000 ++++ ioemu/vnc.c 2006-12-20 15:21:55.000000000 +0000 @@ -44,6 +44,7 @@ #include "vnc_keysym.h" @@ -244,8 +244,8 @@ Index: ioemu/vnc.c +} Index: ioemu/xenstore.c =================================================================== ---- ioemu.orig/xenstore.c 2006-12-08 18:20:52.000000000 +0000 -+++ ioemu/xenstore.c 2006-12-08 18:20:53.000000000 +0000 +--- ioemu.orig/xenstore.c 2006-12-20 15:21:54.000000000 
+0000 ++++ ioemu/xenstore.c 2006-12-20 15:21:55.000000000 +0000 @@ -213,3 +213,54 @@ free(portstr); free(buf); @@ -304,7 +304,7 @@ Index: ioemu/xenstore.c Index: ioemu/d3des.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/d3des.c 2006-12-08 18:20:53.000000000 +0000 ++++ ioemu/d3des.c 2006-12-20 15:21:55.000000000 +0000 @@ -0,0 +1,434 @@ +/* + * This is D3DES (V5.09) by Richard Outerbridge with the double and @@ -743,7 +743,7 @@ Index: ioemu/d3des.c Index: ioemu/d3des.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/d3des.h 2006-12-08 18:20:53.000000000 +0000 ++++ ioemu/d3des.h 2006-12-20 15:21:55.000000000 +0000 @@ -0,0 +1,51 @@ +/* + * This is D3DES (V5.09) by Richard Outerbridge with the double and diff --git a/tools/ioemu/patches/vnc-start-vncviewer b/tools/ioemu/patches/vnc-start-vncviewer index e897b09c02..9e279f4a97 100644 --- a/tools/ioemu/patches/vnc-start-vncviewer +++ b/tools/ioemu/patches/vnc-start-vncviewer @@ -1,7 +1,7 @@ Index: ioemu/vnc.c =================================================================== ---- ioemu.orig/vnc.c 2006-12-08 02:02:36.000000000 +0000 -+++ ioemu/vnc.c 2006-12-08 02:02:36.000000000 +0000 +--- ioemu.orig/vnc.c 2006-12-20 15:21:51.000000000 +0000 ++++ ioemu/vnc.c 2006-12-20 15:21:51.000000000 +0000 @@ -1189,3 +1189,25 @@ vnc_dpy_resize(vs->ds, 640, 400); @@ -30,8 +30,8 @@ Index: ioemu/vnc.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 02:02:36.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 02:02:36.000000000 +0000 +--- ioemu.orig/vl.c 2006-12-20 15:21:51.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:51.000000000 +0000 @@ -120,6 +120,7 @@ int bios_size; static DisplayState display_state; @@ -82,7 +82,7 @@ Index: ioemu/vl.c } } } -@@ -6461,6 +6469,8 @@ +@@ -6453,6 +6461,8 @@ 
dumb_display_init(ds); } else if (vnc_display != -1) { vnc_display_init(ds, vnc_display); @@ -93,8 +93,8 @@ Index: ioemu/vl.c sdl_display_init(ds, full_screen); Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-12-08 02:02:36.000000000 +0000 -+++ ioemu/vl.h 2006-12-08 02:02:36.000000000 +0000 +--- ioemu.orig/vl.h 2006-12-20 15:21:51.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:51.000000000 +0000 @@ -786,6 +786,7 @@ /* vnc.c */ diff --git a/tools/ioemu/patches/xen-mapcache b/tools/ioemu/patches/xen-mapcache index dc37d662a7..a68ceefcec 100644 --- a/tools/ioemu/patches/xen-mapcache +++ b/tools/ioemu/patches/xen-mapcache @@ -15,83 +15,14 @@ Signed-off-by: Jun Nakajima <jun.nakajima@intel.com> Signed-off-by: Dexuan Cui <dexuan.cui@intel.com> Signed-off-by: Keir Fraser <keir@xensource.com> -diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/target-i386-dm/cpu.h ---- a/tools/ioemu/target-i386-dm/cpu.h Thu Dec 07 10:54:43 2006 +0000 -+++ b/tools/ioemu/target-i386-dm/cpu.h Thu Dec 07 11:12:52 2006 +0000 -@@ -25,7 +25,8 @@ - #ifdef TARGET_X86_64 - #define TARGET_LONG_BITS 64 - #else --#define TARGET_LONG_BITS 32 -+/* #define TARGET_LONG_BITS 32 */ -+#define TARGET_LONG_BITS 64 /* for Qemu map cache */ - #endif - - /* target supports implicit self modifying code */ -diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/target-i386-dm/exec-dm.c ---- a/tools/ioemu/target-i386-dm/exec-dm.c Thu Dec 07 10:54:43 2006 +0000 -+++ b/tools/ioemu/target-i386-dm/exec-dm.c Thu Dec 07 11:12:52 2006 +0000 -@@ -36,6 +36,7 @@ - - #include "cpu.h" - #include "exec-all.h" -+#include "vl.h" - - //#define DEBUG_TB_INVALIDATE - //#define DEBUG_FLUSH -@@ -426,6 +427,12 @@ static inline int paddr_is_ram(target_ph - #endif - } - -+#if defined(__i386__) || defined(__x86_64__) -+#define phys_ram_addr(x) (qemu_map_cache(x)) -+#elif defined(__ia64__) -+#define phys_ram_addr(x) (phys_ram_base + (x)) -+#endif -+ - void 
cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, - int len, int is_write) - { -@@ -438,7 +445,7 @@ void cpu_physical_memory_rw(target_phys_ - l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK); - if (l > len) - l = len; -- -+ - io_index = iomem_index(addr); - if (is_write) { - if (io_index) { -@@ -460,9 +467,10 @@ void cpu_physical_memory_rw(target_phys_ - } - } else if (paddr_is_ram(addr)) { - /* Reading from RAM */ -- memcpy(phys_ram_base + addr, buf, l); -+ ptr = phys_ram_addr(addr); -+ memcpy(ptr, buf, l); - #ifdef __ia64__ -- sync_icache((unsigned long)(phys_ram_base + addr), l); -+ sync_icache(ptr, l); - #endif - } - } else { -@@ -485,7 +493,8 @@ void cpu_physical_memory_rw(target_phys_ - } - } else if (paddr_is_ram(addr)) { - /* Reading from RAM */ -- memcpy(buf, phys_ram_base + addr, l); -+ ptr = phys_ram_addr(addr); -+ memcpy(buf, ptr, l); - } else { - /* Neither RAM nor known MMIO space */ - memset(buf, 0xff, len); -diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/vl.c ---- a/tools/ioemu/vl.c Thu Dec 07 10:54:43 2006 +0000 -+++ b/tools/ioemu/vl.c Thu Dec 07 11:12:52 2006 +0000 -@@ -5807,6 +5807,92 @@ int set_mm_mapping(int xc_handle, uint32 - +Index: ioemu/vl.c +=================================================================== +--- ioemu.orig/vl.c 2006-12-20 15:21:55.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:56.000000000 +0000 +@@ -5808,6 +5808,91 @@ return 0; } -+ + +#if defined(__i386__) || defined(__x86_64__) +static struct map_cache *mapcache_entry; +static unsigned long nr_buckets; @@ -104,8 +35,8 @@ diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/vl.c + if (nr_pages < max_pages) + max_pages = nr_pages; + -+ nr_buckets = (max_pages << PAGE_SHIFT) >> MCACHE_BUCKET_SHIFT; -+ ++ nr_buckets = max_pages + (1UL << (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)) - 1; ++ nr_buckets >>= (MCACHE_BUCKET_SHIFT - PAGE_SHIFT); + fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets); + + mapcache_entry = malloc(nr_buckets * sizeof(struct 
map_cache)); @@ -141,8 +72,7 @@ diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/vl.c + + entry = &mapcache_entry[address_index % nr_buckets]; + -+ if (entry->vaddr_base == NULL || entry->paddr_index != address_index) -+ { ++ if (entry->vaddr_base == NULL || entry->paddr_index != address_index) { + /* We need to remap a bucket. */ + uint8_t *vaddr_base; + unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT]; @@ -177,10 +107,11 @@ diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/vl.c + return last_address_vaddr + address_offset; +} +#endif - ++ int main(int argc, char **argv) { -@@ -6130,6 +6216,7 @@ int main(int argc, char **argv) + #ifdef CONFIG_GDBSTUB +@@ -6130,6 +6215,7 @@ break; case QEMU_OPTION_m: ram_size = atol(optarg) * 1024 * 1024; @@ -188,90 +119,195 @@ diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/vl.c if (ram_size <= 0) help(); #ifndef CONFIG_DM -@@ -6400,50 +6487,41 @@ int main(int argc, char **argv) +@@ -6404,50 +6490,41 @@ shared_page_nr = nr_pages - 1; #endif -+#if defined(__i386__) || defined(__x86_64__) -+ -+ if ( qemu_map_cache_init(tmp_nr_pages) ) -+ { -+ fprintf(logfile, "qemu_map_cache_init returned: error %d\n", errno); -+ exit(-1); -+ } -+ -+ shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, -+ PROT_READ|PROT_WRITE, shared_page_nr); -+ if (shared_page == NULL) { -+ fprintf(logfile, "map shared IO page returned error %d\n", errno); -+ exit(-1); -+ } -+ -+ fprintf(logfile, "shared page at pfn:%lx\n", shared_page_nr); -+ -+ buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, -+ PROT_READ|PROT_WRITE, -+ shared_page_nr - 2); -+ if (buffered_io_page == NULL) { -+ fprintf(logfile, "map buffered IO page returned error %d\n", errno); -+ exit(-1); -+ } -+ -+ fprintf(logfile, "buffered io page at pfn:%lx\n", shared_page_nr - 2); -+ -+#elif defined(__ia64__) -+ - page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t)); - if (page_array == NULL) { - fprintf(logfile, "malloc returned error %d\n", errno); - 
exit(-1); - } - --#if defined(__i386__) || defined(__x86_64__) +- page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t)); +- if (page_array == NULL) { +- fprintf(logfile, "malloc returned error %d\n", errno); +- exit(-1); +- } +- + #if defined(__i386__) || defined(__x86_64__) - for ( i = 0; i < tmp_nr_pages; i++) - page_array[i] = i; -- + - phys_ram_base = xc_map_foreign_batch(xc_handle, domid, - PROT_READ|PROT_WRITE, page_array, - tmp_nr_pages); - if (phys_ram_base == NULL) { - fprintf(logfile, "batch map guest memory returned error %d\n", errno); -- exit(-1); -- } -- -- shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, ++ if ( qemu_map_cache_init(tmp_nr_pages) ) ++ { ++ fprintf(logfile, "qemu_map_cache_init returned: error %d\n", errno); + exit(-1); + } + + shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, - PROT_READ|PROT_WRITE, - page_array[shared_page_nr]); -- if (shared_page == NULL) { -- fprintf(logfile, "map shared IO page returned error %d\n", errno); -- exit(-1); -- } -- ++ PROT_READ|PROT_WRITE, shared_page_nr); + if (shared_page == NULL) { + fprintf(logfile, "map shared IO page returned error %d\n", errno); + exit(-1); + } + - fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", - shared_page_nr, (uint64_t)(page_array[shared_page_nr])); -- -- buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, -- PROT_READ|PROT_WRITE, ++ fprintf(logfile, "shared page at pfn:%lx\n", shared_page_nr); + + buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, + PROT_READ|PROT_WRITE, - page_array[shared_page_nr - 2]); -- if (buffered_io_page == NULL) { -- fprintf(logfile, "map buffered IO page returned error %d\n", errno); -- exit(-1); -- } -- ++ shared_page_nr - 2); + if (buffered_io_page == NULL) { + fprintf(logfile, "map buffered IO page returned error %d\n", errno); + exit(-1); + } + - fprintf(logfile, "buffered io page at pfn:%lx, mfn: %"PRIx64"\n", - shared_page_nr - 2, 
(uint64_t)(page_array[shared_page_nr - 2])); - - free(page_array); -- --#elif defined(__ia64__) ++ fprintf(logfile, "buffered io page at pfn:%lx\n", shared_page_nr - 2); + + #elif defined(__ia64__) - - if (xc_ia64_get_pfn_list(xc_handle, domid, page_array, - IO_PAGE_START >> PAGE_SHIFT, 3) != 3) { - fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno); -diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/vl.h ---- a/tools/ioemu/vl.h Thu Dec 07 10:54:43 2006 +0000 -+++ b/tools/ioemu/vl.h Thu Dec 07 11:12:52 2006 +0000 -@@ -156,6 +156,26 @@ extern void *shared_vram; ++ ++ page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t)); ++ if (page_array == NULL) { ++ fprintf(logfile, "malloc returned error %d\n", errno); ++ exit(-1); ++ } ++ + shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, + PROT_READ|PROT_WRITE, + IO_PAGE_START >> PAGE_SHIFT); +Index: ioemu/target-i386-dm/exec-dm.c +=================================================================== +--- ioemu.orig/target-i386-dm/exec-dm.c 2006-12-20 15:21:42.000000000 +0000 ++++ ioemu/target-i386-dm/exec-dm.c 2006-12-21 11:32:29.000000000 +0000 +@@ -36,6 +36,7 @@ + + #include "cpu.h" + #include "exec-all.h" ++#include "vl.h" + + //#define DEBUG_TB_INVALIDATE + //#define DEBUG_FLUSH +@@ -127,10 +128,29 @@ + FILE *logfile; + int loglevel; + ++ ++#if defined(__i386__) || defined(__x86_64__) ++#define MAPCACHE ++#endif ++ ++#ifdef MAPCACHE ++#include <pthread.h> ++static pthread_mutex_t mapcache_mutex; ++#define mapcache_lock() pthread_mutex_lock(&mapcache_mutex) ++#define mapcache_unlock() pthread_mutex_unlock(&mapcache_mutex) ++#else ++#define mapcache_lock() ( (void)0 ) ++#define mapcache_unlock() ( (void)0 ) ++#endif ++ ++ + void cpu_exec_init(CPUState *env) + { + CPUState **penv; + int cpu_index; ++#ifdef MAPCACHE ++ pthread_mutexattr_t mxattr; ++#endif + + env->next_cpu = NULL; + penv = &first_cpu; +@@ -144,6 +164,14 @@ + + /* alloc dirty bits array */ + phys_ram_dirty = 
qemu_malloc(phys_ram_size >> TARGET_PAGE_BITS); ++ ++#ifdef MAPCACHE ++ /* setup memory access mutex to protect mapcache */ ++ pthread_mutexattr_init(&mxattr); ++ pthread_mutexattr_settype(&mxattr, PTHREAD_MUTEX_RECURSIVE); ++ pthread_mutex_init(&mapcache_mutex, &mxattr); ++ pthread_mutexattr_destroy(&mxattr); ++#endif + } + + /* enable or disable low levels log */ +@@ -426,19 +454,27 @@ + #endif + } + ++#if defined(__i386__) || defined(__x86_64__) ++#define phys_ram_addr(x) (qemu_map_cache(x)) ++#elif defined(__ia64__) ++#define phys_ram_addr(x) (phys_ram_base + (x)) ++#endif ++ + void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, + int len, int is_write) + { + int l, io_index; + uint8_t *ptr; + uint32_t val; +- ++ ++ mapcache_lock(); ++ + while (len > 0) { + /* How much can we copy before the next page boundary? */ + l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK); + if (l > len) + l = len; +- ++ + io_index = iomem_index(addr); + if (is_write) { + if (io_index) { +@@ -460,9 +496,10 @@ + } + } else if (paddr_is_ram(addr)) { + /* Reading from RAM */ +- memcpy(phys_ram_base + addr, buf, l); ++ ptr = phys_ram_addr(addr); ++ memcpy(ptr, buf, l); + #ifdef __ia64__ +- sync_icache((unsigned long)(phys_ram_base + addr), l); ++ sync_icache(ptr, l); + #endif + } + } else { +@@ -485,7 +522,8 @@ + } + } else if (paddr_is_ram(addr)) { + /* Reading from RAM */ +- memcpy(buf, phys_ram_base + addr, l); ++ ptr = phys_ram_addr(addr); ++ memcpy(buf, ptr, l); + } else { + /* Neither RAM nor known MMIO space */ + memset(buf, 0xff, len); +@@ -495,6 +533,8 @@ + buf += l; + addr += l; + } ++ ++ mapcache_unlock(); + } + #endif + +Index: ioemu/vl.h +=================================================================== +--- ioemu.orig/vl.h 2006-12-20 15:21:55.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:56.000000000 +0000 +@@ -156,6 +156,26 @@ extern FILE *logfile; @@ -298,3 +334,17 @@ diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/vl.h extern int xc_handle; extern int 
domid; +Index: ioemu/target-i386-dm/cpu.h +=================================================================== +--- ioemu.orig/target-i386-dm/cpu.h 2006-12-20 15:21:45.000000000 +0000 ++++ ioemu/target-i386-dm/cpu.h 2006-12-20 15:21:56.000000000 +0000 +@@ -25,7 +25,8 @@ + #ifdef TARGET_X86_64 + #define TARGET_LONG_BITS 64 + #else +-#define TARGET_LONG_BITS 32 ++/* #define TARGET_LONG_BITS 32 */ ++#define TARGET_LONG_BITS 64 /* for Qemu map cache */ + #endif + + /* target supports implicit self modifying code */ diff --git a/tools/ioemu/patches/xen-support-buffered-ioreqs b/tools/ioemu/patches/xen-support-buffered-ioreqs index 10d2a9a84a..5a0242c4bd 100644 --- a/tools/ioemu/patches/xen-support-buffered-ioreqs +++ b/tools/ioemu/patches/xen-support-buffered-ioreqs @@ -1,7 +1,7 @@ Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 02:02:37.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/vl.c 2006-12-20 15:21:54.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:54.000000000 +0000 @@ -5838,6 +5838,7 @@ unsigned long nr_pages, tmp_nr_pages, shared_page_nr; xen_pfn_t *page_array; @@ -10,7 +10,7 @@ Index: ioemu/vl.c char qemu_dm_logfilename[64]; -@@ -6418,6 +6419,17 @@ +@@ -6422,6 +6423,17 @@ fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", shared_page_nr, (uint64_t)(page_array[shared_page_nr])); @@ -30,8 +30,8 @@ Index: ioemu/vl.c #elif defined(__ia64__) Index: ioemu/target-i386-dm/helper2.c =================================================================== ---- ioemu.orig/target-i386-dm/helper2.c 2006-12-08 02:02:35.000000000 +0000 -+++ ioemu/target-i386-dm/helper2.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/target-i386-dm/helper2.c 2006-12-20 15:21:47.000000000 +0000 ++++ ioemu/target-i386-dm/helper2.c 2006-12-20 15:21:54.000000000 +0000 @@ -76,6 +76,10 @@ shared_iopage_t *shared_page = NULL; diff --git 
a/tools/ioemu/patches/xenstore-block-device-config b/tools/ioemu/patches/xenstore-block-device-config index 5944e3b45f..3ac28fd10f 100644 --- a/tools/ioemu/patches/xenstore-block-device-config +++ b/tools/ioemu/patches/xenstore-block-device-config @@ -1,7 +1,7 @@ Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-12-08 02:02:36.000000000 +0000 -+++ ioemu/Makefile.target 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/Makefile.target 2006-12-20 15:21:51.000000000 +0000 ++++ ioemu/Makefile.target 2006-12-20 15:21:53.000000000 +0000 @@ -359,6 +359,7 @@ VL_OBJS+= cirrus_vga.o mixeng.o parallel.o acpi.o piix_pci.o VL_OBJS+= usb-uhci.o @@ -13,7 +13,7 @@ Index: ioemu/Makefile.target Index: ioemu/xenstore.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/xenstore.c 2006-12-08 02:02:37.000000000 +0000 ++++ ioemu/xenstore.c 2006-12-20 15:21:53.000000000 +0000 @@ -0,0 +1,187 @@ +/* + * This file is subject to the terms and conditions of the GNU General @@ -204,8 +204,8 @@ Index: ioemu/xenstore.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 02:02:37.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/vl.c 2006-12-20 15:21:52.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:53.000000000 +0000 @@ -5256,9 +5256,11 @@ "Standard options:\n" "-M machine select emulated machine (-M ? 
for list)\n" @@ -359,7 +359,7 @@ Index: ioemu/vl.c setvbuf(stdout, NULL, _IOLBF, 0); -@@ -6441,6 +6472,7 @@ +@@ -6433,6 +6464,7 @@ #endif /* !CONFIG_DM */ @@ -367,7 +367,7 @@ Index: ioemu/vl.c /* we always create the cdrom drive, even if no disk is there */ bdrv_init(); if (cdrom_index >= 0) { -@@ -6467,6 +6499,7 @@ +@@ -6459,6 +6491,7 @@ } } } @@ -375,7 +375,7 @@ Index: ioemu/vl.c /* we always create at least one floppy disk */ fd_table[0] = bdrv_new("fda"); -@@ -6545,6 +6578,8 @@ +@@ -6537,6 +6570,8 @@ } } @@ -386,8 +386,8 @@ Index: ioemu/vl.c kernel_filename, kernel_cmdline, initrd_filename, Index: ioemu/monitor.c =================================================================== ---- ioemu.orig/monitor.c 2006-12-08 02:02:35.000000000 +0000 -+++ ioemu/monitor.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/monitor.c 2006-12-20 15:21:47.000000000 +0000 ++++ ioemu/monitor.c 2006-12-20 15:21:53.000000000 +0000 @@ -24,6 +24,7 @@ #include "vl.h" #include "disas.h" @@ -416,8 +416,8 @@ Index: ioemu/monitor.c int i; Index: ioemu/block.c =================================================================== ---- ioemu.orig/block.c 2006-12-08 02:02:06.000000000 +0000 -+++ ioemu/block.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/block.c 2006-12-20 15:21:31.000000000 +0000 ++++ ioemu/block.c 2006-12-20 15:21:53.000000000 +0000 @@ -758,6 +758,7 @@ static void raw_close(BlockDriverState *bs) { @@ -428,8 +428,8 @@ Index: ioemu/block.c Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-12-08 02:02:37.000000000 +0000 -+++ ioemu/vl.h 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/vl.h 2006-12-20 15:21:52.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:53.000000000 +0000 @@ -1191,6 +1191,8 @@ void term_print_help(void); void monitor_readline(const char *prompt, int is_password, @@ -455,8 +455,8 @@ Index: ioemu/vl.h extern char domain_name[]; Index: ioemu/hw/ide.c 
=================================================================== ---- ioemu.orig/hw/ide.c 2006-12-08 02:02:35.000000000 +0000 -+++ ioemu/hw/ide.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/hw/ide.c 2006-12-20 15:21:49.000000000 +0000 ++++ ioemu/hw/ide.c 2006-12-20 15:21:53.000000000 +0000 @@ -1158,6 +1158,7 @@ } else { ide_atapi_cmd_error(s, SENSE_NOT_READY, diff --git a/tools/ioemu/patches/xenstore-write-vnc-port b/tools/ioemu/patches/xenstore-write-vnc-port index 259d6451f3..725ccc70c0 100644 --- a/tools/ioemu/patches/xenstore-write-vnc-port +++ b/tools/ioemu/patches/xenstore-write-vnc-port @@ -1,7 +1,7 @@ Index: ioemu/xenstore.c =================================================================== ---- ioemu.orig/xenstore.c 2006-12-08 02:02:37.000000000 +0000 -+++ ioemu/xenstore.c 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/xenstore.c 2006-12-20 15:21:53.000000000 +0000 ++++ ioemu/xenstore.c 2006-12-20 15:21:54.000000000 +0000 @@ -185,3 +185,31 @@ free(image); free(vec); @@ -36,9 +36,9 @@ Index: ioemu/xenstore.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-12-08 02:02:37.000000000 +0000 -+++ ioemu/vl.c 2006-12-08 02:02:37.000000000 +0000 -@@ -6535,6 +6535,7 @@ +--- ioemu.orig/vl.c 2006-12-20 15:21:53.000000000 +0000 ++++ ioemu/vl.c 2006-12-20 15:21:54.000000000 +0000 +@@ -6527,6 +6527,7 @@ vnc_display = vnc_display_init(ds, vnc_display, vncunused, &vnclisten_addr); if (vncviewer) vnc_start_viewer(vnc_display); @@ -48,8 +48,8 @@ Index: ioemu/vl.c sdl_display_init(ds, full_screen); Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-12-08 02:02:37.000000000 +0000 -+++ ioemu/vl.h 2006-12-08 02:02:37.000000000 +0000 +--- ioemu.orig/vl.h 2006-12-20 15:21:53.000000000 +0000 ++++ ioemu/vl.h 2006-12-20 15:21:54.000000000 +0000 @@ -1210,6 +1210,7 @@ int xenstore_fd(void); void xenstore_process_event(void *opaque); diff --git 
a/tools/ioemu/target-i386-dm/exec-dm.c b/tools/ioemu/target-i386-dm/exec-dm.c index b3f9d3ae7d..cfc82dd221 100644 --- a/tools/ioemu/target-i386-dm/exec-dm.c +++ b/tools/ioemu/target-i386-dm/exec-dm.c @@ -134,6 +134,7 @@ int loglevel; #endif #ifdef MAPCACHE +#include <pthread.h> static pthread_mutex_t mapcache_mutex; #define mapcache_lock() pthread_mutex_lock(&mapcache_mutex) #define mapcache_unlock() pthread_mutex_unlock(&mapcache_mutex) diff --git a/tools/ioemu/target-i386-dm/helper2.c b/tools/ioemu/target-i386-dm/helper2.c index aa7b042561..ae84b9ed23 100644 --- a/tools/ioemu/target-i386-dm/helper2.c +++ b/tools/ioemu/target-i386-dm/helper2.c @@ -498,6 +498,8 @@ void handle_buffered_io(void *opaque) void cpu_handle_ioreq(void *opaque) { + extern int vm_running; + extern int shutdown_requested; CPUState *env = opaque; ioreq_t *req = cpu_get_ioreq(); @@ -516,6 +518,25 @@ void cpu_handle_ioreq(void *opaque) } wmb(); /* Update ioreq contents /then/ update state. */ + + /* + * We do this before we send the response so that the tools + * have the opportunity to pick up on the reset before the + * guest resumes and does a hlt with interrupts disabled which + * causes Xen to powerdown the domain. 
+ */ + if (vm_running) { + if (shutdown_requested) { + fprintf(logfile, "shutdown requested in cpu_handle_ioreq\n"); + destroy_hvm_domain(); + } + if (reset_requested) { + fprintf(logfile, "reset requested in cpu_handle_ioreq.\n"); + qemu_system_reset(); + reset_requested = 0; + } + } + req->state = STATE_IORESP_READY; xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]); } diff --git a/tools/ioemu/vl.c b/tools/ioemu/vl.c index ffce41d173..0e3a50e517 100644 --- a/tools/ioemu/vl.c +++ b/tools/ioemu/vl.c @@ -6153,7 +6153,7 @@ int main(int argc, char **argv) case QEMU_OPTION_boot: boot_device = strdup(optarg); if (strspn(boot_device, "acd" -#ifdef TARGET_SPARC +#if defined(TARGET_SPARC) || defined(TARGET_I386) "n" #endif ) != strlen(boot_device)) { diff --git a/tools/ioemu/vnc.c b/tools/ioemu/vnc.c index 6d7cc31d4a..46f3614c21 100644 --- a/tools/ioemu/vnc.c +++ b/tools/ioemu/vnc.c @@ -54,12 +54,12 @@ typedef struct Buffer { size_t capacity; size_t offset; - char *buffer; + uint8_t *buffer; } Buffer; typedef struct VncState VncState; -typedef int VncReadEvent(VncState *vs, char *data, size_t len); +typedef int VncReadEvent(VncState *vs, uint8_t *data, size_t len); typedef void VncWritePixels(VncState *vs, void *data, int size); @@ -90,7 +90,7 @@ struct VncState uint64_t *update_row; /* outstanding updates */ int has_update; /* there's outstanding updates in the * visible area */ - char *old_data; + uint8_t *old_data; int depth; /* internal VNC frame buffer byte per pixel */ int has_resize; int has_hextile; @@ -140,7 +140,7 @@ static void _vnc_update_client(void *opaque); static void vnc_update_client(void *opaque); static void vnc_client_read(void *opaque); static void framebuffer_set_updated(VncState *vs, int x, int y, int w, int h); -static int make_challenge(char *random, int size); +static int make_challenge(unsigned char *random, int size); static void set_seed(unsigned int *seedp); static void get_random(int len, unsigned char *buf); @@ -330,7 +330,7 @@ 
static void vnc_write_pixels_generic(VncState *vs, void *pixels1, int size) static void send_framebuffer_update_raw(VncState *vs, int x, int y, int w, int h) { int i; - char *row; + uint8_t *row; vnc_framebuffer_update(vs, x, y, w, h, 0); @@ -394,9 +394,9 @@ static void send_framebuffer_update(VncState *vs, int x, int y, int w, int h) static void vnc_copy(DisplayState *ds, int src_x, int src_y, int dst_x, int dst_y, int w, int h) { int src, dst; - char *src_row; - char *dst_row; - char *old_row; + uint8_t *src_row; + uint8_t *dst_row; + uint8_t *old_row; int y = 0; int pitch = ds->linesize; VncState *vs = ds->opaque; @@ -465,8 +465,8 @@ static void _vnc_update_client(void *opaque) VncState *vs = opaque; int64_t now; int y; - char *row; - char *old_row; + uint8_t *row; + uint8_t *old_row; uint64_t width_mask; int n_rectangles; int saved_offset; @@ -491,7 +491,7 @@ static void _vnc_update_client(void *opaque) for (y = 0; y < vs->ds->height; y++) { if (vs->dirty_row[y] & width_mask) { int x; - char *ptr, *old_ptr; + uint8_t *ptr, *old_ptr; ptr = row; old_ptr = old_row; @@ -654,7 +654,7 @@ static int buffer_empty(Buffer *buffer) return buffer->offset == 0; } -static char *buffer_end(Buffer *buffer) +static uint8_t *buffer_end(Buffer *buffer) { return buffer->buffer + buffer->offset; } @@ -778,7 +778,7 @@ static void vnc_write_u32(VncState *vs, uint32_t value) static void vnc_write_u16(VncState *vs, uint16_t value) { - char buf[2]; + uint8_t buf[2]; buf[0] = (value >> 8) & 0xFF; buf[1] = value & 0xFF; @@ -788,7 +788,7 @@ static void vnc_write_u16(VncState *vs, uint16_t value) static void vnc_write_u8(VncState *vs, uint8_t value) { - vnc_write(vs, (char *)&value, 1); + vnc_write(vs, &value, 1); } static void vnc_flush(VncState *vs) @@ -797,23 +797,23 @@ static void vnc_flush(VncState *vs) vnc_client_write(vs); } -static uint8_t read_u8(char *data, size_t offset) +static uint8_t read_u8(uint8_t *data, size_t offset) { return data[offset]; } -static uint16_t read_u16(char 
*data, size_t offset) +static uint16_t read_u16(uint8_t *data, size_t offset) { return ((data[offset] & 0xFF) << 8) | (data[offset + 1] & 0xFF); } -static int32_t read_s32(char *data, size_t offset) +static int32_t read_s32(uint8_t *data, size_t offset) { return (int32_t)((data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3]); } -static uint32_t read_u32(char *data, size_t offset) +static uint32_t read_u32(uint8_t *data, size_t offset) { return ((data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3]); @@ -1115,11 +1115,10 @@ static void set_pixel_format(VncState *vs, vga_hw_update(); } -static int protocol_client_msg(VncState *vs, char *data, size_t len) +static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len) { int i; uint16_t limit; - int64_t now; switch (data[0]) { case 0: @@ -1188,7 +1187,7 @@ static int protocol_client_msg(VncState *vs, char *data, size_t len) return 8 + v; } - client_cut_text(vs, read_u32(data, 4), data + 8); + client_cut_text(vs, read_u32(data, 4), (char *)(data + 8)); break; default: printf("Msg: %d\n", data[0]); @@ -1200,7 +1199,7 @@ static int protocol_client_msg(VncState *vs, char *data, size_t len) return 0; } -static int protocol_client_init(VncState *vs, char *data, size_t len) +static int protocol_client_init(VncState *vs, uint8_t *data, size_t len) { size_t l; char pad[3] = { 0, 0, 0 }; @@ -1261,7 +1260,7 @@ static int protocol_client_init(VncState *vs, char *data, size_t len) return 0; } -static int protocol_response(VncState *vs, char *client_response, size_t len) +static int protocol_response(VncState *vs, uint8_t *client_response, size_t len) { extern char vncpasswd[64]; extern unsigned char challenge[AUTHCHALLENGESIZE]; @@ -1299,7 +1298,7 @@ static int protocol_response(VncState *vs, char *client_response, size_t len) return 0; } -static int protocol_version(VncState *vs, char *version, size_t len) +static int protocol_version(VncState 
*vs, uint8_t *version, size_t len) { extern char vncpasswd[64]; extern unsigned char challenge[AUTHCHALLENGESIZE]; @@ -1318,7 +1317,7 @@ static int protocol_version(VncState *vs, char *version, size_t len) support = 0; - if (maj = 3) { + if (maj == 3) { if (min == 3 || min ==4) { support = 1; } @@ -1468,7 +1467,7 @@ int vnc_start_viewer(int port) unsigned int seed; -static int make_challenge(char *random, int size) +static int make_challenge(unsigned char *random, int size) { set_seed(&seed); diff --git a/tools/ioemu/vnchextile.h b/tools/ioemu/vnchextile.h index 16a354d60a..5d34074a65 100644 --- a/tools/ioemu/vnchextile.h +++ b/tools/ioemu/vnchextile.h @@ -13,7 +13,7 @@ static void CONCAT(send_hextile_tile_, NAME)(VncState *vs, uint32_t *last_fg32, int *has_bg, int *has_fg) { - char *row = (vs->ds->data + y * vs->ds->linesize + x * vs->depth); + uint8_t *row = (vs->ds->data + y * vs->ds->linesize + x * vs->depth); pixel_t *irow = (pixel_t *)row; int j, i; pixel_t *last_bg = (pixel_t *)last_bg32; @@ -119,7 +119,7 @@ static void CONCAT(send_hextile_tile_, NAME)(VncState *vs, for (j = 0; j < h; j++) { int has_color = 0; int min_x = -1; - pixel_t color; + pixel_t color = 0; for (i = 0; i < w; i++) { if (!has_color) { diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c index 21e20ffaf9..8c0c52238b 100644 --- a/tools/libxc/xc_hvm_build.c +++ b/tools/libxc/xc_hvm_build.c @@ -233,8 +233,7 @@ static int setup_guest(int xc_handle, SCRATCH_PFN)) == NULL) ) goto error_out; memset(shared_info, 0, PAGE_SIZE); - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - shared_info->vcpu_info[i].evtchn_upcall_mask = 1; + /* NB. evtchn_upcall_mask is unused: leave as zero. 
*/ memset(&shared_info->evtchn_mask[0], 0xff, sizeof(shared_info->evtchn_mask)); munmap(shared_info, PAGE_SIZE); diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c index 1ce8107a32..8066d5a047 100644 --- a/tools/libxc/xc_linux_build.c +++ b/tools/libxc/xc_linux_build.c @@ -595,6 +595,7 @@ static int compat_check(int xc_handle, struct domain_setup_info *dsi) return 0; } +#ifndef __x86_64__//temp if (strstr(xen_caps, "xen-3.0-x86_32p")) { if (dsi->pae_kernel == PAEKERN_bimodal) { dsi->pae_kernel = PAEKERN_extended_cr3; @@ -612,6 +613,7 @@ static int compat_check(int xc_handle, struct domain_setup_info *dsi) return 0; } } +#endif return 1; } @@ -1079,7 +1081,7 @@ static int setup_guest(int xc_handle, static int xc_linux_build_internal(int xc_handle, uint32_t domid, unsigned int mem_mb, - char *image, + const char *image, unsigned long image_size, struct initrd_info *initrd, const char *cmdline, diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c index 1d28226bd6..827c45693e 100644 --- a/tools/libxc/xc_linux_restore.c +++ b/tools/libxc/xc_linux_restore.c @@ -709,7 +709,7 @@ int xc_linux_restore(int xc_handle, int io_fd, goto out; } - for (i = 0; i < ctxt.gdt_ents; i += 512) { + for (i = 0; (512*i) < ctxt.gdt_ents; i++) { pfn = ctxt.gdt_frames[i]; if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { ERROR("GDT frame number is bad"); diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c index a38f80c7fe..7ca8453848 100644 --- a/tools/libxc/xc_linux_save.c +++ b/tools/libxc/xc_linux_save.c @@ -44,6 +44,7 @@ static xen_pfn_t *live_p2m = NULL; /* Live mapping of system MFN to PFN table. 
*/ static xen_pfn_t *live_m2p = NULL; +static unsigned long m2p_mfn0; /* grep fodder: machine_to_phys */ @@ -80,7 +81,7 @@ static xen_pfn_t *live_m2p = NULL; #define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / 8) #define BITMAP_ENTRY(_nr,_bmap) \ - ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] + ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] #define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) @@ -112,7 +113,7 @@ static inline unsigned int hweight32(unsigned int w) static inline int count_bits ( int nr, volatile void *addr) { int i, count = 0; - unsigned long *p = (unsigned long *)addr; + volatile unsigned long *p = (volatile unsigned long *)addr; /* We know that the array is padded to unsigned long. */ for( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ ) count += hweight32(*p); @@ -440,13 +441,23 @@ static int canonicalize_pagetable(unsigned long type, unsigned long pfn, ** that this check will fail for other L2s. */ if (pt_levels == 3 && type == XEN_DOMCTL_PFINFO_L2TAB) { - -/* XXX index of the L2 entry in PAE mode which holds the guest LPT */ -#define PAE_GLPT_L2ENTRY (495) - pte = ((uint64_t*)spage)[PAE_GLPT_L2ENTRY]; - - if(((pte >> PAGE_SHIFT) & 0x0fffffff) == live_p2m[pfn]) - xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + int hstart; + unsigned long he; + + hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + he = ((const uint64_t *) spage)[hstart]; + + if ( ((he >> PAGE_SHIFT) & 0x0fffffff) == m2p_mfn0 ) { + /* hvirt starts with xen stuff... */ + xen_start = hstart; + } else if ( hvirt_start != 0xf5800000 ) { + /* old L2s from before hole was shrunk... 
*/ + hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + he = ((const uint64_t *) spage)[hstart]; + + if( ((he >> PAGE_SHIFT) & 0x0fffffff) == m2p_mfn0 ) + xen_start = hstart; + } } if (pt_levels == 4 && type == XEN_DOMCTL_PFINFO_L4TAB) { @@ -464,9 +475,9 @@ static int canonicalize_pagetable(unsigned long type, unsigned long pfn, unsigned long pfn, mfn; if (pt_levels == 2) - pte = ((uint32_t*)spage)[i]; + pte = ((const uint32_t*)spage)[i]; else - pte = ((uint64_t*)spage)[i]; + pte = ((const uint64_t*)spage)[i]; if (i >= xen_start && i < xen_end) pte = 0; @@ -550,6 +561,8 @@ static xen_pfn_t *xc_map_m2p(int xc_handle, return NULL; } + m2p_mfn0 = entries[0].mfn; + free(extent_start); free(entries); @@ -915,14 +928,14 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, if(!is_mapped(pfn_type[batch])) { - /* not currently in pusedo-physical map -- set bit - in to_fix that we must send this page in last_iter - unless its sent sooner anyhow */ + /* + ** not currently in psuedo-physical map -- set bit + ** in to_fix since we must send this page in last_iter + ** unless its sent sooner anyhow, or it never enters + ** pseudo-physical map (e.g. for ballooned down domains) + */ set_bit(n, to_fix); - if( (iter > 1) && IS_REAL_PFN(n) ) - DPRINTF("netbuf race: iter %d, pfn %x. 
mfn %lx\n", - iter, n, pfn_type[batch]); continue; } @@ -1052,7 +1065,7 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, DPRINTF("(of which %ld were fixups)\n", needed_to_fix ); } - if (last_iter && debug){ + if (last_iter && debug) { int minusone = -1; memset(to_send, 0xff, BITMAP_SIZE); debug = 0; @@ -1068,17 +1081,14 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, continue; } - if (last_iter) break; + if (last_iter) + break; if (live) { - - - if( - ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || + if (((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || (iter >= max_iters) || (sent_this_iter+skip_this_iter < 50) || - (total_sent > max_pfn*max_factor) ) { - + (total_sent > max_pfn*max_factor)) { DPRINTF("Start last iteration\n"); last_iter = 1; @@ -1106,8 +1116,6 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, print_stats(xc_handle, dom, sent_this_iter, &stats, 1); } - - } /* end of while 1 */ DPRINTF("All memory is saved\n"); @@ -1159,7 +1167,7 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, } /* Canonicalise each GDT frame number. 
*/ - for ( i = 0; i < ctxt.gdt_ents; i += 512 ) { + for ( i = 0; (512*i) < ctxt.gdt_ents; i++ ) { if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) { ERROR("GDT frame is not in range of pseudophys map"); goto out; diff --git a/tools/libxc/xc_load_bin.c b/tools/libxc/xc_load_bin.c index 5544ca899d..17409aaf09 100644 --- a/tools/libxc/xc_load_bin.c +++ b/tools/libxc/xc_load_bin.c @@ -98,7 +98,7 @@ struct xen_bin_image_table #define FLAGS_MASK ((~ 0) & (~ XEN_REACTOS_FLAG_ALIGN4K)) #define FLAGS_REQUIRED XEN_REACTOS_FLAG_ADDRSVALID -static struct xen_bin_image_table * +static const struct xen_bin_image_table * findtable(const char *image, unsigned long image_size); static int parsebinimage( @@ -122,11 +122,11 @@ int probe_bin(const char *image, return 0; } -static struct xen_bin_image_table * +static const struct xen_bin_image_table * findtable(const char *image, unsigned long image_size) { - struct xen_bin_image_table *table; - unsigned long *probe_ptr; + const struct xen_bin_image_table *table; + const unsigned long *probe_ptr; unsigned probe_index; unsigned probe_count; @@ -142,13 +142,13 @@ findtable(const char *image, unsigned long image_size) sizeof(unsigned long); /* Search for the magic header */ - probe_ptr = (unsigned long *) image; + probe_ptr = (const unsigned long *) image; table = NULL; for ( probe_index = 0; probe_index < probe_count; probe_index++ ) { if ( XEN_REACTOS_MAGIC3 == *probe_ptr ) { - table = (struct xen_bin_image_table *) probe_ptr; + table = (const struct xen_bin_image_table *) probe_ptr; /* Checksum correct? 
*/ if ( 0 == table->magic + table->flags + table->checksum ) { @@ -165,7 +165,7 @@ static int parsebinimage(const char *image, unsigned long image_size, struct domain_setup_info *dsi) { - struct xen_bin_image_table *image_info; + const struct xen_bin_image_table *image_info; unsigned long start_addr; unsigned long end_addr; @@ -186,13 +186,13 @@ static int parsebinimage(const char *image, /* Sanity check on the addresses */ if ( image_info->header_addr < image_info->load_addr || - ((char *) image_info - image) < + ((const char *) image_info - image) < (image_info->header_addr - image_info->load_addr) ) { ERROR("Invalid header_addr."); return -EINVAL; } - start_addr = image_info->header_addr - ((char *) image_info - image); + start_addr = image_info->header_addr - ((const char *) image_info - image); if ( 0 != image_info->load_end_addr && ( image_info->load_end_addr < image_info->load_end_addr || start_addr + image_size < image_info->load_end_addr ) ) @@ -221,7 +221,7 @@ static int parsebinimage(const char *image, else { dsi->v_end = image_info->load_addr + image_size - - (((char *) image_info - image) - + (((const char *) image_info - image) - (image_info->header_addr - image_info->load_addr)); } dsi->v_kernstart = dsi->v_start; @@ -240,7 +240,7 @@ loadbinimage( unsigned long size; char *va; unsigned long done, chunksz; - struct xen_bin_image_table *image_info; + const struct xen_bin_image_table *image_info; image_info = findtable(image, image_size); if ( NULL == image_info ) @@ -252,7 +252,7 @@ loadbinimage( /* Determine image size */ if ( 0 == image_info->load_end_addr ) { - size = image_size - (((char *) image_info - image) - + size = image_size - (((const char *)image_info - image) - (image_info->header_addr - image_info->load_addr)); } @@ -262,7 +262,7 @@ loadbinimage( } /* It's possible that we need to skip the first part of the image */ - image += ((char *)image_info - image) - + image += ((const char *)image_info - image) - (image_info->header_addr - 
image_info->load_addr); for ( done = 0; done < size; done += chunksz ) diff --git a/tools/libxc/xc_load_elf.c b/tools/libxc/xc_load_elf.c index 331165f62a..cabc164cca 100644 --- a/tools/libxc/xc_load_elf.c +++ b/tools/libxc/xc_load_elf.c @@ -75,7 +75,7 @@ int probe_elf(const char *image, unsigned long image_size, struct load_funcs *load_funcs) { - Elf_Ehdr *ehdr = (Elf_Ehdr *)image; + const Elf_Ehdr *ehdr = (const Elf_Ehdr *)image; if ( !IS_ELF(*ehdr) ) return -EINVAL; @@ -86,7 +86,7 @@ int probe_elf(const char *image, return 0; } -static inline int is_loadable_phdr(Elf_Phdr *phdr) +static inline int is_loadable_phdr(const Elf_Phdr *phdr) { return ((phdr->p_type == PT_LOAD) && ((phdr->p_flags & (PF_W|PF_X)) != 0)); @@ -96,12 +96,13 @@ static inline int is_loadable_phdr(Elf_Phdr *phdr) * Fallback for kernels containing only the legacy __xen_guest string * and no ELF notes. */ -static int is_xen_guest_section(Elf_Shdr *shdr, const char *shstrtab) +static int is_xen_guest_section(const Elf_Shdr *shdr, const char *shstrtab) { return strcmp(&shstrtab[shdr->sh_name], "__xen_guest") == 0; } -static const char *xen_guest_lookup(struct domain_setup_info *dsi, int type) +static const char *xen_guest_lookup( + const struct domain_setup_info *dsi, int type) { const char *xenguest_fallbacks[] = { [XEN_ELFNOTE_ENTRY] = "VIRT_ENTRY=", @@ -134,7 +135,8 @@ static const char *xen_guest_lookup(struct domain_setup_info *dsi, int type) return p + strlen(fallback); } -static const char *xen_guest_string(struct domain_setup_info *dsi, int type) +static const char *xen_guest_string( + const struct domain_setup_info *dsi, int type) { const char *p = xen_guest_lookup(dsi, type); @@ -148,8 +150,8 @@ static const char *xen_guest_string(struct domain_setup_info *dsi, int type) return p; } -static unsigned long long xen_guest_numeric(struct domain_setup_info *dsi, - int type, int *defined) +static unsigned long long xen_guest_numeric( + const struct domain_setup_info *dsi, int type, int 
*defined) { const char *p = xen_guest_lookup(dsi, type); unsigned long long value; @@ -175,19 +177,19 @@ static unsigned long long xen_guest_numeric(struct domain_setup_info *dsi, /* * Interface to the Xen ELF notes. */ -#define ELFNOTE_NAME(_n_) ((void*)(_n_) + sizeof(*(_n_))) +#define ELFNOTE_NAME(_n_) ((const void*)(_n_) + sizeof(*(_n_))) #define ELFNOTE_DESC(_n_) (ELFNOTE_NAME(_n_) + (((_n_)->namesz+3)&~3)) #define ELFNOTE_NEXT(_n_) (ELFNOTE_DESC(_n_) + (((_n_)->descsz+3)&~3)) -static int is_xen_elfnote_section(const char *image, Elf_Shdr *shdr) +static int is_xen_elfnote_section(const char *image, const Elf_Shdr *shdr) { - Elf_Note *note; + const Elf_Note *note; if ( shdr->sh_type != SHT_NOTE ) return 0; - for ( note = (Elf_Note *)(image + shdr->sh_offset); - note < (Elf_Note *)(image + shdr->sh_offset + shdr->sh_size); + for ( note = (const Elf_Note *)(image + shdr->sh_offset); + note < (const Elf_Note *)(image + shdr->sh_offset + shdr->sh_size); note = ELFNOTE_NEXT(note) ) { if ( !strncmp(ELFNOTE_NAME(note), "Xen", 4) ) @@ -197,15 +199,16 @@ static int is_xen_elfnote_section(const char *image, Elf_Shdr *shdr) return 0; } -static Elf_Note *xen_elfnote_lookup(struct domain_setup_info *dsi, int type) +static const Elf_Note *xen_elfnote_lookup( + const struct domain_setup_info *dsi, int type) { - Elf_Note *note; + const Elf_Note *note; if ( !dsi->__elfnote_section ) return NULL; - for ( note = (Elf_Note *)dsi->__elfnote_section; - note < (Elf_Note *)dsi->__elfnote_section_end; + for ( note = (const Elf_Note *)dsi->__elfnote_section; + note < (const Elf_Note *)dsi->__elfnote_section_end; note = ELFNOTE_NEXT(note) ) { if ( strncmp(ELFNOTE_NAME(note), "Xen", 4) ) @@ -218,9 +221,9 @@ static Elf_Note *xen_elfnote_lookup(struct domain_setup_info *dsi, int type) return NULL; } -const char *xen_elfnote_string(struct domain_setup_info *dsi, int type) +const char *xen_elfnote_string(const struct domain_setup_info *dsi, int type) { - Elf_Note *note; + const Elf_Note *note; 
if ( !dsi->__elfnote_section ) return xen_guest_string(dsi, type); @@ -232,10 +235,10 @@ const char *xen_elfnote_string(struct domain_setup_info *dsi, int type) return (const char *)ELFNOTE_DESC(note); } -unsigned long long xen_elfnote_numeric(struct domain_setup_info *dsi, +unsigned long long xen_elfnote_numeric(const struct domain_setup_info *dsi, int type, int *defined) { - Elf_Note *note; + const Elf_Note *note; *defined = 0; @@ -252,10 +255,10 @@ unsigned long long xen_elfnote_numeric(struct domain_setup_info *dsi, { case 4: *defined = 1; - return *(uint32_t*)ELFNOTE_DESC(note); + return *(const uint32_t*)ELFNOTE_DESC(note); case 8: *defined = 1; - return *(uint64_t*)ELFNOTE_DESC(note); + return *(const uint64_t*)ELFNOTE_DESC(note); default: xc_set_error(XC_INVALID_KERNEL, "elfnotes: unknown data size %#x for numeric type note %#x\n", @@ -268,9 +271,9 @@ static int parseelfimage(const char *image, unsigned long image_len, struct domain_setup_info *dsi) { - Elf_Ehdr *ehdr = (Elf_Ehdr *)image; - Elf_Phdr *phdr; - Elf_Shdr *shdr; + const Elf_Ehdr *ehdr = (const Elf_Ehdr *)image; + const Elf_Phdr *phdr; + const Elf_Shdr *shdr; Elf_Addr kernstart = ~0, kernend = 0, vaddr, virt_entry; const char *shstrtab, *p; int h, virt_base_defined, elf_pa_off_defined, virt_entry_defined; @@ -331,12 +334,13 @@ static int parseelfimage(const char *image, /* Look for .notes segment containing at least one Xen note */ for ( h = 0; h < ehdr->e_shnum; h++ ) { - shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize)); + shdr = (const Elf_Shdr *)( + image + ehdr->e_shoff + (h*ehdr->e_shentsize)); if ( !is_xen_elfnote_section(image, shdr) ) continue; - dsi->__elfnote_section = (void *)image + shdr->sh_offset; + dsi->__elfnote_section = (const void *)image + shdr->sh_offset; dsi->__elfnote_section_end = - (void *)image + shdr->sh_offset + shdr->sh_size; + (const void *)image + shdr->sh_offset + shdr->sh_size; break; } @@ -350,16 +354,18 @@ static int parseelfimage(const char 
*image, "ELF image has no section-header strings table."); return -EINVAL; } - shdr = (Elf_Shdr *)(image + ehdr->e_shoff + + shdr = (const Elf_Shdr *)(image + ehdr->e_shoff + (ehdr->e_shstrndx*ehdr->e_shentsize)); shstrtab = image + shdr->sh_offset; for ( h = 0; h < ehdr->e_shnum; h++ ) { - shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize)); + shdr = (const Elf_Shdr *)( + image + ehdr->e_shoff + (h*ehdr->e_shentsize)); if ( is_xen_guest_section(shdr, shstrtab) ) { - dsi->__xen_guest_string = (char *)image + shdr->sh_offset; + dsi->__xen_guest_string = + (const char *)image + shdr->sh_offset; break; } } @@ -442,8 +448,8 @@ static int parseelfimage(const char *image, * If we are using the modern ELF notes interface then the default * is 0. */ - dsi->elf_paddr_offset = - xen_elfnote_numeric(dsi, XEN_ELFNOTE_PADDR_OFFSET, &elf_pa_off_defined); + dsi->elf_paddr_offset = xen_elfnote_numeric(dsi, XEN_ELFNOTE_PADDR_OFFSET, + &elf_pa_off_defined); if ( !elf_pa_off_defined ) { if ( dsi->__elfnote_section ) @@ -462,7 +468,8 @@ static int parseelfimage(const char *image, for ( h = 0; h < ehdr->e_phnum; h++ ) { - phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize)); + phdr = (const Elf_Phdr *)( + image + ehdr->e_phoff + (h*ehdr->e_phentsize)); if ( !is_loadable_phdr(phdr) ) continue; vaddr = phdr->p_paddr - dsi->elf_paddr_offset + dsi->v_start; @@ -514,8 +521,8 @@ loadelfimage( const char *image, unsigned long elfsize, int xch, uint32_t dom, xen_pfn_t *parray, struct domain_setup_info *dsi) { - Elf_Ehdr *ehdr = (Elf_Ehdr *)image; - Elf_Phdr *phdr; + const Elf_Ehdr *ehdr = (const Elf_Ehdr *)image; + const Elf_Phdr *phdr; int h; char *va; @@ -523,7 +530,8 @@ loadelfimage( for ( h = 0; h < ehdr->e_phnum; h++ ) { - phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize)); + phdr = (const Elf_Phdr *)( + image + ehdr->e_phoff + (h*ehdr->e_phentsize)); if ( !is_loadable_phdr(phdr) ) continue; @@ -569,7 +577,8 @@ loadelfsymtab( const char *image, 
int xch, uint32_t dom, xen_pfn_t *parray, struct domain_setup_info *dsi) { - Elf_Ehdr *ehdr = (Elf_Ehdr *)image, *sym_ehdr; + const Elf_Ehdr *ehdr = (const Elf_Ehdr *)image; + Elf_Ehdr *sym_ehdr; Elf_Shdr *shdr; unsigned long maxva, symva; char *p; diff --git a/tools/libxc/xg_private.h b/tools/libxc/xg_private.h index 5533364f4a..4b0f5c9124 100644 --- a/tools/libxc/xg_private.h +++ b/tools/libxc/xg_private.h @@ -146,7 +146,7 @@ struct domain_setup_info * You should use the xen_elfnote_* accessors below in order to * pickup the correct one and retain backwards compatibility. */ - void *__elfnote_section, *__elfnote_section_end; + const void *__elfnote_section, *__elfnote_section_end; const char *__xen_guest_string; }; @@ -162,14 +162,14 @@ typedef int (*loadimagefunc)(const char *image, unsigned long image_size, * in the note is returned and *defined is set to non-zero. If no such * note is found then *defined is set to 0 and 0 is returned. */ -extern unsigned long long xen_elfnote_numeric(struct domain_setup_info *dsi, +extern unsigned long long xen_elfnote_numeric(const struct domain_setup_info *dsi, int type, int *defined); /* * If an ELF note of the given type is found then the string contained * in the value is returned, otherwise NULL is returned. */ -extern const char * xen_elfnote_string(struct domain_setup_info *dsi, +extern const char * xen_elfnote_string(const struct domain_setup_info *dsi, int type); struct load_funcs diff --git a/tools/libxc/xg_save_restore.h b/tools/libxc/xg_save_restore.h index 0000eb7843..6275b37ef0 100644 --- a/tools/libxc/xg_save_restore.h +++ b/tools/libxc/xg_save_restore.h @@ -53,8 +53,17 @@ static int get_platform_info(int xc_handle, uint32_t dom, *hvirt_start = xen_params.virt_start; + /* + * XXX For now, 32bit dom0's can only save/restore 32bit domUs + * on 64bit hypervisors, so no need to check which type of domain + * we're dealing with. 
+ */ if (strstr(xen_caps, "xen-3.0-x86_64")) +#if defined(__i386__) + *pt_levels = 3; +#else *pt_levels = 4; +#endif else if (strstr(xen_caps, "xen-3.0-x86_32p")) *pt_levels = 3; else if (strstr(xen_caps, "xen-3.0-x86_32")) @@ -101,12 +110,6 @@ static int get_platform_info(int xc_handle, uint32_t dom, /* Number of entries in the pfn_to_mfn_frame_list_list */ #define P2M_FLL_ENTRIES (((max_pfn)+(fpp*fpp)-1)/(fpp*fpp)) -/* Current guests allow 8MB 'slack' in their P2M */ -#define NR_SLACK_ENTRIES ((8 * 1024 * 1024) / PAGE_SIZE) - -/* Is the given PFN within the 'slack' region at the top of the P2M? */ -#define IS_REAL_PFN(_pfn) ((max_pfn - (_pfn)) > NR_SLACK_ENTRIES) - /* Returns TRUE if the PFN is currently mapped */ #define is_mapped(pfn_type) (!((pfn_type) & 0x80000000UL)) diff --git a/tools/libxen/include/xen_cpu_feature.h b/tools/libxen/include/xen_cpu_feature.h index 6e0614a8ac..57a34d2773 100644 --- a/tools/libxen/include/xen_cpu_feature.h +++ b/tools/libxen/include/xen_cpu_feature.h @@ -198,12 +198,12 @@ enum xen_cpu_feature /** * AMD 3DNow! extensions */ - XEN_CPU_FEATURE_3DNOWEXT, + XEN_CPU_FEATURE_THREEDNOWEXT, /** * 3DNow! 
*/ - XEN_CPU_FEATURE_3DNOW, + XEN_CPU_FEATURE_THREEDNOW, /** * CPU in recovery mode diff --git a/tools/libxen/src/xen_common.c b/tools/libxen/src/xen_common.c index ea0909d1a4..9834565ff0 100644 --- a/tools/libxen/src/xen_common.c +++ b/tools/libxen/src/xen_common.c @@ -373,11 +373,18 @@ static void server_error_2(xen_session *session, const char *error_string, } -static bool is_container_node(xmlNode *n, char *type) +static bool is_node(xmlNode *n, char *type) { return n->type == XML_ELEMENT_NODE && - 0 == strcmp((char *)n->name, type) && + 0 == strcmp((char *)n->name, type); +} + + +static bool is_container_node(xmlNode *n, char *type) +{ + return + is_node(n, type) && n->children != NULL && n->children == n->last && n->children->type == XML_ELEMENT_NODE; @@ -390,13 +397,30 @@ static bool is_container_node(xmlNode *n, char *type) */ static xmlChar *string_from_value(xmlNode *n, char *type) { - return - is_container_node(n, "value") && - 0 == strcmp((char *)n->children->name, type) ? - (n->children->children == NULL ? - xmlStrdup(BAD_CAST("")) : - xmlNodeGetContent(n->children->children)) : - NULL; + /* + <value><type>XYZ</type></value> is normal, but the XML-RPC spec also + allows <value>XYZ</value> where XYZ is to be interpreted as a string. + */ + + if (is_container_node(n, "value") && + 0 == strcmp((char *)n->children->name, type)) + { + return + n->children->children == NULL ? + xmlStrdup(BAD_CAST("")) : + xmlNodeGetContent(n->children->children); + } + else if (0 == strcmp(type, "string") && is_node(n, "value")) + { + return + n->children == NULL ? 
+ xmlStrdup(BAD_CAST("")) : + xmlNodeGetContent(n->children); + } + else + { + return NULL; + } } @@ -557,8 +581,14 @@ static void parse_into(xen_session *s, xmlNode *value_node, xmlChar *string = string_from_value(value_node, "double"); if (string == NULL) { +#if PERMISSIVE + fprintf(stderr, + "Expected a Float from the server, but didn't get one\n"); + ((double *)value)[slot] = 0.0; +#else server_error( s, "Expected a Float from the server, but didn't get one"); +#endif } else { diff --git a/tools/libxen/src/xen_cpu_feature.c b/tools/libxen/src/xen_cpu_feature.c index 98fb64f9e3..6f19f32704 100644 --- a/tools/libxen/src/xen_cpu_feature.c +++ b/tools/libxen/src/xen_cpu_feature.c @@ -62,8 +62,8 @@ static const char *lookup_table[] = "NX", "MMXEXT", "LM", - "3DNOWEXT", - "3DNOW", + "THREEDNOWEXT", + "THREEDNOW", "RECOVERY", "LONGRUN", "LRTI", diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub index 8fb43ca680..ab8a20f31b 100644 --- a/tools/pygrub/src/pygrub +++ b/tools/pygrub/src/pygrub @@ -13,7 +13,7 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# -import os, sys, string, struct, tempfile +import os, sys, string, struct, tempfile, re import copy import logging @@ -66,6 +66,15 @@ def get_active_offset(file): P1 = 446 return struct.unpack("<L", buf[P1+8:P1+12])[0] * SECTOR_SIZE +def open_fs(file): + offset = 0 + if is_disk_image(file): + offset = get_active_offset(file) + if offset == -1: + raise RuntimeError, "Unable to find active partition on disk" + + return fsimage.open(file, offset) + class GrubLineEditor(curses.textpad.Textbox): def __init__(self, screen, startx, starty, line = ""): screen.addstr(startx, starty, "> ") @@ -143,12 +152,12 @@ class GrubLineEditor(curses.textpad.Textbox): class Grub: - def __init__(self, file, isconfig = False): + def __init__(self, file, fs = None): self.screen = None self.entry_win = None self.text_win = None if file: - self.read_config(file, isconfig) + self.read_config(file, fs) def draw_main_windows(self): if self.screen is None: #only init stuff once @@ -295,8 +304,8 @@ class Grub: # else, we cancelled and should just go back break - def read_config(self, fn, isConfig = False): - """Read the given file to parse the config. If isconfig, then + def read_config(self, fn, fs = None): + """Read the given file to parse the config. 
If fs = None, then we're being given a raw config file rather than a disk image.""" if not os.access(fn, os.R_OK): @@ -304,38 +313,25 @@ class Grub: self.cf = grub.GrubConf.GrubConfigFile() - if isConfig: + if not fs: # set the config file and parse it self.cf.filename = fn self.cf.parse() return - offset = 0 - if is_disk_image(fn): - offset = get_active_offset(fn) - if offset == -1: - raise RuntimeError, "Unable to find active partition on disk" - - # open the image and read the grub config - fs = fsimage.open(fn, offset) - - if fs is not None: - grubfile = None - for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", - "/grub/menu.lst", "/grub/grub.conf"): - if fs.file_exists(f): - grubfile = f - break - if grubfile is None: - raise RuntimeError, "we couldn't find grub config file in the image provided." - f = fs.open_file(grubfile) - buf = f.read() - del f - del fs - # then parse the grub config - self.cf.parse(buf) - else: - raise RuntimeError, "Unable to read filesystem" + grubfile = None + for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", + "/grub/menu.lst", "/grub/grub.conf"): + if fs.file_exists(f): + grubfile = f + break + if grubfile is None: + raise RuntimeError, "we couldn't find grub config file in the image provided." + f = fs.open_file(grubfile) + buf = f.read() + del f + # then parse the grub config + self.cf.parse(buf) def run(self): timeout = int(self.cf.timeout) @@ -431,19 +427,93 @@ def get_entry_idx(cf, entry): return None -if __name__ == "__main__": - sel = None - +def run_grub(file, entry, fs): + global g + global sel + def run_main(scr, *args): global sel + global g sel = g.run() + g = Grub(file, fs) + if interactive: + curses.wrapper(run_main) + else: + sel = g.cf.default + + # set the entry to boot as requested + if entry is not None: + idx = get_entry_idx(g.cf, entry) + if idx is not None and idx > 0 and idx < len(g.cf.images): + sel = idx + + if sel == -1: + print "No kernel image selected!" 
+ sys.exit(1) + + img = g.cf.images[sel] + + grubcfg = { "kernel": None, "ramdisk": None, "args": None } + + grubcfg["kernel"] = img.kernel[1] + if img.initrd: + grubcfg["ramdisk"] = img.initrd[1] + if img.args: + grubcfg["args"] = img.args + + return grubcfg + +# If nothing has been specified, look for a Solaris domU. If found, perform the +# necessary tweaks. +def sniff_solaris(fs, cfg): + if not fs.file_exists("/platform/i86xen/kernel/unix"): + return cfg + + # darned python + longmode = (sys.maxint != 2147483647L) + if not longmode: + longmode = os.uname()[4] == "x86_64" + if not longmode: + if (os.access("/usr/bin/isainfo", os.R_OK) and + os.popen("/usr/bin/isainfo -b").read() == "64\n"): + longmode = True + + if not cfg["kernel"]: + cfg["kernel"] = "/platform/i86xen/kernel/unix" + cfg["ramdisk"] = "/platform/i86pc/boot_archive" + if longmode: + cfg["kernel"] = "/platform/i86xen/kernel/amd64/unix" + cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive" + + # Unpleasant. Typically we'll have 'root=foo -k' or 'root=foo /kernel -k', + # and we need to maintain Xen properties (root= and ip=) and the kernel + # before any user args. 
+ + xenargs = "" + userargs = "" + + if not cfg["args"]: + cfg["args"] = cfg["kernel"] + else: + for arg in cfg["args"].split(): + if re.match("^root=", arg) or re.match("^ip=", arg): + xenargs += arg + " " + elif arg != cfg["kernel"]: + userargs += arg + " " + cfg["args"] = xenargs + " " + cfg["kernel"] + " " + userargs + + return cfg + +if __name__ == "__main__": + sel = None + def usage(): - print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--entry=] <image>" %(sys.argv[0],) + print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] <image>" %(sys.argv[0],) try: opts, args = getopt.gnu_getopt(sys.argv[1:], 'qh::', - ["quiet", "help", "output=", "entry=", + ["quiet", "help", "output=", "entry=", "kernel=", "ramdisk=", "args=", "isconfig"]) except getopt.GetoptError: usage() @@ -458,6 +528,14 @@ if __name__ == "__main__": entry = None interactive = True isconfig = False + + # what was passed in + incfg = { "kernel": None, "ramdisk": None, "args": None } + # what grub or sniffing chose + chosencfg = { "kernel": None, "ramdisk": None, "args": None } + # what to boot + bootcfg = { "kernel": None, "ramdisk": None, "args": None } + for o, a in opts: if o in ("-q", "--quiet"): interactive = False @@ -466,6 +544,12 @@ if __name__ == "__main__": sys.exit() elif o in ("--output",): output = a + elif o in ("--kernel",): + incfg["kernel"] = a + elif o in ("--ramdisk",): + incfg["ramdisk"] = a + elif o in ("--args",): + incfg["args"] = a elif o in ("--entry",): entry = a # specifying the entry to boot implies non-interactive @@ -478,58 +562,42 @@ if __name__ == "__main__": else: fd = os.open(output, os.O_WRONLY) - g = Grub(file, isconfig) - if interactive: - curses.wrapper(run_main) - else: - sel = g.cf.default - - # set the entry to boot as requested - if entry is not None: - idx = get_entry_idx(g.cf, entry) - if idx is not None and idx > 0 and idx < len(g.cf.images): - sel = idx - - if sel == -1: - print "No kernel image 
selected!" - sys.exit(1) - - img = g.cf.images[sel] - print "Going to boot %s" %(img.title) - print " kernel: %s" %(img.kernel[1],) - if img.initrd: - print " initrd: %s" %(img.initrd[1],) - + # debug if isconfig: - print " args: %s" %(img.args,) + chosencfg = run_grub(file, entry) + print " kernel: %s" % chosencfg["kernel"] + if img.initrd: + print " initrd: %s" % chosencfg["ramdisk"] + print " args: %s" % chosencfg["args"] sys.exit(0) - - offset = 0 - if is_disk_image(file): - offset = get_active_offset(file) - if offset == -1: - raise RuntimeError, "Unable to find active partition on disk" - # read the kernel and initrd onto the hostfs - fs = fsimage.open(file, offset) + fs = open_fs(file) - kernel = fs.open_file(img.kernel[1],).read() - (tfd, fn) = tempfile.mkstemp(prefix="boot_kernel.", + chosencfg = sniff_solaris(fs, incfg) + + if not chosencfg["kernel"]: + chosencfg = run_grub(file, entry, fs) + + data = fs.open_file(chosencfg["kernel"]).read() + (tfd, bootcfg["kernel"]) = tempfile.mkstemp(prefix="boot_kernel.", dir="/var/run/xend/boot") - os.write(tfd, kernel) + os.write(tfd, data) os.close(tfd) - sxp = "linux (kernel %s)" %(fn,) - if img.initrd: - initrd = fs.open_file(img.initrd[1],).read() - (tfd, fn) = tempfile.mkstemp(prefix="boot_ramdisk.", + if chosencfg["ramdisk"]: + data = fs.open_file(chosencfg["ramdisk"],).read() + (tfd, bootcfg["ramdisk"]) = tempfile.mkstemp(prefix="boot_ramdisk.", dir="/var/run/xend/boot") - os.write(tfd, initrd) + os.write(tfd, data) os.close(tfd) - sxp += "(ramdisk %s)" %(fn,) else: initrd = None - sxp += "(args '%s')" %(img.args,) + + sxp = "linux (kernel %s)" % bootcfg["kernel"] + if bootcfg["ramdisk"]: + sxp += "(ramdisk %s)" % bootcfg["ramdisk"] + if chosencfg["args"]: + sxp += "(args \"%s\")" % chosencfg["args"] sys.stdout.flush() os.write(fd, sxp) diff --git a/tools/python/Makefile b/tools/python/Makefile index 6b84446ee6..085ac84b5a 100644 --- a/tools/python/Makefile +++ b/tools/python/Makefile @@ -7,16 +7,30 @@ all: 
build .PHONY: build build: CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build + if which $(MSGFMT) >/dev/null ; then \ + for file in `cd xen/xm; find messages -name xen-xm.po`; do \ + dest=`echo "build/$$file" | \ + sed -e 's#xen-xm.po#LC_MESSAGES/xen-xm.mo#'`; \ + mkdir -p `dirname "$$dest"`; \ + $(MSGFMT) -c -o "$$dest" "xen/xm/$$file"; \ + done; \ + fi .PHONY: install ifndef XEN_PYTHON_NATIVE_INSTALL -install: all +install: install-messages CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --force else -install: all +install: install-messages CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" --force endif +install-messages: all + if which $(MSGFMT) >/dev/null ; then \ + mkdir -p "$(DESTDIR)/usr/share/locale"; \ + cp -R build/messages/* "$(DESTDIR)/usr/share/locale/"; \ + fi + .PHONY: test test: export LD_LIBRARY_PATH=$$(readlink -f ../libxc):$$(readlink -f ../xenstore); python test.py -b -u diff --git a/tools/python/scripts/xapi.py b/tools/python/scripts/xapi.py index 06baeefe4c..9bc3b507b8 100644 --- a/tools/python/scripts/xapi.py +++ b/tools/python/scripts/xapi.py @@ -41,6 +41,7 @@ VBD_LIST_FORMAT = '%(name_label)-18s %(uuid)-36s %(VDI)-8s '\ COMMANDS = { 'host-info': ('', 'Get Xen Host Info'), 'host-set-name': ('', 'Set host name'), + 'pif-list': ('', 'List all PIFs'), 'sr-list': ('', 'List all SRs'), 'vbd-list': ('', 'List all VBDs'), 'vbd-create': ('<domname> <pycfg> [opts]', @@ -63,6 +64,15 @@ COMMANDS = { } OPTIONS = { + 'sr-list': [(('-l', '--long'), + {'action':'store_true', + 'help':'List all properties of SR'}) + ], + + 'vdi-list': [(('-l', '--long'), + {'action':'store_true', + 'help':'List all properties of VDI'}) + ], 'vm-list': [(('-l', '--long'), {'action':'store_true', 'help':'List all properties of VMs'}) @@ -145,7 +155,7 @@ _session = None def _connect(*args): global _server, _session, _initialised if not _initialised: - _server = ServerProxy('httpu:///var/run/xend/xmlrpc.sock') 
+ _server = ServerProxy('httpu:///var/run/xend/xen-api.sock') login = raw_input("Login: ") password = getpass() creds = (login, password) @@ -361,29 +371,53 @@ def xapi_vbd_list(*args): print VBD_LIST_FORMAT % vbd_struct def xapi_vdi_list(*args): + opts, args = parse_args('vdi-list', args, set_defaults = True) + is_long = opts and opts.long + server, session = _connect() vdis = execute(server.VDI.get_all, session) - print VDI_LIST_FORMAT % {'name_label': 'VDI Label', - 'uuid' : 'UUID', - 'virtual_size': 'Sectors', - 'sector_size': 'Sector Size'} - - for vdi in vdis: - vdi_struct = execute(server.VDI.get_record, session, vdi) - print VDI_LIST_FORMAT % vdi_struct + if not is_long: + print VDI_LIST_FORMAT % {'name_label': 'VDI Label', + 'uuid' : 'UUID', + 'virtual_size': 'Sectors', + 'sector_size': 'Sector Size'} + + for vdi in vdis: + vdi_struct = execute(server.VDI.get_record, session, vdi) + print VDI_LIST_FORMAT % vdi_struct + + else: + + for vdi in vdis: + vdi_struct = execute(server.VDI.get_record, session, vdi) + pprint(vdi_struct) def xapi_sr_list(*args): + opts, args = parse_args('sr-list', args, set_defaults = True) + is_long = opts and opts.long + server, session = _connect() srs = execute(server.SR.get_all, session) - print SR_LIST_FORMAT % {'name_label': 'SR Label', - 'uuid' : 'UUID', - 'physical_size': 'Size', - 'type': 'Type'} - for sr in srs: - sr_struct = execute(server.SR.get_record, session, sr) - sr_struct['physical_size'] = int(sr_struct['physical_size'])/MB - print SR_LIST_FORMAT % sr_struct + if not is_long: + print SR_LIST_FORMAT % {'name_label': 'SR Label', + 'uuid' : 'UUID', + 'physical_size': 'Size (MB)', + 'type': 'Type'} + + for sr in srs: + sr_struct = execute(server.SR.get_record, session, sr) + sr_struct['physical_size'] = int(sr_struct['physical_size'])/MB + print SR_LIST_FORMAT % sr_struct + else: + for sr in srs: + sr_struct = execute(server.SR.get_record, session, sr) + pprint(sr_struct) + +def xapi_sr_rename(*args): + server, 
session = _connect() + sr = execute(server.SR.get_by_name_label, session, args[0]) + execute(server.SR.set_name_label, session, sr[0], args[1]) def xapi_vdi_create(*args): opts, args = parse_args('vdi-create', args) @@ -421,10 +455,11 @@ def xapi_vdi_rename(*args): if len(args) < 2: raise OptionError('Not enough arguments') - vdi_uuid = args[0] + vdi_uuid = execute(server.VDI.get_by_name_label, session, args[0]) vdi_name = args[1] - print 'Renaming VDI %s to %s' % (vdi_uuid, vdi_name) - result = execute(server.VDI.set_name_label, session, vdi_uuid, vdi_name) + + print 'Renaming VDI %s to %s' % (vdi_uuid[0], vdi_name) + result = execute(server.VDI.set_name_label, session, vdi_uuid[0], vdi_name) print 'Done.' @@ -448,6 +483,14 @@ def xapi_vtpm_create(*args): print "Has vtpm record '%s'" % vtpm_rec +def xapi_pif_list(*args): + server, session = _connect() + pif_uuids = execute(server.PIF.get_all, session) + for pif_uuid in pif_uuids: + pif = execute(server.PIF.get_record, session, pif_uuid) + print pif + + # # Command Line Utils # @@ -517,10 +560,12 @@ def usage(command = None, print_usage = True): print print 'Subcommands:' print - sorted_commands = sorted(COMMANDS.keys()) - for command in sorted_commands: - args, description = COMMANDS[command] - print '%-16s %-40s' % (command, description) + + for func in sorted(globals().keys()): + if func.startswith('xapi_'): + command = func[5:].replace('_', '-') + args, description = COMMANDS.get(command, ('', '')) + print '%-16s %-40s' % (command, description) print else: parse_args(command, ['-h']) @@ -549,7 +594,7 @@ def main(args): try: subcmd_func(*args[1:]) except XenAPIError, e: - print 'Error: %s' % str(e.args[1]) + print 'Error: %s' % str(e.args[0]) sys.exit(2) except OptionError, e: print 'Error: %s' % e diff --git a/tools/python/xen/lowlevel/acm/acm.c b/tools/python/xen/lowlevel/acm/acm.c index 930c568212..4290bb5ef7 100644 --- a/tools/python/xen/lowlevel/acm/acm.c +++ b/tools/python/xen/lowlevel/acm/acm.c @@ -35,6 
+35,8 @@ fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a , \ errno, strerror(errno)) +static PyObject *acm_error_obj; + /* generic shared function */ void * __getssid(int domid, uint32_t *buflen) { @@ -80,28 +82,26 @@ static PyObject *policy(PyObject * self, PyObject * args) { /* out */ char *policyreference; - PyObject *ret = NULL; + PyObject *ret; void *ssid_buffer; uint32_t buf_len; if (!PyArg_ParseTuple(args, "", NULL)) { - goto out1; + return NULL; } ssid_buffer = __getssid(0, &buf_len); - if (ssid_buffer == NULL) { - goto out1; - } else if (buf_len < sizeof(struct acm_ssid_buffer)) { - goto out2; - } else { + if (ssid_buffer == NULL || buf_len < sizeof(struct acm_ssid_buffer)) { + free(ssid_buffer); + return PyErr_SetFromErrno(acm_error_obj); + } + else { struct acm_ssid_buffer *ssid = (struct acm_ssid_buffer *)ssid_buffer; policyreference = (char *)(ssid_buffer + ssid->policy_reference_offset + sizeof (struct acm_policy_reference_buffer)); + ret = Py_BuildValue("s", policyreference); + free(ssid_buffer); + return ret; } - ret = Py_BuildValue("s", policyreference); - out2: - free(ssid_buffer); - out1: - return ret; } @@ -213,5 +213,8 @@ static PyMethodDef acmMethods[] = { /* inits */ PyMODINIT_FUNC initacm(void) { - Py_InitModule("acm", acmMethods); + PyObject *m = Py_InitModule("acm", acmMethods); + acm_error_obj = PyErr_NewException("acm.Error", PyExc_RuntimeError, NULL); + Py_INCREF(acm_error_obj); + PyModule_AddObject(m, "Error", acm_error_obj); } diff --git a/tools/python/xen/util/Brctl.py b/tools/python/xen/util/Brctl.py index 9ae8fdf51d..5dc9a7fb9f 100644 --- a/tools/python/xen/util/Brctl.py +++ b/tools/python/xen/util/Brctl.py @@ -28,6 +28,31 @@ def cmd(p, s): if not opts.dryrun: os.system(c) +bridgeRE = re.compile(r'([^\t]*)\t*[^\t]*\t*[^\t]*\t*([^\t]*)') +def get_state(): + fin = os.popen(CMD_BRCTL + ' show', 'r') + try: + bridges = {} + brlist = None + brname = None + first = True + for line in fin: + if first: + first = False + elif line[0] == 
'\t': + brlist.append(line.strip()) + else: + if brname: + bridges[brname] = brlist + m = bridgeRE.match(line) + brname = m.group(1) + brlist = [m.group(2).strip()] + if brname: + bridges[brname] = brlist + return bridges + finally: + fin.close() + def vif_bridge_add(params): """Add the network interface for vif on dom to a bridge. """ diff --git a/tools/python/xen/util/security.py b/tools/python/xen/util/security.py index 9957e7f586..2702fd3dbb 100644 --- a/tools/python/xen/util/security.py +++ b/tools/python/xen/util/security.py @@ -115,7 +115,7 @@ def get_security_info(info, field): if isinstance(info, dict): security = info['security'] elif isinstance(info, list): - security = sxp.child_value(info, 'security', ) + security = sxp.child_value(info, 'security') if not security: if field == 'ssidref': #return default ssid @@ -357,7 +357,7 @@ def refresh_ssidref(config): if isinstance(config, dict): security = config['security'] elif isinstance(config, list): - security = sxp.child_value(config, 'security',) + security = sxp.child_value(config, 'security') else: err("Instance type of config parameter not supported.") if not security: @@ -637,11 +637,11 @@ def res_security_check(resource, domain_label): """ rtnval = 1 - #build canonical resource name - resource = unify_resname(resource) - # if security is on, ask the hypervisor for a decision if on(): + #build canonical resource name + resource = unify_resname(resource) + (label, ssidref, policy) = get_res_security_details(resource) domac = ['access_control'] domac.append(['policy', active_policy]) @@ -660,6 +660,8 @@ def res_security_check(resource, domain_label): # security is off, make sure resource isn't labeled else: + # Note, we can't canonicalise the resource here, because people using + # xm without ACM are free to use relative paths. 
(label, policy) = get_res_label(resource) if policy != 'NULL': raise ACMError("Security is off, but '"+resource+"' is labeled") diff --git a/tools/python/xen/util/xmlrpclib2.py b/tools/python/xen/util/xmlrpclib2.py index efb29d7b0e..3ba1b259b1 100644 --- a/tools/python/xen/util/xmlrpclib2.py +++ b/tools/python/xen/util/xmlrpclib2.py @@ -20,6 +20,7 @@ An enhanced XML-RPC client/server interface for Python. """ +import re import string import fcntl from types import * @@ -49,13 +50,15 @@ except ImportError: def stringify(value): - if isinstance(value, IntType) and not isinstance(value, BooleanType): + if isinstance(value, float) or \ + isinstance(value, long) or \ + (isinstance(value, int) and not isinstance(value, bool)): return str(value) - elif isinstance(value, DictType): + elif isinstance(value, dict): for k, v in value.items(): value[k] = stringify(v) return value - elif isinstance(value, (TupleType, ListType)): + elif isinstance(value, (tuple, list)): return [stringify(v) for v in value] else: return value @@ -163,8 +166,10 @@ class ServerProxy(xmlrpclib.ServerProxy): class TCPXMLRPCServer(SocketServer.ThreadingMixIn, SimpleXMLRPCServer): allow_reuse_address = True - def __init__(self, addr, allowed, requestHandler=None, + def __init__(self, addr, allowed, xenapi, requestHandler=None, logRequests = 1): + self.xenapi = xenapi + if requestHandler is None: requestHandler = XMLRPCRequestHandler SimpleXMLRPCServer.__init__(self, addr, @@ -182,7 +187,7 @@ class TCPXMLRPCServer(SocketServer.ThreadingMixIn, SimpleXMLRPCServer): flags |= fcntl.FD_CLOEXEC fcntl.fcntl(client.fileno(), fcntl.F_SETFD, flags) return (client, addr) - + def _marshaled_dispatch(self, data, dispatch_method = None): params, method = xmlrpclib.loads(data) if False: @@ -214,13 +219,30 @@ class TCPXMLRPCServer(SocketServer.ThreadingMixIn, SimpleXMLRPCServer): except xmlrpclib.Fault, fault: response = xmlrpclib.dumps(fault) except Exception, exn: - import xen.xend.XendClient - log.exception(exn) - 
response = xmlrpclib.dumps( - xmlrpclib.Fault(xen.xend.XendClient.ERROR_INTERNAL, str(exn))) - + if self.xenapi: + if _is_not_supported(exn): + errdesc = ['MESSAGE_METHOD_UNKNOWN', method] + else: + log.exception('Internal error handling %s', method) + errdesc = ['INTERNAL_ERROR', str(exn)] + response = xmlrpclib.dumps( + ({ "Status": "Failure", + "ErrorDescription": errdesc },), + methodresponse = 1) + else: + log.exception('Internal error handling %s', method) + import xen.xend.XendClient + response = xmlrpclib.dumps( + xmlrpclib.Fault(xen.xend.XendClient.ERROR_INTERNAL, str(exn))) return response + +notSupportedRE = re.compile(r'method "(.*)" is not supported') +def _is_not_supported(exn): + m = notSupportedRE.search(exn[0]) + return m is not None + + # This is a XML-RPC server that sits on a Unix domain socket. # It implements proper support for allow_reuse_address by # unlink()'ing an existing socket. @@ -235,10 +257,10 @@ class UnixXMLRPCRequestHandler(XMLRPCRequestHandler): class UnixXMLRPCServer(TCPXMLRPCServer): address_family = socket.AF_UNIX - def __init__(self, addr, allowed, logRequests = 1): + def __init__(self, addr, allowed, xenapi, logRequests = 1): mkdir.parents(os.path.dirname(addr), stat.S_IRWXU, True) if self.allow_reuse_address and os.path.exists(addr): os.unlink(addr) - TCPXMLRPCServer.__init__(self, addr, allowed, + TCPXMLRPCServer.__init__(self, addr, allowed, xenapi, UnixXMLRPCRequestHandler, logRequests) diff --git a/tools/python/xen/xend/XendAPI.py b/tools/python/xen/xend/XendAPI.py index 540ab2e45d..96b81c32be 100644 --- a/tools/python/xen/xend/XendAPI.py +++ b/tools/python/xen/xend/XendAPI.py @@ -15,11 +15,16 @@ # Copyright (C) 2006 XenSource Ltd. 
#============================================================================ +import inspect +import os +import string +import sys +import traceback + from xen.xend import XendDomain, XendDomainInfo, XendNode from xen.xend import XendLogging from xen.xend.XendAuthSessions import instance as auth_manager -from xen.xend.XendAuthSessions import session_required from xen.xend.XendError import * from xen.xend.XendClient import ERROR_INVALID_DOMAIN from xen.xend.XendLogging import log @@ -30,13 +35,19 @@ from xen.util.xmlrpclib2 import stringify AUTH_NONE = 'none' AUTH_PAM = 'pam' +argcounts = {} + # ------------------------------------------ # Utility Methods for Xen API Implementation # ------------------------------------------ def xen_api_success(value): """Wraps a return value in XenAPI format.""" - return {"Status": "Success", "Value": stringify(value)} + if value is None: + s = '' + else: + s = stringify(value) + return {"Status": "Success", "Value": s} def xen_api_success_void(): """Return success, but caller expects no return value.""" @@ -44,7 +55,16 @@ def xen_api_success_void(): def xen_api_error(error): """Wraps an error value in XenAPI format.""" - return {"Status": "Error", "ErrorDescription": error} + if type(error) == tuple: + error = list(error) + if type(error) != list: + error = [error] + if len(error) == 0: + error = ['INTERNAL_ERROR', 'Empty list given to xen_api_error'] + + return { "Status": "Failure", + "ErrorDescription": [str(x) for x in error] } + def xen_api_todo(): """Temporary method to make sure we track down all the TODOs""" @@ -68,186 +88,164 @@ def trace(func, api_name = ''): trace_func.api = api_name return trace_func -def valid_host(func): - """Decorator to verify if host_ref is valid before calling - method. - @param func: function with params: (self, session, host_ref) +def catch_typeerror(func): + """Decorator to catch any TypeErrors and translate them into Xen-API + errors. + + @param func: function with params: (self, ...) 
+ @rtype: callable object + """ + def f(self, *args, **kwargs): + try: + return func(self, *args, **kwargs) + except TypeError, exn: + #log.exception('catch_typeerror') + if hasattr(func, 'api') and func.api in argcounts: + # Assume that if the exception was thrown inside this + # file, then it is due to an invalid call from the client, + # but if it was thrown elsewhere, then it's an internal + # error (which will be handled further up). + tb = sys.exc_info()[2] + try: + sourcefile = traceback.extract_tb(tb)[-1][0] + if sourcefile == inspect.getsourcefile(XendAPI): + return xen_api_error( + ['MESSAGE_PARAMETER_COUNT_MISMATCH', + func.api, argcounts[func.api], + len(args) + len(kwargs)]) + finally: + del tb + raise + + return f + + +def session_required(func): + """Decorator to verify if session is valid before calling method. + + @param func: function with params: (self, session, ...) @rtype: callable object """ - def check_host_ref(self, session, host_ref, *args, **kwargs): - xennode = XendNode.instance() - if type(host_ref) == type(str()) and xennode.is_valid_host(host_ref): - return func(self, session, host_ref, *args, **kwargs) + def check_session(self, session, *args, **kwargs): + if auth_manager().is_session_valid(session): + return func(self, session, *args, **kwargs) else: - return {'Status': 'Failure', - 'ErrorDescription': XEND_ERROR_HOST_INVALID} + return xen_api_error(['SESSION_INVALID', session]) + + return check_session - # make sure we keep the 'api' attribute - if hasattr(func, 'api'): - check_host_ref.api = func.api - - return check_host_ref + +def _is_valid_ref(ref, validator): + return type(ref) == str and validator(ref) + +def _check_ref(validator, errcode, func, api, session, ref, *args, **kwargs): + if _is_valid_ref(ref, validator): + return func(api, session, ref, *args, **kwargs) + else: + return xen_api_error([errcode, ref]) + + +def valid_host(func): + """Decorator to verify if host_ref is valid before calling method. 
+ + @param func: function with params: (self, session, host_ref, ...) + @rtype: callable object + """ + return lambda *args, **kwargs: \ + _check_ref(XendNode.instance().is_valid_host, + 'HOST_HANDLE_INVALID', func, *args, **kwargs) def valid_host_cpu(func): - """Decorator to verify if host_cpu_ref is valid before calling - method. + """Decorator to verify if host_cpu_ref is valid before calling method. - @param func: function with params: (self, session, host_cpu_ref) + @param func: function with params: (self, session, host_cpu_ref, ...) @rtype: callable object """ - def check_host_cpu_ref(self, session, host_cpu_ref, *args, **kwargs): - xennode = XendNode.instance() - if type(host_cpu_ref) == type(str()) and \ - xennode.is_valid_cpu(host_cpu_ref): - return func(self, session, host_cpu_ref, *args, **kwargs) - else: - return {'Status': 'Failure', - 'ErrorDescription': XEND_ERROR_HOST_CPU_INVALID} - - # make sure we keep the 'api' attribute - if hasattr(func, 'api'): - check_host_cpu_ref.api = func.api - - return check_host_cpu_ref + return lambda *args, **kwargs: \ + _check_ref(XendNode.instance().is_valid_cpu, + 'HOST_CPU_HANDLE_INVALID', func, *args, **kwargs) def valid_vm(func): - """Decorator to verify if vm_ref is valid before calling - method. + """Decorator to verify if vm_ref is valid before calling method. - @param func: function with params: (self, session, vm_ref) + @param func: function with params: (self, session, vm_ref, ...) 
@rtype: callable object """ - def check_vm_ref(self, session, *args, **kwargs): - if len(args) == 0: - return {'Status': 'Failure', - 'ErrorDescription': XEND_ERROR_VM_INVALID} + return lambda *args, **kwargs: \ + _check_ref(XendDomain.instance().is_valid_vm, + 'VM_HANDLE_INVALID', func, *args, **kwargs) - vm_ref = args[0] - xendom = XendDomain.instance() - if type(vm_ref) == type(str()) and \ - xendom.is_valid_vm(vm_ref): - return func(self, session, *args, **kwargs) - else: - return {'Status': 'Failure', - 'ErrorDescription': XEND_ERROR_VM_INVALID} +def valid_network(func): + """Decorator to verify if network_ref is valid before calling method. - # make sure we keep the 'api' attribute - if hasattr(func, 'api'): - check_vm_ref.api = func.api - - return check_vm_ref + @param func: function with params: (self, session, network_ref, ...) + @rtype: callable object + """ + return lambda *args, **kwargs: \ + _check_ref(XendNode.instance().is_valid_network, + 'NETWORK_HANDLE_INVALID', func, *args, **kwargs) def valid_vbd(func): - """Decorator to verify if vbd_ref is valid before calling - method. + """Decorator to verify if vbd_ref is valid before calling method. - @param func: function with params: (self, session, vbd_ref) + @param func: function with params: (self, session, vbd_ref, ...) 
@rtype: callable object """ - def check_vbd_ref(self, session, vbd_ref, *args, **kwargs): - xendom = XendDomain.instance() - if type(vbd_ref) == type(str()) and \ - xendom.is_valid_dev('vbd', vbd_ref): - return func(self, session, vbd_ref, *args, **kwargs) - else: - return {'Status': 'Failure', - 'ErrorDescription': XEND_ERROR_VBD_INVALID} - - # make sure we keep the 'api' attribute - if hasattr(func, 'api'): - check_vbd_ref.api = func.api - - return check_vbd_ref + return lambda *args, **kwargs: \ + _check_ref(lambda r: XendDomain.instance().is_valid_dev('vbd', r), + 'VBD_HANDLE_INVALID', func, *args, **kwargs) def valid_vif(func): - """Decorator to verify if vif_ref is valid before calling - method. + """Decorator to verify if vif_ref is valid before calling method. - @param func: function with params: (self, session, vif_ref) + @param func: function with params: (self, session, vif_ref, ...) @rtype: callable object """ - def check_vif_ref(self, session, vif_ref, *args, **kwargs): - xendom = XendDomain.instance() - if type(vif_ref) == type(str()) and \ - xendom.is_valid_dev('vif', vif_ref): - return func(self, session, vif_ref, *args, **kwargs) - else: - return {'Status': 'Failure', - 'ErrorDescription': XEND_ERROR_VIF_INVALID} - - # make sure we keep the 'api' attribute - if hasattr(func, 'api'): - check_vif_ref.api = func.api - - return check_vif_ref - + return lambda *args, **kwargs: \ + _check_ref(lambda r: XendDomain.instance().is_valid_dev('vif', r), + 'VIF_HANDLE_INVALID', func, *args, **kwargs) def valid_vdi(func): - """Decorator to verify if vdi_ref is valid before calling - method. + """Decorator to verify if vdi_ref is valid before calling method. - @param func: function with params: (self, session, vdi_ref) + @param func: function with params: (self, session, vdi_ref, ...) 
@rtype: callable object """ - def check_vdi_ref(self, session, vdi_ref, *args, **kwargs): - xennode = XendNode.instance() - if type(vdi_ref) == type(str()) and \ - xennode.get_sr().is_valid_vdi(vdi_ref): - return func(self, session, vdi_ref, *args, **kwargs) - else: - return {'Status': 'Failure', - 'ErrorDescription': XEND_ERROR_VDI_INVALID} - - # make sure we keep the 'api' attribute - if hasattr(func, 'api'): - check_vdi_ref.api = func.api - - return check_vdi_ref + return lambda *args, **kwargs: \ + _check_ref(XendNode.instance().get_sr().is_valid_vdi, + 'VDI_HANDLE_INVALID', func, *args, **kwargs) def valid_vtpm(func): - """Decorator to verify if vtpm_ref is valid before calling - method. + """Decorator to verify if vtpm_ref is valid before calling method. - @param func: function with params: (self, session, vtpm_ref) + @param func: function with params: (self, session, vtpm_ref, ...) @rtype: callable object """ - def check_vtpm_ref(self, session, vtpm_ref, *args, **kwargs): - xendom = XendDomain.instance() - if type(vtpm_ref) == type(str()) and \ - xendom.is_valid_dev('vtpm', vtpm_ref): - return func(self, session, vtpm_ref, *args, **kwargs) - else: - return {'Status': 'Failure', - 'ErrorDescription': XEND_ERROR_VTPM_INVALID} + return lambda *args, **kwargs: \ + _check_ref(lambda r: XendDomain.instance().is_valid_dev('vtpm', r), + 'VTPM_HANDLE_INVALID', func, *args, **kwargs) - # make sure we keep the 'api' attribute - if hasattr(func, 'api'): - check_vtpm_ref.api = func.api +def valid_sr(func): + """Decorator to verify if sr_ref is valid before calling method. - return check_vtpm_ref + @param func: function with params: (self, session, sr_ref, ...) + @rtype: callable object + """ + return lambda *args, **kwargs: \ + _check_ref(lambda r: XendNode.instance().get_sr().uuid == r, + 'SR_HANDLE_INVALID', func, *args, **kwargs) -def valid_sr(func): +def valid_pif(func): """Decorator to verify if sr_ref is valid before calling method. 
@param func: function with params: (self, session, sr_ref) @rtype: callable object """ - def check_sr_ref(self, session, sr_ref, *args, **kwargs): - xennode = XendNode.instance() - if type(sr_ref) == type(str()) and \ - xennode.get_sr().uuid == sr_ref: - return func(self, session, sr_ref, *args, **kwargs) - else: - return {'Status': 'Failure', - 'ErrorDescription': XEND_ERROR_SR_INVALID} - - # make sure we keep the 'api' attribute - if hasattr(func, 'api'): - check_sr_ref.api = func.api - - return check_sr_ref + return lambda *args, **kwargs: \ + _check_ref(lambda r: r in XendNode.instance().pifs, + 'PIF_HANDLE_INVALID', func, *args, **kwargs) # ----------------------------- # Bridge to Legacy XM API calls @@ -263,10 +261,14 @@ def do_vm_func(fn_name, vm_ref, *args, **kwargs): @param *args: more arguments @type *args: tuple """ - xendom = XendDomain.instance() - fn = getattr(xendom, fn_name) - xendom.do_legacy_api_with_uuid(fn, vm_ref, *args, **kwargs) - return xen_api_success_void() + try: + xendom = XendDomain.instance() + fn = getattr(xendom, fn_name) + xendom.do_legacy_api_with_uuid(fn, vm_ref, *args, **kwargs) + return xen_api_success_void() + except VMBadState, exn: + return xen_api_error(['VM_BAD_POWER_STATE', vm_ref, exn.expected, + exn.actual]) class XendAPI: @@ -274,7 +276,7 @@ class XendAPI: used via XMLRPCServer. All methods that need a valid session are marked with - a L{XendAuthManager.session_required} decorator that will + a L{session_required} decorator that will transparently perform the required session authentication. We need to support Python <2.4, so we use the old decorator syntax. @@ -284,109 +286,13 @@ class XendAPI: """ def __init__(self, auth): - """Initialised Xen API wrapper by making sure all functions - have the correct validation decorators such as L{valid_host} - and L{session_required}. 
- """ self.auth = auth - classes = { - 'session': (session_required,), - 'host': (valid_host, session_required), - 'host_cpu': (valid_host_cpu, session_required), - 'VM': (valid_vm, session_required), - 'VBD': (valid_vbd, session_required), - 'VIF': (valid_vif, session_required), - 'VDI': (valid_vdi, session_required), - 'VTPM':(valid_vtpm, session_required), - 'SR': (valid_sr, session_required)} - - # Cheat methods - # ------------- - # Methods that have a trivial implementation for all classes. - # 1. get_by_uuid == getting by ref, so just return uuid for - # all get_by_uuid() methods. - - for cls in classes.keys(): - get_by_uuid = '%s_get_by_uuid' % cls - get_uuid = '%s_get_uuid' % cls - setattr(XendAPI, get_by_uuid, - lambda s, sess, obj_ref: xen_api_success(obj_ref)) - setattr(XendAPI, get_uuid, - lambda s, sess, obj_ref: xen_api_success(obj_ref)) - - # 2. get_record is just getting all the attributes, so provide - # a fake template implementation. - # - # TODO: ... - - - # Wrapping validators around XMLRPC calls - # --------------------------------------- - - for cls, validators in classes.items(): - ro_attrs = getattr(self, '%s_attr_ro' % cls, []) - rw_attrs = getattr(self, '%s_attr_rw' % cls, []) - methods = getattr(self, '%s_methods' % cls, []) - funcs = getattr(self, '%s_funcs' % cls, []) - - # wrap validators around readable class attributes - for attr_name in ro_attrs + rw_attrs + self.Base_attr_ro: - getter_name = '%s_get_%s' % (cls, attr_name) - try: - getter = getattr(XendAPI, getter_name) - for validator in validators: - getter = validator(getter) - getter.api = '%s.get_%s' % (cls, attr_name) - setattr(XendAPI, getter_name, getter) - except AttributeError: - pass - #log.warn("API call: %s not found" % getter_name) - - # wrap validators around writable class attrributes - for attr_name in rw_attrs + self.Base_attr_rw: - setter_name = '%s_set_%s' % (cls, attr_name) - try: - setter = getattr(XendAPI, setter_name) - for validator in validators: - setter 
= validator(setter) - setter.api = '%s.set_%s' % (cls, attr_name) - setattr(XendAPI, setter_name, setter) - except AttributeError: - pass - #log.warn("API call: %s not found" % setter_name) - - # wrap validators around methods - for method_name in methods + self.Base_methods: - method_full_name = '%s_%s' % (cls, method_name) - - try: - method = getattr(XendAPI, method_full_name) - for validator in validators: - method = validator(method) - method.api = '%s.%s' % (cls, method_name) - setattr(XendAPI, method_full_name, method) - except AttributeError: - pass - #log.warn('API call: %s not found' % method_full_name) - - # wrap validators around class functions - for func_name in funcs + self.Base_funcs: - func_full_name = '%s_%s' % (cls, func_name) - try: - method = getattr(XendAPI, func_full_name) - method = session_required(method) - method.api = '%s.%s' % (cls, func_name) - setattr(XendAPI, func_full_name, method) - except AttributeError: - pass - #log.warn('API call: %s not found' % func_full_name) - Base_attr_ro = ['uuid'] Base_attr_rw = [] - Base_methods = ['destroy', 'get_record'] - Base_funcs = ['create', 'get_by_uuid', 'get_all'] + Base_methods = ['destroy', 'get_by_uuid', 'get_record'] + Base_funcs = ['create', 'get_all'] # Xen API: Class Session # ---------------------------------------------------------------- @@ -396,14 +302,20 @@ class XendAPI: session_methods = ['logout'] # session_funcs = ['login_with_password'] - def session_login_with_password(self, username, password): + def session_login_with_password(self, *args): + if len(args) != 2: + return xen_api_error( + ['MESSAGE_PARAMETER_COUNT_MISMATCH', + 'session.login_with_password', 2, len(args)]) + username = args[0] + password = args[1] try: session = (self.auth == AUTH_NONE and auth_manager().login_unconditionally(username) or auth_manager().login_with_password(username, password)) return xen_api_success(session) except XendError, e: - return xen_api_error(XEND_ERROR_AUTHENTICATION_FAILED) + return 
xen_api_error(['SESSION_AUTHENTICATION_FAILED']) session_login_with_password.api = 'session.login_with_password' @@ -425,7 +337,7 @@ class XendAPI: user = auth_manager().get_user(session) if user: return xen_api_success(user) - return xen_api_error(XEND_ERROR_SESSION_INVALID) + return xen_api_error(['SESSION_INVALID', session]) # Xen API: Class User @@ -548,18 +460,156 @@ class XendAPI: return xen_api_error(XEND_ERROR_UNSUPPORTED) - # Xen API: Class Network + # Xen API: Class network # ---------------------------------------------------------------- - # TODO: NOT IMPLEMENTED - Network_attr_ro = ['VIFs'] - Network_attr_rw = ['name_label', + network_attr_ro = ['VIFs', 'PIFs'] + network_attr_rw = ['name_label', 'name_description', - 'NIC', - 'VLAN', 'default_gateway', 'default_netmask'] + def network_create(self, _, name_label, name_description, + default_gateway, default_netmask): + return xen_api_success( + XendNode.instance().network_create(name_label, name_description, + default_gateway, + default_netmask)) + + def network_destroy(self, _, ref): + return xen_api_success(XendNode.instance().network_destroy(ref)) + + def _get_network(self, ref): + return XendNode.instance().get_network(ref) + + def network_get_all(self, _): + return xen_api_success(XendNode.instance().get_network_refs()) + + def network_get_record(self, _, ref): + return xen_api_success( + XendNode.instance().get_network(ref).get_record()) + + def network_get_name_label(self, _, ref): + return xen_api_success(self._get_network(ref).name_label) + + def network_get_name_description(self, _, ref): + return xen_api_success(self._get_network(ref).name_description) + + def network_get_default_gateway(self, _, ref): + return xen_api_success(self._get_network(ref).default_gateway) + + def network_get_default_netmask(self, _, ref): + return xen_api_success(self._get_network(ref).default_netmask) + + def network_get_VIFs(self, _, ref): + return xen_api_success(self._get_network(ref).get_VIF_UUIDs()) + + def 
network_get_PIFs(self, session, ref): + return xen_api_success(self._get_network(ref).get_PIF_UUIDs()) + + def network_set_name_label(self, _, ref, val): + return xen_api_success(self._get_network(ref).set_name_label(val)) + + def network_set_name_description(self, _, ref, val): + return xen_api_success(self._get_network(ref).set_name_description(val)) + + def network_set_default_gateway(self, _, ref, val): + return xen_api_success(self._get_network(ref).set_default_gateway(val)) + + def network_set_default_netmask(self, _, ref, val): + return xen_api_success(self._get_network(ref).set_default_netmask(val)) + + + # Xen API: Class PIF + # ---------------------------------------------------------------- + + PIF_attr_ro = ['io_read_kbs', + 'io_write_kbs'] + PIF_attr_rw = ['name', + 'network', + 'host', + 'MAC', + 'MTU', + 'VLAN'] + + PIF_attr_inst = PIF_attr_rw + + PIF_methods = ['create_VLAN'] + + def _get_PIF(self, ref): + return XendNode.instance().pifs[ref] + + def PIF_create(self, _, name, network_uuid, host_uuid, mac, mtu, vlan): + try: + node = XendNode.instance() + if host_uuid != node.uuid: + return xen_api_error(['HOST_HANDLE_INVALID', host_uuid]) + + elif _is_valid_ref(network_uuid, node.is_valid_network): + network = node.get_network(network_uuid) + return xen_api_success(node.PIF_create(name, mtu, vlan, mac, + network)) + else: + return xen_api_error(['NETWORK_HANDLE_INVALID', network_uuid]) + except NetworkAlreadyConnected, exn: + return xen_api_error(['NETWORK_ALREADY_CONNECTED', + network_uuid, exn.pif_uuid]) + + def PIF_destroy(self, _, ref): + return xen_api_success(XendNode.instance().PIF_destroy(ref)) + + # object methods + def PIF_get_record(self, _, ref): + return xen_api_success(self._get_PIF(ref).get_record()) + + def PIF_get_all(self, _): + return xen_api_success(XendNode.instance().pifs.keys()) + + def PIF_get_name(self, _, ref): + return xen_api_success(self._get_PIF(ref).name) + + def PIF_get_network(self, _, ref): + return 
xen_api_success(self._get_PIF(ref).network.uuid) + + def PIF_get_host(self, _, ref): + return xen_api_success(self._get_PIF(ref).host.uuid) + + def PIF_get_MAC(self, _, ref): + return xen_api_success(self._get_PIF(ref).mac) + + def PIF_get_MTU(self, _, ref): + return xen_api_success(self._get_PIF(ref).mtu) + + def PIF_get_VLAN(self, _, ref): + return xen_api_success(self._get_PIF(ref).vlan) + + def PIF_get_io_read_kbs(self, _, ref): + return xen_api_success(self._get_PIF(ref).get_io_read_kbs()) + + def PIF_get_io_write_kbs(self, _, ref): + return xen_api_success(self._get_PIF(ref).get_io_write_kbs()) + + def PIF_set_name(self, _, ref, name): + return xen_api_success(self._get_PIF(ref).set_name(name)) + + def PIF_set_MAC(self, _, ref, mac): + return xen_api_success(self._get_PIF(ref).set_mac(mac)) + + def PIF_set_MTU(self, _, ref, mtu): + return xen_api_success(self._get_PIF(ref).set_mtu(mtu)) + + def PIF_create_VLAN(self, _, ref, network, vlan): + try: + if _is_valid_ref(network, XendNode.instance().is_valid_network): + return xen_api_success(XendNode.instance().PIF_create_VLAN( + ref, network, vlan)) + else: + return xen_api_error(['NETWORK_HANDLE_INVALID', network]) + except NetworkAlreadyConnected, exn: + return xen_api_error(['NETWORK_ALREADY_CONNECTED', + network, exn.pif_uuid]) + + # Xen API: Class VM # ---------------------------------------------------------------- @@ -833,55 +883,55 @@ class XendAPI: dom.setName(label) return xen_api_success_void() - def VM_set_name_description(self, session, vm_ref): + def VM_set_name_description(self, session, vm_ref, desc): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_user_version(self, session, vm_ref): + def VM_set_user_version(self, session, vm_ref, ver): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_is_a_template(self, session, vm_ref): + def VM_set_is_a_template(self, 
session, vm_ref, is_template): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_memory_dynamic_max(self, session, vm_ref): + def VM_set_memory_dynamic_max(self, session, vm_ref, mem): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_memory_dynamic_min(self, session, vm_ref): + def VM_set_memory_dynamic_min(self, session, vm_ref, mem): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_VCPUs_policy(self, session, vm_ref): + def VM_set_VCPUs_policy(self, session, vm_ref, policy): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_VCPUs_params(self, session, vm_ref): + def VM_set_VCPUs_params(self, session, vm_ref, params): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_VCPUs_features_force_on(self, session, vm_ref): + def VM_set_VCPUs_features_force_on(self, session, vm_ref, features): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_VCPUs_features_force_off(self, session, vm_ref): + def VM_set_VCPUs_features_force_off(self, session, vm_ref, features): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_actions_after_shutdown(self, session, vm_ref): + def VM_set_actions_after_shutdown(self, session, vm_ref, action): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_actions_after_reboot(self, session, vm_ref): + def VM_set_actions_after_reboot(self, session, vm_ref, action): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def 
VM_set_actions_after_suspend(self, session, vm_ref): + def VM_set_actions_after_suspend(self, session, vm_ref, action): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_actions_after_crash(self, session, vm_ref): + def VM_set_actions_after_crash(self, session, vm_ref, action): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) return xen_api_success_void() @@ -905,27 +955,27 @@ class XendAPI: def VM_set_platform_std_VGA(self, session, vm_ref): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_platform_serial(self, session, vm_ref): + def VM_set_platform_serial(self, session, vm_ref, serial): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_platform_localtime(self, session, vm_ref): + def VM_set_platform_localtime(self, session, vm_ref, localtime): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_platform_clock_offset(self, session, vm_ref): + def VM_set_platform_clock_offset(self, session, vm_ref, clock_offset): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() - def VM_set_platform_enable_audio(self, session, vm_ref): + def VM_set_platform_enable_audio(self, session, vm_ref, enable_audio): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() def VM_set_otherConfig(self, session, vm_ref): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) - return xen_api_success_void() + return xen_api_todo() # class methods def VM_get_all(self, session): @@ -937,7 +987,7 @@ class XendAPI: dom = xendom.domain_lookup_nr(label) if dom: return xen_api_success([dom.get_uuid()]) - return xen_api_error(XEND_ERROR_VM_INVALID) + return xen_api_success([]) def VM_create(self, session, vm_struct): xendom 
= XendDomain.instance() @@ -949,7 +999,7 @@ class XendAPI: xendom = XendDomain.instance() xeninfo = xendom.get_vm_by_uuid(vm_ref) if not xeninfo: - return xen_api_error(XEND_ERROR_VM_INVALID) + return xen_api_error(['VM_HANDLE_INVALID', vm_ref]) record = { 'uuid': xeninfo.get_uuid(), @@ -1051,10 +1101,10 @@ class XendAPI: xendom = XendDomain.instance() vm = xendom.get_vm_with_dev_uuid('vbd', vbd_ref) if not vm: - return xen_api_error(XEND_ERROR_VBD_INVALID) + return xen_api_error(['VBD_HANDLE_INVALID', vbd_ref]) cfg = vm.get_dev_xenapi_config('vbd', vbd_ref) if not cfg: - return xen_api_error(XEND_ERROR_VBD_INVALID) + return xen_api_error(['VBD_HANDLE_INVALID', vbd_ref]) valid_vbd_keys = self.VBD_attr_ro + self.VBD_attr_rw + \ self.Base_attr_ro + self.Base_attr_rw @@ -1073,7 +1123,7 @@ class XendAPI: def VBD_create(self, session, vbd_struct): xendom = XendDomain.instance() if not xendom.is_valid_vm(vbd_struct['VM']): - return xen_api_error(XEND_ERROR_DOMAIN_INVALID) + return xen_api_error(['VM_HANDLE_INVALID', vbd_struct['VM']]) dom = xendom.get_vm_by_uuid(vbd_struct['VM']) vbd_ref = '' @@ -1087,7 +1137,7 @@ class XendAPI: sr = XendNode.instance().get_sr() vdi_image = sr.xen_api_get_by_uuid(vdi_ref) if not vdi_image: - return xen_api_error(XEND_ERROR_VDI_INVALID) + return xen_api_error(['VDI_HANDLE_INVALID', vdi_ref]) vdi_image = vdi_image.qcow_path vbd_ref = dom.create_vbd_with_vdi(vbd_struct, vdi_image) except XendError: @@ -1137,10 +1187,10 @@ class XendAPI: xendom = XendDomain.instance() vm = xendom.get_vm_with_dev_uuid('vif', vif_ref) if not vm: - return xen_api_error(XEND_ERROR_VIF_INVALID) + return xen_api_error(['VIF_HANDLE_INVALID', vif_ref]) cfg = vm.get_dev_xenapi_config('vif', vif_ref) if not cfg: - return xen_api_error(XEND_ERROR_VIF_INVALID) + return xen_api_error(['VIF_HANDLE_INVALID', vif_ref]) valid_vif_keys = self.VIF_attr_ro + self.VIF_attr_rw + \ self.Base_attr_ro + self.Base_attr_rw @@ -1164,7 +1214,7 @@ class XendAPI: except XendError: return 
xen_api_error(XEND_ERROR_TODO) else: - return xen_api_error(XEND_ERROR_DOMAIN_INVALID) + return xen_api_error(['VM_HANDLE_INVALID', vif_struct['VM']]) # Xen API: Class VDI @@ -1185,74 +1235,54 @@ class XendAPI: VDI_methods = ['snapshot'] VDI_funcs = ['get_by_name_label'] + + def _get_VDI(self, ref): + return XendNode.instance().get_sr().xen_api_get_by_uuid(ref) def VDI_get_VBDs(self, session, vdi_ref): return xen_api_todo() def VDI_get_physical_utilisation(self, session, vdi_ref): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - return xen_api_success(image.get_physical_utilisation()) + return xen_api_success(self._get_VDI(vdi_ref). + get_physical_utilisation()) def VDI_get_sector_size(self, session, vdi_ref): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - return xen_api_success(image.sector_size) + return xen_api_success(self._get_VDI(vdi_ref).sector_size) def VDI_get_type(self, session, vdi_ref): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - return xen_api_success(image.type) + return xen_api_success(self._get_VDI(vdi_ref).type) def VDI_get_parent(self, session, vdi_ref): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - return xen_api_success(image.parent) + return xen_api_success(self._get_VDI(vdi_ref).parent) def VDI_get_children(self, session, vdi_ref): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - return xen_api_success(image.children) + return xen_api_success(self._get_VDI(vdi_ref).children) def VDI_get_name_label(self, session, vdi_ref): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - return xen_api_success(image.name_label) + return xen_api_success(self._get_VDI(vdi_ref).name_label) def VDI_get_name_description(self, session, vdi_ref): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - return xen_api_success(image.name_description) + 
return xen_api_success(self._get_VDI(vdi_ref).name_description) def VDI_get_SR(self, session, vdi_ref): sr = XendNode.instance().get_sr() return xen_api_success(sr.uuid) def VDI_get_virtual_size(self, session, vdi_ref): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - return xen_api_success(image.virtual_size) + return xen_api_success(self._get_VDI(vdi_ref).virtual_size) def VDI_get_sharable(self, session, vdi_ref): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - return xen_api_success(image.sharable) + return xen_api_success(self._get_VDI(vdi_ref).sharable) def VDI_get_read_only(self, session, vdi_ref): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - return xen_api_success(image.sharable) + return xen_api_success(self._get_VDI(vdi_ref).read_only) def VDI_set_name_label(self, session, vdi_ref, value): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - image.name_label = value + self._get_VDI(vdi_ref).name_label = value return xen_api_success_void() def VDI_set_name_description(self, session, vdi_ref, value): - sr = XendNode.instance().get_sr() - image = sr.xen_api_get_by_uuid(vdi_ref) - image.name_description = value + self._get_VDI(vdi_ref).name_description = value return xen_api_success_void() def VDI_set_SR(self, session, vdi_ref, value): @@ -1262,9 +1292,12 @@ class XendAPI: return xen_api_error(XEND_ERROR_UNSUPPORTED) def VDI_set_sharable(self, session, vdi_ref, value): - return xen_api_todo() + self._get_VDI(vdi_ref).sharable = bool(value) + return xen_api_success_void() + def VDI_set_read_only(self, session, vdi_ref, value): - return xen_api_todo() + self._get_VDI(vdi_ref).read_only = bool(value) + return xen_api_success_void() # Object Methods def VDI_snapshot(self, session, vdi_ref): @@ -1278,31 +1311,28 @@ class XendAPI: def VDI_get_record(self, session, vdi_ref): sr = XendNode.instance().get_sr() image = 
sr.xen_api_get_by_uuid(vdi_ref) - if image: - return xen_api_success({ - 'uuid': vdi_ref, - 'name_label': image.name_label, - 'name_description': image.name_description, - 'SR': sr.uuid, - 'VBDs': [], # TODO - 'virtual_size': image.virtual_size, - 'physical_utilisation': image.physical_utilisation, - 'sector_size': image.sector_size, - 'type': image.type, - 'parent': image.parent, - 'children': image.children, - 'sharable': image.sharable, - 'read_only': image.read_only, - }) - - return xen_api_error(XEND_ERROR_VDI_INVALID) + return xen_api_success({ + 'uuid': vdi_ref, + 'name_label': image.name_label, + 'name_description': image.name_description, + 'SR': sr.uuid, + 'VBDs': [], # TODO + 'virtual_size': image.virtual_size, + 'physical_utilisation': image.physical_utilisation, + 'sector_size': image.sector_size, + 'type': image.type, + 'parent': image.parent, + 'children': image.children, + 'sharable': image.sharable, + 'read_only': image.read_only, + }) # Class Functions def VDI_create(self, session, vdi_struct): sr = XendNode.instance().get_sr() sr_ref = vdi_struct['SR'] if sr.uuid != sr_ref: - return xen_api_error(XEND_ERROR_SR_INVALID) + return xen_api_error(['SR_HANDLE_INVALID', vdi_struct['SR']]) vdi_uuid = sr.create_image(vdi_struct) return xen_api_success(vdi_uuid) @@ -1315,9 +1345,8 @@ class XendAPI: sr = XendNode.instance().get_sr() image_uuid = sr.xen_api_get_by_name_label(name) if image_uuid: - return xen_api_success(image_uuid) - - return xen_api_error(XEND_ERROR_VDI_INVALID) + return xen_api_success([image_uuid]) + return xen_api_success([]) # Xen API: Class VTPM @@ -1336,10 +1365,10 @@ class XendAPI: xendom = XendDomain.instance() vm = xendom.get_vm_with_dev_uuid('vtpm', vtpm_ref) if not vm: - return xen_api_error(XEND_ERROR_VTPM_INVALID) + return xen_api_error(['VTPM_HANDLE_INVALID', vtpm_ref]) cfg = vm.get_dev_xenapi_config('vtpm', vtpm_ref) if not cfg: - return xen_api_error(XEND_ERROR_VTPM_INVALID) + return xen_api_error(['VTPM_HANDLE_INVALID', 
vtpm_ref]) valid_vtpm_keys = self.VTPM_attr_ro + self.VTPM_attr_rw + \ self.Base_attr_ro + self.Base_attr_rw for k in cfg.keys(): @@ -1353,10 +1382,10 @@ class XendAPI: xendom = XendDomain.instance() vm = xendom.get_vm_with_dev_uuid('vtpm', vtpm_ref) if not vm: - return xen_api_error(XEND_ERROR_VTPM_INVALID) + return xen_api_error(['VTPM_HANDLE_INVALID', vtpm_ref]) cfg = vm.get_dev_xenapi_config('vtpm', vtpm_ref) if not cfg: - return xen_api_error(XEND_ERROR_VTPM_INVALID) + return xen_api_error(['VTPM_HANDLE_INVALID', vtpm_ref]) if cfg.has_key('instance'): instance = cfg['instance'] else: @@ -1367,10 +1396,10 @@ class XendAPI: xendom = XendDomain.instance() vm = xendom.get_vm_with_dev_uuid('vtpm', vtpm_ref) if not vm: - return xen_api_error(XEND_ERROR_VTPM_INVALID) + return xen_api_error(['VTPM_HANDLE_INVALID', vtpm_ref]) cfg = vm.get_dev_xenapi_config('vtpm', vtpm_ref) if not cfg: - return xen_api_error(XEND_ERROR_VTPM_INVALID) + return xen_api_error(['VTPM_HANDLE_INVALID', vtpm_ref]) if cfg.has_key('type'): driver = cfg['type'] else: @@ -1381,10 +1410,10 @@ class XendAPI: xendom = XendDomain.instance() vm = xendom.get_vm_with_dev_uuid('vtpm', vtpm_ref) if not vm: - return xen_api_error(XEND_ERROR_VTPM_INVALID) + return xen_api_error(['VTPM_HANDLE_INVALID', vtpm_ref]) cfg = vm.get_dev_xenapi_config('vtpm', vtpm_ref) if not cfg: - return xen_api_error(XEND_ERROR_VTPM_INVALID) + return xen_api_error(['VTPM_HANDLE_INVALID', vtpm_ref]) if cfg.has_key('backend'): backend = cfg['backend'] else: @@ -1407,7 +1436,7 @@ class XendAPI: except XendError: return xen_api_error(XEND_ERROR_TODO) else: - return xen_api_error(XEND_ERROR_DOMAIN_INVALID) + return xen_api_error(['VM_HANDLE_INVALID', vtpm_struct['VM']]) # Xen API: Class SR @@ -1439,7 +1468,7 @@ class XendAPI: def SR_get_by_name_label(self, session, label): sr = XendNode.instance().get_sr() if sr.name_label != label: - return xen_api_error(XEND_ERROR_SR_INVALID) + return xen_api_success([]) return 
xen_api_success([sr.uuid]) def SR_create(self, session): @@ -1457,61 +1486,143 @@ class XendAPI: def SR_get_record(self, session, sr_ref): sr = XendNode.instance().get_sr() - return xen_api_success({ - 'uuid': sr.uuid, - 'name_label': sr.name_label, - 'name_description': sr.name_description, - 'VDIs': sr.list_images(), - 'virtual_allocation': sr.used_space_bytes(), - 'physical_utilisation': sr.used_space_bytes(), - 'physical_size': sr.total_space_bytes(), - 'type': sr.type, - 'location': sr.location - }) + return xen_api_success(sr.get_record()) # Attribute acceess - def SR_get_VDIs(self, session, sr_ref): - sr = XendNode.instance().get_sr() - return xen_api_success(sr.list_images()) - def SR_get_virtual_allocation(self, session, sr_ref): - sr = XendNode.instance().get_sr() - return sr.used_space_bytes() + def _get_SR_func(self, _, func): + return xen_api_success(getattr(XendNode.instance().get_sr(), func)()) - def SR_get_physical_utilisation(self, session, sr_ref): - sr = XendNode.instance().get_sr() - return sr.used_space_bytes() + def _get_SR_attr(self, _, attr): + return xen_api_success(getattr(XendNode.instance().get_sr(), attr)) - def SR_get_physical_size(self, session, sr_ref): - sr = XendNode.instance().get_sr() - return sr.total_space_bytes() + def SR_get_VDIs(self, _, ref): + return self._get_SR_func(ref, 'list_images') + + def SR_get_virtual_allocation(self, _, ref): + return self._get_SR_func(ref, 'virtual_allocation') + + def SR_get_physical_utilisation(self, _, ref): + return self._get_SR_func(ref, 'used_space_bytes') + + def SR_get_physical_size(self, _, ref): + return self._get_SR_func(ref, 'total_space_bytes') - def SR_get_type(self, session, sr_ref): - sr = XendNode.instance().get_sr() - return xen_api_success(sr.type) + def SR_get_type(self, _, ref): + return self._get_SR_attr(ref, 'type') - def SR_get_location(self, session, sr_ref): - sr = XendNode.instance().get_sr() - return xen_api_success(sr.location) + def SR_get_location(self, _, ref): + 
return self._get_SR_attr(ref, 'location') - def SR_get_name_label(self, session, sr_ref): - sr = XendNode.instance().get_sr() - return xen_api_success(sr.name_label) + def SR_get_name_label(self, _, ref): + return self._get_SR_attr(ref, 'name_label') - def SR_get_name_description(self, session, sr_ref): - sr = XendNode.instance().get_sr() - return xen_api_success(sr.name_description) + def SR_get_name_description(self, _, ref): + return self._get_SR_attr(ref, 'name_description') def SR_set_name_label(self, session, sr_ref, value): sr = XendNode.instance().get_sr() sr.name_label = value + XendNode.instance().save() return xen_api_success_void() def SR_set_name_description(self, session, sr_ref, value): sr = XendNode.instance().get_sr() sr.name_description = value + XendNode.instance().save() return xen_api_success_void() + +def _decorate(): + """Initialise Xen API wrapper by making sure all functions + have the correct validation decorators such as L{valid_host} + and L{session_required}. + """ + + global_validators = [session_required, catch_typeerror] + classes = { + 'session' : None, + 'host' : valid_host, + 'host_cpu': valid_host_cpu, + 'network' : valid_network, + 'VM' : valid_vm, + 'VBD' : valid_vbd, + 'VIF' : valid_vif, + 'VDI' : valid_vdi, + 'VTPM' : valid_vtpm, + 'SR' : valid_sr, + 'PIF' : valid_pif + } + + # Cheat methods + # ------------- + # Methods that have a trivial implementation for all classes. + # 1. get_by_uuid == getting by ref, so just return uuid for + # all get_by_uuid() methods. + + for cls in classes.keys(): + get_by_uuid = '%s_get_by_uuid' % cls + get_uuid = '%s_get_uuid' % cls + def _get_by_uuid(_1, _2, ref): + return xen_api_success(ref) + + def _get_uuid(_1, _2, ref): + return xen_api_success(ref) + + setattr(XendAPI, get_by_uuid, _get_by_uuid) + setattr(XendAPI, get_uuid, _get_uuid) + + # 2. get_record is just getting all the attributes, so provide + # a fake template implementation. + # + # TODO: ... 
+ + + # Wrapping validators around XMLRPC calls + # --------------------------------------- + + for cls, validator in classes.items(): + def doit(n, takes_instance): + n_ = n.replace('.', '_') + try: + f = getattr(XendAPI, n_) + argcounts[n] = f.func_code.co_argcount - 1 + + validators = takes_instance and validator and [validator] \ + or [] + validators += global_validators + for v in validators: + f = v(f) + f.api = n + setattr(XendAPI, n_, f) + except AttributeError: + log.warn("API call: %s not found" % n) + + + ro_attrs = getattr(XendAPI, '%s_attr_ro' % cls, []) + rw_attrs = getattr(XendAPI, '%s_attr_rw' % cls, []) + methods = getattr(XendAPI, '%s_methods' % cls, []) + funcs = getattr(XendAPI, '%s_funcs' % cls, []) + + # wrap validators around readable class attributes + for attr_name in ro_attrs + rw_attrs + XendAPI.Base_attr_ro: + doit('%s.get_%s' % (cls, attr_name), True) + + # wrap validators around writable class attrributes + for attr_name in rw_attrs + XendAPI.Base_attr_rw: + doit('%s.set_%s' % (cls, attr_name), True) + + # wrap validators around methods + for method_name in methods + XendAPI.Base_methods: + doit('%s.%s' % (cls, method_name), True) + + # wrap validators around class functions + for func_name in funcs + XendAPI.Base_funcs: + doit('%s.%s' % (cls, func_name), False) + +_decorate() + + # # Auto generate some stubs based on XendAPI introspection # diff --git a/tools/python/xen/xend/XendAPIConstants.py b/tools/python/xen/xend/XendAPIConstants.py index 875f948bcf..78c9f1e5af 100644 --- a/tools/python/xen/xend/XendAPIConstants.py +++ b/tools/python/xen/xend/XendAPIConstants.py @@ -41,7 +41,7 @@ XEN_API_CPU_FEATURE = [ 'CMOV', 'PAT', 'PSE36', 'PN', 'CLFLSH', 'DTES', 'ACPI', 'MMX', 'FXCR', 'XMM', 'XMM2', 'SELFSNOOP', 'HT', 'ACC', 'IA64', 'SYSCALL', 'MP', 'NX', 'MMXEXT', - 'LM', '3DNOWEXT', '3DNOW', 'RECOVERY', 'LONGRUN', + 'LM', 'THREEDNOWEXT', 'THREEDNOW', 'RECOVERY', 'LONGRUN', 'LRTI', 'CXMMX', 'K6_MTRR', 'CYRIX_ARR', 'CENTAUR_MCR', 'K8', 'K7', 
'P3', 'P4', 'CONSTANT_TSC', 'FXSAVE_LEAK', 'XMM3', 'MWAIT', 'DSCPL', 'EST', 'TM2', 'CID', 'CX16', @@ -73,3 +73,4 @@ XEN_API_BOOT_TYPE = [ XEN_API_VBD_MODE = ['RO', 'RW'] XEN_API_VDI_TYPE = ['system', 'user', 'ephemeral'] XEN_API_DRIVER_TYPE = ['ioemu', 'paravirtualised'] +XEN_API_VBD_TYPE = ['CD', 'Disk'] diff --git a/tools/python/xen/xend/XendAuthSessions.py b/tools/python/xen/xend/XendAuthSessions.py index 4081177597..5c9a4e9a2f 100644 --- a/tools/python/xen/xend/XendAuthSessions.py +++ b/tools/python/xen/xend/XendAuthSessions.py @@ -130,16 +130,3 @@ def instance(): inst = XendAuthSessions() inst.init() return inst - -# Handy Authentication Decorators -# ------------------------------- -def session_required(func): - def check_session(self, session, *args, **kwargs): - if instance().is_session_valid(session): - return func(self, session, *args, **kwargs) - else: - return {'Status': 'Failure', - 'ErrorDescription': XEND_ERROR_SESSION_INVALID} - return check_session - - diff --git a/tools/python/xen/xend/XendBootloader.py b/tools/python/xen/xend/XendBootloader.py index 170b637222..66d8427bff 100644 --- a/tools/python/xen/xend/XendBootloader.py +++ b/tools/python/xen/xend/XendBootloader.py @@ -53,6 +53,12 @@ def bootloader(blexec, disk, quiet = False, blargs = '', kernel = '', child = os.fork() if (not child): args = [ blexec ] + if kernel: + args.append("--kernel=%s" % kernel) + if ramdisk: + args.append("--ramdisk=%s" % ramdisk) + if kernel_args: + args.append("--args=%s" % kernel_args) if quiet: args.append("-q") args.append("--output=%s" % fifo) diff --git a/tools/python/xen/xend/XendConfig.py b/tools/python/xen/xend/XendConfig.py index 8dd17c4329..cfd2c20e6e 100644 --- a/tools/python/xen/xend/XendConfig.py +++ b/tools/python/xen/xend/XendConfig.py @@ -93,6 +93,8 @@ XENAPI_CFG_TO_LEGACY_CFG = { 'actions_after_reboot': 'on_reboot', 'actions_after_crash': 'on_crash', 'platform_localtime': 'localtime', + 'PV_bootloader': 'bootloader', + 'PV_bootloader_args': 
'bootloader_args', } LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(XENAPI_CFG_TO_LEGACY_CFG) @@ -139,7 +141,7 @@ XENAPI_CFG_TYPES = { 'tpm_backend': int, 'PV_bootloader': str, 'PV_kernel': str, - 'PV_initrd': str, + 'PV_ramdisk': str, 'PV_args': str, 'PV_bootloader_args': str, 'HVM_boot': str, @@ -297,7 +299,7 @@ class XendConfig(dict): self._sxp_to_xapi_unsupported(sxp_obj) elif xapi: self.update_with_xenapi_config(xapi) - self._add_xapi_unsupported() + self._add_xapi_unsupported(xapi) elif dominfo: # output from xc.domain_getinfo self._dominfo_to_xapi(dominfo) @@ -506,8 +508,12 @@ class XendConfig(dict): pci_devs = [] for pci_dev in sxp.children(config, 'dev'): pci_dev_info = {} - for opt, val in pci_dev[1:]: - pci_dev_info[opt] = val + for opt_val in pci_dev[1:]: + try: + opt, val = opt_val + pci_dev_info[opt] = val + except TypeError: + pass pci_devs.append(pci_dev_info) cfg['devices'][pci_devs_uuid] = (dev_type, @@ -570,7 +576,6 @@ class XendConfig(dict): if 'security' in cfg and isinstance(cfg['security'], str): cfg['security'] = sxp.from_string(cfg['security']) - # TODO: get states old_state = sxp.child_value(sxp_cfg, 'state') if old_state: for i in range(len(CONFIG_OLD_DOM_STATES)): @@ -715,6 +720,8 @@ class XendConfig(dict): _set_cfg_if_exists('on_xend_start') _set_cfg_if_exists('vcpu_avail') _set_cfg_if_exists('max_vcpu_id') # needed for vcpuDomDetails + _set_cfg_if_exists('cpu_weight') + _set_cfg_if_exists('cpu_cap') # Parse and store runtime configuration _set_cfg_if_exists('start_time') @@ -724,19 +731,22 @@ class XendConfig(dict): _set_cfg_if_exists('up_time') _set_cfg_if_exists('status') # TODO, deprecated - def _add_xapi_unsupported(self): + def _add_xapi_unsupported(self, xapi_dict): """Updates the configuration object with entries that are not officially supported by the Xen API but is required for the rest of Xend to function. 
""" # populate image - hvm = self['HVM_boot'] != '' - self['image']['type'] = hvm and 'hvm' or 'linux' - if hvm: - self['image']['hvm'] = {} - for xapi, cfgapi in XENAPI_HVM_CFG.items(): - self['image']['hvm'][cfgapi] = self[xapi] + if 'image' in xapi_dict: + self['image'].update(xapi_dict['image']) + else: + hvm = self['HVM_boot'] != '' + self['image']['type'] = hvm and 'hvm' or 'linux' + if hvm: + self['image']['hvm'] = {} + for xapi, cfgapi in XENAPI_HVM_CFG.items(): + self['image']['hvm'][cfgapi] = self[xapi] def _get_old_state_string(self): @@ -848,14 +858,15 @@ class XendConfig(dict): for cls in XendDevices.valid_devices(): found = False - # figure if there is a device that is running - if domain: + # figure if there is a dev controller is valid and running + if domain and domain.getDomid() != None: try: controller = domain.getDeviceController(cls) configs = controller.configurations() for config in configs: sxpr.append(['device', config]) - found = True + + found = True except: log.exception("dumping sxp from device controllers") pass @@ -916,11 +927,12 @@ class XendConfig(dict): dev_type = sxp.name(config) dev_info = {} - try: - for opt, val in config[1:]: + for opt_val in config[1:]: + try: + opt, val = opt_val dev_info[opt] = val - except ValueError: - pass # SXP has no options for this device + except (TypeError, ValueError): # unpack error + pass if dev_type == 'vbd': if dev_info.get('dev', '').startswith('ioemu:'): @@ -989,7 +1001,7 @@ class XendConfig(dict): self['vbd_refs'].append(dev_uuid) return dev_uuid - elif dev_type in ('vtpm'): + elif dev_type == 'vtpm': if cfg_xenapi.get('type'): dev_info['type'] = cfg_xenapi.get('type') @@ -1012,11 +1024,12 @@ class XendConfig(dict): dev_type = sxp.name(config) dev_info = {} - try: - for opt, val in config[1:]: - self['devices'][opt] = val - except ValueError: - pass # SXP has no options for this device + for opt_val in config[1:]: + try: + opt, val = opt_val + self['devices'][dev_uuid][opt] = val + except 
(TypeError, ValueError): + pass # no value for this config option return True @@ -1095,23 +1108,34 @@ class XendConfig(dict): return image - def update_with_image_sxp(self, image_sxp): + def update_with_image_sxp(self, image_sxp, bootloader = False): # Convert Legacy "image" config to Xen API PV_* # configuration log.debug("update_with_image_sxp(%s)" % scrub_password(image_sxp)) - self['PV_kernel'] = sxp.child_value(image_sxp, 'kernel','') - self['PV_ramdisk'] = sxp.child_value(image_sxp, 'ramdisk','') - kernel_args = sxp.child_value(image_sxp, 'args', '') - + kernel_args = "" + # attempt to extract extra arguments from SXP config arg_ip = sxp.child_value(image_sxp, 'ip') if arg_ip and not re.search(r'ip=[^ ]+', kernel_args): - kernel_args += ' ip=%s' % arg_ip + kernel_args += 'ip=%s ' % arg_ip arg_root = sxp.child_value(image_sxp, 'root') if arg_root and not re.search(r'root=', kernel_args): - kernel_args += ' root=%s' % arg_root - self['PV_args'] = kernel_args + kernel_args += 'root=%s ' % arg_root + + # user-specified args must come last: previous releases did this and + # some domU kernels rely upon the ordering. 
+ kernel_args += sxp.child_value(image_sxp, 'args', '') + + if bootloader: + self['_temp_using_bootloader'] = '1' + self['_temp_kernel'] = sxp.child_value(image_sxp, 'kernel','') + self['_temp_ramdisk'] = sxp.child_value(image_sxp, 'ramdisk','') + self['_temp_args'] = kernel_args + else: + self['PV_kernel'] = sxp.child_value(image_sxp, 'kernel','') + self['PV_ramdisk'] = sxp.child_value(image_sxp, 'ramdisk','') + self['PV_args'] = kernel_args # Store image SXP in python dictionary format image = {} diff --git a/tools/python/xen/xend/XendConstants.py b/tools/python/xen/xend/XendConstants.py index e07fa127f1..e42216d06b 100644 --- a/tools/python/xen/xend/XendConstants.py +++ b/tools/python/xen/xend/XendConstants.py @@ -80,6 +80,7 @@ ZOMBIE_PREFIX = 'Zombie-' MINIMUM_RESTART_TIME = 20 RESTART_IN_PROGRESS = 'xend/restart_in_progress' +LAST_SHUTDOWN_REASON = 'xend/last_shutdown_reason' # # Device migration stages (eg. XendDomainInfo, XendCheckpoint, server.tpmif) diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py index 81c39384d4..4257057398 100644 --- a/tools/python/xen/xend/XendDomain.py +++ b/tools/python/xen/xend/XendDomain.py @@ -36,6 +36,7 @@ from xen.xend import XendRoot, XendCheckpoint, XendDomainInfo from xen.xend.PrettyPrint import prettyprint from xen.xend.XendConfig import XendConfig from xen.xend.XendError import XendError, XendInvalidDomain, VmError +from xen.xend.XendError import VMBadState from xen.xend.XendLogging import log from xen.xend.XendAPIConstants import XEN_API_VM_POWER_STATE from xen.xend.XendConstants import XS_VMROOT @@ -376,7 +377,7 @@ class XendDomain: dom0.setVCpuCount(target) - def _refresh(self): + def _refresh(self, refresh_shutdown = True): """Refresh the domain list. Needs to be called when either xenstore has changed or when a method requires up to date information (like uptime, cputime stats). 
@@ -392,7 +393,7 @@ class XendDomain: for dom in running: domid = dom['domid'] if domid in self.domains: - self.domains[domid].update(dom) + self.domains[domid].update(dom, refresh_shutdown) elif domid not in self.domains and dom['dying'] != 1: try: new_dom = XendDomainInfo.recreate(dom, False) @@ -494,7 +495,7 @@ class XendDomain: """ self.domains_lock.acquire() try: - self._refresh() + self._refresh(refresh_shutdown = False) dom = self.domain_lookup_nr(domid) if not dom: raise XendError("No domain named '%s'." % str(domid)) @@ -604,6 +605,16 @@ class XendDomain: finally: self.domains_lock.release() + def get_all_vms(self): + self.domains_lock.acquire() + try: + result = self.domains.values() + result += [x for x in self.managed_domains.values() if + x not in result] + return result + finally: + self.domains_lock.release() + def get_vm_by_uuid(self, vm_uuid): self.domains_lock.acquire() try: @@ -720,7 +731,7 @@ class XendDomain: self.domains_lock.acquire() try: - self._refresh() + self._refresh(refresh_shutdown = False) # active domains active_domains = self.domains.values() @@ -782,7 +793,9 @@ class XendDomain: raise XendError("Cannot save privileged domain %s" % domname) if dominfo.state != DOM_STATE_RUNNING: - raise XendError("Cannot suspend domain that is not running.") + raise VMBadState("Domain is not running", + POWER_STATE_NAMES[DOM_STATE_RUNNING], + POWER_STATE_NAMES[dominfo.state]) dom_uuid = dominfo.get_uuid() @@ -869,6 +882,26 @@ class XendDomain: self.domains_lock.release() + def domain_create_from_dict(self, config_dict): + """Create a domain from a configuration dictionary. 
+ + @param config_dict: configuration + @rtype: XendDomainInfo + """ + self.domains_lock.acquire() + try: + self._refresh() + + dominfo = XendDomainInfo.create_from_dict(config_dict) + self._add_domain(dominfo) + self.domain_sched_credit_set(dominfo.getDomid(), + dominfo.getWeight(), + dominfo.getCap()) + return dominfo + finally: + self.domains_lock.release() + + def domain_new(self, config): """Create a domain from a configuration but do not start it. @@ -912,7 +945,9 @@ class XendDomain: raise XendInvalidDomain(str(domid)) if dominfo.state != DOM_STATE_HALTED: - raise XendError("Domain is already running") + raise VMBadState("Domain is already running", + POWER_STATE_NAMES[DOM_STATE_HALTED], + POWER_STATE_NAMES[dominfo.state]) dominfo.start(is_managed = True) self._add_domain(dominfo) @@ -940,7 +975,9 @@ class XendDomain: raise XendInvalidDomain(str(domid)) if dominfo.state != DOM_STATE_HALTED: - raise XendError("Domain is still running") + raise VMBadState("Domain is still running", + POWER_STATE_NAMES[DOM_STATE_HALTED], + POWER_STATE_NAMES[dominfo.state]) log.info("Domain %s (%s) deleted." % (dominfo.getName(), dominfo.info.get('uuid'))) diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py index 27dcce7e71..099302f573 100644 --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -37,7 +37,7 @@ from xen.util import asserts from xen.util.blkif import blkdev_uname_to_file from xen.util import security -from xen.xend import balloon, sxp, uuid, image, arch +from xen.xend import balloon, sxp, uuid, image, arch, osdep from xen.xend import XendRoot, XendNode, XendConfig from xen.xend.XendConfig import scrub_password @@ -81,13 +81,12 @@ log = logging.getLogger("xend.XendDomainInfo") def create(config): """Creates and start a VM using the supplied configuration. - (called from XMLRPCServer directly) @param config: A configuration object involving lists of tuples. 
@type config: list of lists, eg ['vm', ['image', 'xen.gz']] @rtype: XendDomainInfo - @return: A up and running XendDomainInfo instance + @return: An up and running XendDomainInfo instance @raise VmError: Invalid configuration or failure to start. """ @@ -102,6 +101,28 @@ def create(config): return vm +def create_from_dict(config_dict): + """Creates and start a VM using the supplied configuration. + + @param config_dict: An configuration dictionary. + + @rtype: XendDomainInfo + @return: An up and running XendDomainInfo instance + @raise VmError: Invalid configuration or failure to start. + """ + + log.debug("XendDomainInfo.create_from_dict(%s)", + scrub_password(config_dict)) + vm = XendDomainInfo(XendConfig.XendConfig(xapi = config_dict)) + try: + vm.start() + except: + log.exception('Domain construction failed') + vm.destroy() + raise + + return vm + def recreate(info, priv): """Create the VM object for an existing domain. The domain must not be dying, as the paths in the store should already have been removed, @@ -418,7 +439,7 @@ class XendDomainInfo: def shutdown(self, reason): """Shutdown a domain by signalling this via xenstored.""" - log.debug('XendDomainInfo.shutdown') + log.debug('XendDomainInfo.shutdown(%s)', reason) if self.state in (DOM_STATE_SHUTDOWN, DOM_STATE_HALTED,): raise XendError('Domain cannot be shutdown') @@ -490,7 +511,7 @@ class XendDomainInfo: # look up uuid of the device dev_control = self.getDeviceController(deviceClass) dev_sxpr = dev_control.sxpr(devid) - dev_uuid = sxp.child_value(sxpr, 'uuid') + dev_uuid = sxp.child_value(dev_sxpr, 'uuid') if not dev_uuid: return False @@ -893,7 +914,7 @@ class XendDomainInfo: return self.info.get('cpu_cap', 0) def getWeight(self): - return self.info['cpu_weight'] + return self.info.get('cpu_weight', 256) def setResume(self, state): self._resume = state @@ -948,9 +969,15 @@ class XendDomainInfo: log.warn('Domain has crashed: name=%s id=%d.', self.info['name_label'], self.domid) + 
self._writeVm(LAST_SHUTDOWN_REASON, 'crash') if xroot.get_enable_dump(): - self.dumpCore() + try: + self.dumpCore() + except XendError: + # This error has been logged -- there's nothing more + # we can do in this context. + pass restart_reason = 'crash' self._stateSet(DOM_STATE_HALTED) @@ -967,6 +994,7 @@ class XendDomainInfo: log.info('Domain has shutdown: name=%s id=%d reason=%s.', self.info['name_label'], self.domid, reason) + self._writeVm(LAST_SHUTDOWN_REASON, reason) self._clearRestart() @@ -1051,12 +1079,6 @@ class XendDomainInfo: """ from xen.xend import XendDomain - config = self.sxpr() - - if self._infoIsSet('cpus') and len(self.info['cpus']) != 0: - config.append(['cpus', reduce(lambda x, y: str(x) + "," + str(y), - self.info['cpus'])]) - if self._readVm(RESTART_IN_PROGRESS): log.error('Xend failed during restart of domain %s. ' 'Refusing to restart to avoid loops.', @@ -1097,7 +1119,8 @@ class XendDomainInfo: new_dom = None try: - new_dom = XendDomain.instance().domain_create(config) + new_dom = XendDomain.instance().domain_create_from_dict( + self.info) new_dom.unpause() rst_cnt = self._readVm('xend/restart_count') rst_cnt = int(rst_cnt) + 1 @@ -1146,7 +1169,10 @@ class XendDomainInfo: # def dumpCore(self, corefile = None): - """Create a core dump for this domain. Nothrow guarantee.""" + """Create a core dump for this domain. + + @raise: XendError if core dumping failed. + """ try: if not corefile: @@ -1539,6 +1565,8 @@ class XendDomainInfo: if boot: # HVM booting. self.info['image']['type'] = 'hvm' + if not 'devices' in self.info['image']: + self.info['image']['devices'] = {} self.info['image']['devices']['boot'] = boot elif not blexec and kernel: # Boot from dom0. 
Nothing left to do -- the kernel and ramdisk @@ -1547,7 +1575,7 @@ class XendDomainInfo: else: # Boot using bootloader if not blexec or blexec == 'pygrub': - blexec = '/usr/bin/pygrub' + blexec = osdep.pygrub_path blcfg = None for (devtype, devinfo) in self.info.all_devices_sxpr(): @@ -1598,7 +1626,7 @@ class XendDomainInfo: log.error(msg) raise VmError(msg) - self.info.update_with_image_sxp(blcfg) + self.info.update_with_image_sxp(blcfg, True) # @@ -1831,8 +1859,7 @@ class XendDomainInfo: return self.info.get('memory_dynamic_max', 0) def get_memory_dynamic_min(self): return self.info.get('memory_dynamic_min', 0) - - + def get_vcpus_policy(self): sched_id = xc.sched_id_get() if sched_id == xen.lowlevel.xc.XEN_SCHEDULER_SEDF: @@ -1847,8 +1874,6 @@ class XendDomainInfo: return XEN_API_VM_POWER_STATE[self.state] def get_platform_std_vga(self): return self.info.get('platform_std_vga', False) - def get_platform_keymap(self): - return '' def get_platform_serial(self): return self.info.get('platform_serial', '') def get_platform_localtime(self): @@ -1952,8 +1977,19 @@ class XendDomainInfo: config['device'] = 'eth%d' % devid else: config['device'] = '' - - config['network'] = '' # Invalid for Xend + + if not config.has_key('network'): + try: + config['network'] = \ + XendNode.instance().bridge_to_network( + config.get('bridge')).uuid + except Exception: + log.exception('bridge_to_network') + # Ignore this for now -- it may happen if the device + # has been specified using the legacy methods, but at + # some point we're going to have to figure out how to + # handle that properly. 
+ config['MTU'] = 1500 # TODO config['io_read_kbs'] = 0.0 config['io_write_kbs'] = 0.0 @@ -1965,7 +2001,7 @@ class XendDomainInfo: config['image'] = config.get('uname', '') config['io_read_kbs'] = 0.0 config['io_write_kbs'] = 0.0 - if config['mode'] == 'r': + if config.get('mode', 'r') == 'r': config['mode'] = 'RO' else: config['mode'] = 'RW' diff --git a/tools/python/xen/xend/XendError.py b/tools/python/xen/xend/XendError.py index 5947145267..439133d9a0 100644 --- a/tools/python/xen/xend/XendError.py +++ b/tools/python/xen/xend/XendError.py @@ -32,6 +32,17 @@ class XendError(Fault): def __str__(self): return self.value +class VMBadState(XendError): + def __init__(self, value, expected, actual): + XendError.__init__(self, value) + self.expected = expected + self.actual = actual + +class NetworkAlreadyConnected(XendError): + def __init__(self, pif_uuid): + XendError.__init__(self, 'Network already connected') + self.pif_uuid = pif_uuid + class VmError(XendError): """Vm construction error.""" pass diff --git a/tools/python/xen/xend/XendLogging.py b/tools/python/xen/xend/XendLogging.py index 32cd13c5db..e889449fd0 100644 --- a/tools/python/xen/xend/XendLogging.py +++ b/tools/python/xen/xend/XendLogging.py @@ -16,7 +16,10 @@ # Copyright (C) 2005, 2006 XenSource Ltd. #============================================================================ +import inspect import os +import os.path +import sys import stat import tempfile import types @@ -38,6 +41,24 @@ if 'TRACE' not in logging.__dict__: self.log(logging.TRACE, *args, **kwargs) logging.Logger.trace = trace + def findCaller(self): + """ + Override logging.Logger.findCaller so that the above trace function + does not appear as the source of log messages. The signature of this + function changed between Python 2.3 and 2.4. 
+ """ + frames = inspect.stack() + thisfile = os.path.normcase(frames[0][1]) + for frame in frames: + filename = os.path.normcase(frame[1]) + if filename != thisfile and filename != logging._srcfile: + major, minor, _, _, _ = sys.version_info + if major == 2 and minor >= 4: + return filename, frame[2], frame[3] + else: + return filename, frame[2] + logging.Logger.findCaller = findCaller + log = logging.getLogger("xend") @@ -46,7 +67,7 @@ MAX_BYTES = 1 << 20 # 1MB BACKUP_COUNT = 5 STDERR_FORMAT = "[%(name)s] %(levelname)s (%(module)s:%(lineno)d) %(message)s" -LOGFILE_FORMAT = "[%(asctime)s %(name)s %(process)d] %(levelname)s (%(module)s:%(lineno)d) %(message)s" +LOGFILE_FORMAT = "[%(asctime)s %(process)d] %(levelname)s (%(module)s:%(lineno)d) %(message)s" DATE_FORMAT = "%Y-%m-%d %H:%M:%S" diff --git a/tools/python/xen/xend/XendNetwork.py b/tools/python/xen/xend/XendNetwork.py new file mode 100644 index 0000000000..5e606957f1 --- /dev/null +++ b/tools/python/xen/xend/XendNetwork.py @@ -0,0 +1,112 @@ +#============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright (c) 2006 Xensource Inc. 
+#============================================================================ + +import os +import commands +import re +import struct +import socket + +import XendDomain +import XendNode +from XendLogging import log + +IP_ROUTE_RE = r'^default via ([\d\.]+) dev (\w+)' + +def linux_get_default_network(): + """Returns the network details of the host.""" + + ip_cmd = '/sbin/ip route' + rc, output = commands.getstatusoutput(ip_cmd) + default_route = None + default_dev = None + default_netmask = None + if rc == 0: + # find default route/device + for line in output.split('\n'): + is_default = re.search(IP_ROUTE_RE, line) + if is_default: + default_route = is_default.group(1) + default_dev = is_default.group(2) + + # find network address and network mask + if default_dev: + dev_re = r'^([\d\.]+)/(\d+) dev %s' % default_dev + for line in output.split('\n'): + is_dev = re.search(dev_re, line) + if is_dev: + # convert integer netmask to string representation + netmask = 0xffffffff ^ (2**(32-int(is_dev.group(2))) - 1) + packed = struct.pack('!I', netmask) + default_netmask = socket.inet_ntoa(packed) + + return (default_route, default_netmask) + + +class XendNetwork: + def __init__(self, uuid, name, description, gateway, netmask): + self.uuid = uuid + self.name_label = name + self.name_description = description + self.default_gateway = gateway + self.default_netmask = netmask + + def set_name_label(self, new_name): + self.name_label = new_name + XendNode.instance().save_networks() + + def set_name_description(self, new_desc): + self.name_description = new_desc + XendNode.instance().save_networks() + + def set_default_gateway(self, new_gateway): + if re.search('^\d+\.\d+\.\d+\.\d+$', new_gateway): + self.default_gateway = new_gateway + XendNode.instance().save_networks() + + def set_default_netmask(self, new_netmask): + if re.search('^\d+\.\d+\.\d+\.\d+$', new_netmask): + self.default_netmask = new_netmask + XendNode.instance().save_networks() + + def get_VIF_UUIDs(self): + 
result = [] + vms = XendDomain.instance().get_all_vms() + for vm in vms: + vifs = vm.get_vifs() + for vif in vifs: + vif_cfg = vm.get_dev_xenapi_config('vif', vif) + if vif_cfg.get('network') == self.uuid: + result.append(vif) + return result + + def get_PIF_UUIDs(self): + return [x.uuid for x in XendNode.instance().pifs.values() + if x.network == self] + + def get_record(self, transient = True): + result = { + 'uuid': self.uuid, + 'name_label': self.name_label, + 'name_description': self.name_description, + 'default_gateway': self.default_gateway, + 'default_netmask': self.default_netmask, + } + if transient: + result['VIFs'] = self.get_VIF_UUIDs() + result['PIFs'] = self.get_PIF_UUIDs() + return result diff --git a/tools/python/xen/xend/XendNode.py b/tools/python/xen/xend/XendNode.py index 8850a71ce3..117ca3d25d 100644 --- a/tools/python/xen/xend/XendNode.py +++ b/tools/python/xen/xend/XendNode.py @@ -19,34 +19,181 @@ import os import socket import xen.lowlevel.xc + +from xen.util import Brctl + from xen.xend import uuid -from xen.xend.XendError import XendError +from xen.xend.XendError import XendError, NetworkAlreadyConnected +from xen.xend.XendRoot import instance as xendroot from xen.xend.XendStorageRepository import XendStorageRepository +from xen.xend.XendLogging import log +from xen.xend.XendPIF import * +from xen.xend.XendNetwork import * +from xen.xend.XendStateStore import XendStateStore class XendNode: """XendNode - Represents a Domain 0 Host.""" def __init__(self): - self.xc = xen.lowlevel.xc.xc() - self.uuid = uuid.createString() - self.cpus = {} - self.name = socket.gethostname() - self.desc = "" - self.sr = XendStorageRepository() + """Initalises the state of all host specific objects such as + + * Host + * Host_CPU + * PIF + * Network + * Storage Repository + """ + self.xc = xen.lowlevel.xc.xc() + self.state_store = XendStateStore(xendroot().get_xend_state_path()) + + # load host state from XML file + saved_host = 
self.state_store.load_state('host') + if saved_host and len(saved_host.keys()) == 1: + self.uuid = saved_host.keys()[0] + host = saved_host[self.uuid] + self.name = host.get('name_label', socket.gethostname()) + self.desc = host.get('name_description', '') + self.cpus = {} + else: + self.uuid = uuid.createString() + self.name = socket.gethostname() + self.desc = '' + self.cpus = {} + + # load CPU UUIDs + saved_cpus = self.state_store.load_state('cpu') + for cpu_uuid, cpu in saved_cpus.items(): + self.cpus[cpu_uuid] = cpu + + # verify we have enough cpus here physinfo = self.physinfo_dict() cpu_count = physinfo['nr_cpus'] cpu_features = physinfo['hw_caps'] - - for i in range(cpu_count): - # construct uuid by appending extra bit on the host. - # since CPUs belong to a host. - cpu_uuid = self.uuid + '-%04d' % i - cpu_info = {'uuid': cpu_uuid, - 'host': self.uuid, - 'number': i, - 'features': cpu_features} - self.cpus[cpu_uuid] = cpu_info + + # If the number of CPUs don't match, we should just reinitialise + # the CPU UUIDs. 
+ if cpu_count != len(self.cpus): + self.cpus = {} + for i in range(cpu_count): + cpu_uuid = uuid.createString() + cpu_info = {'uuid': cpu_uuid, + 'host': self.uuid, + 'number': i, + 'features': cpu_features} + self.cpus[cpu_uuid] = cpu_info + + self.pifs = {} + self.networks = {} + + # initialise networks + saved_networks = self.state_store.load_state('network') + if saved_networks: + for net_uuid, network in saved_networks.items(): + self.network_create(network.get('name_label'), + network.get('name_description', ''), + network.get('default_gateway', ''), + network.get('default_netmask', ''), + False, net_uuid) + else: + gateway, netmask = linux_get_default_network() + self.network_create('net0', '', gateway, netmask, False) + + # initialise PIFs + saved_pifs = self.state_store.load_state('pif') + if saved_pifs: + for pif_uuid, pif in saved_pifs.items(): + if pif.get('network') in self.networks: + network = self.networks[pif['network']] + try: + self.PIF_create(pif['name'], pif['MTU'], pif['VLAN'], + pif['MAC'], network, False, pif_uuid) + except NetworkAlreadyConnected, exn: + log.error('Cannot load saved PIF %s, as network %s ' + + 'is already connected to PIF %s', + pif_uuid, pif['network'], exn.pif_uuid) + else: + for name, mtu, mac in linux_get_phy_ifaces(): + network = self.networks.values()[0] + self.PIF_create(name, mtu, '', mac, network, False) + + # initialise storage + saved_sr = self.state_store.load_state('sr') + if saved_sr and len(saved_sr) == 1: + sr_uuid = saved_sr.keys()[0] + self.sr = XendStorageRepository(sr_uuid) + else: + sr_uuid = uuid.createString() + self.sr = XendStorageRepository(sr_uuid) + + + def network_create(self, name_label, name_description, + default_gateway, default_netmask, persist = True, + net_uuid = None): + if net_uuid is None: + net_uuid = uuid.createString() + self.networks[net_uuid] = XendNetwork(net_uuid, name_label, + name_description, + default_gateway, + default_netmask) + if persist: + self.save_networks() + return 
net_uuid + + + def network_destroy(self, net_uuid): + del self.networks[net_uuid] + self.save_networks() + + + def PIF_create(self, name, mtu, vlan, mac, network, persist = True, + pif_uuid = None): + for pif in self.pifs.values(): + if pif.network == network: + raise NetworkAlreadyConnected(pif.uuid) + + if pif_uuid is None: + pif_uuid = uuid.createString() + self.pifs[pif_uuid] = XendPIF(pif_uuid, name, mtu, vlan, mac, network, + self) + if persist: + self.save_PIFs() + self.refreshBridges() + return pif_uuid + + + def PIF_create_VLAN(self, pif_uuid, network_uuid, vlan): + pif = self.pifs[pif_uuid] + network = self.networks[network_uuid] + return self.PIF_create(pif.name, pif.mtu, vlan, pif.mac, network) + + + def PIF_destroy(self, pif_uuid): + del self.pifs[pif_uuid] + self.save_PIFs() + + + def save(self): + # save state + host_record = {self.uuid: {'name_label':self.name, + 'name_description':self.desc}} + self.state_store.save_state('host',host_record) + self.state_store.save_state('cpu', self.cpus) + self.save_PIFs() + self.save_networks() + + sr_record = {self.sr.uuid: self.sr.get_record()} + self.state_store.save_state('sr', sr_record) + + def save_PIFs(self): + pif_records = dict([(k, v.get_record(transient = False)) + for k, v in self.pifs.items()]) + self.state_store.save_state('pif', pif_records) + + def save_networks(self): + net_records = dict([(k, v.get_record(transient = False)) + for k, v in self.networks.items()]) + self.state_store.save_state('network', net_records) def shutdown(self): return 0 @@ -56,7 +203,8 @@ class XendNode: def notify(self, _): return 0 - + + # # Ref validation # @@ -67,6 +215,9 @@ class XendNode: def is_valid_cpu(self, cpu_ref): return (cpu_ref in self.cpus) + def is_valid_network(self, network_ref): + return (network_ref in self.networks) + # # Storage Repo # @@ -100,6 +251,9 @@ class XendNode: def set_description(self, new_desc): self.desc = new_desc + def get_uuid(self): + return self.uuid + # # Host CPU Functions # @@ 
-133,7 +287,42 @@ class XendNode: def get_host_cpu_load(self, host_cpu_ref): return 0.0 + + # + # Network Functions + # + def get_network_refs(self): + return self.networks.keys() + + def get_network(self, network_ref): + return self.networks[network_ref] + + def bridge_to_network(self, bridge): + """ + Determine which network a particular bridge is attached to. + + @param bridge The name of the bridge. If empty, the default bridge + will be used instead (the first one in the list returned by brctl + show); this is the behaviour of the vif-bridge script. + @return The XendNetwork instance to which this bridge is attached. + @raise Exception if the interface is not connected to a network. + """ + if not bridge: + rc, bridge = commands.getstatusoutput( + 'brctl show | cut -d "\n" -f 2 | cut -f 1') + if rc != 0 or not bridge: + raise Exception( + 'Could not find default bridge, and none was specified') + + bridges = Brctl.get_state() + if bridge not in bridges: + raise Exception('Bridge %s is not up' % bridge) + for pif in self.pifs.values(): + if pif.interface_name() in bridges[bridge]: + return pif.network + raise Exception('Bridge %s is not connected to a network' % bridge) + # # Getting host information. 
@@ -210,7 +399,12 @@ class XendNode: return dict(self.physinfo()) def info_dict(self): return dict(self.info()) - + + + def refreshBridges(self): + for pif in self.pifs.values(): + pif.refresh(Brctl.get_state()) + def instance(): global inst @@ -218,5 +412,5 @@ def instance(): inst except: inst = XendNode() + inst.save() return inst - diff --git a/tools/python/xen/xend/XendPIF.py b/tools/python/xen/xend/XendPIF.py new file mode 100644 index 0000000000..b6f2345c36 --- /dev/null +++ b/tools/python/xen/xend/XendPIF.py @@ -0,0 +1,198 @@ +#============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright (c) 2006 Xensource Inc. +#============================================================================ + +import commands +import logging +import os +import re + + +log = logging.getLogger("xend.XendPIF") +log.setLevel(logging.TRACE) + + +MAC_RE = re.compile(':'.join(['[0-9a-f]{2}'] * 6)) +IP_IFACE_RE = re.compile(r'^\d+: (\w+):.*mtu (\d+) .* link/\w+ ([0-9a-f:]+)') + +def linux_phy_to_virt(pif_name): + return 'eth' + re.sub(r'^[a-z]+', '', pif_name) + +def linux_get_phy_ifaces(): + """Returns a list of physical interfaces. 
+ + Identifies PIFs as those that have a interface name starting with 'p' + and have the fake 'fe:ff:ff:ff:ff:ff' MAC address. + + See /etc/xen/scripts/network-bridge for how the devices are renamed. + + @rtype: array of 3-element tuple (name, mtu, mac) + """ + + ip_cmd = 'ip -o link show' + rc, output = commands.getstatusoutput(ip_cmd) + ifaces = {} + phy_ifaces = [] + if rc == 0: + # parse all interfaces into (name, mtu, mac) + for line in output.split('\n'): + has_if = re.search(IP_IFACE_RE, line) + if has_if: + ifaces[has_if.group(1)] = has_if.groups() + + # resolve pifs' mac addresses + for name, mtu, mac in ifaces.values(): + if name[0] == 'p' and mac == 'fe:ff:ff:ff:ff:ff': + bridged_ifname = linux_phy_to_virt(name) + bridged_if = ifaces.get(bridged_ifname) + if bridged_if: + bridged_mac = bridged_if[2] + phy_ifaces.append((name, int(mtu), bridged_mac)) + + return phy_ifaces + +def linux_set_mac(iface, mac): + if not re.search(MAC_RE, mac): + return False + + ip_mac_cmd = 'ip link set %s addr %s' % \ + (linux_phy_to_virt(iface), mac) + rc, output = commands.getstatusoutput(ip_mac_cmd) + if rc == 0: + return True + + return False + +def linux_set_mtu(iface, mtu): + try: + ip_mtu_cmd = 'ip link set %s mtu %d' % \ + (linux_phy_to_virt(iface), int(mtu)) + rc, output = commands.getstatusoutput(ip_mtu_cmd) + if rc == 0: + return True + return False + except ValueError: + return False + +class XendPIF: + """Representation of a Physical Network Interface.""" + + def __init__(self, uuid, name, mtu, vlan, mac, network, host): + self.uuid = uuid + self.name = name + self.mac = mac + self.mtu = mtu + self.vlan = vlan + self.network = network + self.host = host + + def set_name(self, new_name): + self.name = new_name + + def set_mac(self, new_mac): + success = linux_set_mac(new_mac) + if success: + self.mac = new_mac + return success + + def set_mtu(self, new_mtu): + success = linux_set_mtu(new_mtu) + if success: + self.mtu = new_mtu + return success + + def 
get_io_read_kbs(self): + return 0.0 + + def get_io_write_kbs(self): + return 0.0 + + def get_record(self, transient = True): + result = {'name': self.name, + 'MAC': self.mac, + 'MTU': self.mtu, + 'VLAN': self.vlan, + 'host': self.host.uuid, + 'network': self.network.uuid} + if transient: + result['io_read_kbs'] = self.get_io_read_kbs() + result['io_write_kbs'] = self.get_io_write_kbs() + return result + + + def refresh(self, bridges): + ifname = self.interface_name() + rc, _ = _cmd('ip link show %s', ifname) + if rc != 0: + # Interface does not exist. If it's a physical interface, then + # there's nothing we can do -- this should have been set up with + # the network script. Otherwise, we can use vconfig to derive + # a subinterface. + if not self.vlan: + return + + rc, _ = _cmd('vconfig add %s %s', self.name, self.vlan) + if rc != 0: + log.error('Could not refresh %s', ifname) + return + log.info('Created network interface %s', ifname) + + for brname, nics in bridges.items(): + if ifname in nics: + log.debug('%s is already attached to %s', ifname, brname) + return + + # The interface is not attached to a bridge. Create one, and attach + # the interface to it. 
+ brname = _new_bridge_name(bridges) + rc, _ = _cmd('brctl addbr %s', brname) + if rc != 0: + log.error('Could not create bridge %s for interface %s', brname, + ifname) + return + log.info('Created network bridge %s', brname) + + rc, _ = _cmd('brctl addif %s %s', brname, ifname) + if rc != 0: + log.error('Could not add %s to %s', ifname, brname) + return + log.info('Added network interface %s to bridge %s', ifname, brname) + + + def interface_name(self): + if self.vlan: + return '%s.%s' % (self.name, self.vlan) + else: + return self.name + + +def _cmd(cmd, *args): + if len(args) > 0: + cmd = cmd % args + rc, output = commands.getstatusoutput(cmd) + if rc != 0: + log.debug('%s failed with code %d' % (cmd, rc)) + log.trace('%s: %s' % (cmd, output)) + return rc, output + + +def _new_bridge_name(bridges): + n = 0 + while True: + brname = 'xenbr%d' % n + if brname not in bridges: + return brname + n += 1 diff --git a/tools/python/xen/xend/XendRoot.py b/tools/python/xen/xend/XendRoot.py index baeb8afd4c..640c96f5f8 100644 --- a/tools/python/xen/xend/XendRoot.py +++ b/tools/python/xen/xend/XendRoot.py @@ -104,6 +104,9 @@ class XendRoot: """Default session storage path.""" xend_domains_path_default = '/var/lib/xend/domains' + """Default xend management state storage.""" + xend_state_path_default = '/var/lib/xend/state' + components = {} def __init__(self): @@ -263,6 +266,11 @@ class XendRoot: """ return self.get_config_value("xend-domains-path", self.xend_domains_path_default) + def get_xend_state_path(self): + """ Get the path for persistent domain configuration storage + """ + return self.get_config_value("xend-state-path", self.xend_state_path_default) + def get_network_script(self): """@return the script used to alter the network configuration when Xend starts and stops, or None if no such script is specified.""" diff --git a/tools/python/xen/xend/XendStateStore.py b/tools/python/xen/xend/XendStateStore.py new file mode 100644 index 0000000000..8be634058f --- /dev/null 
+++ b/tools/python/xen/xend/XendStateStore.py @@ -0,0 +1,210 @@ +#============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com> +# Copyright (c) 2006 Xensource Inc. +#============================================================================ + +import os + +from xen.xend import uuid +from xen.xend.XendLogging import log +from xml.dom import minidom +from xml.dom import Node + +class XendStateStore: + """Manages persistent storage of Xend's internal state, mainly + relating to API objects. + + It stores objects atomically in the file system as flat XML files + categorised by their 'class'. + + For example: + + /var/lib/xend/state/cpu.xml will contain the host cpu state + /var/lib/xend/state/sr.xml will contain the storage repository state. + + For the application, it will load the state via this class: + + load_state('cpu') will return a marshalled dictionary object + containing the cpu state. + + save_state('cpu', dict) will save the state contained in the dictionary + object about the 'cpu'. + + The state is stored where each top level element has a UUID in its + attributes. 
eg: + + host['49c01812-3c28-1ad4-a59d-2a3f81b13ec2'] = { + 'name': 'norwich', + 'desc': 'Test Xen Host', + 'cpu': {'6fc2d1ed-7eb0-4c9d-8006-3657d5483ae0': <obj>, + '669df3b8-62be-4e61-800b-bbe8ee63a760': <obj>} + } + + will turn into: + + <hosts> + <host uuid='49c01812-3c28-1ad4-a59d-2a3f81b13ec2'> + <name type='string'>norwich</name> + <description type='string'>Test Xen Host</description> + <cpu uuid='6fc2d1ed-7eb0-4c9d-8006-3657d5483ae0' /> + <cpu uuid='669df3b8-62be-4e61-800b-bbe8ee63a760' /> + </host> + </hosts> + + Note that it only dumps one level, so the references to CPU are + stored in a separate file. + + """ + + def __init__(self, base = "/var/lib/xend/state"): + self.base = base + if not os.path.exists(self.base): + os.makedirs(self.base) + + def _xml_file(self, cls): + """Return the absolute filename of the XML state storage file. + + @param cls: name of the class. + @type cls: string + @rtype: string + @return absolute filename of XML file to write/read from. + """ + return os.path.join(self.base, '%s.xml' % cls) + + def load_state(self, cls): + """Load the saved state of a class from persistent XML storage. + + References loaded from the XML will just point to an empty + dictionary which the caller will need to replace manually. + + @param cls: name of the class to load. 
+ @type cls: string + @rtype: dict + """ + + xml_path = self._xml_file(cls) + if not os.path.exists(xml_path): + return {} + + dom = minidom.parse(xml_path) + root = dom.documentElement + state = {} + + for child in root.childNodes: + if child.nodeType != Node.ELEMENT_NODE: + continue # skip non element nodes + + uuid = child.getAttribute('uuid') + cls_dict = {} + for val_elem in child.childNodes: + if val_elem.nodeType != Node.ELEMENT_NODE: + continue # skip non element nodes + + val_name = val_elem.tagName + val_type = val_elem.getAttribute('type').encode('utf8') + val_uuid = val_elem.getAttribute('uuid').encode('utf8') + val_elem.normalize() + val_text = '' + if val_elem.firstChild: + val_text = val_elem.firstChild.nodeValue.strip() + + if val_type == '' and val_uuid != '': + # this is a reference + if val_name not in cls_dict: + cls_dict[val_name] = {} + cls_dict[val_name][val_uuid] = None + elif val_type == 'string': + cls_dict[val_name] = val_text.encode('utf8') + elif val_type == 'float': + cls_dict[val_name] = float(val_text) + elif val_type == 'int': + cls_dict[val_name] = int(val_text) + elif val_type == 'bool': + cls_dict[val_name] = bool(int(val_text)) + state[uuid] = cls_dict + + return state + + def save_state(self, cls, state): + """Save a Xen API record struct into an XML persistent storage + for future loading when Xend restarts. + + If we encounter a dictionary or a list, we only store the + keys because they are going to be UUID references to another + object. + + @param cls: Class name (singular) of the record + @type cls: string + @param state: a Xen API struct of the state of the class. + @type state: dict + @rtype: None + """ + + xml_path = self._xml_file(cls) + + doc = minidom.getDOMImplementation().createDocument(None, + cls + 's', + None) + root = doc.documentElement + + # Marshall a dictionary into our custom XML file format. 
+ for uuid, info in state.items(): + node = doc.createElement(cls) + root.appendChild(node) + node.setAttribute('uuid', uuid) + + for key, val in info.items(): + store_val = val + store_type = None + + # deal with basic types + if type(val) in (str, unicode): + store_val = val + store_type = 'string' + elif type(val) == int: + store_val = str(val) + store_type = 'int' + elif type(val) == float: + store_val = str(val) + store_type = 'float' + elif type(val) == bool: + store_val = str(int(val)) + store_type = 'bool' + + if store_type != None: + val_node = doc.createElement(key) + val_node.setAttribute('type', store_type) + node.appendChild(val_node) + # attach the value + val_text = doc.createTextNode(store_val) + val_node.appendChild(val_text) + continue + + # deal with dicts and lists + if type(val) == dict: + for val_uuid in val.keys(): + val_node = doc.createElement(key) + val_node.setAttribute('uuid', val_uuid) + node.appendChild(val_node) + elif type(val) in (list, tuple): + for val_uuid in val: + val_node = doc.createElement(key) + val_node.setAttribute('uuid', val_uuid) + node.appendChild(val_node) + + open(xml_path, 'w').write(doc.toprettyxml()) + + diff --git a/tools/python/xen/xend/XendStorageRepository.py b/tools/python/xen/xend/XendStorageRepository.py index a9930e2730..fd3a5980cd 100644 --- a/tools/python/xen/xend/XendStorageRepository.py +++ b/tools/python/xen/xend/XendStorageRepository.py @@ -24,25 +24,38 @@ import logging import os import stat import threading +import re +import sys +import struct from xen.util import mkdir from xen.xend import uuid from xen.xend.XendError import XendError from xen.xend.XendVDI import * -XEND_STORAGE_MAX_IGNORE = -1 + +XEND_STORAGE_NO_MAXIMUM = sys.maxint XEND_STORAGE_DIR = "/var/lib/xend/storage/" XEND_STORAGE_QCOW_FILENAME = "%s.qcow" XEND_STORAGE_VDICFG_FILENAME = "%s.vdi.xml" -QCOW_CREATE_COMMAND = "/usr/sbin/qcow-create -p %d %s" +QCOW_CREATE_COMMAND = "/usr/sbin/qcow-create -r %d %s" MB = 1024 * 1024 log = 
logging.getLogger("xend.XendStorageRepository") -class DeviceInvalidError(Exception): - pass +def qcow_virtual_size(qcow_file): + """Read the first 32 bytes of the QCoW header to determine its size. + + See: http://www.gnome.org/~markmc/qcow-image-format.html. + """ + try: + qcow_header = open(qcow_file, 'rb').read(32) + parts = struct.unpack('>IIQIIQ', qcow_header) + return parts[-1] + except IOError: + return -1 class XendStorageRepository: """A simple file backed QCOW Storage Repository. @@ -54,11 +67,13 @@ class XendStorageRepository: The actual images are created in the format <uuid>.img and <uuid>.qcow. """ - def __init__(self, storage_dir = XEND_STORAGE_DIR, - storage_max = XEND_STORAGE_MAX_IGNORE): + def __init__(self, uuid, + sr_type = "qcow_file", + name_label = "Local", + name_description = "Xend Storage Repository", + location = XEND_STORAGE_DIR, + storage_max = XEND_STORAGE_NO_MAXIMUM): """ - @keyword storage_dir: Where the images will be stored. - @type storage_dir: string @keyword storage_max: Maximum disk space to use in bytes. @type storage_max: int @@ -67,71 +82,82 @@ class XendStorageRepository: @type images: dictionary by image uuid. @ivar lock: lock to provide thread safety. 
""" - - self.storage_dir = storage_dir - self.storage_max = storage_max - self.storage_free = 0 - self.images = {} # XenAPI Parameters - self.uuid = self._sr_uuid() - self.type = "qcow-file" - self.location = self.storage_dir - self.name_label = "Local" - self.name_description = "Xend Storage Repository" - - self.lock = threading.RLock() - self._refresh() + self.uuid = uuid + self.type = sr_type + self.location = location + self.name_label = name_label + self.name_description = name_description + self.images = {} - def _sr_uuid(self): - uuid_file = os.path.join(XEND_STORAGE_DIR, 'uuid') - try: - if uuid_file and os.path.exists(uuid_file): - return open(uuid_file, 'r').read().strip() - else: - new_uuid = uuid.createString() - open(uuid_file, 'w').write(new_uuid + '\n') - return new_uuid - except IOError: - log.exception("Failed to determine SR UUID") + self.storage_max = storage_max + self.storage_free = 0 + self.storage_used = 0 + self.storage_alloc = 0 - return uuid.createString() + self.lock = threading.RLock() + self._refresh() + + def get_record(self): + retval = {'uuid': self.uuid, + 'name_label': self.name_label, + 'name_description': self.name_description, + 'virtual_allocation': self.storage_alloc, + 'physical_utilisation': self.storage_used, + 'physical_size': self.storage_max, + 'type': self.type, + 'location': self.location, + 'VDIs': self.images.keys()} + + if self.storage_max == XEND_STORAGE_NO_MAXIMUM: + stfs = os.statvfs(self.location) + retval['physical_size'] = stfs.f_blocks * stfs.f_frsize + return retval + def _refresh(self): """Internal function that refreshes the state of the disk and updates the list of images available. 
""" self.lock.acquire() try: - mkdir.parents(XEND_STORAGE_DIR, stat.S_IRWXU) + mkdir.parents(self.location, stat.S_IRWXU) # scan the directory and populate self.images - total_used = 0 + virtual_alloc = 0 + physical_used = 0 seen_images = [] - for filename in os.listdir(XEND_STORAGE_DIR): + for filename in os.listdir(self.location): if filename[-5:] == XEND_STORAGE_QCOW_FILENAME[-5:]: image_uuid = filename[:-5] seen_images.append(image_uuid) + + qcow_file = XEND_STORAGE_QCOW_FILENAME % image_uuid + cfg_file = XEND_STORAGE_VDICFG_FILENAME % image_uuid + qcow_path = os.path.join(self.location, qcow_file) + cfg_path = os.path.join(self.location, cfg_file) + + phys_size = os.stat(qcow_path).st_size + virt_size = qcow_virtual_size(qcow_path) # add this image if we haven't seen it before if image_uuid not in self.images: - qcow_file = XEND_STORAGE_QCOW_FILENAME % image_uuid - cfg_file = XEND_STORAGE_VDICFG_FILENAME % image_uuid - qcow_path = os.path.join(XEND_STORAGE_DIR, qcow_file) - cfg_path = os.path.join(XEND_STORAGE_DIR, cfg_file) - - qcow_size = os.stat(qcow_path).st_size - - # TODO: no way to stat virtual size of qcow vdi = XendQCOWVDI(image_uuid, self.uuid, qcow_path, cfg_path, - qcow_size, qcow_size) + virt_size, phys_size) if cfg_path and os.path.exists(cfg_path): - vdi.load_config(cfg_path) + try: + vdi.load_config(cfg_path) + except: + log.error('Corrupt VDI configuration file %s' % + cfg_path) self.images[image_uuid] = vdi - total_used += qcow_size + + physical_used += phys_size + virtual_alloc += virt_size # remove images that aren't valid for image_uuid in self.images.keys(): @@ -142,11 +168,14 @@ class XendStorageRepository: pass del self.images[image_uuid] + self.storage_alloc = virtual_alloc + self.storage_used = physical_used + # update free storage if we have to track that - if self.storage_max != XEND_STORAGE_MAX_IGNORE: - self.storage_free = self.storage_max - total_used - else: + if self.storage_max == XEND_STORAGE_NO_MAXIMUM: self.storage_free = 
self._get_free_space() + else: + self.storage_free = self.storage_max - self.storage_alloc finally: self.lock.release() @@ -158,7 +187,7 @@ class XendStorageRepository: @rtype: int """ - stfs = os.statvfs(self.storage_dir) + stfs = os.statvfs(self.location) return stfs.f_bavail * stfs.f_frsize def _has_space_available_for(self, size_bytes): @@ -167,22 +196,19 @@ class XendStorageRepository: @rtype: bool """ - if self.storage_max != -1: - return self.storage_free + if self.storage_max != XEND_STORAGE_NO_MAXIMUM: + return self.storage_free > size_bytes bytes_free = self._get_free_space() - try: - if size_bytes < bytes_free: - return True - except DeviceInvalidError: - pass + if size_bytes < bytes_free: + return True return False def _create_image_files(self, desired_size_bytes): """Create an image and return its assigned UUID. - @param desired_size_kb: Desired image size in KB. - @type desired_size_kb: int + @param desired_size_bytes: Desired image size in bytes + @type desired_size_bytes: int @rtype: string @return: uuid @@ -194,7 +220,7 @@ class XendStorageRepository: raise XendError("Not enough space") image_uuid = uuid.createString() - qcow_path = os.path.join(XEND_STORAGE_DIR, + qcow_path = os.path.join(self.location, XEND_STORAGE_QCOW_FILENAME % image_uuid) if qcow_path and os.path.exists(qcow_path): @@ -268,10 +294,11 @@ class XendStorageRepository: """ self.lock.acquire() try: - if self.storage_max != XEND_STORAGE_MAX_IGNORE: - return self.storage_max + if self.storage_max == XEND_STORAGE_NO_MAXIMUM: + stfs = os.statvfs(self.location) + return stfs.f_blocks * stfs.f_frsize else: - return self.free_space_bytes() + self.used_space_bytes() + return self.storage_max finally: self.lock.release() @@ -281,10 +308,17 @@ class XendStorageRepository: """ self.lock.acquire() try: - total_used = 0 - for val in self.images.values(): - total_used += val.physical_utilisation - return total_used + return self.storage_used + finally: + self.lock.release() + + def 
virtual_allocation(self): + """Returns the total virtual space allocated within the storage repo. + @rtype: int + """ + self.lock.acquire() + try: + return self.storage_alloc finally: self.lock.release() @@ -315,7 +349,7 @@ class XendStorageRepository: # save configuration to file cfg_filename = XEND_STORAGE_VDICFG_FILENAME % image_uuid - cfg_path = os.path.join(XEND_STORAGE_DIR, cfg_filename) + cfg_path = os.path.join(self.location, cfg_filename) image.save_config(cfg_path) except Exception, e: @@ -327,10 +361,10 @@ class XendStorageRepository: return image_uuid - def xen_api_get_by_label(self, label): + def xen_api_get_by_name_label(self, label): self.lock.acquire() try: - for image_uuid, val in self.images.values(): + for image_uuid, val in self.images.items(): if val.name_label == label: return image_uuid return None diff --git a/tools/python/xen/xend/XendVDI.py b/tools/python/xen/xend/XendVDI.py index 22a1616360..205aa0a091 100644 --- a/tools/python/xen/xend/XendVDI.py +++ b/tools/python/xen/xend/XendVDI.py @@ -141,6 +141,21 @@ class XendVDI(AutoSaveObject): return True + def get_record(self): + return {'uuid': self.uuid, + 'name_label': self.name_label, + 'name_description': self.name_description, + 'virtual_size': self.virtual_size, + 'physical_utilisation': self.physical_utilisation, + 'sector_size': self.sector_size, + 'parent': None, + 'children': [], + 'sharable': False, + 'readonly': False, + 'SR': self.sr.get_uuid(), + 'VBDs': []} + + class XendQCOWVDI(XendVDI): def __init__(self, uuid, sr_uuid, qcow_path, cfg_path, vsize, psize): diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py index 982feea114..5147e1fef0 100644 --- a/tools/python/xen/xend/image.py +++ b/tools/python/xen/xend/image.py @@ -68,7 +68,7 @@ class ImageHandler: def __init__(self, vm, vmConfig, imageConfig, deviceConfig): self.vm = vm - self.bootloader = None + self.bootloader = False self.kernel = None self.ramdisk = None self.cmdline = None @@ -77,10 +77,15 @@ 
class ImageHandler: def configure(self, vmConfig, imageConfig, _): """Config actions common to all unix-like domains.""" - self.bootloader = vmConfig['PV_bootloader'] - self.kernel = vmConfig['PV_kernel'] - self.cmdline = vmConfig['PV_args'] - self.ramdisk = vmConfig['PV_ramdisk'] + if '_temp_using_bootloader' in vmConfig: + self.bootloader = True + self.kernel = vmConfig['_temp_kernel'] + self.cmdline = vmConfig['_temp_args'] + self.ramdisk = vmConfig['_temp_ramdisk'] + else: + self.kernel = vmConfig['PV_kernel'] + self.cmdline = vmConfig['PV_args'] + self.ramdisk = vmConfig['PV_ramdisk'] self.vm.storeVm(("image/ostype", self.ostype), ("image/kernel", self.kernel), ("image/cmdline", self.cmdline), @@ -312,6 +317,9 @@ class HVMImageHandler(ImageHandler): def configure(self, vmConfig, imageConfig, deviceConfig): ImageHandler.configure(self, vmConfig, imageConfig, deviceConfig) + if not self.kernel: + self.kernel = '/usr/lib/xen/boot/hvmloader' + info = xc.xeninfo() if 'hvm' not in info['xen_caps']: raise VmError("HVM guest support is unavailable: is VT/AMD-V " diff --git a/tools/python/xen/xend/osdep.py b/tools/python/xen/xend/osdep.py index 35af7c55af..1dd3c7c1dd 100644 --- a/tools/python/xen/xend/osdep.py +++ b/tools/python/xen/xend/osdep.py @@ -29,8 +29,13 @@ _xend_autorestart = { "SunOS": False, } +_pygrub_path = { + "SunOS": "/usr/lib/xen/bin/pygrub" +} + def _get(var, default=None): return var.get(os.uname()[0], default) scripts_dir = _get(_scripts_dir, "/etc/xen/scripts") xend_autorestart = _get(_xend_autorestart) +pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub") diff --git a/tools/python/xen/xend/server/SrvDaemon.py b/tools/python/xen/xend/server/SrvDaemon.py index bd93e6fc56..04f7a789a6 100644 --- a/tools/python/xen/xend/server/SrvDaemon.py +++ b/tools/python/xen/xend/server/SrvDaemon.py @@ -203,7 +203,7 @@ class Daemon: if not osdep.xend_autorestart: self.run(os.fdopen(w, 'w')) - break + os._exit(0) pid = self.fork_pid() if pid: diff --git 
a/tools/python/xen/xend/server/SrvServer.py b/tools/python/xen/xend/server/SrvServer.py index 4dfe8584e0..82df82e0f8 100644 --- a/tools/python/xen/xend/server/SrvServer.py +++ b/tools/python/xen/xend/server/SrvServer.py @@ -48,7 +48,7 @@ from threading import Thread from xen.web.httpserver import HttpServer, UnixHttpServer -from xen.xend import XendRoot, XendAPI +from xen.xend import XendNode, XendRoot, XendAPI from xen.xend import Vifctl from xen.xend.XendLogging import log from xen.xend.XendClient import XEN_API_SOCKET @@ -100,6 +100,8 @@ class XendServers: signal.signal(signal.SIGHUP, self.reloadConfig) while True: + XendNode.instance().refreshBridges() + threads = [] for server in self.servers: if server.ready: @@ -198,16 +200,18 @@ def _loadConfig(servers, root, reload): if len(addrport) == 1: if addrport[0] == 'unix': - servers.add(XMLRPCServer(auth, + servers.add(XMLRPCServer(auth, True, path = XEN_API_SOCKET, hosts_allowed = allowed)) else: servers.add( - XMLRPCServer(auth, True, '', int(addrport[0]), + XMLRPCServer(auth, True, True, '', + int(addrport[0]), hosts_allowed = allowed)) else: addr, port = addrport - servers.add(XMLRPCServer(auth, True, addr, int(port), + servers.add(XMLRPCServer(auth, True, True, addr, + int(port), hosts_allowed = allowed)) except ValueError, exn: log.error('Xen-API server configuration %s is invalid.', api_cfg) @@ -215,10 +219,10 @@ def _loadConfig(servers, root, reload): log.error('Xen-API server configuration %s is invalid.', api_cfg) if xroot.get_xend_tcp_xmlrpc_server(): - servers.add(XMLRPCServer(XendAPI.AUTH_PAM, True)) + servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False, True)) if xroot.get_xend_unix_xmlrpc_server(): - servers.add(XMLRPCServer(XendAPI.AUTH_PAM)) + servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False)) def create(): diff --git a/tools/python/xen/xend/server/XMLRPCServer.py b/tools/python/xen/xend/server/XMLRPCServer.py index c29b4b6bdd..05646fcea6 100644 --- a/tools/python/xen/xend/server/XMLRPCServer.py +++ 
b/tools/python/xen/xend/server/XMLRPCServer.py @@ -89,8 +89,8 @@ methods = ['device_create', 'device_configure', exclude = ['domain_create', 'domain_restore'] class XMLRPCServer: - def __init__(self, auth, use_tcp=False, host = "localhost", port = 8006, - path = XML_RPC_SOCKET, hosts_allowed = None): + def __init__(self, auth, use_xenapi, use_tcp=False, host = "localhost", + port = 8006, path = XML_RPC_SOCKET, hosts_allowed = None): self.use_tcp = use_tcp self.port = port self.host = host @@ -100,7 +100,7 @@ class XMLRPCServer: self.ready = False self.running = True self.auth = auth - self.xenapi = XendAPI.XendAPI(auth) + self.xenapi = use_xenapi and XendAPI.XendAPI(auth) or None def run(self): authmsg = (self.auth == XendAPI.AUTH_NONE and @@ -115,11 +115,13 @@ class XMLRPCServer: self.port, authmsg) self.server = TCPXMLRPCServer((self.host, self.port), self.hosts_allowed, + self.xenapi is not None, logRequests = False) else: log.info("Opening Unix domain socket XML-RPC server on %s%s", self.path, authmsg) self.server = UnixXMLRPCServer(self.path, self.hosts_allowed, + self.xenapi is not None, logRequests = False) except socket.error, exn: log.error('Cannot start server: %s!', exn.args[1]) @@ -133,9 +135,10 @@ class XMLRPCServer: # and has the 'api' attribute. 
for meth_name in dir(self.xenapi): - meth = getattr(self.xenapi, meth_name) - if meth_name[0] != '_' and callable(meth) and hasattr(meth, 'api'): - self.server.register_function(meth, getattr(meth, 'api')) + if meth_name[0] != '_': + meth = getattr(self.xenapi, meth_name) + if callable(meth) and hasattr(meth, 'api'): + self.server.register_function(meth, getattr(meth, 'api')) # Legacy deprecated xm xmlrpc api # -------------------------------------------------------------------- diff --git a/tools/python/xen/xend/server/netif.py b/tools/python/xen/xend/server/netif.py index 92e5626ade..c229ed7b20 100644 --- a/tools/python/xen/xend/server/netif.py +++ b/tools/python/xen/xend/server/netif.py @@ -150,16 +150,20 @@ class NetifController(DevController): devid = self.allocateDeviceID() + # The default type is 'netfront'. + if not typ: + typ = 'netfront' + if not mac: mac = randomMAC() back = { 'script' : script, 'mac' : mac, - 'handle' : "%i" % devid } + 'handle' : "%i" % devid, + 'type' : typ } if typ == 'ioemu': front = {} - back['type'] = 'ioemu' else: front = { 'handle' : "%i" % devid, 'mac' : mac } diff --git a/tools/python/xen/xend/server/vfbif.py b/tools/python/xen/xend/server/vfbif.py index d07cd2f9e8..816dcd5a6f 100644 --- a/tools/python/xen/xend/server/vfbif.py +++ b/tools/python/xen/xend/server/vfbif.py @@ -64,7 +64,7 @@ class VfbifController(DevController): if config.has_key("vncunused"): args += ["--unused"] elif config.has_key("vncdisplay"): - args += ["--vncport", "%d" % (5900 + config["vncdisplay"])] + args += ["--vncport", "%d" % (5900 + int(config["vncdisplay"]))] vnclisten = config.get("vnclisten", xen.xend.XendRoot.instance().get_vnclisten_address()) args += [ "--listen", vnclisten ] diff --git a/tools/python/xen/xm/XenAPI.py b/tools/python/xen/xm/XenAPI.py index 5cf929f48c..8d58c3f7f6 100644 --- a/tools/python/xen/xm/XenAPI.py +++ b/tools/python/xen/xm/XenAPI.py @@ -44,17 +44,45 @@ # OF THIS SOFTWARE. 
# -------------------------------------------------------------------- +import gettext import xmlrpclib import xen.util.xmlrpclib2 +translation = gettext.translation('xen-xm', fallback = True) + class Failure(Exception): def __init__(self, details): - self.details = details + try: + # If this failure is MESSAGE_PARAMETER_COUNT_MISMATCH, then we + # correct the return values here, to account for the fact that we + # transparently add the session handle as the first argument. + if details[0] == 'MESSAGE_PARAMETER_COUNT_MISMATCH': + details[2] = str(int(details[2]) - 1) + details[3] = str(int(details[3]) - 1) + + self.details = details + except Exception, exn: + self.details = ['INTERNAL_ERROR', 'Client-side: ' + str(exn)] def __str__(self): - return "Xen-API failure: %s" % str(self.details) + try: + return translation.ugettext(self.details[0]) % self._details_map() + except TypeError, exn: + return "Message database broken: %s.\nXen-API failure: %s" % \ + (exn, str(self.details)) + except Exception, exn: + import sys + print >>sys.stderr, exn + return "Xen-API failure: %s" % str(self.details) + + def _details_map(self): + return dict([(str(i), self.details[i]) + for i in range(len(self.details))]) + + +_RECONNECT_AND_RETRY = (lambda _ : ()) class Session(xen.util.xmlrpclib2.ServerProxy): @@ -80,6 +108,8 @@ class Session(xen.util.xmlrpclib2.ServerProxy): encoding, verbose, allow_none) self._session = None + self.last_login_method = None + self.last_login_params = None def xenapi_request(self, methodname, params): @@ -87,13 +117,31 @@ class Session(xen.util.xmlrpclib2.ServerProxy): self._login(methodname, params) return None else: - full_params = (self._session,) + params - return _parse_result(getattr(self, methodname)(*full_params)) + retry_count = 0 + while retry_count < 3: + full_params = (self._session,) + params + result = _parse_result(getattr(self, methodname)(*full_params)) + if result == _RECONNECT_AND_RETRY: + retry_count += 1 + if self.last_login_method: + 
self._login(self.last_login_method, + self.last_login_params) + else: + raise xmlrpclib.Fault(401, 'You must log in') + else: + return result + raise xmlrpclib.Fault( + 500, 'Tried 3 times to get a valid session, but failed') def _login(self, method, params): - self._session = _parse_result( - getattr(self, 'session.%s' % method)(*params)) + result = _parse_result(getattr(self, 'session.%s' % method)(*params)) + if result == _RECONNECT_AND_RETRY: + raise xmlrpclib.Fault( + 500, 'Received SESSION_INVALID when logging in') + self._session = result + self.last_login_method = method + self.last_login_params = params def __getattr__(self, name): @@ -106,7 +154,7 @@ class Session(xen.util.xmlrpclib2.ServerProxy): def _parse_result(result): - if 'Status' not in result: + if type(result) != dict or 'Status' not in result: raise xmlrpclib.Fault(500, 'Missing Status in response from server') if result['Status'] == 'Success': if 'Value' in result: @@ -116,7 +164,10 @@ def _parse_result(result): 'Missing Value in response from server') else: if 'ErrorDescription' in result: - raise Failure(result['ErrorDescription']) + if result['ErrorDescription'][0] == 'SESSION_INVALID': + return _RECONNECT_AND_RETRY + else: + raise Failure(result['ErrorDescription']) else: raise xmlrpclib.Fault( 500, 'Missing ErrorDescription in response from server') @@ -127,10 +178,18 @@ class _Dispatcher: def __init__(self, send, name): self.__send = send self.__name = name + + def __repr__(self): + if self.__name: + return '<XenAPI._Dispatcher for %s>' % self.__name + else: + return '<XenAPI._Dispatcher>' + def __getattr__(self, name): if self.__name is None: return _Dispatcher(self.__send, name) else: return _Dispatcher(self.__send, "%s.%s" % (self.__name, name)) + def __call__(self, *args): return self.__send(self.__name, args) diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py index b603b922d6..094b583d57 100644 --- a/tools/python/xen/xm/create.py +++ 
b/tools/python/xen/xm/create.py @@ -28,6 +28,7 @@ import xmlrpclib from xen.xend import sxp from xen.xend import PrettyPrint +from xen.xend import osdep import xen.xend.XendClient from xen.xend.XendBootloader import bootloader from xen.util import blkif @@ -300,7 +301,7 @@ gopts.var('vif', val="type=TYPE,mac=MAC,bridge=BRIDGE,ip=IPADDR,script=SCRIPT,ba fn=append_value, default=[], use="""Add a network interface with the given MAC address and bridge. The vif is configured by calling the given configuration script. - If type is not specified, default is netfront not ioemu device. + If type is not specified, default is netfront. If mac is not specified a random MAC address is used. If not specified then the network backend chooses it's own MAC address. If bridge is not specified the first bridge found is used. @@ -718,8 +719,11 @@ def run_bootloader(vals, config_image): "--entry= directly.") vals.bootargs = "--entry=%s" %(vals.bootentry,) + kernel = sxp.child_value(config_image, 'kernel') + ramdisk = sxp.child_value(config_image, 'ramdisk') + args = sxp.child_value(config_image, 'args') return bootloader(vals.bootloader, file, not vals.console_autoconnect, - vals.bootargs, config_image) + vals.bootargs, kernel, ramdisk, args) def make_config(vals): """Create the domain configuration. @@ -759,7 +763,14 @@ def make_config(vals): config_image = configure_image(vals) if vals.bootloader: - config_image = run_bootloader(vals, config_image) + if vals.bootloader == "pygrub": + vals.bootloader = osdep.pygrub_path + + # if a kernel is specified, we're using the bootloader + # non-interactively, and need to let xend run it so we preserve the + # real kernel choice. + if not vals.kernel: + config_image = run_bootloader(vals, config_image) config.append(['bootloader', vals.bootloader]) if vals.bootargs: config.append(['bootloader_args', vals.bootargs]) @@ -823,7 +834,7 @@ def preprocess_ioports(vals): if len(d) == 1: d.append(d[0]) # Components are in hex: add hex specifier. 
- hexd = map(lambda v: '0x'+v, d) + hexd = ['0x' + x for x in d] ioports.append(hexd) vals.ioports = ioports @@ -990,8 +1001,6 @@ def preprocess_vnc(vals): vals.extra = vnc + ' ' + vals.extra def preprocess(vals): - if not vals.kernel and not vals.bootloader: - err("No kernel specified") preprocess_disk(vals) preprocess_pci(vals) preprocess_ioports(vals) @@ -1176,6 +1185,7 @@ def config_security_check(config, verbose): try: domain_label = security.ssidref2label(security.NULL_SSIDREF) except: + import traceback traceback.print_exc(limit=1) return 0 domain_policy = 'NULL' diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py index 3c118172ae..87a0602e2d 100644 --- a/tools/python/xen/xm/main.py +++ b/tools/python/xen/xm/main.py @@ -130,7 +130,7 @@ SUBCOMMAND_HELP = { 'log' : ('', 'Print Xend log'), 'rename' : ('<Domain> <NewDomainName>', 'Rename a domain.'), 'sched-sedf' : ('<Domain> [options]', 'Get/set EDF parameters.'), - 'sched-credit': ('-d <Domain> [-w[=WEIGHT]|-c[=CAP]]', + 'sched-credit': ('[-d <Domain> [-w[=WEIGHT]|-c[=CAP]]]', 'Get/set credit scheduler parameters.'), 'sysrq' : ('<Domain> <letter>', 'Send a sysrq to a domain.'), 'vcpu-list' : ('[<Domain>]', @@ -717,6 +717,10 @@ def parse_sedf_info(info): 'weight' : get_info('weight', int, -1), } +def domid_match(domid, info): + return domid is None or domid == info['name'] or \ + domid == str(info['domid']) + def xm_brief_list(doms): print '%-40s %3s %5s %5s %10s %9s' % \ ('Name', 'ID', 'Mem', 'VCPUs', 'State', 'Time(s)') @@ -1091,10 +1095,6 @@ def xm_sched_sedf(args): print( ("%(name)-32s %(domid)3d %(period)9.1f %(slice)9.1f" + " %(latency)7.1f %(extratime)6d %(weight)6d") % info) - def domid_match(domid, info): - return domid is None or domid == info['name'] or \ - domid == str(info['domid']) - # we want to just display current info if no parameters are passed if len(args) == 0: domid = None @@ -1174,27 +1174,43 @@ def xm_sched_credit(args): err(opterr) usage('sched-credit') - domain = 
None + domid = None weight = None cap = None for o, a in opts: if o == "-d": - domain = a + domid = a elif o == "-w": weight = int(a) elif o == "-c": cap = int(a); - if domain is None: - # place holder for system-wide scheduler parameters - err("No domain given.") - usage('sched-credit') + doms = filter(lambda x : domid_match(domid, x), + [parse_doms_info(dom) + for dom in getDomains(None, 'running')]) if weight is None and cap is None: - print server.xend.domain.sched_credit_get(domain) + # print header if we aren't setting any parameters + print '%-33s %-2s %-6s %-4s' % ('Name','ID','Weight','Cap') + + for d in doms: + try: + info = server.xend.domain.sched_credit_get(d['domid']) + except xmlrpclib.Fault: + # domain does not support sched-credit? + info = {'weight': -1, 'cap': -1} + + info['name'] = d['name'] + info['domid'] = int(d['domid']) + print( ("%(name)-32s %(domid)3d %(weight)6d %(cap)4d") % info) else: - result = server.xend.domain.sched_credit_set(domain, weight, cap) + if domid is None: + # place holder for system-wide scheduler parameters + err("No domain given.") + usage('sched-credit') + + result = server.xend.domain.sched_credit_set(domid, weight, cap) if result != 0: err(str(result)) @@ -1244,7 +1260,7 @@ def xm_console(args): if quiet: sys.exit(1) else: - raise Exception("Domain is not started") + raise xmlrpclib.Fault(0, "Domain '%s' is not started" % dom) console.execConsole(domid) @@ -1780,7 +1796,8 @@ def _run_cmd(cmd, cmd_name, args): except SystemExit, code: return code == 0, code except XenAPI.Failure, exn: - err(str(exn)) + for line in [''] + wrap(str(exn), 80) + ['']: + print >>sys.stderr, line except xmlrpclib.Fault, ex: if ex.faultCode == XendClient.ERROR_INVALID_DOMAIN: err("Domain '%s' does not exist." 
% ex.faultString) diff --git a/tools/python/xen/xm/messages/en/xen-xm.po b/tools/python/xen/xm/messages/en/xen-xm.po new file mode 100644 index 0000000000..ff4b4ea75b --- /dev/null +++ b/tools/python/xen/xm/messages/en/xen-xm.po @@ -0,0 +1,72 @@ +# ============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# ============================================================================ +# Copyright (c) 2006 XenSource Inc. +# ============================================================================ +# +# +msgid "" +msgstr "" +"Project-Id-Version: Xen-xm 3.0\n" +"PO-Revision-Date: 2006-12-28 15:43+0000\n" +"Last-Translator: Ewan Mellor <ewan@xensource.com>\n" +"Language-Team: xen-devel <xen-devel@lists.xensource.com>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=ASCII\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "INTERNAL_ERROR" +msgstr "Internal error: %(1)s." + +msgid "MAP_DUPLICATE_KEY" +msgstr "This map already contains %(1)s -> %(2)s." + +msgid "MESSAGE_METHOD_UNKNOWN" +msgstr "The method %(1)s is unsupported." + +msgid "MESSAGE_PARAMETER_COUNT_MISMATCH" +msgstr "The method %(1)s takes %(2)s argument(s) (%(3)s given)." + +msgid "SESSION_AUTHENTICATION_FAILED" +msgstr "Permission denied." 
+ +msgid "HOST_CPU_HANDLE_INVALID" +msgstr "The host_cpu handle %(1)s is invalid." + +msgid "HOST_HANDLE_INVALID" +msgstr "The host handle %(1)s is invalid." + +msgid "SR_HANDLE_INVALID" +msgstr "The SR handle %(1)s is invalid." + +msgid "VBD_HANDLE_INVALID" +msgstr "The VBD handle %(1)s is invalid." + +msgid "VDI_HANDLE_INVALID" +msgstr "The VDI handle %(1)s is invalid." + +msgid "VIF_HANDLE_INVALID" +msgstr "The VIF handle %(1)s is invalid." + +msgid "VM_HANDLE_INVALID" +msgstr "The VM handle %(1)s is invalid." + +msgid "VTPM_HANDLE_INVALID" +msgstr "The VTPM handle %(1)s is invalid." + +msgid "NETWORK_ALREADY_CONNECTED" +msgstr "The network you specified already has a PIF attached to it, and so another one may not be attached." + +msgid "VM_BAD_POWER_STATE" +msgstr "The VM must be %(2)s to perform the requested operation (it is currently %(3)s)." diff --git a/tools/python/xen/xm/migrate.py b/tools/python/xen/xm/migrate.py index 72aca71932..2b77e6b1ab 100644 --- a/tools/python/xen/xm/migrate.py +++ b/tools/python/xen/xm/migrate.py @@ -52,6 +52,7 @@ def help(): def main(argv): opts = gopts + opts.reset() args = opts.parse(argv) if len(args) != 2: diff --git a/tools/python/xen/xm/opts.py b/tools/python/xen/xm/opts.py index 1eac629aa1..7dd28bfa4e 100644 --- a/tools/python/xen/xm/opts.py +++ b/tools/python/xen/xm/opts.py @@ -559,14 +559,6 @@ def set_bool(opt, k, v): else: opt.opts.err('Invalid value:' +v) -def set_u32(opt, k, v): - """Set an option to an u32 value.""" - try: - v = u32(v) - except: - opt.opts.err('Invalid value: ' + str(v)) - opt.set(v) - def set_value(opt, k, v): """Set an option to a value.""" opt.set(v) diff --git a/tools/python/xen/xm/shutdown.py b/tools/python/xen/xm/shutdown.py index b5542d9611..88d36dfba1 100644 --- a/tools/python/xen/xm/shutdown.py +++ b/tools/python/xen/xm/shutdown.py @@ -118,6 +118,7 @@ def main_dom(opts, args): def main(argv): opts = gopts + opts.reset() args = opts.parse(argv) if opts.vals.help: return diff --git 
a/tools/tests/test_x86_emulator.c b/tools/tests/test_x86_emulator.c index 64797d3f81..2753795d5c 100644 --- a/tools/tests/test_x86_emulator.c +++ b/tools/tests/test_x86_emulator.c @@ -15,7 +15,14 @@ typedef int64_t s64; #include <asm-x86/x86_emulate.h> #include <sys/mman.h> -#define PFEC_write_access (1U<<1) +/* EFLAGS bit definitions. */ +#define EFLG_OF (1<<11) +#define EFLG_DF (1<<10) +#define EFLG_SF (1<<7) +#define EFLG_ZF (1<<6) +#define EFLG_AF (1<<4) +#define EFLG_PF (1<<2) +#define EFLG_CF (1<<0) static int read( unsigned int seg, @@ -99,12 +106,15 @@ int main(int argc, char **argv) { struct x86_emulate_ctxt ctxt; struct cpu_user_regs regs; - char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */ + char *instr; unsigned int *res; int rc; +#ifndef __x86_64__ + unsigned int i, bcdres_native, bcdres_emul; +#endif ctxt.regs = ®s; - ctxt.mode = X86EMUL_MODE_PROT32; + ctxt.address_bytes = 4; res = mmap((void *)0x100000, 0x1000, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); @@ -113,16 +123,16 @@ int main(int argc, char **argv) fprintf(stderr, "mmap to low address failed\n"); exit(1); } + instr = (char *)res + 0x100; printf("%-40s", "Testing addl %%ecx,(%%eax)..."); instr[0] = 0x01; instr[1] = 0x08; regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - regs.error_code = PFEC_write_access; regs.eax = (unsigned long)res; *res = 0x7FFFFFFF; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x92345677) || (regs.eflags != 0xa94) || @@ -130,6 +140,21 @@ int main(int argc, char **argv) goto fail; printf("okay\n"); + printf("%-40s", "Testing addl %%ecx,%%eax..."); + instr[0] = 0x01; instr[1] = 0xc8; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.ecx = 0x12345678; + regs.eax = 0x7FFFFFFF; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != 0) || + (regs.ecx != 0x12345678) || + (regs.eax != 0x92345677) || + (regs.eflags != 0xa94) || + (regs.eip != 
(unsigned long)&instr[2]) ) + goto fail; + printf("okay\n"); + printf("%-40s", "Testing xorl (%%eax),%%ecx..."); instr[0] = 0x33; instr[1] = 0x08; regs.eflags = 0x200; @@ -140,8 +165,7 @@ int main(int argc, char **argv) regs.ecx = 0x12345678UL; #endif regs.eax = (unsigned long)res; - regs.error_code = 0; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x92345677) || (regs.ecx != 0x8000000FUL) || @@ -155,8 +179,7 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.ecx = ~0UL; regs.eax = (unsigned long)res; - regs.error_code = 0; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x92345677) || (regs.ecx != 0x92345677UL) || @@ -171,8 +194,7 @@ int main(int argc, char **argv) regs.eax = 0x92345677UL; regs.ecx = 0xAA; regs.ebx = (unsigned long)res; - regs.error_code = PFEC_write_access; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x923456AA) || (regs.eflags != 0x244) || @@ -188,8 +210,7 @@ int main(int argc, char **argv) regs.eax = 0xAABBCC77UL; regs.ecx = 0xFF; regs.ebx = (unsigned long)res; - regs.error_code = PFEC_write_access; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x923456AA) || ((regs.eflags&0x240) != 0x200) || @@ -205,8 +226,7 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; regs.eax = (unsigned long)res; - regs.error_code = PFEC_write_access; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x12345678) || (regs.eflags != 0x200) || @@ -223,8 +243,7 @@ int main(int argc, char **argv) regs.eax = 0x923456AAUL; regs.ecx = 0xDDEEFF00L; regs.ebx = (unsigned long)res; - regs.error_code = PFEC_write_access; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) 
|| (*res != 0xDDEEFF00) || (regs.eflags != 0x244) || @@ -241,8 +260,7 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.esi = (unsigned long)res + 0; regs.edi = (unsigned long)res + 2; - regs.error_code = 0; /* read fault */ - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x44554455) || (regs.eflags != 0x200) || @@ -259,9 +277,8 @@ int main(int argc, char **argv) regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)res; - regs.error_code = PFEC_write_access; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != 0) || (*res != 0x2233445D) || ((regs.eflags&0x201) != 0x201) || (regs.eip != (unsigned long)&instr[4]) ) @@ -275,9 +292,8 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = -32; regs.edi = (unsigned long)(res+1); - regs.error_code = PFEC_write_access; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != 0) || (*res != 0x2233445E) || ((regs.eflags&0x201) != 0x201) || (regs.eip != (unsigned long)&instr[3]) ) @@ -296,9 +312,8 @@ int main(int argc, char **argv) regs.ecx = 0xCCCCFFFF; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)res; - regs.error_code = PFEC_write_access; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != 0) || (res[0] != 0x9999AAAA) || (res[1] != 0xCCCCFFFF) || ((regs.eflags&0x240) != 0x240) || @@ -311,8 +326,7 @@ int main(int argc, char **argv) regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)res; - regs.error_code = PFEC_write_access; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (res[0] != 0x9999AAAA) || (res[1] != 0xCCCCFFFF) || @@ -330,8 +344,7 @@ int main(int argc, char **argv) regs.ecx = 0x12345678; regs.eax 
= (unsigned long)res; *res = 0x82; - regs.error_code = 0; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x82) || (regs.ecx != 0xFFFFFF82) || @@ -347,8 +360,7 @@ int main(int argc, char **argv) regs.ecx = 0x12345678; regs.eax = (unsigned long)res; *res = 0x1234aa82; - regs.error_code = 0; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x1234aa82) || (regs.ecx != 0xaa82) || @@ -364,8 +376,7 @@ int main(int argc, char **argv) regs.ecx = (unsigned long)res; regs.eax = 0x12345678; *res = 0x11111111; - regs.error_code = 0; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x11116789) || (regs.eax != 0x12341111) || @@ -374,6 +385,103 @@ int main(int argc, char **argv) goto fail; printf("okay\n"); + printf("%-40s", "Testing dec %%ax..."); + instr[0] = 0x66; instr[1] = 0x48; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.eax = 0x00000000; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != 0) || + (regs.eax != 0x0000ffff) || + ((regs.eflags&0x240) != 0x200) || + (regs.eip != (unsigned long)&instr[2]) ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing lea 8(%%ebp),%%eax..."); + instr[0] = 0x8d; instr[1] = 0x45; instr[2] = 0x08; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.eax = 0x12345678; + regs.ebp = 0xaaaaaaaa; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != 0) || + (regs.eax != 0xaaaaaab2) || + ((regs.eflags&0x240) != 0x200) || + (regs.eip != (unsigned long)&instr[3]) ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing daa/das (all inputs)..."); +#ifndef __x86_64__ + /* Bits 0-7: AL; Bit 8: EFLG_AF; Bit 9: EFLG_CF; Bit 10: DAA vs. DAS. */ + for ( i = 0; i < 0x800; i++ ) + { + regs.eflags = (i & 0x200) ? EFLG_CF : 0; + regs.eflags |= (i & 0x100) ? 
EFLG_AF : 0; + if ( i & 0x400 ) + __asm__ ( + "pushf; and $0xffffffee,(%%esp); or %1,(%%esp); popf; das; " + "pushf; popl %1" + : "=a" (bcdres_native), "=r" (regs.eflags) + : "0" (i & 0xff), "1" (regs.eflags) ); + else + __asm__ ( + "pushf; and $0xffffffee,(%%esp); or %1,(%%esp); popf; daa; " + "pushf; popl %1" + : "=a" (bcdres_native), "=r" (regs.eflags) + : "0" (i & 0xff), "1" (regs.eflags) ); + bcdres_native |= (regs.eflags & EFLG_PF) ? 0x1000 : 0; + bcdres_native |= (regs.eflags & EFLG_ZF) ? 0x800 : 0; + bcdres_native |= (regs.eflags & EFLG_SF) ? 0x400 : 0; + bcdres_native |= (regs.eflags & EFLG_CF) ? 0x200 : 0; + bcdres_native |= (regs.eflags & EFLG_AF) ? 0x100 : 0; + + instr[0] = (i & 0x400) ? 0x2f: 0x27; /* daa/das */ + regs.eflags = (i & 0x200) ? EFLG_CF : 0; + regs.eflags |= (i & 0x100) ? EFLG_AF : 0; + regs.eip = (unsigned long)&instr[0]; + regs.eax = (unsigned char)i; + rc = x86_emulate(&ctxt, &emulops); + bcdres_emul = regs.eax; + bcdres_emul |= (regs.eflags & EFLG_PF) ? 0x1000 : 0; + bcdres_emul |= (regs.eflags & EFLG_ZF) ? 0x800 : 0; + bcdres_emul |= (regs.eflags & EFLG_SF) ? 0x400 : 0; + bcdres_emul |= (regs.eflags & EFLG_CF) ? 0x200 : 0; + bcdres_emul |= (regs.eflags & EFLG_AF) ? 0x100 : 0; + if ( (rc != 0) || (regs.eax > 255) || + (regs.eip != (unsigned long)&instr[1]) ) + goto fail; + + if ( bcdres_emul != bcdres_native ) + { + printf("%s: AL=%02x %s %s\n" + "Output: AL=%02x %s %s %s %s %s\n" + "Emul.: AL=%02x %s %s %s %s %s\n", + (i & 0x400) ? "DAS" : "DAA", + (unsigned char)i, + (i & 0x200) ? "CF" : " ", + (i & 0x100) ? "AF" : " ", + (unsigned char)bcdres_native, + (bcdres_native & 0x200) ? "CF" : " ", + (bcdres_native & 0x100) ? "AF" : " ", + (bcdres_native & 0x1000) ? "PF" : " ", + (bcdres_native & 0x800) ? "ZF" : " ", + (bcdres_native & 0x400) ? "SF" : " ", + (unsigned char)bcdres_emul, + (bcdres_emul & 0x200) ? "CF" : " ", + (bcdres_emul & 0x100) ? "AF" : " ", + (bcdres_emul & 0x1000) ? "PF" : " ", + (bcdres_emul & 0x800) ? 
"ZF" : " ", + (bcdres_emul & 0x400) ? "SF" : " "); + goto fail; + } + } + printf("okay\n"); +#else + printf("skipped\n"); +#endif + return 0; fail: diff --git a/tools/xenstat/xentop/Makefile b/tools/xenstat/xentop/Makefile index 2248bfb3c3..5f85d732e6 100644 --- a/tools/xenstat/xentop/Makefile +++ b/tools/xenstat/xentop/Makefile @@ -26,6 +26,7 @@ sbindir=$(prefix)/sbin CFLAGS += -DGCC_PRINTF -Wall -Werror -I$(XEN_LIBXENSTAT) LDFLAGS += -L$(XEN_LIBXENSTAT) LDLIBS += -lxenstat $(CURSES_LIBS) $(SOCKET_LIBS) +CFLAGS += -DHOST_$(XEN_OS) .PHONY: all all: xentop diff --git a/tools/xenstat/xentop/xentop.c b/tools/xenstat/xentop/xentop.c index b772f951fb..4b610c3668 100644 --- a/tools/xenstat/xentop/xentop.c +++ b/tools/xenstat/xentop/xentop.c @@ -49,6 +49,13 @@ #define KEY_ESCAPE '\x1B' +#ifdef HOST_SunOS +/* Old curses library on Solaris takes non-const strings. */ +#define curses_str_t char * +#else +#define curses_str_t const char * +#endif + /* * Function prototypes */ @@ -269,7 +276,7 @@ static void print(const char *fmt, ...) if (!batch) { if((current_row() < lines()-1)) { va_start(args, fmt); - vwprintw(stdscr, (char *)fmt, args); + vwprintw(stdscr, (curses_str_t)fmt, args); va_end(args); } } else { @@ -283,7 +290,7 @@ static void print(const char *fmt, ...) 
static void attr_addstr(int attr, const char *str) { attron(attr); - addstr((char *)str); + addstr((curses_str_t)str); attroff(attr); } diff --git a/tools/xenstore/Makefile b/tools/xenstore/Makefile index a18347d4b1..0596379aca 100644 --- a/tools/xenstore/Makefile +++ b/tools/xenstore/Makefile @@ -91,7 +91,7 @@ libxenstore.so.$(MAJOR): libxenstore.so.$(MAJOR).$(MINOR) ln -sf $< $@ libxenstore.so.$(MAJOR).$(MINOR): xs.opic xs_lib.opic - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenstore.so.$(MAJOR) $(SHLIB_CFLAGS) -o $@ $^ -lpthread + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenstore.so.$(MAJOR) $(SHLIB_CFLAGS) -o $@ $^ $(SOCKET_LIBS) -lpthread libxenstore.a: xs.o xs_lib.o $(AR) rcs libxenstore.a $^ diff --git a/tools/xenstore/list.h b/tools/xenstore/list.h index eb35293d7f..9cd53413cf 100644 --- a/tools/xenstore/list.h +++ b/tools/xenstore/list.h @@ -12,7 +12,7 @@ #define LIST_POISON2 ((void *) 0x00200200) #define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) /* diff --git a/tools/xm-test/lib/XmTestLib/DomainTracking.py b/tools/xm-test/lib/XmTestLib/DomainTracking.py new file mode 100644 index 0000000000..cbd25146f5 --- /dev/null +++ b/tools/xm-test/lib/XmTestLib/DomainTracking.py @@ -0,0 +1,43 @@ +#!/usr/bin/python +""" + Copyright (C) International Business Machines Corp., 2005 + Author: Dan Smith <danms@us.ibm.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; under version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +""" + +import atexit +import Test + +# Tracking of managed domains +_managedDomains = [] +registered = 0 + +def addManagedDomain(name): + global registered + _managedDomains.append(name) + if not registered: + atexit.register(destroyManagedDomains) + registered = 1 + +def delManagedDomain(name): + if name in _managedDomains: + del _managedDomains[_managedDomains.index(name)] + +def destroyManagedDomains(): + if len(_managedDomains) > 0: + for m in _managedDomains: + Test.traceCommand("xm destroy %s" % m) + Test.traceCommand("xm delete %s" % m) diff --git a/tools/xm-test/lib/XmTestLib/XenDomain.py b/tools/xm-test/lib/XmTestLib/XenDomain.py index 40aaebf9b4..3fcfcea4f1 100644 --- a/tools/xm-test/lib/XmTestLib/XenDomain.py +++ b/tools/xm-test/lib/XmTestLib/XenDomain.py @@ -29,6 +29,7 @@ from Test import * from config import * from Console import * from XenDevice import * +from DomainTracking import * from acm import * @@ -147,7 +148,7 @@ class DomainError(Exception): class XenDomain: - def __init__(self, name=None, config=None): + def __init__(self, name=None, config=None, isManaged=False): """Create a domain object. 
@param config: String filename of config file """ @@ -162,6 +163,10 @@ class XenDomain: self.devices = {} self.netEnv = "bridge" + if os.getenv("XM_MANAGED_DOMAINS"): + isManaged = True + self.isManaged = isManaged + # Set domain type, either PV for ParaVirt domU or HVM for # FullVirt domain if ENABLE_HVM_SUPPORT: @@ -171,7 +176,17 @@ class XenDomain: def start(self, noConsole=False): - ret, output = traceCommand("xm create %s" % self.config) + if not self.isManaged: + ret, output = traceCommand("xm create %s" % self.config) + else: + ret, output = traceCommand("xm new %s" % self.config) + if ret != 0: + _ret, output = traceCommand("xm delete " + + self.config.getOpt("name")) + else: + ret, output = traceCommand("xm start " + + self.config.getOpt("name")) + addManagedDomain(self.config.getOpt("name")) if ret != 0: raise DomainError("Failed to create domain", @@ -218,6 +233,10 @@ class XenDomain: self.closeConsole() ret, output = traceCommand(prog + cmd + self.config.getOpt("name")) + if self.isManaged: + ret, output = traceCommand(prog + " delete " + + self.config.getOpt("name")) + delManagedDomain(self.config.getOpt("name")) return ret @@ -296,7 +315,7 @@ class XenDomain: class XmTestDomain(XenDomain): def __init__(self, name=None, extraConfig=None, - baseConfig=arch.configDefaults): + baseConfig=arch.configDefaults, isManaged=False): """Create a new xm-test domain @param name: The requested domain name @param extraConfig: Additional configuration options @@ -312,7 +331,8 @@ class XmTestDomain(XenDomain): elif not config.getOpt("name"): config.setOpt("name", getUniqueName()) - XenDomain.__init__(self, config.getOpt("name"), config=config) + XenDomain.__init__(self, config.getOpt("name"), config=config, + isManaged=isManaged) def minSafeMem(self): return arch.minSafeMem diff --git a/tools/xm-test/ramdisk/Makefile.am b/tools/xm-test/ramdisk/Makefile.am index b6c59d60e5..1de0d1423d 100644 --- a/tools/xm-test/ramdisk/Makefile.am +++ b/tools/xm-test/ramdisk/Makefile.am 
@@ -36,7 +36,12 @@ XMTEST_MAJ_VER = $(shell echo @PACKAGE_VERSION@ | perl -pe 's/(\d+)\.(\d+)\.\d+/ XMTEST_VER_IMG = initrd-$(XMTEST_MAJ_VER)-$(BR_ARCH).img XMTEST_DL_IMG = $(shell echo $(XMTEST_VER_IMG) | sed -e 's/x86_64/i386/g') -EXTRA_ROOT_DIRS = sys +EXTRA_ROOT_DIRS = sys modules + +BLKDRV = /lib/modules/$(shell uname -r)/kernel/drivers/xen/blkfront/xenblk.ko +NETDRV = /lib/modules/$(shell uname -r)/kernel/drivers/xen/netfront/xennet.ko +PKTDRV = /lib/modules/$(shell uname -r)/kernel/net/packet/af_packet.ko + if HVM all: initrd.img disk.img @@ -60,7 +65,11 @@ $(BR_IMG): $(BR_SRC) $(XMTEST_VER_IMG): $(BR_IMG) chmod a+x skel/etc/init.d/rcS - (cd skel; mkdir -p $(EXTRA_ROOT_DIRS); tar cf - .) \ + cd skel && mkdir -p $(EXTRA_ROOT_DIRS) + -[ -e "$(BLKDRV)" ] && cp $(BLKDRV) skel/modules + -[ -e "$(NETDRV)" ] && cp $(NETDRV) skel/modules + -[ -e "$(PKTDRV)" ] && cp $(PKTDRV) skel/modules + (cd skel; tar cf - .) \ | (cd $(BR_SRC)/$(BR_ROOT); tar xvf -) cd $(BR_SRC) && make cp $(BR_IMG) $(XMTEST_VER_IMG) diff --git a/tools/xm-test/ramdisk/skel/etc/init.d/rcS b/tools/xm-test/ramdisk/skel/etc/init.d/rcS index b3a92109e9..4c294f3dff 100644 --- a/tools/xm-test/ramdisk/skel/etc/init.d/rcS +++ b/tools/xm-test/ramdisk/skel/etc/init.d/rcS @@ -6,3 +6,14 @@ mount -a if uname -r | grep -q '^2.6'; then mount -t sysfs none /sys fi + +# If the block, net, and packet drivers are modules, we need to load them +if test -e /modules/xenblk.ko; then + insmod /modules/xenblk.ko > /dev/null 2>&1 +fi +if test -e /modules/xennet.ko; then + insmod /modules/xennet.ko > /dev/null 2>&1 +fi +if test -e /modules/af_packet.ko; then + insmod /modules/af_packet.ko > /dev/null 2>&1 +fi diff --git a/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py b/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py index 292db063d6..b9fdc0691b 100644 --- a/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py +++ 
b/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py @@ -6,7 +6,7 @@ from XmTestLib import * from XmTestLib.block_utils import * -import re +import re, time def checkXmLongList(domain): s, o = traceCommand("xm list --long %s" % domain.getName()) @@ -35,4 +35,8 @@ if not checkXmLongList(domain): block_detach(domain, "xvda1") if checkXmLongList(domain): - FAIL("xm long list does not show that xvda1 was removed") + # device info is removed by hotplug scripts - give them a chance + # to fire (they run asynchronously with us). + time.sleep(1) + if checkXmLongList(domain): + FAIL("xm long list does not show that xvda1 was removed") diff --git a/tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py b/tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py index e0b1fe5846..cdbca0f757 100644 --- a/tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py +++ b/tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py @@ -2,14 +2,27 @@ # # Sched-credit tests modified from SEDF tests # + +import re + from XmTestLib import * +paramsRE = re.compile(r'^[^ ]* *[^ ]* *([^ ]*) *([^ ]*)$') + def get_sched_credit_params(domain): - status, output = traceCommand("xm sched-credit -d %s" %(domain.getName())) - params = output.strip('{}').split(', ') - cap = int(params[0].split(':')[1].strip(' ')) - weight = int(params[1].split(':')[1].strip(' ')) - return (status, (weight, cap)) + status, output = traceCommand("xm sched-credit -d %s | tail -1" % + domain.getName()) + + if status != 0: + FAIL("Getting sched-credit parameters return non-zero rv (%d)", + status) + + m = paramsRE.match(output) + if not m: + FAIL("xm sched-credit gave bad output") + weight = int(m.group(1)) + cap = int(m.group(2)) + return (weight, cap) def set_sched_credit_weight(domain, weight): status, output = traceCommand("xm sched-credit -d %s -w %d" %(domain.getName(), weight)) @@ -31,11 +44,8 @@ except DomainError, e: FAIL(str(e)) # check default param 
values -(status, params) = get_sched_credit_params(domain) -if status != 0: - FAIL("Getting sched-credit parameters return non-zero rv (%d)", status) +(weight, cap) = get_sched_credit_params(domain) -(weight, cap) = params if weight != 256: FAIL("default weight is 256 (got %d)", weight) if cap != 0: @@ -51,11 +61,8 @@ if status != 0: FAIL("Setting sched-credit cap return non-zero rv (%d)", status) # check new param values -(status, params) = get_sched_credit_params(domain) -if status != 0: - FAIL("Getting sched-credit parameters return non-zero rv (%d)", status) +(weight, cap) = get_sched_credit_params(domain) -(weight, cap) = params if weight != 512: FAIL("expected weight of 512 (got %d)", weight) if cap != 100: diff --git a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c index e328cf9663..9cf5972f0d 100644 --- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c +++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c @@ -43,12 +43,18 @@ void *shared_info_area; #define MAX_EVTCHN 256 -static struct -{ +static struct { irqreturn_t(*handler) (int, void *, struct pt_regs *); void *dev_id; + int close; /* close on unbind_from_irqhandler()? 
*/ } evtchns[MAX_EVTCHN]; +int irq_to_evtchn_port(int irq) +{ + return irq; +} +EXPORT_SYMBOL(irq_to_evtchn_port); + void mask_evtchn(int port) { shared_info_t *s = shared_info_area; @@ -94,22 +100,48 @@ void unmask_evtchn(int port) } EXPORT_SYMBOL(unmask_evtchn); -int -bind_evtchn_to_irqhandler(unsigned int evtchn, - irqreturn_t(*handler) (int, void *, - struct pt_regs *), - unsigned long irqflags, const char *devname, - void *dev_id) +int bind_listening_port_to_irqhandler( + unsigned int remote_domain, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id) { - if (evtchn >= MAX_EVTCHN) + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_domain; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err) + return err; + + evtchns[alloc_unbound.port].handler = handler; + evtchns[alloc_unbound.port].dev_id = dev_id; + evtchns[alloc_unbound.port].close = 1; + unmask_evtchn(alloc_unbound.port); + return alloc_unbound.port; +} +EXPORT_SYMBOL(bind_listening_port_to_irqhandler); + +int bind_caller_port_to_irqhandler( + unsigned int caller_port, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + if (caller_port >= MAX_EVTCHN) return -EINVAL; - evtchns[evtchn].handler = handler; - evtchns[evtchn].dev_id = dev_id; - unmask_evtchn(evtchn); - return evtchn; + evtchns[caller_port].handler = handler; + evtchns[caller_port].dev_id = dev_id; + evtchns[caller_port].close = 0; + unmask_evtchn(caller_port); + return caller_port; } - -EXPORT_SYMBOL(bind_evtchn_to_irqhandler); +EXPORT_SYMBOL(bind_caller_port_to_irqhandler); void unbind_from_irqhandler(unsigned int evtchn, void *dev_id) { @@ -118,8 +150,12 @@ void unbind_from_irqhandler(unsigned int evtchn, void *dev_id) mask_evtchn(evtchn); evtchns[evtchn].handler = NULL; -} + if 
(evtchns[evtchn].close) { + struct evtchn_close close = { .port = evtchn }; + HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + } +} EXPORT_SYMBOL(unbind_from_irqhandler); void notify_remote_via_irq(int irq) @@ -127,7 +163,6 @@ void notify_remote_via_irq(int irq) int evtchn = irq; notify_remote_via_evtchn(evtchn); } - EXPORT_SYMBOL(notify_remote_via_irq); irqreturn_t evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs) diff --git a/xen/Makefile b/xen/Makefile index 0c06e081fe..85472d0596 100644 --- a/xen/Makefile +++ b/xen/Makefile @@ -14,8 +14,8 @@ default: build .PHONY: dist dist: install -.PHONY: build install clean distclean cscope TAGS tags -build install debug clean distclean cscope TAGS tags:: +.PHONY: build install clean distclean cscope TAGS tags MAP +build install debug clean distclean cscope TAGS tags MAP:: $(MAKE) -f Rules.mk _$@ .PHONY: _build @@ -29,9 +29,15 @@ _install: $(TARGET).gz ln -f -s $(notdir $(TARGET))-$(XEN_FULLVERSION).gz $(DESTDIR)/boot/$(notdir $(TARGET))-$(XEN_VERSION).gz ln -f -s $(notdir $(TARGET))-$(XEN_FULLVERSION).gz $(DESTDIR)/boot/$(notdir $(TARGET)).gz $(INSTALL_DATA) $(TARGET)-syms $(DESTDIR)/boot/$(notdir $(TARGET))-syms-$(XEN_FULLVERSION) + [ -d $(DESTDIR)/usr/include/xen/arch-x86 ] || \ + $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/arch-x86 + [ -d $(DESTDIR)/usr/include/xen/hvm ] || \ + $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/hvm [ -d $(DESTDIR)/usr/include/xen/io ] || \ $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/io $(INSTALL_DATA) include/public/*.h $(DESTDIR)/usr/include/xen + $(INSTALL_DATA) include/public/arch-x86/*.h $(DESTDIR)/usr/include/xen/arch-x86 + $(INSTALL_DATA) include/public/hvm/*.h $(DESTDIR)/usr/include/xen/hvm $(INSTALL_DATA) include/public/io/*.h $(DESTDIR)/usr/include/xen/io $(INSTALL_DATA) include/public/COPYING $(DESTDIR)/usr/include/xen @@ -42,6 +48,7 @@ _debug: .PHONY: _clean _clean: delete-unfresh-files $(MAKE) -C tools clean + $(MAKE) -f $(BASEDIR)/Rules.mk -C include clean $(MAKE) -f 
$(BASEDIR)/Rules.mk -C common clean $(MAKE) -f $(BASEDIR)/Rules.mk -C drivers clean $(MAKE) -f $(BASEDIR)/Rules.mk -C acm clean @@ -63,6 +70,7 @@ $(TARGET): delete-unfresh-files $(MAKE) -f $(BASEDIR)/Rules.mk include/xen/compile.h $(MAKE) -f $(BASEDIR)/Rules.mk include/xen/acm_policy.h [ -e include/asm ] || ln -sf asm-$(TARGET_ARCH) include/asm + $(MAKE) -f $(BASEDIR)/Rules.mk -C include $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) asm-offsets.s $(MAKE) -f $(BASEDIR)/Rules.mk include/asm-$(TARGET_ARCH)/asm-offsets.h $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) $(TARGET) @@ -152,9 +160,9 @@ _cscope: $(all_sources) > cscope.files cscope -k -b -q -.PHONY: MAP -MAP: - $(NM) -n $(TARGET) | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' > System.map +.PHONY: _MAP +_MAP: + $(NM) -n $(TARGET)-syms | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' > System.map .PHONY: FORCE FORCE: diff --git a/xen/Rules.mk b/xen/Rules.mk index 08c26aca44..28ee66b64d 100644 --- a/xen/Rules.mk +++ b/xen/Rules.mk @@ -34,6 +34,7 @@ TARGET := $(BASEDIR)/xen HDRS := $(wildcard $(BASEDIR)/include/xen/*.h) HDRS += $(wildcard $(BASEDIR)/include/public/*.h) +HDRS += $(wildcard $(BASEDIR)/include/compat/*.h) HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h) HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h) diff --git a/xen/arch/ia64/xen/domain.c b/xen/arch/ia64/xen/domain.c index aafb0c2515..ed1a6519a0 100644 --- a/xen/arch/ia64/xen/domain.c +++ b/xen/arch/ia64/xen/domain.c @@ -522,14 +522,14 @@ void arch_domain_destroy(struct domain *d) deallocate_rid_range(d); } -void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c) +void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c) { int i; - struct vcpu_extra_regs *er = &c->extra_regs; + struct vcpu_extra_regs *er = &c.nat->extra_regs; - c->user_regs = *vcpu_regs (v); - c->privregs_pfn = 
get_gpfn_from_mfn(virt_to_maddr(v->arch.privregs) >> - PAGE_SHIFT); + c.nat->user_regs = *vcpu_regs(v); + c.nat->privregs_pfn = get_gpfn_from_mfn(virt_to_maddr(v->arch.privregs) >> + PAGE_SHIFT); /* Fill extra regs. */ for (i = 0; i < 8; i++) { @@ -549,12 +549,12 @@ void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c) er->iva = v->arch.iva; } -int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c) +int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_u c) { struct pt_regs *regs = vcpu_regs (v); struct domain *d = v->domain; - *regs = c->user_regs; + *regs = c.nat->user_regs; if (!d->arch.is_vti) { /* domain runs at PL2/3 */ @@ -562,9 +562,9 @@ int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c) regs->ar_rsc |= (2 << 2); /* force PL2/3 */ } - if (c->flags & VGCF_EXTRA_REGS) { + if (c.nat->flags & VGCF_EXTRA_REGS) { int i; - struct vcpu_extra_regs *er = &c->extra_regs; + struct vcpu_extra_regs *er = &c.nat->extra_regs; for (i = 0; i < 8; i++) { vcpu_set_itr(v, i, er->itrs[i].pte, @@ -868,7 +868,7 @@ int shadow_mode_control(struct domain *d, xen_domctl_shadow_op_t *sc) #endif // see arch/x86/xxx/domain_build.c -int elf_sanity_check(Elf_Ehdr *ehdr) +int elf_sanity_check(const Elf_Ehdr *ehdr) { if (!(IS_ELF(*ehdr))) { diff --git a/xen/arch/ia64/xen/xenmisc.c b/xen/arch/ia64/xen/xenmisc.c index 051f78ee30..532d812910 100644 --- a/xen/arch/ia64/xen/xenmisc.c +++ b/xen/arch/ia64/xen/xenmisc.c @@ -57,26 +57,6 @@ is_platform_hp_ski(void) struct pt_regs *guest_cpu_user_regs(void) { return vcpu_regs(current); } /////////////////////////////// -// from arch/ia64/traps.c -/////////////////////////////// - -int is_kernel_text(unsigned long addr) -{ - extern char _stext[], _etext[]; - if (addr >= (unsigned long) _stext && - addr <= (unsigned long) _etext) - return 1; - - return 0; -} - -unsigned long kernel_text_end(void) -{ - extern char _etext[]; - return (unsigned long) _etext; -} - -/////////////////////////////// // 
from common/keyhandler.c /////////////////////////////// void dump_pageframe_info(struct domain *d) diff --git a/xen/arch/powerpc/domain.c b/xen/arch/powerpc/domain.c index 45ed959945..44665df197 100644 --- a/xen/arch/powerpc/domain.c +++ b/xen/arch/powerpc/domain.c @@ -150,9 +150,9 @@ void vcpu_destroy(struct vcpu *v) { } -int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c) +int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_u c) { - memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs)); + memcpy(&v->arch.ctxt, &c.nat->user_regs, sizeof(c.nat->user_regs)); printk("Domain[%d].%d: initializing\n", v->domain->domain_id, v->vcpu_id); diff --git a/xen/arch/powerpc/domain_build.c b/xen/arch/powerpc/domain_build.c index 12d9c5776e..0f283678f8 100644 --- a/xen/arch/powerpc/domain_build.c +++ b/xen/arch/powerpc/domain_build.c @@ -51,7 +51,7 @@ integer_param("dom0_max_vcpus", opt_dom0_max_vcpus); static unsigned int opt_dom0_shadow; boolean_param("dom0_shadow", opt_dom0_shadow); -int elf_sanity_check(Elf_Ehdr *ehdr) +int elf_sanity_check(const Elf_Ehdr *ehdr) { if (IS_ELF(*ehdr)) /* we are happy with either */ diff --git a/xen/arch/powerpc/domctl.c b/xen/arch/powerpc/domctl.c index c51430d132..20e6e536a8 100644 --- a/xen/arch/powerpc/domctl.c +++ b/xen/arch/powerpc/domctl.c @@ -22,6 +22,7 @@ #include <xen/types.h> #include <xen/lib.h> #include <xen/sched.h> +#include <xen/domain.h> #include <xen/guest_access.h> #include <xen/shadow.h> #include <public/xen.h> @@ -29,10 +30,9 @@ #include <public/sysctl.h> #include <asm/processor.h> -void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *); -void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c) +void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c) { - memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs)); + memcpy(&c.nat->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs)); /* XXX fill in rest of vcpu_guest_context_t */ } diff --git 
a/xen/arch/powerpc/setup.c b/xen/arch/powerpc/setup.c index c22e6712bb..440acbbb1e 100644 --- a/xen/arch/powerpc/setup.c +++ b/xen/arch/powerpc/setup.c @@ -91,19 +91,6 @@ static struct domain *idle_domain; volatile struct processor_area * volatile global_cpu_table[NR_CPUS]; -int is_kernel_text(unsigned long addr) -{ - if (addr >= (unsigned long) &_start && - addr <= (unsigned long) &_etext) - return 1; - return 0; -} - -unsigned long kernel_text_end(void) -{ - return (unsigned long) &_etext; -} - static void __init do_initcalls(void) { initcall_t *call; diff --git a/xen/arch/powerpc/xen.lds.S b/xen/arch/powerpc/xen.lds.S index 2fe00d7b60..07bb7a6dae 100644 --- a/xen/arch/powerpc/xen.lds.S +++ b/xen/arch/powerpc/xen.lds.S @@ -113,10 +113,10 @@ SECTIONS . = ALIGN(32); __setup_start = .; - .setup.init : { *(.setup.init) } + .init.setup : { *(.init.setup) } __setup_end = .; __initcall_start = .; - .initcall.init : { *(.initcall.init) } + .initcall.init : { *(.initcall1.init) } __initcall_end = .; __inithcall_start = .; .inithcall.text : { *(.inithcall.text) } diff --git a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S index c3466dee5f..c0f9d82c88 100644 --- a/xen/arch/x86/boot/x86_32.S +++ b/xen/arch/x86/boot/x86_32.S @@ -11,8 +11,6 @@ .text ENTRY(start) -ENTRY(stext) -ENTRY(_stext) jmp __start .align 4 diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S index ce0e42987c..fe07e36471 100644 --- a/xen/arch/x86/boot/x86_64.S +++ b/xen/arch/x86/boot/x86_64.S @@ -14,8 +14,6 @@ #define SYM_PHYS(sym) (sym - __PAGE_OFFSET) ENTRY(start) -ENTRY(stext) -ENTRY(_stext) jmp __start .org 0x004 @@ -226,15 +224,34 @@ high_start: .align PAGE_SIZE, 0 ENTRY(gdt_table) .quad 0x0000000000000000 /* unused */ - .quad 0x00cf9a000000ffff /* 0xe008 ring 0 code, compatibility */ - .quad 0x00af9a000000ffff /* 0xe010 ring 0 code, 64-bit mode */ - .quad 0x00cf92000000ffff /* 0xe018 ring 0 data */ + .quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */ + .quad 
0x00cf92000000ffff /* 0xe010 ring 0 data */ + .quad 0x0000000000000000 /* reserved */ .quad 0x00cffa000000ffff /* 0xe023 ring 3 code, compatibility */ .quad 0x00cff2000000ffff /* 0xe02b ring 3 data */ .quad 0x00affa000000ffff /* 0xe033 ring 3 code, 64-bit mode */ - .quad 0x0000000000000000 /* unused */ + .quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */ + .org gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8 .fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */ +#ifdef CONFIG_COMPAT + .align PAGE_SIZE, 0 +/* NB. Even rings != 0 get access to the full 4Gb, as only the */ +/* (compatibility) machine->physical mapping table lives there. */ +ENTRY(compat_gdt_table) + .quad 0x0000000000000000 /* unused */ + .quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */ + .quad 0x00cf92000000ffff /* 0xe010 ring 0 data */ + .quad 0x00cfba000000ffff /* 0xe019 ring 1 code, compatibility */ + .quad 0x00cfb2000000ffff /* 0xe021 ring 1 data */ + .quad 0x00cffa000000ffff /* 0xe02b ring 3 code, compatibility */ + .quad 0x00cff2000000ffff /* 0xe033 ring 3 data */ + .quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */ + .org compat_gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8 + .fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */ +# undef LIMIT +#endif + /* Initial PML4 -- level-4 page table. */ .align PAGE_SIZE, 0 ENTRY(idle_pg_table) diff --git a/xen/arch/x86/compat.c b/xen/arch/x86/compat.c index 431c4560fd..a4fda062d6 100644 --- a/xen/arch/x86/compat.c +++ b/xen/arch/x86/compat.c @@ -9,17 +9,23 @@ #include <xen/guest_access.h> #include <xen/hypercall.h> +#ifndef COMPAT +typedef long ret_t; +#endif + /* Legacy hypercall (as of 0x00030202). 
*/ -long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop) +ret_t do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop) { struct physdev_op op; if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) return -EFAULT; - return do_physdev_op(op.cmd, (XEN_GUEST_HANDLE(void)) { &uop.p->u }); + return do_physdev_op(op.cmd, guest_handle_from_ptr(&uop.p->u, void)); } +#ifndef COMPAT + /* Legacy hypercall (as of 0x00030202). */ long do_event_channel_op_compat(XEN_GUEST_HANDLE(evtchn_op_t) uop) { @@ -28,5 +34,7 @@ long do_event_channel_op_compat(XEN_GUEST_HANDLE(evtchn_op_t) uop) if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) return -EFAULT; - return do_event_channel_op(op.cmd, (XEN_GUEST_HANDLE(void)) {&uop.p->u }); + return do_event_channel_op(op.cmd, guest_handle_from_ptr(&uop.p->u, void)); } + +#endif diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c index a754cbbaab..73eb4724a8 100644 --- a/xen/arch/x86/crash.c +++ b/xen/arch/x86/crash.c @@ -25,6 +25,7 @@ #include <xen/kexec.h> #include <xen/sched.h> #include <public/xen.h> +#include <asm/shared.h> #include <asm/hvm/hvm.h> static atomic_t waiting_for_crash_ipi; @@ -103,7 +104,7 @@ void machine_crash_shutdown(void) info = kexec_crash_save_info(); info->dom0_pfn_to_mfn_frame_list_list = - dom0->shared_info->arch.pfn_to_mfn_frame_list_list; + arch_get_pfn_to_mfn_frame_list_list(dom0); } /* diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 1f6179fbdc..b03824f5ca 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -16,6 +16,7 @@ #include <xen/lib.h> #include <xen/errno.h> #include <xen/sched.h> +#include <xen/domain.h> #include <xen/smp.h> #include <xen/delay.h> #include <xen/softirq.h> @@ -40,6 +41,9 @@ #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/msr.h> +#ifdef CONFIG_COMPAT +#include <compat/vcpu.h> +#endif DEFINE_PER_CPU(struct vcpu *, curr_vcpu); @@ -127,6 +131,195 @@ void free_vcpu_struct(struct vcpu *v) xfree(v); } +#ifdef CONFIG_COMPAT + +int 
setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab) +{ + struct domain *d = v->domain; + unsigned i; + struct page_info *pg; + + if ( !d->arch.mm_arg_xlat_l3 ) + { + pg = alloc_domheap_page(NULL); + if ( !pg ) + return -ENOMEM; + d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg)); + } + + l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] = + l4e_from_paddr(__pa(d->arch.mm_arg_xlat_l3), __PAGE_HYPERVISOR); + + for ( i = 0; i < COMPAT_ARG_XLAT_PAGES; ++i ) + { + unsigned long va = COMPAT_ARG_XLAT_VIRT_START(v->vcpu_id) + i * PAGE_SIZE; + l2_pgentry_t *l2tab; + l1_pgentry_t *l1tab; + + if ( !l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]) ) + { + pg = alloc_domheap_page(NULL); + if ( !pg ) + return -ENOMEM; + clear_page(page_to_virt(pg)); + d->arch.mm_arg_xlat_l3[l3_table_offset(va)] = l3e_from_page(pg, __PAGE_HYPERVISOR); + } + l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]); + if ( !l2e_get_intpte(l2tab[l2_table_offset(va)]) ) + { + pg = alloc_domheap_page(NULL); + if ( !pg ) + return -ENOMEM; + clear_page(page_to_virt(pg)); + l2tab[l2_table_offset(va)] = l2e_from_page(pg, __PAGE_HYPERVISOR); + } + l1tab = l2e_to_l1e(l2tab[l2_table_offset(va)]); + BUG_ON(l1e_get_intpte(l1tab[l1_table_offset(va)])); + pg = alloc_domheap_page(NULL); + if ( !pg ) + return -ENOMEM; + l1tab[l1_table_offset(va)] = l1e_from_page(pg, PAGE_HYPERVISOR); + } + + return 0; +} + +static void release_arg_xlat_area(struct domain *d) +{ + if ( d->arch.mm_arg_xlat_l3 ) + { + unsigned l3; + + for ( l3 = 0; l3 < L3_PAGETABLE_ENTRIES; ++l3 ) + { + if ( l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3]) ) + { + l2_pgentry_t *l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3]); + unsigned l2; + + for ( l2 = 0; l2 < L2_PAGETABLE_ENTRIES; ++l2 ) + { + if ( l2e_get_intpte(l2tab[l2]) ) + { + l1_pgentry_t *l1tab = l2e_to_l1e(l2tab[l2]); + unsigned l1; + + for ( l1 = 0; l1 < L1_PAGETABLE_ENTRIES; ++l1 ) + { + if ( l1e_get_intpte(l1tab[l1]) ) + free_domheap_page(l1e_get_page(l1tab[l1])); 
+ } + free_domheap_page(l2e_get_page(l2tab[l2])); + } + } + free_domheap_page(l3e_get_page(d->arch.mm_arg_xlat_l3[l3])); + } + } + free_domheap_page(virt_to_page(d->arch.mm_arg_xlat_l3)); + } +} + +static int setup_compat_l4(struct vcpu *v) +{ + struct page_info *pg = alloc_domheap_page(NULL); + l4_pgentry_t *l4tab; + int rc; + + if ( !pg ) + return -ENOMEM; + l4tab = copy_page(page_to_virt(pg), idle_pg_table); + l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] = + l4e_from_page(pg, __PAGE_HYPERVISOR); + l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] = + l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3), __PAGE_HYPERVISOR); + v->arch.guest_table = pagetable_from_page(pg); + v->arch.guest_table_user = v->arch.guest_table; + + if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 ) + { + free_domheap_page(pg); + return rc; + } + + return 0; +} + +static void release_compat_l4(struct vcpu *v) +{ + free_domheap_page(pagetable_get_page(v->arch.guest_table)); + v->arch.guest_table = pagetable_null(); + v->arch.guest_table_user = pagetable_null(); +} + +static inline int may_switch_mode(struct domain *d) +{ + return 1; /* XXX */ +} + +int switch_native(struct domain *d) +{ + l1_pgentry_t gdt_l1e; + unsigned int vcpuid; + + if ( !d ) + return -EINVAL; + if ( !may_switch_mode(d) ) + return -EACCES; + if ( !IS_COMPAT(d) ) + return 0; + + clear_bit(_DOMF_compat, &d->domain_flags); + release_arg_xlat_area(d); + + /* switch gdt */ + gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR); + for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ ) + { + d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) + + FIRST_RESERVED_GDT_PAGE)] = gdt_l1e; + if (d->vcpu[vcpuid]) + release_compat_l4(d->vcpu[vcpuid]); + } + + return 0; +} + +int switch_compat(struct domain *d) +{ + l1_pgentry_t gdt_l1e; + unsigned int vcpuid; + + if ( !d ) + return -EINVAL; + if ( compat_disabled ) + return -ENOSYS; + if ( !may_switch_mode(d) ) + return -EACCES; + if ( IS_COMPAT(d) ) + return 0; + + 
set_bit(_DOMF_compat, &d->domain_flags); + + /* switch gdt */ + gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR); + for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ ) + { + d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) + + FIRST_RESERVED_GDT_PAGE)] = gdt_l1e; + if (d->vcpu[vcpuid] + && setup_compat_l4(d->vcpu[vcpuid]) != 0) + return -ENOMEM; + } + + return 0; +} + +#else +#define release_arg_xlat_area(d) ((void)0) +#define setup_compat_l4(v) 0 +#define release_compat_l4(v) ((void)0) +#endif + int vcpu_initialise(struct vcpu *v) { struct domain *d = v->domain; @@ -161,21 +354,27 @@ int vcpu_initialise(struct vcpu *v) v->arch.perdomain_ptes = d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT); + if ( IS_COMPAT(d) && (rc = setup_compat_l4(v)) != 0 ) + return rc; + return 0; } void vcpu_destroy(struct vcpu *v) { + if ( IS_COMPAT(v->domain) ) + release_compat_l4(v); } int arch_domain_create(struct domain *d) { #ifdef __x86_64__ struct page_info *pg; + int i; #endif l1_pgentry_t gdt_l1e; int vcpuid, pdpt_order; - int i, rc = -ENOMEM; + int rc = -ENOMEM; pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t)); d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order); @@ -218,12 +417,11 @@ int arch_domain_create(struct domain *d) #endif /* __x86_64__ */ - shadow_lock_init(d); - for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) - INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); - INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); - INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); - INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); +#ifdef CONFIG_COMPAT + HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START; +#endif + + shadow_domain_init(d); if ( !is_idle_domain(d) ) { @@ -274,47 +472,88 @@ void arch_domain_destroy(struct domain *d) free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3)); #endif + if ( IS_COMPAT(d) ) + release_arg_xlat_area(d); + free_xenheap_page(d->shared_info); } /* This is called by 
arch_final_setup_guest and do_boot_vcpu */ int arch_set_info_guest( - struct vcpu *v, struct vcpu_guest_context *c) + struct vcpu *v, vcpu_guest_context_u c) { struct domain *d = v->domain; +#ifdef CONFIG_COMPAT +#define c(fld) (!IS_COMPAT(d) ? (c.nat->fld) : (c.cmp->fld)) +#else +#define c(fld) (c.nat->fld) +#endif unsigned long cr3_pfn = INVALID_MFN; + unsigned long flags = c(flags); int i, rc; if ( !is_hvm_vcpu(v) ) { - fixup_guest_stack_selector(c->user_regs.ss); - fixup_guest_stack_selector(c->kernel_ss); - fixup_guest_code_selector(c->user_regs.cs); - + if ( !IS_COMPAT(d) ) + { + fixup_guest_stack_selector(d, c.nat->user_regs.ss); + fixup_guest_stack_selector(d, c.nat->kernel_ss); + fixup_guest_code_selector(d, c.nat->user_regs.cs); #ifdef __i386__ - fixup_guest_code_selector(c->event_callback_cs); - fixup_guest_code_selector(c->failsafe_callback_cs); + fixup_guest_code_selector(d, c.nat->event_callback_cs); + fixup_guest_code_selector(d, c.nat->failsafe_callback_cs); #endif - for ( i = 0; i < 256; i++ ) - fixup_guest_code_selector(c->trap_ctxt[i].cs); + for ( i = 0; i < 256; i++ ) + fixup_guest_code_selector(d, c.nat->trap_ctxt[i].cs); - /* LDT safety checks. */ - if ( ((c->ldt_base & (PAGE_SIZE-1)) != 0) || - (c->ldt_ents > 8192) || - !array_access_ok(c->ldt_base, c->ldt_ents, LDT_ENTRY_SIZE) ) - return -EINVAL; + /* LDT safety checks. */ + if ( ((c.nat->ldt_base & (PAGE_SIZE-1)) != 0) || + (c.nat->ldt_ents > 8192) || + !array_access_ok(c.nat->ldt_base, + c.nat->ldt_ents, + LDT_ENTRY_SIZE) ) + return -EINVAL; + } +#ifdef CONFIG_COMPAT + else + { + fixup_guest_stack_selector(d, c.cmp->user_regs.ss); + fixup_guest_stack_selector(d, c.cmp->kernel_ss); + fixup_guest_code_selector(d, c.cmp->user_regs.cs); + fixup_guest_code_selector(d, c.cmp->event_callback_cs); + fixup_guest_code_selector(d, c.cmp->failsafe_callback_cs); + + for ( i = 0; i < 256; i++ ) + fixup_guest_code_selector(d, c.cmp->trap_ctxt[i].cs); + + /* LDT safety checks. 
*/ + if ( ((c.cmp->ldt_base & (PAGE_SIZE-1)) != 0) || + (c.cmp->ldt_ents > 8192) || + !compat_array_access_ok(c.cmp->ldt_base, + c.cmp->ldt_ents, + LDT_ENTRY_SIZE) ) + return -EINVAL; + } +#endif } clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags); - if ( c->flags & VGCF_i387_valid ) + if ( flags & VGCF_I387_VALID ) set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags); v->arch.flags &= ~TF_kernel_mode; - if ( (c->flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ ) + if ( (flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ ) v->arch.flags |= TF_kernel_mode; - memcpy(&v->arch.guest_context, c, sizeof(*c)); + if ( !IS_COMPAT(v->domain) ) + memcpy(&v->arch.guest_context, c.nat, sizeof(*c.nat)); +#ifdef CONFIG_COMPAT + else + { + XLAT_vcpu_guest_context(&v->arch.guest_context, c.cmp); + } +#endif /* Only CR0.TS is modifiable by guest or admin. */ v->arch.guest_context.ctrlreg[0] &= X86_CR0_TS; @@ -342,38 +581,67 @@ int arch_set_info_guest( memset(v->arch.guest_context.debugreg, 0, sizeof(v->arch.guest_context.debugreg)); for ( i = 0; i < 8; i++ ) - (void)set_debugreg(v, i, c->debugreg[i]); + (void)set_debugreg(v, i, c(debugreg[i])); if ( v->vcpu_id == 0 ) - d->vm_assist = c->vm_assist; + d->vm_assist = c(vm_assist); if ( !is_hvm_vcpu(v) ) { - if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 ) + if ( !IS_COMPAT(d) ) + rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents); +#ifdef CONFIG_COMPAT + else + { + unsigned long gdt_frames[ARRAY_SIZE(c.cmp->gdt_frames)]; + unsigned int i, n = (c.cmp->gdt_ents + 511) / 512; + + if ( n > ARRAY_SIZE(c.cmp->gdt_frames) ) + return -EINVAL; + for ( i = 0; i < n; ++i ) + gdt_frames[i] = c.cmp->gdt_frames[i]; + rc = (int)set_gdt(v, gdt_frames, c.cmp->gdt_ents); + } +#endif + if ( rc != 0 ) return rc; - cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3])); - - if ( shadow_mode_refcounts(d) - ? 
!get_page(mfn_to_page(cr3_pfn), d) - : !get_page_and_type(mfn_to_page(cr3_pfn), d, - PGT_base_page_table) ) + if ( !IS_COMPAT(d) ) { - destroy_gdt(v); - return -EINVAL; + cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3])); + + if ( shadow_mode_refcounts(d) + ? !get_page(mfn_to_page(cr3_pfn), d) + : !get_page_and_type(mfn_to_page(cr3_pfn), d, + PGT_base_page_table) ) + { + destroy_gdt(v); + return -EINVAL; + } + + v->arch.guest_table = pagetable_from_pfn(cr3_pfn); } +#ifdef CONFIG_COMPAT + else + { + l4_pgentry_t *l4tab; - v->arch.guest_table = pagetable_from_pfn(cr3_pfn); - } + cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3])); - /* Shadow: make sure the domain has enough shadow memory to - * boot another vcpu */ - if ( shadow_mode_enabled(d) - && d->arch.shadow.total_pages < shadow_min_acceptable_pages(d) ) - { - destroy_gdt(v); - return -ENOMEM; - } + if ( shadow_mode_refcounts(d) + ? !get_page(mfn_to_page(cr3_pfn), d) + : !get_page_and_type(mfn_to_page(cr3_pfn), d, + PGT_l3_page_table) ) + { + destroy_gdt(v); + return -EINVAL; + } + + l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); + *l4tab = l4e_from_pfn(cr3_pfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); + } +#endif + } if ( v->vcpu_id == 0 ) update_domain_wallclock_time(d); @@ -387,6 +655,7 @@ int arch_set_info_guest( update_cr3(v); return 0; +#undef c } long @@ -410,16 +679,16 @@ arch_do_vcpu_op( break; rc = 0; - v->runstate_guest = area.addr.h; + runstate_guest(v) = area.addr.h; if ( v == current ) { - __copy_to_guest(v->runstate_guest, &v->runstate, 1); + __copy_to_guest(runstate_guest(v), &v->runstate, 1); } else { vcpu_runstate_get(v, &runstate); - __copy_to_guest(v->runstate_guest, &runstate, 1); + __copy_to_guest(runstate_guest(v), &runstate, 1); } break; @@ -502,27 +771,30 @@ static void load_segments(struct vcpu *n) all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs); } - /* This can only be non-zero if selector is NULL. 
*/ - if ( nctxt->fs_base ) - wrmsr(MSR_FS_BASE, - nctxt->fs_base, - nctxt->fs_base>>32); - - /* Most kernels have non-zero GS base, so don't bother testing. */ - /* (This is also a serialising instruction, avoiding AMD erratum #88.) */ - wrmsr(MSR_SHADOW_GS_BASE, - nctxt->gs_base_kernel, - nctxt->gs_base_kernel>>32); - - /* This can only be non-zero if selector is NULL. */ - if ( nctxt->gs_base_user ) - wrmsr(MSR_GS_BASE, - nctxt->gs_base_user, - nctxt->gs_base_user>>32); - - /* If in kernel mode then switch the GS bases around. */ - if ( n->arch.flags & TF_kernel_mode ) - __asm__ __volatile__ ( "swapgs" ); + if ( !IS_COMPAT(n->domain) ) + { + /* This can only be non-zero if selector is NULL. */ + if ( nctxt->fs_base ) + wrmsr(MSR_FS_BASE, + nctxt->fs_base, + nctxt->fs_base>>32); + + /* Most kernels have non-zero GS base, so don't bother testing. */ + /* (This is also a serialising instruction, avoiding AMD erratum #88.) */ + wrmsr(MSR_SHADOW_GS_BASE, + nctxt->gs_base_kernel, + nctxt->gs_base_kernel>>32); + + /* This can only be non-zero if selector is NULL. */ + if ( nctxt->gs_base_user ) + wrmsr(MSR_GS_BASE, + nctxt->gs_base_user, + nctxt->gs_base_user>>32); + + /* If in kernel mode then switch the GS bases around. */ + if ( (n->arch.flags & TF_kernel_mode) ) + __asm__ __volatile__ ( "swapgs" ); + } if ( unlikely(!all_segs_okay) ) { @@ -533,6 +805,55 @@ static void load_segments(struct vcpu *n) (unsigned long *)nctxt->kernel_sp; unsigned long cs_and_mask, rflags; + if ( IS_COMPAT(n->domain) ) + { + unsigned int *esp = ring_1(regs) ? + (unsigned int *)regs->rsp : + (unsigned int *)nctxt->kernel_sp; + unsigned int cs_and_mask, eflags; + int ret = 0; + + /* CS longword also contains full evtchn_upcall_mask. */ + cs_and_mask = (unsigned short)regs->cs | + ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16); + /* Fold upcall mask into RFLAGS.IF. 
*/ + eflags = regs->_eflags & ~X86_EFLAGS_IF; + eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9; + + if ( !ring_1(regs) ) + { + ret = put_user(regs->ss, esp-1); + ret |= put_user(regs->_esp, esp-2); + esp -= 2; + } + + if ( ret | + put_user(eflags, esp-1) | + put_user(cs_and_mask, esp-2) | + put_user(regs->_eip, esp-3) | + put_user(nctxt->user_regs.gs, esp-4) | + put_user(nctxt->user_regs.fs, esp-5) | + put_user(nctxt->user_regs.es, esp-6) | + put_user(nctxt->user_regs.ds, esp-7) ) + { + gdprintk(XENLOG_ERR, "Error while creating compat " + "failsafe callback frame.\n"); + domain_crash(n->domain); + } + + if ( test_bit(_VGCF_failsafe_disables_events, + &n->arch.guest_context.flags) ) + vcpu_info(n, evtchn_upcall_mask) = 1; + + regs->entry_vector = TRAP_syscall; + regs->_eflags &= 0xFFFCBEFFUL; + regs->ss = FLAT_COMPAT_KERNEL_SS; + regs->_esp = (unsigned long)(esp-7); + regs->cs = FLAT_COMPAT_KERNEL_CS; + regs->_eip = nctxt->failsafe_callback_eip; + return; + } + if ( !(n->arch.flags & TF_kernel_mode) ) toggle_guest_mode(n); else @@ -540,11 +861,11 @@ static void load_segments(struct vcpu *n) /* CS longword also contains full evtchn_upcall_mask. */ cs_and_mask = (unsigned long)regs->cs | - ((unsigned long)n->vcpu_info->evtchn_upcall_mask << 32); + ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32); /* Fold upcall mask into RFLAGS.IF. 
*/ rflags = regs->rflags & ~X86_EFLAGS_IF; - rflags |= !n->vcpu_info->evtchn_upcall_mask << 9; + rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9; if ( put_user(regs->ss, rsp- 1) | put_user(regs->rsp, rsp- 2) | @@ -565,7 +886,7 @@ static void load_segments(struct vcpu *n) if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.guest_context.flags) ) - n->vcpu_info->evtchn_upcall_mask = 1; + vcpu_info(n, evtchn_upcall_mask) = 1; regs->entry_vector = TRAP_syscall; regs->rflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF| @@ -594,7 +915,7 @@ static void save_segments(struct vcpu *v) if ( regs->es ) dirty_segment_mask |= DIRTY_ES; - if ( regs->fs ) + if ( regs->fs || IS_COMPAT(v->domain) ) { dirty_segment_mask |= DIRTY_FS; ctxt->fs_base = 0; /* != 0 selector kills fs_base */ @@ -604,7 +925,7 @@ static void save_segments(struct vcpu *v) dirty_segment_mask |= DIRTY_FS_BASE; } - if ( regs->gs ) + if ( regs->gs || IS_COMPAT(v->domain) ) { dirty_segment_mask |= DIRTY_GS; ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */ @@ -726,6 +1047,9 @@ void context_switch(struct vcpu *prev, struct vcpu *next) local_irq_disable(); + if ( is_hvm_vcpu(prev) ) + pt_freeze_time(prev); + set_current(next); if ( (per_cpu(curr_vcpu, cpu) == next) || is_idle_vcpu(next) ) @@ -736,6 +1060,23 @@ void context_switch(struct vcpu *prev, struct vcpu *next) { __context_switch(); +#ifdef CONFIG_COMPAT + if ( is_idle_vcpu(prev) + || IS_COMPAT(prev->domain) != IS_COMPAT(next->domain) ) + { + uint32_t efer_lo, efer_hi; + + local_flush_tlb_one(GDT_VIRT_START(next) + FIRST_RESERVED_GDT_BYTE); + + rdmsr(MSR_EFER, efer_lo, efer_hi); + if ( !IS_COMPAT(next->domain) == !(efer_lo & EFER_SCE) ) + { + efer_lo ^= EFER_SCE; + wrmsr(MSR_EFER, efer_lo, efer_hi); + } + } +#endif + /* Re-enable interrupts before restoring state which may fault. 
*/ local_irq_enable(); @@ -749,8 +1090,20 @@ void context_switch(struct vcpu *prev, struct vcpu *next) context_saved(prev); /* Update per-VCPU guest runstate shared memory area (if registered). */ - if ( !guest_handle_is_null(next->runstate_guest) ) - __copy_to_guest(next->runstate_guest, &next->runstate, 1); + if ( !guest_handle_is_null(runstate_guest(next)) ) + { + if ( !IS_COMPAT(next->domain) ) + __copy_to_guest(runstate_guest(next), &next->runstate, 1); +#ifdef CONFIG_COMPAT + else + { + struct compat_vcpu_runstate_info info; + + XLAT_vcpu_runstate_info(&info, &next->runstate); + __copy_to_guest(next->runstate_guest.compat, &info, 1); + } +#endif + } schedule_tail(next); BUG(); @@ -821,55 +1174,153 @@ unsigned long hypercall_create_continuation( for ( i = 0; *p != '\0'; i++ ) mcs->call.args[i] = next_arg(p, args); + if ( IS_COMPAT(current->domain) ) + { + for ( ; i < 6; i++ ) + mcs->call.args[i] = 0; + } } else { regs = guest_cpu_user_regs(); -#if defined(__i386__) regs->eax = op; + regs->eip -= 2; /* re-execute 'syscall' / 'int 0x82' */ - if ( supervisor_mode_kernel || is_hvm_vcpu(current) ) - regs->eip &= ~31; /* re-execute entire hypercall entry stub */ +#ifdef __x86_64__ + if ( !IS_COMPAT(current->domain) ) + { + for ( i = 0; *p != '\0'; i++ ) + { + arg = next_arg(p, args); + switch ( i ) + { + case 0: regs->rdi = arg; break; + case 1: regs->rsi = arg; break; + case 2: regs->rdx = arg; break; + case 3: regs->r10 = arg; break; + case 4: regs->r8 = arg; break; + case 5: regs->r9 = arg; break; + } + } + } else - regs->eip -= 2; /* re-execute 'int 0x82' */ - - for ( i = 0; *p != '\0'; i++ ) +#endif { - arg = next_arg(p, args); - switch ( i ) + if ( supervisor_mode_kernel || is_hvm_vcpu(current) ) + regs->eip &= ~31; /* re-execute entire hypercall entry stub */ + + for ( i = 0; *p != '\0'; i++ ) { - case 0: regs->ebx = arg; break; - case 1: regs->ecx = arg; break; - case 2: regs->edx = arg; break; - case 3: regs->esi = arg; break; - case 4: regs->edi = arg; 
break; - case 5: regs->ebp = arg; break; + arg = next_arg(p, args); + switch ( i ) + { + case 0: regs->ebx = arg; break; + case 1: regs->ecx = arg; break; + case 2: regs->edx = arg; break; + case 3: regs->esi = arg; break; + case 4: regs->edi = arg; break; + case 5: regs->ebp = arg; break; + } } } -#elif defined(__x86_64__) - regs->rax = op; - regs->rip -= 2; /* re-execute 'syscall' */ + } - for ( i = 0; *p != '\0'; i++ ) + va_end(args); + + return op; +} + +#ifdef CONFIG_COMPAT +int hypercall_xlat_continuation(unsigned int *id, unsigned int mask, ...) +{ + int rc = 0; + struct mc_state *mcs = &this_cpu(mc_state); + struct cpu_user_regs *regs; + unsigned int i, cval = 0; + unsigned long nval = 0; + va_list args; + + BUG_ON(*id > 5); + BUG_ON(mask & (1U << *id)); + + va_start(args, mask); + + if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) + { + if ( !test_bit(_MCSF_call_preempted, &mcs->flags) ) + return 0; + for ( i = 0; i < 6; ++i, mask >>= 1 ) + { + if ( mask & 1 ) + { + nval = va_arg(args, unsigned long); + cval = va_arg(args, unsigned int); + if ( cval == nval ) + mask &= ~1U; + else + BUG_ON(nval == (unsigned int)nval); + } + else if ( id && *id == i ) + { + *id = mcs->call.args[i]; + id = NULL; + } + if ( (mask & 1) && mcs->call.args[i] == nval ) + ++rc; + else + { + cval = mcs->call.args[i]; + BUG_ON(mcs->call.args[i] != cval); + } + mcs->compat_call.args[i] = cval; + } + } + else + { + regs = guest_cpu_user_regs(); + for ( i = 0; i < 6; ++i, mask >>= 1 ) { - arg = next_arg(p, args); + unsigned long *reg; + switch ( i ) { - case 0: regs->rdi = arg; break; - case 1: regs->rsi = arg; break; - case 2: regs->rdx = arg; break; - case 3: regs->r10 = arg; break; - case 4: regs->r8 = arg; break; - case 5: regs->r9 = arg; break; + case 0: reg = &regs->ebx; break; + case 1: reg = &regs->ecx; break; + case 2: reg = &regs->edx; break; + case 3: reg = &regs->esi; break; + case 4: reg = &regs->edi; break; + case 5: reg = &regs->ebp; break; + default: BUG(); reg = NULL; break; + } + if ( 
(mask & 1) ) + { + nval = va_arg(args, unsigned long); + cval = va_arg(args, unsigned int); + if ( cval == nval ) + mask &= ~1U; + else + BUG_ON(nval == (unsigned int)nval); } + else if ( id && *id == i ) + { + *id = *reg; + id = NULL; + } + if ( (mask & 1) && *reg == nval ) + { + *reg = cval; + ++rc; + } + else + BUG_ON(*reg != (unsigned int)*reg); } -#endif } va_end(args); - return op; + return rc; } +#endif static void relinquish_memory(struct domain *d, struct list_head *list) { @@ -941,6 +1392,24 @@ void domain_relinquish_resources(struct domain *d) { /* Drop ref to guest_table (from new_guest_cr3(), svm/vmx cr3 handling, * or sh_update_paging_modes()) */ +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + { + if ( is_hvm_vcpu(v) ) + pfn = pagetable_get_pfn(v->arch.guest_table); + else + pfn = l4e_get_pfn(*(l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table))); + + if ( pfn != 0 ) + { + if ( shadow_mode_refcounts(d) ) + put_page(mfn_to_page(pfn)); + else + put_page_and_type(mfn_to_page(pfn)); + } + continue; + } +#endif pfn = pagetable_get_pfn(v->arch.guest_table); if ( pfn != 0 ) { @@ -948,6 +1417,10 @@ void domain_relinquish_resources(struct domain *d) put_page(mfn_to_page(pfn)); else put_page_and_type(mfn_to_page(pfn)); +#ifdef __x86_64__ + if ( pfn == pagetable_get_pfn(v->arch.guest_table_user) ) + v->arch.guest_table_user = pagetable_null(); +#endif v->arch.guest_table = pagetable_null(); } diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index c092149def..e55b13840d 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -19,6 +19,7 @@ #include <xen/version.h> #include <xen/iocap.h> #include <xen/bitops.h> +#include <xen/compat.h> #include <asm/regs.h> #include <asm/system.h> #include <asm/io.h> @@ -55,12 +56,12 @@ static long dom0_nrpages, dom0_min_nrpages, dom0_max_nrpages = LONG_MAX; * If +ve: The specified amount is an absolute value. 
* If -ve: The specified amount is subtracted from total available memory. */ -static long parse_amt(char *s, char **ps) +static long parse_amt(const char *s, const char **ps) { long pages = parse_size_and_unit((*s == '-') ? s+1 : s, ps) >> PAGE_SHIFT; return (*s == '-') ? -pages : pages; } -static void parse_dom0_mem(char *s) +static void parse_dom0_mem(const char *s) { do { if ( !strncmp(s, "min:", 4) ) @@ -90,9 +91,11 @@ string_param("dom0_ioports_disable", opt_dom0_ioports_disable); #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) #define L3_PROT (_PAGE_PRESENT) #elif defined(__x86_64__) -/* Allow ring-3 access in long mode as guest cannot use ring 1. */ +/* Allow ring-3 access in long mode as guest cannot use ring 1 ... */ #define BASE_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) #define L1_PROT (BASE_PROT|_PAGE_GUEST_KERNEL) +/* ... except for compatibility mode guests. */ +#define COMPAT_L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) #define L2_PROT (BASE_PROT|_PAGE_DIRTY) #define L3_PROT (BASE_PROT|_PAGE_DIRTY) #define L4_PROT (BASE_PROT|_PAGE_DIRTY) @@ -152,7 +155,8 @@ static unsigned long compute_dom0_nr_pages(void) static void process_dom0_ioports_disable(void) { unsigned long io_from, io_to; - char *t, *u, *s = opt_dom0_ioports_disable; + char *t, *s = opt_dom0_ioports_disable; + const char *u; if ( *s == '\0' ) return; @@ -260,8 +264,8 @@ int construct_dom0(struct domain *d, start_info_t *si; struct vcpu *v = d->vcpu[0]; const char *p; - unsigned long hypercall_page; - int hypercall_page_defined; + unsigned long long value; + int value_defined; #if defined(__i386__) char *image_start = (char *)_image_start; /* use lowmem mappings */ char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */ @@ -318,10 +322,40 @@ int construct_dom0(struct domain *d, nr_pages = compute_dom0_nr_pages(); - if ( (rc = parseelfimage(&dsi)) != 0 ) - return rc; + rc = parseelfimage(&dsi); +#ifdef CONFIG_COMPAT + if ( rc == 
-ENOSYS + && !compat_disabled + && (rc = parseelf32image(&dsi)) == 0 ) + { + l1_pgentry_t gdt_l1e; + + set_bit(_DOMF_compat, &d->domain_flags); + v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0]; + + if ( nr_pages != (unsigned int)nr_pages ) + nr_pages = UINT_MAX; + + /* + * Map compatibility Xen segments into every VCPU's GDT. See + * arch_domain_create() for further comments. + */ + gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), + PAGE_HYPERVISOR); + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) + + FIRST_RESERVED_GDT_PAGE)] = gdt_l1e; + local_flush_tlb_one(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE); + } +#endif + if ( rc != 0) + { + if ( rc == -ENOSYS ) + printk("DOM0 image is not a Xen-compatible Elf image.\n"); + return rc; + } - xen_pae = (CONFIG_PAGING_LEVELS == 3); + xen_pae = (CONFIG_PAGING_LEVELS == 3) || IS_COMPAT(d); if (dsi.pae_kernel == PAEKERN_bimodal) dom0_pae = xen_pae; else @@ -337,7 +371,40 @@ int construct_dom0(struct domain *d, dsi.pae_kernel == PAEKERN_bimodal) ) set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist); - if ( (p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES)) != NULL ) +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + { + value = xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HV_START_LOW, &value_defined); + p = xen_elf32note_string(&dsi, XEN_ELFNOTE_FEATURES); + } + else +#endif + { + value = xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HV_START_LOW, &value_defined); + p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES); + } + if ( value_defined ) + { +#if CONFIG_PAGING_LEVELS < 4 + unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1; +#else + unsigned long mask = !IS_COMPAT(d) + ? (1UL << L4_PAGETABLE_SHIFT) - 1 + : (1UL << L2_PAGETABLE_SHIFT) - 1; +#endif + + value = (value + mask) & ~mask; +#ifdef CONFIG_COMPAT + HYPERVISOR_COMPAT_VIRT_START(d) = max_t(unsigned int, m2p_compat_vstart, value); + if ( value > (!IS_COMPAT(d) ? 
+ HYPERVISOR_VIRT_START : + __HYPERVISOR_COMPAT_VIRT_START) ) +#else + if ( value > HYPERVISOR_VIRT_START ) +#endif + panic("Domain 0 expects too high a hypervisor start address.\n"); + } + if ( p != NULL ) { parse_features(p, dom0_features_supported, @@ -363,7 +430,9 @@ int construct_dom0(struct domain *d, vinitrd_start = round_pgup(dsi.v_end); vinitrd_end = vinitrd_start + initrd_len; vphysmap_start = round_pgup(vinitrd_end); - vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long)); + vphysmap_end = vphysmap_start + (nr_pages * (!IS_COMPAT(d) ? + sizeof(unsigned long) : + sizeof(unsigned int))); vstartinfo_start = round_pgup(vphysmap_end); vstartinfo_end = (vstartinfo_start + sizeof(struct start_info) + @@ -392,7 +461,9 @@ int construct_dom0(struct domain *d, ((_l) & ~((1UL<<(_s))-1))) >> (_s)) if ( (1 + /* # L4 */ NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */ - NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */ + (!IS_COMPAT(d) ? + NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) : /* # L2 */ + 4) + /* # compat L2 */ NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */ <= nr_pt_pages ) break; @@ -582,22 +653,46 @@ int construct_dom0(struct domain *d, #elif defined(__x86_64__) /* Overlap with Xen protected area? */ - if ( (dsi.v_start < HYPERVISOR_VIRT_END) && - (v_end > HYPERVISOR_VIRT_START) ) + if ( !IS_COMPAT(d) ? + ((dsi.v_start < HYPERVISOR_VIRT_END) && + (v_end > HYPERVISOR_VIRT_START)) : + (v_end > HYPERVISOR_COMPAT_VIRT_START(d)) ) { printk("DOM0 image overlaps with Xen private area.\n"); return -EINVAL; } + if ( IS_COMPAT(d) ) + { + v->arch.guest_context.failsafe_callback_cs = FLAT_COMPAT_KERNEL_CS; + v->arch.guest_context.event_callback_cs = FLAT_COMPAT_KERNEL_CS; + } + /* WARNING: The new domain must have its 'processor' field filled in! 
*/ - maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table; - l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + if ( !IS_COMPAT(d) ) + { + maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table; + l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + } + else + { + page = alloc_domheap_page(NULL); + if ( !page ) + panic("Not enough RAM for domain 0 PML4.\n"); + l4start = l4tab = page_to_virt(page); + } memcpy(l4tab, idle_pg_table, PAGE_SIZE); l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] = l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR); l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] = l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR); v->arch.guest_table = pagetable_from_paddr(__pa(l4start)); + if ( IS_COMPAT(d) ) + { + v->arch.guest_table_user = v->arch.guest_table; + if ( setup_arg_xlat_area(v, l4start) < 0 ) + panic("Not enough RAM for domain 0 hypercall argument translation.\n"); + } l4tab += l4_table_offset(dsi.v_start); mfn = alloc_spfn; @@ -634,7 +729,7 @@ int construct_dom0(struct domain *d, *l2tab = l2e_from_paddr(__pa(l1start), L2_PROT); l2tab++; } - *l1tab = l1e_from_pfn(mfn, L1_PROT); + *l1tab = l1e_from_pfn(mfn, !IS_COMPAT(d) ? L1_PROT : COMPAT_L1_PROT); l1tab++; page = mfn_to_page(mfn); @@ -645,6 +740,30 @@ int construct_dom0(struct domain *d, mfn++; } +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + { + /* Ensure the first four L3 entries are all populated. */ + for ( i = 0, l3tab = l3start; i < 4; ++i, ++l3tab ) + { + if ( !l3e_get_intpte(*l3tab) ) + { + maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table; + l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + clear_page(l2tab); + *l3tab = l3e_from_paddr(__pa(l2tab), L3_PROT); + } + if ( i == 3 ) + l3e_get_page(*l3tab)->u.inuse.type_info |= PGT_pae_xen_l2; + } + /* Install read-only guest visible MPT mapping. 
*/ + l2tab = l3e_to_l2e(l3start[3]); + memcpy(&l2tab[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], + &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], + COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*l2tab)); + } +#endif + /* Pages that are part of page tables must be read only. */ l4tab = l4start + l4_table_offset(vpt_start); l3start = l3tab = l4e_to_l3e(*l4tab); @@ -663,7 +782,8 @@ int construct_dom0(struct domain *d, page->u.inuse.type_info |= PGT_validated | 1; /* Top-level p.t. is pinned. */ - if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table ) + if ( (page->u.inuse.type_info & PGT_type_mask) == + (!IS_COMPAT(d) ? PGT_l4_page_table : PGT_l3_page_table) ) { page->count_info += 1; page->u.inuse.type_info += 1 | PGT_pinned; @@ -686,7 +806,7 @@ int construct_dom0(struct domain *d, /* Mask all upcalls... */ for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1; + shared_info(d, vcpu_info[i].evtchn_upcall_mask) = 1; if ( opt_dom0_max_vcpus == 0 ) opt_dom0_max_vcpus = num_online_cpus(); @@ -694,6 +814,8 @@ int construct_dom0(struct domain *d, opt_dom0_max_vcpus = num_online_cpus(); if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS ) opt_dom0_max_vcpus = MAX_VIRT_CPUS; + if ( opt_dom0_max_vcpus > BITS_PER_GUEST_LONG(d) ) + opt_dom0_max_vcpus = BITS_PER_GUEST_LONG(d); printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus); for ( i = 1; i < opt_dom0_max_vcpus; i++ ) @@ -710,20 +832,30 @@ int construct_dom0(struct domain *d, write_ptbase(v); /* Copy the OS image and free temporary buffer. 
*/ - (void)loadelfimage(&dsi); - - hypercall_page = - xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &hypercall_page_defined); - if ( hypercall_page_defined ) +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + { + (void)loadelf32image(&dsi); + value = + xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &value_defined); + } + else +#endif + { + (void)loadelfimage(&dsi); + value = + xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &value_defined); + } + if ( value_defined ) { - if ( (hypercall_page < dsi.v_start) || (hypercall_page >= v_end) ) + if ( (value < dsi.v_start) || (value >= v_end) ) { write_ptbase(current); local_irq_enable(); printk("Invalid HYPERCALL_PAGE field in ELF notes.\n"); return -1; } - hypercall_page_initialise(d, (void *)hypercall_page); + hypercall_page_initialise(d, (void *)(unsigned long)value); } /* Copy the initial ramdisk. */ @@ -741,12 +873,12 @@ int construct_dom0(struct domain *d, si->shared_info = virt_to_maddr(d->shared_info); si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; - si->pt_base = vpt_start; + si->pt_base = vpt_start + 2 * PAGE_SIZE * !!IS_COMPAT(d); si->nr_pt_frames = nr_pt_pages; si->mfn_list = vphysmap_start; sprintf(si->magic, "xen-%i.%i-x86_%d%s", xen_major_version(), xen_minor_version(), - BITS_PER_LONG, xen_pae ? "p" : ""); + !IS_COMPAT(d) ? BITS_PER_LONG : 32, xen_pae ? "p" : ""); /* Write the phys->machine and machine->phys table entries. 
*/ for ( pfn = 0; pfn < d->tot_pages; pfn++ ) @@ -757,7 +889,10 @@ int construct_dom0(struct domain *d, if ( pfn > REVERSE_START ) mfn = alloc_epfn - (pfn - REVERSE_START); #endif - ((unsigned long *)vphysmap_start)[pfn] = mfn; + if ( !IS_COMPAT(d) ) + ((unsigned long *)vphysmap_start)[pfn] = mfn; + else + ((unsigned int *)vphysmap_start)[pfn] = mfn; set_gpfn_from_mfn(mfn, pfn); } while ( pfn < nr_pages ) @@ -770,7 +905,10 @@ int construct_dom0(struct domain *d, #ifndef NDEBUG #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn))) #endif - ((unsigned long *)vphysmap_start)[pfn] = mfn; + if ( !IS_COMPAT(d) ) + ((unsigned long *)vphysmap_start)[pfn] = mfn; + else + ((unsigned int *)vphysmap_start)[pfn] = mfn; set_gpfn_from_mfn(mfn, pfn); #undef pfn page++; pfn++; @@ -795,6 +933,11 @@ int construct_dom0(struct domain *d, si->console.dom0.info_size = sizeof(struct dom0_vga_console_info); } +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + xlat_start_info(si, XLAT_start_info_console_dom0); +#endif + /* Reinstate the caller's page tables. */ write_ptbase(current); local_irq_enable(); @@ -818,16 +961,18 @@ int construct_dom0(struct domain *d, * [EAX,EBX,ECX,EDX,EDI,EBP are zero] */ regs = &v->arch.guest_context.user_regs; - regs->ds = regs->es = regs->fs = regs->gs = FLAT_KERNEL_DS; - regs->ss = FLAT_KERNEL_SS; - regs->cs = FLAT_KERNEL_CS; + regs->ds = regs->es = regs->fs = regs->gs = !IS_COMPAT(d) + ? FLAT_KERNEL_DS + : FLAT_COMPAT_KERNEL_DS; + regs->ss = !IS_COMPAT(d) ? FLAT_KERNEL_SS : FLAT_COMPAT_KERNEL_SS; + regs->cs = !IS_COMPAT(d) ? 
FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS; regs->eip = dsi.v_kernentry; regs->esp = vstack_end; regs->esi = vstartinfo_start; regs->eflags = X86_EFLAGS_IF; if ( opt_dom0_shadow ) - if ( shadow_test_enable(d) == 0 ) + if ( shadow_enable(d, SHM2_enable) == 0 ) shadow_update_paging_modes(v); if ( supervisor_mode_kernel ) @@ -892,7 +1037,7 @@ int construct_dom0(struct domain *d, return 0; } -int elf_sanity_check(Elf_Ehdr *ehdr) +int elf_sanity_check(const Elf_Ehdr *ehdr) { if ( !IS_ELF(*ehdr) || #if defined(__i386__) @@ -905,13 +1050,28 @@ int elf_sanity_check(Elf_Ehdr *ehdr) (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) || (ehdr->e_type != ET_EXEC) ) { - printk("DOM0 image is not a Xen-compatible Elf image.\n"); return 0; } return 1; } +#ifdef CONFIG_COMPAT +int elf32_sanity_check(const Elf32_Ehdr *ehdr) +{ + if ( !IS_ELF(*ehdr) || + (ehdr->e_ident[EI_CLASS] != ELFCLASS32) || + (ehdr->e_machine != EM_386) || + (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) || + (ehdr->e_type != ET_EXEC) ) + { + return 0; + } + + return 1; +} +#endif + /* * Local variables: * mode: C diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c index d78e3b18ce..fdc102c1a5 100644 --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -11,6 +11,7 @@ #include <xen/guest_access.h> #include <public/domctl.h> #include <xen/sched.h> +#include <xen/domain.h> #include <xen/event.h> #include <xen/domain_page.h> #include <asm/msr.h> @@ -23,12 +24,21 @@ #include <asm/hvm/support.h> #include <asm/processor.h> #include <public/hvm/e820.h> +#ifdef CONFIG_COMPAT +#include <compat/xen.h> +#endif -long arch_do_domctl( +#ifndef COMPAT +#define _long long +#define copy_from_xxx_offset copy_from_guest_offset +#define copy_to_xxx_offset copy_to_guest_offset +#endif + +_long arch_do_domctl( struct xen_domctl *domctl, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) { - long ret = 0; + _long ret = 0; switch ( domctl->cmd ) { @@ -40,7 +50,9 @@ long arch_do_domctl( d = find_domain_by_id(domctl->domain); if ( d != NULL ) { - ret = 
shadow_domctl(d, &domctl->u.shadow_op, u_domctl); + ret = shadow_domctl(d, + &domctl->u.shadow_op, + guest_handle_cast(u_domctl, void)); put_domain(d); copy_to_guest(u_domctl, domctl, 1); } @@ -123,12 +135,12 @@ long arch_do_domctl( case XEN_DOMCTL_getpageframeinfo2: { -#define GPF2_BATCH (PAGE_SIZE / sizeof(long)) +#define GPF2_BATCH (PAGE_SIZE / sizeof(_long)) int n,j; int num = domctl->u.getpageframeinfo2.num; domid_t dom = domctl->domain; struct domain *d; - unsigned long *l_arr; + unsigned _long *l_arr; ret = -ESRCH; if ( unlikely((d = find_domain_by_id(dom)) == NULL) ) @@ -148,9 +160,9 @@ long arch_do_domctl( { int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n); - if ( copy_from_guest_offset(l_arr, - domctl->u.getpageframeinfo2.array, - n, k) ) + if ( copy_from_xxx_offset(l_arr, + domctl->u.getpageframeinfo2.array, + n, k) ) { ret = -EINVAL; break; @@ -159,13 +171,13 @@ long arch_do_domctl( for ( j = 0; j < k; j++ ) { struct page_info *page; - unsigned long mfn = l_arr[j]; + unsigned _long mfn = l_arr[j]; page = mfn_to_page(mfn); if ( likely(mfn_valid(mfn) && get_page(page, d)) ) { - unsigned long type = 0; + unsigned _long type = 0; switch( page->u.inuse.type_info & PGT_type_mask ) { @@ -193,8 +205,8 @@ long arch_do_domctl( } - if ( copy_to_guest_offset(domctl->u.getpageframeinfo2.array, - n, l_arr, k) ) + if ( copy_to_xxx_offset(domctl->u.getpageframeinfo2.array, + n, l_arr, k) ) { ret = -EINVAL; break; @@ -214,7 +226,7 @@ long arch_do_domctl( int i; struct domain *d = find_domain_by_id(domctl->domain); unsigned long max_pfns = domctl->u.getmemlist.max_pfns; - unsigned long mfn; + xen_pfn_t mfn; struct list_head *list_ent; ret = -EINVAL; @@ -229,8 +241,8 @@ long arch_do_domctl( { mfn = page_to_mfn(list_entry( list_ent, struct page_info, list)); - if ( copy_to_guest_offset(domctl->u.getmemlist.buffer, - i, &mfn, 1) ) + if ( copy_to_xxx_offset(domctl->u.getmemlist.buffer, + i, &mfn, 1) ) { ret = -EFAULT; break; @@ -289,32 +301,71 @@ long arch_do_domctl( return 
ret; } -void arch_getdomaininfo_ctxt( - struct vcpu *v, struct vcpu_guest_context *c) +#ifndef COMPAT +void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c) { - memcpy(c, &v->arch.guest_context, sizeof(*c)); +#ifdef CONFIG_COMPAT +#define c(fld) (!IS_COMPAT(v->domain) ? (c.nat->fld) : (c.cmp->fld)) +#else +#define c(fld) (c.nat->fld) +#endif + unsigned long flags; + + if ( !IS_COMPAT(v->domain) ) + memcpy(c.nat, &v->arch.guest_context, sizeof(*c.nat)); +#ifdef CONFIG_COMPAT + else + { + XLAT_vcpu_guest_context(c.cmp, &v->arch.guest_context); + } +#endif if ( is_hvm_vcpu(v) ) { - hvm_store_cpu_guest_regs(v, &c->user_regs, c->ctrlreg); + if ( !IS_COMPAT(v->domain) ) + hvm_store_cpu_guest_regs(v, &c.nat->user_regs, c.nat->ctrlreg); +#ifdef CONFIG_COMPAT + else + { + struct cpu_user_regs user_regs; + typeof(c.nat->ctrlreg) ctrlreg; + unsigned i; + + hvm_store_cpu_guest_regs(v, &user_regs, ctrlreg); + XLAT_cpu_user_regs(&c.cmp->user_regs, &user_regs); + for ( i = 0; i < ARRAY_SIZE(c.cmp->ctrlreg); ++i ) + c.cmp->ctrlreg[i] = ctrlreg[i]; + } +#endif } else { /* IOPL privileges are virtualised: merge back into returned eflags. 
*/ - BUG_ON((c->user_regs.eflags & EF_IOPL) != 0); - c->user_regs.eflags |= v->arch.iopl << 12; + BUG_ON((c(user_regs.eflags) & EF_IOPL) != 0); + c(user_regs.eflags |= v->arch.iopl << 12); } - c->flags = 0; + flags = 0; if ( test_bit(_VCPUF_fpu_initialised, &v->vcpu_flags) ) - c->flags |= VGCF_i387_valid; + flags |= VGCF_i387_valid; if ( guest_kernel_mode(v, &v->arch.guest_context.user_regs) ) - c->flags |= VGCF_in_kernel; + flags |= VGCF_in_kernel; + c(flags = flags); - c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table)); + if ( !IS_COMPAT(v->domain) ) + c.nat->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table)); +#ifdef CONFIG_COMPAT + else + { + l4_pgentry_t *l4e = __va(pagetable_get_paddr(v->arch.guest_table)); + c.cmp->ctrlreg[3] = compat_pfn_to_cr3(l4e_get_pfn(*l4e)); + } +#endif - c->vm_assist = v->domain->vm_assist; + c(vm_assist = v->domain->vm_assist); +#undef c } +#endif /* * Local variables: diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c index b12eb68c87..95daf54302 100644 --- a/xen/arch/x86/e820.c +++ b/xen/arch/x86/e820.c @@ -1,6 +1,7 @@ #include <xen/config.h> #include <xen/init.h> #include <xen/lib.h> +#include <xen/compat.h> #include <asm/e820.h> #include <asm/page.h> @@ -341,6 +342,39 @@ static void __init clip_4gb(void) #define clip_4gb() ((void)0) #endif +#ifdef CONFIG_COMPAT +static void __init clip_compat(void) +{ + unsigned long long limit; + unsigned int i; + + if ( compat_disabled ) + return; + /* 32-bit guests restricted to 166 GB (with current memory allocator). */ + limit = (unsigned long long)(MACH2PHYS_COMPAT_VIRT_END - + __HYPERVISOR_COMPAT_VIRT_START) << 10; + for ( i = 0; i < e820.nr_map; i++ ) + { + if ( (e820.map[i].addr + e820.map[i].size) <= limit ) + continue; + printk("WARNING: Only the first %Lu GB of the physical memory map " + "can be accessed\n" + " by compatibility mode guests. 
" + "Truncating the memory map...\n", + limit >> 30); + if ( e820.map[i].addr >= limit ) + e820.nr_map = i; + else + { + e820.map[i].size = limit - e820.map[i].addr; + e820.nr_map = i + 1; + } + } +} +#else +#define clip_compat() ((void)0) +#endif + static void __init clip_mem(void) { int i; @@ -374,6 +408,7 @@ static void __init machine_specific_memory_setup( *raw_nr = nr; (void)copy_e820_map(raw, nr); clip_4gb(); + clip_compat(); clip_mem(); } diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile index a4aef978af..9fbcf51ede 100644 --- a/xen/arch/x86/hvm/Makefile +++ b/xen/arch/x86/hvm/Makefile @@ -10,6 +10,8 @@ obj-y += irq.o obj-y += platform.o obj-y += pmtimer.o obj-y += rtc.o +obj-y += hpet.o +obj-y += vpt.o obj-y += vioapic.o obj-y += vlapic.o obj-y += vpic.o diff --git a/xen/arch/x86/hvm/hpet.c b/xen/arch/x86/hvm/hpet.c new file mode 100644 index 0000000000..e835382eac --- /dev/null +++ b/xen/arch/x86/hvm/hpet.c @@ -0,0 +1,409 @@ +/* + * hpet.c: HPET emulation for HVM guests. + * Copyright (c) 2006, Intel Corporation. + * Copyright (c) 2006, Keir Fraser <keir@xensource.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ */ + +#include <asm/hvm/vpt.h> +#include <asm/hvm/io.h> +#include <asm/hvm/support.h> +#include <asm/current.h> +#include <xen/sched.h> +#include <xen/event.h> + +#define HPET_BASE_ADDRESS 0xfed00000ULL +#define HPET_MMAP_SIZE 1024 +#define S_TO_NS 1000000000ULL /* 1s = 10^9 ns */ +#define S_TO_FS 1000000000000000ULL /* 1s = 10^15 fs */ + +#define HPET_ID 0x000 +#define HPET_PERIOD 0x004 +#define HPET_CFG 0x010 +#define HPET_STATUS 0x020 +#define HPET_COUNTER 0x0f0 +#define HPET_T0_CFG 0x100 +#define HPET_T0_CMP 0x108 +#define HPET_T0_ROUTE 0x110 +#define HPET_T1_CFG 0x120 +#define HPET_T1_CMP 0x128 +#define HPET_T1_ROUTE 0x130 +#define HPET_T2_CFG 0x140 +#define HPET_T2_CMP 0x148 +#define HPET_T2_ROUTE 0x150 +#define HPET_T3_CFG 0x160 + +#define HPET_CFG_ENABLE 0x001 +#define HPET_CFG_LEGACY 0x002 + +#define HPET_TN_INT_TYPE_LEVEL 0x002 +#define HPET_TN_ENABLE 0x004 +#define HPET_TN_PERIODIC 0x008 +#define HPET_TN_PERIODIC_CAP 0x010 +#define HPET_TN_SIZE_CAP 0x020 +#define HPET_TN_SETVAL 0x040 +#define HPET_TN_32BIT 0x100 +#define HPET_TN_INT_ROUTE_MASK 0x3e00 +#define HPET_TN_INT_ROUTE_SHIFT 9 +#define HPET_TN_INT_ROUTE_CAP_SHIFT 32 +#define HPET_TN_CFG_BITS_READONLY_OR_RESERVED 0xffff80b1U + +/* can be routed to IOAPIC.redirect_table[23..20] */ +#define HPET_TN_INT_ROUTE_CAP (0x00f00000ULL \ + << HPET_TN_INT_ROUTE_CAP_SHIFT) + +#define HPET_TN_INT_ROUTE_CAP_MASK (0xffffffffULL \ + << HPET_TN_INT_ROUTE_CAP_SHIFT) + +#define hpet_tick_to_ns(h, tick) ((s_time_t)(tick)*S_TO_NS/h->tsc_freq) +#define timer_config(h, n) (h->hpet.timers[n].config) +#define timer_enabled(h, n) (timer_config(h, n) & HPET_TN_ENABLE) +#define timer_is_periodic(h, n) (timer_config(h, n) & HPET_TN_PERIODIC) +#define timer_is_32bit(h, n) (timer_config(h, n) & HPET_TN_32BIT) +#define hpet_enabled(h) (h->hpet.config & HPET_CFG_ENABLE) +#define timer_level(h, n) (timer_config(h, n) & HPET_TN_INT_TYPE_LEVEL) + +#define timer_int_route(h, n) \ + ((timer_config(h, n) & HPET_TN_INT_ROUTE_MASK) >> 
HPET_TN_INT_ROUTE_SHIFT) + +#define timer_int_route_cap(h, n) \ + ((timer_config(h, n) & HPET_TN_INT_ROUTE_CAP_MASK) \ + >> HPET_TN_INT_ROUTE_CAP_SHIFT) + +#define hpet_time_after(a, b) ((int32_t)(b) - (int32_t)(a) < 0) +#define hpet_time_after64(a, b) ((int64_t)(b) - (int64_t)(a) < 0) + +static inline uint64_t hpet_read64(HPETState *h, unsigned long addr) +{ + uint64_t *p = (uint64_t *)(((unsigned long)&h->hpet) + addr); + return (addr >= HPET_T3_CFG) ? 0 : *p; +} + +static inline int hpet_check_access_length( + unsigned long addr, unsigned long len) +{ + if ( (addr & (len - 1)) || (len > 8) ) + { + gdprintk(XENLOG_ERR, "HPET: access across register boundary: " + "%lx %lx\n", addr, len); + domain_crash(current->domain); + return -EINVAL; + } + + return 0; +} + +static inline uint64_t hpet_read_maincounter(HPETState *h) +{ + if ( hpet_enabled(h) ) + return hvm_get_guest_time(h->vcpu) + h->mc_offset; + else + return h->hpet.mc64; +} + +static unsigned long hpet_read( + struct vcpu *v, unsigned long addr, unsigned long length) +{ + HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet; + unsigned long result; + uint64_t val; + + addr &= HPET_MMAP_SIZE-1; + + if ( hpet_check_access_length(addr, length) != 0 ) + return ~0UL; + + val = hpet_read64(h, addr & ~7); + if ( (addr & ~7) == HPET_COUNTER ) + val = hpet_read_maincounter(h); + + result = val; + if ( length != 8 ) + result = (val >> ((addr & 7) * 8)) & ((1UL << (length * 8)) - 1); + + return result; +} + +static void hpet_stop_timer(HPETState *h, unsigned int tn) +{ + ASSERT(tn < HPET_TIMER_NUM); + stop_timer(&h->timers[tn]); +} + +static void hpet_set_timer(HPETState *h, unsigned int tn) +{ + uint64_t tn_cmp, cur_tick; + + ASSERT(tn < HPET_TIMER_NUM); + + if ( !hpet_enabled(h) || !timer_enabled(h, tn) ) + return; + + if ( (tn == 0) && (h->hpet.config & HPET_CFG_LEGACY) ) + { + /* HPET specification requires PIT shouldn't generate + * interrupts if LegacyReplacementRoute is set for timer0 */ + PITState *pit = 
&h->vcpu->domain->arch.hvm_domain.pl_time.vpit; + pit_stop_channel0_irq(pit); + } + + tn_cmp = h->hpet.timers[tn].cmp; + cur_tick = hpet_read_maincounter(h); + if ( timer_is_32bit(h, tn) ) + { + tn_cmp = (uint32_t)tn_cmp; + cur_tick = (uint32_t)cur_tick; + } + + if ( (int64_t)(tn_cmp - cur_tick) > 0 ) + set_timer(&h->timers[tn], NOW() + + hpet_tick_to_ns(h, tn_cmp-cur_tick)); + else + set_timer(&h->timers[tn], NOW()); +} + +static inline uint64_t hpet_fixup_reg( + uint64_t new, uint64_t old, uint64_t mask) +{ + new &= mask; + new |= old & ~mask; + return new; +} + +static void hpet_write( + struct vcpu *v, unsigned long addr, + unsigned long length, unsigned long val) +{ + HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet; + uint64_t old_val, new_val; + int tn, i; + + addr &= HPET_MMAP_SIZE-1; + + if ( hpet_check_access_length(addr, length) != 0 ) + return; + + old_val = hpet_read64(h, addr & ~7); + if ( (addr & ~7) == HPET_COUNTER ) + old_val = hpet_read_maincounter(h); + + new_val = val; + if ( length != 8 ) + new_val = hpet_fixup_reg( + new_val << (addr & 7) * 8, old_val, + ((1ULL << (length*8)) - 1) << ((addr & 7) * 8)); + + switch ( addr & ~7 ) + { + case HPET_CFG: + h->hpet.config = hpet_fixup_reg(new_val, old_val, 0x3); + + if ( !(old_val & HPET_CFG_ENABLE) && (new_val & HPET_CFG_ENABLE) ) + { + /* Enable main counter and interrupt generation. */ + h->mc_offset = h->hpet.mc64 - hvm_get_guest_time(h->vcpu); + for ( i = 0; i < HPET_TIMER_NUM; i++ ) + hpet_set_timer(h, i); + } + else if ( (old_val & HPET_CFG_ENABLE) && !(new_val & HPET_CFG_ENABLE) ) + { + /* Halt main counter and disable interrupt generation. 
*/ + h->hpet.mc64 = h->mc_offset + hvm_get_guest_time(h->vcpu); + for ( i = 0; i < HPET_TIMER_NUM; i++ ) + hpet_stop_timer(h, i); + } + break; + + case HPET_COUNTER: + if ( hpet_enabled(h) ) + gdprintk(XENLOG_WARNING, + "HPET: writing main counter but it's not halted!\n"); + h->hpet.mc64 = new_val; + break; + + case HPET_T0_CFG: + case HPET_T1_CFG: + case HPET_T2_CFG: + tn = (addr - HPET_T0_CFG) >> 5; + + h->hpet.timers[tn].config = hpet_fixup_reg(new_val, old_val, 0x3f4e); + + if ( timer_level(h, tn) ) + { + gdprintk(XENLOG_ERR, + "HPET: level triggered interrupt not supported now\n"); + domain_crash(current->domain); + break; + } + + if ( new_val & HPET_TN_32BIT ) + h->hpet.timers[tn].cmp = (uint32_t)h->hpet.timers[tn].cmp; + + if ( !(old_val & HPET_TN_ENABLE) && (new_val & HPET_TN_ENABLE) ) + hpet_set_timer(h, tn); + else if ( (old_val & HPET_TN_ENABLE) && !(new_val & HPET_TN_ENABLE) ) + hpet_stop_timer(h, tn); + break; + + case HPET_T0_CMP: + case HPET_T1_CMP: + case HPET_T2_CMP: + tn = (addr - HPET_T0_CMP) >> 5; + if ( timer_is_32bit(h, tn) ) + new_val = (uint32_t)new_val; + if ( !timer_is_periodic(h, tn) || + (h->hpet.timers[tn].config & HPET_TN_SETVAL) ) + h->hpet.timers[tn].cmp = new_val; + else + h->period[tn] = new_val; + h->hpet.timers[tn].config &= ~HPET_TN_SETVAL; + if ( hpet_enabled(h) && timer_enabled(h, tn) ) + hpet_set_timer(h, tn); + break; + + case HPET_T0_ROUTE: + case HPET_T1_ROUTE: + case HPET_T2_ROUTE: + tn = (addr - HPET_T0_ROUTE) >> 5; + h->hpet.timers[tn].hpet_fsb[0] = new_val; + break; + + default: + /* Ignore writes to unsupported and reserved registers. 
*/ + break; + } +} + +static int hpet_range(struct vcpu *v, unsigned long addr) +{ + return ((addr >= HPET_BASE_ADDRESS) && + (addr < (HPET_BASE_ADDRESS + HPET_MMAP_SIZE))); +} + +struct hvm_mmio_handler hpet_mmio_handler = { + .check_handler = hpet_range, + .read_handler = hpet_read, + .write_handler = hpet_write +}; + +static void hpet_route_interrupt(HPETState *h, unsigned int tn) +{ + unsigned int tn_int_route = timer_int_route(h, tn); + struct domain *d = h->vcpu->domain; + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + + if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) ) + { + /* if LegacyReplacementRoute bit is set, HPET specification requires + timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC, + timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */ + int isa_irq = (tn == 0) ? 0 : 8; + hvm_isa_irq_deassert(d, isa_irq); + hvm_isa_irq_assert(d, isa_irq); + return; + } + + if ( !(timer_int_route_cap(h, tn) & (1U << tn_int_route)) ) + { + gdprintk(XENLOG_ERR, + "HPET: timer%u: invalid interrupt route config\n", tn); + domain_crash(d); + return; + } + + /* We only support edge-triggered interrupt now */ + spin_lock(&hvm_irq->lock); + vioapic_irq_positive_edge(d, tn_int_route); + spin_unlock(&hvm_irq->lock); +} + +static void hpet_timer_fn(void *opaque) +{ + struct HPET_timer_fn_info *htfi = opaque; + HPETState *h = htfi->hs; + unsigned int tn = htfi->tn; + + if ( !hpet_enabled(h) || !timer_enabled(h, tn) ) + return; + + hpet_route_interrupt(h, tn); + + if ( timer_is_periodic(h, tn) && (h->period[tn] != 0) ) + { + uint64_t mc = hpet_read_maincounter(h); + if ( timer_is_32bit(h, tn) ) + { + while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) ) + h->hpet.timers[tn].cmp = (uint32_t)( + h->hpet.timers[tn].cmp + h->period[tn]); + } + else + { + while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) ) + h->hpet.timers[tn].cmp += h->period[tn]; + } + set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, h->period[tn])); + } + + 
vcpu_kick(h->vcpu); +} + +void hpet_migrate_timers(struct vcpu *v) +{ + struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet; + int i; + + for ( i = 0; i < HPET_TIMER_NUM; i++ ) + migrate_timer(&h->timers[i], v->processor); +} + +void hpet_init(struct vcpu *v) +{ + HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet; + int i; + + memset(h, 0, sizeof(HPETState)); + + h->vcpu = v; + h->tsc_freq = ticks_per_sec(v); + + /* 64-bit main counter; 3 timers supported; LegacyReplacementRoute. */ + h->hpet.capability = 0x8086A201ULL; + + /* This is the number of femtoseconds per HPET tick. */ + /* Here we define HPET's frequency to be the same as the TSC's. */ + h->hpet.capability |= ((S_TO_FS/h->tsc_freq) << 32); + + for ( i = 0; i < HPET_TIMER_NUM; i++ ) + { + h->hpet.timers[i].config = + HPET_TN_INT_ROUTE_CAP | HPET_TN_SIZE_CAP | HPET_TN_PERIODIC_CAP; + h->hpet.timers[i].cmp = ~0ULL; + h->timer_fn_info[i].hs = h; + h->timer_fn_info[i].tn = i; + init_timer(&h->timers[i], hpet_timer_fn, &h->timer_fn_info[i], + v->processor); + } +} + +void hpet_deinit(struct domain *d) +{ + int i; + HPETState *h = &d->arch.hvm_domain.pl_time.vhpet; + + for ( i = 0; i < HPET_TIMER_NUM; i++ ) + kill_timer(&h->timers[i]); +} + diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index a4c66ff91a..8d8de4c513 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -82,56 +82,23 @@ u64 hvm_get_guest_time(struct vcpu *v) return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset; } -void hvm_freeze_time(struct vcpu *v) -{ - struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm; - - if ( pt->enabled && pt->first_injected - && (v->vcpu_id == pt->bind_vcpu) - && !v->arch.hvm_vcpu.guest_time ) { - v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v); - if ( !test_bit(_VCPUF_blocked, &v->vcpu_flags) ) - { - stop_timer(&pt->timer); - rtc_freeze(v); - } - } -} - void hvm_migrate_timers(struct vcpu *v) { - struct periodic_time *pt =
&v->domain->arch.hvm_domain.pl_time.periodic_tm; - struct PMTState *vpmt = &v->domain->arch.hvm_domain.pl_time.vpmt; - - if ( pt->enabled ) - { - migrate_timer(&pt->timer, v->processor); - } - migrate_timer(&vcpu_vlapic(v)->vlapic_timer, v->processor); - migrate_timer(&vpmt->timer, v->processor); + pit_migrate_timers(v); rtc_migrate_timers(v); + hpet_migrate_timers(v); + pmtimer_migrate_timers(v); + if ( vcpu_vlapic(v)->pt.enabled ) + migrate_timer(&vcpu_vlapic(v)->pt.timer, v->processor); } void hvm_do_resume(struct vcpu *v) { ioreq_t *p; - struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; hvm_stts(v); - /* Pick up the elapsed PIT ticks and re-enable pit_timer. */ - if ( pt->enabled && (v->vcpu_id == pt->bind_vcpu) && pt->first_injected ) - { - if ( v->arch.hvm_vcpu.guest_time ) - { - hvm_set_guest_time(v, v->arch.hvm_vcpu.guest_time); - v->arch.hvm_vcpu.guest_time = 0; - } - pickup_deactive_ticks(pt); - } - - /* Re-enable the RTC timer if needed */ - rtc_thaw(v); + pt_thaw_time(v); /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */ p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; @@ -182,9 +149,10 @@ int hvm_domain_initialise(struct domain *d) void hvm_domain_destroy(struct domain *d) { - kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer); + pit_deinit(d); rtc_deinit(d); pmtimer_deinit(d); + hpet_deinit(d); if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( @@ -196,7 +164,6 @@ void hvm_domain_destroy(struct domain *d) int hvm_vcpu_initialise(struct vcpu *v) { - struct hvm_domain *platform; int rc; if ( (rc = vlapic_init(v)) != 0 ) @@ -214,16 +181,14 @@ int hvm_vcpu_initialise(struct vcpu *v) get_vio(v->domain, v->vcpu_id)->vp_eport = v->arch.hvm_vcpu.xen_port; + INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list); + if ( v->vcpu_id != 0 ) return 0; - /* XXX Below should happen in hvm_domain_initialise(). 
*/ - platform = &v->domain->arch.hvm_domain; - - init_timer(&platform->pl_time.periodic_tm.timer, - pt_timer_fn, v, v->processor); rtc_init(v, RTC_PORT(0), RTC_IRQ); pmtimer_init(v, ACPI_PM_TMR_BLK_ADDRESS); + hpet_init(v); /* Init guest TSC to start from zero. */ hvm_set_guest_time(v, 0); @@ -240,20 +205,6 @@ void hvm_vcpu_destroy(struct vcpu *v) /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/ } -int cpu_get_interrupt(struct vcpu *v, int *type) -{ - int vector; - - if ( (vector = cpu_get_apic_interrupt(v, type)) != -1 ) - return vector; - - if ( (v->vcpu_id == 0) && - ((vector = cpu_get_pic_interrupt(v, type)) != -1) ) - return vector; - - return -1; -} - static void hvm_vcpu_down(void) { struct vcpu *v = current; @@ -318,6 +269,14 @@ void hvm_hlt(unsigned long rflags) do_sched_op_compat(SCHEDOP_block, 0); } +void hvm_triple_fault(void) +{ + struct vcpu *v = current; + gdprintk(XENLOG_INFO, "Triple fault on VCPU%d - " + "invoking HVM system reset.\n", v->vcpu_id); + domain_shutdown(v->domain, SHUTDOWN_reboot); +} + /* * __hvm_copy(): * @buf = hypervisor buffer @@ -402,6 +361,46 @@ void hvm_print_line(struct vcpu *v, const char c) spin_unlock(&hd->pbuf_lock); } +void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + if ( !cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) ) + { + cpuid(input, eax, ebx, ecx, edx); + + if ( input == 0x00000001 ) + { + struct vcpu *v = current; + + clear_bit(X86_FEATURE_MWAIT & 31, ecx); + + if ( vlapic_hw_disabled(vcpu_vlapic(v)) ) + clear_bit(X86_FEATURE_APIC & 31, edx); + +#if CONFIG_PAGING_LEVELS >= 3 + if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) +#endif + clear_bit(X86_FEATURE_PAE & 31, edx); + clear_bit(X86_FEATURE_PSE36 & 31, edx); + } + else if ( input == 0x80000001 ) + { +#if CONFIG_PAGING_LEVELS >= 3 + struct vcpu *v = current; + if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) +#endif + clear_bit(X86_FEATURE_NX & 
31, edx); +#ifdef __i386__ + /* Mask feature for Intel ia32e or AMD long mode. */ + clear_bit(X86_FEATURE_LAHF_LM & 31, ecx); + + clear_bit(X86_FEATURE_LM & 31, edx); + clear_bit(X86_FEATURE_SYSCALL & 31, edx); +#endif + } + } +} + typedef unsigned long hvm_hypercall_t( unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/xen/arch/x86/hvm/i8254.c b/xen/arch/x86/hvm/i8254.c index faee87564b..a81ae57722 100644 --- a/xen/arch/x86/hvm/i8254.c +++ b/xen/arch/x86/hvm/i8254.c @@ -81,7 +81,7 @@ static int pit_get_count(PITChannelState *s) uint64_t d; int counter; - d = muldiv64(hvm_get_clock(s->vcpu) - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu)); + d = muldiv64(hvm_get_guest_time(s->pt.vcpu) - s->count_load_time, PIT_FREQ, ticks_per_sec(s->pt.vcpu)); switch(s->mode) { case 0: case 1: @@ -106,7 +106,7 @@ static int pit_get_out1(PITChannelState *s, int64_t current_time) uint64_t d; int out; - d = muldiv64(current_time - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu)); + d = muldiv64(current_time - s->count_load_time, PIT_FREQ, ticks_per_sec(s->pt.vcpu)); switch(s->mode) { default: case 0: @@ -153,7 +153,7 @@ void pit_set_gate(PITState *pit, int channel, int val) case 5: if (s->gate < val) { /* restart counting on rising edge */ - s->count_load_time = hvm_get_clock(s->vcpu); + s->count_load_time = hvm_get_guest_time(s->pt.vcpu); // pit_irq_timer_update(s, s->count_load_time); } break; @@ -161,7 +161,7 @@ void pit_set_gate(PITState *pit, int channel, int val) case 3: if (s->gate < val) { /* restart counting on rising edge */ - s->count_load_time = hvm_get_clock(s->vcpu); + s->count_load_time = hvm_get_guest_time(s->pt.vcpu); // pit_irq_timer_update(s, s->count_load_time); } /* XXX: disable/enable counting */ @@ -179,7 +179,7 @@ int pit_get_gate(PITState *pit, int channel) void pit_time_fired(struct vcpu *v, void *priv) { PITChannelState *s = priv; - s->count_load_time = hvm_get_clock(v); + s->count_load_time = 
hvm_get_guest_time(v); } static inline void pit_load_count(PITChannelState *s, int val) @@ -190,7 +190,7 @@ static inline void pit_load_count(PITChannelState *s, int val) if (val == 0) val = 0x10000; - s->count_load_time = hvm_get_clock(s->vcpu); + s->count_load_time = hvm_get_guest_time(s->pt.vcpu); s->count = val; period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ); @@ -209,16 +209,17 @@ static inline void pit_load_count(PITChannelState *s, int val) switch (s->mode) { case 2: /* create periodic time */ - s->pt = create_periodic_time (period, 0, 0, pit_time_fired, s); + create_periodic_time(&s->pt, period, 0, 0, pit_time_fired, s); break; case 1: /* create one shot time */ - s->pt = create_periodic_time (period, 0, 1, pit_time_fired, s); + create_periodic_time(&s->pt, period, 0, 1, pit_time_fired, s); #ifdef DEBUG_PIT printk("HVM_PIT: create one shot time.\n"); #endif break; default: + destroy_periodic_time(&s->pt); break; } } @@ -253,7 +254,7 @@ static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val) if (!(val & 0x10) && !s->status_latched) { /* status latch */ /* XXX: add BCD and null count */ - s->status = (pit_get_out1(s, hvm_get_clock(s->vcpu)) << 7) | + s->status = (pit_get_out1(s, hvm_get_guest_time(s->pt.vcpu)) << 7) | (s->rw_mode << 4) | (s->mode << 1) | s->bcd; @@ -351,6 +352,12 @@ static uint32_t pit_ioport_read(void *opaque, uint32_t addr) return ret; } +void pit_stop_channel0_irq(PITState * pit) +{ + PITChannelState *s = &pit->channels[0]; + destroy_periodic_time(&s->pt); +} + static void pit_reset(void *opaque) { PITState *pit = opaque; @@ -359,10 +366,7 @@ static void pit_reset(void *opaque) for(i = 0;i < 3; i++) { s = &pit->channels[i]; - if ( s -> pt ) { - destroy_periodic_time (s->pt); - s->pt = NULL; - } + destroy_periodic_time(&s->pt); s->mode = 0xff; /* the init mode */ s->gate = (i != 2); pit_load_count(s, 0); @@ -375,10 +379,11 @@ void pit_init(struct vcpu *v, unsigned long cpu_khz) PITChannelState *s; s = &pit->channels[0]; 
+ s->pt.vcpu = v; /* the timer 0 is connected to an IRQ */ - s->vcpu = v; - s++; s->vcpu = v; - s++; s->vcpu = v; + init_timer(&s->pt.timer, pt_timer_fn, &s->pt, v->processor); + s++; s->pt.vcpu = v; + s++; s->pt.vcpu = v; register_portio_handler(v->domain, PIT_BASE, 4, handle_pit_io); /* register the speaker port */ @@ -391,6 +396,25 @@ void pit_init(struct vcpu *v, unsigned long cpu_khz) return; } +void pit_migrate_timers(struct vcpu *v) +{ + PITState *pit = &v->domain->arch.hvm_domain.pl_time.vpit; + PITChannelState *s; + + s = &pit->channels[0]; + if ( s->pt.vcpu == v && s->pt.enabled ) + migrate_timer(&s->pt.timer, v->processor); +} + +void pit_deinit(struct domain *d) +{ + PITState *pit = &d->arch.hvm_domain.pl_time.vpit; + PITChannelState *s; + + s = &pit->channels[0]; + kill_timer(&s->pt.timer); +} + /* the intercept action for PIT DM retval:0--not handled; 1--handled */ static int handle_pit_io(ioreq_t *p) { @@ -426,7 +450,8 @@ static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val) static uint32_t speaker_ioport_read(void *opaque, uint32_t addr) { PITState *pit = opaque; - int out = pit_get_out(pit, 2, hvm_get_clock(pit->channels[2].vcpu)); + int out = pit_get_out(pit, 2, + hvm_get_guest_time(pit->channels[2].pt.vcpu)); /* Refresh clock toggles at about 15us. We approximate as 2^14ns. 
*/ unsigned int refresh_clock = ((unsigned int)NOW() >> 14) & 1; return ((pit->speaker_data_on << 1) | pit_get_gate(pit, 2) | diff --git a/xen/arch/x86/hvm/instrlen.c b/xen/arch/x86/hvm/instrlen.c index 85ee70c9de..0255d1f7c9 100644 --- a/xen/arch/x86/hvm/instrlen.c +++ b/xen/arch/x86/hvm/instrlen.c @@ -201,7 +201,7 @@ static uint8_t twobyte_table[256] = { if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \ gdprintk(XENLOG_WARNING, \ "Cannot read from address %lx (eip %lx, mode %d)\n", \ - pc, org_pc, mode); \ + pc, org_pc, address_bytes); \ return -1; \ } \ pc += 1; \ @@ -218,30 +218,20 @@ static uint8_t twobyte_table[256] = { * EXTERNAL this routine calculates the length of the current instruction * pointed to by org_pc. The guest state is _not_ changed by this routine. */ -int hvm_instruction_length(unsigned long org_pc, int mode) +int hvm_instruction_length(unsigned long org_pc, int address_bytes) { uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0; unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp; int length = 0; unsigned long pc = org_pc; - switch ( mode ) + op_bytes = op_default = ad_bytes = ad_default = address_bytes; + if ( op_bytes == 8 ) { - case X86EMUL_MODE_REAL: - case X86EMUL_MODE_PROT16: - op_bytes = op_default = ad_bytes = ad_default = 2; - break; - case X86EMUL_MODE_PROT32: - op_bytes = op_default = ad_bytes = ad_default = 4; - break; -#ifdef __x86_64__ - case X86EMUL_MODE_PROT64: op_bytes = op_default = 4; - ad_bytes = ad_default = 8; - break; -#endif - default: +#ifndef __x86_64__ return -1; +#endif } /* Legacy prefixes. 
*/ @@ -253,7 +243,7 @@ int hvm_instruction_length(unsigned long org_pc, int mode) op_bytes = op_default ^ 6; /* switch between 2/4 bytes */ break; case 0x67: /* address-size override */ - if ( mode == X86EMUL_MODE_PROT64 ) + if ( ad_default == 8 ) ad_bytes = ad_default ^ 12; /* switch between 4/8 bytes */ else ad_bytes = ad_default ^ 6; /* switch between 2/4 bytes */ @@ -270,7 +260,7 @@ int hvm_instruction_length(unsigned long org_pc, int mode) break; #ifdef __x86_64__ case 0x40 ... 0x4f: - if ( mode == X86EMUL_MODE_PROT64 ) + if ( ad_default == 8 ) { rex_prefix = b; continue; @@ -434,7 +424,7 @@ done: cannot_emulate: gdprintk(XENLOG_WARNING, - "Cannot emulate %02x at address %lx (%lx, mode %d)\n", - b, pc - 1, org_pc, mode); + "Cannot emulate %02x at address %lx (%lx, addr_bytes %d)\n", + b, pc - 1, org_pc, address_bytes); return -1; } diff --git a/xen/arch/x86/hvm/intercept.c b/xen/arch/x86/hvm/intercept.c index ea93a59f8e..3de58812c3 100644 --- a/xen/arch/x86/hvm/intercept.c +++ b/xen/arch/x86/hvm/intercept.c @@ -31,13 +31,15 @@ #include <xen/event.h> +extern struct hvm_mmio_handler hpet_mmio_handler; extern struct hvm_mmio_handler vlapic_mmio_handler; extern struct hvm_mmio_handler vioapic_mmio_handler; -#define HVM_MMIO_HANDLER_NR 2 +#define HVM_MMIO_HANDLER_NR 3 static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] = { + &hpet_mmio_handler, &vlapic_mmio_handler, &vioapic_mmio_handler }; @@ -180,7 +182,7 @@ int hvm_buffered_io_intercept(ioreq_t *p) spin_lock(buffered_io_lock); if ( buffered_iopage->write_pointer - buffered_iopage->read_pointer == - (unsigned long)IOREQ_BUFFER_SLOT_NUM ) { + (unsigned int)IOREQ_BUFFER_SLOT_NUM ) { /* the queue is full. * send the iopacket through the normal path. 
* NOTE: The arithimetic operation could handle the situation for @@ -263,98 +265,6 @@ int register_io_handler( return 1; } - -static __inline__ void missed_ticks(struct periodic_time *pt) -{ - s_time_t missed_ticks; - - missed_ticks = NOW() - pt->scheduled; - if ( missed_ticks > 0 ) { - missed_ticks = missed_ticks / (s_time_t) pt->period + 1; - if ( missed_ticks > 1000 ) { - /* TODO: Adjust guest time togther */ - pt->pending_intr_nr++; - } - else { - pt->pending_intr_nr += missed_ticks; - } - pt->scheduled += missed_ticks * pt->period; - } -} - -/* hook function for the platform periodic time */ -void pt_timer_fn(void *data) -{ - struct vcpu *v = data; - struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; - - pt->pending_intr_nr++; - pt->scheduled += pt->period; - - /* Pick up missed timer ticks. */ - missed_ticks(pt); - - /* No need to run the timer while a VCPU is descheduled. */ - if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) - set_timer(&pt->timer, pt->scheduled); - - vcpu_kick(v); -} - -/* pick up missed timer ticks at deactive time */ -void pickup_deactive_ticks(struct periodic_time *pt) -{ - if ( !active_timer(&(pt->timer)) ) { - missed_ticks(pt); - set_timer(&pt->timer, pt->scheduled); - } -} - -/* - * period: fire frequency in ns. 
- */ -struct periodic_time * create_periodic_time( - u32 period, - char irq, - char one_shot, - time_cb *cb, - void *data) -{ - struct periodic_time *pt = &(current->domain->arch.hvm_domain.pl_time.periodic_tm); - if ( pt->enabled ) { - stop_timer (&pt->timer); - pt->enabled = 0; - } - pt->bind_vcpu = 0; /* timer interrupt delivered to BSP by default */ - pt->pending_intr_nr = 0; - pt->first_injected = 0; - if (period < 900000) { /* < 0.9 ms */ - printk("HVM_PlatformTime: program too small period %u\n",period); - period = 900000; /* force to 0.9ms */ - } - pt->period = period; - pt->irq = irq; - pt->period_cycles = (u64)period * cpu_khz / 1000000L; - pt->one_shot = one_shot; - if ( one_shot ) { - printk("HVM_PL: No support for one shot platform time yet\n"); - } - pt->scheduled = NOW() + period; - set_timer (&pt->timer,pt->scheduled); - pt->enabled = 1; - pt->cb = cb; - pt->priv = data; - return pt; -} - -void destroy_periodic_time(struct periodic_time *pt) -{ - if ( pt->enabled ) { - stop_timer(&pt->timer); - pt->enabled = 0; - } -} - /* * Local variables: * mode: C diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c index 2006f09cb5..013e723f35 100644 --- a/xen/arch/x86/hvm/io.c +++ b/xen/arch/x86/hvm/io.c @@ -689,39 +689,6 @@ static void hvm_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p, } } -void hvm_interrupt_post(struct vcpu *v, int vector, int type) -{ - struct periodic_time *pt = - &(v->domain->arch.hvm_domain.pl_time.periodic_tm); - - if ( pt->enabled && v->vcpu_id == pt->bind_vcpu - && is_periodic_irq(v, vector, type) ) { - if ( !pt->first_injected ) { - pt->pending_intr_nr = 0; - pt->last_plt_gtime = hvm_get_guest_time(v); - pt->scheduled = NOW() + pt->period; - set_timer(&pt->timer, pt->scheduled); - pt->first_injected = 1; - } else { - pt->pending_intr_nr--; - pt->last_plt_gtime += pt->period_cycles; - hvm_set_guest_time(v, pt->last_plt_gtime); - } - if (pt->cb) - pt->cb(v, pt->priv); - } - - switch(type) { - case APIC_DM_EXTINT: - break; - 
- default: - vlapic_post_injection(v, vector, type); - break; - } -} - - void hvm_io_assist(struct vcpu *v) { vcpu_iodata_t *vio; diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c index b65799b2aa..45d4f97042 100644 --- a/xen/arch/x86/hvm/irq.c +++ b/xen/arch/x86/hvm/irq.c @@ -85,15 +85,16 @@ void hvm_isa_irq_assert( struct domain *d, unsigned int isa_irq) { struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq); ASSERT(isa_irq <= 15); spin_lock(&hvm_irq->lock); if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq) && - (hvm_irq->gsi_assert_count[isa_irq]++ == 0) ) + (hvm_irq->gsi_assert_count[gsi]++ == 0) ) { - vioapic_irq_positive_edge(d, isa_irq); + vioapic_irq_positive_edge(d, gsi); vpic_irq_positive_edge(d, isa_irq); } @@ -104,13 +105,14 @@ void hvm_isa_irq_deassert( struct domain *d, unsigned int isa_irq) { struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq); ASSERT(isa_irq <= 15); spin_lock(&hvm_irq->lock); if ( __test_and_clear_bit(isa_irq, &hvm_irq->isa_irq) && - (--hvm_irq->gsi_assert_count[isa_irq] == 0) ) + (--hvm_irq->gsi_assert_count[gsi] == 0) ) vpic_irq_negative_edge(d, isa_irq); spin_unlock(&hvm_irq->lock); @@ -133,7 +135,8 @@ void hvm_set_callback_irq_level(void) if ( gsi == 0 ) goto out; - if ( local_events_need_delivery() ) + /* NB. Do not check the evtchn_upcall_mask. It is not used in HVM mode. 
*/ + if ( vcpu_info(v, evtchn_upcall_pending) ) { if ( !__test_and_set_bit(0, &hvm_irq->callback_irq_wire) && (hvm_irq->gsi_assert_count[gsi]++ == 0) ) @@ -225,3 +228,55 @@ void hvm_set_callback_gsi(struct domain *d, unsigned int gsi) dprintk(XENLOG_G_INFO, "Dom%u callback GSI changed %u -> %u\n", d->domain_id, old_gsi, gsi); } + +int cpu_has_pending_irq(struct vcpu *v) +{ + struct hvm_domain *plat = &v->domain->arch.hvm_domain; + + /* APIC */ + if ( vlapic_has_interrupt(v) != -1 ) + return 1; + + /* PIC */ + if ( !vlapic_accept_pic_intr(v) ) + return 0; + + return plat->irq.vpic[0].int_output; +} + +int cpu_get_interrupt(struct vcpu *v, int *type) +{ + int vector; + + if ( (vector = cpu_get_apic_interrupt(v, type)) != -1 ) + return vector; + + if ( (v->vcpu_id == 0) && + ((vector = cpu_get_pic_interrupt(v, type)) != -1) ) + return vector; + + return -1; +} + +int get_isa_irq_vector(struct vcpu *v, int isa_irq, int type) +{ + unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq); + + if ( type == APIC_DM_EXTINT ) + return (v->domain->arch.hvm_domain.irq.vpic[isa_irq >> 3].irq_base + + (isa_irq & 7)); + + return domain_vioapic(v->domain)->redirtbl[gsi].fields.vector; +} + +int is_isa_irq_masked(struct vcpu *v, int isa_irq) +{ + unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq); + + if ( is_lvtt(v, isa_irq) ) + return !is_lvtt_enabled(v); + + return ((v->domain->arch.hvm_domain.irq.vpic[isa_irq >> 3].imr & + (1 << (isa_irq & 7))) && + domain_vioapic(v->domain)->redirtbl[gsi].fields.mask); +} diff --git a/xen/arch/x86/hvm/platform.c b/xen/arch/x86/hvm/platform.c index a822fc093f..1c8e4c11c4 100644 --- a/xen/arch/x86/hvm/platform.c +++ b/xen/arch/x86/hvm/platform.c @@ -352,7 +352,7 @@ static int reg_mem(unsigned char size, unsigned char *opcode, return DECODE_success; } -static int mmio_decode(int mode, unsigned char *opcode, +static int mmio_decode(int address_bytes, unsigned char *opcode, struct hvm_io_op *mmio_op, unsigned char *ad_size, unsigned char *op_size, unsigned char 
*seg_sel) @@ -368,9 +368,9 @@ static int mmio_decode(int mode, unsigned char *opcode, opcode = check_prefix(opcode, mmio_op, ad_size, op_size, seg_sel, &rex); - switch ( mode ) { - case X86EMUL_MODE_REAL: /* meaning is reversed */ - case X86EMUL_MODE_PROT16: + switch ( address_bytes ) + { + case 2: if ( *op_size == WORD ) *op_size = LONG; else if ( *op_size == LONG ) @@ -384,14 +384,14 @@ static int mmio_decode(int mode, unsigned char *opcode, else if ( *ad_size == 0 ) *ad_size = WORD; break; - case X86EMUL_MODE_PROT32: + case 4: if ( *op_size == 0 ) *op_size = LONG; if ( *ad_size == 0 ) *ad_size = LONG; break; #ifdef __x86_64__ - case X86EMUL_MODE_PROT64: + case 8: if ( *op_size == 0 ) *op_size = rex & 0x8 ? QUAD : LONG; if ( *ad_size == 0 ) @@ -907,7 +907,7 @@ void handle_mmio(unsigned long gpa) struct hvm_io_op *mmio_op; struct cpu_user_regs *regs; unsigned char inst[MAX_INST_LEN], ad_size, op_size, seg_sel; - int i, mode, df, inst_len; + int i, address_bytes, df, inst_len; struct vcpu *v = current; mmio_op = &v->arch.hvm_vcpu.io_op; @@ -919,9 +919,9 @@ void handle_mmio(unsigned long gpa) df = regs->eflags & X86_EFLAGS_DF ? 
1 : 0; - mode = hvm_guest_x86_mode(v); + address_bytes = hvm_guest_x86_mode(v); inst_addr = hvm_get_segment_base(v, x86_seg_cs) + regs->eip; - inst_len = hvm_instruction_length(inst_addr, mode); + inst_len = hvm_instruction_length(inst_addr, address_bytes); if ( inst_len <= 0 ) { printk("handle_mmio: failed to get instruction length\n"); @@ -934,8 +934,8 @@ void handle_mmio(unsigned long gpa) domain_crash_synchronous(); } - if ( mmio_decode(mode, inst, mmio_op, &ad_size, &op_size, &seg_sel) - == DECODE_failure ) { + if ( mmio_decode(address_bytes, inst, mmio_op, &ad_size, + &op_size, &seg_sel) == DECODE_failure ) { printk("handle_mmio: failed to decode instruction\n"); printk("mmio opcode: gpa 0x%lx, len %d:", gpa, inst_len); for ( i = 0; i < inst_len; i++ ) @@ -965,7 +965,7 @@ void handle_mmio(unsigned long gpa) if ( ad_size == WORD ) addr &= 0xFFFF; addr += hvm_get_segment_base(v, x86_seg_es); - if ( addr == gpa ) + if ( shadow_gva_to_gpa(v, addr) == gpa ) { enum x86_segment seg; diff --git a/xen/arch/x86/hvm/pmtimer.c b/xen/arch/x86/hvm/pmtimer.c index b435fbdf9f..28be242f84 100644 --- a/xen/arch/x86/hvm/pmtimer.c +++ b/xen/arch/x86/hvm/pmtimer.c @@ -55,6 +55,14 @@ void pmtimer_init(struct vcpu *v, int base) register_portio_handler(v->domain, base, 4, handle_pmt_io); } +void pmtimer_migrate_timers(struct vcpu *v) +{ + struct PMTState *vpmt = &v->domain->arch.hvm_domain.pl_time.vpmt; + + if (vpmt->vcpu == v) + migrate_timer(&vpmt->timer, v->processor); +} + void pmtimer_deinit(struct domain *d) { PMTState *s = &d->arch.hvm_domain.pl_time.vpmt; diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c index 1f26dc9252..07e79d7c3c 100644 --- a/xen/arch/x86/hvm/rtc.c +++ b/xen/arch/x86/hvm/rtc.c @@ -30,17 +30,18 @@ /* #define DEBUG_RTC */ -/* Callback that fires the RTC's periodic interrupt */ -void rtc_pie_callback(void *opaque) +void rtc_periodic_cb(struct vcpu *v, void *opaque) { RTCState *s = opaque; - /* Record that we have fired */ - s->cmos_data[RTC_REG_C] 
|= (RTC_IRQF|RTC_PF); /* 0xc0 */ - /* Fire */ - hvm_isa_irq_assert(s->vcpu->domain, s->irq); - /* Remember to fire again */ - s->next_pie = NOW() + s->period; - set_timer(&s->pie_timer, s->next_pie); + s->cmos_data[RTC_REG_C] |= 0xc0; +} + +int is_rtc_periodic_irq(void *opaque) +{ + RTCState *s = opaque; + + return !(s->cmos_data[RTC_REG_C] & RTC_AF || + s->cmos_data[RTC_REG_C] & RTC_UF); } /* Enable/configure/disable the periodic timer based on the RTC_PIE and @@ -58,17 +59,13 @@ static void rtc_timer_update(RTCState *s) period = 1 << (period_code - 1); /* period in 32 Khz cycles */ period = DIV_ROUND((period * 1000000000ULL), 32768); /* period in ns */ - s->period = period; #ifdef DEBUG_RTC printk("HVM_RTC: period = %uns\n", period); #endif - s->next_pie = NOW() + s->period; - set_timer(&s->pie_timer, s->next_pie); - } + create_periodic_time(&s->pt, period, RTC_IRQ, 0, rtc_periodic_cb, s); + } else - { - stop_timer(&s->pie_timer); - } + destroy_periodic_time(&s->pt); } static void rtc_set_time(RTCState *s); @@ -292,8 +289,8 @@ static void rtc_update_second2(void *opaque) s->current_tm.tm_hour) ) { s->cmos_data[RTC_REG_C] |= 0xa0; - hvm_isa_irq_deassert(s->vcpu->domain, s->irq); - hvm_isa_irq_assert(s->vcpu->domain, s->irq); + hvm_isa_irq_deassert(s->pt.vcpu->domain, s->irq); + hvm_isa_irq_assert(s->pt.vcpu->domain, s->irq); } } @@ -301,8 +298,8 @@ static void rtc_update_second2(void *opaque) if ( s->cmos_data[RTC_REG_B] & RTC_UIE ) { s->cmos_data[RTC_REG_C] |= 0x90; - hvm_isa_irq_deassert(s->vcpu->domain, s->irq); - hvm_isa_irq_assert(s->vcpu->domain, s->irq); + hvm_isa_irq_deassert(s->pt.vcpu->domain, s->irq); + hvm_isa_irq_assert(s->pt.vcpu->domain, s->irq); } /* clear update in progress bit */ @@ -336,7 +333,7 @@ static uint32_t rtc_ioport_read(void *opaque, uint32_t addr) break; case RTC_REG_C: ret = s->cmos_data[s->cmos_index]; - hvm_isa_irq_deassert(s->vcpu->domain, s->irq); + hvm_isa_irq_deassert(s->pt.vcpu->domain, s->irq); s->cmos_data[RTC_REG_C] = 0x00; 
break; default: @@ -377,36 +374,25 @@ static int handle_rtc_io(ioreq_t *p) return 0; } -/* Stop the periodic interrupts from this RTC */ -void rtc_freeze(struct vcpu *v) -{ - RTCState *s = &v->domain->arch.hvm_domain.pl_time.vrtc; - stop_timer(&s->pie_timer); -} - -/* Start them again */ -void rtc_thaw(struct vcpu *v) -{ - RTCState *s = &v->domain->arch.hvm_domain.pl_time.vrtc; - if ( (s->cmos_data[RTC_REG_A] & RTC_RATE_SELECT) /* Period is not zero */ - && (s->cmos_data[RTC_REG_B] & RTC_PIE) ) - set_timer(&s->pie_timer, s->next_pie); -} - /* Move the RTC timers on to this vcpu's current cpu */ void rtc_migrate_timers(struct vcpu *v) { RTCState *s = &v->domain->arch.hvm_domain.pl_time.vrtc; - migrate_timer(&s->second_timer, v->processor); - migrate_timer(&s->second_timer2, v->processor); - migrate_timer(&s->pie_timer, v->processor); + + if ( s->pt.vcpu == v ) + { + if ( s->pt.enabled ) + migrate_timer(&s->pt.timer, v->processor); + migrate_timer(&s->second_timer, v->processor); + migrate_timer(&s->second_timer2, v->processor); + } } void rtc_init(struct vcpu *v, int base, int irq) { RTCState *s = &v->domain->arch.hvm_domain.pl_time.vrtc; - s->vcpu = v; + s->pt.vcpu = v; s->irq = irq; s->cmos_data[RTC_REG_A] = RTC_REF_CLCK_32KHZ | 6; /* ~1kHz */ s->cmos_data[RTC_REG_B] = RTC_24H; @@ -416,9 +402,9 @@ void rtc_init(struct vcpu *v, int base, int irq) s->current_tm = gmtime(get_localtime(v->domain)); rtc_copy_date(s); + init_timer(&s->pt.timer, pt_timer_fn, &s->pt, v->processor); init_timer(&s->second_timer, rtc_update_second, s, v->processor); init_timer(&s->second_timer2, rtc_update_second2, s, v->processor); - init_timer(&s->pie_timer, rtc_pie_callback, s, v->processor); s->next_second_time = NOW() + 1000000000ULL; set_timer(&s->second_timer2, s->next_second_time); @@ -430,7 +416,7 @@ void rtc_deinit(struct domain *d) { RTCState *s = &d->arch.hvm_domain.pl_time.vrtc; + kill_timer(&s->pt.timer); kill_timer(&s->second_timer); kill_timer(&s->second_timer2); - 
kill_timer(&s->pie_timer); } diff --git a/xen/arch/x86/hvm/svm/intr.c b/xen/arch/x86/hvm/svm/intr.c index e42438aadb..e3e0f0a40e 100644 --- a/xen/arch/x86/hvm/svm/intr.c +++ b/xen/arch/x86/hvm/svm/intr.c @@ -63,8 +63,7 @@ asmlinkage void svm_intr_assist(void) { struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - struct hvm_domain *plat=&v->domain->arch.hvm_domain; - struct periodic_time *pt = &plat->pl_time.periodic_tm; + struct periodic_time *pt; int intr_type = APIC_DM_EXTINT; int intr_vector = -1; int re_injecting = 0; @@ -78,26 +77,6 @@ asmlinkage void svm_intr_assist(void) re_injecting = 1; } - /* - * If event requires injecting then do not inject int. - */ - if ( unlikely(v->arch.hvm_svm.inject_event) ) - { - v->arch.hvm_svm.inject_event = 0; - return; - } - - /* - * Create a 'fake' virtual interrupt on to intercept as soon - * as the guest _can_ take interrupts. - */ - if ( irq_masked(vmcb->rflags) || vmcb->interrupt_shadow ) - { - vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR; - svm_inject_extint(v, 0x0); /* actual vector doesn't really matter */ - return; - } - /* Previous interrupt still pending? */ if ( vmcb->vintr.fields.irq ) { @@ -115,16 +94,25 @@ asmlinkage void svm_intr_assist(void) /* Now let's check for newer interrrupts */ else { - if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) - { - hvm_isa_irq_deassert(current->domain, pt->irq); - hvm_isa_irq_assert(current->domain, pt->irq); - } + pt_update_irq(v); hvm_set_callback_irq_level(); if ( cpu_has_pending_irq(v) ) + { + /* + * Create a 'fake' virtual interrupt on to intercept as soon + * as the guest _can_ take interrupts. Do not obtain the next + * interrupt from the vlapic/pic if unable to inject. 
+ */ + if ( irq_masked(vmcb->rflags) || vmcb->interrupt_shadow ) + { + vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR; + svm_inject_extint(v, 0x0); /* actual vector doesn't really matter */ + return; + } intr_vector = cpu_get_interrupt(v, &intr_type); + } } /* have we got an interrupt to inject? */ @@ -137,8 +125,7 @@ asmlinkage void svm_intr_assist(void) case APIC_DM_FIXED: case APIC_DM_LOWEST: /* Re-injecting a PIT interruptt? */ - if ( re_injecting && pt->enabled && - is_periodic_irq(v, intr_vector, intr_type) ) + if ( re_injecting && (pt = is_pt_irq(v, intr_vector, intr_type)) ) ++pt->pending_intr_nr; /* let's inject this interrupt */ TRACE_3D(TRC_VMX_INTR, v->domain->domain_id, intr_vector, 0); @@ -154,7 +141,7 @@ asmlinkage void svm_intr_assist(void) break; } - hvm_interrupt_post(v, intr_vector, intr_type); + pt_intr_post(v, intr_vector, intr_type); } /* diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index 55b631660e..2a3e6370d6 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -46,6 +46,7 @@ #include <asm/hvm/svm/intr.h> #include <asm/x86_emulate.h> #include <public/sched.h> +#include <asm/hvm/vpt.h> #define SVM_EXTRA_DEBUG @@ -191,7 +192,6 @@ static inline void svm_inject_exception(struct vcpu *v, int trap, ASSERT(vmcb->eventinj.fields.v == 0); vmcb->eventinj = event; - v->arch.hvm_svm.inject_event=1; } static void stop_svm(void) @@ -483,14 +483,12 @@ static int svm_guest_x86_mode(struct vcpu *v) struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; if ( vmcb->efer & EFER_LMA ) - return (vmcb->cs.attr.fields.l ? - X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32); + return (vmcb->cs.attr.fields.l ? 8 : 4); if ( svm_realmode(v) ) - return X86EMUL_MODE_REAL; + return 2; - return (vmcb->cs.attr.fields.db ? - X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16); + return (vmcb->cs.attr.fields.db ? 
4 : 2); } void svm_update_host_cr3(struct vcpu *v) @@ -771,7 +769,6 @@ static void arch_svm_do_launch(struct vcpu *v) static void svm_ctxt_switch_from(struct vcpu *v) { - hvm_freeze_time(v); svm_save_dr(v); } @@ -999,91 +996,65 @@ static void svm_do_general_protection_fault(struct vcpu *v, /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */ #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400 -static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input, - struct cpu_user_regs *regs) +static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, + struct cpu_user_regs *regs) { + unsigned long input = regs->eax; unsigned int eax, ebx, ecx, edx; - unsigned long eip; struct vcpu *v = current; int inst_len; ASSERT(vmcb); - eip = vmcb->rip; + hvm_cpuid(input, &eax, &ebx, &ecx, &edx); - HVM_DBG_LOG(DBG_LEVEL_1, - "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx," - " (esi) %lx, (edi) %lx", - (unsigned long)regs->eax, (unsigned long)regs->ebx, - (unsigned long)regs->ecx, (unsigned long)regs->edx, - (unsigned long)regs->esi, (unsigned long)regs->edi); + if ( input == 0x00000001 ) + { + /* Clear out reserved bits. */ + ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED; + edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED; - if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) ) + /* Guest should only see one logical processor. + * See details on page 23 of AMD CPUID Specification. 
+ */ + clear_bit(X86_FEATURE_HT & 31, &edx); /* clear the hyperthread bit */ + ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */ + ebx |= 0x00010000; /* set to 1 just for precaution */ + } + else if ( input == 0x80000001 ) { - cpuid(input, &eax, &ebx, &ecx, &edx); - if (input == 0x00000001 || input == 0x80000001 ) - { - if ( vlapic_hw_disabled(vcpu_vlapic(v)) ) - { - /* Since the apic is disabled, avoid any confusion - about SMP cpus being available */ - clear_bit(X86_FEATURE_APIC, &edx); - } + if ( vlapic_hw_disabled(vcpu_vlapic(v)) ) + clear_bit(X86_FEATURE_APIC & 31, &edx); + #if CONFIG_PAGING_LEVELS >= 3 - if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) + if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) #endif - { - clear_bit(X86_FEATURE_PAE, &edx); - if (input == 0x80000001 ) - clear_bit(X86_FEATURE_NX & 31, &edx); - } - clear_bit(X86_FEATURE_PSE36, &edx); - if (input == 0x00000001 ) - { - /* Clear out reserved bits. */ - ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED; - edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED; + clear_bit(X86_FEATURE_PAE & 31, &edx); - clear_bit(X86_FEATURE_MWAIT & 31, &ecx); + clear_bit(X86_FEATURE_PSE36 & 31, &edx); - /* Guest should only see one logical processor. - * See details on page 23 of AMD CPUID Specification. - */ - clear_bit(X86_FEATURE_HT, &edx); /* clear the hyperthread bit */ - ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */ - ebx |= 0x00010000; /* set to 1 just for precaution */ - } - else - { - /* Clear the Cmp_Legacy bit - * This bit is supposed to be zero when HTT = 0. - * See details on page 23 of AMD CPUID Specification. - */ - clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx); - /* Make SVM feature invisible to the guest. */ - clear_bit(X86_FEATURE_SVME & 31, &ecx); -#ifdef __i386__ - /* Mask feature for Intel ia32e or AMD long mode. 
*/ - clear_bit(X86_FEATURE_LAHF_LM & 31, &ecx); - - clear_bit(X86_FEATURE_LM & 31, &edx); - clear_bit(X86_FEATURE_SYSCALL & 31, &edx); -#endif - /* So far, we do not support 3DNow for the guest. */ - clear_bit(X86_FEATURE_3DNOW & 31, &edx); - clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx); - } - } - else if ( ( input == 0x80000007 ) || ( input == 0x8000000A ) ) - { - /* Mask out features of power management and SVM extension. */ - eax = ebx = ecx = edx = 0; - } - else if ( input == 0x80000008 ) - { - /* Make sure Number of CPU core is 1 when HTT=0 */ - ecx &= 0xFFFFFF00; - } + /* Clear the Cmp_Legacy bit + * This bit is supposed to be zero when HTT = 0. + * See details on page 23 of AMD CPUID Specification. + */ + clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx); + + /* Make SVM feature invisible to the guest. */ + clear_bit(X86_FEATURE_SVME & 31, &ecx); + + /* So far, we do not support 3DNow for the guest. */ + clear_bit(X86_FEATURE_3DNOW & 31, &edx); + clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx); + } + else if ( input == 0x80000007 || input == 0x8000000A ) + { + /* Mask out features of power management and SVM extension. 
*/ + eax = ebx = ecx = edx = 0; + } + else if ( input == 0x80000008 ) + { + /* Make sure Number of CPU core is 1 when HTT=0 */ + ecx &= 0xFFFFFF00; } regs->eax = (unsigned long)eax; @@ -1091,17 +1062,11 @@ static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input, regs->ecx = (unsigned long)ecx; regs->edx = (unsigned long)edx; - HVM_DBG_LOG(DBG_LEVEL_1, - "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, " - "ebx=%x, ecx=%x, edx=%x", - eip, input, eax, ebx, ecx, edx); - inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL); ASSERT(inst_len > 0); __update_guest_eip(vmcb, inst_len); } - static inline unsigned long *get_reg_p(unsigned int gpreg, struct cpu_user_regs *regs, struct vmcb_struct *vmcb) { @@ -2027,6 +1992,7 @@ static inline void svm_do_msr_access( switch (ecx) { case MSR_IA32_TIME_STAMP_COUNTER: + pt_reset(v); hvm_set_guest_time(v, msr_content); break; case MSR_IA32_SYSENTER_CS: @@ -2596,8 +2562,6 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs) exit_reason = vmcb->exitcode; save_svm_cpu_user_regs(v, regs); - v->arch.hvm_svm.inject_event = 0; - if (exit_reason == VMEXIT_INVALID) { svm_dump_vmcb(__func__, vmcb); @@ -2735,23 +2699,14 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs) } break; + case VMEXIT_INTR: case VMEXIT_NMI: - break; - case VMEXIT_SMI: - /* - * For asynchronous SMI's, we just need to allow global interrupts - * so that the SMI is taken properly in the context of the host. The - * standard code does a STGI after the VMEXIT which should accomplish - * this task. Continue as normal and restart the guest. - */ + /* Asynchronous events, handled when we STGI'd after the VMEXIT. */ break; case VMEXIT_INIT: - /* - * Nothing to do, in fact we should never get to this point. 
- */ - break; + BUG(); /* unreachable */ case VMEXIT_EXCEPTION_BP: #ifdef XEN_DEBUGGER @@ -2809,11 +2764,8 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs) break; case VMEXIT_VINTR: - vmcb->vintr.fields.irq = 0; - vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR; - break; - - case VMEXIT_INTR: + vmcb->vintr.fields.irq = 0; + vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR; break; case VMEXIT_INVD: @@ -2828,7 +2780,7 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs) goto exit_and_crash; case VMEXIT_CPUID: - svm_vmexit_do_cpuid(vmcb, regs->eax, regs); + svm_vmexit_do_cpuid(vmcb, regs); break; case VMEXIT_HLT: @@ -2887,7 +2839,7 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs) case VMEXIT_CR8_WRITE: svm_cr_access(v, 8, TYPE_MOV_TO_CR, regs); break; - + case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE: svm_dr_access(v, regs); break; @@ -2901,8 +2853,8 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs) break; case VMEXIT_SHUTDOWN: - gdprintk(XENLOG_ERR, "Guest shutdown exit\n"); - goto exit_and_crash; + hvm_triple_fault(); + break; default: exit_and_crash: @@ -2965,7 +2917,7 @@ asmlinkage void svm_asid(void) clear_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ); } } - + /* * Local variables: * mode: C diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c index f986d5ca1d..843c76a6dd 100644 --- a/xen/arch/x86/hvm/vioapic.c +++ b/xen/arch/x86/hvm/vioapic.c @@ -341,7 +341,7 @@ static void vioapic_deliver(struct vioapic *vioapic, int irq) { #ifdef IRQ0_SPECIAL_ROUTING /* Force round-robin to pick VCPU 0 */ - if ( irq == 0 ) + if ( irq == hvm_isa_irq_to_gsi(0) ) { v = vioapic_domain(vioapic)->vcpu[0]; target = v ? 
vcpu_vlapic(v) : NULL; @@ -374,7 +374,7 @@ static void vioapic_deliver(struct vioapic *vioapic, int irq) deliver_bitmask &= ~(1 << bit); #ifdef IRQ0_SPECIAL_ROUTING /* Do not deliver timer interrupts to VCPU != 0 */ - if ( (irq == 0) && (bit != 0) ) + if ( irq == hvm_isa_irq_to_gsi(0) ) v = vioapic_domain(vioapic)->vcpu[0]; else #endif diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c index a0304deb37..b1b42650f4 100644 --- a/xen/arch/x86/hvm/vlapic.c +++ b/xen/arch/x86/hvm/vlapic.c @@ -39,9 +39,8 @@ #define VLAPIC_VERSION 0x00050014 #define VLAPIC_LVT_NUM 6 -extern u32 get_apic_bus_cycle(void); - -#define APIC_BUS_CYCLE_NS (((s_time_t)get_apic_bus_cycle()) / 1000) +/* vlapic's frequence is 100 MHz */ +#define APIC_BUS_CYCLE_NS 10 #define LVT_MASK \ APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK @@ -122,11 +121,6 @@ static int vlapic_test_and_set_irr(int vector, struct vlapic *vlapic) return vlapic_test_and_set_vector(vector, vlapic->regs + APIC_IRR); } -static void vlapic_set_irr(int vector, struct vlapic *vlapic) -{ - vlapic_set_vector(vector, vlapic->regs + APIC_IRR); -} - static void vlapic_clear_irr(int vector, struct vlapic *vlapic) { vlapic_clear_vector(vector, vlapic->regs + APIC_IRR); @@ -154,16 +148,6 @@ int vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig) return ret; } -s_time_t get_apictime_scheduled(struct vcpu *v) -{ - struct vlapic *vlapic = vcpu_vlapic(v); - - if ( !vlapic_lvt_enabled(vlapic, APIC_LVTT) ) - return -1; - - return vlapic->vlapic_timer.expires; -} - int vlapic_find_highest_isr(struct vlapic *vlapic) { int result; @@ -443,46 +427,19 @@ static void vlapic_ipi(struct vlapic *vlapic) static uint32_t vlapic_get_tmcct(struct vlapic *vlapic) { - uint32_t counter_passed; - s_time_t passed, now = NOW(); - uint32_t tmcct = vlapic_get_reg(vlapic, APIC_TMCCT); - - if ( unlikely(now <= vlapic->timer_last_update) ) - { - passed = ~0x0LL - vlapic->timer_last_update + now; - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "time 
elapsed."); - } - else - passed = now - vlapic->timer_last_update; - - counter_passed = passed / (APIC_BUS_CYCLE_NS * vlapic->timer_divisor); - - tmcct -= counter_passed; - - if ( tmcct <= 0 ) - { - if ( unlikely(!vlapic_lvtt_period(vlapic)) ) - { - tmcct = 0; - /* FIXME: should we add interrupt here? */ - } - else - { - do { - tmcct += vlapic_get_reg(vlapic, APIC_TMICT); - } while ( tmcct <= 0 ); - } - } + struct vcpu *v = current; + uint32_t tmcct, tmict = vlapic_get_reg(vlapic, APIC_TMICT); + uint64_t counter_passed; - vlapic->timer_last_update = now; - vlapic_set_reg(vlapic, APIC_TMCCT, tmcct); + counter_passed = (hvm_get_guest_time(v) - vlapic->pt.last_plt_gtime) // TSC + * 1000000000ULL / ticks_per_sec(v) // NS + / APIC_BUS_CYCLE_NS / vlapic->timer_divisor; + tmcct = tmict - counter_passed; HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, - "timer initial count 0x%x, timer current count 0x%x, " - "update 0x%016"PRIx64", now 0x%016"PRIx64", offset 0x%x.", - vlapic_get_reg(vlapic, APIC_TMICT), - vlapic_get_reg(vlapic, APIC_TMCCT), - vlapic->timer_last_update, now, counter_passed); + "timer initial count %d, timer current count %d, " + "offset %"PRId64".", + tmict, tmcct, counter_passed); return tmcct; } @@ -496,6 +453,9 @@ static void vlapic_set_tdcr(struct vlapic *vlapic, unsigned int val) /* Update the demangled timer_divisor. */ val = ((val & 3) | ((val & 8) >> 1)) + 1; vlapic->timer_divisor = 1 << (val & 7); + + HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, + "vlapic_set_tdcr timer_divisor: %d.", vlapic->timer_divisor); } static void vlapic_read_aligned(struct vlapic *vlapic, unsigned int offset, @@ -587,6 +547,7 @@ static void vlapic_write(struct vcpu *v, unsigned long address, * According to the IA32 Manual, all accesses should be 32 bits. * Some OSes do 8- or 16-byte accesses, however. 
*/ + val &= 0xffffffff; if ( len != 4 ) { unsigned int tmp; @@ -681,6 +642,7 @@ static void vlapic_write(struct vcpu *v, unsigned long address, break; case APIC_LVTT: /* LVT Timer Reg */ + vlapic->pt.irq = val & APIC_VECTOR_MASK; case APIC_LVTTHMR: /* LVT Thermal Monitor */ case APIC_LVTPC: /* LVT Performance Counter */ case APIC_LVT0: /* LVT LINT0 Reg */ @@ -694,25 +656,16 @@ static void vlapic_write(struct vcpu *v, unsigned long address, case APIC_TMICT: { - s_time_t now = NOW(), offset; - - stop_timer(&vlapic->vlapic_timer); + uint64_t period = APIC_BUS_CYCLE_NS * (uint32_t)val * vlapic->timer_divisor; vlapic_set_reg(vlapic, APIC_TMICT, val); - vlapic_set_reg(vlapic, APIC_TMCCT, val); - vlapic->timer_last_update = now; - - offset = APIC_BUS_CYCLE_NS * vlapic->timer_divisor * val; - - set_timer(&vlapic->vlapic_timer, now + offset); + create_periodic_time(&vlapic->pt, period, vlapic->pt.irq, + vlapic_lvtt_period(vlapic), NULL, vlapic); HVM_DBG_LOG(DBG_LEVEL_VLAPIC, - "bus cycle is %"PRId64"ns, now 0x%016"PRIx64", " - "timer initial count 0x%x, offset 0x%016"PRIx64", " - "expire @ 0x%016"PRIx64".", - APIC_BUS_CYCLE_NS, now, - vlapic_get_reg(vlapic, APIC_TMICT), - offset, now + offset); + "bus cycle is %uns, " + "initial count %lu, period %"PRIu64"ns", + APIC_BUS_CYCLE_NS, val, period); } break; @@ -763,48 +716,6 @@ void vlapic_msr_set(struct vlapic *vlapic, uint64_t value) "apic base msr is 0x%016"PRIx64".", vlapic->apic_base_msr); } -void vlapic_timer_fn(void *data) -{ - struct vlapic *vlapic = data; - uint32_t timer_vector; - s_time_t now; - - if ( unlikely(!vlapic_enabled(vlapic) || - !vlapic_lvt_enabled(vlapic, APIC_LVTT)) ) - return; - - timer_vector = vlapic_lvt_vector(vlapic, APIC_LVTT); - now = NOW(); - - vlapic->timer_last_update = now; - - if ( vlapic_test_and_set_irr(timer_vector, vlapic) ) - vlapic->timer_pending_count++; - - if ( vlapic_lvtt_period(vlapic) ) - { - s_time_t offset; - uint32_t tmict = vlapic_get_reg(vlapic, APIC_TMICT); - - 
vlapic_set_reg(vlapic, APIC_TMCCT, tmict); - - offset = APIC_BUS_CYCLE_NS * vlapic->timer_divisor * tmict; - - set_timer(&vlapic->vlapic_timer, now + offset); - } - else - vlapic_set_reg(vlapic, APIC_TMCCT, 0); - - vcpu_kick(vlapic_vcpu(vlapic)); - - HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, - "now 0x%016"PRIx64", expire @ 0x%016"PRIx64", " - "timer initial count 0x%x, timer current count 0x%x.", - now, vlapic->vlapic_timer.expires, - vlapic_get_reg(vlapic, APIC_TMICT), - vlapic_get_reg(vlapic, APIC_TMCCT)); -} - int vlapic_accept_pic_intr(struct vcpu *v) { struct vlapic *vlapic = vcpu_vlapic(v); @@ -819,7 +730,7 @@ int vlapic_accept_pic_intr(struct vcpu *v) vlapic_hw_disabled(vlapic))); } -int cpu_get_apic_interrupt(struct vcpu *v, int *mode) +int vlapic_has_interrupt(struct vcpu *v) { struct vlapic *vlapic = vcpu_vlapic(v); int highest_irr; @@ -832,59 +743,22 @@ int cpu_get_apic_interrupt(struct vcpu *v, int *mode) ((highest_irr & 0xF0) <= vlapic_get_ppr(vlapic)) ) return -1; - *mode = APIC_DM_FIXED; return highest_irr; } -/* check to see if there is pending interrupt */ -int cpu_has_pending_irq(struct vcpu *v) -{ - struct hvm_domain *plat = &v->domain->arch.hvm_domain; - int dummy; - - /* APIC */ - if ( cpu_get_apic_interrupt(v, &dummy) != -1 ) - return 1; - - /* PIC */ - if ( !vlapic_accept_pic_intr(v) ) - return 0; - - return plat->irq.vpic[0].int_output; -} - -void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode) +int cpu_get_apic_interrupt(struct vcpu *v, int *mode) { + int vector = vlapic_has_interrupt(v); struct vlapic *vlapic = vcpu_vlapic(v); - switch ( deliver_mode ) - { - case APIC_DM_FIXED: - case APIC_DM_LOWEST: - vlapic_set_vector(vector, vlapic->regs + APIC_ISR); - vlapic_clear_irr(vector, vlapic); - if ( (vector == vlapic_lvt_vector(vlapic, APIC_LVTT)) && - (vlapic->timer_pending_count != 0) ) - { - vlapic->timer_pending_count--; - vlapic_set_irr(vector, vlapic); - } - break; - - case APIC_DM_REMRD: - gdprintk(XENLOG_WARNING, 
"Ignoring delivery mode 3.\n"); - break; - - case APIC_DM_SMI: - case APIC_DM_NMI: - case APIC_DM_INIT: - case APIC_DM_STARTUP: - break; + if ( vector == -1 ) + return -1; + + vlapic_set_vector(vector, vlapic->regs + APIC_ISR); + vlapic_clear_irr(vector, vlapic); - default: - gdprintk(XENLOG_WARNING, "Invalid delivery mode\n"); - break; - } + *mode = APIC_DM_FIXED; + return vector; } /* Reset the VLPAIC back to its power-on/reset state. */ @@ -893,7 +767,7 @@ static int vlapic_reset(struct vlapic *vlapic) struct vcpu *v = vlapic_vcpu(vlapic); int i; - vlapic_set_reg(vlapic, APIC_ID, (v->vcpu_id + 1) << 24); + vlapic_set_reg(vlapic, APIC_ID, (v->vcpu_id * 2) << 24); vlapic_set_reg(vlapic, APIC_LVR, VLAPIC_VERSION); for ( i = 0; i < 8; i++ ) @@ -945,8 +819,7 @@ int vlapic_init(struct vcpu *v) if ( v->vcpu_id == 0 ) vlapic->apic_base_msr |= MSR_IA32_APICBASE_BSP; - init_timer(&vlapic->vlapic_timer, - vlapic_timer_fn, vlapic, v->processor); + init_timer(&vlapic->pt.timer, pt_timer_fn, &vlapic->pt, v->processor); return 0; } @@ -955,7 +828,22 @@ void vlapic_destroy(struct vcpu *v) { struct vlapic *vlapic = vcpu_vlapic(v); - kill_timer(&vlapic->vlapic_timer); + kill_timer(&vlapic->pt.timer); unmap_domain_page_global(vlapic->regs); free_domheap_page(vlapic->regs_page); } + +int is_lvtt(struct vcpu *v, int vector) +{ + return vcpu_vlapic(v)->pt.enabled && + vector == vlapic_lvt_vector(vcpu_vlapic(v), APIC_LVTT); +} + +int is_lvtt_enabled(struct vcpu *v) +{ + if ( unlikely(!vlapic_enabled(vcpu_vlapic(v))) || + !vlapic_lvt_enabled(vcpu_vlapic(v), APIC_LVTT)) + return 0; + + return 1; +} diff --git a/xen/arch/x86/hvm/vmx/intr.c b/xen/arch/x86/hvm/vmx/intr.c index 662a6697a0..5be7aaeb02 100644 --- a/xen/arch/x86/hvm/vmx/intr.c +++ b/xen/arch/x86/hvm/vmx/intr.c @@ -91,17 +91,11 @@ asmlinkage void vmx_intr_assist(void) int highest_vector; unsigned long eflags; struct vcpu *v = current; - struct hvm_domain *plat=&v->domain->arch.hvm_domain; - struct periodic_time *pt = 
&plat->pl_time.periodic_tm; unsigned int idtv_info_field; unsigned long inst_len; int has_ext_irq; - if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) - { - hvm_isa_irq_deassert(current->domain, pt->irq); - hvm_isa_irq_assert(current->domain, pt->irq); - } + pt_update_irq(v); hvm_set_callback_irq_level(); @@ -181,8 +175,8 @@ asmlinkage void vmx_intr_assist(void) BUG(); break; } - - hvm_interrupt_post(v, highest_vector, intr_type); + + pt_intr_post(v, highest_vector, intr_type); } /* diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index c95afe5b83..c8782ef8a2 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -46,6 +46,7 @@ #include <asm/hvm/vpic.h> #include <asm/hvm/vlapic.h> #include <asm/x86_emulate.h> +#include <asm/hvm/vpt.h> static void vmx_ctxt_switch_from(struct vcpu *v); static void vmx_ctxt_switch_to(struct vcpu *v); @@ -276,6 +277,12 @@ static void vmx_restore_host_msrs(void) } } +static void vmx_save_guest_msrs(struct vcpu *v) +{ + /* MSR_SHADOW_GS_BASE may have been changed by swapgs instruction. */ + rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.msr_state.shadow_gs); +} + static void vmx_restore_guest_msrs(struct vcpu *v) { struct vmx_msr_state *guest_msr_state, *host_msr_state; @@ -307,6 +314,7 @@ static void vmx_restore_guest_msrs(struct vcpu *v) #define vmx_save_host_msrs() ((void)0) #define vmx_restore_host_msrs() ((void)0) +#define vmx_save_guest_msrs(v) ((void)0) #define vmx_restore_guest_msrs(v) ((void)0) static inline int long_mode_do_msr_read(struct cpu_user_regs *regs) @@ -372,12 +380,7 @@ static inline void vmx_restore_dr(struct vcpu *v) static void vmx_ctxt_switch_from(struct vcpu *v) { - hvm_freeze_time(v); - - /* NB. MSR_SHADOW_GS_BASE may be changed by swapgs instrucion in guest, - * so we must save it. 
*/ - rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.msr_state.shadow_gs); - + vmx_save_guest_msrs(v); vmx_restore_host_msrs(); vmx_save_dr(v); } @@ -692,14 +695,12 @@ static int vmx_guest_x86_mode(struct vcpu *v) cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES); if ( vmx_long_mode_enabled(v) ) - return ((cs_ar_bytes & (1u<<13)) ? - X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32); + return ((cs_ar_bytes & (1u<<13)) ? 8 : 4); if ( vmx_realmode(v) ) - return X86EMUL_MODE_REAL; + return 2; - return ((cs_ar_bytes & (1u<<14)) ? - X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16); + return ((cs_ar_bytes & (1u<<14)) ? 4 : 2); } static int vmx_pae_enabled(struct vcpu *v) @@ -899,24 +900,14 @@ static void vmx_do_no_device_fault(void) } } -#define bitmaskof(idx) (1U << ((idx)&31)) +#define bitmaskof(idx) (1U << ((idx) & 31)) static void vmx_do_cpuid(struct cpu_user_regs *regs) { unsigned int input = (unsigned int)regs->eax; unsigned int count = (unsigned int)regs->ecx; unsigned int eax, ebx, ecx, edx; - unsigned long eip; - struct vcpu *v = current; - eip = __vmread(GUEST_RIP); - - HVM_DBG_LOG(DBG_LEVEL_3, "(eax) 0x%08lx, (ebx) 0x%08lx, " - "(ecx) 0x%08lx, (edx) 0x%08lx, (esi) 0x%08lx, (edi) 0x%08lx", - (unsigned long)regs->eax, (unsigned long)regs->ebx, - (unsigned long)regs->ecx, (unsigned long)regs->edx, - (unsigned long)regs->esi, (unsigned long)regs->edi); - - if ( input == CPUID_LEAF_0x4 ) + if ( input == 0x00000004 ) { cpuid_count(input, count, &eax, &ebx, &ecx, &edx); eax &= NUM_CORES_RESET_MASK; @@ -929,6 +920,7 @@ static void vmx_do_cpuid(struct cpu_user_regs *regs) */ u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx; unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); + struct vcpu *v = current; char *p; gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value); @@ -946,72 +938,37 @@ static void vmx_do_cpuid(struct cpu_user_regs *regs) unmap_domain_page(p); gdprintk(XENLOG_INFO, "Output value is 0x%"PRIx64".\n", value); - ecx = (u32)(value >> 0); + ecx = (u32)value; edx 
= (u32)(value >> 32); - } - else if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) ) - { - cpuid(input, &eax, &ebx, &ecx, &edx); + } else { + hvm_cpuid(input, &eax, &ebx, &ecx, &edx); - if ( input == CPUID_LEAF_0x1 ) + if ( input == 0x00000001 ) { /* Mask off reserved bits. */ ecx &= ~VMX_VCPU_CPUID_L1_ECX_RESERVED; - if ( vlapic_hw_disabled(vcpu_vlapic(v)) ) - clear_bit(X86_FEATURE_APIC, &edx); - -#if CONFIG_PAGING_LEVELS >= 3 - if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) -#endif - clear_bit(X86_FEATURE_PAE, &edx); - clear_bit(X86_FEATURE_PSE36, &edx); - ebx &= NUM_THREADS_RESET_MASK; /* Unsupportable for virtualised CPUs. */ - ecx &= ~(bitmaskof(X86_FEATURE_VMXE) | - bitmaskof(X86_FEATURE_EST) | - bitmaskof(X86_FEATURE_TM2) | - bitmaskof(X86_FEATURE_CID) | - bitmaskof(X86_FEATURE_MWAIT) ); - - edx &= ~( bitmaskof(X86_FEATURE_HT) | - bitmaskof(X86_FEATURE_ACPI) | - bitmaskof(X86_FEATURE_ACC) ); - } - else if ( ( input == CPUID_LEAF_0x6 ) - || ( input == CPUID_LEAF_0x9 ) - || ( input == CPUID_LEAF_0xA )) - { - eax = ebx = ecx = edx = 0x0; + ecx &= ~(bitmaskof(X86_FEATURE_VMXE) | + bitmaskof(X86_FEATURE_EST) | + bitmaskof(X86_FEATURE_TM2) | + bitmaskof(X86_FEATURE_CID)); + + edx &= ~(bitmaskof(X86_FEATURE_HT) | + bitmaskof(X86_FEATURE_ACPI) | + bitmaskof(X86_FEATURE_ACC)); } - else if ( input == CPUID_LEAF_0x80000001 ) - { -#if CONFIG_PAGING_LEVELS >= 3 - if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) -#endif - clear_bit(X86_FEATURE_NX & 31, &edx); -#ifdef __i386__ - clear_bit(X86_FEATURE_LAHF_LM & 31, &ecx); - clear_bit(X86_FEATURE_LM & 31, &edx); - clear_bit(X86_FEATURE_SYSCALL & 31, &edx); -#endif - } + if ( input == 0x00000006 || input == 0x00000009 || input == 0x0000000A ) + eax = ebx = ecx = edx = 0x0; } - regs->eax = (unsigned long) eax; - regs->ebx = (unsigned long) ebx; - regs->ecx = (unsigned long) ecx; - regs->edx = (unsigned long) edx; - - HVM_DBG_LOG(DBG_LEVEL_3, "eip@%lx, input: 0x%lx, " - "output: eax 
= 0x%08lx, ebx = 0x%08lx, " - "ecx = 0x%08lx, edx = 0x%08lx", - (unsigned long)eip, (unsigned long)input, - (unsigned long)eax, (unsigned long)ebx, - (unsigned long)ecx, (unsigned long)edx); + regs->eax = (unsigned long)eax; + regs->ebx = (unsigned long)ebx; + regs->ecx = (unsigned long)ecx; + regs->edx = (unsigned long)edx; } #define CASE_GET_REG_P(REG, reg) \ @@ -2116,13 +2073,7 @@ static inline int vmx_do_msr_write(struct cpu_user_regs *regs) switch (ecx) { case MSR_IA32_TIME_STAMP_COUNTER: - { - struct periodic_time *pt = - &(v->domain->arch.hvm_domain.pl_time.periodic_tm); - if ( pt->enabled && pt->first_injected - && v->vcpu_id == pt->bind_vcpu ) - pt->first_injected = 0; - } + pt_reset(v); hvm_set_guest_time(v, msr_content); break; case MSR_IA32_SYSENTER_CS: @@ -2451,7 +2402,8 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs) vmx_do_extint(regs); break; case EXIT_REASON_TRIPLE_FAULT: - goto exit_and_crash; + hvm_triple_fault(); + break; case EXIT_REASON_PENDING_INTERRUPT: /* Disable the interrupt window. 
*/ v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; diff --git a/xen/arch/x86/hvm/vpic.c b/xen/arch/x86/hvm/vpic.c index 268bf8234d..edd7e6b4ac 100644 --- a/xen/arch/x86/hvm/vpic.c +++ b/xen/arch/x86/hvm/vpic.c @@ -445,19 +445,3 @@ int cpu_get_pic_interrupt(struct vcpu *v, int *type) *type = APIC_DM_EXTINT; return vector; } - -int is_periodic_irq(struct vcpu *v, int irq, int type) -{ - int vec; - struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; - - if ( pt->irq != 0 ) - return 0; - - if ( type == APIC_DM_EXTINT ) - vec = v->domain->arch.hvm_domain.irq.vpic[0].irq_base; - else - vec = domain_vioapic(v->domain)->redirtbl[0].fields.vector; - - return (irq == vec); -} diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c new file mode 100644 index 0000000000..12e8999582 --- /dev/null +++ b/xen/arch/x86/hvm/vpt.c @@ -0,0 +1,234 @@ +/* + * vpt.c: Virtual Platform Timer + * + * Copyright (c) 2006, Xiaowei Yang, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + */ +#include <xen/time.h> +#include <asm/hvm/support.h> +#include <asm/hvm/vpt.h> +#include <asm/event.h> + +static __inline__ void missed_ticks(struct periodic_time *pt) +{ + s_time_t missed_ticks; + + missed_ticks = NOW() - pt->scheduled; + if ( missed_ticks > 0 ) + { + missed_ticks = missed_ticks / (s_time_t) pt->period + 1; + if ( missed_ticks > 1000 ) + { + /* TODO: Adjust guest time together */ + pt->pending_intr_nr++; + } + else + { + pt->pending_intr_nr += missed_ticks; + } + pt->scheduled += missed_ticks * pt->period; + } +} + +void pt_freeze_time(struct vcpu *v) +{ + struct list_head *head = &v->arch.hvm_vcpu.tm_list; + struct list_head *list; + struct periodic_time *pt; + + if ( test_bit(_VCPUF_blocked, &v->vcpu_flags) ) + return; + + v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v); + + list_for_each( list, head ) + { + pt = list_entry(list, struct periodic_time, list); + stop_timer(&pt->timer); + } +} + +void pt_thaw_time(struct vcpu *v) +{ + struct list_head *head = &v->arch.hvm_vcpu.tm_list; + struct list_head *list; + struct periodic_time *pt; + + if ( v->arch.hvm_vcpu.guest_time ) + { + hvm_set_guest_time(v, v->arch.hvm_vcpu.guest_time); + v->arch.hvm_vcpu.guest_time = 0; + + list_for_each( list, head ) + { + pt = list_entry(list, struct periodic_time, list); + missed_ticks(pt); + set_timer(&pt->timer, pt->scheduled); + } + } +} + +/* Hook function for the platform periodic time */ +void pt_timer_fn(void *data) +{ + struct periodic_time *pt = data; + + pt->pending_intr_nr++; + pt->scheduled += pt->period; + + missed_ticks(pt); + + if ( !pt->one_shot ) + set_timer(&pt->timer, pt->scheduled); + + vcpu_kick(pt->vcpu); +} + +void pt_update_irq(struct vcpu *v) +{ + struct list_head *head = &v->arch.hvm_vcpu.tm_list; + struct list_head *list; + struct periodic_time *pt; + uint64_t max_lag = -1ULL; + int irq = -1; + + list_for_each( list, head ) + { + pt = list_entry(list, struct periodic_time, list); + if ( !is_isa_irq_masked(v, pt->irq) && 
pt->pending_intr_nr && + ((pt->last_plt_gtime + pt->period_cycles) < max_lag) ) + { + max_lag = pt->last_plt_gtime + pt->period_cycles; + irq = pt->irq; + } + } + + if ( is_lvtt(v, irq) ) + { + vlapic_set_irq(vcpu_vlapic(v), irq, 0); + } + else if ( irq >= 0 ) + { + hvm_isa_irq_deassert(v->domain, irq); + hvm_isa_irq_assert(v->domain, irq); + } +} + +struct periodic_time *is_pt_irq(struct vcpu *v, int vector, int type) +{ + struct list_head *head = &v->arch.hvm_vcpu.tm_list; + struct list_head *list; + struct periodic_time *pt; + struct RTCState *rtc = &v->domain->arch.hvm_domain.pl_time.vrtc; + int vec; + + list_for_each( list, head ) + { + pt = list_entry(list, struct periodic_time, list); + if ( !pt->pending_intr_nr ) + continue; + + if ( is_lvtt(v, pt->irq) ) + { + if ( pt->irq != vector ) + continue; + return pt; + } + + vec = get_isa_irq_vector(v, pt->irq, type); + + /* RTC irq need special care */ + if ( (vector != vec) || (pt->irq == 8 && !is_rtc_periodic_irq(rtc)) ) + continue; + + return pt; + } + + return NULL; +} + +void pt_intr_post(struct vcpu *v, int vector, int type) +{ + struct periodic_time *pt = is_pt_irq(v, vector, type); + + if ( pt == NULL ) + return; + + pt->pending_intr_nr--; + pt->last_plt_gtime += pt->period_cycles; + hvm_set_guest_time(pt->vcpu, pt->last_plt_gtime); + + if ( pt->cb != NULL ) + pt->cb(pt->vcpu, pt->priv); +} + +/* If pt is enabled, discard pending intr */ +void pt_reset(struct vcpu *v) +{ + struct list_head *head = &v->arch.hvm_vcpu.tm_list; + struct list_head *list; + struct periodic_time *pt; + + list_for_each( list, head ) + { + pt = list_entry(list, struct periodic_time, list); + if ( pt->enabled ) + { + pt->pending_intr_nr = 0; + pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu); + pt->scheduled = NOW() + pt->period; + set_timer(&pt->timer, pt->scheduled); + } + } +} + +void create_periodic_time(struct periodic_time *pt, uint64_t period, + uint8_t irq, char one_shot, time_cb *cb, void *data) +{ + 
destroy_periodic_time(pt); + + pt->enabled = 1; + if ( period < 900000 ) /* < 0.9 ms */ + { + gdprintk(XENLOG_WARNING, + "HVM_PlatformTime: program too small period %"PRIu64"\n", + period); + period = 900000; /* force to 0.9ms */ + } + pt->period = period; + pt->vcpu = current; + pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu); + pt->irq = irq; + pt->period_cycles = (u64)period * cpu_khz / 1000000L; + pt->one_shot = one_shot; + pt->scheduled = NOW() + period; + pt->cb = cb; + pt->priv = data; + + list_add(&pt->list, &current->arch.hvm_vcpu.tm_list); + set_timer(&pt->timer, pt->scheduled); +} + +void destroy_periodic_time(struct periodic_time *pt) +{ + if ( pt->enabled ) + { + pt->enabled = 0; + pt->pending_intr_nr = 0; + list_del(&pt->list); + stop_timer(&pt->timer); + } +} diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c index 1821a56e24..74016d2906 100644 --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -13,6 +13,7 @@ #include <xen/perfc.h> #include <xen/sched.h> #include <xen/keyhandler.h> +#include <xen/compat.h> #include <asm/current.h> #include <asm/smpboot.h> @@ -332,7 +333,7 @@ int pirq_guest_unmask(struct domain *d) irq < NR_IRQS; irq = find_next_bit(d->pirq_mask, NR_IRQS, irq+1) ) { - if ( !test_bit(d->pirq_to_evtchn[irq], s->evtchn_mask) ) + if ( !test_bit(d->pirq_to_evtchn[irq], __shared_info_addr(d, s, evtchn_mask)) ) __pirq_guest_eoi(d, irq); } @@ -624,14 +625,13 @@ static void dump_irqs(unsigned char key) printk("%u(%c%c%c%c)", d->domain_id, (test_bit(d->pirq_to_evtchn[irq], - d->shared_info->evtchn_pending) ? + shared_info_addr(d, evtchn_pending)) ? 'P' : '-'), - (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_LONG, - &d->shared_info->vcpu_info[0]. - evtchn_pending_sel) ? + (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_GUEST_LONG(d), + vcpu_info_addr(d->vcpu[0], evtchn_pending_sel)) ? 'S' : '-'), (test_bit(d->pirq_to_evtchn[irq], - d->shared_info->evtchn_mask) ? + shared_info_addr(d, evtchn_mask)) ? 'M' : '-'), (test_bit(irq, d->pirq_mask) ? 
'M' : '-')); diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 9c932e9686..026180c11a 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -106,6 +106,7 @@ #include <asm/ldt.h> #include <asm/x86_emulate.h> #include <asm/e820.h> +#include <asm/hypercall.h> #include <public/memory.h> #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a) @@ -119,20 +120,6 @@ #define PTE_UPDATE_WITH_CMPXCHG #endif -/* - * Both do_mmuext_op() and do_mmu_update(): - * We steal the m.s.b. of the @count parameter to indicate whether this - * invocation of do_mmu_update() is resuming a previously preempted call. - */ -#define MMU_UPDATE_PREEMPTED (~(~0U>>1)) - -static void free_l2_table(struct page_info *page); -static void free_l1_table(struct page_info *page); - -static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long, - unsigned long type); -static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t, unsigned long gl1mfn); - /* Used to defer flushing of memory structures. */ struct percpu_mm_info { #define DOP_FLUSH_TLB (1<<0) /* Flush the local TLB. */ @@ -158,6 +145,15 @@ struct page_info *frame_table; unsigned long max_page; unsigned long total_pages; +#ifdef CONFIG_COMPAT +l2_pgentry_t *compat_idle_pg_table_l2 = NULL; +#define l3_disallow_mask(d) (!IS_COMPAT(d) ? \ + L3_DISALLOW_MASK : \ + COMPAT_L3_DISALLOW_MASK) +#else +#define l3_disallow_mask(d) L3_DISALLOW_MASK +#endif + void __init init_frametable(void) { unsigned long nr_pages, page_step, i, mfn; @@ -365,6 +361,38 @@ void write_ptbase(struct vcpu *v) write_cr3(v->arch.cr3); } +/* Should be called after CR3 is updated. + * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. + * + * Also updates other state derived from CR3 (vcpu->arch.guest_vtable, + * shadow_vtable, etc). + * + * Uses values found in vcpu->arch.(guest_table and guest_table_user), and + * for HVM guests, arch.monitor_table and hvm's guest CR3. + * + * Update ref counts to shadow tables appropriately. 
+ */ +void update_cr3(struct vcpu *v) +{ + unsigned long cr3_mfn=0; + + if ( shadow_mode_enabled(v->domain) ) + { + shadow_update_cr3(v); + return; + } + +#if CONFIG_PAGING_LEVELS == 4 + if ( !(v->arch.flags & TF_kernel_mode) ) + cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user); + else +#endif + cr3_mfn = pagetable_get_pfn(v->arch.guest_table); + + make_cr3(v, cr3_mfn); +} + + void invalidate_shadow_ldt(struct vcpu *v) { int i; @@ -401,7 +429,7 @@ static int alloc_segdesc_page(struct page_info *page) descs = map_domain_page(page_to_mfn(page)); for ( i = 0; i < 512; i++ ) - if ( unlikely(!check_descriptor(&descs[i])) ) + if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) ) goto fail; unmap_domain_page(descs); @@ -629,9 +657,9 @@ get_page_from_l3e( if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) return 1; - if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) ) + if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) ) { - MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK); + MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d)); return 0; } @@ -668,9 +696,10 @@ get_page_from_l4e( #ifdef __x86_64__ #ifdef USER_MAPPINGS_ARE_GLOBAL -#define adjust_guest_l1e(pl1e) \ +#define adjust_guest_l1e(pl1e, d) \ do { \ - if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \ + if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \ + likely(!IS_COMPAT(d)) ) \ { \ /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. 
*/ \ if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \ @@ -684,37 +713,53 @@ get_page_from_l4e( } \ } while ( 0 ) #else -#define adjust_guest_l1e(pl1e) \ +#define adjust_guest_l1e(pl1e, d) \ do { \ - if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \ + if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \ + likely(!IS_COMPAT(d)) ) \ l1e_add_flags((pl1e), _PAGE_USER); \ } while ( 0 ) #endif -#define adjust_guest_l2e(pl2e) \ +#define adjust_guest_l2e(pl2e, d) \ do { \ - if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) ) \ + if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \ + likely(!IS_COMPAT(d)) ) \ l2e_add_flags((pl2e), _PAGE_USER); \ } while ( 0 ) -#define adjust_guest_l3e(pl3e) \ +#define adjust_guest_l3e(pl3e, d) \ do { \ if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \ - l3e_add_flags((pl3e), _PAGE_USER); \ + l3e_add_flags((pl3e), likely(!IS_COMPAT(d)) ? \ + _PAGE_USER : \ + _PAGE_USER|_PAGE_RW); \ } while ( 0 ) -#define adjust_guest_l4e(pl4e) \ +#define adjust_guest_l4e(pl4e, d) \ do { \ - if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) ) \ + if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \ + likely(!IS_COMPAT(d)) ) \ l4e_add_flags((pl4e), _PAGE_USER); \ } while ( 0 ) #else /* !defined(__x86_64__) */ -#define adjust_guest_l1e(_p) ((void)0) -#define adjust_guest_l2e(_p) ((void)0) -#define adjust_guest_l3e(_p) ((void)0) +#define adjust_guest_l1e(_p, _d) ((void)(_d)) +#define adjust_guest_l2e(_p, _d) ((void)(_d)) +#define adjust_guest_l3e(_p, _d) ((void)(_d)) + +#endif +#ifdef CONFIG_COMPAT +#define unadjust_guest_l3e(pl3e, d) \ + do { \ + if ( unlikely(IS_COMPAT(d)) && \ + likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \ + l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \ + } while ( 0 ) +#else +#define unadjust_guest_l3e(_p, _d) ((void)(_d)) #endif void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) @@ -818,7 +863,7 @@ static int alloc_l1_table(struct page_info *page) 
unlikely(!get_page_from_l1e(pl1e[i], d)) ) goto fail; - adjust_guest_l1e(pl1e[i]); + adjust_guest_l1e(pl1e[i], d); } unmap_domain_page(pl1e); @@ -834,13 +879,20 @@ static int alloc_l1_table(struct page_info *page) return 0; } -#ifdef CONFIG_X86_PAE -static int create_pae_xen_mappings(l3_pgentry_t *pl3e) +#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT) +static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e) { struct page_info *page; - l2_pgentry_t *pl2e, l2e; + l2_pgentry_t *pl2e; l3_pgentry_t l3e3; +#ifndef CONFIG_COMPAT + l2_pgentry_t l2e; int i; +#else + + if ( !IS_COMPAT(d) ) + return 1; +#endif pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK); @@ -873,6 +925,7 @@ static int create_pae_xen_mappings(l3_pgentry_t *pl3e) /* Xen private mappings. */ pl2e = map_domain_page(l3e_get_pfn(l3e3)); +#ifndef CONFIG_COMPAT memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)], &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT], L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); @@ -890,11 +943,20 @@ static int create_pae_xen_mappings(l3_pgentry_t *pl3e) l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR); l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e); } +#else + memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], + &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], + COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e)); +#endif unmap_domain_page(pl2e); return 1; } +#else +# define create_pae_xen_mappings(d, pl3e) (1) +#endif +#ifdef CONFIG_X86_PAE /* Flush a pgdir update into low-memory caches. 
*/ static void pae_flush_pgd( unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e) @@ -929,12 +991,8 @@ static void pae_flush_pgd( flush_tlb_mask(d->domain_dirty_cpumask); } - -#elif CONFIG_X86_64 -# define create_pae_xen_mappings(pl3e) (1) -# define pae_flush_pgd(mfn, idx, nl3e) ((void)0) #else -# define create_pae_xen_mappings(pl3e) (1) +# define pae_flush_pgd(mfn, idx, nl3e) ((void)0) #endif static int alloc_l2_table(struct page_info *page, unsigned long type) @@ -948,11 +1006,11 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { - if ( is_guest_l2_slot(type, i) && + if ( is_guest_l2_slot(d, type, i) && unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) ) goto fail; - adjust_guest_l2e(pl2e[i]); + adjust_guest_l2e(pl2e[i], d); } #if CONFIG_PAGING_LEVELS == 2 @@ -975,7 +1033,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) fail: MEM_LOG("Failure in alloc_l2_table: entry %d", i); while ( i-- > 0 ) - if ( is_guest_l2_slot(type, i) ) + if ( is_guest_l2_slot(d, type, i) ) put_page_from_l2e(pl2e[i], pfn); unmap_domain_page(pl2e); @@ -1007,13 +1065,24 @@ static int alloc_l3_table(struct page_info *page) #endif pl3e = map_domain_page(pfn); + + /* + * PAE guests allocate full pages, but aren't required to initialize + * more than the first four entries; when running in compatibility + * mode, however, the full page is visible to the MMU, and hence all + * 512 entries must be valid/verified, which is most easily achieved + * by clearing them out. 
+ */ + if ( IS_COMPAT(d) ) + memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e)); + for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) { -#ifdef CONFIG_X86_PAE - if ( i == 3 ) +#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT) + if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && i == 3 ) { if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) || - (l3e_get_flags(pl3e[i]) & L3_DISALLOW_MASK) || + (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) || !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), PGT_l2_page_table | PGT_pae_xen_l2, @@ -1026,10 +1095,10 @@ static int alloc_l3_table(struct page_info *page) unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) ) goto fail; - adjust_guest_l3e(pl3e[i]); + adjust_guest_l3e(pl3e[i], d); } - if ( !create_pae_xen_mappings(pl3e) ) + if ( !create_pae_xen_mappings(d, pl3e) ) goto fail; unmap_domain_page(pl3e); @@ -1062,7 +1131,7 @@ static int alloc_l4_table(struct page_info *page) unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) ) goto fail; - adjust_guest_l4e(pl4e[i]); + adjust_guest_l4e(pl4e[i], d); } /* Xen private mappings. 
*/ @@ -1072,9 +1141,12 @@ static int alloc_l4_table(struct page_info *page) pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] = l4e_from_pfn(pfn, __PAGE_HYPERVISOR); pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] = - l4e_from_page( - virt_to_page(page_get_owner(page)->arch.mm_perdomain_l3), - __PAGE_HYPERVISOR); + l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3), + __PAGE_HYPERVISOR); + if ( IS_COMPAT(d) ) + pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] = + l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3), + __PAGE_HYPERVISOR); return 1; @@ -1110,6 +1182,9 @@ static void free_l1_table(struct page_info *page) static void free_l2_table(struct page_info *page) { +#ifdef CONFIG_COMPAT + struct domain *d = page_get_owner(page); +#endif unsigned long pfn = page_to_mfn(page); l2_pgentry_t *pl2e; int i; @@ -1117,7 +1192,7 @@ static void free_l2_table(struct page_info *page) pl2e = map_domain_page(pfn); for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) - if ( is_guest_l2_slot(page->u.inuse.type_info, i) ) + if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) ) put_page_from_l2e(pl2e[i], pfn); unmap_domain_page(pl2e); @@ -1130,6 +1205,7 @@ static void free_l2_table(struct page_info *page) static void free_l3_table(struct page_info *page) { + struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); l3_pgentry_t *pl3e; int i; @@ -1138,7 +1214,10 @@ static void free_l3_table(struct page_info *page) for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) if ( is_guest_l3_slot(i) ) + { put_page_from_l3e(pl3e[i], pfn); + unadjust_guest_l3e(pl3e[i], d); + } unmap_domain_page(pl3e); } @@ -1160,53 +1239,57 @@ static void free_l4_table(struct page_info *page) #endif -static inline int update_l1e(l1_pgentry_t *pl1e, - l1_pgentry_t ol1e, - l1_pgentry_t nl1e, - unsigned long gl1mfn, - struct vcpu *v) + +/* How to write an entry to the guest pagetables. + * Returns 0 for failure (pointer not valid), 1 for success. 
*/ +static inline int update_intpte(intpte_t *p, + intpte_t old, + intpte_t new, + unsigned long mfn, + struct vcpu *v) { int rv = 1; - if ( unlikely(shadow_mode_enabled(v->domain)) ) - shadow_lock(v->domain); #ifndef PTE_UPDATE_WITH_CMPXCHG - rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e))); + if ( unlikely(shadow_mode_enabled(v->domain)) ) + rv = shadow_write_guest_entry(v, p, new, _mfn(mfn)); + else + rv = (!__copy_to_user(p, &new, sizeof(new))); #else { - intpte_t o = l1e_get_intpte(ol1e); - intpte_t n = l1e_get_intpte(nl1e); - + intpte_t t = old; for ( ; ; ) { - if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ) + if ( unlikely(shadow_mode_enabled(v->domain)) ) + rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); + else + rv = (!cmpxchg_user(p, t, new)); + + if ( unlikely(rv == 0) ) { MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte - ": saw %" PRIpte, - l1e_get_intpte(ol1e), - l1e_get_intpte(nl1e), - o); - rv = 0; + ": saw %" PRIpte, old, new, t); break; } - if ( o == l1e_get_intpte(ol1e) ) + if ( t == old ) break; /* Allowed to change in Accessed/Dirty flags only. */ - BUG_ON((o ^ l1e_get_intpte(ol1e)) & - ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); - ol1e = l1e_from_intpte(o); + BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY)); + + old = t; } } #endif - if ( unlikely(shadow_mode_enabled(v->domain)) && rv ) - { - shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e); - shadow_unlock(v->domain); - } return rv; } +/* Macro that wraps the appropriate type-changes around update_intpte(). + * Arguments are: type, ptr, old, new, mfn, vcpu */ +#define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v) \ + update_intpte((intpte_t *)(_p), \ + _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \ + (_m), (_v)) /* Update the L1 entry at pl1e to new value nl1e. 
*/ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, @@ -1219,7 +1302,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, return 0; if ( unlikely(shadow_mode_refcounts(d)) ) - return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); + return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) { @@ -1234,16 +1317,16 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, return 0; } - adjust_guest_l1e(nl1e); + adjust_guest_l1e(nl1e, d); /* Fast path for identical mapping, r/w and presence. */ if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) - return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); + return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) ) return 0; - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) ) { put_page_from_l1e(nl1e, d); return 0; @@ -1251,7 +1334,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, } else { - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) ) return 0; } @@ -1259,36 +1342,6 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, return 1; } -#ifndef PTE_UPDATE_WITH_CMPXCHG -#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; }) -#else -#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ \ - for ( ; ; ) \ - { \ - intpte_t __o = cmpxchg((intpte_t *)(_p), \ - _t ## e_get_intpte(_o), \ - _t ## e_get_intpte(_n)); \ - if ( __o == _t ## e_get_intpte(_o) ) \ - break; \ - /* Allowed to change in Accessed/Dirty flags only. 
*/ \ - BUG_ON((__o ^ _t ## e_get_intpte(_o)) & \ - ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); \ - _o = _t ## e_from_intpte(__o); \ - } \ - 1; }) -#endif -#define UPDATE_ENTRY(_t,_p,_o,_n,_m) ({ \ - int rv; \ - if ( unlikely(shadow_mode_enabled(current->domain)) ) \ - shadow_lock(current->domain); \ - rv = _UPDATE_ENTRY(_t, _p, _o, _n); \ - if ( unlikely(shadow_mode_enabled(current->domain)) ) \ - { \ - shadow_validate_guest_entry(current, _mfn(_m), (_p)); \ - shadow_unlock(current->domain); \ - } \ - rv; \ -}) /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ static int mod_l2_entry(l2_pgentry_t *pl2e, @@ -1297,8 +1350,9 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, unsigned long type) { l2_pgentry_t ol2e; + struct domain *d = current->domain; - if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) ) + if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) ) { MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e); return 0; @@ -1316,22 +1370,22 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, return 0; } - adjust_guest_l2e(nl2e); + adjust_guest_l2e(nl2e, d); /* Fast path for identical mapping and presence. 
*/ if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn); + return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current); if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) + if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) ) { put_page_from_l2e(nl2e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) + else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) ) { return 0; } @@ -1348,6 +1402,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, unsigned long pfn) { l3_pgentry_t ol3e; + struct domain *d = current->domain; int okay; if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) @@ -1356,12 +1411,13 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, return 0; } -#ifdef CONFIG_X86_PAE +#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT) /* * Disallow updates to final L3 slot. It contains Xen mappings, and it * would be a pain to ensure they remain continuously valid throughout. */ - if ( pgentry_ptr_to_slot(pl3e) >= 3 ) + if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && + pgentry_ptr_to_slot(pl3e) >= 3 ) return 0; #endif @@ -1370,34 +1426,34 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, if ( l3e_get_flags(nl3e) & _PAGE_PRESENT ) { - if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) ) + if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) ) { MEM_LOG("Bad L3 flags %x", - l3e_get_flags(nl3e) & L3_DISALLOW_MASK); + l3e_get_flags(nl3e) & l3_disallow_mask(d)); return 0; } - adjust_guest_l3e(nl3e); + adjust_guest_l3e(nl3e, d); /* Fast path for identical mapping and presence. 
*/ if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn); + return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current); if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) + if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) ) { put_page_from_l3e(nl3e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) + else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) ) { return 0; } - okay = create_pae_xen_mappings(pl3e); + okay = create_pae_xen_mappings(d, pl3e); BUG_ON(!okay); pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); @@ -1435,22 +1491,22 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, return 0; } - adjust_guest_l4e(nl4e); + adjust_guest_l4e(nl4e, current->domain); /* Fast path for identical mapping and presence. */ if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn); + return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current); if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) + if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) ) { put_page_from_l4e(nl4e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) + else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) ) { return 0; } @@ -1706,6 +1762,33 @@ int new_guest_cr3(unsigned long mfn) if ( is_hvm_domain(d) && !hvm_paging_enabled(v) ) return 0; +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + { + l4_pgentry_t l4e = l4e_from_pfn(mfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); + + if ( shadow_mode_refcounts(d) ) + { + okay = get_page_from_pagenr(mfn, d); + old_base_mfn = l4e_get_pfn(l4e); + if ( okay && old_base_mfn ) + put_page(mfn_to_page(old_base_mfn)); + } + else + okay = mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)), + 
l4e, 0); + if ( unlikely(!okay) ) + { + MEM_LOG("Error while installing new compat baseptr %lx", mfn); + return 0; + } + + invalidate_shadow_ldt(v); + write_ptbase(v); + + return 1; + } +#endif if ( shadow_mode_refcounts(d) ) { okay = get_page_from_pagenr(mfn, d); @@ -1944,6 +2027,8 @@ int do_mmuext_op( goto pin_page; case MMUEXT_PIN_L4_TABLE: + if ( IS_COMPAT(FOREIGNDOM) ) + break; type = PGT_l4_page_table; pin_page: @@ -2007,7 +2092,11 @@ int do_mmuext_op( #ifdef __x86_64__ case MMUEXT_NEW_USER_BASEPTR: - okay = 1; + if ( IS_COMPAT(FOREIGNDOM) ) + { + okay = 0; + break; + } if (likely(mfn != 0)) { if ( shadow_mode_refcounts(d) ) @@ -2259,8 +2348,7 @@ int do_mmu_update( case PGT_l2_page_table: { l2_pgentry_t l2e = l2e_from_intpte(req.val); - okay = mod_l2_entry( - (l2_pgentry_t *)va, l2e, mfn, type_info); + okay = mod_l2_entry(va, l2e, mfn, type_info); } break; #if CONFIG_PAGING_LEVELS >= 3 @@ -2273,11 +2361,12 @@ int do_mmu_update( #endif #if CONFIG_PAGING_LEVELS >= 4 case PGT_l4_page_table: - { - l4_pgentry_t l4e = l4e_from_intpte(req.val); - okay = mod_l4_entry(va, l4e, mfn); - } - break; + if ( !IS_COMPAT(FOREIGNDOM) ) + { + l4_pgentry_t l4e = l4e_from_intpte(req.val); + okay = mod_l4_entry(va, l4e, mfn); + } + break; #endif } @@ -2292,15 +2381,11 @@ int do_mmu_update( break; if ( unlikely(shadow_mode_enabled(d)) ) - shadow_lock(d); - - *(intpte_t *)va = req.val; - okay = 1; - - if ( unlikely(shadow_mode_enabled(d)) ) + okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn)); + else { - shadow_validate_guest_entry(v, _mfn(mfn), va); - shadow_unlock(d); + *(intpte_t *)va = req.val; + okay = 1; } put_page_type(page); @@ -2385,7 +2470,7 @@ static int create_grant_pte_mapping( ASSERT(spin_is_locked(&d->big_lock)); - adjust_guest_l1e(nl1e); + adjust_guest_l1e(nl1e, d); gmfn = pte_addr >> PAGE_SHIFT; mfn = gmfn_to_mfn(d, gmfn); @@ -2409,7 +2494,7 @@ static int create_grant_pte_mapping( } ol1e = *(l1_pgentry_t *)va; - if ( !update_l1e(va, ol1e, nl1e, mfn, v) ) + 
if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) ) { put_page_type(page); rc = GNTST_general_error; @@ -2477,7 +2562,7 @@ static int destroy_grant_pte_mapping( } /* Delete pagetable entry. */ - if ( unlikely(!update_l1e( + if ( unlikely(!UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) ) { @@ -2506,7 +2591,7 @@ static int create_grant_va_mapping( ASSERT(spin_is_locked(&d->big_lock)); - adjust_guest_l1e(nl1e); + adjust_guest_l1e(nl1e, d); pl1e = guest_map_l1e(v, va, &gl1mfn); if ( !pl1e ) @@ -2515,7 +2600,7 @@ static int create_grant_va_mapping( return GNTST_general_error; } ol1e = *pl1e; - okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v); + okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v); guest_unmap_l1e(v, pl1e); pl1e = NULL; @@ -2553,7 +2638,7 @@ static int destroy_grant_va_mapping( } /* Delete pagetable entry. */ - if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), gl1mfn, v)) ) + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) ) { MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); rc = GNTST_general_error; @@ -2674,7 +2759,9 @@ int do_update_va_mapping(unsigned long va, u64 val64, flush_tlb_mask(d->domain_dirty_cpumask); break; default: - if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) ) + if ( unlikely(!IS_COMPAT(d) ? 
+ get_user(vmask, (unsigned long *)bmap_ptr) : + get_user(vmask, (unsigned int *)bmap_ptr)) ) rc = -EFAULT; pmask = vcpumask_to_pcpumask(d, vmask); flush_tlb_mask(pmask); @@ -2833,7 +2920,7 @@ long do_update_descriptor(u64 pa, u64 desc) mfn = gmfn_to_mfn(dom, gmfn); if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) || !mfn_valid(mfn) || - !check_descriptor(&d) ) + !check_descriptor(dom, &d) ) { UNLOCK_BIGLOCK(dom); return -EINVAL; @@ -2952,16 +3039,6 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) UNLOCK_BIGLOCK(d); - /* If we're doing FAST_FAULT_PATH, then shadow mode may have - cached the fact that this is an mmio region in the shadow - page tables. Blow the tables away to remove the cache. - This is pretty heavy handed, but this is a rare operation - (it might happen a dozen times during boot and then never - again), so it doesn't matter too much. */ - shadow_lock(d); - shadow_blow_tables(d); - shadow_unlock(d); - put_domain(d); break; @@ -3159,7 +3236,7 @@ static int ptwr_emulated_update( nl1e = l1e_from_intpte(val); if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) ) { - if ( (CONFIG_PAGING_LEVELS == 3) && + if ( (CONFIG_PAGING_LEVELS == 3 || IS_COMPAT(d)) && (bytes == 4) && !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) @@ -3181,34 +3258,37 @@ static int ptwr_emulated_update( } } - adjust_guest_l1e(nl1e); + adjust_guest_l1e(nl1e, d); /* Checked successfully: do the update (write or cmpxchg). 
*/ pl1e = map_domain_page(page_to_mfn(page)); pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK)); if ( do_cmpxchg ) { - if ( shadow_mode_enabled(d) ) - shadow_lock(d); + int okay; ol1e = l1e_from_intpte(old); - if ( cmpxchg((intpte_t *)pl1e, old, val) != old ) + + if ( shadow_mode_enabled(d) ) + { + intpte_t t = old; + okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e, + &t, val, _mfn(mfn)); + okay = (okay && t == old); + } + else + okay = (cmpxchg((intpte_t *)pl1e, old, val) == old); + + if ( !okay ) { - if ( shadow_mode_enabled(d) ) - shadow_unlock(d); unmap_domain_page(pl1e); put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d); return X86EMUL_CMPXCHG_FAILED; } - if ( unlikely(shadow_mode_enabled(d)) ) - { - shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e); - shadow_unlock(d); - } } else { ol1e = *pl1e; - if ( !update_l1e(pl1e, ol1e, nl1e, page_to_mfn(page), v) ) + if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) ) BUG(); } @@ -3299,10 +3379,10 @@ int ptwr_do_page_fault(struct vcpu *v, unsigned long addr, goto bail; ptwr_ctxt.ctxt.regs = guest_cpu_user_regs(); - ptwr_ctxt.ctxt.mode = X86EMUL_MODE_HOST; - ptwr_ctxt.cr2 = addr; - ptwr_ctxt.pte = pte; - if ( x86_emulate_memop(&ptwr_ctxt.ctxt, &ptwr_emulate_ops) ) + ptwr_ctxt.ctxt.address_bytes = IS_COMPAT(d) ? 4 : sizeof(long); + ptwr_ctxt.cr2 = addr; + ptwr_ctxt.pte = pte; + if ( x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops) ) goto bail; UNLOCK_BIGLOCK(d); diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c index 6dedae37a8..e1a2e44447 100644 --- a/xen/arch/x86/mm/shadow/common.c +++ b/xen/arch/x86/mm/shadow/common.c @@ -36,8 +36,24 @@ #include <asm/current.h> #include <asm/flushtlb.h> #include <asm/shadow.h> +#include <asm/shared.h> #include "private.h" + +/* Set up the shadow-specific parts of a domain struct at start of day. 
+ * Called for every domain from arch_domain_create() */ +void shadow_domain_init(struct domain *d) +{ + int i; + shadow_lock_init(d); + for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) + INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); + INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); + INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); + INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); +} + + #if SHADOW_AUDIT int shadow_audit_enable = 0; @@ -94,7 +110,7 @@ static int hvm_translate_linear_addr( unsigned long limit, addr = offset; uint32_t last_byte; - if ( sh_ctxt->ctxt.mode != X86EMUL_MODE_PROT64 ) + if ( sh_ctxt->ctxt.address_bytes != 8 ) { /* * COMPATIBILITY MODE: Apply segment checks and add base. @@ -391,7 +407,7 @@ struct x86_emulate_ops *shadow_init_emulation( if ( !is_hvm_vcpu(v) ) { - sh_ctxt->ctxt.mode = X86EMUL_MODE_HOST; + sh_ctxt->ctxt.address_bytes = sizeof(long); return &pv_shadow_emulator_ops; } @@ -401,13 +417,11 @@ struct x86_emulate_ops *shadow_init_emulation( /* Work out the emulation mode. */ if ( hvm_long_mode_enabled(v) ) - sh_ctxt->ctxt.mode = creg->attr.fields.l ? - X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32; + sh_ctxt->ctxt.address_bytes = creg->attr.fields.l ? 8 : 4; else if ( regs->eflags & X86_EFLAGS_VM ) - sh_ctxt->ctxt.mode = X86EMUL_MODE_REAL; + sh_ctxt->ctxt.address_bytes = 2; else - sh_ctxt->ctxt.mode = creg->attr.fields.db ? - X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + sh_ctxt->ctxt.address_bytes = creg->attr.fields.db ? 4 : 2; /* Attempt to prefetch whole instruction. */ sh_ctxt->insn_buf_bytes = @@ -434,7 +448,7 @@ void shadow_promote(struct vcpu *v, mfn_t gmfn, unsigned int type) ASSERT(mfn_valid(gmfn)); /* We should never try to promote a gmfn that has writeable mappings */ - ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0); + ASSERT(sh_remove_write_access(v, gmfn, 0, 0) == 0); /* Is the page already shadowed? 
*/ if ( !test_and_set_bit(_PGC_page_table, &page->count_info) ) @@ -466,8 +480,7 @@ void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type) * Returns a bitmask of SHADOW_SET_* flags. */ int -__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size) +sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size) { int result = 0; struct page_info *page = mfn_to_page(gmfn); @@ -546,22 +559,9 @@ __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, } -int -shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry) -/* This is the entry point from hypercalls. It returns a bitmask of all the - * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */ -{ - int rc; - - ASSERT(shadow_lock_is_acquired(v->domain)); - rc = __shadow_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t)); - shadow_audit_tables(v); - return rc; -} - void -shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size) +sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, + void *entry, u32 size) /* This is the entry point for emulated writes to pagetables in HVM guests and * PV translated guests. */ @@ -569,8 +569,8 @@ shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, struct domain *d = v->domain; int rc; - ASSERT(shadow_lock_is_acquired(v->domain)); - rc = __shadow_validate_guest_entry(v, gmfn, entry, size); + ASSERT(shadow_locked_by_me(v->domain)); + rc = sh_validate_guest_entry(v, gmfn, entry, size); if ( rc & SHADOW_SET_FLUSH ) /* Need to flush TLBs to pick up shadow PT changes */ flush_tlb_mask(d->domain_dirty_cpumask); @@ -585,6 +585,38 @@ shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, } } +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t new, mfn_t gmfn) +/* Write a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 for success. 
*/ +{ + int failed; + shadow_lock(v->domain); + failed = __copy_to_user(p, &new, sizeof(new)); + if ( failed != sizeof(new) ) + sh_validate_guest_entry(v, gmfn, p, sizeof(new)); + shadow_unlock(v->domain); + return (failed == 0); +} + +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t *old, intpte_t new, mfn_t gmfn) +/* Cmpxchg a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 if not. + * N.B. caller should check the value of "old" to see if the + * cmpxchg itself was successful. */ +{ + int failed; + intpte_t t = *old; + shadow_lock(v->domain); + failed = cmpxchg_user(p, t, new); + if ( t == *old ) + sh_validate_guest_entry(v, gmfn, p, sizeof(new)); + *old = t; + shadow_unlock(v->domain); + return (failed == 0); +} + /**************************************************************************/ /* Memory management for shadow pages. */ @@ -791,7 +823,7 @@ void shadow_prealloc(struct domain *d, unsigned int order) /* Deliberately free all the memory we can: this will tear down all of * this domain's shadows */ -void shadow_blow_tables(struct domain *d) +static void shadow_blow_tables(struct domain *d) { struct list_head *l, *t; struct shadow_page_info *sp; @@ -858,7 +890,7 @@ mfn_t shadow_alloc(struct domain *d, void *p; int i; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); ASSERT(order <= SHADOW_MAX_ORDER); ASSERT(shadow_type != SH_type_none); perfc_incrc(shadow_alloc); @@ -928,7 +960,7 @@ void shadow_free(struct domain *d, mfn_t smfn) unsigned long mask; int i; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); perfc_incrc(shadow_free); shadow_type = sp->type; @@ -989,7 +1021,7 @@ void shadow_free(struct domain *d, mfn_t smfn) * Also, we only ever allocate a max-order chunk, so as to preserve * the invariant that shadow_prealloc() always works. 
* Returns 0 iff it can't get a chunk (the caller should then - * free up some pages in domheap and call set_sh_allocation); + * free up some pages in domheap and call sh_set_allocation); * returns non-zero on success. */ static int @@ -997,7 +1029,7 @@ shadow_alloc_p2m_pages(struct domain *d) { struct page_info *pg; u32 i; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); if ( d->arch.shadow.total_pages < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) ) @@ -1143,20 +1175,20 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry); /* Also, any vcpus running on shadows of the p2m need to * reload their CR3s so the change propagates to the shadow */ - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); for_each_vcpu(d, v) { if ( pagetable_get_pfn(v->arch.guest_table) == pagetable_get_pfn(d->arch.phys_table) && v->arch.shadow.mode != NULL ) - v->arch.shadow.mode->update_cr3(v); + v->arch.shadow.mode->update_cr3(v, 0); } } #endif /* The P2M can be shadowed: keep the shadows synced */ if ( d->vcpu[0] != NULL ) - (void)__shadow_validate_guest_entry(d->vcpu[0], *table_mfn, - p2m_entry, sizeof *p2m_entry); + (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn, + p2m_entry, sizeof *p2m_entry); } *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); next = sh_map_domain_page(*table_mfn); @@ -1216,8 +1248,8 @@ shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) /* The P2M can be shadowed: keep the shadows synced */ if ( d->vcpu[0] != NULL ) - (void)__shadow_validate_guest_entry( - d->vcpu[0], table_mfn, p2m_entry, sizeof(*p2m_entry)); + (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, + p2m_entry, sizeof(*p2m_entry)); /* Success */ rv = 1; @@ -1427,15 +1459,15 @@ static void shadow_p2m_teardown(struct domain *d) * Input will be rounded up to at least shadow_min_acceptable_pages(), * plus space for the p2m table. 
* Returns 0 for success, non-zero for failure. */ -static unsigned int set_sh_allocation(struct domain *d, - unsigned int pages, - int *preempted) +static unsigned int sh_set_allocation(struct domain *d, + unsigned int pages, + int *preempted) { struct shadow_page_info *sp; unsigned int lower_bound; int j; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); /* Don't allocate less than the minimum acceptable, plus one page per * megabyte of RAM (for the p2m table) */ @@ -1499,20 +1531,12 @@ static unsigned int set_sh_allocation(struct domain *d, return 0; } -unsigned int shadow_set_allocation(struct domain *d, - unsigned int megabytes, - int *preempted) -/* Hypercall interface to set the shadow memory allocation */ +/* Return the size of the shadow pool, rounded up to the nearest MB */ +static unsigned int shadow_get_allocation(struct domain *d) { - unsigned int rv; - shadow_lock(d); - rv = set_sh_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted); - SHADOW_PRINTK("dom %u allocation now %u pages (%u MB)\n", - d->domain_id, - d->arch.shadow.total_pages, - shadow_get_allocation(d)); - shadow_unlock(d); - return rv; + unsigned int pg = d->arch.shadow.total_pages; + return ((pg >> (20 - PAGE_SHIFT)) + + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); } /**************************************************************************/ @@ -1614,7 +1638,7 @@ static int shadow_hash_alloc(struct domain *d) { struct shadow_page_info **table; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); ASSERT(!d->arch.shadow.hash_table); table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS); @@ -1629,7 +1653,7 @@ static int shadow_hash_alloc(struct domain *d) * This function does not care whether the table is populated. 
*/ static void shadow_hash_teardown(struct domain *d) { - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); ASSERT(d->arch.shadow.hash_table); xfree(d->arch.shadow.hash_table); @@ -1645,7 +1669,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v, unsigned long n, unsigned int t) struct shadow_page_info *sp, *prev; key_t key; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); ASSERT(d->arch.shadow.hash_table); ASSERT(t); @@ -1699,7 +1723,7 @@ void shadow_hash_insert(struct vcpu *v, unsigned long n, unsigned int t, struct shadow_page_info *sp; key_t key; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); ASSERT(d->arch.shadow.hash_table); ASSERT(t); @@ -1725,7 +1749,7 @@ void shadow_hash_delete(struct vcpu *v, unsigned long n, unsigned int t, struct shadow_page_info *sp, *x; key_t key; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); ASSERT(d->arch.shadow.hash_table); ASSERT(t); @@ -1780,7 +1804,7 @@ static void hash_foreach(struct vcpu *v, struct shadow_page_info *x; /* Say we're here, to stop hash-lookups reordering the chains */ - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); ASSERT(d->arch.shadow.hash_walking == 0); d->arch.shadow.hash_walking = 1; @@ -1889,24 +1913,24 @@ void sh_destroy_shadow(struct vcpu *v, mfn_t smfn) * level and fault_addr desribe how we found this to be a pagetable; * level==0 means we have some other reason for revoking write access.*/ -int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, - unsigned int level, - unsigned long fault_addr) +int sh_remove_write_access(struct vcpu *v, mfn_t gmfn, + unsigned int level, + unsigned long fault_addr) { /* Dispatch table for getting per-type functions */ static hash_callback_t callbacks[16] = { NULL, /* none */ #if CONFIG_PAGING_LEVELS == 2 - SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* fl1_32 */ + 
SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* fl1_32 */ #else - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* fl1_32 */ #endif NULL, /* l2_32 */ #if CONFIG_PAGING_LEVELS >= 3 - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* l1_pae */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* fl1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* l1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* fl1_pae */ #else NULL, /* l1_pae */ NULL, /* fl1_pae */ @@ -1914,8 +1938,8 @@ int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, NULL, /* l2_pae */ NULL, /* l2h_pae */ #if CONFIG_PAGING_LEVELS >= 4 - SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64 */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* l1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* fl1_64 */ #else NULL, /* l1_64 */ NULL, /* fl1_64 */ @@ -1937,7 +1961,7 @@ int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, ; struct page_info *pg = mfn_to_page(gmfn); - ASSERT(shadow_lock_is_acquired(v->domain)); + ASSERT(shadow_locked_by_me(v->domain)); /* Only remove writable mappings if we are doing shadow refcounts. * In guest refcounting, we trust Xen to already be restricting @@ -2077,25 +2101,25 @@ int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, /* Remove all mappings of a guest frame from the shadow tables. * Returns non-zero if we need to flush TLBs. 
*/ -int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn) +int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn) { struct page_info *page = mfn_to_page(gmfn); - int expected_count; + int expected_count, do_locking; /* Dispatch table for getting per-type functions */ static hash_callback_t callbacks[16] = { NULL, /* none */ #if CONFIG_PAGING_LEVELS == 2 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* fl1_32 */ #else - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* fl1_32 */ #endif NULL, /* l2_32 */ #if CONFIG_PAGING_LEVELS >= 3 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* l1_pae */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* fl1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* l1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* fl1_pae */ #else NULL, /* l1_pae */ NULL, /* fl1_pae */ @@ -2103,8 +2127,8 @@ int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn) NULL, /* l2_pae */ NULL, /* l2h_pae */ #if CONFIG_PAGING_LEVELS >= 4 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64 */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* l1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* fl1_64 */ #else NULL, /* l1_64 */ NULL, /* fl1_64 */ @@ -2129,7 +2153,12 @@ int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn) if ( (page->count_info & PGC_count_mask) == 0 ) return 0; - ASSERT(shadow_lock_is_acquired(v->domain)); + /* Although this is an externally visible function, we do not know + * whether the shadow lock will be held when it 
is called (since it + * can be called via put_page_type when we clear a shadow l1e). + * If the lock isn't held, take it for the duration of the call. */ + do_locking = !shadow_locked_by_me(v->domain); + if ( do_locking ) shadow_lock(v->domain); /* XXX TODO: * Heuristics for finding the (probably) single mapping of this gmfn */ @@ -2154,6 +2183,8 @@ int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn) } } + if ( do_locking ) shadow_unlock(v->domain); + /* We killed at least one mapping, so must flush TLBs. */ return 1; } @@ -2236,9 +2267,10 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all) * (all != 0 implies fast == 0) */ { - struct page_info *pg; + struct page_info *pg = mfn_to_page(gmfn); mfn_t smfn; u32 sh_flags; + int do_locking; unsigned char t; /* Dispatch table for getting per-type functions: each level must @@ -2296,15 +2328,19 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all) 0 /* unused */ }; - ASSERT(shadow_lock_is_acquired(v->domain)); ASSERT(!(all && fast)); - pg = mfn_to_page(gmfn); - /* Bail out now if the page is not shadowed */ if ( (pg->count_info & PGC_page_table) == 0 ) return; + /* Although this is an externally visible function, we do not know + * whether the shadow lock will be held when it is called (since it + * can be called via put_page_type when we clear a shadow l1e). + * If the lock isn't held, take it for the duration of the call. */ + do_locking = !shadow_locked_by_me(v->domain); + if ( do_locking ) shadow_lock(v->domain); + SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n", v->domain->domain_id, v->vcpu_id, mfn_x(gmfn)); @@ -2356,14 +2392,16 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all) /* Need to flush TLBs now, so that linear maps are safe next time we * take a fault. 
*/ flush_tlb_mask(v->domain->domain_dirty_cpumask); + + if ( do_locking ) shadow_unlock(v->domain); } -void -shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn) +static void +sh_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn) /* Even harsher: this is a HVM page that we thing is no longer a pagetable. * Unshadow it, and recursively unshadow pages that reference it. */ { - shadow_remove_all_shadows(v, gmfn); + sh_remove_shadows(v, gmfn, 0, 1); /* XXX TODO: * Rework this hashtable walker to return a linked-list of all * the shadows it modified, then do breadth-first recursion @@ -2376,13 +2414,13 @@ shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn) /**************************************************************************/ -void sh_update_paging_modes(struct vcpu *v) +static void sh_update_paging_modes(struct vcpu *v) { struct domain *d = v->domain; struct shadow_paging_mode *old_mode = v->arch.shadow.mode; mfn_t old_guest_table; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); // Valid transitions handled by this function: // - For PV guests: @@ -2394,7 +2432,8 @@ void sh_update_paging_modes(struct vcpu *v) // First, tear down any old shadow tables held by this vcpu. 
// - shadow_detach_old_tables(v); + if ( v->arch.shadow.mode ) + v->arch.shadow.mode->detach_old_tables(v); if ( !is_hvm_domain(d) ) { @@ -2403,7 +2442,7 @@ void sh_update_paging_modes(struct vcpu *v) /// #if CONFIG_PAGING_LEVELS == 4 if ( pv_32bit_guest(v) ) - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,3); + v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); else v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); #elif CONFIG_PAGING_LEVELS == 3 @@ -2493,7 +2532,7 @@ void sh_update_paging_modes(struct vcpu *v) if ( pagetable_is_null(v->arch.monitor_table) ) { - mfn_t mmfn = shadow_make_monitor_table(v); + mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(mmfn); make_cr3(v, mfn_x(mmfn)); hvm_update_host_cr3(v); @@ -2528,7 +2567,7 @@ void sh_update_paging_modes(struct vcpu *v) old_mfn = pagetable_get_mfn(v->arch.monitor_table); v->arch.monitor_table = pagetable_null(); - new_mfn = v->arch.shadow.mode->make_monitor_table(v); + new_mfn = v->arch.shadow.mode->make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(new_mfn); SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n", mfn_x(new_mfn)); @@ -2549,7 +2588,14 @@ void sh_update_paging_modes(struct vcpu *v) // This *does* happen, at least for CR4.PGE... 
} - v->arch.shadow.mode->update_cr3(v); + v->arch.shadow.mode->update_cr3(v, 0); +} + +void shadow_update_paging_modes(struct vcpu *v) +{ + shadow_lock(v->domain); + sh_update_paging_modes(v); + shadow_unlock(v->domain); } /**************************************************************************/ @@ -2560,7 +2606,7 @@ static void sh_new_mode(struct domain *d, u32 new_mode) { struct vcpu *v; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); ASSERT(d != current->domain); d->arch.shadow.mode = new_mode; if ( new_mode & SHM2_translate ) @@ -2610,9 +2656,9 @@ int shadow_enable(struct domain *d, u32 mode) /* Init the shadow memory allocation if the user hasn't done so */ old_pages = d->arch.shadow.total_pages; if ( old_pages == 0 ) - if ( set_sh_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ + if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ { - set_sh_allocation(d, 0, NULL); + sh_set_allocation(d, 0, NULL); rv = -ENOMEM; goto out; } @@ -2620,7 +2666,7 @@ int shadow_enable(struct domain *d, u32 mode) /* Init the hash table */ if ( shadow_hash_alloc(d) != 0 ) { - set_sh_allocation(d, old_pages, NULL); + sh_set_allocation(d, old_pages, NULL); rv = -ENOMEM; goto out; } @@ -2630,7 +2676,7 @@ int shadow_enable(struct domain *d, u32 mode) if ( !shadow_alloc_p2m_table(d) ) { shadow_hash_teardown(d); - set_sh_allocation(d, old_pages, NULL); + sh_set_allocation(d, old_pages, NULL); shadow_p2m_teardown(d); rv = -ENOMEM; goto out; @@ -2661,7 +2707,7 @@ void shadow_teardown(struct domain *d) ASSERT(test_bit(_DOMF_dying, &d->domain_flags)); ASSERT(d != current->domain); - if ( !shadow_lock_is_acquired(d) ) + if ( !shadow_locked_by_me(d) ) shadow_lock(d); /* Keep various asserts happy */ if ( shadow_mode_enabled(d) ) @@ -2669,13 +2715,16 @@ void shadow_teardown(struct domain *d) /* Release the shadow and monitor tables held by each vcpu */ for_each_vcpu(d, v) { - shadow_detach_old_tables(v); - if ( shadow_mode_external(d) ) + if ( 
v->arch.shadow.mode ) { - mfn = pagetable_get_mfn(v->arch.monitor_table); - if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) - shadow_destroy_monitor_table(v, mfn); - v->arch.monitor_table = pagetable_null(); + v->arch.shadow.mode->detach_old_tables(v); + if ( shadow_mode_external(d) ) + { + mfn = pagetable_get_mfn(v->arch.monitor_table); + if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) + v->arch.shadow.mode->destroy_monitor_table(v, mfn); + v->arch.monitor_table = pagetable_null(); + } } } } @@ -2689,7 +2738,7 @@ void shadow_teardown(struct domain *d) d->arch.shadow.free_pages, d->arch.shadow.p2m_pages); /* Destroy all the shadows and release memory to domheap */ - set_sh_allocation(d, 0, NULL); + sh_set_allocation(d, 0, NULL); /* Release the hash table back to xenheap */ if (d->arch.shadow.hash_table) shadow_hash_teardown(d); @@ -2744,7 +2793,7 @@ void shadow_final_teardown(struct domain *d) static int shadow_one_bit_enable(struct domain *d, u32 mode) /* Turn on a single shadow mode feature */ { - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); /* Sanity check the call */ if ( d == current->domain || (d->arch.shadow.mode & mode) ) @@ -2755,10 +2804,10 @@ static int shadow_one_bit_enable(struct domain *d, u32 mode) if ( d->arch.shadow.mode == 0 ) { /* Init the shadow memory allocation and the hash table */ - if ( set_sh_allocation(d, 1, NULL) != 0 + if ( sh_set_allocation(d, 1, NULL) != 0 || shadow_hash_alloc(d) != 0 ) { - set_sh_allocation(d, 0, NULL); + sh_set_allocation(d, 0, NULL); return -ENOMEM; } } @@ -2773,7 +2822,7 @@ static int shadow_one_bit_disable(struct domain *d, u32 mode) /* Turn off a single shadow mode feature */ { struct vcpu *v; - ASSERT(shadow_lock_is_acquired(d)); + ASSERT(shadow_locked_by_me(d)); /* Sanity check the call */ if ( d == current->domain || !(d->arch.shadow.mode & mode) ) @@ -2794,7 +2843,8 @@ static int shadow_one_bit_disable(struct domain *d, u32 mode) d->arch.shadow.p2m_pages); for_each_vcpu(d, v) { - 
shadow_detach_old_tables(v); + if ( v->arch.shadow.mode ) + v->arch.shadow.mode->detach_old_tables(v); #if CONFIG_PAGING_LEVELS == 4 if ( !(v->arch.flags & TF_kernel_mode) ) make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user)); @@ -2805,7 +2855,7 @@ static int shadow_one_bit_disable(struct domain *d, u32 mode) } /* Pull down the memory allocation */ - if ( set_sh_allocation(d, 0, NULL) != 0 ) + if ( sh_set_allocation(d, 0, NULL) != 0 ) { // XXX - How can this occur? // Seems like a bug to return an error now that we've @@ -2826,7 +2876,7 @@ static int shadow_one_bit_disable(struct domain *d, u32 mode) } /* Enable/disable ops for the "test" and "log-dirty" modes */ -int shadow_test_enable(struct domain *d) +static int shadow_test_enable(struct domain *d) { int ret; @@ -2849,7 +2899,7 @@ int shadow_test_enable(struct domain *d) return ret; } -int shadow_test_disable(struct domain *d) +static int shadow_test_disable(struct domain *d) { int ret; @@ -2867,7 +2917,7 @@ sh_alloc_log_dirty_bitmap(struct domain *d) { ASSERT(d->arch.shadow.dirty_bitmap == NULL); d->arch.shadow.dirty_bitmap_size = - (d->shared_info->arch.max_pfn + (BITS_PER_LONG - 1)) & + (arch_get_max_pfn(d) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); d->arch.shadow.dirty_bitmap = xmalloc_array(unsigned long, @@ -2968,8 +3018,8 @@ sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) if ( v != NULL ) { - shadow_remove_all_shadows_and_parents(v, _mfn(mfn)); - if ( shadow_remove_all_mappings(v, _mfn(mfn)) ) + sh_remove_all_shadows_and_parents(v, _mfn(mfn)); + if ( sh_remove_all_mappings(v, _mfn(mfn)) ) flush_tlb_mask(d->domain_dirty_cpumask); } @@ -3012,8 +3062,8 @@ shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, v = d->vcpu[0]; if ( v != NULL ) { - shadow_remove_all_shadows_and_parents(v, omfn); - if ( shadow_remove_all_mappings(v, omfn) ) + sh_remove_all_shadows_and_parents(v, omfn); + if ( sh_remove_all_mappings(v, omfn) ) 
flush_tlb_mask(d->domain_dirty_cpumask); } set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); @@ -3043,6 +3093,17 @@ shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, shadow_set_p2m_entry(d, gfn, _mfn(mfn)); set_gpfn_from_mfn(mfn, gfn); + +#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) + /* If we're doing FAST_FAULT_PATH, then shadow mode may have + cached the fact that this is an mmio region in the shadow + page tables. Blow the tables away to remove the cache. + This is pretty heavy handed, but this is a rare operation + (it might happen a dozen times during boot and then never + again), so it doesn't matter too much. */ + shadow_blow_tables(d); +#endif + shadow_audit_p2m(d); shadow_unlock(d); } @@ -3062,7 +3123,7 @@ void shadow_convert_to_log_dirty(struct vcpu *v, mfn_t smfn) static int shadow_log_dirty_op( struct domain *d, struct xen_domctl_shadow_op *sc) { - int i, rv = 0, clean = 0; + int i, rv = 0, clean = 0, peek = 1; domain_pause(d); shadow_lock(d); @@ -3076,12 +3137,12 @@ static int shadow_log_dirty_op( d->arch.shadow.dirty_count); sc->stats.fault_count = d->arch.shadow.fault_count; - sc->stats.dirty_count = d->arch.shadow.dirty_count; - - if ( clean ) + sc->stats.dirty_count = d->arch.shadow.dirty_count; + + if ( clean ) { - /* Need to revoke write access to the domain's pages again. - * In future, we'll have a less heavy-handed approach to this, + /* Need to revoke write access to the domain's pages again. + * In future, we'll have a less heavy-handed approach to this, * but for now, we just unshadow everything except Xen. */ shadow_blow_tables(d); @@ -3089,31 +3150,37 @@ static int shadow_log_dirty_op( d->arch.shadow.dirty_count = 0; } - if ( guest_handle_is_null(sc->dirty_bitmap) || - (d->arch.shadow.dirty_bitmap == NULL) ) + if ( guest_handle_is_null(sc->dirty_bitmap) ) + /* caller may have wanted just to clean the state or access stats. 
*/ + peek = 0; + + if ( (peek || clean) && (d->arch.shadow.dirty_bitmap == NULL) ) { - rv = -EINVAL; + rv = -EINVAL; /* perhaps should be ENOMEM? */ goto out; } if ( sc->pages > d->arch.shadow.dirty_bitmap_size ) - sc->pages = d->arch.shadow.dirty_bitmap_size; + sc->pages = d->arch.shadow.dirty_bitmap_size; #define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */ for ( i = 0; i < sc->pages; i += CHUNK ) { - int bytes = ((((sc->pages - i) > CHUNK) - ? CHUNK + int bytes = ((((sc->pages - i) > CHUNK) + ? CHUNK : (sc->pages - i)) + 7) / 8; - - if ( copy_to_guest_offset( - sc->dirty_bitmap, - i/(8*sizeof(unsigned long)), - d->arch.shadow.dirty_bitmap + (i/(8*sizeof(unsigned long))), - (bytes + sizeof(unsigned long) - 1) / sizeof(unsigned long)) ) + + if ( likely(peek) ) { - rv = -EINVAL; - goto out; + if ( copy_to_guest_offset( + sc->dirty_bitmap, + i/(8*sizeof(unsigned long)), + d->arch.shadow.dirty_bitmap+(i/(8*sizeof(unsigned long))), + (bytes+sizeof(unsigned long)-1) / sizeof(unsigned long)) ) + { + rv = -EFAULT; + goto out; + } } if ( clean ) @@ -3130,14 +3197,13 @@ static int shadow_log_dirty_op( /* Mark a page as dirty */ -void sh_do_mark_dirty(struct domain *d, mfn_t gmfn) +void sh_mark_dirty(struct domain *d, mfn_t gmfn) { unsigned long pfn; - ASSERT(shadow_lock_is_acquired(d)); - ASSERT(shadow_mode_log_dirty(d)); + ASSERT(shadow_locked_by_me(d)); - if ( !mfn_valid(gmfn) ) + if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) ) return; ASSERT(d->arch.shadow.dirty_bitmap != NULL); @@ -3181,13 +3247,19 @@ void sh_do_mark_dirty(struct domain *d, mfn_t gmfn) } } +void shadow_mark_dirty(struct domain *d, mfn_t gmfn) +{ + shadow_lock(d); + sh_mark_dirty(d, gmfn); + shadow_unlock(d); +} /**************************************************************************/ /* Shadow-control XEN_DOMCTL dispatcher */ int shadow_domctl(struct domain *d, - xen_domctl_shadow_op_t *sc, - XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) + xen_domctl_shadow_op_t *sc, + 
XEN_GUEST_HANDLE(void) u_domctl) { int rc, preempted = 0; @@ -3233,7 +3305,18 @@ int shadow_domctl(struct domain *d, return 0; case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: - rc = shadow_set_allocation(d, sc->mb, &preempted); + shadow_lock(d); + if ( sc->mb == 0 && shadow_mode_enabled(d) ) + { + /* Can't set the allocation to zero unless the domain stops using + * shadow pagetables first */ + SHADOW_ERROR("Can't set shadow allocation to zero, domain %u" + " is still using shadows.\n", d->domain_id); + shadow_unlock(d); + return -EINVAL; + } + rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted); + shadow_unlock(d); if ( preempted ) /* Not finished. Set up to re-run the call. */ rc = hypercall_create_continuation( diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c index 0b9ec5512d..e040e60e71 100644 --- a/xen/arch/x86/mm/shadow/multi.c +++ b/xen/arch/x86/mm/shadow/multi.c @@ -227,7 +227,7 @@ guest_supports_nx(struct vcpu *v) static inline int guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op) { - ASSERT(!guest_op || shadow_lock_is_acquired(v->domain)); + ASSERT(!guest_op || shadow_locked_by_me(v->domain)); perfc_incrc(shadow_guest_walk); memset(gw, 0, sizeof(*gw)); @@ -243,7 +243,7 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op) gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e)); if ( !mfn_valid(gw->l3mfn) ) return 1; /* This mfn is a pagetable: make sure the guest can't write to it. 
*/ - if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 ) + if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn)) + guest_l3_table_offset(va); @@ -257,7 +257,7 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op) gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e)); if ( !mfn_valid(gw->l2mfn) ) return 1; /* This mfn is a pagetable: make sure the guest can't write to it. */ - if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 ) + if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn)) + guest_l2_table_offset(va); @@ -299,7 +299,7 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op) if ( !mfn_valid(gw->l1mfn) ) return 1; /* This mfn is a pagetable: make sure the guest can't write to it. */ if ( guest_op - && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 ) + && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn)) + guest_l1_table_offset(va); @@ -442,7 +442,7 @@ static u32 guest_set_ad_bits(struct vcpu *v, ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1))); ASSERT(level <= GUEST_PAGING_LEVELS); - ASSERT(shadow_lock_is_acquired(v->domain)); + ASSERT(shadow_locked_by_me(v->domain)); flags = guest_l1e_get_flags(*ep); @@ -492,7 +492,7 @@ static u32 guest_set_ad_bits(struct vcpu *v, u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask; /* More than one type bit set in shadow-flags? 
*/ if ( shflags & ~(1UL << find_first_set_bit(shflags)) ) - res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep)); + res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep)); } /* We should never need to flush the TLB or recopy PAE entries */ @@ -851,9 +851,7 @@ static inline void safe_write_entry(void *dst, void *src) * then writing the high word before the low word. */ BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long)); d[0] = 0; - wmb(); d[1] = s[1]; - wmb(); d[0] = s[0]; #else /* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word, @@ -1422,7 +1420,7 @@ void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn) } #endif -#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3 +#if (CONFIG_PAGING_LEVELS == 3 || defined(CONFIG_COMPAT)) && GUEST_PAGING_LEVELS == 3 // For 3-on-3 PV guests, we need to make sure the xen mappings are in // place, which means that we need to populate the l2h entry in the l3 // table. @@ -1432,12 +1430,20 @@ void sh_install_xen_entries_in_l2h(struct vcpu *v, { struct domain *d = v->domain; shadow_l2e_t *sl2e; +#if CONFIG_PAGING_LEVELS == 3 int i; +#else + + if ( !pv_32bit_guest(v) ) + return; +#endif sl2e = sh_map_domain_page(sl2hmfn); ASSERT(sl2e != NULL); ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t)); +#if CONFIG_PAGING_LEVELS == 3 + /* Copy the common Xen mappings from the idle domain */ memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)], &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT], @@ -1478,6 +1484,15 @@ void sh_install_xen_entries_in_l2h(struct vcpu *v, } sh_unmap_domain_page(p2m); } + +#else + + /* Copy the common Xen mappings from the idle domain */ + memcpy(&sl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], + &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], + COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*sl2e)); + +#endif sh_unmap_domain_page(sl2e); } @@ -1638,12 +1653,15 @@ make_fl1_shadow(struct vcpu *v, gfn_t gfn) mfn_t 
sh_make_monitor_table(struct vcpu *v) { + struct domain *d = v->domain; ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0); + /* Guarantee we can get the memory we need */ + shadow_prealloc(d, SHADOW_MAX_ORDER); + #if CONFIG_PAGING_LEVELS == 4 { - struct domain *d = v->domain; mfn_t m4mfn; m4mfn = shadow_alloc(d, SH_type_monitor_table, 0); sh_install_xen_entries_in_l4(v, m4mfn, m4mfn); @@ -1660,6 +1678,19 @@ sh_make_monitor_table(struct vcpu *v) l4e = sh_map_domain_page(m4mfn); l4e[0] = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR); sh_unmap_domain_page(l4e); + if ( pv_32bit_guest(v) ) + { + // Install a monitor l2 table in slot 3 of the l3 table. + // This is used for all Xen entries. + mfn_t m2mfn; + l3_pgentry_t *l3e; + m2mfn = shadow_alloc(d, SH_type_monitor_table, 0); + mfn_to_page(m2mfn)->shadow_flags = 2; + l3e = sh_map_domain_page(m3mfn); + l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT); + sh_install_xen_entries_in_l2h(v, m2mfn); + sh_unmap_domain_page(l3e); + } } #endif /* SHADOW_PAGING_LEVELS < 4 */ return m4mfn; @@ -1668,7 +1699,6 @@ sh_make_monitor_table(struct vcpu *v) #elif CONFIG_PAGING_LEVELS == 3 { - struct domain *d = v->domain; mfn_t m3mfn, m2mfn; l3_pgentry_t *l3e; l2_pgentry_t *l2e; @@ -1702,7 +1732,6 @@ sh_make_monitor_table(struct vcpu *v) #elif CONFIG_PAGING_LEVELS == 2 { - struct domain *d = v->domain; mfn_t m2mfn; m2mfn = shadow_alloc(d, SH_type_monitor_table, 0); sh_install_xen_entries_in_l2(v, m2mfn, m2mfn); @@ -2065,9 +2094,19 @@ void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn) #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4) /* Need to destroy the l3 monitor page in slot 0 too */ { + mfn_t m3mfn; l4_pgentry_t *l4e = sh_map_domain_page(mmfn); ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT); - shadow_free(d, _mfn(l4e_get_pfn(l4e[0]))); + m3mfn = _mfn(l4e_get_pfn(l4e[0])); + if ( pv_32bit_guest(v) ) + { + /* Need to destroy the l2 monitor page in slot 3 too */ + l3_pgentry_t *l3e = sh_map_domain_page(m3mfn); + 
ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT); + shadow_free(d, _mfn(l3e_get_pfn(l3e[3]))); + sh_unmap_domain_page(l3e); + } + shadow_free(d, m3mfn); sh_unmap_domain_page(l4e); } #elif CONFIG_PAGING_LEVELS == 3 @@ -2645,18 +2684,27 @@ static int sh_page_fault(struct vcpu *v, else { /* This should be exceptionally rare: another vcpu has fixed - * the tables between the fault and our reading the l1e. - * Fall through to the normal fault handing logic */ + * the tables between the fault and our reading the l1e. + * Retry and let the hardware give us the right fault next time. */ perfc_incrc(shadow_fault_fast_fail); - SHADOW_PRINTK("fast path false alarm!\n"); - /* Don't pass the reserved-bit bit: if we look at the fault - * below and decide to pass it to the guest, the reserved-bit - * bit won't make sense there. */ - regs->error_code &= ~PFEC_reserved_bit; + SHADOW_PRINTK("fast path false alarm!\n"); + return EXCRET_fault_fixed; } } #endif /* SHOPT_FAST_FAULT_PATH */ + /* Detect if this page fault happened while we were already in Xen + * doing a shadow operation. If that happens, the only thing we can + * do is let Xen's normal fault handlers try to fix it. In any case, + * a diagnostic trace of the fault will be more useful than + * a BUG() when we try to take the lock again. */ + if ( unlikely(shadow_locked_by_me(d)) ) + { + SHADOW_ERROR("Recursive shadow fault: lock was taken by %s\n", + d->arch.shadow.locker_function); + return 0; + } + shadow_lock(d); shadow_audit_tables(v); @@ -2827,7 +2875,7 @@ static int sh_page_fault(struct vcpu *v, * it seems very unlikely that any OS grants user access to page tables. 
*/ if ( (regs->error_code & PFEC_user_mode) || - x86_emulate_memop(&emul_ctxt.ctxt, emul_ops) ) + x86_emulate(&emul_ctxt.ctxt, emul_ops) ) { SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n", mfn_x(gmfn)); @@ -2835,7 +2883,7 @@ static int sh_page_fault(struct vcpu *v, /* If this is actually a page table, then we have a bug, and need * to support more operations in the emulator. More likely, * though, this is a hint that this page should not be shadowed. */ - shadow_remove_all_shadows(v, gmfn); + sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */); } /* Emulator has changed the user registers: write back */ @@ -3035,12 +3083,15 @@ sh_update_linear_entries(struct vcpu *v) #elif (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 3) - /* This case only exists in HVM. To give ourselves a linear map of the - * shadows, we need to extend a PAE shadow to 4 levels. We do this by - * having a monitor l3 in slot 0 of the monitor l4 table, and - * copying the PAE l3 entries into it. Then, by having the monitor l4e - * for shadow pagetables also point to the monitor l4, we can use it - * to access the shadows. */ + /* PV: XXX + * + * HVM: To give ourselves a linear map of the shadows, we need to + * extend a PAE shadow to 4 levels. We do this by having a monitor + * l3 in slot 0 of the monitor l4 table, and copying the PAE l3 + * entries into it. Then, by having the monitor l4e for shadow + * pagetables also point to the monitor l4, we can use it to access + * the shadows. 
+ */ if ( shadow_mode_external(d) ) { @@ -3068,7 +3119,7 @@ sh_update_linear_entries(struct vcpu *v) sh_unmap_domain_page(ml4e); } - /* Shadow l3 tables are made up by update_cr3 */ + /* Shadow l3 tables are made up by sh_update_cr3 */ sl3e = v->arch.shadow.l3table; for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) @@ -3083,6 +3134,8 @@ sh_update_linear_entries(struct vcpu *v) if ( v != current ) sh_unmap_domain_page(ml3e); } + else + domain_crash(d); /* XXX */ #elif CONFIG_PAGING_LEVELS == 3 @@ -3106,7 +3159,7 @@ sh_update_linear_entries(struct vcpu *v) int unmap_l2e = 0; #if GUEST_PAGING_LEVELS == 2 - /* Shadow l3 tables were built by update_cr3 */ + /* Shadow l3 tables were built by sh_update_cr3 */ if ( shadow_mode_external(d) ) shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; else @@ -3329,12 +3382,16 @@ sh_set_toplevel_shadow(struct vcpu *v, static void -sh_update_cr3(struct vcpu *v) +sh_update_cr3(struct vcpu *v, int do_locking) /* Updates vcpu->arch.cr3 after the guest has changed CR3. * Paravirtual guests should set v->arch.guest_table (and guest_table_user, * if appropriate). - * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works, - * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards. + * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works; + * this function will call hvm_update_guest_cr3() to tell them where the + * shadow tables are. + * If do_locking != 0, assume we are being called from outside the + * shadow code, and must take and release the shadow lock; otherwise + * that is the caller's responsibility. 
*/ { struct domain *d = v->domain; @@ -3343,7 +3400,16 @@ sh_update_cr3(struct vcpu *v) u32 guest_idx=0; #endif - ASSERT(shadow_lock_is_acquired(v->domain)); + /* Don't do anything on an uninitialised vcpu */ + if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) + { + ASSERT(v->arch.cr3 == 0); + return; + } + + if ( do_locking ) shadow_lock(v->domain); + + ASSERT(shadow_locked_by_me(v->domain)); ASSERT(v->arch.shadow.mode); //// @@ -3382,17 +3448,12 @@ sh_update_cr3(struct vcpu *v) (unsigned long)pagetable_get_pfn(v->arch.guest_table)); #if GUEST_PAGING_LEVELS == 4 - if ( !(v->arch.flags & TF_kernel_mode) ) + if ( !(v->arch.flags & TF_kernel_mode) && !IS_COMPAT(v->domain) ) gmfn = pagetable_get_mfn(v->arch.guest_table_user); else #endif gmfn = pagetable_get_mfn(v->arch.guest_table); - if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) - { - ASSERT(v->arch.cr3 == 0); - return; - } //// //// vcpu->arch.guest_vtable @@ -3454,7 +3515,7 @@ sh_update_cr3(struct vcpu *v) * replace the old shadow pagetable(s), so that we can safely use the * (old) shadow linear maps in the writeable mapping heuristics. 
*/ #if GUEST_PAGING_LEVELS == 2 - if ( shadow_remove_write_access(v, gmfn, 2, 0) != 0 ) + if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow); #elif GUEST_PAGING_LEVELS == 3 @@ -3472,7 +3533,7 @@ sh_update_cr3(struct vcpu *v) { gl2gfn = guest_l3e_get_gfn(gl3e[i]); gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn); - flush |= shadow_remove_write_access(v, gl2mfn, 2, 0); + flush |= sh_remove_write_access(v, gl2mfn, 2, 0); } } if ( flush ) @@ -3494,7 +3555,7 @@ sh_update_cr3(struct vcpu *v) } } #elif GUEST_PAGING_LEVELS == 4 - if ( shadow_remove_write_access(v, gmfn, 4, 0) != 0 ) + if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow); #else @@ -3570,6 +3631,9 @@ sh_update_cr3(struct vcpu *v) /* Fix up the linear pagetable mappings */ sh_update_linear_entries(v); + + /* Release the lock, if we took it (otherwise it's the caller's problem) */ + if ( do_locking ) shadow_unlock(v->domain); } @@ -3625,7 +3689,8 @@ static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn) } #endif -int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn) +int sh_rm_write_access_from_l1(struct vcpu *v, mfn_t sl1mfn, + mfn_t readonly_mfn) /* Excises all writeable mappings to readonly_mfn from this l1 shadow table */ { shadow_l1e_t *sl1e; @@ -3656,7 +3721,7 @@ int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn) } -int sh_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn) +int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn) /* Excises all mappings to guest frame from this shadow l1 table */ { shadow_l1e_t *sl1e; @@ -3827,24 +3892,56 @@ static inline void * emulate_map_dest(struct vcpu *v, return NULL; } +static int safe_not_to_verify_write(mfn_t gmfn, void *dst, void *src, + int bytes) +{ +#if 
(SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY) + struct page_info *pg = mfn_to_page(gmfn); + if ( !(pg->shadow_flags & SHF_32) + && bytes == 4 + && ((unsigned long)dst & 3) == 0 ) + { + /* Not shadowed 32-bit: aligned 64-bit writes that leave the + * present bit unset are safe to ignore. */ + if ( (*(u64*)src & _PAGE_PRESENT) == 0 + && (*(u64*)dst & _PAGE_PRESENT) == 0 ) + return 1; + } + else if ( !(pg->shadow_flags & (SHF_PAE|SHF_64)) + && bytes == 8 + && ((unsigned long)dst & 7) == 0 ) + { + /* Not shadowed PAE/64-bit: aligned 32-bit writes that leave the + * present bit unset are safe to ignore. */ + if ( (*(u32*)src & _PAGE_PRESENT) == 0 + && (*(u32*)dst & _PAGE_PRESENT) == 0 ) + return 1; + } +#endif + return 0; +} + + int sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src, u32 bytes, struct sh_emulate_ctxt *sh_ctxt) { mfn_t mfn; void *addr; + int skip; if ( vaddr & (bytes-1) ) return X86EMUL_UNHANDLEABLE; - ASSERT(shadow_lock_is_acquired(v->domain)); + ASSERT(shadow_locked_by_me(v->domain)); ASSERT(((vaddr & ~PAGE_MASK) + bytes) <= PAGE_SIZE); if ( (addr = emulate_map_dest(v, vaddr, sh_ctxt, &mfn)) == NULL ) return X86EMUL_PROPAGATE_FAULT; + skip = safe_not_to_verify_write(mfn, addr, src, bytes); memcpy(addr, src, bytes); - shadow_validate_guest_pt_write(v, mfn, addr, bytes); + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes); /* If we are writing zeros to this page, might want to unshadow */ if ( likely(bytes >= 4) && (*(u32 *)addr == 0) ) @@ -3863,9 +3960,9 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr, mfn_t mfn; void *addr; unsigned long prev; - int rv = X86EMUL_CONTINUE; + int rv = X86EMUL_CONTINUE, skip; - ASSERT(shadow_lock_is_acquired(v->domain)); + ASSERT(shadow_locked_by_me(v->domain)); ASSERT(bytes <= sizeof(unsigned long)); if ( vaddr & (bytes-1) ) @@ -3874,6 +3971,8 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr, if ( (addr = emulate_map_dest(v, vaddr, sh_ctxt, &mfn)) == NULL ) return 
X86EMUL_PROPAGATE_FAULT; + skip = safe_not_to_verify_write(mfn, &new, &old, bytes); + switch ( bytes ) { case 1: prev = cmpxchg(((u8 *)addr), old, new); break; @@ -3886,7 +3985,9 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr, } if ( prev == old ) - shadow_validate_guest_pt_write(v, mfn, addr, bytes); + { + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes); + } else rv = X86EMUL_CMPXCHG_FAILED; @@ -3912,9 +4013,9 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr, mfn_t mfn; void *addr; u64 old, new, prev; - int rv = X86EMUL_CONTINUE; + int rv = X86EMUL_CONTINUE, skip; - ASSERT(shadow_lock_is_acquired(v->domain)); + ASSERT(shadow_locked_by_me(v->domain)); if ( vaddr & 7 ) return X86EMUL_UNHANDLEABLE; @@ -3924,10 +4025,13 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr, old = (((u64) old_hi) << 32) | (u64) old_lo; new = (((u64) new_hi) << 32) | (u64) new_lo; + skip = safe_not_to_verify_write(mfn, &new, &old, 8); prev = cmpxchg(((u64 *)addr), old, new); if ( prev == old ) - shadow_validate_guest_pt_write(v, mfn, addr, 8); + { + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, 8); + } else rv = X86EMUL_CMPXCHG_FAILED; diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h index 2cc61b830f..97903059d4 100644 --- a/xen/arch/x86/mm/shadow/multi.h +++ b/xen/arch/x86/mm/shadow/multi.h @@ -61,10 +61,10 @@ SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, SHADOW_LEVELS, GUEST_LEVELS) (struct vcpu *v, mfn_t sl4mfn); extern int -SHADOW_INTERNAL_NAME(sh_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS) +SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, SHADOW_LEVELS, GUEST_LEVELS) (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn); extern int -SHADOW_INTERNAL_NAME(sh_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS) +SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, SHADOW_LEVELS, GUEST_LEVELS) (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn); extern void diff --git a/xen/arch/x86/mm/shadow/private.h 
b/xen/arch/x86/mm/shadow/private.h index 6fab2789f9..913fa43770 100644 --- a/xen/arch/x86/mm/shadow/private.h +++ b/xen/arch/x86/mm/shadow/private.h @@ -33,8 +33,43 @@ /****************************************************************************** + * Levels of self-test and paranoia + */ + +#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */ +#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */ +#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ +#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ +#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ +#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ + +#ifdef NDEBUG +#define SHADOW_AUDIT 0 +#define SHADOW_AUDIT_ENABLE 0 +#else +#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */ +#define SHADOW_AUDIT_ENABLE shadow_audit_enable +extern int shadow_audit_enable; +#endif + +/****************************************************************************** + * Levels of optimization + */ + +#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */ +#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */ +#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */ +#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */ +#define SHOPT_LINUX_L3_TOPLEVEL 0x10 /* Pin l3es on early 64bit linux */ +#define SHOPT_SKIP_VERIFY 0x20 /* Skip PTE v'fy when safe to do so */ + +#define SHADOW_OPTIMIZATIONS 0x3f + + +/****************************************************************************** * Debug and error-message output */ + #define SHADOW_PRINTK(_f, _a...) \ debugtrace_printk("sh: %s(): " _f, __func__, ##_a) #define SHADOW_ERROR(_f, _a...) \ @@ -54,6 +89,58 @@ #define SHADOW_DEBUG_EMULATE 1 #define SHADOW_DEBUG_LOGDIRTY 0 +/****************************************************************************** + * The shadow lock. + * + * This lock is per-domain. 
It is intended to allow us to make atomic + * updates to the software TLB that the shadow tables provide. + * + * Specifically, it protects: + * - all changes to shadow page table pages + * - the shadow hash table + * - the shadow page allocator + * - all changes to guest page table pages + * - all changes to the page_info->tlbflush_timestamp + * - the page_info->count fields on shadow pages + * - the shadow dirty bit array and count + */ +#ifndef CONFIG_SMP +#error shadow.h currently requires CONFIG_SMP +#endif + +#define shadow_lock_init(_d) \ + do { \ + spin_lock_init(&(_d)->arch.shadow.lock); \ + (_d)->arch.shadow.locker = -1; \ + (_d)->arch.shadow.locker_function = "nobody"; \ + } while (0) + +#define shadow_locked_by_me(_d) \ + (current->processor == (_d)->arch.shadow.locker) + +#define shadow_lock(_d) \ + do { \ + if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ + { \ + printk("Error: shadow lock held by %s\n", \ + (_d)->arch.shadow.locker_function); \ + BUG(); \ + } \ + spin_lock(&(_d)->arch.shadow.lock); \ + ASSERT((_d)->arch.shadow.locker == -1); \ + (_d)->arch.shadow.locker = current->processor; \ + (_d)->arch.shadow.locker_function = __func__; \ + } while (0) + +#define shadow_unlock(_d) \ + do { \ + ASSERT((_d)->arch.shadow.locker == current->processor); \ + (_d)->arch.shadow.locker = -1; \ + (_d)->arch.shadow.locker_function = "nobody"; \ + spin_unlock(&(_d)->arch.shadow.lock); \ + } while (0) + + /****************************************************************************** * Auditing routines @@ -249,6 +336,10 @@ static inline int sh_type_is_pinnable(struct vcpu *v, unsigned int t) #define SHF_L3_64 (1u << SH_type_l3_64_shadow) #define SHF_L4_64 (1u << SH_type_l4_64_shadow) +#define SHF_32 (SHF_L1_32|SHF_FL1_32|SHF_L2_32) +#define SHF_PAE (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE|SHF_L2H_PAE) +#define SHF_64 (SHF_L1_64|SHF_FL1_64|SHF_L2_64|SHF_L3_64|SHF_L4_64) + /* Used for hysteresis when automatically unhooking mappings on fork/exit */ 
#define SHF_unhooked_mappings (1u<<31) @@ -287,6 +378,21 @@ void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn); void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn); void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); +/* Update the shadows in response to a pagetable write from Xen */ +extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, + void *entry, u32 size); + +/* Update the shadows in response to a pagetable write from a HVM guest */ +extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, + void *entry, u32 size); + +/* Remove all writeable mappings of a guest frame from the shadows. + * Returns non-zero if we need to flush TLBs. + * level and fault_addr desribe how we found this to be a pagetable; + * level==0 means we have some other reason for revoking write access. */ +extern int sh_remove_write_access(struct vcpu *v, mfn_t readonly_mfn, + unsigned int level, + unsigned long fault_addr); /****************************************************************************** * Flags used in the return value of the shadow_set_lXe() functions... 
@@ -321,6 +427,26 @@ void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); #undef mfn_valid #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) + +static inline int +sh_mfn_is_a_page_table(mfn_t gmfn) +{ + struct page_info *page = mfn_to_page(gmfn); + struct domain *owner; + unsigned long type_info; + + if ( !mfn_valid(gmfn) ) + return 0; + + owner = page_get_owner(page); + if ( owner && shadow_mode_refcounts(owner) + && (page->count_info & PGC_page_table) ) + return 1; + + type_info = page->u.inuse.type_info & PGT_type_mask; + return type_info && (type_info <= PGT_l4_page_table); +} + // Provide mfn_t-aware versions of common xen functions static inline void * sh_map_domain_page(mfn_t mfn) @@ -346,6 +472,25 @@ sh_unmap_domain_page_global(void *p) unmap_domain_page_global(p); } +static inline mfn_t +pagetable_get_mfn(pagetable_t pt) +{ + return _mfn(pagetable_get_pfn(pt)); +} + +static inline pagetable_t +pagetable_from_mfn(mfn_t mfn) +{ + return pagetable_from_pfn(mfn_x(mfn)); +} + + +/****************************************************************************** + * Log-dirty mode bitmap handling + */ + +extern void sh_mark_dirty(struct domain *d, mfn_t gmfn); + static inline int sh_mfn_is_dirty(struct domain *d, mfn_t gmfn) /* Is this guest page dirty? Call only in log-dirty mode. */ @@ -364,25 +509,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t gmfn) return 0; } -static inline int -sh_mfn_is_a_page_table(mfn_t gmfn) -{ - struct page_info *page = mfn_to_page(gmfn); - struct domain *owner; - unsigned long type_info; - - if ( !mfn_valid(gmfn) ) - return 0; - - owner = page_get_owner(page); - if ( owner && shadow_mode_refcounts(owner) - && (page->count_info & PGC_page_table) ) - return 1; - - type_info = page->u.inuse.type_info & PGT_type_mask; - return type_info && (type_info <= PGT_l4_page_table); -} - /**************************************************************************/ /* Shadow-page refcounting. 
*/ diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h index 4aed70aa8c..e2edebe555 100644 --- a/xen/arch/x86/mm/shadow/types.h +++ b/xen/arch/x86/mm/shadow/types.h @@ -477,8 +477,8 @@ struct shadow_walk_t #define sh_gva_to_gpa INTERNAL_NAME(sh_gva_to_gpa) #define sh_gva_to_gfn INTERNAL_NAME(sh_gva_to_gfn) #define sh_update_cr3 INTERNAL_NAME(sh_update_cr3) -#define sh_remove_write_access INTERNAL_NAME(sh_remove_write_access) -#define sh_remove_all_mappings INTERNAL_NAME(sh_remove_all_mappings) +#define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1) +#define sh_rm_mappings_from_l1 INTERNAL_NAME(sh_rm_mappings_from_l1) #define sh_remove_l1_shadow INTERNAL_NAME(sh_remove_l1_shadow) #define sh_remove_l2_shadow INTERNAL_NAME(sh_remove_l2_shadow) #define sh_remove_l3_shadow INTERNAL_NAME(sh_remove_l3_shadow) diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c index d24aad62aa..356e4079be 100644 --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -9,9 +9,14 @@ #include <xen/guest_access.h> #include <asm/current.h> #include <asm/smpboot.h> +#include <asm/hypercall.h> #include <public/xen.h> #include <public/physdev.h> +#ifndef COMPAT +typedef long ret_t; +#endif + int ioapic_guest_read( unsigned long physbase, unsigned int reg, u32 *pval); @@ -19,10 +24,10 @@ int ioapic_guest_write( unsigned long physbase, unsigned int reg, u32 pval); -long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg) +ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg) { int irq; - long ret; + ret_t ret; switch ( cmd ) { @@ -129,7 +134,11 @@ long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg) (set_iobitmap.nr_ports > 65536) ) break; ret = 0; +#ifndef COMPAT current->arch.iobmp = set_iobitmap.bitmap; +#else + guest_from_compat_handle(current->arch.iobmp, set_iobitmap.bitmap); +#endif current->arch.iobmp_limit = set_iobitmap.nr_ports; break; } diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c 
index 7d2ee6f496..766a357e65 100644 --- a/xen/arch/x86/platform_hypercall.c +++ b/xen/arch/x86/platform_hypercall.c @@ -23,11 +23,17 @@ #include <asm/mtrr.h> #include "cpu/mtrr/mtrr.h" -long do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) +#ifndef COMPAT +typedef long ret_t; +DEFINE_SPINLOCK(xenpf_lock); +#else +extern spinlock_t xenpf_lock; +#endif + +ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) { - long ret = 0; + ret_t ret = 0; struct xen_platform_op curop, *op = &curop; - static DEFINE_SPINLOCK(xenpf_lock); if ( !IS_PRIV(current->domain) ) return -EPERM; @@ -105,8 +111,15 @@ long do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) case XENPF_microcode_update: { extern int microcode_update(XEN_GUEST_HANDLE(void), unsigned long len); +#ifndef COMPAT ret = microcode_update(op->u.microcode.data, op->u.microcode.length); +#else + XEN_GUEST_HANDLE(void) data; + + guest_from_compat_handle(data, op->u.microcode.data); + ret = microcode_update(data, op->u.microcode.length); +#endif } break; diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 8d9f6f9e08..8770a29509 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -18,6 +18,10 @@ #include <xen/keyhandler.h> #include <xen/numa.h> #include <public/version.h> +#ifdef CONFIG_COMPAT +#include <compat/platform.h> +#include <compat/xen.h> +#endif #include <asm/bitops.h> #include <asm/smp.h> #include <asm/processor.h> @@ -544,7 +548,14 @@ void __init __start_xen(multiboot_info_t *mbi) BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE); BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE); - BUILD_BUG_ON(sizeof(vcpu_info_t) != 64); + BUILD_BUG_ON(sizeof(struct vcpu_info) != 64); + +#ifdef CONFIG_COMPAT + BUILD_BUG_ON(sizeof(((struct compat_platform_op *)0)->u) != + sizeof(((struct compat_platform_op *)0)->u.pad)); + BUILD_BUG_ON(sizeof(start_info_compat_t) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct compat_vcpu_info) != 64); +#endif /* Check definitions in public 
headers match internal defs. */ BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START); @@ -791,6 +802,9 @@ void arch_get_xen_caps(xen_capabilities_info_t info) #elif defined(CONFIG_X86_64) p += sprintf(p, "xen-%d.%d-x86_64 ", major, minor); +#ifdef CONFIG_COMPAT + p += sprintf(p, "xen-%d.%d-x86_32p ", major, minor); +#endif if ( hvm_enabled ) { p += sprintf(p, "hvm-%d.%d-x86_32 ", major, minor); diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c index 0f265825bd..be96055290 100644 --- a/xen/arch/x86/sysctl.c +++ b/xen/arch/x86/sysctl.c @@ -25,10 +25,14 @@ #include <asm/hvm/support.h> #include <asm/processor.h> -long arch_do_sysctl( +#ifndef COMPAT +typedef long ret_t; +#endif + +ret_t arch_do_sysctl( struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) { - long ret = 0; + ret_t ret = 0; switch ( sysctl->cmd ) { diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c index 970a742efc..387b18c20a 100644 --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -41,7 +41,7 @@ boolean_param("hpet_force", opt_hpet_force); unsigned long cpu_khz; /* CPU clock frequency in kHz. */ unsigned long hpet_address; DEFINE_SPINLOCK(rtc_lock); -unsigned long volatile jiffies; +volatile unsigned long jiffies; static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */ static DEFINE_SPINLOCK(wc_lock); @@ -148,7 +148,7 @@ void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs) ASSERT(local_irq_is_enabled()); /* Update jiffies counter. */ - (*(unsigned long *)&jiffies)++; + (*(volatile unsigned long *)&jiffies)++; /* Rough hack to allow accurate timers to sort-of-work with no APIC. 
*/ if ( !cpu_has_apic ) @@ -676,7 +676,7 @@ static inline void __update_vcpu_system_time(struct vcpu *v) struct vcpu_time_info *u; t = &this_cpu(cpu_time); - u = &v->vcpu_info->time; + u = &vcpu_info(v, time); version_update_begin(&u->version); @@ -690,7 +690,7 @@ static inline void __update_vcpu_system_time(struct vcpu *v) void update_vcpu_system_time(struct vcpu *v) { - if ( v->vcpu_info->time.tsc_timestamp != + if ( vcpu_info(v, time.tsc_timestamp) != this_cpu(cpu_time).local_tsc_stamp ) __update_vcpu_system_time(v); } @@ -698,10 +698,10 @@ void update_vcpu_system_time(struct vcpu *v) void update_domain_wallclock_time(struct domain *d) { spin_lock(&wc_lock); - version_update_begin(&d->shared_info->wc_version); - d->shared_info->wc_sec = wc_sec + d->time_offset_seconds; - d->shared_info->wc_nsec = wc_nsec; - version_update_end(&d->shared_info->wc_version); + version_update_begin(&shared_info(d, wc_version)); + shared_info(d, wc_sec) = wc_sec + d->time_offset_seconds; + shared_info(d, wc_nsec) = wc_nsec; + version_update_end(&shared_info(d, wc_version)); spin_unlock(&wc_lock); } diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 606bc003c1..cf1defeae3 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -58,6 +58,7 @@ #include <asm/i387.h> #include <asm/debugger.h> #include <asm/msr.h> +#include <asm/shared.h> #include <asm/x86_emulate.h> #include <asm/hvm/vpt.h> @@ -115,22 +116,6 @@ integer_param("debug_stack_lines", debug_stack_lines); #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp) #endif -int is_kernel_text(unsigned long addr) -{ - extern char _stext, _etext; - if (addr >= (unsigned long) &_stext && - addr <= (unsigned long) &_etext) - return 1; - return 0; - -} - -unsigned long kernel_text_end(void) -{ - extern char _etext; - return (unsigned long) &_etext; -} - static void show_guest_stack(struct cpu_user_regs *regs) { int i; @@ -139,6 +124,12 @@ static void show_guest_stack(struct cpu_user_regs *regs) if ( 
is_hvm_vcpu(current) ) return; + if ( IS_COMPAT(container_of(regs, struct cpu_info, guest_cpu_user_regs)->current_vcpu->domain) ) + { + compat_show_guest_stack(regs, debug_stack_lines); + return; + } + if ( vm86_mode(regs) ) { stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff)); @@ -187,7 +178,7 @@ static void show_trace(struct cpu_user_regs *regs) while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 ) { addr = *stack++; - if ( is_kernel_text(addr) ) + if ( is_kernel_text(addr) || is_kernel_inittext(addr) ) { printk("[<%p>]", _p(addr)); print_symbol(" %s\n ", addr); @@ -316,7 +307,7 @@ void show_stack_overflow(unsigned long esp) while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 ) { addr = *stack++; - if ( is_kernel_text(addr) ) + if ( is_kernel_text(addr) || is_kernel_inittext(addr) ) { printk("%p: [<%p>]", stack, _p(addr)); print_symbol(" %s\n ", addr); @@ -398,7 +389,7 @@ static int do_guest_trap( if ( TI_GET_IF(ti) ) tb->flags |= TBF_INTERRUPT; - if ( unlikely(null_trap_bounce(tb)) ) + if ( unlikely(null_trap_bounce(v, tb)) ) gdprintk(XENLOG_WARNING, "Unhandled %s fault/trap [#%d] in " "domain %d on VCPU %d [ec=%04x]\n", trapstr(trapnr), trapnr, v->domain->domain_id, v->vcpu_id, @@ -675,7 +666,7 @@ void propagate_page_fault(unsigned long addr, u16 error_code) struct trap_bounce *tb = &v->arch.trap_bounce; v->arch.guest_context.ctrlreg[2] = addr; - v->vcpu_info->arch.cr2 = addr; + arch_set_cr2(v, addr); /* Re-set error_code.user flag appropriately for the guest. 
*/ error_code &= ~PFEC_user_mode; @@ -689,7 +680,7 @@ void propagate_page_fault(unsigned long addr, u16 error_code) tb->eip = ti->address; if ( TI_GET_IF(ti) ) tb->flags |= TBF_INTERRUPT; - if ( unlikely(null_trap_bounce(tb)) ) + if ( unlikely(null_trap_bounce(v, tb)) ) { printk("Unhandled page fault in domain %d on VCPU %d (ec=%04X)\n", v->domain->domain_id, v->vcpu_id, error_code); @@ -986,6 +977,64 @@ long do_fpu_taskswitch(int set) return 0; } +static int read_descriptor(unsigned int sel, + const struct vcpu *v, + const struct cpu_user_regs * regs, + unsigned long *base, + unsigned long *limit, + unsigned int *ar, + unsigned int vm86attr) +{ + struct desc_struct desc; + + if ( !vm86_mode(regs) ) + { + if ( sel < 4) + desc.b = desc.a = 0; + else if ( __get_user(desc, + (const struct desc_struct *)(!(sel & 4) + ? GDT_VIRT_START(v) + : LDT_VIRT_START(v)) + + (sel >> 3)) ) + return 0; + if ( !(vm86attr & _SEGMENT_CODE) ) + desc.b &= ~_SEGMENT_L; + } + else + { + desc.a = (sel << 20) | 0xffff; + desc.b = vm86attr | (sel >> 12); + } + + *ar = desc.b & 0x00f0ff00; + if ( !(desc.b & _SEGMENT_L) ) + { + *base = (desc.a >> 16) + ((desc.b & 0xff) << 16) + (desc.b & 0xff000000); + *limit = (desc.a & 0xffff) | (desc.b & 0x000f0000); + if ( desc.b & _SEGMENT_G ) + *limit = ((*limit + 1) << 12) - 1; +#ifndef NDEBUG + if ( !vm86_mode(regs) && sel > 3 ) + { + unsigned int a, l; + unsigned char valid; + + __asm__("larl %2, %0\n\tsetz %1" : "=r" (a), "=rm" (valid) : "rm" (sel)); + BUG_ON(valid && (a & 0x00f0ff00) != *ar); + __asm__("lsll %2, %0\n\tsetz %1" : "=r" (l), "=rm" (valid) : "rm" (sel)); + BUG_ON(valid && l != *limit); + } +#endif + } + else + { + *base = 0UL; + *limit = ~0UL; + } + + return 1; +} + /* Has the guest requested sufficient permission for this I/O access? 
*/ static inline int guest_io_okay( unsigned int port, unsigned int bytes, @@ -1050,65 +1099,113 @@ unsigned long guest_to_host_gpr_switch(unsigned long) __attribute__((__regparm__(1))); /* Instruction fetch with error handling. */ -#define insn_fetch(_type, _size, cs, eip) \ -({ unsigned long _rc, _x, _ptr = eip; \ - if ( vm86_mode(regs) ) \ - _ptr += cs << 4; \ - if ( (_rc = copy_from_user(&_x, (_type *)_ptr, sizeof(_type))) != 0 ) \ +#define insn_fetch(type, base, eip, limit) \ +({ unsigned long _rc, _ptr = (base) + (eip); \ + type _x; \ + if ( (limit) < sizeof(_x) - 1 || (eip) > (limit) - (sizeof(_x) - 1) ) \ + goto fail; \ + if ( (_rc = copy_from_user(&_x, (type *)_ptr, sizeof(_x))) != 0 ) \ { \ - propagate_page_fault(eip + sizeof(_type) - _rc, 0); \ + propagate_page_fault(_ptr + sizeof(_x) - _rc, 0); \ return EXCRET_fault_fixed; \ } \ - eip += _size; (_type)_x; }) + (eip) += sizeof(_x); _x; }) + +#if defined(CONFIG_X86_32) +# define read_sreg(regs, sr) ((regs)->sr) +#elif defined(CONFIG_X86_64) +# define read_sreg(regs, sr) read_segment_register(sr) +#endif static int emulate_privileged_op(struct cpu_user_regs *regs) { struct vcpu *v = current; - unsigned long *reg, eip = regs->eip, cs = regs->cs, res; - u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0; - unsigned int port, i, op_bytes = 4, data, rc; + unsigned long *reg, eip = regs->eip, res; + u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, rex = 0; + enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none; + unsigned int port, i, data_sel, ar, data, rc; + unsigned int op_bytes, op_default, ad_bytes, ad_default; +#define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \ + ? regs->reg \ + : ad_bytes == 4 \ + ? (u32)regs->reg \ + : (u16)regs->reg) +#define wr_ad(reg, val) (ad_bytes >= sizeof(regs->reg) \ + ? regs->reg = (val) \ + : ad_bytes == 4 \ + ? 
(*(u32 *)&regs->reg = (val)) \ + : (*(u16 *)&regs->reg = (val))) + unsigned long code_base, code_limit; char io_emul_stub[16]; void (*io_emul)(struct cpu_user_regs *) __attribute__((__regparm__(1))); u32 l, h; + if ( !read_descriptor(regs->cs, v, regs, + &code_base, &code_limit, &ar, + _SEGMENT_CODE|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) ) + goto fail; + op_default = op_bytes = (ar & (_SEGMENT_L|_SEGMENT_DB)) ? 4 : 2; + ad_default = ad_bytes = (ar & _SEGMENT_L) ? 8 : op_default; + if ( !(ar & (_SEGMENT_CODE|_SEGMENT_S|_SEGMENT_P)) ) + goto fail; + + /* emulating only opcodes not allowing SS to be default */ + data_sel = read_sreg(regs, ds); + /* Legacy prefixes. */ - for ( i = 0; i < 8; i++ ) + for ( i = 0; i < 8; i++, rex == opcode || (rex = 0) ) { - switch ( opcode = insn_fetch(u8, 1, cs, eip) ) + switch ( opcode = insn_fetch(u8, code_base, eip, code_limit) ) { case 0x66: /* operand-size override */ - op_bytes ^= 6; /* switch between 2/4 bytes */ - break; + op_bytes = op_default ^ 6; /* switch between 2/4 bytes */ + continue; case 0x67: /* address-size override */ + ad_bytes = ad_default != 4 ? 4 : 2; /* switch to 2/4 bytes */ + continue; case 0x2e: /* CS override */ + data_sel = regs->cs; + continue; case 0x3e: /* DS override */ + data_sel = read_sreg(regs, ds); + continue; case 0x26: /* ES override */ + data_sel = read_sreg(regs, es); + continue; case 0x64: /* FS override */ + data_sel = read_sreg(regs, fs); + lm_ovr = lm_seg_fs; + continue; case 0x65: /* GS override */ + data_sel = read_sreg(regs, gs); + lm_ovr = lm_seg_gs; + continue; case 0x36: /* SS override */ + data_sel = regs->ss; + continue; case 0xf0: /* LOCK */ + continue; case 0xf2: /* REPNE/REPNZ */ - break; case 0xf3: /* REP/REPE/REPZ */ rep_prefix = 1; - break; + continue; default: - goto done_prefixes; + if ( (ar & _SEGMENT_L) && (opcode & 0xf0) == 0x40 ) + { + rex = opcode; + continue; + } + break; } + break; } - done_prefixes: -#ifdef __x86_64__ /* REX prefix.
*/ - if ( (opcode & 0xf0) == 0x40 ) - { - modrm_reg = (opcode & 4) << 1; /* REX.R */ - modrm_rm = (opcode & 1) << 3; /* REX.B */ - - /* REX.W and REX.X do not need to be decoded. */ - opcode = insn_fetch(u8, 1, cs, eip); - } -#endif + if ( rex & 8 ) /* REX.W */ + op_bytes = 4; /* emulating only opcodes not supporting 64-bit operands */ + modrm_reg = (rex & 4) << 1; /* REX.R */ + /* REX.X does not need to be decoded. */ + modrm_rm = (rex & 1) << 3; /* REX.B */ if ( opcode == 0x0f ) goto twobyte_opcode; @@ -1116,16 +1213,68 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) /* Input/Output String instructions. */ if ( (opcode >= 0x6c) && (opcode <= 0x6f) ) { - if ( rep_prefix && (regs->ecx == 0) ) + unsigned long data_base, data_limit; + + if ( rep_prefix && (rd_ad(ecx) == 0) ) goto done; + if ( !(opcode & 2) ) + { + data_sel = read_sreg(regs, es); + lm_ovr = lm_seg_none; + } + + if ( !(ar & _SEGMENT_L) ) + { + if ( !read_descriptor(data_sel, v, regs, + &data_base, &data_limit, &ar, + _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) ) + goto fail; + if ( !(ar & (_SEGMENT_S|_SEGMENT_P)) || + (opcode & 2 ? 
+ (ar & _SEGMENT_CODE) && !(ar & _SEGMENT_WR) : + (ar & _SEGMENT_CODE) || !(ar & _SEGMENT_WR)) ) + goto fail; + } +#ifdef CONFIG_X86_64 + else + { + if ( lm_ovr == lm_seg_none || data_sel < 4 ) + { + switch ( lm_ovr ) + { + case lm_seg_none: + data_base = 0UL; + break; + case lm_seg_fs: + data_base = v->arch.guest_context.fs_base; + break; + case lm_seg_gs: + if ( guest_kernel_mode(v, regs) ) + data_base = v->arch.guest_context.gs_base_kernel; + else + data_base = v->arch.guest_context.gs_base_user; + break; + } + } + else + read_descriptor(data_sel, v, regs, + &data_base, &data_limit, &ar, + 0); + data_limit = ~0UL; + ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P; + } +#endif + continue_io_string: switch ( opcode ) { case 0x6c: /* INSB */ op_bytes = 1; case 0x6d: /* INSW/INSL */ - if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) ) + if ( data_limit < op_bytes - 1 || + rd_ad(edi) > data_limit - (op_bytes - 1) || + !guest_io_okay((u16)regs->edx, op_bytes, v, regs) ) goto fail; port = (u16)regs->edx; switch ( op_bytes ) @@ -1143,24 +1292,26 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) data = (u32)(guest_inl_okay(port, v, regs) ? inl(port) : ~0); break; } - if ( (rc = copy_to_user((void *)regs->edi, &data, op_bytes)) != 0 ) + if ( (rc = copy_to_user((void *)data_base + rd_ad(edi), &data, op_bytes)) != 0 ) { - propagate_page_fault(regs->edi + op_bytes - rc, + propagate_page_fault(data_base + rd_ad(edi) + op_bytes - rc, PFEC_write_access); return EXCRET_fault_fixed; } - regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes); + wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF) ? 
-op_bytes : op_bytes)); break; case 0x6e: /* OUTSB */ op_bytes = 1; case 0x6f: /* OUTSW/OUTSL */ - if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) ) + if ( data_limit < op_bytes - 1 || + rd_ad(esi) > data_limit - (op_bytes - 1) || + !guest_io_okay((u16)regs->edx, op_bytes, v, regs) ) goto fail; - rc = copy_from_user(&data, (void *)regs->esi, op_bytes); + rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), op_bytes); if ( rc != 0 ) { - propagate_page_fault(regs->esi + op_bytes - rc, 0); + propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc, 0); return EXCRET_fault_fixed; } port = (u16)regs->edx; @@ -1181,11 +1332,11 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) outl((u32)data, port); break; } - regs->esi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes); + wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes)); break; } - if ( rep_prefix && (--regs->ecx != 0) ) + if ( rep_prefix && (wr_ad(ecx, regs->ecx - 1) != 0) ) { if ( !hypercall_preempt_check() ) goto continue_io_string; @@ -1225,7 +1376,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) case 0xe4: /* IN imm8,%al */ op_bytes = 1; case 0xe5: /* IN imm8,%eax */ - port = insn_fetch(u8, 1, cs, eip); + port = insn_fetch(u8, code_base, eip, code_limit); io_emul_stub[7] = port; /* imm8 */ exec_in: if ( !guest_io_okay(port, op_bytes, v, regs) ) @@ -1267,7 +1418,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) case 0xe6: /* OUT %al,imm8 */ op_bytes = 1; case 0xe7: /* OUT %eax,imm8 */ - port = insn_fetch(u8, 1, cs, eip); + port = insn_fetch(u8, code_base, eip, code_limit); io_emul_stub[7] = port; /* imm8 */ exec_out: if ( !guest_io_okay(port, op_bytes, v, regs) ) @@ -1320,7 +1471,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) goto fail; /* Privileged (ring 0) instructions. 
*/ - opcode = insn_fetch(u8, 1, cs, eip); + opcode = insn_fetch(u8, code_base, eip, code_limit); switch ( opcode ) { case 0x06: /* CLTS */ @@ -1338,7 +1489,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) break; case 0x20: /* MOV CR?,<reg> */ - opcode = insn_fetch(u8, 1, cs, eip); + opcode = insn_fetch(u8, code_base, eip, code_limit); modrm_reg |= (opcode >> 3) & 7; modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); @@ -1354,8 +1505,14 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) break; case 3: /* Read CR3 */ - *reg = xen_pfn_to_cr3(mfn_to_gmfn( - v->domain, pagetable_get_pfn(v->arch.guest_table))); + if ( !IS_COMPAT(v->domain) ) + *reg = xen_pfn_to_cr3(mfn_to_gmfn( + v->domain, pagetable_get_pfn(v->arch.guest_table))); +#ifdef CONFIG_COMPAT + else + *reg = compat_pfn_to_cr3(mfn_to_gmfn( + v->domain, l4e_get_pfn(*(l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table))))); +#endif break; case 4: /* Read CR4 */ @@ -1372,7 +1529,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) break; case 0x21: /* MOV DR?,<reg> */ - opcode = insn_fetch(u8, 1, cs, eip); + opcode = insn_fetch(u8, code_base, eip, code_limit); modrm_reg |= (opcode >> 3) & 7; modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); @@ -1382,7 +1539,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) break; case 0x22: /* MOV <reg>,CR? 
*/ - opcode = insn_fetch(u8, 1, cs, eip); + opcode = insn_fetch(u8, code_base, eip, code_limit); modrm_reg |= (opcode >> 3) & 7; modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); @@ -1400,12 +1557,17 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) case 2: /* Write CR2 */ v->arch.guest_context.ctrlreg[2] = *reg; - v->vcpu_info->arch.cr2 = *reg; + arch_set_cr2(v, *reg); break; case 3: /* Write CR3 */ LOCK_BIGLOCK(v->domain); - rc = new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg))); + if ( !IS_COMPAT(v->domain) ) + rc = new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg))); +#ifdef CONFIG_COMPAT + else + rc = new_guest_cr3(gmfn_to_mfn(v->domain, compat_cr3_to_pfn(*reg))); +#endif UNLOCK_BIGLOCK(v->domain); if ( rc == 0 ) /* not okay */ goto fail; @@ -1425,7 +1587,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) break; case 0x23: /* MOV <reg>,DR? */ - opcode = insn_fetch(u8, 1, cs, eip); + opcode = insn_fetch(u8, code_base, eip, code_limit); modrm_reg |= (opcode >> 3) & 7; modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); @@ -1438,18 +1600,24 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) { #ifdef CONFIG_X86_64 case MSR_FS_BASE: + if ( IS_COMPAT(v->domain) ) + goto fail; if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) ) goto fail; v->arch.guest_context.fs_base = ((u64)regs->edx << 32) | regs->eax; break; case MSR_GS_BASE: + if ( IS_COMPAT(v->domain) ) + goto fail; if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) ) goto fail; v->arch.guest_context.gs_base_kernel = ((u64)regs->edx << 32) | regs->eax; break; case MSR_SHADOW_GS_BASE: + if ( IS_COMPAT(v->domain) ) + goto fail; if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) ) goto fail; v->arch.guest_context.gs_base_user = @@ -1474,14 +1642,20 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) { #ifdef CONFIG_X86_64 case MSR_FS_BASE: + if ( IS_COMPAT(v->domain) ) + goto fail; regs->eax 
= v->arch.guest_context.fs_base & 0xFFFFFFFFUL; regs->edx = v->arch.guest_context.fs_base >> 32; break; case MSR_GS_BASE: + if ( IS_COMPAT(v->domain) ) + goto fail; regs->eax = v->arch.guest_context.gs_base_kernel & 0xFFFFFFFFUL; regs->edx = v->arch.guest_context.gs_base_kernel >> 32; break; case MSR_SHADOW_GS_BASE: + if ( IS_COMPAT(v->domain) ) + goto fail; regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL; regs->edx = v->arch.guest_context.gs_base_user >> 32; break; @@ -1510,6 +1684,9 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) goto fail; } +#undef wr_ad +#undef rd_ad + done: regs->eip = eip; return EXCRET_fault_fixed; @@ -1611,7 +1788,7 @@ static void nmi_dom0_report(unsigned int reason_idx) if ( ((d = dom0) == NULL) || ((v = d->vcpu[0]) == NULL) ) return; - set_bit(reason_idx, &d->shared_info->arch.nmi_reason); + set_bit(reason_idx, nmi_reason(d)); if ( test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) ) raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */ @@ -1801,6 +1978,13 @@ void set_tss_desc(unsigned int n, void *addr) (unsigned long)addr, offsetof(struct tss_struct, __cacheline_filler) - 1, 9); +#ifdef CONFIG_COMPAT + _set_tssldt_desc( + compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY, + (unsigned long)addr, + offsetof(struct tss_struct, __cacheline_filler) - 1, + 11); +#endif } void __init trap_init(void) @@ -1875,7 +2059,7 @@ long do_set_trap_table(XEN_GUEST_HANDLE(trap_info_t) traps) if ( cur.address == 0 ) break; - fixup_guest_code_selector(cur.cs); + fixup_guest_code_selector(current->domain, cur.cs); memcpy(&dst[cur.vector], &cur, sizeof(cur)); diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c index 4978b82dad..76303fc275 100644 --- a/xen/arch/x86/x86_32/mm.c +++ b/xen/arch/x86/x86_32/mm.c @@ -230,7 +230,7 @@ long do_stack_switch(unsigned long ss, unsigned long esp) int nr = smp_processor_id(); struct tss_struct *t = &init_tss[nr]; - fixup_guest_stack_selector(ss); + 
fixup_guest_stack_selector(current->domain, ss); current->arch.guest_context.kernel_ss = ss; current->arch.guest_context.kernel_sp = esp; @@ -241,7 +241,7 @@ long do_stack_switch(unsigned long ss, unsigned long esp) } /* Returns TRUE if given descriptor is valid for GDT or LDT. */ -int check_descriptor(struct desc_struct *d) +int check_descriptor(const struct domain *dom, struct desc_struct *d) { unsigned long base, limit; u32 a = d->a, b = d->b; @@ -261,8 +261,8 @@ int check_descriptor(struct desc_struct *d) * gates (consider a call gate pointing at another kernel descriptor with * DPL 0 -- this would get the OS ring-0 privileges). */ - if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) ) - d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13); + if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) ) + d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13); if ( !(b & _SEGMENT_S) ) { @@ -284,8 +284,8 @@ int check_descriptor(struct desc_struct *d) /* Validate and fix up the target code selector. */ cs = a >> 16; - fixup_guest_code_selector(cs); - if ( !guest_gate_selector_okay(cs) ) + fixup_guest_code_selector(dom, cs); + if ( !guest_gate_selector_okay(dom, cs) ) goto bad; a = d->a = (d->a & 0xffffU) | (cs << 16); diff --git a/xen/arch/x86/x86_32/seg_fixup.c b/xen/arch/x86/x86_32/seg_fixup.c index 643a1eec1f..7225717742 100644 --- a/xen/arch/x86/x86_32/seg_fixup.c +++ b/xen/arch/x86/x86_32/seg_fixup.c @@ -268,7 +268,7 @@ int gpf_emulate_4gb(struct cpu_user_regs *regs) struct trap_info *ti; struct trap_bounce *tb; u8 modrm, mod, reg, rm, decode; - void *memreg, *regreg; + void *memreg; unsigned long offset; u8 disp8; u32 disp32 = 0; @@ -384,8 +384,7 @@ int gpf_emulate_4gb(struct cpu_user_regs *regs) goto fixme; } - /* Decode Reg and R/M fields. */ - regreg = decode_register(reg, regs, 0); + /* Decode R/M field. */ memreg = decode_register(rm, regs, 0); /* Decode Mod field. 
*/ diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c index d5d1e5e8f7..e6cd085962 100644 --- a/xen/arch/x86/x86_32/traps.c +++ b/xen/arch/x86/x86_32/traps.c @@ -193,7 +193,7 @@ unsigned long do_iret(void) /* * Pop, fix up and restore EFLAGS. We fix up in a local staging area - * to avoid firing the BUG_ON(IOPL) check in arch_getdomaininfo_ctxt. + * to avoid firing the BUG_ON(IOPL) check in arch_get_info_guest. */ if ( unlikely(__copy_from_user(&eflags, (void __user *)regs->esp, 4)) ) goto exit_and_crash; @@ -296,7 +296,7 @@ void init_int80_direct_trap(struct vcpu *v) * switch to the Xen stack and we need to swap back to the guest * kernel stack before passing control to the system call entry point. */ - if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) || + if ( TI_GET_IF(ti) || !guest_gate_selector_okay(v->domain, ti->cs) || supervisor_mode_kernel ) { v->arch.int80_desc.a = v->arch.int80_desc.b = 0; @@ -326,7 +326,7 @@ static long register_guest_callback(struct callback_register *reg) long ret = 0; struct vcpu *v = current; - fixup_guest_code_selector(reg->address.cs); + fixup_guest_code_selector(v->domain, reg->address.cs); switch ( reg->type ) { diff --git a/xen/arch/x86/x86_32/xen.lds.S b/xen/arch/x86/x86_32/xen.lds.S index a44bf8701b..16fa9c895e 100644 --- a/xen/arch/x86/x86_32/xen.lds.S +++ b/xen/arch/x86/x86_32/xen.lds.S @@ -19,7 +19,8 @@ PHDRS SECTIONS { . = 0xFF000000 + 0x100000; - _text = .; /* Text and read-only data */ + _start = .; + _stext = .; /* Text and read-only data */ .text : { *(.text) *(.fixup) @@ -51,14 +52,16 @@ SECTIONS . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .text.init : { *(.text.init) } :text - .data.init : { *(.data.init) } :text + _sinittext = .; + .init.text : { *(.init.text) } :text + _einittext = .; + .init.data : { *(.init.data) } :text . 
= ALIGN(32); __setup_start = .; - .setup.init : { *(.setup.init) } :text + .init.setup : { *(.init.setup) } :text __setup_end = .; __initcall_start = .; - .initcall.init : { *(.initcall.init) } :text + .initcall.init : { *(.initcall1.init) } :text __initcall_end = .; . = ALIGN(PAGE_SIZE); __init_end = .; @@ -80,8 +83,8 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { - *(.text.exit) - *(.data.exit) + *(.exit.text) + *(.exit.data) *(.exitcall.exit) } diff --git a/xen/arch/x86/x86_64/Makefile b/xen/arch/x86/x86_64/Makefile index 3c43c696d1..a2985ff0d3 100644 --- a/xen/arch/x86/x86_64/Makefile +++ b/xen/arch/x86/x86_64/Makefile @@ -2,3 +2,22 @@ obj-y += entry.o obj-y += gpr_switch.o obj-y += mm.o obj-y += traps.o + +obj-$(CONFIG_COMPAT) += compat.o +obj-$(CONFIG_COMPAT) += domain.o +obj-$(CONFIG_COMPAT) += domctl.o +obj-$(CONFIG_COMPAT) += physdev.o +obj-$(CONFIG_COMPAT) += platform_hypercall.o +obj-$(CONFIG_COMPAT) += sysctl.o + +ifeq ($(CONFIG_COMPAT),y) +# extra dependencies +compat.o: ../compat.c +domctl.o: ../domctl.c +entry.o: compat/entry.S +mm.o: compat/mm.c +physdev.o: ../physdev.c +platform_hypercall.o: ../platform_hypercall.c +sysctl.o: ../sysctl.c +traps.o: compat/traps.c +endif diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c index 41f93fe9c5..001a781235 100644 --- a/xen/arch/x86/x86_64/asm-offsets.c +++ b/xen/arch/x86/x86_64/asm-offsets.c @@ -7,6 +7,9 @@ #include <xen/config.h> #include <xen/perfc.h> #include <xen/sched.h> +#ifdef CONFIG_COMPAT +#include <compat/xen.h> +#endif #include <asm/fixmap.h> #include <asm/hardirq.h> @@ -53,17 +56,22 @@ void __dummy__(void) BLANK(); OFFSET(VCPU_processor, struct vcpu, processor); + OFFSET(VCPU_domain, struct vcpu, domain); OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info); OFFSET(VCPU_trap_bounce, struct vcpu, arch.trap_bounce); OFFSET(VCPU_thread_flags, struct vcpu, arch.flags); OFFSET(VCPU_event_addr, struct vcpu, arch.guest_context.event_callback_eip); + 
OFFSET(VCPU_event_sel, struct vcpu, + arch.guest_context.event_callback_cs); OFFSET(VCPU_failsafe_addr, struct vcpu, arch.guest_context.failsafe_callback_eip); + OFFSET(VCPU_failsafe_sel, struct vcpu, + arch.guest_context.failsafe_callback_cs); OFFSET(VCPU_syscall_addr, struct vcpu, arch.guest_context.syscall_callback_eip); - OFFSET(VCPU_kernel_sp, struct vcpu, - arch.guest_context.kernel_sp); + OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp); + OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss); OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags); OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt); OFFSET(VCPU_flags, struct vcpu, vcpu_flags); @@ -83,14 +91,25 @@ void __dummy__(void) OFFSET(VCPU_vmx_cr2, struct vcpu, arch.hvm_vmx.cpu_cr2); BLANK(); + OFFSET(DOMAIN_domain_flags, struct domain, domain_flags); + DEFINE(_DOMF_compat, _DOMF_compat); + BLANK(); + OFFSET(VMCB_rax, struct vmcb_struct, rax); OFFSET(VMCB_tsc_offset, struct vmcb_struct, tsc_offset); BLANK(); - OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending); - OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask); + OFFSET(VCPUINFO_upcall_pending, struct vcpu_info, evtchn_upcall_pending); + OFFSET(VCPUINFO_upcall_mask, struct vcpu_info, evtchn_upcall_mask); BLANK(); +#ifdef CONFIG_COMPAT + OFFSET(COMPAT_VCPUINFO_upcall_pending, struct compat_vcpu_info, evtchn_upcall_pending); + OFFSET(COMPAT_VCPUINFO_upcall_mask, struct compat_vcpu_info, evtchn_upcall_mask); + BLANK(); +#endif + + OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); BLANK(); @@ -116,5 +135,17 @@ void __dummy__(void) OFFSET(MULTICALL_result, struct multicall_entry, result); BLANK(); +#ifdef CONFIG_COMPAT + OFFSET(COMPAT_MULTICALL_op, struct compat_multicall_entry, op); + OFFSET(COMPAT_MULTICALL_arg0, struct compat_multicall_entry, args[0]); + OFFSET(COMPAT_MULTICALL_arg1, struct 
compat_multicall_entry, args[1]); + OFFSET(COMPAT_MULTICALL_arg2, struct compat_multicall_entry, args[2]); + OFFSET(COMPAT_MULTICALL_arg3, struct compat_multicall_entry, args[3]); + OFFSET(COMPAT_MULTICALL_arg4, struct compat_multicall_entry, args[4]); + OFFSET(COMPAT_MULTICALL_arg5, struct compat_multicall_entry, args[5]); + OFFSET(COMPAT_MULTICALL_result, struct compat_multicall_entry, result); + BLANK(); +#endif + DEFINE(IRQSTAT_shift, LOG_2(sizeof(irq_cpustat_t))); } diff --git a/xen/arch/x86/x86_64/compat.c b/xen/arch/x86/x86_64/compat.c new file mode 100644 index 0000000000..2a14d18a13 --- /dev/null +++ b/xen/arch/x86/x86_64/compat.c @@ -0,0 +1,30 @@ +/****************************************************************************** + * compat.c + */ + +#include <xen/config.h> +#include <xen/hypercall.h> +#include <compat/xen.h> +#include <compat/physdev.h> + +DEFINE_XEN_GUEST_HANDLE(physdev_op_compat_t); +#define physdev_op compat_physdev_op +#define physdev_op_t physdev_op_compat_t +#define do_physdev_op compat_physdev_op +#define do_physdev_op_compat(x) compat_physdev_op_compat(_##x) + +#define COMPAT +#define _XEN_GUEST_HANDLE(t) XEN_GUEST_HANDLE(t) +typedef int ret_t; + +#include "../compat.c" + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S new file mode 100644 index 0000000000..25ea08bacf --- /dev/null +++ b/xen/arch/x86/x86_64/compat/entry.S @@ -0,0 +1,365 @@ +/* + * Compatibility hypercall routines. + */ + +#include <asm/desc.h> + +.text + +ENTRY(compat_hypercall) + pushq $0 + movl $TRAP_syscall,4(%rsp) + SAVE_ALL + GET_CURRENT(%rbx) + + cmpl $NR_hypercalls,%eax + jae compat_bad_hypercall +#ifndef NDEBUG + /* Deliberately corrupt parameter regs not used by this hypercall. 
*/ + pushq UREGS_rbx(%rsp); pushq %rcx; pushq %rdx; pushq %rsi; pushq %rdi; pushq UREGS_rbp+5*8(%rsp) + leaq compat_hypercall_args_table(%rip),%r10 + movq $6,%rcx + subb (%r10,%rax,1),%cl + movq %rsp,%rdi + movl $0xDEADBEEF,%eax + rep stosq + popq %r9 ; popq %r8 ; popq %rcx; popq %rdx; popq %rsi; popq %rdi + movl UREGS_rax(%rsp),%eax + pushq %rax + pushq UREGS_rip+8(%rsp) +#else + movl %eax,%eax + movl %ebp,%r9d + movl %edi,%r8d + xchgl %ecx,%esi + movl UREGS_rbx(%rsp),%edi +#endif + leaq compat_hypercall_table(%rip),%r10 + PERFC_INCR(PERFC_hypercalls, %rax) + callq *(%r10,%rax,8) +#ifndef NDEBUG + /* Deliberately corrupt parameter regs used by this hypercall. */ + popq %r10 # Shadow RIP + cmpq %r10,UREGS_rip+8(%rsp) + popq %rcx # Shadow hypercall index + jne compat_skip_clobber /* If RIP has changed then don't clobber. */ + leaq compat_hypercall_args_table(%rip),%r10 + movb (%r10,%rcx,1),%cl + movl $0xDEADBEEF,%r10d + testb %cl,%cl; jz compat_skip_clobber; movl %r10d,UREGS_rbx(%rsp) + cmpb $2, %cl; jb compat_skip_clobber; movl %r10d,UREGS_rcx(%rsp) + cmpb $3, %cl; jb compat_skip_clobber; movl %r10d,UREGS_rdx(%rsp) + cmpb $4, %cl; jb compat_skip_clobber; movl %r10d,UREGS_rsi(%rsp) + cmpb $5, %cl; jb compat_skip_clobber; movl %r10d,UREGS_rdi(%rsp) + cmpb $6, %cl; jb compat_skip_clobber; movl %r10d,UREGS_rbp(%rsp) +compat_skip_clobber: +#endif + movl %eax,UREGS_rax(%rsp) # save the return value + +/* %rbx: struct vcpu */ +compat_test_all_events: + cli # tests must not race interrupts +/*compat_test_softirqs:*/ + movl VCPU_processor(%rbx),%eax + shlq $IRQSTAT_shift,%rax + leaq irq_stat(%rip),%rcx + testl $~0,(%rcx,%rax,1) + jnz compat_process_softirqs + btrq $_VCPUF_nmi_pending,VCPU_flags(%rbx) + jc compat_process_nmi +compat_test_guest_events: + movq VCPU_vcpu_info(%rbx),%rax + testb $0xFF,COMPAT_VCPUINFO_upcall_mask(%rax) + jnz compat_restore_all_guest + testb $0xFF,COMPAT_VCPUINFO_upcall_pending(%rax) + jz compat_restore_all_guest +/*compat_process_guest_events:*/ 
+ sti + leaq VCPU_trap_bounce(%rbx),%rdx + movl VCPU_event_addr(%rbx),%eax + movl %eax,TRAPBOUNCE_eip(%rdx) + movl VCPU_event_sel(%rbx),%eax + movl %eax,TRAPBOUNCE_cs(%rdx) + movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx) + call compat_create_bounce_frame + jmp compat_test_all_events + + ALIGN +/* %rbx: struct vcpu */ +compat_process_softirqs: + sti + call do_softirq + jmp compat_test_all_events + + ALIGN +/* %rbx: struct vcpu */ +compat_process_nmi: + movl VCPU_nmi_addr(%rbx),%eax + testl %eax,%eax + jz compat_test_all_events + btsq $_VCPUF_nmi_masked,VCPU_flags(%rbx) + jc 1f + sti + leaq VCPU_trap_bounce(%rbx),%rdx + movl %eax,TRAPBOUNCE_eip(%rdx) + movl $FLAT_COMPAT_KERNEL_CS,TRAPBOUNCE_cs(%rdx) + movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx) + call compat_create_bounce_frame + jmp compat_test_all_events +1: + btsq $_VCPUF_nmi_pending,VCPU_flags(%rbx) + jmp compat_test_guest_events + +compat_bad_hypercall: + movl $-ENOSYS,UREGS_rax(%rsp) + jmp compat_test_all_events + +/* %rbx: struct vcpu, interrupts disabled */ +compat_restore_all_guest: + RESTORE_ALL + addq $8,%rsp +CFLT0: iretq + +.section .fixup,"ax" +CFIX0: popq -15*8-8(%rsp) # error_code/entry_vector + SAVE_ALL # 15*8 bytes pushed + movq -8(%rsp),%rsi # error_code/entry_vector + sti # after stack abuse (-1024(%rsp)) + pushq $__HYPERVISOR_DS # SS + leaq 8(%rsp),%rax + pushq %rax # RSP + pushfq # RFLAGS + pushq $__HYPERVISOR_CS # CS + leaq CDBLFLT0(%rip),%rax + pushq %rax # RIP + pushq %rsi # error_code/entry_vector + jmp handle_exception +CDBLFLT0:GET_CURRENT(%rbx) + jmp compat_test_all_events +compat_failsafe_callback: + GET_CURRENT(%rbx) + leaq VCPU_trap_bounce(%rbx),%rdx + movl VCPU_failsafe_addr(%rbx),%eax + movl %eax,TRAPBOUNCE_eip(%rdx) + movl VCPU_failsafe_sel(%rbx),%eax + movl %eax,TRAPBOUNCE_cs(%rdx) + movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx) + btq $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%rbx) + jnc 1f + orw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx) +1: + call compat_create_bounce_frame + 
jmp compat_test_all_events +.previous +.section __pre_ex_table,"a" + .quad CFLT0,CFIX0 +.previous +.section __ex_table,"a" + .quad CDBLFLT0,compat_failsafe_callback +.previous + +/* %rdx: trap_bounce, %rbx: struct vcpu */ +compat_post_handle_exception: + testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx) + jz compat_test_all_events + call compat_create_bounce_frame + jmp compat_test_all_events + +/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */ +/* {[ERRCODE,] EIP, CS, EFLAGS, [ESP, SS]} */ +/* %rdx: trap_bounce, %rbx: struct vcpu */ +/* On return only %rbx is guaranteed non-clobbered. */ +compat_create_bounce_frame: + mov %fs,%edi + testb $2,UREGS_cs+8(%rsp) + jz 1f + /* Push new frame at registered guest-OS stack base. */ + movl VCPU_kernel_sp(%rbx),%esi +CFLT1: mov VCPU_kernel_ss(%rbx),%fs + subl $2*4,%esi + movl UREGS_rsp+8(%rsp),%eax +CFLT2: movl %eax,%fs:(%rsi) + movl UREGS_ss+8(%rsp),%eax +CFLT3: movl %eax,%fs:4(%rsi) + jmp 2f +1: /* In kernel context already: push new frame at existing %rsp. 
*/ + movl UREGS_rsp+8(%rsp),%esi +CFLT4: mov UREGS_ss+8(%rsp),%fs +2: + movb TRAPBOUNCE_flags(%rdx),%cl + subl $3*4,%esi + movq VCPU_vcpu_info(%rbx),%rax + pushq COMPAT_VCPUINFO_upcall_mask(%rax) + testb $TBF_INTERRUPT,%cl + setnz %ch # TBF_INTERRUPT -> set upcall mask + orb %ch,COMPAT_VCPUINFO_upcall_mask(%rax) + popq %rax + shll $16,%eax # Bits 16-23: saved_upcall_mask + movw UREGS_cs+8(%rsp),%ax # Bits 0-15: CS +CFLT5: movl %eax,%fs:4(%rsi) # CS / saved_upcall_mask + shrl $16,%eax + testb %al,%al # Bits 0-7: saved_upcall_mask + setz %ch # %ch == !saved_upcall_mask + movl UREGS_eflags+8(%rsp),%eax + andl $~X86_EFLAGS_IF,%eax + shlb $1,%ch # Bit 9 (EFLAGS.IF) + orb %ch,%ah # Fold EFLAGS.IF into %eax +CFLT6: movl %eax,%fs:2*4(%rsi) # EFLAGS + movl UREGS_rip+8(%rsp),%eax +CFLT7: movl %eax,%fs:(%rsi) # EIP + testb $TBF_EXCEPTION_ERRCODE,%cl + jz 1f + subl $4,%esi + movl TRAPBOUNCE_error_code(%rdx),%eax +CFLT8: movl %eax,%fs:(%rsi) # ERROR CODE +1: + testb $TBF_FAILSAFE,%cl + jz 2f + subl $4*4,%esi + movl %gs,%eax +CFLT9: movl %eax,%fs:3*4(%rsi) # GS +CFLT10: movl %edi,%fs:2*4(%rsi) # FS + movl %es,%eax +CFLT11: movl %eax,%fs:1*4(%rsi) # ES + movl %ds,%eax +CFLT12: movl %eax,%fs:0*4(%rsi) # DS +2: + /* Rewrite our stack frame and return to guest-OS mode. */ + /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */ + movl $TRAP_syscall,UREGS_entry_vector+8(%rsp) + andl $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\ + X86_EFLAGS_NT|X86_EFLAGS_TF),UREGS_eflags+8(%rsp) + mov %fs,UREGS_ss+8(%rsp) + movl %esi,UREGS_rsp+8(%rsp) +CFLT13: mov %edi,%fs + movzwl TRAPBOUNCE_cs(%rdx),%eax + /* Null selectors (0-3) are not allowed. 
*/ + testl $~3,%eax + jz domain_crash_synchronous + movl %eax,UREGS_cs+8(%rsp) + movl TRAPBOUNCE_eip(%rdx),%eax + movl %eax,UREGS_rip+8(%rsp) + movb $0,TRAPBOUNCE_flags(%rdx) + ret +.section .fixup,"ax" +CFIX13: + xorl %edi,%edi + jmp CFLT13 +.previous +.section __ex_table,"a" + .quad CFLT1,domain_crash_synchronous , CFLT2,compat_crash_page_fault + .quad CFLT3,compat_crash_page_fault_4 , CFLT4,domain_crash_synchronous + .quad CFLT5,compat_crash_page_fault_4 , CFLT6,compat_crash_page_fault_8 + .quad CFLT7,compat_crash_page_fault , CFLT8,compat_crash_page_fault + .quad CFLT9,compat_crash_page_fault_12, CFLT10,compat_crash_page_fault_8 + .quad CFLT11,compat_crash_page_fault_4 , CFLT12,compat_crash_page_fault + .quad CFLT13,CFIX13 +.previous + +compat_crash_page_fault_12: + addl $4,%esi +compat_crash_page_fault_8: + addl $4,%esi +compat_crash_page_fault_4: + addl $4,%esi +compat_crash_page_fault: +CFLT14: mov %edi,%fs + movl %esi,%edi + call show_page_walk + jmp domain_crash_synchronous +.section .fixup,"ax" +CFIX14: + xorl %edi,%edi + jmp CFLT14 +.previous +.section __ex_table,"a" + .quad CFLT14,CFIX14 +.previous + +.section .rodata, "a", @progbits + +ENTRY(compat_hypercall_table) + .quad compat_set_trap_table /* 0 */ + .quad do_mmu_update + .quad compat_set_gdt + .quad do_stack_switch + .quad compat_set_callbacks + .quad do_fpu_taskswitch /* 5 */ + .quad do_sched_op_compat + .quad compat_platform_op + .quad do_set_debugreg + .quad do_get_debugreg + .quad compat_update_descriptor /* 10 */ + .quad compat_ni_hypercall + .quad compat_memory_op + .quad compat_multicall + .quad compat_update_va_mapping + .quad compat_set_timer_op /* 15 */ + .quad do_event_channel_op_compat + .quad compat_xen_version + .quad do_console_io + .quad compat_physdev_op_compat + .quad compat_grant_table_op /* 20 */ + .quad compat_vm_assist + .quad compat_update_va_mapping_otherdomain + .quad compat_iret + .quad compat_vcpu_op + .quad compat_ni_hypercall /* 25 */ + .quad compat_mmuext_op + .quad 
compat_acm_op + .quad compat_nmi_op + .quad compat_sched_op + .quad compat_callback_op /* 30 */ + .quad compat_xenoprof_op + .quad do_event_channel_op + .quad compat_physdev_op + .quad do_hvm_op + .quad compat_sysctl /* 35 */ + .quad compat_domctl + .quad compat_kexec_op + .rept NR_hypercalls-((.-compat_hypercall_table)/8) + .quad compat_ni_hypercall + .endr + +ENTRY(compat_hypercall_args_table) + .byte 1 /* compat_set_trap_table */ /* 0 */ + .byte 4 /* compat_mmu_update */ + .byte 2 /* compat_set_gdt */ + .byte 2 /* compat_stack_switch */ + .byte 4 /* compat_set_callbacks */ + .byte 1 /* compat_fpu_taskswitch */ /* 5 */ + .byte 2 /* compat_sched_op_compat */ + .byte 1 /* compat_platform_op */ + .byte 2 /* compat_set_debugreg */ + .byte 1 /* compat_get_debugreg */ + .byte 4 /* compat_update_descriptor */ /* 10 */ + .byte 0 /* compat_ni_hypercall */ + .byte 2 /* compat_memory_op */ + .byte 2 /* compat_multicall */ + .byte 4 /* compat_update_va_mapping */ + .byte 2 /* compat_set_timer_op */ /* 15 */ + .byte 1 /* compat_event_channel_op_compat */ + .byte 2 /* compat_xen_version */ + .byte 3 /* compat_console_io */ + .byte 1 /* compat_physdev_op_compat */ + .byte 3 /* compat_grant_table_op */ /* 20 */ + .byte 2 /* compat_vm_assist */ + .byte 5 /* compat_update_va_mapping_otherdomain */ + .byte 0 /* compat_iret */ + .byte 3 /* compat_vcpu_op */ + .byte 0 /* compat_ni_hypercall */ /* 25 */ + .byte 4 /* compat_mmuext_op */ + .byte 1 /* compat_acm_op */ + .byte 2 /* compat_nmi_op */ + .byte 2 /* compat_sched_op */ + .byte 2 /* compat_callback_op */ /* 30 */ + .byte 2 /* compat_xenoprof_op */ + .byte 2 /* compat_event_channel_op */ + .byte 2 /* compat_physdev_op */ + .byte 2 /* do_hvm_op */ + .byte 1 /* compat_sysctl */ /* 35 */ + .byte 1 /* compat_domctl */ + .byte 2 /* compat_kexec_op */ + .rept NR_hypercalls-(.-compat_hypercall_args_table) + .byte 0 /* compat_ni_hypercall */ + .endr diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c new file 
mode 100644 index 0000000000..ae833ff4a6 --- /dev/null +++ b/xen/arch/x86/x86_64/compat/mm.c @@ -0,0 +1,329 @@ +#ifdef CONFIG_COMPAT + +#include <xen/event.h> +#include <compat/memory.h> +#include <compat/xen.h> + +int compat_set_gdt(XEN_GUEST_HANDLE(uint) frame_list, unsigned int entries) +{ + unsigned int i, nr_pages = (entries + 511) / 512; + unsigned long frames[16]; + long ret; + + /* Rechecked in set_gdt, but ensures a sane limit for copy_from_user(). */ + if ( entries > FIRST_RESERVED_GDT_ENTRY ) + return -EINVAL; + + if ( !guest_handle_okay(frame_list, nr_pages) ) + return -EFAULT; + + for ( i = 0; i < nr_pages; ++i ) + { + unsigned int frame; + + if ( __copy_from_guest(&frame, frame_list, 1) ) + return -EFAULT; + frames[i] = frame; + guest_handle_add_offset(frame_list, 1); + } + + LOCK_BIGLOCK(current->domain); + + if ( (ret = set_gdt(current, frames, entries)) == 0 ) + local_flush_tlb(); + + UNLOCK_BIGLOCK(current->domain); + + return ret; +} + +int compat_update_descriptor(u32 pa_lo, u32 pa_hi, u32 desc_lo, u32 desc_hi) +{ + return do_update_descriptor(pa_lo | ((u64)pa_hi << 32), + desc_lo | ((u64)desc_hi << 32)); +} + +int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) +{ + struct compat_machphys_mfn_list xmml; + l2_pgentry_t l2e; + unsigned long v; + compat_pfn_t mfn; + unsigned int i; + int rc = 0; + + switch ( op ) + { + case XENMEM_add_to_physmap: + { + struct compat_add_to_physmap cmp; + struct xen_add_to_physmap *nat = (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id); + + if ( copy_from_guest(&cmp, arg, 1) ) + return -EFAULT; + + XLAT_add_to_physmap(nat, &cmp); + rc = arch_memory_op(op, guest_handle_from_ptr(nat, void)); + + break; + } + + case XENMEM_set_memory_map: + { + struct compat_foreign_memory_map cmp; + struct xen_foreign_memory_map *nat = (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id); + + if ( copy_from_guest(&cmp, arg, 1) ) + return -EFAULT; + +#define XLAT_memory_map_HNDL_buffer(_d_, _s_) \ + 
guest_from_compat_handle((_d_)->buffer, (_s_)->buffer) + XLAT_foreign_memory_map(nat, &cmp); +#undef XLAT_memory_map_HNDL_buffer + + rc = arch_memory_op(op, guest_handle_from_ptr(nat, void)); + + break; + } + + case XENMEM_memory_map: + case XENMEM_machine_memory_map: + { + struct compat_memory_map cmp; + struct xen_memory_map *nat = (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id); + + if ( copy_from_guest(&cmp, arg, 1) ) + return -EFAULT; + +#define XLAT_memory_map_HNDL_buffer(_d_, _s_) \ + guest_from_compat_handle((_d_)->buffer, (_s_)->buffer) + XLAT_memory_map(nat, &cmp); +#undef XLAT_memory_map_HNDL_buffer + + rc = arch_memory_op(op, guest_handle_from_ptr(nat, void)); + if ( rc < 0 ) + break; + +#define XLAT_memory_map_HNDL_buffer(_d_, _s_) ((void)0) + XLAT_memory_map(&cmp, nat); +#undef XLAT_memory_map_HNDL_buffer + if ( copy_to_guest(arg, &cmp, 1) ) + rc = -EFAULT; + + break; + } + + case XENMEM_machphys_mapping: + { + struct domain *d = current->domain; + struct compat_machphys_mapping mapping = { + .v_start = MACH2PHYS_COMPAT_VIRT_START(d), + .v_end = MACH2PHYS_COMPAT_VIRT_END, + .max_mfn = MACH2PHYS_COMPAT_NR_ENTRIES(d) - 1 + }; + + if ( copy_to_guest(arg, &mapping, 1) ) + rc = -EFAULT; + + break; + } + + case XENMEM_machphys_mfn_list: + if ( copy_from_guest(&xmml, arg, 1) ) + return -EFAULT; + + for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START; + (i != xmml.max_extents) && (v != RDWR_COMPAT_MPT_VIRT_END); + i++, v += 1 << L2_PAGETABLE_SHIFT ) + { + l2e = compat_idle_pg_table_l2[l2_table_offset(v)]; + if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) + break; + mfn = l2e_get_pfn(l2e) + l1_table_offset(v); + if ( copy_to_compat_offset(xmml.extent_start, i, &mfn, 1) ) + return -EFAULT; + } + + xmml.nr_extents = i; + if ( copy_to_guest(arg, &xmml, 1) ) + rc = -EFAULT; + + break; + + default: + rc = -ENOSYS; + break; + } + + return rc; +} + +int compat_update_va_mapping(unsigned int va, u32 lo, u32 hi, + unsigned int flags) +{ + return do_update_va_mapping(va, lo | 
((u64)hi << 32), flags); +} + +int compat_update_va_mapping_otherdomain(unsigned long va, u32 lo, u32 hi, + unsigned long flags, + domid_t domid) +{ + return do_update_va_mapping_otherdomain(va, lo | ((u64)hi << 32), flags, domid); +} + +DEFINE_XEN_GUEST_HANDLE(mmuext_op_compat_t); + +int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops, + unsigned int count, + XEN_GUEST_HANDLE(uint) pdone, + unsigned int foreigndom) +{ + unsigned int i, preempt_mask; + int rc = 0; + XEN_GUEST_HANDLE(mmuext_op_t) nat_ops; + + preempt_mask = count & MMU_UPDATE_PREEMPTED; + count ^= preempt_mask; + + if ( unlikely(!guest_handle_okay(cmp_uops, count)) ) + return -EFAULT; + + set_xen_guest_handle(nat_ops, (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id)); + + for ( ; count; count -= i ) + { + mmuext_op_t *nat_op = nat_ops.p; + unsigned int limit; + int err; + + if ( hypercall_preempt_check() ) + { + rc = hypercall_create_continuation( + __HYPERVISOR_mmuext_op, "hihi", + cmp_uops, count | MMU_UPDATE_PREEMPTED, pdone, foreigndom); + break; + } + + limit = COMPAT_ARG_XLAT_SIZE / sizeof(*nat_op); + + for ( i = 0; i < min(limit, count); ++i ) + { + mmuext_op_compat_t cmp_op; + enum XLAT_mmuext_op_arg1 arg1; + enum XLAT_mmuext_op_arg2 arg2; + + if ( unlikely(__copy_from_guest(&cmp_op, cmp_uops, 1) != 0) ) + { + rc = -EFAULT; + break; + } + + switch ( cmp_op.cmd ) + { + case MMUEXT_PIN_L1_TABLE: + case MMUEXT_PIN_L2_TABLE: + case MMUEXT_PIN_L3_TABLE: + case MMUEXT_PIN_L4_TABLE: + case MMUEXT_UNPIN_TABLE: + case MMUEXT_NEW_BASEPTR: + arg1 = XLAT_mmuext_op_arg1_mfn; + break; + default: + arg1 = XLAT_mmuext_op_arg1_linear_addr; + break; + case MMUEXT_NEW_USER_BASEPTR: + rc = -EINVAL; + case MMUEXT_TLB_FLUSH_LOCAL: + case MMUEXT_TLB_FLUSH_MULTI: + case MMUEXT_TLB_FLUSH_ALL: + case MMUEXT_FLUSH_CACHE: + arg1 = -1; + break; + } + + if ( rc ) + break; + + switch ( cmp_op.cmd ) + { + case MMUEXT_SET_LDT: + arg2 = XLAT_mmuext_op_arg2_nr_ents; + break; + case MMUEXT_TLB_FLUSH_MULTI: + 
case MMUEXT_INVLPG_MULTI: + arg2 = XLAT_mmuext_op_arg2_vcpumask; + break; + default: + arg2 = -1; + break; + } + +#define XLAT_mmuext_op_HNDL_arg2_vcpumask(_d_, _s_) \ + do \ + { \ + unsigned int vcpumask; \ + if ( i < --limit ) \ + { \ + (_d_)->arg2.vcpumask.p = (void *)(nat_ops.p + limit); \ + if ( copy_from_compat(&vcpumask, (_s_)->arg2.vcpumask, 1) == 0 ) \ + *(unsigned long *)(_d_)->arg2.vcpumask.p = vcpumask; \ + else \ + rc = -EFAULT; \ + } \ + } while(0) + XLAT_mmuext_op(nat_op, &cmp_op); +#undef XLAT_mmuext_op_HNDL_arg2_vcpumask + + if ( rc || i >= limit ) + break; + + guest_handle_add_offset(cmp_uops, 1); + ++nat_op; + } + + err = do_mmuext_op(nat_ops, i | preempt_mask, pdone, foreigndom); + + if ( err ) + { + BUILD_BUG_ON(__HYPERVISOR_mmuext_op <= 0); + if ( err == __HYPERVISOR_mmuext_op ) + { + struct cpu_user_regs *regs = guest_cpu_user_regs(); + unsigned int left = regs->ecx & ~MMU_UPDATE_PREEMPTED; + + BUG_ON(!(regs->ecx & MMU_UPDATE_PREEMPTED)); + BUG_ON(left > count); + guest_handle_add_offset(nat_ops, count - left); + BUG_ON(left + i < count); + guest_handle_add_offset(cmp_uops, (signed int)(count - left - i)); + left = 1; + BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, cmp_uops)); + BUG_ON(left != regs->ecx); + regs->ecx += count - i; + } + else + BUG_ON(err > 0); + rc = err; + } + + if ( rc ) + break; + + /* Force do_mmuext_op() to not start counting from zero again. 
*/ + preempt_mask = MMU_UPDATE_PREEMPTED; + } + + return rc; +} + +#endif /* CONFIG_COMPAT */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/x86/x86_64/compat/traps.c b/xen/arch/x86/x86_64/compat/traps.c new file mode 100644 index 0000000000..a7f24cecf8 --- /dev/null +++ b/xen/arch/x86/x86_64/compat/traps.c @@ -0,0 +1,338 @@ +#ifdef CONFIG_COMPAT + +#include <xen/event.h> +#include <compat/callback.h> +#include <compat/arch-x86_32.h> + +void compat_show_guest_stack(struct cpu_user_regs *regs, int debug_stack_lines) +{ + unsigned int i, *stack, addr; + + stack = (unsigned int *)(unsigned long)regs->_esp; + printk("Guest stack trace from esp=%08lx:\n ", (unsigned long)stack); + + for ( i = 0; i < debug_stack_lines * 8; i++ ) + { + if ( (((long)stack + 3) & (STACK_SIZE - 4)) == 0 ) + break; + if ( get_user(addr, stack) ) + { + if ( i != 0 ) + printk("\n "); + printk("Fault while accessing guest memory."); + i = 1; + break; + } + if ( (i != 0) && ((i % 8) == 0) ) + printk("\n "); + printk(" %08x", addr); + stack++; + } + if ( i == 0 ) + printk("Stack empty."); + printk("\n"); +} + +unsigned int compat_iret(void) +{ + struct cpu_user_regs *regs = guest_cpu_user_regs(); + u32 eflags; + + /* Restore EAX (clobbered by hypercall). */ + if ( unlikely(__get_user(regs->_eax, (u32 __user *)regs->rsp)) ) + goto exit_and_crash; + + /* Restore CS and EIP. */ + if ( unlikely(__get_user(regs->_eip, (u32 __user *)regs->rsp + 1)) || + unlikely(__get_user(regs->cs, (u32 __user *)regs->rsp + 2)) ) + goto exit_and_crash; + + /* + * Fix up and restore EFLAGS. We fix up in a local staging area + * to avoid firing the BUG_ON(IOPL) check in arch_get_info_guest. 
+ */ + if ( unlikely(__get_user(eflags, (u32 __user *)regs->rsp + 3)) ) + goto exit_and_crash; + regs->_eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF; + + if ( unlikely(eflags & X86_EFLAGS_VM) ) + { + /* + * Cannot return to VM86 mode: inject a GP fault instead. Note that + * the GP fault is reported on the first VM86 mode instruction, not on + * the IRET (which is why we can simply leave the stack frame as-is + * (except for perhaps having to copy it), which in turn seems better + * than teaching create_bounce_frame() to needlessly deal with vm86 + * mode frames). + */ + const struct trap_info *ti; + u32 x, ksp = current->arch.guest_context.kernel_sp - 40; + unsigned int i; + int rc = 0; + + gdprintk(XENLOG_ERR, "VM86 mode unavailable (ksp:%08X->%08X)\n", + regs->_esp, ksp); + if ( ksp < regs->_esp ) + { + for (i = 1; i < 10; ++i) + { + rc |= __get_user(x, (u32 __user *)regs->rsp + i); + rc |= __put_user(x, (u32 __user *)(unsigned long)ksp + i); + } + } + else if ( ksp > regs->_esp ) + { + for (i = 9; i > 0; --i) + { + rc |= __get_user(x, (u32 __user *)regs->rsp + i); + rc |= __put_user(x, (u32 __user *)(unsigned long)ksp + i); + } + } + if ( rc ) + goto exit_and_crash; + regs->_esp = ksp; + regs->ss = current->arch.guest_context.kernel_ss; + + ti = &current->arch.guest_context.trap_ctxt[13]; + if ( TI_GET_IF(ti) ) + eflags &= ~X86_EFLAGS_IF; + regs->_eflags = eflags & ~(X86_EFLAGS_VM|X86_EFLAGS_RF| + X86_EFLAGS_NT|X86_EFLAGS_TF); + + if ( unlikely(__put_user(0, (u32 __user *)regs->rsp)) ) + goto exit_and_crash; + regs->_eip = ti->address; + regs->cs = ti->cs; + } + else if ( unlikely(ring_0(regs)) ) + goto exit_and_crash; + else if ( !ring_1(regs) ) + { + /* Return to ring 2/3: restore ESP and SS. */ + if ( __get_user(regs->ss, (u32 __user *)regs->rsp + 5) + || __get_user(regs->_esp, (u32 __user *)regs->rsp + 4)) + goto exit_and_crash; + } + else + regs->_esp += 16; + + /* No longer in NMI context. 
*/ + clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags); + + /* Restore upcall mask from supplied EFLAGS.IF. */ + vcpu_info(current, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF); + + /* + * The hypercall exit path will overwrite EAX with this return + * value. + */ + return regs->_eax; + + exit_and_crash: + gdprintk(XENLOG_ERR, "Fatal error\n"); + domain_crash(current->domain); + return 0; +} + +static long compat_register_guest_callback(struct compat_callback_register *reg) +{ + long ret = 0; + struct vcpu *v = current; + + fixup_guest_code_selector(v->domain, reg->address.cs); + + switch ( reg->type ) + { + case CALLBACKTYPE_event: + v->arch.guest_context.event_callback_cs = reg->address.cs; + v->arch.guest_context.event_callback_eip = reg->address.eip; + break; + + case CALLBACKTYPE_failsafe: + v->arch.guest_context.failsafe_callback_cs = reg->address.cs; + v->arch.guest_context.failsafe_callback_eip = reg->address.eip; + if ( reg->flags & CALLBACKF_mask_events ) + set_bit(_VGCF_failsafe_disables_events, + &v->arch.guest_context.flags); + else + clear_bit(_VGCF_failsafe_disables_events, + &v->arch.guest_context.flags); + break; + + case CALLBACKTYPE_nmi: + ret = register_guest_nmi_callback(reg->address.eip); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static long compat_unregister_guest_callback(struct compat_callback_unregister *unreg) +{ + long ret; + + switch ( unreg->type ) + { + case CALLBACKTYPE_nmi: + ret = unregister_guest_nmi_callback(); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + + +long compat_callback_op(int cmd, XEN_GUEST_HANDLE(void) arg) +{ + long ret; + + switch ( cmd ) + { + case CALLBACKOP_register: + { + struct compat_callback_register reg; + + ret = -EFAULT; + if ( copy_from_guest(&reg, arg, 1) ) + break; + + ret = compat_register_guest_callback(&reg); + } + break; + + case CALLBACKOP_unregister: + { + struct compat_callback_unregister unreg; + + ret = -EFAULT; + if ( copy_from_guest(&unreg, 
arg, 1) ) + break; + + ret = compat_unregister_guest_callback(&unreg); + } + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +long compat_set_callbacks(unsigned long event_selector, + unsigned long event_address, + unsigned long failsafe_selector, + unsigned long failsafe_address) +{ + struct compat_callback_register event = { + .type = CALLBACKTYPE_event, + .address = { + .cs = event_selector, + .eip = event_address + } + }; + struct compat_callback_register failsafe = { + .type = CALLBACKTYPE_failsafe, + .address = { + .cs = failsafe_selector, + .eip = failsafe_address + } + }; + + compat_register_guest_callback(&event); + compat_register_guest_callback(&failsafe); + + return 0; +} + +DEFINE_XEN_GUEST_HANDLE(trap_info_compat_t); + +int compat_set_trap_table(XEN_GUEST_HANDLE(trap_info_compat_t) traps) +{ + struct compat_trap_info cur; + struct trap_info *dst = current->arch.guest_context.trap_ctxt; + long rc = 0; + + /* If no table is presented then clear the entire virtual IDT. */ + if ( guest_handle_is_null(traps) ) + { + memset(dst, 0, 256 * sizeof(*dst)); + return 0; + } + + for ( ; ; ) + { + if ( hypercall_preempt_check() ) + { + rc = hypercall_create_continuation( + __HYPERVISOR_set_trap_table, "h", traps); + break; + } + + if ( copy_from_guest(&cur, traps, 1) ) + { + rc = -EFAULT; + break; + } + + if ( cur.address == 0 ) + break; + + fixup_guest_code_selector(current->domain, cur.cs); + + XLAT_trap_info(dst + cur.vector, &cur); + + guest_handle_add_offset(traps, 1); + } + + return rc; +} + +#endif /* CONFIG_COMPAT */ + +static void hypercall_page_initialise_ring1_kernel(void *hypercall_page) +{ + char *p; + int i; + + /* Fill in all the transfer points with template machine code. 
*/ + + for ( i = 0; i < (PAGE_SIZE / 32); i++ ) + { + p = (char *)(hypercall_page + (i * 32)); + *(u8 *)(p+ 0) = 0xb8; /* mov $<i>,%eax */ + *(u32 *)(p+ 1) = i; + *(u16 *)(p+ 5) = 0x82cd; /* int $0x82 */ + *(u8 *)(p+ 7) = 0xc3; /* ret */ + } + + /* + * HYPERVISOR_iret is special because it doesn't return and expects a + * special stack frame. Guests jump at this transfer point instead of + * calling it. + */ + p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32)); + *(u8 *)(p+ 0) = 0x50; /* push %eax */ + *(u8 *)(p+ 1) = 0xb8; /* mov $__HYPERVISOR_iret,%eax */ + *(u32 *)(p+ 2) = __HYPERVISOR_iret; + *(u16 *)(p+ 6) = 0x82cd; /* int $0x82 */ +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/x86/x86_64/domain.c b/xen/arch/x86/x86_64/domain.c new file mode 100644 index 0000000000..775e536f4d --- /dev/null +++ b/xen/arch/x86/x86_64/domain.c @@ -0,0 +1,68 @@ +/****************************************************************************** + * arch/x86/x86_64/domain.c + * + */ + +#include <xen/config.h> +#include <xen/types.h> +#include <xen/guest_access.h> +#include <asm/hypercall.h> +#include <compat/vcpu.h> + +int +arch_compat_vcpu_op( + int cmd, struct vcpu *v, XEN_GUEST_HANDLE(void) arg) +{ + long rc = 0; + + switch ( cmd ) + { + case VCPUOP_register_runstate_memory_area: + { + struct compat_vcpu_register_runstate_memory_area area; + struct compat_vcpu_runstate_info info; + + rc = -EFAULT; + if ( copy_from_guest(&area, arg, 1) ) + break; + + if ( area.addr.h.c != area.addr.p || + !compat_handle_okay(area.addr.h, 1) ) + break; + + rc = 0; + guest_from_compat_handle(v->runstate_guest.compat, area.addr.h); + + if ( v == current ) + { + XLAT_vcpu_runstate_info(&info, &v->runstate); + } + else + { + struct vcpu_runstate_info runstate; + + vcpu_runstate_get(v, &runstate); + XLAT_vcpu_runstate_info(&info, &runstate); + } + 
__copy_to_guest(v->runstate_guest.compat, &info, 1); + + break; + } + + default: + rc = -ENOSYS; + break; + } + + return rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/x86/x86_64/domctl.c b/xen/arch/x86/x86_64/domctl.c new file mode 100644 index 0000000000..4af72ff193 --- /dev/null +++ b/xen/arch/x86/x86_64/domctl.c @@ -0,0 +1,111 @@ +/****************************************************************************** + * Arch-specific compatibility domctl.c + */ + +#include <xen/config.h> +#include <compat/domctl.h> +#include <xen/guest_access.h> +#include <asm/shadow.h> + +DEFINE_XEN_GUEST_HANDLE(compat_domctl_t); +#define xen_domctl compat_domctl +#define xen_domctl_t compat_domctl_t +#define arch_do_domctl(x, h) arch_compat_domctl(x, _##h) + +static int compat_shadow_domctl(struct domain *d, + compat_domctl_shadow_op_t *csc, + XEN_GUEST_HANDLE(void) u_domctl) +{ + xen_domctl_shadow_op_t nsc; + int rc, mode; + +#define XLAT_domctl_shadow_op_HNDL_dirty_bitmap(_d_, _s_) \ + do \ + { \ + if ( (_s_)->op != XEN_DOMCTL_SHADOW_OP_CLEAN \ + && (_s_)->op != XEN_DOMCTL_SHADOW_OP_PEEK ) \ + { \ + set_xen_guest_handle((_d_)->dirty_bitmap, NULL); \ + mode = -1; \ + } \ + else if ( compat_handle_is_null((_s_)->dirty_bitmap) \ + || (((_s_)->pages - 1) \ + & (BITS_PER_LONG - COMPAT_BITS_PER_LONG)) \ + == BITS_PER_LONG - COMPAT_BITS_PER_LONG ) \ + { \ + XEN_GUEST_HANDLE(void) tmp; \ + guest_from_compat_handle(tmp, (_s_)->dirty_bitmap); \ + (_d_)->dirty_bitmap = guest_handle_cast(tmp, ulong); \ + mode = 0; \ + } \ + else if ( (_s_)->pages > COMPAT_ARG_XLAT_SIZE * 8 ) \ + { \ + printk("Cannot translate compatibility mode XEN_DOMCTL_SHADOW_OP_{CLEAN,PEEK} (0x%lX)\n", \ + (_s_)->pages); \ + return -E2BIG; \ + } \ + else \ + { \ + set_xen_guest_handle((_d_)->dirty_bitmap, \ + (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id)); \ + mode = 1; \ + } \ + } while (0) + 
XLAT_domctl_shadow_op(&nsc, csc); +#undef XLAT_domctl_shadow_op_HNDL_dirty_bitmap + rc = shadow_domctl(d, &nsc, u_domctl); + if ( rc != __HYPERVISOR_domctl ) + { + BUG_ON(rc > 0); +#define XLAT_domctl_shadow_op_HNDL_dirty_bitmap(_d_, _s_) \ + do \ + { \ + if ( rc == 0 \ + && mode > 0 \ + && copy_to_compat((_d_)->dirty_bitmap, \ + (unsigned int *)(_s_)->dirty_bitmap.p, \ + ((_s_)->pages + COMPAT_BITS_PER_LONG - 1) / COMPAT_BITS_PER_LONG) ) \ + rc = -EFAULT; \ + } while (0) + XLAT_domctl_shadow_op(csc, &nsc); +#undef XLAT_domctl_shadow_op_HNDL_dirty_bitmap + } + return rc; +} +#define xen_domctl_shadow_op compat_domctl_shadow_op +#define xen_domctl_shadow_op_t compat_domctl_shadow_op_t +#define shadow_domctl(d, sc, u) compat_shadow_domctl(d, sc, u) + +#define xen_domctl_ioport_permission compat_domctl_ioport_permission +#define xen_domctl_ioport_permission_t compat_domctl_ioport_permission_t + +#define xen_domctl_getpageframeinfo compat_domctl_getpageframeinfo +#define xen_domctl_getpageframeinfo_t compat_domctl_getpageframeinfo_t + +#define xen_domctl_getpageframeinfo2 compat_domctl_getpageframeinfo2 +#define xen_domctl_getpageframeinfo2_t compat_domctl_getpageframeinfo2_t + +#define xen_domctl_getmemlist compat_domctl_getmemlist +#define xen_domctl_getmemlist_t compat_domctl_getmemlist_t +#define xen_pfn_t compat_pfn_t + +#define xen_domctl_hypercall_init compat_domctl_hypercall_init +#define xen_domctl_hypercall_init_t compat_domctl_hypercall_init_t + +#define COMPAT +#define _XEN_GUEST_HANDLE(t) XEN_GUEST_HANDLE(t) +#define _long int +#define copy_from_xxx_offset copy_from_compat_offset +#define copy_to_xxx_offset copy_to_compat_offset + +#include "../domctl.c" + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index b28b644c33..24f7ea6eb5 100644 --- a/xen/arch/x86/x86_64/entry.S +++ 
b/xen/arch/x86/x86_64/entry.S @@ -324,7 +324,16 @@ domain_crash_synchronous: GET_GUEST_REGS(%rax) movq %rax,%rsp # create_bounce_frame() temporarily clobbers CS.RPL. Fix up. +#ifdef CONFIG_COMPAT + movq CPUINFO_current_vcpu(%rax),%rax + movq VCPU_domain(%rax),%rax + btl $_DOMF_compat,DOMAIN_domain_flags(%rax) + setnc %al + leal (%rax,%rax,2),%eax + orb %al,UREGS_cs(%rsp) +#else orb $3,UREGS_cs(%rsp) +#endif # printk(domain_crash_synchronous_string) leaq domain_crash_synchronous_string(%rip),%rdi xorl %eax,%eax @@ -336,8 +345,15 @@ domain_crash_synchronous: ENTRY(ret_from_intr) GET_CURRENT(%rbx) testb $3,UREGS_cs(%rsp) - jnz test_all_events - jmp restore_all_xen + jz restore_all_xen +#ifndef CONFIG_COMPAT + jmp test_all_events +#else + movq VCPU_domain(%rbx),%rax + btl $_DOMF_compat,DOMAIN_domain_flags(%rax) + jnc test_all_events + jmp compat_test_all_events +#endif ALIGN /* No special register assumptions. */ @@ -355,6 +371,11 @@ handle_exception: testb $3,UREGS_cs(%rsp) jz restore_all_xen leaq VCPU_trap_bounce(%rbx),%rdx +#ifdef CONFIG_COMPAT + movq VCPU_domain(%rbx),%rax + btl $_DOMF_compat,DOMAIN_domain_flags(%rax) + jc compat_post_handle_exception +#endif testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx) jz test_all_events call create_bounce_frame @@ -612,3 +633,7 @@ ENTRY(hypercall_args_table) .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr + +#ifdef CONFIG_COMPAT +#include "compat/entry.S" +#endif diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index e14d266e12..a482c10a06 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -28,9 +28,14 @@ #include <asm/page.h> #include <asm/flushtlb.h> #include <asm/fixmap.h> +#include <asm/hypercall.h> #include <asm/msr.h> #include <public/memory.h> +#ifdef CONFIG_COMPAT +unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START; +#endif + struct page_info *alloc_xen_pagetable(void) { extern int early_boot; @@ -121,6 +126,47 @@ void __init 
paging_init(void) l2_ro_mpt++; } +#ifdef CONFIG_COMPAT + if ( !compat_disabled ) + { + /* Create user-accessible L2 directory to map the MPT for compatibility guests. */ + BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) != + l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)); + l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]); + if ( (l2_pg = alloc_domheap_page(NULL)) == NULL ) + goto nomem; + compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg)); + l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)], + l3e_from_page(l2_pg, __PAGE_HYPERVISOR)); + l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START); + /* + * Allocate and map the compatibility mode machine-to-phys table. + */ + mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1)); + if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START ) + mpt_size = RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START; + mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); + if ( m2p_compat_vstart + mpt_size < MACH2PHYS_COMPAT_VIRT_END ) + m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size; + for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) + { + if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL ) + goto nomem; + map_pages_to_xen( + RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), + page_to_mfn(l1_pg), + 1UL << PAGETABLE_ORDER, + PAGE_HYPERVISOR); + memset((void *)(RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), + 0x55, + 1UL << L2_PAGETABLE_SHIFT); + /* NB. Cannot be GLOBAL as the pt entries get copied into per-VM space. */ + l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT)); + l2_ro_mpt++; + } + } +#endif + /* Set up linear page table mapping. 
*/ l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)], l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR)); @@ -182,6 +228,30 @@ void subarch_init_memory(void) share_xen_page_with_privileged_guests(page, XENSHARE_readonly); } } +#ifdef CONFIG_COMPAT + if ( !compat_disabled ) + { + for ( v = RDWR_COMPAT_MPT_VIRT_START; + v != RDWR_COMPAT_MPT_VIRT_END; + v += 1 << L2_PAGETABLE_SHIFT ) + { + l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[ + l3_table_offset(v)]; + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) + continue; + l2e = l3e_to_l2e(l3e)[l2_table_offset(v)]; + if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) + continue; + m2p_start_mfn = l2e_get_pfn(l2e); + + for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) + { + struct page_info *page = mfn_to_page(m2p_start_mfn + i); + share_xen_page_with_privileged_guests(page, XENSHARE_readonly); + } + } + } +#endif } long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) @@ -189,7 +259,8 @@ long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) struct xen_machphys_mfn_list xmml; l3_pgentry_t l3e; l2_pgentry_t l2e; - unsigned long mfn, v; + unsigned long v; + xen_pfn_t mfn; unsigned int i; long rc = 0; @@ -231,7 +302,7 @@ long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) long do_stack_switch(unsigned long ss, unsigned long esp) { - fixup_guest_stack_selector(ss); + fixup_guest_stack_selector(current->domain, ss); current->arch.guest_context.kernel_ss = ss; current->arch.guest_context.kernel_sp = esp; return 0; @@ -291,7 +362,7 @@ long do_set_segment_base(unsigned int which, unsigned long base) /* Returns TRUE if given descriptor is valid for GDT or LDT. */ -int check_descriptor(struct desc_struct *d) +int check_descriptor(const struct domain *dom, struct desc_struct *d) { u32 a = d->a, b = d->b; u16 cs; @@ -301,12 +372,16 @@ int check_descriptor(struct desc_struct *d) goto good; /* Check and fix up the DPL. 
*/ - if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) ) - d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13); + if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) ) + d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13); /* All code and data segments are okay. No base/limit checking. */ if ( (b & _SEGMENT_S) ) - goto good; + { + if ( !IS_COMPAT(dom) || !(b & _SEGMENT_L) ) + goto good; + goto bad; + } /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */ if ( (b & _SEGMENT_TYPE) == 0x000 ) @@ -318,8 +393,8 @@ int check_descriptor(struct desc_struct *d) /* Validate and fix up the target code selector. */ cs = a >> 16; - fixup_guest_code_selector(cs); - if ( !guest_gate_selector_okay(cs) ) + fixup_guest_code_selector(dom, cs); + if ( !guest_gate_selector_okay(dom, cs) ) goto bad; a = d->a = (d->a & 0xffffU) | (cs << 16); @@ -333,6 +408,8 @@ int check_descriptor(struct desc_struct *d) return 0; } +#include "compat/mm.c" + /* * Local variables: * mode: C diff --git a/xen/arch/x86/x86_64/physdev.c b/xen/arch/x86/x86_64/physdev.c new file mode 100644 index 0000000000..42b8269e4d --- /dev/null +++ b/xen/arch/x86/x86_64/physdev.c @@ -0,0 +1,48 @@ +/****************************************************************************** + * physdev.c + */ + +#include <xen/config.h> +#include <xen/types.h> +#include <xen/guest_access.h> +#include <compat/xen.h> +#include <compat/event_channel.h> +#include <compat/physdev.h> +#include <asm/hypercall.h> + +#define do_physdev_op compat_physdev_op + +#define physdev_apic compat_physdev_apic +#define physdev_apic_t physdev_apic_compat_t + +#define physdev_eoi compat_physdev_eoi +#define physdev_eoi_t physdev_eoi_compat_t + +#define physdev_set_iobitmap compat_physdev_set_iobitmap +#define physdev_set_iobitmap_t physdev_set_iobitmap_compat_t + +#define physdev_set_iopl compat_physdev_set_iopl +#define physdev_set_iopl_t physdev_set_iopl_compat_t + +#define physdev_irq compat_physdev_irq +#define 
physdev_irq_t physdev_irq_compat_t + +#define physdev_irq_status_query compat_physdev_irq_status_query +#define physdev_irq_status_query_t physdev_irq_status_query_compat_t + +#define COMPAT +#undef guest_handle_okay +#define guest_handle_okay compat_handle_okay +typedef int ret_t; + +#include "../physdev.c" + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/x86/x86_64/platform_hypercall.c b/xen/arch/x86/x86_64/platform_hypercall.c new file mode 100644 index 0000000000..2ce7c12dc1 --- /dev/null +++ b/xen/arch/x86/x86_64/platform_hypercall.c @@ -0,0 +1,29 @@ +/****************************************************************************** + * platform_hypercall.c + * + */ + +#include <xen/config.h> +#include <xen/types.h> +#include <compat/platform.h> + +DEFINE_XEN_GUEST_HANDLE(compat_platform_op_t); +#define xen_platform_op compat_platform_op +#define xen_platform_op_t compat_platform_op_t +#define do_platform_op(x) compat_platform_op(_##x) + +#define COMPAT +#define _XEN_GUEST_HANDLE(t) XEN_GUEST_HANDLE(t) +typedef int ret_t; + +#include "../platform_hypercall.c" + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/x86/x86_64/sysctl.c b/xen/arch/x86/x86_64/sysctl.c new file mode 100644 index 0000000000..66e2d5d93d --- /dev/null +++ b/xen/arch/x86/x86_64/sysctl.c @@ -0,0 +1,33 @@ +/****************************************************************************** + * Arch-specific compatibility sysctl.c + */ + +#include <xen/config.h> +#include <compat/sysctl.h> + +DEFINE_XEN_GUEST_HANDLE(compat_sysctl_t); +#define xen_sysctl compat_sysctl +#define xen_sysctl_t compat_sysctl_t +#define arch_do_sysctl(x, h) arch_compat_sysctl(x, _##h) + +#define xen_sysctl_physinfo compat_sysctl_physinfo +#define xen_sysctl_physinfo_t compat_sysctl_physinfo_t + +#define 
xen_sysctl_ioport_emulation compat_sysctl_ioport_emulation +#define xen_sysctl_ioport_emulation_t compat_sysctl_ioport_emulation_t + +#define COMPAT +#define _XEN_GUEST_HANDLE(t) XEN_GUEST_HANDLE(t) +typedef int ret_t; + +#include "../sysctl.c" + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c index 5eb91130a5..65d9c427dc 100644 --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -17,6 +17,7 @@ #include <asm/msr.h> #include <asm/page.h> #include <asm/shadow.h> +#include <asm/shared.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> @@ -52,7 +53,7 @@ void show_registers(struct cpu_user_regs *regs) if ( guest_mode(regs) ) { context = "guest"; - fault_crs[2] = current->vcpu_info->arch.cr2; + fault_crs[2] = arch_get_cr2(current); } else { @@ -178,6 +179,8 @@ asmlinkage void do_double_fault(struct cpu_user_regs *regs) void toggle_guest_mode(struct vcpu *v) { + if ( IS_COMPAT(v->domain) ) + return; v->arch.flags ^= TF_kernel_mode; __asm__ __volatile__ ( "swapgs" ); update_cr3(v); @@ -232,7 +235,7 @@ unsigned long do_iret(void) clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags); /* Restore upcall mask from supplied EFLAGS.IF. */ - current->vcpu_info->evtchn_upcall_mask = !(iret_saved.rflags & EF_IE); + vcpu_info(current, evtchn_upcall_mask) = !(iret_saved.rflags & EF_IE); /* Saved %rax gets written back to regs->rax in entry.S.
*/ return iret_saved.rax; @@ -244,6 +247,7 @@ unsigned long do_iret(void) } asmlinkage void syscall_enter(void); +asmlinkage void compat_hypercall(void); void __init percpu_traps_init(void) { char *stack_bottom, *stack; @@ -255,6 +259,11 @@ void __init percpu_traps_init(void) set_intr_gate(TRAP_double_fault, &double_fault); idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */ idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */ + +#ifdef CONFIG_COMPAT + /* The hypercall entry vector is only accessible from ring 1. */ + _set_gate(idt_table+HYPERCALL_VECTOR, 15, 1, &compat_hypercall); +#endif } stack_bottom = (char *)get_stack_bottom(); @@ -501,12 +510,16 @@ static void hypercall_page_initialise_ring3_kernel(void *hypercall_page) *(u16 *)(p+ 9) = 0x050f; /* syscall */ } +#include "compat/traps.c" + void hypercall_page_initialise(struct domain *d, void *hypercall_page) { if ( is_hvm_domain(d) ) hvm_hypercall_page_initialise(d, hypercall_page); - else + else if ( !IS_COMPAT(d) ) hypercall_page_initialise_ring3_kernel(hypercall_page); + else + hypercall_page_initialise_ring1_kernel(hypercall_page); } /* diff --git a/xen/arch/x86/x86_64/xen.lds.S b/xen/arch/x86/x86_64/xen.lds.S index f2ab8f8aa5..9841882c94 100644 --- a/xen/arch/x86/x86_64/xen.lds.S +++ b/xen/arch/x86/x86_64/xen.lds.S @@ -17,7 +17,8 @@ PHDRS SECTIONS { . = 0xFFFF830000100000; - _text = .; /* Text and read-only data */ + _start = .; + _stext = .; /* Text and read-only data */ .text : { *(.text) *(.fixup) @@ -49,14 +50,16 @@ SECTIONS . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .text.init : { *(.text.init) } :text - .data.init : { *(.data.init) } :text + _sinittext = .; + .init.text : { *(.init.text) } :text + _einittext = .; + .init.data : { *(.init.data) } :text . 
= ALIGN(32); __setup_start = .; - .setup.init : { *(.setup.init) } :text + .init.setup : { *(.init.setup) } :text __setup_end = .; __initcall_start = .; - .initcall.init : { *(.initcall.init) } :text + .initcall.init : { *(.initcall1.init) } :text __initcall_end = .; . = ALIGN(PAGE_SIZE); __init_end = .; @@ -78,8 +81,8 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { - *(.text.exit) - *(.data.exit) + *(.exit.text) + *(.exit.data) *(.exitcall.exit) } diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c index 4f882556c4..57650986f1 100644 --- a/xen/arch/x86/x86_emulate.c +++ b/xen/arch/x86/x86_emulate.c @@ -19,15 +19,6 @@ #endif #include <asm-x86/x86_emulate.h> -/* - * Opcode effective-address decode tables. - * Note that we only emulate instructions that have at least one memory - * operand (excluding implicit stack references). We assume that stack - * references and instruction fetches will never occur in special memory - * areas that require emulation. So, for example, 'mov <imm>,<reg>' need - * not be handled. - */ - /* Operand sizes: 8-bit operands or specified/overridden size. */ #define ByteOp (1<<0) /* 8-bit operands. */ /* Destination operand type. 
*/ @@ -55,44 +46,54 @@ static uint8_t opcode_table[256] = { /* 0x00 - 0x07 */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, 0, /* 0x08 - 0x0F */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, 0, /* 0x10 - 0x17 */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, 0, /* 0x18 - 0x1F */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, 0, /* 0x20 - 0x27 */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, /* 0x28 - 0x2F */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, /* 0x30 - 0x37 */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, /* 0x38 - 0x3F */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - 0, 0, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, /* 0x40 - 0x4F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, /* 0x50 - 0x5F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, + ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, + 
ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, + ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, /* 0x60 - 0x6F */ 0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x70 - 0x7F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 - 0x77 */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0x78 - 0x7F */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, /* 0x80 - 0x87 */ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, @@ -101,18 +102,28 @@ static uint8_t opcode_table[256] = { /* 0x88 - 0x8F */ ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, - 0, 0, 0, DstMem|SrcNone|ModRM|Mov, - /* 0x90 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, DstReg|SrcNone|ModRM, 0, DstMem|SrcNone|ModRM|Mov, + /* 0x90 - 0x97 */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0x98 - 0x9F */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, 0, 0, - /* 0xB0 - 0xBF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xB0 - 0xB7 */ + ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, + ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, + ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, + ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, + /* 0xB8 - 0xBF */ + DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, + DstReg|SrcImm|Mov, 
DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, /* 0xC0 - 0xC7 */ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0, 0, 0, ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov, @@ -121,17 +132,19 @@ static uint8_t opcode_table[256] = { /* 0xD0 - 0xD7 */ ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, - 0, 0, 0, 0, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, /* 0xD8 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xE0 - 0xEF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xE0 - 0xE7 */ + 0, 0, 0, ImplicitOps, 0, 0, 0, 0, + /* 0xE8 - 0xEF */ + 0, ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, - 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM, + 0, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM, /* 0xF8 - 0xFF */ - 0, 0, 0, 0, - 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM + ImplicitOps, ImplicitOps, 0, 0, + ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM }; static uint8_t twobyte_table[256] = { @@ -159,8 +172,12 @@ static uint8_t twobyte_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70 - 0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x80 - 0x8F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x80 - 0x87 */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0x88 - 0x8F */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ @@ -192,11 +209,15 @@ struct operand { enum { OP_REG, OP_MEM, OP_IMM } type; unsigned int bytes; unsigned long val, orig_val; - /* OP_REG: Pointer to register field. */ - unsigned long *reg; - /* OP_MEM: Segment and offset. 
*/ - enum x86_segment mem_seg; - unsigned long mem_off; + union { + /* OP_REG: Pointer to register field. */ + unsigned long *reg; + /* OP_MEM: Segment and offset. */ + struct { + enum x86_segment seg; + unsigned long off; + } mem; + }; }; /* EFLAGS bit definitions. */ @@ -208,6 +229,9 @@ struct operand { #define EFLG_PF (1<<2) #define EFLG_CF (1<<0) +/* Exception definitions. */ +#define EXC_DE 0 + /* * Instruction emulation: * Most instructions are emulated directly via a fragment of inline assembly @@ -370,32 +394,102 @@ do{ __asm__ __volatile__ ( \ /* Fetch next part of the instruction being emulated. */ #define insn_fetch_bytes(_size) \ -({ unsigned long _x; \ - rc = ops->insn_fetch(x86_seg_cs, _regs.eip, &_x, (_size), ctxt); \ - if ( rc != 0 ) \ - goto done; \ - _regs.eip += (_size); \ +({ unsigned long _x, _eip = _truncate_ea(_regs.eip, def_ad_bytes); \ + if ( !mode_64bit() ) _eip = (uint32_t)_eip; /* ignore upper dword */ \ + rc = ops->insn_fetch(x86_seg_cs, _eip, &_x, (_size), ctxt); \ + if ( rc ) goto done; \ + _regs.eip += (_size); /* real hardware doesn't truncate */ \ _x; \ }) #define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type))) -#define truncate_ea(ea) \ +#define _truncate_ea(ea, byte_width) \ ({ unsigned long __ea = (ea); \ - ((ad_bytes == sizeof(unsigned long)) ? __ea : \ - (__ea & ((1UL << (ad_bytes << 3)) - 1))); \ + (((byte_width) == sizeof(unsigned long)) ? __ea : \ + (__ea & ((1UL << ((byte_width) << 3)) - 1))); \ }) +#define truncate_ea(ea) _truncate_ea((ea), ad_bytes) + +#define mode_64bit() (def_ad_bytes == 8) + +#define fail_if(p) \ +do { \ + rc = (p) ? X86EMUL_UNHANDLEABLE : 0; \ + if ( rc ) goto done; \ +} while (0) + +/* In future we will be able to generate arbitrary exceptions. */ +#define generate_exception_if(p, e) fail_if(p) + +/* Given byte has even parity (even number of 1s)? 
*/ +static int even_parity(uint8_t v) +{ + __asm__ ( "test %%al,%%al; setp %%al" + : "=a" (v) : "0" (v) ); + return v; +} /* Update address held in a register, based on addressing mode. */ -#define register_address_increment(reg, inc) \ +#define _register_address_increment(reg, inc, byte_width) \ do { \ int _inc = (inc); /* signed type ensures sign extension to long */ \ - if ( ad_bytes == sizeof(unsigned long) ) \ + if ( (byte_width) == sizeof(unsigned long) ) \ (reg) += _inc; \ + else if ( mode_64bit() ) \ + (reg) = ((reg) + _inc) & ((1UL << ((byte_width) << 3)) - 1); \ else \ - (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) | \ - (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \ + (reg) = ((reg) & ~((1UL << ((byte_width) << 3)) - 1)) | \ + (((reg) + _inc) & ((1UL << ((byte_width) << 3)) - 1)); \ +} while (0) +#define register_address_increment(reg, inc) \ + _register_address_increment((reg), (inc), ad_bytes) + +#define jmp_rel(rel) \ +do { \ + _regs.eip += (int)(rel); \ + if ( !mode_64bit() ) \ + _regs.eip = ((op_bytes == 2) \ + ? (uint16_t)_regs.eip : (uint32_t)_regs.eip); \ } while (0) +static int +test_cc( + unsigned int condition, unsigned int flags) +{ + int rc = 0; + + switch ( (condition & 15) >> 1 ) + { + case 0: /* o */ + rc |= (flags & EFLG_OF); + break; + case 1: /* b/c/nae */ + rc |= (flags & EFLG_CF); + break; + case 2: /* z/e */ + rc |= (flags & EFLG_ZF); + break; + case 3: /* be/na */ + rc |= (flags & (EFLG_CF|EFLG_ZF)); + break; + case 4: /* s */ + rc |= (flags & EFLG_SF); + break; + case 5: /* p/pe */ + rc |= (flags & EFLG_PF); + break; + case 7: /* le/ng */ + rc |= (flags & EFLG_ZF); + /* fall through */ + case 6: /* l/nge */ + rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); + break; + } + + /* Odd condition identifiers (lsb == 1) have inverted sense. 
*/ + return (!!rc ^ (condition & 1)); +} + void * decode_register( uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs) @@ -437,7 +531,7 @@ decode_register( } int -x86_emulate_memop( +x86_emulate( struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { @@ -446,34 +540,29 @@ x86_emulate_memop( uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0; uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; - unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i; + unsigned int op_bytes, ad_bytes, def_ad_bytes; + unsigned int lock_prefix = 0, rep_prefix = 0, i; int rc = 0; struct operand src, dst; - int mode = ctxt->mode; - enum x86_segment ea_seg = x86_seg_ds; - unsigned long ea_off = 0; + /* Data operand effective address (usually computed from ModRM). */ + struct operand ea; + + /* Default is a memory operand relative to segment DS. */ + ea.type = OP_MEM; + ea.mem.seg = x86_seg_ds; + ea.mem.off = 0; - switch ( mode ) + op_bytes = ad_bytes = def_ad_bytes = ctxt->address_bytes; + if ( op_bytes == 8 ) { - case X86EMUL_MODE_REAL: - case X86EMUL_MODE_PROT16: - op_bytes = ad_bytes = 2; - break; - case X86EMUL_MODE_PROT32: - op_bytes = ad_bytes = 4; - break; -#ifdef __x86_64__ - case X86EMUL_MODE_PROT64: op_bytes = 4; - ad_bytes = 8; - break; -#endif - default: +#ifndef __x86_64__ return -1; +#endif } - /* Legacy prefixes. */ + /* Prefix bytes. 
*/ for ( i = 0; i < 8; i++ ) { switch ( b = insn_fetch_type(uint8_t) ) @@ -482,51 +571,52 @@ x86_emulate_memop( op_bytes ^= 6; /* switch between 2/4 bytes */ break; case 0x67: /* address-size override */ - if ( mode == X86EMUL_MODE_PROT64 ) + if ( mode_64bit() ) ad_bytes ^= 12; /* switch between 4/8 bytes */ else ad_bytes ^= 6; /* switch between 2/4 bytes */ break; case 0x2e: /* CS override */ - ea_seg = x86_seg_cs; + ea.mem.seg = x86_seg_cs; break; case 0x3e: /* DS override */ - ea_seg = x86_seg_ds; + ea.mem.seg = x86_seg_ds; break; case 0x26: /* ES override */ - ea_seg = x86_seg_es; + ea.mem.seg = x86_seg_es; break; case 0x64: /* FS override */ - ea_seg = x86_seg_fs; + ea.mem.seg = x86_seg_fs; break; case 0x65: /* GS override */ - ea_seg = x86_seg_gs; + ea.mem.seg = x86_seg_gs; break; case 0x36: /* SS override */ - ea_seg = x86_seg_ss; + ea.mem.seg = x86_seg_ss; break; case 0xf0: /* LOCK */ lock_prefix = 1; break; + case 0xf2: /* REPNE/REPNZ */ case 0xf3: /* REP/REPE/REPZ */ rep_prefix = 1; break; - case 0xf2: /* REPNE/REPNZ */ - break; + case 0x40 ... 0x4f: /* REX */ + if ( !mode_64bit() ) + goto done_prefixes; + rex_prefix = b; + continue; default: goto done_prefixes; } + + /* Any legacy prefix after a REX prefix nullifies its effect. */ + rex_prefix = 0; } done_prefixes: - /* REX prefix. */ - if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) ) - { - rex_prefix = b; - if ( b & 8 ) /* REX.W */ - op_bytes = 8; - b = insn_fetch_type(uint8_t); - } + if ( rex_prefix & 8 ) /* REX.W */ + op_bytes = 8; /* Opcode byte(s). */ d = opcode_table[b]; @@ -554,35 +644,40 @@ x86_emulate_memop( modrm_rm = modrm & 0x07; if ( modrm_mod == 3 ) - goto cannot_emulate; - - if ( ad_bytes == 2 ) + { + modrm_rm |= (rex_prefix & 1) << 3; + ea.type = OP_REG; + ea.reg = decode_register( + modrm_rm, &_regs, (d & ByteOp) && (rex_prefix == 0)); + } + else if ( ad_bytes == 2 ) { /* 16-bit ModR/M decode. 
*/ switch ( modrm_rm ) { - case 0: ea_off = _regs.ebx + _regs.esi; break; - case 1: ea_off = _regs.ebx + _regs.edi; break; - case 2: ea_off = _regs.ebp + _regs.esi; break; - case 3: ea_off = _regs.ebp + _regs.edi; break; - case 4: ea_off = _regs.esi; break; - case 5: ea_off = _regs.edi; break; - case 6: ea_off = _regs.ebp; break; - case 7: ea_off = _regs.ebx; break; + case 0: ea.mem.off = _regs.ebx + _regs.esi; break; + case 1: ea.mem.off = _regs.ebx + _regs.edi; break; + case 2: ea.mem.off = _regs.ebp + _regs.esi; break; + case 3: ea.mem.off = _regs.ebp + _regs.edi; break; + case 4: ea.mem.off = _regs.esi; break; + case 5: ea.mem.off = _regs.edi; break; + case 6: ea.mem.off = _regs.ebp; break; + case 7: ea.mem.off = _regs.ebx; break; } switch ( modrm_mod ) { case 0: if ( modrm_rm == 6 ) - ea_off = insn_fetch_type(int16_t); + ea.mem.off = insn_fetch_type(int16_t); break; case 1: - ea_off += insn_fetch_type(int8_t); + ea.mem.off += insn_fetch_type(int8_t); break; case 2: - ea_off += insn_fetch_type(int16_t); + ea.mem.off += insn_fetch_type(int16_t); break; } + ea.mem.off = truncate_ea(ea.mem.off); } else { @@ -593,49 +688,52 @@ x86_emulate_memop( sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8); sib_base = (sib & 7) | ((rex_prefix << 3) & 8); if ( sib_index != 4 ) - ea_off = *(long *)decode_register(sib_index, &_regs, 0); - ea_off <<= (sib >> 6) & 3; + ea.mem.off = *(long*)decode_register(sib_index, &_regs, 0); + ea.mem.off <<= (sib >> 6) & 3; if ( (modrm_mod == 0) && ((sib_base & 7) == 5) ) - ea_off += insn_fetch_type(int32_t); + ea.mem.off += insn_fetch_type(int32_t); + else if ( (sib_base == 4) && !twobyte && (b == 0x8f) ) + /* POP <rm> must have its EA calculated post increment. */ + ea.mem.off += _regs.esp + + ((mode_64bit() && (op_bytes == 4)) ? 
8 : op_bytes); else - ea_off += *(long *)decode_register(sib_base, &_regs, 0); + ea.mem.off += *(long*)decode_register(sib_base, &_regs, 0); } else { modrm_rm |= (rex_prefix & 1) << 3; - ea_off = *(long *)decode_register(modrm_rm, &_regs, 0); + ea.mem.off = *(long *)decode_register(modrm_rm, &_regs, 0); } switch ( modrm_mod ) { case 0: if ( (modrm_rm & 7) != 5 ) break; - ea_off = insn_fetch_type(int32_t); - if ( mode != X86EMUL_MODE_PROT64 ) + ea.mem.off = insn_fetch_type(int32_t); + if ( !mode_64bit() ) break; /* Relative to RIP of next instruction. Argh! */ - ea_off += _regs.eip; + ea.mem.off += _regs.eip; if ( (d & SrcMask) == SrcImm ) - ea_off += (d & ByteOp) ? 1 : + ea.mem.off += (d & ByteOp) ? 1 : ((op_bytes == 8) ? 4 : op_bytes); else if ( (d & SrcMask) == SrcImmByte ) - ea_off += 1; + ea.mem.off += 1; else if ( ((b == 0xf6) || (b == 0xf7)) && ((modrm_reg & 7) <= 1) ) /* Special case in Grp3: test has immediate operand. */ - ea_off += (d & ByteOp) ? 1 + ea.mem.off += (d & ByteOp) ? 1 : ((op_bytes == 8) ? 4 : op_bytes); break; case 1: - ea_off += insn_fetch_type(int8_t); + ea.mem.off += insn_fetch_type(int8_t); break; case 2: - ea_off += insn_fetch_type(int32_t); + ea.mem.off += insn_fetch_type(int32_t); break; } + ea.mem.off = truncate_ea(ea.mem.off); } - - ea_off = truncate_ea(ea_off); } /* Special instructions do their own operand decoding. 
*/ @@ -652,7 +750,7 @@ x86_emulate_memop( if ( d & ByteOp ) { src.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0)); - src.val = src.orig_val = *(uint8_t *)src.reg; + src.val = *(uint8_t *)src.reg; src.bytes = 1; } else @@ -660,28 +758,35 @@ x86_emulate_memop( src.reg = decode_register(modrm_reg, &_regs, 0); switch ( (src.bytes = op_bytes) ) { - case 2: src.val = src.orig_val = *(uint16_t *)src.reg; break; - case 4: src.val = src.orig_val = *(uint32_t *)src.reg; break; - case 8: src.val = src.orig_val = *(uint64_t *)src.reg; break; + case 2: src.val = *(uint16_t *)src.reg; break; + case 4: src.val = *(uint32_t *)src.reg; break; + case 8: src.val = *(uint64_t *)src.reg; break; } } break; case SrcMem16: - src.bytes = 2; + ea.bytes = 2; goto srcmem_common; case SrcMem32: - src.bytes = 4; + ea.bytes = 4; goto srcmem_common; case SrcMem: - src.bytes = (d & ByteOp) ? 1 : op_bytes; + ea.bytes = (d & ByteOp) ? 1 : op_bytes; srcmem_common: - src.type = OP_MEM; - src.mem_seg = ea_seg; - src.mem_off = ea_off; - if ( (rc = ops->read(src.mem_seg, src.mem_off, - &src.val, src.bytes, ctxt)) != 0 ) + src = ea; + if ( src.type == OP_REG ) + { + switch ( src.bytes ) + { + case 1: src.val = *(uint8_t *)src.reg; break; + case 2: src.val = *(uint16_t *)src.reg; break; + case 4: src.val = *(uint32_t *)src.reg; break; + case 8: src.val = *(uint64_t *)src.reg; break; + } + } + else if ( (rc = ops->read(src.mem.seg, src.mem.off, + &src.val, src.bytes, ctxt)) ) goto done; - src.orig_val = src.val; break; case SrcImm: src.type = OP_IMM; @@ -725,8 +830,7 @@ x86_emulate_memop( } break; case DstBitBase: - dst.mem_off = ea_off; - if ( (d & SrcMask) == SrcImmByte ) + if ( ((d & SrcMask) == SrcImmByte) || (ea.type == OP_REG) ) { src.val &= (op_bytes << 3) - 1; } @@ -746,28 +850,34 @@ x86_emulate_memop( { unsigned long byte_offset; byte_offset = op_bytes + (((-src.val-1) >> 3) & ~(op_bytes-1)); - dst.mem_off -= byte_offset; + ea.mem.off -= byte_offset; src.val = (byte_offset << 3) + 
src.val; } else { - dst.mem_off += (src.val >> 3) & ~(op_bytes - 1); + ea.mem.off += (src.val >> 3) & ~(op_bytes - 1); src.val &= (op_bytes << 3) - 1; } } /* Becomes a normal DstMem operation from here on. */ d = (d & ~DstMask) | DstMem; - goto dstmem_common; case DstMem: - dst.mem_off = ea_off; - dstmem_common: - dst.mem_seg = ea_seg; - dst.type = OP_MEM; - dst.bytes = (d & ByteOp) ? 1 : op_bytes; - if ( !(d & Mov) && /* optimisation - avoid slow emulated read */ - ((rc = ops->read(dst.mem_seg, dst.mem_off, - &dst.val, dst.bytes, ctxt)) != 0) ) - goto done; + ea.bytes = (d & ByteOp) ? 1 : op_bytes; + dst = ea; + if ( dst.type == OP_REG ) + { + switch ( dst.bytes ) + { + case 1: dst.val = *(uint8_t *)dst.reg; break; + case 2: dst.val = *(uint16_t *)dst.reg; break; + case 4: dst.val = *(uint32_t *)dst.reg; break; + case 8: dst.val = *(uint64_t *)dst.reg; break; + } + } + else if ( !(d & Mov) && /* optimisation - avoid slow emulated read */ + (rc = ops->read(dst.mem.seg, dst.mem.off, + &dst.val, dst.bytes, ctxt)) ) + goto done; break; } dst.orig_val = dst.val; @@ -777,35 +887,68 @@ x86_emulate_memop( switch ( b ) { - case 0x00 ... 0x05: add: /* add */ + case 0x04 ... 0x05: /* add imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = dst.orig_val = _regs.eax; + case 0x00 ... 0x03: add: /* add */ emulate_2op_SrcV("add", src, dst, _regs.eflags); break; - case 0x08 ... 0x0d: or: /* or */ + + case 0x0c ... 0x0d: /* or imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = dst.orig_val = _regs.eax; + case 0x08 ... 0x0b: or: /* or */ emulate_2op_SrcV("or", src, dst, _regs.eflags); break; - case 0x10 ... 0x15: adc: /* adc */ + + case 0x14 ... 0x15: /* adc imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = dst.orig_val = _regs.eax; + case 0x10 ... 0x13: adc: /* adc */ emulate_2op_SrcV("adc", src, dst, _regs.eflags); break; - case 0x18 ... 0x1d: sbb: /* sbb */ + + case 0x1c ... 
0x1d: /* sbb imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = dst.orig_val = _regs.eax; + case 0x18 ... 0x1b: sbb: /* sbb */ emulate_2op_SrcV("sbb", src, dst, _regs.eflags); break; - case 0x20 ... 0x25: and: /* and */ + + case 0x24 ... 0x25: /* and imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = dst.orig_val = _regs.eax; + case 0x20 ... 0x23: and: /* and */ emulate_2op_SrcV("and", src, dst, _regs.eflags); break; - case 0x28 ... 0x2d: sub: /* sub */ + + case 0x2c ... 0x2d: /* sub imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = dst.orig_val = _regs.eax; + case 0x28 ... 0x2b: sub: /* sub */ emulate_2op_SrcV("sub", src, dst, _regs.eflags); break; - case 0x30 ... 0x35: xor: /* xor */ + + case 0x34 ... 0x35: /* xor imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = dst.orig_val = _regs.eax; + case 0x30 ... 0x33: xor: /* xor */ emulate_2op_SrcV("xor", src, dst, _regs.eflags); break; - case 0x38 ... 0x3d: cmp: /* cmp */ + + case 0x3c ... 0x3d: /* cmp imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = dst.orig_val = _regs.eax; + case 0x38 ... 0x3b: cmp: /* cmp */ emulate_2op_SrcV("cmp", src, dst, _regs.eflags); break; + case 0x63: /* movsxd */ - if ( mode != X86EMUL_MODE_PROT64 ) + if ( !mode_64bit() ) goto cannot_emulate; dst.val = (int32_t)src.val; break; + case 0x80 ... 0x83: /* Grp1 */ switch ( modrm_reg & 7 ) { @@ -819,10 +962,15 @@ x86_emulate_memop( case 7: goto cmp; } break; + + case 0xa8 ... 0xa9: /* test imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = dst.orig_val = _regs.eax; case 0x84 ... 0x85: test: /* test */ emulate_2op_SrcV("test", src, dst, _regs.eflags); break; - case 0x86 ... 0x87: /* xchg */ + + case 0x86 ... 0x87: xchg: /* xchg */ /* Write back the register source. */ switch ( dst.bytes ) { @@ -835,19 +983,43 @@ x86_emulate_memop( dst.val = src.val; lock_prefix = 1; break; - case 0x88 ... 0x8b: /* mov */ + case 0xc6 ... 
0xc7: /* mov (sole member of Grp11) */ + fail_if((modrm_reg & 7) != 0); + case 0x88 ... 0x8b: /* mov */ dst.val = src.val; break; + + case 0x8d: /* lea */ + dst.val = ea.mem.off; + break; + case 0x8f: /* pop (sole member of Grp1a) */ - /* 64-bit mode: POP always pops a 64-bit operand. */ - if ( mode == X86EMUL_MODE_PROT64 ) + fail_if((modrm_reg & 7) != 0); + /* 64-bit mode: POP defaults to a 64-bit operand. */ + if ( mode_64bit() && (dst.bytes == 4) ) dst.bytes = 8; if ( (rc = ops->read(x86_seg_ss, truncate_ea(_regs.esp), &dst.val, dst.bytes, ctxt)) != 0 ) goto done; register_address_increment(_regs.esp, dst.bytes); break; + + case 0xb0 ... 0xb7: /* mov imm8,r8 */ + dst.reg = decode_register( + (b & 7) | ((rex_prefix & 1) << 3), &_regs, (rex_prefix == 0)); + dst.val = src.val; + break; + + case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */ + if ( dst.bytes == 8 ) /* Fetch more bytes to obtain imm64 */ + src.val = ((uint32_t)src.val | + ((uint64_t)insn_fetch_type(uint32_t) << 32)); + dst.reg = decode_register( + (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); + dst.val = src.val; + break; + case 0xc0 ... 0xc1: grp2: /* Grp2 */ switch ( modrm_reg & 7 ) { @@ -875,12 +1047,15 @@ x86_emulate_memop( break; } break; + case 0xd0 ... 0xd1: /* Grp2 */ src.val = 1; goto grp2; + case 0xd2 ... 0xd3: /* Grp2 */ src.val = _regs.ecx; goto grp2; + case 0xf6 ... 0xf7: /* Grp3 */ switch ( modrm_reg & 7 ) { @@ -906,7 +1081,10 @@ x86_emulate_memop( goto cannot_emulate; } break; - case 0xfe ... 0xff: /* Grp4/Grp5 */ + + case 0xfe: /* Grp4 */ + fail_if((modrm_reg & 7) >= 2); + case 0xff: /* Grp5 */ switch ( modrm_reg & 7 ) { case 0: /* inc */ @@ -916,11 +1094,11 @@ x86_emulate_memop( emulate_1op("dec", dst, _regs.eflags); break; case 6: /* push */ - /* 64-bit mode: PUSH always pushes a 64-bit operand. */ - if ( mode == X86EMUL_MODE_PROT64 ) + /* 64-bit mode: PUSH defaults to a 64-bit operand. 
*/ + if ( mode_64bit() && (dst.bytes == 4) ) { dst.bytes = 8; - if ( (rc = ops->read(dst.mem_seg, dst.mem_off, + if ( (rc = ops->read(dst.mem.seg, dst.mem.off, &dst.val, 8, ctxt)) != 0 ) goto done; } @@ -930,6 +1108,8 @@ x86_emulate_memop( goto done; dst.val = dst.orig_val; /* skanky: disable writeback */ break; + case 7: + fail_if(1); default: goto cannot_emulate; } @@ -954,11 +1134,11 @@ x86_emulate_memop( case OP_MEM: if ( lock_prefix ) rc = ops->cmpxchg( - dst.mem_seg, dst.mem_off, dst.orig_val, + dst.mem.seg, dst.mem.off, dst.orig_val, dst.val, dst.bytes, ctxt); else rc = ops->write( - dst.mem_seg, dst.mem_off, dst.val, dst.bytes, ctxt); + dst.mem.seg, dst.mem.off, dst.val, dst.bytes, ctxt); if ( rc != 0 ) goto done; default: @@ -973,8 +1153,12 @@ x86_emulate_memop( return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; special_insn: + /* Default action: disable writeback. There may be no dest operand. */ + dst.orig_val = dst.val; + if ( twobyte ) goto twobyte_special_insn; + if ( rep_prefix ) { if ( _regs.ecx == 0 ) @@ -985,31 +1169,150 @@ x86_emulate_memop( _regs.ecx--; _regs.eip = ctxt->regs->eip; } + switch ( b ) { - case 0xa0 ... 0xa1: /* mov moffs,{%al,%ax,%eax,%rax} */ + case 0x27: /* daa */ { + uint8_t al = _regs.eax; + unsigned long eflags = _regs.eflags; + fail_if(mode_64bit()); + _regs.eflags &= ~(EFLG_CF|EFLG_AF); + if ( ((al & 0x0f) > 9) || (eflags & EFLG_AF) ) + { + *(uint8_t *)&_regs.eax += 6; + _regs.eflags |= EFLG_AF; + } + if ( (al > 0x99) || (eflags & EFLG_CF) ) + { + *(uint8_t *)&_regs.eax += 0x60; + _regs.eflags |= EFLG_CF; + } + _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); + _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0; + _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; + _regs.eflags |= even_parity(_regs.eax) ? 
EFLG_PF : 0; + break; + } + + case 0x2f: /* das */ { + uint8_t al = _regs.eax; + unsigned long eflags = _regs.eflags; + fail_if(mode_64bit()); + _regs.eflags &= ~(EFLG_CF|EFLG_AF); + if ( ((al & 0x0f) > 9) || (eflags & EFLG_AF) ) + { + _regs.eflags |= EFLG_AF; + if ( (al < 6) || (eflags & EFLG_CF) ) + _regs.eflags |= EFLG_CF; + *(uint8_t *)&_regs.eax -= 6; + } + if ( (al > 0x99) || (eflags & EFLG_CF) ) + { + *(uint8_t *)&_regs.eax -= 0x60; + _regs.eflags |= EFLG_CF; + } + _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); + _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0; + _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; + _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0; + break; + } + + case 0x37: /* aaa */ + case 0x3f: /* aas */ + fail_if(mode_64bit()); + _regs.eflags &= ~EFLG_CF; + if ( ((uint8_t)_regs.eax > 9) || (_regs.eflags & EFLG_AF) ) + { + ((uint8_t *)&_regs.eax)[0] += (b == 0x37) ? 6 : -6; + ((uint8_t *)&_regs.eax)[1] += (b == 0x37) ? 1 : -1; + _regs.eflags |= EFLG_CF | EFLG_AF; + } + ((uint8_t *)&_regs.eax)[0] &= 0x0f; + break; + + case 0x40 ... 0x4f: /* inc/dec reg */ + dst.type = OP_REG; + dst.reg = decode_register(b & 7, &_regs, 0); + dst.bytes = op_bytes; + dst.orig_val = dst.val = *dst.reg; + if ( b & 8 ) + emulate_1op("dec", dst, _regs.eflags); + else + emulate_1op("inc", dst, _regs.eflags); + break; + + case 0x50 ... 0x57: /* push reg */ + dst.type = OP_MEM; + dst.bytes = op_bytes; + if ( mode_64bit() && (dst.bytes == 4) ) + dst.bytes = 8; + dst.val = *(unsigned long *)decode_register( + (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); + register_address_increment(_regs.esp, -dst.bytes); + dst.mem.seg = x86_seg_ss; + dst.mem.off = truncate_ea(_regs.esp); + break; + + case 0x58 ... 
0x5f: /* pop reg */ + dst.type = OP_REG; + dst.reg = decode_register( + (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); + dst.bytes = op_bytes; + if ( mode_64bit() && (dst.bytes == 4) ) + dst.bytes = 8; + if ( (rc = ops->read(x86_seg_ss, truncate_ea(_regs.esp), + &dst.val, dst.bytes, ctxt)) != 0 ) + goto done; + register_address_increment(_regs.esp, dst.bytes); + break; + + case 0x70 ... 0x7f: /* jcc (short) */ { + int rel = insn_fetch_type(int8_t); + if ( test_cc(b, _regs.eflags) ) + jmp_rel(rel); + break; + } + + case 0x90: /* nop / xchg %%r8,%%rax */ + if ( !(rex_prefix & 1) ) + break; /* nop */ + + case 0x91 ... 0x97: /* xchg reg,%%rax */ + src.type = dst.type = OP_REG; + src.bytes = dst.bytes = op_bytes; + src.reg = (unsigned long *)&_regs.eax; + src.val = *src.reg; + dst.reg = decode_register( + (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); + dst.val = dst.orig_val = *dst.reg; + goto xchg; + + case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */ /* Source EA is not encoded via ModRM. */ dst.type = OP_REG; dst.reg = (unsigned long *)&_regs.eax; dst.bytes = (d & ByteOp) ? 1 : op_bytes; - if ( (rc = ops->read(ea_seg, insn_fetch_bytes(ad_bytes), + if ( (rc = ops->read(ea.mem.seg, insn_fetch_bytes(ad_bytes), &dst.val, dst.bytes, ctxt)) != 0 ) goto done; break; - case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},moffs */ + + case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */ /* Destination EA is not encoded via ModRM. */ - dst.type = OP_MEM; - dst.mem_seg = ea_seg; - dst.mem_off = insn_fetch_bytes(ad_bytes); - dst.bytes = (d & ByteOp) ? 1 : op_bytes; - dst.val = (unsigned long)_regs.eax; + dst.type = OP_MEM; + dst.mem.seg = ea.mem.seg; + dst.mem.off = insn_fetch_bytes(ad_bytes); + dst.bytes = (d & ByteOp) ? 1 : op_bytes; + dst.val = (unsigned long)_regs.eax; break; + case 0xa4 ... 0xa5: /* movs */ dst.type = OP_MEM; dst.bytes = (d & ByteOp) ? 
1 : op_bytes; - dst.mem_seg = x86_seg_es; - dst.mem_off = truncate_ea(_regs.edi); - if ( (rc = ops->read(ea_seg, truncate_ea(_regs.esi), + dst.mem.seg = x86_seg_es; + dst.mem.off = truncate_ea(_regs.edi); + if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi), &dst.val, dst.bytes, ctxt)) != 0 ) goto done; register_address_increment( @@ -1017,25 +1320,102 @@ x86_emulate_memop( register_address_increment( _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); break; + case 0xaa ... 0xab: /* stos */ dst.type = OP_MEM; dst.bytes = (d & ByteOp) ? 1 : op_bytes; - dst.mem_seg = x86_seg_es; - dst.mem_off = truncate_ea(_regs.edi); + dst.mem.seg = x86_seg_es; + dst.mem.off = truncate_ea(_regs.edi); dst.val = _regs.eax; register_address_increment( _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); break; + case 0xac ... 0xad: /* lods */ dst.type = OP_REG; dst.bytes = (d & ByteOp) ? 1 : op_bytes; dst.reg = (unsigned long *)&_regs.eax; - if ( (rc = ops->read(ea_seg, truncate_ea(_regs.esi), + if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi), &dst.val, dst.bytes, ctxt)) != 0 ) goto done; register_address_increment( _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); break; + + case 0xd4: /* aam */ { + unsigned int base = insn_fetch_type(uint8_t); + uint8_t al = _regs.eax; + fail_if(mode_64bit()); + generate_exception_if(base == 0, EXC_DE); + *(uint16_t *)&_regs.eax = ((al / base) << 8) | (al % base); + _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); + _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0; + _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; + _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0; + break; + } + + case 0xd5: /* aad */ { + unsigned int base = insn_fetch_type(uint8_t); + uint16_t ax = _regs.eax; + fail_if(mode_64bit()); + *(uint16_t *)&_regs.eax = (uint8_t)(ax + ((ax >> 8) * base)); + _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); + _regs.eflags |= ((uint8_t)_regs.eax == 0) ? 
EFLG_ZF : 0; + _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; + _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0; + break; + } + + case 0xd6: /* salc */ + fail_if(mode_64bit()); + *(uint8_t *)&_regs.eax = (_regs.eflags & EFLG_CF) ? 0xff : 0x00; + break; + + case 0xd7: /* xlat */ { + unsigned long al = (uint8_t)_regs.eax; + if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.ebx + al), + &al, 1, ctxt)) != 0 ) + goto done; + *(uint8_t *)&_regs.eax = al; + break; + } + + case 0xe3: /* jcxz/jecxz (short) */ { + int rel = insn_fetch_type(int8_t); + if ( (ad_bytes == 2) ? !(uint16_t)_regs.ecx : + (ad_bytes == 4) ? !(uint32_t)_regs.ecx : !_regs.ecx ) + jmp_rel(rel); + break; + } + + case 0xe9: /* jmp (near): opcode E9 carries a 16/32-bit displacement */ + jmp_rel(insn_fetch_bytes(mode_64bit() ? 4 : op_bytes)); + break; + + case 0xeb: /* jmp (short): opcode EB carries an 8-bit displacement */ + jmp_rel(insn_fetch_type(int8_t)); + break; + + case 0xf5: /* cmc */ + _regs.eflags ^= EFLG_CF; + break; + + case 0xf8: /* clc */ + _regs.eflags &= ~EFLG_CF; + break; + + case 0xf9: /* stc */ + _regs.eflags |= EFLG_CF; + break; + + case 0xfc: /* cld */ + _regs.eflags &= ~EFLG_DF; + break; + + case 0xfd: /* std */ + _regs.eflags |= EFLG_DF; + break; } goto writeback; @@ -1044,39 +1424,9 @@ x86_emulate_memop( { case 0x40 ... 0x4f: /* cmov */ dst.val = dst.orig_val = src.val; - d &= ~Mov; /* default to no move */ - /* First, assume we're decoding an even cmov opcode (lsb == 0). */ - switch ( (b & 15) >> 1 ) - { - case 0: /* cmovo */ - d |= (_regs.eflags & EFLG_OF) ? Mov : 0; - break; - case 1: /* cmovb/cmovc/cmovnae */ - d |= (_regs.eflags & EFLG_CF) ? Mov : 0; - break; - case 2: /* cmovz/cmove */ - d |= (_regs.eflags & EFLG_ZF) ? Mov : 0; - break; - case 3: /* cmovbe/cmovna */ - d |= (_regs.eflags & (EFLG_CF|EFLG_ZF)) ? Mov : 0; - break; - case 4: /* cmovs */ - d |= (_regs.eflags & EFLG_SF) ? Mov : 0; - break; - case 5: /* cmovp/cmovpe */ - d |= (_regs.eflags & EFLG_PF) ? Mov : 0; - break; - case 7: /* cmovle/cmovng */ - d |= (_regs.eflags & EFLG_ZF) ?
Mov : 0; - /* fall through */ - case 6: /* cmovl/cmovnge */ - d |= (!(_regs.eflags & EFLG_SF) != !(_regs.eflags & EFLG_OF)) ? - Mov : 0; - break; - } - /* Odd cmov opcodes (lsb == 1) have inverted sense. */ - d ^= (b & 1) ? Mov : 0; + d = (d & ~Mov) | (test_cc(b, _regs.eflags) ? Mov : 0); break; + case 0xb0 ... 0xb1: /* cmpxchg */ /* Save real source value, then compare EAX against destination. */ src.orig_val = src.val; @@ -1096,22 +1446,34 @@ x86_emulate_memop( dst.reg = (unsigned long *)&_regs.eax; } break; + case 0xa3: bt: /* bt */ emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags); break; + case 0xb3: btr: /* btr */ emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags); break; + case 0xab: bts: /* bts */ emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags); break; - case 0xb6 ... 0xb7: /* movzx */ + + case 0xb6: /* movzx rm8,r{16,32,64} */ + /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */ + dst.reg = decode_register(modrm_reg, &_regs, 0); dst.bytes = op_bytes; - dst.val = (d & ByteOp) ? (uint8_t)src.val : (uint16_t)src.val; + dst.val = (uint8_t)src.val; + break; + + case 0xb7: /* movzx rm16,r{16,32,64} */ + dst.val = (uint16_t)src.val; break; + case 0xbb: btc: /* btc */ emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags); break; + case 0xba: /* Grp8 */ switch ( modrm_reg & 3 ) { @@ -1121,10 +1483,18 @@ x86_emulate_memop( case 3: goto btc; } break; - case 0xbe ... 0xbf: /* movsx */ + + case 0xbe: /* movsx rm8,r{16,32,64} */ + /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */ + dst.reg = decode_register(modrm_reg, &_regs, 0); dst.bytes = op_bytes; - dst.val = (d & ByteOp) ? (int8_t)src.val : (int16_t)src.val; + dst.val = (int8_t)src.val; + break; + + case 0xbf: /* movsx rm16,r{16,32,64} */ + dst.val = (int16_t)src.val; break; + case 0xc0 ... 0xc1: /* xadd */ /* Write back the register source. */ switch ( dst.bytes ) @@ -1139,19 +1509,25 @@ x86_emulate_memop( goto writeback; twobyte_special_insn: - /* Disable writeback. 
*/ - dst.orig_val = dst.val; switch ( b ) { case 0x0d: /* GrpP (prefetch) */ case 0x18: /* Grp16 (prefetch/nop) */ break; + + case 0x80 ... 0x8f: /* jcc (near) */ { + int rel = insn_fetch_bytes(mode_64bit() ? 4 : op_bytes); + if ( test_cc(b, _regs.eflags) ) + jmp_rel(rel); + break; + } + case 0xc7: /* Grp9 (cmpxchg8b) */ #if defined(__i386__) { unsigned long old_lo, old_hi; - if ( ((rc = ops->read(ea_seg, ea_off+0, &old_lo, 4, ctxt)) != 0) || - ((rc = ops->read(ea_seg, ea_off+4, &old_hi, 4, ctxt)) != 0) ) + if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &old_lo, 4, ctxt)) || + (rc = ops->read(ea.mem.seg, ea.mem.off+4, &old_hi, 4, ctxt)) ) goto done; if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) ) { @@ -1166,7 +1542,7 @@ x86_emulate_memop( } else { - if ( (rc = ops->cmpxchg8b(ea_seg, ea_off, old_lo, old_hi, + if ( (rc = ops->cmpxchg8b(ea.mem.seg, ea.mem.off, old_lo, old_hi, _regs.ebx, _regs.ecx, ctxt)) != 0 ) goto done; _regs.eflags |= EFLG_ZF; @@ -1176,7 +1552,7 @@ x86_emulate_memop( #elif defined(__x86_64__) { unsigned long old, new; - if ( (rc = ops->read(ea_seg, ea_off, &old, 8, ctxt)) != 0 ) + if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &old, 8, ctxt)) != 0 ) goto done; if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) || ((uint32_t)(old>>32) != (uint32_t)_regs.edx) ) @@ -1188,7 +1564,8 @@ x86_emulate_memop( else { new = (_regs.ecx<<32)|(uint32_t)_regs.ebx; - if ( (rc = ops->cmpxchg(ea_seg, ea_off, old, new, 8, ctxt)) != 0 ) + if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, + new, 8, ctxt)) != 0 ) goto done; _regs.eflags |= EFLG_ZF; } @@ -1201,10 +1578,10 @@ x86_emulate_memop( cannot_emulate: #ifdef __XEN__ gdprintk(XENLOG_DEBUG, "Instr:"); - for ( ea_off = ctxt->regs->eip; ea_off < _regs.eip; ea_off++ ) + for ( ea.mem.off = ctxt->regs->eip; ea.mem.off < _regs.eip; ea.mem.off++ ) { unsigned long x; - ops->insn_fetch(x86_seg_cs, ea_off, &x, 1, ctxt); + ops->insn_fetch(x86_seg_cs, ea.mem.off, &x, 1, ctxt); printk(" %02x", (uint8_t)x); } printk("\n"); 
diff --git a/xen/common/Makefile b/xen/common/Makefile index 6f3a9a097e..0c9b9b2333 100644 --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -3,6 +3,7 @@ obj-y += bitmap.o obj-y += domctl.o obj-y += domain.o obj-y += elf.o +obj-$(CONFIG_COMPAT) += elf32.o obj-y += event_channel.o obj-y += grant_table.o obj-y += kernel.o @@ -34,5 +35,16 @@ obj-$(xenoprof) += xenoprof.o obj-$(CONFIG_XENCOMM) += xencomm.o +subdir-$(CONFIG_COMPAT) += compat + # Object file contains changeset and compiler information. version.o: $(BASEDIR)/include/xen/compile.h + +ifeq ($(CONFIG_COMPAT),y) +# extra dependencies +acm_ops.o: compat/acm_ops.c +grant_table.o: compat/grant_table.c +kexec.o: compat/kexec.c +schedule.o: compat/schedule.c +xenoprof.o: compat/xenoprof.c +endif diff --git a/xen/common/acm_ops.c b/xen/common/acm_ops.c index d6ed629398..f9d8a72da6 100644 --- a/xen/common/acm_ops.c +++ b/xen/common/acm_ops.c @@ -15,6 +15,7 @@ * */ +#ifndef COMPAT #include <xen/config.h> #include <xen/types.h> #include <xen/lib.h> @@ -28,6 +29,10 @@ #include <xen/guest_access.h> #include <acm/acm_hooks.h> +typedef long ret_t; + +#endif /* !COMPAT */ + #ifndef ACM_SECURITY @@ -40,6 +45,7 @@ long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg) #else +#ifndef COMPAT int acm_authorize_acm_ops(struct domain *d) { /* currently, policy management functions are restricted to privileged domains */ @@ -47,11 +53,12 @@ int acm_authorize_acm_ops(struct domain *d) return -EPERM; return 0; } +#endif -long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg) +ret_t do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg) { - long rc = -EFAULT; + ret_t rc = -EFAULT; if (acm_authorize_acm_ops(current->domain)) return -EPERM; @@ -219,6 +226,10 @@ long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg) #endif +#if defined(CONFIG_COMPAT) && !defined(COMPAT) +#include "compat/acm_ops.c" +#endif + /* * Local variables: * mode: C diff --git a/xen/common/compat/Makefile b/xen/common/compat/Makefile new file mode 100644 index 
0000000000..ad5ec1b174 --- /dev/null +++ b/xen/common/compat/Makefile @@ -0,0 +1,13 @@ +obj-y += domain.o +obj-y += domctl.o +obj-y += kernel.o +obj-y += memory.o +obj-y += multicall.o +obj-y += sysctl.o +obj-y += xlat.o + +# extra dependencies +domctl.o: ../domctl.c +kernel.o: ../kernel.c +multicall.o: ../multicall.c +sysctl.o: ../sysctl.c diff --git a/xen/common/compat/acm_ops.c b/xen/common/compat/acm_ops.c new file mode 100644 index 0000000000..28af1a8e6c --- /dev/null +++ b/xen/common/compat/acm_ops.c @@ -0,0 +1,47 @@ +/****************************************************************************** + * compat/acm_ops.c + */ + +#include <compat/acm.h> +#include <compat/acm_ops.h> + +#define COMPAT +#define ret_t int + +#define do_acm_op compat_acm_op + +static inline XEN_GUEST_HANDLE(void) acm_xlat_handle(COMPAT_HANDLE(void) cmp) +{ + XEN_GUEST_HANDLE(void) nat; + + guest_from_compat_handle(nat, cmp); + return nat; +} + +#define acm_setpolicy compat_acm_setpolicy +#define acm_set_policy(h, sz) acm_set_policy(acm_xlat_handle(h), sz) + +#define acm_getpolicy compat_acm_getpolicy +#define acm_get_policy(h, sz) acm_get_policy(acm_xlat_handle(h), sz) + +#define acm_dumpstats compat_acm_dumpstats +#define acm_dump_statistics(h, sz) acm_dump_statistics(acm_xlat_handle(h), sz) + +#define acm_getssid compat_acm_getssid +#define acm_get_ssid(r, h, sz) acm_get_ssid(r, acm_xlat_handle(h), sz) + +#define xen_acm_getdecision acm_getdecision +CHECK_acm_getdecision; +#undef xen_acm_getdecision + +#include "../acm_ops.c" + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c new file mode 100644 index 0000000000..3f54260316 --- /dev/null +++ b/xen/common/compat/domain.c @@ -0,0 +1,91 @@ +/****************************************************************************** + * domain.c + * + */ + +#include <xen/config.h> +#include 
<xen/lib.h> +#include <xen/sched.h> +#include <xen/domain.h> +#include <xen/guest_access.h> +#include <xen/hypercall.h> +#include <compat/vcpu.h> + +int compat_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) +{ + struct domain *d = current->domain; + struct vcpu *v; + long rc = 0; + + if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) ) + return -EINVAL; + + if ( (v = d->vcpu[vcpuid]) == NULL ) + return -ENOENT; + + switch ( cmd ) + { + case VCPUOP_initialise: + { + struct compat_vcpu_guest_context *cmp_ctxt; + + if ( (cmp_ctxt = xmalloc(struct compat_vcpu_guest_context)) == NULL ) + { + rc = -ENOMEM; + break; + } + + if ( copy_from_guest(cmp_ctxt, arg, 1) ) + { + xfree(cmp_ctxt); + rc = -EFAULT; + break; + } + + LOCK_BIGLOCK(d); + rc = -EEXIST; + if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) + rc = boot_vcpu(d, vcpuid, cmp_ctxt); + UNLOCK_BIGLOCK(d); + + xfree(cmp_ctxt); + break; + } + + case VCPUOP_up: + case VCPUOP_down: + case VCPUOP_is_up: + rc = do_vcpu_op(cmd, vcpuid, arg); + break; + + case VCPUOP_get_runstate_info: + { + union { + struct vcpu_runstate_info nat; + struct compat_vcpu_runstate_info cmp; + } runstate; + + vcpu_runstate_get(v, &runstate.nat); + xlat_vcpu_runstate_info(&runstate.nat); + if ( copy_to_guest(arg, &runstate.cmp, 1) ) + rc = -EFAULT; + break; + } + + default: + rc = arch_compat_vcpu_op(cmd, v, arg); + break; + } + + return rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/compat/domctl.c b/xen/common/compat/domctl.c new file mode 100644 index 0000000000..e04e3aae04 --- /dev/null +++ b/xen/common/compat/domctl.c @@ -0,0 +1,137 @@ +/****************************************************************************** + * compat/domctl.c + */ + +#include <xen/config.h> +#include <compat/domctl.h> +#include <xen/sched.h> +#include <xen/cpumask.h> +#include <asm/uaccess.h> + 
+DEFINE_XEN_GUEST_HANDLE(compat_domctl_t); +#define xen_domctl compat_domctl +#define xen_domctl_t compat_domctl_t +#define do_domctl(h) compat_domctl(_##h) +#define arch_do_domctl(x, h) arch_compat_domctl(x, _##h) + +#define xen_domain_handle_t compat_domain_handle_t + +#define xen_domctl_vcpucontext compat_domctl_vcpucontext +#define xen_domctl_vcpucontext_t compat_domctl_vcpucontext_t + +#define xen_domctl_createdomain compat_domctl_createdomain +#define xen_domctl_createdomain_t compat_domctl_createdomain_t + +#define xen_domctl_max_vcpus compat_domctl_max_vcpus +#define xen_domctl_max_vcpus_t compat_domctl_max_vcpus_t + +static void cpumask_to_compat_ctl_cpumap( + struct compat_ctl_cpumap *cmpctl_cpumap, cpumask_t *cpumask) +{ + unsigned int guest_bytes, copy_bytes, i; + /*static const*/ uint8_t zero = 0; + + if ( compat_handle_is_null(cmpctl_cpumap->bitmap) ) + return; + + guest_bytes = (cmpctl_cpumap->nr_cpus + 7) / 8; + copy_bytes = min_t(unsigned int, guest_bytes, (NR_CPUS + 7) / 8); + + copy_to_compat(cmpctl_cpumap->bitmap, + (uint8_t *)cpus_addr(*cpumask), + copy_bytes); + + for ( i = copy_bytes; i < guest_bytes; i++ ) + copy_to_compat_offset(cmpctl_cpumap->bitmap, i, &zero, 1); +} +#define cpumask_to_xenctl_cpumap cpumask_to_compat_ctl_cpumap + +void compat_ctl_cpumap_to_cpumask( + cpumask_t *cpumask, struct compat_ctl_cpumap *cmpctl_cpumap) +{ + unsigned int guest_bytes, copy_bytes; + + guest_bytes = (cmpctl_cpumap->nr_cpus + 7) / 8; + copy_bytes = min_t(unsigned int, guest_bytes, (NR_CPUS + 7) / 8); + + cpus_clear(*cpumask); + + if ( compat_handle_is_null(cmpctl_cpumap->bitmap) ) + return; + + copy_from_compat((uint8_t *)cpus_addr(*cpumask), + cmpctl_cpumap->bitmap, + copy_bytes); +} +#define xenctl_cpumap_to_cpumask compat_ctl_cpumap_to_cpumask + +#define xen_domctl_vcpuaffinity compat_domctl_vcpuaffinity +#define xen_domctl_vcpuaffinity_t compat_domctl_vcpuaffinity_t + +static int compat_sched_adjust(struct domain *d, + struct 
compat_domctl_scheduler_op *cop) +{ + struct xen_domctl_scheduler_op nop; + int ret; + enum XLAT_domctl_scheduler_op_u u; + + switch ( cop->sched_id ) + { + case XEN_SCHEDULER_SEDF: u = XLAT_domctl_scheduler_op_u_sedf; break; + case XEN_SCHEDULER_CREDIT: u = XLAT_domctl_scheduler_op_u_credit; break; + default: return -EINVAL; + } + XLAT_domctl_scheduler_op(&nop, cop); + ret = sched_adjust(d, &nop); + XLAT_domctl_scheduler_op(cop, &nop); + + return ret; +} +#define sched_adjust(d, op) compat_sched_adjust(d, op) +#define xen_domctl_scheduler_op compat_domctl_scheduler_op +#define xen_domctl_scheduler_op_t compat_domctl_scheduler_op_t + +#define xen_domctl_getdomaininfo compat_domctl_getdomaininfo +#define xen_domctl_getdomaininfo_t compat_domctl_getdomaininfo_t +#define getdomaininfo(d, i) compat_getdomaininfo(d, i) + +#define xen_domctl_getvcpuinfo compat_domctl_getvcpuinfo +#define xen_domctl_getvcpuinfo_t compat_domctl_getvcpuinfo_t + +#define xen_domctl_max_mem compat_domctl_max_mem +#define xen_domctl_max_mem_t compat_domctl_max_mem_t + +#define xen_domctl_setdomainhandle compat_domctl_setdomainhandle +#define xen_domctl_setdomainhandle_t compat_domctl_setdomainhandle_t + +#define xen_domctl_setdebugging compat_domctl_setdebugging +#define xen_domctl_setdebugging_t compat_domctl_setdebugging_t + +#define xen_domctl_irq_permission compat_domctl_irq_permission +#define xen_domctl_irq_permission_t compat_domctl_irq_permission_t + +#define xen_domctl_iomem_permission compat_domctl_iomem_permission +#define xen_domctl_iomem_permission_t compat_domctl_iomem_permission_t + +#define xen_domctl_settimeoffset compat_domctl_settimeoffset +#define xen_domctl_settimeoffset_t compat_domctl_settimeoffset_t + +#define COMPAT +#define _XEN_GUEST_HANDLE(t) XEN_GUEST_HANDLE(t) +#define _u_domctl u_domctl +//#undef guest_handle_cast +//#define guest_handle_cast compat_handle_cast +//#define copy_to_xxx_offset copy_to_compat_offset +typedef int ret_t; + +#include "../domctl.c" + +/* 
+ * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/compat/grant_table.c b/xen/common/compat/grant_table.c new file mode 100644 index 0000000000..9680a1779d --- /dev/null +++ b/xen/common/compat/grant_table.c @@ -0,0 +1,218 @@ +/****************************************************************************** + * common/compat/grant_table.c + * + */ + +#include <compat/grant_table.h> + +#define xen_grant_entry grant_entry +CHECK_grant_entry; +#undef xen_grant_entry + +#define xen_gnttab_map_grant_ref gnttab_map_grant_ref +CHECK_gnttab_map_grant_ref; +#undef xen_gnttab_map_grant_ref + +#define xen_gnttab_unmap_grant_ref gnttab_unmap_grant_ref +CHECK_gnttab_unmap_grant_ref; +#undef xen_gnttab_unmap_grant_ref + +DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_compat_t); +DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_compat_t); +DEFINE_XEN_GUEST_HANDLE(gnttab_copy_compat_t); + +#define xen_gnttab_dump_table gnttab_dump_table +CHECK_gnttab_dump_table; +#undef xen_gnttab_dump_table + +int compat_grant_table_op(unsigned int cmd, + XEN_GUEST_HANDLE(void) cmp_uop, + unsigned int count) +{ + int rc = 0; + unsigned int i; + + switch ( cmd ) + { +#define CASE(name) \ + case GNTTABOP_##name: \ + if ( unlikely(!guest_handle_okay(guest_handle_cast(cmp_uop, \ + gnttab_##name##_compat_t), \ + count)) ) \ + rc = -EFAULT; \ + break + +#ifndef CHECK_gnttab_map_grant_ref + CASE(map_grant_ref); +#endif + +#ifndef CHECK_gnttab_unmap_grant_ref + CASE(unmap_grant_ref); +#endif + +#ifndef CHECK_gnttab_setup_table + CASE(setup_table); +#endif + +#ifndef CHECK_gnttab_transfer + CASE(transfer); +#endif + +#ifndef CHECK_gnttab_copy + CASE(copy); +#endif + +#ifndef CHECK_gnttab_dump_table + CASE(dump_table); +#endif + +#undef CASE + default: + return do_grant_table_op(cmd, cmp_uop, count); + } + + if ( count > 512 ) + rc = -EINVAL; + + for ( i = 0; i < count && rc == 0; ) + { + unsigned int n; + union { 
+ XEN_GUEST_HANDLE(void) uop; + struct gnttab_setup_table *setup; + struct gnttab_transfer *xfer; + struct gnttab_copy *copy; + } nat; + union { + struct compat_gnttab_setup_table setup; + struct compat_gnttab_transfer xfer; + struct compat_gnttab_copy copy; + } cmp; + + set_xen_guest_handle(nat.uop, (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id)); + switch ( cmd ) + { + case GNTTABOP_setup_table: + if ( unlikely(count > 1) ) + rc = -EINVAL; + else if ( unlikely(__copy_from_guest(&cmp.setup, cmp_uop, 1)) ) + rc = -EFAULT; + else if ( unlikely(!compat_handle_okay(cmp.setup.frame_list, cmp.setup.nr_frames)) ) + rc = -EFAULT; + else + { + BUILD_BUG_ON((COMPAT_ARG_XLAT_SIZE - sizeof(*nat.setup)) / sizeof(*nat.setup->frame_list.p) < NR_GRANT_FRAMES); +#define XLAT_gnttab_setup_table_HNDL_frame_list(_d_, _s_) \ + set_xen_guest_handle((_d_)->frame_list, (unsigned long *)(nat.setup + 1)) + XLAT_gnttab_setup_table(nat.setup, &cmp.setup); +#undef XLAT_gnttab_setup_table_HNDL_frame_list + rc = gnttab_setup_table(guest_handle_cast(nat.uop, gnttab_setup_table_t), 1); + } + if ( rc == 0 ) + { + BUG_ON(nat.setup->nr_frames > NR_GRANT_FRAMES); +#define XLAT_gnttab_setup_table_HNDL_frame_list(_d_, _s_) \ + do \ + { \ + if ( (_s_)->status == GNTST_okay ) \ + { \ + for ( i = 0; i < (_s_)->nr_frames; ++i ) \ + { \ + unsigned int frame = (_s_)->frame_list.p[i]; \ + BUG_ON(frame != (_s_)->frame_list.p[i]); \ + (void)__copy_to_compat_offset((_d_)->frame_list, i, &frame, 1); \ + } \ + } \ + } while (0) + XLAT_gnttab_setup_table(&cmp.setup, nat.setup); +#undef XLAT_gnttab_setup_table_HNDL_frame_list + if ( unlikely(__copy_to_guest(cmp_uop, &cmp.setup, 1)) ) + rc = -EFAULT; + else + i = 1; + } + break; + + case GNTTABOP_transfer: + for ( n = 0; i < COMPAT_ARG_XLAT_SIZE / sizeof(*nat.xfer) && i < count && rc == 0; ++i, ++n ) + { + if ( unlikely(__copy_from_guest_offset(&cmp.xfer, cmp_uop, i, 1)) ) + rc = -EFAULT; + else + { + XLAT_gnttab_transfer(nat.xfer + n, &cmp.xfer); + } + } + if 
( rc == 0 ) + rc = gnttab_transfer(guest_handle_cast(nat.uop, gnttab_transfer_t), n); + if ( rc == 0 ) + { + XEN_GUEST_HANDLE(gnttab_transfer_compat_t) xfer; + + xfer = guest_handle_cast(cmp_uop, gnttab_transfer_compat_t); + guest_handle_add_offset(xfer, i); + while ( n-- ) + { + guest_handle_add_offset(xfer, -1); + if ( __copy_field_to_guest(xfer, nat.xfer + n, status) ) /* native entry n pairs with the handle just stepped back */ + rc = -EFAULT; + } + } + break; + + case GNTTABOP_copy: + for ( n = 0; i < COMPAT_ARG_XLAT_SIZE / sizeof(*nat.copy) && i < count && rc == 0; ++i, ++n ) + { + if ( unlikely(__copy_from_guest_offset(&cmp.copy, cmp_uop, i, 1)) ) + rc = -EFAULT; + else + { + enum XLAT_gnttab_copy_source_u source_u; + enum XLAT_gnttab_copy_dest_u dest_u; + + if ( cmp.copy.flags & GNTCOPY_source_gref ) + source_u = XLAT_gnttab_copy_source_u_ref; + else + source_u = XLAT_gnttab_copy_source_u_gmfn; + if ( cmp.copy.flags & GNTCOPY_dest_gref ) + dest_u = XLAT_gnttab_copy_dest_u_ref; + else + dest_u = XLAT_gnttab_copy_dest_u_gmfn; + XLAT_gnttab_copy(nat.copy + n, &cmp.copy); + } + } + if ( rc == 0 ) + rc = gnttab_copy(guest_handle_cast(nat.uop, gnttab_copy_t), n); + if ( rc == 0 ) + { + XEN_GUEST_HANDLE(gnttab_copy_compat_t) copy; + + copy = guest_handle_cast(cmp_uop, gnttab_copy_compat_t); + guest_handle_add_offset(copy, i); + while ( n-- ) + { + guest_handle_add_offset(copy, -1); + if ( __copy_field_to_guest(copy, nat.copy + n, status) ) /* native entry n pairs with the handle just stepped back */ + rc = -EFAULT; + } + } + break; + + default: + domain_crash(current->domain); + break; + } + } + + return rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/compat/kernel.c b/xen/common/compat/kernel.c new file mode 100644 index 0000000000..07b1166ffd --- /dev/null +++ b/xen/common/compat/kernel.c @@ -0,0 +1,59 @@ +/****************************************************************************** + * kernel.c + */ + +#include <xen/config.h> +#include <xen/init.h> +#include
<xen/lib.h> +#include <xen/errno.h> +#include <xen/version.h> +#include <xen/sched.h> +#include <xen/shadow.h> +#include <xen/nmi.h> +#include <xen/guest_access.h> +#include <asm/current.h> +#include <compat/xen.h> +#include <compat/nmi.h> +#include <compat/version.h> + +#define xen_extraversion compat_extraversion +#define xen_extraversion_t compat_extraversion_t + +#define xen_compile_info compat_compile_info +#define xen_compile_info_t compat_compile_info_t + +CHECK_TYPE(capabilities_info); + +#define xen_platform_parameters compat_platform_parameters +#define xen_platform_parameters_t compat_platform_parameters_t +#undef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START HYPERVISOR_COMPAT_VIRT_START(current->domain) + +#define xen_changeset_info compat_changeset_info +#define xen_changeset_info_t compat_changeset_info_t + +#define xen_feature_info compat_feature_info +#define xen_feature_info_t compat_feature_info_t + +CHECK_TYPE(domain_handle); + +#define xennmi_callback compat_nmi_callback +#define xennmi_callback_t compat_nmi_callback_t + +#define DO(fn) int compat_##fn +#define COMPAT + +#include "../kernel.c" + +int compat_disabled = 0; +boolean_param("no-pv-compat", compat_disabled); + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/compat/kexec.c b/xen/common/compat/kexec.c new file mode 100644 index 0000000000..b167d8a4ff --- /dev/null +++ b/xen/common/compat/kexec.c @@ -0,0 +1,33 @@ +/* + * compat/kexec.c + */ + +#include <compat/kexec.h> + +#define COMPAT +#define ret_t int + +#define do_kexec_op compat_kexec_op + +#undef kexec_get +#define kexec_get(x) compat_kexec_get_##x +#define xen_kexec_range compat_kexec_range +#define xen_kexec_range_t compat_kexec_range_t + +#define kexec_load_unload compat_kexec_load_unload +#define xen_kexec_load compat_kexec_load +#define xen_kexec_load_t compat_kexec_load_t + +CHECK_kexec_exec; + +#include 
"../kexec.c" + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c new file mode 100644 index 0000000000..1301e42f04 --- /dev/null +++ b/xen/common/compat/memory.c @@ -0,0 +1,364 @@ +#include <xen/config.h> +#include <xen/types.h> +#include <xen/hypercall.h> +#include <xen/guest_access.h> +#include <xen/sched.h> +#include <xen/event.h> +#include <asm/current.h> +#include <compat/memory.h> + +int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE(void) compat) +{ + int rc, split, op = cmd & MEMOP_CMD_MASK; + unsigned int start_extent = cmd >> MEMOP_EXTENT_SHIFT; + + do + { + unsigned int i, end_extent = 0; + union { + XEN_GUEST_HANDLE(void) hnd; + struct xen_memory_reservation *rsrv; + struct xen_memory_exchange *xchg; + struct xen_translate_gpfn_list *xlat; + } nat; + union { + struct compat_memory_reservation rsrv; + struct compat_memory_exchange xchg; + struct compat_translate_gpfn_list xlat; + } cmp; + + set_xen_guest_handle(nat.hnd, (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id)); + split = 0; + switch ( op ) + { + xen_pfn_t *space; + + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + if ( copy_from_guest(&cmp.rsrv, compat, 1) ) + return start_extent; + + /* Is size too large for us to encode a continuation? 
*/ + if ( cmp.rsrv.nr_extents > (UINT_MAX >> MEMOP_EXTENT_SHIFT) ) + return start_extent; + + if ( !compat_handle_is_null(cmp.rsrv.extent_start) && + !compat_handle_okay(cmp.rsrv.extent_start, cmp.rsrv.nr_extents) ) + return start_extent; + + end_extent = start_extent + (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.rsrv)) / + sizeof(*space); + if ( end_extent > cmp.rsrv.nr_extents ) + end_extent = cmp.rsrv.nr_extents; + + space = (xen_pfn_t *)(nat.rsrv + 1); +#define XLAT_memory_reservation_HNDL_extent_start(_d_, _s_) \ + do \ + { \ + if ( !compat_handle_is_null((_s_)->extent_start) ) \ + { \ + set_xen_guest_handle((_d_)->extent_start, space - start_extent); \ + if ( op != XENMEM_increase_reservation ) \ + { \ + for ( i = start_extent; i < end_extent; ++i ) \ + { \ + compat_pfn_t pfn; \ + if ( __copy_from_compat_offset(&pfn, (_s_)->extent_start, i, 1) ) \ + { \ + end_extent = i; \ + split = -1; \ + break; \ + } \ + *space++ = pfn; \ + } \ + } \ + } \ + else \ + { \ + set_xen_guest_handle((_d_)->extent_start, NULL); \ + end_extent = cmp.rsrv.nr_extents; \ + } \ + } while (0) + XLAT_memory_reservation(nat.rsrv, &cmp.rsrv); +#undef XLAT_memory_reservation_HNDL_extent_start + + if ( end_extent < cmp.rsrv.nr_extents ) + { + nat.rsrv->nr_extents = end_extent; + ++split; + } + + break; + + case XENMEM_exchange: + { + int order_delta; + + if ( copy_from_guest(&cmp.xchg, compat, 1) ) + return -EFAULT; + + order_delta = cmp.xchg.out.extent_order - cmp.xchg.in.extent_order; + /* Various sanity checks. */ + if ( (cmp.xchg.nr_exchanged > cmp.xchg.in.nr_extents) || + (order_delta > 0 && (cmp.xchg.nr_exchanged & ((1U << order_delta) - 1))) || + /* Sizes of input and output lists do not overflow an int? */ + ((~0U >> cmp.xchg.in.extent_order) < cmp.xchg.in.nr_extents) || + ((~0U >> cmp.xchg.out.extent_order) < cmp.xchg.out.nr_extents) || + /* Sizes of input and output lists match? 
*/ + ((cmp.xchg.in.nr_extents << cmp.xchg.in.extent_order) != + (cmp.xchg.out.nr_extents << cmp.xchg.out.extent_order)) ) + return -EINVAL; + + start_extent = cmp.xchg.nr_exchanged; + end_extent = (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.xchg)) / + (((1U << __builtin_abs(order_delta)) + 1) * + sizeof(*space)); + if ( end_extent == 0 ) + { + printk("Cannot translate compatibility mode XENMEM_exchange extents (%u,%u)\n", + cmp.xchg.in.extent_order, cmp.xchg.out.extent_order); + return -E2BIG; + } + if ( order_delta > 0 ) + end_extent <<= order_delta; + end_extent += start_extent; + if ( end_extent > cmp.xchg.in.nr_extents ) + end_extent = cmp.xchg.in.nr_extents; + + space = (xen_pfn_t *)(nat.xchg + 1); + /* Code below depends upon .in preceding .out. */ + BUILD_BUG_ON(offsetof(xen_memory_exchange_t, in) > offsetof(xen_memory_exchange_t, out)); +#define XLAT_memory_reservation_HNDL_extent_start(_d_, _s_) \ + do \ + { \ + set_xen_guest_handle((_d_)->extent_start, space - start_extent); \ + for ( i = start_extent; i < end_extent; ++i ) \ + { \ + compat_pfn_t pfn; \ + if ( __copy_from_compat_offset(&pfn, (_s_)->extent_start, i, 1) ) \ + return -EFAULT; \ + *space++ = pfn; \ + } \ + if ( order_delta > 0 ) \ + { \ + start_extent >>= order_delta; \ + end_extent >>= order_delta; \ + } \ + else \ + { \ + start_extent <<= -order_delta; \ + end_extent <<= -order_delta; \ + } \ + order_delta = -order_delta; \ + } while (0) + XLAT_memory_exchange(nat.xchg, &cmp.xchg); +#undef XLAT_memory_reservation_HNDL_extent_start + + if ( end_extent < cmp.xchg.in.nr_extents ) + { + nat.xchg->in.nr_extents = end_extent; + if ( order_delta >= 0 ) + nat.xchg->out.nr_extents = end_extent >> order_delta; + else + nat.xchg->out.nr_extents = end_extent << order_delta; + ++split; + } + + break; + } + + case XENMEM_current_reservation: + case XENMEM_maximum_reservation: + { +#define xen_domid_t domid_t +#define compat_domid_t domid_compat_t + CHECK_TYPE(domid); +#undef compat_domid_t +#undef xen_domid_t + 
} + case XENMEM_maximum_ram_page: + nat.hnd = compat; + break; + + case XENMEM_translate_gpfn_list: + if ( copy_from_guest(&cmp.xlat, compat, 1) ) + return -EFAULT; + + /* Is size too large for us to encode a continuation? */ + if ( cmp.xlat.nr_gpfns > (UINT_MAX >> MEMOP_EXTENT_SHIFT) ) + return -EINVAL; + + if ( !compat_handle_okay(cmp.xlat.gpfn_list, cmp.xlat.nr_gpfns) || + !compat_handle_okay(cmp.xlat.mfn_list, cmp.xlat.nr_gpfns) ) + return -EFAULT; + + end_extent = start_extent + (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.xlat)) / + sizeof(*space); + if ( end_extent > cmp.xlat.nr_gpfns ) + end_extent = cmp.xlat.nr_gpfns; + + space = (xen_pfn_t *)(nat.xlat + 1); + /* Code below depends upon .gpfn_list preceding .mfn_list. */ + BUILD_BUG_ON(offsetof(xen_translate_gpfn_list_t, gpfn_list) > offsetof(xen_translate_gpfn_list_t, mfn_list)); +#define XLAT_translate_gpfn_list_HNDL_gpfn_list(_d_, _s_) \ + do \ + { \ + set_xen_guest_handle((_d_)->gpfn_list, space - start_extent); \ + for ( i = start_extent; i < end_extent; ++i ) \ + { \ + compat_pfn_t pfn; \ + if ( __copy_from_compat_offset(&pfn, (_s_)->gpfn_list, i, 1) ) \ + return -EFAULT; \ + *space++ = pfn; \ + } \ + } while (0) +#define XLAT_translate_gpfn_list_HNDL_mfn_list(_d_, _s_) \ + (_d_)->mfn_list = (_d_)->gpfn_list + XLAT_translate_gpfn_list(nat.xlat, &cmp.xlat); +#undef XLAT_translate_gpfn_list_HNDL_mfn_list +#undef XLAT_translate_gpfn_list_HNDL_gpfn_list + + if ( end_extent < cmp.xlat.nr_gpfns ) + { + nat.xlat->nr_gpfns = end_extent; + ++split; + } + + break; + + default: + return compat_arch_memory_op(cmd, compat); + } + + rc = do_memory_op(cmd, nat.hnd); + if ( rc < 0 ) + return rc; + + cmd = 0; + if ( hypercall_xlat_continuation(&cmd, 0x02, nat.hnd, compat) ) + { + BUG_ON(rc != __HYPERVISOR_memory_op); + BUG_ON((cmd & MEMOP_CMD_MASK) != op); + split = -1; + } + + switch ( op ) + { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + end_extent = split >= 0 ? 
rc : cmd >> MEMOP_EXTENT_SHIFT; + if ( op != XENMEM_decrease_reservation && + !guest_handle_is_null(nat.rsrv->extent_start) ) + { + for ( ; start_extent < end_extent; ++start_extent ) + { + compat_pfn_t pfn = nat.rsrv->extent_start.p[start_extent]; + + BUG_ON(pfn != nat.rsrv->extent_start.p[start_extent]); + if ( __copy_to_compat_offset(cmp.rsrv.extent_start, start_extent, &pfn, 1) ) + { + if ( split >= 0 ) + { + rc = start_extent; + split = 0; + } + else + /* + * Short of being able to cancel the continuation, + * force it to restart here; eventually we shall + * get out of this state. + */ + rc = (start_extent << MEMOP_EXTENT_SHIFT) | op; + break; + } + } + } + else + start_extent = end_extent; + break; + + case XENMEM_exchange: + { + DEFINE_XEN_GUEST_HANDLE(compat_memory_exchange_t); + int order_delta; + + BUG_ON(split >= 0 && rc); + BUG_ON(end_extent < nat.xchg->nr_exchanged); + end_extent = nat.xchg->nr_exchanged; + + order_delta = cmp.xchg.out.extent_order - cmp.xchg.in.extent_order; + if ( order_delta > 0 ) + { + start_extent >>= order_delta; + BUG_ON(end_extent & ((1U << order_delta) - 1)); + end_extent >>= order_delta; + } + else + { + start_extent <<= -order_delta; + end_extent <<= -order_delta; + } + + for ( ; start_extent < end_extent; ++start_extent ) + { + compat_pfn_t pfn = nat.xchg->out.extent_start.p[start_extent]; + + BUG_ON(pfn != nat.xchg->out.extent_start.p[start_extent]); + /* Note that we ignore errors accessing the output extent list. */ + __copy_to_compat_offset(cmp.xchg.out.extent_start, start_extent, &pfn, 1); + } + + cmp.xchg.nr_exchanged = nat.xchg->nr_exchanged; + if ( copy_field_to_guest(guest_handle_cast(compat, compat_memory_exchange_t), + &cmp.xchg, nr_exchanged) ) + { + if ( split < 0 ) + /* Cannot cancel the continuation... 
*/ + domain_crash(current->domain); + return -EFAULT; + } + break; + } + + case XENMEM_maximum_ram_page: + case XENMEM_current_reservation: + case XENMEM_maximum_reservation: + break; + + case XENMEM_translate_gpfn_list: + if ( split < 0 ) + end_extent = cmd >> MEMOP_EXTENT_SHIFT; + else + BUG_ON(rc); + + for ( ; start_extent < end_extent; ++start_extent ) + { + compat_pfn_t pfn = nat.xlat->mfn_list.p[start_extent]; + + BUG_ON(pfn != nat.xlat->mfn_list.p[start_extent]); + if ( __copy_to_compat_offset(cmp.xlat.mfn_list, start_extent, &pfn, 1) ) + { + if ( split < 0 ) + /* Cannot cancel the continuation... */ + domain_crash(current->domain); + return -EFAULT; + } + } + break; + + default: + domain_crash(current->domain); + split = 0; + break; + } + + cmd = op | (start_extent << MEMOP_EXTENT_SHIFT); + if ( split > 0 && hypercall_preempt_check() ) + return hypercall_create_continuation( + __HYPERVISOR_memory_op, "ih", cmd, compat); + } while ( split > 0 ); + + return rc; +} diff --git a/xen/common/compat/multicall.c b/xen/common/compat/multicall.c new file mode 100644 index 0000000000..17b12d6462 --- /dev/null +++ b/xen/common/compat/multicall.c @@ -0,0 +1,31 @@ +/****************************************************************************** + * multicall.c + */ + +#include <xen/config.h> +#include <xen/types.h> +#include <xen/multicall.h> + +#define COMPAT +typedef int ret_t; +#undef do_multicall_call + +DEFINE_XEN_GUEST_HANDLE(multicall_entry_compat_t); +#define multicall_entry compat_multicall_entry +#define multicall_entry_t multicall_entry_compat_t +#define do_multicall_call compat_multicall_call +#define call compat_call +#define do_multicall(l, n) compat_multicall(_##l, n) +#define _XEN_GUEST_HANDLE(t) XEN_GUEST_HANDLE(t) + +#include "../multicall.c" + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/compat/schedule.c b/xen/common/compat/schedule.c 
new file mode 100644 index 0000000000..5795279978 --- /dev/null +++ b/xen/common/compat/schedule.c @@ -0,0 +1,51 @@ +/**************************************************************************** + * schedule.c + * + */ + +#include <compat/sched.h> + +#define COMPAT +#define ret_t int + +#define do_sched_op compat_sched_op + +#define xen_sched_shutdown sched_shutdown +CHECK_sched_shutdown; +#undef xen_sched_shutdown + +#define xen_sched_remote_shutdown sched_remote_shutdown +CHECK_sched_remote_shutdown; +#undef xen_sched_remote_shutdown + +static int compat_poll(struct compat_sched_poll *compat) +{ + struct sched_poll native; + +#define XLAT_sched_poll_HNDL_ports(_d_, _s_) \ + guest_from_compat_handle((_d_)->ports, (_s_)->ports) + XLAT_sched_poll(&native, compat); +#undef XLAT_sched_poll_HNDL_ports + + return do_poll(&native); +} + +#define do_poll compat_poll +#define sched_poll compat_sched_poll + +#include "../schedule.c" + +int compat_set_timer_op(u32 lo, s32 hi) +{ + return do_set_timer_op(((s64)hi << 32) | lo); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/compat/sysctl.c b/xen/common/compat/sysctl.c new file mode 100644 index 0000000000..6072443566 --- /dev/null +++ b/xen/common/compat/sysctl.c @@ -0,0 +1,95 @@ +/****************************************************************************** + * compat/sysctl.c + */ + +#include <xen/config.h> +#include <compat/sysctl.h> +#include <xen/domain.h> +#include <xen/guest_access.h> +#include <xen/perfc.h> +#include <xen/trace.h> + +DEFINE_XEN_GUEST_HANDLE(compat_sysctl_t); +#define xen_sysctl compat_sysctl +#define xen_sysctl_t compat_sysctl_t +#define do_sysctl(h) compat_sysctl(_##h) +#define arch_do_sysctl(x, h) arch_compat_sysctl(x, _##h) + +#define xen_sysctl_readconsole compat_sysctl_readconsole +#define xen_sysctl_readconsole_t compat_sysctl_readconsole_t + +static int 
compat_tb_control(struct compat_sysctl_tbuf_op *cmp_tbc) +{ + struct xen_sysctl_tbuf_op nat_tbc; + int ret; + +#define XLAT_ctl_cpumap_HNDL_bitmap(_d_, _s_) \ + guest_from_compat_handle((_d_)->bitmap, (_s_)->bitmap) + XLAT_sysctl_tbuf_op(&nat_tbc, cmp_tbc); +#undef XLAT_ctl_cpumap_HNDL_bitmap + ret = tb_control(&nat_tbc); +#define XLAT_ctl_cpumap_HNDL_bitmap(_d_, _s_) ((void)0) + XLAT_sysctl_tbuf_op(cmp_tbc, &nat_tbc); +#undef XLAT_ctl_cpumap_HNDL_bitmap + return ret; +} +#define xen_sysctl_tbuf_op compat_sysctl_tbuf_op +#define xen_sysctl_tbuf_op_t compat_sysctl_tbuf_op_t +#define tb_control(p) compat_tb_control(p) + +#define xen_sysctl_sched_id compat_sysctl_sched_id +#define xen_sysctl_sched_id_t compat_sysctl_sched_id_t + +#define xen_sysctl_getdomaininfolist compat_sysctl_getdomaininfolist +#define xen_sysctl_getdomaininfolist_t compat_sysctl_getdomaininfolist_t +#define xen_domctl_getdomaininfo compat_domctl_getdomaininfo +#define xen_domctl_getdomaininfo_t compat_domctl_getdomaininfo_t +#define getdomaininfo(d, i) compat_getdomaininfo(d, i) + +#ifdef PERF_COUNTERS +static int compat_perfc_control(struct compat_sysctl_perfc_op *cmp_pc) +{ + CHECK_sysctl_perfc_desc; + CHECK_TYPE(sysctl_perfc_val); + struct xen_sysctl_perfc_op nat_pc; + int ret; + +#define XLAT_sysctl_perfc_op_HNDL_desc(_d_, _s_) \ + guest_from_compat_handle((_d_)->desc, (_s_)->desc) +#define XLAT_sysctl_perfc_op_HNDL_val(_d_, _s_) \ + guest_from_compat_handle((_d_)->val, (_s_)->val) + XLAT_sysctl_perfc_op(&nat_pc, cmp_pc); +#undef XLAT_sysctl_perfc_op_HNDL_val +#undef XLAT_sysctl_perfc_op_HNDL_desc + ret = perfc_control(&nat_pc); +#define XLAT_sysctl_perfc_op_HNDL_desc(_d_, _s_) +#define XLAT_sysctl_perfc_op_HNDL_val(_d_, _s_) + XLAT_sysctl_perfc_op(cmp_pc, &nat_pc); +#undef XLAT_sysctl_perfc_op_HNDL_val +#undef XLAT_sysctl_perfc_op_HNDL_desc + return ret; +} +#define xen_sysctl_perfc_op compat_sysctl_perfc_op +#define xen_sysctl_perfc_op_t compat_sysctl_perfc_op_t +#define perfc_control(p) 
compat_perfc_control(p) +#endif + +#define COMPAT +#define _XEN_GUEST_HANDLE(t) XEN_GUEST_HANDLE(t) +#define _u_sysctl u_sysctl +#undef guest_handle_cast +#define guest_handle_cast compat_handle_cast +#define copy_to_xxx_offset copy_to_compat_offset +typedef int ret_t; + +#include "../sysctl.c" + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/compat/xenoprof.c b/xen/common/compat/xenoprof.c new file mode 100644 index 0000000000..e1cad00d2a --- /dev/null +++ b/xen/common/compat/xenoprof.c @@ -0,0 +1,40 @@ +/* + * compat/xenoprof.c + */ + +#include <compat/xenoprof.h> + +#define COMPAT + +#define do_xenoprof_op compat_xenoprof_op + +#define xen_oprof_init xenoprof_init +CHECK_oprof_init; +#undef xen_oprof_init + +#define xenoprof_get_buffer compat_oprof_get_buffer +#define xenoprof_op_get_buffer compat_oprof_op_get_buffer + +#define xen_domid_t domid_t +#define compat_domid_t domid_compat_t +CHECK_TYPE(domid); +#undef compat_domid_t +#undef xen_domid_t + +#define xen_oprof_passive xenoprof_passive +CHECK_oprof_passive; +#undef xen_oprof_passive + +#define xenoprof_counter compat_oprof_counter + +#include "../xenoprof.c" + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/compat/xlat.c b/xen/common/compat/xlat.c new file mode 100644 index 0000000000..2a8dc6decf --- /dev/null +++ b/xen/common/compat/xlat.c @@ -0,0 +1,73 @@ +/****************************************************************************** + * xlat.c + */ + +#include <xen/compat.h> +#include <xen/lib.h> +#include <compat/xen.h> +#include <compat/event_channel.h> +#include <compat/vcpu.h> + +/* In-place translation functions: */ +void xlat_start_info(struct start_info *native, + enum XLAT_start_info_console console) +{ + struct compat_start_info *compat = (void *)native; + + 
BUILD_BUG_ON(sizeof(*native) < sizeof(*compat)); + XLAT_start_info(compat, native); +} + +void xlat_vcpu_runstate_info(struct vcpu_runstate_info *native) +{ + struct compat_vcpu_runstate_info *compat = (void *)native; + + BUILD_BUG_ON(sizeof(*native) < sizeof(*compat)); + XLAT_vcpu_runstate_info(compat, native); +} + +#define xen_dom0_vga_console_info dom0_vga_console_info +CHECK_dom0_vga_console_info; +#undef dom0_vga_console_info + +#define xen_evtchn_alloc_unbound evtchn_alloc_unbound +#define xen_evtchn_bind_interdomain evtchn_bind_interdomain +#define xen_evtchn_bind_ipi evtchn_bind_ipi +#define xen_evtchn_bind_pirq evtchn_bind_pirq +#define xen_evtchn_bind_vcpu evtchn_bind_vcpu +#define xen_evtchn_bind_virq evtchn_bind_virq +#define xen_evtchn_close evtchn_close +#define xen_evtchn_op evtchn_op +#define xen_evtchn_send evtchn_send +#define xen_evtchn_status evtchn_status +#define xen_evtchn_unmask evtchn_unmask +CHECK_evtchn_op; +#undef xen_evtchn_alloc_unbound +#undef xen_evtchn_bind_interdomain +#undef xen_evtchn_bind_ipi +#undef xen_evtchn_bind_pirq +#undef xen_evtchn_bind_vcpu +#undef xen_evtchn_bind_virq +#undef xen_evtchn_close +#undef xen_evtchn_op +#undef xen_evtchn_send +#undef xen_evtchn_status +#undef xen_evtchn_unmask + +#define xen_mmu_update mmu_update +CHECK_mmu_update; +#undef xen_mmu_update + +#define xen_vcpu_time_info vcpu_time_info +CHECK_vcpu_time_info; +#undef xen_vcpu_time_info + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/domain.c b/xen/common/domain.c index c61de823ac..28530c40d5 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -26,6 +26,9 @@ #include <asm/debugger.h> #include <public/sched.h> #include <public/vcpu.h> +#ifdef CONFIG_COMPAT +#include <compat/domctl.h> +#endif /* Both these structures are protected by the domlist_lock. 
*/ DEFINE_RWLOCK(domlist_lock); @@ -90,7 +93,7 @@ struct vcpu *alloc_vcpu( v->domain = d; v->vcpu_id = vcpu_id; - v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id]; + v->vcpu_info = shared_info_addr(d, vcpu_info[vcpu_id]); spin_lock_init(&v->pause_lock); v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline; @@ -451,32 +454,64 @@ void domain_unpause_by_systemcontroller(struct domain *d) * the userspace dom0 domain builder. */ int set_info_guest(struct domain *d, - xen_domctl_vcpucontext_t *vcpucontext) + xen_domctl_vcpucontext_u vcpucontext) { int rc = 0; - struct vcpu_guest_context *c = NULL; - unsigned long vcpu = vcpucontext->vcpu; + vcpu_guest_context_u c; +#ifdef CONFIG_COMPAT + CHECK_FIELD(domctl_vcpucontext, vcpu); +#endif + unsigned long vcpu = vcpucontext.nat->vcpu; struct vcpu *v; if ( (vcpu >= MAX_VIRT_CPUS) || ((v = d->vcpu[vcpu]) == NULL) ) return -EINVAL; - if ( (c = xmalloc(struct vcpu_guest_context)) == NULL ) +#ifdef CONFIG_COMPAT + BUILD_BUG_ON(sizeof(struct vcpu_guest_context) + < sizeof(struct compat_vcpu_guest_context)); +#endif + if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL ) return -ENOMEM; domain_pause(d); - rc = -EFAULT; - if ( copy_from_guest(c, vcpucontext->ctxt, 1) == 0 ) + if ( !IS_COMPAT(v->domain) ) + { + if ( !IS_COMPAT(current->domain) + ? copy_from_guest(c.nat, vcpucontext.nat->ctxt, 1) +#ifndef CONFIG_COMPAT + : 0 ) +#else + : copy_from_guest(c.nat, + compat_handle_cast(vcpucontext.cmp->ctxt, + void), + 1) ) +#endif + rc = -EFAULT; + } +#ifdef CONFIG_COMPAT + else + { + if ( !IS_COMPAT(current->domain) + ? 
copy_from_guest(c.cmp, + guest_handle_cast(vcpucontext.nat->ctxt, void), + 1) + : copy_from_compat(c.cmp, vcpucontext.cmp->ctxt, 1) ) + rc = -EFAULT; + } +#endif + + if ( rc == 0 ) rc = arch_set_info_guest(v, c); domain_unpause(d); - xfree(c); + xfree(c.nat); return rc; } -int boot_vcpu(struct domain *d, int vcpuid, struct vcpu_guest_context *ctxt) +int boot_vcpu(struct domain *d, int vcpuid, vcpu_guest_context_u ctxt) { struct vcpu *v = d->vcpu[vcpuid]; diff --git a/xen/common/domctl.c b/xen/common/domctl.c index b05ecd1099..b9cd57e949 100644 --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -18,14 +18,22 @@ #include <xen/console.h> #include <xen/iocap.h> #include <xen/guest_access.h> +#ifdef CONFIG_COMPAT +#include <xen/compat.h> +#endif #include <asm/current.h> #include <public/domctl.h> #include <acm/acm_hooks.h> -extern long arch_do_domctl( +#ifndef COMPAT +typedef long ret_t; +#define copy_to_xxx_offset copy_to_guest_offset +#endif + +extern ret_t arch_do_domctl( struct xen_domctl *op, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); -extern void arch_getdomaininfo_ctxt( - struct vcpu *, struct vcpu_guest_context *); + +#ifndef COMPAT void cpumask_to_xenctl_cpumap( struct xenctl_cpumap *xenctl_cpumap, cpumask_t *cpumask) @@ -65,6 +73,8 @@ void xenctl_cpumap_to_cpumask( copy_bytes); } +#endif /* COMPAT */ + static inline int is_free_domid(domid_t dom) { struct domain *d; @@ -169,9 +179,9 @@ static unsigned int default_vcpu0_location(void) return cpu; } -long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) +ret_t do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) { - long ret = 0; + ret_t ret = 0; struct xen_domctl curop, *op = &curop; void *ssid = NULL; /* save security ptr between pre and post/fail hooks */ static DEFINE_SPINLOCK(domctl_lock); @@ -283,11 +293,36 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) if ( (d = domain_create(dom, domcr_flags)) == NULL ) break; + ret = 0; + switch ( (op->u.createdomain.flags >> 
XEN_DOMCTL_CDF_WORDSIZE_SHIFT) + & XEN_DOMCTL_CDF_WORDSIZE_MASK ) + { + case 0: + if ( !IS_COMPAT(current->domain) ) + op->u.createdomain.flags |= BITS_PER_LONG + << XEN_DOMCTL_CDF_WORDSIZE_SHIFT; +#ifdef CONFIG_COMPAT + else + { + op->u.createdomain.flags |= COMPAT_BITS_PER_LONG + << XEN_DOMCTL_CDF_WORDSIZE_SHIFT; + case COMPAT_BITS_PER_LONG: + ret = switch_compat(d); + } +#endif + break; + case BITS_PER_LONG: + break; + default: + ret = -EINVAL; + break; + } + if ( ret ) + break; + memcpy(d->handle, op->u.createdomain.handle, sizeof(xen_domain_handle_t)); - ret = 0; - op->domain = d->domain_id; if ( copy_to_guest(u_domctl, op, 1) ) ret = -EFAULT; @@ -446,7 +481,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) case XEN_DOMCTL_getvcpucontext: { - struct vcpu_guest_context *c; + vcpu_guest_context_u c; struct domain *d; struct vcpu *v; @@ -466,23 +501,48 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) goto getvcpucontext_out; +#ifdef CONFIG_COMPAT + BUILD_BUG_ON(sizeof(struct vcpu_guest_context) + < sizeof(struct compat_vcpu_guest_context)); +#endif ret = -ENOMEM; - if ( (c = xmalloc(struct vcpu_guest_context)) == NULL ) + if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL ) goto getvcpucontext_out; if ( v != current ) vcpu_pause(v); - arch_getdomaininfo_ctxt(v,c); + arch_get_info_guest(v, c); ret = 0; if ( v != current ) vcpu_unpause(v); - if ( copy_to_guest(op->u.vcpucontext.ctxt, c, 1) ) - ret = -EFAULT; + if ( !IS_COMPAT(v->domain) ) + { +#ifndef COMPAT + if ( copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1) ) +#else + if ( copy_to_guest(compat_handle_cast(op->u.vcpucontext.ctxt, + void), + c.nat, 1) ) +#endif + ret = -EFAULT; + } +#ifdef CONFIG_COMPAT + else + { +#ifndef COMPAT + if ( copy_to_guest(guest_handle_cast(op->u.vcpucontext.ctxt, void), + c.cmp, 1) ) +#else + if ( copy_to_compat(op->u.vcpucontext.ctxt, c.cmp, 1) ) +#endif + ret = -EFAULT; + } +#endif - xfree(c); + 
xfree(c.nat); if ( copy_to_guest(u_domctl, op, 1) ) ret = -EFAULT; @@ -646,6 +706,16 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) } break; +#ifdef CONFIG_COMPAT + case XEN_DOMCTL_set_compat: + ret = switch_compat(find_domain_by_id(op->domain)); + break; + + case XEN_DOMCTL_set_native: + ret = switch_native(find_domain_by_id(op->domain)); + break; +#endif + default: ret = arch_do_domctl(op, u_domctl); break; diff --git a/xen/common/elf.c b/xen/common/elf.c index ab5a9ba883..6a55053e74 100644 --- a/xen/common/elf.c +++ b/xen/common/elf.c @@ -16,7 +16,7 @@ #include <public/elfnote.h> static void loadelfsymtab(struct domain_setup_info *dsi, int doload); -static inline int is_loadable_phdr(Elf_Phdr *phdr) +static inline int is_loadable_phdr(const Elf_Phdr *phdr) { return ((phdr->p_type == PT_LOAD) && ((phdr->p_flags & (PF_W|PF_X)) != 0)); @@ -26,7 +26,7 @@ static inline int is_loadable_phdr(Elf_Phdr *phdr) * Fallback for kernels containing only the legacy __xen_guest string * and no ELF notes. */ -static int is_xen_guest_section(Elf_Shdr *shdr, const char *shstrtab) +static int is_xen_guest_section(const Elf_Shdr *shdr, const char *shstrtab) { return strcmp(&shstrtab[shdr->sh_name], "__xen_guest") == 0; } @@ -102,19 +102,19 @@ static unsigned long long xen_guest_numeric(struct domain_setup_info *dsi, /* * Interface to the Xen ELF notes. 
*/ -#define ELFNOTE_NAME(_n_) ((void*)(_n_) + sizeof(*(_n_))) +#define ELFNOTE_NAME(_n_) ((const void*)(_n_) + sizeof(*(_n_))) #define ELFNOTE_DESC(_n_) (ELFNOTE_NAME(_n_) + (((_n_)->namesz+3)&~3)) #define ELFNOTE_NEXT(_n_) (ELFNOTE_DESC(_n_) + (((_n_)->descsz+3)&~3)) -static int is_xen_elfnote_section(const char *image, Elf_Shdr *shdr) +static int is_xen_elfnote_section(const char *image, const Elf_Shdr *shdr) { - Elf_Note *note; + const Elf_Note *note; if ( shdr->sh_type != SHT_NOTE ) return 0; - for ( note = (Elf_Note *)(image + shdr->sh_offset); - note < (Elf_Note *)(image + shdr->sh_offset + shdr->sh_size); + for ( note = (const Elf_Note *)(image + shdr->sh_offset); + note < (const Elf_Note *)(image + shdr->sh_offset + shdr->sh_size); note = ELFNOTE_NEXT(note) ) { if ( !strncmp(ELFNOTE_NAME(note), "Xen", 4) ) @@ -124,15 +124,16 @@ static int is_xen_elfnote_section(const char *image, Elf_Shdr *shdr) return 0; } -static Elf_Note *xen_elfnote_lookup(struct domain_setup_info *dsi, int type) +static const Elf_Note *xen_elfnote_lookup( + struct domain_setup_info *dsi, int type) { - Elf_Note *note; + const Elf_Note *note; if ( !dsi->__elfnote_section ) return NULL; - for ( note = (Elf_Note *)dsi->__elfnote_section; - note < (Elf_Note *)dsi->__elfnote_section_end; + for ( note = (const Elf_Note *)dsi->__elfnote_section; + note < (const Elf_Note *)dsi->__elfnote_section_end; note = ELFNOTE_NEXT(note) ) { if ( strncmp(ELFNOTE_NAME(note), "Xen", 4) ) @@ -147,7 +148,7 @@ static Elf_Note *xen_elfnote_lookup(struct domain_setup_info *dsi, int type) const char *xen_elfnote_string(struct domain_setup_info *dsi, int type) { - Elf_Note *note; + const Elf_Note *note; if ( !dsi->__elfnote_section ) return xen_guest_string(dsi, type); @@ -162,7 +163,7 @@ const char *xen_elfnote_string(struct domain_setup_info *dsi, int type) unsigned long long xen_elfnote_numeric(struct domain_setup_info *dsi, int type, int *defined) { - Elf_Note *note; + const Elf_Note *note; *defined = 0; @@ 
-179,10 +180,10 @@ unsigned long long xen_elfnote_numeric(struct domain_setup_info *dsi, { case 4: *defined = 1; - return *(uint32_t*)ELFNOTE_DESC(note); + return *(const uint32_t*)ELFNOTE_DESC(note); case 8: *defined = 1; - return *(uint64_t*)ELFNOTE_DESC(note); + return *(const uint64_t*)ELFNOTE_DESC(note); default: printk("ERROR: unknown data size %#x for numeric type note %#x\n", note->descsz, type); @@ -192,9 +193,9 @@ unsigned long long xen_elfnote_numeric(struct domain_setup_info *dsi, int parseelfimage(struct domain_setup_info *dsi) { - Elf_Ehdr *ehdr = (Elf_Ehdr *)dsi->image_addr; - Elf_Phdr *phdr; - Elf_Shdr *shdr; + const Elf_Ehdr *ehdr = (const Elf_Ehdr *)dsi->image_addr; + const Elf_Phdr *phdr; + const Elf_Shdr *shdr; Elf_Addr kernstart = ~0, kernend = 0, vaddr, virt_entry; const char *shstrtab, *p; const char *image = (char *)dsi->image_addr; @@ -202,7 +203,7 @@ int parseelfimage(struct domain_setup_info *dsi) int h, virt_base_defined, elf_pa_off_defined, virt_entry_defined; if ( !elf_sanity_check(ehdr) ) - return -EINVAL; + return -ENOSYS; if ( (ehdr->e_phoff + (ehdr->e_phnum*ehdr->e_phentsize)) > image_len ) { @@ -222,12 +223,13 @@ int parseelfimage(struct domain_setup_info *dsi) /* Look for .notes segment containing at least one Xen note */ for ( h = 0; h < ehdr->e_shnum; h++ ) { - shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize)); + shdr = (const Elf_Shdr *)( + image + ehdr->e_shoff + (h*ehdr->e_shentsize)); if ( !is_xen_elfnote_section(image, shdr) ) continue; - dsi->__elfnote_section = (void *)image + shdr->sh_offset; + dsi->__elfnote_section = (const void *)image + shdr->sh_offset; dsi->__elfnote_section_end = - (void *)image + shdr->sh_offset + shdr->sh_size; + (const void *)image + shdr->sh_offset + shdr->sh_size; break; } @@ -240,16 +242,18 @@ int parseelfimage(struct domain_setup_info *dsi) printk("ELF image has no section-header strings table.\n"); return -EINVAL; } - shdr = (Elf_Shdr *)(image + ehdr->e_shoff + + shdr = 
(const Elf_Shdr *)(image + ehdr->e_shoff + (ehdr->e_shstrndx*ehdr->e_shentsize)); shstrtab = image + shdr->sh_offset; for ( h = 0; h < ehdr->e_shnum; h++ ) { - shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize)); + shdr = (const Elf_Shdr *)( + image + ehdr->e_shoff + (h*ehdr->e_shentsize)); if ( is_xen_guest_section(shdr, shstrtab) ) { - dsi->__xen_guest_string = (char *)image + shdr->sh_offset; + dsi->__xen_guest_string = + (const char *)image + shdr->sh_offset; break; } } @@ -327,8 +331,8 @@ int parseelfimage(struct domain_setup_info *dsi) * If we are using the modern ELF notes interface then the default * is 0. */ - dsi->elf_paddr_offset = - xen_elfnote_numeric(dsi, XEN_ELFNOTE_PADDR_OFFSET, &elf_pa_off_defined); + dsi->elf_paddr_offset = xen_elfnote_numeric(dsi, XEN_ELFNOTE_PADDR_OFFSET, + &elf_pa_off_defined); if ( !elf_pa_off_defined ) { if ( dsi->__elfnote_section ) @@ -346,7 +350,8 @@ int parseelfimage(struct domain_setup_info *dsi) for ( h = 0; h < ehdr->e_phnum; h++ ) { - phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize)); + phdr = (const Elf_Phdr *)( + image + ehdr->e_phoff + (h*ehdr->e_phentsize)); if ( !is_loadable_phdr(phdr) ) continue; vaddr = phdr->p_paddr - dsi->elf_paddr_offset + dsi->v_start; diff --git a/xen/common/elf32.c b/xen/common/elf32.c new file mode 100644 index 0000000000..566c143667 --- /dev/null +++ b/xen/common/elf32.c @@ -0,0 +1,19 @@ +/****************************************************************************** + * elf32.c + * + * Stub to support 32-bit ELF images on 64-bit platforms. 
+ */ + +#include <xen/config.h> +#undef ELFSIZE +#define ELFSIZE 32 +#include <xen/types.h> +#include <xen/elf.h> + +#define xen_elfnote_string xen_elf32note_string +#define xen_elfnote_numeric xen_elf32note_numeric +#define parseelfimage parseelf32image +#define loadelfimage loadelf32image +#define elf_sanity_check elf32_sanity_check + +#include "elf.c" diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index b0d23eb8e0..70665a7e78 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -23,6 +23,7 @@ #include <xen/event.h> #include <xen/irq.h> #include <xen/iocap.h> +#include <xen/compat.h> #include <xen/guest_access.h> #include <asm/current.h> @@ -33,7 +34,7 @@ #define bucket_from_port(d,p) \ ((d)->evtchn[(p)/EVTCHNS_PER_BUCKET]) #define port_is_valid(d,p) \ - (((p) >= 0) && ((p) < MAX_EVTCHNS) && \ + (((p) >= 0) && ((p) < MAX_EVTCHNS(d)) && \ (bucket_from_port(d,p) != NULL)) #define evtchn_from_port(d,p) \ (&(bucket_from_port(d,p))[(p)&(EVTCHNS_PER_BUCKET-1)]) @@ -82,7 +83,7 @@ static int get_free_port(struct domain *d) if ( evtchn_from_port(d, port)->state == ECS_FREE ) return port; - if ( port == MAX_EVTCHNS ) + if ( port == MAX_EVTCHNS(d) ) return -ENOSPC; chn = xmalloc_array(struct evtchn, EVTCHNS_PER_BUCKET); @@ -517,12 +518,12 @@ void evtchn_set_pending(struct vcpu *v, int port) * others may require explicit memory barriers. */ - if ( test_and_set_bit(port, s->evtchn_pending) ) + if ( test_and_set_bit(port, __shared_info_addr(d, s, evtchn_pending)) ) return; - if ( !test_bit (port, s->evtchn_mask) && - !test_and_set_bit(port / BITS_PER_LONG, - &v->vcpu_info->evtchn_pending_sel) ) + if ( !test_bit (port, __shared_info_addr(d, s, evtchn_mask)) && + !test_and_set_bit(port / BITS_PER_GUEST_LONG(d), + vcpu_info_addr(v, evtchn_pending_sel)) ) { vcpu_mark_events_pending(v); } @@ -720,10 +721,10 @@ static long evtchn_unmask(evtchn_unmask_t *unmask) * These operations must happen in strict order. 
Based on * include/xen/event.h:evtchn_set_pending(). */ - if ( test_and_clear_bit(port, s->evtchn_mask) && - test_bit (port, s->evtchn_pending) && - !test_and_set_bit (port / BITS_PER_LONG, - &v->vcpu_info->evtchn_pending_sel) ) + if ( test_and_clear_bit(port, __shared_info_addr(d, s, evtchn_mask)) && + test_bit (port, __shared_info_addr(d, s, evtchn_pending)) && + !test_and_set_bit (port / BITS_PER_GUEST_LONG(d), + vcpu_info_addr(v, evtchn_pending_sel)) ) { vcpu_mark_events_pending(v); } diff --git a/xen/common/gdbstub.c b/xen/common/gdbstub.c index de3ba1f40f..8c863080e0 100644 --- a/xen/common/gdbstub.c +++ b/xen/common/gdbstub.c @@ -382,7 +382,7 @@ gdbstub_detach(struct gdb_context *ctx) static int process_command(struct cpu_user_regs *regs, struct gdb_context *ctx) { - char *ptr; + const char *ptr; unsigned long addr, length; int resume = 0; diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index 3a6bc9587f..3df8af4116 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -1048,6 +1048,10 @@ do_grant_table_op( return rc; } +#ifdef CONFIG_COMPAT +#include "compat/grant_table.c" +#endif + int grant_table_create( struct domain *d) diff --git a/xen/common/kernel.c b/xen/common/kernel.c index 0f14494c20..016270b2c5 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -11,16 +11,23 @@ #include <xen/version.h> #include <xen/sched.h> #include <xen/shadow.h> +#include <xen/nmi.h> #include <xen/guest_access.h> #include <asm/current.h> #include <public/nmi.h> #include <public/version.h> +#ifdef CONFIG_X86 +#include <asm/shared.h> +#endif + +#ifndef COMPAT int tainted; void cmdline_parse(char *cmdline) { - char opt[100], *optval, *p = cmdline, *q; + char opt[100], *optval, *q; + const char *p = cmdline; struct kernel_param *param; if ( p == NULL ) @@ -70,13 +77,13 @@ void cmdline_parse(char *cmdline) break; case OPT_UINT: *(unsigned int *)param->var = - simple_strtol(optval, (char **)&optval, 0); + simple_strtol(optval, (const 
char **)&optval, 0); break; case OPT_BOOL: *(int *)param->var = 1; break; case OPT_CUSTOM: - ((void (*)(char *))param->var)(optval); + ((void (*)(const char *))param->var)(optval); break; } } @@ -115,11 +122,15 @@ void add_taint(unsigned flag) tainted |= flag; } +# define DO(fn) long do_##fn + +#endif + /* * Simple hypercalls. */ -long do_xen_version(int cmd, XEN_GUEST_HANDLE(void) arg) +DO(xen_version)(int cmd, XEN_GUEST_HANDLE(void) arg) { switch ( cmd ) { @@ -229,6 +240,8 @@ long do_xen_version(int cmd, XEN_GUEST_HANDLE(void) arg) return -ENOSYS; } +#ifndef COMPAT + long register_guest_nmi_callback(unsigned long address) { struct vcpu *v = current; @@ -243,7 +256,7 @@ long register_guest_nmi_callback(unsigned long address) * If no handler was registered we can 'lose the NMI edge'. Re-assert it * now. */ - if ( d->shared_info->arch.nmi_reason != 0 ) + if ( arch_get_nmi_reason(d) != 0 ) set_bit(_VCPUF_nmi_pending, &v->vcpu_flags); #endif @@ -259,7 +272,9 @@ long unregister_guest_nmi_callback(void) return 0; } -long do_nmi_op(unsigned int cmd, XEN_GUEST_HANDLE(void) arg) +#endif + +DO(nmi_op)(unsigned int cmd, XEN_GUEST_HANDLE(void) arg) { struct xennmi_callback cb; long rc = 0; @@ -283,12 +298,12 @@ long do_nmi_op(unsigned int cmd, XEN_GUEST_HANDLE(void) arg) return rc; } -long do_vm_assist(unsigned int cmd, unsigned int type) +DO(vm_assist)(unsigned int cmd, unsigned int type) { return vm_assist(current->domain, cmd, type); } -long do_ni_hypercall(void) +DO(ni_hypercall)(void) { /* No-op hypercall. 
*/ return -ENOSYS; diff --git a/xen/common/kexec.c b/xen/common/kexec.c index 4fb9c6e706..294e57b353 100644 --- a/xen/common/kexec.c +++ b/xen/common/kexec.c @@ -22,22 +22,34 @@ #include <xen/version.h> #include <public/elfnote.h> -DEFINE_PER_CPU (crash_note_t, crash_notes); -cpumask_t crash_saved_cpus; +#ifndef COMPAT -xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR]; +typedef long ret_t; + +#define ELFNOTE_ALIGN(_n_) (((_n_)+3)&~3) +#define ELFNOTE_NAME(_n_) ((void*)(_n_) + sizeof(*(_n_))) +#define ELFNOTE_DESC(_n_) (ELFNOTE_NAME(_n_) + ELFNOTE_ALIGN((_n_)->namesz)) +#define ELFNOTE_NEXT(_n_) (ELFNOTE_DESC(_n_) + ELFNOTE_ALIGN((_n_)->descsz)) + +static DEFINE_PER_CPU(void *, crash_notes); + +static Elf_Note *xen_crash_note; + +static cpumask_t crash_saved_cpus; + +static xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR]; #define KEXEC_FLAG_DEFAULT_POS (KEXEC_IMAGE_NR + 0) #define KEXEC_FLAG_CRASH_POS (KEXEC_IMAGE_NR + 1) #define KEXEC_FLAG_IN_PROGRESS (KEXEC_IMAGE_NR + 2) -unsigned long kexec_flags = 0; /* the lowest bits are for KEXEC_IMAGE... */ +static unsigned long kexec_flags = 0; /* the lowest bits are for KEXEC_IMAGE... */ -spinlock_t kexec_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t kexec_lock = SPIN_LOCK_UNLOCKED; xen_kexec_reserve_t kexec_crash_area; -static void __init parse_crashkernel(char *str) +static void __init parse_crashkernel(const char *str) { unsigned long start, size; @@ -66,40 +78,29 @@ static void one_cpu_only(void) void kexec_crash_save_cpu(void) { int cpu = smp_processor_id(); - crash_note_t *cntp; + Elf_Note *note = per_cpu(crash_notes, cpu); + ELF_Prstatus *prstatus; + crash_xen_core_t *xencore; if ( cpu_test_and_set(cpu, crash_saved_cpus) ) return; - cntp = &per_cpu(crash_notes, cpu); - elf_core_save_regs(&cntp->core.desc.desc.pr_reg, - &cntp->xen_regs.desc.desc); + prstatus = ELFNOTE_DESC(note); - /* Set up crash "CORE" note. 
*/ - setup_crash_note(cntp, core, CORE_STR, CORE_STR_LEN, NT_PRSTATUS); + note = ELFNOTE_NEXT(note); + xencore = ELFNOTE_DESC(note); - /* Set up crash note "Xen", XEN_ELFNOTE_CRASH_REGS. */ - setup_crash_note(cntp, xen_regs, XEN_STR, XEN_STR_LEN, - XEN_ELFNOTE_CRASH_REGS); + elf_core_save_regs(&prstatus->pr_reg, xencore); } /* Set up the single Xen-specific-info crash note. */ crash_xen_info_t *kexec_crash_save_info(void) { int cpu = smp_processor_id(); - crash_note_t *cntp; - crash_xen_info_t *info; + crash_xen_info_t *info = ELFNOTE_DESC(xen_crash_note); BUG_ON(!cpu_test_and_set(cpu, crash_saved_cpus)); - cntp = &per_cpu(crash_notes, cpu); - - /* Set up crash note "Xen", XEN_ELFNOTE_CRASH_INFO. */ - setup_crash_note(cntp, xen_info, XEN_STR, XEN_STR_LEN, - XEN_ELFNOTE_CRASH_INFO); - - info = &cntp->xen_info.desc.desc; - info->xen_major_version = xen_major_version(); info->xen_minor_version = xen_minor_version(); info->xen_extra_version = __pa(xen_extra_version()); @@ -143,33 +144,76 @@ static __init int register_crashdump_trigger(void) } __initcall(register_crashdump_trigger); -static int kexec_get_reserve(xen_kexec_range_t *range) +static void setup_note(Elf_Note *n, const char *name, int type, int descsz) +{ + strcpy(ELFNOTE_NAME(n), name); + n->namesz = strlen(name); + n->descsz = descsz; + n->type = type; +} + +#define kexec_get(x) kexec_get_##x + +#endif + +static int kexec_get(reserve)(xen_kexec_range_t *range) { range->start = kexec_crash_area.start; range->size = kexec_crash_area.size; return 0; } -extern unsigned long _text; - -static int kexec_get_xen(xen_kexec_range_t *range) +static int kexec_get(xen)(xen_kexec_range_t *range) { - range->start = virt_to_maddr(&_text); - range->size = (unsigned long)&_end - (unsigned long)&_text; + range->start = virt_to_maddr(_start); + range->size = (unsigned long)_end - (unsigned long)_start; return 0; } -static int kexec_get_cpu(xen_kexec_range_t *range) +static int kexec_get(cpu)(xen_kexec_range_t *range) { - if ( 
range->nr < 0 || range->nr >= num_present_cpus() ) + int nr = range->nr; + int nr_bytes = sizeof(Elf_Note) * 2 + + ELFNOTE_ALIGN(sizeof(ELF_Prstatus)) + + ELFNOTE_ALIGN(sizeof(crash_xen_core_t)); + + if ( nr < 0 || nr >= num_present_cpus() ) return -EINVAL; - range->start = __pa((unsigned long)&per_cpu(crash_notes, range->nr)); - range->size = sizeof(crash_note_t); + /* The Xen info note is included in CPU0's range. */ + if ( nr == 0 ) + nr_bytes += sizeof(Elf_Note) + ELFNOTE_ALIGN(sizeof(crash_xen_info_t)); + + if ( per_cpu(crash_notes, nr) == NULL ) + { + Elf_Note *note; + + note = per_cpu(crash_notes, nr) = xmalloc_bytes(nr_bytes); + + if ( note == NULL ) + return -ENOMEM; + + /* Setup CORE note. */ + setup_note(note, "CORE", NT_PRSTATUS, sizeof(ELF_Prstatus)); + + /* Setup Xen CORE note. */ + note = ELFNOTE_NEXT(note); + setup_note(note, "Xen", XEN_ELFNOTE_CRASH_REGS, sizeof(crash_xen_core_t)); + + if (nr == 0) + { + /* Setup system wide Xen info note. */ + xen_crash_note = note = ELFNOTE_NEXT(note); + setup_note(note, "Xen", XEN_ELFNOTE_CRASH_INFO, sizeof(crash_xen_info_t)); + } + } + + range->start = __pa((unsigned long)per_cpu(crash_notes, nr)); + range->size = nr_bytes; return 0; } -static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg) +static int kexec_get(range)(XEN_GUEST_HANDLE(void) uarg) { xen_kexec_range_t range; int ret = -EINVAL; @@ -180,13 +224,13 @@ static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg) switch ( range.range ) { case KEXEC_RANGE_MA_CRASH: - ret = kexec_get_reserve(&range); + ret = kexec_get(reserve)(&range); break; case KEXEC_RANGE_MA_XEN: - ret = kexec_get_xen(&range); + ret = kexec_get(xen)(&range); break; case KEXEC_RANGE_MA_CPU: - ret = kexec_get_cpu(&range); + ret = kexec_get(cpu)(&range); break; } @@ -196,6 +240,8 @@ static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg) return ret; } +#ifndef COMPAT + static int kexec_load_get_bits(int type, int *base, int *bit) { switch ( type ) @@ -214,6 +260,8 @@ static int 
kexec_load_get_bits(int type, int *base, int *bit) return 0; } +#endif + static int kexec_load_unload(unsigned long op, XEN_GUEST_HANDLE(void) uarg) { xen_kexec_load_t load; @@ -236,7 +284,11 @@ static int kexec_load_unload(unsigned long op, XEN_GUEST_HANDLE(void) uarg) BUG_ON(test_bit((base + !pos), &kexec_flags)); /* must be free */ +#ifndef COMPAT memcpy(image, &load.image, sizeof(*image)); +#else + XLAT_kexec_image(image, &load.image); +#endif if ( !(ret = machine_kexec_load(load.type, base + !pos, image)) ) { @@ -261,6 +313,8 @@ static int kexec_load_unload(unsigned long op, XEN_GUEST_HANDLE(void) uarg) return ret; } +#ifndef COMPAT + static int kexec_exec(XEN_GUEST_HANDLE(void) uarg) { xen_kexec_exec_t exec; @@ -294,7 +348,9 @@ static int kexec_exec(XEN_GUEST_HANDLE(void) uarg) return -EINVAL; /* never reached */ } -long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg) +#endif + +ret_t do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg) { unsigned long flags; int ret = -EINVAL; @@ -305,7 +361,7 @@ long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg) switch ( op ) { case KEXEC_CMD_kexec_get_range: - ret = kexec_get_range(uarg); + ret = kexec_get(range)(uarg); break; case KEXEC_CMD_kexec_load: case KEXEC_CMD_kexec_unload: @@ -324,6 +380,10 @@ long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg) return ret; } +#if defined(CONFIG_COMPAT) && !defined(COMPAT) +#include "compat/kexec.c" +#endif + /* * Local variables: * mode: C diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c index 85fe6d89ca..3b5aeb05e2 100644 --- a/xen/common/keyhandler.c +++ b/xen/common/keyhandler.c @@ -12,6 +12,7 @@ #include <xen/softirq.h> #include <xen/domain.h> #include <xen/rangeset.h> +#include <xen/compat.h> #include <asm/debugger.h> #include <asm/shadow.h> #include <asm/div64.h> @@ -171,8 +172,8 @@ static void dump_domains(unsigned char key) v->vcpu_id, v->processor, test_bit(_VCPUF_running, &v->vcpu_flags) ? 
'T':'F', v->vcpu_flags, - v->vcpu_info->evtchn_upcall_pending, - v->vcpu_info->evtchn_upcall_mask); + vcpu_info(v, evtchn_upcall_pending), + vcpu_info(v, evtchn_upcall_mask)); cpuset_print(cpuset, sizeof(cpuset), v->vcpu_dirty_cpumask); printk("dirty_cpus=%s ", cpuset); cpuset_print(cpuset, sizeof(cpuset), v->cpu_affinity); @@ -181,11 +182,11 @@ static void dump_domains(unsigned char key) printk(" Notifying guest (virq %d, port %d, stat %d/%d/%d)\n", VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG], test_bit(v->virq_to_evtchn[VIRQ_DEBUG], - d->shared_info->evtchn_pending), + shared_info_addr(d, evtchn_pending)), test_bit(v->virq_to_evtchn[VIRQ_DEBUG], - d->shared_info->evtchn_mask), - test_bit(v->virq_to_evtchn[VIRQ_DEBUG]/BITS_PER_LONG, - &v->vcpu_info->evtchn_pending_sel)); + shared_info_addr(d, evtchn_mask)), + test_bit(v->virq_to_evtchn[VIRQ_DEBUG]/BITS_PER_GUEST_LONG(d), + vcpu_info_addr(v, evtchn_pending_sel))); send_guest_vcpu_virq(v, VIRQ_DEBUG); } } diff --git a/xen/common/lib.c b/xen/common/lib.c index 0eb58f19d2..661f76420d 100644 --- a/xen/common/lib.c +++ b/xen/common/lib.c @@ -439,11 +439,14 @@ s64 __moddi3(s64 a, s64 b) #endif /* BITS_PER_LONG == 32 */ -unsigned long long parse_size_and_unit(const char *s, char **ps) +unsigned long long parse_size_and_unit(const char *s, const char **ps) { - unsigned long long ret = simple_strtoull(s, (char **)&s, 0); + unsigned long long ret; + const char *s1; - switch (*s) { + ret = simple_strtoull(s, &s1, 0); + + switch (*s1) { case 'G': case 'g': ret <<= 10; case 'M': case 'm': @@ -451,7 +454,7 @@ unsigned long long parse_size_and_unit(const char *s, char **ps) case 'K': case 'k': ret <<= 10; case 'B': case 'b': - s++; + s1++; break; default: ret <<= 10; /* default to kB */ @@ -459,7 +462,7 @@ unsigned long long parse_size_and_unit(const char *s, char **ps) } if (ps != NULL) - *ps = (char *)s; + *ps = s1; return ret; } diff --git a/xen/common/memory.c b/xen/common/memory.c index f17ae4b913..1d9e6d1044 100644 --- 
a/xen/common/memory.c +++ b/xen/common/memory.c @@ -17,18 +17,12 @@ #include <xen/shadow.h> #include <xen/iocap.h> #include <xen/guest_access.h> +#include <xen/hypercall.h> #include <xen/errno.h> #include <asm/current.h> #include <asm/hardirq.h> #include <public/memory.h> -/* - * To allow safe resume of do_memory_op() after preemption, we need to know - * at what point in the page list to resume. For this purpose I steal the - * high-order bits of the @cmd parameter, which are otherwise unused and zero. - */ -#define START_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */ - struct memop_args { /* INPUT */ struct domain *domain; /* Domain to be affected. */ @@ -236,7 +230,7 @@ static long translate_gpfn_list( return -EFAULT; /* Is size too large for us to encode a continuation? */ - if ( op.nr_gpfns > (ULONG_MAX >> START_EXTENT_SHIFT) ) + if ( op.nr_gpfns > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) ) return -EINVAL; if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) || @@ -511,20 +505,20 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg) struct memop_args args; domid_t domid; - op = cmd & ((1 << START_EXTENT_SHIFT) - 1); + op = cmd & MEMOP_CMD_MASK; switch ( op ) { case XENMEM_increase_reservation: case XENMEM_decrease_reservation: case XENMEM_populate_physmap: - start_extent = cmd >> START_EXTENT_SHIFT; + start_extent = cmd >> MEMOP_EXTENT_SHIFT; if ( copy_from_guest(&reservation, arg, 1) ) return start_extent; /* Is size too large for us to encode a continuation? 
*/ - if ( reservation.nr_extents > (ULONG_MAX >> START_EXTENT_SHIFT) ) + if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) ) return start_extent; if ( unlikely(start_extent > reservation.nr_extents) ) @@ -574,7 +568,7 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg) if ( args.preempted ) return hypercall_create_continuation( __HYPERVISOR_memory_op, "lh", - op | (rc << START_EXTENT_SHIFT), arg); + op | (rc << MEMOP_EXTENT_SHIFT), arg); break; @@ -606,14 +600,14 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg) break; case XENMEM_translate_gpfn_list: - progress = cmd >> START_EXTENT_SHIFT; + progress = cmd >> MEMOP_EXTENT_SHIFT; rc = translate_gpfn_list( guest_handle_cast(arg, xen_translate_gpfn_list_t), &progress); if ( rc == -EAGAIN ) return hypercall_create_continuation( __HYPERVISOR_memory_op, "lh", - op | (progress << START_EXTENT_SHIFT), arg); + op | (progress << MEMOP_EXTENT_SHIFT), arg); break; default: diff --git a/xen/common/multicall.c b/xen/common/multicall.c index 499e6bd62c..1a43838ca0 100644 --- a/xen/common/multicall.c +++ b/xen/common/multicall.c @@ -13,9 +13,12 @@ #include <asm/current.h> #include <asm/hardirq.h> +#ifndef COMPAT DEFINE_PER_CPU(struct mc_state, mc_state); +typedef long ret_t; +#endif -long +ret_t do_multicall( XEN_GUEST_HANDLE(multicall_entry_t) call_list, unsigned int nr_calls) { diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 4a7527859b..1102d2fd5b 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -197,7 +197,7 @@ paddr_t init_boot_allocator(paddr_t bitmap_start) void init_boot_pages(paddr_t ps, paddr_t pe) { unsigned long bad_spfn, bad_epfn, i; - char *p; + const char *p; ps = round_pgup(ps); pe = round_pgdown(pe); diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 10f63b85bf..55abd5f5ca 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -13,6 +13,7 @@ * */ +#ifndef COMPAT #include <xen/config.h> #include 
<xen/init.h> #include <xen/lib.h> @@ -32,8 +33,6 @@ #include <xen/multicall.h> #include <public/sched.h> -extern void arch_getdomaininfo_ctxt(struct vcpu *, - struct vcpu_guest_context *); /* opt_sched: scheduler - default to credit */ static char opt_sched[10] = "credit"; string_param("sched", opt_sched); @@ -277,10 +276,11 @@ static long do_block(void) static long do_poll(struct sched_poll *sched_poll) { - struct vcpu *v = current; - evtchn_port_t port; - long rc = 0; - unsigned int i; + struct vcpu *v = current; + struct domain *d = v->domain; + evtchn_port_t port; + long rc = 0; + unsigned int i; /* Fairly arbitrary limit. */ if ( sched_poll->nr_ports > 128 ) @@ -292,7 +292,7 @@ static long do_poll(struct sched_poll *sched_poll) /* These operations must occur in order. */ set_bit(_VCPUF_blocked, &v->vcpu_flags); set_bit(_VCPUF_polling, &v->vcpu_flags); - set_bit(_DOMF_polling, &v->domain->domain_flags); + set_bit(_DOMF_polling, &d->domain_flags); /* Check for events /after/ setting flags: avoids wakeup waiting race. 
*/ for ( i = 0; i < sched_poll->nr_ports; i++ ) @@ -302,18 +302,18 @@ static long do_poll(struct sched_poll *sched_poll) goto out; rc = -EINVAL; - if ( port >= MAX_EVTCHNS ) + if ( port >= MAX_EVTCHNS(d) ) goto out; rc = 0; - if ( test_bit(port, v->domain->shared_info->evtchn_pending) ) + if ( test_bit(port, shared_info_addr(d, evtchn_pending)) ) goto out; } if ( sched_poll->timeout != 0 ) set_timer(&v->poll_timer, sched_poll->timeout); - TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id); + TRACE_2D(TRC_SCHED_BLOCK, d->domain_id, v->vcpu_id); raise_softirq(SCHEDULE_SOFTIRQ); return 0; @@ -365,9 +365,13 @@ long do_sched_op_compat(int cmd, unsigned long arg) return ret; } -long do_sched_op(int cmd, XEN_GUEST_HANDLE(void) arg) +typedef long ret_t; + +#endif /* !COMPAT */ + +ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE(void) arg) { - long ret = 0; + ret_t ret = 0; switch ( cmd ) { @@ -444,6 +448,8 @@ long do_sched_op(int cmd, XEN_GUEST_HANDLE(void) arg) return ret; } +#ifndef COMPAT + /* Per-domain one-shot-timer hypercall. */ long do_set_timer_op(s_time_t timeout) { @@ -735,6 +741,12 @@ void dump_runq(unsigned char key) local_irq_restore(flags); } +#ifdef CONFIG_COMPAT +#include "compat/schedule.c" +#endif + +#endif /* !COMPAT */ + /* * Local variables: * mode: C diff --git a/xen/common/symbols.c b/xen/common/symbols.c index 6bc3956a13..fba6cf0867 100644 --- a/xen/common/symbols.c +++ b/xen/common/symbols.c @@ -12,6 +12,7 @@ #include <xen/config.h> #include <xen/symbols.h> +#include <xen/kernel.h> #include <xen/init.h> #include <xen/lib.h> #include <xen/string.h> @@ -93,13 +94,10 @@ const char *symbols_lookup(unsigned long addr, unsigned long i, low, high, mid; unsigned long symbol_end = 0; - /* This kernel should never had been booted. 
*/ - BUG_ON(!symbols_addresses); - namebuf[KSYM_NAME_LEN] = 0; namebuf[0] = 0; - if (!is_kernel_text(addr)) + if (!is_kernel_text(addr) && !is_kernel_inittext(addr)) return NULL; /* do a binary search on the sorted symbols_addresses array */ @@ -130,7 +128,8 @@ const char *symbols_lookup(unsigned long addr, /* if we found no next symbol, we use the end of the section */ if (!symbol_end) - symbol_end = kernel_text_end(); + symbol_end = is_kernel_inittext(addr) ? + (unsigned long)_einittext : (unsigned long)_etext; *symbolsize = symbol_end - symbols_addresses[low]; *offset = addr - symbols_addresses[low]; diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c index 9f991f39b3..33a5108079 100644 --- a/xen/common/sysctl.c +++ b/xen/common/sysctl.c @@ -21,14 +21,17 @@ #include <asm/current.h> #include <public/sysctl.h> -extern long arch_do_sysctl( +#ifndef COMPAT +typedef long ret_t; +#define copy_to_xxx_offset copy_to_guest_offset +#endif + +extern ret_t arch_do_sysctl( struct xen_sysctl *op, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl); -extern void getdomaininfo( - struct domain *d, struct xen_domctl_getdomaininfo *info); -long do_sysctl(XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) +ret_t do_sysctl(XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) { - long ret = 0; + ret_t ret = 0; struct xen_sysctl curop, *op = &curop; static DEFINE_SPINLOCK(sysctl_lock); @@ -98,8 +101,8 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) put_domain(d); - if ( copy_to_guest_offset(op->u.getdomaininfolist.buffer, - num_domains, &info, 1) ) + if ( copy_to_xxx_offset(op->u.getdomaininfolist.buffer, + num_domains, &info, 1) ) { ret = -EFAULT; break; @@ -123,7 +126,6 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) #ifdef PERF_COUNTERS case XEN_SYSCTL_perfc_op: { - extern int perfc_control(xen_sysctl_perfc_op_t *); ret = perfc_control(&op->u.perfc_op); if ( copy_to_guest(u_sysctl, op, 1) ) ret = -EFAULT; diff --git a/xen/common/time.c b/xen/common/time.c index 65929d1c1f..9072fc8386 
100644 --- a/xen/common/time.c +++ b/xen/common/time.c @@ -40,7 +40,7 @@ struct tm gmtime(unsigned long t) struct tm tbuf; long days, rem; int y; - unsigned short int *ip; + const unsigned short int *ip; days = t / SECS_PER_DAY; rem = t % SECS_PER_DAY; @@ -66,7 +66,7 @@ struct tm gmtime(unsigned long t) } tbuf.tm_year = y - 1900; tbuf.tm_yday = days; - ip = (unsigned short int *)__mon_lengths[__isleap(y)]; + ip = (const unsigned short int *)__mon_lengths[__isleap(y)]; for ( y = 0; days >= ip[y]; ++y ) days -= ip[y]; tbuf.tm_mon = y; diff --git a/xen/common/trace.c b/xen/common/trace.c index a5cec8e69a..eb791c1f8f 100644 --- a/xen/common/trace.c +++ b/xen/common/trace.c @@ -32,13 +32,29 @@ #include <asm/atomic.h> #include <public/sysctl.h> +#ifdef CONFIG_COMPAT +#include <compat/trace.h> +#define xen_t_buf t_buf +CHECK_t_buf; +#undef xen_t_buf +#define TB_COMPAT IS_COMPAT(dom0) +#else +#define compat_t_rec t_rec +#define TB_COMPAT 0 +#endif + +typedef union { + struct t_rec *nat; + struct compat_t_rec *cmp; +} t_rec_u; + /* opt_tbuf_size: trace buffer size (in pages) */ static unsigned int opt_tbuf_size = 0; integer_param("tbuf_size", opt_tbuf_size); /* Pointers to the meta-data objects for all system trace buffers */ static DEFINE_PER_CPU(struct t_buf *, t_bufs); -static DEFINE_PER_CPU(struct t_rec *, t_recs); +static DEFINE_PER_CPU(t_rec_u, t_recs); static int nr_recs; /* High water mark for trace buffers; */ @@ -87,7 +103,7 @@ static int alloc_trace_bufs(void) nr_pages = num_online_cpus() * opt_tbuf_size; order = get_order_from_pages(nr_pages); nr_recs = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf)) / - sizeof(struct t_rec); + (!TB_COMPAT ? 
sizeof(struct t_rec) : sizeof(struct compat_t_rec)); if ( (rawbuf = alloc_xenheap_pages(order)) == NULL ) { @@ -106,7 +122,7 @@ static int alloc_trace_bufs(void) buf = per_cpu(t_bufs, i) = (struct t_buf *) &rawbuf[i*opt_tbuf_size*PAGE_SIZE]; buf->cons = buf->prod = 0; - per_cpu(t_recs, i) = (struct t_rec *)(buf + 1); + per_cpu(t_recs, i).nat = (struct t_rec *)(buf + 1); } t_buf_highwater = nr_recs >> 1; /* 50% high water */ @@ -232,7 +248,7 @@ void trace(u32 event, unsigned long d1, unsigned long d2, unsigned long d3, unsigned long d4, unsigned long d5) { struct t_buf *buf; - struct t_rec *rec; + t_rec_u rec; unsigned long flags; BUG_ON(!tb_init_done); @@ -269,25 +285,51 @@ void trace(u32 event, unsigned long d1, unsigned long d2, if ( unlikely(this_cpu(lost_records) != 0) ) { - rec = &this_cpu(t_recs)[buf->prod % nr_recs]; - memset(rec, 0, sizeof(*rec)); - rec->cycles = (u64)get_cycles(); - rec->event = TRC_LOST_RECORDS; - rec->data[0] = this_cpu(lost_records); - this_cpu(lost_records) = 0; + if ( !TB_COMPAT ) + { + rec.nat = &this_cpu(t_recs).nat[buf->prod % nr_recs]; + memset(rec.nat, 0, sizeof(*rec.nat)); + rec.nat->cycles = (u64)get_cycles(); + rec.nat->event = TRC_LOST_RECORDS; + rec.nat->data[0] = this_cpu(lost_records); + this_cpu(lost_records) = 0; + } + else + { + rec.cmp = &this_cpu(t_recs).cmp[buf->prod % nr_recs]; + memset(rec.cmp, 0, sizeof(*rec.cmp)); + rec.cmp->cycles = (u64)get_cycles(); + rec.cmp->event = TRC_LOST_RECORDS; + rec.cmp->data[0] = this_cpu(lost_records); + this_cpu(lost_records) = 0; + } wmb(); buf->prod++; } - rec = &this_cpu(t_recs)[buf->prod % nr_recs]; - rec->cycles = (u64)get_cycles(); - rec->event = event; - rec->data[0] = d1; - rec->data[1] = d2; - rec->data[2] = d3; - rec->data[3] = d4; - rec->data[4] = d5; + if ( !TB_COMPAT ) + { + rec.nat = &this_cpu(t_recs).nat[buf->prod % nr_recs]; + rec.nat->cycles = (u64)get_cycles(); + rec.nat->event = event; + rec.nat->data[0] = d1; + rec.nat->data[1] = d2; + rec.nat->data[2] = d3; + 
rec.nat->data[3] = d4; + rec.nat->data[4] = d5; + } + else + { + rec.cmp = &this_cpu(t_recs).cmp[buf->prod % nr_recs]; + rec.cmp->cycles = (u64)get_cycles(); + rec.cmp->event = event; + rec.cmp->data[0] = d1; + rec.cmp->data[1] = d2; + rec.cmp->data[2] = d3; + rec.cmp->data[3] = d4; + rec.cmp->data[4] = d5; + } wmb(); buf->prod++; diff --git a/xen/common/vsprintf.c b/xen/common/vsprintf.c index 7de43593fe..3ed7b7052a 100644 --- a/xen/common/vsprintf.c +++ b/xen/common/vsprintf.c @@ -28,7 +28,8 @@ * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use */ -unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) +unsigned long simple_strtoul( + const char *cp, const char **endp, unsigned int base) { unsigned long result = 0,value; @@ -52,7 +53,7 @@ unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) cp++; } if (endp) - *endp = (char *)cp; + *endp = cp; return result; } @@ -64,7 +65,7 @@ EXPORT_SYMBOL(simple_strtoul); * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use */ -long simple_strtol(const char *cp,char **endp,unsigned int base) +long simple_strtol(const char *cp, const char **endp, unsigned int base) { if(*cp=='-') return -simple_strtoul(cp+1,endp,base); @@ -79,7 +80,8 @@ EXPORT_SYMBOL(simple_strtol); * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use */ -unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base) +unsigned long long simple_strtoull( + const char *cp, const char **endp, unsigned int base) { unsigned long long result = 0,value; @@ -103,7 +105,7 @@ unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base) cp++; } if (endp) - *endp = (char *)cp; + *endp = cp; return result; } @@ -115,7 +117,7 @@ EXPORT_SYMBOL(simple_strtoull); * @endp: A pointer to the end of the parsed string will be placed here * @base: The 
number base to use */ -long long simple_strtoll(const char *cp,char **endp,unsigned int base) +long long simple_strtoll(const char *cp,const char **endp,unsigned int base) { if(*cp=='-') return -simple_strtoull(cp+1,endp,base); @@ -139,7 +141,9 @@ static int skip_atoi(const char **s) #define SPECIAL 32 /* 0x */ #define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ -static char * number(char * buf, char * end, unsigned long long num, int base, int size, int precision, int type) +static char *number( + char *buf, char *end, unsigned long long num, + int base, int size, int precision, int type) { char c,sign,tmp[66]; const char *digits; diff --git a/xen/common/xenoprof.c b/xen/common/xenoprof.c index f297859de5..d61e3f173e 100644 --- a/xen/common/xenoprof.c +++ b/xen/common/xenoprof.c @@ -9,6 +9,7 @@ * VA Linux Systems Japan K.K. */ +#ifndef COMPAT #include <xen/guest_access.h> #include <xen/sched.h> #include <public/xenoprof.h> @@ -72,7 +73,7 @@ static void xenoprof_reset_stat(void) static void xenoprof_reset_buf(struct domain *d) { int j; - struct xenoprof_buf *buf; + xenoprof_buf_t *buf; if ( d->xenoprof == NULL ) { @@ -86,8 +87,8 @@ static void xenoprof_reset_buf(struct domain *d) buf = d->xenoprof->vcpu[j].buffer; if ( buf != NULL ) { - buf->event_head = 0; - buf->event_tail = 0; + xenoprof_buf(d, buf, event_head) = 0; + xenoprof_buf(d, buf, event_tail) = 0; } } } @@ -166,15 +167,24 @@ static int alloc_xenoprof_struct( for_each_vcpu ( d, v ) nvcpu++; + bufsize = sizeof(struct xenoprof_buf); + i = sizeof(struct event_log); +#ifdef CONFIG_COMPAT + d->xenoprof->is_compat = IS_COMPAT(is_passive ? 
dom0 : d); + if ( XENOPROF_COMPAT(d->xenoprof) ) + { + bufsize = sizeof(struct compat_oprof_buf); + i = sizeof(struct compat_event_log); + } +#endif + /* reduce max_samples if necessary to limit pages allocated */ max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu; - max_max_samples = ( (max_bufsize - sizeof(struct xenoprof_buf)) / - sizeof(struct event_log) ) + 1; + max_max_samples = ( (max_bufsize - bufsize) / i ) + 1; if ( (unsigned)max_samples > max_max_samples ) max_samples = max_max_samples; - bufsize = sizeof(struct xenoprof_buf) + - (max_samples - 1) * sizeof(struct event_log); + bufsize += (max_samples - 1) * i; npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1; d->xenoprof->rawbuf = alloc_xenheap_pages(get_order_from_pages(npages)); @@ -195,11 +205,12 @@ static int alloc_xenoprof_struct( i = 0; for_each_vcpu ( d, v ) { + xenoprof_buf_t *buf = (xenoprof_buf_t *)&d->xenoprof->rawbuf[i * bufsize]; + d->xenoprof->vcpu[v->vcpu_id].event_size = max_samples; - d->xenoprof->vcpu[v->vcpu_id].buffer = - (struct xenoprof_buf *)&d->xenoprof->rawbuf[i * bufsize]; - d->xenoprof->vcpu[v->vcpu_id].buffer->event_size = max_samples; - d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id = v->vcpu_id; + d->xenoprof->vcpu[v->vcpu_id].buffer = buf; + xenoprof_buf(d, buf, event_size) = max_samples; + xenoprof_buf(d, buf, vcpu_id) = v->vcpu_id; i++; /* in the unlikely case that the number of active vcpus changes */ @@ -406,8 +417,9 @@ static int add_passive_list(XEN_GUEST_HANDLE(void) arg) void xenoprof_log_event( struct vcpu *vcpu, unsigned long eip, int mode, int event) { + struct domain *d = vcpu->domain; struct xenoprof_vcpu *v; - struct xenoprof_buf *buf; + xenoprof_buf_t *buf; int head; int tail; int size; @@ -417,13 +429,13 @@ void xenoprof_log_event( /* ignore samples of un-monitored domains */ /* Count samples in idle separate from other unmonitored domains */ - if ( !is_profiled(vcpu->domain) ) + if ( !is_profiled(d) ) { others_samples++; return; } - v = 
&vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id]; + v = &d->xenoprof->vcpu[vcpu->vcpu_id]; /* Sanity check. Should never happen */ if ( v->buffer == NULL ) @@ -432,10 +444,10 @@ void xenoprof_log_event( return; } - buf = vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id].buffer; + buf = v->buffer; - head = buf->event_head; - tail = buf->event_tail; + head = xenoprof_buf(d, buf, event_head); + tail = xenoprof_buf(d, buf, event_tail); size = v->event_size; /* make sure indexes in shared buffer are sane */ @@ -447,28 +459,28 @@ void xenoprof_log_event( if ( (head == tail - 1) || (head == size - 1 && tail == 0) ) { - buf->lost_samples++; + xenoprof_buf(d, buf, lost_samples)++; lost_samples++; } else { - buf->event_log[head].eip = eip; - buf->event_log[head].mode = mode; - buf->event_log[head].event = event; + xenoprof_buf(d, buf, event_log[head].eip) = eip; + xenoprof_buf(d, buf, event_log[head].mode) = mode; + xenoprof_buf(d, buf, event_log[head].event) = event; head++; if ( head >= size ) head = 0; - buf->event_head = head; + xenoprof_buf(d, buf, event_head) = head; if ( is_active(vcpu->domain) ) active_samples++; else passive_samples++; if ( mode == 0 ) - buf->user_samples++; + xenoprof_buf(d, buf, user_samples)++; else if ( mode == 1 ) - buf->kernel_samples++; + xenoprof_buf(d, buf, kernel_samples)++; else - buf->xen_samples++; + xenoprof_buf(d, buf, xen_samples)++; } } @@ -494,6 +506,8 @@ static int xenoprof_op_init(XEN_GUEST_HANDLE(void) arg) return 0; } +#endif /* !COMPAT */ + static int xenoprof_op_get_buffer(XEN_GUEST_HANDLE(void) arg) { struct xenoprof_get_buffer xenoprof_get_buffer; @@ -732,6 +746,10 @@ int do_xenoprof_op(int op, XEN_GUEST_HANDLE(void) arg) return ret; } +#if defined(CONFIG_COMPAT) && !defined(COMPAT) +#include "compat/xenoprof.c" +#endif + /* * Local variables: * mode: C diff --git a/xen/common/xmalloc.c b/xen/common/xmalloc.c index 60f75d60b1..012ce375e8 100644 --- a/xen/common/xmalloc.c +++ b/xen/common/xmalloc.c @@ -87,7 +87,7 @@ static void 
*data_from_header(struct xmalloc_hdr *hdr) #endif } -static struct xmalloc_hdr *header_from_data(const void *p) +static struct xmalloc_hdr *header_from_data(void *p) { #if XMALLOC_DEBUG unsigned char *data = (unsigned char *)p - SMP_CACHE_BYTES; @@ -208,7 +208,7 @@ void *_xmalloc(size_t size, size_t align) return xmalloc_new_page(size); } -void xfree(const void *p) +void xfree(void *p) { unsigned long flags; struct xmalloc_hdr *i, *tmp, *hdr; diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c index 5067d985b3..7fc77ff9c5 100644 --- a/xen/drivers/char/ns16550.c +++ b/xen/drivers/char/ns16550.c @@ -300,7 +300,7 @@ static int parse_parity_char(int c) return; \ } while ( 0 ) -static void ns16550_parse_port_config(struct ns16550 *uart, char *conf) +static void ns16550_parse_port_config(struct ns16550 *uart, const char *conf) { int baud; diff --git a/xen/include/Makefile b/xen/include/Makefile new file mode 100644 index 0000000000..d2efb237c3 --- /dev/null +++ b/xen/include/Makefile @@ -0,0 +1,69 @@ +ifneq ($(CONFIG_COMPAT),) + +compat-arch-$(CONFIG_X86) := x86_32 + +headers-y := $(shell echo public/*.h | sed -e 's,[^[:space:]]*-[^[:space:]]*,,g' -e 's,public/,compat/,g') +headers-y := $(filter-out %/dom0_ops.h,$(headers-y)) +headers-$(CONFIG_X86) += compat/arch-x86/xen.h +headers-$(CONFIG_X86) += compat/arch-x86/xen-$(compat-arch-y).h +headers-y += compat/arch-$(compat-arch-y).h compat/xlat.h + +cppflags-y := -include public/xen-compat.h +cppflags-$(CONFIG_X86) += -m32 + +# 8-byte types are 4-byte aligned on x86_32 ... 
+prefix-$(CONFIG_X86) := \#pragma pack(push, 4) +suffix-$(CONFIG_X86) := \#pragma pack(pop) + +endif + +.PHONY: all +all: $(headers-y) + +compat/%.h: compat/%.i Makefile + id=_$$(echo $@ | sed 'y,abcdefghijklmnopqrstuvwxyz-/.,ABCDEFGHIJKLMNOPQRSTUVWXYZ___,'); \ + echo "#ifndef $$id" >$@.new; \ + echo "#define $$id" >>$@.new; \ + echo "#include <xen/compat.h>" >>$@.new; \ + $(if $(filter-out compat/arch-%.h,$@),echo "#include <$(patsubst compat/%,public/%,$@)>" >>$@.new;) \ + $(if $(prefix-y),echo "$(prefix-y)" >>$@.new;) \ + grep -v '^# [[:digit:]]' $< | \ + sed -e 's,__InClUdE__,#include,' \ + -e 's,"xen-compat.h",<public/xen-compat.h>,' \ + -e 's,\(struct\|union\|enum\)[[:space:]]\+\(xen_\?\)\?\([[:alpha:]_]\),\1 compat_\3,g' \ + -e 's,@KeeP@,,g' \ + -e 's,_t\([^[:alnum:]_]\|$$\),_compat_t\1,g' \ + -e 's,\(8\|16\|32\|64\)_compat_t\([^[:alnum:]_]\|$$\),\1_t\2,g' \ + -e 's,\(^\|[^[:alnum:]_]\)xen_\?\([[:alnum:]_]*\)_compat_t\([^[:alnum:]_]\|$$\),\1compat_\2_t\3,g' \ + -e 's,\(^\|[^[:alnum:]_]\)XEN_\?,\1COMPAT_,' \ + -e 's,\(^\|[^[:alnum:]_]\)Xen_\?,\1Compat_,' \ + -e 's,\(^\|[^[:alnum:]]\)long\([^[:alnum:]]\|$$\),\1int\2,g' | \ + uniq >>$@.new; \ + $(if $(suffix-y),echo "$(suffix-y)" >>$@.new;) \ + echo "#endif /* $$id */" >>$@.new + mv -f $@.new $@ + +compat/%.i: compat/%.c Makefile + $(CPP) $(CFLAGS) $(cppflags-y) -o $@ $< + +compat/%.c: public/%.h xlat.lst Makefile + mkdir -p $(@D) + grep -v 'DEFINE_XEN_GUEST_HANDLE(long)' $< | \ + sed -e 's,^[[:space:]]*#[[:space:]]*include[[:space:]]\+,__InClUdE__ ,' \ + -e 's,^[[:space:]]*#[[:space:]]*define[[:space:]]\+\([[:upper:]_]*_GUEST_HANDLE\),#define HIDE_\1,' \ + -e 's,^[[:space:]]*#[[:space:]]*define[[:space:]]\+\([[:lower:]_]*_guest_handle\),#define hide_\1,' \ + -e 's,XEN_GUEST_HANDLE\(_[[:xdigit:]]\+\)\?,COMPAT_HANDLE,g' \ + $(foreach n,$(shell sed -n 's,^[[:space:]]*?[[:space:]]\+\([[:alnum:]_]*\)[[:space:]].*,\1,p' xlat.lst), \ + -e 's,\(struct\|union\)[[:space:]]\+\(xen_\?\)\?$n[[:space:]]\+\([[:alpha:]_]\),\1 
@KeeP@\2$n \3,g') \ + >$@.new + mv -f $@.new $@ + +compat/xlat.h: xlat.lst $(filter-out compat/xlat.h,$(headers-y)) $(BASEDIR)/tools/get-fields.sh Makefile + grep -v '^[[:space:]]*#' xlat.lst | \ + while read what name hdr; do \ + $(SHELL) $(BASEDIR)/tools/get-fields.sh "$$what" compat_$$name $$(echo compat/$$hdr | sed 's,@arch@,$(compat-arch-y),g') || exit $$?; \ + done >$@.new + mv -f $@.new $@ + +clean:: + rm -rf compat diff --git a/xen/include/asm-ia64/init.h b/xen/include/asm-ia64/init.h index 7e5df20da6..5295b35e63 100644 --- a/xen/include/asm-ia64/init.h +++ b/xen/include/asm-ia64/init.h @@ -1,29 +1,4 @@ #ifndef _XEN_ASM_INIT_H #define _XEN_ASM_INIT_H -/* - * Mark functions and data as being only used at initialization - * or exit time. - */ -#define __init \ - __attribute__ ((__section__ (".init.text"))) -#define __exit \ - __attribute_used__ __attribute__ ((__section__(".text.exit"))) -#define __initdata \ - __attribute__ ((__section__ (".init.data"))) -#define __exitdata \ - __attribute_used__ __attribute__ ((__section__ (".data.exit"))) -#define __initsetup \ - __attribute_used__ __attribute__ ((__section__ (".init.setup"))) -#define __init_call \ - __attribute_used__ __attribute__ ((__section__ (".initcall1.init"))) -#define __exit_call \ - __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) - -/* For assembly routines -#define __INIT .section ".text.init","ax" -#define __FINIT .previous -#define __INITDATA .section ".data.init","aw" -*/ - #endif /* _XEN_ASM_INIT_H */ diff --git a/xen/include/asm-ia64/shared.h b/xen/include/asm-ia64/shared.h new file mode 100644 index 0000000000..4f1ebb2a5d --- /dev/null +++ b/xen/include/asm-ia64/shared.h @@ -0,0 +1,4 @@ +#ifndef __XEN_ASM_SHARED_H__ +#define __XEN_ASM_SHARED_H__ + +#endif /* __XEN_ASM_SHARED_H__ */ diff --git a/xen/include/asm-powerpc/init.h b/xen/include/asm-powerpc/init.h index 7ac4f0cbf7..11f9d6e104 100644 --- a/xen/include/asm-powerpc/init.h +++ b/xen/include/asm-powerpc/init.h @@ 
-21,25 +21,6 @@ #ifndef _XEN_ASM_INIT_H #define _XEN_ASM_INIT_H -/* - * Mark functions and data as being only used at initialization - * or exit time. - */ -#define __init \ - __attribute__ ((__section__ (".init.text"))) -#define __exit \ - __attribute_used__ __attribute__ ((__section__(".text.exit"))) -#define __initdata \ - __attribute__ ((__section__ (".init.data"))) -#define __exitdata \ - __attribute_used__ __attribute__ ((__section__ (".data.exit"))) -#define __initsetup \ - __attribute_used__ __attribute__ ((__section__ (".setup.init"))) -#define __init_call \ - __attribute_used__ __attribute__ ((__section__ (".initcall.init"))) -#define __exit_call \ - __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) - struct cpu_user_regs; typedef void (*hcall_handler_t)(struct cpu_user_regs *regs); diff --git a/xen/include/asm-powerpc/shared.h b/xen/include/asm-powerpc/shared.h new file mode 100644 index 0000000000..4f1ebb2a5d --- /dev/null +++ b/xen/include/asm-powerpc/shared.h @@ -0,0 +1,4 @@ +#ifndef __XEN_ASM_SHARED_H__ +#define __XEN_ASM_SHARED_H__ + +#endif /* __XEN_ASM_SHARED_H__ */ diff --git a/xen/include/asm-x86/bitops.h b/xen/include/asm-x86/bitops.h index a1e0754b82..8cb4693fab 100644 --- a/xen/include/asm-x86/bitops.h +++ b/xen/include/asm-x86/bitops.h @@ -23,6 +23,7 @@ */ #define ADDR (*(volatile long *) addr) +#define CONST_ADDR (*(const volatile long *) addr) /** * set_bit - Atomically set a bit in memory @@ -253,7 +254,7 @@ static __inline__ int variable_test_bit(int nr, const volatile void * addr) __asm__ __volatile__( "btl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit) - :"m" (ADDR),"dIr" (nr)); + :"m" (CONST_ADDR),"dIr" (nr)); return oldbit; } @@ -288,7 +289,7 @@ static inline unsigned int __scanbit(unsigned long val) */ #define find_first_bit(addr,size) \ ((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? 
\ - (__scanbit(*(unsigned long *)addr)) : \ + (__scanbit(*(const unsigned long *)addr)) : \ __find_first_bit(addr,size))) /** @@ -299,7 +300,7 @@ static inline unsigned int __scanbit(unsigned long val) */ #define find_next_bit(addr,size,off) \ ((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ - ((off) + (__scanbit((*(unsigned long *)addr) >> (off)))) : \ + ((off) + (__scanbit((*(const unsigned long *)addr) >> (off)))) : \ __find_next_bit(addr,size,off))) /** @@ -312,7 +313,7 @@ static inline unsigned int __scanbit(unsigned long val) */ #define find_first_zero_bit(addr,size) \ ((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ - (__scanbit(~*(unsigned long *)addr)) : \ + (__scanbit(~*(const unsigned long *)addr)) : \ __find_first_zero_bit(addr,size))) /** @@ -323,7 +324,7 @@ static inline unsigned int __scanbit(unsigned long val) */ #define find_next_zero_bit(addr,size,off) \ ((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ - ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off))))) : \ + ((off)+(__scanbit(~(((*(const unsigned long *)addr)) >> (off))))) : \ __find_next_zero_bit(addr,size,off))) diff --git a/xen/include/asm-x86/compat.h b/xen/include/asm-x86/compat.h new file mode 100644 index 0000000000..f9df74ece8 --- /dev/null +++ b/xen/include/asm-x86/compat.h @@ -0,0 +1,8 @@ +/****************************************************************************** + * compat.h + */ + +#define COMPAT_BITS_PER_LONG 32 + +typedef uint32_t compat_ptr_t; +typedef unsigned long full_ptr_t; diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index 1ccb34edd2..71e3dd8e4a 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -84,13 +84,10 @@ #define CONFIG_DMA_BITSIZE 30 -#ifndef __ASSEMBLY__ -extern unsigned long _end; /* standard ELF symbol */ -#endif /* __ASSEMBLY__ */ - #if defined(__x86_64__) #define CONFIG_X86_64 1 +#define CONFIG_COMPAT 1 #define asmlinkage @@ -111,7 +108,7 @@ extern unsigned 
long _end; /* standard ELF symbol */ /* * Memory layout: * 0x0000000000000000 - 0x00007fffffffffff [128TB, 2^47 bytes, PML4:0-255] - * Guest-defined use. + * Guest-defined use (see below for compatibility mode guests). * 0x0000800000000000 - 0xffff7fffffffffff [16EB] * Inaccessible: current arch only supports 48-bit sign-extended VAs. * 0xffff800000000000 - 0xffff803fffffffff [256GB, 2^38 bytes, PML4:256] @@ -132,7 +129,11 @@ extern unsigned long _end; /* standard ELF symbol */ * Page-frame information array. * 0xffff828800000000 - 0xffff828bffffffff [16GB, 2^34 bytes, PML4:261] * ioremap()/fixmap area. - * 0xffff828c00000000 - 0xffff82ffffffffff [464GB, PML4:261] + * 0xffff828c00000000 - 0xffff828c3fffffff [1GB, 2^30 bytes, PML4:261] + * Compatibility machine-to-phys translation table. + * 0xffff828c40000000 - 0xffff828c7fffffff [1GB, 2^30 bytes, PML4:261] + * High read-only compatibility machine-to-phys translation table. + * 0xffff828c80000000 - 0xffff82ffffffffff [462GB, PML4:261] * Reserved for future use. * 0xffff830000000000 - 0xffff83ffffffffff [1TB, 2^40 bytes, PML4:262-263] * 1:1 direct mapping of all physical memory. Xen and its heap live here. @@ -140,6 +141,18 @@ extern unsigned long _end; /* standard ELF symbol */ * Reserved for future use. * 0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511] * Guest-defined use. + * + * Compatibility guest area layout: + * 0x0000000000000000 - 0x00000000f57fffff [3928MB, PML4:0] + * Guest-defined use. + * 0x00000000f5800000 - 0x00000000ffffffff [168MB, PML4:0] + * Read-only machine-to-phys translation table (GUEST ACCESSIBLE). + * 0x0000000100000000 - 0x0000007fffffffff [508GB, PML4:0] + * Unused. + * 0x0000008000000000 - 0x000000ffffffffff [512GB, 2^39 bytes, PML4:1] + * Hypercall argument translation area. + * 0x0000010000000000 - 0x00007fffffffffff [127TB, 2^46 bytes, PML4:2-255] + * Reserved for future use. 
*/ @@ -181,17 +194,49 @@ extern unsigned long _end; /* standard ELF symbol */ /* Slot 261: ioremap()/fixmap area (16GB). */ #define IOREMAP_VIRT_START (FRAMETABLE_VIRT_END) #define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (16UL<<30)) +/* Slot 261: compatibility machine-to-phys conversion table (1GB). */ +#define RDWR_COMPAT_MPT_VIRT_START IOREMAP_VIRT_END +#define RDWR_COMPAT_MPT_VIRT_END (RDWR_COMPAT_MPT_VIRT_START + (1UL << 30)) +/* Slot 261: high read-only compatibility machine-to-phys conversion table (1GB). */ +#define HIRO_COMPAT_MPT_VIRT_START RDWR_COMPAT_MPT_VIRT_END +#define HIRO_COMPAT_MPT_VIRT_END (HIRO_COMPAT_MPT_VIRT_START + (1UL << 30)) /* Slot 262-263: A direct 1:1 mapping of all of physical memory. */ #define DIRECTMAP_VIRT_START (PML4_ADDR(262)) #define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2) +#ifndef __ASSEMBLY__ + +/* This is not a fixed value, just a lower limit. */ +#define __HYPERVISOR_COMPAT_VIRT_START 0xF5800000 +#define HYPERVISOR_COMPAT_VIRT_START(d) ((d)->arch.hv_compat_vstart) +#define MACH2PHYS_COMPAT_VIRT_START HYPERVISOR_COMPAT_VIRT_START +#define MACH2PHYS_COMPAT_VIRT_END 0xFFE00000 +#define MACH2PHYS_COMPAT_NR_ENTRIES(d) \ + ((MACH2PHYS_COMPAT_VIRT_END-MACH2PHYS_COMPAT_VIRT_START(d))>>2) + +#define COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d) \ + l2_table_offset(HYPERVISOR_COMPAT_VIRT_START(d)) +#define COMPAT_L2_PAGETABLE_LAST_XEN_SLOT l2_table_offset(~0U) +#define COMPAT_L2_PAGETABLE_XEN_SLOTS(d) \ + (COMPAT_L2_PAGETABLE_LAST_XEN_SLOT - COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d) + 1) + +#endif + +#define COMPAT_ARG_XLAT_VIRT_BASE (1UL << ROOT_PAGETABLE_SHIFT) +#define COMPAT_ARG_XLAT_SHIFT 0 +#define COMPAT_ARG_XLAT_PAGES (1U << COMPAT_ARG_XLAT_SHIFT) +#define COMPAT_ARG_XLAT_SIZE (COMPAT_ARG_XLAT_PAGES << PAGE_SHIFT) +#define COMPAT_ARG_XLAT_VIRT_START(vcpu_id) \ + (COMPAT_ARG_XLAT_VIRT_BASE + ((unsigned long)(vcpu_id) << \ + (PAGE_SHIFT + COMPAT_ARG_XLAT_SHIFT + 1))) + #define PGT_base_page_table PGT_l4_page_table 
-#define __HYPERVISOR_CS64 0xe010 -#define __HYPERVISOR_CS32 0xe008 +#define __HYPERVISOR_CS64 0xe008 +#define __HYPERVISOR_CS32 0xe038 #define __HYPERVISOR_CS __HYPERVISOR_CS64 #define __HYPERVISOR_DS64 0x0000 -#define __HYPERVISOR_DS32 0xe018 +#define __HYPERVISOR_DS32 0xe010 #define __HYPERVISOR_DS __HYPERVISOR_DS64 /* For generic assembly code: use macros to define operation/operand sizes. */ diff --git a/xen/include/asm-x86/desc.h b/xen/include/asm-x86/desc.h index c2262d79fb..154f76736b 100644 --- a/xen/include/asm-x86/desc.h +++ b/xen/include/asm-x86/desc.h @@ -18,31 +18,76 @@ #define LDT_ENTRY_SIZE 8 +#if defined(__x86_64__) + +#define FLAT_COMPAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_COMPAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_COMPAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_COMPAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_COMPAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_COMPAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_COMPAT_KERNEL_DS FLAT_COMPAT_RING1_DS +#define FLAT_COMPAT_KERNEL_CS FLAT_COMPAT_RING1_CS +#define FLAT_COMPAT_KERNEL_SS FLAT_COMPAT_RING1_SS +#define FLAT_COMPAT_USER_DS FLAT_COMPAT_RING3_DS +#define FLAT_COMPAT_USER_CS FLAT_COMPAT_RING3_CS +#define FLAT_COMPAT_USER_SS FLAT_COMPAT_RING3_SS + +#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8) +#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2) + +#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY) +#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY) + +#elif defined(__i386__) + +#define FLAT_COMPAT_KERNEL_CS FLAT_KERNEL_CS +#define FLAT_COMPAT_KERNEL_DS FLAT_KERNEL_DS +#define FLAT_COMPAT_KERNEL_SS FLAT_KERNEL_SS +#define FLAT_COMPAT_USER_CS FLAT_USER_CS +#define FLAT_COMPAT_USER_DS FLAT_USER_DS +#define FLAT_COMPAT_USER_SS FLAT_USER_SS + +#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY + +#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8) +#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1) + +#define __TSS(n) (((n)<<1) + 
__FIRST_TSS_ENTRY) +#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY) + +#endif + +#ifndef __ASSEMBLY__ + #define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) ) #if defined(__x86_64__) -#define GUEST_KERNEL_RPL 3 +#define GUEST_KERNEL_RPL(d) (!IS_COMPAT(d) ? 3 : 1) #elif defined(__i386__) -#define GUEST_KERNEL_RPL 1 +#define GUEST_KERNEL_RPL(d) ((void)(d), 1) #endif /* Fix up the RPL of a guest segment selector. */ -#define __fixup_guest_selector(sel) \ - ((sel) = (((sel) & 3) >= GUEST_KERNEL_RPL) ? (sel) : \ - (((sel) & ~3) | GUEST_KERNEL_RPL)) +#define __fixup_guest_selector(d, sel) \ +({ \ + uint16_t _rpl = GUEST_KERNEL_RPL(d); \ + (sel) = (((sel) & 3) >= _rpl) ? (sel) : (((sel) & ~3) | _rpl); \ +}) /* Stack selectors don't need fixing up if the kernel runs in ring 0. */ #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL -#define fixup_guest_stack_selector(ss) ((void)0) +#define fixup_guest_stack_selector(d, ss) ((void)0) #else -#define fixup_guest_stack_selector(ss) __fixup_guest_selector(ss) +#define fixup_guest_stack_selector(d, ss) __fixup_guest_selector(d, ss) #endif /* * Code selectors are always fixed up. It allows the Xen exit stub to detect * return to guest context, even when the guest kernel runs in ring 0. */ -#define fixup_guest_code_selector(cs) __fixup_guest_selector(cs) +#define fixup_guest_code_selector(d, cs) __fixup_guest_selector(d, cs) /* * We need this function because enforcing the correct guest kernel RPL is @@ -57,19 +102,30 @@ * DPL < CPL then they'll be cleared automatically. If SS RPL or DPL differs * from CS RPL then we'll #GP. */ -#define guest_gate_selector_okay(sel) \ +#define guest_gate_selector_okay(d, sel) \ ((((sel)>>3) < FIRST_RESERVED_GDT_ENTRY) || /* Guest seg? */ \ - ((sel) == FLAT_KERNEL_CS) || /* Xen default seg? */ \ + ((sel) == (!IS_COMPAT(d) ? \ + FLAT_KERNEL_CS : /* Xen default seg? */ \ + FLAT_COMPAT_KERNEL_CS)) || /* Xen default compat seg? */ \ ((sel) & 4)) /* LDT seg? 
*/ +#endif /* __ASSEMBLY__ */ + /* These are bitmasks for the high 32 bits of a descriptor table entry. */ #define _SEGMENT_TYPE (15<< 8) +#define _SEGMENT_WR ( 1<< 9) /* Writeable (data) or Readable (code) + segment */ #define _SEGMENT_EC ( 1<<10) /* Expand-down or Conforming segment */ #define _SEGMENT_CODE ( 1<<11) /* Code (vs data) segment for non-system segments */ #define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */ #define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */ #define _SEGMENT_P ( 1<<15) /* Segment Present */ +#ifdef __x86_64__ +#define _SEGMENT_L ( 1<<21) /* 64-bit segment */ +#else +#define _SEGMENT_L 0 +#endif #define _SEGMENT_DB ( 1<<22) /* 16- or 32-bit segment */ #define _SEGMENT_G ( 1<<23) /* Granularity */ @@ -81,12 +137,6 @@ struct desc_struct { #if defined(__x86_64__) -#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8) -#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2) - -#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY) -#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY) - typedef struct { u64 a, b; } idt_entry_t; @@ -118,14 +168,6 @@ do { \ #elif defined(__i386__) -#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY - -#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8) -#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1) - -#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY) -#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY) - typedef struct desc_struct idt_entry_t; #define _set_gate(gate_addr,type,dpl,addr) \ @@ -155,6 +197,11 @@ __asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ #endif extern struct desc_struct gdt_table[]; +#ifdef CONFIG_COMPAT +extern struct desc_struct compat_gdt_table[]; +#else +# define compat_gdt_table gdt_table +#endif struct Xgt_desc_struct { unsigned short size; diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index efda6dd263..83315353e1 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -98,6 +98,11 @@ struct arch_domain struct mapcache 
mapcache; #endif +#ifdef CONFIG_COMPAT + unsigned int hv_compat_vstart; + l3_pgentry_t *mm_arg_xlat_l3; +#endif + /* I/O-port admin-specified access capabilities. */ struct rangeset *ioport_caps; diff --git a/xen/include/asm-x86/event.h b/xen/include/asm-x86/event.h index 7085bd3138..475339550b 100644 --- a/xen/include/asm-x86/event.h +++ b/xen/include/asm-x86/event.h @@ -9,6 +9,8 @@ #ifndef __ASM_EVENT_H__ #define __ASM_EVENT_H__ +#include <xen/shared.h> + static inline void vcpu_kick(struct vcpu *v) { /* @@ -28,7 +30,7 @@ static inline void vcpu_kick(struct vcpu *v) static inline void vcpu_mark_events_pending(struct vcpu *v) { - if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) ) + if ( !test_and_set_bit(0, &vcpu_info(v, evtchn_upcall_pending)) ) vcpu_kick(v); } @@ -36,23 +38,23 @@ static inline int local_events_need_delivery(void) { struct vcpu *v = current; /* Note: Bitwise operations result in fast code with no branches. */ - return (!!v->vcpu_info->evtchn_upcall_pending & - !v->vcpu_info->evtchn_upcall_mask); + return (!!vcpu_info(v, evtchn_upcall_pending) & + !vcpu_info(v, evtchn_upcall_mask)); } static inline int local_event_delivery_is_enabled(void) { - return !current->vcpu_info->evtchn_upcall_mask; + return !vcpu_info(current, evtchn_upcall_mask); } static inline void local_event_delivery_disable(void) { - current->vcpu_info->evtchn_upcall_mask = 1; + vcpu_info(current, evtchn_upcall_mask) = 1; } static inline void local_event_delivery_enable(void) { - current->vcpu_info->evtchn_upcall_mask = 0; + vcpu_info(current, evtchn_upcall_mask) = 0; } /* No arch specific virq definition now. Default to global. 
*/ diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h index fb1dac3c46..5051ab276b 100644 --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -185,7 +185,7 @@ hvm_guest_x86_mode(struct vcpu *v) return hvm_funcs.guest_x86_mode(v); } -int hvm_instruction_length(unsigned long pc, int mode); +int hvm_instruction_length(unsigned long pc, int address_bytes); static inline void hvm_update_host_cr3(struct vcpu *v) @@ -219,9 +219,11 @@ hvm_get_segment_register(struct vcpu *v, enum x86_segment seg, hvm_funcs.get_segment_register(v, seg, reg); } +void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); void hvm_stts(struct vcpu *v); void hvm_set_guest_time(struct vcpu *v, u64 gtime); -void hvm_freeze_time(struct vcpu *v); +u64 hvm_get_guest_time(struct vcpu *v); void hvm_migrate_timers(struct vcpu *v); void hvm_do_resume(struct vcpu *v); diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h index 028a885eb2..c71a9bbc6c 100644 --- a/xen/include/asm-x86/hvm/io.h +++ b/xen/include/asm-x86/hvm/io.h @@ -147,8 +147,6 @@ extern void send_pio_req(unsigned long port, unsigned long count, int size, extern void handle_mmio(unsigned long gpa); extern void hvm_interrupt_post(struct vcpu *v, int vector, int type); extern void hvm_io_assist(struct vcpu *v); -extern int cpu_get_interrupt(struct vcpu *v, int *type); -extern int cpu_has_pending_irq(struct vcpu *v); #endif /* __ASM_X86_HVM_IO_H__ */ diff --git a/xen/include/asm-x86/hvm/irq.h b/xen/include/asm-x86/hvm/irq.h index cfc0de765d..af66d67f2c 100644 --- a/xen/include/asm-x86/hvm/irq.h +++ b/xen/include/asm-x86/hvm/irq.h @@ -61,7 +61,8 @@ struct hvm_irq { /* * Number of wires asserting each GSI. * - * GSIs 0-15 are the ISA IRQs. ISA devices map directly into this space. + * GSIs 0-15 are the ISA IRQs. ISA devices map directly into this space + * except ISA IRQ 0, which is connected to GSI 2. 
* PCI links map into this space via the PCI-ISA bridge. * * GSIs 16+ are used only be PCI devices. The mapping from PCI device to @@ -87,6 +88,8 @@ struct hvm_irq { #define hvm_pci_intx_link(dev, intx) \ (((dev) + (intx)) & 3) +#define hvm_isa_irq_to_gsi(isa_irq) ((isa_irq) ? : 2) + /* Modify state of a PCI INTx wire. */ void hvm_pci_intx_assert( struct domain *d, unsigned int device, unsigned int intx); @@ -104,4 +107,9 @@ void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq); void hvm_set_callback_irq_level(void); void hvm_set_callback_gsi(struct domain *d, unsigned int gsi); +int cpu_get_interrupt(struct vcpu *v, int *type); +int cpu_has_pending_irq(struct vcpu *v); +int get_isa_irq_vector(struct vcpu *vcpu, int irq, int type); +int is_isa_irq_masked(struct vcpu *v, int isa_irq); + #endif /* __ASM_X86_HVM_IRQ_H__ */ diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h index 3f3ff94386..721492412a 100644 --- a/xen/include/asm-x86/hvm/support.h +++ b/xen/include/asm-x86/hvm/support.h @@ -134,5 +134,6 @@ void hlt_timer_fn(void *data); void hvm_do_hypercall(struct cpu_user_regs *pregs); void hvm_hlt(unsigned long rflags); +void hvm_triple_fault(void); #endif /* __ASM_X86_HVM_SUPPORT_H__ */ diff --git a/xen/include/asm-x86/hvm/svm/vmcb.h b/xen/include/asm-x86/hvm/svm/vmcb.h index 81d41ea688..3340ec3a9e 100644 --- a/xen/include/asm-x86/hvm/svm/vmcb.h +++ b/xen/include/asm-x86/hvm/svm/vmcb.h @@ -456,7 +456,6 @@ struct arch_svm_struct { u32 *msrpm; u64 vmexit_tsc; /* tsc read at #VMEXIT. 
for TSC_OFFSET */ int saved_irq_vector; - u32 inject_event; u32 launch_core; u32 asid_core; diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h index 0c073028ee..0d4c95930c 100644 --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -35,6 +35,7 @@ struct hvm_vcpu { struct vlapic vlapic; s64 cache_tsc_offset; u64 guest_time; + struct list_head tm_list; /* For AP startup */ unsigned long init_sipi_sipi_state; diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h index 2053fc56ec..0137b34515 100644 --- a/xen/include/asm-x86/hvm/vlapic.h +++ b/xen/include/asm-x86/hvm/vlapic.h @@ -23,6 +23,7 @@ #include <asm/msr.h> #include <public/hvm/ioreq.h> +#include <asm/hvm/vpt.h> #define MAX_VECTOR 256 @@ -49,14 +50,14 @@ #define vlapic_enabled(vlapic) (!vlapic_disabled(vlapic)) struct vlapic { - uint64_t apic_base_msr; - uint32_t disabled; /* VLAPIC_xx_DISABLED */ - uint32_t timer_divisor; - struct timer vlapic_timer; - int timer_pending_count; - s_time_t timer_last_update; - struct page_info *regs_page; - void *regs; + uint64_t apic_base_msr; + uint32_t disabled; /* VLAPIC_xx_DISABLED */ + uint32_t timer_divisor; + struct periodic_time pt; + int timer_pending_count; + s_time_t timer_last_update; + struct page_info *regs_page; + void *regs; }; static inline uint32_t vlapic_get_reg(struct vlapic *vlapic, uint32_t reg) @@ -70,13 +71,11 @@ static inline void vlapic_set_reg( *((uint32_t *)(vlapic->regs + reg)) = val; } - int vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig); -void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode); - int vlapic_find_highest_irr(struct vlapic *vlapic); +int vlapic_has_interrupt(struct vcpu *v); int cpu_get_apic_interrupt(struct vcpu *v, int *mode); int vlapic_init(struct vcpu *v); @@ -89,8 +88,9 @@ int vlapic_accept_pic_intr(struct vcpu *v); struct vlapic *apic_round_robin( struct domain *d, uint8_t vector, uint32_t bitmap); -s_time_t 
get_apictime_scheduled(struct vcpu *v); - int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda); +int is_lvtt(struct vcpu *v, int vector); +int is_lvtt_enabled(struct vcpu *v); + #endif /* __ASM_X86_HVM_VLAPIC_H__ */ diff --git a/xen/include/asm-x86/hvm/vmx/cpu.h b/xen/include/asm-x86/hvm/vmx/cpu.h index 8c214ae1ce..c988b13343 100644 --- a/xen/include/asm-x86/hvm/vmx/cpu.h +++ b/xen/include/asm-x86/hvm/vmx/cpu.h @@ -32,21 +32,14 @@ struct arch_state_struct { #define VMX_MF_32 1 #define VMX_MF_64 2 -#define CPUID_LEAF_0x1 0x1 -#define CPUID_LEAF_0x4 0x4 -#define CPUID_LEAF_0x6 0x6 -#define CPUID_LEAF_0x9 0x9 -#define CPUID_LEAF_0xA 0xA -#define CPUID_LEAF_0x80000001 0x80000001 - #define NUM_CORES_RESET_MASK 0x00003FFF #define NUM_THREADS_RESET_MASK 0xFF00FFFF #define VMX_VCPU_CPUID_L1_ECX_RESERVED_18 0x00040000 #define VMX_VCPU_CPUID_L1_ECX_RESERVED_6 0x00000040 -#define VMX_VCPU_CPUID_L1_ECX_RESERVED \ - ( VMX_VCPU_CPUID_L1_ECX_RESERVED_18 | \ - VMX_VCPU_CPUID_L1_ECX_RESERVED_6 ) +#define VMX_VCPU_CPUID_L1_ECX_RESERVED \ + ( VMX_VCPU_CPUID_L1_ECX_RESERVED_18 | \ + VMX_VCPU_CPUID_L1_ECX_RESERVED_6 ) #endif /* __ASM_X86_HVM_VMX_CPU_H__ */ diff --git a/xen/include/asm-x86/hvm/vpt.h b/xen/include/asm-x86/hvm/vpt.h index 9a83417996..d9ad3f70df 100644 --- a/xen/include/asm-x86/hvm/vpt.h +++ b/xen/include/asm-x86/hvm/vpt.h @@ -27,8 +27,66 @@ #include <xen/errno.h> #include <xen/time.h> #include <xen/timer.h> +#include <xen/list.h> #include <asm/hvm/vpic.h> + +#define HPET_TIMER_NUM 3 /* 3 timers supported now */ +struct HPET { + uint64_t capability; /* capabilities */ + uint64_t res0; /* reserved */ + uint64_t config; /* configuration */ + uint64_t res1; /* reserved */ + uint64_t isr; /* interrupt status reg */ + uint64_t res2[25]; /* reserved */ + uint64_t mc64; /* main counter */ + uint64_t res3; /* reserved */ + struct { /* timers */ + uint64_t config; /* configuration/cap */ + uint64_t cmp; /* comparator */ + uint64_t hpet_fsb[2]; /* FSB route, not 
supported now */ + } timers[HPET_TIMER_NUM]; +}; + +struct HPETState; +struct HPET_timer_fn_info { + struct HPETState *hs; + unsigned int tn; +}; + +typedef struct HPETState { + struct HPET hpet; + struct vcpu *vcpu; + uint64_t tsc_freq; + uint64_t mc_offset; + uint64_t period[HPET_TIMER_NUM]; + struct timer timers[HPET_TIMER_NUM]; + struct HPET_timer_fn_info timer_fn_info[HPET_TIMER_NUM]; +} HPETState; + + +/* + * Abstract layer of periodic time, one shot time. + */ +typedef void time_cb(struct vcpu *v, void *opaque); + +struct periodic_time { + struct list_head list; + char enabled; + char one_shot; /* one shot time */ + u8 irq; + struct vcpu *vcpu; /* vcpu timer interrupt delivers to */ + u32 pending_intr_nr; /* the counter for pending timer interrupts */ + u64 period; /* period in ns */ + u64 period_cycles; /* period in cpu cycles */ + s_time_t scheduled; /* scheduled timer interrupt */ + u64 last_plt_gtime; /* platform time when last IRQ is injected */ + struct timer timer; /* ac_timer */ + time_cb *cb; + void *priv; /* point back to platform time source */ +}; + + #define PIT_FREQ 1193181 #define PIT_BASE 0x40 @@ -47,8 +105,7 @@ typedef struct PITChannelState { u8 gate; /* timer start */ s64 count_load_time; /* irq handling */ - struct vcpu *vcpu; - struct periodic_time *pt; + struct periodic_time pt; } PITChannelState; typedef struct PITState { @@ -66,10 +123,7 @@ typedef struct RTCState { int64_t next_second_time; struct timer second_timer; struct timer second_timer2; - struct timer pie_timer; - int period; - s_time_t next_pie; - struct vcpu *vcpu; + struct periodic_time pt; } RTCState; #define FREQUENCE_PMTIMER 3579545 @@ -82,58 +136,41 @@ typedef struct PMTState { struct vcpu *vcpu; } PMTState; -/* - * Abstract layer of periodic time, one short time. 
- */ -typedef void time_cb(struct vcpu *v, void *opaque); - -struct periodic_time { - char enabled; /* enabled */ - char one_shot; /* one shot time */ - char irq; - char first_injected; /* flag to prevent shadow window */ - u32 bind_vcpu; /* vcpu timer interrupt delivers to */ - u32 pending_intr_nr; /* the couner for pending timer interrupts */ - u32 period; /* frequency in ns */ - u64 period_cycles; /* frequency in cpu cycles */ - s_time_t scheduled; /* scheduled timer interrupt */ - u64 last_plt_gtime; /* platform time when last IRQ is injected */ - struct timer timer; /* ac_timer */ - time_cb *cb; - void *priv; /* ponit back to platform time source */ -}; - struct pl_time { /* platform time */ - struct periodic_time periodic_tm; - struct PITState vpit; - struct RTCState vrtc; - struct PMTState vpmt; + struct PITState vpit; + struct RTCState vrtc; + struct HPETState vhpet; + struct PMTState vpmt; }; -extern u64 hvm_get_guest_time(struct vcpu *v); -static inline int64_t hvm_get_clock(struct vcpu *v) -{ - return hvm_get_guest_time(v); -} - #define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency) -/* to hook the ioreq packet to get the PIT initialization info */ -extern void hvm_hooks_assist(struct vcpu *v); -extern void pickup_deactive_ticks(struct periodic_time *vpit); -extern struct periodic_time *create_periodic_time( - u32 period, char irq, char one_shot, time_cb *cb, void *data); -extern void destroy_periodic_time(struct periodic_time *pt); +void pt_freeze_time(struct vcpu *v); +void pt_thaw_time(struct vcpu *v); +void pt_timer_fn(void *data); +void pt_update_irq(struct vcpu *v); +struct periodic_time *is_pt_irq(struct vcpu *v, int vector, int type); +void pt_intr_post(struct vcpu *v, int vector, int type); +void pt_reset(struct vcpu *v); +void create_periodic_time(struct periodic_time *pt, uint64_t period, + uint8_t irq, char one_shot, time_cb *cb, void *data); +void destroy_periodic_time(struct periodic_time *pt); + int pv_pit_handler(int port, 
int data, int write); void pit_init(struct vcpu *v, unsigned long cpu_khz); +void pit_stop_channel0_irq(PITState * pit); +void pit_migrate_timers(struct vcpu *v); +void pit_deinit(struct domain *d); void rtc_init(struct vcpu *v, int base, int irq); -void rtc_deinit(struct domain *d); -void rtc_freeze(struct vcpu *v); -void rtc_thaw(struct vcpu *v); void rtc_migrate_timers(struct vcpu *v); +void rtc_deinit(struct domain *d); +int is_rtc_periodic_irq(void *opaque); void pmtimer_init(struct vcpu *v, int base); +void pmtimer_migrate_timers(struct vcpu *v); void pmtimer_deinit(struct domain *d); -void pt_timer_fn(void *data); -void pit_time_fired(struct vcpu *v, void *priv); + +void hpet_migrate_timers(struct vcpu *v); +void hpet_init(struct vcpu *v); +void hpet_deinit(struct domain *d); #endif /* __ASM_X86_HVM_VPT_H__ */ diff --git a/xen/include/asm-x86/hypercall.h b/xen/include/asm-x86/hypercall.h index 860fb2ce77..fa9476c1d3 100644 --- a/xen/include/asm-x86/hypercall.h +++ b/xen/include/asm-x86/hypercall.h @@ -8,6 +8,13 @@ #include <public/physdev.h> #include <xen/types.h> +/* + * Both do_mmuext_op() and do_mmu_update(): + * We steal the m.s.b. of the @count parameter to indicate whether this + * invocation of do_mmu_update() is resuming a previously preempted call. 
+ */ +#define MMU_UPDATE_PREEMPTED (~(~0U>>1)) + extern long do_event_channel_op_compat( XEN_GUEST_HANDLE(evtchn_op_t) uop); @@ -116,4 +123,17 @@ do_set_callbacks( #endif +#ifdef CONFIG_COMPAT + +extern int +compat_physdev_op( + int cmd, + XEN_GUEST_HANDLE(void) arg); + +extern int +arch_compat_vcpu_op( + int cmd, struct vcpu *v, XEN_GUEST_HANDLE(void) arg); + +#endif + #endif /* __ASM_X86_HYPERCALL_H__ */ diff --git a/xen/include/asm-x86/init.h b/xen/include/asm-x86/init.h index 8f1e764080..5295b35e63 100644 --- a/xen/include/asm-x86/init.h +++ b/xen/include/asm-x86/init.h @@ -1,29 +1,4 @@ #ifndef _XEN_ASM_INIT_H #define _XEN_ASM_INIT_H -/* - * Mark functions and data as being only used at initialization - * or exit time. - */ -#define __init \ - __attribute__ ((__section__ (".init.text"))) -#define __exit \ - __attribute_used__ __attribute__ ((__section__(".text.exit"))) -#define __initdata \ - __attribute__ ((__section__ (".init.data"))) -#define __exitdata \ - __attribute_used__ __attribute__ ((__section__ (".data.exit"))) -#define __initsetup \ - __attribute_used__ __attribute__ ((__section__ (".setup.init"))) -#define __init_call \ - __attribute_used__ __attribute__ ((__section__ (".initcall.init"))) -#define __exit_call \ - __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) - -/* For assembly routines -#define __INIT .section ".text.init","ax" -#define __FINIT .previous -#define __INITDATA .section ".data.init","aw" -*/ - #endif /* _XEN_ASM_INIT_H */ diff --git a/xen/include/asm-x86/ldt.h b/xen/include/asm-x86/ldt.h index 107e67720b..27f4750558 100644 --- a/xen/include/asm-x86/ldt.h +++ b/xen/include/asm-x86/ldt.h @@ -17,7 +17,8 @@ static inline void load_LDT(struct vcpu *v) else { cpu = smp_processor_id(); - desc = gdt_table + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY; + desc = (!IS_COMPAT(v->domain) ? 
gdt_table : compat_gdt_table) + + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY; _set_tssldt_desc(desc, LDT_VIRT_START(v), ents*8-1, 2); __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) ); } diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 93d96df4fd..405a753b39 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -244,7 +244,7 @@ unsigned long pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab); #endif /* CONFIG_PAGING_LEVELS == 3 */ -int check_descriptor(struct desc_struct *d); +int check_descriptor(const struct domain *, struct desc_struct *d); /* * The MPT (machine->physical mapping table) is an array of word-sized @@ -257,7 +257,16 @@ int check_descriptor(struct desc_struct *d); #define INVALID_M2P_ENTRY (~0UL) #define VALID_M2P(_e) (!((_e) & (1UL<<(BITS_PER_LONG-1)))) +#ifdef CONFIG_COMPAT +#define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START) +#define set_gpfn_from_mfn(mfn, pfn) \ + ((void)(compat_disabled || \ + (mfn) >= (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 || \ + (compat_machine_to_phys_mapping[(mfn)] = (unsigned int)(pfn))), \ + machine_to_phys_mapping[(mfn)] = (pfn)) +#else #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn)) +#endif #define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)]) @@ -270,6 +279,11 @@ int check_descriptor(struct desc_struct *d); #define INVALID_MFN (~0UL) +#ifdef CONFIG_COMPAT +#define compat_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define compat_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) +#endif + #ifdef MEMORY_GUARD void memguard_init(void); void memguard_guard_range(void *p, unsigned long l); @@ -307,7 +321,7 @@ void audit_domains(void); int new_guest_cr3(unsigned long pfn); void make_cr3(struct vcpu *v, unsigned long mfn); - +void update_cr3(struct vcpu *v); void propagate_page_fault(unsigned long addr, u16 error_code); int 
__sync_lazy_execstate(void); @@ -315,10 +329,20 @@ int __sync_lazy_execstate(void); /* Arch-specific portion of memory_op hypercall. */ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg); long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg); +#ifdef CONFIG_COMPAT +int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void)); +int compat_subarch_memory_op(int op, XEN_GUEST_HANDLE(void)); +#endif int steal_page( struct domain *d, struct page_info *page, unsigned int memflags); int map_ldt_shadow_page(unsigned int); +#ifdef CONFIG_COMPAT +int setup_arg_xlat_area(struct vcpu *, l4_pgentry_t *); +#else +# define setup_arg_xlat_area(vcpu, l4tab) 0 +#endif + #endif /* __ASM_X86_MM_H__ */ diff --git a/xen/include/asm-x86/multicall.h b/xen/include/asm-x86/multicall.h index 1ce7866c3e..8414dca055 100644 --- a/xen/include/asm-x86/multicall.h +++ b/xen/include/asm-x86/multicall.h @@ -35,6 +35,31 @@ "r8", "r9", "r10", "r11" ); \ } while ( 0 ) +#define compat_multicall_call(_call) \ + do { \ + __asm__ __volatile__ ( \ + " movl "STR(COMPAT_MULTICALL_op)"(%0),%%eax; " \ + " leaq compat_hypercall_table(%%rip),%%rdi; " \ + " cmpl $("STR(NR_hypercalls)"),%%eax; " \ + " jae 2f; " \ + " movq (%%rdi,%%rax,8),%%rax; " \ + " movl "STR(COMPAT_MULTICALL_arg0)"(%0),%%edi; " \ + " movl "STR(COMPAT_MULTICALL_arg1)"(%0),%%esi; " \ + " movl "STR(COMPAT_MULTICALL_arg2)"(%0),%%edx; " \ + " movl "STR(COMPAT_MULTICALL_arg3)"(%0),%%ecx; " \ + " movl "STR(COMPAT_MULTICALL_arg4)"(%0),%%r8d; " \ + " callq *%%rax; " \ + "1: movl %%eax,"STR(COMPAT_MULTICALL_result)"(%0)\n"\ + ".section .fixup,\"ax\"\n" \ + "2: movl $-"STR(ENOSYS)",%%eax\n" \ + " jmp 1b\n" \ + ".previous\n" \ + : : "b" (_call) \ + /* all the caller-saves registers */ \ + : "rax", "rcx", "rdx", "rsi", "rdi", \ + "r8", "r9", "r10", "r11" ); \ + } while ( 0 ) + #else #define do_multicall_call(_call) \ diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h index e2bbc6c03f..53ddc287fc 100644 --- 
a/xen/include/asm-x86/page.h +++ b/xen/include/asm-x86/page.h @@ -206,6 +206,7 @@ typedef struct { u32 pfn; } pagetable_t; typedef struct { u64 pfn; } pagetable_t; #endif #define pagetable_get_paddr(x) ((paddr_t)(x).pfn << PAGE_SHIFT) +#define pagetable_get_page(x) mfn_to_page((x).pfn) #define pagetable_get_pfn(x) ((x).pfn) #define pagetable_is_null(x) ((x).pfn == 0) #define pagetable_from_pfn(pfn) ((pagetable_t) { (pfn) }) @@ -287,6 +288,10 @@ extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES*L2_PAGETABLE_ENTRI #else extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES]; extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES]; +#ifdef CONFIG_COMPAT +extern l2_pgentry_t *compat_idle_pg_table_l2; +extern unsigned int m2p_compat_vstart; +#endif #endif void paging_init(void); void setup_idle_pagetable(void); diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index 7a18e0d3d7..0d1d2533ce 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -559,6 +559,12 @@ void show_execution_state(struct cpu_user_regs *regs); void show_page_walk(unsigned long addr); asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs); +#ifdef CONFIG_COMPAT +void compat_show_guest_stack(struct cpu_user_regs *, int lines); +#else +#define compat_show_guest_stack(regs, lines) ((void)0) +#endif + /* Dumps current register and stack state. */ #define dump_execution_state() \ /* NB. Needs interrupts enabled else we end up in fatal_trap(). */ \ diff --git a/xen/include/asm-x86/regs.h b/xen/include/asm-x86/regs.h index 48f8b86c85..90cfee68a8 100644 --- a/xen/include/asm-x86/regs.h +++ b/xen/include/asm-x86/regs.h @@ -38,7 +38,8 @@ enum EFLAGS { ASSERT(diff < STACK_SIZE); \ /* If a guest frame, it must be have guest privs (unless HVM guest). */ \ /* We permit CS==0 which can come from an uninitialised trap entry. 
*/ \ - ASSERT((diff != 0) || vm86_mode(r) || ((r->cs&3) >= GUEST_KERNEL_RPL) || \ + ASSERT((diff != 0) || vm86_mode(r) || \ + ((r->cs&3) >= GUEST_KERNEL_RPL(current->domain)) || \ (r->cs == 0) || is_hvm_vcpu(current)); \ /* If not a guest frame, it must be a hypervisor frame. */ \ ASSERT((diff == 0) || (!vm86_mode(r) && (r->cs == __HYPERVISOR_CS))); \ diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h index f923f662de..2afc0b0562 100644 --- a/xen/include/asm-x86/shadow.h +++ b/xen/include/asm-x86/shadow.h @@ -29,20 +29,8 @@ #include <xen/domain_page.h> #include <asm/flushtlb.h> -/* How to make sure a page is not referred to in a shadow PT */ -/* This will need to be a for_each_vcpu if we go to per-vcpu shadows */ -#define shadow_drop_references(_d, _p) \ - shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p))) -#define shadow_sync_and_drop_references(_d, _p) \ - shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p))) - -/* How to add and remove entries in the p2m mapping. */ -#define guest_physmap_add_page(_d, _p, _m) \ - shadow_guest_physmap_add_page((_d), (_p), (_m)) -#define guest_physmap_remove_page(_d, _p, _m ) \ - shadow_guest_physmap_remove_page((_d), (_p), (_m)) - -/* Shadow PT operation mode : shadow-mode variable in arch_domain. */ +/***************************************************************************** + * Macros to tell which shadow paging mode a domain is in */ #define SHM2_shift 10 /* We're in one of the shadow modes */ @@ -64,106 +52,32 @@ #define shadow_mode_external(_d) ((_d)->arch.shadow.mode & SHM2_external) /* Xen traps & emulates all reads of all page table pages: - * not yet supported - */ + * not yet supported */ #define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; }) -// How do we tell that we have a 32-bit PV guest in a 64-bit Xen? -#ifdef __x86_64__ -#define pv_32bit_guest(_v) 0 // not yet supported -#else -#define pv_32bit_guest(_v) !is_hvm_vcpu(v) -#endif -/* The shadow lock. 
+/****************************************************************************** + * The equivalent for a particular vcpu of a shadowed domain. */ + +/* Is this vcpu using the P2M table to translate between GFNs and MFNs? * - * This lock is per-domain. It is intended to allow us to make atomic - * updates to the software TLB that the shadow tables provide. - * - * Specifically, it protects: - * - all changes to shadow page table pages - * - the shadow hash table - * - the shadow page allocator - * - all changes to guest page table pages; if/when the notion of - * out-of-sync pages is added to this code, then the shadow lock is - * protecting all guest page table pages which are not listed as - * currently as both guest-writable and out-of-sync... - * XXX -- need to think about this relative to writable page tables. - * - all changes to the page_info->tlbflush_timestamp - * - the page_info->count fields on shadow pages - * - the shadow dirty bit array and count - * - XXX - */ -#ifndef CONFIG_SMP -#error shadow.h currently requires CONFIG_SMP -#endif + * This is true of translated HVM domains on a vcpu which has paging + * enabled. (HVM vcpus with paging disabled are using the p2m table as + * its paging table, so no translation occurs in this case.) + * It is also true for all vcpus of translated PV domains. 
*/ +#define shadow_vcpu_mode_translate(_v) ((_v)->arch.shadow.translate_enabled) -#define shadow_lock_init(_d) \ - do { \ - spin_lock_init(&(_d)->arch.shadow.lock); \ - (_d)->arch.shadow.locker = -1; \ - (_d)->arch.shadow.locker_function = "nobody"; \ - } while (0) - -#define shadow_lock_is_acquired(_d) \ - (current->processor == (_d)->arch.shadow.locker) - -#define shadow_lock(_d) \ - do { \ - if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ - { \ - printk("Error: shadow lock held by %s\n", \ - (_d)->arch.shadow.locker_function); \ - BUG(); \ - } \ - spin_lock(&(_d)->arch.shadow.lock); \ - ASSERT((_d)->arch.shadow.locker == -1); \ - (_d)->arch.shadow.locker = current->processor; \ - (_d)->arch.shadow.locker_function = __func__; \ - } while (0) - -#define shadow_unlock(_d) \ - do { \ - ASSERT((_d)->arch.shadow.locker == current->processor); \ - (_d)->arch.shadow.locker = -1; \ - (_d)->arch.shadow.locker_function = "nobody"; \ - spin_unlock(&(_d)->arch.shadow.lock); \ - } while (0) - -/* - * Levels of self-test and paranoia - * XXX should go in config files somewhere? +/* + * 32on64 support */ -#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */ -#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */ -#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ -#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ -#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ -#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ - -#ifdef NDEBUG -#define SHADOW_AUDIT 0 -#define SHADOW_AUDIT_ENABLE 0 +#ifdef __x86_64__ +#define pv_32bit_guest(_v) (!is_hvm_vcpu(_v) && IS_COMPAT((_v)->domain)) #else -#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */ -#define SHADOW_AUDIT_ENABLE shadow_audit_enable -extern int shadow_audit_enable; +#define pv_32bit_guest(_v) (!is_hvm_vcpu(_v)) #endif -/* - * Levels of optimization - * XXX should go in config files somewhere? 
- */ -#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */ -#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */ -#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */ -#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */ -#define SHOPT_LINUX_L3_TOPLEVEL 0x10 /* Pin l3es on early 64bit linux */ - -#define SHADOW_OPTIMIZATIONS 0x1f - - -/* With shadow pagetables, the different kinds of address start +/****************************************************************************** + * With shadow pagetables, the different kinds of address start * to get get confusing. * * Virtual addresses are what they usually are: the addresses that are used @@ -213,38 +127,16 @@ static inline _type _name##_x(_name##_t n) { return n; } #endif TYPE_SAFE(unsigned long,mfn) -#define SH_PRI_mfn "05lx" - -static inline mfn_t -pagetable_get_mfn(pagetable_t pt) -{ - return _mfn(pagetable_get_pfn(pt)); -} - -static inline pagetable_t -pagetable_from_mfn(mfn_t mfn) -{ - return pagetable_from_pfn(mfn_x(mfn)); -} -static inline int -shadow_vcpu_mode_translate(struct vcpu *v) -{ - // Returns true if this VCPU needs to be using the P2M table to translate - // between GFNs and MFNs. - // - // This is true of translated HVM domains on a vcpu which has paging - // enabled. (HVM vcpu's with paging disabled are using the p2m table as - // its paging table, so no translation occurs in this case.) - // - // It is also true for translated PV domains. - // - return v->arch.shadow.translate_enabled; -} +/* Macro for printk formats: use as printk("%"SH_PRI_mfn"\n", mfn_x(foo)); */ +#define SH_PRI_mfn "05lx" -/**************************************************************************/ -/* Mode-specific entry points into the shadow code */ +/***************************************************************************** + * Mode-specific entry points into the shadow code. 
+ * + * These shouldn't be used directly by callers; rather use the functions + * below which will indirect through this table as appropriate. */ struct sh_emulate_ctxt; struct shadow_paging_mode { @@ -253,7 +145,7 @@ struct shadow_paging_mode { int (*invlpg )(struct vcpu *v, unsigned long va); paddr_t (*gva_to_gpa )(struct vcpu *v, unsigned long va); unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va); - void (*update_cr3 )(struct vcpu *v); + void (*update_cr3 )(struct vcpu *v, int do_locking); int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn, void *new_guest_entry, u32 size); int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn, @@ -285,35 +177,30 @@ struct shadow_paging_mode { unsigned long *gl1mfn); void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va, void *eff_l1e); -#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC int (*guess_wrmap )(struct vcpu *v, unsigned long vaddr, mfn_t gmfn); -#endif /* For outsiders to tell what mode we're in */ unsigned int shadow_levels; unsigned int guest_levels; }; -static inline int shadow_guest_paging_levels(struct vcpu *v) -{ - ASSERT(v->arch.shadow.mode != NULL); - return v->arch.shadow.mode->guest_levels; -} -/**************************************************************************/ -/* Entry points into the shadow code */ +/***************************************************************************** + * Entry points into the shadow code */ -/* Enable arbitrary shadow mode. */ -int shadow_enable(struct domain *d, u32 mode); +/* Set up the shadow-specific parts of a domain struct at start of day. + * Called for every domain from arch_domain_create() */ +void shadow_domain_init(struct domain *d); -/* Turning on shadow test mode */ -int shadow_test_enable(struct domain *d); +/* Enable an arbitrary shadow mode. Call once at domain creation. 
*/ +int shadow_enable(struct domain *d, u32 mode); -/* Handler for shadow control ops: enabling and disabling shadow modes, - * and log-dirty bitmap ops all happen through here. */ +/* Handler for shadow control ops: operations from user-space to enable + * and disable ephemeral shadow modes (test mode and log-dirty mode) and + * manipulate the log-dirty bitmap. */ int shadow_domctl(struct domain *d, - xen_domctl_shadow_op_t *sc, - XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); + xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(void) u_domctl); /* Call when destroying a domain */ void shadow_teardown(struct domain *d); @@ -321,164 +208,94 @@ void shadow_teardown(struct domain *d); /* Call once all of the references to the domain have gone away */ void shadow_final_teardown(struct domain *d); - -/* Mark a page as dirty in the bitmap */ -void sh_do_mark_dirty(struct domain *d, mfn_t gmfn); +/* Mark a page as dirty in the log-dirty bitmap: called when Xen + * makes changes to guest memory on its behalf. */ +void shadow_mark_dirty(struct domain *d, mfn_t gmfn); +/* Cleaner version so we don't pepper shadow_mode tests all over the place */ static inline void mark_dirty(struct domain *d, unsigned long gmfn) { - if ( likely(!shadow_mode_log_dirty(d)) ) - return; - - shadow_lock(d); - sh_do_mark_dirty(d, _mfn(gmfn)); - shadow_unlock(d); -} - -/* Internal version, for when the shadow lock is already held */ -static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn) -{ - ASSERT(shadow_lock_is_acquired(d)); if ( unlikely(shadow_mode_log_dirty(d)) ) - sh_do_mark_dirty(d, gmfn); + shadow_mark_dirty(d, _mfn(gmfn)); } -static inline int -shadow_fault(unsigned long va, struct cpu_user_regs *regs) -/* Called from pagefault handler in Xen, and from the HVM trap handlers +/* Handle page-faults caused by the shadow pagetable mechanisms. + * Called from pagefault handler in Xen, and from the HVM trap handlers * for pagefaults. 
Returns 1 if this fault was an artefact of the * shadow code (and the guest should retry) or 0 if it is not (and the * fault should be handled elsewhere or passed to the guest). */ +static inline int shadow_fault(unsigned long va, struct cpu_user_regs *regs) { struct vcpu *v = current; perfc_incrc(shadow_fault); return v->arch.shadow.mode->page_fault(v, va, regs); } -static inline int -shadow_invlpg(struct vcpu *v, unsigned long va) -/* Called when the guest requests an invlpg. Returns 1 if the invlpg - * instruction should be issued on the hardware, or 0 if it's safe not - * to do so. */ +/* Handle invlpg requests on shadowed vcpus. + * Returns 1 if the invlpg instruction should be issued on the hardware, + * or 0 if it's safe not to do so. */ +static inline int shadow_invlpg(struct vcpu *v, unsigned long va) { return v->arch.shadow.mode->invlpg(v, va); } -static inline paddr_t -shadow_gva_to_gpa(struct vcpu *v, unsigned long va) -/* Called to translate a guest virtual address to what the *guest* - * pagetables would map it to. */ +/* Translate a guest virtual address to the physical address that the + * *guest* pagetables would map it to. */ +static inline paddr_t shadow_gva_to_gpa(struct vcpu *v, unsigned long va) { if ( unlikely(!shadow_vcpu_mode_translate(v)) ) return (paddr_t) va; return v->arch.shadow.mode->gva_to_gpa(v, va); } -static inline unsigned long -shadow_gva_to_gfn(struct vcpu *v, unsigned long va) -/* Called to translate a guest virtual address to what the *guest* - * pagetables would map it to. */ +/* Translate a guest virtual address to the frame number that the + * *guest* pagetables would map it to. */ +static inline unsigned long shadow_gva_to_gfn(struct vcpu *v, unsigned long va) { if ( unlikely(!shadow_vcpu_mode_translate(v)) ) return va >> PAGE_SHIFT; return v->arch.shadow.mode->gva_to_gfn(v, va); } -static inline void -shadow_update_cr3(struct vcpu *v) -/* Updates all the things that are derived from the guest's CR3. 
- * Called when the guest changes CR3. */ -{ - shadow_lock(v->domain); - v->arch.shadow.mode->update_cr3(v); - shadow_unlock(v->domain); -} - - -/* Should be called after CR3 is updated. - * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. - * - * Also updates other state derived from CR3 (vcpu->arch.guest_vtable, - * shadow_vtable, etc). - * - * Uses values found in vcpu->arch.(guest_table and guest_table_user), and - * for HVM guests, arch.monitor_table and hvm's guest CR3. - * - * Update ref counts to shadow tables appropriately. - */ -static inline void update_cr3(struct vcpu *v) +/* Update all the things that are derived from the guest's CR3. + * Called when the guest changes CR3; the caller can then use v->arch.cr3 + * as the value to load into the host CR3 to schedule this vcpu */ +static inline void shadow_update_cr3(struct vcpu *v) { - unsigned long cr3_mfn=0; - - if ( shadow_mode_enabled(v->domain) ) - { - shadow_update_cr3(v); - return; - } - -#if CONFIG_PAGING_LEVELS == 4 - if ( !(v->arch.flags & TF_kernel_mode) ) - cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user); - else -#endif - cr3_mfn = pagetable_get_pfn(v->arch.guest_table); - - make_cr3(v, cr3_mfn); + v->arch.shadow.mode->update_cr3(v, 1); } -extern void sh_update_paging_modes(struct vcpu *v); - -/* Should be called to initialise paging structures if the paging mode +/* Update all the things that are derived from the guest's CR0/CR3/CR4. + * Called to initialize paging structures if the paging mode * has changed, and when bringing up a VCPU for the first time. 
*/ -static inline void shadow_update_paging_modes(struct vcpu *v) -{ - ASSERT(shadow_mode_enabled(v->domain)); - shadow_lock(v->domain); - sh_update_paging_modes(v); - shadow_unlock(v->domain); -} +void shadow_update_paging_modes(struct vcpu *v); -static inline void -shadow_detach_old_tables(struct vcpu *v) -{ - if ( v->arch.shadow.mode ) - v->arch.shadow.mode->detach_old_tables(v); -} -static inline mfn_t -shadow_make_monitor_table(struct vcpu *v) -{ - return v->arch.shadow.mode->make_monitor_table(v); -} - -static inline void -shadow_destroy_monitor_table(struct vcpu *v, mfn_t mmfn) -{ - v->arch.shadow.mode->destroy_monitor_table(v, mmfn); -} +/***************************************************************************** + * Access to the guest pagetables */ +/* Get a mapping of a PV guest's l1e for this virtual address. */ static inline void * guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn) { - if ( likely(!shadow_mode_translate(v->domain)) ) - { - l2_pgentry_t l2e; - ASSERT(!shadow_mode_external(v->domain)); - /* Find this l1e and its enclosing l1mfn in the linear map */ - if ( __copy_from_user(&l2e, - &__linear_l2_table[l2_linear_offset(addr)], - sizeof(l2_pgentry_t)) != 0 ) - return NULL; - /* Check flags that it will be safe to read the l1e */ - if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) - != _PAGE_PRESENT ) - return NULL; - *gl1mfn = l2e_get_pfn(l2e); - return &__linear_l1_table[l1_linear_offset(addr)]; - } + l2_pgentry_t l2e; - return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); + if ( unlikely(shadow_mode_translate(v->domain)) ) + return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); + + /* Find this l1e and its enclosing l1mfn in the linear map */ + if ( __copy_from_user(&l2e, + &__linear_l2_table[l2_linear_offset(addr)], + sizeof(l2_pgentry_t)) != 0 ) + return NULL; + /* Check flags that it will be safe to read the l1e */ + if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) + != _PAGE_PRESENT ) + 
return NULL; + *gl1mfn = l2e_get_pfn(l2e); + return &__linear_l1_table[l1_linear_offset(addr)]; } +/* Pull down the mapping we got from guest_map_l1e() */ static inline void guest_unmap_l1e(struct vcpu *v, void *p) { @@ -486,6 +303,7 @@ guest_unmap_l1e(struct vcpu *v, void *p) unmap_domain_page(p); } +/* Read the guest's l1e that maps this address. */ static inline void guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) { @@ -502,6 +320,8 @@ guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e); } +/* Read the guest's l1e that maps this address, from the kernel-mode + * pagetables. */ static inline void guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) { @@ -517,82 +337,36 @@ guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) TOGGLE_MODE(); } - -/* Validate a pagetable change from the guest and update the shadows. */ -extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry); -extern int __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size); - -/* Update the shadows in response to a pagetable write from a HVM guest */ -extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size); - -/* Remove all writeable mappings of a guest frame from the shadows. - * Returns non-zero if we need to flush TLBs. - * level and fault_addr desribe how we found this to be a pagetable; - * level==0 means we have some other reason for revoking write access. */ -extern int shadow_remove_write_access(struct vcpu *v, mfn_t readonly_mfn, - unsigned int level, - unsigned long fault_addr); - -/* Remove all mappings of the guest mfn from the shadows. - * Returns non-zero if we need to flush TLBs. */ -extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn); - -/* Remove all mappings from the shadows. 
*/ -extern void shadow_blow_tables(struct domain *d); - -void -shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn); -/* This is a HVM page that we thing is no longer a pagetable. - * Unshadow it, and recursively unshadow pages that reference it. */ - -/* Remove all shadows of the guest mfn. */ -extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all); -static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn) +/* Write a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 for success. */ +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t new, mfn_t gmfn); + +/* Cmpxchg a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 if not. + * N.B. caller should check the value of "old" to see if the + * cmpxchg itself was successful. */ +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t *old, intpte_t new, mfn_t gmfn); + +/* Remove all mappings of the guest page from the shadows. + * This is called from common code. It does not flush TLBs. 
*/ +int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn); +static inline void +shadow_drop_references(struct domain *d, struct page_info *p) { - int was_locked = shadow_lock_is_acquired(v->domain); - if ( !was_locked ) - shadow_lock(v->domain); - sh_remove_shadows(v, gmfn, 0, 1); - if ( !was_locked ) - shadow_unlock(v->domain); + /* See the comment about locking in sh_remove_all_mappings */ + sh_remove_all_mappings(d->vcpu[0], _mfn(page_to_mfn(p))); } -/* Add a page to a domain */ -void -shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, - unsigned long mfn); - -/* Remove a page from a domain */ -void -shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn); - -/* - * Allocation of shadow pages - */ - -/* Return the minumum acceptable number of shadow pages a domain needs */ -unsigned int shadow_min_acceptable_pages(struct domain *d); - -/* Set the pool of shadow pages to the required number of MB. - * Input will be rounded up to at least min_acceptable_shadow_pages(). - * Returns 0 for success, 1 for failure. */ -unsigned int shadow_set_allocation(struct domain *d, - unsigned int megabytes, - int *preempted); - -/* Return the size of the shadow pool, rounded up to the nearest MB */ -static inline unsigned int shadow_get_allocation(struct domain *d) +/* Remove all shadows of the guest mfn. */ +void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all); +static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn) { - unsigned int pg = d->arch.shadow.total_pages; - return ((pg >> (20 - PAGE_SHIFT)) - + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 
1 : 0)); + /* See the comment about locking in sh_remove_shadows */ + sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */); } - /**************************************************************************/ /* Guest physmap (p2m) support * @@ -601,9 +375,20 @@ static inline unsigned int shadow_get_allocation(struct domain *d) * guests, so we steal the address space that would have normally * been used by the read-only MPT map. */ - #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) +/* Add a page to a domain's p2m table */ +void shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, + unsigned long mfn); + +/* Remove a page from a domain's p2m table */ +void shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, + unsigned long mfn); + +/* Aliases, called from common code. */ +#define guest_physmap_add_page shadow_guest_physmap_add_page +#define guest_physmap_remove_page shadow_guest_physmap_remove_page + /* Read the current domain's P2M table. 
*/ static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn) { @@ -626,8 +411,8 @@ static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn) return _mfn(INVALID_MFN); } -/* Walk another domain's P2M table, mapping pages as we go */ -extern mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); +/* Read another domain's P2M table, mapping pages as we go */ +mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); /* General conversion function from gfn to mfn */ static inline mfn_t @@ -665,6 +450,7 @@ mmio_space(paddr_t gpa) return !mfn_valid(mfn_x(sh_gfn_to_mfn_current(gfn))); } +/* Translate the frame number held in an l1e from guest to machine */ static inline l1_pgentry_t gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) { @@ -684,4 +470,3 @@ gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) * indent-tabs-mode: nil * End: */ - diff --git a/xen/include/asm-x86/shared.h b/xen/include/asm-x86/shared.h new file mode 100644 index 0000000000..32275dd285 --- /dev/null +++ b/xen/include/asm-x86/shared.h @@ -0,0 +1,78 @@ +#ifndef __XEN_X86_SHARED_H__ +#define __XEN_X86_SHARED_H__ + +#ifdef CONFIG_COMPAT + +#define nmi_reason(d) (!IS_COMPAT(d) ? \ + (void *)&(d)->shared_info->native.arch.nmi_reason : \ + (void *)&(d)->shared_info->compat.arch.nmi_reason) + +#define GET_SET_SHARED(type, field) \ +static inline type arch_get_##field(const struct domain *d) \ +{ \ + return !IS_COMPAT(d) ? \ + d->shared_info->native.arch.field : \ + d->shared_info->compat.arch.field; \ +} \ +static inline void arch_set_##field(struct domain *d, \ + type val) \ +{ \ + if ( !IS_COMPAT(d) ) \ + d->shared_info->native.arch.field = val; \ + else \ + d->shared_info->compat.arch.field = val; \ +} + +#define GET_SET_VCPU(type, field) \ +static inline type arch_get_##field(const struct vcpu *v) \ +{ \ + return !IS_COMPAT(v->domain) ? 
\ + v->vcpu_info->native.arch.field : \ + v->vcpu_info->compat.arch.field; \ +} \ +static inline void arch_set_##field(struct vcpu *v, \ + type val) \ +{ \ + if ( !IS_COMPAT(v->domain) ) \ + v->vcpu_info->native.arch.field = val; \ + else \ + v->vcpu_info->compat.arch.field = val; \ +} + +#else + +#define nmi_reason(d) ((void *)&(d)->shared_info->arch.nmi_reason) + +#define GET_SET_SHARED(type, field) \ +static inline type arch_get_##field(const struct domain *d) \ +{ \ + return d->shared_info->arch.field; \ +} \ +static inline void arch_set_##field(struct domain *d, \ + type val) \ +{ \ + d->shared_info->arch.field = val; \ +} + +#define GET_SET_VCPU(type, field) \ +static inline type arch_get_##field(const struct vcpu *v) \ +{ \ + return v->vcpu_info->arch.field; \ +} \ +static inline void arch_set_##field(struct vcpu *v, \ + type val) \ +{ \ + v->vcpu_info->arch.field = val; \ +} +#endif + +GET_SET_SHARED(unsigned long, max_pfn) +GET_SET_SHARED(xen_pfn_t, pfn_to_mfn_frame_list_list) +GET_SET_SHARED(unsigned long, nmi_reason) + +GET_SET_VCPU(unsigned long, cr2) + +#undef GET_SET_VCPU +#undef GET_SET_SHARED + +#endif /* __XEN_X86_SHARED_H__ */ diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h index 5166d3f656..cd297534d6 100644 --- a/xen/include/asm-x86/system.h +++ b/xen/include/asm-x86/system.h @@ -19,7 +19,7 @@ #define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((struct __xchg_dummy *)(x)) +#define __xg(x) ((volatile struct __xchg_dummy *)(x)) /* diff --git a/xen/include/asm-x86/uaccess.h b/xen/include/asm-x86/uaccess.h index e3bc3b48bf..e4b0cffcab 100644 --- a/xen/include/asm-x86/uaccess.h +++ b/xen/include/asm-x86/uaccess.h @@ -140,7 +140,7 @@ extern void __put_user_bad(void); }) struct __large_struct { unsigned long buf[100]; }; -#define __m(x) (*(struct __large_struct *)(x)) +#define __m(x) (*(const struct __large_struct *)(x)) /* * 
Tell gcc we read from memory instead of writing: this is because @@ -200,16 +200,16 @@ __copy_to_user(void __user *to, const void *from, unsigned long n) switch (n) { case 1: - __put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret, 1); + __put_user_size(*(const u8 *)from, (u8 __user *)to, 1, ret, 1); return ret; case 2: - __put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret, 2); + __put_user_size(*(const u16 *)from, (u16 __user *)to, 2, ret, 2); return ret; case 4: - __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4); + __put_user_size(*(const u32 *)from, (u32 __user *)to, 4, ret, 4); return ret; case 8: - __put_user_size(*(u64 *)from, (u64 __user *)to, 8, ret, 8); + __put_user_size(*(const u64 *)from, (u64 __user *)to, 8, ret, 8); return ret; } } diff --git a/xen/include/asm-x86/x86_32/kexec.h b/xen/include/asm-x86/x86_32/kexec.h index aa6db02088..cb4ada715e 100644 --- a/xen/include/asm-x86/x86_32/kexec.h +++ b/xen/include/asm-x86/x86_32/kexec.h @@ -10,7 +10,6 @@ #include <xen/types.h> #include <xen/kexec.h> -#include <asm/fixmap.h> typedef asmlinkage void (*relocate_new_kernel_t)( unsigned long indirection_page, diff --git a/xen/include/asm-x86/x86_32/page-2level.h b/xen/include/asm-x86/x86_32/page-2level.h index 14a1d6411f..79e25024b4 100644 --- a/xen/include/asm-x86/x86_32/page-2level.h +++ b/xen/include/asm-x86/x86_32/page-2level.h @@ -42,7 +42,7 @@ typedef l2_pgentry_t root_pgentry_t; /* misc */ #define is_guest_l1_slot(_s) (1) -#define is_guest_l2_slot(_t,_s) ((_s) < L2_PAGETABLE_FIRST_XEN_SLOT) +#define is_guest_l2_slot(_d, _t,_s) ((_s) < L2_PAGETABLE_FIRST_XEN_SLOT) /* * PTE pfn and flags: diff --git a/xen/include/asm-x86/x86_32/page-3level.h b/xen/include/asm-x86/x86_32/page-3level.h index 6837933675..59ac4e1190 100644 --- a/xen/include/asm-x86/x86_32/page-3level.h +++ b/xen/include/asm-x86/x86_32/page-3level.h @@ -67,7 +67,7 @@ typedef l3_pgentry_t root_pgentry_t; /* misc */ #define is_guest_l1_slot(s) (1) -#define is_guest_l2_slot(t,s) \ 
+#define is_guest_l2_slot(d,t,s) \ ( !((t) & PGT_pae_xen_l2) || \ ((s) < (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) ) #define is_guest_l3_slot(s) (1) diff --git a/xen/include/asm-x86/x86_32/regs.h b/xen/include/asm-x86/x86_32/regs.h index 7306b65af5..c65c57ffe4 100644 --- a/xen/include/asm-x86/x86_32/regs.h +++ b/xen/include/asm-x86/x86_32/regs.h @@ -17,7 +17,7 @@ ((dpl) >= (vm86_mode(r) ? 3 : ((r)->cs & 3))) /* Check for null trap callback handler: Is the selector null (0-3)? */ -#define null_trap_bounce(tb) (((tb)->cs & ~3) == 0) +#define null_trap_bounce(v, tb) (((tb)->cs & ~3) == 0) /* Number of bytes of on-stack execution state to be context-switched. */ #define CTXT_SWITCH_STACK_BYTES (sizeof(struct cpu_user_regs)) diff --git a/xen/include/asm-x86/x86_32/uaccess.h b/xen/include/asm-x86/x86_32/uaccess.h index 1ea7f86cae..947593fd08 100644 --- a/xen/include/asm-x86/x86_32/uaccess.h +++ b/xen/include/asm-x86/x86_32/uaccess.h @@ -83,7 +83,7 @@ do { \ case 2: __get_user_asm(x,ptr,retval,"w","w","=r",errret);break; \ case 4: __get_user_asm(x,ptr,retval,"l","","=r",errret);break; \ case 8: __get_user_u64(x,ptr,retval,errret);break; \ - default: (x) = __get_user_bad(); \ + default: __get_user_bad(); \ } \ } while (0) diff --git a/xen/include/asm-x86/x86_64/kexec.h b/xen/include/asm-x86/x86_64/kexec.h index b9779aa9d7..c54dbb9ba6 100644 --- a/xen/include/asm-x86/x86_64/kexec.h +++ b/xen/include/asm-x86/x86_64/kexec.h @@ -10,7 +10,6 @@ #include <xen/types.h> #include <xen/kexec.h> -#include <asm/fixmap.h> typedef void (*relocate_new_kernel_t)( unsigned long indirection_page, diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h index e6dcb1c025..ecda3a7bf5 100644 --- a/xen/include/asm-x86/x86_64/page.h +++ b/xen/include/asm-x86/x86_64/page.h @@ -54,7 +54,10 @@ typedef l4_pgentry_t root_pgentry_t; #define l4_linear_offset(_a) (((_a) & VADDR_MASK) >> L4_PAGETABLE_SHIFT) #define is_guest_l1_slot(_s) (1) -#define 
is_guest_l2_slot(_t, _s) (1) +#define is_guest_l2_slot(_d, _t, _s) \ + ( !IS_COMPAT(_d) || \ + !((_t) & PGT_pae_xen_l2) || \ + ((_s) < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_d)) ) #define is_guest_l3_slot(_s) (1) #define is_guest_l4_slot(_s) \ (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \ @@ -93,6 +96,8 @@ typedef l4_pgentry_t root_pgentry_t; #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */) #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */) +#define COMPAT_L3_DISALLOW_MASK L3_DISALLOW_MASK + #define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL) #define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL) diff --git a/xen/include/asm-x86/x86_64/regs.h b/xen/include/asm-x86/x86_64/regs.h index 5dadccdab1..5b3d6df972 100644 --- a/xen/include/asm-x86/x86_64/regs.h +++ b/xen/include/asm-x86/x86_64/regs.h @@ -11,13 +11,16 @@ #define ring_3(r) (((r)->cs & 3) == 3) #define guest_kernel_mode(v, r) \ - (ring_3(r) && ((v)->arch.flags & TF_kernel_mode)) + (!IS_COMPAT((v)->domain) ? \ + ring_3(r) && ((v)->arch.flags & TF_kernel_mode) : \ + ring_1(r)) #define permit_softint(dpl, v, r) \ ((dpl) >= (guest_kernel_mode(v, r) ? 1 : 3)) /* Check for null trap callback handler: Is the EIP null? */ -#define null_trap_bounce(tb) ((tb)->eip == 0) +#define null_trap_bounce(v, tb) \ + (!IS_COMPAT((v)->domain) ? (tb)->eip == 0 : ((tb)->cs & ~3) == 0) /* Number of bytes of on-stack execution state to be context-switched. */ /* NB. Segment registers and bases are not saved/restored on x86/64 stack. 
*/ diff --git a/xen/include/asm-x86/x86_64/uaccess.h b/xen/include/asm-x86/x86_64/uaccess.h index 4d5f65c890..565a33c81d 100644 --- a/xen/include/asm-x86/x86_64/uaccess.h +++ b/xen/include/asm-x86/x86_64/uaccess.h @@ -15,6 +15,19 @@ #define array_access_ok(addr, count, size) (__addr_ok(addr)) +#ifdef CONFIG_COMPAT + +#define __compat_addr_ok(addr) \ + ((unsigned long)(addr) < HYPERVISOR_COMPAT_VIRT_START(current->domain)) + +#define compat_access_ok(addr, size) __compat_addr_ok((addr) + (size)) + +#define compat_array_access_ok(addr,count,size) \ + (likely((count) < (~0U / (size))) && \ + compat_access_ok(addr, (count) * (size))) + +#endif + #define __put_user_size(x,ptr,size,retval,errret) \ do { \ retval = 0; \ @@ -35,7 +48,7 @@ do { \ case 2: __get_user_asm(x,ptr,retval,"w","w","=r",errret);break; \ case 4: __get_user_asm(x,ptr,retval,"l","k","=r",errret);break; \ case 8: __get_user_asm(x,ptr,retval,"q","","=r",errret); break; \ - default: (x) = __get_user_bad(); \ + default: __get_user_bad(); \ } \ } while (0) diff --git a/xen/include/asm-x86/x86_emulate.h b/xen/include/asm-x86/x86_emulate.h index 89a42da7b0..e1d6f1d50d 100644 --- a/xen/include/asm-x86/x86_emulate.h +++ b/xen/include/asm-x86/x86_emulate.h @@ -134,32 +134,18 @@ struct cpu_user_regs; struct x86_emulate_ctxt { /* Register state before/after emulation. */ - struct cpu_user_regs *regs; + struct cpu_user_regs *regs; - /* Emulated execution mode, represented by an X86EMUL_MODE value. */ - int mode; + /* Default address size in current execution mode (2, 4, or 8). */ + int address_bytes; }; -/* Execution mode, passed to the emulator. */ -#define X86EMUL_MODE_REAL 0 /* Real mode. */ -#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ -#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ -#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ - -/* Host execution mode. 
*/ -#if defined(__i386__) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 -#elif defined(__x86_64__) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 -#endif - /* - * x86_emulate_memop: Emulate an instruction that faulted attempting to - * read/write a 'special' memory area. + * x86_emulate: Emulate an instruction. * Returns -1 on failure, 0 on success. */ int -x86_emulate_memop( +x86_emulate( struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops); diff --git a/xen/include/public/arch-x86/xen-x86_32.h b/xen/include/public/arch-x86/xen-x86_32.h new file mode 100644 index 0000000000..54959faf55 --- /dev/null +++ b/xen/include/public/arch-x86/xen-x86_32.h @@ -0,0 +1,151 @@ +/****************************************************************************** + * xen-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ + +/* + * Hypercall interface: + * Input: %ebx, %ecx, %edx, %esi, %edi (arguments 1-5) + * Output: %eax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) + */ + +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +/* + * Legacy hypercall interface: + * As above, except the entry sequence to the hypervisor is: + * mov $hypercall-number*32,%eax ; int $0x82 + */ +#define TRAP_INSTR "int $0x82" +#endif + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +/* + * Virtual addresses beyond this are not modifiable by guest OSes. The + * machine->physical mapping table starts at this address, read-only. 
+ */ +#ifdef CONFIG_X86_PAE +#define __HYPERVISOR_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_END 0xF6800000 +#else +#define __HYPERVISOR_VIRT_START 0xFC000000 +#define __MACH2PHYS_VIRT_START 0xFC000000 +#define __MACH2PHYS_VIRT_END 0xFC400000 +#endif + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) +#endif + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. 
+ */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; +typedef struct xen_callback xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/public/arch-x86/xen-x86_64.h b/xen/include/public/arch-x86/xen-x86_64.h new file mode 100644 index 0000000000..80135d25ca --- /dev/null +++ b/xen/include/public/arch-x86/xen-x86_64.h @@ -0,0 +1,211 @@ +/****************************************************************************** + * xen-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ + +/* + * Hypercall interface: + * Input: %rdi, %rsi, %rdx, %r10, %r8 (arguments 1-5) + * Output: %rax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) + */ + +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +/* + * Legacy hypercall interface: + * As above, except the entry sequence to the hypervisor is: + * mov $hypercall-number*32,%eax ; syscall + * Clobbered: %rcx, %r11, argument registers (as above) + */ +#define TRAP_INSTR "syscall" +#endif + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. 
+ */ + +#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ +#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ +#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_DS64 0x0000 /* NULL selector */ +#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ + +#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 +#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 +#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 +#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 +#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 +#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 +#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 +#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 +#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 + +#define FLAT_USER_DS64 FLAT_RING3_DS64 +#define FLAT_USER_DS32 FLAT_RING3_DS32 +#define FLAT_USER_DS FLAT_USER_DS64 +#define FLAT_USER_CS64 FLAT_RING3_CS64 +#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_CS FLAT_USER_CS64 +#define FLAT_USER_SS64 FLAT_RING3_SS64 +#define FLAT_USER_SS32 FLAT_RING3_SS32 +#define FLAT_USER_SS FLAT_USER_SS64 + +#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 +#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 +#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 +#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +#ifndef __ASSEMBLY__ + +/* + * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) + * @which == SEGBASE_* ; @base == 64-bit base address + * Returns 0 on success. 
+ */ +#define SEGBASE_FS 0 +#define SEGBASE_GS_USER 1 +#define SEGBASE_GS_KERNEL 2 +#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ + +/* + * int HYPERVISOR_iret(void) + * All arguments are on the kernel stack, in the following format. + * Never returns if successful. Current kernel context is lost. + * The saved CS is mapped as follows: + * RING0 -> RING3 kernel mode. + * RING1 -> RING3 kernel mode. + * RING2 -> RING3 kernel mode. + * RING3 -> RING3 user mode. + * However RING0 indicates that the guest kernel should return to itself + * directly with + * orb $3,1*8(%rsp) + * iretq + * If flags contains VGCF_in_syscall: + * Restore RAX, RIP, RFLAGS, RSP. + * Discard R11, RCX, CS, SS. + * Otherwise: + * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. + * All other registers are saved on hypercall entry and restored to user. + */ +/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ +#define _VGCF_in_syscall 8 +#define VGCF_in_syscall (1<<_VGCF_in_syscall) +#define VGCF_IN_SYSCALL VGCF_in_syscall +struct iret_context { + /* Top of stack (%rsp at point of hypercall). */ + uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; + /* Bottom of iret stack frame. */ +}; + +#ifdef __GNUC__ +/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ +#define __DECL_REG(name) union { \ + uint64_t r ## name, e ## name; \ + uint32_t _e ## name; \ +} +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). 
*/ +#define __DECL_REG(name) uint64_t r ## name +#endif + +struct cpu_user_regs { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + __DECL_REG(bp); + __DECL_REG(bx); + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + __DECL_REG(ax); + __DECL_REG(cx); + __DECL_REG(dx); + __DECL_REG(si); + __DECL_REG(di); + uint32_t error_code; /* private */ + uint32_t entry_vector; /* private */ + __DECL_REG(ip); + uint16_t cs, _pad0[1]; + uint8_t saved_upcall_mask; + uint8_t _pad1[3]; + __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG(sp); + uint16_t ss, _pad2[3]; + uint16_t es, _pad3[3]; + uint16_t ds, _pad4[3]; + uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +#undef __DECL_REG + +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +typedef unsigned long xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/public/arch-x86/xen.h b/xen/include/public/arch-x86/xen.h new file mode 100644 index 0000000000..34cbdb7c59 --- /dev/null +++ b/xen/include/public/arch-x86/xen.h @@ -0,0 +1,200 @@ +/****************************************************************************** + * arch-x86/xen.h + * + * Guest OS interface to x86 Xen. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_H__ + +/* Structural guest handles introduced in 0x00030201. */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030201 +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name +#else +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef type * __guest_handle_ ## name +#endif + +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. 
*/ +__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); +__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +DEFINE_XEN_GUEST_HANDLE(char); +DEFINE_XEN_GUEST_HANDLE(int); +DEFINE_XEN_GUEST_HANDLE(long); +DEFINE_XEN_GUEST_HANDLE(void); + +typedef unsigned long xen_pfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); +#endif + +#if defined(__i386__) +#include "xen-x86_32.h" +#elif defined(__x86_64__) +#include "xen-x86_64.h" +#endif + +/* + * SEGMENT DESCRIPTOR TABLES + */ +/* + * A number of GDT entries are reserved by Xen. These are not situated at the + * start of the GDT because some stupid OSes export hard-coded selector values + * in their ABI. These hard-coded values are always near the start of the GDT, + * so Xen places itself out of the way, at the far end of the GDT. + */ +#define FIRST_RESERVED_GDT_PAGE 14 +#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) +#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) + +/* Maximum number of virtual CPUs in multi-processor guests. */ +#define MAX_VIRT_CPUS 32 + +#ifndef __ASSEMBLY__ + +typedef unsigned long xen_ulong_t; + +/* + * Send an array of these to HYPERVISOR_set_trap_table(). + * The privilege level specifies which modes may enter a trap via a software + * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate + * privilege levels as follows: + * Level == 0: No one may enter + * Level == 1: Kernel may enter + * Level == 2: Kernel may enter + * Level == 3: Everyone may enter + */ +#define TI_GET_DPL(_ti) ((_ti)->flags & 3) +#define TI_GET_IF(_ti) ((_ti)->flags & 4) +#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) +#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) +struct trap_info { + uint8_t vector; /* exception vector */ + uint8_t flags; /* 0-3: privilege level; 4: clear event enable? 
*/ + uint16_t cs; /* code selector */ + unsigned long address; /* code offset */ +}; +typedef struct trap_info trap_info_t; +DEFINE_XEN_GUEST_HANDLE(trap_info_t); + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +/* + * The following is all CPU context. Note that the fpu_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. + */ +struct vcpu_guest_context { + /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ + struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ +#define VGCF_I387_VALID (1<<0) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) +#define _VGCF_syscall_disables_events 4 +#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) + unsigned long flags; /* VGCF_* flags */ + struct cpu_user_regs user_regs; /* User-level CPU registers */ + struct trap_info trap_ctxt[256]; /* Virtual IDT */ + unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ +#ifdef __i386__ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; +#else + unsigned long event_callback_eip; + unsigned long failsafe_callback_eip; +#ifdef __XEN__ + union { + unsigned long syscall_callback_eip; + struct { + unsigned int event_callback_cs; /* compat CS of event cb */ + unsigned int failsafe_callback_cs; /* compat CS of 
failsafe cb */ + }; + }; +#else + unsigned long syscall_callback_eip; +#endif +#endif + unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ +#ifdef __x86_64__ + /* Segment base addresses. */ + uint64_t fs_base; + uint64_t gs_base_kernel; + uint64_t gs_base_user; +#endif +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +struct arch_shared_info { + unsigned long max_pfn; /* max pfn that appears in table */ + /* Frame containing list of mfns containing list of mfns containing p2m. */ + xen_pfn_t pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; + uint64_t pad[32]; +}; +typedef struct arch_shared_info arch_shared_info_t; + +#endif /* !__ASSEMBLY__ */ + +/* + * Prefix forces emulation of some non-trapping instructions. + * Currently only CPUID. + */ +#ifdef __ASSEMBLY__ +#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; +#define XEN_CPUID XEN_EMULATE_PREFIX cpuid +#else +#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " +#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" +#endif + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/public/arch-x86_32.h b/xen/include/public/arch-x86_32.h index 42651c20b6..45842b2034 100644 --- a/xen/include/public/arch-x86_32.h +++ b/xen/include/public/arch-x86_32.h @@ -21,250 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
* - * Copyright (c) 2004, K A Fraser + * Copyright (c) 2004-2006, K A Fraser */ -#ifndef __XEN_PUBLIC_ARCH_X86_32_H__ -#define __XEN_PUBLIC_ARCH_X86_32_H__ - -/* - * Hypercall interface: - * Input: %ebx, %ecx, %edx, %esi, %edi (arguments 1-5) - * Output: %eax - * Access is via hypercall page (set up by guest loader or via a Xen MSR): - * call hypercall_page + hypercall-number * 32 - * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) - */ - -#if __XEN_INTERFACE_VERSION__ < 0x00030203 -/* - * Legacy hypercall interface: - * As above, except the entry sequence to the hypervisor is: - * mov $hypercall-number*32,%eax ; int $0x82 - */ -#define TRAP_INSTR "int $0x82" -#endif - -/* Structural guest handles introduced in 0x00030201. */ -#if __XEN_INTERFACE_VERSION__ >= 0x00030201 -#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef struct { type *p; } __guest_handle_ ## name -#else -#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef type * __guest_handle_ ## name -#endif - -#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) -#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) -#ifdef __XEN_TOOLS__ -#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -#endif - -#ifndef __ASSEMBLY__ -/* Guest handles for primitive C types. */ -__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); -__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); -__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); -DEFINE_XEN_GUEST_HANDLE(char); -DEFINE_XEN_GUEST_HANDLE(int); -DEFINE_XEN_GUEST_HANDLE(long); -DEFINE_XEN_GUEST_HANDLE(void); - -typedef unsigned long xen_pfn_t; -DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); -#endif - -/* - * SEGMENT DESCRIPTOR TABLES - */ -/* - * A number of GDT entries are reserved by Xen. These are not situated at the - * start of the GDT because some stupid OSes export hard-coded selector values - * in their ABI. 
These hard-coded values are always near the start of the GDT, - * so Xen places itself out of the way, at the far end of the GDT. - */ -#define FIRST_RESERVED_GDT_PAGE 14 -#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) -#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) - -/* - * These flat segments are in the Xen-private section of every GDT. Since these - * are also present in the initial GDT, many OSes will be able to avoid - * installing their own GDT. - */ -#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ -#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ -#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ -#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ -#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ -#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ - -#define FLAT_KERNEL_CS FLAT_RING1_CS -#define FLAT_KERNEL_DS FLAT_RING1_DS -#define FLAT_KERNEL_SS FLAT_RING1_SS -#define FLAT_USER_CS FLAT_RING3_CS -#define FLAT_USER_DS FLAT_RING3_DS -#define FLAT_USER_SS FLAT_RING3_SS - -/* - * Virtual addresses beyond this are not modifiable by guest OSes. The - * machine->physical mapping table starts at this address, read-only. - */ -#ifdef CONFIG_X86_PAE -#define __HYPERVISOR_VIRT_START 0xF5800000 -#define __MACH2PHYS_VIRT_START 0xF5800000 -#define __MACH2PHYS_VIRT_END 0xF6800000 -#else -#define __HYPERVISOR_VIRT_START 0xFC000000 -#define __MACH2PHYS_VIRT_START 0xFC000000 -#define __MACH2PHYS_VIRT_END 0xFC400000 -#endif - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) -#endif - -/* Maximum number of virtual CPUs in multi-processor guests. 
*/ -#define MAX_VIRT_CPUS 32 - -#ifndef __ASSEMBLY__ - -typedef unsigned long xen_ulong_t; - -/* - * Send an array of these to HYPERVISOR_set_trap_table() - */ -#define TI_GET_DPL(_ti) ((_ti)->flags & 3) -#define TI_GET_IF(_ti) ((_ti)->flags & 4) -#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) -#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) -struct trap_info { - uint8_t vector; /* exception vector */ - uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ - uint16_t cs; /* code selector */ - unsigned long address; /* code offset */ -}; -typedef struct trap_info trap_info_t; -DEFINE_XEN_GUEST_HANDLE(trap_info_t); - -struct cpu_user_regs { - uint32_t ebx; - uint32_t ecx; - uint32_t edx; - uint32_t esi; - uint32_t edi; - uint32_t ebp; - uint32_t eax; - uint16_t error_code; /* private */ - uint16_t entry_vector; /* private */ - uint32_t eip; - uint16_t cs; - uint8_t saved_upcall_mask; - uint8_t _pad0; - uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ - uint32_t esp; - uint16_t ss, _pad1; - uint16_t es, _pad2; - uint16_t ds, _pad3; - uint16_t fs, _pad4; - uint16_t gs, _pad5; -}; -typedef struct cpu_user_regs cpu_user_regs_t; -DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); - -typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ - -/* - * The following is all CPU context. Note that the fpu_ctxt block is filled - * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. - */ -struct vcpu_guest_context { - /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. 
*/ - struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ -#define VGCF_I387_VALID (1<<0) -#define VGCF_IN_KERNEL (1<<2) -#define _VGCF_i387_valid 0 -#define VGCF_i387_valid (1<<_VGCF_i387_valid) -#define _VGCF_in_kernel 2 -#define VGCF_in_kernel (1<<_VGCF_in_kernel) -#define _VGCF_failsafe_disables_events 3 -#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) - unsigned long flags; /* VGCF_* flags */ - struct cpu_user_regs user_regs; /* User-level CPU registers */ - struct trap_info trap_ctxt[256]; /* Virtual IDT */ - unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ - unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ - unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ - unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ - unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ - unsigned long event_callback_cs; /* CS:EIP of event callback */ - unsigned long event_callback_eip; - unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ - unsigned long failsafe_callback_eip; - unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ -}; -typedef struct vcpu_guest_context vcpu_guest_context_t; -DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); - -/* - * Page-directory addresses above 4GB do not fit into architectural %cr3. - * When accessing %cr3, or equivalent field in vcpu_guest_context, guests - * must use the following accessor macros to pack/unpack valid MFNs. - */ -#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) -#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) - -struct arch_shared_info { - unsigned long max_pfn; /* max pfn that appears in table */ - /* Frame containing list of mfns containing list of mfns containing p2m. 
*/ - xen_pfn_t pfn_to_mfn_frame_list_list; - unsigned long nmi_reason; - uint64_t pad[32]; -}; -typedef struct arch_shared_info arch_shared_info_t; - -struct arch_vcpu_info { - unsigned long cr2; - unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ -}; -typedef struct arch_vcpu_info arch_vcpu_info_t; - -struct xen_callback { - unsigned long cs; - unsigned long eip; -}; -typedef struct xen_callback xen_callback_t; - -#endif /* !__ASSEMBLY__ */ - -/* - * Prefix forces emulation of some non-trapping instructions. - * Currently only CPUID. - */ -#ifdef __ASSEMBLY__ -#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; -#define XEN_CPUID XEN_EMULATE_PREFIX cpuid -#else -#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " -#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" -#endif - -#endif - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ +#include "arch-x86/xen.h" diff --git a/xen/include/public/arch-x86_64.h b/xen/include/public/arch-x86_64.h index a60bc204ef..fbb263999a 100644 --- a/xen/include/public/arch-x86_64.h +++ b/xen/include/public/arch-x86_64.h @@ -21,319 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
* - * Copyright (c) 2004, K A Fraser + * Copyright (c) 2004-2006, K A Fraser */ -#ifndef __XEN_PUBLIC_ARCH_X86_64_H__ -#define __XEN_PUBLIC_ARCH_X86_64_H__ - -/* - * Hypercall interface: - * Input: %rdi, %rsi, %rdx, %r10, %r8 (arguments 1-5) - * Output: %rax - * Access is via hypercall page (set up by guest loader or via a Xen MSR): - * call hypercall_page + hypercall-number * 32 - * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) - */ - -#if __XEN_INTERFACE_VERSION__ < 0x00030203 -/* - * Legacy hypercall interface: - * As above, except the entry sequence to the hypervisor is: - * mov $hypercall-number*32,%eax ; syscall - * Clobbered: %rcx, %r11, argument registers (as above) - */ -#define TRAP_INSTR "syscall" -#endif - -/* Structural guest handles introduced in 0x00030201. */ -#if __XEN_INTERFACE_VERSION__ >= 0x00030201 -#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef struct { type *p; } __guest_handle_ ## name -#else -#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef type * __guest_handle_ ## name -#endif - -#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) -#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) -#ifdef __XEN_TOOLS__ -#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -#endif - -#ifndef __ASSEMBLY__ -/* Guest handles for primitive C types. */ -__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); -__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); -__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); -DEFINE_XEN_GUEST_HANDLE(char); -DEFINE_XEN_GUEST_HANDLE(int); -DEFINE_XEN_GUEST_HANDLE(long); -DEFINE_XEN_GUEST_HANDLE(void); - -typedef unsigned long xen_pfn_t; -DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); -#endif - -/* - * SEGMENT DESCRIPTOR TABLES - */ -/* - * A number of GDT entries are reserved by Xen. 
These are not situated at the - * start of the GDT because some stupid OSes export hard-coded selector values - * in their ABI. These hard-coded values are always near the start of the GDT, - * so Xen places itself out of the way, at the far end of the GDT. - */ -#define FIRST_RESERVED_GDT_PAGE 14 -#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) -#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) - -/* - * 64-bit segment selectors - * These flat segments are in the Xen-private section of every GDT. Since these - * are also present in the initial GDT, many OSes will be able to avoid - * installing their own GDT. - */ - -#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ -#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ -#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ -#define FLAT_RING3_DS64 0x0000 /* NULL selector */ -#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ -#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ - -#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 -#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 -#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 -#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 -#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 -#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 -#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 -#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 -#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 - -#define FLAT_USER_DS64 FLAT_RING3_DS64 -#define FLAT_USER_DS32 FLAT_RING3_DS32 -#define FLAT_USER_DS FLAT_USER_DS64 -#define FLAT_USER_CS64 FLAT_RING3_CS64 -#define FLAT_USER_CS32 FLAT_RING3_CS32 -#define FLAT_USER_CS FLAT_USER_CS64 -#define FLAT_USER_SS64 FLAT_RING3_SS64 -#define FLAT_USER_SS32 FLAT_RING3_SS32 -#define FLAT_USER_SS FLAT_USER_SS64 - -#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 -#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 -#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 -#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START 
mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif - -/* Maximum number of virtual CPUs in multi-processor guests. */ -#define MAX_VIRT_CPUS 32 - -#ifndef __ASSEMBLY__ - -typedef unsigned long xen_ulong_t; - -/* - * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) - * @which == SEGBASE_* ; @base == 64-bit base address - * Returns 0 on success. - */ -#define SEGBASE_FS 0 -#define SEGBASE_GS_USER 1 -#define SEGBASE_GS_KERNEL 2 -#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ - -/* - * int HYPERVISOR_iret(void) - * All arguments are on the kernel stack, in the following format. - * Never returns if successful. Current kernel context is lost. - * The saved CS is mapped as follows: - * RING0 -> RING3 kernel mode. - * RING1 -> RING3 kernel mode. - * RING2 -> RING3 kernel mode. - * RING3 -> RING3 user mode. - * However RING0 indicates that the guest kernel should return to iteself - * directly with - * orb $3,1*8(%rsp) - * iretq - * If flags contains VGCF_in_syscall: - * Restore RAX, RIP, RFLAGS, RSP. - * Discard R11, RCX, CS, SS. - * Otherwise: - * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. - * All other registers are saved on hypercall entry and restored to user. - */ -/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ -#define _VGCF_in_syscall 8 -#define VGCF_in_syscall (1<<_VGCF_in_syscall) -#define VGCF_IN_SYSCALL VGCF_in_syscall -struct iret_context { - /* Top of stack (%rsp at point of hypercall). */ - uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; - /* Bottom of iret stack frame. 
*/ -}; - -/* - * Send an array of these to HYPERVISOR_set_trap_table(). - * N.B. As in x86/32 mode, the privilege level specifies which modes may enter - * a trap via a software interrupt. Since rings 1 and 2 are unavailable, we - * allocate privilege levels as follows: - * Level == 0: Noone may enter - * Level == 1: Kernel may enter - * Level == 2: Kernel may enter - * Level == 3: Everyone may enter - */ -#define TI_GET_DPL(_ti) ((_ti)->flags & 3) -#define TI_GET_IF(_ti) ((_ti)->flags & 4) -#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) -#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) -struct trap_info { - uint8_t vector; /* exception vector */ - uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ - uint16_t cs; /* code selector */ - unsigned long address; /* code offset */ -}; -typedef struct trap_info trap_info_t; -DEFINE_XEN_GUEST_HANDLE(trap_info_t); - -#ifdef __GNUC__ -/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ -#define __DECL_REG(name) union { uint64_t r ## name, e ## name; } -#else -/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */ -#define __DECL_REG(name) uint64_t r ## name -#endif - -struct cpu_user_regs { - uint64_t r15; - uint64_t r14; - uint64_t r13; - uint64_t r12; - __DECL_REG(bp); - __DECL_REG(bx); - uint64_t r11; - uint64_t r10; - uint64_t r9; - uint64_t r8; - __DECL_REG(ax); - __DECL_REG(cx); - __DECL_REG(dx); - __DECL_REG(si); - __DECL_REG(di); - uint32_t error_code; /* private */ - uint32_t entry_vector; /* private */ - __DECL_REG(ip); - uint16_t cs, _pad0[1]; - uint8_t saved_upcall_mask; - uint8_t _pad1[3]; - __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ - __DECL_REG(sp); - uint16_t ss, _pad2[3]; - uint16_t es, _pad3[3]; - uint16_t ds, _pad4[3]; - uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ - uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. 
*/ -}; -typedef struct cpu_user_regs cpu_user_regs_t; -DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); - -#undef __DECL_REG - -typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ - -/* - * The following is all CPU context. Note that the fpu_ctxt block is filled - * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. - */ -struct vcpu_guest_context { - /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ - struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ -#define VGCF_I387_VALID (1<<0) -#define VGCF_IN_KERNEL (1<<2) -#define _VGCF_i387_valid 0 -#define VGCF_i387_valid (1<<_VGCF_i387_valid) -#define _VGCF_in_kernel 2 -#define VGCF_in_kernel (1<<_VGCF_in_kernel) -#define _VGCF_failsafe_disables_events 3 -#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) -#define _VGCF_syscall_disables_events 4 -#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) - unsigned long flags; /* VGCF_* flags */ - struct cpu_user_regs user_regs; /* User-level CPU registers */ - struct trap_info trap_ctxt[256]; /* Virtual IDT */ - unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ - unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ - unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ - unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ - unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ - unsigned long event_callback_eip; - unsigned long failsafe_callback_eip; - unsigned long syscall_callback_eip; - unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ - /* Segment base addresses. 
*/ - uint64_t fs_base; - uint64_t gs_base_kernel; - uint64_t gs_base_user; -}; -typedef struct vcpu_guest_context vcpu_guest_context_t; -DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); - -#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) -#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) - -struct arch_shared_info { - unsigned long max_pfn; /* max pfn that appears in table */ - /* Frame containing list of mfns containing list of mfns containing p2m. */ - xen_pfn_t pfn_to_mfn_frame_list_list; - unsigned long nmi_reason; - uint64_t pad[32]; -}; -typedef struct arch_shared_info arch_shared_info_t; - -struct arch_vcpu_info { - unsigned long cr2; - unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ -}; -typedef struct arch_vcpu_info arch_vcpu_info_t; - -typedef unsigned long xen_callback_t; - -#endif /* !__ASSEMBLY__ */ - -/* - * Prefix forces emulation of some non-trapping instructions. - * Currently only CPUID. - */ -#ifdef __ASSEMBLY__ -#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; -#define XEN_CPUID XEN_EMULATE_PREFIX cpuid -#else -#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " -#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" -#endif - -#endif - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ +#include "arch-x86/xen.h" diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h index 588d430511..d9b23d5b75 100644 --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -53,6 +53,8 @@ struct xen_domctl_createdomain { /* Is this an HVM guest (as opposed to a PV guest)? 
*/ #define _XEN_DOMCTL_CDF_hvm_guest 0 #define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest) +#define XEN_DOMCTL_CDF_WORDSIZE_MASK 255 +#define XEN_DOMCTL_CDF_WORDSIZE_SHIFT 24 uint32_t flags; }; typedef struct xen_domctl_createdomain xen_domctl_createdomain_t; @@ -392,6 +394,9 @@ struct xen_domctl_real_mode_area { typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); +#define XEN_DOMCTL_set_compat 42 +#define XEN_DOMCTL_set_native 43 + struct xen_domctl { uint32_t cmd; uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ diff --git a/xen/include/public/hvm/ioreq.h b/xen/include/public/hvm/ioreq.h index 0d3dc3277a..351801a9b2 100644 --- a/xen/include/public/hvm/ioreq.h +++ b/xen/include/public/hvm/ioreq.h @@ -56,6 +56,7 @@ struct ioreq { uint8_t dir:1; /* 1=read, 0=write */ uint8_t df:1; uint8_t type; /* I/O type */ + uint8_t _pad0[6]; uint64_t io_count; /* How many IO done on a vcpu */ }; typedef struct ioreq ioreq_t; @@ -74,8 +75,8 @@ typedef struct shared_iopage shared_iopage_t; #define IOREQ_BUFFER_SLOT_NUM 80 struct buffered_iopage { - unsigned long read_pointer; - unsigned long write_pointer; + unsigned int read_pointer; + unsigned int write_pointer; ioreq_t ioreq[IOREQ_BUFFER_SLOT_NUM]; }; /* sizeof this structure must be in one page */ typedef struct buffered_iopage buffered_iopage_t; diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index 1b810df364..de365d6459 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -29,10 +29,8 @@ #include "xen-compat.h" -#if defined(__i386__) -#include "arch-x86_32.h" -#elif defined(__x86_64__) -#include "arch-x86_64.h" +#if defined(__i386__) || defined(__x86_64__) +#include "arch-x86/xen.h" #elif defined(__ia64__) #include "arch-ia64.h" #elif defined(__powerpc__) @@ -410,7 +408,9 @@ struct vcpu_info { struct arch_vcpu_info arch; struct vcpu_time_info time; }; /* 64 bytes (x86) */ +#ifndef __XEN__ 
typedef struct vcpu_info vcpu_info_t; +#endif /* * Xen/kernel shared data -- pointer provided in start_info. @@ -468,7 +468,9 @@ struct shared_info { struct arch_shared_info arch; }; +#ifndef __XEN__ typedef struct shared_info shared_info_t; +#endif /* * Start-of-day memory layout for the initial domain (DOM0): diff --git a/xen/include/public/xenoprof.h b/xen/include/public/xenoprof.h index 709b60fa1f..d16d8ca3f6 100644 --- a/xen/include/public/xenoprof.h +++ b/xen/include/public/xenoprof.h @@ -74,8 +74,10 @@ struct xenoprof_buf { uint64_t lost_samples; struct event_log event_log[1]; }; +#ifndef __XEN__ typedef struct xenoprof_buf xenoprof_buf_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); +#endif struct xenoprof_init { int32_t num_events; diff --git a/xen/include/xen/compat.h b/xen/include/xen/compat.h new file mode 100644 index 0000000000..06771a1c23 --- /dev/null +++ b/xen/include/xen/compat.h @@ -0,0 +1,180 @@ +/****************************************************************************** + * compat.h + */ + +#ifndef __XEN_COMPAT_H__ +#define __XEN_COMPAT_H__ + +#include <xen/config.h> + +#ifdef CONFIG_COMPAT + +#include <xen/types.h> +#include <asm/compat.h> +#include <compat/xlat.h> + +#define __DEFINE_COMPAT_HANDLE(name, type) \ + typedef struct { \ + compat_ptr_t c; \ + type *_[0] __attribute__((__packed__)); \ + } __compat_handle_ ## name + +#define DEFINE_COMPAT_HANDLE(name) __DEFINE_COMPAT_HANDLE(name, name) +#define COMPAT_HANDLE(name) __compat_handle_ ## name + +/* Is the compat handle a NULL reference? */ +#define compat_handle_is_null(hnd) ((hnd).c == 0) + +/* Offset the given compat handle into the array it refers to. */ +#define compat_handle_add_offset(hnd, nr) \ + ((hnd).c += (nr) * sizeof(**(hnd)._)) + +/* Cast a compat handle to the specified type of handle. 
*/ +#define compat_handle_cast(chnd, type) ({ \ + type *_x = (__typeof__(**(chnd)._) *)(full_ptr_t)(chnd).c; \ + (XEN_GUEST_HANDLE(type)) { _x }; \ +}) + +#define guest_from_compat_handle(ghnd, chnd) \ + set_xen_guest_handle(ghnd, \ + (__typeof__(**(chnd)._) *)(full_ptr_t)(chnd).c) + +/* + * Copy an array of objects to guest context via a compat handle, + * specifying an offset into the guest array. + */ +#define copy_to_compat_offset(hnd, off, ptr, nr) ({ \ + const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \ + const typeof(*(ptr)) *const _y = (ptr); \ + copy_to_user(_x + (off), _y, sizeof(*_x) * (nr)); \ +}) + +/* + * Copy an array of objects from guest context via a compat handle, + * specifying an offset into the guest array. + */ +#define copy_from_compat_offset(ptr, hnd, off, nr) ({ \ + const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \ + const typeof(ptr) _y = (ptr); \ + copy_from_user(_y, _x + (off), sizeof(*_x) * (nr)); \ +}) + +#define copy_to_compat(hnd, ptr, nr) \ + copy_to_compat_offset(hnd, 0, ptr, nr) + +#define copy_from_compat(ptr, hnd, nr) \ + copy_from_compat_offset(ptr, hnd, 0, nr) + +/* Copy sub-field of a structure to guest context via a compat handle. */ +#define copy_field_to_compat(hnd, ptr, field) ({ \ + typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ + const typeof((ptr)->field) *const _y = &(ptr)->field; \ + copy_to_user(_x, _y, sizeof(*_x)); \ +}) + +/* Copy sub-field of a structure from guest context via a compat handle. */ +#define copy_field_from_compat(ptr, hnd, field) ({ \ + typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ + typeof((ptr)->field) *const _y = &(ptr)->field; \ + copy_from_user(_y, _x, sizeof(*_x)); \ +}) + +/* + * Pre-validate a guest handle. + * Allows use of faster __copy_* functions. 
+ */ +#define compat_handle_okay(hnd, nr) \ + compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr), sizeof(**(hnd)._)) + +#define __copy_to_compat_offset(hnd, off, ptr, nr) ({ \ + const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \ + const typeof(*(ptr)) *const _y = (ptr); \ + __copy_to_user(_x + (off), _y, sizeof(*_x) * (nr)); \ +}) + +#define __copy_from_compat_offset(ptr, hnd, off, nr) ({ \ + const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \ + const typeof(ptr) _y = (ptr); \ + __copy_from_user(_y, _x + (off), sizeof(*_x) * (nr)); \ +}) + +#define __copy_to_compat(hnd, ptr, nr) \ + __copy_to_compat_offset(hnd, 0, ptr, nr) + +#define __copy_from_compat(ptr, hnd, nr) \ + __copy_from_compat_offset(ptr, hnd, 0, nr) + +#define __copy_field_to_compat(hnd, ptr, field) ({ \ + typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ + const typeof((ptr)->field) *const _y = &(ptr)->field; \ + __copy_to_user(_x, _y, sizeof(*_x)); \ +}) + +#define __copy_field_from_compat(ptr, hnd, field) ({ \ + typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \ + typeof((ptr)->field) *const _y = &(ptr)->field; \ + __copy_from_user(_y, _x, sizeof(*_x)); \ +}) + + +#define CHECK_TYPE(name) \ + typedef int __checkT ## name[1 - ((xen_ ## name ## _t *)0 != \ + (compat_ ## name ## _t *)0) * 2] +#define CHECK_TYPE_(k, n) \ + typedef int __checkT ## k ## _ ## n[1 - ((k xen_ ## n *)0 != \ + (k compat_ ## n *)0) * 2] + +#define CHECK_SIZE(name) \ + typedef int __checkS ## name[1 - (sizeof(xen_ ## name ## _t) != \ + sizeof(compat_ ## name ## _t)) * 2] +#define CHECK_SIZE_(k, n) \ + typedef int __checkS ## k ## _ ## n[1 - (sizeof(k xen_ ## n) != \ + sizeof(k compat_ ## n)) * 2] + +#define CHECK_FIELD(t, f) \ + typedef int __checkF ## t ## __ ## f[1 - (&((xen_ ## t ## _t *)0)->f != \ + &((compat_ ## t ## _t *)0)->f) * 2] +#define CHECK_FIELD_(k, n, f) \ + typedef int __checkF ## k ## _ ## n ## __ ## f[1 
- (&((k xen_ ## n *)0)->f != \ + &((k compat_ ## n *)0)->f) * 2] + +#define CHECK_SUBFIELD_1(t, f1, f2) \ + typedef int __checkF1 ## t ## __ ## f1 ## __ ## f2 \ + [1 - (&((xen_ ## t ## _t *)0)->f1.f2 != \ + &((compat_ ## t ## _t *)0)->f1.f2) * 2] +#define CHECK_SUBFIELD_1_(k, n, f1, f2) \ + typedef int __checkF1 ## k ## _ ## n ## __ ## f1 ## __ ## f2 \ + [1 - (&((k xen_ ## n *)0)->f1.f2 != \ + &((k compat_ ## n *)0)->f1.f2) * 2] + +#define CHECK_SUBFIELD_2(t, f1, f2, f3) \ + typedef int __checkF2 ## t ## __ ## f1 ## __ ## f2 ## __ ## f3 \ + [1 - (&((xen_ ## t ## _t *)0)->f1.f2.f3 != \ + &((compat_ ## t ## _t *)0)->f1.f2.f3) * 2] +#define CHECK_SUBFIELD_2_(k, n, f1, f2, f3) \ + typedef int __checkF2 ## k ## _ ## n ## __ ## f1 ## __ ## f2 ## __ ## f3 \ + [1 - (&((k xen_ ## n *)0)->f1.f2.f3 != \ + &((k compat_ ## n *)0)->f1.f2.f3) * 2] + +extern int compat_disabled; + +int hypercall_xlat_continuation(unsigned int *id, unsigned int mask, ...); + +/* In-place translation functions: */ +struct start_info; +void xlat_start_info(struct start_info *, enum XLAT_start_info_console); +struct vcpu_runstate_info; +void xlat_vcpu_runstate_info(struct vcpu_runstate_info *); + +int switch_compat(struct domain *); +int switch_native(struct domain *); + +#define BITS_PER_GUEST_LONG(d) (!IS_COMPAT(d) ? 
BITS_PER_LONG : COMPAT_BITS_PER_LONG) + +#else + +#define BITS_PER_GUEST_LONG(d) BITS_PER_LONG + +#endif + +#endif /* __XEN_COMPAT_H__ */ diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h index 70e8902a73..26b2047f5a 100644 --- a/xen/include/xen/domain.h +++ b/xen/include/xen/domain.h @@ -2,15 +2,27 @@ #ifndef __XEN_DOMAIN_H__ #define __XEN_DOMAIN_H__ +typedef union { + struct vcpu_guest_context *nat; + struct compat_vcpu_guest_context *cmp; +} vcpu_guest_context_u __attribute__((__transparent_union__)); + struct vcpu *alloc_vcpu( struct domain *d, unsigned int vcpu_id, unsigned int cpu_id); int boot_vcpu( - struct domain *d, int vcpuid, struct vcpu_guest_context *ctxt); + struct domain *d, int vcpuid, vcpu_guest_context_u ctxt); struct vcpu *alloc_idle_vcpu(unsigned int cpu_id); struct domain *alloc_domain(domid_t domid); void free_domain(struct domain *d); +struct xen_domctl_getdomaininfo; +void getdomaininfo( + struct domain *d, struct xen_domctl_getdomaininfo *info); +struct compat_domctl_getdomaininfo; +void compat_getdomaininfo( + struct domain *d, struct compat_domctl_getdomaininfo *info); + /* * Arch-specifics. 
*/ @@ -33,7 +45,8 @@ int arch_domain_create(struct domain *d); void arch_domain_destroy(struct domain *d); -int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c); +int arch_set_info_guest(struct vcpu *, vcpu_guest_context_u); +void arch_get_info_guest(struct vcpu *, vcpu_guest_context_u); void domain_relinquish_resources(struct domain *d); diff --git a/xen/include/xen/elf.h b/xen/include/xen/elf.h index 3728484c11..131276a4d6 100644 --- a/xen/include/xen/elf.h +++ b/xen/include/xen/elf.h @@ -525,8 +525,17 @@ extern unsigned long long xen_elfnote_numeric(struct domain_setup_info *dsi, int type, int *defined); extern const char *xen_elfnote_string(struct domain_setup_info *dsi, int type); +#ifdef CONFIG_COMPAT +extern int elf32_sanity_check(const Elf32_Ehdr *ehdr); +extern int loadelf32image(struct domain_setup_info *); +extern int parseelf32image(struct domain_setup_info *); +extern unsigned long long xen_elf32note_numeric(struct domain_setup_info *, + int type, int *defined); +extern const char *xen_elf32note_string(struct domain_setup_info *, int type); +#endif + #ifdef Elf_Ehdr -extern int elf_sanity_check(Elf_Ehdr *ehdr); +extern int elf_sanity_check(const Elf_Ehdr *ehdr); #endif #endif /* __XEN_ELF_H__ */ diff --git a/xen/include/xen/elfcore.h b/xen/include/xen/elfcore.h index 75ce7c49ef..fbe8ed1a6a 100644 --- a/xen/include/xen/elfcore.h +++ b/xen/include/xen/elfcore.h @@ -56,49 +56,6 @@ typedef struct int pr_fpvalid; /* True if math co-processor being used. */ } ELF_Prstatus; -/* - * The following data structures provide 64-bit ELF notes. In theory it should - * be possible to support both 64-bit and 32-bit ELF files, but to keep it - * simple we only do 64-bit. - * - * Please note that the current code aligns the 64-bit notes in the same - * way as Linux does. We are not following the 64-bit ELF spec, no one does. - * - * We are avoiding two problems by restricting us to 64-bit notes only: - * - Alignment of notes change with the word size. 
Ick. - * - We would need to tell kexec-tools which format we are using in the - * hypervisor to make sure the right ELF format is generated. - * That requires infrastructure. Let's not. - */ - -#define NOTE_ALIGN(x, n) ((x + ((1 << n) - 1)) / (1 << n)) -#define PAD32(x) u32 pad_data[NOTE_ALIGN(x, 2)] - -#define TYPEDEF_NOTE(type, strlen, desctype) \ - typedef struct { \ - union { \ - struct { \ - Elf_Note note; \ - unsigned char name[strlen]; \ - } note; \ - PAD32(sizeof(Elf_Note) + strlen); \ - } note; \ - union { \ - desctype desc; \ - PAD32(sizeof(desctype)); \ - } desc; \ - } __attribute__ ((packed)) type - -#define CORE_STR "CORE" -#define CORE_STR_LEN 5 /* including terminating zero */ - -TYPEDEF_NOTE(crash_note_core_t, CORE_STR_LEN, ELF_Prstatus); - -#define XEN_STR "Xen" -#define XEN_STR_LEN 4 /* including terminating zero */ - -TYPEDEF_NOTE(crash_note_xen_core_t, XEN_STR_LEN, crash_xen_core_t); - typedef struct { unsigned long xen_major_version; unsigned long xen_minor_version; @@ -113,20 +70,6 @@ typedef struct { #endif } crash_xen_info_t; -TYPEDEF_NOTE(crash_note_xen_info_t, XEN_STR_LEN, crash_xen_info_t); - -typedef struct { - crash_note_core_t core; - crash_note_xen_core_t xen_regs; - crash_note_xen_info_t xen_info; -} __attribute__ ((packed)) crash_note_t; - -#define setup_crash_note(np, member, str, str_len, id) \ - np->member.note.note.note.namesz = str_len; \ - np->member.note.note.note.descsz = sizeof(np->member.desc.desc); \ - np->member.note.note.note.type = id; \ - memcpy(np->member.note.note.name, str, str_len) - #endif /* __ELFCOREC_H__ */ /* diff --git a/xen/include/xen/hypercall.h b/xen/include/xen/hypercall.h index b8fc94d2c6..415ea330e3 100644 --- a/xen/include/xen/hypercall.h +++ b/xen/include/xen/hypercall.h @@ -42,9 +42,17 @@ extern long do_platform_op( XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op); +/* + * To allow safe resume of do_memory_op() after preemption, we need to know + * at what point in the page list to resume. 
For this purpose I steal the + * high-order bits of the @cmd parameter, which are otherwise unused and zero. + */ +#define MEMOP_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */ +#define MEMOP_CMD_MASK ((1 << MEMOP_EXTENT_SHIFT) - 1) + extern long do_memory_op( - int cmd, + unsigned long cmd, XEN_GUEST_HANDLE(void) arg); extern long @@ -108,4 +116,13 @@ do_kexec_op( int arg1, XEN_GUEST_HANDLE(void) arg); +#ifdef CONFIG_COMPAT + +extern int +compat_memory_op( + unsigned int cmd, + XEN_GUEST_HANDLE(void) arg); + +#endif + #endif /* __XEN_HYPERCALL_H__ */ diff --git a/xen/include/xen/init.h b/xen/include/xen/init.h index 0709c9abd8..3ff3ff0e37 100644 --- a/xen/include/xen/init.h +++ b/xen/include/xen/init.h @@ -4,6 +4,25 @@ #include <xen/config.h> #include <asm/init.h> +/* + * Mark functions and data as being only used at initialization + * or exit time. + */ +#define __init \ + __attribute__ ((__section__ (".init.text"))) +#define __exit \ + __attribute_used__ __attribute__ ((__section__(".exit.text"))) +#define __initdata \ + __attribute__ ((__section__ (".init.data"))) +#define __exitdata \ + __attribute_used__ __attribute__ ((__section__ (".exit.data"))) +#define __initsetup \ + __attribute_used__ __attribute__ ((__section__ (".init.setup"))) +#define __init_call \ + __attribute_used__ __attribute__ ((__section__ (".initcall1.init"))) +#define __exit_call \ + __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) + /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) * as `initialization' functions. The kernel can take this diff --git a/xen/include/xen/kernel.h b/xen/include/xen/kernel.h index 53a7251838..34e831a39a 100644 --- a/xen/include/xen/kernel.h +++ b/xen/include/xen/kernel.h @@ -41,21 +41,38 @@ * @member: the name of the member within the struct. 
* */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ +#define container_of(ptr, type, member) ({ \ + typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) /* * Check at compile time that something is of a particular type. * Always evaluates to 1 so you may use it easily in comparisons. */ -#define typecheck(type,x) \ -({ type __dummy; \ - typeof(x) __dummy2; \ - (void)(&__dummy == &__dummy2); \ - 1; \ +#define typecheck(type,x) \ +({ type __dummy; \ + typeof(x) __dummy2; \ + (void)(&__dummy == &__dummy2); \ + 1; \ }) +extern char _start[], _end[]; +#define is_kernel(p) ({ \ + char *__p = (char *)(unsigned long)(p); \ + (__p >= _start) && (__p <= _end); \ +}) + +extern char _stext[], _etext[]; +#define is_kernel_text(p) ({ \ + char *__p = (char *)(unsigned long)(p); \ + (__p >= _stext) && (__p <= _etext); \ +}) + +extern char _sinittext[], _einittext[]; +#define is_kernel_inittext(p) ({ \ + char *__p = (char *)(unsigned long)(p); \ + (__p >= _sinittext) && (__p <= _einittext); \ +}) #endif /* _LINUX_KERNEL_H */ diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h index e05eac28a9..44ad7874cc 100644 --- a/xen/include/xen/lib.h +++ b/xen/include/xen/lib.h @@ -74,15 +74,15 @@ extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) __attribute__ ((format (printf, 3, 0))); long simple_strtol( - const char *cp,char **endp, unsigned int base); + const char *cp,const char **endp, unsigned int base); unsigned long simple_strtoul( - const char *cp,char **endp, unsigned int base); + const char *cp,const char **endp, unsigned int base); long long simple_strtoll( - const char *cp,char **endp, unsigned int base); + const char *cp,const char **endp, unsigned int base); unsigned long long simple_strtoull( - const char *cp,char **endp, unsigned int base); + const char *cp,const char **endp, unsigned int base); -unsigned long long 
parse_size_and_unit(const char *s, char **ps); +unsigned long long parse_size_and_unit(const char *s, const char **ps); #define TAINT_UNSAFE_SMP (1<<0) #define TAINT_MACHINE_CHECK (1<<1) diff --git a/xen/include/xen/multicall.h b/xen/include/xen/multicall.h index 5d8b4db1ca..34cdb4664f 100644 --- a/xen/include/xen/multicall.h +++ b/xen/include/xen/multicall.h @@ -7,6 +7,9 @@ #include <xen/percpu.h> #include <asm/multicall.h> +#ifdef CONFIG_COMPAT +#include <compat/xen.h> +#endif #define _MCSF_in_multicall 0 #define _MCSF_call_preempted 1 @@ -14,7 +17,12 @@ #define MCSF_call_preempted (1<<_MCSF_call_preempted) struct mc_state { unsigned long flags; - struct multicall_entry call; + union { + struct multicall_entry call; +#ifdef CONFIG_COMPAT + struct compat_multicall_entry compat_call; +#endif + }; }; DECLARE_PER_CPU(struct mc_state, mc_state); diff --git a/xen/include/xen/perfc.h b/xen/include/xen/perfc.h index ab2b84b00c..97ef9cd2ed 100644 --- a/xen/include/xen/perfc.h +++ b/xen/include/xen/perfc.h @@ -102,6 +102,9 @@ extern struct perfcounter perfcounters; #else #define perfc_incr_histo(_x,_v,_n) ((void)0) #endif + +struct xen_sysctl_perfc_op; +int perfc_control(struct xen_sysctl_perfc_op *); #else /* PERF_COUNTERS */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 1aa4f59a3c..3e72ea9b31 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -6,6 +6,7 @@ #include <xen/types.h> #include <xen/spinlock.h> #include <xen/smp.h> +#include <xen/shared.h> #include <public/xen.h> #include <public/domctl.h> #include <public/vcpu.h> @@ -17,15 +18,26 @@ #include <xen/xenoprof.h> #include <xen/irq.h> +#ifdef CONFIG_COMPAT +#include <compat/vcpu.h> +DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t); +#endif + extern unsigned long volatile jiffies; extern rwlock_t domlist_lock; /* A global pointer to the initial domain (DOM0). 
*/ extern struct domain *dom0; -#define MAX_EVTCHNS NR_EVENT_CHANNELS +#ifndef CONFIG_COMPAT +#define MAX_EVTCHNS(d) NR_EVENT_CHANNELS +#else +#define MAX_EVTCHNS(d) (!IS_COMPAT(d) ? \ + NR_EVENT_CHANNELS : \ + sizeof(unsigned int) * sizeof(unsigned int) * 64) +#endif #define EVTCHNS_PER_BUCKET 128 -#define NR_EVTCHN_BUCKETS (MAX_EVTCHNS / EVTCHNS_PER_BUCKET) +#define NR_EVTCHN_BUCKETS (NR_EVENT_CHANNELS / EVTCHNS_PER_BUCKET) struct evtchn { @@ -75,7 +87,16 @@ struct vcpu void *sched_priv; /* scheduler-specific data */ struct vcpu_runstate_info runstate; +#ifndef CONFIG_COMPAT +# define runstate_guest(v) ((v)->runstate_guest) XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */ +#else +# define runstate_guest(v) ((v)->runstate_guest.native) + union { + XEN_GUEST_HANDLE(vcpu_runstate_info_t) native; + XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat; + } runstate_guest; /* guest address */ +#endif unsigned long vcpu_flags; @@ -202,8 +223,8 @@ struct domain_setup_info * You should use the xen_elfnote_* accessors below in order to * pickup the correct one and retain backwards compatibility. */ - void *__elfnote_section, *__elfnote_section_end; - char *__xen_guest_string; + const void *__elfnote_section, *__elfnote_section_end; + const char *__xen_guest_string; }; extern struct vcpu *idle_vcpu[NR_CPUS]; @@ -254,7 +275,13 @@ int construct_dom0( unsigned long image_start, unsigned long image_len, unsigned long initrd_start, unsigned long initrd_len, char *cmdline); -int set_info_guest(struct domain *d, xen_domctl_vcpucontext_t *); + +typedef union { + struct xen_domctl_vcpucontext *nat; + struct compat_domctl_vcpucontext *cmp; +} xen_domctl_vcpucontext_u __attribute__((__transparent_union__)); + +int set_info_guest(struct domain *d, xen_domctl_vcpucontext_u); struct domain *find_domain_by_id(domid_t dom); void domain_destroy(struct domain *d); @@ -422,6 +449,9 @@ extern struct domain *domain_list; /* Domain is paused by the hypervisor? 
*/ #define _DOMF_paused 5 #define DOMF_paused (1UL<<_DOMF_paused) + /* Domain is a compatibility one? */ +#define _DOMF_compat 6 +#define DOMF_compat (1UL<<_DOMF_compat) static inline int vcpu_runnable(struct vcpu *v) { @@ -458,6 +488,13 @@ static inline void vcpu_unblock(struct vcpu *v) #define IS_PRIV(_d) ((_d)->is_privileged) +#ifdef CONFIG_COMPAT +#define IS_COMPAT(_d) \ + (test_bit(_DOMF_compat, &(_d)->domain_flags)) +#else +#define IS_COMPAT(_d) 0 +#endif + #define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist)) #define is_hvm_domain(d) ((d)->is_hvm) diff --git a/xen/include/xen/shared.h b/xen/include/xen/shared.h new file mode 100644 index 0000000000..ee0510cf67 --- /dev/null +++ b/xen/include/xen/shared.h @@ -0,0 +1,54 @@ +#ifndef __XEN_SHARED_H__ +#define __XEN_SHARED_H__ + +#include <xen/config.h> + +#ifdef CONFIG_COMPAT + +#include <compat/xen.h> + +typedef union { + struct shared_info native; + struct compat_shared_info compat; +} shared_info_t; + +#define __shared_info(d, s, field) (*(!IS_COMPAT(d) ? \ + &(s)->native.field : \ + &(s)->compat.field)) +#define __shared_info_addr(d, s, field) (!IS_COMPAT(d) ? \ + (void *)&(s)->native.field : \ + (void *)&(s)->compat.field) + +#define shared_info(d, field) __shared_info(d, (d)->shared_info, field) +#define shared_info_addr(d, field) __shared_info_addr(d, (d)->shared_info, field) + +typedef union { + struct vcpu_info native; + struct compat_vcpu_info compat; +} vcpu_info_t; + +#define vcpu_info(v, field) (*(!IS_COMPAT((v)->domain) ? \ + &(v)->vcpu_info->native.field : \ + &(v)->vcpu_info->compat.field)) +#define vcpu_info_addr(v, field) (!IS_COMPAT((v)->domain) ? 
\ + (void *)&(v)->vcpu_info->native.field : \ + (void *)&(v)->vcpu_info->compat.field) + +#else + +typedef struct shared_info shared_info_t; + +#define __shared_info(d, s, field) ((s)->field) +#define __shared_info_addr(d, s, field) ((void *)&(s)->field) + +#define shared_info(d, field) ((d)->shared_info->field) +#define shared_info_addr(d, field) ((void *)&(d)->shared_info->field) + +typedef struct vcpu_info vcpu_info_t; + +#define vcpu_info(v, field) ((v)->vcpu_info->field) +#define vcpu_info_addr(v, field) ((void *)&(v)->vcpu_info->field) + +#endif + +#endif /* __XEN_SHARED_H__ */ diff --git a/xen/include/xen/symbols.h b/xen/include/xen/symbols.h index a30b82c002..082d707469 100644 --- a/xen/include/xen/symbols.h +++ b/xen/include/xen/symbols.h @@ -6,9 +6,6 @@ #define KSYM_NAME_LEN 127 -extern int is_kernel_text(unsigned long addr); -extern unsigned long kernel_text_end(void); - /* Lookup an address. */ const char *symbols_lookup(unsigned long addr, unsigned long *symbolsize, @@ -16,7 +13,7 @@ const char *symbols_lookup(unsigned long addr, char *namebuf); /* Replace "%s" in format with address, if found */ -extern void __print_symbol(const char *fmt, unsigned long address); +void __print_symbol(const char *fmt, unsigned long address); /* This macro allows us to keep printk typechecking */ static void __check_printsym_format(const char *fmt, ...) 
diff --git a/xen/include/xen/xenoprof.h b/xen/include/xen/xenoprof.h index b9fe9de528..8983f0c987 100644 --- a/xen/include/xen/xenoprof.h +++ b/xen/include/xen/xenoprof.h @@ -10,6 +10,7 @@ #ifndef __XEN_XENOPROF_H__ #define __XEN_XENOPROF_H__ +#include <xen/config.h> #include <public/xenoprof.h> #include <asm/xenoprof.h> @@ -22,9 +23,19 @@ #define XENOPROF_READY 2 #define XENOPROF_PROFILING 3 +#ifndef CONFIG_COMPAT +typedef struct xenoprof_buf xenoprof_buf_t; +#else +#include <compat/xenoprof.h> +typedef union { + struct xenoprof_buf native; + struct compat_oprof_buf compat; +} xenoprof_buf_t; +#endif + struct xenoprof_vcpu { int event_size; - struct xenoprof_buf *buffer; + xenoprof_buf_t *buffer; }; struct xenoprof { @@ -35,9 +46,22 @@ struct xenoprof { int domain_type; int domain_ready; int is_primary; +#ifdef CONFIG_COMPAT + int is_compat; +#endif struct xenoprof_vcpu vcpu [MAX_VIRT_CPUS]; }; +#ifndef CONFIG_COMPAT +#define XENOPROF_COMPAT(x) 0 +#define xenoprof_buf(d, b, field) ((b)->field) +#else +#define XENOPROF_COMPAT(x) ((x)->is_compat) +#define xenoprof_buf(d, b, field) (*(!(d)->xenoprof->is_compat ? \ + &(b)->native.field : \ + &(b)->compat.field)) +#endif + struct domain; void free_xenoprof_pages(struct domain *d); diff --git a/xen/include/xen/xmalloc.h b/xen/include/xen/xmalloc.h index 893627f04a..dbfdce4fa4 100644 --- a/xen/include/xen/xmalloc.h +++ b/xen/include/xen/xmalloc.h @@ -12,7 +12,7 @@ #define xmalloc_bytes(_bytes) (_xmalloc(_bytes, SMP_CACHE_BYTES)) /* Free any of the above. */ -extern void xfree(const void *); +extern void xfree(void *); /* Underlying functions */ extern void *_xmalloc(size_t size, size_t align); diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst new file mode 100644 index 0000000000..950f60b2f4 --- /dev/null +++ b/xen/include/xlat.lst @@ -0,0 +1,52 @@ +# First column indicator: +# ! - needs translation +# ? - needs checking +? dom0_vga_console_info xen.h +? mmu_update xen.h +! mmuext_op xen.h +! start_info xen.h +? 
vcpu_time_info xen.h +! cpu_user_regs arch-x86/xen-@arch@.h +! trap_info arch-x86/xen.h +! vcpu_guest_context arch-x86/xen.h +? acm_getdecision acm_ops.h +! ctl_cpumap domctl.h +! domctl_scheduler_op domctl.h +! domctl_shadow_op domctl.h +! domctl_shadow_op_stats domctl.h +? evtchn_alloc_unbound event_channel.h +? evtchn_bind_interdomain event_channel.h +? evtchn_bind_ipi event_channel.h +? evtchn_bind_pirq event_channel.h +? evtchn_bind_vcpu event_channel.h +? evtchn_bind_virq event_channel.h +? evtchn_close event_channel.h +? evtchn_op event_channel.h +? evtchn_send event_channel.h +? evtchn_status event_channel.h +? evtchn_unmask event_channel.h +! gnttab_copy grant_table.h +? gnttab_dump_table grant_table.h +? gnttab_map_grant_ref grant_table.h +! gnttab_setup_table grant_table.h +! gnttab_transfer grant_table.h +? gnttab_unmap_grant_ref grant_table.h +? grant_entry grant_table.h +? kexec_exec kexec.h +! kexec_image kexec.h +! add_to_physmap memory.h +! foreign_memory_map memory.h +! memory_exchange memory.h +! memory_map memory.h +! memory_reservation memory.h +! translate_gpfn_list memory.h +! sched_poll sched.h +? sched_remote_shutdown sched.h +? sched_shutdown sched.h +? sysctl_perfc_desc sysctl.h +! sysctl_perfc_op sysctl.h +! sysctl_tbuf_op sysctl.h +? t_buf trace.h +! vcpu_runstate_info vcpu.h +? xenoprof_init xenoprof.h +? 
xenoprof_passive xenoprof.h diff --git a/xen/tools/get-fields.sh b/xen/tools/get-fields.sh new file mode 100644 index 0000000000..01af5eaaf3 --- /dev/null +++ b/xen/tools/get-fields.sh @@ -0,0 +1,425 @@ +#!/bin/sh +test -n "$1" -a -n "$2" -a -n "$3" +set -ef + +get_fields() { + local level=1 aggr=0 name= fields= + for token in $2 + do + case "$token" in + struct|union) + test $level != 1 || aggr=1 fields= name= + ;; + "{") + level=$(expr $level + 1) + ;; + "}") + level=$(expr $level - 1) + if [ $level = 1 -a $name = $1 ] + then + echo "$fields }" + return 0 + fi + ;; + [[:alpha:]_]*) + test $aggr = 0 -o -n "$name" || name="$token" + ;; + esac + test $aggr = 0 || fields="$fields $token" + done +} + +build_enums() { + local level=1 kind= fields= members= named= id= token + for token in $2 + do + case "$token" in + struct|union) + test $level != 2 || fields=" " + kind="$token;$kind" + ;; + "{") + level=$(expr $level + 1) + ;; + "}") + level=$(expr $level - 1) + if [ $level = 1 ] + then + if [ "${kind%%;*}" = union ] + then + echo + echo "enum XLAT_$1 {" + for m in $members + do + echo " XLAT_${1}_$m," + done + echo "};" + fi + return 0 + elif [ $level = 2 ] + then + named='?' + fi + ;; + [[:alpha:]]*) + id=$token + if [ -n "$named" -a -n "${kind#*;}" ] + then + build_enums ${1}_$token "$fields" + named='!' 
+ fi + ;; + ",") + test $level != 2 || members="$members $id" + ;; + ";") + test $level != 2 || members="$members $id" + test -z "$named" || kind=${kind#*;} + named= + ;; + esac + test -z "$fields" || fields="$fields $token" + done +} + +handle_field() { + if [ -z "$5" ] + then + echo " \\" + if [ -z "$4" ] + then + echo -n "$1(_d_)->$3 = (_s_)->$3;" + else + echo -n "$1XLAT_${2}_HNDL_$(echo $3 | sed 's,\.,_,g')(_d_, _s_);" + fi + elif [ -z "$(echo "$5" | sed 's,[^{}],,g')" ] + then + local tag=$(echo "$5" | sed 's,[[:space:]]*\(struct\|union\)[[:space:]]\+\(compat_\)\?\([[:alnum:]_]\+\)[[:space:]].*,\3,') + echo " \\" + echo -n "${1}XLAT_$tag(&(_d_)->$3, &(_s_)->$3);" + else + local level=1 kind= fields= id= array= arrlvl=1 array_type= type= token + for token in $5 + do + case "$token" in + struct|union) + test $level != 2 || fields=" " + if [ $level == 1 ] + then + kind=$token + if [ $kind = union ] + then + echo " \\" + echo -n "${1}switch ($(echo $3 | sed 's,\.,_,g')) {" + fi + fi + ;; + "{") + level=$(expr $level + 1) id= + ;; + "}") + level=$(expr $level - 1) id= + if [ $level == 1 -a $kind = union ] + then + echo " \\" + echo -n "$1}" + fi + ;; + "[") + if [ $level != 2 -o $arrlvl != 1 ] + then + : + elif [ -z "$array" ] + then + array=" " + else + array="$array;" + fi + arrlvl=$(expr $arrlvl + 1) + ;; + "]") + arrlvl=$(expr $arrlvl - 1) + ;; + COMPAT_HANDLE\(*\)) + if [ $level == 2 -a -z "$id" ] + then + type=${token#COMPAT_HANDLE?} + type=${type%?} + type=${type#compat_} + fi + ;; + compat_domain_handle_t) + if [ $level == 2 -a -z "$id" ] + then + array_type=$token + fi + ;; + [[:alpha:]]*) + id=$token + ;; + [\,\;]) + if [ $level == 2 -a -n "$(echo $id | sed 's,^_pad[[:digit:]]*,,')" ] + then + if [ $kind = union ] + then + echo " \\" + echo -n "${1}case XLAT_${2}_$(echo $3.$id | sed 's,\.,_,g'):" + handle_field "$1 " $2 $3.$id "$type" "$fields" + elif [ -z "$array" -a -z "$array_type" ] + then + handle_field "$1" $2 $3.$id "$type" "$fields" + elif [ -z 
"$array" ] + then + copy_array " " $3.$id + else + handle_array "$1" $2 $3.$id "${array#*;}" "$type" "$fields" + fi + test "$token" != ";" || fields= id= type= + array= + if [ $kind = union ] + then + echo " \\" + echo -n "$1 break;" + fi + fi + ;; + *) + if [ -n "$array" ] + then + array="$array $token" + fi + ;; + esac + test -z "$fields" || fields="$fields $token" + done + fi +} + +copy_array() { + echo " \\" + echo "${1}if ((_d_)->$2 != (_s_)->$2) \\" + echo -n "$1 memcpy((_d_)->$2, (_s_)->$2, sizeof((_d_)->$2));" +} + +handle_array() { + local i="i$(echo $4 | sed 's,[^;], ,g' | wc -w)" + echo " \\" + echo "$1{ \\" + echo "$1 unsigned int $i; \\" + echo -n "$1 for ($i = 0; $i < "${4%%;*}"; ++$i) {" + if [ "$4" = "${4#*;}" ] + then + handle_field "$1 " $2 $3[$i] "$5" "$6" + else + handle_array "$1 " $2 $3[$i] "${4#*;}" "$5" "$6" + fi + echo " \\" + echo "$1 } \\" + echo -n "$1}" +} + +build_body() { + echo + echo -n "#define XLAT_$1(_d_, _s_)" + local level=1 fields= id= array= arrlvl=1 array_type= type= token + for token in $2 + do + case "$token" in + struct|union) + test $level != 2 || fields=" " + ;; + "{") + level=$(expr $level + 1) id= + ;; + "}") + level=$(expr $level - 1) id= + ;; + "[") + if [ $level != 2 -o $arrlvl != 1 ] + then + : + elif [ -z "$array" ] + then + array=" " + else + array="$array;" + fi + arrlvl=$(expr $arrlvl + 1) + ;; + "]") + arrlvl=$(expr $arrlvl - 1) + ;; + COMPAT_HANDLE\(*\)) + if [ $level == 2 -a -z "$id" ] + then + type=${token#COMPAT_HANDLE?} + type=${type%?} + type=${type#compat_} + fi + ;; + compat_domain_handle_t) + if [ $level == 2 -a -z "$id" ] + then + array_type=$token + fi + ;; + [[:alpha:]_]*) + if [ -n "$array" ] + then + array="$array $token" + else + id=$token + fi + ;; + [\,\;]) + if [ $level == 2 -a -n "$(echo $id | sed 's,^_pad[[:digit:]]*,,')" ] + then + if [ -z "$array" -a -z "$array_type" ] + then + handle_field " " $1 $id "$type" "$fields" + elif [ -z "$array" ] + then + copy_array " " $id + else + 
handle_array " " $1 $id "${array#*;}" "$type" "$fields" + fi + test "$token" != ";" || fields= id= type= + array= + fi + ;; + *) + if [ -n "$array" ] + then + array="$array $token" + fi + ;; + esac + test -z "$fields" || fields="$fields $token" + done + echo "" +} + +check_field() { + if [ -z "$(echo "$4" | sed 's,[^{}],,g')" ] + then + echo "; \\" + local n=$(echo $3 | sed 's,[^.], ,g' | wc -w) + if [ -n "$4" ] + then + for n in $4 + do + case $n in + struct|union) + ;; + [[:alpha:]_]*) + echo -n " CHECK_$n" + break + ;; + *) + echo "Malformed compound declaration: '$n'" >&2 + exit 1 + ;; + esac + done + elif [ $n = 0 ] + then + echo -n " CHECK_FIELD_($1, $2, $3)" + else + echo -n " CHECK_SUBFIELD_${n}_($1, $2, $(echo $3 | sed 's!\.!, !g'))" + fi + else + local level=1 fields= id= token + for token in $4 + do + case "$token" in + struct|union) + test $level != 2 || fields=" " + ;; + "{") + level=$(expr $level + 1) id= + ;; + "}") + level=$(expr $level - 1) id= + ;; + [[:alpha:]]*) + id=$token + ;; + [\,\;]) + if [ $level == 2 -a -n "$(echo $id | sed 's,^_pad[[:digit:]]*,,')" ] + then + check_field $1 $2 $3.$id "$fields" + test "$token" != ";" || fields= id= + fi + ;; + esac + test -z "$fields" || fields="$fields $token" + done + fi +} + +build_check() { + echo + echo "#define CHECK_$1 \\" + local level=1 fields= kind= id= arrlvl=1 token + for token in $2 + do + case "$token" in + struct|union) + if [ $level == 1 ] + then + kind=$token + echo -n " CHECK_SIZE_($kind, $1)" + elif [ $level == 2 ] + then + fields=" " + fi + ;; + "{") + level=$(expr $level + 1) id= + ;; + "}") + level=$(expr $level - 1) id= + ;; + "[") + arrlvl=$(expr $arrlvl + 1) + ;; + "]") + arrlvl=$(expr $arrlvl - 1) + ;; + [[:alpha:]_]*) + test $level != 2 -o $arrlvl != 1 || id=$token + ;; + [\,\;]) + if [ $level == 2 -a -n "$(echo $id | sed 's,^_pad[[:digit:]]*,,')" ] + then + check_field $kind $1 $id "$fields" + test "$token" != ";" || fields= id= + fi + ;; + esac + test -z "$fields" || 
fields="$fields $token" + done + echo "" +} + +fields="$(get_fields $(echo $2 | sed 's,^compat_xen,compat_,') "$(sed -e 's,^[[:space:]]#.*,,' -e 's!\([]\[,;:{}]\)! \1 !g' $3)")" +if [ -z "$fields" ] +then + echo "Fields of '$2' not found in '$3'" >&2 + exit 1 +fi +name=${2#compat_} +name=${name#xen} +case "$1" in +"!") + build_enums $name "$fields" + build_body $name "$fields" + ;; +"?") + build_check $name "$fields" + ;; +*) + echo "Invalid translation indicator: '$1'" >&2 + exit 1 + ;; +esac diff --git a/xen/tools/symbols.c b/xen/tools/symbols.c index 3eca4cae1b..c21e012206 100644 --- a/xen/tools/symbols.c +++ b/xen/tools/symbols.c @@ -5,7 +5,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - * Usage: nm -n <object-file> | scripts/symbols [--all-symbols] > symbols.S + * Usage: nm -n vmlinux | scripts/symbols [--all-symbols] > symbols.S * * ChangeLog: * @@ -24,75 +24,37 @@ * */ +#define _GNU_SOURCE + #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> -/* maximum token length used. It doesn't pay to increase it a lot, because - * very long substrings probably don't repeat themselves too often. */ -#define MAX_TOK_SIZE 11 #define KSYM_NAME_LEN 127 -/* we use only a subset of the complete symbol table to gather the token count, - * to speed up compression, at the expense of a little compression ratio */ -#define WORKING_SET 1024 - -/* first find the best token only on the list of tokens that would profit more - * than GOOD_BAD_THRESHOLD. Only if this list is empty go to the "bad" list. - * Increasing this value will put less tokens on the "good" list, so the search - * is faster. However, if the good list runs out of tokens, we must painfully - * search the bad list. 
*/ -#define GOOD_BAD_THRESHOLD 10 - -/* token hash parameters */ -#define HASH_BITS 18 -#define HASH_TABLE_SIZE (1 << HASH_BITS) -#define HASH_MASK (HASH_TABLE_SIZE - 1) -#define HASH_BASE_OFFSET 2166136261U -#define HASH_FOLD(a) ((a)&(HASH_MASK)) - -/* flags to mark symbols */ -#define SYM_FLAG_VALID 1 -#define SYM_FLAG_SAMPLED 2 struct sym_entry { unsigned long long addr; - char type; - unsigned char flags; - unsigned char len; + unsigned int len; unsigned char *sym; }; static struct sym_entry *table; -static int size, cnt; -static unsigned long long _stext, _etext; +static unsigned int table_size, table_cnt; +static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext; static int all_symbols = 0; static char symbol_prefix_char = '\0'; -struct token { - unsigned char data[MAX_TOK_SIZE]; - unsigned char len; - /* profit: the number of bytes that could be saved by inserting this - * token into the table */ - int profit; - struct token *next; /* next token on the hash list */ - struct token *right; /* next token on the good/bad list */ - struct token *left; /* previous token on the good/bad list */ - struct token *smaller; /* token that is less one letter than this one */ - }; - -struct token bad_head, good_head; -struct token *hash_table[HASH_TABLE_SIZE]; +int token_profit[0x10000]; /* the table that holds the result of the compression */ -unsigned char best_table[256][MAX_TOK_SIZE+1]; +unsigned char best_table[256][2]; unsigned char best_table_len[256]; -static void -usage(void) +static void usage(void) { fprintf(stderr, "Usage: symbols [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n"); exit(1); @@ -102,21 +64,19 @@ usage(void) * This ignores the intensely annoying "mapping symbols" found * in ARM ELF files: $a, $t and $d. 
*/ -static inline int -is_arm_mapping_symbol(const char *str) +static inline int is_arm_mapping_symbol(const char *str) { return str[0] == '$' && strchr("atd", str[1]) && (str[2] == '\0' || str[2] == '.'); } -static int -read_symbol(FILE *in, struct sym_entry *s) +static int read_symbol(FILE *in, struct sym_entry *s) { char str[500]; - char *sym; + char *sym, stype; int rc; - rc = fscanf(in, "%llx %c %499s\n", &s->addr, &s->type, str); + rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str); if (rc != 3) { if (rc != EOF) { /* skip line */ @@ -135,7 +95,15 @@ read_symbol(FILE *in, struct sym_entry *s) _stext = s->addr; else if (strcmp(sym, "_etext") == 0) _etext = s->addr; - else if (toupper(s->type) == 'A') + else if (strcmp(sym, "_sinittext") == 0) + _sinittext = s->addr; + else if (strcmp(sym, "_einittext") == 0) + _einittext = s->addr; + else if (strcmp(sym, "_sextratext") == 0) + _sextratext = s->addr; + else if (strcmp(sym, "_eextratext") == 0) + _eextratext = s->addr; + else if (toupper(stype) == 'A') { /* Keep these useful absolute symbols */ if (strcmp(sym, "__kernel_syscall_via_break") && @@ -145,22 +113,24 @@ read_symbol(FILE *in, struct sym_entry *s) return -1; } - else if (toupper(s->type) == 'U' || + else if (toupper(stype) == 'U' || is_arm_mapping_symbol(sym)) return -1; + /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */ + else if (str[0] == '$') + return -1; /* include the type field in the symbol name, so that it gets * compressed together */ s->len = strlen(str) + 1; - s->sym = (unsigned char *) malloc(s->len + 1); + s->sym = malloc(s->len + 1); strcpy((char *)s->sym + 1, str); - s->sym[0] = s->type; + s->sym[0] = stype; return 0; } -static int -symbol_valid(struct sym_entry *s) +static int symbol_valid(struct sym_entry *s) { /* Symbols which vary between passes. Passes 1 and 2 must have * identical symbol lists. 
The symbols_* symbols below are only added @@ -189,7 +159,9 @@ symbol_valid(struct sym_entry *s) /* if --all-symbols is not specified, then symbols outside the text * and inittext sections are discarded */ if (!all_symbols) { - if (s->addr < _stext || s->addr > _etext) + if ((s->addr < _stext || s->addr > _etext) + && (s->addr < _sinittext || s->addr > _einittext) + && (s->addr < _sextratext || s->addr > _eextratext)) return 0; /* Corner case. Discard any symbols with the same value as * _etext _einittext or _eextratext; they can move between pass @@ -197,7 +169,9 @@ symbol_valid(struct sym_entry *s) * move then they may get dropped in pass 2, which breaks the * symbols rules. */ - if (s->addr == _etext && strcmp((char *)s->sym + offset, "_etext")) + if ((s->addr == _etext && strcmp((char*)s->sym + offset, "_etext")) || + (s->addr == _einittext && strcmp((char*)s->sym + offset, "_einittext")) || + (s->addr == _eextratext && strcmp((char*)s->sym + offset, "_eextratext"))) return 0; } @@ -212,20 +186,19 @@ symbol_valid(struct sym_entry *s) return 1; } -static void -read_map(FILE *in) +static void read_map(FILE *in) { while (!feof(in)) { - if (cnt >= size) { - size += 10000; - table = realloc(table, sizeof(*table) * size); + if (table_cnt >= table_size) { + table_size += 10000; + table = realloc(table, sizeof(*table) * table_size); if (!table) { fprintf(stderr, "out of memory\n"); exit (1); } } - if (read_symbol(in, &table[cnt]) == 0) - cnt++; + if (read_symbol(in, &table[table_cnt]) == 0) + table_cnt++; } } @@ -269,10 +242,9 @@ static int expand_symbol(unsigned char *data, int len, char *result) return total; } -static void -write_src(void) +static void write_src(void) { - int i, k, off, valid; + unsigned int i, k, off; unsigned int best_idx[256]; unsigned int *markers; char buf[KSYM_NAME_LEN+1]; @@ -289,33 +261,24 @@ write_src(void) printf(".data\n"); output_label("symbols_addresses"); - valid = 0; - for (i = 0; i < cnt; i++) { - if (table[i].flags & SYM_FLAG_VALID) 
{ - printf("\tPTR\t%#llx\n", table[i].addr); - valid++; - } + for (i = 0; i < table_cnt; i++) { + printf("\tPTR\t%#llx\n", table[i].addr); } printf("\n"); output_label("symbols_num_syms"); - printf("\tPTR\t%d\n", valid); + printf("\tPTR\t%d\n", table_cnt); printf("\n"); /* table of offset markers, that give the offset in the compressed stream * every 256 symbols */ - markers = (unsigned int *) malloc(sizeof(unsigned int)*((valid + 255) / 256)); + markers = (unsigned int *) malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256)); output_label("symbols_names"); - valid = 0; off = 0; - for (i = 0; i < cnt; i++) { - - if (!(table[i].flags & SYM_FLAG_VALID)) - continue; - - if ((valid & 0xFF) == 0) - markers[valid >> 8] = off; + for (i = 0; i < table_cnt; i++) { + if ((i & 0xFF) == 0) + markers[i >> 8] = off; printf("\t.byte 0x%02x", table[i].len); for (k = 0; k < table[i].len; k++) @@ -323,12 +286,11 @@ write_src(void) printf("\n"); off += table[i].len + 1; - valid++; } printf("\n"); output_label("symbols_markers"); - for (i = 0; i < ((valid + 255) >> 8); i++) + for (i = 0; i < ((table_cnt + 255) >> 8); i++) printf("\tPTR\t%d\n", markers[i]); printf("\n"); @@ -338,7 +300,7 @@ write_src(void) off = 0; for (i = 0; i < 256; i++) { best_idx[i] = off; - expand_symbol(best_table[i],best_table_len[i],buf); + expand_symbol(best_table[i], best_table_len[i], buf); printf("\t.asciz\t\"%s\"\n", buf); off += strlen(buf) + 1; } @@ -353,153 +315,13 @@ write_src(void) /* table lookup compression functions */ -static inline unsigned int rehash_token(unsigned int hash, unsigned char data) -{ - return ((hash * 16777619) ^ data); -} - -static unsigned int hash_token(unsigned char *data, int len) -{ - unsigned int hash=HASH_BASE_OFFSET; - int i; - - for (i = 0; i < len; i++) - hash = rehash_token(hash, data[i]); - - return HASH_FOLD(hash); -} - -/* find a token given its data and hash value */ -static struct token *find_token_hash(unsigned char *data, int len, unsigned int hash) -{ - 
struct token *ptr; - - ptr = hash_table[hash]; - - while (ptr) { - if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0)) - return ptr; - ptr=ptr->next; - } - - return NULL; -} - -static inline void insert_token_in_group(struct token *head, struct token *ptr) -{ - ptr->right = head->right; - ptr->right->left = ptr; - head->right = ptr; - ptr->left = head; -} - -static inline void remove_token_from_group(struct token *ptr) -{ - ptr->left->right = ptr->right; - ptr->right->left = ptr->left; -} - - -/* build the counts for all the tokens that start with "data", and have lenghts - * from 2 to "len" */ -static void learn_token(unsigned char *data, int len) -{ - struct token *ptr,*last_ptr; - int i, newprofit; - unsigned int hash = HASH_BASE_OFFSET; - unsigned int hashes[MAX_TOK_SIZE + 1]; - - if (len > MAX_TOK_SIZE) - len = MAX_TOK_SIZE; - - /* calculate and store the hash values for all the sub-tokens */ - hash = rehash_token(hash, data[0]); - for (i = 2; i <= len; i++) { - hash = rehash_token(hash, data[i-1]); - hashes[i] = HASH_FOLD(hash); - } - - last_ptr = NULL; - ptr = NULL; - - for (i = len; i >= 2; i--) { - hash = hashes[i]; - - if (!ptr) ptr = find_token_hash(data, i, hash); - - if (!ptr) { - /* create a new token entry */ - ptr = (struct token *) malloc(sizeof(*ptr)); - - memcpy(ptr->data, data, i); - ptr->len = i; - - /* when we create an entry, it's profit is 0 because - * we also take into account the size of the token on - * the compressed table. 
We then subtract GOOD_BAD_THRESHOLD - * so that the test to see if this token belongs to - * the good or bad list, is a comparison to zero */ - ptr->profit = -GOOD_BAD_THRESHOLD; - - ptr->next = hash_table[hash]; - hash_table[hash] = ptr; - - insert_token_in_group(&bad_head, ptr); - - ptr->smaller = NULL; - } else { - newprofit = ptr->profit + (ptr->len - 1); - /* check to see if this token needs to be moved to a - * different list */ - if((ptr->profit < 0) && (newprofit >= 0)) { - remove_token_from_group(ptr); - insert_token_in_group(&good_head,ptr); - } - ptr->profit = newprofit; - } - - if (last_ptr) last_ptr->smaller = ptr; - last_ptr = ptr; - - ptr = ptr->smaller; - } -} - -/* decrease the counts for all the tokens that start with "data", and have lenghts - * from 2 to "len". This function is much simpler than learn_token because we have - * more guarantees (tho tokens exist, the ->smaller pointer is set, etc.) - * The two separate functions exist only because of compression performance */ -static void forget_token(unsigned char *data, int len) -{ - struct token *ptr; - int i, newprofit; - unsigned int hash=0; - - if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE; - - hash = hash_token(data, len); - ptr = find_token_hash(data, len, hash); - - for (i = len; i >= 2; i--) { - - newprofit = ptr->profit - (ptr->len - 1); - if ((ptr->profit >= 0) && (newprofit < 0)) { - remove_token_from_group(ptr); - insert_token_in_group(&bad_head, ptr); - } - ptr->profit=newprofit; - - ptr=ptr->smaller; - } -} - /* count all the possible tokens in a symbol */ static void learn_symbol(unsigned char *symbol, int len) { int i; for (i = 0; i < len - 1; i++) - learn_token(symbol + i, len - i); + token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++; } /* decrease the count for all the possible tokens in a symbol */ @@ -508,117 +330,98 @@ static void forget_symbol(unsigned char *symbol, int len) int i; for (i = 0; i < len - 1; i++) - forget_token(symbol + i, len - i); + token_profit[ symbol[i] + 
(symbol[i + 1] << 8) ]--; } -/* set all the symbol flags and do the initial token count */ +/* remove all the invalid symbols from the table and do the initial token count */ static void build_initial_tok_table(void) { - int i, use_it, valid; + unsigned int i, pos; - valid = 0; - for (i = 0; i < cnt; i++) { - table[i].flags = 0; + pos = 0; + for (i = 0; i < table_cnt; i++) { if ( symbol_valid(&table[i]) ) { - table[i].flags |= SYM_FLAG_VALID; - valid++; + if (pos != i) + table[pos] = table[i]; + learn_symbol(table[pos].sym, table[pos].len); + pos++; } } + table_cnt = pos; +} - use_it = 0; - for (i = 0; i < cnt; i++) { - - /* subsample the available symbols. This method is almost like - * a Bresenham's algorithm to get uniformly distributed samples - * across the symbol table */ - if (table[i].flags & SYM_FLAG_VALID) { - - use_it += WORKING_SET; - - if (use_it >= valid) { - table[i].flags |= SYM_FLAG_SAMPLED; - use_it -= valid; - } - } - if (table[i].flags & SYM_FLAG_SAMPLED) - learn_symbol(table[i].sym, table[i].len); - } +static void *memmem_pvt(void *h, size_t hlen, void *n, size_t nlen) +{ + char *p; + for (p = h; (p - (char *)h) <= (long)(hlen - nlen); p++) + if (!memcmp(p, n, nlen)) return p; + return NULL; } /* replace a given token in all the valid symbols. 
Use the sampled symbols * to update the counts */ -static void compress_symbols(unsigned char *str, int tlen, int idx) +static void compress_symbols(unsigned char *str, int idx) { - int i, len, learn, size; - unsigned char *p; + unsigned int i, len, size; + unsigned char *p1, *p2; - for (i = 0; i < cnt; i++) { - - if (!(table[i].flags & SYM_FLAG_VALID)) continue; + for (i = 0; i < table_cnt; i++) { len = table[i].len; - learn = 0; - p = table[i].sym; + p1 = table[i].sym; + + /* find the token on the symbol */ + p2 = memmem_pvt(p1, len, str, 2); + if (!p2) continue; + + /* decrease the counts for this symbol's tokens */ + forget_symbol(table[i].sym, len); + + size = len; do { + *p2 = idx; + p2++; + size -= (p2 - p1); + memmove(p2, p2 + 1, size); + p1 = p2; + len--; + + if (size < 2) break; + /* find the token on the symbol */ - p = (unsigned char *) strstr((char *) p, (char *) str); - if (!p) break; - - if (!learn) { - /* if this symbol was used to count, decrease it */ - if (table[i].flags & SYM_FLAG_SAMPLED) - forget_symbol(table[i].sym, len); - learn = 1; - } + p2 = memmem_pvt(p1, size, str, 2); - *p = idx; - size = (len - (p - table[i].sym)) - tlen + 1; - memmove(p + 1, p + tlen, size); - p++; - len -= tlen - 1; + } while (p2); - } while (size >= tlen); + table[i].len = len; - if(learn) { - table[i].len = len; - /* if this symbol was used to count, learn it again */ - if(table[i].flags & SYM_FLAG_SAMPLED) - learn_symbol(table[i].sym, len); - } + /* increase the counts for this symbol's new tokens */ + learn_symbol(table[i].sym, len); } } /* search the token with the maximum profit */ -static struct token *find_best_token(void) +static int find_best_token(void) { - struct token *ptr,*best,*head; - int bestprofit; + int i, best, bestprofit; bestprofit=-10000; + best = 0; - /* failsafe: if the "good" list is empty search from the "bad" list */ - if(good_head.right == &good_head) head = &bad_head; - else head = &good_head; - - ptr = head->right; - best = NULL; - 
while (ptr != head) { - if (ptr->profit > bestprofit) { - bestprofit = ptr->profit; - best = ptr; + for (i = 0; i < 0x10000; i++) { + if (token_profit[i] > bestprofit) { + best = i; + bestprofit = token_profit[i]; } - ptr = ptr->right; } - return best; } /* this is the core of the algorithm: calculate the "best" table */ static void optimize_result(void) { - struct token *best; - int i; + int i, best; /* using the '\0' symbol last allows compress_symbols to use standard * fast string functions */ @@ -632,14 +435,12 @@ static void optimize_result(void) best = find_best_token(); /* place it in the "best" table */ - best_table_len[i] = best->len; - memcpy(best_table[i], best->data, best_table_len[i]); - /* zero terminate the token so that we can use strstr - in compress_symbols */ - best_table[i][best_table_len[i]]='\0'; + best_table_len[i] = 2; + best_table[i][0] = best & 0xFF; + best_table[i][1] = (best >> 8) & 0xFF; /* replace this token in all the valid symbols */ - compress_symbols(best_table[i], best_table_len[i], i); + compress_symbols(best_table[i], i); } } } @@ -647,39 +448,28 @@ static void optimize_result(void) /* start by placing the symbols that are actually used on the table */ static void insert_real_symbols_in_table(void) { - int i, j, c; + unsigned int i, j, c; memset(best_table, 0, sizeof(best_table)); memset(best_table_len, 0, sizeof(best_table_len)); - for (i = 0; i < cnt; i++) { - if (table[i].flags & SYM_FLAG_VALID) { - for (j = 0; j < table[i].len; j++) { - c = table[i].sym[j]; - best_table[c][0]=c; - best_table_len[c]=1; - } + for (i = 0; i < table_cnt; i++) { + for (j = 0; j < table[i].len; j++) { + c = table[i].sym[j]; + best_table[c][0]=c; + best_table_len[c]=1; } } } static void optimize_token_table(void) { - memset(hash_table, 0, sizeof(hash_table)); - - good_head.left = &good_head; - good_head.right = &good_head; - - bad_head.left = &bad_head; - bad_head.right = &bad_head; - build_initial_tok_table(); insert_real_symbols_in_table(); /* 
When valid symbol is not registered, exit to error */ - if (good_head.left == good_head.right && - bad_head.left == bad_head.right) { + if (!table_cnt) { fprintf(stderr, "No valid symbol.\n"); exit(1); } @@ -688,8 +478,7 @@ static void optimize_token_table(void) } -int -main(int argc, char **argv) +int main(int argc, char **argv) { if (argc >= 2) { int i; |