author    Alex Williamson <alex.williamson@hp.com>  2007-08-16 10:47:33 -0600
committer Alex Williamson <alex.williamson@hp.com>  2007-08-16 10:47:33 -0600
commit f628da7abf0393892246e568ae2090541cd72a4c (patch)
tree   88999035fe4cfc22dcadb3742cbca11fc82b266c
parent a787af4ec3fd8874011ae0e9750fdb481a92319a (diff)
parent e28e04d59312ec8cef673ffdca34231648c03431 (diff)
merge with xen-unstable.hg
-rw-r--r--  tools/libfsimage/fat/fsys_fat.c | 6
-rw-r--r--  tools/libxc/xc_domain_restore.c | 35
-rw-r--r--  tools/libxc/xc_evtchn.c | 7
-rw-r--r--  tools/libxc/xc_linux.c | 28
-rw-r--r--  tools/libxc/xc_solaris.c | 28
-rw-r--r--  tools/libxc/xenctrl.h | 134
-rw-r--r--  tools/python/xen/util/acmpolicy.py | 21
-rw-r--r--  tools/python/xen/util/security.py | 64
-rw-r--r--  tools/python/xen/xend/XendCheckpoint.py | 3
-rw-r--r--  tools/python/xen/xend/XendDomainInfo.py | 110
-rw-r--r--  tools/python/xen/xend/server/DevController.py | 71
-rw-r--r--  tools/python/xen/xend/server/blkif.py | 18
-rw-r--r--  tools/python/xen/xm/main.py | 5
-rw-r--r--  tools/xm-test/configure.ac | 7
-rw-r--r--  tools/xm-test/lib/XmTestLib/NetConfig.py | 14
-rw-r--r--  tools/xm-test/lib/XmTestLib/XenDevice.py | 2
-rw-r--r--  tools/xm-test/lib/XmTestLib/config.py.in | 1
-rw-r--r--  unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c | 3
-rw-r--r--  xen/acm/acm_policy.c | 12
-rw-r--r--  xen/arch/x86/acpi/boot.c | 15
-rw-r--r--  xen/arch/x86/hvm/hvm.c | 185
-rw-r--r--  xen/arch/x86/hvm/svm/svm.c | 514
-rw-r--r--  xen/arch/x86/hvm/svm/vmcb.c | 18
-rw-r--r--  xen/arch/x86/hvm/vioapic.c | 4
-rw-r--r--  xen/arch/x86/hvm/vmx/vmcs.c | 78
-rw-r--r--  xen/arch/x86/hvm/vmx/vmx.c | 562
-rw-r--r--  xen/arch/x86/hvm/vmx/x86_32/exits.S | 2
-rw-r--r--  xen/arch/x86/hvm/vmx/x86_64/exits.S | 2
-rw-r--r--  xen/arch/x86/mm.c | 4
-rw-r--r--  xen/arch/x86/mm/hap/guest_walk.c | 2
-rw-r--r--  xen/arch/x86/mm/hap/hap.c | 37
-rw-r--r--  xen/arch/x86/mm/shadow/common.c | 4
-rw-r--r--  xen/arch/x86/mm/shadow/multi.c | 25
-rw-r--r--  xen/arch/x86/physdev.c | 19
-rw-r--r--  xen/arch/x86/setup.c | 25
-rw-r--r--  xen/arch/x86/string.c | 22
-rw-r--r--  xen/arch/x86/traps.c | 28
-rw-r--r--  xen/arch/x86/x86_32/asm-offsets.c | 2
-rw-r--r--  xen/arch/x86/x86_32/traps.c | 7
-rw-r--r--  xen/arch/x86/x86_64/asm-offsets.c | 2
-rw-r--r--  xen/arch/x86/x86_64/compat/traps.c | 19
-rw-r--r--  xen/arch/x86/x86_64/traps.c | 4
-rw-r--r--  xen/common/domctl.c | 8
-rw-r--r--  xen/common/page_alloc.c | 9
-rw-r--r--  xen/common/xencomm.c | 181
-rw-r--r--  xen/drivers/char/console.c | 12
-rw-r--r--  xen/drivers/video/Makefile | 12
-rw-r--r--  xen/drivers/video/vesa.c | 307
-rw-r--r--  xen/drivers/video/vga.c | 108
-rw-r--r--  xen/include/asm-x86/hvm/hvm.h | 72
-rw-r--r--  xen/include/asm-x86/hvm/support.h | 4
-rw-r--r--  xen/include/asm-x86/hvm/svm/asid.h | 14
-rw-r--r--  xen/include/asm-x86/hvm/svm/vmcb.h | 5
-rw-r--r--  xen/include/asm-x86/hvm/vcpu.h | 13
-rw-r--r--  xen/include/asm-x86/hvm/vmx/vmcs.h | 6
-rw-r--r--  xen/include/asm-x86/hvm/vmx/vmx.h | 4
-rw-r--r--  xen/include/public/arch-x86/xen-x86_32.h | 36
-rw-r--r--  xen/include/xen/vga.h | 8
-rw-r--r--  xen/include/xen/xencomm.h | 15
59 files changed, 1555 insertions(+), 1408 deletions(-)
diff --git a/tools/libfsimage/fat/fsys_fat.c b/tools/libfsimage/fat/fsys_fat.c
index 9d74bdf1a9..a0afb87f49 100644
--- a/tools/libfsimage/fat/fsys_fat.c
+++ b/tools/libfsimage/fat/fsys_fat.c
@@ -228,15 +228,15 @@ fat_read (fsi_file_t *ffi, char *buf, int len)
if (!devread (ffi, sector, 0, FAT_CACHE_SIZE, (char*) FAT_BUF))
return 0;
}
- next_cluster = * (unsigned long *) (FAT_BUF + (cached_pos >> 1));
+ next_cluster = ((__u16 *) (FAT_BUF + (cached_pos >> 1)))[0];
if (FAT_SUPER->fat_size == 3)
{
if (cached_pos & 1)
next_cluster >>= 4;
next_cluster &= 0xFFF;
}
- else if (FAT_SUPER->fat_size == 4)
- next_cluster &= 0xFFFF;
+ else if (FAT_SUPER->fat_size > 4)
+ next_cluster |= ((__u16 *) (FAT_BUF + (cached_pos >> 1)))[1] << 16;
if (next_cluster >= FAT_SUPER->clust_eof_marker)
return ret;
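
[The old code loaded a full unsigned long and only handled entries up to 16 bits; the patched lookup reads two aligned 16-bit halves so FAT32's 32-bit entries also work. A minimal sketch of the patched logic, assuming (as the surrounding code suggests) that fat_size counts 4-bit nibbles per FAT entry — 3 for FAT12, 4 for FAT16, 8 for FAT32; the names next_cluster/fat/pos stand in for the file's FAT_BUF and cached_pos.]

    #include <stdint.h>

    static unsigned long next_cluster(const uint8_t *fat, unsigned long pos,
                                      int fat_size)
    {
        const uint16_t *p16 = (const uint16_t *)(fat + (pos >> 1));
        /* A 16-bit load replaces the old unaligned unsigned-long one. */
        unsigned long next = p16[0];

        if (fat_size == 3) {            /* FAT12: 12-bit entries */
            if (pos & 1)
                next >>= 4;             /* odd entries start mid-byte */
            next &= 0xFFF;
        } else if (fat_size > 4) {      /* FAT32: splice in the high word */
            next |= (unsigned long)p16[1] << 16;
        }
        return next;                    /* FAT16 is already complete */
    }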
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index e7ff90dcfd..3a26669aa7 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -272,7 +272,8 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
/* The new domain's shared-info frame number. */
unsigned long shared_info_frame;
unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
- shared_info_t *shared_info = (shared_info_t *)shared_info_page;
+ shared_info_t *old_shared_info = (shared_info_t *)shared_info_page;
+ shared_info_t *new_shared_info;
/* A copy of the CPU context of the guest. */
vcpu_guest_context_t ctxt;
@@ -286,9 +287,6 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
/* Types of the pfns in the current region */
unsigned long region_pfn_type[MAX_BATCH_SIZE];
- /* A temporary mapping, and a copy, of one frame of guest memory. */
- unsigned long *page = NULL;
-
/* A copy of the pfn-to-mfn table frame list. */
xen_pfn_t *p2m_frame_list = NULL;
@@ -1084,17 +1082,30 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
goto out;
}
+ /* Restore contents of shared-info page. No checking needed. */
+ new_shared_info = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
+
+ /* restore saved vcpu_info and arch specific info */
+ memcpy(&new_shared_info->vcpu_info,
+ &old_shared_info->vcpu_info,
+ sizeof(new_shared_info->vcpu_info));
+ memcpy(&new_shared_info->arch,
+ &old_shared_info->arch,
+ sizeof(new_shared_info->arch));
+
/* clear any pending events and the selector */
- memset(&(shared_info->evtchn_pending[0]), 0,
- sizeof (shared_info->evtchn_pending));
+ memset(&(new_shared_info->evtchn_pending[0]), 0,
+ sizeof (new_shared_info->evtchn_pending));
for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- shared_info->vcpu_info[i].evtchn_pending_sel = 0;
+ new_shared_info->vcpu_info[i].evtchn_pending_sel = 0;
- /* Copy saved contents of shared-info page. No checking needed. */
- page = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
- memcpy(page, shared_info, PAGE_SIZE);
- munmap(page, PAGE_SIZE);
+ /* mask event channels */
+ memset(&(new_shared_info->evtchn_mask[0]), 0xff,
+ sizeof (new_shared_info->evtchn_mask));
+
+ /* leave wallclock time. set by hypervisor */
+ munmap(new_shared_info, PAGE_SIZE);
/* Uncanonicalise the pfn-to-mfn table frame-number list. */
for ( i = 0; i < P2M_FL_ENTRIES; i++ )
diff --git a/tools/libxc/xc_evtchn.c b/tools/libxc/xc_evtchn.c
index c0f3b9b54c..0992a7bdbb 100644
--- a/tools/libxc/xc_evtchn.c
+++ b/tools/libxc/xc_evtchn.c
@@ -33,9 +33,10 @@ static int do_evtchn_op(int xc_handle, int cmd, void *arg, size_t arg_size)
}
-int xc_evtchn_alloc_unbound(int xc_handle,
- uint32_t dom,
- uint32_t remote_dom)
+evtchn_port_or_error_t
+xc_evtchn_alloc_unbound(int xc_handle,
+ uint32_t dom,
+ uint32_t remote_dom)
{
int rc;
struct evtchn_alloc_unbound arg = {
diff --git a/tools/libxc/xc_linux.c b/tools/libxc/xc_linux.c
index 440cb4f0f4..d12cf94c05 100644
--- a/tools/libxc/xc_linux.c
+++ b/tools/libxc/xc_linux.c
@@ -254,7 +254,8 @@ int xc_evtchn_notify(int xce_handle, evtchn_port_t port)
return ioctl(xce_handle, IOCTL_EVTCHN_NOTIFY, &notify);
}
-evtchn_port_t xc_evtchn_bind_unbound_port(int xce_handle, int domid)
+evtchn_port_or_error_t
+xc_evtchn_bind_unbound_port(int xce_handle, int domid)
{
struct ioctl_evtchn_bind_unbound_port bind;
@@ -263,8 +264,9 @@ evtchn_port_t xc_evtchn_bind_unbound_port(int xce_handle, int domid)
return ioctl(xce_handle, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind);
}
-evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
- evtchn_port_t remote_port)
+evtchn_port_or_error_t
+xc_evtchn_bind_interdomain(int xce_handle, int domid,
+ evtchn_port_t remote_port)
{
struct ioctl_evtchn_bind_interdomain bind;
@@ -274,22 +276,23 @@ evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
return ioctl(xce_handle, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
}
-int xc_evtchn_unbind(int xce_handle, evtchn_port_t port)
+evtchn_port_or_error_t
+xc_evtchn_bind_virq(int xce_handle, unsigned int virq)
{
- struct ioctl_evtchn_unbind unbind;
+ struct ioctl_evtchn_bind_virq bind;
- unbind.port = port;
+ bind.virq = virq;
- return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind);
+ return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind);
}
-evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq)
+int xc_evtchn_unbind(int xce_handle, evtchn_port_t port)
{
- struct ioctl_evtchn_bind_virq bind;
+ struct ioctl_evtchn_unbind unbind;
- bind.virq = virq;
+ unbind.port = port;
- return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind);
+ return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind);
}
static int dorw(int fd, char *data, size_t size, int do_write)
@@ -317,7 +320,8 @@ static int dorw(int fd, char *data, size_t size, int do_write)
return 0;
}
-evtchn_port_t xc_evtchn_pending(int xce_handle)
+evtchn_port_or_error_t
+xc_evtchn_pending(int xce_handle)
{
evtchn_port_t port;
diff --git a/tools/libxc/xc_solaris.c b/tools/libxc/xc_solaris.c
index 65008115ea..a39157028d 100644
--- a/tools/libxc/xc_solaris.c
+++ b/tools/libxc/xc_solaris.c
@@ -165,7 +165,8 @@ int xc_evtchn_notify(int xce_handle, evtchn_port_t port)
return ioctl(xce_handle, IOCTL_EVTCHN_NOTIFY, &notify);
}
-evtchn_port_t xc_evtchn_bind_unbound_port(int xce_handle, int domid)
+evtchn_port_or_error_t
+xc_evtchn_bind_unbound_port(int xce_handle, int domid)
{
struct ioctl_evtchn_bind_unbound_port bind;
@@ -174,8 +175,9 @@ evtchn_port_t xc_evtchn_bind_unbound_port(int xce_handle, int domid)
return ioctl(xce_handle, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind);
}
-evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
- evtchn_port_t remote_port)
+evtchn_port_or_error_t
+xc_evtchn_bind_interdomain(int xce_handle, int domid,
+ evtchn_port_t remote_port)
{
struct ioctl_evtchn_bind_interdomain bind;
@@ -185,22 +187,23 @@ evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
return ioctl(xce_handle, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
}
-int xc_evtchn_unbind(int xce_handle, evtchn_port_t port)
+evtchn_port_or_error_t
+xc_evtchn_bind_virq(int xce_handle, unsigned int virq)
{
- struct ioctl_evtchn_unbind unbind;
+ struct ioctl_evtchn_bind_virq bind;
- unbind.port = port;
+ bind.virq = virq;
- return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind);
+ return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind);
}
-evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq)
+int xc_evtchn_unbind(int xce_handle, evtchn_port_t port)
{
- struct ioctl_evtchn_bind_virq bind;
+ struct ioctl_evtchn_unbind unbind;
- bind.virq = virq;
+ unbind.port = port;
- return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind);
+ return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind);
}
static int dorw(int fd, char *data, size_t size, int do_write)
@@ -228,7 +231,8 @@ static int dorw(int fd, char *data, size_t size, int do_write)
return 0;
}
-evtchn_port_t xc_evtchn_pending(int xce_handle)
+evtchn_port_or_error_t
+xc_evtchn_pending(int xce_handle)
{
evtchn_port_t port;
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 7d3a7ecc79..73ff16c2cf 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -449,6 +449,9 @@ int xc_domain_setdebugging(int xc_handle,
* EVENT CHANNEL FUNCTIONS
*/
+/* A port identifier is guaranteed to fit in 31 bits. */
+typedef int evtchn_port_or_error_t;
+
/**
* This function allocates an unbound port. Ports are named endpoints used for
* interdomain communication. This function is most useful in opening a
@@ -463,13 +466,78 @@ int xc_domain_setdebugging(int xc_handle,
* @parm remote_dom the ID of the domain who will later bind
* @return allocated port (in @dom) on success, -1 on failure
*/
-int xc_evtchn_alloc_unbound(int xc_handle,
- uint32_t dom,
- uint32_t remote_dom);
+evtchn_port_or_error_t
+xc_evtchn_alloc_unbound(int xc_handle,
+ uint32_t dom,
+ uint32_t remote_dom);
int xc_evtchn_reset(int xc_handle,
uint32_t dom);
+/*
+ * Return a handle to the event channel driver, or -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_evtchn_open(void);
+
+/*
+ * Close a handle previously allocated with xc_evtchn_open().
+ */
+int xc_evtchn_close(int xce_handle);
+
+/*
+ * Return an fd that can be select()ed on for further calls to
+ * xc_evtchn_pending().
+ */
+int xc_evtchn_fd(int xce_handle);
+
+/*
+ * Notify the given event channel. Returns -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_evtchn_notify(int xce_handle, evtchn_port_t port);
+
+/*
+ * Returns a new event port awaiting interdomain connection from the given
+ * domain ID, or -1 on failure, in which case errno will be set appropriately.
+ */
+evtchn_port_or_error_t
+xc_evtchn_bind_unbound_port(int xce_handle, int domid);
+
+/*
+ * Returns a new event port bound to the remote port for the given domain ID,
+ * or -1 on failure, in which case errno will be set appropriately.
+ */
+evtchn_port_or_error_t
+xc_evtchn_bind_interdomain(int xce_handle, int domid,
+ evtchn_port_t remote_port);
+
+/*
+ * Bind an event channel to the given VIRQ. Returns the event channel bound to
+ * the VIRQ, or -1 on failure, in which case errno will be set appropriately.
+ */
+evtchn_port_or_error_t
+xc_evtchn_bind_virq(int xce_handle, unsigned int virq);
+
+/*
+ * Unbind the given event channel. Returns -1 on failure, in which case errno
+ * will be set appropriately.
+ */
+int xc_evtchn_unbind(int xce_handle, evtchn_port_t port);
+
+/*
+ * Return the next event channel to become pending, or -1 on failure, in which
+ * case errno will be set appropriately.
+ */
+evtchn_port_or_error_t
+xc_evtchn_pending(int xce_handle);
+
+/*
+ * Unmask the given event channel. Returns -1 on failure, in which case errno
+ * will be set appropriately.
+ */
+int xc_evtchn_unmask(int xce_handle, evtchn_port_t port);
+
int xc_physdev_pci_access_modify(int xc_handle,
uint32_t domid,
int bus,
@@ -699,66 +767,6 @@ int xc_version(int xc_handle, int cmd, void *arg);
int xc_acm_op(int xc_handle, int cmd, void *arg, unsigned long arg_size);
-/*
- * Return a handle to the event channel driver, or -1 on failure, in which case
- * errno will be set appropriately.
- */
-int xc_evtchn_open(void);
-
-/*
- * Close a handle previously allocated with xc_evtchn_open().
- */
-int xc_evtchn_close(int xce_handle);
-
-/*
- * Return an fd that can be select()ed on for further calls to
- * xc_evtchn_pending().
- */
-int xc_evtchn_fd(int xce_handle);
-
-/*
- * Notify the given event channel. Returns -1 on failure, in which case
- * errno will be set appropriately.
- */
-int xc_evtchn_notify(int xce_handle, evtchn_port_t port);
-
-/*
- * Returns a new event port awaiting interdomain connection from the given
- * domain ID, or -1 on failure, in which case errno will be set appropriately.
- */
-evtchn_port_t xc_evtchn_bind_unbound_port(int xce_handle, int domid);
-
-/*
- * Returns a new event port bound to the remote port for the given domain ID,
- * or -1 on failure, in which case errno will be set appropriately.
- */
-evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
- evtchn_port_t remote_port);
-
-/*
- * Unbind the given event channel. Returns -1 on failure, in which case errno
- * will be set appropriately.
- */
-int xc_evtchn_unbind(int xce_handle, evtchn_port_t port);
-
-/*
- * Bind an event channel to the given VIRQ. Returns the event channel bound to
- * the VIRQ, or -1 on failure, in which case errno will be set appropriately.
- */
-evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq);
-
-/*
- * Return the next event channel to become pending, or -1 on failure, in which
- * case errno will be set appropriately.
- */
-evtchn_port_t xc_evtchn_pending(int xce_handle);
-
-/*
- * Unmask the given event channel. Returns -1 on failure, in which case errno
- * will be set appropriately.
- */
-int xc_evtchn_unmask(int xce_handle, evtchn_port_t port);
-
/**************************
* GRANT TABLE OPERATIONS *
**************************/
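
[Since the header now guarantees that a port identifier fits in 31 bits, callers can treat any negative return as an error. A hedged usage sketch of the API declared above — the VIRQ number is a caller-supplied placeholder, and the helper name is hypothetical.]

    #include <stdio.h>
    #include "xenctrl.h"

    static void evtchn_roundtrip(unsigned int virq /* some VIRQ_* constant */)
    {
        int xce = xc_evtchn_open();
        if ( xce < 0 )
        {
            perror("xc_evtchn_open");
            return;
        }

        evtchn_port_or_error_t port = xc_evtchn_bind_virq(xce, virq);
        if ( port < 0 )
            perror("xc_evtchn_bind_virq");
        else
        {
            /* Wait for an event, then re-arm the port. */
            evtchn_port_or_error_t pending = xc_evtchn_pending(xce);
            if ( pending >= 0 )
                xc_evtchn_unmask(xce, pending);
            xc_evtchn_unbind(xce, port);
        }

        xc_evtchn_close(xce);
    }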
diff --git a/tools/python/xen/util/acmpolicy.py b/tools/python/xen/util/acmpolicy.py
index bb36cd7fa2..3ea1ac5272 100644
--- a/tools/python/xen/util/acmpolicy.py
+++ b/tools/python/xen/util/acmpolicy.py
@@ -191,20 +191,21 @@ class ACMPolicy(XSPolicy):
acmpol_old.policy_get_virtualmachinelabel_names_sorted()
del_array = ""
chg_array = ""
+
for o in oldvmnames:
if o not in newvmnames:
- old_idx = oldvmnames.index(o) + 1 # for _NULL_LABEL_
+ old_idx = oldvmnames.index(o)
if vmlabel_map.has_key(o):
#not a deletion, but a renaming
new = vmlabel_map[o]
- new_idx = newvmnames.index(new) + 1 # for _NULL_LABEL_
+ new_idx = newvmnames.index(new)
chg_array += struct.pack("ii", old_idx, new_idx)
else:
del_array += struct.pack("i", old_idx)
for v in newvmnames:
if v in oldvmnames:
- old_idx = oldvmnames.index(v) + 1 # for _NULL_LABEL_
- new_idx = newvmnames.index(v) + 1 # for _NULL_LABEL_
+ old_idx = oldvmnames.index(v)
+ new_idx = newvmnames.index(v)
if old_idx != new_idx:
chg_array += struct.pack("ii", old_idx, new_idx)
@@ -348,7 +349,7 @@ class ACMPolicy(XSPolicy):
ssidref = xsconstants.INVALID_SSIDREF
names = self.policy_get_virtualmachinelabel_names_sorted()
try:
- vmidx = names.index(vm_label) + 1 # for _NULL_LABEL_
+ vmidx = names.index(vm_label)
ssidref = (vmidx << 16) | vmidx
except:
pass
@@ -618,6 +619,9 @@ class ACMPolicy(XSPolicy):
vmnames.remove(bootstrap)
vmnames.sort()
vmnames.insert(0, bootstrap)
+ if ACM_LABEL_UNLABELED in vmnames:
+ vmnames.remove(ACM_LABEL_UNLABELED)
+ vmnames.insert(0, ACM_LABEL_UNLABELED)
return vmnames
def policy_get_virtualmachinelabel_names_sorted(self):
@@ -625,7 +629,10 @@ class ACMPolicy(XSPolicy):
label will be the first one in that list, followed
by an alphabetically sorted list of VM label names """
vmnames = self.policy_get_virtualmachinelabel_names()
- return self.policy_sort_virtualmachinelabel_names(vmnames)
+ res = self.policy_sort_virtualmachinelabel_names(vmnames)
+ if res[0] != ACM_LABEL_UNLABELED:
+ res.insert(0, ACM_LABEL_UNLABELED)
+ return res
def policy_get_virtualmachinelabels(self):
""" Get a list of all virtual machine labels in this policy """
@@ -906,7 +913,7 @@ class ACMPolicy(XSPolicy):
allvmtypes = self.policy_get_virtualmachinelabel_names_sorted()
except:
return None
- return allvmtypes[chwall_ref-1] # skip _NULL_LABEL_
+ return allvmtypes[chwall_ref]
def policy_get_domain_label_formatted(self, domid):
label = self.policy_get_domain_label(domid)
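
[The removed "+1 for _NULL_LABEL_" offsets reflect that the sorted label list now always carries ACM_LABEL_UNLABELED in slot 0, so a label's list index can be used directly. The ssidref packing itself is unchanged; a C restatement of the Python expression above, assuming the usual ACM layout in which both 16-bit halves carry the Chinese-Wall and STE references:]

    #include <stdint.h>

    /* Same packing as 'ssidref = (vmidx << 16) | vmidx' above. */
    static uint32_t make_ssidref(uint32_t vmidx)
    {
        return (vmidx << 16) | vmidx;
    }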
diff --git a/tools/python/xen/util/security.py b/tools/python/xen/util/security.py
index 18e6219f9b..1deaf9d914 100644
--- a/tools/python/xen/util/security.py
+++ b/tools/python/xen/util/security.py
@@ -838,13 +838,28 @@ def set_resource_label_xapi(resource, reslabel_xapi, oldlabel_xapi):
def is_resource_in_use(resource):
- """ Investigate all running domains whether they use this device """
+ """
+ Domain-0 'owns' resources of type 'VLAN', the rest are owned by
+ the guests.
+ """
from xen.xend import XendDomain
- dominfos = XendDomain.instance().list('all')
lst = []
- for dominfo in dominfos:
- if is_resource_in_use_by_dom(dominfo, resource):
- lst.append(dominfo)
+ if resource.startswith('vlan'):
+ from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance
+ curpol = XSPolicyAdminInstance().get_loaded_policy()
+ policytype, label, policy = get_res_label(resource)
+ if curpol and \
+ policytype == xsconstants.ACM_POLICY_ID and \
+ policy == curpol.get_name() and \
+ label in curpol.policy_get_resourcelabel_names():
+ # VLAN is in use.
+ lst.append(XendDomain.instance().
+ get_vm_by_uuid(XendDomain.DOM0_UUID))
+ else:
+ dominfos = XendDomain.instance().list('all')
+ for dominfo in dominfos:
+ if is_resource_in_use_by_dom(dominfo, resource):
+ lst.append(dominfo)
return lst
def devices_equal(res1, res2, mustexist=True):
@@ -892,6 +907,10 @@ def get_domain_resources(dominfo):
if sec_lab:
resources[typ].append(sec_lab)
else:
+ # !!! This should really get the label of the domain
+ # or at least a resource label that has the same STE type
+ # as the domain has
+ from xen.util.acmpolicy import ACM_LABEL_UNLABELED
resources[typ].append("%s:%s:%s" %
(xsconstants.ACM_POLICY_ID,
active_policy,
@@ -924,7 +943,8 @@ def resources_compatible_with_vmlabel(xspol, dominfo, vmlabel):
def __resources_compatible_with_vmlabel(xspol, dominfo, vmlabel,
- access_control):
+ access_control,
+ is_policy_update=False):
"""
Check whether the resources' labels are compatible with the
given VM label. The access_control parameter provides a
@@ -955,15 +975,23 @@ def __resources_compatible_with_vmlabel(xspol, dominfo, vmlabel,
elif key in [ 'vif' ]:
for xapi_label in value:
label = xapi_label.split(":")
- if not collect_labels(reslabels, label, polname):
- return False
+ from xen.util.acmpolicy import ACM_LABEL_UNLABELED
+ if not (is_policy_update and \
+ label[2] == ACM_LABEL_UNLABELED):
+ if not collect_labels(reslabels, label, polname):
+ return False
else:
log.error("Unhandled device type: %s" % key)
return False
# Check that all resource labes have a common STE type with the
# vmlabel
- rc = xspol.policy_check_vmlabel_against_reslabels(vmlabel, reslabels)
+ if len(reslabels) > 0:
+ rc = xspol.policy_check_vmlabel_against_reslabels(vmlabel, reslabels)
+ else:
+ rc = True
+ log.info("vmlabel=%s, reslabels=%s, rc=%s" %
+ (vmlabel, reslabels, str(rc)))
return rc;
def set_resource_label(resource, policytype, policyref, reslabel, \
@@ -1234,11 +1262,12 @@ def change_acm_policy(bin_pol, del_array, chg_array,
compatible = __resources_compatible_with_vmlabel(new_acmpol,
dominfo,
new_vmlabel,
- access_control)
+ access_control,
+ is_policy_update=True)
log.info("Domain %s with new label '%s' can access its "
"resources? : %s" %
(name, new_vmlabel, str(compatible)))
- log.info("VM labels in new domain: %s" %
+ log.info("VM labels in new policy: %s" %
new_acmpol.policy_get_virtualmachinelabel_names())
if not compatible:
return (-xsconstants.XSERR_RESOURCE_ACCESS, "")
@@ -1252,11 +1281,16 @@ def change_acm_policy(bin_pol, del_array, chg_array,
sec_lab, new_seclab = labels
if sec_lab != new_seclab:
log.info("Updating domain %s to new label '%s'." % \
- (sec_lab, new_seclab))
+ (dominfo.getName(), new_seclab))
# This better be working!
- dominfo.set_security_label(new_seclab,
- sec_lab,
- new_acmpol)
+ res = dominfo.set_security_label(new_seclab,
+ sec_lab,
+ new_acmpol,
+ cur_acmpol)
+ if res[0] != xsconstants.XSERR_SUCCESS:
+ log.info("ERROR: Could not chg label on domain %s: %s" %
+ (dominfo.getName(),
+ xsconstants.xserr2string(-int(res[0]))))
finally:
log.info("----------------------------------------------")
mapfile_unlock()
diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py
index 47b4cfba13..f88d8a8788 100644
--- a/tools/python/xen/xend/XendCheckpoint.py
+++ b/tools/python/xen/xend/XendCheckpoint.py
@@ -98,6 +98,9 @@ def save(fd, dominfo, network, live, dst, checkpoint=False):
log.info("Domain %d suspended.", dominfo.getDomid())
dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
domain_name)
+ if hvm:
+ dominfo.image.saveDeviceModel()
+
tochild.write("done\n")
tochild.flush()
log.debug('Written done')
diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
index 095c665214..3ca3f506d0 100644
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -558,9 +558,64 @@ class XendDomainInfo:
for devclass in XendDevices.valid_devices():
self.getDeviceController(devclass).waitForDevices()
- def destroyDevice(self, deviceClass, devid, force = False):
- log.debug("dev = %s", devid)
- return self.getDeviceController(deviceClass).destroyDevice(devid, force)
+ def destroyDevice(self, deviceClass, devid, force = False, rm_cfg = False):
+ log.debug("XendDomainInfo.destroyDevice: deviceClass = %s, device = %s",
+ deviceClass, devid)
+
+ if rm_cfg:
+ # Convert devid to device number. A device number is
+ # needed to remove its configuration.
+ dev = self.getDeviceController(deviceClass).convertToDeviceNumber(devid)
+
+ # Save current sxprs. A device number and a backend
+ # path are needed to remove its configuration but sxprs
+ # do not have those after calling destroyDevice.
+ sxprs = self.getDeviceSxprs(deviceClass)
+
+ rc = None
+ if self.domid is not None:
+ rc = self.getDeviceController(deviceClass).destroyDevice(devid, force)
+ if not force and rm_cfg:
+ # The backend path, other than the device itself,
+ # has to be passed because its accompanied frontend
+ # path may be void until its removal is actually
+ # issued. It is probable because destroyDevice is
+ # issued first.
+ for dev_num, dev_info in sxprs:
+ dev_num = int(dev_num)
+ if dev_num == dev:
+ for x in dev_info:
+ if x[0] == 'backend':
+ backend = x[1]
+ break
+ break
+ self._waitForDevice_destroy(deviceClass, devid, backend)
+
+ if rm_cfg:
+ if deviceClass == 'vif':
+ if self.domid is not None:
+ for dev_num, dev_info in sxprs:
+ dev_num = int(dev_num)
+ if dev_num == dev:
+ for x in dev_info:
+ if x[0] == 'mac':
+ mac = x[1]
+ break
+ break
+ dev_info = self.getDeviceInfo_vif(mac)
+ else:
+ _, dev_info = sxprs[dev]
+ else: # 'vbd' or 'tap'
+ dev_info = self.getDeviceInfo_vbd(dev)
+ if dev_info is None:
+ return rc
+
+ dev_uuid = sxp.child_value(dev_info, 'uuid')
+ del self.info['devices'][dev_uuid]
+ self.info['%s_refs' % deviceClass].remove(dev_uuid)
+ xen.xend.XendDomain.instance().managed_config_save(self)
+
+ return rc
def getDeviceSxprs(self, deviceClass):
if self._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
@@ -574,6 +629,23 @@ class XendDomainInfo:
dev_num += 1
return sxprs
+ def getDeviceInfo_vif(self, mac):
+ for dev_type, dev_info in self.info.all_devices_sxpr():
+ if dev_type != 'vif':
+ continue
+ if mac == sxp.child_value(dev_info, 'mac'):
+ return dev_info
+
+ def getDeviceInfo_vbd(self, devid):
+ for dev_type, dev_info in self.info.all_devices_sxpr():
+ if dev_type != 'vbd' and dev_type != 'tap':
+ continue
+ dev = sxp.child_value(dev_info, 'dev')
+ dev = dev.split(':')[0]
+ dev = self.getDeviceController(dev_type).convertToDeviceNumber(dev)
+ if devid == dev:
+ return dev_info
+
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
@@ -1112,8 +1184,6 @@ class XendDomainInfo:
self._clearRestart()
if reason == 'suspend':
- if self._stateGet() != DOM_STATE_SUSPENDED:
- self.image.saveDeviceModel()
self._stateSet(DOM_STATE_SUSPENDED)
# Don't destroy the domain. XendCheckpoint will do
# this once it has finished. However, stop watching
@@ -1321,6 +1391,10 @@ class XendDomainInfo:
deviceClass, config = self.info['devices'].get(dev_uuid)
self._waitForDevice(deviceClass, config['devid'])
+ def _waitForDevice_destroy(self, deviceClass, devid, backpath):
+ return self.getDeviceController(deviceClass).waitForDevice_destroy(
+ devid, backpath)
+
def _reconfigureDevice(self, deviceClass, devid, devconfig):
return self.getDeviceController(deviceClass).reconfigureDevice(
devid, devconfig)
@@ -2187,11 +2261,18 @@ class XendDomainInfo:
return self.metrics.get_uuid();
- def get_security_label(self):
+ def get_security_label(self, xspol=None):
+ """
+ Get the security label of a domain
+ @param xspol The policy to use when converting the ssid into
+ a label; only to be passed during the updating
+ of the policy
+ """
domid = self.getDomid()
- from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance
- xspol = XSPolicyAdminInstance().get_loaded_policy()
+ if not xspol:
+ from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance
+ xspol = XSPolicyAdminInstance().get_loaded_policy()
if domid == 0:
if xspol:
@@ -2202,7 +2283,8 @@ class XendDomainInfo:
label = self.info.get('security_label', '')
return label
- def set_security_label(self, seclab, old_seclab, xspol=None):
+ def set_security_label(self, seclab, old_seclab, xspol=None,
+ xspol_old=None):
"""
Set the security label of a domain from its old to
a new value.
@@ -2213,6 +2295,8 @@ class XendDomainInfo:
@param xspol An optional policy under which this
update should be done. If not given,
then the current active policy is used.
+ @param xspol_old The old policy; only to be passed during
+ the updating of a policy
@return Returns return code, a string with errors from
the hypervisor's operation, old label of the
domain
@@ -2223,6 +2307,7 @@ class XendDomainInfo:
new_ssidref = 0
domid = self.getDomid()
res_labels = None
+ is_policy_update = (xspol_old != None)
from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance
from xen.util import xsconstants
@@ -2276,13 +2361,16 @@ class XendDomainInfo:
# Check that all used resources are accessible under the
# new label
- if not security.resources_compatible_with_vmlabel(xspol,
+ if not is_policy_update and \
+ not security.resources_compatible_with_vmlabel(xspol,
self, label):
return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
#Check label against expected one.
- old_label = self.get_security_label()
+ old_label = self.get_security_label(xspol_old)
if old_label != old_seclab:
+ log.info("old_label != old_seclab: %s != %s" %
+ (old_label, old_seclab))
return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
# relabel domain in the hypervisor
diff --git a/tools/python/xen/xend/server/DevController.py b/tools/python/xen/xend/server/DevController.py
index 0de81efa3a..927dabaa6f 100644
--- a/tools/python/xen/xend/server/DevController.py
+++ b/tools/python/xen/xend/server/DevController.py
@@ -28,17 +28,19 @@ from xen.xend.xenstore.xswatch import xswatch
import os
-DEVICE_CREATE_TIMEOUT = 100
+DEVICE_CREATE_TIMEOUT = 100
+DEVICE_DESTROY_TIMEOUT = 100
HOTPLUG_STATUS_NODE = "hotplug-status"
HOTPLUG_ERROR_NODE = "hotplug-error"
HOTPLUG_STATUS_ERROR = "error"
HOTPLUG_STATUS_BUSY = "busy"
-Connected = 1
-Error = 2
-Missing = 3
-Timeout = 4
-Busy = 5
+Connected = 1
+Error = 2
+Missing = 3
+Timeout = 4
+Busy = 5
+Disconnected = 6
xenbusState = {
'Unknown' : 0,
@@ -185,6 +187,18 @@ class DevController:
(devid, self.deviceClass, err))
+ def waitForDevice_destroy(self, devid, backpath):
+ log.debug("Waiting for %s - destroyDevice.", devid)
+
+ if not self.hotplug:
+ return
+
+ status = self.waitForBackend_destroy(backpath)
+
+ if status == Timeout:
+ raise VmError("Device %s (%s) could not be disconnected. " %
+ (devid, self.deviceClass))
+
def reconfigureDevice(self, devid, config):
"""Reconfigure the specified device.
@@ -209,12 +223,7 @@ class DevController:
here.
"""
- try:
- dev = int(devid)
- except ValueError:
- # Does devid contain devicetype/deviceid?
- # Propogate exception if unable to find an integer devid
- dev = int(type(devid) is str and devid.split('/')[-1] or None)
+ dev = self.convertToDeviceNumber(devid)
# Modify online status /before/ updating state (latter is watched by
# drivers, so this ordering avoids a race).
@@ -283,6 +292,15 @@ class DevController:
all_configs[devid] = config_dict
return all_configs
+
+ def convertToDeviceNumber(self, devid):
+ try:
+ return int(devid)
+ except ValueError:
+ # Does devid contain devicetype/deviceid?
+ # Propogate exception if unable to find an integer devid
+ return int(type(devid) is str and devid.split('/')[-1] or None)
+
## protected:
def getDeviceDetails(self, config):
@@ -513,6 +531,19 @@ class DevController:
return (Missing, None)
+ def waitForBackend_destroy(self, backpath):
+
+ statusPath = backpath + '/' + HOTPLUG_STATUS_NODE
+ ev = Event()
+ result = { 'status': Timeout }
+
+ xswatch(statusPath, deviceDestroyCallback, ev, result)
+
+ ev.wait(DEVICE_DESTROY_TIMEOUT)
+
+ return result['status']
+
+
def backendPath(self, backdom, devid):
"""Construct backend path given the backend domain and device id.
@@ -561,3 +592,19 @@ def hotplugStatusCallback(statusPath, ev, result):
ev.set()
return 0
+
+
+def deviceDestroyCallback(statusPath, ev, result):
+ log.debug("deviceDestroyCallback %s.", statusPath)
+
+ status = xstransact.Read(statusPath)
+
+ if status is None:
+ result['status'] = Disconnected
+ else:
+ return 1
+
+ log.debug("deviceDestroyCallback %d.", result['status'])
+
+ ev.set()
+ return 0
diff --git a/tools/python/xen/xend/server/blkif.py b/tools/python/xen/xend/server/blkif.py
index b63e05f2a5..24879ab38a 100644
--- a/tools/python/xen/xend/server/blkif.py
+++ b/tools/python/xen/xend/server/blkif.py
@@ -165,11 +165,23 @@ class BlkifController(DevController):
try:
DevController.destroyDevice(self, devid, force)
except ValueError:
- devid_end = type(devid) is str and devid.split('/')[-1] or None
+ dev = self.convertToDeviceNumber(devid)
for i in self.deviceIDs():
- d = self.readBackend(i, 'dev')
- if d == devid or (devid_end and d == devid_end):
+ if i == dev:
DevController.destroyDevice(self, i, force)
return
raise VmError("Device %s not connected" % devid)
+
+ def convertToDeviceNumber(self, devid):
+ try:
+ dev = int(devid)
+ except ValueError:
+ if type(devid) is not str:
+ raise VmError("devid %s is wrong type" % str(devid))
+ try:
+ dev = devid.split('/')[-1]
+ dev = int(dev)
+ except ValueError:
+ dev = blkif.blkdev_name_to_number(dev)
+ return dev
diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
index dd0202920a..03a1f0820b 100644
--- a/tools/python/xen/xm/main.py
+++ b/tools/python/xen/xm/main.py
@@ -876,7 +876,7 @@ def parse_doms_info(info):
if len(tmp) != 3:
seclabel = ""
else:
- seclabel = tmp[2]
+ seclabel = security_label
parsed_info['seclabel'] = seclabel
if serverType == SERVER_XEN_API:
@@ -2186,6 +2186,7 @@ def xm_network_attach(args):
def detach(args, deviceClass):
+ rm_cfg = True
dom = args[0]
dev = args[1]
try:
@@ -2196,7 +2197,7 @@ def detach(args, deviceClass):
except IndexError:
force = None
- server.xend.domain.destroyDevice(dom, deviceClass, dev, force)
+ server.xend.domain.destroyDevice(dom, deviceClass, dev, force, rm_cfg)
def xm_block_detach(args):
diff --git a/tools/xm-test/configure.ac b/tools/xm-test/configure.ac
index d3c651a250..14aee142b7 100644
--- a/tools/xm-test/configure.ac
+++ b/tools/xm-test/configure.ac
@@ -85,6 +85,13 @@ AC_SUBST(NET_IP_RANGE)
AC_SUBST(NETWORK_ADDRESS)
AC_SUBST(NETMASK)
+DOM0_INTF="vif0.0"
+AC_ARG_WITH(dom0-intf,
+ [ --with-dom0-intf=intf Set dom0 interface name [[default="vif0.0"]]],
+ [ DOM0_INTF="$withval" ])
+
+AC_SUBST(DOM0_INTF)
+
AC_ARG_WITH(hvm-kernel,
[[ --with-hvm-kernel=kernel Use this kernel for hvm disk.img testing]],
HVMKERNEL=$withval,
diff --git a/tools/xm-test/lib/XmTestLib/NetConfig.py b/tools/xm-test/lib/XmTestLib/NetConfig.py
index 652db573f6..fe0cfb429f 100644
--- a/tools/xm-test/lib/XmTestLib/NetConfig.py
+++ b/tools/xm-test/lib/XmTestLib/NetConfig.py
@@ -104,8 +104,8 @@ class NetConfig:
if self.network == "169.254.0.0":
checkZeroconfAddresses()
- # Clean out any aliases in the network range for vif0.0. If
- # an alias exists, a test xendevice add command could fail.
+ # Clean out any aliases in the network range for dom0's interface.
+ # If an alias exists, a test xendevice add command could fail.
if NETWORK_IP_RANGE != "dhcp":
self.__cleanDom0Aliases()
@@ -139,20 +139,22 @@ class NetConfig:
def __cleanDom0Aliases(self):
# Remove any aliases within the supplied network IP range on dom0
- scmd = 'ip addr show dev vif0.0'
+ scmd = 'ip addr show dev %s' % (DOM0_INTF)
status, output = traceCommand(scmd)
if status:
- raise NetworkError("Failed to show vif0.0 aliases: %d" % status)
+ raise NetworkError("Failed to show %s aliases: %d" %
+ (DOM0_INTF, status))
lines = output.split("\n")
for line in lines:
ip = re.search('(\d+\.\d+\.\d+\.\d+)', line)
if ip and self.isIPInRange(ip.group(1)) == True:
- dcmd = 'ip addr del %s dev vif0.0' % ip.group(1)
+ dcmd = 'ip addr del %s dev %s' % (ip.group(1), DOM0_INTF)
dstatus, doutput = traceCommand(dcmd)
if dstatus:
- raise NetworkError("Failed to remove vif0.0 aliases: %d" % status)
+ raise NetworkError("Failed to remove %s aliases: %d" %
+ (DOM0_INTF, status))
def getNetEnv(self):
return self.netenv
diff --git a/tools/xm-test/lib/XmTestLib/XenDevice.py b/tools/xm-test/lib/XmTestLib/XenDevice.py
index d899a5e4c8..79dfbfc73a 100644
--- a/tools/xm-test/lib/XmTestLib/XenDevice.py
+++ b/tools/xm-test/lib/XmTestLib/XenDevice.py
@@ -214,7 +214,7 @@ class XenNetDevice(XenDevice):
def removeDevice(self):
self.releaseNetDevIP()
- def addDom0AliasCmd(self, dev="vif0.0"):
+ def addDom0AliasCmd(self, dev=DOM0_INTF):
# Method to add start and remove dom0 alias cmds
acmd = 'ip addr add %s dev %s' % (self.dom0_alias_ip, dev)
rcmd = 'ip addr del %s dev %s' % (self.dom0_alias_ip, dev)
diff --git a/tools/xm-test/lib/XmTestLib/config.py.in b/tools/xm-test/lib/XmTestLib/config.py.in
index 986eb36aff..21c6cc5585 100644
--- a/tools/xm-test/lib/XmTestLib/config.py.in
+++ b/tools/xm-test/lib/XmTestLib/config.py.in
@@ -4,3 +4,4 @@ ENABLE_HVM_SUPPORT = @ENABLE_HVM@
NETWORK_IP_RANGE = "@NET_IP_RANGE@"
NETWORK = "@NETWORK_ADDRESS@"
NETMASK = "@NETMASK@"
+DOM0_INTF = "@DOM0_INTF@"
diff --git a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
index bd58d2ceb5..33c468d594 100644
--- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
@@ -1,5 +1,6 @@
#include <linux/config.h>
-#include <linux/stop_machine.h>
+#include <linux/cpumask.h>
+#include <linux/preempt.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/xenbus.h>
diff --git a/xen/acm/acm_policy.c b/xen/acm/acm_policy.c
index 23c5bbc79c..e0c7bce544 100644
--- a/xen/acm/acm_policy.c
+++ b/xen/acm/acm_policy.c
@@ -710,12 +710,12 @@ acm_change_policy(struct acm_change_policy *chgpolicy)
goto acm_chg_policy_exit;
}
- if ( copy_from_guest((u8 *)dels.array,
+ if ( copy_from_guest(dels.array,
chgpolicy->del_array,
- chgpolicy->delarray_size) ||
- copy_from_guest((u8 *)ssidmap.array,
+ dels.num_items) ||
+ copy_from_guest(ssidmap.array,
chgpolicy->chg_array,
- chgpolicy->chgarray_size) ||
+ ssidmap.num_items) ||
copy_from_guest(binpolicy,
chgpolicy->policy_pushcache,
chgpolicy->policy_pushcache_size ))
@@ -844,9 +844,9 @@ acm_relabel_domains(struct acm_relabel_doms *relabel)
memset(errors.array, 0x0, sizeof(uint32_t) * errors.num_items);
}
- if ( copy_from_guest((u8 *)relabels.array,
+ if ( copy_from_guest(relabels.array,
relabel->relabel_map,
- relabel->relabel_map_size) )
+ relabels.num_items) )
{
rc = -EFAULT;
goto acm_relabel_doms_exit;
diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
index 85aa49414f..f3ce4119df 100644
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -423,7 +423,7 @@ acpi_fadt_parse_sleep_info(struct fadt_descriptor_rev2 *fadt)
goto bad;
if (strncmp(facs->signature, "FACS", 4)) {
- printk(KERN_ERR PREFIX "Invalid FACS signature %s\n",
+ printk(KERN_ERR PREFIX "Invalid FACS signature %.4s\n",
facs->signature);
goto bad;
}
@@ -451,12 +451,13 @@ acpi_fadt_parse_sleep_info(struct fadt_descriptor_rev2 *fadt)
acpi_sinfo.vector_width = 64;
}
- printk (KERN_INFO PREFIX
- "ACPI SLEEP INFO: pm1x_cnt[%x,%x], pm1x_evt[%x,%x]\n"
- " wakeup_vec[%"PRIx64"], vec_size[%x]\n",
- acpi_sinfo.pm1a_cnt, acpi_sinfo.pm1b_cnt,
- acpi_sinfo.pm1a_evt, acpi_sinfo.pm1b_cnt,
- acpi_sinfo.wakeup_vector, acpi_sinfo.vector_width);
+ printk(KERN_INFO PREFIX
+ "ACPI SLEEP INFO: pm1x_cnt[%x,%x], pm1x_evt[%x,%x]\n",
+ acpi_sinfo.pm1a_cnt, acpi_sinfo.pm1b_cnt,
+ acpi_sinfo.pm1a_evt, acpi_sinfo.pm1b_cnt);
+ printk(KERN_INFO PREFIX
+ " wakeup_vec[%"PRIx64"], vec_size[%x]\n",
+ acpi_sinfo.wakeup_vector, acpi_sinfo.vector_width);
return;
bad:
memset(&acpi_sinfo, 0, sizeof(acpi_sinfo));
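
[Two independent fixes in this file: the FACS signature is a fixed-width four-character field with no NUL terminator, so it must be printed with a bounded precision rather than "%s", and the oversized printk is split into two calls. A minimal illustration of the signature fix:]

    char sig[4] = { 'F', 'A', 'C', 'S' };   /* no trailing NUL */
    printk("signature: %.4s\n", sig);        /* reads exactly 4 bytes */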
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 27b544598d..0961f3372b 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -76,13 +76,6 @@ void hvm_enable(struct hvm_function_table *fns)
hvm_enabled = 1;
}
-void hvm_stts(struct vcpu *v)
-{
- /* FPU state already dirty? Then no need to setup_fpu() lazily. */
- if ( !v->fpu_dirtied )
- hvm_funcs.stts(v);
-}
-
void hvm_set_guest_time(struct vcpu *v, u64 gtime)
{
u64 host_tsc;
@@ -112,7 +105,8 @@ void hvm_do_resume(struct vcpu *v)
{
ioreq_t *p;
- hvm_stts(v);
+ if ( !v->fpu_dirtied )
+ hvm_funcs.stts(v);
pt_thaw_time(v);
@@ -520,6 +514,174 @@ void hvm_triple_fault(void)
domain_shutdown(v->domain, SHUTDOWN_reboot);
}
+int hvm_set_cr0(unsigned long value)
+{
+ struct vcpu *v = current;
+ unsigned long mfn, old_base_mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
+
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
+
+ if ( (u32)value != value )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1,
+ "Guest attempts to set upper 32 bits in CR0: %lx",
+ value);
+ hvm_inject_exception(TRAP_gp_fault, 0, 0);
+ return 0;
+ }
+
+ value &= ~HVM_CR0_GUEST_RESERVED_BITS;
+
+ /* ET is reserved and should be always be 1. */
+ value |= X86_CR0_ET;
+
+ if ( (value & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG )
+ {
+ hvm_inject_exception(TRAP_gp_fault, 0, 0);
+ return 0;
+ }
+
+ if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
+ {
+ if ( v->arch.hvm_vcpu.guest_efer & EFER_LME )
+ {
+ if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable");
+ hvm_inject_exception(TRAP_gp_fault, 0, 0);
+ return 0;
+ }
+ HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
+ v->arch.hvm_vcpu.guest_efer |= EFER_LMA;
+ hvm_update_guest_efer(v);
+ }
+
+ if ( !paging_mode_hap(v->domain) )
+ {
+ /* The guest CR3 must be pointing to the guest physical. */
+ mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT);
+ if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
+ {
+ gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
+ v->arch.hvm_vcpu.guest_cr[3], mfn);
+ domain_crash(v->domain);
+ return 0;
+ }
+
+ /* Now arch.guest_table points to machine physical. */
+ old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+ v->arch.guest_table = pagetable_from_pfn(mfn);
+ if ( old_base_mfn )
+ put_page(mfn_to_page(old_base_mfn));
+
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
+ v->arch.hvm_vcpu.guest_cr[3], mfn);
+ }
+ }
+ else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) )
+ {
+ /* When CR0.PG is cleared, LMA is cleared immediately. */
+ if ( hvm_long_mode_enabled(v) )
+ {
+ v->arch.hvm_vcpu.guest_efer &= ~EFER_LMA;
+ hvm_update_guest_efer(v);
+ }
+
+ if ( !paging_mode_hap(v->domain) )
+ {
+ put_page(mfn_to_page(get_mfn_from_gpfn(
+ v->arch.hvm_vcpu.guest_cr[3] >> PAGE_SHIFT)));
+ v->arch.guest_table = pagetable_null();
+ }
+ }
+
+ v->arch.hvm_vcpu.guest_cr[0] = value;
+ hvm_update_guest_cr(v, 0);
+
+ if ( (value ^ old_value) & X86_CR0_PG )
+ paging_update_paging_modes(v);
+
+ return 1;
+}
+
+int hvm_set_cr3(unsigned long value)
+{
+ unsigned long old_base_mfn, mfn;
+ struct vcpu *v = current;
+
+ if ( paging_mode_hap(v->domain) || !hvm_paging_enabled(v) )
+ {
+ /* Nothing to do. */
+ }
+ else if ( value == v->arch.hvm_vcpu.guest_cr[3] )
+ {
+ /* Shadow-mode TLB flush. Invalidate the shadow. */
+ mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
+ if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
+ goto bad_cr3;
+ }
+ else
+ {
+ /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
+ mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
+ if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+ goto bad_cr3;
+
+ old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+ v->arch.guest_table = pagetable_from_pfn(mfn);
+
+ if ( old_base_mfn )
+ put_page(mfn_to_page(old_base_mfn));
+
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
+ }
+
+ v->arch.hvm_vcpu.guest_cr[3] = value;
+ paging_update_cr3(v);
+ return 1;
+
+ bad_cr3:
+ gdprintk(XENLOG_ERR, "Invalid CR3\n");
+ domain_crash(v->domain);
+ return 0;
+}
+
+int hvm_set_cr4(unsigned long value)
+{
+ struct vcpu *v = current;
+ unsigned long old_cr;
+
+ if ( value & HVM_CR4_GUEST_RESERVED_BITS )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1,
+ "Guest attempts to set reserved bit in CR4: %lx",
+ value);
+ goto gpf;
+ }
+
+ if ( !(value & X86_CR4_PAE) && hvm_long_mode_enabled(v) )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
+ "EFER.LMA is set");
+ goto gpf;
+ }
+
+ old_cr = v->arch.hvm_vcpu.guest_cr[4];
+ v->arch.hvm_vcpu.guest_cr[4] = value;
+ hvm_update_guest_cr(v, 4);
+
+ /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
+ if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
+ paging_update_paging_modes(v);
+
+ return 1;
+
+ gpf:
+ hvm_inject_exception(TRAP_gp_fault, 0, 0);
+ return 0;
+}
+
/*
* __hvm_copy():
* @buf = hypervisor buffer
@@ -668,7 +830,6 @@ typedef unsigned long hvm_hypercall_t(
static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
HYPERCALL(memory_op),
[ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
- HYPERCALL(multicall),
HYPERCALL(xen_version),
HYPERCALL(grant_table_op),
HYPERCALL(event_channel_op),
@@ -813,12 +974,6 @@ int hvm_do_hypercall(struct cpu_user_regs *regs)
flush ? HVM_HCALL_invalidate : HVM_HCALL_completed);
}
-void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3)
-{
- v->arch.hvm_vcpu.hw_cr3 = guest_cr3;
- hvm_funcs.update_guest_cr3(v);
-}
-
static void hvm_latch_shinfo_size(struct domain *d)
{
/*
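
[hvm_set_cr0/cr3/cr4 centralise CR handling that was previously duplicated in the SVM and VMX exit paths; the svm.c hunks below show svm_set_cr0 collapsing into a call to hvm_set_cr0. A hedged sketch of the resulting dispatch shape — the handler name is hypothetical:]

    /* Sketch only: a MOV-to-CRn intercept delegating to the common
     * helpers above; each returns 1 on success, 0 after injecting a
     * fault or crashing the domain. */
    static int handle_mov_to_cr(unsigned int cr, unsigned long value)
    {
        switch ( cr )
        {
        case 0: return hvm_set_cr0(value);
        case 3: return hvm_set_cr3(value);
        case 4: return hvm_set_cr4(value);
        default:
            gdprintk(XENLOG_ERR, "unexpected CR%u write\n", cr);
            domain_crash(current->domain);
            return 0;
        }
    }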
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 4578aaa616..597f976a41 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -59,8 +59,9 @@ int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
int inst_len);
asmlinkage void do_IRQ(struct cpu_user_regs *);
-static int svm_reset_to_realmode(struct vcpu *v,
- struct cpu_user_regs *regs);
+static int svm_reset_to_realmode(
+ struct vcpu *v, struct cpu_user_regs *regs);
+static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
/* va of hardware host save area */
static void *hsa[NR_CPUS] __read_mostly;
@@ -78,7 +79,7 @@ static void svm_inject_exception(
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
if ( trap == TRAP_page_fault )
- HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_svm.cpu_cr2, error_code);
+ HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vcpu.guest_cr[2], error_code);
else
HVMTRACE_2D(INJ_EXC, v, trap, error_code);
@@ -97,55 +98,14 @@ static void svm_cpu_down(void)
write_efer(read_efer() & ~EFER_SVME);
}
-#ifdef __x86_64__
-
static int svm_lme_is_set(struct vcpu *v)
{
- u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer;
+#ifdef __x86_64__
+ u64 guest_efer = v->arch.hvm_vcpu.guest_efer;
return guest_efer & EFER_LME;
-}
-
-static int svm_long_mode_enabled(struct vcpu *v)
-{
- u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer;
- return guest_efer & EFER_LMA;
-}
-
-#else /* __i386__ */
-
-static int svm_lme_is_set(struct vcpu *v)
-{ return 0; }
-static int svm_long_mode_enabled(struct vcpu *v)
-{ return 0; }
-
+#else
+ return 0;
#endif
-
-static int svm_cr4_pae_is_set(struct vcpu *v)
-{
- unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
- return guest_cr4 & X86_CR4_PAE;
-}
-
-static int svm_paging_enabled(struct vcpu *v)
-{
- unsigned long guest_cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- return (guest_cr0 & X86_CR0_PE) && (guest_cr0 & X86_CR0_PG);
-}
-
-static int svm_pae_enabled(struct vcpu *v)
-{
- unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
- return svm_paging_enabled(v) && (guest_cr4 & X86_CR4_PAE);
-}
-
-static int svm_nx_enabled(struct vcpu *v)
-{
- return v->arch.hvm_svm.cpu_shadow_efer & EFER_NX;
-}
-
-static int svm_pgbit_test(struct vcpu *v)
-{
- return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
}
static void svm_store_cpu_guest_regs(
@@ -165,10 +125,10 @@ static void svm_store_cpu_guest_regs(
if ( crs != NULL )
{
/* Returning the guest's regs */
- crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
- crs[2] = v->arch.hvm_svm.cpu_cr2;
- crs[3] = v->arch.hvm_svm.cpu_cr3;
- crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
+ crs[0] = v->arch.hvm_vcpu.guest_cr[0];
+ crs[2] = v->arch.hvm_vcpu.guest_cr[2];
+ crs[3] = v->arch.hvm_vcpu.guest_cr[3];
+ crs[4] = v->arch.hvm_vcpu.guest_cr[4];
}
}
@@ -202,7 +162,8 @@ static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) )
{
/* EFER.LME transition from 0 to 1. */
- if ( svm_paging_enabled(v) || !svm_cr4_pae_is_set(v) )
+ if ( hvm_paging_enabled(v) ||
+ !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
{
gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
"in paging mode or PAE bit is not set\n");
@@ -212,7 +173,7 @@ static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
else if ( !(msr_content & EFER_LME) && svm_lme_is_set(v) )
{
/* EFER.LME transistion from 1 to 0. */
- if ( svm_paging_enabled(v) )
+ if ( hvm_paging_enabled(v) )
{
gdprintk(XENLOG_WARNING,
"Trying to clear EFER.LME while paging enabled\n");
@@ -220,9 +181,9 @@ static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
}
}
- v->arch.hvm_svm.cpu_shadow_efer = msr_content;
+ v->arch.hvm_vcpu.guest_efer = msr_content;
vmcb->efer = msr_content | EFER_SVME;
- if ( !svm_paging_enabled(v) )
+ if ( !hvm_paging_enabled(v) )
vmcb->efer &= ~(EFER_LME | EFER_LMA);
break;
@@ -297,10 +258,10 @@ int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
c->rsp = vmcb->rsp;
c->rflags = vmcb->rflags;
- c->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- c->cr2 = v->arch.hvm_svm.cpu_cr2;
- c->cr3 = v->arch.hvm_svm.cpu_cr3;
- c->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
+ c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
+ c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
+ c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
#ifdef HVM_DEBUG_SUSPEND
printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
@@ -383,58 +344,31 @@ int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
vmcb->rsp = c->rsp;
vmcb->rflags = c->rflags;
- v->arch.hvm_svm.cpu_shadow_cr0 = c->cr0;
- vmcb->cr0 = c->cr0 | X86_CR0_WP | X86_CR0_ET | X86_CR0_PG;
-
- v->arch.hvm_svm.cpu_cr2 = c->cr2;
+ v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
+ v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
+ v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
+ v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
+ svm_update_guest_cr(v, 0);
+ svm_update_guest_cr(v, 2);
+ svm_update_guest_cr(v, 4);
#ifdef HVM_DEBUG_SUSPEND
printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
- __func__,
- c->cr3,
- c->cr0,
- c->cr4);
+ __func__, c->cr3, c->cr0, c->cr4);
#endif
- if ( !svm_paging_enabled(v) )
+ if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) )
{
- printk("%s: paging not enabled.\n", __func__);
- goto skip_cr3;
- }
-
- if ( c->cr3 == v->arch.hvm_svm.cpu_cr3 )
- {
- /*
- * This is simple TLB flush, implying the guest has
- * removed some translation or changed page attributes.
- * We simply invalidate the shadow.
- */
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
- goto bad_cr3;
- }
- else
- {
- /*
- * If different, make a shadow. Check if the PDBR is valid
- * first.
- */
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 = %"PRIx64, c->cr3);
mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
goto bad_cr3;
-
old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
v->arch.guest_table = pagetable_from_pfn(mfn);
- if (old_base_mfn)
+ if ( old_base_mfn )
put_page(mfn_to_page(old_base_mfn));
- v->arch.hvm_svm.cpu_cr3 = c->cr3;
}
- skip_cr3:
- vmcb->cr4 = c->cr4 | HVM_CR4_HOST_MASK;
- v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4;
-
vmcb->idtr.limit = c->idtr_limit;
vmcb->idtr.base = c->idtr_base;
@@ -488,10 +422,6 @@ int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
if ( paging_mode_hap(v->domain) )
{
- vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- vmcb->cr4 = (v->arch.hvm_svm.cpu_shadow_cr4 |
- (HVM_CR4_HOST_MASK & ~X86_CR4_PAE));
- vmcb->cr3 = c->cr3;
vmcb->np_enable = 1;
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
@@ -521,7 +451,6 @@ int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
}
paging_update_paging_modes(v);
- svm_asid_g_update_paging(v);
return 0;
@@ -540,7 +469,7 @@ static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
data->msr_star = vmcb->star;
data->msr_cstar = vmcb->cstar;
data->msr_syscall_mask = vmcb->sfmask;
- data->msr_efer = v->arch.hvm_svm.cpu_shadow_efer;
+ data->msr_efer = v->arch.hvm_vcpu.guest_efer;
data->msr_flags = -1ULL;
data->tsc = hvm_get_guest_time(v);
@@ -556,7 +485,7 @@ static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
vmcb->star = data->msr_star;
vmcb->cstar = data->msr_cstar;
vmcb->sfmask = data->msr_syscall_mask;
- v->arch.hvm_svm.cpu_shadow_efer = data->msr_efer;
+ v->arch.hvm_vcpu.guest_efer = data->msr_efer;
vmcb->efer = data->msr_efer | EFER_SVME;
/* VMCB's EFER.LME isn't set unless we're actually in long mode
* (see long_mode_do_msr_write()) */
@@ -605,11 +534,11 @@ static int svm_guest_x86_mode(struct vcpu *v)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- if ( unlikely(!(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PE)) )
+ if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
return 0;
if ( unlikely(vmcb->rflags & X86_EFLAGS_VM) )
return 1;
- if ( svm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
+ if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
return 8;
return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
}
@@ -619,9 +548,45 @@ static void svm_update_host_cr3(struct vcpu *v)
/* SVM doesn't have a HOST_CR3 equivalent to update. */
}
-static void svm_update_guest_cr3(struct vcpu *v)
+static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ switch ( cr )
+ {
+ case 0:
+ vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0];
+ if ( !paging_mode_hap(v->domain) )
+ vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
+ break;
+ case 2:
+ vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
+ break;
+ case 3:
+ vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
+ svm_asid_inv_asid(v);
+ break;
+ case 4:
+ vmcb->cr4 = HVM_CR4_HOST_MASK;
+ if ( paging_mode_hap(v->domain) )
+ vmcb->cr4 &= ~X86_CR4_PAE;
+ vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
+ break;
+ default:
+ BUG();
+ }
+}
+
+static void svm_update_guest_efer(struct vcpu *v)
{
- v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
+#ifdef __x86_64__
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ if ( v->arch.hvm_vcpu.guest_efer & EFER_LMA )
+ vmcb->efer |= EFER_LME | EFER_LMA;
+ else
+ vmcb->efer &= ~(EFER_LME | EFER_LMA);
+#endif
}
static void svm_flush_guest_tlbs(void)
@@ -639,24 +604,6 @@ static void svm_update_vtpr(struct vcpu *v, unsigned long value)
vmcb->vintr.fields.tpr = value & 0x0f;
}
-static unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
-{
- switch ( num )
- {
- case 0:
- return v->arch.hvm_svm.cpu_shadow_cr0;
- case 2:
- return v->arch.hvm_svm.cpu_cr2;
- case 3:
- return v->arch.hvm_svm.cpu_cr3;
- case 4:
- return v->arch.hvm_svm.cpu_shadow_cr4;
- default:
- BUG();
- }
- return 0; /* dummy */
-}
-
static void svm_sync_vmcb(struct vcpu *v)
{
struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
@@ -674,7 +621,7 @@ static void svm_sync_vmcb(struct vcpu *v)
static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- int long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v);
+ int long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
switch ( seg )
{
@@ -748,7 +695,7 @@ static void svm_stts(struct vcpu *v)
* then this is not necessary: no FPU activity can occur until the guest
* clears CR0.TS, and we will initialise the FPU when that happens.
*/
- if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
+ if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
{
v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
vmcb->cr0 |= X86_CR0_TS;
@@ -949,7 +896,7 @@ static void svm_hvm_inject_exception(
{
struct vcpu *v = current;
if ( trapnr == TRAP_page_fault )
- v->arch.hvm_svm.vmcb->cr2 = v->arch.hvm_svm.cpu_cr2 = cr2;
+ v->arch.hvm_svm.vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2] = cr2;
svm_inject_exception(v, trapnr, (errcode != -1), errcode);
}
@@ -970,17 +917,13 @@ static struct hvm_function_table svm_function_table = {
.load_cpu_guest_regs = svm_load_cpu_guest_regs,
.save_cpu_ctxt = svm_save_vmcb_ctxt,
.load_cpu_ctxt = svm_load_vmcb_ctxt,
- .paging_enabled = svm_paging_enabled,
- .long_mode_enabled = svm_long_mode_enabled,
- .pae_enabled = svm_pae_enabled,
- .nx_enabled = svm_nx_enabled,
.interrupts_enabled = svm_interrupts_enabled,
.guest_x86_mode = svm_guest_x86_mode,
- .get_guest_ctrl_reg = svm_get_ctrl_reg,
.get_segment_base = svm_get_segment_base,
.get_segment_register = svm_get_segment_register,
.update_host_cr3 = svm_update_host_cr3,
- .update_guest_cr3 = svm_update_guest_cr3,
+ .update_guest_cr = svm_update_guest_cr,
+ .update_guest_efer = svm_update_guest_efer,
.flush_guest_tlbs = svm_flush_guest_tlbs,
.update_vtpr = svm_update_vtpr,
.stts = svm_stts,
@@ -1075,7 +1018,7 @@ static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
setup_fpu(v);
vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
- if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
+ if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
vmcb->cr0 &= ~X86_CR0_TS;
}
@@ -1347,7 +1290,7 @@ static int svm_get_io_address(
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
/* If we're in long mode, don't check the segment presence & limit */
- long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v);
+ long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
/* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit.
* l field combined with EFER_LMA says whether it's 16 or 64 bit.
@@ -1650,31 +1593,11 @@ static void svm_io_instruction(struct vcpu *v)
static int svm_set_cr0(unsigned long value)
{
struct vcpu *v = current;
- unsigned long mfn, old_value = v->arch.hvm_svm.cpu_shadow_cr0;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- unsigned long old_base_mfn;
-
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
+ int rc = hvm_set_cr0(value);
- if ( (u32)value != value )
- {
- HVM_DBG_LOG(DBG_LEVEL_1,
- "Guest attempts to set upper 32 bits in CR0: %lx",
- value);
- svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+ if ( rc == 0 )
return 0;
- }
-
- value &= ~HVM_CR0_GUEST_RESERVED_BITS;
-
- /* ET is reserved and should be always be 1. */
- value |= X86_CR0_ET;
-
- if ( (value & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG )
- {
- svm_inject_exception(v, TRAP_gp_fault, 1, 0);
- return 0;
- }
/* TS cleared? Then initialise FPU now. */
if ( !(value & X86_CR0_TS) )
@@ -1683,76 +1606,9 @@ static int svm_set_cr0(unsigned long value)
vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
}
- if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
- {
- if ( svm_lme_is_set(v) )
- {
- if ( !svm_cr4_pae_is_set(v) )
- {
- HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable");
- svm_inject_exception(v, TRAP_gp_fault, 1, 0);
- return 0;
- }
- HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode");
- v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
- vmcb->efer |= EFER_LMA | EFER_LME;
- }
-
- if ( !paging_mode_hap(v->domain) )
- {
- /* The guest CR3 must be pointing to the guest physical. */
- mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
- {
- gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
- v->arch.hvm_svm.cpu_cr3, mfn);
- domain_crash(v->domain);
- return 0;
- }
-
- /* Now arch.guest_table points to machine physical. */
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
- v->arch.guest_table = pagetable_from_pfn(mfn);
- if ( old_base_mfn )
- put_page(mfn_to_page(old_base_mfn));
-
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
- v->arch.hvm_vmx.cpu_cr3, mfn);
- }
- }
- else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) )
- {
- /* When CR0.PG is cleared, LMA is cleared immediately. */
- if ( svm_long_mode_enabled(v) )
- {
- vmcb->efer &= ~(EFER_LME | EFER_LMA);
- v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
- }
-
- if ( !paging_mode_hap(v->domain) && v->arch.hvm_svm.cpu_cr3 )
- {
- put_page(mfn_to_page(get_mfn_from_gpfn(
- v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
- v->arch.guest_table = pagetable_null();
- }
- }
-
- vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0 = value;
- if ( !paging_mode_hap(v->domain) )
- vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
-
- if ( (value ^ old_value) & X86_CR0_PG )
- {
- paging_update_paging_modes(v);
- svm_asid_g_update_paging(v);
- }
-
return 1;
}
-/*
- * Read from control registers. CR0 and CR4 are read from the shadow.
- */
static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
{
unsigned long value = 0;
@@ -1763,16 +1619,16 @@ static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
switch ( cr )
{
case 0:
- value = v->arch.hvm_svm.cpu_shadow_cr0;
+ value = v->arch.hvm_vcpu.guest_cr[0];
break;
case 2:
value = vmcb->cr2;
break;
case 3:
- value = (unsigned long)v->arch.hvm_svm.cpu_cr3;
+ value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
break;
case 4:
- value = (unsigned long)v->arch.hvm_svm.cpu_shadow_cr4;
+ value = (unsigned long)v->arch.hvm_vcpu.guest_cr[4];
break;
case 8:
value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
@@ -1791,13 +1647,9 @@ static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx", cr, value);
}
-
-/*
- * Write to control registers
- */
static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
{
- unsigned long value, old_cr, old_base_mfn, mfn;
+ unsigned long value;
struct vcpu *v = current;
struct vlapic *vlapic = vcpu_vlapic(v);
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -1815,131 +1667,10 @@ static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
return svm_set_cr0(value);
case 3:
- if ( paging_mode_hap(v->domain) )
- {
- vmcb->cr3 = v->arch.hvm_svm.cpu_cr3 = value;
- break;
- }
+ return hvm_set_cr3(value);
- /* If paging is not enabled yet, simply copy the value to CR3. */
- if ( !svm_paging_enabled(v) )
- {
- v->arch.hvm_svm.cpu_cr3 = value;
- break;
- }
-
- /* We make a new one if the shadow does not exist. */
- if ( value == v->arch.hvm_svm.cpu_cr3 )
- {
- /*
- * This is simple TLB flush, implying the guest has
- * removed some translation or changed page attributes.
- * We simply invalidate the shadow.
- */
- mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
- if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
- goto bad_cr3;
- paging_update_cr3(v);
- /* signal paging update to ASID handler */
- svm_asid_g_mov_to_cr3 (v);
- }
- else
- {
- /*
- * If different, make a shadow. Check if the PDBR is valid
- * first.
- */
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
- mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
- goto bad_cr3;
-
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
- v->arch.guest_table = pagetable_from_pfn(mfn);
-
- if ( old_base_mfn )
- put_page(mfn_to_page(old_base_mfn));
-
- v->arch.hvm_svm.cpu_cr3 = value;
- update_cr3(v);
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
- /* signal paging update to ASID handler */
- svm_asid_g_mov_to_cr3 (v);
- }
- break;
-
- case 4: /* CR4 */
- if ( value & HVM_CR4_GUEST_RESERVED_BITS )
- {
- HVM_DBG_LOG(DBG_LEVEL_1,
- "Guest attempts to set reserved bit in CR4: %lx",
- value);
- svm_inject_exception(v, TRAP_gp_fault, 1, 0);
- break;
- }
-
- if ( paging_mode_hap(v->domain) )
- {
- v->arch.hvm_svm.cpu_shadow_cr4 = value;
- vmcb->cr4 = value | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
- paging_update_paging_modes(v);
- /* signal paging update to ASID handler */
- svm_asid_g_update_paging (v);
- break;
- }
-
- old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
- if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
- {
- if ( svm_pgbit_test(v) )
- {
-#if CONFIG_PAGING_LEVELS >= 3
- /* The guest is a 32-bit PAE guest. */
- unsigned long mfn, old_base_mfn;
- mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) ||
- !get_page(mfn_to_page(mfn), v->domain) )
- goto bad_cr3;
-
- /*
- * Now arch.guest_table points to machine physical.
- */
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
- v->arch.guest_table = pagetable_from_pfn(mfn);
- if ( old_base_mfn )
- put_page(mfn_to_page(old_base_mfn));
- paging_update_paging_modes(v);
- /* signal paging update to ASID handler */
- svm_asid_g_update_paging (v);
-
- HVM_DBG_LOG(DBG_LEVEL_VMMU,
- "Update CR3 value = %lx, mfn = %lx",
- v->arch.hvm_svm.cpu_cr3, mfn);
-#endif
- }
- }
- else if ( !(value & X86_CR4_PAE) )
- {
- if ( svm_long_mode_enabled(v) )
- {
- svm_inject_exception(v, TRAP_gp_fault, 1, 0);
- }
- }
-
- v->arch.hvm_svm.cpu_shadow_cr4 = value;
- vmcb->cr4 = value | HVM_CR4_HOST_MASK;
-
- /*
- * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
- * all TLB entries except global entries.
- */
- if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
- {
- paging_update_paging_modes(v);
- /* signal paging update to ASID handler */
- svm_asid_g_update_paging (v);
- }
- break;
+ case 4:
+ return hvm_set_cr4(value);
case 8:
vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
@@ -1953,19 +1684,11 @@ static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
}
return 1;
-
- bad_cr3:
- gdprintk(XENLOG_ERR, "Invalid CR3\n");
- domain_crash(v->domain);
- return 0;
}
-
-#define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
-
-
-static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
- struct cpu_user_regs *regs)
+static void svm_cr_access(
+ struct vcpu *v, unsigned int cr, unsigned int type,
+ struct cpu_user_regs *regs)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
int inst_len = 0;
@@ -1990,12 +1713,12 @@ static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
if ( type == TYPE_MOV_TO_CR )
{
inst_len = __get_instruction_length_from_list(
- v, list_a, ARR_SIZE(list_a), &buffer[index], &match);
+ v, list_a, ARRAY_SIZE(list_a), &buffer[index], &match);
}
else /* type == TYPE_MOV_FROM_CR */
{
inst_len = __get_instruction_length_from_list(
- v, list_b, ARR_SIZE(list_b), &buffer[index], &match);
+ v, list_b, ARRAY_SIZE(list_b), &buffer[index], &match);
}
ASSERT(inst_len > 0);
@@ -2008,7 +1731,8 @@ static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
- switch (match)
+ switch ( match )
{
case INSTR_MOV2CR:
gpreg = decode_src_reg(prefix, buffer[index+2]);
@@ -2025,18 +1749,18 @@ static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
setup_fpu(current);
vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
- v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
+ v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */
break;
case INSTR_LMSW:
gpreg = decode_src_reg(prefix, buffer[index+2]);
value = get_reg(gpreg, regs, vmcb) & 0xF;
- value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
+ value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value;
result = svm_set_cr0(value);
break;
case INSTR_SMSW:
- value = v->arch.hvm_svm.cpu_shadow_cr0 & 0xFFFF;
+ value = v->arch.hvm_vcpu.guest_cr[0] & 0xFFFF;
modrm = buffer[index+2];
addr_size = svm_guest_x86_mode(v);
if ( addr_size < 2 )
@@ -2099,9 +1823,8 @@ static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
ASSERT(inst_len);
- __update_guest_eip(vmcb, inst_len);
-
- return result;
+ if ( result )
+ __update_guest_eip(vmcb, inst_len);
}
static void svm_do_msr_access(
@@ -2129,7 +1852,7 @@ static void svm_do_msr_access(
break;
case MSR_EFER:
- msr_content = v->arch.hvm_svm.cpu_shadow_efer;
+ msr_content = v->arch.hvm_vcpu.guest_efer;
break;
case MSR_K8_MC4_MISC: /* Threshold register */
@@ -2319,8 +2042,7 @@ void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
HVMTRACE_3D(INVLPG, v, (invlpga?1:0), g_vaddr, (invlpga?regs->ecx:0));
paging_invlpg(v, g_vaddr);
- /* signal invplg to ASID handler */
- svm_asid_g_invlpg (v, g_vaddr);
+ svm_asid_g_invlpg(v, g_vaddr);
}
@@ -2335,29 +2057,23 @@ static int svm_reset_to_realmode(struct vcpu *v,
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- /* clear the vmcb and user regs */
memset(regs, 0, sizeof(struct cpu_user_regs));
-
- /* VMCB State */
- vmcb->cr0 = X86_CR0_ET | X86_CR0_PG | X86_CR0_WP;
- v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
- vmcb->cr2 = 0;
- vmcb->efer = EFER_SVME;
+ v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
+ svm_update_guest_cr(v, 0);
- vmcb->cr4 = HVM_CR4_HOST_MASK;
- v->arch.hvm_svm.cpu_shadow_cr4 = 0;
+ v->arch.hvm_vcpu.guest_cr[2] = 0;
+ svm_update_guest_cr(v, 2);
- if ( paging_mode_hap(v->domain) ) {
- vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
- (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
- }
+ v->arch.hvm_vcpu.guest_cr[4] = 0;
+ svm_update_guest_cr(v, 4);
+
+ vmcb->efer = EFER_SVME;
/* This will jump to ROMBIOS */
vmcb->rip = 0xFFF0;
- /* setup the segment registers and all their hidden states */
+ /* Set up the segment registers and all their hidden states. */
vmcb->cs.sel = 0xF000;
vmcb->cs.attr.bytes = 0x089b;
vmcb->cs.limit = 0xffff;
@@ -2483,7 +2199,7 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
unsigned long va;
va = vmcb->exitinfo2;
regs->error_code = vmcb->exitinfo1;
- HVM_DBG_LOG(DBG_LEVEL_VMMU,
+ HVM_DBG_LOG(DBG_LEVEL_VMMU,
"eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
(unsigned long)regs->eax, (unsigned long)regs->ebx,
(unsigned long)regs->ecx, (unsigned long)regs->edx,
@@ -2495,7 +2211,7 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
break;
}
- v->arch.hvm_svm.cpu_cr2 = vmcb->cr2 = va;
+ v->arch.hvm_vcpu.guest_cr[2] = vmcb->cr2 = va;
svm_inject_exception(v, TRAP_page_fault, 1, regs->error_code);
break;
}
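
The svm.c changes above retire the per-vendor cpu_shadow_cr*/cpu_cr* fields in favour of a common guest_cr[] array, with svm_update_guest_cr() as the single point that derives the values actually loaded into the VMCB. Below is a minimal standalone sketch of that pattern; the struct and constants are simplified stand-ins for illustration, not the real Xen definitions.

    /* Toy model of the guest_cr[] -> VMCB derivation in svm_update_guest_cr(). */
    #include <stdio.h>

    #define X86_CR0_PG 0x80000000UL
    #define X86_CR0_WP 0x00010000UL

    struct toy_vmcb { unsigned long cr0; };
    struct toy_vcpu {
        unsigned long guest_cr[5];   /* the guest's own view */
        struct toy_vmcb vmcb;        /* what hardware runs with */
        int hap_enabled;
    };

    static void toy_update_guest_cr0(struct toy_vcpu *v)
    {
        v->vmcb.cr0 = v->guest_cr[0];
        /* With shadow paging, Xen's pagetables are always live: force PG and WP. */
        if ( !v->hap_enabled )
            v->vmcb.cr0 |= X86_CR0_PG | X86_CR0_WP;
    }

    int main(void)
    {
        struct toy_vcpu v = { .guest_cr = { 0x11 }, .hap_enabled = 0 };
        toy_update_guest_cr0(&v);
        printf("guest sees %#lx, hardware runs %#lx\n",
               v.guest_cr[0], v.vmcb.cr0);
        return 0;
    }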
diff --git a/xen/arch/x86/hvm/svm/vmcb.c b/xen/arch/x86/hvm/svm/vmcb.c
index 1e50e9d0dc..e704747393 100644
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -111,7 +111,7 @@ static int construct_vmcb(struct vcpu *v)
svm_segment_attributes_t attrib;
/* TLB control, and ASID assignment. */
- svm_asid_init_vcpu (v);
+ svm_asid_init_vcpu(v);
vmcb->general1_intercepts =
GENERAL1_INTERCEPT_INTR | GENERAL1_INTERCEPT_NMI |
@@ -216,27 +216,19 @@ static int construct_vmcb(struct vcpu *v)
vmcb->tr.base = 0;
vmcb->tr.limit = 0xff;
- /* Guest CR0. */
- vmcb->cr0 = read_cr0();
- arch_svm->cpu_shadow_cr0 = vmcb->cr0 & ~(X86_CR0_PG | X86_CR0_TS);
- vmcb->cr0 |= X86_CR0_WP;
+ v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_TS;
+ hvm_update_guest_cr(v, 0);
- /* Guest CR4. */
- arch_svm->cpu_shadow_cr4 =
- read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
- vmcb->cr4 = arch_svm->cpu_shadow_cr4 | HVM_CR4_HOST_MASK;
+ v->arch.hvm_vcpu.guest_cr[4] = 0;
+ hvm_update_guest_cr(v, 4);
paging_update_paging_modes(v);
- vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
if ( paging_mode_hap(v->domain) )
{
- vmcb->cr0 = arch_svm->cpu_shadow_cr0;
vmcb->np_enable = 1; /* enable nested paging */
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
- vmcb->cr4 = arch_svm->cpu_shadow_cr4 =
- (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
vmcb->exception_intercepts = HVM_TRAP_MASK;
/* No point in intercepting CR3/4 reads, because the hardware
diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index 016d91c3e4..e73ad68881 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -43,10 +43,6 @@
/* HACK: Route IRQ0 only to VCPU0 to prevent time jumps. */
#define IRQ0_SPECIAL_ROUTING 1
-#if defined(__ia64__)
-#define opt_hvm_debug_level opt_vmx_debug_level
-#endif
-
static void vioapic_deliver(struct hvm_hw_vioapic *vioapic, int irq);
static unsigned long vioapic_read_indirect(struct hvm_hw_vioapic *vioapic,
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 62bfc5fbe0..0795420820 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -315,34 +315,69 @@ void vmx_cpu_down(void)
local_irq_restore(flags);
}
+struct foreign_vmcs {
+ struct vcpu *v;
+ unsigned int count;
+};
+static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);
+
void vmx_vmcs_enter(struct vcpu *v)
{
+ struct foreign_vmcs *fv;
+
/*
* NB. We must *always* run an HVM VCPU on its own VMCS, except for
* vmx_vmcs_enter/exit critical regions.
*/
- if ( v == current )
+ if ( likely(v == current) )
return;
- vcpu_pause(v);
- spin_lock(&v->arch.hvm_vmx.vmcs_lock);
+ fv = &this_cpu(foreign_vmcs);
- vmx_clear_vmcs(v);
- vmx_load_vmcs(v);
+ if ( fv->v == v )
+ {
+ BUG_ON(fv->count == 0);
+ }
+ else
+ {
+ BUG_ON(fv->v != NULL);
+ BUG_ON(fv->count != 0);
+
+ vcpu_pause(v);
+ spin_lock(&v->arch.hvm_vmx.vmcs_lock);
+
+ vmx_clear_vmcs(v);
+ vmx_load_vmcs(v);
+
+ fv->v = v;
+ }
+
+ fv->count++;
}
void vmx_vmcs_exit(struct vcpu *v)
{
- if ( v == current )
+ struct foreign_vmcs *fv;
+
+ if ( likely(v == current) )
return;
- /* Don't confuse vmx_do_resume (for @v or @current!) */
- vmx_clear_vmcs(v);
- if ( is_hvm_vcpu(current) )
- vmx_load_vmcs(current);
+ fv = &this_cpu(foreign_vmcs);
+ BUG_ON(fv->v != v);
+ BUG_ON(fv->count == 0);
+
+ if ( --fv->count == 0 )
+ {
+ /* Don't confuse vmx_do_resume (for @v or @current!) */
+ vmx_clear_vmcs(v);
+ if ( is_hvm_vcpu(current) )
+ vmx_load_vmcs(current);
+
+ spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
+ vcpu_unpause(v);
- spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
- vcpu_unpause(v);
+ fv->v = NULL;
+ }
}
struct xgt_desc {
@@ -380,7 +415,6 @@ static void vmx_set_host_env(struct vcpu *v)
static void construct_vmcs(struct vcpu *v)
{
- unsigned long cr0, cr4;
union vmcs_arbytes arbytes;
vmx_vmcs_enter(v);
@@ -504,19 +538,11 @@ static void construct_vmcs(struct vcpu *v)
__vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
- /* Guest CR0. */
- cr0 = read_cr0();
- v->arch.hvm_vmx.cpu_cr0 = cr0;
- __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
- v->arch.hvm_vmx.cpu_shadow_cr0 = cr0 & ~(X86_CR0_PG | X86_CR0_TS);
- __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
-
- /* Guest CR4. */
- cr4 = read_cr4();
- __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
- v->arch.hvm_vmx.cpu_shadow_cr4 =
- cr4 & ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
- __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
+ v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
+ hvm_update_guest_cr(v, 0);
+
+ v->arch.hvm_vcpu.guest_cr[4] = 0;
+ hvm_update_guest_cr(v, 4);
if ( cpu_has_vmx_tpr_shadow )
{
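
The vmx_vmcs_enter()/vmx_vmcs_exit() rework above makes foreign-VMCS access re-entrant: a per-CPU (v, count) pair means only the outermost enter pays for the vcpu pause, lock and VMCS reload. A compilable toy model of that refcounting follows, with the pause/unpause and VMCS operations stubbed out for illustration.

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    struct vcpu { int id; };
    struct foreign_vmcs { struct vcpu *v; unsigned int count; };
    static struct foreign_vmcs fv;            /* per-CPU in the real code */

    static void vcpu_pause(struct vcpu *v)    { (void)v; /* stub */ }
    static void vcpu_unpause(struct vcpu *v)  { (void)v; /* stub */ }

    static void toy_vmcs_enter(struct vcpu *v)
    {
        if ( fv.v == v )
            assert(fv.count != 0);        /* nested entry: bump the count only */
        else
        {
            assert(fv.v == NULL && fv.count == 0);
            vcpu_pause(v);                /* first entry: pause, lock, load VMCS */
            fv.v = v;
        }
        fv.count++;
    }

    static void toy_vmcs_exit(struct vcpu *v)
    {
        assert(fv.v == v && fv.count != 0);
        if ( --fv.count == 0 )
        {
            vcpu_unpause(v);              /* last exit: clear VMCS, unlock, unpause */
            fv.v = NULL;
        }
    }

    int main(void)
    {
        struct vcpu v = { 1 };
        toy_vmcs_enter(&v);
        toy_vmcs_enter(&v);               /* nesting is now safe */
        toy_vmcs_exit(&v);
        toy_vmcs_exit(&v);
        printf("count=%u\n", fv.count);   /* 0 */
        return 0;
    }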
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 37d8857acc..4309b3d9df 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -61,6 +61,8 @@ static void vmx_ctxt_switch_to(struct vcpu *v);
static int vmx_alloc_vlapic_mapping(struct domain *d);
static void vmx_free_vlapic_mapping(struct domain *d);
static void vmx_install_vlapic_mapping(struct vcpu *v);
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
+static void vmx_update_guest_efer(struct vcpu *v);
static int vmx_domain_initialise(struct domain *d)
{
@@ -100,63 +102,8 @@ static void vmx_vcpu_destroy(struct vcpu *v)
vmx_destroy_vmcs(v);
}
-static int vmx_paging_enabled(struct vcpu *v)
-{
- unsigned long cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
- return (cr0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG);
-}
-
-static int vmx_pgbit_test(struct vcpu *v)
-{
- unsigned long cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
- return cr0 & X86_CR0_PG;
-}
-
-static int vmx_pae_enabled(struct vcpu *v)
-{
- unsigned long cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
- return vmx_paging_enabled(v) && (cr4 & X86_CR4_PAE);
-}
-
-static int vmx_nx_enabled(struct vcpu *v)
-{
- return v->arch.hvm_vmx.efer & EFER_NX;
-}
-
#ifdef __x86_64__
-static int vmx_lme_is_set(struct vcpu *v)
-{
- return v->arch.hvm_vmx.efer & EFER_LME;
-}
-
-static int vmx_long_mode_enabled(struct vcpu *v)
-{
- return v->arch.hvm_vmx.efer & EFER_LMA;
-}
-
-static void vmx_enable_long_mode(struct vcpu *v)
-{
- unsigned long vm_entry_value;
-
- vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
- vm_entry_value |= VM_ENTRY_IA32E_MODE;
- __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
-
- v->arch.hvm_vmx.efer |= EFER_LMA;
-}
-
-static void vmx_disable_long_mode(struct vcpu *v)
-{
- unsigned long vm_entry_value;
-
- vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
- vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
- __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
-
- v->arch.hvm_vmx.efer &= ~EFER_LMA;
-}
-
static DEFINE_PER_CPU(struct vmx_msr_state, host_msr_state);
static u32 msr_index[VMX_MSR_COUNT] =
@@ -190,7 +137,7 @@ static enum handler_return long_mode_do_msr_read(struct cpu_user_regs *regs)
switch ( ecx )
{
case MSR_EFER:
- msr_content = v->arch.hvm_vmx.efer;
+ msr_content = v->arch.hvm_vcpu.guest_efer;
break;
case MSR_FS_BASE:
@@ -204,7 +151,7 @@ static enum handler_return long_mode_do_msr_read(struct cpu_user_regs *regs)
case MSR_SHADOW_GS_BASE:
msr_content = v->arch.hvm_vmx.shadow_gs;
check_long_mode:
- if ( !(vmx_long_mode_enabled(v)) )
+ if ( !(hvm_long_mode_enabled(v)) )
{
vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
return HNDL_exception_raised;
@@ -263,9 +210,9 @@ static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
}
if ( (msr_content & EFER_LME)
- && !(v->arch.hvm_vmx.efer & EFER_LME) )
+ && !(v->arch.hvm_vcpu.guest_efer & EFER_LME) )
{
- if ( unlikely(vmx_paging_enabled(v)) )
+ if ( unlikely(hvm_paging_enabled(v)) )
{
gdprintk(XENLOG_WARNING,
"Trying to set EFER.LME with paging enabled\n");
@@ -273,9 +220,9 @@ static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
}
}
else if ( !(msr_content & EFER_LME)
- && (v->arch.hvm_vmx.efer & EFER_LME) )
+ && (v->arch.hvm_vcpu.guest_efer & EFER_LME) )
{
- if ( unlikely(vmx_paging_enabled(v)) )
+ if ( unlikely(hvm_paging_enabled(v)) )
{
gdprintk(XENLOG_WARNING,
"Trying to clear EFER.LME with paging enabled\n");
@@ -283,17 +230,17 @@ static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
}
}
- if ( (msr_content ^ v->arch.hvm_vmx.efer) & (EFER_NX|EFER_SCE) )
+ if ( (msr_content ^ v->arch.hvm_vcpu.guest_efer) & (EFER_NX|EFER_SCE) )
write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
(msr_content & (EFER_NX|EFER_SCE)));
- v->arch.hvm_vmx.efer = msr_content;
+ v->arch.hvm_vcpu.guest_efer = msr_content;
break;
case MSR_FS_BASE:
case MSR_GS_BASE:
case MSR_SHADOW_GS_BASE:
- if ( !vmx_long_mode_enabled(v) )
+ if ( !hvm_long_mode_enabled(v) )
goto gp_fault;
if ( !is_canonical_address(msr_content) )
@@ -394,27 +341,18 @@ static void vmx_restore_guest_msrs(struct vcpu *v)
clear_bit(i, &guest_flags);
}
- if ( (v->arch.hvm_vmx.efer ^ read_efer()) & (EFER_NX | EFER_SCE) )
+ if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & (EFER_NX | EFER_SCE) )
{
HVM_DBG_LOG(DBG_LEVEL_2,
"restore guest's EFER with value %lx",
- v->arch.hvm_vmx.efer);
+ v->arch.hvm_vcpu.guest_efer);
write_efer((read_efer() & ~(EFER_NX | EFER_SCE)) |
- (v->arch.hvm_vmx.efer & (EFER_NX | EFER_SCE)));
+ (v->arch.hvm_vcpu.guest_efer & (EFER_NX | EFER_SCE)));
}
}
#else /* __i386__ */
-static int vmx_lme_is_set(struct vcpu *v)
-{ return 0; }
-static int vmx_long_mode_enabled(struct vcpu *v)
-{ return 0; }
-static void vmx_enable_long_mode(struct vcpu *v)
-{ BUG(); }
-static void vmx_disable_long_mode(struct vcpu *v)
-{ BUG(); }
-
#define vmx_save_host_msrs() ((void)0)
static void vmx_restore_host_msrs(void)
@@ -427,13 +365,13 @@ static void vmx_restore_host_msrs(void)
static void vmx_restore_guest_msrs(struct vcpu *v)
{
- if ( (v->arch.hvm_vmx.efer ^ read_efer()) & EFER_NX )
+ if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & EFER_NX )
{
HVM_DBG_LOG(DBG_LEVEL_2,
"restore guest's EFER with value %lx",
- v->arch.hvm_vmx.efer);
+ v->arch.hvm_vcpu.guest_efer);
write_efer((read_efer() & ~EFER_NX) |
- (v->arch.hvm_vmx.efer & EFER_NX));
+ (v->arch.hvm_vcpu.guest_efer & EFER_NX));
}
}
@@ -444,7 +382,7 @@ static enum handler_return long_mode_do_msr_read(struct cpu_user_regs *regs)
switch ( regs->ecx ) {
case MSR_EFER:
- msr_content = v->arch.hvm_vmx.efer;
+ msr_content = v->arch.hvm_vcpu.guest_efer;
break;
default:
@@ -475,10 +413,10 @@ static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
return HNDL_exception_raised;
}
- if ( (msr_content ^ v->arch.hvm_vmx.efer) & EFER_NX )
+ if ( (msr_content ^ v->arch.hvm_vcpu.guest_efer) & EFER_NX )
write_efer((read_efer() & ~EFER_NX) | (msr_content & EFER_NX));
- v->arch.hvm_vmx.efer = msr_content;
+ v->arch.hvm_vcpu.guest_efer = msr_content;
break;
default:
@@ -501,12 +439,12 @@ static int vmx_guest_x86_mode(struct vcpu *v)
ASSERT(v == current);
- if ( unlikely(!(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_PE)) )
+ if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
return 0;
if ( unlikely(__vmread(GUEST_RFLAGS) & X86_EFLAGS_VM) )
return 1;
cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES);
- if ( vmx_long_mode_enabled(v) &&
+ if ( hvm_long_mode_enabled(v) &&
likely(cs_ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) )
return 8;
return (likely(cs_ar_bytes & X86_SEG_AR_DEF_OP_SIZE) ? 4 : 2);
@@ -551,12 +489,12 @@ void vmx_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c)
c->rsp = __vmread(GUEST_RSP);
c->rflags = __vmread(GUEST_RFLAGS);
- c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
- c->cr2 = v->arch.hvm_vmx.cpu_cr2;
- c->cr3 = v->arch.hvm_vmx.cpu_cr3;
- c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
+ c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
+ c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
+ c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
+ c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
- c->msr_efer = v->arch.hvm_vmx.efer;
+ c->msr_efer = v->arch.hvm_vcpu.guest_efer;
#ifdef HVM_DEBUG_SUSPEND
printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
@@ -635,51 +573,33 @@ int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
__vmwrite(GUEST_RSP, c->rsp);
__vmwrite(GUEST_RFLAGS, c->rflags);
- v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG |
- X86_CR0_NE | X86_CR0_WP | X86_CR0_ET);
- __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
- v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
- __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
-
- v->arch.hvm_vmx.cpu_cr2 = c->cr2;
-
- v->arch.hvm_vmx.efer = c->msr_efer;
+ v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
+ v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
+ v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
+ v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
+ vmx_update_guest_cr(v, 0);
+ vmx_update_guest_cr(v, 2);
+ vmx_update_guest_cr(v, 4);
#ifdef HVM_DEBUG_SUSPEND
printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
__func__, c->cr3, c->cr0, c->cr4);
#endif
- if ( !vmx_paging_enabled(v) )
+ if ( hvm_paging_enabled(v) )
{
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "%s: paging not enabled.", __func__);
- goto skip_cr3;
- }
-
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 = %"PRIx64, c->cr3);
- /* current!=vcpu as not called by arch_vmx_do_launch */
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
- {
- gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64".\n", c->cr3);
- vmx_vmcs_exit(v);
- return -EINVAL;
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 = %"PRIx64, c->cr3);
+ mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+ if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+ goto bad_cr3;
+ old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+ v->arch.guest_table = pagetable_from_pfn(mfn);
+ if ( old_base_mfn )
+ put_page(mfn_to_page(old_base_mfn));
}
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
- v->arch.guest_table = pagetable_from_pfn(mfn);
- if ( old_base_mfn )
- put_page(mfn_to_page(old_base_mfn));
-
- skip_cr3:
- v->arch.hvm_vmx.cpu_cr3 = c->cr3;
-
- if ( vmx_long_mode_enabled(v) )
- vmx_enable_long_mode(v);
-
- __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
- v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
- __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
+ v->arch.hvm_vcpu.guest_efer = c->msr_efer;
+ vmx_update_guest_efer(v);
__vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
__vmwrite(GUEST_IDTR_BASE, c->idtr_base);
@@ -760,6 +680,11 @@ int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
}
return 0;
+
+ bad_cr3:
+ gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
+ vmx_vmcs_exit(v);
+ return -EINVAL;
}
#if defined(__x86_64__) && defined(HVM_DEBUG_SUSPEND)
@@ -884,10 +809,10 @@ static void vmx_store_cpu_guest_regs(
if ( crs != NULL )
{
- crs[0] = v->arch.hvm_vmx.cpu_shadow_cr0;
- crs[2] = v->arch.hvm_vmx.cpu_cr2;
- crs[3] = v->arch.hvm_vmx.cpu_cr3;
- crs[4] = v->arch.hvm_vmx.cpu_shadow_cr4;
+ crs[0] = v->arch.hvm_vcpu.guest_cr[0];
+ crs[2] = v->arch.hvm_vcpu.guest_cr[2];
+ crs[3] = v->arch.hvm_vcpu.guest_cr[3];
+ crs[4] = v->arch.hvm_vcpu.guest_cr[4];
}
vmx_vmcs_exit(v);
@@ -928,24 +853,6 @@ static void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
vmx_vmcs_exit(v);
}
-static unsigned long vmx_get_ctrl_reg(struct vcpu *v, unsigned int num)
-{
- switch ( num )
- {
- case 0:
- return v->arch.hvm_vmx.cpu_cr0;
- case 2:
- return v->arch.hvm_vmx.cpu_cr2;
- case 3:
- return v->arch.hvm_vmx.cpu_cr3;
- case 4:
- return v->arch.hvm_vmx.cpu_shadow_cr4;
- default:
- BUG();
- }
- return 0; /* dummy */
-}
-
static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
{
unsigned long base = 0;
@@ -953,7 +860,7 @@ static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
ASSERT(v == current);
- if ( vmx_long_mode_enabled(v) &&
+ if ( hvm_long_mode_enabled(v) &&
(__vmread(GUEST_CS_AR_BYTES) & X86_SEG_AR_CS_LM_ACTIVE) )
long_mode = 1;
@@ -1045,6 +952,9 @@ static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
}
reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00);
+ /* Unusable flag is folded into Present flag. */
+ if ( attr & (1u<<16) )
+ reg->attr.fields.p = 0;
}
/* Make sure that xen intercepts any FP accesses from current */
@@ -1059,10 +969,10 @@ static void vmx_stts(struct vcpu *v)
* then this is not necessary: no FPU activity can occur until the guest
* clears CR0.TS, and we will initialise the FPU when that happens.
*/
- if ( !(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_TS) )
+ if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
{
- v->arch.hvm_vmx.cpu_cr0 |= X86_CR0_TS;
- __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
+ v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_TS;
+ __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
__vm_set_bit(EXCEPTION_BITMAP, TRAP_no_device);
}
}
@@ -1135,12 +1045,58 @@ static void vmx_update_host_cr3(struct vcpu *v)
vmx_vmcs_exit(v);
}
-static void vmx_update_guest_cr3(struct vcpu *v)
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
+{
+ ASSERT((v == current) || !vcpu_runnable(v));
+
+ vmx_vmcs_enter(v);
+
+ switch ( cr )
+ {
+ case 0:
+ v->arch.hvm_vcpu.hw_cr[0] =
+ v->arch.hvm_vcpu.guest_cr[0] |
+ X86_CR0_PE | X86_CR0_NE | X86_CR0_PG | X86_CR0_WP;
+ __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
+ __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
+ break;
+ case 2:
+ /* CR2 is updated in exit stub. */
+ break;
+ case 3:
+ __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
+ break;
+ case 4:
+ v->arch.hvm_vcpu.hw_cr[4] =
+ v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK;
+ __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
+ __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
+ break;
+ default:
+ BUG();
+ }
+
+ vmx_vmcs_exit(v);
+}
+
+static void vmx_update_guest_efer(struct vcpu *v)
{
+#ifdef __x86_64__
+ unsigned long vm_entry_value;
+
ASSERT((v == current) || !vcpu_runnable(v));
+
vmx_vmcs_enter(v);
- __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
+
+ vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
+ if ( v->arch.hvm_vcpu.guest_efer & EFER_LMA )
+ vm_entry_value |= VM_ENTRY_IA32E_MODE;
+ else
+ vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
+ __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+
vmx_vmcs_exit(v);
+#endif
}
static void vmx_flush_guest_tlbs(void)
@@ -1156,7 +1112,7 @@ static void vmx_inject_exception(
struct vcpu *v = current;
vmx_inject_hw_exception(v, trapnr, errcode);
if ( trapnr == TRAP_page_fault )
- v->arch.hvm_vmx.cpu_cr2 = cr2;
+ v->arch.hvm_vcpu.guest_cr[2] = cr2;
}
static void vmx_update_vtpr(struct vcpu *v, unsigned long value)
@@ -1200,17 +1156,13 @@ static struct hvm_function_table vmx_function_table = {
.load_cpu_guest_regs = vmx_load_cpu_guest_regs,
.save_cpu_ctxt = vmx_save_vmcs_ctxt,
.load_cpu_ctxt = vmx_load_vmcs_ctxt,
- .paging_enabled = vmx_paging_enabled,
- .long_mode_enabled = vmx_long_mode_enabled,
- .pae_enabled = vmx_pae_enabled,
- .nx_enabled = vmx_nx_enabled,
.interrupts_enabled = vmx_interrupts_enabled,
.guest_x86_mode = vmx_guest_x86_mode,
- .get_guest_ctrl_reg = vmx_get_ctrl_reg,
.get_segment_base = vmx_get_segment_base,
.get_segment_register = vmx_get_segment_register,
.update_host_cr3 = vmx_update_host_cr3,
- .update_guest_cr3 = vmx_update_guest_cr3,
+ .update_guest_cr = vmx_update_guest_cr,
+ .update_guest_efer = vmx_update_guest_efer,
.flush_guest_tlbs = vmx_flush_guest_tlbs,
.update_vtpr = vmx_update_vtpr,
.stts = vmx_stts,
@@ -1315,10 +1267,10 @@ static void vmx_do_no_device_fault(void)
__vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
/* Disable TS in guest CR0 unless the guest wants the exception too. */
- if ( !(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_TS) )
+ if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
{
- v->arch.hvm_vmx.cpu_cr0 &= ~X86_CR0_TS;
- __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
+ v->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS;
+ __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
}
}
@@ -1773,7 +1725,7 @@ static void vmx_do_str_pio(unsigned long exit_qualification,
sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
ar_bytes = __vmread(GUEST_CS_AR_BYTES);
- if ( vmx_long_mode_enabled(current) &&
+ if ( hvm_long_mode_enabled(current) &&
(ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) )
long_mode = 1;
addr = __vmread(GUEST_LINEAR_ADDRESS);
@@ -1900,9 +1852,9 @@ static void vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
c->esp = __vmread(GUEST_RSP);
c->eflags = __vmread(GUEST_RFLAGS) & ~X86_EFLAGS_RF;
- c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
- c->cr3 = v->arch.hvm_vmx.cpu_cr3;
- c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
+ c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
+ c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
+ c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
c->idtr_base = __vmread(GUEST_IDTR_BASE);
@@ -1959,30 +1911,15 @@ static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
__vmwrite(GUEST_RSP, c->esp);
__vmwrite(GUEST_RFLAGS, c->eflags);
- v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
- __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
+ v->arch.hvm_vcpu.guest_cr[0] = c->cr0;
+ v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
+ v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
+ vmx_update_guest_cr(v, 0);
+ vmx_update_guest_cr(v, 4);
- if ( !vmx_paging_enabled(v) )
- goto skip_cr3;
-
- if ( c->cr3 == v->arch.hvm_vmx.cpu_cr3 )
+ if ( hvm_paging_enabled(v) )
{
- /*
- * This is simple TLB flush, implying the guest has
- * removed some translation or changed page attributes.
- * We simply invalidate the shadow.
- */
- mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
- if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
- goto bad_cr3;
- }
- else
- {
- /*
- * If different, make a shadow. Check if the PDBR is valid
- * first.
- */
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 = %x", c->cr3);
mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
goto bad_cr3;
@@ -1990,19 +1927,8 @@ static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
v->arch.guest_table = pagetable_from_pfn(mfn);
if ( old_base_mfn )
put_page(mfn_to_page(old_base_mfn));
- v->arch.hvm_vmx.cpu_cr3 = c->cr3;
}
- skip_cr3:
- if ( !vmx_paging_enabled(v) )
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
- else
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
-
- __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
- v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
- __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
-
__vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
__vmwrite(GUEST_IDTR_BASE, c->idtr_base);
@@ -2149,33 +2075,11 @@ static int vmx_assist(struct vcpu *v, int mode)
static int vmx_set_cr0(unsigned long value)
{
struct vcpu *v = current;
- unsigned long mfn;
unsigned long eip;
- int paging_enabled;
- unsigned long old_cr0;
- unsigned long old_base_mfn;
+ int rc = hvm_set_cr0(value);
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
-
- if ( (u32)value != value )
- {
- HVM_DBG_LOG(DBG_LEVEL_1,
- "Guest attempts to set upper 32 bits in CR0: %lx",
- value);
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ if ( rc == 0 )
return 0;
- }
-
- value &= ~HVM_CR0_GUEST_RESERVED_BITS;
-
- /* ET is reserved and should be always be 1. */
- value |= X86_CR0_ET;
-
- if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
- {
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
- return 0;
- }
/* TS cleared? Then initialise FPU now. */
if ( !(value & X86_CR0_TS) )
@@ -2184,88 +2088,13 @@ static int vmx_set_cr0(unsigned long value)
__vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
}
- old_cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
- paging_enabled = old_cr0 & X86_CR0_PG;
-
- v->arch.hvm_vmx.cpu_cr0 = (value | X86_CR0_PE | X86_CR0_PG
- | X86_CR0_NE | X86_CR0_WP);
- __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
-
- v->arch.hvm_vmx.cpu_shadow_cr0 = value;
- __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
-
- /* Trying to enable paging. */
- if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled )
- {
- if ( vmx_lme_is_set(v) && !vmx_long_mode_enabled(v) )
- {
- if ( !(v->arch.hvm_vmx.cpu_shadow_cr4 & X86_CR4_PAE) )
- {
- HVM_DBG_LOG(DBG_LEVEL_1, "Guest enabled paging "
- "with EFER.LME set but not CR4.PAE");
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
- return 0;
- }
-
- HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
- vmx_enable_long_mode(v);
- }
-
- /*
- * The guest CR3 must be pointing to the guest physical.
- */
- mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
- {
- gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
- v->arch.hvm_vmx.cpu_cr3, mfn);
- domain_crash(v->domain);
- return 0;
- }
-
- /*
- * Now arch.guest_table points to machine physical.
- */
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
- v->arch.guest_table = pagetable_from_pfn(mfn);
- if ( old_base_mfn )
- put_page(mfn_to_page(old_base_mfn));
-
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
- v->arch.hvm_vmx.cpu_cr3, mfn);
-
- paging_update_paging_modes(v);
- }
-
- /* Trying to disable paging. */
- if ( ((value & (X86_CR0_PE | X86_CR0_PG)) != (X86_CR0_PE | X86_CR0_PG)) &&
- paging_enabled )
- {
- /* When CR0.PG is cleared, LMA is cleared immediately. */
- if ( vmx_long_mode_enabled(v) )
- vmx_disable_long_mode(v);
-
- if ( v->arch.hvm_vmx.cpu_cr3 )
- {
- put_page(mfn_to_page(get_mfn_from_gpfn(
- v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
- v->arch.guest_table = pagetable_null();
- }
- }
-
/*
* VMX does not implement real-mode virtualization. We emulate
* real-mode by performing a world switch to VMXAssist whenever
* a partition disables the CR0.PE bit.
*/
- if ( (value & X86_CR0_PE) == 0 )
+ if ( !(value & X86_CR0_PE) )
{
- if ( value & X86_CR0_PG )
- {
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
- return 0;
- }
-
if ( vmx_assist(v, VMX_ASSIST_INVOKE) )
{
eip = __vmread(GUEST_RIP);
@@ -2286,8 +2115,6 @@ static int vmx_set_cr0(unsigned long value)
return 0; /* do not update eip! */
}
}
- else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
- paging_update_paging_modes(v);
return 1;
}
@@ -2316,12 +2143,9 @@ static int vmx_set_cr0(unsigned long value)
CASE_ ## T ## ET_REG(R15, r15)
#endif
-/*
- * Write to control registers
- */
static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
{
- unsigned long value, old_cr, old_base_mfn, mfn;
+ unsigned long value;
struct vcpu *v = current;
struct vlapic *vlapic = vcpu_vlapic(v);
@@ -2353,108 +2177,10 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
return vmx_set_cr0(value);
case 3:
- /*
- * If paging is not enabled yet, simply copy the value to CR3.
- */
- if ( !vmx_paging_enabled(v) )
- {
- v->arch.hvm_vmx.cpu_cr3 = value;
- break;
- }
-
- /*
- * We make a new one if the shadow does not exist.
- */
- if ( value == v->arch.hvm_vmx.cpu_cr3 ) {
- /*
- * This is simple TLB flush, implying the guest has
- * removed some translation or changed page attributes.
- * We simply invalidate the shadow.
- */
- mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
- if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
- goto bad_cr3;
- paging_update_cr3(v);
- } else {
- /*
- * If different, make a shadow. Check if the PDBR is valid
- * first.
- */
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
- mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
- goto bad_cr3;
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
- v->arch.guest_table = pagetable_from_pfn(mfn);
- if ( old_base_mfn )
- put_page(mfn_to_page(old_base_mfn));
- v->arch.hvm_vmx.cpu_cr3 = value;
- update_cr3(v);
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
- }
- break;
-
- case 4: /* CR4 */
- old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;
+ return hvm_set_cr3(value);
- if ( value & HVM_CR4_GUEST_RESERVED_BITS )
- {
- HVM_DBG_LOG(DBG_LEVEL_1,
- "Guest attempts to set reserved bit in CR4: %lx",
- value);
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
- return 0;
- }
-
- if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
- {
- if ( vmx_pgbit_test(v) )
- {
-#if CONFIG_PAGING_LEVELS >= 3
- /* The guest is a 32-bit PAE guest. */
- unsigned long mfn, old_base_mfn;
- mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) ||
- !get_page(mfn_to_page(mfn), v->domain) )
- goto bad_cr3;
-
- /*
- * Now arch.guest_table points to machine physical.
- */
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
- v->arch.guest_table = pagetable_from_pfn(mfn);
- if ( old_base_mfn )
- put_page(mfn_to_page(old_base_mfn));
-
- HVM_DBG_LOG(DBG_LEVEL_VMMU,
- "Update CR3 value = %lx, mfn = %lx",
- v->arch.hvm_vmx.cpu_cr3, mfn);
-#endif
- }
- }
- else if ( !(value & X86_CR4_PAE) )
- {
- if ( unlikely(vmx_long_mode_enabled(v)) )
- {
- HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
- "EFER.LMA is set");
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
- return 0;
- }
- }
-
- __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
- v->arch.hvm_vmx.cpu_shadow_cr4 = value;
- __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
-
- /*
- * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
- * all TLB entries except global entries.
- */
- if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
- paging_update_paging_modes(v);
-
- break;
+ case 4:
+ return hvm_set_cr4(value);
case 8:
vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
@@ -2462,14 +2188,11 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
default:
gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
- domain_crash(v->domain);
- return 0;
+ goto exit_and_crash;
}
return 1;
- bad_cr3:
- gdprintk(XENLOG_ERR, "Invalid CR3\n");
exit_and_crash:
domain_crash(v->domain);
return 0;
@@ -2487,7 +2210,7 @@ static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
switch ( cr )
{
case 3:
- value = (unsigned long)v->arch.hvm_vmx.cpu_cr3;
+ value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
break;
case 8:
value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
@@ -2530,7 +2253,8 @@ static int vmx_cr_access(unsigned long exit_qualification,
unsigned long value;
struct vcpu *v = current;
- switch ( exit_qualification & CONTROL_REG_ACCESS_TYPE ) {
+ switch ( exit_qualification & CONTROL_REG_ACCESS_TYPE )
+ {
case TYPE_MOV_TO_CR:
gp = exit_qualification & CONTROL_REG_ACCESS_REG;
cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
@@ -2545,14 +2269,14 @@ static int vmx_cr_access(unsigned long exit_qualification,
setup_fpu(v);
__vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
- v->arch.hvm_vmx.cpu_cr0 &= ~X86_CR0_TS; /* clear TS */
- __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
+ v->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS; /* clear TS */
+ __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
- v->arch.hvm_vmx.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
- __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
+ v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */
+ __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
break;
case TYPE_LMSW:
- value = v->arch.hvm_vmx.cpu_shadow_cr0;
+ value = v->arch.hvm_vcpu.guest_cr[0];
value = (value & ~0xF) |
(((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
return vmx_set_cr0(value);
@@ -2943,7 +2667,7 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
break;
}
- v->arch.hvm_vmx.cpu_cr2 = exit_qualification;
+ v->arch.hvm_vcpu.guest_cr[2] = exit_qualification;
vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
break;
case TRAP_nmi:
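
A recurring idea in the vmx.c hunks above is the split between hw_cr[0] (the value hardware actually runs with, carrying the bits VMX requires) and guest_cr[0] (the guest's own view, exposed through CR0_READ_SHADOW). A small sketch of that split, using the real CR0 bit values but replacing __vmwrite() with plain stores:

    #include <stdio.h>

    #define X86_CR0_PE 0x00000001UL
    #define X86_CR0_NE 0x00000020UL
    #define X86_CR0_WP 0x00010000UL
    #define X86_CR0_PG 0x80000000UL

    int main(void)
    {
        unsigned long guest_cr0 = X86_CR0_PE;  /* guest: protected mode, no paging */
        unsigned long hw_cr0, cr0_read_shadow;

        /* Mirrors the case 0 of vmx_update_guest_cr() above. */
        hw_cr0 = guest_cr0 | X86_CR0_PE | X86_CR0_NE | X86_CR0_PG | X86_CR0_WP;
        cr0_read_shadow = guest_cr0;

        /* Hardware runs with paging on; a guest MOV-from-CR0 sees its own value. */
        printf("hw=%#lx shadow=%#lx\n", hw_cr0, cr0_read_shadow);
        return 0;
    }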
diff --git a/xen/arch/x86/hvm/vmx/x86_32/exits.S b/xen/arch/x86/hvm/vmx/x86_32/exits.S
index ef24dce0ea..7f080bccd4 100644
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S
+++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S
@@ -74,7 +74,7 @@ ENTRY(vmx_asm_do_vmentry)
jnz vmx_process_softirqs
call vmx_intr_assist
- movl VCPU_vmx_cr2(%ebx),%eax
+ movl VCPU_hvm_guest_cr2(%ebx),%eax
movl %eax,%cr2
call vmx_trace_vmentry
diff --git a/xen/arch/x86/hvm/vmx/x86_64/exits.S b/xen/arch/x86/hvm/vmx/x86_64/exits.S
index 0bebeb6ec3..5b2527d055 100644
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S
+++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S
@@ -88,7 +88,7 @@ ENTRY(vmx_asm_do_vmentry)
jnz vmx_process_softirqs
call vmx_intr_assist
- movq VCPU_vmx_cr2(%rbx),%rax
+ movq VCPU_hvm_guest_cr2(%rbx),%rax
movq %rax,%cr2
call vmx_trace_vmentry
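
Both vmentry stubs load CR2 by hand because, unlike most guest state, hardware does not save or restore CR2 across vmentry/vmexit; Xen must reinstate the value captured at the last page-fault vmexit. A C model of that step, with the privileged CR2 write stubbed out:

    #include <stdio.h>

    static unsigned long hw_cr2;  /* stands in for the physical CR2 register */

    static void write_cr2(unsigned long val) { hw_cr2 = val; /* stub */ }

    int main(void)
    {
        unsigned long guest_cr2 = 0xdead000;  /* saved at the last #PF vmexit */
        /* movl/movq VCPU_hvm_guest_cr2(%rbx),%rax ; mov %rax,%cr2 */
        write_cr2(guest_cr2);
        printf("cr2=%#lx\n", hw_cr2);
        return 0;
    }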
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index f308dfb248..9af4f6ff5f 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -394,8 +394,8 @@ void write_ptbase(struct vcpu *v)
write_cr3(v->arch.cr3);
}
-/* Should be called after CR3 is updated.
- * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
+/*
+ * Should be called after CR3 is updated.
*
* Uses values found in vcpu->arch.(guest_table and guest_table_user), and
* for HVM guests, arch.monitor_table and hvm's guest CR3.
diff --git a/xen/arch/x86/mm/hap/guest_walk.c b/xen/arch/x86/mm/hap/guest_walk.c
index 77d9af2eeb..c8ea48e61c 100644
--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -62,7 +62,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
struct vcpu *v, unsigned long gva)
{
- unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ unsigned long gcr3 = v->arch.hvm_vcpu.guest_cr[3];
int mode = GUEST_PAGING_LEVELS;
int lev, index;
paddr_t gpa = 0;
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index cfb979ab21..63f1675659 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -603,48 +603,37 @@ static int hap_invlpg(struct vcpu *v, unsigned long va)
return 0;
}
-/*
- * HAP guests do not need to take any action on CR3 writes (they are still
- * intercepted, so that Xen's copy of the guest's CR3 can be kept in sync.)
- */
static void hap_update_cr3(struct vcpu *v, int do_locking)
{
+ v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
+ hvm_update_guest_cr(v, 3);
}
static void hap_update_paging_modes(struct vcpu *v)
{
- struct domain *d;
+ struct domain *d = v->domain;
- d = v->domain;
hap_lock(d);
- /* update guest paging mode. Note that we rely on hvm functions to detect
- * guest's paging mode. So, make sure the shadow registers (CR0, CR4, EFER)
- * reflect guest's status correctly.
- */
- if ( hvm_paging_enabled(v) )
- {
- if ( hvm_long_mode_enabled(v) )
- v->arch.paging.mode = &hap_paging_long_mode;
- else if ( hvm_pae_enabled(v) )
- v->arch.paging.mode = &hap_paging_pae_mode;
- else
- v->arch.paging.mode = &hap_paging_protected_mode;
- }
- else
- {
- v->arch.paging.mode = &hap_paging_real_mode;
- }
+ v->arch.paging.mode =
+ !hvm_paging_enabled(v) ? &hap_paging_real_mode :
+ hvm_long_mode_enabled(v) ? &hap_paging_long_mode :
+ hvm_pae_enabled(v) ? &hap_paging_pae_mode :
+ &hap_paging_protected_mode;
- v->arch.paging.translate_enabled = !!hvm_paging_enabled(v);
+ v->arch.paging.translate_enabled = hvm_paging_enabled(v);
if ( pagetable_is_null(v->arch.monitor_table) )
{
mfn_t mmfn = hap_make_monitor_table(v);
v->arch.monitor_table = pagetable_from_mfn(mmfn);
make_cr3(v, mfn_x(mmfn));
+ hvm_update_host_cr3(v);
}
+ /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
+ hap_update_cr3(v, 0);
+
hap_unlock(d);
}
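
hap_update_paging_modes() now selects the paging mode with a single conditional chain instead of nested ifs. A standalone rendering of that chain, with plain ints standing in for the hvm_*_enabled() predicates:

    #include <stdio.h>

    static const char *pick_mode(int paging, int long_mode, int pae)
    {
        return !paging   ? "real" :
               long_mode ? "long" :
               pae       ? "pae"  :
                           "protected";
    }

    int main(void)
    {
        printf("%s\n", pick_mode(1, 0, 1));  /* paging on, no long mode: "pae" */
        return 0;
    }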
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 7e53c23a36..54d1c484d6 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -2266,7 +2266,7 @@ static void sh_update_paging_modes(struct vcpu *v)
ASSERT(shadow_mode_translate(d));
ASSERT(shadow_mode_external(d));
- v->arch.paging.translate_enabled = !!hvm_paging_enabled(v);
+ v->arch.paging.translate_enabled = hvm_paging_enabled(v);
if ( !v->arch.paging.translate_enabled )
{
/* Set v->arch.guest_table to use the p2m map, and choose
@@ -2347,7 +2347,7 @@ static void sh_update_paging_modes(struct vcpu *v)
SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u "
"(was g=%u s=%u)\n",
d->domain_id, v->vcpu_id,
- is_hvm_domain(d) ? !!hvm_paging_enabled(v) : 1,
+ is_hvm_domain(d) ? hvm_paging_enabled(v) : 1,
v->arch.paging.mode->guest_levels,
v->arch.paging.mode->shadow.shadow_levels,
old_mode ? old_mode->guest_levels : 0,
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 7c316ff31d..84857e43e1 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -175,7 +175,7 @@ guest_supports_superpages(struct vcpu *v)
/* The _PAGE_PSE bit must be honoured in HVM guests, whenever
* CR4.PSE is set or the guest is in PAE or long mode */
return (is_hvm_vcpu(v) && (GUEST_PAGING_LEVELS != 2
- || (hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PSE)));
+ || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
}
static inline int
@@ -3483,7 +3483,7 @@ sh_update_cr3(struct vcpu *v, int do_locking)
* Paravirtual guests should set v->arch.guest_table (and guest_table_user,
* if appropriate).
* HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works;
- * this function will call hvm_update_guest_cr3() to tell them where the
+ * this function will call hvm_update_guest_cr(v, 3) to tell them where the
* shadow tables are.
* If do_locking != 0, assume we are being called from outside the
* shadow code, and must take and release the shadow lock; otherwise
@@ -3525,7 +3525,7 @@ sh_update_cr3(struct vcpu *v, int do_locking)
// Is paging enabled on this vcpu?
if ( paging_vcpu_mode_translate(v) )
{
- gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3)));
+ gfn = _gfn(paddr_to_pfn(v->arch.hvm_vcpu.guest_cr[3]));
gmfn = vcpu_gfn_to_mfn(v, gfn);
ASSERT(mfn_valid(gmfn));
ASSERT(pagetable_get_pfn(v->arch.guest_table) == mfn_x(gmfn));
@@ -3576,11 +3576,11 @@ sh_update_cr3(struct vcpu *v, int do_locking)
if ( shadow_mode_external(d) && paging_vcpu_mode_translate(v) )
/* Paging enabled: find where in the page the l3 table is */
- guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3));
- else
- /* Paging disabled or PV: l3 is at the start of a page */
- guest_idx = 0;
-
+ guest_idx = guest_index((void *)v->arch.hvm_vcpu.guest_cr[3]);
+ else
+ /* Paging disabled or PV: l3 is at the start of a page */
+ guest_idx = 0;
+
// Ignore the low 2 bits of guest_idx -- they are really just
// cache control.
guest_idx &= ~3;
@@ -3718,18 +3718,21 @@ sh_update_cr3(struct vcpu *v, int do_locking)
///
- /// v->arch.hvm_vcpu.hw_cr3
+ /// v->arch.hvm_vcpu.hw_cr[3]
///
if ( shadow_mode_external(d) )
{
ASSERT(is_hvm_domain(d));
#if SHADOW_PAGING_LEVELS == 3
/* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */
- hvm_update_guest_cr3(v, virt_to_maddr(&v->arch.paging.shadow.l3table));
+ v->arch.hvm_vcpu.hw_cr[3] =
+ virt_to_maddr(&v->arch.paging.shadow.l3table);
#else
/* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
- hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.shadow_table[0]));
+ v->arch.hvm_vcpu.hw_cr[3] =
+ pagetable_get_paddr(v->arch.shadow_table[0]);
#endif
+ hvm_update_guest_cr(v, 3);
}
/* Fix up the linear pagetable mappings */
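
sh_update_cr3() now ends with the same two-step flow on every path: the paging code computes the machine address into hw_cr[3], then a single hvm_update_guest_cr(v, 3) call pushes it to hardware. A sketch of that flow, modelling the vendor hook as a plain store:

    #include <stdio.h>

    struct toy_vcpu { unsigned long guest_cr[5], hw_cr[5], vmcs_cr3; };

    static void toy_update_guest_cr3(struct toy_vcpu *v)
    {
        v->vmcs_cr3 = v->hw_cr[3];  /* __vmwrite(GUEST_CR3, hw_cr[3]) on VMX */
    }

    int main(void)
    {
        struct toy_vcpu v = { .guest_cr = { [3] = 0x1000 } };
        /* Shadow paging: hardware runs on the shadow table, not guest_cr[3]. */
        v.hw_cr[3] = 0xabc000;
        toy_update_guest_cr3(&v);
        printf("guest_cr3=%#lx hw_cr3=%#lx\n", v.guest_cr[3], v.vmcs_cr3);
        return 0;
    }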
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index 356e4079be..7b9bfedb51 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -28,6 +28,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
{
int irq;
ret_t ret;
+ struct vcpu *v = current;
switch ( cmd )
{
@@ -36,13 +37,13 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
ret = -EFAULT;
if ( copy_from_guest(&eoi, arg, 1) != 0 )
break;
- ret = pirq_guest_eoi(current->domain, eoi.irq);
+ ret = pirq_guest_eoi(v->domain, eoi.irq);
break;
}
/* Legacy since 0x00030202. */
case PHYSDEVOP_IRQ_UNMASK_NOTIFY: {
- ret = pirq_guest_unmask(current->domain);
+ ret = pirq_guest_unmask(v->domain);
break;
}
@@ -70,7 +71,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
if ( copy_from_guest(&apic, arg, 1) != 0 )
break;
ret = -EPERM;
- if ( !IS_PRIV(current->domain) )
+ if ( !IS_PRIV(v->domain) )
break;
ret = ioapic_guest_read(apic.apic_physbase, apic.reg, &apic.value);
if ( copy_to_guest(arg, &apic, 1) != 0 )
@@ -84,7 +85,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
if ( copy_from_guest(&apic, arg, 1) != 0 )
break;
ret = -EPERM;
- if ( !IS_PRIV(current->domain) )
+ if ( !IS_PRIV(v->domain) )
break;
ret = ioapic_guest_write(apic.apic_physbase, apic.reg, apic.value);
break;
@@ -98,7 +99,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
break;
ret = -EPERM;
- if ( !IS_PRIV(current->domain) )
+ if ( !IS_PRIV(v->domain) )
break;
irq = irq_op.irq;
@@ -120,7 +121,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
if ( set_iopl.iopl > 3 )
break;
ret = 0;
- current->arch.iopl = set_iopl.iopl;
+ v->arch.iopl = set_iopl.iopl;
break;
}
@@ -135,11 +136,11 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
break;
ret = 0;
#ifndef COMPAT
- current->arch.iobmp = set_iobitmap.bitmap;
+ v->arch.iobmp = set_iobitmap.bitmap;
#else
- guest_from_compat_handle(current->arch.iobmp, set_iobitmap.bitmap);
+ guest_from_compat_handle(v->arch.iobmp, set_iobitmap.bitmap);
#endif
- current->arch.iobmp_limit = set_iobitmap.nr_ports;
+ v->arch.iobmp_limit = set_iobitmap.nr_ports;
break;
}
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index ba76243c47..7998612653 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -106,6 +106,8 @@ extern void init_IRQ(void);
extern void trap_init(void);
extern void early_time_init(void);
extern void early_cpu_init(void);
+extern void vesa_init(void);
+extern void vesa_mtrr_init(void);
struct tss_struct init_tss[NR_CPUS];
@@ -282,9 +284,28 @@ static void __init srat_detect_node(int cpu)
printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
}
+/*
+ * Ensure a given physical memory range is present in the bootstrap mappings.
+ * Use superpage mappings to ensure that pagetable memory needn't be allocated.
+ */
+static void __init bootstrap_map(unsigned long start, unsigned long end)
+{
+ unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+ start = start & ~mask;
+ end = (end + mask) & ~mask;
+ if ( end > BOOTSTRAP_DIRECTMAP_END )
+ panic("Cannot access memory beyond end of "
+ "bootstrap direct-map area\n");
+ map_pages_to_xen(
+ (unsigned long)maddr_to_bootstrap_virt(start),
+ start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+}
+
static void __init move_memory(
unsigned long dst, unsigned long src_start, unsigned long src_end)
{
+ bootstrap_map(src_start, src_end);
+ bootstrap_map(dst, dst + src_end - src_start);
memmove(maddr_to_bootstrap_virt(dst),
maddr_to_bootstrap_virt(src_start),
src_end - src_start);
@@ -882,6 +903,7 @@ void __init __start_xen(unsigned long mbi_p)
#ifdef __x86_64__
init_xenheap_pages(xen_phys_start, __pa(&_start));
nr_pages += (__pa(&_start) - xen_phys_start) >> PAGE_SHIFT;
+ vesa_init();
#endif
xenheap_phys_start = xen_phys_start;
printk("Xen heap: %luMB (%lukB)\n",
@@ -947,6 +969,9 @@ void __init __start_xen(unsigned long mbi_p)
set_in_cr4(X86_CR4_OSFXSR);
if ( cpu_has_xmm )
set_in_cr4(X86_CR4_OSXMMEXCPT);
+#ifdef CONFIG_X86_64
+ vesa_mtrr_init();
+#endif
if ( opt_nosmp )
max_cpus = 0;
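
bootstrap_map() relies on simple mask arithmetic to widen any physical range to 2MB superpage boundaries, so the mapping never needs fresh L1 pagetable allocations. A checkable sketch of the rounding, assuming the x86 2MB shift:

    #include <stdio.h>

    #define L2_PAGETABLE_SHIFT 21  /* 2MB superpages */

    int main(void)
    {
        unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
        unsigned long start = 0x00345678UL, end = 0x00456789UL;

        start &= ~mask;                /* round start down */
        end = (end + mask) & ~mask;    /* round end up */

        printf("map [%#lx, %#lx)\n", start, end);  /* [0x200000, 0x600000) */
        return 0;
    }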
diff --git a/xen/arch/x86/string.c b/xen/arch/x86/string.c
index 745670b926..a649df9954 100644
--- a/xen/arch/x86/string.c
+++ b/xen/arch/x86/string.c
@@ -11,10 +11,18 @@
#undef memcpy
void *memcpy(void *dest, const void *src, size_t n)
{
- int d0, d1, d2;
+ long d0, d1, d2;
__asm__ __volatile__ (
- " rep ; movsl ; "
+#ifdef __i386__
+ " rep movsl ; "
+#else
+ " rep movsq ; "
+ " testb $4,%b4 ; "
+ " je 0f ; "
+ " movsl ; "
+ "0: ; "
+#endif
" testb $2,%b4 ; "
" je 1f ; "
" movsw ; "
@@ -23,7 +31,7 @@ void *memcpy(void *dest, const void *src, size_t n)
" movsb ; "
"2: "
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
- : "0" (n/4), "q" (n), "1" (dest), "2" (src)
+ : "0" (n/sizeof(long)), "q" (n), "1" (dest), "2" (src)
: "memory");
return dest;
@@ -32,10 +40,10 @@ void *memcpy(void *dest, const void *src, size_t n)
#undef memset
void *memset(void *s, int c, size_t n)
{
- int d0, d1;
+ long d0, d1;
__asm__ __volatile__ (
- "rep ; stosb"
+ "rep stosb"
: "=&c" (d0), "=&D" (d1)
: "a" (c), "1" (s), "0" (n)
: "memory");
@@ -46,14 +54,14 @@ void *memset(void *s, int c, size_t n)
#undef memmove
void *memmove(void *dest, const void *src, size_t n)
{
- int d0, d1, d2;
+ long d0, d1, d2;
if ( dest < src )
return memcpy(dest, src, n);
__asm__ __volatile__ (
" std ; "
- " rep ; movsb ; "
+ " rep movsb ; "
" cld "
: "=&c" (d0), "=&S" (d1), "=&D" (d2)
: "0" (n), "1" (n-1+(const char *)src), "2" (n-1+(char *)dest)
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index d42a168dbd..f89e169108 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1219,7 +1219,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
unsigned long code_base, code_limit;
char io_emul_stub[16];
void (*io_emul)(struct cpu_user_regs *) __attribute__((__regparm__(1)));
- u32 l, h;
+ u32 l, h, eax, edx;
if ( !read_descriptor(regs->cs, v, regs,
&code_base, &code_limit, &ar,
@@ -1696,43 +1696,43 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
break;
case 0x30: /* WRMSR */
+ eax = regs->eax;
+ edx = regs->edx;
+ res = ((u64)edx << 32) | eax;
switch ( regs->ecx )
{
#ifdef CONFIG_X86_64
case MSR_FS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) )
+ if ( wrmsr_safe(MSR_FS_BASE, eax, edx) )
goto fail;
- v->arch.guest_context.fs_base =
- ((u64)regs->edx << 32) | regs->eax;
+ v->arch.guest_context.fs_base = res;
break;
case MSR_GS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) )
+ if ( wrmsr_safe(MSR_GS_BASE, eax, edx) )
goto fail;
- v->arch.guest_context.gs_base_kernel =
- ((u64)regs->edx << 32) | regs->eax;
+ v->arch.guest_context.gs_base_kernel = res;
break;
case MSR_SHADOW_GS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
+ if ( wrmsr_safe(MSR_SHADOW_GS_BASE, eax, edx) )
goto fail;
- v->arch.guest_context.gs_base_user =
- ((u64)regs->edx << 32) | regs->eax;
+ v->arch.guest_context.gs_base_user = res;
break;
#endif
default:
- if ( wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx) )
+ if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) )
break;
if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
- (regs->eax != l) || (regs->edx != h) )
+ (eax != l) || (edx != h) )
gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from "
- "%08x:%08x to %08lx:%08lx.\n",
- _p(regs->ecx), h, l, (long)regs->edx, (long)regs->eax);
+ "%08x:%08x to %08x:%08x.\n",
+ _p(regs->ecx), h, l, edx, eax);
break;
}
break;
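Snapshotting regs->eax/regs->edx into locals and recombining them follows the WRMSR convention, in which the 64-bit value arrives split across EDX:EAX. The recombination stored in res above is a plain shift-and-or (hypothetical values):

    /* Sketch: rebuilding the 64-bit WRMSR operand from the EDX:EAX pair. */
    unsigned int eax = 0x89abcdef, edx = 0x01234567;
    unsigned long long val = ((unsigned long long)edx << 32) | eax;
    /* val == 0x0123456789abcdef */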
diff --git a/xen/arch/x86/x86_32/asm-offsets.c b/xen/arch/x86/x86_32/asm-offsets.c
index 2d6b47de2c..fc276245c7 100644
--- a/xen/arch/x86/x86_32/asm-offsets.c
+++ b/xen/arch/x86/x86_32/asm-offsets.c
@@ -85,7 +85,7 @@ void __dummy__(void)
BLANK();
OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched);
- OFFSET(VCPU_vmx_cr2, struct vcpu, arch.hvm_vmx.cpu_cr2);
+ OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]);
BLANK();
OFFSET(VMCB_rax, struct vmcb_struct, rax);
diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c
index 5b391d60c1..813283b285 100644
--- a/xen/arch/x86/x86_32/traps.c
+++ b/xen/arch/x86/x86_32/traps.c
@@ -172,6 +172,7 @@ asmlinkage void do_double_fault(void)
unsigned long do_iret(void)
{
struct cpu_user_regs *regs = guest_cpu_user_regs();
+ struct vcpu *v = current;
u32 eflags;
/* Check worst-case stack frame for overlap with Xen protected area. */
@@ -215,10 +216,10 @@ unsigned long do_iret(void)
}
/* No longer in NMI context. */
- current->nmi_masked = 0;
+ v->nmi_masked = 0;
/* Restore upcall mask from supplied EFLAGS.IF. */
- current->vcpu_info->evtchn_upcall_mask = !(eflags & X86_EFLAGS_IF);
+ vcpu_info(v, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF);
/*
* The hypercall exit path will overwrite EAX with this return
@@ -228,7 +229,7 @@ unsigned long do_iret(void)
exit_and_crash:
gdprintk(XENLOG_ERR, "Fatal error\n");
- domain_crash(current->domain);
+ domain_crash(v->domain);
return 0;
}
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 836ce6a054..eb3f623351 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -88,7 +88,7 @@ void __dummy__(void)
BLANK();
OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched);
- OFFSET(VCPU_vmx_cr2, struct vcpu, arch.hvm_vmx.cpu_cr2);
+ OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]);
BLANK();
OFFSET(DOMAIN_is_32bit_pv, struct domain, arch.is_32bit_pv);
diff --git a/xen/arch/x86/x86_64/compat/traps.c b/xen/arch/x86/x86_64/compat/traps.c
index df1e512a21..4c6298dd58 100644
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -37,6 +37,7 @@ void compat_show_guest_stack(struct cpu_user_regs *regs, int debug_stack_lines)
unsigned int compat_iret(void)
{
struct cpu_user_regs *regs = guest_cpu_user_regs();
+ struct vcpu *v = current;
u32 eflags;
/* Trim stack pointer to 32 bits. */
@@ -70,7 +71,7 @@ unsigned int compat_iret(void)
* mode frames).
*/
const struct trap_info *ti;
- u32 x, ksp = current->arch.guest_context.kernel_sp - 40;
+ u32 x, ksp = v->arch.guest_context.kernel_sp - 40;
unsigned int i;
int rc = 0;
@@ -95,9 +96,9 @@ unsigned int compat_iret(void)
if ( rc )
goto exit_and_crash;
regs->_esp = ksp;
- regs->ss = current->arch.guest_context.kernel_ss;
+ regs->ss = v->arch.guest_context.kernel_ss;
- ti = &current->arch.guest_context.trap_ctxt[13];
+ ti = &v->arch.guest_context.trap_ctxt[13];
if ( TI_GET_IF(ti) )
eflags &= ~X86_EFLAGS_IF;
regs->_eflags = eflags & ~(X86_EFLAGS_VM|X86_EFLAGS_RF|
@@ -121,10 +122,10 @@ unsigned int compat_iret(void)
regs->_esp += 16;
/* No longer in NMI context. */
- current->nmi_masked = 0;
+ v->nmi_masked = 0;
/* Restore upcall mask from supplied EFLAGS.IF. */
- vcpu_info(current, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF);
+ vcpu_info(v, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF);
/*
* The hypercall exit path will overwrite EAX with this return
@@ -134,11 +135,12 @@ unsigned int compat_iret(void)
exit_and_crash:
gdprintk(XENLOG_ERR, "Fatal error\n");
- domain_crash(current->domain);
+ domain_crash(v->domain);
return 0;
}
-static long compat_register_guest_callback(struct compat_callback_register *reg)
+static long compat_register_guest_callback(
+ struct compat_callback_register *reg)
{
long ret = 0;
struct vcpu *v = current;
@@ -175,7 +177,8 @@ static long compat_register_guest_callback(struct compat_callback_register *reg)
return ret;
}
-static long compat_unregister_guest_callback(struct compat_callback_unregister *unreg)
+static long compat_unregister_guest_callback(
+ struct compat_callback_unregister *unreg)
{
long ret;
diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
index ad4577ad55..1b8b0ad496 100644
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -235,10 +235,10 @@ unsigned long do_iret(void)
}
/* No longer in NMI context. */
- current->nmi_masked = 0;
+ v->nmi_masked = 0;
/* Restore upcall mask from supplied EFLAGS.IF. */
- vcpu_info(current, evtchn_upcall_mask) = !(iret_saved.rflags & EF_IE);
+ vcpu_info(v, evtchn_upcall_mask) = !(iret_saved.rflags & EF_IE);
/* Saved %rax gets written back to regs->rax in entry.S. */
return iret_saved.rax;
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 09a1e84d98..5d29667b7c 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -463,19 +463,13 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
case XEN_DOMCTL_getdomaininfo:
{
struct domain *d;
- domid_t dom;
-
- dom = op->domain;
- if ( dom == DOMID_SELF )
- dom = current->domain->domain_id;
+ domid_t dom = op->domain;
rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
- {
if ( d->domain_id >= dom )
break;
- }
if ( d == NULL )
{
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 854887a484..04a7b1838c 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -54,21 +54,14 @@ boolean_param("bootscrub", opt_bootscrub);
/*
* Bit width of the DMA heap.
*/
-static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
-static unsigned long max_dma_mfn = (1UL<<(CONFIG_DMA_BITSIZE-PAGE_SHIFT))-1;
+static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
static void __init parse_dma_bits(char *s)
{
unsigned int v = simple_strtol(s, NULL, 0);
if ( v >= (BITS_PER_LONG + PAGE_SHIFT) )
- {
dma_bitsize = BITS_PER_LONG + PAGE_SHIFT;
- max_dma_mfn = ~0UL;
- }
else if ( v > PAGE_SHIFT + 1 )
- {
dma_bitsize = v;
- max_dma_mfn = (1UL << (dma_bitsize - PAGE_SHIFT)) - 1;
- }
else
printk("Invalid dma_bits value of %u ignored.\n", v);
}
diff --git a/xen/common/xencomm.c b/xen/common/xencomm.c
index 8b7e502869..9aa0777f89 100644
--- a/xen/common/xencomm.c
+++ b/xen/common/xencomm.c
@@ -26,35 +26,36 @@
#include <public/xen.h>
#include <public/xencomm.h>
-
#undef DEBUG
#ifdef DEBUG
-static int xencomm_debug = 1; /* extremely verbose */
+#define xc_dprintk(f, a...) printk("[xencomm]" f , ## a)
#else
-#define xencomm_debug 0
+#define xc_dprintk(f, a...) ((void)0)
#endif
-static unsigned long
-xencomm_inline_from_guest(void *to, const void *from, unsigned int n,
- unsigned int skip)
+static void*
+xencomm_maddr_to_vaddr(unsigned long maddr)
{
- unsigned long src_paddr = xencomm_inline_addr(from);
+ return maddr ? maddr_to_virt(maddr) : NULL;
+}
- src_paddr += skip;
+static unsigned long
+xencomm_inline_from_guest(
+ void *to, const void *from, unsigned int n, unsigned int skip)
+{
+ unsigned long src_paddr = xencomm_inline_addr(from) + skip;
- while (n > 0) {
- unsigned int chunksz;
+ while ( n > 0 )
+ {
+ unsigned int chunksz, bytes;
unsigned long src_maddr;
- unsigned int bytes;
chunksz = PAGE_SIZE - (src_paddr % PAGE_SIZE);
-
- bytes = min(chunksz, n);
+ bytes = min(chunksz, n);
src_maddr = paddr_to_maddr(src_paddr);
- if (xencomm_debug)
- printk("%lx[%d] -> %lx\n", src_maddr, bytes, (unsigned long)to);
- memcpy(to, (void *)src_maddr, bytes);
+ xc_dprintk("%lx[%d] -> %lx\n", src_maddr, bytes, (unsigned long)to);
+ memcpy(to, maddr_to_virt(src_maddr), bytes);
src_paddr += bytes;
to += bytes;
n -= bytes;
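The loop never lets a single memcpy() cross a page boundary: chunksz is the room left in the current page, and min(chunksz, n) caps each iteration. The chunk sizing, in isolation (a sketch assuming 4kB pages):

    /* Sketch: per-iteration chunk sizing used by the xencomm copy loops. */
    #define SKETCH_PAGE_SIZE 4096
    static unsigned int chunk(unsigned long paddr, unsigned int n)
    {
        unsigned int chunksz = SKETCH_PAGE_SIZE - (paddr % SKETCH_PAGE_SIZE);
        return (chunksz < n) ? chunksz : n;   /* min(chunksz, n) */
    }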
@@ -77,36 +78,40 @@ xencomm_inline_from_guest(void *to, const void *from, unsigned int n,
* On success, this will be zero.
*/
unsigned long
-xencomm_copy_from_guest(void *to, const void *from, unsigned int n,
- unsigned int skip)
+xencomm_copy_from_guest(
+ void *to, const void *from, unsigned int n, unsigned int skip)
{
struct xencomm_desc *desc;
unsigned int from_pos = 0;
unsigned int to_pos = 0;
unsigned int i = 0;
- if (xencomm_is_inline(from))
+ if ( xencomm_is_inline(from) )
return xencomm_inline_from_guest(to, from, n, skip);
- /* first we need to access the descriptor */
- desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)from);
- if (desc == NULL)
+ /* First we need to access the descriptor. */
+ desc = (struct xencomm_desc *)
+ xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)from));
+ if ( desc == NULL )
return n;
- if (desc->magic != XENCOMM_MAGIC) {
+ if ( desc->magic != XENCOMM_MAGIC )
+ {
printk("%s: error: %p magic was 0x%x\n",
__func__, desc, desc->magic);
return n;
}
- /* iterate through the descriptor, copying up to a page at a time */
- while ((to_pos < n) && (i < desc->nr_addrs)) {
+ /* Iterate through the descriptor, copying up to a page at a time. */
+ while ( (to_pos < n) && (i < desc->nr_addrs) )
+ {
unsigned long src_paddr = desc->address[i];
unsigned int pgoffset;
unsigned int chunksz;
unsigned int chunk_skip;
- if (src_paddr == XENCOMM_INVALID) {
+ if ( src_paddr == XENCOMM_INVALID )
+ {
i++;
continue;
}
@@ -119,18 +124,18 @@ xencomm_copy_from_guest(void *to, const void *from, unsigned int n,
chunksz -= chunk_skip;
skip -= chunk_skip;
- if (skip == 0 && chunksz > 0) {
+ if ( (skip == 0) && (chunksz > 0) )
+ {
unsigned long src_maddr;
unsigned long dest = (unsigned long)to + to_pos;
unsigned int bytes = min(chunksz, n - to_pos);
src_maddr = paddr_to_maddr(src_paddr + chunk_skip);
- if (src_maddr == 0)
+ if ( src_maddr == 0 )
return n - to_pos;
- if (xencomm_debug)
- printk("%lx[%d] -> %lx\n", src_maddr, bytes, dest);
- memcpy((void *)dest, (void *)src_maddr, bytes);
+ xc_dprintk("%lx[%d] -> %lx\n", src_maddr, bytes, dest);
+ memcpy((void *)dest, maddr_to_virt(src_maddr), bytes);
from_pos += bytes;
to_pos += bytes;
}
@@ -142,32 +147,28 @@ xencomm_copy_from_guest(void *to, const void *from, unsigned int n,
}
static unsigned long
-xencomm_inline_to_guest(void *to, const void *from, unsigned int n,
- unsigned int skip)
+xencomm_inline_to_guest(
+ void *to, const void *from, unsigned int n, unsigned int skip)
{
- unsigned long dest_paddr = xencomm_inline_addr(to);
+ unsigned long dest_paddr = xencomm_inline_addr(to) + skip;
- dest_paddr += skip;
-
- while (n > 0) {
- unsigned int chunksz;
+ while ( n > 0 )
+ {
+ unsigned int chunksz, bytes;
unsigned long dest_maddr;
- unsigned int bytes;
chunksz = PAGE_SIZE - (dest_paddr % PAGE_SIZE);
-
- bytes = min(chunksz, n);
+ bytes = min(chunksz, n);
dest_maddr = paddr_to_maddr(dest_paddr);
- if (xencomm_debug)
- printk("%lx[%d] -> %lx\n", (unsigned long)from, bytes, dest_maddr);
- memcpy((void *)dest_maddr, (void *)from, bytes);
+ xc_dprintk("%lx[%d] -> %lx\n", (unsigned long)from, bytes, dest_maddr);
+ memcpy(maddr_to_virt(dest_maddr), (void *)from, bytes);
dest_paddr += bytes;
from += bytes;
n -= bytes;
}
- /* Always successful. */
+ /* Always successful. */
return 0;
}
@@ -184,35 +185,37 @@ xencomm_inline_to_guest(void *to, const void *from, unsigned int n,
* On success, this will be zero.
*/
unsigned long
-xencomm_copy_to_guest(void *to, const void *from, unsigned int n,
- unsigned int skip)
+xencomm_copy_to_guest(
+ void *to, const void *from, unsigned int n, unsigned int skip)
{
struct xencomm_desc *desc;
unsigned int from_pos = 0;
unsigned int to_pos = 0;
unsigned int i = 0;
- if (xencomm_is_inline(to))
+ if ( xencomm_is_inline(to) )
return xencomm_inline_to_guest(to, from, n, skip);
- /* first we need to access the descriptor */
- desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)to);
- if (desc == NULL)
+ /* First we need to access the descriptor. */
+ desc = (struct xencomm_desc *)
+ xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)to));
+ if ( desc == NULL )
return n;
- if (desc->magic != XENCOMM_MAGIC) {
+ if ( desc->magic != XENCOMM_MAGIC )
+ {
printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
return n;
}
- /* iterate through the descriptor, copying up to a page at a time */
- while ((from_pos < n) && (i < desc->nr_addrs)) {
+ /* Iterate through the descriptor, copying up to a page at a time. */
+ while ( (from_pos < n) && (i < desc->nr_addrs) )
+ {
unsigned long dest_paddr = desc->address[i];
- unsigned int pgoffset;
- unsigned int chunksz;
- unsigned int chunk_skip;
+ unsigned int pgoffset, chunksz, chunk_skip;
- if (dest_paddr == XENCOMM_INVALID) {
+ if ( dest_paddr == XENCOMM_INVALID )
+ {
i++;
continue;
}
@@ -225,18 +228,18 @@ xencomm_copy_to_guest(void *to, const void *from, unsigned int n,
chunksz -= chunk_skip;
skip -= chunk_skip;
- if (skip == 0 && chunksz > 0) {
+ if ( (skip == 0) && (chunksz > 0) )
+ {
unsigned long dest_maddr;
unsigned long source = (unsigned long)from + from_pos;
unsigned int bytes = min(chunksz, n - from_pos);
dest_maddr = paddr_to_maddr(dest_paddr + chunk_skip);
- if (dest_maddr == 0)
- return -1;
+ if ( dest_maddr == 0 )
+ return n - from_pos;
- if (xencomm_debug)
- printk("%lx[%d] -> %lx\n", source, bytes, dest_maddr);
- memcpy((void *)dest_maddr, (void *)source, bytes);
+ xc_dprintk("%lx[%d] -> %lx\n", source, bytes, dest_maddr);
+ memcpy(maddr_to_virt(dest_maddr), (void *)source, bytes);
from_pos += bytes;
to_pos += bytes;
}
@@ -260,38 +263,46 @@ int xencomm_add_offset(void **handle, unsigned int bytes)
struct xencomm_desc *desc;
int i = 0;
- if (xencomm_is_inline(*handle))
+ if ( xencomm_is_inline(*handle) )
return xencomm_inline_add_offset(handle, bytes);
- /* first we need to access the descriptor */
- desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)*handle);
- if (desc == NULL)
+ /* First we need to access the descriptor. */
+ desc = (struct xencomm_desc *)
+ xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)*handle));
+ if ( desc == NULL )
return -1;
- if (desc->magic != XENCOMM_MAGIC) {
+ if ( desc->magic != XENCOMM_MAGIC )
+ {
printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
return -1;
}
- /* iterate through the descriptor incrementing addresses */
- while ((bytes > 0) && (i < desc->nr_addrs)) {
+ /* Iterate through the descriptor incrementing addresses. */
+ while ( (bytes > 0) && (i < desc->nr_addrs) )
+ {
unsigned long dest_paddr = desc->address[i];
- unsigned int pgoffset;
- unsigned int chunksz;
- unsigned int chunk_skip;
+ unsigned int pgoffset, chunksz, chunk_skip;
+
+ if ( dest_paddr == XENCOMM_INVALID )
+ {
+ i++;
+ continue;
+ }
pgoffset = dest_paddr % PAGE_SIZE;
chunksz = PAGE_SIZE - pgoffset;
chunk_skip = min(chunksz, bytes);
- if (chunk_skip == chunksz) {
- /* exhausted this page */
- desc->address[i] = XENCOMM_INVALID;
- } else {
+ if ( chunk_skip == chunksz )
+            desc->address[i] = XENCOMM_INVALID; /* exhausted this page */
+ else
desc->address[i] += chunk_skip;
- }
bytes -= chunk_skip;
+
+ i++;
}
+
return 0;
}
@@ -300,17 +311,17 @@ int xencomm_handle_is_null(void *handle)
struct xencomm_desc *desc;
int i;
- if (xencomm_is_inline(handle))
+ if ( xencomm_is_inline(handle) )
return xencomm_inline_addr(handle) == 0;
- desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)handle);
- if (desc == NULL)
+ desc = (struct xencomm_desc *)
+ xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)handle));
+ if ( desc == NULL )
return 1;
- for (i = 0; i < desc->nr_addrs; i++)
- if (desc->address[i] != XENCOMM_INVALID)
+ for ( i = 0; i < desc->nr_addrs; i++ )
+ if ( desc->address[i] != XENCOMM_INVALID )
return 0;
return 1;
}
-
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index c3c982347f..de351265db 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -331,13 +331,11 @@ static long guest_console_write(XEN_GUEST_HANDLE(char) buffer, int count)
kbuf[kcount] = '\0';
sercon_puts(kbuf);
+ vga_puts(kbuf);
- for ( kptr = kbuf; *kptr != '\0'; kptr++ )
- {
- vga_putchar(*kptr);
- if ( opt_console_to_ring )
+ if ( opt_console_to_ring )
+ for ( kptr = kbuf; *kptr != '\0'; kptr++ )
putchar_console_ring(*kptr);
- }
if ( opt_console_to_ring )
send_guest_global_virq(dom0, VIRQ_CON_RING);
@@ -404,12 +402,10 @@ static void __putstr(const char *str)
int c;
sercon_puts(str);
+ vga_puts(str);
while ( (c = *str++) != '\0' )
- {
- vga_putchar(c);
putchar_console_ring(c);
- }
send_guest_global_virq(dom0, VIRQ_CON_RING);
}
diff --git a/xen/drivers/video/Makefile b/xen/drivers/video/Makefile
index c7c69194bf..5123cfe990 100644
--- a/xen/drivers/video/Makefile
+++ b/xen/drivers/video/Makefile
@@ -1,4 +1,8 @@
-obj-y += font_8x14.o
-obj-y += font_8x16.o
-obj-y += font_8x8.o
-obj-y += vga.o
+obj-y := vga.o
+obj-$(CONFIG_X86_64) += font_8x14.o
+obj-$(CONFIG_X86_64) += font_8x16.o
+obj-$(CONFIG_X86_64) += font_8x8.o
+obj-$(CONFIG_X86_64) += vesa.o
+
+# extra dependencies
+vesa.o: font.h
diff --git a/xen/drivers/video/vesa.c b/xen/drivers/video/vesa.c
new file mode 100644
index 0000000000..d12aa0361a
--- /dev/null
+++ b/xen/drivers/video/vesa.c
@@ -0,0 +1,307 @@
+/******************************************************************************
+ * vesa.c
+ *
+ * VESA linear frame buffer handling.
+ */
+
+#include <xen/config.h>
+#include <xen/compile.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/errno.h>
+#include <xen/console.h>
+#include <xen/vga.h>
+#include "font.h"
+
+#define vlfb_info vga_console_info.u.vesa_lfb
+#define text_columns (vlfb_info.width / font->width)
+#define text_rows (vlfb_info.height / font->height)
+
+static void vesa_redraw_puts(const char *s);
+static void vesa_scroll_puts(const char *s);
+
+static unsigned char *lfb, *lbuf, *text_buf;
+static const struct font_desc *font;
+static bool_t vga_compat;
+static unsigned int pixel_on;
+static unsigned int xpos, ypos;
+
+static unsigned int vram_total;
+integer_param("vesa-ram", vram_total);
+
+static unsigned int vram_remap;
+integer_param("vesa-map", vram_remap);
+
+static int font_height;
+static void __init parse_font_height(const char *s)
+{
+ if ( simple_strtoul(s, &s, 10) == 8 && (*s++ == 'x') )
+ font_height = simple_strtoul(s, &s, 10);
+ if ( *s != '\0' )
+ font_height = 0;
+}
+custom_param("font", parse_font_height);
+
+void __init vesa_early_init(void)
+{
+ unsigned int vram_vmode;
+
+ /* XXX vga_compat = !(boot_video_info.capabilities & 2); */
+
+ if ( (vlfb_info.bits_per_pixel < 8) || (vlfb_info.bits_per_pixel > 32) )
+ return;
+
+ if ( font_height == 0 ) /* choose a sensible default */
+ font = ((vlfb_info.height <= 600) ? &font_vga_8x8 :
+ (vlfb_info.height <= 768) ? &font_vga_8x14 : &font_vga_8x16);
+ else if ( font_height <= 8 )
+ font = &font_vga_8x8;
+ else if ( font_height <= 14 )
+ font = &font_vga_8x14;
+ else
+ font = &font_vga_8x16;
+
+    /* vram_vmode -- the amount of memory needed for the video mode
+     *               in use, i.e. the minimum amount of memory we
+     *               must map. */
+ vram_vmode = vlfb_info.height * vlfb_info.bytes_per_line;
+
+ /* vram_total -- all video memory we have. Used for mtrr
+ * entries. */
+ vram_total = vram_total ? (vram_total << 20) : (vlfb_info.lfb_size << 16);
+ vram_total = max_t(unsigned int, vram_total, vram_vmode);
+
+    /* vram_remap -- the amount of video memory we are going to
+     *               use for vesafb. With modern cards it is not
+     *               an option to simply use vram_total, as that
+     *               wastes plenty of kernel address space. */
+ vram_remap = (vram_remap ?
+ (vram_remap << 20) :
+ ((vram_vmode + (1 << L2_PAGETABLE_SHIFT) - 1) &
+ ~((1 << L2_PAGETABLE_SHIFT) - 1)));
+ vram_remap = max_t(unsigned int, vram_remap, vram_vmode);
+ vram_remap = min_t(unsigned int, vram_remap, vram_total);
+}
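As a worked example of the sizing above (all numbers hypothetical): a 1024x768 mode at 32bpp with a 4096-byte scanline needs a 3MB vram_vmode; vram_remap is then rounded up to the next 2MB superpage and clamped to vram_total.

    /* Sketch: vesa_early_init() sizing for a hypothetical 1024x768x32 mode. */
    unsigned int bytes_per_line = 1024 * 4;                      /* 4096 */
    unsigned int vram_vmode = 768 * bytes_per_line;              /* 3MB  */
    unsigned int sp_mask = (1u << 21) - 1;                       /* 2MB superpages */
    unsigned int vram_remap = (vram_vmode + sp_mask) & ~sp_mask; /* 4MB  */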
+
+void __init vesa_init(void)
+{
+ if ( !font )
+ goto fail;
+
+ lbuf = xmalloc_bytes(vlfb_info.bytes_per_line);
+ if ( !lbuf )
+ goto fail;
+
+ text_buf = xmalloc_bytes(text_columns * text_rows);
+ if ( !text_buf )
+ goto fail;
+
+ if ( map_pages_to_xen(IOREMAP_VIRT_START,
+ vlfb_info.lfb_base >> PAGE_SHIFT,
+ vram_remap >> PAGE_SHIFT,
+ PAGE_HYPERVISOR_NOCACHE) )
+ goto fail;
+
+ lfb = memset((void *)IOREMAP_VIRT_START, 0, vram_remap);
+ memset(text_buf, 0, text_columns * text_rows);
+
+ vga_puts = vesa_redraw_puts;
+
+ printk(XENLOG_INFO "vesafb: framebuffer at 0x%x, mapped to 0x%p, "
+ "using %uk, total %uk\n",
+ vlfb_info.lfb_base, lfb,
+ vram_remap >> 10, vram_total >> 10);
+ printk(XENLOG_INFO "vesafb: mode is %dx%dx%u, linelength=%d, font %ux%u\n",
+ vlfb_info.width, vlfb_info.height,
+ vlfb_info.bits_per_pixel, vlfb_info.bytes_per_line,
+ font->width, font->height);
+ printk(XENLOG_INFO "vesafb: %scolor: size=%d:%d:%d:%d, "
+ "shift=%d:%d:%d:%d\n",
+ vlfb_info.bits_per_pixel > 8 ? "True" :
+ vga_compat ? "Pseudo" : "Static Pseudo",
+ vlfb_info.rsvd_size, vlfb_info.red_size,
+ vlfb_info.green_size, vlfb_info.blue_size,
+ vlfb_info.rsvd_pos, vlfb_info.red_pos,
+ vlfb_info.green_pos, vlfb_info.blue_pos);
+
+ if ( vlfb_info.bits_per_pixel > 8 )
+ {
+ /* Light grey in truecolor. */
+ unsigned int grey = 0xaaaaaaaa;
+ pixel_on =
+ ((grey >> (32 - vlfb_info. red_size)) << vlfb_info. red_pos) |
+ ((grey >> (32 - vlfb_info.green_size)) << vlfb_info.green_pos) |
+ ((grey >> (32 - vlfb_info. blue_size)) << vlfb_info. blue_pos);
+ }
+ else
+ {
+ /* White(ish) in default pseudocolor palette. */
+ pixel_on = 7;
+ }
+
+ return;
+
+ fail:
+ xfree(lbuf);
+ xfree(text_buf);
+}
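The truecolor pixel_on value takes the top red_size/green_size/blue_size bits of the 32-bit grey pattern and shifts each into its field position. For a hypothetical 5:6:5 (RGB565) layout with red_pos=11, green_pos=5, blue_pos=0:

    /* Sketch: packing 0xaaaaaaaa grey into a hypothetical RGB565 layout. */
    unsigned int grey = 0xaaaaaaaa;
    unsigned int pixel_on =
        ((grey >> (32 - 5)) << 11) |  /* red:   top 5 bits -> bits 15..11 */
        ((grey >> (32 - 6)) << 5)  |  /* green: top 6 bits -> bits 10..5  */
        ((grey >> (32 - 5)) << 0);    /* blue:  top 5 bits -> bits 4..0   */
    /* grey >> 27 == 0x15, grey >> 26 == 0x2a, so pixel_on == 0xad55. */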
+
+void __init vesa_endboot(void)
+{
+ xpos = 0;
+ vga_puts = vesa_scroll_puts;
+}
+
+#if defined(CONFIG_X86)
+
+#include <asm/mtrr.h>
+
+static unsigned int vesa_mtrr;
+integer_param("vesa-mtrr", vesa_mtrr);
+
+void __init vesa_mtrr_init(void)
+{
+ static const int mtrr_types[] = {
+ 0, MTRR_TYPE_UNCACHABLE, MTRR_TYPE_WRBACK,
+ MTRR_TYPE_WRCOMB, MTRR_TYPE_WRTHROUGH };
+ unsigned int size_total;
+ int rc, type;
+
+ if ( !lfb || (vesa_mtrr == 0) || (vesa_mtrr >= ARRAY_SIZE(mtrr_types)) )
+ return;
+
+ type = mtrr_types[vesa_mtrr];
+ if ( !type )
+ return;
+
+    /* Find the largest power of two not exceeding vram_total. */
+ size_total = vram_total;
+ while ( size_total & (size_total - 1) )
+ size_total &= size_total - 1;
+
+    /* Try to find a power-of-two size that mtrr_add() accepts. */
+ do {
+ rc = mtrr_add(vlfb_info.lfb_base, size_total, type, 1);
+ size_total >>= 1;
+ } while ( (size_total >= PAGE_SIZE) && (rc == -EINVAL) );
+}
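The clear-lowest-set-bit loop reduces vram_total to its highest set bit, i.e. the largest power-of-two size a single MTRR can cover; the do/while then halves that size until mtrr_add() stops returning -EINVAL. The reduction on its own (hypothetical value):

    /* Sketch: reducing a value to the largest power of two not above it. */
    unsigned int v = 0x00a40000;   /* hypothetical vram_total (10.25MB) */
    while ( v & (v - 1) )
        v &= v - 1;                /* clears the lowest set bit each pass */
    /* v ends as 0x00800000 (8MB). */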
+
+static void lfb_flush(void)
+{
+ if ( vesa_mtrr == 3 )
+ __asm__ __volatile__ ("sfence" : : : "memory");
+}
+
+#else /* !defined(CONFIG_X86) */
+
+#define lfb_flush() ((void)0)
+
+#endif
+
+/* Render one line of text to given linear framebuffer line. */
+static void vesa_show_line(
+ const unsigned char *text_line,
+ unsigned char *video_line,
+ unsigned int nr_chars)
+{
+ unsigned int i, j, b, bpp, pixel;
+
+ bpp = (vlfb_info.bits_per_pixel + 7) >> 3;
+
+ for ( i = 0; i < font->height; i++ )
+ {
+ unsigned char *ptr = lbuf;
+
+ for ( j = 0; j < nr_chars; j++ )
+ {
+ const unsigned char *bits = font->data;
+ bits += ((text_line[j] * font->height + i) *
+ ((font->width + 7) >> 3));
+ for ( b = font->width; b--; )
+ {
+ pixel = test_bit(b, bits) ? pixel_on : 0;
+ memcpy(ptr, &pixel, bpp);
+ ptr += bpp;
+ }
+ }
+
+ memset(ptr, 0, (vlfb_info.width - nr_chars * font->width) * bpp);
+ memcpy(video_line, lbuf, vlfb_info.width * bpp);
+ video_line += vlfb_info.bytes_per_line;
+ }
+}
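vesa_show_line() indexes the font as height rows of ceil(width/8) bytes per glyph; test_bit() then selects individual pixels within a row. Locating a glyph row, in isolation (a sketch of the bits pointer computation above):

    /* Sketch: finding one row of a glyph in a 1bpp font bitmap. */
    static const unsigned char *glyph_row(
        const unsigned char *data, unsigned int font_height,
        unsigned int font_width, unsigned char ch, unsigned int row)
    {
        unsigned int bytes_per_row = (font_width + 7) >> 3;
        return data + (ch * font_height + row) * bytes_per_row;
    }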
+
+/* Fast mode which redraws all modified parts of a 2D text buffer. */
+static void vesa_redraw_puts(const char *s)
+{
+ unsigned int i, min_redraw_y = ypos;
+ char c;
+
+ /* Paste characters into text buffer. */
+ while ( (c = *s++) != '\0' )
+ {
+ if ( (c == '\n') || (xpos >= text_columns) )
+ {
+ if ( ++ypos >= text_rows )
+ {
+ min_redraw_y = 0;
+ ypos = text_rows - 1;
+ memmove(text_buf, text_buf + text_columns,
+ ypos * text_columns);
+ memset(text_buf + ypos * text_columns, 0, xpos);
+ }
+ xpos = 0;
+ }
+
+ if ( c != '\n' )
+ text_buf[xpos++ + ypos * text_columns] = c;
+ }
+
+ /* Render modified section of text buffer to VESA linear framebuffer. */
+ for ( i = min_redraw_y; i <= ypos; i++ )
+ vesa_show_line(text_buf + i * text_columns,
+ lfb + i * font->height * vlfb_info.bytes_per_line,
+ text_columns);
+
+ lfb_flush();
+}
+
+/* Slower line-based scroll mode which interacts better with dom0. */
+static void vesa_scroll_puts(const char *s)
+{
+ unsigned int i;
+ char c;
+
+ while ( (c = *s++) != '\0' )
+ {
+ if ( (c == '\n') || (xpos >= text_columns) )
+ {
+ unsigned int bytes = (vlfb_info.width *
+ ((vlfb_info.bits_per_pixel + 7) >> 3));
+ unsigned char *src = lfb + font->height * vlfb_info.bytes_per_line;
+ unsigned char *dst = lfb;
+
+ /* New line: scroll all previous rows up one line. */
+ for ( i = font->height; i < vlfb_info.height; i++ )
+ {
+ memcpy(dst, src, bytes);
+ src += vlfb_info.bytes_per_line;
+ dst += vlfb_info.bytes_per_line;
+ }
+
+ /* Render new line. */
+ vesa_show_line(
+ text_buf,
+ lfb + (text_rows-1) * font->height * vlfb_info.bytes_per_line,
+ xpos);
+
+ xpos = 0;
+ }
+
+ if ( c != '\n' )
+ text_buf[xpos++] = c;
+ }
+
+ lfb_flush();
+}
diff --git a/xen/drivers/video/vga.c b/xen/drivers/video/vga.c
index 7c9326e40c..44b7092091 100644
--- a/xen/drivers/video/vga.c
+++ b/xen/drivers/video/vga.c
@@ -10,23 +10,21 @@
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/errno.h>
-#include <xen/event.h>
-#include <xen/spinlock.h>
#include <xen/console.h>
#include <xen/vga.h>
#include <asm/io.h>
-#include "font.h"
/* Filled in by arch boot code. */
struct xen_vga_console_info vga_console_info;
-static int vgacon_enabled = 0;
-static int vgacon_keep = 0;
-/*static const struct font_desc *font;*/
-
-static int xpos, ypos;
+static int vgacon_keep;
+static unsigned int xpos, ypos;
static unsigned char *video;
+static void vga_text_puts(const char *s);
+static void vga_noop_puts(const char *s) {}
+void (*vga_puts)(const char *) = vga_noop_puts;
+
/*
* 'vga=<mode-specifier>[,keep]' where <mode-specifier> is one of:
*
@@ -55,10 +53,16 @@ static char opt_vga[30] = "";
string_param("vga", opt_vga);
/* VGA text-mode definitions. */
-#define COLUMNS vga_console_info.u.text_mode_3.columns
-#define LINES vga_console_info.u.text_mode_3.rows
+static unsigned int columns, lines;
#define ATTRIBUTE 7
-#define VIDEO_SIZE (COLUMNS * LINES * 2)
+
+#ifdef CONFIG_X86_64
+void vesa_early_init(void);
+void vesa_endboot(void);
+#else
+#define vesa_early_init() ((void)0)
+#define vesa_endboot() ((void)0)
+#endif
void __init vga_init(void)
{
@@ -76,77 +80,61 @@ void __init vga_init(void)
switch ( vga_console_info.video_type )
{
case XEN_VGATYPE_TEXT_MODE_3:
- if ( memory_is_conventional_ram(0xB8000) )
- return;
- video = ioremap(0xB8000, 0x8000);
- if ( video == NULL )
+ if ( memory_is_conventional_ram(0xB8000) ||
+ ((video = ioremap(0xB8000, 0x8000)) == NULL) )
return;
- /* Disable cursor. */
- outw(0x200a, 0x3d4);
- memset(video, 0, VIDEO_SIZE);
+ outw(0x200a, 0x3d4); /* disable cursor */
+ columns = vga_console_info.u.text_mode_3.columns;
+ lines = vga_console_info.u.text_mode_3.rows;
+ memset(video, 0, columns * lines * 2);
+ vga_puts = vga_text_puts;
break;
case XEN_VGATYPE_VESA_LFB:
-#if 0
- /* XXX Implement me! */
- video = ioremap(vga_console_info.u.vesa_lfb.lfb_base,
- vga_console_info.u.vesa_lfb.lfb_size);
- if ( video == NULL )
- return;
- memset(video, 0, vga_console_info.u.vesa_lfb.lfb_size);
+ vesa_early_init();
break;
-#else
- return;
-#endif
default:
memset(&vga_console_info, 0, sizeof(vga_console_info));
- return;
+ break;
}
-
- vgacon_enabled = 1;
}
void __init vga_endboot(void)
{
- if ( !vgacon_enabled )
+ if ( vga_puts == vga_noop_puts )
return;
printk("Xen is %s VGA console.\n",
vgacon_keep ? "keeping" : "relinquishing");
- vgacon_enabled = vgacon_keep;
-}
-
-
-static void put_newline(void)
-{
- xpos = 0;
- ypos++;
+ vesa_endboot();
- if ( ypos >= LINES )
- {
- ypos = LINES-1;
- memmove((char*)video,
- (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS);
- memset((char*)video + (LINES-1)*2*COLUMNS, 0, 2*COLUMNS);
- }
+ if ( !vgacon_keep )
+ vga_puts = vga_noop_puts;
}
-void vga_putchar(int c)
+static void vga_text_puts(const char *s)
{
- if ( !vgacon_enabled )
- return;
+ char c;
- if ( c == '\n' )
- {
- put_newline();
- }
- else
+ while ( (c = *s++) != '\0' )
{
- if ( xpos >= COLUMNS )
- put_newline();
- video[(xpos + ypos * COLUMNS) * 2] = c & 0xFF;
- video[(xpos + ypos * COLUMNS) * 2 + 1] = ATTRIBUTE;
- ++xpos;
+ if ( (c == '\n') || (xpos >= columns) )
+ {
+ if ( ++ypos >= lines )
+ {
+ ypos = lines - 1;
+ memmove(video, video + 2 * columns, ypos * 2 * columns);
+ memset(video + ypos * 2 * columns, 0, 2 * xpos);
+ }
+ xpos = 0;
+ }
+
+ if ( c != '\n' )
+ {
+ video[(xpos + ypos * columns) * 2] = c;
+ video[(xpos + ypos * columns) * 2 + 1] = ATTRIBUTE;
+ xpos++;
+ }
}
}
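vga_text_puts() writes straight into the text-mode frame buffer mapped from 0xB8000, where each character cell is two bytes: the ASCII code followed by an attribute (7 is light grey on black). A sketch of one cell write, matching the indexing above:

    /* Sketch: one VGA text-mode cell write. */
    static void put_cell(volatile unsigned char *video, unsigned int columns,
                         unsigned int x, unsigned int y, char c)
    {
        video[(x + y * columns) * 2]     = c;   /* character byte */
        video[(x + y * columns) * 2 + 1] = 7;   /* attribute byte */
    }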
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 7913110795..f9b95681b6 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -95,36 +95,27 @@ struct hvm_function_table {
/*
* Examine specifics of the guest state:
- * 1) determine whether paging is enabled,
- * 2) determine whether long mode is enabled,
- * 3) determine whether PAE paging is enabled,
- * 4) determine whether NX is enabled,
- * 5) determine whether interrupts are enabled or not,
- * 6) determine the mode the guest is running in,
- * 7) return the current guest control-register value
- * 8) return the current guest segment descriptor base
- * 9) return the current guest segment descriptor
+ * 1) determine whether interrupts are enabled or not
+ * 2) determine the mode the guest is running in
+ * 3) return the current guest segment descriptor base
+ * 4) return the current guest segment descriptor
*/
- int (*paging_enabled)(struct vcpu *v);
- int (*long_mode_enabled)(struct vcpu *v);
- int (*pae_enabled)(struct vcpu *v);
- int (*nx_enabled)(struct vcpu *v);
int (*interrupts_enabled)(struct vcpu *v, enum hvm_intack);
int (*guest_x86_mode)(struct vcpu *v);
- unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg);
void (*get_segment_register)(struct vcpu *v, enum x86_segment seg,
struct segment_register *reg);
/*
- * Re-set the value of CR3 that Xen runs on when handling VM exits
+ * Re-set the value of CR3 that Xen runs on when handling VM exits.
*/
void (*update_host_cr3)(struct vcpu *v);
/*
- * Called to inform HVM layer that a guest cr3 has changed
+ * Called to inform HVM layer that a guest CRn or EFER has changed.
*/
- void (*update_guest_cr3)(struct vcpu *v);
+ void (*update_guest_cr)(struct vcpu *v, unsigned int cr);
+ void (*update_guest_efer)(struct vcpu *v);
/*
* Called to ensure than all guest-specific mappings in a tagged TLB
@@ -189,41 +180,27 @@ hvm_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *r)
void hvm_set_guest_time(struct vcpu *v, u64 gtime);
u64 hvm_get_guest_time(struct vcpu *v);
-static inline int
-hvm_paging_enabled(struct vcpu *v)
-{
- return hvm_funcs.paging_enabled(v);
-}
+#define hvm_paging_enabled(v) \
+ (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG))
+#define hvm_pae_enabled(v) \
+ (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE))
+#define hvm_nx_enabled(v) \
+ (!!((v)->arch.hvm_vcpu.guest_efer & EFER_NX))
#ifdef __x86_64__
-static inline int
-hvm_long_mode_enabled(struct vcpu *v)
-{
- return hvm_funcs.long_mode_enabled(v);
-}
+#define hvm_long_mode_enabled(v) \
+ ((v)->arch.hvm_vcpu.guest_efer & EFER_LMA)
#else
#define hvm_long_mode_enabled(v) (v,0)
#endif
static inline int
-hvm_pae_enabled(struct vcpu *v)
-{
- return hvm_funcs.pae_enabled(v);
-}
-
-static inline int
hvm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
{
return hvm_funcs.interrupts_enabled(v, type);
}
static inline int
-hvm_nx_enabled(struct vcpu *v)
-{
- return hvm_funcs.nx_enabled(v);
-}
-
-static inline int
hvm_guest_x86_mode(struct vcpu *v)
{
return hvm_funcs.guest_x86_mode(v);
@@ -244,7 +221,15 @@ hvm_update_vtpr(struct vcpu *v, unsigned long value)
hvm_funcs.update_vtpr(v, value);
}
-void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3);
+static inline void hvm_update_guest_cr(struct vcpu *v, unsigned int cr)
+{
+ hvm_funcs.update_guest_cr(v, cr);
+}
+
+static inline void hvm_update_guest_efer(struct vcpu *v)
+{
+ hvm_funcs.update_guest_efer(v);
+}
static inline void
hvm_flush_guest_tlbs(void)
@@ -257,12 +242,6 @@ void hvm_hypercall_page_initialise(struct domain *d,
void *hypercall_page);
static inline unsigned long
-hvm_get_guest_ctrl_reg(struct vcpu *v, unsigned int num)
-{
- return hvm_funcs.get_guest_ctrl_reg(v, num);
-}
-
-static inline unsigned long
hvm_get_segment_base(struct vcpu *v, enum x86_segment seg)
{
return hvm_funcs.get_segment_base(v, seg);
@@ -277,7 +256,6 @@ hvm_get_segment_register(struct vcpu *v, enum x86_segment seg,
void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
-void hvm_stts(struct vcpu *v);
void hvm_migrate_timers(struct vcpu *v);
void hvm_do_resume(struct vcpu *v);
diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h
index b4d4c17e53..33288e3a7d 100644
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -234,4 +234,8 @@ int hvm_do_hypercall(struct cpu_user_regs *pregs);
void hvm_hlt(unsigned long rflags);
void hvm_triple_fault(void);
+int hvm_set_cr0(unsigned long value);
+int hvm_set_cr3(unsigned long value);
+int hvm_set_cr4(unsigned long value);
+
#endif /* __ASM_X86_HVM_SUPPORT_H__ */
diff --git a/xen/include/asm-x86/hvm/svm/asid.h b/xen/include/asm-x86/hvm/svm/asid.h
index be5180e65a..1b679d05e4 100644
--- a/xen/include/asm-x86/hvm/svm/asid.h
+++ b/xen/include/asm-x86/hvm/svm/asid.h
@@ -32,20 +32,6 @@ void svm_asid_init_vcpu(struct vcpu *v);
void svm_asid_inv_asid(struct vcpu *v);
void svm_asid_inc_generation(void);
-/*
- * ASID related, guest triggered events.
- */
-
-static inline void svm_asid_g_update_paging(struct vcpu *v)
-{
- svm_asid_inv_asid(v);
-}
-
-static inline void svm_asid_g_mov_to_cr3(struct vcpu *v)
-{
- svm_asid_inv_asid(v);
-}
-
static inline void svm_asid_g_invlpg(struct vcpu *v, unsigned long g_vaddr)
{
#if 0
diff --git a/xen/include/asm-x86/hvm/svm/vmcb.h b/xen/include/asm-x86/hvm/svm/vmcb.h
index 1400e81035..19fa838d79 100644
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -440,11 +440,6 @@ struct arch_svm_struct {
u32 *msrpm;
int launch_core;
bool_t vmcb_in_sync; /* VMCB sync'ed with VMSAVE? */
- unsigned long cpu_shadow_cr0; /* Guest value for CR0 */
- unsigned long cpu_shadow_cr4; /* Guest value for CR4 */
- unsigned long cpu_shadow_efer; /* Guest value for EFER */
- unsigned long cpu_cr2;
- unsigned long cpu_cr3;
};
struct vmcb_struct *alloc_vmcb(void);
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index a6a762ef36..c4ea85c189 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -29,7 +29,18 @@
#define HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI 1
struct hvm_vcpu {
- unsigned long hw_cr3; /* value we give to HW to use */
+ /* Guest control-register and EFER values, just as the guest sees them. */
+ unsigned long guest_cr[5];
+ unsigned long guest_efer;
+
+ /*
+ * Processor-visible control-register values, while guest executes.
+ * CR0, CR4: Used as a cache of VMCS contents by VMX only.
+ * CR1, CR2: Never used (guest_cr[2] is always processor-visible CR2).
+ * CR3: Always used and kept up to date by paging subsystem.
+ */
+ unsigned long hw_cr[5];
+
struct hvm_io_op io_op;
struct vlapic vlapic;
s64 cache_tsc_offset;
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index d1b5ee54d7..c2fde90522 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -67,17 +67,11 @@ struct arch_vmx_struct {
/* Cache of cpu execution control. */
u32 exec_control;
- unsigned long cpu_cr0; /* copy of guest CR0 */
- unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */
- unsigned long cpu_shadow_cr4; /* copy of guest read shadow CR4 */
- unsigned long cpu_cr2; /* save CR2 */
- unsigned long cpu_cr3;
#ifdef __x86_64__
struct vmx_msr_state msr_state;
unsigned long shadow_gs;
unsigned long cstar;
#endif
- unsigned long efer;
/* Following fields are all specific to vmxassist. */
unsigned long vmxassist_enabled:1;
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 963079359a..28edcfe088 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -279,8 +279,8 @@ static inline void __vmx_inject_exception(
__vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
- if (trap == TRAP_page_fault)
- HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vmx.cpu_cr2, error_code);
+ if ( trap == TRAP_page_fault )
+ HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vcpu.guest_cr[2], error_code);
else
HVMTRACE_2D(INJ_EXC, v, trap, error_code);
}
diff --git a/xen/include/public/arch-x86/xen-x86_32.h b/xen/include/public/arch-x86/xen-x86_32.h
index 6728b818cd..bf9b1d5f31 100644
--- a/xen/include/public/arch-x86/xen-x86_32.h
+++ b/xen/include/public/arch-x86/xen-x86_32.h
@@ -64,18 +64,34 @@
#define FLAT_USER_DS FLAT_RING3_DS
#define FLAT_USER_SS FLAT_RING3_SS
-/*
- * Virtual addresses beyond this are not modifiable by guest OSes. The
- * machine->physical mapping table starts at this address, read-only.
- */
+#define __HYPERVISOR_VIRT_START_PAE 0xF5800000
+#define __MACH2PHYS_VIRT_START_PAE 0xF5800000
+#define __MACH2PHYS_VIRT_END_PAE 0xF6800000
+#define HYPERVISOR_VIRT_START_PAE \
+ mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_START_PAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_END_PAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE)
+
+#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000
+#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000
+#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000
+#define HYPERVISOR_VIRT_START_NONPAE \
+ mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_START_NONPAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_END_NONPAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE)
+
#ifdef CONFIG_X86_PAE
-#define __HYPERVISOR_VIRT_START 0xF5800000
-#define __MACH2PHYS_VIRT_START 0xF5800000
-#define __MACH2PHYS_VIRT_END 0xF6800000
+#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE
#else
-#define __HYPERVISOR_VIRT_START 0xFC000000
-#define __MACH2PHYS_VIRT_START 0xFC000000
-#define __MACH2PHYS_VIRT_END 0xFC400000
+#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_NONPAE
+#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_NONPAE
+#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_NONPAE
#endif
#ifndef HYPERVISOR_VIRT_START
diff --git a/xen/include/xen/vga.h b/xen/include/xen/vga.h
index e0737659c1..f96776d2f8 100644
--- a/xen/include/xen/vga.h
+++ b/xen/include/xen/vga.h
@@ -15,11 +15,11 @@
extern struct xen_vga_console_info vga_console_info;
void vga_init(void);
void vga_endboot(void);
-void vga_putchar(int c);
+extern void (*vga_puts)(const char *);
#else
-#define vga_init() ((void)0)
-#define vga_endboot() ((void)0)
-#define vga_putchar(c) ((void)0)
+#define vga_init() ((void)0)
+#define vga_endboot() ((void)0)
+#define vga_puts(s) ((void)0)
#endif
#endif /* _XEN_VGA_H */
diff --git a/xen/include/xen/xencomm.h b/xen/include/xen/xencomm.h
index 95126ba67f..9b46c89dbf 100644
--- a/xen/include/xen/xencomm.h
+++ b/xen/include/xen/xencomm.h
@@ -23,13 +23,12 @@
#include <public/xen.h>
-extern unsigned long xencomm_copy_to_guest(void *to, const void *from,
- unsigned int len, unsigned int skip);
-extern unsigned long xencomm_copy_from_guest(void *to, const void *from,
- unsigned int len, unsigned int skip);
-extern int xencomm_add_offset(void **handle, unsigned int bytes);
-extern int xencomm_handle_is_null(void *ptr);
-
+unsigned long xencomm_copy_to_guest(
+ void *to, const void *from, unsigned int len, unsigned int skip);
+unsigned long xencomm_copy_from_guest(
+ void *to, const void *from, unsigned int len, unsigned int skip);
+int xencomm_add_offset(void **handle, unsigned int bytes);
+int xencomm_handle_is_null(void *ptr);
static inline int xencomm_is_inline(const void *handle)
{
@@ -39,7 +38,7 @@ static inline int xencomm_is_inline(const void *handle)
static inline unsigned long xencomm_inline_addr(const void *handle)
{
- return (unsigned long)handle & ~XENCOMM_INLINE_FLAG;
+ return (unsigned long)handle & ~XENCOMM_INLINE_FLAG;
}
/* Is the guest handle a NULL reference? */