author     djm@kirby.fc.hp.com <djm@kirby.fc.hp.com>  2006-01-03 10:19:20 -0600
committer  djm@kirby.fc.hp.com <djm@kirby.fc.hp.com>  2006-01-03 10:19:20 -0600
commit     e31c95409c7c96fad0abb5af6d931fdd4369275b (patch)
tree       0fef18c33b9e42c386d0ab79da0bad2d9b094036
parent     fe8281c24dede8ae0aa380e96f519a2762d632f0 (diff)
parent     61cf7e808a609abd16ae453de1a214e813274446 (diff)
Merge
-rw-r--r--  .hgignore | 1
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c | 44
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 7
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/common.h | 2
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/interface.c | 32
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 88
-rw-r--r--  linux-2.6-xen-sparse/include/asm-xen/asm-i386/bug.h | 16
-rw-r--r--  tools/examples/xmexample.vmx | 3
-rw-r--r--  tools/firmware/vmxassist/Makefile | 2
-rw-r--r--  tools/firmware/vmxassist/acpi_madt.c | 74
-rw-r--r--  tools/firmware/vmxassist/vmxloader.c | 23
-rw-r--r--  tools/ioemu/vl.c | 1
-rw-r--r--  tools/libxc/Makefile | 5
-rw-r--r--  tools/libxc/xc_domain.c | 32
-rw-r--r--  tools/libxc/xc_pagetab.c | 192
-rw-r--r--  tools/libxc/xc_vmx_build.c | 97
-rw-r--r--  tools/libxc/xenctrl.h | 24
-rw-r--r--  tools/libxc/xenguest.h | 1
-rw-r--r--  tools/python/xen/lowlevel/xc/xc.c | 74
-rw-r--r--  tools/python/xen/xend/image.py | 82
-rw-r--r--  tools/python/xen/xend/server/blkif.py | 6
-rw-r--r--  tools/python/xen/xm/create.py | 14
-rw-r--r--  tools/python/xen/xm/main.py | 8
-rw-r--r--  tools/vtpm_manager/README | 5
-rw-r--r--  tools/vtpm_manager/Rules.mk | 3
-rw-r--r--  tools/vtpm_manager/manager/securestorage.c | 233
-rw-r--r--  tools/vtpm_manager/manager/vtpm_manager.c | 96
-rw-r--r--  tools/vtpm_manager/manager/vtpmpriv.h | 13
-rw-r--r--  tools/vtpm_manager/manager/vtsp.c | 84
-rw-r--r--  tools/vtpm_manager/manager/vtsp.h | 3
-rw-r--r--  tools/xentrace/Makefile | 16
-rw-r--r--  tools/xentrace/xenctx.c | 398
-rw-r--r--  xen/arch/ia64/xen/domain.c | 11
-rw-r--r--  xen/arch/ia64/xen/irq.c | 3
-rw-r--r--  xen/arch/x86/Makefile | 13
-rw-r--r--  xen/arch/x86/boot/x86_32.S | 10
-rw-r--r--  xen/arch/x86/boot/x86_64.S | 14
-rw-r--r--  xen/arch/x86/dom0_ops.c | 28
-rw-r--r--  xen/arch/x86/domain.c | 51
-rw-r--r--  xen/arch/x86/domain_build.c | 33
-rw-r--r--  xen/arch/x86/irq.c | 4
-rw-r--r--  xen/arch/x86/mm.c | 7
-rw-r--r--  xen/arch/x86/physdev.c | 33
-rw-r--r--  xen/arch/x86/setup.c | 226
-rw-r--r--  xen/arch/x86/smpboot.c | 11
-rw-r--r--  xen/arch/x86/traps.c | 38
-rw-r--r--  xen/arch/x86/x86_32/xen.lds.S (renamed from xen/arch/x86/x86_32/xen.lds) | 10
-rw-r--r--  xen/arch/x86/x86_64/xen.lds.S (renamed from xen/arch/x86/x86_64/xen.lds) | 10
-rw-r--r--  xen/arch/x86/x86_emulate.c | 100
-rw-r--r--  xen/common/dom0_ops.c | 49
-rw-r--r--  xen/common/domain.c | 37
-rw-r--r--  xen/common/event_channel.c | 4
-rw-r--r--  xen/common/keyhandler.c | 26
-rw-r--r--  xen/common/memory.c | 4
-rw-r--r--  xen/common/rangeset.c | 399
-rw-r--r--  xen/drivers/char/ns16550.c | 5
-rw-r--r--  xen/include/asm-ia64/domain.h | 2
-rw-r--r--  xen/include/asm-ia64/iocap.h | 10
-rw-r--r--  xen/include/asm-x86/current.h | 2
-rw-r--r--  xen/include/asm-x86/domain.h | 4
-rw-r--r--  xen/include/asm-x86/iocap.h | 20
-rw-r--r--  xen/include/asm-x86/msr.h | 2
-rw-r--r--  xen/include/asm-x86/physdev.h | 17
-rw-r--r--  xen/include/asm-x86/x86_emulate.h | 8
-rw-r--r--  xen/include/public/dom0_ops.h | 17
-rw-r--r--  xen/include/xen/compiler.h | 6
-rw-r--r--  xen/include/xen/domain.h | 4
-rw-r--r--  xen/include/xen/iocap.h | 34
-rw-r--r--  xen/include/xen/rangeset.h | 71
-rw-r--r--  xen/include/xen/sched.h | 23
70 files changed, 2300 insertions, 725 deletions
diff --git a/.hgignore b/.hgignore
index 766a688126..9a52e9ce12 100644
--- a/.hgignore
+++ b/.hgignore
@@ -181,6 +181,7 @@
^xen/TAGS$
^xen/arch/x86/asm-offsets\.s$
^xen/arch/x86/boot/mkelf32$
+^xen/arch/x86/xen\.lds$
^xen/ddb/.*$
^xen/include/asm$
^xen/include/asm-.*/asm-offsets\.h$
diff --git a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
index 446e5d359d..632ab1fafd 100644
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
@@ -389,6 +389,30 @@ grant_write(struct file *file, const char __user *buffer, unsigned long count,
return -ENOSYS;
}
+static int __init
+gnttab_proc_init(void)
+{
+ /*
+ * /proc/xen/grant : used by libxc to access grant tables
+ */
+ if ((grant_pde = create_xen_proc_entry("grant", 0600)) == NULL) {
+ WPRINTK("Unable to create grant xen proc entry\n");
+ return -1;
+ }
+
+ grant_file_ops.read = grant_pde->proc_fops->read;
+ grant_file_ops.write = grant_pde->proc_fops->write;
+
+ grant_pde->proc_fops = &grant_file_ops;
+
+ grant_pde->read_proc = &grant_read;
+ grant_pde->write_proc = &grant_write;
+
+ return 0;
+}
+
+device_initcall(gnttab_proc_init);
+
#endif /* CONFIG_PROC_FS */
int
@@ -446,29 +470,11 @@ gnttab_init(void)
gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES;
gnttab_free_head = NR_RESERVED_ENTRIES;
-#ifdef CONFIG_PROC_FS
- /*
- * /proc/xen/grant : used by libxc to access grant tables
- */
- if ((grant_pde = create_xen_proc_entry("grant", 0600)) == NULL) {
- WPRINTK("Unable to create grant xen proc entry\n");
- return -1;
- }
-
- grant_file_ops.read = grant_pde->proc_fops->read;
- grant_file_ops.write = grant_pde->proc_fops->write;
-
- grant_pde->proc_fops = &grant_file_ops;
-
- grant_pde->read_proc = &grant_read;
- grant_pde->write_proc = &grant_write;
-#endif
-
printk("Grant table initialized\n");
return 0;
}
-__initcall(gnttab_init);
+core_initcall(gnttab_init);
/*
* Local variables:
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
index 7140449af1..2ec97d1e0b 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -331,7 +331,12 @@ static void connect(struct blkfront_info *info)
return;
}
- xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+ err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
+ info->xbdev->otherend);
+ return;
+ }
(void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected);
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/common.h b/linux-2.6-xen-sparse/drivers/xen/netback/common.h
index c278956279..6fa0d226bb 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h
@@ -82,7 +82,7 @@ typedef struct netif_st {
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
void netif_creditlimit(netif_t *netif);
-int netif_disconnect(netif_t *netif);
+void netif_disconnect(netif_t *netif);
netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
void free_netif(netif_t *netif);
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
index 9e336db31c..16cc929a11 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
@@ -196,9 +196,13 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref,
return 0;
netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+ if (netif->tx_comms_area == NULL)
+ return -ENOMEM;
netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
- if (netif->tx_comms_area == NULL || netif->rx_comms_area == NULL)
+ if (netif->rx_comms_area == NULL) {
+ free_vm_area(netif->tx_comms_area);
return -ENOMEM;
+ }
err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
if (err) {
@@ -247,13 +251,9 @@ static void free_netif_callback(void *arg)
{
netif_t *netif = (netif_t *)arg;
- /* Already disconnected? */
- if (!netif->irq)
- return;
-
- unbind_from_irqhandler(netif->irq, netif);
- netif->irq = 0;
-
+ if (netif->irq)
+ unbind_from_irqhandler(netif->irq, netif);
+
unregister_netdev(netif->dev);
if (netif->tx.sring) {
@@ -290,10 +290,10 @@ void netif_creditlimit(netif_t *netif)
#endif
}
-int netif_disconnect(netif_t *netif)
+void netif_disconnect(netif_t *netif)
{
-
- if (netif->status == CONNECTED) {
+ switch (netif->status) {
+ case CONNECTED:
rtnl_lock();
netif->status = DISCONNECTING;
wmb();
@@ -301,10 +301,14 @@ int netif_disconnect(netif_t *netif)
__netif_down(netif);
rtnl_unlock();
netif_put(netif);
- return 0; /* Caller should not send response message. */
+ break;
+ case DISCONNECTED:
+ BUG_ON(atomic_read(&netif->refcnt) != 0);
+ free_netif(netif);
+ break;
+ default:
+ BUG();
}
-
- return 1;
}
/*
diff --git a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
index e9b3610690..eca6b4c888 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
@@ -117,6 +117,8 @@ struct netfront_info
int rx_min_target, rx_max_target, rx_target;
struct sk_buff_head rx_batch;
+ struct timer_list rx_refill_timer;
+
/*
* {tx,rx}_skbs store outstanding skbuffs. The first entry in each
* array is an index into a chain of free entries.
@@ -517,6 +519,13 @@ static void network_tx_buf_gc(struct net_device *dev)
}
+static void rx_refill_timeout(unsigned long data)
+{
+ struct net_device *dev = (struct net_device *)data;
+ netif_rx_schedule(dev);
+}
+
+
static void network_alloc_rx_buffers(struct net_device *dev)
{
unsigned short id;
@@ -534,7 +543,7 @@ static void network_alloc_rx_buffers(struct net_device *dev)
* Allocate skbuffs greedily, even though we batch updates to the
* receive ring. This creates a less bursty demand on the memory
* allocator, so should reduce the chance of failed allocation requests
- * both for ourself and for other kernel subsystems.
+ * both for ourself and for other kernel subsystems.
*/
batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
@@ -545,8 +554,15 @@ static void network_alloc_rx_buffers(struct net_device *dev)
skb = alloc_xen_skb(
((PAGE_SIZE - sizeof(struct skb_shared_info)) &
(-SKB_DATA_ALIGN(1))) - 16);
- if (skb == NULL)
- break;
+ if (skb == NULL) {
+ /* Any skbuffs queued for refill? Force them out. */
+ if (i != 0)
+ goto refill;
+ /* Could not allocate any skbuffs. Try again later. */
+ mod_timer(&np->rx_refill_timer,
+ jiffies + (HZ/10));
+ return;
+ }
__skb_queue_tail(&np->rx_batch, skb);
}
@@ -554,6 +570,12 @@ static void network_alloc_rx_buffers(struct net_device *dev)
if (i < (np->rx_target/2))
return;
+ /* Adjust our fill target if we risked running out of buffers. */
+ if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
+ ((np->rx_target *= 2) > np->rx_max_target))
+ np->rx_target = np->rx_max_target;
+
+ refill:
for (i = 0; ; i++) {
if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
break;
@@ -608,11 +630,6 @@ static void network_alloc_rx_buffers(struct net_device *dev)
/* Above is a suitable barrier to ensure backend will see requests. */
np->rx.req_prod_pvt = req_prod + i;
RING_PUSH_REQUESTS(&np->rx);
-
- /* Adjust our fill target if we risked running out of buffers. */
- if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
- ((np->rx_target *= 2) > np->rx_max_target))
- np->rx_target = np->rx_max_target;
}
@@ -1077,6 +1094,10 @@ static int create_netdev(int handle, struct xenbus_device *dev,
np->rx_min_target = RX_MIN_TARGET;
np->rx_max_target = RX_MAX_TARGET;
+ init_timer(&np->rx_refill_timer);
+ np->rx_refill_timer.data = (unsigned long)netdev;
+ np->rx_refill_timer.function = rx_refill_timeout;
+
/* Initialise {tx,rx}_skbs as a free chain containing every entry. */
for (i = 0; i <= NET_TX_RING_SIZE; i++) {
np->tx_skbs[i] = (void *)((unsigned long) i+1);
@@ -1188,34 +1209,15 @@ static int netfront_remove(struct xenbus_device *dev)
DPRINTK("%s\n", dev->nodename);
- netif_free(info);
- kfree(info);
+ netif_disconnect_backend(info);
+ free_netdev(info->netdev);
return 0;
}
-static void netif_free(struct netfront_info *info)
-{
- netif_disconnect_backend(info);
- close_netdev(info);
-}
-
-
static void close_netdev(struct netfront_info *info)
{
- if (info->netdev) {
-#ifdef CONFIG_PROC_FS
- xennet_proc_delif(info->netdev);
-#endif
- unregister_netdev(info->netdev);
- info->netdev = NULL;
- }
-}
-
-
-static void netif_disconnect_backend(struct netfront_info *info)
-{
/* Stop old i/f to prevent errors whilst we rebuild the state. */
spin_lock_irq(&info->tx_lock);
spin_lock(&info->rx_lock);
@@ -1223,17 +1225,37 @@ static void netif_disconnect_backend(struct netfront_info *info)
/* info->backend_state = BEST_DISCONNECTED; */
spin_unlock(&info->rx_lock);
spin_unlock_irq(&info->tx_lock);
-
+
+#ifdef CONFIG_PROC_FS
+ xennet_proc_delif(info->netdev);
+#endif
+
+ if (info->irq)
+ unbind_from_irqhandler(info->irq, info->netdev);
+ info->evtchn = info->irq = 0;
+
+ del_timer_sync(&info->rx_refill_timer);
+
+ unregister_netdev(info->netdev);
+}
+
+
+static void netif_disconnect_backend(struct netfront_info *info)
+{
end_access(info->tx_ring_ref, info->tx.sring);
end_access(info->rx_ring_ref, info->rx.sring);
info->tx_ring_ref = GRANT_INVALID_REF;
info->rx_ring_ref = GRANT_INVALID_REF;
info->tx.sring = NULL;
info->rx.sring = NULL;
+}
- if (info->irq)
- unbind_from_irqhandler(info->irq, info->netdev);
- info->evtchn = info->irq = 0;
+
+static void netif_free(struct netfront_info *info)
+{
+ close_netdev(info);
+ netif_disconnect_backend(info);
+ free_netdev(info->netdev);
}
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/bug.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/bug.h
deleted file mode 100644
index db60354ff8..0000000000
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/bug.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _I386_BUG_H
-#define _I386_BUG_H
-
-#include <linux/config.h>
-
-#define BUG() do { \
- printk("kernel BUG at %s:%d (%s)!\n", \
- __FILE__, __LINE__, __FUNCTION__); \
- dump_stack(); \
- panic("BUG!"); \
-} while (0)
-#define HAVE_ARCH_BUG
-
-#include <asm-generic/bug.h>
-
-#endif
diff --git a/tools/examples/xmexample.vmx b/tools/examples/xmexample.vmx
index faad23dd44..62767f67a8 100644
--- a/tools/examples/xmexample.vmx
+++ b/tools/examples/xmexample.vmx
@@ -30,6 +30,9 @@ name = "ExampleVMXDomain"
# the number of cpus guest platform has, default=1
vcpus=1
+# enable/disalbe vmx guest ACPI, default=0 (disabled)
+#acpi=0
+
# List of which CPUS this domain is allowed to use, default Xen picks
#cpus = "" # leave to Xen to pick
#cpus = "0" # all vcpus run on CPU0
diff --git a/tools/firmware/vmxassist/Makefile b/tools/firmware/vmxassist/Makefile
index 616f64d4e7..e64473716b 100644
--- a/tools/firmware/vmxassist/Makefile
+++ b/tools/firmware/vmxassist/Makefile
@@ -24,7 +24,7 @@ include $(XEN_ROOT)/tools/Rules.mk
# The emulator code lives in ROM space
TEXTADDR=0x000D0000
-DEFINES=-DDEBUG -D_ACPI_ -DTEXTADDR=$(TEXTADDR)
+DEFINES=-DDEBUG -DTEXTADDR=$(TEXTADDR)
XENINC=-I$(XEN_ROOT)/tools/libxc
LD = ld
diff --git a/tools/firmware/vmxassist/acpi_madt.c b/tools/firmware/vmxassist/acpi_madt.c
index 1efbaa4b5d..37e33e5e8a 100644
--- a/tools/firmware/vmxassist/acpi_madt.c
+++ b/tools/firmware/vmxassist/acpi_madt.c
@@ -24,23 +24,75 @@
extern int puts(const char *s);
-#define VCPU_NR_PAGE 0x0009F000
-#define VCPU_NR_OFFSET 0x00000800
-#define VCPU_MAGIC 0x76637075 /* "vcpu" */
+#define HVM_INFO_PAGE 0x0009F000
+#define HVM_INFO_OFFSET 0x00000800
+
+struct hvm_info_table {
+ char signature[8]; /* "HVM INFO" */
+ uint32_t length;
+ uint8_t checksum;
+ uint8_t acpi_enabled;
+ uint8_t pad[2];
+ uint32_t nr_vcpus;
+};
+
+static struct hvm_info_table *table = NULL;
-/* xc_vmx_builder wrote vcpu block at 0x9F800. Return it. */
static int
-get_vcpu_nr(void)
+checksum_valid(uint8_t *ptr, int len)
+{
+ uint8_t sum=0;
+ int i;
+
+ for (i = 0; i < len; i++)
+ sum += ptr[i];
+
+ return (sum == 0);
+}
+
+/* xc_vmx_builder wrote hvm info at 0x9F800. Return it. */
+static struct hvm_info_table *
+get_hvm_info_table(void)
{
- unsigned int *vcpus;
+ struct hvm_info_table *t;
+ char signature[] = "HVM INFO";
+ int i;
+
+ if (table != NULL)
+ return table;
+
+ t = (struct hvm_info_table *)(HVM_INFO_PAGE + HVM_INFO_OFFSET);
- vcpus = (unsigned int *)(VCPU_NR_PAGE + VCPU_NR_OFFSET);
- if (vcpus[0] != VCPU_MAGIC) {
- puts("Bad vcpus magic, set vcpu number to 1 by default.\n");
- return 1;
+ /* strncmp(t->signature, "HVM INFO", 8) */
+ for (i = 0; i < 8; i++) {
+ if (signature[i] != t->signature[i]) {
+ puts("Bad hvm info signature\n");
+ return NULL;
+ }
}
- return vcpus[1];
+ if (!checksum_valid((uint8_t *)t, t->length)) {
+ puts("Bad hvm info checksum\n");
+ return NULL;
+ }
+
+ table = t;
+
+ return table;
+}
+
+int
+get_vcpu_nr(void)
+{
+ struct hvm_info_table *t = get_hvm_info_table();
+ return (t ? t->nr_vcpus : 1); /* default 1 vcpu */
+}
+
+int
+get_acpi_enabled(void)
+{
+ struct hvm_info_table *t = get_hvm_info_table();
+ return (t ? t->acpi_enabled : 0); /* default no acpi */
}
static void *
diff --git a/tools/firmware/vmxassist/vmxloader.c b/tools/firmware/vmxassist/vmxloader.c
index 2a2a17bc4f..2a20715bfc 100644
--- a/tools/firmware/vmxassist/vmxloader.c
+++ b/tools/firmware/vmxassist/vmxloader.c
@@ -24,12 +24,10 @@
#include "machine.h"
#include "roms.h"
-#ifdef _ACPI_
#include "acpi.h"
#include "../acpi/acpi2_0.h" // for ACPI_PHYSICAL_ADDRESS
int acpi_madt_update(unsigned char* acpi_start);
-#endif
-
+int get_acpi_enabled(void);
/*
* C runtime start off
@@ -120,18 +118,17 @@ main(void)
memcpy((void *)0xC0000,
vgabios_stdvga, sizeof(vgabios_stdvga));
}
-#ifdef _ACPI_
- puts("Loading ACPI ...\n");
-
- acpi_madt_update(acpi);
- if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
- /* make sure acpi table does not overlap rombios
- * currently acpi less than 8K will be OK.
- */
- memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi));
+ if (get_acpi_enabled() != 0) {
+ puts("Loading ACPI ...\n");
+ acpi_madt_update((unsigned char*)acpi);
+ if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
+ /* make sure acpi table does not overlap rombios
+ * currently acpi less than 8K will be OK.
+ */
+ memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi));
+ }
}
-#endif
puts("Loading VMXAssist ...\n");
memcpy((void *)TEXTADDR, vmxassist, sizeof(vmxassist));
diff --git a/tools/ioemu/vl.c b/tools/ioemu/vl.c
index 6d1073f3dc..f38f90b0c6 100644
--- a/tools/ioemu/vl.c
+++ b/tools/ioemu/vl.c
@@ -2948,6 +2948,7 @@ int main(int argc, char **argv)
case QEMU_OPTION_vcpus:
vcpus = atoi(optarg);
fprintf(logfile, "qemu: the number of cpus is %d\n", vcpus);
+ break;
case QEMU_OPTION_pci:
pci_enabled = 1;
break;
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index a077ad42e6..d4ad7d34e0 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -27,6 +27,11 @@ SRCS += xc_tbuf.c
ifeq ($(XEN_TARGET_ARCH),x86_32)
SRCS += xc_ptrace.c
SRCS += xc_ptrace_core.c
+SRCS += xc_pagetab.c
+endif
+
+ifeq ($(XEN_TARGET_ARCH),x86_64)
+SRCS += xc_pagetab.c
endif
BUILD_SRCS :=
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index fe3cf55618..b018318d62 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -404,6 +404,38 @@ int xc_domain_setinfo(int xc_handle,
}
+int xc_domain_irq_permission(int xc_handle,
+ uint32_t domid,
+ uint8_t pirq,
+ uint8_t allow_access)
+{
+ dom0_op_t op;
+
+ op.cmd = DOM0_IRQ_PERMISSION;
+ op.u.irq_permission.domain = domid;
+ op.u.irq_permission.pirq = pirq;
+ op.u.irq_permission.allow_access = allow_access;
+
+ return do_dom0_op(xc_handle, &op);
+}
+
+int xc_domain_iomem_permission(int xc_handle,
+ uint32_t domid,
+ unsigned long first_pfn,
+ unsigned long nr_pfns,
+ uint8_t allow_access)
+{
+ dom0_op_t op;
+
+ op.cmd = DOM0_IOMEM_PERMISSION;
+ op.u.iomem_permission.domain = domid;
+ op.u.iomem_permission.first_pfn = first_pfn;
+ op.u.iomem_permission.nr_pfns = nr_pfns;
+ op.u.iomem_permission.allow_access = allow_access;
+
+ return do_dom0_op(xc_handle, &op);
+}
+
/*
* Local variables:
* mode: C
diff --git a/tools/libxc/xc_pagetab.c b/tools/libxc/xc_pagetab.c
new file mode 100644
index 0000000000..b63ea89ade
--- /dev/null
+++ b/tools/libxc/xc_pagetab.c
@@ -0,0 +1,192 @@
+/******************************************************************************
+ * xc_pagetab.c
+ *
+ * Function to translate virtual to physical addresses.
+ */
+#include "xc_private.h"
+
+#if defined(__i386__)
+
+#define L1_PAGETABLE_SHIFT_PAE 12
+#define L2_PAGETABLE_SHIFT_PAE 21
+#define L3_PAGETABLE_SHIFT_PAE 30
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+
+#define L0_PAGETABLE_MASK_PAE 0x0000000ffffff000ULL
+#define L1_PAGETABLE_MASK_PAE 0x1ffULL
+#define L2_PAGETABLE_MASK_PAE 0x1ffULL
+#define L3_PAGETABLE_MASK_PAE 0x3ULL
+
+#define L0_PAGETABLE_MASK 0xfffff000ULL
+#define L1_PAGETABLE_MASK 0x3ffULL
+#define L2_PAGETABLE_MASK 0x3ffULL
+
+#elif defined(__x86_64__)
+
+#define L1_PAGETABLE_SHIFT_PAE 12
+#define L2_PAGETABLE_SHIFT_PAE 21
+#define L3_PAGETABLE_SHIFT_PAE 30
+#define L4_PAGETABLE_SHIFT_PAE 39
+
+#define L1_PAGETABLE_SHIFT L1_PAGETABLE_SHIFT_PAE
+#define L2_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT_PAE
+
+#define L0_PAGETABLE_MASK_PAE 0x000000fffffff000ULL
+#define L1_PAGETABLE_MASK_PAE 0x1ffULL
+#define L2_PAGETABLE_MASK_PAE 0x1ffULL
+#define L3_PAGETABLE_MASK_PAE 0x1ffULL
+#define L4_PAGETABLE_MASK_PAE 0x1ffULL
+
+#define L0_PAGETABLE_MASK L0_PAGETABLE_MASK_PAE
+#define L1_PAGETABLE_MASK L1_PAGETABLE_MASK_PAE
+#define L2_PAGETABLE_MASK L2_PAGETABLE_MASK_PAE
+
+#endif
+
+unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
+ int vcpu, unsigned long long virt )
+{
+ vcpu_guest_context_t ctx;
+ unsigned long long cr3;
+ void *pd, *pt, *pdppage = NULL, *pdp, *pml = NULL;
+ unsigned long long pde, pte, pdpe, pmle;
+ unsigned long mfn = 0;
+#if defined (__i386__)
+ static int pt_levels = 0;
+
+ if (pt_levels == 0) {
+ xen_capabilities_info_t xen_caps = "";
+
+ if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
+ goto out;
+ if (strstr(xen_caps, "xen-3.0-x86_64"))
+ pt_levels = 4;
+ else if (strstr(xen_caps, "xen-3.0-x86_32p"))
+ pt_levels = 3;
+ else if (strstr(xen_caps, "xen-3.0-x86_32"))
+ pt_levels = 2;
+ else
+ goto out;
+ }
+#elif defined (__x86_64__)
+#define pt_levels 4
+#endif
+
+ if (xc_domain_get_vcpu_context(xc_handle, dom, vcpu, &ctx) != 0) {
+ fprintf(stderr, "failed to retreive vcpu context\n");
+ goto out;
+ }
+ cr3 = ctx.ctrlreg[3];
+
+ /* Page Map Level 4 */
+
+#if defined(__i386__)
+ pmle = cr3;
+#elif defined(__x86_64__)
+ pml = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, cr3 >> PAGE_SHIFT);
+ if (pml == NULL) {
+ fprintf(stderr, "failed to map PML4\n");
+ goto out;
+ }
+ pmle = *(unsigned long long *)(pml + 8 * ((virt >> L4_PAGETABLE_SHIFT_PAE) & L4_PAGETABLE_MASK_PAE));
+ if((pmle & 1) == 0) {
+ fprintf(stderr, "page entry not present in PML4\n");
+ goto out_unmap_pml;
+ }
+#endif
+
+ /* Page Directory Pointer Table */
+
+ if (pt_levels >= 3) {
+ pdppage = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, pmle >> PAGE_SHIFT);
+ if (pdppage == NULL) {
+ fprintf(stderr, "failed to map PDP\n");
+ goto out_unmap_pml;
+ }
+ if (pt_levels >= 4)
+ pdp = pdppage;
+ else
+ /* PDP is only 32 bit aligned with 3 level pts */
+ pdp = pdppage + (pmle & ~(XC_PAGE_MASK | 0x1f));
+
+ pdpe = *(unsigned long long *)(pdp + 8 * ((virt >> L3_PAGETABLE_SHIFT_PAE) & L3_PAGETABLE_MASK_PAE));
+
+ if((pdpe & 1) == 0) {
+ fprintf(stderr, "page entry not present in PDP\n");
+ goto out_unmap_pdp;
+ }
+ } else {
+ pdpe = pmle;
+ }
+
+ /* Page Directory */
+
+ pd = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, pdpe >> PAGE_SHIFT);
+ if (pd == NULL) {
+ fprintf(stderr, "failed to map PD\n");
+ goto out_unmap_pdp;
+ }
+
+ if (pt_levels >= 3)
+ pde = *(unsigned long long *)(pd + 8 * ((virt >> L2_PAGETABLE_SHIFT_PAE) & L2_PAGETABLE_MASK_PAE));
+ else
+ pde = *(unsigned long long *)(pd + 4 * ((virt >> L2_PAGETABLE_SHIFT) & L2_PAGETABLE_MASK));
+
+ if ((pde & 1) == 0) {
+ fprintf(stderr, "page entry not present in PD\n");
+ goto out_unmap_pd;
+ }
+
+ /* Page Table */
+
+ if (pde & 0x00000008) { /* 4M page (or 2M in PAE mode) */
+ fprintf(stderr, "Cannot currently cope with 2/4M pages\n");
+ exit(-1);
+ } else { /* 4k page */
+ pt = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
+ pde >> PAGE_SHIFT);
+
+ if (pt == NULL) {
+ fprintf(stderr, "failed to map PT\n");
+ goto out_unmap_pd;
+ }
+
+ if (pt_levels >= 3)
+ pte = *(unsigned long long *)(pt + 8 * ((virt >> L1_PAGETABLE_SHIFT_PAE) & L1_PAGETABLE_MASK_PAE));
+ else
+ pte = *(unsigned long long *)(pt + 4 * ((virt >> L1_PAGETABLE_SHIFT) & L1_PAGETABLE_MASK));
+
+ if ((pte & 0x00000001) == 0) {
+ fprintf(stderr, "page entry not present in PT\n");
+ goto out_unmap_pt;
+ }
+
+ if (pt_levels >= 3)
+ mfn = (pte & L0_PAGETABLE_MASK_PAE) >> PAGE_SHIFT;
+ else
+ mfn = (pte & L0_PAGETABLE_MASK) >> PAGE_SHIFT;
+ }
+
+ out_unmap_pt:
+ munmap(pt, PAGE_SIZE);
+ out_unmap_pd:
+ munmap(pd, PAGE_SIZE);
+ out_unmap_pdp:
+ munmap(pdppage, PAGE_SIZE);
+ out_unmap_pml:
+ munmap(pml, PAGE_SIZE);
+ out:
+ return mfn;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxc/xc_vmx_build.c b/tools/libxc/xc_vmx_build.c
index 9d0fbc18d5..7316d855b1 100644
--- a/tools/libxc/xc_vmx_build.c
+++ b/tools/libxc/xc_vmx_build.c
@@ -33,8 +33,17 @@
#define E820_MAP_NR_OFFSET 0x000001E8
#define E820_MAP_OFFSET 0x000002D0
-#define VCPU_NR_PAGE 0x0009F000
-#define VCPU_NR_OFFSET 0x00000800
+#define HVM_INFO_PAGE 0x0009F000
+#define HVM_INFO_OFFSET 0x00000800
+
+struct hvm_info_table {
+ char signature[8]; /* "HVM INFO" */
+ uint32_t length;
+ uint8_t checksum;
+ uint8_t acpi_enabled;
+ uint8_t pad[2];
+ uint32_t nr_vcpus;
+};
struct e820entry {
uint64_t addr;
@@ -119,26 +128,45 @@ static unsigned char build_e820map(void *e820_page, unsigned long mem_size)
return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
}
+static void
+set_hvm_info_checksum(struct hvm_info_table *t)
+{
+ uint8_t *ptr = (uint8_t *)t, sum = 0;
+ unsigned int i;
+
+ t->checksum = 0;
+
+ for (i = 0; i < t->length; i++)
+ sum += *ptr++;
+
+ t->checksum = -sum;
+}
+
/*
- * Use E820 reserved memory 0x9F800 to pass number of vcpus to vmxloader
- * vmxloader will use it to config ACPI MADT table
+ * Use E820 reserved memory 0x9F800 to pass HVM info to vmxloader
+ * vmxloader will use this info to set BIOS accordingly
*/
-#define VCPU_MAGIC 0x76637075 /* "vcpu" */
-static int set_vcpu_nr(int xc_handle, uint32_t dom,
- unsigned long *pfn_list, unsigned int vcpus)
+static int set_hvm_info(int xc_handle, uint32_t dom,
+ unsigned long *pfn_list, unsigned int vcpus,
+ unsigned int acpi)
{
- char *va_map;
- unsigned int *va_vcpus;
+ char *va_map;
+ struct hvm_info_table *va_hvm;
va_map = xc_map_foreign_range(xc_handle, dom,
PAGE_SIZE, PROT_READ|PROT_WRITE,
- pfn_list[VCPU_NR_PAGE >> PAGE_SHIFT]);
+ pfn_list[HVM_INFO_PAGE >> PAGE_SHIFT]);
if ( va_map == NULL )
return -1;
- va_vcpus = (unsigned int *)(va_map + VCPU_NR_OFFSET);
- va_vcpus[0] = VCPU_MAGIC;
- va_vcpus[1] = vcpus;
+ va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
+ memset(va_hvm, 0, sizeof(*va_hvm));
+ strncpy(va_hvm->signature, "HVM INFO", 8);
+ va_hvm->length = sizeof(struct hvm_info_table);
+ va_hvm->acpi_enabled = acpi;
+ va_hvm->nr_vcpus = vcpus;
+
+ set_hvm_info_checksum(va_hvm);
munmap(va_map, PAGE_SIZE);
@@ -281,6 +309,7 @@ static int setup_guest(int xc_handle,
unsigned int control_evtchn,
unsigned int lapic,
unsigned int vcpus,
+ unsigned int acpi,
unsigned int store_evtchn,
unsigned long *store_mfn)
{
@@ -490,8 +519,8 @@ static int setup_guest(int xc_handle,
goto error_out;
}
- if (set_vcpu_nr(xc_handle, dom, page_array, vcpus)) {
- fprintf(stderr, "Couldn't set vcpu number for VMX guest.\n");
+ if (set_hvm_info(xc_handle, dom, page_array, vcpus, acpi)) {
+ fprintf(stderr, "Couldn't set hvm info for VMX guest.\n");
goto error_out;
}
@@ -574,29 +603,6 @@ static int setup_guest(int xc_handle,
return -1;
}
-#define VMX_FEATURE_FLAG 0x20
-
-static int vmx_identify(void)
-{
- int eax, ecx;
-
- __asm__ __volatile__ (
-#if defined(__i386__)
- "push %%ebx; cpuid; pop %%ebx"
-#elif defined(__x86_64__)
- "push %%rbx; cpuid; pop %%rbx"
-#endif
- : "=a" (eax), "=c" (ecx)
- : "0" (1)
- : "dx");
-
- if (!(ecx & VMX_FEATURE_FLAG)) {
- return -1;
- }
-
- return 0;
-}
-
int xc_vmx_build(int xc_handle,
uint32_t domid,
int memsize,
@@ -604,6 +610,7 @@ int xc_vmx_build(int xc_handle,
unsigned int control_evtchn,
unsigned int lapic,
unsigned int vcpus,
+ unsigned int acpi,
unsigned int store_evtchn,
unsigned long *store_mfn)
{
@@ -613,10 +620,18 @@ int xc_vmx_build(int xc_handle,
unsigned long nr_pages;
char *image = NULL;
unsigned long image_size;
+ xen_capabilities_info_t xen_caps;
+
+ if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
+ {
+ PERROR("Failed to get xen version info");
+ goto error_out;
+ }
- if ( vmx_identify() < 0 )
+ if ( !strstr(xen_caps, "hvm") )
{
- PERROR("CPU doesn't support VMX Extensions");
+ PERROR("CPU doesn't support VMX Extensions or "
+ "CPU VMX Extensions are not turned on");
goto error_out;
}
@@ -659,7 +674,7 @@ int xc_vmx_build(int xc_handle,
if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn,
- lapic, vcpus, store_evtchn, store_mfn) < 0)
+ lapic, vcpus, acpi, store_evtchn, store_mfn) < 0)
{
ERROR("Error constructing guest OS");
goto error_out;
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index ecd2284e86..f558b5985e 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -380,6 +380,17 @@ int xc_domain_ioport_permission(int xc_handle,
uint32_t nr_ports,
uint32_t allow_access);
+int xc_domain_irq_permission(int xc_handle,
+ uint32_t domid,
+ uint8_t pirq,
+ uint8_t allow_access);
+
+int xc_domain_iomem_permission(int xc_handle,
+ uint32_t domid,
+ unsigned long first_pfn,
+ unsigned long nr_pfns,
+ uint8_t allow_access);
+
unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
unsigned long mfn);
@@ -416,6 +427,19 @@ void *xc_map_foreign_range(int xc_handle, uint32_t dom,
void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot,
unsigned long *arr, int num );
+/**
+ * Translates a virtual address in the context of a given domain and
+ * vcpu returning the machine page frame number of the associated
+ * page.
+ *
+ * @parm xc_handle a handle on an open hypervisor interface
+ * @parm dom the domain to perform the translation in
+ * @parm vcpu the vcpu to perform the translation on
+ * @parm virt the virtual address to translate
+ */
+unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
+ int vcpu, unsigned long long virt);
+
int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf,
unsigned long max_pfns);
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index 8e01b73b10..4d6d80af3d 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -58,6 +58,7 @@ int xc_vmx_build(int xc_handle,
unsigned int control_evtchn,
unsigned int lapic,
unsigned int vcpus,
+ unsigned int acpi,
unsigned int store_evtchn,
unsigned long *store_mfn);
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index 95b0a9f101..9a49ef9307 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -364,19 +364,20 @@ static PyObject *pyxc_vmx_build(XcObject *self,
int control_evtchn, store_evtchn;
int vcpus = 1;
int lapic = 0;
+ int acpi = 0;
int memsize;
unsigned long store_mfn = 0;
static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
- "memsize", "image", "lapic", "vcpus", NULL };
+ "memsize", "image", "lapic", "vcpus", "acpi",NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisii", kwd_list,
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisiii", kwd_list,
&dom, &control_evtchn, &store_evtchn,
- &memsize, &image, &lapic, &vcpus) )
+ &memsize, &image, &lapic, &vcpus,&acpi) )
return NULL;
if ( xc_vmx_build(self->xc_handle, dom, memsize, image, control_evtchn,
- lapic, vcpus, store_evtchn, &store_mfn) != 0 )
+ lapic, vcpus, acpi, store_evtchn, &store_mfn) != 0 )
return PyErr_SetFromErrno(xc_error);
return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
@@ -774,6 +775,52 @@ static PyObject *pyxc_domain_ioport_permission(XcObject *self,
return zero;
}
+static PyObject *pyxc_domain_irq_permission(PyObject *self,
+ PyObject *args,
+ PyObject *kwds)
+{
+ XcObject *xc = (XcObject *)self;
+ uint32_t dom;
+ int pirq, allow_access, ret;
+
+ static char *kwd_list[] = { "dom", "pirq", "allow_access", NULL };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iii", kwd_list,
+ &dom, &pirq, &allow_access) )
+ return NULL;
+
+ ret = xc_domain_irq_permission(
+ xc->xc_handle, dom, pirq, allow_access);
+ if ( ret != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ Py_INCREF(zero);
+ return zero;
+}
+
+static PyObject *pyxc_domain_iomem_permission(PyObject *self,
+ PyObject *args,
+ PyObject *kwds)
+{
+ XcObject *xc = (XcObject *)self;
+ uint32_t dom;
+ unsigned long first_pfn, nr_pfns, allow_access, ret;
+
+ static char *kwd_list[] = { "dom", "first_pfn", "nr_pfns", "allow_access", NULL };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "illi", kwd_list,
+ &dom, &first_pfn, &nr_pfns, &allow_access) )
+ return NULL;
+
+ ret = xc_domain_iomem_permission(
+ xc->xc_handle, dom, first_pfn, nr_pfns, allow_access);
+ if ( ret != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ Py_INCREF(zero);
+ return zero;
+}
+
static PyObject *dom_op(XcObject *self, PyObject *args,
int (*fn)(int, uint32_t))
@@ -1070,6 +1117,25 @@ static PyMethodDef pyxc_methods[] = {
" allow_access [int]: Non-zero means enable access; else disable access\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
+ { "domain_irq_permission",
+ (PyCFunction)pyxc_domain_irq_permission,
+ METH_VARARGS | METH_KEYWORDS, "\n"
+ "Allow a domain access to a physical IRQ\n"
+ " dom [int]: Identifier of domain to be allowed access.\n"
+ " pirq [int]: The Physical IRQ\n"
+ " allow_access [int]: Non-zero means enable access; else disable access\n\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
+ { "domain_iomem_permission",
+ (PyCFunction)pyxc_domain_iomem_permission,
+ METH_VARARGS | METH_KEYWORDS, "\n"
+ "Allow a domain access to a range of IO memory pages\n"
+ " dom [int]: Identifier of domain to be allowed access.\n"
+ " first_pfn [long]: First page of I/O Memory\n"
+ " nr_pfns [long]: Number of pages of I/O Memory (>0)\n"
+ " allow_access [int]: Non-zero means enable access; else disable access\n\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
{ "pages_to_kib",
(PyCFunction)pyxc_pages_to_kib,
METH_VARARGS, "\n"
diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
index 5fe114a59d..982de14bc1 100644
--- a/tools/python/xen/xend/image.py
+++ b/tools/python/xen/xend/image.py
@@ -189,11 +189,16 @@ class VmxImageHandler(ImageHandler):
def configure(self, imageConfig, deviceConfig):
ImageHandler.configure(self, imageConfig, deviceConfig)
+ info = xc.xeninfo()
+ if not 'hvm' in info['xen_caps']:
+ raise VmError("vmx: not an Intel VT platform, we stop creating!")
+
self.dmargs = self.parseDeviceModelArgs(imageConfig, deviceConfig)
self.device_model = sxp.child_value(imageConfig, 'device_model')
if not self.device_model:
raise VmError("vmx: missing device model")
self.display = sxp.child_value(imageConfig, 'display')
+ self.xauthority = sxp.child_value(imageConfig, 'xauthority')
self.vm.storeVm(("image/dmargs", " ".join(self.dmargs)),
("image/device-model", self.device_model),
@@ -209,6 +214,8 @@ class VmxImageHandler(ImageHandler):
if not lapic is None:
self.lapic = int(lapic)
+ self.acpi = int(sxp.child_value(imageConfig, 'acpi', 0))
+
def buildDomain(self):
# Create an event channel
self.device_channel = xc.evtchn_alloc_unbound(dom=self.vm.getDomid(),
@@ -224,6 +231,7 @@ class VmxImageHandler(ImageHandler):
log.debug("memsize = %d", self.vm.getMemoryTarget() / 1024)
log.debug("lapic = %d", self.lapic)
log.debug("vcpus = %d", self.vm.getVCpuCount())
+ log.debug("acpi = %d", self.acpi)
return xc.vmx_build(dom = self.vm.getDomid(),
image = self.kernel,
@@ -231,9 +239,9 @@ class VmxImageHandler(ImageHandler):
store_evtchn = store_evtchn,
memsize = self.vm.getMemoryTarget() / 1024,
lapic = self.lapic,
+ acpi = self.acpi,
vcpus = self.vm.getVCpuCount())
-
# Return a list of cmd line args to the device models based on the
# xm config file
def parseDeviceModelArgs(self, imageConfig, deviceConfig):
@@ -264,44 +272,44 @@ class VmxImageHandler(ImageHandler):
nics = 0
for (name, info) in deviceConfig:
if name == 'vbd':
- uname = sxp.child_value(info, 'uname')
- typedev = sxp.child_value(info, 'dev')
- (_, vbdparam) = string.split(uname, ':', 1)
- if re.match('^ioemu:', typedev):
- (emtype, vbddev) = string.split(typedev, ':', 1)
- else:
- emtype = 'vbd'
- vbddev = typedev
- if emtype != 'ioemu':
- continue;
- vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
- if vbddev not in vbddev_list:
- raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
- ret.append("-%s" % vbddev)
- ret.append("%s" % vbdparam)
+ uname = sxp.child_value(info, 'uname')
+ typedev = sxp.child_value(info, 'dev')
+ (_, vbdparam) = string.split(uname, ':', 1)
+ if 'ioemu:' in typedev:
+ (emtype, vbddev) = string.split(typedev, ':', 1)
+ else:
+ emtype = 'vbd'
+ vbddev = typedev
+ if emtype == 'vbd':
+ continue;
+ vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
+ if vbddev not in vbddev_list:
+ raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
+ ret.append("-%s" % vbddev)
+ ret.append("%s" % vbdparam)
if name == 'vif':
- type = sxp.child_value(info, 'type')
- if type != 'ioemu':
- continue
- nics += 1
- if mac != None:
- continue
- mac = sxp.child_value(info, 'mac')
- bridge = sxp.child_value(info, 'bridge')
- if mac == None:
- mac = randomMAC()
- if bridge == None:
- bridge = 'xenbr0'
- ret.append("-macaddr")
- ret.append("%s" % mac)
- ret.append("-bridge")
- ret.append("%s" % bridge)
+ type = sxp.child_value(info, 'type')
+ if type != 'ioemu':
+ continue
+ nics += 1
+ if mac != None:
+ continue
+ mac = sxp.child_value(info, 'mac')
+ bridge = sxp.child_value(info, 'bridge')
+ if mac == None:
+ mac = randomMAC()
+ if bridge == None:
+ bridge = 'xenbr0'
+ ret.append("-macaddr")
+ ret.append("%s" % mac)
+ ret.append("-bridge")
+ ret.append("%s" % bridge)
if name == 'vtpm':
- instance = sxp.child_value(info, 'pref_instance')
- ret.append("-instance")
- ret.append("%s" % instance)
+ instance = sxp.child_value(info, 'pref_instance')
+ ret.append("-instance")
+ ret.append("%s" % instance)
ret.append("-nics")
- ret.append("%d" % nics)
+ ret.append("%d" % nics)
return ret
def configVNC(self, config):
@@ -340,6 +348,8 @@ class VmxImageHandler(ImageHandler):
env = dict(os.environ)
if self.display:
env['DISPLAY'] = self.display
+ if self.xauthority:
+ env['XAUTHORITY'] = self.xauthority
log.info("spawning device models: %s %s", self.device_model, args)
self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env)
log.info("device model pid: %d", self.pid)
diff --git a/tools/python/xen/xend/server/blkif.py b/tools/python/xen/xend/server/blkif.py
index 337dfe5805..23135aa08b 100644
--- a/tools/python/xen/xend/server/blkif.py
+++ b/tools/python/xen/xend/server/blkif.py
@@ -31,7 +31,7 @@ class BlkifController(DevController):
"""Block device interface controller. Handles all block devices
for a domain.
"""
-
+
def __init__(self, vm):
"""Create a block device controller.
"""
@@ -40,9 +40,9 @@ class BlkifController(DevController):
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""
-
+
dev = sxp.child_value(config, 'dev')
- if re.match('^ioemu:', dev):
+ if 'ioemu:' in dev:
return (None,{},{})
devid = blkif.blkdev_name_to_number(dev)
diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
index 8d4c0f9cd3..dd97a9dc08 100644
--- a/tools/python/xen/xm/create.py
+++ b/tools/python/xen/xm/create.py
@@ -164,6 +164,10 @@ gopts.var('lapic', val='LAPIC',
fn=set_int, default=0,
use="Disable or enable local APIC of VMX domain.")
+gopts.var('acpi', val='ACPI',
+ fn=set_int, default=0,
+ use="Disable or enable ACPI of VMX domain.")
+
gopts.var('vcpus', val='VCPUS',
fn=set_int, default=1,
use="# of Virtual CPUS in domain.")
@@ -388,6 +392,10 @@ gopts.var('display', val='DISPLAY',
fn=set_value, default=None,
use="X11 display to use")
+gopts.var('xauthority', val='XAUTHORITY',
+ fn=set_value, default=None,
+ use="X11 Authority to use")
+
def err(msg):
"""Print an error to stderr and exit.
@@ -526,7 +534,8 @@ def configure_vmx(config_image, vals):
"""
args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb',
'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'audio',
- 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'lapic']
+ 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'lapic',
+ 'xauthority', 'acpi' ]
for a in args:
if (vals.__dict__[a]):
config_image.append([a, vals.__dict__[a]])
@@ -801,6 +810,9 @@ def parseCommandLine(argv):
if not gopts.vals.display:
gopts.vals.display = os.getenv("DISPLAY")
+ if not gopts.vals.xauthority:
+ gopts.vals.xauthority = os.getenv("XAUTHORITY")
+
# Process remaining args as config variables.
for arg in args:
if '=' in arg:
diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
index ad69090464..56f7ce385c 100644
--- a/tools/python/xen/xm/main.py
+++ b/tools/python/xen/xm/main.py
@@ -75,7 +75,7 @@ domname_help = "domname <DomId> Convert a domain id to a domain
vcpu_set_help = """vcpu-set <DomId> <VCPUs> Set the number of VCPUs for a domain"""
vcpu_list_help = "vcpu-list <DomId> List the VCPUs for a domain (or all domains)"
vcpu_pin_help = "vcpu-pin <DomId> <VCPU> <CPUs> Set which cpus a VCPU can use"
-dmesg_help = "dmesg [--clear] Read or clear Xen's message buffer"
+dmesg_help = "dmesg [-c|--clear] Read or clear Xen's message buffer"
info_help = "info Get information about the xen host"
rename_help = "rename <DomId> <New Name> Rename a domain"
log_help = "log Print the xend log"
@@ -672,7 +672,7 @@ its contents if the [-c|--clear] flag is specified.
server.xend_node_clear_dmesg()
def xm_log(args):
- arg_check(args, 'xm-log', 0)
+ arg_check(args, "log", 0)
from xen.xend.XendClient import server
print server.xend_node_log()
@@ -845,8 +845,8 @@ aliases = {
"balloon": "mem-set",
"set-vcpus": "vcpu-set",
"vif-list": "network-list",
- "vbd-create": "block-create",
- "vbd-destroy": "block-destroy",
+ "vbd-create": "block-attach",
+ "vbd-destroy": "block-detach",
"vbd-list": "block-list",
}
diff --git a/tools/vtpm_manager/README b/tools/vtpm_manager/README
index d01abf7e4b..f0d1e98cfb 100644
--- a/tools/vtpm_manager/README
+++ b/tools/vtpm_manager/README
@@ -53,11 +53,6 @@ DUMMY_BACKEND -> vtpm_manager listens on /tmp/in.fifo and
MANUAL_DM_LAUNCH -> Must manually launch & kill VTPMs
-WELL_KNOWN_SRK_AUTH -> Rather than randomly generating the password for the SRK,
- use a well known value. This is necessary for sharing use
- of the SRK across applications. Such as VTPM and Dom0
- measurement software.
-
WELL_KNOWN_OWNER_AUTH -> Rather than randomly generating the password for the owner,
use a well known value. This is useful for debugging and for
poor bios which do not support clearing TPM if OwnerAuth is
diff --git a/tools/vtpm_manager/Rules.mk b/tools/vtpm_manager/Rules.mk
index 26b44563c1..c7395864ac 100644
--- a/tools/vtpm_manager/Rules.mk
+++ b/tools/vtpm_manager/Rules.mk
@@ -56,8 +56,7 @@ CFLAGS += -DLOGGING_MODULES="(BITMASK(VTPM_LOG_TCS)|BITMASK(VTPM_LOG_VTSP)|BITMA
# Do not have manager launch DMs.
#CFLAGS += -DMANUAL_DM_LAUNCH
-# Fixed SRK
-CFLAGS += -DWELL_KNOWN_SRK_AUTH
+# Fixed OwnerAuth
#CFLAGS += -DWELL_KNOWN_OWNER_AUTH
# TPM Hardware Device or TPM Simulator
diff --git a/tools/vtpm_manager/manager/securestorage.c b/tools/vtpm_manager/manager/securestorage.c
index 5f19aa63ef..4df8531c40 100644
--- a/tools/vtpm_manager/manager/securestorage.c
+++ b/tools/vtpm_manager/manager/securestorage.c
@@ -65,7 +65,7 @@ TPM_RESULT envelope_encrypt(const buffer_t *inbuf,
UINT32 i;
struct pack_constbuf_t symkey_cipher32, data_cipher32;
- vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping[%d]: 0x", buffer_len(inbuf));
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping Input[%d]: 0x", buffer_len(inbuf));
for (i=0; i< buffer_len(inbuf); i++)
vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", inbuf->bytes[i]);
vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
@@ -94,6 +94,12 @@ TPM_RESULT envelope_encrypt(const buffer_t *inbuf,
BSG_TPM_SIZE32_DATA, &data_cipher32);
vtpmloginfo(VTPM_LOG_VTPM, "Saved %d bytes of E(symkey) + %d bytes of E(data)\n", buffer_len(&symkey_cipher), buffer_len(&data_cipher));
+
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping Output[%d]: 0x", buffer_len(sealed_data));
+ for (i=0; i< buffer_len(sealed_data); i++)
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", sealed_data->bytes[i]);
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
goto egress;
abort_egress:
@@ -125,7 +131,7 @@ TPM_RESULT envelope_decrypt(const long cipher_size,
memset(&symkey, 0, sizeof(symkey_t));
- vtpmloginfo(VTPM_LOG_VTPM_DEEP, "envelope decrypting[%ld]: 0x", cipher_size);
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Envelope Decrypt Input[%ld]: 0x", cipher_size);
for (i=0; i< cipher_size; i++)
vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cipher[i]);
vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
@@ -155,6 +161,11 @@ TPM_RESULT envelope_decrypt(const long cipher_size,
// Decrypt State
TPMTRY(TPM_DECRYPT_ERROR, Crypto_symcrypto_decrypt (&symkey, &data_cipher, unsealed_data) );
+
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Envelope Decrypte Output[%d]: 0x", buffer_len(unsealed_data));
+ for (i=0; i< buffer_len(unsealed_data); i++)
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", unsealed_data->bytes[i]);
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
goto egress;
@@ -291,124 +302,175 @@ TPM_RESULT VTPM_Handle_Load_NVM(VTPM_DMI_RESOURCE *myDMI,
return status;
}
+
TPM_RESULT VTPM_SaveService(void) {
TPM_RESULT status=TPM_SUCCESS;
int fh, dmis=-1;
-
- BYTE *flat_global;
- int flat_global_size, bytes_written;
+
+ BYTE *flat_boot_key, *flat_dmis, *flat_enc;
+ buffer_t clear_flat_global, enc_flat_global;
UINT32 storageKeySize = buffer_len(&vtpm_globals->storageKeyWrap);
+ UINT32 bootKeySize = buffer_len(&vtpm_globals->bootKeyWrap);
struct pack_buf_t storage_key_pack = {storageKeySize, vtpm_globals->storageKeyWrap.bytes};
-
+ struct pack_buf_t boot_key_pack = {bootKeySize, vtpm_globals->bootKeyWrap.bytes};
+
struct hashtable_itr *dmi_itr;
VTPM_DMI_RESOURCE *dmi_res;
-
- UINT32 flat_global_full_size;
-
- // Global Values needing to be saved
- flat_global_full_size = 3*sizeof(TPM_DIGEST) + // Auths
- sizeof(UINT32) + // storagekeysize
- storageKeySize + // storage key
- hashtable_count(vtpm_globals->dmi_map) * // num DMIS
- (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
-
-
- flat_global = (BYTE *) malloc( flat_global_full_size);
-
- flat_global_size = BSG_PackList(flat_global, 4,
- BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
- BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
- BSG_TPM_SECRET, &vtpm_globals->storage_key_usage_auth,
- BSG_TPM_SIZE32_DATA, &storage_key_pack);
-
+
+ UINT32 boot_key_size, flat_dmis_size;
+
+ // Initially fill these with buffer sizes for each data type. Later fill
+ // in actual size, once flattened.
+ boot_key_size = sizeof(UINT32) + // bootkeysize
+ bootKeySize; // boot key
+
+ TPMTRYRETURN(buffer_init(&clear_flat_global, 3*sizeof(TPM_DIGEST) + // Auths
+ sizeof(UINT32) +// storagekeysize
+ storageKeySize, NULL) ); // storage key
+
+ flat_dmis_size = (hashtable_count(vtpm_globals->dmi_map) - 1) * // num DMIS (-1 for Dom0)
+ (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
+
+ flat_boot_key = (BYTE *) malloc( boot_key_size );
+ flat_enc = (BYTE *) malloc( sizeof(UINT32) );
+ flat_dmis = (BYTE *) malloc( flat_dmis_size );
+
+ boot_key_size = BSG_PackList(flat_boot_key, 1,
+ BSG_TPM_SIZE32_DATA, &boot_key_pack);
+
+ BSG_PackList(clear_flat_global.bytes, 3,
+ BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+ BSG_TPM_SECRET, &vtpm_globals->storage_key_usage_auth,
+ BSG_TPM_SIZE32_DATA, &storage_key_pack);
+
+ TPMTRYRETURN(envelope_encrypt(&clear_flat_global,
+ &vtpm_globals->bootKey,
+ &enc_flat_global) );
+
+ BSG_PackConst(buffer_len(&enc_flat_global), 4, flat_enc);
+
// Per DMI values to be saved
if (hashtable_count(vtpm_globals->dmi_map) > 0) {
-
+
dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
do {
dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
dmis++;
// No need to save dmi0.
- if (dmi_res->dmi_id == 0)
- continue;
-
-
- flat_global_size += BSG_PackList( flat_global + flat_global_size, 3,
- BSG_TYPE_UINT32, &dmi_res->dmi_id,
- BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
- BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
-
+ if (dmi_res->dmi_id == 0)
+ continue;
+
+
+ flat_dmis_size += BSG_PackList( flat_dmis + flat_dmis_size, 3,
+ BSG_TYPE_UINT32, &dmi_res->dmi_id,
+ BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
+ BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
+
} while (hashtable_iterator_advance(dmi_itr));
}
-
- //FIXME: Once we have a way to protect a TPM key, we should use it to
- // encrypt this blob. BUT, unless there is a way to ensure the key is
- // not used by other apps, this encryption is useless.
+
fh = open(STATE_FILE, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
if (fh == -1) {
vtpmlogerror(VTPM_LOG_VTPM, "Unable to open %s file for write.\n", STATE_FILE);
status = TPM_IOERROR;
goto abort_egress;
}
-
- if ( (bytes_written = write(fh, flat_global, flat_global_size)) != flat_global_size ) {
- vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data. %d/%d bytes written.\n", bytes_written, flat_global_size);
+
+ if ( ( write(fh, flat_boot_key, boot_key_size) != boot_key_size ) ||
+ ( write(fh, flat_enc, sizeof(UINT32)) != sizeof(UINT32) ) ||
+ ( write(fh, enc_flat_global.bytes, buffer_len(&enc_flat_global)) != buffer_len(&enc_flat_global) ) ||
+ ( write(fh, flat_dmis, flat_dmis_size) != flat_dmis_size ) ) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Failed to completely write service data.\n");
status = TPM_IOERROR;
goto abort_egress;
- }
- vtpm_globals->DMI_table_dirty = FALSE;
-
+ }
+
+ vtpm_globals->DMI_table_dirty = FALSE;
+
goto egress;
-
+
abort_egress:
egress:
-
- free(flat_global);
+
+ free(flat_boot_key);
+ free(flat_enc);
+ buffer_free(&enc_flat_global);
+ free(flat_dmis);
close(fh);
-
+
vtpmloginfo(VTPM_LOG_VTPM, "Saved VTPM Service state (status = %d, dmis = %d)\n", (int) status, dmis);
return status;
}
TPM_RESULT VTPM_LoadService(void) {
-
+
TPM_RESULT status=TPM_SUCCESS;
int fh, stat_ret, dmis=0;
long fh_size = 0, step_size;
- BYTE *flat_global=NULL;
- struct pack_buf_t storage_key_pack;
- UINT32 *dmi_id_key;
-
+ BYTE *flat_table=NULL;
+ buffer_t unsealed_data;
+ struct pack_buf_t storage_key_pack, boot_key_pack;
+ UINT32 *dmi_id_key, enc_size;
+
VTPM_DMI_RESOURCE *dmi_res;
struct stat file_stat;
-
+
+ TPM_HANDLE boot_key_handle;
+ TPM_AUTHDATA boot_usage_auth;
+ memset(&boot_usage_auth, 0, sizeof(TPM_AUTHDATA));
+
fh = open(STATE_FILE, O_RDONLY );
stat_ret = fstat(fh, &file_stat);
- if (stat_ret == 0)
+ if (stat_ret == 0)
fh_size = file_stat.st_size;
else {
status = TPM_IOERROR;
goto abort_egress;
}
-
- flat_global = (BYTE *) malloc(fh_size);
-
- if ((long) read(fh, flat_global, fh_size) != fh_size ) {
+
+ flat_table = (BYTE *) malloc(fh_size);
+
+ if ((long) read(fh, flat_table, fh_size) != fh_size ) {
status = TPM_IOERROR;
goto abort_egress;
}
-
+
+ // Read Boot Key
+ step_size = BSG_UnpackList( flat_table, 2,
+ BSG_TPM_SIZE32_DATA, &boot_key_pack,
+ BSG_TYPE_UINT32, &enc_size);
+
+ TPMTRYRETURN(buffer_init(&vtpm_globals->bootKeyWrap, 0, 0) );
+ TPMTRYRETURN(buffer_append_raw(&vtpm_globals->bootKeyWrap, boot_key_pack.size, boot_key_pack.data) );
+
+ //Load Boot Key
+ TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
+ TPM_SRK_KEYHANDLE,
+ &vtpm_globals->bootKeyWrap,
+ &SRK_AUTH,
+ &boot_key_handle,
+ &vtpm_globals->keyAuth,
+ &vtpm_globals->bootKey,
+ FALSE) );
+
+ TPMTRYRETURN( envelope_decrypt(enc_size,
+ flat_table + step_size,
+ vtpm_globals->manager_tcs_handle,
+ boot_key_handle,
+ (const TPM_AUTHDATA*) &boot_usage_auth,
+ &unsealed_data) );
+ step_size += enc_size;
+
// Global Values needing to be saved
- step_size = BSG_UnpackList( flat_global, 4,
- BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
- BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
- BSG_TPM_SECRET, &vtpm_globals->storage_key_usage_auth,
- BSG_TPM_SIZE32_DATA, &storage_key_pack);
-
+ BSG_UnpackList( unsealed_data.bytes, 3,
+ BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+ BSG_TPM_SECRET, &vtpm_globals->storage_key_usage_auth,
+ BSG_TPM_SIZE32_DATA, &storage_key_pack);
+
TPMTRYRETURN(buffer_init(&vtpm_globals->storageKeyWrap, 0, 0) );
TPMTRYRETURN(buffer_append_raw(&vtpm_globals->storageKeyWrap, storage_key_pack.size, storage_key_pack.data) );
-
+
// Per DMI values to be saved
while ( step_size < fh_size ){
if (fh_size - step_size < (long) (sizeof(UINT32) + 2*sizeof(TPM_DIGEST))) {
@@ -417,35 +479,38 @@ TPM_RESULT VTPM_LoadService(void) {
} else {
dmi_res = (VTPM_DMI_RESOURCE *) malloc(sizeof(VTPM_DMI_RESOURCE));
dmis++;
-
+
dmi_res->connected = FALSE;
-
- step_size += BSG_UnpackList(flat_global + step_size, 3,
- BSG_TYPE_UINT32, &dmi_res->dmi_id,
- BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
- BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
-
+
+ step_size += BSG_UnpackList(flat_table + step_size, 3,
+ BSG_TYPE_UINT32, &dmi_res->dmi_id,
+ BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
+ BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
+
// install into map
dmi_id_key = (UINT32 *) malloc (sizeof(UINT32));
*dmi_id_key = dmi_res->dmi_id;
if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, dmi_res)) {
- status = TPM_FAIL;
- goto abort_egress;
+ status = TPM_FAIL;
+ goto abort_egress;
}
-
+
}
-
+
}
-
+
vtpmloginfo(VTPM_LOG_VTPM, "Loaded saved state (dmis = %d).\n", dmis);
goto egress;
-
+
abort_egress:
vtpmlogerror(VTPM_LOG_VTPM, "Failed to load service data with error = %s\n", tpm_get_error_name(status));
egress:
-
- free(flat_global);
+
+ free(flat_table);
close(fh);
-
+
+ // TODO: Could be nice and evict BootKey. (Need to add EvictKey to VTSP.
+
return status;
}
+
diff --git a/tools/vtpm_manager/manager/vtpm_manager.c b/tools/vtpm_manager/manager/vtpm_manager.c
index 48143fbc3b..c9eda0440c 100644
--- a/tools/vtpm_manager/manager/vtpm_manager.c
+++ b/tools/vtpm_manager/manager/vtpm_manager.c
@@ -74,16 +74,15 @@ VTPM_GLOBALS *vtpm_globals=NULL;
#endif
// --------------------------- Well Known Auths --------------------------
-#ifdef WELL_KNOWN_SRK_AUTH
-static BYTE FIXED_SRK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+const TPM_AUTHDATA SRK_AUTH = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-#endif
#ifdef WELL_KNOWN_OWNER_AUTH
static BYTE FIXED_OWNER_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
#endif
-
+
+
// -------------------------- Hash table functions --------------------
static unsigned int hashfunc32(void *ky) {
@@ -100,13 +99,7 @@ TPM_RESULT VTPM_Create_Service(){
TPM_RESULT status = TPM_SUCCESS;
- // Generate Auth's for SRK & Owner
-#ifdef WELL_KNOWN_SRK_AUTH
- memcpy(vtpm_globals->srk_usage_auth, FIXED_SRK_AUTH, sizeof(TPM_AUTHDATA));
-#else
- Crypto_GetRandom(vtpm_globals->srk_usage_auth, sizeof(TPM_AUTHDATA) );
-#endif
-
+ // Generate Auth for Owner
#ifdef WELL_KNOWN_OWNER_AUTH
memcpy(vtpm_globals->owner_usage_auth, FIXED_OWNER_AUTH, sizeof(TPM_AUTHDATA));
#else
@@ -116,14 +109,14 @@ TPM_RESULT VTPM_Create_Service(){
// Take Owership of TPM
CRYPTO_INFO ek_cryptoInfo;
- vtpmloginfo(VTPM_LOG_VTPM, "Attempting Pubek Read. NOTE: Failure is ok.\n");
status = VTSP_ReadPubek(vtpm_globals->manager_tcs_handle, &ek_cryptoInfo);
// If we can read PubEK then there is no owner and we should take it.
if (status == TPM_SUCCESS) {
+ vtpmloginfo(VTPM_LOG_VTPM, "Failed to readEK meaning TPM has an owner. Creating Keys off existing SRK.\n");
TPMTRYRETURN(VTSP_TakeOwnership(vtpm_globals->manager_tcs_handle,
(const TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth,
- (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+ &SRK_AUTH,
&ek_cryptoInfo,
&vtpm_globals->keyAuth));
@@ -142,7 +135,7 @@ TPM_RESULT VTPM_Create_Service(){
TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
TPM_ET_KEYHANDLE,
TPM_SRK_KEYHANDLE,
- (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+ &SRK_AUTH,
&sharedsecret,
&osap) );
@@ -157,8 +150,43 @@ TPM_RESULT VTPM_Create_Service(){
&vtpm_globals->storageKeyWrap,
&osap) );
- vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
+ // Generate boot key's auth
+ Crypto_GetRandom( &vtpm_globals->storage_key_usage_auth,
+ sizeof(TPM_AUTHDATA) );
+ TPM_AUTHDATA bootKeyWrapAuth;
+ memset(&bootKeyWrapAuth, 0, sizeof(bootKeyWrapAuth));
+
+ TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
+ TPM_ET_KEYHANDLE,
+ TPM_SRK_KEYHANDLE,
+ &SRK_AUTH,
+ &sharedsecret,
+ &osap) );
+
+ osap.fContinueAuthSession = FALSE;
+
+ // FIXME: This key protects the global secrets on disk. It should use TPM
+ // PCR bindings to limit its use to legit configurations.
+  //        The current bindings are open, which assumes this code runs in a trusted VM.
+  //        If this VM is not trusted, measurement and PCR bindings should be used.
+ TPMTRYRETURN( VTSP_CreateWrapKey( vtpm_globals->manager_tcs_handle,
+ TPM_KEY_BIND,
+ (const TPM_AUTHDATA*)&bootKeyWrapAuth,
+ TPM_SRK_KEYHANDLE,
+ (const TPM_AUTHDATA*)&sharedsecret,
+ &vtpm_globals->bootKeyWrap,
+ &osap) );
+
+ // Populate CRYPTO_INFO vtpm_globals->bootKey. This does not load it into the TPM
+ TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
+ TPM_SRK_KEYHANDLE,
+ &vtpm_globals->bootKeyWrap,
+ NULL,
+ NULL,
+ NULL,
+ &vtpm_globals->bootKey,
+ TRUE ) );
goto egress;
abort_egress:
@@ -278,24 +306,26 @@ void *VTPM_Service_Handler(void *threadTypePtr){
#endif
// Check status of rx_fh. If necessary attempt to re-open it.
+ char* s = NULL;
if (*rx_fh < 0) {
#ifdef VTPM_MULTI_VM
- *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+ s = VTPM_BE_DEV;
#else
if (threadType == BE_LISTENER_THREAD)
#ifdef DUMMY_BACKEND
- *rx_fh = open("/tmp/in.fifo", O_RDWR);
+ s = "/tmp/in.fifo";
#else
- *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+ s = VTPM_BE_DEV;
#endif
else // DMI Listener
- *rx_fh = open(VTPM_RX_FIFO, O_RDWR);
+ s = VTPM_RX_FIFO;
+ *rx_fh = open(s, O_RDWR);
#endif
}
// Respond to failures to open rx_fh
if (*rx_fh < 0) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh.\n");
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh for %s.\n", s);
#ifdef VTPM_MULTI_VM
return TPM_IOERROR;
#else
@@ -713,7 +743,7 @@ void *VTPM_Service_Handler(void *threadTypePtr){
///////////////////////////////////////////////////////////////////////////////
TPM_RESULT VTPM_Init_Service() {
- TPM_RESULT status = TPM_FAIL;
+ TPM_RESULT status = TPM_FAIL, serviceStatus;
BYTE *randomsead;
UINT32 randomsize;
@@ -737,7 +767,7 @@ TPM_RESULT VTPM_Init_Service() {
// Create new TCS Object
vtpm_globals->manager_tcs_handle = 0;
-
+
TPMTRYRETURN(TCS_create());
// Create TCS Context for service
@@ -756,17 +786,24 @@ TPM_RESULT VTPM_Init_Service() {
vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
// If failed, create new Service.
- if (VTPM_LoadService() != TPM_SUCCESS)
+ serviceStatus = VTPM_LoadService();
+ if (serviceStatus == TPM_IOERROR) {
+ vtpmloginfo(VTPM_LOG_VTPM, "Failed to read service file. Assuming first time initialization.\n");
TPMTRYRETURN( VTPM_Create_Service() );
+ } else if (serviceStatus != TPM_SUCCESS) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Failed to read existing service file.\n");
+ exit(1);
+ }
//Load Storage Key
TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
TPM_SRK_KEYHANDLE,
&vtpm_globals->storageKeyWrap,
- (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+ &SRK_AUTH,
&vtpm_globals->storageKeyHandle,
&vtpm_globals->keyAuth,
- &vtpm_globals->storageKey) );
+ &vtpm_globals->storageKey,
+ FALSE ) );
// Create entry for Dom0 for control messages
TPMTRYRETURN( VTPM_Handle_New_DMI(NULL) );
@@ -797,12 +834,11 @@ void VTPM_Stop_Service() {
free (dmi_itr);
}
-
- TCS_CloseContext(vtpm_globals->manager_tcs_handle);
-
- if ( (vtpm_globals->DMI_table_dirty) &&
- (VTPM_SaveService() != TPM_SUCCESS) )
+ if ( (vtpm_globals->DMI_table_dirty) && (VTPM_SaveService() != TPM_SUCCESS) )
vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
+
+ TCS_CloseContext(vtpm_globals->manager_tcs_handle);
+ TCS_destroy();
hashtable_destroy(vtpm_globals->dmi_map, 1);
free(vtpm_globals);
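
The init path above now treats TPM_IOERROR from VTPM_LoadService as "no saved state yet" and creates a fresh service, while any other failure aborts rather than silently regenerating keys over an existing file. A minimal sketch of that decision, using stdio and errno in place of the manager's own routines; the status names and file path here are illustrative only.

    #include <stdio.h>
    #include <stdlib.h>
    #include <errno.h>

    /* Hypothetical status codes standing in for TPM_SUCCESS / TPM_IOERROR / TPM_FAIL. */
    enum status { ST_SUCCESS = 0, ST_IOERROR, ST_FAIL };

    static enum status load_state(const char *path)
    {
        FILE *f = fopen(path, "rb");
        if (f == NULL)
            return (errno == ENOENT) ? ST_IOERROR : ST_FAIL;
        /* ... parse saved state here; treat short/garbled files as ST_FAIL ... */
        fclose(f);
        return ST_SUCCESS;
    }

    int main(void)
    {
        enum status s = load_state("/var/vtpm/service.dat");   /* illustrative path */

        if (s == ST_IOERROR) {
            printf("No saved state; assuming first-time initialization.\n");
            /* create fresh state here */
        } else if (s != ST_SUCCESS) {
            fprintf(stderr, "Saved state exists but could not be read; refusing to continue.\n");
            exit(1);
        } else {
            printf("Loaded saved state.\n");
        }
        return 0;
    }
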
diff --git a/tools/vtpm_manager/manager/vtpmpriv.h b/tools/vtpm_manager/manager/vtpmpriv.h
index bb613aec2c..2f8c2ebc67 100644
--- a/tools/vtpm_manager/manager/vtpmpriv.h
+++ b/tools/vtpm_manager/manager/vtpmpriv.h
@@ -108,6 +108,7 @@ typedef struct tdVTPM_GLOBALS {
TCS_CONTEXT_HANDLE manager_tcs_handle; // TCS Handle used by manager
TPM_HANDLE storageKeyHandle; // Key used by persistent store
CRYPTO_INFO storageKey; // For software encryption
+ CRYPTO_INFO bootKey; // For saving table
TCS_AUTH keyAuth; // OIAP session for storageKey
BOOL DMI_table_dirty; // Indicates that a command
// has updated the DMI table
@@ -115,15 +116,17 @@ typedef struct tdVTPM_GLOBALS {
// Persistent Data
TPM_AUTHDATA owner_usage_auth; // OwnerAuth of real TPM
- TPM_AUTHDATA srk_usage_auth; // SRK Auth of real TPM
buffer_t storageKeyWrap; // Wrapped copy of storageKey
-
+ TPM_AUTHDATA srk_usage_auth;
TPM_AUTHDATA storage_key_usage_auth;
-
+
+ buffer_t bootKeyWrap; // Wrapped copy of boot key
+
}VTPM_GLOBALS;
-//Global dmi map
-extern VTPM_GLOBALS *vtpm_globals;
+// --------------------------- Global Values --------------------------
+extern VTPM_GLOBALS *vtpm_globals; // Key info and DMI states
+extern const TPM_AUTHDATA SRK_AUTH; // SRK Well Known Auth Value
// ********************** Command Handler Prototypes ***********************
TPM_RESULT VTPM_Handle_Load_NVM( VTPM_DMI_RESOURCE *myDMI,
diff --git a/tools/vtpm_manager/manager/vtsp.c b/tools/vtpm_manager/manager/vtsp.c
index b6f82e4b3a..17c3335923 100644
--- a/tools/vtpm_manager/manager/vtsp.c
+++ b/tools/vtpm_manager/manager/vtsp.c
@@ -563,63 +563,69 @@ TPM_RESULT VTSP_LoadKey(const TCS_CONTEXT_HANDLE hContext,
const TPM_AUTHDATA *parentAuth,
TPM_HANDLE *newKeyHandle,
TCS_AUTH *auth,
- CRYPTO_INFO *cryptoinfo /*= NULL*/) {
+ CRYPTO_INFO *cryptoinfo,
+ const BOOL skipTPMLoad) {
- vtpmloginfo(VTPM_LOG_VTSP, "Loading Key.\n%s","");
+ vtpmloginfo(VTPM_LOG_VTSP, "Loading Key %s.\n", (!skipTPMLoad ? "into TPM" : "only into memory"));
TPM_RESULT status = TPM_SUCCESS;
TPM_COMMAND_CODE command = TPM_ORD_LoadKey;
-
- BYTE *paramText; // Digest to make Auth.
+
+ BYTE *paramText=NULL; // Digest to make Auth.
UINT32 paramTextSize;
+
+  // skipTPMLoad stops the key from being loaded into the TPM, but a CRYPTO_INFO is still generated for it
+ if (! skipTPMLoad) {
- if ((rgbWrappedKeyBlob == NULL) || (parentAuth == NULL) ||
- (newKeyHandle==NULL) || (auth==NULL)) {
- status = TPM_BAD_PARAMETER;
- goto abort_egress;
- }
+ if ((rgbWrappedKeyBlob == NULL) || (parentAuth == NULL) ||
+ (newKeyHandle==NULL) || (auth==NULL)) {
+ status = TPM_BAD_PARAMETER;
+ goto abort_egress;
+ }
- // Generate Extra TCS Parameters
- TPM_HANDLE phKeyHMAC;
+ // Generate Extra TCS Parameters
+ TPM_HANDLE phKeyHMAC;
- // Generate HMAC
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
+ // Generate HMAC
+ Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
- paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
+ paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
- paramTextSize = BSG_PackList(paramText, 1,
- BSG_TPM_COMMAND_CODE, &command);
+ paramTextSize = BSG_PackList(paramText, 1,
+ BSG_TPM_COMMAND_CODE, &command);
- memcpy(paramText + paramTextSize, rgbWrappedKeyBlob->bytes, buffer_len(rgbWrappedKeyBlob));
- paramTextSize += buffer_len(rgbWrappedKeyBlob);
+ memcpy(paramText + paramTextSize, rgbWrappedKeyBlob->bytes, buffer_len(rgbWrappedKeyBlob));
+ paramTextSize += buffer_len(rgbWrappedKeyBlob);
- TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+ TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
parentAuth, auth) );
- // Call TCS
- TPMTRYRETURN( TCSP_LoadKeyByBlob( hContext,
- hUnwrappingKey,
- buffer_len(rgbWrappedKeyBlob),
- rgbWrappedKeyBlob->bytes,
- auth,
- newKeyHandle,
- &phKeyHMAC) );
-
- // Verify Auth
- paramTextSize = BSG_PackList(paramText, 3,
- BSG_TPM_RESULT, &status,
- BSG_TPM_COMMAND_CODE, &command,
- BSG_TPM_HANDLE, newKeyHandle);
-
- TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
- parentAuth, auth,
- hContext) );
-
- // Unpack/return key structure
+ // Call TCS
+ TPMTRYRETURN( TCSP_LoadKeyByBlob( hContext,
+ hUnwrappingKey,
+ buffer_len(rgbWrappedKeyBlob),
+ rgbWrappedKeyBlob->bytes,
+ auth,
+ newKeyHandle,
+ &phKeyHMAC) );
+
+ // Verify Auth
+ paramTextSize = BSG_PackList(paramText, 3,
+ BSG_TPM_RESULT, &status,
+ BSG_TPM_COMMAND_CODE, &command,
+ BSG_TPM_HANDLE, newKeyHandle);
+
+ TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+ parentAuth, auth,
+ hContext) );
+ }
+
+ // Build cryptoinfo structure for software crypto function.
if (cryptoinfo != NULL) {
TPM_KEY newKey;
+ // Unpack/return key structure
BSG_Unpack(BSG_TPM_KEY, rgbWrappedKeyBlob->bytes , &newKey);
TPM_RSA_KEY_PARMS rsaKeyParms;
diff --git a/tools/vtpm_manager/manager/vtsp.h b/tools/vtpm_manager/manager/vtsp.h
index ddae64e483..93f22d34e4 100644
--- a/tools/vtpm_manager/manager/vtsp.h
+++ b/tools/vtpm_manager/manager/vtsp.h
@@ -86,7 +86,8 @@ TPM_RESULT VTSP_LoadKey(const TCS_CONTEXT_HANDLE hContext,
const TPM_AUTHDATA *parentAuth,
TPM_HANDLE *newKeyHandle,
TCS_AUTH *pAuth,
- CRYPTO_INFO *cryptoinfo);
+ CRYPTO_INFO *cryptoinfo,
+ const BOOL skipTPMLoad);
TPM_RESULT VTSP_Unbind( const TCS_CONTEXT_HANDLE hContext,
const TPM_KEY_HANDLE key_handle,
diff --git a/tools/xentrace/Makefile b/tools/xentrace/Makefile
index 5c3a77be83..cf91113105 100644
--- a/tools/xentrace/Makefile
+++ b/tools/xentrace/Makefile
@@ -15,25 +15,37 @@ HDRS = $(wildcard *.h)
OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
BIN = xentrace tbctl setsize
+LIBBIN =
SCRIPTS = xentrace_format
MAN1 = $(wildcard *.1)
MAN8 = $(wildcard *.8)
+ifeq ($(XEN_TARGET_ARCH),x86_32)
+LIBBIN += xenctx
+endif
+
+ifeq ($(XEN_TARGET_ARCH),x86_64)
+LIBBIN += xenctx
+endif
+
all: build
-build: $(BIN)
+build: $(BIN) $(LIBBIN)
install: build
[ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
+	[ -z "$(LIBBIN)" ] || [ -d $(DESTDIR)/usr/$(LIBDIR)/xen/bin ] || \
+ $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
[ -d $(DESTDIR)/usr/share/man/man1 ] || \
$(INSTALL_DIR) $(DESTDIR)/usr/share/man/man1
[ -d $(DESTDIR)/usr/share/man/man8 ] || \
$(INSTALL_DIR) $(DESTDIR)/usr/share/man/man8
$(INSTALL_PROG) $(BIN) $(SCRIPTS) $(DESTDIR)/usr/bin
+	[ -z "$(LIBBIN)" ] || $(INSTALL_PROG) $(LIBBIN) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
$(INSTALL_DATA) $(MAN1) $(DESTDIR)/usr/share/man/man1
$(INSTALL_DATA) $(MAN8) $(DESTDIR)/usr/share/man/man8
clean:
- $(RM) *.a *.so *.o *.rpm $(BIN)
+ $(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN)
%: %.c $(HDRS) Makefile
$(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxenctrl
diff --git a/tools/xentrace/xenctx.c b/tools/xentrace/xenctx.c
index fbfd341cd8..32adccde7e 100644
--- a/tools/xentrace/xenctx.c
+++ b/tools/xentrace/xenctx.c
@@ -20,15 +20,184 @@
#include <errno.h>
#include <argp.h>
#include <signal.h>
+#include <string.h>
+#include <getopt.h>
#include "xenctrl.h"
+int xc_handle = 0;
+int domid = 0;
+int frame_ptrs = 0;
+int stack_trace = 0;
+
+#if defined (__i386__)
+#define FMT_SIZE_T "%08x"
+#define STACK_POINTER(regs) (regs->esp)
+#define FRAME_POINTER(regs) (regs->ebp)
+#define INSTR_POINTER(regs) (regs->eip)
+#define STACK_ROWS 4
+#define STACK_COLS 8
+#elif defined (__x86_64__)
+#define FMT_SIZE_T "%016lx"
+#define STACK_POINTER(regs) (regs->rsp)
+#define FRAME_POINTER(regs) (regs->rbp)
+#define INSTR_POINTER(regs) (regs->rip)
+#define STACK_ROWS 4
+#define STACK_COLS 4
+#endif
+
+struct symbol {
+ size_t address;
+ char type;
+ char *name;
+ struct symbol *next;
+} *symbol_table = NULL;
+
+size_t kernel_stext, kernel_etext, kernel_sinittext, kernel_einittext;
+
+int is_kernel_text(size_t addr)
+{
+#if defined (__i386__)
+ if (symbol_table == NULL)
+        return (addr > 0xc0000000);
+#elif defined (__x86_64__)
+ if (symbol_table == NULL)
+ return (addr > 0xffffffff80000000UL);
+#endif
+
+ if (addr >= kernel_stext &&
+ addr <= kernel_etext)
+ return 1;
+ if (addr >= kernel_sinittext &&
+ addr <= kernel_einittext)
+ return 1;
+ return 0;
+}
+
+void free_symbol(struct symbol *symbol)
+{
+ if (symbol == NULL)
+ return;
+ if (symbol->name)
+ free(symbol->name);
+ free(symbol);
+}
+
+void insert_symbol(struct symbol *symbol)
+{
+ static struct symbol *prev = NULL;
+ struct symbol *s = symbol_table;
+
+ if (s == NULL) {
+ symbol_table = symbol;
+ symbol->next = NULL;
+ return;
+ }
+
+ /* The System.map is usually already sorted... */
+ if (prev
+ && prev->address < symbol->address
+ && (!prev->next || prev->next->address > symbol->address)) {
+ s = prev;
+ } else {
+ /* ... otherwise do crappy/slow search for the correct place */
+ while(s && s->next && s->next->address < symbol->address)
+ s = s->next;
+ }
+
+ symbol->next = s->next;
+ s->next = symbol;
+ prev = symbol;
+}
+
+struct symbol *lookup_symbol(size_t address)
+{
+ struct symbol *s = symbol_table;
+
+ while(s && s->next && s->next->address < address)
+ s = s->next;
+
+ if (s && s->address < address)
+ return s;
+
+ return NULL;
+}
+
+void print_symbol(size_t addr)
+{
+ struct symbol *s;
+
+ if (!is_kernel_text(addr))
+ return;
+
+ s = lookup_symbol(addr);
+
+ if (s==NULL)
+ return;
+
+ if (addr==s->address)
+ printf("%s", s->name);
+ else
+ printf("%s+%#x", s->name, (unsigned int)(addr - s->address));
+}
+
+void read_symbol_table(const char *symtab)
+{
+ char line[256];
+ char *p;
+ struct symbol *symbol;
+ FILE *f;
+
+ f = fopen(symtab, "r");
+ if(f == NULL) {
+ fprintf(stderr, "failed to open symbol table %s\n", symtab);
+ exit(-1);
+ }
+
+ while(!feof(f)) {
+ if(fgets(line,256,f)==NULL)
+ break;
+
+ symbol = malloc(sizeof(*symbol));
+
+ /* need more checks for syntax here... */
+ symbol->address = strtoull(line, &p, 16);
+ p++;
+ symbol->type = *p++;
+ p++;
+
+ /* in the future we should handle the module name
+ * being appended here, this would allow us to use
+ * /proc/kallsyms as our symbol table
+ */
+ if (p[strlen(p)-1] == '\n')
+ p[strlen(p)-1] = '\0';
+ symbol->name = strdup(p);
+
+ insert_symbol(symbol);
+
+ if (strcmp(symbol->name, "_stext") == 0)
+ kernel_stext = symbol->address;
+ else if (strcmp(symbol->name, "_etext") == 0)
+ kernel_etext = symbol->address;
+ else if (strcmp(symbol->name, "_sinittext") == 0)
+ kernel_sinittext = symbol->address;
+ else if (strcmp(symbol->name, "_einittext") == 0)
+ kernel_einittext = symbol->address;
+ }
+
+ fclose(f);
+}
+
#ifdef __i386__
void print_ctx(vcpu_guest_context_t *ctx1)
{
struct cpu_user_regs *regs = &ctx1->user_regs;
- printf("eip: %08x\t", regs->eip);
+ printf("eip: %08x ", regs->eip);
+ print_symbol(regs->eip);
+ printf("\n");
+
printf("esp: %08x\n", regs->esp);
printf("eax: %08x\t", regs->eax);
@@ -51,7 +220,9 @@ void print_ctx(vcpu_guest_context_t *ctx1)
{
struct cpu_user_regs *regs = &ctx1->user_regs;
- printf("rip: %08lx\t", regs->rip);
+ printf("rip: %08lx ", regs->rip);
+ print_symbol(regs->rip);
+ printf("\n");
printf("rsp: %08lx\n", regs->rsp);
printf("rax: %08lx\t", regs->rax);
@@ -63,8 +234,8 @@ void print_ctx(vcpu_guest_context_t *ctx1)
printf("rdi: %08lx\t", regs->rdi);
printf("rbp: %08lx\n", regs->rbp);
- printf("r8: %08lx\t", regs->r8);
- printf("r9: %08lx\t", regs->r9);
+ printf(" r8: %08lx\t", regs->r8);
+ printf(" r9: %08lx\t", regs->r9);
printf("r10: %08lx\t", regs->r10);
printf("r11: %08lx\n", regs->r11);
@@ -81,35 +252,238 @@ void print_ctx(vcpu_guest_context_t *ctx1)
}
#endif
-void dump_ctx(uint32_t domid, uint32_t vcpu)
+void *map_page(vcpu_guest_context_t *ctx, int vcpu, size_t virt)
+{
+ static unsigned long previous_mfn = 0;
+ static void *mapped = NULL;
+
+ unsigned long mfn = xc_translate_foreign_address(xc_handle, domid, vcpu, virt);
+ unsigned long offset = virt & ~XC_PAGE_MASK;
+
+ if (mapped && mfn == previous_mfn)
+ goto out;
+
+ if (mapped)
+ munmap(mapped, XC_PAGE_SIZE);
+
+ previous_mfn = mfn;
+
+ mapped = xc_map_foreign_range(xc_handle, domid, XC_PAGE_SIZE, PROT_READ, mfn);
+
+ if (mapped == NULL) {
+ fprintf(stderr, "failed to map page.\n");
+ exit(-1);
+ }
+
+ out:
+ return (void *)(mapped + offset);
+}
+
+void print_stack(vcpu_guest_context_t *ctx, int vcpu)
+{
+ struct cpu_user_regs *regs = &ctx->user_regs;
+ size_t stack = STACK_POINTER(regs);
+ size_t stack_limit = (STACK_POINTER(regs) & XC_PAGE_MASK) + XC_PAGE_SIZE;
+ size_t frame;
+ size_t instr;
+ size_t *p;
+ int i;
+
+ printf("\n");
+ printf("Stack:\n");
+ for (i=1; i<STACK_ROWS+1 && stack < stack_limit; i++) {
+ while(stack < stack_limit && stack < STACK_POINTER(regs) + i*STACK_COLS*sizeof(stack)) {
+ p = map_page(ctx, vcpu, stack);
+ printf(" " FMT_SIZE_T, *p);
+ stack += sizeof(stack);
+ }
+ printf("\n");
+ }
+ printf("\n");
+
+ printf("Code:\n");
+ instr = INSTR_POINTER(regs) - 21;
+ for(i=0; i<32; i++) {
+ unsigned char *c = map_page(ctx, vcpu, instr+i);
+ if (instr+i == INSTR_POINTER(regs))
+ printf("<%02x> ", *c);
+ else
+ printf("%02x ", *c);
+ }
+ printf("\n");
+
+ printf("\n");
+
+ if(stack_trace)
+ printf("Stack Trace:\n");
+ else
+ printf("Call Trace:\n");
+ printf("%c [<" FMT_SIZE_T ">] ", stack_trace ? '*' : ' ', INSTR_POINTER(regs));
+
+ print_symbol(INSTR_POINTER(regs));
+ printf(" <--\n");
+ if (frame_ptrs) {
+ stack = STACK_POINTER(regs);
+ frame = FRAME_POINTER(regs);
+ while(frame && stack < stack_limit) {
+ if (stack_trace) {
+ while (stack < frame) {
+ p = map_page(ctx, vcpu, stack);
+ printf("| " FMT_SIZE_T " ", *p);
+ printf("\n");
+ stack += sizeof(*p);
+ }
+ } else {
+ stack = frame;
+ }
+
+ p = map_page(ctx, vcpu, stack);
+ frame = *p;
+ if (stack_trace)
+ printf("|-- " FMT_SIZE_T "\n", *p);
+ stack += sizeof(*p);
+
+ if (frame) {
+ p = map_page(ctx, vcpu, stack);
+ printf("%c [<" FMT_SIZE_T ">] ", stack_trace ? '|' : ' ', *p);
+ print_symbol(*p);
+ printf("\n");
+ stack += sizeof(*p);
+ }
+ }
+ } else {
+ stack = STACK_POINTER(regs);
+ while(stack < stack_limit) {
+ p = map_page(ctx, vcpu, stack);
+ if (is_kernel_text(*p)) {
+ printf(" [<" FMT_SIZE_T ">] ", *p);
+ print_symbol(*p);
+ printf("\n");
+ } else if (stack_trace) {
+ printf(" " FMT_SIZE_T "\n", *p);
+ }
+ stack += sizeof(*p);
+ }
+ }
+}
+
+void dump_ctx(int vcpu)
{
int ret;
vcpu_guest_context_t ctx;
- int xc_handle = xc_interface_open(); /* for accessing control interface */
+ xc_handle = xc_interface_open(); /* for accessing control interface */
+
+ ret = xc_domain_pause(xc_handle, domid);
+ if (ret < 0) {
+ perror("xc_domain_pause");
+ exit(-1);
+ }
ret = xc_domain_get_vcpu_context(xc_handle, domid, vcpu, &ctx);
- if (ret != 0) {
+ if (ret < 0) {
+ xc_domain_unpause(xc_handle, domid);
perror("xc_domain_get_vcpu_context");
exit(-1);
}
+
print_ctx(&ctx);
+ if (is_kernel_text(ctx.user_regs.eip))
+ print_stack(&ctx, vcpu);
+
+ ret = xc_domain_unpause(xc_handle, domid);
+ if (ret < 0) {
+ perror("xc_domain_unpause");
+ exit(-1);
+ }
+
xc_interface_close(xc_handle);
+ if (ret < 0) {
+ perror("xc_interface_close");
+ exit(-1);
+ }
+}
+
+void usage(void)
+{
+ printf("usage:\n\n");
+
+ printf(" xenctx [options] <DOMAIN> [VCPU]\n\n");
+
+ printf("options:\n");
+ printf(" -f, --frame-pointers\n");
+ printf(" assume the kernel was compiled with\n");
+ printf(" frame pointers.\n");
+ printf(" -s SYMTAB, --symbol-table=SYMTAB\n");
+ printf(" read symbol table from SYMTAB.\n");
+ printf(" --stack-trace print a complete stack trace.\n");
}
int main(int argc, char **argv)
{
+ int ch;
+ const char *sopts = "fs:h";
+ const struct option lopts[] = {
+ {"stack-trace", 0, NULL, 'S'},
+ {"symbol-table", 1, NULL, 's'},
+ {"frame-pointers", 0, NULL, 'f'},
+ {"help", 0, NULL, 'h'},
+ {0, 0, 0, 0}
+ };
+ const char *symbol_table = NULL;
+
int vcpu = 0;
- if (argc < 2) {
- printf("usage: xenctx <domid> <optional vcpu>\n");
+ while ((ch = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
+ switch(ch) {
+ case 'f':
+ frame_ptrs = 1;
+ break;
+ case 's':
+ symbol_table = optarg;
+ break;
+ case 'S':
+ stack_trace = 1;
+ break;
+ case 'h':
+ usage();
+ exit(-1);
+ case '?':
+ fprintf(stderr, "%s --help for more options\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+ argv += optind; argc -= optind;
+
+ if (argc < 1 || argc > 2) {
+ printf("usage: xenctx [options] <domid> <optional vcpu>\n");
exit(-1);
}
- if (argc == 3)
- vcpu = atoi(argv[2]);
+ domid = atoi(argv[0]);
+ if (domid==0) {
+ fprintf(stderr, "cannot trace dom0\n");
+ exit(-1);
+ }
+
+ if (argc == 2)
+ vcpu = atoi(argv[1]);
- dump_ctx(atoi(argv[1]), vcpu);
+ if (symbol_table)
+ read_symbol_table(symbol_table);
+
+ dump_ctx(vcpu);
return 0;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
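
xenctx now resolves an instruction or return address to symbol+offset by keeping the System.map entries in address order and taking the last symbol at or below the address (lookup_symbol/print_symbol above). The sketch below shows the same lookup over a small sorted array; the table contents are made up. With the new options the tool is invoked along the lines of: xenctx -s System.map -f <domid> [vcpu].

    #include <stdio.h>
    #include <stddef.h>

    struct sym { unsigned long addr; const char *name; };

    /* Table must be sorted by address, as System.map normally is. */
    static const struct sym table[] = {
        { 0xc0100000UL, "_stext"       },
        { 0xc0100100UL, "do_something" },
        { 0xc0100400UL, "do_other"     },
    };

    static const struct sym *lookup(unsigned long addr)
    {
        const struct sym *best = NULL;
        size_t i;
        for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
            if (table[i].addr <= addr)
                best = &table[i];
            else
                break;
        }
        return best;
    }

    int main(void)
    {
        unsigned long eip = 0xc0100123UL;
        const struct sym *s = lookup(eip);
        if (s)
            printf("%lx is %s+%#lx\n", eip, s->name, eip - s->addr);
        return 0;
    }
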
diff --git a/xen/arch/ia64/xen/domain.c b/xen/arch/ia64/xen/domain.c
index 21209d8987..d407458138 100644
--- a/xen/arch/ia64/xen/domain.c
+++ b/xen/arch/ia64/xen/domain.c
@@ -181,7 +181,7 @@ static void init_switch_stack(struct vcpu *v)
memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
}
-void arch_do_createdomain(struct vcpu *v)
+int arch_do_createdomain(struct vcpu *v)
{
struct domain *d = v->domain;
struct thread_info *ti = alloc_thread_info(v);
@@ -248,7 +248,9 @@ void arch_do_createdomain(struct vcpu *v)
}
} else
d->arch.mm = NULL;
- printf ("arch_do_create_domain: domain=%p\n", d);
+	printf ("arch_do_createdomain: domain=%p\n", d);
+
+ return 0;
}
void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
@@ -754,7 +756,10 @@ void alloc_dom0(void)
*/
void physdev_init_dom0(struct domain *d)
{
- set_bit(_DOMF_physdev_access, &d->domain_flags);
+ if (iomem_permit_access(d, 0UL, ~0UL))
+ BUG();
+ if (irqs_permit_access(d, 0, NR_PIRQS-1))
+ BUG();
}
unsigned int vmx_dom0 = 0;
diff --git a/xen/arch/ia64/xen/irq.c b/xen/arch/ia64/xen/irq.c
index b694d62bc9..1537873e04 100644
--- a/xen/arch/ia64/xen/irq.c
+++ b/xen/arch/ia64/xen/irq.c
@@ -1378,9 +1378,6 @@ int pirq_guest_bind(struct vcpu *d, int irq, int will_share)
unsigned long flags;
int rc = 0;
- if ( !IS_CAPABLE_PHYSDEV(d->domain) )
- return -EPERM;
-
spin_lock_irqsave(&desc->lock, flags);
action = (irq_guest_action_t *)desc->action;
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 54fcf82dae..9cdbc5798c 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -29,6 +29,7 @@ ifeq ($(TARGET_SUBARCH),x86_32)
endif
OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
+OBJS := $(subst $(TARGET_SUBARCH)/xen.lds.o,,$(OBJS))
ifneq ($(crash_debug),y)
OBJS := $(patsubst cdb%.o,,$(OBJS))
@@ -43,22 +44,25 @@ $(TARGET): $(TARGET)-syms boot/mkelf32
$(CURDIR)/arch.o: $(OBJS)
$(LD) $(LDFLAGS) -r -o $@ $(OBJS)
-$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(TARGET_SUBARCH)/xen.lds
- $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) xen.lds
+ $(LD) $(LDFLAGS) -T xen.lds -N \
boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@
$(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
$(MAKE) $(BASEDIR)/xen-syms.o
- $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+ $(LD) $(LDFLAGS) -T xen.lds -N \
boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
$(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
$(MAKE) $(BASEDIR)/xen-syms.o
- $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+ $(LD) $(LDFLAGS) -T xen.lds -N \
boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o
asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
$(CC) $(CFLAGS) -S -o $@ $<
+xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS)
+ $(CC) $(CFLAGS) -P -E -Ui386 -D__ASSEMBLY__ -o $@ $<
+
boot/mkelf32: boot/mkelf32.c
$(HOSTCC) $(HOSTCFLAGS) -o $@ $<
@@ -73,5 +77,6 @@ clean:
rm -f dm/*.o dm/*~ dm/core
rm -f genapic/*.o genapic/*~ genapic/core
rm -f cpu/*.o cpu/*~ cpu/core
+ rm -f xen.lds
.PHONY: default clean
diff --git a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S
index 59e68ca7ef..b98e1c72bc 100644
--- a/xen/arch/x86/boot/x86_32.S
+++ b/xen/arch/x86/boot/x86_32.S
@@ -1,5 +1,6 @@
#include <xen/config.h>
#include <public/xen.h>
+#include <asm/asm_defns.h>
#include <asm/desc.h>
#include <asm/page.h>
#include <asm/msr.h>
@@ -53,6 +54,7 @@ __start:
mov %ecx,%gs
ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
1: lss stack_start-__PAGE_OFFSET,%esp
+ add $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp
/* Reset EFLAGS (subsumes CLI and CLD). */
pushl $0
@@ -189,7 +191,7 @@ ignore_int:
/*** STACK LOCATION ***/
ENTRY(stack_start)
- .long cpu0_stack + STACK_SIZE - 200 - __PAGE_OFFSET
+ .long cpu0_stack
.long __HYPERVISOR_DS
/*** DESCRIPTOR TABLES ***/
@@ -256,10 +258,6 @@ ENTRY(idle_pg_table_l2)
.fill 1*PAGE_SIZE,1,0
#endif
-#if (STACK_ORDER == 0)
-.section ".bss.page_aligned","w"
-#else
-.section ".bss.twopage_aligned","w"
-#endif
+.section ".bss.stack_aligned","w"
ENTRY(cpu0_stack)
.fill STACK_SIZE,1,0
diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
index 126850a0f8..3ab012aad8 100644
--- a/xen/arch/x86/boot/x86_64.S
+++ b/xen/arch/x86/boot/x86_64.S
@@ -1,5 +1,6 @@
#include <xen/config.h>
#include <public/xen.h>
+#include <asm/asm_defns.h>
#include <asm/desc.h>
#include <asm/page.h>
#include <asm/msr.h>
@@ -121,7 +122,8 @@ skip_boot_checks:
mov %rcx,%cr4
mov stack_start(%rip),%rsp
-
+ or $(STACK_SIZE-CPUINFO_sizeof),%rsp
+
/* Reset EFLAGS (subsumes CLI and CLD). */
pushq $0
popf
@@ -140,7 +142,7 @@ __high_start:
mov %ecx,%ss
lidt idt_descr(%rip)
-
+
cmp $(SECONDARY_CPU_FLAG),%ebx
je start_secondary
@@ -219,7 +221,7 @@ idt:
.quad idt_table
ENTRY(stack_start)
- .quad cpu0_stack + STACK_SIZE - 200
+ .quad cpu0_stack
high_start:
.quad __high_start
@@ -265,10 +267,6 @@ ENTRY(idle_pg_table_l2)
.org 0x4000 + PAGE_SIZE
.code64
-#if (STACK_ORDER == 0)
-.section ".bss.page_aligned","w"
-#else
-.section ".bss.twopage_aligned","w"
-#endif
+.section ".bss.stack_aligned","w"
ENTRY(cpu0_stack)
.fill STACK_SIZE,1,0
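
Both boot paths now load stack_start with the base of cpu0_stack and then step to the top of the stack, minus the cpu_info block that lives there: x86_32 with an add, x86_64 by OR-ing in STACK_SIZE-CPUINFO_sizeof. The OR is equivalent to the add only because the stack base is STACK_SIZE-aligned (which the .bss.stack_aligned / ALIGN(STACK_SIZE) changes elsewhere in this patch arrange), so the low bits of the base are zero. A small host-side check of that identity; the sizes below are made-up stand-ins for the real STACK_SIZE and CPUINFO_sizeof.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define STACK_SIZE      (8UL * 1024)      /* assumed: a power of two         */
    #define CPUINFO_SIZEOF  128UL             /* assumed size of struct cpu_info */

    int main(void)
    {
        /* Any STACK_SIZE-aligned base behaves the same; pick one for the demo. */
        uintptr_t base = 0x100000;
        assert((base & (STACK_SIZE - 1)) == 0);

        uintptr_t by_add = base + (STACK_SIZE - CPUINFO_SIZEOF);
        uintptr_t by_or  = base | (STACK_SIZE - CPUINFO_SIZEOF);

        assert(by_add == by_or);
        printf("stack top below cpu_info: %#lx\n", (unsigned long)by_or);
        return 0;
    }
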
diff --git a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c
index 527b4dd387..5a4f493ce0 100644
--- a/xen/arch/x86/dom0_ops.c
+++ b/xen/arch/x86/dom0_ops.c
@@ -17,6 +17,7 @@
#include <asm/msr.h>
#include <xen/trace.h>
#include <xen/console.h>
+#include <xen/iocap.h>
#include <asm/shadow.h>
#include <asm/irq.h>
#include <asm/processor.h>
@@ -141,7 +142,6 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
struct domain *d;
unsigned int fp = op->u.ioport_permission.first_port;
unsigned int np = op->u.ioport_permission.nr_ports;
- unsigned int p;
ret = -EINVAL;
if ( (fp + np) > 65536 )
@@ -152,26 +152,12 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
op->u.ioport_permission.domain)) == NULL) )
break;
- ret = -ENOMEM;
- if ( d->arch.iobmp_mask != NULL )
- {
- if ( (d->arch.iobmp_mask = xmalloc_array(
- u8, IOBMP_BYTES)) == NULL )
- {
- put_domain(d);
- break;
- }
- memset(d->arch.iobmp_mask, 0xFF, IOBMP_BYTES);
- }
-
- ret = 0;
- for ( p = fp; p < (fp + np); p++ )
- {
- if ( op->u.ioport_permission.allow_access )
- clear_bit(p, d->arch.iobmp_mask);
- else
- set_bit(p, d->arch.iobmp_mask);
- }
+ if ( np == 0 )
+ ret = 0;
+ else if ( op->u.ioport_permission.allow_access )
+ ret = ioports_permit_access(d, fp, fp + np - 1);
+ else
+ ret = ioports_deny_access(d, fp, fp + np - 1);
put_domain(d);
}
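
The reworked ioport_permission case converts (first_port, nr_ports) into the inclusive range [fp, fp+np-1], treats np == 0 as a successful no-op, and still rejects ranges that run past the 16-bit port space. A small sketch of just that validation step; ioports_permit_access/ioports_deny_access themselves are not reproduced here.

    #include <stdio.h>

    /* Returns -1 on invalid input, 0 for an empty request,
     * 1 when *s/*e hold the inclusive range to act on. */
    static int ioport_range(unsigned int fp, unsigned int np,
                            unsigned int *s, unsigned int *e)
    {
        if (fp + np > 65536)          /* must stay within the 16-bit port space */
            return -1;
        if (np == 0)                  /* empty request: nothing to permit/deny  */
            return 0;
        *s = fp;
        *e = fp + np - 1;
        return 1;
    }

    int main(void)
    {
        unsigned int s, e;
        if (ioport_range(0x3f8, 8, &s, &e) > 0)
            printf("permit/deny ports %#x-%#x\n", s, e);
        return 0;
    }
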
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 4c586226dd..d905f9dfbf 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -20,6 +20,7 @@
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/grant_table.h>
+#include <xen/iocap.h>
#include <asm/regs.h>
#include <asm/mc146818rtc.h>
#include <asm/system.h>
@@ -35,9 +36,7 @@
#include <xen/console.h>
#include <xen/elf.h>
#include <asm/vmx.h>
-#include <asm/vmx_vmcs.h>
#include <asm/msr.h>
-#include <asm/physdev.h>
#include <xen/kernel.h>
#include <xen/multicall.h>
@@ -98,7 +97,7 @@ void startup_cpu_idle_loop(void)
cpu_set(smp_processor_id(), v->domain->cpumask);
v->arch.schedule_tail = continue_idle_task;
- idle_loop();
+ reset_stack_and_jump(idle_loop);
}
static long no_idt[2];
@@ -185,11 +184,17 @@ void dump_pageframe_info(struct domain *d)
{
struct pfn_info *page;
- if ( d->tot_pages < 10 )
+ printk("Memory pages belonging to domain %u:\n", d->domain_id);
+
+ if ( d->tot_pages >= 10 )
+ {
+ printk(" DomPage list too long to display\n");
+ }
+ else
{
list_for_each_entry ( page, &d->page_list, list )
{
- printk("Page %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
+ printk(" DomPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
_p(page_to_phys(page)), _p(page_to_pfn(page)),
page->count_info, page->u.inuse.type_info);
}
@@ -197,15 +202,10 @@ void dump_pageframe_info(struct domain *d)
list_for_each_entry ( page, &d->xenpage_list, list )
{
- printk("XenPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
+ printk(" XenPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
_p(page_to_phys(page)), _p(page_to_pfn(page)),
page->count_info, page->u.inuse.type_info);
}
-
- page = virt_to_page(d->shared_info);
- printk("Shared_info@%p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
- _p(page_to_phys(page)), _p(page_to_pfn(page)), page->count_info,
- page->u.inuse.type_info);
}
struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
@@ -250,21 +250,34 @@ void free_perdomain_pt(struct domain *d)
#endif
}
-void arch_do_createdomain(struct vcpu *v)
+int arch_do_createdomain(struct vcpu *v)
{
struct domain *d = v->domain;
l1_pgentry_t gdt_l1e;
- int vcpuid, pdpt_order;
+ int vcpuid, pdpt_order, rc;
#ifdef __x86_64__
int i;
#endif
if ( is_idle_task(d) )
- return;
+ return 0;
+
+ d->arch.ioport_caps =
+ rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+ if ( d->arch.ioport_caps == NULL )
+ return -ENOMEM;
+
+ if ( (d->shared_info = alloc_xenheap_page()) == NULL )
+ return -ENOMEM;
+
+ if ( (rc = ptwr_init(d)) != 0 )
+ {
+ free_xenheap_page(d->shared_info);
+ return rc;
+ }
v->arch.schedule_tail = continue_nonidle_task;
- d->shared_info = alloc_xenheap_page();
memset(d->shared_info, 0, PAGE_SIZE);
v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id];
v->cpumap = CPUMAP_RUNANYWHERE;
@@ -308,10 +321,10 @@ void arch_do_createdomain(struct vcpu *v)
__PAGE_HYPERVISOR);
#endif
- (void)ptwr_init(d);
-
shadow_lock_init(d);
INIT_LIST_HEAD(&d->arch.free_shadow_frames);
+
+ return 0;
}
void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
@@ -348,6 +361,8 @@ int arch_set_info_guest(
((c->user_regs.ss & 3) == 0) )
return -EINVAL;
}
+ else if ( !hvm_enabled )
+ return -EINVAL;
clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
if ( c->flags & VGCF_I387_VALID )
@@ -953,8 +968,6 @@ void domain_relinquish_resources(struct domain *d)
BUG_ON(!cpus_empty(d->cpumask));
- physdev_destroy_state(d);
-
ptwr_destroy(d);
/* Drop the in-use references to page-table bases. */
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index f3cbf8237f..d08f2c12fb 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -16,13 +16,13 @@
#include <xen/kernel.h>
#include <xen/domain.h>
#include <xen/compile.h>
+#include <xen/iocap.h>
#include <asm/regs.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/i387.h>
-#include <asm/physdev.h>
#include <asm/shadow.h>
static long dom0_nrpages;
@@ -94,9 +94,9 @@ static struct pfn_info *alloc_chunk(struct domain *d, unsigned long max_pages)
return page;
}
-static void process_dom0_ioports_disable()
+static void process_dom0_ioports_disable(void)
{
- unsigned long io_from, io_to, io_nr;
+ unsigned long io_from, io_to;
char *t, *u, *s = opt_dom0_ioports_disable;
if ( *s == '\0' )
@@ -126,8 +126,8 @@ static void process_dom0_ioports_disable()
printk("Disabling dom0 access to ioport range %04lx-%04lx\n",
io_from, io_to);
- io_nr = io_to - io_from + 1;
- physdev_modify_ioport_access_range(dom0, 0, io_from, io_nr);
+ if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
+ BUG();
}
}
@@ -183,7 +183,6 @@ int construct_dom0(struct domain *d,
/* Machine address of next candidate page-table page. */
unsigned long mpt_alloc;
- extern void physdev_init_dom0(struct domain *);
extern void translate_l2pgtable(
struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn);
@@ -692,9 +691,6 @@ int construct_dom0(struct domain *d,
zap_low_mappings(l2start);
zap_low_mappings(idle_pg_table_l2);
#endif
-
- /* DOM0 gets access to everything. */
- physdev_init_dom0(d);
init_domain_time(d);
@@ -746,20 +742,29 @@ int construct_dom0(struct domain *d,
printk("dom0: shadow setup done\n");
}
+ i = 0;
+
+ /* DOM0 is permitted full I/O capabilities. */
+ i |= ioports_permit_access(dom0, 0, 0xFFFF);
+ i |= iomem_permit_access(dom0, 0UL, ~0UL);
+ i |= irqs_permit_access(dom0, 0, NR_PIRQS-1);
+
/*
* Modify I/O port access permissions.
*/
/* Master Interrupt Controller (PIC). */
- physdev_modify_ioport_access_range(dom0, 0, 0x20, 2);
+ i |= ioports_deny_access(dom0, 0x20, 0x21);
/* Slave Interrupt Controller (PIC). */
- physdev_modify_ioport_access_range(dom0, 0, 0xA0, 2);
+ i |= ioports_deny_access(dom0, 0xA0, 0xA1);
/* Interval Timer (PIT). */
- physdev_modify_ioport_access_range(dom0, 0, 0x40, 4);
+ i |= ioports_deny_access(dom0, 0x40, 0x43);
/* PIT Channel 2 / PC Speaker Control. */
- physdev_modify_ioport_access_range(dom0, 0, 0x61, 1);
- /* Command-line passed i/o ranges */
+ i |= ioports_deny_access(dom0, 0x61, 0x61);
+ /* Command-line I/O ranges. */
process_dom0_ioports_disable();
+ BUG_ON(i != 0);
+
return 0;
}
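
construct_dom0 now expresses Dom0's I/O capabilities as ranges: permit all ports, I/O memory and IRQs, then deny the PIC, PIT and speaker ports individually. The rangesets backing this (xen/common/rangeset.c, further down in this patch) store each set as an ordered list of inclusive [s,e] ranges. A much-simplified illustrative sketch of that representation follows; it supports only insertion with merging of overlapping ranges and a containment query, and the deny/split path is omitted.

    #include <stdio.h>
    #include <stdlib.h>

    struct range {
        unsigned long s, e;          /* inclusive */
        struct range *next;          /* kept in ascending order by s */
    };

    /* Insert [s,e], merging with any ranges it overlaps. */
    static void range_add(struct range **head, unsigned long s, unsigned long e)
    {
        struct range **pp = head, *r;

        while (*pp && (*pp)->e < s)          /* skip ranges entirely below [s,e] */
            pp = &(*pp)->next;

        while ((r = *pp) && r->s <= e) {     /* absorb every overlapping range   */
            if (r->s < s) s = r->s;
            if (r->e > e) e = r->e;
            *pp = r->next;
            free(r);
        }

        r = malloc(sizeof(*r));
        r->s = s; r->e = e; r->next = *pp;
        *pp = r;
    }

    static int range_contains(const struct range *head, unsigned long s, unsigned long e)
    {
        for (; head; head = head->next)
            if (head->s <= s && e <= head->e)
                return 1;
        return 0;
    }

    int main(void)
    {
        struct range *ioports = NULL;
        range_add(&ioports, 0x0000, 0xFFFF);                 /* permit everything */
        printf("0x20-0x21 permitted? %d\n", range_contains(ioports, 0x20, 0x21));
        return 0;
    }
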
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index e0553486bc..a1aee360c3 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -199,16 +199,12 @@ int pirq_guest_unmask(struct domain *d)
int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
{
unsigned int vector = irq_to_vector(irq);
- struct domain *d = v->domain;
irq_desc_t *desc = &irq_desc[vector];
irq_guest_action_t *action;
unsigned long flags;
int rc = 0;
cpumask_t cpumask = CPU_MASK_NONE;
- if ( !IS_CAPABLE_PHYSDEV(d) )
- return -EPERM;
-
if ( vector == 0 )
return -EBUSY;
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e6a99065e3..683c4b7534 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -96,6 +96,7 @@
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/event.h>
+#include <xen/iocap.h>
#include <asm/shadow.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
@@ -437,7 +438,6 @@ get_page_from_l1e(
unsigned long mfn = l1e_get_pfn(l1e);
struct pfn_info *page = pfn_to_page(mfn);
int okay;
- extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
return 1;
@@ -455,8 +455,7 @@ get_page_from_l1e(
if ( d == dom_io )
d = current->domain;
- if ( (!IS_PRIV(d)) &&
- (!IS_CAPABLE_PHYSDEV(d) || !domain_iomem_in_pfn(d, mfn)) )
+ if ( !iomem_access_permitted(d, mfn, mfn) )
{
MEM_LOG("Non-privileged attempt to map I/O space %08lx", mfn);
return 0;
@@ -1887,7 +1886,7 @@ int do_mmuext_op(
break;
case MMUEXT_FLUSH_CACHE:
- if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
+ if ( unlikely(!cache_flush_permitted(d)) )
{
MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
okay = 0;
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index 376419b239..e8cb5b8707 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -14,27 +14,6 @@
extern int ioapic_guest_read(int apicid, int address, u32 *pval);
extern int ioapic_guest_write(int apicid, int address, u32 pval);
-void physdev_modify_ioport_access_range(
- struct domain *d, int enable, int port, int num)
-{
- int i;
- for ( i = port; i < (port + num); i++ )
- (enable ? clear_bit : set_bit)(i, d->arch.iobmp_mask);
-}
-
-void physdev_destroy_state(struct domain *d)
-{
- xfree(d->arch.iobmp_mask);
- d->arch.iobmp_mask = NULL;
-}
-
-/* Check if a domain controls a device with IO memory within frame @pfn.
- * Returns: 1 if the domain should be allowed to map @pfn, 0 otherwise. */
-int domain_iomem_in_pfn(struct domain *p, unsigned long pfn)
-{
- return 0;
-}
-
/*
* Demuxing hypercall.
*/
@@ -120,18 +99,6 @@ long do_physdev_op(physdev_op_t *uop)
return ret;
}
-/* Domain 0 has read access to all devices. */
-void physdev_init_dom0(struct domain *d)
-{
- /* Access to all I/O ports. */
- d->arch.iobmp_mask = xmalloc_array(u8, IOBMP_BYTES);
- BUG_ON(d->arch.iobmp_mask == NULL);
- memset(d->arch.iobmp_mask, 0, IOBMP_BYTES);
-
- set_bit(_DOMF_physdev_access, &d->domain_flags);
-}
-
-
/*
* Local variables:
* mode: C
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 1259e095e8..f27806f8f6 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -138,131 +138,19 @@ static void __init do_initcalls(void)
(*call)();
}
-static void __init start_of_day(void)
-{
- int i;
- unsigned long vgdt, gdt_pfn;
-
- early_cpu_init();
-
- paging_init();
-
- /* Unmap the first page of CPU0's stack. */
- memguard_guard_stack(cpu0_stack);
-
- open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
-
- if ( opt_watchdog )
- nmi_watchdog = NMI_LOCAL_APIC;
-
- sort_exception_tables();
-
- arch_do_createdomain(current);
-
- /*
- * Map default GDT into its final positions in the idle page table. As
- * noted in arch_do_createdomain(), we must map for every possible VCPU#.
- */
- vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
- gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- {
- map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
- vgdt += 1 << PDPT_VCPU_VA_SHIFT;
- }
-
- find_smp_config();
-
- smp_alloc_memory();
-
- dmi_scan_machine();
-
- generic_apic_probe();
-
- acpi_boot_table_init();
- acpi_boot_init();
-
- if ( smp_found_config )
- get_smp_config();
-
- init_apic_mappings();
-
- init_IRQ();
-
- trap_init();
-
- ac_timer_init();
-
- early_time_init();
-
- arch_init_memory();
-
- scheduler_init();
-
- identify_cpu(&boot_cpu_data);
- if ( cpu_has_fxsr )
- set_in_cr4(X86_CR4_OSFXSR);
- if ( cpu_has_xmm )
- set_in_cr4(X86_CR4_OSXMMEXCPT);
-
- if ( opt_nosmp )
- {
- max_cpus = 0;
- smp_num_siblings = 1;
- boot_cpu_data.x86_num_cores = 1;
- }
-
- smp_prepare_cpus(max_cpus);
-
- /* We aren't hotplug-capable yet. */
- BUG_ON(!cpus_empty(cpu_present_map));
- for_each_cpu ( i )
- cpu_set(i, cpu_present_map);
-
- /*
- * Initialise higher-level timer functions. We do this fairly late
- * (post-SMP) because the time bases and scale factors need to be updated
- * regularly, and SMP initialisation can cause a long delay with
- * interrupts not yet enabled.
- */
- init_xen_time();
-
- initialize_keytable();
-
- serial_init_postirq();
-
- BUG_ON(!local_irq_is_enabled());
-
- for_each_present_cpu ( i )
- {
- if ( num_online_cpus() >= max_cpus )
- break;
- if ( !cpu_online(i) )
- __cpu_up(i);
- }
-
- printk("Brought up %ld CPUs\n", (long)num_online_cpus());
- smp_cpus_done(max_cpus);
-
- do_initcalls();
-
- schedulers_start();
-
- watchdog_enable();
-}
-
#define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
static struct e820entry e820_raw[E820MAX];
void __init __start_xen(multiboot_info_t *mbi)
{
+ unsigned long vgdt, gdt_pfn;
char *cmdline;
+ unsigned long _initrd_start = 0, _initrd_len = 0;
+ unsigned int initrdidx = 1;
module_t *mod = (module_t *)__va(mbi->mods_addr);
unsigned long nr_pages, modules_length;
unsigned long initial_images_start, initial_images_end;
- unsigned long _initrd_start = 0, _initrd_len = 0;
- unsigned int initrdidx = 1;
physaddr_t s, e;
int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
struct ns16550_defaults ns16550 = {
@@ -486,7 +374,113 @@ void __init __start_xen(multiboot_info_t *mbi)
early_boot = 0;
- start_of_day();
+ early_cpu_init();
+
+ paging_init();
+
+ /* Unmap the first page of CPU0's stack. */
+ memguard_guard_stack(cpu0_stack);
+
+ open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
+
+ if ( opt_watchdog )
+ nmi_watchdog = NMI_LOCAL_APIC;
+
+ sort_exception_tables();
+
+ if ( arch_do_createdomain(current) != 0 )
+ BUG();
+
+ /*
+ * Map default GDT into its final positions in the idle page table. As
+ * noted in arch_do_createdomain(), we must map for every possible VCPU#.
+ */
+ vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
+ gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ {
+ map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
+ vgdt += 1 << PDPT_VCPU_VA_SHIFT;
+ }
+
+ find_smp_config();
+
+ smp_alloc_memory();
+
+ dmi_scan_machine();
+
+ generic_apic_probe();
+
+ acpi_boot_table_init();
+ acpi_boot_init();
+
+ if ( smp_found_config )
+ get_smp_config();
+
+ init_apic_mappings();
+
+ init_IRQ();
+
+ trap_init();
+
+ ac_timer_init();
+
+ early_time_init();
+
+ arch_init_memory();
+
+ scheduler_init();
+
+ identify_cpu(&boot_cpu_data);
+ if ( cpu_has_fxsr )
+ set_in_cr4(X86_CR4_OSFXSR);
+ if ( cpu_has_xmm )
+ set_in_cr4(X86_CR4_OSXMMEXCPT);
+
+ if ( opt_nosmp )
+ {
+ max_cpus = 0;
+ smp_num_siblings = 1;
+ boot_cpu_data.x86_num_cores = 1;
+ }
+
+ smp_prepare_cpus(max_cpus);
+
+ /* We aren't hotplug-capable yet. */
+ BUG_ON(!cpus_empty(cpu_present_map));
+ for_each_cpu ( i )
+ cpu_set(i, cpu_present_map);
+
+ /*
+ * Initialise higher-level timer functions. We do this fairly late
+ * (post-SMP) because the time bases and scale factors need to be updated
+ * regularly, and SMP initialisation can cause a long delay with
+ * interrupts not yet enabled.
+ */
+ init_xen_time();
+
+ initialize_keytable();
+
+ serial_init_postirq();
+
+ BUG_ON(!local_irq_is_enabled());
+
+ for_each_present_cpu ( i )
+ {
+ if ( num_online_cpus() >= max_cpus )
+ break;
+ if ( !cpu_online(i) )
+ __cpu_up(i);
+ }
+
+ printk("Brought up %ld CPUs\n", (long)num_online_cpus());
+ smp_cpus_done(max_cpus);
+
+ do_initcalls();
+
+ schedulers_start();
+
+ watchdog_enable();
shadow_mode_init();
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 4bb2d27a32..30ca4864b2 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -763,7 +763,6 @@ static int __init do_boot_cpu(int apicid)
{
struct domain *idle;
struct vcpu *v;
- void *stack;
unsigned long boot_error;
int timeout, cpu;
unsigned long start_eip;
@@ -786,16 +785,10 @@ static int __init do_boot_cpu(int apicid)
/* So we see what's up */
printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
- stack = alloc_xenheap_pages(STACK_ORDER);
-#if defined(__i386__)
- stack_start.esp = (void *)__pa(stack);
-#elif defined(__x86_64__)
- stack_start.esp = stack;
-#endif
- stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
+ stack_start.esp = alloc_xenheap_pages(STACK_ORDER);
/* Debug build: detect stack overflow by setting up a guard page. */
- memguard_guard_stack(stack);
+ memguard_guard_stack(stack_start.esp);
/*
* This grunge runs the startup process for
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 8e1ca7f579..a4be3db3b3 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -41,6 +41,7 @@
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/symbols.h>
+#include <xen/iocap.h>
#include <asm/shadow.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -192,7 +193,8 @@ static void show_trace(struct cpu_user_regs *regs)
/* Bounds for range of valid frame pointer. */
low = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
- high = (low & ~(STACK_SIZE - 1)) + (STACK_SIZE - sizeof(struct cpu_info));
+ high = (low & ~(STACK_SIZE - 1)) +
+ (STACK_SIZE - sizeof(struct cpu_info) - 2*sizeof(unsigned long));
/* The initial frame pointer. */
next = regs->ebp;
@@ -200,14 +202,14 @@ static void show_trace(struct cpu_user_regs *regs)
for ( ; ; )
{
/* Valid frame pointer? */
- if ( (next < low) || (next > high) )
+ if ( (next < low) || (next >= high) )
{
/*
* Exception stack frames have a different layout, denoted by an
* inverted frame pointer.
*/
next = ~next;
- if ( (next < low) || (next > high) )
+ if ( (next < low) || (next >= high) )
break;
frame = (unsigned long *)next;
next = frame[0];
@@ -621,17 +623,7 @@ static inline int admin_io_okay(
unsigned int port, unsigned int bytes,
struct vcpu *v, struct cpu_user_regs *regs)
{
- struct domain *d = v->domain;
- u16 x;
-
- if ( d->arch.iobmp_mask != NULL )
- {
- x = *(u16 *)(d->arch.iobmp_mask + (port >> 3));
- if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
- return 1;
- }
-
- return 0;
+ return ioports_access_permitted(v->domain, port, port + bytes - 1);
}
/* Check admin limits. Silently fail the access if it is disallowed. */
@@ -871,7 +863,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
case 0x09: /* WBINVD */
/* Ignore the instruction if unprivileged. */
- if ( !IS_CAPABLE_PHYSDEV(v->domain) )
+ if ( !cache_flush_permitted(v->domain) )
DPRINTK("Non-physdev domain attempted WBINVD.\n");
else
wbinvd();
@@ -885,7 +877,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
switch ( modrm_reg )
{
case 0: /* Read CR0 */
- *reg = v->arch.guest_context.ctrlreg[0];
+ *reg = (read_cr0() & ~X86_CR0_TS) |
+ v->arch.guest_context.ctrlreg[0];
break;
case 2: /* Read CR2 */
@@ -927,6 +920,11 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
switch ( modrm_reg )
{
case 0: /* Write CR0 */
+ if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
+ {
+ DPRINTK("Attempt to change unmodifiable CR0 flags.\n");
+ goto fail;
+ }
(void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
break;
@@ -941,6 +939,14 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
UNLOCK_BIGLOCK(v->domain);
break;
+ case 4:
+ if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
+ {
+ DPRINTK("Attempt to change CR4 flags.\n");
+ goto fail;
+ }
+ break;
+
default:
goto fail;
}
diff --git a/xen/arch/x86/x86_32/xen.lds b/xen/arch/x86/x86_32/xen.lds.S
index f3c168f662..fb76cbc73d 100644
--- a/xen/arch/x86/x86_32/xen.lds
+++ b/xen/arch/x86/x86_32/xen.lds.S
@@ -2,6 +2,12 @@
* Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
* Modified for i386 Xen by Keir Fraser
*/
+
+#include <xen/config.h>
+#include <asm/page.h>
+#undef ENTRY
+#undef ALIGN
+
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(start)
@@ -50,12 +56,12 @@ SECTIONS
__initcall_start = .;
.initcall.init : { *(.initcall.init) } :text
__initcall_end = .;
- . = ALIGN(8192);
+ . = ALIGN(STACK_SIZE);
__init_end = .;
__bss_start = .; /* BSS */
.bss : {
- *(.bss.twopage_aligned)
+ *(.bss.stack_aligned)
*(.bss.page_aligned)
*(.bss)
} :text
diff --git a/xen/arch/x86/x86_64/xen.lds b/xen/arch/x86/x86_64/xen.lds.S
index 837d335f0d..d8685201ab 100644
--- a/xen/arch/x86/x86_64/xen.lds
+++ b/xen/arch/x86/x86_64/xen.lds.S
@@ -1,5 +1,11 @@
/* Excerpts written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> */
/* Modified for x86-64 Xen by Keir Fraser */
+
+#include <xen/config.h>
+#include <asm/page.h>
+#undef ENTRY
+#undef ALIGN
+
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
ENTRY(start)
@@ -48,12 +54,12 @@ SECTIONS
__initcall_start = .;
.initcall.init : { *(.initcall.init) } :text
__initcall_end = .;
- . = ALIGN(8192);
+ . = ALIGN(STACK_SIZE);
__init_end = .;
__bss_start = .; /* BSS */
.bss : {
- *(.bss.twopage_aligned)
+ *(.bss.stack_aligned)
*(.bss.page_aligned)
*(.bss)
} :text
diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c
index 528e363e8e..7c28dd87b9 100644
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -371,6 +371,21 @@ do{ __asm__ __volatile__ ( \
(_type)_x; \
})
+/* Access/update address held in a register, based on addressing mode. */
+#define register_address(sel, reg) \
+ ((ad_bytes == sizeof(unsigned long)) ? (reg) : \
+ ((mode == X86EMUL_MODE_REAL) ? /* implies ad_bytes == 2 */ \
+ (((unsigned long)(sel) << 4) + ((reg) & 0xffff)) : \
+ ((reg) & ((1UL << (ad_bytes << 3)) - 1))))
+#define register_address_increment(reg, inc) \
+do { \
+ if ( ad_bytes == sizeof(unsigned long) ) \
+ (reg) += (inc); \
+ else \
+ (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) | \
+ (((reg) + (inc)) & ((1UL << (ad_bytes << 3)) - 1)); \
+} while (0)
+
void *
decode_register(
uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
@@ -420,32 +435,64 @@ x86_emulate_memop(
{
uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
- unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode;
- unsigned int lock_prefix = 0, rep_prefix = 0, i;
+ uint16_t *seg = NULL; /* override segment */
+ unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
int rc = 0;
struct operand src, dst;
/* Shadow copy of register state. Committed on successful emulation. */
struct cpu_user_regs _regs = *regs;
+ switch ( mode )
+ {
+ case X86EMUL_MODE_REAL:
+ case X86EMUL_MODE_PROT16:
+ op_bytes = ad_bytes = 2;
+ break;
+ case X86EMUL_MODE_PROT32:
+ op_bytes = ad_bytes = 4;
+ break;
+#ifdef __x86_64__
+ case X86EMUL_MODE_PROT64:
+ op_bytes = 4;
+ ad_bytes = 8;
+ break;
+#endif
+ default:
+ return -1;
+ }
+
/* Legacy prefixes. */
for ( i = 0; i < 8; i++ )
{
switch ( b = insn_fetch(uint8_t, 1, _regs.eip) )
{
case 0x66: /* operand-size override */
- op_bytes ^= 6; /* switch between 2/4 bytes */
+ op_bytes ^= 6; /* switch between 2/4 bytes */
break;
case 0x67: /* address-size override */
- ad_bytes ^= (mode == 8) ? 12 : 6; /* switch between 2/4/8 bytes */
+ if ( mode == X86EMUL_MODE_PROT64 )
+ ad_bytes ^= 12; /* switch between 4/8 bytes */
+ else
+ ad_bytes ^= 6; /* switch between 2/4 bytes */
break;
case 0x2e: /* CS override */
+ seg = &_regs.cs;
+ break;
case 0x3e: /* DS override */
+ seg = &_regs.ds;
+ break;
case 0x26: /* ES override */
+ seg = &_regs.es;
+ break;
case 0x64: /* FS override */
+ seg = &_regs.fs;
+ break;
case 0x65: /* GS override */
+ seg = &_regs.gs;
+ break;
case 0x36: /* SS override */
- DPRINTF("Warning: ignoring a segment override.\n");
+ seg = &_regs.ss;
break;
case 0xf0: /* LOCK */
lock_prefix = 1;
@@ -461,8 +508,12 @@ x86_emulate_memop(
}
done_prefixes:
+    /* Not quite the same as 80386 real mode, but hopefully good enough. */
+ if ( (mode == X86EMUL_MODE_REAL) && (ad_bytes != 2) )
+ goto cannot_emulate;
+
/* REX prefix. */
- if ( (mode == 8) && ((b & 0xf0) == 0x40) )
+ if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) )
{
rex_prefix = b;
if ( b & 8 )
@@ -674,7 +725,7 @@ x86_emulate_memop(
emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
break;
case 0x63: /* movsxd */
- if ( mode != 8 ) /* x86/64 long mode only */
+ if ( mode != X86EMUL_MODE_PROT64 )
goto cannot_emulate;
dst.val = (int32_t)src.val;
break;
@@ -721,12 +772,13 @@ x86_emulate_memop(
dst.val = src.val;
break;
case 0x8f: /* pop (sole member of Grp1a) */
- /* 64-bit mode: POP defaults to 64-bit operands. */
- if ( (mode == 8) && (dst.bytes == 4) )
+ /* 64-bit mode: POP always pops a 64-bit operand. */
+ if ( mode == X86EMUL_MODE_PROT64 )
dst.bytes = 8;
- if ( (rc = ops->read_std(_regs.esp, &dst.val, dst.bytes)) != 0 )
+ if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
+ &dst.val, dst.bytes)) != 0 )
goto done;
- _regs.esp += dst.bytes;
+ register_address_increment(_regs.esp, dst.bytes);
break;
case 0xc0 ... 0xc1: grp2: /* Grp2 */
switch ( modrm_reg )
@@ -797,16 +849,17 @@ x86_emulate_memop(
emulate_1op("dec", dst, _regs.eflags);
break;
case 6: /* push */
- /* 64-bit mode: PUSH defaults to 64-bit operands. */
- if ( (mode == 8) && (dst.bytes == 4) )
+ /* 64-bit mode: PUSH always pushes a 64-bit operand. */
+ if ( mode == X86EMUL_MODE_PROT64 )
{
dst.bytes = 8;
if ( (rc = ops->read_std((unsigned long)dst.ptr,
&dst.val, 8)) != 0 )
goto done;
}
- _regs.esp -= dst.bytes;
- if ( (rc = ops->write_std(_regs.esp, dst.val, dst.bytes)) != 0 )
+ register_address_increment(_regs.esp, -dst.bytes);
+ if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
+ dst.val, dst.bytes)) != 0 )
goto done;
dst.val = dst.orig_val; /* skanky: disable writeback */
break;
@@ -873,19 +926,22 @@ x86_emulate_memop(
{
/* Write fault: destination is special memory. */
dst.ptr = (unsigned long *)cr2;
- if ( (rc = ops->read_std(_regs.esi - _regs.edi + cr2,
+ if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
+ _regs.esi),
&dst.val, dst.bytes)) != 0 )
goto done;
}
else
{
/* Read fault: source is special memory. */
- dst.ptr = (unsigned long *)(_regs.edi - _regs.esi + cr2);
+ dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
goto done;
}
- _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
- _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+ register_address_increment(
+ _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ register_address_increment(
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
break;
case 0xa6 ... 0xa7: /* cmps */
DPRINTF("Urk! I don't handle CMPS.\n");
@@ -895,7 +951,8 @@ x86_emulate_memop(
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
dst.ptr = (unsigned long *)cr2;
dst.val = _regs.eax;
- _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+ register_address_increment(
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
break;
case 0xac ... 0xad: /* lods */
dst.type = OP_REG;
@@ -903,7 +960,8 @@ x86_emulate_memop(
dst.ptr = (unsigned long *)&_regs.eax;
if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
goto done;
- _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+ register_address_increment(
+ _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
break;
case 0xae ... 0xaf: /* scas */
DPRINTF("Urk! I don't handle SCAS.\n");
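
The emulator hunks above stop treating mode as a raw byte count: op_bytes/ad_bytes are derived per mode, effective addresses go through register_address() (segment<<4 plus a 16-bit offset in real mode, masking to the address size otherwise), and register_address_increment() keeps, say, a 16-bit SI update from disturbing the upper bits of the full register. A standalone sketch of those two helpers with hypothetical inputs; the function names here are illustrative, not the emulator's.

    #include <stdio.h>

    /* Effective address for a given address size (2, 4 or 8 bytes).
     * In real mode (ad_bytes == 2 here) the segment is folded in as seg<<4. */
    static unsigned long reg_address(int real_mode, unsigned int ad_bytes,
                                     unsigned int seg, unsigned long reg)
    {
        unsigned long mask = (ad_bytes == sizeof(unsigned long))
            ? ~0UL : (1UL << (ad_bytes * 8)) - 1;
        if (real_mode)
            return ((unsigned long)seg << 4) + (reg & 0xffff);
        return reg & mask;
    }

    /* Add inc to reg, but only within the low ad_bytes*8 bits. */
    static unsigned long reg_increment(unsigned int ad_bytes,
                                       unsigned long reg, long inc)
    {
        unsigned long mask = (ad_bytes == sizeof(unsigned long))
            ? ~0UL : (1UL << (ad_bytes * 8)) - 1;
        return (reg & ~mask) | ((reg + inc) & mask);
    }

    int main(void)
    {
        /* 16-bit address size: 0xffff + 1 wraps to 0, upper bits preserved. */
        printf("%#lx\n", reg_increment(2, 0x1234ffffUL, 1));   /* 0x12340000 */
        /* Real mode: seg 0x1000, offset 0x0010 -> linear 0x10010. */
        printf("%#lx\n", reg_address(1, 2, 0x1000, 0x0010));   /* 0x10010 */
        return 0;
    }
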
diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c
index a4cbc48b4f..97cd571376 100644
--- a/xen/common/dom0_ops.c
+++ b/xen/common/dom0_ops.c
@@ -16,6 +16,7 @@
#include <xen/domain_page.h>
#include <xen/trace.h>
#include <xen/console.h>
+#include <xen/iocap.h>
#include <asm/current.h>
#include <public/dom0_ops.h>
#include <public/sched_ctl.h>
@@ -582,6 +583,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
}
}
break;
+
case DOM0_SETDEBUGGING:
{
struct domain *d;
@@ -599,6 +601,53 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
}
break;
+ case DOM0_IRQ_PERMISSION:
+ {
+ struct domain *d;
+ unsigned int pirq = op->u.irq_permission.pirq;
+
+ ret = -EINVAL;
+ if ( pirq >= NR_PIRQS )
+ break;
+
+ ret = -ESRCH;
+ d = find_domain_by_id(op->u.irq_permission.domain);
+ if ( d == NULL )
+ break;
+
+ if ( op->u.irq_permission.allow_access )
+ ret = irq_permit_access(d, pirq);
+ else
+ ret = irq_deny_access(d, pirq);
+
+ put_domain(d);
+ }
+ break;
+
+ case DOM0_IOMEM_PERMISSION:
+ {
+ struct domain *d;
+ unsigned long pfn = op->u.iomem_permission.first_pfn;
+ unsigned long nr_pfns = op->u.iomem_permission.nr_pfns;
+
+ ret = -EINVAL;
+ if ( (pfn + nr_pfns - 1) < pfn ) /* wrap? */
+ break;
+
+ ret = -ESRCH;
+ d = find_domain_by_id(op->u.iomem_permission.domain);
+ if ( d == NULL )
+ break;
+
+ if ( op->u.iomem_permission.allow_access )
+ ret = iomem_permit_access(d, pfn, pfn + nr_pfns - 1);
+ else
+ ret = iomem_deny_access(d, pfn, pfn + nr_pfns - 1);
+
+ put_domain(d);
+ }
+ break;
+
#ifdef PERF_COUNTERS
case DOM0_PERFCCONTROL:
{
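
The new DOM0_IOMEM_PERMISSION case converts (first_pfn, nr_pfns) into the inclusive range [pfn, pfn + nr_pfns - 1] and rejects requests whose end wraps around the top of the address space, using the (pfn + nr_pfns - 1) < pfn comparison. A minimal demonstration of why that test catches the wrap:

    #include <stdio.h>
    #include <limits.h>

    static int range_ok(unsigned long first, unsigned long count)
    {
        unsigned long last = first + count - 1;   /* may wrap modulo 2^N          */
        return !(last < first);                   /* wrapped iff last < first     */
    }

    int main(void)
    {
        printf("%d\n", range_ok(0x1000, 0x10));        /* 1: fits               */
        printf("%d\n", range_ok(ULONG_MAX - 1, 16));   /* 0: wraps past the top */
        return 0;
    }
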
diff --git a/xen/common/domain.c b/xen/common/domain.c
index bc54c313de..0f206d8e1c 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -16,6 +16,7 @@
#include <xen/console.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
+#include <xen/rangeset.h>
#include <asm/debugger.h>
#include <public/dom0_ops.h>
#include <public/sched.h>
@@ -52,22 +53,21 @@ struct domain *do_createdomain(domid_t dom_id, unsigned int cpu)
if ( !is_idle_task(d) &&
((evtchn_init(d) != 0) || (grant_table_create(d) != 0)) )
- {
- evtchn_destroy(d);
- free_domain(d);
- return NULL;
- }
+ goto fail1;
if ( (v = alloc_vcpu(d, 0, cpu)) == NULL )
- {
- grant_table_destroy(d);
- evtchn_destroy(d);
- free_domain(d);
- return NULL;
- }
+ goto fail2;
+
+ rangeset_domain_initialise(d);
+
+ d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
+ d->irq_caps = rangeset_new(d, "Interrupts", 0);
+
+ if ( (d->iomem_caps == NULL) ||
+ (d->irq_caps == NULL) ||
+ (arch_do_createdomain(v) != 0) )
+ goto fail3;
- arch_do_createdomain(v);
-
if ( !is_idle_task(d) )
{
write_lock(&domlist_lock);
@@ -83,6 +83,15 @@ struct domain *do_createdomain(domid_t dom_id, unsigned int cpu)
}
return d;
+
+ fail3:
+ rangeset_domain_destroy(d);
+ fail2:
+ grant_table_destroy(d);
+ fail1:
+ evtchn_destroy(d);
+ free_domain(d);
+ return NULL;
}
@@ -271,6 +280,8 @@ void domain_destruct(struct domain *d)
*pd = d->next_in_hashbucket;
write_unlock(&domlist_lock);
+ rangeset_domain_destroy(d);
+
evtchn_destroy(d);
grant_table_destroy(d);
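
do_createdomain is restructured above into staged initialisation with goto-based unwinding: each failure label releases exactly the resources acquired before it and falls through to the earlier labels. A generic standalone sketch of the idiom, with made-up resource names:

    #include <stdio.h>
    #include <stdlib.h>

    struct thing { void *a, *b, *c; };

    static struct thing *thing_create(void)
    {
        struct thing *t = calloc(1, sizeof(*t));
        if (t == NULL)
            return NULL;

        if ((t->a = malloc(16)) == NULL)
            goto fail1;
        if ((t->b = malloc(16)) == NULL)
            goto fail2;
        if ((t->c = malloc(16)) == NULL)
            goto fail3;

        return t;

     fail3:                      /* c failed: release b, then fall through */
        free(t->b);
     fail2:                      /* b failed: release a                    */
        free(t->a);
     fail1:                      /* a failed: only the container exists    */
        free(t);
        return NULL;
    }

    int main(void)
    {
        struct thing *t = thing_create();
        printf(t ? "created\n" : "failed\n");
        if (t) { free(t->a); free(t->b); free(t->c); free(t); }
        return 0;
    }
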
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index eeccc0e160..fdda1e86b8 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -22,6 +22,7 @@
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/irq.h>
+#include <xen/iocap.h>
#include <asm/current.h>
#include <public/xen.h>
@@ -242,6 +243,9 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
if ( pirq >= ARRAY_SIZE(d->pirq_to_evtchn) )
return -EINVAL;
+ if ( !irq_access_permitted(d, pirq) )
+ return -EPERM;
+
spin_lock(&d->evtchn_lock);
if ( d->pirq_to_evtchn[pirq] != 0 )
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index 7f569d99fb..c78e104747 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -11,6 +11,7 @@
#include <xen/sched.h>
#include <xen/softirq.h>
#include <xen/domain.h>
+#include <xen/rangeset.h>
#include <asm/debugger.h>
#define KEY_MAX 256
@@ -109,31 +110,32 @@ static void do_task_queues(unsigned char key)
for_each_domain ( d )
{
- printk("Xen: DOM %u, flags=%lx refcnt=%d nr_pages=%d "
- "xenheap_pages=%d\n", d->domain_id, d->domain_flags,
- atomic_read(&d->refcnt), d->tot_pages, d->xenheap_pages);
- /* The handle is printed according to the OSF DCE UUID spec., even
- though it is not necessarily such a thing, for ease of use when it
- _is_ one of those. */
- printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
+ printk("General information for domain %u:\n", d->domain_id);
+ printk(" flags=%lx refcnt=%d nr_pages=%d xenheap_pages=%d\n",
+ d->domain_flags, atomic_read(&d->refcnt),
+ d->tot_pages, d->xenheap_pages);
+ printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
"%02x%02x-%02x%02x%02x%02x%02x%02x\n",
d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7],
d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
d->handle[12], d->handle[13], d->handle[14], d->handle[15]);
+ rangeset_domain_printk(d);
+
dump_pageframe_info(d);
+ printk("VCPU information and callbacks for domain %u:\n",
+ d->domain_id);
for_each_vcpu ( d, v ) {
- printk("Guest: %p CPU %d [has=%c] flags=%lx "
- "upcall_pend = %02x, upcall_mask = %02x\n", v,
- v->processor,
+ printk(" VCPU%d: CPU%d [has=%c] flags=%lx "
+ "upcall_pend = %02x, upcall_mask = %02x\n",
+ v->vcpu_id, v->processor,
test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F',
v->vcpu_flags,
v->vcpu_info->evtchn_upcall_pending,
v->vcpu_info->evtchn_upcall_mask);
- printk("Notifying guest... %d/%d\n", d->domain_id, v->vcpu_id);
- printk("port %d/%d stat %d %d %d\n",
+ printk(" Notifying guest (virq %d, port %d, stat %d/%d/%d)\n",
VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG],
test_bit(v->virq_to_evtchn[VIRQ_DEBUG],
&d->shared_info->evtchn_pending[0]),
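For orientation, the reworked 'q' keyhandler output now groups general, rangeset and per-VCPU information under per-domain headings; with purely illustrative values it looks roughly like:
General information for domain 1:
    flags=2 refcnt=3 nr_pages=8192 xenheap_pages=5
    handle=00000000-0000-0000-0000-000000000000
Rangesets belonging to domain 1:
    I/O Memory { fee00 }
    Interrupts { 9, 17-19 }
VCPU information and callbacks for domain 1:
    VCPU0: CPU0 [has=F] flags=0 upcall_pend = 00, upcall_mask = 00
    Notifying guest (virq 1, port 7, stat 0/0/1)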
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 45934fdf76..847fcbb0cc 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -15,6 +15,7 @@
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/shadow.h>
+#include <xen/iocap.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <public/memory.h>
@@ -35,7 +36,8 @@ increase_reservation(
!array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) )
return 0;
- if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current->domain) )
+ if ( (extent_order != 0) &&
+ !multipage_allocation_permitted(current->domain) )
{
DPRINTK("Only I/O-capable domains may allocate multi-page extents.\n");
return 0;
diff --git a/xen/common/rangeset.c b/xen/common/rangeset.c
new file mode 100644
index 0000000000..95757a60cc
--- /dev/null
+++ b/xen/common/rangeset.c
@@ -0,0 +1,399 @@
+/******************************************************************************
+ * rangeset.c
+ *
+ * Creation, maintenance and automatic destruction of per-domain sets of
+ * numeric ranges.
+ *
+ * Copyright (c) 2005, K A Fraser
+ */
+
+#include <xen/sched.h>
+#include <xen/rangeset.h>
+
+/* An inclusive range [s,e] and pointer to next range in ascending order. */
+struct range {
+ struct list_head list;
+ unsigned long s, e;
+};
+
+struct rangeset {
+ /* Owning domain and threaded list of rangesets. */
+ struct list_head rangeset_list;
+ struct domain *domain;
+
+ /* Ordered list of ranges contained in this set, and protecting lock. */
+ struct list_head range_list;
+ spinlock_t lock;
+
+ /* Pretty-printing name. */
+ char name[32];
+
+ /* RANGESETF flags. */
+ unsigned int flags;
+};
+
+/*****************************
+ * Private range functions hide the underlying linked-list implementation.
+ */
+
+/* Find highest range lower than or containing s. NULL if no such range. */
+static struct range *find_range(
+ struct rangeset *r, unsigned long s)
+{
+ struct range *x = NULL, *y;
+
+ list_for_each_entry ( y, &r->range_list, list )
+ {
+ if ( y->s > s )
+ break;
+ x = y;
+ }
+
+ return x;
+}
+
+/* Return the lowest range in the set r, or NULL if r is empty. */
+static struct range *first_range(
+ struct rangeset *r)
+{
+ if ( list_empty(&r->range_list) )
+ return NULL;
+ return list_entry(r->range_list.next, struct range, list);
+}
+
+/* Return range following x in ascending order, or NULL if x is the highest. */
+static struct range *next_range(
+ struct rangeset *r, struct range *x)
+{
+ if ( x->list.next == &r->range_list )
+ return NULL;
+ return list_entry(x->list.next, struct range, list);
+}
+
+/* Insert range y after range x in r. Insert as first range if x is NULL. */
+static void insert_range(
+ struct rangeset *r, struct range *x, struct range *y)
+{
+ list_add(&y->list, (x != NULL) ? &x->list : &r->range_list);
+}
+
+/* Remove a range from its list and free it. */
+static void destroy_range(
+ struct range *x)
+{
+ list_del(&x->list);
+ xfree(x);
+}
+
+/*****************************
+ * Core public functions
+ */
+
+int rangeset_add_range(
+ struct rangeset *r, unsigned long s, unsigned long e)
+{
+ struct range *x, *y;
+ int rc = 0;
+
+ spin_lock(&r->lock);
+
+ x = find_range(r, s);
+ y = find_range(r, e);
+
+ if ( x == y )
+ {
+ if ( (x == NULL) || ((x->e < s) && ((x->e + 1) != s)) )
+ {
+ x = xmalloc(struct range);
+ if ( x == NULL )
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ x->s = s;
+ x->e = e;
+
+ insert_range(r, y, x);
+ }
+ else if ( x->e < e )
+ x->e = e;
+ }
+ else
+ {
+ if ( x == NULL )
+ {
+ x = first_range(r);
+ x->s = s;
+ }
+ else if ( (x->e < s) && ((x->e + 1) != s) )
+ {
+ x = next_range(r, x);
+ x->s = s;
+ }
+
+ x->e = (y->e > e) ? y->e : e;
+
+ for ( ; ; )
+ {
+ y = next_range(r, x);
+ if ( (y == NULL) || (y->e > x->e) )
+ break;
+ destroy_range(y);
+ }
+ }
+
+ y = next_range(r, x);
+ if ( (y != NULL) && ((x->e + 1) == y->s) )
+ {
+ x->e = y->e;
+ destroy_range(y);
+ }
+
+ out:
+ spin_unlock(&r->lock);
+ return rc;
+}
+
+int rangeset_remove_range(
+ struct rangeset *r, unsigned long s, unsigned long e)
+{
+ struct range *x, *y, *t;
+ int rc = 0;
+
+ spin_lock(&r->lock);
+
+ x = find_range(r, s);
+ y = find_range(r, e);
+
+ if ( x == y )
+ {
+ if ( (x == NULL) || (x->e < s) )
+ goto out;
+
+ if ( (x->s < s) && (x->e > e) )
+ {
+ y = xmalloc(struct range);
+ if ( y == NULL )
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ y->s = e + 1;
+ y->e = x->e;
+ x->e = s - 1;
+
+ insert_range(r, x, y);
+ }
+ else if ( (x->s == s) && (x->e <= e) )
+ destroy_range(x);
+ else if ( x->s == s )
+ x->s = e + 1;
+ else if ( x->e <= e )
+ x->e = s - 1;
+ }
+ else
+ {
+ if ( x == NULL )
+ x = first_range(r);
+
+ if ( x->s < s )
+ {
+ x->e = s - 1;
+ x = next_range(r, x);
+ }
+
+ while ( x != y )
+ {
+ t = x;
+ x = next_range(r, x);
+ destroy_range(t);
+ }
+
+ x->s = e + 1;
+ if ( x->s > x->e )
+ destroy_range(x);
+ }
+
+ out:
+ spin_unlock(&r->lock);
+ return rc;
+}
+
+int rangeset_contains_range(
+ struct rangeset *r, unsigned long s, unsigned long e)
+{
+ struct range *x;
+ int contains;
+
+ spin_lock(&r->lock);
+ x = find_range(r, s);
+ contains = (x && (x->e >= e));
+ spin_unlock(&r->lock);
+
+ return contains;
+}
+
+int rangeset_add_singleton(
+ struct rangeset *r, unsigned long s)
+{
+ return rangeset_add_range(r, s, s);
+}
+
+int rangeset_remove_singleton(
+ struct rangeset *r, unsigned long s)
+{
+ return rangeset_remove_range(r, s, s);
+}
+
+int rangeset_contains_singleton(
+ struct rangeset *r, unsigned long s)
+{
+ return rangeset_contains_range(r, s, s);
+}
+
+int rangeset_is_empty(
+ struct rangeset *r)
+{
+ return list_empty(&r->range_list);
+}
+
+struct rangeset *rangeset_new(
+ struct domain *d, char *name, unsigned int flags)
+{
+ struct rangeset *r;
+
+ r = xmalloc(struct rangeset);
+ if ( r == NULL )
+ return NULL;
+
+ spin_lock_init(&r->lock);
+ INIT_LIST_HEAD(&r->range_list);
+
+ BUG_ON(flags & ~RANGESETF_prettyprint_hex);
+ r->flags = flags;
+
+ if ( name != NULL )
+ {
+ strncpy(r->name, name, sizeof(r->name));
+ r->name[sizeof(r->name)-1] = '\0';
+ }
+ else
+ {
+ sprintf(r->name, "(no name)");
+ }
+
+ if ( (r->domain = d) != NULL )
+ {
+ spin_lock(&d->rangesets_lock);
+ list_add(&r->rangeset_list, &d->rangesets);
+ spin_unlock(&d->rangesets_lock);
+ }
+
+ return r;
+}
+
+void rangeset_destroy(
+ struct rangeset *r)
+{
+ struct range *x;
+
+ if ( r == NULL )
+ return;
+
+ if ( r->domain != NULL )
+ {
+ spin_lock(&r->domain->rangesets_lock);
+ list_del(&r->rangeset_list);
+ spin_unlock(&r->domain->rangesets_lock);
+ }
+
+ while ( (x = first_range(r)) != NULL )
+ destroy_range(x);
+
+ xfree(r);
+}
+
+void rangeset_domain_initialise(
+ struct domain *d)
+{
+ INIT_LIST_HEAD(&d->rangesets);
+ spin_lock_init(&d->rangesets_lock);
+}
+
+void rangeset_domain_destroy(
+ struct domain *d)
+{
+ struct rangeset *r;
+
+ while ( !list_empty(&d->rangesets) )
+ {
+ r = list_entry(d->rangesets.next, struct rangeset, rangeset_list);
+
+ BUG_ON(r->domain != d);
+ r->domain = NULL;
+ list_del(&r->rangeset_list);
+
+ rangeset_destroy(r);
+ }
+}
+
+/*****************************
+ * Pretty-printing functions
+ */
+
+static void print_limit(struct rangeset *r, unsigned long s)
+{
+ printk((r->flags & RANGESETF_prettyprint_hex) ? "%lx" : "%lu", s);
+}
+
+void rangeset_printk(
+ struct rangeset *r)
+{
+ int nr_printed = 0;
+ struct range *x;
+
+ spin_lock(&r->lock);
+
+ printk("%-10s {", r->name);
+
+ for ( x = first_range(r); x != NULL; x = next_range(r, x) )
+ {
+ if ( nr_printed++ )
+ printk(",");
+ printk(" ");
+ print_limit(r, x->s);
+ if ( x->s != x->e )
+ {
+ printk("-");
+ print_limit(r, x->e);
+ }
+ }
+
+ printk(" }");
+
+ spin_unlock(&r->lock);
+}
+
+void rangeset_domain_printk(
+ struct domain *d)
+{
+ struct rangeset *r;
+
+ printk("Rangesets belonging to domain %u:\n", d->domain_id);
+
+ spin_lock(&d->rangesets_lock);
+
+ if ( list_empty(&d->rangesets) )
+ printk(" None\n");
+
+ list_for_each_entry ( r, &d->rangesets, rangeset_list )
+ {
+ printk(" ");
+ rangeset_printk(r);
+ printk("\n");
+ }
+
+ spin_unlock(&d->rangesets_lock);
+}
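As a usage sketch (not part of the patch), the coalescing and splitting behaviour implemented above can be exercised with an anonymous rangeset, i.e. one created with a NULL owning domain; the rangeset calls are exactly those added by this file, while the surrounding self-test function is hypothetical:
#include <xen/lib.h>
#include <xen/rangeset.h>

/* Hypothetical self-test, not part of the patch. */
static void rangeset_selftest(void)
{
    /* No owning domain, so the caller must destroy the set explicitly. */
    struct rangeset *r = rangeset_new(NULL, "selftest", RANGESETF_prettyprint_hex);

    BUG_ON(r == NULL);

    /* [3f8,3ff] and the adjacent [400,40f] coalesce into a single range. */
    BUG_ON(rangeset_add_range(r, 0x3f8, 0x3ff) != 0);
    BUG_ON(rangeset_add_range(r, 0x400, 0x40f) != 0);
    BUG_ON(!rangeset_contains_range(r, 0x3f8, 0x40f));

    /* Punching a hole splits it back into two ranges. */
    BUG_ON(rangeset_remove_range(r, 0x400, 0x407) != 0);
    BUG_ON(rangeset_contains_singleton(r, 0x400));
    BUG_ON(!rangeset_contains_singleton(r, 0x408));

    rangeset_printk(r); /* prints: selftest  { 3f8-3ff, 408-40f } */
    printk("\n");

    rangeset_destroy(r);
}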
diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c
index b7d24107b6..8ef838d3db 100644
--- a/xen/drivers/char/ns16550.c
+++ b/xen/drivers/char/ns16550.c
@@ -13,6 +13,7 @@
#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/serial.h>
+#include <xen/iocap.h>
#include <asm/io.h>
/*
@@ -233,11 +234,11 @@ static void ns16550_init_postirq(struct serial_port *port)
}
#ifdef CONFIG_X86
-#include <asm/physdev.h>
static void ns16550_endboot(struct serial_port *port)
{
struct ns16550 *uart = port->uart;
- physdev_modify_ioport_access_range(dom0, 0, uart->io_base, 8);
+ if ( ioports_deny_access(dom0, uart->io_base, uart->io_base + 7) != 0 )
+ BUG();
}
#else
#define ns16550_endboot NULL
diff --git a/xen/include/asm-ia64/domain.h b/xen/include/asm-ia64/domain.h
index 524bd1c348..863def1ff5 100644
--- a/xen/include/asm-ia64/domain.h
+++ b/xen/include/asm-ia64/domain.h
@@ -10,7 +10,7 @@
#include <asm/vmx_platform.h>
#include <xen/list.h>
-extern void arch_do_createdomain(struct vcpu *);
+extern int arch_do_createdomain(struct vcpu *);
extern void domain_relinquish_resources(struct domain *);
diff --git a/xen/include/asm-ia64/iocap.h b/xen/include/asm-ia64/iocap.h
new file mode 100644
index 0000000000..3d8b845d85
--- /dev/null
+++ b/xen/include/asm-ia64/iocap.h
@@ -0,0 +1,10 @@
+/******************************************************************************
+ * iocap.h
+ *
+ * Architecture-specific per-domain I/O capabilities.
+ */
+
+#ifndef __IA64_IOCAP_H__
+#define __IA64_IOCAP_H__
+
+#endif /* __IA64_IOCAP_H__ */
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index 7916b57ae9..2a63b75002 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -49,7 +49,7 @@ static inline struct cpu_info *get_cpu_info(void)
#define reset_stack_and_jump(__fn) \
__asm__ __volatile__ ( \
"mov %0,%%"__OP"sp; jmp "STR(__fn) \
- : : "r" (guest_cpu_user_regs()) )
+ : : "r" (guest_cpu_user_regs()) : "memory" )
#define schedule_tail(_ed) (((_ed)->arch.schedule_tail)(_ed))
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 94111fe1d1..513b7d8aff 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -24,8 +24,8 @@ struct arch_domain
/* Writable pagetables. */
struct ptwr_info ptwr[2];
- /* I/O-port access bitmap mask. */
- u8 *iobmp_mask; /* Address of IO bitmap mask, or NULL. */
+ /* I/O-port admin-specified access capabilities. */
+ struct rangeset *ioport_caps;
/* Shadow mode status and controls. */
struct shadow_ops *ops;
diff --git a/xen/include/asm-x86/iocap.h b/xen/include/asm-x86/iocap.h
new file mode 100644
index 0000000000..c7463cb6f2
--- /dev/null
+++ b/xen/include/asm-x86/iocap.h
@@ -0,0 +1,20 @@
+/******************************************************************************
+ * iocap.h
+ *
+ * Architecture-specific per-domain I/O capabilities.
+ */
+
+#ifndef __X86_IOCAP_H__
+#define __X86_IOCAP_H__
+
+#define ioports_permit_access(d, s, e) \
+ rangeset_add_range((d)->arch.ioport_caps, s, e)
+#define ioports_deny_access(d, s, e) \
+ rangeset_remove_range((d)->arch.ioport_caps, s, e)
+#define ioports_access_permitted(d, s, e) \
+ rangeset_contains_range((d)->arch.ioport_caps, s, e)
+
+#define cache_flush_permitted(d) \
+ (!rangeset_is_empty((d)->iomem_caps))
+
+#endif /* __X86_IOCAP_H__ */
diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index f9a07e4791..3c1e8fb6ba 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -12,7 +12,7 @@
__asm__ __volatile__("rdmsr" \
: "=a" (a__), "=d" (b__) \
: "c" (msr)); \
- val = a__ | (b__<<32); \
+ val = a__ | ((u64)b__<<32); \
} while(0);
#define wrmsr(msr,val1,val2) \
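The added (u64) cast is the substance of this hunk: if b__ is a 32-bit quantity (as on a 32-bit build), shifting it within its own type cannot produce the high half of the 64-bit MSR value. A stand-alone illustration of the underlying C pitfall, ordinary user-space code rather than Xen code:
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t lo = 0x89abcdef, hi = 0x01234567;

    /* Buggy pattern: hi is shifted as a 32-bit value, so the shift by 32 is
     * undefined (gcc warns "left shift count >= width of type"). */
    uint64_t wrong = lo | (hi << 32);

    /* Fixed pattern, mirroring the (u64) cast above: widen before shifting. */
    uint64_t right = lo | ((uint64_t)hi << 32);

    printf("wrong=%016llx right=%016llx\n",
           (unsigned long long)wrong, (unsigned long long)right);
    return 0;
}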
diff --git a/xen/include/asm-x86/physdev.h b/xen/include/asm-x86/physdev.h
deleted file mode 100644
index 0b004d4958..0000000000
--- a/xen/include/asm-x86/physdev.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/******************************************************************************
- * physdev.h
- */
-
-#ifndef __XEN_PHYSDEV_H__
-#define __XEN_PHYSDEV_H__
-
-#include <public/physdev.h>
-
-void physdev_modify_ioport_access_range(
- struct domain *d, int enable, int port, int num );
-void physdev_destroy_state(struct domain *d);
-int domain_iomem_in_pfn(struct domain *p, unsigned long pfn);
-long do_physdev_op(physdev_op_t *uop);
-void physdev_init_dom0(struct domain *d);
-
-#endif /* __XEN_PHYSDEV_H__ */
diff --git a/xen/include/asm-x86/x86_emulate.h b/xen/include/asm-x86/x86_emulate.h
index f6b6190215..19482c1538 100644
--- a/xen/include/asm-x86/x86_emulate.h
+++ b/xen/include/asm-x86/x86_emulate.h
@@ -141,6 +141,12 @@ x86_emulate_write_std(
struct cpu_user_regs;
+/* Current execution mode, passed to the emulator. */
+#define X86EMUL_MODE_REAL 0
+#define X86EMUL_MODE_PROT16 2
+#define X86EMUL_MODE_PROT32 4
+#define X86EMUL_MODE_PROT64 8
+
/*
* x86_emulate_memop: Emulate an instruction that faulted attempting to
* read/write a 'special' memory area.
@@ -149,6 +155,8 @@ struct cpu_user_regs;
* @ops: Interface to access special memory.
* @mode: Current execution mode, represented by the default size of memory
* addresses, in bytes. Valid values are 2, 4 and 8 (x86/64 only).
+ * Alternatively use the appropriate X86EMUL_MODE value (which also
+ * includes a value for emulating real mode).
*/
extern int
x86_emulate_memop(
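The X86EMUL_MODE_* values are chosen to equal the default address size in bytes that the comment above refers to, with real mode as the special value 0, so existing callers passing 2, 4 or 8 keep working. A hedged sketch of how a caller might derive the mode; in_real_mode, in_long_mode and default_addr_bytes are hypothetical inputs, not fields introduced by this patch:
static int select_x86emul_mode(int in_real_mode, int in_long_mode,
                               unsigned int default_addr_bytes)
{
    if ( in_real_mode )
        return X86EMUL_MODE_REAL;
    if ( in_long_mode )
        return X86EMUL_MODE_PROT64;
    /* Protected mode: 2- or 4-byte default address size. */
    return (default_addr_bytes == 2) ? X86EMUL_MODE_PROT16 : X86EMUL_MODE_PROT32;
}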
diff --git a/xen/include/public/dom0_ops.h b/xen/include/public/dom0_ops.h
index b6b5914200..a2c66be1f4 100644
--- a/xen/include/public/dom0_ops.h
+++ b/xen/include/public/dom0_ops.h
@@ -410,6 +410,21 @@ typedef struct {
uint8_t enable;
} dom0_setdebugging_t;
+#define DOM0_IRQ_PERMISSION 46
+typedef struct {
+ domid_t domain; /* domain to be affected */
+ uint8_t pirq;
+ uint8_t allow_access; /* flag to specify enable/disable of IRQ access */
+} dom0_irq_permission_t;
+
+#define DOM0_IOMEM_PERMISSION 47
+typedef struct {
+ domid_t domain; /* domain to be affected */
+ unsigned long first_pfn; /* first page (physical page number) in range */
+ unsigned long nr_pfns; /* number of pages in range (>0) */
+ uint8_t allow_access; /* allow (!0) or deny (0) access to range? */
+} dom0_iomem_permission_t;
+
typedef struct {
uint32_t cmd;
uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
@@ -448,6 +463,8 @@ typedef struct {
dom0_max_vcpus_t max_vcpus;
dom0_setdomainhandle_t setdomainhandle;
dom0_setdebugging_t setdebugging;
+ dom0_irq_permission_t irq_permission;
+ dom0_iomem_permission_t iomem_permission;
uint8_t pad[128];
} u;
} dom0_op_t;
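A sketch of the caller side for the two new sub-ops; the command numbers, structures and field names are those defined above, while issue_dom0_op() is a placeholder for whatever hypercall wrapper the privileged caller uses (it is not a function added by this patch):
/* Hypothetical caller-side sketch; assumes the public dom0_ops.h definitions
 * are in scope and issue_dom0_op() performs the actual hypercall. */
extern int issue_dom0_op(dom0_op_t *op);

static int grant_guest_io_access(domid_t dom, uint8_t pirq,
                                 unsigned long first_pfn, unsigned long nr_pfns)
{
    dom0_op_t op = { .cmd = DOM0_IRQ_PERMISSION,
                     .interface_version = DOM0_INTERFACE_VERSION };
    int rc;

    /* Allow the domain to bind the given physical IRQ. */
    op.u.irq_permission.domain = dom;
    op.u.irq_permission.pirq = pirq;
    op.u.irq_permission.allow_access = 1;
    if ( (rc = issue_dom0_op(&op)) != 0 )
        return rc;

    /* Allow the domain to map the given page-frame range. */
    op.cmd = DOM0_IOMEM_PERMISSION;
    op.u.iomem_permission.domain = dom;
    op.u.iomem_permission.first_pfn = first_pfn;
    op.u.iomem_permission.nr_pfns = nr_pfns;
    op.u.iomem_permission.allow_access = 1;
    return issue_dom0_op(&op);
}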
diff --git a/xen/include/xen/compiler.h b/xen/include/xen/compiler.h
index f0c5fbf17a..29acdc59e8 100644
--- a/xen/include/xen/compiler.h
+++ b/xen/include/xen/compiler.h
@@ -19,4 +19,10 @@
#define __attribute_used__ __attribute__((__unused__))
#endif
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define __must_check __attribute__((warn_unused_result))
+#else
+#define __must_check
+#endif
+
#endif /* __LINUX_COMPILER_H */
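__must_check is put to use by the new rangeset declarations later in this patch; on gcc 3.4 or newer, discarding the return value of an annotated function draws a warning. A tiny hypothetical illustration:
/* Hypothetical example, not part of the patch. */
static int __must_check reserve_slot(unsigned int slot)
{
    return (slot < 8) ? 0 : -1;
}

static void must_check_demo(void)
{
    reserve_slot(3);            /* gcc >= 3.4 warns: result ignored */
    if ( reserve_slot(9) != 0 ) /* fine: the result is examined */
        /* handle the failure */;
}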
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index 3baf56babd..761bf87a54 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -13,9 +13,9 @@ struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id);
extern void free_vcpu_struct(struct vcpu *v);
-extern void arch_do_createdomain(struct vcpu *v);
+extern int arch_do_createdomain(struct vcpu *v);
-extern int arch_set_info_guest(
+extern int arch_set_info_guest(
struct vcpu *v, struct vcpu_guest_context *c);
extern void vcpu_migrate_cpu(struct vcpu *v, int newcpu);
diff --git a/xen/include/xen/iocap.h b/xen/include/xen/iocap.h
new file mode 100644
index 0000000000..db461b9dcb
--- /dev/null
+++ b/xen/include/xen/iocap.h
@@ -0,0 +1,34 @@
+/******************************************************************************
+ * iocap.h
+ *
+ * Per-domain I/O capabilities.
+ */
+
+#ifndef __XEN_IOCAP_H__
+#define __XEN_IOCAP_H__
+
+#include <xen/rangeset.h>
+#include <asm/iocap.h>
+
+#define iomem_permit_access(d, s, e) \
+ rangeset_add_range((d)->iomem_caps, s, e)
+#define iomem_deny_access(d, s, e) \
+ rangeset_remove_range((d)->iomem_caps, s, e)
+#define iomem_access_permitted(d, s, e) \
+ rangeset_contains_range((d)->iomem_caps, s, e)
+
+#define irq_permit_access(d, i) \
+ rangeset_add_singleton((d)->irq_caps, i)
+#define irq_deny_access(d, i) \
+ rangeset_remove_singleton((d)->irq_caps, i)
+#define irqs_permit_access(d, s, e) \
+ rangeset_add_range((d)->irq_caps, s, e)
+#define irqs_deny_access(d, s, e) \
+ rangeset_remove_range((d)->irq_caps, s, e)
+#define irq_access_permitted(d, i) \
+ rangeset_contains_singleton((d)->irq_caps, i)
+
+#define multipage_allocation_permitted(d) \
+ (!rangeset_is_empty((d)->iomem_caps))
+
+#endif /* __XEN_IOCAP_H__ */
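A sketch of how a hypervisor code path might consult these capabilities before giving a guest access to an I/O resource; check_guest_io_request() and its arguments are hypothetical, but the permission macros are exactly those defined above:
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/iocap.h>

/* Hypothetical helper: refuse I/O resources the administrator never granted. */
static int check_guest_io_request(struct domain *d, unsigned long first_pfn,
                                  unsigned long nr_pfns, int pirq)
{
    if ( !iomem_access_permitted(d, first_pfn, first_pfn + nr_pfns - 1) )
        return -EPERM;

    if ( (pirq >= 0) && !irq_access_permitted(d, pirq) )
        return -EPERM;

    return 0;
}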
diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h
new file mode 100644
index 0000000000..d4a8e00393
--- /dev/null
+++ b/xen/include/xen/rangeset.h
@@ -0,0 +1,71 @@
+/******************************************************************************
+ * rangeset.h
+ *
+ * Creation, maintenance and automatic destruction of per-domain sets of
+ * numeric ranges.
+ *
+ * Copyright (c) 2005, K A Fraser
+ */
+
+#ifndef __XEN_RANGESET_H__
+#define __XEN_RANGESET_H__
+
+struct domain;
+struct rangeset;
+
+/*
+ * Initialise/destroy per-domain rangeset information.
+ *
+ * It is invalid to create or destroy a rangeset belonging to a domain @d
+ * before rangeset_domain_initialise(d) returns or after calling
+ * rangeset_domain_destroy(d).
+ */
+void rangeset_domain_initialise(
+ struct domain *d);
+void rangeset_domain_destroy(
+ struct domain *d);
+
+/*
+ * Create/destroy a rangeset. Optionally attach to specified domain @d for
+ * auto-destruction when the domain dies. A name may be specified, for use
+ * in debug pretty-printing, and various RANGESETF flags (defined below).
+ *
+ * It is invalid to perform any operation on a rangeset @r after calling
+ * rangeset_destroy(r).
+ */
+struct rangeset *rangeset_new(
+ struct domain *d, char *name, unsigned int flags);
+void rangeset_destroy(
+ struct rangeset *r);
+
+/* Flags for passing to rangeset_new(). */
+ /* Pretty-print range limits in hexadecimal. */
+#define _RANGESETF_prettyprint_hex 0
+#define RANGESETF_prettyprint_hex (1U << _RANGESETF_prettyprint_hex)
+
+int __must_check rangeset_is_empty(
+ struct rangeset *r);
+
+/* Add/remove/query a numeric range. */
+int __must_check rangeset_add_range(
+ struct rangeset *r, unsigned long s, unsigned long e);
+int __must_check rangeset_remove_range(
+ struct rangeset *r, unsigned long s, unsigned long e);
+int __must_check rangeset_contains_range(
+ struct rangeset *r, unsigned long s, unsigned long e);
+
+/* Add/remove/query a single number. */
+int __must_check rangeset_add_singleton(
+ struct rangeset *r, unsigned long s);
+int __must_check rangeset_remove_singleton(
+ struct rangeset *r, unsigned long s);
+int __must_check rangeset_contains_singleton(
+ struct rangeset *r, unsigned long s);
+
+/* Rangeset pretty printing. */
+void rangeset_printk(
+ struct rangeset *r);
+void rangeset_domain_printk(
+ struct domain *d);
+
+#endif /* __XEN_RANGESET_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 80d17a5e24..c686394b7f 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -11,6 +11,7 @@
#include <xen/time.h>
#include <xen/ac_timer.h>
#include <xen/grant_table.h>
+#include <xen/rangeset.h>
#include <asm/domain.h>
extern unsigned long volatile jiffies;
@@ -110,6 +111,9 @@ struct domain
struct domain *next_in_list;
struct domain *next_in_hashbucket;
+ struct list_head rangesets;
+ spinlock_t rangesets_lock;
+
/* Event channel information. */
struct evtchn *evtchn[NR_EVTCHN_BUCKETS];
spinlock_t evtchn_lock;
@@ -125,6 +129,10 @@ struct domain
u16 pirq_to_evtchn[NR_PIRQS];
u32 pirq_mask[NR_PIRQS/32];
+ /* I/O capabilities (access to IRQs and memory-mapped I/O). */
+ struct rangeset *iomem_caps;
+ struct rangeset *irq_caps;
+
unsigned long domain_flags;
unsigned long vm_assist;
@@ -378,23 +386,20 @@ extern struct domain *domain_list;
/* Is this domain privileged? */
#define _DOMF_privileged 1
#define DOMF_privileged (1UL<<_DOMF_privileged)
- /* May this domain do IO to physical devices? */
-#define _DOMF_physdev_access 2
-#define DOMF_physdev_access (1UL<<_DOMF_physdev_access)
/* Guest shut itself down for some reason. */
-#define _DOMF_shutdown 3
+#define _DOMF_shutdown 2
#define DOMF_shutdown (1UL<<_DOMF_shutdown)
/* Guest is in process of shutting itself down (becomes DOMF_shutdown). */
-#define _DOMF_shuttingdown 4
+#define _DOMF_shuttingdown 3
#define DOMF_shuttingdown (1UL<<_DOMF_shuttingdown)
/* Death rattle. */
-#define _DOMF_dying 5
+#define _DOMF_dying 4
#define DOMF_dying (1UL<<_DOMF_dying)
/* Domain is paused by controller software. */
-#define _DOMF_ctrl_pause 6
+#define _DOMF_ctrl_pause 5
#define DOMF_ctrl_pause (1UL<<_DOMF_ctrl_pause)
/* Domain is being debugged by controller software. */
-#define _DOMF_debugging 7
+#define _DOMF_debugging 6
#define DOMF_debugging (1UL<<_DOMF_debugging)
@@ -422,8 +427,6 @@ static inline void vcpu_unblock(struct vcpu *v)
#define IS_PRIV(_d) \
(test_bit(_DOMF_privileged, &(_d)->domain_flags))
-#define IS_CAPABLE_PHYSDEV(_d) \
- (test_bit(_DOMF_physdev_access, &(_d)->domain_flags))
#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))