aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile4
-rwxr-xr-xtools/examples/xc_dom_control.py4
-rwxr-xr-xtools/examples/xc_dom_create.py123
-rw-r--r--tools/xc/lib/Makefile2
-rw-r--r--tools/xc/lib/xc_domain.c1
-rw-r--r--tools/xc/lib/xc_linux_build.c25
-rw-r--r--tools/xc/lib/xc_linux_restore.c242
-rw-r--r--tools/xc/lib/xc_linux_save.c392
-rw-r--r--tools/xc/lib/xc_netbsd_build.c26
-rw-r--r--tools/xc/lib/xc_private.c228
-rw-r--r--tools/xc/lib/xc_private.h100
-rw-r--r--tools/xend/lib/blkif.py143
-rw-r--r--tools/xend/lib/console.py83
-rw-r--r--tools/xend/lib/domain_controller.h124
-rwxr-xr-xtools/xend/lib/main.py179
-rw-r--r--tools/xend/lib/manager.py89
-rw-r--r--tools/xend/lib/utils.c4
17 files changed, 1350 insertions, 419 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 0d4c43fb24..9ddf5f25a2 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -17,6 +17,10 @@ install: all
$(MAKE) -C xenctl install
$(MAKE) -C xend install
+dist: $(TARGET)
+ $(MAKE) prefix=`pwd`/../../install dist=yes install
+
+
clean:
$(MAKE) -C balloon clean
$(MAKE) -C xc clean
diff --git a/tools/examples/xc_dom_control.py b/tools/examples/xc_dom_control.py
index ea97a45f0e..4f0bd5de52 100755
--- a/tools/examples/xc_dom_control.py
+++ b/tools/examples/xc_dom_control.py
@@ -137,6 +137,10 @@ elif cmd == 'suspend':
os.kill(pid, signal.SIGTERM)
xc.domain_stop( dom=dom )
+
+ while not xc.domain_getinfo( first_dom=dom, max_doms=1 )[0]['stopped']:
+ time.sleep(0.1);
+
rc = xc.linux_save( dom=dom, state_file=file, progress=1)
if rc == 0 : xc.domain_destroy( dom=dom, force=1 )
diff --git a/tools/examples/xc_dom_create.py b/tools/examples/xc_dom_create.py
index e803737ef0..0a66613da4 100755
--- a/tools/examples/xc_dom_create.py
+++ b/tools/examples/xc_dom_create.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import string, sys, os, time, socket, getopt, signal, syslog
-import Xc, xenctl.utils, xenctl.console_client
+import Xc, xenctl.utils, xenctl.console_client, re
config_dir = '/etc/xc/'
config_file = xc_config_file = config_dir + 'defaults'
@@ -195,6 +195,15 @@ output('VM cmdline : "%s"' % cmdline)
if dryrun:
sys.exit(1)
+##### HACK HACK HACK
+##### Until everyone moves to the new I/O world, and a more robust domain
+##### controller (xend), we use this little trick to discover whether we
+##### are in a testing environment for new I/O stuff.
+new_io_world = True
+for line in os.popen('cat /proc/interrupts').readlines():
+ if re.search('blkdev', line):
+ new_io_world = False
+
##### Code beyond this point is actually used to manage the mechanics of
##### starting (and watching if necessary) guest virtual machines.
@@ -228,19 +237,19 @@ def make_domain():
cmsg = 'new_control_interface(dom='+str(id)+', console_port='+str(console_port)+')'
- xend_response = xenctl.utils.xend_control_message(cmsg)
+ cons_response = xenctl.utils.xend_control_message(cmsg)
- if not xend_response['success']:
+ if not cons_response['success']:
print "Error creating initial event channel"
- print "Error type: " + xend_response['error_type']
- if xend_response['error_type'] == 'exception':
- print "Exception type: " + xend_response['exception_type']
- print "Exception value: " + xend_response['exception_value']
+ print "Error type: " + cons_response['error_type']
+ if cons_response['error_type'] == 'exception':
+ print "Exception type: " + cons_response['exception_type']
+ print "Exception value: " + cons_response['exception_value']
xc.domain_destroy ( dom=id )
sys.exit()
if restore:
- ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1 )' % builder_fn )
+ ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1)' % (builder_fn) )
if ret < 0:
print "Error restoring domain"
print "Return code = " + str(ret)
@@ -248,7 +257,7 @@ def make_domain():
sys.exit()
else:
- ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"] )' % builder_fn )
+ ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=cons_response["remote_port"] )' % builder_fn )
if ret < 0:
print "Error building Linux guest OS: "
print "Return code = " + str(ret)
@@ -259,6 +268,18 @@ def make_domain():
# set the expertise level appropriately
xenctl.utils.VBD_EXPERT_MODE = vbd_expert
+
+ if new_io_world:
+ cmsg = 'new_block_interface(dom='+str(id)+')'
+ xend_response = xenctl.utils.xend_control_message(cmsg)
+ if not xend_response['success']:
+ print "Error creating block interface"
+ print "Error type: " + xend_response['error_type']
+ if xend_response['error_type'] == 'exception':
+ print "Exception type: " + xend_response['exception_type']
+ print "Exception val: " + xend_response['exception_value']
+ xc.domain_destroy ( dom=id )
+ sys.exit()
for ( uname, virt_name, rw ) in vbd_list:
virt_dev = xenctl.utils.blkdev_name_to_number( virt_name )
@@ -269,42 +290,70 @@ def make_domain():
xc.domain_destroy ( dom=id )
sys.exit()
- # check that setting up this VBD won't violate the sharing
- # allowed by the current VBD expertise level
- if xenctl.utils.vd_extents_validate(segments, rw=='w' or rw=='rw') < 0:
- xc.domain_destroy( dom = id )
- sys.exit()
+ if new_io_world:
+ if len(segments) > 1:
+ print "New I/O world cannot deal with multi-extent vdisks"
+ xc.domain_destroy ( dom=id )
+ sys.exit()
+ seg = segments[0]
+ cmsg = 'new_block_device(dom=' + str(id) + \
+ ',handle=0,vdev=' + str(virt_dev) + \
+ ',pdev=' + str(seg['device']) + \
+ ',start_sect=' + str(seg['start_sector']) + \
+ ',nr_sect=' + str(seg['nr_sectors']) + \
+ ',readonly=' + str(not re.match('w',rw)) + ')'
+ xend_response = xenctl.utils.xend_control_message(cmsg)
+ if not xend_response['success']:
+ print "Error creating virtual block device"
+ print "Error type: " + xend_response['error_type']
+ if xend_response['error_type'] == 'exception':
+ print "Exception type: " + xend_response['exception_type']
+ print "Exception val: " + xend_response['exception_value']
+ xc.domain_destroy ( dom=id )
+ sys.exit()
+ else:
+ # check that setting up this VBD won't violate the sharing
+ # allowed by the current VBD expertise level
+ if xenctl.utils.vd_extents_validate(segments,
+ rw=='w' or rw=='rw') < 0:
+ xc.domain_destroy( dom = id )
+ sys.exit()
- if xc.vbd_create( dom=id, vbd=virt_dev, writeable= rw=='w' or rw=='rw' ):
- print "Error creating VBD vbd=%d writeable=%d\n" % (virt_dev,rw)
- xc.domain_destroy ( dom=id )
- sys.exit()
+ if xc.vbd_create( dom=id, vbd=virt_dev,
+ writeable= rw=='w' or rw=='rw' ):
+ print "Error creating VBD %d (writeable=%d)\n" % (virt_dev,rw)
+ xc.domain_destroy ( dom=id )
+ sys.exit()
- if xc.vbd_setextents( dom=id,
- vbd=virt_dev,
- extents=segments):
- print "Error populating VBD vbd=%d\n" % virt_dev
- xc.domain_destroy ( dom=id )
- sys.exit()
-
- # setup virtual firewall rules for all aliases
- for ip in vfr_ipaddr:
- xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
-
- # check for physical device access
- for (pci_bus, pci_dev, pci_func) in pci_device_list:
- if xc.physdev_pci_access_modify(
- dom=id, bus=pci_bus, dev=pci_dev, func=pci_func, enable=1 ) < 0:
- print "Non-fatal error enabling PCI device access."
- else:
- print "Enabled PCI access (%d:%d:%d)." % (pci_bus,pci_dev,pci_func)
+ if xc.vbd_setextents( dom=id,
+ vbd=virt_dev,
+ extents=segments):
+ print "Error populating VBD vbd=%d\n" % virt_dev
+ xc.domain_destroy ( dom=id )
+ sys.exit()
+
+ if not new_io_world:
+ # setup virtual firewall rules for all aliases
+ for ip in vfr_ipaddr:
+ xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
+
+ if new_io_world:
+ # check for physical device access
+ for (pci_bus, pci_dev, pci_func) in pci_device_list:
+ if xc.physdev_pci_access_modify(
+ dom=id, bus=pci_bus, dev=pci_dev,
+ func=pci_func, enable=1 ) < 0:
+ print "Non-fatal error enabling PCI device access."
+ else:
+ print "Enabled PCI access (%d:%d:%d)." % \
+ (pci_bus,pci_dev,pci_func)
if xc.domain_start( dom=id ) < 0:
print "Error starting domain"
xc.domain_destroy ( dom=id )
sys.exit()
- return (id, xend_response['console_port'])
+ return (id, cons_response['console_port'])
# end of make_domain()
def mkpidfile():
diff --git a/tools/xc/lib/Makefile b/tools/xc/lib/Makefile
index 79dce046df..f542935167 100644
--- a/tools/xc/lib/Makefile
+++ b/tools/xc/lib/Makefile
@@ -4,7 +4,7 @@ MINOR = 0
SONAME = libxc.so.$(MAJOR)
CC = gcc
-CFLAGS = -c -Wall -O3 -fno-strict-aliasing
+CFLAGS = -c -Werror -O3 -fno-strict-aliasing
CFLAGS += -I../../../xen/include/hypervisor-ifs
CFLAGS += -I../../xend/lib
CFLAGS += -I../../../xenolinux-sparse/include
diff --git a/tools/xc/lib/xc_domain.c b/tools/xc/lib/xc_domain.c
index ec28f2686b..1d77bfc016 100644
--- a/tools/xc/lib/xc_domain.c
+++ b/tools/xc/lib/xc_domain.c
@@ -84,6 +84,7 @@ int xc_domain_getinfo(int xc_handle,
{
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)next_domid;
+ op.u.getdomaininfo.ctxt = NULL; // no exec context info, thanks.
if ( do_dom0_op(xc_handle, &op) < 0 )
break;
info->domid = (u64)op.u.getdomaininfo.domain;
diff --git a/tools/xc/lib/xc_linux_build.c b/tools/xc/lib/xc_linux_build.c
index f1bd182827..83debd904d 100644
--- a/tools/xc/lib/xc_linux_build.c
+++ b/tools/xc/lib/xc_linux_build.c
@@ -26,6 +26,7 @@ static long get_tot_pages(int xc_handle, u64 domid)
dom0_op_t op;
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)domid;
+ op.u.getdomaininfo.ctxt = NULL;
return (do_dom0_op(xc_handle, &op) < 0) ?
-1 : op.u.getdomaininfo.tot_pages;
}
@@ -70,7 +71,7 @@ static int setup_guestos(int xc_handle,
gzFile initrd_gfd, unsigned long initrd_len,
unsigned long nr_pages,
unsigned long *pvsi, unsigned long *pvke,
- dom0_builddomain_t *builddomain,
+ full_execution_context_t *ctxt,
const char *cmdline,
unsigned long shared_info_frame,
unsigned int control_evtchn)
@@ -163,8 +164,6 @@ static int setup_guestos(int xc_handle,
v_start, v_end);
printf(" ENTRY ADDRESS: %08lx\n", vkern_entry);
- memset(builddomain, 0, sizeof(*builddomain));
-
if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 )
goto error_out;
@@ -205,7 +204,7 @@ static int setup_guestos(int xc_handle,
/* First allocate page for page dir. */
ppt_alloc = (vpt_start - v_start) >> PAGE_SHIFT;
l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
- builddomain->ctxt.pt_base = l2tab;
+ ctxt->pt_base = l2tab;
/* Initialise the page tables. */
if ( (vl2tab = map_pfn_writeable(pm_handle, l2tab >> PAGE_SHIFT)) == NULL )
@@ -388,7 +387,7 @@ int xc_linux_build(int xc_handle,
int initrd_fd = -1;
gzFile initrd_gfd = NULL;
int rc, i;
- full_execution_context_t *ctxt;
+ full_execution_context_t st_ctxt, *ctxt = &st_ctxt;
unsigned long nr_pages;
char *image = NULL;
unsigned long image_size, initrd_size=0;
@@ -420,8 +419,15 @@ int xc_linux_build(int xc_handle,
}
}
+ if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
+ {
+ PERROR("Unable to mlock ctxt");
+ return 1;
+ }
+
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)domid;
+ op.u.getdomaininfo.ctxt = ctxt;
if ( (do_dom0_op(xc_handle, &op) < 0) ||
((u64)op.u.getdomaininfo.domain != domid) )
{
@@ -429,7 +435,7 @@ int xc_linux_build(int xc_handle,
goto error_out;
}
if ( (op.u.getdomaininfo.state != DOMSTATE_STOPPED) ||
- (op.u.getdomaininfo.ctxt.pt_base != 0) )
+ (ctxt->pt_base != 0) )
{
ERROR("Domain is already constructed");
goto error_out;
@@ -438,7 +444,7 @@ int xc_linux_build(int xc_handle,
if ( setup_guestos(xc_handle, domid, image, image_size,
initrd_gfd, initrd_size, nr_pages,
&vstartinfo_start, &vkern_entry,
- &launch_op.u.builddomain, cmdline,
+ ctxt, cmdline,
op.u.getdomaininfo.shared_info_frame,
control_evtchn) < 0 )
{
@@ -453,8 +459,6 @@ int xc_linux_build(int xc_handle,
if ( image != NULL )
free(image);
- ctxt = &launch_op.u.builddomain.ctxt;
-
ctxt->flags = 0;
/*
@@ -507,8 +511,11 @@ int xc_linux_build(int xc_handle,
ctxt->failsafe_callback_cs = FLAT_GUESTOS_CS;
ctxt->failsafe_callback_eip = 0;
+ memset( &launch_op, 0, sizeof(launch_op) );
+
launch_op.u.builddomain.domain = (domid_t)domid;
launch_op.u.builddomain.num_vifs = 1;
+ launch_op.u.builddomain.ctxt = ctxt;
launch_op.cmd = DOM0_BUILDDOMAIN;
rc = do_dom0_op(xc_handle, &launch_op);
diff --git a/tools/xc/lib/xc_linux_restore.c b/tools/xc/lib/xc_linux_restore.c
index 239df65984..e27221281a 100644
--- a/tools/xc/lib/xc_linux_restore.c
+++ b/tools/xc/lib/xc_linux_restore.c
@@ -10,6 +10,8 @@
#include <asm-xen/suspend.h>
#include <zlib.h>
+#define MAX_BATCH_SIZE 1024
+
/* This may allow us to create a 'quiet' command-line option, if necessary. */
#define verbose_printf(_f, _a...) \
do { \
@@ -58,8 +60,8 @@ int xc_linux_restore(int xc_handle,
u64 *pdomid)
{
dom0_op_t op;
- int rc = 1, i, j;
- unsigned long mfn, pfn;
+ int rc = 1, i, j, n, k;
+ unsigned long mfn, pfn, xpfn;
unsigned int prev_pc, this_pc;
/* Number of page frames in use by this Linux session. */
@@ -93,6 +95,9 @@ int xc_linux_restore(int xc_handle,
/* A temporary mapping of the guest's suspend record. */
suspend_record_t *p_srec;
+ mfn_mapper_t *region_mapper, *mapper_handle1;
+ char *region_base;
+
/* The name and descriptor of the file that we are reading from. */
int fd;
gzFile gfd;
@@ -114,6 +119,14 @@ int xc_linux_restore(int xc_handle,
return 1;
}
+ if ( mlock(&ctxt, sizeof(ctxt) ) )
+ {
+ /* needed for when we do the build dom0 op,
+ but might as well do early */
+ PERROR("Unable to mlock ctxt");
+ return 1;
+ }
+
/* Start writing out the saved-domain record. */
if ( !checked_read(gfd, signature, 16) ||
(memcmp(signature, "LinuxGuestRecord", 16) != 0) )
@@ -159,12 +172,6 @@ int xc_linux_restore(int xc_handle,
goto out;
}
- if ( !checked_read(gfd, pfn_type, 4 * nr_pfns) )
- {
- ERROR("Error when reading from state file");
- goto out;
- }
-
/* Set the domain's name to that from the restore file */
if ( xc_domain_setname( xc_handle, dom, name ) )
{
@@ -184,6 +191,7 @@ int xc_linux_restore(int xc_handle,
/* Get the domain's shared-info frame. */
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)dom;
+ op.u.getdomaininfo.ctxt = NULL;
if ( do_dom0_op(xc_handle, &op) < 0 )
{
ERROR("Could not get information on new domain");
@@ -212,6 +220,15 @@ int xc_linux_restore(int xc_handle,
goto out;
}
+
+ if ( (region_mapper = mfn_mapper_init(xc_handle, dom,
+ MAX_BATCH_SIZE*PAGE_SIZE,
+ PROT_WRITE ))
+ == NULL )
+ goto out;
+
+ region_base = mfn_mapper_base( region_mapper );
+
verbose_printf("Reloading memory pages: 0%%");
/*
@@ -219,75 +236,141 @@ int xc_linux_restore(int xc_handle,
* We uncanonicalise page tables as we go.
*/
prev_pc = 0;
- for ( i = 0; i < nr_pfns; i++ )
+
+ n=0;
+ while(1)
{
- this_pc = (i * 100) / nr_pfns;
+ int j;
+ unsigned long region_pfn_type[1024];
+
+ this_pc = (n * 100) / nr_pfns;
if ( (this_pc - prev_pc) >= 5 )
{
verbose_printf("\b\b\b\b%3d%%", this_pc);
prev_pc = this_pc;
}
- mfn = pfn_to_mfn_table[i];
-
- ppage = map_pfn_writeable(pm_handle, mfn);
-
- if ( !checked_read(gfd, ppage, PAGE_SIZE) )
+ if ( !checked_read(gfd, &j, sizeof(int)) )
{
ERROR("Error when reading from state file");
goto out;
}
- if ( pfn_type[i] == L1TAB )
+ //printf("batch=%d\n",j);
+
+ if(j==0) break; // our work here is done
+
+ if ( !checked_read(gfd, region_pfn_type, j*sizeof(unsigned long)) )
{
- for ( j = 0; j < 1024; j++ )
- {
- if ( ppage[j] & _PAGE_PRESENT )
- {
- if ( (pfn = ppage[j] >> PAGE_SHIFT) >= nr_pfns )
- {
- ERROR("Frame number in page table is out of range");
- goto out;
- }
- if ( (pfn_type[pfn] != NONE) && (ppage[j] & _PAGE_RW) )
- {
- ERROR("Write access requested for a restricted frame");
- goto out;
- }
- ppage[j] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT);
- ppage[j] |= pfn_to_mfn_table[pfn] << PAGE_SHIFT;
- }
- }
- }
- else if ( pfn_type[i] == L2TAB )
- {
- for ( j = 0; j < (HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT); j++ )
- {
- if ( ppage[j] & _PAGE_PRESENT )
- {
- if ( (pfn = ppage[j] >> PAGE_SHIFT) >= nr_pfns )
- {
- ERROR("Frame number in page table is out of range");
- goto out;
- }
- if ( pfn_type[pfn] != L1TAB )
- {
- ERROR("Page table mistyping");
- goto out;
- }
- ppage[j] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PSE);
- ppage[j] |= pfn_to_mfn_table[pfn] << PAGE_SHIFT;
- }
- }
+ ERROR("Error when reading from state file");
+ goto out;
}
- unmap_pfn(pm_handle, ppage);
+ for(i=0;i<j;i++)
+ {
+ pfn = region_pfn_type[i] & ~PGT_type_mask;
+ mfn = pfn_to_mfn_table[pfn];
+
+ mfn_mapper_queue_entry( region_mapper, i<<PAGE_SHIFT,
+ mfn, PAGE_SIZE );
+ }
+
+ if( mfn_mapper_flush_queue(region_mapper) )
+ {
+ ERROR("Couldn't map page region");
+ goto out;
+ }
+
+
+ for(i=0;i<j;i++)
+ {
+ unsigned long *ppage;
+
+ pfn = region_pfn_type[i] & ~PGT_type_mask;
+
+//if(pfn_type[i])printf("^pfn=%d %08lx\n",pfn,pfn_type[i]);
+
+ if (pfn>nr_pfns)
+ {
+ ERROR("pfn out of range");
+ goto out;
+ }
+
+ region_pfn_type[i] &= PGT_type_mask;
+
+ pfn_type[pfn] = region_pfn_type[i];
+
+ mfn = pfn_to_mfn_table[pfn];
+
+//if(region_pfn_type[i])printf("i=%d pfn=%d mfn=%d type=%lx\n",i,pfn,mfn,region_pfn_type[i]);
+
+ ppage = (unsigned long*) (region_base + i*PAGE_SIZE);
+
+ if ( !checked_read(gfd, ppage, PAGE_SIZE) )
+ {
+ ERROR("Error when reading from state file");
+ goto out;
+ }
+
+ if ( region_pfn_type[i] == L1TAB )
+ {
+ for ( k = 0; k < 1024; k++ )
+ {
+ if ( ppage[k] & _PAGE_PRESENT )
+ {
+ if ( (xpfn = ppage[k] >> PAGE_SHIFT) >= nr_pfns )
+ {
+ ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i],i,k,xpfn,nr_pfns);
+ goto out;
+ }
+#if 0
+ if ( (region_pfn_type[xpfn] != NONE) && (ppage[k] & _PAGE_RW) )
+ {
+ ERROR("Write access requested for a restricted frame");
+ goto out;
+ }
+#endif
+ ppage[k] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT);
+ ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
+ }
+ }
+ }
+ else if ( region_pfn_type[i] == L2TAB )
+ {
+ for ( k = 0; k < (HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT); k++ )
+ {
+ if ( ppage[k] & _PAGE_PRESENT )
+ {
+ if ( (xpfn = ppage[k] >> PAGE_SHIFT) >= nr_pfns )
+ {
+ ERROR("Frame number in page table is out of range");
+ goto out;
+ }
+#if 0
+ if ( region_pfn_type[pfn] != L1TAB )
+ {
+ ERROR("Page table mistyping");
+ goto out;
+ }
+#endif
+ ppage[k] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PSE);
+ ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
+ }
+ }
+ }
+
+ if ( add_mmu_update(xc_handle, mmu,
+ (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
+ goto out;
+
+ }
+
+ n+=j; // crude stats
- if ( add_mmu_update(xc_handle, mmu,
- (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i) )
- goto out;
}
+ mfn_mapper_close( region_mapper );
+
/*
* Pin page tables. Do this after writing to them as otherwise Xen
* will barf when doing the type-checking.
@@ -352,26 +435,47 @@ int xc_linux_restore(int xc_handle,
pfn = ctxt.pt_base >> PAGE_SHIFT;
if ( (pfn >= nr_pfns) || (pfn_type[pfn] != L2TAB) )
{
- ERROR("PT base is bad");
+ printf("PT base is bad. pfn=%d nr=%d type=%08lx %08lx\n",
+ pfn, nr_pfns, pfn_type[pfn], L2TAB);
+ ERROR("PT base is bad.");
goto out;
}
ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT;
/* Uncanonicalise the pfn-to-mfn table frame-number list. */
- for ( i = 0; i < nr_pfns; i += 1024 )
+
+
+ if ( (mapper_handle1 = mfn_mapper_init(xc_handle, dom,
+ 1024*1024, PROT_WRITE ))
+ == NULL )
+ goto out;
+
+ for ( i = 0; i < (nr_pfns+1023)/1024; i++ )
{
- unsigned long copy_size = (nr_pfns - i) * sizeof(unsigned long);
- if ( copy_size > PAGE_SIZE ) copy_size = PAGE_SIZE;
- pfn = pfn_to_mfn_frame_list[i/1024];
+ unsigned long pfn, mfn;
+
+ pfn = pfn_to_mfn_frame_list[i];
if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
{
ERROR("PFN-to-MFN frame number is bad");
goto out;
}
- ppage = map_pfn_writeable(pm_handle, pfn_to_mfn_table[pfn]);
- memcpy(ppage, &pfn_to_mfn_table[i], copy_size);
- unmap_pfn(pm_handle, ppage);
+ mfn = pfn_to_mfn_table[pfn];
+
+ mfn_mapper_queue_entry( mapper_handle1, i<<PAGE_SHIFT,
+ mfn, PAGE_SIZE );
}
+
+ if ( mfn_mapper_flush_queue(mapper_handle1) )
+ {
+ ERROR("Couldn't map pfn_to_mfn table");
+ goto out;
+ }
+
+ memcpy( mfn_mapper_base( mapper_handle1 ), pfn_to_mfn_table,
+ nr_pfns*sizeof(unsigned long) );
+
+ mfn_mapper_close( mapper_handle1 );
/*
* Safety checking of saved context:
@@ -406,11 +510,11 @@ int xc_linux_restore(int xc_handle,
ERROR("Bad LDT base or size");
goto out;
}
-
+
op.cmd = DOM0_BUILDDOMAIN;
op.u.builddomain.domain = (domid_t)dom;
op.u.builddomain.num_vifs = 1;
- memcpy(&op.u.builddomain.ctxt, &ctxt, sizeof(ctxt));
+ op.u.builddomain.ctxt = &ctxt;
rc = do_dom0_op(xc_handle, &op);
out:
diff --git a/tools/xc/lib/xc_linux_save.c b/tools/xc/lib/xc_linux_save.c
index dc759f546c..88ed9e15d7 100644
--- a/tools/xc/lib/xc_linux_save.c
+++ b/tools/xc/lib/xc_linux_save.c
@@ -10,6 +10,8 @@
#include <asm-xen/suspend.h>
#include <zlib.h>
+#define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */
+
/* This may allow us to create a 'quiet' command-line option, if necessary. */
#define verbose_printf(_f, _a...) \
do { \
@@ -24,7 +26,7 @@
*/
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
(((_mfn) < (1024*1024)) && \
- (pfn_to_mfn_table[mfn_to_pfn_table[_mfn]] == (_mfn)))
+ (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)))
/* Returns TRUE if MFN is successfully converted to a PFN. */
#define translate_mfn_to_pfn(_pmfn) \
@@ -34,37 +36,11 @@
if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \
_res = 0; \
else \
- *(_pmfn) = mfn_to_pfn_table[mfn]; \
+ *(_pmfn) = live_mfn_to_pfn_table[mfn]; \
_res; \
})
-static int check_pfn_ownership(int xc_handle,
- unsigned long mfn,
- u64 dom)
-{
- dom0_op_t op;
- op.cmd = DOM0_GETPAGEFRAMEINFO;
- op.u.getpageframeinfo.pfn = mfn;
- op.u.getpageframeinfo.domain = (domid_t)dom;
- return (do_dom0_op(xc_handle, &op) >= 0);
-}
-#define GETPFN_ERR (~0U)
-static unsigned int get_pfn_type(int xc_handle,
- unsigned long mfn,
- u64 dom)
-{
- dom0_op_t op;
- op.cmd = DOM0_GETPAGEFRAMEINFO;
- op.u.getpageframeinfo.pfn = mfn;
- op.u.getpageframeinfo.domain = (domid_t)dom;
- if ( do_dom0_op(xc_handle, &op) < 0 )
- {
- PERROR("Unexpected failure when getting page frame info!");
- return GETPFN_ERR;
- }
- return op.u.getpageframeinfo.type;
-}
static int checked_write(gzFile fd, void *buf, size_t count)
{
@@ -80,10 +56,13 @@ int xc_linux_save(int xc_handle,
int verbose)
{
dom0_op_t op;
- int rc = 1, i, j;
+ int rc = 1, i, j, k, n;
unsigned long mfn;
unsigned int prev_pc, this_pc;
+ /* state of the new MFN mapper */
+ mfn_mapper_t *mapper_handle1, *mapper_handle2;
+
/* Remember if we stopped the guest, so we can restart it on exit. */
int we_stopped_it = 0;
@@ -100,18 +79,23 @@ int xc_linux_save(int xc_handle,
unsigned long *pfn_type = NULL;
/* A temporary mapping, and a copy, of one frame of guest memory. */
- unsigned long *ppage, page[1024];
+ unsigned long page[1024];
- /* A temporary mapping, and a copy, of the pfn-to-mfn table frame list. */
- unsigned long *p_pfn_to_mfn_frame_list, pfn_to_mfn_frame_list[1024];
- /* A temporary mapping of one frame in the above list. */
- unsigned long *pfn_to_mfn_frame;
+ /* A copy of the pfn-to-mfn table frame list. */
+ unsigned long *live_pfn_to_mfn_frame_list;
+ unsigned long pfn_to_mfn_frame_list[1024];
- /* A table mapping each PFN to its current MFN. */
- unsigned long *pfn_to_mfn_table = NULL;
- /* A table mapping each current MFN to its canonical PFN. */
- unsigned long *mfn_to_pfn_table = NULL;
+ /* Live mapping of the table mapping each PFN to its current MFN. */
+ unsigned long *live_pfn_to_mfn_table = NULL;
+ /* Live mapping of system MFN to PFN table. */
+ unsigned long *live_mfn_to_pfn_table = NULL;
+ /* Live mapping of shared info structure */
+ unsigned long *live_shinfo;
+
+ /* base of the region in which domain memory is mapped */
+ unsigned char *region_base;
+
/* A temporary mapping, and a copy, of the guest's suspend record. */
suspend_record_t *p_srec, srec;
@@ -138,11 +122,18 @@ int xc_linux_save(int xc_handle,
return 1;
}
+ if ( mlock(&ctxt, sizeof(ctxt) ) )
+ {
+ PERROR("Unable to mlock ctxt");
+ return 1;
+ }
+
/* Ensure that the domain exists, and that it is stopped. */
for ( ; ; )
{
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)domid;
+ op.u.getdomaininfo.ctxt = &ctxt;
if ( (do_dom0_op(xc_handle, &op) < 0) ||
((u64)op.u.getdomaininfo.domain != domid) )
{
@@ -150,7 +141,6 @@ int xc_linux_save(int xc_handle,
goto out;
}
- memcpy(&ctxt, &op.u.getdomaininfo.ctxt, sizeof(ctxt));
memcpy(name, op.u.getdomaininfo.name, sizeof(name));
shared_info_frame = op.u.getdomaininfo.shared_info_frame;
@@ -178,99 +168,115 @@ int xc_linux_save(int xc_handle,
goto out;
}
- if ( (pm_handle = init_pfn_mapper((domid_t)domid)) < 0 )
- goto out;
- /* Is the suspend-record MFN actually valid for this domain? */
- if ( !check_pfn_ownership(xc_handle, ctxt.cpu_ctxt.esi, domid) )
+ /* Map the suspend-record MFN to pin it. The page must be owned by
+ domid for this to succeed. */
+ p_srec = mfn_mapper_map_single(xc_handle, domid,
+ sizeof(srec), PROT_READ,
+ ctxt.cpu_ctxt.esi );
+
+ if (!p_srec)
{
- ERROR("Invalid state record pointer");
+ ERROR("Couldn't map state record");
goto out;
}
- /* If the suspend-record MFN is okay then grab a copy of it to @srec. */
- p_srec = map_pfn_readonly(pm_handle, ctxt.cpu_ctxt.esi);
- memcpy(&srec, p_srec, sizeof(srec));
- unmap_pfn(pm_handle, p_srec);
+ memcpy( &srec, p_srec, sizeof(srec) );
+ /* cheesy sanity check */
if ( srec.nr_pfns > 1024*1024 )
{
ERROR("Invalid state record -- pfn count out of range");
goto out;
}
- if ( !check_pfn_ownership(xc_handle, srec.pfn_to_mfn_frame_list, domid) )
+ /* the pfn_to_mfn_frame_list fits in a single page */
+ live_pfn_to_mfn_frame_list =
+ mfn_mapper_map_single(xc_handle, domid,
+ PAGE_SIZE, PROT_READ,
+ srec.pfn_to_mfn_frame_list );
+
+ if (!live_pfn_to_mfn_frame_list)
{
- ERROR("Invalid pfn-to-mfn frame list pointer");
+ ERROR("Couldn't map pfn_to_mfn_frame_list");
goto out;
}
+
+
+ if ( (mapper_handle1 = mfn_mapper_init(xc_handle, domid,
+ 1024*1024, PROT_READ ))
+ == NULL )
+ goto out;
+
+ for ( i = 0; i < (srec.nr_pfns+1023)/1024; i++ )
+ {
+ /* Grab a copy of the pfn-to-mfn table frame list.
+ This has the effect of preventing the page from being freed and
+ given to another domain. (though the domain is stopped anyway...) */
+ mfn_mapper_queue_entry( mapper_handle1, i<<PAGE_SHIFT,
+ live_pfn_to_mfn_frame_list[i],
+ PAGE_SIZE );
+ }
+
+ if ( mfn_mapper_flush_queue(mapper_handle1) )
+ {
+ ERROR("Couldn't map pfn_to_mfn table");
+ goto out;
+ }
+
+ live_pfn_to_mfn_table = mfn_mapper_base( mapper_handle1 );
+
- /* Grab a copy of the pfn-to-mfn table frame list. */
- p_pfn_to_mfn_frame_list = map_pfn_readonly(
- pm_handle, srec.pfn_to_mfn_frame_list);
- memcpy(pfn_to_mfn_frame_list, p_pfn_to_mfn_frame_list, PAGE_SIZE);
- unmap_pfn(pm_handle, p_pfn_to_mfn_frame_list);
/* We want zeroed memory so use calloc rather than malloc. */
- mfn_to_pfn_table = calloc(1, 4 * 1024 * 1024);
- pfn_to_mfn_table = calloc(1, 4 * srec.nr_pfns);
- pfn_type = calloc(1, 4 * srec.nr_pfns);
+ pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
- if ( (mfn_to_pfn_table == NULL) ||
- (pfn_to_mfn_table == NULL) ||
- (pfn_type == NULL) )
+ if ( (pfn_type == NULL) )
{
errno = ENOMEM;
goto out;
}
+ if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
+ {
+ ERROR("Unable to mlock");
+ goto out;
+ }
- /*
- * Construct the local pfn-to-mfn and mfn-to-pfn tables. On exit from this
- * loop we have each MFN mapped at most once. Note that there may be MFNs
- * that aren't mapped at all: we detect these by MFN_IS_IN_PSEUDOPHYS_MAP.
- */
- pfn_to_mfn_frame = NULL;
- for ( i = 0; i < srec.nr_pfns; i++ )
+
+ /* Track the mfn_to_pfn table down from the domains PT */
{
- /* Each frameful of table frames must be checked & mapped on demand. */
- if ( (i & 1023) == 0 )
- {
- mfn = pfn_to_mfn_frame_list[i/1024];
- if ( !check_pfn_ownership(xc_handle, mfn, domid) )
- {
- ERROR("Invalid frame number if pfn-to-mfn frame list");
- goto out;
- }
- if ( pfn_to_mfn_frame != NULL )
- unmap_pfn(pm_handle, pfn_to_mfn_frame);
- pfn_to_mfn_frame = map_pfn_readonly(pm_handle, mfn);
- }
-
- mfn = pfn_to_mfn_frame[i & 1023];
+ unsigned long *pgd;
+ unsigned long mfn_to_pfn_table_start_mfn;
- if ( !check_pfn_ownership(xc_handle, mfn, domid) )
- {
- ERROR("Invalid frame specified with pfn-to-mfn table");
- goto out;
- }
+ pgd = mfn_mapper_map_single(xc_handle, domid,
+ PAGE_SIZE, PROT_READ,
+ ctxt.pt_base>>PAGE_SHIFT);
- /* Did we map this MFN already? That would be invalid! */
- if ( MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
- {
- ERROR("A machine frame appears twice in pseudophys space");
- goto out;
- }
+ mfn_to_pfn_table_start_mfn =
+ pgd[HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT]>>PAGE_SHIFT;
+
+ live_mfn_to_pfn_table =
+ mfn_mapper_map_single(xc_handle, ~0ULL,
+ PAGE_SIZE*1024, PROT_READ,
+ mfn_to_pfn_table_start_mfn );
+ }
- pfn_to_mfn_table[i] = mfn;
- mfn_to_pfn_table[mfn] = i;
- /* Query page type by MFN, but store it by PFN. */
- if ( (pfn_type[i] = get_pfn_type(xc_handle, mfn, domid)) ==
- GETPFN_ERR )
- goto out;
+ /*
+ * Quick belt and braces sanity check.
+ */
+
+ for ( i = 0; i < srec.nr_pfns; i++ )
+ {
+ mfn = live_pfn_to_mfn_table[i];
+
+ if( live_mfn_to_pfn_table[mfn] != i )
+ printf("i=%d mfn=%d live_mfn_to_pfn_table=%d\n",
+ i,mfn,live_mfn_to_pfn_table[mfn]);
}
+
/* Canonicalise the suspend-record frame number. */
if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
{
@@ -294,9 +300,10 @@ int xc_linux_save(int xc_handle,
ERROR("PT base is not in range of pseudophys map");
goto out;
}
- ctxt.pt_base = mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
+ ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
/* Canonicalise the pfn-to-mfn table frame-number list. */
+ memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
for ( i = 0; i < srec.nr_pfns; i += 1024 )
{
if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
@@ -307,63 +314,152 @@ int xc_linux_save(int xc_handle,
}
/* Start writing out the saved-domain record. */
- ppage = map_pfn_readonly(pm_handle, shared_info_frame);
+ live_shinfo = mfn_mapper_map_single(xc_handle, domid,
+ PAGE_SIZE, PROT_READ,
+ shared_info_frame);
+
+ if (!live_shinfo)
+ {
+ ERROR("Couldn't map live_shinfo");
+ goto out;
+ }
+
if ( !checked_write(gfd, "LinuxGuestRecord", 16) ||
!checked_write(gfd, name, sizeof(name)) ||
!checked_write(gfd, &srec.nr_pfns, sizeof(unsigned long)) ||
!checked_write(gfd, &ctxt, sizeof(ctxt)) ||
- !checked_write(gfd, ppage, PAGE_SIZE) ||
- !checked_write(gfd, pfn_to_mfn_frame_list, PAGE_SIZE) ||
- !checked_write(gfd, pfn_type, 4 * srec.nr_pfns) )
+ !checked_write(gfd, live_shinfo, PAGE_SIZE) ||
+ !checked_write(gfd, pfn_to_mfn_frame_list, PAGE_SIZE) )
{
ERROR("Error when writing to state file");
goto out;
}
- unmap_pfn(pm_handle, ppage);
+ munmap(live_shinfo, PAGE_SIZE);
verbose_printf("Saving memory pages: 0%%");
+ if ( (mapper_handle2 = mfn_mapper_init(xc_handle, domid,
+ BATCH_SIZE*4096, PROT_READ ))
+ == NULL )
+ goto out;
+
+ region_base = mfn_mapper_base( mapper_handle2 );
+
/* Now write out each data page, canonicalising page tables as we go... */
prev_pc = 0;
- for ( i = 0; i < srec.nr_pfns; i++ )
+ for ( n = 0; n < srec.nr_pfns; )
{
- this_pc = (i * 100) / srec.nr_pfns;
+ this_pc = (n * 100) / srec.nr_pfns;
if ( (this_pc - prev_pc) >= 5 )
{
verbose_printf("\b\b\b\b%3d%%", this_pc);
prev_pc = this_pc;
}
- mfn = pfn_to_mfn_table[i];
-
- ppage = map_pfn_readonly(pm_handle, mfn);
- memcpy(page, ppage, PAGE_SIZE);
- unmap_pfn(pm_handle, ppage);
-
- if ( (pfn_type[i] == L1TAB) || (pfn_type[i] == L2TAB) )
- {
- for ( j = 0;
- j < ((pfn_type[i] == L2TAB) ?
- (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
- j++ )
- {
- if ( !(page[j] & _PAGE_PRESENT) ) continue;
- mfn = page[j] >> PAGE_SHIFT;
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
- {
- ERROR("Frame number in pagetable page is invalid");
- goto out;
- }
- page[j] &= PAGE_SIZE - 1;
- page[j] |= mfn_to_pfn_table[mfn] << PAGE_SHIFT;
- }
- }
-
- if ( !checked_write(gfd, page, PAGE_SIZE) )
- {
- ERROR("Error when writing to state file");
- goto out;
- }
+ for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
+ {
+ pfn_type[j] = live_pfn_to_mfn_table[i];
+ }
+
+
+ for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
+ {
+ /* queue up mappings for all of the pages in this batch */
+
+//printf("region n=%d j=%d i=%d mfn=%d\n",n,j,i,live_pfn_to_mfn_table[i]);
+ mfn_mapper_queue_entry( mapper_handle2, j<<PAGE_SHIFT,
+ live_pfn_to_mfn_table[i],
+ PAGE_SIZE );
+ }
+
+ if( mfn_mapper_flush_queue(mapper_handle2) )
+ {
+ ERROR("Couldn't map page region");
+ goto out;
+ }
+
+ if ( get_pfn_type_batch(xc_handle, domid, j, pfn_type) )
+ {
+ ERROR("get_pfn_type_batch failed");
+ goto out;
+ }
+
+ for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
+ {
+ if((pfn_type[j]>>29) == 7)
+ {
+ ERROR("bogus page");
+ goto out;
+ }
+
+ /* canonicalise mfn->pfn */
+ pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
+ live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
+
+/* if(pfn_type[j]>>29)
+ printf("i=%d type=%d\n",i,pfn_type[i]); */
+ }
+
+
+ if ( !checked_write(gfd, &j, sizeof(int) ) )
+ {
+ ERROR("Error when writing to state file");
+ goto out;
+ }
+
+ if ( !checked_write(gfd, pfn_type, sizeof(unsigned long)*j ) )
+ {
+ ERROR("Error when writing to state file");
+ goto out;
+ }
+
+
+ for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
+ {
+ /* write out pages in batch */
+
+ if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) ||
+ ((pfn_type[j] & PGT_type_mask) == L2TAB) )
+ {
+
+ memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
+
+ for ( k = 0;
+ k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ?
+ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
+ k++ )
+ {
+ if ( !(page[k] & _PAGE_PRESENT) ) continue;
+ mfn = page[k] >> PAGE_SHIFT;
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
+ {
+ ERROR("Frame number in pagetable page is invalid");
+ goto out;
+ }
+ page[k] &= PAGE_SIZE - 1;
+ page[k] |= live_mfn_to_pfn_table[mfn] << PAGE_SHIFT;
+
+ }
+
+ if ( !checked_write(gfd, page, PAGE_SIZE) )
+ {
+ ERROR("Error when writing to state file");
+ goto out;
+ }
+
+
+ }
+ else
+ {
+ if ( !checked_write(gfd, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
+ {
+ ERROR("Error when writing to state file");
+ goto out;
+ }
+ }
+ }
+
+ n+=j; /* n is the master loop counter */
}
verbose_printf("\b\b\b\b100%%\nMemory saved.\n");
@@ -371,10 +467,19 @@ int xc_linux_save(int xc_handle,
/* Success! */
rc = 0;
- out:
+ /* Zero terminate */
+ if ( !checked_write(gfd, &rc, sizeof(int)) )
+ {
+ ERROR("Error when writing to state file");
+ goto out;
+ }
+
+
+out:
/* Restart the domain if we had to stop it to save its state. */
if ( we_stopped_it )
{
+ printf("Restart domain\n");
op.cmd = DOM0_STARTDOMAIN;
op.u.startdomain.domain = (domid_t)domid;
(void)do_dom0_op(xc_handle, &op);
@@ -382,13 +487,6 @@ int xc_linux_save(int xc_handle,
gzclose(gfd);
- if ( pm_handle >= 0 )
- (void)close_pfn_mapper(pm_handle);
-
- if ( pfn_to_mfn_table != NULL )
- free(pfn_to_mfn_table);
- if ( mfn_to_pfn_table != NULL )
- free(mfn_to_pfn_table);
if ( pfn_type != NULL )
free(pfn_type);
@@ -397,4 +495,6 @@ int xc_linux_save(int xc_handle,
unlink(state_file);
return !!rc;
+
+
}
diff --git a/tools/xc/lib/xc_netbsd_build.c b/tools/xc/lib/xc_netbsd_build.c
index 8793a512f2..7c67d57d71 100644
--- a/tools/xc/lib/xc_netbsd_build.c
+++ b/tools/xc/lib/xc_netbsd_build.c
@@ -27,6 +27,7 @@ static long get_tot_pages(int xc_handle, u64 domid)
dom0_op_t op;
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)domid;
+ op.u.getdomaininfo.ctxt = NULL;
return (do_dom0_op(xc_handle, &op) < 0) ?
-1 : op.u.getdomaininfo.tot_pages;
}
@@ -59,7 +60,7 @@ static int setup_guestos(int xc_handle,
unsigned long tot_pages,
unsigned long *virt_startinfo_addr,
unsigned long *virt_load_addr,
- dom0_builddomain_t *builddomain,
+ full_execution_context_t *ctxt,
const char *cmdline,
unsigned long shared_info_frame,
unsigned int control_evtchn)
@@ -78,8 +79,6 @@ static int setup_guestos(int xc_handle,
mmu_t *mmu = NULL;
int pm_handle, i;
- memset(builddomain, 0, sizeof(*builddomain));
-
if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 )
goto error_out;
@@ -119,7 +118,7 @@ static int setup_guestos(int xc_handle,
*/
l2tab = page_array[alloc_index] << PAGE_SHIFT;
alloc_index--;
- builddomain->ctxt.pt_base = l2tab;
+ ctxt->pt_base = l2tab;
if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
goto error_out;
@@ -221,7 +220,7 @@ int xc_netbsd_build(int xc_handle,
int kernel_fd = -1;
gzFile kernel_gfd = NULL;
int rc, i;
- full_execution_context_t *ctxt;
+ full_execution_context_t st_ctxt, *ctxt = &st_ctxt;
unsigned long virt_startinfo_addr;
if ( (tot_pages = get_tot_pages(xc_handle, domid)) < 0 )
@@ -244,8 +243,15 @@ int xc_netbsd_build(int xc_handle,
return 1;
}
+ if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
+ {
+ PERROR("Unable to mlock ctxt");
+ return 1;
+ }
+
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)domid;
+ op.u.getdomaininfo.ctxt = ctxt;
if ( (do_dom0_op(xc_handle, &op) < 0) ||
((u64)op.u.getdomaininfo.domain != domid) )
{
@@ -253,7 +259,7 @@ int xc_netbsd_build(int xc_handle,
goto error_out;
}
if ( (op.u.getdomaininfo.state != DOMSTATE_STOPPED) ||
- (op.u.getdomaininfo.ctxt.pt_base != 0) )
+ (op.u.getdomaininfo.ctxt->pt_base != 0) )
{
ERROR("Domain is already constructed");
goto error_out;
@@ -261,7 +267,7 @@ int xc_netbsd_build(int xc_handle,
if ( setup_guestos(xc_handle, domid, kernel_gfd, tot_pages,
&virt_startinfo_addr,
- &load_addr, &launch_op.u.builddomain, cmdline,
+ &load_addr, &st_ctxt, cmdline,
op.u.getdomaininfo.shared_info_frame,
control_evtchn) < 0 )
{
@@ -274,8 +280,6 @@ int xc_netbsd_build(int xc_handle,
if( kernel_gfd )
gzclose(kernel_gfd);
- ctxt = &launch_op.u.builddomain.ctxt;
-
ctxt->flags = 0;
/*
@@ -328,9 +332,11 @@ int xc_netbsd_build(int xc_handle,
ctxt->failsafe_callback_cs = FLAT_GUESTOS_CS;
ctxt->failsafe_callback_eip = 0;
+ memset( &launch_op, 0, sizeof(launch_op) );
+
launch_op.u.builddomain.domain = (domid_t)domid;
launch_op.u.builddomain.num_vifs = 1;
-
+ launch_op.u.builddomain.ctxt = ctxt;
launch_op.cmd = DOM0_BUILDDOMAIN;
rc = do_dom0_op(xc_handle, &launch_op);
diff --git a/tools/xc/lib/xc_private.c b/tools/xc/lib/xc_private.c
index 485aa58754..d137176ca8 100644
--- a/tools/xc/lib/xc_private.c
+++ b/tools/xc/lib/xc_private.c
@@ -45,6 +45,234 @@ void unmap_pfn(int pm_handle, void *vaddr)
(void)munmap(vaddr, PAGE_SIZE);
}
+/*******************/
+
+/*
+ * Map 'size' bytes of machine memory, starting at machine frame 'mfn' of
+ * domain 'dom', into this process with protection 'prot'.
+ *
+ * A virtual range is first reserved with mmap() on 'xc_handle' and then
+ * populated with a single-entry IOCTL_PRIVCMD_MMAP request.
+ *
+ * Returns the mapped address, or NULL on failure. Nothing is left mapped
+ * when NULL is returned. The caller releases the region with munmap().
+ */
+void * mfn_mapper_map_single(int xc_handle, domid_t dom,
+                             int size, int prot,
+                             unsigned long mfn )
+{
+    privcmd_mmap_t ioctlx;
+    privcmd_mmap_entry_t entry;
+    void *addr;
+
+    addr = mmap( NULL, size, prot, MAP_SHARED, xc_handle, 0 );
+
+    /* mmap() reports failure with MAP_FAILED, which is not NULL. */
+    if ( addr == MAP_FAILED )
+        return NULL;
+
+    ioctlx.num   = 1;
+    ioctlx.dom   = dom;
+    ioctlx.entry = &entry;
+    entry.va     = (unsigned long) addr;
+    entry.mfn    = mfn;
+    entry.npages = (size+PAGE_SIZE-1)>>PAGE_SHIFT;
+
+    if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) < 0 )
+    {
+        /* Do not leak the reserved range if it could not be populated. */
+        (void)munmap( addr, size );
+        return NULL;
+    }
+
+    return addr;
+}
+
+/*
+ * Create a batching mapper for domain 'dom'.
+ *
+ * Reserves a 'size'-byte virtual region (protection 'prot') with mmap() on
+ * 'xc_handle' and allocates a handle whose queue of mfn_mapper_queue_size
+ * privcmd_mmap_entry_t slots lives directly after the handle structure.
+ *
+ * Returns the new handle, or NULL if either the allocation or the mmap()
+ * fails. Release the handle with mfn_mapper_close().
+ */
+mfn_mapper_t * mfn_mapper_init(int xc_handle, domid_t dom, int size, int prot)
+{
+    mfn_mapper_t * t;
+
+    t = calloc( 1, sizeof(mfn_mapper_t)+
+                mfn_mapper_queue_size*sizeof(privcmd_mmap_entry_t) );
+    if ( t == NULL )
+        return NULL;
+
+    t->xc_handle = xc_handle;
+    t->size = size;
+    t->prot = prot;
+    t->error = 0;
+    t->max_queue_size = mfn_mapper_queue_size;
+
+    t->addr = mmap( NULL, size, prot, MAP_SHARED, xc_handle, 0 );
+
+    /* mmap() reports failure with MAP_FAILED, which is not NULL. */
+    if ( t->addr == MAP_FAILED )
+    {
+        free(t);
+        return NULL;
+    }
+
+    t->ioctl.num = 0;
+    t->ioctl.dom = dom;
+    /* The entry array was allocated contiguously after the handle. */
+    t->ioctl.entry = (privcmd_mmap_entry_t *) &t[1];
+    return t;
+}
+
+/* Return the start address of the region reserved by mfn_mapper_init(). */
+void * mfn_mapper_base(mfn_mapper_t *t)
+{
+    void *base = t->addr;
+
+    return base;
+}
+
+/*
+ * Unmap the mapper's region (if any) and free the handle.
+ *
+ * A NULL handle is accepted and ignored, so error paths may call this
+ * unconditionally, in the same way as free().
+ */
+void mfn_mapper_close(mfn_mapper_t *t)
+{
+    if ( t == NULL )
+        return;
+
+    if ( t->addr )
+        munmap( t->addr, t->size );
+
+    free(t);
+}
+
+/*
+ * Submit the queued entries with IOCTL_PRIVCMD_MMAP and empty the queue.
+ * The first non-zero ioctl result is latched in t->error; the result of
+ * this particular call is returned.
+ */
+static int __mfn_mapper_flush_queue(mfn_mapper_t *t)
+{
+    int status = ioctl( t->xc_handle, IOCTL_PRIVCMD_MMAP, &t->ioctl );
+
+    /* The queue is reset whether or not the ioctl succeeded. */
+    t->ioctl.num = 0;
+
+    /* Never overwrite an earlier latched failure. */
+    if ( (status != 0) && (t->error == 0) )
+        t->error = status;
+
+    return status;
+}
+
+/*
+ * Flush any queued entries and report the outcome of the whole batch.
+ *
+ * Returns the latched error if any flush since the last call failed
+ * (including flushes triggered from mfn_mapper_queue_entry()), otherwise
+ * the result of this flush. The latched error is cleared before returning.
+ */
+int mfn_mapper_flush_queue(mfn_mapper_t *t)
+{
+    int result  = __mfn_mapper_flush_queue(t);
+    int latched = t->error;
+
+    t->error = 0;
+
+    return latched ? latched : result;
+}
+
+/*
+ * Queue a request to map 'size' bytes (rounded up to whole pages) of
+ * machine memory starting at frame 'mfn' at 'offset' (masked with
+ * PAGE_MASK) inside the mapper's region.
+ *
+ * If the request directly continues the most recently queued entry, both
+ * in virtual address and in machine frame number, that entry is extended
+ * instead of a new one being added. When the queue becomes full it is
+ * flushed immediately.
+ *
+ * Returns the virtual address the frames will appear at, or 0 if the
+ * flush of a full queue failed.
+ */
+void * mfn_mapper_queue_entry(mfn_mapper_t *t, int offset,
+                              unsigned long mfn, int size)
+{
+    privcmd_mmap_entry_t *slot, *last;
+    unsigned long va;
+    int npages;
+
+    offset &= PAGE_MASK;
+    npages  = (size+PAGE_SIZE-1)>>PAGE_SHIFT;
+    va      = (unsigned long)t->addr+offset;
+
+    if ( t->ioctl.num > 0 )
+    {
+        last = &t->ioctl.entry[t->ioctl.num-1];
+
+        /* Contiguous with the previous entry: just grow it. */
+        if ( ((last->va+(last->npages*PAGE_SIZE)) == va) &&
+             ((last->mfn+last->npages) == mfn) )
+        {
+            last->npages += npages;
+            return (char *)t->addr+offset;
+        }
+    }
+
+    slot = &t->ioctl.entry[t->ioctl.num];
+    slot->va     = va;
+    slot->mfn    = mfn;
+    slot->npages = npages;
+    t->ioctl.num++;
+
+    /* Queue full: push it out now so the next entry has a free slot. */
+    if ( (t->ioctl.num == t->max_queue_size) &&
+         (__mfn_mapper_flush_queue(t) != 0) )
+        return 0;
+
+    return (char *)t->addr+offset;
+}
+
+
+/*******************/
+
+#if 0
+
+/*
+ * Allocate a page-type query batcher able to hold 'num' entries for domain
+ * 'dom'. The multicall and request arrays are mlock()ed because they are
+ * handed to Xen by address.
+ *
+ * Returns the new handle, or NULL on allocation or mlock failure. Nothing
+ * stays allocated when NULL is returned.
+ */
+mfn_typer_t *mfn_typer_init(int xc_handle, domid_t dom, int num )
+{
+    mfn_typer_t *t;
+    multicall_entry_t *m;
+    dom0_op_compact_getpageframeinfo_t *d;
+
+    t = calloc(1, sizeof(mfn_typer_t) );
+    m = calloc(num, sizeof(multicall_entry_t));
+    d = calloc(num, sizeof(dom0_op_compact_getpageframeinfo_t));
+
+    if ( !t || !m || !d )
+        goto fail;
+
+    /* Debug trace; sizeof yields size_t, so cast to match "%d". */
+    printf("sizeof(m)=%d sizeof(d)=%d m=%p d=%p\n",
+           (int)sizeof(multicall_entry_t),
+           (int)sizeof(dom0_op_compact_getpageframeinfo_t),
+           (void *)m, (void *)d);
+
+    if ( (mlock(m, sizeof(multicall_entry_t)*num ) != 0) ||
+         (mlock(d, sizeof(dom0_op_compact_getpageframeinfo_t)*num ) != 0) )
+    {
+        /* Report first: the cleanup below may overwrite errno. */
+        PERROR("Could not lock memory for Xen hypercall");
+        (void)munlock(m, sizeof(multicall_entry_t)*num );
+        (void)munlock(d, sizeof(dom0_op_compact_getpageframeinfo_t)*num );
+        goto fail;
+    }
+
+    t->xc_handle = xc_handle;
+    t->max = num;
+    t->nr_multicall_ents=0;
+    t->multicall_list=m;
+    t->gpf_list=d;
+    t->dom = dom;
+
+    return t;
+
+ fail:
+    /* free(NULL) is a no-op, so partial allocations are handled too. */
+    free(t);
+    free(m);
+    free(d);
+    return NULL;
+}
+
+/*
+ * Append a DOM0_GETPAGEFRAMEINFO query for machine frame 'mfn' to the
+ * pending multicall batch of 't'.
+ *
+ * NOTE(review): the slot index is not checked against t->max, so the caller
+ * presumably must flush before queueing more entries than were requested
+ * from mfn_typer_init() -- confirm.
+ */
+void mfn_typer_queue_entry(mfn_typer_t *t, unsigned long mfn )
+{
+ int i = t->nr_multicall_ents;
+ multicall_entry_t *m = &t->multicall_list[i];
+ dom0_op_compact_getpageframeinfo_t *d = &t->gpf_list[i];
+
+ /* Fill in the dom0 op that this multicall entry will carry. */
+ d->cmd = DOM0_GETPAGEFRAMEINFO;
+ d->interface_version = DOM0_INTERFACE_VERSION;
+ d->getpageframeinfo.pfn = mfn;
+ d->getpageframeinfo.domain = t->dom;
+ /* Placeholder written before the call; looks like a debugging sentinel. */
+ d->getpageframeinfo.type = 1000; //~0UL;
+
+ /* The multicall entry passes the op to Xen by address. */
+ m->op = __HYPERVISOR_dom0_op;
+ m->args[0] = (unsigned long)d;
+
+ t->nr_multicall_ents++;
+}
+
+/*
+ * Issue all queued page-type queries as one multicall and empty the queue.
+ *
+ * Returns 0 if nothing was queued, otherwise the result of
+ * do_multicall_op().
+ */
+int mfn_typer_flush_queue(mfn_typer_t *t)
+{
+    int rc;
+
+    if ( t->nr_multicall_ents == 0 )
+        return 0;
+
+    rc = do_multicall_op(t->xc_handle, t->multicall_list,
+                         t->nr_multicall_ents);
+    t->nr_multicall_ents = 0;
+
+    /* Previously control fell off the end of this non-void function. */
+    return rc;
+}
+
+/* Return the 'type' field of the idx'th queued query. */
+unsigned int mfn_typer_get_result(mfn_typer_t *t, int idx)
+{
+    dom0_op_compact_getpageframeinfo_t *query = &t->gpf_list[idx];
+
+    return query->getpageframeinfo.type;
+}
+
+#endif
+
+/* NB: arr must be mlock'ed */
+
+/*
+ * Issue one DOM0_GETPAGEFRAMEINFO2 request for 'num' entries of 'arr' on
+ * behalf of domain 'dom'. 'arr' is passed to Xen by address.
+ *
+ * Returns the result of do_dom0_op().
+ */
+int get_pfn_type_batch(int xc_handle,
+                       u64 dom, int num, unsigned long *arr)
+{
+    dom0_op_t request;
+    int rc;
+
+    request.cmd = DOM0_GETPAGEFRAMEINFO2;
+    request.u.getpageframeinfo2.array  = arr;
+    request.u.getpageframeinfo2.num    = num;
+    request.u.getpageframeinfo2.domain = (domid_t)dom;
+
+    rc = do_dom0_op(xc_handle, &request);
+    return rc;
+}
+
+#define GETPFN_ERR (~0U)
+/*
+ * Query the type of machine frame 'mfn' of domain 'dom' with a single
+ * DOM0_GETPAGEFRAMEINFO request.
+ *
+ * Returns the reported type, or GETPFN_ERR (after printing a diagnostic)
+ * if the dom0 op fails.
+ */
+unsigned int get_pfn_type(int xc_handle,
+                          unsigned long mfn,
+                          u64 dom)
+{
+    dom0_op_t request;
+
+    request.cmd = DOM0_GETPAGEFRAMEINFO;
+    request.u.getpageframeinfo.domain = (domid_t)dom;
+    request.u.getpageframeinfo.pfn    = mfn;
+
+    if ( do_dom0_op(xc_handle, &request) >= 0 )
+        return request.u.getpageframeinfo.type;
+
+    PERROR("Unexpected failure when getting page frame info!");
+    return GETPFN_ERR;
+}
+
+
+
+/*******************/
+
#define FIRST_MMU_UPDATE 2
static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
diff --git a/tools/xc/lib/xc_private.h b/tools/xc/lib/xc_private.h
index d4299109e5..eaa301772d 100644
--- a/tools/xc/lib/xc_private.h
+++ b/tools/xc/lib/xc_private.h
@@ -16,8 +16,6 @@
#include "xc.h"
-#include <asm-xen/proc_cmd.h>
-
/* from xen/include/hypervisor-ifs */
#include <hypervisor-if.h>
#include <dom0_ops.h>
@@ -25,6 +23,10 @@
#include <event_channel.h>
#include <sched_ctl.h>
+#include <asm-xen/proc_cmd.h>
+
+
+
/* from xend/lib */
#include <domain_controller.h>
@@ -108,6 +110,27 @@ static inline int do_dom0_op(int xc_handle, dom0_op_t *op)
out1: return ret;
}
+/*
+ * Issue a __HYPERVISOR_multicall hypercall for the 'nr_calls' entries at
+ * 'call_list'.
+ *
+ * Returns the result of do_xen_hypercall(). On a negative result with
+ * errno == EACCES a hint about rebuilding the tools is printed to stderr.
+ */
+static inline int do_multicall_op(int xc_handle,
+                                  void *call_list, int nr_calls)
+{
+    privcmd_hypercall_t hypercall;
+    int ret;
+
+    hypercall.op     = __HYPERVISOR_multicall;
+    hypercall.arg[0] = (unsigned long)call_list;
+    hypercall.arg[1] = (unsigned long)nr_calls;
+
+    ret = do_xen_hypercall(xc_handle, &hypercall);
+
+    if ( (ret < 0) && (errno == EACCES) )
+        fprintf(stderr, "Dom0 operation failed -- need to"
+                " rebuild the user-space tool set?\n");
+
+    return ret;
+}
+
static inline int do_network_op(int xc_handle, network_op_t *op)
{
int ret = -1;
@@ -174,4 +197,77 @@ int add_mmu_update(int xc_handle, mmu_t *mmu,
unsigned long ptr, unsigned long val);
int finish_mmu_updates(int xc_handle, mmu_t *mmu);
+
+/*
+ * ioctl-based mfn mapping interface
+ */
+
+/*
+typedef struct privcmd_mmap_entry {
+ unsigned long va;
+ unsigned long mfn;
+ unsigned long npages;
+} privcmd_mmap_entry_t;
+
+typedef struct privcmd_mmap {
+ int num;
+ domid_t dom;
+ privcmd_mmap_entry_t *entry;
+} privcmd_mmap_t;
+*/
+
+#define mfn_mapper_queue_size 128
+
+/*
+ * State of one batching mapper, created by mfn_mapper_init(). The array
+ * that ioctl.entry points at is allocated directly after this structure.
+ */
+typedef struct mfn_mapper {
+ int xc_handle; /* descriptor passed to mmap() and ioctl() */
+ int size; /* length in bytes of the region at 'addr' */
+ int prot; /* protection flags the region was mmap()ed with */
+ int error; /* first non-zero flush result, until reported */
+ int max_queue_size; /* entry count that triggers an automatic flush */
+ void * addr; /* base of the mmap()ed region */
+ privcmd_mmap_t ioctl; /* pending IOCTL_PRIVCMD_MMAP request */
+
+} mfn_mapper_t;
+
+void * mfn_mapper_map_single(int xc_handle, domid_t dom, int size, int prot,
+ unsigned long mfn );
+
+mfn_mapper_t * mfn_mapper_init(int xc_handle, domid_t dom, int size, int prot);
+
+void * mfn_mapper_base(mfn_mapper_t *t);
+
+void mfn_mapper_close(mfn_mapper_t *t);
+
+int mfn_mapper_flush_queue(mfn_mapper_t *t);
+
+void * mfn_mapper_queue_entry(mfn_mapper_t *t, int offset,
+ unsigned long mfn, int size );
+
+/*********************/
+
+
+#if 0
+typedef struct mfn_typer {
+ domid_t dom;
+ int xc_handle;
+ int max;
+ dom0_op_t op;
+} mfn_typer_t;
+
+
+mfn_typer_t *mfn_typer_init(int xc_handle, domid_t dom, int num );
+
+void mfn_typer_queue_entry(mfn_typer_t *t, unsigned long mfn );
+
+int mfn_typer_flush_queue(mfn_typer_t *t);
+#endif
+
+int get_pfn_type_batch(int xc_handle,
+ u64 dom, int num, unsigned long *arr);
+
+unsigned int get_pfn_type(int xc_handle,
+ unsigned long mfn,
+ u64 dom);
+
+
#endif /* __XC_PRIVATE_H__ */
diff --git a/tools/xend/lib/blkif.py b/tools/xend/lib/blkif.py
new file mode 100644
index 0000000000..94e058f7ce
--- /dev/null
+++ b/tools/xend/lib/blkif.py
@@ -0,0 +1,143 @@
+
+#################################################################
+## xend/blkif.py -- Block-interface management functions for Xend
+## Copyright (c) 2004, K A Fraser (University of Cambridge)
+#################################################################
+
+import errno, re, os, select, signal, socket, struct, sys
+import xend.main, xend.console, xend.manager, xend.utils, Xc
+
+CMSG_BLKIF_BE = 1
+CMSG_BLKIF_FE = 2
+CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED = 0
+CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED = 32
+CMSG_BLKIF_FE_INTERFACE_CONNECT = 33
+CMSG_BLKIF_FE_INTERFACE_DISCONNECT = 34
+CMSG_BLKIF_BE_CREATE = 0
+CMSG_BLKIF_BE_DESTROY = 1
+CMSG_BLKIF_BE_CONNECT = 2
+CMSG_BLKIF_BE_DISCONNECT = 3
+CMSG_BLKIF_BE_VBD_CREATE = 4
+CMSG_BLKIF_BE_VBD_DESTROY = 5
+CMSG_BLKIF_BE_VBD_GROW = 6
+CMSG_BLKIF_BE_VBD_SHRINK = 7
+
+pendmsg = None
+pendaddr = None
+
+def backend_tx_req(msg):
+    """Send request `msg` on xend.main.dom0_port.
+
+    If the port has no room for a request, the message is parked in the
+    module-level `pendmsg` slot instead. The slot holds one message, so a
+    later deferral replaces an earlier one.
+    """
+    dom0 = xend.main.dom0_port
+    if not dom0.space_to_write_request():
+        xend.blkif.pendmsg = msg
+        return
+    dom0.write_request(msg)
+    dom0.notify()
+
+def backend_rx_req(port, msg):
+ """Answer a request received on `port` by writing `msg` back unchanged
+ as its response."""
+ port.write_response(msg)
+
+def backend_rx_rsp(port, msg):
+    """Handle a response `msg` from the block-device backend.
+
+    Dispatches on the header's 'subtype':
+      * BE_CREATE / BE_VBD_GROW: report success to the management client
+        recorded in `pendaddr`.
+      * BE_CONNECT: send an INTERFACE_STATUS_CHANGED message carrying the
+        interface's 'port2' event channel to the owning domain.
+      * BE_VBD_CREATE: follow up with a BE_VBD_GROW request for the extent
+        recorded for that virtual device.
+    Other subtypes are only logged. The status field unpacked from the
+    payload is not examined.
+    """
+    subtype = msg.get_header()['subtype']
+    print "Received blkif-be response, subtype %d" % subtype
+
+    if subtype in (CMSG_BLKIF_BE_CREATE, CMSG_BLKIF_BE_VBD_GROW):
+        reply = { 'success': True }
+        xend.main.send_management_response(reply, xend.blkif.pendaddr)
+        return
+
+    if subtype == CMSG_BLKIF_BE_CONNECT:
+        (dom, hnd, evtchn, frame, st) = \
+            struct.unpack("QIILI", msg.get_payload())
+        blkif = interface.list[xend.main.port_from_dom(dom).local_port]
+        notice = xend.utils.message(CMSG_BLKIF_FE,
+                                    CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
+        notice.append_payload(struct.pack("III",0,2,blkif.evtchn['port2']))
+        blkif.ctrlif_tx_req(xend.main.port_list[blkif.key], notice)
+        return
+
+    if subtype == CMSG_BLKIF_BE_VBD_CREATE:
+        (dom, hnd, vdev, ro, st) = struct.unpack("QIHII", msg.get_payload())
+        blkif = interface.list[xend.main.port_from_dom(dom).local_port]
+        (pdev, start_sect, nr_sect, readonly) = blkif.devices[vdev]
+        grow = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_GROW, 0)
+        grow.append_payload(struct.pack("QIHHHQQI",dom,0,vdev,0,
+                                        pdev,start_sect,nr_sect,0))
+        backend_tx_req(grow)
+
+def backend_do_work(port):
+    """Write the parked backend request, if any, to `port`.
+
+    Returns True if the pending message was written (and the slot cleared),
+    False if nothing was pending or the port had no room.
+    """
+    global pendmsg
+    if not pendmsg or not port.space_to_write_request():
+        return False
+    port.write_request(pendmsg)
+    pendmsg = None
+    return True
+
+
+class interface:
+    """One block-device interface between a guest domain and the backend.
+
+    Every instance registers itself in the class-level `list` dictionary
+    under the opaque `key` supplied by its creator.
+    """
+
+    # Dictionary of all block-device interfaces.
+    list = {}
+
+
+    # NB. 'key' is an opaque value that has no meaning in this class.
+    def __init__(self, dom, key):
+        """Register the interface and ask the backend to create it."""
+        self.dom = dom
+        self.key = key
+        self.devices = {}
+        self.pendmsg = None
+        interface.list[key] = self
+        create = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CREATE, 0)
+        create.append_payload(struct.pack("QII",dom,0,0))
+        # Remember who to answer once the backend responds.
+        xend.blkif.pendaddr = xend.main.mgmt_req_addr
+        backend_tx_req(create)
+
+
+    # Attach a device to the specified interface
+    def attach_device(self, vdev, pdev, start_sect, nr_sect, readonly):
+        """Record the extent for `vdev` and request VBD creation.
+
+        Returns False if `vdev` is already attached, True otherwise.
+        """
+        if vdev in self.devices:
+            return False
+        self.devices[vdev] = (pdev, start_sect, nr_sect, readonly)
+        create = xend.utils.message(CMSG_BLKIF_BE,
+                                    CMSG_BLKIF_BE_VBD_CREATE, 0)
+        create.append_payload(
+            struct.pack("QIHII",self.dom,0,vdev,readonly,0))
+        xend.blkif.pendaddr = xend.main.mgmt_req_addr
+        backend_tx_req(create)
+        return True
+
+
+    # Completely destroy this interface.
+    def destroy(self):
+        """Unregister the interface and ask the backend to destroy it."""
+        del interface.list[self.key]
+        teardown = xend.utils.message(CMSG_BLKIF_BE,
+                                      CMSG_BLKIF_BE_DESTROY, 0)
+        teardown.append_payload(struct.pack("QII",self.dom,0,0))
+        backend_tx_req(teardown)
+
+
+    # The parameter @port is the control-interface event channel. This method
+    # returns True if messages were written to the control interface.
+    def ctrlif_transmit_work(self, port):
+        """Write the parked front-end message, if any, to `port`."""
+        if not self.pendmsg or not port.space_to_write_request():
+            return False
+        port.write_request(self.pendmsg)
+        self.pendmsg = None
+        return True
+
+
+    def ctrlif_tx_req(self, port, msg):
+        """Send `msg` on `port`, or park it in self.pendmsg if `port` has
+        no room for a request."""
+        if not port.space_to_write_request():
+            self.pendmsg = msg
+            return
+        port.write_request(msg)
+        port.notify()
+
+
+    def ctrlif_rx_req(self, port, msg):
+        """Acknowledge a front-end request and act on its subtype.
+
+        DRIVER_STATUS_CHANGED is answered with an INTERFACE_STATUS_CHANGED
+        message of status 1. INTERFACE_CONNECT binds an interdomain event
+        channel and forwards a BE_CONNECT request to the backend.
+        """
+        port.write_response(msg)
+        subtype = msg.get_header()['subtype']
+
+        if subtype == CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED:
+            status = xend.utils.message(
+                CMSG_BLKIF_FE, CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
+            status.append_payload(struct.pack("III",0,1,0))
+            self.ctrlif_tx_req(port, status)
+            return
+
+        if subtype == CMSG_BLKIF_FE_INTERFACE_CONNECT:
+            (hnd, frame) = struct.unpack("IL", msg.get_payload())
+            xc = Xc.new()
+            self.evtchn = xc.evtchn_bind_interdomain(dom1=0,dom2=self.dom)
+            connect = xend.utils.message(CMSG_BLKIF_BE,
+                                         CMSG_BLKIF_BE_CONNECT, 0)
+            connect.append_payload(struct.pack("QIILI",self.dom,0,
+                                               self.evtchn['port1'],frame,0))
+            backend_tx_req(connect)
diff --git a/tools/xend/lib/console.py b/tools/xend/lib/console.py
index aad6069979..57898817f5 100644
--- a/tools/xend/lib/console.py
+++ b/tools/xend/lib/console.py
@@ -5,7 +5,7 @@
#############################################################
import errno, re, os, select, signal, socket, struct, sys
-
+import xend.blkif, xend.main, xend.manager, xend.utils, Xc
##
## interface:
@@ -16,7 +16,7 @@ import errno, re, os, select, signal, socket, struct, sys
## CONNECTED: sending/receiving console data on TCP port 'self.port'
##
## A dictionary of all active interfaces, indexed by TCP socket descriptor,
-## is accessible as 'interface.interface_list'.
+## is accessible as 'interface.list_by_fd'.
##
## NB. When a class instance is to be destroyed you *must* call the 'close'
## method. Otherwise a stale reference will eb left in the interface list.
@@ -30,7 +30,11 @@ class interface:
# Dictionary of all active (non-closed) console interfaces.
- interface_list = {}
+ list_by_fd = {}
+
+
+ # Dictionary of all console interfaces, closed and open.
+ list = {}
# NB. 'key' is an opaque value that has no meaning in this class.
@@ -38,6 +42,9 @@ class interface:
self.status = interface.CLOSED
self.port = port
self.key = key
+ self.rbuf = xend.utils.buffer()
+ self.wbuf = xend.utils.buffer()
+ interface.list[key] = self
# Is this interface closed (inactive)?
@@ -58,14 +65,14 @@ class interface:
# Close the interface, if it is not closed already.
def close(self):
if not self.closed():
- del interface.interface_list[self.sock.fileno()]
+ del interface.list_by_fd[self.sock.fileno()]
self.sock.close()
del self.sock
self.status = interface.CLOSED
# Move the interface into the 'listening' state. Opens a new listening
- # socket and updates 'interface_list'.
+ # socket and updates 'list_by_fd'.
def listen(self):
# Close old socket (if any), and create a fresh one.
self.close()
@@ -80,7 +87,7 @@ class interface:
# Announce the new status of thsi interface.
self.status = interface.LISTENING
- interface.interface_list[self.sock.fileno()] = self
+ interface.list_by_fd[self.sock.fileno()] = self
except:
# In case of trouble ensure we get rid of dangling socket reference
@@ -105,7 +112,69 @@ class interface:
# Publish the new socket and the new interface state.
self.sock = sock
self.status = interface.CONNECTED
- interface.interface_list[self.sock.fileno()] = self
+ interface.list_by_fd[self.sock.fileno()] = self
return 1
+ # Completely destroy a console interface.
+ def destroy(self):
+     """Close the interface and remove it from `interface.list`."""
+     self.close()
+     registry = interface.list
+     del registry[self.key]
+
+
+ # Do work triggered by resource availability on a console-interface socket.
+ def socket_work(self):
+     """Service the console socket: accept, then send, then receive.
+
+     * If the interface is listening, try to accept a connection.
+     * If it is connected, send as much of `rbuf` as the socket takes.
+       Sending stops at the first socket error and the unsent data stays
+       in `rbuf` for a later call. Previously the error was swallowed
+       inside the loop, which spun forever once the peer had gone away.
+     * Then read everything available into `wbuf`. End-of-stream, or any
+       receive error other than EAGAIN, returns the interface to the
+       listening state.
+     """
+     # If the interface is listening, check for pending connections.
+     if self.listening():
+         self.connect()
+
+     # All done if the interface is not connected.
+     if not self.connected():
+         return
+
+     # Send as much pending data as possible via the socket.
+     while not self.rbuf.empty():
+         try:
+             sent = self.sock.send(self.rbuf.peek())
+         except socket.error, error:
+             # Socket not writable right now, or connection dead. Either
+             # way retrying immediately cannot make progress; a dead
+             # connection is detected by the receive path below.
+             break
+         if sent > 0:
+             self.rbuf.discard(sent)
+
+     # Read as much data as is available. Don't worry about
+     # overflowing our buffer: it's more important to read the
+     # incoming data stream and detect errors or closure of the
+     # remote end in a timely manner.
+     try:
+         while 1:
+             data = self.sock.recv(2048)
+             # Return of zero means the remote end has disconnected.
+             # We therefore return the console interface to listening.
+             if not data:
+                 self.listen()
+                 break
+             self.wbuf.write(data)
+     except socket.error, error:
+         # Assume that most errors mean that the connection is dead.
+         # In such cases we return the interface to 'listening' state.
+         if error[0] != errno.EAGAIN:
+             print "Better return to listening"
+             self.listen()
+             print "New status: " + str(self.status)
+
+
+ # The parameter @port is the control-interface event channel. This method
+ # returns True if messages were written to the control interface.
+ def ctrlif_transmit_work(self, port):
+     """Move buffered socket input from `wbuf` onto `port`.
+
+     Packs up to MAX_PAYLOAD bytes into each message and keeps going
+     until `wbuf` is empty or `port` has no room for another request.
+     Returns True if at least one message was written.
+     """
+     written = 0
+     while True:
+         if self.wbuf.empty() or not port.space_to_write_request():
+             break
+         msg = xend.utils.message(0, 0, 0)
+         chunk = self.wbuf.read(msg.MAX_PAYLOAD)
+         msg.append_payload(chunk)
+         port.write_request(msg)
+         written += 1
+     return written > 0
+
+
+ def ctrlif_rx_req(self, port, msg):
+     """Buffer the payload of request `msg` in `rbuf` for the socket,
+     then acknowledge it by writing `msg` back as the response."""
+     payload = msg.get_payload()
+     self.rbuf.write(payload)
+     port.write_response(msg)
diff --git a/tools/xend/lib/domain_controller.h b/tools/xend/lib/domain_controller.h
index d9ea7d6160..68d4fac1d2 100644
--- a/tools/xend/lib/domain_controller.h
+++ b/tools/xend/lib/domain_controller.h
@@ -76,8 +76,8 @@ typedef struct {
/* Messages from guest to domain controller. */
#define CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED 32
-#define CMSG_BLKIF_FE_INTERFACE_UP 33
-#define CMSG_BLKIF_FE_INTERFACE_DOWN 34
+#define CMSG_BLKIF_FE_INTERFACE_CONNECT 33
+#define CMSG_BLKIF_FE_INTERFACE_DISCONNECT 34
/* These are used by both front-end and back-end drivers. */
#define blkif_vdev_t u16
@@ -91,13 +91,13 @@ typedef struct {
* 1. The shared-memory frame is available for reuse.
* 2. Any unacknowledged messgaes pending on the interface were dropped.
*/
-#define BLKIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist. */
-#define BLKIF_INTERFACE_STATUS_DOWN 1 /* Interface exists but is down. */
-#define BLKIF_INTERFACE_STATUS_UP 2 /* Interface exists and is up. */
+#define BLKIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist. */
+#define BLKIF_INTERFACE_STATUS_DISCONNECTED 1 /* Exists but is disconnected. */
+#define BLKIF_INTERFACE_STATUS_CONNECTED 2 /* Exists and is connected. */
typedef struct {
unsigned int handle;
unsigned int status;
- unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_UP */
+ unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_CONNECTED */
} blkif_fe_interface_status_changed_t;
/*
@@ -109,30 +109,37 @@ typedef struct {
* If the driver goes DOWN while interfaces are still UP, the domain
* will automatically take the interfaces DOWN.
*/
-#define BLKIF_DRIVER_STATUS_DOWN 0
-#define BLKIF_DRIVER_STATUS_UP 1
+#define BLKIF_DRIVER_STATUS_DOWN 0
+#define BLKIF_DRIVER_STATUS_UP 1
typedef struct {
unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
} blkif_fe_driver_status_changed_t;
/*
- * CMSG_BLKIF_FE_INTERFACE_UP:
- * If successful, the domain controller will acknowledge with a STATUS_UP
- * message.
+ * CMSG_BLKIF_FE_INTERFACE_CONNECT:
+ * If successful, the domain controller will acknowledge with a
+ * STATUS_CONNECTED message.
*/
typedef struct {
unsigned int handle;
unsigned long shmem_frame;
-} blkif_fe_interface_up_t;
+} blkif_fe_interface_connect_t;
/*
- * CMSG_BLKIF_FE_INTERFACE_DOWN:
- * If successful, the domain controller will acknowledge with a STATUS_DOWN
- * message.
+ * CMSG_BLKIF_FE_INTERFACE_DISCONNECT:
+ * If successful, the domain controller will acknowledge with a
+ * STATUS_DISCONNECTED message.
*/
typedef struct {
+ /* IN */
unsigned int handle;
-} blkif_fe_interface_down_t;
+ /* OUT */
+ /*
+ * Tells driver how many interfaces it should expect to immediately
+ * receive notifications about.
+ */
+ unsigned int nr_interfaces;
+} blkif_fe_interface_disconnect_t;
/******************************************************************************
@@ -142,10 +149,12 @@ typedef struct {
/* Messages from domain controller. */
#define CMSG_BLKIF_BE_CREATE 0 /* Create a new block-device interface. */
#define CMSG_BLKIF_BE_DESTROY 1 /* Destroy a block-device interface. */
-#define CMSG_BLKIF_BE_VBD_CREATE 2 /* Create a new VBD for an interface. */
-#define CMSG_BLKIF_BE_VBD_DESTROY 3 /* Delete a VBD from an interface. */
-#define CMSG_BLKIF_BE_VBD_GROW 4 /* Append an extent to a given VBD. */
-#define CMSG_BLKIF_BE_VBD_SHRINK 5 /* Remove last extent from a given VBD. */
+#define CMSG_BLKIF_BE_CONNECT 2 /* Connect i/f to remote driver. */
+#define CMSG_BLKIF_BE_DISCONNECT 3 /* Disconnect i/f from remote driver. */
+#define CMSG_BLKIF_BE_VBD_CREATE 4 /* Create a new VBD for an interface. */
+#define CMSG_BLKIF_BE_VBD_DESTROY 5 /* Delete a VBD from an interface. */
+#define CMSG_BLKIF_BE_VBD_GROW 6 /* Append an extent to a given VBD. */
+#define CMSG_BLKIF_BE_VBD_SHRINK 7 /* Remove last extent from a given VBD. */
/* Messages to domain controller. */
#define CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED 32
@@ -167,36 +176,36 @@ typedef struct {
/* The following are specific error returns. */
#define BLKIF_BE_STATUS_INTERFACE_EXISTS 2
#define BLKIF_BE_STATUS_INTERFACE_NOT_FOUND 3
-#define BLKIF_BE_STATUS_VBD_EXISTS 4
-#define BLKIF_BE_STATUS_VBD_NOT_FOUND 5
-#define BLKIF_BE_STATUS_OUT_OF_MEMORY 6
-#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND 7
-#define BLKIF_BE_STATUS_MAPPING_ERROR 8
+#define BLKIF_BE_STATUS_INTERFACE_CONNECTED 4
+#define BLKIF_BE_STATUS_VBD_EXISTS 5
+#define BLKIF_BE_STATUS_VBD_NOT_FOUND 6
+#define BLKIF_BE_STATUS_OUT_OF_MEMORY 7
+#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND 8
+#define BLKIF_BE_STATUS_MAPPING_ERROR 9
/* This macro can be used to create an array of descriptive error strings. */
-#define BLKIF_BE_STATUS_ERRORS { \
- "Okay", \
- "Non-specific error", \
- "Interface already exists", \
- "Interface not found", \
- "VBD already exists", \
- "VBD not found", \
- "Out of memory", \
- "Extent not found for VBD", \
+#define BLKIF_BE_STATUS_ERRORS { \
+ "Okay", \
+ "Non-specific error", \
+ "Interface already exists", \
+ "Interface not found", \
+ "Interface is still connected", \
+ "VBD already exists", \
+ "VBD not found", \
+ "Out of memory", \
+ "Extent not found for VBD", \
"Could not map domain memory" }
/*
* CMSG_BLKIF_BE_CREATE:
* When the driver sends a successful response then the interface is fully
- * set up. The controller will send an UP notification to the front-end
+ * created. The controller will send a DISCONNECTED notification to the front-end
* driver.
*/
typedef struct {
/* IN */
domid_t domid; /* Domain attached to new interface. */
unsigned int blkif_handle; /* Domain-specific interface handle. */
- unsigned int evtchn; /* Event channel for notifications. */
- unsigned long shmem_frame; /* Page cont. shared comms window. */
/* OUT */
unsigned int status;
} blkif_be_create_t;
@@ -204,8 +213,8 @@ typedef struct {
/*
* CMSG_BLKIF_BE_DESTROY:
* When the driver sends a successful response then the interface is fully
- * torn down. The controller will send a DOWN notification to the front-end
- * driver.
+ * torn down. The controller will send a DESTROYED notification to the
+ * front-end driver.
*/
typedef struct {
/* IN */
@@ -215,6 +224,36 @@ typedef struct {
unsigned int status;
} blkif_be_destroy_t;
+/*
+ * CMSG_BLKIF_BE_CONNECT:
+ * When the driver sends a successful response then the interface is fully
+ * connected. The controller will send a CONNECTED notification to the
+ * front-end driver.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* Domain attached to the interface. */
+ unsigned int blkif_handle; /* Domain-specific interface handle. */
+ unsigned int evtchn; /* Event channel for notifications. */
+ unsigned long shmem_frame; /* Page containing shared comms window. */
+ /* OUT */
+ unsigned int status; /* Presumably a BLKIF_BE_STATUS_* code (confirm). */
+} blkif_be_connect_t;
+
+/*
+ * CMSG_BLKIF_BE_DISCONNECT:
+ * When the driver sends a successful response then the interface is fully
+ * disconnected. The controller will send a DOWN notification to the front-end
+ * driver.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* Domain attached to the interface. */
+ unsigned int blkif_handle; /* Domain-specific interface handle. */
+ /* OUT */
+ unsigned int status; /* Presumably a BLKIF_BE_STATUS_* code (confirm). */
+} blkif_be_disconnect_t;
+
/* CMSG_BLKIF_BE_VBD_CREATE */
typedef struct {
/* IN */
@@ -264,7 +303,14 @@ typedef struct {
* will automatically send DOWN notifications.
*/
typedef struct {
+ /* IN */
unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
+ /* OUT */
+ /*
+ * Tells driver how many interfaces it should expect to immediately
+ * receive notifications about.
+ */
+ unsigned int nr_interfaces;
} blkif_be_driver_status_changed_t;
#endif /* __DOMAIN_CONTROLLER_H__ */
diff --git a/tools/xend/lib/main.py b/tools/xend/lib/main.py
index b870af55d1..7b5adbab83 100755
--- a/tools/xend/lib/main.py
+++ b/tools/xend/lib/main.py
@@ -5,7 +5,7 @@
###########################################################
import errno, re, os, pwd, select, signal, socket, struct, sys, time
-import xend.console, xend.manager, xend.utils, Xc
+import xend.blkif, xend.console, xend.manager, xend.utils, Xc
# The following parameters could be placed in a configuration file.
@@ -16,13 +16,35 @@ CONTROL_DIR = '/var/run/xend'
UNIX_SOCK = 'management_sock' # relative to CONTROL_DIR
+CMSG_CONSOLE = 0
+CMSG_BLKIF_BE = 1
+CMSG_BLKIF_FE = 2
+
+
+def port_from_dom(dom):
+    # Look through the master port list for a port whose remote end is
+    # domain @dom. Returns that port object, or None if there is no match.
+    global port_list
+    for candidate in port_list.values():
+        if candidate.remote_dom == dom:
+            return candidate
+    return None
+
+
+def send_management_response(response, addr):
+    # Stringify @response, log it, and send it to @addr over the management
+    # socket. Delivery is best-effort: socket errors are ignored.
+    try:
+        text = str(response)
+        print "Mgmt_rsp[%s]: %s" % (addr, text)
+        management_interface.sendto(text, addr)
+    except socket.error:
+        pass
+
+
def daemon_loop():
# Could we do this more nicely? The xend.manager functions need access
# to this global state to do their work.
- global control_list, notifier
+ global port_list, notifier, management_interface, mgmt_req_addr, dom0_port
- # List of all control interfaces, indexed by local event-channel port.
- control_list = {}
+ # Lists of all interfaces, indexed by local event-channel port.
+ port_list = {}
xc = Xc.new()
@@ -46,13 +68,10 @@ def daemon_loop():
# The DOM0 control interface is not set up via the management interface.
# Note that console messages don't come our way (actually, only driver
- # back-ends should use the DOM0 control interface) -- the console
- # structures are dummies.
+ # back-ends should use the DOM0 control interface).
dom0_port = xend.utils.port(0)
- xend.main.notifier.bind(dom0_port.local_port)
- xend.main.control_list[dom0_port.local_port] = \
- (dom0_port, xend.utils.buffer(), xend.utils.buffer(), \
- xend.console.interface(0, dom0_port.local_port))
+ notifier.bind(dom0_port.local_port)
+ port_list[dom0_port.local_port] = dom0_port
##
## MAIN LOOP
@@ -68,10 +87,10 @@ def daemon_loop():
waitset = select.poll()
waitset.register(management_interface, select.POLLIN)
waitset.register(notifier, select.POLLIN)
- for idx, (port, rbuf, wbuf, con_if) in control_list.items():
+ for idx, con_if in xend.console.interface.list_by_fd.items():
if not con_if.closed():
pflags = select.POLLIN
- if not rbuf.empty() and con_if.connected():
+ if not con_if.rbuf.empty() and con_if.connected():
pflags = select.POLLIN | select.POLLOUT
waitset.register(con_if.sock.fileno(), pflags)
@@ -82,16 +101,16 @@ def daemon_loop():
# These should consist of executable Python statements that call
# well-known management functions (e.g., new_control_interface(dom=9)).
try:
- data, addr = management_interface.recvfrom(2048)
+ data, mgmt_req_addr = management_interface.recvfrom(2048)
except socket.error, error:
if error[0] != errno.EAGAIN:
raise
else:
- if addr:
+ if mgmt_req_addr:
# Evaluate the request in an exception-trapping sandbox.
try:
- print "Mgmt_req[%s]: %s" % (addr, data)
- response = str(eval('xend.manager.'+data))
+ print "Mgmt_req[%s]: %s" % (mgmt_req_addr, data)
+ response = eval('xend.manager.'+data)
except:
# Catch all exceptions and turn into an error response:
@@ -107,69 +126,20 @@ def daemon_loop():
response = str(response)
# Try to send a response to the requester.
- try:
- print "Mgmt_rsp[%s]: %s" % (addr, response)
- management_interface.sendto(response, addr)
- except socket.error, error:
- pass
+ if response:
+ send_management_response(response, mgmt_req_addr)
# Do work for every console interface that hit in the poll set.
for (fd, events) in fdset:
- if not xend.console.interface.interface_list.has_key(fd):
- continue
- con_if = xend.console.interface.interface_list[fd]
-
- # If the interface is listening, check for pending connections.
- if con_if.listening():
- con_if.connect()
-
- # All done if the interface is not connected.
- if not con_if.connected():
- continue
- (port, rbuf, wbuf, con_if) = control_list[con_if.key]
-
- # Send as much pending data as possible via the socket.
- while not rbuf.empty():
- try:
- bytes = con_if.sock.send(rbuf.peek())
- if bytes > 0:
- rbuf.discard(bytes)
- except socket.error, error:
- pass
-
- # Read as much data as is available. Don't worry about
- # overflowing our buffer: it's more important to read the
- # incoming data stream and detect errors or closure of the
- # remote end in a timely manner.
- try:
- while 1:
- data = con_if.sock.recv(2048)
- # Return of zero means the remote end has disconnected.
- # We therefore return the console interface to listening.
- if not data:
- con_if.listen()
- break
- wbuf.write(data)
- except socket.error, error:
- # Assume that most errors mean that the connection is dead.
- # In such cases we return the interface to 'listening' state.
- if error[0] != errno.EAGAIN:
- print "Better return to listening"
- con_if.listen()
- print "New status: " + str(con_if.status)
-
- # We may now have pending data to send via the relevant
- # inter-domain control interface. If so then we send all we can
- # and notify the remote end.
- work_done = False
- while not wbuf.empty() and port.space_to_write_request():
- msg = xend.utils.message(0, 0, 0)
- msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
- port.write_request(msg)
- work_done = True
- if work_done:
- port.notify()
-
+ if xend.console.interface.list_by_fd.has_key(fd):
+ con_if = xend.console.interface.list_by_fd[fd]
+ con_if.socket_work()
+ # We may now have pending data to send via the control
+ # interface. If so then send all we can and notify the remote.
+ port = port_list[con_if.key]
+ if con_if.ctrlif_transmit_work(port):
+ port.notify()
+
# Process control-interface notifications from other guest OSes.
while 1:
# Grab a notification, if there is one.
@@ -178,42 +148,69 @@ def daemon_loop():
break
(idx, type) = notification
- if not control_list.has_key(idx):
+ if not port_list.has_key(idx):
continue
- (port, rbuf, wbuf, con_if) = control_list[idx]
+ port = port_list[idx]
work_done = False
+ con_if = False
+ if xend.console.interface.list.has_key(idx):
+ con_if = xend.console.interface.list[idx]
+
+ blk_if = False
+ if xend.blkif.interface.list.has_key(idx):
+ blk_if = xend.blkif.interface.list[idx]
+
# If we pick up a disconnect notification then we do any necessary
# cleanup.
if type == notifier.EXCEPTION:
ret = xc.evtchn_status(idx)
if ret['status'] == 'unbound':
notifier.unbind(idx)
- con_if.close()
- del control_list[idx], port, rbuf, wbuf, con_if
+ del port_list[idx], port
+ if con_if:
+ con_if.destroy()
+ del con_if
+ if blk_if:
+ blk_if.destroy()
+ del blk_if
continue
- # Read incoming requests. Currently assume that request
- # message always containb console data.
+ # Process incoming requests.
while port.request_to_read():
msg = port.read_request()
- rbuf.write(msg.get_payload())
- port.write_response(msg)
work_done = True
-
- # Incoming responses are currently thrown on the floor.
+ type = (msg.get_header())['type']
+ if type == CMSG_CONSOLE and con_if:
+ con_if.ctrlif_rx_req(port, msg)
+ elif type == CMSG_BLKIF_FE and blk_if:
+ blk_if.ctrlif_rx_req(port, msg)
+ elif type == CMSG_BLKIF_BE and port == dom0_port:
+ xend.blkif.backend_rx_req(port, msg)
+ else:
+ port.write_response(msg)
+
+ # Process incoming responses.
while port.response_to_read():
msg = port.read_response()
work_done = True
+ type = (msg.get_header())['type']
+ if type == CMSG_BLKIF_BE and port == dom0_port:
+ xend.blkif.backend_rx_rsp(port, msg)
+
+ # Send console data.
+ if con_if and con_if.ctrlif_transmit_work(port):
+ work_done = True
- # Send as much pending console data as there is room for.
- while not wbuf.empty() and port.space_to_write_request():
- msg = xend.utils.message(0, 0, 0)
- msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
- port.write_request(msg)
+ # Send blkif messages.
+ if blk_if and blk_if.ctrlif_transmit_work(port):
work_done = True
+ # Back-end block-device work.
+ if port == dom0_port and xend.blkif.backend_do_work(port):
+ work_done = True
+
# Finally, notify the remote end of any work that we did.
if work_done:
port.notify()
diff --git a/tools/xend/lib/manager.py b/tools/xend/lib/manager.py
index 42d66d3a95..ea7398cd4c 100644
--- a/tools/xend/lib/manager.py
+++ b/tools/xend/lib/manager.py
@@ -4,13 +4,13 @@
## Copyright (c) 2004, K A Fraser (University of Cambridge)
#############################################################
-import xend.console, xend.main, xend.utils
+import xend.blkif, xend.console, xend.main, xend.utils
##
## new_control_interface:
-## Create a new control interface with the specified domain 'dom'.
-## The console port may also be specified; otehrwise a suitable port is
+## Create a new control interface with the specified domain @dom.
+## The console port may also be specified; otherwise a suitable port is
## automatically allocated.
##
def new_control_interface(dom, console_port=-1):
@@ -26,9 +26,8 @@ def new_control_interface(dom, console_port=-1):
con_if = xend.console.interface(console_port, port.local_port)
con_if.listen()
- # Add control state to the master list.
- xend.main.control_list[port.local_port] = \
- (port, xend.utils.buffer(), xend.utils.buffer(), con_if)
+ # Update the master port list.
+ xend.main.port_list[port.local_port] = port
# Construct the successful response to be returned to the requester.
response = { 'success': True }
@@ -36,3 +35,81 @@ def new_control_interface(dom, console_port=-1):
response['remote_port'] = port.remote_port
response['console_port'] = console_port
return response
+
+
+##
+## new_block_interface:
+##  Create a new block interface for the specified domain @dom.
+##  Only handle 0 is supported; a negative @handle selects that default.
+##  Returns a failure dictionary ('success': False plus 'error_type') if the
+##  request is rejected. Otherwise returns None: the response is deferred
+##  until the back-end driver sends its acknowledgement.
+##
+def new_block_interface(dom, handle=-1):
+    # By default we create an interface with handle zero.
+    if handle < 0:
+        handle = 0
+
+    # We only support one interface per domain, which must have handle zero.
+    if handle != 0:
+        response = { 'success': False }
+        # NB. '%' binds tighter than '+', so the concatenated format string
+        # must be parenthesised before the arguments are applied to it.
+        response['error_type'] = ('Bad handle %d (only handle 0 ' +
+                                  'is supported)') % handle
+        return response
+
+    # Find local event-channel port associated with the specified domain.
+    port = xend.main.port_from_dom(dom)
+    if not port:
+        response = { 'success': False }
+        response['error_type'] = 'Unknown domain %d' % dom
+        return response
+
+    # The interface must not already exist.
+    if xend.blkif.interface.list.has_key(port.local_port):
+        response = { 'success': False }
+        response['error_type'] = ('Interface (dom=%d,handle=%d) already ' +
+                                  'exists') % (dom, handle)
+        return response
+
+    # Create the new interface. Initially no virtual devices are attached.
+    xend.blkif.interface(dom, port.local_port)
+
+    # Response is deferred until back-end driver sends acknowledgement.
+    return None
+
+
+##
+## new_block_device:
+##  Attach a new virtual block device to the specified block interface
+##  (@dom, @handle). The new device is identified by @vdev, and maps to
+##  the real block extent (@pdev, @start_sect, @nr_sect). If @readonly then
+##  write requests to @vdev will be rejected.
+##  Returns a failure dictionary ('success': False plus 'error_type') if the
+##  request is rejected. Otherwise returns None: the response is deferred
+##  until the back-end driver sends its acknowledgement.
+##
+def new_block_device(dom, handle, vdev, pdev, start_sect, nr_sect, readonly):
+    # We only support one interface per domain, which must have handle zero.
+    if handle != 0:
+        response = { 'success': False }
+        # NB. '%' binds tighter than '+', so the concatenated format string
+        # must be parenthesised before the arguments are applied to it.
+        response['error_type'] = ('Bad handle %d (only handle 0 ' +
+                                  'is supported)') % handle
+        return response
+
+    # Find local event-channel port associated with the specified domain.
+    port = xend.main.port_from_dom(dom)
+    if not port:
+        response = { 'success': False }
+        response['error_type'] = 'Unknown domain %d' % dom
+        return response
+
+    # The interface must exist.
+    if not xend.blkif.interface.list.has_key(port.local_port):
+        response = { 'success': False }
+        response['error_type'] = ('Interface (dom=%d,handle=%d) does not ' +
+                                  'exist') % (dom, handle)
+        return response
+
+    # The virtual device must not yet exist.
+    blkif = xend.blkif.interface.list[port.local_port]
+    if not blkif.attach_device(vdev, pdev, start_sect, nr_sect, readonly):
+        response = { 'success': False }
+        response['error_type'] = ('Vdevice (dom=%d,handle=%d,vdevice=%d) ' +
+                                  'already exists') % (dom, handle, vdev)
+        return response
+
+    # Response is deferred until back-end driver sends acknowledgement.
+    return None
diff --git a/tools/xend/lib/utils.c b/tools/xend/lib/utils.c
index c28d682ec9..297976e9be 100644
--- a/tools/xend/lib/utils.c
+++ b/tools/xend/lib/utils.c
@@ -22,11 +22,11 @@
#include <signal.h>
#include <xc.h>
-#include <asm-xen/proc_cmd.h>
-
#include <hypervisor-if.h>
#include "domain_controller.h"
+#include <asm-xen/proc_cmd.h>
+
/* Needed for Python versions earlier than 2.3. */
#ifndef PyMODINIT_FUNC
#define PyMODINIT_FUNC DL_EXPORT(void)