aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoriap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>2004-05-02 00:52:45 +0000
committeriap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>2004-05-02 00:52:45 +0000
commit3882a78594317ddcaeb97e585080a998ab8a17fe (patch)
tree2d0182dc449cbd96e404e3b94f8964e393fd39f0
parent685d71bab181325dd4f5a5965314fa647fdf68a6 (diff)
parentf7a3ec9450a8f4aeb43bfa4ad65636325e756a61 (diff)
downloadxen-3882a78594317ddcaeb97e585080a998ab8a17fe.tar.gz
xen-3882a78594317ddcaeb97e585080a998ab8a17fe.tar.bz2
xen-3882a78594317ddcaeb97e585080a998ab8a17fe.zip
bitkeeper revision 1.885 (4094465dmX2TefiJicy4taNzc0jiaw)
manual merge
-rw-r--r--.rootkeys5
-rwxr-xr-xtools/examples/xc_dom_create.py127
-rw-r--r--tools/xc/lib/xc.h18
-rw-r--r--tools/xc/lib/xc_linux_build.c16
-rw-r--r--tools/xc/lib/xc_netbsd_build.c10
-rw-r--r--tools/xc/py/Xc.c28
-rw-r--r--tools/xend/lib/blkif.py143
-rw-r--r--tools/xend/lib/console.py83
-rw-r--r--tools/xend/lib/domain_controller.h217
-rwxr-xr-xtools/xend/lib/main.py177
-rw-r--r--tools/xend/lib/manager.py89
-rw-r--r--tools/xend/lib/utils.c24
-rw-r--r--tools/xend/setup.py3
-rw-r--r--xen/arch/i386/pdb-stub.c8
-rw-r--r--xen/common/debug-linux.c38
-rw-r--r--xen/common/event_channel.c5
-rw-r--r--xen/common/kernel.c4
-rw-r--r--xen/common/physdev.c93
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h45
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c53
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c214
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c282
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c322
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h23
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile2
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h (renamed from xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h)24
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c (renamed from xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c)358
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c100
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c11
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c26
-rw-r--r--xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c53
-rw-r--r--xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h2
-rw-r--r--xenolinux-2.4.26-sparse/include/asm-xen/io.h36
-rw-r--r--xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h13
-rw-r--r--xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h18
-rw-r--r--xenolinux-2.4.26-sparse/mm/vmalloc.c9
36 files changed, 1742 insertions, 937 deletions
diff --git a/.rootkeys b/.rootkeys
index 74cc58825a..f73ce770b4 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -102,6 +102,7 @@
4055ee44Bu6oP7U0WxxXypbUt4dNPQ tools/xenctl/setup.py
40431ac64Hj4ixUnKmlugZKhXPFE_Q tools/xend/Makefile
4055ad95Se-FqttgxollqOAAHB94zA tools/xend/lib/__init__.py
+4092738fMRGC9fFBcPRCWaJaj9U3ag tools/xend/lib/blkif.py
4055ad97wMLUj0BZT0e_T0EwQN0Bvw tools/xend/lib/console.py
4048c0ddsF0WrU7HUzTvg1MJoCIfWg tools/xend/lib/domain_controller.h
4054a301VEag2GwrBrFBna5U1BGlLA tools/xend/lib/main.py
@@ -665,8 +666,8 @@
4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
-4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c
-4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h
+4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h
+4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.26-sparse/arch/xen/drivers/block/Makefile
3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c
diff --git a/tools/examples/xc_dom_create.py b/tools/examples/xc_dom_create.py
index 799319c6a6..0fae2b251b 100755
--- a/tools/examples/xc_dom_create.py
+++ b/tools/examples/xc_dom_create.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import string, sys, os, time, socket, getopt, signal, syslog
-import Xc, xenctl.utils, xenctl.console_client
+import Xc, xenctl.utils, xenctl.console_client, re
config_dir = '/etc/xc/'
config_file = xc_config_file = config_dir + 'defaults'
@@ -195,6 +195,15 @@ output('VM cmdline : "%s"' % cmdline)
if dryrun:
sys.exit(1)
+##### HACK HACK HACK
+##### Until everyone moves to the new I/O world, and a more robust domain
+##### controller (xend), we use this little trick to discover whether we
+##### are in a testing environment for new I/O stuff.
+new_io_world = True
+for line in os.popen('cat /proc/interrupts').readlines():
+ if re.search('blkdev', line):
+ new_io_world = False
+
##### Code beyond this point is actually used to manage the mechanics of
##### starting (and watching if necessary) guest virtual machines.
@@ -228,19 +237,23 @@ def make_domain():
cmsg = 'new_control_interface(dom='+str(id)+', console_port='+str(console_port)+')'
- xend_response = xenctl.utils.xend_control_message(cmsg)
+ cons_response = xenctl.utils.xend_control_message(cmsg)
- if not xend_response['success']:
+ if not cons_response['success']:
print "Error creating initial event channel"
- print "Error type: " + xend_response['error_type']
- if xend_response['error_type'] == 'exception':
- print "Exception type: " + xend_response['exception_type']
- print "Exception value: " + xend_response['exception_value']
+ print "Error type: " + cons_response['error_type']
+ if cons_response['error_type'] == 'exception':
+ print "Exception type: " + cons_response['exception_type']
+ print "Exception value: " + cons_response['exception_value']
xc.domain_destroy ( dom=id )
sys.exit()
+ # will the domain have IO privileges?
+ if pci_device_list != []: io_priv = True
+ else: io_priv = False
+
if restore:
- ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1 )' % builder_fn)
+ ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1, io_priv=%d )' % (builder_fn, io_priv))
if ret < 0:
print "Error restoring domain"
print "Return code = " + str(ret)
@@ -248,7 +261,7 @@ def make_domain():
sys.exit()
else:
- ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"] )' % builder_fn)
+ ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=cons_response["remote_port"], io_priv=%d )' % (builder_fn, io_priv) )
if ret < 0:
print "Error building Linux guest OS: "
print "Return code = " + str(ret)
@@ -259,6 +272,18 @@ def make_domain():
# set the expertise level appropriately
xenctl.utils.VBD_EXPERT_MODE = vbd_expert
+
+ if new_io_world:
+ cmsg = 'new_block_interface(dom='+str(id)+')'
+ xend_response = xenctl.utils.xend_control_message(cmsg)
+ if not xend_response['success']:
+ print "Error creating block interface"
+ print "Error type: " + xend_response['error_type']
+ if xend_response['error_type'] == 'exception':
+ print "Exception type: " + xend_response['exception_type']
+ print "Exception val: " + xend_response['exception_value']
+ xc.domain_destroy ( dom=id )
+ sys.exit()
for ( uname, virt_name, rw ) in vbd_list:
virt_dev = xenctl.utils.blkdev_name_to_number( virt_name )
@@ -269,42 +294,70 @@ def make_domain():
xc.domain_destroy ( dom=id )
sys.exit()
- # check that setting up this VBD won't violate the sharing
- # allowed by the current VBD expertise level
- if xenctl.utils.vd_extents_validate(segments, rw=='w' or rw=='rw') < 0:
- xc.domain_destroy( dom = id )
- sys.exit()
+ if new_io_world:
+ if len(segments) > 1:
+ print "New I/O world cannot deal with multi-extent vdisks"
+ xc.domain_destroy ( dom=id )
+ sys.exit()
+ seg = segments[0]
+ cmsg = 'new_block_device(dom=' + str(id) + \
+ ',handle=0,vdev=' + str(virt_dev) + \
+ ',pdev=' + str(seg['device']) + \
+ ',start_sect=' + str(seg['start_sector']) + \
+ ',nr_sect=' + str(seg['nr_sectors']) + \
+ ',readonly=' + str(not re.match('w',rw)) + ')'
+ xend_response = xenctl.utils.xend_control_message(cmsg)
+ if not xend_response['success']:
+ print "Error creating virtual block device"
+ print "Error type: " + xend_response['error_type']
+ if xend_response['error_type'] == 'exception':
+ print "Exception type: " + xend_response['exception_type']
+ print "Exception val: " + xend_response['exception_value']
+ xc.domain_destroy ( dom=id )
+ sys.exit()
+ else:
+ # check that setting up this VBD won't violate the sharing
+ # allowed by the current VBD expertise level
+ if xenctl.utils.vd_extents_validate(segments,
+ rw=='w' or rw=='rw') < 0:
+ xc.domain_destroy( dom = id )
+ sys.exit()
- if xc.vbd_create( dom=id, vbd=virt_dev, writeable= rw=='w' or rw=='rw' ):
- print "Error creating VBD vbd=%d writeable=%d\n" % (virt_dev,rw)
- xc.domain_destroy ( dom=id )
- sys.exit()
+ if xc.vbd_create( dom=id, vbd=virt_dev,
+ writeable= rw=='w' or rw=='rw' ):
+ print "Error creating VBD %d (writeable=%d)\n" % (virt_dev,rw)
+ xc.domain_destroy ( dom=id )
+ sys.exit()
- if xc.vbd_setextents( dom=id,
- vbd=virt_dev,
- extents=segments):
- print "Error populating VBD vbd=%d\n" % virt_dev
- xc.domain_destroy ( dom=id )
- sys.exit()
-
- # setup virtual firewall rules for all aliases
- for ip in vfr_ipaddr:
- xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
-
- # check for physical device access
- for (pci_bus, pci_dev, pci_func) in pci_device_list:
- if xc.physdev_pci_access_modify(
- dom=id, bus=pci_bus, dev=pci_dev, func=pci_func, enable=1 ) < 0:
- print "Non-fatal error enabling PCI device access."
- else:
- print "Enabled PCI access (%d:%d:%d)." % (pci_bus,pci_dev,pci_func)
+ if xc.vbd_setextents( dom=id,
+ vbd=virt_dev,
+ extents=segments):
+ print "Error populating VBD vbd=%d\n" % virt_dev
+ xc.domain_destroy ( dom=id )
+ sys.exit()
+
+ if not new_io_world:
+ # setup virtual firewall rules for all aliases
+ for ip in vfr_ipaddr:
+ xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
+
+ if new_io_world:
+ # check for physical device access
+ for (pci_bus, pci_dev, pci_func) in pci_device_list:
+ if xc.physdev_pci_access_modify(
+ dom=id, bus=pci_bus, dev=pci_dev,
+ func=pci_func, enable=1 ) < 0:
+ print "Non-fatal error enabling PCI device access."
+ else:
+ print "Enabled PCI access (%d:%d:%d)." % \
+ (pci_bus,pci_dev,pci_func)
if xc.domain_start( dom=id ) < 0:
print "Error starting domain"
xc.domain_destroy ( dom=id )
sys.exit()
- return (id, xend_response['console_port'])
+ return (id, cons_response['console_port'])
# end of make_domain()
def mkpidfile():
diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h
index 4afb905955..eb1b07da91 100644
--- a/tools/xc/lib/xc.h
+++ b/tools/xc/lib/xc.h
@@ -64,7 +64,7 @@ int xc_linux_save(int xc_handle,
int verbose);
int xc_linux_restore(int xc_handle,
- u64 domid,
+ u64 domid,
const char *state_file,
int verbose,
u64 *pdomid);
@@ -74,13 +74,15 @@ int xc_linux_build(int xc_handle,
const char *image_name,
const char *ramdisk_name,
const char *cmdline,
- unsigned int control_evtchn);
+ unsigned int control_evtchn,
+ int io_priv);
int xc_netbsd_build(int xc_handle,
u64 domid,
const char *image_name,
const char *cmdline,
- unsigned int control_evtchn);
+ unsigned int control_evtchn,
+ int io_priv);
int xc_bvtsched_global_set(int xc_handle,
unsigned long ctx_allow);
@@ -248,15 +250,15 @@ int xc_shadow_control(int xc_handle,
int xc_domain_setname(int xc_handle,
u64 domid,
- char *name);
+ char *name);
int xc_domain_setinitialmem(int xc_handle,
- u64 domid,
- unsigned int initial_memkb);
+ u64 domid,
+ unsigned int initial_memkb);
int xc_domain_setmaxmem(int xc_handle,
- u64 domid,
- unsigned int max_memkb);
+ u64 domid,
+ unsigned int max_memkb);
#endif /* __XC_H__ */
diff --git a/tools/xc/lib/xc_linux_build.c b/tools/xc/lib/xc_linux_build.c
index 42696666a8..67351210fc 100644
--- a/tools/xc/lib/xc_linux_build.c
+++ b/tools/xc/lib/xc_linux_build.c
@@ -74,7 +74,8 @@ static int setup_guestos(int xc_handle,
full_execution_context_t *ctxt,
const char *cmdline,
unsigned long shared_info_frame,
- unsigned int control_evtchn)
+ unsigned int control_evtchn,
+ int io_priv)
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -268,7 +269,7 @@ static int setup_guestos(int xc_handle,
memset(start_info, 0, sizeof(*start_info));
start_info->nr_pages = nr_pages;
start_info->shared_info = shared_info_frame << PAGE_SHIFT;
- start_info->flags = 0;
+ start_info->flags = io_priv ? SIF_PRIVILEGED : 0;
start_info->pt_base = vpt_start;
start_info->nr_pt_frames = nr_pt_pages;
start_info->mfn_list = vphysmap_start;
@@ -381,7 +382,8 @@ int xc_linux_build(int xc_handle,
const char *image_name,
const char *ramdisk_name,
const char *cmdline,
- unsigned int control_evtchn)
+ unsigned int control_evtchn,
+ int io_priv)
{
dom0_op_t launch_op, op;
int initrd_fd = -1;
@@ -446,7 +448,7 @@ int xc_linux_build(int xc_handle,
&vstartinfo_start, &vkern_entry,
ctxt, cmdline,
op.u.getdomaininfo.shared_info_frame,
- control_evtchn) < 0 )
+ control_evtchn, io_priv) < 0 )
{
ERROR("Error constructing guest OS");
goto error_out;
@@ -560,13 +562,13 @@ static int readelfimage_base_and_size(char *elfbase,
if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
{
- ERROR("ELF program headers extend beyond end of image.");
+ ERROR("ELF program headers extend beyond end of image.");
return -EINVAL;
}
if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
{
- ERROR("ELF section headers extend beyond end of image.");
+ ERROR("ELF section headers extend beyond end of image.");
return -EINVAL;
}
@@ -642,7 +644,7 @@ static int loadelfimage(char *elfbase, int pmh, unsigned long *parray,
{
phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
if ( !is_loadable_phdr(phdr) )
- continue;
+ continue;
for ( done = 0; done < phdr->p_filesz; done += chunksz )
{
diff --git a/tools/xc/lib/xc_netbsd_build.c b/tools/xc/lib/xc_netbsd_build.c
index 7c67d57d71..8260c75ea4 100644
--- a/tools/xc/lib/xc_netbsd_build.c
+++ b/tools/xc/lib/xc_netbsd_build.c
@@ -63,7 +63,8 @@ static int setup_guestos(int xc_handle,
full_execution_context_t *ctxt,
const char *cmdline,
unsigned long shared_info_frame,
- unsigned int control_evtchn)
+ unsigned int control_evtchn,
+ int io_priv)
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -175,7 +176,7 @@ static int setup_guestos(int xc_handle,
start_info->mod_len = symtab_len;
start_info->nr_pages = tot_pages;
start_info->shared_info = shared_info_frame << PAGE_SHIFT;
- start_info->flags = 0;
+ start_info->flags = io_priv ? SIF_PRIVILEGED : 0;
start_info->domain_controller_evtchn = control_evtchn;
strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE);
start_info->cmd_line[MAX_CMDLINE-1] = '\0';
@@ -212,7 +213,8 @@ int xc_netbsd_build(int xc_handle,
u64 domid,
const char *image_name,
const char *cmdline,
- unsigned int control_evtchn)
+ unsigned int control_evtchn,
+ int io_priv)
{
dom0_op_t launch_op, op;
unsigned long load_addr;
@@ -269,7 +271,7 @@ int xc_netbsd_build(int xc_handle,
&virt_startinfo_addr,
&load_addr, &st_ctxt, cmdline,
op.u.getdomaininfo.shared_info_frame,
- control_evtchn) < 0 )
+ control_evtchn, io_priv) < 0 )
{
ERROR("Error constructing guest OS");
goto error_out;
diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c
index 92f77f7051..322a20b411 100644
--- a/tools/xc/py/Xc.c
+++ b/tools/xc/py/Xc.c
@@ -228,18 +228,19 @@ static PyObject *pyxc_linux_build(PyObject *self,
u64 dom;
char *image, *ramdisk = NULL, *cmdline = "";
- int control_evtchn;
+ int control_evtchn, io_priv = 0;
static char *kwd_list[] = { "dom", "control_evtchn",
- "image", "ramdisk", "cmdline", NULL };
+ "image", "ramdisk", "cmdline", "io_priv",
+ NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ss", kwd_list,
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ssi", kwd_list,
&dom, &control_evtchn,
- &image, &ramdisk, &cmdline) )
+ &image, &ramdisk, &cmdline, &io_priv) )
return NULL;
if ( xc_linux_build(xc->xc_handle, dom, image,
- ramdisk, cmdline, control_evtchn) != 0 )
+ ramdisk, cmdline, control_evtchn, io_priv) != 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
@@ -254,18 +255,19 @@ static PyObject *pyxc_netbsd_build(PyObject *self,
u64 dom;
char *image, *ramdisk = NULL, *cmdline = "";
- int control_evtchn;
+ int control_evtchn, io_priv = 0;
static char *kwd_list[] = { "dom", "control_evtchn",
- "image", "ramdisk", "cmdline", NULL };
+ "image", "ramdisk", "cmdline", "io_priv",
+ NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ss", kwd_list,
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ssi", kwd_list,
&dom, &control_evtchn,
- &image, &ramdisk, &cmdline) )
+ &image, &ramdisk, &cmdline, &io_priv) )
return NULL;
if ( xc_netbsd_build(xc->xc_handle, dom, image,
- cmdline, control_evtchn) != 0 )
+ cmdline, control_evtchn, io_priv) != 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
@@ -1160,7 +1162,8 @@ static PyMethodDef pyxc_methods[] = {
" dom [long]: Identifier of domain to build into.\n"
" image [str]: Name of kernel image file. May be gzipped.\n"
" ramdisk [str, n/a]: Name of ramdisk file, if any.\n"
- " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
+ " cmdline [str, n/a]: Kernel parameters, if any.\n"
+ " io_priv [boolean]: Does the domain have IO privileges?\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "netbsd_build",
@@ -1169,7 +1172,8 @@ static PyMethodDef pyxc_methods[] = {
"Build a new NetBSD guest OS.\n"
" dom [long]: Identifier of domain to build into.\n"
" image [str]: Name of kernel image file. May be gzipped.\n"
- " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
+ " cmdline [str, n/a]: Kernel parameters, if any.\n"
+ " io_priv [boolean]: Does the domain have IO privileges?\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "bvtsched_global_set",
diff --git a/tools/xend/lib/blkif.py b/tools/xend/lib/blkif.py
new file mode 100644
index 0000000000..94e058f7ce
--- /dev/null
+++ b/tools/xend/lib/blkif.py
@@ -0,0 +1,143 @@
+
+#################################################################
+## xend/blkif.py -- Block-interface management functions for Xend
+## Copyright (c) 2004, K A Fraser (University of Cambridge)
+#################################################################
+
+import errno, re, os, select, signal, socket, struct, sys
+import xend.main, xend.console, xend.manager, xend.utils, Xc
+
+CMSG_BLKIF_BE = 1
+CMSG_BLKIF_FE = 2
+CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED = 0
+CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED = 32
+CMSG_BLKIF_FE_INTERFACE_CONNECT = 33
+CMSG_BLKIF_FE_INTERFACE_DISCONNECT = 34
+CMSG_BLKIF_BE_CREATE = 0
+CMSG_BLKIF_BE_DESTROY = 1
+CMSG_BLKIF_BE_CONNECT = 2
+CMSG_BLKIF_BE_DISCONNECT = 3
+CMSG_BLKIF_BE_VBD_CREATE = 4
+CMSG_BLKIF_BE_VBD_DESTROY = 5
+CMSG_BLKIF_BE_VBD_GROW = 6
+CMSG_BLKIF_BE_VBD_SHRINK = 7
+
+pendmsg = None
+pendaddr = None
+
+def backend_tx_req(msg):
+ port = xend.main.dom0_port
+ if port.space_to_write_request():
+ port.write_request(msg)
+ port.notify()
+ else:
+ xend.blkif.pendmsg = msg
+
+def backend_rx_req(port, msg):
+ port.write_response(msg)
+
+def backend_rx_rsp(port, msg):
+ subtype = (msg.get_header())['subtype']
+ print "Received blkif-be response, subtype %d" % subtype
+ if subtype == CMSG_BLKIF_BE_CREATE:
+ rsp = { 'success': True }
+ xend.main.send_management_response(rsp, xend.blkif.pendaddr)
+ elif subtype == CMSG_BLKIF_BE_CONNECT:
+ (dom,hnd,evtchn,frame,st) = struct.unpack("QIILI", msg.get_payload())
+ blkif = interface.list[xend.main.port_from_dom(dom).local_port]
+ msg = xend.utils.message(CMSG_BLKIF_FE, \
+ CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
+ msg.append_payload(struct.pack("III",0,2,blkif.evtchn['port2']))
+ blkif.ctrlif_tx_req(xend.main.port_list[blkif.key], msg)
+ elif subtype == CMSG_BLKIF_BE_VBD_CREATE:
+ (dom,hnd,vdev,ro,st) = struct.unpack("QIHII", msg.get_payload())
+ blkif = interface.list[xend.main.port_from_dom(dom).local_port]
+ (pdev, start_sect, nr_sect, readonly) = blkif.devices[vdev]
+ msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_GROW, 0)
+ msg.append_payload(struct.pack("QIHHHQQI",dom,0,vdev,0, \
+ pdev,start_sect,nr_sect,0))
+ backend_tx_req(msg)
+ elif subtype == CMSG_BLKIF_BE_VBD_GROW:
+ rsp = { 'success': True }
+ xend.main.send_management_response(rsp, xend.blkif.pendaddr)
+
+def backend_do_work(port):
+ global pendmsg
+ if pendmsg and port.space_to_write_request():
+ port.write_request(pendmsg)
+ pendmsg = None
+ return True
+ return False
+
+
+class interface:
+
+ # Dictionary of all block-device interfaces.
+ list = {}
+
+
+ # NB. 'key' is an opaque value that has no meaning in this class.
+ def __init__(self, dom, key):
+ self.dom = dom
+ self.key = key
+ self.devices = {}
+ self.pendmsg = None
+ interface.list[key] = self
+ msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CREATE, 0)
+ msg.append_payload(struct.pack("QII",dom,0,0))
+ xend.blkif.pendaddr = xend.main.mgmt_req_addr
+ backend_tx_req(msg)
+
+ # Attach a device to the specified interface
+ def attach_device(self, vdev, pdev, start_sect, nr_sect, readonly):
+ if self.devices.has_key(vdev):
+ return False
+ self.devices[vdev] = (pdev, start_sect, nr_sect, readonly)
+ msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE, 0)
+ msg.append_payload(struct.pack("QIHII",self.dom,0,vdev,readonly,0))
+ xend.blkif.pendaddr = xend.main.mgmt_req_addr
+ backend_tx_req(msg)
+ return True
+
+
+ # Completely destroy this interface.
+ def destroy(self):
+ del interface.list[self.key]
+ msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DESTROY, 0)
+ msg.append_payload(struct.pack("QII",self.dom,0,0))
+ backend_tx_req(msg)
+
+
+ # The parameter @port is the control-interface event channel. This method
+ # returns True if messages were written to the control interface.
+ def ctrlif_transmit_work(self, port):
+ if self.pendmsg and port.space_to_write_request():
+ port.write_request(self.pendmsg)
+ self.pendmsg = None
+ return True
+ return False
+
+ def ctrlif_tx_req(self, port, msg):
+ if port.space_to_write_request():
+ port.write_request(msg)
+ port.notify()
+ else:
+ self.pendmsg = msg
+
+ def ctrlif_rx_req(self, port, msg):
+ port.write_response(msg)
+ subtype = (msg.get_header())['subtype']
+ if subtype == CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED:
+ msg = xend.utils.message(CMSG_BLKIF_FE, \
+ CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
+ msg.append_payload(struct.pack("III",0,1,0))
+ self.ctrlif_tx_req(port, msg)
+ elif subtype == CMSG_BLKIF_FE_INTERFACE_CONNECT:
+ (hnd,frame) = struct.unpack("IL", msg.get_payload())
+ xc = Xc.new()
+ self.evtchn = xc.evtchn_bind_interdomain(dom1=0,dom2=self.dom)
+ msg = xend.utils.message(CMSG_BLKIF_BE, \
+ CMSG_BLKIF_BE_CONNECT, 0)
+ msg.append_payload(struct.pack("QIILI",self.dom,0, \
+ self.evtchn['port1'],frame,0))
+ backend_tx_req(msg)
diff --git a/tools/xend/lib/console.py b/tools/xend/lib/console.py
index aad6069979..57898817f5 100644
--- a/tools/xend/lib/console.py
+++ b/tools/xend/lib/console.py
@@ -5,7 +5,7 @@
#############################################################
import errno, re, os, select, signal, socket, struct, sys
-
+import xend.blkif, xend.main, xend.manager, xend.utils, Xc
##
## interface:
@@ -16,7 +16,7 @@ import errno, re, os, select, signal, socket, struct, sys
## CONNECTED: sending/receiving console data on TCP port 'self.port'
##
## A dictionary of all active interfaces, indexed by TCP socket descriptor,
-## is accessible as 'interface.interface_list'.
+## is accessible as 'interface.list_by_fd'.
##
## NB. When a class instance is to be destroyed you *must* call the 'close'
## method. Otherwise a stale reference will eb left in the interface list.
@@ -30,7 +30,11 @@ class interface:
# Dictionary of all active (non-closed) console interfaces.
- interface_list = {}
+ list_by_fd = {}
+
+
+ # Dictionary of all console interfaces, closed and open.
+ list = {}
# NB. 'key' is an opaque value that has no meaning in this class.
@@ -38,6 +42,9 @@ class interface:
self.status = interface.CLOSED
self.port = port
self.key = key
+ self.rbuf = xend.utils.buffer()
+ self.wbuf = xend.utils.buffer()
+ interface.list[key] = self
# Is this interface closed (inactive)?
@@ -58,14 +65,14 @@ class interface:
# Close the interface, if it is not closed already.
def close(self):
if not self.closed():
- del interface.interface_list[self.sock.fileno()]
+ del interface.list_by_fd[self.sock.fileno()]
self.sock.close()
del self.sock
self.status = interface.CLOSED
# Move the interface into the 'listening' state. Opens a new listening
- # socket and updates 'interface_list'.
+ # socket and updates 'list_by_fd'.
def listen(self):
# Close old socket (if any), and create a fresh one.
self.close()
@@ -80,7 +87,7 @@ class interface:
# Announce the new status of thsi interface.
self.status = interface.LISTENING
- interface.interface_list[self.sock.fileno()] = self
+ interface.list_by_fd[self.sock.fileno()] = self
except:
# In case of trouble ensure we get rid of dangling socket reference
@@ -105,7 +112,69 @@ class interface:
# Publish the new socket and the new interface state.
self.sock = sock
self.status = interface.CONNECTED
- interface.interface_list[self.sock.fileno()] = self
+ interface.list_by_fd[self.sock.fileno()] = self
return 1
+ # Completely sestroy a console interface.
+ def destroy(self):
+ self.close()
+ del interface.list[self.key]
+
+
+ # Do work triggered by resource availability on a console-interface socket.
+ def socket_work(self):
+ # If the interface is listening, check for pending connections.
+ if self.listening():
+ self.connect()
+
+ # All done if the interface is not connected.
+ if not self.connected():
+ return
+
+ # Send as much pending data as possible via the socket.
+ while not self.rbuf.empty():
+ try:
+ bytes = self.sock.send(self.rbuf.peek())
+ if bytes > 0:
+ self.rbuf.discard(bytes)
+ except socket.error, error:
+ pass
+
+ # Read as much data as is available. Don't worry about
+ # overflowing our buffer: it's more important to read the
+ # incoming data stream and detect errors or closure of the
+ # remote end in a timely manner.
+ try:
+ while 1:
+ data = self.sock.recv(2048)
+ # Return of zero means the remote end has disconnected.
+ # We therefore return the console interface to listening.
+ if not data:
+ self.listen()
+ break
+ self.wbuf.write(data)
+ except socket.error, error:
+ # Assume that most errors mean that the connection is dead.
+ # In such cases we return the interface to 'listening' state.
+ if error[0] != errno.EAGAIN:
+ print "Better return to listening"
+ self.listen()
+ print "New status: " + str(self.status)
+
+
+ # The parameter @port is the control-interface event channel. This method
+ # returns True if messages were written to the control interface.
+ def ctrlif_transmit_work(self, port):
+ work_done = False
+ while not self.wbuf.empty() and port.space_to_write_request():
+ msg = xend.utils.message(0, 0, 0)
+ msg.append_payload(self.wbuf.read(msg.MAX_PAYLOAD))
+ port.write_request(msg)
+ work_done = True
+ return work_done
+
+
+ def ctrlif_rx_req(self, port, msg):
+ self.rbuf.write(msg.get_payload())
+ port.write_response(msg)
diff --git a/tools/xend/lib/domain_controller.h b/tools/xend/lib/domain_controller.h
index 14f970dd04..68d4fac1d2 100644
--- a/tools/xend/lib/domain_controller.h
+++ b/tools/xend/lib/domain_controller.h
@@ -56,29 +56,113 @@ typedef struct {
#define CMSG_BLKIF_BE 1 /* Block-device backend */
#define CMSG_BLKIF_FE 2 /* Block-device frontend */
+
+/******************************************************************************
+ * CONSOLE DEFINITIONS
+ */
+
/*
* Subtypes for console messages.
*/
#define CMSG_CONSOLE_DATA 0
+
+/******************************************************************************
+ * BLOCK-INTERFACE FRONTEND DEFINITIONS
+ */
+
+/* Messages from domain controller to guest. */
+#define CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED 0
+
+/* Messages from guest to domain controller. */
+#define CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED 32
+#define CMSG_BLKIF_FE_INTERFACE_CONNECT 33
+#define CMSG_BLKIF_FE_INTERFACE_DISCONNECT 34
+
+/* These are used by both front-end and back-end drivers. */
+#define blkif_vdev_t u16
+#define blkif_pdev_t u16
+#define blkif_sector_t u64
+
+/*
+ * CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
+ * Notify a guest about a status change on one of its block interfaces.
+ * If the interface is DESTROYED or DOWN then the interface is disconnected:
+ * 1. The shared-memory frame is available for reuse.
+ * 2. Any unacknowledged messgaes pending on the interface were dropped.
+ */
+#define BLKIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist. */
+#define BLKIF_INTERFACE_STATUS_DISCONNECTED 1 /* Exists but is disconnected. */
+#define BLKIF_INTERFACE_STATUS_CONNECTED 2 /* Exists and is connected. */
+typedef struct {
+ unsigned int handle;
+ unsigned int status;
+ unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_CONNECTED */
+} blkif_fe_interface_status_changed_t;
+
/*
- * Subtypes for block-device messages.
+ * CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED:
+ * Notify the domain controller that the front-end driver is DOWN or UP.
+ * When the driver goes DOWN then the controller will send no more
+ * status-change notifications. When the driver comes UP then the controller
+ * will send a notification for each interface that currently exists.
+ * If the driver goes DOWN while interfaces are still UP, the domain
+ * will automatically take the interfaces DOWN.
*/
+#define BLKIF_DRIVER_STATUS_DOWN 0
+#define BLKIF_DRIVER_STATUS_UP 1
+typedef struct {
+ unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
+} blkif_fe_driver_status_changed_t;
+
+/*
+ * CMSG_BLKIF_FE_INTERFACE_CONNECT:
+ * If successful, the domain controller will acknowledge with a
+ * STATUS_CONNECTED message.
+ */
+typedef struct {
+ unsigned int handle;
+ unsigned long shmem_frame;
+} blkif_fe_interface_connect_t;
+
+/*
+ * CMSG_BLKIF_FE_INTERFACE_DISCONNECT:
+ * If successful, the domain controller will acknowledge with a
+ * STATUS_DISCONNECTED message.
+ */
+typedef struct {
+ /* IN */
+ unsigned int handle;
+ /* OUT */
+ /*
+ * Tells driver how many interfaces it should expect to immediately
+ * receive notifications about.
+ */
+ unsigned int nr_interfaces;
+} blkif_fe_interface_disconnect_t;
+
+
+/******************************************************************************
+ * BLOCK-INTERFACE BACKEND DEFINITIONS
+ */
+
+/* Messages from domain controller. */
#define CMSG_BLKIF_BE_CREATE 0 /* Create a new block-device interface. */
#define CMSG_BLKIF_BE_DESTROY 1 /* Destroy a block-device interface. */
-#define CMSG_BLKIF_BE_VBD_CREATE 2 /* Create a new VBD for an interface. */
-#define CMSG_BLKIF_BE_VBD_DESTROY 3 /* Delete a VBD from an interface. */
-#define CMSG_BLKIF_BE_VBD_GROW 4 /* Append an extent to a given VBD. */
-#define CMSG_BLKIF_BE_VBD_SHRINK 5 /* Remove last extent from a given VBD. */
+#define CMSG_BLKIF_BE_CONNECT 2 /* Connect i/f to remote driver. */
+#define CMSG_BLKIF_BE_DISCONNECT 3 /* Disconnect i/f from remote driver. */
+#define CMSG_BLKIF_BE_VBD_CREATE 4 /* Create a new VBD for an interface. */
+#define CMSG_BLKIF_BE_VBD_DESTROY 5 /* Delete a VBD from an interface. */
+#define CMSG_BLKIF_BE_VBD_GROW 6 /* Append an extent to a given VBD. */
+#define CMSG_BLKIF_BE_VBD_SHRINK 7 /* Remove last extent from a given VBD. */
+
+/* Messages to domain controller. */
+#define CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED 32
/*
- * Message request/response defintions for block-device messages.
+ * Message request/response definitions for block-device messages.
*/
-#define blkif_vdev_t u16
-#define blkif_pdev_t u16
-#define blkif_sector_t u64
-
typedef struct {
blkif_pdev_t device;
blkif_sector_t sector_start;
@@ -86,41 +170,91 @@ typedef struct {
} blkif_extent_t;
/* Non-specific 'okay' return. */
-#define BLKIF_STATUS_OKAY 0
+#define BLKIF_BE_STATUS_OKAY 0
/* Non-specific 'error' return. */
-#define BLKIF_STATUS_ERROR 1
+#define BLKIF_BE_STATUS_ERROR 1
/* The following are specific error returns. */
-#define BLKIF_STATUS_INTERFACE_EXISTS 2
-#define BLKIF_STATUS_INTERFACE_NOT_FOUND 3
+#define BLKIF_BE_STATUS_INTERFACE_EXISTS 2
+#define BLKIF_BE_STATUS_INTERFACE_NOT_FOUND 3
+#define BLKIF_BE_STATUS_INTERFACE_CONNECTED 4
+#define BLKIF_BE_STATUS_VBD_EXISTS 5
+#define BLKIF_BE_STATUS_VBD_NOT_FOUND 6
+#define BLKIF_BE_STATUS_OUT_OF_MEMORY 7
+#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND 8
+#define BLKIF_BE_STATUS_MAPPING_ERROR 9
/* This macro can be used to create an array of descriptive error strings. */
-#define BLKIF_STATUS_ERRORS { \
- "Okay", \
- "Non-specific error", \
- "Interface already exists", \
- "Interface not found" }
+#define BLKIF_BE_STATUS_ERRORS { \
+ "Okay", \
+ "Non-specific error", \
+ "Interface already exists", \
+ "Interface not found", \
+ "Interface is still connected", \
+ "VBD already exists", \
+ "VBD not found", \
+ "Out of memory", \
+ "Extent not found for VBD", \
+ "Could not map domain memory" }
-/* CMSG_BLKIF_CREATE */
+/*
+ * CMSG_BLKIF_BE_CREATE:
+ * When the driver sends a successful response then the interface is fully
+ * created. The controller will send a DOWN notification to the front-end
+ * driver.
+ */
typedef struct {
/* IN */
domid_t domid; /* Domain attached to new interface. */
unsigned int blkif_handle; /* Domain-specific interface handle. */
- unsigned int evtchn; /* Event channel for notifications. */
- unsigned long shmem_frame; /* Page cont. shared comms window. */
/* OUT */
unsigned int status;
-} blkif_create_t;
+} blkif_be_create_t;
-/* CMSG_BLKIF_DESTROY */
+/*
+ * CMSG_BLKIF_BE_DESTROY:
+ * When the driver sends a successful response then the interface is fully
+ * torn down. The controller will send a DESTROYED notification to the
+ * front-end driver.
+ */
typedef struct {
/* IN */
domid_t domid; /* Identify interface to be destroyed. */
unsigned int blkif_handle; /* ...ditto... */
/* OUT */
unsigned int status;
-} blkif_destroy_t;
+} blkif_be_destroy_t;
-/* CMSG_BLKIF_VBD_CREATE */
+/*
+ * CMSG_BLKIF_BE_CONNECT:
+ * When the driver sends a successful response then the interface is fully
+ * connected. The controller will send a CONNECTED notification to the
+ * front-end driver.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* Domain attached to new interface. */
+ unsigned int blkif_handle; /* Domain-specific interface handle. */
+ unsigned int evtchn; /* Event channel for notifications. */
+ unsigned long shmem_frame; /* Page cont. shared comms window. */
+ /* OUT */
+ unsigned int status;
+} blkif_be_connect_t;
+
+/*
+ * CMSG_BLKIF_BE_DISCONNECT:
+ * When the driver sends a successful response then the interface is fully
+ * disconnected. The controller will send a DOWN notification to the front-end
+ * driver.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* Domain attached to new interface. */
+ unsigned int blkif_handle; /* Domain-specific interface handle. */
+ /* OUT */
+ unsigned int status;
+} blkif_be_disconnect_t;
+
+/* CMSG_BLKIF_BE_VBD_CREATE */
typedef struct {
/* IN */
domid_t domid; /* Identify blkdev interface. */
@@ -129,9 +263,9 @@ typedef struct {
int readonly; /* Non-zero -> VBD isn't writeable. */
/* OUT */
unsigned int status;
-} blkif_vbd_create_t;
+} blkif_be_vbd_create_t;
-/* CMSG_BLKIF_VBD_DESTROY */
+/* CMSG_BLKIF_BE_VBD_DESTROY */
typedef struct {
/* IN */
domid_t domid; /* Identify blkdev interface. */
@@ -139,9 +273,9 @@ typedef struct {
blkif_vdev_t vdevice; /* Interface-specific id of the VBD. */
/* OUT */
unsigned int status;
-} blkif_vbd_destroy_t;
+} blkif_be_vbd_destroy_t;
-/* CMSG_BLKIF_VBD_GROW */
+/* CMSG_BLKIF_BE_VBD_GROW */
typedef struct {
/* IN */
domid_t domid; /* Identify blkdev interface. */
@@ -150,9 +284,9 @@ typedef struct {
blkif_extent_t extent; /* Physical extent to append to VBD. */
/* OUT */
unsigned int status;
-} blkif_vbd_grow_t;
+} blkif_be_vbd_grow_t;
-/* CMSG_BLKIF_VBD_SHRINK */
+/* CMSG_BLKIF_BE_VBD_SHRINK */
typedef struct {
/* IN */
domid_t domid; /* Identify blkdev interface. */
@@ -160,6 +294,23 @@ typedef struct {
blkif_vdev_t vdevice; /* Interface-specific id of the VBD. */
/* OUT */
unsigned int status;
-} blkif_vbd_shrink_t;
+} blkif_be_vbd_shrink_t;
+
+/*
+ * CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED:
+ * Notify the domain controller that the back-end driver is DOWN or UP.
+ * If the driver goes DOWN while interfaces are still UP, the domain
+ * controller will automatically send DOWN notifications.
+ */
+typedef struct {
+ /* IN */
+ unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
+ /* OUT */
+ /*
+ * Tells driver how many interfaces it should expect to immediately
+ * receive notifications about.
+ */
+ unsigned int nr_interfaces;
+} blkif_be_driver_status_changed_t;
#endif /* __DOMAIN_CONTROLLER_H__ */
diff --git a/tools/xend/lib/main.py b/tools/xend/lib/main.py
index 4b243b3307..7b5adbab83 100755
--- a/tools/xend/lib/main.py
+++ b/tools/xend/lib/main.py
@@ -5,7 +5,7 @@
###########################################################
import errno, re, os, pwd, select, signal, socket, struct, sys, time
-import xend.console, xend.manager, xend.utils, Xc
+import xend.blkif, xend.console, xend.manager, xend.utils, Xc
# The following parameters could be placed in a configuration file.
@@ -16,13 +16,35 @@ CONTROL_DIR = '/var/run/xend'
UNIX_SOCK = 'management_sock' # relative to CONTROL_DIR
+CMSG_CONSOLE = 0
+CMSG_BLKIF_BE = 1
+CMSG_BLKIF_FE = 2
+
+
+def port_from_dom(dom):
+ global port_list
+ for idx, port in port_list.items():
+ if port.remote_dom == dom:
+ return port
+ return None
+
+
+def send_management_response(response, addr):
+ try:
+ response = str(response)
+ print "Mgmt_rsp[%s]: %s" % (addr, response)
+ management_interface.sendto(response, addr)
+ except socket.error, error:
+ pass
+
+
def daemon_loop():
# Could we do this more nicely? The xend.manager functions need access
# to this global state to do their work.
- global control_list, notifier
+ global port_list, notifier, management_interface, mgmt_req_addr, dom0_port
- # List of all control interfaces, indexed by local event-channel port.
- control_list = {}
+ # Lists of all interfaces, indexed by local event-channel port.
+ port_list = {}
xc = Xc.new()
@@ -44,6 +66,13 @@ def daemon_loop():
# notifications.
notifier = xend.utils.notifier()
+ # The DOM0 control interface is not set up via the management interface.
+ # Note that console messages don't come our way (actually, only driver
+ # back-ends should use the DOM0 control interface).
+ dom0_port = xend.utils.port(0)
+ notifier.bind(dom0_port.local_port)
+ port_list[dom0_port.local_port] = dom0_port
+
##
## MAIN LOOP
##
@@ -58,10 +87,10 @@ def daemon_loop():
waitset = select.poll()
waitset.register(management_interface, select.POLLIN)
waitset.register(notifier, select.POLLIN)
- for idx, (port, rbuf, wbuf, con_if) in control_list.items():
+ for idx, con_if in xend.console.interface.list_by_fd.items():
if not con_if.closed():
pflags = select.POLLIN
- if not rbuf.empty() and con_if.connected():
+ if not con_if.rbuf.empty() and con_if.connected():
pflags = select.POLLIN | select.POLLOUT
waitset.register(con_if.sock.fileno(), pflags)
@@ -72,16 +101,16 @@ def daemon_loop():
# These should consist of executable Python statements that call
# well-known management functions (e.g., new_control_interface(dom=9)).
try:
- data, addr = management_interface.recvfrom(2048)
+ data, mgmt_req_addr = management_interface.recvfrom(2048)
except socket.error, error:
if error[0] != errno.EAGAIN:
raise
else:
- if addr:
+ if mgmt_req_addr:
# Evaluate the request in an exception-trapping sandbox.
try:
- print "Mgmt_req[%s]: %s" % (addr, data)
- response = str(eval('xend.manager.'+data))
+ print "Mgmt_req[%s]: %s" % (mgmt_req_addr, data)
+ response = eval('xend.manager.'+data)
except:
# Catch all exceptions and turn into an error response:
@@ -97,69 +126,20 @@ def daemon_loop():
response = str(response)
# Try to send a response to the requester.
- try:
- print "Mgmt_rsp[%s]: %s" % (addr, response)
- management_interface.sendto(response, addr)
- except socket.error, error:
- pass
+ if response:
+ send_management_response(response, mgmt_req_addr)
# Do work for every console interface that hit in the poll set.
for (fd, events) in fdset:
- if not xend.console.interface.interface_list.has_key(fd):
- continue
- con_if = xend.console.interface.interface_list[fd]
-
- # If the interface is listening, check for pending connections.
- if con_if.listening():
- con_if.connect()
-
- # All done if the interface is not connected.
- if not con_if.connected():
- continue
- (port, rbuf, wbuf, con_if) = control_list[con_if.key]
-
- # Send as much pending data as possible via the socket.
- while not rbuf.empty():
- try:
- bytes = con_if.sock.send(rbuf.peek())
- if bytes > 0:
- rbuf.discard(bytes)
- except socket.error, error:
- pass
-
- # Read as much data as is available. Don't worry about
- # overflowing our buffer: it's more important to read the
- # incoming data stream and detect errors or closure of the
- # remote end in a timely manner.
- try:
- while 1:
- data = con_if.sock.recv(2048)
- # Return of zero means the remote end has disconnected.
- # We therefore return the console interface to listening.
- if not data:
- con_if.listen()
- break
- wbuf.write(data)
- except socket.error, error:
- # Assume that most errors mean that the connection is dead.
- # In such cases we return the interface to 'listening' state.
- if error[0] != errno.EAGAIN:
- print "Better return to listening"
- con_if.listen()
- print "New status: " + str(con_if.status)
-
- # We may now have pending data to send via the relevant
- # inter-domain control interface. If so then we send all we can
- # and notify the remote end.
- work_done = False
- while not wbuf.empty() and port.space_to_write_request():
- msg = xend.utils.message(0, 0, 0)
- msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
- port.write_request(msg)
- work_done = True
- if work_done:
- port.notify()
-
+ if xend.console.interface.list_by_fd.has_key(fd):
+ con_if = xend.console.interface.list_by_fd[fd]
+ con_if.socket_work()
+ # We may now have pending data to send via the control
+ # interface. If so then send all we can and notify the remote.
+ port = port_list[con_if.key]
+ if con_if.ctrlif_transmit_work(port):
+ port.notify()
+
# Process control-interface notifications from other guest OSes.
while 1:
# Grab a notification, if there is one.
@@ -168,42 +148,69 @@ def daemon_loop():
break
(idx, type) = notification
- if not control_list.has_key(idx):
+ if not port_list.has_key(idx):
continue
- (port, rbuf, wbuf, con_if) = control_list[idx]
+ port = port_list[idx]
work_done = False
+ con_if = False
+ if xend.console.interface.list.has_key(idx):
+ con_if = xend.console.interface.list[idx]
+
+ blk_if = False
+ if xend.blkif.interface.list.has_key(idx):
+ blk_if = xend.blkif.interface.list[idx]
+
# If we pick up a disconnect notification then we do any necessary
# cleanup.
if type == notifier.EXCEPTION:
ret = xc.evtchn_status(idx)
if ret['status'] == 'unbound':
notifier.unbind(idx)
- con_if.close()
- del control_list[idx], port, rbuf, wbuf, con_if
+ del port_list[idx], port
+ if con_if:
+ con_if.destroy()
+ del con_if
+ if blk_if:
+ blk_if.destroy()
+ del blk_if
continue
- # Read incoming requests. Currently assume that request
- # message always containb console data.
+ # Process incoming requests.
while port.request_to_read():
msg = port.read_request()
- rbuf.write(msg.get_payload())
- port.write_response(msg)
work_done = True
-
- # Incoming responses are currently thrown on the floor.
+ type = (msg.get_header())['type']
+ if type == CMSG_CONSOLE and con_if:
+ con_if.ctrlif_rx_req(port, msg)
+ elif type == CMSG_BLKIF_FE and blk_if:
+ blk_if.ctrlif_rx_req(port, msg)
+ elif type == CMSG_BLKIF_BE and port == dom0_port:
+ xend.blkif.backend_rx_req(port, msg)
+ else:
+ port.write_response(msg)
+
+ # Process incoming responses.
while port.response_to_read():
msg = port.read_response()
work_done = True
+ type = (msg.get_header())['type']
+ if type == CMSG_BLKIF_BE and port == dom0_port:
+ xend.blkif.backend_rx_rsp(port, msg)
- # Send as much pending console data as there is room for.
- while not wbuf.empty() and port.space_to_write_request():
- msg = xend.utils.message(0, 0, 0)
- msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
- port.write_request(msg)
+ # Send console data.
+ if con_if and con_if.ctrlif_transmit_work(port):
work_done = True
+ # Send blkif messages.
+ if blk_if and blk_if.ctrlif_transmit_work(port):
+ work_done = True
+
+ # Back-end block-device work.
+ if port == dom0_port and xend.blkif.backend_do_work(port):
+ work_done = True
+
# Finally, notify the remote end of any work that we did.
if work_done:
port.notify()
diff --git a/tools/xend/lib/manager.py b/tools/xend/lib/manager.py
index 42d66d3a95..ea7398cd4c 100644
--- a/tools/xend/lib/manager.py
+++ b/tools/xend/lib/manager.py
@@ -4,13 +4,13 @@
## Copyright (c) 2004, K A Fraser (University of Cambridge)
#############################################################
-import xend.console, xend.main, xend.utils
+import xend.blkif, xend.console, xend.main, xend.utils
##
## new_control_interface:
-## Create a new control interface with the specified domain 'dom'.
-## The console port may also be specified; otehrwise a suitable port is
+## Create a new control interface with the specified domain @dom.
+## The console port may also be specified; otherwise a suitable port is
## automatically allocated.
##
def new_control_interface(dom, console_port=-1):
@@ -26,9 +26,8 @@ def new_control_interface(dom, console_port=-1):
con_if = xend.console.interface(console_port, port.local_port)
con_if.listen()
- # Add control state to the master list.
- xend.main.control_list[port.local_port] = \
- (port, xend.utils.buffer(), xend.utils.buffer(), con_if)
+ # Update the master port list.
+ xend.main.port_list[port.local_port] = port
# Construct the successful response to be returned to the requester.
response = { 'success': True }
@@ -36,3 +35,81 @@ def new_control_interface(dom, console_port=-1):
response['remote_port'] = port.remote_port
response['console_port'] = console_port
return response
+
+
+##
+## new_block_interface:
+## Create a new block interface for the specified domain @dom.
+##
+def new_block_interface(dom, handle=-1):
+ # By default we create an interface with handle zero.
+ if handle < 0:
+ handle = 0
+
+ # We only support one interface per domain, which must have handle zero.
+ if handle != 0:
+ response = { 'success': False }
+ response['error_type'] = 'Bad handle %d (only handle 0 ' + \
+ 'is supported)' % handle
+ return response
+
+ # Find local event-channel port associated with the specified domain.
+ port = xend.main.port_from_dom(dom)
+ if not port:
+ response = { 'success': False }
+ response['error_type'] = 'Unknown domain %d' % dom
+ return response
+
+ # The interface must not already exist.
+ if xend.blkif.interface.list.has_key(port.local_port):
+ response = { 'success': False }
+ response['error_type'] = 'Interface (dom=%d,handle=%d) already ' + \
+ 'exists' % (dom, handle)
+ return response
+
+ # Create the new interface. Initially no virtual devices are attached.
+ xend.blkif.interface(dom, port.local_port)
+
+ # Response is deferred until back-end driver sends acknowledgement.
+ return None
+
+
+##
+## new_block_device:
+## Attach a new virtual block device to the specified block interface
+## (@dom, @handle). The new device is identified by @vdev, and maps to
+## the real block extent (@pdev, @start_sect, @nr_sect). If @readonly then
+## write requests to @vdev will be rejected.
+##
+def new_block_device(dom, handle, vdev, pdev, start_sect, nr_sect, readonly):
+ # We only support one interface per domain, which must have handle zero.
+ if handle != 0:
+ response = { 'success': False }
+ response['error_type'] = 'Bad handle %d (only handle 0 ' + \
+ 'is supported)' % handle
+ return response
+
+ # Find local event-channel port associated with the specified domain.
+ port = xend.main.port_from_dom(dom)
+ if not port:
+ response = { 'success': False }
+ response['error_type'] = 'Unknown domain %d' % dom
+ return response
+
+ # The interface must exist.
+ if not xend.blkif.interface.list.has_key(port.local_port):
+ response = { 'success': False }
+ response['error_type'] = 'Interface (dom=%d,handle=%d) does not ' + \
+ 'exists' % (dom, handle)
+ return response
+
+ # The virtual device must not yet exist.
+ blkif = xend.blkif.interface.list[port.local_port]
+ if not blkif.attach_device(vdev, pdev, start_sect, nr_sect, readonly):
+ response = { 'success': False }
+ response['error_type'] = 'Vdevice (dom=%d,handle=%d,vdevice=%d) ' + \
+ 'already exists' % (dom, handle, vdev)
+ return response
+
+ # Response is deferred until back-end driver sends acknowledgement.
+ return None
diff --git a/tools/xend/lib/utils.c b/tools/xend/lib/utils.c
index 4883ec1a46..c28d682ec9 100644
--- a/tools/xend/lib/utils.c
+++ b/tools/xend/lib/utils.c
@@ -22,6 +22,8 @@
#include <signal.h>
#include <xc.h>
+#include <asm-xen/proc_cmd.h>
+
#include <hypervisor-if.h>
#include "domain_controller.h"
@@ -684,8 +686,23 @@ static PyObject *xu_port_new(PyObject *self, PyObject *args)
goto fail2;
}
- if ( xc_evtchn_bind_interdomain(xup->xc_handle,
- DOMID_SELF, dom, &port1, &port2) != 0 )
+ if ( dom == 0ULL )
+ {
+ /*
+ * The control-interface event channel for DOM0 is already set up.
+ * We use an ioctl to discover the port at our end of the channel.
+ */
+ port1 = ioctl(xup->xc_handle, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN, NULL);
+ port2 = -1; /* We don't need the remote end of the DOM0 link. */
+ if ( port1 < 0 )
+ {
+ PyErr_SetString(port_error, "Could not open channel to DOM0");
+ goto fail3;
+ }
+ }
+ else if ( xc_evtchn_bind_interdomain(xup->xc_handle,
+ DOMID_SELF, dom,
+ &port1, &port2) != 0 )
{
PyErr_SetString(port_error, "Could not open channel to domain");
goto fail3;
@@ -744,7 +761,8 @@ static void xu_port_dealloc(PyObject *self)
{
xu_port_object *xup = (xu_port_object *)self;
unmap_control_interface(xup->mem_fd, xup->interface);
- (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port);
+ if ( xup->remote_dom != 0ULL )
+ (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port);
(void)xc_interface_close(xup->xc_handle);
(void)close(xup->mem_fd);
PyObject_Del(self);
diff --git a/tools/xend/setup.py b/tools/xend/setup.py
index 1f39cb4572..5567d7093c 100644
--- a/tools/xend/setup.py
+++ b/tools/xend/setup.py
@@ -4,7 +4,8 @@ from distutils.core import setup, Extension
utils = Extension("utils",
extra_compile_args = ["-fno-strict-aliasing"],
include_dirs = ["../xc/lib",
- "../../xen/include/hypervisor-ifs"],
+ "../../xen/include/hypervisor-ifs",
+ "../../xenolinux-sparse/include"],
library_dirs = ["../xc/lib"],
libraries = ["xc"],
sources = ["lib/utils.c"])
diff --git a/xen/arch/i386/pdb-stub.c b/xen/arch/i386/pdb-stub.c
index 049f330cf6..5b42e9a746 100644
--- a/xen/arch/i386/pdb-stub.c
+++ b/xen/arch/i386/pdb-stub.c
@@ -51,6 +51,8 @@ static unsigned char pdb_xmit_checksum;
unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid);
void pdb_linux_get_values(char *buffer, int length, unsigned long address,
int pid, unsigned long cr3);
+void pdb_linux_set_values(char *buffer, int length, unsigned long address,
+ int pid, unsigned long cr3);
struct pdb_context
{
@@ -571,6 +573,12 @@ pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3,
{
hex2mem (ptr, (char *)addr, length);
}
+ else if (pdb_ctx.process != -1)
+ {
+ pdb_linux_set_values(ptr, length, addr,
+ pdb_ctx.process,
+ pdb_ctx.ptbr);
+ }
else
{
pdb_set_values (ptr, length,
diff --git a/xen/common/debug-linux.c b/xen/common/debug-linux.c
index 4fbcdf2918..ff767b51cd 100644
--- a/xen/common/debug-linux.c
+++ b/xen/common/debug-linux.c
@@ -171,6 +171,44 @@ void pdb_linux_get_values(char *buffer, int length, unsigned long address,
}
}
+
+void pdb_linux_set_value(int pid, unsigned long cr3, unsigned long addr,
+ u_char *value)
+{
+ unsigned long pgd;
+ unsigned long l2tab, page;
+
+ /* get the process' pgd */
+ pgd = pdb_linux_pid_ptbr(cr3, pid);
+
+ /* get the l2 table entry */
+ pdb_get_values((u_char *) &l2tab, sizeof(l2tab),
+ cr3, pgd + (addr >> PGDIR_SHIFT) * 4);
+ l2tab = (unsigned long)__va(machine_to_phys(cr3, l2tab) & PAGE_MASK);
+
+ /* get the page table entry */
+ pdb_get_values((u_char *) &page, sizeof(page),
+ cr3, l2tab + ((addr & L1_PAGE_BITS) >> PAGE_SHIFT) * 4);
+ page = (unsigned long)__va(machine_to_phys(cr3, page) & PAGE_MASK);
+
+ /* set the byte */
+ pdb_set_values(value, sizeof(u_char), cr3, page + (addr & ~PAGE_MASK));
+}
+
+void pdb_linux_set_values(char *buffer, int length, unsigned long address,
+ int pid, unsigned long cr3)
+{
+ int loop;
+
+ /* it's difficult to imagine a more inefficient algorithm */
+ for (loop = 0; loop < length; loop++)
+ {
+ pdb_linux_set_value(pid, cr3, address + loop, &buffer[loop * 2]);
+ }
+}
+
+/**********************************************************************/
+
/*
* return 1 if is the virtual address is in the operating system's
* address space, else 0
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index c0bea86320..c6011cebb4 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -109,15 +109,18 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
goto out;
}
+ /* 'Allocate' port1 before searching for a free port2. */
+ p1->event_channel[port1].state = ECS_INTERDOMAIN;
+
if ( (port2 = get_free_port(p2)) < 0 )
{
+ p1->event_channel[port1].state = ECS_FREE;
rc = port2;
goto out;
}
p1->event_channel[port1].u.remote.dom = p2;
p1->event_channel[port1].u.remote.port = (u16)port2;
- p1->event_channel[port1].state = ECS_INTERDOMAIN;
p2->event_channel[port2].u.remote.dom = p1;
p2->event_channel[port2].u.remote.port = (u16)port1;
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index d8d5d03251..7f814391cf 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -73,6 +73,9 @@ unsigned char opt_pdb[10] = "none";
unsigned int opt_tbuf_size = 1;
/* opt_sched: scheduler - default to Borrowed Virtual Time */
char opt_sched[10] = "bvt";
+/* opt_physdev_dom0_hide: list of PCI slots to hide from dom0
+ * Should have the format '(%02x:%02x.%1x)(%02x:%02x.%1x)...etc' */
+char opt_physdev_dom0_hide[20] = "";
static struct {
unsigned char *name;
@@ -94,6 +97,7 @@ static struct {
{ "pdb", OPT_STR, &opt_pdb },
{ "tbuf_size", OPT_UINT, &opt_tbuf_size },
{ "sched", OPT_STR, &opt_sched },
+ { "physdev_dom0_hide",OPT_STR, &opt_physdev_dom0_hide },
{ NULL, 0, NULL }
};
diff --git a/xen/common/physdev.c b/xen/common/physdev.c
index 0d14a31527..0cc2654e8a 100644
--- a/xen/common/physdev.c
+++ b/xen/common/physdev.c
@@ -115,16 +115,17 @@ static void add_dev_to_task(struct task_struct *p,
/*
* physdev_pci_access_modify:
- * Allow/disallow access to a specific PCI device. Also allow read access to
- * PCI devices from the device to the root of the device tree. If the given
- * device is a bridge, then the domain should get access to all the devices
- * attached to that bridge (XXX this is unimplemented!).
+ * Allow/disallow access to a specific PCI device. Guests should not be
+ * allowed to see bridge devices as it needlessly complicates things (one
+ * possible exception to this is the AGP bridge). If the given device is a
+ * bridge, then the domain should get access to all the leaf devices below
+ * that bridge (XXX this is unimplemented!).
*/
int physdev_pci_access_modify(
domid_t dom, int bus, int dev, int func, int enable)
{
struct task_struct *p;
- struct pci_dev *pdev, *rdev, *tdev;
+ struct pci_dev *pdev;
int rc = 0;
if ( !IS_PRIV(current) )
@@ -145,7 +146,7 @@ int physdev_pci_access_modify(
return -ESRCH;
/* Make the domain privileged. */
- set_bit(PF_PRIVILEGED, &p->flags);
+ set_bit(PF_PRIVILEGED, &p->flags);
/* Grant write access to the specified device. */
if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL )
@@ -155,27 +156,10 @@ int physdev_pci_access_modify(
goto out;
}
add_dev_to_task(p, pdev, ACC_WRITE);
+
INFO(" add RW %02x:%02x:%02x\n", pdev->bus->number,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
- /* Grant read access to the root device. */
- if ( (rdev = pci_find_slot(0, PCI_DEVFN(0, 0))) == NULL )
- {
- INFO(" bizarre -- no PCI root dev\n");
- rc = -ENODEV;
- goto out;
- }
- add_dev_to_task(p, rdev, ACC_READ);
- INFO(" add R0 %02x:%02x:%02x\n", 0, 0, 0);
-
- /* Grant read access to all devices on the path to the root. */
- for ( tdev = pdev->bus->self; tdev != NULL; tdev = tdev->bus->self )
- {
- add_dev_to_task(p, tdev, ACC_READ);
- INFO(" add RO %02x:%02x:%02x\n", tdev->bus->number,
- PCI_SLOT(tdev->devfn), PCI_FUNC(tdev->devfn));
- }
-
/* Is the device a bridge or cardbus? */
if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL )
INFO("XXX can't give access to bridge devices yet\n");
@@ -256,8 +240,16 @@ static int do_base_address_access(phys_dev_t *pdev, int acc, int idx,
if ( len != sizeof(u32) )
{
- INFO("Guest attempting sub-dword %s to BASE_ADDRESS %d\n",
+ /* This isn't illegal, but there doesn't seem to be a very good reason
+ * to do it for normal devices (bridges are another matter). Since it
+ * would complicate the code below, we don't support this for now. */
+
+ /* We could set *val to some value but the guest may well be in trouble
+ * anyway if this write fails. Hopefully the printk will give us a
+ * clue what went wrong. */
+ printk("Guest attempting sub-dword %s to BASE_ADDRESS %d\n",
(acc == ACC_READ) ? "read" : "write", idx);
+
return -EPERM;
}
@@ -420,7 +412,13 @@ static long pci_cfgreg_read(int bus, int dev, int func, int reg,
phys_dev_t *pdev;
if ( (ret = check_dev_acc(current, bus, dev, func, &pdev)) != 0 )
- return ret;
+ {
+ /* PCI spec states that reads from non-existent devices should return
+ * all 1s. In this case the domain has no read access, which should
+ * also look like the device is non-existent. */
+ *val = 0xFFFFFFFF;
+ return ret; /* KAF: error return seems to matter on my test machine. */
+ }
/* Fake out read requests for some registers. */
switch ( reg )
@@ -608,6 +606,21 @@ long do_physdev_op(physdev_op_t *uop)
return ret;
}
+/* Test if boot params specify this device should NOT be visible to DOM0
+ * (e.g. so that another domain can control it instead) */
+int pcidev_dom0_hidden(struct pci_dev *dev)
+{
+ extern char opt_physdev_dom0_hide[];
+ char cmp[10] = "(.......)";
+
+ strncpy(&cmp[1], dev->slot_name, 7);
+
+ if ( strstr(opt_physdev_dom0_hide, dev->slot_name) == NULL )
+ return 0;
+
+ return 1;
+}
+
/* Domain 0 has read access to all devices. */
void physdev_init_dom0(struct task_struct *p)
@@ -619,14 +632,22 @@ void physdev_init_dom0(struct task_struct *p)
pci_for_each_dev(dev)
{
- /* Skip bridges and other peculiarities for now. */
- if ( dev->hdr_type != PCI_HEADER_TYPE_NORMAL )
- continue;
- pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL);
- pdev->dev = dev;
- pdev->flags = ACC_WRITE;
- pdev->state = 0;
- pdev->owner = p;
- list_add(&pdev->node, &p->pcidev_list);
- }
+ if ( !pcidev_dom0_hidden(dev) )
+ {
+ /* Skip bridges and other peculiarities for now. */
+ if ( dev->hdr_type != PCI_HEADER_TYPE_NORMAL )
+ continue;
+ pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL);
+ pdev->dev = dev;
+ pdev->flags = ACC_WRITE;
+ pdev->state = 0;
+ pdev->owner = p;
+ list_add(&pdev->node, &p->pcidev_list);
+ }
+ else
+ {
+ printk("Hiding PCI device %s from DOM0\n", dev->slot_name);
+ }
+ }
}
+
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
index 4895172937..e6004b4a8e 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
@@ -34,23 +34,37 @@ typedef struct blkif_st {
unsigned int evtchn;
int irq;
/* Comms information. */
- blk_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
+ blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
BLK_RING_IDX blk_req_cons; /* Request consumer. */
BLK_RING_IDX blk_resp_prod; /* Private version of response producer. */
/* VBDs attached to this interface. */
rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */
spinlock_t vbd_lock; /* Protects VBD mapping. */
/* Private fields. */
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ /*
+ * DISCONNECT response is deferred until pending requests are ack'ed.
+ * We therefore need to store the id from the original request.
+ */
+ u8 disconnect_rspid;
struct blkif_st *hash_next;
struct list_head blkdev_list;
spinlock_t blk_ring_lock;
+ atomic_t refcnt;
} blkif_t;
-void blkif_create(blkif_create_t *create);
-void blkif_destroy(blkif_destroy_t *destroy);
+void blkif_create(blkif_be_create_t *create);
+void blkif_destroy(blkif_be_destroy_t *destroy);
+void blkif_connect(blkif_be_connect_t *connect);
+int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
+void __blkif_disconnect_complete(blkif_t *blkif);
blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-void blkif_get(blkif_t *blkif);
-void blkif_put(blkif_t *blkif);
+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define blkif_put(_b) \
+ do { \
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+ __blkif_disconnect_complete(_b); \
+ } while (0)
/* An entry in a list of xen_extents. */
typedef struct _blkif_extent_le {
@@ -60,25 +74,25 @@ typedef struct _blkif_extent_le {
typedef struct _vbd {
blkif_vdev_t vdevice; /* what the domain refers to this vbd as */
- unsigned char mode; /* VBD_MODE_{R,W} */
+ unsigned char readonly; /* Non-zero -> read-only */
unsigned char type; /* XD_TYPE_xxx */
blkif_extent_le_t *extents; /* list of xen_extents making up this vbd */
rb_node_t rb; /* for linking into R-B tree lookup struct */
} vbd_t;
-long vbd_create(blkif_vbd_create_t *create_params);
-long vbd_grow(blkif_vbd_grow_t *grow_params);
-long vbd_shrink(blkif_vbd_shrink_t *shrink_params);
-long vbd_destroy(blkif_vbd_destroy_t *delete_params);
-
-void destroy_all_vbds(struct task_struct *p);
+void vbd_create(blkif_be_vbd_create_t *create);
+void vbd_grow(blkif_be_vbd_grow_t *grow);
+void vbd_shrink(blkif_be_vbd_shrink_t *shrink);
+void vbd_destroy(blkif_be_vbd_destroy_t *delete);
+int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
+void destroy_all_vbds(blkif_t *blkif);
typedef struct {
blkif_t *blkif;
unsigned long id;
atomic_t pendcnt;
unsigned short operation;
- unsigned short status;
+ int status;
} pending_req_t;
/* Describes a [partial] disk extent (part of a block io request) */
@@ -91,7 +105,10 @@ typedef struct {
int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation);
-int blkif_be_controller_init(void);
+void blkif_interface_init(void);
+void blkif_ctrlif_init(void);
+
+void blkif_deschedule(blkif_t *blkif);
void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
index c7ef10c3ba..0746ecfab0 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
@@ -10,37 +10,50 @@
static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
+ DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype);
+
switch ( msg->subtype )
{
case CMSG_BLKIF_BE_CREATE:
- if ( msg->length != sizeof(blkif_create_t) )
+ if ( msg->length != sizeof(blkif_be_create_t) )
goto parse_error;
- blkif_create((blkif_create_t *)&msg->msg[0]);
+ blkif_create((blkif_be_create_t *)&msg->msg[0]);
break;
case CMSG_BLKIF_BE_DESTROY:
- if ( msg->length != sizeof(blkif_destroy_t) )
+ if ( msg->length != sizeof(blkif_be_destroy_t) )
goto parse_error;
- blkif_destroy((blkif_destroy_t *)&msg->msg[0]);
+ blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_CONNECT:
+ if ( msg->length != sizeof(blkif_be_connect_t) )
+ goto parse_error;
+ blkif_connect((blkif_be_connect_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_DISCONNECT:
+ if ( msg->length != sizeof(blkif_be_disconnect_t) )
+ goto parse_error;
+ if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) )
+ return; /* Sending the response is deferred until later. */
break;
case CMSG_BLKIF_BE_VBD_CREATE:
- if ( msg->length != sizeof(blkif_vbd_create_t) )
+ if ( msg->length != sizeof(blkif_be_vbd_create_t) )
goto parse_error;
- vbd_create((blkif_vbd_create_t *)&msg->msg[0]);
+ vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]);
break;
case CMSG_BLKIF_BE_VBD_DESTROY:
- if ( msg->length != sizeof(blkif_vbd_destroy_t) )
+ if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
goto parse_error;
- vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]);
+ vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]);
break;
case CMSG_BLKIF_BE_VBD_GROW:
- if ( msg->length != sizeof(blkif_vbd_grow_t) )
+ if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
goto parse_error;
- vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]);
+ vbd_grow((blkif_be_vbd_grow_t *)&msg->msg[0]);
break;
case CMSG_BLKIF_BE_VBD_SHRINK:
- if ( msg->length != sizeof(blkif_vbd_shrink_t) )
+ if ( msg->length != sizeof(blkif_be_vbd_shrink_t) )
goto parse_error;
- vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]);
+ vbd_shrink((blkif_be_vbd_shrink_t *)&msg->msg[0]);
break;
default:
goto parse_error;
@@ -50,12 +63,24 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
return;
parse_error:
+ DPRINTK("Parse error while reading message subtype %d, len %d\n",
+ msg->subtype, msg->length);
msg->length = 0;
ctrl_if_send_response(msg);
}
-int blkif_ctrlif_init(void)
+void blkif_ctrlif_init(void)
{
+ ctrl_msg_t cmsg;
+ blkif_be_driver_status_changed_t st;
+
(void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
- return 0;
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_BLKIF_BE;
+ cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED;
+ cmsg.length = sizeof(blkif_be_driver_status_changed_t);
+ st.status = BLKIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
index 579795deb9..9acbac35ab 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
@@ -12,85 +12,223 @@
#define BLKIF_HASH(_d,_h) \
(((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1))
-static blkif_t *blkif_hash[BLKIF_HASHSZ];
+static kmem_cache_t *blkif_cachep;
+static blkif_t *blkif_hash[BLKIF_HASHSZ];
blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
{
blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
while ( (blkif != NULL) &&
- (blkif->domid != domid) &&
- (blkif->handle != handle) )
+ ((blkif->domid != domid) || (blkif->handle != handle)) )
blkif = blkif->hash_next;
return blkif;
}
-void blkif_create(blkif_create_t *create)
+void __blkif_disconnect_complete(blkif_t *blkif)
+{
+ ctrl_msg_t cmsg;
+ blkif_be_disconnect_t disc;
+
+ /*
+ * These can't be done in __blkif_disconnect() because at that point there
+ * may be outstanding requests at the disc whose asynchronous responses
+ * must still be notified to the remote driver.
+ */
+ unbind_evtchn_from_irq(blkif->evtchn);
+ vfree(blkif->blk_ring_base);
+
+ /* Construct the deferred response message. */
+ cmsg.type = CMSG_BLKIF_BE;
+ cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT;
+ cmsg.id = blkif->disconnect_rspid;
+ cmsg.length = sizeof(blkif_be_disconnect_t);
+ disc.domid = blkif->domid;
+ disc.blkif_handle = blkif->handle;
+ disc.status = BLKIF_BE_STATUS_OKAY;
+ memcpy(cmsg.msg, &disc, sizeof(disc));
+
+ /*
+ * Make sure message is constructed /before/ status change, because
+ * after the status change the 'blkif' structure could be deallocated at
+ * any time. Also make sure we send the response /after/ status change,
+ * as otherwise a subsequent CONNECT request could spuriously fail if
+ * another CPU doesn't see the status change yet.
+ */
+ mb();
+ if ( blkif->status != DISCONNECTING )
+ BUG();
+ blkif->status = DISCONNECTED;
+ mb();
+
+ /* Send the successful response. */
+ ctrl_if_send_response(&cmsg);
+}
+
+void blkif_create(blkif_be_create_t *create)
{
domid_t domid = create->domid;
unsigned int handle = create->blkif_handle;
- unsigned int evtchn = create->evtchn;
- unsigned long shmem_frame = create->shmem_frame;
blkif_t **pblkif, *blkif;
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( *pblkif == NULL )
+ if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_ATOMIC)) == NULL )
{
- if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
- goto found_match;
- pblkif = &(*pblkif)->hash_next;
+ DPRINTK("Could not create blkif: out of memory\n");
+ create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
}
- blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
memset(blkif, 0, sizeof(*blkif));
- blkif->domid = domid;
- blkif->handle = handle;
- blkif->evtchn = evtchn;
- blkif->irq = bind_evtchn_to_irq(evtchn);
- blkif->shmem_frame = shmem_frame;
- blkif->shmem_vbase = ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE);
+ blkif->domid = domid;
+ blkif->handle = handle;
+ blkif->status = DISCONNECTED;
spin_lock_init(&blkif->vbd_lock);
spin_lock_init(&blkif->blk_ring_lock);
+ atomic_set(&blkif->refcnt, 0);
- request_irq(irq, blkif_be_int, 0, "blkif-backend", blkif);
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( *pblkif != NULL )
+ {
+ if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
+ {
+ DPRINTK("Could not create blkif: already exists\n");
+ create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
+ kmem_cache_free(blkif_cachep, blkif);
+ return;
+ }
+ pblkif = &(*pblkif)->hash_next;
+ }
blkif->hash_next = *pblkif;
*pblkif = blkif;
- create->status = BLKIF_STATUS_OKAY;
- return;
-
- found_match:
- create->status = BLKIF_STATUS_INTERFACE_EXISTS;
- return;
-
- evtchn_in_use:
- unbind_evtchn_from_irq(evtchn); /* drop refcnt */
- create->status = BLKIF_STATUS_ERROR;
- return;
+ DPRINTK("Successfully created blkif\n");
+ create->status = BLKIF_BE_STATUS_OKAY;
}
-void blkif_destroy(blkif_destroy_t *destroy)
+void blkif_destroy(blkif_be_destroy_t *destroy)
{
domid_t domid = destroy->domid;
unsigned int handle = destroy->blkif_handle;
blkif_t **pblkif, *blkif;
pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif = *pblkif) == NULL )
+ while ( (blkif = *pblkif) != NULL )
{
if ( (blkif->domid == domid) && (blkif->handle == handle) )
- goto found_match;
+ {
+ if ( blkif->status != DISCONNECTED )
+ goto still_connected;
+ goto destroy;
+ }
pblkif = &blkif->hash_next;
}
- destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
return;
- found_match:
- free_irq(blkif->irq, NULL);
- unbind_evtchn_from_irq(blkif->evtchn);
+ still_connected:
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ return;
+
+ destroy:
*pblkif = blkif->hash_next;
+ destroy_all_vbds(blkif);
kmem_cache_free(blkif_cachep, blkif);
- destroy->status = BLKIF_STATUS_OKAY;
+ destroy->status = BLKIF_BE_STATUS_OKAY;
}
+void blkif_connect(blkif_be_connect_t *connect)
+{
+ domid_t domid = connect->domid;
+ unsigned int handle = connect->blkif_handle;
+ unsigned int evtchn = connect->evtchn;
+ unsigned long shmem_frame = connect->shmem_frame;
+ struct vm_struct *vma;
+ pgprot_t prot;
+ int error;
+ blkif_t *blkif;
+
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("blkif_connect attempted for non-existent blkif (%llu,%u)\n",
+ connect->domid, connect->blkif_handle);
+ connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+ {
+ connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+ error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+ shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+ prot, domid);
+ if ( error != 0 )
+ {
+ if ( error == -ENOMEM )
+ connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ else if ( error == -EFAULT )
+ connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+ else
+ connect->status = BLKIF_BE_STATUS_ERROR;
+ vfree(vma->addr);
+ return;
+ }
+
+ if ( blkif->status != DISCONNECTED )
+ {
+ connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ vfree(vma->addr);
+ return;
+ }
+
+ blkif->evtchn = evtchn;
+ blkif->irq = bind_evtchn_to_irq(evtchn);
+ blkif->shmem_frame = shmem_frame;
+ blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
+ blkif->status = CONNECTED;
+ blkif_get(blkif);
+
+ request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
+
+ connect->status = BLKIF_BE_STATUS_OKAY;
+}
+
+int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+ domid_t domid = disconnect->domid;
+ unsigned int handle = disconnect->blkif_handle;
+ blkif_t *blkif;
+
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("blkif_disconnect attempted for non-existent blkif"
+ " (%llu,%u)\n", disconnect->domid, disconnect->blkif_handle);
+ disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return 1; /* Caller will send response error message. */
+ }
+
+ if ( blkif->status == CONNECTED )
+ {
+ blkif->status = DISCONNECTING;
+ blkif->disconnect_rspid = rsp_id;
+ wmb(); /* Let other CPUs see the status change. */
+ free_irq(blkif->irq, NULL);
+ blkif_deschedule(blkif);
+ blkif_put(blkif);
+ }
+
+ return 0; /* Caller should not send response message. */
+}
+
+void __init blkif_interface_init(void)
+{
+ blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
+ 0, 0, NULL, NULL);
+ memset(blkif_hash, 0, sizeof(blkif_hash));
+}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
index 1e6190c3e6..2582287360 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
@@ -24,6 +24,18 @@
#define MAX_PENDING_REQS 64
#define BATCH_PER_DOMAIN 16
+static struct vm_struct *mmap_vma;
+#define MMAP_PAGES_PER_SEGMENT \
+ ((BLKIF_MAX_SEGMENTS_PER_REQUEST >> (PAGE_SHIFT-9)) + 1)
+#define MMAP_PAGES_PER_REQUEST \
+ (2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * MMAP_PAGES_PER_SEGMENT)
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg) \
+ ((unsigned long)mmap_vma->addr + \
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * MMAP_PAGES_PER_SEGMENT * PAGE_SIZE))
+
/*
* Each outstanding request that we've passed to the lower device layers has a
* 'pending_req' allocated to it. Each buffer_head that completes decrements
@@ -46,22 +58,11 @@ static PEND_RING_IDX pending_prod, pending_cons;
static kmem_cache_t *buffer_head_cachep;
-static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
-
-static int lock_buffer(blkif_t *blkif,
- unsigned long buffer,
- unsigned short size,
- int writeable_buffer);
-static void unlock_buffer(unsigned long buffer,
- unsigned short size,
- int writeable_buffer);
-
-static void io_schedule(unsigned long unused);
static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_rw_block_io(blkif_t *blkif,
- blk_ring_req_entry_t *req);
+static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
+static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
static void make_response(blkif_t *blkif, unsigned long id,
- unsigned short op, unsigned long st);
+ unsigned short op, int st);
/******************************************************************
@@ -95,7 +96,7 @@ static void add_to_blkdev_list_tail(blkif_t *blkif)
unsigned long flags;
if ( __on_blkdev_list(blkif) ) return;
spin_lock_irqsave(&io_schedule_list_lock, flags);
- if ( !__on_blkdev_list(blkif) )
+ if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
{
list_add_tail(&blkif->blkdev_list, &io_schedule_list);
blkif_get(blkif);
@@ -108,8 +109,6 @@ static void add_to_blkdev_list_tail(blkif_t *blkif)
* SCHEDULER FUNCTIONS
*/
-static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
-
static void io_schedule(unsigned long unused)
{
blkif_t *blkif;
@@ -132,6 +131,8 @@ static void io_schedule(unsigned long unused)
run_task_queue(&tq_disk);
}
+static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
+
static void maybe_trigger_io_schedule(void)
{
/*
@@ -155,28 +156,26 @@ static void maybe_trigger_io_schedule(void)
static void end_block_io_op(struct buffer_head *bh, int uptodate)
{
pending_req_t *pending_req = bh->b_private;
+ unsigned long flags;
/* An error fails the entire request. */
if ( !uptodate )
{
DPRINTK("Buffer not up-to-date at end of operation\n");
- pending_req->status = 2;
+ pending_req->status = BLKIF_RSP_ERROR;
}
- unlock_buffer(virt_to_phys(bh->b_data),
- bh->b_size,
- (pending_req->operation==READ));
-
if ( atomic_dec_and_test(&pending_req->pendcnt) )
{
+ int pending_idx = pending_req - pending_reqs;
+ vmfree_area_pages(MMAP_VADDR(pending_idx, 0),
+ MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
make_response(pending_req->blkif, pending_req->id,
pending_req->operation, pending_req->status);
blkif_put(pending_req->blkif);
- spin_lock(&pend_prod_lock);
- pending_ring[MASK_PEND_IDX(pending_prod)] =
- pending_req - pending_reqs;
- pending_prod++;
- spin_unlock(&pend_prod_lock);
+ spin_lock_irqsave(&pend_prod_lock, flags);
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ spin_unlock_irqrestore(&pend_prod_lock, flags);
maybe_trigger_io_schedule();
}
}
@@ -200,45 +199,10 @@ void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
* DOWNWARD CALLS -- These interface with the block-device layer proper.
*/
-static int lock_buffer(blkif_t *blkif,
- unsigned long buffer,
- unsigned short size,
- int writeable_buffer)
-{
- unsigned long pfn;
-
- for ( pfn = buffer >> PAGE_SHIFT;
- pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
- pfn++ )
- {
- }
-
- return 1;
-
- fail:
- while ( pfn-- > (buffer >> PAGE_SHIFT) )
- {
- }
- return 0;
-}
-
-static void unlock_buffer(unsigned long buffer,
- unsigned short size,
- int writeable_buffer)
-{
- unsigned long pfn;
-
- for ( pfn = buffer >> PAGE_SHIFT;
- pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
- pfn++ )
- {
- }
-}
-
static int do_block_io_op(blkif_t *blkif, int max_to_do)
{
- blk_ring_t *blk_ring = blkif->blk_ring_base;
- blk_ring_req_entry_t *req;
+ blkif_ring_t *blk_ring = blkif->blk_ring_base;
+ blkif_request_t *req;
BLK_RING_IDX i;
int more_to_do = 0;
@@ -262,11 +226,15 @@ static int do_block_io_op(blkif_t *blkif, int max_to_do)
dispatch_rw_block_io(blkif, req);
break;
+ case BLKIF_OP_PROBE:
+ dispatch_probe(blkif, req);
+ break;
+
default:
DPRINTK("error: unknown block io operation [%d]\n",
blk_ring->ring[i].req.operation);
make_response(blkif, blk_ring->ring[i].req.id,
- blk_ring->ring[i].req.operation, 1);
+ blk_ring->ring[i].req.operation, BLKIF_RSP_ERROR);
break;
}
}
@@ -275,24 +243,64 @@ static int do_block_io_op(blkif_t *blkif, int max_to_do)
return more_to_do;
}
-static void dispatch_rw_block_io(blkif_t *blkif,
- blk_ring_req_entry_t *req)
+static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
+{
+ int i, rc, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+ pgprot_t prot;
+
+ /* Check that number of segments is sane. */
+ if ( unlikely(req->nr_segments == 0) ||
+ unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
+ {
+ DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
+ goto bad_descriptor;
+ }
+
+ prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
+ for ( i = 0; i < req->nr_segments; i++ )
+ {
+ if ( (req->buffer_and_sects[i] & ~PAGE_MASK) != (PAGE_SIZE / 512) )
+ goto bad_descriptor;
+ rc = direct_remap_area_pages(&init_mm,
+ MMAP_VADDR(pending_idx, i),
+ req->buffer_and_sects[i] & PAGE_MASK,
+ PAGE_SIZE, prot, blkif->domid);
+ if ( rc != 0 )
+ goto bad_descriptor;
+ }
+
+ rc = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0),
+ (req->nr_segments * PAGE_SIZE) / sizeof(vdisk_t));
+
+ vmfree_area_pages(MMAP_VADDR(pending_idx, 0),
+ MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
+ make_response(blkif, req->id, req->operation, rc);
+ return;
+
+ bad_descriptor:
+ vmfree_area_pages(MMAP_VADDR(pending_idx, 0),
+ MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+}
+
+static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
struct buffer_head *bh;
- int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
+ int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
unsigned short nr_sects;
unsigned long buffer;
- int i, tot_sects;
+ int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
pending_req_t *pending_req;
+ pgprot_t prot;
/* We map virtual scatter/gather segments to physical segments. */
int new_segs, nr_psegs = 0;
- phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
+ phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
/* Check that number of segments is sane. */
if ( unlikely(req->nr_segments == 0) ||
- unlikely(req->nr_segments > MAX_BLK_SEGS) )
+ unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
{
DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
goto bad_descriptor;
@@ -310,8 +318,11 @@ static void dispatch_rw_block_io(blkif_t *blkif,
nr_sects = req->buffer_and_sects[i] & 0x1FF;
if ( unlikely(nr_sects == 0) )
+ continue;
+
+ if ( unlikely(nr_sects > BLKIF_MAX_SECTORS_PER_SEGMENT) )
{
- DPRINTK("zero-sized data request\n");
+ DPRINTK("Too many sectors in segment\n");
goto bad_descriptor;
}
@@ -333,29 +344,41 @@ static void dispatch_rw_block_io(blkif_t *blkif,
}
nr_psegs += new_segs;
- ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
+ ASSERT(nr_psegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST*2);
}
+ /* Nonsensical zero-sized request? */
+ if ( unlikely(nr_psegs == 0) )
+ goto bad_descriptor;
+
+ if ( operation == READ )
+ prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
+ else
+ prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED);
+
for ( i = 0; i < nr_psegs; i++ )
{
- if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer,
- phys_seg[i].nr_sects << 9,
- operation==READ)) )
+ unsigned long sz = ((phys_seg[i].buffer & ~PAGE_MASK) +
+ (phys_seg[i].nr_sects << 9) +
+ (PAGE_SIZE - 1)) & PAGE_MASK;
+ int rc = direct_remap_area_pages(&init_mm,
+ MMAP_VADDR(pending_idx, i),
+ phys_seg[i].buffer & PAGE_MASK,
+ sz, prot, blkif->domid);
+ if ( rc != 0 )
{
DPRINTK("invalid buffer\n");
- while ( i-- > 0 )
- unlock_buffer(phys_seg[i].buffer,
- phys_seg[i].nr_sects << 9,
- operation==READ);
+ vmfree_area_pages(MMAP_VADDR(pending_idx, 0),
+ MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
goto bad_descriptor;
}
}
- pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]];
+ pending_req = &pending_reqs[pending_idx];
pending_req->blkif = blkif;
pending_req->id = req->id;
pending_req->operation = operation;
- pending_req->status = 0;
+ pending_req->status = BLKIF_RSP_OKAY;
atomic_set(&pending_req->pendcnt, nr_psegs);
blkif_get(blkif);
@@ -363,38 +386,37 @@ static void dispatch_rw_block_io(blkif_t *blkif,
/* Now we pass each segment down to the real blkdev layer. */
for ( i = 0; i < nr_psegs; i++ )
{
- bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
+ bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
if ( unlikely(bh == NULL) )
panic("bh is null\n");
memset(bh, 0, sizeof (struct buffer_head));
-
+
+ init_waitqueue_head(&bh->b_wait);
bh->b_size = phys_seg[i].nr_sects << 9;
bh->b_dev = phys_seg[i].dev;
+ bh->b_rdev = phys_seg[i].dev;
bh->b_rsector = (unsigned long)phys_seg[i].sector_number;
-
- /* SMH: we store a 'pseudo-virtual' bogus address in b_data since
- later code will undo this transformation (i.e. +-PAGE_OFFSET). */
- bh->b_data = phys_to_virt(phys_seg[i].buffer);
-
- /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
- bh->b_page = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT];
+ bh->b_data = (char *)MMAP_VADDR(pending_idx, i) +
+ (phys_seg[i].buffer & ~PAGE_MASK);
bh->b_end_io = end_block_io_op;
bh->b_private = pending_req;
- bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
+ bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) |
+ (1 << BH_Req) | (1 << BH_Launder);
if ( operation == WRITE )
bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
atomic_set(&bh->b_count, 1);
/* Dispatch a single request. We'll flush it to disc later. */
- submit_bh(operation, bh);
+ generic_make_request(operation, bh);
}
+ pending_cons++;
return;
bad_descriptor:
- make_response(blkif, req->id, req->operation, 1);
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
}
@@ -405,12 +427,13 @@ static void dispatch_rw_block_io(blkif_t *blkif,
static void make_response(blkif_t *blkif, unsigned long id,
- unsigned short op, unsigned long st)
+ unsigned short op, int st)
{
- blk_ring_resp_entry_t *resp;
+ blkif_response_t *resp;
+ unsigned long flags;
/* Place on the response ring for the relevant domain. */
- spin_lock(&blkif->blk_ring_lock);
+ spin_lock_irqsave(&blkif->blk_ring_lock, flags);
resp = &blkif->blk_ring_base->
ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp;
resp->id = id;
@@ -418,64 +441,28 @@ static void make_response(blkif_t *blkif, unsigned long id,
resp->status = st;
wmb();
blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
- spin_unlock(&blkif->blk_ring_lock);
+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
/* Kick the relevant domain. */
notify_via_evtchn(blkif->evtchn);
}
-static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs)
+void blkif_deschedule(blkif_t *blkif)
{
-#if 0
- unsigned long flags;
- struct task_struct *p;
- blk_ring_t *blk_ring;
- int i;
-
- printk("Dumping block queue stats: nr_pending = %d"
- " (prod=0x%08x,cons=0x%08x)\n",
- NR_PENDING_REQS, pending_prod, pending_cons);
-
- read_lock_irqsave(&tasklist_lock, flags);
- for_each_domain ( p )
- {
- printk("Domain: %llu\n", blkif->domain);
- blk_ring = blkif->blk_ring_base;
- printk(" req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/"
- "0x%08x on_list=%d\n",
- blk_ring->req_prod, blkif->blk_req_cons,
- blk_ring->resp_prod, blkif->blk_resp_prod,
- __on_blkdev_list(p));
- }
- read_unlock_irqrestore(&tasklist_lock, flags);
-
- for ( i = 0; i < MAX_PENDING_REQS; i++ )
- {
- printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n",
- i, pending_reqs[i].domain, pending_reqs[i].id,
- atomic_read(&pending_reqs[i].pendcnt),
- pending_reqs[i].operation, pending_reqs[i].status);
- }
-#endif
+ remove_from_blkdev_list(blkif);
}
-void unlink_blkdev_info(blkif_t *blkif)
+static int __init init_module(void)
{
- unsigned long flags;
+ int i;
- spin_lock_irqsave(&io_schedule_list_lock, flags);
- if ( __on_blkdev_list(blkif) )
+ blkif_interface_init();
+
+ if ( (mmap_vma = get_vm_area(MMAP_PAGES * PAGE_SIZE, VM_IOREMAP)) == NULL )
{
- list_del(&blkif->blkdev_list);
- blkif->blkdev_list.next = (void *)0xdeadbeef;
- blkif_put(blkif);
+ printk(KERN_WARNING "Could not allocate VMA for blkif backend.\n");
+ return -ENOMEM;
}
- spin_unlock_irqrestore(&io_schedule_list_lock, flags);
-}
-
-static int __init init_module(void)
-{
- int i;
pending_cons = 0;
pending_prod = MAX_PENDING_REQS;
@@ -483,20 +470,15 @@ static int __init init_module(void)
for ( i = 0; i < MAX_PENDING_REQS; i++ )
pending_ring[i] = i;
- for ( i = 0; i < NR_CPUS; i++ )
- completed_bhs[i] = NULL;
-
spin_lock_init(&io_schedule_list_lock);
INIT_LIST_HEAD(&io_schedule_list);
- if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int,
- SA_SHIRQ, "blkif-backend-dbg", &blkif_debug_int) != 0 )
- printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
-
buffer_head_cachep = kmem_cache_create(
"buffer_head_cache", sizeof(struct buffer_head),
0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+ blkif_ctrlif_init();
+
return 0;
}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
index bd6c40125c..19b0b3015d 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
@@ -8,7 +8,7 @@
#include "common.h"
-void vbd_create(blkif_vbd_create_t *create)
+void vbd_create(blkif_be_vbd_create_t *create)
{
vbd_t *vbd;
rb_node_t **rb_p, *rb_parent = NULL;
@@ -18,9 +18,9 @@ void vbd_create(blkif_vbd_create_t *create)
blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
if ( unlikely(blkif == NULL) )
{
- DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n",
+ DPRINTK("vbd_create attempted for non-existent blkif (%llu,%u)\n",
create->domid, create->blkif_handle);
- create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+ create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
return;
}
@@ -42,49 +42,50 @@ void vbd_create(blkif_vbd_create_t *create)
else
{
DPRINTK("vbd_create attempted for already existing vbd\n");
- create->status = BLKIF_STATUS_VBD_EXISTS;
+ create->status = BLKIF_BE_STATUS_VBD_EXISTS;
goto out;
}
}
- if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
+ if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_ATOMIC)) == NULL) )
{
DPRINTK("vbd_create: out of memory\n");
- create->status = BLKIF_STATUS_OUT_OF_MEMORY;
+ create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
goto out;
}
- vbd->vdevice = vdevice;
- vbd->mode = create->mode;
- vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
- vbd->extents = NULL;
+ vbd->vdevice = vdevice;
+ vbd->readonly = create->readonly;
+ vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
+ vbd->extents = NULL;
rb_link_node(&vbd->rb, rb_parent, rb_p);
rb_insert_color(&vbd->rb, &blkif->vbd_rb);
- create->status = BLKIF_STATUS_OKAY;
+ DPRINTK("Successful creation of vdev=%04x (dom=%llu)\n",
+ vdevice, create->domid);
+ create->status = BLKIF_BE_STATUS_OKAY;
out:
spin_unlock(&blkif->vbd_lock);
- blkif_put(blkif);
}
/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
-void vbd_grow(blkif_vbd_grow_t *grow)
+void vbd_grow(blkif_be_vbd_grow_t *grow)
{
- blkif_t *blkif;
- xen_extent_le_t **px, *x;
- vbd_t *vbd = NULL;
- rb_node_t *rb;
- blkif_vdev_t vdevice = grow->vdevice;
+ blkif_t *blkif;
+ blkif_extent_le_t **px, *x;
+ vbd_t *vbd = NULL;
+ rb_node_t *rb;
+ blkif_vdev_t vdevice = grow->vdevice;
blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
if ( unlikely(blkif == NULL) )
{
- DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n",
+ DPRINTK("vbd_grow attempted for non-existent blkif (%llu,%u)\n",
grow->domid, grow->blkif_handle);
- grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+ grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
return;
}
@@ -105,49 +106,51 @@ void vbd_grow(blkif_vbd_grow_t *grow)
if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
{
DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
- grow->status = BLKIF_STATUS_VBD_NOT_FOUND;
+ grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
goto out;
}
- if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) )
+ if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t),
+ GFP_ATOMIC)) == NULL) )
{
DPRINTK("vbd_grow: out of memory\n");
- grow->status = BLKIF_STATUS_OUT_OF_MEMORY;
+ grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
goto out;
}
x->extent.device = grow->extent.device;
x->extent.sector_start = grow->extent.sector_start;
x->extent.sector_length = grow->extent.sector_length;
- x->next = (xen_extent_le_t *)NULL;
+ x->next = (blkif_extent_le_t *)NULL;
for ( px = &vbd->extents; *px != NULL; px = &(*px)->next )
continue;
*px = x;
- grow->status = BLKIF_STATUS_OKAY;
+ DPRINTK("Successful grow of vdev=%04x (dom=%llu)\n",
+ vdevice, grow->domid);
+ grow->status = BLKIF_BE_STATUS_OKAY;
out:
spin_unlock(&blkif->vbd_lock);
- blkif_put(blkif);
}
-void vbd_shrink(blkif_vbd_shrink_t *shrink)
+void vbd_shrink(blkif_be_vbd_shrink_t *shrink)
{
- blkif_t *blkif;
- xen_extent_le_t **px, *x;
- vbd_t *vbd = NULL;
- rb_node_t *rb;
- blkif_vdev_t vdevice = shrink->vdevice;
+ blkif_t *blkif;
+ blkif_extent_le_t **px, *x;
+ vbd_t *vbd = NULL;
+ rb_node_t *rb;
+ blkif_vdev_t vdevice = shrink->vdevice;
blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
if ( unlikely(blkif == NULL) )
{
- DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,&u)\n",
+ DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,%u)\n",
shrink->domid, shrink->blkif_handle);
- shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+ shrink->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
return;
}
@@ -167,13 +170,13 @@ void vbd_shrink(blkif_vbd_shrink_t *shrink)
if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
{
- shrink->status = BLKIF_STATUS_VBD_NOT_FOUND;
+ shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
goto out;
}
if ( unlikely(vbd->extents == NULL) )
{
- shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND;
+ shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
goto out;
}
@@ -185,28 +188,27 @@ void vbd_shrink(blkif_vbd_shrink_t *shrink)
*px = x->next;
kfree(x);
- shrink->status = BLKIF_STATUS_OKAY;
+ shrink->status = BLKIF_BE_STATUS_OKAY;
out:
spin_unlock(&blkif->vbd_lock);
- blkif_put(blkif);
}
-void vbd_destroy(blkif_vbd_destroy_t *destroy)
+void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
{
- blkif_t *blkif;
- vbd_t *vbd;
- rb_node_t *rb;
- xen_extent_le_t *x, *t;
- blkif_vdev_t vdevice = destroy->vdevice;
+ blkif_t *blkif;
+ vbd_t *vbd;
+ rb_node_t *rb;
+ blkif_extent_le_t *x, *t;
+ blkif_vdev_t vdevice = destroy->vdevice;
blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
if ( unlikely(blkif == NULL) )
{
- DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,&u)\n",
+ DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,%u)\n",
destroy->domid, destroy->blkif_handle);
- destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
return;
}
@@ -224,7 +226,7 @@ void vbd_destroy(blkif_vbd_destroy_t *destroy)
goto found;
}
- destroy->status = BLKIF_STATUS_VBD_NOT_FOUND;
+ destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
goto out;
found:
@@ -241,7 +243,6 @@ void vbd_destroy(blkif_vbd_destroy_t *destroy)
out:
spin_unlock(&blkif->vbd_lock);
- blkif_put(blkif);
}
@@ -249,7 +250,7 @@ void destroy_all_vbds(blkif_t *blkif)
{
vbd_t *vbd;
rb_node_t *rb;
- xen_extent_le_t *x, *t;
+ blkif_extent_le_t *x, *t;
spin_lock(&blkif->vbd_lock);
@@ -273,51 +274,30 @@ void destroy_all_vbds(blkif_t *blkif)
}
-static int vbd_probe_single(xen_disk_info_t *xdi,
- vbd_t *vbd,
- struct task_struct *p)
+static int vbd_probe_single(blkif_t *blkif, vdisk_t *vbd_info, vbd_t *vbd)
{
- xen_extent_le_t *x;
- xen_disk_t cur_disk;
+ blkif_extent_le_t *x;
- if ( xdi->count == xdi->max )
- {
- DPRINTK("vbd_probe_devices: out of space for probe.\n");
- return -ENOMEM;
- }
-
- cur_disk.device = vbd->vdevice;
- cur_disk.info = vbd->type;
- if ( !VBD_CAN_WRITE(vbd) )
- cur_disk.info |= XD_FLAG_RO;
- cur_disk.capacity = 0ULL;
+ vbd_info->device = vbd->vdevice;
+ vbd_info->info = vbd->type;
+ if ( vbd->readonly )
+ vbd_info->info |= VDISK_FLAG_RO;
+ vbd_info->capacity = 0ULL;
for ( x = vbd->extents; x != NULL; x = x->next )
- cur_disk.capacity += x->extent.nr_sectors;
- cur_disk.domain = p->domain;
+ vbd_info->capacity += x->extent.sector_length;
- /* Now copy into relevant part of user-space buffer */
- if( copy_to_user(&xdi->disks[xdi->count],
- &cur_disk,
- sizeof(xen_disk_t)) )
- {
- DPRINTK("vbd_probe_devices: copy_to_user failed\n");
- return -EFAULT;
- }
-
- xdi->count++;
-
return 0;
}
-static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
+int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
{
- int rc = 0;
+ int rc = 0, nr_vbds = 0;
rb_node_t *rb;
- spin_lock(&p->vbd_lock);
+ spin_lock(&blkif->vbd_lock);
- if ( (rb = p->vbd_rb.rb_node) == NULL )
+ if ( (rb = blkif->vbd_rb.rb_node) == NULL )
goto out;
new_subtree:
@@ -328,7 +308,10 @@ static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
for ( ; ; )
{
/* STEP 2. Dealt with left subtree. Now process current node. */
- if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
+ if ( (rc = vbd_probe_single(blkif, &vbd_info[nr_vbds],
+ rb_entry(rb, vbd_t, rb))) != 0 )
+ goto out;
+ if ( ++nr_vbds == max_vbds )
goto out;
/* STEP 3. Process right subtree, if any. */
@@ -355,146 +338,22 @@ static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
}
out:
- spin_unlock(&p->vbd_lock);
- return rc;
-}
-
-
-/*
- * Return information about the VBDs available for a given domain, or for all
- * domains; in the general case the 'domain' argument will be 0 which means
- * "information about the caller"; otherwise the 'domain' argument will
- * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of
- * these cases require the caller to be privileged.
- */
-long vbd_probe(vbd_probe_t *probe)
-{
- struct task_struct *p = NULL;
- unsigned long flags;
- long ret = 0;
-
- if ( probe->domain != 0 )
- {
- /* We can only probe for ourselves (unless we're privileged). */
- if( (probe->domain != current->domain) && !IS_PRIV(current) )
- return -EPERM;
-
- if ( (probe->domain != VBD_PROBE_ALL) &&
- ((p = find_domain_by_id(probe->domain)) == NULL) )
- {
- DPRINTK("vbd_probe attempted for non-existent domain %llu\n",
- probe->domain);
- return -EINVAL;
- }
- }
- else
- {
- /* Default is to probe for ourselves. */
- p = current;
- get_task_struct(p); /* to mirror final put_task_struct */
- }
-
- if ( probe->domain == VBD_PROBE_ALL )
- {
- read_lock_irqsave(&tasklist_lock, flags);
- for_each_domain ( p )
- {
- if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
- {
- read_unlock_irqrestore(&tasklist_lock, flags);
- goto out;
- }
- }
- read_unlock_irqrestore(&tasklist_lock, flags);
- }
- else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
- goto out;
-
- out:
- if ( ret != 0 )
- DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret);
- if ( p != NULL )
- put_task_struct(p);
- return ret;
-}
-
-
-long vbd_info(vbd_info_t *info)
-{
- struct task_struct *p;
- xen_extent_le_t *x;
- xen_extent_t *extents;
- vbd_t *vbd = NULL;
- rb_node_t *rb;
- long ret = 0;
-
- if ( (info->domain != current->domain) && !IS_PRIV(current) )
- return -EPERM;
-
- if ( (p = find_domain_by_id(info->domain)) == NULL )
- {
- DPRINTK("vbd_info attempted for non-existent domain %llu\n",
- info->domain);
- return -EINVAL;
- }
-
- spin_lock(&p->vbd_lock);
-
- rb = p->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, vbd_t, rb);
- if ( info->vdevice < vbd->vdevice )
- rb = rb->rb_left;
- else if ( info->vdevice > vbd->vdevice )
- rb = rb->rb_right;
- else
- break;
- }
-
- if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) )
- {
- DPRINTK("vbd_info attempted on non-existent VBD.\n");
- ret = -EINVAL;
- goto out;
- }
-
- info->mode = vbd->mode;
- info->nextents = 0;
-
- extents = info->extents;
- for ( x = vbd->extents; x != NULL; x = x->next )
- {
- if ( info->nextents == info->maxextents )
- break;
- if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) )
- {
- DPRINTK("vbd_info: copy_to_user failed\n");
- ret = -EFAULT;
- goto out;
- }
- extents++;
- info->nextents++;
- }
-
- out:
- spin_unlock(&p->vbd_lock);
- put_task_struct(p);
- return ret;
+ spin_unlock(&blkif->vbd_lock);
+ return (rc == 0) ? nr_vbds : rc;
}
-int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
+int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation)
{
- xen_extent_le_t *x;
- vbd_t *vbd;
- rb_node_t *rb;
- xen_sector_t sec_off;
- unsigned long nr_secs;
+ blkif_extent_le_t *x;
+ vbd_t *vbd;
+ rb_node_t *rb;
+ blkif_sector_t sec_off;
+ unsigned long nr_secs;
- spin_lock(&p->vbd_lock);
+ spin_lock(&blkif->vbd_lock);
- rb = p->vbd_rb.rb_node;
+ rb = blkif->vbd_rb.rb_node;
while ( rb != NULL )
{
vbd = rb_entry(rb, vbd_t, rb);
@@ -507,42 +366,41 @@ int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
}
DPRINTK("vbd_translate; domain %llu attempted to access "
- "non-existent VBD.\n", p->domain);
+ "non-existent VBD.\n", blkif->domid);
- spin_unlock(&p->vbd_lock);
+ spin_unlock(&blkif->vbd_lock);
return -ENODEV;
found:
- if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
- ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
+ if ( (operation == WRITE) && vbd->readonly )
{
- spin_unlock(&p->vbd_lock);
+ spin_unlock(&blkif->vbd_lock);
return -EACCES;
}
/*
- * Now iterate through the list of xen_extents, working out which should
+ * Now iterate through the list of blkif_extents, working out which should
* be used to perform the translation.
*/
sec_off = pseg->sector_number;
nr_secs = pseg->nr_sects;
for ( x = vbd->extents; x != NULL; x = x->next )
{
- if ( sec_off < x->extent.nr_sectors )
+ if ( sec_off < x->extent.sector_length )
{
pseg->dev = x->extent.device;
- pseg->sector_number = x->extent.start_sector + sec_off;
- if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) )
+ pseg->sector_number = x->extent.sector_start + sec_off;
+ if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) )
goto overrun;
-            spin_unlock(&p->vbd_lock);
+            spin_unlock(&blkif->vbd_lock);
return 1;
}
- sec_off -= x->extent.nr_sectors;
+ sec_off -= x->extent.sector_length;
}
DPRINTK("vbd_translate: end of vbd.\n");
- spin_unlock(&p->vbd_lock);
+ spin_unlock(&blkif->vbd_lock);
return -EACCES;
/*
@@ -554,7 +412,7 @@ int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
overrun:
/* Adjust length of first chunk to run to end of first extent. */
- pseg[0].nr_sects = x->extent.nr_sectors - sec_off;
+ pseg[0].nr_sects = x->extent.sector_length - sec_off;
/* Set second chunk buffer and length to start where first chunk ended. */
pseg[1].buffer = pseg[0].buffer + (pseg[0].nr_sects << 9);
@@ -562,7 +420,7 @@ int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
/* Now move to the next extent. Check it exists and is long enough! */
if ( unlikely((x = x->next) == NULL) ||
- unlikely(x->extent.nr_sectors < pseg[1].nr_sects) )
+ unlikely(x->extent.sector_length < pseg[1].nr_sects) )
{
DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
-        spin_unlock(&p->vbd_lock);
+        spin_unlock(&blkif->vbd_lock);
@@ -571,8 +429,8 @@ int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
/* Store the real device and start sector for the second chunk. */
pseg[1].dev = x->extent.device;
- pseg[1].sector_number = x->extent.start_sector;
+ pseg[1].sector_number = x->extent.sector_start;
- spin_unlock(&p->vbd_lock);
+ spin_unlock(&blkif->vbd_lock);
return 2;
}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
index f6e8a4d5c8..5db2b48a51 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
@@ -16,24 +16,27 @@
#define BLKIF_OP_WRITE 1
#define BLKIF_OP_PROBE 2
-/* NB. Ring size must be small enough for sizeof(blk_ring_t) <= PAGE_SIZE. */
+/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */
#define BLKIF_RING_SIZE 64
/*
* Maximum scatter/gather segments per request.
- * This is carefully chosen so that sizeof(blk_ring_t) <= PAGE_SIZE.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
* NB. This could be 12 if the ring indexes weren't stored in the same page.
*/
-#define BLKIF_REQUEST_MAX_SEGMENTS 11
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+#define BLKIF_MAX_SECTORS_PER_SEGMENT 16
typedef struct {
unsigned char operation; /* BLKIF_OP_??? */
- unsigned char nr_segments; /* number of segments (<= MAX_BLK_SEGS) */
+ unsigned char nr_segments; /* number of segments */
blkif_vdev_t device; /* only for read/write requests */
unsigned long id; /* private guest value, echoed in resp */
- xen_sector_t sector_number; /* start sector idx on disk (r/w only) */
- /* Least 9 bits is 'nr_sects'. High 23 bits is the address. */
- unsigned long buffer_and_sects[MAX_BLK_SEGS];
+ blkif_sector_t sector_number; /* start sector idx on disk (r/w only) */
+ /* Least 9 bits is 'nr_sects'. High 23 bits is the address. */
+ /* We must have '0 <= nr_sects <= BLKIF_MAX_SECTORS_PER_SEGMENT'. */
+ unsigned long buffer_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} blkif_request_t;
typedef struct {
@@ -59,8 +62,8 @@ typedef unsigned int BLKIF_RING_IDX;
#define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1))
typedef struct {
- BLKIF_RING_IDX req_prod; /* Request producer. Updated by guest OS. */
- BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen. */
+ BLKIF_RING_IDX req_prod; /* Request producer. Updated by front-end. */
+ BLKIF_RING_IDX resp_prod; /* Response producer. Updated by back-end. */
union {
blkif_request_t req;
blkif_response_t resp;
@@ -103,7 +106,7 @@ typedef struct {
typedef struct {
blkif_vdev_t device; /* Device number (opaque 16 bit value). */
unsigned short info; /* Device type and flags (VDISK_*). */
- xen_sector_t capacity; /* Size in terms of 512-byte sectors. */
+ blkif_sector_t capacity; /* Size in terms of 512-byte sectors. */
} vdisk_t;
#endif /* __SHARED_BLKIF_H__ */
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
index 35986ca54a..b0d27cf698 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
@@ -1,3 +1,3 @@
O_TARGET := drv.o
-obj-y := block.o vbd.o
+obj-y := main.o vbd.o
include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h
index e41e03970e..2d4415bdef 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h
@@ -1,11 +1,11 @@
/******************************************************************************
- * block.h
+ * arch/xen/drivers/blkif/frontend/common.h
*
* Shared definitions between all levels of XenoLinux Virtual block devices.
*/
-#ifndef __XEN_DRIVERS_BLOCK_H__
-#define __XEN_DRIVERS_BLOCK_H__
+#ifndef __XEN_DRIVERS_COMMON_H__
+#define __XEN_DRIVERS_COMMON_H__
#include <linux/config.h>
#include <linux/module.h>
@@ -27,6 +27,8 @@
#include <asm/atomic.h>
#include <asm/uaccess.h>
+#include "../blkif.h"
+
#if 0
#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
#else
@@ -52,14 +54,14 @@ typedef struct xl_disk {
int usage;
} xl_disk_t;
-extern int xen_control_msg(int operration, char *buffer, int size);
-extern int xen_block_open(struct inode *inode, struct file *filep);
-extern int xen_block_release(struct inode *inode, struct file *filep);
-extern int xen_block_ioctl(struct inode *inode, struct file *filep,
+extern int blkif_open(struct inode *inode, struct file *filep);
+extern int blkif_release(struct inode *inode, struct file *filep);
+extern int blkif_ioctl(struct inode *inode, struct file *filep,
unsigned command, unsigned long argument);
-extern int xen_block_check(kdev_t dev);
-extern int xen_block_revalidate(kdev_t dev);
-extern void do_xlblk_request (request_queue_t *rq);
+extern int blkif_check(kdev_t dev);
+extern int blkif_revalidate(kdev_t dev);
+extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
+extern void do_blkif_request (request_queue_t *rq);
extern void xlvbd_update_vbds(void);
@@ -79,4 +81,4 @@ static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
extern int xlvbd_init(void);
extern void xlvbd_cleanup(void);
-#endif /* __XEN_DRIVERS_BLOCK_H__ */
+#endif /* __XEN_DRIVERS_COMMON_H__ */
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
index d00dd98f7b..29cc01d087 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
@@ -1,5 +1,5 @@
/******************************************************************************
- * block.c
+ * arch/xen/drivers/blkif/frontend/main.c
*
* Xenolinux virtual block-device driver.
*
@@ -7,32 +7,35 @@
* Modifications by Mark A. Williamson are (c) Intel Research Cambridge
*/
-#include "block.h"
+#include "common.h"
#include <linux/blk.h>
#include <linux/cdrom.h>
#include <linux/tqueue.h>
#include <linux/sched.h>
-#include <scsi/scsi.h>
-
#include <linux/interrupt.h>
+#include <scsi/scsi.h>
+#include <asm/ctrl_if.h>
typedef unsigned char byte; /* from linux/ide.h */
-#define STATE_ACTIVE 0
-#define STATE_SUSPENDED 1
-#define STATE_CLOSED 2
-static unsigned int state = STATE_SUSPENDED;
+#define BLKIF_STATE_CLOSED 0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED 2
+static unsigned int blkif_state = BLKIF_STATE_CLOSED;
+static unsigned int blkif_evtchn, blkif_irq;
-/* Dynamically-mapped IRQs. */
-static int xlblk_response_irq, xlblk_update_irq;
+static struct tq_struct blkif_statechange_tq;
-static blk_ring_t *blk_ring;
+static int blkif_control_rsp_valid;
+static blkif_response_t blkif_control_rsp;
+
+static blkif_ring_t *blk_ring;
static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
static BLK_RING_IDX req_prod; /* Private request producer. */
/* We plug the I/O ring if the driver is suspended or if the ring is full. */
#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
- (state != STATE_ACTIVE))
+ (blkif_state != BLKIF_STATE_CONNECTED))
/*
@@ -49,39 +52,27 @@ static int sg_operation = -1;
static unsigned long sg_next_sect;
#define DISABLE_SCATTERGATHER() (sg_operation = -1)
-static inline void signal_requests_to_xen(void)
+static inline void flush_requests(void)
{
- block_io_op_t op;
-
DISABLE_SCATTERGATHER();
blk_ring->req_prod = req_prod;
-
- op.cmd = BLOCK_IO_OP_SIGNAL;
- HYPERVISOR_block_io_op(&op);
- return;
+ notify_via_evtchn(blkif_evtchn);
}
/*
- * xlblk_update_int/update-vbds_task - handle VBD update events from Xen
- *
- * Schedule a task for keventd to run, which will update the VBDs and perform
- * the corresponding updates to our view of VBD state, so the XenoLinux will
- * respond to changes / additions / deletions to the set of VBDs automatically.
+ * blkif_update_int/update-vbds_task - handle VBD update events.
+ * Schedule a task for keventd to run, which will update the VBDs and perform
+ * the corresponding updates to our view of VBD state.
*/
static struct tq_struct update_tq;
static void update_vbds_task(void *unused)
{
xlvbd_update_vbds();
}
-static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
- update_tq.routine = update_vbds_task;
- schedule_task(&update_tq);
-}
-int xen_block_open(struct inode *inode, struct file *filep)
+int blkif_open(struct inode *inode, struct file *filep)
{
short xldev = inode->i_rdev;
struct gendisk *gd = get_gendisk(xldev);
@@ -122,7 +113,7 @@ int xen_block_open(struct inode *inode, struct file *filep)
}
-int xen_block_release(struct inode *inode, struct file *filep)
+int blkif_release(struct inode *inode, struct file *filep)
{
xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
@@ -132,15 +123,17 @@ int xen_block_release(struct inode *inode, struct file *filep)
*/
if ( --disk->usage == 0 )
{
+#if 0
update_tq.routine = update_vbds_task;
schedule_task(&update_tq);
+#endif
}
return 0;
}
-int xen_block_ioctl(struct inode *inode, struct file *filep,
+int blkif_ioctl(struct inode *inode, struct file *filep,
unsigned command, unsigned long argument)
{
kdev_t dev = inode->i_rdev;
@@ -170,7 +163,7 @@ int xen_block_ioctl(struct inode *inode, struct file *filep,
case BLKRRPART: /* re-read partition table */
DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART);
- return xen_block_revalidate(dev);
+ return blkif_revalidate(dev);
case BLKSSZGET:
return hardsect_size[MAJOR(dev)][MINOR(dev)];
@@ -218,11 +211,11 @@ int xen_block_ioctl(struct inode *inode, struct file *filep,
return 0;
case SCSI_IOCTL_GET_BUS_NUMBER:
- DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
+ DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif");
return -ENOSYS;
default:
- printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
+ printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command);
return -ENOSYS;
}
@@ -230,13 +223,13 @@ int xen_block_ioctl(struct inode *inode, struct file *filep,
}
/* check media change: should probably do something here in some cases :-) */
-int xen_block_check(kdev_t dev)
+int blkif_check(kdev_t dev)
{
- DPRINTK("xen_block_check\n");
+ DPRINTK("blkif_check\n");
return 0;
}
-int xen_block_revalidate(kdev_t dev)
+int blkif_revalidate(kdev_t dev)
{
struct block_device *bd;
struct gendisk *gd;
@@ -289,25 +282,25 @@ int xen_block_revalidate(kdev_t dev)
/*
- * hypervisor_request
+ * blkif_queue_request
*
* request block io
*
* id: for guest use only.
- * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
+ * operation: BLKIF_OP_{READ,WRITE,PROBE}
* buffer: buffer to read/write into. this should be a
* virtual address in the guest os.
*/
-static int hypervisor_request(unsigned long id,
- int operation,
- char * buffer,
- unsigned long sector_number,
- unsigned short nr_sectors,
- kdev_t device)
+static int blkif_queue_request(unsigned long id,
+ int operation,
+ char * buffer,
+ unsigned long sector_number,
+ unsigned short nr_sectors,
+ kdev_t device)
{
- unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
- struct gendisk *gd;
- blk_ring_req_entry_t *req;
+ unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
+ struct gendisk *gd;
+ blkif_request_t *req;
struct buffer_head *bh;
if ( unlikely(nr_sectors >= (1<<9)) )
@@ -315,26 +308,26 @@ static int hypervisor_request(unsigned long id,
if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
BUG();
- if ( unlikely(state == STATE_CLOSED) )
+ if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
return 1;
switch ( operation )
{
- case XEN_BLOCK_READ:
- case XEN_BLOCK_WRITE:
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
gd = get_gendisk(device);
/*
* Update the sector_number we'll pass down as appropriate; note that
* we could sanity check that resulting sector will be in this
- * partition, but this will happen in xen anyhow.
+ * partition, but this will happen in driver backend anyhow.
*/
sector_number += gd->part[MINOR(device)].start_sect;
/*
- * If this unit doesn't consist of virtual (i.e., Xen-specified)
- * partitions then we clear the partn bits from the device number.
+ * If this unit doesn't consist of virtual partitions then we clear
+ * the partn bits from the device number.
*/
if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
GENHD_FL_VIRT_PARTNS) )
@@ -375,7 +368,7 @@ static int hypervisor_request(unsigned long id,
req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
req->id = id;
req->operation = operation;
- req->sector_number = (xen_sector_t)sector_number;
+ req->sector_number = (blkif_sector_t)sector_number;
req->device = device;
req->nr_segments = 1;
req->buffer_and_sects[0] = buffer_ma | nr_sectors;
@@ -386,23 +379,23 @@ static int hypervisor_request(unsigned long id,
/*
- * do_xlblk_request
+ * do_blkif_request
* read a block; request is in a request queue
*/
-void do_xlblk_request(request_queue_t *rq)
+void do_blkif_request(request_queue_t *rq)
{
struct request *req;
struct buffer_head *bh, *next_bh;
int rw, nsect, full, queued = 0;
- DPRINTK("xlblk.c::do_xlblk_request\n");
+ DPRINTK("Entered do_blkif_request\n");
while ( !rq->plugged && !list_empty(&rq->queue_head))
{
if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
goto out;
- DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
+ DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
req, req->cmd, req->sector,
req->current_nr_sectors, req->nr_sectors, req->bh);
@@ -420,9 +413,9 @@ void do_xlblk_request(request_queue_t *rq)
next_bh = bh->b_reqnext;
bh->b_reqnext = NULL;
- full = hypervisor_request(
+ full = blkif_queue_request(
(unsigned long)bh,
- (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE,
+ (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
if ( full )
@@ -462,7 +455,8 @@ void do_xlblk_request(request_queue_t *rq)
}
out:
- if ( queued != 0 ) signal_requests_to_xen();
+ if ( queued != 0 )
+ flush_requests();
}
@@ -474,30 +468,30 @@ static void kick_pending_request_queues(void)
{
/* Attempt to drain the queue, but bail if the ring becomes full. */
while ( (nr_pending != 0) && !RING_PLUGGED )
- do_xlblk_request(pending_queues[--nr_pending]);
+ do_blkif_request(pending_queues[--nr_pending]);
}
}
-static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
+static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
BLK_RING_IDX i;
unsigned long flags;
struct buffer_head *bh, *next_bh;
- if ( unlikely(state == STATE_CLOSED) )
+ if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) )
return;
spin_lock_irqsave(&io_request_lock, flags);
for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
{
- blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
+ blkif_response_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
switch ( bret->operation )
{
- case XEN_BLOCK_READ:
- case XEN_BLOCK_WRITE:
- if ( unlikely(bret->status != 0) )
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
DPRINTK("Bad return from blkdev data request: %lx\n",
bret->status);
for ( bh = (struct buffer_head *)bret->id;
@@ -506,10 +500,13 @@ static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
next_bh = bh->b_reqnext;
bh->b_reqnext = NULL;
- bh->b_end_io(bh, !bret->status);
+ bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
}
break;
-
+ case BLKIF_OP_PROBE:
+ memcpy(&blkif_control_rsp, bret, sizeof(*bret));
+ blkif_control_rsp_valid = 1;
+ break;
default:
BUG();
}
@@ -523,70 +520,190 @@ static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
}
-static void reset_xlblk_interface(void)
+void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
{
- block_io_op_t op;
+ unsigned long flags;
- nr_pending = 0;
+ retry:
+ while ( (req_prod - resp_cons) == BLK_RING_SIZE )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
- op.cmd = BLOCK_IO_OP_RESET;
- if ( HYPERVISOR_block_io_op(&op) != 0 )
- printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
+ spin_lock_irqsave(&io_request_lock, flags);
+ if ( (req_prod - resp_cons) == BLK_RING_SIZE )
+ {
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ goto retry;
+ }
- op.cmd = BLOCK_IO_OP_RING_ADDRESS;
- (void)HYPERVISOR_block_io_op(&op);
+ DISABLE_SCATTERGATHER();
+ memcpy(&blk_ring->ring[MASK_BLK_IDX(req_prod)].req, req, sizeof(*req));
+ req_prod++;
+ flush_requests();
- set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
- blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
- blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+ spin_unlock_irqrestore(&io_request_lock, flags);
- wmb();
- state = STATE_ACTIVE;
+ while ( !blkif_control_rsp_valid )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
+ blkif_control_rsp_valid = 0;
}
-int __init xlblk_init(void)
+static void blkif_bringup_phase1(void *unused)
{
- int error;
+ ctrl_msg_t cmsg;
+ blkif_fe_interface_connect_t up;
+
+ /* Move from CLOSED to DISCONNECTED state. */
+ blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
+ blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+ blkif_state = BLKIF_STATE_DISCONNECTED;
+
+ /* Construct an interface-CONNECT message for the domain controller. */
+ cmsg.type = CMSG_BLKIF_FE;
+ cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT;
+ cmsg.length = sizeof(blkif_fe_interface_connect_t);
+ up.handle = 0;
+ up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
+ memcpy(cmsg.msg, &up, sizeof(up));
+
+ /* Tell the controller to bring up the interface. */
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+static void blkif_bringup_phase2(void *unused)
+{
+ blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
+ (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
+
+ /* Probe for discs that are attached to the interface. */
+ xlvbd_init();
- reset_xlblk_interface();
+ blkif_state = BLKIF_STATE_CONNECTED;
- xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
- xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD);
+ /* Kick pending requests. */
+ spin_lock_irq(&io_request_lock);
+ kick_pending_request_queues();
+ spin_unlock_irq(&io_request_lock);
+}
- error = request_irq(xlblk_response_irq, xlblk_response_int,
- SA_SAMPLE_RANDOM, "blkdev", NULL);
- if ( error )
+static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
+{
+ if ( status->handle != 0 )
{
- printk(KERN_ALERT "Could not allocate receive interrupt\n");
- goto fail;
+ printk(KERN_WARNING "Status change on unsupported blkif %d\n",
+ status->handle);
+ return;
+ }
+
+ switch ( status->status )
+ {
+ case BLKIF_INTERFACE_STATUS_DESTROYED:
+ printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n",
+ blkif_state);
+ break;
+
+ case BLKIF_INTERFACE_STATUS_DISCONNECTED:
+ if ( blkif_state != BLKIF_STATE_CLOSED )
+ {
+ printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message"
+ " in state %d\n", blkif_state);
+ break;
+ }
+ blkif_statechange_tq.routine = blkif_bringup_phase1;
+ schedule_task(&blkif_statechange_tq);
+ break;
+
+ case BLKIF_INTERFACE_STATUS_CONNECTED:
+ if ( blkif_state == BLKIF_STATE_CLOSED )
+ {
+ printk(KERN_WARNING "Unexpected blkif-CONNECTED message"
+ " in state %d\n", blkif_state);
+ break;
+ }
+ blkif_evtchn = status->evtchn;
+ blkif_statechange_tq.routine = blkif_bringup_phase2;
+ schedule_task(&blkif_statechange_tq);
+ break;
+
+ default:
+ printk(KERN_WARNING "Status change to unknown value %d\n",
+ status->status);
+ break;
}
+}
- error = request_irq(xlblk_update_irq, xlblk_update_int,
- 0, "blkdev", NULL);
- if ( error )
+static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ switch ( msg->subtype )
{
- printk(KERN_ALERT "Could not allocate block update interrupt\n");
- goto fail;
+ case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
+ if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) )
+ goto parse_error;
+ blkif_status_change((blkif_fe_interface_status_changed_t *)
+ &msg->msg[0]);
+ break;
+#if 0
+ case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
+ update_tq.routine = update_vbds_task;
+ schedule_task(&update_tq);
+ break;
+#endif
+ default:
+ goto parse_error;
}
- (void)xlvbd_init();
+ ctrl_if_send_response(msg);
+ return;
- return 0;
+ parse_error:
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
- fail:
- return error;
+
+int __init xlblk_init(void)
+{
+ ctrl_msg_t cmsg;
+ blkif_fe_driver_status_changed_t st;
+
+ (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_BLKIF_FE;
+ cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
+ cmsg.length = sizeof(blkif_fe_driver_status_changed_t);
+ st.status = BLKIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+ /*
+ * We should read 'nr_interfaces' from response message and wait
+ * for notifications before proceeding. For now we assume that we
+ * will be notified of exactly one interface.
+ */
+ while ( blkif_state != BLKIF_STATE_CONNECTED )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ return 0;
}
static void __exit xlblk_cleanup(void)
{
- xlvbd_cleanup();
- free_irq(xlblk_response_irq, NULL);
- free_irq(xlblk_update_irq, NULL);
- unbind_virq_from_irq(VIRQ_BLKDEV);
- unbind_virq_from_irq(VIRQ_VBD_UPD);
+ /* XXX FIXME */
+ BUG();
}
@@ -598,28 +715,13 @@ module_exit(xlblk_cleanup);
void blkdev_suspend(void)
{
- state = STATE_SUSPENDED;
- wmb();
-
- while ( resp_cons != blk_ring->req_prod )
- {
- barrier();
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(1);
- }
-
- wmb();
- state = STATE_CLOSED;
- wmb();
-
- clear_fixmap(FIX_BLKRING_BASE);
+ /* XXX FIXME */
+ BUG();
}
void blkdev_resume(void)
{
- reset_xlblk_interface();
- spin_lock_irq(&io_request_lock);
- kick_pending_request_queues();
- spin_unlock_irq(&io_request_lock);
+ /* XXX FIXME */
+ BUG();
}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
index e08b976c56..b26907192a 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
@@ -1,13 +1,13 @@
/******************************************************************************
- * vbd.c
+ * arch/xen/drivers/blkif/frontend/vbd.c
*
- * Xenolinux virtual block-device driver (xvd).
+ * Xenolinux virtual block-device driver.
*
* Copyright (c) 2003-2004, Keir Fraser & Steve Hand
* Modifications by Mark A. Williamson are (c) Intel Research Cambridge
*/
-#include "block.h"
+#include "common.h"
#include <linux/blk.h>
/*
@@ -43,54 +43,59 @@ static int xlvbd_blksize_size[256];
static int xlvbd_hardsect_size[256];
static int xlvbd_max_sectors[256];
-/* Information from Xen about our VBDs. */
+/* Information about our VBDs. */
#define MAX_VBDS 64
static int nr_vbds;
-static xen_disk_t *vbd_info;
+static vdisk_t *vbd_info;
static struct block_device_operations xlvbd_block_fops =
{
- open: xen_block_open,
- release: xen_block_release,
- ioctl: xen_block_ioctl,
- check_media_change: xen_block_check,
- revalidate: xen_block_revalidate,
+ open: blkif_open,
+ release: blkif_release,
+ ioctl: blkif_ioctl,
+ check_media_change: blkif_check,
+ revalidate: blkif_revalidate,
};
-static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
+static int xlvbd_get_vbd_info(vdisk_t *disk_info)
{
- int error;
- block_io_op_t op;
-
- /* Probe for disk information. */
- memset(&op, 0, sizeof(op));
- op.cmd = BLOCK_IO_OP_VBD_PROBE;
- op.u.probe_params.domain = 0;
- op.u.probe_params.xdi.max = MAX_VBDS;
- op.u.probe_params.xdi.disks = disk_info;
- op.u.probe_params.xdi.count = 0;
-
- if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
+ vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
+ blkif_request_t req;
+ blkif_response_t rsp;
+ int nr;
+
+ memset(&req, 0, sizeof(req));
+ req.operation = BLKIF_OP_PROBE;
+ req.nr_segments = 1;
+ req.buffer_and_sects[0] = virt_to_machine(buf) | (PAGE_SIZE/512);
+
+ blkif_control_send(&req, &rsp);
+
+ if ( rsp.status <= 0 )
{
- printk(KERN_ALERT "Could not probe disks (%d)\n", error);
+ printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
return -1;
}
- return op.u.probe_params.xdi.count;
+ if ( (nr = rsp.status) > MAX_VBDS )
+ nr = MAX_VBDS;
+ memcpy(disk_info, buf, nr * sizeof(vdisk_t));
+
+ return nr;
}
/*
* xlvbd_init_device - initialise a VBD device
- * @disk: a xen_disk_t describing the VBD
+ * @disk: a vdisk_t describing the VBD
*
- * Takes a xen_disk_t * that describes a VBD the domain has access to.
+ * Takes a vdisk_t * that describes a VBD the domain has access to.
* Performs appropriate initialisation and registration of the device.
*
* Care needs to be taken when making re-entrant calls to ensure that
* corruption does not occur. Also, devices that are in use should not have
* their details updated. This is the caller's responsibility.
*/
-static int xlvbd_init_device(xen_disk_t *xd)
+static int xlvbd_init_device(vdisk_t *xd)
{
int device = xd->device;
int major = MAJOR(device);
@@ -181,11 +186,11 @@ static int xlvbd_init_device(xen_disk_t *xd)
read_ahead[major] = 8;
}
- blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
+ blk_init_queue(BLK_DEFAULT_QUEUE(major), do_blkif_request);
/*
* Turn off barking 'headactive' mode. We dequeue buffer heads as
- * soon as we pass them down to Xen.
+ * soon as we pass them to the back-end driver.
*/
blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
@@ -431,12 +436,12 @@ static int xlvbd_remove_device(int device)
void xlvbd_update_vbds(void)
{
int i, j, k, old_nr, new_nr;
- xen_disk_t *old_info, *new_info, *merged_info;
+ vdisk_t *old_info, *new_info, *merged_info;
old_info = vbd_info;
old_nr = nr_vbds;
- new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
+ new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
{
kfree(new_info);
@@ -448,7 +453,7 @@ void xlvbd_update_vbds(void)
* old list and new list do not overlap at all, and we cannot yet destroy
* VBDs in the old list because the usage counts are busy.
*/
- merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
+ merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
/* @i tracks old list; @j tracks new list; @k tracks merged list. */
i = j = k = 0;
@@ -458,13 +463,13 @@ void xlvbd_update_vbds(void)
if ( old_info[i].device < new_info[j].device )
{
if ( xlvbd_remove_device(old_info[i].device) != 0 )
- memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+ memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
i++;
}
else if ( old_info[i].device > new_info[j].device )
{
if ( xlvbd_init_device(&new_info[j]) == 0 )
- memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+ memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
j++;
}
else
@@ -472,9 +477,9 @@ void xlvbd_update_vbds(void)
if ( ((old_info[i].capacity == new_info[j].capacity) &&
(old_info[i].info == new_info[j].info)) ||
(xlvbd_remove_device(old_info[i].device) != 0) )
- memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+ memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
else if ( xlvbd_init_device(&new_info[j]) == 0 )
- memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+ memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
i++; j++;
}
}
@@ -482,13 +487,13 @@ void xlvbd_update_vbds(void)
for ( ; i < old_nr; i++ )
{
if ( xlvbd_remove_device(old_info[i].device) != 0 )
- memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+ memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
}
for ( ; j < new_nr; j++ )
{
if ( xlvbd_init_device(&new_info[j]) == 0 )
- memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+ memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
}
vbd_info = merged_info;
@@ -500,14 +505,14 @@ void xlvbd_update_vbds(void)
/*
- * Set up all the linux device goop for the virtual block devices (vbd's) that
- * xen tells us about. Note that although from xen's pov VBDs are addressed
- * simply an opaque 16-bit device number, the domain creation tools
+ * Set up all the linux device goop for the virtual block devices (vbd's) that
+ * we know about. Note that although from the backend driver's p.o.v. VBDs are
+ * addressed simply an opaque 16-bit device number, the domain creation tools
* conventionally allocate these numbers to correspond to those used by 'real'
* linux -- this is just for convenience as it means e.g. that the same
- * /etc/fstab can be used when booting with or without xen.
+ * /etc/fstab can be used when booting with or without Xen.
*/
-int __init xlvbd_init(void)
+int xlvbd_init(void)
{
int i;
@@ -537,7 +542,7 @@ int __init xlvbd_init(void)
xlvbd_max_sectors[i] = 128;
}
- vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
+ vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
nr_vbds = xlvbd_get_vbd_info(vbd_info);
if ( nr_vbds < 0 )
@@ -554,8 +559,3 @@ int __init xlvbd_init(void)
return 0;
}
-
-
-#ifdef MODULE
-module_init(xlvbd_init);
-#endif
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c
index 4e507081be..98eff63453 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c
@@ -36,7 +36,7 @@ static struct proc_dir_entry *privcmd_intf;
static int privcmd_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long data)
{
- int ret = 0;
+ int ret = -ENOSYS;
switch ( cmd )
{
@@ -108,6 +108,13 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
}
break;
+ case IOCTL_PRIVCMD_INITDOMAIN_EVTCHN:
+ {
+ extern int initdom_ctrlif_domcontroller_port;
+ ret = initdom_ctrlif_domcontroller_port;
+ }
+ break;
+
default:
ret = -EINVAL;
break;
@@ -140,7 +147,7 @@ static int __init init_module(void)
{
privcmd_intf->owner = THIS_MODULE;
privcmd_intf->nlink = 1;
- privcmd_intf->proc_fops = &privcmd_file_ops;
+ privcmd_intf->proc_fops = &privcmd_file_ops;
}
return 0;
diff --git a/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c b/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
index 7d59ad2e16..715f707eb0 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
@@ -17,6 +17,13 @@
#include <asm/ctrl_if.h>
#include <asm/evtchn.h>
+/*
+ * Only used by initial domain which must create its own control-interface
+ * event channel. This value is picked up by the user-space domain controller
+ * via an ioctl.
+ */
+int initdom_ctrlif_domcontroller_port = -1;
+
static int ctrl_if_evtchn;
static int ctrl_if_irq;
static spinlock_t ctrl_if_lock;
@@ -276,9 +283,6 @@ void ctrl_if_unregister_receiver(u8 type, ctrl_msg_handler_t hnd)
void ctrl_if_suspend(void)
{
- if ( start_info.flags & SIF_INITDOMAIN )
- return;
-
free_irq(ctrl_if_irq, NULL);
unbind_evtchn_from_irq(ctrl_if_evtchn);
}
@@ -286,7 +290,21 @@ void ctrl_if_suspend(void)
void ctrl_if_resume(void)
{
if ( start_info.flags & SIF_INITDOMAIN )
- return;
+ {
+ /*
+ * The initial domain must create its own domain-controller link.
+ * The controller is probably not running at this point, but will
+ * pick up its end of the event channel from
+ */
+ evtchn_op_t op;
+ op.cmd = EVTCHNOP_bind_interdomain;
+ op.u.bind_interdomain.dom1 = DOMID_SELF;
+ op.u.bind_interdomain.dom2 = DOMID_SELF;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ BUG();
+ start_info.domain_controller_evtchn = op.u.bind_interdomain.port1;
+ initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2;
+ }
ctrl_if_tx_resp_cons = 0;
ctrl_if_rx_req_cons = 0;
diff --git a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c
index 7b4d1ff0a9..773a1f83c3 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c
@@ -5,7 +5,7 @@
*
* (C) Copyright 1995 1996 Linus Torvalds
*
- * Modifications for Xenolinux (c) 2003 Keir Fraser
+ * Modifications for Xenolinux (c) 2003-2004 Keir Fraser
*/
#include <linux/slab.h>
@@ -28,21 +28,26 @@
__direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
static inline int direct_remap_area_pte(pte_t *pte,
- unsigned long address,
- unsigned long size,
- unsigned long machine_addr,
- pgprot_t prot,
- domid_t domid)
+ unsigned long address,
+ unsigned long size,
+ unsigned long machine_addr,
+ pgprot_t prot,
+ domid_t domid)
{
unsigned long end;
+#define MAX_DIRECTMAP_MMU_QUEUE 64
+ mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v;
- mmu_update_t *u, *v;
- u = v = vmalloc(3*PAGE_SIZE); /* plenty */
-
- if (!u)
- return -ENOMEM;
+ address &= ~PMD_MASK;
+ end = address + size;
+ if (end > PMD_SIZE)
+ end = PMD_SIZE;
+ if (address >= end)
+ BUG();
+ reset_buffer:
/* If not I/O mapping then specify General-Purpose Subject Domain (GPS). */
+ v = &u[0];
if ( domid != 0 )
{
v[0].val = (unsigned long)(domid<<16) & ~0xFFFFUL;
@@ -56,12 +61,6 @@ static inline int direct_remap_area_pte(pte_t *pte,
v += 2;
}
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- if (address >= end)
- BUG();
do {
#if 0 /* thanks to new ioctl mmaping interface this is no longer a bug */
if (!pte_none(*pte)) {
@@ -71,7 +70,12 @@ static inline int direct_remap_area_pte(pte_t *pte,
#endif
v->ptr = virt_to_machine(pte);
v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;
- v++;
+ if ( ++v == MAX_DIRECTMAP_MMU_QUEUE )
+ {
+ if ( HYPERVISOR_mmu_update(u, MAX_DIRECTMAP_MMU_QUEUE) < 0 )
+ return -EFAULT;
+ goto reset_buffer;
+ }
address += PAGE_SIZE;
machine_addr += PAGE_SIZE;
pte++;
@@ -84,7 +88,6 @@ static inline int direct_remap_area_pte(pte_t *pte,
return -EINVAL;
}
- vfree(u);
return 0;
}
@@ -96,8 +99,8 @@ static inline int direct_remap_area_pmd(struct mm_struct *mm,
pgprot_t prot,
domid_t domid)
{
+ int error = 0;
unsigned long end;
- int rc;
address &= ~PGDIR_MASK;
end = address + size;
@@ -111,14 +114,14 @@ static inline int direct_remap_area_pmd(struct mm_struct *mm,
if (!pte)
return -ENOMEM;
- if ( rc = direct_remap_area_pte(pte, address, end - address,
- address + machine_addr, prot, domid) )
- return rc;
-
+ error = direct_remap_area_pte(pte, address, end - address,
+ address + machine_addr, prot, domid);
+ if ( error )
+ break;
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
- return 0;
+ return error;
}
int direct_remap_area_pages(struct mm_struct *mm,
diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h b/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h
index f1d2b77c2e..a02e2471ea 100644
--- a/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h
+++ b/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h
@@ -52,7 +52,7 @@ int ctrl_if_send_message_noblock(
* function returns.
* 2. If @hnd is NULL then no callback is executed.
*/
-int ctrl_if_send_message(
+int ctrl_if_send_message_block(
ctrl_msg_t *msg,
ctrl_msg_handler_t hnd,
unsigned long id,
diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/io.h b/xenolinux-2.4.26-sparse/include/asm-xen/io.h
index 3d78e20950..f5243bb6a7 100644
--- a/xenolinux-2.4.26-sparse/include/asm-xen/io.h
+++ b/xenolinux-2.4.26-sparse/include/asm-xen/io.h
@@ -159,13 +159,47 @@ extern void iounmap(void *addr);
extern void *bt_ioremap(unsigned long offset, unsigned long size);
extern void bt_iounmap(void *addr, unsigned long size);
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+
+#ifdef CONFIG_HIGHMEM
+#error "Highmem is not yet compatible with physical device access"
+#endif
+
/*
- * IO bus memory addresses are also 1:1 with the physical address
+ * The bus translation macros need special care if we are executing device
+ * accesses to/from other domains' memory. In these cases the virtual address
+ * is actually a temporary mapping in the 'vmalloc' space. The physical
+ * address will therefore be >max_low_pfn, and will not have a valid entry
+ * in the phys_to_mach mapping table.
*/
+static inline unsigned long phys_to_bus(unsigned long phys)
+{
+ extern unsigned long max_pfn;
+ pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+ void *addr;
+ unsigned long bus;
+ if ( (phys >> PAGE_SHIFT) < max_pfn )
+ return phys_to_machine(phys);
+ addr = phys_to_virt(phys);
+ pgd = pgd_offset_k( (unsigned long)addr);
+ pmd = pmd_offset(pgd, (unsigned long)addr);
+ pte = pte_offset(pmd, (unsigned long)addr);
+ bus = (pte->pte_low & PAGE_MASK) | (phys & ~PAGE_MASK);
+ return bus;
+}
+
+#define virt_to_bus(_x) phys_to_bus(virt_to_phys(_x))
+#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
+#define page_to_bus(_x) phys_to_bus(page_to_phys(_x))
+
+#else
+
#define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x))
#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
#define page_to_bus(_x) phys_to_machine(page_to_phys(_x))
+#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+
/*
* readX/writeX() are used to access memory mapped devices. On some
* architectures the memory mapped IO stuff needs to be accessed
diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h b/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h
index c780f644c0..162ba1fbed 100644
--- a/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h
+++ b/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h
@@ -47,6 +47,11 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
return (pmd_t *) dir;
}
+#define pte_same(a, b) ((a).pte_low == (b).pte_low)
+#define pte_page(x) (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT))))
+#define pte_none(x) (!(x).pte_low)
+#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
+
/*
* A note on implementation of this atomic 'get-and-clear' operation.
* This is actually very simple because XenoLinux can only run on a single
@@ -59,13 +64,9 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
static inline pte_t ptep_get_and_clear(pte_t *xp)
{
pte_t pte = *xp;
- queue_l1_entry_update(xp, 0);
+ if ( !pte_none(pte) )
+ queue_l1_entry_update(xp, 0);
return pte;
}
-#define pte_same(a, b) ((a).pte_low == (b).pte_low)
-#define pte_page(x) (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT))))
-#define pte_none(x) (!(x).pte_low)
-#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
-
#endif /* _I386_PGTABLE_2LEVEL_H */
diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h
index 30cec9aff5..3bf03c6064 100644
--- a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h
+++ b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h
@@ -32,10 +32,22 @@ typedef struct privcmd_blkmsg
int buf_size;
} privcmd_blkmsg_t;
-#define IOCTL_PRIVCMD_HYPERCALL \
+/*
+ * @cmd: IOCTL_PRIVCMD_HYPERCALL
+ * @arg: &privcmd_hypercall_t
+ * Return: Value returned from execution of the specified hypercall.
+ */
+#define IOCTL_PRIVCMD_HYPERCALL \
_IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t))
-#define IOCTL_PRIVCMD_BLKMSG \
- _IOC(_IOC_NONE, 'P', 1, sizeof(privcmd_blkmsg_t))
+
+/*
+ * @cmd: IOCTL_PRIVCMD_INITDOMAIN_EVTCHN
+ * @arg: n/a
+ * Return: Port associated with domain-controller end of control event channel
+ * for the initial domain.
+ */
+#define IOCTL_PRIVCMD_INITDOMAIN_EVTCHN \
+ _IOC(_IOC_NONE, 'P', 1, 0)
#define IOCTL_PRIVCMD_MMAP \
_IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
diff --git a/xenolinux-2.4.26-sparse/mm/vmalloc.c b/xenolinux-2.4.26-sparse/mm/vmalloc.c
index 4d583b54a7..b030270b42 100644
--- a/xenolinux-2.4.26-sparse/mm/vmalloc.c
+++ b/xenolinux-2.4.26-sparse/mm/vmalloc.c
@@ -45,6 +45,10 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned lo
continue;
if (pte_present(page)) {
struct page *ptpage = pte_page(page);
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+ if (pte_io(page))
+ continue;
+#endif
if (VALID_PAGE(ptpage) && (!PageReserved(ptpage)))
__free_page(ptpage);
continue;
@@ -250,11 +254,6 @@ void __vfree(void * addr, int free_area_pages)
for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
if (tmp->addr == addr) {
*p = tmp->next;
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- if (tmp->flags & VM_IOREMAP)
- zap_page_range(&init_mm, VMALLOC_VMADDR(tmp->addr), tmp->size);
- else
-#endif
if (free_area_pages)
vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size);
write_unlock(&vmlist_lock);