about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> 2004-04-30 15:41:03 +0000
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> 2004-04-30 15:41:03 +0000
commit aa4ae4af7a3b8b0f98aa989f1775d8847f6e245a (patch)
tree 21f0bb6d9feee156adf220506c9e88eb1f4350c0
parent a93317142fe7f3eeb624ec322a2dcf8e18c24b0e (diff)
download xen-aa4ae4af7a3b8b0f98aa989f1775d8847f6e245a.tar.gz
xen-aa4ae4af7a3b8b0f98aa989f1775d8847f6e245a.tar.bz2
xen-aa4ae4af7a3b8b0f98aa989f1775d8847f6e245a.zip
bitkeeper revision 1.879.2.1 (4092738fCfvp-pu-UzwhXsHdzHbYPw)
Completed first cut of new blkdev i/o world.
-rw-r--r-- .rootkeys 1
-rwxr-xr-x tools/examples/xc_dom_create.py 121
-rw-r--r-- tools/xend/lib/blkif.py 143
-rw-r--r-- tools/xend/lib/console.py 83
-rw-r--r-- tools/xend/lib/domain_controller.h 124
-rwxr-xr-x tools/xend/lib/main.py 179
-rw-r--r-- tools/xend/lib/manager.py 89
-rw-r--r-- xen/common/event_channel.c 5
-rw-r--r-- xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h 12
-rw-r--r-- xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c 15
-rw-r--r-- xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c 235
-rw-r--r-- xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c 60
-rw-r--r-- xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c 12
-rw-r--r-- xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c 55
-rw-r--r-- xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c 7
-rw-r--r-- xenolinux-2.4.26-sparse/include/asm-xen/io.h 36
16 files changed, 847 insertions, 330 deletions
diff --git a/.rootkeys b/.rootkeys
index f391d811f6..f73ce770b4 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -102,6 +102,7 @@
4055ee44Bu6oP7U0WxxXypbUt4dNPQ tools/xenctl/setup.py
40431ac64Hj4ixUnKmlugZKhXPFE_Q tools/xend/Makefile
4055ad95Se-FqttgxollqOAAHB94zA tools/xend/lib/__init__.py
+4092738fMRGC9fFBcPRCWaJaj9U3ag tools/xend/lib/blkif.py
4055ad97wMLUj0BZT0e_T0EwQN0Bvw tools/xend/lib/console.py
4048c0ddsF0WrU7HUzTvg1MJoCIfWg tools/xend/lib/domain_controller.h
4054a301VEag2GwrBrFBna5U1BGlLA tools/xend/lib/main.py
diff --git a/tools/examples/xc_dom_create.py b/tools/examples/xc_dom_create.py
index 799319c6a6..bb9a0576d9 100755
--- a/tools/examples/xc_dom_create.py
+++ b/tools/examples/xc_dom_create.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import string, sys, os, time, socket, getopt, signal, syslog
-import Xc, xenctl.utils, xenctl.console_client
+import Xc, xenctl.utils, xenctl.console_client, re
config_dir = '/etc/xc/'
config_file = xc_config_file = config_dir + 'defaults'
@@ -195,6 +195,15 @@ output('VM cmdline : "%s"' % cmdline)
if dryrun:
sys.exit(1)
+##### HACK HACK HACK
+##### Until everyone moves to the new I/O world, and a more robust domain
+##### controller (xend), we use this little trick to discover whether we
+##### are in a testing environment for new I/O stuff.
+new_io_world = True
+for line in os.popen('cat /proc/interrupts').readlines():
+ if re.search('blkdev', line):
+ new_io_world = False
+
##### Code beyond this point is actually used to manage the mechanics of
##### starting (and watching if necessary) guest virtual machines.
@@ -228,14 +237,14 @@ def make_domain():
cmsg = 'new_control_interface(dom='+str(id)+', console_port='+str(console_port)+')'
- xend_response = xenctl.utils.xend_control_message(cmsg)
+ cons_response = xenctl.utils.xend_control_message(cmsg)
- if not xend_response['success']:
+ if not cons_response['success']:
print "Error creating initial event channel"
- print "Error type: " + xend_response['error_type']
- if xend_response['error_type'] == 'exception':
- print "Exception type: " + xend_response['exception_type']
- print "Exception value: " + xend_response['exception_value']
+ print "Error type: " + cons_response['error_type']
+ if cons_response['error_type'] == 'exception':
+ print "Exception type: " + cons_response['exception_type']
+ print "Exception value: " + cons_response['exception_value']
xc.domain_destroy ( dom=id )
sys.exit()
@@ -248,7 +257,7 @@ def make_domain():
sys.exit()
else:
- ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"] )' % builder_fn)
+ ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=cons_response["remote_port"] )' % builder_fn)
if ret < 0:
print "Error building Linux guest OS: "
print "Return code = " + str(ret)
@@ -259,6 +268,18 @@ def make_domain():
# set the expertise level appropriately
xenctl.utils.VBD_EXPERT_MODE = vbd_expert
+
+ if new_io_world:
+ cmsg = 'new_block_interface(dom='+str(id)+')'
+ xend_response = xenctl.utils.xend_control_message(cmsg)
+ if not xend_response['success']:
+ print "Error creating block interface"
+ print "Error type: " + xend_response['error_type']
+ if xend_response['error_type'] == 'exception':
+ print "Exception type: " + xend_response['exception_type']
+ print "Exception val: " + xend_response['exception_value']
+ xc.domain_destroy ( dom=id )
+ sys.exit()
for ( uname, virt_name, rw ) in vbd_list:
virt_dev = xenctl.utils.blkdev_name_to_number( virt_name )
@@ -269,42 +290,70 @@ def make_domain():
xc.domain_destroy ( dom=id )
sys.exit()
- # check that setting up this VBD won't violate the sharing
- # allowed by the current VBD expertise level
- if xenctl.utils.vd_extents_validate(segments, rw=='w' or rw=='rw') < 0:
- xc.domain_destroy( dom = id )
- sys.exit()
+ if new_io_world:
+ if len(segments) > 1:
+ print "New I/O world cannot deal with multi-extent vdisks"
+ xc.domain_destroy ( dom=id )
+ sys.exit()
+ seg = segments[0]
+ cmsg = 'new_block_device(dom=' + str(id) + \
+ ',handle=0,vdev=' + str(virt_dev) + \
+ ',pdev=' + str(seg['device']) + \
+ ',start_sect=' + str(seg['start_sector']) + \
+ ',nr_sect=' + str(seg['nr_sectors']) + \
+ ',readonly=' + str(not re.match('w',rw)) + ')'
+ xend_response = xenctl.utils.xend_control_message(cmsg)
+ if not xend_response['success']:
+ print "Error creating virtual block device"
+ print "Error type: " + xend_response['error_type']
+ if xend_response['error_type'] == 'exception':
+ print "Exception type: " + xend_response['exception_type']
+ print "Exception val: " + xend_response['exception_value']
+ xc.domain_destroy ( dom=id )
+ sys.exit()
+ else:
+ # check that setting up this VBD won't violate the sharing
+ # allowed by the current VBD expertise level
+ if xenctl.utils.vd_extents_validate(segments,
+ rw=='w' or rw=='rw') < 0:
+ xc.domain_destroy( dom = id )
+ sys.exit()
- if xc.vbd_create( dom=id, vbd=virt_dev, writeable= rw=='w' or rw=='rw' ):
- print "Error creating VBD vbd=%d writeable=%d\n" % (virt_dev,rw)
- xc.domain_destroy ( dom=id )
- sys.exit()
+ if xc.vbd_create( dom=id, vbd=virt_dev,
+ writeable= rw=='w' or rw=='rw' ):
+ print "Error creating VBD %d (writeable=%d)\n" % (virt_dev,rw)
+ xc.domain_destroy ( dom=id )
+ sys.exit()
- if xc.vbd_setextents( dom=id,
- vbd=virt_dev,
- extents=segments):
- print "Error populating VBD vbd=%d\n" % virt_dev
- xc.domain_destroy ( dom=id )
- sys.exit()
-
- # setup virtual firewall rules for all aliases
- for ip in vfr_ipaddr:
- xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
-
- # check for physical device access
- for (pci_bus, pci_dev, pci_func) in pci_device_list:
- if xc.physdev_pci_access_modify(
- dom=id, bus=pci_bus, dev=pci_dev, func=pci_func, enable=1 ) < 0:
- print "Non-fatal error enabling PCI device access."
- else:
- print "Enabled PCI access (%d:%d:%d)." % (pci_bus,pci_dev,pci_func)
+ if xc.vbd_setextents( dom=id,
+ vbd=virt_dev,
+ extents=segments):
+ print "Error populating VBD vbd=%d\n" % virt_dev
+ xc.domain_destroy ( dom=id )
+ sys.exit()
+
+ if not new_io_world:
+ # setup virtual firewall rules for all aliases
+ for ip in vfr_ipaddr:
+ xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
+
+ if new_io_world:
+ # check for physical device access
+ for (pci_bus, pci_dev, pci_func) in pci_device_list:
+ if xc.physdev_pci_access_modify(
+ dom=id, bus=pci_bus, dev=pci_dev,
+ func=pci_func, enable=1 ) < 0:
+ print "Non-fatal error enabling PCI device access."
+ else:
+ print "Enabled PCI access (%d:%d:%d)." % \
+ (pci_bus,pci_dev,pci_func)
if xc.domain_start( dom=id ) < 0:
print "Error starting domain"
xc.domain_destroy ( dom=id )
sys.exit()
- return (id, xend_response['console_port'])
+ return (id, cons_response['console_port'])
# end of make_domain()
def mkpidfile():
diff --git a/tools/xend/lib/blkif.py b/tools/xend/lib/blkif.py
new file mode 100644
index 0000000000..94e058f7ce
--- /dev/null
+++ b/tools/xend/lib/blkif.py
@@ -0,0 +1,143 @@
+
+#################################################################
+## xend/blkif.py -- Block-interface management functions for Xend
+## Copyright (c) 2004, K A Fraser (University of Cambridge)
+#################################################################
+
+import errno, re, os, select, signal, socket, struct, sys
+import xend.main, xend.console, xend.manager, xend.utils, Xc
+
+CMSG_BLKIF_BE = 1
+CMSG_BLKIF_FE = 2
+CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED = 0
+CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED = 32
+CMSG_BLKIF_FE_INTERFACE_CONNECT = 33
+CMSG_BLKIF_FE_INTERFACE_DISCONNECT = 34
+CMSG_BLKIF_BE_CREATE = 0
+CMSG_BLKIF_BE_DESTROY = 1
+CMSG_BLKIF_BE_CONNECT = 2
+CMSG_BLKIF_BE_DISCONNECT = 3
+CMSG_BLKIF_BE_VBD_CREATE = 4
+CMSG_BLKIF_BE_VBD_DESTROY = 5
+CMSG_BLKIF_BE_VBD_GROW = 6
+CMSG_BLKIF_BE_VBD_SHRINK = 7
+
+pendmsg = None
+pendaddr = None
+
+def backend_tx_req(msg):
+ port = xend.main.dom0_port
+ if port.space_to_write_request():
+ port.write_request(msg)
+ port.notify()
+ else:
+ xend.blkif.pendmsg = msg
+
+def backend_rx_req(port, msg):
+ port.write_response(msg)
+
+def backend_rx_rsp(port, msg):
+ subtype = (msg.get_header())['subtype']
+ print "Received blkif-be response, subtype %d" % subtype
+ if subtype == CMSG_BLKIF_BE_CREATE:
+ rsp = { 'success': True }
+ xend.main.send_management_response(rsp, xend.blkif.pendaddr)
+ elif subtype == CMSG_BLKIF_BE_CONNECT:
+ (dom,hnd,evtchn,frame,st) = struct.unpack("QIILI", msg.get_payload())
+ blkif = interface.list[xend.main.port_from_dom(dom).local_port]
+ msg = xend.utils.message(CMSG_BLKIF_FE, \
+ CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
+ msg.append_payload(struct.pack("III",0,2,blkif.evtchn['port2']))
+ blkif.ctrlif_tx_req(xend.main.port_list[blkif.key], msg)
+ elif subtype == CMSG_BLKIF_BE_VBD_CREATE:
+ (dom,hnd,vdev,ro,st) = struct.unpack("QIHII", msg.get_payload())
+ blkif = interface.list[xend.main.port_from_dom(dom).local_port]
+ (pdev, start_sect, nr_sect, readonly) = blkif.devices[vdev]
+ msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_GROW, 0)
+ msg.append_payload(struct.pack("QIHHHQQI",dom,0,vdev,0, \
+ pdev,start_sect,nr_sect,0))
+ backend_tx_req(msg)
+ elif subtype == CMSG_BLKIF_BE_VBD_GROW:
+ rsp = { 'success': True }
+ xend.main.send_management_response(rsp, xend.blkif.pendaddr)
+
+def backend_do_work(port):
+ global pendmsg
+ if pendmsg and port.space_to_write_request():
+ port.write_request(pendmsg)
+ pendmsg = None
+ return True
+ return False
+
+
+class interface:
+
+ # Dictionary of all block-device interfaces.
+ list = {}
+
+
+ # NB. 'key' is an opaque value that has no meaning in this class.
+ def __init__(self, dom, key):
+ self.dom = dom
+ self.key = key
+ self.devices = {}
+ self.pendmsg = None
+ interface.list[key] = self
+ msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CREATE, 0)
+ msg.append_payload(struct.pack("QII",dom,0,0))
+ xend.blkif.pendaddr = xend.main.mgmt_req_addr
+ backend_tx_req(msg)
+
+ # Attach a device to the specified interface
+ def attach_device(self, vdev, pdev, start_sect, nr_sect, readonly):
+ if self.devices.has_key(vdev):
+ return False
+ self.devices[vdev] = (pdev, start_sect, nr_sect, readonly)
+ msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE, 0)
+ msg.append_payload(struct.pack("QIHII",self.dom,0,vdev,readonly,0))
+ xend.blkif.pendaddr = xend.main.mgmt_req_addr
+ backend_tx_req(msg)
+ return True
+
+
+ # Completely destroy this interface.
+ def destroy(self):
+ del interface.list[self.key]
+ msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DESTROY, 0)
+ msg.append_payload(struct.pack("QII",self.dom,0,0))
+ backend_tx_req(msg)
+
+
+ # The parameter @port is the control-interface event channel. This method
+ # returns True if messages were written to the control interface.
+ def ctrlif_transmit_work(self, port):
+ if self.pendmsg and port.space_to_write_request():
+ port.write_request(self.pendmsg)
+ self.pendmsg = None
+ return True
+ return False
+
+ def ctrlif_tx_req(self, port, msg):
+ if port.space_to_write_request():
+ port.write_request(msg)
+ port.notify()
+ else:
+ self.pendmsg = msg
+
+ def ctrlif_rx_req(self, port, msg):
+ port.write_response(msg)
+ subtype = (msg.get_header())['subtype']
+ if subtype == CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED:
+ msg = xend.utils.message(CMSG_BLKIF_FE, \
+ CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
+ msg.append_payload(struct.pack("III",0,1,0))
+ self.ctrlif_tx_req(port, msg)
+ elif subtype == CMSG_BLKIF_FE_INTERFACE_CONNECT:
+ (hnd,frame) = struct.unpack("IL", msg.get_payload())
+ xc = Xc.new()
+ self.evtchn = xc.evtchn_bind_interdomain(dom1=0,dom2=self.dom)
+ msg = xend.utils.message(CMSG_BLKIF_BE, \
+ CMSG_BLKIF_BE_CONNECT, 0)
+ msg.append_payload(struct.pack("QIILI",self.dom,0, \
+ self.evtchn['port1'],frame,0))
+ backend_tx_req(msg)
diff --git a/tools/xend/lib/console.py b/tools/xend/lib/console.py
index aad6069979..57898817f5 100644
--- a/tools/xend/lib/console.py
+++ b/tools/xend/lib/console.py
@@ -5,7 +5,7 @@
#############################################################
import errno, re, os, select, signal, socket, struct, sys
-
+import xend.blkif, xend.main, xend.manager, xend.utils, Xc
##
## interface:
@@ -16,7 +16,7 @@ import errno, re, os, select, signal, socket, struct, sys
## CONNECTED: sending/receiving console data on TCP port 'self.port'
##
## A dictionary of all active interfaces, indexed by TCP socket descriptor,
-## is accessible as 'interface.interface_list'.
+## is accessible as 'interface.list_by_fd'.
##
## NB. When a class instance is to be destroyed you *must* call the 'close'
## method. Otherwise a stale reference will be left in the interface list.
@@ -30,7 +30,11 @@ class interface:
# Dictionary of all active (non-closed) console interfaces.
- interface_list = {}
+ list_by_fd = {}
+
+
+ # Dictionary of all console interfaces, closed and open.
+ list = {}
# NB. 'key' is an opaque value that has no meaning in this class.
@@ -38,6 +42,9 @@ class interface:
self.status = interface.CLOSED
self.port = port
self.key = key
+ self.rbuf = xend.utils.buffer()
+ self.wbuf = xend.utils.buffer()
+ interface.list[key] = self
# Is this interface closed (inactive)?
@@ -58,14 +65,14 @@ class interface:
# Close the interface, if it is not closed already.
def close(self):
if not self.closed():
- del interface.interface_list[self.sock.fileno()]
+ del interface.list_by_fd[self.sock.fileno()]
self.sock.close()
del self.sock
self.status = interface.CLOSED
# Move the interface into the 'listening' state. Opens a new listening
- # socket and updates 'interface_list'.
+ # socket and updates 'list_by_fd'.
def listen(self):
# Close old socket (if any), and create a fresh one.
self.close()
@@ -80,7 +87,7 @@ class interface:
# Announce the new status of this interface.
self.status = interface.LISTENING
- interface.interface_list[self.sock.fileno()] = self
+ interface.list_by_fd[self.sock.fileno()] = self
except:
# In case of trouble ensure we get rid of dangling socket reference
@@ -105,7 +112,69 @@ class interface:
# Publish the new socket and the new interface state.
self.sock = sock
self.status = interface.CONNECTED
- interface.interface_list[self.sock.fileno()] = self
+ interface.list_by_fd[self.sock.fileno()] = self
return 1
+ # Completely destroy a console interface.
+ def destroy(self):
+ self.close()
+ del interface.list[self.key]
+
+
+ # Do work triggered by resource availability on a console-interface socket.
+ def socket_work(self):
+ # If the interface is listening, check for pending connections.
+ if self.listening():
+ self.connect()
+
+ # All done if the interface is not connected.
+ if not self.connected():
+ return
+
+ # Send as much pending data as possible via the socket.
+ while not self.rbuf.empty():
+ try:
+ bytes = self.sock.send(self.rbuf.peek())
+ if bytes > 0:
+ self.rbuf.discard(bytes)
+ except socket.error, error:
+ pass
+
+ # Read as much data as is available. Don't worry about
+ # overflowing our buffer: it's more important to read the
+ # incoming data stream and detect errors or closure of the
+ # remote end in a timely manner.
+ try:
+ while 1:
+ data = self.sock.recv(2048)
+ # Return of zero means the remote end has disconnected.
+ # We therefore return the console interface to listening.
+ if not data:
+ self.listen()
+ break
+ self.wbuf.write(data)
+ except socket.error, error:
+ # Assume that most errors mean that the connection is dead.
+ # In such cases we return the interface to 'listening' state.
+ if error[0] != errno.EAGAIN:
+ print "Better return to listening"
+ self.listen()
+ print "New status: " + str(self.status)
+
+
+ # The parameter @port is the control-interface event channel. This method
+ # returns True if messages were written to the control interface.
+ def ctrlif_transmit_work(self, port):
+ work_done = False
+ while not self.wbuf.empty() and port.space_to_write_request():
+ msg = xend.utils.message(0, 0, 0)
+ msg.append_payload(self.wbuf.read(msg.MAX_PAYLOAD))
+ port.write_request(msg)
+ work_done = True
+ return work_done
+
+
+ def ctrlif_rx_req(self, port, msg):
+ self.rbuf.write(msg.get_payload())
+ port.write_response(msg)
diff --git a/tools/xend/lib/domain_controller.h b/tools/xend/lib/domain_controller.h
index d9ea7d6160..68d4fac1d2 100644
--- a/tools/xend/lib/domain_controller.h
+++ b/tools/xend/lib/domain_controller.h
@@ -76,8 +76,8 @@ typedef struct {
/* Messages from guest to domain controller. */
#define CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED 32
-#define CMSG_BLKIF_FE_INTERFACE_UP 33
-#define CMSG_BLKIF_FE_INTERFACE_DOWN 34
+#define CMSG_BLKIF_FE_INTERFACE_CONNECT 33
+#define CMSG_BLKIF_FE_INTERFACE_DISCONNECT 34
/* These are used by both front-end and back-end drivers. */
#define blkif_vdev_t u16
@@ -91,13 +91,13 @@ typedef struct {
* 1. The shared-memory frame is available for reuse.
* 2. Any unacknowledged messages pending on the interface were dropped.
*/
-#define BLKIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist. */
-#define BLKIF_INTERFACE_STATUS_DOWN 1 /* Interface exists but is down. */
-#define BLKIF_INTERFACE_STATUS_UP 2 /* Interface exists and is up. */
+#define BLKIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist. */
+#define BLKIF_INTERFACE_STATUS_DISCONNECTED 1 /* Exists but is disconnected. */
+#define BLKIF_INTERFACE_STATUS_CONNECTED 2 /* Exists and is connected. */
typedef struct {
unsigned int handle;
unsigned int status;
- unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_UP */
+ unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_CONNECTED */
} blkif_fe_interface_status_changed_t;
/*
@@ -109,30 +109,37 @@ typedef struct {
* If the driver goes DOWN while interfaces are still UP, the domain
* will automatically take the interfaces DOWN.
*/
-#define BLKIF_DRIVER_STATUS_DOWN 0
-#define BLKIF_DRIVER_STATUS_UP 1
+#define BLKIF_DRIVER_STATUS_DOWN 0
+#define BLKIF_DRIVER_STATUS_UP 1
typedef struct {
unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
} blkif_fe_driver_status_changed_t;
/*
- * CMSG_BLKIF_FE_INTERFACE_UP:
- * If successful, the domain controller will acknowledge with a STATUS_UP
- * message.
+ * CMSG_BLKIF_FE_INTERFACE_CONNECT:
+ * If successful, the domain controller will acknowledge with a
+ * STATUS_CONNECTED message.
*/
typedef struct {
unsigned int handle;
unsigned long shmem_frame;
-} blkif_fe_interface_up_t;
+} blkif_fe_interface_connect_t;
/*
- * CMSG_BLKIF_FE_INTERFACE_DOWN:
- * If successful, the domain controller will acknowledge with a STATUS_DOWN
- * message.
+ * CMSG_BLKIF_FE_INTERFACE_DISCONNECT:
+ * If successful, the domain controller will acknowledge with a
+ * STATUS_DISCONNECTED message.
*/
typedef struct {
+ /* IN */
unsigned int handle;
-} blkif_fe_interface_down_t;
+ /* OUT */
+ /*
+ * Tells driver how many interfaces it should expect to immediately
+ * receive notifications about.
+ */
+ unsigned int nr_interfaces;
+} blkif_fe_interface_disconnect_t;
/******************************************************************************
@@ -142,10 +149,12 @@ typedef struct {
/* Messages from domain controller. */
#define CMSG_BLKIF_BE_CREATE 0 /* Create a new block-device interface. */
#define CMSG_BLKIF_BE_DESTROY 1 /* Destroy a block-device interface. */
-#define CMSG_BLKIF_BE_VBD_CREATE 2 /* Create a new VBD for an interface. */
-#define CMSG_BLKIF_BE_VBD_DESTROY 3 /* Delete a VBD from an interface. */
-#define CMSG_BLKIF_BE_VBD_GROW 4 /* Append an extent to a given VBD. */
-#define CMSG_BLKIF_BE_VBD_SHRINK 5 /* Remove last extent from a given VBD. */
+#define CMSG_BLKIF_BE_CONNECT 2 /* Connect i/f to remote driver. */
+#define CMSG_BLKIF_BE_DISCONNECT 3 /* Disconnect i/f from remote driver. */
+#define CMSG_BLKIF_BE_VBD_CREATE 4 /* Create a new VBD for an interface. */
+#define CMSG_BLKIF_BE_VBD_DESTROY 5 /* Delete a VBD from an interface. */
+#define CMSG_BLKIF_BE_VBD_GROW 6 /* Append an extent to a given VBD. */
+#define CMSG_BLKIF_BE_VBD_SHRINK 7 /* Remove last extent from a given VBD. */
/* Messages to domain controller. */
#define CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED 32
@@ -167,36 +176,36 @@ typedef struct {
/* The following are specific error returns. */
#define BLKIF_BE_STATUS_INTERFACE_EXISTS 2
#define BLKIF_BE_STATUS_INTERFACE_NOT_FOUND 3
-#define BLKIF_BE_STATUS_VBD_EXISTS 4
-#define BLKIF_BE_STATUS_VBD_NOT_FOUND 5
-#define BLKIF_BE_STATUS_OUT_OF_MEMORY 6
-#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND 7
-#define BLKIF_BE_STATUS_MAPPING_ERROR 8
+#define BLKIF_BE_STATUS_INTERFACE_CONNECTED 4
+#define BLKIF_BE_STATUS_VBD_EXISTS 5
+#define BLKIF_BE_STATUS_VBD_NOT_FOUND 6
+#define BLKIF_BE_STATUS_OUT_OF_MEMORY 7
+#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND 8
+#define BLKIF_BE_STATUS_MAPPING_ERROR 9
/* This macro can be used to create an array of descriptive error strings. */
-#define BLKIF_BE_STATUS_ERRORS { \
- "Okay", \
- "Non-specific error", \
- "Interface already exists", \
- "Interface not found", \
- "VBD already exists", \
- "VBD not found", \
- "Out of memory", \
- "Extent not found for VBD", \
+#define BLKIF_BE_STATUS_ERRORS { \
+ "Okay", \
+ "Non-specific error", \
+ "Interface already exists", \
+ "Interface not found", \
+ "Interface is still connected", \
+ "VBD already exists", \
+ "VBD not found", \
+ "Out of memory", \
+ "Extent not found for VBD", \
"Could not map domain memory" }
/*
* CMSG_BLKIF_BE_CREATE:
* When the driver sends a successful response then the interface is fully
- * set up. The controller will send an UP notification to the front-end
+ * created. The controller will send a DOWN notification to the front-end
* driver.
*/
typedef struct {
/* IN */
domid_t domid; /* Domain attached to new interface. */
unsigned int blkif_handle; /* Domain-specific interface handle. */
- unsigned int evtchn; /* Event channel for notifications. */
- unsigned long shmem_frame; /* Page cont. shared comms window. */
/* OUT */
unsigned int status;
} blkif_be_create_t;
@@ -204,8 +213,8 @@ typedef struct {
/*
* CMSG_BLKIF_BE_DESTROY:
* When the driver sends a successful response then the interface is fully
- * torn down. The controller will send a DOWN notification to the front-end
- * driver.
+ * torn down. The controller will send a DESTROYED notification to the
+ * front-end driver.
*/
typedef struct {
/* IN */
@@ -215,6 +224,36 @@ typedef struct {
unsigned int status;
} blkif_be_destroy_t;
+/*
+ * CMSG_BLKIF_BE_CONNECT:
+ * When the driver sends a successful response then the interface is fully
+ * connected. The controller will send a CONNECTED notification to the
+ * front-end driver.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* Domain attached to new interface. */
+ unsigned int blkif_handle; /* Domain-specific interface handle. */
+ unsigned int evtchn; /* Event channel for notifications. */
+ unsigned long shmem_frame; /* Page cont. shared comms window. */
+ /* OUT */
+ unsigned int status;
+} blkif_be_connect_t;
+
+/*
+ * CMSG_BLKIF_BE_DISCONNECT:
+ * When the driver sends a successful response then the interface is fully
+ * disconnected. The controller will send a DOWN notification to the front-end
+ * driver.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* Domain attached to new interface. */
+ unsigned int blkif_handle; /* Domain-specific interface handle. */
+ /* OUT */
+ unsigned int status;
+} blkif_be_disconnect_t;
+
/* CMSG_BLKIF_BE_VBD_CREATE */
typedef struct {
/* IN */
@@ -264,7 +303,14 @@ typedef struct {
* will automatically send DOWN notifications.
*/
typedef struct {
+ /* IN */
unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
+ /* OUT */
+ /*
+ * Tells driver how many interfaces it should expect to immediately
+ * receive notifications about.
+ */
+ unsigned int nr_interfaces;
} blkif_be_driver_status_changed_t;
#endif /* __DOMAIN_CONTROLLER_H__ */
diff --git a/tools/xend/lib/main.py b/tools/xend/lib/main.py
index b870af55d1..7b5adbab83 100755
--- a/tools/xend/lib/main.py
+++ b/tools/xend/lib/main.py
@@ -5,7 +5,7 @@
###########################################################
import errno, re, os, pwd, select, signal, socket, struct, sys, time
-import xend.console, xend.manager, xend.utils, Xc
+import xend.blkif, xend.console, xend.manager, xend.utils, Xc
# The following parameters could be placed in a configuration file.
@@ -16,13 +16,35 @@ CONTROL_DIR = '/var/run/xend'
UNIX_SOCK = 'management_sock' # relative to CONTROL_DIR
+CMSG_CONSOLE = 0
+CMSG_BLKIF_BE = 1
+CMSG_BLKIF_FE = 2
+
+
+def port_from_dom(dom):
+ global port_list
+ for idx, port in port_list.items():
+ if port.remote_dom == dom:
+ return port
+ return None
+
+
+def send_management_response(response, addr):
+ try:
+ response = str(response)
+ print "Mgmt_rsp[%s]: %s" % (addr, response)
+ management_interface.sendto(response, addr)
+ except socket.error, error:
+ pass
+
+
def daemon_loop():
# Could we do this more nicely? The xend.manager functions need access
# to this global state to do their work.
- global control_list, notifier
+ global port_list, notifier, management_interface, mgmt_req_addr, dom0_port
- # List of all control interfaces, indexed by local event-channel port.
- control_list = {}
+ # Lists of all interfaces, indexed by local event-channel port.
+ port_list = {}
xc = Xc.new()
@@ -46,13 +68,10 @@ def daemon_loop():
# The DOM0 control interface is not set up via the management interface.
# Note that console messages don't come our way (actually, only driver
- # back-ends should use the DOM0 control interface) -- the console
- # structures are dummies.
+ # back-ends should use the DOM0 control interface).
dom0_port = xend.utils.port(0)
- xend.main.notifier.bind(dom0_port.local_port)
- xend.main.control_list[dom0_port.local_port] = \
- (dom0_port, xend.utils.buffer(), xend.utils.buffer(), \
- xend.console.interface(0, dom0_port.local_port))
+ notifier.bind(dom0_port.local_port)
+ port_list[dom0_port.local_port] = dom0_port
##
## MAIN LOOP
@@ -68,10 +87,10 @@ def daemon_loop():
waitset = select.poll()
waitset.register(management_interface, select.POLLIN)
waitset.register(notifier, select.POLLIN)
- for idx, (port, rbuf, wbuf, con_if) in control_list.items():
+ for idx, con_if in xend.console.interface.list_by_fd.items():
if not con_if.closed():
pflags = select.POLLIN
- if not rbuf.empty() and con_if.connected():
+ if not con_if.rbuf.empty() and con_if.connected():
pflags = select.POLLIN | select.POLLOUT
waitset.register(con_if.sock.fileno(), pflags)
@@ -82,16 +101,16 @@ def daemon_loop():
# These should consist of executable Python statements that call
# well-known management functions (e.g., new_control_interface(dom=9)).
try:
- data, addr = management_interface.recvfrom(2048)
+ data, mgmt_req_addr = management_interface.recvfrom(2048)
except socket.error, error:
if error[0] != errno.EAGAIN:
raise
else:
- if addr:
+ if mgmt_req_addr:
# Evaluate the request in an exception-trapping sandbox.
try:
- print "Mgmt_req[%s]: %s" % (addr, data)
- response = str(eval('xend.manager.'+data))
+ print "Mgmt_req[%s]: %s" % (mgmt_req_addr, data)
+ response = eval('xend.manager.'+data)
except:
# Catch all exceptions and turn into an error response:
@@ -107,69 +126,20 @@ def daemon_loop():
response = str(response)
# Try to send a response to the requester.
- try:
- print "Mgmt_rsp[%s]: %s" % (addr, response)
- management_interface.sendto(response, addr)
- except socket.error, error:
- pass
+ if response:
+ send_management_response(response, mgmt_req_addr)
# Do work for every console interface that hit in the poll set.
for (fd, events) in fdset:
- if not xend.console.interface.interface_list.has_key(fd):
- continue
- con_if = xend.console.interface.interface_list[fd]
-
- # If the interface is listening, check for pending connections.
- if con_if.listening():
- con_if.connect()
-
- # All done if the interface is not connected.
- if not con_if.connected():
- continue
- (port, rbuf, wbuf, con_if) = control_list[con_if.key]
-
- # Send as much pending data as possible via the socket.
- while not rbuf.empty():
- try:
- bytes = con_if.sock.send(rbuf.peek())
- if bytes > 0:
- rbuf.discard(bytes)
- except socket.error, error:
- pass
-
- # Read as much data as is available. Don't worry about
- # overflowing our buffer: it's more important to read the
- # incoming data stream and detect errors or closure of the
- # remote end in a timely manner.
- try:
- while 1:
- data = con_if.sock.recv(2048)
- # Return of zero means the remote end has disconnected.
- # We therefore return the console interface to listening.
- if not data:
- con_if.listen()
- break
- wbuf.write(data)
- except socket.error, error:
- # Assume that most errors mean that the connection is dead.
- # In such cases we return the interface to 'listening' state.
- if error[0] != errno.EAGAIN:
- print "Better return to listening"
- con_if.listen()
- print "New status: " + str(con_if.status)
-
- # We may now have pending data to send via the relevant
- # inter-domain control interface. If so then we send all we can
- # and notify the remote end.
- work_done = False
- while not wbuf.empty() and port.space_to_write_request():
- msg = xend.utils.message(0, 0, 0)
- msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
- port.write_request(msg)
- work_done = True
- if work_done:
- port.notify()
-
+ if xend.console.interface.list_by_fd.has_key(fd):
+ con_if = xend.console.interface.list_by_fd[fd]
+ con_if.socket_work()
+ # We may now have pending data to send via the control
+ # interface. If so then send all we can and notify the remote.
+ port = port_list[con_if.key]
+ if con_if.ctrlif_transmit_work(port):
+ port.notify()
+
# Process control-interface notifications from other guest OSes.
while 1:
# Grab a notification, if there is one.
@@ -178,42 +148,69 @@ def daemon_loop():
break
(idx, type) = notification
- if not control_list.has_key(idx):
+ if not port_list.has_key(idx):
continue
- (port, rbuf, wbuf, con_if) = control_list[idx]
+ port = port_list[idx]
work_done = False
+ con_if = False
+ if xend.console.interface.list.has_key(idx):
+ con_if = xend.console.interface.list[idx]
+
+ blk_if = False
+ if xend.blkif.interface.list.has_key(idx):
+ blk_if = xend.blkif.interface.list[idx]
+
# If we pick up a disconnect notification then we do any necessary
# cleanup.
if type == notifier.EXCEPTION:
ret = xc.evtchn_status(idx)
if ret['status'] == 'unbound':
notifier.unbind(idx)
- con_if.close()
- del control_list[idx], port, rbuf, wbuf, con_if
+ del port_list[idx], port
+ if con_if:
+ con_if.destroy()
+ del con_if
+ if blk_if:
+ blk_if.destroy()
+ del blk_if
continue
- # Read incoming requests. Currently assume that request
- # message always containb console data.
+ # Process incoming requests.
while port.request_to_read():
msg = port.read_request()
- rbuf.write(msg.get_payload())
- port.write_response(msg)
work_done = True
-
- # Incoming responses are currently thrown on the floor.
+ type = (msg.get_header())['type']
+ if type == CMSG_CONSOLE and con_if:
+ con_if.ctrlif_rx_req(port, msg)
+ elif type == CMSG_BLKIF_FE and blk_if:
+ blk_if.ctrlif_rx_req(port, msg)
+ elif type == CMSG_BLKIF_BE and port == dom0_port:
+ xend.blkif.backend_rx_req(port, msg)
+ else:
+ port.write_response(msg)
+
+ # Process incoming responses.
while port.response_to_read():
msg = port.read_response()
work_done = True
+ type = (msg.get_header())['type']
+ if type == CMSG_BLKIF_BE and port == dom0_port:
+ xend.blkif.backend_rx_rsp(port, msg)
+
+ # Send console data.
+ if con_if and con_if.ctrlif_transmit_work(port):
+ work_done = True
- # Send as much pending console data as there is room for.
- while not wbuf.empty() and port.space_to_write_request():
- msg = xend.utils.message(0, 0, 0)
- msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
- port.write_request(msg)
+ # Send blkif messages.
+ if blk_if and blk_if.ctrlif_transmit_work(port):
work_done = True
+ # Back-end block-device work.
+ if port == dom0_port and xend.blkif.backend_do_work(port):
+ work_done = True
+
# Finally, notify the remote end of any work that we did.
if work_done:
port.notify()
diff --git a/tools/xend/lib/manager.py b/tools/xend/lib/manager.py
index 42d66d3a95..ea7398cd4c 100644
--- a/tools/xend/lib/manager.py
+++ b/tools/xend/lib/manager.py
@@ -4,13 +4,13 @@
## Copyright (c) 2004, K A Fraser (University of Cambridge)
#############################################################
-import xend.console, xend.main, xend.utils
+import xend.blkif, xend.console, xend.main, xend.utils
##
## new_control_interface:
-## Create a new control interface with the specified domain 'dom'.
-## The console port may also be specified; otehrwise a suitable port is
+## Create a new control interface with the specified domain @dom.
+## The console port may also be specified; otherwise a suitable port is
## automatically allocated.
##
def new_control_interface(dom, console_port=-1):
@@ -26,9 +26,8 @@ def new_control_interface(dom, console_port=-1):
con_if = xend.console.interface(console_port, port.local_port)
con_if.listen()
- # Add control state to the master list.
- xend.main.control_list[port.local_port] = \
- (port, xend.utils.buffer(), xend.utils.buffer(), con_if)
+ # Update the master port list.
+ xend.main.port_list[port.local_port] = port
# Construct the successful response to be returned to the requester.
response = { 'success': True }
@@ -36,3 +35,81 @@ def new_control_interface(dom, console_port=-1):
response['remote_port'] = port.remote_port
response['console_port'] = console_port
return response
+
+
+##
+## new_block_interface:
+## Create a new block interface for the specified domain @dom.
+##
+def new_block_interface(dom, handle=-1):
+    # Returns a failure dictionary ('success' False plus an 'error_type'
+    # string) when the request is rejected here; otherwise creates the
+    # interface and returns None.
+
+    # By default we create an interface with handle zero.
+    if handle < 0:
+        handle = 0
+
+    # We only support one interface per domain, which must have handle zero.
+    if handle != 0:
+        response = { 'success': False }
+        # The message fragments are joined inside parentheses so that '%'
+        # formats the whole string: '%' binds tighter than '+', so applying
+        # it to the last fragment alone raises TypeError.
+        response['error_type'] = ('Bad handle %d (only handle 0 '
+                                  'is supported)') % handle
+        return response
+
+    # Find local event-channel port associated with the specified domain.
+    port = xend.main.port_from_dom(dom)
+    if not port:
+        response = { 'success': False }
+        response['error_type'] = 'Unknown domain %d' % dom
+        return response
+
+    # The interface must not already exist.
+    if xend.blkif.interface.list.has_key(port.local_port):
+        response = { 'success': False }
+        # Parenthesised for the same precedence reason as above.
+        response['error_type'] = ('Interface (dom=%d,handle=%d) already '
+                                  'exists') % (dom, handle)
+        return response
+
+    # Create the new interface. Initially no virtual devices are attached.
+    xend.blkif.interface(dom, port.local_port)
+
+    # Response is deferred until back-end driver sends acknowledgement.
+    return None
+
+
+##
+## new_block_device:
+## Attach a new virtual block device to the specified block interface
+## (@dom, @handle). The new device is identified by @vdev, and maps to
+## the real block extent (@pdev, @start_sect, @nr_sect). If @readonly then
+## write requests to @vdev will be rejected.
+##
+def new_block_device(dom, handle, vdev, pdev, start_sect, nr_sect, readonly):
+    # Returns a failure dictionary ('success' False plus an 'error_type'
+    # string) when the request is rejected here; otherwise hands the device
+    # to the block interface and returns None.
+
+    # We only support one interface per domain, which must have handle zero.
+    if handle != 0:
+        response = { 'success': False }
+        # The message fragments are joined inside parentheses so that '%'
+        # formats the whole string: '%' binds tighter than '+', so applying
+        # it to the last fragment alone raises TypeError.
+        response['error_type'] = ('Bad handle %d (only handle 0 '
+                                  'is supported)') % handle
+        return response
+
+    # Find local event-channel port associated with the specified domain.
+    port = xend.main.port_from_dom(dom)
+    if not port:
+        response = { 'success': False }
+        response['error_type'] = 'Unknown domain %d' % dom
+        return response
+
+    # The interface must exist.
+    if not xend.blkif.interface.list.has_key(port.local_port):
+        response = { 'success': False }
+        # Parenthesised for the same precedence reason as above.
+        response['error_type'] = ('Interface (dom=%d,handle=%d) does not '
+                                  'exists') % (dom, handle)
+        return response
+
+    # The virtual device must not yet exist.
+    blkif = xend.blkif.interface.list[port.local_port]
+    if not blkif.attach_device(vdev, pdev, start_sect, nr_sect, readonly):
+        response = { 'success': False }
+        # Parenthesised for the same precedence reason as above.
+        response['error_type'] = ('Vdevice (dom=%d,handle=%d,vdevice=%d) '
+                                  'already exists') % (dom, handle, vdev)
+        return response
+
+    # Response is deferred until back-end driver sends acknowledgement.
+    return None
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index c0bea86320..c6011cebb4 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -109,15 +109,18 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
goto out;
}
+ /* 'Allocate' port1 before searching for a free port2. */
+ p1->event_channel[port1].state = ECS_INTERDOMAIN;
+
if ( (port2 = get_free_port(p2)) < 0 )
{
+ p1->event_channel[port1].state = ECS_FREE;
rc = port2;
goto out;
}
p1->event_channel[port1].u.remote.dom = p2;
p1->event_channel[port1].u.remote.port = (u16)port2;
- p1->event_channel[port1].state = ECS_INTERDOMAIN;
p2->event_channel[port2].u.remote.dom = p1;
p2->event_channel[port2].u.remote.port = (u16)port1;
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
index 646f4855f3..e6004b4a8e 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
@@ -41,6 +41,12 @@ typedef struct blkif_st {
rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */
spinlock_t vbd_lock; /* Protects VBD mapping. */
/* Private fields. */
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ /*
+ * DISCONNECT response is deferred until pending requests are ack'ed.
+ * We therefore need to store the id from the original request.
+ */
+ u8 disconnect_rspid;
struct blkif_st *hash_next;
struct list_head blkdev_list;
spinlock_t blk_ring_lock;
@@ -49,13 +55,15 @@ typedef struct blkif_st {
void blkif_create(blkif_be_create_t *create);
void blkif_destroy(blkif_be_destroy_t *destroy);
-void __blkif_destroy(blkif_t *blkif);
+void blkif_connect(blkif_be_connect_t *connect);
+int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
+void __blkif_disconnect_complete(blkif_t *blkif);
blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define blkif_put(_b) \
do { \
if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- __blkif_destroy(_b); \
+ __blkif_disconnect_complete(_b); \
} while (0)
/* An entry in a list of xen_extents. */
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
index 2baddcd616..0746ecfab0 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
@@ -10,6 +10,8 @@
static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
+ DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype);
+
switch ( msg->subtype )
{
case CMSG_BLKIF_BE_CREATE:
@@ -22,6 +24,17 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
goto parse_error;
blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]);
break;
+ case CMSG_BLKIF_BE_CONNECT:
+ if ( msg->length != sizeof(blkif_be_connect_t) )
+ goto parse_error;
+ blkif_connect((blkif_be_connect_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_DISCONNECT:
+ if ( msg->length != sizeof(blkif_be_disconnect_t) )
+ goto parse_error;
+ if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) )
+ return; /* Sending the response is deferred until later. */
+ break;
case CMSG_BLKIF_BE_VBD_CREATE:
if ( msg->length != sizeof(blkif_be_vbd_create_t) )
goto parse_error;
@@ -50,6 +63,8 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
return;
parse_error:
+ DPRINTK("Parse error while reading message subtype %d, len %d\n",
+ msg->subtype, msg->length);
msg->length = 0;
ctrl_if_send_response(msg);
}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
index 87925681da..9acbac35ab 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
@@ -14,144 +14,216 @@
static kmem_cache_t *blkif_cachep;
static blkif_t *blkif_hash[BLKIF_HASHSZ];
-static spinlock_t blkif_hash_lock;
blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
{
- blkif_t *blkif;
- unsigned long flags;
-
- spin_lock_irqsave(&blkif_hash_lock, flags);
- blkif = blkif_hash[BLKIF_HASH(domid, handle)];
- while ( blkif != NULL )
- {
- if ( (blkif->domid == domid) && (blkif->handle == handle) )
- {
- blkif_get(blkif);
- break;
- }
+ blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( (blkif != NULL) &&
+ ((blkif->domid != domid) || (blkif->handle != handle)) )
blkif = blkif->hash_next;
- }
- spin_unlock_irqrestore(&blkif_hash_lock, flags);
-
return blkif;
}
-void __blkif_destroy(blkif_t *blkif)
+void __blkif_disconnect_complete(blkif_t *blkif)
{
- free_irq(blkif->irq, NULL);
+ ctrl_msg_t cmsg;
+ blkif_be_disconnect_t disc;
+
+ /*
+ * These can't be done in blkif_disconnect() because at that point there
+ * may be outstanding requests at the disc whose asynchronous responses
+ * must still be notified to the remote driver.
+ */
unbind_evtchn_from_irq(blkif->evtchn);
vfree(blkif->blk_ring_base);
- destroy_all_vbds(blkif);
- kmem_cache_free(blkif_cachep, blkif);
+
+ /* Construct the deferred response message. */
+ cmsg.type = CMSG_BLKIF_BE;
+ cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT;
+ cmsg.id = blkif->disconnect_rspid;
+ cmsg.length = sizeof(blkif_be_disconnect_t);
+ disc.domid = blkif->domid;
+ disc.blkif_handle = blkif->handle;
+ disc.status = BLKIF_BE_STATUS_OKAY;
+ memcpy(cmsg.msg, &disc, sizeof(disc));
+
+ /*
+ * Make sure message is constructed /before/ status change, because
+ * after the status change the 'blkif' structure could be deallocated at
+ * any time. Also make sure we send the response /after/ status change,
+ * as otherwise a subsequent CONNECT request could spuriously fail if
+ * another CPU doesn't see the status change yet.
+ */
+ mb();
+ if ( blkif->status != DISCONNECTING )
+ BUG();
+ blkif->status = DISCONNECTED;
+ mb();
+
+ /* Send the successful response. */
+ ctrl_if_send_response(&cmsg);
}
void blkif_create(blkif_be_create_t *create)
{
domid_t domid = create->domid;
unsigned int handle = create->blkif_handle;
- unsigned int evtchn = create->evtchn;
- unsigned long shmem_frame = create->shmem_frame;
- unsigned long flags;
blkif_t **pblkif, *blkif;
- struct vm_struct *vma;
- pgprot_t prot;
- int error;
- if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+ if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_ATOMIC)) == NULL )
{
+ DPRINTK("Could not create blkif: out of memory\n");
create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
return;
}
- if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
- {
- create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- goto fail1;
- }
-
- prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
- error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
- shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
- prot, domid);
- if ( error != 0 )
- {
- if ( error == -ENOMEM )
- create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- else if ( error == -EFAULT )
- create->status = BLKIF_BE_STATUS_MAPPING_ERROR;
- else
- create->status = BLKIF_BE_STATUS_ERROR;
- goto fail2;
- }
-
memset(blkif, 0, sizeof(*blkif));
- blkif->domid = domid;
- blkif->handle = handle;
- blkif->evtchn = evtchn;
- blkif->irq = bind_evtchn_to_irq(evtchn);
- blkif->shmem_frame = shmem_frame;
- blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
+ blkif->domid = domid;
+ blkif->handle = handle;
+ blkif->status = DISCONNECTED;
spin_lock_init(&blkif->vbd_lock);
spin_lock_init(&blkif->blk_ring_lock);
-
- spin_lock_irqsave(&blkif_hash_lock, flags);
+ atomic_set(&blkif->refcnt, 0);
pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( *pblkif == NULL )
+ while ( *pblkif != NULL )
{
if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
{
- spin_unlock_irqrestore(&blkif_hash_lock, flags);
+ DPRINTK("Could not create blkif: already exists\n");
create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
- goto fail3;
+ kmem_cache_free(blkif_cachep, blkif);
+ return;
}
pblkif = &(*pblkif)->hash_next;
}
- atomic_set(&blkif->refcnt, 1);
blkif->hash_next = *pblkif;
*pblkif = blkif;
- spin_unlock_irqrestore(&blkif_hash_lock, flags);
-
- request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
-
+ DPRINTK("Successfully created blkif\n");
create->status = BLKIF_BE_STATUS_OKAY;
- return;
-
- fail3: unbind_evtchn_from_irq(evtchn);
- fail2: kmem_cache_free(blkif_cachep, blkif);
- fail1: vfree(vma->addr);
}
void blkif_destroy(blkif_be_destroy_t *destroy)
{
domid_t domid = destroy->domid;
unsigned int handle = destroy->blkif_handle;
- unsigned long flags;
blkif_t **pblkif, *blkif;
- spin_lock_irqsave(&blkif_hash_lock, flags);
-
pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif = *pblkif) == NULL )
+ while ( (blkif = *pblkif) != NULL )
{
if ( (blkif->domid == domid) && (blkif->handle == handle) )
{
- *pblkif = blkif->hash_next;
- spin_unlock_irqrestore(&blkif_hash_lock, flags);
- blkif_deschedule(blkif);
- blkif_put(blkif);
- destroy->status = BLKIF_BE_STATUS_OKAY;
- return;
+ if ( blkif->status != DISCONNECTED )
+ goto still_connected;
+ goto destroy;
}
pblkif = &blkif->hash_next;
}
- spin_unlock_irqrestore(&blkif_hash_lock, flags);
-
destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+
+ still_connected:
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ return;
+
+ destroy:
+ *pblkif = blkif->hash_next;
+ destroy_all_vbds(blkif);
+ kmem_cache_free(blkif_cachep, blkif);
+ destroy->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * Handle a CONNECT request for the interface (connect->domid,
+ * connect->blkif_handle): map the frame given by connect->shmem_frame and
+ * use it as the block ring base, bind connect->evtchn to an IRQ, and mark
+ * the interface CONNECTED. The outcome is reported in connect->status.
+ */
+void blkif_connect(blkif_be_connect_t *connect)
+{
+ domid_t domid = connect->domid;
+ unsigned int handle = connect->blkif_handle;
+ unsigned int evtchn = connect->evtchn;
+ unsigned long shmem_frame = connect->shmem_frame;
+ struct vm_struct *vma;
+ pgprot_t prot;
+ int error;
+ blkif_t *blkif;
+
+ /* The interface must already have been created. */
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("blkif_connect attempted for non-existent blkif (%llu,%u)\n",
+ connect->domid, connect->blkif_handle);
+ connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ /* Reserve one page of kernel virtual address space for the ring. */
+ if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+ {
+ connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ /* Map the frame supplied in the request into the reserved area. */
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+ error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+ shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+ prot, domid);
+ if ( error != 0 )
+ {
+ /* Translate the errno into a control-message status code. */
+ if ( error == -ENOMEM )
+ connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ else if ( error == -EFAULT )
+ connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+ else
+ connect->status = BLKIF_BE_STATUS_ERROR;
+ vfree(vma->addr);
+ return;
+ }
+
+ /* Only a DISCONNECTED interface may be connected; undo the mapping. */
+ if ( blkif->status != DISCONNECTED )
+ {
+ connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ vfree(vma->addr);
+ return;
+ }
+
+ blkif->evtchn = evtchn;
+ blkif->irq = bind_evtchn_to_irq(evtchn);
+ blkif->shmem_frame = shmem_frame;
+ blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
+ blkif->status = CONNECTED;
+ /* Reference taken for the connection; blkif_disconnect() drops it. */
+ blkif_get(blkif);
+
+ /* The handler is registered with the blkif itself as dev_id. */
+ request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
+
+ connect->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * Handle a DISCONNECT request for the interface (disconnect->domid,
+ * disconnect->blkif_handle).
+ *
+ * Returns 1 if the caller should send the response immediately (with
+ * disconnect->status filled in), or 0 if the response is deferred: in that
+ * case @rsp_id is saved and __blkif_disconnect_complete() sends the reply
+ * once the last reference to the interface has been dropped.
+ */
+int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+    domid_t       domid  = disconnect->domid;
+    unsigned int  handle = disconnect->blkif_handle;
+    blkif_t      *blkif;
+
+    blkif = blkif_find_by_handle(domid, handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("blkif_disconnect attempted for non-existent blkif"
+                " (%llu,%u)\n", disconnect->domid, disconnect->blkif_handle);
+        disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return 1; /* Caller will send response error message. */
+    }
+
+    if ( blkif->status == CONNECTED )
+    {
+        blkif->status = DISCONNECTING;
+        blkif->disconnect_rspid = rsp_id;
+        wmb(); /* Let other CPUs see the status change. */
+        /* dev_id must match the one given to request_irq() on connect. */
+        free_irq(blkif->irq, blkif);
+        blkif_deschedule(blkif);
+        blkif_put(blkif);
+        return 0; /* Caller should not send response message. */
+    }
+
+    /*
+     * Not connected: there is nothing to tear down and no deferred
+     * completion will run, so the reply must be sent by the caller now.
+     */
+    disconnect->status = BLKIF_BE_STATUS_OKAY;
+    return 1;
+}
void __init blkif_interface_init(void)
@@ -159,5 +231,4 @@ void __init blkif_interface_init(void)
blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
0, 0, NULL, NULL);
memset(blkif_hash, 0, sizeof(blkif_hash));
- spin_lock_init(&blkif_hash_lock);
}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
index 8862798250..2582287360 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
@@ -33,8 +33,8 @@ static struct vm_struct *mmap_vma;
(MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
#define MMAP_VADDR(_req,_seg) \
((unsigned long)mmap_vma->addr + \
- ((_req) * MMAP_PAGES_PER_REQUEST) + \
- ((_seg) * MMAP_PAGES_PER_SEGMENT))
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * MMAP_PAGES_PER_SEGMENT * PAGE_SIZE))
/*
* Each outstanding request that we've passed to the lower device layers has a
@@ -96,7 +96,7 @@ static void add_to_blkdev_list_tail(blkif_t *blkif)
unsigned long flags;
if ( __on_blkdev_list(blkif) ) return;
spin_lock_irqsave(&io_schedule_list_lock, flags);
- if ( !__on_blkdev_list(blkif) )
+ if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
{
list_add_tail(&blkif->blkdev_list, &io_schedule_list);
blkif_get(blkif);
@@ -168,7 +168,8 @@ static void end_block_io_op(struct buffer_head *bh, int uptodate)
if ( atomic_dec_and_test(&pending_req->pendcnt) )
{
int pending_idx = pending_req - pending_reqs;
- vmfree_area_pages(MMAP_VADDR(pending_idx, 0), MMAP_PAGES_PER_REQUEST);
+ vmfree_area_pages(MMAP_VADDR(pending_idx, 0),
+ MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
make_response(pending_req->blkif, pending_req->id,
pending_req->operation, pending_req->status);
blkif_put(pending_req->blkif);
@@ -260,10 +261,11 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
{
if ( (req->buffer_and_sects[i] & ~PAGE_MASK) != (PAGE_SIZE / 512) )
goto bad_descriptor;
- if ( direct_remap_area_pages(&init_mm,
+ rc = direct_remap_area_pages(&init_mm,
MMAP_VADDR(pending_idx, i),
req->buffer_and_sects[i] & PAGE_MASK,
- PAGE_SIZE, prot, blkif->domid) != 0 )
+ PAGE_SIZE, prot, blkif->domid);
+ if ( rc != 0 )
goto bad_descriptor;
}
@@ -271,12 +273,13 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
(req->nr_segments * PAGE_SIZE) / sizeof(vdisk_t));
vmfree_area_pages(MMAP_VADDR(pending_idx, 0),
- MMAP_PAGES_PER_REQUEST);
+ MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
make_response(blkif, req->id, req->operation, rc);
return;
bad_descriptor:
- vmfree_area_pages(MMAP_VADDR(pending_idx, 0), MMAP_PAGES_PER_REQUEST);
+ vmfree_area_pages(MMAP_VADDR(pending_idx, 0),
+ MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
}
@@ -284,7 +287,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
struct buffer_head *bh;
- int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
+ int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
unsigned short nr_sects;
unsigned long buffer;
int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
@@ -358,14 +361,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
unsigned long sz = ((phys_seg[i].buffer & ~PAGE_MASK) +
(phys_seg[i].nr_sects << 9) +
(PAGE_SIZE - 1)) & PAGE_MASK;
- if ( direct_remap_area_pages(&init_mm,
- MMAP_VADDR(pending_idx, i),
- phys_seg[i].buffer & PAGE_MASK,
- sz, prot, blkif->domid) != 0 )
+ int rc = direct_remap_area_pages(&init_mm,
+ MMAP_VADDR(pending_idx, i),
+ phys_seg[i].buffer & PAGE_MASK,
+ sz, prot, blkif->domid);
+ if ( rc != 0 )
{
DPRINTK("invalid buffer\n");
vmfree_area_pages(MMAP_VADDR(pending_idx, 0),
- MMAP_PAGES_PER_REQUEST);
+ MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
goto bad_descriptor;
}
}
@@ -374,7 +378,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
pending_req->blkif = blkif;
pending_req->id = req->id;
pending_req->operation = operation;
- pending_req->status = BLKIF_RSP_ERROR;
+ pending_req->status = BLKIF_RSP_OKAY;
atomic_set(&pending_req->pendcnt, nr_psegs);
blkif_get(blkif);
@@ -382,29 +386,30 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
/* Now we pass each segment down to the real blkdev layer. */
for ( i = 0; i < nr_psegs; i++ )
{
- bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
+ bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
if ( unlikely(bh == NULL) )
panic("bh is null\n");
memset(bh, 0, sizeof (struct buffer_head));
-
+
+ init_waitqueue_head(&bh->b_wait);
bh->b_size = phys_seg[i].nr_sects << 9;
bh->b_dev = phys_seg[i].dev;
+ bh->b_rdev = phys_seg[i].dev;
bh->b_rsector = (unsigned long)phys_seg[i].sector_number;
- bh->b_data = (char *)MMAP_VADDR(pending_idx, i) +
+ bh->b_data = (char *)MMAP_VADDR(pending_idx, i) +
(phys_seg[i].buffer & ~PAGE_MASK);
- /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
- bh->b_page = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT];
bh->b_end_io = end_block_io_op;
bh->b_private = pending_req;
- bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
+ bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) |
+ (1 << BH_Req) | (1 << BH_Launder);
if ( operation == WRITE )
bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
atomic_set(&bh->b_count, 1);
/* Dispatch a single request. We'll flush it to disc later. */
- submit_bh(operation, bh);
+ generic_make_request(operation, bh);
}
pending_cons++;
@@ -444,16 +449,7 @@ static void make_response(blkif_t *blkif, unsigned long id,
void blkif_deschedule(blkif_t *blkif)
{
- unsigned long flags;
-
- spin_lock_irqsave(&io_schedule_list_lock, flags);
- if ( __on_blkdev_list(blkif) )
- {
- list_del(&blkif->blkdev_list);
- blkif->blkdev_list.next = (void *)0xdeadbeef;
- blkif_put(blkif);
- }
- spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+ remove_from_blkdev_list(blkif);
}
static int __init init_module(void)
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
index bc5390eeb9..19b0b3015d 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
@@ -47,7 +47,7 @@ void vbd_create(blkif_be_vbd_create_t *create)
}
}
- if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
+ if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_ATOMIC)) == NULL) )
{
DPRINTK("vbd_create: out of memory\n");
create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
@@ -62,11 +62,12 @@ void vbd_create(blkif_be_vbd_create_t *create)
rb_link_node(&vbd->rb, rb_parent, rb_p);
rb_insert_color(&vbd->rb, &blkif->vbd_rb);
+ DPRINTK("Successful creation of vdev=%04x (dom=%llu)\n",
+ vdevice, create->domid);
create->status = BLKIF_BE_STATUS_OKAY;
out:
spin_unlock(&blkif->vbd_lock);
- blkif_put(blkif);
}
@@ -110,7 +111,7 @@ void vbd_grow(blkif_be_vbd_grow_t *grow)
}
if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t),
- GFP_KERNEL)) == NULL) )
+ GFP_ATOMIC)) == NULL) )
{
DPRINTK("vbd_grow: out of memory\n");
grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
@@ -127,11 +128,12 @@ void vbd_grow(blkif_be_vbd_grow_t *grow)
*px = x;
+ DPRINTK("Successful grow of vdev=%04x (dom=%llu)\n",
+ vdevice, grow->domid);
grow->status = BLKIF_BE_STATUS_OKAY;
out:
spin_unlock(&blkif->vbd_lock);
- blkif_put(blkif);
}
@@ -190,7 +192,6 @@ void vbd_shrink(blkif_be_vbd_shrink_t *shrink)
out:
spin_unlock(&blkif->vbd_lock);
- blkif_put(blkif);
}
@@ -242,7 +243,6 @@ void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
out:
spin_unlock(&blkif->vbd_lock);
- blkif_put(blkif);
}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
index 2936d78ea2..29cc01d087 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
@@ -18,9 +18,9 @@
typedef unsigned char byte; /* from linux/ide.h */
-#define BLKIF_STATE_CLOSED 0
-#define BLKIF_STATE_DOWN 1
-#define BLKIF_STATE_UP 2
+#define BLKIF_STATE_CLOSED 0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED 2
static unsigned int blkif_state = BLKIF_STATE_CLOSED;
static unsigned int blkif_evtchn, blkif_irq;
@@ -35,7 +35,7 @@ static BLK_RING_IDX req_prod; /* Private request producer. */
/* We plug the I/O ring if the driver is suspended or if the ring is full. */
#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
- (blkif_state != BLKIF_STATE_UP))
+ (blkif_state != BLKIF_STATE_CONNECTED))
/*
@@ -123,8 +123,10 @@ int blkif_release(struct inode *inode, struct file *filep)
*/
if ( --disk->usage == 0 )
{
+#if 0
update_tq.routine = update_vbds_task;
schedule_task(&update_tq);
+#endif
}
return 0;
@@ -306,7 +308,7 @@ static int blkif_queue_request(unsigned long id,
if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
BUG();
- if ( unlikely(blkif_state != BLKIF_STATE_UP) )
+ if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
return 1;
switch ( operation )
@@ -498,7 +500,7 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
next_bh = bh->b_reqnext;
bh->b_reqnext = NULL;
- bh->b_end_io(bh, !bret->status);
+ bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
}
break;
case BLKIF_OP_PROBE:
@@ -556,18 +558,18 @@ void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
static void blkif_bringup_phase1(void *unused)
{
- ctrl_msg_t cmsg;
- blkif_fe_interface_up_t up;
+ ctrl_msg_t cmsg;
+ blkif_fe_interface_connect_t up;
- /* Move from CLOSED to DOWN state. */
+ /* Move from CLOSED to DISCONNECTED state. */
blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
- blkif_state = BLKIF_STATE_DOWN;
+ blkif_state = BLKIF_STATE_DISCONNECTED;
- /* Construct an interface-UP message for the domain controller. */
+ /* Construct an interface-CONNECT message for the domain controller. */
cmsg.type = CMSG_BLKIF_FE;
- cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_UP;
- cmsg.length = sizeof(blkif_fe_interface_up_t);
+ cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT;
+ cmsg.length = sizeof(blkif_fe_interface_connect_t);
up.handle = 0;
up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
memcpy(cmsg.msg, &up, sizeof(up));
@@ -578,14 +580,14 @@ static void blkif_bringup_phase1(void *unused)
static void blkif_bringup_phase2(void *unused)
{
- /* Move from DOWN to UP state. */
blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
(void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
- blkif_state = BLKIF_STATE_UP;
/* Probe for discs that are attached to the interface. */
xlvbd_init();
+ blkif_state = BLKIF_STATE_CONNECTED;
+
/* Kick pending requests. */
spin_lock_irq(&io_request_lock);
kick_pending_request_queues();
@@ -608,22 +610,22 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
blkif_state);
break;
- case BLKIF_INTERFACE_STATUS_DOWN:
+ case BLKIF_INTERFACE_STATUS_DISCONNECTED:
if ( blkif_state != BLKIF_STATE_CLOSED )
{
- printk(KERN_WARNING "Unexpected blkif-DOWN message in state %d\n",
- blkif_state);
+ printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message"
+ " in state %d\n", blkif_state);
break;
}
blkif_statechange_tq.routine = blkif_bringup_phase1;
schedule_task(&blkif_statechange_tq);
break;
- case BLKIF_INTERFACE_STATUS_UP:
+ case BLKIF_INTERFACE_STATUS_CONNECTED:
if ( blkif_state == BLKIF_STATE_CLOSED )
{
- printk(KERN_WARNING "Unexpected blkif-UP message in state %d\n",
- blkif_state);
+ printk(KERN_WARNING "Unexpected blkif-CONNECTED message"
+ " in state %d\n", blkif_state);
break;
}
blkif_evtchn = status->evtchn;
@@ -683,6 +685,17 @@ int __init xlblk_init(void)
memcpy(cmsg.msg, &st, sizeof(st));
ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+ /*
+ * We should read 'nr_interfaces' from response message and wait
+ * for notifications before proceeding. For now we assume that we
+ * will be notified of exactly one interface.
+ */
+ while ( blkif_state != BLKIF_STATE_CONNECTED )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
return 0;
}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
index 944bf7eace..b26907192a 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
@@ -512,7 +512,7 @@ void xlvbd_update_vbds(void)
* linux -- this is just for convenience as it means e.g. that the same
* /etc/fstab can be used when booting with or without Xen.
*/
-int __init xlvbd_init(void)
+int xlvbd_init(void)
{
int i;
@@ -559,8 +559,3 @@ int __init xlvbd_init(void)
return 0;
}
-
-
-#ifdef MODULE
-module_init(xlvbd_init);
-#endif
diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/io.h b/xenolinux-2.4.26-sparse/include/asm-xen/io.h
index 3d78e20950..f5243bb6a7 100644
--- a/xenolinux-2.4.26-sparse/include/asm-xen/io.h
+++ b/xenolinux-2.4.26-sparse/include/asm-xen/io.h
@@ -159,13 +159,47 @@ extern void iounmap(void *addr);
extern void *bt_ioremap(unsigned long offset, unsigned long size);
extern void bt_iounmap(void *addr, unsigned long size);
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+
+#ifdef CONFIG_HIGHMEM
+#error "Highmem is not yet compatible with physical device access"
+#endif
+
/*
- * IO bus memory addresses are also 1:1 with the physical address
+ * The bus translation macros need special care if we are executing device
+ * accesses to/from other domains' memory. In these cases the virtual address
+ * is actually a temporary mapping in the 'vmalloc' space. The physical
+ * address will therefore be >max_low_pfn, and will not have a valid entry
+ * in the phys_to_mach mapping table.
*/
+/*
+ * Translate the pseudo-physical address @phys into a bus address. Page
+ * frames below max_pfn are translated with phys_to_machine(); for anything
+ * else the kernel page tables are walked and the frame address is read
+ * from the PTE that maps the corresponding virtual address.
+ */
+static inline unsigned long phys_to_bus(unsigned long phys)
+{
+ extern unsigned long max_pfn;
+ pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+ void *addr;
+ unsigned long bus;
+ /* Common case: use the regular translation for frames below max_pfn. */
+ if ( (phys >> PAGE_SHIFT) < max_pfn )
+ return phys_to_machine(phys);
+ /* Otherwise look up the PTE for the matching kernel virtual address. */
+ addr = phys_to_virt(phys);
+ pgd = pgd_offset_k( (unsigned long)addr);
+ pmd = pmd_offset(pgd, (unsigned long)addr);
+ pte = pte_offset(pmd, (unsigned long)addr);
+ /* Frame address from the PTE, plus the offset within the page. */
+ bus = (pte->pte_low & PAGE_MASK) | (phys & ~PAGE_MASK);
+ return bus;
+}
+
+#define virt_to_bus(_x) phys_to_bus(virt_to_phys(_x))
+#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
+#define page_to_bus(_x) phys_to_bus(page_to_phys(_x))
+
+#else
+
#define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x))
#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
#define page_to_bus(_x) phys_to_machine(page_to_phys(_x))
+#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+
/*
* readX/writeX() are used to access memory mapped devices. On some
* architectures the memory mapped IO stuff needs to be accessed