diff options
author | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-04-30 15:41:03 +0000 |
---|---|---|
committer | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-04-30 15:41:03 +0000 |
commit | aa4ae4af7a3b8b0f98aa989f1775d8847f6e245a (patch) | |
tree | 21f0bb6d9feee156adf220506c9e88eb1f4350c0 | |
parent | a93317142fe7f3eeb624ec322a2dcf8e18c24b0e (diff) | |
download | xen-aa4ae4af7a3b8b0f98aa989f1775d8847f6e245a.tar.gz xen-aa4ae4af7a3b8b0f98aa989f1775d8847f6e245a.tar.bz2 xen-aa4ae4af7a3b8b0f98aa989f1775d8847f6e245a.zip |
bitkeeper revision 1.879.2.1 (4092738fCfvp-pu-UzwhXsHdzHbYPw)
Completed first cut of new blkdev i/o world.
-rw-r--r-- | .rootkeys | 1 | ||||
-rwxr-xr-x | tools/examples/xc_dom_create.py | 121 | ||||
-rw-r--r-- | tools/xend/lib/blkif.py | 143 | ||||
-rw-r--r-- | tools/xend/lib/console.py | 83 | ||||
-rw-r--r-- | tools/xend/lib/domain_controller.h | 124 | ||||
-rwxr-xr-x | tools/xend/lib/main.py | 179 | ||||
-rw-r--r-- | tools/xend/lib/manager.py | 89 | ||||
-rw-r--r-- | xen/common/event_channel.c | 5 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h | 12 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c | 15 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c | 235 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c | 60 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c | 12 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c | 55 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c | 7 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/include/asm-xen/io.h | 36 |
16 files changed, 847 insertions, 330 deletions
@@ -102,6 +102,7 @@ 4055ee44Bu6oP7U0WxxXypbUt4dNPQ tools/xenctl/setup.py 40431ac64Hj4ixUnKmlugZKhXPFE_Q tools/xend/Makefile 4055ad95Se-FqttgxollqOAAHB94zA tools/xend/lib/__init__.py +4092738fMRGC9fFBcPRCWaJaj9U3ag tools/xend/lib/blkif.py 4055ad97wMLUj0BZT0e_T0EwQN0Bvw tools/xend/lib/console.py 4048c0ddsF0WrU7HUzTvg1MJoCIfWg tools/xend/lib/domain_controller.h 4054a301VEag2GwrBrFBna5U1BGlLA tools/xend/lib/main.py diff --git a/tools/examples/xc_dom_create.py b/tools/examples/xc_dom_create.py index 799319c6a6..bb9a0576d9 100755 --- a/tools/examples/xc_dom_create.py +++ b/tools/examples/xc_dom_create.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import string, sys, os, time, socket, getopt, signal, syslog -import Xc, xenctl.utils, xenctl.console_client +import Xc, xenctl.utils, xenctl.console_client, re config_dir = '/etc/xc/' config_file = xc_config_file = config_dir + 'defaults' @@ -195,6 +195,15 @@ output('VM cmdline : "%s"' % cmdline) if dryrun: sys.exit(1) +##### HACK HACK HACK +##### Until everyone moves to the new I/O world, and a more robust domain +##### controller (xend), we use this little trick to discover whether we +##### are in a testing environment for new I/O stuff. +new_io_world = True +for line in os.popen('cat /proc/interrupts').readlines(): + if re.search('blkdev', line): + new_io_world = False + ##### Code beyond this point is actually used to manage the mechanics of ##### starting (and watching if necessary) guest virtual machines. @@ -228,14 +237,14 @@ def make_domain(): cmsg = 'new_control_interface(dom='+str(id)+', console_port='+str(console_port)+')' - xend_response = xenctl.utils.xend_control_message(cmsg) + cons_response = xenctl.utils.xend_control_message(cmsg) - if not xend_response['success']: + if not cons_response['success']: print "Error creating initial event channel" - print "Error type: " + xend_response['error_type'] - if xend_response['error_type'] == 'exception': - print "Exception type: " + xend_response['exception_type'] - print "Exception value: " + xend_response['exception_value'] + print "Error type: " + cons_response['error_type'] + if cons_response['error_type'] == 'exception': + print "Exception type: " + cons_response['exception_type'] + print "Exception value: " + cons_response['exception_value'] xc.domain_destroy ( dom=id ) sys.exit() @@ -248,7 +257,7 @@ def make_domain(): sys.exit() else: - ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"] )' % builder_fn) + ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=cons_response["remote_port"] )' % builder_fn) if ret < 0: print "Error building Linux guest OS: " print "Return code = " + str(ret) @@ -259,6 +268,18 @@ def make_domain(): # set the expertise level appropriately xenctl.utils.VBD_EXPERT_MODE = vbd_expert + + if new_io_world: + cmsg = 'new_block_interface(dom='+str(id)+')' + xend_response = xenctl.utils.xend_control_message(cmsg) + if not xend_response['success']: + print "Error creating block interface" + print "Error type: " + xend_response['error_type'] + if xend_response['error_type'] == 'exception': + print "Exception type: " + xend_response['exception_type'] + print "Exception val: " + xend_response['exception_value'] + xc.domain_destroy ( dom=id ) + sys.exit() for ( uname, virt_name, rw ) in vbd_list: virt_dev = xenctl.utils.blkdev_name_to_number( virt_name ) @@ -269,42 +290,70 @@ def make_domain(): xc.domain_destroy ( dom=id ) sys.exit() - # check that setting up this VBD won't violate the sharing - # allowed by the current VBD expertise level - if xenctl.utils.vd_extents_validate(segments, rw=='w' or rw=='rw') < 0: - xc.domain_destroy( dom = id ) - sys.exit() + if new_io_world: + if len(segments) > 1: + print "New I/O world cannot deal with multi-extent vdisks" + xc.domain_destroy ( dom=id ) + sys.exit() + seg = segments[0] + cmsg = 'new_block_device(dom=' + str(id) + \ + ',handle=0,vdev=' + str(virt_dev) + \ + ',pdev=' + str(seg['device']) + \ + ',start_sect=' + str(seg['start_sector']) + \ + ',nr_sect=' + str(seg['nr_sectors']) + \ + ',readonly=' + str(not re.match('w',rw)) + ')' + xend_response = xenctl.utils.xend_control_message(cmsg) + if not xend_response['success']: + print "Error creating virtual block device" + print "Error type: " + xend_response['error_type'] + if xend_response['error_type'] == 'exception': + print "Exception type: " + xend_response['exception_type'] + print "Exception val: " + xend_response['exception_value'] + xc.domain_destroy ( dom=id ) + sys.exit() + else: + # check that setting up this VBD won't violate the sharing + # allowed by the current VBD expertise level + if xenctl.utils.vd_extents_validate(segments, + rw=='w' or rw=='rw') < 0: + xc.domain_destroy( dom = id ) + sys.exit() - if xc.vbd_create( dom=id, vbd=virt_dev, writeable= rw=='w' or rw=='rw' ): - print "Error creating VBD vbd=%d writeable=%d\n" % (virt_dev,rw) - xc.domain_destroy ( dom=id ) - sys.exit() + if xc.vbd_create( dom=id, vbd=virt_dev, + writeable= rw=='w' or rw=='rw' ): + print "Error creating VBD %d (writeable=%d)\n" % (virt_dev,rw) + xc.domain_destroy ( dom=id ) + sys.exit() - if xc.vbd_setextents( dom=id, - vbd=virt_dev, - extents=segments): - print "Error populating VBD vbd=%d\n" % virt_dev - xc.domain_destroy ( dom=id ) - sys.exit() - - # setup virtual firewall rules for all aliases - for ip in vfr_ipaddr: - xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip ) - - # check for physical device access - for (pci_bus, pci_dev, pci_func) in pci_device_list: - if xc.physdev_pci_access_modify( - dom=id, bus=pci_bus, dev=pci_dev, func=pci_func, enable=1 ) < 0: - print "Non-fatal error enabling PCI device access." - else: - print "Enabled PCI access (%d:%d:%d)." % (pci_bus,pci_dev,pci_func) + if xc.vbd_setextents( dom=id, + vbd=virt_dev, + extents=segments): + print "Error populating VBD vbd=%d\n" % virt_dev + xc.domain_destroy ( dom=id ) + sys.exit() + + if not new_io_world: + # setup virtual firewall rules for all aliases + for ip in vfr_ipaddr: + xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip ) + + if new_io_world: + # check for physical device access + for (pci_bus, pci_dev, pci_func) in pci_device_list: + if xc.physdev_pci_access_modify( + dom=id, bus=pci_bus, dev=pci_dev, + func=pci_func, enable=1 ) < 0: + print "Non-fatal error enabling PCI device access." + else: + print "Enabled PCI access (%d:%d:%d)." % \ + (pci_bus,pci_dev,pci_func) if xc.domain_start( dom=id ) < 0: print "Error starting domain" xc.domain_destroy ( dom=id ) sys.exit() - return (id, xend_response['console_port']) + return (id, cons_response['console_port']) # end of make_domain() def mkpidfile(): diff --git a/tools/xend/lib/blkif.py b/tools/xend/lib/blkif.py new file mode 100644 index 0000000000..94e058f7ce --- /dev/null +++ b/tools/xend/lib/blkif.py @@ -0,0 +1,143 @@ + +################################################################# +## xend/blkif.py -- Block-interface management functions for Xend +## Copyright (c) 2004, K A Fraser (University of Cambridge) +################################################################# + +import errno, re, os, select, signal, socket, struct, sys +import xend.main, xend.console, xend.manager, xend.utils, Xc + +CMSG_BLKIF_BE = 1 +CMSG_BLKIF_FE = 2 +CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED = 0 +CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED = 32 +CMSG_BLKIF_FE_INTERFACE_CONNECT = 33 +CMSG_BLKIF_FE_INTERFACE_DISCONNECT = 34 +CMSG_BLKIF_BE_CREATE = 0 +CMSG_BLKIF_BE_DESTROY = 1 +CMSG_BLKIF_BE_CONNECT = 2 +CMSG_BLKIF_BE_DISCONNECT = 3 +CMSG_BLKIF_BE_VBD_CREATE = 4 +CMSG_BLKIF_BE_VBD_DESTROY = 5 +CMSG_BLKIF_BE_VBD_GROW = 6 +CMSG_BLKIF_BE_VBD_SHRINK = 7 + +pendmsg = None +pendaddr = None + +def backend_tx_req(msg): + port = xend.main.dom0_port + if port.space_to_write_request(): + port.write_request(msg) + port.notify() + else: + xend.blkif.pendmsg = msg + +def backend_rx_req(port, msg): + port.write_response(msg) + +def backend_rx_rsp(port, msg): + subtype = (msg.get_header())['subtype'] + print "Received blkif-be response, subtype %d" % subtype + if subtype == CMSG_BLKIF_BE_CREATE: + rsp = { 'success': True } + xend.main.send_management_response(rsp, xend.blkif.pendaddr) + elif subtype == CMSG_BLKIF_BE_CONNECT: + (dom,hnd,evtchn,frame,st) = struct.unpack("QIILI", msg.get_payload()) + blkif = interface.list[xend.main.port_from_dom(dom).local_port] + msg = xend.utils.message(CMSG_BLKIF_FE, \ + CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0) + msg.append_payload(struct.pack("III",0,2,blkif.evtchn['port2'])) + blkif.ctrlif_tx_req(xend.main.port_list[blkif.key], msg) + elif subtype == CMSG_BLKIF_BE_VBD_CREATE: + (dom,hnd,vdev,ro,st) = struct.unpack("QIHII", msg.get_payload()) + blkif = interface.list[xend.main.port_from_dom(dom).local_port] + (pdev, start_sect, nr_sect, readonly) = blkif.devices[vdev] + msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_GROW, 0) + msg.append_payload(struct.pack("QIHHHQQI",dom,0,vdev,0, \ + pdev,start_sect,nr_sect,0)) + backend_tx_req(msg) + elif subtype == CMSG_BLKIF_BE_VBD_GROW: + rsp = { 'success': True } + xend.main.send_management_response(rsp, xend.blkif.pendaddr) + +def backend_do_work(port): + global pendmsg + if pendmsg and port.space_to_write_request(): + port.write_request(pendmsg) + pendmsg = None + return True + return False + + +class interface: + + # Dictionary of all block-device interfaces. + list = {} + + + # NB. 'key' is an opaque value that has no meaning in this class. + def __init__(self, dom, key): + self.dom = dom + self.key = key + self.devices = {} + self.pendmsg = None + interface.list[key] = self + msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CREATE, 0) + msg.append_payload(struct.pack("QII",dom,0,0)) + xend.blkif.pendaddr = xend.main.mgmt_req_addr + backend_tx_req(msg) + + # Attach a device to the specified interface + def attach_device(self, vdev, pdev, start_sect, nr_sect, readonly): + if self.devices.has_key(vdev): + return False + self.devices[vdev] = (pdev, start_sect, nr_sect, readonly) + msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE, 0) + msg.append_payload(struct.pack("QIHII",self.dom,0,vdev,readonly,0)) + xend.blkif.pendaddr = xend.main.mgmt_req_addr + backend_tx_req(msg) + return True + + + # Completely destroy this interface. + def destroy(self): + del interface.list[self.key] + msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DESTROY, 0) + msg.append_payload(struct.pack("QII",self.dom,0,0)) + backend_tx_req(msg) + + + # The parameter @port is the control-interface event channel. This method + # returns True if messages were written to the control interface. + def ctrlif_transmit_work(self, port): + if self.pendmsg and port.space_to_write_request(): + port.write_request(self.pendmsg) + self.pendmsg = None + return True + return False + + def ctrlif_tx_req(self, port, msg): + if port.space_to_write_request(): + port.write_request(msg) + port.notify() + else: + self.pendmsg = msg + + def ctrlif_rx_req(self, port, msg): + port.write_response(msg) + subtype = (msg.get_header())['subtype'] + if subtype == CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED: + msg = xend.utils.message(CMSG_BLKIF_FE, \ + CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0) + msg.append_payload(struct.pack("III",0,1,0)) + self.ctrlif_tx_req(port, msg) + elif subtype == CMSG_BLKIF_FE_INTERFACE_CONNECT: + (hnd,frame) = struct.unpack("IL", msg.get_payload()) + xc = Xc.new() + self.evtchn = xc.evtchn_bind_interdomain(dom1=0,dom2=self.dom) + msg = xend.utils.message(CMSG_BLKIF_BE, \ + CMSG_BLKIF_BE_CONNECT, 0) + msg.append_payload(struct.pack("QIILI",self.dom,0, \ + self.evtchn['port1'],frame,0)) + backend_tx_req(msg) diff --git a/tools/xend/lib/console.py b/tools/xend/lib/console.py index aad6069979..57898817f5 100644 --- a/tools/xend/lib/console.py +++ b/tools/xend/lib/console.py @@ -5,7 +5,7 @@ ############################################################# import errno, re, os, select, signal, socket, struct, sys - +import xend.blkif, xend.main, xend.manager, xend.utils, Xc ## ## interface: @@ -16,7 +16,7 @@ import errno, re, os, select, signal, socket, struct, sys ## CONNECTED: sending/receiving console data on TCP port 'self.port' ## ## A dictionary of all active interfaces, indexed by TCP socket descriptor, -## is accessible as 'interface.interface_list'. +## is accessible as 'interface.list_by_fd'. ## ## NB. When a class instance is to be destroyed you *must* call the 'close' ## method. Otherwise a stale reference will eb left in the interface list. @@ -30,7 +30,11 @@ class interface: # Dictionary of all active (non-closed) console interfaces. - interface_list = {} + list_by_fd = {} + + + # Dictionary of all console interfaces, closed and open. + list = {} # NB. 'key' is an opaque value that has no meaning in this class. @@ -38,6 +42,9 @@ class interface: self.status = interface.CLOSED self.port = port self.key = key + self.rbuf = xend.utils.buffer() + self.wbuf = xend.utils.buffer() + interface.list[key] = self # Is this interface closed (inactive)? @@ -58,14 +65,14 @@ class interface: # Close the interface, if it is not closed already. def close(self): if not self.closed(): - del interface.interface_list[self.sock.fileno()] + del interface.list_by_fd[self.sock.fileno()] self.sock.close() del self.sock self.status = interface.CLOSED # Move the interface into the 'listening' state. Opens a new listening - # socket and updates 'interface_list'. + # socket and updates 'list_by_fd'. def listen(self): # Close old socket (if any), and create a fresh one. self.close() @@ -80,7 +87,7 @@ class interface: # Announce the new status of thsi interface. self.status = interface.LISTENING - interface.interface_list[self.sock.fileno()] = self + interface.list_by_fd[self.sock.fileno()] = self except: # In case of trouble ensure we get rid of dangling socket reference @@ -105,7 +112,69 @@ class interface: # Publish the new socket and the new interface state. self.sock = sock self.status = interface.CONNECTED - interface.interface_list[self.sock.fileno()] = self + interface.list_by_fd[self.sock.fileno()] = self return 1 + # Completely sestroy a console interface. + def destroy(self): + self.close() + del interface.list[self.key] + + + # Do work triggered by resource availability on a console-interface socket. + def socket_work(self): + # If the interface is listening, check for pending connections. + if self.listening(): + self.connect() + + # All done if the interface is not connected. + if not self.connected(): + return + + # Send as much pending data as possible via the socket. + while not self.rbuf.empty(): + try: + bytes = self.sock.send(self.rbuf.peek()) + if bytes > 0: + self.rbuf.discard(bytes) + except socket.error, error: + pass + + # Read as much data as is available. Don't worry about + # overflowing our buffer: it's more important to read the + # incoming data stream and detect errors or closure of the + # remote end in a timely manner. + try: + while 1: + data = self.sock.recv(2048) + # Return of zero means the remote end has disconnected. + # We therefore return the console interface to listening. + if not data: + self.listen() + break + self.wbuf.write(data) + except socket.error, error: + # Assume that most errors mean that the connection is dead. + # In such cases we return the interface to 'listening' state. + if error[0] != errno.EAGAIN: + print "Better return to listening" + self.listen() + print "New status: " + str(self.status) + + + # The parameter @port is the control-interface event channel. This method + # returns True if messages were written to the control interface. + def ctrlif_transmit_work(self, port): + work_done = False + while not self.wbuf.empty() and port.space_to_write_request(): + msg = xend.utils.message(0, 0, 0) + msg.append_payload(self.wbuf.read(msg.MAX_PAYLOAD)) + port.write_request(msg) + work_done = True + return work_done + + + def ctrlif_rx_req(self, port, msg): + self.rbuf.write(msg.get_payload()) + port.write_response(msg) diff --git a/tools/xend/lib/domain_controller.h b/tools/xend/lib/domain_controller.h index d9ea7d6160..68d4fac1d2 100644 --- a/tools/xend/lib/domain_controller.h +++ b/tools/xend/lib/domain_controller.h @@ -76,8 +76,8 @@ typedef struct { /* Messages from guest to domain controller. */ #define CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED 32 -#define CMSG_BLKIF_FE_INTERFACE_UP 33 -#define CMSG_BLKIF_FE_INTERFACE_DOWN 34 +#define CMSG_BLKIF_FE_INTERFACE_CONNECT 33 +#define CMSG_BLKIF_FE_INTERFACE_DISCONNECT 34 /* These are used by both front-end and back-end drivers. */ #define blkif_vdev_t u16 @@ -91,13 +91,13 @@ typedef struct { * 1. The shared-memory frame is available for reuse. * 2. Any unacknowledged messgaes pending on the interface were dropped. */ -#define BLKIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist. */ -#define BLKIF_INTERFACE_STATUS_DOWN 1 /* Interface exists but is down. */ -#define BLKIF_INTERFACE_STATUS_UP 2 /* Interface exists and is up. */ +#define BLKIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist. */ +#define BLKIF_INTERFACE_STATUS_DISCONNECTED 1 /* Exists but is disconnected. */ +#define BLKIF_INTERFACE_STATUS_CONNECTED 2 /* Exists and is connected. */ typedef struct { unsigned int handle; unsigned int status; - unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_UP */ + unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_CONNECTED */ } blkif_fe_interface_status_changed_t; /* @@ -109,30 +109,37 @@ typedef struct { * If the driver goes DOWN while interfaces are still UP, the domain * will automatically take the interfaces DOWN. */ -#define BLKIF_DRIVER_STATUS_DOWN 0 -#define BLKIF_DRIVER_STATUS_UP 1 +#define BLKIF_DRIVER_STATUS_DOWN 0 +#define BLKIF_DRIVER_STATUS_UP 1 typedef struct { unsigned int status; /* BLKIF_DRIVER_STATUS_??? */ } blkif_fe_driver_status_changed_t; /* - * CMSG_BLKIF_FE_INTERFACE_UP: - * If successful, the domain controller will acknowledge with a STATUS_UP - * message. + * CMSG_BLKIF_FE_INTERFACE_CONNECT: + * If successful, the domain controller will acknowledge with a + * STATUS_CONNECTED message. */ typedef struct { unsigned int handle; unsigned long shmem_frame; -} blkif_fe_interface_up_t; +} blkif_fe_interface_connect_t; /* - * CMSG_BLKIF_FE_INTERFACE_DOWN: - * If successful, the domain controller will acknowledge with a STATUS_DOWN - * message. + * CMSG_BLKIF_FE_INTERFACE_DISCONNECT: + * If successful, the domain controller will acknowledge with a + * STATUS_DISCONNECTED message. */ typedef struct { + /* IN */ unsigned int handle; -} blkif_fe_interface_down_t; + /* OUT */ + /* + * Tells driver how many interfaces it should expect to immediately + * receive notifications about. + */ + unsigned int nr_interfaces; +} blkif_fe_interface_disconnect_t; /****************************************************************************** @@ -142,10 +149,12 @@ typedef struct { /* Messages from domain controller. */ #define CMSG_BLKIF_BE_CREATE 0 /* Create a new block-device interface. */ #define CMSG_BLKIF_BE_DESTROY 1 /* Destroy a block-device interface. */ -#define CMSG_BLKIF_BE_VBD_CREATE 2 /* Create a new VBD for an interface. */ -#define CMSG_BLKIF_BE_VBD_DESTROY 3 /* Delete a VBD from an interface. */ -#define CMSG_BLKIF_BE_VBD_GROW 4 /* Append an extent to a given VBD. */ -#define CMSG_BLKIF_BE_VBD_SHRINK 5 /* Remove last extent from a given VBD. */ +#define CMSG_BLKIF_BE_CONNECT 2 /* Connect i/f to remote driver. */ +#define CMSG_BLKIF_BE_DISCONNECT 3 /* Disconnect i/f from remote driver. */ +#define CMSG_BLKIF_BE_VBD_CREATE 4 /* Create a new VBD for an interface. */ +#define CMSG_BLKIF_BE_VBD_DESTROY 5 /* Delete a VBD from an interface. */ +#define CMSG_BLKIF_BE_VBD_GROW 6 /* Append an extent to a given VBD. */ +#define CMSG_BLKIF_BE_VBD_SHRINK 7 /* Remove last extent from a given VBD. */ /* Messages to domain controller. */ #define CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED 32 @@ -167,36 +176,36 @@ typedef struct { /* The following are specific error returns. */ #define BLKIF_BE_STATUS_INTERFACE_EXISTS 2 #define BLKIF_BE_STATUS_INTERFACE_NOT_FOUND 3 -#define BLKIF_BE_STATUS_VBD_EXISTS 4 -#define BLKIF_BE_STATUS_VBD_NOT_FOUND 5 -#define BLKIF_BE_STATUS_OUT_OF_MEMORY 6 -#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND 7 -#define BLKIF_BE_STATUS_MAPPING_ERROR 8 +#define BLKIF_BE_STATUS_INTERFACE_CONNECTED 4 +#define BLKIF_BE_STATUS_VBD_EXISTS 5 +#define BLKIF_BE_STATUS_VBD_NOT_FOUND 6 +#define BLKIF_BE_STATUS_OUT_OF_MEMORY 7 +#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND 8 +#define BLKIF_BE_STATUS_MAPPING_ERROR 9 /* This macro can be used to create an array of descriptive error strings. */ -#define BLKIF_BE_STATUS_ERRORS { \ - "Okay", \ - "Non-specific error", \ - "Interface already exists", \ - "Interface not found", \ - "VBD already exists", \ - "VBD not found", \ - "Out of memory", \ - "Extent not found for VBD", \ +#define BLKIF_BE_STATUS_ERRORS { \ + "Okay", \ + "Non-specific error", \ + "Interface already exists", \ + "Interface not found", \ + "Interface is still connected", \ + "VBD already exists", \ + "VBD not found", \ + "Out of memory", \ + "Extent not found for VBD", \ "Could not map domain memory" } /* * CMSG_BLKIF_BE_CREATE: * When the driver sends a successful response then the interface is fully - * set up. The controller will send an UP notification to the front-end + * created. The controller will send a DOWN notification to the front-end * driver. */ typedef struct { /* IN */ domid_t domid; /* Domain attached to new interface. */ unsigned int blkif_handle; /* Domain-specific interface handle. */ - unsigned int evtchn; /* Event channel for notifications. */ - unsigned long shmem_frame; /* Page cont. shared comms window. */ /* OUT */ unsigned int status; } blkif_be_create_t; @@ -204,8 +213,8 @@ typedef struct { /* * CMSG_BLKIF_BE_DESTROY: * When the driver sends a successful response then the interface is fully - * torn down. The controller will send a DOWN notification to the front-end - * driver. + * torn down. The controller will send a DESTROYED notification to the + * front-end driver. */ typedef struct { /* IN */ @@ -215,6 +224,36 @@ typedef struct { unsigned int status; } blkif_be_destroy_t; +/* + * CMSG_BLKIF_BE_CONNECT: + * When the driver sends a successful response then the interface is fully + * connected. The controller will send a CONNECTED notification to the + * front-end driver. + */ +typedef struct { + /* IN */ + domid_t domid; /* Domain attached to new interface. */ + unsigned int blkif_handle; /* Domain-specific interface handle. */ + unsigned int evtchn; /* Event channel for notifications. */ + unsigned long shmem_frame; /* Page cont. shared comms window. */ + /* OUT */ + unsigned int status; +} blkif_be_connect_t; + +/* + * CMSG_BLKIF_BE_DISCONNECT: + * When the driver sends a successful response then the interface is fully + * disconnected. The controller will send a DOWN notification to the front-end + * driver. + */ +typedef struct { + /* IN */ + domid_t domid; /* Domain attached to new interface. */ + unsigned int blkif_handle; /* Domain-specific interface handle. */ + /* OUT */ + unsigned int status; +} blkif_be_disconnect_t; + /* CMSG_BLKIF_BE_VBD_CREATE */ typedef struct { /* IN */ @@ -264,7 +303,14 @@ typedef struct { * will automatically send DOWN notifications. */ typedef struct { + /* IN */ unsigned int status; /* BLKIF_DRIVER_STATUS_??? */ + /* OUT */ + /* + * Tells driver how many interfaces it should expect to immediately + * receive notifications about. + */ + unsigned int nr_interfaces; } blkif_be_driver_status_changed_t; #endif /* __DOMAIN_CONTROLLER_H__ */ diff --git a/tools/xend/lib/main.py b/tools/xend/lib/main.py index b870af55d1..7b5adbab83 100755 --- a/tools/xend/lib/main.py +++ b/tools/xend/lib/main.py @@ -5,7 +5,7 @@ ########################################################### import errno, re, os, pwd, select, signal, socket, struct, sys, time -import xend.console, xend.manager, xend.utils, Xc +import xend.blkif, xend.console, xend.manager, xend.utils, Xc # The following parameters could be placed in a configuration file. @@ -16,13 +16,35 @@ CONTROL_DIR = '/var/run/xend' UNIX_SOCK = 'management_sock' # relative to CONTROL_DIR +CMSG_CONSOLE = 0 +CMSG_BLKIF_BE = 1 +CMSG_BLKIF_FE = 2 + + +def port_from_dom(dom): + global port_list + for idx, port in port_list.items(): + if port.remote_dom == dom: + return port + return None + + +def send_management_response(response, addr): + try: + response = str(response) + print "Mgmt_rsp[%s]: %s" % (addr, response) + management_interface.sendto(response, addr) + except socket.error, error: + pass + + def daemon_loop(): # Could we do this more nicely? The xend.manager functions need access # to this global state to do their work. - global control_list, notifier + global port_list, notifier, management_interface, mgmt_req_addr, dom0_port - # List of all control interfaces, indexed by local event-channel port. - control_list = {} + # Lists of all interfaces, indexed by local event-channel port. + port_list = {} xc = Xc.new() @@ -46,13 +68,10 @@ def daemon_loop(): # The DOM0 control interface is not set up via the management interface. # Note that console messages don't come our way (actually, only driver - # back-ends should use the DOM0 control interface) -- the console - # structures are dummies. + # back-ends should use the DOM0 control interface). dom0_port = xend.utils.port(0) - xend.main.notifier.bind(dom0_port.local_port) - xend.main.control_list[dom0_port.local_port] = \ - (dom0_port, xend.utils.buffer(), xend.utils.buffer(), \ - xend.console.interface(0, dom0_port.local_port)) + notifier.bind(dom0_port.local_port) + port_list[dom0_port.local_port] = dom0_port ## ## MAIN LOOP @@ -68,10 +87,10 @@ def daemon_loop(): waitset = select.poll() waitset.register(management_interface, select.POLLIN) waitset.register(notifier, select.POLLIN) - for idx, (port, rbuf, wbuf, con_if) in control_list.items(): + for idx, con_if in xend.console.interface.list_by_fd.items(): if not con_if.closed(): pflags = select.POLLIN - if not rbuf.empty() and con_if.connected(): + if not con_if.rbuf.empty() and con_if.connected(): pflags = select.POLLIN | select.POLLOUT waitset.register(con_if.sock.fileno(), pflags) @@ -82,16 +101,16 @@ def daemon_loop(): # These should consist of executable Python statements that call # well-known management functions (e.g., new_control_interface(dom=9)). try: - data, addr = management_interface.recvfrom(2048) + data, mgmt_req_addr = management_interface.recvfrom(2048) except socket.error, error: if error[0] != errno.EAGAIN: raise else: - if addr: + if mgmt_req_addr: # Evaluate the request in an exception-trapping sandbox. try: - print "Mgmt_req[%s]: %s" % (addr, data) - response = str(eval('xend.manager.'+data)) + print "Mgmt_req[%s]: %s" % (mgmt_req_addr, data) + response = eval('xend.manager.'+data) except: # Catch all exceptions and turn into an error response: @@ -107,69 +126,20 @@ def daemon_loop(): response = str(response) # Try to send a response to the requester. - try: - print "Mgmt_rsp[%s]: %s" % (addr, response) - management_interface.sendto(response, addr) - except socket.error, error: - pass + if response: + send_management_response(response, mgmt_req_addr) # Do work for every console interface that hit in the poll set. for (fd, events) in fdset: - if not xend.console.interface.interface_list.has_key(fd): - continue - con_if = xend.console.interface.interface_list[fd] - - # If the interface is listening, check for pending connections. - if con_if.listening(): - con_if.connect() - - # All done if the interface is not connected. - if not con_if.connected(): - continue - (port, rbuf, wbuf, con_if) = control_list[con_if.key] - - # Send as much pending data as possible via the socket. - while not rbuf.empty(): - try: - bytes = con_if.sock.send(rbuf.peek()) - if bytes > 0: - rbuf.discard(bytes) - except socket.error, error: - pass - - # Read as much data as is available. Don't worry about - # overflowing our buffer: it's more important to read the - # incoming data stream and detect errors or closure of the - # remote end in a timely manner. - try: - while 1: - data = con_if.sock.recv(2048) - # Return of zero means the remote end has disconnected. - # We therefore return the console interface to listening. - if not data: - con_if.listen() - break - wbuf.write(data) - except socket.error, error: - # Assume that most errors mean that the connection is dead. - # In such cases we return the interface to 'listening' state. - if error[0] != errno.EAGAIN: - print "Better return to listening" - con_if.listen() - print "New status: " + str(con_if.status) - - # We may now have pending data to send via the relevant - # inter-domain control interface. If so then we send all we can - # and notify the remote end. - work_done = False - while not wbuf.empty() and port.space_to_write_request(): - msg = xend.utils.message(0, 0, 0) - msg.append_payload(wbuf.read(msg.MAX_PAYLOAD)) - port.write_request(msg) - work_done = True - if work_done: - port.notify() - + if xend.console.interface.list_by_fd.has_key(fd): + con_if = xend.console.interface.list_by_fd[fd] + con_if.socket_work() + # We may now have pending data to send via the control + # interface. If so then send all we can and notify the remote. + port = port_list[con_if.key] + if con_if.ctrlif_transmit_work(port): + port.notify() + # Process control-interface notifications from other guest OSes. while 1: # Grab a notification, if there is one. @@ -178,42 +148,69 @@ def daemon_loop(): break (idx, type) = notification - if not control_list.has_key(idx): + if not port_list.has_key(idx): continue - (port, rbuf, wbuf, con_if) = control_list[idx] + port = port_list[idx] work_done = False + con_if = False + if xend.console.interface.list.has_key(idx): + con_if = xend.console.interface.list[idx] + + blk_if = False + if xend.blkif.interface.list.has_key(idx): + blk_if = xend.blkif.interface.list[idx] + # If we pick up a disconnect notification then we do any necessary # cleanup. if type == notifier.EXCEPTION: ret = xc.evtchn_status(idx) if ret['status'] == 'unbound': notifier.unbind(idx) - con_if.close() - del control_list[idx], port, rbuf, wbuf, con_if + del port_list[idx], port + if con_if: + con_if.destroy() + del con_if + if blk_if: + blk_if.destroy() + del blk_if continue - # Read incoming requests. Currently assume that request - # message always containb console data. + # Process incoming requests. while port.request_to_read(): msg = port.read_request() - rbuf.write(msg.get_payload()) - port.write_response(msg) work_done = True - - # Incoming responses are currently thrown on the floor. + type = (msg.get_header())['type'] + if type == CMSG_CONSOLE and con_if: + con_if.ctrlif_rx_req(port, msg) + elif type == CMSG_BLKIF_FE and blk_if: + blk_if.ctrlif_rx_req(port, msg) + elif type == CMSG_BLKIF_BE and port == dom0_port: + xend.blkif.backend_rx_req(port, msg) + else: + port.write_response(msg) + + # Process incoming responses. while port.response_to_read(): msg = port.read_response() work_done = True + type = (msg.get_header())['type'] + if type == CMSG_BLKIF_BE and port == dom0_port: + xend.blkif.backend_rx_rsp(port, msg) + + # Send console data. + if con_if and con_if.ctrlif_transmit_work(port): + work_done = True - # Send as much pending console data as there is room for. - while not wbuf.empty() and port.space_to_write_request(): - msg = xend.utils.message(0, 0, 0) - msg.append_payload(wbuf.read(msg.MAX_PAYLOAD)) - port.write_request(msg) + # Send blkif messages. + if blk_if and blk_if.ctrlif_transmit_work(port): work_done = True + # Back-end block-device work. + if port == dom0_port and xend.blkif.backend_do_work(port): + work_done = True + # Finally, notify the remote end of any work that we did. if work_done: port.notify() diff --git a/tools/xend/lib/manager.py b/tools/xend/lib/manager.py index 42d66d3a95..ea7398cd4c 100644 --- a/tools/xend/lib/manager.py +++ b/tools/xend/lib/manager.py @@ -4,13 +4,13 @@ ## Copyright (c) 2004, K A Fraser (University of Cambridge) ############################################################# -import xend.console, xend.main, xend.utils +import xend.blkif, xend.console, xend.main, xend.utils ## ## new_control_interface: -## Create a new control interface with the specified domain 'dom'. -## The console port may also be specified; otehrwise a suitable port is +## Create a new control interface with the specified domain @dom. +## The console port may also be specified; otherwise a suitable port is ## automatically allocated. ## def new_control_interface(dom, console_port=-1): @@ -26,9 +26,8 @@ def new_control_interface(dom, console_port=-1): con_if = xend.console.interface(console_port, port.local_port) con_if.listen() - # Add control state to the master list. - xend.main.control_list[port.local_port] = \ - (port, xend.utils.buffer(), xend.utils.buffer(), con_if) + # Update the master port list. + xend.main.port_list[port.local_port] = port # Construct the successful response to be returned to the requester. response = { 'success': True } @@ -36,3 +35,81 @@ def new_control_interface(dom, console_port=-1): response['remote_port'] = port.remote_port response['console_port'] = console_port return response + + +## +## new_block_interface: +## Create a new block interface for the specified domain @dom. +## +def new_block_interface(dom, handle=-1): + # By default we create an interface with handle zero. + if handle < 0: + handle = 0 + + # We only support one interface per domain, which must have handle zero. + if handle != 0: + response = { 'success': False } + response['error_type'] = 'Bad handle %d (only handle 0 ' + \ + 'is supported)' % handle + return response + + # Find local event-channel port associated with the specified domain. + port = xend.main.port_from_dom(dom) + if not port: + response = { 'success': False } + response['error_type'] = 'Unknown domain %d' % dom + return response + + # The interface must not already exist. + if xend.blkif.interface.list.has_key(port.local_port): + response = { 'success': False } + response['error_type'] = 'Interface (dom=%d,handle=%d) already ' + \ + 'exists' % (dom, handle) + return response + + # Create the new interface. Initially no virtual devices are attached. + xend.blkif.interface(dom, port.local_port) + + # Response is deferred until back-end driver sends acknowledgement. + return None + + +## +## new_block_device: +## Attach a new virtual block device to the specified block interface +## (@dom, @handle). The new device is identified by @vdev, and maps to +## the real block extent (@pdev, @start_sect, @nr_sect). If @readonly then +## write requests to @vdev will be rejected. +## +def new_block_device(dom, handle, vdev, pdev, start_sect, nr_sect, readonly): + # We only support one interface per domain, which must have handle zero. + if handle != 0: + response = { 'success': False } + response['error_type'] = 'Bad handle %d (only handle 0 ' + \ + 'is supported)' % handle + return response + + # Find local event-channel port associated with the specified domain. + port = xend.main.port_from_dom(dom) + if not port: + response = { 'success': False } + response['error_type'] = 'Unknown domain %d' % dom + return response + + # The interface must exist. + if not xend.blkif.interface.list.has_key(port.local_port): + response = { 'success': False } + response['error_type'] = 'Interface (dom=%d,handle=%d) does not ' + \ + 'exists' % (dom, handle) + return response + + # The virtual device must not yet exist. + blkif = xend.blkif.interface.list[port.local_port] + if not blkif.attach_device(vdev, pdev, start_sect, nr_sect, readonly): + response = { 'success': False } + response['error_type'] = 'Vdevice (dom=%d,handle=%d,vdevice=%d) ' + \ + 'already exists' % (dom, handle, vdev) + return response + + # Response is deferred until back-end driver sends acknowledgement. + return None diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index c0bea86320..c6011cebb4 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -109,15 +109,18 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) goto out; } + /* 'Allocate' port1 before searching for a free port2. */ + p1->event_channel[port1].state = ECS_INTERDOMAIN; + if ( (port2 = get_free_port(p2)) < 0 ) { + p1->event_channel[port1].state = ECS_FREE; rc = port2; goto out; } p1->event_channel[port1].u.remote.dom = p2; p1->event_channel[port1].u.remote.port = (u16)port2; - p1->event_channel[port1].state = ECS_INTERDOMAIN; p2->event_channel[port2].u.remote.dom = p1; p2->event_channel[port2].u.remote.port = (u16)port1; diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h index 646f4855f3..e6004b4a8e 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h @@ -41,6 +41,12 @@ typedef struct blkif_st { rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */ spinlock_t vbd_lock; /* Protects VBD mapping. */ /* Private fields. */ + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + /* + * DISCONNECT response is deferred until pending requests are ack'ed. + * We therefore need to store the id from the original request. + */ + u8 disconnect_rspid; struct blkif_st *hash_next; struct list_head blkdev_list; spinlock_t blk_ring_lock; @@ -49,13 +55,15 @@ typedef struct blkif_st { void blkif_create(blkif_be_create_t *create); void blkif_destroy(blkif_be_destroy_t *destroy); -void __blkif_destroy(blkif_t *blkif); +void blkif_connect(blkif_be_connect_t *connect); +int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id); +void __blkif_disconnect_complete(blkif_t *blkif); blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ do { \ if ( atomic_dec_and_test(&(_b)->refcnt) ) \ - __blkif_destroy(_b); \ + __blkif_disconnect_complete(_b); \ } while (0) /* An entry in a list of xen_extents. */ diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c index 2baddcd616..0746ecfab0 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c @@ -10,6 +10,8 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) { + DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype); + switch ( msg->subtype ) { case CMSG_BLKIF_BE_CREATE: @@ -22,6 +24,17 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) goto parse_error; blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]); break; + case CMSG_BLKIF_BE_CONNECT: + if ( msg->length != sizeof(blkif_be_connect_t) ) + goto parse_error; + blkif_connect((blkif_be_connect_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_DISCONNECT: + if ( msg->length != sizeof(blkif_be_disconnect_t) ) + goto parse_error; + if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) ) + return; /* Sending the response is deferred until later. */ + break; case CMSG_BLKIF_BE_VBD_CREATE: if ( msg->length != sizeof(blkif_be_vbd_create_t) ) goto parse_error; @@ -50,6 +63,8 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) return; parse_error: + DPRINTK("Parse error while reading message subtype %d, len %d\n", + msg->subtype, msg->length); msg->length = 0; ctrl_if_send_response(msg); } diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c index 87925681da..9acbac35ab 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c @@ -14,144 +14,216 @@ static kmem_cache_t *blkif_cachep; static blkif_t *blkif_hash[BLKIF_HASHSZ]; -static spinlock_t blkif_hash_lock; blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) { - blkif_t *blkif; - unsigned long flags; - - spin_lock_irqsave(&blkif_hash_lock, flags); - blkif = blkif_hash[BLKIF_HASH(domid, handle)]; - while ( blkif != NULL ) - { - if ( (blkif->domid == domid) && (blkif->handle == handle) ) - { - blkif_get(blkif); - break; - } + blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; + while ( (blkif != NULL) && + ((blkif->domid != domid) || (blkif->handle != handle)) ) blkif = blkif->hash_next; - } - spin_unlock_irqrestore(&blkif_hash_lock, flags); - return blkif; } -void __blkif_destroy(blkif_t *blkif) +void __blkif_disconnect_complete(blkif_t *blkif) { - free_irq(blkif->irq, NULL); + ctrl_msg_t cmsg; + blkif_be_disconnect_t disc; + + /* + * These can't be done in __blkif_disconnect() because at that point there + * may be outstanding requests at the disc whose asynchronous responses + * must still be notified to the remote driver. + */ unbind_evtchn_from_irq(blkif->evtchn); vfree(blkif->blk_ring_base); - destroy_all_vbds(blkif); - kmem_cache_free(blkif_cachep, blkif); + + /* Construct the deferred response message. */ + cmsg.type = CMSG_BLKIF_BE; + cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT; + cmsg.id = blkif->disconnect_rspid; + cmsg.length = sizeof(blkif_be_disconnect_t); + disc.domid = blkif->domid; + disc.blkif_handle = blkif->handle; + disc.status = BLKIF_BE_STATUS_OKAY; + memcpy(cmsg.msg, &disc, sizeof(disc)); + + /* + * Make sure message is constructed /before/ status change, because + * after the status change the 'blkif' structure could be deallocated at + * any time. Also make sure we send the response /after/ status change, + * as otherwise a subsequent CONNECT request could spuriously fail if + * another CPU doesn't see the status change yet. + */ + mb(); + if ( blkif->status != DISCONNECTING ) + BUG(); + blkif->status = DISCONNECTED; + mb(); + + /* Send the successful response. */ + ctrl_if_send_response(&cmsg); } void blkif_create(blkif_be_create_t *create) { domid_t domid = create->domid; unsigned int handle = create->blkif_handle; - unsigned int evtchn = create->evtchn; - unsigned long shmem_frame = create->shmem_frame; - unsigned long flags; blkif_t **pblkif, *blkif; - struct vm_struct *vma; - pgprot_t prot; - int error; - if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) + if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_ATOMIC)) == NULL ) { + DPRINTK("Could not create blkif: out of memory\n"); create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; return; } - if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL ) - { - create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - goto fail1; - } - - prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); - error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), - shmem_frame<<PAGE_SHIFT, PAGE_SIZE, - prot, domid); - if ( error != 0 ) - { - if ( error == -ENOMEM ) - create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - else if ( error == -EFAULT ) - create->status = BLKIF_BE_STATUS_MAPPING_ERROR; - else - create->status = BLKIF_BE_STATUS_ERROR; - goto fail2; - } - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - blkif->handle = handle; - blkif->evtchn = evtchn; - blkif->irq = bind_evtchn_to_irq(evtchn); - blkif->shmem_frame = shmem_frame; - blkif->blk_ring_base = (blkif_ring_t *)vma->addr; + blkif->domid = domid; + blkif->handle = handle; + blkif->status = DISCONNECTED; spin_lock_init(&blkif->vbd_lock); spin_lock_init(&blkif->blk_ring_lock); - - spin_lock_irqsave(&blkif_hash_lock, flags); + atomic_set(&blkif->refcnt, 0); pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( *pblkif == NULL ) + while ( *pblkif != NULL ) { if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) { - spin_unlock_irqrestore(&blkif_hash_lock, flags); + DPRINTK("Could not create blkif: already exists\n"); create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; - goto fail3; + kmem_cache_free(blkif_cachep, blkif); + return; } pblkif = &(*pblkif)->hash_next; } - atomic_set(&blkif->refcnt, 1); blkif->hash_next = *pblkif; *pblkif = blkif; - spin_unlock_irqrestore(&blkif_hash_lock, flags); - - request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif); - + DPRINTK("Successfully created blkif\n"); create->status = BLKIF_BE_STATUS_OKAY; - return; - - fail3: unbind_evtchn_from_irq(evtchn); - fail2: kmem_cache_free(blkif_cachep, blkif); - fail1: vfree(vma->addr); } void blkif_destroy(blkif_be_destroy_t *destroy) { domid_t domid = destroy->domid; unsigned int handle = destroy->blkif_handle; - unsigned long flags; blkif_t **pblkif, *blkif; - spin_lock_irqsave(&blkif_hash_lock, flags); - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif = *pblkif) == NULL ) + while ( (blkif = *pblkif) != NULL ) { if ( (blkif->domid == domid) && (blkif->handle == handle) ) { - *pblkif = blkif->hash_next; - spin_unlock_irqrestore(&blkif_hash_lock, flags); - blkif_deschedule(blkif); - blkif_put(blkif); - destroy->status = BLKIF_BE_STATUS_OKAY; - return; + if ( blkif->status != DISCONNECTED ) + goto still_connected; + goto destroy; } pblkif = &blkif->hash_next; } - spin_unlock_irqrestore(&blkif_hash_lock, flags); - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return; + + still_connected: + destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; + return; + + destroy: + *pblkif = blkif->hash_next; + destroy_all_vbds(blkif); + kmem_cache_free(blkif_cachep, blkif); + destroy->status = BLKIF_BE_STATUS_OKAY; +} + +void blkif_connect(blkif_be_connect_t *connect) +{ + domid_t domid = connect->domid; + unsigned int handle = connect->blkif_handle; + unsigned int evtchn = connect->evtchn; + unsigned long shmem_frame = connect->shmem_frame; + struct vm_struct *vma; + pgprot_t prot; + int error; + blkif_t *blkif; + + blkif = blkif_find_by_handle(domid, handle); + if ( unlikely(blkif == NULL) ) + { + DPRINTK("blkif_connect attempted for non-existent blkif (%llu,%u)\n", + connect->domid, connect->blkif_handle); + connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return; + } + + if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) + { + connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + return; + } + + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); + error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), + shmem_frame<<PAGE_SHIFT, PAGE_SIZE, + prot, domid); + if ( error != 0 ) + { + if ( error == -ENOMEM ) + connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + else if ( error == -EFAULT ) + connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; + else + connect->status = BLKIF_BE_STATUS_ERROR; + vfree(vma->addr); + return; + } + + if ( blkif->status != DISCONNECTED ) + { + connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; + vfree(vma->addr); + return; + } + + blkif->evtchn = evtchn; + blkif->irq = bind_evtchn_to_irq(evtchn); + blkif->shmem_frame = shmem_frame; + blkif->blk_ring_base = (blkif_ring_t *)vma->addr; + blkif->status = CONNECTED; + blkif_get(blkif); + + request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif); + + connect->status = BLKIF_BE_STATUS_OKAY; +} + +int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id) +{ + domid_t domid = disconnect->domid; + unsigned int handle = disconnect->blkif_handle; + blkif_t *blkif; + + blkif = blkif_find_by_handle(domid, handle); + if ( unlikely(blkif == NULL) ) + { + DPRINTK("blkif_disconnect attempted for non-existent blkif" + " (%llu,%u)\n", disconnect->domid, disconnect->blkif_handle); + disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return 1; /* Caller will send response error message. */ + } + + if ( blkif->status == CONNECTED ) + { + blkif->status = DISCONNECTING; + blkif->disconnect_rspid = rsp_id; + wmb(); /* Let other CPUs see the status change. */ + free_irq(blkif->irq, NULL); + blkif_deschedule(blkif); + blkif_put(blkif); + } + + return 0; /* Caller should not send response message. */ } void __init blkif_interface_init(void) @@ -159,5 +231,4 @@ void __init blkif_interface_init(void) blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 0, 0, NULL, NULL); memset(blkif_hash, 0, sizeof(blkif_hash)); - spin_lock_init(&blkif_hash_lock); } diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c index 8862798250..2582287360 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c @@ -33,8 +33,8 @@ static struct vm_struct *mmap_vma; (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) #define MMAP_VADDR(_req,_seg) \ ((unsigned long)mmap_vma->addr + \ - ((_req) * MMAP_PAGES_PER_REQUEST) + \ - ((_seg) * MMAP_PAGES_PER_SEGMENT)) + ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ + ((_seg) * MMAP_PAGES_PER_SEGMENT * PAGE_SIZE)) /* * Each outstanding request that we've passed to the lower device layers has a @@ -96,7 +96,7 @@ static void add_to_blkdev_list_tail(blkif_t *blkif) unsigned long flags; if ( __on_blkdev_list(blkif) ) return; spin_lock_irqsave(&io_schedule_list_lock, flags); - if ( !__on_blkdev_list(blkif) ) + if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) ) { list_add_tail(&blkif->blkdev_list, &io_schedule_list); blkif_get(blkif); @@ -168,7 +168,8 @@ static void end_block_io_op(struct buffer_head *bh, int uptodate) if ( atomic_dec_and_test(&pending_req->pendcnt) ) { int pending_idx = pending_req - pending_reqs; - vmfree_area_pages(MMAP_VADDR(pending_idx, 0), MMAP_PAGES_PER_REQUEST); + vmfree_area_pages(MMAP_VADDR(pending_idx, 0), + MMAP_PAGES_PER_REQUEST * PAGE_SIZE); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); blkif_put(pending_req->blkif); @@ -260,10 +261,11 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req) { if ( (req->buffer_and_sects[i] & ~PAGE_MASK) != (PAGE_SIZE / 512) ) goto bad_descriptor; - if ( direct_remap_area_pages(&init_mm, + rc = direct_remap_area_pages(&init_mm, MMAP_VADDR(pending_idx, i), req->buffer_and_sects[i] & PAGE_MASK, - PAGE_SIZE, prot, blkif->domid) != 0 ) + PAGE_SIZE, prot, blkif->domid); + if ( rc != 0 ) goto bad_descriptor; } @@ -271,12 +273,13 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req) (req->nr_segments * PAGE_SIZE) / sizeof(vdisk_t)); vmfree_area_pages(MMAP_VADDR(pending_idx, 0), - MMAP_PAGES_PER_REQUEST); + MMAP_PAGES_PER_REQUEST * PAGE_SIZE); make_response(blkif, req->id, req->operation, rc); return; bad_descriptor: - vmfree_area_pages(MMAP_VADDR(pending_idx, 0), MMAP_PAGES_PER_REQUEST); + vmfree_area_pages(MMAP_VADDR(pending_idx, 0), + MMAP_PAGES_PER_REQUEST * PAGE_SIZE); make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); } @@ -284,7 +287,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) { extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); struct buffer_head *bh; - int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ; + int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ; unsigned short nr_sects; unsigned long buffer; int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; @@ -358,14 +361,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) unsigned long sz = ((phys_seg[i].buffer & ~PAGE_MASK) + (phys_seg[i].nr_sects << 9) + (PAGE_SIZE - 1)) & PAGE_MASK; - if ( direct_remap_area_pages(&init_mm, - MMAP_VADDR(pending_idx, i), - phys_seg[i].buffer & PAGE_MASK, - sz, prot, blkif->domid) != 0 ) + int rc = direct_remap_area_pages(&init_mm, + MMAP_VADDR(pending_idx, i), + phys_seg[i].buffer & PAGE_MASK, + sz, prot, blkif->domid); + if ( rc != 0 ) { DPRINTK("invalid buffer\n"); vmfree_area_pages(MMAP_VADDR(pending_idx, 0), - MMAP_PAGES_PER_REQUEST); + MMAP_PAGES_PER_REQUEST * PAGE_SIZE); goto bad_descriptor; } } @@ -374,7 +378,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) pending_req->blkif = blkif; pending_req->id = req->id; pending_req->operation = operation; - pending_req->status = BLKIF_RSP_ERROR; + pending_req->status = BLKIF_RSP_OKAY; atomic_set(&pending_req->pendcnt, nr_psegs); blkif_get(blkif); @@ -382,29 +386,30 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) /* Now we pass each segment down to the real blkdev layer. */ for ( i = 0; i < nr_psegs; i++ ) { - bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL); + bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC); if ( unlikely(bh == NULL) ) panic("bh is null\n"); memset(bh, 0, sizeof (struct buffer_head)); - + + init_waitqueue_head(&bh->b_wait); bh->b_size = phys_seg[i].nr_sects << 9; bh->b_dev = phys_seg[i].dev; + bh->b_rdev = phys_seg[i].dev; bh->b_rsector = (unsigned long)phys_seg[i].sector_number; - bh->b_data = (char *)MMAP_VADDR(pending_idx, i) + + bh->b_data = (char *)MMAP_VADDR(pending_idx, i) + (phys_seg[i].buffer & ~PAGE_MASK); - /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */ - bh->b_page = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; bh->b_end_io = end_block_io_op; bh->b_private = pending_req; - bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock); + bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | + (1 << BH_Req) | (1 << BH_Launder); if ( operation == WRITE ) bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate); atomic_set(&bh->b_count, 1); /* Dispatch a single request. We'll flush it to disc later. */ - submit_bh(operation, bh); + generic_make_request(operation, bh); } pending_cons++; @@ -444,16 +449,7 @@ static void make_response(blkif_t *blkif, unsigned long id, void blkif_deschedule(blkif_t *blkif) { - unsigned long flags; - - spin_lock_irqsave(&io_schedule_list_lock, flags); - if ( __on_blkdev_list(blkif) ) - { - list_del(&blkif->blkdev_list); - blkif->blkdev_list.next = (void *)0xdeadbeef; - blkif_put(blkif); - } - spin_unlock_irqrestore(&io_schedule_list_lock, flags); + remove_from_blkdev_list(blkif); } static int __init init_module(void) diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c index bc5390eeb9..19b0b3015d 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c @@ -47,7 +47,7 @@ void vbd_create(blkif_be_vbd_create_t *create) } } - if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) ) + if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_ATOMIC)) == NULL) ) { DPRINTK("vbd_create: out of memory\n"); create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; @@ -62,11 +62,12 @@ void vbd_create(blkif_be_vbd_create_t *create) rb_link_node(&vbd->rb, rb_parent, rb_p); rb_insert_color(&vbd->rb, &blkif->vbd_rb); + DPRINTK("Successful creation of vdev=%04x (dom=%llu)\n", + vdevice, create->domid); create->status = BLKIF_BE_STATUS_OKAY; out: spin_unlock(&blkif->vbd_lock); - blkif_put(blkif); } @@ -110,7 +111,7 @@ void vbd_grow(blkif_be_vbd_grow_t *grow) } if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t), - GFP_KERNEL)) == NULL) ) + GFP_ATOMIC)) == NULL) ) { DPRINTK("vbd_grow: out of memory\n"); grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; @@ -127,11 +128,12 @@ void vbd_grow(blkif_be_vbd_grow_t *grow) *px = x; + DPRINTK("Successful grow of vdev=%04x (dom=%llu)\n", + vdevice, grow->domid); grow->status = BLKIF_BE_STATUS_OKAY; out: spin_unlock(&blkif->vbd_lock); - blkif_put(blkif); } @@ -190,7 +192,6 @@ void vbd_shrink(blkif_be_vbd_shrink_t *shrink) out: spin_unlock(&blkif->vbd_lock); - blkif_put(blkif); } @@ -242,7 +243,6 @@ void vbd_destroy(blkif_be_vbd_destroy_t *destroy) out: spin_unlock(&blkif->vbd_lock); - blkif_put(blkif); } diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c index 2936d78ea2..29cc01d087 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c @@ -18,9 +18,9 @@ typedef unsigned char byte; /* from linux/ide.h */ -#define BLKIF_STATE_CLOSED 0 -#define BLKIF_STATE_DOWN 1 -#define BLKIF_STATE_UP 2 +#define BLKIF_STATE_CLOSED 0 +#define BLKIF_STATE_DISCONNECTED 1 +#define BLKIF_STATE_CONNECTED 2 static unsigned int blkif_state = BLKIF_STATE_CLOSED; static unsigned int blkif_evtchn, blkif_irq; @@ -35,7 +35,7 @@ static BLK_RING_IDX req_prod; /* Private request producer. */ /* We plug the I/O ring if the driver is suspended or if the ring is full. */ #define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ - (blkif_state != BLKIF_STATE_UP)) + (blkif_state != BLKIF_STATE_CONNECTED)) /* @@ -123,8 +123,10 @@ int blkif_release(struct inode *inode, struct file *filep) */ if ( --disk->usage == 0 ) { +#if 0 update_tq.routine = update_vbds_task; schedule_task(&update_tq); +#endif } return 0; @@ -306,7 +308,7 @@ static int blkif_queue_request(unsigned long id, if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) BUG(); - if ( unlikely(blkif_state != BLKIF_STATE_UP) ) + if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) return 1; switch ( operation ) @@ -498,7 +500,7 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) { next_bh = bh->b_reqnext; bh->b_reqnext = NULL; - bh->b_end_io(bh, !bret->status); + bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY); } break; case BLKIF_OP_PROBE: @@ -556,18 +558,18 @@ void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) static void blkif_bringup_phase1(void *unused) { - ctrl_msg_t cmsg; - blkif_fe_interface_up_t up; + ctrl_msg_t cmsg; + blkif_fe_interface_connect_t up; - /* Move from CLOSED to DOWN state. */ + /* Move from CLOSED to DISCONNECTED state. */ blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL); blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; - blkif_state = BLKIF_STATE_DOWN; + blkif_state = BLKIF_STATE_DISCONNECTED; - /* Construct an interface-UP message for the domain controller. */ + /* Construct an interface-CONNECT message for the domain controller. */ cmsg.type = CMSG_BLKIF_FE; - cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_UP; - cmsg.length = sizeof(blkif_fe_interface_up_t); + cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT; + cmsg.length = sizeof(blkif_fe_interface_connect_t); up.handle = 0; up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT; memcpy(cmsg.msg, &up, sizeof(up)); @@ -578,14 +580,14 @@ static void blkif_bringup_phase1(void *unused) static void blkif_bringup_phase2(void *unused) { - /* Move from DOWN to UP state. */ blkif_irq = bind_evtchn_to_irq(blkif_evtchn); (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL); - blkif_state = BLKIF_STATE_UP; /* Probe for discs that are attached to the interface. */ xlvbd_init(); + blkif_state = BLKIF_STATE_CONNECTED; + /* Kick pending requests. */ spin_lock_irq(&io_request_lock); kick_pending_request_queues(); @@ -608,22 +610,22 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status) blkif_state); break; - case BLKIF_INTERFACE_STATUS_DOWN: + case BLKIF_INTERFACE_STATUS_DISCONNECTED: if ( blkif_state != BLKIF_STATE_CLOSED ) { - printk(KERN_WARNING "Unexpected blkif-DOWN message in state %d\n", - blkif_state); + printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message" + " in state %d\n", blkif_state); break; } blkif_statechange_tq.routine = blkif_bringup_phase1; schedule_task(&blkif_statechange_tq); break; - case BLKIF_INTERFACE_STATUS_UP: + case BLKIF_INTERFACE_STATUS_CONNECTED: if ( blkif_state == BLKIF_STATE_CLOSED ) { - printk(KERN_WARNING "Unexpected blkif-UP message in state %d\n", - blkif_state); + printk(KERN_WARNING "Unexpected blkif-CONNECTED message" + " in state %d\n", blkif_state); break; } blkif_evtchn = status->evtchn; @@ -683,6 +685,17 @@ int __init xlblk_init(void) memcpy(cmsg.msg, &st, sizeof(st)); ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); + /* + * We should read 'nr_interfaces' from response message and wait + * for notifications before proceeding. For now we assume that we + * will be notified of exactly one interface. + */ + while ( blkif_state != BLKIF_STATE_CONNECTED ) + { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); + } + return 0; } diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c index 944bf7eace..b26907192a 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c @@ -512,7 +512,7 @@ void xlvbd_update_vbds(void) * linux -- this is just for convenience as it means e.g. that the same * /etc/fstab can be used when booting with or without Xen. */ -int __init xlvbd_init(void) +int xlvbd_init(void) { int i; @@ -559,8 +559,3 @@ int __init xlvbd_init(void) return 0; } - - -#ifdef MODULE -module_init(xlvbd_init); -#endif diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/io.h b/xenolinux-2.4.26-sparse/include/asm-xen/io.h index 3d78e20950..f5243bb6a7 100644 --- a/xenolinux-2.4.26-sparse/include/asm-xen/io.h +++ b/xenolinux-2.4.26-sparse/include/asm-xen/io.h @@ -159,13 +159,47 @@ extern void iounmap(void *addr); extern void *bt_ioremap(unsigned long offset, unsigned long size); extern void bt_iounmap(void *addr, unsigned long size); +#ifdef CONFIG_XEN_PHYSDEV_ACCESS + +#ifdef CONFIG_HIGHMEM +#error "Highmem is not yet compatible with physical device access" +#endif + /* - * IO bus memory addresses are also 1:1 with the physical address + * The bus translation macros need special care if we are executing device + * accesses to/from other domains' memory. In these cases the virtual address + * is actually a temporary mapping in the 'vmalloc' space. The physical + * address will therefore be >max_low_pfn, and will not have a valid entry + * in the phys_to_mach mapping table. */ +static inline unsigned long phys_to_bus(unsigned long phys) +{ + extern unsigned long max_pfn; + pgd_t *pgd; pmd_t *pmd; pte_t *pte; + void *addr; + unsigned long bus; + if ( (phys >> PAGE_SHIFT) < max_pfn ) + return phys_to_machine(phys); + addr = phys_to_virt(phys); + pgd = pgd_offset_k( (unsigned long)addr); + pmd = pmd_offset(pgd, (unsigned long)addr); + pte = pte_offset(pmd, (unsigned long)addr); + bus = (pte->pte_low & PAGE_MASK) | (phys & ~PAGE_MASK); + return bus; +} + +#define virt_to_bus(_x) phys_to_bus(virt_to_phys(_x)) +#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x)) +#define page_to_bus(_x) phys_to_bus(page_to_phys(_x)) + +#else + #define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x)) #define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x)) #define page_to_bus(_x) phys_to_machine(page_to_phys(_x)) +#endif /* CONFIG_XEN_PHYSDEV_ACCESS */ + /* * readX/writeX() are used to access memory mapped devices. On some * architectures the memory mapped IO stuff needs to be accessed |