author | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-04-28 09:35:33 +0000 |
---|---|---|
committer | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-04-28 09:35:33 +0000 |
commit | 3b84d97dbdacf463aa95357c85868b06e14caa8d (patch) | |
tree | 88ac8b6960e7b1a39adae0210f1db033ad4942a7 | |
parent | f57f5ec70d3e1b9d8047b14e0d5deafc2c549e5c (diff) | |
bitkeeper revision 1.879.1.1 (408f7ae5PHe1i2motf-Iulpr3dEVhQ)
Further modifications towards the new block-device drivers for the new I/O model.
22 files changed, 880 insertions, 705 deletions
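For orientation, this patch moves block I/O from hypercall-based calls to a split front-end/back-end model driven by control messages exchanged through the domain controller. The sketch below shows how a front-end driver might announce itself using the message types added to domain_controller.h in this commit; it is illustrative only, and the send primitive `ctrl_if_send_message()` and the include paths are assumptions rather than part of the patch.

```c
#include <linux/string.h>   /* memcpy() */
#include <asm/ctrl_if.h>    /* ctrl_msg_t (include path assumed) */

/*
 * Illustrative sketch only.  Announce the front-end driver to the domain
 * controller; per the comments added in domain_controller.h, the controller
 * then replies with one CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED message for
 * each interface that currently exists.
 */
static void blkif_send_driver_up(void)
{
    ctrl_msg_t cmsg;
    blkif_fe_driver_status_changed_t st;

    cmsg.type    = CMSG_BLKIF_FE;
    cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
    cmsg.length  = sizeof(st);

    st.status = BLKIF_DRIVER_STATUS_UP;
    memcpy(cmsg.msg, &st, sizeof(st));

    /* 'ctrl_if_send_message' stands in for whatever send primitive the
     * control interface exposes in this tree. */
    ctrl_if_send_message(&cmsg);
}
```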
@@ -665,8 +665,8 @@ 4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c 40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h 4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile -4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c -4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h +4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h +4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c 4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c 3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.26-sparse/arch/xen/drivers/block/Makefile 3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c diff --git a/tools/xend/lib/domain_controller.h b/tools/xend/lib/domain_controller.h index 14f970dd04..eec8402e5f 100644 --- a/tools/xend/lib/domain_controller.h +++ b/tools/xend/lib/domain_controller.h @@ -56,14 +56,90 @@ typedef struct { #define CMSG_BLKIF_BE 1 /* Block-device backend */ #define CMSG_BLKIF_FE 2 /* Block-device frontend */ + +/****************************************************************************** + * CONSOLE DEFINITIONS + */ + /* * Subtypes for console messages. */ #define CMSG_CONSOLE_DATA 0 + +/****************************************************************************** + * BLOCK-INTERFACE FRONTEND DEFINITIONS + */ + +/* Messages from domain controller to guest. */ +#define CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED 0 + +/* Messages from guest to domain controller. */ +#define CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED 32 +#define CMSG_BLKIF_FE_INTERFACE_UP 33 +#define CMSG_BLKIF_FE_INTERFACE_DOWN 34 + +/* These are used by both front-end and back-end drivers. */ +#define blkif_vdev_t u16 +#define blkif_pdev_t u16 +#define blkif_sector_t u64 + +/* + * CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED: + * Notify a guest about a status change on one of its block interfaces. + * If the interface is DESTROYED or DOWN then the interface is disconnected: + * 1. The shared-memory frame is available for reuse. + * 2. Any unacknowledged messgaes pending on the interface were dropped. + */ +#define BLKIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist. */ +#define BLKIF_INTERFACE_STATUS_DOWN 1 /* Interface exists but is down. */ +#define BLKIF_INTERFACE_STATUS_UP 2 /* Interface exists and is up. */ +typedef struct { + unsigned int handle; + unsigned int status; + unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_UP */ +} blkif_fe_interface_status_changed_t; + /* - * Subtypes for block-device messages. + * CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED: + * Notify the domain controller that the front-end driver is DOWN or UP. + * When the driver goes DOWN then the controller will send no more + * status-change notifications. When the driver comes UP then the controller + * will send a notification for each interface that currently exists. + * If the driver goes DOWN while interfaces are still UP, the domain + * will automatically take the interfaces DOWN. */ +#define BLKIF_DRIVER_STATUS_DOWN 0 +#define BLKIF_DRIVER_STATUS_UP 1 +typedef struct { + unsigned int status; /* BLKIF_DRIVER_STATUS_??? */ +} blkif_fe_driver_status_changed_t; + +/* + * CMSG_BLKIF_FE_INTERFACE_UP: + * If successful, the domain controller will acknowledge with a STATUS_UP + * message. 
+ */ +typedef struct { + unsigned int handle; + unsigned long shmem_frame; +} blkif_fe_interface_up_t; + +/* + * CMSG_BLKIF_FE_INTERFACE_DOWN: + * If successful, the domain controller will acknowledge with a STATUS_DOWN + * message. + */ +typedef struct { + unsigned int handle; +} blkif_fe_interface_down_t; + + +/****************************************************************************** + * BLOCK-INTERFACE BACKEND DEFINITIONS + */ + +/* Messages from domain controller. */ #define CMSG_BLKIF_BE_CREATE 0 /* Create a new block-device interface. */ #define CMSG_BLKIF_BE_DESTROY 1 /* Destroy a block-device interface. */ #define CMSG_BLKIF_BE_VBD_CREATE 2 /* Create a new VBD for an interface. */ @@ -71,14 +147,13 @@ typedef struct { #define CMSG_BLKIF_BE_VBD_GROW 4 /* Append an extent to a given VBD. */ #define CMSG_BLKIF_BE_VBD_SHRINK 5 /* Remove last extent from a given VBD. */ +/* Messages to domain controller. */ +#define CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED 32 + /* - * Message request/response defintions for block-device messages. + * Message request/response definitions for block-device messages. */ -#define blkif_vdev_t u16 -#define blkif_pdev_t u16 -#define blkif_sector_t u64 - typedef struct { blkif_pdev_t device; blkif_sector_t sector_start; @@ -86,21 +161,36 @@ typedef struct { } blkif_extent_t; /* Non-specific 'okay' return. */ -#define BLKIF_STATUS_OKAY 0 +#define BLKIF_BE_STATUS_OKAY 0 /* Non-specific 'error' return. */ -#define BLKIF_STATUS_ERROR 1 +#define BLKIF_BE_STATUS_ERROR 1 /* The following are specific error returns. */ -#define BLKIF_STATUS_INTERFACE_EXISTS 2 -#define BLKIF_STATUS_INTERFACE_NOT_FOUND 3 +#define BLKIF_BE_STATUS_INTERFACE_EXISTS 2 +#define BLKIF_BE_STATUS_INTERFACE_NOT_FOUND 3 +#define BLKIF_BE_STATUS_VBD_EXISTS 4 +#define BLKIF_BE_STATUS_VBD_NOT_FOUND 5 +#define BLKIF_BE_STATUS_OUT_OF_MEMORY 6 +#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND 7 +#define BLKIF_BE_STATUS_MAPPING_ERROR 8 /* This macro can be used to create an array of descriptive error strings. */ -#define BLKIF_STATUS_ERRORS { \ - "Okay", \ - "Non-specific error", \ - "Interface already exists", \ - "Interface not found" } +#define BLKIF_BE_STATUS_ERRORS { \ + "Okay", \ + "Non-specific error", \ + "Interface already exists", \ + "Interface not found", \ + "VBD already exists", \ + "VBD not found", \ + "Out of memory", \ + "Extent not found for VBD", \ + "Could not map domain memory" } -/* CMSG_BLKIF_CREATE */ +/* + * CMSG_BLKIF_BE_CREATE: + * When the driver sends a successful response then the interface is fully + * set up. The controller will send an UP notification to the front-end + * driver. + */ typedef struct { /* IN */ domid_t domid; /* Domain attached to new interface. */ @@ -109,18 +199,23 @@ typedef struct { unsigned long shmem_frame; /* Page cont. shared comms window. */ /* OUT */ unsigned int status; -} blkif_create_t; +} blkif_be_create_t; -/* CMSG_BLKIF_DESTROY */ +/* + * CMSG_BLKIF_BE_DESTROY: + * When the driver sends a successful response then the interface is fully + * torn down. The controller will send a DOWN notification to the front-end + * driver. + */ typedef struct { /* IN */ domid_t domid; /* Identify interface to be destroyed. */ unsigned int blkif_handle; /* ...ditto... */ /* OUT */ unsigned int status; -} blkif_destroy_t; +} blkif_be_destroy_t; -/* CMSG_BLKIF_VBD_CREATE */ +/* CMSG_BLKIF_BE_VBD_CREATE */ typedef struct { /* IN */ domid_t domid; /* Identify blkdev interface. */ @@ -129,9 +224,9 @@ typedef struct { int readonly; /* Non-zero -> VBD isn't writeable. 
*/ /* OUT */ unsigned int status; -} blkif_vbd_create_t; +} blkif_be_vbd_create_t; -/* CMSG_BLKIF_VBD_DESTROY */ +/* CMSG_BLKIF_BE_VBD_DESTROY */ typedef struct { /* IN */ domid_t domid; /* Identify blkdev interface. */ @@ -139,9 +234,9 @@ typedef struct { blkif_vdev_t vdevice; /* Interface-specific id of the VBD. */ /* OUT */ unsigned int status; -} blkif_vbd_destroy_t; +} blkif_be_vbd_destroy_t; -/* CMSG_BLKIF_VBD_GROW */ +/* CMSG_BLKIF_BE_VBD_GROW */ typedef struct { /* IN */ domid_t domid; /* Identify blkdev interface. */ @@ -150,9 +245,9 @@ typedef struct { blkif_extent_t extent; /* Physical extent to append to VBD. */ /* OUT */ unsigned int status; -} blkif_vbd_grow_t; +} blkif_be_vbd_grow_t; -/* CMSG_BLKIF_VBD_SHRINK */ +/* CMSG_BLKIF_BE_VBD_SHRINK */ typedef struct { /* IN */ domid_t domid; /* Identify blkdev interface. */ @@ -160,6 +255,16 @@ typedef struct { blkif_vdev_t vdevice; /* Interface-specific id of the VBD. */ /* OUT */ unsigned int status; -} blkif_vbd_shrink_t; +} blkif_be_vbd_shrink_t; + +/* + * CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED: + * Notify the domain controller that the back-end driver is DOWN or UP. + * If the driver goes DOWN while interfaces are still UP, the domain + * will automatically send DOWN notifications. + */ +typedef struct { + unsigned int status; /* BLKIF_DRIVER_STATUS_??? */ +} blkif_be_driver_status_changed; #endif /* __DOMAIN_CONTROLLER_H__ */ diff --git a/tools/xend/lib/main.py b/tools/xend/lib/main.py index 4b243b3307..4dd26ca8c5 100755 --- a/tools/xend/lib/main.py +++ b/tools/xend/lib/main.py @@ -44,6 +44,14 @@ def daemon_loop(): # notifications. notifier = xend.utils.notifier() + # The DOM0 control interface is not set up via the management interface. + # Note that console messages don't come our way (actually, only driver + # back-ends should use the DOM0 control interface). We therefore don't + # need to set up console structures. + xend.utils.port(0) + xend.main.notifier.bind(port.local_port) + xend.main.control_list[port.local_port] = (port, 0, 0, 0) + ## ## MAIN LOOP ## diff --git a/tools/xend/lib/utils.c b/tools/xend/lib/utils.c index 4883ec1a46..c28d682ec9 100644 --- a/tools/xend/lib/utils.c +++ b/tools/xend/lib/utils.c @@ -22,6 +22,8 @@ #include <signal.h> #include <xc.h> +#include <asm-xen/proc_cmd.h> + #include <hypervisor-if.h> #include "domain_controller.h" @@ -684,8 +686,23 @@ static PyObject *xu_port_new(PyObject *self, PyObject *args) goto fail2; } - if ( xc_evtchn_bind_interdomain(xup->xc_handle, - DOMID_SELF, dom, &port1, &port2) != 0 ) + if ( dom == 0ULL ) + { + /* + * The control-interface event channel for DOM0 is already set up. + * We use an ioctl to discover the port at our end of the channel. + */ + port1 = ioctl(xup->xc_handle, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN, NULL); + port2 = -1; /* We don't need the remote end of the DOM0 link. 
*/ + if ( port1 < 0 ) + { + PyErr_SetString(port_error, "Could not open channel to DOM0"); + goto fail3; + } + } + else if ( xc_evtchn_bind_interdomain(xup->xc_handle, + DOMID_SELF, dom, + &port1, &port2) != 0 ) { PyErr_SetString(port_error, "Could not open channel to domain"); goto fail3; @@ -744,7 +761,8 @@ static void xu_port_dealloc(PyObject *self) { xu_port_object *xup = (xu_port_object *)self; unmap_control_interface(xup->mem_fd, xup->interface); - (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port); + if ( xup->remote_dom != 0ULL ) + (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port); (void)xc_interface_close(xup->xc_handle); (void)close(xup->mem_fd); PyObject_Del(self); diff --git a/tools/xend/setup.py b/tools/xend/setup.py index 1f39cb4572..5567d7093c 100644 --- a/tools/xend/setup.py +++ b/tools/xend/setup.py @@ -4,7 +4,8 @@ from distutils.core import setup, Extension utils = Extension("utils", extra_compile_args = ["-fno-strict-aliasing"], include_dirs = ["../xc/lib", - "../../xen/include/hypervisor-ifs"], + "../../xen/include/hypervisor-ifs", + "../../xenolinux-sparse/include"], library_dirs = ["../xc/lib"], libraries = ["xc"], sources = ["lib/utils.c"]) diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h index 4895172937..646f4855f3 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h @@ -34,7 +34,7 @@ typedef struct blkif_st { unsigned int evtchn; int irq; /* Comms information. */ - blk_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */ + blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */ BLK_RING_IDX blk_req_cons; /* Request consumer. */ BLK_RING_IDX blk_resp_prod; /* Private version of response producer. */ /* VBDs attached to this interface. */ @@ -44,13 +44,19 @@ typedef struct blkif_st { struct blkif_st *hash_next; struct list_head blkdev_list; spinlock_t blk_ring_lock; + atomic_t refcnt; } blkif_t; -void blkif_create(blkif_create_t *create); -void blkif_destroy(blkif_destroy_t *destroy); +void blkif_create(blkif_be_create_t *create); +void blkif_destroy(blkif_be_destroy_t *destroy); +void __blkif_destroy(blkif_t *blkif); blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); -void blkif_get(blkif_t *blkif); -void blkif_put(blkif_t *blkif); +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ + __blkif_destroy(_b); \ + } while (0) /* An entry in a list of xen_extents. 
*/ typedef struct _blkif_extent_le { @@ -60,25 +66,25 @@ typedef struct _blkif_extent_le { typedef struct _vbd { blkif_vdev_t vdevice; /* what the domain refers to this vbd as */ - unsigned char mode; /* VBD_MODE_{R,W} */ + unsigned char readonly; /* Non-zero -> read-only */ unsigned char type; /* XD_TYPE_xxx */ blkif_extent_le_t *extents; /* list of xen_extents making up this vbd */ rb_node_t rb; /* for linking into R-B tree lookup struct */ } vbd_t; -long vbd_create(blkif_vbd_create_t *create_params); -long vbd_grow(blkif_vbd_grow_t *grow_params); -long vbd_shrink(blkif_vbd_shrink_t *shrink_params); -long vbd_destroy(blkif_vbd_destroy_t *delete_params); - -void destroy_all_vbds(struct task_struct *p); +void vbd_create(blkif_be_vbd_create_t *create); +void vbd_grow(blkif_be_vbd_grow_t *grow); +void vbd_shrink(blkif_be_vbd_shrink_t *shrink); +void vbd_destroy(blkif_be_vbd_destroy_t *delete); +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds); +void destroy_all_vbds(blkif_t *blkif); typedef struct { blkif_t *blkif; unsigned long id; atomic_t pendcnt; unsigned short operation; - unsigned short status; + int status; } pending_req_t; /* Describes a [partial] disk extent (part of a block io request) */ @@ -91,7 +97,10 @@ typedef struct { int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); -int blkif_be_controller_init(void); +void blkif_interface_init(void); +void blkif_ctrlif_init(void); + +void blkif_deschedule(blkif_t *blkif); void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c index c7ef10c3ba..e1ed295ed3 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c @@ -13,34 +13,34 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) switch ( msg->subtype ) { case CMSG_BLKIF_BE_CREATE: - if ( msg->length != sizeof(blkif_create_t) ) + if ( msg->length != sizeof(blkif_be_create_t) ) goto parse_error; - blkif_create((blkif_create_t *)&msg->msg[0]); + blkif_create((blkif_be_create_t *)&msg->msg[0]); break; case CMSG_BLKIF_BE_DESTROY: - if ( msg->length != sizeof(blkif_destroy_t) ) + if ( msg->length != sizeof(blkif_be_destroy_t) ) goto parse_error; - blkif_destroy((blkif_destroy_t *)&msg->msg[0]); + blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]); break; case CMSG_BLKIF_BE_VBD_CREATE: - if ( msg->length != sizeof(blkif_vbd_create_t) ) + if ( msg->length != sizeof(blkif_be_vbd_create_t) ) goto parse_error; - vbd_create((blkif_vbd_create_t *)&msg->msg[0]); + vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]); break; case CMSG_BLKIF_BE_VBD_DESTROY: - if ( msg->length != sizeof(blkif_vbd_destroy_t) ) + if ( msg->length != sizeof(blkif_be_vbd_destroy_t) ) goto parse_error; - vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]); + vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]); break; case CMSG_BLKIF_BE_VBD_GROW: - if ( msg->length != sizeof(blkif_vbd_grow_t) ) + if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) goto parse_error; - vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]); + vbd_grow((blkif_be_vbd_grow_t *)&msg->msg[0]); break; case CMSG_BLKIF_BE_VBD_SHRINK: - if ( msg->length != sizeof(blkif_vbd_shrink_t) ) + if ( msg->length != sizeof(blkif_be_vbd_shrink_t) ) goto parse_error; - vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]); + vbd_shrink((blkif_be_vbd_shrink_t *)&msg->msg[0]); break; default: goto parse_error; 
@@ -54,8 +54,7 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) ctrl_if_send_response(msg); } -int blkif_ctrlif_init(void) +void blkif_ctrlif_init(void) { (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx); - return 0; } diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c index 579795deb9..87925681da 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c @@ -12,85 +12,152 @@ #define BLKIF_HASH(_d,_h) \ (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1)) -static blkif_t *blkif_hash[BLKIF_HASHSZ]; +static kmem_cache_t *blkif_cachep; +static blkif_t *blkif_hash[BLKIF_HASHSZ]; +static spinlock_t blkif_hash_lock; blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) { - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif != NULL) && - (blkif->domid != domid) && - (blkif->handle != handle) ) + blkif_t *blkif; + unsigned long flags; + + spin_lock_irqsave(&blkif_hash_lock, flags); + blkif = blkif_hash[BLKIF_HASH(domid, handle)]; + while ( blkif != NULL ) + { + if ( (blkif->domid == domid) && (blkif->handle == handle) ) + { + blkif_get(blkif); + break; + } blkif = blkif->hash_next; + } + spin_unlock_irqrestore(&blkif_hash_lock, flags); + return blkif; } -void blkif_create(blkif_create_t *create) +void __blkif_destroy(blkif_t *blkif) +{ + free_irq(blkif->irq, NULL); + unbind_evtchn_from_irq(blkif->evtchn); + vfree(blkif->blk_ring_base); + destroy_all_vbds(blkif); + kmem_cache_free(blkif_cachep, blkif); +} + +void blkif_create(blkif_be_create_t *create) { domid_t domid = create->domid; unsigned int handle = create->blkif_handle; unsigned int evtchn = create->evtchn; unsigned long shmem_frame = create->shmem_frame; + unsigned long flags; blkif_t **pblkif, *blkif; + struct vm_struct *vma; + pgprot_t prot; + int error; - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( *pblkif == NULL ) + if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) { - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) - goto found_match; - pblkif = &(*pblkif)->hash_next; + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + return; + } + + if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL ) + { + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + goto fail1; + } + + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); + error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), + shmem_frame<<PAGE_SHIFT, PAGE_SIZE, + prot, domid); + if ( error != 0 ) + { + if ( error == -ENOMEM ) + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + else if ( error == -EFAULT ) + create->status = BLKIF_BE_STATUS_MAPPING_ERROR; + else + create->status = BLKIF_BE_STATUS_ERROR; + goto fail2; } - blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - blkif->handle = handle; - blkif->evtchn = evtchn; - blkif->irq = bind_evtchn_to_irq(evtchn); - blkif->shmem_frame = shmem_frame; - blkif->shmem_vbase = ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE); + blkif->domid = domid; + blkif->handle = handle; + blkif->evtchn = evtchn; + blkif->irq = bind_evtchn_to_irq(evtchn); + blkif->shmem_frame = shmem_frame; + blkif->blk_ring_base = (blkif_ring_t *)vma->addr; spin_lock_init(&blkif->vbd_lock); spin_lock_init(&blkif->blk_ring_lock); - request_irq(irq, blkif_be_int, 0, 
"blkif-backend", blkif); + spin_lock_irqsave(&blkif_hash_lock, flags); + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; + while ( *pblkif == NULL ) + { + if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) + { + spin_unlock_irqrestore(&blkif_hash_lock, flags); + create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; + goto fail3; + } + pblkif = &(*pblkif)->hash_next; + } + + atomic_set(&blkif->refcnt, 1); blkif->hash_next = *pblkif; *pblkif = blkif; - create->status = BLKIF_STATUS_OKAY; - return; + spin_unlock_irqrestore(&blkif_hash_lock, flags); - found_match: - create->status = BLKIF_STATUS_INTERFACE_EXISTS; - return; + request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif); - evtchn_in_use: - unbind_evtchn_from_irq(evtchn); /* drop refcnt */ - create->status = BLKIF_STATUS_ERROR; + create->status = BLKIF_BE_STATUS_OKAY; return; + + fail3: unbind_evtchn_from_irq(evtchn); + fail2: kmem_cache_free(blkif_cachep, blkif); + fail1: vfree(vma->addr); } -void blkif_destroy(blkif_destroy_t *destroy) +void blkif_destroy(blkif_be_destroy_t *destroy) { domid_t domid = destroy->domid; unsigned int handle = destroy->blkif_handle; + unsigned long flags; blkif_t **pblkif, *blkif; + spin_lock_irqsave(&blkif_hash_lock, flags); + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; while ( (blkif = *pblkif) == NULL ) { if ( (blkif->domid == domid) && (blkif->handle == handle) ) - goto found_match; + { + *pblkif = blkif->hash_next; + spin_unlock_irqrestore(&blkif_hash_lock, flags); + blkif_deschedule(blkif); + blkif_put(blkif); + destroy->status = BLKIF_BE_STATUS_OKAY; + return; + } pblkif = &blkif->hash_next; } - destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; - return; + spin_unlock_irqrestore(&blkif_hash_lock, flags); - found_match: - free_irq(blkif->irq, NULL); - unbind_evtchn_from_irq(blkif->evtchn); - *pblkif = blkif->hash_next; - kmem_cache_free(blkif_cachep, blkif); - destroy->status = BLKIF_STATUS_OKAY; + destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; } +void __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + 0, 0, NULL, NULL); + memset(blkif_hash, 0, sizeof(blkif_hash)); + spin_lock_init(&blkif_hash_lock); +} diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c index 1e6190c3e6..8862798250 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c @@ -24,6 +24,18 @@ #define MAX_PENDING_REQS 64 #define BATCH_PER_DOMAIN 16 +static struct vm_struct *mmap_vma; +#define MMAP_PAGES_PER_SEGMENT \ + ((BLKIF_MAX_SEGMENTS_PER_REQUEST >> (PAGE_SHIFT-9)) + 1) +#define MMAP_PAGES_PER_REQUEST \ + (2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * MMAP_PAGES_PER_SEGMENT) +#define MMAP_PAGES \ + (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) +#define MMAP_VADDR(_req,_seg) \ + ((unsigned long)mmap_vma->addr + \ + ((_req) * MMAP_PAGES_PER_REQUEST) + \ + ((_seg) * MMAP_PAGES_PER_SEGMENT)) + /* * Each outstanding request that we've passed to the lower device layers has a * 'pending_req' allocated to it. 
Each buffer_head that completes decrements @@ -46,22 +58,11 @@ static PEND_RING_IDX pending_prod, pending_cons; static kmem_cache_t *buffer_head_cachep; -static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned; - -static int lock_buffer(blkif_t *blkif, - unsigned long buffer, - unsigned short size, - int writeable_buffer); -static void unlock_buffer(unsigned long buffer, - unsigned short size, - int writeable_buffer); - -static void io_schedule(unsigned long unused); static int do_block_io_op(blkif_t *blkif, int max_to_do); -static void dispatch_rw_block_io(blkif_t *blkif, - blk_ring_req_entry_t *req); +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req); +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req); static void make_response(blkif_t *blkif, unsigned long id, - unsigned short op, unsigned long st); + unsigned short op, int st); /****************************************************************** @@ -108,8 +109,6 @@ static void add_to_blkdev_list_tail(blkif_t *blkif) * SCHEDULER FUNCTIONS */ -static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0); - static void io_schedule(unsigned long unused) { blkif_t *blkif; @@ -132,6 +131,8 @@ static void io_schedule(unsigned long unused) run_task_queue(&tq_disk); } +static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0); + static void maybe_trigger_io_schedule(void) { /* @@ -155,28 +156,25 @@ static void maybe_trigger_io_schedule(void) static void end_block_io_op(struct buffer_head *bh, int uptodate) { pending_req_t *pending_req = bh->b_private; + unsigned long flags; /* An error fails the entire request. */ if ( !uptodate ) { DPRINTK("Buffer not up-to-date at end of operation\n"); - pending_req->status = 2; + pending_req->status = BLKIF_RSP_ERROR; } - unlock_buffer(virt_to_phys(bh->b_data), - bh->b_size, - (pending_req->operation==READ)); - if ( atomic_dec_and_test(&pending_req->pendcnt) ) { + int pending_idx = pending_req - pending_reqs; + vmfree_area_pages(MMAP_VADDR(pending_idx, 0), MMAP_PAGES_PER_REQUEST); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); blkif_put(pending_req->blkif); - spin_lock(&pend_prod_lock); - pending_ring[MASK_PEND_IDX(pending_prod)] = - pending_req - pending_reqs; - pending_prod++; - spin_unlock(&pend_prod_lock); + spin_lock_irqsave(&pend_prod_lock, flags); + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + spin_unlock_irqrestore(&pend_prod_lock, flags); maybe_trigger_io_schedule(); } } @@ -200,45 +198,10 @@ void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) * DOWNWARD CALLS -- These interface with the block-device layer proper. 
*/ -static int lock_buffer(blkif_t *blkif, - unsigned long buffer, - unsigned short size, - int writeable_buffer) -{ - unsigned long pfn; - - for ( pfn = buffer >> PAGE_SHIFT; - pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT); - pfn++ ) - { - } - - return 1; - - fail: - while ( pfn-- > (buffer >> PAGE_SHIFT) ) - { - } - return 0; -} - -static void unlock_buffer(unsigned long buffer, - unsigned short size, - int writeable_buffer) -{ - unsigned long pfn; - - for ( pfn = buffer >> PAGE_SHIFT; - pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT); - pfn++ ) - { - } -} - static int do_block_io_op(blkif_t *blkif, int max_to_do) { - blk_ring_t *blk_ring = blkif->blk_ring_base; - blk_ring_req_entry_t *req; + blkif_ring_t *blk_ring = blkif->blk_ring_base; + blkif_request_t *req; BLK_RING_IDX i; int more_to_do = 0; @@ -262,11 +225,15 @@ static int do_block_io_op(blkif_t *blkif, int max_to_do) dispatch_rw_block_io(blkif, req); break; + case BLKIF_OP_PROBE: + dispatch_probe(blkif, req); + break; + default: DPRINTK("error: unknown block io operation [%d]\n", blk_ring->ring[i].req.operation); make_response(blkif, blk_ring->ring[i].req.id, - blk_ring->ring[i].req.operation, 1); + blk_ring->ring[i].req.operation, BLKIF_RSP_ERROR); break; } } @@ -275,24 +242,62 @@ static int do_block_io_op(blkif_t *blkif, int max_to_do) return more_to_do; } -static void dispatch_rw_block_io(blkif_t *blkif, - blk_ring_req_entry_t *req) +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req) +{ + int i, rc, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; + pgprot_t prot; + + /* Check that number of segments is sane. */ + if ( unlikely(req->nr_segments == 0) || + unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) + { + DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments); + goto bad_descriptor; + } + + prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW); + for ( i = 0; i < req->nr_segments; i++ ) + { + if ( (req->buffer_and_sects[i] & ~PAGE_MASK) != (PAGE_SIZE / 512) ) + goto bad_descriptor; + if ( direct_remap_area_pages(&init_mm, + MMAP_VADDR(pending_idx, i), + req->buffer_and_sects[i] & PAGE_MASK, + PAGE_SIZE, prot, blkif->domid) != 0 ) + goto bad_descriptor; + } + + rc = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), + (req->nr_segments * PAGE_SIZE) / sizeof(vdisk_t)); + + vmfree_area_pages(MMAP_VADDR(pending_idx, 0), + MMAP_PAGES_PER_REQUEST); + make_response(blkif, req->id, req->operation, rc); + return; + + bad_descriptor: + vmfree_area_pages(MMAP_VADDR(pending_idx, 0), MMAP_PAGES_PER_REQUEST); + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); +} + +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) { extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); struct buffer_head *bh; int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ; unsigned short nr_sects; unsigned long buffer; - int i, tot_sects; + int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; pending_req_t *pending_req; + pgprot_t prot; /* We map virtual scatter/gather segments to physical segments. */ int new_segs, nr_psegs = 0; - phys_seg_t phys_seg[MAX_BLK_SEGS * 2]; + phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2]; /* Check that number of segments is sane. 
*/ if ( unlikely(req->nr_segments == 0) || - unlikely(req->nr_segments > MAX_BLK_SEGS) ) + unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) { DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments); goto bad_descriptor; @@ -310,8 +315,11 @@ static void dispatch_rw_block_io(blkif_t *blkif, nr_sects = req->buffer_and_sects[i] & 0x1FF; if ( unlikely(nr_sects == 0) ) + continue; + + if ( unlikely(nr_sects > BLKIF_MAX_SECTORS_PER_SEGMENT) ) { - DPRINTK("zero-sized data request\n"); + DPRINTK("Too many sectors in segment\n"); goto bad_descriptor; } @@ -333,29 +341,40 @@ static void dispatch_rw_block_io(blkif_t *blkif, } nr_psegs += new_segs; - ASSERT(nr_psegs <= MAX_BLK_SEGS*2); + ASSERT(nr_psegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST*2); } + /* Nonsensical zero-sized request? */ + if ( unlikely(nr_psegs == 0) ) + goto bad_descriptor; + + if ( operation == READ ) + prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW); + else + prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED); + for ( i = 0; i < nr_psegs; i++ ) { - if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, - phys_seg[i].nr_sects << 9, - operation==READ)) ) + unsigned long sz = ((phys_seg[i].buffer & ~PAGE_MASK) + + (phys_seg[i].nr_sects << 9) + + (PAGE_SIZE - 1)) & PAGE_MASK; + if ( direct_remap_area_pages(&init_mm, + MMAP_VADDR(pending_idx, i), + phys_seg[i].buffer & PAGE_MASK, + sz, prot, blkif->domid) != 0 ) { DPRINTK("invalid buffer\n"); - while ( i-- > 0 ) - unlock_buffer(phys_seg[i].buffer, - phys_seg[i].nr_sects << 9, - operation==READ); + vmfree_area_pages(MMAP_VADDR(pending_idx, 0), + MMAP_PAGES_PER_REQUEST); goto bad_descriptor; } } - pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]]; + pending_req = &pending_reqs[pending_idx]; pending_req->blkif = blkif; pending_req->id = req->id; pending_req->operation = operation; - pending_req->status = 0; + pending_req->status = BLKIF_RSP_ERROR; atomic_set(&pending_req->pendcnt, nr_psegs); blkif_get(blkif); @@ -371,11 +390,8 @@ static void dispatch_rw_block_io(blkif_t *blkif, bh->b_size = phys_seg[i].nr_sects << 9; bh->b_dev = phys_seg[i].dev; bh->b_rsector = (unsigned long)phys_seg[i].sector_number; - - /* SMH: we store a 'pseudo-virtual' bogus address in b_data since - later code will undo this transformation (i.e. +-PAGE_OFFSET). */ - bh->b_data = phys_to_virt(phys_seg[i].buffer); - + bh->b_data = (char *)MMAP_VADDR(pending_idx, i) + + (phys_seg[i].buffer & ~PAGE_MASK); /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */ bh->b_page = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; bh->b_end_io = end_block_io_op; @@ -391,10 +407,11 @@ static void dispatch_rw_block_io(blkif_t *blkif, submit_bh(operation, bh); } + pending_cons++; return; bad_descriptor: - make_response(blkif, req->id, req->operation, 1); + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); } @@ -405,12 +422,13 @@ static void dispatch_rw_block_io(blkif_t *blkif, static void make_response(blkif_t *blkif, unsigned long id, - unsigned short op, unsigned long st) + unsigned short op, int st) { - blk_ring_resp_entry_t *resp; + blkif_response_t *resp; + unsigned long flags; /* Place on the response ring for the relevant domain. 
*/ - spin_lock(&blkif->blk_ring_lock); + spin_lock_irqsave(&blkif->blk_ring_lock, flags); resp = &blkif->blk_ring_base-> ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp; resp->id = id; @@ -418,48 +436,13 @@ static void make_response(blkif_t *blkif, unsigned long id, resp->status = st; wmb(); blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod; - spin_unlock(&blkif->blk_ring_lock); + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); /* Kick the relevant domain. */ notify_via_evtchn(blkif->evtchn); } -static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs) -{ -#if 0 - unsigned long flags; - struct task_struct *p; - blk_ring_t *blk_ring; - int i; - - printk("Dumping block queue stats: nr_pending = %d" - " (prod=0x%08x,cons=0x%08x)\n", - NR_PENDING_REQS, pending_prod, pending_cons); - - read_lock_irqsave(&tasklist_lock, flags); - for_each_domain ( p ) - { - printk("Domain: %llu\n", blkif->domain); - blk_ring = blkif->blk_ring_base; - printk(" req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/" - "0x%08x on_list=%d\n", - blk_ring->req_prod, blkif->blk_req_cons, - blk_ring->resp_prod, blkif->blk_resp_prod, - __on_blkdev_list(p)); - } - read_unlock_irqrestore(&tasklist_lock, flags); - - for ( i = 0; i < MAX_PENDING_REQS; i++ ) - { - printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n", - i, pending_reqs[i].domain, pending_reqs[i].id, - atomic_read(&pending_reqs[i].pendcnt), - pending_reqs[i].operation, pending_reqs[i].status); - } -#endif -} - -void unlink_blkdev_info(blkif_t *blkif) +void blkif_deschedule(blkif_t *blkif) { unsigned long flags; @@ -477,26 +460,29 @@ static int __init init_module(void) { int i; + blkif_interface_init(); + + if ( (mmap_vma = get_vm_area(MMAP_PAGES * PAGE_SIZE, VM_IOREMAP)) == NULL ) + { + printk(KERN_WARNING "Could not allocate VMA for blkif backend.\n"); + return -ENOMEM; + } + pending_cons = 0; pending_prod = MAX_PENDING_REQS; memset(pending_reqs, 0, sizeof(pending_reqs)); for ( i = 0; i < MAX_PENDING_REQS; i++ ) pending_ring[i] = i; - for ( i = 0; i < NR_CPUS; i++ ) - completed_bhs[i] = NULL; - spin_lock_init(&io_schedule_list_lock); INIT_LIST_HEAD(&io_schedule_list); - if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, - SA_SHIRQ, "blkif-backend-dbg", &blkif_debug_int) != 0 ) - printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); - buffer_head_cachep = kmem_cache_create( "buffer_head_cache", sizeof(struct buffer_head), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + blkif_ctrlif_init(); + return 0; } diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c index bd6c40125c..bc5390eeb9 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c @@ -8,7 +8,7 @@ #include "common.h" -void vbd_create(blkif_vbd_create_t *create) +void vbd_create(blkif_be_vbd_create_t *create) { vbd_t *vbd; rb_node_t **rb_p, *rb_parent = NULL; @@ -18,9 +18,9 @@ void vbd_create(blkif_vbd_create_t *create) blkif = blkif_find_by_handle(create->domid, create->blkif_handle); if ( unlikely(blkif == NULL) ) { - DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n", + DPRINTK("vbd_create attempted for non-existent blkif (%llu,%u)\n", create->domid, create->blkif_handle); - create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; + create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; return; } @@ -42,7 +42,7 @@ void vbd_create(blkif_vbd_create_t *create) else { DPRINTK("vbd_create 
attempted for already existing vbd\n"); - create->status = BLKIF_STATUS_VBD_EXISTS; + create->status = BLKIF_BE_STATUS_VBD_EXISTS; goto out; } } @@ -50,19 +50,19 @@ void vbd_create(blkif_vbd_create_t *create) if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) ) { DPRINTK("vbd_create: out of memory\n"); - create->status = BLKIF_STATUS_OUT_OF_MEMORY; + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; goto out; } - vbd->vdevice = vdevice; - vbd->mode = create->mode; - vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; - vbd->extents = NULL; + vbd->vdevice = vdevice; + vbd->readonly = create->readonly; + vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; + vbd->extents = NULL; rb_link_node(&vbd->rb, rb_parent, rb_p); rb_insert_color(&vbd->rb, &blkif->vbd_rb); - create->status = BLKIF_STATUS_OKAY; + create->status = BLKIF_BE_STATUS_OKAY; out: spin_unlock(&blkif->vbd_lock); @@ -71,20 +71,20 @@ void vbd_create(blkif_vbd_create_t *create) /* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */ -void vbd_grow(blkif_vbd_grow_t *grow) +void vbd_grow(blkif_be_vbd_grow_t *grow) { - blkif_t *blkif; - xen_extent_le_t **px, *x; - vbd_t *vbd = NULL; - rb_node_t *rb; - blkif_vdev_t vdevice = grow->vdevice; + blkif_t *blkif; + blkif_extent_le_t **px, *x; + vbd_t *vbd = NULL; + rb_node_t *rb; + blkif_vdev_t vdevice = grow->vdevice; blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle); if ( unlikely(blkif == NULL) ) { - DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n", + DPRINTK("vbd_grow attempted for non-existent blkif (%llu,%u)\n", grow->domid, grow->blkif_handle); - grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; + grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; return; } @@ -105,28 +105,29 @@ void vbd_grow(blkif_vbd_grow_t *grow) if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) { DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n"); - grow->status = BLKIF_STATUS_VBD_NOT_FOUND; + grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; goto out; } - if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) ) + if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t), + GFP_KERNEL)) == NULL) ) { DPRINTK("vbd_grow: out of memory\n"); - grow->status = BLKIF_STATUS_OUT_OF_MEMORY; + grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; goto out; } x->extent.device = grow->extent.device; x->extent.sector_start = grow->extent.sector_start; x->extent.sector_length = grow->extent.sector_length; - x->next = (xen_extent_le_t *)NULL; + x->next = (blkif_extent_le_t *)NULL; for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) continue; *px = x; - grow->status = BLKIF_STATUS_OKAY; + grow->status = BLKIF_BE_STATUS_OKAY; out: spin_unlock(&blkif->vbd_lock); @@ -134,20 +135,20 @@ void vbd_grow(blkif_vbd_grow_t *grow) } -void vbd_shrink(blkif_vbd_shrink_t *shrink) +void vbd_shrink(blkif_be_vbd_shrink_t *shrink) { - blkif_t *blkif; - xen_extent_le_t **px, *x; - vbd_t *vbd = NULL; - rb_node_t *rb; - blkif_vdev_t vdevice = shrink->vdevice; + blkif_t *blkif; + blkif_extent_le_t **px, *x; + vbd_t *vbd = NULL; + rb_node_t *rb; + blkif_vdev_t vdevice = shrink->vdevice; blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle); if ( unlikely(blkif == NULL) ) { - DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,&u)\n", + DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,%u)\n", shrink->domid, shrink->blkif_handle); - shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; + shrink->status = 
BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; return; } @@ -167,13 +168,13 @@ void vbd_shrink(blkif_vbd_shrink_t *shrink) if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) { - shrink->status = BLKIF_STATUS_VBD_NOT_FOUND; + shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; goto out; } if ( unlikely(vbd->extents == NULL) ) { - shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND; + shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND; goto out; } @@ -185,7 +186,7 @@ void vbd_shrink(blkif_vbd_shrink_t *shrink) *px = x->next; kfree(x); - shrink->status = BLKIF_STATUS_OKAY; + shrink->status = BLKIF_BE_STATUS_OKAY; out: spin_unlock(&blkif->vbd_lock); @@ -193,20 +194,20 @@ void vbd_shrink(blkif_vbd_shrink_t *shrink) } -void vbd_destroy(blkif_vbd_destroy_t *destroy) +void vbd_destroy(blkif_be_vbd_destroy_t *destroy) { - blkif_t *blkif; - vbd_t *vbd; - rb_node_t *rb; - xen_extent_le_t *x, *t; - blkif_vdev_t vdevice = destroy->vdevice; + blkif_t *blkif; + vbd_t *vbd; + rb_node_t *rb; + blkif_extent_le_t *x, *t; + blkif_vdev_t vdevice = destroy->vdevice; blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle); if ( unlikely(blkif == NULL) ) { - DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,&u)\n", + DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,%u)\n", destroy->domid, destroy->blkif_handle); - destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; + destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; return; } @@ -224,7 +225,7 @@ void vbd_destroy(blkif_vbd_destroy_t *destroy) goto found; } - destroy->status = BLKIF_STATUS_VBD_NOT_FOUND; + destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; goto out; found: @@ -249,7 +250,7 @@ void destroy_all_vbds(blkif_t *blkif) { vbd_t *vbd; rb_node_t *rb; - xen_extent_le_t *x, *t; + blkif_extent_le_t *x, *t; spin_lock(&blkif->vbd_lock); @@ -273,51 +274,30 @@ void destroy_all_vbds(blkif_t *blkif) } -static int vbd_probe_single(xen_disk_info_t *xdi, - vbd_t *vbd, - struct task_struct *p) +static int vbd_probe_single(blkif_t *blkif, vdisk_t *vbd_info, vbd_t *vbd) { - xen_extent_le_t *x; - xen_disk_t cur_disk; + blkif_extent_le_t *x; - if ( xdi->count == xdi->max ) - { - DPRINTK("vbd_probe_devices: out of space for probe.\n"); - return -ENOMEM; - } - - cur_disk.device = vbd->vdevice; - cur_disk.info = vbd->type; - if ( !VBD_CAN_WRITE(vbd) ) - cur_disk.info |= XD_FLAG_RO; - cur_disk.capacity = 0ULL; + vbd_info->device = vbd->vdevice; + vbd_info->info = vbd->type; + if ( vbd->readonly ) + vbd_info->info |= VDISK_FLAG_RO; + vbd_info->capacity = 0ULL; for ( x = vbd->extents; x != NULL; x = x->next ) - cur_disk.capacity += x->extent.nr_sectors; - cur_disk.domain = p->domain; + vbd_info->capacity += x->extent.sector_length; - /* Now copy into relevant part of user-space buffer */ - if( copy_to_user(&xdi->disks[xdi->count], - &cur_disk, - sizeof(xen_disk_t)) ) - { - DPRINTK("vbd_probe_devices: copy_to_user failed\n"); - return -EFAULT; - } - - xdi->count++; - return 0; } -static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p) +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds) { - int rc = 0; + int rc = 0, nr_vbds = 0; rb_node_t *rb; - spin_lock(&p->vbd_lock); + spin_lock(&blkif->vbd_lock); - if ( (rb = p->vbd_rb.rb_node) == NULL ) + if ( (rb = blkif->vbd_rb.rb_node) == NULL ) goto out; new_subtree: @@ -328,7 +308,10 @@ static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p) for ( ; ; ) { /* STEP 2. Dealt with left subtree. Now process current node. 
*/ - if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 ) + if ( (rc = vbd_probe_single(blkif, &vbd_info[nr_vbds], + rb_entry(rb, vbd_t, rb))) != 0 ) + goto out; + if ( ++nr_vbds == max_vbds ) goto out; /* STEP 3. Process right subtree, if any. */ @@ -355,146 +338,22 @@ static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p) } out: - spin_unlock(&p->vbd_lock); - return rc; -} - - -/* - * Return information about the VBDs available for a given domain, or for all - * domains; in the general case the 'domain' argument will be 0 which means - * "information about the caller"; otherwise the 'domain' argument will - * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of - * these cases require the caller to be privileged. - */ -long vbd_probe(vbd_probe_t *probe) -{ - struct task_struct *p = NULL; - unsigned long flags; - long ret = 0; - - if ( probe->domain != 0 ) - { - /* We can only probe for ourselves (unless we're privileged). */ - if( (probe->domain != current->domain) && !IS_PRIV(current) ) - return -EPERM; - - if ( (probe->domain != VBD_PROBE_ALL) && - ((p = find_domain_by_id(probe->domain)) == NULL) ) - { - DPRINTK("vbd_probe attempted for non-existent domain %llu\n", - probe->domain); - return -EINVAL; - } - } - else - { - /* Default is to probe for ourselves. */ - p = current; - get_task_struct(p); /* to mirror final put_task_struct */ - } - - if ( probe->domain == VBD_PROBE_ALL ) - { - read_lock_irqsave(&tasklist_lock, flags); - for_each_domain ( p ) - { - if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 ) - { - read_unlock_irqrestore(&tasklist_lock, flags); - goto out; - } - } - read_unlock_irqrestore(&tasklist_lock, flags); - } - else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 ) - goto out; - - out: - if ( ret != 0 ) - DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); - if ( p != NULL ) - put_task_struct(p); - return ret; -} - - -long vbd_info(vbd_info_t *info) -{ - struct task_struct *p; - xen_extent_le_t *x; - xen_extent_t *extents; - vbd_t *vbd = NULL; - rb_node_t *rb; - long ret = 0; - - if ( (info->domain != current->domain) && !IS_PRIV(current) ) - return -EPERM; - - if ( (p = find_domain_by_id(info->domain)) == NULL ) - { - DPRINTK("vbd_info attempted for non-existent domain %llu\n", - info->domain); - return -EINVAL; - } - - spin_lock(&p->vbd_lock); - - rb = p->vbd_rb.rb_node; - while ( rb != NULL ) - { - vbd = rb_entry(rb, vbd_t, rb); - if ( info->vdevice < vbd->vdevice ) - rb = rb->rb_left; - else if ( info->vdevice > vbd->vdevice ) - rb = rb->rb_right; - else - break; - } - - if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) ) - { - DPRINTK("vbd_info attempted on non-existent VBD.\n"); - ret = -EINVAL; - goto out; - } - - info->mode = vbd->mode; - info->nextents = 0; - - extents = info->extents; - for ( x = vbd->extents; x != NULL; x = x->next ) - { - if ( info->nextents == info->maxextents ) - break; - if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) ) - { - DPRINTK("vbd_info: copy_to_user failed\n"); - ret = -EFAULT; - goto out; - } - extents++; - info->nextents++; - } - - out: - spin_unlock(&p->vbd_lock); - put_task_struct(p); - return ret; + spin_unlock(&blkif->vbd_lock); + return (rc == 0) ? 
nr_vbds : rc; } -int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation) { - xen_extent_le_t *x; - vbd_t *vbd; - rb_node_t *rb; - xen_sector_t sec_off; - unsigned long nr_secs; + blkif_extent_le_t *x; + vbd_t *vbd; + rb_node_t *rb; + blkif_sector_t sec_off; + unsigned long nr_secs; - spin_lock(&p->vbd_lock); + spin_lock(&blkif->vbd_lock); - rb = p->vbd_rb.rb_node; + rb = blkif->vbd_rb.rb_node; while ( rb != NULL ) { vbd = rb_entry(rb, vbd_t, rb); @@ -507,42 +366,41 @@ int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) } DPRINTK("vbd_translate; domain %llu attempted to access " - "non-existent VBD.\n", p->domain); + "non-existent VBD.\n", blkif->domid); - spin_unlock(&p->vbd_lock); + spin_unlock(&blkif->vbd_lock); return -ENODEV; found: - if ( ((operation == READ) && !VBD_CAN_READ(vbd)) || - ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) ) + if ( (operation == WRITE) && vbd->readonly ) { - spin_unlock(&p->vbd_lock); + spin_unlock(&blkif->vbd_lock); return -EACCES; } /* - * Now iterate through the list of xen_extents, working out which should + * Now iterate through the list of blkif_extents, working out which should * be used to perform the translation. */ sec_off = pseg->sector_number; nr_secs = pseg->nr_sects; for ( x = vbd->extents; x != NULL; x = x->next ) { - if ( sec_off < x->extent.nr_sectors ) + if ( sec_off < x->extent.sector_length ) { pseg->dev = x->extent.device; - pseg->sector_number = x->extent.start_sector + sec_off; - if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) ) + pseg->sector_number = x->extent.sector_start + sec_off; + if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) ) goto overrun; spin_unlock(&p->vbd_lock); return 1; } - sec_off -= x->extent.nr_sectors; + sec_off -= x->extent.sector_length; } DPRINTK("vbd_translate: end of vbd.\n"); - spin_unlock(&p->vbd_lock); + spin_unlock(&blkif->vbd_lock); return -EACCES; /* @@ -554,7 +412,7 @@ int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) overrun: /* Adjust length of first chunk to run to end of first extent. */ - pseg[0].nr_sects = x->extent.nr_sectors - sec_off; + pseg[0].nr_sects = x->extent.sector_length - sec_off; /* Set second chunk buffer and length to start where first chunk ended. */ pseg[1].buffer = pseg[0].buffer + (pseg[0].nr_sects << 9); @@ -562,7 +420,7 @@ int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) /* Now move to the next extent. Check it exists and is long enough! */ if ( unlikely((x = x->next) == NULL) || - unlikely(x->extent.nr_sectors < pseg[1].nr_sects) ) + unlikely(x->extent.sector_length < pseg[1].nr_sects) ) { DPRINTK("vbd_translate: multiple overruns or end of vbd.\n"); spin_unlock(&p->vbd_lock); @@ -571,8 +429,8 @@ int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) /* Store the real device and start sector for the second chunk. */ pseg[1].dev = x->extent.device; - pseg[1].sector_number = x->extent.start_sector; + pseg[1].sector_number = x->extent.sector_start; - spin_unlock(&p->vbd_lock); + spin_unlock(&blkif->vbd_lock); return 2; } diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h index f6e8a4d5c8..5db2b48a51 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h @@ -16,24 +16,27 @@ #define BLKIF_OP_WRITE 1 #define BLKIF_OP_PROBE 2 -/* NB. 
Ring size must be small enough for sizeof(blk_ring_t) <= PAGE_SIZE. */ +/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */ #define BLKIF_RING_SIZE 64 /* * Maximum scatter/gather segments per request. - * This is carefully chosen so that sizeof(blk_ring_t) <= PAGE_SIZE. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. * NB. This could be 12 if the ring indexes weren't stored in the same page. */ -#define BLKIF_REQUEST_MAX_SEGMENTS 11 +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 + +#define BLKIF_MAX_SECTORS_PER_SEGMENT 16 typedef struct { unsigned char operation; /* BLKIF_OP_??? */ - unsigned char nr_segments; /* number of segments (<= MAX_BLK_SEGS) */ + unsigned char nr_segments; /* number of segments */ blkif_vdev_t device; /* only for read/write requests */ unsigned long id; /* private guest value, echoed in resp */ - xen_sector_t sector_number; /* start sector idx on disk (r/w only) */ - /* Least 9 bits is 'nr_sects'. High 23 bits is the address. */ - unsigned long buffer_and_sects[MAX_BLK_SEGS]; + blkif_sector_t sector_number; /* start sector idx on disk (r/w only) */ + /* Least 9 bits is 'nr_sects'. High 23 bits is the address. */ + /* We must have '0 <= nr_sects <= BLKIF_MAX_SECTORS_PER_SEGMENT'. */ + unsigned long buffer_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST]; } blkif_request_t; typedef struct { @@ -59,8 +62,8 @@ typedef unsigned int BLKIF_RING_IDX; #define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1)) typedef struct { - BLKIF_RING_IDX req_prod; /* Request producer. Updated by guest OS. */ - BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen. */ + BLKIF_RING_IDX req_prod; /* Request producer. Updated by front-end. */ + BLKIF_RING_IDX resp_prod; /* Response producer. Updated by back-end. */ union { blkif_request_t req; blkif_response_t resp; @@ -103,7 +106,7 @@ typedef struct { typedef struct { blkif_vdev_t device; /* Device number (opaque 16 bit value). */ unsigned short info; /* Device type and flags (VDISK_*). */ - xen_sector_t capacity; /* Size in terms of 512-byte sectors. */ + blkif_sector_t capacity; /* Size in terms of 512-byte sectors. */ } vdisk_t; #endif /* __SHARED_BLKIF_H__ */ diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile index 35986ca54a..b0d27cf698 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile @@ -1,3 +1,3 @@ O_TARGET := drv.o -obj-y := block.o vbd.o +obj-y := main.o vbd.o include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h index e41e03970e..2d4415bdef 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h @@ -1,11 +1,11 @@ /****************************************************************************** - * block.h + * arch/xen/drivers/blkif/frontend/common.h * * Shared definitions between all levels of XenoLinux Virtual block devices. */ -#ifndef __XEN_DRIVERS_BLOCK_H__ -#define __XEN_DRIVERS_BLOCK_H__ +#ifndef __XEN_DRIVERS_COMMON_H__ +#define __XEN_DRIVERS_COMMON_H__ #include <linux/config.h> #include <linux/module.h> @@ -27,6 +27,8 @@ #include <asm/atomic.h> #include <asm/uaccess.h> +#include "../blkif.h" + #if 0 #define DPRINTK(_f, _a...) 
printk ( KERN_ALERT _f , ## _a ) #else @@ -52,14 +54,14 @@ typedef struct xl_disk { int usage; } xl_disk_t; -extern int xen_control_msg(int operration, char *buffer, int size); -extern int xen_block_open(struct inode *inode, struct file *filep); -extern int xen_block_release(struct inode *inode, struct file *filep); -extern int xen_block_ioctl(struct inode *inode, struct file *filep, +extern int blkif_open(struct inode *inode, struct file *filep); +extern int blkif_release(struct inode *inode, struct file *filep); +extern int blkif_ioctl(struct inode *inode, struct file *filep, unsigned command, unsigned long argument); -extern int xen_block_check(kdev_t dev); -extern int xen_block_revalidate(kdev_t dev); -extern void do_xlblk_request (request_queue_t *rq); +extern int blkif_check(kdev_t dev); +extern int blkif_revalidate(kdev_t dev); +extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp); +extern void do_blkif_request (request_queue_t *rq); extern void xlvbd_update_vbds(void); @@ -79,4 +81,4 @@ static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) extern int xlvbd_init(void); extern void xlvbd_cleanup(void); -#endif /* __XEN_DRIVERS_BLOCK_H__ */ +#endif /* __XEN_DRIVERS_COMMON_H__ */ diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c index d00dd98f7b..b0c524f390 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c @@ -1,5 +1,5 @@ /****************************************************************************** - * block.c + * arch/xen/drivers/blkif/frontend/main.c * * Xenolinux virtual block-device driver. * @@ -7,32 +7,35 @@ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge */ -#include "block.h" +#include "common.h" #include <linux/blk.h> #include <linux/cdrom.h> #include <linux/tqueue.h> #include <linux/sched.h> -#include <scsi/scsi.h> - #include <linux/interrupt.h> +#include <scsi/scsi.h> +#include <asm/ctrl_if.h> typedef unsigned char byte; /* from linux/ide.h */ -#define STATE_ACTIVE 0 -#define STATE_SUSPENDED 1 -#define STATE_CLOSED 2 -static unsigned int state = STATE_SUSPENDED; +#define BLKIF_STATE_CLOSED 0 +#define BLKIF_STATE_DOWN 1 +#define BLKIF_STATE_UP 2 +static unsigned int blkif_state = BLKIF_STATE_CLOSED; +static unsigned int blkif_evtchn, blkif_irq; -/* Dynamically-mapped IRQs. */ -static int xlblk_response_irq, xlblk_update_irq; +static struct tq_struct blkif_statechange_tq; -static blk_ring_t *blk_ring; +static int blkif_control_rsp_valid; +static blkif_response_t blkif_control_rsp; + +static blkif_ring_t *blk_ring; static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ static BLK_RING_IDX req_prod; /* Private request producer. */ /* We plug the I/O ring if the driver is suspended or if the ring is full. 
*/ #define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ - (state != STATE_ACTIVE)) + (blkif_state != BLKIF_STATE_UP)) /* @@ -49,39 +52,27 @@ static int sg_operation = -1; static unsigned long sg_next_sect; #define DISABLE_SCATTERGATHER() (sg_operation = -1) -static inline void signal_requests_to_xen(void) +static inline void flush_requests(void) { - block_io_op_t op; - DISABLE_SCATTERGATHER(); blk_ring->req_prod = req_prod; - - op.cmd = BLOCK_IO_OP_SIGNAL; - HYPERVISOR_block_io_op(&op); - return; + notify_via_evtchn(blkif_evtchn); } /* - * xlblk_update_int/update-vbds_task - handle VBD update events from Xen - * - * Schedule a task for keventd to run, which will update the VBDs and perform - * the corresponding updates to our view of VBD state, so the XenoLinux will - * respond to changes / additions / deletions to the set of VBDs automatically. + * blkif_update_int/update-vbds_task - handle VBD update events. + * Schedule a task for keventd to run, which will update the VBDs and perform + * the corresponding updates to our view of VBD state. */ static struct tq_struct update_tq; static void update_vbds_task(void *unused) { xlvbd_update_vbds(); } -static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - update_tq.routine = update_vbds_task; - schedule_task(&update_tq); -} -int xen_block_open(struct inode *inode, struct file *filep) +int blkif_open(struct inode *inode, struct file *filep) { short xldev = inode->i_rdev; struct gendisk *gd = get_gendisk(xldev); @@ -122,7 +113,7 @@ int xen_block_open(struct inode *inode, struct file *filep) } -int xen_block_release(struct inode *inode, struct file *filep) +int blkif_release(struct inode *inode, struct file *filep) { xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); @@ -140,7 +131,7 @@ int xen_block_release(struct inode *inode, struct file *filep) } -int xen_block_ioctl(struct inode *inode, struct file *filep, +int blkif_ioctl(struct inode *inode, struct file *filep, unsigned command, unsigned long argument) { kdev_t dev = inode->i_rdev; @@ -170,7 +161,7 @@ int xen_block_ioctl(struct inode *inode, struct file *filep, case BLKRRPART: /* re-read partition table */ DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); - return xen_block_revalidate(dev); + return blkif_revalidate(dev); case BLKSSZGET: return hardsect_size[MAJOR(dev)][MINOR(dev)]; @@ -218,11 +209,11 @@ int xen_block_ioctl(struct inode *inode, struct file *filep, return 0; case SCSI_IOCTL_GET_BUS_NUMBER: - DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev"); + DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif"); return -ENOSYS; default: - printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command); + printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command); return -ENOSYS; } @@ -230,13 +221,13 @@ int xen_block_ioctl(struct inode *inode, struct file *filep, } /* check media change: should probably do something here in some cases :-) */ -int xen_block_check(kdev_t dev) +int blkif_check(kdev_t dev) { - DPRINTK("xen_block_check\n"); + DPRINTK("blkif_check\n"); return 0; } -int xen_block_revalidate(kdev_t dev) +int blkif_revalidate(kdev_t dev) { struct block_device *bd; struct gendisk *gd; @@ -289,25 +280,25 @@ int xen_block_revalidate(kdev_t dev) /* - * hypervisor_request + * blkif_queue_request * * request block io * * id: for guest use only. - * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*} + * operation: BLKIF_OP_{READ,WRITE,PROBE} * buffer: buffer to read/write into. 
this should be a * virtual address in the guest os. */ -static int hypervisor_request(unsigned long id, - int operation, - char * buffer, - unsigned long sector_number, - unsigned short nr_sectors, - kdev_t device) +static int blkif_queue_request(unsigned long id, + int operation, + char * buffer, + unsigned long sector_number, + unsigned short nr_sectors, + kdev_t device) { - unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); - struct gendisk *gd; - blk_ring_req_entry_t *req; + unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); + struct gendisk *gd; + blkif_request_t *req; struct buffer_head *bh; if ( unlikely(nr_sectors >= (1<<9)) ) @@ -315,26 +306,26 @@ static int hypervisor_request(unsigned long id, if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) BUG(); - if ( unlikely(state == STATE_CLOSED) ) + if ( unlikely(blkif_state != BLKIF_STATE_UP) ) return 1; switch ( operation ) { - case XEN_BLOCK_READ: - case XEN_BLOCK_WRITE: + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: gd = get_gendisk(device); /* * Update the sector_number we'll pass down as appropriate; note that * we could sanity check that resulting sector will be in this - * partition, but this will happen in xen anyhow. + * partition, but this will happen in driver backend anyhow. */ sector_number += gd->part[MINOR(device)].start_sect; /* - * If this unit doesn't consist of virtual (i.e., Xen-specified) - * partitions then we clear the partn bits from the device number. + * If this unit doesn't consist of virtual partitions then we clear + * the partn bits from the device number. */ if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) @@ -375,7 +366,7 @@ static int hypervisor_request(unsigned long id, req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req; req->id = id; req->operation = operation; - req->sector_number = (xen_sector_t)sector_number; + req->sector_number = (blkif_sector_t)sector_number; req->device = device; req->nr_segments = 1; req->buffer_and_sects[0] = buffer_ma | nr_sectors; @@ -386,23 +377,23 @@ static int hypervisor_request(unsigned long id, /* - * do_xlblk_request + * do_blkif_request * read a block; request is in a request queue */ -void do_xlblk_request(request_queue_t *rq) +void do_blkif_request(request_queue_t *rq) { struct request *req; struct buffer_head *bh, *next_bh; int rw, nsect, full, queued = 0; - DPRINTK("xlblk.c::do_xlblk_request\n"); + DPRINTK("Entered do_blkif_request\n"); while ( !rq->plugged && !list_empty(&rq->queue_head)) { if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) goto out; - DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", + DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", req, req->cmd, req->sector, req->current_nr_sectors, req->nr_sectors, req->bh); @@ -420,9 +411,9 @@ void do_xlblk_request(request_queue_t *rq) next_bh = bh->b_reqnext; bh->b_reqnext = NULL; - full = hypervisor_request( + full = blkif_queue_request( (unsigned long)bh, - (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, + (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); if ( full ) @@ -462,7 +453,8 @@ void do_xlblk_request(request_queue_t *rq) } out: - if ( queued != 0 ) signal_requests_to_xen(); + if ( queued != 0 ) + flush_requests(); } @@ -474,30 +466,30 @@ static void kick_pending_request_queues(void) { /* Attempt to drain the queue, but bail if the ring becomes full. 
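blkif_queue_request() above packs each segment as buffer_ma | nr_sectors, which is why it insists that the buffer's machine address is 512-byte aligned and that the sector count stays below 1<<9. A small sketch of that packing and the corresponding unpacking (the addresses are made up):

/* Sketch only: a segment descriptor packing a 512-byte-aligned machine
 * address together with a sector count in the low nine bits. */
#include <assert.h>
#include <stdio.h>

#define SECTOR_BITS 9                              /* 512-byte sectors        */
#define SECTOR_MASK ((1UL << SECTOR_BITS) - 1)

static unsigned long pack_segment(unsigned long buffer_ma, unsigned int nr_sectors)
{
    assert((buffer_ma & SECTOR_MASK) == 0);        /* must be sector aligned  */
    assert(nr_sectors < (1U << SECTOR_BITS));      /* count fits in low bits  */
    return buffer_ma | nr_sectors;
}

int main(void)
{
    unsigned long seg = pack_segment(0x12345000UL, 8);
    printf("address %#lx, sectors %lu\n", seg & ~SECTOR_MASK, seg & SECTOR_MASK);
    return 0;
}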
*/ while ( (nr_pending != 0) && !RING_PLUGGED ) - do_xlblk_request(pending_queues[--nr_pending]); + do_blkif_request(pending_queues[--nr_pending]); } } -static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) +static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) { BLK_RING_IDX i; unsigned long flags; struct buffer_head *bh, *next_bh; - if ( unlikely(state == STATE_CLOSED) ) + if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ) return; spin_lock_irqsave(&io_request_lock, flags); for ( i = resp_cons; i != blk_ring->resp_prod; i++ ) { - blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp; + blkif_response_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp; switch ( bret->operation ) { - case XEN_BLOCK_READ: - case XEN_BLOCK_WRITE: - if ( unlikely(bret->status != 0) ) + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) DPRINTK("Bad return from blkdev data request: %lx\n", bret->status); for ( bh = (struct buffer_head *)bret->id; @@ -509,7 +501,10 @@ static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) bh->b_end_io(bh, !bret->status); } break; - + case BLKIF_OP_PROBE: + memcpy(&blkif_control_rsp, bret, sizeof(*bret)); + blkif_control_rsp_valid = 1; + break; default: BUG(); } @@ -523,70 +518,167 @@ static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) } -static void reset_xlblk_interface(void) +void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) { - block_io_op_t op; + unsigned long flags; - nr_pending = 0; + retry: + while ( (req_prod - resp_cons) == BLK_RING_SIZE ) + { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); + } - op.cmd = BLOCK_IO_OP_RESET; - if ( HYPERVISOR_block_io_op(&op) != 0 ) - printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n"); + spin_lock_irqsave(&io_request_lock, flags); + if ( (req_prod - resp_cons) == BLK_RING_SIZE ) + { + spin_unlock_irqrestore(&io_request_lock, flags); + goto retry; + } - op.cmd = BLOCK_IO_OP_RING_ADDRESS; - (void)HYPERVISOR_block_io_op(&op); + DISABLE_SCATTERGATHER(); + memcpy(&blk_ring->ring[MASK_BLK_IDX(req_prod)].req, req, sizeof(*req)); + req_prod++; + flush_requests(); - set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT); - blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE); - blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; + spin_unlock_irqrestore(&io_request_lock, flags); + + while ( !blkif_control_rsp_valid ) + { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); + } - wmb(); - state = STATE_ACTIVE; + memcpy(rsp, &blkif_control_rsp, sizeof(*rsp)); + blkif_control_rsp_valid = 0; } -int __init xlblk_init(void) +static void blkif_bringup_phase1(void *unused) { - int error; + ctrl_msg_t cmsg; + blkif_fe_interface_up_t up; - reset_xlblk_interface(); + /* Move from CLOSED to DOWN state. */ + blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL); + blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; + blkif_state = BLKIF_STATE_DOWN; + + /* Construct an interface-UP message for the domain controller. */ + cmsg.type = CMSG_BLKIF_FE; + cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_UP; + cmsg.length = sizeof(blkif_fe_interface_up_t); + up.handle = 0; + up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT; + memcpy(cmsg.msg, &up, sizeof(up)); + + /* Tell the controller to bring up the interface. 
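blkif_control_send() above turns the asynchronous ring into a synchronous control transaction: wait for ring space, copy the request in, flush, then sleep until blkif_int() has stashed the matching response in blkif_control_rsp. A reduced sketch of that wait-for-response pattern, with the ring and locking omitted, sleep_a_bit() standing in for schedule_timeout(1), and fake_response_arrived() standing in for the interrupt handler:

/* Sketch only: synchronous request/response over an otherwise async ring. */
#include <stdio.h>
#include <string.h>

struct response { int status; };

static volatile int control_rsp_valid;        /* set by the response handler  */
static struct response control_rsp;

static void sleep_a_bit(void) { /* would yield the CPU in the kernel */ }

static void fake_response_arrived(void)
{
    control_rsp.status = 1;                   /* pretend the backend replied  */
    control_rsp_valid = 1;
}

static void control_send(struct response *rsp)
{
    /* 1. (omitted) wait until the shared ring has a free slot.             */
    /* 2. (omitted) copy the request into the ring and notify the backend.  */
    fake_response_arrived();                  /* stand-in for the real reply  */

    /* 3. Poll until the handler has stashed the matching response.         */
    while (!control_rsp_valid)
        sleep_a_bit();

    memcpy(rsp, &control_rsp, sizeof(*rsp));
    control_rsp_valid = 0;                    /* ready for the next caller    */
}

int main(void)
{
    struct response rsp;
    control_send(&rsp);
    printf("control response status %d\n", rsp.status);
    return 0;
}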
*/ + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); +} + +static void blkif_bringup_phase2(void *unused) +{ + /* Move from DOWN to UP state. */ + blkif_irq = bind_evtchn_to_irq(blkif_evtchn); + (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL); + blkif_state = BLKIF_STATE_UP; - xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV); - xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD); + /* Probe for discs that are attached to the interface. */ + xlvbd_init(); - error = request_irq(xlblk_response_irq, xlblk_response_int, - SA_SAMPLE_RANDOM, "blkdev", NULL); - if ( error ) + /* Kick pending requests. */ + spin_lock_irq(&io_request_lock); + kick_pending_request_queues(); + spin_unlock_irq(&io_request_lock); +} + +static void blkif_status_change(blkif_fe_interface_status_changed_t *status) +{ + if ( status->handle != 0 ) { - printk(KERN_ALERT "Could not allocate receive interrupt\n"); - goto fail; + printk(KERN_WARNING "Status change on unsupported blkif %d\n", + status->handle); + return; } - error = request_irq(xlblk_update_irq, xlblk_update_int, - 0, "blkdev", NULL); + switch ( status->status ) + { + case BLKIF_INTERFACE_STATUS_DESTROYED: + printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n", + blkif_state); + break; - if ( error ) + case BLKIF_INTERFACE_STATUS_DOWN: + if ( blkif_state != BLKIF_STATE_CLOSED ) + { + printk(KERN_WARNING "Unexpected blkif-DOWN message in state %d\n", + blkif_state); + break; + } + blkif_statechange_tq.routine = blkif_bringup_phase1; + schedule_task(&blkif_statechange_tq); + break; + + case BLKIF_INTERFACE_STATUS_UP: + if ( blkif_state == BLKIF_STATE_CLOSED ) + { + printk(KERN_WARNING "Unexpected blkif-UP message in state %d\n", + blkif_state); + break; + } + blkif_evtchn = status->evtchn; + blkif_statechange_tq.routine = blkif_bringup_phase2; + schedule_task(&blkif_statechange_tq); + break; + + default: + printk(KERN_WARNING "Status change to unknown value %d\n", + status->status); + break; + } +} + + +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) +{ + switch ( msg->subtype ) { - printk(KERN_ALERT "Could not allocate block update interrupt\n"); - goto fail; + case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED: + if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) ) + goto parse_error; + blkif_status_change((blkif_fe_interface_status_changed_t *) + &msg->msg[0]); + break; +#if 0 + case CMSG_BLKIF_FE_VBD_STATUS_CHANGED: + update_tq.routine = update_vbds_task; + schedule_task(&update_tq); + break; +#endif + default: + goto parse_error; } - (void)xlvbd_init(); + ctrl_if_send_response(msg); + return; + + parse_error: + msg->length = 0; + ctrl_if_send_response(msg); +} - return 0; - fail: - return error; +int __init xlblk_init(void) +{ + (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx); + return 0; } static void __exit xlblk_cleanup(void) { - xlvbd_cleanup(); - free_irq(xlblk_response_irq, NULL); - free_irq(xlblk_update_irq, NULL); - unbind_virq_from_irq(VIRQ_BLKDEV); - unbind_virq_from_irq(VIRQ_VBD_UPD); + /* XXX FIXME */ + BUG(); } @@ -598,28 +690,13 @@ module_exit(xlblk_cleanup); void blkdev_suspend(void) { - state = STATE_SUSPENDED; - wmb(); - - while ( resp_cons != blk_ring->req_prod ) - { - barrier(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } - - wmb(); - state = STATE_CLOSED; - wmb(); - - clear_fixmap(FIX_BLKRING_BASE); + /* XXX FIXME */ + BUG(); } void blkdev_resume(void) { - reset_xlblk_interface(); - spin_lock_irq(&io_request_lock); - 
kick_pending_request_queues(); - spin_unlock_irq(&io_request_lock); + /* XXX FIXME */ + BUG(); } diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c index e08b976c56..944bf7eace 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c @@ -1,13 +1,13 @@ /****************************************************************************** - * vbd.c + * arch/xen/drivers/blkif/frontend/vbd.c * - * Xenolinux virtual block-device driver (xvd). + * Xenolinux virtual block-device driver. * * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Modifications by Mark A. Williamson are (c) Intel Research Cambridge */ -#include "block.h" +#include "common.h" #include <linux/blk.h> /* @@ -43,54 +43,59 @@ static int xlvbd_blksize_size[256]; static int xlvbd_hardsect_size[256]; static int xlvbd_max_sectors[256]; -/* Information from Xen about our VBDs. */ +/* Information about our VBDs. */ #define MAX_VBDS 64 static int nr_vbds; -static xen_disk_t *vbd_info; +static vdisk_t *vbd_info; static struct block_device_operations xlvbd_block_fops = { - open: xen_block_open, - release: xen_block_release, - ioctl: xen_block_ioctl, - check_media_change: xen_block_check, - revalidate: xen_block_revalidate, + open: blkif_open, + release: blkif_release, + ioctl: blkif_ioctl, + check_media_change: blkif_check, + revalidate: blkif_revalidate, }; -static int xlvbd_get_vbd_info(xen_disk_t *disk_info) +static int xlvbd_get_vbd_info(vdisk_t *disk_info) { - int error; - block_io_op_t op; - - /* Probe for disk information. */ - memset(&op, 0, sizeof(op)); - op.cmd = BLOCK_IO_OP_VBD_PROBE; - op.u.probe_params.domain = 0; - op.u.probe_params.xdi.max = MAX_VBDS; - op.u.probe_params.xdi.disks = disk_info; - op.u.probe_params.xdi.count = 0; - - if ( (error = HYPERVISOR_block_io_op(&op)) != 0 ) + vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL); + blkif_request_t req; + blkif_response_t rsp; + int nr; + + memset(&req, 0, sizeof(req)); + req.operation = BLKIF_OP_PROBE; + req.nr_segments = 1; + req.buffer_and_sects[0] = virt_to_machine(buf) | (PAGE_SIZE/512); + + blkif_control_send(&req, &rsp); + + if ( rsp.status <= 0 ) { - printk(KERN_ALERT "Could not probe disks (%d)\n", error); + printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status); return -1; } - return op.u.probe_params.xdi.count; + if ( (nr = rsp.status) > MAX_VBDS ) + nr = MAX_VBDS; + memcpy(disk_info, buf, nr * sizeof(vdisk_t)); + + return nr; } /* * xlvbd_init_device - initialise a VBD device - * @disk: a xen_disk_t describing the VBD + * @disk: a vdisk_t describing the VBD * - * Takes a xen_disk_t * that describes a VBD the domain has access to. + * Takes a vdisk_t * that describes a VBD the domain has access to. * Performs appropriate initialisation and registration of the device. * * Care needs to be taken when making re-entrant calls to ensure that * corruption does not occur. Also, devices that are in use should not have * their details updated. This is the caller's responsibility. */ -static int xlvbd_init_device(xen_disk_t *xd) +static int xlvbd_init_device(vdisk_t *xd) { int device = xd->device; int major = MAJOR(device); @@ -181,11 +186,11 @@ static int xlvbd_init_device(xen_disk_t *xd) read_ahead[major] = 8; } - blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request); + blk_init_queue(BLK_DEFAULT_QUEUE(major), do_blkif_request); /* * Turn off barking 'headactive' mode. 
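xlvbd_get_vbd_info() above now probes by sending a BLKIF_OP_PROBE control request whose single segment points at a free page; a positive response status is the number of vdisk_t records the backend wrote into that page. A sketch of how such a reply is interpreted (the vdisk layout here is a simplified stand-in, not the real structure):

/* Sketch only: interpreting a probe reply the way xlvbd_get_vbd_info() does. */
#include <stdio.h>
#include <string.h>

#define MAX_VBDS 64

struct vdisk { unsigned short device; unsigned long capacity; };

static int parse_probe_reply(int status, const struct vdisk *buf,
                             struct vdisk *out)
{
    int nr;

    if (status <= 0) {                        /* probe failed                */
        fprintf(stderr, "could not probe disks (%d)\n", status);
        return -1;
    }
    nr = status;
    if (nr > MAX_VBDS)                        /* clamp to our table size     */
        nr = MAX_VBDS;
    memcpy(out, buf, nr * sizeof(*out));
    return nr;
}

int main(void)
{
    struct vdisk probe_page[2] = { { 0x301, 1024 }, { 0x302, 2048 } };
    struct vdisk info[MAX_VBDS];
    int nr = parse_probe_reply(2, probe_page, info);
    printf("found %d virtual disks\n", nr);
    return 0;
}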
We dequeue buffer heads as - * soon as we pass them down to Xen. + * soon as we pass them to the back-end driver. */ blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); @@ -431,12 +436,12 @@ static int xlvbd_remove_device(int device) void xlvbd_update_vbds(void) { int i, j, k, old_nr, new_nr; - xen_disk_t *old_info, *new_info, *merged_info; + vdisk_t *old_info, *new_info, *merged_info; old_info = vbd_info; old_nr = nr_vbds; - new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); + new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL); if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 ) { kfree(new_info); @@ -448,7 +453,7 @@ void xlvbd_update_vbds(void) * old list and new list do not overlap at all, and we cannot yet destroy * VBDs in the old list because the usage counts are busy. */ - merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL); + merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL); /* @i tracks old list; @j tracks new list; @k tracks merged list. */ i = j = k = 0; @@ -458,13 +463,13 @@ void xlvbd_update_vbds(void) if ( old_info[i].device < new_info[j].device ) { if ( xlvbd_remove_device(old_info[i].device) != 0 ) - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); i++; } else if ( old_info[i].device > new_info[j].device ) { if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); j++; } else @@ -472,9 +477,9 @@ void xlvbd_update_vbds(void) if ( ((old_info[i].capacity == new_info[j].capacity) && (old_info[i].info == new_info[j].info)) || (xlvbd_remove_device(old_info[i].device) != 0) ) - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); else if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); i++; j++; } } @@ -482,13 +487,13 @@ void xlvbd_update_vbds(void) for ( ; i < old_nr; i++ ) { if ( xlvbd_remove_device(old_info[i].device) != 0 ) - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); } for ( ; j < new_nr; j++ ) { if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); } vbd_info = merged_info; @@ -500,12 +505,12 @@ void xlvbd_update_vbds(void) /* - * Set up all the linux device goop for the virtual block devices (vbd's) that - * xen tells us about. Note that although from xen's pov VBDs are addressed - * simply an opaque 16-bit device number, the domain creation tools + * Set up all the linux device goop for the virtual block devices (vbd's) that + * we know about. Note that although from the backend driver's p.o.v. VBDs are + * addressed simply an opaque 16-bit device number, the domain creation tools * conventionally allocate these numbers to correspond to those used by 'real' * linux -- this is just for convenience as it means e.g. that the same - * /etc/fstab can be used when booting with or without xen. + * /etc/fstab can be used when booting with or without Xen. 
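xlvbd_update_vbds() above walks the old and new VBD lists, both sorted by device number, with a three-index merge: vanished devices are removed if their usage counts allow it, new devices are initialised, and entries present in both lists are carried over. A simplified sketch of that merge, reduced to device numbers, with remove_dev()/init_dev() standing in for xlvbd_remove_device()/xlvbd_init_device() and the capacity/geometry comparison for matching entries dropped:

/* Sketch only: sorted-list merge of old and new virtual-disk sets. */
#include <stdio.h>

static int remove_dev(int dev) { printf("remove %#x\n", dev); return 0; }
static int init_dev(int dev)   { printf("init   %#x\n", dev); return 0; }

static int merge_vbds(const int *oldv, int old_nr,
                      const int *newv, int new_nr, int *merged)
{
    int i = 0, j = 0, k = 0;

    while ((i < old_nr) && (j < new_nr)) {
        if (oldv[i] < newv[j]) {              /* disappeared: try to remove  */
            if (remove_dev(oldv[i]) != 0)
                merged[k++] = oldv[i];        /* still busy -- keep it       */
            i++;
        } else if (oldv[i] > newv[j]) {       /* brand new: try to create    */
            if (init_dev(newv[j]) == 0)
                merged[k++] = newv[j];
            j++;
        } else {                              /* present in both lists       */
            merged[k++] = oldv[i];
            i++; j++;
        }
    }
    while (i < old_nr) { if (remove_dev(oldv[i]) != 0) merged[k++] = oldv[i]; i++; }
    while (j < new_nr) { if (init_dev(newv[j]) == 0)  merged[k++] = newv[j]; j++; }
    return k;
}

int main(void)
{
    int oldv[] = { 0x301, 0x305 }, newv[] = { 0x301, 0x306 }, merged[8];
    int n = merge_vbds(oldv, 2, newv, 2, merged);
    printf("%d devices after update\n", n);
    return 0;
}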
*/ int __init xlvbd_init(void) { @@ -537,7 +542,7 @@ int __init xlvbd_init(void) xlvbd_max_sectors[i] = 128; } - vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); + vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL); nr_vbds = xlvbd_get_vbd_info(vbd_info); if ( nr_vbds < 0 ) diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c index b59f3e8a84..e6fc3aed05 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c @@ -36,7 +36,7 @@ static struct proc_dir_entry *privcmd_intf; static int privcmd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long data) { - int ret = 0; + int ret = -ENOSYS; switch ( cmd ) { @@ -62,10 +62,14 @@ static int privcmd_ioctl(struct inode *inode, struct file *file, } break; - default: - ret = -EINVAL; - break; - } + case IOCTL_PRIVCMD_INITDOMAIN_EVTCHN: + { + extern int initdom_ctrlif_domcontroller_port; + ret = initdom_ctrlif_domcontroller_port; + } + break; + } + return ret; } @@ -85,7 +89,7 @@ static int __init init_module(void) { privcmd_intf->owner = THIS_MODULE; privcmd_intf->nlink = 1; - privcmd_intf->proc_fops = &privcmd_file_ops; + privcmd_intf->proc_fops = &privcmd_file_ops; } return 0; diff --git a/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c b/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c index 7d59ad2e16..715f707eb0 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c @@ -17,6 +17,13 @@ #include <asm/ctrl_if.h> #include <asm/evtchn.h> +/* + * Only used by initial domain which must create its own control-interface + * event channel. This value is picked up by the user-space domain controller + * via an ioctl. + */ +int initdom_ctrlif_domcontroller_port = -1; + static int ctrl_if_evtchn; static int ctrl_if_irq; static spinlock_t ctrl_if_lock; @@ -276,9 +283,6 @@ void ctrl_if_unregister_receiver(u8 type, ctrl_msg_handler_t hnd) void ctrl_if_suspend(void) { - if ( start_info.flags & SIF_INITDOMAIN ) - return; - free_irq(ctrl_if_irq, NULL); unbind_evtchn_from_irq(ctrl_if_evtchn); } @@ -286,7 +290,21 @@ void ctrl_if_suspend(void) void ctrl_if_resume(void) { if ( start_info.flags & SIF_INITDOMAIN ) - return; + { + /* + * The initial domain must create its own domain-controller link. 
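The privcmd change above exposes the initial domain's controller event channel to user space through the new IOCTL_PRIVCMD_INITDOMAIN_EVTCHN call, which returns the port directly as the ioctl result. A sketch of how a user-space domain controller might retrieve it; the /proc/xen/privcmd path and the <asm/proc_cmd.h> include location are assumptions, since this hunk does not show the proc node's name:

/* Sketch only: fetching the domain-controller event-channel port. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/proc_cmd.h>   /* assumed install location of the privcmd ioctls */

int main(void)
{
    int fd = open("/proc/xen/privcmd", O_RDWR);   /* assumed node name */
    int port;

    if (fd < 0) {
        perror("open privcmd");
        return 1;
    }
    /* The ioctl takes no argument; a negative result indicates failure. */
    port = ioctl(fd, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN, 0);
    if (port < 0)
        perror("IOCTL_PRIVCMD_INITDOMAIN_EVTCHN");
    else
        printf("domain-controller event channel port: %d\n", port);
    close(fd);
    return 0;
}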
+ * The controller is probably not running at this point, but will + * pick up its end of the event channel from + */ + evtchn_op_t op; + op.cmd = EVTCHNOP_bind_interdomain; + op.u.bind_interdomain.dom1 = DOMID_SELF; + op.u.bind_interdomain.dom2 = DOMID_SELF; + if ( HYPERVISOR_event_channel_op(&op) != 0 ) + BUG(); + start_info.domain_controller_evtchn = op.u.bind_interdomain.port1; + initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2; + } ctrl_if_tx_resp_cons = 0; ctrl_if_rx_req_cons = 0; diff --git a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c index 665357d4bc..4eeac0c4dd 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c @@ -5,7 +5,7 @@ * * (C) Copyright 1995 1996 Linus Torvalds * - * Modifications for Xenolinux (c) 2003 Keir Fraser + * Modifications for Xenolinux (c) 2003-2004 Keir Fraser */ #include <linux/slab.h> @@ -27,19 +27,27 @@ #define direct_mk_pte_phys(physpage, pgprot) \ __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) -static inline void direct_remap_area_pte(pte_t *pte, - unsigned long address, - unsigned long size, - unsigned long machine_addr, - pgprot_t prot, - domid_t domid) +static inline int direct_remap_area_pte(pte_t *pte, + unsigned long address, + unsigned long size, + unsigned long machine_addr, + pgprot_t prot, + domid_t domid) { unsigned long end; +#define MAX_DIRECTMAP_MMU_QUEUE 64 + mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v; - mmu_update_t *u, *v; - u = v = vmalloc(3*PAGE_SIZE); /* plenty */ + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + if (address >= end) + BUG(); + reset_buffer: /* If not I/O mapping then specify General-Purpose Subject Domain (GPS). 
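direct_remap_area_pte() above is reworked to queue PTE writes into a fixed on-stack array and to flush it through HYPERVISOR_mmu_update() whenever it fills (the reset_buffer label), instead of vmalloc'ing one oversized buffer. A generic sketch of that fill-then-flush batching, with flush_updates() standing in for the hypercall and the queue length chosen arbitrarily:

/* Sketch only: bounded batching of page-table updates. */
#include <stdio.h>

#define QUEUE_LEN 64

struct update { unsigned long ptr, val; };

static struct update queue[QUEUE_LEN];
static int queued;

static int flush_updates(void)
{
    /* Hypothetical stand-in for the hypercall: apply 'queued' entries. */
    printf("flush %d updates\n", queued);
    queued = 0;
    return 0;
}

static int queue_update(unsigned long ptr, unsigned long val)
{
    queue[queued].ptr = ptr;
    queue[queued].val = val;
    if (++queued == QUEUE_LEN)
        return flush_updates();               /* on-stack buffer is full     */
    return 0;
}

int main(void)
{
    unsigned long addr;
    for (addr = 0; addr < 100; addr++)
        if (queue_update(addr, addr) < 0)
            return 1;                         /* would map to -EFAULT        */
    if (queued)                               /* flush the partial tail      */
        flush_updates();
    return 0;
}

Flushing from a fixed on-stack array bounds each hypercall batch and removes the vmalloc/vfree pair the old code needed for its "plenty" sized buffer.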
*/ + v = &u[0]; if ( domid != 0 ) { v[0].val = (unsigned long)(domid<<16) & ~0xFFFFUL; @@ -53,12 +61,6 @@ static inline void direct_remap_area_pte(pte_t *pte, v += 2; } - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - if (address >= end) - BUG(); do { if (!pte_none(*pte)) { printk("direct_remap_area_pte: page already exists\n"); @@ -66,16 +68,21 @@ static inline void direct_remap_area_pte(pte_t *pte, } v->ptr = virt_to_machine(pte); v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO; - v++; + if ( ++v == MAX_DIRECTMAP_MMU_QUEUE ) + { + if ( HYPERVISOR_mmu_update(u, MAX_DIRECTMAP_MMU_QUEUE) < 0 ) + return -EFAULT; + goto reset_buffer; + } address += PAGE_SIZE; machine_addr += PAGE_SIZE; pte++; } while (address && (address < end)); if ( ((v-u) != 0) && (HYPERVISOR_mmu_update(u, v-u) < 0) ) - printk(KERN_WARNING "Failed to ioremap %08lx->%08lx (%08lx)\n", - end-size, end, machine_addr-size); - vfree(u); + return -EFAULT; + + return 0; } static inline int direct_remap_area_pmd(struct mm_struct *mm, @@ -86,6 +93,7 @@ static inline int direct_remap_area_pmd(struct mm_struct *mm, pgprot_t prot, domid_t domid) { + int error = 0; unsigned long end; address &= ~PGDIR_MASK; @@ -99,12 +107,14 @@ static inline int direct_remap_area_pmd(struct mm_struct *mm, pte_t * pte = pte_alloc(mm, pmd, address); if (!pte) return -ENOMEM; - direct_remap_area_pte(pte, address, end - address, - address + machine_addr, prot, domid); + error = direct_remap_area_pte(pte, address, end - address, + address + machine_addr, prot, domid); + if ( error ) + break; address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); - return 0; + return error; } int direct_remap_area_pages(struct mm_struct *mm, diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h b/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h index f1d2b77c2e..a02e2471ea 100644 --- a/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h +++ b/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h @@ -52,7 +52,7 @@ int ctrl_if_send_message_noblock( * function returns. * 2. If @hnd is NULL then no callback is executed. */ -int ctrl_if_send_message( +int ctrl_if_send_message_block( ctrl_msg_t *msg, ctrl_msg_handler_t hnd, unsigned long id, diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h b/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h index c780f644c0..162ba1fbed 100644 --- a/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h +++ b/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h @@ -47,6 +47,11 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) return (pmd_t *) dir; } +#define pte_same(a, b) ((a).pte_low == (b).pte_low) +#define pte_page(x) (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT)))) +#define pte_none(x) (!(x).pte_low) +#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) + /* * A note on implementation of this atomic 'get-and-clear' operation. 
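The pgtable-2level.h change just below guards ptep_get_and_clear() so that clearing an already-empty PTE no longer queues a pointless hypervisor update. A stand-alone sketch of that guard, with queue_clear() standing in for queue_l1_entry_update(xp, 0):

/* Sketch only: skip the queued update when the entry is already clear. */
#include <stdio.h>

typedef struct { unsigned long pte_low; } pte_t;

#define pte_none(x) (!(x).pte_low)

static int updates_queued;

static void queue_clear(pte_t *xp)
{
    xp->pte_low = 0;                          /* the real code queues a      */
    updates_queued++;                         /* hypervisor update instead   */
}

static pte_t get_and_clear(pte_t *xp)
{
    pte_t pte = *xp;
    if (!pte_none(pte))                       /* new guard from the patch    */
        queue_clear(xp);
    return pte;
}

int main(void)
{
    pte_t a = { 0 }, b = { 0x1000 | 1 };
    get_and_clear(&a);                        /* already empty: no update    */
    get_and_clear(&b);                        /* present: one queued update  */
    printf("updates queued: %d\n", updates_queued);
    return 0;
}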
* This is actually very simple because XenoLinux can only run on a single @@ -59,13 +64,9 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) static inline pte_t ptep_get_and_clear(pte_t *xp) { pte_t pte = *xp; - queue_l1_entry_update(xp, 0); + if ( !pte_none(pte) ) + queue_l1_entry_update(xp, 0); return pte; } -#define pte_same(a, b) ((a).pte_low == (b).pte_low) -#define pte_page(x) (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT)))) -#define pte_none(x) (!(x).pte_low) -#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) - #endif /* _I386_PGTABLE_2LEVEL_H */ diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h index 4ce2930daa..d359b6eaa7 100644 --- a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h +++ b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h @@ -13,16 +13,21 @@ typedef struct privcmd_hypercall unsigned long arg[5]; } privcmd_hypercall_t; -typedef struct privcmd_blkmsg -{ - unsigned long op; - void *buf; - int buf_size; -} privcmd_blkmsg_t; - -#define IOCTL_PRIVCMD_HYPERCALL \ +/* + * @cmd: IOCTL_PRIVCMD_HYPERCALL + * @arg: &privcmd_hypercall_t + * Return: Value returned from execution of the specified hypercall. + */ +#define IOCTL_PRIVCMD_HYPERCALL \ _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t)) -#define IOCTL_PRIVCMD_BLKMSG \ - _IOC(_IOC_NONE, 'P', 1, sizeof(privcmd_blkmsg_t)) + +/* + * @cmd: IOCTL_PRIVCMD_INITDOMAIN_EVTCHN + * @arg: n/a + * Return: Port associated with domain-controller end of control event channel + * for the initial domain. + */ +#define IOCTL_PRIVCMD_INITDOMAIN_EVTCHN \ + _IOC(_IOC_NONE, 'P', 1, 0) #endif /* __PROC_CMD_H__ */ diff --git a/xenolinux-2.4.26-sparse/mm/vmalloc.c b/xenolinux-2.4.26-sparse/mm/vmalloc.c index 4d583b54a7..b030270b42 100644 --- a/xenolinux-2.4.26-sparse/mm/vmalloc.c +++ b/xenolinux-2.4.26-sparse/mm/vmalloc.c @@ -45,6 +45,10 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned lo continue; if (pte_present(page)) { struct page *ptpage = pte_page(page); +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) + if (pte_io(page)) + continue; +#endif if (VALID_PAGE(ptpage) && (!PageReserved(ptpage))) __free_page(ptpage); continue; @@ -250,11 +254,6 @@ void __vfree(void * addr, int free_area_pages) for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) { if (tmp->addr == addr) { *p = tmp->next; -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if (tmp->flags & VM_IOREMAP) - zap_page_range(&init_mm, VMALLOC_VMADDR(tmp->addr), tmp->size); - else -#endif if (free_area_pages) vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size); write_unlock(&vmlist_lock); |
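free_area_pte() above gains a CONFIG_XEN_PRIVILEGED_GUEST check so that pages behind pte_io() mappings are skipped when a vmalloc area is torn down: those frames are foreign or I/O memory, not pages this guest may hand back to its allocator. A sketch of the intent (the pte layout and the _PAGE_IO bit value are assumptions in this example):

/* Sketch only: do not free frames behind I/O-mapped PTEs. */
#include <stdio.h>

#define _PAGE_PRESENT 0x001UL
#define _PAGE_IO      0x200UL                 /* assumed software bit        */

typedef struct { unsigned long pte_low; } pte_t;

#define pte_present(x) ((x).pte_low & _PAGE_PRESENT)
#define pte_io(x)      ((x).pte_low & _PAGE_IO)

static void free_backing_page(pte_t pte)
{
    printf("free page behind pte %#lx\n", pte.pte_low);
}

static void free_area_entry(pte_t pte)
{
    if (!pte_present(pte))
        return;
    if (pte_io(pte))                          /* foreign/I/O mapping: the    */
        return;                               /* frame is not ours to free   */
    free_backing_page(pte);
}

int main(void)
{
    pte_t ram = { 0x1000 | _PAGE_PRESENT };
    pte_t io  = { 0x2000 | _PAGE_PRESENT | _PAGE_IO };
    free_area_entry(ram);                     /* freed                       */
    free_area_entry(io);                      /* skipped                     */
    return 0;
}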