author     kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk>   2003-02-21 12:46:51 +0000
committer  kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk>   2003-02-21 12:46:51 +0000
commit  b772c635b961040f38d69fc098cc330eafbaf01f (patch)
tree    9250153c71f713d86110fb200e07f6676bcc0dee /xen-2.4.16/drivers
parent  28e3860f49e692cee29ea62c3193359367128ac1 (diff)
bitkeeper revision 1.81 (3e561fbbcTcsPP9qTjC_BVR3zYpKjw)
Many files: Block-device layer all fixed up. Serialisation removed. Should fly! :-)
Diffstat (limited to 'xen-2.4.16/drivers')
-rw-r--r--  xen-2.4.16/drivers/block/ll_rw_blk.c    17
-rw-r--r--  xen-2.4.16/drivers/block/xen_block.c   208
2 files changed, 86 insertions, 139 deletions
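
A quick model of the new scheduling discipline before the diff proper: io_schedule() (in the xen_block.c hunks below) drains the schedule list in BATCH_PER_DOMAIN slices, re-queueing any domain that still has ring entries outstanding, and only then pushes the whole batch to disc via run_task_queue(&tq_disk). The stand-alone sketch below captures just that round-robin batching; struct dom, the backlog counts, and the domain names are illustrative assumptions, not Xen structures.

#include <stdio.h>

#define MAX_PENDING_REQS 32
#define BATCH_PER_DOMAIN 8

/* Toy stand-in for a domain: 'backlog' counts requests waiting on its ring. */
struct dom { const char *name; int backlog; };

static int nr_pending; /* models the atomic_t counter in the patch */

/* Mirrors do_block_io_op_domain(): consume up to max_to_do requests and
 * report whether this domain still has more to do. */
static int do_block_io_op_domain(struct dom *d, int max_to_do)
{
    while ( d->backlog > 0 && max_to_do-- > 0 )
    {
        d->backlog--;
        nr_pending++;
    }
    return d->backlog > 0;
}

int main(void)
{
    struct dom doms[2] = { { "dom0", 12 }, { "dom1", 5 } };
    int i, more = 1;

    /* Models io_schedule(): hand out BATCH_PER_DOMAIN slices while some
     * domain is still queued and the pending cap is not hit; Xen then
     * flushes the accumulated batch with run_task_queue(&tq_disk). */
    while ( more && nr_pending < MAX_PENDING_REQS )
    {
        more = 0;
        for ( i = 0; i < 2; i++ )
            more |= do_block_io_op_domain(&doms[i], BATCH_PER_DOMAIN);
    }

    printf("nr_pending=%d, %s backlog=%d, %s backlog=%d\n", nr_pending,
           doms[0].name, doms[0].backlog, doms[1].name, doms[1].backlog);
    return 0;
}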
diff --git a/xen-2.4.16/drivers/block/ll_rw_blk.c b/xen-2.4.16/drivers/block/ll_rw_blk.c
index e644974eab..615b332c4b 100644
--- a/xen-2.4.16/drivers/block/ll_rw_blk.c
+++ b/xen-2.4.16/drivers/block/ll_rw_blk.c
@@ -5,7 +5,6 @@
* Copyright (C) 1994, Karl Keyte: Added support for disk statistics
* Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
* Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
- * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000
*/
/*
@@ -40,14 +39,6 @@
#define DPRINTK(_f, _a...) ((void)0)
#endif
-/* XXX SMH: temporarily we just dive at xen_block completion handler */
-extern void end_block_io_op(struct buffer_head *bh);
-
-static void end_buffer_dummy(struct buffer_head *bh, int uptodate)
-{
- /* do nothing */
-}
-
/* This will die as all synchronous stuff is coming to an end */
#define complete(_r) panic("completion.h stuff may be needed...")
@@ -1036,8 +1027,6 @@ out:
return 0;
end_io:
bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
- /* XXX SMH: do we need this every time? */
- end_block_io_op(bh);
return 0;
}
@@ -1107,8 +1096,6 @@ void generic_make_request (int rw, struct buffer_head * bh)
/* Yecch again */
bh->b_end_io(bh, 0);
- /* XXX SMH */
- end_block_io_op(bh);
return;
}
}
@@ -1238,7 +1225,6 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
/* We have the buffer lock */
atomic_inc(&bh->b_count);
- bh->b_end_io = end_buffer_dummy;
switch(rw) {
case WRITE:
@@ -1258,8 +1244,6 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
BUG();
end_io:
bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
- /* XXX SMH */
- end_block_io_op(bh);
continue;
}
@@ -1313,7 +1297,6 @@ int end_that_request_first (struct request *req, int uptodate, char *name)
req->bh = bh->b_reqnext;
bh->b_reqnext = NULL;
bh->b_end_io(bh, uptodate);
- end_block_io_op(bh);
if ((bh = req->bh) != NULL) {
req->hard_sector += nsect;
req->hard_nr_sectors -= nsect;
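
Everything above in ll_rw_blk.c is deletion: the hard-wired diversion into end_block_io_op() and the end_buffer_dummy() stub go away because completion now travels through the buffer head's own b_end_io hook, which xen_block.c installs per request (next file). A self-contained sketch of that callback convention, using a cut-down stand-in for the 2.4 buffer_head rather than the kernel's real one:

#include <stdio.h>

/* Stand-in for the 2.4 buffer_head: the only contract needed here is the
 * b_end_io(bh, uptodate) completion hook plus the two per-request fields
 * the patch adds. */
struct buffer_head {
    void (*b_end_io)(struct buffer_head *bh, int uptodate);
    void *b_xen_domain;   /* requesting domain, stashed at dispatch time */
    void *b_xen_id;       /* guest-supplied request id */
};

/* What xen_block.c now installs as bh->b_end_io: in the patch this calls
 * make_response() and frees the buffer head back to its slab cache. */
static void end_block_io_op(struct buffer_head *bh, int uptodate)
{
    printf("complete id=%p status=%lu\n",
           bh->b_xen_id, uptodate ? 0UL : 1UL);
}

/* The generic block layer side: on I/O completion it knows nothing about
 * Xen and simply invokes whatever callback the dispatcher installed. */
static void buffer_io_done(struct buffer_head *bh, int uptodate)
{
    bh->b_end_io(bh, uptodate);
}

int main(void)
{
    struct buffer_head bh = { end_block_io_op, NULL, (void *)0x42 };
    buffer_io_done(&bh, 1);   /* driver signals success */
    return 0;
}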
diff --git a/xen-2.4.16/drivers/block/xen_block.c b/xen-2.4.16/drivers/block/xen_block.c
index 177dc23bb0..bf70737dab 100644
--- a/xen-2.4.16/drivers/block/xen_block.c
+++ b/xen-2.4.16/drivers/block/xen_block.c
@@ -23,24 +23,26 @@
#define DPRINTK(_f, _a...) ((void)0)
#endif
-typedef struct blk_request
-{
- struct buffer_head *bh;
- void *id;
- struct task_struct *domain;
-} blk_request_t;
-
+/*
+ * These are rather arbitrary. They are fairly large because adjacent
+ * requests pulled from a communication ring are quite likely to end
+ * up being part of the same scatter/gather request at the disc.
+ * It might be a good idea to add scatter/gather support explicitly to
+ * the scatter/gather ring (eg. each request has an array of N pointers);
+ * then these values would better reflect real costs at the disc.
+ */
#define MAX_PENDING_REQS 32
#define BATCH_PER_DOMAIN 8
-static kmem_cache_t *blk_request_cachep;
+static kmem_cache_t *buffer_head_cachep;
static atomic_t nr_pending;
static void io_schedule(unsigned long unused);
-static int do_block_io_op_domain(struct task_struct* task, int max_to_do);
+static int do_block_io_op_domain(struct task_struct *p, int max_to_do);
static void dispatch_rw_block_io(struct task_struct *p, int index);
static void dispatch_probe_block_io(struct task_struct *p, int index);
static void dispatch_debug_block_io(struct task_struct *p, int index);
+static void make_response(struct task_struct *p, void *id, unsigned long st);
/******************************************************************
@@ -104,6 +106,7 @@ static void io_schedule(unsigned long unused)
struct task_struct *p;
struct list_head *ent;
+ /* Queue up a batch of requests. */
while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
!list_empty(&io_schedule_list) )
{
@@ -113,10 +116,20 @@ static void io_schedule(unsigned long unused)
if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
add_to_blkdev_list_tail(p);
}
+
+ /* Push the batch through to disc. */
+ run_task_queue(&tq_disk);
}
static void maybe_trigger_io_schedule(void)
{
+ /*
+ * Needed so that two processes, who together make the following predicate
+ * true, don't both read stale values and evaluate the predicate
+ * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+ */
+ smp_mb();
+
if ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS/2)) &&
!list_empty(&io_schedule_list) )
{
@@ -127,51 +140,17 @@ static void maybe_trigger_io_schedule(void)
/******************************************************************
- * COMPLETION CALLBACK -- XXX Hook properly into bh->b_end_io
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
*/
-void end_block_io_op(struct buffer_head * bh)
+static void end_block_io_op(struct buffer_head *bh, int uptodate)
{
- unsigned long cpu_mask;
- blk_request_t *blk_request = NULL;
- unsigned long flags;
- struct task_struct *p;
- int position = 0;
- blk_ring_t *blk_ring;
-
- DPRINTK("XEN end_block_io_op, bh: %p\n", bh);
-
- if ( (blk_request = (blk_request_t *)bh->b_xen_request) == NULL )
- goto bad_interrupt;
-
atomic_dec(&nr_pending);
-
- p = blk_request->domain;
+ make_response(bh->b_xen_domain, bh->b_xen_id, uptodate ? 0 : 1);
- /* Place on the response ring for the relevant domain. */
- spin_lock_irqsave(&p->blk_ring_lock, flags);
- blk_ring = p->blk_ring_base;
- position = blk_ring->resp_prod;
- blk_ring->resp_ring[position].id = blk_request->id;
- blk_ring->resp_ring[position].status = 0;
- blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
- spin_unlock_irqrestore(&p->blk_ring_lock, flags);
-
- /* Kick the relevant domain. */
- cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
- guest_event_notify(cpu_mask);
-
- /* Free state associated with this request. */
- if ( blk_request->bh )
- kfree(blk_request->bh);
- kmem_cache_free(blk_request_cachep, blk_request);
+ kmem_cache_free(buffer_head_cachep, bh);
maybe_trigger_io_schedule();
-
- return;
-
- bad_interrupt:
- panic("Block IO interrupt received for unknown buffer [%p]\n", bh);
}
@@ -193,55 +172,43 @@ long do_block_io_op(void)
* DOWNWARD CALLS -- These interface with the block-device layer proper.
*/
-static int do_block_io_op_domain(struct task_struct* task, int max_to_do)
+static int do_block_io_op_domain(struct task_struct* p, int max_to_do)
{
- blk_ring_t *blk_ring = task->blk_ring_base;
- int loop, more_to_do = 0;
- int resp_ring_ents =
- (blk_ring->resp_prod - blk_ring->resp_cons) & (BLK_RESP_RING_SIZE - 1);
-
- DPRINTK("XEN do_block_io_op %d %d\n",
- blk_ring->req_cons, blk_ring->req_prod);
+ blk_ring_t *blk_ring = p->blk_ring_base;
+ int i, more_to_do = 0;
- for ( loop = blk_ring->req_cons;
- loop != blk_ring->req_prod;
- loop = BLK_REQ_RING_INC(loop) )
+ for ( i = p->blk_req_cons;
+ i != blk_ring->req_prod;
+ i = BLK_RING_INC(i) )
{
- /*
- * Bail if we've reached the batch allowance for thsi interface,
- * or if we risk producing enough responses to overflow the
- * communication ring.
- */
- if ( (max_to_do-- == 0) ||
- ((atomic_read(&nr_pending) + resp_ring_ents) >
- BLK_RESP_RING_MAX_ENTRIES) )
+ if ( max_to_do-- == 0 )
{
more_to_do = 1;
break;
}
- switch ( blk_ring->req_ring[loop].operation )
+ switch ( blk_ring->ring[i].req.operation )
{
case XEN_BLOCK_READ:
case XEN_BLOCK_WRITE:
- dispatch_rw_block_io(task, loop);
+ dispatch_rw_block_io(p, i);
break;
case XEN_BLOCK_PROBE:
- dispatch_probe_block_io(task, loop);
+ dispatch_probe_block_io(p, i);
break;
case XEN_BLOCK_DEBUG:
- dispatch_debug_block_io(task, loop);
+ dispatch_debug_block_io(p, i);
break;
default:
panic("error: unknown block io operation [%d]\n",
- blk_ring->req_ring[loop].operation);
+ blk_ring->ring[i].req.operation);
}
}
- blk_ring->req_cons = loop;
+ p->blk_req_cons = i;
return more_to_do;
}
@@ -255,20 +222,11 @@ static void dispatch_probe_block_io(struct task_struct *p, int index)
extern void ide_probe_devices(xen_disk_info_t *xdi);
blk_ring_t *blk_ring = p->blk_ring_base;
xen_disk_info_t *xdi;
- unsigned long flags, cpu_mask;
-
- xdi = phys_to_virt((unsigned long)blk_ring->req_ring[index].buffer);
-
- ide_probe_devices(xdi);
- spin_lock_irqsave(&p->blk_ring_lock, flags);
- blk_ring->resp_ring[blk_ring->resp_prod].id = blk_ring->req_ring[index].id;
- blk_ring->resp_ring[blk_ring->resp_prod].status = 0;
- blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
- spin_unlock_irqrestore(&p->blk_ring_lock, flags);
+ xdi = phys_to_virt((unsigned long)blk_ring->ring[index].req.buffer);
+ ide_probe_devices(xdi);
- cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
- guest_event_notify(cpu_mask);
+ make_response(p, blk_ring->ring[index].req.id, 0);
}
static void dispatch_rw_block_io(struct task_struct *p, int index)
@@ -276,49 +234,45 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
blk_ring_t *blk_ring = p->blk_ring_base;
struct buffer_head *bh;
- struct request_queue *rq;
int operation;
- blk_request_t *blk_request;
/*
* check to make sure that the block request seems at least
* a bit legitimate
*/
- if ( (blk_ring->req_ring[index].block_size & (0x200 - 1)) != 0 )
+ if ( (blk_ring->ring[index].req.block_size & (0x200 - 1)) != 0 )
panic("error: dodgy block size: %d\n",
- blk_ring->req_ring[index].block_size);
+ blk_ring->ring[index].req.block_size);
- if ( blk_ring->req_ring[index].buffer == NULL )
+ if ( blk_ring->ring[index].req.buffer == NULL )
panic("xen_block: bogus buffer from guestOS\n");
- DPRINTK("req_cons: %d req_prod %d index: %d, op: %s, pri: %s\n",
- blk_ring->req_cons, blk_ring->req_prod, index,
- (blk_ring->req_ring[index].operation == XEN_BLOCK_READ ?
- "read" : "write"),
- (blk_ring->req_ring[index].priority == XEN_BLOCK_SYNC ?
- "sync" : "async"));
+ DPRINTK("req_cons: %d req_prod %d index: %d, op: %s\n",
+ p->blk_req_cons, blk_ring->req_prod, index,
+ (blk_ring->ring[index].req.operation == XEN_BLOCK_READ ?
+ "read" : "write"));
atomic_inc(&nr_pending);
- blk_request = kmem_cache_alloc(blk_request_cachep, GFP_ATOMIC);
-
- /* we'll be doing this frequently, would a cache be appropriate? */
- bh = (struct buffer_head *) kmalloc(sizeof(struct buffer_head),
- GFP_KERNEL);
+ bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
if ( bh == NULL ) panic("bh is null\n");
/* set just the important bits of the buffer header */
memset (bh, 0, sizeof (struct buffer_head));
- bh->b_blocknr = blk_ring->req_ring[index].block_number;
- bh->b_size = blk_ring->req_ring[index].block_size;
- bh->b_dev = blk_ring->req_ring[index].device;
- bh->b_rsector = blk_ring->req_ring[index].sector_number;
+ bh->b_blocknr = blk_ring->ring[index].req.block_number;
+ bh->b_size = blk_ring->ring[index].req.block_size;
+ bh->b_dev = blk_ring->ring[index].req.device;
+ bh->b_rsector = blk_ring->ring[index].req.sector_number;
bh->b_data = phys_to_virt((unsigned long)
- blk_ring->req_ring[index].buffer);
+ blk_ring->ring[index].req.buffer);
bh->b_count.counter = 1;
- bh->b_xen_request = (void *)blk_request;
-
- if ( blk_ring->req_ring[index].operation == XEN_BLOCK_WRITE )
+ bh->b_end_io = end_block_io_op;
+
+ /* Save meta data about request. */
+ bh->b_xen_domain = p;
+ bh->b_xen_id = blk_ring->ring[index].req.id;
+
+ if ( blk_ring->ring[index].req.operation == XEN_BLOCK_WRITE )
{
bh->b_state = ((1 << BH_JBD) | (1 << BH_Mapped) | (1 << BH_Req) |
(1 << BH_Dirty) | (1 << BH_Uptodate));
@@ -330,15 +284,8 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
operation = READ;
}
- /* save meta data about request */
- blk_request->id = blk_ring->req_ring[index].id;
- blk_request->bh = bh;
- blk_request->domain = p;
-
- /* dispatch single block request */
- ll_rw_block(operation, 1, &bh); /* linux top half */
- rq = blk_get_queue(bh->b_rdev);
- generic_unplug_device(rq); /* linux bottom half */
+ /* Dispatch a single request. We'll flush it to disc later. */
+ ll_rw_block(operation, 1, &bh);
}
@@ -347,6 +294,26 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
* MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
*/
+static void make_response(struct task_struct *p, void *id, unsigned long st)
+{
+ unsigned long cpu_mask, flags;
+ int position;
+ blk_ring_t *blk_ring;
+
+ /* Place on the response ring for the relevant domain. */
+ spin_lock_irqsave(&p->blk_ring_lock, flags);
+ blk_ring = p->blk_ring_base;
+ position = blk_ring->resp_prod;
+ blk_ring->ring[position].resp.id = id;
+ blk_ring->ring[position].resp.status = st;
+ blk_ring->resp_prod = BLK_RING_INC(position);
+ spin_unlock_irqrestore(&p->blk_ring_lock, flags);
+
+ /* Kick the relevant domain. */
+ cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
+ guest_event_notify(cpu_mask);
+}
+
static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs)
{
printk("Dumping block queue stats: nr_pending = %d\n",
@@ -378,12 +345,9 @@ void initialize_block_io ()
spin_lock_init(&io_schedule_list_lock);
INIT_LIST_HEAD(&io_schedule_list);
- blk_request_cachep = kmem_cache_create(
- "blk_request_cache", sizeof(blk_request_t),
+ buffer_head_cachep = kmem_cache_create(
+ "buffer_head_cache", sizeof(struct buffer_head),
0, SLAB_HWCACHE_ALIGN, NULL, NULL);
add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");
}
-
-
-
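
For reference, a stand-alone model of the unified ring the patch switches to: a single array of request/response unions indexed with BLK_RING_INC, with Xen's consumer cursor held in private state (p->blk_req_cons) rather than in the shared page. Field and macro names follow the diff; BLK_RING_SIZE and the entry payloads are illustrative assumptions. In the real shared-memory setting the producer/consumer indices are written by different CPUs, which is what the smp_mb() in maybe_trigger_io_schedule() guards against.

#include <stdio.h>

#define BLK_RING_SIZE    16                            /* power of two (assumed) */
#define BLK_RING_INC(_i) (((_i) + 1) & (BLK_RING_SIZE - 1))

typedef struct { void *id; int operation; }          blk_ring_req_entry_t;
typedef struct { void *id; unsigned long status; }   blk_ring_resp_entry_t;

typedef struct {
    unsigned int req_prod;   /* written by the guest */
    unsigned int resp_prod;  /* written by Xen       */
    union {
        blk_ring_req_entry_t  req;
        blk_ring_resp_entry_t resp;
    } ring[BLK_RING_SIZE];
} blk_ring_t;

/* Consumer cursor lives outside the ring, like p->blk_req_cons, so the
 * guest cannot corrupt Xen's record of what it has already consumed. */
static unsigned int blk_req_cons;

/* Mirrors make_response(): fill the next response slot, then advance
 * resp_prod to publish it (under p->blk_ring_lock in the real code). */
static void make_response(blk_ring_t *r, void *id, unsigned long st)
{
    int position = r->resp_prod;
    r->ring[position].resp.id     = id;
    r->ring[position].resp.status = st;
    r->resp_prod = BLK_RING_INC(position);
}

int main(void)
{
    static blk_ring_t r;
    unsigned int i;

    /* Guest side: produce two requests and bump req_prod. */
    for ( i = 0; i < 2; i++ )
    {
        r.ring[r.req_prod].req.id = (void *)(unsigned long)(i + 1);
        r.req_prod = BLK_RING_INC(r.req_prod);
    }

    /* Xen side: consume everything up to req_prod, respond to each.
     * (With real shared memory a barrier is needed before this check,
     * as in maybe_trigger_io_schedule().) */
    for ( ; blk_req_cons != r.req_prod; blk_req_cons = BLK_RING_INC(blk_req_cons) )
        make_response(&r, r.ring[blk_req_cons].req.id, 0);

    printf("req_prod=%u resp_prod=%u\n", r.req_prod, r.resp_prod);
    return 0;
}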