diff options
author | kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk> | 2003-02-21 12:46:51 +0000 |
---|---|---|
committer | kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk> | 2003-02-21 12:46:51 +0000 |
commit | b772c635b961040f38d69fc098cc330eafbaf01f (patch) | |
tree | 9250153c71f713d86110fb200e07f6676bcc0dee /xen-2.4.16 | |
parent | 28e3860f49e692cee29ea62c3193359367128ac1 (diff) | |
download | xen-b772c635b961040f38d69fc098cc330eafbaf01f.tar.gz xen-b772c635b961040f38d69fc098cc330eafbaf01f.tar.bz2 xen-b772c635b961040f38d69fc098cc330eafbaf01f.zip |
bitkeeper revision 1.81 (3e561fbbcTcsPP9qTjC_BVR3zYpKjw)
Many files:
Block-device layer all fixed up. Serialisation removed. Should fly! :-)
Diffstat (limited to 'xen-2.4.16')
-rw-r--r-- | xen-2.4.16/drivers/block/ll_rw_blk.c | 17 | ||||
-rw-r--r-- | xen-2.4.16/drivers/block/xen_block.c | 208 | ||||
-rw-r--r-- | xen-2.4.16/include/hypervisor-ifs/block.h | 30 | ||||
-rw-r--r-- | xen-2.4.16/include/xeno/blkdev.h | 24 | ||||
-rw-r--r-- | xen-2.4.16/include/xeno/sched.h | 1 |
5 files changed, 103 insertions, 177 deletions
diff --git a/xen-2.4.16/drivers/block/ll_rw_blk.c b/xen-2.4.16/drivers/block/ll_rw_blk.c index e644974eab..615b332c4b 100644 --- a/xen-2.4.16/drivers/block/ll_rw_blk.c +++ b/xen-2.4.16/drivers/block/ll_rw_blk.c @@ -5,7 +5,6 @@ * Copyright (C) 1994, Karl Keyte: Added support for disk statistics * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> - * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 */ /* @@ -40,14 +39,6 @@ #define DPRINTK(_f, _a...) ((void)0) #endif -/* XXX SMH: temporarily we just dive at xen_block completion handler */ -extern void end_block_io_op(struct buffer_head *bh); - -static void end_buffer_dummy(struct buffer_head *bh, int uptodate) -{ - /* do nothing */ -} - /* This will die as all synchronous stuff is coming to an end */ #define complete(_r) panic("completion.h stuff may be needed...") @@ -1036,8 +1027,6 @@ out: return 0; end_io: bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); - /* XXX SMH: do we need this every time? */ - end_block_io_op(bh); return 0; } @@ -1107,8 +1096,6 @@ void generic_make_request (int rw, struct buffer_head * bh) /* Yecch again */ bh->b_end_io(bh, 0); - /* XXX SMH */ - end_block_io_op(bh); return; } } @@ -1238,7 +1225,6 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) /* We have the buffer lock */ atomic_inc(&bh->b_count); - bh->b_end_io = end_buffer_dummy; switch(rw) { case WRITE: @@ -1258,8 +1244,6 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) BUG(); end_io: bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); - /* XXX SMH */ - end_block_io_op(bh); continue; } @@ -1313,7 +1297,6 @@ int end_that_request_first (struct request *req, int uptodate, char *name) req->bh = bh->b_reqnext; bh->b_reqnext = NULL; bh->b_end_io(bh, uptodate); - end_block_io_op(bh); if ((bh = req->bh) != NULL) { req->hard_sector += nsect; req->hard_nr_sectors -= nsect; diff --git a/xen-2.4.16/drivers/block/xen_block.c b/xen-2.4.16/drivers/block/xen_block.c index 177dc23bb0..bf70737dab 100644 --- a/xen-2.4.16/drivers/block/xen_block.c +++ b/xen-2.4.16/drivers/block/xen_block.c @@ -23,24 +23,26 @@ #define DPRINTK(_f, _a...) ((void)0) #endif -typedef struct blk_request -{ - struct buffer_head *bh; - void *id; - struct task_struct *domain; -} blk_request_t; - +/* + * These are rather arbitrary. They are fairly large because adjacent + * requests pulled from a communication ring are quite likely to end + * up being part of the same scatter/gather request at the disc. + * It might be a good idea to add scatter/gather support explicitly to + * the scatter/gather ring (eg. each request has an array of N pointers); + * then these values would better reflect real costs at the disc. + */ #define MAX_PENDING_REQS 32 #define BATCH_PER_DOMAIN 8 -static kmem_cache_t *blk_request_cachep; +static kmem_cache_t *buffer_head_cachep; static atomic_t nr_pending; static void io_schedule(unsigned long unused); -static int do_block_io_op_domain(struct task_struct* task, int max_to_do); +static int do_block_io_op_domain(struct task_struct *p, int max_to_do); static void dispatch_rw_block_io(struct task_struct *p, int index); static void dispatch_probe_block_io(struct task_struct *p, int index); static void dispatch_debug_block_io(struct task_struct *p, int index); +static void make_response(struct task_struct *p, void *id, unsigned long st); /****************************************************************** @@ -104,6 +106,7 @@ static void io_schedule(unsigned long unused) struct task_struct *p; struct list_head *ent; + /* Queue up a batch of requests. */ while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) && !list_empty(&io_schedule_list) ) { @@ -113,10 +116,20 @@ static void io_schedule(unsigned long unused) if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) ) add_to_blkdev_list_tail(p); } + + /* Push the batch through to disc. */ + run_task_queue(&tq_disk); } static void maybe_trigger_io_schedule(void) { + /* + * Needed so that two processes, who together make the following predicate + * true, don't both read stale values and evaluate the predicate + * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... + */ + smp_mb(); + if ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS/2)) && !list_empty(&io_schedule_list) ) { @@ -127,51 +140,17 @@ static void maybe_trigger_io_schedule(void) /****************************************************************** - * COMPLETION CALLBACK -- XXX Hook properly into bh->b_end_io + * COMPLETION CALLBACK -- Called as bh->b_end_io() */ -void end_block_io_op(struct buffer_head * bh) +static void end_block_io_op(struct buffer_head *bh, int uptodate) { - unsigned long cpu_mask; - blk_request_t *blk_request = NULL; - unsigned long flags; - struct task_struct *p; - int position = 0; - blk_ring_t *blk_ring; - - DPRINTK("XEN end_block_io_op, bh: %p\n", bh); - - if ( (blk_request = (blk_request_t *)bh->b_xen_request) == NULL ) - goto bad_interrupt; - atomic_dec(&nr_pending); - - p = blk_request->domain; + make_response(bh->b_xen_domain, bh->b_xen_id, uptodate ? 0 : 1); - /* Place on the response ring for the relevant domain. */ - spin_lock_irqsave(&p->blk_ring_lock, flags); - blk_ring = p->blk_ring_base; - position = blk_ring->resp_prod; - blk_ring->resp_ring[position].id = blk_request->id; - blk_ring->resp_ring[position].status = 0; - blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod); - spin_unlock_irqrestore(&p->blk_ring_lock, flags); - - /* Kick the relevant domain. */ - cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP); - guest_event_notify(cpu_mask); - - /* Free state associated with this request. */ - if ( blk_request->bh ) - kfree(blk_request->bh); - kmem_cache_free(blk_request_cachep, blk_request); + kmem_cache_free(buffer_head_cachep, bh); maybe_trigger_io_schedule(); - - return; - - bad_interrupt: - panic("Block IO interrupt received for unknown buffer [%p]\n", bh); } @@ -193,55 +172,43 @@ long do_block_io_op(void) * DOWNWARD CALLS -- These interface with the block-device layer proper. */ -static int do_block_io_op_domain(struct task_struct* task, int max_to_do) +static int do_block_io_op_domain(struct task_struct* p, int max_to_do) { - blk_ring_t *blk_ring = task->blk_ring_base; - int loop, more_to_do = 0; - int resp_ring_ents = - (blk_ring->resp_prod - blk_ring->resp_cons) & (BLK_RESP_RING_SIZE - 1); - - DPRINTK("XEN do_block_io_op %d %d\n", - blk_ring->req_cons, blk_ring->req_prod); + blk_ring_t *blk_ring = p->blk_ring_base; + int i, more_to_do = 0; - for ( loop = blk_ring->req_cons; - loop != blk_ring->req_prod; - loop = BLK_REQ_RING_INC(loop) ) + for ( i = p->blk_req_cons; + i != blk_ring->req_prod; + i = BLK_RING_INC(i) ) { - /* - * Bail if we've reached the batch allowance for thsi interface, - * or if we risk producing enough responses to overflow the - * communication ring. - */ - if ( (max_to_do-- == 0) || - ((atomic_read(&nr_pending) + resp_ring_ents) > - BLK_RESP_RING_MAX_ENTRIES) ) + if ( max_to_do-- == 0 ) { more_to_do = 1; break; } - switch ( blk_ring->req_ring[loop].operation ) + switch ( blk_ring->ring[i].req.operation ) { case XEN_BLOCK_READ: case XEN_BLOCK_WRITE: - dispatch_rw_block_io(task, loop); + dispatch_rw_block_io(p, i); break; case XEN_BLOCK_PROBE: - dispatch_probe_block_io(task, loop); + dispatch_probe_block_io(p, i); break; case XEN_BLOCK_DEBUG: - dispatch_debug_block_io(task, loop); + dispatch_debug_block_io(p, i); break; default: panic("error: unknown block io operation [%d]\n", - blk_ring->req_ring[loop].operation); + blk_ring->ring[i].req.operation); } } - blk_ring->req_cons = loop; + p->blk_req_cons = i; return more_to_do; } @@ -255,20 +222,11 @@ static void dispatch_probe_block_io(struct task_struct *p, int index) extern void ide_probe_devices(xen_disk_info_t *xdi); blk_ring_t *blk_ring = p->blk_ring_base; xen_disk_info_t *xdi; - unsigned long flags, cpu_mask; - - xdi = phys_to_virt((unsigned long)blk_ring->req_ring[index].buffer); - - ide_probe_devices(xdi); - spin_lock_irqsave(&p->blk_ring_lock, flags); - blk_ring->resp_ring[blk_ring->resp_prod].id = blk_ring->req_ring[index].id; - blk_ring->resp_ring[blk_ring->resp_prod].status = 0; - blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod); - spin_unlock_irqrestore(&p->blk_ring_lock, flags); + xdi = phys_to_virt((unsigned long)blk_ring->ring[index].req.buffer); + ide_probe_devices(xdi); - cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP); - guest_event_notify(cpu_mask); + make_response(p, blk_ring->ring[index].req.id, 0); } static void dispatch_rw_block_io(struct task_struct *p, int index) @@ -276,49 +234,45 @@ static void dispatch_rw_block_io(struct task_struct *p, int index) extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); blk_ring_t *blk_ring = p->blk_ring_base; struct buffer_head *bh; - struct request_queue *rq; int operation; - blk_request_t *blk_request; /* * check to make sure that the block request seems at least * a bit legitimate */ - if ( (blk_ring->req_ring[index].block_size & (0x200 - 1)) != 0 ) + if ( (blk_ring->ring[index].req.block_size & (0x200 - 1)) != 0 ) panic("error: dodgy block size: %d\n", - blk_ring->req_ring[index].block_size); + blk_ring->ring[index].req.block_size); - if ( blk_ring->req_ring[index].buffer == NULL ) + if ( blk_ring->ring[index].req.buffer == NULL ) panic("xen_block: bogus buffer from guestOS\n"); - DPRINTK("req_cons: %d req_prod %d index: %d, op: %s, pri: %s\n", - blk_ring->req_cons, blk_ring->req_prod, index, - (blk_ring->req_ring[index].operation == XEN_BLOCK_READ ? - "read" : "write"), - (blk_ring->req_ring[index].priority == XEN_BLOCK_SYNC ? - "sync" : "async")); + DPRINTK("req_cons: %d req_prod %d index: %d, op: %s\n", + p->blk_req_cons, blk_ring->req_prod, index, + (blk_ring->ring[index].req.operation == XEN_BLOCK_READ ? + "read" : "write")); atomic_inc(&nr_pending); - blk_request = kmem_cache_alloc(blk_request_cachep, GFP_ATOMIC); - - /* we'll be doing this frequently, would a cache be appropriate? */ - bh = (struct buffer_head *) kmalloc(sizeof(struct buffer_head), - GFP_KERNEL); + bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL); if ( bh == NULL ) panic("bh is null\n"); /* set just the important bits of the buffer header */ memset (bh, 0, sizeof (struct buffer_head)); - bh->b_blocknr = blk_ring->req_ring[index].block_number; - bh->b_size = blk_ring->req_ring[index].block_size; - bh->b_dev = blk_ring->req_ring[index].device; - bh->b_rsector = blk_ring->req_ring[index].sector_number; + bh->b_blocknr = blk_ring->ring[index].req.block_number; + bh->b_size = blk_ring->ring[index].req.block_size; + bh->b_dev = blk_ring->ring[index].req.device; + bh->b_rsector = blk_ring->ring[index].req.sector_number; bh->b_data = phys_to_virt((unsigned long) - blk_ring->req_ring[index].buffer); + blk_ring->ring[index].req.buffer); bh->b_count.counter = 1; - bh->b_xen_request = (void *)blk_request; - - if ( blk_ring->req_ring[index].operation == XEN_BLOCK_WRITE ) + bh->b_end_io = end_block_io_op; + + /* Save meta data about request. */ + bh->b_xen_domain = p; + bh->b_xen_id = blk_ring->ring[index].req.id; + + if ( blk_ring->ring[index].req.operation == XEN_BLOCK_WRITE ) { bh->b_state = ((1 << BH_JBD) | (1 << BH_Mapped) | (1 << BH_Req) | (1 << BH_Dirty) | (1 << BH_Uptodate)); @@ -330,15 +284,8 @@ static void dispatch_rw_block_io(struct task_struct *p, int index) operation = READ; } - /* save meta data about request */ - blk_request->id = blk_ring->req_ring[index].id; - blk_request->bh = bh; - blk_request->domain = p; - - /* dispatch single block request */ - ll_rw_block(operation, 1, &bh); /* linux top half */ - rq = blk_get_queue(bh->b_rdev); - generic_unplug_device(rq); /* linux bottom half */ + /* Dispatch a single request. We'll flush it to disc later. */ + ll_rw_block(operation, 1, &bh); } @@ -347,6 +294,26 @@ static void dispatch_rw_block_io(struct task_struct *p, int index) * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING */ +static void make_response(struct task_struct *p, void *id, unsigned long st) +{ + unsigned long cpu_mask, flags; + int position; + blk_ring_t *blk_ring; + + /* Place on the response ring for the relevant domain. */ + spin_lock_irqsave(&p->blk_ring_lock, flags); + blk_ring = p->blk_ring_base; + position = blk_ring->resp_prod; + blk_ring->ring[position].resp.id = id; + blk_ring->ring[position].resp.status = st; + blk_ring->resp_prod = BLK_RING_INC(position); + spin_unlock_irqrestore(&p->blk_ring_lock, flags); + + /* Kick the relevant domain. */ + cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP); + guest_event_notify(cpu_mask); +} + static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs) { printk("Dumping block queue stats: nr_pending = %d\n", @@ -378,12 +345,9 @@ void initialize_block_io () spin_lock_init(&io_schedule_list_lock); INIT_LIST_HEAD(&io_schedule_list); - blk_request_cachep = kmem_cache_create( - "blk_request_cache", sizeof(blk_request_t), + buffer_head_cachep = kmem_cache_create( + "buffer_head_cache", sizeof(struct buffer_head), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); add_key_handler('b', dump_blockq, "dump xen ide blkdev stats"); } - - - diff --git a/xen-2.4.16/include/hypervisor-ifs/block.h b/xen-2.4.16/include/hypervisor-ifs/block.h index 1bf198e196..627055bf0b 100644 --- a/xen-2.4.16/include/hypervisor-ifs/block.h +++ b/xen-2.4.16/include/hypervisor-ifs/block.h @@ -21,26 +21,14 @@ #define XEN_BLOCK_PROBE 8 /* determine io configuration from hypervisor */ #define XEN_BLOCK_DEBUG 16 /* debug */ -#define XEN_BLOCK_SYNC 2 -#define XEN_BLOCK_ASYNC 3 - -#define XEN_BLOCK_MAX_DOMAINS 32 /* NOTE: FIX THIS. VALUE SHOULD COME FROM? */ - -#define BLK_REQ_RING_SIZE 64 -#define BLK_RESP_RING_SIZE 64 - -#define BLK_REQ_RING_MAX_ENTRIES (BLK_REQ_RING_SIZE - 2) -#define BLK_RESP_RING_MAX_ENTRIES (BLK_RESP_RING_SIZE - 2) - -#define BLK_REQ_RING_INC(_i) (((_i)+1) & (BLK_REQ_RING_SIZE-1)) -#define BLK_RESP_RING_INC(_i) (((_i)+1) & (BLK_RESP_RING_SIZE-1)) -#define BLK_REQ_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_REQ_RING_SIZE-1)) -#define BLK_RESP_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_RESP_RING_SIZE-1)) +#define BLK_RING_SIZE 128 +#define BLK_RING_MAX_ENTRIES (BLK_RING_SIZE - 2) +#define BLK_RING_INC(_i) (((_i)+1) & (BLK_RING_SIZE-1)) +#define BLK_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_RING_SIZE-1)) typedef struct blk_ring_req_entry { void * id; /* for guest os use */ - int priority; /* SYNC or ASYNC for now */ int operation; /* XEN_BLOCK_READ or XEN_BLOCK_WRITE */ char * buffer; unsigned long block_number; /* block number */ @@ -57,10 +45,12 @@ typedef struct blk_ring_resp_entry typedef struct blk_ring_st { - unsigned int req_prod, req_cons; - unsigned int resp_prod, resp_cons; - blk_ring_req_entry_t req_ring[BLK_REQ_RING_SIZE]; - blk_ring_resp_entry_t resp_ring[BLK_RESP_RING_SIZE]; + unsigned int req_prod; /* Request producer. Updated by guest OS. */ + unsigned int resp_prod; /* Response producer. Updated by Xen. */ + union { + blk_ring_req_entry_t req; + blk_ring_resp_entry_t resp; + } ring[BLK_RING_SIZE]; } blk_ring_t; #define MAX_XEN_DISK_COUNT 100 diff --git a/xen-2.4.16/include/xeno/blkdev.h b/xen-2.4.16/include/xeno/blkdev.h index 03ea926af2..3fbc78343a 100644 --- a/xen-2.4.16/include/xeno/blkdev.h +++ b/xen-2.4.16/include/xeno/blkdev.h @@ -62,8 +62,8 @@ enum bh_state_bits { * for private allocation by other entities */ }; + struct buffer_head { - struct buffer_head *b_next; /* Hash queue list */ unsigned long b_blocknr; /* block number */ unsigned short b_size; /* block size */ unsigned short b_list; /* List that this buffer appears */ @@ -72,24 +72,18 @@ struct buffer_head { atomic_t b_count; /* users using this block */ kdev_t b_rdev; /* Real device */ unsigned long b_state; /* buffer state bitmap (see above) */ - unsigned long b_flushtime; /* Time when (dirty) buffer should be written */ - struct buffer_head *b_next_free;/* lru/free list linkage */ - struct buffer_head *b_prev_free;/* doubly linked list of buffers */ - struct buffer_head *b_this_page;/* circular list of buffers in one page */ struct buffer_head *b_reqnext; /* request queue */ - struct buffer_head **b_pprev; /* doubly linked list of hash-queue */ char * b_data; /* pointer to data block */ struct pfn_info *b_page; /* the page this bh is mapped to */ - void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ - void *b_private; /* reserved for b_end_io */ + void (*b_end_io)(struct buffer_head *bh, int uptodate); unsigned long b_rsector; /* Real buffer location on disk */ - struct inode * b_inode; - struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ - void *b_xen_request; /* xen request structure */ + /* Both used by b_end_io function in xen_block.c */ + void *b_xen_domain; + void *b_xen_id; }; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); @@ -127,15 +121,9 @@ static inline void mark_buffer_clean(struct buffer_head * bh) static inline void buffer_IO_error(struct buffer_head * bh) { - extern void end_block_io_op(struct buffer_head *bh); - mark_buffer_clean(bh); - /* - * b_end_io has to clear the BH_Uptodate bitflag in the error case! - */ + /* b_end_io has to clear the BH_Uptodate bitflag in the error case! */ bh->b_end_io(bh, 0); - /* XXX KAF */ - end_block_io_op(bh); } /**** XXX END OF BUFFER_HEAD STUFF XXXX ****/ diff --git a/xen-2.4.16/include/xeno/sched.h b/xen-2.4.16/include/xeno/sched.h index b1cd749e99..3cffa46bf1 100644 --- a/xen-2.4.16/include/xeno/sched.h +++ b/xen-2.4.16/include/xeno/sched.h @@ -76,6 +76,7 @@ struct task_struct { /* Block I/O */ blk_ring_t *blk_ring_base; + unsigned int blk_req_cons; /* request consumer */ struct list_head blkdev_list; spinlock_t blk_ring_lock; |