author     kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk>   2003-02-21 12:46:51 +0000
committer  kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk>   2003-02-21 12:46:51 +0000
commit     b772c635b961040f38d69fc098cc330eafbaf01f (patch)
tree       9250153c71f713d86110fb200e07f6676bcc0dee /xen-2.4.16
parent     28e3860f49e692cee29ea62c3193359367128ac1 (diff)
bitkeeper revision 1.81 (3e561fbbcTcsPP9qTjC_BVR3zYpKjw)
Many files: Block-device layer all fixed up. Serialisation removed. Should fly! :-)
Diffstat (limited to 'xen-2.4.16')
-rw-r--r--  xen-2.4.16/drivers/block/ll_rw_blk.c          17
-rw-r--r--  xen-2.4.16/drivers/block/xen_block.c         208
-rw-r--r--  xen-2.4.16/include/hypervisor-ifs/block.h     30
-rw-r--r--  xen-2.4.16/include/xeno/blkdev.h              24
-rw-r--r--  xen-2.4.16/include/xeno/sched.h                1
5 files changed, 103 insertions(+), 177 deletions(-)
diff --git a/xen-2.4.16/drivers/block/ll_rw_blk.c b/xen-2.4.16/drivers/block/ll_rw_blk.c
index e644974eab..615b332c4b 100644
--- a/xen-2.4.16/drivers/block/ll_rw_blk.c
+++ b/xen-2.4.16/drivers/block/ll_rw_blk.c
@@ -5,7 +5,6 @@
* Copyright (C) 1994, Karl Keyte: Added support for disk statistics
* Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
* Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
- * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000
*/
/*
@@ -40,14 +39,6 @@
#define DPRINTK(_f, _a...) ((void)0)
#endif
-/* XXX SMH: temporarily we just dive at xen_block completion handler */
-extern void end_block_io_op(struct buffer_head *bh);
-
-static void end_buffer_dummy(struct buffer_head *bh, int uptodate)
-{
- /* do nothing */
-}
-
/* This will die as all synchronous stuff is coming to an end */
#define complete(_r) panic("completion.h stuff may be needed...")
@@ -1036,8 +1027,6 @@ out:
return 0;
end_io:
bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
- /* XXX SMH: do we need this every time? */
- end_block_io_op(bh);
return 0;
}
@@ -1107,8 +1096,6 @@ void generic_make_request (int rw, struct buffer_head * bh)
/* Yecch again */
bh->b_end_io(bh, 0);
- /* XXX SMH */
- end_block_io_op(bh);
return;
}
}
@@ -1238,7 +1225,6 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
/* We have the buffer lock */
atomic_inc(&bh->b_count);
- bh->b_end_io = end_buffer_dummy;
switch(rw) {
case WRITE:
@@ -1258,8 +1244,6 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
BUG();
end_io:
bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
- /* XXX SMH */
- end_block_io_op(bh);
continue;
}
@@ -1313,7 +1297,6 @@ int end_that_request_first (struct request *req, int uptodate, char *name)
req->bh = bh->b_reqnext;
bh->b_reqnext = NULL;
bh->b_end_io(bh, uptodate);
- end_block_io_op(bh);
if ((bh = req->bh) != NULL) {
req->hard_sector += nsect;
req->hard_nr_sectors -= nsect;
diff --git a/xen-2.4.16/drivers/block/xen_block.c b/xen-2.4.16/drivers/block/xen_block.c
index 177dc23bb0..bf70737dab 100644
--- a/xen-2.4.16/drivers/block/xen_block.c
+++ b/xen-2.4.16/drivers/block/xen_block.c
@@ -23,24 +23,26 @@
#define DPRINTK(_f, _a...) ((void)0)
#endif
-typedef struct blk_request
-{
- struct buffer_head *bh;
- void *id;
- struct task_struct *domain;
-} blk_request_t;
-
+/*
+ * These are rather arbitrary. They are fairly large because adjacent
+ * requests pulled from a communication ring are quite likely to end
+ * up being part of the same scatter/gather request at the disc.
+ * It might be a good idea to add scatter/gather support explicitly to
+ * the communication ring (eg. each request has an array of N pointers);
+ * then these values would better reflect real costs at the disc.
+ */
#define MAX_PENDING_REQS 32
#define BATCH_PER_DOMAIN 8
-static kmem_cache_t *blk_request_cachep;
+static kmem_cache_t *buffer_head_cachep;
static atomic_t nr_pending;
static void io_schedule(unsigned long unused);
-static int do_block_io_op_domain(struct task_struct* task, int max_to_do);
+static int do_block_io_op_domain(struct task_struct *p, int max_to_do);
static void dispatch_rw_block_io(struct task_struct *p, int index);
static void dispatch_probe_block_io(struct task_struct *p, int index);
static void dispatch_debug_block_io(struct task_struct *p, int index);
+static void make_response(struct task_struct *p, void *id, unsigned long st);
/******************************************************************
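The comment introduced above floats explicit scatter/gather support in the ring as future work. A rough sketch of what such a request entry might look like, purely hypothetical and not part of this patch (SG_MAX_SEGS, nr_segs and seg_size are invented names):

#define SG_MAX_SEGS 8                       /* assumed per-request segment cap */

typedef struct blk_ring_sg_req_entry
{
    void          *id;                      /* guest cookie, as in the current req */
    int            operation;               /* XEN_BLOCK_READ or XEN_BLOCK_WRITE */
    unsigned short device;
    unsigned long  sector_number;           /* start sector on the device */
    int            nr_segs;                 /* valid entries in the arrays below */
    char          *buffer[SG_MAX_SEGS];     /* guest-physical buffer per segment */
    unsigned short seg_size[SG_MAX_SEGS];   /* bytes in each segment */
} blk_ring_sg_req_entry_t;

With something like this, adjacent segments of one guest request would reach the disc as a single scatter/gather operation, and MAX_PENDING_REQS / BATCH_PER_DOMAIN could then be tuned against real per-operation costs.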
@@ -104,6 +106,7 @@ static void io_schedule(unsigned long unused)
struct task_struct *p;
struct list_head *ent;
+ /* Queue up a batch of requests. */
while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
!list_empty(&io_schedule_list) )
{
@@ -113,10 +116,20 @@ static void io_schedule(unsigned long unused)
if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
add_to_blkdev_list_tail(p);
}
+
+ /* Push the batch through to disc. */
+ run_task_queue(&tq_disk);
}
static void maybe_trigger_io_schedule(void)
{
+ /*
+ * Needed so that two processes, who together make the following predicate
+ * true, don't both read stale values and evaluate the predicate
+ * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+ */
+ smp_mb();
+
if ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS/2)) &&
!list_empty(&io_schedule_list) )
{
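The barrier comment above is terse, so here is the race it closes, reconstructed from the surrounding code (a sketch of the two call sites, not code from the patch). Each path stores one half of the trigger predicate and then tests the whole of it; smp_mb() inside maybe_trigger_io_schedule() orders the caller's store before its loads, so at least one of two racing callers sees both updated values and schedules io_schedule():

/* Path A: a request completes. */
static void io_completed_side(void)
{
    atomic_dec(&nr_pending);        /* store one half of the predicate... */
    maybe_trigger_io_schedule();    /* ...smp_mb(), then test both halves */
}

/* Path B: a domain queues new work. */
static void io_queued_side(struct task_struct *p)
{
    add_to_blkdev_list_tail(p);     /* store the other half...            */
    maybe_trigger_io_schedule();    /* ...smp_mb(), then test both halves */
}

Without the barrier, both callers could test against stale values, both skip the kick, and the block scheduler would stall until the next unrelated event.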
@@ -127,51 +140,17 @@ static void maybe_trigger_io_schedule(void)
/******************************************************************
- * COMPLETION CALLBACK -- XXX Hook properly into bh->b_end_io
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
*/
-void end_block_io_op(struct buffer_head * bh)
+static void end_block_io_op(struct buffer_head *bh, int uptodate)
{
- unsigned long cpu_mask;
- blk_request_t *blk_request = NULL;
- unsigned long flags;
- struct task_struct *p;
- int position = 0;
- blk_ring_t *blk_ring;
-
- DPRINTK("XEN end_block_io_op, bh: %p\n", bh);
-
- if ( (blk_request = (blk_request_t *)bh->b_xen_request) == NULL )
- goto bad_interrupt;
-
atomic_dec(&nr_pending);
-
- p = blk_request->domain;
+ make_response(bh->b_xen_domain, bh->b_xen_id, uptodate ? 0 : 1);
- /* Place on the response ring for the relevant domain. */
- spin_lock_irqsave(&p->blk_ring_lock, flags);
- blk_ring = p->blk_ring_base;
- position = blk_ring->resp_prod;
- blk_ring->resp_ring[position].id = blk_request->id;
- blk_ring->resp_ring[position].status = 0;
- blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
- spin_unlock_irqrestore(&p->blk_ring_lock, flags);
-
- /* Kick the relevant domain. */
- cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
- guest_event_notify(cpu_mask);
-
- /* Free state associated with this request. */
- if ( blk_request->bh )
- kfree(blk_request->bh);
- kmem_cache_free(blk_request_cachep, blk_request);
+ kmem_cache_free(buffer_head_cachep, bh);
maybe_trigger_io_schedule();
-
- return;
-
- bad_interrupt:
- panic("Block IO interrupt received for unknown buffer [%p]\n", bh);
}
@@ -193,55 +172,43 @@ long do_block_io_op(void)
* DOWNWARD CALLS -- These interface with the block-device layer proper.
*/
-static int do_block_io_op_domain(struct task_struct* task, int max_to_do)
+static int do_block_io_op_domain(struct task_struct* p, int max_to_do)
{
- blk_ring_t *blk_ring = task->blk_ring_base;
- int loop, more_to_do = 0;
- int resp_ring_ents =
- (blk_ring->resp_prod - blk_ring->resp_cons) & (BLK_RESP_RING_SIZE - 1);
-
- DPRINTK("XEN do_block_io_op %d %d\n",
- blk_ring->req_cons, blk_ring->req_prod);
+ blk_ring_t *blk_ring = p->blk_ring_base;
+ int i, more_to_do = 0;
- for ( loop = blk_ring->req_cons;
- loop != blk_ring->req_prod;
- loop = BLK_REQ_RING_INC(loop) )
+ for ( i = p->blk_req_cons;
+ i != blk_ring->req_prod;
+ i = BLK_RING_INC(i) )
{
- /*
- * Bail if we've reached the batch allowance for thsi interface,
- * or if we risk producing enough responses to overflow the
- * communication ring.
- */
- if ( (max_to_do-- == 0) ||
- ((atomic_read(&nr_pending) + resp_ring_ents) >
- BLK_RESP_RING_MAX_ENTRIES) )
+ if ( max_to_do-- == 0 )
{
more_to_do = 1;
break;
}
- switch ( blk_ring->req_ring[loop].operation )
+ switch ( blk_ring->ring[i].req.operation )
{
case XEN_BLOCK_READ:
case XEN_BLOCK_WRITE:
- dispatch_rw_block_io(task, loop);
+ dispatch_rw_block_io(p, i);
break;
case XEN_BLOCK_PROBE:
- dispatch_probe_block_io(task, loop);
+ dispatch_probe_block_io(p, i);
break;
case XEN_BLOCK_DEBUG:
- dispatch_debug_block_io(task, loop);
+ dispatch_debug_block_io(p, i);
break;
default:
panic("error: unknown block io operation [%d]\n",
- blk_ring->req_ring[loop].operation);
+ blk_ring->ring[i].req.operation);
}
}
- blk_ring->req_cons = loop;
+ p->blk_req_cons = i;
return more_to_do;
}
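The deleted resp_ring_ents arithmetic guarded against producing more responses than the old, separate response ring could hold. The rewrite can drop that check because of a capacity invariant, as I read the patch: each in-flight request occupies exactly one future response slot; io_schedule() stops admitting work once nr_pending reaches MAX_PENDING_REQS (32), so in-flight requests are bounded near that (plus at most one batch of BATCH_PER_DOMAIN); and the unified ring has BLK_RING_MAX_ENTRIES (126) usable slots, far above that bound. A hypothetical compile-time guard, not in the patch, would make the dependency explicit:

#if (MAX_PENDING_REQS + BATCH_PER_DOMAIN) > BLK_RING_MAX_ENTRIES
#error "in-flight block requests could overrun the shared ring"
#endif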
@@ -255,20 +222,11 @@ static void dispatch_probe_block_io(struct task_struct *p, int index)
extern void ide_probe_devices(xen_disk_info_t *xdi);
blk_ring_t *blk_ring = p->blk_ring_base;
xen_disk_info_t *xdi;
- unsigned long flags, cpu_mask;
-
- xdi = phys_to_virt((unsigned long)blk_ring->req_ring[index].buffer);
-
- ide_probe_devices(xdi);
- spin_lock_irqsave(&p->blk_ring_lock, flags);
- blk_ring->resp_ring[blk_ring->resp_prod].id = blk_ring->req_ring[index].id;
- blk_ring->resp_ring[blk_ring->resp_prod].status = 0;
- blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
- spin_unlock_irqrestore(&p->blk_ring_lock, flags);
+ xdi = phys_to_virt((unsigned long)blk_ring->ring[index].req.buffer);
+ ide_probe_devices(xdi);
- cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
- guest_event_notify(cpu_mask);
+ make_response(p, blk_ring->ring[index].req.id, 0);
}
static void dispatch_rw_block_io(struct task_struct *p, int index)
@@ -276,49 +234,45 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
blk_ring_t *blk_ring = p->blk_ring_base;
struct buffer_head *bh;
- struct request_queue *rq;
int operation;
- blk_request_t *blk_request;
/*
* check to make sure that the block request seems at least
* a bit legitimate
*/
- if ( (blk_ring->req_ring[index].block_size & (0x200 - 1)) != 0 )
+ if ( (blk_ring->ring[index].req.block_size & (0x200 - 1)) != 0 )
panic("error: dodgy block size: %d\n",
- blk_ring->req_ring[index].block_size);
+ blk_ring->ring[index].req.block_size);
- if ( blk_ring->req_ring[index].buffer == NULL )
+ if ( blk_ring->ring[index].req.buffer == NULL )
panic("xen_block: bogus buffer from guestOS\n");
- DPRINTK("req_cons: %d req_prod %d index: %d, op: %s, pri: %s\n",
- blk_ring->req_cons, blk_ring->req_prod, index,
- (blk_ring->req_ring[index].operation == XEN_BLOCK_READ ?
- "read" : "write"),
- (blk_ring->req_ring[index].priority == XEN_BLOCK_SYNC ?
- "sync" : "async"));
+ DPRINTK("req_cons: %d req_prod %d index: %d, op: %s\n",
+ p->blk_req_cons, blk_ring->req_prod, index,
+ (blk_ring->ring[index].req.operation == XEN_BLOCK_READ ?
+ "read" : "write"));
atomic_inc(&nr_pending);
- blk_request = kmem_cache_alloc(blk_request_cachep, GFP_ATOMIC);
-
- /* we'll be doing this frequently, would a cache be appropriate? */
- bh = (struct buffer_head *) kmalloc(sizeof(struct buffer_head),
- GFP_KERNEL);
+ bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
if ( bh == NULL ) panic("bh is null\n");
/* set just the important bits of the buffer header */
memset (bh, 0, sizeof (struct buffer_head));
- bh->b_blocknr = blk_ring->req_ring[index].block_number;
- bh->b_size = blk_ring->req_ring[index].block_size;
- bh->b_dev = blk_ring->req_ring[index].device;
- bh->b_rsector = blk_ring->req_ring[index].sector_number;
+ bh->b_blocknr = blk_ring->ring[index].req.block_number;
+ bh->b_size = blk_ring->ring[index].req.block_size;
+ bh->b_dev = blk_ring->ring[index].req.device;
+ bh->b_rsector = blk_ring->ring[index].req.sector_number;
bh->b_data = phys_to_virt((unsigned long)
- blk_ring->req_ring[index].buffer);
+ blk_ring->ring[index].req.buffer);
bh->b_count.counter = 1;
- bh->b_xen_request = (void *)blk_request;
-
- if ( blk_ring->req_ring[index].operation == XEN_BLOCK_WRITE )
+ bh->b_end_io = end_block_io_op;
+
+ /* Save meta data about request. */
+ bh->b_xen_domain = p;
+ bh->b_xen_id = blk_ring->ring[index].req.id;
+
+ if ( blk_ring->ring[index].req.operation == XEN_BLOCK_WRITE )
{
bh->b_state = ((1 << BH_JBD) | (1 << BH_Mapped) | (1 << BH_Req) |
(1 << BH_Dirty) | (1 << BH_Uptodate));
@@ -330,15 +284,8 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
operation = READ;
}
- /* save meta data about request */
- blk_request->id = blk_ring->req_ring[index].id;
- blk_request->bh = bh;
- blk_request->domain = p;
-
- /* dispatch single block request */
- ll_rw_block(operation, 1, &bh); /* linux top half */
- rq = blk_get_queue(bh->b_rdev);
- generic_unplug_device(rq); /* linux bottom half */
+ /* Dispatch a single request. We'll flush it to disc later. */
+ ll_rw_block(operation, 1, &bh);
}
@@ -347,6 +294,26 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
* MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
*/
+static void make_response(struct task_struct *p, void *id, unsigned long st)
+{
+ unsigned long cpu_mask, flags;
+ int position;
+ blk_ring_t *blk_ring;
+
+ /* Place on the response ring for the relevant domain. */
+ spin_lock_irqsave(&p->blk_ring_lock, flags);
+ blk_ring = p->blk_ring_base;
+ position = blk_ring->resp_prod;
+ blk_ring->ring[position].resp.id = id;
+ blk_ring->ring[position].resp.status = st;
+ blk_ring->resp_prod = BLK_RING_INC(position);
+ spin_unlock_irqrestore(&p->blk_ring_lock, flags);
+
+ /* Kick the relevant domain. */
+ cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
+ guest_event_notify(cpu_mask);
+}
+
static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs)
{
printk("Dumping block queue stats: nr_pending = %d\n",
@@ -378,12 +345,9 @@ void initialize_block_io ()
spin_lock_init(&io_schedule_list_lock);
INIT_LIST_HEAD(&io_schedule_list);
- blk_request_cachep = kmem_cache_create(
- "blk_request_cache", sizeof(blk_request_t),
+ buffer_head_cachep = kmem_cache_create(
+ "buffer_head_cache", sizeof(struct buffer_head),
0, SLAB_HWCACHE_ALIGN, NULL, NULL);
add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");
}
-
-
-
diff --git a/xen-2.4.16/include/hypervisor-ifs/block.h b/xen-2.4.16/include/hypervisor-ifs/block.h
index 1bf198e196..627055bf0b 100644
--- a/xen-2.4.16/include/hypervisor-ifs/block.h
+++ b/xen-2.4.16/include/hypervisor-ifs/block.h
@@ -21,26 +21,14 @@
#define XEN_BLOCK_PROBE 8 /* determine io configuration from hypervisor */
#define XEN_BLOCK_DEBUG 16 /* debug */
-#define XEN_BLOCK_SYNC 2
-#define XEN_BLOCK_ASYNC 3
-
-#define XEN_BLOCK_MAX_DOMAINS 32 /* NOTE: FIX THIS. VALUE SHOULD COME FROM? */
-
-#define BLK_REQ_RING_SIZE 64
-#define BLK_RESP_RING_SIZE 64
-
-#define BLK_REQ_RING_MAX_ENTRIES (BLK_REQ_RING_SIZE - 2)
-#define BLK_RESP_RING_MAX_ENTRIES (BLK_RESP_RING_SIZE - 2)
-
-#define BLK_REQ_RING_INC(_i) (((_i)+1) & (BLK_REQ_RING_SIZE-1))
-#define BLK_RESP_RING_INC(_i) (((_i)+1) & (BLK_RESP_RING_SIZE-1))
-#define BLK_REQ_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_REQ_RING_SIZE-1))
-#define BLK_RESP_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_RESP_RING_SIZE-1))
+#define BLK_RING_SIZE 128
+#define BLK_RING_MAX_ENTRIES (BLK_RING_SIZE - 2)
+#define BLK_RING_INC(_i) (((_i)+1) & (BLK_RING_SIZE-1))
+#define BLK_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_RING_SIZE-1))
typedef struct blk_ring_req_entry
{
void * id; /* for guest os use */
- int priority; /* SYNC or ASYNC for now */
int operation; /* XEN_BLOCK_READ or XEN_BLOCK_WRITE */
char * buffer;
unsigned long block_number; /* block number */
@@ -57,10 +45,12 @@ typedef struct blk_ring_resp_entry
typedef struct blk_ring_st
{
- unsigned int req_prod, req_cons;
- unsigned int resp_prod, resp_cons;
- blk_ring_req_entry_t req_ring[BLK_REQ_RING_SIZE];
- blk_ring_resp_entry_t resp_ring[BLK_RESP_RING_SIZE];
+ unsigned int req_prod; /* Request producer. Updated by guest OS. */
+ unsigned int resp_prod; /* Response producer. Updated by Xen. */
+ union {
+ blk_ring_req_entry_t req;
+ blk_ring_resp_entry_t resp;
+ } ring[BLK_RING_SIZE];
} blk_ring_t;
#define MAX_XEN_DISK_COUNT 100
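Under the new layout only the two producer indices live in shared memory; each side keeps its consumer index private. A sketch of the guest-OS producer side this implies — assumed code, not part of this patch (guest_submit is an invented name; the notification at the end would be the block-io hypercall serviced by do_block_io_op):

static void guest_submit(blk_ring_t *ring, blk_ring_req_entry_t *req)
{
    unsigned int i = ring->req_prod;

    ring->ring[i].req = *req;           /* fill the request view of the union   */
    wmb();                              /* publish the payload before the index */
    ring->req_prod = BLK_RING_INC(i);   /* hand the slot over to Xen            */

    /* Now trap into Xen so do_block_io_op() runs. A real guest must also
     * bound its outstanding requests (<= BLK_RING_MAX_ENTRIES), since it
     * can no longer read a consumer index out of the shared page. */
}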
diff --git a/xen-2.4.16/include/xeno/blkdev.h b/xen-2.4.16/include/xeno/blkdev.h
index 03ea926af2..3fbc78343a 100644
--- a/xen-2.4.16/include/xeno/blkdev.h
+++ b/xen-2.4.16/include/xeno/blkdev.h
@@ -62,8 +62,8 @@ enum bh_state_bits {
* for private allocation by other entities
*/
};
+
struct buffer_head {
- struct buffer_head *b_next; /* Hash queue list */
unsigned long b_blocknr; /* block number */
unsigned short b_size; /* block size */
unsigned short b_list; /* List that this buffer appears */
@@ -72,24 +72,18 @@ struct buffer_head {
atomic_t b_count; /* users using this block */
kdev_t b_rdev; /* Real device */
unsigned long b_state; /* buffer state bitmap (see above) */
- unsigned long b_flushtime; /* Time when (dirty) buffer should be written */
- struct buffer_head *b_next_free;/* lru/free list linkage */
- struct buffer_head *b_prev_free;/* doubly linked list of buffers */
- struct buffer_head *b_this_page;/* circular list of buffers in one page */
struct buffer_head *b_reqnext; /* request queue */
- struct buffer_head **b_pprev; /* doubly linked list of hash-queue */
char * b_data; /* pointer to data block */
struct pfn_info *b_page; /* the page this bh is mapped to */
- void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
- void *b_private; /* reserved for b_end_io */
+ void (*b_end_io)(struct buffer_head *bh, int uptodate);
unsigned long b_rsector; /* Real buffer location on disk */
- struct inode * b_inode;
- struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */
- void *b_xen_request; /* xen request structure */
+ /* Both used by b_end_io function in xen_block.c */
+ void *b_xen_domain;
+ void *b_xen_id;
};
typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
@@ -127,15 +121,9 @@ static inline void mark_buffer_clean(struct buffer_head * bh)
static inline void buffer_IO_error(struct buffer_head * bh)
{
- extern void end_block_io_op(struct buffer_head *bh);
-
mark_buffer_clean(bh);
- /*
- * b_end_io has to clear the BH_Uptodate bitflag in the error case!
- */
+ /* b_end_io has to clear the BH_Uptodate bitflag in the error case! */
bh->b_end_io(bh, 0);
- /* XXX KAF */
- end_block_io_op(bh);
}
/**** XXX END OF BUFFER_HEAD STUFF XXXX ****/
diff --git a/xen-2.4.16/include/xeno/sched.h b/xen-2.4.16/include/xeno/sched.h
index b1cd749e99..3cffa46bf1 100644
--- a/xen-2.4.16/include/xeno/sched.h
+++ b/xen-2.4.16/include/xeno/sched.h
@@ -76,6 +76,7 @@ struct task_struct {
/* Block I/O */
blk_ring_t *blk_ring_base;
+ unsigned int blk_req_cons; /* request consumer */
struct list_head blkdev_list;
spinlock_t blk_ring_lock;
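The request-consumer index now lives here, writable only by Xen, instead of in the shared ring page; a plausible reading is that each index stays with its only writer, so a guest cannot corrupt Xen's ring position. The guest would mirror this for responses with a private resp_cons of its own. A sketch of that consumer side — assumed guest code, with complete_guest_io an invented completion handler:

static unsigned int resp_cons;   /* guest-private; Xen never sees or trusts it */

static void guest_drain_responses(blk_ring_t *ring)
{
    while ( resp_cons != ring->resp_prod )
    {
        blk_ring_resp_entry_t *r = &ring->ring[resp_cons].resp;
        complete_guest_io(r->id, r->status);   /* invented handler */
        resp_cons = BLK_RING_INC(resp_cons);
    }
}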