diff options
Diffstat (limited to 'xen-2.4.16/drivers/block/xen_block.c')
-rw-r--r-- | xen-2.4.16/drivers/block/xen_block.c | 175 |
1 files changed, 109 insertions, 66 deletions
diff --git a/xen-2.4.16/drivers/block/xen_block.c b/xen-2.4.16/drivers/block/xen_block.c index 4663524910..3124500ccc 100644 --- a/xen-2.4.16/drivers/block/xen_block.c +++ b/xen-2.4.16/drivers/block/xen_block.c @@ -19,30 +19,67 @@ #define XEN_BLK_DEBUG 0 #define XEN_BLK_DEBUG_LEVEL KERN_ALERT -/* - * KAF XXX: the current state of play with blk_requests. - * - * The following infrastructure is really here for future use. - * blk_requests are currently not used by any mechanism, but eventually - * pending blk_requests will go into an IO scheduler. This entry point - * will go where we currently increment 'nr_pending'. The scheduler will - * refuse admission of a blk_request if it is already full. - */ typedef struct blk_request { - struct list_head queue; - struct buffer_head *bh; - blk_ring_req_entry_t *request; - struct task_struct *domain; /* requesting domain */ + struct buffer_head *bh; + void *id; + struct task_struct *domain; } blk_request_t; -#define MAX_PENDING_REQS 256 /* very arbitrary */ +#define MAX_PENDING_REQS 32 +#define BATCH_PER_DOMAIN 8 static kmem_cache_t *blk_request_cachep; static atomic_t nr_pending; -static int pending_work; /* Bitmask: which domains have work for us? */ -static long do_block_io_op_domain (struct task_struct* task); -static int dispatch_rw_block_io (int index); -static int dispatch_probe_block_io (int index); -static int dispatch_debug_block_io (int index); +static int do_block_io_op_domain(struct task_struct* task, int max_to_do); +static int dispatch_rw_block_io(int index); +static int dispatch_probe_block_io(int index); +static int dispatch_debug_block_io(int index); + +static spinlock_t io_schedule_lock; +static struct list_head io_schedule_list; + +static int on_blkdev_list(struct task_struct *p) +{ + return p->blkdev_list.next != NULL; +} + +static void remove_from_blkdev_list(struct task_struct *p) +{ + list_del(&p->blkdev_list); + p->blkdev_list.next = NULL; +} + +static void add_to_blkdev_list(struct task_struct *p) +{ + list_add(&p->blkdev_list, &io_schedule_list); +} + +static void add_to_blkdev_list_tail(struct task_struct *p) +{ + list_add_tail(&p->blkdev_list, &io_schedule_list); +} + +static void io_schedule(void) +{ + struct task_struct *p; + struct list_head *ent; + + while ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS / 2)) && + !list_empty(&io_schedule_list) && + spin_trylock(&io_schedule_lock) ) + { + while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) && + !list_empty(&io_schedule_list) ) + { + ent = io_schedule_list.next; + p = list_entry(ent, struct task_struct, blkdev_list); + remove_from_blkdev_list(p); + if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) ) + add_to_blkdev_list_tail(p); + } + spin_unlock(&io_schedule_lock); + } +} + /* * end_block_io_op: @@ -58,7 +95,6 @@ void end_block_io_op(struct buffer_head * bh) struct task_struct *p; int position = 0; blk_ring_t *blk_ring; - int loop; if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL "XEN end_block_io_op, bh: %lx\n", @@ -74,7 +110,7 @@ void end_block_io_op(struct buffer_head * bh) spin_lock_irqsave(&p->blk_ring_lock, flags); blk_ring = p->blk_ring_base; position = blk_ring->resp_prod; - blk_ring->resp_ring[position].id = blk_request->request->id; + blk_ring->resp_ring[position].id = blk_request->id; blk_ring->resp_ring[position].status = 0; blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod); spin_unlock_irqrestore(&p->blk_ring_lock, flags); @@ -87,27 +123,9 @@ void end_block_io_op(struct buffer_head * bh) if ( blk_request->bh ) kfree(blk_request->bh); kmem_cache_free(blk_request_cachep, blk_request); - - /* XXX SMH: below is ugly and dangerous -- fix */ - /* - * now check if there is any pending work from any domain - * that we were previously unable to process. - */ - for ( loop = 0; loop < XEN_BLOCK_MAX_DOMAINS; loop++ ) - { - int domain = pending_work & (1 << loop); - - if ( domain ) - { - struct task_struct *mytask = current; - while ( mytask->domain != loop ) - mytask = mytask->next_task; - - pending_work = pending_work & !(1 << loop); - do_block_io_op_domain(mytask); - } - } + /* Get more work to do. */ + io_schedule(); return; @@ -125,21 +143,25 @@ void end_block_io_op(struct buffer_head * bh) * Accept a block io request from a guest operating system. * There is an entry in the hypervisor_call_table (xen/arch/i386/entry.S). */ - -long do_block_io_op (void) +long do_block_io_op(void) { - return do_block_io_op_domain(current); + if ( !on_blkdev_list(current) ) + { + spin_lock_irq(&io_schedule_lock); + add_to_blkdev_list_tail(current); + spin_unlock_irq(&io_schedule_lock); + } + + io_schedule(); + + return 0L; } -/* - * do_block_io_op_domain: - * Handle the requests for a particular domain - */ -static long do_block_io_op_domain (struct task_struct* task) +static int do_block_io_op_domain(struct task_struct* task, int max_to_do) { blk_ring_t *blk_ring = task->blk_ring_base; - int loop, status; + int loop, status = 0; if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL "XEN do_block_io_op %d %d\n", @@ -151,6 +173,8 @@ static long do_block_io_op_domain (struct task_struct* task) { status = 1; + if ( max_to_do-- == 0 ) break; + switch (blk_ring->req_ring[loop].operation) { case XEN_BLOCK_READ: @@ -172,20 +196,11 @@ static long do_block_io_op_domain (struct task_struct* task) BUG(); } - - if (status) { - /* - ** Unable to successfully issue / complete command, maybe because - ** another resource (e.g. disk request buffers) is unavailable. - ** stop removing items from the communications ring and try later - */ - pending_work = pending_work | (1 << task->domain); - break; - } + if ( status ) break; } blk_ring->req_cons = loop; - return 0L; + return status; } @@ -284,7 +299,7 @@ static int dispatch_rw_block_io (int index) } /* save meta data about request */ - blk_request->request = &blk_ring->req_ring[index]; + blk_request->id = blk_ring->req_ring[index].id; blk_request->bh = bh; blk_request->domain = current; @@ -304,16 +319,44 @@ static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs) } +/* Start-of-day initialisation for a new domain. */ +void init_blkdev_info(struct task_struct *p) +{ + if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG(); + p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL); + clear_page(p->blk_ring_base); + SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p->domain); + p->blkdev_list.next = NULL; +} + + +/* End-of-day teardown for a domain. XXX Outstanding requests? */ +void destroy_blkdev_info(struct task_struct *p) +{ + unsigned long flags; + if ( on_blkdev_list(p) ) + { + spin_lock_irqsave(&io_schedule_lock, flags); + if ( on_blkdev_list(p) ) remove_from_blkdev_list(p); + spin_unlock_irqrestore(&io_schedule_lock, flags); + } + UNSHARE_PFN(virt_to_page(p->blk_ring_base)); + free_page((unsigned long)p->blk_ring_base); +} + + void initialize_block_io () { + atomic_set(&nr_pending, 0); + + spin_lock_init(&io_schedule_lock); + INIT_LIST_HEAD(&io_schedule_list); + blk_request_cachep = kmem_cache_create( "blk_request_cache", sizeof(blk_request_t), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - add_key_handler('b', dump_blockq, "dump xen ide blkdev stats"); - - pending_work = 0; - atomic_set(&nr_pending, 0); + add_key_handler('b', dump_blockq, "dump xen ide blkdev stats"); } |