about | summary | refs | log | tree | commit | diff | stats
path: root/xen-2.4.16
diff options
context:
space:
mode:
author kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk> 2003-02-20 11:04:29 +0000
committer kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk> 2003-02-20 11:04:29 +0000
commit ad4bdd4addaf17d33a96df0c5ddeb97b2c7f6657 (patch)
tree faca124b115425922101f3305568b19f99c2fd0f /xen-2.4.16
parent 57146cc7df77ca92f038a60de990e973f0b1a488 (diff)
download xen-ad4bdd4addaf17d33a96df0c5ddeb97b2c7f6657.tar.gz
xen-ad4bdd4addaf17d33a96df0c5ddeb97b2c7f6657.tar.bz2
xen-ad4bdd4addaf17d33a96df0c5ddeb97b2c7f6657.zip
bitkeeper revision 1.74 (3e54b63dw5kX3U_MQzXBiMsEyQdHJQ)
sched.h, blkdev.h, xen_block.c, domain.c: Beginnings of cheesy IO scheduling.
Diffstat (limited to 'xen-2.4.16')
-rw-r--r-- xen-2.4.16/common/domain.c 9
-rw-r--r-- xen-2.4.16/drivers/block/xen_block.c 175
-rw-r--r-- xen-2.4.16/include/xeno/blkdev.h 4
-rw-r--r-- xen-2.4.16/include/xeno/sched.h 1
4 files changed, 117 insertions, 72 deletions
diff --git a/xen-2.4.16/common/domain.c b/xen-2.4.16/common/domain.c
index 4a9a8a8359..5e862ada6d 100644
--- a/xen-2.4.16/common/domain.c
+++ b/xen-2.4.16/common/domain.c
@@ -15,6 +15,7 @@
#include <asm/flushtlb.h>
#include <asm/msr.h>
#include <xeno/multiboot.h>
+#include <xeno/blkdev.h>
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
@@ -48,10 +49,7 @@ struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu)
memset(p->shared_info, 0, PAGE_SIZE);
SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), dom_id);
- if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
- p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
- memset(p->blk_ring_base, 0, PAGE_SIZE);
- SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), dom_id);
+ init_blkdev_info(p);
SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
@@ -216,8 +214,7 @@ void release_task(struct task_struct *p)
}
if ( p->mm.perdomain_pt ) free_page((unsigned long)p->mm.perdomain_pt);
- UNSHARE_PFN(virt_to_page(p->blk_ring_base));
- free_page((unsigned long)p->blk_ring_base);
+ destroy_blkdev_info(p);
UNSHARE_PFN(virt_to_page(p->shared_info));
free_page((unsigned long)p->shared_info);
diff --git a/xen-2.4.16/drivers/block/xen_block.c b/xen-2.4.16/drivers/block/xen_block.c
index 4663524910..3124500ccc 100644
--- a/xen-2.4.16/drivers/block/xen_block.c
+++ b/xen-2.4.16/drivers/block/xen_block.c
@@ -19,30 +19,67 @@
#define XEN_BLK_DEBUG 0
#define XEN_BLK_DEBUG_LEVEL KERN_ALERT
-/*
- * KAF XXX: the current state of play with blk_requests.
- *
- * The following infrastructure is really here for future use.
- * blk_requests are currently not used by any mechanism, but eventually
- * pending blk_requests will go into an IO scheduler. This entry point
- * will go where we currently increment 'nr_pending'. The scheduler will
- * refuse admission of a blk_request if it is already full.
- */
typedef struct blk_request {
- struct list_head queue;
- struct buffer_head *bh;
- blk_ring_req_entry_t *request;
- struct task_struct *domain; /* requesting domain */
+ struct buffer_head *bh;
+ void *id;
+ struct task_struct *domain;
} blk_request_t;
-#define MAX_PENDING_REQS 256 /* very arbitrary */
+#define MAX_PENDING_REQS 32
+#define BATCH_PER_DOMAIN 8
static kmem_cache_t *blk_request_cachep;
static atomic_t nr_pending;
-static int pending_work; /* Bitmask: which domains have work for us? */
-static long do_block_io_op_domain (struct task_struct* task);
-static int dispatch_rw_block_io (int index);
-static int dispatch_probe_block_io (int index);
-static int dispatch_debug_block_io (int index);
+static int do_block_io_op_domain(struct task_struct* task, int max_to_do);
+static int dispatch_rw_block_io(int index);
+static int dispatch_probe_block_io(int index);
+static int dispatch_debug_block_io(int index);
+
+static spinlock_t io_schedule_lock;
+static struct list_head io_schedule_list;
+
+static int on_blkdev_list(struct task_struct *p)
+{
+ return p->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(struct task_struct *p)
+{
+ list_del(&p->blkdev_list);
+ p->blkdev_list.next = NULL;
+}
+
+static void add_to_blkdev_list(struct task_struct *p)
+{
+ list_add(&p->blkdev_list, &io_schedule_list);
+}
+
+static void add_to_blkdev_list_tail(struct task_struct *p)
+{
+ list_add_tail(&p->blkdev_list, &io_schedule_list);
+}
+
+static void io_schedule(void)
+{
+ struct task_struct *p;
+ struct list_head *ent;
+
+ while ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS / 2)) &&
+ !list_empty(&io_schedule_list) &&
+ spin_trylock(&io_schedule_lock) )
+ {
+ while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
+ !list_empty(&io_schedule_list) )
+ {
+ ent = io_schedule_list.next;
+ p = list_entry(ent, struct task_struct, blkdev_list);
+ remove_from_blkdev_list(p);
+ if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
+ add_to_blkdev_list_tail(p);
+ }
+ spin_unlock(&io_schedule_lock);
+ }
+}
+
/*
* end_block_io_op:
@@ -58,7 +95,6 @@ void end_block_io_op(struct buffer_head * bh)
struct task_struct *p;
int position = 0;
blk_ring_t *blk_ring;
- int loop;
if (XEN_BLK_DEBUG)
printk(XEN_BLK_DEBUG_LEVEL "XEN end_block_io_op, bh: %lx\n",
@@ -74,7 +110,7 @@ void end_block_io_op(struct buffer_head * bh)
spin_lock_irqsave(&p->blk_ring_lock, flags);
blk_ring = p->blk_ring_base;
position = blk_ring->resp_prod;
- blk_ring->resp_ring[position].id = blk_request->request->id;
+ blk_ring->resp_ring[position].id = blk_request->id;
blk_ring->resp_ring[position].status = 0;
blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
spin_unlock_irqrestore(&p->blk_ring_lock, flags);
@@ -87,27 +123,9 @@ void end_block_io_op(struct buffer_head * bh)
if ( blk_request->bh )
kfree(blk_request->bh);
kmem_cache_free(blk_request_cachep, blk_request);
-
- /* XXX SMH: below is ugly and dangerous -- fix */
- /*
- * now check if there is any pending work from any domain
- * that we were previously unable to process.
- */
- for ( loop = 0; loop < XEN_BLOCK_MAX_DOMAINS; loop++ )
- {
- int domain = pending_work & (1 << loop);
-
- if ( domain )
- {
- struct task_struct *mytask = current;
- while ( mytask->domain != loop )
- mytask = mytask->next_task;
-
- pending_work = pending_work & !(1 << loop);
- do_block_io_op_domain(mytask);
- }
- }
+ /* Get more work to do. */
+ io_schedule();
return;
@@ -125,21 +143,25 @@ void end_block_io_op(struct buffer_head * bh)
* Accept a block io request from a guest operating system.
* There is an entry in the hypervisor_call_table (xen/arch/i386/entry.S).
*/
-
-long do_block_io_op (void)
+long do_block_io_op(void)
{
- return do_block_io_op_domain(current);
+ if ( !on_blkdev_list(current) )
+ {
+ spin_lock_irq(&io_schedule_lock);
+ add_to_blkdev_list_tail(current);
+ spin_unlock_irq(&io_schedule_lock);
+ }
+
+ io_schedule();
+
+ return 0L;
}
-/*
- * do_block_io_op_domain:
- * Handle the requests for a particular domain
- */
-static long do_block_io_op_domain (struct task_struct* task)
+static int do_block_io_op_domain(struct task_struct* task, int max_to_do)
{
blk_ring_t *blk_ring = task->blk_ring_base;
- int loop, status;
+ int loop, status = 0;
if (XEN_BLK_DEBUG)
printk(XEN_BLK_DEBUG_LEVEL "XEN do_block_io_op %d %d\n",
@@ -151,6 +173,8 @@ static long do_block_io_op_domain (struct task_struct* task)
{
status = 1;
+ if ( max_to_do-- == 0 ) break;
+
switch (blk_ring->req_ring[loop].operation) {
case XEN_BLOCK_READ:
@@ -172,20 +196,11 @@ static long do_block_io_op_domain (struct task_struct* task)
BUG();
}
-
- if (status) {
- /*
- ** Unable to successfully issue / complete command, maybe because
- ** another resource (e.g. disk request buffers) is unavailable.
- ** stop removing items from the communications ring and try later
- */
- pending_work = pending_work | (1 << task->domain);
- break;
- }
+ if ( status ) break;
}
blk_ring->req_cons = loop;
- return 0L;
+ return status;
}
@@ -284,7 +299,7 @@ static int dispatch_rw_block_io (int index)
}
/* save meta data about request */
- blk_request->request = &blk_ring->req_ring[index];
+ blk_request->id = blk_ring->req_ring[index].id;
blk_request->bh = bh;
blk_request->domain = current;
@@ -304,16 +319,44 @@ static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs)
}
+/* Start-of-day initialisation for a new domain. */
+void init_blkdev_info(struct task_struct *p)
+{
+ if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
+ p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
+ clear_page(p->blk_ring_base);
+ SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p->domain);
+ p->blkdev_list.next = NULL;
+}
+
+
+/* End-of-day teardown for a domain. XXX Outstanding requests? */
+void destroy_blkdev_info(struct task_struct *p)
+{
+ unsigned long flags;
+ if ( on_blkdev_list(p) )
+ {
+ spin_lock_irqsave(&io_schedule_lock, flags);
+ if ( on_blkdev_list(p) ) remove_from_blkdev_list(p);
+ spin_unlock_irqrestore(&io_schedule_lock, flags);
+ }
+ UNSHARE_PFN(virt_to_page(p->blk_ring_base));
+ free_page((unsigned long)p->blk_ring_base);
+}
+
+
void initialize_block_io ()
{
+ atomic_set(&nr_pending, 0);
+
+ spin_lock_init(&io_schedule_lock);
+ INIT_LIST_HEAD(&io_schedule_list);
+
blk_request_cachep = kmem_cache_create(
"blk_request_cache", sizeof(blk_request_t),
0, SLAB_HWCACHE_ALIGN, NULL, NULL);
- add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");
-
- pending_work = 0;
- atomic_set(&nr_pending, 0);
+ add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");
}
diff --git a/xen-2.4.16/include/xeno/blkdev.h b/xen-2.4.16/include/xeno/blkdev.h
index 2a592f38fe..03ea926af2 100644
--- a/xen-2.4.16/include/xeno/blkdev.h
+++ b/xen-2.4.16/include/xeno/blkdev.h
@@ -6,6 +6,7 @@
#include <asm/bitops.h>
#include <xeno/list.h>
#include <xeno/kdev_t.h>
+#include <xeno/sched.h>
/* Some defines from fs.h that may actually be useful to the blkdev layer. */
#define READ 0
@@ -14,6 +15,9 @@
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+extern void init_blkdev_info(struct task_struct *);
+extern void destroy_blkdev_info(struct task_struct *);
+
extern int unregister_blkdev(unsigned int, const char *);
extern int invalidate_device(kdev_t, int);
extern int check_disk_change(kdev_t);
diff --git a/xen-2.4.16/include/xeno/sched.h b/xen-2.4.16/include/xeno/sched.h
index a02c28ff22..b1cd749e99 100644
--- a/xen-2.4.16/include/xeno/sched.h
+++ b/xen-2.4.16/include/xeno/sched.h
@@ -76,6 +76,7 @@ struct task_struct {
/* Block I/O */
blk_ring_t *blk_ring_base;
+ struct list_head blkdev_list;
spinlock_t blk_ring_lock;
int has_cpu, policy, counter;