From ad4bdd4addaf17d33a96df0c5ddeb97b2c7f6657 Mon Sep 17 00:00:00 2001
From: "kaf24@labyrinth.cl.cam.ac.uk" <kaf24@labyrinth.cl.cam.ac.uk>
Date: Thu, 20 Feb 2003 11:04:29 +0000
Subject: bitkeeper revision 1.74 (3e54b63dw5kX3U_MQzXBiMsEyQdHJQ)

sched.h, blkdev.h, xen_block.c, domain.c:
  Beginnings of cheesy IO scheduling.
---
 xen-2.4.16/common/domain.c           |   9 +-
 xen-2.4.16/drivers/block/xen_block.c | 175 ++++++++++++++++++++++-------------
 xen-2.4.16/include/xeno/blkdev.h     |   4 +
 xen-2.4.16/include/xeno/sched.h      |   1 +
 4 files changed, 117 insertions(+), 72 deletions(-)

diff --git a/xen-2.4.16/common/domain.c b/xen-2.4.16/common/domain.c
index 4a9a8a8359..5e862ada6d 100644
--- a/xen-2.4.16/common/domain.c
+++ b/xen-2.4.16/common/domain.c
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include <xeno/blkdev.h>
 
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
@@ -48,10 +49,7 @@ struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu)
     memset(p->shared_info, 0, PAGE_SIZE);
     SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), dom_id);
 
-    if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
-    p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
-    memset(p->blk_ring_base, 0, PAGE_SIZE);
-    SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), dom_id);
+    init_blkdev_info(p);
 
     SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
     SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
@@ -216,8 +214,7 @@ void release_task(struct task_struct *p)
     }
     if ( p->mm.perdomain_pt ) free_page((unsigned long)p->mm.perdomain_pt);
 
-    UNSHARE_PFN(virt_to_page(p->blk_ring_base));
-    free_page((unsigned long)p->blk_ring_base);
+    destroy_blkdev_info(p);
 
     UNSHARE_PFN(virt_to_page(p->shared_info));
     free_page((unsigned long)p->shared_info);

diff --git a/xen-2.4.16/drivers/block/xen_block.c b/xen-2.4.16/drivers/block/xen_block.c
index 4663524910..3124500ccc 100644
--- a/xen-2.4.16/drivers/block/xen_block.c
+++ b/xen-2.4.16/drivers/block/xen_block.c
@@ -19,30 +19,67 @@
 #define XEN_BLK_DEBUG 0
 #define XEN_BLK_DEBUG_LEVEL KERN_ALERT
 
-/*
- * KAF XXX: the current state of play with blk_requests.
- *
- * The following infrastructure is really here for future use.
- * blk_requests are currently not used by any mechanism, but eventually
- * pending blk_requests will go into an IO scheduler. This entry point
- * will go where we currently increment 'nr_pending'. The scheduler will
- * refuse admission of a blk_request if it is already full.
- */
 typedef struct blk_request
 {
-    struct list_head queue;
-    struct buffer_head *bh;
-    blk_ring_req_entry_t *request;
-    struct task_struct *domain; /* requesting domain */
+    struct buffer_head *bh;
+    void *id;
+    struct task_struct *domain;
 } blk_request_t;
 
-#define MAX_PENDING_REQS 256 /* very arbitrary */
+#define MAX_PENDING_REQS 32
+#define BATCH_PER_DOMAIN 8
+
 static kmem_cache_t *blk_request_cachep;
 static atomic_t nr_pending;
-static int pending_work; /* Bitmask: which domains have work for us? */
 
-static long do_block_io_op_domain (struct task_struct* task);
-static int dispatch_rw_block_io (int index);
-static int dispatch_probe_block_io (int index);
-static int dispatch_debug_block_io (int index);
+static int do_block_io_op_domain(struct task_struct* task, int max_to_do);
+static int dispatch_rw_block_io(int index);
+static int dispatch_probe_block_io(int index);
+static int dispatch_debug_block_io(int index);
+
+static spinlock_t io_schedule_lock;
+static struct list_head io_schedule_list;
+
+static int on_blkdev_list(struct task_struct *p)
+{
+    return p->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(struct task_struct *p)
+{
+    list_del(&p->blkdev_list);
+    p->blkdev_list.next = NULL;
+}
+
+static void add_to_blkdev_list(struct task_struct *p)
+{
+    list_add(&p->blkdev_list, &io_schedule_list);
+}
+
+static void add_to_blkdev_list_tail(struct task_struct *p)
+{
+    list_add_tail(&p->blkdev_list, &io_schedule_list);
+}
+
+static void io_schedule(void)
+{
+    struct task_struct *p;
+    struct list_head *ent;
+
+    while ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS / 2)) &&
+            !list_empty(&io_schedule_list) &&
+            spin_trylock(&io_schedule_lock) )
+    {
+        while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
+                !list_empty(&io_schedule_list) )
+        {
+            ent = io_schedule_list.next;
+            p = list_entry(ent, struct task_struct, blkdev_list);
+            remove_from_blkdev_list(p);
+            if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
+                add_to_blkdev_list_tail(p);
+        }
+        spin_unlock(&io_schedule_lock);
+    }
+}
+
 
 /*
  * end_block_io_op:
@@ -58,7 +95,6 @@ void end_block_io_op(struct buffer_head * bh)
     struct task_struct *p;
     int position = 0;
     blk_ring_t *blk_ring;
-    int loop;
 
     if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL "XEN end_block_io_op, bh: %lx\n",
@@ -74,7 +110,7 @@ void end_block_io_op(struct buffer_head * bh)
     spin_lock_irqsave(&p->blk_ring_lock, flags);
     blk_ring = p->blk_ring_base;
     position = blk_ring->resp_prod;
-    blk_ring->resp_ring[position].id = blk_request->request->id;
+    blk_ring->resp_ring[position].id = blk_request->id;
     blk_ring->resp_ring[position].status = 0;
     blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
     spin_unlock_irqrestore(&p->blk_ring_lock, flags);
@@ -87,27 +123,9 @@ void end_block_io_op(struct buffer_head * bh)
     if ( blk_request->bh ) kfree(blk_request->bh);
     kmem_cache_free(blk_request_cachep, blk_request);
-
-    /* XXX SMH: below is ugly and dangerous -- fix */
-    /*
-     * now check if there is any pending work from any domain
-     * that we were previously unable to process.
-     */
-    for ( loop = 0; loop < XEN_BLOCK_MAX_DOMAINS; loop++ )
-    {
-        int domain = pending_work & (1 << loop);
-
-        if ( domain )
-        {
-            struct task_struct *mytask = current;
-            while ( mytask->domain != loop )
-                mytask = mytask->next_task;
-
-            pending_work = pending_work & !(1 << loop);
-            do_block_io_op_domain(mytask);
-        }
-    }
+
+    /* Get more work to do. */
+    io_schedule();
 
     return;
@@ -125,21 +143,25 @@ void end_block_io_op(struct buffer_head * bh)
  * Accept a block io request from a guest operating system.
  * There is an entry in the hypervisor_call_table (xen/arch/i386/entry.S).
  */
-
-long do_block_io_op (void)
+long do_block_io_op(void)
 {
-    return do_block_io_op_domain(current);
+    if ( !on_blkdev_list(current) )
+    {
+        spin_lock_irq(&io_schedule_lock);
+        add_to_blkdev_list_tail(current);
+        spin_unlock_irq(&io_schedule_lock);
+    }
+
+    io_schedule();
+
+    return 0L;
 }
 
 
-/*
- * do_block_io_op_domain:
- *  Handle the requests for a particular domain
- */
-static long do_block_io_op_domain (struct task_struct* task)
+static int do_block_io_op_domain(struct task_struct* task, int max_to_do)
 {
     blk_ring_t *blk_ring = task->blk_ring_base;
-    int loop, status;
+    int loop, status = 0;
 
     if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL "XEN do_block_io_op %d %d\n",
@@ -151,6 +173,8 @@ static int do_block_io_op_domain(struct task_struct* task)
     {
         status = 1;
 
+        if ( max_to_do-- == 0 ) break;
+
         switch (blk_ring->req_ring[loop].operation)
         {
         case XEN_BLOCK_READ:
@@ -172,20 +196,11 @@ static int do_block_io_op_domain(struct task_struct* task)
             BUG();
         }
 
-
-        if (status) {
-            /*
-            ** Unable to successfully issue / complete command, maybe because
-            ** another resource (e.g. disk request buffers) is unavailable.
-            ** stop removing items from the communications ring and try later
-            */
-            pending_work = pending_work | (1 << task->domain);
-            break;
-        }
+        if ( status ) break;
     }
 
     blk_ring->req_cons = loop;
-    return 0L;
+    return status;
 }
 
@@ -284,7 +299,7 @@ static int dispatch_rw_block_io (int index)
     }
 
     /* save meta data about request */
-    blk_request->request = &blk_ring->req_ring[index];
+    blk_request->id = blk_ring->req_ring[index].id;
     blk_request->bh = bh;
     blk_request->domain = current;
@@ -304,16 +319,44 @@ static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs)
 }
 
 
+/* Start-of-day initialisation for a new domain. */
+void init_blkdev_info(struct task_struct *p)
+{
+    if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
+    p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
+    clear_page(p->blk_ring_base);
+    SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p->domain);
+    p->blkdev_list.next = NULL;
+}
+
+
+/* End-of-day teardown for a domain. XXX Outstanding requests? */
+void destroy_blkdev_info(struct task_struct *p)
+{
+    unsigned long flags;
+    if ( on_blkdev_list(p) )
+    {
+        spin_lock_irqsave(&io_schedule_lock, flags);
+        if ( on_blkdev_list(p) ) remove_from_blkdev_list(p);
+        spin_unlock_irqrestore(&io_schedule_lock, flags);
+    }
+    UNSHARE_PFN(virt_to_page(p->blk_ring_base));
+    free_page((unsigned long)p->blk_ring_base);
+}
+
+
 void initialize_block_io ()
 {
+    atomic_set(&nr_pending, 0);
+
+    spin_lock_init(&io_schedule_lock);
+    INIT_LIST_HEAD(&io_schedule_list);
+
     blk_request_cachep = kmem_cache_create(
         "blk_request_cache", sizeof(blk_request_t),
         0, SLAB_HWCACHE_ALIGN, NULL, NULL);
 
-    add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");
-
-    pending_work = 0;
-    atomic_set(&nr_pending, 0);
+    add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");
 }

diff --git a/xen-2.4.16/include/xeno/blkdev.h b/xen-2.4.16/include/xeno/blkdev.h
index 2a592f38fe..03ea926af2 100644
--- a/xen-2.4.16/include/xeno/blkdev.h
+++ b/xen-2.4.16/include/xeno/blkdev.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 
 /* Some defines from fs.h that may actually be useful to the blkdev layer. */
 #define READ 0
@@ -14,6 +15,9 @@
 #define BLOCK_SIZE_BITS 10
 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+
+extern void init_blkdev_info(struct task_struct *p);
+extern void destroy_blkdev_info(struct task_struct *p);
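
The heart of this patch is io_schedule(): domains with outstanding ring
requests sit on io_schedule_list, each gets serviced for at most
BATCH_PER_DOMAIN requests per visit, and a domain with work left over is
re-queued at the tail. Below is a minimal userspace sketch of that
round-robin batching policy. The structures are simplified stand-ins, not
Xen's real ones, and the lock-free fast path of the real io_schedule()
(spin_trylock plus the half-full MAX_PENDING_REQS/2 threshold) is
deliberately omitted:

    #include <stdio.h>

    #define MAX_PENDING_REQS 32   /* cap on requests in flight */
    #define BATCH_PER_DOMAIN  8   /* fairness quantum per visit */
    #define NR_DOMAINS        3

    struct domain {
        int id;
        int queued;               /* requests waiting in this domain's ring */
        struct domain *next;      /* scheduling-list linkage */
    };

    static struct domain *head, *tail;  /* FIFO of domains with work */
    static int nr_pending;              /* requests currently in flight */

    static void add_tail(struct domain *d)
    {
        d->next = NULL;
        if ( tail ) tail->next = d; else head = d;
        tail = d;
    }

    static struct domain *remove_head(void)
    {
        struct domain *d = head;
        head = d->next;
        if ( head == NULL ) tail = NULL;
        return d;
    }

    /* Issue up to max_to_do requests; nonzero return means work remains. */
    static int do_block_io_op_domain(struct domain *d, int max_to_do)
    {
        while ( d->queued && max_to_do-- && (nr_pending < MAX_PENDING_REQS) )
        {
            d->queued--;
            nr_pending++;
            printf("issue request for dom%d (pending=%d)\n", d->id, nr_pending);
        }
        return d->queued != 0;
    }

    static void io_schedule(void)
    {
        while ( (nr_pending < MAX_PENDING_REQS) && (head != NULL) )
        {
            struct domain *d = remove_head();
            /* Re-queue at the tail if work remains: this rotation is what
               stops one busy domain from starving the others. */
            if ( do_block_io_op_domain(d, BATCH_PER_DOMAIN) )
                add_tail(d);
        }
    }

    int main(void)
    {
        struct domain doms[NR_DOMAINS];
        for ( int i = 0; i < NR_DOMAINS; i++ )
        {
            doms[i].id = i;
            doms[i].queued = 10 + 5 * i;
            add_tail(&doms[i]);
        }
        io_schedule();      /* fills the in-flight window */
        nr_pending = 0;     /* pretend every completion has arrived... */
        io_schedule();      /* ...each completion re-runs the scheduler */
        return 0;
    }

Note how admission control moves from the old pending_work bitmask (which
capped the system at XEN_BLOCK_MAX_DOMAINS and forced a linear task-list
walk on every completion) to a simple FIFO of domains plus a global
in-flight counter.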
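
Two smaller idioms in the patch deserve a note. First, list membership is
encoded in the link itself: a domain is "not on the scheduling list" iff
p->blkdev_list.next == NULL, which is why init_blkdev_info() NULLs the
pointer at start of day and remove_from_blkdev_list() re-NULLs it after
list_del(). Second, destroy_blkdev_info() tests membership once without
the lock (a cheap fast path for domains that never issued block IO) and
again under io_schedule_lock, because the unlocked answer can go stale
against a concurrent io_schedule(). A sketch of the combined pattern,
using pthreads in place of spin_lock_irqsave() and a bare next pointer in
place of Xen's list_head:

    #include <pthread.h>
    #include <stddef.h>

    static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;

    struct node { struct node *next; };   /* stand-in for blkdev_list */

    /* next == NULL doubles as the "not queued" marker: no flag, no walk. */
    static int on_list(struct node *n) { return n->next != NULL; }

    static void remove_if_queued(struct node *n)
    {
        if ( on_list(n) )                 /* unlocked hint: may be stale... */
        {
            pthread_mutex_lock(&sched_lock);
            if ( on_list(n) )             /* ...so re-check under the lock */
                n->next = NULL;           /* real code: list_del(), then NULL */
            pthread_mutex_unlock(&sched_lock);
        }
    }

    int main(void)
    {
        struct node n = { NULL };         /* never queued: fast path, no lock */
        remove_if_queued(&n);
        return 0;
    }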
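
Finally, dispatch_rw_block_io() now copies the guest's request id by value
into blk_request->id instead of keeping a pointer into the request ring:
since req_cons is advanced as soon as requests are consumed, the guest may
legitimately reuse a ring slot before the IO completes, so the old
blk_request->request->id dereference could read a recycled entry.
end_block_io_op() then echoes the saved id into the response ring under
blk_ring_lock. A sketch of that producer step, with an assumed ring layout
(blk_ring_t's real field order and BLK_RESP_RING_INC's real definition are
not shown in this patch):

    #define BLK_RESP_RING_ENTRIES 64                 /* assumed power of two */
    #define BLK_RESP_RING_INC(i) (((i) + 1) & (BLK_RESP_RING_ENTRIES - 1))

    typedef struct { void *id; int status; } blk_ring_resp_entry_t;

    typedef struct {
        unsigned int resp_prod;                      /* written by Xen */
        unsigned int resp_cons;                      /* written by the guest */
        blk_ring_resp_entry_t resp_ring[BLK_RESP_RING_ENTRIES];
    } blk_ring_t;

    static void queue_response(blk_ring_t *ring, void *id)
    {
        unsigned int pos = ring->resp_prod;
        ring->resp_ring[pos].id     = id;            /* echo guest's cookie */
        ring->resp_ring[pos].status = 0;             /* 0 == success */
        ring->resp_prod = BLK_RESP_RING_INC(pos);    /* publish the entry */
    }

    int main(void)
    {
        static blk_ring_t ring;                      /* zeroed: resp_prod == 0 */
        int dummy;
        queue_response(&ring, &dummy);               /* slot 0 holds the cookie */
        return (ring.resp_ring[0].id == &dummy) ? 0 : 1;
    }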