From a16eb6b1a16f60f8616b37996d6da32fcdcfecec Mon Sep 17 00:00:00 2001
From: "rn@wyvis.research.intel-research.net"
Date: Fri, 14 Mar 2003 15:43:32 +0000
Subject: bitkeeper revision 1.124 (3e71f8a4QvveKwitZNAJi1H3BJpPEQ)

ac_timer.c:
  Rewrite of do_ac_timer/add_ac_timer, plus perf counters.
apic.c:
  Added perf counters; try to disable the APIC timer when the timeout
  value is zero.
irq.c:
  Count interrupts and the cycles spent in them.
sched.h:
  Added fields for BVT.
schedule.c:
  BVT without warping.
keyhandler.c:
  Added handler for dumping run queues; moved the ac_timer dump handler
  here.
.del-dom0_ops.h~f77c7a14cfa618f8:
  Delete: tools/domain_builder/dom0_ops.h
---
 .rootkeys                       |   1 -
 tools/domain_builder/dom0_ops.h |  81 --------
 xen/arch/i386/apic.c            |   8 +
 xen/arch/i386/irq.c             |   9 +
 xen/common/ac_timer.c           | 245 ++++++++++++----------
 xen/common/keyhandler.c         |  52 ++---
 xen/common/schedule.c           | 438 +++++++++++++++++++++++-----------------
 xen/include/xeno/sched.h        |  53 ++---
 8 files changed, 469 insertions(+), 418 deletions(-)
 delete mode 100644 tools/domain_builder/dom0_ops.h

diff --git a/.rootkeys b/.rootkeys
index da527ca68f..9bb57b3a37 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -182,7 +182,6 @@
 3e4d00468-FN2VDeEHo96zxrMHK_mA tools/domain_builder/Makefile
 3e4d0046SPau_y0sw2WLJz8QkqNoRA tools/domain_builder/README
 3e4d0046bbdH0GsI9J_1Eb4ZQHfIiQ tools/domain_builder/dom0_defs.h
-3e4d0046RgYCfGOw6qGz_7kYLMV2Vw tools/domain_builder/dom0_ops.h
 3e4d0046ouLij_CMN_j7-dUHZIBI_A tools/domain_builder/dom_builder.c
 3e4d0046EKs06fY0CWDEgZQcn7DYUg tools/domain_builder/dom_kill.c
 3e4d0046aPbGiRTtdWxqY5b3ytWurA tools/domain_builder/hypervisor_defs.h
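
The header deleted below defined dom0's command interface as a tagged union: a cmd code selects which member of the union u is live. For readers new to the pattern, here is a minimal, hypothetical dispatcher over that layout; the handle_* functions are invented for illustration, and the types are the ones declared in the deleted header.

    /* Sketch only: dispatch on dom0_op_t.cmd (types from the deleted header,
     * handlers hypothetical). */
    extern long handle_newdomain(dom0_newdomain_t *op);
    extern long handle_killdomain(dom0_killdomain_t *op);
    extern long handle_getmemlist(dom0_getmemlist_t *op);

    long dispatch_dom0_op(dom0_op_t *op)
    {
        switch (op->cmd) {
        case DOM0_NEWDOMAIN:  return handle_newdomain(&op->u.newdomain);
        case DOM0_KILLDOMAIN: return handle_killdomain(&op->u.killdomain);
        case DOM0_GETMEMLIST: return handle_getmemlist(&op->u.getmemlist);
        default:              return -1; /* unknown, or not passed down to Xen */
        }
    }
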
diff --git a/tools/domain_builder/dom0_ops.h b/tools/domain_builder/dom0_ops.h
deleted file mode 100644
index 6c60a93ff6..0000000000
--- a/tools/domain_builder/dom0_ops.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/******************************************************************************
- * dom0_ops.h
- *
- * Process command requests from domain-0 guest OS.
- *
- * Copyright (c) 2002, K A Fraser, B Dragovic
- */
-
-#define DOM0_NEWDOMAIN   0
-#define DOM0_KILLDOMAIN  1
-#define DOM0_GETMEMLIST  2
-#define DOM0_STARTDOM    4
-#define MAP_DOM_MEM      6 /* Not passed down to Xen */
-#define DO_PGUPDATES     7 /* Not passed down to Xen */
-#define MAX_CMD          8
-
-#define MAX_CMD_LEN    256
-
-typedef struct dom0_newdomain_st
-{
-    unsigned int domain;
-    unsigned int memory_kb;
-    unsigned int num_vifs;  // temporary
-    unsigned long pg_head;  // return parameter
-} dom0_newdomain_t;
-
-typedef struct dom0_killdomain_st
-{
-    unsigned int domain;
-    int          force;
-} dom0_killdomain_t;
-
-typedef struct dom0_getmemlist_st
-{
-    unsigned long start_pfn;
-    unsigned long num_pfns;
-    void *buffer;
-} dom0_getmemlist_t;
-
-/* This is entirely processed by XenoLinux */
-typedef struct dom_mem
-{
-    unsigned int domain;
-    unsigned long vaddr;
-    unsigned long start_pfn;
-    int tot_pages;
-} dom_mem_t;
-
-/* This is entirely processed by XenoLinux */
-typedef struct dom_pgupdate
-{
-    unsigned long pgt_update_arr;
-    unsigned long num_pgt_updates;
-} dom_pgupdate_t;
-
-typedef struct domain_launch
-{
-    unsigned int domain;
-    unsigned long l2_pgt_addr;
-    unsigned long virt_load_addr;
-    unsigned long virt_shinfo_addr;
-    unsigned long virt_startinfo_addr;
-    unsigned int num_vifs;
-    char cmd_line[MAX_CMD_LEN];
-} dom_meminfo_t;
-
-typedef struct dom0_op_st
-{
-    unsigned long cmd;
-    union
-    {
-        dom0_newdomain_t newdomain;
-        dom0_killdomain_t killdomain;
-        dom0_getmemlist_t getmemlist;
-        dom_mem_t dommem;
-        dom_pgupdate_t pgupdate;
-        dom_meminfo_t meminfo;
-    }
-    u;
-} dom0_op_t;
-
diff --git a/xen/arch/i386/apic.c b/xen/arch/i386/apic.c
index 865a279d8c..0acf7067c3 100644
--- a/xen/arch/i386/apic.c
+++ b/xen/arch/i386/apic.c
@@ -659,6 +659,13 @@ int reprogram_ac_timer(s_time_t timeout)
     s_time_t    expire;
     u64         apic_tmict;
 
+    if (timeout == 0) {
+        /* XXX RN: not sure if this disables it or causes an interrupt
+         * to go off immediately */
+        apic_tmict = 0;
+        goto reprogram;
+    }
+
     now = NOW();
     expire = timeout - now; /* value from now */
 
@@ -680,6 +687,7 @@ int reprogram_ac_timer(s_time_t timeout)
         return 0;
     }
 
+ reprogram:
     /* programm timer */
     apic_write(APIC_TMICT, (unsigned long)apic_tmict);
 
diff --git a/xen/arch/i386/irq.c b/xen/arch/i386/irq.c
index e58fb8f2ad..312cfe7970 100644
--- a/xen/arch/i386/irq.c
+++ b/xen/arch/i386/irq.c
@@ -36,6 +36,7 @@
 
 #include
 #include
+#include
 
 /*
  * Linux has a controller-independent x86 interrupt architecture.
@@ -469,6 +470,11 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs)
     struct irqaction * action;
     unsigned int status;
 
+    u32 cc_start, cc_end;
+
+    perfc_incra(irqs, cpu);
+    rdtscl(cc_start);
+
     spin_lock(&desc->lock);
     desc->handler->ack(irq);
     /*
@@ -530,6 +536,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs)
     if (softirq_pending(cpu))
         do_softirq();
 
+    rdtscl(cc_end);
+    perfc_adda(irq_time, cpu, cc_end - cc_start);
+
     return 1;
 }
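
The do_IRQ change above brackets each interrupt with two TSC reads and accumulates the cycle delta into a per-CPU perf counter. A freestanding sketch of the same pattern, assuming an x86 rdtsc and an invented counter array (the real code uses rdtscl() and perfc_adda()):

    #define EXAMPLE_NR_CPUS 4                      /* illustrative only */

    static unsigned long long irq_cycles[EXAMPLE_NR_CPUS];

    static inline unsigned int tsc_lo(void)
    {
        unsigned int lo, hi;
        __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
        return lo;
    }

    void account_irq(int cpu, void (*handler)(void))
    {
        unsigned int start = tsc_lo();
        handler();
        /* unsigned subtraction stays correct across one 32-bit wrap */
        irq_cycles[cpu] += (unsigned int)(tsc_lo() - start);
    }
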
diff --git a/xen/common/ac_timer.c b/xen/common/ac_timer.c
index 9bb5d7e301..73ac893e08 100644
--- a/xen/common/ac_timer.c
+++ b/xen/common/ac_timer.c
@@ -23,9 +23,9 @@
 #include
 #include
 #include
-#include
 #include
-#include
+
+#include
 #include
 #include
@@ -34,20 +34,16 @@
 #include
 #include
 
-
-#undef AC_TIMER_TRACE
-#undef AC_TIMER_STATS
-
 #ifdef AC_TIMER_TRACE
 #define TRC(_x) _x
 #else
 #define TRC(_x)
 #endif
 
-/*
+/*****************************************************************************
  * We pull handlers off the timer list this far in future,
  * rather than reprogramming the time hardware.
- */
+ *****************************************************************************/
 #define TIMER_SLOP (50*1000) /* ns */
 
 /* A timer list per CPU */
 typedef struct ac_timers_st
 {
     spinlock_t        lock;
     struct list_head  timers;
-    struct ac_timer  *prev, *curr;
+    s_time_t          max_diff;
 } __cacheline_aligned ac_timers_t;
 static ac_timers_t ac_timers[NR_CPUS];
 
-#ifdef AC_TIMER_STATS
-#define BUCKETS 1000
-#define MAX_STATS
-typedef struct act_stats_st
-{
-    u32 count;
-    u32 times[2*(BUCKETS)];
-} __cacheline_aligned act_stats_t;
-static act_stats_t act_stats[NR_CPUS];
-
-#endif
-
 /* local prototypes */
 static int  detach_ac_timer(struct ac_timer *timer);
-/*static void ac_timer_debug(unsigned long);*/
 
-/*
+
+/*****************************************************************************
  * add a timer.
  * return value:
  *  0: success
  *  1: failure, timer in the past or timeout value to small
  *  -1: failure, timer uninitialised
  * fail
- */
+ *****************************************************************************/
 int add_ac_timer(struct ac_timer *timer)
 {
-    int cpu = smp_processor_id();
-    unsigned long flags;
-    s_time_t now;
+    int           cpu = smp_processor_id();
+    unsigned long flags;
+    s_time_t      now;
 
     /* make sure timeout value is in the future */
-    
+
     now = NOW();
-    if (timer->expires <= now) {
+    if (timer->expires <= now) {
         TRC(printk("ACT[%02d] add_ac_timer:now=0x%08X%08X>expire=0x%08X%08X\n",
-                   cpu, (u32)(now>>32), (u32)now,
-                   (u32)(timer->expires>>32), (u32)timer->expires));
+                   cpu, (u32)(now>>32), (u32)now,
+                   (u32)(timer->expires>>32), (u32)timer->expires));
         return 1;
     }
     spin_lock_irqsave(&ac_timers[cpu].lock, flags);
@@ -104,79 +88,89 @@ int add_ac_timer(struct ac_timer *timer)
      * reprogramm the timer
      */
     if (list_empty(&ac_timers[cpu].timers)) {
-        /* Reprogramm and add to head of list */
         if (!reprogram_ac_timer(timer->expires)) {
+            printk("ACT[%02d] add at head failed\n", cpu);
             spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
             return 1; /* failed */
         }
         list_add(&timer->timer_list, &ac_timers[cpu].timers);
     } else {
         struct list_head *pos;
-        struct ac_timer  *t;
+        struct ac_timer  *t;
 
-        list_for_each(pos, &ac_timers[cpu].timers) {
-            t = list_entry(pos, struct ac_timer, timer_list);
-            if (t->expires > timer->expires)
+        list_for_each(pos, &ac_timers[cpu].timers) {
+            t = list_entry(pos, struct ac_timer, timer_list);
+            if (t->expires > timer->expires)
                 break;
-        }
-        list_add (&(timer->timer_list), pos->prev);
+        }
+        list_add (&(timer->timer_list), pos->prev);
 
-        if (timer->timer_list.prev == &ac_timers[cpu].timers) {
-            /* added at head */
+        if (timer->timer_list.prev == &ac_timers[cpu].timers) {
+            /* added at head */
             if (!reprogram_ac_timer(timer->expires)) {
-                detach_ac_timer(timer);
+                printk("ACT[%02d] add at head failed\n", cpu);
+                detach_ac_timer(timer);
                 spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
                 return 1; /* failed */
             }
-        }
+        }
     }
     spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
     return 0;
 }
 
-/*
- * remove a timer
+/*****************************************************************************
+ * detach a timer (no locking)
  * return values:
  *  0: success
  *  -1: bogus timer
- */
+ *****************************************************************************/
 static int detach_ac_timer(struct ac_timer *timer)
 {
-    TRC(int cpu = smp_processor_id());
     TRC(printk("ACT [%02d] detach(): \n", cpu));
     list_del(&timer->timer_list);
     timer->timer_list.next = NULL;
     return 0;
 }
 
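
add_ac_timer() keeps each per-CPU list sorted by expiry and only touches the timer hardware when the insertion lands at the head. A hypothetical caller, using the fields this file relies on (expires, function, data) and the init_ac_timer()/NOW()/MILLISECS() helpers seen elsewhere in this patch:

    static void my_timeout(unsigned long data)
    {
        /* runs from do_ac_timer() with the timer already detached */
    }

    static struct ac_timer my_timer;

    static void arm_example_timer(void)
    {
        init_ac_timer(&my_timer);
        my_timer.function = &my_timeout;
        my_timer.data     = 0;
        my_timer.expires  = NOW() + MILLISECS(5);
        if (add_ac_timer(&my_timer) != 0) {
            /* expiry was already in the past (or reprogramming failed);
             * pick a later deadline and retry, as schedule() does */
        }
    }
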
-/*
+/*****************************************************************************
  * remove a timer
  * return values:
  *  0: success
  *  -1: bogus timer
- */
+ *****************************************************************************/
 int rem_ac_timer(struct ac_timer *timer)
 {
-    int cpu = smp_processor_id();
+    int           cpu = smp_processor_id();
     int           res = 0;
     unsigned long flags;
 
     TRC(printk("ACT [%02d] remove(): timo=%lld \n", cpu, timer->expires));
-
     spin_lock_irqsave(&ac_timers[cpu].lock, flags);
-    if (!timer->timer_list.next == NULL)
-        res = detach_ac_timer(timer);
+    if (timer->timer_list.next) {
+        res = detach_ac_timer(timer);
+
+        if (timer->timer_list.prev == &ac_timers[cpu].timers) {
+            /* just removed the head */
+            if (list_empty(&ac_timers[cpu].timers)) {
+                reprogram_ac_timer((s_time_t) 0);
+            }
+            /* XXX should actually reprogram APIC to new head */
+        }
+    } else
+        res = -1;
+
     spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
     return res;
 }
 
-/*
+/*****************************************************************************
  * modify a timer, i.e., set a new timeout value
  * return value:
  *  0: sucess
  *  -1: error
- */
+ *****************************************************************************/
 int mod_ac_timer(struct ac_timer *timer, s_time_t new_time)
 {
     if (rem_ac_timer(timer) != 0)
@@ -187,69 +181,59 @@ int mod_ac_timer(struct ac_timer *timer, s_time_t new_time)
     return 0;
 }
 
-/*
+/*****************************************************************************
  * do_ac_timer
  * deal with timeouts and run the handlers
- */
+ *****************************************************************************/
 void do_ac_timer(void)
 {
-    int cpu = smp_processor_id();
-    unsigned long flags;
-    struct ac_timer *t;
+    int              cpu = smp_processor_id();
+    unsigned long    flags;
+    struct ac_timer *t;
+    s_time_t         diff, now = NOW();
+    long             max;
 
     spin_lock_irqsave(&ac_timers[cpu].lock, flags);
 
  do_timer_again:
     TRC(printk("ACT [%02d] do(): now=%lld\n", cpu, NOW()));
-
-    /* Sanity: is the timer list empty? */
-    if ( list_empty(&ac_timers[cpu].timers) )
-        printk("ACT[%02d] do_ac_timer(): timer irq without timer\n", cpu);
-
-#ifdef AC_TIMER_STATS
-    {
-        s32 diff;
-        u32 i;
-        diff = ((s32)(NOW() - t->expires)) / 1000; /* delta in us */
-        if (diff < -BUCKETS)
-            diff = -BUCKETS;
-        else if (diff > BUCKETS)
-            diff = BUCKETS;
-        act_stats[cpu].times[diff+BUCKETS]++;
-        act_stats[cpu].count++;
-
-        if (act_stats[cpu].count >= 5000) {
-            printk("ACT Stats\n");
-            for (i=0; i < 2*BUCKETS; i++) {
-                if (act_stats[cpu].times[i] != 0)
-                    printk("ACT [%02d]: %3dus: %5d\n",
-                           cpu,i-BUCKETS, act_stats[cpu].times[i]);
-                act_stats[cpu].times[i]=0;
-            }
-            act_stats[cpu].count = 0;
-            printk("\n");
-        }
+
+    /* Sanity: is the timer list empty? */
+    if ( list_empty(&ac_timers[cpu].timers) ) {
+        /*
+         * XXX RN: This shouldn't happen, but does! Two possibilities:
+         * - Race condition between removing and resetting the APIC
+         * - setting an APIC timeout value of 0 causes an immediate
+         *   timer interrupt to fire.
+         * Neither of these should be critical!
+         */
+        spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+        return;
     }
-#endif
 
     /* Handle all timeouts in the near future. */
     while ( !list_empty(&ac_timers[cpu].timers) )
     {
-        t = list_entry(ac_timers[cpu].timers.next,
-                       struct ac_timer, timer_list);
+        t = list_entry(ac_timers[cpu].timers.next,struct ac_timer, timer_list);
         if ( t->expires > (NOW() + TIMER_SLOP) ) break;
+
+        /* do some stats */
+        diff = (now - t->expires);
+        if (diff > 0x7fffffff) diff = 0x7fffffff;  /* THIS IS BAD! */
+        max = perfc_valuea(ac_timer_max, cpu);
+        if (diff > max) perfc_seta(ac_timer_max, cpu, diff);
+
         detach_ac_timer(t);
         spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
         if ( t->function != NULL )
             t->function(t->data);
         spin_lock_irqsave(&ac_timers[cpu].lock, flags);
     }
-    
+
     /* If list not empty then reprogram timer to new head of list */
     if ( !list_empty(&ac_timers[cpu].timers) )
     {
-        t = list_entry(ac_timers[cpu].timers.next,
-                       struct ac_timer, timer_list);
+        t = list_entry(ac_timers[cpu].timers.next,struct ac_timer, timer_list);
         if ( t->expires > 0 )
         {
             TRC(printk("ACT [%02d] do(): reprog timo=%lld\n",cpu,t->expires));
@@ -259,21 +243,23 @@ void do_ac_timer(void)
                 goto do_timer_again;
             }
         }
+    } else {
+        reprogram_ac_timer((s_time_t) 0);
     }
     spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
     TRC(printk("ACT [%02d] do(): end\n", cpu));
 }
 
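
The "THIS IS BAD!" clamp above squeezes a 64-bit nanosecond delta into a 32-bit perf counter. Pulled out as its own helper, the saturating conversion looks like this (a sketch, not code from the tree):

    #include <stdint.h>

    /* Saturate a 64-bit delta into [0, INT32_MAX], the same clamp
     * do_ac_timer() applies before perfc_seta(). */
    static inline int32_t sat_to_s32(int64_t v)
    {
        if (v < 0)          return 0;
        if (v > INT32_MAX)  return INT32_MAX;   /* 0x7fffffff, as above */
        return (int32_t)v;
    }
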
-/*
+/*****************************************************************************
  * debug dump_queue
  * arguments: queue head, name of queue
- */
+ *****************************************************************************/
 static void dump_tqueue(struct list_head *queue, char *name)
 {
     struct list_head *list;
     int loop = 0;
-    struct ac_timer *t;
+    struct ac_timer  *t;
 
     printk ("QUEUE %s %lx   n: %lx, p: %lx\n", name, (unsigned long)queue,
             (unsigned long) queue->next, (unsigned long) queue->prev);
@@ -288,19 +274,21 @@ static void dump_tqueue(struct list_head *queue, char *name)
     return;
 }
 
-
-static void dump_timerq(u_char key, void *dev_id, struct pt_regs *regs)
+void dump_timerq(u_char key, void *dev_id, struct pt_regs *regs)
 {
     u_long   flags;
     s_time_t now = NOW();
+    int      i;
 
-    printk("Dumping ac_timer queues for cpu 0: NOW=0x%08X%08X\n",
+    printk("Dumping ac_timer queues: NOW=0x%08X%08X\n",
            (u32)(now>>32), (u32)now);
-
-    spin_lock_irqsave(&ac_timers[0].lock, flags);
-    dump_tqueue(&ac_timers[0].timers, "ac_time");
-    spin_unlock_irqrestore(&ac_timers[0].lock, flags);
-    printk("\n");
+    for (i = 0; i < smp_num_cpus; i++) {
+        printk("CPU[%02d] ", i);
+        spin_lock_irqsave(&ac_timers[i].lock, flags);
+        dump_tqueue(&ac_timers[i].timers, "ac_time");
+        spin_unlock_irqrestore(&ac_timers[i].lock, flags);
+        printk("\n");
+    }
     return;
 }
 
@@ -316,6 +304,51 @@ void __init ac_timer_init(void)
         INIT_LIST_HEAD(&ac_timers[i].timers);
         spin_lock_init(&ac_timers[i].lock);
     }
-
-    add_key_handler('a', dump_timerq, "dump ac_timer queues");
 }
+
+/*****************************************************************************
+ * GRAVEYARD
+ *****************************************************************************/
+
+#if 0
+
+#ifdef AC_TIMER_STATS
+#define BUCKETS 1000
+#define MAX_STATS
+typedef struct act_stats_st
+{
+    u32 count;
+    u32 times[2*(BUCKETS)];
+} __cacheline_aligned act_stats_t;
+static act_stats_t act_stats[NR_CPUS];
+
+#endif
+
+#ifdef AC_TIMER_STATS
+    {
+        XXX this is at the wrong place
+        s32 diff;
+        u32 i;
+        diff = ((s32)(NOW() - t->expires)) / 1000; /* delta in us */
+        if (diff < -BUCKETS)
+            diff = -BUCKETS;
+        else if (diff > BUCKETS)
+            diff = BUCKETS;
+        act_stats[cpu].times[diff+BUCKETS]++;
+        act_stats[cpu].count++;
+
+        if (act_stats[cpu].count >= 5000) {
+            printk("ACT Stats\n");
+            for (i=0; i < 2*BUCKETS; i++) {
+                if (act_stats[cpu].times[i] != 0)
+                    printk("ACT [%02d]: %3dus: %5d\n",
+                           cpu,i-BUCKETS, act_stats[cpu].times[i]);
+                act_stats[cpu].times[i]=0;
+            }
+            act_stats[cpu].count = 0;
+            printk("\n");
+        }
+    }
+#endif
+
+#endif /* 0 */
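
The graveyard code preserved above bucketed timer latency into a histogram of microsecond deltas clamped to +/-BUCKETS. A standalone sketch of the same idea; note the array here has 2*BUCKETS+1 slots so that a delta of exactly +BUCKETS stays in range, which the original's times[2*(BUCKETS)] risks overrunning:

    #define BUCKETS 1000

    static unsigned int lat_hist[2 * BUCKETS + 1];

    void record_latency_us(long delta_us)
    {
        if (delta_us < -BUCKETS) delta_us = -BUCKETS;
        if (delta_us >  BUCKETS) delta_us =  BUCKETS;
        lat_hist[delta_us + BUCKETS]++;   /* index 0 == -BUCKETS us */
    }
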
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index 8bb4fecab0..12fd4e7105 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -19,18 +19,18 @@ void add_key_handler(u_char key, key_handler *handler, char *desc)
     char *str;
 
     if(key_table[key].handler != NULL)
-        printk("Warning: overwriting handler for key 0x%x\n", key);
+        printk("Warning: overwriting handler for key 0x%x\n", key);
     key_table[key].handler = handler;
 
     str = key_table[key].desc;
     for(i = 0; i < STR_MAX; i++) {
-        if(*desc)
-            *str++ = *desc++;
-        else break;
+        if(*desc)
+            *str++ = *desc++;
+        else break;
     }
     if (i == STR_MAX)
-        key_table[key].desc[STR_MAX-1] = '\0';
+        key_table[key].desc[STR_MAX-1] = '\0';
 
     return;
 }
@@ -47,10 +47,10 @@ void show_handlers(u_char key, void *dev_id, struct pt_regs *regs)
 
     printk("'%c' pressed -> showing installed handlers\n", key);
     for(i=0; i < KEY_MAX; i++)
-        if(key_table[i].handler)
-            printk(" key '%c' (ascii '%02x') => %s\n",
-                   (i<33 || i>126)?(' '):(i),i,
-                   key_table[i].desc);
+        if(key_table[i].handler)
+            printk(" key '%c' (ascii '%02x') => %s\n",
+                   (i<33 || i>126)?(' '):(i),i,
+                   key_table[i].desc);
     return;
 }
 
@@ -94,36 +94,42 @@ void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs)
     p = &idle0_task;
     do {
         printk("Xen: DOM %d, CPU %d [has=%c], state = %s, "
-               "hyp_events = %08x\n",
-               p->domain, p->processor, p->has_cpu ? 'T':'F',
-               task_states[p->state], p->hyp_events);
-        s = p->shared_info;
-        if(!is_idle_task(p)) {
-            printk("Guest: events = %08lx, event_enable = %08lx\n",
-                   s->events, s->events_enable);
-            printk("Notifying guest...\n");
-            set_bit(_EVENT_DEBUG, &s->events);
-        }
+               "hyp_events = %08x\n",
+               p->domain, p->processor, p->has_cpu ? 'T':'F',
+               task_states[p->state], p->hyp_events);
+        s = p->shared_info;
+        if(!is_idle_task(p)) {
+            printk("Guest: events = %08lx, event_enable = %08lx\n",
+                   s->events, s->events_enable);
+            printk("Notifying guest...\n");
+            set_bit(_EVENT_DEBUG, &s->events);
+        }
     } while ( (p = p->next_task) != &idle0_task );
 
     read_unlock_irqrestore(&tasklist_lock, flags);
 }
 
+extern void dump_timerq(u_char key, void *dev_id, struct pt_regs *regs);
+extern void dump_runq(u_char key, void *dev_id, struct pt_regs *regs);
+
+
 void initialize_keytable()
 {
     int i;
 
     /* first initialize key handler table */
     for(i = 0; i < KEY_MAX; i++)
-        key_table[i].handler = (key_handler *)NULL;
-
+        key_table[i].handler = (key_handler *)NULL;
+
     /* setup own handlers */
+    add_key_handler('a', dump_timerq, "dump ac_timer queues");
     add_key_handler('d', dump_registers, "dump registers");
-    add_key_handler('h', show_handlers, "show this message");
+    add_key_handler('h', show_handlers, "show this message");
     add_key_handler('p', perfc_printall, "print performance counters");
     add_key_handler('q', do_task_queues, "dump task queues + guest state");
-    add_key_handler('R', halt_machine, "reboot machine ungracefully");
+    add_key_handler('r', dump_runq, "dump run queue");
+    add_key_handler('R', halt_machine, "reboot machine ungracefully");
+
     return;
 }
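
With the handlers now centralised in initialize_keytable(), adding a debug key is a single add_key_handler() call. A hypothetical extra handler, following the signature used above:

    /* Hypothetical: dump per-CPU scheduler counters on 's'. */
    static void dump_sched_stats(u_char key, void *dev_id, struct pt_regs *regs)
    {
        printk("'%c' pressed -> dumping scheduler stats\n", key);
        /* ... walk schedule_data[] and print whatever is useful ... */
    }

    /* in initialize_keytable():
     *     add_key_handler('s', dump_sched_stats, "dump scheduler statistics");
     */
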
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 2f4ba31c32..ce46069167 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -40,8 +40,8 @@
 #endif
 
 
-#define MCU       (s32)MICROSECS(100) /* Minimum unit */
-#define CTX_ALLOW (s32)MILLISECS(10)  /* context switch allowance */
+#define MCU            (s32)MICROSECS(100)  /* Minimum unit */
+static s32 ctx_allow = (s32)MILLISECS(10);  /* context switch allowance */
 
 /*****************************************************************************
  * per CPU data for the scheduler.
@@ -50,15 +50,15 @@
 typedef struct schedule_data_st
 {
     spinlock_t          lock;           /* lock for protecting this */
     struct list_head    runqueue;       /* runqueue */
-    struct task_struct *prev, *curr;    /* dito */
-
-    long                svt;            /* system virtual time. per CPU??? */
-    struct ac_timer     s_timer;        /* scheduling timer */
+    struct task_struct *prev, *curr;    /* previous and current task */
+    struct task_struct *idle;           /* idle task for this cpu */
+    u32                 svt;            /* system virtual time. per CPU??? */
+    struct ac_timer     s_timer;        /* scheduling timer */
 } __cacheline_aligned schedule_data_t;
 schedule_data_t schedule_data[NR_CPUS];
 
-struct ac_timer     v_timer;            /* scheduling timer */
+struct ac_timer v_timer;                /* scheduling timer */
 
 static void virt_timer(unsigned long foo);
 
@@ -68,7 +68,7 @@ static void virt_timer(unsigned long foo);
 /* add a task to the head of the runqueue */
 static inline void __add_to_runqueue_head(struct task_struct * p)
 {
-    
     list_add(&p->run_list, &schedule_data[p->processor].runqueue);
 }
 /* add a task to the tail of the runqueue */
@@ -97,11 +97,19 @@ static inline int __task_on_runqueue(struct task_struct *p)
  ******************************************************************************/
 void sched_add_domain(struct task_struct *p)
 {
-    p->state    = TASK_UNINTERRUPTIBLE;
-    /* set avt end evt to system virtual time */
-    p->avt      = schedule_data[p->processor].svt;
-    p->evt      = schedule_data[p->processor].svt;
-    /* RN: XXX BVT fill in other bits */
+    p->state       = TASK_UNINTERRUPTIBLE;
+    p->mcu_advance = 10;
+
+    if (p->domain == IDLE_DOMAIN_ID) {
+        p->avt = 0xffffffff;
+        p->evt = 0xffffffff;
+        schedule_data[p->processor].idle = p;
+    } else {
+        /* set avt and evt to system virtual time */
+        p->avt = schedule_data[p->processor].svt;
+        p->evt = schedule_data[p->processor].svt;
+        /* RN: XXX BVT fill in other bits */
+    }
 }
 
 void sched_rem_domain(struct task_struct *p)
@@ -117,16 +125,20 @@ int wake_up(struct task_struct *p)
 {
     unsigned long flags;
     int ret = 0;
+
     spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
+
     if ( __task_on_runqueue(p) ) goto out;
-    p->state = TASK_RUNNING;
 
-    /* set the BVT parameters */
-    if (p->avt < schedule_data[p->processor].svt)
-        p->avt = schedule_data[p->processor].svt;
-    p->evt = p->avt;    /* RN: XXX BVT deal with warping here */
-
+    p->state = TASK_RUNNING;
     __add_to_runqueue_head(p);
+
+    /* set the BVT parameters */
+    if (p->avt < schedule_data[p->processor].svt)
+        p->avt = schedule_data[p->processor].svt;
+
+    p->evt = p->avt;    /* RN: XXX BVT deal with warping here */
+
     ret = 1;
 
  out:
@@ -134,30 +146,56 @@ int wake_up(struct task_struct *p)
     return ret;
 }
 
-/* RN: XXX turn this into do_halt() */
 /****************************************************************************
  * Domain requested scheduling operations
 ****************************************************************************/
 long do_sched_op(void)
 {
+    /* XXX implement properly */
     current->state = TASK_INTERRUPTIBLE;
     schedule();
     return 0;
 }
 
+/****************************************************************************
+ * Control the scheduler
+ ****************************************************************************/
+long sched_bvtctl(unsigned long c_allow)
+{
+    printk("sched: bvtctl %lu\n", c_allow);
+    ctx_allow = c_allow;
+    return 0;
+}
+
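
mcu_advance is the inverse of a domain's weight: every MCU (100us) of real CPU time advances the domain's virtual time by mcu_advance, so smaller values mean a slower virtual clock and a larger CPU share; sched_adjdom() below is what updates it. A freestanding sketch of the charging rule schedule() applies, with invented numbers:

    #define MCU_NS 100000L   /* 100us, matching MCU above */

    /* Charge 'ranfor_ns' of CPU time: ceil(ranfor/MCU) MCUs, each worth
     * mcu_advance units of virtual time (as in schedule()). */
    unsigned int charge_avt(unsigned int avt, unsigned long mcu_advance,
                            long ranfor_ns)
    {
        long mcus = ranfor_ns / MCU_NS;
        if (ranfor_ns % MCU_NS) mcus++;          /* always round up */
        return avt + mcus * mcu_advance;
    }

    /* e.g. 1.05ms at mcu_advance=10 charges 11 MCUs, advancing avt by 110;
     * at mcu_advance=5 the same run advances avt by only 55, so that domain
     * regains the lowest evt sooner. */
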
 /****************************************************************************
  * Adjust scheduling parameter for a given domain
  ****************************************************************************/
 long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp,
-                  unsigned long warpl, unsigned long warpu)
+                  unsigned long warpl, unsigned long warpu)
 {
-    printk("sched: adjdom %02d %lu %lu %lu %lu\n",
-           dom, mcu_adv, warp, warpl, warpu);
-    return 0;
+    struct task_struct *p;
+
+    printk("sched: adjdom %02d %lu %lu %lu %lu\n",
+           dom, mcu_adv, warp, warpl, warpu);
+
+    p = find_domain_by_id(dom);
+    if ( p == NULL ) return -ESRCH;
+
+    spin_lock_irq(&schedule_data[p->processor].lock);
+
+    p->mcu_advance = mcu_adv;
+
+    spin_unlock_irq(&schedule_data[p->processor].lock);
+
+    return 0;
 }
 
 /****************************************************************************
  * cause a run through the scheduler when appropriate
+ * Appropriate is:
+ * - current task is idle task
+ * - the new process's evt is lower than the current one's
+ * - the current task already ran for its context switch allowance
  ****************************************************************************/
 void reschedule(struct task_struct *p)
 {
@@ -166,16 +204,20 @@ void reschedule(struct task_struct *p)
     unsigned long flags;
 
     if (p->has_cpu)
-        return;
+        return;
 
     spin_lock_irqsave(&schedule_data[cpu].lock, flags);
+
     curr = schedule_data[cpu].curr;
-    if (is_idle_task(curr)) {
+
+    if ( is_idle_task(curr) ||
+         (p->evt < curr->evt) ||
+         (curr->lastschd + ctx_allow >= NOW()) ) {
+        /* reschedule */
         set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
         spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
 #ifdef CONFIG_SMP
         if (cpu != smp_processor_id())
-            smp_send_event_check_cpu(cpu);
+            smp_send_event_check_cpu(cpu);
 #endif
     } else {
         spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
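
reschedule() now preempts when any of the three listed conditions holds. Written as a standalone predicate it reads as below; note the third test mirrors the patch exactly (lastschd + ctx_allow >= NOW()), even though the comment above describes the task having already used up its allowance:

    /* Sketch: the wake-up preemption test from reschedule(). */
    static int should_preempt(struct task_struct *curr, struct task_struct *p,
                              s_time_t now, s32 ctx_allow)
    {
        return is_idle_task(curr)                    /* anything beats idle  */
            || (p->evt < curr->evt)                  /* earlier virtual time */
            || (curr->lastschd + ctx_allow >= now);  /* as in the patch      */
    }
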
@@ -194,27 +236,26 @@
 asmlinkage void schedule(void)
 {
     struct task_struct *prev, *next, *next_prime, *p;
     struct list_head *tmp;
-    int this_cpu;
-    s_time_t now;
-    s32 r_time;     /* time for new dom to run */
-    s32 ranfor;     /* assume we never run longer than 2.1s! */
-    s32 mcus;
-    u32 next_evt, next_prime_evt;
-
-    perfc_incrc(sched_run1);
+    int                 this_cpu;
+    s_time_t            now;
+    s32                 r_time;  /* time for new dom to run */
+    s32                 ranfor;  /* assume we never run longer than 2.1s! */
+    s32                 mcus;
+    u32                 next_evt, next_prime_evt, min_avt;
+
+    perfc_incrc(sched_run1);
 
  need_resched_back:
-    perfc_incrc(sched_run2);
-
-    now = NOW();
-
-    /* remove timer */
-    rem_ac_timer(&schedule_data[smp_processor_id()].s_timer);
+    perfc_incrc(sched_run2);
 
+    now      = NOW();
     next     = NULL;
     prev     = current;
     this_cpu = prev->processor;
 
-    /*
+    /* remove timer */
+    rem_ac_timer(&schedule_data[this_cpu].s_timer);
+
+    /*
      * deschedule the current domain
      */
 
@@ -223,95 +264,115 @@ asmlinkage void schedule(void)
     ASSERT(!in_interrupt());
     ASSERT(__task_on_runqueue(prev));
 
-    if (is_idle_task(prev))
-        goto deschedule_done;
+    if (is_idle_task(prev))
+        goto deschedule_done;
 
-    /* do some accounting */
-    ranfor = (s32)(now - prev->lastschd);
+    /* do some accounting */
+    ranfor = (s32)(now - prev->lastschd);
     ASSERT((ranfor>0));
-    prev->cpu_time += ranfor;
-
-    /* calculate mcu and update avt */
-    mcus = ranfor/MCU;
-    if (ranfor % MCU) mcus ++;  /* always round up */
-    prev->avt += mcus * prev->mcu_advance;
-    prev->evt = prev->avt;      /* RN: XXX BVT deal with warping here */
-
-    /* dequeue */
-    __del_from_runqueue(prev);
-    switch (prev->state) {
-    case TASK_INTERRUPTIBLE:
-        if (signal_pending(prev)) {
-            prev->state = TASK_RUNNING; /* but has events pending */
-            break;
-        }
-    case TASK_UNINTERRUPTIBLE:
-    case TASK_WAIT:
-    case TASK_DYING:
-    default:
-        /* done if not running. Else, continue */
-        goto deschedule_done;
-    case TASK_RUNNING:;
-    }
-
-    /* requeue */
-    __add_to_runqueue_tail(prev);
-
+    prev->cpu_time += ranfor;
+
+    /* calculate mcu and update avt */
+    mcus = ranfor/MCU;
+    if (ranfor % MCU) mcus ++;  /* always round up */
+    prev->avt += mcus * prev->mcu_advance;
+    prev->evt = prev->avt;      /* RN: XXX BVT deal with warping here */
+
+    /* dequeue */
+    __del_from_runqueue(prev);
+    switch (prev->state) {
+    case TASK_INTERRUPTIBLE:
+        if (signal_pending(prev)) {
+            prev->state = TASK_RUNNING; /* but has events pending */
+            break;
+        }
+    case TASK_UNINTERRUPTIBLE:
+    case TASK_WAIT:
+    case TASK_DYING:
+    default:
+        /* done if not running. Else, continue */
+        goto deschedule_done;
+    case TASK_RUNNING:;
+    }
+
+    /* requeue */
+    __add_to_runqueue_tail(prev);
+
  deschedule_done:
     clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
 
-    /*
+    /*
      * Pick a new domain
      */
 
-    /* we should at least have the idle task */
-    ASSERT(!list_empty(&schedule_data[smp_processor_id()].runqueue));
+    /* we should at least have the idle task */
+    ASSERT(!list_empty(&schedule_data[this_cpu].runqueue));
 
-    /*
+    /*
      * scan through the run queue and pick the task with the lowest evt
      * *and* the task the second lowest evt.
-     * this code is O(n) but we expect n to be small.
+     * this code is O(n) but we expect n to be small.
      */
-    next       = NULL;
-    next_prime = NULL;
-
-    next_evt       = 0xffffffff;
-    next_prime_evt = 0xffffffff;
-
-    list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
-        p = list_entry(tmp, struct task_struct, run_list);
-        if (p->evt < next_evt) {
-            next_prime     = next;
-            next_prime_evt = next_evt;
-            next           = p;
-            next_evt       = p->evt;
-        }
-    }
-    ASSERT(next != NULL);   /* we should have at least the idle task */
-
-    if (next == NULL || is_idle_task(next)) {
-        next = &idle0_task; /* to be sure */
-        r_time = CTX_ALLOW;
-        goto sched_done;
-    }
-
-    if (next_prime == NULL || is_idle_task(next_prime)) {
-        /* we have only one runable task besides the idle task */
-        r_time = CTX_ALLOW;  /* RN: XXX should be much larger */
-        goto sched_done;
-    }
-
+    next       = schedule_data[this_cpu].idle;
+    next_prime = NULL;
+
+    next_evt       = 0xffffffff;
+    next_prime_evt = 0xffffffff;
+    min_avt        = 0xffffffff;    /* to calculate svt */
+
+
+    list_for_each(tmp, &schedule_data[this_cpu].runqueue) {
+        p = list_entry(tmp, struct task_struct, run_list);
+        if (p->evt < next_evt) {
+            next_prime     = next;
+            next_prime_evt = next_evt;
+            next           = p;
+            next_evt       = p->evt;
+        } else if (next_prime_evt == 0xffffffff) {
+            next_prime_evt = p->evt;
+            next_prime     = p;
+        } else if (p->evt < next_prime_evt) {
+            next_prime_evt = p->evt;
+            next_prime     = p;
+        }
+        /* determine system virtual time */
+        if (p->avt < min_avt)
+            min_avt = p->avt;
+    }
+    ASSERT(next != NULL);   /* we should have at least the idle task */
+
+    /* update system virtual time */
+    if (min_avt != 0xffffffff) schedule_data[this_cpu].svt = min_avt;
+
+    if (is_idle_task(next)) {
+        r_time = ctx_allow;
+        goto sched_done;
+    }
+
+    if (next_prime == NULL || is_idle_task(next_prime)) {
+        /* we have only one runnable task besides the idle task */
+        r_time = 10 * ctx_allow;    /* RN: random constant */
+        goto sched_done;
+    }
 
-    /*
+    /*
      * if we are here we have two runable tasks.
-     * work out how long 'next' can run till its evt is greater than
+     * work out how long 'next' can run till its evt is greater than
      * 'next_prime's evt. Taking context switch allowance into account.
      */
-    r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + CTX_ALLOW;
+    ASSERT(next_prime->evt > next->evt);
+    r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + ctx_allow;
 
  sched_done:
-    ASSERT(r_time != 0);
-    ASSERT(r_time > 0);
+    ASSERT(r_time != 0);
+    ASSERT(r_time > ctx_allow);
+
+    if ( (r_time==0) || (r_time < ctx_allow)) {
+        printk("[%02d]: %lx\n", this_cpu, r_time);
+        dump_rqueue(&schedule_data[this_cpu].runqueue, "foo");
+    }
+
     prev->has_cpu = 0;
     next->has_cpu = 1;
 
@@ -319,16 +380,16 @@ asmlinkage void schedule(void)
     schedule_data[this_cpu].prev = prev;
     schedule_data[this_cpu].curr = next;
 
-    next->lastschd = now;
+    next->lastschd = now;
 
-    /* reprogramm the timer */
+    /* reprogram the timer */
  timer_redo:
-    schedule_data[this_cpu].s_timer.expires  = now + r_time;
-    if (add_ac_timer(&schedule_data[this_cpu].s_timer) == 1) {
-        printk("SCHED: Shit this shouldn't happen\n");
-        now = NOW();
-        goto timer_redo;
-    }
+    schedule_data[this_cpu].s_timer.expires = now + r_time;
+    if (add_ac_timer(&schedule_data[this_cpu].s_timer) == 1) {
+        printk("SCHED[%02d]: Shit this shouldn't happen\n", this_cpu);
+        now = NOW();
+        goto timer_redo;
+    }
 
     spin_unlock_irq(&schedule_data[this_cpu].lock);
 
@@ -339,6 +400,8 @@ asmlinkage void schedule(void)
         goto same_process;
     }
 
+    perfc_incrc(sched_ctx);
+
     prepare_to_switch();
     switch_to(prev, next);
     prev = schedule_data[this_cpu].prev;
@@ -347,12 +410,12 @@ asmlinkage void schedule(void)
     if ( prev->state == TASK_DYING ) release_task(prev);
 
  same_process:
-    /* update the domains notion of time */
+    /* update the domain's notion of time */
     update_dom_time(current->shared_info);
 
     if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) ) {
         goto need_resched_back;
-    }
+    }
     return;
 }
 
 /*
@@ -361,11 +424,11 @@
  */
 static void sched_timer(unsigned long foo)
 {
-    int cpu = smp_processor_id();
+    int                 cpu  = smp_processor_id();
     struct task_struct *curr = schedule_data[cpu].curr;
-    /* cause a reschedule */
-    set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
-    perfc_incrc(sched_irq);
+    /* cause a reschedule */
+    set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
+    perfc_incrc(sched_irq);
 }
 
 /*
@@ -373,23 +436,23 @@
  */
 static void virt_timer(unsigned long foo)
 {
-    unsigned long cpu_mask = 0;
-    struct task_struct *p;
-    s_time_t now;
-    int res;
-
-    /* send virtual timer interrupt */
-    read_lock(&tasklist_lock);
-    p = &idle0_task;
-    do {
-        if ( is_idle_task(p) ) continue;
-        cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
-    }
-    while ( (p = p->next_task) != &idle0_task );
-    read_unlock(&tasklist_lock);
-    guest_event_notify(cpu_mask);
-
- again:
+    unsigned long       cpu_mask = 0;
+    struct task_struct *p;
+    s_time_t            now;
+    int                 res;
+
+    /* send virtual timer interrupt */
+    read_lock(&tasklist_lock);
+    p = &idle0_task;
+    do {
+        if ( is_idle_task(p) ) continue;
+        cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
+    }
+    while ( (p = p->next_task) != &idle0_task );
+    read_unlock(&tasklist_lock);
+    guest_event_notify(cpu_mask);
+
+ again:
     now = NOW();
     v_timer.expires = now + MILLISECS(10);
     res=add_ac_timer(&v_timer);
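
The slice computation in schedule() above gives 'next' just enough time for its evt to catch up with next_prime's, plus the context switch allowance, before s_timer fires and forces a re-run. The same expression as a standalone helper, with invented numbers in the comment:

    #include <stdint.h>

    /* Sketch of the BVT slice from schedule(): with next_evt = 100,
     * next_prime_evt = 160 and mcu_advance = 10, the gap closes after
     * (160 - 100) / 10 = 6 units, and ctx_allow is added on top. */
    static int32_t bvt_slice(uint32_t next_evt, uint32_t next_prime_evt,
                             unsigned long mcu_advance, int32_t ctx_allow)
    {
        return (int32_t)((next_prime_evt - next_evt) / mcu_advance) + ctx_allow;
    }
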
@@ -412,14 +475,15 @@ void __init scheduler_init(void)
         spin_lock_init(&schedule_data[i].lock);
         schedule_data[i].prev = &idle0_task;
         schedule_data[i].curr = &idle0_task;
-        
+
         /* a timer for each CPU */
         init_ac_timer(&schedule_data[i].s_timer);
         schedule_data[i].s_timer.function = &sched_timer;
     }
-    init_ac_timer(&v_timer);
-    v_timer.function = &virt_timer;
+
+    schedule_data[0].idle = &idle0_task; /* idle on CPU 0 is special */
+    init_ac_timer(&v_timer);
+    v_timer.function = &virt_timer;
 }
 
 /*
@@ -427,46 +491,14 @@
  * Start a scheduler for each CPU
  * This has to be done *after* the timers, e.g., APICs, have been initialised
  */
 void schedulers_start(void)
-{   
+{
     printk("Start schedulers\n");
     __cli();
     sched_timer(0);
-    virt_timer(0);
+    virt_timer(0);
     smp_call_function((void *)sched_timer, NULL, 1, 1);
     __sti();
-
-    //add_key_handler('r', dump_run_queues, "dump run queues")
 }
 
-#if 0
-/****************************************************************************
- * Debugging functions
- ****************************************************************************/
-static void dump_run_queues(u_char key, void *dev_id, struct pt_regs *regs)
-{
-    u_long   flags;
-    struct task_struct *p;
-    shared_info_t *s;
-
-    printk("'%c' pressed -> dumping run queues\n", key);
-    read_lock_irqsave(&tasklist_lock, flags);
-    p = &idle0_task;
-    do {
-        printk("Xen: DOM %d, CPU %d [has=%c], state = %s, "
-               "hyp_events = %08x\n",
-               p->domain, p->processor, p->has_cpu ? 'T':'F',
-               task_states[p->state], p->hyp_events);
-        s = p->shared_info;
-        if(!is_idle_task(p)) {
-            printk("Guest: events = %08lx, event_enable = %08lx\n",
-                   s->events, s->events_enable);
-            printk("Notifying guest...\n");
-            set_bit(_EVENT_DEBUG, &s->events);
-        }
-    } while ( (p = p->next_task) != &idle0_task );
-
-    read_unlock_irqrestore(&tasklist_lock, flags);
-}
-#endif
 
 
 /****************************************************************************
@@ -533,3 +565,47 @@ long schedule_timeout(long timeout)
  out:
     return timeout < 0 ? 0 : timeout;
 }
+
+/****************************************************************************
+ * debug function
+ ****************************************************************************/
+
+static void dump_rqueue(struct list_head *queue, char *name)
+{
+    struct list_head   *list;
+    int                 loop = 0;
+    struct task_struct *p;
+
+    printk ("QUEUE %s %lx   n: %lx, p: %lx\n", name, (unsigned long)queue,
+            (unsigned long) queue->next, (unsigned long) queue->prev);
+    list_for_each (list, queue) {
+        p = list_entry(list, struct task_struct, run_list);
+        printk("%3d: %3d has=%c mcua=0x%04X ev=0x%08X av=0x%08X c=0x%X%08X\n",
+               loop++, p->domain,
+               p->has_cpu ? 'T':'F',
+               p->mcu_advance, p->evt, p->avt,
+               (u32)(p->cpu_time>>32), (u32)p->cpu_time);
+        printk("         l: %lx n: %lx  p: %lx\n",
+               (unsigned long)list, (unsigned long)list->next,
+               (unsigned long)list->prev);
+    }
+    return;
+}
+
+void dump_runq(u_char key, void *dev_id, struct pt_regs *regs)
+{
+    u_long   flags;
+    s_time_t now = NOW();
+    int      i;
+
+    printk("BVT: mcu=0x%08Xns ctx_allow=0x%08Xns NOW=0x%08X%08X\n",
+           (u32)MCU, (u32)ctx_allow, (u32)(now>>32), (u32)now);
+    for (i = 0; i < smp_num_cpus; i++) {
+        spin_lock_irqsave(&schedule_data[i].lock, flags);
+        printk("CPU[%02d] svt=0x%08X ", i, (s32)schedule_data[i].svt);
+        dump_rqueue(&schedule_data[i].runqueue, "rq");
+        spin_unlock_irqrestore(&schedule_data[i].lock, flags);
+    }
+    return;
+}
+
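
The sched.h hunk below keeps the warning that the first task_struct fields have their offsets hard-coded in entry.S (offsets 00, 04, 08, 12, 16). Trees of this vintage had no static_assert; a common guard is a compile-time check via a negative array size, sketched here as a hypothetical addition:

    #include <stddef.h>

    /* Fails to compile if a hard-coded entry.S offset drifts. */
    #define OFFSET_CHECK(tag, type, field, off) \
        typedef char tag[(offsetof(type, field) == (off)) ? 1 : -1]

    /* e.g., for the layout documented below:
     *     OFFSET_CHECK(chk_processor,  struct task_struct, processor,   0);
     *     OFFSET_CHECK(chk_state,      struct task_struct, state,       4);
     *     OFFSET_CHECK(chk_hyp_events, struct task_struct, hyp_events,  8);
     *     OFFSET_CHECK(chk_domain,     struct task_struct, domain,     12);
     */
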
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
index b636c36f31..dbbf6a927e 100644
--- a/xen/include/xeno/sched.h
+++ b/xen/include/xeno/sched.h
@@ -63,20 +63,20 @@ extern struct mm_struct init_mm;
 
 struct task_struct {
 
-    /*
+    /*
      * DO NOT CHANGE THE ORDER OF THE FOLLOWING.
      * There offsets are hardcoded in entry.S
      */
 
     int processor;               /* 00: current processor */
-    int state;                   /* 04: current run state */
-    int hyp_events;              /* 08: pending events */
+    int state;                   /* 04: current run state */
+    int hyp_events;              /* 08: pending events */
     unsigned int domain;         /* 12: domain id */
 
     /* An unsafe pointer into a shared data area. */
     shared_info_t *shared_info;  /* 16: shared data area */
 
-    /*
+    /*
      * From here on things can be added and shuffled without special attention
      */
 
@@ -84,25 +84,25 @@ struct task_struct {
     unsigned int tot_pages;     /* number of pages currently possesed */
     unsigned int max_pages;     /* max number of pages that can be possesed */
 
-    /* scheduling */
-    struct list_head run_list;      /* the run list */
-    int              has_cpu;
-    int              policy;
-    int              counter;
+    /* scheduling */
+    struct list_head run_list;  /* the run list */
+    int              has_cpu;
+    int              policy;
+    int              counter;
 
-    struct ac_timer  blt;           /* blocked timeout */
+    struct ac_timer  blt;       /* blocked timeout */
 
-    s_time_t         lastschd;      /* time this domain was last scheduled */
-    s_time_t         cpu_time;      /* total CPU time received till now */
+    s_time_t         lastschd;  /* time this domain was last scheduled */
+    s_time_t         cpu_time;  /* total CPU time received till now */
 
-    long             mcu_advance;   /* inverse of weight */
-    u32              avt;           /* actual virtual time */
-    u32              evt;           /* effective virtual time */
-    long             warp;          /* virtual time warp */
-    long             warpl;         /* warp limit */
-    long             warpu;         /* unwarp time requirement */
-    long             warped;        /* time it ran warped last time */
-    long             uwarped;       /* time it ran unwarped last time */
+    unsigned long    mcu_advance;   /* inverse of weight */
+    s32              avt;           /* actual virtual time */
+    s32              evt;           /* effective virtual time */
+    long             warp;          /* virtual time warp */
+    long             warpl;         /* warp limit */
+    long             warpu;         /* unwarp time requirement */
+    long             warped;        /* time it ran warped last time */
+    long             uwarped;       /* time it ran unwarped last time */
 
 
     /* Network I/O */
@@ -119,7 +119,7 @@ struct task_struct {
     segment_t *segment_list[XEN_MAX_SEGMENTS];  /* vhd */
     int segment_count;
 
-    /* VM */
+    /* VM */
     struct mm_struct mm;
     /* We need this lock to check page types and frob reference counts. */
     spinlock_t page_lock;
@@ -158,7 +158,7 @@ struct task_struct {
 #define TASK_RUNNING         0
 #define TASK_INTERRUPTIBLE   1
 #define TASK_UNINTERRUPTIBLE 2
-#define TASK_WAIT            4
+#define TASK_WAIT            4
 #define TASK_DYING          16
 /* #define TASK_STOPPED 8 not really used */
 
@@ -172,8 +172,8 @@ struct task_struct {
     domain:      IDLE_DOMAIN_ID, \
     state:       TASK_RUNNING,   \
     has_cpu:     0,              \
-    evt:         0x7fffffff,     \
-    avt:         0x7fffffff,     \
+    evt:         0xffffffff,     \
+    avt:         0xffffffff,     \
     mm:          IDLE0_MM,       \
     addr_limit:  KERNEL_DS,      \
     active_mm:   &idle0_task.mm, \
@@ -186,7 +186,7 @@ struct task_struct {
 #define is_idle_task(_p) ((_p)->domain == IDLE_DOMAIN_ID)
 
 #ifndef IDLE0_TASK_SIZE
-#define IDLE0_TASK_SIZE 2048*sizeof(long)
+#define IDLE0_TASK_SIZE 2048*sizeof(long)
 #endif
 
 union task_union {
@@ -235,8 +235,9 @@ void scheduler_init(void);
 void schedulers_start(void);
 void sched_add_domain(struct task_struct *p);
 void sched_rem_domain(struct task_struct *p);
+long sched_bvtctl(unsigned long ctx_allow);
 long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp,
-                  unsigned long warpl, unsigned long warpu);
+                  unsigned long warpl, unsigned long warpu);
 int wake_up(struct task_struct *p);
 long schedule_timeout(long timeout);
 long do_yield(void);
--
cgit v1.2.3
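
A closing note on the idle task's parameters: IDLE0_TASK's evt/avt move from 0x7fffffff to 0xffffffff, and sched_add_domain() plants the same value for idle domains. Since schedule() compares evt as an unsigned 32-bit quantity, 0xffffffff is the sentinel that can never win; a tiny self-contained demonstration of what is presumably the motivation for the change:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t idle_evt = 0xffffffffu;  /* idle: must always lose  */
        uint32_t dom_evt  = 0x80000000u;  /* a domain past INT32_MAX */

        /* under the old 0x7fffffff sentinel this domain would have
         * sorted after idle; under UINT32_MAX it correctly runs first */
        printf("domain beats idle: %s\n", dom_evt < idle_evt ? "yes" : "no");
        return 0;
    }
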