diff options
author | rn@wyvis.research.intel-research.net <rn@wyvis.research.intel-research.net> | 2003-03-11 10:34:08 +0000 |
---|---|---|
committer | rn@wyvis.research.intel-research.net <rn@wyvis.research.intel-research.net> | 2003-03-11 10:34:08 +0000 |
commit | ffed65943e85325584b550ff8c878d01a90dc581 (patch) | |
tree | 0de93c402b34ee44a4b9f0a1ff7f0b2fb40fd9f9 | |
parent | 1f41c8f857594c33942f4bd2b72ec417f77e7ed9 (diff) | |
parent | 45f798dbd21b76d2d11431557f1226b0182971df (diff) | |
download | xen-ffed65943e85325584b550ff8c878d01a90dc581.tar.gz xen-ffed65943e85325584b550ff8c878d01a90dc581.tar.bz2 xen-ffed65943e85325584b550ff8c878d01a90dc581.zip |
bitkeeper revision 1.123 (3e6dbba0SePQkkmiWShmVqlCiF7ekg)
manual merge
-rw-r--r-- | .rootkeys | 1 | ||||
-rw-r--r-- | BitKeeper/etc/logging_ok | 1 | ||||
-rw-r--r-- | xen/arch/i386/apic.c | 3 | ||||
-rw-r--r-- | xen/common/ac_timer.c | 52 | ||||
-rw-r--r-- | xen/common/dom0_ops.c | 20 | ||||
-rw-r--r-- | xen/common/perfc.c | 115 | ||||
-rw-r--r-- | xen/common/schedule.c | 492 | ||||
-rw-r--r-- | xen/include/xeno/dom0_ops.h | 21 | ||||
-rw-r--r-- | xen/include/xeno/perfc.h | 32 | ||||
-rw-r--r-- | xen/include/xeno/perfc_defn.h | 5 | ||||
-rw-r--r-- | xen/include/xeno/sched.h | 53 | ||||
-rw-r--r-- | xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile | 2 | ||||
-rw-r--r-- | xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h | 32 | ||||
-rw-r--r-- | xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/sched_ops.c | 108 |
14 files changed, 667 insertions, 270 deletions
@@ -487,6 +487,7 @@ 3e5a4e65BXtftInNHUC2PjDfPhdZZA xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c 3e5a4e65uXAx05p6B1-HU2tijuw8qA xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_memory.c 3e5a4e65EOOLlPwXnhSuX-iVdWLmnA xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h +3e6dba59C8o0kBks7UZ4IW_FY853Aw xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/sched_ops.c 3e5a4e65gfn_ltB8ujHMVFApnTTNRQ xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c 3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/Makefile 3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index cc166e6358..ee427973c1 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok @@ -9,5 +9,6 @@ kaf24@plym.cl.cam.ac.uk kaf24@striker.cl.cam.ac.uk lynx@idefix.cl.cam.ac.uk rn@wyvis.camb.intel-research.net +rn@wyvis.research.intel-research.net smh22@boulderdash.cl.cam.ac.uk smh22@uridium.cl.cam.ac.uk diff --git a/xen/arch/i386/apic.c b/xen/arch/i386/apic.c index 9b999df951..865a279d8c 100644 --- a/xen/arch/i386/apic.c +++ b/xen/arch/i386/apic.c @@ -48,6 +48,8 @@ #include <xeno/ac_timer.h> +#include <xeno/perfc.h> + #undef APIC_TIME_TRACE #ifdef APIC_TIME_TRACE #define TRC(_x) _x @@ -748,6 +750,7 @@ void smp_apic_timer_interrupt(struct pt_regs * regs) /* call the local handler */ irq_enter(cpu, 0); + perfc_incrc(apic_timer); smp_local_timer_interrupt(regs); irq_exit(cpu, 0); diff --git a/xen/common/ac_timer.c b/xen/common/ac_timer.c index 8f65ff7093..9bb5d7e301 100644 --- a/xen/common/ac_timer.c +++ b/xen/common/ac_timer.c @@ -90,13 +90,12 @@ int add_ac_timer(struct ac_timer *timer) s_time_t now; /* make sure timeout value is in the future */ + now = NOW(); - TRC(printk("ACT [%02d] add(): now=%lld timo=%lld\n", - cpu, now, timer->expires)); if (timer->expires <= now) { - printk("ACT[%02d] add_ac_timer: now=0x%08X%08X 
> expire=0x%08X%08X\n", - cpu, (u32)(now>>32), (u32)now, - (u32)(timer->expires>>32), (u32)timer->expires); + TRC(printk("ACT[%02d] add_ac_timer:now=0x%08X%08X>expire=0x%08X%08X\n", + cpu, (u32)(now>>32), (u32)now, + (u32)(timer->expires>>32), (u32)timer->expires)); return 1; } spin_lock_irqsave(&ac_timers[cpu].lock, flags); @@ -107,43 +106,29 @@ int add_ac_timer(struct ac_timer *timer) if (list_empty(&ac_timers[cpu].timers)) { /* Reprogramm and add to head of list */ if (!reprogram_ac_timer(timer->expires)) { - /* failed */ - printk("ACT [%02d] add(): add at head failed\n", cpu); spin_unlock_irqrestore(&ac_timers[cpu].lock, flags); - return 1; + return 1; /* failed */ } list_add(&timer->timer_list, &ac_timers[cpu].timers); - TRC(printk("ACT [%02d] add(0x%08X%08X): added at head\n", cpu, - (u32)(timer->expires>>32), (u32)timer->expires)); } else { struct list_head *pos; struct ac_timer *t; - for (pos = ac_timers[cpu].timers.next; - pos != &ac_timers[cpu].timers; - pos = pos->next) { - t = list_entry(pos, struct ac_timer, timer_list); - if (t->expires > timer->expires) + + list_for_each(pos, &ac_timers[cpu].timers) { + t = list_entry(pos, struct ac_timer, timer_list); + if (t->expires > timer->expires) break; - } + } + list_add (&(timer->timer_list), pos->prev); - if (pos->prev == &ac_timers[cpu].timers) { - /* added to head, reprogramm timer */ + if (timer->timer_list.prev == &ac_timers[cpu].timers) { + /* added at head */ if (!reprogram_ac_timer(timer->expires)) { - /* failed */ - TRC(printk("ACT [%02d] add(): add at head failed\n", cpu)); + detach_ac_timer(timer); spin_unlock_irqrestore(&ac_timers[cpu].lock, flags); - return 1; + return 1; /* failed */ } - list_add (&(timer->timer_list), pos->prev); - TRC(printk("ACT [%02d] add(0x%08X%08X): added at head\n", cpu, - (u32)(timer->expires>>32), (u32)timer->expires)); - } else { - list_add (&(timer->timer_list), pos->prev); - TRC(printk("ACT [%02d] add(0x%08X%08X): add < exp=0x%08X%08X\n", - cpu, - 
(u32)(timer->expires>>32), (u32)timer->expires, - (u32)(t->expires>>32), (u32)t->expires)); - } + } } spin_unlock_irqrestore(&ac_timers[cpu].lock, flags); return 0; @@ -173,13 +158,14 @@ static int detach_ac_timer(struct ac_timer *timer) int rem_ac_timer(struct ac_timer *timer) { int cpu = smp_processor_id(); - int res; + int res = 0; unsigned long flags; TRC(printk("ACT [%02d] remove(): timo=%lld \n", cpu, timer->expires)); spin_lock_irqsave(&ac_timers[cpu].lock, flags); - res = detach_ac_timer(timer); + if (!timer->timer_list.next == NULL) + res = detach_ac_timer(timer); spin_unlock_irqrestore(&ac_timers[cpu].lock, flags); return res; diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c index e451a8f3e7..aa7768c033 100644 --- a/xen/common/dom0_ops.c +++ b/xen/common/dom0_ops.c @@ -126,6 +126,26 @@ long do_dom0_op(dom0_op_t *u_dom0_op) } break; + case DOM0_ADJUSTDOM: + { + unsigned int dom = op.u.adjustdom.domain; + unsigned long mcu_adv = op.u.adjustdom.mcu_adv; + unsigned long warp = op.u.adjustdom.warp; + unsigned long warpl = op.u.adjustdom.warpl; + unsigned long warpu = op.u.adjustdom.warpu; + + + if ( dom == IDLE_DOMAIN_ID ) + { + ret = -EPERM; + } + else + { + ret = sched_adjdom(dom, mcu_adv, warp, warpl, warpu); + } + } + break; + case DOM0_GETMEMLIST: { int i; diff --git a/xen/common/perfc.c b/xen/common/perfc.c index 55554eba70..925ac77264 100644 --- a/xen/common/perfc.c +++ b/xen/common/perfc.c @@ -2,10 +2,18 @@ * xen performance counters */ +#include <xeno/smp.h> + #include <xeno/perfc.h> #include <xeno/keyhandler.h> +/* used for different purposes in perfc.h and here */ +#undef PERFCOUNTER +#undef PERFCOUNTER_CPU +#undef PERFCOUNTER_ARRAY + #define PERFCOUNTER( var, name ) "[0]"name"\0", +#define PERFCOUNTER_CPU( var, name ) "C"name"\0", #define PERFCOUNTER_ARRAY( var, name, size ) "["#size"]"name"\0", char* perfc_name[] = { @@ -19,63 +27,78 @@ void __perfc_print (unsigned long counter[], int offset) int loop; int total_size = 0; int 
element_size = 0; + int cpus = 0; int num = 0; - for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++) - { - num = sscanf (perfc_name[loop], "[%d]", &element_size); - total_size += element_size == 0 ? 1 : element_size; - if (total_size > offset) break; - } - if (loop == sizeof(perfc_name) / sizeof(char *)) - { - printf ("error: couldn't find variable\n"); - return; + for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++) { + if (perfc_name[loop][0] == 'C') { + element_size = NR_CPUS; + cpus = 1; + } else { + num = sscanf (perfc_name[loop], "[%d]", &element_size); + } + + total_size += element_size == 0 ? 1 : element_size; + if (total_size > offset) break; } - if (element_size == 0) /* single counter */ - { - printf ("%10ld 0x%08lx %s\n", counter[0], counter[0], - perfc_name[loop] + 2 + num); + if (loop == sizeof(perfc_name) / sizeof(char *)) { + printf ("error: couldn't find variable\n"); + return; } - else /* show entire array */ - { - for (loop = 0; loop < element_size; loop++) - { - printf ("%10ld 0x%08lx %s:%d\n", - counter[loop], counter[loop], - perfc_name[loop] + 2 + num, loop); - } + if (element_size == 0) { /* single counter */ + printf ("%10ld 0x%08lx %s\n", counter[0], counter[0], + perfc_name[loop] + 2 + num); + } else if (cpus) { /* counter per CPU */ + for (loop = 0; loop < smp_num_cpus; loop++) { + printf ("%10ld 0x%08lx cpu[%02d] %s\n", + counter[loop], counter[loop], + loop, perfc_name[loop]); + } + + } else { /* show entire array */ + for (loop = 0; loop < element_size; loop++) { + printf ("%10ld 0x%08lx %s:%d\n", + counter[loop], counter[loop], + perfc_name[loop] + 2 + num, loop); + } } return; } void perfc_printall (u_char key, void *dev_id, struct pt_regs *regs) { - int loop, idx; - int element_size; - int num; - unsigned long *counters = (unsigned long *)&perfcounters; + int loop, idx; + int element_size; + int cpus=0; + int num = 0; + unsigned long *counters = (unsigned long *)&perfcounters; - printf ("xen performance 
counters\n"); - for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++) - { - num = sscanf (perfc_name[loop], "[%d]", &element_size); + printf ("xen performance counters\n"); + + for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++) { + + if (perfc_name[loop][0] == 'C') { + element_size = NR_CPUS; + cpus = 1; + } else { + num = sscanf (perfc_name[loop], "[%d]", &element_size); + } - for (idx = 0; idx < (element_size ? element_size : 1); idx++) - { - if (element_size) - { - printf ("%10ld 0x%08lx %s:%d\n", - *counters, *counters, perfc_name[loop] + num + 2, idx); - } - else - { - printf ("%10ld 0x%08lx %s\n", - *counters, *counters, perfc_name[loop] + num + 2); - } - counters++; - } - } + for (idx = 0; idx < (element_size ? element_size : 1); idx++) { + if (cpus) { + if (idx < smp_num_cpus) + printf ("%10ld 0x%08lx cpu[%02d] %s\n", + *counters, *counters, idx, perfc_name[loop] + 1); + } else if (element_size) { + printf ("%10ld 0x%08lx %s:%d\n", + *counters, *counters, perfc_name[loop] + num + 2, idx); + } else { + printf ("%10ld 0x%08lx %s\n", + *counters, *counters, perfc_name[loop] + num + 2); + } + counters++; + } + } - return; + return; } diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 787b43d900..2f4ba31c32 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -11,7 +11,8 @@ * * Environment: Xen Hypervisor * Description: CPU scheduling - * partially moved from domain.c + * implements A Borrowed Virtual Time scheduler. 
+ * (see Duda & Cheriton SOSP'99) * **************************************************************************** * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ @@ -28,6 +29,9 @@ #include <xeno/ac_timer.h> #include <xeno/interrupt.h> +#include <xeno/perfc.h> + + #undef SCHEDULER_TRACE #ifdef SCHEDULER_TRACE #define TRC(_x) _x @@ -35,72 +39,80 @@ #define TRC(_x) #endif -/* + +#define MCU (s32)MICROSECS(100) /* Minimum unit */ +#define CTX_ALLOW (s32)MILLISECS(10) /* context switch allowance */ + +/***************************************************************************** * per CPU data for the scheduler. - */ + *****************************************************************************/ typedef struct schedule_data_st { - spinlock_t lock; - struct list_head runqueue; - struct task_struct *prev, *curr; + spinlock_t lock; /* lock for protecting this */ + struct list_head runqueue; /* runqueue */ + struct task_struct *prev, *curr; /* dito */ + + long svt; /* system virtual time. per CPU??? 
*/ + struct ac_timer s_timer; /* scheduling timer */ + } __cacheline_aligned schedule_data_t; schedule_data_t schedule_data[NR_CPUS]; -static __cacheline_aligned struct ac_timer s_timer[NR_CPUS]; +struct ac_timer v_timer; /* scheduling timer */ +static void virt_timer(unsigned long foo); -/* - * Some convenience functions - */ -static inline void __add_to_runqueue(struct task_struct * p) +/***************************************************************************** + * Some convenience functions + *****************************************************************************/ +/* add a task to the head of the runqueue */ +static inline void __add_to_runqueue_head(struct task_struct * p) { + list_add(&p->run_list, &schedule_data[p->processor].runqueue); } - -static inline void __move_last_runqueue(struct task_struct * p) +/* add a task to the tail of the runqueue */ +static inline void __add_to_runqueue_tail(struct task_struct * p) { - list_del(&p->run_list); list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue); } -static inline void __move_first_runqueue(struct task_struct * p) -{ - list_del(&p->run_list); - list_add(&p->run_list, &schedule_data[p->processor].runqueue); -} - +/* remove a task from runqueue */ static inline void __del_from_runqueue(struct task_struct * p) { list_del(&p->run_list); p->run_list.next = NULL; } - +/* is task on run queue? 
*/ static inline int __task_on_runqueue(struct task_struct *p) { return (p->run_list.next != NULL); } +#define next_domain(p) \\ + list_entry((p)->run_list.next, struct task_struct, run_list) -/* - * Add a new domain to the scheduler - */ +/****************************************************************************** +* Add and remove a domain +******************************************************************************/ void sched_add_domain(struct task_struct *p) { - p->state = TASK_UNINTERRUPTIBLE; + p->state = TASK_UNINTERRUPTIBLE; + /* set avt end evt to system virtual time */ + p->avt = schedule_data[p->processor].svt; + p->evt = schedule_data[p->processor].svt; + /* RN: XXX BVT fill in other bits */ } -/* - * Remove domain to the scheduler - */ void sched_rem_domain(struct task_struct *p) { p->state = TASK_DYING; } -/* +/**************************************************************************** * wake up a domain which had been sleeping - */ + ****************************************************************************/ int wake_up(struct task_struct *p) { unsigned long flags; @@ -108,7 +120,13 @@ int wake_up(struct task_struct *p) spin_lock_irqsave(&schedule_data[p->processor].lock, flags); if ( __task_on_runqueue(p) ) goto out; p->state = TASK_RUNNING; - __add_to_runqueue(p); + + /* set the BVT parameters */ + if (p->avt < schedule_data[p->processor].svt) + p->avt = schedule_data[p->processor].svt; + p->evt = p->avt; /* RN: XXX BVT deal with warping here */ + + __add_to_runqueue_head(p); ret = 1; out: @@ -116,67 +134,10 @@ int wake_up(struct task_struct *p) return ret; } -static void process_timeout(unsigned long __data) -{ - struct task_struct * p = (struct task_struct *) __data; - wake_up(p); -} - -long schedule_timeout(long timeout) -{ - struct timer_list timer; - unsigned long expire; - - switch (timeout) - { - case MAX_SCHEDULE_TIMEOUT: - /* - * These two special cases are useful to be comfortable in the caller. - * Nothing more. 
We could take MAX_SCHEDULE_TIMEOUT from one of the - * negative value but I' d like to return a valid offset (>=0) to allow - * the caller to do everything it want with the retval. - */ - schedule(); - goto out; - default: - /* - * Another bit of PARANOID. Note that the retval will be 0 since no - * piece of kernel is supposed to do a check for a negative retval of - * schedule_timeout() (since it should never happens anyway). You just - * have the printk() that will tell you if something is gone wrong and - * where. - */ - if (timeout < 0) - { - printk(KERN_ERR "schedule_timeout: wrong timeout " - "value %lx from %p\n", timeout, - __builtin_return_address(0)); - current->state = TASK_RUNNING; - goto out; - } - } - - expire = timeout + jiffies; - - init_timer(&timer); - timer.expires = expire; - timer.data = (unsigned long) current; - timer.function = process_timeout; - - add_timer(&timer); - schedule(); - del_timer_sync(&timer); - - timeout = expire - jiffies; - - out: - return timeout < 0 ? 
0 : timeout; -} - /* RN: XXX turn this into do_halt() */ -/* - * yield the current process - */ +/**************************************************************************** + * Domain requested scheduling operations + ****************************************************************************/ long do_sched_op(void) { current->state = TASK_INTERRUPTIBLE; @@ -184,7 +145,20 @@ long do_sched_op(void) return 0; } +/**************************************************************************** + * Adjust scheduling parameter for a given domain + ****************************************************************************/ +long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp, + unsigned long warpl, unsigned long warpu) +{ + printk("sched: adjdom %02d %lu %lu %lu %lu\n", + dom, mcu_adv, warp, warpl, warpu); + return 0; +} +/**************************************************************************** + * cause a run through the scheduler when appropriate + ****************************************************************************/ void reschedule(struct task_struct *p) { int cpu = p->processor; @@ -209,47 +183,135 @@ void reschedule(struct task_struct *p) } -/* - * Pick the next domain to run - */ - +/**************************************************************************** + * The main function + * - deschedule the current domain. + * - pick a new domain. + * i.e., the domain with lowest EVT. + * The runqueue should be ordered by EVT so that is easy. + ****************************************************************************/ asmlinkage void schedule(void) { - struct task_struct *prev, *next, *p; - struct list_head *tmp; - int this_cpu; - + struct task_struct *prev, *next, *next_prime, *p; + struct list_head *tmp; + int this_cpu; + s_time_t now; + s32 r_time; /* time for new dom to run */ + s32 ranfor; /* assume we never run longer than 2.1s! 
*/ + s32 mcus; + u32 next_evt, next_prime_evt; + + perfc_incrc(sched_run1); need_resched_back: + perfc_incrc(sched_run2); + + now = NOW(); + + /* remove timer */ + rem_ac_timer(&schedule_data[smp_processor_id()].s_timer); + + next = NULL; prev = current; this_cpu = prev->processor; + /* + * deschedule the current domain + */ + spin_lock_irq(&schedule_data[this_cpu].lock); ASSERT(!in_interrupt()); ASSERT(__task_on_runqueue(prev)); - __move_last_runqueue(prev); - - switch ( prev->state ) - { - case TASK_INTERRUPTIBLE: - if ( signal_pending(prev) ) - { - prev->state = TASK_RUNNING; - break; - } - default: - __del_from_runqueue(prev); - case TASK_RUNNING:; - } + if (is_idle_task(prev)) + goto deschedule_done; + + /* do some accounting */ + ranfor = (s32)(now - prev->lastschd); + ASSERT((ranfor>0)); + prev->cpu_time += ranfor; + + /* calculate mcu and update avt */ + mcus = ranfor/MCU; + if (ranfor % MCU) mcus ++; /* always round up */ + prev->avt += mcus * prev->mcu_advance; + prev->evt = prev->avt; /* RN: XXX BVT deal with warping here */ + + /* dequeue */ + __del_from_runqueue(prev); + switch (prev->state) { + case TASK_INTERRUPTIBLE: + if (signal_pending(prev)) { + prev->state = TASK_RUNNING; /* but has events pending */ + break; + } + case TASK_UNINTERRUPTIBLE: + case TASK_WAIT: + case TASK_DYING: + default: + /* done if not running. 
Else, continue */ + goto deschedule_done; + case TASK_RUNNING:; + } + + /* requeue */ + __add_to_runqueue_tail(prev); + + + deschedule_done: clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events); - next = NULL; - list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) { - p = list_entry(tmp, struct task_struct, run_list); - next = p; - if ( !is_idle_task(next) ) break; - } + /* + * Pick a new domain + */ + + /* we should at least have the idle task */ + ASSERT(!list_empty(&schedule_data[smp_processor_id()].runqueue)); + + /* + * scan through the run queue and pick the task with the lowest evt + * *and* the task the second lowest evt. + * this code is O(n) but we expect n to be small. + */ + next = NULL; + next_prime = NULL; + + next_evt = 0xffffffff; + next_prime_evt = 0xffffffff; + + list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) { + p = list_entry(tmp, struct task_struct, run_list); + if (p->evt < next_evt) { + next_prime = next; + next_prime_evt = next_evt; + next = p; + next_evt = p->evt; + } + } + ASSERT(next != NULL); /* we should have at least the idle task */ + + if (next == NULL || is_idle_task(next)) { + next = &idle0_task; /* to be sure */ + r_time = CTX_ALLOW; + goto sched_done; + } + + if (next_prime == NULL || is_idle_task(next_prime)) { + /* we have only one runable task besides the idle task */ + r_time = CTX_ALLOW; /* RN: XXX should be much larger */ + goto sched_done; + } + + /* + * if we are here we have two runable tasks. + * work out how long 'next' can run till its evt is greater than + * 'next_prime's evt. Taking context switch allowance into account. 
+ */ + r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + CTX_ALLOW; + + sched_done: + ASSERT(r_time != 0); + ASSERT(r_time > 0); prev->has_cpu = 0; next->has_cpu = 1; @@ -257,6 +319,17 @@ asmlinkage void schedule(void) schedule_data[this_cpu].prev = prev; schedule_data[this_cpu].curr = next; + next->lastschd = now; + + /* reprogramm the timer */ + timer_redo: + schedule_data[this_cpu].s_timer.expires = now + r_time; + if (add_ac_timer(&schedule_data[this_cpu].s_timer) == 1) { + printk("SCHED: Shit this shouldn't happen\n"); + now = NOW(); + goto timer_redo; + } + spin_unlock_irq(&schedule_data[this_cpu].lock); if ( unlikely(prev == next) ) @@ -274,67 +347,56 @@ asmlinkage void schedule(void) if ( prev->state == TASK_DYING ) release_task(prev); same_process: + /* update the domains notion of time */ update_dom_time(current->shared_info); - if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) ) + if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) ) { goto need_resched_back; + } return; } /* - * The scheduling timer. + * The scheduler timer. */ -static __cacheline_aligned int count[NR_CPUS]; static void sched_timer(unsigned long foo) { int cpu = smp_processor_id(); struct task_struct *curr = schedule_data[cpu].curr; - s_time_t now; - int res; - - /* reschedule after each 5 ticks */ - if (count[cpu] >= 5) { - set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); - count[cpu] = 0; - } - count[cpu]++; - - /* - * deliver virtual timer interrups to domains if we are CPU 0 XXX RN: We - * don't have a per CPU list of domains yet. Otherwise would use that. - * Plus, this should be removed anyway once Domains "know" about virtual - * time and timeouts. But, it's better here then where it was before. 
- */ - if (cpu == 0) { - struct task_struct *p; - unsigned long cpu_mask = 0; - - /* send virtual timer interrupt */ - read_lock(&tasklist_lock); - p = &idle0_task; - do { - if ( is_idle_task(p) ) continue; - cpu_mask |= mark_guest_event(p, _EVENT_TIMER); - } - while ( (p = p->next_task) != &idle0_task ); - read_unlock(&tasklist_lock); - guest_event_notify(cpu_mask); - } + /* cause a reschedule */ + set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); + perfc_incrc(sched_irq); +} - again: +/* + * The Domain virtual time timer + */ +static void virt_timer(unsigned long foo) +{ + unsigned long cpu_mask = 0; + struct task_struct *p; + s_time_t now; + int res; + + /* send virtual timer interrupt */ + read_lock(&tasklist_lock); + p = &idle0_task; + do { + if ( is_idle_task(p) ) continue; + cpu_mask |= mark_guest_event(p, _EVENT_TIMER); + } + while ( (p = p->next_task) != &idle0_task ); + read_unlock(&tasklist_lock); + guest_event_notify(cpu_mask); + + again: now = NOW(); - s_timer[cpu].expires = now + MILLISECS(10); - res=add_ac_timer(&s_timer[cpu]); - - TRC(printk("SCHED[%02d] timer(): now=0x%08X%08X timo=0x%08X%08X\n", - cpu, (u32)(now>>32), (u32)now, - (u32)(s_timer[cpu].expires>>32), (u32)s_timer[cpu].expires)); + v_timer.expires = now + MILLISECS(10); + res=add_ac_timer(&v_timer); if (res==1) goto again; - } - /* * Initialise the data structures */ @@ -352,9 +414,12 @@ void __init scheduler_init(void) schedule_data[i].curr = &idle0_task; /* a timer for each CPU */ - init_ac_timer(&s_timer[i]); - s_timer[i].function = &sched_timer; + init_ac_timer(&schedule_data[i].s_timer); + schedule_data[i].s_timer.function = &sched_timer; + } + init_ac_timer(&v_timer); + v_timer.function = &virt_timer; } /* @@ -366,6 +431,105 @@ void schedulers_start(void) printk("Start schedulers\n"); __cli(); sched_timer(0); + virt_timer(0); smp_call_function((void *)sched_timer, NULL, 1, 1); __sti(); + + //add_key_handler('r', dump_run_queues, "dump run queues") +} +#if 0 
+/**************************************************************************** + * Debugging functions + ****************************************************************************/ +static void dump_run_queues(u_char key, void *dev_id, struct pt_regs *regs) +{ + u_long flags; + struct task_struct *p; + shared_info_t *s; + + printk("'%c' pressed -> dumping run queues\n", key); + read_lock_irqsave(&tasklist_lock, flags); + p = &idle0_task; + do { + printk("Xen: DOM %d, CPU %d [has=%c], state = %s, " + "hyp_events = %08x\n", + p->domain, p->processor, p->has_cpu ? 'T':'F', + task_states[p->state], p->hyp_events); + s = p->shared_info; + if(!is_idle_task(p)) { + printk("Guest: events = %08lx, event_enable = %08lx\n", + s->events, s->events_enable); + printk("Notifying guest...\n"); + set_bit(_EVENT_DEBUG, &s->events); + } + } while ( (p = p->next_task) != &idle0_task ); + + read_unlock_irqrestore(&tasklist_lock, flags); +} +#endif + + +/**************************************************************************** + * Functions for legacy support. + * Schedule timeout is used at a number of places and is a bit meaningless + * in the context of Xen, as Domains are not able to call these and all + * there entry points into Xen should be asynchronous. If a domain wishes + * to block for a while it should use Xen's sched_op entry point. + ****************************************************************************/ + +static void process_timeout(unsigned long __data) +{ + struct task_struct * p = (struct task_struct *) __data; + wake_up(p); +} + +long schedule_timeout(long timeout) +{ + struct timer_list timer; + unsigned long expire; + + switch (timeout) + { + case MAX_SCHEDULE_TIMEOUT: + /* + * These two special cases are useful to be comfortable in the caller. + * Nothing more. We could take MAX_SCHEDULE_TIMEOUT from one of the + * negative value but I' d like to return a valid offset (>=0) to allow + * the caller to do everything it want with the retval. 
+ */ + schedule(); + goto out; + default: + /* + * Another bit of PARANOID. Note that the retval will be 0 since no + * piece of kernel is supposed to do a check for a negative retval of + * schedule_timeout() (since it should never happens anyway). You just + * have the printk() that will tell you if something is gone wrong and + * where. + */ + if (timeout < 0) + { + printk(KERN_ERR "schedule_timeout: wrong timeout " + "value %lx from %p\n", timeout, + __builtin_return_address(0)); + current->state = TASK_RUNNING; + goto out; + } + } + + expire = timeout + jiffies; + + init_timer(&timer); + timer.expires = expire; + timer.data = (unsigned long) current; + timer.function = process_timeout; + + add_timer(&timer); + schedule(); + del_timer_sync(&timer); + + timeout = expire - jiffies; + + out: + return timeout < 0 ? 0 : timeout; } diff --git a/xen/include/xeno/dom0_ops.h b/xen/include/xeno/dom0_ops.h index 5e498de1bc..c0159d12cc 100644 --- a/xen/include/xeno/dom0_ops.h +++ b/xen/include/xeno/dom0_ops.h @@ -4,8 +4,11 @@ * Process command requests from domain-0 guest OS. 
* * Copyright (c) 2002, K A Fraser, B Dragovic + * + * MUST BE KEPT IN SYNC WITH xenolinux<*>/arch/xeno/drivers/dom0/dom0_ops.h */ + #ifndef __DOM0_OPS_H__ #define __DOM0_OPS_H__ @@ -13,6 +16,8 @@ #define DOM0_KILLDOMAIN 1 #define DOM0_GETMEMLIST 2 #define DOM0_STARTDOM 4 +#define DOM0_BVTCTL 6 +#define DOM0_ADJUSTDOM 7 #define MAX_CMD_LEN 256 @@ -48,6 +53,20 @@ typedef struct domain_launch char cmd_line[MAX_CMD_LEN]; } dom_meminfo_t; +typedef struct dom0_bvtctl_st +{ + unsigned long ctx_allow; /* context switch allowance */ +} dom0_bvtctl_t; + +typedef struct dom0_adjustdom_st +{ + unsigned int domain; /* domain id */ + unsigned long mcu_adv; /* mcu advance: inverse of weight */ + unsigned long warp; /* time warp */ + unsigned long warpl; /* warp limit */ + unsigned long warpu; /* unwarp time requirement */ +} dom0_adjustdom_t; + typedef struct dom0_op_st { unsigned long cmd; @@ -56,6 +75,8 @@ typedef struct dom0_op_st dom0_newdomain_t newdomain; dom0_killdomain_t killdomain; dom0_getmemlist_t getmemlist; + dom0_bvtctl_t bvtctl; + dom0_adjustdom_t adjustdom; dom_meminfo_t meminfo; } u; diff --git a/xen/include/xeno/perfc.h b/xen/include/xeno/perfc.h index 31201eaa6d..330bb8eba9 100644 --- a/xen/include/xeno/perfc.h +++ b/xen/include/xeno/perfc.h @@ -3,22 +3,27 @@ */ /* - * NOTE: new counters must be defined in xen_perf_defn.h + * NOTE: new counters must be defined in perfc_defn.h * * PERFCOUNTER (counter, string) define a new performance counter + * PERFCOUNTER_CPU (counter, string, size) define a counter per CPU * PERFCOUNTER_ARRY (counter, string, size) define an array of counters * * unsigned long perfc_value (counter) get value of a counter * unsigned long perfc_valuea (counter, index) get value of an array counter - * void perfc_incr (counter) increment a counter - * void perfc_incra (counter, index) increment an array counter - * void perfc_add (counter, value) add a value to a counter - * void perfc_adda (counter, index, value) add a value to array counter 
- * void perfc_print (counter) print out the counter + * void perfc_incr (counter) increment a counter + * void perfc_incrc (counter, index) increment a per CPU counter + * void perfc_incra (counter, index) increment an array counter + * void perfc_add (counter, value) add a value to a counter + * void perfc_addc (counter, value) add a value to a per CPU counter + * void perfc_adda (counter, index, value) add a value to array counter + * void perfc_print (counter) print out the counter */ #define PERFCOUNTER( var, name ) \ unsigned long var[1]; +#define PERFCOUNTER_CPU( var, name ) \ +unsigned long var[NR_CPUS]; #define PERFCOUNTER_ARRAY( var, name, size ) \ unsigned long var[size]; @@ -30,12 +35,15 @@ struct perfcounter_t extern struct perfcounter_t perfcounters; extern char *perfc_name[]; -#define perf_value(x) perfcounters.x[0] -#define perf_valuea(x,y) perfcounters.x[y] -#define perf_incr(x) perfcounters.x[0]++ -#define perf_incra(x,y) perfcounters.x[y]++ -#define perf_add(x,y) perfcounters.x[0]+=(y) -#define perf_adda(x,y,z) perfcounters.x[y]+=(z) +#define perfc_value(x) perfcounters.x[0] +#define perfc_valuec(x) perfcounters.x[smp_processor_id()] +#define perfc_valuea(x,y) perfcounters.x[y] +#define perfc_incr(x) perfcounters.x[0]++ +#define perfc_incrc(x) perfcounters.x[smp_processor_id()]++ +#define perfc_incra(x,y) perfcounters.x[y]++ +#define perfc_add(x,y) perfcounters.x[0]+=(y) +#define perfc_addc(x,y) perfcounters.x[smp_processor_id()]+=(y) +#define perfc_adda(x,y,z) perfcounters.x[y]+=(z) #define perf_print(x) \ __perfc_print(perfcounters.x, \ diff --git a/xen/include/xeno/perfc_defn.h b/xen/include/xeno/perfc_defn.h index 16ab4cd8c9..fde3e1dd72 100644 --- a/xen/include/xeno/perfc_defn.h +++ b/xen/include/xeno/perfc_defn.h @@ -2,3 +2,8 @@ PERFCOUNTER( blockio_tx, "block io: messages received from tx queue" ) PERFCOUNTER( blockio_rx, "block io: messages sent on rx queue" ) +PERFCOUNTER_CPU( apic_timer, "apic timer interrupts" ) +PERFCOUNTER_CPU( 
sched_irq, "sched: timer" ) +PERFCOUNTER_CPU( sched_run1, "sched: calls to schedule" ) +PERFCOUNTER_CPU( sched_run2, "sched: runs through scheduler" ) + diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h index 6d1842a2ea..b636c36f31 100644 --- a/xen/include/xeno/sched.h +++ b/xen/include/xeno/sched.h @@ -12,6 +12,10 @@ #include <hypervisor-ifs/hypervisor-if.h> #include <xeno/dom0_ops.h> +#include <xeno/list.h> +#include <xeno/time.h> +#include <xeno/ac_timer.h> + extern unsigned long volatile jiffies; extern rwlock_t tasklist_lock; @@ -59,18 +63,48 @@ extern struct mm_struct init_mm; struct task_struct { - int processor; - int state; - int hyp_events; - unsigned int domain; + /* + * DO NOT CHANGE THE ORDER OF THE FOLLOWING. + * These offsets are hardcoded in entry.S + */ + + int processor; /* 00: current processor */ + int state; /* 04: current run state */ + int hyp_events; /* 08: pending events */ + unsigned int domain; /* 12: domain id */ /* An unsafe pointer into a shared data area. 
*/ - shared_info_t *shared_info; + shared_info_t *shared_info; /* 16: shared data area */ + + /* + * From here on things can be added and shuffled without special attention + */ struct list_head pg_head; unsigned int tot_pages; /* number of pages currently possesed */ unsigned int max_pages; /* max number of pages that can be possesed */ + /* scheduling */ + struct list_head run_list; /* the run list */ + int has_cpu; + int policy; + int counter; + + struct ac_timer blt; /* blocked timeout */ + + s_time_t lastschd; /* time this domain was last scheduled */ + s_time_t cpu_time; /* total CPU time received till now */ + + long mcu_advance; /* inverse of weight */ + u32 avt; /* actual virtual time */ + u32 evt; /* effective virtual time */ + long warp; /* virtual time warp */ + long warpl; /* warp limit */ + long warpu; /* unwarp time requirement */ + long warped; /* time it ran warped last time */ + long uwarped; /* time it ran unwarped last time */ + + /* Network I/O */ net_ring_t *net_ring_base; net_vif_t *net_vif_list[MAX_GUEST_VIFS]; @@ -85,10 +119,7 @@ struct task_struct { segment_t *segment_list[XEN_MAX_SEGMENTS]; /* vhd */ int segment_count; - int has_cpu, policy, counter; - - struct list_head run_list; - + /* VM */ struct mm_struct mm; /* We need this lock to check page types and frob reference counts. 
*/ spinlock_t page_lock; @@ -141,6 +172,8 @@ struct task_struct { domain: IDLE_DOMAIN_ID, \ state: TASK_RUNNING, \ has_cpu: 0, \ + evt: 0x7fffffff, \ + avt: 0x7fffffff, \ mm: IDLE0_MM, \ addr_limit: KERNEL_DS, \ active_mm: &idle0_task.mm, \ @@ -202,6 +235,8 @@ void scheduler_init(void); void schedulers_start(void); void sched_add_domain(struct task_struct *p); void sched_rem_domain(struct task_struct *p); +long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp, + unsigned long warpl, unsigned long warpu); int wake_up(struct task_struct *p); long schedule_timeout(long timeout); long do_yield(void); diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile index 4738fc0ba4..eeb3413842 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/Makefile @@ -1,3 +1,3 @@ O_TARGET := dom0.o -obj-y := dom0_memory.o dom0_core.o vfr.o +obj-y := dom0_memory.o dom0_core.o vfr.o sched_ops.o include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h index 6c60a93ff6..a482d3c4d9 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h @@ -4,15 +4,19 @@ * Process command requests from domain-0 guest OS. 
* * Copyright (c) 2002, K A Fraser, B Dragovic + * + * MUST BE KEPT IN SYNC WITH xen/include/xeno/dom0_ops.h */ #define DOM0_NEWDOMAIN 0 #define DOM0_KILLDOMAIN 1 #define DOM0_GETMEMLIST 2 #define DOM0_STARTDOM 4 -#define MAP_DOM_MEM 6 /* Not passed down to Xen */ -#define DO_PGUPDATES 7 /* Not passed down to Xen */ -#define MAX_CMD 8 +#define DOM0_BVTCTL 6 +#define DOM0_ADJUSTDOM 7 +#define MAP_DOM_MEM 8 /* Not passed down to Xen */ +#define DO_PGUPDATES 9 /* Not passed down to Xen */ +#define MAX_CMD 10 #define MAX_CMD_LEN 256 @@ -20,8 +24,8 @@ typedef struct dom0_newdomain_st { unsigned int domain; unsigned int memory_kb; - unsigned int num_vifs; // temporary - unsigned long pg_head; // return parameter + unsigned int num_vifs; /* temporary */ + unsigned long pg_head; /* return parameter */ } dom0_newdomain_t; typedef struct dom0_killdomain_st @@ -37,6 +41,20 @@ typedef struct dom0_getmemlist_st void *buffer; } dom0_getmemlist_t; +typedef struct dom0_bvtctl_st +{ + unsigned long ctx_allow; /* context switch allowance */ +} dom0_bvtctl_t; + +typedef struct dom0_adjustdom_st +{ + unsigned int domain; /* domain id */ + unsigned long mcu_adv; /* mcu advance: inverse of weight */ + unsigned long warp; /* time warp */ + unsigned long warpl; /* warp limit */ + unsigned long warpu; /* unwarp time requirement */ +} dom0_adjustdom_t; + /* This is entirely processed by XenoLinux */ typedef struct dom_mem { @@ -64,6 +82,8 @@ typedef struct domain_launch char cmd_line[MAX_CMD_LEN]; } dom_meminfo_t; + + typedef struct dom0_op_st { unsigned long cmd; @@ -72,6 +92,8 @@ typedef struct dom0_op_st dom0_newdomain_t newdomain; dom0_killdomain_t killdomain; dom0_getmemlist_t getmemlist; + dom0_bvtctl_t bvtctl; + dom0_adjustdom_t adjustdom; dom_mem_t dommem; dom_pgupdate_t pgupdate; dom_meminfo_t meminfo; diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/sched_ops.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/sched_ops.c new file mode 100644 index 
0000000000..2408f83880 --- /dev/null +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/sched_ops.c @@ -0,0 +1,108 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: sched_ops.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Mar 2003 + * + * Environment: XenoLinux + * Description: Dom0 Control interface to scheduler in Xen + * + * code based on Andy's vfr parsing code + * + * Commands understood by the interface: + * + * S <did> <mcu advance> [ <warp> <warp limit> <unwarp limit> ] + * C <context swith allowance> + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + + +#include <linux/proc_fs.h> +#include <asm/hypervisor.h> +#include "dom0_ops.h" + +#define SCHED_ENTRY "sched" +extern struct proc_dir_entry *xeno_base; +static struct proc_dir_entry *sched_pde; + + +static int sched_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + strcpy(page, readbuf); + *readbuf = '\0'; + *eof = 1; + *start = page; + return strlen(page); +} + + +static int sched_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + dom0_op_t op; + + int ret, len; + int ts, te, tl; /* token start, end, and length */ + + /* Only admin can adjust scheduling parameters */ + if ( !capable(CAP_SYS_ADMIN) ) + return -EPERM; + + /* parse the commands */ + len = count; + ts = te = 0; + + while ( count && isspace(buffer[ts]) ) { ts++; count--; } // skip spaces. 
+ te = ts; + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "S", tl) == 0 ) + { + op.cmd = NETWORK_OP_ADDRULE; + } + else if ( strncmp(&buffer[ts], "C", tl) == 0 ) + { + op.cmd = NETWORK_OP_DELETERULE; + } + + +} + + +/* + * main scheduler interface driver initialization function. + */ +static int __init init_module(void) +{ + printk(KERN_ALERT "Starting Domain Scheduler Control Interface\n"); + + sched_pde = create_proc_entry(SCHED_ENTRY, 0600, xeno_base); + if ( sched_pde == NULL ) + { + printk(KERN_ALERT "Unable to create dom scheduler proc entry!"); + return -1; + } + + sched_pde->read_proc = sched_read_proc; + sched_pde->write_proc = sched_write_proc; + + return 0; +} + +static void __exit cleanup_module(void) +{ +} + +module_init(init_module); +module_exit(cleanup_module); + |