diff options
Diffstat (limited to 'xen/common/schedule.c')
-rw-r--r-- | xen/common/schedule.c | 438 |
1 files changed, 257 insertions, 181 deletions
diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 2f4ba31c32..ce46069167 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -40,8 +40,8 @@ #endif -#define MCU (s32)MICROSECS(100) /* Minimum unit */ -#define CTX_ALLOW (s32)MILLISECS(10) /* context switch allowance */ +#define MCU (s32)MICROSECS(100) /* Minimum unit */ +static s32 ctx_allow=(s32)MILLISECS(10); /* context switch allowance */ /***************************************************************************** * per CPU data for the scheduler. @@ -50,15 +50,15 @@ typedef struct schedule_data_st { spinlock_t lock; /* lock for protecting this */ struct list_head runqueue; /* runqueue */ - struct task_struct *prev, *curr; /* dito */ - - long svt; /* system virtual time. per CPU??? */ - struct ac_timer s_timer; /* scheduling timer */ + struct task_struct *prev, *curr; /* previous and current task */ + struct task_struct *idle; /* idle task for this cpu */ + u32 svt; /* system virtual time. per CPU??? */ + struct ac_timer s_timer; /* scheduling timer */ } __cacheline_aligned schedule_data_t; schedule_data_t schedule_data[NR_CPUS]; -struct ac_timer v_timer; /* scheduling timer */ +struct ac_timer v_timer; /* scheduling timer */ static void virt_timer(unsigned long foo); @@ -68,7 +68,7 @@ static void virt_timer(unsigned long foo); /* add a task to the head of the runqueue */ static inline void __add_to_runqueue_head(struct task_struct * p) { - + list_add(&p->run_list, &schedule_data[p->processor].runqueue); } /* add a task to the tail of the runqueue */ @@ -97,11 +97,19 @@ static inline int __task_on_runqueue(struct task_struct *p) ******************************************************************************/ void sched_add_domain(struct task_struct *p) { - p->state = TASK_UNINTERRUPTIBLE; - /* set avt end evt to system virtual time */ - p->avt = schedule_data[p->processor].svt; - p->evt = schedule_data[p->processor].svt; - /* RN: XXX BVT fill in other bits */ + p->state = TASK_UNINTERRUPTIBLE; + p->mcu_advance = 10; + + if (p->domain == IDLE_DOMAIN_ID) { + p->avt = 0xffffffff; + p->evt = 0xffffffff; + schedule_data[p->processor].idle = p; + } else { + /* set avt end evt to system virtual time */ + p->avt = schedule_data[p->processor].svt; + p->evt = schedule_data[p->processor].svt; + /* RN: XXX BVT fill in other bits */ + } } void sched_rem_domain(struct task_struct *p) @@ -117,16 +125,20 @@ int wake_up(struct task_struct *p) { unsigned long flags; int ret = 0; + spin_lock_irqsave(&schedule_data[p->processor].lock, flags); + if ( __task_on_runqueue(p) ) goto out; - p->state = TASK_RUNNING; - /* set the BVT parameters */ - if (p->avt < schedule_data[p->processor].svt) - p->avt = schedule_data[p->processor].svt; - p->evt = p->avt; /* RN: XXX BVT deal with warping here */ - + p->state = TASK_RUNNING; __add_to_runqueue_head(p); + + /* set the BVT parameters */ + if (p->avt < schedule_data[p->processor].svt) + p->avt = schedule_data[p->processor].svt; + + p->evt = p->avt; /* RN: XXX BVT deal with warping here */ + ret = 1; out: @@ -134,30 +146,56 @@ int wake_up(struct task_struct *p) return ret; } -/* RN: XXX turn this into do_halt() */ /**************************************************************************** * Domain requested scheduling operations ****************************************************************************/ long do_sched_op(void) { + /* XXX implement proper */ current->state = TASK_INTERRUPTIBLE; schedule(); return 0; } /**************************************************************************** + * Control the scheduler + ****************************************************************************/ +long sched_bvtctl(unsigned long c_allow) +{ + printk("sched: bvtctl %lu\n", c_allow); + ctx_allow = c_allow; + return 0; +} + +/**************************************************************************** * Adjust scheduling parameter for a given domain ****************************************************************************/ long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp, - unsigned long warpl, unsigned long warpu) + unsigned long warpl, unsigned long warpu) { - printk("sched: adjdom %02d %lu %lu %lu %lu\n", - dom, mcu_adv, warp, warpl, warpu); - return 0; + struct task_struct *p; + + printk("sched: adjdom %02d %lu %lu %lu %lu\n", + dom, mcu_adv, warp, warpl, warpu); + + p = find_domain_by_id(dom); + if ( p == NULL ) return -ESRCH; + + spin_lock_irq(&schedule_data[p->processor].lock); + + p->mcu_advance = mcu_adv; + + spin_unlock_irq(&schedule_data[p->processor].lock); + + return 0; } /**************************************************************************** * cause a run through the scheduler when appropriate + * Appropriate is: + * - current task is idle task + * - new processes evt is lower than current one + * - the current task already ran for it's context switch allowance ****************************************************************************/ void reschedule(struct task_struct *p) { @@ -166,16 +204,20 @@ void reschedule(struct task_struct *p) unsigned long flags; if (p->has_cpu) - return; + return; spin_lock_irqsave(&schedule_data[cpu].lock, flags); curr = schedule_data[cpu].curr; - if (is_idle_task(curr)) { + + if ( is_idle_task(curr) || + (p->evt < curr->evt) || + (curr->lastschd + ctx_allow >= NOW()) ) { + /* reschedule */ set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); spin_unlock_irqrestore(&schedule_data[cpu].lock, flags); #ifdef CONFIG_SMP if (cpu != smp_processor_id()) - smp_send_event_check_cpu(cpu); + smp_send_event_check_cpu(cpu); #endif } else { spin_unlock_irqrestore(&schedule_data[cpu].lock, flags); @@ -194,27 +236,26 @@ asmlinkage void schedule(void) { struct task_struct *prev, *next, *next_prime, *p; struct list_head *tmp; - int this_cpu; - s_time_t now; - s32 r_time; /* time for new dom to run */ - s32 ranfor; /* assume we never run longer than 2.1s! */ - s32 mcus; - u32 next_evt, next_prime_evt; - - perfc_incrc(sched_run1); + int this_cpu; + s_time_t now; + s32 r_time; /* time for new dom to run */ + s32 ranfor; /* assume we never run longer than 2.1s! */ + s32 mcus; + u32 next_evt, next_prime_evt, min_avt; + + perfc_incrc(sched_run1); need_resched_back: - perfc_incrc(sched_run2); - - now = NOW(); - - /* remove timer */ - rem_ac_timer(&schedule_data[smp_processor_id()].s_timer); + perfc_incrc(sched_run2); + now = NOW(); next = NULL; prev = current; this_cpu = prev->processor; - /* + /* remove timer */ + rem_ac_timer(&schedule_data[this_cpu].s_timer); + + /* * deschedule the current domain */ @@ -223,95 +264,115 @@ asmlinkage void schedule(void) ASSERT(!in_interrupt()); ASSERT(__task_on_runqueue(prev)); - if (is_idle_task(prev)) - goto deschedule_done; + if (is_idle_task(prev)) + goto deschedule_done; - /* do some accounting */ - ranfor = (s32)(now - prev->lastschd); + /* do some accounting */ + ranfor = (s32)(now - prev->lastschd); ASSERT((ranfor>0)); - prev->cpu_time += ranfor; - - /* calculate mcu and update avt */ - mcus = ranfor/MCU; - if (ranfor % MCU) mcus ++; /* always round up */ - prev->avt += mcus * prev->mcu_advance; - prev->evt = prev->avt; /* RN: XXX BVT deal with warping here */ - - /* dequeue */ - __del_from_runqueue(prev); - switch (prev->state) { - case TASK_INTERRUPTIBLE: - if (signal_pending(prev)) { - prev->state = TASK_RUNNING; /* but has events pending */ - break; - } - case TASK_UNINTERRUPTIBLE: - case TASK_WAIT: - case TASK_DYING: - default: - /* done if not running. Else, continue */ - goto deschedule_done; - case TASK_RUNNING:; - } - - /* requeue */ - __add_to_runqueue_tail(prev); - + prev->cpu_time += ranfor; + + /* calculate mcu and update avt */ + mcus = ranfor/MCU; + if (ranfor % MCU) mcus ++; /* always round up */ + prev->avt += mcus * prev->mcu_advance; + prev->evt = prev->avt; /* RN: XXX BVT deal with warping here */ + + /* dequeue */ + __del_from_runqueue(prev); + switch (prev->state) { + case TASK_INTERRUPTIBLE: + if (signal_pending(prev)) { + prev->state = TASK_RUNNING; /* but has events pending */ + break; + } + case TASK_UNINTERRUPTIBLE: + case TASK_WAIT: + case TASK_DYING: + default: + /* done if not running. Else, continue */ + goto deschedule_done; + case TASK_RUNNING:; + } + + /* requeue */ + __add_to_runqueue_tail(prev); + deschedule_done: clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events); - /* + /* * Pick a new domain */ - /* we should at least have the idle task */ - ASSERT(!list_empty(&schedule_data[smp_processor_id()].runqueue)); + /* we should at least have the idle task */ + ASSERT(!list_empty(&schedule_data[this_cpu].runqueue)); - /* + /* * scan through the run queue and pick the task with the lowest evt * *and* the task the second lowest evt. - * this code is O(n) but we expect n to be small. + * this code is O(n) but we expect n to be small. */ - next = NULL; - next_prime = NULL; - - next_evt = 0xffffffff; - next_prime_evt = 0xffffffff; - - list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) { - p = list_entry(tmp, struct task_struct, run_list); - if (p->evt < next_evt) { - next_prime = next; - next_prime_evt = next_evt; - next = p; - next_evt = p->evt; - } - } - ASSERT(next != NULL); /* we should have at least the idle task */ - - if (next == NULL || is_idle_task(next)) { - next = &idle0_task; /* to be sure */ - r_time = CTX_ALLOW; - goto sched_done; - } - - if (next_prime == NULL || is_idle_task(next_prime)) { - /* we have only one runable task besides the idle task */ - r_time = CTX_ALLOW; /* RN: XXX should be much larger */ - goto sched_done; - } - - /* + next = schedule_data[this_cpu].idle; + next_prime = NULL; + + next_evt = 0xffffffff; + next_prime_evt = 0xffffffff; + min_avt = 0xffffffff; /* to calculate svt */ + + + list_for_each(tmp, &schedule_data[this_cpu].runqueue) { + p = list_entry(tmp, struct task_struct, run_list); + if (p->evt < next_evt) { + next_prime = next; + next_prime_evt = next_evt; + next = p; + next_evt = p->evt; + } else if (next_prime_evt == 0xffffffff) { + next_prime_evt = p->evt; + next_prime = p; + } else if (p->evt < next_prime_evt) { + next_prime_evt = p->evt; + next_prime = p; + } + /* determine system virtual time */ + if (p->avt < min_avt) + min_avt = p->avt; + } + ASSERT(next != NULL); /* we should have at least the idle task */ + + /* update system virtual time */ + if (min_avt != 0xffffffff) schedule_data[this_cpu].svt = min_avt; + + if (is_idle_task(next)) { + r_time = ctx_allow; + goto sched_done; + } + + if (next_prime == NULL || is_idle_task(next_prime)) { + /* we have only one runable task besides the idle task */ + r_time = 10 * ctx_allow; /* RN: random constant */ + goto sched_done; + } + + /* * if we are here we have two runable tasks. - * work out how long 'next' can run till its evt is greater than + * work out how long 'next' can run till its evt is greater than * 'next_prime's evt. Taking context switch allowance into account. */ - r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + CTX_ALLOW; + ASSERT(next_prime->evt > next->evt); + r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + ctx_allow; sched_done: - ASSERT(r_time != 0); - ASSERT(r_time > 0); + ASSERT(r_time != 0); + ASSERT(r_time > ctx_allow); + + if ( (r_time==0) || (r_time < ctx_allow)) { + printk("[%02d]: %lx\n", this_cpu, r_time); + dump_rqueue(&schedule_data[this_cpu].runqueue, "foo"); + } + prev->has_cpu = 0; next->has_cpu = 1; @@ -319,16 +380,16 @@ asmlinkage void schedule(void) schedule_data[this_cpu].prev = prev; schedule_data[this_cpu].curr = next; - next->lastschd = now; + next->lastschd = now; - /* reprogramm the timer */ + /* reprogramm the timer */ timer_redo: - schedule_data[this_cpu].s_timer.expires = now + r_time; - if (add_ac_timer(&schedule_data[this_cpu].s_timer) == 1) { - printk("SCHED: Shit this shouldn't happen\n"); - now = NOW(); - goto timer_redo; - } + schedule_data[this_cpu].s_timer.expires = now + r_time; + if (add_ac_timer(&schedule_data[this_cpu].s_timer) == 1) { + printk("SCHED[%02d]: Shit this shouldn't happen\n", this_cpu); + now = NOW(); + goto timer_redo; + } spin_unlock_irq(&schedule_data[this_cpu].lock); @@ -339,6 +400,8 @@ asmlinkage void schedule(void) goto same_process; } + perfc_incrc(sched_ctx); + prepare_to_switch(); switch_to(prev, next); prev = schedule_data[this_cpu].prev; @@ -347,12 +410,12 @@ asmlinkage void schedule(void) if ( prev->state == TASK_DYING ) release_task(prev); same_process: - /* update the domains notion of time */ + /* update the domains notion of time */ update_dom_time(current->shared_info); if ( test_bit(_HYP_EVENT_NEED_RESCHED, ¤t->hyp_events) ) { goto need_resched_back; - } + } return; } @@ -361,11 +424,11 @@ asmlinkage void schedule(void) */ static void sched_timer(unsigned long foo) { - int cpu = smp_processor_id(); + int cpu = smp_processor_id(); struct task_struct *curr = schedule_data[cpu].curr; - /* cause a reschedule */ - set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); - perfc_incrc(sched_irq); + /* cause a reschedule */ + set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); + perfc_incrc(sched_irq); } /* @@ -373,23 +436,23 @@ static void sched_timer(unsigned long foo) */ static void virt_timer(unsigned long foo) { - unsigned long cpu_mask = 0; - struct task_struct *p; - s_time_t now; - int res; - - /* send virtual timer interrupt */ - read_lock(&tasklist_lock); - p = &idle0_task; - do { - if ( is_idle_task(p) ) continue; - cpu_mask |= mark_guest_event(p, _EVENT_TIMER); - } - while ( (p = p->next_task) != &idle0_task ); - read_unlock(&tasklist_lock); - guest_event_notify(cpu_mask); - - again: + unsigned long cpu_mask = 0; + struct task_struct *p; + s_time_t now; + int res; + + /* send virtual timer interrupt */ + read_lock(&tasklist_lock); + p = &idle0_task; + do { + if ( is_idle_task(p) ) continue; + cpu_mask |= mark_guest_event(p, _EVENT_TIMER); + } + while ( (p = p->next_task) != &idle0_task ); + read_unlock(&tasklist_lock); + guest_event_notify(cpu_mask); + + again: now = NOW(); v_timer.expires = now + MILLISECS(10); res=add_ac_timer(&v_timer); @@ -412,14 +475,15 @@ void __init scheduler_init(void) spin_lock_init(&schedule_data[i].lock); schedule_data[i].prev = &idle0_task; schedule_data[i].curr = &idle0_task; - + /* a timer for each CPU */ init_ac_timer(&schedule_data[i].s_timer); schedule_data[i].s_timer.function = &sched_timer; } - init_ac_timer(&v_timer); - v_timer.function = &virt_timer; + schedule_data[0].idle = &idle0_task; /* idle on CPU 0 is special */ + init_ac_timer(&v_timer); + v_timer.function = &virt_timer; } /* @@ -427,46 +491,14 @@ void __init scheduler_init(void) * This has to be done *after* the timers, e.g., APICs, have been initialised */ void schedulers_start(void) -{ +{ printk("Start schedulers\n"); __cli(); sched_timer(0); - virt_timer(0); + virt_timer(0); smp_call_function((void *)sched_timer, NULL, 1, 1); __sti(); - - //add_key_handler('r', dump_run_queues, "dump run queues") } -#if 0 -/**************************************************************************** - * Debugging functions - ****************************************************************************/ -static void dump_run_queues(u_char key, void *dev_id, struct pt_regs *regs) -{ - u_long flags; - struct task_struct *p; - shared_info_t *s; - - printk("'%c' pressed -> dumping run queues\n", key); - read_lock_irqsave(&tasklist_lock, flags); - p = &idle0_task; - do { - printk("Xen: DOM %d, CPU %d [has=%c], state = %s, " - "hyp_events = %08x\n", - p->domain, p->processor, p->has_cpu ? 'T':'F', - task_states[p->state], p->hyp_events); - s = p->shared_info; - if(!is_idle_task(p)) { - printk("Guest: events = %08lx, event_enable = %08lx\n", - s->events, s->events_enable); - printk("Notifying guest...\n"); - set_bit(_EVENT_DEBUG, &s->events); - } - } while ( (p = p->next_task) != &idle0_task ); - - read_unlock_irqrestore(&tasklist_lock, flags); -} -#endif /**************************************************************************** @@ -533,3 +565,47 @@ long schedule_timeout(long timeout) out: return timeout < 0 ? 0 : timeout; } + +/**************************************************************************** + * debug function + ****************************************************************************/ + +static void dump_rqueue(struct list_head *queue, char *name) +{ + struct list_head *list; + int loop = 0; + struct task_struct *p; + + printk ("QUEUE %s %lx n: %lx, p: %lx\n", name, (unsigned long)queue, + (unsigned long) queue->next, (unsigned long) queue->prev); + list_for_each (list, queue) { + p = list_entry(list, struct task_struct, run_list); + printk("%3d: %3d has=%c mcua=0x%04X ev=0x%08X av=0x%08X c=0x%X%08X\n", + loop++, p->domain, + p->has_cpu ? 'T':'F', + p->mcu_advance, p->evt, p->avt, + (u32)(p->cpu_time>>32), (u32)p->cpu_time); + printk(" l: %lx n: %lx p: %lx\n", + (unsigned long)list, (unsigned long)list->next, + (unsigned long)list->prev); + } + return; +} + +void dump_runq(u_char key, void *dev_id, struct pt_regs *regs) +{ + u_long flags; + s_time_t now = NOW(); + int i; + + printk("BVT: mcu=0x%08Xns ctx_allow=0x%08Xns NOW=0x%08X%08X\n", + (u32)MCU, (u32)ctx_allow, (u32)(now>>32), (u32)now); + for (i = 0; i < smp_num_cpus; i++) { + spin_lock_irqsave(&schedule_data[i].lock, flags); + printk("CPU[%02d] svt=0x%08X ", i, (s32)schedule_data[i].svt); + dump_rqueue(&schedule_data[i].runqueue, "rq"); + spin_unlock_irqrestore(&schedule_data[i].lock, flags); + } + return; +} + |