Diffstat (limited to 'xen/common/schedule.c')
 xen/common/schedule.c (-rw-r--r--) | 438 ++++++++++++++++++++++-----------------
 1 file changed, 257 insertions(+), 181 deletions(-)
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 2f4ba31c32..ce46069167 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -40,8 +40,8 @@
#endif
-#define MCU (s32)MICROSECS(100) /* Minimum unit */
-#define CTX_ALLOW (s32)MILLISECS(10) /* context switch allowance */
+#define MCU (s32)MICROSECS(100) /* Minimum unit */
+static s32 ctx_allow=(s32)MILLISECS(10); /* context switch allowance */
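The minimum charging unit stays a compile-time constant, but the context-switch allowance becomes a runtime-tunable variable so the new sched_bvtctl() call below can adjust it. Xen keeps system time as a signed 64-bit nanosecond count, so both macros expand to nanoseconds; a minimal sketch of the conversion helpers, assuming that convention (the real definitions live in Xen's time header):

    /* Sketch only, assuming s_time_t is a signed 64-bit nanosecond count. */
    #define MICROSECS(us) ((s_time_t)((us) * 1000ULL))    /* MCU: 100us = 100,000ns   */
    #define MILLISECS(ms) ((s_time_t)((ms) * 1000000ULL)) /* allowance: 10ms = 10^7ns */

Both values fit comfortably in the s32 casts used here (10ms is 10^7 ns, well under 2^31).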
/*****************************************************************************
* per CPU data for the scheduler.
@@ -50,15 +50,15 @@ typedef struct schedule_data_st
{
spinlock_t lock; /* lock for protecting this */
struct list_head runqueue; /* runqueue */
- struct task_struct *prev, *curr; /* dito */
-
- long svt; /* system virtual time. per CPU??? */
- struct ac_timer s_timer; /* scheduling timer */
+ struct task_struct *prev, *curr; /* previous and current task */
+ struct task_struct *idle; /* idle task for this cpu */
+ u32 svt; /* system virtual time. per CPU??? */
+ struct ac_timer s_timer; /* scheduling timer */
} __cacheline_aligned schedule_data_t;
schedule_data_t schedule_data[NR_CPUS];
-struct ac_timer v_timer; /* scheduling timer */
+struct ac_timer v_timer;            /* periodic virtual timer */
static void virt_timer(unsigned long foo);
@@ -68,7 +68,7 @@ static void virt_timer(unsigned long foo);
/* add a task to the head of the runqueue */
static inline void __add_to_runqueue_head(struct task_struct * p)
{
-
+
list_add(&p->run_list, &schedule_data[p->processor].runqueue);
}
/* add a task to the tail of the runqueue */
@@ -97,11 +97,19 @@ static inline int __task_on_runqueue(struct task_struct *p)
******************************************************************************/
void sched_add_domain(struct task_struct *p)
{
- p->state = TASK_UNINTERRUPTIBLE;
- /* set avt end evt to system virtual time */
- p->avt = schedule_data[p->processor].svt;
- p->evt = schedule_data[p->processor].svt;
- /* RN: XXX BVT fill in other bits */
+ p->state = TASK_UNINTERRUPTIBLE;
+ p->mcu_advance = 10;
+
+ if (p->domain == IDLE_DOMAIN_ID) {
+ p->avt = 0xffffffff;
+ p->evt = 0xffffffff;
+ schedule_data[p->processor].idle = p;
+ } else {
+ /* set avt and evt to system virtual time */
+ p->avt = schedule_data[p->processor].svt;
+ p->evt = schedule_data[p->processor].svt;
+ /* RN: XXX BVT fill in other bits */
+ }
}
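Seeding the idle domain with avt = evt = 0xffffffff (the maximum u32) pins it to the back of any effective-virtual-time ordering: the lowest-evt scan in schedule() below can only pick it when no real domain is runnable, and since the idle task is skipped by the accounting path its evt never moves. A hypothetical check of that invariant, not part of the patch:

    /* Hypothetical invariant check: the idle task must never order
     * ahead of a real domain d that was just seeded from svt.      */
    void check_idle_orders_last(struct task_struct *d, int cpu)
    {
        ASSERT(schedule_data[cpu].idle->evt == 0xffffffffU);
        ASSERT(d->evt <= schedule_data[cpu].idle->evt);
    }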
void sched_rem_domain(struct task_struct *p)
@@ -117,16 +125,20 @@ int wake_up(struct task_struct *p)
{
unsigned long flags;
int ret = 0;
+
spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
+
if ( __task_on_runqueue(p) ) goto out;
- p->state = TASK_RUNNING;
- /* set the BVT parameters */
- if (p->avt < schedule_data[p->processor].svt)
- p->avt = schedule_data[p->processor].svt;
- p->evt = p->avt; /* RN: XXX BVT deal with warping here */
-
+ p->state = TASK_RUNNING;
__add_to_runqueue_head(p);
+
+ /* set the BVT parameters */
+ if (p->avt < schedule_data[p->processor].svt)
+ p->avt = schedule_data[p->processor].svt;
+
+ p->evt = p->avt; /* RN: XXX BVT deal with warping here */
+
ret = 1;
out:
@@ -134,30 +146,56 @@ int wake_up(struct task_struct *p)
return ret;
}
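Moving the runqueue insertion ahead of the virtual-time update is effectively cosmetic under the lock; the clamp itself is the classic BVT wakeup rule. A domain that slept for a long time would otherwise wake with a stale, very low avt and monopolise the CPU until it caught up, so its avt is lifted to the per-CPU system virtual time and it competes from "now". With illustrative numbers:

    /* Illustrative values: svt tracks the minimum avt over runnable
     * domains, and a waking sleeper is brought forward to it.       */
    u32 svt = 5000;     /* virtual time reached while the domain slept */
    u32 avt = 200;      /* sleeper's stale virtual time                */
    if (avt < svt)
        avt = svt;      /* -> 5000: no credit is hoarded while asleep  */
    /* evt = avt, since warping is not implemented yet (RN: XXX above) */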
-/* RN: XXX turn this into do_halt() */
/****************************************************************************
* Domain requested scheduling operations
****************************************************************************/
long do_sched_op(void)
{
+ /* XXX implement properly */
current->state = TASK_INTERRUPTIBLE;
schedule();
return 0;
}
/****************************************************************************
+ * Control the scheduler
+ ****************************************************************************/
+long sched_bvtctl(unsigned long c_allow)
+{
+ printk("sched: bvtctl %lu\n", c_allow);
+ ctx_allow = c_allow;
+ return 0;
+}
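sched_bvtctl() exposes the context-switch allowance to the control plane. How the value arrives here is outside this patch; a hedged sketch of a caller, where both the entry point and the validation are assumptions:

    /* Hypothetical caller: a control operation handing a new allowance
     * (in nanoseconds) to the scheduler. Rejecting sub-MCU values is an
     * assumption, not something sched_bvtctl() itself enforces.        */
    long do_set_ctx_allow(unsigned long allow_ns)
    {
        if (allow_ns < (unsigned long)MCU)
            return -EINVAL;
        return sched_bvtctl(allow_ns);
    }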
+
+/****************************************************************************
* Adjust scheduling parameter for a given domain
****************************************************************************/
long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp,
- unsigned long warpl, unsigned long warpu)
+ unsigned long warpl, unsigned long warpu)
{
- printk("sched: adjdom %02d %lu %lu %lu %lu\n",
- dom, mcu_adv, warp, warpl, warpu);
- return 0;
+ struct task_struct *p;
+
+ printk("sched: adjdom %02d %lu %lu %lu %lu\n",
+ dom, mcu_adv, warp, warpl, warpu);
+
+ p = find_domain_by_id(dom);
+ if ( p == NULL ) return -ESRCH;
+
+ spin_lock_irq(&schedule_data[p->processor].lock);
+
+ p->mcu_advance = mcu_adv;
+
+ spin_unlock_irq(&schedule_data[p->processor].lock);
+
+ return 0;
}
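mcu_advance acts as an inverse weight: each minimum charging unit a domain runs advances its avt by mcu_advance, so halving a domain's mcu_advance makes its virtual time grow half as fast and roughly doubles its CPU share. A worked comparison, using the default of 10 set in sched_add_domain() above:

    /* Two domains, each having just run 1ms = 10 MCUs of real time: */
    u32 mcus  = 10;
    u32 adv_a = mcus * 10;  /* mcu_advance 10 (default)  -> avt += 100 */
    u32 adv_b = mcus * 5;   /* mcu_advance 5 (weightier) -> avt += 50;
                             * b's evt falls behind slower, so the
                             * lowest-evt scan picks it ~2x as often.  */

The warp, warpl and warpu parameters are accepted but not yet applied, matching the RN: XXX warping notes elsewhere in the patch.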
/****************************************************************************
* cause a run through the scheduler when appropriate
 * Appropriate is:
 * - the current task is the idle task
 * - the new process's evt is lower than the current one's
 * - the current task has already run for its context-switch allowance
****************************************************************************/
void reschedule(struct task_struct *p)
{
@@ -166,16 +204,20 @@ void reschedule(struct task_struct *p)
unsigned long flags;
if (p->has_cpu)
- return;
+ return;
spin_lock_irqsave(&schedule_data[cpu].lock, flags);
curr = schedule_data[cpu].curr;
- if (is_idle_task(curr)) {
+
+ if ( is_idle_task(curr) ||
+ (p->evt < curr->evt) ||
+ (curr->lastschd + ctx_allow >= NOW()) ) {
+ /* reschedule */
set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
#ifdef CONFIG_SMP
if (cpu != smp_processor_id())
- smp_send_event_check_cpu(cpu);
+ smp_send_event_check_cpu(cpu);
#endif
} else {
spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
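The rewritten test kicks the target CPU on wake-up when the idle task is running, when the waker's evt beats the running task's, or on the lastschd test as committed (note that `lastschd + ctx_allow >= NOW()` holds while curr is still inside its allowance, which reads opposite to the header comment above). A condensed restatement of the predicate, as a sketch rather than a replacement:

    /* Condensed sketch of the preemption test as committed. */
    static inline int should_resched(struct task_struct *curr,
                                     struct task_struct *waker)
    {
        return is_idle_task(curr)                   /* nothing useful runs  */
            || (waker->evt < curr->evt)             /* waker more deserving */
            || (curr->lastschd + ctx_allow >= NOW());
    }

On SMP, a remote CPU is prodded with smp_send_event_check_cpu() so it notices the _HYP_EVENT_NEED_RESCHED bit promptly.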
@@ -194,27 +236,26 @@ asmlinkage void schedule(void)
{
struct task_struct *prev, *next, *next_prime, *p;
struct list_head *tmp;
- int this_cpu;
- s_time_t now;
- s32 r_time; /* time for new dom to run */
- s32 ranfor; /* assume we never run longer than 2.1s! */
- s32 mcus;
- u32 next_evt, next_prime_evt;
-
- perfc_incrc(sched_run1);
+ int this_cpu;
+ s_time_t now;
+ s32 r_time; /* time for new dom to run */
+ s32 ranfor; /* assume we never run longer than 2.1s! */
+ s32 mcus;
+ u32 next_evt, next_prime_evt, min_avt;
+
+ perfc_incrc(sched_run1);
need_resched_back:
- perfc_incrc(sched_run2);
-
- now = NOW();
-
- /* remove timer */
- rem_ac_timer(&schedule_data[smp_processor_id()].s_timer);
+ perfc_incrc(sched_run2);
+ now = NOW();
next = NULL;
prev = current;
this_cpu = prev->processor;
- /*
+ /* remove timer */
+ rem_ac_timer(&schedule_data[this_cpu].s_timer);
+
+ /*
* deschedule the current domain
*/
@@ -223,95 +264,115 @@ asmlinkage void schedule(void)
ASSERT(!in_interrupt());
ASSERT(__task_on_runqueue(prev));
- if (is_idle_task(prev))
- goto deschedule_done;
+ if (is_idle_task(prev))
+ goto deschedule_done;
- /* do some accounting */
- ranfor = (s32)(now - prev->lastschd);
+ /* do some accounting */
+ ranfor = (s32)(now - prev->lastschd);
ASSERT((ranfor>0));
- prev->cpu_time += ranfor;
-
- /* calculate mcu and update avt */
- mcus = ranfor/MCU;
- if (ranfor % MCU) mcus ++; /* always round up */
- prev->avt += mcus * prev->mcu_advance;
- prev->evt = prev->avt; /* RN: XXX BVT deal with warping here */
-
- /* dequeue */
- __del_from_runqueue(prev);
- switch (prev->state) {
- case TASK_INTERRUPTIBLE:
- if (signal_pending(prev)) {
- prev->state = TASK_RUNNING; /* but has events pending */
- break;
- }
- case TASK_UNINTERRUPTIBLE:
- case TASK_WAIT:
- case TASK_DYING:
- default:
- /* done if not running. Else, continue */
- goto deschedule_done;
- case TASK_RUNNING:;
- }
-
- /* requeue */
- __add_to_runqueue_tail(prev);
-
+ prev->cpu_time += ranfor;
+
+ /* calculate mcu and update avt */
+ mcus = ranfor/MCU;
+ if (ranfor % MCU) mcus ++; /* always round up */
+ prev->avt += mcus * prev->mcu_advance;
+ prev->evt = prev->avt; /* RN: XXX BVT deal with warping here */
+
+ /* dequeue */
+ __del_from_runqueue(prev);
+ switch (prev->state) {
+ case TASK_INTERRUPTIBLE:
+ if (signal_pending(prev)) {
+ prev->state = TASK_RUNNING; /* but has events pending */
+ break;
+ }
+ case TASK_UNINTERRUPTIBLE:
+ case TASK_WAIT:
+ case TASK_DYING:
+ default:
+ /* done if not running. Else, continue */
+ goto deschedule_done;
+ case TASK_RUNNING:;
+ }
+
+ /* requeue */
+ __add_to_runqueue_tail(prev);
+
deschedule_done:
clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
- /*
+ /*
* Pick a new domain
*/
- /* we should at least have the idle task */
- ASSERT(!list_empty(&schedule_data[smp_processor_id()].runqueue));
+ /* we should at least have the idle task */
+ ASSERT(!list_empty(&schedule_data[this_cpu].runqueue));
- /*
+ /*
* scan through the run queue and pick the task with the lowest evt
 * *and* the task with the second lowest evt.
- * this code is O(n) but we expect n to be small.
+ * this code is O(n) but we expect n to be small.
*/
- next = NULL;
- next_prime = NULL;
-
- next_evt = 0xffffffff;
- next_prime_evt = 0xffffffff;
-
- list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
- p = list_entry(tmp, struct task_struct, run_list);
- if (p->evt < next_evt) {
- next_prime = next;
- next_prime_evt = next_evt;
- next = p;
- next_evt = p->evt;
- }
- }
- ASSERT(next != NULL); /* we should have at least the idle task */
-
- if (next == NULL || is_idle_task(next)) {
- next = &idle0_task; /* to be sure */
- r_time = CTX_ALLOW;
- goto sched_done;
- }
-
- if (next_prime == NULL || is_idle_task(next_prime)) {
- /* we have only one runable task besides the idle task */
- r_time = CTX_ALLOW; /* RN: XXX should be much larger */
- goto sched_done;
- }
-
- /*
+ next = schedule_data[this_cpu].idle;
+ next_prime = NULL;
+
+ next_evt = 0xffffffff;
+ next_prime_evt = 0xffffffff;
+ min_avt = 0xffffffff; /* to calculate svt */
+
+
+ list_for_each(tmp, &schedule_data[this_cpu].runqueue) {
+ p = list_entry(tmp, struct task_struct, run_list);
+ if (p->evt < next_evt) {
+ next_prime = next;
+ next_prime_evt = next_evt;
+ next = p;
+ next_evt = p->evt;
+ } else if (next_prime_evt == 0xffffffff) {
+ next_prime_evt = p->evt;
+ next_prime = p;
+ } else if (p->evt < next_prime_evt) {
+ next_prime_evt = p->evt;
+ next_prime = p;
+ }
+ /* determine system virtual time */
+ if (p->avt < min_avt)
+ min_avt = p->avt;
+ }
+ ASSERT(next != NULL); /* we should have at least the idle task */
+
+ /* update system virtual time */
+ if (min_avt != 0xffffffff) schedule_data[this_cpu].svt = min_avt;
+
+ if (is_idle_task(next)) {
+ r_time = ctx_allow;
+ goto sched_done;
+ }
+
+ if (next_prime == NULL || is_idle_task(next_prime)) {
+ /* we have only one runnable task besides the idle task */
+ r_time = 10 * ctx_allow; /* RN: random constant */
+ goto sched_done;
+ }
+
+ /*
 * if we are here we have two runnable tasks.
- * work out how long 'next' can run till its evt is greater than
+ * work out how long 'next' can run till its evt is greater than
 * 'next_prime's evt, taking the context-switch allowance into account.
*/
- r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + CTX_ALLOW;
+ ASSERT(next_prime->evt >= next->evt); /* equal evts are possible */
+ r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + ctx_allow;
sched_done:
- ASSERT(r_time != 0);
- ASSERT(r_time > 0);
+ ASSERT(r_time != 0);
+ ASSERT(r_time >= ctx_allow); /* the idle path sets r_time = ctx_allow exactly */
+
+ if ( (r_time==0) || (r_time < ctx_allow)) {
+ printk("[%02d]: %lx\n", this_cpu, r_time);
+ dump_rqueue(&schedule_data[this_cpu].runqueue, "foo");
+ }
+
prev->has_cpu = 0;
next->has_cpu = 1;
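The accounting at the top of this hunk always rounds the elapsed time up to whole MCUs, so even the briefest run advances avt and sub-MCU slices cannot be had for free. Worked numbers:

    /* prev ran for 2.35ms; MCU = 100us = 100,000ns (illustrative). */
    s32 ranfor = 2350000;
    s32 mcus   = ranfor / MCU;    /* 23 whole units                 */
    if (ranfor % MCU) mcus++;     /* partial unit -> 24: round up   */
    /* prev->avt += mcus * prev->mcu_advance;  => +240 at default 10 */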
@@ -319,16 +380,16 @@ asmlinkage void schedule(void)
schedule_data[this_cpu].prev = prev;
schedule_data[this_cpu].curr = next;
- next->lastschd = now;
+ next->lastschd = now;
- /* reprogramm the timer */
+ /* reprogram the timer */
timer_redo:
- schedule_data[this_cpu].s_timer.expires = now + r_time;
- if (add_ac_timer(&schedule_data[this_cpu].s_timer) == 1) {
- printk("SCHED: Shit this shouldn't happen\n");
- now = NOW();
- goto timer_redo;
- }
+ schedule_data[this_cpu].s_timer.expires = now + r_time;
+ if (add_ac_timer(&schedule_data[this_cpu].s_timer) == 1) {
+ printk("SCHED[%02d]: Shit this shouldn't happen\n", this_cpu);
+ now = NOW();
+ goto timer_redo;
+ }
spin_unlock_irq(&schedule_data[this_cpu].lock);
@@ -339,6 +400,8 @@ asmlinkage void schedule(void)
goto same_process;
}
+ perfc_incrc(sched_ctx);
+
prepare_to_switch();
switch_to(prev, next);
prev = schedule_data[this_cpu].prev;
@@ -347,12 +410,12 @@ asmlinkage void schedule(void)
if ( prev->state == TASK_DYING ) release_task(prev);
same_process:
- /* update the domains notion of time */
+ /* update the domain's notion of time */
update_dom_time(current->shared_info);
if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) ) {
goto need_resched_back;
- }
+ }
return;
}
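The heart of the pick: next runs until its evt would overtake next_prime's, plus the context-switch allowance (note the quotient counts MCUs while ctx_allow is in nanoseconds, so as committed the allowance dominates the sum). Plugging illustrative numbers into the committed formula:

    /* next->evt = 1000, next_prime->evt = 1400, mcu_advance = 10.  */
    s32 gap    = 1400 - 1000;
    s32 r_time = gap / 10 + ctx_allow;  /* 40 + 10ms allowance (ns) */
    /* s_timer then fires at now + r_time and the scan runs again.  */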
@@ -361,11 +424,11 @@ asmlinkage void schedule(void)
*/
static void sched_timer(unsigned long foo)
{
- int cpu = smp_processor_id();
+ int cpu = smp_processor_id();
struct task_struct *curr = schedule_data[cpu].curr;
- /* cause a reschedule */
- set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
- perfc_incrc(sched_irq);
+ /* cause a reschedule */
+ set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
+ perfc_incrc(sched_irq);
}
/*
@@ -373,23 +436,23 @@ static void sched_timer(unsigned long foo)
*/
static void virt_timer(unsigned long foo)
{
- unsigned long cpu_mask = 0;
- struct task_struct *p;
- s_time_t now;
- int res;
-
- /* send virtual timer interrupt */
- read_lock(&tasklist_lock);
- p = &idle0_task;
- do {
- if ( is_idle_task(p) ) continue;
- cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
- }
- while ( (p = p->next_task) != &idle0_task );
- read_unlock(&tasklist_lock);
- guest_event_notify(cpu_mask);
-
- again:
+ unsigned long cpu_mask = 0;
+ struct task_struct *p;
+ s_time_t now;
+ int res;
+
+ /* send virtual timer interrupt */
+ read_lock(&tasklist_lock);
+ p = &idle0_task;
+ do {
+ if ( is_idle_task(p) ) continue;
+ cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
+ }
+ while ( (p = p->next_task) != &idle0_task );
+ read_unlock(&tasklist_lock);
+ guest_event_notify(cpu_mask);
+
+ again:
now = NOW();
v_timer.expires = now + MILLISECS(10);
res=add_ac_timer(&v_timer);
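virt_timer() is the guests' periodic tick, independent of the per-CPU s_timer that drives scheduling decisions: every 10ms it marks _EVENT_TIMER for each non-idle domain and notifies them. The tail of the function falls outside this hunk; a sketch of the re-arm pattern implied by the again: label, under that assumption:

    /* Assumed re-arm loop: if the deadline slipped past while events
     * were being delivered, recompute NOW() and retry.              */
    do {
        v_timer.expires = NOW() + MILLISECS(10);
    } while (add_ac_timer(&v_timer) != 0);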
@@ -412,14 +475,15 @@ void __init scheduler_init(void)
spin_lock_init(&schedule_data[i].lock);
schedule_data[i].prev = &idle0_task;
schedule_data[i].curr = &idle0_task;
-
+
/* a timer for each CPU */
init_ac_timer(&schedule_data[i].s_timer);
schedule_data[i].s_timer.function = &sched_timer;
}
- init_ac_timer(&v_timer);
- v_timer.function = &virt_timer;
+ schedule_data[0].idle = &idle0_task; /* idle on CPU 0 is special */
+ init_ac_timer(&v_timer);
+ v_timer.function = &virt_timer;
}
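Only CPU 0's idle pointer can be seeded here: idle0_task is the statically built boot idle task, while the other CPUs' idle tasks reach schedule_data[].idle later through the IDLE_DOMAIN_ID branch added to sched_add_domain() above. A hypothetical guard expressing what schedule() relies on:

    /* Hypothetical: by the first schedule() on any CPU, that CPU's
     * idle task must have been registered via sched_add_domain(). */
    ASSERT(schedule_data[smp_processor_id()].idle != NULL);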
/*
@@ -427,46 +491,14 @@ void __init scheduler_init(void)
* This has to be done *after* the timers, e.g., APICs, have been initialised
*/
void schedulers_start(void)
-{
+{
printk("Start schedulers\n");
__cli();
sched_timer(0);
- virt_timer(0);
+ virt_timer(0);
smp_call_function((void *)sched_timer, NULL, 1, 1);
__sti();
-
- //add_key_handler('r', dump_run_queues, "dump run queues")
}
-#if 0
-/****************************************************************************
- * Debugging functions
- ****************************************************************************/
-static void dump_run_queues(u_char key, void *dev_id, struct pt_regs *regs)
-{
- u_long flags;
- struct task_struct *p;
- shared_info_t *s;
-
- printk("'%c' pressed -> dumping run queues\n", key);
- read_lock_irqsave(&tasklist_lock, flags);
- p = &idle0_task;
- do {
- printk("Xen: DOM %d, CPU %d [has=%c], state = %s, "
- "hyp_events = %08x\n",
- p->domain, p->processor, p->has_cpu ? 'T':'F',
- task_states[p->state], p->hyp_events);
- s = p->shared_info;
- if(!is_idle_task(p)) {
- printk("Guest: events = %08lx, event_enable = %08lx\n",
- s->events, s->events_enable);
- printk("Notifying guest...\n");
- set_bit(_EVENT_DEBUG, &s->events);
- }
- } while ( (p = p->next_task) != &idle0_task );
-
- read_unlock_irqrestore(&tasklist_lock, flags);
-}
-#endif
/****************************************************************************
@@ -533,3 +565,47 @@ long schedule_timeout(long timeout)
out:
return timeout < 0 ? 0 : timeout;
}
+
+/****************************************************************************
+ * debug function
+ ****************************************************************************/
+
+static void dump_rqueue(struct list_head *queue, char *name)
+{
+ struct list_head *list;
+ int loop = 0;
+ struct task_struct *p;
+
+ printk ("QUEUE %s %lx n: %lx, p: %lx\n", name, (unsigned long)queue,
+ (unsigned long) queue->next, (unsigned long) queue->prev);
+ list_for_each (list, queue) {
+ p = list_entry(list, struct task_struct, run_list);
+ printk("%3d: %3d has=%c mcua=0x%04X ev=0x%08X av=0x%08X c=0x%X%08X\n",
+ loop++, p->domain,
+ p->has_cpu ? 'T':'F',
+ p->mcu_advance, p->evt, p->avt,
+ (u32)(p->cpu_time>>32), (u32)p->cpu_time);
+ printk(" l: %lx n: %lx p: %lx\n",
+ (unsigned long)list, (unsigned long)list->next,
+ (unsigned long)list->prev);
+ }
+ return;
+}
+
+void dump_runq(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ u_long flags;
+ s_time_t now = NOW();
+ int i;
+
+ printk("BVT: mcu=0x%08Xns ctx_allow=0x%08Xns NOW=0x%08X%08X\n",
+ (u32)MCU, (u32)ctx_allow, (u32)(now>>32), (u32)now);
+ for (i = 0; i < smp_num_cpus; i++) {
+ spin_lock_irqsave(&schedule_data[i].lock, flags);
+ printk("CPU[%02d] svt=0x%08X ", i, (s32)schedule_data[i].svt);
+ dump_rqueue(&schedule_data[i].runqueue, "rq");
+ spin_unlock_irqrestore(&schedule_data[i].lock, flags);
+ }
+ return;
+}
+
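dump_runq() supersedes the #if 0'd dump_run_queues() removed above, printing the BVT state (mcu_advance, evt, avt, accumulated cpu_time) for every runqueue entry on every CPU. Its signature matches the key-handler convention hinted at by the deleted add_key_handler comment; where it actually gets registered is not part of this file, so the wiring below is an assumption:

    /* Assumed wiring elsewhere in the patch (hypothetical here): */
    add_key_handler('r', dump_runq, "dump run queues");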