/****************************************************************************
 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
 * (C) 2002-2003 University of Cambridge
 * (C) 2004 - Mark Williamson - Intel Research Cambridge
 ****************************************************************************
 *
 * File: common/schedule.c
 * Author: Rolf Neugebauer & Keir Fraser
 * Updated for generic API by Mark Williamson
 *
 * Description: Generic CPU scheduling code
 * implements support functionality for the Xen scheduler API.
 *
 */
#ifndef COMPAT
/*
 * NOTE(review): the header operands of the #include directives below were
 * lost when this file was extracted (each line reads just "#include").
 * They must be restored from the original xen/common/schedule.c before
 * this file can compile.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* opt_sched: scheduler - default to credit */
static char opt_sched[10] = "credit";
string_param("sched", opt_sched);

/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
static unsigned int opt_dom0_vcpus_pin;
boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);

#define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */

/* Various timer handlers. */
static void s_timer_fn(void *unused);
static void t_timer_fn(void *unused);
static void vcpu_timer_fn(void *data);
static void poll_timer_fn(void *data);

/* This is global for now so that private implementations can reach it */
DEFINE_PER_CPU(struct schedule_data, schedule_data);

/* The compiled-in schedulers; 'ops' is a copy of the one selected at boot. */
extern struct scheduler sched_sedf_def;
extern struct scheduler sched_credit_def;
static struct scheduler *schedulers[] = {
    &sched_sedf_def,
    &sched_credit_def,
    NULL
};

static struct scheduler ops;

/*
 * Invoke hook 'fn' on the active scheduler if it implements it; otherwise
 * evaluate to a zero of the hook's return type (so optional hooks may be
 * left NULL in a scheduler definition).
 */
#define SCHED_OP(fn, ...)                                 \
         (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
          : (typeof(ops.fn(__VA_ARGS__)))0 )

/* Per-CPU periodic timer sends an event to the currently-executing domain. */
static DEFINE_PER_CPU(struct timer, t_timer);

/*
 * Account the time spent in v's current runstate and transition it to
 * new_state as of new_entry_time.  Caller must hold the schedule lock of
 * v's CPU (asserted below), and new_state must differ from the current one.
 */
static inline void vcpu_runstate_change(
    struct vcpu *v, int new_state, s_time_t new_entry_time)
{
    ASSERT(v->runstate.state != new_state);
    ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));

    v->runstate.time[v->runstate.state] +=
        new_entry_time - v->runstate.state_entry_time;
    v->runstate.state_entry_time = new_entry_time;
    v->runstate.state = new_state;
}

/*
 * Snapshot v's runstate info into *runstate, with the running-state time
 * brought up to date as of NOW().  Lock-free when querying the current
 * VCPU; otherwise taken under the schedule lock for a consistent copy.
 */
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
{
    if ( likely(v == current) )
    {
        /* Fast lock-free path. */
        memcpy(runstate, &v->runstate, sizeof(*runstate));
        ASSERT(runstate->state == RUNSTATE_running);
        runstate->time[RUNSTATE_running] += NOW() - runstate->state_entry_time;
    }
    else
    {
        vcpu_schedule_lock_irq(v);
        memcpy(runstate, &v->runstate, sizeof(*runstate));
        runstate->time[runstate->state] += NOW() - runstate->state_entry_time;
        vcpu_schedule_unlock_irq(v);
    }
}

/*
 * Scheduler-side initialisation of a new VCPU: assign its processor and
 * affinity, arm its per-VCPU timers, mark idle VCPUs as already running,
 * then hand off to the active scheduler's init_vcpu hook.
 * Returns the scheduler hook's result (0 on success).
 */
int sched_init_vcpu(struct vcpu *v, unsigned int processor)
{
    struct domain *d = v->domain;

    /*
     * Initialize processor and affinity settings. The idler, and potentially
     * domain-0 VCPUs, are pinned onto their respective physical CPUs.
     */
    v->processor = processor;
    if ( is_idle_domain(d) || ((d->domain_id == 0) && opt_dom0_vcpus_pin) )
        v->cpu_affinity = cpumask_of_cpu(processor);
    else
        cpus_setall(v->cpu_affinity);

    /* Initialise the per-domain timers. */
    init_timer(&v->timer, vcpu_timer_fn, v, v->processor);
    init_timer(&v->poll_timer, poll_timer_fn, v, v->processor);

    /* Idle VCPUs are scheduled immediately. */
    if ( is_idle_domain(d) )
    {
        per_cpu(schedule_data, v->processor).curr = v;
        per_cpu(schedule_data, v->processor).idle = v;
        set_bit(_VCPUF_running, &v->vcpu_flags);
    }

    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);

    return SCHED_OP(init_vcpu, v);
}

/* Tear down a VCPU's scheduler state: kill its timers, then notify the
 * active scheduler. */
void sched_destroy_vcpu(struct vcpu *v)
{
    kill_timer(&v->timer);
    kill_timer(&v->poll_timer);
    SCHED_OP(destroy_vcpu, v);
}

/* Per-domain scheduler initialisation: delegate to the active scheduler. */
int sched_init_domain(struct domain *d)
{
    return SCHED_OP(init_domain, d);
}

/* Per-domain scheduler teardown: delegate to the active scheduler. */
void sched_destroy_domain(struct domain *d)
{
    SCHED_OP(destroy_domain, d);
}

/*
 * Put v to sleep from the scheduler's point of view, without waiting for
 * it to actually stop running ("nosync").  Only acts if v is genuinely
 * not runnable (it may have been woken again since the caller decided to
 * sleep it).
 */
void vcpu_sleep_nosync(struct vcpu *v)
{
    unsigned long flags;

    vcpu_schedule_lock_irqsave(v, flags);

    if ( likely(!vcpu_runnable(v)) )
    {
        if ( v->runstate.state == RUNSTATE_runnable )
            vcpu_runstate_change(v, RUNSTATE_offline, NOW());

        SCHED_OP(sleep, v);
    }

    vcpu_schedule_unlock_irqrestore(v, flags);

    TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
}

/*
 * NOTE(review): the file is truncated/corrupted at this point — the stray
 * token below is the start of a definition whose body is missing (most
 * likely "void vcpu_sleep_sync(...)" in the original source).  Unrelated
 * Makefile content follows before the C source resumes.
 */
v
# Use the default kernel version if the Makefile doesn't override it

# Select the default upstream kernel for the configured series (2.4 or
# anything else, i.e. 2.6); '?=' lets a caller pre-set LINUX_VERSION.
ifeq ($(KERNEL),2.4)
  LINUX_VERSION?=2.4.37.1
else
  LINUX_VERSION?=2.6.21.7
endif
# Local release number (package revision), overridable by the caller.
LINUX_RELEASE?=1

# Known-good md5sums of the supported upstream kernel tarballs, one
# ifeq-guarded entry per supported LINUX_VERSION.
ifeq ($(LINUX_VERSION),2.4.37.1)
  LINUX_KERNEL_MD5SUM:=28bf1bcc94c78804ad67d2f06f950958
endif
ifeq ($(LINUX_VERSION),2.6.21.7)
  LINUX_KERNEL_MD5SUM:=bc15fad1487336d5dcb0945cd039d8ed
endif
ifeq ($(LINUX_VERSION),2.6.23.17)
  LINUX_KERNEL_MD5SUM:=a0300a393ac91ce9c64bf31522b45e2e
endif
ifeq ($(LINUX_VERSION),2.6.25.20)
  LINUX_KERNEL_MD5SUM:=0da698edccf03e2235abc2830a495114
endif
ifeq ($(LINUX_VERSION),2.6.27.24)
  LINUX_KERNEL_MD5SUM:=3e272117ceb50ad8ab3686ae00afa9d8
endif
ifeq ($(LINUX_VERSION),2.6.28.10)
  LINUX_KERNEL_MD5SUM:=c4efb2c494d749cb5de274f8ae41c3fa
endif
ifeq ($(LINUX_VERSION),2.6.29.2)
  LINUX_KERNEL_MD5SUM:=a6839571a9e70baf821d2fb752f9c4c6
endif
ifeq ($(LINUX_VERSION),2.6.30)
  LINUX_KERNEL_MD5SUM:=7a80058a6382e5108cdb5554d1609615
endif

# disable the md5sum check for unknown kernel versions
LINUX_KERNEL_MD5SUM?=x

# Helpers: split a dotted version string into words, and join words back
# with dots.
# NOTE(review): $(space) is not defined in this fragment — it is presumably
# a variable holding a single space character defined elsewhere in the
# build system; verify before relying on merge_version.
split_version=$(subst ., ,$(1))
merge_version=$(subst $(space),.,$(1))
# Base version with any '-suffix' (e.g. "-rc1") stripped: keep only the
# part before the first '-'.
KERNEL_BASE=$(firstword $(subst -, ,$(LINUX_VERSION)))
# KERNEL = "major.minor" (e.g. 2.6); KERNEL_PATCHVER = "major.minor.patch".
KERNEL=$(call merge_version,$(wordlist 1,2,$(call split_version,$(KERNEL_BASE))))
KERNEL_PATCHVER=$(call merge_version,$(wordlist 1,3,$(call split_version,$(KERNEL_BASE))))
/*
 * NOTE(review): the beginning of this function is missing from this chunk
 * (it was replaced by unrelated Makefile content).  The fragment below is
 * the tail of what appears to be the scheduler-adjust entry point
 * (sched_adjust); its headless comment originally read in part:
 * "...t]he schedule lock of the CPU they are running on.  This CPU could
 * be the same as ours."  The original signature must be restored from
 * xen/common/schedule.c.
 */
    /* Pause every other VCPU of the domain before adjusting its
     * scheduling parameters. */
    for_each_vcpu ( d, v )
    {
        if ( v != current )
            vcpu_pause(v);
    }

    /* If adjusting our own domain we must take our own schedule lock. */
    if ( d == current->domain )
        vcpu_schedule_lock_irq(current);

    SCHED_OP(adjust, d, op);
    TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);

    if ( d == current->domain )
        vcpu_schedule_unlock_irq(current);

    for_each_vcpu ( d, v )
    {
        if ( v != current )
            vcpu_unpause(v);
    }

    return 0;
}

/*
 * The main function
 * - deschedule the current domain (scheduler independent).
 * - pick a new domain (scheduler dependent).
 */
static void schedule(void)
{
    struct vcpu          *prev = current, *next = NULL;
    s_time_t              now = NOW();
    struct schedule_data *sd;
    struct task_slice     next_slice;
    s32                   r_time;     /* time for new dom to run */

    ASSERT(!in_irq());
    ASSERT(this_cpu(mc_state).flags == 0);

    perfc_incrc(sched_run);

    sd = &this_cpu(schedule_data);

    spin_lock_irq(&sd->schedule_lock);

    stop_timer(&sd->s_timer);

    /* get policy-specific decision on scheduling... */
    next_slice = ops.do_schedule(now);

    r_time = next_slice.time;
    next = next_slice.task;

    sd->curr = next;

    /* Re-arm the preemption timer for the end of the granted slice. */
    set_timer(&sd->s_timer, now + r_time);

    /* Same VCPU picked again: skip the context switch entirely. */
    if ( unlikely(prev == next) )
    {
        spin_unlock_irq(&sd->schedule_lock);
        return continue_running(prev);
    }

    TRACE_2D(TRC_SCHED_SWITCH_INFPREV,
             prev->domain->domain_id, now - prev->runstate.state_entry_time);
    TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
             next->domain->domain_id,
             (next->runstate.state == RUNSTATE_runnable) ?
             (now - next->runstate.state_entry_time) : 0,
             r_time);

    /* Deschedule prev: blocked > runnable > offline, in that order. */
    ASSERT(prev->runstate.state == RUNSTATE_running);
    vcpu_runstate_change(
        prev,
        (test_bit(_VCPUF_blocked, &prev->vcpu_flags) ? RUNSTATE_blocked :
         (vcpu_runnable(prev) ? RUNSTATE_runnable : RUNSTATE_offline)),
        now);

    ASSERT(next->runstate.state != RUNSTATE_running);
    vcpu_runstate_change(next, RUNSTATE_running, now);

    ASSERT(!test_bit(_VCPUF_running, &next->vcpu_flags));
    set_bit(_VCPUF_running, &next->vcpu_flags);

    spin_unlock_irq(&sd->schedule_lock);

    perfc_incrc(sched_ctx);

    prev->sleep_tick = sd->tick;

    /* Ensure that the domain has an up-to-date time base. */
    if ( !is_idle_vcpu(next) )
    {
        update_vcpu_system_time(next);
        if ( next->sleep_tick != sd->tick )
            send_timer_event(next);
    }

    TRACE_4D(TRC_SCHED_SWITCH,
             prev->domain->domain_id, prev->vcpu_id,
             next->domain->domain_id, next->vcpu_id);

    context_switch(prev, next);
}

/*
 * Called once prev's state has actually been saved after a context switch:
 * clear its running flag and complete any migration that was deferred
 * while it was still running.
 */
void context_saved(struct vcpu *prev)
{
    clear_bit(_VCPUF_running, &prev->vcpu_flags);

    if ( unlikely(test_bit(_VCPUF_migrating, &prev->vcpu_flags)) )
        vcpu_migrate(prev);
}

/****************************************************************************
 * Timers: the scheduler utilises a number of timers
 * - s_timer: per CPU timer for preemption and scheduling decisions
 * - t_timer: per CPU periodic timer to send timer interrupt to current dom
 * - dom_timer: per domain timer to specifiy timeout values
 ****************************************************************************/

/* The scheduler timer: force a run through the scheduler */
static void s_timer_fn(void *unused)
{
    raise_softirq(SCHEDULE_SOFTIRQ);
    perfc_incrc(sched_irq);
}

/* Periodic tick timer: send timer event to current domain */
static void t_timer_fn(void *unused)
{
    struct vcpu *v = current;

    this_cpu(schedule_data).tick++;

    if ( !is_idle_vcpu(v) )
    {
        update_vcpu_system_time(v);
        send_timer_event(v);
    }

    page_scrub_schedule_work();

    /* Let the scheduler account the tick, then re-arm for 10ms. */
    SCHED_OP(tick, smp_processor_id());

    set_timer(&this_cpu(t_timer), NOW() + MILLISECS(10));
}

/* Per-VCPU timer function: sends a virtual timer interrupt. */
static void vcpu_timer_fn(void *data)
{
    struct vcpu *v = data;
    send_timer_event(v);
}

/* SCHEDOP_poll timeout callback. */
static void poll_timer_fn(void *data)
{
    struct vcpu *v = data;
    /* Only unblock if the VCPU is still in a poll (flag not yet cleared). */
    if ( test_and_clear_bit(_VCPUF_polling, &v->vcpu_flags) )
        vcpu_unblock(v);
}

/* Initialise the data structures. */
void __init scheduler_init(void)
{
    int i;

    open_softirq(SCHEDULE_SOFTIRQ, schedule);

    for_each_cpu ( i )
    {
        spin_lock_init(&per_cpu(schedule_data, i).schedule_lock);
        init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
        init_timer(&per_cpu(t_timer, i), t_timer_fn, NULL, i);
    }

    /*
     * Select the scheduler named by the "sched" boot parameter.  If no
     * name matches, 'ops' is left holding the last entry in the table and
     * a warning is printed.
     */
    for ( i = 0; schedulers[i] != NULL; i++ )
    {
        ops = *schedulers[i];
        if ( strcmp(ops.opt_name, opt_sched) == 0 )
            break;
    }

    if ( schedulers[i] == NULL )
        printk("Could not find scheduler: %s\n", opt_sched);

    printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);

    SCHED_OP(init);
}

/*
 * Start a scheduler for each CPU
 * This has to be done *after* the timers, e.g., APICs, have been initialised
 */
void schedulers_start(void)
{
    /* Kick the tick timer locally, then on every other CPU. */
    t_timer_fn(0);
    smp_call_function((void *)t_timer_fn, NULL, 1, 1);
}

/* Keyhandler: dump scheduler settings and per-CPU run-queue state. */
void dump_runq(unsigned char key)
{
    s_time_t      now = NOW();
    int           i;
    unsigned long flags;

    local_irq_save(flags);

    printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
    SCHED_OP(dump_settings);
    printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);

    for_each_online_cpu ( i )
    {
        spin_lock(&per_cpu(schedule_data, i).schedule_lock);
        printk("CPU[%02d] ", i);
        SCHED_OP(dump_cpu_state, i);
        spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
    }

    local_irq_restore(flags);
}

#ifdef CONFIG_COMPAT
#include "compat/schedule.c"
#endif

#endif /* !COMPAT */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */