author    | Keir Fraser <keir.fraser@citrix.com> | 2010-04-21 12:48:03 +0100
committer | Keir Fraser <keir.fraser@citrix.com> | 2010-04-21 12:48:03 +0100
commit    | 78be3dbbfefa43c4ff4a19ae84342b907e0cef4a (patch)
tree      | 09f193bbd11b37f0a9721e347ab27eeb000dbb13 /xen/common/cpupool.c
parent    | b066d6894d38a2be3f4eb67e974cfdf1275e9da2 (diff)
cpupools [1/6]: hypervisor changes
Signed-off-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Diffstat (limited to 'xen/common/cpupool.c')
-rw-r--r-- | xen/common/cpupool.c | 604
1 file changed, 604 insertions(+), 0 deletions(-)
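
For orientation before reading the patch itself, here is a minimal sketch (illustrative only, not part of the patch) of how the lifecycle functions introduced below are meant to fit together. Error handling is elided, the pool size and the scheduler name "credit" are example values, and CPUPOOLID_NONE comes from the headers added elsewhere in this series:

    /* illustrative sketch -- not part of the patch */
    static void cpupool_lifecycle_sketch(struct domain *d)
    {
        struct cpupool *c;

        /* create a pool with an automatically chosen id ... */
        c = cpupool_create(CPUPOOLID_NONE, "credit");

        /* ... give it up to two of the currently free physical cpus ... */
        cpupool_assign_ncpu(c, 2);

        /* ... bind a domain to it by pool id ... */
        cpupool_add_domain(d, c->cpupool_id);

        /* ... and later tear everything down again. Note that
         * cpupool_unassign_cpu() may complete via continue_hypercall_on_cpu()
         * on another cpu, so real callers use the domctl path instead. */
        cpupool_rm_domain(d);
        cpupool_unassign_cpu(c, last_cpu(c->cpu_valid));
        cpupool_destroy(c);
    }
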
diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
new file mode 100644
index 0000000000..7e32296061
--- /dev/null
+++ b/xen/common/cpupool.c
@@ -0,0 +1,604 @@
+/******************************************************************************
+ * cpupool.c
+ *
+ * Generic cpupool-handling functions.
+ *
+ * Cpupools are a feature to have configurable scheduling domains. Each
+ * cpupool runs its own scheduler on a dedicated set of physical cpus.
+ * A domain is bound to one cpupool at any time, but it can be moved to
+ * another cpupool.
+ *
+ * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
+ */
+
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/cpumask.h>
+#include <xen/percpu.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+
+#define for_each_cpupool(ptr)    \
+    for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next))
+
+struct cpupool *cpupool0;            /* Initial cpupool with Dom0 */
+cpumask_t cpupool_free_cpus;         /* cpus not in any cpupool */
+
+static struct cpupool *cpupool_list; /* linked list, sorted by poolid */
+
+static int cpupool0_max_cpus;
+integer_param("pool0_max_cpus", cpupool0_max_cpus);
+
+static int cpupool_moving_cpu = -1;
+static struct cpupool *cpupool_cpu_moving = NULL;
+static cpumask_t cpupool_locked_cpus = CPU_MASK_NONE;
+
+/* cpupool lock: be careful, this lock is sometimes released on a different
+ * cpu than the one on which it was obtained!
+ */
+static DEFINE_SPINLOCK(cpupool_lock);
+
+DEFINE_PER_CPU(struct cpupool *, cpupool);
+
+static struct cpupool *alloc_cpupool_struct(void)
+{
+    return xmalloc(struct cpupool);
+}
+
+static void free_cpupool_struct(struct cpupool *c)
+{
+    xfree(c);
+}
+
+/*
+ * find a cpupool by its id. to be called with cpupool lock held.
+ * if exact is not specified, the first cpupool with an id larger than or
+ * equal to the searched id is returned.
+ * returns NULL if not found.
+ */
+static struct cpupool *cpupool_find_by_id(int id, int exact)
+{
+    struct cpupool **q;
+
+    for_each_cpupool(q)
+    {
+        if ( (*q)->cpupool_id == id )
+            return *q;
+        if ( (*q)->cpupool_id > id )
+            break;
+    }
+    return exact ? NULL : *q;
+}
+
+/*
+ * create a new cpupool with specified poolid and scheduler
+ * returns pointer to new cpupool structure if okay, NULL otherwise
+ * possible failures:
+ * - no memory
+ * - poolid already used
+ * - unknown scheduler
+ */
+struct cpupool *cpupool_create(int poolid, char *sched)
+{
+    struct cpupool *c;
+    struct cpupool **q;
+    int last = 0;
+
+    if ( (c = alloc_cpupool_struct()) == NULL )
+        return NULL;
+    memset(c, 0, sizeof(*c));
+
+    printk(XENLOG_DEBUG "cpupool_create(pool=%d,sched=%s)\n", poolid, sched);
+    spin_lock(&cpupool_lock);
+    for_each_cpupool(q)
+    {
+        last = (*q)->cpupool_id;
+        if ( (poolid != CPUPOOLID_NONE) && (last >= poolid) )
+            break;
+    }
+    if ( *q != NULL )
+    {
+        if ( (*q)->cpupool_id == poolid )
+        {
+            spin_unlock(&cpupool_lock);
+            free_cpupool_struct(c);
+            return NULL;
+        }
+        c->next = *q;
+    }
+    *q = c;
+    c->cpupool_id = (poolid == CPUPOOLID_NONE) ? (last + 1) : poolid;
+    if ( schedule_init_global(sched, &(c->sched)) )
+    {
+        spin_unlock(&cpupool_lock);
+        cpupool_destroy(c);
+        return NULL;
+    }
+    spin_unlock(&cpupool_lock);
+
+    printk("Created cpupool %d with scheduler %s (%s)\n", c->cpupool_id,
+           c->sched.name, c->sched.opt_name);
+
+    return c;
+}
+
+/*
+ * destroy the given cpupool
+ * returns 0 on success, 1 otherwise
+ * possible failures:
+ * - pool still in use
+ * - cpus still assigned to pool
+ * - pool not in list
+ */
+int cpupool_destroy(struct cpupool *c)
+{
+    struct cpupool **q;
+
+    spin_lock(&cpupool_lock);
+    for_each_cpupool(q)
+        if ( *q == c )
+            break;
+    if ( (*q != c) || (c->n_dom != 0) || cpus_weight(c->cpu_valid) )
+    {
+        spin_unlock(&cpupool_lock);
+        return 1;
+    }
+    *q = c->next;
+    spin_unlock(&cpupool_lock);
+    printk(XENLOG_DEBUG "cpupool_destroy(pool=%d)\n", c->cpupool_id);
+    schedule_deinit_global(&(c->sched));
+    free_cpupool_struct(c);
+    return 0;
+}
+
+/*
+ * assign a specific cpu to a cpupool
+ * cpupool_lock must be held
+ */
+static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+{
+    if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) )
+        return -EBUSY;
+    per_cpu(cpupool, cpu) = c;
+    schedule_cpu_switch(cpu, c);
+    cpu_clear(cpu, cpupool_free_cpus);
+    if ( cpupool_moving_cpu == cpu )
+    {
+        cpupool_moving_cpu = -1;
+        cpupool_cpu_moving = NULL;
+    }
+    cpu_set(cpu, c->cpu_valid);
+    return 0;
+}
+
+/*
+ * assign free physical cpus to a cpupool
+ * cpus assigned are unused cpus with lowest possible ids
+ * returns the number of cpus assigned
+ */
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu)
+{
+    int i;
+    int n;
+
+    n = 0;
+    spin_lock(&cpupool_lock);
+    for_each_cpu_mask(i, cpupool_free_cpus)
+    {
+        if ( cpupool_assign_cpu_locked(c, i) == 0 )
+            n++;
+        if ( n == ncpu )
+            break;
+    }
+    spin_unlock(&cpupool_lock);
+    printk(XENLOG_DEBUG "cpupool_assign_ncpu(pool=%d,ncpu=%d) rc %d\n",
+           c->cpupool_id, ncpu, n);
+    return n;
+}
+
+static long cpupool_unassign_cpu_helper(void *info)
+{
+    struct cpupool *c = (struct cpupool *)info;
+    int cpu = cpupool_moving_cpu;
+    long ret;
+    int cpupool_id = c->cpupool_id;
+
+    ret = cpu_disable_scheduler(cpu);
+    cpu_set(cpu, cpupool_free_cpus);
+    if ( !ret )
+    {
+        schedule_cpu_switch(cpu, NULL);
+        per_cpu(cpupool, cpu) = NULL;
+        cpupool_moving_cpu = -1;
+        cpupool_cpu_moving = NULL;
+    }
+    spin_unlock(&cpupool_lock);
+    printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %ld\n",
+           cpupool_id, cpu, ret);
+    return ret;
+}
+
+/*
+ * unassign a specific cpu from a cpupool
+ * we must be sure not to run on the cpu to be unassigned! to achieve this
+ * the main functionality is performed via continue_hypercall_on_cpu on a
+ * specific cpu.
+ * if the cpu to be removed is the last one of the cpupool no active domain
+ * must be bound to the cpupool. dying domains are moved to cpupool0 as they
+ * might be zombies.
+ * possible failures:
+ * - last cpu and still active domains in cpupool
+ * - cpu just being unplugged
+ */
+int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
+{
+    int work_cpu;
+    int ret;
+    struct domain *d;
+    int cpupool_id = c->cpupool_id;
+
+    printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
+           cpupool_id, cpu);
+    spin_lock(&cpupool_lock);
+    ret = -EBUSY;
+    if ( (cpupool_moving_cpu != -1) && (cpu != cpupool_moving_cpu) )
+        goto out;
+    if ( cpu_isset(cpu, cpupool_locked_cpus) )
+        goto out;
+
+    ret = 0;
+    if ( !cpu_isset(cpu, c->cpu_valid) && (cpu != cpupool_moving_cpu) )
+        goto out;
+
+    if ( (c->n_dom > 0) && (cpus_weight(c->cpu_valid) == 1) &&
+         (cpu != cpupool_moving_cpu) )
+    {
+        for_each_domain(d)
+        {
+            if ( d->cpupool != c )
+                continue;
+            if ( !d->is_dying )
+            {
+                ret = -EBUSY;
+                break;
+            }
+            c->n_dom--;
+            ret = sched_move_domain(d, cpupool0);
+            if ( ret )
+            {
+                c->n_dom++;
+                break;
+            }
+            cpupool0->n_dom++;
+        }
+        if ( ret )
+            goto out;
+    }
+    cpupool_moving_cpu = cpu;
+    cpupool_cpu_moving = c;
+    cpu_clear(cpu, c->cpu_valid);
+    work_cpu = smp_processor_id();
+    if ( work_cpu == cpu )
+    {
+        work_cpu = first_cpu(cpupool0->cpu_valid);
+        if ( work_cpu == cpu )
+            work_cpu = next_cpu(cpu, cpupool0->cpu_valid);
+    }
+    return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c);
+
+out:
+    spin_unlock(&cpupool_lock);
+    printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n",
+           cpupool_id, cpu, ret);
+    return ret;
+}
+
+/*
+ * assign cpus to the default cpupool
+ * by default all cpus are assigned; fewer cpus can be requested via the
+ * pool0_max_cpus boot parameter
+ * possible failures:
+ * - no cpu assigned
+ */
+int __init cpupool0_cpu_assign(struct cpupool *c)
+{
+    if ( (cpupool0_max_cpus == 0) || (cpupool0_max_cpus > num_online_cpus()) )
+        cpupool0_max_cpus = num_online_cpus();
+    if ( !cpupool_assign_ncpu(cpupool0, cpupool0_max_cpus) )
+        return 1;
+    return 0;
+}
+
+/*
+ * add a new domain to a cpupool
+ * possible failures:
+ * - pool does not exist
+ * - no cpu assigned to pool
+ */
+int cpupool_add_domain(struct domain *d, int poolid)
+{
+    struct cpupool *c;
+    int rc = 1;
+    int n_dom;
+
+    if ( poolid == CPUPOOLID_NONE )
+        return 0;
+    spin_lock(&cpupool_lock);
+    c = cpupool_find_by_id(poolid, 1);
+    if ( (c != NULL) && cpus_weight(c->cpu_valid) )
+    {
+        c->n_dom++;
+        n_dom = c->n_dom;
+        d->cpupool = c;
+        rc = 0;
+    }
+    spin_unlock(&cpupool_lock);
+    if ( !rc )
+        printk(XENLOG_DEBUG "cpupool_add_domain(dom=%d,pool=%d) n_dom %d\n",
+               d->domain_id, poolid, n_dom);
+    return rc;
+}
+
+/*
+ * remove a domain from a cpupool
+ */
+void cpupool_rm_domain(struct domain *d)
+{
+    int cpupool_id;
+    int n_dom;
+
+    if ( d->cpupool == NULL )
+        return;
+    spin_lock(&cpupool_lock);
+    cpupool_id = d->cpupool->cpupool_id;
+    d->cpupool->n_dom--;
+    n_dom = d->cpupool->n_dom;
+    d->cpupool = NULL;
+    spin_unlock(&cpupool_lock);
+    printk(XENLOG_DEBUG "cpupool_rm_domain(dom=%d,pool=%d) n_dom %d\n",
+           d->domain_id, cpupool_id, n_dom);
+    return;
+}
+
+/*
+ * called to add a new cpu to pool admin
+ * we add a hotplugged cpu to cpupool0 so it can be added to dom0
+ */
+void cpupool_cpu_add(unsigned int cpu)
+{
+    if ( cpupool0 == NULL )
+        return;
+    spin_lock(&cpupool_lock);
+    cpu_clear(cpu, cpupool_locked_cpus);
+    cpu_set(cpu, cpupool_free_cpus);
+    cpupool_assign_cpu_locked(cpupool0, cpu);
+    spin_unlock(&cpupool_lock);
+    return;
+}
+
+/*
+ * called to remove a cpu from pool admin
+ * the cpu to be removed is locked to avoid removing it from dom0
+ * returns failure if not in pool0
+ */
+int cpupool_cpu_remove(unsigned int cpu)
+{
+    int ret = 0;
+
+    spin_lock(&cpupool_lock);
+    if ( !cpu_isset(cpu, cpupool0->cpu_valid) )
+        ret = -EBUSY;
+    else
+        cpu_set(cpu, cpupool_locked_cpus);
+    spin_unlock(&cpupool_lock);
+
+    return ret;
+}
+
+/*
+ * do cpupool related domctl operations
+ */
+int cpupool_do_domctl(struct xen_domctl_cpupool_op *op)
+{
+    int ret;
+    struct cpupool *c;
+
+    switch ( op->op )
+    {
+
+    case XEN_DOMCTL_CPUPOOL_OP_CREATE:
+    {
+        int poolid;
+        const struct scheduler *sched;
+
+        poolid = (op->cpupool_id == XEN_DOMCTL_CPUPOOL_PAR_ANY) ?
+            CPUPOOLID_NONE : op->cpupool_id;
+        sched = scheduler_get_by_id(op->sched_id);
+        ret = -ENOENT;
+        if ( sched == NULL )
+            break;
+        ret = 0;
+        c = cpupool_create(poolid, sched->opt_name);
+        if ( c == NULL )
+            ret = -EINVAL;
+        else
+            op->cpupool_id = c->cpupool_id;
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_DESTROY:
+    {
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->cpupool_id, 1);
+        spin_unlock(&cpupool_lock);
+        ret = -ENOENT;
+        if ( c == NULL )
+            break;
+        ret = (cpupool_destroy(c) != 0) ? -EBUSY : 0;
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_INFO:
+    {
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->cpupool_id, 0);
+        spin_unlock(&cpupool_lock);
+        ret = -ENOENT;
+        if ( c == NULL )
+            break;
+        op->cpupool_id = c->cpupool_id;
+        op->sched_id = c->sched.sched_id;
+        op->n_dom = c->n_dom;
+        cpumask_to_xenctl_cpumap(&(op->cpumap), &(c->cpu_valid));
+        ret = 0;
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_ADDCPU:
+    {
+        unsigned cpu;
+
+        cpu = op->cpu;
+        printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d)\n",
+               op->cpupool_id, cpu);
+        spin_lock(&cpupool_lock);
+        if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+            cpu = first_cpu(cpupool_free_cpus);
+        ret = -EINVAL;
+        if ( cpu >= NR_CPUS )
+            goto addcpu_out;
+        ret = -EBUSY;
+        if ( !cpu_isset(cpu, cpupool_free_cpus) )
+            goto addcpu_out;
+        c = cpupool_find_by_id(op->cpupool_id, 0);
+        ret = -ENOENT;
+        if ( c == NULL )
+            goto addcpu_out;
+        ret = cpupool_assign_cpu_locked(c, cpu);
+addcpu_out:
+        spin_unlock(&cpupool_lock);
+        printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n",
+               op->cpupool_id, cpu, ret);
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_RMCPU:
+    {
+        unsigned cpu;
+
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->cpupool_id, 0);
+        spin_unlock(&cpupool_lock);
+        ret = -ENOENT;
+        if ( c == NULL )
+            break;
+        cpu = op->cpu;
+        if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+            cpu = last_cpu(c->cpu_valid);
+        ret = -EINVAL;
+        if ( cpu >= NR_CPUS )
+            break;
+        /* caution: cpupool_unassign_cpu uses continue_hypercall_on_cpu and
+         * will continue after the local return
+         */
+        ret = cpupool_unassign_cpu(c, cpu);
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN:
+    {
+        struct domain *d;
+
+        ret = -EINVAL;
+        if ( op->domid == 0 )
+            break;
+        ret = -ESRCH;
+        d = rcu_lock_domain_by_id(op->domid);
+        if ( d == NULL )
+            break;
+        if ( d->cpupool == NULL )
+        {
+            ret = -EINVAL;
+            rcu_unlock_domain(d);
+            break;
+        }
+        printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d\n",
+               d->domain_id, op->cpupool_id);
+        ret = -ENOENT;
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->cpupool_id, 1);
+        if ( (c != NULL) && cpus_weight(c->cpu_valid) )
+        {
+            d->cpupool->n_dom--;
+            ret = sched_move_domain(d, c);
+            if ( ret )
+                d->cpupool->n_dom++;
+            else
+                c->n_dom++;
+        }
+        spin_unlock(&cpupool_lock);
+        printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d ret %d\n",
+               d->domain_id, op->cpupool_id, ret);
+        rcu_unlock_domain(d);
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_FREEINFO:
+    {
+        cpumask_to_xenctl_cpumap(&(op->cpumap),
+                                 &cpupool_free_cpus);
+        ret = 0;
+    }
+    break;
+
+    default:
+        ret = -ENOSYS;
+
+    }
+
+    return ret;
+}
+
+void schedule_dump(struct cpupool *c);
+
+void dump_runq(unsigned char key)
+{
+    unsigned long flags;
+    s_time_t now = NOW();
+    struct cpupool **c;
+
+    spin_lock(&cpupool_lock);
+    local_irq_save(flags);
+
+    printk("sched_smt_power_savings: %s\n",
+           sched_smt_power_savings ? "enabled" : "disabled");
+    printk("NOW=0x%08X%08X\n", (u32)(now >> 32), (u32)now);
+
+    printk("Idle cpupool:\n");
+    schedule_dump(NULL);
+
+    for_each_cpupool(c)
+    {
+        printk("Cpupool %d:\n", (*c)->cpupool_id);
+        schedule_dump(*c);
+    }
+
+    local_irq_restore(flags);
+    spin_unlock(&cpupool_lock);
+}
+
+static int __init cpupool_init(void)
+{
+    cpupool_free_cpus = cpu_online_map;
+    cpupool_list = NULL;
+    return 0;
+}
+__initcall(cpupool_init);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
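
From the toolstack's point of view, everything above is reached through the single cpupool_do_domctl() entry point. Below is a hedged sketch of a caller; the field names of struct xen_domctl_cpupool_op are inferred from the accesses in this file (the authoritative definition lives in the series' public headers, which are not part of this patch), and sched_id 0 is just an example value:

    /* illustrative sketch -- field layout inferred from cpupool_do_domctl() */
    static int cpupool_domctl_sketch(void)
    {
        struct xen_domctl_cpupool_op op;
        int ret;

        /* create a pool, letting the hypervisor pick the pool id */
        op.op = XEN_DOMCTL_CPUPOOL_OP_CREATE;
        op.cpupool_id = XEN_DOMCTL_CPUPOOL_PAR_ANY;
        op.sched_id = 0;                      /* example scheduler id */
        ret = cpupool_do_domctl(&op);
        if ( ret )
            return ret;

        /* op.cpupool_id now holds the allocated id; add any free cpu to it */
        op.op = XEN_DOMCTL_CPUPOOL_OP_ADDCPU;
        op.cpu = XEN_DOMCTL_CPUPOOL_PAR_ANY;
        return cpupool_do_domctl(&op);
    }

Independently of the domctl interface, the patch also wires up a pool0_max_cpus integer boot parameter (via integer_param()): booting the hypervisor with, say, pool0_max_cpus=4 makes cpupool0_cpu_assign() put only the first four online cpus into the initial pool and leaves the remainder in cpupool_free_cpus.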