author     Keir Fraser <keir.fraser@citrix.com>   2010-04-21 12:48:03 +0100
committer  Keir Fraser <keir.fraser@citrix.com>   2010-04-21 12:48:03 +0100
commit     78be3dbbfefa43c4ff4a19ae84342b907e0cef4a (patch)
tree       09f193bbd11b37f0a9721e347ab27eeb000dbb13 /xen/common/cpupool.c
parent     b066d6894d38a2be3f4eb67e974cfdf1275e9da2 (diff)
cpupools [1/6]: hypervisor changes
Signed-off-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Diffstat (limited to 'xen/common/cpupool.c')
 -rw-r--r--  xen/common/cpupool.c | 604
 1 file changed, 604 insertions(+), 0 deletions(-)
diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
new file mode 100644
index 0000000000..7e32296061
--- /dev/null
+++ b/xen/common/cpupool.c
@@ -0,0 +1,604 @@
+/******************************************************************************
+ * cpupool.c
+ *
+ * Generic cpupool-handling functions.
+ *
+ * Cpupools provide configurable scheduling domains. Each cpupool runs
+ * its own scheduler on a dedicated set of physical cpus. A domain is
+ * bound to exactly one cpupool at any point in time, but it can be
+ * moved to another cpupool.
+ *
+ * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
+ */
+
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/cpumask.h>
+#include <xen/percpu.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
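+
+/*
+ * Note: the cpupool structure itself is declared in the scheduler
+ * interface headers included above; the fields this file relies on are
+ * roughly:
+ *   cpupool_id - numeric pool id (cpupool_list is sorted by it)
+ *   next       - link to the next pool in cpupool_list
+ *   sched      - the pool's scheduler instance (set up by
+ *                schedule_init_global())
+ *   cpu_valid  - mask of cpus currently assigned to the pool
+ *   n_dom      - number of domains currently bound to the pool
+ */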
+
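+/*
+ * Iterate over the pool list via pointers to the link fields so that the
+ * current entry can be inserted or unlinked in place (see cpupool_create()
+ * and cpupool_destroy()).
+ */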
+#define for_each_cpupool(ptr) \
+ for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next))
+
+struct cpupool *cpupool0; /* Initial cpupool with Dom0 */
+cpumask_t cpupool_free_cpus; /* cpus not in any cpupool */
+
+static struct cpupool *cpupool_list; /* linked list, sorted by poolid */
+
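+/*
+ * Optional boot parameter limiting the number of cpus initially assigned
+ * to Pool-0; 0 (the default) means all online cpus, see
+ * cpupool0_cpu_assign().
+ */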
+static int cpupool0_max_cpus;
+integer_param("pool0_max_cpus", cpupool0_max_cpus);
+
+static int cpupool_moving_cpu = -1;
+static struct cpupool *cpupool_cpu_moving = NULL;
+static cpumask_t cpupool_locked_cpus = CPU_MASK_NONE;
+
+/* cpupool lock: be careful, this lock is sometimes released on a different
+ * cpu than the one it was obtained on!
+ */
+static DEFINE_SPINLOCK(cpupool_lock);
+
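+/* per-cpu pointer to the cpupool the cpu is currently assigned to
+ * (NULL while the cpu is unassigned) */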
+DEFINE_PER_CPU(struct cpupool *, cpupool);
+
+static struct cpupool *alloc_cpupool_struct(void)
+{
+ return xmalloc(struct cpupool);
+}
+
+static void free_cpupool_struct(struct cpupool *c)
+{
+ xfree(c);
+}
+
+/*
+ * find a cpupool by its id. to be called with cpupool lock held.
+ * if exact is not specified, the first cpupool with an id larger than or
+ * equal to the searched id is returned.
+ * returns NULL if not found.
+ */
+static struct cpupool *cpupool_find_by_id(int id, int exact)
+{
+ struct cpupool **q;
+
+ for_each_cpupool(q)
+ {
+ if ( (*q)->cpupool_id == id )
+ return *q;
+ if ( (*q)->cpupool_id > id )
+ break;
+ }
+ return exact ? NULL : *q;
+}
+
+/*
+ * create a new cpupool with specified poolid and scheduler
+ * returns pointer to new cpupool structure on success, NULL otherwise
+ * possible failures:
+ * - no memory
+ * - poolid already used
+ * - unknown scheduler
+ */
+struct cpupool *cpupool_create(int poolid, char *sched)
+{
+ struct cpupool *c;
+ struct cpupool **q;
+ int last = 0;
+
+ if ( (c = alloc_cpupool_struct()) == NULL )
+ return NULL;
+ memset(c, 0, sizeof(*c));
+
+ printk(XENLOG_DEBUG "cpupool_create(pool=%d,sched=%s)\n", poolid, sched);
+ spin_lock(&cpupool_lock);
+ for_each_cpupool(q)
+ {
+ last = (*q)->cpupool_id;
+ if ( (poolid != CPUPOOLID_NONE) && (last >= poolid) )
+ break;
+ }
+ if ( *q != NULL )
+ {
+ if ( (*q)->cpupool_id == poolid )
+ {
+ spin_unlock(&cpupool_lock);
+ free_cpupool_struct(c);
+ return NULL;
+ }
+ c->next = *q;
+ }
+ *q = c;
+ c->cpupool_id = (poolid == CPUPOOLID_NONE) ? (last + 1) : poolid;
+ if ( schedule_init_global(sched, &(c->sched)) )
+ {
+ spin_unlock(&cpupool_lock);
+ cpupool_destroy(c);
+ return NULL;
+ }
+ spin_unlock(&cpupool_lock);
+
+ printk("Created cpupool %d with scheduler %s (%s)\n", c->cpupool_id,
+ c->sched.name, c->sched.opt_name);
+
+ return c;
+}
+
+/*
+ * destroy the given cpupool
+ * returns 0 on success, 1 otherwise
+ * possible failures:
+ * - pool still in use
+ * - cpus still assigned to pool
+ * - pool not in list
+ */
+int cpupool_destroy(struct cpupool *c)
+{
+ struct cpupool **q;
+
+ spin_lock(&cpupool_lock);
+ for_each_cpupool(q)
+ if ( *q == c )
+ break;
+ if ( (*q != c) || (c->n_dom != 0) || cpus_weight(c->cpu_valid) )
+ {
+ spin_unlock(&cpupool_lock);
+ return 1;
+ }
+ *q = c->next;
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_destroy(pool=%d)\n", c->cpupool_id);
+ schedule_deinit_global(&(c->sched));
+ free_cpupool_struct(c);
+ return 0;
+}
+
+/*
+ * assign a specific cpu to a cpupool
+ * cpupool_lock must be held
+ */
+static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+{
+ if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) )
+ return -EBUSY;
+ per_cpu(cpupool, cpu) = c;
+ schedule_cpu_switch(cpu, c);
+ cpu_clear(cpu, cpupool_free_cpus);
+    if ( cpupool_moving_cpu == cpu )
+ {
+ cpupool_moving_cpu = -1;
+ cpupool_cpu_moving = NULL;
+ }
+ cpu_set(cpu, c->cpu_valid);
+ return 0;
+}
+
+/*
+ * assign free physical cpus to a cpupool
+ * cpus assigned are unused cpus with lowest possible ids
+ * returns the number of cpus assigned
+ */
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu)
+{
+ int i;
+ int n;
+
+ n = 0;
+ spin_lock(&cpupool_lock);
+ for_each_cpu_mask(i, cpupool_free_cpus)
+ {
+ if ( cpupool_assign_cpu_locked(c, i) == 0 )
+ n++;
+ if ( n == ncpu )
+ break;
+ }
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_assign_ncpu(pool=%d,ncpu=%d) rc %d\n",
+ c->cpupool_id, ncpu, n);
+ return n;
+}
+
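+/*
+ * unassign a cpu from its cpupool, helper part
+ * this is invoked via continue_hypercall_on_cpu() from
+ * cpupool_unassign_cpu() and thus runs on a cpu other than the one being
+ * removed; note that it releases cpupool_lock, which was taken by
+ * cpupool_unassign_cpu(), possibly on a different cpu.
+ */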
+static long cpupool_unassign_cpu_helper(void *info)
+{
+ struct cpupool *c = (struct cpupool *)info;
+ int cpu = cpupool_moving_cpu;
+ long ret;
+ int cpupool_id = c->cpupool_id;
+
+ ret = cpu_disable_scheduler(cpu);
+ cpu_set(cpu, cpupool_free_cpus);
+ if ( !ret )
+ {
+ schedule_cpu_switch(cpu, NULL);
+ per_cpu(cpupool, cpu) = NULL;
+ cpupool_moving_cpu = -1;
+ cpupool_cpu_moving = NULL;
+ }
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %ld\n",
+ cpupool_id, cpu, ret);
+ return ret;
+}
+
+/*
+ * unassign a specific cpu from a cpupool
+ * we must be sure not to run on the cpu to be unassigned! to achieve this
+ * the main functionality is performed via continue_hypercall_on_cpu() on a
+ * different cpu.
+ * if the cpu to be removed is the last one of the cpupool, no active domain
+ * may be bound to the cpupool. dying domains are moved to cpupool0, as they
+ * might be zombies.
+ * possible failures:
+ * - last cpu and still active domains in cpupool
+ * - cpu just being unplugged
+ */
+int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
+{
+ int work_cpu;
+ int ret;
+ struct domain *d;
+ int cpupool_id = c->cpupool_id;
+
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
+ cpupool_id, cpu);
+ spin_lock(&cpupool_lock);
+ ret = -EBUSY;
+ if ( (cpupool_moving_cpu != -1) && (cpu != cpupool_moving_cpu) )
+ goto out;
+ if ( cpu_isset(cpu, cpupool_locked_cpus) )
+ goto out;
+
+ ret = 0;
+ if ( !cpu_isset(cpu, c->cpu_valid) && (cpu != cpupool_moving_cpu) )
+ goto out;
+
+ if ( (c->n_dom > 0) && (cpus_weight(c->cpu_valid) == 1) &&
+ (cpu != cpupool_moving_cpu) )
+ {
+ for_each_domain(d)
+ {
+ if ( d->cpupool != c )
+ continue;
+ if ( !d->is_dying )
+ {
+ ret = -EBUSY;
+ break;
+ }
+ c->n_dom--;
+ ret = sched_move_domain(d, cpupool0);
+ if ( ret )
+ {
+ c->n_dom++;
+ break;
+ }
+ cpupool0->n_dom++;
+ }
+ if ( ret )
+ goto out;
+ }
+ cpupool_moving_cpu = cpu;
+ cpupool_cpu_moving = c;
+ cpu_clear(cpu, c->cpu_valid);
+ work_cpu = smp_processor_id();
+ if ( work_cpu == cpu )
+ {
+ work_cpu = first_cpu(cpupool0->cpu_valid);
+ if ( work_cpu == cpu )
+ work_cpu = next_cpu(cpu, cpupool0->cpu_valid);
+ }
+ return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c);
+
+out:
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n",
+ cpupool_id, cpu, ret);
+ return ret;
+}
+
+/*
+ * assign cpus to the default cpupool
+ * by default all online cpus are assigned; a smaller number can be
+ * requested via the "pool0_max_cpus" boot parameter
+ * possible failures:
+ * - no cpu assigned
+ */
+int __init cpupool0_cpu_assign(struct cpupool *c)
+{
+ if ( (cpupool0_max_cpus == 0) || (cpupool0_max_cpus > num_online_cpus()) )
+ cpupool0_max_cpus = num_online_cpus();
+ if ( !cpupool_assign_ncpu(cpupool0, cpupool0_max_cpus) )
+ return 1;
+ return 0;
+}
+
+/*
+ * add a new domain to a cpupool
+ * possible failures:
+ * - pool does not exist
+ * - no cpu assigned to pool
+ */
+int cpupool_add_domain(struct domain *d, int poolid)
+{
+ struct cpupool *c;
+ int rc = 1;
+ int n_dom;
+
+ if ( poolid == CPUPOOLID_NONE )
+ return 0;
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(poolid, 1);
+ if ( (c != NULL) && cpus_weight(c->cpu_valid) )
+ {
+ c->n_dom++;
+ n_dom = c->n_dom;
+ d->cpupool = c;
+ rc = 0;
+ }
+ spin_unlock(&cpupool_lock);
+    if ( !rc )
+ printk(XENLOG_DEBUG "cpupool_add_domain(dom=%d,pool=%d) n_dom %d\n",
+ d->domain_id, poolid, n_dom);
+ return rc;
+}
+
+/*
+ * remove a domain from a cpupool
+ */
+void cpupool_rm_domain(struct domain *d)
+{
+ int cpupool_id;
+ int n_dom;
+
+ if ( d->cpupool == NULL )
+ return;
+ spin_lock(&cpupool_lock);
+ cpupool_id = d->cpupool->cpupool_id;
+ d->cpupool->n_dom--;
+ n_dom = d->cpupool->n_dom;
+ d->cpupool = NULL;
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_rm_domain(dom=%d,pool=%d) n_dom %d\n",
+ d->domain_id, cpupool_id, n_dom);
+ return;
+}
+
+/*
+ * called to add a new cpu to the cpupool administration
+ * a hotplugged cpu is added to cpupool0 so that it can be made available
+ * to dom0
+ */
+void cpupool_cpu_add(unsigned int cpu)
+{
+ if ( cpupool0 == NULL )
+ return;
+ spin_lock(&cpupool_lock);
+ cpu_clear(cpu, cpupool_locked_cpus);
+ cpu_set(cpu, cpupool_free_cpus);
+ cpupool_assign_cpu_locked(cpupool0, cpu);
+ spin_unlock(&cpupool_lock);
+ return;
+}
+
+/*
+ * called to remove a cpu from the cpupool administration
+ * the cpu to be removed is locked so that it cannot be moved to another
+ * cpupool while it is being unplugged
+ * returns failure if the cpu is not in cpupool0
+ */
+int cpupool_cpu_remove(unsigned int cpu)
+{
+ int ret = 0;
+
+ spin_lock(&cpupool_lock);
+ if ( !cpu_isset(cpu, cpupool0->cpu_valid))
+ ret = -EBUSY;
+ else
+ cpu_set(cpu, cpupool_locked_cpus);
+ spin_unlock(&cpupool_lock);
+
+ return ret;
+}
+
+/*
+ * do cpupool related domctl operations
+ */
+int cpupool_do_domctl(struct xen_domctl_cpupool_op *op)
+{
+ int ret;
+ struct cpupool *c;
+
+ switch ( op->op )
+ {
+
+ case XEN_DOMCTL_CPUPOOL_OP_CREATE:
+ {
+ int poolid;
+ const struct scheduler *sched;
+
+ poolid = (op->cpupool_id == XEN_DOMCTL_CPUPOOL_PAR_ANY) ?
+            CPUPOOLID_NONE : op->cpupool_id;
+ sched = scheduler_get_by_id(op->sched_id);
+ ret = -ENOENT;
+ if ( sched == NULL )
+ break;
+ ret = 0;
+ c = cpupool_create(poolid, sched->opt_name);
+ if ( c == NULL )
+ ret = -EINVAL;
+ else
+ op->cpupool_id = c->cpupool_id;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_DESTROY:
+ {
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 1);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ ret = (cpupool_destroy(c) != 0) ? -EBUSY : 0;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_INFO:
+ {
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ op->cpupool_id = c->cpupool_id;
+ op->sched_id = c->sched.sched_id;
+ op->n_dom = c->n_dom;
+ cpumask_to_xenctl_cpumap(&(op->cpumap), &(c->cpu_valid));
+ ret = 0;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_ADDCPU:
+ {
+        unsigned int cpu;
+
+ cpu = op->cpu;
+ printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d)\n",
+ op->cpupool_id, cpu);
+ spin_lock(&cpupool_lock);
+ if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+ cpu = first_cpu(cpupool_free_cpus);
+ ret = -EINVAL;
+ if ( cpu >= NR_CPUS )
+ goto addcpu_out;
+ ret = -EBUSY;
+ if ( !cpu_isset(cpu, cpupool_free_cpus) )
+ goto addcpu_out;
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ ret = -ENOENT;
+ if ( c == NULL )
+ goto addcpu_out;
+ ret = cpupool_assign_cpu_locked(c, cpu);
+addcpu_out:
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n",
+ op->cpupool_id, cpu, ret);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_RMCPU:
+ {
+        unsigned int cpu;
+
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ cpu = op->cpu;
+ if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+ cpu = last_cpu(c->cpu_valid);
+ ret = -EINVAL;
+ if ( cpu >= NR_CPUS )
+ break;
+ /* caution: cpupool_unassign_cpu uses continue_hypercall_on_cpu and
+ * will continue after the local return
+ */
+ ret = cpupool_unassign_cpu(c, cpu);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN:
+ {
+ struct domain *d;
+
+ ret = -EINVAL;
+ if ( op->domid == 0 )
+ break;
+ ret = -ESRCH;
+ d = rcu_lock_domain_by_id(op->domid);
+ if ( d == NULL )
+ break;
+ if ( d->cpupool == NULL )
+ {
+ ret = -EINVAL;
+ rcu_unlock_domain(d);
+ break;
+ }
+ printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d\n",
+ d->domain_id, op->cpupool_id);
+ ret = -ENOENT;
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 1);
+ if ( (c != NULL) && cpus_weight(c->cpu_valid) )
+ {
+ d->cpupool->n_dom--;
+ ret = sched_move_domain(d, c);
+ if ( ret )
+ d->cpupool->n_dom++;
+ else
+ c->n_dom++;
+ }
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d ret %d\n",
+ d->domain_id, op->cpupool_id, ret);
+ rcu_unlock_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_FREEINFO:
+ {
+ cpumask_to_xenctl_cpumap(&(op->cpumap),
+ &cpupool_free_cpus);
+ ret = 0;
+ }
+ break;
+
+    default:
+        ret = -ENOSYS;
+        break;
+
+ }
+
+ return ret;
+}
+
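+/* implemented by the scheduler core; declared here for dump_runq() below */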
+void schedule_dump(struct cpupool *c);
+
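+/*
+ * dump the run queues of the idle cpupool and of all configured cpupools
+ * to the console; intended as a debug key handler (the key argument itself
+ * is not used here).
+ */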
+void dump_runq(unsigned char key)
+{
+ unsigned long flags;
+ s_time_t now = NOW();
+ struct cpupool **c;
+
+ spin_lock(&cpupool_lock);
+ local_irq_save(flags);
+
+ printk("sched_smt_power_savings: %s\n",
+ sched_smt_power_savings? "enabled":"disabled");
+ printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
+
+ printk("Idle cpupool:\n");
+ schedule_dump(NULL);
+
+ for_each_cpupool(c)
+ {
+ printk("Cpupool %d:\n", (*c)->cpupool_id);
+ schedule_dump(*c);
+ }
+
+ local_irq_restore(flags);
+ spin_unlock(&cpupool_lock);
+}
+
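+/*
+ * boot time initialization: all online cpus start out unassigned;
+ * cpupool0 itself is created later, outside of this file.
+ */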
+static int __init cpupool_init(void)
+{
+ cpupool_free_cpus = cpu_online_map;
+ cpupool_list = NULL;
+ return 0;
+}
+__initcall(cpupool_init);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */