author     Keir Fraser <keir.fraser@citrix.com>   2010-04-21 12:48:03 +0100
committer  Keir Fraser <keir.fraser@citrix.com>   2010-04-21 12:48:03 +0100
commit     78be3dbbfefa43c4ff4a19ae84342b907e0cef4a (patch)
tree       09f193bbd11b37f0a9721e347ab27eeb000dbb13 /xen/common/cpupool.c
parent     b066d6894d38a2be3f4eb67e974cfdf1275e9da2 (diff)
cpupools [1/6]: hypervisor changes
Signed-off-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Diffstat (limited to 'xen/common/cpupool.c')
 -rw-r--r--  xen/common/cpupool.c | 604
 1 file changed, 604 insertions(+), 0 deletions(-)
diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
new file mode 100644
index 0000000000..7e32296061
--- /dev/null
+++ b/xen/common/cpupool.c
@@ -0,0 +1,604 @@
+/******************************************************************************
+ * cpupool.c
+ *
+ * Generic cpupool-handling functions.
+ *
+ * Cpupools provide configurable scheduling domains. Each cpupool runs
+ * its own scheduler on a dedicated set of physical cpus. A domain is
+ * bound to exactly one cpupool at any point in time, but it can be
+ * moved to another cpupool.
+ *
+ * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
+ */
+
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/cpumask.h>
+#include <xen/percpu.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
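+
+/*
+ * Note: the cpupool structure itself is declared in the scheduler
+ * interface headers included above; the fields this file relies on are
+ * roughly:
+ *   cpupool_id - numeric pool id (cpupool_list is sorted by it)
+ *   next       - link to the next pool in cpupool_list
+ *   sched      - the pool's scheduler instance (set up by
+ *                schedule_init_global())
+ *   cpu_valid  - mask of cpus currently assigned to the pool
+ *   n_dom      - number of domains currently bound to the pool
+ */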
+
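+/*
+ * Iterate over the pool list via pointers to the link fields so that the
+ * current entry can be inserted or unlinked in place (see cpupool_create()
+ * and cpupool_destroy()).
+ */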
+#define for_each_cpupool(ptr) \
+ for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next))
+
+struct cpupool *cpupool0; /* Initial cpupool with Dom0 */
+cpumask_t cpupool_free_cpus; /* cpus not in any cpupool */
+
+static struct cpupool *cpupool_list; /* linked list, sorted by poolid */
+
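+/*
+ * Optional boot parameter limiting the number of cpus initially assigned
+ * to Pool-0; 0 (the default) means all online cpus, see
+ * cpupool0_cpu_assign().
+ */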
+static int cpupool0_max_cpus;
+integer_param("pool0_max_cpus", cpupool0_max_cpus);
+
+static int cpupool_moving_cpu = -1;
+static struct cpupool *cpupool_cpu_moving = NULL;
+static cpumask_t cpupool_locked_cpus = CPU_MASK_NONE;
+
+/* cpupool lock: be careful, this lock is sometimes released on a different
+ * cpu than the one it was obtained on!
+ */
+static DEFINE_SPINLOCK(cpupool_lock);
+
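+/* per-cpu pointer to the cpupool the cpu is currently assigned to
+ * (NULL while the cpu is unassigned) */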
+DEFINE_PER_CPU(struct cpupool *, cpupool);
+
+static struct cpupool *alloc_cpupool_struct(void)
+{
+ return xmalloc(struct cpupool);
+}
+
+static void free_cpupool_struct(struct cpupool *c)
+{
+ xfree(c);
+}
+
+/*
+ * find a cpupool by its id. to be called with cpupool lock held.
+ * if exact is not specified, the first cpupool with an id larger than or
+ * equal to the searched id is returned.
+ * returns NULL if not found.
+ */
+static struct cpupool *cpupool_find_by_id(int id, int exact)
+{
+ struct cpupool **q;
+
+ for_each_cpupool(q)
+ {
+ if ( (*q)->cpupool_id == id )
+ return *q;
+ if ( (*q)->cpupool_id > id )
+ break;
+ }
+ return exact ? NULL : *q;
+}
+
+/*
+ * create a new cpupool with specified poolid and scheduler
+ * returns pointer to new cpupool structure on success, NULL otherwise
+ * possible failures:
+ * - no memory
+ * - poolid already used
+ * - unknown scheduler
+ */
+struct cpupool *cpupool_create(int poolid, char *sched)
+{
+ struct cpupool *c;
+ struct cpupool **q;
+ int last = 0;
+
+ if ( (c = alloc_cpupool_struct()) == NULL )
+ return NULL;
+ memset(c, 0, sizeof(*c));
+
+ printk(XENLOG_DEBUG "cpupool_create(pool=%d,sched=%s)\n", poolid, sched);
+ spin_lock(&cpupool_lock);
+ for_each_cpupool(q)
+ {
+ last = (*q)->cpupool_id;
+ if ( (poolid != CPUPOOLID_NONE) && (last >= poolid) )
+ break;
+ }
+ if ( *q != NULL )
+ {
+ if ( (*q)->cpupool_id == poolid )
+ {
+ spin_unlock(&cpupool_lock);
+ free_cpupool_struct(c);
+ return NULL;
+ }
+ c->next = *q;
+ }
+ *q = c;
+ c->cpupool_id = (poolid == CPUPOOLID_NONE) ? (last + 1) : poolid;
+ if ( schedule_init_global(sched, &(c->sched)) )
+ {
+ spin_unlock(&cpupool_lock);
+ cpupool_destroy(c);
+ return NULL;
+ }
+ spin_unlock(&cpupool_lock);
+
+ printk("Created cpupool %d with scheduler %s (%s)\n", c->cpupool_id,
+ c->sched.name, c->sched.opt_name);
+
+ return c;
+}
+
+/*
+ * destroy the given cpupool
+ * returns 0 on success, 1 otherwise
+ * possible failures:
+ * - pool still in use
+ * - cpus still assigned to pool
+ * - pool not in list
+ */
+int cpupool_destroy(struct cpupool *c)
+{
+ struct cpupool **q;
+
+ spin_lock(&cpupool_lock);
+ for_each_cpupool(q)
+ if ( *q == c )
+ break;
+ if ( (*q != c) || (c->n_dom != 0) || cpus_weight(c->cpu_valid) )
+ {
+ spin_unlock(&cpupool_lock);
+ return 1;
+ }
+ *q = c->next;
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_destroy(pool=%d)\n", c->cpupool_id);
+ schedule_deinit_global(&(c->sched));
+ free_cpupool_struct(c);
+ return 0;
+}
+
+/*
+ * assign a specific cpu to a cpupool
+ * cpupool_lock must be held
+ */
+static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+{
+ if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) )
+ return -EBUSY;
+ per_cpu(cpupool, cpu) = c;
+ schedule_cpu_switch(cpu, c);
+ cpu_clear(cpu, cpupool_free_cpus);
+    if ( cpupool_moving_cpu == cpu )
+ {
+ cpupool_moving_cpu = -1;
+ cpupool_cpu_moving = NULL;
+ }
+ cpu_set(cpu, c->cpu_valid);
+ return 0;
+}
+
+/*
+ * assign free physical cpus to a cpupool
+ * cpus assigned are unused cpus with lowest possible ids
+ * returns the number of cpus assigned
+ */
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu)
+{
+ int i;
+ int n;
+
+ n = 0;
+ spin_lock(&cpupool_lock);
+ for_each_cpu_mask(i, cpupool_free_cpus)
+ {
+ if ( cpupool_assign_cpu_locked(c, i) == 0 )
+ n++;
+ if ( n == ncpu )
+ break;
+ }
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_assign_ncpu(pool=%d,ncpu=%d) rc %d\n",
+ c->cpupool_id, ncpu, n);
+ return n;
+}
+
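+/*
+ * unassign a cpu from its cpupool, helper part
+ * this is invoked via continue_hypercall_on_cpu() from
+ * cpupool_unassign_cpu() and thus runs on a cpu other than the one being
+ * removed; note that it releases cpupool_lock, which was taken by
+ * cpupool_unassign_cpu(), possibly on a different cpu.
+ */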
+static long cpupool_unassign_cpu_helper(void *info)
+{
+ struct cpupool *c = (struct cpupool *)info;
+ int cpu = cpupool_moving_cpu;
+ long ret;
+ int cpupool_id = c->cpupool_id;
+
+ ret = cpu_disable_scheduler(cpu);
+ cpu_set(cpu, cpupool_free_cpus);
+ if ( !ret )
+ {
+ schedule_cpu_switch(cpu, NULL);
+ per_cpu(cpupool, cpu) = NULL;
+ cpupool_moving_cpu = -1;
+ cpupool_cpu_moving = NULL;
+ }
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %ld\n",
+ cpupool_id, cpu, ret);
+ return ret;
+}
+
+/*
+ * unassign a specific cpu from a cpupool
+ * we must be sure not to run on the cpu to be unassigned! to achieve this
+ * the main functionality is performed via continue_hypercall_on_cpu() on a
+ * different cpu.
+ * if the cpu to be removed is the last one of the cpupool, no active domain
+ * may be bound to the cpupool. dying domains are moved to cpupool0, as they
+ * might be zombies.
+ * possible failures:
+ * - last cpu and still active domains in cpupool
+ * - cpu just being unplugged
+ */
+int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
+{
+ int work_cpu;
+ int ret;
+ struct domain *d;
+ int cpupool_id = c->cpupool_id;
+
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
+ cpupool_id, cpu);
+ spin_lock(&cpupool_lock);
+ ret = -EBUSY;
+ if ( (cpupool_moving_cpu != -1) && (cpu != cpupool_moving_cpu) )
+ goto out;
+ if ( cpu_isset(cpu, cpupool_locked_cpus) )
+ goto out;
+
+ ret = 0;
+ if ( !cpu_isset(cpu, c->cpu_valid) && (cpu != cpupool_moving_cpu) )
+ goto out;
+
+ if ( (c->n_dom > 0) && (cpus_weight(c->cpu_valid) == 1) &&
+ (cpu != cpupool_moving_cpu) )
+ {
+ for_each_domain(d)
+ {
+ if ( d->cpupool != c )
+ continue;
+ if ( !d->is_dying )
+ {
+ ret = -EBUSY;
+ break;
+ }
+ c->n_dom--;
+ ret = sched_move_domain(d, cpupool0);
+ if ( ret )
+ {
+ c->n_dom++;
+ break;
+ }
+ cpupool0->n_dom++;
+ }
+ if ( ret )
+ goto out;
+ }
+ cpupool_moving_cpu = cpu;
+ cpupool_cpu_moving = c;
+ cpu_clear(cpu, c->cpu_valid);
+ work_cpu = smp_processor_id();
+ if ( work_cpu == cpu )
+ {
+ work_cpu = first_cpu(cpupool0->cpu_valid);
+ if ( work_cpu == cpu )
+ work_cpu = next_cpu(cpu, cpupool0->cpu_valid);
+ }
+ return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c);
+
+out:
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n",
+ cpupool_id, cpu, ret);
+ return ret;
+}
+
+/*
+ * assign cpus to the default cpupool
+ * by default all online cpus are assigned; a smaller number can be
+ * requested via the "pool0_max_cpus" boot parameter
+ * possible failures:
+ * - no cpu assigned
+ */
+int __init cpupool0_cpu_assign(struct cpupool *c)
+{
+ if ( (cpupool0_max_cpus == 0) || (cpupool0_max_cpus > num_online_cpus()) )
+ cpupool0_max_cpus = num_online_cpus();
+ if ( !cpupool_assign_ncpu(cpupool0, cpupool0_max_cpus) )
+ return 1;
+ return 0;
+}
+
+/*
+ * add a new domain to a cpupool
+ * possible failures:
+ * - pool does not exist
+ * - no cpu assigned to pool
+ */
+int cpupool_add_domain(struct domain *d, int poolid)
+{
+ struct cpupool *c;
+ int rc = 1;
+ int n_dom;
+
+ if ( poolid == CPUPOOLID_NONE )
+ return 0;
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(poolid, 1);
+ if ( (c != NULL) && cpus_weight(c->cpu_valid) )
+ {
+ c->n_dom++;
+ n_dom = c->n_dom;
+ d->cpupool = c;
+ rc = 0;
+ }
+ spin_unlock(&cpupool_lock);
+    if ( !rc )
+ printk(XENLOG_DEBUG "cpupool_add_domain(dom=%d,pool=%d) n_dom %d\n",
+ d->domain_id, poolid, n_dom);
+ return rc;
+}
+
+/*
+ * remove a domain from a cpupool
+ */
+void cpupool_rm_domain(struct domain *d)
+{
+ int cpupool_id;
+ int n_dom;
+
+ if ( d->cpupool == NULL )
+ return;
+ spin_lock(&cpupool_lock);
+ cpupool_id = d->cpupool->cpupool_id;
+ d->cpupool->n_dom--;
+ n_dom = d->cpupool->n_dom;
+ d->cpupool = NULL;
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_rm_domain(dom=%d,pool=%d) n_dom %d\n",
+ d->domain_id, cpupool_id, n_dom);
+ return;
+}
+
+/*
+ * called to add a new cpu to the cpupool administration
+ * a hotplugged cpu is added to cpupool0 so that it can be made available
+ * to dom0
+ */
+void cpupool_cpu_add(unsigned int cpu)
+{
+ if ( cpupool0 == NULL )
+ return;
+ spin_lock(&cpupool_lock);
+ cpu_clear(cpu, cpupool_locked_cpus);
+ cpu_set(cpu, cpupool_free_cpus);
+ cpupool_assign_cpu_locked(cpupool0, cpu);
+ spin_unlock(&cpupool_lock);
+ return;
+}
+
+/*
+ * called to remove a cpu from the cpupool administration
+ * the cpu to be removed is locked so that it cannot be moved to another
+ * cpupool while it is being unplugged
+ * returns failure if the cpu is not in cpupool0
+ */
+int cpupool_cpu_remove(unsigned int cpu)
+{
+ int ret = 0;
+
+ spin_lock(&cpupool_lock);
+ if ( !cpu_isset(cpu, cpupool0->cpu_valid))
+ ret = -EBUSY;
+ else
+ cpu_set(cpu, cpupool_locked_cpus);
+ spin_unlock(&cpupool_lock);
+
+ return ret;
+}
+
+/*
+ * do cpupool related domctl operations
+ */
+int cpupool_do_domctl(struct xen_domctl_cpupool_op *op)
+{
+ int ret;
+ struct cpupool *c;
+
+ switch ( op->op )
+ {
+
+ case XEN_DOMCTL_CPUPOOL_OP_CREATE:
+ {
+ int poolid;
+ const struct scheduler *sched;
+
+ poolid = (op->cpupool_id == XEN_DOMCTL_CPUPOOL_PAR_ANY) ?
+            CPUPOOLID_NONE : op->cpupool_id;
+ sched = scheduler_get_by_id(op->sched_id);
+ ret = -ENOENT;
+ if ( sched == NULL )
+ break;
+ ret = 0;
+ c = cpupool_create(poolid, sched->opt_name);
+ if ( c == NULL )
+ ret = -EINVAL;
+ else
+ op->cpupool_id = c->cpupool_id;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_DESTROY:
+ {
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 1);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ ret = (cpupool_destroy(c) != 0) ? -EBUSY : 0;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_INFO:
+ {
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ op->cpupool_id = c->cpupool_id;
+ op->sched_id = c->sched.sched_id;
+ op->n_dom = c->n_dom;
+ cpumask_to_xenctl_cpumap(&(op->cpumap), &(c->cpu_valid));
+ ret = 0;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_ADDCPU:
+ {
+        unsigned int cpu;
+
+ cpu = op->cpu;
+ printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d)\n",
+ op->cpupool_id, cpu);
+ spin_lock(&cpupool_lock);
+ if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+ cpu = first_cpu(cpupool_free_cpus);
+ ret = -EINVAL;
+ if ( cpu >= NR_CPUS )
+ goto addcpu_out;
+ ret = -EBUSY;
+ if ( !cpu_isset(cpu, cpupool_free_cpus) )
+ goto addcpu_out;
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ ret = -ENOENT;
+ if ( c == NULL )
+ goto addcpu_out;
+ ret = cpupool_assign_cpu_locked(c, cpu);
+addcpu_out:
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n",
+ op->cpupool_id, cpu, ret);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_RMCPU:
+ {
+        unsigned int cpu;
+
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ cpu = op->cpu;
+ if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+ cpu = last_cpu(c->cpu_valid);
+ ret = -EINVAL;
+ if ( cpu >= NR_CPUS )
+ break;
+ /* caution: cpupool_unassign_cpu uses continue_hypercall_on_cpu and
+ * will continue after the local return
+ */
+ ret = cpupool_unassign_cpu(c, cpu);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN:
+ {
+ struct domain *d;
+
+ ret = -EINVAL;
+ if ( op->domid == 0 )
+ break;
+ ret = -ESRCH;
+ d = rcu_lock_domain_by_id(op->domid);
+ if ( d == NULL )
+ break;
+ if ( d->cpupool == NULL )
+ {
+ ret = -EINVAL;
+ rcu_unlock_domain(d);
+ break;
+ }
+ printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d\n",
+ d->domain_id, op->cpupool_id);
+ ret = -ENOENT;
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 1);
+ if ( (c != NULL) && cpus_weight(c->cpu_valid) )
+ {
+ d->cpupool->n_dom--;
+ ret = sched_move_domain(d, c);
+ if ( ret )
+ d->cpupool->n_dom++;
+ else
+ c->n_dom++;
+ }
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d ret %d\n",
+ d->domain_id, op->cpupool_id, ret);
+ rcu_unlock_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_FREEINFO:
+ {
+ cpumask_to_xenctl_cpumap(&(op->cpumap),
+ &cpupool_free_cpus);
+ ret = 0;
+ }
+ break;
+
+    default:
+        ret = -ENOSYS;
+        break;
+
+ }
+
+ return ret;
+}
+
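+/* implemented by the scheduler core; declared here for dump_runq() below */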
+void schedule_dump(struct cpupool *c);
+
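+/*
+ * dump the run queues of the idle cpupool and of all configured cpupools
+ * to the console; intended as a debug key handler (the key argument itself
+ * is not used here).
+ */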
+void dump_runq(unsigned char key)
+{
+ unsigned long flags;
+ s_time_t now = NOW();
+ struct cpupool **c;
+
+ spin_lock(&cpupool_lock);
+ local_irq_save(flags);
+
+ printk("sched_smt_power_savings: %s\n",
+ sched_smt_power_savings? "enabled":"disabled");
+ printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
+
+ printk("Idle cpupool:\n");
+ schedule_dump(NULL);
+
+ for_each_cpupool(c)
+ {
+ printk("Cpupool %d:\n", (*c)->cpupool_id);
+ schedule_dump(*c);
+ }
+
+ local_irq_restore(flags);
+ spin_unlock(&cpupool_lock);
+}
+
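+/*
+ * boot time initialization: all online cpus start out unassigned;
+ * cpupool0 itself is created later, outside of this file.
+ */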
+static int __init cpupool_init(void)
+{
+ cpupool_free_cpus = cpu_online_map;
+ cpupool_list = NULL;
+ return 0;
+}
+__initcall(cpupool_init);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */