aboutsummaryrefslogtreecommitdiffstats
path: root/xen
diff options
context:
space:
mode:
authorKeir Fraser <keir.fraser@citrix.com>2010-04-21 12:48:03 +0100
committerKeir Fraser <keir.fraser@citrix.com>2010-04-21 12:48:03 +0100
commit78be3dbbfefa43c4ff4a19ae84342b907e0cef4a (patch)
tree09f193bbd11b37f0a9721e347ab27eeb000dbb13 /xen
parentb066d6894d38a2be3f4eb67e974cfdf1275e9da2 (diff)
downloadxen-78be3dbbfefa43c4ff4a19ae84342b907e0cef4a.tar.gz
xen-78be3dbbfefa43c4ff4a19ae84342b907e0cef4a.tar.bz2
xen-78be3dbbfefa43c4ff4a19ae84342b907e0cef4a.zip
cpupools [1/6]: hypervisor changes
Signed-off-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Diffstat (limited to 'xen')
-rw-r--r--xen/arch/x86/domain_build.c11
-rw-r--r--xen/arch/x86/setup.c6
-rw-r--r--xen/arch/x86/smpboot.c18
-rw-r--r--xen/common/Makefile1
-rw-r--r--xen/common/cpupool.c604
-rw-r--r--xen/common/domain.c8
-rw-r--r--xen/common/domctl.c21
-rw-r--r--xen/common/sched_credit.c411
-rw-r--r--xen/common/sched_credit2.c293
-rw-r--r--xen/common/sched_sedf.c144
-rw-r--r--xen/common/schedule.c300
-rw-r--r--xen/include/public/domctl.h31
-rw-r--r--xen/include/xen/sched-if.h60
-rw-r--r--xen/include/xen/sched.h22
14 files changed, 1560 insertions, 370 deletions
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 7307cc919b..3f4d683b28 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -9,6 +9,7 @@
#include <xen/lib.h>
#include <xen/ctype.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/event.h>
@@ -84,7 +85,7 @@ integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
struct vcpu *__init alloc_dom0_vcpu0(void)
{
if ( opt_dom0_max_vcpus == 0 )
- opt_dom0_max_vcpus = num_online_cpus();
+ opt_dom0_max_vcpus = num_cpupool_cpus(cpupool0);
if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS )
opt_dom0_max_vcpus = MAX_VIRT_CPUS;
@@ -277,7 +278,7 @@ int __init construct_dom0(
unsigned long _initrd_start, unsigned long initrd_len,
char *cmdline)
{
- int i, rc, compatible, compat32, order, machine;
+ int i, cpu, rc, compatible, compat32, order, machine;
struct cpu_user_regs *regs;
unsigned long pfn, mfn;
unsigned long nr_pages;
@@ -776,8 +777,12 @@ int __init construct_dom0(
printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus);
+ cpu = first_cpu(cpupool0->cpu_valid);
for ( i = 1; i < opt_dom0_max_vcpus; i++ )
- (void)alloc_vcpu(d, i, i % num_online_cpus());
+ {
+ cpu = cycle_cpu(cpu, cpupool0->cpu_valid);
+ (void)alloc_vcpu(d, i, cpu);
+ }
/* Set up CR3 value for write_ptbase */
if ( paging_mode_enabled(d) )
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 855cb4ad54..4ff9dab413 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -2,6 +2,7 @@
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/serial.h>
#include <xen/softirq.h>
@@ -1093,6 +1094,11 @@ void __init __start_xen(unsigned long mbi_p)
if ( !tboot_protect_mem_regions() )
panic("Could not protect TXT memory regions\n");
+ /* Create initial cpupool 0. */
+ cpupool0 = cpupool_create(0, NULL);
+ if ( (cpupool0 == NULL) || cpupool0_cpu_assign(cpupool0) )
+ panic("Error creating cpupool 0\n");
+
/* Create initial domain 0. */
dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF);
if ( (dom0 == NULL) || (alloc_dom0_vcpu0() == NULL) )
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index f3368302f6..963bc24a52 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -39,6 +39,7 @@
#include <xen/mm.h>
#include <xen/domain.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/irq.h>
#include <xen/delay.h>
#include <xen/softirq.h>
@@ -1296,10 +1297,11 @@ int __cpu_disable(void)
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
+ cpu_clear(cpu, cpupool0->cpu_valid);
cpu_clear(cpu, cpu_online_map);
fixup_irqs();
- cpu_disable_scheduler();
+ cpu_disable_scheduler(cpu);
return 0;
}
@@ -1336,11 +1338,6 @@ int cpu_down(unsigned int cpu)
if (!spin_trylock(&cpu_add_remove_lock))
return -EBUSY;
- if (num_online_cpus() == 1) {
- err = -EBUSY;
- goto out;
- }
-
/* Can not offline BSP */
if (cpu == 0) {
err = -EINVAL;
@@ -1352,13 +1349,19 @@ int cpu_down(unsigned int cpu)
goto out;
}
+ err = cpupool_cpu_remove(cpu);
+ if (err)
+ goto out;
+
printk("Prepare to bring CPU%d down...\n", cpu);
cpufreq_del_cpu(cpu);
err = stop_machine_run(take_cpu_down, NULL, cpu);
- if (err < 0)
+ if (err < 0) {
+ cpupool_cpu_add(cpu);
goto out;
+ }
__cpu_die(cpu);
@@ -1559,6 +1562,7 @@ int __devinit __cpu_up(unsigned int cpu)
process_pending_softirqs();
}
+ cpupool_cpu_add(cpu);
cpufreq_add_cpu(cpu);
return 0;
}
diff --git a/xen/common/Makefile b/xen/common/Makefile
index c36d0f351d..a8d093f773 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -1,5 +1,6 @@
obj-y += bitmap.o
obj-y += cpu.o
+obj-y += cpupool.o
obj-y += domctl.o
obj-y += domain.o
obj-y += event_channel.o
diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
new file mode 100644
index 0000000000..7e32296061
--- /dev/null
+++ b/xen/common/cpupool.c
@@ -0,0 +1,604 @@
+/******************************************************************************
+ * cpupool.c
+ *
+ * Generic cpupool-handling functions.
+ *
+ * Cpupools are a feature to have configurable scheduling domains. Each
+ * cpupool runs its own scheduler on a dedicated set of physical cpus.
+ * A domain is bound to one cpupool at any time, but it can be moved to
+ * another cpupool.
+ *
+ * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
+ */
+
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/cpumask.h>
+#include <xen/percpu.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+
+#define for_each_cpupool(ptr) \
+ for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next))
+
+struct cpupool *cpupool0; /* Initial cpupool with Dom0 */
+cpumask_t cpupool_free_cpus; /* cpus not in any cpupool */
+
+static struct cpupool *cpupool_list; /* linked list, sorted by poolid */
+
+static int cpupool0_max_cpus;
+integer_param("pool0_max_cpus", cpupool0_max_cpus);
+
+static int cpupool_moving_cpu = -1;
+static struct cpupool *cpupool_cpu_moving = NULL;
+static cpumask_t cpupool_locked_cpus = CPU_MASK_NONE;
+
+/* cpupool lock: be careful, this lock is sometimes released on a different
+ * cpu than the one it was obtained on!
+ */
+static DEFINE_SPINLOCK(cpupool_lock);
+
+DEFINE_PER_CPU(struct cpupool *, cpupool);
+
+static struct cpupool *alloc_cpupool_struct(void)
+{
+ return xmalloc(struct cpupool);
+}
+
+static void free_cpupool_struct(struct cpupool *c)
+{
+ xfree(c);
+}
+
+/*
+ * find a cpupool by its id. to be called with cpupool lock held
+ * if exact is not specified, the first cpupool with an id larger or equal to
+ * the searched id is returned
+ * returns NULL if not found.
+ */
+static struct cpupool *cpupool_find_by_id(int id, int exact)
+{
+ struct cpupool **q;
+
+ for_each_cpupool(q)
+ {
+ if ( (*q)->cpupool_id == id )
+ return *q;
+ if ( (*q)->cpupool_id > id )
+ break;
+ }
+ return exact ? NULL : *q;
+}
+
+/*
+ * create a new cpupool with specified poolid and scheduler
+ * returns pointer to new cpupool structure if okay, NULL else
+ * possible failures:
+ * - no memory
+ * - poolid already used
+ * - unknown scheduler
+ */
+struct cpupool *cpupool_create(int poolid, char *sched)
+{
+ struct cpupool *c;
+ struct cpupool **q;
+ int last = 0;
+
+ if ( (c = alloc_cpupool_struct()) == NULL )
+ return NULL;
+ memset(c, 0, sizeof(*c));
+
+ printk(XENLOG_DEBUG "cpupool_create(pool=%d,sched=%s)\n", poolid, sched);
+ spin_lock(&cpupool_lock);
+ for_each_cpupool(q)
+ {
+ last = (*q)->cpupool_id;
+ if ( (poolid != CPUPOOLID_NONE) && (last >= poolid) )
+ break;
+ }
+ if ( *q != NULL )
+ {
+ if ( (*q)->cpupool_id == poolid )
+ {
+ spin_unlock(&cpupool_lock);
+ free_cpupool_struct(c);
+ return NULL;
+ }
+ c->next = *q;
+ }
+ *q = c;
+ c->cpupool_id = (poolid == CPUPOOLID_NONE) ? (last + 1) : poolid;
+ if ( schedule_init_global(sched, &(c->sched)) )
+ {
+ spin_unlock(&cpupool_lock);
+ cpupool_destroy(c);
+ return NULL;
+ }
+ spin_unlock(&cpupool_lock);
+
+ printk("Created cpupool %d with scheduler %s (%s)\n", c->cpupool_id,
+ c->sched.name, c->sched.opt_name);
+
+ return c;
+}
+/*
+ * destroys the given cpupool
+ * returns 0 on success, 1 else
+ * possible failures:
+ * - pool still in use
+ * - cpus still assigned to pool
+ * - pool not in list
+ */
+int cpupool_destroy(struct cpupool *c)
+{
+ struct cpupool **q;
+
+ spin_lock(&cpupool_lock);
+ for_each_cpupool(q)
+ if ( *q == c )
+ break;
+ if ( (*q != c) || (c->n_dom != 0) || cpus_weight(c->cpu_valid) )
+ {
+ spin_unlock(&cpupool_lock);
+ return 1;
+ }
+ *q = c->next;
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_destroy(pool=%d)\n", c->cpupool_id);
+ schedule_deinit_global(&(c->sched));
+ free_cpupool_struct(c);
+ return 0;
+}
+
+/*
+ * assign a specific cpu to a cpupool
+ * cpupool_lock must be held
+ */
+static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+{
+ if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) )
+ return -EBUSY;
+ per_cpu(cpupool, cpu) = c;
+ schedule_cpu_switch(cpu, c);
+ cpu_clear(cpu, cpupool_free_cpus);
+ if (cpupool_moving_cpu == cpu)
+ {
+ cpupool_moving_cpu = -1;
+ cpupool_cpu_moving = NULL;
+ }
+ cpu_set(cpu, c->cpu_valid);
+ return 0;
+}
+
+/*
+ * assign free physical cpus to a cpupool
+ * cpus assigned are unused cpus with lowest possible ids
+ * returns the number of cpus assigned
+ */
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu)
+{
+ int i;
+ int n;
+
+ n = 0;
+ spin_lock(&cpupool_lock);
+ for_each_cpu_mask(i, cpupool_free_cpus)
+ {
+ if ( cpupool_assign_cpu_locked(c, i) == 0 )
+ n++;
+ if ( n == ncpu )
+ break;
+ }
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_assign_ncpu(pool=%d,ncpu=%d) rc %d\n",
+ c->cpupool_id, ncpu, n);
+ return n;
+}
+
+static long cpupool_unassign_cpu_helper(void *info)
+{
+ struct cpupool *c = (struct cpupool *)info;
+ int cpu = cpupool_moving_cpu;
+ long ret;
+ int cpupool_id = c->cpupool_id;
+
+ ret = cpu_disable_scheduler(cpu);
+ cpu_set(cpu, cpupool_free_cpus);
+ if ( !ret )
+ {
+ schedule_cpu_switch(cpu, NULL);
+ per_cpu(cpupool, cpu) = NULL;
+ cpupool_moving_cpu = -1;
+ cpupool_cpu_moving = NULL;
+ }
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %ld\n",
+ cpupool_id, cpu, ret);
+ return ret;
+}
+
+/*
+ * unassign a specific cpu from a cpupool
+ * we must be sure not to run on the cpu to be unassigned! to achieve this
+ * the main functionality is performed via continue_hypercall_on_cpu on a
+ * specific cpu.
+ * if the cpu to be removed is the last one of the cpupool no active domain
+ * must be bound to the cpupool. dying domains are moved to cpupool0 as they
+ * might be zombies.
+ * possible failures:
+ * - last cpu and still active domains in cpupool
+ * - cpu just being unplugged
+ */
+int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
+{
+ int work_cpu;
+ int ret;
+ struct domain *d;
+ int cpupool_id = c->cpupool_id;
+
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
+ cpupool_id, cpu);
+ spin_lock(&cpupool_lock);
+ ret = -EBUSY;
+ if ( (cpupool_moving_cpu != -1) && (cpu != cpupool_moving_cpu) )
+ goto out;
+ if ( cpu_isset(cpu, cpupool_locked_cpus) )
+ goto out;
+
+ ret = 0;
+ if ( !cpu_isset(cpu, c->cpu_valid) && (cpu != cpupool_moving_cpu) )
+ goto out;
+
+ if ( (c->n_dom > 0) && (cpus_weight(c->cpu_valid) == 1) &&
+ (cpu != cpupool_moving_cpu) )
+ {
+ for_each_domain(d)
+ {
+ if ( d->cpupool != c )
+ continue;
+ if ( !d->is_dying )
+ {
+ ret = -EBUSY;
+ break;
+ }
+ c->n_dom--;
+ ret = sched_move_domain(d, cpupool0);
+ if ( ret )
+ {
+ c->n_dom++;
+ break;
+ }
+ cpupool0->n_dom++;
+ }
+ if ( ret )
+ goto out;
+ }
+ cpupool_moving_cpu = cpu;
+ cpupool_cpu_moving = c;
+ cpu_clear(cpu, c->cpu_valid);
+ work_cpu = smp_processor_id();
+ if ( work_cpu == cpu )
+ {
+ work_cpu = first_cpu(cpupool0->cpu_valid);
+ if ( work_cpu == cpu )
+ work_cpu = next_cpu(cpu, cpupool0->cpu_valid);
+ }
+ return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c);
+
+out:
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n",
+ cpupool_id, cpu, ret);
+ return ret;
+}
+
+/*
+ * assign cpus to the default cpupool
+ * default are all cpus, less cpus may be specified as boot parameter
+ * possible failures:
+ * - no cpu assigned
+ */
+int __init cpupool0_cpu_assign(struct cpupool *c)
+{
+ if ( (cpupool0_max_cpus == 0) || (cpupool0_max_cpus > num_online_cpus()) )
+ cpupool0_max_cpus = num_online_cpus();
+ if ( !cpupool_assign_ncpu(cpupool0, cpupool0_max_cpus) )
+ return 1;
+ return 0;
+}
+
+/*
+ * add a new domain to a cpupool
+ * possible failures:
+ * - pool does not exist
+ * - no cpu assigned to pool
+ */
+int cpupool_add_domain(struct domain *d, int poolid)
+{
+ struct cpupool *c;
+ int rc = 1;
+ int n_dom;
+
+ if ( poolid == CPUPOOLID_NONE )
+ return 0;
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(poolid, 1);
+ if ( (c != NULL) && cpus_weight(c->cpu_valid) )
+ {
+ c->n_dom++;
+ n_dom = c->n_dom;
+ d->cpupool = c;
+ rc = 0;
+ }
+ spin_unlock(&cpupool_lock);
+ if (!rc)
+ printk(XENLOG_DEBUG "cpupool_add_domain(dom=%d,pool=%d) n_dom %d\n",
+ d->domain_id, poolid, n_dom);
+ return rc;
+}
+
+/*
+ * remove a domain from a cpupool
+ */
+void cpupool_rm_domain(struct domain *d)
+{
+ int cpupool_id;
+ int n_dom;
+
+ if ( d->cpupool == NULL )
+ return;
+ spin_lock(&cpupool_lock);
+ cpupool_id = d->cpupool->cpupool_id;
+ d->cpupool->n_dom--;
+ n_dom = d->cpupool->n_dom;
+ d->cpupool = NULL;
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_rm_domain(dom=%d,pool=%d) n_dom %d\n",
+ d->domain_id, cpupool_id, n_dom);
+ return;
+}
+
+/*
+ * called to add a new cpu to pool admin
+ * we add a hotplugged cpu to the cpupool0 to be able to add it to dom0
+ */
+void cpupool_cpu_add(unsigned int cpu)
+{
+ if ( cpupool0 == NULL )
+ return;
+ spin_lock(&cpupool_lock);
+ cpu_clear(cpu, cpupool_locked_cpus);
+ cpu_set(cpu, cpupool_free_cpus);
+ cpupool_assign_cpu_locked(cpupool0, cpu);
+ spin_unlock(&cpupool_lock);
+ return;
+}
+
+/*
+ * called to remove a cpu from pool admin
+ * the cpu to be removed is locked to avoid removing it from dom0
+ * returns failure if not in pool0
+ */
+int cpupool_cpu_remove(unsigned int cpu)
+{
+ int ret = 0;
+
+ spin_lock(&cpupool_lock);
+ if ( !cpu_isset(cpu, cpupool0->cpu_valid))
+ ret = -EBUSY;
+ else
+ cpu_set(cpu, cpupool_locked_cpus);
+ spin_unlock(&cpupool_lock);
+
+ return ret;
+}
+
+/*
+ * do cpupool related domctl operations
+ */
+int cpupool_do_domctl(struct xen_domctl_cpupool_op *op)
+{
+ int ret;
+ struct cpupool *c;
+
+ switch ( op->op )
+ {
+
+ case XEN_DOMCTL_CPUPOOL_OP_CREATE:
+ {
+ int poolid;
+ const struct scheduler *sched;
+
+ poolid = (op->cpupool_id == XEN_DOMCTL_CPUPOOL_PAR_ANY) ?
+ CPUPOOLID_NONE: op->cpupool_id;
+ sched = scheduler_get_by_id(op->sched_id);
+ ret = -ENOENT;
+ if ( sched == NULL )
+ break;
+ ret = 0;
+ c = cpupool_create(poolid, sched->opt_name);
+ if ( c == NULL )
+ ret = -EINVAL;
+ else
+ op->cpupool_id = c->cpupool_id;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_DESTROY:
+ {
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 1);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ ret = (cpupool_destroy(c) != 0) ? -EBUSY : 0;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_INFO:
+ {
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ op->cpupool_id = c->cpupool_id;
+ op->sched_id = c->sched.sched_id;
+ op->n_dom = c->n_dom;
+ cpumask_to_xenctl_cpumap(&(op->cpumap), &(c->cpu_valid));
+ ret = 0;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_ADDCPU:
+ {
+ unsigned cpu;
+
+ cpu = op->cpu;
+ printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d)\n",
+ op->cpupool_id, cpu);
+ spin_lock(&cpupool_lock);
+ if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+ cpu = first_cpu(cpupool_free_cpus);
+ ret = -EINVAL;
+ if ( cpu >= NR_CPUS )
+ goto addcpu_out;
+ ret = -EBUSY;
+ if ( !cpu_isset(cpu, cpupool_free_cpus) )
+ goto addcpu_out;
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ ret = -ENOENT;
+ if ( c == NULL )
+ goto addcpu_out;
+ ret = cpupool_assign_cpu_locked(c, cpu);
+addcpu_out:
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n",
+ op->cpupool_id, cpu, ret);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_RMCPU:
+ {
+ unsigned cpu;
+
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ cpu = op->cpu;
+ if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+ cpu = last_cpu(c->cpu_valid);
+ ret = -EINVAL;
+ if ( cpu >= NR_CPUS )
+ break;
+ /* caution: cpupool_unassign_cpu uses continue_hypercall_on_cpu and
+ * will continue after the local return
+ */
+ ret = cpupool_unassign_cpu(c, cpu);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN:
+ {
+ struct domain *d;
+
+ ret = -EINVAL;
+ if ( op->domid == 0 )
+ break;
+ ret = -ESRCH;
+ d = rcu_lock_domain_by_id(op->domid);
+ if ( d == NULL )
+ break;
+ if ( d->cpupool == NULL )
+ {
+ ret = -EINVAL;
+ rcu_unlock_domain(d);
+ break;
+ }
+ printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d\n",
+ d->domain_id, op->cpupool_id);
+ ret = -ENOENT;
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 1);
+ if ( (c != NULL) && cpus_weight(c->cpu_valid) )
+ {
+ d->cpupool->n_dom--;
+ ret = sched_move_domain(d, c);
+ if ( ret )
+ d->cpupool->n_dom++;
+ else
+ c->n_dom++;
+ }
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d ret %d\n",
+ d->domain_id, op->cpupool_id, ret);
+ rcu_unlock_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_FREEINFO:
+ {
+ cpumask_to_xenctl_cpumap(&(op->cpumap),
+ &cpupool_free_cpus);
+ ret = 0;
+ }
+ break;
+
+ default:
+ ret = -ENOSYS;
+
+ }
+
+ return ret;
+}
+
+void schedule_dump(struct cpupool *c);
+
+void dump_runq(unsigned char key)
+{
+ unsigned long flags;
+ s_time_t now = NOW();
+ struct cpupool **c;
+
+ spin_lock(&cpupool_lock);
+ local_irq_save(flags);
+
+ printk("sched_smt_power_savings: %s\n",
+ sched_smt_power_savings? "enabled":"disabled");
+ printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
+
+ printk("Idle cpupool:\n");
+ schedule_dump(NULL);
+
+ for_each_cpupool(c)
+ {
+ printk("Cpupool %d:\n", (*c)->cpupool_id);
+ schedule_dump(*c);
+ }
+
+ local_irq_restore(flags);
+ spin_unlock(&cpupool_lock);
+}
+
+static int __init cpupool_init(void)
+{
+ cpupool_free_cpus = cpu_online_map;
+ cpupool_list = NULL;
+ return 0;
+}
+__initcall(cpupool_init);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/domain.c b/xen/common/domain.c
index c1b32ab087..e6b6ea3f0a 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -218,6 +218,7 @@ struct domain *domain_create(
enum { INIT_xsm = 1u<<0, INIT_rangeset = 1u<<1, INIT_evtchn = 1u<<2,
INIT_gnttab = 1u<<3, INIT_arch = 1u<<4 };
int init_status = 0;
+ int poolid = CPUPOOLID_NONE;
if ( (d = alloc_domain_struct()) == NULL )
return NULL;
@@ -282,6 +283,8 @@ struct domain *domain_create(
if ( grant_table_create(d) != 0 )
goto fail;
init_status |= INIT_gnttab;
+
+ poolid = 0;
}
if ( arch_domain_create(d, domcr_flags) != 0 )
@@ -293,6 +296,9 @@ struct domain *domain_create(
if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
goto fail;
+ if ( cpupool_add_domain(d, poolid) != 0 )
+ goto fail;
+
if ( sched_init_domain(d) != 0 )
goto fail;
@@ -601,6 +607,8 @@ static void complete_domain_destroy(struct rcu_head *head)
rangeset_domain_destroy(d);
+ cpupool_rm_domain(d);
+
sched_destroy_domain(d);
/* Free page used by xen oprofile buffer. */
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index f87d91c06b..a80d34b8d8 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -11,6 +11,7 @@
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/domain_page.h>
@@ -140,10 +141,12 @@ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
info->shared_info_frame = mfn_to_gmfn(d, __pa(d->shared_info)>>PAGE_SHIFT);
BUG_ON(SHARED_M2P(info->shared_info_frame));
+ info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
+
memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
}
-static unsigned int default_vcpu0_location(void)
+static unsigned int default_vcpu0_location(cpumask_t *online)
{
struct domain *d;
struct vcpu *v;
@@ -173,7 +176,7 @@ static unsigned int default_vcpu0_location(void)
if ( cpus_weight(per_cpu(cpu_sibling_map, 0)) > 1 )
cpu = next_cpu(cpu, per_cpu(cpu_sibling_map, 0));
cpu_exclude_map = per_cpu(cpu_sibling_map, 0);
- for_each_online_cpu ( i )
+ for_each_cpu_mask(i, *online)
{
if ( cpu_isset(i, cpu_exclude_map) )
continue;
@@ -450,6 +453,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
{
struct domain *d;
unsigned int i, max = op->u.max_vcpus.max, cpu;
+ cpumask_t *online;
ret = -ESRCH;
if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
@@ -498,6 +502,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
goto maxvcpu_out;
ret = -ENOMEM;
+ online = (d->cpupool == NULL) ? &cpu_online_map : &d->cpupool->cpu_valid;
if ( max > d->max_vcpus )
{
struct vcpu **vcpus;
@@ -521,8 +526,8 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
continue;
cpu = (i == 0) ?
- default_vcpu0_location() :
- cycle_cpu(d->vcpu[i-1]->processor, cpu_online_map);
+ default_vcpu0_location(online) :
+ cycle_cpu(d->vcpu[i-1]->processor, *online);
if ( alloc_vcpu(d, i, cpu) == NULL )
goto maxvcpu_out;
@@ -961,6 +966,14 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
}
break;
+ case XEN_DOMCTL_cpupool_op:
+ {
+ ret = cpupool_do_domctl(&op->u.cpupool_op);
+ if ( (ret == 0) && copy_to_guest(u_domctl, op, 1) )
+ ret = -EFAULT;
+ }
+ break;
+
default:
ret = arch_do_domctl(op, u_domctl);
break;
diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index cc191e26dc..a25da1156a 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -70,11 +70,15 @@
/*
* Useful macros
*/
+#define CSCHED_PRIV(_ops) \
+ ((struct csched_private *)((_ops)->sched_data))
#define CSCHED_PCPU(_c) \
((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv)
#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
+#define CSCHED_CPUONLINE(_pool) \
+ (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
/*
@@ -160,19 +164,22 @@ struct csched_private {
struct timer master_ticker;
unsigned int master;
cpumask_t idlers;
+ cpumask_t cpus;
uint32_t weight;
uint32_t credit;
int credit_balance;
uint32_t runq_sort;
+ int ticker_active;
};
/*
* Global variables
*/
-static struct csched_private csched_priv;
+static struct csched_private *csched_priv0 = NULL;
static void csched_tick(void *_cpu);
+static void csched_acct(void *dummy);
static inline int
__vcpu_on_runq(struct csched_vcpu *svc)
@@ -238,6 +245,7 @@ __runq_tickle(unsigned int cpu, struct csched_vcpu *new)
{
struct csched_vcpu * const cur =
CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
+ struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
cpumask_t mask;
ASSERT(cur);
@@ -264,7 +272,7 @@ __runq_tickle(unsigned int cpu, struct csched_vcpu *new)
*/
if ( cur->pri > CSCHED_PRI_IDLE )
{
- if ( cpus_empty(csched_priv.idlers) )
+ if ( cpus_empty(prv->idlers) )
{
CSCHED_STAT_CRANK(tickle_idlers_none);
}
@@ -272,7 +280,7 @@ __runq_tickle(unsigned int cpu, struct csched_vcpu *new)
{
cpumask_t idle_mask;
- cpus_and(idle_mask, csched_priv.idlers, new->vcpu->cpu_affinity);
+ cpus_and(idle_mask, prv->idlers, new->vcpu->cpu_affinity);
if ( !cpus_empty(idle_mask) )
{
CSCHED_STAT_CRANK(tickle_idlers_some);
@@ -294,40 +302,80 @@ __runq_tickle(unsigned int cpu, struct csched_vcpu *new)
cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
}
-static int
-csched_pcpu_init(int cpu)
+static void
+csched_free_pdata(struct scheduler *ops, void *pcpu, int cpu)
+{
+ struct csched_private *prv = CSCHED_PRIV(ops);
+ struct csched_pcpu *spc = pcpu;
+ unsigned long flags;
+
+ if ( spc == NULL )
+ return;
+
+ spin_lock_irqsave(&prv->lock, flags);
+
+ prv->credit -= CSCHED_CREDITS_PER_ACCT;
+ prv->ncpus--;
+ cpu_clear(cpu, prv->idlers);
+ cpu_clear(cpu, prv->cpus);
+ if ( (prv->master == cpu) && (prv->ncpus > 0) )
+ {
+ prv->master = first_cpu(prv->cpus);
+ migrate_timer(&prv->master_ticker, prv->master);
+ }
+ kill_timer(&spc->ticker);
+ if ( prv->ncpus == 0 )
+ kill_timer(&prv->master_ticker);
+
+ spin_unlock_irqrestore(&prv->lock, flags);
+
+ xfree(spc);
+}
+
+static void *
+csched_alloc_pdata(struct scheduler *ops, int cpu)
{
struct csched_pcpu *spc;
+ struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
/* Allocate per-PCPU info */
spc = xmalloc(struct csched_pcpu);
if ( spc == NULL )
- return -1;
+ return NULL;
memset(spc, 0, sizeof(*spc));
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
/* Initialize/update system-wide config */
- csched_priv.credit += CSCHED_CREDITS_PER_ACCT;
- if ( csched_priv.ncpus <= cpu )
- csched_priv.ncpus = cpu + 1;
- if ( csched_priv.master >= csched_priv.ncpus )
- csched_priv.master = cpu;
+ prv->credit += CSCHED_CREDITS_PER_ACCT;
+ prv->ncpus++;
+ cpu_set(cpu, prv->cpus);
+ if ( (prv->ncpus == 1) && (prv != csched_priv0) )
+ {
+ prv->master = cpu;
+ init_timer( &prv->master_ticker, csched_acct, prv, cpu);
+ prv->ticker_active = 2;
+ }
init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
+
+ if ( prv == csched_priv0 )
+ prv->master = first_cpu(prv->cpus);
+
INIT_LIST_HEAD(&spc->runq);
- spc->runq_sort_last = csched_priv.runq_sort;
+ spc->runq_sort_last = prv->runq_sort;
spc->idle_bias = NR_CPUS - 1;
- per_cpu(schedule_data, cpu).sched_priv = spc;
+ if ( per_cpu(schedule_data, cpu).sched_priv == NULL )
+ per_cpu(schedule_data, cpu).sched_priv = spc;
/* Start off idling... */
BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
- cpu_set(cpu, csched_priv.idlers);
+ cpu_set(cpu, prv->idlers);
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
- return 0;
+ return spc;
}
#ifndef NDEBUG
@@ -400,17 +448,19 @@ __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
}
static int
-_csched_cpu_pick(struct vcpu *vc, bool_t commit)
+_csched_cpu_pick(struct scheduler *ops, struct vcpu *vc, bool_t commit)
{
cpumask_t cpus;
cpumask_t idlers;
+ cpumask_t *online;
int cpu;
/*
* Pick from online CPUs in VCPU's affinity mask, giving a
* preference to its current processor if it's in there.
*/
- cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
+ online = CSCHED_CPUONLINE(vc->domain->cpupool);
+ cpus_and(cpus, *online, vc->cpu_affinity);
cpu = cpu_isset(vc->processor, cpus)
? vc->processor
: cycle_cpu(vc->processor, cpus);
@@ -428,7 +478,7 @@ _csched_cpu_pick(struct vcpu *vc, bool_t commit)
* like run two VCPUs on co-hyperthreads while there are idle cores
* or sockets.
*/
- cpus_and(idlers, cpu_online_map, csched_priv.idlers);
+ cpus_and(idlers, cpu_online_map, CSCHED_PRIV(ops)->idlers);
cpu_set(cpu, idlers);
cpus_and(cpus, cpus, idlers);
cpu_clear(cpu, cpus);
@@ -474,18 +524,18 @@ _csched_cpu_pick(struct vcpu *vc, bool_t commit)
}
static int
-csched_cpu_pick(struct vcpu *vc)
+csched_cpu_pick(struct scheduler *ops, struct vcpu *vc)
{
- return _csched_cpu_pick(vc, 1);
+ return _csched_cpu_pick(ops, vc, 1);
}
static inline void
-__csched_vcpu_acct_start(struct csched_vcpu *svc)
+__csched_vcpu_acct_start(struct csched_private *prv, struct csched_vcpu *svc)
{
struct csched_dom * const sdom = svc->sdom;
unsigned long flags;
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
if ( list_empty(&svc->active_vcpu_elem) )
{
@@ -496,16 +546,17 @@ __csched_vcpu_acct_start(struct csched_vcpu *svc)
list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
if ( list_empty(&sdom->active_sdom_elem) )
{
- list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
- csched_priv.weight += sdom->weight;
+ list_add(&sdom->active_sdom_elem, &prv->active_sdom);
+ prv->weight += sdom->weight;
}
}
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
}
static inline void
-__csched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
+__csched_vcpu_acct_stop_locked(struct csched_private *prv,
+ struct csched_vcpu *svc)
{
struct csched_dom * const sdom = svc->sdom;
@@ -518,16 +569,17 @@ __csched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
list_del_init(&svc->active_vcpu_elem);
if ( list_empty(&sdom->active_vcpu) )
{
- BUG_ON( csched_priv.weight < sdom->weight );
+ BUG_ON( prv->weight < sdom->weight );
list_del_init(&sdom->active_sdom_elem);
- csched_priv.weight -= sdom->weight;
+ prv->weight -= sdom->weight;
}
}
static void
-csched_vcpu_acct(unsigned int cpu)
+csched_vcpu_acct(struct csched_private *prv, unsigned int cpu)
{
struct csched_vcpu * const svc = CSCHED_VCPU(current);
+ struct scheduler *ops = per_cpu(scheduler, cpu);
ASSERT( current->processor == cpu );
ASSERT( svc->sdom != NULL );
@@ -556,9 +608,9 @@ csched_vcpu_acct(unsigned int cpu)
*/
if ( list_empty(&svc->active_vcpu_elem) )
{
- __csched_vcpu_acct_start(svc);
+ __csched_vcpu_acct_start(prv, svc);
}
- else if ( _csched_cpu_pick(current, 0) != cpu )
+ else if ( _csched_cpu_pick(ops, current, 0) != cpu )
{
CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
CSCHED_STAT_CRANK(migrate_running);
@@ -567,66 +619,75 @@ csched_vcpu_acct(unsigned int cpu)
}
}
-static int
-csched_vcpu_init(struct vcpu *vc)
+static void *
+csched_alloc_vdata(struct scheduler *ops, struct vcpu *vc, void *dd)
{
- struct domain * const dom = vc->domain;
- struct csched_dom *sdom = CSCHED_DOM(dom);
struct csched_vcpu *svc;
- CSCHED_STAT_CRANK(vcpu_init);
-
/* Allocate per-VCPU info */
svc = xmalloc(struct csched_vcpu);
if ( svc == NULL )
- return -1;
+ return NULL;
memset(svc, 0, sizeof(*svc));
INIT_LIST_HEAD(&svc->runq_elem);
INIT_LIST_HEAD(&svc->active_vcpu_elem);
- svc->sdom = sdom;
+ svc->sdom = dd;
svc->vcpu = vc;
atomic_set(&svc->credit, 0);
svc->flags = 0U;
- svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
+ svc->pri = is_idle_domain(vc->domain) ?
+ CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
CSCHED_VCPU_STATS_RESET(svc);
- vc->sched_priv = svc;
+ CSCHED_STAT_CRANK(vcpu_init);
+ return svc;
+}
- /* Allocate per-PCPU info */
- if ( unlikely(!CSCHED_PCPU(vc->processor)) )
- {
- if ( csched_pcpu_init(vc->processor) != 0 )
- return -1;
- }
+static void
+csched_vcpu_insert(struct scheduler *ops, struct vcpu *vc)
+{
+ struct csched_vcpu *svc = vc->sched_priv;
- CSCHED_VCPU_CHECK(vc);
- return 0;
+ if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
+ __runq_insert(vc->processor, svc);
}
static void
-csched_vcpu_destroy(struct vcpu *vc)
+csched_free_vdata(struct scheduler *ops, void *priv)
{
- struct csched_vcpu * const svc = CSCHED_VCPU(vc);
- struct csched_dom * const sdom = svc->sdom;
+ struct csched_private *prv = CSCHED_PRIV(ops);
+ struct csched_vcpu *svc = priv;
unsigned long flags;
- CSCHED_STAT_CRANK(vcpu_destroy);
-
- BUG_ON( sdom == NULL );
- BUG_ON( !list_empty(&svc->runq_elem) );
+ if ( __vcpu_on_runq(svc) )
+ __runq_remove(svc);
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&(prv->lock), flags);
if ( !list_empty(&svc->active_vcpu_elem) )
- __csched_vcpu_acct_stop_locked(svc);
+ __csched_vcpu_acct_stop_locked(prv, svc);
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&(prv->lock), flags);
xfree(svc);
}
static void
-csched_vcpu_sleep(struct vcpu *vc)
+csched_vcpu_destroy(struct scheduler *ops, struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+ struct csched_dom * const sdom = svc->sdom;
+
+ CSCHED_STAT_CRANK(vcpu_destroy);
+
+ BUG_ON( sdom == NULL );
+ BUG_ON( !list_empty(&svc->runq_elem) );
+
+ csched_free_vdata(ops, svc);
+}
+
+static void
+csched_vcpu_sleep(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
@@ -641,7 +702,7 @@ csched_vcpu_sleep(struct vcpu *vc)
}
static void
-csched_vcpu_wake(struct vcpu *vc)
+csched_vcpu_wake(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
const unsigned int cpu = vc->processor;
@@ -697,10 +758,12 @@ csched_vcpu_wake(struct vcpu *vc)
static int
csched_dom_cntl(
+ struct scheduler *ops,
struct domain *d,
struct xen_domctl_scheduler_op *op)
{
struct csched_dom * const sdom = CSCHED_DOM(d);
+ struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
@@ -712,14 +775,14 @@ csched_dom_cntl(
{
ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
if ( op->u.credit.weight != 0 )
{
if ( !list_empty(&sdom->active_sdom_elem) )
{
- csched_priv.weight -= sdom->weight;
- csched_priv.weight += op->u.credit.weight;
+ prv->weight -= sdom->weight;
+ prv->weight += op->u.credit.weight;
}
sdom->weight = op->u.credit.weight;
}
@@ -727,25 +790,20 @@ csched_dom_cntl(
if ( op->u.credit.cap != (uint16_t)~0U )
sdom->cap = op->u.credit.cap;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
}
return 0;
}
-static int
-csched_dom_init(struct domain *dom)
+static void *
+csched_alloc_domdata(struct scheduler *ops, struct domain *dom)
{
struct csched_dom *sdom;
- CSCHED_STAT_CRANK(dom_init);
-
- if ( is_idle_domain(dom) )
- return 0;
-
sdom = xmalloc(struct csched_dom);
if ( sdom == NULL )
- return -ENOMEM;
+ return NULL;
memset(sdom, 0, sizeof(*sdom));
/* Initialize credit and weight */
@@ -755,16 +813,40 @@ csched_dom_init(struct domain *dom)
sdom->dom = dom;
sdom->weight = CSCHED_DEFAULT_WEIGHT;
sdom->cap = 0U;
+
+ return (void *)sdom;
+}
+
+static int
+csched_dom_init(struct scheduler *ops, struct domain *dom)
+{
+ struct csched_dom *sdom;
+
+ CSCHED_STAT_CRANK(dom_init);
+
+ if ( is_idle_domain(dom) )
+ return 0;
+
+ sdom = csched_alloc_domdata(ops, dom);
+ if ( sdom == NULL )
+ return -ENOMEM;
+
dom->sched_priv = sdom;
return 0;
}
static void
-csched_dom_destroy(struct domain *dom)
+csched_free_domdata(struct scheduler *ops, void *data)
+{
+ xfree(data);
+}
+
+static void
+csched_dom_destroy(struct scheduler *ops, struct domain *dom)
{
CSCHED_STAT_CRANK(dom_destroy);
- xfree(CSCHED_DOM(dom));
+ csched_free_domdata(ops, CSCHED_DOM(dom));
}
/*
@@ -775,7 +857,7 @@ csched_dom_destroy(struct domain *dom)
* remember the last UNDER to make the move up operation O(1).
*/
static void
-csched_runq_sort(unsigned int cpu)
+csched_runq_sort(struct csched_private *prv, unsigned int cpu)
{
struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
struct list_head *runq, *elem, *next, *last_under;
@@ -783,7 +865,7 @@ csched_runq_sort(unsigned int cpu)
unsigned long flags;
int sort_epoch;
- sort_epoch = csched_priv.runq_sort;
+ sort_epoch = prv->runq_sort;
if ( sort_epoch == spc->runq_sort_last )
return;
@@ -820,6 +902,7 @@ csched_runq_sort(unsigned int cpu)
static void
csched_acct(void* dummy)
{
+ struct csched_private *prv = dummy;
unsigned long flags;
struct list_head *iter_vcpu, *next_vcpu;
struct list_head *iter_sdom, *next_sdom;
@@ -836,22 +919,22 @@ csched_acct(void* dummy)
int credit;
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
- weight_total = csched_priv.weight;
- credit_total = csched_priv.credit;
+ weight_total = prv->weight;
+ credit_total = prv->credit;
/* Converge balance towards 0 when it drops negative */
- if ( csched_priv.credit_balance < 0 )
+ if ( prv->credit_balance < 0 )
{
- credit_total -= csched_priv.credit_balance;
+ credit_total -= prv->credit_balance;
CSCHED_STAT_CRANK(acct_balance);
}
if ( unlikely(weight_total == 0) )
{
- csched_priv.credit_balance = 0;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ prv->credit_balance = 0;
+ spin_unlock_irqrestore(&prv->lock, flags);
CSCHED_STAT_CRANK(acct_no_work);
goto out;
}
@@ -863,7 +946,7 @@ csched_acct(void* dummy)
credit_xtra = 0;
credit_cap = 0U;
- list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+ list_for_each_safe( iter_sdom, next_sdom, &prv->active_sdom )
{
sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
@@ -883,9 +966,9 @@ csched_acct(void* dummy)
* only when the system-wide credit balance is negative.
*/
credit_peak = sdom->active_vcpu_count * CSCHED_CREDITS_PER_ACCT;
- if ( csched_priv.credit_balance < 0 )
+ if ( prv->credit_balance < 0 )
{
- credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+ credit_peak += ( ( -prv->credit_balance * sdom->weight) +
(weight_total - 1)
) / weight_total;
}
@@ -927,7 +1010,7 @@ csched_acct(void* dummy)
*/
CSCHED_STAT_CRANK(acct_reorder);
list_del(&sdom->active_sdom_elem);
- list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+ list_add(&sdom->active_sdom_elem, &prv->active_sdom);
}
credit_fair = credit_peak;
@@ -993,7 +1076,7 @@ csched_acct(void* dummy)
/* Upper bound on credits means VCPU stops earning */
if ( credit > CSCHED_CREDITS_PER_TSLICE )
{
- __csched_vcpu_acct_stop_locked(svc);
+ __csched_vcpu_acct_stop_locked(prv, svc);
credit = 0;
atomic_set(&svc->credit, credit);
}
@@ -1005,15 +1088,15 @@ csched_acct(void* dummy)
}
}
- csched_priv.credit_balance = credit_balance;
+ prv->credit_balance = credit_balance;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
/* Inform each CPU that its runq needs to be sorted */
- csched_priv.runq_sort++;
+ prv->runq_sort++;
out:
- set_timer( &csched_priv.master_ticker, NOW() +
+ set_timer( &prv->master_ticker, NOW() +
MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
}
@@ -1022,6 +1105,7 @@ csched_tick(void *_cpu)
{
unsigned int cpu = (unsigned long)_cpu;
struct csched_pcpu *spc = CSCHED_PCPU(cpu);
+ struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
spc->tick++;
@@ -1029,7 +1113,7 @@ csched_tick(void *_cpu)
* Accounting for running VCPU
*/
if ( !is_idle_vcpu(current) )
- csched_vcpu_acct(cpu);
+ csched_vcpu_acct(prv, cpu);
/*
* Check if runq needs to be sorted
@@ -1038,7 +1122,7 @@ csched_tick(void *_cpu)
* modified priorities. This is a special O(n) sort and runs at most
* once per accounting period (currently 30 milliseconds).
*/
- csched_runq_sort(cpu);
+ csched_runq_sort(prv, cpu);
set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
}
@@ -1091,16 +1175,19 @@ csched_runq_steal(int peer_cpu, int cpu, int pri)
}
static struct csched_vcpu *
-csched_load_balance(int cpu, struct csched_vcpu *snext)
+csched_load_balance(struct csched_private *prv, int cpu,
+ struct csched_vcpu *snext)
{
struct csched_vcpu *speer;
cpumask_t workers;
+ cpumask_t *online;
int peer_cpu;
BUG_ON( cpu != snext->vcpu->processor );
+ online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
/* If this CPU is going offline we shouldn't steal work. */
- if ( unlikely(!cpu_online(cpu)) )
+ if ( unlikely(!cpu_isset(cpu, *online)) )
goto out;
if ( snext->pri == CSCHED_PRI_IDLE )
@@ -1114,7 +1201,7 @@ csched_load_balance(int cpu, struct csched_vcpu *snext)
* Peek at non-idling CPUs in the system, starting with our
* immediate neighbour.
*/
- cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
+ cpus_andnot(workers, *online, prv->idlers);
cpu_clear(cpu, workers);
peer_cpu = cpu;
@@ -1156,11 +1243,12 @@ csched_load_balance(int cpu, struct csched_vcpu *snext)
* fast for the common case.
*/
static struct task_slice
-csched_schedule(s_time_t now)
+csched_schedule(struct scheduler *ops, s_time_t now)
{
const int cpu = smp_processor_id();
struct list_head * const runq = RUNQ(cpu);
struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+ struct csched_private *prv = CSCHED_PRIV(ops);
struct csched_vcpu *snext;
struct task_slice ret;
@@ -1207,7 +1295,7 @@ csched_schedule(s_time_t now)
if ( snext->pri > CSCHED_PRI_TS_OVER )
__runq_remove(snext);
else
- snext = csched_load_balance(cpu, snext);
+ snext = csched_load_balance(prv, cpu, snext);
/*
* Update idlers mask if necessary. When we're idling, other CPUs
@@ -1215,12 +1303,12 @@ csched_schedule(s_time_t now)
*/
if ( snext->pri == CSCHED_PRI_IDLE )
{
- if ( !cpu_isset(cpu, csched_priv.idlers) )
- cpu_set(cpu, csched_priv.idlers);
+ if ( !cpu_isset(cpu, prv->idlers) )
+ cpu_set(cpu, prv->idlers);
}
- else if ( cpu_isset(cpu, csched_priv.idlers) )
+ else if ( cpu_isset(cpu, prv->idlers) )
{
- cpu_clear(cpu, csched_priv.idlers);
+ cpu_clear(cpu, prv->idlers);
}
if ( !is_idle_vcpu(snext->vcpu) )
@@ -1267,7 +1355,7 @@ csched_dump_vcpu(struct csched_vcpu *svc)
}
static void
-csched_dump_pcpu(int cpu)
+csched_dump_pcpu(struct scheduler *ops, int cpu)
{
struct list_head *runq, *iter;
struct csched_pcpu *spc;
@@ -1305,9 +1393,10 @@ csched_dump_pcpu(int cpu)
}
static void
-csched_dump(void)
+csched_dump(struct scheduler *ops)
{
struct list_head *iter_sdom, *iter_svc;
+ struct csched_private *prv = CSCHED_PRIV(ops);
int loop;
#define idlers_buf keyhandler_scratch
@@ -1324,12 +1413,12 @@ csched_dump(void)
"\tticks per tslice = %d\n"
"\tticks per acct = %d\n"
"\tmigration delay = %uus\n",
- csched_priv.ncpus,
- csched_priv.master,
- csched_priv.credit,
- csched_priv.credit_balance,
- csched_priv.weight,
- csched_priv.runq_sort,
+ prv->ncpus,
+ prv->master,
+ prv->credit,
+ prv->credit_balance,
+ prv->weight,
+ prv->runq_sort,
CSCHED_DEFAULT_WEIGHT,
CSCHED_MSECS_PER_TICK,
CSCHED_CREDITS_PER_MSEC,
@@ -1337,12 +1426,12 @@ csched_dump(void)
CSCHED_TICKS_PER_ACCT,
vcpu_migration_delay);
- cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers);
+ cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
printk("idlers: %s\n", idlers_buf);
printk("active vcpus:\n");
loop = 0;
- list_for_each( iter_sdom, &csched_priv.active_sdom )
+ list_for_each( iter_sdom, &prv->active_sdom )
{
struct csched_dom *sdom;
sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
@@ -1359,18 +1448,30 @@ csched_dump(void)
#undef idlers_buf
}
-static void
-csched_init(void)
+static int
+csched_init(struct scheduler *ops, int pool0)
{
- spin_lock_init(&csched_priv.lock);
- INIT_LIST_HEAD(&csched_priv.active_sdom);
- csched_priv.ncpus = 0;
- csched_priv.master = UINT_MAX;
- cpus_clear(csched_priv.idlers);
- csched_priv.weight = 0U;
- csched_priv.credit = 0U;
- csched_priv.credit_balance = 0;
- csched_priv.runq_sort = 0U;
+ struct csched_private *prv;
+
+ prv = xmalloc(struct csched_private);
+ if ( prv == NULL )
+ return 1;
+ memset(prv, 0, sizeof(*prv));
+ if ( pool0 )
+ csched_priv0 = prv;
+ ops->sched_data = prv;
+ spin_lock_init(&prv->lock);
+ INIT_LIST_HEAD(&prv->active_sdom);
+ prv->ncpus = 0;
+ prv->master = UINT_MAX;
+ cpus_clear(prv->idlers);
+ prv->weight = 0U;
+ prv->credit = 0U;
+ prv->credit_balance = 0;
+ prv->runq_sort = 0U;
+ prv->ticker_active = (csched_priv0 == prv) ? 0 : 1;
+
+ return 0;
}
/* Tickers cannot be kicked until SMP subsystem is alive. */
@@ -1380,54 +1481,83 @@ static __init int csched_start_tickers(void)
unsigned int cpu;
/* Is the credit scheduler initialised? */
- if ( csched_priv.ncpus == 0 )
+ if ( (csched_priv0 == NULL) || (csched_priv0->ncpus == 0) )
return 0;
+ csched_priv0->ticker_active = 1;
+
for_each_online_cpu ( cpu )
{
spc = CSCHED_PCPU(cpu);
set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
}
- init_timer( &csched_priv.master_ticker, csched_acct, NULL,
- csched_priv.master);
+ init_timer( &csched_priv0->master_ticker, csched_acct, csched_priv0,
+ csched_priv0->master);
- set_timer( &csched_priv.master_ticker, NOW() +
+ set_timer( &csched_priv0->master_ticker, NOW() +
MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
return 0;
}
__initcall(csched_start_tickers);
-static void csched_tick_suspend(void)
+static void
+csched_deinit(struct scheduler *ops)
+{
+ struct csched_private *prv;
+
+ prv = CSCHED_PRIV(ops);
+ if ( prv != NULL )
+ xfree(prv);
+}
+
+static void csched_tick_suspend(struct scheduler *ops, unsigned int cpu)
{
struct csched_pcpu *spc;
- spc = CSCHED_PCPU(smp_processor_id());
+ spc = CSCHED_PCPU(cpu);
stop_timer(&spc->ticker);
}
-static void csched_tick_resume(void)
+static void csched_tick_resume(struct scheduler *ops, unsigned int cpu)
{
struct csched_pcpu *spc;
uint64_t now = NOW();
+ struct csched_private *prv;
+
+ prv = CSCHED_PRIV(ops);
+ if ( !prv->ticker_active )
+ return;
- spc = CSCHED_PCPU(smp_processor_id());
+
+ spc = CSCHED_PCPU(cpu);
set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK)
- now % MILLISECS(CSCHED_MSECS_PER_TICK) );
+
+ if ( (prv->ticker_active == 2) && (prv->master == cpu) )
+ {
+ set_timer( &prv->master_ticker, now +
+ MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT -
+ now % MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT);
+ prv->ticker_active = 1;
+ }
}
-const struct scheduler sched_credit_def = {
+static struct csched_private _csched_priv;
+
+struct scheduler sched_credit_def = {
.name = "SMP Credit Scheduler",
.opt_name = "credit",
.sched_id = XEN_SCHEDULER_CREDIT,
+ .sched_data = &_csched_priv,
.init_domain = csched_dom_init,
.destroy_domain = csched_dom_destroy,
- .init_vcpu = csched_vcpu_init,
+ .insert_vcpu = csched_vcpu_insert,
.destroy_vcpu = csched_vcpu_destroy,
.sleep = csched_vcpu_sleep,
@@ -1441,6 +1571,13 @@ const struct scheduler sched_credit_def = {
.dump_cpu_state = csched_dump_pcpu,
.dump_settings = csched_dump,
.init = csched_init,
+ .deinit = csched_deinit,
+ .alloc_vdata = csched_alloc_vdata,
+ .free_vdata = csched_free_vdata,
+ .alloc_pdata = csched_alloc_pdata,
+ .free_pdata = csched_free_pdata,
+ .alloc_domdata = csched_alloc_domdata,
+ .free_domdata = csched_free_domdata,
.tick_suspend = csched_tick_suspend,
.tick_resume = csched_tick_resume,
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 9556429444..9cc4a31e2e 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -149,12 +149,16 @@
/*
* Useful macros
*/
+#define CSCHED_PRIV(_ops) \
+ ((struct csched_private *)((_ops)->sched_data))
#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
+#define CSCHED_CPUONLINE(_pool) \
+ (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
/* CPU to runq_id macro */
-#define c2r(_cpu) (csched_priv.runq_map[(_cpu)])
+#define c2r(_ops, _cpu) (CSCHED_PRIV(_ops)->runq_map[(_cpu)])
/* CPU to runqueue struct macro */
-#define RQD(_cpu) (&csched_priv.rqd[c2r(_cpu)])
+#define RQD(_ops, _cpu) (&CSCHED_PRIV(_ops)->rqd[c2r(_ops, _cpu)])
/*
* Per-runqueue data
@@ -214,11 +218,6 @@ struct csched_dom {
/*
- * Global variables
- */
-static struct csched_private csched_priv;
-
-/*
* Time-to-credit, credit-to-time.
* FIXME: Do pre-calculated division?
*/
@@ -284,15 +283,15 @@ __runq_insert(struct list_head *runq, struct csched_vcpu *svc)
}
static void
-runq_insert(unsigned int cpu, struct csched_vcpu *svc)
+runq_insert(struct scheduler *ops, unsigned int cpu, struct csched_vcpu *svc)
{
- struct list_head * runq = &RQD(cpu)->runq;
+ struct list_head * runq = &RQD(ops, cpu)->runq;
int pos = 0;
ASSERT( spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock) );
BUG_ON( __vcpu_on_runq(svc) );
- BUG_ON( c2r(cpu) != c2r(svc->vcpu->processor) );
+ BUG_ON( c2r(ops, cpu) != c2r(ops, svc->vcpu->processor) );
pos = __runq_insert(runq, svc);
@@ -324,11 +323,12 @@ void burn_credits(struct csched_runqueue_data *rqd, struct csched_vcpu *, s_time
/* Check to see if the item on the runqueue is higher priority than what's
* currently running; if so, wake up the processor */
static /*inline*/ void
-runq_tickle(unsigned int cpu, struct csched_vcpu *new, s_time_t now)
+runq_tickle(struct scheduler *ops, unsigned int cpu, struct csched_vcpu *new, s_time_t now)
{
int i, ipid=-1;
s_time_t lowest=(1<<30);
- struct csched_runqueue_data *rqd = RQD(cpu);
+ struct csched_runqueue_data *rqd = RQD(ops, cpu);
+ cpumask_t *online;
d2printk("rqt d%dv%d cd%dv%d\n",
new->vcpu->domain->domain_id,
@@ -336,13 +336,14 @@ runq_tickle(unsigned int cpu, struct csched_vcpu *new, s_time_t now)
current->domain->domain_id,
current->vcpu_id);
+ online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
/* Find the cpu in this queue group that has the lowest credits */
for ( i=rqd->cpu_min ; i < rqd->cpu_max ; i++ )
{
struct csched_vcpu * cur;
/* Skip cpus that aren't online */
- if ( !cpu_online(i) )
+ if ( !cpu_isset(i, *online) )
continue;
cur = CSCHED_VCPU(per_cpu(schedule_data, i).curr);
@@ -396,11 +397,11 @@ runq_tickle(unsigned int cpu, struct csched_vcpu *new, s_time_t now)
/*
* Credit-related code
*/
-static void reset_credit(int cpu, s_time_t now)
+static void reset_credit(struct scheduler *ops, int cpu, s_time_t now)
{
struct list_head *iter;
- list_for_each( iter, &RQD(cpu)->svc )
+ list_for_each( iter, &RQD(ops, cpu)->svc )
{
struct csched_vcpu * svc = list_entry(iter, struct csched_vcpu, rqd_elem);
@@ -521,90 +522,112 @@ __csched_vcpu_check(struct vcpu *vc)
#define CSCHED_VCPU_CHECK(_vc)
#endif
-static int
-csched_vcpu_init(struct vcpu *vc)
+static void *
+csched_alloc_vdata(struct scheduler *ops, struct vcpu *vc, void *dd)
{
- struct domain * const dom = vc->domain;
- struct csched_dom *sdom = CSCHED_DOM(dom);
struct csched_vcpu *svc;
- printk("%s: Initializing d%dv%d\n",
- __func__, dom->domain_id, vc->vcpu_id);
-
/* Allocate per-VCPU info */
svc = xmalloc(struct csched_vcpu);
if ( svc == NULL )
- return -1;
+ return NULL;
+ memset(svc, 0, sizeof(*svc));
INIT_LIST_HEAD(&svc->rqd_elem);
INIT_LIST_HEAD(&svc->sdom_elem);
INIT_LIST_HEAD(&svc->runq_elem);
- svc->sdom = sdom;
+ svc->sdom = dd;
svc->vcpu = vc;
svc->flags = 0U;
- vc->sched_priv = svc;
if ( ! is_idle_vcpu(vc) )
{
- BUG_ON( sdom == NULL );
+ BUG_ON( svc->sdom == NULL );
svc->credit = CSCHED_CREDIT_INIT;
- svc->weight = sdom->weight;
+ svc->weight = svc->sdom->weight;
+ }
+ else
+ {
+ BUG_ON( svc->sdom != NULL );
+ svc->credit = CSCHED_IDLE_CREDIT;
+ svc->weight = 0;
+ }
+
+ return svc;
+}
+
+static void
+csched_vcpu_insert(struct scheduler *ops, struct vcpu *vc)
+{
+ struct csched_vcpu *svc = vc->sched_priv;
+ struct domain * const dom = vc->domain;
+ struct csched_dom *sdom = CSCHED_DOM(dom);
+
+ printk("%s: Inserting d%dv%d\n",
+ __func__, dom->domain_id, vc->vcpu_id);
+ if ( ! is_idle_vcpu(vc) )
+ {
/* FIXME: Do we need the private lock here? */
- list_add_tail(&svc->sdom_elem, &sdom->vcpu);
+ list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu);
/* Add vcpu to runqueue of initial processor */
/* FIXME: Abstract for multiple runqueues */
vcpu_schedule_lock_irq(vc);
- list_add_tail(&svc->rqd_elem, &RQD(vc->processor)->svc);
- update_max_weight(RQD(vc->processor), svc->weight, 0);
+ list_add_tail(&svc->rqd_elem, &RQD(ops, vc->processor)->svc);
+ update_max_weight(RQD(ops, vc->processor), svc->weight, 0);
vcpu_schedule_unlock_irq(vc);
sdom->nr_vcpus++;
}
- else
- {
- BUG_ON( sdom != NULL );
- svc->credit = CSCHED_IDLE_CREDIT;
- svc->weight = 0;
- }
CSCHED_VCPU_CHECK(vc);
- return 0;
}
static void
-csched_vcpu_destroy(struct vcpu *vc)
+csched_free_vdata(struct scheduler *ops, void *priv)
{
- struct csched_vcpu * const svc = CSCHED_VCPU(vc);
- struct csched_dom * const sdom = svc->sdom;
-
- BUG_ON( sdom == NULL );
- BUG_ON( !list_empty(&svc->runq_elem) );
+ struct csched_vcpu *svc = priv;
+ struct vcpu *vc = svc->vcpu;
- /* Remove from runqueue */
- vcpu_schedule_lock_irq(vc);
+ if ( ! is_idle_vcpu(vc) )
+ {
+ /* Remove from runqueue */
+ vcpu_schedule_lock_irq(vc);
- list_del_init(&svc->rqd_elem);
- update_max_weight(RQD(vc->processor), 0, svc->weight);
+ list_del_init(&svc->rqd_elem);
+ update_max_weight(RQD(ops, vc->processor), 0, svc->weight);
- vcpu_schedule_unlock_irq(vc);
+ vcpu_schedule_unlock_irq(vc);
- /* Remove from sdom list. Don't need a lock for this, as it's called
- * syncronously when nothing else can happen. */
- list_del_init(&svc->sdom_elem);
+ /* Remove from sdom list. Don't need a lock for this, as it's called
+ * synchronously when nothing else can happen. */
+ list_del_init(&svc->sdom_elem);
- sdom->nr_vcpus--;
+ svc->sdom->nr_vcpus--;
+ }
xfree(svc);
}
static void
-csched_vcpu_sleep(struct vcpu *vc)
+csched_vcpu_destroy(struct scheduler *ops, struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+ struct csched_dom * const sdom = svc->sdom;
+
+ BUG_ON( sdom == NULL );
+ BUG_ON( !list_empty(&svc->runq_elem) );
+
+ csched_free_vdata(ops, svc);
+}
+
+static void
+csched_vcpu_sleep(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
@@ -617,7 +640,7 @@ csched_vcpu_sleep(struct vcpu *vc)
}
static void
-csched_vcpu_wake(struct vcpu *vc)
+csched_vcpu_wake(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
const unsigned int cpu = vc->processor;
@@ -654,8 +677,8 @@ csched_vcpu_wake(struct vcpu *vc)
now = NOW();
/* Put the VCPU on the runq */
- runq_insert(cpu, svc);
- runq_tickle(cpu, svc, now);
+ runq_insert(ops, cpu, svc);
+ runq_tickle(ops, cpu, svc, now);
out:
d2printk("w-\n");
@@ -663,7 +686,7 @@ out:
}
static void
-csched_context_saved(struct vcpu *vc)
+csched_context_saved(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
@@ -688,15 +711,15 @@ csched_context_saved(struct vcpu *vc)
BUG_ON(__vcpu_on_runq(svc));
- runq_insert(cpu, svc);
- runq_tickle(cpu, svc, NOW());
+ runq_insert(ops, cpu, svc);
+ runq_tickle(ops, cpu, svc, NOW());
}
vcpu_schedule_unlock_irq(vc);
}
static int
-csched_cpu_pick(struct vcpu *vc)
+csched_cpu_pick(struct scheduler *ops, struct vcpu *vc)
{
/* FIXME: Chose a schedule group based on load */
/* FIXME: Migrate the vcpu to the new runqueue list, updating
@@ -706,10 +729,12 @@ csched_cpu_pick(struct vcpu *vc)
static int
csched_dom_cntl(
+ struct scheduler *ops,
struct domain *d,
struct xen_domctl_scheduler_op *op)
{
struct csched_dom * const sdom = CSCHED_DOM(d);
+ struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
@@ -727,7 +752,7 @@ csched_dom_cntl(
/* Must hold csched_priv lock to update sdom, runq lock to
* update csvcs. */
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
old_weight = sdom->weight;
@@ -744,32 +769,28 @@ csched_dom_cntl(
vcpu_schedule_lock_irq(svc->vcpu);
svc->weight = sdom->weight;
- update_max_weight(RQD(svc->vcpu->processor), svc->weight, old_weight);
+ update_max_weight(RQD(ops, svc->vcpu->processor), svc->weight, old_weight);
vcpu_schedule_unlock_irq(svc->vcpu);
}
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
}
}
return 0;
}
-static int
-csched_dom_init(struct domain *dom)
+static void *
+csched_alloc_domdata(struct scheduler *ops, struct domain *dom)
{
struct csched_dom *sdom;
- int flags;
+ unsigned long flags;
- printk("%s: Initializing domain %d\n", __func__, dom->domain_id);
-
- if ( is_idle_domain(dom) )
- return 0;
-
sdom = xmalloc(struct csched_dom);
if ( sdom == NULL )
- return -ENOMEM;
+ return NULL;
+ memset(sdom, 0, sizeof(*sdom));
/* Initialize credit and weight */
INIT_LIST_HEAD(&sdom->vcpu);
@@ -778,40 +799,65 @@ csched_dom_init(struct domain *dom)
sdom->weight = CSCHED_DEFAULT_WEIGHT;
sdom->nr_vcpus = 0;
- dom->sched_priv = sdom;
+ spin_lock_irqsave(&CSCHED_PRIV(ops)->lock, flags);
- spin_lock_irqsave(&csched_priv.lock, flags);
+ list_add_tail(&sdom->sdom_elem, &CSCHED_PRIV(ops)->sdom);
- list_add_tail(&sdom->sdom_elem, &csched_priv.sdom);
+ spin_unlock_irqrestore(&CSCHED_PRIV(ops)->lock, flags);
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ return (void *)sdom;
+}
+
+static int
+csched_dom_init(struct scheduler *ops, struct domain *dom)
+{
+ struct csched_dom *sdom;
+
+ printk("%s: Initializing domain %d\n", __func__, dom->domain_id);
+
+ if ( is_idle_domain(dom) )
+ return 0;
+
+ sdom = csched_alloc_domdata(ops, dom);
+ if ( sdom == NULL )
+ return -ENOMEM;
+
+ dom->sched_priv = sdom;
return 0;
}
static void
-csched_dom_destroy(struct domain *dom)
+csched_free_domdata(struct scheduler *ops, void *data)
{
- struct csched_dom *sdom = CSCHED_DOM(dom);
- int flags;
+ unsigned long flags;
+ struct csched_dom *sdom = data;
- BUG_ON(!list_empty(&sdom->vcpu));
-
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&CSCHED_PRIV(ops)->lock, flags);
list_del_init(&sdom->sdom_elem);
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&CSCHED_PRIV(ops)->lock, flags);
+
+ xfree(data);
+}
+
+static void
+csched_dom_destroy(struct scheduler *ops, struct domain *dom)
+{
+ struct csched_dom *sdom = CSCHED_DOM(dom);
+
+ BUG_ON(!list_empty(&sdom->vcpu));
- xfree(CSCHED_DOM(dom));
+ csched_free_domdata(ops, CSCHED_DOM(dom));
}
/* How long should we let this vcpu run for? */
static s_time_t
-csched_runtime(int cpu, struct csched_vcpu *snext)
+csched_runtime(struct scheduler *ops, int cpu, struct csched_vcpu *snext)
{
s_time_t time = CSCHED_MAX_TIMER;
- struct csched_runqueue_data *rqd = RQD(cpu);
+ struct csched_runqueue_data *rqd = RQD(ops, cpu);
struct list_head *runq = &rqd->runq;
if ( is_idle_vcpu(snext->vcpu) )
@@ -851,10 +897,10 @@ void __dump_execstate(void *unused);
* fast for the common case.
*/
static struct task_slice
-csched_schedule(s_time_t now)
+csched_schedule(struct scheduler *ops, s_time_t now)
{
const int cpu = smp_processor_id();
- struct csched_runqueue_data *rqd = RQD(cpu);
+ struct csched_runqueue_data *rqd = RQD(ops, cpu);
struct list_head * const runq = &rqd->runq;
struct csched_vcpu * const scurr = CSCHED_VCPU(current);
struct csched_vcpu *snext = NULL;
@@ -927,7 +973,7 @@ csched_schedule(s_time_t now)
}
if ( !is_idle_vcpu(snext->vcpu) && snext->credit <= CSCHED_CREDIT_RESET )
- reset_credit(cpu, now);
+ reset_credit(ops, cpu, now);
#if 0
/*
@@ -955,7 +1001,7 @@ csched_schedule(s_time_t now)
/*
* Return task to run next...
*/
- ret.time = csched_runtime(cpu, snext);
+ ret.time = csched_runtime(ops, cpu, snext);
ret.task = snext->vcpu;
CSCHED_VCPU_CHECK(ret.task);
@@ -977,7 +1023,7 @@ csched_dump_vcpu(struct csched_vcpu *svc)
}
static void
-csched_dump_pcpu(int cpu)
+csched_dump_pcpu(struct scheduler *ops, int cpu)
{
struct list_head *runq, *iter;
struct csched_vcpu *svc;
@@ -986,7 +1032,7 @@ csched_dump_pcpu(int cpu)
/* FIXME: Do locking properly for access to runqueue structures */
- runq = &RQD(cpu)->runq;
+ runq = &RQD(ops, cpu)->runq;
cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map,cpu));
printk(" sibling=%s, ", cpustr);
@@ -1014,22 +1060,23 @@ csched_dump_pcpu(int cpu)
}
static void
-csched_dump(void)
+csched_dump(struct scheduler *ops)
{
struct list_head *iter_sdom, *iter_svc;
+ struct csched_private *prv = CSCHED_PRIV(ops);
int loop;
printk("info:\n"
"\tncpus = %u\n"
"\tdefault-weight = %d\n",
- csched_priv.ncpus,
+ prv->ncpus,
CSCHED_DEFAULT_WEIGHT);
/* FIXME: Locking! */
printk("active vcpus:\n");
loop = 0;
- list_for_each( iter_sdom, &csched_priv.sdom )
+ list_for_each( iter_sdom, &prv->sdom )
{
struct csched_dom *sdom;
sdom = list_entry(iter_sdom, struct csched_dom, sdom_elem);
@@ -1046,42 +1093,49 @@ csched_dump(void)
}
static void
-make_runq_map(void)
+make_runq_map(struct scheduler *ops)
{
int cpu, cpu_count=0;
+ struct csched_private *prv = CSCHED_PRIV(ops);
/* FIXME: Read pcpu layout and do this properly */
for_each_possible_cpu( cpu )
{
- csched_priv.runq_map[cpu] = 0;
+ prv->runq_map[cpu] = 0;
cpu_count++;
}
- csched_priv.runq_count = 1;
+ prv->runq_count = 1;
/* Move to the init code...? */
- csched_priv.rqd[0].cpu_min = 0;
- csched_priv.rqd[0].cpu_max = cpu_count;
+ prv->rqd[0].cpu_min = 0;
+ prv->rqd[0].cpu_max = cpu_count;
}
-static void
-csched_init(void)
+static int
+csched_init(struct scheduler *ops, int pool0)
{
int i;
+ struct csched_private *prv;
printk("Initializing Credit2 scheduler\n" \
" WARNING: This is experimental software in development.\n" \
" Use at your own risk.\n");
- spin_lock_init(&csched_priv.lock);
- INIT_LIST_HEAD(&csched_priv.sdom);
+ prv = xmalloc(struct csched_private);
+ if ( prv == NULL )
+ return 1;
+ memset(prv, 0, sizeof(*prv));
+ ops->sched_data = prv; /* make_runq_map() reads CSCHED_PRIV(ops) */
+ spin_lock_init(&prv->lock);
+ INIT_LIST_HEAD(&prv->sdom);
- csched_priv.ncpus = 0;
+ prv->ncpus = 0;
- make_runq_map();
+ make_runq_map(ops);
- for ( i=0; i<csched_priv.runq_count ; i++ )
+ for ( i=0; i<prv->runq_count ; i++ )
{
- struct csched_runqueue_data *rqd = csched_priv.rqd + i;
+ struct csched_runqueue_data *rqd = prv->rqd + i;
rqd->max_weight = 1;
rqd->id = i;
@@ -1096,24 +1150,40 @@ csched_init(void)
spinlock_t *lock;
/* Point the per-cpu schedule lock to the runq_id lock */
- runq_id = csched_priv.runq_map[i];
+ runq_id = prv->runq_map[i];
lock = &per_cpu(schedule_data, runq_id)._lock;
per_cpu(schedule_data, i).schedule_lock = lock;
- csched_priv.ncpus++;
+ prv->ncpus++;
}
+
+ return 0;
+}
+
+static void
+csched_deinit(struct scheduler *ops)
+{
+ struct csched_private *prv;
+
+ prv = CSCHED_PRIV(ops);
+ if ( prv != NULL )
+ xfree(prv);
}
+
+static struct csched_private _csched_priv;
+
struct scheduler sched_credit2_def = {
.name = "SMP Credit Scheduler rev2",
.opt_name = "credit2",
.sched_id = XEN_SCHEDULER_CREDIT2,
+ .sched_data = &_csched_priv,
.init_domain = csched_dom_init,
.destroy_domain = csched_dom_destroy,
- .init_vcpu = csched_vcpu_init,
+ .insert_vcpu = csched_vcpu_insert,
.destroy_vcpu = csched_vcpu_destroy,
.sleep = csched_vcpu_sleep,
@@ -1128,4 +1198,9 @@ struct scheduler sched_credit2_def = {
.dump_cpu_state = csched_dump_pcpu,
.dump_settings = csched_dump,
.init = csched_init,
+ .deinit = csched_deinit,
+ .alloc_vdata = csched_alloc_vdata,
+ .free_vdata = csched_free_vdata,
+ .alloc_domdata = csched_alloc_domdata,
+ .free_domdata = csched_free_domdata,
};
diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
index e2722ea3c2..0defd97f31 100644
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -21,6 +21,9 @@
printk(_a ); \
} while ( 0 )
+#define SEDF_CPUONLINE(_pool) \
+ (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
+
#ifndef NDEBUG
#define SEDF_STATS
#define CHECK(_p) \
@@ -132,7 +135,7 @@ struct sedf_cpu_info {
#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
-static void sedf_dump_cpu_state(int i);
+static void sedf_dump_cpu_state(struct scheduler *ops, int i);
static inline int extraq_on(struct vcpu *d, int i)
{
@@ -329,30 +332,17 @@ static inline void __add_to_runqueue_sort(struct vcpu *v)
}
-static int sedf_init_vcpu(struct vcpu *v)
+static void *sedf_alloc_vdata(struct scheduler *ops, struct vcpu *v, void *dd)
{
struct sedf_vcpu_info *inf;
- if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
- return -1;
- memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));
+ inf = xmalloc(struct sedf_vcpu_info);
+ if ( inf == NULL )
+ return NULL;
- inf = EDOM_INFO(v);
+ memset(inf, 0, sizeof(struct sedf_vcpu_info));
inf->vcpu = v;
-
- /* Allocate per-CPU context if this is the first domain to be added. */
- if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
- {
- per_cpu(schedule_data, v->processor).sched_priv =
- xmalloc(struct sedf_cpu_info);
- BUG_ON(per_cpu(schedule_data, v->processor).sched_priv == NULL);
- memset(CPU_INFO(v->processor), 0, sizeof(*CPU_INFO(v->processor)));
- INIT_LIST_HEAD(WAITQ(v->processor));
- INIT_LIST_HEAD(RUNQ(v->processor));
- INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_PEN_Q));
- INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q));
- }
-
+
/* Every VCPU gets an equal share of extratime by default. */
inf->deadl_abs = 0;
inf->latency = 0;
@@ -383,39 +373,88 @@ static int sedf_init_vcpu(struct vcpu *v)
}
else
{
- EDOM_INFO(v)->deadl_abs = 0;
- EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
+ inf->deadl_abs = 0;
+ inf->status &= ~SEDF_ASLEEP;
}
- return 0;
+ return inf;
+}
+
+/*
+ * Allocate and initialise the SEDF per-cpu bookkeeping structure.
+ * The structure is zeroed and all of its queues start out empty.
+ * Allocation failure is treated as fatal (BUG_ON), so the returned
+ * pointer is never NULL.
+ */
+static void *
+sedf_alloc_pdata(struct scheduler *ops, int cpu)
+{
+    struct sedf_cpu_info *pcpu = xmalloc(struct sedf_cpu_info);
+
+    BUG_ON(pcpu == NULL);
+    memset(pcpu, 0, sizeof(struct sedf_cpu_info));
+
+    /* Every queue head must be self-linked before first use. */
+    INIT_LIST_HEAD(&pcpu->waitq);
+    INIT_LIST_HEAD(&pcpu->runnableq);
+    INIT_LIST_HEAD(&pcpu->extraq[EXTRA_PEN_Q]);
+    INIT_LIST_HEAD(&pcpu->extraq[EXTRA_UTIL_Q]);
+
+    return pcpu;
+}
+
+/*
+ * Release per-cpu data previously obtained from sedf_alloc_pdata().
+ * A NULL pointer is accepted and ignored.
+ */
+static void
+sedf_free_pdata(struct scheduler *ops, void *spc, int cpu)
+{
+    if ( spc != NULL )
+        xfree(spc);
+}
+
+static void sedf_free_vdata(struct scheduler *ops, void *priv)
+{
+ xfree(priv);
}
-static void sedf_destroy_vcpu(struct vcpu *v)
+static void sedf_destroy_vcpu(struct scheduler *ops, struct vcpu *v)
{
- xfree(v->sched_priv);
+ sedf_free_vdata(ops, v->sched_priv);
}
-static int sedf_init_domain(struct domain *d)
+static void *
+sedf_alloc_domdata(struct scheduler *ops, struct domain *d)
{
- d->sched_priv = xmalloc(struct sedf_dom_info);
+ void *mem;
+
+ mem = xmalloc(struct sedf_dom_info);
+ if ( mem == NULL )
+ return NULL;
+
+ memset(mem, 0, sizeof(struct sedf_dom_info));
+
+ return mem;
+}
+
+static int sedf_init_domain(struct scheduler *ops, struct domain *d)
+{
+ d->sched_priv = sedf_alloc_domdata(ops, d);
if ( d->sched_priv == NULL )
return -ENOMEM;
- memset(d->sched_priv, 0, sizeof(struct sedf_dom_info));
-
return 0;
}
-static void sedf_destroy_domain(struct domain *d)
+static void sedf_free_domdata(struct scheduler *ops, void *data)
+{
+ xfree(data);
+}
+
+static void sedf_destroy_domain(struct scheduler *ops, struct domain *d)
{
- xfree(d->sched_priv);
+ sedf_free_domdata(ops, d->sched_priv);
}
-static int sedf_pick_cpu(struct vcpu *v)
+/*
+ * Choose a cpu for vcpu v: the first cpu that is both in the vcpu's
+ * affinity mask and in the cpu mask of its domain's cpupool.
+ * NOTE(review): an empty intersection is not checked for here -- confirm
+ * what first_cpu() yields in that case and that callers cope with it.
+ */
+static int sedf_pick_cpu(struct scheduler *ops, struct vcpu *v)
{
cpumask_t online_affinity;
+ cpumask_t *online;
- cpus_and(online_affinity, v->cpu_affinity, cpu_online_map);
+ /* Pool's cpu_valid mask, or cpupool_free_cpus if the domain has no pool. */
+ online = SEDF_CPUONLINE(v->domain->cpupool);
+ cpus_and(online_affinity, v->cpu_affinity, *online);
return first_cpu(online_affinity);
}
@@ -751,7 +790,7 @@ static struct task_slice sedf_do_extra_schedule(
-timeslice for the current period used up
-domain on waitqueue has started it's period
-and various others ;) in general: determine which domain to run next*/
-static struct task_slice sedf_do_schedule(s_time_t now)
+static struct task_slice sedf_do_schedule(struct scheduler *ops, s_time_t now)
{
int cpu = smp_processor_id();
struct list_head *runq = RUNQ(cpu);
@@ -786,6 +825,13 @@ static struct task_slice sedf_do_schedule(s_time_t now)
}
check_waitq:
update_queues(now, runq, waitq);
+
+ if ( unlikely(!cpu_isset(cpu, *SEDF_CPUONLINE(per_cpu(cpupool, cpu)))) )
+ {
+ ret.task = IDLETASK(cpu);
+ ret.time = SECONDS(1);
+ goto sched_done;
+ }
/*now simply pick the first domain from the runqueue, which has the
earliest deadline, because the list is sorted*/
@@ -824,6 +870,7 @@ static struct task_slice sedf_do_schedule(s_time_t now)
extraq, cpu);
}
+ sched_done:
/*TODO: Do something USEFUL when this happens and find out, why it
still can happen!!!*/
if ( ret.time < 0)
@@ -841,7 +888,7 @@ static struct task_slice sedf_do_schedule(s_time_t now)
}
-static void sedf_sleep(struct vcpu *d)
+static void sedf_sleep(struct scheduler *ops, struct vcpu *d)
{
PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
d->domain->domain_id, d->vcpu_id);
@@ -1060,7 +1107,7 @@ static inline int should_switch(struct vcpu *cur,
return 1;
}
-static void sedf_wake(struct vcpu *d)
+static void sedf_wake(struct scheduler *ops, struct vcpu *d)
{
s_time_t now = NOW();
struct sedf_vcpu_info* inf = EDOM_INFO(d);
@@ -1213,8 +1260,8 @@ static void sedf_dump_domain(struct vcpu *d)
}
-/* dumps all domains on hte specified cpu */
-static void sedf_dump_cpu_state(int i)
+/* dumps all domains on the specified cpu */
+static void sedf_dump_cpu_state(struct scheduler *ops, int i)
{
struct list_head *list, *queue, *tmp;
struct sedf_vcpu_info *d_inf;
@@ -1287,7 +1334,7 @@ static void sedf_dump_cpu_state(int i)
/* Adjusts periods and slices of the domains accordingly to their weights. */
-static int sedf_adjust_weights(struct xen_domctl_scheduler_op *cmd)
+static int sedf_adjust_weights(struct cpupool *c, struct xen_domctl_scheduler_op *cmd)
{
struct vcpu *p;
struct domain *d;
@@ -1308,6 +1355,8 @@ static int sedf_adjust_weights(struct xen_domctl_scheduler_op *cmd)
rcu_read_lock(&domlist_read_lock);
for_each_domain( d )
{
+ if ( c != d->cpupool )
+ continue;
for_each_vcpu( d, p )
{
if ( EDOM_INFO(p)->weight )
@@ -1359,7 +1408,7 @@ static int sedf_adjust_weights(struct xen_domctl_scheduler_op *cmd)
/* set or fetch domain scheduling parameters */
-static int sedf_adjust(struct domain *p, struct xen_domctl_scheduler_op *op)
+static int sedf_adjust(struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
{
struct vcpu *v;
int rc;
@@ -1369,9 +1418,6 @@ static int sedf_adjust(struct domain *p, struct xen_domctl_scheduler_op *op)
p->domain_id, op->u.sedf.period, op->u.sedf.slice,
op->u.sedf.latency, (op->u.sedf.extratime)?"yes":"no");
- if ( !p->vcpu )
- return -EINVAL;
-
if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
{
/* Check for sane parameters. */
@@ -1421,7 +1467,7 @@ static int sedf_adjust(struct domain *p, struct xen_domctl_scheduler_op *op)
}
}
- rc = sedf_adjust_weights(op);
+ rc = sedf_adjust_weights(p->cpupool, op);
if ( rc )
return rc;
@@ -1449,7 +1495,7 @@ static int sedf_adjust(struct domain *p, struct xen_domctl_scheduler_op *op)
return 0;
}
-const struct scheduler sched_sedf_def = {
+struct scheduler sched_sedf_def = {
.name = "Simple EDF Scheduler",
.opt_name = "sedf",
.sched_id = XEN_SCHEDULER_SEDF,
@@ -1457,9 +1503,15 @@ const struct scheduler sched_sedf_def = {
.init_domain = sedf_init_domain,
.destroy_domain = sedf_destroy_domain,
- .init_vcpu = sedf_init_vcpu,
.destroy_vcpu = sedf_destroy_vcpu,
+ .alloc_vdata = sedf_alloc_vdata,
+ .free_vdata = sedf_free_vdata,
+ .alloc_pdata = sedf_alloc_pdata,
+ .free_pdata = sedf_free_pdata,
+ .alloc_domdata = sedf_alloc_domdata,
+ .free_domdata = sedf_free_domdata,
+
.do_schedule = sedf_do_schedule,
.pick_cpu = sedf_pick_cpu,
.dump_cpu_state = sedf_dump_cpu_state,
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 3fb5255330..9fd6e5c061 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -53,11 +53,12 @@ static void poll_timer_fn(void *data);
/* This is global for now so that private implementations can reach it */
DEFINE_PER_CPU(struct schedule_data, schedule_data);
+DEFINE_PER_CPU(struct scheduler *, scheduler);
extern const struct scheduler sched_sedf_def;
extern const struct scheduler sched_credit_def;
extern const struct scheduler sched_credit2_def;
-static const struct scheduler *__initdata schedulers[] = {
+static const struct scheduler *schedulers[] = {
&sched_sedf_def,
&sched_credit_def,
&sched_credit2_def,
@@ -66,9 +67,15 @@ static const struct scheduler *__initdata schedulers[] = {
static struct scheduler __read_mostly ops;
-#define SCHED_OP(fn, ...) \
- (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ ) \
- : (typeof(ops.fn(__VA_ARGS__)))0 )
+/*
+ * Call hook 'fn' of the scheduler instance 'opsptr'. The instance pointer is
+ * passed as the hook's first argument, followed by any further arguments.
+ * If the hook is not implemented (NULL) the expression evaluates to 0 cast
+ * to the hook's return type. Note: 'opsptr' is evaluated more than once.
+ */
+#define SCHED_OP(opsptr, fn, ...) \
+ (( (opsptr)->fn != NULL ) ? (opsptr)->fn(opsptr, ##__VA_ARGS__ ) \
+ : (typeof((opsptr)->fn(opsptr, ##__VA_ARGS__)))0 )
+
+/*
+ * DOM2OP: scheduler instance of a domain -- its cpupool's scheduler, or the
+ * global default 'ops' when the domain has no cpupool. The result follows
+ * d->cpupool, so it changes as soon as that field is reassigned.
+ * VCPU2OP: scheduler instance of a vcpu's domain.
+ * VCPU2ONLINE: cpumask for a vcpu -- its pool's cpu_valid mask, or
+ * cpu_online_map when its domain has no cpupool.
+ */
+#define DOM2OP(_d) (((_d)->cpupool == NULL) ? &ops : &((_d)->cpupool->sched))
+#define VCPU2OP(_v) (DOM2OP((_v)->domain))
+#define VCPU2ONLINE(_v) \
+ (((_v)->domain->cpupool == NULL) ? &cpu_online_map \
+ : &(_v)->domain->cpupool->cpu_valid)
static inline void trace_runstate_change(struct vcpu *v, int new_state)
{
@@ -209,7 +216,86 @@ int sched_init_vcpu(struct vcpu *v, unsigned int processor)
TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
- return SCHED_OP(init_vcpu, v);
+ if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
+ {
+ per_cpu(schedule_data, v->processor).sched_priv =
+ SCHED_OP(DOM2OP(d), alloc_pdata, processor);
+ if ( per_cpu(schedule_data, v->processor).sched_priv == NULL )
+ return 1;
+ }
+
+ v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
+ if ( v->sched_priv == NULL )
+ return 1;
+
+ if ( is_idle_domain(d) )
+ per_cpu(schedule_data, v->processor).sched_idlevpriv = v->sched_priv;
+
+ return 0;
+}
+
+/*
+ * Move domain d into cpupool c, switching it to that pool's scheduler.
+ *
+ * All per-domain and per-vcpu data for the new scheduler is allocated up
+ * front, so a failed allocation leaves the domain untouched. The domain is
+ * then paused while, for every vcpu, the timers are migrated, the old
+ * scheduler's vcpu state is destroyed, the affinity is reset to all cpus
+ * and a processor from c->cpu_valid is assigned (starting at the first cpu
+ * of the mask and advancing with cycle_cpu()).
+ *
+ * Returns 0 on success, -ENOMEM if any allocation fails.
+ */
+int sched_move_domain(struct domain *d, struct cpupool *c)
+{
+    struct vcpu *v;
+    struct vcpu *undo;
+    unsigned int new_p;
+    void **vcpu_priv;
+    void *domdata;
+    struct scheduler *new_ops = &(c->sched);
+    struct scheduler *old_ops;
+
+    domdata = SCHED_OP(new_ops, alloc_domdata, d);
+    if ( domdata == NULL )
+        return -ENOMEM;
+
+    vcpu_priv = xmalloc_array(void *, d->max_vcpus);
+    if ( vcpu_priv == NULL )
+    {
+        SCHED_OP(new_ops, free_domdata, domdata);
+        return -ENOMEM;
+    }
+
+    memset(vcpu_priv, 0, d->max_vcpus * sizeof(void *));
+    for_each_vcpu ( d, v )
+    {
+        vcpu_priv[v->vcpu_id] = SCHED_OP(new_ops, alloc_vdata, v, domdata);
+        if ( vcpu_priv[v->vcpu_id] == NULL )
+        {
+            /*
+             * Roll back through the hook of the scheduler that allocated
+             * the data rather than assuming it is a plain xmalloc() block.
+             * A separate cursor keeps the outer iteration variable intact.
+             */
+            for_each_vcpu ( d, undo )
+            {
+                if ( vcpu_priv[undo->vcpu_id] != NULL )
+                    SCHED_OP(new_ops, free_vdata, vcpu_priv[undo->vcpu_id]);
+            }
+            xfree(vcpu_priv);
+            SCHED_OP(new_ops, free_domdata, domdata);
+            return -ENOMEM;
+        }
+    }
+
+    domain_pause(d);
+
+    /*
+     * Latch the old scheduler now: DOM2OP()/VCPU2OP() resolve through
+     * d->cpupool and would yield the new scheduler once it is switched.
+     */
+    old_ops = DOM2OP(d);
+
+    new_p = first_cpu(c->cpu_valid);
+    for_each_vcpu ( d, v )
+    {
+        migrate_timer(&v->periodic_timer, new_p);
+        migrate_timer(&v->singleshot_timer, new_p);
+        migrate_timer(&v->poll_timer, new_p);
+
+        SCHED_OP(old_ops, destroy_vcpu, v);
+
+        cpus_setall(v->cpu_affinity);
+        v->processor = new_p;
+        v->sched_priv = vcpu_priv[v->vcpu_id];
+
+        new_p = cycle_cpu(new_p, c->cpu_valid);
+    }
+
+    /* Old domain data must be released by the scheduler that created it. */
+    SCHED_OP(old_ops, free_domdata, d->sched_priv);
+    d->cpupool = c;
+    d->sched_priv = domdata;
+
+    domain_unpause(d);
+
+    xfree(vcpu_priv);
+
+    return 0;
}
void sched_destroy_vcpu(struct vcpu *v)
@@ -219,17 +305,17 @@ void sched_destroy_vcpu(struct vcpu *v)
kill_timer(&v->poll_timer);
if ( test_and_clear_bool(v->is_urgent) )
atomic_dec(&per_cpu(schedule_data, v->processor).urgent_count);
- SCHED_OP(destroy_vcpu, v);
+ SCHED_OP(VCPU2OP(v), destroy_vcpu, v);
}
int sched_init_domain(struct domain *d)
{
- return SCHED_OP(init_domain, d);
+ return SCHED_OP(DOM2OP(d), init_domain, d);
}
void sched_destroy_domain(struct domain *d)
{
- SCHED_OP(destroy_domain, d);
+ SCHED_OP(DOM2OP(d), destroy_domain, d);
}
void vcpu_sleep_nosync(struct vcpu *v)
@@ -243,7 +329,7 @@ void vcpu_sleep_nosync(struct vcpu *v)
if ( v->runstate.state == RUNSTATE_runnable )
vcpu_runstate_change(v, RUNSTATE_offline, NOW());
- SCHED_OP(sleep, v);
+ SCHED_OP(VCPU2OP(v), sleep, v);
}
vcpu_schedule_unlock_irqrestore(v, flags);
@@ -271,7 +357,7 @@ void vcpu_wake(struct vcpu *v)
{
if ( v->runstate.state >= RUNSTATE_blocked )
vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
- SCHED_OP(wake, v);
+ SCHED_OP(VCPU2OP(v), wake, v);
}
else if ( !test_bit(_VPF_blocked, &v->pause_flags) )
{
@@ -326,7 +412,7 @@ static void vcpu_migrate(struct vcpu *v)
/* Select new CPU. */
old_cpu = v->processor;
- new_cpu = SCHED_OP(pick_cpu, v);
+ new_cpu = SCHED_OP(VCPU2OP(v), pick_cpu, v);
/*
* Transfer urgency status to new CPU before switching CPUs, as once
@@ -369,19 +455,29 @@ void vcpu_force_reschedule(struct vcpu *v)
}
/*
- * This function is used by cpu_hotplug code from stop_machine context.
- * Hence we can avoid needing to take certain locks.
+ * This function is used by cpu_hotplug code from stop_machine context
+ * and from cpupools to switch schedulers on a cpu.
*/
-void cpu_disable_scheduler(void)
+int cpu_disable_scheduler(unsigned int cpu)
{
struct domain *d;
struct vcpu *v;
- unsigned int cpu = smp_processor_id();
+ struct cpupool *c;
+ int ret = 0;
+
+ c = per_cpu(cpupool, cpu);
+ if ( c == NULL )
+ return ret;
for_each_domain ( d )
{
+ if ( d->cpupool != c )
+ continue;
+
for_each_vcpu ( d, v )
{
+ vcpu_schedule_lock_irq(v);
+
if ( (cpus_weight(v->cpu_affinity) == 1) &&
cpu_isset(cpu, v->cpu_affinity) )
{
@@ -395,26 +491,46 @@ void cpu_disable_scheduler(void)
* be chosen when the timer is next re-set.
*/
if ( v->singleshot_timer.cpu == cpu )
- migrate_timer(&v->singleshot_timer, 0);
+ {
+ int cpu_mig = first_cpu(c->cpu_valid);
+ if ( cpu_mig == cpu )
+ cpu_mig = next_cpu(cpu_mig, c->cpu_valid);
+ migrate_timer(&v->singleshot_timer, cpu_mig);
+ }
if ( v->processor == cpu )
{
set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_schedule_unlock_irq(v);
vcpu_sleep_nosync(v);
vcpu_migrate(v);
}
+ else
+ {
+ vcpu_schedule_unlock_irq(v);
+ }
+
+ /*
+ * A vcpu active in the hypervisor will not be migratable.
+ * The caller should try again after releasing and reacquiring
+ * all locks.
+ */
+ if ( v->processor == cpu )
+ ret = -EAGAIN;
}
}
+ return ret;
}
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
{
cpumask_t online_affinity, old_affinity;
+ cpumask_t *online;
if ( v->domain->is_pinned )
return -EINVAL;
-
- cpus_and(online_affinity, *affinity, cpu_online_map);
+ online = VCPU2ONLINE(v);
+ cpus_and(online_affinity, *affinity, *online);
if ( cpus_empty(online_affinity) )
return -EINVAL;
@@ -723,7 +839,7 @@ long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op)
struct vcpu *v;
long ret;
- if ( (op->sched_id != ops.sched_id) ||
+ if ( (op->sched_id != DOM2OP(d)->sched_id) ||
((op->cmd != XEN_DOMCTL_SCHEDOP_putinfo) &&
(op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
return -EINVAL;
@@ -750,7 +866,7 @@ long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op)
if ( d == current->domain )
vcpu_schedule_lock_irq(current);
- if ( (ret = SCHED_OP(adjust, d, op)) == 0 )
+ if ( (ret = SCHED_OP(DOM2OP(d), adjust, d, op)) == 0 )
TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
if ( d == current->domain )
@@ -797,6 +913,7 @@ static void schedule(void)
{
struct vcpu *prev = current, *next = NULL;
s_time_t now = NOW();
+ struct scheduler *sched = this_cpu(scheduler);
struct schedule_data *sd;
struct task_slice next_slice;
@@ -812,7 +929,7 @@ static void schedule(void)
stop_timer(&sd->s_timer);
/* get policy-specific decision on scheduling... */
- next_slice = ops.do_schedule(now);
+ next_slice = sched->do_schedule(sched, now);
next = next_slice.task;
@@ -871,6 +988,10 @@ static void schedule(void)
update_vcpu_system_time(next);
vcpu_periodic_timer_work(next);
+ TRACE_4D(TRC_SCHED_SWITCH,
+ prev->domain->domain_id, prev->vcpu_id,
+ next->domain->domain_id, next->vcpu_id);
+
context_switch(prev, next);
}
@@ -884,7 +1005,7 @@ void context_saved(struct vcpu *prev)
/* Check for migration request /after/ clearing running flag. */
smp_mb();
- SCHED_OP(context_saved, prev);
+ SCHED_OP(VCPU2OP(prev), context_saved, prev);
if ( unlikely(test_bit(_VPF_migrating, &prev->pause_flags)) )
vcpu_migrate(prev);
@@ -920,6 +1041,19 @@ static void poll_timer_fn(void *data)
vcpu_unblock(v);
}
+/*
+ * Look up a built-in scheduler by its sched_id.
+ * Walks the NULL-terminated 'schedulers' table and returns the first entry
+ * whose id matches, or NULL when no scheduler has that id.
+ */
+const struct scheduler *scheduler_get_by_id(unsigned int id)
+{
+    const struct scheduler **entry = schedulers;
+
+    /* Stop on a match or on the terminating NULL slot. */
+    while ( (*entry != NULL) && ((*entry)->sched_id != id) )
+        entry++;
+
+    return *entry;
+}
+
/* Initialise the data structures. */
void __init scheduler_init(void)
{
@@ -927,14 +1061,6 @@ void __init scheduler_init(void)
open_softirq(SCHEDULE_SOFTIRQ, schedule);
- for_each_possible_cpu ( i )
- {
- spin_lock_init(&per_cpu(schedule_data, i)._lock);
- per_cpu(schedule_data, i).schedule_lock
- = &per_cpu(schedule_data, i)._lock;
- init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
- }
-
for ( i = 0; schedulers[i] != NULL; i++ )
{
ops = *schedulers[i];
@@ -948,43 +1074,125 @@ void __init scheduler_init(void)
ops = *schedulers[0];
}
+ for_each_possible_cpu ( i )
+ {
+ per_cpu(scheduler, i) = &ops;
+ spin_lock_init(&per_cpu(schedule_data, i)._lock);
+ per_cpu(schedule_data, i).schedule_lock
+ = &per_cpu(schedule_data, i)._lock;
+ init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
+ }
+
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
- SCHED_OP(init);
+ if ( SCHED_OP(&ops, init, 1) )
+ panic("scheduler returned error on init\n");
}
-void dump_runq(unsigned char key)
+/*
+ * Switch the scheduler running on 'cpu' to the one of cpupool c, or back to
+ * the global default scheduler 'ops' when c is NULL.
+ *
+ * The new scheduler's per-cpu data (and, for c != NULL, vcpu data for the
+ * cpu's idle vcpu) is allocated before the cpu's schedule lock is taken.
+ * The actual switch of per_cpu(scheduler), sched_priv and the idle vcpu's
+ * sched_priv happens under that lock. The old scheduler's data is freed
+ * after the lock has been dropped.
+ *
+ * NOTE(review): the results of alloc_pdata/alloc_vdata are not checked for
+ * NULL here and the function cannot report failure -- confirm this is
+ * acceptable for all schedulers.
+ */
+void schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
{
- s_time_t now = NOW();
- int i;
unsigned long flags;
+ struct vcpu *v;
+ void *vpriv = NULL;
+ void *ppriv;
+ void *ppriv_old;
+ struct scheduler *old_ops;
+ struct scheduler *new_ops;
+
+ old_ops = per_cpu(scheduler, cpu);
+ new_ops = (c == NULL) ? &ops : &(c->sched);
+ /* v is this cpu's idle vcpu. */
+ v = per_cpu(schedule_data, cpu).idle;
+ ppriv = SCHED_OP(new_ops, alloc_pdata, cpu);
+ if ( c != NULL )
+ vpriv = SCHED_OP(new_ops, alloc_vdata, v, v->domain->sched_priv);
+
+ spin_lock_irqsave(per_cpu(schedule_data, cpu).schedule_lock, flags);
+
+ if ( c == NULL )
+ {
+ /*
+ * Back to the default scheduler: restore the idle vcpu data saved in
+ * sched_idlevpriv and remember the pool's copy so it is freed below.
+ */
+ vpriv = v->sched_priv;
+ v->sched_priv = per_cpu(schedule_data, cpu).sched_idlevpriv;
+ }
+ else
+ {
+ /*
+ * Install the freshly allocated data; nothing is left to free.
+ * NOTE(review): the previous v->sched_priv is not freed here --
+ * presumably it is always the sched_idlevpriv copy; confirm a cpu
+ * never moves directly from one pool to another.
+ */
+ v->sched_priv = vpriv;
+ vpriv = NULL;
+ }
+ SCHED_OP(old_ops, tick_suspend, cpu);
+ per_cpu(scheduler, cpu) = new_ops;
+ ppriv_old = per_cpu(schedule_data, cpu).sched_priv;
+ per_cpu(schedule_data, cpu).sched_priv = ppriv;
+ SCHED_OP(new_ops, tick_resume, cpu);
+ SCHED_OP(new_ops, insert_vcpu, v);
+
+ spin_unlock_irqrestore(per_cpu(schedule_data, cpu).schedule_lock, flags);
+
+ /* Release the old scheduler's data outside the lock. */
+ if ( vpriv != NULL )
+ SCHED_OP(old_ops, free_vdata, vpriv);
+ SCHED_OP(old_ops, free_pdata, ppriv_old, cpu);
+}
+
+/*
+ * Initialise a scheduler instance for use by a cpupool.
+ *
+ * 'name' selects a built-in scheduler by its opt_name. If name is NULL or
+ * matches no entry, the global default scheduler 'ops' is used as the
+ * template. The template is copied into *sched and the copy's init hook
+ * is invoked with 0 as its second argument; the hook's result is returned
+ * (0 if the scheduler has no init hook).
+ */
+int schedule_init_global(char *name, struct scheduler *sched)
+{
+    const struct scheduler *chosen = &ops;
+    int idx;
+
+    if ( name != NULL )
+    {
+        for ( idx = 0; schedulers[idx] != NULL; idx++ )
+        {
+            if ( strcmp(schedulers[idx]->opt_name, name) != 0 )
+                continue;
+
+            chosen = schedulers[idx];
+            break;
+        }
+    }
+
+    memcpy(sched, chosen, sizeof(*sched));
+
+    return SCHED_OP(sched, init, 0);
+}
- local_irq_save(flags);
+/*
+ * Deinitialize scheduler global data: invokes the deinit hook of the given
+ * scheduler instance; a scheduler without a deinit hook is left untouched.
+ */
+void schedule_deinit_global(struct scheduler *sched)
+{
+ SCHED_OP(sched, deinit);
+}
- printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
- SCHED_OP(dump_settings);
- printk("sched_smt_power_savings: %s\n",
- sched_smt_power_savings? "enabled":"disabled");
- printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
+/*
+ * Print the scheduler name, its settings and the per-cpu scheduler state for
+ * cpupool c. With c == NULL the default scheduler 'ops' is dumped for the
+ * cpus in cpupool_free_cpus. Each cpu's state is dumped while holding that
+ * cpu's schedule lock.
+ * NOTE(review): the lock is taken with plain spin_lock() and the former
+ * local_irq_save()/local_irq_restore() pair is removed from this function --
+ * presumably the caller now disables interrupts; confirm.
+ */
+void schedule_dump(struct cpupool *c)
+{
+ int i;
+ struct scheduler *sched;
+ cpumask_t *cpus;
- for_each_online_cpu ( i )
+ sched = (c == NULL) ? &ops : &(c->sched);
+ cpus = (c == NULL) ? &cpupool_free_cpus : &c->cpu_valid;
+ printk("Scheduler: %s (%s)\n", sched->name, sched->opt_name);
+ SCHED_OP(sched, dump_settings);
+
+ for_each_cpu_mask (i, *cpus)
{
spin_lock(per_cpu(schedule_data, i).schedule_lock);
printk("CPU[%02d] ", i);
- SCHED_OP(dump_cpu_state, i);
+ SCHED_OP(sched, dump_cpu_state, i);
spin_unlock(per_cpu(schedule_data, i).schedule_lock);
}
-
- local_irq_restore(flags);
}
+/*
+ * Invoke the tick_suspend hook of the scheduler currently assigned to the
+ * calling cpu (per_cpu(scheduler)), passing that cpu's number.
+ */
void sched_tick_suspend(void)
{
- SCHED_OP(tick_suspend);
+ struct scheduler *sched;
+ unsigned int cpu = smp_processor_id();
+
+ sched = per_cpu(scheduler, cpu);
+ SCHED_OP(sched, tick_suspend, cpu);
}
+/*
+ * Invoke the tick_resume hook of the scheduler currently assigned to the
+ * calling cpu (per_cpu(scheduler)), passing that cpu's number.
+ */
void sched_tick_resume(void)
{
- SCHED_OP(tick_resume);
+ struct scheduler *sched;
+ unsigned int cpu = smp_processor_id();
+
+ sched = per_cpu(scheduler, cpu);
+ SCHED_OP(sched, tick_resume, cpu);
}
#ifdef CONFIG_COMPAT
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 7ff84abccf..3ed6468c99 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,7 +35,7 @@
#include "xen.h"
#include "grant_table.h"
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000006
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000007
struct xenctl_cpumap {
XEN_GUEST_HANDLE_64(uint8) bitmap;
@@ -60,10 +60,10 @@ struct xen_domctl_createdomain {
/* Should domain memory integrity be verifed by tboot during Sx? */
#define _XEN_DOMCTL_CDF_s3_integrity 2
#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity)
- uint32_t flags;
/* Disable out-of-sync shadow page tables? */
#define _XEN_DOMCTL_CDF_oos_off 3
#define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off)
+ uint32_t flags;
};
typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
@@ -106,6 +106,7 @@ struct xen_domctl_getdomaininfo {
uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */
uint32_t ssidref;
xen_domain_handle_t handle;
+ uint32_t cpupool;
};
typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
@@ -785,6 +786,30 @@ struct xen_domctl_mem_sharing_op {
typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t);
+/*
+ * cpupool operations
+ *
+ * Each operation code below is tagged with a single letter in its trailing
+ * comment. The field comments in struct xen_domctl_cpupool_op use those
+ * letters to list the operations for which a field is an input (IN) or an
+ * output (OUT).
+ * NOTE(review): XEN_DOMCTL_CPUPOOL_PAR_ANY is presumably a wildcard value
+ * for an id/cpu parameter -- confirm against the implementation.
+ */
+/* XEN_DOMCTL_cpupool_op */
+#define XEN_DOMCTL_CPUPOOL_OP_CREATE 1 /* C */
+#define XEN_DOMCTL_CPUPOOL_OP_DESTROY 2 /* D */
+#define XEN_DOMCTL_CPUPOOL_OP_INFO 3 /* I */
+#define XEN_DOMCTL_CPUPOOL_OP_ADDCPU 4 /* A */
+#define XEN_DOMCTL_CPUPOOL_OP_RMCPU 5 /* R */
+#define XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */
+#define XEN_DOMCTL_CPUPOOL_OP_FREEINFO 7 /* F */
+#define XEN_DOMCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF
+struct xen_domctl_cpupool_op {
+ uint32_t op; /* IN */
+ uint32_t cpupool_id; /* IN: CDIARM OUT: CI */
+ uint32_t sched_id; /* IN: C OUT: I */
+ uint32_t domid; /* IN: M */
+ uint32_t cpu; /* IN: AR */
+ uint32_t n_dom; /* OUT: I */
+ struct xenctl_cpumap cpumap; /* OUT: IF */
+};
+typedef struct xen_domctl_cpupool_op xen_domctl_cpupool_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpupool_op_t);
+
struct xen_domctl {
uint32_t cmd;
@@ -846,6 +871,7 @@ struct xen_domctl {
#define XEN_DOMCTL_gettscinfo 59
#define XEN_DOMCTL_settscinfo 60
#define XEN_DOMCTL_getpageframeinfo3 61
+#define XEN_DOMCTL_cpupool_op 62
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -894,6 +920,7 @@ struct xen_domctl {
struct xen_domctl_debug_op debug_op;
struct xen_domctl_mem_event_op mem_event_op;
struct xen_domctl_mem_sharing_op mem_sharing_op;
+ struct xen_domctl_cpupool_op cpupool_op;
#if defined(__i386__) || defined(__x86_64__)
struct xen_domctl_cpuid cpuid;
#endif
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index 8a8bf2782e..ae9f4c3c24 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -10,6 +10,12 @@
#include <xen/percpu.h>
+/* A global pointer to the initial cpupool (POOL0). */
+extern struct cpupool *cpupool0;
+
+/* cpus currently in no cpupool */
+extern cpumask_t cpupool_free_cpus;
+
/*
* In order to allow a scheduler to remap the lock->cpu mapping,
* we have a per-cpu pointer, along with a pre-allocated set of
@@ -26,11 +32,14 @@ struct schedule_data {
struct vcpu *curr; /* current task */
struct vcpu *idle; /* idle task for this cpu */
void *sched_priv;
+ void *sched_idlevpriv; /* default scheduler vcpu data */
struct timer s_timer; /* scheduling timer */
atomic_t urgent_count; /* how many urgent vcpus */
} __cacheline_aligned;
DECLARE_PER_CPU(struct schedule_data, schedule_data);
+DECLARE_PER_CPU(struct scheduler *, scheduler);
+DECLARE_PER_CPU(struct cpupool *, cpupool);
static inline void vcpu_schedule_lock(struct vcpu *v)
{
@@ -78,29 +87,50 @@ struct scheduler {
char *name; /* full name for this scheduler */
char *opt_name; /* option name for this scheduler */
unsigned int sched_id; /* ID for this scheduler */
+ void *sched_data; /* global data pointer */
- void (*init) (void);
+ int (*init) (struct scheduler *, int);
+ void (*deinit) (struct scheduler *);
- int (*init_domain) (struct domain *);
- void (*destroy_domain) (struct domain *);
+ void (*free_vdata) (struct scheduler *, void *);
+ void * (*alloc_vdata) (struct scheduler *, struct vcpu *,
+ void *);
+ void (*free_pdata) (struct scheduler *, void *, int);
+ void * (*alloc_pdata) (struct scheduler *, int);
+ void (*free_domdata) (struct scheduler *, void *);
+ void * (*alloc_domdata) (struct scheduler *, struct domain *);
- int (*init_vcpu) (struct vcpu *);
- void (*destroy_vcpu) (struct vcpu *);
+ int (*init_domain) (struct scheduler *, struct domain *);
+ void (*destroy_domain) (struct scheduler *, struct domain *);
- void (*sleep) (struct vcpu *);
- void (*wake) (struct vcpu *);
- void (*context_saved) (struct vcpu *);
+ void (*insert_vcpu) (struct scheduler *, struct vcpu *);
+ void (*destroy_vcpu) (struct scheduler *, struct vcpu *);
- struct task_slice (*do_schedule) (s_time_t);
+ void (*sleep) (struct scheduler *, struct vcpu *);
+ void (*wake) (struct scheduler *, struct vcpu *);
+ void (*context_saved) (struct scheduler *, struct vcpu *);
- int (*pick_cpu) (struct vcpu *);
- int (*adjust) (struct domain *,
+ struct task_slice (*do_schedule) (struct scheduler *, s_time_t);
+
+ int (*pick_cpu) (struct scheduler *, struct vcpu *);
+ int (*adjust) (struct scheduler *, struct domain *,
struct xen_domctl_scheduler_op *);
- void (*dump_settings) (void);
- void (*dump_cpu_state) (int);
+ void (*dump_settings) (struct scheduler *);
+ void (*dump_cpu_state) (struct scheduler *, int);
- void (*tick_suspend) (void);
- void (*tick_resume) (void);
+ void (*tick_suspend) (struct scheduler *, unsigned int);
+ void (*tick_resume) (struct scheduler *, unsigned int);
};
+/*
+ * A cpupool: a set of cpus together with its own scheduler instance.
+ * Domains refer to their pool through struct domain's 'cpupool' field.
+ */
+struct cpupool
+{
+ int cpupool_id; /* numeric id of this pool */
+ cpumask_t cpu_valid; /* all cpus assigned to pool */
+ struct cpupool *next; /* presumably links the pools into a list -- confirm */
+ unsigned int n_dom; /* presumably the number of domains in the pool -- confirm */
+ struct scheduler sched; /* this pool's private scheduler instance */
+};
+
+const struct scheduler *scheduler_get_by_id(unsigned int id);
+
#endif /* __XEN_SCHED_IF_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 15f64fc97e..91368466ff 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -213,6 +213,7 @@ struct domain
/* Scheduling. */
void *sched_priv; /* scheduler-specific data */
+ struct cpupool *cpupool;
struct domain *next_in_list;
struct domain *next_in_hashbucket;
@@ -465,6 +466,7 @@ int sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int sched_init_domain(struct domain *d);
void sched_destroy_domain(struct domain *d);
+int sched_move_domain(struct domain *d, struct cpupool *c);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
int sched_id(void);
void sched_tick_suspend(void);
@@ -575,8 +577,13 @@ void domain_pause_by_systemcontroller(struct domain *d);
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);
+struct scheduler;
+
+int schedule_init_global(char *name, struct scheduler *sched);
+void schedule_deinit_global(struct scheduler *sched);
+void schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_force_reschedule(struct vcpu *v);
-void cpu_disable_scheduler(void);
+int cpu_disable_scheduler(unsigned int cpu);
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
@@ -607,6 +614,19 @@ extern enum cpufreq_controller {
FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;
+/*
+ * Sentinel cpupool id (presumably "no pool" -- confirm against users).
+ * Parenthesized so the negative literal expands safely in any expression.
+ */
+#define CPUPOOLID_NONE (-1)
+
+struct cpupool *cpupool_create(int poolid, char *sched);
+int cpupool_destroy(struct cpupool *c);
+int cpupool0_cpu_assign(struct cpupool *c);
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu);
+void cpupool_cpu_add(unsigned int cpu);
+int cpupool_cpu_remove(unsigned int cpu);
+int cpupool_add_domain(struct domain *d, int poolid);
+void cpupool_rm_domain(struct domain *d);
+int cpupool_do_domctl(struct xen_domctl_cpupool_op *op);
+#define num_cpupool_cpus(c) (cpus_weight((c)->cpu_valid))
+
#endif /* __SCHED_H__ */
/*