author     Keir Fraser <keir.fraser@citrix.com>  2009-03-09 09:37:52 +0000
committer  Keir Fraser <keir.fraser@citrix.com>  2009-03-09 09:37:52 +0000
commit     2441e7179c0ffe09dcc9e3fa2276917594760bbd (patch)
tree       9d397cf2cce7faf522d15f189e2aea6b532bf0c8
parent     881f28d8e897daee3d6ba1c591098b50931b6cec (diff)
Add vcpu_migration_delay=<microsecs> boot option to scheduler
The idea is borrowed from the Linux kernel: if a vCPU has just been
scheduled out and put back on the run queue, it is likely still cache-hot
on its current pCPU and may be scheduled in again shortly; if it is
instead migrated to another pCPU, it has to re-warm the cache. This patch
introduces a vcpu_migration_delay option to avoid overly aggressive vCPU
migration (in practice we observe a very high migration frequency most of
the time) while still balancing load over slightly longer time scales.

The Linux kernel uses 0.5ms by default. Since the cost of a migration may
be higher here than on native hardware (e.g. due to the VMCS impact),
vcpu_migration_delay=1000 was chosen for our tests, which were performed
on a 4x 6-core Dunnington platform. In the 24-VM case there is a stable
performance gain of ~2% for enterprise workloads such as SPECjbb and
sysbench. When the HVM guests run with stub domains, the gain is larger:
4% for the same workloads.

Signed-off-by: Xiaowei Yang <xiaowei.yang@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
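As a usage sketch (the bootloader entry below is illustrative, not part of
this commit), the option is passed on the Xen hypervisor command line, for
example in a GRUB 2 menu entry:

    multiboot /boot/xen.gz vcpu_migration_delay=1000

The tunable defaults to 0, so no delay is applied unless the option is
given on the command line.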
-rw-r--r--  xen/common/sched_credit.c  38
1 file changed, 32 insertions(+), 6 deletions(-)
diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 923cad0079..ecb61a03c8 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -123,7 +123,8 @@
     _MACRO(dom_init) \
     _MACRO(dom_destroy) \
     _MACRO(vcpu_init) \
-    _MACRO(vcpu_destroy)
+    _MACRO(vcpu_destroy) \
+    _MACRO(vcpu_hot)
 
 #ifndef NDEBUG
 #define CSCHED_STATS_EXPAND_CHECKS(_MACRO) \
@@ -395,14 +396,37 @@ __csched_vcpu_check(struct vcpu *vc)
 #define CSCHED_VCPU_CHECK(_vc)
 #endif
 
+/*
+ * Delay, in microseconds, between migrations of a VCPU between PCPUs.
+ * This prevents rapid fluttering of a VCPU between CPUs, and reduces the
+ * implicit overheads such as cache-warming. 1ms (1000) has been measured
+ * as a good value.
+ */
+static unsigned int vcpu_migration_delay;
+integer_param("vcpu_migration_delay", vcpu_migration_delay);
+
+static inline int
+__csched_vcpu_is_cache_hot(struct vcpu *v)
+{
+    int hot = ((NOW() - v->runstate.state_entry_time) <
+               ((uint64_t)vcpu_migration_delay * 1000u));
+
+    if ( hot )
+        CSCHED_STAT_CRANK(vcpu_hot);
+
+    return hot;
+}
+
 static inline int
 __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
 {
     /*
-     * Don't pick up work that's in the peer's scheduling tail. Also only pick
-     * up work that's allowed to run on our CPU.
+     * Don't pick up work that's in the peer's scheduling tail or hot on
+     * peer PCPU. Only pick up work that's allowed to run on our CPU.
      */
-    return !vc->is_running && cpu_isset(dest_cpu, vc->cpu_affinity);
+    return !vc->is_running &&
+           !__csched_vcpu_is_cache_hot(vc) &&
+           cpu_isset(dest_cpu, vc->cpu_affinity);
 }
 
 static int
@@ -1297,7 +1321,8 @@ csched_dump(void)
            "\tmsecs per tick = %dms\n"
            "\tcredits per tick = %d\n"
            "\tticks per tslice = %d\n"
-           "\tticks per acct = %d\n",
+           "\tticks per acct = %d\n"
+           "\tmigration delay = %uus\n",
            csched_priv.ncpus,
            csched_priv.master,
            csched_priv.credit,
@@ -1308,7 +1333,8 @@ csched_dump(void)
            CSCHED_MSECS_PER_TICK,
            CSCHED_CREDITS_PER_TICK,
            CSCHED_TICKS_PER_TSLICE,
-           CSCHED_TICKS_PER_ACCT);
+           CSCHED_TICKS_PER_ACCT,
+           vcpu_migration_delay);
 
     cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers);
     printk("idlers: %s\n", idlers_buf);