aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorKeir Fraser <keir.fraser@citrix.com>2009-11-25 14:05:28 +0000
committerKeir Fraser <keir.fraser@citrix.com>2009-11-25 14:05:28 +0000
commit08a0b4ab0d193b8fbb9270b7ab26f527535ba69c (patch)
treecdef74bb4aeb86dfa80bcac7637500b547780731 /tools
parent83371c34bc5a2eed015a5d034cd82ec243659d85 (diff)
downloadxen-08a0b4ab0d193b8fbb9270b7ab26f527535ba69c.tar.gz
xen-08a0b4ab0d193b8fbb9270b7ab26f527535ba69c.tar.bz2
xen-08a0b4ab0d193b8fbb9270b7ab26f527535ba69c.zip
Replace tsc_native config option with tsc_mode config option
(NOTE: pvrdtscp mode is not finished yet, but all other modes have been tested, so sooner seemed better than later to submit this fairly major patch so we can get more mileage on it before the next release.) New tsc_mode config option supersedes tsc_native and offers a more intelligent default and an additional option for intelligent apps running on PV domains ("pvrdtscp"). For PV domains, default mode will determine if the initial host has a "safe"** TSC (meaning it is always synchronized across all physical CPUs). If so, all domains will execute all rdtsc instructions natively; if not, all domains will emulate all rdtsc instructions while providing the TSC hertz rate of the initial machine. After being restored or live-migrated, all PV domains will emulate all rdtsc instructions. Hence, this default mode guarantees correctness while providing native performance in most conditions. For PV domains, tsc_mode==1 will always emulate rdtsc and tsc_mode==2 will never emulate rdtsc. For tsc_mode==3, rdtsc will never be emulated, but information is provided through pvcpuid instructions and rdtscp instructions so that an app can obtain "safe" pvclock-like TSC information across save/restore and live migration. (Will be completed in a follow-on patch.) For HVM domains, the default mode and "always emulate" mode do the same as tsc_native==0; the other two modes do the same as tsc_native==1. (HVM domains since 3.4 have implemented a tsc_mode=default-like functionality, but also can preserve native TSC across save/restore and live-migration IFF the initial and target machines have a common TSC cycle rate.) ** All newer AMD machines, and Nehalem and future Intel machines have "Invariant TSC"; many newer Intel machines have "Constant TSC" and do not support deep-C sleep states; these and all single-processor machines are "safe". Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/examples/xmexample.hvm11
-rw-r--r--tools/libxc/xc_domain.c49
-rw-r--r--tools/libxc/xc_domain_restore.c12
-rw-r--r--tools/libxc/xc_domain_save.c31
-rw-r--r--tools/libxc/xenctrl.h14
-rw-r--r--tools/python/xen/lowlevel/xc/xc.c17
-rw-r--r--tools/python/xen/xend/XendConfig.py6
-rw-r--r--tools/python/xen/xend/XendDomainInfo.py6
-rw-r--r--tools/python/xen/xm/create.py10
-rw-r--r--tools/python/xen/xm/xenapi_create.py2
10 files changed, 128 insertions, 30 deletions
diff --git a/tools/examples/xmexample.hvm b/tools/examples/xmexample.hvm
index e5ae97f966..09edda6aac 100644
--- a/tools/examples/xmexample.hvm
+++ b/tools/examples/xmexample.hvm
@@ -178,11 +178,16 @@ stdvga=0
serial='pty'
#----------------------------------------------------------------------------
-# tsc_native : TSC mode (0=emulate TSC, 1=native TSC)
+# tsc_mode : TSC mode (0=default, 1=always emulate, 2=never emulate, 3=pvrdtscp)
# emulate TSC provides synced TSC for all vcpus, but loses performance.
# native TSC leverages hardware's TSC(no perf loss), but vcpu's TSC may lose
-# sync due to hardware's unreliable/unsynced TSC between CPUs.
-tsc_native=1
+# sync due to hardware's unreliable/unsynced TSC between CPUs.
+# default intelligently uses native TSC on machines where it is safe, but
+# switches to emulated if necessary after save/restore/migration
+# pvrdtscp is for intelligent apps that use special Xen-only paravirtualized
+# cpuid instructions to obtain offset/scaling/migration info and maximize
+# performance within pools of machines that support the rdtscp instruction
+tsc_mode=0
#-----------------------------------------------------------------------------
# Qemu Monitor, default is disable
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index f872583926..54a5914e9b 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -466,24 +466,61 @@ int xc_domain_set_time_offset(int xc_handle,
return do_domctl(xc_handle, &domctl);
}
-int xc_domain_set_tsc_native(int xc_handle, uint32_t domid, int is_native)
+int xc_domain_disable_migrate(int xc_handle, uint32_t domid)
{
DECLARE_DOMCTL;
- domctl.cmd = XEN_DOMCTL_set_tsc_native;
+ domctl.cmd = XEN_DOMCTL_disable_migrate;
domctl.domain = (domid_t)domid;
- domctl.u.set_tsc_native.is_native = is_native;
+ domctl.u.disable_migrate.disable = 1;
return do_domctl(xc_handle, &domctl);
}
-int xc_domain_disable_migrate(int xc_handle, uint32_t domid)
+int xc_domain_set_tsc_info(int xc_handle,
+ uint32_t domid,
+ uint32_t tsc_mode,
+ uint64_t elapsed_nsec,
+ uint32_t gtsc_khz,
+ uint32_t incarnation)
{
DECLARE_DOMCTL;
- domctl.cmd = XEN_DOMCTL_disable_migrate;
+ domctl.cmd = XEN_DOMCTL_settscinfo;
domctl.domain = (domid_t)domid;
- domctl.u.disable_migrate.disable = 1;
+ domctl.u.tsc_info.info.tsc_mode = tsc_mode;
+ domctl.u.tsc_info.info.elapsed_nsec = elapsed_nsec;
+ domctl.u.tsc_info.info.gtsc_khz = gtsc_khz;
+ domctl.u.tsc_info.info.incarnation = incarnation;
return do_domctl(xc_handle, &domctl);
}
+int xc_domain_get_tsc_info(int xc_handle,
+ uint32_t domid,
+ uint32_t *tsc_mode,
+ uint64_t *elapsed_nsec,
+ uint32_t *gtsc_khz,
+ uint32_t *incarnation)
+{
+ int rc;
+ DECLARE_DOMCTL;
+ xen_guest_tsc_info_t info = { 0 };
+
+ domctl.cmd = XEN_DOMCTL_gettscinfo;
+ domctl.domain = (domid_t)domid;
+ set_xen_guest_handle(domctl.u.tsc_info.out_info, &info);
+ if ( (rc = lock_pages(&info, sizeof(info))) != 0 )
+ return rc;
+ rc = do_domctl(xc_handle, &domctl);
+ if ( rc == 0 )
+ {
+ *tsc_mode = info.tsc_mode;
+ *elapsed_nsec = info.elapsed_nsec;
+ *gtsc_khz = info.gtsc_khz;
+ *incarnation = info.incarnation;
+ }
+ unlock_pages(&info,sizeof(info));
+ return rc;
+}
+
+
int xc_domain_memory_increase_reservation(int xc_handle,
uint32_t domid,
unsigned long nr_extents,
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 01d7924f07..cf6a63c25a 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1084,6 +1084,18 @@ static int pagebuf_get_one(pagebuf_t* buf, int fd, int xch, uint32_t dom)
return -1;
}
return pagebuf_get_one(buf, fd, xch, dom);
+ } else if ( count == -7 ) {
+ uint32_t tsc_mode, khz, incarn;
+ uint64_t nsec;
+ if ( read_exact(fd, &tsc_mode, sizeof(uint32_t)) ||
+ read_exact(fd, &nsec, sizeof(uint64_t)) ||
+ read_exact(fd, &khz, sizeof(uint32_t)) ||
+ read_exact(fd, &incarn, sizeof(uint32_t)) ||
+ xc_domain_set_tsc_info(xch, dom, tsc_mode, nsec, khz, incarn) ) {
+ ERROR("error reading/restoring tsc info");
+ return -1;
+ }
+ return pagebuf_get_one(buf, fd, xch, dom);
} else if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
ERROR("Max batch size exceeded (%d). Giving up.", count);
return -1;
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index 30c1b6d3a4..9d706a92d3 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -841,6 +841,24 @@ static xen_pfn_t *map_and_save_p2m_table(int xc_handle,
return success ? p2m : NULL;
}
+/* must be done AFTER suspend_and_state() */
+static int save_tsc_info(int xc_handle, uint32_t dom, int io_fd)
+{
+ int marker = -7;
+ uint32_t tsc_mode, khz, incarn;
+ uint64_t nsec;
+
+ if ( xc_domain_get_tsc_info(xc_handle, dom, &tsc_mode,
+ &nsec, &khz, &incarn) < 0 ||
+ write_exact(io_fd, &marker, sizeof(marker)) ||
+ write_exact(io_fd, &tsc_mode, sizeof(tsc_mode)) ||
+ write_exact(io_fd, &nsec, sizeof(nsec)) ||
+ write_exact(io_fd, &khz, sizeof(khz)) ||
+ write_exact(io_fd, &incarn, sizeof(incarn)) )
+ return -1;
+ return 0;
+}
+
int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
uint32_t max_factor, uint32_t flags,
struct save_callbacks* callbacks,
@@ -1100,6 +1118,12 @@ int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
goto out;
}
+ if ( !live && save_tsc_info(xc_handle, dom, io_fd) < 0 )
+ {
+ ERROR("Error when writing to state file (tsc)");
+ goto out;
+ }
+
copypages:
#define write_exact(fd, buf, len) write_buffer(last_iter, &ob, (fd), (buf), (len))
#ifdef ratewrite
@@ -1458,6 +1482,13 @@ int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
goto out;
}
+ if ( save_tsc_info(xc_handle, dom, io_fd) < 0 )
+ {
+ ERROR("Error when writing to state file (tsc)");
+ goto out;
+ }
+
+
}
if ( xc_shadow_control(xc_handle, dom,
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index d6ecaf399c..9fc05bb30b 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -628,7 +628,19 @@ int xc_domain_set_time_offset(int xc_handle,
uint32_t domid,
int32_t time_offset_seconds);
-int xc_domain_set_tsc_native(int xc_handle, uint32_t domid, int is_native);
+int xc_domain_set_tsc_info(int xc_handle,
+ uint32_t domid,
+ uint32_t tsc_mode,
+ uint64_t elapsed_nsec,
+ uint32_t gtsc_khz,
+ uint32_t incarnation);
+
+int xc_domain_get_tsc_info(int xc_handle,
+ uint32_t domid,
+ uint32_t *tsc_mode,
+ uint64_t *elapsed_nsec,
+ uint32_t *gtsc_khz,
+ uint32_t *incarnation);
int xc_domain_disable_migrate(int xc_handle, uint32_t domid);
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index 7eaf63b94e..aa780aa303 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -1486,14 +1486,14 @@ static PyObject *pyxc_domain_set_time_offset(XcObject *self, PyObject *args)
return zero;
}
-static PyObject *pyxc_domain_set_tsc_native(XcObject *self, PyObject *args)
+static PyObject *pyxc_domain_set_tsc_info(XcObject *self, PyObject *args)
{
- uint32_t dom, is_native;
+ uint32_t dom, tsc_mode;
- if (!PyArg_ParseTuple(args, "ii", &dom, &is_native))
+ if (!PyArg_ParseTuple(args, "ii", &dom, &tsc_mode))
return NULL;
- if (xc_domain_set_tsc_native(self->xc_handle, dom, is_native) != 0)
+ if (xc_domain_set_tsc_info(self->xc_handle, dom, tsc_mode, 0, 0, 0) != 0)
return pyxc_error_to_exception();
Py_INCREF(zero);
@@ -2036,12 +2036,13 @@ static PyMethodDef pyxc_methods[] = {
" offset [int]: Time offset from UTC in seconds.\n"
"Returns: [int] 0 on success; -1 on error.\n" },
- { "domain_set_tsc_native",
- (PyCFunction)pyxc_domain_set_tsc_native,
+ { "domain_set_tsc_info",
+ (PyCFunction)pyxc_domain_set_tsc_info,
METH_VARARGS, "\n"
- "Set a domain's TSC mode (emulate vs native)\n"
+ "Set a domain's TSC mode\n"
" dom [int]: Domain whose TSC mode is being set.\n"
- " is_native [int]: 1=native, 0=emulate.\n"
+ " tsc_mode [int]: 0=default (monotonic, but native where possible)\n"
+ " 1=always emulate 2=never emulate 3=pvrdtscp\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "domain_disable_migrate",
diff --git a/tools/python/xen/xend/XendConfig.py b/tools/python/xen/xend/XendConfig.py
index 0eadf343d3..3227cd4def 100644
--- a/tools/python/xen/xend/XendConfig.py
+++ b/tools/python/xen/xend/XendConfig.py
@@ -163,7 +163,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
'vncdisplay': int,
'vnclisten': str,
'timer_mode': int,
- 'tsc_native': int,
+ 'tsc_mode': int,
'vpt_align': int,
'viridian': int,
'vncpasswd': str,
@@ -477,8 +477,8 @@ class XendConfig(dict):
if not os.path.exists(self['platform']['device_model']):
raise VmError("device model '%s' not found" % str(self['platform']['device_model']))
- if 'tsc_native' not in self['platform']:
- self['platform']['tsc_native'] = 0
+ if 'tsc_mode' not in self['platform']:
+ self['platform']['tsc_mode'] = 0
if 'nomigrate' not in self['platform']:
self['platform']['nomigrate'] = 0
diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
index 592ba6fad8..8198228b9f 100644
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -2468,9 +2468,9 @@ class XendDomainInfo:
self._recreateDom()
# Set TSC mode of domain
- tsc_native = self.info["platform"].get("tsc_native")
- if arch.type == "x86" and tsc_native is not None:
- xc.domain_set_tsc_native(self.domid, int(tsc_native))
+ tsc_mode = self.info["platform"].get("tsc_mode")
+ if arch.type == "x86" and tsc_mode is not None:
+ xc.domain_set_tsc_info(self.domid, int(tsc_mode))
# Set timer configuration of domain
timer_mode = self.info["platform"].get("timer_mode")
diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
index d6485a59a1..32b19235e1 100644
--- a/tools/python/xen/xm/create.py
+++ b/tools/python/xen/xm/create.py
@@ -221,9 +221,9 @@ gopts.var('timer_mode', val='TIMER_MODE',
use="""Timer mode (0=delay virtual time when ticks are missed;
1=virtual time is always wallclock time.""")
-gopts.var('tsc_native', val='TSC_NATIVE',
+gopts.var('tsc_mode', val='TSC_MODE',
fn=set_int, default=0,
- use="""TSC mode (0=emulate TSC, 1=native TSC).""")
+ use="""TSC mode (0=default, 1=always emulate, 2=never emulate, 3=pvrdtscp).""")
gopts.var('nomigrate', val='NOMIGRATE',
fn=set_int, default=0,
@@ -738,8 +738,8 @@ def configure_image(vals):
if vals.suppress_spurious_page_faults:
config_image.append(['suppress_spurious_page_faults', vals.suppress_spurious_page_faults])
- if vals.tsc_native is not None:
- config_image.append(['tsc_native', vals.tsc_native])
+ if vals.tsc_mode is not None:
+ config_image.append(['tsc_mode', vals.tsc_mode])
if vals.nomigrate is not None:
config_image.append(['nomigrate', vals.nomigrate])
@@ -1036,7 +1036,7 @@ def make_config(vals):
config.append([n, v])
map(add_conf, ['name', 'memory', 'maxmem', 'shadow_memory',
- 'restart', 'on_poweroff', 'tsc_native', 'nomigrate',
+ 'restart', 'on_poweroff', 'tsc_mode', 'nomigrate',
'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail', 'features',
'on_xend_start', 'on_xend_stop', 'target', 'cpuid',
'cpuid_check', 'machine_address_size', 'suppress_spurious_page_faults'])
diff --git a/tools/python/xen/xm/xenapi_create.py b/tools/python/xen/xm/xenapi_create.py
index 9cfdb87d7e..4c0177b4aa 100644
--- a/tools/python/xen/xm/xenapi_create.py
+++ b/tools/python/xen/xm/xenapi_create.py
@@ -1108,7 +1108,7 @@ class sxp2xml:
'pci_msitranslate',
'pci_power_mgmt',
'xen_platform_pci',
- 'tsc_native'
+ 'tsc_mode'
'description',
'nomigrate'
]