aboutsummaryrefslogtreecommitdiffstats
path: root/docs/misc/pvrdtscp.c
diff options
context:
space:
mode:
authorKeir Fraser <keir.fraser@citrix.com>2009-12-11 08:51:21 +0000
committerKeir Fraser <keir.fraser@citrix.com>2009-12-11 08:51:21 +0000
commit04656384a1b9714e43db850c51431008e23450d8 (patch)
treec4ef4317b080326dadd21e16645d1c8f4abf4017 /docs/misc/pvrdtscp.c
parent768e0ace278a0c08818186bddd953b7f050a23b7 (diff)
downloadxen-04656384a1b9714e43db850c51431008e23450d8.tar.gz
xen-04656384a1b9714e43db850c51431008e23450d8.tar.bz2
xen-04656384a1b9714e43db850c51431008e23450d8.zip
docs: Example usage of pvrdtscp algorithm
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Diffstat (limited to 'docs/misc/pvrdtscp.c')
-rw-r--r--docs/misc/pvrdtscp.c307
1 files changed, 307 insertions, 0 deletions
diff --git a/docs/misc/pvrdtscp.c b/docs/misc/pvrdtscp.c
new file mode 100644
index 0000000000..8d25843532
--- /dev/null
+++ b/docs/misc/pvrdtscp.c
@@ -0,0 +1,307 @@
+/* pvrdtscp algorithm
+ *
+ * This sample code demonstrates the use of the paravirtualized rdtscp
+ * algorithm. Using this algorithm, an application may communicate with
+ * the Xen hypervisor (version 4.0+) to obtain timestamp information which
+ * is both monotonically increasing and has a fixed 1 GHz rate, even across
+ * migrations between machines with different TSC rates and offsets.
+ * Further, the algorithm provides performance near the performance of a
+ * native rdtsc/rdtscp instruction -- much faster than emulation PROVIDED
+ * the application is running on a machine on which the rdtscp instruction
+ * is supported and TSC is "safe". The application must also be running in a
+ * PV domain. (HVM domains may be supported at a later time.) On machines
+ * where TSC is unsafe or the rdtscp instruction is not supported, Xen
+ * (v4.0+) provides emulation which is slower but consistent with the pvrdtscp
+ * algorithm, thus providing support for the algorithm for live migration
+ * across all machines.
+ *
+ * More information can be found within the Xen (4.0+) source tree at
+ * docs/misc/tscmode.txt
+ *
+ * Copyright (c) 2009 Oracle Corporation and/or its affiliates.
+ * All rights reserved
+ * Written by: Dan Magenheimer <dan.magenheimer@oracle.com>
+ *
+ * This code is derived from code licensed under the GNU
+ * General Public License ("GPL") version 2 and is therefore itself
+ * also licensed under the GPL version 2.
+ *
+ * This code is known to compile and run on Oracle Enterprise Linux 5 Update 2
+ * using gcc version 4.1.2, but its purpose is to describe the pvrdtscp
+ * algorithm and its ABI to Xen version 4.0+
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/*
+ * Integer aliases and the architecture selector used by the rest of this
+ * file.  When the compiler defines __LP64__ the first branch is taken and
+ * __X86_64__ is defined; otherwise __X86_32__ is defined.  scale_delta()
+ * selects its inline assembly by testing __X86_32__.
+ * NOTE(review): the typedef widths presumably assume an LP64 (64-bit) or
+ * ILP32 (32-bit) x86 data model -- confirm before building elsewhere.
+ */
+#ifdef __LP64__
+#define __X86_64__
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long u64;
+typedef int i32;
+typedef long i64;
+#define NSEC_PER_SEC 1000000000
+#else
+#define __X86_32__
+/* NOTE(review): u16 is declared as unsigned int in this branch, unlike the
+ * unsigned short used in the 64-bit branch -- confirm this is intended. */
+typedef unsigned int u16;
+typedef unsigned long u32;
+typedef unsigned long long u64;
+typedef long i32;
+typedef long long i64;
+#define NSEC_PER_SEC 1000000000L
+#endif
+
+/*
+ * Execute the plain cpuid instruction.
+ *
+ * idx is loaded into eax and sub into ecx before the instruction runs; the
+ * resulting eax/ebx/ecx/edx register values are stored through the four
+ * output pointers.  running_on_xen() calls this variant when asked to
+ * probe as an HVM domain.
+ */
+static inline void hvm_cpuid(u32 idx, u32 sub,
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+ /* seed the in/out registers: *eax and *ecx are both inputs and outputs */
+ *eax = idx, *ecx = sub;
+ asm("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/*
+ * Same register contract as hvm_cpuid(), but the cpuid instruction is
+ * preceded by a ud2a instruction and the ASCII bytes "xen".
+ * NOTE(review): presumably this prefix is what lets Xen intercept cpuid
+ * issued from a PV guest -- confirm against the Xen documentation.
+ * main() first issues this sequence in a forked child and only continues
+ * if the child exited normally, so callers should expect the sequence to
+ * be fatal to the process when it is not intercepted.
+ */
+static inline void pv_cpuid(u32 idx, u32 sub,
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+ /* seed the in/out registers: *eax and *ecx are both inputs and outputs */
+ *eax = idx, *ecx = sub;
+ asm volatile ( "ud2a ; .ascii \"xen\"; cpuid" : "=a" (*eax),
+ "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (*eax), "2" (*ecx));
+}
+
+/*
+ * Execute rdtscp (emitted as the raw opcode bytes 0x0f,0x01,0xf9).
+ *
+ * Returns the 64-bit counter assembled from edx (high half) and eax (low
+ * half), and stores the value the instruction leaves in ecx into *aux.
+ */
+static inline u64 do_rdtscp(u32 *aux)
+{
+    u32 lo32, hi32;
+
+    asm volatile(".byte 0x0f,0x01,0xf9":"=a"(lo32),"=d"(hi32),"=c" (*aux));
+    return lo32 | ((u64)hi32 << 32);
+}
+
+/*
+ * Return the ebx value of cpuid leaf 0x40000003, sub-leaf 0, queried via
+ * pv_cpuid().  main() treats the value 3 as "pvrdtscp mode".
+ */
+static inline int get_xen_tsc_mode(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    pv_cpuid(0x40000003, 0, &eax, &ebx, &ecx, &edx);
+    return ebx;
+}
+
+/*
+ * Return bit 0 of the eax value of cpuid leaf 0x40000003, sub-leaf 0,
+ * queried via pv_cpuid().
+ */
+static inline int get_xen_vtsc(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    pv_cpuid(0x40000003, 0, &eax, &ebx, &ecx, &edx);
+    return eax & 1;
+}
+
+/*
+ * Return the ecx value of cpuid leaf 0x40000003, sub-leaf 0, queried via
+ * pv_cpuid().
+ */
+static inline int get_xen_vtsc_khz(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    pv_cpuid(0x40000003, 0, &eax, &ebx, &ecx, &edx);
+    return ecx;
+}
+
+/*
+ * Return the eax value of cpuid leaf 0x40000003, sub-leaf 2, queried via
+ * pv_cpuid().
+ */
+static inline u32 get_xen_cpu_khz(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    pv_cpuid(0x40000003, 2, &eax, &ebx, &ecx, &edx);
+    return eax;
+}
+
+/*
+ * Return the edx value of cpuid leaf 0x40000003, sub-leaf 0, queried via
+ * pv_cpuid().
+ */
+static inline u32 get_xen_incarnation(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    pv_cpuid(0x40000003, 0, &eax, &ebx, &ecx, &edx);
+    return edx;
+}
+
+/*
+ * Fetch the time-conversion values from cpuid leaf 0x40000003, sub-leaf 1
+ * (via pv_cpuid()).
+ *
+ * offset   - receives the 64-bit value built from eax (low half) and
+ *            ebx (high half)
+ * mul_frac - receives ecx
+ * shift    - receives edx
+ *
+ * get_pvrdtscp_timestamp() passes mul_frac and shift to scale_delta() and
+ * subtracts offset from the scaled result.
+ */
+static inline void get_xen_time_values(u64 *offset, u32 *mul_frac, u32 *shift)
+{
+    u32 off_lo, off_hi;
+
+    pv_cpuid(0x40000003,1,&off_lo,&off_hi,mul_frac,shift);
+    *offset = off_lo | ((u64)off_hi << 32);
+}
+
+/*
+ * Scale a 64-bit delta by a shift and a 32-bit fixed-point multiplier.
+ *
+ * delta is first shifted left by tsc_shift (or right by -tsc_shift when
+ * tsc_shift is negative), then multiplied by tsc_mul_frac; the result is
+ * the product shifted right by 32 bits, truncated to 64 bits.
+ */
+static inline u64 scale_delta(u64 delta, u32 tsc_mul_frac, i32 tsc_shift)
+{
+    u64 product;
+#ifdef __X86_32__
+    u32 tmp1, tmp2;
+#else
+    u64 hi;    /* dummy output: tells the compiler that mul overwrites rdx */
+#endif
+
+    if ( tsc_shift < 0 )
+        delta >>= -tsc_shift;
+    else
+        delta <<= tsc_shift;
+
+#ifdef __X86_32__
+    /* two 32x32 multiplies: (lo * mul) >> 32, plus hi * mul */
+    asm (
+        "mul %5 ; "
+        "mov %4,%%eax ; "
+        "mov %%edx,%4 ; "
+        "mul %5 ; "
+        "xor %5,%5 ; "
+        "add %4,%%eax ; "
+        "adc %5,%%edx ; "
+        : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+        : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (tsc_mul_frac) );
+#else
+    /*
+     * mul leaves the 128-bit product in rdx:rax; shrd then shifts that
+     * pair right by 32 into rax.  rdx is both the multiplier input and
+     * overwritten by mul, so it is tied to the unused output 'hi' rather
+     * than being declared as an input-only operand.
+     */
+    asm (
+        "mul %%rdx ; shrd $32,%%rdx,%%rax"
+        : "=a" (product), "=d" (hi)
+        : "0" (delta), "1" ((u64)tsc_mul_frac) );
+    (void)hi;
+#endif
+
+    return product;
+}
+
+/*
+ * Return the current pvrdtscp timestamp, computed as
+ * scale_delta(tsc, mul_frac, shift) - offset.
+ *
+ * The conversion values and the last seen rdtscp aux value are cached in
+ * function-local statics.  Whenever rdtscp reports an aux value different
+ * from the cached one, the conversion values are re-read and the counter
+ * is sampled again; in that case *discontinuity is set to 1, otherwise
+ * to 0.
+ *
+ * The returned value always advances: if the new timestamp is not greater
+ * than the previously returned one, a message is printed and the previous
+ * value plus one is returned instead.
+ */
+static inline u64 get_pvrdtscp_timestamp(int *discontinuity)
+{
+    static int initialized = 0;
+    static u64 prev_ns = 0;
+    static u32 cached_aux;
+    static u64 ns_offset;
+    static u32 mul_frac, shift;
+    u32 aux;
+    u64 tsc, now_ns;
+
+    if (!initialized) {
+        /* prime the cached aux value, conversion values and baseline */
+        tsc = do_rdtscp(&cached_aux);
+        get_xen_time_values(&ns_offset, &mul_frac, &shift);
+        prev_ns = scale_delta(tsc, mul_frac, shift) - ns_offset;
+        initialized = 1;
+    }
+
+    *discontinuity = 0;
+    for (tsc = do_rdtscp(&aux); aux != cached_aux; tsc = do_rdtscp(&aux)) {
+        /* aux changed: refresh the conversion values and sample again */
+        cached_aux = aux;
+        get_xen_time_values(&ns_offset, &mul_frac, &shift);
+        *discontinuity = 1;
+    }
+
+    /* convert the counter sample using the cached conversion values */
+    now_ns = scale_delta(tsc, mul_frac, shift) - ns_offset;
+
+    if ((i64)(now_ns - prev_ns) <= 0) {
+        /* not expected to happen; force the result forward by one */
+        printf("Time went backwards by %lluns\n",
+               (unsigned long long)(prev_ns - now_ns));
+        return ++prev_ns;
+    }
+
+    prev_ns = now_ns;
+    return now_ns;
+}
+
+/* Values for the 'hvm' argument of running_on_xen() */
+#define HVM 1
+#define PVM 0
+
+/*
+ * Scan cpuid leaves 0x40000000, 0x40000100, ... (below 0x40010000) for the
+ * signature "XenVMMXenVMM" in ebx/ecx/edx, with eax reporting at least
+ * base+2.
+ *
+ * hvm selects the probe: HVM uses hvm_cpuid(), anything else pv_cpuid().
+ * On a match, leaf base+1 is read and its eax is split into
+ * *version_major (upper 16 bits) and *version_minor (lower 16 bits), and
+ * 1 is returned.  Returns 0, leaving the version outputs untouched, when
+ * no leaf matches.
+ */
+static int running_on_xen(int hvm, u16 *version_major, u16 *version_minor)
+{
+    union { char csig[16]; u32 u[4]; } sig;
+    void (*probe)(u32, u32, u32 *, u32 *, u32 *, u32 *);
+    u32 eax, ebx, ecx, edx;
+    u32 base = 0x40000000;
+
+    probe = (hvm == HVM) ? hvm_cpuid : pv_cpuid;
+
+    while (base < 0x40010000) {
+        probe(base, 0, &eax, &ebx, &ecx, &edx);
+
+        /* assemble the 12 signature bytes and NUL-terminate them */
+        sig.u[0] = ebx;
+        sig.u[1] = ecx;
+        sig.u[2] = edx;
+        sig.csig[12] = '\0';
+
+        if (strcmp("XenVMMXenVMM", &sig.csig[0]) != 0 || eax < (base + 2)) {
+            base += 0x100;
+            continue;
+        }
+
+        probe(base + 1, 0, &eax, &ebx, &ecx, &edx);
+        *version_major = (eax >> 16) & 0xffff;
+        *version_minor = eax & 0xffff;
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Demo driver.
+ *
+ * 1. Exits if running_on_xen(HVM, ...) finds Xen through plain cpuid.
+ * 2. Forks a child that issues one pv_cpuid(); if the child does not exit
+ *    normally the parent exits quietly, so the parent itself only issues
+ *    pv_cpuid() after the sequence has been shown to be survivable.
+ * 3. Exits unless running_on_xen(PVM, ...) succeeds and the tsc mode is 3.
+ * 4. Loops forever sampling get_pvrdtscp_timestamp(), reporting
+ *    discontinuities, backwards steps, and forward jumps above 200000000ns.
+ *
+ * Every exit path uses status 0, as in the rest of this sample.
+ */
+int main(int ac, char **av)
+{
+    u32 dummy;
+    u16 version_hi, version_lo;
+    u64 ts, last_ts;
+    int status, discontinuity = 0;
+    pid_t pid;
+
+    (void)ac;
+    (void)av;
+
+    if (running_on_xen(HVM,&version_hi,&version_lo)) {
+        printf("running on Xen v%d.%d as an HVM domain, "
+               "pvrdtsc not supported, exiting\n",
+               (int)version_hi, (int)version_lo);
+        exit(0);
+    }
+    pid = fork();
+    if (pid == -1) {
+        fprintf(stderr,"Huh? Fork failed\n");
+        return 0;
+    }
+    else if (pid == 0) { /* child */
+        pv_cpuid(0x40000000,0,&dummy,&dummy,&dummy,&dummy);
+        exit(0);
+    }
+    /* status is only meaningful if waitpid() itself succeeded */
+    if (waitpid(pid,&status,0) == -1 || !WIFEXITED(status))
+        exit(0);
+    if (!running_on_xen(PVM,&version_hi,&version_lo)) {
+        printf("not running on Xen, exiting\n");
+        exit(0);
+    }
+    printf("running on Xen v%d.%d as a PV domain\n",
+           (int)version_hi, (int)version_lo);
+    if ( version_hi <= 3 ) {
+        printf("pvrdtscp requires Xen version 4.0 or greater\n");
+        /* exit(0); FIXME after xen-unstable is officially v4.0 */
+    }
+    if ( get_xen_tsc_mode() != 3 ) {
+        printf("tsc_mode not pvrdtscp, set tsc_mode=3, exiting\n");
+        /* the message announces an exit, so actually exit */
+        exit(0);
+    }
+
+    /* OK, we are on Xen, now loop forever checking timestamps */
+    ts = get_pvrdtscp_timestamp(&discontinuity);
+    /* u64 is not unsigned long long on every build; cast to match %llu */
+    printf("Starting with ts=%lluns 0x%llx (%llusec)\n",
+           (unsigned long long)ts, (unsigned long long)ts,
+           (unsigned long long)(ts/NSEC_PER_SEC));
+    printf("incarn=%lu: vtsc=%lu, vtsc_khz=%lu, phys cpu_khz=%lu\n",
+           (unsigned long)get_xen_incarnation(),
+           (unsigned long)get_xen_vtsc(),
+           (unsigned long)get_xen_vtsc_khz(),
+           (unsigned long)get_xen_cpu_khz());
+    ts = get_pvrdtscp_timestamp(&discontinuity);
+    last_ts = ts;
+    while (1) {
+        ts = get_pvrdtscp_timestamp(&discontinuity);
+        if (discontinuity) {
+            printf("migrated/restored, incarn=%lu: "
+                   "vtsc now %lu, vtsc_khz=%lu, phys cpu_khz=%lu\n",
+                   (unsigned long)get_xen_incarnation(),
+                   (unsigned long)get_xen_vtsc(),
+                   (unsigned long)get_xen_vtsc_khz(),
+                   (unsigned long)get_xen_cpu_khz());
+        }
+        if (ts < last_ts) {
+            /* this should NEVER happen, especially since there
+             * is a check for it in get_pvrdtscp_timestamp() */
+            printf("Time went backwards: %lluns (%llusec)\n",
+                   (unsigned long long)(last_ts-ts),
+                   (unsigned long long)((last_ts-ts)/NSEC_PER_SEC));
+        }
+        if (ts > last_ts + 200000000LL) {
+            /* this is OK, usually about 2sec for save/restore
+             * and a fraction of a second for live migrate */
+            printf("Time jumped forward %lluns (%llusec)\n",
+                   (unsigned long long)(ts-last_ts),
+                   (unsigned long long)((ts-last_ts)/NSEC_PER_SEC));
+        }
+        last_ts = ts;
+    }
+}